|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.15198026808855228, |
|
"eval_steps": 500, |
|
"global_step": 131000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.28216952085494995, |
|
"learning_rate": 9.998839845281768e-05, |
|
"loss": 5.404, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.7618443965911865, |
|
"learning_rate": 9.997679690563535e-05, |
|
"loss": 4.9594, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.8141049742698669, |
|
"learning_rate": 9.996519535845302e-05, |
|
"loss": 4.9031, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.8989076614379883, |
|
"learning_rate": 9.995359381127067e-05, |
|
"loss": 4.8882, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 1.622833251953125, |
|
"learning_rate": 9.994199226408834e-05, |
|
"loss": 4.8752, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.2884034812450409, |
|
"learning_rate": 9.993039071690601e-05, |
|
"loss": 4.8623, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.4078500270843506, |
|
"learning_rate": 9.991878916972368e-05, |
|
"loss": 4.8539, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 4.187527179718018, |
|
"learning_rate": 9.990718762254134e-05, |
|
"loss": 4.8479, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 1.675564169883728, |
|
"learning_rate": 9.989558607535901e-05, |
|
"loss": 4.8473, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 1.7946183681488037, |
|
"learning_rate": 9.988398452817668e-05, |
|
"loss": 4.8428, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.8930752277374268, |
|
"learning_rate": 9.987238298099435e-05, |
|
"loss": 4.8403, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 1.8201618194580078, |
|
"learning_rate": 9.986078143381202e-05, |
|
"loss": 4.8341, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 1.5597014427185059, |
|
"learning_rate": 9.984917988662968e-05, |
|
"loss": 4.8324, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.2638113498687744, |
|
"learning_rate": 9.983757833944736e-05, |
|
"loss": 4.8274, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 1.5054211616516113, |
|
"learning_rate": 9.982597679226502e-05, |
|
"loss": 4.8262, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.0371103286743164, |
|
"learning_rate": 9.981437524508269e-05, |
|
"loss": 4.826, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.8935482501983643, |
|
"learning_rate": 9.980277369790035e-05, |
|
"loss": 4.823, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 1.6551401615142822, |
|
"learning_rate": 9.979117215071803e-05, |
|
"loss": 4.8232, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 1.8115674257278442, |
|
"learning_rate": 9.977957060353569e-05, |
|
"loss": 4.8182, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.1292033195495605, |
|
"learning_rate": 9.976796905635336e-05, |
|
"loss": 4.8205, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 1.3206559419631958, |
|
"learning_rate": 9.975636750917103e-05, |
|
"loss": 4.8176, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.540377140045166, |
|
"learning_rate": 9.97447659619887e-05, |
|
"loss": 4.8147, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 4.180980682373047, |
|
"learning_rate": 9.973316441480637e-05, |
|
"loss": 4.8146, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 1.8660849332809448, |
|
"learning_rate": 9.972156286762403e-05, |
|
"loss": 4.8105, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.911318778991699, |
|
"learning_rate": 9.97099613204417e-05, |
|
"loss": 4.8131, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 4.366623878479004, |
|
"learning_rate": 9.969835977325937e-05, |
|
"loss": 4.8113, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 1.6765426397323608, |
|
"learning_rate": 9.968675822607704e-05, |
|
"loss": 4.8092, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.4829912185668945, |
|
"learning_rate": 9.96751566788947e-05, |
|
"loss": 4.8102, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.6401925086975098, |
|
"learning_rate": 9.966355513171237e-05, |
|
"loss": 4.8039, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.1426470279693604, |
|
"learning_rate": 9.965195358453004e-05, |
|
"loss": 4.8059, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.311486005783081, |
|
"learning_rate": 9.96403520373477e-05, |
|
"loss": 4.8068, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.069549083709717, |
|
"learning_rate": 9.962875049016536e-05, |
|
"loss": 4.8054, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.6770570278167725, |
|
"learning_rate": 9.961714894298305e-05, |
|
"loss": 4.8008, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 1.9226771593093872, |
|
"learning_rate": 9.960554739580072e-05, |
|
"loss": 4.8004, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.2032277584075928, |
|
"learning_rate": 9.959394584861837e-05, |
|
"loss": 4.7977, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.310493230819702, |
|
"learning_rate": 9.958234430143604e-05, |
|
"loss": 4.7997, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.559783935546875, |
|
"learning_rate": 9.957074275425372e-05, |
|
"loss": 4.7961, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.714775562286377, |
|
"learning_rate": 9.955914120707139e-05, |
|
"loss": 4.7943, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.100062847137451, |
|
"learning_rate": 9.954753965988904e-05, |
|
"loss": 4.7961, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 5.665365695953369, |
|
"learning_rate": 9.953593811270671e-05, |
|
"loss": 4.7992, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.1073904037475586, |
|
"learning_rate": 9.952433656552438e-05, |
|
"loss": 4.7957, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.3605892658233643, |
|
"learning_rate": 9.951273501834205e-05, |
|
"loss": 4.7959, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 4.566422939300537, |
|
"learning_rate": 9.950113347115971e-05, |
|
"loss": 4.7975, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.812922716140747, |
|
"learning_rate": 9.948953192397738e-05, |
|
"loss": 4.7942, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.2284045219421387, |
|
"learning_rate": 9.947793037679507e-05, |
|
"loss": 4.7942, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 4.2551093101501465, |
|
"learning_rate": 9.946632882961272e-05, |
|
"loss": 4.791, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.8002365827560425, |
|
"learning_rate": 9.945472728243039e-05, |
|
"loss": 4.7905, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.5465445518493652, |
|
"learning_rate": 9.944312573524805e-05, |
|
"loss": 4.7939, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 4.329401969909668, |
|
"learning_rate": 9.943152418806573e-05, |
|
"loss": 4.795, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.2818596363067627, |
|
"learning_rate": 9.941992264088339e-05, |
|
"loss": 4.7952, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9942195415496826, |
|
"learning_rate": 9.940832109370106e-05, |
|
"loss": 4.791, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 5.174030780792236, |
|
"learning_rate": 9.939671954651872e-05, |
|
"loss": 4.7921, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.8438475131988525, |
|
"learning_rate": 9.93851179993364e-05, |
|
"loss": 4.79, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.8706634044647217, |
|
"learning_rate": 9.937351645215407e-05, |
|
"loss": 4.7902, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 5.187880516052246, |
|
"learning_rate": 9.936191490497173e-05, |
|
"loss": 4.7917, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.2282915115356445, |
|
"learning_rate": 9.93503133577894e-05, |
|
"loss": 4.7869, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.4940683841705322, |
|
"learning_rate": 9.933871181060707e-05, |
|
"loss": 4.7932, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 4.8060383796691895, |
|
"learning_rate": 9.932711026342474e-05, |
|
"loss": 4.788, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.129607677459717, |
|
"learning_rate": 9.93155087162424e-05, |
|
"loss": 4.7885, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.5312142372131348, |
|
"learning_rate": 9.930390716906007e-05, |
|
"loss": 4.786, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 4.944730281829834, |
|
"learning_rate": 9.929230562187774e-05, |
|
"loss": 4.7867, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.5370301008224487, |
|
"learning_rate": 9.928070407469541e-05, |
|
"loss": 4.7864, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.7849390506744385, |
|
"learning_rate": 9.926910252751306e-05, |
|
"loss": 4.7885, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 5.041503429412842, |
|
"learning_rate": 9.925750098033075e-05, |
|
"loss": 4.7867, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.8032560348510742, |
|
"learning_rate": 9.924589943314842e-05, |
|
"loss": 4.7851, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.296067953109741, |
|
"learning_rate": 9.923429788596608e-05, |
|
"loss": 4.7847, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 5.063502311706543, |
|
"learning_rate": 9.922269633878375e-05, |
|
"loss": 4.7832, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.149693012237549, |
|
"learning_rate": 9.921109479160142e-05, |
|
"loss": 4.779, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.4809441566467285, |
|
"learning_rate": 9.919949324441909e-05, |
|
"loss": 4.7823, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 4.948490142822266, |
|
"learning_rate": 9.918789169723674e-05, |
|
"loss": 4.7842, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.5616728067398071, |
|
"learning_rate": 9.917629015005441e-05, |
|
"loss": 4.7861, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.1099443435668945, |
|
"learning_rate": 9.916468860287209e-05, |
|
"loss": 4.7829, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 5.170409679412842, |
|
"learning_rate": 9.915308705568976e-05, |
|
"loss": 4.7817, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.6011829376220703, |
|
"learning_rate": 9.914148550850741e-05, |
|
"loss": 4.7805, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.222562789916992, |
|
"learning_rate": 9.912988396132508e-05, |
|
"loss": 4.7802, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 4.803954601287842, |
|
"learning_rate": 9.911828241414277e-05, |
|
"loss": 4.7828, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.8963723182678223, |
|
"learning_rate": 9.910668086696042e-05, |
|
"loss": 4.7823, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.4876952171325684, |
|
"learning_rate": 9.90950793197781e-05, |
|
"loss": 4.7768, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 5.709277629852295, |
|
"learning_rate": 9.908347777259575e-05, |
|
"loss": 4.7806, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.8382807970046997, |
|
"learning_rate": 9.907187622541343e-05, |
|
"loss": 4.7791, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.2304890155792236, |
|
"learning_rate": 9.906027467823109e-05, |
|
"loss": 4.778, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 5.252920150756836, |
|
"learning_rate": 9.904867313104876e-05, |
|
"loss": 4.7796, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.16917884349823, |
|
"learning_rate": 9.903707158386642e-05, |
|
"loss": 4.7817, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.6509056091308594, |
|
"learning_rate": 9.90254700366841e-05, |
|
"loss": 4.778, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.1537126302719116, |
|
"learning_rate": 9.901386848950176e-05, |
|
"loss": 4.7755, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.7410359382629395, |
|
"learning_rate": 9.900226694231943e-05, |
|
"loss": 4.7751, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.2682690620422363, |
|
"learning_rate": 9.89906653951371e-05, |
|
"loss": 4.7741, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.825622320175171, |
|
"learning_rate": 9.897906384795477e-05, |
|
"loss": 4.7759, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.8567586541175842, |
|
"learning_rate": 9.896746230077244e-05, |
|
"loss": 4.7738, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.5261878967285156, |
|
"learning_rate": 9.89558607535901e-05, |
|
"loss": 4.7722, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.7664969563484192, |
|
"learning_rate": 9.894425920640777e-05, |
|
"loss": 4.7757, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.6178815364837646, |
|
"learning_rate": 9.893265765922544e-05, |
|
"loss": 4.7715, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 5.013388156890869, |
|
"learning_rate": 9.892105611204311e-05, |
|
"loss": 4.7718, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.765504777431488, |
|
"learning_rate": 9.890945456486077e-05, |
|
"loss": 4.7707, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.0577245950698853, |
|
"learning_rate": 9.889785301767844e-05, |
|
"loss": 4.7709, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.5963281393051147, |
|
"learning_rate": 9.888625147049611e-05, |
|
"loss": 4.7738, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.5044456720352173, |
|
"learning_rate": 9.887464992331378e-05, |
|
"loss": 4.772, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.449915647506714, |
|
"learning_rate": 9.886304837613145e-05, |
|
"loss": 4.7695, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 4.757066249847412, |
|
"learning_rate": 9.885144682894912e-05, |
|
"loss": 4.7723, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.8173577785491943, |
|
"learning_rate": 9.883984528176679e-05, |
|
"loss": 4.7711, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.4169297218322754, |
|
"learning_rate": 9.882824373458445e-05, |
|
"loss": 4.7739, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 5.771091938018799, |
|
"learning_rate": 9.881664218740212e-05, |
|
"loss": 4.7747, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.6634373664855957, |
|
"learning_rate": 9.880504064021979e-05, |
|
"loss": 4.7732, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 4.215581893920898, |
|
"learning_rate": 9.879343909303746e-05, |
|
"loss": 4.7704, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 6.152705192565918, |
|
"learning_rate": 9.878183754585511e-05, |
|
"loss": 4.7699, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.5891577005386353, |
|
"learning_rate": 9.877023599867278e-05, |
|
"loss": 4.7717, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 4.256149768829346, |
|
"learning_rate": 9.875863445149045e-05, |
|
"loss": 4.7664, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 4.941390037536621, |
|
"learning_rate": 9.874703290430813e-05, |
|
"loss": 4.7721, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.2469244003295898, |
|
"learning_rate": 9.87354313571258e-05, |
|
"loss": 4.7676, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.073249340057373, |
|
"learning_rate": 9.872382980994345e-05, |
|
"loss": 4.7644, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.6434860229492188, |
|
"learning_rate": 9.871222826276114e-05, |
|
"loss": 4.7645, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.8313368558883667, |
|
"learning_rate": 9.87006267155788e-05, |
|
"loss": 4.7637, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.8201664090156555, |
|
"learning_rate": 9.868902516839646e-05, |
|
"loss": 4.7667, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.47900694608688354, |
|
"learning_rate": 9.867742362121412e-05, |
|
"loss": 4.7652, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.9311307072639465, |
|
"learning_rate": 9.86658220740318e-05, |
|
"loss": 4.766, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 8.805180549621582, |
|
"learning_rate": 9.865422052684946e-05, |
|
"loss": 4.7641, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 9.388609886169434, |
|
"learning_rate": 9.864261897966713e-05, |
|
"loss": 4.7717, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.2487683296203613, |
|
"learning_rate": 9.86310174324848e-05, |
|
"loss": 4.7689, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 4.081148624420166, |
|
"learning_rate": 9.861941588530247e-05, |
|
"loss": 4.7673, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 7.44789981842041, |
|
"learning_rate": 9.860781433812014e-05, |
|
"loss": 4.7722, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.649463176727295, |
|
"learning_rate": 9.85962127909378e-05, |
|
"loss": 4.7667, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.827794075012207, |
|
"learning_rate": 9.858461124375547e-05, |
|
"loss": 4.768, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 6.945425510406494, |
|
"learning_rate": 9.857300969657314e-05, |
|
"loss": 4.7689, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.1629202365875244, |
|
"learning_rate": 9.856140814939081e-05, |
|
"loss": 4.7656, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.5653738379478455, |
|
"learning_rate": 9.854980660220847e-05, |
|
"loss": 4.7631, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.724161684513092, |
|
"learning_rate": 9.853820505502614e-05, |
|
"loss": 4.7622, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.5713109970092773, |
|
"learning_rate": 9.852660350784381e-05, |
|
"loss": 4.7633, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.9219802021980286, |
|
"learning_rate": 9.851500196066148e-05, |
|
"loss": 4.7616, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.7306973338127136, |
|
"learning_rate": 9.850340041347914e-05, |
|
"loss": 4.7616, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.1123743057250977, |
|
"learning_rate": 9.849179886629682e-05, |
|
"loss": 4.7635, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.5764673352241516, |
|
"learning_rate": 9.848019731911449e-05, |
|
"loss": 4.7602, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.456514835357666, |
|
"learning_rate": 9.846859577193215e-05, |
|
"loss": 4.762, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.6817034482955933, |
|
"learning_rate": 9.845699422474982e-05, |
|
"loss": 4.7585, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.166177749633789, |
|
"learning_rate": 9.844539267756749e-05, |
|
"loss": 4.7653, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.546487808227539, |
|
"learning_rate": 9.843379113038516e-05, |
|
"loss": 4.7607, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.610224187374115, |
|
"learning_rate": 9.842218958320282e-05, |
|
"loss": 4.7598, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.5171063542366028, |
|
"learning_rate": 9.841058803602049e-05, |
|
"loss": 4.7598, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.632072687149048, |
|
"learning_rate": 9.839898648883816e-05, |
|
"loss": 4.7634, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.6531881093978882, |
|
"learning_rate": 9.838738494165583e-05, |
|
"loss": 4.7585, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.6771488189697266, |
|
"learning_rate": 9.837578339447348e-05, |
|
"loss": 4.757, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.341787338256836, |
|
"learning_rate": 9.836418184729115e-05, |
|
"loss": 4.7579, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.1839771270751953, |
|
"learning_rate": 9.835258030010884e-05, |
|
"loss": 4.7581, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.8916560411453247, |
|
"learning_rate": 9.83409787529265e-05, |
|
"loss": 4.7599, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.7338578701019287, |
|
"learning_rate": 9.832937720574417e-05, |
|
"loss": 4.7607, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.7070731520652771, |
|
"learning_rate": 9.831777565856182e-05, |
|
"loss": 4.7587, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.5772482752799988, |
|
"learning_rate": 9.83061741113795e-05, |
|
"loss": 4.763, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.9034737348556519, |
|
"learning_rate": 9.829457256419716e-05, |
|
"loss": 4.7597, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.127034902572632, |
|
"learning_rate": 9.828297101701483e-05, |
|
"loss": 4.7573, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.8689446449279785, |
|
"learning_rate": 9.82713694698325e-05, |
|
"loss": 4.757, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.7055838108062744, |
|
"learning_rate": 9.825976792265017e-05, |
|
"loss": 4.7557, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.9538499116897583, |
|
"learning_rate": 9.824816637546783e-05, |
|
"loss": 4.7611, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.487042784690857, |
|
"learning_rate": 9.82365648282855e-05, |
|
"loss": 4.7574, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.1112914085388184, |
|
"learning_rate": 9.822496328110317e-05, |
|
"loss": 4.7566, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.9239519834518433, |
|
"learning_rate": 9.821336173392084e-05, |
|
"loss": 4.7587, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.975541353225708, |
|
"learning_rate": 9.820176018673851e-05, |
|
"loss": 4.7577, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.5043540000915527, |
|
"learning_rate": 9.819015863955617e-05, |
|
"loss": 4.7565, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.8457772731781006, |
|
"learning_rate": 9.817855709237384e-05, |
|
"loss": 4.7573, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.5468181371688843, |
|
"learning_rate": 9.816695554519151e-05, |
|
"loss": 4.7555, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 5.87544584274292, |
|
"learning_rate": 9.815535399800918e-05, |
|
"loss": 4.7588, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.0000234842300415, |
|
"learning_rate": 9.814375245082684e-05, |
|
"loss": 4.753, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.6940212249755859, |
|
"learning_rate": 9.813215090364452e-05, |
|
"loss": 4.7543, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.9902929663658142, |
|
"learning_rate": 9.812054935646219e-05, |
|
"loss": 4.7562, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 17.795963287353516, |
|
"learning_rate": 9.810894780927985e-05, |
|
"loss": 4.7551, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 7.341447830200195, |
|
"learning_rate": 9.809734626209752e-05, |
|
"loss": 4.7569, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 7.844032287597656, |
|
"learning_rate": 9.808574471491519e-05, |
|
"loss": 4.7546, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.8450008034706116, |
|
"learning_rate": 9.807414316773286e-05, |
|
"loss": 4.7554, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.3422025442123413, |
|
"learning_rate": 9.806254162055052e-05, |
|
"loss": 4.7524, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.6606966853141785, |
|
"learning_rate": 9.805094007336819e-05, |
|
"loss": 4.7533, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.6223414540290833, |
|
"learning_rate": 9.803933852618586e-05, |
|
"loss": 4.7544, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.53001469373703, |
|
"learning_rate": 9.802773697900353e-05, |
|
"loss": 4.7529, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.538820743560791, |
|
"learning_rate": 9.801613543182119e-05, |
|
"loss": 4.7514, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 6.79536247253418, |
|
"learning_rate": 9.800453388463886e-05, |
|
"loss": 4.7522, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.8284698724746704, |
|
"learning_rate": 9.799293233745654e-05, |
|
"loss": 4.7532, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.8321298956871033, |
|
"learning_rate": 9.79813307902742e-05, |
|
"loss": 4.7553, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 5.962058067321777, |
|
"learning_rate": 9.796972924309187e-05, |
|
"loss": 4.7641, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 11.184019088745117, |
|
"learning_rate": 9.795812769590952e-05, |
|
"loss": 4.7608, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.882926106452942, |
|
"learning_rate": 9.794652614872721e-05, |
|
"loss": 4.7582, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 4.5953168869018555, |
|
"learning_rate": 9.793492460154486e-05, |
|
"loss": 4.756, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 7.986889839172363, |
|
"learning_rate": 9.792332305436253e-05, |
|
"loss": 4.759, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.7497138977050781, |
|
"learning_rate": 9.791172150718019e-05, |
|
"loss": 4.7567, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.4782524108886719, |
|
"learning_rate": 9.790011995999788e-05, |
|
"loss": 4.753, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.7002395987510681, |
|
"learning_rate": 9.788851841281553e-05, |
|
"loss": 4.7513, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.49617066979408264, |
|
"learning_rate": 9.78769168656332e-05, |
|
"loss": 4.7508, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.459750235080719, |
|
"learning_rate": 9.786531531845087e-05, |
|
"loss": 4.7518, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.6577441692352295, |
|
"learning_rate": 9.785371377126854e-05, |
|
"loss": 4.7512, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.284149408340454, |
|
"learning_rate": 9.784211222408621e-05, |
|
"loss": 4.752, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.703396797180176, |
|
"learning_rate": 9.783051067690387e-05, |
|
"loss": 4.754, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.5742882490158081, |
|
"learning_rate": 9.781890912972154e-05, |
|
"loss": 4.7521, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.5588614344596863, |
|
"learning_rate": 9.780730758253921e-05, |
|
"loss": 4.7541, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.6749471426010132, |
|
"learning_rate": 9.779570603535688e-05, |
|
"loss": 4.7521, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.4725801944732666, |
|
"learning_rate": 9.778410448817454e-05, |
|
"loss": 4.7503, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.7287809252738953, |
|
"learning_rate": 9.777250294099222e-05, |
|
"loss": 4.7492, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.7486460208892822, |
|
"learning_rate": 9.776090139380988e-05, |
|
"loss": 4.7488, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.859056234359741, |
|
"learning_rate": 9.774929984662755e-05, |
|
"loss": 4.7492, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.5980411171913147, |
|
"learning_rate": 9.773769829944522e-05, |
|
"loss": 4.7534, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.6939360499382019, |
|
"learning_rate": 9.772609675226289e-05, |
|
"loss": 4.7484, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 7.793490886688232, |
|
"learning_rate": 9.771449520508056e-05, |
|
"loss": 4.751, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.7629129886627197, |
|
"learning_rate": 9.770289365789822e-05, |
|
"loss": 4.7474, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9731252193450928, |
|
"learning_rate": 9.769129211071589e-05, |
|
"loss": 4.7499, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.9709353446960449, |
|
"learning_rate": 9.767969056353356e-05, |
|
"loss": 4.7488, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.8048213720321655, |
|
"learning_rate": 9.766808901635123e-05, |
|
"loss": 4.7483, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.6946632862091064, |
|
"learning_rate": 9.765648746916889e-05, |
|
"loss": 4.748, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 13.102263450622559, |
|
"learning_rate": 9.764488592198656e-05, |
|
"loss": 4.7483, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.7083731889724731, |
|
"learning_rate": 9.763328437480423e-05, |
|
"loss": 4.7495, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.5328574180603027, |
|
"learning_rate": 9.76216828276219e-05, |
|
"loss": 4.7492, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.2578125, |
|
"learning_rate": 9.761008128043957e-05, |
|
"loss": 4.7474, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 7.049394607543945, |
|
"learning_rate": 9.759847973325723e-05, |
|
"loss": 4.7484, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.5719186663627625, |
|
"learning_rate": 9.758687818607491e-05, |
|
"loss": 4.7476, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.7516430616378784, |
|
"learning_rate": 9.757527663889257e-05, |
|
"loss": 4.7468, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.7269846200942993, |
|
"learning_rate": 9.756367509171024e-05, |
|
"loss": 4.7482, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.0633318424224854, |
|
"learning_rate": 9.75520735445279e-05, |
|
"loss": 4.7469, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.5097702145576477, |
|
"learning_rate": 9.754047199734558e-05, |
|
"loss": 4.7482, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.5308384895324707, |
|
"learning_rate": 9.752887045016323e-05, |
|
"loss": 4.7444, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.5017550587654114, |
|
"learning_rate": 9.75172689029809e-05, |
|
"loss": 4.7474, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.526594340801239, |
|
"learning_rate": 9.750566735579858e-05, |
|
"loss": 4.7463, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.675493001937866, |
|
"learning_rate": 9.749406580861625e-05, |
|
"loss": 4.7475, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.905905723571777, |
|
"learning_rate": 9.748246426143392e-05, |
|
"loss": 4.7499, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 4.458377838134766, |
|
"learning_rate": 9.747086271425157e-05, |
|
"loss": 4.7474, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.3677464723587036, |
|
"learning_rate": 9.745926116706924e-05, |
|
"loss": 4.7502, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 9.98462200164795, |
|
"learning_rate": 9.744765961988691e-05, |
|
"loss": 4.7486, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 6.801756858825684, |
|
"learning_rate": 9.743605807270458e-05, |
|
"loss": 4.7458, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.5047688484191895, |
|
"learning_rate": 9.742445652552224e-05, |
|
"loss": 4.7574, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.5461822152137756, |
|
"learning_rate": 9.741285497833991e-05, |
|
"loss": 4.7441, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.560943603515625, |
|
"learning_rate": 9.740125343115758e-05, |
|
"loss": 4.7447, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.5233502984046936, |
|
"learning_rate": 9.738965188397525e-05, |
|
"loss": 4.7474, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.5336612462997437, |
|
"learning_rate": 9.737805033679291e-05, |
|
"loss": 4.7425, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.945470094680786, |
|
"learning_rate": 9.736644878961059e-05, |
|
"loss": 4.7482, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.4918281137943268, |
|
"learning_rate": 9.735484724242826e-05, |
|
"loss": 4.7469, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.591059923171997, |
|
"learning_rate": 9.734324569524592e-05, |
|
"loss": 4.7458, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.5464999079704285, |
|
"learning_rate": 9.733164414806359e-05, |
|
"loss": 4.7443, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 6.337867259979248, |
|
"learning_rate": 9.732004260088126e-05, |
|
"loss": 4.7453, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.4762401878833771, |
|
"learning_rate": 9.730844105369893e-05, |
|
"loss": 4.7418, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.0473411083221436, |
|
"learning_rate": 9.729683950651659e-05, |
|
"loss": 4.7454, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.72366863489151, |
|
"learning_rate": 9.728523795933426e-05, |
|
"loss": 4.7425, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.5242981314659119, |
|
"learning_rate": 9.727363641215193e-05, |
|
"loss": 4.7461, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.6705228090286255, |
|
"learning_rate": 9.72620348649696e-05, |
|
"loss": 4.7495, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.549325704574585, |
|
"learning_rate": 9.725043331778726e-05, |
|
"loss": 4.7436, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.599336564540863, |
|
"learning_rate": 9.723883177060493e-05, |
|
"loss": 4.743, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.103011965751648, |
|
"learning_rate": 9.722723022342261e-05, |
|
"loss": 4.7437, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.4058070182800293, |
|
"learning_rate": 9.721562867624027e-05, |
|
"loss": 4.7466, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.0680614709854126, |
|
"learning_rate": 9.720402712905794e-05, |
|
"loss": 4.7424, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.5411515235900879, |
|
"learning_rate": 9.71924255818756e-05, |
|
"loss": 4.7408, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 8.111778259277344, |
|
"learning_rate": 9.718082403469328e-05, |
|
"loss": 4.7454, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.44278204441070557, |
|
"learning_rate": 9.716922248751094e-05, |
|
"loss": 4.7599, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.6122348308563232, |
|
"learning_rate": 9.71576209403286e-05, |
|
"loss": 4.7427, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.5633386373519897, |
|
"learning_rate": 9.714601939314628e-05, |
|
"loss": 4.746, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.8872265219688416, |
|
"learning_rate": 9.713441784596395e-05, |
|
"loss": 4.7439, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.6827236413955688, |
|
"learning_rate": 9.71228162987816e-05, |
|
"loss": 4.7413, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.6328549385070801, |
|
"learning_rate": 9.711121475159927e-05, |
|
"loss": 4.7408, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.4887780249118805, |
|
"learning_rate": 9.709961320441694e-05, |
|
"loss": 4.7425, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9315319061279297, |
|
"learning_rate": 9.708801165723462e-05, |
|
"loss": 4.7431, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.6957463026046753, |
|
"learning_rate": 9.707641011005229e-05, |
|
"loss": 4.7414, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.5213463306427002, |
|
"learning_rate": 9.706480856286994e-05, |
|
"loss": 4.7429, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.486128032207489, |
|
"learning_rate": 9.705320701568761e-05, |
|
"loss": 4.7405, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.561840295791626, |
|
"learning_rate": 9.704160546850528e-05, |
|
"loss": 4.741, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.198677062988281, |
|
"learning_rate": 9.703000392132295e-05, |
|
"loss": 4.7447, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.6826071739196777, |
|
"learning_rate": 9.701840237414061e-05, |
|
"loss": 4.7435, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.7129253149032593, |
|
"learning_rate": 9.70068008269583e-05, |
|
"loss": 4.7432, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.101804733276367, |
|
"learning_rate": 9.699519927977595e-05, |
|
"loss": 4.7518, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.5510717034339905, |
|
"learning_rate": 9.698359773259362e-05, |
|
"loss": 4.7417, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.45920300483703613, |
|
"learning_rate": 9.697199618541129e-05, |
|
"loss": 4.7421, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.5336456894874573, |
|
"learning_rate": 9.696039463822896e-05, |
|
"loss": 4.7411, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.5019949078559875, |
|
"learning_rate": 9.694879309104663e-05, |
|
"loss": 4.7436, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 33.536495208740234, |
|
"learning_rate": 9.693719154386429e-05, |
|
"loss": 4.7436, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.4645074605941772, |
|
"learning_rate": 9.692558999668196e-05, |
|
"loss": 4.7447, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.655017614364624, |
|
"learning_rate": 9.691398844949963e-05, |
|
"loss": 4.7404, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.49820154905319214, |
|
"learning_rate": 9.69023869023173e-05, |
|
"loss": 4.7432, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.6071832180023193, |
|
"learning_rate": 9.689078535513496e-05, |
|
"loss": 4.7436, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.5458092093467712, |
|
"learning_rate": 9.687918380795263e-05, |
|
"loss": 4.7438, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.5005242824554443, |
|
"learning_rate": 9.686758226077031e-05, |
|
"loss": 4.741, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.5511128306388855, |
|
"learning_rate": 9.685598071358797e-05, |
|
"loss": 4.743, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 17.676786422729492, |
|
"learning_rate": 9.684437916640564e-05, |
|
"loss": 4.7423, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.3633731603622437, |
|
"learning_rate": 9.68327776192233e-05, |
|
"loss": 4.7408, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.5432199239730835, |
|
"learning_rate": 9.682117607204098e-05, |
|
"loss": 4.7391, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.02588415145874, |
|
"learning_rate": 9.680957452485864e-05, |
|
"loss": 4.7416, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.1455700397491455, |
|
"learning_rate": 9.679797297767631e-05, |
|
"loss": 4.7398, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.6886168718338013, |
|
"learning_rate": 9.678637143049398e-05, |
|
"loss": 4.7418, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.5645326375961304, |
|
"learning_rate": 9.677476988331165e-05, |
|
"loss": 4.7381, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 6.7613911628723145, |
|
"learning_rate": 9.67631683361293e-05, |
|
"loss": 4.7404, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.5324685573577881, |
|
"learning_rate": 9.675156678894698e-05, |
|
"loss": 4.7378, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 4.839243412017822, |
|
"learning_rate": 9.673996524176465e-05, |
|
"loss": 4.7395, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.458783894777298, |
|
"learning_rate": 9.672836369458232e-05, |
|
"loss": 4.7422, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.9166305065155029, |
|
"learning_rate": 9.671676214739999e-05, |
|
"loss": 4.7428, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.108514428138733, |
|
"learning_rate": 9.670516060021764e-05, |
|
"loss": 4.7403, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.5330324172973633, |
|
"learning_rate": 9.669355905303531e-05, |
|
"loss": 4.7366, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.4888221025466919, |
|
"learning_rate": 9.668195750585298e-05, |
|
"loss": 4.7384, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.600286602973938, |
|
"learning_rate": 9.667035595867066e-05, |
|
"loss": 4.7367, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.486511468887329, |
|
"learning_rate": 9.665875441148831e-05, |
|
"loss": 4.7391, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.9454842209815979, |
|
"learning_rate": 9.6647152864306e-05, |
|
"loss": 4.7384, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.6840468645095825, |
|
"learning_rate": 9.663555131712365e-05, |
|
"loss": 4.7379, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.795483410358429, |
|
"learning_rate": 9.662394976994132e-05, |
|
"loss": 4.7393, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.8666725158691406, |
|
"learning_rate": 9.6612348222759e-05, |
|
"loss": 4.7389, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 4.16463565826416, |
|
"learning_rate": 9.660074667557666e-05, |
|
"loss": 4.7395, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.6866464614868164, |
|
"learning_rate": 9.658914512839433e-05, |
|
"loss": 4.7401, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 23.52367401123047, |
|
"learning_rate": 9.657754358121199e-05, |
|
"loss": 4.7562, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 4.992708683013916, |
|
"learning_rate": 9.656594203402966e-05, |
|
"loss": 4.7743, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 4.8701677322387695, |
|
"learning_rate": 9.655434048684733e-05, |
|
"loss": 4.7586, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 4.0893425941467285, |
|
"learning_rate": 9.6542738939665e-05, |
|
"loss": 4.7572, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 4.048985958099365, |
|
"learning_rate": 9.653113739248266e-05, |
|
"loss": 4.7514, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 4.577606678009033, |
|
"learning_rate": 9.651953584530033e-05, |
|
"loss": 4.7529, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 4.831415176391602, |
|
"learning_rate": 9.6507934298118e-05, |
|
"loss": 4.7512, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 4.870159149169922, |
|
"learning_rate": 9.649633275093567e-05, |
|
"loss": 4.7494, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 4.836753845214844, |
|
"learning_rate": 9.648473120375334e-05, |
|
"loss": 4.7493, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 4.718664169311523, |
|
"learning_rate": 9.6473129656571e-05, |
|
"loss": 4.7511, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.49000358581543, |
|
"learning_rate": 9.646152810938868e-05, |
|
"loss": 4.7494, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 4.819366931915283, |
|
"learning_rate": 9.644992656220634e-05, |
|
"loss": 4.7504, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.7323962450027466, |
|
"learning_rate": 9.643832501502401e-05, |
|
"loss": 4.7461, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.8463295102119446, |
|
"learning_rate": 9.642672346784168e-05, |
|
"loss": 4.7376, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.5474389791488647, |
|
"learning_rate": 9.641512192065935e-05, |
|
"loss": 4.7394, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.6245602965354919, |
|
"learning_rate": 9.640352037347701e-05, |
|
"loss": 4.7363, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.6616430282592773, |
|
"learning_rate": 9.639191882629468e-05, |
|
"loss": 4.7375, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.0729475021362305, |
|
"learning_rate": 9.638031727911235e-05, |
|
"loss": 4.7393, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.4616137146949768, |
|
"learning_rate": 9.636871573193002e-05, |
|
"loss": 4.7374, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.6392802000045776, |
|
"learning_rate": 9.635711418474769e-05, |
|
"loss": 4.7383, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.5004580616950989, |
|
"learning_rate": 9.634551263756535e-05, |
|
"loss": 4.7378, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.5481105446815491, |
|
"learning_rate": 9.633391109038302e-05, |
|
"loss": 4.7382, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.6281868815422058, |
|
"learning_rate": 9.632230954320069e-05, |
|
"loss": 4.7383, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.664202868938446, |
|
"learning_rate": 9.631070799601836e-05, |
|
"loss": 4.739, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.6041770577430725, |
|
"learning_rate": 9.629910644883601e-05, |
|
"loss": 4.7359, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 3.237818717956543, |
|
"learning_rate": 9.62875049016537e-05, |
|
"loss": 4.7384, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.4940323829650879, |
|
"learning_rate": 9.627590335447135e-05, |
|
"loss": 4.7363, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.5114046335220337, |
|
"learning_rate": 9.626430180728902e-05, |
|
"loss": 4.7383, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.9840266704559326, |
|
"learning_rate": 9.625270026010668e-05, |
|
"loss": 4.7378, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.5324087738990784, |
|
"learning_rate": 9.624109871292437e-05, |
|
"loss": 4.7398, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 4.636378765106201, |
|
"learning_rate": 9.622949716574204e-05, |
|
"loss": 4.7336, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.4898914396762848, |
|
"learning_rate": 9.621789561855969e-05, |
|
"loss": 4.7374, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.6327505111694336, |
|
"learning_rate": 9.620629407137736e-05, |
|
"loss": 4.7321, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 10.219440460205078, |
|
"learning_rate": 9.619469252419503e-05, |
|
"loss": 4.737, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.5579794049263, |
|
"learning_rate": 9.61830909770127e-05, |
|
"loss": 4.7367, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.46085384488105774, |
|
"learning_rate": 9.617148942983036e-05, |
|
"loss": 4.7379, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.5021482110023499, |
|
"learning_rate": 9.615988788264803e-05, |
|
"loss": 4.7368, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 15.20097541809082, |
|
"learning_rate": 9.61482863354657e-05, |
|
"loss": 4.736, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.7271726727485657, |
|
"learning_rate": 9.613668478828337e-05, |
|
"loss": 4.7366, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.48880913853645325, |
|
"learning_rate": 9.612508324110103e-05, |
|
"loss": 4.7366, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.5590758323669434, |
|
"learning_rate": 9.61134816939187e-05, |
|
"loss": 4.7391, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.4674455225467682, |
|
"learning_rate": 9.610188014673638e-05, |
|
"loss": 4.7379, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.517918050289154, |
|
"learning_rate": 9.609027859955404e-05, |
|
"loss": 4.7367, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.2398717403411865, |
|
"learning_rate": 9.607867705237171e-05, |
|
"loss": 4.738, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.5215381979942322, |
|
"learning_rate": 9.606707550518937e-05, |
|
"loss": 4.7352, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.9021263122558594, |
|
"learning_rate": 9.605547395800705e-05, |
|
"loss": 4.7348, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.46950528025627136, |
|
"learning_rate": 9.604387241082471e-05, |
|
"loss": 4.7348, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.5117696523666382, |
|
"learning_rate": 9.603227086364238e-05, |
|
"loss": 4.7371, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.7093480825424194, |
|
"learning_rate": 9.602066931646005e-05, |
|
"loss": 4.7383, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.4637995958328247, |
|
"learning_rate": 9.600906776927772e-05, |
|
"loss": 4.7371, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.5002011060714722, |
|
"learning_rate": 9.599746622209538e-05, |
|
"loss": 4.7359, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.7651986479759216, |
|
"learning_rate": 9.598586467491305e-05, |
|
"loss": 4.7303, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.2940102815628052, |
|
"learning_rate": 9.597426312773072e-05, |
|
"loss": 4.7331, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.5338752269744873, |
|
"learning_rate": 9.596266158054839e-05, |
|
"loss": 4.7361, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.049210786819458, |
|
"learning_rate": 9.595106003336606e-05, |
|
"loss": 4.7347, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.3599870204925537, |
|
"learning_rate": 9.593945848618372e-05, |
|
"loss": 4.7375, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.5492005348205566, |
|
"learning_rate": 9.59278569390014e-05, |
|
"loss": 4.7352, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.5505391955375671, |
|
"learning_rate": 9.591625539181906e-05, |
|
"loss": 4.7332, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.6726617813110352, |
|
"learning_rate": 9.590465384463673e-05, |
|
"loss": 4.7315, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.558929979801178, |
|
"learning_rate": 9.589305229745438e-05, |
|
"loss": 4.732, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.7398250102996826, |
|
"learning_rate": 9.588145075027207e-05, |
|
"loss": 4.7329, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.5553580522537231, |
|
"learning_rate": 9.586984920308972e-05, |
|
"loss": 4.7355, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.5289317965507507, |
|
"learning_rate": 9.58582476559074e-05, |
|
"loss": 4.7363, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 3.049525499343872, |
|
"learning_rate": 9.584664610872507e-05, |
|
"loss": 4.7342, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.4871656000614166, |
|
"learning_rate": 9.583504456154274e-05, |
|
"loss": 4.7356, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.0833821296691895, |
|
"learning_rate": 9.58234430143604e-05, |
|
"loss": 4.7345, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.624002456665039, |
|
"learning_rate": 9.581184146717806e-05, |
|
"loss": 4.7323, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.6378651261329651, |
|
"learning_rate": 9.580023991999573e-05, |
|
"loss": 4.7349, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.1615560054779053, |
|
"learning_rate": 9.57886383728134e-05, |
|
"loss": 4.7345, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.5154897570610046, |
|
"learning_rate": 9.577703682563107e-05, |
|
"loss": 4.7357, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 6.502463340759277, |
|
"learning_rate": 9.576543527844873e-05, |
|
"loss": 4.7354, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.5349368453025818, |
|
"learning_rate": 9.57538337312664e-05, |
|
"loss": 4.7342, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.0265626907348633, |
|
"learning_rate": 9.574223218408407e-05, |
|
"loss": 4.7327, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.8190938830375671, |
|
"learning_rate": 9.573063063690174e-05, |
|
"loss": 4.732, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 9.420807838439941, |
|
"learning_rate": 9.571902908971941e-05, |
|
"loss": 4.7322, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.5019901990890503, |
|
"learning_rate": 9.570742754253707e-05, |
|
"loss": 4.7314, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.35811448097229, |
|
"learning_rate": 9.569582599535475e-05, |
|
"loss": 4.7314, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 6.828240871429443, |
|
"learning_rate": 9.568422444817241e-05, |
|
"loss": 4.7329, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.4542797803878784, |
|
"learning_rate": 9.567262290099008e-05, |
|
"loss": 4.7297, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.4769699275493622, |
|
"learning_rate": 9.566102135380775e-05, |
|
"loss": 4.7334, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.6346319317817688, |
|
"learning_rate": 9.564941980662542e-05, |
|
"loss": 4.7324, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.621337890625, |
|
"learning_rate": 9.563781825944308e-05, |
|
"loss": 4.7335, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.49926477670669556, |
|
"learning_rate": 9.562621671226075e-05, |
|
"loss": 4.7334, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.4097819328308105, |
|
"learning_rate": 9.561461516507842e-05, |
|
"loss": 4.7316, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.4786432087421417, |
|
"learning_rate": 9.560301361789609e-05, |
|
"loss": 4.7323, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.7441820502281189, |
|
"learning_rate": 9.559141207071376e-05, |
|
"loss": 4.7322, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.5333019495010376, |
|
"learning_rate": 9.557981052353142e-05, |
|
"loss": 4.7326, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.9219884872436523, |
|
"learning_rate": 9.556820897634909e-05, |
|
"loss": 4.732, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.7354302406311035, |
|
"learning_rate": 9.555660742916676e-05, |
|
"loss": 4.7322, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.43798136711120605, |
|
"learning_rate": 9.554500588198443e-05, |
|
"loss": 4.7319, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.5193877220153809, |
|
"learning_rate": 9.553340433480208e-05, |
|
"loss": 4.7313, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.6578642725944519, |
|
"learning_rate": 9.552180278761977e-05, |
|
"loss": 4.7301, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.481916606426239, |
|
"learning_rate": 9.551020124043743e-05, |
|
"loss": 4.7336, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.495345801115036, |
|
"learning_rate": 9.54985996932551e-05, |
|
"loss": 4.7292, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.2032209634780884, |
|
"learning_rate": 9.548699814607277e-05, |
|
"loss": 4.731, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.45609620213508606, |
|
"learning_rate": 9.547539659889044e-05, |
|
"loss": 4.7309, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.46040889620780945, |
|
"learning_rate": 9.546379505170811e-05, |
|
"loss": 4.7319, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.8751170635223389, |
|
"learning_rate": 9.545219350452576e-05, |
|
"loss": 4.7304, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.4785304665565491, |
|
"learning_rate": 9.544059195734343e-05, |
|
"loss": 4.729, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 4.840359210968018, |
|
"learning_rate": 9.54289904101611e-05, |
|
"loss": 4.7368, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.5387877225875854, |
|
"learning_rate": 9.541738886297878e-05, |
|
"loss": 4.7325, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.0035640001296997, |
|
"learning_rate": 9.540578731579643e-05, |
|
"loss": 4.7307, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.5036232471466064, |
|
"learning_rate": 9.53941857686141e-05, |
|
"loss": 4.7324, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.8626024127006531, |
|
"learning_rate": 9.538258422143177e-05, |
|
"loss": 4.7286, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.899303436279297, |
|
"learning_rate": 9.537098267424944e-05, |
|
"loss": 4.7311, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.5646871328353882, |
|
"learning_rate": 9.535938112706711e-05, |
|
"loss": 4.7303, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.9518368244171143, |
|
"learning_rate": 9.534777957988477e-05, |
|
"loss": 4.7314, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.5652722120285034, |
|
"learning_rate": 9.533617803270245e-05, |
|
"loss": 4.7304, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.5541896224021912, |
|
"learning_rate": 9.532457648552011e-05, |
|
"loss": 4.73, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.49006637930870056, |
|
"learning_rate": 9.531297493833778e-05, |
|
"loss": 4.7311, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.5993065237998962, |
|
"learning_rate": 9.530137339115545e-05, |
|
"loss": 4.7309, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.5642876029014587, |
|
"learning_rate": 9.528977184397312e-05, |
|
"loss": 4.7299, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 7.888554096221924, |
|
"learning_rate": 9.527817029679078e-05, |
|
"loss": 4.7435, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 12.3725004196167, |
|
"learning_rate": 9.526656874960845e-05, |
|
"loss": 4.7401, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 2.139461040496826, |
|
"learning_rate": 9.525496720242612e-05, |
|
"loss": 4.7359, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.4835149645805359, |
|
"learning_rate": 9.524336565524379e-05, |
|
"loss": 4.7327, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.4717291593551636, |
|
"learning_rate": 9.523176410806146e-05, |
|
"loss": 4.7305, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.48854538798332214, |
|
"learning_rate": 9.522016256087912e-05, |
|
"loss": 4.7265, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.48703619837760925, |
|
"learning_rate": 9.520856101369679e-05, |
|
"loss": 4.7299, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.49546846747398376, |
|
"learning_rate": 9.519695946651446e-05, |
|
"loss": 4.7297, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.4952056109905243, |
|
"learning_rate": 9.518535791933213e-05, |
|
"loss": 4.7326, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 8.376193046569824, |
|
"learning_rate": 9.517375637214979e-05, |
|
"loss": 4.7306, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.4306395351886749, |
|
"learning_rate": 9.516215482496747e-05, |
|
"loss": 4.7286, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.9443186521530151, |
|
"learning_rate": 9.515055327778513e-05, |
|
"loss": 4.7308, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.47867128252983093, |
|
"learning_rate": 9.51389517306028e-05, |
|
"loss": 4.7289, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.2091776132583618, |
|
"learning_rate": 9.512735018342045e-05, |
|
"loss": 4.7378, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.4395917057991028, |
|
"learning_rate": 9.511574863623814e-05, |
|
"loss": 4.7328, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.723639965057373, |
|
"learning_rate": 9.510414708905581e-05, |
|
"loss": 4.7288, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 5.487166881561279, |
|
"learning_rate": 9.509254554187347e-05, |
|
"loss": 4.7298, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 2.0608367919921875, |
|
"learning_rate": 9.508094399469114e-05, |
|
"loss": 4.7312, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.465772807598114, |
|
"learning_rate": 9.50693424475088e-05, |
|
"loss": 4.7308, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.4866921007633209, |
|
"learning_rate": 9.505774090032648e-05, |
|
"loss": 4.7295, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.1015464067459106, |
|
"learning_rate": 9.504613935314413e-05, |
|
"loss": 4.7296, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.6402974128723145, |
|
"learning_rate": 9.50345378059618e-05, |
|
"loss": 4.7285, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 2.76873517036438, |
|
"learning_rate": 9.502293625877947e-05, |
|
"loss": 4.7306, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.7945021390914917, |
|
"learning_rate": 9.501133471159715e-05, |
|
"loss": 4.729, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.3986837565898895, |
|
"learning_rate": 9.49997331644148e-05, |
|
"loss": 4.7306, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.4017668068408966, |
|
"learning_rate": 9.498813161723247e-05, |
|
"loss": 4.7277, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.6943633556365967, |
|
"learning_rate": 9.497653007005016e-05, |
|
"loss": 4.7301, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 2.686843156814575, |
|
"learning_rate": 9.496492852286781e-05, |
|
"loss": 4.728, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.7867078185081482, |
|
"learning_rate": 9.495332697568548e-05, |
|
"loss": 4.7272, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.0648784637451172, |
|
"learning_rate": 9.494172542850315e-05, |
|
"loss": 4.7263, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.6653295755386353, |
|
"learning_rate": 9.493012388132082e-05, |
|
"loss": 4.7309, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.9347316026687622, |
|
"learning_rate": 9.491852233413848e-05, |
|
"loss": 4.7303, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.3211824893951416, |
|
"learning_rate": 9.490692078695615e-05, |
|
"loss": 4.7294, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.8230929970741272, |
|
"learning_rate": 9.489531923977382e-05, |
|
"loss": 4.7282, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.3526966571807861, |
|
"learning_rate": 9.488371769259149e-05, |
|
"loss": 4.7267, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.4441579282283783, |
|
"learning_rate": 9.487211614540915e-05, |
|
"loss": 4.7265, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 5.511246204376221, |
|
"learning_rate": 9.486051459822682e-05, |
|
"loss": 4.7302, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.5045779943466187, |
|
"learning_rate": 9.484891305104449e-05, |
|
"loss": 4.73, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.4756096601486206, |
|
"learning_rate": 9.483731150386216e-05, |
|
"loss": 4.7291, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.4292340576648712, |
|
"learning_rate": 9.482570995667983e-05, |
|
"loss": 4.7272, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.6878976821899414, |
|
"learning_rate": 9.481410840949749e-05, |
|
"loss": 4.726, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.502358078956604, |
|
"learning_rate": 9.480250686231517e-05, |
|
"loss": 4.7266, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.6721329092979431, |
|
"learning_rate": 9.479090531513283e-05, |
|
"loss": 4.7294, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.438326895236969, |
|
"learning_rate": 9.47793037679505e-05, |
|
"loss": 4.728, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.722699761390686, |
|
"learning_rate": 9.476770222076816e-05, |
|
"loss": 4.7298, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 7.889718532562256, |
|
"learning_rate": 9.475610067358584e-05, |
|
"loss": 4.724, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 25.698381423950195, |
|
"learning_rate": 9.47444991264035e-05, |
|
"loss": 4.7291, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.5010745525360107, |
|
"learning_rate": 9.473289757922117e-05, |
|
"loss": 4.7344, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 3.237755537033081, |
|
"learning_rate": 9.472129603203884e-05, |
|
"loss": 4.7266, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.9018293619155884, |
|
"learning_rate": 9.470969448485651e-05, |
|
"loss": 4.728, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.5156495571136475, |
|
"learning_rate": 9.469809293767418e-05, |
|
"loss": 4.7259, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 5.2465362548828125, |
|
"learning_rate": 9.468649139049184e-05, |
|
"loss": 4.7269, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.610849916934967, |
|
"learning_rate": 9.46748898433095e-05, |
|
"loss": 4.7292, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 2.3922882080078125, |
|
"learning_rate": 9.466328829612718e-05, |
|
"loss": 4.7283, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.42459169030189514, |
|
"learning_rate": 9.465168674894485e-05, |
|
"loss": 4.727, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.4948953092098236, |
|
"learning_rate": 9.46400852017625e-05, |
|
"loss": 4.7247, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.4495919346809387, |
|
"learning_rate": 9.462848365458017e-05, |
|
"loss": 4.7275, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.1093862056732178, |
|
"learning_rate": 9.461688210739784e-05, |
|
"loss": 4.7297, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.4342365860939026, |
|
"learning_rate": 9.460528056021551e-05, |
|
"loss": 4.7306, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 25.432937622070312, |
|
"learning_rate": 9.459367901303319e-05, |
|
"loss": 4.732, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 2.0753910541534424, |
|
"learning_rate": 9.458207746585086e-05, |
|
"loss": 4.7368, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 4.888164520263672, |
|
"learning_rate": 9.457047591866853e-05, |
|
"loss": 4.7355, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.5124868750572205, |
|
"learning_rate": 9.455887437148618e-05, |
|
"loss": 4.7255, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.4563619792461395, |
|
"learning_rate": 9.454727282430385e-05, |
|
"loss": 4.7266, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.6962786316871643, |
|
"learning_rate": 9.453567127712152e-05, |
|
"loss": 4.7266, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.6182125210762024, |
|
"learning_rate": 9.45240697299392e-05, |
|
"loss": 4.7262, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.44259679317474365, |
|
"learning_rate": 9.451246818275685e-05, |
|
"loss": 4.7289, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.4451597929000854, |
|
"learning_rate": 9.450086663557452e-05, |
|
"loss": 4.7253, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 6.354624271392822, |
|
"learning_rate": 9.448926508839219e-05, |
|
"loss": 4.7252, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.49475088715553284, |
|
"learning_rate": 9.447766354120986e-05, |
|
"loss": 4.7261, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.40777587890625, |
|
"learning_rate": 9.446606199402753e-05, |
|
"loss": 4.7274, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 2.0609936714172363, |
|
"learning_rate": 9.445446044684519e-05, |
|
"loss": 4.7283, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.6895414590835571, |
|
"learning_rate": 9.444285889966287e-05, |
|
"loss": 4.7247, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.4589858949184418, |
|
"learning_rate": 9.443125735248053e-05, |
|
"loss": 4.7275, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 3.8931922912597656, |
|
"learning_rate": 9.44196558052982e-05, |
|
"loss": 4.7245, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.5537588000297546, |
|
"learning_rate": 9.440805425811586e-05, |
|
"loss": 4.7283, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 4.216842174530029, |
|
"learning_rate": 9.439645271093354e-05, |
|
"loss": 4.7238, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.4659540355205536, |
|
"learning_rate": 9.43848511637512e-05, |
|
"loss": 4.7269, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.4347868859767914, |
|
"learning_rate": 9.437324961656887e-05, |
|
"loss": 4.7272, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.4908686578273773, |
|
"learning_rate": 9.436164806938654e-05, |
|
"loss": 4.7217, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.4566729664802551, |
|
"learning_rate": 9.435004652220421e-05, |
|
"loss": 4.7259, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.6793931126594543, |
|
"learning_rate": 9.433844497502188e-05, |
|
"loss": 4.7272, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.4818676710128784, |
|
"learning_rate": 9.432684342783954e-05, |
|
"loss": 4.7261, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.475321888923645, |
|
"learning_rate": 9.431524188065721e-05, |
|
"loss": 4.7261, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.4802456200122833, |
|
"learning_rate": 9.430364033347488e-05, |
|
"loss": 4.7276, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.9790329933166504, |
|
"learning_rate": 9.429203878629255e-05, |
|
"loss": 4.7238, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.4709641933441162, |
|
"learning_rate": 9.42804372391102e-05, |
|
"loss": 4.7238, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.43033114075660706, |
|
"learning_rate": 9.426883569192788e-05, |
|
"loss": 4.7242, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.4878010153770447, |
|
"learning_rate": 9.425723414474555e-05, |
|
"loss": 4.7291, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.6672825813293457, |
|
"learning_rate": 9.424563259756322e-05, |
|
"loss": 4.7276, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 2.705127239227295, |
|
"learning_rate": 9.423403105038089e-05, |
|
"loss": 4.7258, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.4465647339820862, |
|
"learning_rate": 9.422242950319854e-05, |
|
"loss": 4.7254, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.3617794513702393, |
|
"learning_rate": 9.421082795601623e-05, |
|
"loss": 4.7275, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.3789376020431519, |
|
"learning_rate": 9.419922640883388e-05, |
|
"loss": 4.727, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.8773742318153381, |
|
"learning_rate": 9.418762486165156e-05, |
|
"loss": 4.7279, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.47493115067481995, |
|
"learning_rate": 9.417602331446923e-05, |
|
"loss": 4.7244, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.4560215175151825, |
|
"learning_rate": 9.41644217672869e-05, |
|
"loss": 4.7258, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.5064975023269653, |
|
"learning_rate": 9.415282022010455e-05, |
|
"loss": 4.7246, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.276395320892334, |
|
"learning_rate": 9.414121867292222e-05, |
|
"loss": 4.73, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.6075153350830078, |
|
"learning_rate": 9.41296171257399e-05, |
|
"loss": 4.726, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 3.9961352348327637, |
|
"learning_rate": 9.411801557855756e-05, |
|
"loss": 4.7256, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.469164103269577, |
|
"learning_rate": 9.410641403137523e-05, |
|
"loss": 4.7264, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.9661677479743958, |
|
"learning_rate": 9.409481248419289e-05, |
|
"loss": 4.7256, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 4.348484039306641, |
|
"learning_rate": 9.408321093701058e-05, |
|
"loss": 4.7207, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.4262404143810272, |
|
"learning_rate": 9.407160938982823e-05, |
|
"loss": 4.7248, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 2.119818687438965, |
|
"learning_rate": 9.40600078426459e-05, |
|
"loss": 4.7234, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.43322816491127014, |
|
"learning_rate": 9.404840629546356e-05, |
|
"loss": 4.7262, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.43069151043891907, |
|
"learning_rate": 9.403680474828124e-05, |
|
"loss": 4.725, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.5061689019203186, |
|
"learning_rate": 9.40252032010989e-05, |
|
"loss": 4.7281, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.387876272201538, |
|
"learning_rate": 9.401360165391657e-05, |
|
"loss": 4.7265, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.48391860723495483, |
|
"learning_rate": 9.400200010673423e-05, |
|
"loss": 4.7293, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.8254903554916382, |
|
"learning_rate": 9.399039855955191e-05, |
|
"loss": 4.7232, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 3.460291862487793, |
|
"learning_rate": 9.397879701236958e-05, |
|
"loss": 4.7237, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.3960816860198975, |
|
"learning_rate": 9.396719546518724e-05, |
|
"loss": 4.7254, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 6.771088600158691, |
|
"learning_rate": 9.395559391800491e-05, |
|
"loss": 4.7251, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.49326056241989136, |
|
"learning_rate": 9.394399237082258e-05, |
|
"loss": 4.7239, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.45919495820999146, |
|
"learning_rate": 9.393239082364025e-05, |
|
"loss": 4.7262, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.7638592720031738, |
|
"learning_rate": 9.392078927645791e-05, |
|
"loss": 4.7253, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.0643978118896484, |
|
"learning_rate": 9.390918772927558e-05, |
|
"loss": 4.722, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.4550251364707947, |
|
"learning_rate": 9.389758618209325e-05, |
|
"loss": 4.7239, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.7114538550376892, |
|
"learning_rate": 9.388598463491092e-05, |
|
"loss": 4.7239, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.4556228220462799, |
|
"learning_rate": 9.387438308772857e-05, |
|
"loss": 4.725, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.4266548752784729, |
|
"learning_rate": 9.386278154054625e-05, |
|
"loss": 4.7238, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.829122543334961, |
|
"learning_rate": 9.385117999336393e-05, |
|
"loss": 4.7246, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.4608537554740906, |
|
"learning_rate": 9.383957844618159e-05, |
|
"loss": 4.723, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 2.6241915225982666, |
|
"learning_rate": 9.382797689899926e-05, |
|
"loss": 4.7239, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.4583094120025635, |
|
"learning_rate": 9.381637535181693e-05, |
|
"loss": 4.722, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.0573890209197998, |
|
"learning_rate": 9.38047738046346e-05, |
|
"loss": 4.726, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.0593518018722534, |
|
"learning_rate": 9.379317225745225e-05, |
|
"loss": 4.7224, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 15.059647560119629, |
|
"learning_rate": 9.378157071026992e-05, |
|
"loss": 4.7271, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.4176161587238312, |
|
"learning_rate": 9.37699691630876e-05, |
|
"loss": 4.7219, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 7.073505401611328, |
|
"learning_rate": 9.375836761590527e-05, |
|
"loss": 4.7261, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.4442752003669739, |
|
"learning_rate": 9.374676606872292e-05, |
|
"loss": 4.7203, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.9274020195007324, |
|
"learning_rate": 9.373516452154059e-05, |
|
"loss": 4.7251, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.5794118046760559, |
|
"learning_rate": 9.372356297435826e-05, |
|
"loss": 4.7229, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.49662327766418457, |
|
"learning_rate": 9.371196142717593e-05, |
|
"loss": 4.7233, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.1663075685501099, |
|
"learning_rate": 9.37003598799936e-05, |
|
"loss": 4.7245, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.5266515612602234, |
|
"learning_rate": 9.368875833281126e-05, |
|
"loss": 4.7285, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.9966477751731873, |
|
"learning_rate": 9.367715678562894e-05, |
|
"loss": 4.7221, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.42000776529312134, |
|
"learning_rate": 9.36655552384466e-05, |
|
"loss": 4.7277, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.4215773344039917, |
|
"learning_rate": 9.365395369126427e-05, |
|
"loss": 4.7204, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.4621349573135376, |
|
"learning_rate": 9.364235214408193e-05, |
|
"loss": 4.7214, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.4330434203147888, |
|
"learning_rate": 9.363075059689961e-05, |
|
"loss": 4.7244, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.45555201172828674, |
|
"learning_rate": 9.361914904971727e-05, |
|
"loss": 4.7201, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.45038706064224243, |
|
"learning_rate": 9.360754750253494e-05, |
|
"loss": 4.7254, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.2518788576126099, |
|
"learning_rate": 9.359594595535261e-05, |
|
"loss": 4.7237, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.44323647022247314, |
|
"learning_rate": 9.358434440817028e-05, |
|
"loss": 4.7269, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.72452974319458, |
|
"learning_rate": 9.357274286098795e-05, |
|
"loss": 4.7218, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 10.425994873046875, |
|
"learning_rate": 9.356114131380561e-05, |
|
"loss": 4.7237, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.400721549987793, |
|
"learning_rate": 9.354953976662328e-05, |
|
"loss": 4.7282, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.49108538031578064, |
|
"learning_rate": 9.353793821944095e-05, |
|
"loss": 4.7187, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.47107619047164917, |
|
"learning_rate": 9.352633667225862e-05, |
|
"loss": 4.7217, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.43458929657936096, |
|
"learning_rate": 9.351473512507628e-05, |
|
"loss": 4.72, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.4294680655002594, |
|
"learning_rate": 9.350313357789395e-05, |
|
"loss": 4.7232, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.8897697329521179, |
|
"learning_rate": 9.349153203071162e-05, |
|
"loss": 4.721, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.4212525188922882, |
|
"learning_rate": 9.347993048352929e-05, |
|
"loss": 4.7234, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.9617418646812439, |
|
"learning_rate": 9.346832893634696e-05, |
|
"loss": 4.7209, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.46716544032096863, |
|
"learning_rate": 9.345672738916463e-05, |
|
"loss": 4.7212, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.8855605721473694, |
|
"learning_rate": 9.34451258419823e-05, |
|
"loss": 4.7255, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.47797083854675293, |
|
"learning_rate": 9.343352429479996e-05, |
|
"loss": 4.7252, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.5321144461631775, |
|
"learning_rate": 9.342192274761763e-05, |
|
"loss": 4.7234, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.9607291221618652, |
|
"learning_rate": 9.34103212004353e-05, |
|
"loss": 4.7244, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.45025816559791565, |
|
"learning_rate": 9.339871965325297e-05, |
|
"loss": 4.7214, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.46686848998069763, |
|
"learning_rate": 9.338711810607062e-05, |
|
"loss": 4.7222, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.4851933419704437, |
|
"learning_rate": 9.33755165588883e-05, |
|
"loss": 4.7246, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.9222490787506104, |
|
"learning_rate": 9.336391501170596e-05, |
|
"loss": 4.7234, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 2.9428153038024902, |
|
"learning_rate": 9.335231346452364e-05, |
|
"loss": 4.7218, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.5928464531898499, |
|
"learning_rate": 9.33407119173413e-05, |
|
"loss": 4.7201, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.47166940569877625, |
|
"learning_rate": 9.332911037015896e-05, |
|
"loss": 4.719, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.5025938153266907, |
|
"learning_rate": 9.331750882297665e-05, |
|
"loss": 4.7216, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.40926048159599304, |
|
"learning_rate": 9.33059072757943e-05, |
|
"loss": 4.7214, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.5747073292732239, |
|
"learning_rate": 9.329430572861197e-05, |
|
"loss": 4.7236, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.4234209656715393, |
|
"learning_rate": 9.328270418142963e-05, |
|
"loss": 4.7218, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.47158312797546387, |
|
"learning_rate": 9.327110263424731e-05, |
|
"loss": 4.7212, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.48128268122673035, |
|
"learning_rate": 9.325950108706497e-05, |
|
"loss": 4.7217, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.6812758445739746, |
|
"learning_rate": 9.324789953988264e-05, |
|
"loss": 4.7229, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.4417859613895416, |
|
"learning_rate": 9.32362979927003e-05, |
|
"loss": 4.7203, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.41741812229156494, |
|
"learning_rate": 9.322469644551798e-05, |
|
"loss": 4.7253, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.9673991799354553, |
|
"learning_rate": 9.321309489833565e-05, |
|
"loss": 4.7205, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.4821186363697052, |
|
"learning_rate": 9.320149335115331e-05, |
|
"loss": 4.7232, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.46331697702407837, |
|
"learning_rate": 9.318989180397098e-05, |
|
"loss": 4.7198, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.4690409004688263, |
|
"learning_rate": 9.317829025678865e-05, |
|
"loss": 4.7235, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 5.2196197509765625, |
|
"learning_rate": 9.316668870960632e-05, |
|
"loss": 4.7232, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.4771330952644348, |
|
"learning_rate": 9.315508716242398e-05, |
|
"loss": 4.7252, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.5050112009048462, |
|
"learning_rate": 9.314348561524165e-05, |
|
"loss": 4.7231, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 5.69675874710083, |
|
"learning_rate": 9.313188406805932e-05, |
|
"loss": 4.7234, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.6608816981315613, |
|
"learning_rate": 9.312028252087699e-05, |
|
"loss": 4.721, |
|
"step": 59300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.42843931913375854, |
|
"learning_rate": 9.310868097369466e-05, |
|
"loss": 4.7207, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.5942014455795288, |
|
"learning_rate": 9.309707942651233e-05, |
|
"loss": 4.7235, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 4.165734767913818, |
|
"learning_rate": 9.308547787933e-05, |
|
"loss": 4.7201, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.4411238729953766, |
|
"learning_rate": 9.307387633214766e-05, |
|
"loss": 4.7214, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.42770957946777344, |
|
"learning_rate": 9.306227478496533e-05, |
|
"loss": 4.7232, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.3136483430862427, |
|
"learning_rate": 9.3050673237783e-05, |
|
"loss": 4.7232, |
|
"step": 59900 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 3.146456241607666, |
|
"learning_rate": 9.303907169060067e-05, |
|
"loss": 4.7231, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.6570073366165161, |
|
"learning_rate": 9.302747014341833e-05, |
|
"loss": 4.7241, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.44109871983528137, |
|
"learning_rate": 9.3015868596236e-05, |
|
"loss": 4.7227, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 4.838292598724365, |
|
"learning_rate": 9.300426704905367e-05, |
|
"loss": 4.7241, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.6892091631889343, |
|
"learning_rate": 9.299266550187134e-05, |
|
"loss": 4.7245, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.4367011785507202, |
|
"learning_rate": 9.298106395468901e-05, |
|
"loss": 4.7186, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.45093631744384766, |
|
"learning_rate": 9.296946240750666e-05, |
|
"loss": 4.7211, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.8475301861763, |
|
"learning_rate": 9.295786086032435e-05, |
|
"loss": 4.7201, |
|
"step": 60700 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.5478389263153076, |
|
"learning_rate": 9.2946259313142e-05, |
|
"loss": 4.7238, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.43729883432388306, |
|
"learning_rate": 9.293465776595968e-05, |
|
"loss": 4.7177, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.243775725364685, |
|
"learning_rate": 9.292305621877733e-05, |
|
"loss": 4.7232, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.4283261299133301, |
|
"learning_rate": 9.291145467159502e-05, |
|
"loss": 4.7211, |
|
"step": 61100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.5506263375282288, |
|
"learning_rate": 9.289985312441267e-05, |
|
"loss": 4.7194, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 2.6256954669952393, |
|
"learning_rate": 9.288825157723034e-05, |
|
"loss": 4.7229, |
|
"step": 61300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.47135502099990845, |
|
"learning_rate": 9.2876650030048e-05, |
|
"loss": 4.7194, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.4235610067844391, |
|
"learning_rate": 9.286504848286568e-05, |
|
"loss": 4.7228, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.691027820110321, |
|
"learning_rate": 9.285344693568335e-05, |
|
"loss": 4.7178, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.3785357475280762, |
|
"learning_rate": 9.284184538850101e-05, |
|
"loss": 4.722, |
|
"step": 61700 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.7555792927742004, |
|
"learning_rate": 9.283024384131868e-05, |
|
"loss": 4.7192, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.45149528980255127, |
|
"learning_rate": 9.281864229413635e-05, |
|
"loss": 4.7223, |
|
"step": 61900 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.3915523886680603, |
|
"learning_rate": 9.280704074695402e-05, |
|
"loss": 4.7281, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.6808405518531799, |
|
"learning_rate": 9.279543919977168e-05, |
|
"loss": 4.7186, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.4248678982257843, |
|
"learning_rate": 9.278383765258935e-05, |
|
"loss": 4.7212, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.4634566605091095, |
|
"learning_rate": 9.277223610540702e-05, |
|
"loss": 4.7232, |
|
"step": 62300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.435062438249588, |
|
"learning_rate": 9.276063455822469e-05, |
|
"loss": 4.7195, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.44273582100868225, |
|
"learning_rate": 9.274903301104235e-05, |
|
"loss": 4.7201, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.6790070533752441, |
|
"learning_rate": 9.273743146386002e-05, |
|
"loss": 4.7233, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.6105982065200806, |
|
"learning_rate": 9.27258299166777e-05, |
|
"loss": 4.7195, |
|
"step": 62700 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.4533052444458008, |
|
"learning_rate": 9.271422836949536e-05, |
|
"loss": 4.7196, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 2.9967572689056396, |
|
"learning_rate": 9.270262682231303e-05, |
|
"loss": 4.7203, |
|
"step": 62900 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.4764550030231476, |
|
"learning_rate": 9.26910252751307e-05, |
|
"loss": 4.7199, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.6913366913795471, |
|
"learning_rate": 9.267942372794837e-05, |
|
"loss": 4.7205, |
|
"step": 63100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.43220970034599304, |
|
"learning_rate": 9.266782218076603e-05, |
|
"loss": 4.7217, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.44269081950187683, |
|
"learning_rate": 9.26562206335837e-05, |
|
"loss": 4.7206, |
|
"step": 63300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.4221411645412445, |
|
"learning_rate": 9.264461908640137e-05, |
|
"loss": 4.7208, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 2.0757687091827393, |
|
"learning_rate": 9.263301753921904e-05, |
|
"loss": 4.7207, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 3.6522767543792725, |
|
"learning_rate": 9.26214159920367e-05, |
|
"loss": 4.721, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.5005024075508118, |
|
"learning_rate": 9.260981444485437e-05, |
|
"loss": 4.7216, |
|
"step": 63700 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.45725876092910767, |
|
"learning_rate": 9.259821289767205e-05, |
|
"loss": 4.7182, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 7.862248420715332, |
|
"learning_rate": 9.25866113504897e-05, |
|
"loss": 4.7165, |
|
"step": 63900 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.43823152780532837, |
|
"learning_rate": 9.257500980330738e-05, |
|
"loss": 4.7194, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.4907792806625366, |
|
"learning_rate": 9.256340825612503e-05, |
|
"loss": 4.7216, |
|
"step": 64100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.4658365547657013, |
|
"learning_rate": 9.255180670894272e-05, |
|
"loss": 4.7176, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.7001955509185791, |
|
"learning_rate": 9.254020516176037e-05, |
|
"loss": 4.7205, |
|
"step": 64300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.4331388771533966, |
|
"learning_rate": 9.252860361457805e-05, |
|
"loss": 4.7193, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.5930231809616089, |
|
"learning_rate": 9.25170020673957e-05, |
|
"loss": 4.7191, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.5135887265205383, |
|
"learning_rate": 9.250540052021339e-05, |
|
"loss": 4.7156, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 13.119867324829102, |
|
"learning_rate": 9.249379897303104e-05, |
|
"loss": 4.717, |
|
"step": 64700 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.43670549988746643, |
|
"learning_rate": 9.248219742584871e-05, |
|
"loss": 4.7208, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.4829180240631104, |
|
"learning_rate": 9.247059587866638e-05, |
|
"loss": 4.717, |
|
"step": 64900 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.5183930397033691, |
|
"learning_rate": 9.245899433148405e-05, |
|
"loss": 4.7213, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.3946682810783386, |
|
"learning_rate": 9.244739278430172e-05, |
|
"loss": 4.7212, |
|
"step": 65100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.46105512976646423, |
|
"learning_rate": 9.243579123711938e-05, |
|
"loss": 4.7192, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.44661739468574524, |
|
"learning_rate": 9.242418968993705e-05, |
|
"loss": 4.7211, |
|
"step": 65300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.6695325374603271, |
|
"learning_rate": 9.241258814275472e-05, |
|
"loss": 4.7193, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 2.9194817543029785, |
|
"learning_rate": 9.240098659557239e-05, |
|
"loss": 4.7224, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.4088338017463684, |
|
"learning_rate": 9.238938504839005e-05, |
|
"loss": 4.7186, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.5131499767303467, |
|
"learning_rate": 9.237778350120772e-05, |
|
"loss": 4.7158, |
|
"step": 65700 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 9.862509727478027, |
|
"learning_rate": 9.236618195402539e-05, |
|
"loss": 4.716, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.40794941782951355, |
|
"learning_rate": 9.235458040684306e-05, |
|
"loss": 4.718, |
|
"step": 65900 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.40948912501335144, |
|
"learning_rate": 9.234297885966073e-05, |
|
"loss": 4.7162, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 3.3764171600341797, |
|
"learning_rate": 9.23313773124784e-05, |
|
"loss": 4.7209, |
|
"step": 66100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.44073447585105896, |
|
"learning_rate": 9.231977576529607e-05, |
|
"loss": 4.7222, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.5790233016014099, |
|
"learning_rate": 9.230817421811373e-05, |
|
"loss": 4.7167, |
|
"step": 66300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.9112139344215393, |
|
"learning_rate": 9.22965726709314e-05, |
|
"loss": 4.7198, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.5049501061439514, |
|
"learning_rate": 9.228497112374907e-05, |
|
"loss": 4.7158, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.6256558895111084, |
|
"learning_rate": 9.227336957656674e-05, |
|
"loss": 4.7217, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.392869234085083, |
|
"learning_rate": 9.22617680293844e-05, |
|
"loss": 4.72, |
|
"step": 66700 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.4745340347290039, |
|
"learning_rate": 9.225016648220207e-05, |
|
"loss": 4.719, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.4279547333717346, |
|
"learning_rate": 9.223856493501974e-05, |
|
"loss": 4.7191, |
|
"step": 66900 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 3.0647037029266357, |
|
"learning_rate": 9.222696338783741e-05, |
|
"loss": 4.7241, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.4187820255756378, |
|
"learning_rate": 9.221536184065508e-05, |
|
"loss": 4.7211, |
|
"step": 67100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.9273259043693542, |
|
"learning_rate": 9.220376029347274e-05, |
|
"loss": 4.7245, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.49627095460891724, |
|
"learning_rate": 9.219215874629042e-05, |
|
"loss": 4.7182, |
|
"step": 67300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.4430867433547974, |
|
"learning_rate": 9.218055719910808e-05, |
|
"loss": 4.7196, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.4100867509841919, |
|
"learning_rate": 9.216895565192575e-05, |
|
"loss": 4.7207, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.49010923504829407, |
|
"learning_rate": 9.21573541047434e-05, |
|
"loss": 4.7169, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.4209338426589966, |
|
"learning_rate": 9.214575255756109e-05, |
|
"loss": 4.7162, |
|
"step": 67700 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.45588409900665283, |
|
"learning_rate": 9.213415101037874e-05, |
|
"loss": 4.7177, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.405272901058197, |
|
"learning_rate": 9.212254946319641e-05, |
|
"loss": 4.7175, |
|
"step": 67900 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 2.1375739574432373, |
|
"learning_rate": 9.211094791601409e-05, |
|
"loss": 4.7211, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.4474073648452759, |
|
"learning_rate": 9.209934636883176e-05, |
|
"loss": 4.7152, |
|
"step": 68100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 3.441603422164917, |
|
"learning_rate": 9.208774482164943e-05, |
|
"loss": 4.7209, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.562470018863678, |
|
"learning_rate": 9.207614327446708e-05, |
|
"loss": 4.7188, |
|
"step": 68300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.4169847071170807, |
|
"learning_rate": 9.206454172728475e-05, |
|
"loss": 4.7166, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.43674352765083313, |
|
"learning_rate": 9.205294018010242e-05, |
|
"loss": 4.7136, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.39852142333984375, |
|
"learning_rate": 9.20413386329201e-05, |
|
"loss": 4.7176, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.41788631677627563, |
|
"learning_rate": 9.202973708573775e-05, |
|
"loss": 4.7202, |
|
"step": 68700 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.4472859501838684, |
|
"learning_rate": 9.201813553855542e-05, |
|
"loss": 4.7204, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.3944435119628906, |
|
"learning_rate": 9.200653399137309e-05, |
|
"loss": 4.7181, |
|
"step": 68900 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.444679319858551, |
|
"learning_rate": 9.199493244419076e-05, |
|
"loss": 4.723, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.4338514804840088, |
|
"learning_rate": 9.198333089700842e-05, |
|
"loss": 4.7213, |
|
"step": 69100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.4399813711643219, |
|
"learning_rate": 9.19717293498261e-05, |
|
"loss": 4.7167, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.46220558881759644, |
|
"learning_rate": 9.196012780264377e-05, |
|
"loss": 4.7158, |
|
"step": 69300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.313628911972046, |
|
"learning_rate": 9.194852625546143e-05, |
|
"loss": 4.7165, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.5480996370315552, |
|
"learning_rate": 9.19369247082791e-05, |
|
"loss": 4.7174, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.4436434805393219, |
|
"learning_rate": 9.192532316109677e-05, |
|
"loss": 4.7198, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.995100498199463, |
|
"learning_rate": 9.191372161391444e-05, |
|
"loss": 4.7176, |
|
"step": 69700 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.44759735465049744, |
|
"learning_rate": 9.19021200667321e-05, |
|
"loss": 4.7166, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.9924051761627197, |
|
"learning_rate": 9.189051851954977e-05, |
|
"loss": 4.7191, |
|
"step": 69900 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 3.259697675704956, |
|
"learning_rate": 9.187891697236744e-05, |
|
"loss": 4.7166, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.5200223326683044, |
|
"learning_rate": 9.186731542518511e-05, |
|
"loss": 4.72, |
|
"step": 70100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.5573539733886719, |
|
"learning_rate": 9.185571387800278e-05, |
|
"loss": 4.7173, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.46177276968955994, |
|
"learning_rate": 9.184411233082044e-05, |
|
"loss": 4.718, |
|
"step": 70300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.4673067033290863, |
|
"learning_rate": 9.183251078363812e-05, |
|
"loss": 4.7181, |
|
"step": 70400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.4644356667995453, |
|
"learning_rate": 9.182090923645578e-05, |
|
"loss": 4.7146, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.4181482195854187, |
|
"learning_rate": 9.180930768927345e-05, |
|
"loss": 4.7132, |
|
"step": 70600 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.42789730429649353, |
|
"learning_rate": 9.17977061420911e-05, |
|
"loss": 4.7172, |
|
"step": 70700 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 3.978273391723633, |
|
"learning_rate": 9.178610459490879e-05, |
|
"loss": 4.7201, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 6.412673473358154, |
|
"learning_rate": 9.177450304772645e-05, |
|
"loss": 4.7201, |
|
"step": 70900 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.45621249079704285, |
|
"learning_rate": 9.176290150054412e-05, |
|
"loss": 4.7167, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.4043485224246979, |
|
"learning_rate": 9.175129995336179e-05, |
|
"loss": 4.7154, |
|
"step": 71100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.45778888463974, |
|
"learning_rate": 9.173969840617946e-05, |
|
"loss": 4.7193, |
|
"step": 71200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.42115822434425354, |
|
"learning_rate": 9.172809685899713e-05, |
|
"loss": 4.7165, |
|
"step": 71300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.421622633934021, |
|
"learning_rate": 9.171649531181478e-05, |
|
"loss": 4.717, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.4167540967464447, |
|
"learning_rate": 9.170489376463245e-05, |
|
"loss": 4.7161, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.7092134356498718, |
|
"learning_rate": 9.169329221745013e-05, |
|
"loss": 4.7197, |
|
"step": 71600 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.5543451905250549, |
|
"learning_rate": 9.16816906702678e-05, |
|
"loss": 4.7185, |
|
"step": 71700 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.44146397709846497, |
|
"learning_rate": 9.167008912308545e-05, |
|
"loss": 4.7146, |
|
"step": 71800 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.5362756848335266, |
|
"learning_rate": 9.165848757590312e-05, |
|
"loss": 4.72, |
|
"step": 71900 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.4360448122024536, |
|
"learning_rate": 9.16468860287208e-05, |
|
"loss": 4.7198, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.40648597478866577, |
|
"learning_rate": 9.163528448153846e-05, |
|
"loss": 4.7168, |
|
"step": 72100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.8124480843544006, |
|
"learning_rate": 9.162368293435612e-05, |
|
"loss": 4.7185, |
|
"step": 72200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.48873409628868103, |
|
"learning_rate": 9.16120813871738e-05, |
|
"loss": 4.7192, |
|
"step": 72300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.013222098350525, |
|
"learning_rate": 9.160047983999148e-05, |
|
"loss": 4.7156, |
|
"step": 72400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.4423314034938812, |
|
"learning_rate": 9.158887829280913e-05, |
|
"loss": 4.7172, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.5054183602333069, |
|
"learning_rate": 9.15772767456268e-05, |
|
"loss": 4.7199, |
|
"step": 72600 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.4140661060810089, |
|
"learning_rate": 9.156567519844447e-05, |
|
"loss": 4.7142, |
|
"step": 72700 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.734790325164795, |
|
"learning_rate": 9.155407365126214e-05, |
|
"loss": 4.7135, |
|
"step": 72800 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.3996482193470001, |
|
"learning_rate": 9.15424721040798e-05, |
|
"loss": 4.7194, |
|
"step": 72900 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.4260541796684265, |
|
"learning_rate": 9.153087055689747e-05, |
|
"loss": 4.7143, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 2.339716672897339, |
|
"learning_rate": 9.151926900971514e-05, |
|
"loss": 4.7185, |
|
"step": 73100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.48179367184638977, |
|
"learning_rate": 9.150766746253281e-05, |
|
"loss": 4.7161, |
|
"step": 73200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.4243863821029663, |
|
"learning_rate": 9.149606591535047e-05, |
|
"loss": 4.7168, |
|
"step": 73300 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.38295459747314453, |
|
"learning_rate": 9.148446436816814e-05, |
|
"loss": 4.715, |
|
"step": 73400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.5023996829986572, |
|
"learning_rate": 9.147286282098582e-05, |
|
"loss": 4.7115, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.5002963542938232, |
|
"learning_rate": 9.146126127380348e-05, |
|
"loss": 4.7145, |
|
"step": 73600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 9.655466079711914, |
|
"learning_rate": 9.144965972662115e-05, |
|
"loss": 4.7166, |
|
"step": 73700 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.5054044723510742, |
|
"learning_rate": 9.14380581794388e-05, |
|
"loss": 4.7193, |
|
"step": 73800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.453080415725708, |
|
"learning_rate": 9.142645663225649e-05, |
|
"loss": 4.7161, |
|
"step": 73900 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.44624748826026917, |
|
"learning_rate": 9.141485508507415e-05, |
|
"loss": 4.7187, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.4639555811882019, |
|
"learning_rate": 9.140325353789182e-05, |
|
"loss": 4.7176, |
|
"step": 74100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.41725417971611023, |
|
"learning_rate": 9.139165199070947e-05, |
|
"loss": 4.7163, |
|
"step": 74200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.866028904914856, |
|
"learning_rate": 9.138005044352716e-05, |
|
"loss": 4.716, |
|
"step": 74300 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.46224868297576904, |
|
"learning_rate": 9.136844889634482e-05, |
|
"loss": 4.7139, |
|
"step": 74400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.43956613540649414, |
|
"learning_rate": 9.135684734916249e-05, |
|
"loss": 4.7171, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.4600673019886017, |
|
"learning_rate": 9.134524580198016e-05, |
|
"loss": 4.7157, |
|
"step": 74600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.0996651649475098, |
|
"learning_rate": 9.133364425479783e-05, |
|
"loss": 4.7146, |
|
"step": 74700 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.5937373042106628, |
|
"learning_rate": 9.13220427076155e-05, |
|
"loss": 4.719, |
|
"step": 74800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 3.145916223526001, |
|
"learning_rate": 9.131044116043315e-05, |
|
"loss": 4.7144, |
|
"step": 74900 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 5.668306827545166, |
|
"learning_rate": 9.129883961325082e-05, |
|
"loss": 4.7148, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.5367663502693176, |
|
"learning_rate": 9.12872380660685e-05, |
|
"loss": 4.7152, |
|
"step": 75100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.4246484637260437, |
|
"learning_rate": 9.127563651888617e-05, |
|
"loss": 4.7177, |
|
"step": 75200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.47237107157707214, |
|
"learning_rate": 9.126403497170382e-05, |
|
"loss": 4.7166, |
|
"step": 75300 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.5941634178161621, |
|
"learning_rate": 9.12524334245215e-05, |
|
"loss": 4.7158, |
|
"step": 75400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.4129926562309265, |
|
"learning_rate": 9.124083187733916e-05, |
|
"loss": 4.7151, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.5305066704750061, |
|
"learning_rate": 9.122923033015683e-05, |
|
"loss": 4.7151, |
|
"step": 75600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.43299585580825806, |
|
"learning_rate": 9.12176287829745e-05, |
|
"loss": 4.716, |
|
"step": 75700 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 10.120293617248535, |
|
"learning_rate": 9.120602723579217e-05, |
|
"loss": 4.7183, |
|
"step": 75800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.393725723028183, |
|
"learning_rate": 9.119442568860984e-05, |
|
"loss": 4.7173, |
|
"step": 75900 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 5.1044440269470215, |
|
"learning_rate": 9.11828241414275e-05, |
|
"loss": 4.7144, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.38623175024986267, |
|
"learning_rate": 9.117122259424517e-05, |
|
"loss": 4.7165, |
|
"step": 76100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 2.17921781539917, |
|
"learning_rate": 9.115962104706284e-05, |
|
"loss": 4.7171, |
|
"step": 76200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.4206479489803314, |
|
"learning_rate": 9.114801949988051e-05, |
|
"loss": 4.7119, |
|
"step": 76300 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.46056804060935974, |
|
"learning_rate": 9.113641795269817e-05, |
|
"loss": 4.7129, |
|
"step": 76400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.4047752320766449, |
|
"learning_rate": 9.112481640551584e-05, |
|
"loss": 4.7133, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.4942108988761902, |
|
"learning_rate": 9.111321485833351e-05, |
|
"loss": 4.7143, |
|
"step": 76600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.5642993450164795, |
|
"learning_rate": 9.110161331115118e-05, |
|
"loss": 4.7126, |
|
"step": 76700 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.4183765947818756, |
|
"learning_rate": 9.109001176396885e-05, |
|
"loss": 4.7138, |
|
"step": 76800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.4382226765155792, |
|
"learning_rate": 9.107841021678651e-05, |
|
"loss": 4.7156, |
|
"step": 76900 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.429971843957901, |
|
"learning_rate": 9.106680866960419e-05, |
|
"loss": 4.7152, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.4051077663898468, |
|
"learning_rate": 9.105520712242185e-05, |
|
"loss": 4.7146, |
|
"step": 77100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.4416183829307556, |
|
"learning_rate": 9.104360557523952e-05, |
|
"loss": 4.716, |
|
"step": 77200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.668931007385254, |
|
"learning_rate": 9.103200402805718e-05, |
|
"loss": 4.7116, |
|
"step": 77300 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.42298269271850586, |
|
"learning_rate": 9.102040248087486e-05, |
|
"loss": 4.7173, |
|
"step": 77400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.7401153445243835, |
|
"learning_rate": 9.100880093369252e-05, |
|
"loss": 4.7148, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.6153188943862915, |
|
"learning_rate": 9.099719938651019e-05, |
|
"loss": 4.7157, |
|
"step": 77600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.41621410846710205, |
|
"learning_rate": 9.098559783932786e-05, |
|
"loss": 4.7169, |
|
"step": 77700 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.4147420823574066, |
|
"learning_rate": 9.097399629214553e-05, |
|
"loss": 4.7155, |
|
"step": 77800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 8.179230690002441, |
|
"learning_rate": 9.09623947449632e-05, |
|
"loss": 4.7142, |
|
"step": 77900 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.8721107244491577, |
|
"learning_rate": 9.095079319778086e-05, |
|
"loss": 4.7145, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.0575512647628784, |
|
"learning_rate": 9.093919165059853e-05, |
|
"loss": 4.7156, |
|
"step": 78100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.7179790139198303, |
|
"learning_rate": 9.09275901034162e-05, |
|
"loss": 4.7145, |
|
"step": 78200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.8817191123962402, |
|
"learning_rate": 9.091598855623387e-05, |
|
"loss": 4.7154, |
|
"step": 78300 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.47256988286972046, |
|
"learning_rate": 9.090438700905152e-05, |
|
"loss": 4.7176, |
|
"step": 78400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.7659066319465637, |
|
"learning_rate": 9.08927854618692e-05, |
|
"loss": 4.7176, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.4704316556453705, |
|
"learning_rate": 9.088118391468686e-05, |
|
"loss": 4.7173, |
|
"step": 78600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.47578001022338867, |
|
"learning_rate": 9.086958236750454e-05, |
|
"loss": 4.7122, |
|
"step": 78700 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 3.0918359756469727, |
|
"learning_rate": 9.085798082032219e-05, |
|
"loss": 4.712, |
|
"step": 78800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.712931752204895, |
|
"learning_rate": 9.084637927313988e-05, |
|
"loss": 4.7125, |
|
"step": 78900 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.39107823371887207, |
|
"learning_rate": 9.083477772595755e-05, |
|
"loss": 4.7133, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.45276641845703125, |
|
"learning_rate": 9.08231761787752e-05, |
|
"loss": 4.7135, |
|
"step": 79100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.4419955611228943, |
|
"learning_rate": 9.081157463159287e-05, |
|
"loss": 4.7133, |
|
"step": 79200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.39920610189437866, |
|
"learning_rate": 9.079997308441054e-05, |
|
"loss": 4.7121, |
|
"step": 79300 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.8428456783294678, |
|
"learning_rate": 9.078837153722821e-05, |
|
"loss": 4.7135, |
|
"step": 79400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.664378821849823, |
|
"learning_rate": 9.077676999004587e-05, |
|
"loss": 4.7165, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.4127453863620758, |
|
"learning_rate": 9.076516844286354e-05, |
|
"loss": 4.7149, |
|
"step": 79600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.8230032920837402, |
|
"learning_rate": 9.075356689568121e-05, |
|
"loss": 4.7123, |
|
"step": 79700 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.4852809011936188, |
|
"learning_rate": 9.074196534849888e-05, |
|
"loss": 4.7155, |
|
"step": 79800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 10.022642135620117, |
|
"learning_rate": 9.073036380131654e-05, |
|
"loss": 4.7138, |
|
"step": 79900 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.4676394462585449, |
|
"learning_rate": 9.071876225413421e-05, |
|
"loss": 4.7148, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.4943694770336151, |
|
"learning_rate": 9.07071607069519e-05, |
|
"loss": 4.7136, |
|
"step": 80100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.45253655314445496, |
|
"learning_rate": 9.069555915976955e-05, |
|
"loss": 4.7134, |
|
"step": 80200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.42640450596809387, |
|
"learning_rate": 9.068395761258722e-05, |
|
"loss": 4.7107, |
|
"step": 80300 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.409261018037796, |
|
"learning_rate": 9.067235606540488e-05, |
|
"loss": 4.7141, |
|
"step": 80400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.9824286103248596, |
|
"learning_rate": 9.066075451822256e-05, |
|
"loss": 4.7141, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.6486682891845703, |
|
"learning_rate": 9.064915297104022e-05, |
|
"loss": 4.7161, |
|
"step": 80600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.587768018245697, |
|
"learning_rate": 9.063755142385789e-05, |
|
"loss": 4.712, |
|
"step": 80700 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.4047750234603882, |
|
"learning_rate": 9.062594987667556e-05, |
|
"loss": 4.7133, |
|
"step": 80800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.6553632616996765, |
|
"learning_rate": 9.061434832949323e-05, |
|
"loss": 4.7139, |
|
"step": 80900 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.46661463379859924, |
|
"learning_rate": 9.06027467823109e-05, |
|
"loss": 4.7138, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.44048547744750977, |
|
"learning_rate": 9.059114523512856e-05, |
|
"loss": 4.7159, |
|
"step": 81100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.9623928666114807, |
|
"learning_rate": 9.057954368794623e-05, |
|
"loss": 4.7134, |
|
"step": 81200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.40359175205230713, |
|
"learning_rate": 9.05679421407639e-05, |
|
"loss": 4.7147, |
|
"step": 81300 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.6080948114395142, |
|
"learning_rate": 9.055634059358157e-05, |
|
"loss": 4.7132, |
|
"step": 81400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 2.992509126663208, |
|
"learning_rate": 9.054473904639923e-05, |
|
"loss": 4.7137, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.4392452538013458, |
|
"learning_rate": 9.05331374992169e-05, |
|
"loss": 4.7122, |
|
"step": 81600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.9614498019218445, |
|
"learning_rate": 9.052153595203457e-05, |
|
"loss": 4.71, |
|
"step": 81700 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.4181789755821228, |
|
"learning_rate": 9.050993440485224e-05, |
|
"loss": 4.7127, |
|
"step": 81800 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.0023537874221802, |
|
"learning_rate": 9.04983328576699e-05, |
|
"loss": 4.7144, |
|
"step": 81900 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 9.406643867492676, |
|
"learning_rate": 9.048673131048758e-05, |
|
"loss": 4.7134, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.4215865433216095, |
|
"learning_rate": 9.047512976330525e-05, |
|
"loss": 4.7137, |
|
"step": 82100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.4328692853450775, |
|
"learning_rate": 9.04635282161229e-05, |
|
"loss": 4.7125, |
|
"step": 82200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.38356509804725647, |
|
"learning_rate": 9.045192666894058e-05, |
|
"loss": 4.7195, |
|
"step": 82300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.4107106626033783, |
|
"learning_rate": 9.044032512175825e-05, |
|
"loss": 4.7097, |
|
"step": 82400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 18.908138275146484, |
|
"learning_rate": 9.042872357457592e-05, |
|
"loss": 4.7187, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.48430249094963074, |
|
"learning_rate": 9.041712202739357e-05, |
|
"loss": 4.7174, |
|
"step": 82600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.4645005166530609, |
|
"learning_rate": 9.040552048021124e-05, |
|
"loss": 4.7146, |
|
"step": 82700 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.38825261592865, |
|
"learning_rate": 9.039391893302891e-05, |
|
"loss": 4.7159, |
|
"step": 82800 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.1587923765182495, |
|
"learning_rate": 9.038231738584658e-05, |
|
"loss": 4.7182, |
|
"step": 82900 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.42529210448265076, |
|
"learning_rate": 9.037071583866424e-05, |
|
"loss": 4.7118, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 6.866576671600342, |
|
"learning_rate": 9.035911429148191e-05, |
|
"loss": 4.7124, |
|
"step": 83100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.415056973695755, |
|
"learning_rate": 9.03475127442996e-05, |
|
"loss": 4.7136, |
|
"step": 83200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.4472216069698334, |
|
"learning_rate": 9.033591119711725e-05, |
|
"loss": 4.71, |
|
"step": 83300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.5460192561149597, |
|
"learning_rate": 9.032430964993492e-05, |
|
"loss": 4.7155, |
|
"step": 83400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.7862334251403809, |
|
"learning_rate": 9.031270810275258e-05, |
|
"loss": 4.7135, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.4369088411331177, |
|
"learning_rate": 9.030110655557026e-05, |
|
"loss": 4.7136, |
|
"step": 83600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.40611496567726135, |
|
"learning_rate": 9.028950500838792e-05, |
|
"loss": 4.7144, |
|
"step": 83700 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.434088796377182, |
|
"learning_rate": 9.027790346120559e-05, |
|
"loss": 4.7161, |
|
"step": 83800 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.3754402995109558, |
|
"learning_rate": 9.026630191402326e-05, |
|
"loss": 4.7112, |
|
"step": 83900 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.507156491279602, |
|
"learning_rate": 9.025470036684093e-05, |
|
"loss": 4.7091, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.43876662850379944, |
|
"learning_rate": 9.024309881965859e-05, |
|
"loss": 4.7107, |
|
"step": 84100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.3095866441726685, |
|
"learning_rate": 9.023149727247626e-05, |
|
"loss": 4.7114, |
|
"step": 84200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.327343225479126, |
|
"learning_rate": 9.021989572529393e-05, |
|
"loss": 4.713, |
|
"step": 84300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.4155600965023041, |
|
"learning_rate": 9.02082941781116e-05, |
|
"loss": 4.7136, |
|
"step": 84400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.4112358093261719, |
|
"learning_rate": 9.019669263092927e-05, |
|
"loss": 4.7096, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.422731637954712, |
|
"learning_rate": 9.018509108374693e-05, |
|
"loss": 4.7132, |
|
"step": 84600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.42826709151268005, |
|
"learning_rate": 9.01734895365646e-05, |
|
"loss": 4.7125, |
|
"step": 84700 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.4018416702747345, |
|
"learning_rate": 9.016188798938227e-05, |
|
"loss": 4.7123, |
|
"step": 84800 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.3829653561115265, |
|
"learning_rate": 9.015028644219994e-05, |
|
"loss": 4.7143, |
|
"step": 84900 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.45742008090019226, |
|
"learning_rate": 9.01386848950176e-05, |
|
"loss": 4.7114, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.7736401557922363, |
|
"learning_rate": 9.012708334783528e-05, |
|
"loss": 4.7138, |
|
"step": 85100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.40791991353034973, |
|
"learning_rate": 9.011548180065294e-05, |
|
"loss": 4.7116, |
|
"step": 85200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.40508711338043213, |
|
"learning_rate": 9.01038802534706e-05, |
|
"loss": 4.7109, |
|
"step": 85300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.41179603338241577, |
|
"learning_rate": 9.009227870628828e-05, |
|
"loss": 4.7147, |
|
"step": 85400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.49404746294021606, |
|
"learning_rate": 9.008067715910595e-05, |
|
"loss": 4.714, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.4049994647502899, |
|
"learning_rate": 9.006907561192362e-05, |
|
"loss": 4.7131, |
|
"step": 85600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.3943819999694824, |
|
"learning_rate": 9.005747406474127e-05, |
|
"loss": 4.7099, |
|
"step": 85700 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.3848678171634674, |
|
"learning_rate": 9.004587251755894e-05, |
|
"loss": 4.7105, |
|
"step": 85800 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.39097893238067627, |
|
"learning_rate": 9.003427097037662e-05, |
|
"loss": 4.7134, |
|
"step": 85900 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.5381259918212891, |
|
"learning_rate": 9.002266942319429e-05, |
|
"loss": 4.7111, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.44293051958084106, |
|
"learning_rate": 9.001106787601194e-05, |
|
"loss": 4.7132, |
|
"step": 86100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.449153423309326, |
|
"learning_rate": 8.999946632882961e-05, |
|
"loss": 4.7108, |
|
"step": 86200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.677003026008606, |
|
"learning_rate": 8.998786478164728e-05, |
|
"loss": 4.7117, |
|
"step": 86300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.4721989035606384, |
|
"learning_rate": 8.997626323446495e-05, |
|
"loss": 4.7084, |
|
"step": 86400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.4921315908432007, |
|
"learning_rate": 8.996466168728262e-05, |
|
"loss": 4.7157, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.6939030885696411, |
|
"learning_rate": 8.995306014010028e-05, |
|
"loss": 4.7114, |
|
"step": 86600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.3602544069290161, |
|
"learning_rate": 8.994145859291797e-05, |
|
"loss": 4.7116, |
|
"step": 86700 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.40210890769958496, |
|
"learning_rate": 8.992985704573562e-05, |
|
"loss": 4.7132, |
|
"step": 86800 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.41714897751808167, |
|
"learning_rate": 8.991825549855329e-05, |
|
"loss": 4.713, |
|
"step": 86900 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 3.1149511337280273, |
|
"learning_rate": 8.990665395137096e-05, |
|
"loss": 4.709, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.3356086015701294, |
|
"learning_rate": 8.989505240418863e-05, |
|
"loss": 4.7102, |
|
"step": 87100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.36242830753326416, |
|
"learning_rate": 8.988345085700629e-05, |
|
"loss": 4.7091, |
|
"step": 87200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.45975425839424133, |
|
"learning_rate": 8.987184930982396e-05, |
|
"loss": 4.7098, |
|
"step": 87300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.38699838519096375, |
|
"learning_rate": 8.986024776264163e-05, |
|
"loss": 4.7113, |
|
"step": 87400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.0604562759399414, |
|
"learning_rate": 8.98486462154593e-05, |
|
"loss": 4.7108, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.39834097027778625, |
|
"learning_rate": 8.983704466827697e-05, |
|
"loss": 4.7131, |
|
"step": 87600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.40465226769447327, |
|
"learning_rate": 8.982544312109463e-05, |
|
"loss": 4.7101, |
|
"step": 87700 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.43859535455703735, |
|
"learning_rate": 8.98138415739123e-05, |
|
"loss": 4.7118, |
|
"step": 87800 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.5537000894546509, |
|
"learning_rate": 8.980224002672997e-05, |
|
"loss": 4.7106, |
|
"step": 87900 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.5020445585250854, |
|
"learning_rate": 8.979063847954764e-05, |
|
"loss": 4.7094, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.419494092464447, |
|
"learning_rate": 8.97790369323653e-05, |
|
"loss": 4.7111, |
|
"step": 88100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.4324280023574829, |
|
"learning_rate": 8.976743538518298e-05, |
|
"loss": 4.7151, |
|
"step": 88200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.410915732383728, |
|
"learning_rate": 8.975583383800064e-05, |
|
"loss": 4.7126, |
|
"step": 88300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.4660918414592743, |
|
"learning_rate": 8.974423229081831e-05, |
|
"loss": 4.7097, |
|
"step": 88400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.4695265293121338, |
|
"learning_rate": 8.973263074363596e-05, |
|
"loss": 4.7094, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.7962493896484375, |
|
"learning_rate": 8.972102919645365e-05, |
|
"loss": 4.7106, |
|
"step": 88600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.41975751519203186, |
|
"learning_rate": 8.970942764927132e-05, |
|
"loss": 4.7082, |
|
"step": 88700 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.06545090675354, |
|
"learning_rate": 8.969782610208898e-05, |
|
"loss": 4.7115, |
|
"step": 88800 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.5522103309631348, |
|
"learning_rate": 8.968622455490665e-05, |
|
"loss": 4.7116, |
|
"step": 88900 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.4133060574531555, |
|
"learning_rate": 8.967462300772432e-05, |
|
"loss": 4.7117, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.6902609467506409, |
|
"learning_rate": 8.966302146054199e-05, |
|
"loss": 4.7114, |
|
"step": 89100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.39645135402679443, |
|
"learning_rate": 8.965141991335964e-05, |
|
"loss": 4.7075, |
|
"step": 89200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.702682614326477, |
|
"learning_rate": 8.963981836617731e-05, |
|
"loss": 4.7132, |
|
"step": 89300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.4847519099712372, |
|
"learning_rate": 8.962821681899499e-05, |
|
"loss": 4.7107, |
|
"step": 89400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.4338213801383972, |
|
"learning_rate": 8.961661527181266e-05, |
|
"loss": 4.7112, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.5931529402732849, |
|
"learning_rate": 8.960501372463031e-05, |
|
"loss": 4.713, |
|
"step": 89600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.4164227545261383, |
|
"learning_rate": 8.959341217744798e-05, |
|
"loss": 4.7124, |
|
"step": 89700 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.41698023676872253, |
|
"learning_rate": 8.958181063026567e-05, |
|
"loss": 4.7098, |
|
"step": 89800 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.45001962780952454, |
|
"learning_rate": 8.957020908308332e-05, |
|
"loss": 4.7099, |
|
"step": 89900 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.5672646164894104, |
|
"learning_rate": 8.9558607535901e-05, |
|
"loss": 4.7101, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.4138546586036682, |
|
"learning_rate": 8.954700598871865e-05, |
|
"loss": 4.7114, |
|
"step": 90100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.5982836484909058, |
|
"learning_rate": 8.953540444153633e-05, |
|
"loss": 4.7077, |
|
"step": 90200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.39376482367515564, |
|
"learning_rate": 8.952380289435399e-05, |
|
"loss": 4.713, |
|
"step": 90300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.39758920669555664, |
|
"learning_rate": 8.951220134717166e-05, |
|
"loss": 4.7105, |
|
"step": 90400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.4163142442703247, |
|
"learning_rate": 8.950059979998933e-05, |
|
"loss": 4.7071, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.4065834581851959, |
|
"learning_rate": 8.9488998252807e-05, |
|
"loss": 4.7136, |
|
"step": 90600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.4078928530216217, |
|
"learning_rate": 8.947739670562466e-05, |
|
"loss": 4.7081, |
|
"step": 90700 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 2.143505096435547, |
|
"learning_rate": 8.946579515844233e-05, |
|
"loss": 4.7119, |
|
"step": 90800 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.4692690074443817, |
|
"learning_rate": 8.945419361126e-05, |
|
"loss": 4.71, |
|
"step": 90900 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.4491908848285675, |
|
"learning_rate": 8.944259206407767e-05, |
|
"loss": 4.709, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.4015776515007019, |
|
"learning_rate": 8.943099051689534e-05, |
|
"loss": 4.7133, |
|
"step": 91100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 12.54626178741455, |
|
"learning_rate": 8.9419388969713e-05, |
|
"loss": 4.7115, |
|
"step": 91200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.4521196484565735, |
|
"learning_rate": 8.940778742253068e-05, |
|
"loss": 4.7098, |
|
"step": 91300 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.40918394923210144, |
|
"learning_rate": 8.939618587534834e-05, |
|
"loss": 4.7064, |
|
"step": 91400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.7197582721710205, |
|
"learning_rate": 8.938458432816601e-05, |
|
"loss": 4.7083, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.4415791630744934, |
|
"learning_rate": 8.937298278098367e-05, |
|
"loss": 4.7121, |
|
"step": 91600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.37231072783470154, |
|
"learning_rate": 8.936138123380135e-05, |
|
"loss": 4.7076, |
|
"step": 91700 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.5812079906463623, |
|
"learning_rate": 8.934977968661902e-05, |
|
"loss": 4.7103, |
|
"step": 91800 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.42401689291000366, |
|
"learning_rate": 8.933817813943668e-05, |
|
"loss": 4.7087, |
|
"step": 91900 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.433369904756546, |
|
"learning_rate": 8.932657659225435e-05, |
|
"loss": 4.7102, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.4144749045372009, |
|
"learning_rate": 8.931497504507202e-05, |
|
"loss": 4.711, |
|
"step": 92100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.3901808559894562, |
|
"learning_rate": 8.930337349788969e-05, |
|
"loss": 4.712, |
|
"step": 92200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.41925275325775146, |
|
"learning_rate": 8.929177195070735e-05, |
|
"loss": 4.7123, |
|
"step": 92300 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.44110241532325745, |
|
"learning_rate": 8.928017040352502e-05, |
|
"loss": 4.7107, |
|
"step": 92400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.3761931359767914, |
|
"learning_rate": 8.926856885634269e-05, |
|
"loss": 4.7067, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.37371134757995605, |
|
"learning_rate": 8.925696730916036e-05, |
|
"loss": 4.7076, |
|
"step": 92600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 2.6820991039276123, |
|
"learning_rate": 8.924536576197801e-05, |
|
"loss": 4.7101, |
|
"step": 92700 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 8.034706115722656, |
|
"learning_rate": 8.923376421479568e-05, |
|
"loss": 4.7052, |
|
"step": 92800 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.6186811327934265, |
|
"learning_rate": 8.922216266761337e-05, |
|
"loss": 4.7108, |
|
"step": 92900 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 16.127689361572266, |
|
"learning_rate": 8.921056112043103e-05, |
|
"loss": 4.7109, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.4097209572792053, |
|
"learning_rate": 8.91989595732487e-05, |
|
"loss": 4.7107, |
|
"step": 93100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.4645543098449707, |
|
"learning_rate": 8.918735802606635e-05, |
|
"loss": 4.706, |
|
"step": 93200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.4279698431491852, |
|
"learning_rate": 8.917575647888404e-05, |
|
"loss": 4.7081, |
|
"step": 93300 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.3847067654132843, |
|
"learning_rate": 8.91641549317017e-05, |
|
"loss": 4.7104, |
|
"step": 93400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.9405023455619812, |
|
"learning_rate": 8.915255338451936e-05, |
|
"loss": 4.7077, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.4204134941101074, |
|
"learning_rate": 8.914095183733703e-05, |
|
"loss": 4.7102, |
|
"step": 93600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.411493718624115, |
|
"learning_rate": 8.91293502901547e-05, |
|
"loss": 4.7128, |
|
"step": 93700 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.4645240604877472, |
|
"learning_rate": 8.911774874297236e-05, |
|
"loss": 4.708, |
|
"step": 93800 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 8.295918464660645, |
|
"learning_rate": 8.910614719579003e-05, |
|
"loss": 4.7087, |
|
"step": 93900 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.5342881083488464, |
|
"learning_rate": 8.90945456486077e-05, |
|
"loss": 4.7076, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.41708967089653015, |
|
"learning_rate": 8.908294410142537e-05, |
|
"loss": 4.7096, |
|
"step": 94100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.50910484790802, |
|
"learning_rate": 8.907134255424304e-05, |
|
"loss": 4.7074, |
|
"step": 94200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.8689035773277283, |
|
"learning_rate": 8.90597410070607e-05, |
|
"loss": 4.7084, |
|
"step": 94300 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.477888584136963, |
|
"learning_rate": 8.904813945987837e-05, |
|
"loss": 4.7089, |
|
"step": 94400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.3866770565509796, |
|
"learning_rate": 8.903653791269604e-05, |
|
"loss": 4.71, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.4197094440460205, |
|
"learning_rate": 8.902493636551371e-05, |
|
"loss": 4.7062, |
|
"step": 94600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.4031221866607666, |
|
"learning_rate": 8.901333481833137e-05, |
|
"loss": 4.7105, |
|
"step": 94700 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.4098550081253052, |
|
"learning_rate": 8.900173327114905e-05, |
|
"loss": 4.7099, |
|
"step": 94800 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.37702327966690063, |
|
"learning_rate": 8.899013172396671e-05, |
|
"loss": 4.7073, |
|
"step": 94900 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.4298543632030487, |
|
"learning_rate": 8.897853017678438e-05, |
|
"loss": 4.7092, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.49779465794563293, |
|
"learning_rate": 8.896692862960205e-05, |
|
"loss": 4.7059, |
|
"step": 95100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.3974045515060425, |
|
"learning_rate": 8.895532708241972e-05, |
|
"loss": 4.7116, |
|
"step": 95200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 4.801963806152344, |
|
"learning_rate": 8.894372553523739e-05, |
|
"loss": 4.7103, |
|
"step": 95300 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.6826298236846924, |
|
"learning_rate": 8.893212398805505e-05, |
|
"loss": 4.7124, |
|
"step": 95400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.8550306558609009, |
|
"learning_rate": 8.892052244087272e-05, |
|
"loss": 4.7078, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.4076642394065857, |
|
"learning_rate": 8.890892089369039e-05, |
|
"loss": 4.7065, |
|
"step": 95600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.4062858521938324, |
|
"learning_rate": 8.889731934650806e-05, |
|
"loss": 4.7086, |
|
"step": 95700 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 2.432161808013916, |
|
"learning_rate": 8.888571779932572e-05, |
|
"loss": 4.7049, |
|
"step": 95800 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.41037145256996155, |
|
"learning_rate": 8.887411625214339e-05, |
|
"loss": 4.7042, |
|
"step": 95900 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.3852427005767822, |
|
"learning_rate": 8.886251470496106e-05, |
|
"loss": 4.7051, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.43618643283843994, |
|
"learning_rate": 8.885091315777873e-05, |
|
"loss": 4.7092, |
|
"step": 96100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.4727887511253357, |
|
"learning_rate": 8.88393116105964e-05, |
|
"loss": 4.7076, |
|
"step": 96200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.4139232337474823, |
|
"learning_rate": 8.882771006341405e-05, |
|
"loss": 4.7078, |
|
"step": 96300 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 2.8521788120269775, |
|
"learning_rate": 8.881610851623174e-05, |
|
"loss": 4.7086, |
|
"step": 96400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.43892329931259155, |
|
"learning_rate": 8.88045069690494e-05, |
|
"loss": 4.7067, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.40643131732940674, |
|
"learning_rate": 8.879290542186707e-05, |
|
"loss": 4.7078, |
|
"step": 96600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.44639840722084045, |
|
"learning_rate": 8.878130387468474e-05, |
|
"loss": 4.7092, |
|
"step": 96700 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 10.743888854980469, |
|
"learning_rate": 8.87697023275024e-05, |
|
"loss": 4.7048, |
|
"step": 96800 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.39078572392463684, |
|
"learning_rate": 8.875810078032006e-05, |
|
"loss": 4.7015, |
|
"step": 96900 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.4048124849796295, |
|
"learning_rate": 8.874649923313773e-05, |
|
"loss": 4.7126, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.5833559632301331, |
|
"learning_rate": 8.87348976859554e-05, |
|
"loss": 4.7057, |
|
"step": 97100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.4627319276332855, |
|
"learning_rate": 8.872329613877307e-05, |
|
"loss": 4.7091, |
|
"step": 97200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.460225909948349, |
|
"learning_rate": 8.871169459159074e-05, |
|
"loss": 4.7079, |
|
"step": 97300 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.44441863894462585, |
|
"learning_rate": 8.87000930444084e-05, |
|
"loss": 4.7039, |
|
"step": 97400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.40614771842956543, |
|
"learning_rate": 8.868849149722607e-05, |
|
"loss": 4.7067, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.48935455083847046, |
|
"learning_rate": 8.867688995004374e-05, |
|
"loss": 4.7072, |
|
"step": 97600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.503010869026184, |
|
"learning_rate": 8.866528840286141e-05, |
|
"loss": 4.7051, |
|
"step": 97700 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.40835824608802795, |
|
"learning_rate": 8.865368685567907e-05, |
|
"loss": 4.7085, |
|
"step": 97800 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.47787487506866455, |
|
"learning_rate": 8.864208530849675e-05, |
|
"loss": 4.7042, |
|
"step": 97900 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.44062966108322144, |
|
"learning_rate": 8.863048376131441e-05, |
|
"loss": 4.7088, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.5162871479988098, |
|
"learning_rate": 8.861888221413208e-05, |
|
"loss": 4.705, |
|
"step": 98100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.7115480899810791, |
|
"learning_rate": 8.860728066694974e-05, |
|
"loss": 4.7069, |
|
"step": 98200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.43299001455307007, |
|
"learning_rate": 8.859567911976742e-05, |
|
"loss": 4.7037, |
|
"step": 98300 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.5156663656234741, |
|
"learning_rate": 8.858407757258509e-05, |
|
"loss": 4.7057, |
|
"step": 98400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.3159596920013428, |
|
"learning_rate": 8.857247602540275e-05, |
|
"loss": 4.705, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 5.239011287689209, |
|
"learning_rate": 8.856087447822042e-05, |
|
"loss": 4.7104, |
|
"step": 98600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.5143164992332458, |
|
"learning_rate": 8.854927293103809e-05, |
|
"loss": 4.7075, |
|
"step": 98700 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.4091811180114746, |
|
"learning_rate": 8.853767138385576e-05, |
|
"loss": 4.706, |
|
"step": 98800 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.3682583272457123, |
|
"learning_rate": 8.852606983667342e-05, |
|
"loss": 4.7049, |
|
"step": 98900 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.9574017524719238, |
|
"learning_rate": 8.851446828949109e-05, |
|
"loss": 4.7085, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.4863421320915222, |
|
"learning_rate": 8.850286674230876e-05, |
|
"loss": 4.7042, |
|
"step": 99100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.41971755027770996, |
|
"learning_rate": 8.849126519512643e-05, |
|
"loss": 4.7046, |
|
"step": 99200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.45720914006233215, |
|
"learning_rate": 8.847966364794409e-05, |
|
"loss": 4.705, |
|
"step": 99300 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.3879556953907013, |
|
"learning_rate": 8.846806210076176e-05, |
|
"loss": 4.7051, |
|
"step": 99400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.6103081703186035, |
|
"learning_rate": 8.845646055357944e-05, |
|
"loss": 4.7018, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.4066198468208313, |
|
"learning_rate": 8.84448590063971e-05, |
|
"loss": 4.7039, |
|
"step": 99600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.568277895450592, |
|
"learning_rate": 8.843325745921477e-05, |
|
"loss": 4.7046, |
|
"step": 99700 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.41322973370552063, |
|
"learning_rate": 8.842165591203244e-05, |
|
"loss": 4.7051, |
|
"step": 99800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 3.952580690383911, |
|
"learning_rate": 8.841005436485011e-05, |
|
"loss": 4.7017, |
|
"step": 99900 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.40354979038238525, |
|
"learning_rate": 8.839845281766776e-05, |
|
"loss": 4.7076, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.44302529096603394, |
|
"learning_rate": 8.838685127048543e-05, |
|
"loss": 4.7071, |
|
"step": 100100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.5012074112892151, |
|
"learning_rate": 8.83752497233031e-05, |
|
"loss": 4.7024, |
|
"step": 100200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.4387538433074951, |
|
"learning_rate": 8.836364817612078e-05, |
|
"loss": 4.7019, |
|
"step": 100300 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 4.33089017868042, |
|
"learning_rate": 8.835204662893843e-05, |
|
"loss": 4.7034, |
|
"step": 100400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.6666650176048279, |
|
"learning_rate": 8.83404450817561e-05, |
|
"loss": 4.7006, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.43695616722106934, |
|
"learning_rate": 8.832884353457377e-05, |
|
"loss": 4.7028, |
|
"step": 100600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.39040979743003845, |
|
"learning_rate": 8.831724198739144e-05, |
|
"loss": 4.703, |
|
"step": 100700 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.4081096053123474, |
|
"learning_rate": 8.830564044020911e-05, |
|
"loss": 4.7062, |
|
"step": 100800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.4371529519557953, |
|
"learning_rate": 8.829403889302677e-05, |
|
"loss": 4.7027, |
|
"step": 100900 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.4820528030395508, |
|
"learning_rate": 8.828243734584446e-05, |
|
"loss": 4.7026, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.4896353781223297, |
|
"learning_rate": 8.827083579866211e-05, |
|
"loss": 4.7052, |
|
"step": 101100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.4941895604133606, |
|
"learning_rate": 8.825923425147978e-05, |
|
"loss": 4.7065, |
|
"step": 101200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.4064702093601227, |
|
"learning_rate": 8.824763270429744e-05, |
|
"loss": 4.7032, |
|
"step": 101300 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.7341260313987732, |
|
"learning_rate": 8.823603115711512e-05, |
|
"loss": 4.7029, |
|
"step": 101400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.41835737228393555, |
|
"learning_rate": 8.822442960993278e-05, |
|
"loss": 4.7012, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.4256590008735657, |
|
"learning_rate": 8.821282806275045e-05, |
|
"loss": 4.7038, |
|
"step": 101600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.43846938014030457, |
|
"learning_rate": 8.820122651556812e-05, |
|
"loss": 4.7014, |
|
"step": 101700 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.4327394962310791, |
|
"learning_rate": 8.818962496838579e-05, |
|
"loss": 4.7022, |
|
"step": 101800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.42558759450912476, |
|
"learning_rate": 8.817802342120346e-05, |
|
"loss": 4.7001, |
|
"step": 101900 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.41578829288482666, |
|
"learning_rate": 8.816642187402112e-05, |
|
"loss": 4.6996, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.1245514154434204, |
|
"learning_rate": 8.815482032683879e-05, |
|
"loss": 4.7055, |
|
"step": 102100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.43621131777763367, |
|
"learning_rate": 8.814321877965646e-05, |
|
"loss": 4.7011, |
|
"step": 102200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.555548906326294, |
|
"learning_rate": 8.813161723247413e-05, |
|
"loss": 4.7026, |
|
"step": 102300 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.5187997221946716, |
|
"learning_rate": 8.812001568529179e-05, |
|
"loss": 4.7046, |
|
"step": 102400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.8702124953269958, |
|
"learning_rate": 8.810841413810946e-05, |
|
"loss": 4.7009, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.4865017533302307, |
|
"learning_rate": 8.809681259092714e-05, |
|
"loss": 4.7012, |
|
"step": 102600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.4582134783267975, |
|
"learning_rate": 8.80852110437448e-05, |
|
"loss": 4.7024, |
|
"step": 102700 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.48400819301605225, |
|
"learning_rate": 8.807360949656247e-05, |
|
"loss": 4.7009, |
|
"step": 102800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.4129534363746643, |
|
"learning_rate": 8.806200794938013e-05, |
|
"loss": 4.6953, |
|
"step": 102900 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.5257245302200317, |
|
"learning_rate": 8.805040640219781e-05, |
|
"loss": 4.6998, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.6813905239105225, |
|
"learning_rate": 8.803880485501547e-05, |
|
"loss": 4.7037, |
|
"step": 103100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.6715384721755981, |
|
"learning_rate": 8.802720330783314e-05, |
|
"loss": 4.7011, |
|
"step": 103200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.41265252232551575, |
|
"learning_rate": 8.801560176065081e-05, |
|
"loss": 4.7019, |
|
"step": 103300 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.37935546040534973, |
|
"learning_rate": 8.800400021346848e-05, |
|
"loss": 4.6995, |
|
"step": 103400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 2.578775644302368, |
|
"learning_rate": 8.799239866628613e-05, |
|
"loss": 4.7025, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.5321478843688965, |
|
"learning_rate": 8.79807971191038e-05, |
|
"loss": 4.6983, |
|
"step": 103600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.4280731976032257, |
|
"learning_rate": 8.796919557192148e-05, |
|
"loss": 4.6947, |
|
"step": 103700 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.559546709060669, |
|
"learning_rate": 8.795759402473915e-05, |
|
"loss": 4.7027, |
|
"step": 103800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.39881765842437744, |
|
"learning_rate": 8.794599247755682e-05, |
|
"loss": 4.7024, |
|
"step": 103900 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.5303432941436768, |
|
"learning_rate": 8.793439093037447e-05, |
|
"loss": 4.6984, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.4243880808353424, |
|
"learning_rate": 8.792278938319216e-05, |
|
"loss": 4.7011, |
|
"step": 104100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.8046822547912598, |
|
"learning_rate": 8.791118783600981e-05, |
|
"loss": 4.703, |
|
"step": 104200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.45566943287849426, |
|
"learning_rate": 8.789958628882748e-05, |
|
"loss": 4.702, |
|
"step": 104300 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 4.0667338371276855, |
|
"learning_rate": 8.788798474164514e-05, |
|
"loss": 4.6984, |
|
"step": 104400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.4209063947200775, |
|
"learning_rate": 8.787638319446282e-05, |
|
"loss": 4.6985, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.41932806372642517, |
|
"learning_rate": 8.786478164728048e-05, |
|
"loss": 4.6987, |
|
"step": 104600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 3.0371077060699463, |
|
"learning_rate": 8.785318010009815e-05, |
|
"loss": 4.7016, |
|
"step": 104700 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.5310882925987244, |
|
"learning_rate": 8.784157855291582e-05, |
|
"loss": 4.6996, |
|
"step": 104800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.43061837553977966, |
|
"learning_rate": 8.782997700573349e-05, |
|
"loss": 4.6987, |
|
"step": 104900 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.5050943493843079, |
|
"learning_rate": 8.781837545855116e-05, |
|
"loss": 4.7012, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.39062759280204773, |
|
"learning_rate": 8.780677391136882e-05, |
|
"loss": 4.6966, |
|
"step": 105100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 4.528038024902344, |
|
"learning_rate": 8.779517236418649e-05, |
|
"loss": 4.6964, |
|
"step": 105200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.3973850905895233, |
|
"learning_rate": 8.778357081700416e-05, |
|
"loss": 4.6989, |
|
"step": 105300 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.42976003885269165, |
|
"learning_rate": 8.777196926982183e-05, |
|
"loss": 4.6996, |
|
"step": 105400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.43507105112075806, |
|
"learning_rate": 8.776036772263949e-05, |
|
"loss": 4.698, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.4329013526439667, |
|
"learning_rate": 8.774876617545716e-05, |
|
"loss": 4.6967, |
|
"step": 105600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.40144628286361694, |
|
"learning_rate": 8.773716462827483e-05, |
|
"loss": 4.6944, |
|
"step": 105700 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.5410645008087158, |
|
"learning_rate": 8.77255630810925e-05, |
|
"loss": 4.6962, |
|
"step": 105800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.709266722202301, |
|
"learning_rate": 8.771396153391017e-05, |
|
"loss": 4.696, |
|
"step": 105900 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.4841790199279785, |
|
"learning_rate": 8.770235998672783e-05, |
|
"loss": 4.7003, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.48105648159980774, |
|
"learning_rate": 8.769075843954551e-05, |
|
"loss": 4.6925, |
|
"step": 106100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.4711418151855469, |
|
"learning_rate": 8.767915689236317e-05, |
|
"loss": 4.6967, |
|
"step": 106200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.4943852126598358, |
|
"learning_rate": 8.766755534518084e-05, |
|
"loss": 4.6958, |
|
"step": 106300 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.1731679439544678, |
|
"learning_rate": 8.765595379799851e-05, |
|
"loss": 4.697, |
|
"step": 106400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 2.6942670345306396, |
|
"learning_rate": 8.764435225081618e-05, |
|
"loss": 4.6988, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.5932111144065857, |
|
"learning_rate": 8.763275070363384e-05, |
|
"loss": 4.7008, |
|
"step": 106600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.498844176530838, |
|
"learning_rate": 8.76211491564515e-05, |
|
"loss": 4.6966, |
|
"step": 106700 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.525398850440979, |
|
"learning_rate": 8.760954760926918e-05, |
|
"loss": 4.6992, |
|
"step": 106800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 2.405345916748047, |
|
"learning_rate": 8.759794606208685e-05, |
|
"loss": 4.6968, |
|
"step": 106900 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.4208034873008728, |
|
"learning_rate": 8.758634451490452e-05, |
|
"loss": 4.6919, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.3912876844406128, |
|
"learning_rate": 8.757474296772217e-05, |
|
"loss": 4.6921, |
|
"step": 107100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.4323633909225464, |
|
"learning_rate": 8.756314142053984e-05, |
|
"loss": 4.6966, |
|
"step": 107200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.39893805980682373, |
|
"learning_rate": 8.755153987335752e-05, |
|
"loss": 4.6928, |
|
"step": 107300 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.42258456349372864, |
|
"learning_rate": 8.753993832617519e-05, |
|
"loss": 4.6955, |
|
"step": 107400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.40447258949279785, |
|
"learning_rate": 8.752833677899284e-05, |
|
"loss": 4.6972, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.5002463459968567, |
|
"learning_rate": 8.751673523181053e-05, |
|
"loss": 4.6977, |
|
"step": 107600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 5.072608470916748, |
|
"learning_rate": 8.750513368462818e-05, |
|
"loss": 4.6992, |
|
"step": 107700 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.4288334250450134, |
|
"learning_rate": 8.749353213744585e-05, |
|
"loss": 4.6984, |
|
"step": 107800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.529216468334198, |
|
"learning_rate": 8.748193059026351e-05, |
|
"loss": 4.698, |
|
"step": 107900 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.42044612765312195, |
|
"learning_rate": 8.74703290430812e-05, |
|
"loss": 4.6957, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.8280035853385925, |
|
"learning_rate": 8.745872749589886e-05, |
|
"loss": 4.6916, |
|
"step": 108100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.41592320799827576, |
|
"learning_rate": 8.744712594871652e-05, |
|
"loss": 4.6946, |
|
"step": 108200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.425014466047287, |
|
"learning_rate": 8.743552440153419e-05, |
|
"loss": 4.6955, |
|
"step": 108300 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.40150442719459534, |
|
"learning_rate": 8.742392285435186e-05, |
|
"loss": 4.6946, |
|
"step": 108400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.441785991191864, |
|
"learning_rate": 8.741232130716953e-05, |
|
"loss": 4.6908, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.035128116607666, |
|
"learning_rate": 8.740071975998719e-05, |
|
"loss": 4.6926, |
|
"step": 108600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.4290466606616974, |
|
"learning_rate": 8.738911821280486e-05, |
|
"loss": 4.6926, |
|
"step": 108700 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.4321587085723877, |
|
"learning_rate": 8.737751666562253e-05, |
|
"loss": 4.6937, |
|
"step": 108800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.40536269545555115, |
|
"learning_rate": 8.73659151184402e-05, |
|
"loss": 4.69, |
|
"step": 108900 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.3988741934299469, |
|
"learning_rate": 8.735431357125786e-05, |
|
"loss": 4.6935, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.8648415803909302, |
|
"learning_rate": 8.734271202407553e-05, |
|
"loss": 4.6978, |
|
"step": 109100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.6086846590042114, |
|
"learning_rate": 8.733111047689321e-05, |
|
"loss": 4.6929, |
|
"step": 109200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.38623344898223877, |
|
"learning_rate": 8.731950892971087e-05, |
|
"loss": 4.697, |
|
"step": 109300 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.7714725732803345, |
|
"learning_rate": 8.730790738252854e-05, |
|
"loss": 4.6926, |
|
"step": 109400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.37906432151794434, |
|
"learning_rate": 8.729630583534621e-05, |
|
"loss": 4.6927, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.4156550168991089, |
|
"learning_rate": 8.728470428816388e-05, |
|
"loss": 4.6953, |
|
"step": 109600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.39191532135009766, |
|
"learning_rate": 8.727310274098154e-05, |
|
"loss": 4.6935, |
|
"step": 109700 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.4391324818134308, |
|
"learning_rate": 8.726150119379921e-05, |
|
"loss": 4.6939, |
|
"step": 109800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.42475369572639465, |
|
"learning_rate": 8.724989964661688e-05, |
|
"loss": 4.6903, |
|
"step": 109900 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.4388252794742584, |
|
"learning_rate": 8.723829809943455e-05, |
|
"loss": 4.6914, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.4022296667098999, |
|
"learning_rate": 8.72266965522522e-05, |
|
"loss": 4.6948, |
|
"step": 110100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.4492380917072296, |
|
"learning_rate": 8.721509500506988e-05, |
|
"loss": 4.6911, |
|
"step": 110200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.4049302637577057, |
|
"learning_rate": 8.720349345788755e-05, |
|
"loss": 4.6902, |
|
"step": 110300 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.9303175210952759, |
|
"learning_rate": 8.719189191070522e-05, |
|
"loss": 4.6892, |
|
"step": 110400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.48216354846954346, |
|
"learning_rate": 8.718029036352289e-05, |
|
"loss": 4.6882, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.4418731927871704, |
|
"learning_rate": 8.716868881634054e-05, |
|
"loss": 4.6893, |
|
"step": 110600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.5436264276504517, |
|
"learning_rate": 8.715708726915823e-05, |
|
"loss": 4.6892, |
|
"step": 110700 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.3900034427642822, |
|
"learning_rate": 8.714548572197588e-05, |
|
"loss": 4.6862, |
|
"step": 110800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.4286581575870514, |
|
"learning_rate": 8.713388417479356e-05, |
|
"loss": 4.6902, |
|
"step": 110900 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 2.8317441940307617, |
|
"learning_rate": 8.712228262761121e-05, |
|
"loss": 4.692, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.4669002890586853, |
|
"learning_rate": 8.71106810804289e-05, |
|
"loss": 4.6933, |
|
"step": 111100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 12.876310348510742, |
|
"learning_rate": 8.709907953324655e-05, |
|
"loss": 4.6933, |
|
"step": 111200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.4043096601963043, |
|
"learning_rate": 8.708747798606422e-05, |
|
"loss": 4.6905, |
|
"step": 111300 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.6544508934020996, |
|
"learning_rate": 8.70758764388819e-05, |
|
"loss": 4.6862, |
|
"step": 111400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.4368617534637451, |
|
"learning_rate": 8.706427489169956e-05, |
|
"loss": 4.6938, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.8235392570495605, |
|
"learning_rate": 8.705267334451723e-05, |
|
"loss": 4.6894, |
|
"step": 111600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.44141313433647156, |
|
"learning_rate": 8.704107179733489e-05, |
|
"loss": 4.6899, |
|
"step": 111700 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.4662952423095703, |
|
"learning_rate": 8.702947025015256e-05, |
|
"loss": 4.6862, |
|
"step": 111800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.5402454137802124, |
|
"learning_rate": 8.701786870297023e-05, |
|
"loss": 4.6885, |
|
"step": 111900 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.5668596029281616, |
|
"learning_rate": 8.70062671557879e-05, |
|
"loss": 4.6842, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.586438000202179, |
|
"learning_rate": 8.699466560860556e-05, |
|
"loss": 4.6893, |
|
"step": 112100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.4443139135837555, |
|
"learning_rate": 8.698306406142323e-05, |
|
"loss": 4.6914, |
|
"step": 112200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.4333912432193756, |
|
"learning_rate": 8.69714625142409e-05, |
|
"loss": 4.691, |
|
"step": 112300 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.5807641744613647, |
|
"learning_rate": 8.695986096705857e-05, |
|
"loss": 4.6935, |
|
"step": 112400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.3854735493659973, |
|
"learning_rate": 8.694825941987624e-05, |
|
"loss": 4.6889, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.40090426802635193, |
|
"learning_rate": 8.693665787269391e-05, |
|
"loss": 4.6874, |
|
"step": 112600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.40820324420928955, |
|
"learning_rate": 8.692505632551158e-05, |
|
"loss": 4.6902, |
|
"step": 112700 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.9625461101531982, |
|
"learning_rate": 8.691345477832924e-05, |
|
"loss": 4.6876, |
|
"step": 112800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.39590463042259216, |
|
"learning_rate": 8.690185323114691e-05, |
|
"loss": 4.6895, |
|
"step": 112900 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.4090369641780853, |
|
"learning_rate": 8.689025168396458e-05, |
|
"loss": 4.6921, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.48455721139907837, |
|
"learning_rate": 8.687865013678225e-05, |
|
"loss": 4.686, |
|
"step": 113100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.5107011795043945, |
|
"learning_rate": 8.686704858959991e-05, |
|
"loss": 4.6863, |
|
"step": 113200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.4489879608154297, |
|
"learning_rate": 8.685544704241758e-05, |
|
"loss": 4.689, |
|
"step": 113300 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.7847305536270142, |
|
"learning_rate": 8.684384549523525e-05, |
|
"loss": 4.687, |
|
"step": 113400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.4807884693145752, |
|
"learning_rate": 8.683224394805292e-05, |
|
"loss": 4.6844, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.4227710962295532, |
|
"learning_rate": 8.682064240087059e-05, |
|
"loss": 4.6882, |
|
"step": 113600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.425329327583313, |
|
"learning_rate": 8.680904085368825e-05, |
|
"loss": 4.6876, |
|
"step": 113700 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.39754214882850647, |
|
"learning_rate": 8.679743930650593e-05, |
|
"loss": 4.6928, |
|
"step": 113800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.41271573305130005, |
|
"learning_rate": 8.678583775932359e-05, |
|
"loss": 4.6871, |
|
"step": 113900 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.6075764298439026, |
|
"learning_rate": 8.677423621214126e-05, |
|
"loss": 4.6846, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.4607229232788086, |
|
"learning_rate": 8.676263466495891e-05, |
|
"loss": 4.6907, |
|
"step": 114100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.49409857392311096, |
|
"learning_rate": 8.67510331177766e-05, |
|
"loss": 4.6835, |
|
"step": 114200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.1049710512161255, |
|
"learning_rate": 8.673943157059425e-05, |
|
"loss": 4.6851, |
|
"step": 114300 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.4263964593410492, |
|
"learning_rate": 8.672783002341192e-05, |
|
"loss": 4.6881, |
|
"step": 114400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.2732186317443848, |
|
"learning_rate": 8.67162284762296e-05, |
|
"loss": 4.6919, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 2.2248380184173584, |
|
"learning_rate": 8.670462692904727e-05, |
|
"loss": 4.6864, |
|
"step": 114600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.1274431943893433, |
|
"learning_rate": 8.669302538186494e-05, |
|
"loss": 4.6905, |
|
"step": 114700 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.41689029335975647, |
|
"learning_rate": 8.668142383468259e-05, |
|
"loss": 4.6888, |
|
"step": 114800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.39221587777137756, |
|
"learning_rate": 8.666982228750026e-05, |
|
"loss": 4.6836, |
|
"step": 114900 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.4654403626918793, |
|
"learning_rate": 8.665822074031793e-05, |
|
"loss": 4.6869, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.5727940201759338, |
|
"learning_rate": 8.66466191931356e-05, |
|
"loss": 4.6911, |
|
"step": 115100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.3905259966850281, |
|
"learning_rate": 8.663501764595326e-05, |
|
"loss": 4.6837, |
|
"step": 115200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.094152808189392, |
|
"learning_rate": 8.662341609877093e-05, |
|
"loss": 4.6847, |
|
"step": 115300 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.39949262142181396, |
|
"learning_rate": 8.66118145515886e-05, |
|
"loss": 4.6877, |
|
"step": 115400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.3685074746608734, |
|
"learning_rate": 8.660021300440627e-05, |
|
"loss": 4.6849, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.41748788952827454, |
|
"learning_rate": 8.658861145722394e-05, |
|
"loss": 4.6861, |
|
"step": 115600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 5.671760082244873, |
|
"learning_rate": 8.657700991004161e-05, |
|
"loss": 4.6914, |
|
"step": 115700 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.40038684010505676, |
|
"learning_rate": 8.656540836285928e-05, |
|
"loss": 4.6923, |
|
"step": 115800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.4368140995502472, |
|
"learning_rate": 8.655380681567694e-05, |
|
"loss": 4.6875, |
|
"step": 115900 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.4060142934322357, |
|
"learning_rate": 8.654220526849461e-05, |
|
"loss": 4.6899, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.4354085922241211, |
|
"learning_rate": 8.653060372131228e-05, |
|
"loss": 4.6814, |
|
"step": 116100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.6066785454750061, |
|
"learning_rate": 8.651900217412995e-05, |
|
"loss": 4.6823, |
|
"step": 116200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.35917678475379944, |
|
"learning_rate": 8.650740062694761e-05, |
|
"loss": 4.6882, |
|
"step": 116300 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.41072890162467957, |
|
"learning_rate": 8.649579907976528e-05, |
|
"loss": 4.6804, |
|
"step": 116400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.494043231010437, |
|
"learning_rate": 8.648419753258295e-05, |
|
"loss": 4.6811, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.4103025197982788, |
|
"learning_rate": 8.647259598540062e-05, |
|
"loss": 4.682, |
|
"step": 116600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.5634316802024841, |
|
"learning_rate": 8.646099443821829e-05, |
|
"loss": 4.6873, |
|
"step": 116700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.4846033453941345, |
|
"learning_rate": 8.644939289103595e-05, |
|
"loss": 4.6852, |
|
"step": 116800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.8417425751686096, |
|
"learning_rate": 8.643779134385363e-05, |
|
"loss": 4.6835, |
|
"step": 116900 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.9649463295936584, |
|
"learning_rate": 8.642618979667129e-05, |
|
"loss": 4.6895, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.4397778809070587, |
|
"learning_rate": 8.641458824948896e-05, |
|
"loss": 4.6847, |
|
"step": 117100 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.40810275077819824, |
|
"learning_rate": 8.640298670230662e-05, |
|
"loss": 4.6849, |
|
"step": 117200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.39354971051216125, |
|
"learning_rate": 8.63913851551243e-05, |
|
"loss": 4.687, |
|
"step": 117300 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.6670671701431274, |
|
"learning_rate": 8.637978360794196e-05, |
|
"loss": 4.6858, |
|
"step": 117400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.8771277070045471, |
|
"learning_rate": 8.636818206075963e-05, |
|
"loss": 4.6842, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.6346202492713928, |
|
"learning_rate": 8.635658051357728e-05, |
|
"loss": 4.6821, |
|
"step": 117600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.3756822347640991, |
|
"learning_rate": 8.634497896639497e-05, |
|
"loss": 4.6852, |
|
"step": 117700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.809264063835144, |
|
"learning_rate": 8.633337741921264e-05, |
|
"loss": 4.6809, |
|
"step": 117800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.4695824980735779, |
|
"learning_rate": 8.63217758720303e-05, |
|
"loss": 4.6829, |
|
"step": 117900 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.3882189095020294, |
|
"learning_rate": 8.631017432484797e-05, |
|
"loss": 4.6824, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.46574699878692627, |
|
"learning_rate": 8.629857277766564e-05, |
|
"loss": 4.6873, |
|
"step": 118100 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.8306362628936768, |
|
"learning_rate": 8.62869712304833e-05, |
|
"loss": 4.6865, |
|
"step": 118200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.5190596580505371, |
|
"learning_rate": 8.627536968330096e-05, |
|
"loss": 4.6839, |
|
"step": 118300 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.5554775595664978, |
|
"learning_rate": 8.626376813611863e-05, |
|
"loss": 4.6833, |
|
"step": 118400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.5029674768447876, |
|
"learning_rate": 8.62521665889363e-05, |
|
"loss": 4.6858, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.5281274914741516, |
|
"learning_rate": 8.624056504175397e-05, |
|
"loss": 4.6801, |
|
"step": 118600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.4034444987773895, |
|
"learning_rate": 8.622896349457163e-05, |
|
"loss": 4.6797, |
|
"step": 118700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.4782603085041046, |
|
"learning_rate": 8.62173619473893e-05, |
|
"loss": 4.6808, |
|
"step": 118800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.505681574344635, |
|
"learning_rate": 8.620576040020699e-05, |
|
"loss": 4.6805, |
|
"step": 118900 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.45551398396492004, |
|
"learning_rate": 8.619415885302464e-05, |
|
"loss": 4.681, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.46791940927505493, |
|
"learning_rate": 8.618255730584231e-05, |
|
"loss": 4.6804, |
|
"step": 119100 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.40453869104385376, |
|
"learning_rate": 8.617095575865998e-05, |
|
"loss": 4.6849, |
|
"step": 119200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.5323479771614075, |
|
"learning_rate": 8.615935421147765e-05, |
|
"loss": 4.6856, |
|
"step": 119300 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.3924371600151062, |
|
"learning_rate": 8.614775266429531e-05, |
|
"loss": 4.6811, |
|
"step": 119400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.4504171311855316, |
|
"learning_rate": 8.613615111711298e-05, |
|
"loss": 4.6807, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.3027788400650024, |
|
"learning_rate": 8.612454956993065e-05, |
|
"loss": 4.6847, |
|
"step": 119600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.5537983179092407, |
|
"learning_rate": 8.611294802274832e-05, |
|
"loss": 4.681, |
|
"step": 119700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.3960654139518738, |
|
"learning_rate": 8.610134647556598e-05, |
|
"loss": 4.686, |
|
"step": 119800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.4028005003929138, |
|
"learning_rate": 8.608974492838365e-05, |
|
"loss": 4.6777, |
|
"step": 119900 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.42540809512138367, |
|
"learning_rate": 8.607814338120133e-05, |
|
"loss": 4.6831, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.0470972061157227, |
|
"learning_rate": 8.606654183401899e-05, |
|
"loss": 4.6818, |
|
"step": 120100 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.4977876842021942, |
|
"learning_rate": 8.605494028683666e-05, |
|
"loss": 4.6789, |
|
"step": 120200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.5205646753311157, |
|
"learning_rate": 8.604333873965432e-05, |
|
"loss": 4.6806, |
|
"step": 120300 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.3748117983341217, |
|
"learning_rate": 8.6031737192472e-05, |
|
"loss": 4.6847, |
|
"step": 120400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.4421583414077759, |
|
"learning_rate": 8.602013564528966e-05, |
|
"loss": 4.6772, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.596479594707489, |
|
"learning_rate": 8.600853409810733e-05, |
|
"loss": 4.6808, |
|
"step": 120600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.599827527999878, |
|
"learning_rate": 8.599693255092498e-05, |
|
"loss": 4.6842, |
|
"step": 120700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.47555598616600037, |
|
"learning_rate": 8.598533100374267e-05, |
|
"loss": 4.6842, |
|
"step": 120800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.5468947887420654, |
|
"learning_rate": 8.597372945656033e-05, |
|
"loss": 4.6863, |
|
"step": 120900 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.4049210548400879, |
|
"learning_rate": 8.5962127909378e-05, |
|
"loss": 4.6802, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.39633214473724365, |
|
"learning_rate": 8.595052636219567e-05, |
|
"loss": 4.681, |
|
"step": 121100 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.3883484899997711, |
|
"learning_rate": 8.593892481501334e-05, |
|
"loss": 4.6846, |
|
"step": 121200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.4120100736618042, |
|
"learning_rate": 8.592732326783101e-05, |
|
"loss": 4.6829, |
|
"step": 121300 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.554779052734375, |
|
"learning_rate": 8.591572172064866e-05, |
|
"loss": 4.6828, |
|
"step": 121400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.2129675149917603, |
|
"learning_rate": 8.590412017346633e-05, |
|
"loss": 4.6802, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.413712739944458, |
|
"learning_rate": 8.5892518626284e-05, |
|
"loss": 4.6826, |
|
"step": 121600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.532698392868042, |
|
"learning_rate": 8.588091707910168e-05, |
|
"loss": 4.6837, |
|
"step": 121700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.3998155891895294, |
|
"learning_rate": 8.586931553191933e-05, |
|
"loss": 4.6829, |
|
"step": 121800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.41272029280662537, |
|
"learning_rate": 8.5857713984737e-05, |
|
"loss": 4.6809, |
|
"step": 121900 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.40928176045417786, |
|
"learning_rate": 8.584611243755467e-05, |
|
"loss": 4.6827, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.6598598957061768, |
|
"learning_rate": 8.583451089037234e-05, |
|
"loss": 4.6869, |
|
"step": 122100 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.47744888067245483, |
|
"learning_rate": 8.582290934319001e-05, |
|
"loss": 4.6834, |
|
"step": 122200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.42328187823295593, |
|
"learning_rate": 8.581130779600768e-05, |
|
"loss": 4.6823, |
|
"step": 122300 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.5083212852478027, |
|
"learning_rate": 8.579970624882535e-05, |
|
"loss": 4.6799, |
|
"step": 122400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.41937893629074097, |
|
"learning_rate": 8.578810470164301e-05, |
|
"loss": 4.6823, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.46155425906181335, |
|
"learning_rate": 8.577650315446068e-05, |
|
"loss": 4.6847, |
|
"step": 122600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.4063146710395813, |
|
"learning_rate": 8.576490160727835e-05, |
|
"loss": 4.6777, |
|
"step": 122700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.9644930362701416, |
|
"learning_rate": 8.575330006009602e-05, |
|
"loss": 4.6816, |
|
"step": 122800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.4703579246997833, |
|
"learning_rate": 8.574169851291368e-05, |
|
"loss": 4.6841, |
|
"step": 122900 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.3799445629119873, |
|
"learning_rate": 8.573009696573135e-05, |
|
"loss": 4.6874, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.223569393157959, |
|
"learning_rate": 8.571849541854902e-05, |
|
"loss": 4.685, |
|
"step": 123100 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.3866465091705322, |
|
"learning_rate": 8.570689387136669e-05, |
|
"loss": 4.6814, |
|
"step": 123200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.8391069769859314, |
|
"learning_rate": 8.569529232418436e-05, |
|
"loss": 4.676, |
|
"step": 123300 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.41561365127563477, |
|
"learning_rate": 8.568369077700202e-05, |
|
"loss": 4.6841, |
|
"step": 123400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.2650662660598755, |
|
"learning_rate": 8.56720892298197e-05, |
|
"loss": 4.6828, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.6244620084762573, |
|
"learning_rate": 8.566048768263736e-05, |
|
"loss": 4.6762, |
|
"step": 123600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.36179304122924805, |
|
"learning_rate": 8.564888613545503e-05, |
|
"loss": 4.6765, |
|
"step": 123700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.4196653366088867, |
|
"learning_rate": 8.563728458827269e-05, |
|
"loss": 4.6831, |
|
"step": 123800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.3799944818019867, |
|
"learning_rate": 8.562568304109037e-05, |
|
"loss": 4.6783, |
|
"step": 123900 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.47710663080215454, |
|
"learning_rate": 8.561408149390803e-05, |
|
"loss": 4.681, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.38043680787086487, |
|
"learning_rate": 8.56024799467257e-05, |
|
"loss": 4.6831, |
|
"step": 124100 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 3.3860080242156982, |
|
"learning_rate": 8.559087839954337e-05, |
|
"loss": 4.6771, |
|
"step": 124200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.5481556057929993, |
|
"learning_rate": 8.557927685236104e-05, |
|
"loss": 4.6815, |
|
"step": 124300 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.5027362704277039, |
|
"learning_rate": 8.556767530517871e-05, |
|
"loss": 4.6813, |
|
"step": 124400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 15.900703430175781, |
|
"learning_rate": 8.555607375799637e-05, |
|
"loss": 4.6881, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.3703874349594116, |
|
"learning_rate": 8.554447221081404e-05, |
|
"loss": 4.6873, |
|
"step": 124600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.3847333490848541, |
|
"learning_rate": 8.55328706636317e-05, |
|
"loss": 4.6814, |
|
"step": 124700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.6109323501586914, |
|
"learning_rate": 8.552126911644938e-05, |
|
"loss": 4.6812, |
|
"step": 124800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.39739909768104553, |
|
"learning_rate": 8.550966756926703e-05, |
|
"loss": 4.6791, |
|
"step": 124900 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.4672738313674927, |
|
"learning_rate": 8.54980660220847e-05, |
|
"loss": 4.682, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.3909046947956085, |
|
"learning_rate": 8.548646447490237e-05, |
|
"loss": 4.676, |
|
"step": 125100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.7728373408317566, |
|
"learning_rate": 8.547486292772005e-05, |
|
"loss": 4.6798, |
|
"step": 125200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.6358431577682495, |
|
"learning_rate": 8.546326138053772e-05, |
|
"loss": 4.682, |
|
"step": 125300 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.8379632234573364, |
|
"learning_rate": 8.545165983335539e-05, |
|
"loss": 4.6806, |
|
"step": 125400 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.4461159110069275, |
|
"learning_rate": 8.544005828617306e-05, |
|
"loss": 4.6822, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.537218689918518, |
|
"learning_rate": 8.542845673899071e-05, |
|
"loss": 4.6802, |
|
"step": 125600 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.43007928133010864, |
|
"learning_rate": 8.541685519180838e-05, |
|
"loss": 4.6779, |
|
"step": 125700 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.5105913281440735, |
|
"learning_rate": 8.540525364462605e-05, |
|
"loss": 4.6817, |
|
"step": 125800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.5022956728935242, |
|
"learning_rate": 8.539365209744372e-05, |
|
"loss": 4.678, |
|
"step": 125900 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.45969635248184204, |
|
"learning_rate": 8.538205055026138e-05, |
|
"loss": 4.6779, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.8507632613182068, |
|
"learning_rate": 8.537044900307905e-05, |
|
"loss": 4.6747, |
|
"step": 126100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.4339176118373871, |
|
"learning_rate": 8.535884745589672e-05, |
|
"loss": 4.6818, |
|
"step": 126200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.814734160900116, |
|
"learning_rate": 8.534724590871439e-05, |
|
"loss": 4.6786, |
|
"step": 126300 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.4143752455711365, |
|
"learning_rate": 8.533564436153206e-05, |
|
"loss": 4.6783, |
|
"step": 126400 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.44699016213417053, |
|
"learning_rate": 8.532404281434972e-05, |
|
"loss": 4.6752, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.38036221265792847, |
|
"learning_rate": 8.53124412671674e-05, |
|
"loss": 4.679, |
|
"step": 126600 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.38587382435798645, |
|
"learning_rate": 8.530083971998506e-05, |
|
"loss": 4.6801, |
|
"step": 126700 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.4019007682800293, |
|
"learning_rate": 8.528923817280273e-05, |
|
"loss": 4.681, |
|
"step": 126800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.43139323592185974, |
|
"learning_rate": 8.527763662562039e-05, |
|
"loss": 4.6849, |
|
"step": 126900 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.3975641131401062, |
|
"learning_rate": 8.526603507843807e-05, |
|
"loss": 4.6796, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.2765636444091797, |
|
"learning_rate": 8.525443353125573e-05, |
|
"loss": 4.6789, |
|
"step": 127100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.3746165335178375, |
|
"learning_rate": 8.52428319840734e-05, |
|
"loss": 4.6801, |
|
"step": 127200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 3.3519067764282227, |
|
"learning_rate": 8.523123043689106e-05, |
|
"loss": 4.6818, |
|
"step": 127300 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.41541022062301636, |
|
"learning_rate": 8.521962888970874e-05, |
|
"loss": 4.674, |
|
"step": 127400 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.41416847705841064, |
|
"learning_rate": 8.520802734252641e-05, |
|
"loss": 4.6802, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.4105985164642334, |
|
"learning_rate": 8.519642579534407e-05, |
|
"loss": 4.6818, |
|
"step": 127600 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.5681924223899841, |
|
"learning_rate": 8.518482424816174e-05, |
|
"loss": 4.6801, |
|
"step": 127700 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.5585260391235352, |
|
"learning_rate": 8.517322270097941e-05, |
|
"loss": 4.6775, |
|
"step": 127800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.5471286177635193, |
|
"learning_rate": 8.516162115379708e-05, |
|
"loss": 4.6785, |
|
"step": 127900 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.39895522594451904, |
|
"learning_rate": 8.515001960661474e-05, |
|
"loss": 4.6804, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.38762810826301575, |
|
"learning_rate": 8.51384180594324e-05, |
|
"loss": 4.6781, |
|
"step": 128100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.42689523100852966, |
|
"learning_rate": 8.512681651225008e-05, |
|
"loss": 4.6797, |
|
"step": 128200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.39311423897743225, |
|
"learning_rate": 8.511521496506775e-05, |
|
"loss": 4.6883, |
|
"step": 128300 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.5697504878044128, |
|
"learning_rate": 8.51036134178854e-05, |
|
"loss": 4.6743, |
|
"step": 128400 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.42035338282585144, |
|
"learning_rate": 8.509201187070309e-05, |
|
"loss": 4.6797, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.7120895981788635, |
|
"learning_rate": 8.508041032352076e-05, |
|
"loss": 4.6778, |
|
"step": 128600 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.38941749930381775, |
|
"learning_rate": 8.506880877633841e-05, |
|
"loss": 4.6787, |
|
"step": 128700 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.7306220531463623, |
|
"learning_rate": 8.505720722915609e-05, |
|
"loss": 4.6772, |
|
"step": 128800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.38047537207603455, |
|
"learning_rate": 8.504560568197376e-05, |
|
"loss": 4.6804, |
|
"step": 128900 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 2.079150676727295, |
|
"learning_rate": 8.503400413479143e-05, |
|
"loss": 4.6788, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.44098183512687683, |
|
"learning_rate": 8.502240258760908e-05, |
|
"loss": 4.6749, |
|
"step": 129100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.4035288095474243, |
|
"learning_rate": 8.501080104042675e-05, |
|
"loss": 4.6691, |
|
"step": 129200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.43114110827445984, |
|
"learning_rate": 8.499919949324442e-05, |
|
"loss": 4.6797, |
|
"step": 129300 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.5038126707077026, |
|
"learning_rate": 8.49875979460621e-05, |
|
"loss": 4.6818, |
|
"step": 129400 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.44660595059394836, |
|
"learning_rate": 8.497599639887975e-05, |
|
"loss": 4.6804, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.3962138593196869, |
|
"learning_rate": 8.496439485169742e-05, |
|
"loss": 4.6718, |
|
"step": 129600 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.5164237022399902, |
|
"learning_rate": 8.49527933045151e-05, |
|
"loss": 4.6833, |
|
"step": 129700 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.7946994304656982, |
|
"learning_rate": 8.494119175733276e-05, |
|
"loss": 4.678, |
|
"step": 129800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.39110898971557617, |
|
"learning_rate": 8.492959021015043e-05, |
|
"loss": 4.6782, |
|
"step": 129900 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 8.766246795654297, |
|
"learning_rate": 8.491798866296809e-05, |
|
"loss": 4.6728, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 7.374971866607666, |
|
"learning_rate": 8.490638711578577e-05, |
|
"loss": 4.6776, |
|
"step": 130100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.5221861600875854, |
|
"learning_rate": 8.489478556860343e-05, |
|
"loss": 4.679, |
|
"step": 130200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.3894909620285034, |
|
"learning_rate": 8.48831840214211e-05, |
|
"loss": 4.6727, |
|
"step": 130300 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.39896321296691895, |
|
"learning_rate": 8.487158247423876e-05, |
|
"loss": 4.6779, |
|
"step": 130400 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.4062064588069916, |
|
"learning_rate": 8.485998092705644e-05, |
|
"loss": 4.6774, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.5894352197647095, |
|
"learning_rate": 8.48483793798741e-05, |
|
"loss": 4.679, |
|
"step": 130600 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.7942706942558289, |
|
"learning_rate": 8.483677783269177e-05, |
|
"loss": 4.673, |
|
"step": 130700 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.4256235361099243, |
|
"learning_rate": 8.482517628550944e-05, |
|
"loss": 4.6752, |
|
"step": 130800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.4181482791900635, |
|
"learning_rate": 8.481357473832711e-05, |
|
"loss": 4.6809, |
|
"step": 130900 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.37271997332572937, |
|
"learning_rate": 8.480197319114478e-05, |
|
"loss": 4.678, |
|
"step": 131000 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 861954, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 3.271273415079469e+18, |
|
"train_batch_size": 192, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|