|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 87356, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0011447410595723248, |
|
"grad_norm": 0.08282724022865295, |
|
"learning_rate": 2.8332665560070978e-08, |
|
"loss": 2.747, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0022894821191446497, |
|
"grad_norm": 0.09139522910118103, |
|
"learning_rate": 5.6665331120141957e-08, |
|
"loss": 2.7153, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.003434223178716974, |
|
"grad_norm": 0.14882275462150574, |
|
"learning_rate": 8.528418522122374e-08, |
|
"loss": 2.7207, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.004578964238289299, |
|
"grad_norm": 0.15320613980293274, |
|
"learning_rate": 1.1390303932230555e-07, |
|
"loss": 2.7451, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.005723705297861623, |
|
"grad_norm": 0.10282430797815323, |
|
"learning_rate": 1.4223570488237652e-07, |
|
"loss": 2.7131, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.006868446357433948, |
|
"grad_norm": 0.14666467905044556, |
|
"learning_rate": 1.7085455898345834e-07, |
|
"loss": 2.7244, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.008013187417006273, |
|
"grad_norm": 0.13914579153060913, |
|
"learning_rate": 1.994734130845401e-07, |
|
"loss": 2.7248, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.009157928476578599, |
|
"grad_norm": 0.13069643080234528, |
|
"learning_rate": 2.278060786446111e-07, |
|
"loss": 2.7237, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.010302669536150923, |
|
"grad_norm": 0.11404985189437866, |
|
"learning_rate": 2.564249327456929e-07, |
|
"loss": 2.7372, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.011447410595723247, |
|
"grad_norm": 0.2278125137090683, |
|
"learning_rate": 2.8504378684677464e-07, |
|
"loss": 2.7491, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.012592151655295572, |
|
"grad_norm": 0.19849324226379395, |
|
"learning_rate": 3.1366264094785646e-07, |
|
"loss": 2.7059, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.013736892714867896, |
|
"grad_norm": 0.21892410516738892, |
|
"learning_rate": 3.422814950489383e-07, |
|
"loss": 2.7291, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.014881633774440222, |
|
"grad_norm": 0.09724584966897964, |
|
"learning_rate": 3.709003491500201e-07, |
|
"loss": 2.7131, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.016026374834012546, |
|
"grad_norm": 0.13547281920909882, |
|
"learning_rate": 3.9923301471009105e-07, |
|
"loss": 2.6872, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.01717111589358487, |
|
"grad_norm": 0.21319937705993652, |
|
"learning_rate": 4.278518688111728e-07, |
|
"loss": 2.7306, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.018315856953157197, |
|
"grad_norm": 0.15332601964473724, |
|
"learning_rate": 4.564707229122546e-07, |
|
"loss": 2.6786, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.01946059801272952, |
|
"grad_norm": 0.13409346342086792, |
|
"learning_rate": 4.850895770133364e-07, |
|
"loss": 2.7021, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.020605339072301845, |
|
"grad_norm": 0.27683642506599426, |
|
"learning_rate": 5.137084311144182e-07, |
|
"loss": 2.7098, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.02175008013187417, |
|
"grad_norm": 0.2598477900028229, |
|
"learning_rate": 5.423272852155001e-07, |
|
"loss": 2.6628, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.022894821191446493, |
|
"grad_norm": 0.2136494219303131, |
|
"learning_rate": 5.709461393165818e-07, |
|
"loss": 2.6882, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.02403956225101882, |
|
"grad_norm": 0.1683170348405838, |
|
"learning_rate": 5.995649934176636e-07, |
|
"loss": 2.6787, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.025184303310591145, |
|
"grad_norm": 0.12898527085781097, |
|
"learning_rate": 6.281838475187455e-07, |
|
"loss": 2.6507, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.02632904437016347, |
|
"grad_norm": 0.25819605588912964, |
|
"learning_rate": 6.568027016198272e-07, |
|
"loss": 2.6485, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.027473785429735793, |
|
"grad_norm": 0.26418036222457886, |
|
"learning_rate": 6.85421555720909e-07, |
|
"loss": 2.665, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.02861852648930812, |
|
"grad_norm": 0.24633832275867462, |
|
"learning_rate": 7.140404098219908e-07, |
|
"loss": 2.6122, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.029763267548880444, |
|
"grad_norm": 0.11409879475831985, |
|
"learning_rate": 7.426592639230726e-07, |
|
"loss": 2.61, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.030908008608452768, |
|
"grad_norm": 0.18866164982318878, |
|
"learning_rate": 7.712781180241544e-07, |
|
"loss": 2.6109, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.03205274966802509, |
|
"grad_norm": 0.2056051343679428, |
|
"learning_rate": 7.998969721252361e-07, |
|
"loss": 2.6274, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.033197490727597416, |
|
"grad_norm": 0.18269231915473938, |
|
"learning_rate": 8.285158262263179e-07, |
|
"loss": 2.6012, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.03434223178716974, |
|
"grad_norm": 0.17360204458236694, |
|
"learning_rate": 8.571346803273998e-07, |
|
"loss": 2.6238, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.035486972846742064, |
|
"grad_norm": 0.1858297735452652, |
|
"learning_rate": 8.857535344284815e-07, |
|
"loss": 2.5836, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.036631713906314395, |
|
"grad_norm": 0.18415091931819916, |
|
"learning_rate": 9.143723885295633e-07, |
|
"loss": 2.5919, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.03777645496588672, |
|
"grad_norm": 0.19720780849456787, |
|
"learning_rate": 9.429912426306452e-07, |
|
"loss": 2.5742, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.03892119602545904, |
|
"grad_norm": 0.21820122003555298, |
|
"learning_rate": 9.71323908190716e-07, |
|
"loss": 2.5653, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.04006593708503137, |
|
"grad_norm": 0.2850651741027832, |
|
"learning_rate": 9.999427622917978e-07, |
|
"loss": 2.5936, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.04121067814460369, |
|
"grad_norm": 0.19742882251739502, |
|
"learning_rate": 1.028275427851869e-06, |
|
"loss": 2.5751, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.042355419204176015, |
|
"grad_norm": 0.1881546527147293, |
|
"learning_rate": 1.0568942819529507e-06, |
|
"loss": 2.569, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.04350016026374834, |
|
"grad_norm": 0.16125181317329407, |
|
"learning_rate": 1.0855131360540325e-06, |
|
"loss": 2.5522, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.04464490132332066, |
|
"grad_norm": 0.17413508892059326, |
|
"learning_rate": 1.1141319901551142e-06, |
|
"loss": 2.5577, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.04578964238289299, |
|
"grad_norm": 0.36808159947395325, |
|
"learning_rate": 1.142750844256196e-06, |
|
"loss": 2.541, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.04693438344246532, |
|
"grad_norm": 0.2387821078300476, |
|
"learning_rate": 1.1713696983572778e-06, |
|
"loss": 2.5481, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.04807912450203764, |
|
"grad_norm": 0.28203991055488586, |
|
"learning_rate": 1.1999885524583595e-06, |
|
"loss": 2.5099, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.049223865561609965, |
|
"grad_norm": 0.36418649554252625, |
|
"learning_rate": 1.2286074065594415e-06, |
|
"loss": 2.5158, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.05036860662118229, |
|
"grad_norm": 0.24819067120552063, |
|
"learning_rate": 1.2572262606605233e-06, |
|
"loss": 2.5189, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.05151334768075461, |
|
"grad_norm": 0.24554955959320068, |
|
"learning_rate": 1.285845114761605e-06, |
|
"loss": 2.5275, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.05265808874032694, |
|
"grad_norm": 0.3186652660369873, |
|
"learning_rate": 1.3144639688626868e-06, |
|
"loss": 2.5156, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.05380282979989926, |
|
"grad_norm": 0.2992977797985077, |
|
"learning_rate": 1.3430828229637685e-06, |
|
"loss": 2.5009, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.054947570859471585, |
|
"grad_norm": 0.3505355715751648, |
|
"learning_rate": 1.3717016770648503e-06, |
|
"loss": 2.4923, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.05609231191904391, |
|
"grad_norm": 0.17197385430335999, |
|
"learning_rate": 1.4003205311659323e-06, |
|
"loss": 2.4825, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.05723705297861624, |
|
"grad_norm": 0.21533966064453125, |
|
"learning_rate": 1.4289393852670142e-06, |
|
"loss": 2.484, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.058381794038188564, |
|
"grad_norm": 0.17859208583831787, |
|
"learning_rate": 1.457558239368096e-06, |
|
"loss": 2.4734, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.05952653509776089, |
|
"grad_norm": 0.36880823969841003, |
|
"learning_rate": 1.4861770934691778e-06, |
|
"loss": 2.4747, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.06067127615733321, |
|
"grad_norm": 0.26152077317237854, |
|
"learning_rate": 1.5147959475702595e-06, |
|
"loss": 2.4696, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.061816017216905536, |
|
"grad_norm": 0.37325313687324524, |
|
"learning_rate": 1.5434148016713413e-06, |
|
"loss": 2.4908, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.06296075827647786, |
|
"grad_norm": 0.20671696960926056, |
|
"learning_rate": 1.572033655772423e-06, |
|
"loss": 2.4929, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.06410549933605018, |
|
"grad_norm": 0.17978721857070923, |
|
"learning_rate": 1.6006525098735048e-06, |
|
"loss": 2.4557, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.06525024039562251, |
|
"grad_norm": 0.17210538685321808, |
|
"learning_rate": 1.6292713639745866e-06, |
|
"loss": 2.4646, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.06639498145519483, |
|
"grad_norm": 0.17783130705356598, |
|
"learning_rate": 1.6578902180756683e-06, |
|
"loss": 2.4563, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.06753972251476716, |
|
"grad_norm": 0.16413679718971252, |
|
"learning_rate": 1.68650907217675e-06, |
|
"loss": 2.4823, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.06868446357433948, |
|
"grad_norm": 0.1958412379026413, |
|
"learning_rate": 1.715127926277832e-06, |
|
"loss": 2.4733, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.0698292046339118, |
|
"grad_norm": 0.20609253644943237, |
|
"learning_rate": 1.7437467803789138e-06, |
|
"loss": 2.4613, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.07097394569348413, |
|
"grad_norm": 0.292453795671463, |
|
"learning_rate": 1.7723656344799956e-06, |
|
"loss": 2.4585, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.07211868675305645, |
|
"grad_norm": 0.22299447655677795, |
|
"learning_rate": 1.8009844885810774e-06, |
|
"loss": 2.4572, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.07326342781262879, |
|
"grad_norm": 0.18281777203083038, |
|
"learning_rate": 1.8296033426821591e-06, |
|
"loss": 2.4511, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.07440816887220111, |
|
"grad_norm": 0.19673572480678558, |
|
"learning_rate": 1.8582221967832409e-06, |
|
"loss": 2.4377, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.07555290993177344, |
|
"grad_norm": 0.2068740874528885, |
|
"learning_rate": 1.8868410508843226e-06, |
|
"loss": 2.4483, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.07669765099134576, |
|
"grad_norm": 0.22345593571662903, |
|
"learning_rate": 1.9154599049854046e-06, |
|
"loss": 2.4708, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.07784239205091809, |
|
"grad_norm": 0.37216469645500183, |
|
"learning_rate": 1.944078759086486e-06, |
|
"loss": 2.453, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.07898713311049041, |
|
"grad_norm": 0.20201215147972107, |
|
"learning_rate": 1.972697613187568e-06, |
|
"loss": 2.4536, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.08013187417006273, |
|
"grad_norm": 0.32299327850341797, |
|
"learning_rate": 2.0013164672886497e-06, |
|
"loss": 2.4464, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.08127661522963506, |
|
"grad_norm": 0.2131340205669403, |
|
"learning_rate": 2.0299353213897317e-06, |
|
"loss": 2.4699, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.08242135628920738, |
|
"grad_norm": 0.2809932231903076, |
|
"learning_rate": 2.0585541754908132e-06, |
|
"loss": 2.4494, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.0835660973487797, |
|
"grad_norm": 0.19964805245399475, |
|
"learning_rate": 2.0871730295918956e-06, |
|
"loss": 2.4575, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.08471083840835203, |
|
"grad_norm": 0.2731245160102844, |
|
"learning_rate": 2.115791883692977e-06, |
|
"loss": 2.4513, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.08585557946792435, |
|
"grad_norm": 0.22060319781303406, |
|
"learning_rate": 2.144410737794059e-06, |
|
"loss": 2.4459, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.08700032052749668, |
|
"grad_norm": 0.23072516918182373, |
|
"learning_rate": 2.1730295918951407e-06, |
|
"loss": 2.4469, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.088145061587069, |
|
"grad_norm": 0.2869388163089752, |
|
"learning_rate": 2.2016484459962227e-06, |
|
"loss": 2.426, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.08928980264664133, |
|
"grad_norm": 0.4073362648487091, |
|
"learning_rate": 2.230267300097304e-06, |
|
"loss": 2.4399, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.09043454370621365, |
|
"grad_norm": 0.3338267207145691, |
|
"learning_rate": 2.258886154198386e-06, |
|
"loss": 2.4442, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.09157928476578597, |
|
"grad_norm": 0.32599982619285583, |
|
"learning_rate": 2.2875050082994677e-06, |
|
"loss": 2.448, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.0927240258253583, |
|
"grad_norm": 0.38292455673217773, |
|
"learning_rate": 2.3161238624005497e-06, |
|
"loss": 2.4501, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.09386876688493064, |
|
"grad_norm": 0.2262214571237564, |
|
"learning_rate": 2.3447427165016317e-06, |
|
"loss": 2.4306, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.09501350794450296, |
|
"grad_norm": 0.23351378738880157, |
|
"learning_rate": 2.3733615706027132e-06, |
|
"loss": 2.4542, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.09615824900407528, |
|
"grad_norm": 0.24008594453334808, |
|
"learning_rate": 2.401980424703795e-06, |
|
"loss": 2.4491, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.09730299006364761, |
|
"grad_norm": 0.3131836950778961, |
|
"learning_rate": 2.4305992788048767e-06, |
|
"loss": 2.4402, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.09844773112321993, |
|
"grad_norm": 0.3088011145591736, |
|
"learning_rate": 2.4592181329059587e-06, |
|
"loss": 2.4236, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.09959247218279225, |
|
"grad_norm": 0.31873244047164917, |
|
"learning_rate": 2.4878369870070403e-06, |
|
"loss": 2.4425, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.10073721324236458, |
|
"grad_norm": 0.19671718776226044, |
|
"learning_rate": 2.5164558411081227e-06, |
|
"loss": 2.4277, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.1018819543019369, |
|
"grad_norm": 0.3244757056236267, |
|
"learning_rate": 2.5450746952092042e-06, |
|
"loss": 2.4328, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.10302669536150923, |
|
"grad_norm": 0.32051828503608704, |
|
"learning_rate": 2.573693549310286e-06, |
|
"loss": 2.4303, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.10417143642108155, |
|
"grad_norm": 0.2557377517223358, |
|
"learning_rate": 2.6023124034113677e-06, |
|
"loss": 2.4426, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.10531617748065387, |
|
"grad_norm": 0.29920217394828796, |
|
"learning_rate": 2.6309312575124497e-06, |
|
"loss": 2.4247, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.1064609185402262, |
|
"grad_norm": 0.2528887987136841, |
|
"learning_rate": 2.6595501116135313e-06, |
|
"loss": 2.4337, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.10760565959979852, |
|
"grad_norm": 0.2672332525253296, |
|
"learning_rate": 2.6881689657146132e-06, |
|
"loss": 2.4327, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.10875040065937085, |
|
"grad_norm": 0.26545771956443787, |
|
"learning_rate": 2.7167878198156948e-06, |
|
"loss": 2.411, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.10989514171894317, |
|
"grad_norm": 0.2251209020614624, |
|
"learning_rate": 2.7454066739167768e-06, |
|
"loss": 2.4346, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.1110398827785155, |
|
"grad_norm": 0.25982141494750977, |
|
"learning_rate": 2.7740255280178583e-06, |
|
"loss": 2.4155, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.11218462383808782, |
|
"grad_norm": 0.2948269844055176, |
|
"learning_rate": 2.8026443821189403e-06, |
|
"loss": 2.4056, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.11332936489766014, |
|
"grad_norm": 0.2271622121334076, |
|
"learning_rate": 2.8312632362200223e-06, |
|
"loss": 2.4266, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.11447410595723248, |
|
"grad_norm": 0.25277993083000183, |
|
"learning_rate": 2.859882090321104e-06, |
|
"loss": 2.4267, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.1156188470168048, |
|
"grad_norm": 0.2651556730270386, |
|
"learning_rate": 2.8885009444221858e-06, |
|
"loss": 2.4423, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.11676358807637713, |
|
"grad_norm": 0.22295983135700226, |
|
"learning_rate": 2.9171197985232673e-06, |
|
"loss": 2.4227, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.11790832913594945, |
|
"grad_norm": 0.2351614385843277, |
|
"learning_rate": 2.9457386526243493e-06, |
|
"loss": 2.4237, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.11905307019552178, |
|
"grad_norm": 0.327232301235199, |
|
"learning_rate": 2.974357506725431e-06, |
|
"loss": 2.4195, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.1201978112550941, |
|
"grad_norm": 0.234052836894989, |
|
"learning_rate": 3.002976360826513e-06, |
|
"loss": 2.4265, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.12134255231466642, |
|
"grad_norm": 0.29197776317596436, |
|
"learning_rate": 3.0315952149275944e-06, |
|
"loss": 2.4175, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.12248729337423875, |
|
"grad_norm": 0.3510327935218811, |
|
"learning_rate": 3.0602140690286763e-06, |
|
"loss": 2.4018, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.12363203443381107, |
|
"grad_norm": 0.24532395601272583, |
|
"learning_rate": 3.0888329231297583e-06, |
|
"loss": 2.4081, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.1247767754933834, |
|
"grad_norm": 0.29377228021621704, |
|
"learning_rate": 3.11745177723084e-06, |
|
"loss": 2.4152, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.12592151655295572, |
|
"grad_norm": 0.3314598798751831, |
|
"learning_rate": 3.146070631331922e-06, |
|
"loss": 2.4224, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.12706625761252804, |
|
"grad_norm": 0.31806275248527527, |
|
"learning_rate": 3.1746894854330034e-06, |
|
"loss": 2.4087, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.12821099867210037, |
|
"grad_norm": 0.29323023557662964, |
|
"learning_rate": 3.2033083395340854e-06, |
|
"loss": 2.3941, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.1293557397316727, |
|
"grad_norm": 0.230011448264122, |
|
"learning_rate": 3.231927193635167e-06, |
|
"loss": 2.4154, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.13050048079124502, |
|
"grad_norm": 0.36185070872306824, |
|
"learning_rate": 3.260546047736249e-06, |
|
"loss": 2.4163, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.13164522185081734, |
|
"grad_norm": 0.35968175530433655, |
|
"learning_rate": 3.2891649018373304e-06, |
|
"loss": 2.4148, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.13278996291038966, |
|
"grad_norm": 0.3145340383052826, |
|
"learning_rate": 3.3177837559384124e-06, |
|
"loss": 2.4102, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.133934703969962, |
|
"grad_norm": 0.26297980546951294, |
|
"learning_rate": 3.3464026100394944e-06, |
|
"loss": 2.4047, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.1350794450295343, |
|
"grad_norm": 0.24281686544418335, |
|
"learning_rate": 3.375021464140576e-06, |
|
"loss": 2.3916, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.13622418608910664, |
|
"grad_norm": 0.2922670245170593, |
|
"learning_rate": 3.403640318241658e-06, |
|
"loss": 2.4226, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.13736892714867896, |
|
"grad_norm": 0.28737780451774597, |
|
"learning_rate": 3.4322591723427395e-06, |
|
"loss": 2.4099, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.13851366820825128, |
|
"grad_norm": 0.7136600613594055, |
|
"learning_rate": 3.4608780264438214e-06, |
|
"loss": 2.4025, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.1396584092678236, |
|
"grad_norm": 0.2455575317144394, |
|
"learning_rate": 3.489496880544903e-06, |
|
"loss": 2.403, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.14080315032739593, |
|
"grad_norm": 0.24030736088752747, |
|
"learning_rate": 3.518115734645985e-06, |
|
"loss": 2.3824, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.14194789138696826, |
|
"grad_norm": 0.28610554337501526, |
|
"learning_rate": 3.5467345887470665e-06, |
|
"loss": 2.414, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.14309263244654058, |
|
"grad_norm": 0.28286024928092957, |
|
"learning_rate": 3.5753534428481485e-06, |
|
"loss": 2.403, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.1442373735061129, |
|
"grad_norm": 0.27423399686813354, |
|
"learning_rate": 3.60397229694923e-06, |
|
"loss": 2.3912, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.14538211456568526, |
|
"grad_norm": 0.3900642991065979, |
|
"learning_rate": 3.632591151050312e-06, |
|
"loss": 2.391, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.14652685562525758, |
|
"grad_norm": 0.3418841063976288, |
|
"learning_rate": 3.661210005151394e-06, |
|
"loss": 2.3931, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.1476715966848299, |
|
"grad_norm": 0.22310563921928406, |
|
"learning_rate": 3.6898288592524755e-06, |
|
"loss": 2.4003, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.14881633774440223, |
|
"grad_norm": 0.3633168935775757, |
|
"learning_rate": 3.7184477133535575e-06, |
|
"loss": 2.388, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.14996107880397455, |
|
"grad_norm": 0.24249403178691864, |
|
"learning_rate": 3.747066567454639e-06, |
|
"loss": 2.3798, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.15110581986354688, |
|
"grad_norm": 0.3075302243232727, |
|
"learning_rate": 3.7756854215557214e-06, |
|
"loss": 2.379, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.1522505609231192, |
|
"grad_norm": 0.2638431787490845, |
|
"learning_rate": 3.8043042756568034e-06, |
|
"loss": 2.3826, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.15339530198269152, |
|
"grad_norm": 0.26029646396636963, |
|
"learning_rate": 3.832923129757885e-06, |
|
"loss": 2.3876, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.15454004304226385, |
|
"grad_norm": 0.297446072101593, |
|
"learning_rate": 3.8615419838589665e-06, |
|
"loss": 2.3791, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.15568478410183617, |
|
"grad_norm": 0.34628915786743164, |
|
"learning_rate": 3.890160837960049e-06, |
|
"loss": 2.4032, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.1568295251614085, |
|
"grad_norm": 0.2669197916984558, |
|
"learning_rate": 3.9187796920611305e-06, |
|
"loss": 2.4, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.15797426622098082, |
|
"grad_norm": 0.3261224627494812, |
|
"learning_rate": 3.947398546162212e-06, |
|
"loss": 2.3816, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.15911900728055314, |
|
"grad_norm": 0.3039107024669647, |
|
"learning_rate": 3.9760174002632936e-06, |
|
"loss": 2.3783, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.16026374834012547, |
|
"grad_norm": 0.26345106959342957, |
|
"learning_rate": 4.004636254364376e-06, |
|
"loss": 2.382, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.1614084893996978, |
|
"grad_norm": 0.3465179204940796, |
|
"learning_rate": 4.0332551084654575e-06, |
|
"loss": 2.3873, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.16255323045927011, |
|
"grad_norm": 0.2453349232673645, |
|
"learning_rate": 4.061873962566539e-06, |
|
"loss": 2.3881, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.16369797151884244, |
|
"grad_norm": 0.24121074378490448, |
|
"learning_rate": 4.0904928166676215e-06, |
|
"loss": 2.3895, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.16484271257841476, |
|
"grad_norm": 0.3282526135444641, |
|
"learning_rate": 4.119111670768703e-06, |
|
"loss": 2.391, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.1659874536379871, |
|
"grad_norm": 0.32719773054122925, |
|
"learning_rate": 4.1477305248697846e-06, |
|
"loss": 2.3729, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.1671321946975594, |
|
"grad_norm": 0.2743726074695587, |
|
"learning_rate": 4.176349378970866e-06, |
|
"loss": 2.3967, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.16827693575713173, |
|
"grad_norm": 0.2472705990076065, |
|
"learning_rate": 4.2049682330719485e-06, |
|
"loss": 2.3871, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.16942167681670406, |
|
"grad_norm": 0.23346185684204102, |
|
"learning_rate": 4.23358708717303e-06, |
|
"loss": 2.3762, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.17056641787627638, |
|
"grad_norm": 0.261417031288147, |
|
"learning_rate": 4.262205941274112e-06, |
|
"loss": 2.3947, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.1717111589358487, |
|
"grad_norm": 0.3324854373931885, |
|
"learning_rate": 4.290824795375194e-06, |
|
"loss": 2.381, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.17285589999542103, |
|
"grad_norm": 0.3383265733718872, |
|
"learning_rate": 4.3194436494762755e-06, |
|
"loss": 2.3716, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.17400064105499335, |
|
"grad_norm": 0.34629401564598083, |
|
"learning_rate": 4.348062503577357e-06, |
|
"loss": 2.369, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.17514538211456568, |
|
"grad_norm": 0.24686865508556366, |
|
"learning_rate": 4.376681357678439e-06, |
|
"loss": 2.3883, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.176290123174138, |
|
"grad_norm": 0.38007158041000366, |
|
"learning_rate": 4.405300211779521e-06, |
|
"loss": 2.3869, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.17743486423371033, |
|
"grad_norm": 0.313494473695755, |
|
"learning_rate": 4.433919065880603e-06, |
|
"loss": 2.3859, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.17857960529328265, |
|
"grad_norm": 0.2624611556529999, |
|
"learning_rate": 4.462537919981684e-06, |
|
"loss": 2.3865, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.17972434635285497, |
|
"grad_norm": 0.2802521884441376, |
|
"learning_rate": 4.491156774082766e-06, |
|
"loss": 2.3939, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.1808690874124273, |
|
"grad_norm": 0.3011086881160736, |
|
"learning_rate": 4.519775628183848e-06, |
|
"loss": 2.385, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.18201382847199962, |
|
"grad_norm": 0.28954678773880005, |
|
"learning_rate": 4.54839448228493e-06, |
|
"loss": 2.3897, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.18315856953157195, |
|
"grad_norm": 0.2933329939842224, |
|
"learning_rate": 4.577013336386011e-06, |
|
"loss": 2.3761, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.18430331059114427, |
|
"grad_norm": 0.2496791034936905, |
|
"learning_rate": 4.605632190487094e-06, |
|
"loss": 2.3801, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.1854480516507166, |
|
"grad_norm": 0.28926581144332886, |
|
"learning_rate": 4.634251044588175e-06, |
|
"loss": 2.3754, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.18659279271028895, |
|
"grad_norm": 0.3181098997592926, |
|
"learning_rate": 4.662869898689257e-06, |
|
"loss": 2.3625, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.18773753376986127, |
|
"grad_norm": 0.3494364321231842, |
|
"learning_rate": 4.691488752790338e-06, |
|
"loss": 2.3688, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.1888822748294336, |
|
"grad_norm": 0.4044504463672638, |
|
"learning_rate": 4.720107606891421e-06, |
|
"loss": 2.379, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.19002701588900592, |
|
"grad_norm": 0.34538954496383667, |
|
"learning_rate": 4.748726460992502e-06, |
|
"loss": 2.3746, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.19117175694857824, |
|
"grad_norm": 0.2930959165096283, |
|
"learning_rate": 4.777345315093584e-06, |
|
"loss": 2.3628, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.19231649800815057, |
|
"grad_norm": 0.28704413771629333, |
|
"learning_rate": 4.805964169194665e-06, |
|
"loss": 2.3803, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.1934612390677229, |
|
"grad_norm": 0.34928804636001587, |
|
"learning_rate": 4.834583023295748e-06, |
|
"loss": 2.3745, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.19460598012729521, |
|
"grad_norm": 0.4369732439517975, |
|
"learning_rate": 4.863201877396829e-06, |
|
"loss": 2.3732, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.19575072118686754, |
|
"grad_norm": 0.31494560837745667, |
|
"learning_rate": 4.891820731497911e-06, |
|
"loss": 2.3747, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.19689546224643986, |
|
"grad_norm": 0.6197758913040161, |
|
"learning_rate": 4.920439585598993e-06, |
|
"loss": 2.3824, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.19804020330601219, |
|
"grad_norm": 0.4105755388736725, |
|
"learning_rate": 4.949058439700075e-06, |
|
"loss": 2.3705, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.1991849443655845, |
|
"grad_norm": 0.49081698060035706, |
|
"learning_rate": 4.977677293801156e-06, |
|
"loss": 2.3591, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.20032968542515683, |
|
"grad_norm": 0.2819342017173767, |
|
"learning_rate": 5.006296147902239e-06, |
|
"loss": 2.3844, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.20147442648472916, |
|
"grad_norm": 0.30305904150009155, |
|
"learning_rate": 5.03491500200332e-06, |
|
"loss": 2.3749, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.20261916754430148, |
|
"grad_norm": 0.36479583382606506, |
|
"learning_rate": 5.063533856104403e-06, |
|
"loss": 2.3765, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.2037639086038738, |
|
"grad_norm": 0.3018398880958557, |
|
"learning_rate": 5.092152710205483e-06, |
|
"loss": 2.3673, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.20490864966344613, |
|
"grad_norm": 0.32054489850997925, |
|
"learning_rate": 5.120771564306566e-06, |
|
"loss": 2.3658, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.20605339072301845, |
|
"grad_norm": 0.3198222219944, |
|
"learning_rate": 5.149390418407647e-06, |
|
"loss": 2.3693, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.20719813178259078, |
|
"grad_norm": 0.405718594789505, |
|
"learning_rate": 5.17800927250873e-06, |
|
"loss": 2.3586, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.2083428728421631, |
|
"grad_norm": 0.2990506887435913, |
|
"learning_rate": 5.20662812660981e-06, |
|
"loss": 2.3493, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.20948761390173543, |
|
"grad_norm": 0.3617069721221924, |
|
"learning_rate": 5.235246980710893e-06, |
|
"loss": 2.3556, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.21063235496130775, |
|
"grad_norm": 0.3361080586910248, |
|
"learning_rate": 5.263865834811974e-06, |
|
"loss": 2.3538, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.21177709602088007, |
|
"grad_norm": 0.37158912420272827, |
|
"learning_rate": 5.292484688913057e-06, |
|
"loss": 2.3538, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.2129218370804524, |
|
"grad_norm": 0.3937898576259613, |
|
"learning_rate": 5.321103543014137e-06, |
|
"loss": 2.368, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.21406657814002472, |
|
"grad_norm": 0.42322373390197754, |
|
"learning_rate": 5.34972239711522e-06, |
|
"loss": 2.3646, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.21521131919959705, |
|
"grad_norm": 0.411285400390625, |
|
"learning_rate": 5.378341251216301e-06, |
|
"loss": 2.3604, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.21635606025916937, |
|
"grad_norm": 0.30224665999412537, |
|
"learning_rate": 5.406960105317384e-06, |
|
"loss": 2.3579, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.2175008013187417, |
|
"grad_norm": 0.339236319065094, |
|
"learning_rate": 5.435578959418465e-06, |
|
"loss": 2.3741, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.21864554237831402, |
|
"grad_norm": 0.37957480549812317, |
|
"learning_rate": 5.464197813519547e-06, |
|
"loss": 2.3549, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.21979028343788634, |
|
"grad_norm": 0.2912836968898773, |
|
"learning_rate": 5.492816667620628e-06, |
|
"loss": 2.352, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.22093502449745867, |
|
"grad_norm": 0.2871094346046448, |
|
"learning_rate": 5.521435521721711e-06, |
|
"loss": 2.3602, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.222079765557031, |
|
"grad_norm": 0.33945441246032715, |
|
"learning_rate": 5.550054375822792e-06, |
|
"loss": 2.3715, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.2232245066166033, |
|
"grad_norm": 0.36321476101875305, |
|
"learning_rate": 5.578673229923875e-06, |
|
"loss": 2.3601, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.22436924767617564, |
|
"grad_norm": 0.27080637216567993, |
|
"learning_rate": 5.6072920840249555e-06, |
|
"loss": 2.3533, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.22551398873574796, |
|
"grad_norm": 0.2903904318809509, |
|
"learning_rate": 5.635910938126038e-06, |
|
"loss": 2.3658, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.22665872979532029, |
|
"grad_norm": 0.3127708435058594, |
|
"learning_rate": 5.664529792227119e-06, |
|
"loss": 2.3484, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.22780347085489264, |
|
"grad_norm": 0.37779003381729126, |
|
"learning_rate": 5.693148646328202e-06, |
|
"loss": 2.3635, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.22894821191446496, |
|
"grad_norm": 0.28905001282691956, |
|
"learning_rate": 5.7217675004292825e-06, |
|
"loss": 2.3645, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.23009295297403728, |
|
"grad_norm": 0.3213961720466614, |
|
"learning_rate": 5.750386354530365e-06, |
|
"loss": 2.3584, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.2312376940336096, |
|
"grad_norm": 0.28322041034698486, |
|
"learning_rate": 5.7790052086314464e-06, |
|
"loss": 2.3616, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.23238243509318193, |
|
"grad_norm": 0.3427826762199402, |
|
"learning_rate": 5.807624062732529e-06, |
|
"loss": 2.366, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.23352717615275426, |
|
"grad_norm": 0.35720208287239075, |
|
"learning_rate": 5.836242916833611e-06, |
|
"loss": 2.3604, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.23467191721232658, |
|
"grad_norm": 0.3247853219509125, |
|
"learning_rate": 5.864861770934692e-06, |
|
"loss": 2.3528, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.2358166582718989, |
|
"grad_norm": 0.31638818979263306, |
|
"learning_rate": 5.893480625035774e-06, |
|
"loss": 2.3659, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.23696139933147123, |
|
"grad_norm": 0.3150993287563324, |
|
"learning_rate": 5.922099479136856e-06, |
|
"loss": 2.3542, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.23810614039104355, |
|
"grad_norm": 0.28134357929229736, |
|
"learning_rate": 5.950718333237938e-06, |
|
"loss": 2.3645, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.23925088145061588, |
|
"grad_norm": 0.344279408454895, |
|
"learning_rate": 5.979337187339019e-06, |
|
"loss": 2.3553, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.2403956225101882, |
|
"grad_norm": 0.3156017065048218, |
|
"learning_rate": 6.007956041440101e-06, |
|
"loss": 2.3399, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.24154036356976052, |
|
"grad_norm": 0.31625500321388245, |
|
"learning_rate": 6.036574895541183e-06, |
|
"loss": 2.3458, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.24268510462933285, |
|
"grad_norm": 0.3604189455509186, |
|
"learning_rate": 6.065193749642265e-06, |
|
"loss": 2.3426, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.24382984568890517, |
|
"grad_norm": 0.3052213490009308, |
|
"learning_rate": 6.093526415202336e-06, |
|
"loss": 2.3498, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.2449745867484775, |
|
"grad_norm": 0.40419507026672363, |
|
"learning_rate": 6.122145269303418e-06, |
|
"loss": 2.3524, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.24611932780804982, |
|
"grad_norm": 0.3688088357448578, |
|
"learning_rate": 6.150764123404499e-06, |
|
"loss": 2.3434, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.24726406886762214, |
|
"grad_norm": 0.3363403379917145, |
|
"learning_rate": 6.179382977505581e-06, |
|
"loss": 2.3524, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.24840880992719447, |
|
"grad_norm": 0.516409695148468, |
|
"learning_rate": 6.208001831606663e-06, |
|
"loss": 2.3424, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.2495535509867668, |
|
"grad_norm": 0.3187144100666046, |
|
"learning_rate": 6.236620685707745e-06, |
|
"loss": 2.3427, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.25069829204633914, |
|
"grad_norm": 0.3366081416606903, |
|
"learning_rate": 6.265239539808826e-06, |
|
"loss": 2.3461, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.25184303310591144, |
|
"grad_norm": 0.3357242941856384, |
|
"learning_rate": 6.293858393909908e-06, |
|
"loss": 2.3423, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.2529877741654838, |
|
"grad_norm": 0.3461867570877075, |
|
"learning_rate": 6.32247724801099e-06, |
|
"loss": 2.3448, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.2541325152250561, |
|
"grad_norm": 0.3421408236026764, |
|
"learning_rate": 6.351096102112072e-06, |
|
"loss": 2.3513, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.25527725628462844, |
|
"grad_norm": 0.3435458242893219, |
|
"learning_rate": 6.379714956213153e-06, |
|
"loss": 2.359, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.25642199734420074, |
|
"grad_norm": 0.31256601214408875, |
|
"learning_rate": 6.408333810314235e-06, |
|
"loss": 2.3417, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.2575667384037731, |
|
"grad_norm": 0.4700869023799896, |
|
"learning_rate": 6.436952664415317e-06, |
|
"loss": 2.3609, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.2587114794633454, |
|
"grad_norm": 0.3140374422073364, |
|
"learning_rate": 6.465571518516399e-06, |
|
"loss": 2.3749, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.25985622052291774, |
|
"grad_norm": 0.35399436950683594, |
|
"learning_rate": 6.49419037261748e-06, |
|
"loss": 2.35, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.26100096158249003, |
|
"grad_norm": 0.3978697657585144, |
|
"learning_rate": 6.5228092267185625e-06, |
|
"loss": 2.3486, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.2621457026420624, |
|
"grad_norm": 0.4412658214569092, |
|
"learning_rate": 6.551428080819644e-06, |
|
"loss": 2.3555, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.2632904437016347, |
|
"grad_norm": 0.3189774453639984, |
|
"learning_rate": 6.580046934920726e-06, |
|
"loss": 2.3482, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.26443518476120703, |
|
"grad_norm": 0.40347644686698914, |
|
"learning_rate": 6.608665789021808e-06, |
|
"loss": 2.3528, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.26557992582077933, |
|
"grad_norm": 0.3658533990383148, |
|
"learning_rate": 6.6372846431228895e-06, |
|
"loss": 2.3328, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.2667246668803517, |
|
"grad_norm": 0.31631380319595337, |
|
"learning_rate": 6.665903497223972e-06, |
|
"loss": 2.3554, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.267869407939924, |
|
"grad_norm": 0.3406871259212494, |
|
"learning_rate": 6.6945223513250534e-06, |
|
"loss": 2.358, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.2690141489994963, |
|
"grad_norm": 0.34881141781806946, |
|
"learning_rate": 6.723141205426136e-06, |
|
"loss": 2.343, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.2701588900590686, |
|
"grad_norm": 0.5699728727340698, |
|
"learning_rate": 6.751760059527217e-06, |
|
"loss": 2.353, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.271303631118641, |
|
"grad_norm": 0.36337733268737793, |
|
"learning_rate": 6.780378913628299e-06, |
|
"loss": 2.3463, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.27244837217821327, |
|
"grad_norm": 0.34283825755119324, |
|
"learning_rate": 6.8089977677293805e-06, |
|
"loss": 2.3343, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.2735931132377856, |
|
"grad_norm": 0.3663531243801117, |
|
"learning_rate": 6.837616621830463e-06, |
|
"loss": 2.3477, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.2747378542973579, |
|
"grad_norm": 0.4608674943447113, |
|
"learning_rate": 6.8662354759315444e-06, |
|
"loss": 2.3396, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.27588259535693027, |
|
"grad_norm": 0.32811376452445984, |
|
"learning_rate": 6.894854330032627e-06, |
|
"loss": 2.3409, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.27702733641650257, |
|
"grad_norm": 0.3832899034023285, |
|
"learning_rate": 6.9231869955926965e-06, |
|
"loss": 2.3573, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.2781720774760749, |
|
"grad_norm": 0.3628441095352173, |
|
"learning_rate": 6.951805849693779e-06, |
|
"loss": 2.3515, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.2793168185356472, |
|
"grad_norm": 0.35530924797058105, |
|
"learning_rate": 6.98042470379486e-06, |
|
"loss": 2.3608, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.28046155959521957, |
|
"grad_norm": 0.33606886863708496, |
|
"learning_rate": 7.009043557895943e-06, |
|
"loss": 2.3445, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.28160630065479186, |
|
"grad_norm": 0.3475317358970642, |
|
"learning_rate": 7.0376624119970235e-06, |
|
"loss": 2.3527, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.2827510417143642, |
|
"grad_norm": 0.3616209030151367, |
|
"learning_rate": 7.066281266098106e-06, |
|
"loss": 2.3456, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.2838957827739365, |
|
"grad_norm": 0.4273635745048523, |
|
"learning_rate": 7.0949001201991875e-06, |
|
"loss": 2.3356, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.28504052383350886, |
|
"grad_norm": 0.32688596844673157, |
|
"learning_rate": 7.12351897430027e-06, |
|
"loss": 2.3435, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.28618526489308116, |
|
"grad_norm": 0.3584359288215637, |
|
"learning_rate": 7.1521378284013506e-06, |
|
"loss": 2.3431, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.2873300059526535, |
|
"grad_norm": 0.35065096616744995, |
|
"learning_rate": 7.180756682502433e-06, |
|
"loss": 2.3316, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.2884747470122258, |
|
"grad_norm": 0.3658662736415863, |
|
"learning_rate": 7.2093755366035145e-06, |
|
"loss": 2.342, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.28961948807179816, |
|
"grad_norm": 0.3615448772907257, |
|
"learning_rate": 7.237994390704597e-06, |
|
"loss": 2.3413, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 0.2907642291313705, |
|
"grad_norm": 0.37493348121643066, |
|
"learning_rate": 7.2666132448056785e-06, |
|
"loss": 2.3261, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.2919089701909428, |
|
"grad_norm": 0.43712013959884644, |
|
"learning_rate": 7.29523209890676e-06, |
|
"loss": 2.3367, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.29305371125051516, |
|
"grad_norm": 0.37490740418434143, |
|
"learning_rate": 7.3238509530078416e-06, |
|
"loss": 2.323, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.29419845231008745, |
|
"grad_norm": 0.33274149894714355, |
|
"learning_rate": 7.352469807108924e-06, |
|
"loss": 2.3333, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 0.2953431933696598, |
|
"grad_norm": 0.41356754302978516, |
|
"learning_rate": 7.3810886612100055e-06, |
|
"loss": 2.3253, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.2964879344292321, |
|
"grad_norm": 0.3717619478702545, |
|
"learning_rate": 7.409707515311088e-06, |
|
"loss": 2.3483, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 0.29763267548880445, |
|
"grad_norm": 0.3571007251739502, |
|
"learning_rate": 7.438326369412169e-06, |
|
"loss": 2.3429, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.29877741654837675, |
|
"grad_norm": 0.33724385499954224, |
|
"learning_rate": 7.466945223513251e-06, |
|
"loss": 2.3387, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.2999221576079491, |
|
"grad_norm": 0.4527655243873596, |
|
"learning_rate": 7.495564077614333e-06, |
|
"loss": 2.3387, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.3010668986675214, |
|
"grad_norm": 0.5135347247123718, |
|
"learning_rate": 7.524182931715415e-06, |
|
"loss": 2.3476, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 0.30221163972709375, |
|
"grad_norm": 0.36313098669052124, |
|
"learning_rate": 7.552801785816497e-06, |
|
"loss": 2.3419, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.30335638078666605, |
|
"grad_norm": 0.34982365369796753, |
|
"learning_rate": 7.581420639917578e-06, |
|
"loss": 2.3333, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.3045011218462384, |
|
"grad_norm": 0.5387922525405884, |
|
"learning_rate": 7.6100394940186604e-06, |
|
"loss": 2.3267, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.3056458629058107, |
|
"grad_norm": 0.3917737305164337, |
|
"learning_rate": 7.638658348119742e-06, |
|
"loss": 2.3441, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.30679060396538305, |
|
"grad_norm": 0.362425297498703, |
|
"learning_rate": 7.667277202220824e-06, |
|
"loss": 2.3279, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.30793534502495534, |
|
"grad_norm": 0.3922022581100464, |
|
"learning_rate": 7.695896056321905e-06, |
|
"loss": 2.328, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 0.3090800860845277, |
|
"grad_norm": 0.3867265582084656, |
|
"learning_rate": 7.724514910422987e-06, |
|
"loss": 2.3343, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.3102248271441, |
|
"grad_norm": 0.3767886757850647, |
|
"learning_rate": 7.753133764524068e-06, |
|
"loss": 2.3209, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 0.31136956820367234, |
|
"grad_norm": 0.3708842694759369, |
|
"learning_rate": 7.78175261862515e-06, |
|
"loss": 2.3406, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.31251430926324464, |
|
"grad_norm": 0.387917697429657, |
|
"learning_rate": 7.810085284185221e-06, |
|
"loss": 2.3314, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.313659050322817, |
|
"grad_norm": 0.38667747378349304, |
|
"learning_rate": 7.838704138286303e-06, |
|
"loss": 2.3534, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.3148037913823893, |
|
"grad_norm": 0.3695323169231415, |
|
"learning_rate": 7.867322992387386e-06, |
|
"loss": 2.3217, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.31594853244196164, |
|
"grad_norm": 0.4034270644187927, |
|
"learning_rate": 7.895941846488467e-06, |
|
"loss": 2.3202, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.31709327350153393, |
|
"grad_norm": 0.3806785047054291, |
|
"learning_rate": 7.924560700589549e-06, |
|
"loss": 2.3181, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 0.3182380145611063, |
|
"grad_norm": 0.4768499433994293, |
|
"learning_rate": 7.953179554690631e-06, |
|
"loss": 2.3294, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.3193827556206786, |
|
"grad_norm": 0.35020050406455994, |
|
"learning_rate": 7.981798408791712e-06, |
|
"loss": 2.348, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 0.32052749668025093, |
|
"grad_norm": 0.37575003504753113, |
|
"learning_rate": 8.010417262892794e-06, |
|
"loss": 2.3327, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.32167223773982323, |
|
"grad_norm": 0.38812920451164246, |
|
"learning_rate": 8.039036116993875e-06, |
|
"loss": 2.3488, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 0.3228169787993956, |
|
"grad_norm": 0.4457553029060364, |
|
"learning_rate": 8.067654971094958e-06, |
|
"loss": 2.3239, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.3239617198589679, |
|
"grad_norm": 0.4105226695537567, |
|
"learning_rate": 8.09627382519604e-06, |
|
"loss": 2.337, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 0.32510646091854023, |
|
"grad_norm": 0.3988581597805023, |
|
"learning_rate": 8.124892679297122e-06, |
|
"loss": 2.3405, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.3262512019781125, |
|
"grad_norm": 0.5257512927055359, |
|
"learning_rate": 8.153511533398203e-06, |
|
"loss": 2.3093, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.3273959430376849, |
|
"grad_norm": 0.4554663896560669, |
|
"learning_rate": 8.182130387499285e-06, |
|
"loss": 2.3376, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.3285406840972572, |
|
"grad_norm": 0.5744247436523438, |
|
"learning_rate": 8.210749241600366e-06, |
|
"loss": 2.3282, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 0.3296854251568295, |
|
"grad_norm": 0.4724181592464447, |
|
"learning_rate": 8.239368095701449e-06, |
|
"loss": 2.3158, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.3308301662164019, |
|
"grad_norm": 0.37950775027275085, |
|
"learning_rate": 8.267986949802531e-06, |
|
"loss": 2.33, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 0.3319749072759742, |
|
"grad_norm": 0.3992998003959656, |
|
"learning_rate": 8.296605803903612e-06, |
|
"loss": 2.3445, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.3331196483355465, |
|
"grad_norm": 0.3856017291545868, |
|
"learning_rate": 8.325224658004694e-06, |
|
"loss": 2.3205, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 0.3342643893951188, |
|
"grad_norm": 0.4017845690250397, |
|
"learning_rate": 8.353843512105776e-06, |
|
"loss": 2.3236, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.3354091304546912, |
|
"grad_norm": 0.557650625705719, |
|
"learning_rate": 8.382462366206859e-06, |
|
"loss": 2.3188, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 0.33655387151426347, |
|
"grad_norm": 0.5162211060523987, |
|
"learning_rate": 8.410795031766928e-06, |
|
"loss": 2.317, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.3376986125738358, |
|
"grad_norm": 0.4619830846786499, |
|
"learning_rate": 8.43941388586801e-06, |
|
"loss": 2.325, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.3388433536334081, |
|
"grad_norm": 0.36212870478630066, |
|
"learning_rate": 8.468032739969092e-06, |
|
"loss": 2.3266, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.33998809469298047, |
|
"grad_norm": 0.3987511098384857, |
|
"learning_rate": 8.496651594070175e-06, |
|
"loss": 2.3352, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 0.34113283575255277, |
|
"grad_norm": 0.49319741129875183, |
|
"learning_rate": 8.525270448171256e-06, |
|
"loss": 2.331, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.3422775768121251, |
|
"grad_norm": 0.3750767111778259, |
|
"learning_rate": 8.553889302272338e-06, |
|
"loss": 2.3197, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 0.3434223178716974, |
|
"grad_norm": 0.42684081196784973, |
|
"learning_rate": 8.582508156373419e-06, |
|
"loss": 2.3196, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.34456705893126977, |
|
"grad_norm": 0.4298862814903259, |
|
"learning_rate": 8.611127010474501e-06, |
|
"loss": 2.309, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 0.34571179999084206, |
|
"grad_norm": 0.4002019762992859, |
|
"learning_rate": 8.639745864575583e-06, |
|
"loss": 2.312, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.3468565410504144, |
|
"grad_norm": 0.3674704134464264, |
|
"learning_rate": 8.668364718676666e-06, |
|
"loss": 2.3147, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 0.3480012821099867, |
|
"grad_norm": 0.5854533314704895, |
|
"learning_rate": 8.696983572777747e-06, |
|
"loss": 2.3236, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.34914602316955906, |
|
"grad_norm": 0.4132590889930725, |
|
"learning_rate": 8.725602426878829e-06, |
|
"loss": 2.3359, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.35029076422913136, |
|
"grad_norm": 0.39070025086402893, |
|
"learning_rate": 8.75422128097991e-06, |
|
"loss": 2.3223, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.3514355052887037, |
|
"grad_norm": 0.40703412890434265, |
|
"learning_rate": 8.782840135080992e-06, |
|
"loss": 2.3243, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 0.352580246348276, |
|
"grad_norm": 0.3701010048389435, |
|
"learning_rate": 8.811458989182073e-06, |
|
"loss": 2.3297, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.35372498740784836, |
|
"grad_norm": 0.5442121028900146, |
|
"learning_rate": 8.840077843283155e-06, |
|
"loss": 2.3256, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 0.35486972846742065, |
|
"grad_norm": 0.4204414486885071, |
|
"learning_rate": 8.868696697384238e-06, |
|
"loss": 2.3178, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.356014469526993, |
|
"grad_norm": 0.4551771283149719, |
|
"learning_rate": 8.89731555148532e-06, |
|
"loss": 2.3299, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 0.3571592105865653, |
|
"grad_norm": 0.4303077757358551, |
|
"learning_rate": 8.9259344055864e-06, |
|
"loss": 2.3251, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.35830395164613765, |
|
"grad_norm": 0.41671931743621826, |
|
"learning_rate": 8.954553259687483e-06, |
|
"loss": 2.3279, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 0.35944869270570995, |
|
"grad_norm": 0.4018367528915405, |
|
"learning_rate": 8.983172113788564e-06, |
|
"loss": 2.3069, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.3605934337652823, |
|
"grad_norm": 0.5130965113639832, |
|
"learning_rate": 9.011790967889646e-06, |
|
"loss": 2.3315, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.3617381748248546, |
|
"grad_norm": 0.4209829270839691, |
|
"learning_rate": 9.040409821990729e-06, |
|
"loss": 2.324, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.36288291588442695, |
|
"grad_norm": 0.41159483790397644, |
|
"learning_rate": 9.06902867609181e-06, |
|
"loss": 2.321, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 0.36402765694399924, |
|
"grad_norm": 0.46957677602767944, |
|
"learning_rate": 9.097647530192892e-06, |
|
"loss": 2.3249, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.3651723980035716, |
|
"grad_norm": 0.39695438742637634, |
|
"learning_rate": 9.126266384293974e-06, |
|
"loss": 2.3112, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 0.3663171390631439, |
|
"grad_norm": 0.3984281122684479, |
|
"learning_rate": 9.154885238395056e-06, |
|
"loss": 2.3269, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.36746188012271624, |
|
"grad_norm": 0.40746673941612244, |
|
"learning_rate": 9.183504092496137e-06, |
|
"loss": 2.333, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 0.36860662118228854, |
|
"grad_norm": 0.39766183495521545, |
|
"learning_rate": 9.21212294659722e-06, |
|
"loss": 2.3354, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.3697513622418609, |
|
"grad_norm": 0.42040807008743286, |
|
"learning_rate": 9.2407418006983e-06, |
|
"loss": 2.3165, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 0.3708961033014332, |
|
"grad_norm": 0.43911924958229065, |
|
"learning_rate": 9.269360654799383e-06, |
|
"loss": 2.3386, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.37204084436100554, |
|
"grad_norm": 0.46791312098503113, |
|
"learning_rate": 9.297979508900465e-06, |
|
"loss": 2.3116, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.3731855854205779, |
|
"grad_norm": 0.41716283559799194, |
|
"learning_rate": 9.326598363001547e-06, |
|
"loss": 2.3298, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.3743303264801502, |
|
"grad_norm": 0.38628315925598145, |
|
"learning_rate": 9.355217217102628e-06, |
|
"loss": 2.3093, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 0.37547506753972254, |
|
"grad_norm": 0.4715399146080017, |
|
"learning_rate": 9.38383607120371e-06, |
|
"loss": 2.3353, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.37661980859929484, |
|
"grad_norm": 0.4824056923389435, |
|
"learning_rate": 9.412454925304791e-06, |
|
"loss": 2.3165, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 0.3777645496588672, |
|
"grad_norm": 0.3745131194591522, |
|
"learning_rate": 9.441073779405874e-06, |
|
"loss": 2.31, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.3789092907184395, |
|
"grad_norm": 0.4431547522544861, |
|
"learning_rate": 9.469406444965944e-06, |
|
"loss": 2.3158, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 0.38005403177801184, |
|
"grad_norm": 0.43701136112213135, |
|
"learning_rate": 9.498025299067026e-06, |
|
"loss": 2.3184, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.38119877283758413, |
|
"grad_norm": 0.40076711773872375, |
|
"learning_rate": 9.526644153168107e-06, |
|
"loss": 2.3247, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 0.3823435138971565, |
|
"grad_norm": Infinity, |
|
"learning_rate": 9.55497681872818e-06, |
|
"loss": 2.3038, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.3834882549567288, |
|
"grad_norm": 0.4818211793899536, |
|
"learning_rate": 9.583595672829262e-06, |
|
"loss": 2.3125, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.38463299601630113, |
|
"grad_norm": 0.44447603821754456, |
|
"learning_rate": 9.612214526930342e-06, |
|
"loss": 2.3056, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.38577773707587343, |
|
"grad_norm": 0.42127880454063416, |
|
"learning_rate": 9.640833381031425e-06, |
|
"loss": 2.318, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 0.3869224781354458, |
|
"grad_norm": 0.42268356680870056, |
|
"learning_rate": 9.669452235132506e-06, |
|
"loss": 2.3247, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.3880672191950181, |
|
"grad_norm": 0.42822974920272827, |
|
"learning_rate": 9.698071089233588e-06, |
|
"loss": 2.3164, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 0.38921196025459043, |
|
"grad_norm": 0.4107573926448822, |
|
"learning_rate": 9.72668994333467e-06, |
|
"loss": 2.3187, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.3903567013141627, |
|
"grad_norm": 0.40011221170425415, |
|
"learning_rate": 9.755308797435751e-06, |
|
"loss": 2.3197, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 0.3915014423737351, |
|
"grad_norm": 0.4174409508705139, |
|
"learning_rate": 9.783927651536833e-06, |
|
"loss": 2.3082, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.39264618343330737, |
|
"grad_norm": 0.4117099344730377, |
|
"learning_rate": 9.812546505637916e-06, |
|
"loss": 2.3197, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 0.3937909244928797, |
|
"grad_norm": 0.4379769563674927, |
|
"learning_rate": 9.841165359738997e-06, |
|
"loss": 2.3151, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.394935665552452, |
|
"grad_norm": 0.5240621566772461, |
|
"learning_rate": 9.869784213840079e-06, |
|
"loss": 2.309, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.39608040661202437, |
|
"grad_norm": 0.38917648792266846, |
|
"learning_rate": 9.89840306794116e-06, |
|
"loss": 2.3122, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.39722514767159667, |
|
"grad_norm": 0.47040241956710815, |
|
"learning_rate": 9.927021922042242e-06, |
|
"loss": 2.3053, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 0.398369888731169, |
|
"grad_norm": 0.42958155274391174, |
|
"learning_rate": 9.955640776143324e-06, |
|
"loss": 2.3038, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.3995146297907413, |
|
"grad_norm": 0.4274247884750366, |
|
"learning_rate": 9.984259630244407e-06, |
|
"loss": 2.3104, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 0.40065937085031367, |
|
"grad_norm": 0.4401596188545227, |
|
"learning_rate": 1.0012878484345488e-05, |
|
"loss": 2.2919, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.40180411190988596, |
|
"grad_norm": 0.4685971438884735, |
|
"learning_rate": 1.0041497338446568e-05, |
|
"loss": 2.323, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 0.4029488529694583, |
|
"grad_norm": 0.4304906725883484, |
|
"learning_rate": 1.0070116192547652e-05, |
|
"loss": 2.3182, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.4040935940290306, |
|
"grad_norm": 0.44299623370170593, |
|
"learning_rate": 1.0098735046648733e-05, |
|
"loss": 2.3041, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 0.40523833508860296, |
|
"grad_norm": 0.42946410179138184, |
|
"learning_rate": 1.0127353900749814e-05, |
|
"loss": 2.3094, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.40638307614817526, |
|
"grad_norm": 0.4753871262073517, |
|
"learning_rate": 1.0155972754850896e-05, |
|
"loss": 2.3141, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.4075278172077476, |
|
"grad_norm": 0.4177212417125702, |
|
"learning_rate": 1.0184591608951979e-05, |
|
"loss": 2.3255, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.4086725582673199, |
|
"grad_norm": 0.4225813150405884, |
|
"learning_rate": 1.0213210463053061e-05, |
|
"loss": 2.3238, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 0.40981729932689226, |
|
"grad_norm": 0.39542898535728455, |
|
"learning_rate": 1.0241829317154142e-05, |
|
"loss": 2.3013, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 0.41096204038646456, |
|
"grad_norm": 0.44745051860809326, |
|
"learning_rate": 1.0270448171255222e-05, |
|
"loss": 2.3142, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 0.4121067814460369, |
|
"grad_norm": 0.46079352498054504, |
|
"learning_rate": 1.0299067025356306e-05, |
|
"loss": 2.3153, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.41325152250560926, |
|
"grad_norm": 0.4723173975944519, |
|
"learning_rate": 1.0327685879457387e-05, |
|
"loss": 2.3053, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 0.41439626356518156, |
|
"grad_norm": 0.4553997218608856, |
|
"learning_rate": 1.035630473355847e-05, |
|
"loss": 2.3143, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.4155410046247539, |
|
"grad_norm": 0.43542203307151794, |
|
"learning_rate": 1.038492358765955e-05, |
|
"loss": 2.32, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 0.4166857456843262, |
|
"grad_norm": 0.5056464076042175, |
|
"learning_rate": 1.0413542441760633e-05, |
|
"loss": 2.3055, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 0.41783048674389855, |
|
"grad_norm": 0.424907922744751, |
|
"learning_rate": 1.0442161295861715e-05, |
|
"loss": 2.312, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.41897522780347085, |
|
"grad_norm": 0.4522388279438019, |
|
"learning_rate": 1.0470780149962796e-05, |
|
"loss": 2.304, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.4201199688630432, |
|
"grad_norm": 0.42723220586776733, |
|
"learning_rate": 1.0499399004063876e-05, |
|
"loss": 2.3128, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 0.4212647099226155, |
|
"grad_norm": 0.4533430337905884, |
|
"learning_rate": 1.052801785816496e-05, |
|
"loss": 2.3273, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 0.42240945098218785, |
|
"grad_norm": 0.4739341735839844, |
|
"learning_rate": 1.0556636712266041e-05, |
|
"loss": 2.3114, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 0.42355419204176015, |
|
"grad_norm": 0.43692803382873535, |
|
"learning_rate": 1.0585255566367124e-05, |
|
"loss": 2.3093, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.4246989331013325, |
|
"grad_norm": 0.437219500541687, |
|
"learning_rate": 1.0613874420468204e-05, |
|
"loss": 2.2975, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 0.4258436741609048, |
|
"grad_norm": 0.5384771227836609, |
|
"learning_rate": 1.0642493274569288e-05, |
|
"loss": 2.3272, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 0.42698841522047715, |
|
"grad_norm": 0.3731122314929962, |
|
"learning_rate": 1.0671112128670369e-05, |
|
"loss": 2.3239, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 0.42813315628004944, |
|
"grad_norm": 0.4134446978569031, |
|
"learning_rate": 1.069973098277145e-05, |
|
"loss": 2.2992, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 0.4292778973396218, |
|
"grad_norm": 0.3970800042152405, |
|
"learning_rate": 1.0728349836872532e-05, |
|
"loss": 2.3166, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.4304226383991941, |
|
"grad_norm": 0.4207904040813446, |
|
"learning_rate": 1.0756968690973615e-05, |
|
"loss": 2.2956, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 0.43156737945876644, |
|
"grad_norm": 0.43248116970062256, |
|
"learning_rate": 1.0785587545074695e-05, |
|
"loss": 2.2988, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 0.43271212051833874, |
|
"grad_norm": 0.5825778245925903, |
|
"learning_rate": 1.0814206399175778e-05, |
|
"loss": 2.3124, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 0.4338568615779111, |
|
"grad_norm": 0.4129347503185272, |
|
"learning_rate": 1.084282525327686e-05, |
|
"loss": 2.3045, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 0.4350016026374834, |
|
"grad_norm": 0.5687834024429321, |
|
"learning_rate": 1.0871444107377943e-05, |
|
"loss": 2.3107, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.43614634369705574, |
|
"grad_norm": 0.4236217737197876, |
|
"learning_rate": 1.0900062961479023e-05, |
|
"loss": 2.3019, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 0.43729108475662803, |
|
"grad_norm": 0.4377936124801636, |
|
"learning_rate": 1.0928681815580104e-05, |
|
"loss": 2.3093, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 0.4384358258162004, |
|
"grad_norm": 0.7092427611351013, |
|
"learning_rate": 1.0957300669681188e-05, |
|
"loss": 2.3122, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 0.4395805668757727, |
|
"grad_norm": 0.5125077366828918, |
|
"learning_rate": 1.0985919523782269e-05, |
|
"loss": 2.3101, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.44072530793534503, |
|
"grad_norm": 0.5460866093635559, |
|
"learning_rate": 1.1014252189342341e-05, |
|
"loss": 2.3059, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.44187004899491733, |
|
"grad_norm": 0.5048667788505554, |
|
"learning_rate": 1.1042871043443422e-05, |
|
"loss": 2.2934, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 0.4430147900544897, |
|
"grad_norm": 0.505024790763855, |
|
"learning_rate": 1.1071489897544502e-05, |
|
"loss": 2.3106, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 0.444159531114062, |
|
"grad_norm": 0.4877238869667053, |
|
"learning_rate": 1.1100108751645585e-05, |
|
"loss": 2.3108, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 0.44530427217363433, |
|
"grad_norm": 0.43280166387557983, |
|
"learning_rate": 1.1128727605746667e-05, |
|
"loss": 2.2922, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 0.4464490132332066, |
|
"grad_norm": 0.47969332337379456, |
|
"learning_rate": 1.115734645984775e-05, |
|
"loss": 2.3093, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.447593754292779, |
|
"grad_norm": 0.45905813574790955, |
|
"learning_rate": 1.118596531394883e-05, |
|
"loss": 2.302, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 0.4487384953523513, |
|
"grad_norm": 0.4151560962200165, |
|
"learning_rate": 1.1214584168049911e-05, |
|
"loss": 2.3278, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 0.4498832364119236, |
|
"grad_norm": 0.47377634048461914, |
|
"learning_rate": 1.1243203022150995e-05, |
|
"loss": 2.3144, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 0.4510279774714959, |
|
"grad_norm": 0.5002289414405823, |
|
"learning_rate": 1.1271821876252076e-05, |
|
"loss": 2.3065, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 0.4521727185310683, |
|
"grad_norm": 0.48427700996398926, |
|
"learning_rate": 1.1300440730353156e-05, |
|
"loss": 2.3175, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.45331745959064057, |
|
"grad_norm": 0.41482630372047424, |
|
"learning_rate": 1.1329059584454239e-05, |
|
"loss": 2.3115, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 0.4544622006502129, |
|
"grad_norm": 0.45701828598976135, |
|
"learning_rate": 1.1357678438555321e-05, |
|
"loss": 2.3166, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 0.4556069417097853, |
|
"grad_norm": 0.4916311502456665, |
|
"learning_rate": 1.1386297292656404e-05, |
|
"loss": 2.3017, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 0.45675168276935757, |
|
"grad_norm": 0.4516671299934387, |
|
"learning_rate": 1.1414916146757484e-05, |
|
"loss": 2.2997, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 0.4578964238289299, |
|
"grad_norm": 0.46147215366363525, |
|
"learning_rate": 1.1443535000858565e-05, |
|
"loss": 2.3052, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.4590411648885022, |
|
"grad_norm": 0.48540788888931274, |
|
"learning_rate": 1.1472153854959649e-05, |
|
"loss": 2.3032, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 0.46018590594807457, |
|
"grad_norm": 0.47240906953811646, |
|
"learning_rate": 1.150077270906073e-05, |
|
"loss": 2.2986, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 0.46133064700764687, |
|
"grad_norm": 0.5069533586502075, |
|
"learning_rate": 1.1529391563161812e-05, |
|
"loss": 2.3019, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 0.4624753880672192, |
|
"grad_norm": 0.5581790208816528, |
|
"learning_rate": 1.1558010417262893e-05, |
|
"loss": 2.297, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 0.4636201291267915, |
|
"grad_norm": 0.42001545429229736, |
|
"learning_rate": 1.1586629271363975e-05, |
|
"loss": 2.3023, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.46476487018636387, |
|
"grad_norm": 0.43199682235717773, |
|
"learning_rate": 1.1615248125465058e-05, |
|
"loss": 2.307, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 0.46590961124593616, |
|
"grad_norm": 0.39316660165786743, |
|
"learning_rate": 1.1643580791025128e-05, |
|
"loss": 2.3006, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 0.4670543523055085, |
|
"grad_norm": 0.4300936162471771, |
|
"learning_rate": 1.167219964512621e-05, |
|
"loss": 2.2991, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 0.4681990933650808, |
|
"grad_norm": 0.3927803933620453, |
|
"learning_rate": 1.1700818499227291e-05, |
|
"loss": 2.302, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 0.46934383442465316, |
|
"grad_norm": 0.4072405993938446, |
|
"learning_rate": 1.1729437353328375e-05, |
|
"loss": 2.3233, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.47048857548422546, |
|
"grad_norm": 0.5310955047607422, |
|
"learning_rate": 1.1758056207429456e-05, |
|
"loss": 2.2919, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 0.4716333165437978, |
|
"grad_norm": 0.48148131370544434, |
|
"learning_rate": 1.1786675061530537e-05, |
|
"loss": 2.2854, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 0.4727780576033701, |
|
"grad_norm": 0.4595455527305603, |
|
"learning_rate": 1.1815293915631617e-05, |
|
"loss": 2.2966, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 0.47392279866294246, |
|
"grad_norm": 0.41068974137306213, |
|
"learning_rate": 1.1843912769732702e-05, |
|
"loss": 2.3041, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 0.47506753972251475, |
|
"grad_norm": 0.48817092180252075, |
|
"learning_rate": 1.1872531623833782e-05, |
|
"loss": 2.3031, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.4762122807820871, |
|
"grad_norm": 0.4430725872516632, |
|
"learning_rate": 1.1901150477934865e-05, |
|
"loss": 2.306, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 0.4773570218416594, |
|
"grad_norm": 0.41381534934043884, |
|
"learning_rate": 1.1929769332035945e-05, |
|
"loss": 2.2938, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 0.47850176290123175, |
|
"grad_norm": 0.5231103897094727, |
|
"learning_rate": 1.195838818613703e-05, |
|
"loss": 2.287, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 0.47964650396080405, |
|
"grad_norm": 0.45451679825782776, |
|
"learning_rate": 1.198700704023811e-05, |
|
"loss": 2.3014, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 0.4807912450203764, |
|
"grad_norm": 0.4158308804035187, |
|
"learning_rate": 1.2015625894339191e-05, |
|
"loss": 2.2863, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.4819359860799487, |
|
"grad_norm": 0.4745527505874634, |
|
"learning_rate": 1.2044244748440273e-05, |
|
"loss": 2.3069, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 0.48308072713952105, |
|
"grad_norm": 0.4103641211986542, |
|
"learning_rate": 1.2072863602541356e-05, |
|
"loss": 2.3, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 0.48422546819909335, |
|
"grad_norm": 0.4857043921947479, |
|
"learning_rate": 1.2101482456642436e-05, |
|
"loss": 2.2829, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 0.4853702092586657, |
|
"grad_norm": 0.4224902093410492, |
|
"learning_rate": 1.2130101310743519e-05, |
|
"loss": 2.3012, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 0.486514950318238, |
|
"grad_norm": 0.4603799283504486, |
|
"learning_rate": 1.21587201648446e-05, |
|
"loss": 2.29, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.48765969137781034, |
|
"grad_norm": 0.43657830357551575, |
|
"learning_rate": 1.2187339018945684e-05, |
|
"loss": 2.3105, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 0.48880443243738264, |
|
"grad_norm": 0.411188006401062, |
|
"learning_rate": 1.2215957873046764e-05, |
|
"loss": 2.2911, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 0.489949173496955, |
|
"grad_norm": 0.4367277920246124, |
|
"learning_rate": 1.2244576727147845e-05, |
|
"loss": 2.3071, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 0.4910939145565273, |
|
"grad_norm": 0.4958134889602661, |
|
"learning_rate": 1.2272909392707917e-05, |
|
"loss": 2.3065, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 0.49223865561609964, |
|
"grad_norm": 0.4951634705066681, |
|
"learning_rate": 1.2301528246808998e-05, |
|
"loss": 2.2884, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.49338339667567194, |
|
"grad_norm": 0.44968706369400024, |
|
"learning_rate": 1.2330147100910082e-05, |
|
"loss": 2.2957, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 0.4945281377352443, |
|
"grad_norm": 0.45405635237693787, |
|
"learning_rate": 1.2358765955011163e-05, |
|
"loss": 2.2915, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 0.4956728787948166, |
|
"grad_norm": 0.5005086660385132, |
|
"learning_rate": 1.2387384809112243e-05, |
|
"loss": 2.3055, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 0.49681761985438894, |
|
"grad_norm": 0.5079677104949951, |
|
"learning_rate": 1.2416003663213326e-05, |
|
"loss": 2.3047, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 0.4979623609139613, |
|
"grad_norm": 0.47394371032714844, |
|
"learning_rate": 1.2444336328773396e-05, |
|
"loss": 2.2976, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.4991071019735336, |
|
"grad_norm": 0.5010650157928467, |
|
"learning_rate": 1.2472955182874477e-05, |
|
"loss": 2.2843, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 0.5002518430331059, |
|
"grad_norm": 0.443935751914978, |
|
"learning_rate": 1.2501574036975561e-05, |
|
"loss": 2.3022, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 0.5013965840926783, |
|
"grad_norm": 0.44586101174354553, |
|
"learning_rate": 1.2530192891076642e-05, |
|
"loss": 2.3024, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 0.5025413251522506, |
|
"grad_norm": 0.4664213955402374, |
|
"learning_rate": 1.2558811745177724e-05, |
|
"loss": 2.2865, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 0.5036860662118229, |
|
"grad_norm": 0.4366970956325531, |
|
"learning_rate": 1.2587430599278805e-05, |
|
"loss": 2.3052, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.5048308072713952, |
|
"grad_norm": 0.44286203384399414, |
|
"learning_rate": 1.2616049453379889e-05, |
|
"loss": 2.3006, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 0.5059755483309676, |
|
"grad_norm": 0.4843718707561493, |
|
"learning_rate": 1.264466830748097e-05, |
|
"loss": 2.2882, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 0.5071202893905399, |
|
"grad_norm": 0.4327425956726074, |
|
"learning_rate": 1.267328716158205e-05, |
|
"loss": 2.3146, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 0.5082650304501122, |
|
"grad_norm": 0.45917651057243347, |
|
"learning_rate": 1.2701906015683134e-05, |
|
"loss": 2.2763, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 0.5094097715096845, |
|
"grad_norm": 0.4044801890850067, |
|
"learning_rate": 1.2730524869784215e-05, |
|
"loss": 2.2919, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.5105545125692569, |
|
"grad_norm": 0.43837985396385193, |
|
"learning_rate": 1.2759143723885296e-05, |
|
"loss": 2.3, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 0.5116992536288292, |
|
"grad_norm": 0.5559438467025757, |
|
"learning_rate": 1.2787762577986378e-05, |
|
"loss": 2.2841, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 0.5128439946884015, |
|
"grad_norm": 0.5006335377693176, |
|
"learning_rate": 1.281638143208746e-05, |
|
"loss": 2.3024, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 0.5139887357479738, |
|
"grad_norm": 0.5347406268119812, |
|
"learning_rate": 1.2845000286188543e-05, |
|
"loss": 2.2915, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 0.5151334768075462, |
|
"grad_norm": 0.49963897466659546, |
|
"learning_rate": 1.2873619140289624e-05, |
|
"loss": 2.3094, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.5162782178671185, |
|
"grad_norm": 0.4746800661087036, |
|
"learning_rate": 1.2902237994390704e-05, |
|
"loss": 2.288, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 0.5174229589266908, |
|
"grad_norm": 0.44089171290397644, |
|
"learning_rate": 1.2930856848491788e-05, |
|
"loss": 2.2791, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 0.5185676999862631, |
|
"grad_norm": 0.46968016028404236, |
|
"learning_rate": 1.295947570259287e-05, |
|
"loss": 2.3088, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 0.5197124410458355, |
|
"grad_norm": 0.45375433564186096, |
|
"learning_rate": 1.2988094556693952e-05, |
|
"loss": 2.3083, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 0.5208571821054078, |
|
"grad_norm": 0.5065542459487915, |
|
"learning_rate": 1.3016713410795032e-05, |
|
"loss": 2.2694, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.5220019231649801, |
|
"grad_norm": 0.5144473910331726, |
|
"learning_rate": 1.3045332264896116e-05, |
|
"loss": 2.3044, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 0.5231466642245524, |
|
"grad_norm": 0.5982611179351807, |
|
"learning_rate": 1.3073951118997197e-05, |
|
"loss": 2.2964, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 0.5242914052841248, |
|
"grad_norm": 0.42570099234580994, |
|
"learning_rate": 1.3102569973098278e-05, |
|
"loss": 2.2898, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 0.5254361463436971, |
|
"grad_norm": 0.5816085934638977, |
|
"learning_rate": 1.3131188827199358e-05, |
|
"loss": 2.2974, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 0.5265808874032694, |
|
"grad_norm": 0.5254452228546143, |
|
"learning_rate": 1.3159807681300443e-05, |
|
"loss": 2.3149, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.5277256284628417, |
|
"grad_norm": 0.43442779779434204, |
|
"learning_rate": 1.3188426535401523e-05, |
|
"loss": 2.2951, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 0.5288703695224141, |
|
"grad_norm": 0.4493260085582733, |
|
"learning_rate": 1.3217045389502606e-05, |
|
"loss": 2.2768, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 0.5300151105819864, |
|
"grad_norm": 0.4513915777206421, |
|
"learning_rate": 1.3245378055062676e-05, |
|
"loss": 2.3075, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 0.5311598516415587, |
|
"grad_norm": 0.45114508271217346, |
|
"learning_rate": 1.3273996909163757e-05, |
|
"loss": 2.2895, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 0.532304592701131, |
|
"grad_norm": 0.43823984265327454, |
|
"learning_rate": 1.330261576326484e-05, |
|
"loss": 2.3087, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.5334493337607034, |
|
"grad_norm": 0.453106164932251, |
|
"learning_rate": 1.3331234617365922e-05, |
|
"loss": 2.2851, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 0.5345940748202757, |
|
"grad_norm": 0.46690353751182556, |
|
"learning_rate": 1.3359853471467004e-05, |
|
"loss": 2.2914, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 0.535738815879848, |
|
"grad_norm": 0.46535834670066833, |
|
"learning_rate": 1.3388472325568085e-05, |
|
"loss": 2.2938, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 0.5368835569394202, |
|
"grad_norm": 0.45468568801879883, |
|
"learning_rate": 1.3417091179669165e-05, |
|
"loss": 2.2974, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 0.5380282979989927, |
|
"grad_norm": 0.4835493862628937, |
|
"learning_rate": 1.344571003377025e-05, |
|
"loss": 2.2882, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.539173039058565, |
|
"grad_norm": 0.45315301418304443, |
|
"learning_rate": 1.347432888787133e-05, |
|
"loss": 2.2875, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 0.5403177801181372, |
|
"grad_norm": 0.5241557359695435, |
|
"learning_rate": 1.3502947741972413e-05, |
|
"loss": 2.2927, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 0.5414625211777097, |
|
"grad_norm": 0.4486404061317444, |
|
"learning_rate": 1.3531566596073495e-05, |
|
"loss": 2.2898, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 0.542607262237282, |
|
"grad_norm": 0.4849669933319092, |
|
"learning_rate": 1.3560185450174577e-05, |
|
"loss": 2.283, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 0.5437520032968542, |
|
"grad_norm": 0.6526544690132141, |
|
"learning_rate": 1.3588804304275658e-05, |
|
"loss": 2.287, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.5448967443564265, |
|
"grad_norm": 0.4628201723098755, |
|
"learning_rate": 1.3617423158376739e-05, |
|
"loss": 2.3099, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 0.546041485415999, |
|
"grad_norm": 0.5132496356964111, |
|
"learning_rate": 1.3646042012477823e-05, |
|
"loss": 2.2969, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 0.5471862264755712, |
|
"grad_norm": 0.545789897441864, |
|
"learning_rate": 1.3674660866578904e-05, |
|
"loss": 2.2948, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 0.5483309675351435, |
|
"grad_norm": 0.5407856106758118, |
|
"learning_rate": 1.3703279720679984e-05, |
|
"loss": 2.2861, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 0.5494757085947158, |
|
"grad_norm": 0.494488000869751, |
|
"learning_rate": 1.3731898574781067e-05, |
|
"loss": 2.2687, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.5506204496542882, |
|
"grad_norm": 0.44262704253196716, |
|
"learning_rate": 1.3760517428882149e-05, |
|
"loss": 2.2879, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 0.5517651907138605, |
|
"grad_norm": 0.4556616544723511, |
|
"learning_rate": 1.3789136282983232e-05, |
|
"loss": 2.2733, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 0.5529099317734328, |
|
"grad_norm": 0.5023077130317688, |
|
"learning_rate": 1.3817755137084312e-05, |
|
"loss": 2.284, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 0.5540546728330051, |
|
"grad_norm": 0.44959184527397156, |
|
"learning_rate": 1.3846373991185393e-05, |
|
"loss": 2.2821, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 0.5551994138925775, |
|
"grad_norm": 0.6102599501609802, |
|
"learning_rate": 1.3874992845286477e-05, |
|
"loss": 2.2889, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.5563441549521498, |
|
"grad_norm": 0.508794367313385, |
|
"learning_rate": 1.3903611699387558e-05, |
|
"loss": 2.2766, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 0.5574888960117221, |
|
"grad_norm": 0.5081732869148254, |
|
"learning_rate": 1.3932230553488638e-05, |
|
"loss": 2.2843, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 0.5586336370712944, |
|
"grad_norm": 0.4801699221134186, |
|
"learning_rate": 1.396084940758972e-05, |
|
"loss": 2.2749, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 0.5597783781308668, |
|
"grad_norm": 0.5260947346687317, |
|
"learning_rate": 1.3989468261690803e-05, |
|
"loss": 2.2883, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 0.5609231191904391, |
|
"grad_norm": 0.44729700684547424, |
|
"learning_rate": 1.4018087115791886e-05, |
|
"loss": 2.2889, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.5620678602500114, |
|
"grad_norm": 0.4468446671962738, |
|
"learning_rate": 1.4046705969892966e-05, |
|
"loss": 2.2892, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 0.5632126013095837, |
|
"grad_norm": 0.4718739688396454, |
|
"learning_rate": 1.4075324823994047e-05, |
|
"loss": 2.2894, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 0.5643573423691561, |
|
"grad_norm": 0.5241585373878479, |
|
"learning_rate": 1.4103943678095131e-05, |
|
"loss": 2.2922, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 0.5655020834287284, |
|
"grad_norm": 0.43622729182243347, |
|
"learning_rate": 1.4132562532196212e-05, |
|
"loss": 2.2816, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 0.5666468244883007, |
|
"grad_norm": 0.4169420599937439, |
|
"learning_rate": 1.4161181386297294e-05, |
|
"loss": 2.3186, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.567791565547873, |
|
"grad_norm": 0.4381948411464691, |
|
"learning_rate": 1.4189800240398375e-05, |
|
"loss": 2.2903, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 0.5689363066074454, |
|
"grad_norm": 0.5486495494842529, |
|
"learning_rate": 1.4218419094499459e-05, |
|
"loss": 2.2964, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 0.5700810476670177, |
|
"grad_norm": 0.42850059270858765, |
|
"learning_rate": 1.424703794860054e-05, |
|
"loss": 2.29, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 0.57122578872659, |
|
"grad_norm": 0.4907155930995941, |
|
"learning_rate": 1.427565680270162e-05, |
|
"loss": 2.3085, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 0.5723705297861623, |
|
"grad_norm": 0.43422576785087585, |
|
"learning_rate": 1.4304275656802701e-05, |
|
"loss": 2.2787, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.5735152708457347, |
|
"grad_norm": 0.4992702007293701, |
|
"learning_rate": 1.4332894510903785e-05, |
|
"loss": 2.287, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 0.574660011905307, |
|
"grad_norm": 0.4858098030090332, |
|
"learning_rate": 1.4361227176463857e-05, |
|
"loss": 2.2761, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 0.5758047529648793, |
|
"grad_norm": 0.48108112812042236, |
|
"learning_rate": 1.4389846030564938e-05, |
|
"loss": 2.2883, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 0.5769494940244516, |
|
"grad_norm": 0.38939031958580017, |
|
"learning_rate": 1.4418464884666019e-05, |
|
"loss": 2.2837, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 0.578094235084024, |
|
"grad_norm": 0.488679438829422, |
|
"learning_rate": 1.44470837387671e-05, |
|
"loss": 2.298, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.5792389761435963, |
|
"grad_norm": 0.5358524918556213, |
|
"learning_rate": 1.4475702592868184e-05, |
|
"loss": 2.2897, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 0.5803837172031686, |
|
"grad_norm": 0.48244425654411316, |
|
"learning_rate": 1.4504321446969264e-05, |
|
"loss": 2.2748, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 0.581528458262741, |
|
"grad_norm": 0.49125197529792786, |
|
"learning_rate": 1.4532940301070347e-05, |
|
"loss": 2.2666, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 0.5826731993223133, |
|
"grad_norm": 0.5043622851371765, |
|
"learning_rate": 1.4561559155171427e-05, |
|
"loss": 2.2948, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 0.5838179403818856, |
|
"grad_norm": 0.4543743431568146, |
|
"learning_rate": 1.4590178009272512e-05, |
|
"loss": 2.2801, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.5849626814414579, |
|
"grad_norm": 0.45934557914733887, |
|
"learning_rate": 1.4618796863373592e-05, |
|
"loss": 2.2897, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 0.5861074225010303, |
|
"grad_norm": 0.48373672366142273, |
|
"learning_rate": 1.4647415717474673e-05, |
|
"loss": 2.2808, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 0.5872521635606026, |
|
"grad_norm": 0.42684435844421387, |
|
"learning_rate": 1.4676034571575755e-05, |
|
"loss": 2.2707, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 0.5883969046201749, |
|
"grad_norm": 0.439179927110672, |
|
"learning_rate": 1.4704653425676838e-05, |
|
"loss": 2.2887, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 0.5895416456797472, |
|
"grad_norm": 0.48160520195961, |
|
"learning_rate": 1.473327227977792e-05, |
|
"loss": 2.282, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.5906863867393196, |
|
"grad_norm": 0.48224136233329773, |
|
"learning_rate": 1.4761891133879e-05, |
|
"loss": 2.2766, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 0.5918311277988919, |
|
"grad_norm": 0.46199363470077515, |
|
"learning_rate": 1.4790509987980082e-05, |
|
"loss": 2.2836, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 0.5929758688584642, |
|
"grad_norm": 0.4785059690475464, |
|
"learning_rate": 1.4819128842081166e-05, |
|
"loss": 2.2865, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 0.5941206099180365, |
|
"grad_norm": 0.43915683031082153, |
|
"learning_rate": 1.4847747696182246e-05, |
|
"loss": 2.2785, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 0.5952653509776089, |
|
"grad_norm": 0.5053157806396484, |
|
"learning_rate": 1.4876366550283327e-05, |
|
"loss": 2.2903, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.5964100920371812, |
|
"grad_norm": 0.4726928174495697, |
|
"learning_rate": 1.490498540438441e-05, |
|
"loss": 2.2818, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 0.5975548330967535, |
|
"grad_norm": 0.430034875869751, |
|
"learning_rate": 1.4933604258485492e-05, |
|
"loss": 2.29, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 0.5986995741563258, |
|
"grad_norm": 0.4643426835536957, |
|
"learning_rate": 1.4962223112586574e-05, |
|
"loss": 2.2897, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 0.5998443152158982, |
|
"grad_norm": 0.5476269125938416, |
|
"learning_rate": 1.4990841966687655e-05, |
|
"loss": 2.2786, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 0.6009890562754705, |
|
"grad_norm": 0.4216204285621643, |
|
"learning_rate": 1.5019174632247725e-05, |
|
"loss": 2.2814, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.6021337973350428, |
|
"grad_norm": 0.4980791211128235, |
|
"learning_rate": 1.5047793486348808e-05, |
|
"loss": 2.2648, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 0.6032785383946151, |
|
"grad_norm": 0.48100781440734863, |
|
"learning_rate": 1.5076412340449888e-05, |
|
"loss": 2.2881, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 0.6044232794541875, |
|
"grad_norm": 0.5112878084182739, |
|
"learning_rate": 1.5105031194550973e-05, |
|
"loss": 2.2761, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 0.6055680205137598, |
|
"grad_norm": 0.4899493455886841, |
|
"learning_rate": 1.5133650048652053e-05, |
|
"loss": 2.2852, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 0.6067127615733321, |
|
"grad_norm": 0.486299067735672, |
|
"learning_rate": 1.5162268902753134e-05, |
|
"loss": 2.2958, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.6078575026329044, |
|
"grad_norm": 0.580345630645752, |
|
"learning_rate": 1.5190887756854218e-05, |
|
"loss": 2.2716, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 0.6090022436924768, |
|
"grad_norm": 0.4456554055213928, |
|
"learning_rate": 1.5219506610955299e-05, |
|
"loss": 2.285, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 0.6101469847520491, |
|
"grad_norm": 0.4706750512123108, |
|
"learning_rate": 1.5248125465056381e-05, |
|
"loss": 2.2968, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 0.6112917258116214, |
|
"grad_norm": 0.47107580304145813, |
|
"learning_rate": 1.527674431915746e-05, |
|
"loss": 2.2733, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 0.6124364668711937, |
|
"grad_norm": 0.45870354771614075, |
|
"learning_rate": 1.5305363173258546e-05, |
|
"loss": 2.2962, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.6135812079307661, |
|
"grad_norm": 0.526592493057251, |
|
"learning_rate": 1.5333982027359625e-05, |
|
"loss": 2.28, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 0.6147259489903384, |
|
"grad_norm": 0.4595036506652832, |
|
"learning_rate": 1.5362600881460707e-05, |
|
"loss": 2.2751, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 0.6158706900499107, |
|
"grad_norm": 0.47698622941970825, |
|
"learning_rate": 1.539121973556179e-05, |
|
"loss": 2.2764, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 0.617015431109483, |
|
"grad_norm": 0.47543615102767944, |
|
"learning_rate": 1.5419838589662872e-05, |
|
"loss": 2.269, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 0.6181601721690554, |
|
"grad_norm": 0.46203306317329407, |
|
"learning_rate": 1.5448457443763955e-05, |
|
"loss": 2.3127, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.6193049132286277, |
|
"grad_norm": 0.4622338116168976, |
|
"learning_rate": 1.5477076297865034e-05, |
|
"loss": 2.2795, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 0.6204496542882, |
|
"grad_norm": 0.43615639209747314, |
|
"learning_rate": 1.5505695151966116e-05, |
|
"loss": 2.2751, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 0.6215943953477724, |
|
"grad_norm": 0.4956182837486267, |
|
"learning_rate": 1.55343140060672e-05, |
|
"loss": 2.2888, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 0.6227391364073447, |
|
"grad_norm": 0.44354909658432007, |
|
"learning_rate": 1.556293286016828e-05, |
|
"loss": 2.278, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 0.623883877466917, |
|
"grad_norm": 0.46796587109565735, |
|
"learning_rate": 1.5591551714269363e-05, |
|
"loss": 2.2843, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.6250286185264893, |
|
"grad_norm": 0.43353140354156494, |
|
"learning_rate": 1.5620170568370442e-05, |
|
"loss": 2.2814, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 0.6261733595860617, |
|
"grad_norm": 0.47816458344459534, |
|
"learning_rate": 1.5648789422471528e-05, |
|
"loss": 2.2708, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 0.627318100645634, |
|
"grad_norm": 0.4949074387550354, |
|
"learning_rate": 1.5677408276572607e-05, |
|
"loss": 2.2785, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 0.6284628417052063, |
|
"grad_norm": 0.41498610377311707, |
|
"learning_rate": 1.570602713067369e-05, |
|
"loss": 2.2752, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 0.6296075827647786, |
|
"grad_norm": 0.41272154450416565, |
|
"learning_rate": 1.5734645984774772e-05, |
|
"loss": 2.2679, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.630752323824351, |
|
"grad_norm": 0.47275310754776, |
|
"learning_rate": 1.5763264838875854e-05, |
|
"loss": 2.2838, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 0.6318970648839233, |
|
"grad_norm": 0.41480526328086853, |
|
"learning_rate": 1.5791883692976933e-05, |
|
"loss": 2.2905, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 0.6330418059434956, |
|
"grad_norm": 0.45607683062553406, |
|
"learning_rate": 1.5820502547078016e-05, |
|
"loss": 2.2793, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 0.6341865470030679, |
|
"grad_norm": 0.4298737645149231, |
|
"learning_rate": 1.5849121401179098e-05, |
|
"loss": 2.2864, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 0.6353312880626403, |
|
"grad_norm": 0.45687663555145264, |
|
"learning_rate": 1.587774025528018e-05, |
|
"loss": 2.2761, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.6364760291222126, |
|
"grad_norm": 0.4270581901073456, |
|
"learning_rate": 1.5906359109381263e-05, |
|
"loss": 2.2859, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 0.6376207701817849, |
|
"grad_norm": 0.4622785449028015, |
|
"learning_rate": 1.5934977963482342e-05, |
|
"loss": 2.2751, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 0.6387655112413572, |
|
"grad_norm": 0.4890844523906708, |
|
"learning_rate": 1.5963596817583424e-05, |
|
"loss": 2.2908, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 0.6399102523009296, |
|
"grad_norm": 0.4259001910686493, |
|
"learning_rate": 1.5992215671684507e-05, |
|
"loss": 2.2724, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 0.6410549933605019, |
|
"grad_norm": 0.5524899363517761, |
|
"learning_rate": 1.602083452578559e-05, |
|
"loss": 2.2707, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.6421997344200742, |
|
"grad_norm": 0.4600765109062195, |
|
"learning_rate": 1.604945337988667e-05, |
|
"loss": 2.2828, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 0.6433444754796465, |
|
"grad_norm": 0.4435892403125763, |
|
"learning_rate": 1.607807223398775e-05, |
|
"loss": 2.2781, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 0.6444892165392189, |
|
"grad_norm": 0.47321733832359314, |
|
"learning_rate": 1.6106691088088836e-05, |
|
"loss": 2.2494, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 0.6456339575987912, |
|
"grad_norm": 0.530928373336792, |
|
"learning_rate": 1.6135309942189915e-05, |
|
"loss": 2.275, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 0.6467786986583635, |
|
"grad_norm": 0.5238829851150513, |
|
"learning_rate": 1.6163928796290998e-05, |
|
"loss": 2.2823, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.6479234397179358, |
|
"grad_norm": 0.5112258791923523, |
|
"learning_rate": 1.619254765039208e-05, |
|
"loss": 2.2724, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 0.6490681807775082, |
|
"grad_norm": 0.5023364424705505, |
|
"learning_rate": 1.6221166504493162e-05, |
|
"loss": 2.2655, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 0.6502129218370805, |
|
"grad_norm": 0.45399200916290283, |
|
"learning_rate": 1.6249785358594245e-05, |
|
"loss": 2.2697, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 0.6513576628966528, |
|
"grad_norm": 0.4420014023780823, |
|
"learning_rate": 1.6278404212695324e-05, |
|
"loss": 2.2772, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 0.652502403956225, |
|
"grad_norm": 0.4523197412490845, |
|
"learning_rate": 1.6307023066796406e-05, |
|
"loss": 2.2743, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.6536471450157975, |
|
"grad_norm": 0.5409209728240967, |
|
"learning_rate": 1.633564192089749e-05, |
|
"loss": 2.2727, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 0.6547918860753698, |
|
"grad_norm": 0.5374095439910889, |
|
"learning_rate": 1.636426077499857e-05, |
|
"loss": 2.261, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 0.655936627134942, |
|
"grad_norm": 0.5544825196266174, |
|
"learning_rate": 1.6392879629099653e-05, |
|
"loss": 2.272, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 0.6570813681945143, |
|
"grad_norm": 0.4391005337238312, |
|
"learning_rate": 1.6421498483200732e-05, |
|
"loss": 2.2752, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 0.6582261092540868, |
|
"grad_norm": 0.524519145488739, |
|
"learning_rate": 1.6450117337301815e-05, |
|
"loss": 2.2798, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.659370850313659, |
|
"grad_norm": 0.4723650813102722, |
|
"learning_rate": 1.6478736191402897e-05, |
|
"loss": 2.2566, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 0.6605155913732313, |
|
"grad_norm": 0.4081030786037445, |
|
"learning_rate": 1.6507068856962968e-05, |
|
"loss": 2.2812, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 0.6616603324328038, |
|
"grad_norm": 0.4323836863040924, |
|
"learning_rate": 1.653568771106405e-05, |
|
"loss": 2.2651, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 0.662805073492376, |
|
"grad_norm": 0.5252947807312012, |
|
"learning_rate": 1.6564306565165132e-05, |
|
"loss": 2.2754, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 0.6639498145519483, |
|
"grad_norm": 0.4679439663887024, |
|
"learning_rate": 1.6592925419266215e-05, |
|
"loss": 2.2735, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.6650945556115206, |
|
"grad_norm": 0.4603148102760315, |
|
"learning_rate": 1.6621544273367297e-05, |
|
"loss": 2.2741, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 0.666239296671093, |
|
"grad_norm": 0.44324785470962524, |
|
"learning_rate": 1.6650163127468376e-05, |
|
"loss": 2.2858, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 0.6673840377306653, |
|
"grad_norm": 0.45351341366767883, |
|
"learning_rate": 1.667878198156946e-05, |
|
"loss": 2.2761, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 0.6685287787902376, |
|
"grad_norm": 0.49566417932510376, |
|
"learning_rate": 1.670740083567054e-05, |
|
"loss": 2.2684, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 0.6696735198498099, |
|
"grad_norm": 0.48700281977653503, |
|
"learning_rate": 1.6736019689771623e-05, |
|
"loss": 2.2683, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.6708182609093823, |
|
"grad_norm": 0.47343066334724426, |
|
"learning_rate": 1.6764638543872706e-05, |
|
"loss": 2.2727, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 0.6719630019689546, |
|
"grad_norm": 0.4507409334182739, |
|
"learning_rate": 1.6793257397973785e-05, |
|
"loss": 2.2774, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 0.6731077430285269, |
|
"grad_norm": 0.5125613808631897, |
|
"learning_rate": 1.682187625207487e-05, |
|
"loss": 2.2724, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 0.6742524840880992, |
|
"grad_norm": 0.4266802966594696, |
|
"learning_rate": 1.685049510617595e-05, |
|
"loss": 2.264, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 0.6753972251476716, |
|
"grad_norm": 0.4939129650592804, |
|
"learning_rate": 1.687882777173602e-05, |
|
"loss": 2.2736, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.6765419662072439, |
|
"grad_norm": 0.4348323345184326, |
|
"learning_rate": 1.6907446625837102e-05, |
|
"loss": 2.2647, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 0.6776867072668162, |
|
"grad_norm": 0.4148264527320862, |
|
"learning_rate": 1.6936065479938185e-05, |
|
"loss": 2.2731, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 0.6788314483263885, |
|
"grad_norm": 0.5018272399902344, |
|
"learning_rate": 1.6964684334039267e-05, |
|
"loss": 2.2798, |
|
"step": 59300 |
|
}, |
|
{ |
|
"epoch": 0.6799761893859609, |
|
"grad_norm": 0.405222624540329, |
|
"learning_rate": 1.699330318814035e-05, |
|
"loss": 2.2716, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 0.6811209304455332, |
|
"grad_norm": 0.43340057134628296, |
|
"learning_rate": 1.702192204224143e-05, |
|
"loss": 2.2616, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.6822656715051055, |
|
"grad_norm": 0.4897302985191345, |
|
"learning_rate": 1.705054089634251e-05, |
|
"loss": 2.2937, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 0.6834104125646778, |
|
"grad_norm": 0.45297375321388245, |
|
"learning_rate": 1.7079159750443593e-05, |
|
"loss": 2.2646, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 0.6845551536242502, |
|
"grad_norm": 0.5536375641822815, |
|
"learning_rate": 1.7107778604544676e-05, |
|
"loss": 2.2762, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 0.6856998946838225, |
|
"grad_norm": 0.42413586378097534, |
|
"learning_rate": 1.7136397458645758e-05, |
|
"loss": 2.2578, |
|
"step": 59900 |
|
}, |
|
{ |
|
"epoch": 0.6868446357433948, |
|
"grad_norm": 0.45568087697029114, |
|
"learning_rate": 1.7165016312746837e-05, |
|
"loss": 2.2875, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.6879893768029671, |
|
"grad_norm": 0.45397791266441345, |
|
"learning_rate": 1.7193635166847923e-05, |
|
"loss": 2.28, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 0.6891341178625395, |
|
"grad_norm": 0.4058510661125183, |
|
"learning_rate": 1.7222254020949002e-05, |
|
"loss": 2.2845, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 0.6902788589221118, |
|
"grad_norm": 0.4380168318748474, |
|
"learning_rate": 1.7250872875050084e-05, |
|
"loss": 2.2574, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 0.6914235999816841, |
|
"grad_norm": 0.4758777320384979, |
|
"learning_rate": 1.7279491729151167e-05, |
|
"loss": 2.2597, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 0.6925683410412564, |
|
"grad_norm": 0.45504075288772583, |
|
"learning_rate": 1.730811058325225e-05, |
|
"loss": 2.2763, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.6937130821008288, |
|
"grad_norm": 0.4878067374229431, |
|
"learning_rate": 1.733672943735333e-05, |
|
"loss": 2.2811, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 0.6948578231604011, |
|
"grad_norm": 0.49453550577163696, |
|
"learning_rate": 1.736534829145441e-05, |
|
"loss": 2.2545, |
|
"step": 60700 |
|
}, |
|
{ |
|
"epoch": 0.6960025642199734, |
|
"grad_norm": 0.43168744444847107, |
|
"learning_rate": 1.7393967145555493e-05, |
|
"loss": 2.2584, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 0.6971473052795457, |
|
"grad_norm": 0.5180889368057251, |
|
"learning_rate": 1.7422585999656575e-05, |
|
"loss": 2.274, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 0.6982920463391181, |
|
"grad_norm": 0.4250308573246002, |
|
"learning_rate": 1.7451204853757658e-05, |
|
"loss": 2.2531, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.6994367873986904, |
|
"grad_norm": 0.40109291672706604, |
|
"learning_rate": 1.7479823707858737e-05, |
|
"loss": 2.2771, |
|
"step": 61100 |
|
}, |
|
{ |
|
"epoch": 0.7005815284582627, |
|
"grad_norm": 0.4581041932106018, |
|
"learning_rate": 1.750844256195982e-05, |
|
"loss": 2.2713, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 0.701726269517835, |
|
"grad_norm": 0.44385623931884766, |
|
"learning_rate": 1.75370614160609e-05, |
|
"loss": 2.2559, |
|
"step": 61300 |
|
}, |
|
{ |
|
"epoch": 0.7028710105774074, |
|
"grad_norm": 0.44979363679885864, |
|
"learning_rate": 1.7565680270161984e-05, |
|
"loss": 2.2533, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 0.7040157516369797, |
|
"grad_norm": 0.44290637969970703, |
|
"learning_rate": 1.7594299124263066e-05, |
|
"loss": 2.2726, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.705160492696552, |
|
"grad_norm": 0.4479668438434601, |
|
"learning_rate": 1.7622917978364145e-05, |
|
"loss": 2.2673, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 0.7063052337561244, |
|
"grad_norm": 0.416456401348114, |
|
"learning_rate": 1.765153683246523e-05, |
|
"loss": 2.266, |
|
"step": 61700 |
|
}, |
|
{ |
|
"epoch": 0.7074499748156967, |
|
"grad_norm": 0.45117634534835815, |
|
"learning_rate": 1.768015568656631e-05, |
|
"loss": 2.2791, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 0.708594715875269, |
|
"grad_norm": 0.5188822150230408, |
|
"learning_rate": 1.7708774540667393e-05, |
|
"loss": 2.266, |
|
"step": 61900 |
|
}, |
|
{ |
|
"epoch": 0.7097394569348413, |
|
"grad_norm": 0.47650662064552307, |
|
"learning_rate": 1.7737393394768475e-05, |
|
"loss": 2.2695, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.7108841979944137, |
|
"grad_norm": 0.5149694681167603, |
|
"learning_rate": 1.7766012248869557e-05, |
|
"loss": 2.2775, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 0.712028939053986, |
|
"grad_norm": 0.4305098354816437, |
|
"learning_rate": 1.779463110297064e-05, |
|
"loss": 2.2722, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 0.7131736801135583, |
|
"grad_norm": 0.48085054755210876, |
|
"learning_rate": 1.782324995707172e-05, |
|
"loss": 2.2712, |
|
"step": 62300 |
|
}, |
|
{ |
|
"epoch": 0.7143184211731306, |
|
"grad_norm": 0.4287306070327759, |
|
"learning_rate": 1.78518688111728e-05, |
|
"loss": 2.2629, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 0.715463162232703, |
|
"grad_norm": 0.45178937911987305, |
|
"learning_rate": 1.7880487665273884e-05, |
|
"loss": 2.2787, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.7166079032922753, |
|
"grad_norm": 0.5393545031547546, |
|
"learning_rate": 1.7909106519374966e-05, |
|
"loss": 2.2803, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 0.7177526443518476, |
|
"grad_norm": 0.4494490325450897, |
|
"learning_rate": 1.793772537347605e-05, |
|
"loss": 2.2714, |
|
"step": 62700 |
|
}, |
|
{ |
|
"epoch": 0.7188973854114199, |
|
"grad_norm": 0.43690425157546997, |
|
"learning_rate": 1.7966344227577127e-05, |
|
"loss": 2.2639, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 0.7200421264709923, |
|
"grad_norm": 0.5241349339485168, |
|
"learning_rate": 1.7994963081678213e-05, |
|
"loss": 2.2639, |
|
"step": 62900 |
|
}, |
|
{ |
|
"epoch": 0.7211868675305646, |
|
"grad_norm": 0.5191497206687927, |
|
"learning_rate": 1.8023581935779292e-05, |
|
"loss": 2.269, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.7223316085901369, |
|
"grad_norm": 0.4875340461730957, |
|
"learning_rate": 1.8052200789880375e-05, |
|
"loss": 2.262, |
|
"step": 63100 |
|
}, |
|
{ |
|
"epoch": 0.7234763496497092, |
|
"grad_norm": 0.47728395462036133, |
|
"learning_rate": 1.8080533455440445e-05, |
|
"loss": 2.2653, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 0.7246210907092816, |
|
"grad_norm": 0.517727792263031, |
|
"learning_rate": 1.8109152309541528e-05, |
|
"loss": 2.2505, |
|
"step": 63300 |
|
}, |
|
{ |
|
"epoch": 0.7257658317688539, |
|
"grad_norm": 0.5039493441581726, |
|
"learning_rate": 1.813777116364261e-05, |
|
"loss": 2.2858, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 0.7269105728284262, |
|
"grad_norm": 0.5291385054588318, |
|
"learning_rate": 1.8166390017743692e-05, |
|
"loss": 2.2559, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.7280553138879985, |
|
"grad_norm": 0.4564548432826996, |
|
"learning_rate": 1.819500887184477e-05, |
|
"loss": 2.2834, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 0.7292000549475709, |
|
"grad_norm": 0.4725103974342346, |
|
"learning_rate": 1.8223627725945854e-05, |
|
"loss": 2.2542, |
|
"step": 63700 |
|
}, |
|
{ |
|
"epoch": 0.7303447960071432, |
|
"grad_norm": 0.475724995136261, |
|
"learning_rate": 1.8252246580046936e-05, |
|
"loss": 2.2672, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 0.7314895370667155, |
|
"grad_norm": 0.46552959084510803, |
|
"learning_rate": 1.828086543414802e-05, |
|
"loss": 2.276, |
|
"step": 63900 |
|
}, |
|
{ |
|
"epoch": 0.7326342781262878, |
|
"grad_norm": 0.4661727845668793, |
|
"learning_rate": 1.83094842882491e-05, |
|
"loss": 2.2629, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.7337790191858602, |
|
"grad_norm": 0.5402230620384216, |
|
"learning_rate": 1.833810314235018e-05, |
|
"loss": 2.2578, |
|
"step": 64100 |
|
}, |
|
{ |
|
"epoch": 0.7349237602454325, |
|
"grad_norm": 0.5088352560997009, |
|
"learning_rate": 1.8366721996451266e-05, |
|
"loss": 2.2584, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 0.7360685013050048, |
|
"grad_norm": 0.4990089535713196, |
|
"learning_rate": 1.8395340850552345e-05, |
|
"loss": 2.2565, |
|
"step": 64300 |
|
}, |
|
{ |
|
"epoch": 0.7372132423645771, |
|
"grad_norm": 0.4477214813232422, |
|
"learning_rate": 1.8423959704653427e-05, |
|
"loss": 2.2702, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 0.7383579834241495, |
|
"grad_norm": 0.43167996406555176, |
|
"learning_rate": 1.845257855875451e-05, |
|
"loss": 2.2744, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.7395027244837218, |
|
"grad_norm": 0.4676847457885742, |
|
"learning_rate": 1.8481197412855592e-05, |
|
"loss": 2.2624, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 0.7406474655432941, |
|
"grad_norm": 0.5216718912124634, |
|
"learning_rate": 1.8509816266956674e-05, |
|
"loss": 2.2801, |
|
"step": 64700 |
|
}, |
|
{ |
|
"epoch": 0.7417922066028664, |
|
"grad_norm": 0.4484277665615082, |
|
"learning_rate": 1.8538435121057753e-05, |
|
"loss": 2.2567, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 0.7429369476624388, |
|
"grad_norm": 0.47648686170578003, |
|
"learning_rate": 1.8567053975158836e-05, |
|
"loss": 2.2605, |
|
"step": 64900 |
|
}, |
|
{ |
|
"epoch": 0.7440816887220111, |
|
"grad_norm": 0.46372881531715393, |
|
"learning_rate": 1.8595672829259918e-05, |
|
"loss": 2.2673, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.7452264297815834, |
|
"grad_norm": 0.43808409571647644, |
|
"learning_rate": 1.8624291683361e-05, |
|
"loss": 2.263, |
|
"step": 65100 |
|
}, |
|
{ |
|
"epoch": 0.7463711708411558, |
|
"grad_norm": 0.4345923364162445, |
|
"learning_rate": 1.865291053746208e-05, |
|
"loss": 2.2638, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 0.7475159119007281, |
|
"grad_norm": 0.49368196725845337, |
|
"learning_rate": 1.8681529391563162e-05, |
|
"loss": 2.271, |
|
"step": 65300 |
|
}, |
|
{ |
|
"epoch": 0.7486606529603004, |
|
"grad_norm": 0.46279481053352356, |
|
"learning_rate": 1.8710148245664244e-05, |
|
"loss": 2.2721, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 0.7498053940198727, |
|
"grad_norm": 0.423225998878479, |
|
"learning_rate": 1.8738767099765327e-05, |
|
"loss": 2.2634, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.7509501350794451, |
|
"grad_norm": 0.48171648383140564, |
|
"learning_rate": 1.876738595386641e-05, |
|
"loss": 2.2656, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 0.7520948761390174, |
|
"grad_norm": 0.41427454352378845, |
|
"learning_rate": 1.879571861942648e-05, |
|
"loss": 2.2663, |
|
"step": 65700 |
|
}, |
|
{ |
|
"epoch": 0.7532396171985897, |
|
"grad_norm": 0.5507281422615051, |
|
"learning_rate": 1.8824337473527562e-05, |
|
"loss": 2.2572, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 0.754384358258162, |
|
"grad_norm": 0.44962623715400696, |
|
"learning_rate": 1.8852956327628644e-05, |
|
"loss": 2.2745, |
|
"step": 65900 |
|
}, |
|
{ |
|
"epoch": 0.7555290993177344, |
|
"grad_norm": 0.5174722671508789, |
|
"learning_rate": 1.8881575181729727e-05, |
|
"loss": 2.2387, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.7566738403773067, |
|
"grad_norm": 0.5622259974479675, |
|
"learning_rate": 1.8910194035830806e-05, |
|
"loss": 2.2602, |
|
"step": 66100 |
|
}, |
|
{ |
|
"epoch": 0.757818581436879, |
|
"grad_norm": 0.5210707187652588, |
|
"learning_rate": 1.8938812889931888e-05, |
|
"loss": 2.2643, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 0.7589633224964513, |
|
"grad_norm": 0.486509770154953, |
|
"learning_rate": 1.896743174403297e-05, |
|
"loss": 2.2707, |
|
"step": 66300 |
|
}, |
|
{ |
|
"epoch": 0.7601080635560237, |
|
"grad_norm": 0.47192618250846863, |
|
"learning_rate": 1.8996050598134053e-05, |
|
"loss": 2.2613, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 0.761252804615596, |
|
"grad_norm": 0.5344927906990051, |
|
"learning_rate": 1.9024669452235135e-05, |
|
"loss": 2.2561, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.7623975456751683, |
|
"grad_norm": 0.5391865372657776, |
|
"learning_rate": 1.9053288306336214e-05, |
|
"loss": 2.2763, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 0.7635422867347406, |
|
"grad_norm": 0.45489776134490967, |
|
"learning_rate": 1.90819071604373e-05, |
|
"loss": 2.2611, |
|
"step": 66700 |
|
}, |
|
{ |
|
"epoch": 0.764687027794313, |
|
"grad_norm": 0.38119086623191833, |
|
"learning_rate": 1.911052601453838e-05, |
|
"loss": 2.2602, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 0.7658317688538853, |
|
"grad_norm": 0.49369150400161743, |
|
"learning_rate": 1.913914486863946e-05, |
|
"loss": 2.2613, |
|
"step": 66900 |
|
}, |
|
{ |
|
"epoch": 0.7669765099134576, |
|
"grad_norm": 0.6532511711120605, |
|
"learning_rate": 1.916776372274054e-05, |
|
"loss": 2.268, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.7681212509730299, |
|
"grad_norm": 0.4440617263317108, |
|
"learning_rate": 1.9196382576841626e-05, |
|
"loss": 2.2776, |
|
"step": 67100 |
|
}, |
|
{ |
|
"epoch": 0.7692659920326023, |
|
"grad_norm": 0.46082597970962524, |
|
"learning_rate": 1.9225001430942705e-05, |
|
"loss": 2.255, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 0.7704107330921746, |
|
"grad_norm": 0.40288957953453064, |
|
"learning_rate": 1.9253620285043788e-05, |
|
"loss": 2.2652, |
|
"step": 67300 |
|
}, |
|
{ |
|
"epoch": 0.7715554741517469, |
|
"grad_norm": 0.51495760679245, |
|
"learning_rate": 1.928223913914487e-05, |
|
"loss": 2.2651, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 0.7727002152113192, |
|
"grad_norm": 0.5247004628181458, |
|
"learning_rate": 1.9310857993245953e-05, |
|
"loss": 2.2534, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.7738449562708916, |
|
"grad_norm": 0.4820224344730377, |
|
"learning_rate": 1.9339476847347035e-05, |
|
"loss": 2.2811, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 0.7749896973304639, |
|
"grad_norm": 0.43811190128326416, |
|
"learning_rate": 1.9368095701448114e-05, |
|
"loss": 2.2638, |
|
"step": 67700 |
|
}, |
|
{ |
|
"epoch": 0.7761344383900362, |
|
"grad_norm": 0.4226974546909332, |
|
"learning_rate": 1.9396714555549196e-05, |
|
"loss": 2.2605, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 0.7772791794496084, |
|
"grad_norm": 0.4748658835887909, |
|
"learning_rate": 1.9425047221109267e-05, |
|
"loss": 2.2617, |
|
"step": 67900 |
|
}, |
|
{ |
|
"epoch": 0.7784239205091809, |
|
"grad_norm": 0.4761633574962616, |
|
"learning_rate": 1.9453666075210353e-05, |
|
"loss": 2.2614, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.7795686615687532, |
|
"grad_norm": 0.48032355308532715, |
|
"learning_rate": 1.948228492931143e-05, |
|
"loss": 2.2443, |
|
"step": 68100 |
|
}, |
|
{ |
|
"epoch": 0.7807134026283254, |
|
"grad_norm": 0.4990929961204529, |
|
"learning_rate": 1.9510903783412514e-05, |
|
"loss": 2.2702, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 0.7818581436878977, |
|
"grad_norm": 0.40114450454711914, |
|
"learning_rate": 1.9539522637513596e-05, |
|
"loss": 2.2482, |
|
"step": 68300 |
|
}, |
|
{ |
|
"epoch": 0.7830028847474702, |
|
"grad_norm": 0.4980379343032837, |
|
"learning_rate": 1.9567855303073667e-05, |
|
"loss": 2.2706, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 0.7841476258070424, |
|
"grad_norm": 0.42115017771720886, |
|
"learning_rate": 1.9596474157174746e-05, |
|
"loss": 2.2741, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.7852923668666147, |
|
"grad_norm": 0.4727267622947693, |
|
"learning_rate": 1.9625093011275832e-05, |
|
"loss": 2.2649, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 0.7864371079261872, |
|
"grad_norm": 0.47092244029045105, |
|
"learning_rate": 1.965371186537691e-05, |
|
"loss": 2.2482, |
|
"step": 68700 |
|
}, |
|
{ |
|
"epoch": 0.7875818489857594, |
|
"grad_norm": 0.4399222433567047, |
|
"learning_rate": 1.9682330719477993e-05, |
|
"loss": 2.2601, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 0.7887265900453317, |
|
"grad_norm": 0.44569170475006104, |
|
"learning_rate": 1.9710949573579076e-05, |
|
"loss": 2.2679, |
|
"step": 68900 |
|
}, |
|
{ |
|
"epoch": 0.789871331104904, |
|
"grad_norm": 0.43348217010498047, |
|
"learning_rate": 1.9739568427680158e-05, |
|
"loss": 2.2529, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.7910160721644764, |
|
"grad_norm": 0.4533138573169708, |
|
"learning_rate": 1.976818728178124e-05, |
|
"loss": 2.2634, |
|
"step": 69100 |
|
}, |
|
{ |
|
"epoch": 0.7921608132240487, |
|
"grad_norm": 0.5940411686897278, |
|
"learning_rate": 1.979680613588232e-05, |
|
"loss": 2.2526, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 0.793305554283621, |
|
"grad_norm": 0.4553944766521454, |
|
"learning_rate": 1.9825424989983402e-05, |
|
"loss": 2.2514, |
|
"step": 69300 |
|
}, |
|
{ |
|
"epoch": 0.7944502953431933, |
|
"grad_norm": 0.5016659498214722, |
|
"learning_rate": 1.9854043844084484e-05, |
|
"loss": 2.2452, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 0.7955950364027657, |
|
"grad_norm": 0.45411109924316406, |
|
"learning_rate": 1.9882662698185567e-05, |
|
"loss": 2.2576, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.796739777462338, |
|
"grad_norm": 0.3970607817173004, |
|
"learning_rate": 1.991128155228665e-05, |
|
"loss": 2.2403, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 0.7978845185219103, |
|
"grad_norm": 0.47627323865890503, |
|
"learning_rate": 1.9939900406387728e-05, |
|
"loss": 2.2614, |
|
"step": 69700 |
|
}, |
|
{ |
|
"epoch": 0.7990292595814826, |
|
"grad_norm": 0.4684958755970001, |
|
"learning_rate": 1.9968519260488814e-05, |
|
"loss": 2.2614, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 0.800174000641055, |
|
"grad_norm": 0.418066143989563, |
|
"learning_rate": 1.9997138114589893e-05, |
|
"loss": 2.2627, |
|
"step": 69900 |
|
}, |
|
{ |
|
"epoch": 0.8013187417006273, |
|
"grad_norm": 0.43731051683425903, |
|
"learning_rate": 1.9998690637047088e-05, |
|
"loss": 2.2473, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.8024634827601996, |
|
"grad_norm": 0.43731534481048584, |
|
"learning_rate": 1.999416488433588e-05, |
|
"loss": 2.2512, |
|
"step": 70100 |
|
}, |
|
{ |
|
"epoch": 0.8036082238197719, |
|
"grad_norm": 0.46741268038749695, |
|
"learning_rate": 1.99864080397093e-05, |
|
"loss": 2.2373, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 0.8047529648793443, |
|
"grad_norm": 0.4025750160217285, |
|
"learning_rate": 1.9975422610938463e-05, |
|
"loss": 2.2711, |
|
"step": 70300 |
|
}, |
|
{ |
|
"epoch": 0.8058977059389166, |
|
"grad_norm": 0.5251903533935547, |
|
"learning_rate": 1.996121214958875e-05, |
|
"loss": 2.2627, |
|
"step": 70400 |
|
}, |
|
{ |
|
"epoch": 0.8070424469984889, |
|
"grad_norm": 0.5502318143844604, |
|
"learning_rate": 1.9943781249871618e-05, |
|
"loss": 2.2615, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.8081871880580612, |
|
"grad_norm": 0.4485688805580139, |
|
"learning_rate": 1.992313554715929e-05, |
|
"loss": 2.2486, |
|
"step": 70600 |
|
}, |
|
{ |
|
"epoch": 0.8093319291176336, |
|
"grad_norm": 0.42768144607543945, |
|
"learning_rate": 1.9899281716162846e-05, |
|
"loss": 2.2602, |
|
"step": 70700 |
|
}, |
|
{ |
|
"epoch": 0.8104766701772059, |
|
"grad_norm": 0.446748286485672, |
|
"learning_rate": 1.987222746877431e-05, |
|
"loss": 2.2705, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 0.8116214112367782, |
|
"grad_norm": 0.4175347685813904, |
|
"learning_rate": 1.9841981551573424e-05, |
|
"loss": 2.2437, |
|
"step": 70900 |
|
}, |
|
{ |
|
"epoch": 0.8127661522963505, |
|
"grad_norm": 0.4600895047187805, |
|
"learning_rate": 1.9808553742999863e-05, |
|
"loss": 2.2592, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.8139108933559229, |
|
"grad_norm": 0.4427413046360016, |
|
"learning_rate": 1.9771954850191927e-05, |
|
"loss": 2.2508, |
|
"step": 71100 |
|
}, |
|
{ |
|
"epoch": 0.8150556344154952, |
|
"grad_norm": 0.41499242186546326, |
|
"learning_rate": 1.973219670549259e-05, |
|
"loss": 2.2677, |
|
"step": 71200 |
|
}, |
|
{ |
|
"epoch": 0.8162003754750675, |
|
"grad_norm": 0.48436206579208374, |
|
"learning_rate": 1.9689292162624135e-05, |
|
"loss": 2.2569, |
|
"step": 71300 |
|
}, |
|
{ |
|
"epoch": 0.8173451165346398, |
|
"grad_norm": 0.6114481687545776, |
|
"learning_rate": 1.9643255092532582e-05, |
|
"loss": 2.262, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 0.8184898575942122, |
|
"grad_norm": 0.489258348941803, |
|
"learning_rate": 1.959410037890323e-05, |
|
"loss": 2.2376, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.8196345986537845, |
|
"grad_norm": 0.5034488439559937, |
|
"learning_rate": 1.9541843913348804e-05, |
|
"loss": 2.2542, |
|
"step": 71600 |
|
}, |
|
{ |
|
"epoch": 0.8207793397133568, |
|
"grad_norm": 0.4198042154312134, |
|
"learning_rate": 1.948650259027172e-05, |
|
"loss": 2.2509, |
|
"step": 71700 |
|
}, |
|
{ |
|
"epoch": 0.8219240807729291, |
|
"grad_norm": 0.462247759103775, |
|
"learning_rate": 1.9428094301402164e-05, |
|
"loss": 2.2493, |
|
"step": 71800 |
|
}, |
|
{ |
|
"epoch": 0.8230688218325015, |
|
"grad_norm": 0.4464458227157593, |
|
"learning_rate": 1.936663793001374e-05, |
|
"loss": 2.2645, |
|
"step": 71900 |
|
}, |
|
{ |
|
"epoch": 0.8242135628920738, |
|
"grad_norm": 0.5643649697303772, |
|
"learning_rate": 1.930215334481855e-05, |
|
"loss": 2.2455, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.8253583039516461, |
|
"grad_norm": 0.48486921191215515, |
|
"learning_rate": 1.9234661393543668e-05, |
|
"loss": 2.256, |
|
"step": 72100 |
|
}, |
|
{ |
|
"epoch": 0.8265030450112185, |
|
"grad_norm": 0.4897540807723999, |
|
"learning_rate": 1.9164183896191133e-05, |
|
"loss": 2.2453, |
|
"step": 72200 |
|
}, |
|
{ |
|
"epoch": 0.8276477860707908, |
|
"grad_norm": 0.47083780169487, |
|
"learning_rate": 1.9090743637983577e-05, |
|
"loss": 2.239, |
|
"step": 72300 |
|
}, |
|
{ |
|
"epoch": 0.8287925271303631, |
|
"grad_norm": 0.42039257287979126, |
|
"learning_rate": 1.9014364361997813e-05, |
|
"loss": 2.2483, |
|
"step": 72400 |
|
}, |
|
{ |
|
"epoch": 0.8299372681899354, |
|
"grad_norm": 0.5170241594314575, |
|
"learning_rate": 1.8935070761488754e-05, |
|
"loss": 2.2686, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.8310820092495078, |
|
"grad_norm": 0.46110501885414124, |
|
"learning_rate": 1.885288847190614e-05, |
|
"loss": 2.2444, |
|
"step": 72600 |
|
}, |
|
{ |
|
"epoch": 0.8322267503090801, |
|
"grad_norm": 0.48973456025123596, |
|
"learning_rate": 1.876784406260664e-05, |
|
"loss": 2.2475, |
|
"step": 72700 |
|
}, |
|
{ |
|
"epoch": 0.8333714913686524, |
|
"grad_norm": 0.45546847581863403, |
|
"learning_rate": 1.8679965028264055e-05, |
|
"loss": 2.2519, |
|
"step": 72800 |
|
}, |
|
{ |
|
"epoch": 0.8345162324282247, |
|
"grad_norm": 0.49703338742256165, |
|
"learning_rate": 1.859020042770291e-05, |
|
"loss": 2.2518, |
|
"step": 72900 |
|
}, |
|
{ |
|
"epoch": 0.8356609734877971, |
|
"grad_norm": 0.494150310754776, |
|
"learning_rate": 1.8496765904697226e-05, |
|
"loss": 2.2682, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.8368057145473694, |
|
"grad_norm": 0.46213245391845703, |
|
"learning_rate": 1.840058439563126e-05, |
|
"loss": 2.2499, |
|
"step": 73100 |
|
}, |
|
{ |
|
"epoch": 0.8379504556069417, |
|
"grad_norm": 0.40609198808670044, |
|
"learning_rate": 1.830168699577909e-05, |
|
"loss": 2.2706, |
|
"step": 73200 |
|
}, |
|
{ |
|
"epoch": 0.839095196666514, |
|
"grad_norm": 0.6065017580986023, |
|
"learning_rate": 1.820010567845644e-05, |
|
"loss": 2.2553, |
|
"step": 73300 |
|
}, |
|
{ |
|
"epoch": 0.8402399377260864, |
|
"grad_norm": 0.44308215379714966, |
|
"learning_rate": 1.809587328468373e-05, |
|
"loss": 2.2503, |
|
"step": 73400 |
|
}, |
|
{ |
|
"epoch": 0.8413846787856587, |
|
"grad_norm": 0.41164156794548035, |
|
"learning_rate": 1.7989023512568686e-05, |
|
"loss": 2.2491, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.842529419845231, |
|
"grad_norm": 0.4938010573387146, |
|
"learning_rate": 1.7879590906411786e-05, |
|
"loss": 2.2581, |
|
"step": 73600 |
|
}, |
|
{ |
|
"epoch": 0.8436741609048033, |
|
"grad_norm": 0.4634738862514496, |
|
"learning_rate": 1.7767610845538178e-05, |
|
"loss": 2.2665, |
|
"step": 73700 |
|
}, |
|
{ |
|
"epoch": 0.8448189019643757, |
|
"grad_norm": 0.43750569224357605, |
|
"learning_rate": 1.7653119532859626e-05, |
|
"loss": 2.2497, |
|
"step": 73800 |
|
}, |
|
{ |
|
"epoch": 0.845963643023948, |
|
"grad_norm": 0.5143136978149414, |
|
"learning_rate": 1.7536153983170157e-05, |
|
"loss": 2.2318, |
|
"step": 73900 |
|
}, |
|
{ |
|
"epoch": 0.8471083840835203, |
|
"grad_norm": 0.5141619443893433, |
|
"learning_rate": 1.7416752011179294e-05, |
|
"loss": 2.2418, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.8482531251430926, |
|
"grad_norm": 0.46615302562713623, |
|
"learning_rate": 1.7296181957754126e-05, |
|
"loss": 2.2496, |
|
"step": 74100 |
|
}, |
|
{ |
|
"epoch": 0.849397866202665, |
|
"grad_norm": 0.4517097473144531, |
|
"learning_rate": 1.71720471105587e-05, |
|
"loss": 2.2501, |
|
"step": 74200 |
|
}, |
|
{ |
|
"epoch": 0.8505426072622373, |
|
"grad_norm": 0.4446674883365631, |
|
"learning_rate": 1.7045593556027164e-05, |
|
"loss": 2.2487, |
|
"step": 74300 |
|
}, |
|
{ |
|
"epoch": 0.8516873483218096, |
|
"grad_norm": 0.5222780108451843, |
|
"learning_rate": 1.691686217632051e-05, |
|
"loss": 2.2495, |
|
"step": 74400 |
|
}, |
|
{ |
|
"epoch": 0.8528320893813819, |
|
"grad_norm": 0.4754522442817688, |
|
"learning_rate": 1.678589459001567e-05, |
|
"loss": 2.251, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.8539768304409543, |
|
"grad_norm": 0.46791428327560425, |
|
"learning_rate": 1.6652733138650367e-05, |
|
"loss": 2.2502, |
|
"step": 74600 |
|
}, |
|
{ |
|
"epoch": 0.8551215715005266, |
|
"grad_norm": 0.5582058429718018, |
|
"learning_rate": 1.651742087303412e-05, |
|
"loss": 2.249, |
|
"step": 74700 |
|
}, |
|
{ |
|
"epoch": 0.8562663125600989, |
|
"grad_norm": 0.5812872648239136, |
|
"learning_rate": 1.6380001539330088e-05, |
|
"loss": 2.2402, |
|
"step": 74800 |
|
}, |
|
{ |
|
"epoch": 0.8574110536196712, |
|
"grad_norm": 0.4505755305290222, |
|
"learning_rate": 1.624051956491196e-05, |
|
"loss": 2.2445, |
|
"step": 74900 |
|
}, |
|
{ |
|
"epoch": 0.8585557946792436, |
|
"grad_norm": 0.46958789229393005, |
|
"learning_rate": 1.609902004400073e-05, |
|
"loss": 2.2729, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.8597005357388159, |
|
"grad_norm": 0.44104379415512085, |
|
"learning_rate": 1.5955548723085804e-05, |
|
"loss": 2.2429, |
|
"step": 75100 |
|
}, |
|
{ |
|
"epoch": 0.8608452767983882, |
|
"grad_norm": 0.4598877429962158, |
|
"learning_rate": 1.581015198613528e-05, |
|
"loss": 2.2471, |
|
"step": 75200 |
|
}, |
|
{ |
|
"epoch": 0.8619900178579605, |
|
"grad_norm": 0.43592801690101624, |
|
"learning_rate": 1.5662876839600084e-05, |
|
"loss": 2.2537, |
|
"step": 75300 |
|
}, |
|
{ |
|
"epoch": 0.8631347589175329, |
|
"grad_norm": 0.46111100912094116, |
|
"learning_rate": 1.551377089721692e-05, |
|
"loss": 2.2262, |
|
"step": 75400 |
|
}, |
|
{ |
|
"epoch": 0.8642794999771052, |
|
"grad_norm": 0.4764077961444855, |
|
"learning_rate": 1.5362882364614825e-05, |
|
"loss": 2.2556, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.8654242410366775, |
|
"grad_norm": 0.5170374512672424, |
|
"learning_rate": 1.5210260023730402e-05, |
|
"loss": 2.2654, |
|
"step": 75600 |
|
}, |
|
{ |
|
"epoch": 0.8665689820962498, |
|
"grad_norm": 0.4534991383552551, |
|
"learning_rate": 1.5055953217036735e-05, |
|
"loss": 2.2647, |
|
"step": 75700 |
|
}, |
|
{ |
|
"epoch": 0.8677137231558222, |
|
"grad_norm": 0.42923828959465027, |
|
"learning_rate": 1.490001183159105e-05, |
|
"loss": 2.2366, |
|
"step": 75800 |
|
}, |
|
{ |
|
"epoch": 0.8688584642153945, |
|
"grad_norm": 0.5356833934783936, |
|
"learning_rate": 1.474248628290637e-05, |
|
"loss": 2.2459, |
|
"step": 75900 |
|
}, |
|
{ |
|
"epoch": 0.8700032052749668, |
|
"grad_norm": 0.5153579115867615, |
|
"learning_rate": 1.4583427498652252e-05, |
|
"loss": 2.2416, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.8711479463345392, |
|
"grad_norm": 0.48512566089630127, |
|
"learning_rate": 1.4422886902190014e-05, |
|
"loss": 2.2519, |
|
"step": 76100 |
|
}, |
|
{ |
|
"epoch": 0.8722926873941115, |
|
"grad_norm": 0.5230854153633118, |
|
"learning_rate": 1.4260916395947657e-05, |
|
"loss": 2.2429, |
|
"step": 76200 |
|
}, |
|
{ |
|
"epoch": 0.8734374284536838, |
|
"grad_norm": 0.4530618190765381, |
|
"learning_rate": 1.4097568344639916e-05, |
|
"loss": 2.2675, |
|
"step": 76300 |
|
}, |
|
{ |
|
"epoch": 0.8745821695132561, |
|
"grad_norm": 0.4915354251861572, |
|
"learning_rate": 1.3932895558338879e-05, |
|
"loss": 2.2443, |
|
"step": 76400 |
|
}, |
|
{ |
|
"epoch": 0.8757269105728285, |
|
"grad_norm": 0.437199205160141, |
|
"learning_rate": 1.3766951275400596e-05, |
|
"loss": 2.2549, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.8768716516324008, |
|
"grad_norm": 0.48987364768981934, |
|
"learning_rate": 1.3599789145253226e-05, |
|
"loss": 2.2294, |
|
"step": 76600 |
|
}, |
|
{ |
|
"epoch": 0.8780163926919731, |
|
"grad_norm": 0.40682971477508545, |
|
"learning_rate": 1.34314632110523e-05, |
|
"loss": 2.2404, |
|
"step": 76700 |
|
}, |
|
{ |
|
"epoch": 0.8791611337515454, |
|
"grad_norm": 0.4797169268131256, |
|
"learning_rate": 1.3262027892208696e-05, |
|
"loss": 2.2613, |
|
"step": 76800 |
|
}, |
|
{ |
|
"epoch": 0.8803058748111178, |
|
"grad_norm": 0.5203677415847778, |
|
"learning_rate": 1.3091537966794933e-05, |
|
"loss": 2.2509, |
|
"step": 76900 |
|
}, |
|
{ |
|
"epoch": 0.8814506158706901, |
|
"grad_norm": 0.519087553024292, |
|
"learning_rate": 1.2920048553835574e-05, |
|
"loss": 2.2625, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.8825953569302624, |
|
"grad_norm": 0.4839503765106201, |
|
"learning_rate": 1.2747615095487331e-05, |
|
"loss": 2.2487, |
|
"step": 77100 |
|
}, |
|
{ |
|
"epoch": 0.8837400979898347, |
|
"grad_norm": 0.4150638282299042, |
|
"learning_rate": 1.2574293339114757e-05, |
|
"loss": 2.254, |
|
"step": 77200 |
|
}, |
|
{ |
|
"epoch": 0.8848848390494071, |
|
"grad_norm": 0.5131925940513611, |
|
"learning_rate": 1.240013931926724e-05, |
|
"loss": 2.238, |
|
"step": 77300 |
|
}, |
|
{ |
|
"epoch": 0.8860295801089794, |
|
"grad_norm": 0.5299201607704163, |
|
"learning_rate": 1.2225209339563144e-05, |
|
"loss": 2.2517, |
|
"step": 77400 |
|
}, |
|
{ |
|
"epoch": 0.8871743211685517, |
|
"grad_norm": 0.4446033239364624, |
|
"learning_rate": 1.204955995448699e-05, |
|
"loss": 2.2521, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.888319062228124, |
|
"grad_norm": 0.4393691122531891, |
|
"learning_rate": 1.1873247951105489e-05, |
|
"loss": 2.2634, |
|
"step": 77600 |
|
}, |
|
{ |
|
"epoch": 0.8894638032876964, |
|
"grad_norm": 0.48134180903434753, |
|
"learning_rate": 1.1696330330708421e-05, |
|
"loss": 2.2478, |
|
"step": 77700 |
|
}, |
|
{ |
|
"epoch": 0.8906085443472687, |
|
"grad_norm": 0.4115225076675415, |
|
"learning_rate": 1.1518864290380249e-05, |
|
"loss": 2.2197, |
|
"step": 77800 |
|
}, |
|
{ |
|
"epoch": 0.891753285406841, |
|
"grad_norm": 0.47142502665519714, |
|
"learning_rate": 1.1340907204508403e-05, |
|
"loss": 2.2344, |
|
"step": 77900 |
|
}, |
|
{ |
|
"epoch": 0.8928980264664133, |
|
"grad_norm": 0.5209864377975464, |
|
"learning_rate": 1.1162516606234276e-05, |
|
"loss": 2.2486, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.8940427675259857, |
|
"grad_norm": 0.4119342267513275, |
|
"learning_rate": 1.09837501688529e-05, |
|
"loss": 2.2425, |
|
"step": 78100 |
|
}, |
|
{ |
|
"epoch": 0.895187508585558, |
|
"grad_norm": 0.4754474461078644, |
|
"learning_rate": 1.0806457916272542e-05, |
|
"loss": 2.246, |
|
"step": 78200 |
|
}, |
|
{ |
|
"epoch": 0.8963322496451303, |
|
"grad_norm": 0.511458694934845, |
|
"learning_rate": 1.0627115602490508e-05, |
|
"loss": 2.2453, |
|
"step": 78300 |
|
}, |
|
{ |
|
"epoch": 0.8974769907047025, |
|
"grad_norm": 0.5435287356376648, |
|
"learning_rate": 1.0447570543589034e-05, |
|
"loss": 2.2452, |
|
"step": 78400 |
|
}, |
|
{ |
|
"epoch": 0.898621731764275, |
|
"grad_norm": 0.4102800190448761, |
|
"learning_rate": 1.0267880786097762e-05, |
|
"loss": 2.2459, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.8997664728238473, |
|
"grad_norm": 0.3983183801174164, |
|
"learning_rate": 1.0088104423327082e-05, |
|
"loss": 2.2274, |
|
"step": 78600 |
|
}, |
|
{ |
|
"epoch": 0.9009112138834195, |
|
"grad_norm": 0.44764354825019836, |
|
"learning_rate": 9.908299576586684e-06, |
|
"loss": 2.2512, |
|
"step": 78700 |
|
}, |
|
{ |
|
"epoch": 0.9020559549429918, |
|
"grad_norm": 0.5015277862548828, |
|
"learning_rate": 9.728524376395068e-06, |
|
"loss": 2.244, |
|
"step": 78800 |
|
}, |
|
{ |
|
"epoch": 0.9032006960025643, |
|
"grad_norm": 0.4522726237773895, |
|
"learning_rate": 9.548836943686055e-06, |
|
"loss": 2.2443, |
|
"step": 78900 |
|
}, |
|
{ |
|
"epoch": 0.9043454370621365, |
|
"grad_norm": 0.4915413558483124, |
|
"learning_rate": 9.369295371018442e-06, |
|
"loss": 2.246, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.9054901781217088, |
|
"grad_norm": 0.45682036876678467, |
|
"learning_rate": 9.18995770379478e-06, |
|
"loss": 2.2353, |
|
"step": 79100 |
|
}, |
|
{ |
|
"epoch": 0.9066349191812811, |
|
"grad_norm": 0.41946882009506226, |
|
"learning_rate": 9.010881921495438e-06, |
|
"loss": 2.2468, |
|
"step": 79200 |
|
}, |
|
{ |
|
"epoch": 0.9077796602408535, |
|
"grad_norm": 0.4232065975666046, |
|
"learning_rate": 8.832125918933955e-06, |
|
"loss": 2.2575, |
|
"step": 79300 |
|
}, |
|
{ |
|
"epoch": 0.9089244013004258, |
|
"grad_norm": 0.4463779926300049, |
|
"learning_rate": 8.653747487539764e-06, |
|
"loss": 2.2379, |
|
"step": 79400 |
|
}, |
|
{ |
|
"epoch": 0.9100691423599981, |
|
"grad_norm": 0.43841028213500977, |
|
"learning_rate": 8.47580429667436e-06, |
|
"loss": 2.2414, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.9112138834195705, |
|
"grad_norm": 0.43061941862106323, |
|
"learning_rate": 8.29835387498692e-06, |
|
"loss": 2.2475, |
|
"step": 79600 |
|
}, |
|
{ |
|
"epoch": 0.9123586244791428, |
|
"grad_norm": 0.41848114132881165, |
|
"learning_rate": 8.121453591815401e-06, |
|
"loss": 2.2535, |
|
"step": 79700 |
|
}, |
|
{ |
|
"epoch": 0.9135033655387151, |
|
"grad_norm": 0.4669823944568634, |
|
"learning_rate": 7.94516063863917e-06, |
|
"loss": 2.2551, |
|
"step": 79800 |
|
}, |
|
{ |
|
"epoch": 0.9146481065982874, |
|
"grad_norm": 0.4634804129600525, |
|
"learning_rate": 7.769532010589123e-06, |
|
"loss": 2.2393, |
|
"step": 79900 |
|
}, |
|
{ |
|
"epoch": 0.9157928476578598, |
|
"grad_norm": 0.43346285820007324, |
|
"learning_rate": 7.594624488021274e-06, |
|
"loss": 2.2607, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.9169375887174321, |
|
"grad_norm": 0.43006187677383423, |
|
"learning_rate": 7.420494618159791e-06, |
|
"loss": 2.2408, |
|
"step": 80100 |
|
}, |
|
{ |
|
"epoch": 0.9180823297770044, |
|
"grad_norm": 0.577170729637146, |
|
"learning_rate": 7.2471986968154075e-06, |
|
"loss": 2.2421, |
|
"step": 80200 |
|
}, |
|
{ |
|
"epoch": 0.9192270708365767, |
|
"grad_norm": 0.43540096282958984, |
|
"learning_rate": 7.074792750185093e-06, |
|
"loss": 2.2577, |
|
"step": 80300 |
|
}, |
|
{ |
|
"epoch": 0.9203718118961491, |
|
"grad_norm": 0.41239428520202637, |
|
"learning_rate": 6.905042255517393e-06, |
|
"loss": 2.2246, |
|
"step": 80400 |
|
}, |
|
{ |
|
"epoch": 0.9215165529557214, |
|
"grad_norm": 0.4757671654224396, |
|
"learning_rate": 6.734572883175328e-06, |
|
"loss": 2.243, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.9226612940152937, |
|
"grad_norm": 0.5074242353439331, |
|
"learning_rate": 6.5651592163653885e-06, |
|
"loss": 2.243, |
|
"step": 80600 |
|
}, |
|
{ |
|
"epoch": 0.923806035074866, |
|
"grad_norm": 0.4626462459564209, |
|
"learning_rate": 6.3968560261600545e-06, |
|
"loss": 2.2369, |
|
"step": 80700 |
|
}, |
|
{ |
|
"epoch": 0.9249507761344384, |
|
"grad_norm": 0.4515824317932129, |
|
"learning_rate": 6.229717724617108e-06, |
|
"loss": 2.2479, |
|
"step": 80800 |
|
}, |
|
{ |
|
"epoch": 0.9260955171940107, |
|
"grad_norm": 0.43820834159851074, |
|
"learning_rate": 6.063798347188343e-06, |
|
"loss": 2.2437, |
|
"step": 80900 |
|
}, |
|
{ |
|
"epoch": 0.927240258253583, |
|
"grad_norm": 0.47702258825302124, |
|
"learning_rate": 5.899151535250031e-06, |
|
"loss": 2.2382, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.9283849993131553, |
|
"grad_norm": 0.4041842520236969, |
|
"learning_rate": 5.735830518760757e-06, |
|
"loss": 2.2412, |
|
"step": 81100 |
|
}, |
|
{ |
|
"epoch": 0.9295297403727277, |
|
"grad_norm": 0.5503548979759216, |
|
"learning_rate": 5.573888099052307e-06, |
|
"loss": 2.2515, |
|
"step": 81200 |
|
}, |
|
{ |
|
"epoch": 0.9306744814323, |
|
"grad_norm": 0.4641313850879669, |
|
"learning_rate": 5.413376631759115e-06, |
|
"loss": 2.2299, |
|
"step": 81300 |
|
}, |
|
{ |
|
"epoch": 0.9318192224918723, |
|
"grad_norm": 0.4651617109775543, |
|
"learning_rate": 5.254348009891777e-06, |
|
"loss": 2.2394, |
|
"step": 81400 |
|
}, |
|
{ |
|
"epoch": 0.9329639635514446, |
|
"grad_norm": 0.5168155431747437, |
|
"learning_rate": 5.096853647060169e-06, |
|
"loss": 2.2464, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.934108704611017, |
|
"grad_norm": 0.48371872305870056, |
|
"learning_rate": 4.940944460851545e-06, |
|
"loss": 2.2488, |
|
"step": 81600 |
|
}, |
|
{ |
|
"epoch": 0.9352534456705893, |
|
"grad_norm": 0.4888920187950134, |
|
"learning_rate": 4.7866708563689654e-06, |
|
"loss": 2.2428, |
|
"step": 81700 |
|
}, |
|
{ |
|
"epoch": 0.9363981867301616, |
|
"grad_norm": 0.4613405764102936, |
|
"learning_rate": 4.635600085943046e-06, |
|
"loss": 2.2641, |
|
"step": 81800 |
|
}, |
|
{ |
|
"epoch": 0.9375429277897339, |
|
"grad_norm": 0.39950716495513916, |
|
"learning_rate": 4.484729138719958e-06, |
|
"loss": 2.2513, |
|
"step": 81900 |
|
}, |
|
{ |
|
"epoch": 0.9386876688493063, |
|
"grad_norm": 0.44666656851768494, |
|
"learning_rate": 4.335641266650937e-06, |
|
"loss": 2.2352, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.9398324099088786, |
|
"grad_norm": 0.6108360886573792, |
|
"learning_rate": 4.188384669522936e-06, |
|
"loss": 2.2526, |
|
"step": 82100 |
|
}, |
|
{ |
|
"epoch": 0.9409771509684509, |
|
"grad_norm": 0.4861396551132202, |
|
"learning_rate": 4.043006955075667e-06, |
|
"loss": 2.2421, |
|
"step": 82200 |
|
}, |
|
{ |
|
"epoch": 0.9421218920280232, |
|
"grad_norm": 0.4066333472728729, |
|
"learning_rate": 3.899555123610131e-06, |
|
"loss": 2.2425, |
|
"step": 82300 |
|
}, |
|
{ |
|
"epoch": 0.9432666330875956, |
|
"grad_norm": 0.47433343529701233, |
|
"learning_rate": 3.7580755527935232e-06, |
|
"loss": 2.2357, |
|
"step": 82400 |
|
}, |
|
{ |
|
"epoch": 0.9444113741471679, |
|
"grad_norm": 0.447336882352829, |
|
"learning_rate": 3.6186139826654253e-06, |
|
"loss": 2.2458, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.9455561152067402, |
|
"grad_norm": 0.48044124245643616, |
|
"learning_rate": 3.4812155008501692e-06, |
|
"loss": 2.2451, |
|
"step": 82600 |
|
}, |
|
{ |
|
"epoch": 0.9467008562663125, |
|
"grad_norm": 0.43815353512763977, |
|
"learning_rate": 3.3459245279800846e-06, |
|
"loss": 2.2457, |
|
"step": 82700 |
|
}, |
|
{ |
|
"epoch": 0.9478455973258849, |
|
"grad_norm": 0.4744986295700073, |
|
"learning_rate": 3.2127848033344124e-06, |
|
"loss": 2.2303, |
|
"step": 82800 |
|
}, |
|
{ |
|
"epoch": 0.9489903383854572, |
|
"grad_norm": 0.46926042437553406, |
|
"learning_rate": 3.0818393706984906e-06, |
|
"loss": 2.2346, |
|
"step": 82900 |
|
}, |
|
{ |
|
"epoch": 0.9501350794450295, |
|
"grad_norm": 0.423065721988678, |
|
"learning_rate": 2.9531305644477883e-06, |
|
"loss": 2.2323, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.9512798205046019, |
|
"grad_norm": 0.4213075339794159, |
|
"learning_rate": 2.8266999958613017e-06, |
|
"loss": 2.2362, |
|
"step": 83100 |
|
}, |
|
{ |
|
"epoch": 0.9524245615641742, |
|
"grad_norm": 0.45430952310562134, |
|
"learning_rate": 2.7025885396687145e-06, |
|
"loss": 2.2441, |
|
"step": 83200 |
|
}, |
|
{ |
|
"epoch": 0.9535693026237465, |
|
"grad_norm": 0.48012691736221313, |
|
"learning_rate": 2.5808363208356746e-06, |
|
"loss": 2.2623, |
|
"step": 83300 |
|
}, |
|
{ |
|
"epoch": 0.9547140436833188, |
|
"grad_norm": 0.4386986494064331, |
|
"learning_rate": 2.461482701591493e-06, |
|
"loss": 2.2329, |
|
"step": 83400 |
|
}, |
|
{ |
|
"epoch": 0.9558587847428912, |
|
"grad_norm": 0.4368671476840973, |
|
"learning_rate": 2.3445662687034143e-06, |
|
"loss": 2.2391, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.9570035258024635, |
|
"grad_norm": 0.4472872316837311, |
|
"learning_rate": 2.2301248210016024e-06, |
|
"loss": 2.2378, |
|
"step": 83600 |
|
}, |
|
{ |
|
"epoch": 0.9581482668620358, |
|
"grad_norm": 0.4506527781486511, |
|
"learning_rate": 2.1181953571588596e-06, |
|
"loss": 2.2362, |
|
"step": 83700 |
|
}, |
|
{ |
|
"epoch": 0.9592930079216081, |
|
"grad_norm": 0.4752218425273895, |
|
"learning_rate": 2.0088140637290265e-06, |
|
"loss": 2.2397, |
|
"step": 83800 |
|
}, |
|
{ |
|
"epoch": 0.9604377489811805, |
|
"grad_norm": 0.4612346589565277, |
|
"learning_rate": 1.9020163034479567e-06, |
|
"loss": 2.2447, |
|
"step": 83900 |
|
}, |
|
{ |
|
"epoch": 0.9615824900407528, |
|
"grad_norm": 0.49490582942962646, |
|
"learning_rate": 1.7978366038008234e-06, |
|
"loss": 2.243, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.9627272311003251, |
|
"grad_norm": 0.4314175844192505, |
|
"learning_rate": 1.696308645859447e-06, |
|
"loss": 2.2398, |
|
"step": 84100 |
|
}, |
|
{ |
|
"epoch": 0.9638719721598974, |
|
"grad_norm": 0.4729178249835968, |
|
"learning_rate": 1.5974652533932833e-06, |
|
"loss": 2.2528, |
|
"step": 84200 |
|
}, |
|
{ |
|
"epoch": 0.9650167132194698, |
|
"grad_norm": 0.45053282380104065, |
|
"learning_rate": 1.5013383822575766e-06, |
|
"loss": 2.2393, |
|
"step": 84300 |
|
}, |
|
{ |
|
"epoch": 0.9661614542790421, |
|
"grad_norm": 0.3972727358341217, |
|
"learning_rate": 1.4079591100620837e-06, |
|
"loss": 2.2551, |
|
"step": 84400 |
|
}, |
|
{ |
|
"epoch": 0.9673061953386144, |
|
"grad_norm": 0.4374849200248718, |
|
"learning_rate": 1.317357626123772e-06, |
|
"loss": 2.2434, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.9684509363981867, |
|
"grad_norm": 0.44703078269958496, |
|
"learning_rate": 1.2295632217066567e-06, |
|
"loss": 2.2446, |
|
"step": 84600 |
|
}, |
|
{ |
|
"epoch": 0.9695956774577591, |
|
"grad_norm": 0.49270525574684143, |
|
"learning_rate": 1.1446042805520098e-06, |
|
"loss": 2.2554, |
|
"step": 84700 |
|
}, |
|
{ |
|
"epoch": 0.9707404185173314, |
|
"grad_norm": 0.45350950956344604, |
|
"learning_rate": 1.062508269701963e-06, |
|
"loss": 2.2649, |
|
"step": 84800 |
|
}, |
|
{ |
|
"epoch": 0.9718851595769037, |
|
"grad_norm": 0.45871463418006897, |
|
"learning_rate": 9.833017306194558e-07, |
|
"loss": 2.2386, |
|
"step": 84900 |
|
}, |
|
{ |
|
"epoch": 0.973029900636476, |
|
"grad_norm": 0.4371441900730133, |
|
"learning_rate": 9.07010270607459e-07, |
|
"loss": 2.2543, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.9741746416960484, |
|
"grad_norm": 0.47834667563438416, |
|
"learning_rate": 8.33658554530169e-07, |
|
"loss": 2.2385, |
|
"step": 85100 |
|
}, |
|
{ |
|
"epoch": 0.9753193827556207, |
|
"grad_norm": 0.5165786743164062, |
|
"learning_rate": 7.632702968389205e-07, |
|
"loss": 2.2423, |
|
"step": 85200 |
|
}, |
|
{ |
|
"epoch": 0.976464123815193, |
|
"grad_norm": 0.4625995457172394, |
|
"learning_rate": 6.958682539053563e-07, |
|
"loss": 2.2618, |
|
"step": 85300 |
|
}, |
|
{ |
|
"epoch": 0.9776088648747653, |
|
"grad_norm": 0.5300964713096619, |
|
"learning_rate": 6.314742166643406e-07, |
|
"loss": 2.2383, |
|
"step": 85400 |
|
}, |
|
{ |
|
"epoch": 0.9787536059343377, |
|
"grad_norm": 0.49308186769485474, |
|
"learning_rate": 5.701090035689949e-07, |
|
"loss": 2.2657, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.97989834699391, |
|
"grad_norm": 0.4634435176849365, |
|
"learning_rate": 5.117924538601371e-07, |
|
"loss": 2.2347, |
|
"step": 85600 |
|
}, |
|
{ |
|
"epoch": 0.9810430880534823, |
|
"grad_norm": 0.4959612488746643, |
|
"learning_rate": 4.565434211522979e-07, |
|
"loss": 2.2421, |
|
"step": 85700 |
|
}, |
|
{ |
|
"epoch": 0.9821878291130546, |
|
"grad_norm": 0.43508780002593994, |
|
"learning_rate": 4.0437976733838757e-07, |
|
"loss": 2.2601, |
|
"step": 85800 |
|
}, |
|
{ |
|
"epoch": 0.983332570172627, |
|
"grad_norm": 0.4364226460456848, |
|
"learning_rate": 3.557935619007491e-07, |
|
"loss": 2.2362, |
|
"step": 85900 |
|
}, |
|
{ |
|
"epoch": 0.9844773112321993, |
|
"grad_norm": 0.4956010580062866, |
|
"learning_rate": 3.09818999860273e-07, |
|
"loss": 2.2456, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.9856220522917716, |
|
"grad_norm": 0.45080068707466125, |
|
"learning_rate": 2.6697725240214076e-07, |
|
"loss": 2.2333, |
|
"step": 86100 |
|
}, |
|
{ |
|
"epoch": 0.9867667933513439, |
|
"grad_norm": 0.47665461897850037, |
|
"learning_rate": 2.2728217017075395e-07, |
|
"loss": 2.2434, |
|
"step": 86200 |
|
}, |
|
{ |
|
"epoch": 0.9879115344109163, |
|
"grad_norm": 0.48589542508125305, |
|
"learning_rate": 1.9074658650043764e-07, |
|
"loss": 2.2461, |
|
"step": 86300 |
|
}, |
|
{ |
|
"epoch": 0.9890562754704886, |
|
"grad_norm": 0.47914570569992065, |
|
"learning_rate": 1.5738231326645758e-07, |
|
"loss": 2.2246, |
|
"step": 86400 |
|
}, |
|
{ |
|
"epoch": 0.9902010165300609, |
|
"grad_norm": 0.5974397659301758, |
|
"learning_rate": 1.2720013706627122e-07, |
|
"loss": 2.227, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.9913457575896332, |
|
"grad_norm": 0.5031090378761292, |
|
"learning_rate": 1.002098157322362e-07, |
|
"loss": 2.2415, |
|
"step": 86600 |
|
}, |
|
{ |
|
"epoch": 0.9924904986492056, |
|
"grad_norm": 0.470460444688797, |
|
"learning_rate": 7.642007517693062e-08, |
|
"loss": 2.2416, |
|
"step": 86700 |
|
}, |
|
{ |
|
"epoch": 0.9936352397087779, |
|
"grad_norm": 0.4445749521255493, |
|
"learning_rate": 5.5838606572078404e-08, |
|
"loss": 2.228, |
|
"step": 86800 |
|
}, |
|
{ |
|
"epoch": 0.9947799807683502, |
|
"grad_norm": 0.45365962386131287, |
|
"learning_rate": 3.847206386201507e-08, |
|
"loss": 2.2488, |
|
"step": 86900 |
|
}, |
|
{ |
|
"epoch": 0.9959247218279226, |
|
"grad_norm": 0.4715825319290161, |
|
"learning_rate": 2.4326061612479633e-08, |
|
"loss": 2.2225, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.9970694628874949, |
|
"grad_norm": 0.4650324583053589, |
|
"learning_rate": 1.340517319543877e-08, |
|
"loss": 2.2315, |
|
"step": 87100 |
|
}, |
|
{ |
|
"epoch": 0.9982142039470672, |
|
"grad_norm": 0.4570050537586212, |
|
"learning_rate": 5.712929310521809e-09, |
|
"loss": 2.2593, |
|
"step": 87200 |
|
}, |
|
{ |
|
"epoch": 0.9993589450066395, |
|
"grad_norm": 0.47539758682250977, |
|
"learning_rate": 1.2518168435593502e-09, |
|
"loss": 2.2317, |
|
"step": 87300 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 87356, |
|
"total_flos": 1.2729340395184456e+19, |
|
"train_loss": 2.3255439768606525, |
|
"train_runtime": 52634.826, |
|
"train_samples_per_second": 26.555, |
|
"train_steps_per_second": 1.66 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 87356, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.2729340395184456e+19, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|