|
[ |
|
{ |
|
"loss": 29.9707, |
|
"grad_norm": 0.4777052700519562, |
|
"learning_rate": 0.0009991248796709547, |
|
"epoch": 0.0 |
|
}, |
|
{ |
|
"loss": 22.6857, |
|
"grad_norm": 0.7528864741325378, |
|
"learning_rate": 0.0009982497593419095, |
|
"epoch": 0.01 |
|
}, |
|
{ |
|
"loss": 23.3032, |
|
"grad_norm": 0.2558889389038086, |
|
"learning_rate": 0.0009973746390128642, |
|
"epoch": 0.01 |
|
}, |
|
{ |
|
"loss": 22.7608, |
|
"grad_norm": 0.16549238562583923, |
|
"learning_rate": 0.000996499518683819, |
|
"epoch": 0.01 |
|
}, |
|
{ |
|
"loss": 21.8524, |
|
"grad_norm": 0.969261109828949, |
|
"learning_rate": 0.0009956243983547737, |
|
"epoch": 0.01 |
|
}, |
|
{ |
|
"loss": 20.1216, |
|
"grad_norm": 1.4401915073394775, |
|
"learning_rate": 0.0009947492780257286, |
|
"epoch": 0.02 |
|
}, |
|
{ |
|
"loss": 18.8698, |
|
"grad_norm": 1.2519457340240479, |
|
"learning_rate": 0.0009938741576966832, |
|
"epoch": 0.02 |
|
}, |
|
{ |
|
"loss": 17.8051, |
|
"grad_norm": 0.6829971075057983, |
|
"learning_rate": 0.0009929990373676381, |
|
"epoch": 0.02 |
|
}, |
|
{ |
|
"loss": 16.7436, |
|
"grad_norm": 0.8918408155441284, |
|
"learning_rate": 0.0009921239170385928, |
|
"epoch": 0.02 |
|
}, |
|
{ |
|
"loss": 16.3997, |
|
"grad_norm": 1.1997641324996948, |
|
"learning_rate": 0.0009912487967095476, |
|
"epoch": 0.03 |
|
}, |
|
{ |
|
"loss": 16.6555, |
|
"grad_norm": 1.0908863544464111, |
|
"learning_rate": 0.0009903736763805023, |
|
"epoch": 0.03 |
|
}, |
|
{ |
|
"loss": 16.168, |
|
"grad_norm": 0.8117638230323792, |
|
"learning_rate": 0.0009894985560514572, |
|
"epoch": 0.03 |
|
}, |
|
{ |
|
"loss": 16.4056, |
|
"grad_norm": 0.7367461919784546, |
|
"learning_rate": 0.0009886234357224118, |
|
"epoch": 0.03 |
|
}, |
|
{ |
|
"loss": 16.0576, |
|
"grad_norm": 0.827192485332489, |
|
"learning_rate": 0.0009877483153933667, |
|
"epoch": 0.04 |
|
}, |
|
{ |
|
"loss": 14.8864, |
|
"grad_norm": 0.6644204258918762, |
|
"learning_rate": 0.0009868731950643213, |
|
"epoch": 0.04 |
|
}, |
|
{ |
|
"loss": 15.1702, |
|
"grad_norm": 0.6314308047294617, |
|
"learning_rate": 0.0009859980747352762, |
|
"epoch": 0.04 |
|
}, |
|
{ |
|
"loss": 14.8873, |
|
"grad_norm": 0.4996398091316223, |
|
"learning_rate": 0.0009851229544062309, |
|
"epoch": 0.04 |
|
}, |
|
{ |
|
"loss": 14.704, |
|
"grad_norm": 0.6396967768669128, |
|
"learning_rate": 0.0009842478340771857, |
|
"epoch": 0.05 |
|
}, |
|
{ |
|
"loss": 14.8636, |
|
"grad_norm": 0.5319499373435974, |
|
"learning_rate": 0.0009833727137481404, |
|
"epoch": 0.05 |
|
}, |
|
{ |
|
"loss": 14.7236, |
|
"grad_norm": 1.1328645944595337, |
|
"learning_rate": 0.0009824975934190953, |
|
"epoch": 0.05 |
|
}, |
|
{ |
|
"loss": 14.2966, |
|
"grad_norm": 0.5435690879821777, |
|
"learning_rate": 0.00098162247309005, |
|
"epoch": 0.06 |
|
}, |
|
{ |
|
"loss": 14.8865, |
|
"grad_norm": 0.5260070562362671, |
|
"learning_rate": 0.0009807473527610048, |
|
"epoch": 0.06 |
|
}, |
|
{ |
|
"loss": 14.1635, |
|
"grad_norm": 0.5490550994873047, |
|
"learning_rate": 0.0009798722324319594, |
|
"epoch": 0.06 |
|
}, |
|
{ |
|
"loss": 14.1756, |
|
"grad_norm": 0.5580148696899414, |
|
"learning_rate": 0.0009789971121029143, |
|
"epoch": 0.06 |
|
}, |
|
{ |
|
"loss": 13.3026, |
|
"grad_norm": 0.4862927198410034, |
|
"learning_rate": 0.000978121991773869, |
|
"epoch": 0.07 |
|
}, |
|
{ |
|
"loss": 13.9938, |
|
"grad_norm": 0.4365651607513428, |
|
"learning_rate": 0.0009772468714448236, |
|
"epoch": 0.07 |
|
}, |
|
{ |
|
"loss": 13.7628, |
|
"grad_norm": 0.5206578373908997, |
|
"learning_rate": 0.0009763717511157785, |
|
"epoch": 0.07 |
|
}, |
|
{ |
|
"loss": 13.2932, |
|
"grad_norm": 0.4493275582790375, |
|
"learning_rate": 0.0009754966307867332, |
|
"epoch": 0.07 |
|
}, |
|
{ |
|
"loss": 13.4192, |
|
"grad_norm": 0.5717960596084595, |
|
"learning_rate": 0.000974621510457688, |
|
"epoch": 0.08 |
|
}, |
|
{ |
|
"loss": 13.2883, |
|
"grad_norm": 0.48513928055763245, |
|
"learning_rate": 0.0009737463901286428, |
|
"epoch": 0.08 |
|
}, |
|
{ |
|
"loss": 13.8283, |
|
"grad_norm": 0.7734763622283936, |
|
"learning_rate": 0.0009728712697995975, |
|
"epoch": 0.08 |
|
}, |
|
{ |
|
"loss": 12.4766, |
|
"grad_norm": 0.45278435945510864, |
|
"learning_rate": 0.0009719961494705523, |
|
"epoch": 0.08 |
|
}, |
|
{ |
|
"loss": 13.41, |
|
"grad_norm": 0.5911663174629211, |
|
"learning_rate": 0.000971121029141507, |
|
"epoch": 0.09 |
|
}, |
|
{ |
|
"loss": 12.4475, |
|
"grad_norm": 0.5840547680854797, |
|
"learning_rate": 0.0009702459088124618, |
|
"epoch": 0.09 |
|
}, |
|
{ |
|
"loss": 13.9417, |
|
"grad_norm": 0.7008219957351685, |
|
"learning_rate": 0.0009693707884834166, |
|
"epoch": 0.09 |
|
}, |
|
{ |
|
"loss": 12.9164, |
|
"grad_norm": 0.7021568417549133, |
|
"learning_rate": 0.0009684956681543713, |
|
"epoch": 0.09 |
|
}, |
|
{ |
|
"loss": 12.9824, |
|
"grad_norm": 0.5466001629829407, |
|
"learning_rate": 0.0009676205478253261, |
|
"epoch": 0.1 |
|
}, |
|
{ |
|
"loss": 12.5413, |
|
"grad_norm": 0.6215840578079224, |
|
"learning_rate": 0.0009667454274962808, |
|
"epoch": 0.1 |
|
}, |
|
{ |
|
"loss": 12.7753, |
|
"grad_norm": 1.5948784351348877, |
|
"learning_rate": 0.0009658703071672355, |
|
"epoch": 0.1 |
|
}, |
|
{ |
|
"loss": 12.5837, |
|
"grad_norm": 1.1063404083251953, |
|
"learning_rate": 0.0009649951868381903, |
|
"epoch": 0.1 |
|
}, |
|
{ |
|
"loss": 12.7079, |
|
"grad_norm": 0.7521733045578003, |
|
"learning_rate": 0.000964120066509145, |
|
"epoch": 0.11 |
|
}, |
|
{ |
|
"loss": 12.1584, |
|
"grad_norm": 0.7596040964126587, |
|
"learning_rate": 0.0009632449461800998, |
|
"epoch": 0.11 |
|
}, |
|
{ |
|
"loss": 12.6058, |
|
"grad_norm": 1.1221098899841309, |
|
"learning_rate": 0.0009623698258510546, |
|
"epoch": 0.11 |
|
}, |
|
{ |
|
"loss": 12.7003, |
|
"grad_norm": 0.795098602771759, |
|
"learning_rate": 0.0009614947055220093, |
|
"epoch": 0.12 |
|
}, |
|
{ |
|
"loss": 12.4519, |
|
"grad_norm": 0.481406569480896, |
|
"learning_rate": 0.0009606195851929641, |
|
"epoch": 0.12 |
|
}, |
|
{ |
|
"loss": 12.8483, |
|
"grad_norm": 0.6707068681716919, |
|
"learning_rate": 0.0009597444648639187, |
|
"epoch": 0.12 |
|
}, |
|
{ |
|
"loss": 12.593, |
|
"grad_norm": 0.6381434798240662, |
|
"learning_rate": 0.0009588693445348735, |
|
"epoch": 0.12 |
|
}, |
|
{ |
|
"loss": 12.1654, |
|
"grad_norm": 0.7791229486465454, |
|
"learning_rate": 0.0009579942242058283, |
|
"epoch": 0.13 |
|
}, |
|
{ |
|
"loss": 11.8089, |
|
"grad_norm": 0.8445360660552979, |
|
"learning_rate": 0.000957119103876783, |
|
"epoch": 0.13 |
|
}, |
|
{ |
|
"loss": 12.232, |
|
"grad_norm": 0.6427455544471741, |
|
"learning_rate": 0.0009562439835477378, |
|
"epoch": 0.13 |
|
}, |
|
{ |
|
"loss": 12.9625, |
|
"grad_norm": 0.5700855255126953, |
|
"learning_rate": 0.0009553688632186925, |
|
"epoch": 0.13 |
|
}, |
|
{ |
|
"loss": 12.2628, |
|
"grad_norm": 0.8731588125228882, |
|
"learning_rate": 0.0009544937428896473, |
|
"epoch": 0.14 |
|
}, |
|
{ |
|
"loss": 12.0435, |
|
"grad_norm": 0.869883120059967, |
|
"learning_rate": 0.0009536186225606021, |
|
"epoch": 0.14 |
|
}, |
|
{ |
|
"loss": 12.2867, |
|
"grad_norm": 0.8802808523178101, |
|
"learning_rate": 0.0009527435022315568, |
|
"epoch": 0.14 |
|
}, |
|
{ |
|
"loss": 11.9397, |
|
"grad_norm": 1.0076773166656494, |
|
"learning_rate": 0.0009518683819025116, |
|
"epoch": 0.14 |
|
}, |
|
{ |
|
"loss": 11.8392, |
|
"grad_norm": 0.5855250954627991, |
|
"learning_rate": 0.0009509932615734664, |
|
"epoch": 0.15 |
|
}, |
|
{ |
|
"loss": 11.3847, |
|
"grad_norm": 0.5606763958930969, |
|
"learning_rate": 0.0009501181412444211, |
|
"epoch": 0.15 |
|
}, |
|
{ |
|
"loss": 12.2154, |
|
"grad_norm": 1.1014057397842407, |
|
"learning_rate": 0.0009492430209153759, |
|
"epoch": 0.15 |
|
}, |
|
{ |
|
"loss": 11.6247, |
|
"grad_norm": 0.6524838805198669, |
|
"learning_rate": 0.0009483679005863306, |
|
"epoch": 0.15 |
|
}, |
|
{ |
|
"loss": 11.5115, |
|
"grad_norm": 1.0140221118927002, |
|
"learning_rate": 0.0009474927802572854, |
|
"epoch": 0.16 |
|
}, |
|
{ |
|
"loss": 12.1707, |
|
"grad_norm": 1.4689868688583374, |
|
"learning_rate": 0.0009466176599282402, |
|
"epoch": 0.16 |
|
}, |
|
{ |
|
"loss": 11.6165, |
|
"grad_norm": 0.8136260509490967, |
|
"learning_rate": 0.0009457425395991948, |
|
"epoch": 0.16 |
|
}, |
|
{ |
|
"loss": 11.8841, |
|
"grad_norm": 2.0376949310302734, |
|
"learning_rate": 0.0009448674192701496, |
|
"epoch": 0.17 |
|
}, |
|
{ |
|
"loss": 11.2108, |
|
"grad_norm": 1.1647133827209473, |
|
"learning_rate": 0.0009439922989411043, |
|
"epoch": 0.17 |
|
}, |
|
{ |
|
"loss": 11.9281, |
|
"grad_norm": 0.8479063510894775, |
|
"learning_rate": 0.0009431171786120591, |
|
"epoch": 0.17 |
|
}, |
|
{ |
|
"loss": 11.0593, |
|
"grad_norm": 0.8340569138526917, |
|
"learning_rate": 0.0009422420582830139, |
|
"epoch": 0.17 |
|
}, |
|
{ |
|
"loss": 11.5591, |
|
"grad_norm": 0.9813485145568848, |
|
"learning_rate": 0.0009413669379539686, |
|
"epoch": 0.18 |
|
}, |
|
{ |
|
"loss": 11.1773, |
|
"grad_norm": 0.9088229537010193, |
|
"learning_rate": 0.0009404918176249234, |
|
"epoch": 0.18 |
|
}, |
|
{ |
|
"loss": 11.6913, |
|
"grad_norm": 0.860917866230011, |
|
"learning_rate": 0.0009396166972958782, |
|
"epoch": 0.18 |
|
}, |
|
{ |
|
"loss": 12.3707, |
|
"grad_norm": 0.7795988321304321, |
|
"learning_rate": 0.0009387415769668329, |
|
"epoch": 0.18 |
|
}, |
|
{ |
|
"loss": 11.6669, |
|
"grad_norm": 0.914884626865387, |
|
"learning_rate": 0.0009378664566377877, |
|
"epoch": 0.19 |
|
}, |
|
{ |
|
"loss": 11.6139, |
|
"grad_norm": 1.7863789796829224, |
|
"learning_rate": 0.0009369913363087424, |
|
"epoch": 0.19 |
|
}, |
|
{ |
|
"loss": 11.1885, |
|
"grad_norm": 0.7225568294525146, |
|
"learning_rate": 0.0009361162159796972, |
|
"epoch": 0.19 |
|
}, |
|
{ |
|
"loss": 11.7488, |
|
"grad_norm": 0.9028294682502747, |
|
"learning_rate": 0.000935241095650652, |
|
"epoch": 0.19 |
|
}, |
|
{ |
|
"loss": 11.227, |
|
"grad_norm": 1.0842101573944092, |
|
"learning_rate": 0.0009343659753216067, |
|
"epoch": 0.2 |
|
}, |
|
{ |
|
"loss": 11.4022, |
|
"grad_norm": 0.7042496800422668, |
|
"learning_rate": 0.0009334908549925615, |
|
"epoch": 0.2 |
|
}, |
|
{ |
|
"loss": 11.006, |
|
"grad_norm": 0.8355586528778076, |
|
"learning_rate": 0.0009326157346635162, |
|
"epoch": 0.2 |
|
}, |
|
{ |
|
"loss": 11.0561, |
|
"grad_norm": 0.9001519083976746, |
|
"learning_rate": 0.000931740614334471, |
|
"epoch": 0.2 |
|
}, |
|
{ |
|
"loss": 11.357, |
|
"grad_norm": 0.8695396184921265, |
|
"learning_rate": 0.0009308654940054258, |
|
"epoch": 0.21 |
|
}, |
|
{ |
|
"loss": 10.7003, |
|
"grad_norm": 0.8076105117797852, |
|
"learning_rate": 0.0009299903736763805, |
|
"epoch": 0.21 |
|
}, |
|
{ |
|
"loss": 11.2661, |
|
"grad_norm": 0.9677106142044067, |
|
"learning_rate": 0.0009291152533473353, |
|
"epoch": 0.21 |
|
}, |
|
{ |
|
"loss": 10.8957, |
|
"grad_norm": 0.8753145337104797, |
|
"learning_rate": 0.0009282401330182901, |
|
"epoch": 0.22 |
|
}, |
|
{ |
|
"loss": 11.2854, |
|
"grad_norm": 0.7343422770500183, |
|
"learning_rate": 0.0009273650126892448, |
|
"epoch": 0.22 |
|
}, |
|
{ |
|
"loss": 10.8205, |
|
"grad_norm": 0.9795741438865662, |
|
"learning_rate": 0.0009264898923601996, |
|
"epoch": 0.22 |
|
}, |
|
{ |
|
"loss": 10.6805, |
|
"grad_norm": 0.9723809957504272, |
|
"learning_rate": 0.0009256147720311543, |
|
"epoch": 0.22 |
|
}, |
|
{ |
|
"loss": 10.7639, |
|
"grad_norm": 0.6675435900688171, |
|
"learning_rate": 0.0009247396517021091, |
|
"epoch": 0.23 |
|
}, |
|
{ |
|
"loss": 11.1119, |
|
"grad_norm": 0.9673445224761963, |
|
"learning_rate": 0.0009238645313730638, |
|
"epoch": 0.23 |
|
}, |
|
{ |
|
"loss": 11.293, |
|
"grad_norm": 0.9545767307281494, |
|
"learning_rate": 0.0009229894110440185, |
|
"epoch": 0.23 |
|
}, |
|
{ |
|
"loss": 11.4529, |
|
"grad_norm": 0.8443020582199097, |
|
"learning_rate": 0.0009221142907149733, |
|
"epoch": 0.23 |
|
}, |
|
{ |
|
"loss": 10.402, |
|
"grad_norm": 0.9980494976043701, |
|
"learning_rate": 0.000921239170385928, |
|
"epoch": 0.24 |
|
}, |
|
{ |
|
"loss": 10.8417, |
|
"grad_norm": 1.2651828527450562, |
|
"learning_rate": 0.0009203640500568828, |
|
"epoch": 0.24 |
|
}, |
|
{ |
|
"loss": 10.9627, |
|
"grad_norm": 0.7320075035095215, |
|
"learning_rate": 0.0009194889297278376, |
|
"epoch": 0.24 |
|
}, |
|
{ |
|
"loss": 10.1427, |
|
"grad_norm": 1.5249311923980713, |
|
"learning_rate": 0.0009186138093987923, |
|
"epoch": 0.24 |
|
}, |
|
{ |
|
"loss": 11.0647, |
|
"grad_norm": 0.8371347188949585, |
|
"learning_rate": 0.0009177386890697471, |
|
"epoch": 0.25 |
|
}, |
|
{ |
|
"loss": 10.7984, |
|
"grad_norm": 1.0522745847702026, |
|
"learning_rate": 0.0009168635687407019, |
|
"epoch": 0.25 |
|
}, |
|
{ |
|
"loss": 10.0289, |
|
"grad_norm": 0.9992939829826355, |
|
"learning_rate": 0.0009159884484116566, |
|
"epoch": 0.25 |
|
}, |
|
{ |
|
"loss": 10.6594, |
|
"grad_norm": 1.6465744972229004, |
|
"learning_rate": 0.0009151133280826114, |
|
"epoch": 0.25 |
|
}, |
|
{ |
|
"loss": 10.7898, |
|
"grad_norm": 0.8755474090576172, |
|
"learning_rate": 0.0009142382077535661, |
|
"epoch": 0.26 |
|
}, |
|
{ |
|
"loss": 10.8566, |
|
"grad_norm": 0.9154648780822754, |
|
"learning_rate": 0.0009133630874245209, |
|
"epoch": 0.26 |
|
}, |
|
{ |
|
"loss": 10.3388, |
|
"grad_norm": 0.9557958245277405, |
|
"learning_rate": 0.0009124879670954757, |
|
"epoch": 0.26 |
|
}, |
|
{ |
|
"loss": 11.0761, |
|
"grad_norm": 0.9756875038146973, |
|
"learning_rate": 0.0009116128467664304, |
|
"epoch": 0.27 |
|
}, |
|
{ |
|
"loss": 10.6927, |
|
"grad_norm": 0.9137876033782959, |
|
"learning_rate": 0.0009107377264373852, |
|
"epoch": 0.27 |
|
}, |
|
{ |
|
"loss": 10.4956, |
|
"grad_norm": 1.2811295986175537, |
|
"learning_rate": 0.00090986260610834, |
|
"epoch": 0.27 |
|
}, |
|
{ |
|
"loss": 11.13, |
|
"grad_norm": 1.574196696281433, |
|
"learning_rate": 0.0009090749978121991, |
|
"epoch": 0.27 |
|
}, |
|
{ |
|
"loss": 10.4299, |
|
"grad_norm": 1.120239019393921, |
|
"learning_rate": 0.0009082873895160585, |
|
"epoch": 0.28 |
|
}, |
|
{ |
|
"loss": 10.9432, |
|
"grad_norm": 4.42399263381958, |
|
"learning_rate": 0.0009074122691870133, |
|
"epoch": 0.28 |
|
}, |
|
{ |
|
"loss": 10.6758, |
|
"grad_norm": 1.1292444467544556, |
|
"learning_rate": 0.000906537148857968, |
|
"epoch": 0.28 |
|
}, |
|
{ |
|
"loss": 9.9808, |
|
"grad_norm": 1.36553156375885, |
|
"learning_rate": 0.0009056620285289227, |
|
"epoch": 0.28 |
|
}, |
|
{ |
|
"loss": 10.4376, |
|
"grad_norm": 1.4920979738235474, |
|
"learning_rate": 0.0009047869081998775, |
|
"epoch": 0.29 |
|
}, |
|
{ |
|
"loss": 11.5319, |
|
"grad_norm": 1.142583966255188, |
|
"learning_rate": 0.0009039117878708322, |
|
"epoch": 0.29 |
|
}, |
|
{ |
|
"loss": 10.8741, |
|
"grad_norm": 1.7269898653030396, |
|
"learning_rate": 0.000903036667541787, |
|
"epoch": 0.29 |
|
}, |
|
{ |
|
"loss": 10.6609, |
|
"grad_norm": 1.0620924234390259, |
|
"learning_rate": 0.0009021615472127418, |
|
"epoch": 0.29 |
|
}, |
|
{ |
|
"loss": 10.8716, |
|
"grad_norm": 1.0225517749786377, |
|
"learning_rate": 0.0009012864268836965, |
|
"epoch": 0.3 |
|
}, |
|
{ |
|
"loss": 10.8629, |
|
"grad_norm": 0.8201847672462463, |
|
"learning_rate": 0.0009004113065546513, |
|
"epoch": 0.3 |
|
}, |
|
{ |
|
"loss": 10.2614, |
|
"grad_norm": 0.7885268926620483, |
|
"learning_rate": 0.000899536186225606, |
|
"epoch": 0.3 |
|
}, |
|
{ |
|
"loss": 10.1758, |
|
"grad_norm": 0.8671897053718567, |
|
"learning_rate": 0.0008986610658965608, |
|
"epoch": 0.3 |
|
}, |
|
{ |
|
"loss": 10.2796, |
|
"grad_norm": 0.8501631617546082, |
|
"learning_rate": 0.0008977859455675156, |
|
"epoch": 0.31 |
|
}, |
|
{ |
|
"loss": 10.4376, |
|
"grad_norm": 1.3847661018371582, |
|
"learning_rate": 0.0008969108252384703, |
|
"epoch": 0.31 |
|
}, |
|
{ |
|
"loss": 10.6258, |
|
"grad_norm": 1.1267868280410767, |
|
"learning_rate": 0.0008960357049094251, |
|
"epoch": 0.31 |
|
}, |
|
{ |
|
"loss": 10.3214, |
|
"grad_norm": 0.9492388963699341, |
|
"learning_rate": 0.0008951605845803799, |
|
"epoch": 0.31 |
|
}, |
|
{ |
|
"loss": 10.3126, |
|
"grad_norm": 2.884838819503784, |
|
"learning_rate": 0.0008942854642513346, |
|
"epoch": 0.32 |
|
}, |
|
{ |
|
"loss": 9.8104, |
|
"grad_norm": 1.007505178451538, |
|
"learning_rate": 0.0008934103439222894, |
|
"epoch": 0.32 |
|
}, |
|
{ |
|
"loss": 10.7341, |
|
"grad_norm": 0.9504636526107788, |
|
"learning_rate": 0.0008925352235932441, |
|
"epoch": 0.32 |
|
}, |
|
{ |
|
"loss": 10.3923, |
|
"grad_norm": 1.1075007915496826, |
|
"learning_rate": 0.0008916601032641989, |
|
"epoch": 0.33 |
|
}, |
|
{ |
|
"loss": 10.323, |
|
"grad_norm": 1.137343406677246, |
|
"learning_rate": 0.0008907849829351537, |
|
"epoch": 0.33 |
|
}, |
|
{ |
|
"loss": 10.2794, |
|
"grad_norm": 0.797771155834198, |
|
"learning_rate": 0.0008899098626061084, |
|
"epoch": 0.33 |
|
}, |
|
{ |
|
"loss": 10.6656, |
|
"grad_norm": 1.018343448638916, |
|
"learning_rate": 0.0008890347422770632, |
|
"epoch": 0.33 |
|
}, |
|
{ |
|
"loss": 10.2778, |
|
"grad_norm": 1.0548039674758911, |
|
"learning_rate": 0.000888159621948018, |
|
"epoch": 0.34 |
|
}, |
|
{ |
|
"loss": 10.1114, |
|
"grad_norm": 3.0174038410186768, |
|
"learning_rate": 0.0008872845016189727, |
|
"epoch": 0.34 |
|
}, |
|
{ |
|
"loss": 10.8685, |
|
"grad_norm": 2.50591778755188, |
|
"learning_rate": 0.0008864093812899275, |
|
"epoch": 0.34 |
|
}, |
|
{ |
|
"loss": 10.0677, |
|
"grad_norm": 1.2851207256317139, |
|
"learning_rate": 0.0008855342609608822, |
|
"epoch": 0.34 |
|
}, |
|
{ |
|
"loss": 10.0311, |
|
"grad_norm": 0.7987344264984131, |
|
"learning_rate": 0.0008846591406318369, |
|
"epoch": 0.35 |
|
}, |
|
{ |
|
"loss": 9.7713, |
|
"grad_norm": 1.114479899406433, |
|
"learning_rate": 0.0008837840203027917, |
|
"epoch": 0.35 |
|
}, |
|
{ |
|
"loss": 9.9371, |
|
"grad_norm": 1.2233116626739502, |
|
"learning_rate": 0.0008829088999737464, |
|
"epoch": 0.35 |
|
}, |
|
{ |
|
"loss": 10.5333, |
|
"grad_norm": 2.0412189960479736, |
|
"learning_rate": 0.0008820337796447012, |
|
"epoch": 0.35 |
|
}, |
|
{ |
|
"loss": 10.2577, |
|
"grad_norm": 1.2635306119918823, |
|
"learning_rate": 0.0008811586593156559, |
|
"epoch": 0.36 |
|
}, |
|
{ |
|
"loss": 9.8937, |
|
"grad_norm": 12.760872840881348, |
|
"learning_rate": 0.0008802835389866107, |
|
"epoch": 0.36 |
|
}, |
|
{ |
|
"loss": 10.6092, |
|
"grad_norm": 1.3580334186553955, |
|
"learning_rate": 0.0008794084186575654, |
|
"epoch": 0.36 |
|
}, |
|
{ |
|
"loss": 10.2467, |
|
"grad_norm": 1.250632643699646, |
|
"learning_rate": 0.0008785332983285201, |
|
"epoch": 0.36 |
|
}, |
|
{ |
|
"loss": 10.5076, |
|
"grad_norm": 1.458349585533142, |
|
"learning_rate": 0.0008776581779994749, |
|
"epoch": 0.37 |
|
}, |
|
{ |
|
"loss": 10.2769, |
|
"grad_norm": 1.9139622449874878, |
|
"learning_rate": 0.0008767830576704296, |
|
"epoch": 0.37 |
|
}, |
|
{ |
|
"loss": 10.4452, |
|
"grad_norm": 1.2400761842727661, |
|
"learning_rate": 0.0008759079373413844, |
|
"epoch": 0.37 |
|
}, |
|
{ |
|
"loss": 10.01, |
|
"grad_norm": 1.5482594966888428, |
|
"learning_rate": 0.0008750328170123392, |
|
"epoch": 0.38 |
|
}, |
|
{ |
|
"loss": 10.2997, |
|
"grad_norm": 1.68232262134552, |
|
"learning_rate": 0.0008741576966832939, |
|
"epoch": 0.38 |
|
}, |
|
{ |
|
"loss": 10.0902, |
|
"grad_norm": 1.206350564956665, |
|
"learning_rate": 0.0008732825763542487, |
|
"epoch": 0.38 |
|
}, |
|
{ |
|
"loss": 9.6499, |
|
"grad_norm": 1.2805421352386475, |
|
"learning_rate": 0.0008724074560252034, |
|
"epoch": 0.38 |
|
}, |
|
{ |
|
"loss": 10.3858, |
|
"grad_norm": 1.1297776699066162, |
|
"learning_rate": 0.0008715323356961582, |
|
"epoch": 0.39 |
|
}, |
|
{ |
|
"loss": 10.4059, |
|
"grad_norm": 1.382300853729248, |
|
"learning_rate": 0.000870657215367113, |
|
"epoch": 0.39 |
|
}, |
|
{ |
|
"loss": 9.8993, |
|
"grad_norm": 1.1831278800964355, |
|
"learning_rate": 0.0008697820950380677, |
|
"epoch": 0.39 |
|
}, |
|
{ |
|
"loss": 10.2277, |
|
"grad_norm": 1.5924201011657715, |
|
"learning_rate": 0.0008689069747090225, |
|
"epoch": 0.39 |
|
}, |
|
{ |
|
"loss": 10.2644, |
|
"grad_norm": 0.9275569319725037, |
|
"learning_rate": 0.0008680318543799773, |
|
"epoch": 0.4 |
|
}, |
|
{ |
|
"loss": 10.2756, |
|
"grad_norm": 1.220247745513916, |
|
"learning_rate": 0.000867156734050932, |
|
"epoch": 0.4 |
|
}, |
|
{ |
|
"loss": 9.873, |
|
"grad_norm": 1.2408357858657837, |
|
"learning_rate": 0.0008662816137218868, |
|
"epoch": 0.4 |
|
}, |
|
{ |
|
"loss": 10.4232, |
|
"grad_norm": 2.236565351486206, |
|
"learning_rate": 0.0008654064933928415, |
|
"epoch": 0.4 |
|
}, |
|
{ |
|
"loss": 9.8613, |
|
"grad_norm": 1.3093738555908203, |
|
"learning_rate": 0.0008645313730637963, |
|
"epoch": 0.41 |
|
}, |
|
{ |
|
"loss": 10.1708, |
|
"grad_norm": 2.232199192047119, |
|
"learning_rate": 0.000863656252734751, |
|
"epoch": 0.41 |
|
}, |
|
{ |
|
"loss": 9.9729, |
|
"grad_norm": 1.4281343221664429, |
|
"learning_rate": 0.0008627811324057057, |
|
"epoch": 0.41 |
|
}, |
|
{ |
|
"loss": 10.3467, |
|
"grad_norm": 1.7682894468307495, |
|
"learning_rate": 0.0008619060120766605, |
|
"epoch": 0.41 |
|
}, |
|
{ |
|
"loss": 9.7119, |
|
"grad_norm": 1.7619984149932861, |
|
"learning_rate": 0.0008610308917476152, |
|
"epoch": 0.42 |
|
}, |
|
{ |
|
"loss": 10.4769, |
|
"grad_norm": 1.5372920036315918, |
|
"learning_rate": 0.00086015577141857, |
|
"epoch": 0.42 |
|
}, |
|
{ |
|
"loss": 10.691, |
|
"grad_norm": 2.3789474964141846, |
|
"learning_rate": 0.0008592806510895248, |
|
"epoch": 0.42 |
|
}, |
|
{ |
|
"loss": 9.8791, |
|
"grad_norm": 2.496776819229126, |
|
"learning_rate": 0.0008584055307604795, |
|
"epoch": 0.43 |
|
}, |
|
{ |
|
"loss": 9.7356, |
|
"grad_norm": 4.118072032928467, |
|
"learning_rate": 0.0008575304104314343, |
|
"epoch": 0.43 |
|
}, |
|
{ |
|
"loss": 10.3761, |
|
"grad_norm": 1.7359448671340942, |
|
"learning_rate": 0.0008566552901023891, |
|
"epoch": 0.43 |
|
}, |
|
{ |
|
"loss": 10.1403, |
|
"grad_norm": 1.8283412456512451, |
|
"learning_rate": 0.0008557801697733438, |
|
"epoch": 0.43 |
|
}, |
|
{ |
|
"loss": 10.306, |
|
"grad_norm": 1.9979033470153809, |
|
"learning_rate": 0.0008549050494442986, |
|
"epoch": 0.44 |
|
}, |
|
{ |
|
"loss": 9.5832, |
|
"grad_norm": 3.1794967651367188, |
|
"learning_rate": 0.0008540299291152533, |
|
"epoch": 0.44 |
|
}, |
|
{ |
|
"loss": 10.1963, |
|
"grad_norm": 3.1991539001464844, |
|
"learning_rate": 0.0008531548087862081, |
|
"epoch": 0.44 |
|
}, |
|
{ |
|
"loss": 10.7828, |
|
"grad_norm": 2.5145182609558105, |
|
"learning_rate": 0.0008522796884571629, |
|
"epoch": 0.44 |
|
}, |
|
{ |
|
"loss": 10.1017, |
|
"grad_norm": 1.0783337354660034, |
|
"learning_rate": 0.0008514045681281176, |
|
"epoch": 0.45 |
|
}, |
|
{ |
|
"loss": 9.4955, |
|
"grad_norm": 6.040937423706055, |
|
"learning_rate": 0.0008505294477990724, |
|
"epoch": 0.45 |
|
}, |
|
{ |
|
"loss": 10.3679, |
|
"grad_norm": 1.5212355852127075, |
|
"learning_rate": 0.0008496543274700271, |
|
"epoch": 0.45 |
|
}, |
|
{ |
|
"loss": 9.7236, |
|
"grad_norm": 4.30284309387207, |
|
"learning_rate": 0.0008487792071409819, |
|
"epoch": 0.45 |
|
}, |
|
{ |
|
"loss": 9.7635, |
|
"grad_norm": 2.9821696281433105, |
|
"learning_rate": 0.0008479040868119367, |
|
"epoch": 0.46 |
|
}, |
|
{ |
|
"loss": 9.8438, |
|
"grad_norm": 1.676759958267212, |
|
"learning_rate": 0.0008470289664828914, |
|
"epoch": 0.46 |
|
}, |
|
{ |
|
"loss": 9.6693, |
|
"grad_norm": 1.8075122833251953, |
|
"learning_rate": 0.0008461538461538462, |
|
"epoch": 0.46 |
|
}, |
|
{ |
|
"loss": 10.4572, |
|
"grad_norm": 2.4182658195495605, |
|
"learning_rate": 0.000845278725824801, |
|
"epoch": 0.46 |
|
}, |
|
{ |
|
"loss": 10.3901, |
|
"grad_norm": 1.7208518981933594, |
|
"learning_rate": 0.0008444036054957557, |
|
"epoch": 0.47 |
|
}, |
|
{ |
|
"loss": 9.7696, |
|
"grad_norm": 2.4831340312957764, |
|
"learning_rate": 0.0008435284851667105, |
|
"epoch": 0.47 |
|
}, |
|
{ |
|
"loss": 10.409, |
|
"grad_norm": 1.3335094451904297, |
|
"learning_rate": 0.0008426533648376652, |
|
"epoch": 0.47 |
|
}, |
|
{ |
|
"loss": 10.526, |
|
"grad_norm": 0.9441933035850525, |
|
"learning_rate": 0.0008417782445086199, |
|
"epoch": 0.48 |
|
}, |
|
{ |
|
"loss": 10.14, |
|
"grad_norm": 1.1018340587615967, |
|
"learning_rate": 0.0008409031241795747, |
|
"epoch": 0.48 |
|
}, |
|
{ |
|
"loss": 10.0298, |
|
"grad_norm": 1.2077239751815796, |
|
"learning_rate": 0.0008400280038505294, |
|
"epoch": 0.48 |
|
}, |
|
{ |
|
"loss": 9.7303, |
|
"grad_norm": 2.0401172637939453, |
|
"learning_rate": 0.0008391528835214842, |
|
"epoch": 0.48 |
|
}, |
|
{ |
|
"loss": 10.1229, |
|
"grad_norm": 1.9456411600112915, |
|
"learning_rate": 0.0008382777631924389, |
|
"epoch": 0.49 |
|
}, |
|
{ |
|
"loss": 9.9805, |
|
"grad_norm": 1.830814003944397, |
|
"learning_rate": 0.0008374026428633937, |
|
"epoch": 0.49 |
|
}, |
|
{ |
|
"loss": 10.1328, |
|
"grad_norm": 2.1729185581207275, |
|
"learning_rate": 0.0008365275225343485, |
|
"epoch": 0.49 |
|
}, |
|
{ |
|
"loss": 10.4834, |
|
"grad_norm": 1.324315071105957, |
|
"learning_rate": 0.0008356524022053032, |
|
"epoch": 0.49 |
|
}, |
|
{ |
|
"loss": 10.349, |
|
"grad_norm": 2.837768077850342, |
|
"learning_rate": 0.000834777281876258, |
|
"epoch": 0.5 |
|
}, |
|
{ |
|
"loss": 9.8015, |
|
"grad_norm": 1.1361275911331177, |
|
"learning_rate": 0.0008339021615472128, |
|
"epoch": 0.5 |
|
}, |
|
{ |
|
"loss": 9.5739, |
|
"grad_norm": 1.4033498764038086, |
|
"learning_rate": 0.0008330270412181675, |
|
"epoch": 0.5 |
|
}, |
|
{ |
|
"loss": 9.5204, |
|
"grad_norm": 1.1027082204818726, |
|
"learning_rate": 0.0008321519208891223, |
|
"epoch": 0.5 |
|
}, |
|
{ |
|
"loss": 9.4961, |
|
"grad_norm": 2.2432548999786377, |
|
"learning_rate": 0.000831276800560077, |
|
"epoch": 0.51 |
|
}, |
|
{ |
|
"loss": 10.4562, |
|
"grad_norm": 1.3807300329208374, |
|
"learning_rate": 0.0008304016802310318, |
|
"epoch": 0.51 |
|
}, |
|
{ |
|
"loss": 9.9888, |
|
"grad_norm": 2.594301462173462, |
|
"learning_rate": 0.0008295265599019866, |
|
"epoch": 0.51 |
|
}, |
|
{ |
|
"loss": 9.4501, |
|
"grad_norm": 1.4775426387786865, |
|
"learning_rate": 0.0008286514395729413, |
|
"epoch": 0.51 |
|
}, |
|
{ |
|
"loss": 9.9432, |
|
"grad_norm": 1.463850736618042, |
|
"learning_rate": 0.0008277763192438961, |
|
"epoch": 0.52 |
|
}, |
|
{ |
|
"loss": 9.7867, |
|
"grad_norm": 1.5370949506759644, |
|
"learning_rate": 0.0008269011989148508, |
|
"epoch": 0.52 |
|
}, |
|
{ |
|
"loss": 9.674, |
|
"grad_norm": 1.2858608961105347, |
|
"learning_rate": 0.0008260260785858056, |
|
"epoch": 0.52 |
|
}, |
|
{ |
|
"loss": 10.4663, |
|
"grad_norm": 1.2758288383483887, |
|
"learning_rate": 0.0008251509582567604, |
|
"epoch": 0.52 |
|
}, |
|
{ |
|
"loss": 9.552, |
|
"grad_norm": 1.181013822555542, |
|
"learning_rate": 0.0008242758379277151, |
|
"epoch": 0.53 |
|
}, |
|
{ |
|
"loss": 9.9999, |
|
"grad_norm": 0.9388832449913025, |
|
"learning_rate": 0.0008234007175986699, |
|
"epoch": 0.53 |
|
}, |
|
{ |
|
"loss": 10.1529, |
|
"grad_norm": 1.3157830238342285, |
|
"learning_rate": 0.0008225255972696247, |
|
"epoch": 0.53 |
|
}, |
|
{ |
|
"loss": 10.3224, |
|
"grad_norm": 1.603309154510498, |
|
"learning_rate": 0.0008216504769405794, |
|
"epoch": 0.54 |
|
}, |
|
{ |
|
"loss": 9.2725, |
|
"grad_norm": 1.2987728118896484, |
|
"learning_rate": 0.0008207753566115342, |
|
"epoch": 0.54 |
|
}, |
|
{ |
|
"loss": 10.2593, |
|
"grad_norm": 1.398086428642273, |
|
"learning_rate": 0.0008199002362824888, |
|
"epoch": 0.54 |
|
}, |
|
{ |
|
"loss": 9.8407, |
|
"grad_norm": 1.3308155536651611, |
|
"learning_rate": 0.0008190251159534436, |
|
"epoch": 0.54 |
|
}, |
|
{ |
|
"loss": 10.7467, |
|
"grad_norm": 1.3167645931243896, |
|
"learning_rate": 0.0008181499956243984, |
|
"epoch": 0.55 |
|
}, |
|
{ |
|
"loss": 10.1278, |
|
"grad_norm": 1.935791254043579, |
|
"learning_rate": 0.0008172748752953531, |
|
"epoch": 0.55 |
|
}, |
|
{ |
|
"loss": 9.9477, |
|
"grad_norm": 1.7790919542312622, |
|
"learning_rate": 0.0008163997549663079, |
|
"epoch": 0.55 |
|
}, |
|
{ |
|
"loss": 9.2234, |
|
"grad_norm": 0.8335697650909424, |
|
"learning_rate": 0.0008155246346372626, |
|
"epoch": 0.55 |
|
}, |
|
{ |
|
"loss": 9.8562, |
|
"grad_norm": 2.750474691390991, |
|
"learning_rate": 0.0008146495143082174, |
|
"epoch": 0.56 |
|
}, |
|
{ |
|
"loss": 10.3218, |
|
"grad_norm": 1.4811447858810425, |
|
"learning_rate": 0.0008137743939791722, |
|
"epoch": 0.56 |
|
}, |
|
{ |
|
"loss": 9.6582, |
|
"grad_norm": 1.9921342134475708, |
|
"learning_rate": 0.0008128992736501269, |
|
"epoch": 0.56 |
|
}, |
|
{ |
|
"loss": 9.8513, |
|
"grad_norm": 2.635014295578003, |
|
"learning_rate": 0.0008120241533210817, |
|
"epoch": 0.56 |
|
}, |
|
{ |
|
"loss": 9.8862, |
|
"grad_norm": 1.5898804664611816, |
|
"learning_rate": 0.0008111490329920365, |
|
"epoch": 0.57 |
|
}, |
|
{ |
|
"loss": 9.4721, |
|
"grad_norm": 4.158829689025879, |
|
"learning_rate": 0.0008102739126629912, |
|
"epoch": 0.57 |
|
}, |
|
{ |
|
"loss": 10.1474, |
|
"grad_norm": 1.8269054889678955, |
|
"learning_rate": 0.000809398792333946, |
|
"epoch": 0.57 |
|
}, |
|
{ |
|
"loss": 9.4288, |
|
"grad_norm": 3.384010076522827, |
|
"learning_rate": 0.0008085236720049007, |
|
"epoch": 0.57 |
|
}, |
|
{ |
|
"loss": 10.0144, |
|
"grad_norm": 1.6854453086853027, |
|
"learning_rate": 0.0008076485516758555, |
|
"epoch": 0.58 |
|
}, |
|
{ |
|
"loss": 10.229, |
|
"grad_norm": 2.0812976360321045, |
|
"learning_rate": 0.0008067734313468103, |
|
"epoch": 0.58 |
|
}, |
|
{ |
|
"loss": 9.7204, |
|
"grad_norm": 1.7673369646072388, |
|
"learning_rate": 0.000805898311017765, |
|
"epoch": 0.58 |
|
}, |
|
{ |
|
"loss": 9.6859, |
|
"grad_norm": 2.155963897705078, |
|
"learning_rate": 0.0008050231906887198, |
|
"epoch": 0.59 |
|
}, |
|
{ |
|
"loss": 10.029, |
|
"grad_norm": 1.482950210571289, |
|
"learning_rate": 0.0008041480703596745, |
|
"epoch": 0.59 |
|
}, |
|
{ |
|
"loss": 9.25, |
|
"grad_norm": 2.6473171710968018, |
|
"learning_rate": 0.0008032729500306293, |
|
"epoch": 0.59 |
|
}, |
|
{ |
|
"loss": 10.028, |
|
"grad_norm": 1.3584322929382324, |
|
"learning_rate": 0.0008023978297015841, |
|
"epoch": 0.59 |
|
}, |
|
{ |
|
"loss": 9.6924, |
|
"grad_norm": 1.74970543384552, |
|
"learning_rate": 0.0008015227093725388, |
|
"epoch": 0.6 |
|
}, |
|
{ |
|
"loss": 10.0445, |
|
"grad_norm": 2.0750019550323486, |
|
"learning_rate": 0.0008006475890434936, |
|
"epoch": 0.6 |
|
}, |
|
{ |
|
"loss": 9.7962, |
|
"grad_norm": 7.219356060028076, |
|
"learning_rate": 0.0007997724687144482, |
|
"epoch": 0.6 |
|
}, |
|
{ |
|
"loss": 10.215, |
|
"grad_norm": 1.2369924783706665, |
|
"learning_rate": 0.0007988973483854029, |
|
"epoch": 0.6 |
|
}, |
|
{ |
|
"loss": 9.538, |
|
"grad_norm": 1.9686328172683716, |
|
"learning_rate": 0.0007980222280563577, |
|
"epoch": 0.61 |
|
}, |
|
{ |
|
"loss": 10.2107, |
|
"grad_norm": 1.2081037759780884, |
|
"learning_rate": 0.0007971471077273124, |
|
"epoch": 0.61 |
|
}, |
|
{ |
|
"loss": 9.6709, |
|
"grad_norm": 1.7755659818649292, |
|
"learning_rate": 0.0007962719873982672, |
|
"epoch": 0.61 |
|
}, |
|
{ |
|
"loss": 9.7973, |
|
"grad_norm": 2.226400375366211, |
|
"learning_rate": 0.000795396867069222, |
|
"epoch": 0.61 |
|
}, |
|
{ |
|
"loss": 9.5564, |
|
"grad_norm": 1.2814253568649292, |
|
"learning_rate": 0.0007945217467401767, |
|
"epoch": 0.62 |
|
}, |
|
{ |
|
"loss": 9.7987, |
|
"grad_norm": 2.0225868225097656, |
|
"learning_rate": 0.0007936466264111315, |
|
"epoch": 0.62 |
|
}, |
|
{ |
|
"loss": 10.0866, |
|
"grad_norm": 2.059910774230957, |
|
"learning_rate": 0.0007927715060820862, |
|
"epoch": 0.62 |
|
}, |
|
{ |
|
"loss": 10.031, |
|
"grad_norm": 3.564408779144287, |
|
"learning_rate": 0.000791896385753041, |
|
"epoch": 0.62 |
|
}, |
|
{ |
|
"loss": 9.5562, |
|
"grad_norm": 1.6237695217132568, |
|
"learning_rate": 0.0007910212654239958, |
|
"epoch": 0.63 |
|
}, |
|
{ |
|
"loss": 10.032, |
|
"grad_norm": 1.8051832914352417, |
|
"learning_rate": 0.0007901461450949505, |
|
"epoch": 0.63 |
|
}, |
|
{ |
|
"loss": 9.5223, |
|
"grad_norm": 1.807507872581482, |
|
"learning_rate": 0.0007892710247659053, |
|
"epoch": 0.63 |
|
}, |
|
{ |
|
"loss": 9.4476, |
|
"grad_norm": 1.3200876712799072, |
|
"learning_rate": 0.00078839590443686, |
|
"epoch": 0.64 |
|
}, |
|
{ |
|
"loss": 9.4836, |
|
"grad_norm": 3.295555353164673, |
|
"learning_rate": 0.0007875207841078148, |
|
"epoch": 0.64 |
|
}, |
|
{ |
|
"loss": 9.9695, |
|
"grad_norm": 2.036158561706543, |
|
"learning_rate": 0.0007867331758116741, |
|
"epoch": 0.64 |
|
}, |
|
{ |
|
"loss": 9.414, |
|
"grad_norm": 1.6501291990280151, |
|
"learning_rate": 0.0007858580554826289, |
|
"epoch": 0.64 |
|
}, |
|
{ |
|
"loss": 10.3832, |
|
"grad_norm": 1.3873107433319092, |
|
"learning_rate": 0.0007849829351535836, |
|
"epoch": 0.65 |
|
}, |
|
{ |
|
"loss": 9.6308, |
|
"grad_norm": 1.0633749961853027, |
|
"learning_rate": 0.0007841078148245384, |
|
"epoch": 0.65 |
|
}, |
|
{ |
|
"loss": 9.8861, |
|
"grad_norm": 2.238201141357422, |
|
"learning_rate": 0.0007832326944954931, |
|
"epoch": 0.65 |
|
}, |
|
{ |
|
"loss": 9.9682, |
|
"grad_norm": 1.2320759296417236, |
|
"learning_rate": 0.0007823575741664479, |
|
"epoch": 0.65 |
|
}, |
|
{ |
|
"loss": 9.496, |
|
"grad_norm": 1.8895844221115112, |
|
"learning_rate": 0.0007814824538374027, |
|
"epoch": 0.66 |
|
}, |
|
{ |
|
"loss": 9.9117, |
|
"grad_norm": 1.7297803163528442, |
|
"learning_rate": 0.0007806073335083574, |
|
"epoch": 0.66 |
|
}, |
|
{ |
|
"loss": 10.0705, |
|
"grad_norm": 1.8089996576309204, |
|
"learning_rate": 0.0007797322131793122, |
|
"epoch": 0.66 |
|
}, |
|
{ |
|
"loss": 9.8684, |
|
"grad_norm": 2.4221599102020264, |
|
"learning_rate": 0.000778857092850267, |
|
"epoch": 0.66 |
|
}, |
|
{ |
|
"loss": 9.343, |
|
"grad_norm": 1.869035243988037, |
|
"learning_rate": 0.0007779819725212217, |
|
"epoch": 0.67 |
|
}, |
|
{ |
|
"loss": 9.395, |
|
"grad_norm": 1.5427782535552979, |
|
"learning_rate": 0.0007771068521921765, |
|
"epoch": 0.67 |
|
}, |
|
{ |
|
"loss": 9.3372, |
|
"grad_norm": 1.2343759536743164, |
|
"learning_rate": 0.0007762317318631312, |
|
"epoch": 0.67 |
|
}, |
|
{ |
|
"loss": 10.0514, |
|
"grad_norm": 1.057860016822815, |
|
"learning_rate": 0.000775356611534086, |
|
"epoch": 0.67 |
|
}, |
|
{ |
|
"loss": 9.8897, |
|
"grad_norm": 4.536896228790283, |
|
"learning_rate": 0.0007744814912050408, |
|
"epoch": 0.68 |
|
}, |
|
{ |
|
"loss": 9.7529, |
|
"grad_norm": 2.2841501235961914, |
|
"learning_rate": 0.0007736063708759955, |
|
"epoch": 0.68 |
|
}, |
|
{ |
|
"loss": 9.7393, |
|
"grad_norm": 1.4836674928665161, |
|
"learning_rate": 0.0007727312505469503, |
|
"epoch": 0.68 |
|
}, |
|
{ |
|
"loss": 9.4403, |
|
"grad_norm": 1.9073762893676758, |
|
"learning_rate": 0.000771856130217905, |
|
"epoch": 0.69 |
|
}, |
|
{ |
|
"loss": 9.8424, |
|
"grad_norm": 2.367785930633545, |
|
"learning_rate": 0.0007709810098888598, |
|
"epoch": 0.69 |
|
}, |
|
{ |
|
"loss": 9.5098, |
|
"grad_norm": 0.824318528175354, |
|
"learning_rate": 0.0007701058895598146, |
|
"epoch": 0.69 |
|
}, |
|
{ |
|
"loss": 9.4785, |
|
"grad_norm": 1.2716361284255981, |
|
"learning_rate": 0.0007692307692307693, |
|
"epoch": 0.69 |
|
}, |
|
{ |
|
"loss": 9.8112, |
|
"grad_norm": 2.1307737827301025, |
|
"learning_rate": 0.0007683556489017241, |
|
"epoch": 0.7 |
|
}, |
|
{ |
|
"loss": 9.5932, |
|
"grad_norm": 2.0558087825775146, |
|
"learning_rate": 0.0007674805285726788, |
|
"epoch": 0.7 |
|
}, |
|
{ |
|
"loss": 9.5525, |
|
"grad_norm": 1.582262396812439, |
|
"learning_rate": 0.0007666054082436335, |
|
"epoch": 0.7 |
|
}, |
|
{ |
|
"loss": 9.8359, |
|
"grad_norm": 7.788843154907227, |
|
"learning_rate": 0.0007657302879145883, |
|
"epoch": 0.7 |
|
}, |
|
{ |
|
"loss": 10.3724, |
|
"grad_norm": 1.328479528427124, |
|
"learning_rate": 0.000764855167585543, |
|
"epoch": 0.71 |
|
}, |
|
{ |
|
"loss": 8.8465, |
|
"grad_norm": 1.6026923656463623, |
|
"learning_rate": 0.0007639800472564978, |
|
"epoch": 0.71 |
|
}, |
|
{ |
|
"loss": 9.4257, |
|
"grad_norm": 4.00112247467041, |
|
"learning_rate": 0.0007631049269274526, |
|
"epoch": 0.71 |
|
}, |
|
{ |
|
"loss": 9.4006, |
|
"grad_norm": 1.2519035339355469, |
|
"learning_rate": 0.0007622298065984073, |
|
"epoch": 0.71 |
|
}, |
|
{ |
|
"loss": 9.2469, |
|
"grad_norm": 1.0302975177764893, |
|
"learning_rate": 0.0007613546862693621, |
|
"epoch": 0.72 |
|
}, |
|
{ |
|
"loss": 9.6992, |
|
"grad_norm": 1.066437840461731, |
|
"learning_rate": 0.0007604795659403168, |
|
"epoch": 0.72 |
|
}, |
|
{ |
|
"loss": 8.9602, |
|
"grad_norm": 1.232923984527588, |
|
"learning_rate": 0.0007596044456112715, |
|
"epoch": 0.72 |
|
}, |
|
{ |
|
"loss": 10.1371, |
|
"grad_norm": 2.129009962081909, |
|
"learning_rate": 0.0007587293252822263, |
|
"epoch": 0.72 |
|
}, |
|
{ |
|
"loss": 9.3879, |
|
"grad_norm": 1.385560154914856, |
|
"learning_rate": 0.000757854204953181, |
|
"epoch": 0.73 |
|
}, |
|
{ |
|
"loss": 9.898, |
|
"grad_norm": 15.102237701416016, |
|
"learning_rate": 0.0007569790846241358, |
|
"epoch": 0.73 |
|
}, |
|
{ |
|
"loss": 9.723, |
|
"grad_norm": 1.5371789932250977, |
|
"learning_rate": 0.0007561039642950905, |
|
"epoch": 0.73 |
|
}, |
|
{ |
|
"loss": 9.5436, |
|
"grad_norm": 1.3847825527191162, |
|
"learning_rate": 0.0007552288439660453, |
|
"epoch": 0.73 |
|
}, |
|
{ |
|
"loss": 9.4084, |
|
"grad_norm": 2.662229299545288, |
|
"learning_rate": 0.0007543537236370001, |
|
"epoch": 0.74 |
|
}, |
|
{ |
|
"loss": 9.6916, |
|
"grad_norm": 1.3952440023422241, |
|
"learning_rate": 0.0007534786033079548, |
|
"epoch": 0.74 |
|
}, |
|
{ |
|
"loss": 9.2971, |
|
"grad_norm": 2.79449725151062, |
|
"learning_rate": 0.0007526034829789096, |
|
"epoch": 0.74 |
|
}, |
|
{ |
|
"loss": 9.6677, |
|
"grad_norm": 0.959707498550415, |
|
"learning_rate": 0.0007517283626498644, |
|
"epoch": 0.75 |
|
}, |
|
{ |
|
"loss": 9.5952, |
|
"grad_norm": 1.7505630254745483, |
|
"learning_rate": 0.0007508532423208191, |
|
"epoch": 0.75 |
|
}, |
|
{ |
|
"loss": 9.901, |
|
"grad_norm": 4.176792621612549, |
|
"learning_rate": 0.0007499781219917739, |
|
"epoch": 0.75 |
|
}, |
|
{ |
|
"loss": 9.5036, |
|
"grad_norm": 2.338407516479492, |
|
"learning_rate": 0.0007491030016627286, |
|
"epoch": 0.75 |
|
}, |
|
{ |
|
"loss": 10.0173, |
|
"grad_norm": 1.4003384113311768, |
|
"learning_rate": 0.0007482278813336834, |
|
"epoch": 0.76 |
|
}, |
|
{ |
|
"loss": 9.7204, |
|
"grad_norm": 2.0305333137512207, |
|
"learning_rate": 0.0007473527610046382, |
|
"epoch": 0.76 |
|
}, |
|
{ |
|
"loss": 9.7901, |
|
"grad_norm": 2.2396442890167236, |
|
"learning_rate": 0.0007464776406755928, |
|
"epoch": 0.76 |
|
}, |
|
{ |
|
"loss": 9.5465, |
|
"grad_norm": 3.230546474456787, |
|
"learning_rate": 0.0007456025203465476, |
|
"epoch": 0.76 |
|
}, |
|
{ |
|
"loss": 8.9817, |
|
"grad_norm": 3.14975643157959, |
|
"learning_rate": 0.0007447274000175023, |
|
"epoch": 0.77 |
|
}, |
|
{ |
|
"loss": 10.0403, |
|
"grad_norm": 2.1714890003204346, |
|
"learning_rate": 0.0007438522796884571, |
|
"epoch": 0.77 |
|
}, |
|
{ |
|
"loss": 9.054, |
|
"grad_norm": 1.8472590446472168, |
|
"learning_rate": 0.0007429771593594119, |
|
"epoch": 0.77 |
|
}, |
|
{ |
|
"loss": 9.4847, |
|
"grad_norm": 1.0868862867355347, |
|
"learning_rate": 0.0007421020390303666, |
|
"epoch": 0.77 |
|
}, |
|
{ |
|
"loss": 9.5688, |
|
"grad_norm": 0.9088165760040283, |
|
"learning_rate": 0.0007412269187013214, |
|
"epoch": 0.78 |
|
}, |
|
{ |
|
"loss": 9.2655, |
|
"grad_norm": 1.2336516380310059, |
|
"learning_rate": 0.0007403517983722762, |
|
"epoch": 0.78 |
|
}, |
|
{ |
|
"loss": 9.6194, |
|
"grad_norm": 1.2794588804244995, |
|
"learning_rate": 0.0007394766780432309, |
|
"epoch": 0.78 |
|
}, |
|
{ |
|
"loss": 9.4072, |
|
"grad_norm": 1.5056113004684448, |
|
"learning_rate": 0.0007386015577141857, |
|
"epoch": 0.78 |
|
}, |
|
{ |
|
"loss": 8.781, |
|
"grad_norm": 1.809520959854126, |
|
"learning_rate": 0.0007377264373851404, |
|
"epoch": 0.79 |
|
}, |
|
{ |
|
"loss": 9.3203, |
|
"grad_norm": 3.1000723838806152, |
|
"learning_rate": 0.0007368513170560952, |
|
"epoch": 0.79 |
|
}, |
|
{ |
|
"loss": 9.3199, |
|
"grad_norm": 4.879993915557861, |
|
"learning_rate": 0.00073597619672705, |
|
"epoch": 0.79 |
|
}, |
|
{ |
|
"loss": 10.2243, |
|
"grad_norm": 1.508380651473999, |
|
"learning_rate": 0.0007351010763980047, |
|
"epoch": 0.8 |
|
}, |
|
{ |
|
"loss": 9.3476, |
|
"grad_norm": 1.2379094362258911, |
|
"learning_rate": 0.0007342259560689595, |
|
"epoch": 0.8 |
|
}, |
|
{ |
|
"loss": 9.3482, |
|
"grad_norm": 1.3472929000854492, |
|
"learning_rate": 0.0007333508357399142, |
|
"epoch": 0.8 |
|
}, |
|
{ |
|
"loss": 9.1645, |
|
"grad_norm": 1.2490941286087036, |
|
"learning_rate": 0.000732475715410869, |
|
"epoch": 0.8 |
|
}, |
|
{ |
|
"loss": 9.8443, |
|
"grad_norm": 1.3615162372589111, |
|
"learning_rate": 0.0007316005950818238, |
|
"epoch": 0.81 |
|
}, |
|
{ |
|
"loss": 9.1608, |
|
"grad_norm": 1.608033299446106, |
|
"learning_rate": 0.0007307254747527785, |
|
"epoch": 0.81 |
|
}, |
|
{ |
|
"loss": 9.5366, |
|
"grad_norm": 1.819758415222168, |
|
"learning_rate": 0.0007298503544237333, |
|
"epoch": 0.81 |
|
}, |
|
{ |
|
"loss": 9.3414, |
|
"grad_norm": 1.190049409866333, |
|
"learning_rate": 0.0007289752340946881, |
|
"epoch": 0.81 |
|
}, |
|
{ |
|
"loss": 9.3362, |
|
"grad_norm": 1.136693000793457, |
|
"learning_rate": 0.0007281001137656428, |
|
"epoch": 0.82 |
|
}, |
|
{ |
|
"loss": 9.4184, |
|
"grad_norm": 1.3066457509994507, |
|
"learning_rate": 0.0007272249934365976, |
|
"epoch": 0.82 |
|
}, |
|
{ |
|
"loss": 9.3295, |
|
"grad_norm": 2.193195343017578, |
|
"learning_rate": 0.0007263498731075523, |
|
"epoch": 0.82 |
|
}, |
|
{ |
|
"loss": 9.0824, |
|
"grad_norm": 1.2458583116531372, |
|
"learning_rate": 0.0007254747527785071, |
|
"epoch": 0.82 |
|
}, |
|
{ |
|
"loss": 9.4671, |
|
"grad_norm": 1.4734137058258057, |
|
"learning_rate": 0.0007245996324494618, |
|
"epoch": 0.83 |
|
}, |
|
{ |
|
"loss": 8.8882, |
|
"grad_norm": 1.8609868288040161, |
|
"learning_rate": 0.0007237245121204165, |
|
"epoch": 0.83 |
|
}, |
|
{ |
|
"loss": 9.8334, |
|
"grad_norm": 1.2084137201309204, |
|
"learning_rate": 0.0007228493917913713, |
|
"epoch": 0.83 |
|
}, |
|
{ |
|
"loss": 9.301, |
|
"grad_norm": 1.3520543575286865, |
|
"learning_rate": 0.000721974271462326, |
|
"epoch": 0.83 |
|
}, |
|
{ |
|
"loss": 9.4308, |
|
"grad_norm": 1.7796053886413574, |
|
"learning_rate": 0.0007210991511332808, |
|
"epoch": 0.84 |
|
}, |
|
{ |
|
"loss": 9.2915, |
|
"grad_norm": 1.583756685256958, |
|
"learning_rate": 0.0007202240308042356, |
|
"epoch": 0.84 |
|
}, |
|
{ |
|
"loss": 9.543, |
|
"grad_norm": 1.3439078330993652, |
|
"learning_rate": 0.0007193489104751903, |
|
"epoch": 0.84 |
|
}, |
|
{ |
|
"loss": 9.4767, |
|
"grad_norm": 1.0626850128173828, |
|
"learning_rate": 0.0007184737901461451, |
|
"epoch": 0.85 |
|
}, |
|
{ |
|
"loss": 9.6831, |
|
"grad_norm": 1.559846043586731, |
|
"learning_rate": 0.0007175986698170999, |
|
"epoch": 0.85 |
|
}, |
|
{ |
|
"loss": 9.3683, |
|
"grad_norm": 1.3399856090545654, |
|
"learning_rate": 0.0007167235494880546, |
|
"epoch": 0.85 |
|
}, |
|
{ |
|
"loss": 9.4018, |
|
"grad_norm": 2.0115649700164795, |
|
"learning_rate": 0.0007158484291590094, |
|
"epoch": 0.85 |
|
}, |
|
{ |
|
"loss": 9.6007, |
|
"grad_norm": 1.9016413688659668, |
|
"learning_rate": 0.0007149733088299641, |
|
"epoch": 0.86 |
|
}, |
|
{ |
|
"loss": 9.7843, |
|
"grad_norm": 9.662792205810547, |
|
"learning_rate": 0.0007140981885009189, |
|
"epoch": 0.86 |
|
}, |
|
{ |
|
"loss": 9.4248, |
|
"grad_norm": 0.9219140410423279, |
|
"learning_rate": 0.0007132230681718737, |
|
"epoch": 0.86 |
|
}, |
|
{ |
|
"loss": 9.8659, |
|
"grad_norm": 1.0851889848709106, |
|
"learning_rate": 0.0007123479478428284, |
|
"epoch": 0.86 |
|
}, |
|
{ |
|
"loss": 9.1677, |
|
"grad_norm": 1.0349225997924805, |
|
"learning_rate": 0.0007114728275137832, |
|
"epoch": 0.87 |
|
}, |
|
{ |
|
"loss": 9.1666, |
|
"grad_norm": 1.286309003829956, |
|
"learning_rate": 0.000710597707184738, |
|
"epoch": 0.87 |
|
}, |
|
{ |
|
"loss": 9.5514, |
|
"grad_norm": 1.0325031280517578, |
|
"learning_rate": 0.0007097225868556927, |
|
"epoch": 0.87 |
|
}, |
|
{ |
|
"loss": 9.2542, |
|
"grad_norm": 1.2344691753387451, |
|
"learning_rate": 0.0007088474665266475, |
|
"epoch": 0.87 |
|
}, |
|
{ |
|
"loss": 9.1687, |
|
"grad_norm": 0.9820197224617004, |
|
"learning_rate": 0.0007079723461976022, |
|
"epoch": 0.88 |
|
}, |
|
{ |
|
"loss": 8.9295, |
|
"grad_norm": 2.573585033416748, |
|
"learning_rate": 0.000707097225868557, |
|
"epoch": 0.88 |
|
}, |
|
{ |
|
"loss": 9.6702, |
|
"grad_norm": 0.8707136511802673, |
|
"learning_rate": 0.0007062221055395118, |
|
"epoch": 0.88 |
|
}, |
|
{ |
|
"loss": 8.5564, |
|
"grad_norm": 0.9832028150558472, |
|
"learning_rate": 0.0007053469852104665, |
|
"epoch": 0.88 |
|
}, |
|
{ |
|
"loss": 9.426, |
|
"grad_norm": 2.1577107906341553, |
|
"learning_rate": 0.0007044718648814213, |
|
"epoch": 0.89 |
|
}, |
|
{ |
|
"loss": 9.2118, |
|
"grad_norm": 1.6314407587051392, |
|
"learning_rate": 0.000703596744552376, |
|
"epoch": 0.89 |
|
}, |
|
{ |
|
"loss": 9.4482, |
|
"grad_norm": 1.6563376188278198, |
|
"learning_rate": 0.0007027216242233307, |
|
"epoch": 0.89 |
|
}, |
|
{ |
|
"loss": 10.1221, |
|
"grad_norm": 1.3398720026016235, |
|
"learning_rate": 0.0007018465038942855, |
|
"epoch": 0.9 |
|
}, |
|
{ |
|
"loss": 9.2569, |
|
"grad_norm": 1.2780015468597412, |
|
"learning_rate": 0.0007009713835652402, |
|
"epoch": 0.9 |
|
}, |
|
{ |
|
"loss": 9.1485, |
|
"grad_norm": 1.3434102535247803, |
|
"learning_rate": 0.000700096263236195, |
|
"epoch": 0.9 |
|
}, |
|
{ |
|
"loss": 9.3431, |
|
"grad_norm": 2.2103283405303955, |
|
"learning_rate": 0.0006992211429071497, |
|
"epoch": 0.9 |
|
}, |
|
{ |
|
"loss": 9.5529, |
|
"grad_norm": 2.479997158050537, |
|
"learning_rate": 0.0006983460225781045, |
|
"epoch": 0.91 |
|
}, |
|
{ |
|
"loss": 8.5835, |
|
"grad_norm": 1.3891953229904175, |
|
"learning_rate": 0.0006974709022490593, |
|
"epoch": 0.91 |
|
}, |
|
{ |
|
"loss": 8.835, |
|
"grad_norm": 0.9400926828384399, |
|
"learning_rate": 0.000696595781920014, |
|
"epoch": 0.91 |
|
}, |
|
{ |
|
"loss": 9.1069, |
|
"grad_norm": 1.2385962009429932, |
|
"learning_rate": 0.0006957206615909688, |
|
"epoch": 0.91 |
|
}, |
|
{ |
|
"loss": 9.2235, |
|
"grad_norm": 1.7397691011428833, |
|
"learning_rate": 0.0006948455412619236, |
|
"epoch": 0.92 |
|
}, |
|
{ |
|
"loss": 9.2386, |
|
"grad_norm": 1.7163151502609253, |
|
"learning_rate": 0.0006939704209328783, |
|
"epoch": 0.92 |
|
}, |
|
{ |
|
"loss": 8.7562, |
|
"grad_norm": 1.5626498460769653, |
|
"learning_rate": 0.0006930953006038331, |
|
"epoch": 0.92 |
|
}, |
|
{ |
|
"loss": 8.8432, |
|
"grad_norm": 1.9265193939208984, |
|
"learning_rate": 0.0006922201802747878, |
|
"epoch": 0.92 |
|
}, |
|
{ |
|
"loss": 8.8117, |
|
"grad_norm": 1.4459571838378906, |
|
"learning_rate": 0.0006913450599457426, |
|
"epoch": 0.93 |
|
}, |
|
{ |
|
"loss": 9.439, |
|
"grad_norm": 0.9559070467948914, |
|
"learning_rate": 0.0006904699396166974, |
|
"epoch": 0.93 |
|
}, |
|
{ |
|
"loss": 9.1912, |
|
"grad_norm": 1.9344050884246826, |
|
"learning_rate": 0.0006895948192876521, |
|
"epoch": 0.93 |
|
}, |
|
{ |
|
"loss": 9.5571, |
|
"grad_norm": 1.52436101436615, |
|
"learning_rate": 0.0006887196989586069, |
|
"epoch": 0.93 |
|
}, |
|
{ |
|
"loss": 8.9898, |
|
"grad_norm": 1.4828134775161743, |
|
"learning_rate": 0.0006878445786295616, |
|
"epoch": 0.94 |
|
}, |
|
{ |
|
"loss": 9.1776, |
|
"grad_norm": 1.4312185049057007, |
|
"learning_rate": 0.0006869694583005164, |
|
"epoch": 0.94 |
|
}, |
|
{ |
|
"loss": 10.1621, |
|
"grad_norm": 1.2089942693710327, |
|
"learning_rate": 0.0006860943379714712, |
|
"epoch": 0.94 |
|
}, |
|
{ |
|
"loss": 8.8634, |
|
"grad_norm": 5.034254550933838, |
|
"learning_rate": 0.0006852192176424259, |
|
"epoch": 0.94 |
|
}, |
|
{ |
|
"loss": 9.1892, |
|
"grad_norm": 2.494285821914673, |
|
"learning_rate": 0.0006843440973133807, |
|
"epoch": 0.95 |
|
}, |
|
{ |
|
"loss": 8.6028, |
|
"grad_norm": 1.5366199016571045, |
|
"learning_rate": 0.0006834689769843355, |
|
"epoch": 0.95 |
|
}, |
|
{ |
|
"loss": 9.0938, |
|
"grad_norm": 1.1272014379501343, |
|
"learning_rate": 0.0006825938566552902, |
|
"epoch": 0.95 |
|
}, |
|
{ |
|
"loss": 9.607, |
|
"grad_norm": 3.852747917175293, |
|
"learning_rate": 0.000681718736326245, |
|
"epoch": 0.96 |
|
}, |
|
{ |
|
"loss": 9.6214, |
|
"grad_norm": 1.9155749082565308, |
|
"learning_rate": 0.0006808436159971996, |
|
"epoch": 0.96 |
|
}, |
|
{ |
|
"loss": 8.6868, |
|
"grad_norm": 1.9045560359954834, |
|
"learning_rate": 0.0006799684956681543, |
|
"epoch": 0.96 |
|
}, |
|
{ |
|
"loss": 9.8133, |
|
"grad_norm": 1.4083536863327026, |
|
"learning_rate": 0.000679093375339109, |
|
"epoch": 0.96 |
|
}, |
|
{ |
|
"loss": 9.2029, |
|
"grad_norm": 4.824470043182373, |
|
"learning_rate": 0.0006782182550100638, |
|
"epoch": 0.97 |
|
}, |
|
{ |
|
"loss": 9.3758, |
|
"grad_norm": 1.2905750274658203, |
|
"learning_rate": 0.0006773431346810186, |
|
"epoch": 0.97 |
|
}, |
|
{ |
|
"loss": 9.2105, |
|
"grad_norm": 1.4681618213653564, |
|
"learning_rate": 0.0006764680143519733, |
|
"epoch": 0.97 |
|
}, |
|
{ |
|
"loss": 9.1096, |
|
"grad_norm": 1.5041123628616333, |
|
"learning_rate": 0.0006755928940229281, |
|
"epoch": 0.97 |
|
}, |
|
{ |
|
"loss": 9.1485, |
|
"grad_norm": 1.7930779457092285, |
|
"learning_rate": 0.0006747177736938829, |
|
"epoch": 0.98 |
|
}, |
|
{ |
|
"loss": 9.2587, |
|
"grad_norm": 1.1871591806411743, |
|
"learning_rate": 0.0006738426533648376, |
|
"epoch": 0.98 |
|
}, |
|
{ |
|
"loss": 9.2174, |
|
"grad_norm": 1.550445556640625, |
|
"learning_rate": 0.0006729675330357924, |
|
"epoch": 0.98 |
|
}, |
|
{ |
|
"loss": 8.8521, |
|
"grad_norm": 1.361382007598877, |
|
"learning_rate": 0.0006720924127067471, |
|
"epoch": 0.98 |
|
}, |
|
{ |
|
"loss": 9.0098, |
|
"grad_norm": 1.350142002105713, |
|
"learning_rate": 0.0006712172923777019, |
|
"epoch": 0.99 |
|
}, |
|
{ |
|
"loss": 8.6736, |
|
"grad_norm": 1.2662369012832642, |
|
"learning_rate": 0.0006703421720486567, |
|
"epoch": 0.99 |
|
}, |
|
{ |
|
"loss": 8.9752, |
|
"grad_norm": 1.474623441696167, |
|
"learning_rate": 0.0006694670517196114, |
|
"epoch": 0.99 |
|
}, |
|
{ |
|
"loss": 8.7473, |
|
"grad_norm": 2.676971912384033, |
|
"learning_rate": 0.0006685919313905662, |
|
"epoch": 0.99 |
|
}, |
|
{ |
|
"loss": 8.8512, |
|
"grad_norm": 1.114418625831604, |
|
"learning_rate": 0.000667716811061521, |
|
"epoch": 1.0 |
|
}, |
|
{ |
|
"loss": 8.1921, |
|
"grad_norm": 2.0294203758239746, |
|
"learning_rate": 0.0006668416907324757, |
|
"epoch": 1.0 |
|
}, |
|
{ |
|
"loss": 8.8171, |
|
"grad_norm": 0.9778627157211304, |
|
"learning_rate": 0.0006659665704034305, |
|
"epoch": 1.0 |
|
}, |
|
{ |
|
"loss": 8.8809, |
|
"grad_norm": 1.621929407119751, |
|
"learning_rate": 0.0006650914500743852, |
|
"epoch": 1.01 |
|
}, |
|
{ |
|
"loss": 8.9527, |
|
"grad_norm": 1.0340059995651245, |
|
"learning_rate": 0.00066421632974534, |
|
"epoch": 1.01 |
|
}, |
|
{ |
|
"loss": 8.6295, |
|
"grad_norm": 1.4925633668899536, |
|
"learning_rate": 0.0006633412094162948, |
|
"epoch": 1.01 |
|
}, |
|
{ |
|
"loss": 8.7158, |
|
"grad_norm": 1.3651670217514038, |
|
"learning_rate": 0.0006624660890872495, |
|
"epoch": 1.01 |
|
}, |
|
{ |
|
"loss": 9.0606, |
|
"grad_norm": 1.1281485557556152, |
|
"learning_rate": 0.0006615909687582043, |
|
"epoch": 1.02 |
|
}, |
|
{ |
|
"loss": 8.8925, |
|
"grad_norm": 1.0784941911697388, |
|
"learning_rate": 0.000660715848429159, |
|
"epoch": 1.02 |
|
}, |
|
{ |
|
"loss": 9.1237, |
|
"grad_norm": 1.49080228805542, |
|
"learning_rate": 0.0006598407281001137, |
|
"epoch": 1.02 |
|
}, |
|
{ |
|
"loss": 8.9093, |
|
"grad_norm": 1.080828309059143, |
|
"learning_rate": 0.0006589656077710685, |
|
"epoch": 1.02 |
|
}, |
|
{ |
|
"loss": 8.9275, |
|
"grad_norm": 1.0867069959640503, |
|
"learning_rate": 0.0006580904874420232, |
|
"epoch": 1.03 |
|
}, |
|
{ |
|
"loss": 8.5924, |
|
"grad_norm": 1.0178778171539307, |
|
"learning_rate": 0.000657215367112978, |
|
"epoch": 1.03 |
|
}, |
|
{ |
|
"loss": 8.8768, |
|
"grad_norm": 0.978421688079834, |
|
"learning_rate": 0.0006563402467839327, |
|
"epoch": 1.03 |
|
}, |
|
{ |
|
"loss": 8.8812, |
|
"grad_norm": 1.6234030723571777, |
|
"learning_rate": 0.0006554651264548875, |
|
"epoch": 1.03 |
|
}, |
|
{ |
|
"loss": 9.5212, |
|
"grad_norm": 5.744367599487305, |
|
"learning_rate": 0.0006545900061258423, |
|
"epoch": 1.04 |
|
}, |
|
{ |
|
"loss": 8.066, |
|
"grad_norm": 3.1010031700134277, |
|
"learning_rate": 0.000653714885796797, |
|
"epoch": 1.04 |
|
}, |
|
{ |
|
"loss": 8.8401, |
|
"grad_norm": 1.4084874391555786, |
|
"learning_rate": 0.0006528397654677518, |
|
"epoch": 1.04 |
|
}, |
|
{ |
|
"loss": 9.1554, |
|
"grad_norm": 1.4125443696975708, |
|
"learning_rate": 0.0006519646451387066, |
|
"epoch": 1.04 |
|
}, |
|
{ |
|
"loss": 8.5098, |
|
"grad_norm": 1.0087417364120483, |
|
"learning_rate": 0.0006510895248096613, |
|
"epoch": 1.05 |
|
}, |
|
{ |
|
"loss": 8.6227, |
|
"grad_norm": 1.404480218887329, |
|
"learning_rate": 0.0006502144044806161, |
|
"epoch": 1.05 |
|
}, |
|
{ |
|
"loss": 8.7843, |
|
"grad_norm": 1.1295698881149292, |
|
"learning_rate": 0.0006493392841515708, |
|
"epoch": 1.05 |
|
}, |
|
{ |
|
"loss": 8.6766, |
|
"grad_norm": 1.0821887254714966, |
|
"learning_rate": 0.0006484641638225256, |
|
"epoch": 1.06 |
|
}, |
|
{ |
|
"loss": 8.6414, |
|
"grad_norm": 1.1444706916809082, |
|
"learning_rate": 0.0006475890434934804, |
|
"epoch": 1.06 |
|
}, |
|
{ |
|
"loss": 8.457, |
|
"grad_norm": 1.277224063873291, |
|
"learning_rate": 0.0006467139231644351, |
|
"epoch": 1.06 |
|
}, |
|
{ |
|
"loss": 9.058, |
|
"grad_norm": 1.4391515254974365, |
|
"learning_rate": 0.0006458388028353899, |
|
"epoch": 1.06 |
|
}, |
|
{ |
|
"loss": 9.0137, |
|
"grad_norm": 1.1909124851226807, |
|
"learning_rate": 0.0006449636825063447, |
|
"epoch": 1.07 |
|
}, |
|
{ |
|
"loss": 8.95, |
|
"grad_norm": 1.1959373950958252, |
|
"learning_rate": 0.0006440885621772994, |
|
"epoch": 1.07 |
|
}, |
|
{ |
|
"loss": 8.7242, |
|
"grad_norm": 1.0742520093917847, |
|
"learning_rate": 0.0006432134418482542, |
|
"epoch": 1.07 |
|
}, |
|
{ |
|
"loss": 8.6848, |
|
"grad_norm": 1.1215168237686157, |
|
"learning_rate": 0.0006423383215192089, |
|
"epoch": 1.07 |
|
}, |
|
{ |
|
"loss": 8.2356, |
|
"grad_norm": 1.329377293586731, |
|
"learning_rate": 0.0006414632011901637, |
|
"epoch": 1.08 |
|
}, |
|
{ |
|
"loss": 9.357, |
|
"grad_norm": 1.2252676486968994, |
|
"learning_rate": 0.0006405880808611185, |
|
"epoch": 1.08 |
|
}, |
|
{ |
|
"loss": 8.9564, |
|
"grad_norm": 1.4522862434387207, |
|
"learning_rate": 0.0006397129605320732, |
|
"epoch": 1.08 |
|
}, |
|
{ |
|
"loss": 9.315, |
|
"grad_norm": 1.3707520961761475, |
|
"learning_rate": 0.000638837840203028, |
|
"epoch": 1.08 |
|
}, |
|
{ |
|
"loss": 8.5879, |
|
"grad_norm": 1.6546357870101929, |
|
"learning_rate": 0.0006379627198739826, |
|
"epoch": 1.09 |
|
}, |
|
{ |
|
"loss": 9.4063, |
|
"grad_norm": 0.9310407638549805, |
|
"learning_rate": 0.0006370875995449374, |
|
"epoch": 1.09 |
|
}, |
|
{ |
|
"loss": 8.8435, |
|
"grad_norm": 0.9878571629524231, |
|
"learning_rate": 0.0006362124792158922, |
|
"epoch": 1.09 |
|
}, |
|
{ |
|
"loss": 9.0975, |
|
"grad_norm": 0.9288727045059204, |
|
"learning_rate": 0.0006353373588868469, |
|
"epoch": 1.09 |
|
}, |
|
{ |
|
"loss": 9.219, |
|
"grad_norm": 0.9407894015312195, |
|
"learning_rate": 0.0006344622385578017, |
|
"epoch": 1.1 |
|
}, |
|
{ |
|
"loss": 8.6555, |
|
"grad_norm": 0.9899985790252686, |
|
"learning_rate": 0.0006335871182287564, |
|
"epoch": 1.1 |
|
}, |
|
{ |
|
"loss": 8.1403, |
|
"grad_norm": 0.8422369360923767, |
|
"learning_rate": 0.0006327119978997112, |
|
"epoch": 1.1 |
|
}, |
|
{ |
|
"loss": 8.5879, |
|
"grad_norm": 1.1602038145065308, |
|
"learning_rate": 0.000631836877570666, |
|
"epoch": 1.11 |
|
}, |
|
{ |
|
"loss": 8.8147, |
|
"grad_norm": 1.0149036645889282, |
|
"learning_rate": 0.0006309617572416207, |
|
"epoch": 1.11 |
|
}, |
|
{ |
|
"loss": 8.6708, |
|
"grad_norm": 1.3015429973602295, |
|
"learning_rate": 0.0006300866369125755, |
|
"epoch": 1.11 |
|
}, |
|
{ |
|
"loss": 8.213, |
|
"grad_norm": 1.0710703134536743, |
|
"learning_rate": 0.0006292115165835303, |
|
"epoch": 1.11 |
|
}, |
|
{ |
|
"loss": 8.7651, |
|
"grad_norm": 0.9002228379249573, |
|
"learning_rate": 0.000628336396254485, |
|
"epoch": 1.12 |
|
}, |
|
{ |
|
"loss": 9.2161, |
|
"grad_norm": 1.2090556621551514, |
|
"learning_rate": 0.0006274612759254398, |
|
"epoch": 1.12 |
|
}, |
|
{ |
|
"loss": 8.4087, |
|
"grad_norm": 1.2179570198059082, |
|
"learning_rate": 0.0006265861555963945, |
|
"epoch": 1.12 |
|
}, |
|
{ |
|
"loss": 8.5906, |
|
"grad_norm": 1.7626177072525024, |
|
"learning_rate": 0.0006257110352673493, |
|
"epoch": 1.12 |
|
}, |
|
{ |
|
"loss": 8.7996, |
|
"grad_norm": 1.2657760381698608, |
|
"learning_rate": 0.0006248359149383041, |
|
"epoch": 1.13 |
|
}, |
|
{ |
|
"loss": 8.7193, |
|
"grad_norm": 0.8737196326255798, |
|
"learning_rate": 0.0006239607946092588, |
|
"epoch": 1.13 |
|
}, |
|
{ |
|
"loss": 8.5347, |
|
"grad_norm": 1.1074841022491455, |
|
"learning_rate": 0.0006230856742802136, |
|
"epoch": 1.13 |
|
}, |
|
{ |
|
"loss": 8.8374, |
|
"grad_norm": 1.264391303062439, |
|
"learning_rate": 0.0006222105539511684, |
|
"epoch": 1.13 |
|
}, |
|
{ |
|
"loss": 7.9866, |
|
"grad_norm": 1.0013505220413208, |
|
"learning_rate": 0.0006213354336221231, |
|
"epoch": 1.14 |
|
}, |
|
{ |
|
"loss": 8.1635, |
|
"grad_norm": 1.0330276489257812, |
|
"learning_rate": 0.0006204603132930779, |
|
"epoch": 1.14 |
|
}, |
|
{ |
|
"loss": 8.1751, |
|
"grad_norm": 1.125343918800354, |
|
"learning_rate": 0.0006195851929640326, |
|
"epoch": 1.14 |
|
}, |
|
{ |
|
"loss": 9.082, |
|
"grad_norm": 1.0461503267288208, |
|
"learning_rate": 0.0006187100726349874, |
|
"epoch": 1.14 |
|
}, |
|
{ |
|
"loss": 8.4013, |
|
"grad_norm": 1.2671931982040405, |
|
"learning_rate": 0.0006178349523059422, |
|
"epoch": 1.15 |
|
}, |
|
{ |
|
"loss": 8.735, |
|
"grad_norm": 1.315640926361084, |
|
"learning_rate": 0.0006169598319768969, |
|
"epoch": 1.15 |
|
}, |
|
{ |
|
"loss": 8.3872, |
|
"grad_norm": 1.0746458768844604, |
|
"learning_rate": 0.0006160847116478516, |
|
"epoch": 1.15 |
|
}, |
|
{ |
|
"loss": 8.4791, |
|
"grad_norm": 0.9568318724632263, |
|
"learning_rate": 0.0006152095913188063, |
|
"epoch": 1.15 |
|
}, |
|
{ |
|
"loss": 8.4284, |
|
"grad_norm": 1.0956138372421265, |
|
"learning_rate": 0.0006143344709897611, |
|
"epoch": 1.16 |
|
}, |
|
{ |
|
"loss": 9.1513, |
|
"grad_norm": 1.2635217905044556, |
|
"learning_rate": 0.0006134593506607159, |
|
"epoch": 1.16 |
|
}, |
|
{ |
|
"loss": 8.7084, |
|
"grad_norm": 1.242577075958252, |
|
"learning_rate": 0.0006125842303316706, |
|
"epoch": 1.16 |
|
}, |
|
{ |
|
"loss": 8.9941, |
|
"grad_norm": 1.0156121253967285, |
|
"learning_rate": 0.0006117091100026254, |
|
"epoch": 1.17 |
|
}, |
|
{ |
|
"loss": 8.731, |
|
"grad_norm": 1.3975499868392944, |
|
"learning_rate": 0.0006108339896735801, |
|
"epoch": 1.17 |
|
}, |
|
{ |
|
"loss": 8.5287, |
|
"grad_norm": 1.0764504671096802, |
|
"learning_rate": 0.0006099588693445349, |
|
"epoch": 1.17 |
|
}, |
|
{ |
|
"loss": 8.2368, |
|
"grad_norm": 1.0151234865188599, |
|
"learning_rate": 0.0006090837490154897, |
|
"epoch": 1.17 |
|
}, |
|
{ |
|
"loss": 9.1091, |
|
"grad_norm": 6.751773834228516, |
|
"learning_rate": 0.0006082086286864444, |
|
"epoch": 1.18 |
|
}, |
|
{ |
|
"loss": 8.7919, |
|
"grad_norm": 0.95284503698349, |
|
"learning_rate": 0.0006073335083573992, |
|
"epoch": 1.18 |
|
}, |
|
{ |
|
"loss": 8.0937, |
|
"grad_norm": 1.131046175956726, |
|
"learning_rate": 0.000606458388028354, |
|
"epoch": 1.18 |
|
}, |
|
{ |
|
"loss": 8.4255, |
|
"grad_norm": 0.8307482600212097, |
|
"learning_rate": 0.0006055832676993087, |
|
"epoch": 1.18 |
|
}, |
|
{ |
|
"loss": 8.3428, |
|
"grad_norm": 1.1681163311004639, |
|
"learning_rate": 0.0006047081473702635, |
|
"epoch": 1.19 |
|
}, |
|
{ |
|
"loss": 8.1699, |
|
"grad_norm": 1.6491031646728516, |
|
"learning_rate": 0.0006038330270412182, |
|
"epoch": 1.19 |
|
}, |
|
{ |
|
"loss": 8.3981, |
|
"grad_norm": 0.9328737258911133, |
|
"learning_rate": 0.000602957906712173, |
|
"epoch": 1.19 |
|
}, |
|
{ |
|
"loss": 8.5749, |
|
"grad_norm": 1.3434003591537476, |
|
"learning_rate": 0.0006020827863831278, |
|
"epoch": 1.19 |
|
}, |
|
{ |
|
"loss": 8.6492, |
|
"grad_norm": 1.1651496887207031, |
|
"learning_rate": 0.0006012076660540825, |
|
"epoch": 1.2 |
|
}, |
|
{ |
|
"loss": 8.9343, |
|
"grad_norm": 1.1224288940429688, |
|
"learning_rate": 0.0006003325457250373, |
|
"epoch": 1.2 |
|
}, |
|
{ |
|
"loss": 8.4265, |
|
"grad_norm": 1.1075445413589478, |
|
"learning_rate": 0.0005994574253959919, |
|
"epoch": 1.2 |
|
}, |
|
{ |
|
"loss": 8.3367, |
|
"grad_norm": 1.0349383354187012, |
|
"learning_rate": 0.0005985823050669467, |
|
"epoch": 1.2 |
|
}, |
|
{ |
|
"loss": 8.6752, |
|
"grad_norm": 0.9915909767150879, |
|
"learning_rate": 0.0005977071847379015, |
|
"epoch": 1.21 |
|
}, |
|
{ |
|
"loss": 8.2193, |
|
"grad_norm": 1.172624111175537, |
|
"learning_rate": 0.0005968320644088562, |
|
"epoch": 1.21 |
|
}, |
|
{ |
|
"loss": 7.7701, |
|
"grad_norm": 1.0810112953186035, |
|
"learning_rate": 0.000595956944079811, |
|
"epoch": 1.21 |
|
}, |
|
{ |
|
"loss": 8.9113, |
|
"grad_norm": 1.1411935091018677, |
|
"learning_rate": 0.0005950818237507656, |
|
"epoch": 1.22 |
|
}, |
|
{ |
|
"loss": 8.3426, |
|
"grad_norm": 0.9251805543899536, |
|
"learning_rate": 0.0005942067034217204, |
|
"epoch": 1.22 |
|
}, |
|
{ |
|
"loss": 8.1973, |
|
"grad_norm": 0.9023226499557495, |
|
"learning_rate": 0.0005933315830926752, |
|
"epoch": 1.22 |
|
}, |
|
{ |
|
"loss": 8.8777, |
|
"grad_norm": 0.9467354416847229, |
|
"learning_rate": 0.0005924564627636299, |
|
"epoch": 1.22 |
|
}, |
|
{ |
|
"loss": 8.758, |
|
"grad_norm": 0.9941525459289551, |
|
"learning_rate": 0.0005915813424345847, |
|
"epoch": 1.23 |
|
}, |
|
{ |
|
"loss": 8.6786, |
|
"grad_norm": 0.7721539735794067, |
|
"learning_rate": 0.0005907062221055395, |
|
"epoch": 1.23 |
|
}, |
|
{ |
|
"loss": 8.7063, |
|
"grad_norm": 0.9968111515045166, |
|
"learning_rate": 0.0005898311017764942, |
|
"epoch": 1.23 |
|
}, |
|
{ |
|
"loss": 8.4121, |
|
"grad_norm": 0.8019425272941589, |
|
"learning_rate": 0.000588955981447449, |
|
"epoch": 1.23 |
|
}, |
|
{ |
|
"loss": 8.8181, |
|
"grad_norm": 1.1664308309555054, |
|
"learning_rate": 0.0005880808611184037, |
|
"epoch": 1.24 |
|
}, |
|
{ |
|
"loss": 8.1548, |
|
"grad_norm": 1.008786678314209, |
|
"learning_rate": 0.0005872057407893585, |
|
"epoch": 1.24 |
|
}, |
|
{ |
|
"loss": 8.5725, |
|
"grad_norm": 1.2349562644958496, |
|
"learning_rate": 0.0005863306204603133, |
|
"epoch": 1.24 |
|
}, |
|
{ |
|
"loss": 8.8339, |
|
"grad_norm": 1.2367397546768188, |
|
"learning_rate": 0.000585455500131268, |
|
"epoch": 1.24 |
|
}, |
|
{ |
|
"loss": 8.3184, |
|
"grad_norm": 0.9427123665809631, |
|
"learning_rate": 0.0005845803798022228, |
|
"epoch": 1.25 |
|
}, |
|
{ |
|
"loss": 8.2814, |
|
"grad_norm": 0.951808512210846, |
|
"learning_rate": 0.0005837052594731775, |
|
"epoch": 1.25 |
|
}, |
|
{ |
|
"loss": 8.1453, |
|
"grad_norm": 1.076816439628601, |
|
"learning_rate": 0.0005828301391441323, |
|
"epoch": 1.25 |
|
}, |
|
{ |
|
"loss": 8.5114, |
|
"grad_norm": 1.248741865158081, |
|
"learning_rate": 0.0005819550188150871, |
|
"epoch": 1.25 |
|
}, |
|
{ |
|
"loss": 8.7265, |
|
"grad_norm": 1.0166980028152466, |
|
"learning_rate": 0.0005810798984860418, |
|
"epoch": 1.26 |
|
}, |
|
{ |
|
"loss": 9.0454, |
|
"grad_norm": 1.273942232131958, |
|
"learning_rate": 0.0005802047781569966, |
|
"epoch": 1.26 |
|
}, |
|
{ |
|
"loss": 8.6499, |
|
"grad_norm": 0.8551316857337952, |
|
"learning_rate": 0.0005793296578279514, |
|
"epoch": 1.26 |
|
}, |
|
{ |
|
"loss": 8.0282, |
|
"grad_norm": 1.0231510400772095, |
|
"learning_rate": 0.0005784545374989061, |
|
"epoch": 1.27 |
|
}, |
|
{ |
|
"loss": 8.5694, |
|
"grad_norm": 0.8138982653617859, |
|
"learning_rate": 0.0005775794171698609, |
|
"epoch": 1.27 |
|
}, |
|
{ |
|
"loss": 8.9449, |
|
"grad_norm": 1.151458978652954, |
|
"learning_rate": 0.0005767042968408156, |
|
"epoch": 1.27 |
|
}, |
|
{ |
|
"loss": 8.5309, |
|
"grad_norm": 1.311020851135254, |
|
"learning_rate": 0.0005758291765117704, |
|
"epoch": 1.27 |
|
}, |
|
{ |
|
"loss": 8.3937, |
|
"grad_norm": 1.0431928634643555, |
|
"learning_rate": 0.0005749540561827252, |
|
"epoch": 1.28 |
|
}, |
|
{ |
|
"loss": 8.0121, |
|
"grad_norm": 0.9487342238426208, |
|
"learning_rate": 0.0005740789358536799, |
|
"epoch": 1.28 |
|
}, |
|
{ |
|
"loss": 8.9756, |
|
"grad_norm": 0.7705584764480591, |
|
"learning_rate": 0.0005732038155246346, |
|
"epoch": 1.28 |
|
}, |
|
{ |
|
"loss": 8.9679, |
|
"grad_norm": 0.9359903335571289, |
|
"learning_rate": 0.0005723286951955893, |
|
"epoch": 1.28 |
|
}, |
|
{ |
|
"loss": 8.0724, |
|
"grad_norm": 1.031725525856018, |
|
"learning_rate": 0.0005714535748665441, |
|
"epoch": 1.29 |
|
}, |
|
{ |
|
"loss": 8.7014, |
|
"grad_norm": 1.0501611232757568, |
|
"learning_rate": 0.0005705784545374989, |
|
"epoch": 1.29 |
|
}, |
|
{ |
|
"loss": 8.2284, |
|
"grad_norm": 0.8158836960792542, |
|
"learning_rate": 0.0005697033342084536, |
|
"epoch": 1.29 |
|
}, |
|
{ |
|
"loss": 8.8206, |
|
"grad_norm": 0.8827638030052185, |
|
"learning_rate": 0.0005688282138794084, |
|
"epoch": 1.29 |
|
}, |
|
{ |
|
"loss": 8.4189, |
|
"grad_norm": 0.9118880033493042, |
|
"learning_rate": 0.0005679530935503632, |
|
"epoch": 1.3 |
|
}, |
|
{ |
|
"loss": 8.5532, |
|
"grad_norm": 1.2081084251403809, |
|
"learning_rate": 0.0005670779732213179, |
|
"epoch": 1.3 |
|
}, |
|
{ |
|
"loss": 8.5477, |
|
"grad_norm": 1.3465925455093384, |
|
"learning_rate": 0.0005662028528922727, |
|
"epoch": 1.3 |
|
}, |
|
{ |
|
"loss": 9.2068, |
|
"grad_norm": 0.8770077228546143, |
|
"learning_rate": 0.0005653277325632274, |
|
"epoch": 1.3 |
|
}, |
|
{ |
|
"loss": 8.6147, |
|
"grad_norm": 1.1257092952728271, |
|
"learning_rate": 0.0005644526122341822, |
|
"epoch": 1.31 |
|
}, |
|
{ |
|
"loss": 8.4279, |
|
"grad_norm": 1.0482877492904663, |
|
"learning_rate": 0.000563577491905137, |
|
"epoch": 1.31 |
|
}, |
|
{ |
|
"loss": 9.1236, |
|
"grad_norm": 1.0635833740234375, |
|
"learning_rate": 0.0005627023715760917, |
|
"epoch": 1.31 |
|
}, |
|
{ |
|
"loss": 8.7325, |
|
"grad_norm": 0.866674542427063, |
|
"learning_rate": 0.0005618272512470465, |
|
"epoch": 1.32 |
|
}, |
|
{ |
|
"loss": 8.3691, |
|
"grad_norm": 0.9562137126922607, |
|
"learning_rate": 0.0005609521309180012, |
|
"epoch": 1.32 |
|
}, |
|
{ |
|
"loss": 8.3844, |
|
"grad_norm": 1.2593939304351807, |
|
"learning_rate": 0.000560077010588956, |
|
"epoch": 1.32 |
|
}, |
|
{ |
|
"loss": 8.7797, |
|
"grad_norm": 0.8865370154380798, |
|
"learning_rate": 0.0005592018902599108, |
|
"epoch": 1.32 |
|
}, |
|
{ |
|
"loss": 8.7078, |
|
"grad_norm": 1.0417253971099854, |
|
"learning_rate": 0.0005583267699308655, |
|
"epoch": 1.33 |
|
}, |
|
{ |
|
"loss": 8.6024, |
|
"grad_norm": 1.1513303518295288, |
|
"learning_rate": 0.0005574516496018203, |
|
"epoch": 1.33 |
|
}, |
|
{ |
|
"loss": 8.4373, |
|
"grad_norm": 0.8727751970291138, |
|
"learning_rate": 0.000556576529272775, |
|
"epoch": 1.33 |
|
}, |
|
{ |
|
"loss": 8.2888, |
|
"grad_norm": 1.0075277090072632, |
|
"learning_rate": 0.0005557014089437298, |
|
"epoch": 1.33 |
|
}, |
|
{ |
|
"loss": 8.465, |
|
"grad_norm": 0.9511576294898987, |
|
"learning_rate": 0.0005548262886146846, |
|
"epoch": 1.34 |
|
}, |
|
{ |
|
"loss": 7.7129, |
|
"grad_norm": 0.9443394541740417, |
|
"learning_rate": 0.0005539511682856393, |
|
"epoch": 1.34 |
|
}, |
|
{ |
|
"loss": 8.4521, |
|
"grad_norm": 0.9932364225387573, |
|
"learning_rate": 0.0005530760479565941, |
|
"epoch": 1.34 |
|
}, |
|
{ |
|
"loss": 8.2593, |
|
"grad_norm": 0.8069454431533813, |
|
"learning_rate": 0.0005522009276275489, |
|
"epoch": 1.34 |
|
}, |
|
{ |
|
"loss": 8.4721, |
|
"grad_norm": 1.1227058172225952, |
|
"learning_rate": 0.0005513258072985035, |
|
"epoch": 1.35 |
|
}, |
|
{ |
|
"loss": 8.9954, |
|
"grad_norm": 0.8359375596046448, |
|
"learning_rate": 0.0005504506869694583, |
|
"epoch": 1.35 |
|
}, |
|
{ |
|
"loss": 8.6039, |
|
"grad_norm": 1.1721514463424683, |
|
"learning_rate": 0.000549575566640413, |
|
"epoch": 1.35 |
|
}, |
|
{ |
|
"loss": 7.8393, |
|
"grad_norm": 1.031473994255066, |
|
"learning_rate": 0.0005487004463113678, |
|
"epoch": 1.35 |
|
}, |
|
{ |
|
"loss": 7.8643, |
|
"grad_norm": 0.935614287853241, |
|
"learning_rate": 0.0005478253259823226, |
|
"epoch": 1.36 |
|
}, |
|
{ |
|
"loss": 8.4271, |
|
"grad_norm": 0.9366902709007263, |
|
"learning_rate": 0.0005469502056532773, |
|
"epoch": 1.36 |
|
}, |
|
{ |
|
"loss": 8.3338, |
|
"grad_norm": 0.9616496562957764, |
|
"learning_rate": 0.0005460750853242321, |
|
"epoch": 1.36 |
|
}, |
|
{ |
|
"loss": 8.1388, |
|
"grad_norm": 2.2672061920166016, |
|
"learning_rate": 0.0005451999649951868, |
|
"epoch": 1.36 |
|
}, |
|
{ |
|
"loss": 8.879, |
|
"grad_norm": 1.948036789894104, |
|
"learning_rate": 0.0005443248446661416, |
|
"epoch": 1.37 |
|
}, |
|
{ |
|
"loss": 8.8816, |
|
"grad_norm": 1.0832654237747192, |
|
"learning_rate": 0.0005434497243370964, |
|
"epoch": 1.37 |
|
}, |
|
{ |
|
"loss": 8.5489, |
|
"grad_norm": 0.9174715876579285, |
|
"learning_rate": 0.0005425746040080511, |
|
"epoch": 1.37 |
|
}, |
|
{ |
|
"loss": 8.8525, |
|
"grad_norm": 0.8547096252441406, |
|
"learning_rate": 0.0005416994836790059, |
|
"epoch": 1.38 |
|
}, |
|
{ |
|
"loss": 8.6111, |
|
"grad_norm": 0.7524705529212952, |
|
"learning_rate": 0.0005408243633499607, |
|
"epoch": 1.38 |
|
}, |
|
{ |
|
"loss": 8.0862, |
|
"grad_norm": 0.8433651328086853, |
|
"learning_rate": 0.0005399492430209154, |
|
"epoch": 1.38 |
|
}, |
|
{ |
|
"loss": 8.2379, |
|
"grad_norm": 0.8744563460350037, |
|
"learning_rate": 0.0005390741226918702, |
|
"epoch": 1.38 |
|
}, |
|
{ |
|
"loss": 8.2883, |
|
"grad_norm": 0.8806482553482056, |
|
"learning_rate": 0.0005381990023628249, |
|
"epoch": 1.39 |
|
}, |
|
{ |
|
"loss": 8.6411, |
|
"grad_norm": 0.9276745319366455, |
|
"learning_rate": 0.0005373238820337797, |
|
"epoch": 1.39 |
|
}, |
|
{ |
|
"loss": 8.7561, |
|
"grad_norm": 0.9556492567062378, |
|
"learning_rate": 0.0005364487617047345, |
|
"epoch": 1.39 |
|
}, |
|
{ |
|
"loss": 9.305, |
|
"grad_norm": 0.8606293797492981, |
|
"learning_rate": 0.0005355736413756892, |
|
"epoch": 1.39 |
|
}, |
|
{ |
|
"loss": 8.3839, |
|
"grad_norm": 1.108547329902649, |
|
"learning_rate": 0.000534698521046644, |
|
"epoch": 1.4 |
|
}, |
|
{ |
|
"loss": 8.2164, |
|
"grad_norm": 0.9102107882499695, |
|
"learning_rate": 0.0005338234007175988, |
|
"epoch": 1.4 |
|
}, |
|
{ |
|
"loss": 8.606, |
|
"grad_norm": 1.0984998941421509, |
|
"learning_rate": 0.0005329482803885535, |
|
"epoch": 1.4 |
|
}, |
|
{ |
|
"loss": 8.0491, |
|
"grad_norm": 1.1762152910232544, |
|
"learning_rate": 0.0005320731600595083, |
|
"epoch": 1.4 |
|
}, |
|
{ |
|
"loss": 8.7257, |
|
"grad_norm": 0.9669533371925354, |
|
"learning_rate": 0.000531198039730463, |
|
"epoch": 1.41 |
|
}, |
|
{ |
|
"loss": 8.4473, |
|
"grad_norm": 1.0668437480926514, |
|
"learning_rate": 0.0005303229194014178, |
|
"epoch": 1.41 |
|
}, |
|
{ |
|
"loss": 8.1594, |
|
"grad_norm": 0.8289794921875, |
|
"learning_rate": 0.0005294477990723725, |
|
"epoch": 1.41 |
|
}, |
|
{ |
|
"loss": 8.9208, |
|
"grad_norm": 1.0676897764205933, |
|
"learning_rate": 0.0005285726787433272, |
|
"epoch": 1.41 |
|
}, |
|
{ |
|
"loss": 8.0344, |
|
"grad_norm": 0.9914399981498718, |
|
"learning_rate": 0.000527697558414282, |
|
"epoch": 1.42 |
|
}, |
|
{ |
|
"loss": 7.9721, |
|
"grad_norm": 0.7524304986000061, |
|
"learning_rate": 0.0005268224380852367, |
|
"epoch": 1.42 |
|
}, |
|
{ |
|
"loss": 8.5322, |
|
"grad_norm": 0.9521943926811218, |
|
"learning_rate": 0.0005259473177561915, |
|
"epoch": 1.42 |
|
}, |
|
{ |
|
"loss": 8.1986, |
|
"grad_norm": 0.9657976627349854, |
|
"learning_rate": 0.0005250721974271463, |
|
"epoch": 1.43 |
|
}, |
|
{ |
|
"loss": 8.476, |
|
"grad_norm": 0.9338609576225281, |
|
"learning_rate": 0.000524197077098101, |
|
"epoch": 1.43 |
|
}, |
|
{ |
|
"loss": 8.0189, |
|
"grad_norm": 0.8801831007003784, |
|
"learning_rate": 0.0005233219567690558, |
|
"epoch": 1.43 |
|
}, |
|
{ |
|
"loss": 8.0839, |
|
"grad_norm": 0.8173283934593201, |
|
"learning_rate": 0.0005224468364400105, |
|
"epoch": 1.43 |
|
}, |
|
{ |
|
"loss": 8.3716, |
|
"grad_norm": 0.8624017238616943, |
|
"learning_rate": 0.0005215717161109653, |
|
"epoch": 1.44 |
|
}, |
|
{ |
|
"loss": 8.2837, |
|
"grad_norm": 0.8650451302528381, |
|
"learning_rate": 0.0005206965957819201, |
|
"epoch": 1.44 |
|
}, |
|
{ |
|
"loss": 7.889, |
|
"grad_norm": 0.8268963098526001, |
|
"learning_rate": 0.0005198214754528747, |
|
"epoch": 1.44 |
|
}, |
|
{ |
|
"loss": 8.7807, |
|
"grad_norm": 0.9244619607925415, |
|
"learning_rate": 0.0005189463551238295, |
|
"epoch": 1.44 |
|
}, |
|
{ |
|
"loss": 8.5503, |
|
"grad_norm": 0.8533423542976379, |
|
"learning_rate": 0.0005180712347947842, |
|
"epoch": 1.45 |
|
}, |
|
{ |
|
"loss": 7.7895, |
|
"grad_norm": 0.885784924030304, |
|
"learning_rate": 0.000517196114465739, |
|
"epoch": 1.45 |
|
}, |
|
{ |
|
"loss": 8.9325, |
|
"grad_norm": 1.252569556236267, |
|
"learning_rate": 0.0005163209941366938, |
|
"epoch": 1.45 |
|
}, |
|
{ |
|
"loss": 7.6823, |
|
"grad_norm": 0.9340423941612244, |
|
"learning_rate": 0.0005154458738076485, |
|
"epoch": 1.45 |
|
}, |
|
{ |
|
"loss": 8.5812, |
|
"grad_norm": 1.1366244554519653, |
|
"learning_rate": 0.0005145707534786033, |
|
"epoch": 1.46 |
|
}, |
|
{ |
|
"loss": 8.1907, |
|
"grad_norm": 0.6764490604400635, |
|
"learning_rate": 0.0005136956331495581, |
|
"epoch": 1.46 |
|
}, |
|
{ |
|
"loss": 8.7694, |
|
"grad_norm": 0.7598670721054077, |
|
"learning_rate": 0.0005128205128205128, |
|
"epoch": 1.46 |
|
}, |
|
{ |
|
"loss": 8.4732, |
|
"grad_norm": 1.1497093439102173, |
|
"learning_rate": 0.0005119453924914676, |
|
"epoch": 1.46 |
|
}, |
|
{ |
|
"loss": 7.9224, |
|
"grad_norm": 0.8351478576660156, |
|
"learning_rate": 0.0005110702721624223, |
|
"epoch": 1.47 |
|
}, |
|
{ |
|
"loss": 8.253, |
|
"grad_norm": 0.8981735706329346, |
|
"learning_rate": 0.0005101951518333771, |
|
"epoch": 1.47 |
|
}, |
|
{ |
|
"loss": 8.442, |
|
"grad_norm": 0.910393238067627, |
|
"learning_rate": 0.0005093200315043319, |
|
"epoch": 1.47 |
|
}, |
|
{ |
|
"loss": 8.4128, |
|
"grad_norm": 1.0419617891311646, |
|
"learning_rate": 0.0005084449111752865, |
|
"epoch": 1.48 |
|
}, |
|
{ |
|
"loss": 8.5377, |
|
"grad_norm": 1.1774574518203735, |
|
"learning_rate": 0.0005075697908462413, |
|
"epoch": 1.48 |
|
}, |
|
{ |
|
"loss": 8.1727, |
|
"grad_norm": 0.8679039478302002, |
|
"learning_rate": 0.000506694670517196, |
|
"epoch": 1.48 |
|
}, |
|
{ |
|
"loss": 8.2085, |
|
"grad_norm": 0.8273195028305054, |
|
"learning_rate": 0.0005058195501881508, |
|
"epoch": 1.48 |
|
}, |
|
{ |
|
"loss": 9.0157, |
|
"grad_norm": 1.0897700786590576, |
|
"learning_rate": 0.0005049444298591056, |
|
"epoch": 1.49 |
|
}, |
|
{ |
|
"loss": 8.5794, |
|
"grad_norm": 1.19176185131073, |
|
"learning_rate": 0.0005040693095300603, |
|
"epoch": 1.49 |
|
}, |
|
{ |
|
"loss": 8.4796, |
|
"grad_norm": 0.7944311499595642, |
|
"learning_rate": 0.0005031941892010151, |
|
"epoch": 1.49 |
|
}, |
|
{ |
|
"loss": 8.2379, |
|
"grad_norm": 1.1032432317733765, |
|
"learning_rate": 0.0005023190688719699, |
|
"epoch": 1.49 |
|
}, |
|
{ |
|
"loss": 7.8506, |
|
"grad_norm": 0.9756267070770264, |
|
"learning_rate": 0.0005014439485429246, |
|
"epoch": 1.5 |
|
}, |
|
{ |
|
"loss": 8.4113, |
|
"grad_norm": 0.8557083010673523, |
|
"learning_rate": 0.0005005688282138794, |
|
"epoch": 1.5 |
|
}, |
|
{ |
|
"loss": 8.3315, |
|
"grad_norm": 0.9195913672447205, |
|
"learning_rate": 0.0004996937078848341, |
|
"epoch": 1.5 |
|
}, |
|
{ |
|
"loss": 8.3911, |
|
"grad_norm": 0.7430265545845032, |
|
"learning_rate": 0.0004988185875557889, |
|
"epoch": 1.5 |
|
}, |
|
{ |
|
"loss": 8.3471, |
|
"grad_norm": 0.7685049176216125, |
|
"learning_rate": 0.0004979434672267437, |
|
"epoch": 1.51 |
|
}, |
|
{ |
|
"loss": 8.252, |
|
"grad_norm": 0.9667441844940186, |
|
"learning_rate": 0.0004970683468976984, |
|
"epoch": 1.51 |
|
}, |
|
{ |
|
"loss": 7.9134, |
|
"grad_norm": 0.878400981426239, |
|
"learning_rate": 0.0004961932265686532, |
|
"epoch": 1.51 |
|
}, |
|
{ |
|
"loss": 8.337, |
|
"grad_norm": 0.8655962944030762, |
|
"learning_rate": 0.000495318106239608, |
|
"epoch": 1.51 |
|
}, |
|
{ |
|
"loss": 8.2066, |
|
"grad_norm": 0.8063825964927673, |
|
"learning_rate": 0.0004944429859105627, |
|
"epoch": 1.52 |
|
}, |
|
{ |
|
"loss": 8.4102, |
|
"grad_norm": 0.7918370962142944, |
|
"learning_rate": 0.0004935678655815175, |
|
"epoch": 1.52 |
|
}, |
|
{ |
|
"loss": 8.1297, |
|
"grad_norm": 1.03073251247406, |
|
"learning_rate": 0.0004926927452524722, |
|
"epoch": 1.52 |
|
}, |
|
{ |
|
"loss": 8.296, |
|
"grad_norm": 0.9369198679924011, |
|
"learning_rate": 0.000491817624923427, |
|
"epoch": 1.53 |
|
}, |
|
{ |
|
"loss": 7.8051, |
|
"grad_norm": 0.9166183471679688, |
|
"learning_rate": 0.0004909425045943818, |
|
"epoch": 1.53 |
|
}, |
|
{ |
|
"loss": 8.0258, |
|
"grad_norm": 0.8817450404167175, |
|
"learning_rate": 0.0004900673842653365, |
|
"epoch": 1.53 |
|
}, |
|
{ |
|
"loss": 7.9202, |
|
"grad_norm": 1.0320311784744263, |
|
"learning_rate": 0.0004891922639362913, |
|
"epoch": 1.53 |
|
}, |
|
{ |
|
"loss": 8.6314, |
|
"grad_norm": 0.9652658700942993, |
|
"learning_rate": 0.000488317143607246, |
|
"epoch": 1.54 |
|
}, |
|
{ |
|
"loss": 8.5648, |
|
"grad_norm": 1.0785067081451416, |
|
"learning_rate": 0.00048744202327820075, |
|
"epoch": 1.54 |
|
}, |
|
{ |
|
"loss": 7.528, |
|
"grad_norm": 1.0575002431869507, |
|
"learning_rate": 0.0004865669029491555, |
|
"epoch": 1.54 |
|
}, |
|
{ |
|
"loss": 7.9019, |
|
"grad_norm": 0.8822360634803772, |
|
"learning_rate": 0.0004856917826201103, |
|
"epoch": 1.54 |
|
}, |
|
{ |
|
"loss": 8.2544, |
|
"grad_norm": 0.7296998500823975, |
|
"learning_rate": 0.00048481666229106504, |
|
"epoch": 1.55 |
|
}, |
|
{ |
|
"loss": 8.5853, |
|
"grad_norm": 0.925472617149353, |
|
"learning_rate": 0.0004839415419620198, |
|
"epoch": 1.55 |
|
}, |
|
{ |
|
"loss": 8.3512, |
|
"grad_norm": 0.8641199469566345, |
|
"learning_rate": 0.00048306642163297456, |
|
"epoch": 1.55 |
|
}, |
|
{ |
|
"loss": 8.0277, |
|
"grad_norm": 1.0501607656478882, |
|
"learning_rate": 0.0004821913013039293, |
|
"epoch": 1.55 |
|
}, |
|
{ |
|
"loss": 8.0559, |
|
"grad_norm": 0.7827814221382141, |
|
"learning_rate": 0.00048131618097488403, |
|
"epoch": 1.56 |
|
}, |
|
{ |
|
"loss": 8.0869, |
|
"grad_norm": 0.929253339767456, |
|
"learning_rate": 0.0004804410606458388, |
|
"epoch": 1.56 |
|
}, |
|
{ |
|
"loss": 8.2206, |
|
"grad_norm": 0.9882745742797852, |
|
"learning_rate": 0.00047956594031679355, |
|
"epoch": 1.56 |
|
}, |
|
{ |
|
"loss": 8.8141, |
|
"grad_norm": 0.874455988407135, |
|
"learning_rate": 0.0004786908199877483, |
|
"epoch": 1.56 |
|
}, |
|
{ |
|
"loss": 8.403, |
|
"grad_norm": 1.1270105838775635, |
|
"learning_rate": 0.0004778156996587031, |
|
"epoch": 1.57 |
|
}, |
|
{ |
|
"loss": 8.7545, |
|
"grad_norm": 0.7236598134040833, |
|
"learning_rate": 0.00047694057932965784, |
|
"epoch": 1.57 |
|
}, |
|
{ |
|
"loss": 8.3653, |
|
"grad_norm": 0.8243849873542786, |
|
"learning_rate": 0.0004760654590006126, |
|
"epoch": 1.57 |
|
}, |
|
{ |
|
"loss": 8.0057, |
|
"grad_norm": 0.9829972386360168, |
|
"learning_rate": 0.00047519033867156736, |
|
"epoch": 1.57 |
|
}, |
|
{ |
|
"loss": 7.7738, |
|
"grad_norm": 1.1444923877716064, |
|
"learning_rate": 0.0004743152183425221, |
|
"epoch": 1.58 |
|
}, |
|
{ |
|
"loss": 7.9619, |
|
"grad_norm": 1.1846139430999756, |
|
"learning_rate": 0.0004734400980134769, |
|
"epoch": 1.58 |
|
}, |
|
{ |
|
"loss": 8.8667, |
|
"grad_norm": 0.9437428712844849, |
|
"learning_rate": 0.00047256497768443165, |
|
"epoch": 1.58 |
|
}, |
|
{ |
|
"loss": 8.2367, |
|
"grad_norm": 0.8670662641525269, |
|
"learning_rate": 0.0004716898573553864, |
|
"epoch": 1.59 |
|
}, |
|
{ |
|
"loss": 7.5306, |
|
"grad_norm": 0.823538064956665, |
|
"learning_rate": 0.00047081473702634117, |
|
"epoch": 1.59 |
|
}, |
|
{ |
|
"loss": 8.0832, |
|
"grad_norm": 0.8938249349594116, |
|
"learning_rate": 0.0004699396166972959, |
|
"epoch": 1.59 |
|
}, |
|
{ |
|
"loss": 7.7995, |
|
"grad_norm": 0.8147523999214172, |
|
"learning_rate": 0.00046906449636825064, |
|
"epoch": 1.59 |
|
}, |
|
{ |
|
"loss": 8.2207, |
|
"grad_norm": 0.7885489463806152, |
|
"learning_rate": 0.0004681893760392054, |
|
"epoch": 1.6 |
|
}, |
|
{ |
|
"loss": 8.3315, |
|
"grad_norm": 0.9256998300552368, |
|
"learning_rate": 0.00046731425571016016, |
|
"epoch": 1.6 |
|
}, |
|
{ |
|
"loss": 7.8139, |
|
"grad_norm": 0.7331977486610413, |
|
"learning_rate": 0.0004664391353811149, |
|
"epoch": 1.6 |
|
}, |
|
{ |
|
"loss": 8.2015, |
|
"grad_norm": 0.7677296996116638, |
|
"learning_rate": 0.0004655640150520697, |
|
"epoch": 1.6 |
|
}, |
|
{ |
|
"loss": 8.114, |
|
"grad_norm": 1.066036343574524, |
|
"learning_rate": 0.00046468889472302445, |
|
"epoch": 1.61 |
|
}, |
|
{ |
|
"loss": 8.3314, |
|
"grad_norm": 0.7969563603401184, |
|
"learning_rate": 0.0004638137743939792, |
|
"epoch": 1.61 |
|
}, |
|
{ |
|
"loss": 8.4266, |
|
"grad_norm": 0.8454012274742126, |
|
"learning_rate": 0.000462938654064934, |
|
"epoch": 1.61 |
|
}, |
|
{ |
|
"loss": 8.0451, |
|
"grad_norm": 1.049949288368225, |
|
"learning_rate": 0.00046206353373588874, |
|
"epoch": 1.61 |
|
}, |
|
{ |
|
"loss": 7.8993, |
|
"grad_norm": 0.8960159420967102, |
|
"learning_rate": 0.0004611884134068435, |
|
"epoch": 1.62 |
|
}, |
|
{ |
|
"loss": 8.4117, |
|
"grad_norm": 1.0029221773147583, |
|
"learning_rate": 0.00046031329307779826, |
|
"epoch": 1.62 |
|
}, |
|
{ |
|
"loss": 7.9899, |
|
"grad_norm": 1.0616450309753418, |
|
"learning_rate": 0.0004594381727487529, |
|
"epoch": 1.62 |
|
}, |
|
{ |
|
"loss": 7.9134, |
|
"grad_norm": 0.8082576990127563, |
|
"learning_rate": 0.0004585630524197077, |
|
"epoch": 1.62 |
|
}, |
|
{ |
|
"loss": 8.1685, |
|
"grad_norm": 0.9529896974563599, |
|
"learning_rate": 0.00045768793209066244, |
|
"epoch": 1.63 |
|
}, |
|
{ |
|
"loss": 8.7919, |
|
"grad_norm": 0.7967125773429871, |
|
"learning_rate": 0.0004568128117616172, |
|
"epoch": 1.63 |
|
}, |
|
{ |
|
"loss": 8.4375, |
|
"grad_norm": 0.8775154948234558, |
|
"learning_rate": 0.00045593769143257196, |
|
"epoch": 1.63 |
|
}, |
|
{ |
|
"loss": 8.559, |
|
"grad_norm": 0.782707929611206, |
|
"learning_rate": 0.0004550625711035267, |
|
"epoch": 1.64 |
|
}, |
|
{ |
|
"loss": 8.4288, |
|
"grad_norm": 0.7907795310020447, |
|
"learning_rate": 0.0004541874507744815, |
|
"epoch": 1.64 |
|
}, |
|
{ |
|
"loss": 8.5237, |
|
"grad_norm": 1.0685423612594604, |
|
"learning_rate": 0.00045331233044543625, |
|
"epoch": 1.64 |
|
}, |
|
{ |
|
"loss": 8.4464, |
|
"grad_norm": 1.1534669399261475, |
|
"learning_rate": 0.000452437210116391, |
|
"epoch": 1.64 |
|
}, |
|
{ |
|
"loss": 7.8577, |
|
"grad_norm": 0.7411785125732422, |
|
"learning_rate": 0.00045156208978734577, |
|
"epoch": 1.65 |
|
}, |
|
{ |
|
"loss": 7.8189, |
|
"grad_norm": 0.87079256772995, |
|
"learning_rate": 0.00045068696945830053, |
|
"epoch": 1.65 |
|
}, |
|
{ |
|
"loss": 8.1193, |
|
"grad_norm": 0.9850463271141052, |
|
"learning_rate": 0.0004498118491292553, |
|
"epoch": 1.65 |
|
}, |
|
{ |
|
"loss": 7.9457, |
|
"grad_norm": 0.8739660978317261, |
|
"learning_rate": 0.00044893672880021, |
|
"epoch": 1.65 |
|
}, |
|
{ |
|
"loss": 7.728, |
|
"grad_norm": 0.8551336526870728, |
|
"learning_rate": 0.00044806160847116476, |
|
"epoch": 1.66 |
|
}, |
|
{ |
|
"loss": 8.8456, |
|
"grad_norm": 0.8609566688537598, |
|
"learning_rate": 0.0004471864881421195, |
|
"epoch": 1.66 |
|
}, |
|
{ |
|
"loss": 8.0812, |
|
"grad_norm": 0.7449157238006592, |
|
"learning_rate": 0.0004463113678130743, |
|
"epoch": 1.66 |
|
}, |
|
{ |
|
"loss": 8.0729, |
|
"grad_norm": 0.8253002762794495, |
|
"learning_rate": 0.00044543624748402905, |
|
"epoch": 1.66 |
|
}, |
|
{ |
|
"loss": 8.4942, |
|
"grad_norm": 0.8349846601486206, |
|
"learning_rate": 0.0004445611271549838, |
|
"epoch": 1.67 |
|
}, |
|
{ |
|
"loss": 8.3446, |
|
"grad_norm": 0.9881287813186646, |
|
"learning_rate": 0.00044368600682593857, |
|
"epoch": 1.67 |
|
}, |
|
{ |
|
"loss": 7.3313, |
|
"grad_norm": 0.863059401512146, |
|
"learning_rate": 0.00044281088649689333, |
|
"epoch": 1.67 |
|
}, |
|
{ |
|
"loss": 8.4412, |
|
"grad_norm": 0.9246751666069031, |
|
"learning_rate": 0.0004419357661678481, |
|
"epoch": 1.67 |
|
}, |
|
{ |
|
"loss": 8.4511, |
|
"grad_norm": 0.7963143587112427, |
|
"learning_rate": 0.00044106064583880286, |
|
"epoch": 1.68 |
|
}, |
|
{ |
|
"loss": 7.8743, |
|
"grad_norm": 1.0088573694229126, |
|
"learning_rate": 0.0004401855255097576, |
|
"epoch": 1.68 |
|
}, |
|
{ |
|
"loss": 8.0994, |
|
"grad_norm": 0.7680083513259888, |
|
"learning_rate": 0.0004393104051807124, |
|
"epoch": 1.68 |
|
}, |
|
{ |
|
"loss": 7.8712, |
|
"grad_norm": 0.8324389457702637, |
|
"learning_rate": 0.00043843528485166714, |
|
"epoch": 1.69 |
|
}, |
|
{ |
|
"loss": 7.8454, |
|
"grad_norm": 0.9649554491043091, |
|
"learning_rate": 0.00043756016452262185, |
|
"epoch": 1.69 |
|
}, |
|
{ |
|
"loss": 7.925, |
|
"grad_norm": 0.7881239652633667, |
|
"learning_rate": 0.0004366850441935766, |
|
"epoch": 1.69 |
|
}, |
|
{ |
|
"loss": 7.9826, |
|
"grad_norm": 1.2129865884780884, |
|
"learning_rate": 0.0004358099238645314, |
|
"epoch": 1.69 |
|
}, |
|
{ |
|
"loss": 8.3911, |
|
"grad_norm": 0.7000688910484314, |
|
"learning_rate": 0.00043493480353548614, |
|
"epoch": 1.7 |
|
}, |
|
{ |
|
"loss": 7.9635, |
|
"grad_norm": 0.7449495196342468, |
|
"learning_rate": 0.0004340596832064409, |
|
"epoch": 1.7 |
|
}, |
|
{ |
|
"loss": 7.8492, |
|
"grad_norm": 0.7399414777755737, |
|
"learning_rate": 0.00043318456287739566, |
|
"epoch": 1.7 |
|
}, |
|
{ |
|
"loss": 8.5288, |
|
"grad_norm": 1.0965951681137085, |
|
"learning_rate": 0.0004323094425483504, |
|
"epoch": 1.7 |
|
}, |
|
{ |
|
"loss": 8.0104, |
|
"grad_norm": 0.8990981578826904, |
|
"learning_rate": 0.0004314343222193052, |
|
"epoch": 1.71 |
|
}, |
|
{ |
|
"loss": 7.8636, |
|
"grad_norm": 0.8695485591888428, |
|
"learning_rate": 0.00043055920189025994, |
|
"epoch": 1.71 |
|
}, |
|
{ |
|
"loss": 7.9194, |
|
"grad_norm": 0.7813265919685364, |
|
"learning_rate": 0.0004296840815612147, |
|
"epoch": 1.71 |
|
}, |
|
{ |
|
"loss": 8.4535, |
|
"grad_norm": 0.7645956873893738, |
|
"learning_rate": 0.00042880896123216947, |
|
"epoch": 1.71 |
|
}, |
|
{ |
|
"loss": 7.8434, |
|
"grad_norm": 1.0397326946258545, |
|
"learning_rate": 0.00042793384090312423, |
|
"epoch": 1.72 |
|
}, |
|
{ |
|
"loss": 7.8072, |
|
"grad_norm": 0.9630481004714966, |
|
"learning_rate": 0.00042705872057407894, |
|
"epoch": 1.72 |
|
}, |
|
{ |
|
"loss": 8.327, |
|
"grad_norm": 0.7939698696136475, |
|
"learning_rate": 0.0004261836002450337, |
|
"epoch": 1.72 |
|
}, |
|
{ |
|
"loss": 8.2467, |
|
"grad_norm": 1.0103453397750854, |
|
"learning_rate": 0.00042530847991598846, |
|
"epoch": 1.72 |
|
}, |
|
{ |
|
"loss": 7.63, |
|
"grad_norm": 0.9281976819038391, |
|
"learning_rate": 0.0004244333595869432, |
|
"epoch": 1.73 |
|
}, |
|
{ |
|
"loss": 7.7603, |
|
"grad_norm": 0.7895064949989319, |
|
"learning_rate": 0.000423558239257898, |
|
"epoch": 1.73 |
|
}, |
|
{ |
|
"loss": 7.6725, |
|
"grad_norm": 0.7491249442100525, |
|
"learning_rate": 0.00042268311892885275, |
|
"epoch": 1.73 |
|
}, |
|
{ |
|
"loss": 8.0813, |
|
"grad_norm": 0.7357456088066101, |
|
"learning_rate": 0.0004218079985998075, |
|
"epoch": 1.74 |
|
}, |
|
{ |
|
"loss": 8.1603, |
|
"grad_norm": 0.8232001066207886, |
|
"learning_rate": 0.00042093287827076227, |
|
"epoch": 1.74 |
|
}, |
|
{ |
|
"loss": 8.172, |
|
"grad_norm": 0.7846309542655945, |
|
"learning_rate": 0.00042005775794171703, |
|
"epoch": 1.74 |
|
}, |
|
{ |
|
"loss": 8.2372, |
|
"grad_norm": 0.9100042581558228, |
|
"learning_rate": 0.00041918263761267174, |
|
"epoch": 1.74 |
|
}, |
|
{ |
|
"loss": 7.8489, |
|
"grad_norm": 0.9496660828590393, |
|
"learning_rate": 0.0004183075172836265, |
|
"epoch": 1.75 |
|
}, |
|
{ |
|
"loss": 7.7246, |
|
"grad_norm": 0.7061757445335388, |
|
"learning_rate": 0.00041743239695458126, |
|
"epoch": 1.75 |
|
}, |
|
{ |
|
"loss": 7.988, |
|
"grad_norm": 0.9927607774734497, |
|
"learning_rate": 0.00041655727662553597, |
|
"epoch": 1.75 |
|
}, |
|
{ |
|
"loss": 7.9562, |
|
"grad_norm": 0.8585007190704346, |
|
"learning_rate": 0.00041568215629649073, |
|
"epoch": 1.75 |
|
}, |
|
{ |
|
"loss": 8.1105, |
|
"grad_norm": 1.0176628828048706, |
|
"learning_rate": 0.0004148070359674455, |
|
"epoch": 1.76 |
|
}, |
|
{ |
|
"loss": 7.7869, |
|
"grad_norm": 0.8576889038085938, |
|
"learning_rate": 0.00041393191563840026, |
|
"epoch": 1.76 |
|
}, |
|
{ |
|
"loss": 7.7945, |
|
"grad_norm": 0.8359828591346741, |
|
"learning_rate": 0.000413056795309355, |
|
"epoch": 1.76 |
|
}, |
|
{ |
|
"loss": 7.9683, |
|
"grad_norm": 0.8636084794998169, |
|
"learning_rate": 0.0004121816749803098, |
|
"epoch": 1.76 |
|
}, |
|
{ |
|
"loss": 8.3303, |
|
"grad_norm": 0.9006314873695374, |
|
"learning_rate": 0.00041130655465126454, |
|
"epoch": 1.77 |
|
}, |
|
{ |
|
"loss": 8.1457, |
|
"grad_norm": 1.217007040977478, |
|
"learning_rate": 0.0004104314343222193, |
|
"epoch": 1.77 |
|
}, |
|
{ |
|
"loss": 8.6171, |
|
"grad_norm": 1.0577572584152222, |
|
"learning_rate": 0.00040955631399317407, |
|
"epoch": 1.77 |
|
}, |
|
{ |
|
"loss": 7.9349, |
|
"grad_norm": 0.9530831575393677, |
|
"learning_rate": 0.00040868119366412883, |
|
"epoch": 1.77 |
|
}, |
|
{ |
|
"loss": 8.2722, |
|
"grad_norm": 0.9652631282806396, |
|
"learning_rate": 0.0004078060733350836, |
|
"epoch": 1.78 |
|
}, |
|
{ |
|
"loss": 8.185, |
|
"grad_norm": 0.7349383234977722, |
|
"learning_rate": 0.00040693095300603835, |
|
"epoch": 1.78 |
|
}, |
|
{ |
|
"loss": 7.3944, |
|
"grad_norm": 1.122018814086914, |
|
"learning_rate": 0.0004060558326769931, |
|
"epoch": 1.78 |
|
}, |
|
{ |
|
"loss": 7.8828, |
|
"grad_norm": 0.96207195520401, |
|
"learning_rate": 0.0004051807123479478, |
|
"epoch": 1.78 |
|
}, |
|
{ |
|
"loss": 8.1287, |
|
"grad_norm": 0.833884060382843, |
|
"learning_rate": 0.0004043055920189026, |
|
"epoch": 1.79 |
|
}, |
|
{ |
|
"loss": 8.0382, |
|
"grad_norm": 0.9089711904525757, |
|
"learning_rate": 0.00040343047168985734, |
|
"epoch": 1.79 |
|
}, |
|
{ |
|
"loss": 8.1137, |
|
"grad_norm": 0.6977031230926514, |
|
"learning_rate": 0.0004025553513608121, |
|
"epoch": 1.79 |
|
}, |
|
{ |
|
"loss": 7.9215, |
|
"grad_norm": 0.9814949631690979, |
|
"learning_rate": 0.00040168023103176687, |
|
"epoch": 1.8 |
|
}, |
|
{ |
|
"loss": 8.2266, |
|
"grad_norm": 0.9767114520072937, |
|
"learning_rate": 0.00040080511070272163, |
|
"epoch": 1.8 |
|
}, |
|
{ |
|
"loss": 8.3445, |
|
"grad_norm": 1.1093454360961914, |
|
"learning_rate": 0.0003999299903736764, |
|
"epoch": 1.8 |
|
}, |
|
{ |
|
"loss": 8.4239, |
|
"grad_norm": 0.93362957239151, |
|
"learning_rate": 0.00039905487004463115, |
|
"epoch": 1.8 |
|
}, |
|
{ |
|
"loss": 8.2468, |
|
"grad_norm": 0.9497604370117188, |
|
"learning_rate": 0.0003981797497155859, |
|
"epoch": 1.81 |
|
}, |
|
{ |
|
"loss": 7.8793, |
|
"grad_norm": 0.8992236852645874, |
|
"learning_rate": 0.0003973046293865407, |
|
"epoch": 1.81 |
|
}, |
|
{ |
|
"loss": 7.8246, |
|
"grad_norm": 0.9486469030380249, |
|
"learning_rate": 0.00039642950905749544, |
|
"epoch": 1.81 |
|
}, |
|
{ |
|
"loss": 8.6243, |
|
"grad_norm": 0.970136284828186, |
|
"learning_rate": 0.0003955543887284502, |
|
"epoch": 1.81 |
|
}, |
|
{ |
|
"loss": 7.8859, |
|
"grad_norm": 1.0090283155441284, |
|
"learning_rate": 0.0003946792683994049, |
|
"epoch": 1.82 |
|
}, |
|
{ |
|
"loss": 8.156, |
|
"grad_norm": 0.9662021994590759, |
|
"learning_rate": 0.00039380414807035967, |
|
"epoch": 1.82 |
|
}, |
|
{ |
|
"loss": 7.7991, |
|
"grad_norm": 0.8005274534225464, |
|
"learning_rate": 0.00039292902774131443, |
|
"epoch": 1.82 |
|
}, |
|
{ |
|
"loss": 7.8432, |
|
"grad_norm": 0.8537503480911255, |
|
"learning_rate": 0.0003920539074122692, |
|
"epoch": 1.82 |
|
}, |
|
{ |
|
"loss": 7.7118, |
|
"grad_norm": 0.8975428342819214, |
|
"learning_rate": 0.00039117878708322396, |
|
"epoch": 1.83 |
|
}, |
|
{ |
|
"loss": 8.0563, |
|
"grad_norm": 0.9040714502334595, |
|
"learning_rate": 0.0003903036667541787, |
|
"epoch": 1.83 |
|
}, |
|
{ |
|
"loss": 8.005, |
|
"grad_norm": 0.882514476776123, |
|
"learning_rate": 0.0003894285464251335, |
|
"epoch": 1.83 |
|
}, |
|
{ |
|
"loss": 7.99, |
|
"grad_norm": 0.9527498483657837, |
|
"learning_rate": 0.00038855342609608824, |
|
"epoch": 1.83 |
|
}, |
|
{ |
|
"loss": 7.9497, |
|
"grad_norm": 0.7327905893325806, |
|
"learning_rate": 0.000387678305767043, |
|
"epoch": 1.84 |
|
}, |
|
{ |
|
"loss": 8.1346, |
|
"grad_norm": 0.9137473106384277, |
|
"learning_rate": 0.00038680318543799776, |
|
"epoch": 1.84 |
|
}, |
|
{ |
|
"loss": 7.266, |
|
"grad_norm": 0.8273423910140991, |
|
"learning_rate": 0.0003859280651089525, |
|
"epoch": 1.84 |
|
}, |
|
{ |
|
"loss": 7.525, |
|
"grad_norm": 1.2288787364959717, |
|
"learning_rate": 0.0003850529447799073, |
|
"epoch": 1.85 |
|
}, |
|
{ |
|
"loss": 8.5105, |
|
"grad_norm": 0.7940724492073059, |
|
"learning_rate": 0.00038417782445086205, |
|
"epoch": 1.85 |
|
}, |
|
{ |
|
"loss": 8.0599, |
|
"grad_norm": 0.9253759384155273, |
|
"learning_rate": 0.00038330270412181676, |
|
"epoch": 1.85 |
|
}, |
|
{ |
|
"loss": 7.1757, |
|
"grad_norm": 0.8145419359207153, |
|
"learning_rate": 0.0003824275837927715, |
|
"epoch": 1.85 |
|
}, |
|
{ |
|
"loss": 7.6177, |
|
"grad_norm": 1.1738182306289673, |
|
"learning_rate": 0.0003815524634637263, |
|
"epoch": 1.86 |
|
}, |
|
{ |
|
"loss": 7.6901, |
|
"grad_norm": 0.9141517877578735, |
|
"learning_rate": 0.00038067734313468104, |
|
"epoch": 1.86 |
|
}, |
|
{ |
|
"loss": 7.7036, |
|
"grad_norm": 1.0994611978530884, |
|
"learning_rate": 0.00037980222280563575, |
|
"epoch": 1.86 |
|
}, |
|
{ |
|
"loss": 7.9458, |
|
"grad_norm": 0.8445936441421509, |
|
"learning_rate": 0.0003789271024765905, |
|
"epoch": 1.86 |
|
}, |
|
{ |
|
"loss": 7.6019, |
|
"grad_norm": 0.8796238899230957, |
|
"learning_rate": 0.0003780519821475453, |
|
"epoch": 1.87 |
|
}, |
|
{ |
|
"loss": 7.7582, |
|
"grad_norm": 0.7801417112350464, |
|
"learning_rate": 0.00037717686181850004, |
|
"epoch": 1.87 |
|
}, |
|
{ |
|
"loss": 7.8483, |
|
"grad_norm": 1.008893609046936, |
|
"learning_rate": 0.0003763017414894548, |
|
"epoch": 1.87 |
|
}, |
|
{ |
|
"loss": 8.047, |
|
"grad_norm": 0.8021620512008667, |
|
"learning_rate": 0.00037542662116040956, |
|
"epoch": 1.87 |
|
}, |
|
{ |
|
"loss": 8.2537, |
|
"grad_norm": 0.919774055480957, |
|
"learning_rate": 0.0003745515008313643, |
|
"epoch": 1.88 |
|
}, |
|
{ |
|
"loss": 8.1101, |
|
"grad_norm": 1.094642996788025, |
|
"learning_rate": 0.0003736763805023191, |
|
"epoch": 1.88 |
|
}, |
|
{ |
|
"loss": 7.9119, |
|
"grad_norm": 1.0133185386657715, |
|
"learning_rate": 0.0003728012601732738, |
|
"epoch": 1.88 |
|
}, |
|
{ |
|
"loss": 7.9624, |
|
"grad_norm": 0.7546307444572449, |
|
"learning_rate": 0.00037192613984422855, |
|
"epoch": 1.88 |
|
}, |
|
{ |
|
"loss": 7.9547, |
|
"grad_norm": 0.7390889525413513, |
|
"learning_rate": 0.0003710510195151833, |
|
"epoch": 1.89 |
|
}, |
|
{ |
|
"loss": 7.7794, |
|
"grad_norm": 0.9140797257423401, |
|
"learning_rate": 0.0003701758991861381, |
|
"epoch": 1.89 |
|
}, |
|
{ |
|
"loss": 8.0254, |
|
"grad_norm": 0.8325345516204834, |
|
"learning_rate": 0.00036930077885709284, |
|
"epoch": 1.89 |
|
}, |
|
{ |
|
"loss": 7.7692, |
|
"grad_norm": 1.228366732597351, |
|
"learning_rate": 0.0003684256585280476, |
|
"epoch": 1.9 |
|
}, |
|
{ |
|
"loss": 7.2768, |
|
"grad_norm": 1.0541235208511353, |
|
"learning_rate": 0.00036755053819900236, |
|
"epoch": 1.9 |
|
}, |
|
{ |
|
"loss": 8.1104, |
|
"grad_norm": 1.0765891075134277, |
|
"learning_rate": 0.0003666754178699571, |
|
"epoch": 1.9 |
|
}, |
|
{ |
|
"loss": 7.5317, |
|
"grad_norm": 0.9508135914802551, |
|
"learning_rate": 0.0003658002975409119, |
|
"epoch": 1.9 |
|
}, |
|
{ |
|
"loss": 7.1908, |
|
"grad_norm": 0.7984021306037903, |
|
"learning_rate": 0.00036492517721186665, |
|
"epoch": 1.91 |
|
}, |
|
{ |
|
"loss": 7.8423, |
|
"grad_norm": 1.0381263494491577, |
|
"learning_rate": 0.0003640500568828214, |
|
"epoch": 1.91 |
|
}, |
|
{ |
|
"loss": 8.297, |
|
"grad_norm": 0.9509484171867371, |
|
"learning_rate": 0.00036317493655377617, |
|
"epoch": 1.91 |
|
}, |
|
{ |
|
"loss": 7.7339, |
|
"grad_norm": 0.8926167488098145, |
|
"learning_rate": 0.0003622998162247309, |
|
"epoch": 1.91 |
|
}, |
|
{ |
|
"loss": 7.9, |
|
"grad_norm": 1.0550678968429565, |
|
"learning_rate": 0.00036142469589568564, |
|
"epoch": 1.92 |
|
}, |
|
{ |
|
"loss": 7.6175, |
|
"grad_norm": 0.9359092712402344, |
|
"learning_rate": 0.0003605495755666404, |
|
"epoch": 1.92 |
|
}, |
|
{ |
|
"loss": 8.0818, |
|
"grad_norm": 0.735281765460968, |
|
"learning_rate": 0.00035967445523759516, |
|
"epoch": 1.92 |
|
}, |
|
{ |
|
"loss": 8.1061, |
|
"grad_norm": 0.8289329409599304, |
|
"learning_rate": 0.0003587993349085499, |
|
"epoch": 1.92 |
|
}, |
|
{ |
|
"loss": 7.3778, |
|
"grad_norm": 0.7723102569580078, |
|
"learning_rate": 0.0003579242145795047, |
|
"epoch": 1.93 |
|
}, |
|
{ |
|
"loss": 7.853, |
|
"grad_norm": 0.7856701612472534, |
|
"learning_rate": 0.00035704909425045945, |
|
"epoch": 1.93 |
|
}, |
|
{ |
|
"loss": 8.5133, |
|
"grad_norm": 0.7649736404418945, |
|
"learning_rate": 0.0003561739739214142, |
|
"epoch": 1.93 |
|
}, |
|
{ |
|
"loss": 8.4676, |
|
"grad_norm": 0.6755172610282898, |
|
"learning_rate": 0.000355298853592369, |
|
"epoch": 1.93 |
|
}, |
|
{ |
|
"loss": 8.2074, |
|
"grad_norm": 0.8537729382514954, |
|
"learning_rate": 0.00035442373326332374, |
|
"epoch": 1.94 |
|
}, |
|
{ |
|
"loss": 8.249, |
|
"grad_norm": 0.9827852845191956, |
|
"learning_rate": 0.0003535486129342785, |
|
"epoch": 1.94 |
|
}, |
|
{ |
|
"loss": 8.4107, |
|
"grad_norm": 1.2670233249664307, |
|
"learning_rate": 0.00035267349260523326, |
|
"epoch": 1.94 |
|
}, |
|
{ |
|
"loss": 8.1578, |
|
"grad_norm": 0.8494543433189392, |
|
"learning_rate": 0.000351798372276188, |
|
"epoch": 1.95 |
|
}, |
|
{ |
|
"loss": 7.9296, |
|
"grad_norm": 0.8582159876823425, |
|
"learning_rate": 0.00035092325194714273, |
|
"epoch": 1.95 |
|
}, |
|
{ |
|
"loss": 7.4592, |
|
"grad_norm": 0.8539626598358154, |
|
"learning_rate": 0.0003500481316180975, |
|
"epoch": 1.95 |
|
}, |
|
{ |
|
"loss": 8.1603, |
|
"grad_norm": 0.9004923701286316, |
|
"learning_rate": 0.00034917301128905225, |
|
"epoch": 1.95 |
|
}, |
|
{ |
|
"loss": 8.1319, |
|
"grad_norm": 0.722870945930481, |
|
"learning_rate": 0.000348297890960007, |
|
"epoch": 1.96 |
|
}, |
|
{ |
|
"loss": 7.791, |
|
"grad_norm": 0.9422692656517029, |
|
"learning_rate": 0.0003474227706309618, |
|
"epoch": 1.96 |
|
}, |
|
{ |
|
"loss": 8.0631, |
|
"grad_norm": 1.2248715162277222, |
|
"learning_rate": 0.00034654765030191654, |
|
"epoch": 1.96 |
|
}, |
|
{ |
|
"loss": 8.3269, |
|
"grad_norm": 1.370082974433899, |
|
"learning_rate": 0.0003456725299728713, |
|
"epoch": 1.96 |
|
}, |
|
{ |
|
"loss": 7.7562, |
|
"grad_norm": 1.0009835958480835, |
|
"learning_rate": 0.00034479740964382606, |
|
"epoch": 1.97 |
|
}, |
|
{ |
|
"loss": 7.4909, |
|
"grad_norm": 0.9207608103752136, |
|
"learning_rate": 0.0003439222893147808, |
|
"epoch": 1.97 |
|
}, |
|
{ |
|
"loss": 7.2907, |
|
"grad_norm": 1.0351985692977905, |
|
"learning_rate": 0.0003430471689857356, |
|
"epoch": 1.97 |
|
}, |
|
{ |
|
"loss": 7.9972, |
|
"grad_norm": 0.9398946762084961, |
|
"learning_rate": 0.00034217204865669035, |
|
"epoch": 1.97 |
|
}, |
|
{ |
|
"loss": 7.6034, |
|
"grad_norm": 0.8558303713798523, |
|
"learning_rate": 0.0003412969283276451, |
|
"epoch": 1.98 |
|
}, |
|
{ |
|
"loss": 8.3452, |
|
"grad_norm": 0.8279830813407898, |
|
"learning_rate": 0.0003404218079985998, |
|
"epoch": 1.98 |
|
}, |
|
{ |
|
"loss": 8.3979, |
|
"grad_norm": 0.7496762275695801, |
|
"learning_rate": 0.0003395466876695545, |
|
"epoch": 1.98 |
|
}, |
|
{ |
|
"loss": 7.5979, |
|
"grad_norm": 0.865039587020874, |
|
"learning_rate": 0.0003386715673405093, |
|
"epoch": 1.98 |
|
}, |
|
{ |
|
"loss": 7.7027, |
|
"grad_norm": 0.7518277764320374, |
|
"learning_rate": 0.00033779644701146405, |
|
"epoch": 1.99 |
|
}, |
|
{ |
|
"loss": 7.8756, |
|
"grad_norm": 0.8984577059745789, |
|
"learning_rate": 0.0003369213266824188, |
|
"epoch": 1.99 |
|
}, |
|
{ |
|
"loss": 7.4597, |
|
"grad_norm": 0.7312489151954651, |
|
"learning_rate": 0.00033604620635337357, |
|
"epoch": 1.99 |
|
}, |
|
{ |
|
"loss": 7.8173, |
|
"grad_norm": 0.8688482046127319, |
|
"learning_rate": 0.00033517108602432833, |
|
"epoch": 1.99 |
|
}, |
|
{ |
|
"loss": 7.6772, |
|
"grad_norm": 0.9117947816848755, |
|
"learning_rate": 0.0003342959656952831, |
|
"epoch": 2.0 |
|
}, |
|
{ |
|
"loss": 7.65, |
|
"grad_norm": 1.044518232345581, |
|
"learning_rate": 0.00033342084536623786, |
|
"epoch": 2.0 |
|
}, |
|
{ |
|
"loss": 7.6424, |
|
"grad_norm": 0.8763852119445801, |
|
"learning_rate": 0.0003325457250371926, |
|
"epoch": 2.0 |
|
}, |
|
{ |
|
"loss": 8.1303, |
|
"grad_norm": 1.2922908067703247, |
|
"learning_rate": 0.0003316706047081474, |
|
"epoch": 2.01 |
|
}, |
|
{ |
|
"loss": 8.3256, |
|
"grad_norm": 0.7980864644050598, |
|
"learning_rate": 0.00033079548437910214, |
|
"epoch": 2.01 |
|
}, |
|
{ |
|
"loss": 7.7353, |
|
"grad_norm": 0.8062283396720886, |
|
"learning_rate": 0.00032992036405005685, |
|
"epoch": 2.01 |
|
}, |
|
{ |
|
"loss": 8.2314, |
|
"grad_norm": 0.9204174280166626, |
|
"learning_rate": 0.0003290452437210116, |
|
"epoch": 2.01 |
|
}, |
|
{ |
|
"loss": 7.5946, |
|
"grad_norm": 0.7235244512557983, |
|
"learning_rate": 0.0003281701233919664, |
|
"epoch": 2.02 |
|
}, |
|
{ |
|
"loss": 7.4673, |
|
"grad_norm": 0.8126214146614075, |
|
"learning_rate": 0.00032729500306292114, |
|
"epoch": 2.02 |
|
}, |
|
{ |
|
"loss": 7.6391, |
|
"grad_norm": 0.7648585438728333, |
|
"learning_rate": 0.0003264198827338759, |
|
"epoch": 2.02 |
|
}, |
|
{ |
|
"loss": 8.005, |
|
"grad_norm": 0.7453392148017883, |
|
"learning_rate": 0.00032554476240483066, |
|
"epoch": 2.02 |
|
}, |
|
{ |
|
"loss": 7.8703, |
|
"grad_norm": 0.8830775022506714, |
|
"learning_rate": 0.0003246696420757854, |
|
"epoch": 2.03 |
|
}, |
|
{ |
|
"loss": 7.8639, |
|
"grad_norm": 1.2337687015533447, |
|
"learning_rate": 0.0003237945217467402, |
|
"epoch": 2.03 |
|
}, |
|
{ |
|
"loss": 7.8224, |
|
"grad_norm": 1.0393247604370117, |
|
"learning_rate": 0.00032291940141769494, |
|
"epoch": 2.03 |
|
}, |
|
{ |
|
"loss": 7.7573, |
|
"grad_norm": 0.7463309168815613, |
|
"learning_rate": 0.0003220442810886497, |
|
"epoch": 2.03 |
|
}, |
|
{ |
|
"loss": 8.3318, |
|
"grad_norm": 0.8722276091575623, |
|
"learning_rate": 0.00032116916075960447, |
|
"epoch": 2.04 |
|
}, |
|
{ |
|
"loss": 8.0517, |
|
"grad_norm": 0.9069348573684692, |
|
"learning_rate": 0.00032029404043055923, |
|
"epoch": 2.04 |
|
}, |
|
{ |
|
"loss": 7.9696, |
|
"grad_norm": 0.7715663909912109, |
|
"learning_rate": 0.000319418920101514, |
|
"epoch": 2.04 |
|
}, |
|
{ |
|
"loss": 7.7113, |
|
"grad_norm": 0.8788508176803589, |
|
"learning_rate": 0.0003185437997724687, |
|
"epoch": 2.04 |
|
}, |
|
{ |
|
"loss": 7.5771, |
|
"grad_norm": 1.057786226272583, |
|
"learning_rate": 0.00031766867944342346, |
|
"epoch": 2.05 |
|
}, |
|
{ |
|
"loss": 7.985, |
|
"grad_norm": 1.2888935804367065, |
|
"learning_rate": 0.0003167935591143782, |
|
"epoch": 2.05 |
|
}, |
|
{ |
|
"loss": 7.5748, |
|
"grad_norm": 0.8100298047065735, |
|
"learning_rate": 0.000315918438785333, |
|
"epoch": 2.05 |
|
}, |
|
{ |
|
"loss": 7.7785, |
|
"grad_norm": 0.9130757451057434, |
|
"learning_rate": 0.00031504331845628775, |
|
"epoch": 2.06 |
|
}, |
|
{ |
|
"loss": 7.3718, |
|
"grad_norm": 0.895447313785553, |
|
"learning_rate": 0.0003141681981272425, |
|
"epoch": 2.06 |
|
}, |
|
{ |
|
"loss": 8.0138, |
|
"grad_norm": 0.8260514736175537, |
|
"learning_rate": 0.00031329307779819727, |
|
"epoch": 2.06 |
|
}, |
|
{ |
|
"loss": 7.6438, |
|
"grad_norm": 0.9353188276290894, |
|
"learning_rate": 0.00031241795746915203, |
|
"epoch": 2.06 |
|
}, |
|
{ |
|
"loss": 7.9212, |
|
"grad_norm": 0.8095923066139221, |
|
"learning_rate": 0.0003115428371401068, |
|
"epoch": 2.07 |
|
}, |
|
{ |
|
"loss": 8.2193, |
|
"grad_norm": 0.8156134486198425, |
|
"learning_rate": 0.00031066771681106156, |
|
"epoch": 2.07 |
|
}, |
|
{ |
|
"loss": 7.6264, |
|
"grad_norm": 0.9613614082336426, |
|
"learning_rate": 0.0003097925964820163, |
|
"epoch": 2.07 |
|
}, |
|
{ |
|
"loss": 7.6684, |
|
"grad_norm": 0.8426281809806824, |
|
"learning_rate": 0.0003089174761529711, |
|
"epoch": 2.07 |
|
}, |
|
{ |
|
"loss": 7.7356, |
|
"grad_norm": 0.8271446824073792, |
|
"learning_rate": 0.0003080423558239258, |
|
"epoch": 2.08 |
|
}, |
|
{ |
|
"loss": 7.8816, |
|
"grad_norm": 0.9108027219772339, |
|
"learning_rate": 0.00030716723549488055, |
|
"epoch": 2.08 |
|
}, |
|
{ |
|
"loss": 8.5754, |
|
"grad_norm": 0.8285607099533081, |
|
"learning_rate": 0.0003062921151658353, |
|
"epoch": 2.08 |
|
}, |
|
{ |
|
"loss": 7.8875, |
|
"grad_norm": 0.79032963514328, |
|
"learning_rate": 0.00030541699483679007, |
|
"epoch": 2.08 |
|
}, |
|
{ |
|
"loss": 7.4168, |
|
"grad_norm": 0.8623600602149963, |
|
"learning_rate": 0.00030454187450774483, |
|
"epoch": 2.09 |
|
}, |
|
{ |
|
"loss": 7.546, |
|
"grad_norm": 0.8102550506591797, |
|
"learning_rate": 0.0003036667541786996, |
|
"epoch": 2.09 |
|
}, |
|
{ |
|
"loss": 7.9269, |
|
"grad_norm": 1.0298386812210083, |
|
"learning_rate": 0.00030279163384965436, |
|
"epoch": 2.09 |
|
}, |
|
{ |
|
"loss": 7.6682, |
|
"grad_norm": 0.8902001976966858, |
|
"learning_rate": 0.0003019165135206091, |
|
"epoch": 2.09 |
|
}, |
|
{ |
|
"loss": 8.0309, |
|
"grad_norm": 0.831743597984314, |
|
"learning_rate": 0.0003010413931915639, |
|
"epoch": 2.1 |
|
}, |
|
{ |
|
"loss": 8.157, |
|
"grad_norm": 0.8056457042694092, |
|
"learning_rate": 0.00030016627286251864, |
|
"epoch": 2.1 |
|
}, |
|
{ |
|
"loss": 7.6514, |
|
"grad_norm": 1.071753978729248, |
|
"learning_rate": 0.00029929115253347335, |
|
"epoch": 2.1 |
|
}, |
|
{ |
|
"loss": 7.8337, |
|
"grad_norm": 0.8061104416847229, |
|
"learning_rate": 0.0002984160322044281, |
|
"epoch": 2.11 |
|
}, |
|
{ |
|
"loss": 7.8925, |
|
"grad_norm": 1.1958301067352295, |
|
"learning_rate": 0.0002975409118753828, |
|
"epoch": 2.11 |
|
}, |
|
{ |
|
"loss": 6.9557, |
|
"grad_norm": 0.7460314631462097, |
|
"learning_rate": 0.0002966657915463376, |
|
"epoch": 2.11 |
|
}, |
|
{ |
|
"loss": 7.724, |
|
"grad_norm": 0.8949922323226929, |
|
"learning_rate": 0.00029579067121729234, |
|
"epoch": 2.11 |
|
}, |
|
{ |
|
"loss": 8.1209, |
|
"grad_norm": 0.7350090146064758, |
|
"learning_rate": 0.0002949155508882471, |
|
"epoch": 2.12 |
|
}, |
|
{ |
|
"loss": 7.7897, |
|
"grad_norm": 0.9530614018440247, |
|
"learning_rate": 0.00029404043055920187, |
|
"epoch": 2.12 |
|
}, |
|
{ |
|
"loss": 7.7916, |
|
"grad_norm": 0.7030171155929565, |
|
"learning_rate": 0.00029316531023015663, |
|
"epoch": 2.12 |
|
}, |
|
{ |
|
"loss": 7.53, |
|
"grad_norm": 0.8843898177146912, |
|
"learning_rate": 0.0002922901899011114, |
|
"epoch": 2.12 |
|
}, |
|
{ |
|
"loss": 7.5228, |
|
"grad_norm": 0.9127951860427856, |
|
"learning_rate": 0.00029141506957206615, |
|
"epoch": 2.13 |
|
}, |
|
{ |
|
"loss": 7.423, |
|
"grad_norm": 0.7194523811340332, |
|
"learning_rate": 0.0002905399492430209, |
|
"epoch": 2.13 |
|
}, |
|
{ |
|
"loss": 8.3464, |
|
"grad_norm": 0.8251200318336487, |
|
"learning_rate": 0.0002896648289139757, |
|
"epoch": 2.13 |
|
}, |
|
{ |
|
"loss": 7.8906, |
|
"grad_norm": 0.9383019804954529, |
|
"learning_rate": 0.00028878970858493044, |
|
"epoch": 2.13 |
|
}, |
|
{ |
|
"loss": 6.9917, |
|
"grad_norm": 1.1721993684768677, |
|
"learning_rate": 0.0002879145882558852, |
|
"epoch": 2.14 |
|
}, |
|
{ |
|
"loss": 7.7154, |
|
"grad_norm": 0.7905781865119934, |
|
"learning_rate": 0.00028703946792683996, |
|
"epoch": 2.14 |
|
}, |
|
{ |
|
"loss": 7.9272, |
|
"grad_norm": 0.9261153936386108, |
|
"learning_rate": 0.00028616434759779467, |
|
"epoch": 2.14 |
|
}, |
|
{ |
|
"loss": 7.9141, |
|
"grad_norm": 1.206111192703247, |
|
"learning_rate": 0.00028528922726874943, |
|
"epoch": 2.14 |
|
}, |
|
{ |
|
"loss": 7.9561, |
|
"grad_norm": 0.8015759587287903, |
|
"learning_rate": 0.0002844141069397042, |
|
"epoch": 2.15 |
|
}, |
|
{ |
|
"loss": 7.6844, |
|
"grad_norm": 0.970389723777771, |
|
"learning_rate": 0.00028353898661065896, |
|
"epoch": 2.15 |
|
}, |
|
{ |
|
"loss": 7.7312, |
|
"grad_norm": 1.3079341650009155, |
|
"learning_rate": 0.0002826638662816137, |
|
"epoch": 2.15 |
|
}, |
|
{ |
|
"loss": 7.506, |
|
"grad_norm": 0.8393199443817139, |
|
"learning_rate": 0.0002817887459525685, |
|
"epoch": 2.16 |
|
}, |
|
{ |
|
"loss": 7.3006, |
|
"grad_norm": 0.9169728755950928, |
|
"learning_rate": 0.00028091362562352324, |
|
"epoch": 2.16 |
|
}, |
|
{ |
|
"loss": 7.5924, |
|
"grad_norm": 0.8766190409660339, |
|
"learning_rate": 0.000280038505294478, |
|
"epoch": 2.16 |
|
}, |
|
{ |
|
"loss": 8.2074, |
|
"grad_norm": 0.8473224639892578, |
|
"learning_rate": 0.00027916338496543277, |
|
"epoch": 2.16 |
|
}, |
|
{ |
|
"loss": 7.2028, |
|
"grad_norm": 0.9415881037712097, |
|
"learning_rate": 0.0002782882646363875, |
|
"epoch": 2.17 |
|
}, |
|
{ |
|
"loss": 7.87, |
|
"grad_norm": 0.8043491840362549, |
|
"learning_rate": 0.0002774131443073423, |
|
"epoch": 2.17 |
|
}, |
|
{ |
|
"loss": 8.5354, |
|
"grad_norm": 0.9696796536445618, |
|
"learning_rate": 0.00027653802397829705, |
|
"epoch": 2.17 |
|
}, |
|
{ |
|
"loss": 8.1185, |
|
"grad_norm": 0.9294397830963135, |
|
"learning_rate": 0.00027566290364925176, |
|
"epoch": 2.17 |
|
}, |
|
{ |
|
"loss": 7.8844, |
|
"grad_norm": 1.0350419282913208, |
|
"learning_rate": 0.0002747877833202065, |
|
"epoch": 2.18 |
|
}, |
|
{ |
|
"loss": 7.9054, |
|
"grad_norm": 1.086616039276123, |
|
"learning_rate": 0.0002739126629911613, |
|
"epoch": 2.18 |
|
}, |
|
{ |
|
"loss": 7.4362, |
|
"grad_norm": 0.865028440952301, |
|
"learning_rate": 0.00027303754266211604, |
|
"epoch": 2.18 |
|
}, |
|
{ |
|
"loss": 7.4039, |
|
"grad_norm": 0.8574273586273193, |
|
"learning_rate": 0.0002721624223330708, |
|
"epoch": 2.18 |
|
}, |
|
{ |
|
"loss": 8.0095, |
|
"grad_norm": 1.0509589910507202, |
|
"learning_rate": 0.00027128730200402557, |
|
"epoch": 2.19 |
|
}, |
|
{ |
|
"loss": 7.6467, |
|
"grad_norm": 0.7813432812690735, |
|
"learning_rate": 0.00027041218167498033, |
|
"epoch": 2.19 |
|
}, |
|
{ |
|
"loss": 7.4786, |
|
"grad_norm": 0.855741560459137, |
|
"learning_rate": 0.0002695370613459351, |
|
"epoch": 2.19 |
|
}, |
|
{ |
|
"loss": 7.7862, |
|
"grad_norm": 0.8451842069625854, |
|
"learning_rate": 0.00026866194101688985, |
|
"epoch": 2.19 |
|
}, |
|
{ |
|
"loss": 7.7616, |
|
"grad_norm": 0.882211446762085, |
|
"learning_rate": 0.0002677868206878446, |
|
"epoch": 2.2 |
|
}, |
|
{ |
|
"loss": 8.1508, |
|
"grad_norm": 0.7093100547790527, |
|
"learning_rate": 0.0002669117003587994, |
|
"epoch": 2.2 |
|
}, |
|
{ |
|
"loss": 7.8715, |
|
"grad_norm": 0.9282416701316833, |
|
"learning_rate": 0.00026603658002975414, |
|
"epoch": 2.2 |
|
}, |
|
{ |
|
"loss": 7.6333, |
|
"grad_norm": 0.8849425911903381, |
|
"learning_rate": 0.0002651614597007089, |
|
"epoch": 2.2 |
|
}, |
|
{ |
|
"loss": 7.624, |
|
"grad_norm": 0.8789107203483582, |
|
"learning_rate": 0.0002642863393716636, |
|
"epoch": 2.21 |
|
}, |
|
{ |
|
"loss": 7.5042, |
|
"grad_norm": 0.9759025573730469, |
|
"learning_rate": 0.00026341121904261837, |
|
"epoch": 2.21 |
|
}, |
|
{ |
|
"loss": 7.7317, |
|
"grad_norm": 0.794627845287323, |
|
"learning_rate": 0.00026253609871357313, |
|
"epoch": 2.21 |
|
}, |
|
{ |
|
"loss": 7.4743, |
|
"grad_norm": 1.3992342948913574, |
|
"learning_rate": 0.0002616609783845279, |
|
"epoch": 2.22 |
|
}, |
|
{ |
|
"loss": 7.5986, |
|
"grad_norm": 0.8934722542762756, |
|
"learning_rate": 0.00026078585805548266, |
|
"epoch": 2.22 |
|
}, |
|
{ |
|
"loss": 7.7515, |
|
"grad_norm": 1.0474205017089844, |
|
"learning_rate": 0.00025991073772643736, |
|
"epoch": 2.22 |
|
}, |
|
{ |
|
"loss": 7.0749, |
|
"grad_norm": 0.7677063345909119, |
|
"learning_rate": 0.0002590356173973921, |
|
"epoch": 2.22 |
|
}, |
|
{ |
|
"loss": 7.7033, |
|
"grad_norm": 0.8318948149681091, |
|
"learning_rate": 0.0002581604970683469, |
|
"epoch": 2.23 |
|
}, |
|
{ |
|
"loss": 7.775, |
|
"grad_norm": 0.7674381136894226, |
|
"learning_rate": 0.00025728537673930165, |
|
"epoch": 2.23 |
|
}, |
|
{ |
|
"loss": 7.5289, |
|
"grad_norm": 1.0669969320297241, |
|
"learning_rate": 0.0002564102564102564, |
|
"epoch": 2.23 |
|
}, |
|
{ |
|
"loss": 7.3784, |
|
"grad_norm": 1.0004348754882812, |
|
"learning_rate": 0.00025553513608121117, |
|
"epoch": 2.23 |
|
}, |
|
{ |
|
"loss": 7.4305, |
|
"grad_norm": 0.7937709093093872, |
|
"learning_rate": 0.00025466001575216593, |
|
"epoch": 2.24 |
|
}, |
|
{ |
|
"loss": 7.1845, |
|
"grad_norm": 0.9088554382324219, |
|
"learning_rate": 0.00025378489542312064, |
|
"epoch": 2.24 |
|
}, |
|
{ |
|
"loss": 7.9313, |
|
"grad_norm": 1.0221823453903198, |
|
"learning_rate": 0.0002529097750940754, |
|
"epoch": 2.24 |
|
}, |
|
{ |
|
"loss": 7.4251, |
|
"grad_norm": 0.7980064153671265, |
|
"learning_rate": 0.00025203465476503016, |
|
"epoch": 2.24 |
|
}, |
|
{ |
|
"loss": 8.0494, |
|
"grad_norm": 0.8470319509506226, |
|
"learning_rate": 0.0002511595344359849, |
|
"epoch": 2.25 |
|
}, |
|
{ |
|
"loss": 7.7765, |
|
"grad_norm": 1.101785659790039, |
|
"learning_rate": 0.0002502844141069397, |
|
"epoch": 2.25 |
|
}, |
|
{ |
|
"loss": 7.8624, |
|
"grad_norm": 0.8655755519866943, |
|
"learning_rate": 0.00024940929377789445, |
|
"epoch": 2.25 |
|
}, |
|
{ |
|
"loss": 7.6855, |
|
"grad_norm": 1.0447689294815063, |
|
"learning_rate": 0.0002485341734488492, |
|
"epoch": 2.25 |
|
}, |
|
{ |
|
"loss": 7.7653, |
|
"grad_norm": 0.9611648917198181, |
|
"learning_rate": 0.000247659053119804, |
|
"epoch": 2.26 |
|
}, |
|
{ |
|
"loss": 8.0705, |
|
"grad_norm": 1.410849928855896, |
|
"learning_rate": 0.00024678393279075874, |
|
"epoch": 2.26 |
|
}, |
|
{ |
|
"loss": 7.8147, |
|
"grad_norm": 0.9252009987831116, |
|
"learning_rate": 0.0002459088124617135, |
|
"epoch": 2.26 |
|
}, |
|
{ |
|
"loss": 7.9366, |
|
"grad_norm": 0.899348258972168, |
|
"learning_rate": 0.00024503369213266826, |
|
"epoch": 2.27 |
|
}, |
|
{ |
|
"loss": 8.089, |
|
"grad_norm": 0.7920341491699219, |
|
"learning_rate": 0.000244158571803623, |
|
"epoch": 2.27 |
|
}, |
|
{ |
|
"loss": 7.5066, |
|
"grad_norm": 0.8289885520935059, |
|
"learning_rate": 0.00024328345147457776, |
|
"epoch": 2.27 |
|
}, |
|
{ |
|
"loss": 7.4402, |
|
"grad_norm": 0.9304541349411011, |
|
"learning_rate": 0.00024240833114553252, |
|
"epoch": 2.27 |
|
}, |
|
{ |
|
"loss": 8.1004, |
|
"grad_norm": 0.8798967003822327, |
|
"learning_rate": 0.00024153321081648728, |
|
"epoch": 2.28 |
|
}, |
|
{ |
|
"loss": 7.8528, |
|
"grad_norm": 0.9733609557151794, |
|
"learning_rate": 0.00024065809048744201, |
|
"epoch": 2.28 |
|
}, |
|
{ |
|
"loss": 7.1178, |
|
"grad_norm": 1.1248620748519897, |
|
"learning_rate": 0.00023978297015839678, |
|
"epoch": 2.28 |
|
}, |
|
{ |
|
"loss": 7.7862, |
|
"grad_norm": 1.2658095359802246, |
|
"learning_rate": 0.00023890784982935154, |
|
"epoch": 2.28 |
|
}, |
|
{ |
|
"loss": 7.9395, |
|
"grad_norm": 1.0820565223693848, |
|
"learning_rate": 0.0002380327295003063, |
|
"epoch": 2.29 |
|
}, |
|
{ |
|
"loss": 7.4596, |
|
"grad_norm": 0.9462448954582214, |
|
"learning_rate": 0.00023715760917126106, |
|
"epoch": 2.29 |
|
}, |
|
{ |
|
"loss": 7.8461, |
|
"grad_norm": 0.8025732636451721, |
|
"learning_rate": 0.00023628248884221582, |
|
"epoch": 2.29 |
|
}, |
|
{ |
|
"loss": 7.7102, |
|
"grad_norm": 0.7947144508361816, |
|
"learning_rate": 0.00023540736851317059, |
|
"epoch": 2.29 |
|
}, |
|
{ |
|
"loss": 7.8149, |
|
"grad_norm": 0.8819990158081055, |
|
"learning_rate": 0.00023453224818412532, |
|
"epoch": 2.3 |
|
}, |
|
{ |
|
"loss": 7.5168, |
|
"grad_norm": 0.9773268103599548, |
|
"learning_rate": 0.00023365712785508008, |
|
"epoch": 2.3 |
|
}, |
|
{ |
|
"loss": 7.7338, |
|
"grad_norm": 1.384716510772705, |
|
"learning_rate": 0.00023278200752603484, |
|
"epoch": 2.3 |
|
}, |
|
{ |
|
"loss": 6.9549, |
|
"grad_norm": 1.1293810606002808, |
|
"learning_rate": 0.0002319068871969896, |
|
"epoch": 2.3 |
|
}, |
|
{ |
|
"loss": 7.8655, |
|
"grad_norm": 0.7238449454307556, |
|
"learning_rate": 0.00023103176686794437, |
|
"epoch": 2.31 |
|
}, |
|
{ |
|
"loss": 7.7399, |
|
"grad_norm": 0.8876301646232605, |
|
"learning_rate": 0.00023015664653889913, |
|
"epoch": 2.31 |
|
}, |
|
{ |
|
"loss": 7.5196, |
|
"grad_norm": 0.7352742552757263, |
|
"learning_rate": 0.00022928152620985384, |
|
"epoch": 2.31 |
|
}, |
|
{ |
|
"loss": 8.0545, |
|
"grad_norm": 1.0614981651306152, |
|
"learning_rate": 0.0002284064058808086, |
|
"epoch": 2.32 |
|
}, |
|
{ |
|
"loss": 7.8036, |
|
"grad_norm": 0.999052882194519, |
|
"learning_rate": 0.00022753128555176336, |
|
"epoch": 2.32 |
|
}, |
|
{ |
|
"loss": 7.5506, |
|
"grad_norm": 1.084981918334961, |
|
"learning_rate": 0.00022665616522271812, |
|
"epoch": 2.32 |
|
}, |
|
{ |
|
"loss": 7.7953, |
|
"grad_norm": 1.110907793045044, |
|
"learning_rate": 0.00022578104489367288, |
|
"epoch": 2.32 |
|
}, |
|
{ |
|
"loss": 7.6064, |
|
"grad_norm": 1.29153311252594, |
|
"learning_rate": 0.00022490592456462765, |
|
"epoch": 2.33 |
|
}, |
|
{ |
|
"loss": 7.9157, |
|
"grad_norm": 1.5039303302764893, |
|
"learning_rate": 0.00022403080423558238, |
|
"epoch": 2.33 |
|
}, |
|
{ |
|
"loss": 7.5924, |
|
"grad_norm": 0.850940465927124, |
|
"learning_rate": 0.00022315568390653714, |
|
"epoch": 2.33 |
|
}, |
|
{ |
|
"loss": 7.9425, |
|
"grad_norm": 0.79768967628479, |
|
"learning_rate": 0.0002222805635774919, |
|
"epoch": 2.33 |
|
}, |
|
{ |
|
"loss": 8.0374, |
|
"grad_norm": 0.771493673324585, |
|
"learning_rate": 0.00022140544324844667, |
|
"epoch": 2.34 |
|
}, |
|
{ |
|
"loss": 7.1645, |
|
"grad_norm": 0.7525059580802917, |
|
"learning_rate": 0.00022053032291940143, |
|
"epoch": 2.34 |
|
}, |
|
{ |
|
"loss": 7.5769, |
|
"grad_norm": 0.9684802293777466, |
|
"learning_rate": 0.0002196552025903562, |
|
"epoch": 2.34 |
|
}, |
|
{ |
|
"loss": 7.781, |
|
"grad_norm": 1.1203564405441284, |
|
"learning_rate": 0.00021878008226131092, |
|
"epoch": 2.34 |
|
}, |
|
{ |
|
"loss": 7.4585, |
|
"grad_norm": 1.0650273561477661, |
|
"learning_rate": 0.0002179049619322657, |
|
"epoch": 2.35 |
|
}, |
|
{ |
|
"loss": 7.7015, |
|
"grad_norm": 0.9924284219741821, |
|
"learning_rate": 0.00021702984160322045, |
|
"epoch": 2.35 |
|
}, |
|
{ |
|
"loss": 7.572, |
|
"grad_norm": 0.8644096255302429, |
|
"learning_rate": 0.0002161547212741752, |
|
"epoch": 2.35 |
|
}, |
|
{ |
|
"loss": 7.8879, |
|
"grad_norm": 0.854030966758728, |
|
"learning_rate": 0.00021527960094512997, |
|
"epoch": 2.35 |
|
}, |
|
{ |
|
"loss": 7.842, |
|
"grad_norm": 0.7271285653114319, |
|
"learning_rate": 0.00021440448061608473, |
|
"epoch": 2.36 |
|
}, |
|
{ |
|
"loss": 7.652, |
|
"grad_norm": 0.6921567320823669, |
|
"learning_rate": 0.00021352936028703947, |
|
"epoch": 2.36 |
|
}, |
|
{ |
|
"loss": 7.8335, |
|
"grad_norm": 1.2016472816467285, |
|
"learning_rate": 0.00021265423995799423, |
|
"epoch": 2.36 |
|
}, |
|
{ |
|
"loss": 7.5109, |
|
"grad_norm": 0.79868084192276, |
|
"learning_rate": 0.000211779119628949, |
|
"epoch": 2.37 |
|
}, |
|
{ |
|
"loss": 7.3853, |
|
"grad_norm": 0.8064858913421631, |
|
"learning_rate": 0.00021090399929990375, |
|
"epoch": 2.37 |
|
}, |
|
{ |
|
"loss": 7.6334, |
|
"grad_norm": 0.9092600345611572, |
|
"learning_rate": 0.00021002887897085852, |
|
"epoch": 2.37 |
|
}, |
|
{ |
|
"loss": 7.9536, |
|
"grad_norm": 1.0683679580688477, |
|
"learning_rate": 0.00020915375864181325, |
|
"epoch": 2.37 |
|
}, |
|
{ |
|
"loss": 7.7399, |
|
"grad_norm": 1.1141338348388672, |
|
"learning_rate": 0.00020827863831276799, |
|
"epoch": 2.38 |
|
}, |
|
{ |
|
"loss": 7.9567, |
|
"grad_norm": 0.9624096751213074, |
|
"learning_rate": 0.00020740351798372275, |
|
"epoch": 2.38 |
|
}, |
|
{ |
|
"loss": 8.1788, |
|
"grad_norm": 0.7703258991241455, |
|
"learning_rate": 0.0002065283976546775, |
|
"epoch": 2.38 |
|
}, |
|
{ |
|
"loss": 7.8642, |
|
"grad_norm": 0.9297539591789246, |
|
"learning_rate": 0.00020565327732563227, |
|
"epoch": 2.38 |
|
}, |
|
{ |
|
"loss": 7.4837, |
|
"grad_norm": 0.7845075130462646, |
|
"learning_rate": 0.00020477815699658703, |
|
"epoch": 2.39 |
|
}, |
|
{ |
|
"loss": 7.5431, |
|
"grad_norm": 0.8620021343231201, |
|
"learning_rate": 0.0002039030366675418, |
|
"epoch": 2.39 |
|
}, |
|
{ |
|
"loss": 7.7398, |
|
"grad_norm": 0.8532699942588806, |
|
"learning_rate": 0.00020302791633849656, |
|
"epoch": 2.39 |
|
}, |
|
{ |
|
"loss": 7.7079, |
|
"grad_norm": 1.1266266107559204, |
|
"learning_rate": 0.0002021527960094513, |
|
"epoch": 2.39 |
|
}, |
|
{ |
|
"loss": 7.8789, |
|
"grad_norm": 1.003790020942688, |
|
"learning_rate": 0.00020127767568040605, |
|
"epoch": 2.4 |
|
}, |
|
{ |
|
"loss": 7.5108, |
|
"grad_norm": 1.1769237518310547, |
|
"learning_rate": 0.00020040255535136081, |
|
"epoch": 2.4 |
|
}, |
|
{ |
|
"loss": 7.8151, |
|
"grad_norm": 0.9078934192657471, |
|
"learning_rate": 0.00019952743502231558, |
|
"epoch": 2.4 |
|
}, |
|
{ |
|
"loss": 7.4092, |
|
"grad_norm": 0.8376544713973999, |
|
"learning_rate": 0.00019865231469327034, |
|
"epoch": 2.4 |
|
}, |
|
{ |
|
"loss": 7.4658, |
|
"grad_norm": 0.9094048738479614, |
|
"learning_rate": 0.0001977771943642251, |
|
"epoch": 2.41 |
|
}, |
|
{ |
|
"loss": 7.6113, |
|
"grad_norm": 1.1345362663269043, |
|
"learning_rate": 0.00019690207403517984, |
|
"epoch": 2.41 |
|
}, |
|
{ |
|
"loss": 7.4598, |
|
"grad_norm": 0.8164626955986023, |
|
"learning_rate": 0.0001960269537061346, |
|
"epoch": 2.41 |
|
}, |
|
{ |
|
"loss": 7.707, |
|
"grad_norm": 1.125823736190796, |
|
"learning_rate": 0.00019515183337708936, |
|
"epoch": 2.41 |
|
}, |
|
{ |
|
"loss": 8.0873, |
|
"grad_norm": 0.8651579022407532, |
|
"learning_rate": 0.00019427671304804412, |
|
"epoch": 2.42 |
|
}, |
|
{ |
|
"loss": 7.5421, |
|
"grad_norm": 0.9041004776954651, |
|
"learning_rate": 0.00019340159271899888, |
|
"epoch": 2.42 |
|
}, |
|
{ |
|
"loss": 7.9615, |
|
"grad_norm": 0.8012003302574158, |
|
"learning_rate": 0.00019252647238995364, |
|
"epoch": 2.42 |
|
}, |
|
{ |
|
"loss": 7.6728, |
|
"grad_norm": 0.8691316246986389, |
|
"learning_rate": 0.00019165135206090838, |
|
"epoch": 2.43 |
|
}, |
|
{ |
|
"loss": 7.4882, |
|
"grad_norm": 0.8700850605964661, |
|
"learning_rate": 0.00019077623173186314, |
|
"epoch": 2.43 |
|
}, |
|
{ |
|
"loss": 7.4824, |
|
"grad_norm": 1.0540724992752075, |
|
"learning_rate": 0.00018990111140281788, |
|
"epoch": 2.43 |
|
}, |
|
{ |
|
"loss": 7.3133, |
|
"grad_norm": 0.9065701365470886, |
|
"learning_rate": 0.00018902599107377264, |
|
"epoch": 2.43 |
|
}, |
|
{ |
|
"loss": 8.1036, |
|
"grad_norm": 0.8794527649879456, |
|
"learning_rate": 0.0001881508707447274, |
|
"epoch": 2.44 |
|
}, |
|
{ |
|
"loss": 7.3707, |
|
"grad_norm": 0.9155571460723877, |
|
"learning_rate": 0.00018727575041568216, |
|
"epoch": 2.44 |
|
}, |
|
{ |
|
"loss": 7.0801, |
|
"grad_norm": 0.7177339792251587, |
|
"learning_rate": 0.0001864006300866369, |
|
"epoch": 2.44 |
|
}, |
|
{ |
|
"loss": 7.4368, |
|
"grad_norm": 0.8027993440628052, |
|
"learning_rate": 0.00018552550975759166, |
|
"epoch": 2.44 |
|
}, |
|
{ |
|
"loss": 8.2545, |
|
"grad_norm": 0.9770577549934387, |
|
"learning_rate": 0.00018465038942854642, |
|
"epoch": 2.45 |
|
}, |
|
{ |
|
"loss": 7.4767, |
|
"grad_norm": 1.0428367853164673, |
|
"learning_rate": 0.00018377526909950118, |
|
"epoch": 2.45 |
|
}, |
|
{ |
|
"loss": 8.4641, |
|
"grad_norm": 0.8214976787567139, |
|
"learning_rate": 0.00018290014877045594, |
|
"epoch": 2.45 |
|
}, |
|
{ |
|
"loss": 7.054, |
|
"grad_norm": 1.1258653402328491, |
|
"learning_rate": 0.0001820250284414107, |
|
"epoch": 2.45 |
|
}, |
|
{ |
|
"loss": 7.5935, |
|
"grad_norm": 1.07210373878479, |
|
"learning_rate": 0.00018114990811236544, |
|
"epoch": 2.46 |
|
}, |
|
{ |
|
"loss": 7.8104, |
|
"grad_norm": 1.0441612005233765, |
|
"learning_rate": 0.0001802747877833202, |
|
"epoch": 2.46 |
|
}, |
|
{ |
|
"loss": 7.6147, |
|
"grad_norm": 0.9820619821548462, |
|
"learning_rate": 0.00017939966745427496, |
|
"epoch": 2.46 |
|
}, |
|
{ |
|
"loss": 8.1347, |
|
"grad_norm": 0.8725702166557312, |
|
"learning_rate": 0.00017852454712522973, |
|
"epoch": 2.46 |
|
}, |
|
{ |
|
"loss": 8.227, |
|
"grad_norm": 0.8640567660331726, |
|
"learning_rate": 0.0001776494267961845, |
|
"epoch": 2.47 |
|
}, |
|
{ |
|
"loss": 7.392, |
|
"grad_norm": 1.0909335613250732, |
|
"learning_rate": 0.00017677430646713925, |
|
"epoch": 2.47 |
|
}, |
|
{ |
|
"loss": 6.7634, |
|
"grad_norm": 0.8133190274238586, |
|
"learning_rate": 0.000175899186138094, |
|
"epoch": 2.47 |
|
}, |
|
{ |
|
"loss": 7.802, |
|
"grad_norm": 0.9833294749259949, |
|
"learning_rate": 0.00017502406580904875, |
|
"epoch": 2.48 |
|
}, |
|
{ |
|
"loss": 7.2764, |
|
"grad_norm": 0.9594758152961731, |
|
"learning_rate": 0.0001741489454800035, |
|
"epoch": 2.48 |
|
}, |
|
{ |
|
"loss": 7.1931, |
|
"grad_norm": 0.9970749616622925, |
|
"learning_rate": 0.00017327382515095827, |
|
"epoch": 2.48 |
|
}, |
|
{ |
|
"loss": 7.6539, |
|
"grad_norm": 0.8486274480819702, |
|
"learning_rate": 0.00017239870482191303, |
|
"epoch": 2.48 |
|
}, |
|
{ |
|
"loss": 7.3367, |
|
"grad_norm": 0.9591713547706604, |
|
"learning_rate": 0.0001715235844928678, |
|
"epoch": 2.49 |
|
}, |
|
{ |
|
"loss": 7.545, |
|
"grad_norm": 1.1163291931152344, |
|
"learning_rate": 0.00017064846416382255, |
|
"epoch": 2.49 |
|
}, |
|
{ |
|
"loss": 8.3214, |
|
"grad_norm": 0.8581505417823792, |
|
"learning_rate": 0.00016977334383477726, |
|
"epoch": 2.49 |
|
}, |
|
{ |
|
"loss": 7.1871, |
|
"grad_norm": 0.8021834492683411, |
|
"learning_rate": 0.00016889822350573202, |
|
"epoch": 2.49 |
|
}, |
|
{ |
|
"loss": 7.8969, |
|
"grad_norm": 0.9090090990066528, |
|
"learning_rate": 0.00016802310317668679, |
|
"epoch": 2.5 |
|
}, |
|
{ |
|
"loss": 7.37, |
|
"grad_norm": 0.8283194303512573, |
|
"learning_rate": 0.00016714798284764155, |
|
"epoch": 2.5 |
|
}, |
|
{ |
|
"loss": 7.4669, |
|
"grad_norm": 0.8183834552764893, |
|
"learning_rate": 0.0001662728625185963, |
|
"epoch": 2.5 |
|
}, |
|
{ |
|
"loss": 7.4362, |
|
"grad_norm": 0.9701572060585022, |
|
"learning_rate": 0.00016539774218955107, |
|
"epoch": 2.5 |
|
}, |
|
{ |
|
"loss": 7.2859, |
|
"grad_norm": 1.079610824584961, |
|
"learning_rate": 0.0001645226218605058, |
|
"epoch": 2.51 |
|
}, |
|
{ |
|
"loss": 8.0835, |
|
"grad_norm": 0.8598064184188843, |
|
"learning_rate": 0.00016364750153146057, |
|
"epoch": 2.51 |
|
}, |
|
{ |
|
"loss": 7.6696, |
|
"grad_norm": 0.8653038740158081, |
|
"learning_rate": 0.00016277238120241533, |
|
"epoch": 2.51 |
|
}, |
|
{ |
|
"loss": 7.6096, |
|
"grad_norm": 1.0018919706344604, |
|
"learning_rate": 0.0001618972608733701, |
|
"epoch": 2.51 |
|
}, |
|
{ |
|
"loss": 7.7412, |
|
"grad_norm": 0.8919802308082581, |
|
"learning_rate": 0.00016102214054432485, |
|
"epoch": 2.52 |
|
}, |
|
{ |
|
"loss": 7.4504, |
|
"grad_norm": 0.8712960481643677, |
|
"learning_rate": 0.00016014702021527962, |
|
"epoch": 2.52 |
|
}, |
|
{ |
|
"loss": 8.1, |
|
"grad_norm": 0.8894332647323608, |
|
"learning_rate": 0.00015927189988623435, |
|
"epoch": 2.52 |
|
}, |
|
{ |
|
"loss": 8.1017, |
|
"grad_norm": 1.024781584739685, |
|
"learning_rate": 0.0001583967795571891, |
|
"epoch": 2.53 |
|
}, |
|
{ |
|
"loss": 7.6484, |
|
"grad_norm": 0.9175984859466553, |
|
"learning_rate": 0.00015752165922814387, |
|
"epoch": 2.53 |
|
}, |
|
{ |
|
"loss": 7.3766, |
|
"grad_norm": 0.9064013361930847, |
|
"learning_rate": 0.00015664653889909864, |
|
"epoch": 2.53 |
|
}, |
|
{ |
|
"loss": 7.6414, |
|
"grad_norm": 0.9600405097007751, |
|
"learning_rate": 0.0001557714185700534, |
|
"epoch": 2.53 |
|
}, |
|
{ |
|
"loss": 7.2811, |
|
"grad_norm": 0.9788243174552917, |
|
"learning_rate": 0.00015489629824100816, |
|
"epoch": 2.54 |
|
}, |
|
{ |
|
"loss": 7.3704, |
|
"grad_norm": 0.8740330338478088, |
|
"learning_rate": 0.0001540211779119629, |
|
"epoch": 2.54 |
|
}, |
|
{ |
|
"loss": 7.5645, |
|
"grad_norm": 0.8021050095558167, |
|
"learning_rate": 0.00015314605758291766, |
|
"epoch": 2.54 |
|
}, |
|
{ |
|
"loss": 7.9078, |
|
"grad_norm": 1.0614405870437622, |
|
"learning_rate": 0.00015227093725387242, |
|
"epoch": 2.54 |
|
}, |
|
{ |
|
"loss": 7.3365, |
|
"grad_norm": 0.8063251376152039, |
|
"learning_rate": 0.00015139581692482718, |
|
"epoch": 2.55 |
|
}, |
|
{ |
|
"loss": 7.8801, |
|
"grad_norm": 0.8937615752220154, |
|
"learning_rate": 0.00015052069659578194, |
|
"epoch": 2.55 |
|
}, |
|
{ |
|
"loss": 8.0013, |
|
"grad_norm": 0.9128641486167908, |
|
"learning_rate": 0.00014964557626673668, |
|
"epoch": 2.55 |
|
}, |
|
{ |
|
"loss": 8.1354, |
|
"grad_norm": 0.8519286513328552, |
|
"learning_rate": 0.0001487704559376914, |
|
"epoch": 2.55 |
|
}, |
|
{ |
|
"loss": 7.6918, |
|
"grad_norm": 0.9265363812446594, |
|
"learning_rate": 0.00014789533560864617, |
|
"epoch": 2.56 |
|
}, |
|
{ |
|
"loss": 7.7237, |
|
"grad_norm": 1.113276720046997, |
|
"learning_rate": 0.00014702021527960093, |
|
"epoch": 2.56 |
|
}, |
|
{ |
|
"loss": 7.3281, |
|
"grad_norm": 0.9011558890342712, |
|
"learning_rate": 0.0001461450949505557, |
|
"epoch": 2.56 |
|
}, |
|
{ |
|
"loss": 6.577, |
|
"grad_norm": 0.990836501121521, |
|
"learning_rate": 0.00014526997462151046, |
|
"epoch": 2.56 |
|
}, |
|
{ |
|
"loss": 7.4641, |
|
"grad_norm": 1.1346269845962524, |
|
"learning_rate": 0.00014439485429246522, |
|
"epoch": 2.57 |
|
}, |
|
{ |
|
"loss": 7.6071, |
|
"grad_norm": 1.0057759284973145, |
|
"learning_rate": 0.00014351973396341998, |
|
"epoch": 2.57 |
|
}, |
|
{ |
|
"loss": 8.0022, |
|
"grad_norm": 0.8524260520935059, |
|
"learning_rate": 0.00014264461363437472, |
|
"epoch": 2.57 |
|
}, |
|
{ |
|
"loss": 7.4056, |
|
"grad_norm": 0.7590330839157104, |
|
"learning_rate": 0.00014176949330532948, |
|
"epoch": 2.58 |
|
}, |
|
{ |
|
"loss": 7.9487, |
|
"grad_norm": 1.2074108123779297, |
|
"learning_rate": 0.00014089437297628424, |
|
"epoch": 2.58 |
|
}, |
|
{ |
|
"loss": 7.7237, |
|
"grad_norm": 0.9621999263763428, |
|
"learning_rate": 0.000140019252647239, |
|
"epoch": 2.58 |
|
}, |
|
{ |
|
"loss": 7.2588, |
|
"grad_norm": 0.843911349773407, |
|
"learning_rate": 0.00013914413231819376, |
|
"epoch": 2.58 |
|
}, |
|
{ |
|
"loss": 7.1697, |
|
"grad_norm": 0.7619708180427551, |
|
"learning_rate": 0.00013826901198914853, |
|
"epoch": 2.59 |
|
}, |
|
{ |
|
"loss": 7.2903, |
|
"grad_norm": 1.0736790895462036, |
|
"learning_rate": 0.00013739389166010326, |
|
"epoch": 2.59 |
|
}, |
|
{ |
|
"loss": 7.9193, |
|
"grad_norm": 1.03206467628479, |
|
"learning_rate": 0.00013651877133105802, |
|
"epoch": 2.59 |
|
}, |
|
{ |
|
"loss": 8.182, |
|
"grad_norm": 0.9106431603431702, |
|
"learning_rate": 0.00013564365100201278, |
|
"epoch": 2.59 |
|
}, |
|
{ |
|
"loss": 7.5139, |
|
"grad_norm": 0.9506519436836243, |
|
"learning_rate": 0.00013476853067296755, |
|
"epoch": 2.6 |
|
}, |
|
{ |
|
"loss": 7.9055, |
|
"grad_norm": 0.859704852104187, |
|
"learning_rate": 0.0001338934103439223, |
|
"epoch": 2.6 |
|
}, |
|
{ |
|
"loss": 8.0011, |
|
"grad_norm": 0.9628238677978516, |
|
"learning_rate": 0.00013301829001487707, |
|
"epoch": 2.6 |
|
}, |
|
{ |
|
"loss": 7.4412, |
|
"grad_norm": 0.8472156524658203, |
|
"learning_rate": 0.0001321431696858318, |
|
"epoch": 2.6 |
|
}, |
|
{ |
|
"loss": 7.6981, |
|
"grad_norm": 0.9454402327537537, |
|
"learning_rate": 0.00013126804935678657, |
|
"epoch": 2.61 |
|
}, |
|
{ |
|
"loss": 7.4101, |
|
"grad_norm": 0.8925793766975403, |
|
"learning_rate": 0.00013039292902774133, |
|
"epoch": 2.61 |
|
}, |
|
{ |
|
"loss": 7.1784, |
|
"grad_norm": 0.8468560576438904, |
|
"learning_rate": 0.00012951780869869606, |
|
"epoch": 2.61 |
|
}, |
|
{ |
|
"loss": 7.6655, |
|
"grad_norm": 0.8432177901268005, |
|
"learning_rate": 0.00012864268836965082, |
|
"epoch": 2.61 |
|
}, |
|
{ |
|
"loss": 7.4518, |
|
"grad_norm": 0.813543438911438, |
|
"learning_rate": 0.00012776756804060559, |
|
"epoch": 2.62 |
|
}, |
|
{ |
|
"loss": 7.5661, |
|
"grad_norm": 1.134985327720642, |
|
"learning_rate": 0.00012689244771156032, |
|
"epoch": 2.62 |
|
}, |
|
{ |
|
"loss": 7.3611, |
|
"grad_norm": 1.05497407913208, |
|
"learning_rate": 0.00012601732738251508, |
|
"epoch": 2.62 |
|
}, |
|
{ |
|
"loss": 7.8965, |
|
"grad_norm": 1.0532019138336182, |
|
"learning_rate": 0.00012514220705346984, |
|
"epoch": 2.62 |
|
}, |
|
{ |
|
"loss": 8.113, |
|
"grad_norm": 1.0708712339401245, |
|
"learning_rate": 0.0001242670867244246, |
|
"epoch": 2.63 |
|
}, |
|
{ |
|
"loss": 7.4782, |
|
"grad_norm": 1.1848175525665283, |
|
"learning_rate": 0.00012339196639537937, |
|
"epoch": 2.63 |
|
}, |
|
{ |
|
"loss": 7.761, |
|
"grad_norm": 0.9672744870185852, |
|
"learning_rate": 0.00012251684606633413, |
|
"epoch": 2.63 |
|
}, |
|
{ |
|
"loss": 7.8212, |
|
"grad_norm": 1.2713532447814941, |
|
"learning_rate": 0.00012164172573728888, |
|
"epoch": 2.64 |
|
}, |
|
{ |
|
"loss": 7.5313, |
|
"grad_norm": 1.026662826538086, |
|
"learning_rate": 0.00012076660540824364, |
|
"epoch": 2.64 |
|
}, |
|
{ |
|
"loss": 7.9599, |
|
"grad_norm": 0.8448575139045715, |
|
"learning_rate": 0.00011989148507919839, |
|
"epoch": 2.64 |
|
}, |
|
{ |
|
"loss": 7.7506, |
|
"grad_norm": 1.041380524635315, |
|
"learning_rate": 0.00011901636475015315, |
|
"epoch": 2.64 |
|
}, |
|
{ |
|
"loss": 7.9023, |
|
"grad_norm": 0.8197987675666809, |
|
"learning_rate": 0.00011814124442110791, |
|
"epoch": 2.65 |
|
}, |
|
{ |
|
"loss": 7.4913, |
|
"grad_norm": 0.918388307094574, |
|
"learning_rate": 0.00011726612409206266, |
|
"epoch": 2.65 |
|
}, |
|
{ |
|
"loss": 7.8685, |
|
"grad_norm": 0.9161803722381592, |
|
"learning_rate": 0.00011639100376301742, |
|
"epoch": 2.65 |
|
}, |
|
{ |
|
"loss": 7.5855, |
|
"grad_norm": 0.8994104266166687, |
|
"learning_rate": 0.00011551588343397218, |
|
"epoch": 2.65 |
|
}, |
|
{ |
|
"loss": 7.6488, |
|
"grad_norm": 0.8985808491706848, |
|
"learning_rate": 0.00011464076310492692, |
|
"epoch": 2.66 |
|
}, |
|
{ |
|
"loss": 7.5261, |
|
"grad_norm": 0.9975460767745972, |
|
"learning_rate": 0.00011376564277588168, |
|
"epoch": 2.66 |
|
}, |
|
{ |
|
"loss": 7.9051, |
|
"grad_norm": 1.051378607749939, |
|
"learning_rate": 0.00011289052244683644, |
|
"epoch": 2.66 |
|
}, |
|
{ |
|
"loss": 7.2571, |
|
"grad_norm": 1.017866611480713, |
|
"learning_rate": 0.00011201540211779119, |
|
"epoch": 2.66 |
|
}, |
|
{ |
|
"loss": 7.6068, |
|
"grad_norm": 1.1010361909866333, |
|
"learning_rate": 0.00011114028178874595, |
|
"epoch": 2.67 |
|
}, |
|
{ |
|
"loss": 7.6306, |
|
"grad_norm": 0.9585467576980591, |
|
"learning_rate": 0.00011026516145970071, |
|
"epoch": 2.67 |
|
}, |
|
{ |
|
"loss": 7.3702, |
|
"grad_norm": 0.9484645128250122, |
|
"learning_rate": 0.00010939004113065546, |
|
"epoch": 2.67 |
|
}, |
|
{ |
|
"loss": 7.4482, |
|
"grad_norm": 1.0726372003555298, |
|
"learning_rate": 0.00010851492080161022, |
|
"epoch": 2.67 |
|
}, |
|
{ |
|
"loss": 7.7554, |
|
"grad_norm": 0.8078585863113403, |
|
"learning_rate": 0.00010763980047256499, |
|
"epoch": 2.68 |
|
}, |
|
{ |
|
"loss": 7.3881, |
|
"grad_norm": 0.9488946199417114, |
|
"learning_rate": 0.00010676468014351973, |
|
"epoch": 2.68 |
|
}, |
|
{ |
|
"loss": 7.7557, |
|
"grad_norm": 0.8590677976608276, |
|
"learning_rate": 0.0001058895598144745, |
|
"epoch": 2.68 |
|
}, |
|
{ |
|
"loss": 7.2128, |
|
"grad_norm": 0.8768866062164307, |
|
"learning_rate": 0.00010501443948542926, |
|
"epoch": 2.69 |
|
}, |
|
{ |
|
"loss": 7.6447, |
|
"grad_norm": 1.1127121448516846, |
|
"learning_rate": 0.00010413931915638399, |
|
"epoch": 2.69 |
|
}, |
|
{ |
|
"loss": 7.7283, |
|
"grad_norm": 0.7706397771835327, |
|
"learning_rate": 0.00010326419882733875, |
|
"epoch": 2.69 |
|
}, |
|
{ |
|
"loss": 7.8187, |
|
"grad_norm": 0.910484254360199, |
|
"learning_rate": 0.00010238907849829352, |
|
"epoch": 2.69 |
|
}, |
|
{ |
|
"loss": 6.9677, |
|
"grad_norm": 0.8292771577835083, |
|
"learning_rate": 0.00010151395816924828, |
|
"epoch": 2.7 |
|
}, |
|
{ |
|
"loss": 7.7939, |
|
"grad_norm": 1.2936872243881226, |
|
"learning_rate": 0.00010063883784020303, |
|
"epoch": 2.7 |
|
}, |
|
{ |
|
"loss": 7.2773, |
|
"grad_norm": 1.050876259803772, |
|
"learning_rate": 9.976371751115779e-05, |
|
"epoch": 2.7 |
|
}, |
|
{ |
|
"loss": 7.6461, |
|
"grad_norm": 1.0275306701660156, |
|
"learning_rate": 9.888859718211255e-05, |
|
"epoch": 2.7 |
|
}, |
|
{ |
|
"loss": 7.4058, |
|
"grad_norm": 0.9414623379707336, |
|
"learning_rate": 9.80134768530673e-05, |
|
"epoch": 2.71 |
|
}, |
|
{ |
|
"loss": 7.4938, |
|
"grad_norm": 0.8367570042610168, |
|
"learning_rate": 9.713835652402206e-05, |
|
"epoch": 2.71 |
|
}, |
|
{ |
|
"loss": 7.4702, |
|
"grad_norm": 0.9100292325019836, |
|
"learning_rate": 9.626323619497682e-05, |
|
"epoch": 2.71 |
|
}, |
|
{ |
|
"loss": 7.4209, |
|
"grad_norm": 0.881262481212616, |
|
"learning_rate": 9.538811586593157e-05, |
|
"epoch": 2.71 |
|
}, |
|
{ |
|
"loss": 7.568, |
|
"grad_norm": 1.0841021537780762, |
|
"learning_rate": 9.451299553688632e-05, |
|
"epoch": 2.72 |
|
}, |
|
{ |
|
"loss": 7.4385, |
|
"grad_norm": 0.8553777933120728, |
|
"learning_rate": 9.363787520784108e-05, |
|
"epoch": 2.72 |
|
}, |
|
{ |
|
"loss": 7.7745, |
|
"grad_norm": 0.8244187235832214, |
|
"learning_rate": 9.276275487879583e-05, |
|
"epoch": 2.72 |
|
}, |
|
{ |
|
"loss": 7.3427, |
|
"grad_norm": 1.0330350399017334, |
|
"learning_rate": 9.188763454975059e-05, |
|
"epoch": 2.72 |
|
}, |
|
{ |
|
"loss": 7.4313, |
|
"grad_norm": 0.86846524477005, |
|
"learning_rate": 9.101251422070535e-05, |
|
"epoch": 2.73 |
|
}, |
|
{ |
|
"loss": 7.6994, |
|
"grad_norm": 1.0151475667953491, |
|
"learning_rate": 9.01373938916601e-05, |
|
"epoch": 2.73 |
|
}, |
|
{ |
|
"loss": 7.6994, |
|
"grad_norm": 0.8053341507911682, |
|
"learning_rate": 8.926227356261486e-05, |
|
"epoch": 2.73 |
|
}, |
|
{ |
|
"loss": 7.8782, |
|
"grad_norm": 0.917957067489624, |
|
"learning_rate": 8.838715323356962e-05, |
|
"epoch": 2.74 |
|
}, |
|
{ |
|
"loss": 7.5889, |
|
"grad_norm": 1.556181788444519, |
|
"learning_rate": 8.751203290452437e-05, |
|
"epoch": 2.74 |
|
}, |
|
{ |
|
"loss": 7.6279, |
|
"grad_norm": 1.043771743774414, |
|
"learning_rate": 8.663691257547913e-05, |
|
"epoch": 2.74 |
|
}, |
|
{ |
|
"loss": 7.8682, |
|
"grad_norm": 1.1640032529830933, |
|
"learning_rate": 8.57617922464339e-05, |
|
"epoch": 2.74 |
|
}, |
|
{ |
|
"loss": 7.8918, |
|
"grad_norm": 0.8830235600471497, |
|
"learning_rate": 8.488667191738863e-05, |
|
"epoch": 2.75 |
|
}, |
|
{ |
|
"loss": 7.5466, |
|
"grad_norm": 0.958690345287323, |
|
"learning_rate": 8.401155158834339e-05, |
|
"epoch": 2.75 |
|
}, |
|
{ |
|
"loss": 7.5439, |
|
"grad_norm": 1.1970360279083252, |
|
"learning_rate": 8.313643125929815e-05, |
|
"epoch": 2.75 |
|
}, |
|
{ |
|
"loss": 8.1002, |
|
"grad_norm": 0.9388788938522339, |
|
"learning_rate": 8.22613109302529e-05, |
|
"epoch": 2.75 |
|
}, |
|
{ |
|
"loss": 7.6892, |
|
"grad_norm": 1.0798841714859009, |
|
"learning_rate": 8.138619060120766e-05, |
|
"epoch": 2.76 |
|
}, |
|
{ |
|
"loss": 8.0534, |
|
"grad_norm": 1.2909208536148071, |
|
"learning_rate": 8.051107027216243e-05, |
|
"epoch": 2.76 |
|
}, |
|
{ |
|
"loss": 7.3369, |
|
"grad_norm": 1.272641658782959, |
|
"learning_rate": 7.963594994311717e-05, |
|
"epoch": 2.76 |
|
}, |
|
{ |
|
"loss": 7.5785, |
|
"grad_norm": 0.9654033780097961, |
|
"learning_rate": 7.876082961407194e-05, |
|
"epoch": 2.76 |
|
}, |
|
{ |
|
"loss": 7.8078, |
|
"grad_norm": 0.8423277139663696, |
|
"learning_rate": 7.78857092850267e-05, |
|
"epoch": 2.77 |
|
}, |
|
{ |
|
"loss": 7.8086, |
|
"grad_norm": 0.9509181380271912, |
|
"learning_rate": 7.701058895598145e-05, |
|
"epoch": 2.77 |
|
}, |
|
{ |
|
"loss": 8.1405, |
|
"grad_norm": 0.9167718291282654, |
|
"learning_rate": 7.613546862693621e-05, |
|
"epoch": 2.77 |
|
}, |
|
{ |
|
"loss": 7.7728, |
|
"grad_norm": 0.9845168590545654, |
|
"learning_rate": 7.526034829789097e-05, |
|
"epoch": 2.77 |
|
}, |
|
{ |
|
"loss": 7.4146, |
|
"grad_norm": 0.9597529768943787, |
|
"learning_rate": 7.43852279688457e-05, |
|
"epoch": 2.78 |
|
}, |
|
{ |
|
"loss": 7.0711, |
|
"grad_norm": 1.0068391561508179, |
|
"learning_rate": 7.351010763980047e-05, |
|
"epoch": 2.78 |
|
}, |
|
{ |
|
"loss": 7.1173, |
|
"grad_norm": 0.8510629534721375, |
|
"learning_rate": 7.263498731075523e-05, |
|
"epoch": 2.78 |
|
}, |
|
{ |
|
"loss": 7.1843, |
|
"grad_norm": 0.8737899661064148, |
|
"learning_rate": 7.175986698170999e-05, |
|
"epoch": 2.79 |
|
}, |
|
{ |
|
"loss": 7.376, |
|
"grad_norm": 0.9045628905296326, |
|
"learning_rate": 7.088474665266474e-05, |
|
"epoch": 2.79 |
|
}, |
|
{ |
|
"loss": 7.4447, |
|
"grad_norm": 0.8932380080223083, |
|
"learning_rate": 7.00096263236195e-05, |
|
"epoch": 2.79 |
|
}, |
|
{ |
|
"loss": 7.362, |
|
"grad_norm": 0.8961164951324463, |
|
"learning_rate": 6.913450599457426e-05, |
|
"epoch": 2.79 |
|
}, |
|
{ |
|
"loss": 7.4237, |
|
"grad_norm": 1.0015422105789185, |
|
"learning_rate": 6.825938566552901e-05, |
|
"epoch": 2.8 |
|
}, |
|
{ |
|
"loss": 7.2541, |
|
"grad_norm": 0.9842544198036194, |
|
"learning_rate": 6.738426533648377e-05, |
|
"epoch": 2.8 |
|
}, |
|
{ |
|
"loss": 8.0427, |
|
"grad_norm": 1.0375638008117676, |
|
"learning_rate": 6.650914500743853e-05, |
|
"epoch": 2.8 |
|
}, |
|
{ |
|
"loss": 7.4801, |
|
"grad_norm": 0.9552834630012512, |
|
"learning_rate": 6.563402467839328e-05, |
|
"epoch": 2.8 |
|
}, |
|
{ |
|
"loss": 7.8596, |
|
"grad_norm": 0.8038078546524048, |
|
"learning_rate": 6.475890434934803e-05, |
|
"epoch": 2.81 |
|
}, |
|
{ |
|
"loss": 7.2653, |
|
"grad_norm": 0.8008092045783997, |
|
"learning_rate": 6.388378402030279e-05, |
|
"epoch": 2.81 |
|
}, |
|
{ |
|
"loss": 7.159, |
|
"grad_norm": 1.087442398071289, |
|
"learning_rate": 6.300866369125754e-05, |
|
"epoch": 2.81 |
|
}, |
|
{ |
|
"loss": 7.0556, |
|
"grad_norm": 1.0442233085632324, |
|
"learning_rate": 6.21335433622123e-05, |
|
"epoch": 2.81 |
|
}, |
|
{ |
|
"loss": 7.162, |
|
"grad_norm": 1.0271589756011963, |
|
"learning_rate": 6.125842303316706e-05, |
|
"epoch": 2.82 |
|
}, |
|
{ |
|
"loss": 7.5864, |
|
"grad_norm": 0.9957409501075745, |
|
"learning_rate": 6.038330270412182e-05, |
|
"epoch": 2.82 |
|
}, |
|
{ |
|
"loss": 8.4511, |
|
"grad_norm": 0.870765745639801, |
|
"learning_rate": 5.9508182375076575e-05, |
|
"epoch": 2.82 |
|
}, |
|
{ |
|
"loss": 7.9488, |
|
"grad_norm": 0.8632308840751648, |
|
"learning_rate": 5.863306204603133e-05, |
|
"epoch": 2.82 |
|
}, |
|
{ |
|
"loss": 8.1216, |
|
"grad_norm": 1.1113914251327515, |
|
"learning_rate": 5.775794171698609e-05, |
|
"epoch": 2.83 |
|
}, |
|
{ |
|
"loss": 7.7049, |
|
"grad_norm": 0.9410499334335327, |
|
"learning_rate": 5.688282138794084e-05, |
|
"epoch": 2.83 |
|
}, |
|
{ |
|
"loss": 7.6916, |
|
"grad_norm": 0.8908835053443909, |
|
"learning_rate": 5.6007701058895595e-05, |
|
"epoch": 2.83 |
|
}, |
|
{ |
|
"loss": 7.5659, |
|
"grad_norm": 0.7924339175224304, |
|
"learning_rate": 5.513258072985036e-05, |
|
"epoch": 2.83 |
|
}, |
|
{ |
|
"loss": 7.4359, |
|
"grad_norm": 0.8098507523536682, |
|
"learning_rate": 5.425746040080511e-05, |
|
"epoch": 2.84 |
|
}, |
|
{ |
|
"loss": 7.4043, |
|
"grad_norm": 0.8541660904884338, |
|
"learning_rate": 5.338234007175987e-05, |
|
"epoch": 2.84 |
|
}, |
|
{ |
|
"loss": 7.5664, |
|
"grad_norm": 0.9474323987960815, |
|
"learning_rate": 5.250721974271463e-05, |
|
"epoch": 2.84 |
|
}, |
|
{ |
|
"loss": 7.7903, |
|
"grad_norm": 1.0568387508392334, |
|
"learning_rate": 5.163209941366938e-05, |
|
"epoch": 2.85 |
|
}, |
|
{ |
|
"loss": 7.4216, |
|
"grad_norm": 0.9031184315681458, |
|
"learning_rate": 5.075697908462414e-05, |
|
"epoch": 2.85 |
|
}, |
|
{ |
|
"loss": 7.5944, |
|
"grad_norm": 0.8136922121047974, |
|
"learning_rate": 4.9881858755578894e-05, |
|
"epoch": 2.85 |
|
}, |
|
{ |
|
"loss": 7.6272, |
|
"grad_norm": 1.1002339124679565, |
|
"learning_rate": 4.900673842653365e-05, |
|
"epoch": 2.85 |
|
}, |
|
{ |
|
"loss": 7.5846, |
|
"grad_norm": 1.2232916355133057, |
|
"learning_rate": 4.813161809748841e-05, |
|
"epoch": 2.86 |
|
}, |
|
{ |
|
"loss": 7.8478, |
|
"grad_norm": 0.8891430497169495, |
|
"learning_rate": 4.725649776844316e-05, |
|
"epoch": 2.86 |
|
}, |
|
{ |
|
"loss": 7.3, |
|
"grad_norm": 0.9129414558410645, |
|
"learning_rate": 4.6381377439397914e-05, |
|
"epoch": 2.86 |
|
}, |
|
{ |
|
"loss": 7.4529, |
|
"grad_norm": 0.7938532829284668, |
|
"learning_rate": 4.5506257110352676e-05, |
|
"epoch": 2.86 |
|
}, |
|
{ |
|
"loss": 7.2803, |
|
"grad_norm": 0.9501358270645142, |
|
"learning_rate": 4.463113678130743e-05, |
|
"epoch": 2.87 |
|
}, |
|
{ |
|
"loss": 7.5943, |
|
"grad_norm": 1.0423897504806519, |
|
"learning_rate": 4.3756016452262186e-05, |
|
"epoch": 2.87 |
|
}, |
|
{ |
|
"loss": 7.2376, |
|
"grad_norm": 0.9883305430412292, |
|
"learning_rate": 4.288089612321695e-05, |
|
"epoch": 2.87 |
|
}, |
|
{ |
|
"loss": 7.6255, |
|
"grad_norm": 0.9974358677864075, |
|
"learning_rate": 4.2005775794171696e-05, |
|
"epoch": 2.87 |
|
}, |
|
{ |
|
"loss": 7.2739, |
|
"grad_norm": 0.9481905102729797, |
|
"learning_rate": 4.113065546512645e-05, |
|
"epoch": 2.88 |
|
}, |
|
{ |
|
"loss": 8.0077, |
|
"grad_norm": 1.067797064781189, |
|
"learning_rate": 4.025553513608121e-05, |
|
"epoch": 2.88 |
|
}, |
|
{ |
|
"loss": 7.5522, |
|
"grad_norm": 0.8410007953643799, |
|
"learning_rate": 3.938041480703597e-05, |
|
"epoch": 2.88 |
|
}, |
|
{ |
|
"loss": 7.5854, |
|
"grad_norm": 0.847583532333374, |
|
"learning_rate": 3.8505294477990723e-05, |
|
"epoch": 2.88 |
|
}, |
|
{ |
|
"loss": 7.2142, |
|
"grad_norm": 1.0279533863067627, |
|
"learning_rate": 3.7630174148945485e-05, |
|
"epoch": 2.89 |
|
}, |
|
{ |
|
"loss": 7.4712, |
|
"grad_norm": 1.1256965398788452, |
|
"learning_rate": 3.6755053819900234e-05, |
|
"epoch": 2.89 |
|
}, |
|
{ |
|
"loss": 7.7947, |
|
"grad_norm": 1.0278571844100952, |
|
"learning_rate": 3.5879933490854995e-05, |
|
"epoch": 2.89 |
|
}, |
|
{ |
|
"loss": 7.3523, |
|
"grad_norm": 0.9609654545783997, |
|
"learning_rate": 3.500481316180975e-05, |
|
"epoch": 2.9 |
|
}, |
|
{ |
|
"loss": 7.3334, |
|
"grad_norm": 0.8453736901283264, |
|
"learning_rate": 3.4129692832764505e-05, |
|
"epoch": 2.9 |
|
}, |
|
{ |
|
"loss": 7.177, |
|
"grad_norm": 0.8161653280258179, |
|
"learning_rate": 3.325457250371927e-05, |
|
"epoch": 2.9 |
|
}, |
|
{ |
|
"loss": 7.9061, |
|
"grad_norm": 0.9861032366752625, |
|
"learning_rate": 3.2379452174674016e-05, |
|
"epoch": 2.9 |
|
}, |
|
{ |
|
"loss": 7.3155, |
|
"grad_norm": 1.1409838199615479, |
|
"learning_rate": 3.150433184562877e-05, |
|
"epoch": 2.91 |
|
}, |
|
{ |
|
"loss": 7.7667, |
|
"grad_norm": 0.8848074078559875, |
|
"learning_rate": 3.062921151658353e-05, |
|
"epoch": 2.91 |
|
}, |
|
{ |
|
"loss": 7.2722, |
|
"grad_norm": 0.8996227979660034, |
|
"learning_rate": 2.9754091187538288e-05, |
|
"epoch": 2.91 |
|
}, |
|
{ |
|
"loss": 7.4819, |
|
"grad_norm": 0.9429714679718018, |
|
"learning_rate": 2.8878970858493046e-05, |
|
"epoch": 2.91 |
|
}, |
|
{ |
|
"loss": 7.4154, |
|
"grad_norm": 1.1169899702072144, |
|
"learning_rate": 2.8003850529447798e-05, |
|
"epoch": 2.92 |
|
}, |
|
{ |
|
"loss": 7.7691, |
|
"grad_norm": 0.8326570987701416, |
|
"learning_rate": 2.7128730200402556e-05, |
|
"epoch": 2.92 |
|
}, |
|
{ |
|
"loss": 7.6196, |
|
"grad_norm": 0.9243487119674683, |
|
"learning_rate": 2.6253609871357314e-05, |
|
"epoch": 2.92 |
|
}, |
|
{ |
|
"loss": 7.6529, |
|
"grad_norm": 0.8465039730072021, |
|
"learning_rate": 2.537848954231207e-05, |
|
"epoch": 2.92 |
|
}, |
|
{ |
|
"loss": 7.2623, |
|
"grad_norm": 1.0216766595840454, |
|
"learning_rate": 2.4503369213266825e-05, |
|
"epoch": 2.93 |
|
}, |
|
{ |
|
"loss": 7.5628, |
|
"grad_norm": 0.9314711689949036, |
|
"learning_rate": 2.362824888422158e-05, |
|
"epoch": 2.93 |
|
}, |
|
{ |
|
"loss": 7.9252, |
|
"grad_norm": 0.8769168853759766, |
|
"learning_rate": 2.2753128555176338e-05, |
|
"epoch": 2.93 |
|
}, |
|
{ |
|
"loss": 7.2971, |
|
"grad_norm": 0.8925982713699341, |
|
"learning_rate": 2.1878008226131093e-05, |
|
"epoch": 2.93 |
|
}, |
|
{ |
|
"loss": 7.1022, |
|
"grad_norm": 0.937786340713501, |
|
"learning_rate": 2.1002887897085848e-05, |
|
"epoch": 2.94 |
|
}, |
|
{ |
|
"loss": 7.5253, |
|
"grad_norm": 0.900693416595459, |
|
"learning_rate": 2.0127767568040607e-05, |
|
"epoch": 2.94 |
|
}, |
|
{ |
|
"loss": 7.5837, |
|
"grad_norm": 0.9113482236862183, |
|
"learning_rate": 1.9252647238995362e-05, |
|
"epoch": 2.94 |
|
}, |
|
{ |
|
"loss": 7.7925, |
|
"grad_norm": 0.8734735250473022, |
|
"learning_rate": 1.8377526909950117e-05, |
|
"epoch": 2.95 |
|
}, |
|
{ |
|
"loss": 7.5821, |
|
"grad_norm": 0.8616068959236145, |
|
"learning_rate": 1.7502406580904875e-05, |
|
"epoch": 2.95 |
|
}, |
|
{ |
|
"loss": 6.7659, |
|
"grad_norm": 0.8509213328361511, |
|
"learning_rate": 1.6627286251859634e-05, |
|
"epoch": 2.95 |
|
}, |
|
{ |
|
"loss": 7.9045, |
|
"grad_norm": 0.8518444895744324, |
|
"learning_rate": 1.5752165922814385e-05, |
|
"epoch": 2.95 |
|
}, |
|
{ |
|
"loss": 7.2314, |
|
"grad_norm": 1.1429413557052612, |
|
"learning_rate": 1.4877045593769144e-05, |
|
"epoch": 2.96 |
|
}, |
|
{ |
|
"loss": 7.5707, |
|
"grad_norm": 0.825677752494812, |
|
"learning_rate": 1.4001925264723899e-05, |
|
"epoch": 2.96 |
|
}, |
|
{ |
|
"loss": 7.2231, |
|
"grad_norm": 0.9227612018585205, |
|
"learning_rate": 1.3126804935678657e-05, |
|
"epoch": 2.96 |
|
}, |
|
{ |
|
"loss": 7.3015, |
|
"grad_norm": 0.9745140671730042, |
|
"learning_rate": 1.2251684606633412e-05, |
|
"epoch": 2.96 |
|
}, |
|
{ |
|
"loss": 7.5931, |
|
"grad_norm": 0.8096091151237488, |
|
"learning_rate": 1.1376564277588169e-05, |
|
"epoch": 2.97 |
|
}, |
|
{ |
|
"loss": 7.2393, |
|
"grad_norm": 0.9233807921409607, |
|
"learning_rate": 1.0501443948542924e-05, |
|
"epoch": 2.97 |
|
}, |
|
{ |
|
"loss": 7.2871, |
|
"grad_norm": 0.7690852880477905, |
|
"learning_rate": 9.626323619497681e-06, |
|
"epoch": 2.97 |
|
}, |
|
{ |
|
"loss": 7.5845, |
|
"grad_norm": 0.882102370262146, |
|
"learning_rate": 8.751203290452438e-06, |
|
"epoch": 2.97 |
|
}, |
|
{ |
|
"loss": 7.2335, |
|
"grad_norm": 0.887958288192749, |
|
"learning_rate": 7.876082961407193e-06, |
|
"epoch": 2.98 |
|
}, |
|
{ |
|
"loss": 7.5324, |
|
"grad_norm": 0.8895597457885742, |
|
"learning_rate": 7.000962632361949e-06, |
|
"epoch": 2.98 |
|
}, |
|
{ |
|
"loss": 7.1145, |
|
"grad_norm": 0.8137519955635071, |
|
"learning_rate": 6.125842303316706e-06, |
|
"epoch": 2.98 |
|
}, |
|
{ |
|
"loss": 7.411, |
|
"grad_norm": 0.9460362195968628, |
|
"learning_rate": 5.250721974271462e-06, |
|
"epoch": 2.98 |
|
}, |
|
{ |
|
"loss": 7.6058, |
|
"grad_norm": 0.9842742681503296, |
|
"learning_rate": 4.375601645226219e-06, |
|
"epoch": 2.99 |
|
}, |
|
{ |
|
"loss": 7.6927, |
|
"grad_norm": 0.938562273979187, |
|
"learning_rate": 3.5004813161809747e-06, |
|
"epoch": 2.99 |
|
}, |
|
{ |
|
"loss": 7.6995, |
|
"grad_norm": 0.9931243658065796, |
|
"learning_rate": 2.625360987135731e-06, |
|
"epoch": 2.99 |
|
}, |
|
{ |
|
"loss": 7.5572, |
|
"grad_norm": 0.8916573524475098, |
|
"learning_rate": 1.7502406580904874e-06, |
|
"epoch": 3.0 |
|
}, |
|
{ |
|
"loss": 7.2556, |
|
"grad_norm": 0.780832052230835, |
|
"learning_rate": 8.751203290452437e-07, |
|
"epoch": 3.0 |
|
}, |
|
{ |
|
"train_runtime": 112786.1501, |
|
"train_samples_per_second": 3.243, |
|
"train_steps_per_second": 0.101, |
|
"train_loss": 8.874524852365107, |
|
"epoch": 3.0 |
|
} |
|
] |