|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.3091190108191654, |
|
"eval_steps": 500, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0003091190108191654, |
|
"grad_norm": 0.17105351388454437, |
|
"learning_rate": 2e-05, |
|
"loss": 1.589, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0006182380216383308, |
|
"grad_norm": 0.13888764381408691, |
|
"learning_rate": 4e-05, |
|
"loss": 1.6333, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0009273570324574962, |
|
"grad_norm": 0.13389942049980164, |
|
"learning_rate": 6e-05, |
|
"loss": 1.6075, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0012364760432766616, |
|
"grad_norm": 0.1443634182214737, |
|
"learning_rate": 8e-05, |
|
"loss": 1.3981, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0015455950540958269, |
|
"grad_norm": 0.2410346418619156, |
|
"learning_rate": 0.0001, |
|
"loss": 1.6522, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0018547140649149924, |
|
"grad_norm": 0.23892079293727875, |
|
"learning_rate": 0.00012, |
|
"loss": 1.3345, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0021638330757341576, |
|
"grad_norm": 0.32107171416282654, |
|
"learning_rate": 0.00014, |
|
"loss": 1.4086, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.002472952086553323, |
|
"grad_norm": 0.38699325919151306, |
|
"learning_rate": 0.00016, |
|
"loss": 1.2824, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0027820710973724882, |
|
"grad_norm": 0.2972716987133026, |
|
"learning_rate": 0.00018, |
|
"loss": 1.3528, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0030911901081916537, |
|
"grad_norm": 0.288402795791626, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0556, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0034003091190108192, |
|
"grad_norm": 0.4650692641735077, |
|
"learning_rate": 0.00019993798449612405, |
|
"loss": 1.0995, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0037094281298299847, |
|
"grad_norm": 0.39375749230384827, |
|
"learning_rate": 0.0001998759689922481, |
|
"loss": 0.9978, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.00401854714064915, |
|
"grad_norm": 0.3362458348274231, |
|
"learning_rate": 0.0001998139534883721, |
|
"loss": 1.0522, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.004327666151468315, |
|
"grad_norm": 0.2642221450805664, |
|
"learning_rate": 0.00019975193798449614, |
|
"loss": 0.9661, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.00463678516228748, |
|
"grad_norm": 0.3542484939098358, |
|
"learning_rate": 0.00019968992248062018, |
|
"loss": 0.8814, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.004945904173106646, |
|
"grad_norm": 0.1401689201593399, |
|
"learning_rate": 0.00019962790697674421, |
|
"loss": 0.8629, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.005255023183925811, |
|
"grad_norm": 0.20015761256217957, |
|
"learning_rate": 0.00019956589147286823, |
|
"loss": 0.8454, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0055641421947449764, |
|
"grad_norm": 0.1540534645318985, |
|
"learning_rate": 0.00019950387596899224, |
|
"loss": 0.8852, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.005873261205564142, |
|
"grad_norm": 0.15904690325260162, |
|
"learning_rate": 0.00019944186046511628, |
|
"loss": 0.7254, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0061823802163833074, |
|
"grad_norm": 0.21907807886600494, |
|
"learning_rate": 0.00019937984496124032, |
|
"loss": 1.0708, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.006491499227202473, |
|
"grad_norm": 0.14591765403747559, |
|
"learning_rate": 0.00019931782945736435, |
|
"loss": 0.8555, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0068006182380216385, |
|
"grad_norm": 0.16298744082450867, |
|
"learning_rate": 0.00019925581395348837, |
|
"loss": 0.8262, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.0071097372488408035, |
|
"grad_norm": 0.14358466863632202, |
|
"learning_rate": 0.0001991937984496124, |
|
"loss": 0.8744, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0074188562596599695, |
|
"grad_norm": 0.149592325091362, |
|
"learning_rate": 0.00019913178294573644, |
|
"loss": 0.9376, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0077279752704791345, |
|
"grad_norm": 0.13760673999786377, |
|
"learning_rate": 0.00019906976744186048, |
|
"loss": 0.8854, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0080370942812983, |
|
"grad_norm": 0.19108699262142181, |
|
"learning_rate": 0.00019900775193798452, |
|
"loss": 0.885, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.008346213292117466, |
|
"grad_norm": 0.1892910748720169, |
|
"learning_rate": 0.00019894573643410853, |
|
"loss": 0.9911, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.00865533230293663, |
|
"grad_norm": 0.13371500372886658, |
|
"learning_rate": 0.00019888372093023257, |
|
"loss": 0.8475, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.008964451313755796, |
|
"grad_norm": 0.1226775124669075, |
|
"learning_rate": 0.0001988217054263566, |
|
"loss": 0.7733, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.00927357032457496, |
|
"grad_norm": 0.13002170622348785, |
|
"learning_rate": 0.00019875968992248062, |
|
"loss": 0.8008, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.009582689335394128, |
|
"grad_norm": 0.13575126230716705, |
|
"learning_rate": 0.00019869767441860466, |
|
"loss": 0.8841, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.009891808346213293, |
|
"grad_norm": 0.15838854014873505, |
|
"learning_rate": 0.00019863565891472867, |
|
"loss": 0.8856, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.010200927357032458, |
|
"grad_norm": 0.12744970619678497, |
|
"learning_rate": 0.0001985736434108527, |
|
"loss": 0.8327, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.010510046367851623, |
|
"grad_norm": 0.16277430951595306, |
|
"learning_rate": 0.00019851162790697675, |
|
"loss": 0.9559, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.010819165378670788, |
|
"grad_norm": 0.11398226767778397, |
|
"learning_rate": 0.0001984496124031008, |
|
"loss": 0.8987, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.011128284389489953, |
|
"grad_norm": 0.15259447693824768, |
|
"learning_rate": 0.00019838759689922483, |
|
"loss": 0.916, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.01143740340030912, |
|
"grad_norm": 0.16493409872055054, |
|
"learning_rate": 0.00019832558139534884, |
|
"loss": 0.7795, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.011746522411128285, |
|
"grad_norm": 0.14143070578575134, |
|
"learning_rate": 0.00019826356589147288, |
|
"loss": 0.814, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.01205564142194745, |
|
"grad_norm": 0.11884719133377075, |
|
"learning_rate": 0.00019820155038759692, |
|
"loss": 0.7559, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.012364760432766615, |
|
"grad_norm": 0.12909553945064545, |
|
"learning_rate": 0.00019813953488372096, |
|
"loss": 0.9106, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01267387944358578, |
|
"grad_norm": 0.11181219667196274, |
|
"learning_rate": 0.00019807751937984497, |
|
"loss": 0.7127, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.012982998454404947, |
|
"grad_norm": 0.16188634932041168, |
|
"learning_rate": 0.000198015503875969, |
|
"loss": 0.856, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.013292117465224112, |
|
"grad_norm": 0.1277618706226349, |
|
"learning_rate": 0.00019795348837209304, |
|
"loss": 0.8901, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.013601236476043277, |
|
"grad_norm": 0.13743072748184204, |
|
"learning_rate": 0.00019789147286821706, |
|
"loss": 0.9266, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.013910355486862442, |
|
"grad_norm": 0.12885789573192596, |
|
"learning_rate": 0.0001978294573643411, |
|
"loss": 0.8982, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.014219474497681607, |
|
"grad_norm": 0.14296455681324005, |
|
"learning_rate": 0.0001977674418604651, |
|
"loss": 1.0045, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.014528593508500772, |
|
"grad_norm": 0.13536542654037476, |
|
"learning_rate": 0.00019770542635658915, |
|
"loss": 0.8648, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.014837712519319939, |
|
"grad_norm": 0.1285800188779831, |
|
"learning_rate": 0.00019764341085271318, |
|
"loss": 0.8113, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.015146831530139104, |
|
"grad_norm": 0.1538587212562561, |
|
"learning_rate": 0.00019758139534883722, |
|
"loss": 0.8465, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.015455950540958269, |
|
"grad_norm": 0.11420200765132904, |
|
"learning_rate": 0.00019751937984496126, |
|
"loss": 0.8647, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.015765069551777436, |
|
"grad_norm": 0.13382850587368011, |
|
"learning_rate": 0.00019745736434108527, |
|
"loss": 0.9545, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.0160741885625966, |
|
"grad_norm": 0.11594673991203308, |
|
"learning_rate": 0.0001973953488372093, |
|
"loss": 0.8599, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.016383307573415766, |
|
"grad_norm": 0.119788758456707, |
|
"learning_rate": 0.00019733333333333335, |
|
"loss": 0.8394, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.01669242658423493, |
|
"grad_norm": 0.1150812953710556, |
|
"learning_rate": 0.0001972713178294574, |
|
"loss": 0.8727, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.017001545595054096, |
|
"grad_norm": 0.1359858363866806, |
|
"learning_rate": 0.0001972093023255814, |
|
"loss": 0.76, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.01731066460587326, |
|
"grad_norm": 0.15345649421215057, |
|
"learning_rate": 0.00019714728682170544, |
|
"loss": 0.7292, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.017619783616692426, |
|
"grad_norm": 0.14331281185150146, |
|
"learning_rate": 0.00019708527131782945, |
|
"loss": 0.8034, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.01792890262751159, |
|
"grad_norm": 0.16820766031742096, |
|
"learning_rate": 0.0001970232558139535, |
|
"loss": 0.7534, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.018238021638330756, |
|
"grad_norm": 0.16281287372112274, |
|
"learning_rate": 0.00019696124031007753, |
|
"loss": 0.8409, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.01854714064914992, |
|
"grad_norm": 0.16938892006874084, |
|
"learning_rate": 0.00019689922480620157, |
|
"loss": 0.8786, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.018856259659969087, |
|
"grad_norm": 0.13455645740032196, |
|
"learning_rate": 0.00019683720930232558, |
|
"loss": 0.7462, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.019165378670788255, |
|
"grad_norm": 0.12973853945732117, |
|
"learning_rate": 0.00019677519379844962, |
|
"loss": 0.8304, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.01947449768160742, |
|
"grad_norm": 0.158578023314476, |
|
"learning_rate": 0.00019671317829457366, |
|
"loss": 0.8244, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.019783616692426585, |
|
"grad_norm": 0.125227153301239, |
|
"learning_rate": 0.0001966511627906977, |
|
"loss": 0.8637, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.02009273570324575, |
|
"grad_norm": 0.1529238075017929, |
|
"learning_rate": 0.0001965891472868217, |
|
"loss": 0.8271, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.020401854714064915, |
|
"grad_norm": 0.1335589438676834, |
|
"learning_rate": 0.00019652713178294575, |
|
"loss": 0.8129, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.02071097372488408, |
|
"grad_norm": 0.13944782316684723, |
|
"learning_rate": 0.00019646511627906978, |
|
"loss": 0.887, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.021020092735703245, |
|
"grad_norm": 0.12453600019216537, |
|
"learning_rate": 0.00019640310077519382, |
|
"loss": 0.8369, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.02132921174652241, |
|
"grad_norm": 0.11443863809108734, |
|
"learning_rate": 0.00019634108527131786, |
|
"loss": 0.8769, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.021638330757341576, |
|
"grad_norm": 0.1325102150440216, |
|
"learning_rate": 0.00019627906976744185, |
|
"loss": 0.7528, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02194744976816074, |
|
"grad_norm": 0.13488665223121643, |
|
"learning_rate": 0.00019621705426356589, |
|
"loss": 0.876, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.022256568778979906, |
|
"grad_norm": 0.15174520015716553, |
|
"learning_rate": 0.00019615503875968992, |
|
"loss": 0.8219, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.022565687789799074, |
|
"grad_norm": 0.13083337247371674, |
|
"learning_rate": 0.00019609302325581396, |
|
"loss": 0.8696, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.02287480680061824, |
|
"grad_norm": 0.13707856833934784, |
|
"learning_rate": 0.000196031007751938, |
|
"loss": 0.9781, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.023183925811437404, |
|
"grad_norm": 0.14287059009075165, |
|
"learning_rate": 0.000195968992248062, |
|
"loss": 0.8975, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.02349304482225657, |
|
"grad_norm": 0.14259910583496094, |
|
"learning_rate": 0.00019590697674418605, |
|
"loss": 0.781, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.023802163833075735, |
|
"grad_norm": 0.17812331020832062, |
|
"learning_rate": 0.0001958449612403101, |
|
"loss": 0.8664, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.0241112828438949, |
|
"grad_norm": 0.10900291800498962, |
|
"learning_rate": 0.00019578294573643413, |
|
"loss": 0.8155, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.024420401854714065, |
|
"grad_norm": 0.1299259066581726, |
|
"learning_rate": 0.00019572093023255814, |
|
"loss": 0.8878, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.02472952086553323, |
|
"grad_norm": 0.1341174691915512, |
|
"learning_rate": 0.00019565891472868218, |
|
"loss": 0.855, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.025038639876352395, |
|
"grad_norm": 0.11747386306524277, |
|
"learning_rate": 0.00019559689922480622, |
|
"loss": 0.8901, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.02534775888717156, |
|
"grad_norm": 0.12569762766361237, |
|
"learning_rate": 0.00019553488372093026, |
|
"loss": 0.8644, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.025656877897990725, |
|
"grad_norm": 0.11595705896615982, |
|
"learning_rate": 0.0001954728682170543, |
|
"loss": 0.7919, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.025965996908809894, |
|
"grad_norm": 0.15013526380062103, |
|
"learning_rate": 0.0001954108527131783, |
|
"loss": 0.7987, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.02627511591962906, |
|
"grad_norm": 0.13101589679718018, |
|
"learning_rate": 0.00019534883720930232, |
|
"loss": 0.7824, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.026584234930448224, |
|
"grad_norm": 0.12921208143234253, |
|
"learning_rate": 0.00019528682170542636, |
|
"loss": 0.9077, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.02689335394126739, |
|
"grad_norm": 0.18682463467121124, |
|
"learning_rate": 0.0001952248062015504, |
|
"loss": 0.8614, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.027202472952086554, |
|
"grad_norm": 0.15416069328784943, |
|
"learning_rate": 0.00019516279069767444, |
|
"loss": 0.8038, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.02751159196290572, |
|
"grad_norm": 0.13872137665748596, |
|
"learning_rate": 0.00019510077519379845, |
|
"loss": 0.9001, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.027820710973724884, |
|
"grad_norm": 0.1256810575723648, |
|
"learning_rate": 0.00019503875968992249, |
|
"loss": 0.8332, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02812982998454405, |
|
"grad_norm": 0.15000316500663757, |
|
"learning_rate": 0.00019497674418604652, |
|
"loss": 0.8345, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.028438948995363214, |
|
"grad_norm": 0.11536971479654312, |
|
"learning_rate": 0.00019491472868217056, |
|
"loss": 0.7702, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.02874806800618238, |
|
"grad_norm": 0.11627457290887833, |
|
"learning_rate": 0.00019485271317829457, |
|
"loss": 0.8944, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.029057187017001544, |
|
"grad_norm": 0.12913382053375244, |
|
"learning_rate": 0.00019479069767441861, |
|
"loss": 0.8054, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.02936630602782071, |
|
"grad_norm": 0.14983727037906647, |
|
"learning_rate": 0.00019472868217054265, |
|
"loss": 0.7897, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.029675425038639878, |
|
"grad_norm": 0.1396576315164566, |
|
"learning_rate": 0.0001946666666666667, |
|
"loss": 0.7595, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.029984544049459043, |
|
"grad_norm": 0.1093367412686348, |
|
"learning_rate": 0.0001946046511627907, |
|
"loss": 0.878, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.030293663060278208, |
|
"grad_norm": 0.14023703336715698, |
|
"learning_rate": 0.00019454263565891474, |
|
"loss": 0.8051, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.030602782071097373, |
|
"grad_norm": 0.11650537699460983, |
|
"learning_rate": 0.00019448062015503875, |
|
"loss": 0.7733, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.030911901081916538, |
|
"grad_norm": 0.13736988604068756, |
|
"learning_rate": 0.0001944186046511628, |
|
"loss": 0.8414, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.031221020092735703, |
|
"grad_norm": 0.13810019195079803, |
|
"learning_rate": 0.00019435658914728683, |
|
"loss": 0.9192, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.03153013910355487, |
|
"grad_norm": 0.16648177802562714, |
|
"learning_rate": 0.00019429457364341087, |
|
"loss": 0.8692, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.03183925811437403, |
|
"grad_norm": 0.16531941294670105, |
|
"learning_rate": 0.00019423255813953488, |
|
"loss": 0.8585, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.0321483771251932, |
|
"grad_norm": 0.12364251166582108, |
|
"learning_rate": 0.00019417054263565892, |
|
"loss": 0.7652, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.03245749613601236, |
|
"grad_norm": 0.139155313372612, |
|
"learning_rate": 0.00019410852713178296, |
|
"loss": 0.7448, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.03276661514683153, |
|
"grad_norm": 0.11827906966209412, |
|
"learning_rate": 0.000194046511627907, |
|
"loss": 0.9182, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.033075734157650694, |
|
"grad_norm": 0.1247883066534996, |
|
"learning_rate": 0.00019398449612403104, |
|
"loss": 0.8138, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.03338485316846986, |
|
"grad_norm": 0.12576410174369812, |
|
"learning_rate": 0.00019392248062015505, |
|
"loss": 0.803, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.033693972179289024, |
|
"grad_norm": 0.12698566913604736, |
|
"learning_rate": 0.00019386046511627909, |
|
"loss": 0.8579, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.03400309119010819, |
|
"grad_norm": 0.10796654969453812, |
|
"learning_rate": 0.0001937984496124031, |
|
"loss": 0.812, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.034312210200927354, |
|
"grad_norm": 0.12361832708120346, |
|
"learning_rate": 0.00019373643410852714, |
|
"loss": 0.8606, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.03462132921174652, |
|
"grad_norm": 0.12853065133094788, |
|
"learning_rate": 0.00019367441860465118, |
|
"loss": 0.9208, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.03493044822256569, |
|
"grad_norm": 0.119226835668087, |
|
"learning_rate": 0.0001936124031007752, |
|
"loss": 0.7306, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.03523956723338485, |
|
"grad_norm": 0.12476561963558197, |
|
"learning_rate": 0.00019355038759689923, |
|
"loss": 0.8046, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.03554868624420402, |
|
"grad_norm": 0.11674510687589645, |
|
"learning_rate": 0.00019348837209302326, |
|
"loss": 0.8214, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.03585780525502318, |
|
"grad_norm": 0.1358969360589981, |
|
"learning_rate": 0.0001934263565891473, |
|
"loss": 0.7432, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.03616692426584235, |
|
"grad_norm": 0.1318214237689972, |
|
"learning_rate": 0.00019336434108527132, |
|
"loss": 0.9497, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.03647604327666151, |
|
"grad_norm": 0.1427808552980423, |
|
"learning_rate": 0.00019330232558139535, |
|
"loss": 0.8012, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.03678516228748068, |
|
"grad_norm": 0.14754672348499298, |
|
"learning_rate": 0.0001932403100775194, |
|
"loss": 0.8112, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.03709428129829984, |
|
"grad_norm": 0.13510632514953613, |
|
"learning_rate": 0.00019317829457364343, |
|
"loss": 0.8607, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03740340030911901, |
|
"grad_norm": 0.11763066798448563, |
|
"learning_rate": 0.00019311627906976747, |
|
"loss": 0.8349, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.03771251931993817, |
|
"grad_norm": 0.13032180070877075, |
|
"learning_rate": 0.00019305426356589148, |
|
"loss": 0.8847, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.03802163833075734, |
|
"grad_norm": 0.11119523644447327, |
|
"learning_rate": 0.00019299224806201552, |
|
"loss": 0.8302, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.03833075734157651, |
|
"grad_norm": 0.1144416555762291, |
|
"learning_rate": 0.00019293023255813953, |
|
"loss": 0.7984, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.03863987635239567, |
|
"grad_norm": 0.12676909565925598, |
|
"learning_rate": 0.00019286821705426357, |
|
"loss": 0.8186, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.03894899536321484, |
|
"grad_norm": 0.10949283838272095, |
|
"learning_rate": 0.0001928062015503876, |
|
"loss": 0.7339, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.039258114374034, |
|
"grad_norm": 0.11983365565538406, |
|
"learning_rate": 0.00019274418604651162, |
|
"loss": 0.7616, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.03956723338485317, |
|
"grad_norm": 0.1309802085161209, |
|
"learning_rate": 0.00019268217054263566, |
|
"loss": 0.7917, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.03987635239567233, |
|
"grad_norm": 0.1349460780620575, |
|
"learning_rate": 0.0001926201550387597, |
|
"loss": 0.8753, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.0401854714064915, |
|
"grad_norm": 0.12506724894046783, |
|
"learning_rate": 0.00019255813953488374, |
|
"loss": 0.7819, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04049459041731066, |
|
"grad_norm": 0.13243618607521057, |
|
"learning_rate": 0.00019249612403100778, |
|
"loss": 0.8049, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.04080370942812983, |
|
"grad_norm": 0.14795252680778503, |
|
"learning_rate": 0.0001924341085271318, |
|
"loss": 0.7969, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.04111282843894899, |
|
"grad_norm": 0.15747897326946259, |
|
"learning_rate": 0.00019237209302325583, |
|
"loss": 0.8746, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.04142194744976816, |
|
"grad_norm": 0.15109744668006897, |
|
"learning_rate": 0.00019231007751937987, |
|
"loss": 0.8562, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.04173106646058733, |
|
"grad_norm": 0.13535654544830322, |
|
"learning_rate": 0.0001922480620155039, |
|
"loss": 0.8928, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.04204018547140649, |
|
"grad_norm": 0.1262591928243637, |
|
"learning_rate": 0.00019218604651162792, |
|
"loss": 0.8302, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.04234930448222566, |
|
"grad_norm": 0.11443354189395905, |
|
"learning_rate": 0.00019212403100775193, |
|
"loss": 0.8672, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.04265842349304482, |
|
"grad_norm": 0.11836638301610947, |
|
"learning_rate": 0.00019206201550387597, |
|
"loss": 0.7593, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.04296754250386399, |
|
"grad_norm": 0.12662746012210846, |
|
"learning_rate": 0.000192, |
|
"loss": 0.7531, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.04327666151468315, |
|
"grad_norm": 0.12387800961732864, |
|
"learning_rate": 0.00019193798449612404, |
|
"loss": 0.7422, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04358578052550232, |
|
"grad_norm": 0.12786395847797394, |
|
"learning_rate": 0.00019187596899224806, |
|
"loss": 0.744, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.04389489953632148, |
|
"grad_norm": 0.12761859595775604, |
|
"learning_rate": 0.0001918139534883721, |
|
"loss": 0.8977, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.04420401854714065, |
|
"grad_norm": 0.10713964700698853, |
|
"learning_rate": 0.00019175193798449613, |
|
"loss": 0.7677, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.04451313755795981, |
|
"grad_norm": 0.13007132709026337, |
|
"learning_rate": 0.00019168992248062017, |
|
"loss": 0.7516, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.04482225656877898, |
|
"grad_norm": 0.12673480808734894, |
|
"learning_rate": 0.0001916279069767442, |
|
"loss": 0.8469, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.04513137557959815, |
|
"grad_norm": 0.14040741324424744, |
|
"learning_rate": 0.00019156589147286822, |
|
"loss": 0.8119, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.04544049459041731, |
|
"grad_norm": 0.1404358148574829, |
|
"learning_rate": 0.00019150387596899226, |
|
"loss": 0.8062, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.04574961360123648, |
|
"grad_norm": 0.140091672539711, |
|
"learning_rate": 0.0001914418604651163, |
|
"loss": 0.9796, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.04605873261205564, |
|
"grad_norm": 0.12712246179580688, |
|
"learning_rate": 0.00019137984496124034, |
|
"loss": 0.7021, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.04636785162287481, |
|
"grad_norm": 0.1542489379644394, |
|
"learning_rate": 0.00019131782945736435, |
|
"loss": 0.7141, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04667697063369397, |
|
"grad_norm": 0.1310671865940094, |
|
"learning_rate": 0.00019125581395348836, |
|
"loss": 0.7513, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.04698608964451314, |
|
"grad_norm": 0.1205151230096817, |
|
"learning_rate": 0.0001911937984496124, |
|
"loss": 0.8243, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.0472952086553323, |
|
"grad_norm": 0.13522934913635254, |
|
"learning_rate": 0.00019113178294573644, |
|
"loss": 0.9441, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.04760432766615147, |
|
"grad_norm": 0.11995132267475128, |
|
"learning_rate": 0.00019106976744186048, |
|
"loss": 0.7032, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.04791344667697063, |
|
"grad_norm": 0.11310404539108276, |
|
"learning_rate": 0.0001910077519379845, |
|
"loss": 0.6913, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.0482225656877898, |
|
"grad_norm": 0.11462230980396271, |
|
"learning_rate": 0.00019094573643410853, |
|
"loss": 0.8314, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.04853168469860897, |
|
"grad_norm": 0.12306851893663406, |
|
"learning_rate": 0.00019088372093023257, |
|
"loss": 0.9122, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.04884080370942813, |
|
"grad_norm": 0.09559505432844162, |
|
"learning_rate": 0.0001908217054263566, |
|
"loss": 0.8177, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.0491499227202473, |
|
"grad_norm": 0.11616392433643341, |
|
"learning_rate": 0.00019075968992248064, |
|
"loss": 0.9223, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.04945904173106646, |
|
"grad_norm": 0.12350696325302124, |
|
"learning_rate": 0.00019069767441860466, |
|
"loss": 0.9004, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04976816074188563, |
|
"grad_norm": 0.12747159600257874, |
|
"learning_rate": 0.0001906356589147287, |
|
"loss": 0.9126, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.05007727975270479, |
|
"grad_norm": 0.12991321086883545, |
|
"learning_rate": 0.00019057364341085273, |
|
"loss": 0.9113, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.05038639876352396, |
|
"grad_norm": 0.11218614876270294, |
|
"learning_rate": 0.00019051162790697677, |
|
"loss": 0.7117, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.05069551777434312, |
|
"grad_norm": 0.13032029569149017, |
|
"learning_rate": 0.00019044961240310078, |
|
"loss": 0.841, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.05100463678516229, |
|
"grad_norm": 0.1347358375787735, |
|
"learning_rate": 0.0001903875968992248, |
|
"loss": 0.8103, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.05131375579598145, |
|
"grad_norm": 0.11914915591478348, |
|
"learning_rate": 0.00019032558139534883, |
|
"loss": 0.8913, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.05162287480680062, |
|
"grad_norm": 0.15790300071239471, |
|
"learning_rate": 0.00019026356589147287, |
|
"loss": 0.801, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.05193199381761979, |
|
"grad_norm": 0.15204893052577972, |
|
"learning_rate": 0.0001902015503875969, |
|
"loss": 0.8085, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.05224111282843895, |
|
"grad_norm": 0.11781688779592514, |
|
"learning_rate": 0.00019013953488372095, |
|
"loss": 0.9159, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.05255023183925812, |
|
"grad_norm": 0.12103480845689774, |
|
"learning_rate": 0.00019007751937984496, |
|
"loss": 0.8221, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.05285935085007728, |
|
"grad_norm": 0.12477768957614899, |
|
"learning_rate": 0.000190015503875969, |
|
"loss": 0.7622, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.05316846986089645, |
|
"grad_norm": 0.11186879873275757, |
|
"learning_rate": 0.00018995348837209304, |
|
"loss": 0.7959, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.05347758887171561, |
|
"grad_norm": 0.12586474418640137, |
|
"learning_rate": 0.00018989147286821708, |
|
"loss": 0.6735, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.05378670788253478, |
|
"grad_norm": 0.11750543862581253, |
|
"learning_rate": 0.0001898294573643411, |
|
"loss": 0.8302, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.05409582689335394, |
|
"grad_norm": 0.1301109939813614, |
|
"learning_rate": 0.00018976744186046513, |
|
"loss": 0.9917, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.05440494590417311, |
|
"grad_norm": 0.1335320919752121, |
|
"learning_rate": 0.00018970542635658917, |
|
"loss": 0.8256, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.05471406491499227, |
|
"grad_norm": 0.12373456358909607, |
|
"learning_rate": 0.00018964341085271318, |
|
"loss": 0.8458, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.05502318392581144, |
|
"grad_norm": 0.1291348785161972, |
|
"learning_rate": 0.00018958139534883722, |
|
"loss": 0.7675, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.0553323029366306, |
|
"grad_norm": 0.12421860545873642, |
|
"learning_rate": 0.00018951937984496123, |
|
"loss": 0.8195, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.05564142194744977, |
|
"grad_norm": 0.1433798372745514, |
|
"learning_rate": 0.00018945736434108527, |
|
"loss": 0.7938, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.055950540958268936, |
|
"grad_norm": 0.15060195326805115, |
|
"learning_rate": 0.0001893953488372093, |
|
"loss": 0.7178, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.0562596599690881, |
|
"grad_norm": 0.13103605806827545, |
|
"learning_rate": 0.00018933333333333335, |
|
"loss": 0.8432, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.05656877897990727, |
|
"grad_norm": 0.1537558138370514, |
|
"learning_rate": 0.00018927131782945738, |
|
"loss": 0.81, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.05687789799072643, |
|
"grad_norm": 0.12011228501796722, |
|
"learning_rate": 0.0001892093023255814, |
|
"loss": 0.8906, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.0571870170015456, |
|
"grad_norm": 0.11913521587848663, |
|
"learning_rate": 0.00018914728682170543, |
|
"loss": 0.7819, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.05749613601236476, |
|
"grad_norm": 0.13771173357963562, |
|
"learning_rate": 0.00018908527131782947, |
|
"loss": 0.7775, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.05780525502318393, |
|
"grad_norm": 0.11831659823656082, |
|
"learning_rate": 0.0001890232558139535, |
|
"loss": 0.7461, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.05811437403400309, |
|
"grad_norm": 0.11049254238605499, |
|
"learning_rate": 0.00018896124031007752, |
|
"loss": 0.7784, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.05842349304482226, |
|
"grad_norm": 0.10577117651700974, |
|
"learning_rate": 0.00018889922480620156, |
|
"loss": 0.7211, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.05873261205564142, |
|
"grad_norm": 0.13082446157932281, |
|
"learning_rate": 0.00018883720930232557, |
|
"loss": 0.6854, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05904173106646059, |
|
"grad_norm": 0.11105687916278839, |
|
"learning_rate": 0.0001887751937984496, |
|
"loss": 0.8418, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.059350850077279756, |
|
"grad_norm": 0.1412641704082489, |
|
"learning_rate": 0.00018871317829457365, |
|
"loss": 0.8293, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.05965996908809892, |
|
"grad_norm": 0.12148229032754898, |
|
"learning_rate": 0.0001886511627906977, |
|
"loss": 0.856, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.059969088098918086, |
|
"grad_norm": 0.12246838212013245, |
|
"learning_rate": 0.0001885891472868217, |
|
"loss": 0.9042, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.06027820710973725, |
|
"grad_norm": 0.12810048460960388, |
|
"learning_rate": 0.00018852713178294574, |
|
"loss": 0.7636, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.060587326120556416, |
|
"grad_norm": 0.12897038459777832, |
|
"learning_rate": 0.00018846511627906978, |
|
"loss": 0.8182, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.06089644513137558, |
|
"grad_norm": 0.13533450663089752, |
|
"learning_rate": 0.00018840310077519382, |
|
"loss": 0.7578, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.061205564142194746, |
|
"grad_norm": 0.13347265124320984, |
|
"learning_rate": 0.00018834108527131783, |
|
"loss": 0.8414, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.06151468315301391, |
|
"grad_norm": 0.14512066543102264, |
|
"learning_rate": 0.00018827906976744187, |
|
"loss": 0.8698, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.061823802163833076, |
|
"grad_norm": 0.11501649022102356, |
|
"learning_rate": 0.0001882170542635659, |
|
"loss": 0.8591, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06213292117465224, |
|
"grad_norm": 0.11760124564170837, |
|
"learning_rate": 0.00018815503875968995, |
|
"loss": 0.6859, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.062442040185471406, |
|
"grad_norm": 0.13676373660564423, |
|
"learning_rate": 0.00018809302325581399, |
|
"loss": 0.7216, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.06275115919629057, |
|
"grad_norm": 0.11492311954498291, |
|
"learning_rate": 0.000188031007751938, |
|
"loss": 0.8223, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.06306027820710974, |
|
"grad_norm": 0.11568205058574677, |
|
"learning_rate": 0.000187968992248062, |
|
"loss": 0.8193, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.0633693972179289, |
|
"grad_norm": 0.12526321411132812, |
|
"learning_rate": 0.00018790697674418605, |
|
"loss": 0.8803, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.06367851622874807, |
|
"grad_norm": 0.12961214780807495, |
|
"learning_rate": 0.00018784496124031009, |
|
"loss": 0.8083, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.06398763523956723, |
|
"grad_norm": 0.11950293183326721, |
|
"learning_rate": 0.00018778294573643412, |
|
"loss": 0.8049, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.0642967542503864, |
|
"grad_norm": 0.11256164312362671, |
|
"learning_rate": 0.00018772093023255814, |
|
"loss": 0.7365, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.06460587326120557, |
|
"grad_norm": 0.13182170689105988, |
|
"learning_rate": 0.00018765891472868217, |
|
"loss": 0.7031, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.06491499227202473, |
|
"grad_norm": 0.1193682923913002, |
|
"learning_rate": 0.00018759689922480621, |
|
"loss": 0.8785, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.06522411128284389, |
|
"grad_norm": 0.13558265566825867, |
|
"learning_rate": 0.00018753488372093025, |
|
"loss": 0.8275, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.06553323029366306, |
|
"grad_norm": 0.12028771638870239, |
|
"learning_rate": 0.00018747286821705426, |
|
"loss": 0.8436, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.06584234930448223, |
|
"grad_norm": 0.12355880439281464, |
|
"learning_rate": 0.0001874108527131783, |
|
"loss": 0.687, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.06615146831530139, |
|
"grad_norm": 0.11852920055389404, |
|
"learning_rate": 0.00018734883720930234, |
|
"loss": 0.8268, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.06646058732612056, |
|
"grad_norm": 0.1262328028678894, |
|
"learning_rate": 0.00018728682170542638, |
|
"loss": 0.841, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.06676970633693972, |
|
"grad_norm": 0.13128647208213806, |
|
"learning_rate": 0.00018722480620155042, |
|
"loss": 0.7472, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.06707882534775889, |
|
"grad_norm": 0.12075427919626236, |
|
"learning_rate": 0.00018716279069767443, |
|
"loss": 0.8483, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.06738794435857805, |
|
"grad_norm": 0.11870454251766205, |
|
"learning_rate": 0.00018710077519379844, |
|
"loss": 0.8179, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.06769706336939722, |
|
"grad_norm": 0.12058960646390915, |
|
"learning_rate": 0.00018703875968992248, |
|
"loss": 0.8403, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.06800618238021638, |
|
"grad_norm": 0.13978858292102814, |
|
"learning_rate": 0.00018697674418604652, |
|
"loss": 0.6639, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06831530139103555, |
|
"grad_norm": 0.10775326192378998, |
|
"learning_rate": 0.00018691472868217056, |
|
"loss": 0.8841, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.06862442040185471, |
|
"grad_norm": 0.10687053948640823, |
|
"learning_rate": 0.00018685271317829457, |
|
"loss": 0.7715, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.06893353941267388, |
|
"grad_norm": 0.12222916632890701, |
|
"learning_rate": 0.0001867906976744186, |
|
"loss": 0.7728, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.06924265842349304, |
|
"grad_norm": 0.13918592035770416, |
|
"learning_rate": 0.00018672868217054265, |
|
"loss": 0.8792, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.0695517774343122, |
|
"grad_norm": 0.11157078295946121, |
|
"learning_rate": 0.0001866666666666667, |
|
"loss": 0.8906, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.06986089644513138, |
|
"grad_norm": 0.12403914332389832, |
|
"learning_rate": 0.0001866046511627907, |
|
"loss": 0.9234, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.07017001545595054, |
|
"grad_norm": 0.11490818858146667, |
|
"learning_rate": 0.00018654263565891474, |
|
"loss": 0.8447, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.0704791344667697, |
|
"grad_norm": 0.13033214211463928, |
|
"learning_rate": 0.00018648062015503878, |
|
"loss": 0.7801, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.07078825347758887, |
|
"grad_norm": 0.1061464175581932, |
|
"learning_rate": 0.00018641860465116281, |
|
"loss": 0.6313, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.07109737248840804, |
|
"grad_norm": 0.12007651478052139, |
|
"learning_rate": 0.00018635658914728683, |
|
"loss": 0.7526, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.0714064914992272, |
|
"grad_norm": 0.12450309842824936, |
|
"learning_rate": 0.00018629457364341086, |
|
"loss": 0.8403, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.07171561051004637, |
|
"grad_norm": 0.16374681890010834, |
|
"learning_rate": 0.00018623255813953488, |
|
"loss": 0.7729, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.07202472952086553, |
|
"grad_norm": 0.13087786734104156, |
|
"learning_rate": 0.00018617054263565892, |
|
"loss": 0.8882, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.0723338485316847, |
|
"grad_norm": 0.13743267953395844, |
|
"learning_rate": 0.00018610852713178295, |
|
"loss": 0.8316, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.07264296754250386, |
|
"grad_norm": 0.1110304743051529, |
|
"learning_rate": 0.000186046511627907, |
|
"loss": 0.773, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.07295208655332303, |
|
"grad_norm": 0.12651820480823517, |
|
"learning_rate": 0.000185984496124031, |
|
"loss": 0.9538, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.0732612055641422, |
|
"grad_norm": 0.11768705397844315, |
|
"learning_rate": 0.00018592248062015504, |
|
"loss": 0.7585, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.07357032457496136, |
|
"grad_norm": 0.1165948212146759, |
|
"learning_rate": 0.00018586046511627908, |
|
"loss": 0.868, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.07387944358578052, |
|
"grad_norm": 0.12231750786304474, |
|
"learning_rate": 0.00018579844961240312, |
|
"loss": 0.8445, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.07418856259659969, |
|
"grad_norm": 0.13796208798885345, |
|
"learning_rate": 0.00018573643410852716, |
|
"loss": 0.8391, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.07449768160741886, |
|
"grad_norm": 0.1166827604174614, |
|
"learning_rate": 0.00018567441860465117, |
|
"loss": 0.7763, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.07480680061823802, |
|
"grad_norm": 0.12125882506370544, |
|
"learning_rate": 0.0001856124031007752, |
|
"loss": 0.7467, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.07511591962905718, |
|
"grad_norm": 0.13202430307865143, |
|
"learning_rate": 0.00018555038759689925, |
|
"loss": 0.8348, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.07542503863987635, |
|
"grad_norm": 0.13780809938907623, |
|
"learning_rate": 0.00018548837209302326, |
|
"loss": 0.7159, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.07573415765069552, |
|
"grad_norm": 0.163734570145607, |
|
"learning_rate": 0.0001854263565891473, |
|
"loss": 0.8078, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.07604327666151468, |
|
"grad_norm": 0.15040288865566254, |
|
"learning_rate": 0.0001853643410852713, |
|
"loss": 0.7911, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.07635239567233384, |
|
"grad_norm": 0.13316433131694794, |
|
"learning_rate": 0.00018530232558139535, |
|
"loss": 0.8217, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.07666151468315302, |
|
"grad_norm": 0.14527438580989838, |
|
"learning_rate": 0.0001852403100775194, |
|
"loss": 0.8287, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.07697063369397218, |
|
"grad_norm": 0.11744588613510132, |
|
"learning_rate": 0.00018517829457364343, |
|
"loss": 0.8774, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.07727975270479134, |
|
"grad_norm": 0.15297925472259521, |
|
"learning_rate": 0.00018511627906976744, |
|
"loss": 0.8035, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.0775888717156105, |
|
"grad_norm": 0.12520894408226013, |
|
"learning_rate": 0.00018505426356589148, |
|
"loss": 0.812, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.07789799072642968, |
|
"grad_norm": 0.12046486139297485, |
|
"learning_rate": 0.00018499224806201552, |
|
"loss": 0.7703, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.07820710973724884, |
|
"grad_norm": 0.10196825861930847, |
|
"learning_rate": 0.00018493023255813955, |
|
"loss": 0.7299, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.078516228748068, |
|
"grad_norm": 0.12353216111660004, |
|
"learning_rate": 0.0001848682170542636, |
|
"loss": 0.6998, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.07882534775888717, |
|
"grad_norm": 0.10435248166322708, |
|
"learning_rate": 0.0001848062015503876, |
|
"loss": 0.8073, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.07913446676970634, |
|
"grad_norm": 0.1290121078491211, |
|
"learning_rate": 0.00018474418604651164, |
|
"loss": 0.7954, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.0794435857805255, |
|
"grad_norm": 0.12450750917196274, |
|
"learning_rate": 0.00018468217054263566, |
|
"loss": 0.9049, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.07975270479134466, |
|
"grad_norm": 0.1351582258939743, |
|
"learning_rate": 0.0001846201550387597, |
|
"loss": 0.7842, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.08006182380216384, |
|
"grad_norm": 0.13335275650024414, |
|
"learning_rate": 0.00018455813953488373, |
|
"loss": 0.7771, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.080370942812983, |
|
"grad_norm": 0.10518497973680496, |
|
"learning_rate": 0.00018449612403100774, |
|
"loss": 0.7927, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.08068006182380216, |
|
"grad_norm": 0.11359915882349014, |
|
"learning_rate": 0.00018443410852713178, |
|
"loss": 0.8461, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.08098918083462132, |
|
"grad_norm": 0.12962335348129272, |
|
"learning_rate": 0.00018437209302325582, |
|
"loss": 0.8299, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.0812982998454405, |
|
"grad_norm": 0.1394529491662979, |
|
"learning_rate": 0.00018431007751937986, |
|
"loss": 0.8031, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.08160741885625966, |
|
"grad_norm": 0.11067520827054977, |
|
"learning_rate": 0.0001842480620155039, |
|
"loss": 0.8019, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.08191653786707882, |
|
"grad_norm": 0.14076265692710876, |
|
"learning_rate": 0.0001841860465116279, |
|
"loss": 0.8521, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.08222565687789798, |
|
"grad_norm": 0.14540016651153564, |
|
"learning_rate": 0.00018412403100775195, |
|
"loss": 0.8124, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.08253477588871716, |
|
"grad_norm": 0.14692644774913788, |
|
"learning_rate": 0.000184062015503876, |
|
"loss": 0.8629, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.08284389489953632, |
|
"grad_norm": 0.12723390758037567, |
|
"learning_rate": 0.00018400000000000003, |
|
"loss": 0.7565, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.08315301391035548, |
|
"grad_norm": 0.13681192696094513, |
|
"learning_rate": 0.00018393798449612404, |
|
"loss": 0.8458, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.08346213292117466, |
|
"grad_norm": 0.1476822942495346, |
|
"learning_rate": 0.00018387596899224805, |
|
"loss": 0.7544, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.08377125193199382, |
|
"grad_norm": 0.11408766359090805, |
|
"learning_rate": 0.0001838139534883721, |
|
"loss": 0.8782, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.08408037094281298, |
|
"grad_norm": 0.14710593223571777, |
|
"learning_rate": 0.00018375193798449613, |
|
"loss": 0.7568, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.08438948995363214, |
|
"grad_norm": 0.12558385729789734, |
|
"learning_rate": 0.00018368992248062017, |
|
"loss": 0.8574, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.08469860896445132, |
|
"grad_norm": 0.11890698224306107, |
|
"learning_rate": 0.00018362790697674418, |
|
"loss": 0.6688, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.08500772797527048, |
|
"grad_norm": 0.10440011322498322, |
|
"learning_rate": 0.00018356589147286822, |
|
"loss": 0.7699, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.08531684698608964, |
|
"grad_norm": 0.13725546002388, |
|
"learning_rate": 0.00018350387596899226, |
|
"loss": 0.8359, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.0856259659969088, |
|
"grad_norm": 0.12335329502820969, |
|
"learning_rate": 0.0001834418604651163, |
|
"loss": 0.7212, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.08593508500772798, |
|
"grad_norm": 0.1138865053653717, |
|
"learning_rate": 0.00018337984496124033, |
|
"loss": 0.6597, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.08624420401854714, |
|
"grad_norm": 0.11864970624446869, |
|
"learning_rate": 0.00018331782945736435, |
|
"loss": 0.7944, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.0865533230293663, |
|
"grad_norm": 0.14360670745372772, |
|
"learning_rate": 0.00018325581395348838, |
|
"loss": 0.8186, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.08686244204018548, |
|
"grad_norm": 0.13418716192245483, |
|
"learning_rate": 0.00018319379844961242, |
|
"loss": 0.8478, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.08717156105100464, |
|
"grad_norm": 0.13283377885818481, |
|
"learning_rate": 0.00018313178294573646, |
|
"loss": 0.8309, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.0874806800618238, |
|
"grad_norm": 0.11697278916835785, |
|
"learning_rate": 0.00018306976744186047, |
|
"loss": 0.8521, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.08778979907264296, |
|
"grad_norm": 0.11819571256637573, |
|
"learning_rate": 0.00018300775193798448, |
|
"loss": 0.6976, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.08809891808346214, |
|
"grad_norm": 0.11848420649766922, |
|
"learning_rate": 0.00018294573643410852, |
|
"loss": 0.8549, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.0884080370942813, |
|
"grad_norm": 0.10397352278232574, |
|
"learning_rate": 0.00018288372093023256, |
|
"loss": 0.6554, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.08871715610510046, |
|
"grad_norm": 0.15076309442520142, |
|
"learning_rate": 0.0001828217054263566, |
|
"loss": 0.6948, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.08902627511591962, |
|
"grad_norm": 0.13722991943359375, |
|
"learning_rate": 0.0001827596899224806, |
|
"loss": 0.8466, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.0893353941267388, |
|
"grad_norm": 0.11547433584928513, |
|
"learning_rate": 0.00018269767441860465, |
|
"loss": 0.7359, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.08964451313755796, |
|
"grad_norm": 0.12837247550487518, |
|
"learning_rate": 0.0001826356589147287, |
|
"loss": 0.8533, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.08995363214837712, |
|
"grad_norm": 0.11957511305809021, |
|
"learning_rate": 0.00018257364341085273, |
|
"loss": 0.6918, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.0902627511591963, |
|
"grad_norm": 0.11487089097499847, |
|
"learning_rate": 0.00018251162790697677, |
|
"loss": 0.8411, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.09057187017001546, |
|
"grad_norm": 0.12621980905532837, |
|
"learning_rate": 0.00018244961240310078, |
|
"loss": 0.7913, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.09088098918083462, |
|
"grad_norm": 0.14285391569137573, |
|
"learning_rate": 0.00018238759689922482, |
|
"loss": 0.9191, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.09119010819165378, |
|
"grad_norm": 0.14195428788661957, |
|
"learning_rate": 0.00018232558139534886, |
|
"loss": 0.8699, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.09149922720247296, |
|
"grad_norm": 0.12400256842374802, |
|
"learning_rate": 0.0001822635658914729, |
|
"loss": 0.7707, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.09180834621329212, |
|
"grad_norm": 0.1220916360616684, |
|
"learning_rate": 0.0001822015503875969, |
|
"loss": 0.7948, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.09211746522411128, |
|
"grad_norm": 0.11888230592012405, |
|
"learning_rate": 0.00018213953488372092, |
|
"loss": 0.8237, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.09242658423493044, |
|
"grad_norm": 0.134236678481102, |
|
"learning_rate": 0.00018207751937984496, |
|
"loss": 0.7503, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.09273570324574962, |
|
"grad_norm": 0.09614330530166626, |
|
"learning_rate": 0.000182015503875969, |
|
"loss": 0.772, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.09304482225656878, |
|
"grad_norm": 0.11686000227928162, |
|
"learning_rate": 0.00018195348837209303, |
|
"loss": 0.8648, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.09335394126738794, |
|
"grad_norm": 0.11321427673101425, |
|
"learning_rate": 0.00018189147286821707, |
|
"loss": 0.8302, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.09366306027820712, |
|
"grad_norm": 0.12898504734039307, |
|
"learning_rate": 0.00018182945736434109, |
|
"loss": 0.8, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.09397217928902628, |
|
"grad_norm": 0.10747554153203964, |
|
"learning_rate": 0.00018176744186046512, |
|
"loss": 0.7696, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.09428129829984544, |
|
"grad_norm": 0.12886860966682434, |
|
"learning_rate": 0.00018170542635658916, |
|
"loss": 0.8459, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.0945904173106646, |
|
"grad_norm": 0.12835724651813507, |
|
"learning_rate": 0.0001816434108527132, |
|
"loss": 0.8523, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.09489953632148378, |
|
"grad_norm": 0.11970589309930801, |
|
"learning_rate": 0.0001815813953488372, |
|
"loss": 0.7864, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.09520865533230294, |
|
"grad_norm": 0.12258201837539673, |
|
"learning_rate": 0.00018151937984496125, |
|
"loss": 0.7593, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.0955177743431221, |
|
"grad_norm": 0.1291266828775406, |
|
"learning_rate": 0.0001814573643410853, |
|
"loss": 0.925, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.09582689335394126, |
|
"grad_norm": 0.12266039103269577, |
|
"learning_rate": 0.0001813953488372093, |
|
"loss": 0.7341, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.09613601236476044, |
|
"grad_norm": 0.10808485746383667, |
|
"learning_rate": 0.00018133333333333334, |
|
"loss": 0.8983, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.0964451313755796, |
|
"grad_norm": 0.1303120255470276, |
|
"learning_rate": 0.00018127131782945735, |
|
"loss": 0.9788, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.09675425038639876, |
|
"grad_norm": 0.1282745897769928, |
|
"learning_rate": 0.0001812093023255814, |
|
"loss": 0.6776, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.09706336939721794, |
|
"grad_norm": 0.10674197226762772, |
|
"learning_rate": 0.00018114728682170543, |
|
"loss": 0.819, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.0973724884080371, |
|
"grad_norm": 0.10732909291982651, |
|
"learning_rate": 0.00018108527131782947, |
|
"loss": 0.8426, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.09768160741885626, |
|
"grad_norm": 0.14202672243118286, |
|
"learning_rate": 0.0001810232558139535, |
|
"loss": 0.8447, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.09799072642967542, |
|
"grad_norm": 0.12257728725671768, |
|
"learning_rate": 0.00018096124031007752, |
|
"loss": 0.8032, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.0982998454404946, |
|
"grad_norm": 0.11397712677717209, |
|
"learning_rate": 0.00018089922480620156, |
|
"loss": 0.7667, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.09860896445131376, |
|
"grad_norm": 0.11759169399738312, |
|
"learning_rate": 0.0001808372093023256, |
|
"loss": 0.775, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.09891808346213292, |
|
"grad_norm": 0.10919482260942459, |
|
"learning_rate": 0.00018077519379844964, |
|
"loss": 0.6987, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.09922720247295208, |
|
"grad_norm": 0.14136123657226562, |
|
"learning_rate": 0.00018071317829457365, |
|
"loss": 0.7642, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.09953632148377126, |
|
"grad_norm": 0.12550586462020874, |
|
"learning_rate": 0.00018065116279069769, |
|
"loss": 0.9144, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.09984544049459042, |
|
"grad_norm": 0.1267971247434616, |
|
"learning_rate": 0.00018058914728682172, |
|
"loss": 0.8512, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.10015455950540958, |
|
"grad_norm": 0.12473420053720474, |
|
"learning_rate": 0.00018052713178294574, |
|
"loss": 0.9391, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.10046367851622875, |
|
"grad_norm": 0.11510586738586426, |
|
"learning_rate": 0.00018046511627906977, |
|
"loss": 0.7916, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.10077279752704792, |
|
"grad_norm": 0.13380743563175201, |
|
"learning_rate": 0.00018040310077519381, |
|
"loss": 0.819, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.10108191653786708, |
|
"grad_norm": 0.1224348247051239, |
|
"learning_rate": 0.00018034108527131783, |
|
"loss": 0.778, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.10139103554868624, |
|
"grad_norm": 0.11977488547563553, |
|
"learning_rate": 0.00018027906976744186, |
|
"loss": 0.8521, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.10170015455950542, |
|
"grad_norm": 0.11883991211652756, |
|
"learning_rate": 0.0001802170542635659, |
|
"loss": 0.8959, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.10200927357032458, |
|
"grad_norm": 0.13148127496242523, |
|
"learning_rate": 0.00018015503875968994, |
|
"loss": 0.7503, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.10231839258114374, |
|
"grad_norm": 0.12128669023513794, |
|
"learning_rate": 0.00018009302325581395, |
|
"loss": 0.7469, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.1026275115919629, |
|
"grad_norm": 0.12330310046672821, |
|
"learning_rate": 0.000180031007751938, |
|
"loss": 0.83, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.10293663060278208, |
|
"grad_norm": 0.10930616408586502, |
|
"learning_rate": 0.00017996899224806203, |
|
"loss": 0.7841, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.10324574961360124, |
|
"grad_norm": 0.12586379051208496, |
|
"learning_rate": 0.00017990697674418607, |
|
"loss": 0.729, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.1035548686244204, |
|
"grad_norm": 0.11840980499982834, |
|
"learning_rate": 0.0001798449612403101, |
|
"loss": 0.7329, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.10386398763523957, |
|
"grad_norm": 0.11878569424152374, |
|
"learning_rate": 0.00017978294573643412, |
|
"loss": 0.8202, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.10417310664605874, |
|
"grad_norm": 0.1265515387058258, |
|
"learning_rate": 0.00017972093023255813, |
|
"loss": 0.6638, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.1044822256568779, |
|
"grad_norm": 0.1272660791873932, |
|
"learning_rate": 0.00017965891472868217, |
|
"loss": 0.8512, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.10479134466769706, |
|
"grad_norm": 0.11359579861164093, |
|
"learning_rate": 0.0001795968992248062, |
|
"loss": 0.8199, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.10510046367851623, |
|
"grad_norm": 0.11645165085792542, |
|
"learning_rate": 0.00017953488372093025, |
|
"loss": 0.9163, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.1054095826893354, |
|
"grad_norm": 0.11384947597980499, |
|
"learning_rate": 0.00017947286821705426, |
|
"loss": 0.7825, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.10571870170015456, |
|
"grad_norm": 0.11389808356761932, |
|
"learning_rate": 0.0001794108527131783, |
|
"loss": 0.8078, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.10602782071097372, |
|
"grad_norm": 0.12317777425050735, |
|
"learning_rate": 0.00017934883720930234, |
|
"loss": 0.9068, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.1063369397217929, |
|
"grad_norm": 0.10351788252592087, |
|
"learning_rate": 0.00017928682170542638, |
|
"loss": 0.8258, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.10664605873261206, |
|
"grad_norm": 0.11422822624444962, |
|
"learning_rate": 0.0001792248062015504, |
|
"loss": 0.8725, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.10695517774343122, |
|
"grad_norm": 0.11480465531349182, |
|
"learning_rate": 0.00017916279069767443, |
|
"loss": 0.8415, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.10726429675425038, |
|
"grad_norm": 0.11581287533044815, |
|
"learning_rate": 0.00017910077519379846, |
|
"loss": 0.6787, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.10757341576506955, |
|
"grad_norm": 0.10481414198875427, |
|
"learning_rate": 0.0001790387596899225, |
|
"loss": 0.6735, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.10788253477588872, |
|
"grad_norm": 0.12571753561496735, |
|
"learning_rate": 0.00017897674418604654, |
|
"loss": 0.7918, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.10819165378670788, |
|
"grad_norm": 0.1036786288022995, |
|
"learning_rate": 0.00017891472868217055, |
|
"loss": 0.7985, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.10850077279752705, |
|
"grad_norm": 0.12399487942457199, |
|
"learning_rate": 0.00017885271317829457, |
|
"loss": 0.9029, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.10880989180834622, |
|
"grad_norm": 0.1186407133936882, |
|
"learning_rate": 0.0001787906976744186, |
|
"loss": 0.7395, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.10911901081916538, |
|
"grad_norm": 0.1321779191493988, |
|
"learning_rate": 0.00017872868217054264, |
|
"loss": 0.7905, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.10942812982998454, |
|
"grad_norm": 0.14797626435756683, |
|
"learning_rate": 0.00017866666666666668, |
|
"loss": 0.9305, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.10973724884080371, |
|
"grad_norm": 0.11093270033597946, |
|
"learning_rate": 0.0001786046511627907, |
|
"loss": 0.8375, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.11004636785162288, |
|
"grad_norm": 0.11384811252355576, |
|
"learning_rate": 0.00017854263565891473, |
|
"loss": 0.639, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.11035548686244204, |
|
"grad_norm": 0.13438202440738678, |
|
"learning_rate": 0.00017848062015503877, |
|
"loss": 0.777, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.1106646058732612, |
|
"grad_norm": 0.11255431920289993, |
|
"learning_rate": 0.0001784186046511628, |
|
"loss": 0.7742, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.11097372488408037, |
|
"grad_norm": 0.12108633667230606, |
|
"learning_rate": 0.00017835658914728682, |
|
"loss": 0.7723, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.11128284389489954, |
|
"grad_norm": 0.13009031116962433, |
|
"learning_rate": 0.00017829457364341086, |
|
"loss": 0.8108, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1115919629057187, |
|
"grad_norm": 0.10433927178382874, |
|
"learning_rate": 0.0001782325581395349, |
|
"loss": 0.6764, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.11190108191653787, |
|
"grad_norm": 0.132685124874115, |
|
"learning_rate": 0.00017817054263565894, |
|
"loss": 0.8012, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.11221020092735703, |
|
"grad_norm": 0.11265043169260025, |
|
"learning_rate": 0.00017810852713178298, |
|
"loss": 0.7925, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.1125193199381762, |
|
"grad_norm": 0.12110339850187302, |
|
"learning_rate": 0.000178046511627907, |
|
"loss": 0.8647, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.11282843894899536, |
|
"grad_norm": 0.1396140158176422, |
|
"learning_rate": 0.000177984496124031, |
|
"loss": 0.7221, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.11313755795981453, |
|
"grad_norm": 0.11034229397773743, |
|
"learning_rate": 0.00017792248062015504, |
|
"loss": 0.8636, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.1134466769706337, |
|
"grad_norm": 0.12675125896930695, |
|
"learning_rate": 0.00017786046511627908, |
|
"loss": 0.7799, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.11375579598145286, |
|
"grad_norm": 0.10970692336559296, |
|
"learning_rate": 0.00017779844961240312, |
|
"loss": 0.8741, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.11406491499227202, |
|
"grad_norm": 0.1316499263048172, |
|
"learning_rate": 0.00017773643410852713, |
|
"loss": 0.7235, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.1143740340030912, |
|
"grad_norm": 0.15425892174243927, |
|
"learning_rate": 0.00017767441860465117, |
|
"loss": 0.7253, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.11468315301391035, |
|
"grad_norm": 0.1116160973906517, |
|
"learning_rate": 0.0001776124031007752, |
|
"loss": 0.7544, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.11499227202472952, |
|
"grad_norm": 0.112430639564991, |
|
"learning_rate": 0.00017755038759689924, |
|
"loss": 0.8633, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.11530139103554869, |
|
"grad_norm": 0.12070276588201523, |
|
"learning_rate": 0.00017748837209302328, |
|
"loss": 0.7936, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.11561051004636785, |
|
"grad_norm": 0.14540359377861023, |
|
"learning_rate": 0.0001774263565891473, |
|
"loss": 0.9689, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.11591962905718702, |
|
"grad_norm": 0.1259058117866516, |
|
"learning_rate": 0.00017736434108527133, |
|
"loss": 0.8228, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.11622874806800618, |
|
"grad_norm": 0.09805137664079666, |
|
"learning_rate": 0.00017730232558139537, |
|
"loss": 0.7173, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.11653786707882535, |
|
"grad_norm": 0.1228744387626648, |
|
"learning_rate": 0.00017724031007751938, |
|
"loss": 0.7926, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.11684698608964451, |
|
"grad_norm": 0.1288052648305893, |
|
"learning_rate": 0.00017717829457364342, |
|
"loss": 0.7403, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.11715610510046368, |
|
"grad_norm": 0.11749331653118134, |
|
"learning_rate": 0.00017711627906976743, |
|
"loss": 0.702, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.11746522411128284, |
|
"grad_norm": 0.12872126698493958, |
|
"learning_rate": 0.00017705426356589147, |
|
"loss": 0.7654, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.11777434312210201, |
|
"grad_norm": 0.12806439399719238, |
|
"learning_rate": 0.0001769922480620155, |
|
"loss": 0.897, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.11808346213292117, |
|
"grad_norm": 0.1399737149477005, |
|
"learning_rate": 0.00017693023255813955, |
|
"loss": 0.7706, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.11839258114374034, |
|
"grad_norm": 0.14267806708812714, |
|
"learning_rate": 0.00017686821705426356, |
|
"loss": 0.8854, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.11870170015455951, |
|
"grad_norm": 0.10857547074556351, |
|
"learning_rate": 0.0001768062015503876, |
|
"loss": 0.7281, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.11901081916537867, |
|
"grad_norm": 0.11292342841625214, |
|
"learning_rate": 0.00017674418604651164, |
|
"loss": 0.7288, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.11931993817619783, |
|
"grad_norm": 0.117954321205616, |
|
"learning_rate": 0.00017668217054263568, |
|
"loss": 0.7577, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.119629057187017, |
|
"grad_norm": 0.11536258459091187, |
|
"learning_rate": 0.00017662015503875972, |
|
"loss": 0.8542, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.11993817619783617, |
|
"grad_norm": 0.13967657089233398, |
|
"learning_rate": 0.00017655813953488373, |
|
"loss": 0.7666, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.12024729520865533, |
|
"grad_norm": 0.124544158577919, |
|
"learning_rate": 0.00017649612403100777, |
|
"loss": 0.8928, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.1205564142194745, |
|
"grad_norm": 0.11185236275196075, |
|
"learning_rate": 0.00017643410852713178, |
|
"loss": 0.665, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.12086553323029366, |
|
"grad_norm": 0.11170051246881485, |
|
"learning_rate": 0.00017637209302325582, |
|
"loss": 0.7362, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.12117465224111283, |
|
"grad_norm": 0.12095949798822403, |
|
"learning_rate": 0.00017631007751937986, |
|
"loss": 0.7309, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.121483771251932, |
|
"grad_norm": 0.12416354566812515, |
|
"learning_rate": 0.00017624806201550387, |
|
"loss": 0.7633, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.12179289026275116, |
|
"grad_norm": 0.11069466918706894, |
|
"learning_rate": 0.0001761860465116279, |
|
"loss": 0.7525, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.12210200927357033, |
|
"grad_norm": 0.11477687954902649, |
|
"learning_rate": 0.00017612403100775195, |
|
"loss": 0.8462, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.12241112828438949, |
|
"grad_norm": 0.13723276555538177, |
|
"learning_rate": 0.00017606201550387598, |
|
"loss": 0.9396, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.12272024729520865, |
|
"grad_norm": 0.11079475283622742, |
|
"learning_rate": 0.00017600000000000002, |
|
"loss": 0.8683, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.12302936630602782, |
|
"grad_norm": 0.11033523827791214, |
|
"learning_rate": 0.00017593798449612403, |
|
"loss": 0.6651, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.12333848531684699, |
|
"grad_norm": 0.11088595539331436, |
|
"learning_rate": 0.00017587596899224807, |
|
"loss": 0.7183, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.12364760432766615, |
|
"grad_norm": 0.11949151009321213, |
|
"learning_rate": 0.0001758139534883721, |
|
"loss": 0.752, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.12395672333848531, |
|
"grad_norm": 0.11605624854564667, |
|
"learning_rate": 0.00017575193798449615, |
|
"loss": 0.8398, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.12426584234930448, |
|
"grad_norm": 0.1079692542552948, |
|
"learning_rate": 0.00017568992248062016, |
|
"loss": 0.8006, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.12457496136012365, |
|
"grad_norm": 0.12857861816883087, |
|
"learning_rate": 0.0001756279069767442, |
|
"loss": 0.754, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.12488408037094281, |
|
"grad_norm": 0.11760881543159485, |
|
"learning_rate": 0.0001755658914728682, |
|
"loss": 0.7992, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.125193199381762, |
|
"grad_norm": 0.1251303255558014, |
|
"learning_rate": 0.00017550387596899225, |
|
"loss": 0.8104, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.12550231839258114, |
|
"grad_norm": 0.1187320277094841, |
|
"learning_rate": 0.0001754418604651163, |
|
"loss": 0.7542, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.1258114374034003, |
|
"grad_norm": 0.1084708720445633, |
|
"learning_rate": 0.0001753798449612403, |
|
"loss": 0.7296, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.1261205564142195, |
|
"grad_norm": 0.1298135370016098, |
|
"learning_rate": 0.00017531782945736434, |
|
"loss": 0.7441, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.12642967542503863, |
|
"grad_norm": 0.1294536590576172, |
|
"learning_rate": 0.00017525581395348838, |
|
"loss": 0.7905, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.1267387944358578, |
|
"grad_norm": 0.10958458483219147, |
|
"learning_rate": 0.00017519379844961242, |
|
"loss": 0.8569, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.12704791344667696, |
|
"grad_norm": 0.12941788136959076, |
|
"learning_rate": 0.00017513178294573646, |
|
"loss": 0.8383, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.12735703245749613, |
|
"grad_norm": 0.12861841917037964, |
|
"learning_rate": 0.00017506976744186047, |
|
"loss": 0.7911, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.1276661514683153, |
|
"grad_norm": 0.13337025046348572, |
|
"learning_rate": 0.0001750077519379845, |
|
"loss": 0.7918, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.12797527047913446, |
|
"grad_norm": 0.11046712100505829, |
|
"learning_rate": 0.00017494573643410855, |
|
"loss": 0.6964, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.12828438948995363, |
|
"grad_norm": 0.10456400364637375, |
|
"learning_rate": 0.00017488372093023258, |
|
"loss": 0.8032, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.1285935085007728, |
|
"grad_norm": 0.1251031905412674, |
|
"learning_rate": 0.0001748217054263566, |
|
"loss": 0.7741, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.12890262751159196, |
|
"grad_norm": 0.13418059051036835, |
|
"learning_rate": 0.0001747596899224806, |
|
"loss": 0.862, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.12921174652241113, |
|
"grad_norm": 0.11014249175786972, |
|
"learning_rate": 0.00017469767441860465, |
|
"loss": 0.8202, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.1295208655332303, |
|
"grad_norm": 0.13367420434951782, |
|
"learning_rate": 0.00017463565891472869, |
|
"loss": 0.8483, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.12982998454404945, |
|
"grad_norm": 0.11982861161231995, |
|
"learning_rate": 0.00017457364341085272, |
|
"loss": 0.7669, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.13013910355486863, |
|
"grad_norm": 0.13078713417053223, |
|
"learning_rate": 0.00017451162790697674, |
|
"loss": 0.7996, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.13044822256568778, |
|
"grad_norm": 0.1363217681646347, |
|
"learning_rate": 0.00017444961240310077, |
|
"loss": 0.8364, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.13075734157650695, |
|
"grad_norm": 0.11756312847137451, |
|
"learning_rate": 0.0001743875968992248, |
|
"loss": 0.7808, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.13106646058732613, |
|
"grad_norm": 0.12155081331729889, |
|
"learning_rate": 0.00017432558139534885, |
|
"loss": 0.7729, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.13137557959814528, |
|
"grad_norm": 0.13399578630924225, |
|
"learning_rate": 0.0001742635658914729, |
|
"loss": 0.7278, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.13168469860896445, |
|
"grad_norm": 0.12727884948253632, |
|
"learning_rate": 0.0001742015503875969, |
|
"loss": 0.7534, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.13199381761978363, |
|
"grad_norm": 0.13630586862564087, |
|
"learning_rate": 0.00017413953488372094, |
|
"loss": 0.7763, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.13230293663060277, |
|
"grad_norm": 0.14212100207805634, |
|
"learning_rate": 0.00017407751937984498, |
|
"loss": 0.8291, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.13261205564142195, |
|
"grad_norm": 0.11936759203672409, |
|
"learning_rate": 0.00017401550387596902, |
|
"loss": 0.9107, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.13292117465224113, |
|
"grad_norm": 0.11957745999097824, |
|
"learning_rate": 0.00017395348837209303, |
|
"loss": 0.7514, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.13323029366306027, |
|
"grad_norm": 0.11473491042852402, |
|
"learning_rate": 0.00017389147286821704, |
|
"loss": 0.8061, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.13353941267387945, |
|
"grad_norm": 0.12292005121707916, |
|
"learning_rate": 0.00017382945736434108, |
|
"loss": 0.801, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.1338485316846986, |
|
"grad_norm": 0.11472901701927185, |
|
"learning_rate": 0.00017376744186046512, |
|
"loss": 0.7885, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.13415765069551777, |
|
"grad_norm": 0.1211596429347992, |
|
"learning_rate": 0.00017370542635658916, |
|
"loss": 0.8281, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.13446676970633695, |
|
"grad_norm": 0.1142617017030716, |
|
"learning_rate": 0.0001736434108527132, |
|
"loss": 0.823, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.1347758887171561, |
|
"grad_norm": 0.10048012435436249, |
|
"learning_rate": 0.0001735813953488372, |
|
"loss": 0.8976, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.13508500772797527, |
|
"grad_norm": 0.12125738710165024, |
|
"learning_rate": 0.00017351937984496125, |
|
"loss": 0.8559, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.13539412673879445, |
|
"grad_norm": 0.12249696254730225, |
|
"learning_rate": 0.00017345736434108529, |
|
"loss": 0.783, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.1357032457496136, |
|
"grad_norm": 0.14693719148635864, |
|
"learning_rate": 0.00017339534883720932, |
|
"loss": 0.8892, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.13601236476043277, |
|
"grad_norm": 0.12531165778636932, |
|
"learning_rate": 0.00017333333333333334, |
|
"loss": 0.8706, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.13632148377125194, |
|
"grad_norm": 0.1262034773826599, |
|
"learning_rate": 0.00017327131782945737, |
|
"loss": 0.7885, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.1366306027820711, |
|
"grad_norm": 0.12192118167877197, |
|
"learning_rate": 0.00017320930232558141, |
|
"loss": 0.7533, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.13693972179289027, |
|
"grad_norm": 0.12393314391374588, |
|
"learning_rate": 0.00017314728682170545, |
|
"loss": 0.7126, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.13724884080370942, |
|
"grad_norm": 0.14559726417064667, |
|
"learning_rate": 0.00017308527131782946, |
|
"loss": 0.7041, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.1375579598145286, |
|
"grad_norm": 0.11685144901275635, |
|
"learning_rate": 0.00017302325581395348, |
|
"loss": 0.7084, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.13786707882534777, |
|
"grad_norm": 0.12664124369621277, |
|
"learning_rate": 0.00017296124031007751, |
|
"loss": 0.7724, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.13817619783616691, |
|
"grad_norm": 0.1175457313656807, |
|
"learning_rate": 0.00017289922480620155, |
|
"loss": 0.8241, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.1384853168469861, |
|
"grad_norm": 0.11846484243869781, |
|
"learning_rate": 0.0001728372093023256, |
|
"loss": 0.8515, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.13879443585780527, |
|
"grad_norm": 0.13215206563472748, |
|
"learning_rate": 0.00017277519379844963, |
|
"loss": 0.8095, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.1391035548686244, |
|
"grad_norm": 0.1288730353116989, |
|
"learning_rate": 0.00017271317829457364, |
|
"loss": 0.7236, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.1394126738794436, |
|
"grad_norm": 0.11009534448385239, |
|
"learning_rate": 0.00017265116279069768, |
|
"loss": 0.8836, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.13972179289026276, |
|
"grad_norm": 0.1256999373435974, |
|
"learning_rate": 0.00017258914728682172, |
|
"loss": 0.8412, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.1400309119010819, |
|
"grad_norm": 0.12464401125907898, |
|
"learning_rate": 0.00017252713178294576, |
|
"loss": 0.8152, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.1403400309119011, |
|
"grad_norm": 0.11386653035879135, |
|
"learning_rate": 0.00017246511627906977, |
|
"loss": 0.7923, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.14064914992272023, |
|
"grad_norm": 0.11337646096944809, |
|
"learning_rate": 0.0001724031007751938, |
|
"loss": 0.6623, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.1409582689335394, |
|
"grad_norm": 0.13900204002857208, |
|
"learning_rate": 0.00017234108527131785, |
|
"loss": 0.8303, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.14126738794435859, |
|
"grad_norm": 0.13519424200057983, |
|
"learning_rate": 0.00017227906976744186, |
|
"loss": 0.7967, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.14157650695517773, |
|
"grad_norm": 0.12967944145202637, |
|
"learning_rate": 0.0001722170542635659, |
|
"loss": 0.7826, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.1418856259659969, |
|
"grad_norm": 0.12591594457626343, |
|
"learning_rate": 0.00017215503875968994, |
|
"loss": 0.9955, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.14219474497681608, |
|
"grad_norm": 0.11622080206871033, |
|
"learning_rate": 0.00017209302325581395, |
|
"loss": 0.8291, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.14250386398763523, |
|
"grad_norm": 0.12004160135984421, |
|
"learning_rate": 0.000172031007751938, |
|
"loss": 0.8015, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.1428129829984544, |
|
"grad_norm": 0.11545343697071075, |
|
"learning_rate": 0.00017196899224806203, |
|
"loss": 0.7617, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.14312210200927358, |
|
"grad_norm": 0.1136220321059227, |
|
"learning_rate": 0.00017190697674418606, |
|
"loss": 0.723, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.14343122102009273, |
|
"grad_norm": 0.11028563231229782, |
|
"learning_rate": 0.00017184496124031008, |
|
"loss": 0.8464, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.1437403400309119, |
|
"grad_norm": 0.10660995543003082, |
|
"learning_rate": 0.00017178294573643412, |
|
"loss": 0.7367, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.14404945904173105, |
|
"grad_norm": 0.10705665498971939, |
|
"learning_rate": 0.00017172093023255815, |
|
"loss": 0.6892, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.14435857805255023, |
|
"grad_norm": 0.124393992125988, |
|
"learning_rate": 0.0001716589147286822, |
|
"loss": 0.7663, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.1446676970633694, |
|
"grad_norm": 0.10380648076534271, |
|
"learning_rate": 0.00017159689922480623, |
|
"loss": 0.7851, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.14497681607418855, |
|
"grad_norm": 0.13513809442520142, |
|
"learning_rate": 0.00017153488372093024, |
|
"loss": 0.8207, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.14528593508500773, |
|
"grad_norm": 0.1310744434595108, |
|
"learning_rate": 0.00017147286821705425, |
|
"loss": 0.7328, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.1455950540958269, |
|
"grad_norm": 0.13068106770515442, |
|
"learning_rate": 0.0001714108527131783, |
|
"loss": 0.8289, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.14590417310664605, |
|
"grad_norm": 0.09564946591854095, |
|
"learning_rate": 0.00017134883720930233, |
|
"loss": 0.7596, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.14621329211746523, |
|
"grad_norm": 0.11033451557159424, |
|
"learning_rate": 0.00017128682170542637, |
|
"loss": 0.802, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.1465224111282844, |
|
"grad_norm": 0.11931835860013962, |
|
"learning_rate": 0.00017122480620155038, |
|
"loss": 0.7087, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.14683153013910355, |
|
"grad_norm": 0.12470009177923203, |
|
"learning_rate": 0.00017116279069767442, |
|
"loss": 0.7323, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.14714064914992273, |
|
"grad_norm": 0.1364419013261795, |
|
"learning_rate": 0.00017110077519379846, |
|
"loss": 0.7856, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.14744976816074187, |
|
"grad_norm": 0.12685492634773254, |
|
"learning_rate": 0.0001710387596899225, |
|
"loss": 0.8633, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.14775888717156105, |
|
"grad_norm": 0.11873108893632889, |
|
"learning_rate": 0.0001709767441860465, |
|
"loss": 0.7816, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.14806800618238022, |
|
"grad_norm": 0.12090124189853668, |
|
"learning_rate": 0.00017091472868217055, |
|
"loss": 0.9065, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.14837712519319937, |
|
"grad_norm": 0.11901501566171646, |
|
"learning_rate": 0.0001708527131782946, |
|
"loss": 0.8258, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.14868624420401855, |
|
"grad_norm": 0.11180437356233597, |
|
"learning_rate": 0.00017079069767441863, |
|
"loss": 0.7396, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.14899536321483772, |
|
"grad_norm": 0.16076162457466125, |
|
"learning_rate": 0.00017072868217054267, |
|
"loss": 0.7794, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.14930448222565687, |
|
"grad_norm": 0.13752448558807373, |
|
"learning_rate": 0.00017066666666666668, |
|
"loss": 0.6987, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.14961360123647605, |
|
"grad_norm": 0.11785899847745895, |
|
"learning_rate": 0.0001706046511627907, |
|
"loss": 0.8017, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.14992272024729522, |
|
"grad_norm": 0.11127035319805145, |
|
"learning_rate": 0.00017054263565891473, |
|
"loss": 0.7538, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.15023183925811437, |
|
"grad_norm": 0.12820091843605042, |
|
"learning_rate": 0.00017048062015503877, |
|
"loss": 0.7771, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.15054095826893354, |
|
"grad_norm": 0.13787533342838287, |
|
"learning_rate": 0.0001704186046511628, |
|
"loss": 0.789, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.1508500772797527, |
|
"grad_norm": 0.12819816172122955, |
|
"learning_rate": 0.00017035658914728682, |
|
"loss": 0.7542, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.15115919629057187, |
|
"grad_norm": 0.12091512233018875, |
|
"learning_rate": 0.00017029457364341086, |
|
"loss": 0.8161, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.15146831530139104, |
|
"grad_norm": 0.1256888210773468, |
|
"learning_rate": 0.0001702325581395349, |
|
"loss": 0.8272, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.1517774343122102, |
|
"grad_norm": 0.11789566278457642, |
|
"learning_rate": 0.00017017054263565893, |
|
"loss": 0.7079, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.15208655332302937, |
|
"grad_norm": 0.11957567185163498, |
|
"learning_rate": 0.00017010852713178294, |
|
"loss": 0.8352, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.15239567233384854, |
|
"grad_norm": 0.11315543204545975, |
|
"learning_rate": 0.00017004651162790698, |
|
"loss": 0.9455, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.1527047913446677, |
|
"grad_norm": 0.1091320812702179, |
|
"learning_rate": 0.00016998449612403102, |
|
"loss": 0.686, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.15301391035548687, |
|
"grad_norm": 0.11446017026901245, |
|
"learning_rate": 0.00016992248062015506, |
|
"loss": 0.8191, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.15332302936630604, |
|
"grad_norm": 0.11834724992513657, |
|
"learning_rate": 0.0001698604651162791, |
|
"loss": 0.8379, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.1536321483771252, |
|
"grad_norm": 0.12001053988933563, |
|
"learning_rate": 0.0001697984496124031, |
|
"loss": 0.7332, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.15394126738794436, |
|
"grad_norm": 0.11104556918144226, |
|
"learning_rate": 0.00016973643410852712, |
|
"loss": 0.8642, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.1542503863987635, |
|
"grad_norm": 0.10420899838209152, |
|
"learning_rate": 0.00016967441860465116, |
|
"loss": 0.8252, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.1545595054095827, |
|
"grad_norm": 0.1481151580810547, |
|
"learning_rate": 0.0001696124031007752, |
|
"loss": 0.7085, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.15486862442040186, |
|
"grad_norm": 0.13192850351333618, |
|
"learning_rate": 0.00016955038759689924, |
|
"loss": 0.8223, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.155177743431221, |
|
"grad_norm": 0.11016976088285446, |
|
"learning_rate": 0.00016948837209302325, |
|
"loss": 0.7833, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.15548686244204019, |
|
"grad_norm": 0.13597513735294342, |
|
"learning_rate": 0.0001694263565891473, |
|
"loss": 0.8608, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.15579598145285936, |
|
"grad_norm": 0.13814714550971985, |
|
"learning_rate": 0.00016936434108527133, |
|
"loss": 0.8234, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.1561051004636785, |
|
"grad_norm": 0.1129792258143425, |
|
"learning_rate": 0.00016930232558139537, |
|
"loss": 0.7622, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.15641421947449768, |
|
"grad_norm": 0.1326257288455963, |
|
"learning_rate": 0.0001692403100775194, |
|
"loss": 0.7745, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.15672333848531686, |
|
"grad_norm": 0.10894762724637985, |
|
"learning_rate": 0.00016917829457364342, |
|
"loss": 0.8396, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.157032457496136, |
|
"grad_norm": 0.10844721645116806, |
|
"learning_rate": 0.00016911627906976746, |
|
"loss": 0.6738, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.15734157650695518, |
|
"grad_norm": 0.12142128497362137, |
|
"learning_rate": 0.0001690542635658915, |
|
"loss": 0.8698, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.15765069551777433, |
|
"grad_norm": 0.12891779839992523, |
|
"learning_rate": 0.0001689922480620155, |
|
"loss": 0.7915, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.1579598145285935, |
|
"grad_norm": 0.1314953863620758, |
|
"learning_rate": 0.00016893023255813955, |
|
"loss": 0.8347, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.15826893353941268, |
|
"grad_norm": 0.12055188417434692, |
|
"learning_rate": 0.00016886821705426356, |
|
"loss": 0.9093, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.15857805255023183, |
|
"grad_norm": 0.12292719632387161, |
|
"learning_rate": 0.0001688062015503876, |
|
"loss": 0.8591, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.158887171561051, |
|
"grad_norm": 0.11341209709644318, |
|
"learning_rate": 0.00016874418604651163, |
|
"loss": 0.7724, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.15919629057187018, |
|
"grad_norm": 0.11800853163003922, |
|
"learning_rate": 0.00016868217054263567, |
|
"loss": 0.707, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.15950540958268933, |
|
"grad_norm": 0.14028507471084595, |
|
"learning_rate": 0.00016862015503875968, |
|
"loss": 0.7773, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.1598145285935085, |
|
"grad_norm": 0.11926918476819992, |
|
"learning_rate": 0.00016855813953488372, |
|
"loss": 0.8076, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.16012364760432768, |
|
"grad_norm": 0.11683503538370132, |
|
"learning_rate": 0.00016849612403100776, |
|
"loss": 0.7492, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.16043276661514683, |
|
"grad_norm": 0.14212507009506226, |
|
"learning_rate": 0.0001684341085271318, |
|
"loss": 0.7072, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.160741885625966, |
|
"grad_norm": 0.12642718851566315, |
|
"learning_rate": 0.00016837209302325584, |
|
"loss": 0.9201, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.16105100463678515, |
|
"grad_norm": 0.15104375779628754, |
|
"learning_rate": 0.00016831007751937985, |
|
"loss": 0.7333, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.16136012364760433, |
|
"grad_norm": 0.15067335963249207, |
|
"learning_rate": 0.0001682480620155039, |
|
"loss": 0.8267, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.1616692426584235, |
|
"grad_norm": 0.12420719116926193, |
|
"learning_rate": 0.00016818604651162793, |
|
"loss": 0.8489, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.16197836166924265, |
|
"grad_norm": 0.10997667163610458, |
|
"learning_rate": 0.00016812403100775194, |
|
"loss": 0.7615, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.16228748068006182, |
|
"grad_norm": 0.12284649908542633, |
|
"learning_rate": 0.00016806201550387598, |
|
"loss": 0.7435, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.162596599690881, |
|
"grad_norm": 0.10515284538269043, |
|
"learning_rate": 0.000168, |
|
"loss": 0.789, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.16290571870170015, |
|
"grad_norm": 0.12312375009059906, |
|
"learning_rate": 0.00016793798449612403, |
|
"loss": 0.8253, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.16321483771251932, |
|
"grad_norm": 0.10993171483278275, |
|
"learning_rate": 0.00016787596899224807, |
|
"loss": 0.7591, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.1635239567233385, |
|
"grad_norm": 0.11605069786310196, |
|
"learning_rate": 0.0001678139534883721, |
|
"loss": 0.8267, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.16383307573415765, |
|
"grad_norm": 0.12003269046545029, |
|
"learning_rate": 0.00016775193798449615, |
|
"loss": 0.8455, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.16414219474497682, |
|
"grad_norm": 0.12208808213472366, |
|
"learning_rate": 0.00016768992248062016, |
|
"loss": 0.8168, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.16445131375579597, |
|
"grad_norm": 0.12368449568748474, |
|
"learning_rate": 0.0001676279069767442, |
|
"loss": 0.8713, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.16476043276661514, |
|
"grad_norm": 0.12407387793064117, |
|
"learning_rate": 0.00016756589147286823, |
|
"loss": 0.7938, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.16506955177743432, |
|
"grad_norm": 0.12617334723472595, |
|
"learning_rate": 0.00016750387596899227, |
|
"loss": 0.8575, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.16537867078825347, |
|
"grad_norm": 0.15063488483428955, |
|
"learning_rate": 0.00016744186046511629, |
|
"loss": 0.9415, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.16568778979907264, |
|
"grad_norm": 0.12658260762691498, |
|
"learning_rate": 0.00016737984496124032, |
|
"loss": 0.85, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.16599690880989182, |
|
"grad_norm": 0.09913121163845062, |
|
"learning_rate": 0.00016731782945736434, |
|
"loss": 0.8092, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.16630602782071097, |
|
"grad_norm": 0.12728868424892426, |
|
"learning_rate": 0.00016725581395348837, |
|
"loss": 0.6869, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.16661514683153014, |
|
"grad_norm": 0.12953142821788788, |
|
"learning_rate": 0.0001671937984496124, |
|
"loss": 0.7474, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.16692426584234932, |
|
"grad_norm": 0.1168576180934906, |
|
"learning_rate": 0.00016713178294573642, |
|
"loss": 0.7697, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.16723338485316847, |
|
"grad_norm": 0.12081418931484222, |
|
"learning_rate": 0.00016706976744186046, |
|
"loss": 0.8952, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.16754250386398764, |
|
"grad_norm": 0.12843774259090424, |
|
"learning_rate": 0.0001670077519379845, |
|
"loss": 0.8786, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.1678516228748068, |
|
"grad_norm": 0.13334107398986816, |
|
"learning_rate": 0.00016694573643410854, |
|
"loss": 0.7732, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.16816074188562596, |
|
"grad_norm": 0.12304075807332993, |
|
"learning_rate": 0.00016688372093023258, |
|
"loss": 0.8681, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.16846986089644514, |
|
"grad_norm": 0.11800245940685272, |
|
"learning_rate": 0.0001668217054263566, |
|
"loss": 0.8226, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.1687789799072643, |
|
"grad_norm": 0.1358041614294052, |
|
"learning_rate": 0.00016675968992248063, |
|
"loss": 0.7346, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.16908809891808346, |
|
"grad_norm": 0.101251982152462, |
|
"learning_rate": 0.00016669767441860467, |
|
"loss": 0.7556, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.16939721792890264, |
|
"grad_norm": 0.15012463927268982, |
|
"learning_rate": 0.0001666356589147287, |
|
"loss": 0.7558, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.16970633693972179, |
|
"grad_norm": 0.11593160778284073, |
|
"learning_rate": 0.00016657364341085272, |
|
"loss": 0.7963, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.17001545595054096, |
|
"grad_norm": 0.11717840284109116, |
|
"learning_rate": 0.00016651162790697673, |
|
"loss": 0.8219, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.17032457496136014, |
|
"grad_norm": 0.12220215797424316, |
|
"learning_rate": 0.00016644961240310077, |
|
"loss": 0.8221, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.17063369397217928, |
|
"grad_norm": 0.1270114630460739, |
|
"learning_rate": 0.0001663875968992248, |
|
"loss": 0.7718, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.17094281298299846, |
|
"grad_norm": 0.11445185542106628, |
|
"learning_rate": 0.00016632558139534885, |
|
"loss": 0.8031, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.1712519319938176, |
|
"grad_norm": 0.1258378028869629, |
|
"learning_rate": 0.00016626356589147286, |
|
"loss": 0.8165, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.17156105100463678, |
|
"grad_norm": 0.12015929818153381, |
|
"learning_rate": 0.0001662015503875969, |
|
"loss": 0.7292, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.17187017001545596, |
|
"grad_norm": 0.12911571562290192, |
|
"learning_rate": 0.00016613953488372094, |
|
"loss": 0.7586, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.1721792890262751, |
|
"grad_norm": 0.11884018778800964, |
|
"learning_rate": 0.00016607751937984497, |
|
"loss": 0.816, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.17248840803709428, |
|
"grad_norm": 0.12241604179143906, |
|
"learning_rate": 0.00016601550387596901, |
|
"loss": 0.785, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.17279752704791346, |
|
"grad_norm": 0.1106485053896904, |
|
"learning_rate": 0.00016595348837209303, |
|
"loss": 0.8593, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.1731066460587326, |
|
"grad_norm": 0.13855457305908203, |
|
"learning_rate": 0.00016589147286821706, |
|
"loss": 0.8647, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.17341576506955178, |
|
"grad_norm": 0.12671570479869843, |
|
"learning_rate": 0.0001658294573643411, |
|
"loss": 0.7475, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.17372488408037096, |
|
"grad_norm": 0.10744766145944595, |
|
"learning_rate": 0.00016576744186046514, |
|
"loss": 0.6681, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.1740340030911901, |
|
"grad_norm": 0.15949758887290955, |
|
"learning_rate": 0.00016570542635658915, |
|
"loss": 0.7603, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.17434312210200928, |
|
"grad_norm": 0.14625856280326843, |
|
"learning_rate": 0.00016564341085271316, |
|
"loss": 0.9034, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.17465224111282843, |
|
"grad_norm": 0.10362540185451508, |
|
"learning_rate": 0.0001655813953488372, |
|
"loss": 0.8991, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.1749613601236476, |
|
"grad_norm": 0.1137462630867958, |
|
"learning_rate": 0.00016551937984496124, |
|
"loss": 0.8242, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.17527047913446678, |
|
"grad_norm": 0.12184014916419983, |
|
"learning_rate": 0.00016545736434108528, |
|
"loss": 0.7607, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.17557959814528593, |
|
"grad_norm": 0.1274954229593277, |
|
"learning_rate": 0.00016539534883720932, |
|
"loss": 0.7599, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.1758887171561051, |
|
"grad_norm": 0.11610583961009979, |
|
"learning_rate": 0.00016533333333333333, |
|
"loss": 0.8305, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.17619783616692428, |
|
"grad_norm": 0.1202671155333519, |
|
"learning_rate": 0.00016527131782945737, |
|
"loss": 0.8162, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.17650695517774342, |
|
"grad_norm": 0.1324416995048523, |
|
"learning_rate": 0.0001652093023255814, |
|
"loss": 0.808, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.1768160741885626, |
|
"grad_norm": 0.11402853578329086, |
|
"learning_rate": 0.00016514728682170545, |
|
"loss": 0.856, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.17712519319938178, |
|
"grad_norm": 0.11911406368017197, |
|
"learning_rate": 0.00016508527131782946, |
|
"loss": 0.7992, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.17743431221020092, |
|
"grad_norm": 0.13559581339359283, |
|
"learning_rate": 0.0001650232558139535, |
|
"loss": 0.8584, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.1777434312210201, |
|
"grad_norm": 0.12889279425144196, |
|
"learning_rate": 0.00016496124031007754, |
|
"loss": 0.8382, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.17805255023183925, |
|
"grad_norm": 0.12666314840316772, |
|
"learning_rate": 0.00016489922480620158, |
|
"loss": 0.8406, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.17836166924265842, |
|
"grad_norm": 0.12953141331672668, |
|
"learning_rate": 0.0001648372093023256, |
|
"loss": 0.7878, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.1786707882534776, |
|
"grad_norm": 0.10341861099004745, |
|
"learning_rate": 0.0001647751937984496, |
|
"loss": 0.8802, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.17897990726429674, |
|
"grad_norm": 0.11049186438322067, |
|
"learning_rate": 0.00016471317829457364, |
|
"loss": 0.7669, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.17928902627511592, |
|
"grad_norm": 0.11524353176355362, |
|
"learning_rate": 0.00016465116279069768, |
|
"loss": 0.7958, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.1795981452859351, |
|
"grad_norm": 0.12205459177494049, |
|
"learning_rate": 0.00016458914728682172, |
|
"loss": 0.6768, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.17990726429675424, |
|
"grad_norm": 0.10032919049263, |
|
"learning_rate": 0.00016452713178294575, |
|
"loss": 0.8596, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.18021638330757342, |
|
"grad_norm": 0.10666303336620331, |
|
"learning_rate": 0.00016446511627906977, |
|
"loss": 0.8106, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.1805255023183926, |
|
"grad_norm": 0.09989852458238602, |
|
"learning_rate": 0.0001644031007751938, |
|
"loss": 0.7455, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.18083462132921174, |
|
"grad_norm": 0.11961805075407028, |
|
"learning_rate": 0.00016434108527131784, |
|
"loss": 0.8289, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.18114374034003092, |
|
"grad_norm": 0.13421611487865448, |
|
"learning_rate": 0.00016427906976744188, |
|
"loss": 0.6841, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.18145285935085007, |
|
"grad_norm": 0.11855993419885635, |
|
"learning_rate": 0.0001642170542635659, |
|
"loss": 0.8299, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.18176197836166924, |
|
"grad_norm": 0.11767081916332245, |
|
"learning_rate": 0.00016415503875968993, |
|
"loss": 0.7878, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.18207109737248842, |
|
"grad_norm": 0.12189806997776031, |
|
"learning_rate": 0.00016409302325581397, |
|
"loss": 0.863, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.18238021638330756, |
|
"grad_norm": 0.09818772226572037, |
|
"learning_rate": 0.00016403100775193798, |
|
"loss": 0.7039, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.18268933539412674, |
|
"grad_norm": 0.13206005096435547, |
|
"learning_rate": 0.00016396899224806202, |
|
"loss": 0.7623, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.18299845440494591, |
|
"grad_norm": 0.12963028252124786, |
|
"learning_rate": 0.00016390697674418606, |
|
"loss": 0.7811, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.18330757341576506, |
|
"grad_norm": 0.11753853410482407, |
|
"learning_rate": 0.00016384496124031007, |
|
"loss": 0.8228, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.18361669242658424, |
|
"grad_norm": 0.10470208525657654, |
|
"learning_rate": 0.0001637829457364341, |
|
"loss": 0.8566, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.1839258114374034, |
|
"grad_norm": 0.10645218938589096, |
|
"learning_rate": 0.00016372093023255815, |
|
"loss": 0.7536, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.18423493044822256, |
|
"grad_norm": 0.11461575329303741, |
|
"learning_rate": 0.0001636589147286822, |
|
"loss": 0.7328, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.18454404945904174, |
|
"grad_norm": 0.1035584807395935, |
|
"learning_rate": 0.0001635968992248062, |
|
"loss": 0.7841, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.18485316846986088, |
|
"grad_norm": 0.12418399751186371, |
|
"learning_rate": 0.00016353488372093024, |
|
"loss": 0.7869, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.18516228748068006, |
|
"grad_norm": 0.12286582589149475, |
|
"learning_rate": 0.00016347286821705428, |
|
"loss": 0.8917, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.18547140649149924, |
|
"grad_norm": 0.11596380174160004, |
|
"learning_rate": 0.00016341085271317832, |
|
"loss": 0.7949, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.18578052550231838, |
|
"grad_norm": 0.12381494790315628, |
|
"learning_rate": 0.00016334883720930235, |
|
"loss": 0.8331, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.18608964451313756, |
|
"grad_norm": 0.12967997789382935, |
|
"learning_rate": 0.00016328682170542637, |
|
"loss": 0.8003, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.18639876352395673, |
|
"grad_norm": 0.11383350193500519, |
|
"learning_rate": 0.0001632248062015504, |
|
"loss": 0.7191, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.18670788253477588, |
|
"grad_norm": 0.12088557332754135, |
|
"learning_rate": 0.00016316279069767442, |
|
"loss": 0.772, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.18701700154559506, |
|
"grad_norm": 0.1383604258298874, |
|
"learning_rate": 0.00016310077519379846, |
|
"loss": 0.8449, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.18732612055641423, |
|
"grad_norm": 0.13077442348003387, |
|
"learning_rate": 0.0001630387596899225, |
|
"loss": 0.817, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.18763523956723338, |
|
"grad_norm": 0.12592090666294098, |
|
"learning_rate": 0.0001629767441860465, |
|
"loss": 0.8615, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.18794435857805256, |
|
"grad_norm": 0.11555439233779907, |
|
"learning_rate": 0.00016291472868217054, |
|
"loss": 0.7587, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.1882534775888717, |
|
"grad_norm": 0.12637798488140106, |
|
"learning_rate": 0.00016285271317829458, |
|
"loss": 0.7867, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.18856259659969088, |
|
"grad_norm": 0.123162180185318, |
|
"learning_rate": 0.00016279069767441862, |
|
"loss": 0.8271, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.18887171561051005, |
|
"grad_norm": 0.133504718542099, |
|
"learning_rate": 0.00016272868217054263, |
|
"loss": 0.8287, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.1891808346213292, |
|
"grad_norm": 0.13327136635780334, |
|
"learning_rate": 0.00016266666666666667, |
|
"loss": 0.8069, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.18948995363214838, |
|
"grad_norm": 0.13675393164157867, |
|
"learning_rate": 0.0001626046511627907, |
|
"loss": 0.7992, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.18979907264296755, |
|
"grad_norm": 0.12681642174720764, |
|
"learning_rate": 0.00016254263565891475, |
|
"loss": 0.8107, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.1901081916537867, |
|
"grad_norm": 0.11314484477043152, |
|
"learning_rate": 0.0001624806201550388, |
|
"loss": 0.8308, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.19041731066460588, |
|
"grad_norm": 0.11311525851488113, |
|
"learning_rate": 0.0001624186046511628, |
|
"loss": 0.8193, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.19072642967542505, |
|
"grad_norm": 0.10104167461395264, |
|
"learning_rate": 0.0001623565891472868, |
|
"loss": 0.8109, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.1910355486862442, |
|
"grad_norm": 0.1340848058462143, |
|
"learning_rate": 0.00016229457364341085, |
|
"loss": 0.7129, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.19134466769706338, |
|
"grad_norm": 0.1362898200750351, |
|
"learning_rate": 0.0001622325581395349, |
|
"loss": 0.7536, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.19165378670788252, |
|
"grad_norm": 0.13276411592960358, |
|
"learning_rate": 0.00016217054263565893, |
|
"loss": 0.8628, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.1919629057187017, |
|
"grad_norm": 0.11686565726995468, |
|
"learning_rate": 0.00016210852713178294, |
|
"loss": 0.8807, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.19227202472952087, |
|
"grad_norm": 0.12405114620923996, |
|
"learning_rate": 0.00016204651162790698, |
|
"loss": 0.747, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.19258114374034002, |
|
"grad_norm": 0.12308946251869202, |
|
"learning_rate": 0.00016198449612403102, |
|
"loss": 0.7715, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.1928902627511592, |
|
"grad_norm": 0.14180584251880646, |
|
"learning_rate": 0.00016192248062015506, |
|
"loss": 0.9483, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.19319938176197837, |
|
"grad_norm": 0.1168755367398262, |
|
"learning_rate": 0.00016186046511627907, |
|
"loss": 0.7669, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.19350850077279752, |
|
"grad_norm": 0.11601582914590836, |
|
"learning_rate": 0.0001617984496124031, |
|
"loss": 0.8361, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.1938176197836167, |
|
"grad_norm": 0.11746050417423248, |
|
"learning_rate": 0.00016173643410852715, |
|
"loss": 0.844, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.19412673879443587, |
|
"grad_norm": 0.11663764715194702, |
|
"learning_rate": 0.00016167441860465118, |
|
"loss": 0.8162, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.19443585780525502, |
|
"grad_norm": 0.1258978247642517, |
|
"learning_rate": 0.00016161240310077522, |
|
"loss": 0.7729, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.1947449768160742, |
|
"grad_norm": 0.10496451705694199, |
|
"learning_rate": 0.00016155038759689923, |
|
"loss": 0.7739, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.19505409582689334, |
|
"grad_norm": 0.1313450038433075, |
|
"learning_rate": 0.00016148837209302325, |
|
"loss": 0.8157, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.19536321483771252, |
|
"grad_norm": 0.13735109567642212, |
|
"learning_rate": 0.00016142635658914728, |
|
"loss": 0.8494, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.1956723338485317, |
|
"grad_norm": 0.11428606510162354, |
|
"learning_rate": 0.00016136434108527132, |
|
"loss": 0.7957, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.19598145285935084, |
|
"grad_norm": 0.10336098074913025, |
|
"learning_rate": 0.00016130232558139536, |
|
"loss": 0.7244, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.19629057187017002, |
|
"grad_norm": 0.12851019203662872, |
|
"learning_rate": 0.00016124031007751937, |
|
"loss": 0.7908, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.1965996908809892, |
|
"grad_norm": 0.11097298562526703, |
|
"learning_rate": 0.0001611782945736434, |
|
"loss": 0.7819, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.19690880989180834, |
|
"grad_norm": 0.10702291131019592, |
|
"learning_rate": 0.00016111627906976745, |
|
"loss": 0.9083, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.19721792890262752, |
|
"grad_norm": 0.1185348853468895, |
|
"learning_rate": 0.0001610542635658915, |
|
"loss": 0.8115, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.1975270479134467, |
|
"grad_norm": 0.12385392189025879, |
|
"learning_rate": 0.00016099224806201553, |
|
"loss": 0.847, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.19783616692426584, |
|
"grad_norm": 0.13237705826759338, |
|
"learning_rate": 0.00016093023255813954, |
|
"loss": 0.7401, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.198145285935085, |
|
"grad_norm": 0.11597932875156403, |
|
"learning_rate": 0.00016086821705426358, |
|
"loss": 0.7639, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.19845440494590416, |
|
"grad_norm": 0.12065674364566803, |
|
"learning_rate": 0.00016080620155038762, |
|
"loss": 0.7425, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.19876352395672334, |
|
"grad_norm": 0.10582825541496277, |
|
"learning_rate": 0.00016074418604651166, |
|
"loss": 0.8191, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.1990726429675425, |
|
"grad_norm": 0.1253654509782791, |
|
"learning_rate": 0.00016068217054263567, |
|
"loss": 0.7928, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.19938176197836166, |
|
"grad_norm": 0.13197046518325806, |
|
"learning_rate": 0.00016062015503875968, |
|
"loss": 0.7402, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.19969088098918084, |
|
"grad_norm": 0.12224183231592178, |
|
"learning_rate": 0.00016055813953488372, |
|
"loss": 0.6898, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.13466641306877136, |
|
"learning_rate": 0.00016049612403100776, |
|
"loss": 0.814, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.20030911901081916, |
|
"grad_norm": 0.10830813646316528, |
|
"learning_rate": 0.0001604341085271318, |
|
"loss": 0.8274, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.20061823802163833, |
|
"grad_norm": 0.1332327425479889, |
|
"learning_rate": 0.0001603720930232558, |
|
"loss": 0.8042, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.2009273570324575, |
|
"grad_norm": 0.1263049840927124, |
|
"learning_rate": 0.00016031007751937985, |
|
"loss": 0.8042, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.20123647604327666, |
|
"grad_norm": 0.13426467776298523, |
|
"learning_rate": 0.00016024806201550389, |
|
"loss": 0.8429, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.20154559505409583, |
|
"grad_norm": 0.1457086056470871, |
|
"learning_rate": 0.00016018604651162792, |
|
"loss": 0.8961, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.20185471406491498, |
|
"grad_norm": 0.11996602267026901, |
|
"learning_rate": 0.00016012403100775196, |
|
"loss": 0.7045, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.20216383307573416, |
|
"grad_norm": 0.10409342497587204, |
|
"learning_rate": 0.00016006201550387597, |
|
"loss": 0.6763, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.20247295208655333, |
|
"grad_norm": 0.11956805735826492, |
|
"learning_rate": 0.00016, |
|
"loss": 0.8169, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.20278207109737248, |
|
"grad_norm": 0.11860910803079605, |
|
"learning_rate": 0.00015993798449612405, |
|
"loss": 0.74, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.20309119010819165, |
|
"grad_norm": 0.12545433640480042, |
|
"learning_rate": 0.00015987596899224806, |
|
"loss": 0.8134, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.20340030911901083, |
|
"grad_norm": 0.10243546217679977, |
|
"learning_rate": 0.0001598139534883721, |
|
"loss": 0.7597, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.20370942812982998, |
|
"grad_norm": 0.11689910292625427, |
|
"learning_rate": 0.00015975193798449611, |
|
"loss": 0.7734, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.20401854714064915, |
|
"grad_norm": 0.12140754610300064, |
|
"learning_rate": 0.00015968992248062015, |
|
"loss": 0.8014, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.20432766615146833, |
|
"grad_norm": 0.11417256295681, |
|
"learning_rate": 0.0001596279069767442, |
|
"loss": 0.8837, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.20463678516228748, |
|
"grad_norm": 0.16317220032215118, |
|
"learning_rate": 0.00015956589147286823, |
|
"loss": 0.894, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.20494590417310665, |
|
"grad_norm": 0.1096215769648552, |
|
"learning_rate": 0.00015950387596899227, |
|
"loss": 0.6989, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.2052550231839258, |
|
"grad_norm": 0.11856718361377716, |
|
"learning_rate": 0.00015944186046511628, |
|
"loss": 0.8425, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.20556414219474498, |
|
"grad_norm": 0.11110817641019821, |
|
"learning_rate": 0.00015937984496124032, |
|
"loss": 0.8838, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.20587326120556415, |
|
"grad_norm": 0.1287812888622284, |
|
"learning_rate": 0.00015931782945736436, |
|
"loss": 0.7789, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.2061823802163833, |
|
"grad_norm": 0.0988534688949585, |
|
"learning_rate": 0.0001592558139534884, |
|
"loss": 0.7962, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.20649149922720247, |
|
"grad_norm": 0.12815728783607483, |
|
"learning_rate": 0.0001591937984496124, |
|
"loss": 0.6849, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.20680061823802165, |
|
"grad_norm": 0.12397190928459167, |
|
"learning_rate": 0.00015913178294573645, |
|
"loss": 0.771, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.2071097372488408, |
|
"grad_norm": 0.13357098400592804, |
|
"learning_rate": 0.00015906976744186046, |
|
"loss": 0.9005, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.20741885625965997, |
|
"grad_norm": 0.10197357833385468, |
|
"learning_rate": 0.0001590077519379845, |
|
"loss": 0.7802, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.20772797527047915, |
|
"grad_norm": 0.12450321763753891, |
|
"learning_rate": 0.00015894573643410854, |
|
"loss": 0.7157, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.2080370942812983, |
|
"grad_norm": 0.13183358311653137, |
|
"learning_rate": 0.00015888372093023255, |
|
"loss": 0.8082, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.20834621329211747, |
|
"grad_norm": 0.12288426607847214, |
|
"learning_rate": 0.0001588217054263566, |
|
"loss": 0.8119, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.20865533230293662, |
|
"grad_norm": 0.11869386583566666, |
|
"learning_rate": 0.00015875968992248063, |
|
"loss": 0.7948, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.2089644513137558, |
|
"grad_norm": 0.11692944169044495, |
|
"learning_rate": 0.00015869767441860466, |
|
"loss": 0.7011, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.20927357032457497, |
|
"grad_norm": 0.11799801886081696, |
|
"learning_rate": 0.0001586356589147287, |
|
"loss": 0.9002, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.20958268933539412, |
|
"grad_norm": 0.14463946223258972, |
|
"learning_rate": 0.00015857364341085271, |
|
"loss": 0.7562, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.2098918083462133, |
|
"grad_norm": 0.14642862975597382, |
|
"learning_rate": 0.00015851162790697675, |
|
"loss": 0.6619, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.21020092735703247, |
|
"grad_norm": 0.11202511936426163, |
|
"learning_rate": 0.0001584496124031008, |
|
"loss": 0.6871, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.21051004636785162, |
|
"grad_norm": 0.12495609372854233, |
|
"learning_rate": 0.00015838759689922483, |
|
"loss": 0.8702, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.2108191653786708, |
|
"grad_norm": 0.1246587410569191, |
|
"learning_rate": 0.00015832558139534884, |
|
"loss": 0.7562, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.21112828438948997, |
|
"grad_norm": 0.11207133531570435, |
|
"learning_rate": 0.00015826356589147285, |
|
"loss": 0.7948, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.21143740340030912, |
|
"grad_norm": 0.10788938403129578, |
|
"learning_rate": 0.0001582015503875969, |
|
"loss": 0.7807, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.2117465224111283, |
|
"grad_norm": 0.12020035088062286, |
|
"learning_rate": 0.00015813953488372093, |
|
"loss": 0.8755, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.21205564142194744, |
|
"grad_norm": 0.13727609813213348, |
|
"learning_rate": 0.00015807751937984497, |
|
"loss": 0.9059, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.2123647604327666, |
|
"grad_norm": 0.1516505777835846, |
|
"learning_rate": 0.00015801550387596898, |
|
"loss": 0.8297, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.2126738794435858, |
|
"grad_norm": 0.13136766850948334, |
|
"learning_rate": 0.00015795348837209302, |
|
"loss": 0.8096, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.21298299845440494, |
|
"grad_norm": 0.11303882300853729, |
|
"learning_rate": 0.00015789147286821706, |
|
"loss": 0.7967, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.2132921174652241, |
|
"grad_norm": 0.13250216841697693, |
|
"learning_rate": 0.0001578294573643411, |
|
"loss": 0.8846, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.2136012364760433, |
|
"grad_norm": 0.125604048371315, |
|
"learning_rate": 0.00015776744186046514, |
|
"loss": 0.8468, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.21391035548686244, |
|
"grad_norm": 0.11047331243753433, |
|
"learning_rate": 0.00015770542635658915, |
|
"loss": 0.7068, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.2142194744976816, |
|
"grad_norm": 0.12153135985136032, |
|
"learning_rate": 0.0001576434108527132, |
|
"loss": 0.7166, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.21452859350850076, |
|
"grad_norm": 0.11161539703607559, |
|
"learning_rate": 0.00015758139534883723, |
|
"loss": 0.8544, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.21483771251931993, |
|
"grad_norm": 0.11793739348649979, |
|
"learning_rate": 0.00015751937984496126, |
|
"loss": 0.8196, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.2151468315301391, |
|
"grad_norm": 0.12710191309452057, |
|
"learning_rate": 0.00015745736434108528, |
|
"loss": 0.8249, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.21545595054095826, |
|
"grad_norm": 0.12436945736408234, |
|
"learning_rate": 0.0001573953488372093, |
|
"loss": 0.7147, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.21576506955177743, |
|
"grad_norm": 0.10970059782266617, |
|
"learning_rate": 0.00015733333333333333, |
|
"loss": 0.8297, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.2160741885625966, |
|
"grad_norm": 0.1195238009095192, |
|
"learning_rate": 0.00015727131782945737, |
|
"loss": 0.7347, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.21638330757341576, |
|
"grad_norm": 0.11942408978939056, |
|
"learning_rate": 0.0001572093023255814, |
|
"loss": 0.7825, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.21669242658423493, |
|
"grad_norm": 0.11630623042583466, |
|
"learning_rate": 0.00015714728682170544, |
|
"loss": 0.8442, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.2170015455950541, |
|
"grad_norm": 0.12051951140165329, |
|
"learning_rate": 0.00015708527131782945, |
|
"loss": 0.6505, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.21731066460587325, |
|
"grad_norm": 0.12351769208908081, |
|
"learning_rate": 0.0001570232558139535, |
|
"loss": 0.8298, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.21761978361669243, |
|
"grad_norm": 0.11859402805566788, |
|
"learning_rate": 0.00015696124031007753, |
|
"loss": 0.6916, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.21792890262751158, |
|
"grad_norm": 0.13090530037879944, |
|
"learning_rate": 0.00015689922480620157, |
|
"loss": 0.9438, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.21823802163833075, |
|
"grad_norm": 0.12880273163318634, |
|
"learning_rate": 0.00015683720930232558, |
|
"loss": 0.8416, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.21854714064914993, |
|
"grad_norm": 0.12142153829336166, |
|
"learning_rate": 0.00015677519379844962, |
|
"loss": 0.7073, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.21885625965996908, |
|
"grad_norm": 0.12591804563999176, |
|
"learning_rate": 0.00015671317829457366, |
|
"loss": 0.7311, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.21916537867078825, |
|
"grad_norm": 0.1107344701886177, |
|
"learning_rate": 0.0001566511627906977, |
|
"loss": 0.6529, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.21947449768160743, |
|
"grad_norm": 0.10950338840484619, |
|
"learning_rate": 0.0001565891472868217, |
|
"loss": 0.6766, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.21978361669242658, |
|
"grad_norm": 0.1164025217294693, |
|
"learning_rate": 0.00015652713178294572, |
|
"loss": 0.7021, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.22009273570324575, |
|
"grad_norm": 0.13531538844108582, |
|
"learning_rate": 0.00015646511627906976, |
|
"loss": 0.739, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.22040185471406493, |
|
"grad_norm": 0.12463265657424927, |
|
"learning_rate": 0.0001564031007751938, |
|
"loss": 0.7832, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.22071097372488407, |
|
"grad_norm": 0.13127584755420685, |
|
"learning_rate": 0.00015634108527131784, |
|
"loss": 0.7652, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.22102009273570325, |
|
"grad_norm": 0.13645778596401215, |
|
"learning_rate": 0.00015627906976744188, |
|
"loss": 0.8023, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.2213292117465224, |
|
"grad_norm": 0.12551645934581757, |
|
"learning_rate": 0.0001562170542635659, |
|
"loss": 0.7572, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.22163833075734157, |
|
"grad_norm": 0.1264384686946869, |
|
"learning_rate": 0.00015615503875968993, |
|
"loss": 0.7972, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.22194744976816075, |
|
"grad_norm": 0.12361495941877365, |
|
"learning_rate": 0.00015609302325581397, |
|
"loss": 0.7932, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.2222565687789799, |
|
"grad_norm": 0.12109937518835068, |
|
"learning_rate": 0.000156031007751938, |
|
"loss": 0.7977, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.22256568778979907, |
|
"grad_norm": 0.11111821234226227, |
|
"learning_rate": 0.00015596899224806202, |
|
"loss": 0.7563, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.22287480680061825, |
|
"grad_norm": 0.11457593739032745, |
|
"learning_rate": 0.00015590697674418606, |
|
"loss": 0.8042, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.2231839258114374, |
|
"grad_norm": 0.11488046497106552, |
|
"learning_rate": 0.0001558449612403101, |
|
"loss": 0.7316, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.22349304482225657, |
|
"grad_norm": 0.1125350072979927, |
|
"learning_rate": 0.0001557829457364341, |
|
"loss": 0.9362, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.22380216383307575, |
|
"grad_norm": 0.13179023563861847, |
|
"learning_rate": 0.00015572093023255814, |
|
"loss": 0.8695, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.2241112828438949, |
|
"grad_norm": 0.11290204524993896, |
|
"learning_rate": 0.00015565891472868218, |
|
"loss": 0.7914, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.22442040185471407, |
|
"grad_norm": 0.10253167897462845, |
|
"learning_rate": 0.0001555968992248062, |
|
"loss": 0.7789, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.22472952086553322, |
|
"grad_norm": 0.13726738095283508, |
|
"learning_rate": 0.00015553488372093023, |
|
"loss": 0.8706, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.2250386398763524, |
|
"grad_norm": 0.11310728639364243, |
|
"learning_rate": 0.00015547286821705427, |
|
"loss": 0.816, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.22534775888717157, |
|
"grad_norm": 0.10052375495433807, |
|
"learning_rate": 0.0001554108527131783, |
|
"loss": 0.8172, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.22565687789799072, |
|
"grad_norm": 0.12013030052185059, |
|
"learning_rate": 0.00015534883720930232, |
|
"loss": 0.7367, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.2259659969088099, |
|
"grad_norm": 0.12074479460716248, |
|
"learning_rate": 0.00015528682170542636, |
|
"loss": 0.7325, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.22627511591962907, |
|
"grad_norm": 0.11103136837482452, |
|
"learning_rate": 0.0001552248062015504, |
|
"loss": 0.7697, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.22658423493044821, |
|
"grad_norm": 0.1295919120311737, |
|
"learning_rate": 0.00015516279069767444, |
|
"loss": 0.8268, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.2268933539412674, |
|
"grad_norm": 0.11158143728971481, |
|
"learning_rate": 0.00015510077519379848, |
|
"loss": 0.8241, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.22720247295208656, |
|
"grad_norm": 0.11632904410362244, |
|
"learning_rate": 0.0001550387596899225, |
|
"loss": 0.8937, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.2275115919629057, |
|
"grad_norm": 0.11036121845245361, |
|
"learning_rate": 0.00015497674418604653, |
|
"loss": 0.7435, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.2278207109737249, |
|
"grad_norm": 0.11522484570741653, |
|
"learning_rate": 0.00015491472868217054, |
|
"loss": 0.7337, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.22812982998454404, |
|
"grad_norm": 0.11675230413675308, |
|
"learning_rate": 0.00015485271317829458, |
|
"loss": 0.8623, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.2284389489953632, |
|
"grad_norm": 0.114603690803051, |
|
"learning_rate": 0.00015479069767441862, |
|
"loss": 0.7344, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.2287480680061824, |
|
"grad_norm": 0.1253465712070465, |
|
"learning_rate": 0.00015472868217054263, |
|
"loss": 0.7111, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.22905718701700153, |
|
"grad_norm": 0.1126297116279602, |
|
"learning_rate": 0.00015466666666666667, |
|
"loss": 0.7948, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.2293663060278207, |
|
"grad_norm": 0.1282925307750702, |
|
"learning_rate": 0.0001546046511627907, |
|
"loss": 0.8041, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.22967542503863989, |
|
"grad_norm": 0.11763650923967361, |
|
"learning_rate": 0.00015454263565891475, |
|
"loss": 0.7548, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.22998454404945903, |
|
"grad_norm": 0.12256699800491333, |
|
"learning_rate": 0.00015448062015503876, |
|
"loss": 0.8664, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.2302936630602782, |
|
"grad_norm": 0.12481536716222763, |
|
"learning_rate": 0.0001544186046511628, |
|
"loss": 0.8986, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.23060278207109738, |
|
"grad_norm": 0.11068347096443176, |
|
"learning_rate": 0.00015435658914728683, |
|
"loss": 0.8181, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.23091190108191653, |
|
"grad_norm": 0.13589359819889069, |
|
"learning_rate": 0.00015429457364341087, |
|
"loss": 0.6341, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.2312210200927357, |
|
"grad_norm": 0.10451477766036987, |
|
"learning_rate": 0.0001542325581395349, |
|
"loss": 0.7122, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.23153013910355486, |
|
"grad_norm": 0.129670187830925, |
|
"learning_rate": 0.00015417054263565892, |
|
"loss": 0.906, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.23183925811437403, |
|
"grad_norm": 0.11718375980854034, |
|
"learning_rate": 0.00015410852713178293, |
|
"loss": 0.7083, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.2321483771251932, |
|
"grad_norm": 0.10383883118629456, |
|
"learning_rate": 0.00015404651162790697, |
|
"loss": 0.8419, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.23245749613601235, |
|
"grad_norm": 0.11427688598632812, |
|
"learning_rate": 0.000153984496124031, |
|
"loss": 0.8832, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.23276661514683153, |
|
"grad_norm": 0.10943648964166641, |
|
"learning_rate": 0.00015392248062015505, |
|
"loss": 0.7645, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.2330757341576507, |
|
"grad_norm": 0.10078372806310654, |
|
"learning_rate": 0.00015386046511627906, |
|
"loss": 0.8181, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.23338485316846985, |
|
"grad_norm": 0.10082436352968216, |
|
"learning_rate": 0.0001537984496124031, |
|
"loss": 0.9162, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.23369397217928903, |
|
"grad_norm": 0.1165718212723732, |
|
"learning_rate": 0.00015373643410852714, |
|
"loss": 0.7518, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.2340030911901082, |
|
"grad_norm": 0.11954308301210403, |
|
"learning_rate": 0.00015367441860465118, |
|
"loss": 0.684, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.23431221020092735, |
|
"grad_norm": 0.12400692701339722, |
|
"learning_rate": 0.0001536124031007752, |
|
"loss": 0.8917, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.23462132921174653, |
|
"grad_norm": 0.10413803160190582, |
|
"learning_rate": 0.00015355038759689923, |
|
"loss": 0.7598, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.23493044822256567, |
|
"grad_norm": 0.11822440475225449, |
|
"learning_rate": 0.00015348837209302327, |
|
"loss": 0.7463, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.23523956723338485, |
|
"grad_norm": 0.11296241730451584, |
|
"learning_rate": 0.0001534263565891473, |
|
"loss": 0.7842, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.23554868624420403, |
|
"grad_norm": 0.1275034248828888, |
|
"learning_rate": 0.00015336434108527135, |
|
"loss": 0.7418, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.23585780525502317, |
|
"grad_norm": 0.12039622664451599, |
|
"learning_rate": 0.00015330232558139536, |
|
"loss": 0.7858, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.23616692426584235, |
|
"grad_norm": 0.12102185189723969, |
|
"learning_rate": 0.00015324031007751937, |
|
"loss": 0.7543, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.23647604327666152, |
|
"grad_norm": 0.11714228242635727, |
|
"learning_rate": 0.0001531782945736434, |
|
"loss": 0.7918, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.23678516228748067, |
|
"grad_norm": 0.1297132819890976, |
|
"learning_rate": 0.00015311627906976745, |
|
"loss": 0.7579, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.23709428129829985, |
|
"grad_norm": 0.1544187366962433, |
|
"learning_rate": 0.00015305426356589149, |
|
"loss": 0.9518, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.23740340030911902, |
|
"grad_norm": 0.1462169736623764, |
|
"learning_rate": 0.0001529922480620155, |
|
"loss": 0.8472, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.23771251931993817, |
|
"grad_norm": 0.12060233950614929, |
|
"learning_rate": 0.00015293023255813954, |
|
"loss": 0.7098, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.23802163833075735, |
|
"grad_norm": 0.10534477233886719, |
|
"learning_rate": 0.00015286821705426357, |
|
"loss": 0.692, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.2383307573415765, |
|
"grad_norm": 0.12921524047851562, |
|
"learning_rate": 0.0001528062015503876, |
|
"loss": 0.7307, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.23863987635239567, |
|
"grad_norm": 0.11627444624900818, |
|
"learning_rate": 0.00015274418604651165, |
|
"loss": 0.7488, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.23894899536321484, |
|
"grad_norm": 0.12365692108869553, |
|
"learning_rate": 0.00015268217054263566, |
|
"loss": 0.8608, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.239258114374034, |
|
"grad_norm": 0.12448560446500778, |
|
"learning_rate": 0.0001526201550387597, |
|
"loss": 0.744, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.23956723338485317, |
|
"grad_norm": 0.11701495200395584, |
|
"learning_rate": 0.00015255813953488374, |
|
"loss": 0.7273, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.23987635239567234, |
|
"grad_norm": 0.14910434186458588, |
|
"learning_rate": 0.00015249612403100778, |
|
"loss": 0.8649, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.2401854714064915, |
|
"grad_norm": 0.12013334035873413, |
|
"learning_rate": 0.0001524341085271318, |
|
"loss": 0.763, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.24049459041731067, |
|
"grad_norm": 0.13918770849704742, |
|
"learning_rate": 0.0001523720930232558, |
|
"loss": 0.7846, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.24080370942812984, |
|
"grad_norm": 0.1394704133272171, |
|
"learning_rate": 0.00015231007751937984, |
|
"loss": 0.8571, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.241112828438949, |
|
"grad_norm": 0.1315182000398636, |
|
"learning_rate": 0.00015224806201550388, |
|
"loss": 0.9, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.24142194744976817, |
|
"grad_norm": 0.11989207565784454, |
|
"learning_rate": 0.00015218604651162792, |
|
"loss": 0.7906, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.2417310664605873, |
|
"grad_norm": 0.12133822590112686, |
|
"learning_rate": 0.00015212403100775193, |
|
"loss": 0.8559, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.2420401854714065, |
|
"grad_norm": 0.12769554555416107, |
|
"learning_rate": 0.00015206201550387597, |
|
"loss": 0.6996, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.24234930448222566, |
|
"grad_norm": 0.11488951742649078, |
|
"learning_rate": 0.000152, |
|
"loss": 0.667, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.2426584234930448, |
|
"grad_norm": 0.12931592762470245, |
|
"learning_rate": 0.00015193798449612405, |
|
"loss": 0.8865, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.242967542503864, |
|
"grad_norm": 0.1383776217699051, |
|
"learning_rate": 0.00015187596899224809, |
|
"loss": 0.7648, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.24327666151468316, |
|
"grad_norm": 0.13125276565551758, |
|
"learning_rate": 0.0001518139534883721, |
|
"loss": 0.8103, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.2435857805255023, |
|
"grad_norm": 0.11506158858537674, |
|
"learning_rate": 0.00015175193798449614, |
|
"loss": 0.7935, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.24389489953632149, |
|
"grad_norm": 0.1170530617237091, |
|
"learning_rate": 0.00015168992248062017, |
|
"loss": 0.861, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.24420401854714066, |
|
"grad_norm": 0.1097881942987442, |
|
"learning_rate": 0.0001516279069767442, |
|
"loss": 0.8071, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.2445131375579598, |
|
"grad_norm": 0.12315784394741058, |
|
"learning_rate": 0.00015156589147286823, |
|
"loss": 0.8053, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.24482225656877898, |
|
"grad_norm": 0.1385902613401413, |
|
"learning_rate": 0.00015150387596899224, |
|
"loss": 0.766, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.24513137557959813, |
|
"grad_norm": 0.12784931063652039, |
|
"learning_rate": 0.00015144186046511628, |
|
"loss": 0.6802, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.2454404945904173, |
|
"grad_norm": 0.12145421653985977, |
|
"learning_rate": 0.00015137984496124031, |
|
"loss": 0.7079, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.24574961360123648, |
|
"grad_norm": 0.11476317793130875, |
|
"learning_rate": 0.00015131782945736435, |
|
"loss": 0.7618, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.24605873261205563, |
|
"grad_norm": 0.1074838861823082, |
|
"learning_rate": 0.0001512558139534884, |
|
"loss": 0.7939, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.2463678516228748, |
|
"grad_norm": 0.11968334019184113, |
|
"learning_rate": 0.0001511937984496124, |
|
"loss": 0.8351, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.24667697063369398, |
|
"grad_norm": 0.11175213009119034, |
|
"learning_rate": 0.00015113178294573644, |
|
"loss": 0.8101, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.24698608964451313, |
|
"grad_norm": 0.125063955783844, |
|
"learning_rate": 0.00015106976744186048, |
|
"loss": 0.769, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.2472952086553323, |
|
"grad_norm": 0.12018170952796936, |
|
"learning_rate": 0.00015100775193798452, |
|
"loss": 0.8536, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.24760432766615148, |
|
"grad_norm": 0.1252349615097046, |
|
"learning_rate": 0.00015094573643410853, |
|
"loss": 0.813, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.24791344667697063, |
|
"grad_norm": 0.11219511181116104, |
|
"learning_rate": 0.00015088372093023257, |
|
"loss": 0.749, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.2482225656877898, |
|
"grad_norm": 0.11993087828159332, |
|
"learning_rate": 0.00015082170542635658, |
|
"loss": 0.8223, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.24853168469860895, |
|
"grad_norm": 0.19286490976810455, |
|
"learning_rate": 0.00015075968992248062, |
|
"loss": 0.7728, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.24884080370942813, |
|
"grad_norm": 0.1317611187696457, |
|
"learning_rate": 0.00015069767441860466, |
|
"loss": 0.8072, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.2491499227202473, |
|
"grad_norm": 0.1411685198545456, |
|
"learning_rate": 0.00015063565891472867, |
|
"loss": 0.7539, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.24945904173106645, |
|
"grad_norm": 0.12156263738870621, |
|
"learning_rate": 0.0001505736434108527, |
|
"loss": 0.7394, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.24976816074188563, |
|
"grad_norm": 0.17997561395168304, |
|
"learning_rate": 0.00015051162790697675, |
|
"loss": 0.829, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.2500772797527048, |
|
"grad_norm": 0.11623260378837585, |
|
"learning_rate": 0.0001504496124031008, |
|
"loss": 0.6734, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.250386398763524, |
|
"grad_norm": 0.12638065218925476, |
|
"learning_rate": 0.00015038759689922483, |
|
"loss": 0.743, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.2506955177743431, |
|
"grad_norm": 0.11130564659833908, |
|
"learning_rate": 0.00015032558139534884, |
|
"loss": 0.7584, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.25100463678516227, |
|
"grad_norm": 0.11362282186746597, |
|
"learning_rate": 0.00015026356589147288, |
|
"loss": 0.8049, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.2513137557959815, |
|
"grad_norm": 0.12556937336921692, |
|
"learning_rate": 0.00015020155038759692, |
|
"loss": 0.8124, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.2516228748068006, |
|
"grad_norm": 0.12706847488880157, |
|
"learning_rate": 0.00015013953488372095, |
|
"loss": 0.8337, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.25193199381761977, |
|
"grad_norm": 0.1378735899925232, |
|
"learning_rate": 0.00015007751937984497, |
|
"loss": 0.7448, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.252241112828439, |
|
"grad_norm": 0.10803718119859695, |
|
"learning_rate": 0.000150015503875969, |
|
"loss": 0.7921, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.2525502318392581, |
|
"grad_norm": 0.13711851835250854, |
|
"learning_rate": 0.00014995348837209302, |
|
"loss": 0.806, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.25285935085007727, |
|
"grad_norm": 0.11921881139278412, |
|
"learning_rate": 0.00014989147286821705, |
|
"loss": 0.7221, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.2531684698608965, |
|
"grad_norm": 0.12782952189445496, |
|
"learning_rate": 0.0001498294573643411, |
|
"loss": 0.8501, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.2534775888717156, |
|
"grad_norm": 0.12477905303239822, |
|
"learning_rate": 0.0001497674418604651, |
|
"loss": 0.8073, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.25378670788253477, |
|
"grad_norm": 0.1095808669924736, |
|
"learning_rate": 0.00014970542635658914, |
|
"loss": 0.7556, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.2540958268933539, |
|
"grad_norm": 0.11897611618041992, |
|
"learning_rate": 0.00014964341085271318, |
|
"loss": 0.7244, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.2544049459041731, |
|
"grad_norm": 0.12462172657251358, |
|
"learning_rate": 0.00014958139534883722, |
|
"loss": 0.8497, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.25471406491499227, |
|
"grad_norm": 0.11331510543823242, |
|
"learning_rate": 0.00014951937984496126, |
|
"loss": 0.8004, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.2550231839258114, |
|
"grad_norm": 0.1233968660235405, |
|
"learning_rate": 0.00014945736434108527, |
|
"loss": 0.8544, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.2553323029366306, |
|
"grad_norm": 0.12359130382537842, |
|
"learning_rate": 0.0001493953488372093, |
|
"loss": 0.7888, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.25564142194744977, |
|
"grad_norm": 0.1450347900390625, |
|
"learning_rate": 0.00014933333333333335, |
|
"loss": 0.8437, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.2559505409582689, |
|
"grad_norm": 0.14557255804538727, |
|
"learning_rate": 0.0001492713178294574, |
|
"loss": 0.8736, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.2562596599690881, |
|
"grad_norm": 0.11424949765205383, |
|
"learning_rate": 0.0001492093023255814, |
|
"loss": 0.8393, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.25656877897990726, |
|
"grad_norm": 0.12477642297744751, |
|
"learning_rate": 0.0001491472868217054, |
|
"loss": 0.8424, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.2568778979907264, |
|
"grad_norm": 0.1368608921766281, |
|
"learning_rate": 0.00014908527131782945, |
|
"loss": 0.787, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.2571870170015456, |
|
"grad_norm": 0.12159669399261475, |
|
"learning_rate": 0.0001490232558139535, |
|
"loss": 0.7776, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.25749613601236476, |
|
"grad_norm": 0.1223360225558281, |
|
"learning_rate": 0.00014896124031007753, |
|
"loss": 0.8551, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.2578052550231839, |
|
"grad_norm": 0.11618901044130325, |
|
"learning_rate": 0.00014889922480620157, |
|
"loss": 0.7106, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.2581143740340031, |
|
"grad_norm": 0.16739368438720703, |
|
"learning_rate": 0.00014883720930232558, |
|
"loss": 0.8328, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.25842349304482226, |
|
"grad_norm": 0.13085711002349854, |
|
"learning_rate": 0.00014877519379844962, |
|
"loss": 0.7686, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.2587326120556414, |
|
"grad_norm": 0.11446749418973923, |
|
"learning_rate": 0.00014871317829457366, |
|
"loss": 0.8441, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.2590417310664606, |
|
"grad_norm": 0.13658201694488525, |
|
"learning_rate": 0.0001486511627906977, |
|
"loss": 0.698, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.25935085007727976, |
|
"grad_norm": 0.132501482963562, |
|
"learning_rate": 0.0001485891472868217, |
|
"loss": 0.8407, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.2596599690880989, |
|
"grad_norm": 0.1130068451166153, |
|
"learning_rate": 0.00014852713178294574, |
|
"loss": 0.845, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.2599690880989181, |
|
"grad_norm": 0.12523633241653442, |
|
"learning_rate": 0.00014846511627906978, |
|
"loss": 0.7873, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.26027820710973726, |
|
"grad_norm": 0.21349893510341644, |
|
"learning_rate": 0.00014840310077519382, |
|
"loss": 0.9231, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.2605873261205564, |
|
"grad_norm": 0.13039101660251617, |
|
"learning_rate": 0.00014834108527131783, |
|
"loss": 0.7926, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.26089644513137555, |
|
"grad_norm": 0.15471790730953217, |
|
"learning_rate": 0.00014827906976744185, |
|
"loss": 0.7219, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.26120556414219476, |
|
"grad_norm": 0.19512821733951569, |
|
"learning_rate": 0.00014821705426356588, |
|
"loss": 0.7653, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.2615146831530139, |
|
"grad_norm": 0.12139850109815598, |
|
"learning_rate": 0.00014815503875968992, |
|
"loss": 0.7593, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.26182380216383305, |
|
"grad_norm": 0.14223287999629974, |
|
"learning_rate": 0.00014809302325581396, |
|
"loss": 0.8023, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.26213292117465226, |
|
"grad_norm": 0.12319888919591904, |
|
"learning_rate": 0.000148031007751938, |
|
"loss": 0.7967, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.2624420401854714, |
|
"grad_norm": 0.14263351261615753, |
|
"learning_rate": 0.000147968992248062, |
|
"loss": 0.8804, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.26275115919629055, |
|
"grad_norm": 0.11919604986906052, |
|
"learning_rate": 0.00014790697674418605, |
|
"loss": 0.8823, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.26306027820710975, |
|
"grad_norm": 0.13258209824562073, |
|
"learning_rate": 0.0001478449612403101, |
|
"loss": 0.7271, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.2633693972179289, |
|
"grad_norm": 0.11424367874860764, |
|
"learning_rate": 0.00014778294573643413, |
|
"loss": 0.741, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.26367851622874805, |
|
"grad_norm": 0.12254701554775238, |
|
"learning_rate": 0.00014772093023255814, |
|
"loss": 0.7135, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.26398763523956725, |
|
"grad_norm": 0.1269705444574356, |
|
"learning_rate": 0.00014765891472868218, |
|
"loss": 0.7848, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.2642967542503864, |
|
"grad_norm": 0.12801006436347961, |
|
"learning_rate": 0.00014759689922480622, |
|
"loss": 0.7662, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.26460587326120555, |
|
"grad_norm": 0.12760306894779205, |
|
"learning_rate": 0.00014753488372093026, |
|
"loss": 0.8694, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.26491499227202475, |
|
"grad_norm": 0.10601752996444702, |
|
"learning_rate": 0.00014747286821705427, |
|
"loss": 0.8976, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.2652241112828439, |
|
"grad_norm": 0.11408428847789764, |
|
"learning_rate": 0.0001474108527131783, |
|
"loss": 0.8152, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.26553323029366305, |
|
"grad_norm": 0.11453750729560852, |
|
"learning_rate": 0.00014734883720930232, |
|
"loss": 0.7036, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.26584234930448225, |
|
"grad_norm": 0.1266554743051529, |
|
"learning_rate": 0.00014728682170542636, |
|
"loss": 0.8151, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.2661514683153014, |
|
"grad_norm": 0.11620058864355087, |
|
"learning_rate": 0.0001472248062015504, |
|
"loss": 0.8732, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.26646058732612055, |
|
"grad_norm": 0.1301504373550415, |
|
"learning_rate": 0.00014716279069767443, |
|
"loss": 0.801, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.26676970633693975, |
|
"grad_norm": 0.11662990599870682, |
|
"learning_rate": 0.00014710077519379845, |
|
"loss": 0.7293, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.2670788253477589, |
|
"grad_norm": 0.13666480779647827, |
|
"learning_rate": 0.00014703875968992248, |
|
"loss": 0.6958, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.26738794435857804, |
|
"grad_norm": 0.12055882066488266, |
|
"learning_rate": 0.00014697674418604652, |
|
"loss": 0.701, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.2676970633693972, |
|
"grad_norm": 0.11646155267953873, |
|
"learning_rate": 0.00014691472868217056, |
|
"loss": 0.8044, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.2680061823802164, |
|
"grad_norm": 0.13146454095840454, |
|
"learning_rate": 0.0001468527131782946, |
|
"loss": 0.8652, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.26831530139103554, |
|
"grad_norm": 0.11620502918958664, |
|
"learning_rate": 0.0001467906976744186, |
|
"loss": 0.815, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.2686244204018547, |
|
"grad_norm": 0.1345463991165161, |
|
"learning_rate": 0.00014672868217054265, |
|
"loss": 0.8375, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.2689335394126739, |
|
"grad_norm": 0.11036497354507446, |
|
"learning_rate": 0.00014666666666666666, |
|
"loss": 0.7705, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.26924265842349304, |
|
"grad_norm": 0.1117565780878067, |
|
"learning_rate": 0.0001466046511627907, |
|
"loss": 0.8034, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.2695517774343122, |
|
"grad_norm": 0.12002184987068176, |
|
"learning_rate": 0.00014654263565891474, |
|
"loss": 0.6356, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.2698608964451314, |
|
"grad_norm": 0.11224567890167236, |
|
"learning_rate": 0.00014648062015503875, |
|
"loss": 0.6498, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.27017001545595054, |
|
"grad_norm": 0.12627927958965302, |
|
"learning_rate": 0.0001464186046511628, |
|
"loss": 0.7575, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.2704791344667697, |
|
"grad_norm": 0.12623284757137299, |
|
"learning_rate": 0.00014635658914728683, |
|
"loss": 0.7678, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.2707882534775889, |
|
"grad_norm": 0.12953105568885803, |
|
"learning_rate": 0.00014629457364341087, |
|
"loss": 0.7884, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.27109737248840804, |
|
"grad_norm": 0.13573655486106873, |
|
"learning_rate": 0.00014623255813953488, |
|
"loss": 0.7948, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.2714064914992272, |
|
"grad_norm": 0.12704919278621674, |
|
"learning_rate": 0.00014617054263565892, |
|
"loss": 0.8609, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.2717156105100464, |
|
"grad_norm": 0.1407371610403061, |
|
"learning_rate": 0.00014610852713178296, |
|
"loss": 0.7222, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.27202472952086554, |
|
"grad_norm": 0.11318446695804596, |
|
"learning_rate": 0.000146046511627907, |
|
"loss": 0.8666, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.2723338485316847, |
|
"grad_norm": 0.12386681139469147, |
|
"learning_rate": 0.00014598449612403103, |
|
"loss": 0.7838, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.2726429675425039, |
|
"grad_norm": 0.1111859530210495, |
|
"learning_rate": 0.00014592248062015505, |
|
"loss": 0.7663, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.27295208655332304, |
|
"grad_norm": 0.13240239024162292, |
|
"learning_rate": 0.00014586046511627906, |
|
"loss": 0.7194, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.2732612055641422, |
|
"grad_norm": 0.12492766976356506, |
|
"learning_rate": 0.0001457984496124031, |
|
"loss": 0.8904, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.2735703245749614, |
|
"grad_norm": 0.11625178158283234, |
|
"learning_rate": 0.00014573643410852714, |
|
"loss": 0.892, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.27387944358578054, |
|
"grad_norm": 0.12176412343978882, |
|
"learning_rate": 0.00014567441860465117, |
|
"loss": 0.6733, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.2741885625965997, |
|
"grad_norm": 0.12597818672657013, |
|
"learning_rate": 0.00014561240310077519, |
|
"loss": 0.7992, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.27449768160741883, |
|
"grad_norm": 0.12471161782741547, |
|
"learning_rate": 0.00014555038759689922, |
|
"loss": 0.6884, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.27480680061823803, |
|
"grad_norm": 0.11098852753639221, |
|
"learning_rate": 0.00014548837209302326, |
|
"loss": 0.6681, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.2751159196290572, |
|
"grad_norm": 0.10870758444070816, |
|
"learning_rate": 0.0001454263565891473, |
|
"loss": 0.8065, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.27542503863987633, |
|
"grad_norm": 0.1273547261953354, |
|
"learning_rate": 0.00014536434108527131, |
|
"loss": 0.7186, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.27573415765069553, |
|
"grad_norm": 0.14047206938266754, |
|
"learning_rate": 0.00014530232558139535, |
|
"loss": 0.7146, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.2760432766615147, |
|
"grad_norm": 0.12800012528896332, |
|
"learning_rate": 0.0001452403100775194, |
|
"loss": 0.8439, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.27635239567233383, |
|
"grad_norm": 0.11552654951810837, |
|
"learning_rate": 0.00014517829457364343, |
|
"loss": 0.8069, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.27666151468315303, |
|
"grad_norm": 0.11438272893428802, |
|
"learning_rate": 0.00014511627906976747, |
|
"loss": 0.7723, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.2769706336939722, |
|
"grad_norm": 0.13710401952266693, |
|
"learning_rate": 0.00014505426356589148, |
|
"loss": 0.9134, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.2772797527047913, |
|
"grad_norm": 0.13901177048683167, |
|
"learning_rate": 0.0001449922480620155, |
|
"loss": 0.9356, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.27758887171561053, |
|
"grad_norm": 0.1236179992556572, |
|
"learning_rate": 0.00014493023255813953, |
|
"loss": 0.8756, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.2778979907264297, |
|
"grad_norm": 0.11315148323774338, |
|
"learning_rate": 0.00014486821705426357, |
|
"loss": 0.762, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.2782071097372488, |
|
"grad_norm": 0.14273928105831146, |
|
"learning_rate": 0.0001448062015503876, |
|
"loss": 0.7974, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.27851622874806803, |
|
"grad_norm": 0.12433210015296936, |
|
"learning_rate": 0.00014474418604651162, |
|
"loss": 0.8364, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.2788253477588872, |
|
"grad_norm": 0.13087347149848938, |
|
"learning_rate": 0.00014468217054263566, |
|
"loss": 0.8037, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.2791344667697063, |
|
"grad_norm": 0.1182572990655899, |
|
"learning_rate": 0.0001446201550387597, |
|
"loss": 0.8244, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.2794435857805255, |
|
"grad_norm": 0.11682897806167603, |
|
"learning_rate": 0.00014455813953488374, |
|
"loss": 0.823, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.2797527047913447, |
|
"grad_norm": 0.12286652624607086, |
|
"learning_rate": 0.00014449612403100777, |
|
"loss": 0.8253, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.2800618238021638, |
|
"grad_norm": 0.1269593983888626, |
|
"learning_rate": 0.0001444341085271318, |
|
"loss": 0.573, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.280370942812983, |
|
"grad_norm": 0.11785610765218735, |
|
"learning_rate": 0.00014437209302325583, |
|
"loss": 0.7233, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.2806800618238022, |
|
"grad_norm": 0.1237734779715538, |
|
"learning_rate": 0.00014431007751937986, |
|
"loss": 0.82, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.2809891808346213, |
|
"grad_norm": 0.12301222234964371, |
|
"learning_rate": 0.0001442480620155039, |
|
"loss": 0.8205, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.28129829984544047, |
|
"grad_norm": 0.13933341205120087, |
|
"learning_rate": 0.00014418604651162791, |
|
"loss": 0.8397, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.2816074188562597, |
|
"grad_norm": 0.13418903946876526, |
|
"learning_rate": 0.00014412403100775193, |
|
"loss": 0.7872, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.2819165378670788, |
|
"grad_norm": 0.11472947895526886, |
|
"learning_rate": 0.00014406201550387596, |
|
"loss": 0.8015, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.28222565687789797, |
|
"grad_norm": 0.11485429853200912, |
|
"learning_rate": 0.000144, |
|
"loss": 0.803, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.28253477588871717, |
|
"grad_norm": 0.12704961001873016, |
|
"learning_rate": 0.00014393798449612404, |
|
"loss": 0.9043, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.2828438948995363, |
|
"grad_norm": 0.12076624482870102, |
|
"learning_rate": 0.00014387596899224805, |
|
"loss": 0.8441, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.28315301391035547, |
|
"grad_norm": 0.12196331471204758, |
|
"learning_rate": 0.0001438139534883721, |
|
"loss": 0.7216, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.28346213292117467, |
|
"grad_norm": 0.12628835439682007, |
|
"learning_rate": 0.00014375193798449613, |
|
"loss": 0.7536, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.2837712519319938, |
|
"grad_norm": 0.12595216929912567, |
|
"learning_rate": 0.00014368992248062017, |
|
"loss": 0.8964, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.28408037094281297, |
|
"grad_norm": 0.10500409454107285, |
|
"learning_rate": 0.0001436279069767442, |
|
"loss": 0.8401, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.28438948995363217, |
|
"grad_norm": 0.12638381123542786, |
|
"learning_rate": 0.00014356589147286822, |
|
"loss": 0.796, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.2846986089644513, |
|
"grad_norm": 0.14120124280452728, |
|
"learning_rate": 0.00014350387596899226, |
|
"loss": 0.8018, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.28500772797527046, |
|
"grad_norm": 0.12073471397161484, |
|
"learning_rate": 0.0001434418604651163, |
|
"loss": 0.8203, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.28531684698608967, |
|
"grad_norm": 0.10508771985769272, |
|
"learning_rate": 0.0001433798449612403, |
|
"loss": 0.7027, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.2856259659969088, |
|
"grad_norm": 0.12278520315885544, |
|
"learning_rate": 0.00014331782945736435, |
|
"loss": 0.7783, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.28593508500772796, |
|
"grad_norm": 0.10832314193248749, |
|
"learning_rate": 0.00014325581395348836, |
|
"loss": 0.7251, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.28624420401854717, |
|
"grad_norm": 0.12536031007766724, |
|
"learning_rate": 0.0001431937984496124, |
|
"loss": 0.7232, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.2865533230293663, |
|
"grad_norm": 0.143062561750412, |
|
"learning_rate": 0.00014313178294573644, |
|
"loss": 0.7258, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.28686244204018546, |
|
"grad_norm": 0.11144435405731201, |
|
"learning_rate": 0.00014306976744186048, |
|
"loss": 0.7562, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.2871715610510046, |
|
"grad_norm": 0.12134916335344315, |
|
"learning_rate": 0.00014300775193798452, |
|
"loss": 0.7109, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.2874806800618238, |
|
"grad_norm": 0.1274683177471161, |
|
"learning_rate": 0.00014294573643410853, |
|
"loss": 0.8072, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.28778979907264296, |
|
"grad_norm": 0.13728466629981995, |
|
"learning_rate": 0.00014288372093023257, |
|
"loss": 0.7393, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.2880989180834621, |
|
"grad_norm": 0.13668936491012573, |
|
"learning_rate": 0.0001428217054263566, |
|
"loss": 0.8375, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.2884080370942813, |
|
"grad_norm": 0.14077217876911163, |
|
"learning_rate": 0.00014275968992248064, |
|
"loss": 0.7897, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.28871715610510046, |
|
"grad_norm": 0.13246707618236542, |
|
"learning_rate": 0.00014269767441860465, |
|
"loss": 0.8227, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.2890262751159196, |
|
"grad_norm": 0.11966849118471146, |
|
"learning_rate": 0.0001426356589147287, |
|
"loss": 0.8414, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.2893353941267388, |
|
"grad_norm": 0.12089065462350845, |
|
"learning_rate": 0.00014257364341085273, |
|
"loss": 0.7711, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.28964451313755796, |
|
"grad_norm": 0.1274116039276123, |
|
"learning_rate": 0.00014251162790697674, |
|
"loss": 0.6885, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.2899536321483771, |
|
"grad_norm": 0.13811667263507843, |
|
"learning_rate": 0.00014244961240310078, |
|
"loss": 0.6859, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.2902627511591963, |
|
"grad_norm": 0.1394423097372055, |
|
"learning_rate": 0.0001423875968992248, |
|
"loss": 0.7569, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.29057187017001546, |
|
"grad_norm": 0.11885955184698105, |
|
"learning_rate": 0.00014232558139534883, |
|
"loss": 0.7627, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.2908809891808346, |
|
"grad_norm": 0.12418286502361298, |
|
"learning_rate": 0.00014226356589147287, |
|
"loss": 0.7303, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.2911901081916538, |
|
"grad_norm": 0.12816603481769562, |
|
"learning_rate": 0.0001422015503875969, |
|
"loss": 0.747, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.29149922720247295, |
|
"grad_norm": 0.10445892065763474, |
|
"learning_rate": 0.00014213953488372095, |
|
"loss": 0.7347, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.2918083462132921, |
|
"grad_norm": 0.13160108029842377, |
|
"learning_rate": 0.00014207751937984496, |
|
"loss": 0.7364, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.2921174652241113, |
|
"grad_norm": 0.116938017308712, |
|
"learning_rate": 0.000142015503875969, |
|
"loss": 0.8129, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.29242658423493045, |
|
"grad_norm": 0.13014064729213715, |
|
"learning_rate": 0.00014195348837209304, |
|
"loss": 0.8444, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.2927357032457496, |
|
"grad_norm": 0.12289168685674667, |
|
"learning_rate": 0.00014189147286821708, |
|
"loss": 0.8332, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.2930448222565688, |
|
"grad_norm": 0.11512966454029083, |
|
"learning_rate": 0.0001418294573643411, |
|
"loss": 0.7877, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.29335394126738795, |
|
"grad_norm": 0.11333896219730377, |
|
"learning_rate": 0.00014176744186046513, |
|
"loss": 0.733, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.2936630602782071, |
|
"grad_norm": 0.1227252408862114, |
|
"learning_rate": 0.00014170542635658914, |
|
"loss": 0.7675, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.29397217928902625, |
|
"grad_norm": 0.11178798228502274, |
|
"learning_rate": 0.00014164341085271318, |
|
"loss": 0.6638, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.29428129829984545, |
|
"grad_norm": 0.11883097887039185, |
|
"learning_rate": 0.00014158139534883722, |
|
"loss": 0.7909, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.2945904173106646, |
|
"grad_norm": 0.1324370801448822, |
|
"learning_rate": 0.00014151937984496126, |
|
"loss": 0.717, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.29489953632148375, |
|
"grad_norm": 0.1309555619955063, |
|
"learning_rate": 0.00014145736434108527, |
|
"loss": 0.7538, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.29520865533230295, |
|
"grad_norm": 0.12644729018211365, |
|
"learning_rate": 0.0001413953488372093, |
|
"loss": 0.6985, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.2955177743431221, |
|
"grad_norm": 0.10941684246063232, |
|
"learning_rate": 0.00014133333333333334, |
|
"loss": 0.8046, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.29582689335394124, |
|
"grad_norm": 0.1376543492078781, |
|
"learning_rate": 0.00014127131782945738, |
|
"loss": 0.7915, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.29613601236476045, |
|
"grad_norm": 0.14741478860378265, |
|
"learning_rate": 0.0001412093023255814, |
|
"loss": 0.9085, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.2964451313755796, |
|
"grad_norm": 0.12666583061218262, |
|
"learning_rate": 0.00014114728682170543, |
|
"loss": 0.793, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.29675425038639874, |
|
"grad_norm": 0.12379190325737, |
|
"learning_rate": 0.00014108527131782947, |
|
"loss": 0.8256, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.29706336939721795, |
|
"grad_norm": 0.11084531992673874, |
|
"learning_rate": 0.0001410232558139535, |
|
"loss": 0.796, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.2973724884080371, |
|
"grad_norm": 0.12731553614139557, |
|
"learning_rate": 0.00014096124031007752, |
|
"loss": 0.68, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.29768160741885624, |
|
"grad_norm": 0.1280289590358734, |
|
"learning_rate": 0.00014089922480620153, |
|
"loss": 0.8189, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.29799072642967545, |
|
"grad_norm": 0.12680752575397491, |
|
"learning_rate": 0.00014083720930232557, |
|
"loss": 0.8237, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.2982998454404946, |
|
"grad_norm": 0.13440905511379242, |
|
"learning_rate": 0.0001407751937984496, |
|
"loss": 0.7518, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.29860896445131374, |
|
"grad_norm": 0.112543486058712, |
|
"learning_rate": 0.00014071317829457365, |
|
"loss": 0.7725, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.29891808346213294, |
|
"grad_norm": 0.126234233379364, |
|
"learning_rate": 0.0001406511627906977, |
|
"loss": 0.6715, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.2992272024729521, |
|
"grad_norm": 0.13545869290828705, |
|
"learning_rate": 0.0001405891472868217, |
|
"loss": 0.7503, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.29953632148377124, |
|
"grad_norm": 0.12928856909275055, |
|
"learning_rate": 0.00014052713178294574, |
|
"loss": 0.7488, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.29984544049459044, |
|
"grad_norm": 0.1343362033367157, |
|
"learning_rate": 0.00014046511627906978, |
|
"loss": 0.7984, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.3001545595054096, |
|
"grad_norm": 0.13031892478466034, |
|
"learning_rate": 0.00014040310077519382, |
|
"loss": 0.7409, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.30046367851622874, |
|
"grad_norm": 0.12235540896654129, |
|
"learning_rate": 0.00014034108527131783, |
|
"loss": 0.8135, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.3007727975270479, |
|
"grad_norm": 0.1327418088912964, |
|
"learning_rate": 0.00014027906976744187, |
|
"loss": 0.8359, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.3010819165378671, |
|
"grad_norm": 0.12208300828933716, |
|
"learning_rate": 0.0001402170542635659, |
|
"loss": 0.8496, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.30139103554868624, |
|
"grad_norm": 0.11996152997016907, |
|
"learning_rate": 0.00014015503875968995, |
|
"loss": 0.7629, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.3017001545595054, |
|
"grad_norm": 0.12489623576402664, |
|
"learning_rate": 0.00014009302325581398, |
|
"loss": 0.7716, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.3020092735703246, |
|
"grad_norm": 0.11581925302743912, |
|
"learning_rate": 0.00014003100775193797, |
|
"loss": 0.881, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.30231839258114374, |
|
"grad_norm": 0.12472864985466003, |
|
"learning_rate": 0.000139968992248062, |
|
"loss": 0.7534, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.3026275115919629, |
|
"grad_norm": 0.11038485169410706, |
|
"learning_rate": 0.00013990697674418605, |
|
"loss": 0.6802, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.3029366306027821, |
|
"grad_norm": 0.12170151621103287, |
|
"learning_rate": 0.00013984496124031008, |
|
"loss": 0.7849, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.30324574961360123, |
|
"grad_norm": 0.12583118677139282, |
|
"learning_rate": 0.00013978294573643412, |
|
"loss": 0.6808, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.3035548686244204, |
|
"grad_norm": 0.12267141789197922, |
|
"learning_rate": 0.00013972093023255813, |
|
"loss": 0.8894, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.3038639876352396, |
|
"grad_norm": 0.12336152046918869, |
|
"learning_rate": 0.00013965891472868217, |
|
"loss": 0.7588, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.30417310664605873, |
|
"grad_norm": 0.13550814986228943, |
|
"learning_rate": 0.0001395968992248062, |
|
"loss": 0.7588, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.3044822256568779, |
|
"grad_norm": 0.12295803427696228, |
|
"learning_rate": 0.00013953488372093025, |
|
"loss": 0.8387, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.3047913446676971, |
|
"grad_norm": 0.12663382291793823, |
|
"learning_rate": 0.00013947286821705426, |
|
"loss": 0.7513, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.30510046367851623, |
|
"grad_norm": 0.1203293651342392, |
|
"learning_rate": 0.0001394108527131783, |
|
"loss": 0.8078, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.3054095826893354, |
|
"grad_norm": 0.13784480094909668, |
|
"learning_rate": 0.00013934883720930234, |
|
"loss": 0.8303, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.3057187017001546, |
|
"grad_norm": 0.13811154663562775, |
|
"learning_rate": 0.00013928682170542638, |
|
"loss": 0.7439, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.30602782071097373, |
|
"grad_norm": 0.1402239203453064, |
|
"learning_rate": 0.0001392248062015504, |
|
"loss": 0.835, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.3063369397217929, |
|
"grad_norm": 0.1344003528356552, |
|
"learning_rate": 0.00013916279069767443, |
|
"loss": 0.7973, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.3066460587326121, |
|
"grad_norm": 0.10925968736410141, |
|
"learning_rate": 0.00013910077519379844, |
|
"loss": 0.7921, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.30695517774343123, |
|
"grad_norm": 0.12327813357114792, |
|
"learning_rate": 0.00013903875968992248, |
|
"loss": 0.8377, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.3072642967542504, |
|
"grad_norm": 0.12558946013450623, |
|
"learning_rate": 0.00013897674418604652, |
|
"loss": 0.6776, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.3075734157650695, |
|
"grad_norm": 0.1224449872970581, |
|
"learning_rate": 0.00013891472868217056, |
|
"loss": 0.7526, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.3078825347758887, |
|
"grad_norm": 0.11907488107681274, |
|
"learning_rate": 0.00013885271317829457, |
|
"loss": 0.752, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.3081916537867079, |
|
"grad_norm": 0.12344703823328018, |
|
"learning_rate": 0.0001387906976744186, |
|
"loss": 0.7244, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.308500772797527, |
|
"grad_norm": 0.10863327980041504, |
|
"learning_rate": 0.00013872868217054265, |
|
"loss": 0.7937, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.3088098918083462, |
|
"grad_norm": 0.11824218183755875, |
|
"learning_rate": 0.00013866666666666669, |
|
"loss": 0.8872, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.3091190108191654, |
|
"grad_norm": 0.11574976146221161, |
|
"learning_rate": 0.00013860465116279072, |
|
"loss": 0.7953, |
|
"step": 1000 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 3235, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 1.2994588591340913e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|