|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9996163752945689, |
|
"eval_steps": 500, |
|
"global_step": 1140, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0008768564695566394, |
|
"grad_norm": 3.8354088038954104, |
|
"learning_rate": 5.0000000000000004e-08, |
|
"loss": 0.8827, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0017537129391132788, |
|
"grad_norm": 3.854484535409196, |
|
"learning_rate": 1.0000000000000001e-07, |
|
"loss": 0.8816, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0026305694086699184, |
|
"grad_norm": 3.871894613191576, |
|
"learning_rate": 1.5000000000000002e-07, |
|
"loss": 0.8801, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0035074258782265577, |
|
"grad_norm": 4.015192807591418, |
|
"learning_rate": 2.0000000000000002e-07, |
|
"loss": 0.8778, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.004384282347783197, |
|
"grad_norm": 3.8093684146898625, |
|
"learning_rate": 2.5000000000000004e-07, |
|
"loss": 0.8711, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.005261138817339837, |
|
"grad_norm": 3.8610474891808035, |
|
"learning_rate": 3.0000000000000004e-07, |
|
"loss": 0.8774, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0061379952868964765, |
|
"grad_norm": 3.7967273935876027, |
|
"learning_rate": 3.5000000000000004e-07, |
|
"loss": 0.8669, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.007014851756453115, |
|
"grad_norm": 3.6775126026184703, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"loss": 0.8605, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.007891708226009755, |
|
"grad_norm": 3.8340713786963674, |
|
"learning_rate": 4.5000000000000003e-07, |
|
"loss": 0.8735, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.008768564695566394, |
|
"grad_norm": 3.7479501504503463, |
|
"learning_rate": 5.000000000000001e-07, |
|
"loss": 0.8843, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.009645421165123035, |
|
"grad_norm": 3.6317203672346734, |
|
"learning_rate": 5.5e-07, |
|
"loss": 0.8637, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.010522277634679673, |
|
"grad_norm": 3.512911808429478, |
|
"learning_rate": 6.000000000000001e-07, |
|
"loss": 0.8649, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.011399134104236312, |
|
"grad_norm": 3.5056527507086486, |
|
"learning_rate": 6.5e-07, |
|
"loss": 0.8514, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.012275990573792953, |
|
"grad_norm": 3.150666271402955, |
|
"learning_rate": 7.000000000000001e-07, |
|
"loss": 0.844, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.013152847043349592, |
|
"grad_norm": 2.92608322776606, |
|
"learning_rate": 7.5e-07, |
|
"loss": 0.8382, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.01402970351290623, |
|
"grad_norm": 3.0202821236842246, |
|
"learning_rate": 8.000000000000001e-07, |
|
"loss": 0.8419, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.014906559982462871, |
|
"grad_norm": 2.9419098502173515, |
|
"learning_rate": 8.500000000000001e-07, |
|
"loss": 0.8362, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.01578341645201951, |
|
"grad_norm": 2.7926753613205433, |
|
"learning_rate": 9.000000000000001e-07, |
|
"loss": 0.825, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.01666027292157615, |
|
"grad_norm": 2.4471605086654096, |
|
"learning_rate": 9.500000000000001e-07, |
|
"loss": 0.7904, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.017537129391132788, |
|
"grad_norm": 1.8918627793518321, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7968, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.018413985860689427, |
|
"grad_norm": 1.713937144355921, |
|
"learning_rate": 1.0500000000000001e-06, |
|
"loss": 0.7828, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.01929084233024607, |
|
"grad_norm": 1.4451729443975803, |
|
"learning_rate": 1.1e-06, |
|
"loss": 0.78, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.020167698799802708, |
|
"grad_norm": 1.0866085026095695, |
|
"learning_rate": 1.1500000000000002e-06, |
|
"loss": 0.7807, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.021044555269359347, |
|
"grad_norm": 1.022948274017058, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"loss": 0.758, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.021921411738915986, |
|
"grad_norm": 0.976807823206357, |
|
"learning_rate": 1.25e-06, |
|
"loss": 0.7783, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.022798268208472625, |
|
"grad_norm": 2.5562950715507275, |
|
"learning_rate": 1.3e-06, |
|
"loss": 0.7815, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.023675124678029263, |
|
"grad_norm": 1.7956421603987698, |
|
"learning_rate": 1.3500000000000002e-06, |
|
"loss": 0.759, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.024551981147585906, |
|
"grad_norm": 1.3622207205502601, |
|
"learning_rate": 1.4000000000000001e-06, |
|
"loss": 0.7551, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.025428837617142545, |
|
"grad_norm": 0.9842354354215974, |
|
"learning_rate": 1.45e-06, |
|
"loss": 0.7625, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.026305694086699184, |
|
"grad_norm": 0.7679059075291825, |
|
"learning_rate": 1.5e-06, |
|
"loss": 0.7513, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.027182550556255822, |
|
"grad_norm": 0.709914193704945, |
|
"learning_rate": 1.5500000000000002e-06, |
|
"loss": 0.7309, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.02805940702581246, |
|
"grad_norm": 0.5711165082308596, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"loss": 0.7358, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.0289362634953691, |
|
"grad_norm": 0.6732600160748007, |
|
"learning_rate": 1.6500000000000003e-06, |
|
"loss": 0.746, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.029813119964925743, |
|
"grad_norm": 0.519623223105866, |
|
"learning_rate": 1.7000000000000002e-06, |
|
"loss": 0.7408, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.03068997643448238, |
|
"grad_norm": 0.4967853550459734, |
|
"learning_rate": 1.75e-06, |
|
"loss": 0.7284, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.03156683290403902, |
|
"grad_norm": 0.4558474579400771, |
|
"learning_rate": 1.8000000000000001e-06, |
|
"loss": 0.7337, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.03244368937359566, |
|
"grad_norm": 0.5187940265183988, |
|
"learning_rate": 1.85e-06, |
|
"loss": 0.7459, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.0333205458431523, |
|
"grad_norm": 0.46649520265418404, |
|
"learning_rate": 1.9000000000000002e-06, |
|
"loss": 0.7238, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.03419740231270894, |
|
"grad_norm": 0.4621107554297482, |
|
"learning_rate": 1.9500000000000004e-06, |
|
"loss": 0.7243, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.035074258782265576, |
|
"grad_norm": 0.4493723053379801, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.7395, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.035951115251822215, |
|
"grad_norm": 0.4196555282378131, |
|
"learning_rate": 2.05e-06, |
|
"loss": 0.7371, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.036827971721378853, |
|
"grad_norm": 0.3836269605839978, |
|
"learning_rate": 2.1000000000000002e-06, |
|
"loss": 0.7172, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.0377048281909355, |
|
"grad_norm": 0.38056806308372326, |
|
"learning_rate": 2.15e-06, |
|
"loss": 0.7163, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.03858168466049214, |
|
"grad_norm": 0.3561457145290273, |
|
"learning_rate": 2.2e-06, |
|
"loss": 0.6986, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.03945854113004878, |
|
"grad_norm": 0.3723153937166507, |
|
"learning_rate": 2.25e-06, |
|
"loss": 0.7154, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.040335397599605416, |
|
"grad_norm": 0.36630666691552083, |
|
"learning_rate": 2.3000000000000004e-06, |
|
"loss": 0.7201, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.041212254069162055, |
|
"grad_norm": 0.3482645877468935, |
|
"learning_rate": 2.35e-06, |
|
"loss": 0.7213, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.042089110538718694, |
|
"grad_norm": 0.35892687942862245, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 0.7167, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.04296596700827533, |
|
"grad_norm": 0.3353339246028489, |
|
"learning_rate": 2.4500000000000003e-06, |
|
"loss": 0.7154, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.04384282347783197, |
|
"grad_norm": 0.3327601533732165, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.7149, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04471967994738861, |
|
"grad_norm": 0.31047839521651305, |
|
"learning_rate": 2.55e-06, |
|
"loss": 0.7022, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.04559653641694525, |
|
"grad_norm": 0.3140715368302216, |
|
"learning_rate": 2.6e-06, |
|
"loss": 0.7024, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.04647339288650189, |
|
"grad_norm": 0.3070088967685052, |
|
"learning_rate": 2.6500000000000005e-06, |
|
"loss": 0.7116, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.04735024935605853, |
|
"grad_norm": 0.29688015435603987, |
|
"learning_rate": 2.7000000000000004e-06, |
|
"loss": 0.7068, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.04822710582561517, |
|
"grad_norm": 0.312569173156887, |
|
"learning_rate": 2.7500000000000004e-06, |
|
"loss": 0.708, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.04910396229517181, |
|
"grad_norm": 0.3212155084231398, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"loss": 0.6895, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.04998081876472845, |
|
"grad_norm": 0.30141336197411556, |
|
"learning_rate": 2.85e-06, |
|
"loss": 0.714, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.05085767523428509, |
|
"grad_norm": 0.2678799864293998, |
|
"learning_rate": 2.9e-06, |
|
"loss": 0.6864, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.05173453170384173, |
|
"grad_norm": 0.2763602360222888, |
|
"learning_rate": 2.95e-06, |
|
"loss": 0.6955, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.05261138817339837, |
|
"grad_norm": 0.2960116429627635, |
|
"learning_rate": 3e-06, |
|
"loss": 0.69, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.053488244642955006, |
|
"grad_norm": 0.3126860845251708, |
|
"learning_rate": 3.05e-06, |
|
"loss": 0.7008, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.054365101112511645, |
|
"grad_norm": 0.2684477743603555, |
|
"learning_rate": 3.1000000000000004e-06, |
|
"loss": 0.7065, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.055241957582068284, |
|
"grad_norm": 0.2831279869843839, |
|
"learning_rate": 3.1500000000000003e-06, |
|
"loss": 0.6908, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.05611881405162492, |
|
"grad_norm": 0.28914936357131454, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"loss": 0.6847, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.05699567052118156, |
|
"grad_norm": 0.2664694092243829, |
|
"learning_rate": 3.2500000000000002e-06, |
|
"loss": 0.6975, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.0578725269907382, |
|
"grad_norm": 0.2670931319561963, |
|
"learning_rate": 3.3000000000000006e-06, |
|
"loss": 0.6957, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.058749383460294846, |
|
"grad_norm": 0.25481964712146327, |
|
"learning_rate": 3.3500000000000005e-06, |
|
"loss": 0.6907, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.059626239929851485, |
|
"grad_norm": 0.2917224006438053, |
|
"learning_rate": 3.4000000000000005e-06, |
|
"loss": 0.6889, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.060503096399408124, |
|
"grad_norm": 0.27794604488949715, |
|
"learning_rate": 3.45e-06, |
|
"loss": 0.6815, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.06137995286896476, |
|
"grad_norm": 0.24963117175569036, |
|
"learning_rate": 3.5e-06, |
|
"loss": 0.6883, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0622568093385214, |
|
"grad_norm": 0.2893133633641976, |
|
"learning_rate": 3.5500000000000003e-06, |
|
"loss": 0.6792, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.06313366580807804, |
|
"grad_norm": 0.2826308836822568, |
|
"learning_rate": 3.6000000000000003e-06, |
|
"loss": 0.7028, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.06401052227763468, |
|
"grad_norm": 0.2640935466003184, |
|
"learning_rate": 3.65e-06, |
|
"loss": 0.6916, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.06488737874719132, |
|
"grad_norm": 0.24415033172628944, |
|
"learning_rate": 3.7e-06, |
|
"loss": 0.6839, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.06576423521674796, |
|
"grad_norm": 0.3112401087242733, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"loss": 0.7021, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.0666410916863046, |
|
"grad_norm": 0.2875281112172732, |
|
"learning_rate": 3.8000000000000005e-06, |
|
"loss": 0.6929, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.06751794815586123, |
|
"grad_norm": 0.2874092373703745, |
|
"learning_rate": 3.85e-06, |
|
"loss": 0.6788, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.06839480462541787, |
|
"grad_norm": 0.26681007920352356, |
|
"learning_rate": 3.900000000000001e-06, |
|
"loss": 0.6881, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.06927166109497451, |
|
"grad_norm": 0.25207102904583284, |
|
"learning_rate": 3.95e-06, |
|
"loss": 0.6852, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.07014851756453115, |
|
"grad_norm": 0.2747607135538642, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.6864, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.07102537403408779, |
|
"grad_norm": 0.26361955079133653, |
|
"learning_rate": 4.05e-06, |
|
"loss": 0.685, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.07190223050364443, |
|
"grad_norm": 0.33310729956901713, |
|
"learning_rate": 4.1e-06, |
|
"loss": 0.6803, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.07277908697320107, |
|
"grad_norm": 0.2453664087918243, |
|
"learning_rate": 4.15e-06, |
|
"loss": 0.6761, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.07365594344275771, |
|
"grad_norm": 0.2908734202511105, |
|
"learning_rate": 4.2000000000000004e-06, |
|
"loss": 0.6931, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.07453279991231436, |
|
"grad_norm": 0.2786719287704165, |
|
"learning_rate": 4.25e-06, |
|
"loss": 0.6874, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.075409656381871, |
|
"grad_norm": 0.271512101257661, |
|
"learning_rate": 4.3e-06, |
|
"loss": 0.6775, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.07628651285142764, |
|
"grad_norm": 0.2947304767213564, |
|
"learning_rate": 4.350000000000001e-06, |
|
"loss": 0.6865, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.07716336932098428, |
|
"grad_norm": 0.25160176616217883, |
|
"learning_rate": 4.4e-06, |
|
"loss": 0.6785, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.07804022579054092, |
|
"grad_norm": 0.32459153781403244, |
|
"learning_rate": 4.450000000000001e-06, |
|
"loss": 0.6773, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.07891708226009755, |
|
"grad_norm": 0.2487028104553641, |
|
"learning_rate": 4.5e-06, |
|
"loss": 0.6812, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07979393872965419, |
|
"grad_norm": 0.2925038544983962, |
|
"learning_rate": 4.5500000000000005e-06, |
|
"loss": 0.6791, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.08067079519921083, |
|
"grad_norm": 0.28005649996035475, |
|
"learning_rate": 4.600000000000001e-06, |
|
"loss": 0.6704, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.08154765166876747, |
|
"grad_norm": 0.3264776457957641, |
|
"learning_rate": 4.65e-06, |
|
"loss": 0.6772, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.08242450813832411, |
|
"grad_norm": 0.2533079586966528, |
|
"learning_rate": 4.7e-06, |
|
"loss": 0.6792, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.08330136460788075, |
|
"grad_norm": 0.25651763696878965, |
|
"learning_rate": 4.75e-06, |
|
"loss": 0.6607, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.08417822107743739, |
|
"grad_norm": 0.2546288408258964, |
|
"learning_rate": 4.800000000000001e-06, |
|
"loss": 0.6669, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.08505507754699403, |
|
"grad_norm": 0.25215356470309513, |
|
"learning_rate": 4.85e-06, |
|
"loss": 0.6846, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.08593193401655067, |
|
"grad_norm": 0.28631928221309494, |
|
"learning_rate": 4.9000000000000005e-06, |
|
"loss": 0.6717, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.0868087904861073, |
|
"grad_norm": 0.27212851090592044, |
|
"learning_rate": 4.95e-06, |
|
"loss": 0.6804, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.08768564695566394, |
|
"grad_norm": 0.29348118762199116, |
|
"learning_rate": 5e-06, |
|
"loss": 0.669, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08856250342522058, |
|
"grad_norm": 0.30678288402779474, |
|
"learning_rate": 4.999998880733363e-06, |
|
"loss": 0.6631, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.08943935989477722, |
|
"grad_norm": 0.3011120934546324, |
|
"learning_rate": 4.999995522934454e-06, |
|
"loss": 0.679, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.09031621636433386, |
|
"grad_norm": 0.31706623056013666, |
|
"learning_rate": 4.9999899266062804e-06, |
|
"loss": 0.6723, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.0911930728338905, |
|
"grad_norm": 0.3120471729111099, |
|
"learning_rate": 4.999982091753851e-06, |
|
"loss": 0.6613, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.09206992930344714, |
|
"grad_norm": 0.2905613969012575, |
|
"learning_rate": 4.999972018384183e-06, |
|
"loss": 0.6611, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.09294678577300378, |
|
"grad_norm": 0.28925318733211003, |
|
"learning_rate": 4.999959706506297e-06, |
|
"loss": 0.6695, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.09382364224256041, |
|
"grad_norm": 0.28085987028825943, |
|
"learning_rate": 4.999945156131215e-06, |
|
"loss": 0.6502, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.09470049871211705, |
|
"grad_norm": 0.30971852568333075, |
|
"learning_rate": 4.9999283672719665e-06, |
|
"loss": 0.672, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.0955773551816737, |
|
"grad_norm": 0.32363303577963826, |
|
"learning_rate": 4.999909339943585e-06, |
|
"loss": 0.673, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.09645421165123035, |
|
"grad_norm": 0.29549042512555623, |
|
"learning_rate": 4.999888074163108e-06, |
|
"loss": 0.6591, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.09733106812078698, |
|
"grad_norm": 0.33514032815726946, |
|
"learning_rate": 4.999864569949576e-06, |
|
"loss": 0.6673, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.09820792459034362, |
|
"grad_norm": 0.3092438114721304, |
|
"learning_rate": 4.999838827324036e-06, |
|
"loss": 0.6641, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.09908478105990026, |
|
"grad_norm": 0.35403209993563217, |
|
"learning_rate": 4.999810846309539e-06, |
|
"loss": 0.6597, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.0999616375294569, |
|
"grad_norm": 0.2964896689419525, |
|
"learning_rate": 4.999780626931136e-06, |
|
"loss": 0.67, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.10083849399901354, |
|
"grad_norm": 0.3484706075226941, |
|
"learning_rate": 4.999748169215891e-06, |
|
"loss": 0.6745, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.10171535046857018, |
|
"grad_norm": 0.33505074735981694, |
|
"learning_rate": 4.999713473192863e-06, |
|
"loss": 0.6591, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.10259220693812682, |
|
"grad_norm": 0.27082614750107925, |
|
"learning_rate": 4.999676538893121e-06, |
|
"loss": 0.6621, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.10346906340768346, |
|
"grad_norm": 0.3506965847465109, |
|
"learning_rate": 4.999637366349736e-06, |
|
"loss": 0.6733, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.1043459198772401, |
|
"grad_norm": 0.27422374937685745, |
|
"learning_rate": 4.999595955597784e-06, |
|
"loss": 0.655, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.10522277634679673, |
|
"grad_norm": 0.33620430443399, |
|
"learning_rate": 4.999552306674345e-06, |
|
"loss": 0.6755, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.10609963281635337, |
|
"grad_norm": 0.2837804889330797, |
|
"learning_rate": 4.999506419618502e-06, |
|
"loss": 0.6579, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.10697648928591001, |
|
"grad_norm": 0.37952040871876175, |
|
"learning_rate": 4.999458294471342e-06, |
|
"loss": 0.6692, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.10785334575546665, |
|
"grad_norm": 0.2690864525050558, |
|
"learning_rate": 4.99940793127596e-06, |
|
"loss": 0.6494, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.10873020222502329, |
|
"grad_norm": 0.3635002166658454, |
|
"learning_rate": 4.999355330077449e-06, |
|
"loss": 0.6611, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.10960705869457993, |
|
"grad_norm": 0.29302462194523843, |
|
"learning_rate": 4.999300490922911e-06, |
|
"loss": 0.6526, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.11048391516413657, |
|
"grad_norm": 0.3058787861740299, |
|
"learning_rate": 4.999243413861447e-06, |
|
"loss": 0.659, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.1113607716336932, |
|
"grad_norm": 0.332548080761125, |
|
"learning_rate": 4.9991840989441665e-06, |
|
"loss": 0.6659, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.11223762810324985, |
|
"grad_norm": 0.29432766212441813, |
|
"learning_rate": 4.999122546224181e-06, |
|
"loss": 0.6447, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.11311448457280648, |
|
"grad_norm": 0.29523416391879537, |
|
"learning_rate": 4.999058755756605e-06, |
|
"loss": 0.6587, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.11399134104236312, |
|
"grad_norm": 0.32423165831626255, |
|
"learning_rate": 4.998992727598557e-06, |
|
"loss": 0.6564, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.11486819751191976, |
|
"grad_norm": 0.34859884756639065, |
|
"learning_rate": 4.99892446180916e-06, |
|
"loss": 0.653, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.1157450539814764, |
|
"grad_norm": 0.30133447855543133, |
|
"learning_rate": 4.99885395844954e-06, |
|
"loss": 0.647, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.11662191045103305, |
|
"grad_norm": 0.3600942516700186, |
|
"learning_rate": 4.998781217582827e-06, |
|
"loss": 0.6581, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.11749876692058969, |
|
"grad_norm": 0.29960571448156953, |
|
"learning_rate": 4.998706239274153e-06, |
|
"loss": 0.6623, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.11837562339014633, |
|
"grad_norm": 0.2992208264370026, |
|
"learning_rate": 4.998629023590656e-06, |
|
"loss": 0.6538, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.11925247985970297, |
|
"grad_norm": 0.36522912538035174, |
|
"learning_rate": 4.998549570601475e-06, |
|
"loss": 0.6566, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.12012933632925961, |
|
"grad_norm": 0.2988448634710597, |
|
"learning_rate": 4.998467880377754e-06, |
|
"loss": 0.673, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.12100619279881625, |
|
"grad_norm": 0.32912250244162505, |
|
"learning_rate": 4.998383952992639e-06, |
|
"loss": 0.6482, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.12188304926837289, |
|
"grad_norm": 0.37178534793553225, |
|
"learning_rate": 4.998297788521279e-06, |
|
"loss": 0.6546, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.12275990573792953, |
|
"grad_norm": 0.28062782891296695, |
|
"learning_rate": 4.998209387040829e-06, |
|
"loss": 0.6527, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.12363676220748616, |
|
"grad_norm": 0.33723394797540485, |
|
"learning_rate": 4.998118748630443e-06, |
|
"loss": 0.6391, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.1245136186770428, |
|
"grad_norm": 0.2834572318610097, |
|
"learning_rate": 4.99802587337128e-06, |
|
"loss": 0.6443, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.12539047514659943, |
|
"grad_norm": 0.321495289367043, |
|
"learning_rate": 4.997930761346502e-06, |
|
"loss": 0.6507, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.12626733161615608, |
|
"grad_norm": 0.3419910878952078, |
|
"learning_rate": 4.997833412641274e-06, |
|
"loss": 0.6543, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.1271441880857127, |
|
"grad_norm": 0.28772221770446305, |
|
"learning_rate": 4.9977338273427625e-06, |
|
"loss": 0.6522, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.12802104455526936, |
|
"grad_norm": 0.29706932671928316, |
|
"learning_rate": 4.997632005540139e-06, |
|
"loss": 0.6677, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.128897901024826, |
|
"grad_norm": 0.29918610448467253, |
|
"learning_rate": 4.997527947324573e-06, |
|
"loss": 0.6475, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.12977475749438264, |
|
"grad_norm": 0.33103419851925103, |
|
"learning_rate": 4.997421652789243e-06, |
|
"loss": 0.67, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.1306516139639393, |
|
"grad_norm": 0.27012500247528487, |
|
"learning_rate": 4.9973131220293255e-06, |
|
"loss": 0.647, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.13152847043349591, |
|
"grad_norm": 0.297677443804652, |
|
"learning_rate": 4.9972023551419995e-06, |
|
"loss": 0.6519, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.13240532690305257, |
|
"grad_norm": 0.27386600476743567, |
|
"learning_rate": 4.997089352226448e-06, |
|
"loss": 0.6562, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.1332821833726092, |
|
"grad_norm": 0.3025435071675535, |
|
"learning_rate": 4.996974113383854e-06, |
|
"loss": 0.6485, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.13415903984216584, |
|
"grad_norm": 0.2928572797854547, |
|
"learning_rate": 4.996856638717406e-06, |
|
"loss": 0.641, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.13503589631172247, |
|
"grad_norm": 0.28232417223789874, |
|
"learning_rate": 4.996736928332292e-06, |
|
"loss": 0.6358, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.13591275278127912, |
|
"grad_norm": 0.33877806926878856, |
|
"learning_rate": 4.9966149823357e-06, |
|
"loss": 0.6558, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.13678960925083575, |
|
"grad_norm": 0.27274924720742, |
|
"learning_rate": 4.996490800836825e-06, |
|
"loss": 0.6553, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.1376664657203924, |
|
"grad_norm": 0.3145522020468823, |
|
"learning_rate": 4.996364383946859e-06, |
|
"loss": 0.6458, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.13854332218994903, |
|
"grad_norm": 0.28298098932682264, |
|
"learning_rate": 4.996235731778997e-06, |
|
"loss": 0.6467, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.13942017865950568, |
|
"grad_norm": 0.3289393703740858, |
|
"learning_rate": 4.996104844448438e-06, |
|
"loss": 0.6522, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.1402970351290623, |
|
"grad_norm": 0.3242491154179804, |
|
"learning_rate": 4.995971722072379e-06, |
|
"loss": 0.6579, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.14117389159861896, |
|
"grad_norm": 0.350063023556927, |
|
"learning_rate": 4.995836364770018e-06, |
|
"loss": 0.6639, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.14205074806817558, |
|
"grad_norm": 0.26800977502782475, |
|
"learning_rate": 4.995698772662558e-06, |
|
"loss": 0.6564, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.14292760453773223, |
|
"grad_norm": 0.37123972908338404, |
|
"learning_rate": 4.9955589458732e-06, |
|
"loss": 0.6521, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.14380446100728886, |
|
"grad_norm": 0.25568101611736427, |
|
"learning_rate": 4.995416884527147e-06, |
|
"loss": 0.6489, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.1446813174768455, |
|
"grad_norm": 0.3502739955437778, |
|
"learning_rate": 4.9952725887516015e-06, |
|
"loss": 0.6389, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.14555817394640214, |
|
"grad_norm": 0.2695951493086468, |
|
"learning_rate": 4.99512605867577e-06, |
|
"loss": 0.6409, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.1464350304159588, |
|
"grad_norm": 0.33224546665642934, |
|
"learning_rate": 4.994977294430856e-06, |
|
"loss": 0.6478, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.14731188688551541, |
|
"grad_norm": 0.26336591640433304, |
|
"learning_rate": 4.994826296150064e-06, |
|
"loss": 0.6416, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.14818874335507207, |
|
"grad_norm": 0.3158628283831438, |
|
"learning_rate": 4.9946730639686025e-06, |
|
"loss": 0.6397, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.14906559982462872, |
|
"grad_norm": 0.29572803602407627, |
|
"learning_rate": 4.9945175980236745e-06, |
|
"loss": 0.6356, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.14994245629418534, |
|
"grad_norm": 0.3344536076519792, |
|
"learning_rate": 4.99435989845449e-06, |
|
"loss": 0.6494, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.150819312763742, |
|
"grad_norm": 0.2811402499936693, |
|
"learning_rate": 4.994199965402252e-06, |
|
"loss": 0.6472, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.15169616923329862, |
|
"grad_norm": 0.30351530565920815, |
|
"learning_rate": 4.994037799010168e-06, |
|
"loss": 0.6514, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.15257302570285527, |
|
"grad_norm": 0.2667020904201129, |
|
"learning_rate": 4.993873399423445e-06, |
|
"loss": 0.642, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.1534498821724119, |
|
"grad_norm": 0.3062654941965369, |
|
"learning_rate": 4.993706766789287e-06, |
|
"loss": 0.6398, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.15432673864196855, |
|
"grad_norm": 0.28228507467929365, |
|
"learning_rate": 4.993537901256898e-06, |
|
"loss": 0.6446, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.15520359511152518, |
|
"grad_norm": 0.3157908119401443, |
|
"learning_rate": 4.993366802977486e-06, |
|
"loss": 0.645, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.15608045158108183, |
|
"grad_norm": 0.29612114085869035, |
|
"learning_rate": 4.993193472104253e-06, |
|
"loss": 0.6379, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.15695730805063846, |
|
"grad_norm": 0.31715005105530436, |
|
"learning_rate": 4.9930179087924e-06, |
|
"loss": 0.6446, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.1578341645201951, |
|
"grad_norm": 0.3010974405602859, |
|
"learning_rate": 4.992840113199131e-06, |
|
"loss": 0.6273, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.15871102098975173, |
|
"grad_norm": 0.3097310667014726, |
|
"learning_rate": 4.992660085483645e-06, |
|
"loss": 0.6477, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.15958787745930839, |
|
"grad_norm": 0.25428924204211556, |
|
"learning_rate": 4.992477825807142e-06, |
|
"loss": 0.6562, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.160464733928865, |
|
"grad_norm": 0.30870425916577926, |
|
"learning_rate": 4.992293334332821e-06, |
|
"loss": 0.6528, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.16134159039842166, |
|
"grad_norm": 0.2915653234864446, |
|
"learning_rate": 4.992106611225875e-06, |
|
"loss": 0.6491, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.1622184468679783, |
|
"grad_norm": 0.3032380988277513, |
|
"learning_rate": 4.991917656653501e-06, |
|
"loss": 0.6523, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.16309530333753494, |
|
"grad_norm": 0.2986663700583823, |
|
"learning_rate": 4.991726470784891e-06, |
|
"loss": 0.6333, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.16397215980709157, |
|
"grad_norm": 0.28321065505069615, |
|
"learning_rate": 4.9915330537912346e-06, |
|
"loss": 0.6411, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.16484901627664822, |
|
"grad_norm": 0.358610834369166, |
|
"learning_rate": 4.99133740584572e-06, |
|
"loss": 0.6404, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.16572587274620484, |
|
"grad_norm": 0.30976208589225795, |
|
"learning_rate": 4.991139527123534e-06, |
|
"loss": 0.6405, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.1666027292157615, |
|
"grad_norm": 0.34149502314365515, |
|
"learning_rate": 4.990939417801859e-06, |
|
"loss": 0.6384, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.16747958568531812, |
|
"grad_norm": 0.2959951500432587, |
|
"learning_rate": 4.9907370780598754e-06, |
|
"loss": 0.6469, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.16835644215487477, |
|
"grad_norm": 0.3302476980977895, |
|
"learning_rate": 4.990532508078761e-06, |
|
"loss": 0.6359, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.1692332986244314, |
|
"grad_norm": 0.3944297035939378, |
|
"learning_rate": 4.990325708041691e-06, |
|
"loss": 0.6502, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.17011015509398805, |
|
"grad_norm": 0.360231124267091, |
|
"learning_rate": 4.990116678133836e-06, |
|
"loss": 0.6424, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.1709870115635447, |
|
"grad_norm": 0.33832741778437936, |
|
"learning_rate": 4.989905418542366e-06, |
|
"loss": 0.6352, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.17186386803310133, |
|
"grad_norm": 0.36238295597291414, |
|
"learning_rate": 4.989691929456443e-06, |
|
"loss": 0.6499, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.17274072450265798, |
|
"grad_norm": 0.32684488652867627, |
|
"learning_rate": 4.98947621106723e-06, |
|
"loss": 0.6475, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.1736175809722146, |
|
"grad_norm": 0.2757346118610075, |
|
"learning_rate": 4.989258263567884e-06, |
|
"loss": 0.6355, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.17449443744177126, |
|
"grad_norm": 0.29755713041423115, |
|
"learning_rate": 4.989038087153556e-06, |
|
"loss": 0.6336, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.17537129391132789, |
|
"grad_norm": 0.29151765698243737, |
|
"learning_rate": 4.988815682021398e-06, |
|
"loss": 0.6471, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.17624815038088454, |
|
"grad_norm": 0.28111823253643253, |
|
"learning_rate": 4.988591048370552e-06, |
|
"loss": 0.6407, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.17712500685044116, |
|
"grad_norm": 0.2656165957748681, |
|
"learning_rate": 4.988364186402159e-06, |
|
"loss": 0.6326, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.17800186331999782, |
|
"grad_norm": 0.3028986715129606, |
|
"learning_rate": 4.988135096319355e-06, |
|
"loss": 0.6348, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.17887871978955444, |
|
"grad_norm": 0.29924585956112065, |
|
"learning_rate": 4.987903778327269e-06, |
|
"loss": 0.6488, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.1797555762591111, |
|
"grad_norm": 0.2747438588784908, |
|
"learning_rate": 4.987670232633027e-06, |
|
"loss": 0.6353, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.18063243272866772, |
|
"grad_norm": 0.30887265845064044, |
|
"learning_rate": 4.987434459445748e-06, |
|
"loss": 0.6428, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.18150928919822437, |
|
"grad_norm": 0.3193061834187564, |
|
"learning_rate": 4.987196458976548e-06, |
|
"loss": 0.6467, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.182386145667781, |
|
"grad_norm": 0.2769424032566695, |
|
"learning_rate": 4.9869562314385335e-06, |
|
"loss": 0.6407, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.18326300213733765, |
|
"grad_norm": 0.3406015148633883, |
|
"learning_rate": 4.986713777046809e-06, |
|
"loss": 0.6443, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.18413985860689427, |
|
"grad_norm": 0.271878066659463, |
|
"learning_rate": 4.986469096018472e-06, |
|
"loss": 0.6328, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.18501671507645093, |
|
"grad_norm": 0.2987491049335003, |
|
"learning_rate": 4.9862221885726115e-06, |
|
"loss": 0.6478, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.18589357154600755, |
|
"grad_norm": 0.3087618217189243, |
|
"learning_rate": 4.985973054930313e-06, |
|
"loss": 0.6363, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.1867704280155642, |
|
"grad_norm": 0.28612704652497223, |
|
"learning_rate": 4.985721695314653e-06, |
|
"loss": 0.6409, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.18764728448512083, |
|
"grad_norm": 0.26033127989473615, |
|
"learning_rate": 4.985468109950704e-06, |
|
"loss": 0.6495, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.18852414095467748, |
|
"grad_norm": 0.29345494621139656, |
|
"learning_rate": 4.985212299065528e-06, |
|
"loss": 0.648, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.1894009974242341, |
|
"grad_norm": 0.30811406203792147, |
|
"learning_rate": 4.984954262888182e-06, |
|
"loss": 0.639, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.19027785389379076, |
|
"grad_norm": 0.3312828084167346, |
|
"learning_rate": 4.9846940016497146e-06, |
|
"loss": 0.6403, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.1911547103633474, |
|
"grad_norm": 0.29106752415257064, |
|
"learning_rate": 4.984431515583169e-06, |
|
"loss": 0.6457, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.19203156683290404, |
|
"grad_norm": 0.2950307203873666, |
|
"learning_rate": 4.984166804923576e-06, |
|
"loss": 0.6366, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.1929084233024607, |
|
"grad_norm": 0.33001978484003053, |
|
"learning_rate": 4.983899869907963e-06, |
|
"loss": 0.6519, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.19378527977201732, |
|
"grad_norm": 0.25712182858786903, |
|
"learning_rate": 4.983630710775346e-06, |
|
"loss": 0.6302, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.19466213624157397, |
|
"grad_norm": 0.33700258932320354, |
|
"learning_rate": 4.983359327766735e-06, |
|
"loss": 0.6382, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.1955389927111306, |
|
"grad_norm": 0.3195952299259763, |
|
"learning_rate": 4.983085721125128e-06, |
|
"loss": 0.6408, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.19641584918068725, |
|
"grad_norm": 0.2820582636542398, |
|
"learning_rate": 4.982809891095519e-06, |
|
"loss": 0.6196, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.19729270565024387, |
|
"grad_norm": 0.30343326038998625, |
|
"learning_rate": 4.982531837924887e-06, |
|
"loss": 0.6361, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.19816956211980052, |
|
"grad_norm": 0.2724213298701267, |
|
"learning_rate": 4.9822515618622055e-06, |
|
"loss": 0.6455, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.19904641858935715, |
|
"grad_norm": 0.28433275446155476, |
|
"learning_rate": 4.9819690631584375e-06, |
|
"loss": 0.6329, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.1999232750589138, |
|
"grad_norm": 0.2641523923467397, |
|
"learning_rate": 4.981684342066536e-06, |
|
"loss": 0.6301, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.20080013152847043, |
|
"grad_norm": 0.29243768749633176, |
|
"learning_rate": 4.9813973988414454e-06, |
|
"loss": 0.6369, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.20167698799802708, |
|
"grad_norm": 0.27139535071517695, |
|
"learning_rate": 4.981108233740096e-06, |
|
"loss": 0.6279, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.2025538444675837, |
|
"grad_norm": 0.27525475223350887, |
|
"learning_rate": 4.980816847021412e-06, |
|
"loss": 0.6429, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.20343070093714036, |
|
"grad_norm": 0.3427701449667448, |
|
"learning_rate": 4.980523238946304e-06, |
|
"loss": 0.6438, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.20430755740669698, |
|
"grad_norm": 0.2574596630900604, |
|
"learning_rate": 4.980227409777673e-06, |
|
"loss": 0.6278, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.20518441387625364, |
|
"grad_norm": 0.3069435432493287, |
|
"learning_rate": 4.9799293597804086e-06, |
|
"loss": 0.645, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.20606127034581026, |
|
"grad_norm": 0.2861360169316533, |
|
"learning_rate": 4.979629089221387e-06, |
|
"loss": 0.646, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.2069381268153669, |
|
"grad_norm": 0.258606470239814, |
|
"learning_rate": 4.9793265983694775e-06, |
|
"loss": 0.638, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.20781498328492354, |
|
"grad_norm": 0.2852233202848665, |
|
"learning_rate": 4.9790218874955325e-06, |
|
"loss": 0.6233, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.2086918397544802, |
|
"grad_norm": 0.27593128237727194, |
|
"learning_rate": 4.978714956872394e-06, |
|
"loss": 0.64, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.20956869622403682, |
|
"grad_norm": 0.2721892419938629, |
|
"learning_rate": 4.978405806774892e-06, |
|
"loss": 0.6242, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.21044555269359347, |
|
"grad_norm": 0.26477694173686633, |
|
"learning_rate": 4.978094437479843e-06, |
|
"loss": 0.6409, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.2113224091631501, |
|
"grad_norm": 0.29511740452877416, |
|
"learning_rate": 4.977780849266054e-06, |
|
"loss": 0.6397, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.21219926563270675, |
|
"grad_norm": 0.3137075106480887, |
|
"learning_rate": 4.977465042414314e-06, |
|
"loss": 0.6185, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.2130761221022634, |
|
"grad_norm": 0.2841757272525764, |
|
"learning_rate": 4.9771470172073985e-06, |
|
"loss": 0.6394, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.21395297857182002, |
|
"grad_norm": 0.289636229771129, |
|
"learning_rate": 4.976826773930076e-06, |
|
"loss": 0.6314, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.21482983504137668, |
|
"grad_norm": 0.30163996035868273, |
|
"learning_rate": 4.976504312869093e-06, |
|
"loss": 0.6347, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.2157066915109333, |
|
"grad_norm": 0.261372963985366, |
|
"learning_rate": 4.976179634313187e-06, |
|
"loss": 0.6378, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.21658354798048995, |
|
"grad_norm": 0.3277256326536918, |
|
"learning_rate": 4.97585273855308e-06, |
|
"loss": 0.6326, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.21746040445004658, |
|
"grad_norm": 0.2609300415027874, |
|
"learning_rate": 4.975523625881478e-06, |
|
"loss": 0.643, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.21833726091960323, |
|
"grad_norm": 0.360435554160976, |
|
"learning_rate": 4.975192296593072e-06, |
|
"loss": 0.6301, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.21921411738915986, |
|
"grad_norm": 0.33545569496984357, |
|
"learning_rate": 4.97485875098454e-06, |
|
"loss": 0.6263, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2200909738587165, |
|
"grad_norm": 0.3109257543138659, |
|
"learning_rate": 4.974522989354544e-06, |
|
"loss": 0.6409, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.22096783032827313, |
|
"grad_norm": 0.324992218124581, |
|
"learning_rate": 4.974185012003727e-06, |
|
"loss": 0.634, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.2218446867978298, |
|
"grad_norm": 0.32486130027399085, |
|
"learning_rate": 4.97384481923472e-06, |
|
"loss": 0.6164, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.2227215432673864, |
|
"grad_norm": 0.37258515700556377, |
|
"learning_rate": 4.973502411352136e-06, |
|
"loss": 0.6387, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.22359839973694307, |
|
"grad_norm": 0.29043553996012594, |
|
"learning_rate": 4.97315778866257e-06, |
|
"loss": 0.6287, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.2244752562064997, |
|
"grad_norm": 0.36257038619483317, |
|
"learning_rate": 4.972810951474605e-06, |
|
"loss": 0.6343, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.22535211267605634, |
|
"grad_norm": 0.2772793728031826, |
|
"learning_rate": 4.972461900098801e-06, |
|
"loss": 0.6289, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.22622896914561297, |
|
"grad_norm": 0.35920004083908574, |
|
"learning_rate": 4.972110634847703e-06, |
|
"loss": 0.6532, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.22710582561516962, |
|
"grad_norm": 0.29471007707943336, |
|
"learning_rate": 4.97175715603584e-06, |
|
"loss": 0.6431, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.22798268208472625, |
|
"grad_norm": 0.3052965075835166, |
|
"learning_rate": 4.971401463979722e-06, |
|
"loss": 0.6373, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.2288595385542829, |
|
"grad_norm": 0.27702925326859024, |
|
"learning_rate": 4.971043558997839e-06, |
|
"loss": 0.6254, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.22973639502383952, |
|
"grad_norm": 0.30905022457424325, |
|
"learning_rate": 4.9706834414106645e-06, |
|
"loss": 0.6377, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.23061325149339618, |
|
"grad_norm": 0.2820956276882666, |
|
"learning_rate": 4.970321111540652e-06, |
|
"loss": 0.6303, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.2314901079629528, |
|
"grad_norm": 0.3394900289735489, |
|
"learning_rate": 4.969956569712238e-06, |
|
"loss": 0.6394, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.23236696443250945, |
|
"grad_norm": 0.26647926556067275, |
|
"learning_rate": 4.969589816251837e-06, |
|
"loss": 0.6202, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.2332438209020661, |
|
"grad_norm": 0.3281231898594553, |
|
"learning_rate": 4.9692208514878445e-06, |
|
"loss": 0.6343, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.23412067737162273, |
|
"grad_norm": 0.32675488207496506, |
|
"learning_rate": 4.968849675750638e-06, |
|
"loss": 0.6106, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.23499753384117938, |
|
"grad_norm": 0.28838375524590465, |
|
"learning_rate": 4.9684762893725715e-06, |
|
"loss": 0.6191, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.235874390310736, |
|
"grad_norm": 0.3568027126734991, |
|
"learning_rate": 4.968100692687981e-06, |
|
"loss": 0.6492, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.23675124678029266, |
|
"grad_norm": 0.28443576918161984, |
|
"learning_rate": 4.967722886033181e-06, |
|
"loss": 0.6332, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.2376281032498493, |
|
"grad_norm": 0.34347891151295074, |
|
"learning_rate": 4.967342869746463e-06, |
|
"loss": 0.6302, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.23850495971940594, |
|
"grad_norm": 0.26856199334324765, |
|
"learning_rate": 4.9669606441681005e-06, |
|
"loss": 0.6253, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.23938181618896257, |
|
"grad_norm": 0.28792821400673596, |
|
"learning_rate": 4.966576209640344e-06, |
|
"loss": 0.617, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.24025867265851922, |
|
"grad_norm": 0.2749481611356667, |
|
"learning_rate": 4.966189566507418e-06, |
|
"loss": 0.6386, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.24113552912807584, |
|
"grad_norm": 0.2499995559979677, |
|
"learning_rate": 4.965800715115531e-06, |
|
"loss": 0.6281, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.2420123855976325, |
|
"grad_norm": 0.2802197876098476, |
|
"learning_rate": 4.965409655812865e-06, |
|
"loss": 0.6356, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.24288924206718912, |
|
"grad_norm": 0.27112050232805884, |
|
"learning_rate": 4.965016388949579e-06, |
|
"loss": 0.6366, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.24376609853674577, |
|
"grad_norm": 0.28745747065199806, |
|
"learning_rate": 4.96462091487781e-06, |
|
"loss": 0.6245, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.2446429550063024, |
|
"grad_norm": 0.29635776688822807, |
|
"learning_rate": 4.96422323395167e-06, |
|
"loss": 0.6413, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.24551981147585905, |
|
"grad_norm": 0.3376283192201481, |
|
"learning_rate": 4.963823346527249e-06, |
|
"loss": 0.6322, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.24639666794541568, |
|
"grad_norm": 0.30520044326595835, |
|
"learning_rate": 4.96342125296261e-06, |
|
"loss": 0.6173, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.24727352441497233, |
|
"grad_norm": 0.34476437566601653, |
|
"learning_rate": 4.963016953617794e-06, |
|
"loss": 0.6172, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.24815038088452895, |
|
"grad_norm": 0.2611205789369605, |
|
"learning_rate": 4.962610448854816e-06, |
|
"loss": 0.6246, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.2490272373540856, |
|
"grad_norm": 0.3294938430549001, |
|
"learning_rate": 4.962201739037665e-06, |
|
"loss": 0.632, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.24990409382364223, |
|
"grad_norm": 0.2716869569081184, |
|
"learning_rate": 4.961790824532306e-06, |
|
"loss": 0.6285, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.25078095029319886, |
|
"grad_norm": 0.33415021484488, |
|
"learning_rate": 4.961377705706677e-06, |
|
"loss": 0.6295, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.2516578067627555, |
|
"grad_norm": 0.3077857421614378, |
|
"learning_rate": 4.960962382930691e-06, |
|
"loss": 0.6273, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.25253466323231216, |
|
"grad_norm": 0.3027918805177667, |
|
"learning_rate": 4.960544856576232e-06, |
|
"loss": 0.629, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.2534115197018688, |
|
"grad_norm": 0.2916258020649895, |
|
"learning_rate": 4.960125127017159e-06, |
|
"loss": 0.6427, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.2542883761714254, |
|
"grad_norm": 0.3152484231550671, |
|
"learning_rate": 4.959703194629304e-06, |
|
"loss": 0.6348, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.25516523264098206, |
|
"grad_norm": 0.32915709407999866, |
|
"learning_rate": 4.959279059790471e-06, |
|
"loss": 0.632, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.2560420891105387, |
|
"grad_norm": 0.2817567268029023, |
|
"learning_rate": 4.958852722880435e-06, |
|
"loss": 0.6112, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.25691894558009537, |
|
"grad_norm": 0.3538236182060425, |
|
"learning_rate": 4.958424184280946e-06, |
|
"loss": 0.6241, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.257795802049652, |
|
"grad_norm": 0.2864183700965389, |
|
"learning_rate": 4.957993444375719e-06, |
|
"loss": 0.6277, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.2586726585192086, |
|
"grad_norm": 0.33515303575483923, |
|
"learning_rate": 4.95756050355045e-06, |
|
"loss": 0.6277, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.2595495149887653, |
|
"grad_norm": 0.31975746198582533, |
|
"learning_rate": 4.957125362192794e-06, |
|
"loss": 0.6114, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.2604263714583219, |
|
"grad_norm": 0.34329553758734277, |
|
"learning_rate": 4.956688020692386e-06, |
|
"loss": 0.6457, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.2613032279278786, |
|
"grad_norm": 0.3122307785419701, |
|
"learning_rate": 4.956248479440827e-06, |
|
"loss": 0.6272, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.2621800843974352, |
|
"grad_norm": 0.3126439049869492, |
|
"learning_rate": 4.955806738831687e-06, |
|
"loss": 0.634, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.26305694086699183, |
|
"grad_norm": 0.30725526373905826, |
|
"learning_rate": 4.955362799260507e-06, |
|
"loss": 0.6269, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2639337973365485, |
|
"grad_norm": 0.2952615284346605, |
|
"learning_rate": 4.954916661124797e-06, |
|
"loss": 0.6129, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.26481065380610513, |
|
"grad_norm": 0.3284069744839045, |
|
"learning_rate": 4.954468324824035e-06, |
|
"loss": 0.613, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.26568751027566173, |
|
"grad_norm": 0.34051928196991404, |
|
"learning_rate": 4.954017790759666e-06, |
|
"loss": 0.6192, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.2665643667452184, |
|
"grad_norm": 0.30608255552211977, |
|
"learning_rate": 4.953565059335104e-06, |
|
"loss": 0.6244, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.26744122321477504, |
|
"grad_norm": 0.31501722301988566, |
|
"learning_rate": 4.953110130955733e-06, |
|
"loss": 0.6236, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.2683180796843317, |
|
"grad_norm": 0.2978345978834651, |
|
"learning_rate": 4.9526530060289e-06, |
|
"loss": 0.6254, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.2691949361538883, |
|
"grad_norm": 0.2935986604058687, |
|
"learning_rate": 4.952193684963922e-06, |
|
"loss": 0.6113, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.27007179262344494, |
|
"grad_norm": 0.294670736028252, |
|
"learning_rate": 4.95173216817208e-06, |
|
"loss": 0.6335, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.2709486490930016, |
|
"grad_norm": 0.2746280487759909, |
|
"learning_rate": 4.951268456066623e-06, |
|
"loss": 0.6211, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.27182550556255825, |
|
"grad_norm": 0.2823209312944346, |
|
"learning_rate": 4.950802549062764e-06, |
|
"loss": 0.621, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.27270236203211484, |
|
"grad_norm": 0.2811005060766513, |
|
"learning_rate": 4.950334447577685e-06, |
|
"loss": 0.6291, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.2735792185016715, |
|
"grad_norm": 0.31377780747479117, |
|
"learning_rate": 4.9498641520305264e-06, |
|
"loss": 0.6308, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.27445607497122815, |
|
"grad_norm": 0.263859895152384, |
|
"learning_rate": 4.949391662842401e-06, |
|
"loss": 0.6238, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.2753329314407848, |
|
"grad_norm": 0.3124591272767995, |
|
"learning_rate": 4.948916980436379e-06, |
|
"loss": 0.6254, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.27620978791034145, |
|
"grad_norm": 0.2762091249470148, |
|
"learning_rate": 4.948440105237499e-06, |
|
"loss": 0.6297, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.27708664437989805, |
|
"grad_norm": 0.30510467983773004, |
|
"learning_rate": 4.947961037672761e-06, |
|
"loss": 0.6301, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.2779635008494547, |
|
"grad_norm": 0.2894218681866538, |
|
"learning_rate": 4.947479778171127e-06, |
|
"loss": 0.6215, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.27884035731901136, |
|
"grad_norm": 0.278604444379188, |
|
"learning_rate": 4.946996327163526e-06, |
|
"loss": 0.6193, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.279717213788568, |
|
"grad_norm": 0.29226196825962947, |
|
"learning_rate": 4.946510685082844e-06, |
|
"loss": 0.6205, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.2805940702581246, |
|
"grad_norm": 0.2956824922950759, |
|
"learning_rate": 4.946022852363932e-06, |
|
"loss": 0.6238, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.28147092672768126, |
|
"grad_norm": 0.28796938907697983, |
|
"learning_rate": 4.945532829443604e-06, |
|
"loss": 0.6176, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.2823477831972379, |
|
"grad_norm": 0.2688847498978228, |
|
"learning_rate": 4.945040616760629e-06, |
|
"loss": 0.6178, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.28322463966679456, |
|
"grad_norm": 0.3167327299209847, |
|
"learning_rate": 4.944546214755744e-06, |
|
"loss": 0.6315, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.28410149613635116, |
|
"grad_norm": 0.28346482132020456, |
|
"learning_rate": 4.9440496238716415e-06, |
|
"loss": 0.6281, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.2849783526059078, |
|
"grad_norm": 0.2862108698161924, |
|
"learning_rate": 4.943550844552978e-06, |
|
"loss": 0.6445, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.28585520907546447, |
|
"grad_norm": 0.3168994194030117, |
|
"learning_rate": 4.943049877246363e-06, |
|
"loss": 0.6336, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.2867320655450211, |
|
"grad_norm": 0.3098419113094991, |
|
"learning_rate": 4.942546722400373e-06, |
|
"loss": 0.6194, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.2876089220145777, |
|
"grad_norm": 0.3076330226750193, |
|
"learning_rate": 4.942041380465539e-06, |
|
"loss": 0.6332, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.28848577848413437, |
|
"grad_norm": 0.3073675940253473, |
|
"learning_rate": 4.941533851894349e-06, |
|
"loss": 0.6329, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.289362634953691, |
|
"grad_norm": 0.27407015238515836, |
|
"learning_rate": 4.9410241371412525e-06, |
|
"loss": 0.6292, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.2902394914232477, |
|
"grad_norm": 0.3233677059379673, |
|
"learning_rate": 4.9405122366626545e-06, |
|
"loss": 0.6407, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.2911163478928043, |
|
"grad_norm": 0.3056326849325438, |
|
"learning_rate": 4.939998150916917e-06, |
|
"loss": 0.6314, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.2919932043623609, |
|
"grad_norm": 0.3140138519054107, |
|
"learning_rate": 4.93948188036436e-06, |
|
"loss": 0.6583, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.2928700608319176, |
|
"grad_norm": 0.2967689552064628, |
|
"learning_rate": 4.938963425467258e-06, |
|
"loss": 0.6349, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.29374691730147423, |
|
"grad_norm": 0.35320572702474673, |
|
"learning_rate": 4.938442786689843e-06, |
|
"loss": 0.6248, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.29462377377103083, |
|
"grad_norm": 0.2958836632865014, |
|
"learning_rate": 4.9379199644983025e-06, |
|
"loss": 0.6255, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.2955006302405875, |
|
"grad_norm": 0.3054952399371344, |
|
"learning_rate": 4.937394959360777e-06, |
|
"loss": 0.6119, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.29637748671014413, |
|
"grad_norm": 0.34308383177638463, |
|
"learning_rate": 4.9368677717473645e-06, |
|
"loss": 0.6468, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.2972543431797008, |
|
"grad_norm": 0.2648620374237178, |
|
"learning_rate": 4.936338402130115e-06, |
|
"loss": 0.6203, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.29813119964925744, |
|
"grad_norm": 0.2976099930186866, |
|
"learning_rate": 4.935806850983034e-06, |
|
"loss": 0.6348, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.29900805611881404, |
|
"grad_norm": 0.285144357181017, |
|
"learning_rate": 4.935273118782078e-06, |
|
"loss": 0.6115, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.2998849125883707, |
|
"grad_norm": 0.3079688238524965, |
|
"learning_rate": 4.934737206005159e-06, |
|
"loss": 0.6254, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.30076176905792734, |
|
"grad_norm": 0.27719094781494596, |
|
"learning_rate": 4.93419911313214e-06, |
|
"loss": 0.6386, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.301638625527484, |
|
"grad_norm": 0.29796636665366355, |
|
"learning_rate": 4.933658840644837e-06, |
|
"loss": 0.6268, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.3025154819970406, |
|
"grad_norm": 0.27509893042636935, |
|
"learning_rate": 4.933116389027017e-06, |
|
"loss": 0.621, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.30339233846659724, |
|
"grad_norm": 0.31224342373584874, |
|
"learning_rate": 4.932571758764398e-06, |
|
"loss": 0.6312, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.3042691949361539, |
|
"grad_norm": 0.2689144896057607, |
|
"learning_rate": 4.93202495034465e-06, |
|
"loss": 0.6115, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.30514605140571055, |
|
"grad_norm": 0.2558266510993566, |
|
"learning_rate": 4.931475964257391e-06, |
|
"loss": 0.6245, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.30602290787526715, |
|
"grad_norm": 0.25500762407211314, |
|
"learning_rate": 4.930924800994192e-06, |
|
"loss": 0.6091, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.3068997643448238, |
|
"grad_norm": 0.2717131638453367, |
|
"learning_rate": 4.9303714610485705e-06, |
|
"loss": 0.6281, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.30777662081438045, |
|
"grad_norm": 0.2729400616989181, |
|
"learning_rate": 4.929815944915997e-06, |
|
"loss": 0.6083, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.3086534772839371, |
|
"grad_norm": 0.26000631857019024, |
|
"learning_rate": 4.929258253093885e-06, |
|
"loss": 0.6198, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.3095303337534937, |
|
"grad_norm": 0.2740884453189882, |
|
"learning_rate": 4.9286983860816e-06, |
|
"loss": 0.6338, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.31040719022305036, |
|
"grad_norm": 0.27150990388252366, |
|
"learning_rate": 4.928136344380457e-06, |
|
"loss": 0.6162, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.311284046692607, |
|
"grad_norm": 0.26286571771385, |
|
"learning_rate": 4.9275721284937115e-06, |
|
"loss": 0.629, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.31216090316216366, |
|
"grad_norm": 0.27510252961865267, |
|
"learning_rate": 4.9270057389265734e-06, |
|
"loss": 0.633, |
|
"step": 356 |
|
}, |
|
    {
      "epoch": 0.31303775963172026,
      "grad_norm": 0.2825214790660817,
      "learning_rate": 4.926437176186193e-06,
      "loss": 0.6263,
      "step": 357
    },
    {
      "epoch": 0.3139146161012769,
      "grad_norm": 0.29292375908331497,
      "learning_rate": 4.92586644078167e-06,
      "loss": 0.6313,
      "step": 358
    },
    {
      "epoch": 0.31479147257083356,
      "grad_norm": 0.2760563004495057,
      "learning_rate": 4.925293533224049e-06,
      "loss": 0.6174,
      "step": 359
    },
    {
      "epoch": 0.3156683290403902,
      "grad_norm": 0.29078508943452525,
      "learning_rate": 4.924718454026318e-06,
      "loss": 0.6156,
      "step": 360
    },
    {
      "epoch": 0.3165451855099468,
      "grad_norm": 0.2878769173523044,
      "learning_rate": 4.924141203703412e-06,
      "loss": 0.6047,
      "step": 361
    },
    {
      "epoch": 0.31742204197950347,
      "grad_norm": 0.27485843884417593,
      "learning_rate": 4.923561782772206e-06,
      "loss": 0.6293,
      "step": 362
    },
    {
      "epoch": 0.3182988984490601,
      "grad_norm": 0.2865164028316351,
      "learning_rate": 4.922980191751524e-06,
      "loss": 0.6269,
      "step": 363
    },
    {
      "epoch": 0.31917575491861677,
      "grad_norm": 0.27991173694279825,
      "learning_rate": 4.922396431162129e-06,
      "loss": 0.6143,
      "step": 364
    },
    {
      "epoch": 0.3200526113881734,
      "grad_norm": 0.279639353480309,
      "learning_rate": 4.921810501526728e-06,
      "loss": 0.635,
      "step": 365
    },
    {
      "epoch": 0.32092946785773,
      "grad_norm": 0.2830142803081013,
      "learning_rate": 4.921222403369971e-06,
      "loss": 0.6157,
      "step": 366
    },
    {
      "epoch": 0.3218063243272867,
      "grad_norm": 0.2684155306717856,
      "learning_rate": 4.920632137218447e-06,
      "loss": 0.6294,
      "step": 367
    },
    {
      "epoch": 0.3226831807968433,
      "grad_norm": 0.2983455576981931,
      "learning_rate": 4.920039703600691e-06,
      "loss": 0.624,
      "step": 368
    },
    {
      "epoch": 0.3235600372664,
      "grad_norm": 0.2948947231333358,
      "learning_rate": 4.9194451030471735e-06,
      "loss": 0.6102,
      "step": 369
    },
    {
      "epoch": 0.3244368937359566,
      "grad_norm": 0.2826890911442374,
      "learning_rate": 4.918848336090309e-06,
      "loss": 0.6236,
      "step": 370
    },
    {
      "epoch": 0.32531375020551323,
      "grad_norm": 0.32269493597939386,
      "learning_rate": 4.91824940326445e-06,
      "loss": 0.6139,
      "step": 371
    },
    {
      "epoch": 0.3261906066750699,
      "grad_norm": 0.2734983777513044,
      "learning_rate": 4.91764830510589e-06,
      "loss": 0.6166,
      "step": 372
    },
    {
      "epoch": 0.32706746314462654,
      "grad_norm": 0.36983262498880637,
      "learning_rate": 4.917045042152858e-06,
      "loss": 0.6186,
      "step": 373
    },
    {
      "epoch": 0.32794431961418313,
      "grad_norm": 0.2751996219950251,
      "learning_rate": 4.916439614945527e-06,
      "loss": 0.6412,
      "step": 374
    },
    {
      "epoch": 0.3288211760837398,
      "grad_norm": 0.319865198714037,
      "learning_rate": 4.915832024026002e-06,
      "loss": 0.627,
      "step": 375
    },
    {
      "epoch": 0.32969803255329644,
      "grad_norm": 0.29823421688781576,
      "learning_rate": 4.915222269938328e-06,
      "loss": 0.6181,
      "step": 376
    },
    {
      "epoch": 0.3305748890228531,
      "grad_norm": 0.27335542421500575,
      "learning_rate": 4.914610353228488e-06,
      "loss": 0.6202,
      "step": 377
    },
    {
      "epoch": 0.3314517454924097,
      "grad_norm": 0.3824213724235341,
      "learning_rate": 4.913996274444401e-06,
      "loss": 0.608,
      "step": 378
    },
    {
      "epoch": 0.33232860196196634,
      "grad_norm": 0.3269271239671324,
      "learning_rate": 4.913380034135919e-06,
      "loss": 0.6229,
      "step": 379
    },
    {
      "epoch": 0.333205458431523,
      "grad_norm": 0.2832871290462529,
      "learning_rate": 4.912761632854834e-06,
      "loss": 0.618,
      "step": 380
    },
    {
      "epoch": 0.33408231490107965,
      "grad_norm": 0.329936751234759,
      "learning_rate": 4.912141071154869e-06,
      "loss": 0.6231,
      "step": 381
    },
    {
      "epoch": 0.33495917137063624,
      "grad_norm": 0.2752693680315103,
      "learning_rate": 4.911518349591685e-06,
      "loss": 0.6234,
      "step": 382
    },
    {
      "epoch": 0.3358360278401929,
      "grad_norm": 0.3136704903953731,
      "learning_rate": 4.9108934687228735e-06,
      "loss": 0.6248,
      "step": 383
    },
    {
      "epoch": 0.33671288430974955,
      "grad_norm": 0.2947450161853734,
      "learning_rate": 4.910266429107962e-06,
      "loss": 0.6291,
      "step": 384
    },
    {
      "epoch": 0.3375897407793062,
      "grad_norm": 0.27963622109645897,
      "learning_rate": 4.90963723130841e-06,
      "loss": 0.6168,
      "step": 385
    },
    {
      "epoch": 0.3384665972488628,
      "grad_norm": 0.2755048673546131,
      "learning_rate": 4.90900587588761e-06,
      "loss": 0.6022,
      "step": 386
    },
    {
      "epoch": 0.33934345371841945,
      "grad_norm": 0.28857281828902753,
      "learning_rate": 4.908372363410886e-06,
      "loss": 0.6254,
      "step": 387
    },
    {
      "epoch": 0.3402203101879761,
      "grad_norm": 0.28648556573019374,
      "learning_rate": 4.907736694445492e-06,
      "loss": 0.6175,
      "step": 388
    },
    {
      "epoch": 0.34109716665753276,
      "grad_norm": 0.26925532018377424,
      "learning_rate": 4.9070988695606156e-06,
      "loss": 0.6176,
      "step": 389
    },
    {
      "epoch": 0.3419740231270894,
      "grad_norm": 0.2832182299890066,
      "learning_rate": 4.906458889327375e-06,
      "loss": 0.6291,
      "step": 390
    },
    {
      "epoch": 0.342850879596646,
      "grad_norm": 0.24545023229724808,
      "learning_rate": 4.905816754318815e-06,
      "loss": 0.621,
      "step": 391
    },
    {
      "epoch": 0.34372773606620266,
      "grad_norm": 0.27071805276574584,
      "learning_rate": 4.905172465109912e-06,
      "loss": 0.6235,
      "step": 392
    },
    {
      "epoch": 0.3446045925357593,
      "grad_norm": 0.2686211222363871,
      "learning_rate": 4.904526022277572e-06,
      "loss": 0.6259,
      "step": 393
    },
    {
      "epoch": 0.34548144900531597,
      "grad_norm": 0.2788582786567745,
      "learning_rate": 4.903877426400629e-06,
      "loss": 0.6113,
      "step": 394
    },
    {
      "epoch": 0.34635830547487256,
      "grad_norm": 0.2882303517807228,
      "learning_rate": 4.903226678059842e-06,
      "loss": 0.6325,
      "step": 395
    },
    {
      "epoch": 0.3472351619444292,
      "grad_norm": 0.26417391198725343,
      "learning_rate": 4.902573777837902e-06,
      "loss": 0.6171,
      "step": 396
    },
    {
      "epoch": 0.34811201841398587,
      "grad_norm": 0.27931172516771346,
      "learning_rate": 4.901918726319424e-06,
      "loss": 0.6041,
      "step": 397
    },
    {
      "epoch": 0.3489888748835425,
      "grad_norm": 0.24713049818043734,
      "learning_rate": 4.901261524090949e-06,
      "loss": 0.6099,
      "step": 398
    },
    {
      "epoch": 0.3498657313530991,
      "grad_norm": 0.29086241382146505,
      "learning_rate": 4.900602171740946e-06,
      "loss": 0.6258,
      "step": 399
    },
    {
      "epoch": 0.35074258782265577,
      "grad_norm": 0.26291418203363,
      "learning_rate": 4.899940669859807e-06,
      "loss": 0.6117,
      "step": 400
    },
    {
      "epoch": 0.3516194442922124,
      "grad_norm": 0.3216617316096804,
      "learning_rate": 4.89927701903985e-06,
      "loss": 0.6187,
      "step": 401
    },
    {
      "epoch": 0.3524963007617691,
      "grad_norm": 0.27295463776878537,
      "learning_rate": 4.898611219875316e-06,
      "loss": 0.6132,
      "step": 402
    },
    {
      "epoch": 0.3533731572313257,
      "grad_norm": 0.2853334578601736,
      "learning_rate": 4.897943272962372e-06,
      "loss": 0.6148,
      "step": 403
    },
    {
      "epoch": 0.3542500137008823,
      "grad_norm": 0.31932832747253076,
      "learning_rate": 4.897273178899105e-06,
      "loss": 0.6187,
      "step": 404
    },
    {
      "epoch": 0.355126870170439,
      "grad_norm": 0.28031643219296354,
      "learning_rate": 4.896600938285526e-06,
      "loss": 0.6236,
      "step": 405
    },
    {
      "epoch": 0.35600372663999563,
      "grad_norm": 0.26831626886851945,
      "learning_rate": 4.89592655172357e-06,
      "loss": 0.6102,
      "step": 406
    },
    {
      "epoch": 0.35688058310955223,
      "grad_norm": 0.2951228212133584,
      "learning_rate": 4.895250019817089e-06,
      "loss": 0.6164,
      "step": 407
    },
    {
      "epoch": 0.3577574395791089,
      "grad_norm": 0.27330142007513136,
      "learning_rate": 4.894571343171862e-06,
      "loss": 0.6023,
      "step": 408
    },
    {
      "epoch": 0.35863429604866554,
      "grad_norm": 0.3204620119402923,
      "learning_rate": 4.893890522395582e-06,
      "loss": 0.62,
      "step": 409
    },
    {
      "epoch": 0.3595111525182222,
      "grad_norm": 0.261478566125417,
      "learning_rate": 4.893207558097867e-06,
      "loss": 0.6294,
      "step": 410
    },
    {
      "epoch": 0.36038800898777884,
      "grad_norm": 0.250895473885103,
      "learning_rate": 4.892522450890251e-06,
      "loss": 0.6152,
      "step": 411
    },
    {
      "epoch": 0.36126486545733544,
      "grad_norm": 0.2634865561040139,
      "learning_rate": 4.89183520138619e-06,
      "loss": 0.6157,
      "step": 412
    },
    {
      "epoch": 0.3621417219268921,
      "grad_norm": 0.26459491662331874,
      "learning_rate": 4.891145810201054e-06,
      "loss": 0.609,
      "step": 413
    },
    {
      "epoch": 0.36301857839644874,
      "grad_norm": 0.24301745655990745,
      "learning_rate": 4.8904542779521346e-06,
      "loss": 0.6082,
      "step": 414
    },
    {
      "epoch": 0.3638954348660054,
      "grad_norm": 0.2692643109083729,
      "learning_rate": 4.8897606052586384e-06,
      "loss": 0.6226,
      "step": 415
    },
    {
      "epoch": 0.364772291335562,
      "grad_norm": 0.24024671108707563,
      "learning_rate": 4.889064792741689e-06,
      "loss": 0.6153,
      "step": 416
    },
    {
      "epoch": 0.36564914780511865,
      "grad_norm": 0.273288282597359,
      "learning_rate": 4.888366841024327e-06,
      "loss": 0.6334,
      "step": 417
    },
    {
      "epoch": 0.3665260042746753,
      "grad_norm": 0.2713735341001686,
      "learning_rate": 4.887666750731507e-06,
      "loss": 0.6204,
      "step": 418
    },
    {
      "epoch": 0.36740286074423195,
      "grad_norm": 0.2749014394381958,
      "learning_rate": 4.8869645224901e-06,
      "loss": 0.6017,
      "step": 419
    },
    {
      "epoch": 0.36827971721378855,
      "grad_norm": 0.27621114898765087,
      "learning_rate": 4.8862601569288885e-06,
      "loss": 0.6193,
      "step": 420
    },
    {
      "epoch": 0.3691565736833452,
      "grad_norm": 0.25931507650511326,
      "learning_rate": 4.885553654678573e-06,
      "loss": 0.6233,
      "step": 421
    },
    {
      "epoch": 0.37003343015290185,
      "grad_norm": 0.28686169175433923,
      "learning_rate": 4.884845016371763e-06,
      "loss": 0.6197,
      "step": 422
    },
    {
      "epoch": 0.3709102866224585,
      "grad_norm": 0.27025382919889446,
      "learning_rate": 4.884134242642985e-06,
      "loss": 0.6033,
      "step": 423
    },
    {
      "epoch": 0.3717871430920151,
      "grad_norm": 0.275669477293775,
      "learning_rate": 4.883421334128674e-06,
      "loss": 0.6172,
      "step": 424
    },
    {
      "epoch": 0.37266399956157176,
      "grad_norm": 0.26014021950194516,
      "learning_rate": 4.8827062914671775e-06,
      "loss": 0.6207,
      "step": 425
    },
    {
      "epoch": 0.3735408560311284,
      "grad_norm": 0.2986829920255015,
      "learning_rate": 4.881989115298755e-06,
      "loss": 0.6034,
      "step": 426
    },
    {
      "epoch": 0.37441771250068506,
      "grad_norm": 0.28151692244357057,
      "learning_rate": 4.881269806265575e-06,
      "loss": 0.6133,
      "step": 427
    },
    {
      "epoch": 0.37529456897024166,
      "grad_norm": 0.2932206682237993,
      "learning_rate": 4.8805483650117154e-06,
      "loss": 0.6132,
      "step": 428
    },
    {
      "epoch": 0.3761714254397983,
      "grad_norm": 0.3164265338412961,
      "learning_rate": 4.879824792183166e-06,
      "loss": 0.6077,
      "step": 429
    },
    {
      "epoch": 0.37704828190935497,
      "grad_norm": 0.3636164115457003,
      "learning_rate": 4.879099088427824e-06,
      "loss": 0.6179,
      "step": 430
    },
    {
      "epoch": 0.3779251383789116,
      "grad_norm": 0.2891875334309757,
      "learning_rate": 4.878371254395492e-06,
      "loss": 0.6197,
      "step": 431
    },
    {
      "epoch": 0.3788019948484682,
      "grad_norm": 0.3816104662619605,
      "learning_rate": 4.8776412907378845e-06,
      "loss": 0.6197,
      "step": 432
    },
    {
      "epoch": 0.37967885131802487,
      "grad_norm": 0.29131497715708005,
      "learning_rate": 4.876909198108619e-06,
      "loss": 0.6159,
      "step": 433
    },
    {
      "epoch": 0.3805557077875815,
      "grad_norm": 0.3138520265609416,
      "learning_rate": 4.876174977163222e-06,
      "loss": 0.6139,
      "step": 434
    },
    {
      "epoch": 0.3814325642571382,
      "grad_norm": 0.28035852092093033,
      "learning_rate": 4.875438628559124e-06,
      "loss": 0.6183,
      "step": 435
    },
    {
      "epoch": 0.3823094207266948,
      "grad_norm": 0.3120106817898386,
      "learning_rate": 4.874700152955661e-06,
      "loss": 0.6052,
      "step": 436
    },
    {
      "epoch": 0.3831862771962514,
      "grad_norm": 0.29139666929908226,
      "learning_rate": 4.873959551014075e-06,
      "loss": 0.6058,
      "step": 437
    },
    {
      "epoch": 0.3840631336658081,
      "grad_norm": 0.31305383154436955,
      "learning_rate": 4.873216823397511e-06,
      "loss": 0.6094,
      "step": 438
    },
    {
      "epoch": 0.38493999013536473,
      "grad_norm": 0.3052879988977325,
      "learning_rate": 4.872471970771015e-06,
      "loss": 0.6063,
      "step": 439
    },
    {
      "epoch": 0.3858168466049214,
      "grad_norm": 0.2965934350138861,
      "learning_rate": 4.871724993801541e-06,
      "loss": 0.6054,
      "step": 440
    },
    {
      "epoch": 0.386693703074478,
      "grad_norm": 0.26339362714008424,
      "learning_rate": 4.870975893157941e-06,
      "loss": 0.6152,
      "step": 441
    },
    {
      "epoch": 0.38757055954403463,
      "grad_norm": 0.27556079714679943,
      "learning_rate": 4.870224669510968e-06,
      "loss": 0.6158,
      "step": 442
    },
    {
      "epoch": 0.3884474160135913,
      "grad_norm": 0.29125701036171053,
      "learning_rate": 4.86947132353328e-06,
      "loss": 0.6202,
      "step": 443
    },
    {
      "epoch": 0.38932427248314794,
      "grad_norm": 0.2966406156980298,
      "learning_rate": 4.868715855899432e-06,
      "loss": 0.6265,
      "step": 444
    },
    {
      "epoch": 0.39020112895270453,
      "grad_norm": 0.27733217518457043,
      "learning_rate": 4.867958267285879e-06,
      "loss": 0.6068,
      "step": 445
    },
    {
      "epoch": 0.3910779854222612,
      "grad_norm": 0.2919788828093281,
      "learning_rate": 4.8671985583709765e-06,
      "loss": 0.6208,
      "step": 446
    },
    {
      "epoch": 0.39195484189181784,
      "grad_norm": 0.29327731039840055,
      "learning_rate": 4.866436729834979e-06,
      "loss": 0.6175,
      "step": 447
    },
    {
      "epoch": 0.3928316983613745,
      "grad_norm": 0.2568832744529454,
      "learning_rate": 4.865672782360037e-06,
      "loss": 0.6177,
      "step": 448
    },
    {
      "epoch": 0.3937085548309311,
      "grad_norm": 0.283654204460893,
      "learning_rate": 4.8649067166301985e-06,
      "loss": 0.6203,
      "step": 449
    },
    {
      "epoch": 0.39458541130048774,
      "grad_norm": 0.26828805221375346,
      "learning_rate": 4.864138533331411e-06,
      "loss": 0.6118,
      "step": 450
    },
    {
      "epoch": 0.3954622677700444,
      "grad_norm": 0.2597158618871073,
      "learning_rate": 4.863368233151514e-06,
      "loss": 0.6169,
      "step": 451
    },
    {
      "epoch": 0.39633912423960105,
      "grad_norm": 0.28436035142498156,
      "learning_rate": 4.862595816780246e-06,
      "loss": 0.632,
      "step": 452
    },
    {
      "epoch": 0.39721598070915765,
      "grad_norm": 0.2652505819829089,
      "learning_rate": 4.861821284909238e-06,
      "loss": 0.6289,
      "step": 453
    },
    {
      "epoch": 0.3980928371787143,
      "grad_norm": 0.29252031992594624,
      "learning_rate": 4.861044638232016e-06,
      "loss": 0.6328,
      "step": 454
    },
    {
      "epoch": 0.39896969364827095,
      "grad_norm": 0.2994469365008051,
      "learning_rate": 4.860265877444001e-06,
      "loss": 0.617,
      "step": 455
    },
    {
      "epoch": 0.3998465501178276,
      "grad_norm": 0.2776900829822044,
      "learning_rate": 4.8594850032425036e-06,
      "loss": 0.608,
      "step": 456
    },
    {
      "epoch": 0.4007234065873842,
      "grad_norm": 0.2753322141436327,
      "learning_rate": 4.858702016326731e-06,
      "loss": 0.607,
      "step": 457
    },
    {
      "epoch": 0.40160026305694085,
      "grad_norm": 0.2738219915396828,
      "learning_rate": 4.857916917397779e-06,
      "loss": 0.6043,
      "step": 458
    },
    {
      "epoch": 0.4024771195264975,
      "grad_norm": 0.27192665887665013,
      "learning_rate": 4.857129707158637e-06,
      "loss": 0.6376,
      "step": 459
    },
    {
      "epoch": 0.40335397599605416,
      "grad_norm": 0.27689826150792163,
      "learning_rate": 4.8563403863141825e-06,
      "loss": 0.6172,
      "step": 460
    },
    {
      "epoch": 0.4042308324656108,
      "grad_norm": 0.311644665297658,
      "learning_rate": 4.855548955571183e-06,
      "loss": 0.6106,
      "step": 461
    },
    {
      "epoch": 0.4051076889351674,
      "grad_norm": 0.2912453467934098,
      "learning_rate": 4.854755415638298e-06,
      "loss": 0.6129,
      "step": 462
    },
    {
      "epoch": 0.40598454540472406,
      "grad_norm": 0.302939167109194,
      "learning_rate": 4.853959767226072e-06,
      "loss": 0.6301,
      "step": 463
    },
    {
      "epoch": 0.4068614018742807,
      "grad_norm": 0.261297831693092,
      "learning_rate": 4.85316201104694e-06,
      "loss": 0.6136,
      "step": 464
    },
    {
      "epoch": 0.40773825834383737,
      "grad_norm": 0.3154856081824323,
      "learning_rate": 4.852362147815225e-06,
      "loss": 0.6171,
      "step": 465
    },
    {
      "epoch": 0.40861511481339396,
      "grad_norm": 0.29411022742744497,
      "learning_rate": 4.8515601782471325e-06,
      "loss": 0.6085,
      "step": 466
    },
    {
      "epoch": 0.4094919712829506,
      "grad_norm": 0.3027595832299397,
      "learning_rate": 4.8507561030607576e-06,
      "loss": 0.6151,
      "step": 467
    },
    {
      "epoch": 0.41036882775250727,
      "grad_norm": 0.3003092813187261,
      "learning_rate": 4.84994992297608e-06,
      "loss": 0.6071,
      "step": 468
    },
    {
      "epoch": 0.4112456842220639,
      "grad_norm": 0.27374249219050456,
      "learning_rate": 4.849141638714965e-06,
      "loss": 0.6166,
      "step": 469
    },
    {
      "epoch": 0.4121225406916205,
      "grad_norm": 0.3064667255626573,
      "learning_rate": 4.84833125100116e-06,
      "loss": 0.6024,
      "step": 470
    },
    {
      "epoch": 0.4129993971611772,
      "grad_norm": 0.28188617697439766,
      "learning_rate": 4.847518760560297e-06,
      "loss": 0.6134,
      "step": 471
    },
    {
      "epoch": 0.4138762536307338,
      "grad_norm": 0.27693005272362925,
      "learning_rate": 4.846704168119892e-06,
      "loss": 0.5984,
      "step": 472
    },
    {
      "epoch": 0.4147531101002905,
      "grad_norm": 0.3011450154809493,
      "learning_rate": 4.84588747440934e-06,
      "loss": 0.5932,
      "step": 473
    },
    {
      "epoch": 0.4156299665698471,
      "grad_norm": 0.25715138595393167,
      "learning_rate": 4.845068680159921e-06,
      "loss": 0.6101,
      "step": 474
    },
    {
      "epoch": 0.41650682303940373,
      "grad_norm": 0.2963493163477849,
      "learning_rate": 4.844247786104794e-06,
      "loss": 0.6081,
      "step": 475
    },
    {
      "epoch": 0.4173836795089604,
      "grad_norm": 0.29399759702492007,
      "learning_rate": 4.8434247929789975e-06,
      "loss": 0.6046,
      "step": 476
    },
    {
      "epoch": 0.41826053597851703,
      "grad_norm": 0.3126535237916745,
      "learning_rate": 4.842599701519451e-06,
      "loss": 0.6304,
      "step": 477
    },
    {
      "epoch": 0.41913739244807363,
      "grad_norm": 0.29299694878032745,
      "learning_rate": 4.841772512464953e-06,
      "loss": 0.6168,
      "step": 478
    },
    {
      "epoch": 0.4200142489176303,
      "grad_norm": 0.289486342187316,
      "learning_rate": 4.840943226556178e-06,
      "loss": 0.6031,
      "step": 479
    },
    {
      "epoch": 0.42089110538718694,
      "grad_norm": 0.30359254383613277,
      "learning_rate": 4.840111844535682e-06,
      "loss": 0.5994,
      "step": 480
    },
    {
      "epoch": 0.4217679618567436,
      "grad_norm": 0.2641793447534652,
      "learning_rate": 4.839278367147894e-06,
      "loss": 0.6036,
      "step": 481
    },
    {
      "epoch": 0.4226448183263002,
      "grad_norm": 0.29968320834098117,
      "learning_rate": 4.838442795139121e-06,
      "loss": 0.6193,
      "step": 482
    },
    {
      "epoch": 0.42352167479585684,
      "grad_norm": 0.30614554761610074,
      "learning_rate": 4.837605129257546e-06,
      "loss": 0.6115,
      "step": 483
    },
    {
      "epoch": 0.4243985312654135,
      "grad_norm": 0.29316129861054724,
      "learning_rate": 4.836765370253223e-06,
      "loss": 0.6039,
      "step": 484
    },
    {
      "epoch": 0.42527538773497015,
      "grad_norm": 0.35388210389950725,
      "learning_rate": 4.835923518878088e-06,
      "loss": 0.6089,
      "step": 485
    },
    {
      "epoch": 0.4261522442045268,
      "grad_norm": 0.27541931694811506,
      "learning_rate": 4.835079575885944e-06,
      "loss": 0.6129,
      "step": 486
    },
    {
      "epoch": 0.4270291006740834,
      "grad_norm": 0.3408256598988536,
      "learning_rate": 4.834233542032468e-06,
      "loss": 0.6165,
      "step": 487
    },
    {
      "epoch": 0.42790595714364005,
      "grad_norm": 0.30259946435062773,
      "learning_rate": 4.83338541807521e-06,
      "loss": 0.6111,
      "step": 488
    },
    {
      "epoch": 0.4287828136131967,
      "grad_norm": 0.2871132966743198,
      "learning_rate": 4.832535204773593e-06,
      "loss": 0.6273,
      "step": 489
    },
    {
      "epoch": 0.42965967008275335,
      "grad_norm": 0.3457337315321895,
      "learning_rate": 4.8316829028889076e-06,
      "loss": 0.6005,
      "step": 490
    },
    {
      "epoch": 0.43053652655230995,
      "grad_norm": 0.2668696078107318,
      "learning_rate": 4.830828513184317e-06,
      "loss": 0.6122,
      "step": 491
    },
    {
      "epoch": 0.4314133830218666,
      "grad_norm": 0.321068645111551,
      "learning_rate": 4.829972036424854e-06,
      "loss": 0.6058,
      "step": 492
    },
    {
      "epoch": 0.43229023949142326,
      "grad_norm": 0.26125737492647644,
      "learning_rate": 4.829113473377417e-06,
      "loss": 0.6143,
      "step": 493
    },
    {
      "epoch": 0.4331670959609799,
      "grad_norm": 0.32002755047063874,
      "learning_rate": 4.828252824810777e-06,
      "loss": 0.6061,
      "step": 494
    },
    {
      "epoch": 0.4340439524305365,
      "grad_norm": 0.2863878470189295,
      "learning_rate": 4.82739009149557e-06,
      "loss": 0.5977,
      "step": 495
    },
    {
      "epoch": 0.43492080890009316,
      "grad_norm": 0.31874371835878795,
      "learning_rate": 4.826525274204297e-06,
      "loss": 0.608,
      "step": 496
    },
    {
      "epoch": 0.4357976653696498,
      "grad_norm": 0.2956391151217163,
      "learning_rate": 4.825658373711328e-06,
      "loss": 0.6107,
      "step": 497
    },
    {
      "epoch": 0.43667452183920646,
      "grad_norm": 0.288406786632812,
      "learning_rate": 4.824789390792899e-06,
      "loss": 0.6094,
      "step": 498
    },
    {
      "epoch": 0.43755137830876306,
      "grad_norm": 0.33737182032602686,
      "learning_rate": 4.823918326227106e-06,
      "loss": 0.5971,
      "step": 499
    },
    {
      "epoch": 0.4384282347783197,
      "grad_norm": 0.25632117321609454,
      "learning_rate": 4.823045180793914e-06,
      "loss": 0.6044,
      "step": 500
    },
    {
      "epoch": 0.43930509124787637,
      "grad_norm": 0.2978956835348055,
      "learning_rate": 4.8221699552751465e-06,
      "loss": 0.6009,
      "step": 501
    },
    {
      "epoch": 0.440181947717433,
      "grad_norm": 0.30339339194561,
      "learning_rate": 4.821292650454495e-06,
      "loss": 0.6113,
      "step": 502
    },
    {
      "epoch": 0.4410588041869896,
      "grad_norm": 0.3083549716587437,
      "learning_rate": 4.8204132671175085e-06,
      "loss": 0.6074,
      "step": 503
    },
    {
      "epoch": 0.44193566065654627,
      "grad_norm": 0.291272682255802,
      "learning_rate": 4.819531806051599e-06,
      "loss": 0.606,
      "step": 504
    },
    {
      "epoch": 0.4428125171261029,
      "grad_norm": 0.3183233272727026,
      "learning_rate": 4.818648268046038e-06,
      "loss": 0.6145,
      "step": 505
    },
    {
      "epoch": 0.4436893735956596,
      "grad_norm": 0.27989457450916727,
      "learning_rate": 4.817762653891957e-06,
      "loss": 0.6095,
      "step": 506
    },
    {
      "epoch": 0.4445662300652162,
      "grad_norm": 0.32106502207942483,
      "learning_rate": 4.816874964382346e-06,
      "loss": 0.6096,
      "step": 507
    },
    {
      "epoch": 0.4454430865347728,
      "grad_norm": 0.2690675603747584,
      "learning_rate": 4.815985200312057e-06,
      "loss": 0.5986,
      "step": 508
    },
    {
      "epoch": 0.4463199430043295,
      "grad_norm": 0.2818980909126885,
      "learning_rate": 4.815093362477793e-06,
      "loss": 0.6136,
      "step": 509
    },
    {
      "epoch": 0.44719679947388613,
      "grad_norm": 0.29748447845455983,
      "learning_rate": 4.8141994516781196e-06,
      "loss": 0.6162,
      "step": 510
    },
    {
      "epoch": 0.4480736559434428,
      "grad_norm": 0.3107094817046459,
      "learning_rate": 4.813303468713456e-06,
      "loss": 0.5939,
      "step": 511
    },
    {
      "epoch": 0.4489505124129994,
      "grad_norm": 0.27493905192543294,
      "learning_rate": 4.812405414386078e-06,
      "loss": 0.6054,
      "step": 512
    },
    {
      "epoch": 0.44982736888255603,
      "grad_norm": 0.28885594119974684,
      "learning_rate": 4.811505289500113e-06,
      "loss": 0.611,
      "step": 513
    },
    {
      "epoch": 0.4507042253521127,
      "grad_norm": 0.2724458036095346,
      "learning_rate": 4.810603094861548e-06,
      "loss": 0.6296,
      "step": 514
    },
    {
      "epoch": 0.45158108182166934,
      "grad_norm": 0.3171235548951884,
      "learning_rate": 4.809698831278217e-06,
      "loss": 0.6137,
      "step": 515
    },
    {
      "epoch": 0.45245793829122594,
      "grad_norm": 0.2975607228468226,
      "learning_rate": 4.808792499559812e-06,
      "loss": 0.6081,
      "step": 516
    },
    {
      "epoch": 0.4533347947607826,
      "grad_norm": 0.29553804453973653,
      "learning_rate": 4.807884100517873e-06,
      "loss": 0.6106,
      "step": 517
    },
    {
      "epoch": 0.45421165123033924,
      "grad_norm": 0.29283068458115197,
      "learning_rate": 4.8069736349657935e-06,
      "loss": 0.6144,
      "step": 518
    },
    {
      "epoch": 0.4550885076998959,
      "grad_norm": 0.3123674697628625,
      "learning_rate": 4.806061103718816e-06,
      "loss": 0.6024,
      "step": 519
    },
    {
      "epoch": 0.4559653641694525,
      "grad_norm": 0.3185535504257689,
      "learning_rate": 4.805146507594034e-06,
      "loss": 0.6031,
      "step": 520
    },
    {
      "epoch": 0.45684222063900914,
      "grad_norm": 0.32719458735857726,
      "learning_rate": 4.804229847410388e-06,
      "loss": 0.614,
      "step": 521
    },
    {
      "epoch": 0.4577190771085658,
      "grad_norm": 0.2756686412179773,
      "learning_rate": 4.803311123988668e-06,
      "loss": 0.6143,
      "step": 522
    },
    {
      "epoch": 0.45859593357812245,
      "grad_norm": 0.3193363571929515,
      "learning_rate": 4.802390338151512e-06,
      "loss": 0.5962,
      "step": 523
    },
    {
      "epoch": 0.45947279004767905,
      "grad_norm": 0.27470129307670516,
      "learning_rate": 4.801467490723402e-06,
      "loss": 0.6118,
      "step": 524
    },
    {
      "epoch": 0.4603496465172357,
      "grad_norm": 0.3268257836594815,
      "learning_rate": 4.800542582530668e-06,
      "loss": 0.6091,
      "step": 525
    },
    {
      "epoch": 0.46122650298679235,
      "grad_norm": 0.2636715015821582,
      "learning_rate": 4.799615614401488e-06,
      "loss": 0.6113,
      "step": 526
    },
    {
      "epoch": 0.462103359456349,
      "grad_norm": 0.3309929173426789,
      "learning_rate": 4.79868658716588e-06,
      "loss": 0.6063,
      "step": 527
    },
    {
      "epoch": 0.4629802159259056,
      "grad_norm": 0.2705433155095911,
      "learning_rate": 4.7977555016557054e-06,
      "loss": 0.6115,
      "step": 528
    },
    {
      "epoch": 0.46385707239546226,
      "grad_norm": 0.2986983107432822,
      "learning_rate": 4.796822358704673e-06,
      "loss": 0.624,
      "step": 529
    },
    {
      "epoch": 0.4647339288650189,
      "grad_norm": 0.27153673858142124,
      "learning_rate": 4.7958871591483305e-06,
      "loss": 0.6144,
      "step": 530
    },
    {
      "epoch": 0.46561078533457556,
      "grad_norm": 0.2774095045069063,
      "learning_rate": 4.794949903824069e-06,
      "loss": 0.6082,
      "step": 531
    },
    {
      "epoch": 0.4664876418041322,
      "grad_norm": 0.28167525290961587,
      "learning_rate": 4.794010593571118e-06,
      "loss": 0.6106,
      "step": 532
    },
    {
      "epoch": 0.4673644982736888,
      "grad_norm": 0.2626835693504621,
      "learning_rate": 4.793069229230548e-06,
      "loss": 0.6142,
      "step": 533
    },
    {
      "epoch": 0.46824135474324546,
      "grad_norm": 0.27619948959341917,
      "learning_rate": 4.792125811645271e-06,
      "loss": 0.6073,
      "step": 534
    },
    {
      "epoch": 0.4691182112128021,
      "grad_norm": 0.2913249262978291,
      "learning_rate": 4.791180341660035e-06,
      "loss": 0.6034,
      "step": 535
    },
    {
      "epoch": 0.46999506768235877,
      "grad_norm": 0.2792318560656134,
      "learning_rate": 4.790232820121426e-06,
      "loss": 0.6002,
      "step": 536
    },
    {
      "epoch": 0.47087192415191537,
      "grad_norm": 0.2690237732263836,
      "learning_rate": 4.789283247877867e-06,
      "loss": 0.6128,
      "step": 537
    },
    {
      "epoch": 0.471748780621472,
      "grad_norm": 0.2875784864108413,
      "learning_rate": 4.7883316257796195e-06,
      "loss": 0.6125,
      "step": 538
    },
    {
      "epoch": 0.47262563709102867,
      "grad_norm": 0.3494280106540881,
      "learning_rate": 4.787377954678776e-06,
      "loss": 0.6079,
      "step": 539
    },
    {
      "epoch": 0.4735024935605853,
      "grad_norm": 0.27811345732659243,
      "learning_rate": 4.786422235429269e-06,
      "loss": 0.6118,
      "step": 540
    },
    {
      "epoch": 0.4743793500301419,
      "grad_norm": 0.33921109846320074,
      "learning_rate": 4.785464468886859e-06,
      "loss": 0.6176,
      "step": 541
    },
    {
      "epoch": 0.4752562064996986,
      "grad_norm": 0.29592545517880114,
      "learning_rate": 4.784504655909146e-06,
      "loss": 0.6131,
      "step": 542
    },
    {
      "epoch": 0.4761330629692552,
      "grad_norm": 0.29373530511374163,
      "learning_rate": 4.783542797355558e-06,
      "loss": 0.6082,
      "step": 543
    },
    {
      "epoch": 0.4770099194388119,
      "grad_norm": 0.2999691792256973,
      "learning_rate": 4.782578894087357e-06,
      "loss": 0.5981,
      "step": 544
    },
    {
      "epoch": 0.4778867759083685,
      "grad_norm": 0.2694268894908227,
      "learning_rate": 4.781612946967632e-06,
      "loss": 0.6055,
      "step": 545
    },
    {
      "epoch": 0.47876363237792513,
      "grad_norm": 0.2970836241532985,
      "learning_rate": 4.780644956861307e-06,
      "loss": 0.6002,
      "step": 546
    },
    {
      "epoch": 0.4796404888474818,
      "grad_norm": 0.3413332201519291,
      "learning_rate": 4.7796749246351335e-06,
      "loss": 0.6103,
      "step": 547
    },
    {
      "epoch": 0.48051734531703844,
      "grad_norm": 0.27732196553749033,
      "learning_rate": 4.77870285115769e-06,
      "loss": 0.5972,
      "step": 548
    },
    {
      "epoch": 0.48139420178659503,
      "grad_norm": 0.32594912225980904,
      "learning_rate": 4.777728737299387e-06,
      "loss": 0.6275,
      "step": 549
    },
    {
      "epoch": 0.4822710582561517,
      "grad_norm": 0.28158230943213153,
      "learning_rate": 4.776752583932455e-06,
      "loss": 0.6215,
      "step": 550
    },
    {
      "epoch": 0.48314791472570834,
      "grad_norm": 0.3244722564822324,
      "learning_rate": 4.775774391930956e-06,
      "loss": 0.5947,
      "step": 551
    },
    {
      "epoch": 0.484024771195265,
      "grad_norm": 0.26397208532030864,
      "learning_rate": 4.774794162170777e-06,
      "loss": 0.611,
      "step": 552
    },
    {
      "epoch": 0.4849016276648216,
      "grad_norm": 0.2816890422555255,
      "learning_rate": 4.773811895529629e-06,
      "loss": 0.5942,
      "step": 553
    },
    {
      "epoch": 0.48577848413437824,
      "grad_norm": 0.28224512879430635,
      "learning_rate": 4.772827592887046e-06,
      "loss": 0.5918,
      "step": 554
    },
    {
      "epoch": 0.4866553406039349,
      "grad_norm": 0.2978578883597439,
      "learning_rate": 4.771841255124385e-06,
      "loss": 0.6031,
      "step": 555
    },
    {
      "epoch": 0.48753219707349155,
      "grad_norm": 0.3212067488646109,
      "learning_rate": 4.770852883124827e-06,
      "loss": 0.6066,
      "step": 556
    },
    {
      "epoch": 0.4884090535430482,
      "grad_norm": 0.3047898856904216,
      "learning_rate": 4.769862477773374e-06,
      "loss": 0.6097,
      "step": 557
    },
    {
      "epoch": 0.4892859100126048,
      "grad_norm": 0.32816575436148626,
      "learning_rate": 4.768870039956846e-06,
      "loss": 0.6078,
      "step": 558
    },
    {
      "epoch": 0.49016276648216145,
      "grad_norm": 0.30333447423661625,
      "learning_rate": 4.767875570563887e-06,
      "loss": 0.6103,
      "step": 559
    },
    {
      "epoch": 0.4910396229517181,
      "grad_norm": 0.32463487013229164,
      "learning_rate": 4.766879070484957e-06,
      "loss": 0.5925,
      "step": 560
    },
    {
      "epoch": 0.49191647942127475,
      "grad_norm": 0.27125555349656966,
      "learning_rate": 4.765880540612336e-06,
      "loss": 0.6095,
      "step": 561
    },
    {
      "epoch": 0.49279333589083135,
      "grad_norm": 0.29571340419933284,
      "learning_rate": 4.764879981840121e-06,
      "loss": 0.6061,
      "step": 562
    },
    {
      "epoch": 0.493670192360388,
      "grad_norm": 0.28779220439984465,
      "learning_rate": 4.763877395064225e-06,
      "loss": 0.6164,
      "step": 563
    },
    {
      "epoch": 0.49454704882994466,
      "grad_norm": 0.3023002461106019,
      "learning_rate": 4.762872781182378e-06,
      "loss": 0.6099,
      "step": 564
    },
    {
      "epoch": 0.4954239052995013,
      "grad_norm": 0.2852998688047179,
      "learning_rate": 4.761866141094126e-06,
      "loss": 0.6151,
      "step": 565
    },
    {
      "epoch": 0.4963007617690579,
      "grad_norm": 0.27004415072990756,
      "learning_rate": 4.7608574757008245e-06,
      "loss": 0.6056,
      "step": 566
    },
    {
      "epoch": 0.49717761823861456,
      "grad_norm": 0.26583697629837466,
      "learning_rate": 4.759846785905649e-06,
      "loss": 0.6073,
      "step": 567
    },
    {
      "epoch": 0.4980544747081712,
      "grad_norm": 0.29963137609858226,
      "learning_rate": 4.758834072613583e-06,
      "loss": 0.6175,
      "step": 568
    },
    {
      "epoch": 0.49893133117772787,
      "grad_norm": 0.2777428291092147,
      "learning_rate": 4.757819336731424e-06,
      "loss": 0.6084,
      "step": 569
    },
    {
      "epoch": 0.49980818764728446,
      "grad_norm": 0.286537576055084,
      "learning_rate": 4.756802579167781e-06,
      "loss": 0.6122,
      "step": 570
    },
    {
      "epoch": 0.5006850441168411,
      "grad_norm": 0.2900434750609322,
      "learning_rate": 4.755783800833071e-06,
      "loss": 0.61,
      "step": 571
    },
    {
      "epoch": 0.5015619005863977,
      "grad_norm": 0.29602981997833644,
      "learning_rate": 4.754763002639522e-06,
      "loss": 0.5979,
      "step": 572
    },
    {
      "epoch": 0.5024387570559544,
      "grad_norm": 0.2850500950921633,
      "learning_rate": 4.75374018550117e-06,
      "loss": 0.616,
      "step": 573
    },
    {
      "epoch": 0.503315613525511,
      "grad_norm": 0.2747595431255721,
      "learning_rate": 4.752715350333858e-06,
      "loss": 0.6082,
      "step": 574
    },
    {
      "epoch": 0.5041924699950677,
      "grad_norm": 0.30963433949041175,
      "learning_rate": 4.75168849805524e-06,
      "loss": 0.6062,
      "step": 575
    },
    {
      "epoch": 0.5050693264646243,
      "grad_norm": 0.28817154630491854,
      "learning_rate": 4.750659629584772e-06,
      "loss": 0.615,
      "step": 576
    },
    {
      "epoch": 0.5059461829341809,
      "grad_norm": 0.29777143797501865,
      "learning_rate": 4.749628745843715e-06,
      "loss": 0.6093,
      "step": 577
    },
    {
      "epoch": 0.5068230394037376,
      "grad_norm": 0.2761328411528336,
      "learning_rate": 4.748595847755137e-06,
      "loss": 0.5949,
      "step": 578
    },
    {
      "epoch": 0.5076998958732942,
      "grad_norm": 0.27941749417554973,
      "learning_rate": 4.74756093624391e-06,
      "loss": 0.6165,
      "step": 579
    },
    {
      "epoch": 0.5085767523428508,
      "grad_norm": 0.28883681834919644,
      "learning_rate": 4.746524012236706e-06,
      "loss": 0.6012,
      "step": 580
    },
    {
      "epoch": 0.5094536088124075,
      "grad_norm": 0.2712633209555587,
      "learning_rate": 4.7454850766620005e-06,
      "loss": 0.5898,
      "step": 581
    },
    {
      "epoch": 0.5103304652819641,
      "grad_norm": 0.29386364789948854,
      "learning_rate": 4.7444441304500714e-06,
      "loss": 0.6057,
      "step": 582
    },
    {
      "epoch": 0.5112073217515208,
      "grad_norm": 0.27998562308750735,
      "learning_rate": 4.743401174532994e-06,
      "loss": 0.597,
      "step": 583
    },
    {
      "epoch": 0.5120841782210774,
      "grad_norm": 0.2944531079667381,
      "learning_rate": 4.742356209844646e-06,
      "loss": 0.5915,
      "step": 584
    },
    {
      "epoch": 0.512961034690634,
      "grad_norm": 0.29506045387008756,
      "learning_rate": 4.741309237320703e-06,
      "loss": 0.6178,
      "step": 585
    },
    {
      "epoch": 0.5138378911601907,
      "grad_norm": 0.299236621784075,
      "learning_rate": 4.740260257898638e-06,
      "loss": 0.6121,
      "step": 586
    },
    {
      "epoch": 0.5147147476297473,
      "grad_norm": 0.303688650889379,
      "learning_rate": 4.739209272517721e-06,
      "loss": 0.5982,
      "step": 587
    },
    {
      "epoch": 0.515591604099304,
      "grad_norm": 0.2925779066404172,
      "learning_rate": 4.738156282119018e-06,
      "loss": 0.5936,
      "step": 588
    },
    {
      "epoch": 0.5164684605688606,
      "grad_norm": 0.3374725318718031,
      "learning_rate": 4.73710128764539e-06,
      "loss": 0.6001,
      "step": 589
    },
    {
      "epoch": 0.5173453170384172,
      "grad_norm": 0.28811046561615106,
      "learning_rate": 4.736044290041496e-06,
      "loss": 0.61,
      "step": 590
    },
    {
      "epoch": 0.518222173507974,
      "grad_norm": 0.32139851009391945,
      "learning_rate": 4.7349852902537814e-06,
      "loss": 0.5931,
      "step": 591
    },
    {
      "epoch": 0.5190990299775305,
      "grad_norm": 0.27307295767087736,
      "learning_rate": 4.733924289230493e-06,
      "loss": 0.6035,
      "step": 592
    },
    {
      "epoch": 0.5199758864470871,
      "grad_norm": 0.3098223534082736,
      "learning_rate": 4.7328612879216615e-06,
      "loss": 0.6082,
      "step": 593
    },
    {
      "epoch": 0.5208527429166439,
      "grad_norm": 0.2808341207944162,
      "learning_rate": 4.731796287279115e-06,
      "loss": 0.5965,
      "step": 594
    },
    {
      "epoch": 0.5217295993862004,
      "grad_norm": 0.3093125993326785,
      "learning_rate": 4.730729288256468e-06,
      "loss": 0.6018,
      "step": 595
    },
    {
      "epoch": 0.5226064558557572,
      "grad_norm": 0.30147164249765196,
      "learning_rate": 4.729660291809126e-06,
      "loss": 0.6072,
      "step": 596
    },
    {
      "epoch": 0.5234833123253138,
      "grad_norm": 0.2893545075475105,
      "learning_rate": 4.728589298894284e-06,
      "loss": 0.5894,
      "step": 597
    },
    {
      "epoch": 0.5243601687948704,
      "grad_norm": 0.29778530349250987,
      "learning_rate": 4.72751631047092e-06,
      "loss": 0.5941,
      "step": 598
    },
    {
      "epoch": 0.5252370252644271,
      "grad_norm": 0.2822751104373634,
      "learning_rate": 4.726441327499805e-06,
      "loss": 0.6056,
      "step": 599
    },
    {
      "epoch": 0.5261138817339837,
      "grad_norm": 0.30381920940202223,
      "learning_rate": 4.725364350943492e-06,
      "loss": 0.6016,
      "step": 600
    },
    {
      "epoch": 0.5269907382035403,
      "grad_norm": 0.2728312952142679,
      "learning_rate": 4.72428538176632e-06,
      "loss": 0.6033,
      "step": 601
    },
    {
      "epoch": 0.527867594673097,
      "grad_norm": 0.2920360605636878,
      "learning_rate": 4.723204420934413e-06,
      "loss": 0.614,
      "step": 602
    },
    {
      "epoch": 0.5287444511426536,
      "grad_norm": 0.282387818364113,
      "learning_rate": 4.722121469415677e-06,
      "loss": 0.5901,
      "step": 603
    },
    {
      "epoch": 0.5296213076122103,
      "grad_norm": 0.2954181717364726,
      "learning_rate": 4.721036528179802e-06,
      "loss": 0.6043,
      "step": 604
    },
    {
      "epoch": 0.5304981640817669,
      "grad_norm": 0.3084979402180987,
      "learning_rate": 4.719949598198258e-06,
      "loss": 0.5931,
      "step": 605
    },
    {
      "epoch": 0.5313750205513235,
      "grad_norm": 0.3252699365181927,
      "learning_rate": 4.718860680444297e-06,
      "loss": 0.6181,
      "step": 606
    },
    {
      "epoch": 0.5322518770208802,
      "grad_norm": 0.28357295095306256,
      "learning_rate": 4.717769775892951e-06,
      "loss": 0.5903,
      "step": 607
    },
    {
      "epoch": 0.5331287334904368,
      "grad_norm": 0.3569079908279582,
      "learning_rate": 4.7166768855210294e-06,
      "loss": 0.5939,
      "step": 608
    },
    {
      "epoch": 0.5340055899599935,
      "grad_norm": 0.31741200071485426,
      "learning_rate": 4.715582010307121e-06,
      "loss": 0.5897,
      "step": 609
    },
    {
      "epoch": 0.5348824464295501,
      "grad_norm": 0.3218789245412814,
      "learning_rate": 4.714485151231593e-06,
      "loss": 0.5926,
      "step": 610
    },
    {
      "epoch": 0.5357593028991067,
      "grad_norm": 0.2824610260583936,
      "learning_rate": 4.713386309276585e-06,
      "loss": 0.6039,
      "step": 611
    },
    {
      "epoch": 0.5366361593686634,
      "grad_norm": 0.3111981063952015,
      "learning_rate": 4.712285485426017e-06,
      "loss": 0.6012,
      "step": 612
    },
    {
      "epoch": 0.53751301583822,
      "grad_norm": 0.2719370118974663,
      "learning_rate": 4.7111826806655804e-06,
      "loss": 0.5912,
      "step": 613
    },
    {
      "epoch": 0.5383898723077766,
      "grad_norm": 0.3161533458613161,
      "learning_rate": 4.710077895982741e-06,
      "loss": 0.5962,
      "step": 614
    },
    {
      "epoch": 0.5392667287773333,
      "grad_norm": 0.26701338476822095,
      "learning_rate": 4.708971132366739e-06,
      "loss": 0.6025,
      "step": 615
    },
    {
      "epoch": 0.5401435852468899,
      "grad_norm": 0.28447205168753736,
      "learning_rate": 4.707862390808583e-06,
      "loss": 0.5959,
      "step": 616
    },
    {
      "epoch": 0.5410204417164466,
      "grad_norm": 0.26585350433139904,
      "learning_rate": 4.706751672301058e-06,
      "loss": 0.5946,
      "step": 617
    },
    {
      "epoch": 0.5418972981860032,
      "grad_norm": 0.28276117956241253,
      "learning_rate": 4.705638977838712e-06,
      "loss": 0.5986,
      "step": 618
    },
    {
      "epoch": 0.5427741546555598,
      "grad_norm": 0.2752743049051474,
      "learning_rate": 4.704524308417872e-06,
      "loss": 0.6044,
      "step": 619
    },
    {
      "epoch": 0.5436510111251165,
      "grad_norm": 0.2744635750786116,
      "learning_rate": 4.703407665036622e-06,
      "loss": 0.6,
      "step": 620
    },
    {
      "epoch": 0.5445278675946731,
      "grad_norm": 0.2942835089324837,
      "learning_rate": 4.702289048694824e-06,
      "loss": 0.6163,
      "step": 621
    },
    {
      "epoch": 0.5454047240642297,
      "grad_norm": 0.29074004193212294,
      "learning_rate": 4.7011684603940985e-06,
      "loss": 0.61,
      "step": 622
    },
    {
      "epoch": 0.5462815805337864,
      "grad_norm": 0.265548853050648,
      "learning_rate": 4.700045901137838e-06,
      "loss": 0.6003,
      "step": 623
    },
    {
      "epoch": 0.547158437003343,
      "grad_norm": 0.28147341099339,
      "learning_rate": 4.6989213719311956e-06,
      "loss": 0.6057,
      "step": 624
    },
    {
      "epoch": 0.5480352934728997,
      "grad_norm": 0.25061686481638634,
      "learning_rate": 4.697794873781089e-06,
      "loss": 0.6103,
      "step": 625
    },
    {
      "epoch": 0.5489121499424563,
      "grad_norm": 0.28270079603778164,
      "learning_rate": 4.696666407696201e-06,
      "loss": 0.5999,
      "step": 626
    },
    {
      "epoch": 0.5497890064120129,
      "grad_norm": 0.25832596909684546,
      "learning_rate": 4.695535974686975e-06,
      "loss": 0.5989,
      "step": 627
    },
    {
      "epoch": 0.5506658628815696,
      "grad_norm": 0.28610489660664173,
      "learning_rate": 4.694403575765615e-06,
      "loss": 0.6039,
      "step": 628
    },
    {
      "epoch": 0.5515427193511262,
      "grad_norm": 0.26039812165621273,
      "learning_rate": 4.693269211946086e-06,
      "loss": 0.5999,
      "step": 629
    },
    {
      "epoch": 0.5524195758206829,
      "grad_norm": 0.2802813802636672,
      "learning_rate": 4.692132884244113e-06,
      "loss": 0.5957,
      "step": 630
    },
    {
      "epoch": 0.5532964322902395,
      "grad_norm": 0.28045233973715045,
      "learning_rate": 4.69099459367718e-06,
      "loss": 0.6057,
      "step": 631
    },
    {
      "epoch": 0.5541732887597961,
      "grad_norm": 0.2850165288729873,
      "learning_rate": 4.689854341264525e-06,
      "loss": 0.6062,
      "step": 632
    },
    {
      "epoch": 0.5550501452293528,
      "grad_norm": 0.318532937146288,
      "learning_rate": 4.688712128027147e-06,
      "loss": 0.615,
      "step": 633
    },
    {
      "epoch": 0.5559270016989094,
      "grad_norm": 0.2700297126701359,
      "learning_rate": 4.687567954987798e-06,
      "loss": 0.6027,
      "step": 634
    },
    {
      "epoch": 0.556803858168466,
      "grad_norm": 0.2709567537114069,
      "learning_rate": 4.686421823170987e-06,
      "loss": 0.606,
      "step": 635
    },
    {
      "epoch": 0.5576807146380227,
      "grad_norm": 0.30943308206128534,
      "learning_rate": 4.685273733602975e-06,
      "loss": 0.6122,
      "step": 636
    },
    {
      "epoch": 0.5585575711075793,
      "grad_norm": 0.2866407684585244,
      "learning_rate": 4.6841236873117765e-06,
      "loss": 0.5983,
      "step": 637
    },
    {
      "epoch": 0.559434427577136,
      "grad_norm": 0.30074858616349,
      "learning_rate": 4.6829716853271576e-06,
      "loss": 0.6112,
      "step": 638
    },
    {
      "epoch": 0.5603112840466926,
      "grad_norm": 0.27481764632891953,
      "learning_rate": 4.681817728680638e-06,
      "loss": 0.5923,
      "step": 639
    },
    {
      "epoch": 0.5611881405162492,
      "grad_norm": 0.30985792219487485,
      "learning_rate": 4.680661818405485e-06,
      "loss": 0.6083,
      "step": 640
    },
    {
      "epoch": 0.5620649969858059,
      "grad_norm": 0.30548099410676144,
      "learning_rate": 4.679503955536715e-06,
      "loss": 0.6105,
      "step": 641
    },
    {
      "epoch": 0.5629418534553625,
      "grad_norm": 0.27736446160459594,
      "learning_rate": 4.678344141111096e-06,
      "loss": 0.6176,
      "step": 642
    },
    {
      "epoch": 0.5638187099249191,
      "grad_norm": 0.313370779146898,
      "learning_rate": 4.6771823761671386e-06,
      "loss": 0.6035,
      "step": 643
    },
    {
      "epoch": 0.5646955663944758,
      "grad_norm": 0.27389315771120454,
      "learning_rate": 4.676018661745104e-06,
      "loss": 0.6118,
      "step": 644
    },
    {
      "epoch": 0.5655724228640324,
      "grad_norm": 0.3272671136560007,
      "learning_rate": 4.674852998886998e-06,
      "loss": 0.6059,
      "step": 645
    },
    {
      "epoch": 0.5664492793335891,
      "grad_norm": 0.29110434636858074,
      "learning_rate": 4.6736853886365704e-06,
      "loss": 0.5957,
      "step": 646
    },
    {
      "epoch": 0.5673261358031457,
      "grad_norm": 0.27566640053494834,
      "learning_rate": 4.672515832039315e-06,
      "loss": 0.5847,
      "step": 647
    },
    {
      "epoch": 0.5682029922727023,
      "grad_norm": 0.3439499837560115,
      "learning_rate": 4.671344330142468e-06,
      "loss": 0.6066,
      "step": 648
    },
    {
      "epoch": 0.569079848742259,
      "grad_norm": 0.2831795036732806,
      "learning_rate": 4.670170883995007e-06,
      "loss": 0.5875,
      "step": 649
    },
    {
      "epoch": 0.5699567052118156,
      "grad_norm": 0.3084275937304928,
      "learning_rate": 4.668995494647653e-06,
      "loss": 0.6046,
      "step": 650
    },
    {
      "epoch": 0.5708335616813722,
      "grad_norm": 0.2876312566066635,
      "learning_rate": 4.667818163152864e-06,
      "loss": 0.609,
      "step": 651
    },
    {
      "epoch": 0.5717104181509289,
      "grad_norm": 0.27641311480374825,
      "learning_rate": 4.6666388905648394e-06,
      "loss": 0.6084,
      "step": 652
    },
    {
      "epoch": 0.5725872746204855,
      "grad_norm": 0.2760161681243495,
      "learning_rate": 4.665457677939515e-06,
      "loss": 0.6036,
      "step": 653
    },
    {
      "epoch": 0.5734641310900422,
      "grad_norm": 0.2664014070652965,
      "learning_rate": 4.664274526334563e-06,
      "loss": 0.6047,
      "step": 654
    },
    {
      "epoch": 0.5743409875595988,
      "grad_norm": 0.27367722811571643,
      "learning_rate": 4.663089436809395e-06,
      "loss": 0.607,
      "step": 655
    },
    {
      "epoch": 0.5752178440291554,
      "grad_norm": 0.2971494077897638,
      "learning_rate": 4.661902410425156e-06,
      "loss": 0.5851,
      "step": 656
    },
    {
      "epoch": 0.5760947004987121,
      "grad_norm": 0.28359506675344376,
      "learning_rate": 4.660713448244723e-06,
      "loss": 0.5911,
      "step": 657
    },
    {
      "epoch": 0.5769715569682687,
      "grad_norm": 0.27646693971859265,
      "learning_rate": 4.6595225513327105e-06,
      "loss": 0.601,
      "step": 658
    },
    {
      "epoch": 0.5778484134378254,
      "grad_norm": 0.2707379861432875,
      "learning_rate": 4.658329720755464e-06,
      "loss": 0.5905,
      "step": 659
    },
    {
      "epoch": 0.578725269907382,
      "grad_norm": 0.301271851117793,
      "learning_rate": 4.657134957581057e-06,
      "loss": 0.6023,
      "step": 660
    },
    {
      "epoch": 0.5796021263769386,
      "grad_norm": 0.30214846729641187,
      "learning_rate": 4.6559382628793e-06,
      "loss": 0.6095,
      "step": 661
    },
    {
      "epoch": 0.5804789828464954,
      "grad_norm": 0.2880769859831512,
      "learning_rate": 4.6547396377217265e-06,
      "loss": 0.6012,
      "step": 662
    },
    {
      "epoch": 0.581355839316052,
      "grad_norm": 0.3363251460755209,
      "learning_rate": 4.653539083181603e-06,
      "loss": 0.5963,
      "step": 663
    },
    {
      "epoch": 0.5822326957856085,
      "grad_norm": 0.3446871487238731,
      "learning_rate": 4.652336600333921e-06,
      "loss": 0.5992,
      "step": 664
    },
    {
      "epoch": 0.5831095522551653,
      "grad_norm": 0.3016824402176579,
      "learning_rate": 4.651132190255401e-06,
      "loss": 0.6016,
      "step": 665
    },
    {
      "epoch": 0.5839864087247219,
      "grad_norm": 0.31791554379394255,
      "learning_rate": 4.649925854024486e-06,
      "loss": 0.5943,
      "step": 666
    },
    {
      "epoch": 0.5848632651942786,
      "grad_norm": 0.3603510668723624,
      "learning_rate": 4.648717592721347e-06,
      "loss": 0.6086,
      "step": 667
    },
    {
      "epoch": 0.5857401216638352,
      "grad_norm": 0.25073578292290827,
      "learning_rate": 4.647507407427877e-06,
      "loss": 0.5965,
      "step": 668
    },
    {
      "epoch": 0.5866169781333918,
      "grad_norm": 0.3401292596267892,
      "learning_rate": 4.646295299227691e-06,
      "loss": 0.5896,
      "step": 669
    },
    {
      "epoch": 0.5874938346029485,
      "grad_norm": 0.26798950974238206,
      "learning_rate": 4.645081269206128e-06,
      "loss": 0.5913,
      "step": 670
    },
    {
      "epoch": 0.5883706910725051,
      "grad_norm": 0.2712753517614824,
      "learning_rate": 4.643865318450247e-06,
      "loss": 0.5948,
      "step": 671
    },
    {
      "epoch": 0.5892475475420617,
      "grad_norm": 0.31478669896326056,
      "learning_rate": 4.642647448048824e-06,
      "loss": 0.6036,
      "step": 672
    },
    {
      "epoch": 0.5901244040116184,
      "grad_norm": 0.2853149586152437,
      "learning_rate": 4.641427659092359e-06,
      "loss": 0.5852,
      "step": 673
    },
    {
      "epoch": 0.591001260481175,
      "grad_norm": 0.31928733056145026,
      "learning_rate": 4.6402059526730656e-06,
      "loss": 0.596,
      "step": 674
    },
    {
      "epoch": 0.5918781169507317,
      "grad_norm": 0.28886504451895006,
      "learning_rate": 4.638982329884878e-06,
      "loss": 0.5867,
      "step": 675
    },
    {
      "epoch": 0.5927549734202883,
      "grad_norm": 0.34332786639440344,
      "learning_rate": 4.637756791823443e-06,
      "loss": 0.5951,
      "step": 676
    },
    {
      "epoch": 0.5936318298898449,
      "grad_norm": 0.31536294202913445,
      "learning_rate": 4.6365293395861225e-06,
      "loss": 0.6005,
      "step": 677
    },
    {
      "epoch": 0.5945086863594016,
      "grad_norm": 0.36612645695214535,
      "learning_rate": 4.6352999742719954e-06,
      "loss": 0.6125,
      "step": 678
    },
    {
      "epoch": 0.5953855428289582,
      "grad_norm": 0.2865910172606529,
      "learning_rate": 4.634068696981852e-06,
      "loss": 0.6096,
      "step": 679
    },
    {
      "epoch": 0.5962623992985149,
      "grad_norm": 0.3077121674916666,
      "learning_rate": 4.632835508818192e-06,
      "loss": 0.5891,
      "step": 680
    },
    {
      "epoch": 0.5971392557680715,
      "grad_norm": 0.2930520316480949,
      "learning_rate": 4.631600410885231e-06,
      "loss": 0.5918,
      "step": 681
    },
    {
      "epoch": 0.5980161122376281,
      "grad_norm": 0.3412197822800723,
      "learning_rate": 4.630363404288891e-06,
      "loss": 0.5998,
      "step": 682
    },
    {
      "epoch": 0.5988929687071848,
      "grad_norm": 0.2869686807201651,
      "learning_rate": 4.629124490136804e-06,
      "loss": 0.5952,
      "step": 683
    },
    {
      "epoch": 0.5997698251767414,
      "grad_norm": 0.3044523168792968,
      "learning_rate": 4.627883669538311e-06,
      "loss": 0.6058,
      "step": 684
    },
    {
      "epoch": 0.600646681646298,
      "grad_norm": 0.298754941767322,
      "learning_rate": 4.626640943604459e-06,
      "loss": 0.6099,
      "step": 685
    },
    {
      "epoch": 0.6015235381158547,
      "grad_norm": 0.30823608651620477,
      "learning_rate": 4.625396313448e-06,
      "loss": 0.5913,
      "step": 686
    },
    {
      "epoch": 0.6024003945854113,
      "grad_norm": 0.2745802532714142,
      "learning_rate": 4.624149780183395e-06,
      "loss": 0.5904,
      "step": 687
    },
    {
      "epoch": 0.603277251054968,
      "grad_norm": 0.2894557068485525,
      "learning_rate": 4.622901344926805e-06,
      "loss": 0.6006,
      "step": 688
    },
    {
      "epoch": 0.6041541075245246,
      "grad_norm": 0.2844643276622375,
      "learning_rate": 4.621651008796095e-06,
      "loss": 0.5972,
      "step": 689
    },
    {
      "epoch": 0.6050309639940812,
      "grad_norm": 0.3111750841694179,
      "learning_rate": 4.620398772910833e-06,
      "loss": 0.5911,
      "step": 690
    },
    {
      "epoch": 0.6059078204636379,
      "grad_norm": 0.30229136138256857,
      "learning_rate": 4.619144638392289e-06,
      "loss": 0.6063,
      "step": 691
    },
    {
      "epoch": 0.6067846769331945,
      "grad_norm": 0.2903177693650587,
      "learning_rate": 4.6178886063634295e-06,
      "loss": 0.6022,
      "step": 692
    },
    {
      "epoch": 0.6076615334027511,
      "grad_norm": 0.29466063932438424,
      "learning_rate": 4.616630677948924e-06,
      "loss": 0.609,
      "step": 693
    },
    {
      "epoch": 0.6085383898723078,
      "grad_norm": 0.29795014881552045,
      "learning_rate": 4.615370854275138e-06,
      "loss": 0.5923,
      "step": 694
    },
    {
      "epoch": 0.6094152463418644,
      "grad_norm": 0.2835342651327551,
      "learning_rate": 4.614109136470133e-06,
      "loss": 0.5941,
      "step": 695
    },
    {
      "epoch": 0.6102921028114211,
      "grad_norm": 0.2914927284695803,
      "learning_rate": 4.612845525663671e-06,
      "loss": 0.5915,
      "step": 696
    },
    {
      "epoch": 0.6111689592809777,
      "grad_norm": 0.27150994490869584,
      "learning_rate": 4.611580022987202e-06,
      "loss": 0.5903,
      "step": 697
    },
    {
      "epoch": 0.6120458157505343,
      "grad_norm": 0.27427922033901636,
      "learning_rate": 4.610312629573877e-06,
      "loss": 0.5826,
      "step": 698
    },
    {
      "epoch": 0.612922672220091,
      "grad_norm": 0.3257835351903302,
      "learning_rate": 4.609043346558536e-06,
      "loss": 0.608,
      "step": 699
    },
    {
      "epoch": 0.6137995286896476,
      "grad_norm": 0.27542786817313375,
      "learning_rate": 4.607772175077712e-06,
      "loss": 0.5914,
      "step": 700
    },
    {
      "epoch": 0.6146763851592043,
      "grad_norm": 0.32541464673918596,
      "learning_rate": 4.606499116269628e-06,
      "loss": 0.6004,
      "step": 701
    },
    {
      "epoch": 0.6155532416287609,
      "grad_norm": 0.2775394483279354,
      "learning_rate": 4.605224171274198e-06,
      "loss": 0.6042,
      "step": 702
    },
    {
      "epoch": 0.6164300980983175,
      "grad_norm": 0.3010566442707075,
      "learning_rate": 4.603947341233026e-06,
      "loss": 0.5893,
      "step": 703
    },
    {
      "epoch": 0.6173069545678742,
      "grad_norm": 0.28841806172316603,
      "learning_rate": 4.602668627289401e-06,
      "loss": 0.5932,
      "step": 704
    },
    {
      "epoch": 0.6181838110374308,
      "grad_norm": 0.32720143492110876,
      "learning_rate": 4.601388030588303e-06,
      "loss": 0.594,
      "step": 705
    },
    {
      "epoch": 0.6190606675069874,
      "grad_norm": 0.2629157828769276,
      "learning_rate": 4.600105552276393e-06,
      "loss": 0.5962,
      "step": 706
    },
    {
      "epoch": 0.6199375239765441,
      "grad_norm": 0.2976311641314985,
      "learning_rate": 4.598821193502019e-06,
      "loss": 0.5993,
      "step": 707
    },
    {
      "epoch": 0.6208143804461007,
      "grad_norm": 0.3223849407278096,
      "learning_rate": 4.597534955415214e-06,
      "loss": 0.6023,
      "step": 708
    },
    {
      "epoch": 0.6216912369156574,
      "grad_norm": 0.3228934470983084,
      "learning_rate": 4.596246839167692e-06,
      "loss": 0.6058,
      "step": 709
    },
    {
      "epoch": 0.622568093385214,
      "grad_norm": 0.2842350311614894,
      "learning_rate": 4.59495684591285e-06,
      "loss": 0.5965,
      "step": 710
    },
    {
      "epoch": 0.6234449498547706,
      "grad_norm": 0.30037127301855626,
      "learning_rate": 4.593664976805765e-06,
      "loss": 0.5912,
      "step": 711
    },
    {
      "epoch": 0.6243218063243273,
      "grad_norm": 0.29537031301186273,
      "learning_rate": 4.592371233003195e-06,
      "loss": 0.5847,
      "step": 712
    },
    {
      "epoch": 0.6251986627938839,
      "grad_norm": 0.3099776656835445,
      "learning_rate": 4.5910756156635725e-06,
      "loss": 0.6061,
      "step": 713
    },
    {
      "epoch": 0.6260755192634405,
      "grad_norm": 0.3343474177937486,
      "learning_rate": 4.589778125947012e-06,
      "loss": 0.5775,
      "step": 714
    },
    {
      "epoch": 0.6269523757329972,
      "grad_norm": 0.26492597760028275,
      "learning_rate": 4.588478765015304e-06,
      "loss": 0.6008,
      "step": 715
    },
    {
      "epoch": 0.6278292322025538,
      "grad_norm": 0.2996728173414987,
      "learning_rate": 4.587177534031914e-06,
      "loss": 0.5868,
      "step": 716
    },
    {
      "epoch": 0.6287060886721105,
      "grad_norm": 0.269698012084879,
      "learning_rate": 4.585874434161979e-06,
      "loss": 0.5908,
      "step": 717
    },
    {
      "epoch": 0.6295829451416671,
      "grad_norm": 0.3120812259438331,
      "learning_rate": 4.584569466572313e-06,
      "loss": 0.5964,
      "step": 718
    },
    {
      "epoch": 0.6304598016112237,
      "grad_norm": 0.306605213663903,
      "learning_rate": 4.583262632431402e-06,
      "loss": 0.587,
      "step": 719
    },
    {
"epoch": 0.6313366580807804, |
|
"grad_norm": 0.31045769873517814, |
|
"learning_rate": 4.581953932909403e-06, |
|
"loss": 0.5924, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.632213514550337, |
|
"grad_norm": 0.30956000847409926, |
|
"learning_rate": 4.580643369178142e-06, |
|
"loss": 0.5905, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.6330903710198936, |
|
"grad_norm": 0.2980650280091205, |
|
"learning_rate": 4.579330942411115e-06, |
|
"loss": 0.5961, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.6339672274894503, |
|
"grad_norm": 0.2784986194522932, |
|
"learning_rate": 4.578016653783488e-06, |
|
"loss": 0.5962, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.6348440839590069, |
|
"grad_norm": 0.32816601752120567, |
|
"learning_rate": 4.57670050447209e-06, |
|
"loss": 0.6149, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.6357209404285636, |
|
"grad_norm": 0.2822290286934802, |
|
"learning_rate": 4.575382495655421e-06, |
|
"loss": 0.5915, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.6365977968981202, |
|
"grad_norm": 0.2993973936416954, |
|
"learning_rate": 4.574062628513643e-06, |
|
"loss": 0.59, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.6374746533676768, |
|
"grad_norm": 0.27875804168057794, |
|
"learning_rate": 4.572740904228582e-06, |
|
"loss": 0.6018, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.6383515098372335, |
|
"grad_norm": 0.3144256132274513, |
|
"learning_rate": 4.571417323983727e-06, |
|
"loss": 0.6056, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.6392283663067901, |
|
"grad_norm": 0.2763723528672814, |
|
"learning_rate": 4.570091888964231e-06, |
|
"loss": 0.5943, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.6401052227763468, |
|
"grad_norm": 0.3001278571328794, |
|
"learning_rate": 4.5687646003569055e-06, |
|
"loss": 0.588, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.6409820792459034, |
|
"grad_norm": 0.2847820308061442, |
|
"learning_rate": 4.567435459350222e-06, |
|
"loss": 0.5971, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.64185893571546, |
|
"grad_norm": 0.292512543142512, |
|
"learning_rate": 4.566104467134311e-06, |
|
"loss": 0.5864, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.6427357921850168, |
|
"grad_norm": 0.28968651062565176, |
|
"learning_rate": 4.564771624900961e-06, |
|
"loss": 0.62, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.6436126486545733, |
|
"grad_norm": 0.3004795852693458, |
|
"learning_rate": 4.563436933843617e-06, |
|
"loss": 0.5964, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.64448950512413, |
|
"grad_norm": 0.2865806085716862, |
|
"learning_rate": 4.562100395157379e-06, |
|
"loss": 0.6026, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.6453663615936867, |
|
"grad_norm": 0.2842649974188147, |
|
"learning_rate": 4.560762010039001e-06, |
|
"loss": 0.5913, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.6462432180632433, |
|
"grad_norm": 0.28683866497814775, |
|
"learning_rate": 4.5594217796868915e-06, |
|
"loss": 0.5951, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.6471200745328, |
|
"grad_norm": 0.2764873070461295, |
|
"learning_rate": 4.558079705301109e-06, |
|
"loss": 0.6053, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.6479969310023566, |
|
"grad_norm": 0.27004479414645, |
|
"learning_rate": 4.556735788083366e-06, |
|
"loss": 0.6039, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.6488737874719132, |
|
"grad_norm": 0.29052397029213667, |
|
"learning_rate": 4.555390029237026e-06, |
|
"loss": 0.601, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.6497506439414699, |
|
"grad_norm": 0.2947691340138793, |
|
"learning_rate": 4.554042429967095e-06, |
|
"loss": 0.6025, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.6506275004110265, |
|
"grad_norm": 0.2792458027197797, |
|
"learning_rate": 4.552692991480234e-06, |
|
"loss": 0.6014, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.6515043568805831, |
|
"grad_norm": 0.3382217380230472, |
|
"learning_rate": 4.551341714984748e-06, |
|
"loss": 0.5955, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.6523812133501398, |
|
"grad_norm": 0.2966197192699023, |
|
"learning_rate": 4.549988601690588e-06, |
|
"loss": 0.5935, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.6532580698196964, |
|
"grad_norm": 0.31516646846151397, |
|
"learning_rate": 4.54863365280935e-06, |
|
"loss": 0.597, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.6541349262892531, |
|
"grad_norm": 0.28496714910224397, |
|
"learning_rate": 4.547276869554272e-06, |
|
"loss": 0.5814, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.6550117827588097, |
|
"grad_norm": 0.30669749001026353, |
|
"learning_rate": 4.545918253140236e-06, |
|
"loss": 0.5952, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.6558886392283663, |
|
"grad_norm": 0.2812261666412913, |
|
"learning_rate": 4.544557804783765e-06, |
|
"loss": 0.6162, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.656765495697923, |
|
"grad_norm": 0.27761745178740765, |
|
"learning_rate": 4.543195525703024e-06, |
|
"loss": 0.5807, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.6576423521674796, |
|
"grad_norm": 0.31002121863979637, |
|
"learning_rate": 4.541831417117815e-06, |
|
"loss": 0.5851, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.6585192086370363, |
|
"grad_norm": 0.29034303454873894, |
|
"learning_rate": 4.540465480249579e-06, |
|
"loss": 0.6019, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.6593960651065929, |
|
"grad_norm": 0.30559901683462565, |
|
"learning_rate": 4.539097716321394e-06, |
|
"loss": 0.5866, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.6602729215761495, |
|
"grad_norm": 0.2641221990159659, |
|
"learning_rate": 4.537728126557974e-06, |
|
"loss": 0.5972, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.6611497780457062, |
|
"grad_norm": 0.3227708789669896, |
|
"learning_rate": 4.536356712185668e-06, |
|
"loss": 0.5796, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.6620266345152628, |
|
"grad_norm": 0.294701481555053, |
|
"learning_rate": 4.534983474432458e-06, |
|
"loss": 0.6149, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.6629034909848194, |
|
"grad_norm": 0.32377533070879033, |
|
"learning_rate": 4.533608414527961e-06, |
|
"loss": 0.5891, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.6637803474543761, |
|
"grad_norm": 0.3042889879699245, |
|
"learning_rate": 4.532231533703423e-06, |
|
"loss": 0.5913, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.6646572039239327, |
|
"grad_norm": 0.31760559251266973, |
|
"learning_rate": 4.53085283319172e-06, |
|
"loss": 0.6096, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.6655340603934894, |
|
"grad_norm": 0.3078941609749165, |
|
"learning_rate": 4.529472314227362e-06, |
|
"loss": 0.5905, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.666410916863046, |
|
"grad_norm": 0.30990175786815527, |
|
"learning_rate": 4.528089978046481e-06, |
|
"loss": 0.5991, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.6672877733326026, |
|
"grad_norm": 0.32903820758007046, |
|
"learning_rate": 4.5267058258868414e-06, |
|
"loss": 0.5882, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.6681646298021593, |
|
"grad_norm": 0.29452587669480845, |
|
"learning_rate": 4.52531985898783e-06, |
|
"loss": 0.5803, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.6690414862717159, |
|
"grad_norm": 0.30776706716693625, |
|
"learning_rate": 4.52393207859046e-06, |
|
"loss": 0.577, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.6699183427412725, |
|
"grad_norm": 0.31422641761257675, |
|
"learning_rate": 4.522542485937369e-06, |
|
"loss": 0.6018, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.6707951992108292, |
|
"grad_norm": 0.3173718550935184, |
|
"learning_rate": 4.521151082272817e-06, |
|
"loss": 0.5882, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.6716720556803858, |
|
"grad_norm": 0.2986562015643124, |
|
"learning_rate": 4.519757868842685e-06, |
|
"loss": 0.579, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.6725489121499425, |
|
"grad_norm": 0.3090764441547647, |
|
"learning_rate": 4.518362846894475e-06, |
|
"loss": 0.5985, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.6734257686194991, |
|
"grad_norm": 0.30790241933986734, |
|
"learning_rate": 4.516966017677308e-06, |
|
"loss": 0.5863, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.6743026250890557, |
|
"grad_norm": 0.2994056106304016, |
|
"learning_rate": 4.515567382441923e-06, |
|
"loss": 0.5991, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.6751794815586124, |
|
"grad_norm": 0.2958764046270931, |
|
"learning_rate": 4.514166942440679e-06, |
|
"loss": 0.5963, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.676056338028169, |
|
"grad_norm": 0.28788185549499157, |
|
"learning_rate": 4.512764698927545e-06, |
|
"loss": 0.6064, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.6769331944977256, |
|
"grad_norm": 0.29708423016925406, |
|
"learning_rate": 4.511360653158111e-06, |
|
"loss": 0.5947, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.6778100509672823, |
|
"grad_norm": 0.30991902940049315, |
|
"learning_rate": 4.509954806389577e-06, |
|
"loss": 0.5987, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.6786869074368389, |
|
"grad_norm": 0.2873916475278516, |
|
"learning_rate": 4.508547159880758e-06, |
|
"loss": 0.5924, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.6795637639063956, |
|
"grad_norm": 0.3007245570293541, |
|
"learning_rate": 4.50713771489208e-06, |
|
"loss": 0.6015, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.6804406203759522, |
|
"grad_norm": 0.30867041078073276, |
|
"learning_rate": 4.505726472685577e-06, |
|
"loss": 0.5957, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.6813174768455088, |
|
"grad_norm": 0.31345922212682475, |
|
"learning_rate": 4.504313434524894e-06, |
|
"loss": 0.6006, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.6821943333150655, |
|
"grad_norm": 0.29707717549610757, |
|
"learning_rate": 4.502898601675285e-06, |
|
"loss": 0.5778, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.6830711897846221, |
|
"grad_norm": 0.3796068136152165, |
|
"learning_rate": 4.501481975403611e-06, |
|
"loss": 0.5991, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.6839480462541788, |
|
"grad_norm": 0.28337342976468866, |
|
"learning_rate": 4.5000635569783365e-06, |
|
"loss": 0.5948, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.6848249027237354, |
|
"grad_norm": 0.31230108669893153, |
|
"learning_rate": 4.498643347669533e-06, |
|
"loss": 0.5925, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.685701759193292, |
|
"grad_norm": 0.27904331433791485, |
|
"learning_rate": 4.497221348748874e-06, |
|
"loss": 0.5916, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.6865786156628487, |
|
"grad_norm": 0.2942542969448629, |
|
"learning_rate": 4.4957975614896386e-06, |
|
"loss": 0.5992, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.6874554721324053, |
|
"grad_norm": 0.2908765617548673, |
|
"learning_rate": 4.494371987166703e-06, |
|
"loss": 0.6065, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.6883323286019619, |
|
"grad_norm": 0.2840490179126863, |
|
"learning_rate": 4.492944627056544e-06, |
|
"loss": 0.5902, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.6892091850715186, |
|
"grad_norm": 0.2727369127304506, |
|
"learning_rate": 4.491515482437242e-06, |
|
"loss": 0.5867, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.6900860415410752, |
|
"grad_norm": 0.28769481832954025, |
|
"learning_rate": 4.4900845545884695e-06, |
|
"loss": 0.5922, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.6909628980106319, |
|
"grad_norm": 0.2906309237155975, |
|
"learning_rate": 4.4886518447915e-06, |
|
"loss": 0.5887, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.6918397544801885, |
|
"grad_norm": 0.2948842293422461, |
|
"learning_rate": 4.487217354329201e-06, |
|
"loss": 0.6006, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.6927166109497451, |
|
"grad_norm": 0.302074977476922, |
|
"learning_rate": 4.4857810844860325e-06, |
|
"loss": 0.5866, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.6935934674193018, |
|
"grad_norm": 0.32893770275300094, |
|
"learning_rate": 4.484343036548051e-06, |
|
"loss": 0.5976, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.6944703238888584, |
|
"grad_norm": 0.2778002794834819, |
|
"learning_rate": 4.482903211802904e-06, |
|
"loss": 0.584, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.695347180358415, |
|
"grad_norm": 0.294631010190205, |
|
"learning_rate": 4.481461611539829e-06, |
|
"loss": 0.5796, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.6962240368279717, |
|
"grad_norm": 0.26497721691156156, |
|
"learning_rate": 4.480018237049655e-06, |
|
"loss": 0.5921, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.6971008932975283, |
|
"grad_norm": 0.2571147884128945, |
|
"learning_rate": 4.4785730896247985e-06, |
|
"loss": 0.5967, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.697977749767085, |
|
"grad_norm": 0.27928133327664356, |
|
"learning_rate": 4.477126170559262e-06, |
|
"loss": 0.5933, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.6988546062366416, |
|
"grad_norm": 0.2678842819485542, |
|
"learning_rate": 4.475677481148638e-06, |
|
"loss": 0.6041, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.6997314627061982, |
|
"grad_norm": 0.2891606093702898, |
|
"learning_rate": 4.474227022690102e-06, |
|
"loss": 0.5957, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.700608319175755, |
|
"grad_norm": 0.288045727848727, |
|
"learning_rate": 4.4727747964824135e-06, |
|
"loss": 0.5904, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.7014851756453115, |
|
"grad_norm": 0.31585634496103415, |
|
"learning_rate": 4.471320803825915e-06, |
|
"loss": 0.5976, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.7023620321148683, |
|
"grad_norm": 0.2748185200755283, |
|
"learning_rate": 4.469865046022531e-06, |
|
"loss": 0.5752, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.7032388885844248, |
|
"grad_norm": 0.3355774877957403, |
|
"learning_rate": 4.468407524375767e-06, |
|
"loss": 0.5983, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.7041157450539814, |
|
"grad_norm": 0.29100988533473726, |
|
"learning_rate": 4.466948240190707e-06, |
|
"loss": 0.5942, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.7049926015235382, |
|
"grad_norm": 0.32395113661904446, |
|
"learning_rate": 4.465487194774012e-06, |
|
"loss": 0.5934, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.7058694579930948, |
|
"grad_norm": 0.27010926989878575, |
|
"learning_rate": 4.464024389433924e-06, |
|
"loss": 0.5965, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.7067463144626513, |
|
"grad_norm": 0.31589368881558894, |
|
"learning_rate": 4.462559825480257e-06, |
|
"loss": 0.5892, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.7076231709322081, |
|
"grad_norm": 0.2696414843727876, |
|
"learning_rate": 4.461093504224401e-06, |
|
"loss": 0.5995, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.7085000274017647, |
|
"grad_norm": 0.2953330107498836, |
|
"learning_rate": 4.459625426979319e-06, |
|
"loss": 0.5918, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.7093768838713214, |
|
"grad_norm": 0.281894292123873, |
|
"learning_rate": 4.458155595059549e-06, |
|
"loss": 0.5955, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.710253740340878, |
|
"grad_norm": 0.27376761478776995, |
|
"learning_rate": 4.4566840097811956e-06, |
|
"loss": 0.5871, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.7111305968104346, |
|
"grad_norm": 0.27713167306531405, |
|
"learning_rate": 4.455210672461938e-06, |
|
"loss": 0.595, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.7120074532799913, |
|
"grad_norm": 0.27385713088626723, |
|
"learning_rate": 4.453735584421021e-06, |
|
"loss": 0.5899, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.7128843097495479, |
|
"grad_norm": 0.29840396727897567, |
|
"learning_rate": 4.452258746979258e-06, |
|
"loss": 0.5844, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.7137611662191045, |
|
"grad_norm": 0.28333795883109736, |
|
"learning_rate": 4.4507801614590285e-06, |
|
"loss": 0.5939, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.7146380226886612, |
|
"grad_norm": 0.3089268512848077, |
|
"learning_rate": 4.449299829184278e-06, |
|
"loss": 0.5859, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.7155148791582178, |
|
"grad_norm": 0.2808961599877815, |
|
"learning_rate": 4.447817751480516e-06, |
|
"loss": 0.5871, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.7163917356277745, |
|
"grad_norm": 0.30287533725577037, |
|
"learning_rate": 4.446333929674816e-06, |
|
"loss": 0.593, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.7172685920973311, |
|
"grad_norm": 0.30584446638710266, |
|
"learning_rate": 4.444848365095809e-06, |
|
"loss": 0.5917, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.7181454485668877, |
|
"grad_norm": 0.27241453105670504, |
|
"learning_rate": 4.44336105907369e-06, |
|
"loss": 0.5896, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.7190223050364444, |
|
"grad_norm": 0.36474064413319707, |
|
"learning_rate": 4.4418720129402145e-06, |
|
"loss": 0.5861, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.719899161506001, |
|
"grad_norm": 0.2832577542195539, |
|
"learning_rate": 4.4403812280286915e-06, |
|
"loss": 0.5905, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.7207760179755577, |
|
"grad_norm": 0.32117553322486775, |
|
"learning_rate": 4.4388887056739926e-06, |
|
"loss": 0.5801, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.7216528744451143, |
|
"grad_norm": 0.27537463782509236, |
|
"learning_rate": 4.43739444721254e-06, |
|
"loss": 0.587, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.7225297309146709, |
|
"grad_norm": 0.3274304411602489, |
|
"learning_rate": 4.435898453982313e-06, |
|
"loss": 0.6024, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.7234065873842276, |
|
"grad_norm": 0.3232032167824163, |
|
"learning_rate": 4.434400727322844e-06, |
|
"loss": 0.6145, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.7242834438537842, |
|
"grad_norm": 0.3431783037261662, |
|
"learning_rate": 4.432901268575218e-06, |
|
"loss": 0.5937, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.7251603003233408, |
|
"grad_norm": 0.30897032551229503, |
|
"learning_rate": 4.43140007908207e-06, |
|
"loss": 0.598, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.7260371567928975, |
|
"grad_norm": 0.2934772547759602, |
|
"learning_rate": 4.429897160187584e-06, |
|
"loss": 0.5918, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.7269140132624541, |
|
"grad_norm": 0.31389790755569874, |
|
"learning_rate": 4.4283925132374946e-06, |
|
"loss": 0.5832, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.7277908697320108, |
|
"grad_norm": 0.29548260652561004, |
|
"learning_rate": 4.426886139579083e-06, |
|
"loss": 0.5937, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.7286677262015674, |
|
"grad_norm": 0.3162599265610075, |
|
"learning_rate": 4.425378040561175e-06, |
|
"loss": 0.5889, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.729544582671124, |
|
"grad_norm": 0.3057143041654656, |
|
"learning_rate": 4.423868217534144e-06, |
|
"loss": 0.5848, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.7304214391406807, |
|
"grad_norm": 0.29540394945672244, |
|
"learning_rate": 4.4223566718499055e-06, |
|
"loss": 0.5926, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.7312982956102373, |
|
"grad_norm": 0.30681513325771914, |
|
"learning_rate": 4.420843404861917e-06, |
|
"loss": 0.5838, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.7321751520797939, |
|
"grad_norm": 0.29780757398255076, |
|
"learning_rate": 4.419328417925177e-06, |
|
"loss": 0.5922, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.7330520085493506, |
|
"grad_norm": 0.28283439818927025, |
|
"learning_rate": 4.417811712396226e-06, |
|
"loss": 0.5875, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.7339288650189072, |
|
"grad_norm": 0.30029201304931724, |
|
"learning_rate": 4.416293289633144e-06, |
|
"loss": 0.5989, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.7348057214884639, |
|
"grad_norm": 0.29188774973524867, |
|
"learning_rate": 4.414773150995543e-06, |
|
"loss": 0.5878, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.7356825779580205, |
|
"grad_norm": 0.3037257039566602, |
|
"learning_rate": 4.413251297844579e-06, |
|
"loss": 0.5849, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.7365594344275771, |
|
"grad_norm": 0.31802355671271254, |
|
"learning_rate": 4.411727731542937e-06, |
|
"loss": 0.5873, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.7374362908971338, |
|
"grad_norm": 0.31892860544931334, |
|
"learning_rate": 4.410202453454841e-06, |
|
"loss": 0.5784, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.7383131473666904, |
|
"grad_norm": 0.31731371407494563, |
|
"learning_rate": 4.408675464946043e-06, |
|
"loss": 0.5973, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.739190003836247, |
|
"grad_norm": 0.2807004884396655, |
|
"learning_rate": 4.40714676738383e-06, |
|
"loss": 0.5842, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.7400668603058037, |
|
"grad_norm": 0.3102700515568577, |
|
"learning_rate": 4.405616362137017e-06, |
|
"loss": 0.584, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.7409437167753603, |
|
"grad_norm": 0.28221217756766914, |
|
"learning_rate": 4.404084250575952e-06, |
|
"loss": 0.599, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.741820573244917, |
|
"grad_norm": 0.284085524365953, |
|
"learning_rate": 4.4025504340725056e-06, |
|
"loss": 0.5799, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.7426974297144736, |
|
"grad_norm": 0.35367792241463614, |
|
"learning_rate": 4.401014914000078e-06, |
|
"loss": 0.5724, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.7435742861840302, |
|
"grad_norm": 0.26695572041406385, |
|
"learning_rate": 4.3994776917335945e-06, |
|
"loss": 0.5864, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.7444511426535869, |
|
"grad_norm": 0.3230503614090004, |
|
"learning_rate": 4.397938768649505e-06, |
|
"loss": 0.5781, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.7453279991231435, |
|
"grad_norm": 0.32670313161244324, |
|
"learning_rate": 4.39639814612578e-06, |
|
"loss": 0.5921, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.7462048555927002, |
|
"grad_norm": 0.2965265275169285, |
|
"learning_rate": 4.394855825541915e-06, |
|
"loss": 0.5847, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.7470817120622568, |
|