| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 1098, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00546448087431694, |
| "grad_norm": 0.697250247001648, |
| "learning_rate": 5.454545454545455e-07, |
| "loss": 1.9263319969177246, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.01092896174863388, |
| "grad_norm": 0.721147894859314, |
| "learning_rate": 1.6363636363636363e-06, |
| "loss": 1.9639064073562622, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.01639344262295082, |
| "grad_norm": 0.7265484929084778, |
| "learning_rate": 2.7272727272727272e-06, |
| "loss": 1.8129602670669556, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.02185792349726776, |
| "grad_norm": 0.5450526475906372, |
| "learning_rate": 3.818181818181818e-06, |
| "loss": 1.9235541820526123, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0273224043715847, |
| "grad_norm": 0.8771107196807861, |
| "learning_rate": 4.90909090909091e-06, |
| "loss": 2.251471519470215, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.03278688524590164, |
| "grad_norm": 10.971705436706543, |
| "learning_rate": 6e-06, |
| "loss": 3.0256073474884033, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.03825136612021858, |
| "grad_norm": 0.5691296458244324, |
| "learning_rate": 7.090909090909091e-06, |
| "loss": 1.903341293334961, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.04371584699453552, |
| "grad_norm": 0.632869303226471, |
| "learning_rate": 8.181818181818181e-06, |
| "loss": 1.824144721031189, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.04918032786885246, |
| "grad_norm": 2.687931776046753, |
| "learning_rate": 9.272727272727273e-06, |
| "loss": 2.273005962371826, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.0546448087431694, |
| "grad_norm": 1.9365599155426025, |
| "learning_rate": 1.0363636363636364e-05, |
| "loss": 1.9546592235565186, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.060109289617486336, |
| "grad_norm": 5.1370134353637695, |
| "learning_rate": 1.1454545454545455e-05, |
| "loss": 2.148141860961914, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.06557377049180328, |
| "grad_norm": 0.595227837562561, |
| "learning_rate": 1.2545454545454545e-05, |
| "loss": 1.7927855253219604, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.07103825136612021, |
| "grad_norm": 0.4974764585494995, |
| "learning_rate": 1.3636363636363637e-05, |
| "loss": 1.8154855966567993, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.07650273224043716, |
| "grad_norm": 0.4138644337654114, |
| "learning_rate": 1.4727272727272728e-05, |
| "loss": 1.7593921422958374, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.08196721311475409, |
| "grad_norm": 1.1251859664916992, |
| "learning_rate": 1.5818181818181818e-05, |
| "loss": 1.6035438776016235, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.08743169398907104, |
| "grad_norm": 3.0598487854003906, |
| "learning_rate": 1.6909090909090907e-05, |
| "loss": 1.9531118869781494, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.09289617486338798, |
| "grad_norm": 0.38799506425857544, |
| "learning_rate": 1.8e-05, |
| "loss": 1.632307529449463, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.09836065573770492, |
| "grad_norm": 1.2041815519332886, |
| "learning_rate": 1.909090909090909e-05, |
| "loss": 2.1407182216644287, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.10382513661202186, |
| "grad_norm": 1.2261812686920166, |
| "learning_rate": 2.0181818181818183e-05, |
| "loss": 1.7205183506011963, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.1092896174863388, |
| "grad_norm": 0.778466522693634, |
| "learning_rate": 2.1272727272727273e-05, |
| "loss": 1.4309196472167969, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.11475409836065574, |
| "grad_norm": 0.4726494550704956, |
| "learning_rate": 2.2363636363636366e-05, |
| "loss": 1.3979218006134033, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.12021857923497267, |
| "grad_norm": 0.46920815110206604, |
| "learning_rate": 2.3454545454545456e-05, |
| "loss": 1.5350127220153809, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.12568306010928962, |
| "grad_norm": 0.3451603353023529, |
| "learning_rate": 2.454545454545455e-05, |
| "loss": 1.2750825881958008, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.13114754098360656, |
| "grad_norm": 0.4018639326095581, |
| "learning_rate": 2.5636363636363635e-05, |
| "loss": 1.4977831840515137, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.1366120218579235, |
| "grad_norm": 0.32938382029533386, |
| "learning_rate": 2.6727272727272728e-05, |
| "loss": 1.455066204071045, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.14207650273224043, |
| "grad_norm": 0.3992089331150055, |
| "learning_rate": 2.7818181818181818e-05, |
| "loss": 1.50904381275177, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.14754098360655737, |
| "grad_norm": 0.48714780807495117, |
| "learning_rate": 2.890909090909091e-05, |
| "loss": 1.6593310832977295, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.15300546448087432, |
| "grad_norm": 0.5381816029548645, |
| "learning_rate": 3e-05, |
| "loss": 1.4189538955688477, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.15846994535519127, |
| "grad_norm": 2.2116336822509766, |
| "learning_rate": 2.9997491688899256e-05, |
| "loss": 1.0185623168945312, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.16393442622950818, |
| "grad_norm": 0.775851309299469, |
| "learning_rate": 2.998996768768956e-05, |
| "loss": 0.9615996479988098, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.16939890710382513, |
| "grad_norm": 0.5596693158149719, |
| "learning_rate": 2.9977430792302124e-05, |
| "loss": 1.397382378578186, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.17486338797814208, |
| "grad_norm": 0.37642741203308105, |
| "learning_rate": 2.9959885661467903e-05, |
| "loss": 1.1756441593170166, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.18032786885245902, |
| "grad_norm": 0.5881237983703613, |
| "learning_rate": 2.993733881498636e-05, |
| "loss": 0.8743929862976074, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.18579234972677597, |
| "grad_norm": 0.5072574615478516, |
| "learning_rate": 2.9909798631302736e-05, |
| "loss": 1.398439645767212, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.1912568306010929, |
| "grad_norm": 0.3287373185157776, |
| "learning_rate": 2.987727534439457e-05, |
| "loss": 1.2875562906265259, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.19672131147540983, |
| "grad_norm": 5.050961017608643, |
| "learning_rate": 2.983978103996877e-05, |
| "loss": 0.9359242916107178, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.20218579234972678, |
| "grad_norm": 0.4209338128566742, |
| "learning_rate": 2.9797329650970525e-05, |
| "loss": 1.1358091831207275, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.20765027322404372, |
| "grad_norm": 0.4743102490901947, |
| "learning_rate": 2.974993695240579e-05, |
| "loss": 1.252506971359253, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.21311475409836064, |
| "grad_norm": 0.7218373417854309, |
| "learning_rate": 2.9697620555479297e-05, |
| "loss": 1.3395963907241821, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.2185792349726776, |
| "grad_norm": 0.35181427001953125, |
| "learning_rate": 2.9640399901050182e-05, |
| "loss": 1.3289461135864258, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.22404371584699453, |
| "grad_norm": 0.6300466656684875, |
| "learning_rate": 2.9578296252407734e-05, |
| "loss": 1.3236968517303467, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.22950819672131148, |
| "grad_norm": 0.3324219286441803, |
| "learning_rate": 2.9511332687369917e-05, |
| "loss": 1.2778677940368652, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.23497267759562843, |
| "grad_norm": 0.3525979816913605, |
| "learning_rate": 2.9439534089707624e-05, |
| "loss": 1.2054697275161743, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.24043715846994534, |
| "grad_norm": 0.568080484867096, |
| "learning_rate": 2.9362927139897832e-05, |
| "loss": 0.8521115779876709, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.2459016393442623, |
| "grad_norm": 0.9504212141036987, |
| "learning_rate": 2.9281540305209068e-05, |
| "loss": 0.9611366987228394, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.25136612021857924, |
| "grad_norm": 0.5896546840667725, |
| "learning_rate": 2.919540382912294e-05, |
| "loss": 1.2202290296554565, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.2568306010928962, |
| "grad_norm": 0.4182462990283966, |
| "learning_rate": 2.9104549720095634e-05, |
| "loss": 0.7185679078102112, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.26229508196721313, |
| "grad_norm": 0.6521818041801453, |
| "learning_rate": 2.9009011739663467e-05, |
| "loss": 1.2726106643676758, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.2677595628415301, |
| "grad_norm": 0.5340232253074646, |
| "learning_rate": 2.8908825389897094e-05, |
| "loss": 1.2408103942871094, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.273224043715847, |
| "grad_norm": 0.3869662284851074, |
| "learning_rate": 2.8804027900208843e-05, |
| "loss": 1.1218898296356201, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.2786885245901639, |
| "grad_norm": 0.3763534724712372, |
| "learning_rate": 2.8694658213518226e-05, |
| "loss": 0.8273072242736816, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.28415300546448086, |
| "grad_norm": 0.36140915751457214, |
| "learning_rate": 2.8580756971780686e-05, |
| "loss": 1.3101215362548828, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.2896174863387978, |
| "grad_norm": 0.45537811517715454, |
| "learning_rate": 2.846236650088497e-05, |
| "loss": 1.2583913803100586, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.29508196721311475, |
| "grad_norm": 0.512202799320221, |
| "learning_rate": 2.833953079492476e-05, |
| "loss": 1.346620798110962, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.3005464480874317, |
| "grad_norm": 0.2695588767528534, |
| "learning_rate": 2.82122954998504e-05, |
| "loss": 1.265444278717041, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.30601092896174864, |
| "grad_norm": 0.3954983949661255, |
| "learning_rate": 2.808070789650679e-05, |
| "loss": 1.2438230514526367, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.3114754098360656, |
| "grad_norm": 0.6031639575958252, |
| "learning_rate": 2.7944816883063727e-05, |
| "loss": 1.0083987712860107, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.31693989071038253, |
| "grad_norm": 0.3518785834312439, |
| "learning_rate": 2.7804672956845295e-05, |
| "loss": 1.22913658618927, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.3224043715846995, |
| "grad_norm": 0.6181989908218384, |
| "learning_rate": 2.766032819556495e-05, |
| "loss": 1.2250205278396606, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.32786885245901637, |
| "grad_norm": 4.4156494140625, |
| "learning_rate": 2.7511836237973366e-05, |
| "loss": 0.9285832047462463, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 0.6863148212432861, |
| "learning_rate": 2.735925226392618e-05, |
| "loss": 1.5127151012420654, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.33879781420765026, |
| "grad_norm": 0.2905527651309967, |
| "learning_rate": 2.7202632973879086e-05, |
| "loss": 1.2349207401275635, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.3442622950819672, |
| "grad_norm": 0.2775297462940216, |
| "learning_rate": 2.7042036567817838e-05, |
| "loss": 1.1945700645446777, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.34972677595628415, |
| "grad_norm": 0.3885118067264557, |
| "learning_rate": 2.6877522723631036e-05, |
| "loss": 0.8623289465904236, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.3551912568306011, |
| "grad_norm": 0.48008739948272705, |
| "learning_rate": 2.6709152574933727e-05, |
| "loss": 1.019203543663025, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.36065573770491804, |
| "grad_norm": 1.9439845085144043, |
| "learning_rate": 2.6536988688350067e-05, |
| "loss": 0.9837391376495361, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.366120218579235, |
| "grad_norm": 0.30251508951187134, |
| "learning_rate": 2.6361095040263437e-05, |
| "loss": 1.2110865116119385, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.37158469945355194, |
| "grad_norm": 0.3148176074028015, |
| "learning_rate": 2.618153699304274e-05, |
| "loss": 1.2147680521011353, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.3770491803278688, |
| "grad_norm": 0.42469698190689087, |
| "learning_rate": 2.599838127075361e-05, |
| "loss": 1.2221580743789673, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.3825136612021858, |
| "grad_norm": 0.33319714665412903, |
| "learning_rate": 2.5811695934363666e-05, |
| "loss": 0.7449063062667847, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.3879781420765027, |
| "grad_norm": 0.4343816339969635, |
| "learning_rate": 2.5621550356450914e-05, |
| "loss": 0.8672135472297668, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.39344262295081966, |
| "grad_norm": 0.44463595747947693, |
| "learning_rate": 2.5428015195424825e-05, |
| "loss": 0.8414742946624756, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.3989071038251366, |
| "grad_norm": 0.7496324181556702, |
| "learning_rate": 2.5231162369269498e-05, |
| "loss": 0.7604200839996338, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.40437158469945356, |
| "grad_norm": 0.36488234996795654, |
| "learning_rate": 2.503106502881889e-05, |
| "loss": 1.2763407230377197, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.4098360655737705, |
| "grad_norm": 0.34441977739334106, |
| "learning_rate": 2.4827797530573762e-05, |
| "loss": 1.190218210220337, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.41530054644808745, |
| "grad_norm": 0.4284515082836151, |
| "learning_rate": 2.4621435409070757e-05, |
| "loss": 1.2155894041061401, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.4207650273224044, |
| "grad_norm": 0.6492432355880737, |
| "learning_rate": 2.4412055348813602e-05, |
| "loss": 1.1843308210372925, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.4262295081967213, |
| "grad_norm": 0.34537646174430847, |
| "learning_rate": 2.4199735155777017e-05, |
| "loss": 1.2178831100463867, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.43169398907103823, |
| "grad_norm": 1.2686742544174194, |
| "learning_rate": 2.3984553728493914e-05, |
| "loss": 1.0317703485488892, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.4371584699453552, |
| "grad_norm": 0.5937894582748413, |
| "learning_rate": 2.3766591028736547e-05, |
| "loss": 1.2135744094848633, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.4426229508196721, |
| "grad_norm": 2.5808207988739014, |
| "learning_rate": 2.3545928051802588e-05, |
| "loss": 0.8519853353500366, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.44808743169398907, |
| "grad_norm": 0.3145885765552521, |
| "learning_rate": 2.332264679641717e-05, |
| "loss": 1.2207863330841064, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.453551912568306, |
| "grad_norm": 1.2698609828948975, |
| "learning_rate": 2.3096830234261996e-05, |
| "loss": 0.843329131603241, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.45901639344262296, |
| "grad_norm": 0.2692093849182129, |
| "learning_rate": 2.2868562279142912e-05, |
| "loss": 1.206931233406067, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.4644808743169399, |
| "grad_norm": 0.31359997391700745, |
| "learning_rate": 2.2637927755807458e-05, |
| "loss": 1.532818078994751, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.46994535519125685, |
| "grad_norm": 0.8711612820625305, |
| "learning_rate": 2.2405012368423786e-05, |
| "loss": 0.9171653389930725, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.47540983606557374, |
| "grad_norm": 0.41555553674697876, |
| "learning_rate": 2.2169902668732893e-05, |
| "loss": 1.2091866731643677, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.4808743169398907, |
| "grad_norm": 0.3255589008331299, |
| "learning_rate": 2.193268602388583e-05, |
| "loss": 1.1792771816253662, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.48633879781420764, |
| "grad_norm": 2.388908624649048, |
| "learning_rate": 2.1693450583977953e-05, |
| "loss": 1.2991008758544922, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.4918032786885246, |
| "grad_norm": 0.8777170777320862, |
| "learning_rate": 2.1452285249292147e-05, |
| "loss": 0.9539348483085632, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.4972677595628415, |
| "grad_norm": 0.5306017398834229, |
| "learning_rate": 2.12092796372634e-05, |
| "loss": 1.217163324356079, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.5027322404371585, |
| "grad_norm": 0.3937690556049347, |
| "learning_rate": 2.096452404917679e-05, |
| "loss": 1.1728050708770752, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.5081967213114754, |
| "grad_norm": 0.4189966022968292, |
| "learning_rate": 2.0718109436611348e-05, |
| "loss": 1.1901968717575073, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.5136612021857924, |
| "grad_norm": 0.8861513733863831, |
| "learning_rate": 2.0470127367642345e-05, |
| "loss": 0.4577289819717407, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.5191256830601093, |
| "grad_norm": 0.8669730424880981, |
| "learning_rate": 2.022066999281444e-05, |
| "loss": 1.170414924621582, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.5245901639344263, |
| "grad_norm": 0.43784815073013306, |
| "learning_rate": 1.9969830010898358e-05, |
| "loss": 1.2003910541534424, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.5300546448087432, |
| "grad_norm": 0.2408360242843628, |
| "learning_rate": 1.9717700634443903e-05, |
| "loss": 1.196523904800415, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.5355191256830601, |
| "grad_norm": 0.275288462638855, |
| "learning_rate": 1.9464375555142e-05, |
| "loss": 0.9004436135292053, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.5409836065573771, |
| "grad_norm": 0.30962035059928894, |
| "learning_rate": 1.9209948909008734e-05, |
| "loss": 1.002712368965149, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.546448087431694, |
| "grad_norm": 0.37051403522491455, |
| "learning_rate": 1.8954515241404218e-05, |
| "loss": 1.0420843362808228, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5519125683060109, |
| "grad_norm": 0.37618082761764526, |
| "learning_rate": 1.8698169471899414e-05, |
| "loss": 1.0357520580291748, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.5573770491803278, |
| "grad_norm": 0.3667483627796173, |
| "learning_rate": 1.8441006859003842e-05, |
| "loss": 1.15482759475708, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.5628415300546448, |
| "grad_norm": 0.35124123096466064, |
| "learning_rate": 1.818312296476737e-05, |
| "loss": 1.1959636211395264, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.5683060109289617, |
| "grad_norm": 0.35074105858802795, |
| "learning_rate": 1.792461361926921e-05, |
| "loss": 1.087142825126648, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.5737704918032787, |
| "grad_norm": 0.40441736578941345, |
| "learning_rate": 1.766557488500727e-05, |
| "loss": 1.23688805103302, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.5792349726775956, |
| "grad_norm": 0.40216121077537537, |
| "learning_rate": 1.7406103021201212e-05, |
| "loss": 1.175810694694519, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.5846994535519126, |
| "grad_norm": 0.356263130903244, |
| "learning_rate": 1.7146294448022335e-05, |
| "loss": 1.2632404565811157, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.5901639344262295, |
| "grad_norm": 0.379120409488678, |
| "learning_rate": 1.688624571076371e-05, |
| "loss": 1.1749930381774902, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.5956284153005464, |
| "grad_norm": 0.4688524901866913, |
| "learning_rate": 1.6626053443963762e-05, |
| "loss": 1.2324018478393555, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.6010928961748634, |
| "grad_norm": 0.3071102797985077, |
| "learning_rate": 1.636581433549674e-05, |
| "loss": 1.0937501192092896, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.6065573770491803, |
| "grad_norm": 0.4043320417404175, |
| "learning_rate": 1.610562509064332e-05, |
| "loss": 0.7812168598175049, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.6120218579234973, |
| "grad_norm": 0.37222233414649963, |
| "learning_rate": 1.5845582396154786e-05, |
| "loss": 1.1552724838256836, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.6174863387978142, |
| "grad_norm": 0.32638704776763916, |
| "learning_rate": 1.5585782884324064e-05, |
| "loss": 1.1811174154281616, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.6229508196721312, |
| "grad_norm": 1.021145224571228, |
| "learning_rate": 1.5326323097077015e-05, |
| "loss": 0.7545455694198608, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.6284153005464481, |
| "grad_norm": 1.044705867767334, |
| "learning_rate": 1.5067299450097261e-05, |
| "loss": 1.1588557958602905, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.6338797814207651, |
| "grad_norm": 0.417707622051239, |
| "learning_rate": 1.4808808196998006e-05, |
| "loss": 1.1800490617752075, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.639344262295082, |
| "grad_norm": 0.4615285396575928, |
| "learning_rate": 1.4550945393554004e-05, |
| "loss": 0.9214609861373901, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.644808743169399, |
| "grad_norm": 0.9104841947555542, |
| "learning_rate": 1.4293806862007085e-05, |
| "loss": 1.4733829498291016, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.6502732240437158, |
| "grad_norm": 0.5479267239570618, |
| "learning_rate": 1.4037488155458448e-05, |
| "loss": 1.122392177581787, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.6557377049180327, |
| "grad_norm": 1.7201780080795288, |
| "learning_rate": 1.3782084522360981e-05, |
| "loss": 1.0747902393341064, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.6612021857923497, |
| "grad_norm": 0.2870098948478699, |
| "learning_rate": 1.3527690871124762e-05, |
| "loss": 0.7035287022590637, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.618334174156189, |
| "learning_rate": 1.3274401734848958e-05, |
| "loss": 1.2590125799179077, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.6721311475409836, |
| "grad_norm": 0.9182770848274231, |
| "learning_rate": 1.3022311236193156e-05, |
| "loss": 1.1282920837402344, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.6775956284153005, |
| "grad_norm": 0.3048880398273468, |
| "learning_rate": 1.2771513052401236e-05, |
| "loss": 1.1468207836151123, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.6830601092896175, |
| "grad_norm": 0.46364668011665344, |
| "learning_rate": 1.2522100380490744e-05, |
| "loss": 0.41445863246917725, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.6885245901639344, |
| "grad_norm": 0.3374931216239929, |
| "learning_rate": 1.2274165902620732e-05, |
| "loss": 1.205442190170288, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.6939890710382514, |
| "grad_norm": 0.30988192558288574, |
| "learning_rate": 1.2027801751650918e-05, |
| "loss": 1.1606954336166382, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.6994535519125683, |
| "grad_norm": 0.6835072040557861, |
| "learning_rate": 1.1783099476904972e-05, |
| "loss": 0.9375866651535034, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.7049180327868853, |
| "grad_norm": 0.5492672324180603, |
| "learning_rate": 1.1540150010150599e-05, |
| "loss": 1.1717281341552734, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.7103825136612022, |
| "grad_norm": 1.321418285369873, |
| "learning_rate": 1.1299043631809205e-05, |
| "loss": 1.0042099952697754, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.7158469945355191, |
| "grad_norm": 0.5885441303253174, |
| "learning_rate": 1.1059869937407486e-05, |
| "loss": 1.1690725088119507, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.7213114754098361, |
| "grad_norm": 0.4145934581756592, |
| "learning_rate": 1.082271780428362e-05, |
| "loss": 1.1503442525863647, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.726775956284153, |
| "grad_norm": 0.3795332610607147, |
| "learning_rate": 1.0587675358560278e-05, |
| "loss": 0.7821759581565857, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.73224043715847, |
| "grad_norm": 0.34867045283317566, |
| "learning_rate": 1.0354829942396837e-05, |
| "loss": 1.1533390283584595, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.7377049180327869, |
| "grad_norm": 0.3432830572128296, |
| "learning_rate": 1.012426808153287e-05, |
| "loss": 1.1842344999313354, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.7431693989071039, |
| "grad_norm": 0.37027257680892944, |
| "learning_rate": 9.896075453135039e-06, |
| "loss": 1.2416757345199585, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.7486338797814208, |
| "grad_norm": 0.8126760721206665, |
| "learning_rate": 9.67033685395934e-06, |
| "loss": 1.2841261625289917, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.7540983606557377, |
| "grad_norm": 0.7494352459907532, |
| "learning_rate": 9.447136168840466e-06, |
| "loss": 1.2585190534591675, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.7595628415300546, |
| "grad_norm": 0.6677407026290894, |
| "learning_rate": 9.226556339520069e-06, |
| "loss": 0.826260507106781, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.7650273224043715, |
| "grad_norm": 0.294973224401474, |
| "learning_rate": 9.008679333825478e-06, |
| "loss": 1.1650235652923584, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.7704918032786885, |
| "grad_norm": 0.4066277742385864, |
| "learning_rate": 8.793586115210326e-06, |
| "loss": 1.0734792947769165, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.7759562841530054, |
| "grad_norm": 0.24723787605762482, |
| "learning_rate": 8.581356612668382e-06, |
| "loss": 1.1951000690460205, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.7814207650273224, |
| "grad_norm": 0.7921513319015503, |
| "learning_rate": 8.372069691031804e-06, |
| "loss": 1.210968017578125, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.7868852459016393, |
| "grad_norm": 0.9979972243309021, |
| "learning_rate": 8.165803121664869e-06, |
| "loss": 0.7500867247581482, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.7923497267759563, |
| "grad_norm": 0.3506968915462494, |
| "learning_rate": 7.962633553563965e-06, |
| "loss": 0.8001227974891663, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.7978142076502732, |
| "grad_norm": 0.33423033356666565, |
| "learning_rate": 7.762636484874723e-06, |
| "loss": 0.9556211233139038, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.8032786885245902, |
| "grad_norm": 0.3771698474884033, |
| "learning_rate": 7.565886234836767e-06, |
| "loss": 1.159753441810608, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.8087431693989071, |
| "grad_norm": 0.36711210012435913, |
| "learning_rate": 7.3724559161665876e-06, |
| "loss": 1.1728172302246094, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.8142076502732241, |
| "grad_norm": 0.3197503685951233, |
| "learning_rate": 7.182417407888703e-06, |
| "loss": 1.1757210493087769, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.819672131147541, |
| "grad_norm": 0.3136984705924988, |
| "learning_rate": 6.995841328625321e-06, |
| "loss": 0.8655977845191956, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.825136612021858, |
| "grad_norm": 0.393072247505188, |
| "learning_rate": 6.812797010354325e-06, |
| "loss": 1.1789922714233398, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.8306010928961749, |
| "grad_norm": 3.821265935897827, |
| "learning_rate": 6.63335247264542e-06, |
| "loss": 1.08378005027771, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.8360655737704918, |
| "grad_norm": 0.33668431639671326, |
| "learning_rate": 6.457574397383919e-06, |
| "loss": 1.1709719896316528, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.8415300546448088, |
| "grad_norm": 0.3578186631202698, |
| "learning_rate": 6.285528103991665e-06, |
| "loss": 1.193668246269226, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.8469945355191257, |
| "grad_norm": 0.3615029752254486, |
| "learning_rate": 6.117277525154225e-06, |
| "loss": 1.15645432472229, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.8524590163934426, |
| "grad_norm": 0.38210004568099976, |
| "learning_rate": 5.952885183063397e-06, |
| "loss": 1.1638695001602173, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.8579234972677595, |
| "grad_norm": 0.35445594787597656, |
| "learning_rate": 5.792412166183841e-06, |
| "loss": 1.1909141540527344, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.8633879781420765, |
| "grad_norm": 0.5096743702888489, |
| "learning_rate": 5.635918106552546e-06, |
| "loss": 1.201849102973938, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.8688524590163934, |
| "grad_norm": 1.2665657997131348, |
| "learning_rate": 5.483461157619428e-06, |
| "loss": 0.7697218656539917, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.8743169398907104, |
| "grad_norm": 0.31564080715179443, |
| "learning_rate": 5.335097972637441e-06, |
| "loss": 1.154910683631897, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.8797814207650273, |
| "grad_norm": 0.41027265787124634, |
| "learning_rate": 5.1908836836101135e-06, |
| "loss": 0.810014545917511, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.8852459016393442, |
| "grad_norm": 0.5105057954788208, |
| "learning_rate": 5.050871880804414e-06, |
| "loss": 1.4533857107162476, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.8907103825136612, |
| "grad_norm": 0.35378292202949524, |
| "learning_rate": 4.915114592836521e-06, |
| "loss": 1.1535991430282593, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.8961748633879781, |
| "grad_norm": 1.3628326654434204, |
| "learning_rate": 4.783662267337909e-06, |
| "loss": 1.1089421510696411, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.9016393442622951, |
| "grad_norm": 0.2328273057937622, |
| "learning_rate": 4.656563752208907e-06, |
| "loss": 1.2124766111373901, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.907103825136612, |
| "grad_norm": 0.31986290216445923, |
| "learning_rate": 4.533866277466767e-06, |
| "loss": 1.1944669485092163, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.912568306010929, |
| "grad_norm": 1.5621281862258911, |
| "learning_rate": 4.415615437694876e-06, |
| "loss": 1.3944669961929321, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.9180327868852459, |
| "grad_norm": 2.3418469429016113, |
| "learning_rate": 4.3018551750997694e-06, |
| "loss": 1.1178382635116577, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.9234972677595629, |
| "grad_norm": 0.40905457735061646, |
| "learning_rate": 4.192627763182111e-06, |
| "loss": 1.1970925331115723, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.9289617486338798, |
| "grad_norm": 1.2464805841445923, |
| "learning_rate": 4.087973791027797e-06, |
| "loss": 0.7709401249885559, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.9344262295081968, |
| "grad_norm": 0.713545560836792, |
| "learning_rate": 3.987932148224993e-06, |
| "loss": 1.1292576789855957, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.9398907103825137, |
| "grad_norm": 1.2035443782806396, |
| "learning_rate": 3.8925400104126834e-06, |
| "loss": 0.7497198581695557, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.9453551912568307, |
| "grad_norm": 0.3672787845134735, |
| "learning_rate": 3.8018328254661618e-06, |
| "loss": 1.1717313528060913, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.9508196721311475, |
| "grad_norm": 0.602628767490387, |
| "learning_rate": 3.715844300324527e-06, |
| "loss": 0.9975014328956604, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.9562841530054644, |
| "grad_norm": 0.4423633813858032, |
| "learning_rate": 3.6346063884651327e-06, |
| "loss": 1.1410292387008667, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.9617486338797814, |
| "grad_norm": 0.7007730007171631, |
| "learning_rate": 3.558149278029624e-06, |
| "loss": 1.3128447532653809, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.9672131147540983, |
| "grad_norm": 0.439485639333725, |
| "learning_rate": 3.4865013806059817e-06, |
| "loss": 0.7662681341171265, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.9726775956284153, |
| "grad_norm": 0.4370954930782318, |
| "learning_rate": 3.419689320670712e-06, |
| "loss": 0.7777884602546692, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.9781420765027322, |
| "grad_norm": 1.6398391723632812, |
| "learning_rate": 3.35773792569517e-06, |
| "loss": 1.1536012887954712, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.9836065573770492, |
| "grad_norm": 0.42289936542510986, |
| "learning_rate": 3.300670216919602e-06, |
| "loss": 0.8657046556472778, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.9890710382513661, |
| "grad_norm": 0.49119001626968384, |
| "learning_rate": 3.2485074007984468e-06, |
| "loss": 1.1398602724075317, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.994535519125683, |
| "grad_norm": 0.897072434425354, |
| "learning_rate": 3.2012688611199566e-06, |
| "loss": 1.2387840747833252, |
| "step": 364 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.38847413659095764, |
| "learning_rate": 3.158972151803165e-06, |
| "loss": 1.236254334449768, |
| "step": 366 |
| }, |
| { |
| "epoch": 1.005464480874317, |
| "grad_norm": 0.31760314106941223, |
| "learning_rate": 3.1216329903748095e-06, |
| "loss": 0.761336088180542, |
| "step": 368 |
| }, |
| { |
| "epoch": 1.010928961748634, |
| "grad_norm": 0.3967256247997284, |
| "learning_rate": 3.089265252128686e-06, |
| "loss": 1.2508467435836792, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.0163934426229508, |
| "grad_norm": 0.4457378387451172, |
| "learning_rate": 3.061880964969555e-06, |
| "loss": 1.246903419494629, |
| "step": 372 |
| }, |
| { |
| "epoch": 1.0218579234972678, |
| "grad_norm": 0.3824722170829773, |
| "learning_rate": 3.039490304943562e-06, |
| "loss": 1.1185325384140015, |
| "step": 374 |
| }, |
| { |
| "epoch": 1.0273224043715847, |
| "grad_norm": 0.3070431053638458, |
| "learning_rate": 3.022101592456795e-06, |
| "loss": 1.0686273574829102, |
| "step": 376 |
| }, |
| { |
| "epoch": 1.0327868852459017, |
| "grad_norm": 0.28226983547210693, |
| "learning_rate": 3.0097212891834095e-06, |
| "loss": 1.0337367057800293, |
| "step": 378 |
| }, |
| { |
| "epoch": 1.0382513661202186, |
| "grad_norm": 0.4869016110897064, |
| "learning_rate": 3.0023539956644634e-06, |
| "loss": 1.0311717987060547, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.0437158469945356, |
| "grad_norm": 0.8694993257522583, |
| "learning_rate": 3.0000024495983428e-06, |
| "loss": 0.7782408595085144, |
| "step": 382 |
| }, |
| { |
| "epoch": 1.0491803278688525, |
| "grad_norm": 0.5946288108825684, |
| "learning_rate": 3.002667524823434e-06, |
| "loss": 1.0987261533737183, |
| "step": 384 |
| }, |
| { |
| "epoch": 1.0546448087431695, |
| "grad_norm": 1.0870792865753174, |
| "learning_rate": 3.010348230993402e-06, |
| "loss": 1.0993095636367798, |
| "step": 386 |
| }, |
| { |
| "epoch": 1.0601092896174864, |
| "grad_norm": 0.5876205563545227, |
| "learning_rate": 3.0230417139451987e-06, |
| "loss": 1.4858818054199219, |
| "step": 388 |
| }, |
| { |
| "epoch": 1.0655737704918034, |
| "grad_norm": 0.8428175449371338, |
| "learning_rate": 3.0407432567596883e-06, |
| "loss": 0.9062157273292542, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.0710382513661203, |
| "grad_norm": 1.2778754234313965, |
| "learning_rate": 3.0634462815144474e-06, |
| "loss": 0.592805802822113, |
| "step": 392 |
| }, |
| { |
| "epoch": 1.0765027322404372, |
| "grad_norm": 0.4570184648036957, |
| "learning_rate": 3.0911423517281404e-06, |
| "loss": 1.1192593574523926, |
| "step": 394 |
| }, |
| { |
| "epoch": 1.0819672131147542, |
| "grad_norm": 0.42796361446380615, |
| "learning_rate": 3.1238211754955294e-06, |
| "loss": 1.0443531274795532, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.0874316939890711, |
| "grad_norm": 0.43680837750434875, |
| "learning_rate": 3.161470609311961e-06, |
| "loss": 1.0404291152954102, |
| "step": 398 |
| }, |
| { |
| "epoch": 1.092896174863388, |
| "grad_norm": 0.40812206268310547, |
| "learning_rate": 3.2040766625859115e-06, |
| "loss": 1.1858930587768555, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.098360655737705, |
| "grad_norm": 0.5379334688186646, |
| "learning_rate": 3.2516235028379157e-06, |
| "loss": 1.0395185947418213, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.1038251366120218, |
| "grad_norm": 0.40824946761131287, |
| "learning_rate": 3.304093461583944e-06, |
| "loss": 1.026267170906067, |
| "step": 404 |
| }, |
| { |
| "epoch": 1.1092896174863387, |
| "grad_norm": 0.603408694267273, |
| "learning_rate": 3.3614670409010353e-06, |
| "loss": 1.076138973236084, |
| "step": 406 |
| }, |
| { |
| "epoch": 1.1147540983606556, |
| "grad_norm": 0.41980448365211487, |
| "learning_rate": 3.4237229206727602e-06, |
| "loss": 1.3695933818817139, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.1202185792349726, |
| "grad_norm": 0.869969367980957, |
| "learning_rate": 3.490837966511817e-06, |
| "loss": 1.047141671180725, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.1256830601092895, |
| "grad_norm": 0.38264161348342896, |
| "learning_rate": 3.5627872383567937e-06, |
| "loss": 1.0203361511230469, |
| "step": 412 |
| }, |
| { |
| "epoch": 1.1311475409836065, |
| "grad_norm": 0.30198222398757935, |
| "learning_rate": 3.6395439997399494e-06, |
| "loss": 0.5207374095916748, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.1366120218579234, |
| "grad_norm": 1.9066449403762817, |
| "learning_rate": 3.721079727722522e-06, |
| "loss": 1.2205498218536377, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.1420765027322404, |
| "grad_norm": 0.28766530752182007, |
| "learning_rate": 3.8073641234939055e-06, |
| "loss": 1.1669960021972656, |
| "step": 418 |
| }, |
| { |
| "epoch": 1.1475409836065573, |
| "grad_norm": 0.4494319558143616, |
| "learning_rate": 3.898365123630732e-06, |
| "loss": 1.0348408222198486, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.1530054644808743, |
| "grad_norm": 0.7580645680427551, |
| "learning_rate": 3.994048912011692e-06, |
| "loss": 1.0489870309829712, |
| "step": 422 |
| }, |
| { |
| "epoch": 1.1584699453551912, |
| "grad_norm": 0.30486011505126953, |
| "learning_rate": 4.094379932383666e-06, |
| "loss": 1.0182819366455078, |
| "step": 424 |
| }, |
| { |
| "epoch": 1.1639344262295082, |
| "grad_norm": 0.5283482074737549, |
| "learning_rate": 4.199320901574489e-06, |
| "loss": 0.7681307792663574, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.169398907103825, |
| "grad_norm": 0.6967251896858215, |
| "learning_rate": 4.3088328233474185e-06, |
| "loss": 1.1920503377914429, |
| "step": 428 |
| }, |
| { |
| "epoch": 1.174863387978142, |
| "grad_norm": 0.30848848819732666, |
| "learning_rate": 4.422875002892234e-06, |
| "loss": 1.1227927207946777, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.180327868852459, |
| "grad_norm": 0.4338774085044861, |
| "learning_rate": 4.54140506194747e-06, |
| "loss": 1.0621066093444824, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.185792349726776, |
| "grad_norm": 0.5365703701972961, |
| "learning_rate": 4.664378954548241e-06, |
| "loss": 1.0539458990097046, |
| "step": 434 |
| }, |
| { |
| "epoch": 1.1912568306010929, |
| "grad_norm": 0.9198355674743652, |
| "learning_rate": 4.791750983393832e-06, |
| "loss": 0.6073042750358582, |
| "step": 436 |
| }, |
| { |
| "epoch": 1.1967213114754098, |
| "grad_norm": 0.2908204197883606, |
| "learning_rate": 4.9234738168288466e-06, |
| "loss": 0.4756653904914856, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.2021857923497268, |
| "grad_norm": 0.43431609869003296, |
| "learning_rate": 5.059498506431758e-06, |
| "loss": 0.9815635085105896, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.2076502732240437, |
| "grad_norm": 0.8783625364303589, |
| "learning_rate": 5.199774505204206e-06, |
| "loss": 1.324447512626648, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.2131147540983607, |
| "grad_norm": 0.8234128355979919, |
| "learning_rate": 5.344249686354357e-06, |
| "loss": 1.26932692527771, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.2185792349726776, |
| "grad_norm": 0.35708698630332947, |
| "learning_rate": 5.492870362667299e-06, |
| "loss": 0.5943037271499634, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.2240437158469946, |
| "grad_norm": 0.3496783375740051, |
| "learning_rate": 5.645581306455302e-06, |
| "loss": 0.8465991616249084, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.2295081967213115, |
| "grad_norm": 0.34306076169013977, |
| "learning_rate": 5.802325770080506e-06, |
| "loss": 1.0558314323425293, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.2349726775956285, |
| "grad_norm": 0.6199325919151306, |
| "learning_rate": 5.96304550704246e-06, |
| "loss": 0.7405421137809753, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.2404371584699454, |
| "grad_norm": 0.5972355604171753, |
| "learning_rate": 6.127680793622588e-06, |
| "loss": 1.023563027381897, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.2459016393442623, |
| "grad_norm": 0.36354729533195496, |
| "learning_rate": 6.296170451077657e-06, |
| "loss": 1.149239420890808, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.2513661202185793, |
| "grad_norm": 0.5815706849098206, |
| "learning_rate": 6.468451868373856e-06, |
| "loss": 0.5993931889533997, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.2568306010928962, |
| "grad_norm": 0.884687066078186, |
| "learning_rate": 6.6444610254532e-06, |
| "loss": 1.1250090599060059, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.2622950819672132, |
| "grad_norm": 0.5644488334655762, |
| "learning_rate": 6.824132517023449e-06, |
| "loss": 1.117283821105957, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.2677595628415301, |
| "grad_norm": 1.1574392318725586, |
| "learning_rate": 7.007399576862872e-06, |
| "loss": 0.8585411906242371, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.273224043715847, |
| "grad_norm": 0.31241562962532043, |
| "learning_rate": 7.1941941026306275e-06, |
| "loss": 1.159734845161438, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.278688524590164, |
| "grad_norm": 3.079608678817749, |
| "learning_rate": 7.3844466811737555e-06, |
| "loss": 0.43219655752182007, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.2841530054644807, |
| "grad_norm": 1.1515812873840332, |
| "learning_rate": 7.578086614321175e-06, |
| "loss": 0.32784971594810486, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.289617486338798, |
| "grad_norm": 0.8713538646697998, |
| "learning_rate": 7.775041945155295e-06, |
| "loss": 0.972320556640625, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.2950819672131146, |
| "grad_norm": 1.0352020263671875, |
| "learning_rate": 7.975239484751258e-06, |
| "loss": 0.8251127004623413, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.3005464480874318, |
| "grad_norm": 0.41140851378440857, |
| "learning_rate": 8.178604839374125e-06, |
| "loss": 1.0359759330749512, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.3060109289617485, |
| "grad_norm": 0.30868738889694214, |
| "learning_rate": 8.385062438123673e-06, |
| "loss": 1.13455331325531, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.3114754098360657, |
| "grad_norm": 0.603076159954071, |
| "learning_rate": 8.594535561016661e-06, |
| "loss": 1.1488956212997437, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.3169398907103824, |
| "grad_norm": 0.32255861163139343, |
| "learning_rate": 8.806946367496155e-06, |
| "loss": 1.1327383518218994, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.3224043715846996, |
| "grad_norm": 0.37436342239379883, |
| "learning_rate": 9.02221592535712e-06, |
| "loss": 1.0864595174789429, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.3278688524590163, |
| "grad_norm": 0.4781150221824646, |
| "learning_rate": 9.240264240077859e-06, |
| "loss": 0.2114480584859848, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "grad_norm": 0.7249221205711365, |
| "learning_rate": 9.461010284546016e-06, |
| "loss": 1.14420747756958, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.3387978142076502, |
| "grad_norm": 0.31920719146728516, |
| "learning_rate": 9.684372029168438e-06, |
| "loss": 0.9852396249771118, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.3442622950819672, |
| "grad_norm": 0.31214451789855957, |
| "learning_rate": 9.91026647235348e-06, |
| "loss": 1.109579086303711, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.349726775956284, |
| "grad_norm": 1.5647644996643066, |
| "learning_rate": 1.0138609671354586e-05, |
| "loss": 0.6934401988983154, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.355191256830601, |
| "grad_norm": 0.3006077706813812, |
| "learning_rate": 1.0369316773463458e-05, |
| "loss": 1.0647120475769043, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.360655737704918, |
| "grad_norm": 0.3098089098930359, |
| "learning_rate": 1.0602302047541566e-05, |
| "loss": 0.8615930676460266, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.366120218579235, |
| "grad_norm": 0.30825164914131165, |
| "learning_rate": 1.083747891587788e-05, |
| "loss": 0.9996722340583801, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.3715846994535519, |
| "grad_norm": 0.31744763255119324, |
| "learning_rate": 1.1074759986361392e-05, |
| "loss": 1.1010159254074097, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.3770491803278688, |
| "grad_norm": 0.660419225692749, |
| "learning_rate": 1.1314057084956073e-05, |
| "loss": 0.9897991418838501, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.3825136612021858, |
| "grad_norm": 0.4726075530052185, |
| "learning_rate": 1.1555281288466553e-05, |
| "loss": 0.7116650342941284, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.3879781420765027, |
| "grad_norm": 0.4236489236354828, |
| "learning_rate": 1.1798342957582084e-05, |
| "loss": 0.7277122139930725, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.3934426229508197, |
| "grad_norm": 0.3505037724971771, |
| "learning_rate": 1.2043151770186725e-05, |
| "loss": 1.0869934558868408, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.3989071038251366, |
| "grad_norm": 0.4062495529651642, |
| "learning_rate": 1.2289616754923078e-05, |
| "loss": 1.0307718515396118, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.4043715846994536, |
| "grad_norm": 0.9875432252883911, |
| "learning_rate": 1.253764632499752e-05, |
| "loss": 0.6626445055007935, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.4098360655737705, |
| "grad_norm": 0.6780223846435547, |
| "learning_rate": 1.2787148312213901e-05, |
| "loss": 1.4220600128173828, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.4153005464480874, |
| "grad_norm": 0.32189661264419556, |
| "learning_rate": 1.3038030001223439e-05, |
| "loss": 0.7831554412841797, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.4207650273224044, |
| "grad_norm": 0.38265976309776306, |
| "learning_rate": 1.3290198163977933e-05, |
| "loss": 0.7503346800804138, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.4262295081967213, |
| "grad_norm": 0.7597147226333618, |
| "learning_rate": 1.3543559094373372e-05, |
| "loss": 0.7089603543281555, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.4316939890710383, |
| "grad_norm": 0.4683310091495514, |
| "learning_rate": 1.3798018643071386e-05, |
| "loss": 0.7786340117454529, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.4371584699453552, |
| "grad_norm": 0.4911711812019348, |
| "learning_rate": 1.4053482252485178e-05, |
| "loss": 0.9898138642311096, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.4426229508196722, |
| "grad_norm": 0.29452064633369446, |
| "learning_rate": 1.4309854991917388e-05, |
| "loss": 1.0076603889465332, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.4480874316939891, |
| "grad_norm": 0.31045207381248474, |
| "learning_rate": 1.4567041592836413e-05, |
| "loss": 0.6953315734863281, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.453551912568306, |
| "grad_norm": 0.39866623282432556, |
| "learning_rate": 1.48249464842784e-05, |
| "loss": 1.1244227886199951, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.459016393442623, |
| "grad_norm": 0.4015820026397705, |
| "learning_rate": 1.508347382836153e-05, |
| "loss": 1.071346402168274, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.46448087431694, |
| "grad_norm": 0.4267948269844055, |
| "learning_rate": 1.534252755589961e-05, |
| "loss": 1.0431689023971558, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.469945355191257, |
| "grad_norm": 0.24437487125396729, |
| "learning_rate": 1.5602011402101432e-05, |
| "loss": 0.6640901565551758, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.4754098360655736, |
| "grad_norm": 0.2949664890766144, |
| "learning_rate": 1.5861828942343037e-05, |
| "loss": 0.8600192070007324, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.4808743169398908, |
| "grad_norm": 3.951502561569214, |
| "learning_rate": 1.612188362799917e-05, |
| "loss": 1.1732059717178345, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.4863387978142075, |
| "grad_norm": 0.34538793563842773, |
| "learning_rate": 1.6382078822320964e-05, |
| "loss": 1.0121636390686035, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.4918032786885247, |
| "grad_norm": 0.8434890508651733, |
| "learning_rate": 1.6642317836346324e-05, |
| "loss": 0.7092042565345764, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.4972677595628414, |
| "grad_norm": 0.849798321723938, |
| "learning_rate": 1.6902503964829644e-05, |
| "loss": 0.8493767380714417, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.5027322404371586, |
| "grad_norm": 0.2791331708431244, |
| "learning_rate": 1.7162540522177685e-05, |
| "loss": 1.0140348672866821, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.5081967213114753, |
| "grad_norm": 0.6075169444084167, |
| "learning_rate": 1.7422330878378113e-05, |
| "loss": 0.8161216974258423, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.5136612021857925, |
| "grad_norm": 0.361250638961792, |
| "learning_rate": 1.7681778494907298e-05, |
| "loss": 1.0384575128555298, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.5191256830601092, |
| "grad_norm": 0.3741307854652405, |
| "learning_rate": 1.794078696060429e-05, |
| "loss": 0.7678899168968201, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.5245901639344264, |
| "grad_norm": 0.3114408552646637, |
| "learning_rate": 1.819926002749727e-05, |
| "loss": 1.02855384349823, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.530054644808743, |
| "grad_norm": 0.18622533977031708, |
| "learning_rate": 1.84571016465695e-05, |
| "loss": 0.2526892423629761, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.5355191256830603, |
| "grad_norm": 0.23625467717647552, |
| "learning_rate": 1.8714216003451295e-05, |
| "loss": 1.0640628337860107, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.540983606557377, |
| "grad_norm": 0.724408745765686, |
| "learning_rate": 1.8970507554024827e-05, |
| "loss": 0.7283908724784851, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.5464480874316942, |
| "grad_norm": 0.3644772171974182, |
| "learning_rate": 1.922588105992838e-05, |
| "loss": 1.1399354934692383, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.5519125683060109, |
| "grad_norm": 0.9722273349761963, |
| "learning_rate": 1.9480241623947206e-05, |
| "loss": 1.1557573080062866, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.5573770491803278, |
| "grad_norm": 0.4744189381599426, |
| "learning_rate": 1.9733494725277413e-05, |
| "loss": 1.133260726928711, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.5628415300546448, |
| "grad_norm": 2.321540594100952, |
| "learning_rate": 1.998554625465005e-05, |
| "loss": 0.6360920071601868, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.5683060109289617, |
| "grad_norm": 0.6282745599746704, |
| "learning_rate": 2.0236302549302293e-05, |
| "loss": 0.9884318709373474, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.5737704918032787, |
| "grad_norm": 0.25726932287216187, |
| "learning_rate": 2.0485670427782644e-05, |
| "loss": 1.072218656539917, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.5792349726775956, |
| "grad_norm": 0.3337240517139435, |
| "learning_rate": 2.073355722457739e-05, |
| "loss": 1.0474885702133179, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.5846994535519126, |
| "grad_norm": 0.3079088628292084, |
| "learning_rate": 2.0979870824545165e-05, |
| "loss": 1.0121105909347534, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.5901639344262295, |
| "grad_norm": 0.33754849433898926, |
| "learning_rate": 2.1224519697147145e-05, |
| "loss": 1.059688925743103, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.5956284153005464, |
| "grad_norm": 0.8730252385139465, |
| "learning_rate": 2.1467412930459936e-05, |
| "loss": 0.5801796317100525, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.6010928961748634, |
| "grad_norm": 1.4568897485733032, |
| "learning_rate": 2.1708460264958595e-05, |
| "loss": 1.0416557788848877, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.6065573770491803, |
| "grad_norm": 0.33535847067832947, |
| "learning_rate": 2.194757212705718e-05, |
| "loss": 1.0528165102005005, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.6120218579234973, |
| "grad_norm": 0.39254218339920044, |
| "learning_rate": 2.2184659662394522e-05, |
| "loss": 1.2890545129776, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.6174863387978142, |
| "grad_norm": 0.39055129885673523, |
| "learning_rate": 2.24196347688526e-05, |
| "loss": 1.0770164728164673, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.6229508196721312, |
| "grad_norm": 0.3632502853870392, |
| "learning_rate": 2.265241012929541e-05, |
| "loss": 1.0873066186904907, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.6284153005464481, |
| "grad_norm": 0.43228334188461304, |
| "learning_rate": 2.28828992440162e-05, |
| "loss": 1.009541630744934, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.633879781420765, |
| "grad_norm": 0.34224361181259155, |
| "learning_rate": 2.3111016462880873e-05, |
| "loss": 1.3436585664749146, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.639344262295082, |
| "grad_norm": 1.0117156505584717, |
| "learning_rate": 2.333667701715578e-05, |
| "loss": 0.9953033328056335, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.644808743169399, |
| "grad_norm": 0.4946202337741852, |
| "learning_rate": 2.3559797051007815e-05, |
| "loss": 1.3335758447647095, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.650273224043716, |
| "grad_norm": 0.30347269773483276, |
| "learning_rate": 2.3780293652665477e-05, |
| "loss": 0.9623563289642334, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.6557377049180326, |
| "grad_norm": 0.32685670256614685, |
| "learning_rate": 2.399808488522895e-05, |
| "loss": 1.137532353401184, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.6612021857923498, |
| "grad_norm": 0.3293339014053345, |
| "learning_rate": 2.4213089817118078e-05, |
| "loss": 1.055052399635315, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.6666666666666665, |
| "grad_norm": 0.46230047941207886, |
| "learning_rate": 2.4425228552146573e-05, |
| "loss": 1.1524425745010376, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.6721311475409837, |
| "grad_norm": 0.6461423635482788, |
| "learning_rate": 2.4634422259211614e-05, |
| "loss": 0.6941080689430237, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.6775956284153004, |
| "grad_norm": 1.0476462841033936, |
| "learning_rate": 2.4840593201587626e-05, |
| "loss": 0.8486734628677368, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.6830601092896176, |
| "grad_norm": 2.0571165084838867, |
| "learning_rate": 2.5043664765813377e-05, |
| "loss": 0.7663911581039429, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.6885245901639343, |
| "grad_norm": 0.4737246334552765, |
| "learning_rate": 2.524356149016163e-05, |
| "loss": 1.0844976902008057, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.6939890710382515, |
| "grad_norm": 0.3629366457462311, |
| "learning_rate": 2.544020909268085e-05, |
| "loss": 1.0529444217681885, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.6994535519125682, |
| "grad_norm": 0.3002009689807892, |
| "learning_rate": 2.5633534498798598e-05, |
| "loss": 1.004475474357605, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.7049180327868854, |
| "grad_norm": 6.076183319091797, |
| "learning_rate": 2.5823465868475985e-05, |
| "loss": 0.6815961599349976, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.710382513661202, |
| "grad_norm": 0.3336254954338074, |
| "learning_rate": 2.60099326229037e-05, |
| "loss": 1.1199313402175903, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.7158469945355193, |
| "grad_norm": 0.39660659432411194, |
| "learning_rate": 2.619286547072914e-05, |
| "loss": 1.038909673690796, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.721311475409836, |
| "grad_norm": 3.282057523727417, |
| "learning_rate": 2.6372196433805214e-05, |
| "loss": 0.709031879901886, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.7267759562841531, |
| "grad_norm": 0.4394005835056305, |
| "learning_rate": 2.654785887245112e-05, |
| "loss": 0.7367001175880432, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.7322404371584699, |
| "grad_norm": 0.32146531343460083, |
| "learning_rate": 2.671978751021577e-05, |
| "loss": 1.2364336252212524, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.737704918032787, |
| "grad_norm": 0.4605696499347687, |
| "learning_rate": 2.6887918458134622e-05, |
| "loss": 0.9947465658187866, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.7431693989071038, |
| "grad_norm": 0.3082851469516754, |
| "learning_rate": 2.705218923847093e-05, |
| "loss": 1.0885943174362183, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.748633879781421, |
| "grad_norm": 0.31364598870277405, |
| "learning_rate": 2.7212538807932576e-05, |
| "loss": 1.113791823387146, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.7540983606557377, |
| "grad_norm": 0.375470906496048, |
| "learning_rate": 2.7368907580355843e-05, |
| "loss": 0.7379390001296997, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.7595628415300546, |
| "grad_norm": 0.7673870921134949, |
| "learning_rate": 2.7521237448847734e-05, |
| "loss": 0.3575439751148224, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.7650273224043715, |
| "grad_norm": 0.4010154604911804, |
| "learning_rate": 2.766947180737861e-05, |
| "loss": 1.1416033506393433, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.7704918032786885, |
| "grad_norm": 0.27221986651420593, |
| "learning_rate": 2.781355557181706e-05, |
| "loss": 0.8822421431541443, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.7759562841530054, |
| "grad_norm": 0.496616005897522, |
| "learning_rate": 2.7953435200399262e-05, |
| "loss": 0.7749378085136414, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.7814207650273224, |
| "grad_norm": 0.7093772888183594, |
| "learning_rate": 2.8089058713625194e-05, |
| "loss": 0.7831916809082031, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.7868852459016393, |
| "grad_norm": 0.351391464471817, |
| "learning_rate": 2.8220375713574307e-05, |
| "loss": 1.0371301174163818, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.7923497267759563, |
| "grad_norm": 0.2960720658302307, |
| "learning_rate": 2.8347337402633456e-05, |
| "loss": 1.0718927383422852, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.7978142076502732, |
| "grad_norm": 0.6162258386611938, |
| "learning_rate": 2.846989660163019e-05, |
| "loss": 0.9962674379348755, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.8032786885245902, |
| "grad_norm": 0.4377414286136627, |
| "learning_rate": 2.858800776736461e-05, |
| "loss": 1.1022226810455322, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.8087431693989071, |
| "grad_norm": 0.2909747064113617, |
| "learning_rate": 2.87016270095333e-05, |
| "loss": 0.8855769634246826, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.814207650273224, |
| "grad_norm": 1.3076122999191284, |
| "learning_rate": 2.8810712107039e-05, |
| "loss": 0.6500188708305359, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.819672131147541, |
| "grad_norm": 0.303838849067688, |
| "learning_rate": 2.8915222523680082e-05, |
| "loss": 1.117383360862732, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.825136612021858, |
| "grad_norm": 0.28003278374671936, |
| "learning_rate": 2.9015119423213857e-05, |
| "loss": 1.067117691040039, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.830601092896175, |
| "grad_norm": 0.5866938829421997, |
| "learning_rate": 2.9110365683788173e-05, |
| "loss": 1.0645062923431396, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.8360655737704918, |
| "grad_norm": 0.4505460262298584, |
| "learning_rate": 2.9200925911735956e-05, |
| "loss": 1.0651450157165527, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.8415300546448088, |
| "grad_norm": 1.2405760288238525, |
| "learning_rate": 2.9286766454727563e-05, |
| "loss": 0.7814494967460632, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.8469945355191257, |
| "grad_norm": 0.34474292397499084, |
| "learning_rate": 2.9367855414276073e-05, |
| "loss": 1.151155710220337, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.8524590163934427, |
| "grad_norm": 0.3130297064781189, |
| "learning_rate": 2.9444162657590747e-05, |
| "loss": 0.960390567779541, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.8579234972677594, |
| "grad_norm": 0.37524649500846863, |
| "learning_rate": 2.951565982877447e-05, |
| "loss": 1.011980414390564, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.8633879781420766, |
| "grad_norm": 0.2684651017189026, |
| "learning_rate": 2.9582320359360864e-05, |
| "loss": 1.1165417432785034, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.8688524590163933, |
| "grad_norm": 0.6099388003349304, |
| "learning_rate": 2.9644119478187126e-05, |
| "loss": 0.9011784195899963, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.8743169398907105, |
| "grad_norm": 0.7181943655014038, |
| "learning_rate": 2.9701034220599074e-05, |
| "loss": 0.7465716004371643, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.8797814207650272, |
| "grad_norm": 0.31953608989715576, |
| "learning_rate": 2.975304343698483e-05, |
| "loss": 1.0670995712280273, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.8852459016393444, |
| "grad_norm": 0.3640364110469818, |
| "learning_rate": 2.980012780063404e-05, |
| "loss": 0.8072540760040283, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.890710382513661, |
| "grad_norm": 0.4549046754837036, |
| "learning_rate": 2.9842269814919755e-05, |
| "loss": 1.0721323490142822, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.8961748633879782, |
| "grad_norm": 0.9052984714508057, |
| "learning_rate": 2.9879453819800156e-05, |
| "loss": 0.8395594358444214, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.901639344262295, |
| "grad_norm": 0.32689180970191956, |
| "learning_rate": 2.991166599763788e-05, |
| "loss": 1.0846257209777832, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.9071038251366121, |
| "grad_norm": 0.43174540996551514, |
| "learning_rate": 2.993889437833466e-05, |
| "loss": 1.4478161334991455, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.9125683060109289, |
| "grad_norm": 0.30801597237586975, |
| "learning_rate": 2.9961128843779457e-05, |
| "loss": 1.0946073532104492, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.918032786885246, |
| "grad_norm": 0.29532310366630554, |
| "learning_rate": 2.9978361131608348e-05, |
| "loss": 1.0470151901245117, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.9234972677595628, |
| "grad_norm": 0.6861543655395508, |
| "learning_rate": 2.999058483827483e-05, |
| "loss": 1.1541780233383179, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.92896174863388, |
| "grad_norm": 0.373347669839859, |
| "learning_rate": 2.9997795421429404e-05, |
| "loss": 1.1399716138839722, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.9344262295081966, |
| "grad_norm": 0.31508007645606995, |
| "learning_rate": 2.9999990201607516e-05, |
| "loss": 1.1029908657073975, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.9398907103825138, |
| "grad_norm": 1.0937162637710571, |
| "learning_rate": 2.999716836322524e-05, |
| "loss": 1.2128394842147827, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.9453551912568305, |
| "grad_norm": 0.24902676045894623, |
| "learning_rate": 2.9989330954882366e-05, |
| "loss": 1.050254464149475, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.9508196721311475, |
| "grad_norm": 0.8138014078140259, |
| "learning_rate": 2.9976480888972708e-05, |
| "loss": 0.9359830021858215, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.9562841530054644, |
| "grad_norm": 0.4428923726081848, |
| "learning_rate": 2.9958622940601907e-05, |
| "loss": 1.1571717262268066, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.9617486338797814, |
| "grad_norm": 0.4176506996154785, |
| "learning_rate": 2.9935763745812935e-05, |
| "loss": 0.7543359398841858, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.9672131147540983, |
| "grad_norm": 0.32763925194740295, |
| "learning_rate": 2.990791179912017e-05, |
| "loss": 0.7445685863494873, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.9726775956284153, |
| "grad_norm": 0.3950430154800415, |
| "learning_rate": 2.9875077450352817e-05, |
| "loss": 1.0080370903015137, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.9781420765027322, |
| "grad_norm": 0.41049596667289734, |
| "learning_rate": 2.9837272900808863e-05, |
| "loss": 1.0963528156280518, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.9836065573770492, |
| "grad_norm": 0.9898220300674438, |
| "learning_rate": 2.9794512198721092e-05, |
| "loss": 0.7285370230674744, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.989071038251366, |
| "grad_norm": 0.3645866811275482, |
| "learning_rate": 2.9746811234036736e-05, |
| "loss": 0.8588154315948486, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.994535519125683, |
| "grad_norm": 0.23531635105609894, |
| "learning_rate": 2.9694187732512702e-05, |
| "loss": 1.1264981031417847, |
| "step": 730 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.6136480569839478, |
| "learning_rate": 2.96366612491287e-05, |
| "loss": 1.1605597734451294, |
| "step": 732 |
| }, |
| { |
| "epoch": 2.0054644808743167, |
| "grad_norm": 0.3638727068901062, |
| "learning_rate": 2.9574253160820573e-05, |
| "loss": 0.5738942623138428, |
| "step": 734 |
| }, |
| { |
| "epoch": 2.010928961748634, |
| "grad_norm": 0.38032037019729614, |
| "learning_rate": 2.9506986658536562e-05, |
| "loss": 0.8583514094352722, |
| "step": 736 |
| }, |
| { |
| "epoch": 2.0163934426229506, |
| "grad_norm": 0.3164927363395691, |
| "learning_rate": 2.9434886738619537e-05, |
| "loss": 0.914330005645752, |
| "step": 738 |
| }, |
| { |
| "epoch": 2.021857923497268, |
| "grad_norm": 0.2615019977092743, |
| "learning_rate": 2.9357980193518312e-05, |
| "loss": 0.9653261303901672, |
| "step": 740 |
| }, |
| { |
| "epoch": 2.0273224043715845, |
| "grad_norm": 0.35523325204849243, |
| "learning_rate": 2.927629560183153e-05, |
| "loss": 0.9499996304512024, |
| "step": 742 |
| }, |
| { |
| "epoch": 2.0327868852459017, |
| "grad_norm": 0.9985663294792175, |
| "learning_rate": 2.91898633176878e-05, |
| "loss": 1.293304681777954, |
| "step": 744 |
| }, |
| { |
| "epoch": 2.0382513661202184, |
| "grad_norm": 0.4231744706630707, |
| "learning_rate": 2.909871545946603e-05, |
| "loss": 1.3876979351043701, |
| "step": 746 |
| }, |
| { |
| "epoch": 2.0437158469945356, |
| "grad_norm": 0.36017516255378723, |
| "learning_rate": 2.9002885897860252e-05, |
| "loss": 1.1023402214050293, |
| "step": 748 |
| }, |
| { |
| "epoch": 2.0491803278688523, |
| "grad_norm": 0.36883434653282166, |
| "learning_rate": 2.8902410243293152e-05, |
| "loss": 0.982643187046051, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.0546448087431695, |
| "grad_norm": 1.1026638746261597, |
| "learning_rate": 2.8797325832683208e-05, |
| "loss": 0.9915405511856079, |
| "step": 752 |
| }, |
| { |
| "epoch": 2.060109289617486, |
| "grad_norm": 0.7558255195617676, |
| "learning_rate": 2.868767171557021e-05, |
| "loss": 0.5983301997184753, |
| "step": 754 |
| }, |
| { |
| "epoch": 2.0655737704918034, |
| "grad_norm": 0.2940157353878021, |
| "learning_rate": 2.8573488639604418e-05, |
| "loss": 0.18914969265460968, |
| "step": 756 |
| }, |
| { |
| "epoch": 2.07103825136612, |
| "grad_norm": 0.40500950813293457, |
| "learning_rate": 2.845481903540464e-05, |
| "loss": 1.0764596462249756, |
| "step": 758 |
| }, |
| { |
| "epoch": 2.0765027322404372, |
| "grad_norm": 0.2900254428386688, |
| "learning_rate": 2.8331707000790954e-05, |
| "loss": 0.6835562586784363, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.081967213114754, |
| "grad_norm": 0.3581569492816925, |
| "learning_rate": 2.820419828439788e-05, |
| "loss": 1.024125576019287, |
| "step": 762 |
| }, |
| { |
| "epoch": 2.087431693989071, |
| "grad_norm": 0.6350530982017517, |
| "learning_rate": 2.8072340268674133e-05, |
| "loss": 0.8588637709617615, |
| "step": 764 |
| }, |
| { |
| "epoch": 2.092896174863388, |
| "grad_norm": 0.29905757308006287, |
| "learning_rate": 2.793618195227521e-05, |
| "loss": 1.1255227327346802, |
| "step": 766 |
| }, |
| { |
| "epoch": 2.098360655737705, |
| "grad_norm": 0.6376034021377563, |
| "learning_rate": 2.779577393185539e-05, |
| "loss": 0.96219402551651, |
| "step": 768 |
| }, |
| { |
| "epoch": 2.1038251366120218, |
| "grad_norm": 0.4236537516117096, |
| "learning_rate": 2.765116838326597e-05, |
| "loss": 1.024062991142273, |
| "step": 770 |
| }, |
| { |
| "epoch": 2.109289617486339, |
| "grad_norm": 0.41378986835479736, |
| "learning_rate": 2.750241904216663e-05, |
| "loss": 0.9890310168266296, |
| "step": 772 |
| }, |
| { |
| "epoch": 2.1147540983606556, |
| "grad_norm": 0.2886035442352295, |
| "learning_rate": 2.7349581184057144e-05, |
| "loss": 0.8476873636245728, |
| "step": 774 |
| }, |
| { |
| "epoch": 2.120218579234973, |
| "grad_norm": 0.9165263772010803, |
| "learning_rate": 2.719271160373693e-05, |
| "loss": 0.6358259320259094, |
| "step": 776 |
| }, |
| { |
| "epoch": 2.1256830601092895, |
| "grad_norm": 0.30565494298934937, |
| "learning_rate": 2.703186859420002e-05, |
| "loss": 0.9424027800559998, |
| "step": 778 |
| }, |
| { |
| "epoch": 2.1311475409836067, |
| "grad_norm": 1.7092857360839844, |
| "learning_rate": 2.6867111924973283e-05, |
| "loss": 0.5238833427429199, |
| "step": 780 |
| }, |
| { |
| "epoch": 2.1366120218579234, |
| "grad_norm": 0.6912135481834412, |
| "learning_rate": 2.6698502819905935e-05, |
| "loss": 0.6626419425010681, |
| "step": 782 |
| }, |
| { |
| "epoch": 2.1420765027322406, |
| "grad_norm": 1.169651746749878, |
| "learning_rate": 2.652610393441872e-05, |
| "loss": 0.84641432762146, |
| "step": 784 |
| }, |
| { |
| "epoch": 2.1475409836065573, |
| "grad_norm": 0.7925103902816772, |
| "learning_rate": 2.6349979332220992e-05, |
| "loss": 0.6377699971199036, |
| "step": 786 |
| }, |
| { |
| "epoch": 2.1530054644808745, |
| "grad_norm": 14.70950984954834, |
| "learning_rate": 2.6170194461504586e-05, |
| "loss": 0.6009026169776917, |
| "step": 788 |
| }, |
| { |
| "epoch": 2.158469945355191, |
| "grad_norm": 0.3301484286785126, |
| "learning_rate": 2.5986816130623133e-05, |
| "loss": 1.100122094154358, |
| "step": 790 |
| }, |
| { |
| "epoch": 2.1639344262295084, |
| "grad_norm": 0.37357431650161743, |
| "learning_rate": 2.579991248326594e-05, |
| "loss": 0.920696496963501, |
| "step": 792 |
| }, |
| { |
| "epoch": 2.169398907103825, |
| "grad_norm": 0.392766535282135, |
| "learning_rate": 2.560955297313575e-05, |
| "loss": 0.8580578565597534, |
| "step": 794 |
| }, |
| { |
| "epoch": 2.1748633879781423, |
| "grad_norm": 0.4077216386795044, |
| "learning_rate": 2.5415808338139595e-05, |
| "loss": 0.6196969747543335, |
| "step": 796 |
| }, |
| { |
| "epoch": 2.180327868852459, |
| "grad_norm": 0.40729326009750366, |
| "learning_rate": 2.5218750574102465e-05, |
| "loss": 0.6308495402336121, |
| "step": 798 |
| }, |
| { |
| "epoch": 2.185792349726776, |
| "grad_norm": 0.2669661343097687, |
| "learning_rate": 2.5018452908013522e-05, |
| "loss": 0.9696865081787109, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.191256830601093, |
| "grad_norm": 0.49087321758270264, |
| "learning_rate": 2.48149897708149e-05, |
| "loss": 1.0563379526138306, |
| "step": 802 |
| }, |
| { |
| "epoch": 2.19672131147541, |
| "grad_norm": 0.3579554855823517, |
| "learning_rate": 2.4608436769743e-05, |
| "loss": 1.1717143058776855, |
| "step": 804 |
| }, |
| { |
| "epoch": 2.202185792349727, |
| "grad_norm": 0.2751805782318115, |
| "learning_rate": 2.4398870660232684e-05, |
| "loss": 0.939961850643158, |
| "step": 806 |
| }, |
| { |
| "epoch": 2.2076502732240435, |
| "grad_norm": 1.9989415407180786, |
| "learning_rate": 2.418636931739491e-05, |
| "loss": 0.8161137700080872, |
| "step": 808 |
| }, |
| { |
| "epoch": 2.2131147540983607, |
| "grad_norm": 0.5127248764038086, |
| "learning_rate": 2.3971011707078125e-05, |
| "loss": 1.0133951902389526, |
| "step": 810 |
| }, |
| { |
| "epoch": 2.2185792349726774, |
| "grad_norm": 1.0294671058654785, |
| "learning_rate": 2.3752877856524532e-05, |
| "loss": 0.9799056649208069, |
| "step": 812 |
| }, |
| { |
| "epoch": 2.2240437158469946, |
| "grad_norm": 0.8723068833351135, |
| "learning_rate": 2.353204882463168e-05, |
| "loss": 0.5277108550071716, |
| "step": 814 |
| }, |
| { |
| "epoch": 2.2295081967213113, |
| "grad_norm": 0.42140084505081177, |
| "learning_rate": 2.330860667183101e-05, |
| "loss": 0.7577545642852783, |
| "step": 816 |
| }, |
| { |
| "epoch": 2.2349726775956285, |
| "grad_norm": 1.2116085290908813, |
| "learning_rate": 2.308263442959396e-05, |
| "loss": 0.7222480773925781, |
| "step": 818 |
| }, |
| { |
| "epoch": 2.240437158469945, |
| "grad_norm": 0.4509221911430359, |
| "learning_rate": 2.2854216069577376e-05, |
| "loss": 1.012153148651123, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.2459016393442623, |
| "grad_norm": 0.5047132968902588, |
| "learning_rate": 2.2623436472419476e-05, |
| "loss": 1.0092438459396362, |
| "step": 822 |
| }, |
| { |
| "epoch": 2.251366120218579, |
| "grad_norm": 0.4681483507156372, |
| "learning_rate": 2.2390381396198102e-05, |
| "loss": 0.8176283836364746, |
| "step": 824 |
| }, |
| { |
| "epoch": 2.2568306010928962, |
| "grad_norm": 0.9395934343338013, |
| "learning_rate": 2.2155137444562842e-05, |
| "loss": 0.8658889532089233, |
| "step": 826 |
| }, |
| { |
| "epoch": 2.262295081967213, |
| "grad_norm": 0.8730800747871399, |
| "learning_rate": 2.191779203455302e-05, |
| "loss": 0.43443816900253296, |
| "step": 828 |
| }, |
| { |
| "epoch": 2.26775956284153, |
| "grad_norm": 0.3437061607837677, |
| "learning_rate": 2.1678433364113297e-05, |
| "loss": 0.7622541189193726, |
| "step": 830 |
| }, |
| { |
| "epoch": 2.273224043715847, |
| "grad_norm": 0.4882737696170807, |
| "learning_rate": 2.1437150379319245e-05, |
| "loss": 0.7550147771835327, |
| "step": 832 |
| }, |
| { |
| "epoch": 2.278688524590164, |
| "grad_norm": 0.5824230313301086, |
| "learning_rate": 2.1194032741324823e-05, |
| "loss": 0.7233867645263672, |
| "step": 834 |
| }, |
| { |
| "epoch": 2.2841530054644807, |
| "grad_norm": 0.455255925655365, |
| "learning_rate": 2.0949170793044142e-05, |
| "loss": 1.0538800954818726, |
| "step": 836 |
| }, |
| { |
| "epoch": 2.289617486338798, |
| "grad_norm": 0.33487194776535034, |
| "learning_rate": 2.070265552557985e-05, |
| "loss": 0.9521304368972778, |
| "step": 838 |
| }, |
| { |
| "epoch": 2.2950819672131146, |
| "grad_norm": 0.5424416661262512, |
| "learning_rate": 2.0454578544410758e-05, |
| "loss": 0.5176016092300415, |
| "step": 840 |
| }, |
| { |
| "epoch": 2.300546448087432, |
| "grad_norm": 0.44442370533943176, |
| "learning_rate": 2.0205032035351043e-05, |
| "loss": 0.6318649649620056, |
| "step": 842 |
| }, |
| { |
| "epoch": 2.3060109289617485, |
| "grad_norm": 0.17473050951957703, |
| "learning_rate": 1.9954108730293875e-05, |
| "loss": 0.5930679440498352, |
| "step": 844 |
| }, |
| { |
| "epoch": 2.3114754098360657, |
| "grad_norm": 0.30008724331855774, |
| "learning_rate": 1.9701901872752047e-05, |
| "loss": 1.1380505561828613, |
| "step": 846 |
| }, |
| { |
| "epoch": 2.3169398907103824, |
| "grad_norm": 0.3241089880466461, |
| "learning_rate": 1.9448505183208607e-05, |
| "loss": 1.0619306564331055, |
| "step": 848 |
| }, |
| { |
| "epoch": 2.3224043715846996, |
| "grad_norm": 0.40371695160865784, |
| "learning_rate": 1.919401282429013e-05, |
| "loss": 0.9061838388442993, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.3278688524590163, |
| "grad_norm": 0.2833711504936218, |
| "learning_rate": 1.893851936577567e-05, |
| "loss": 0.9225311279296875, |
| "step": 852 |
| }, |
| { |
| "epoch": 2.3333333333333335, |
| "grad_norm": 0.3210262060165405, |
| "learning_rate": 1.868211974945461e-05, |
| "loss": 0.7068516612052917, |
| "step": 854 |
| }, |
| { |
| "epoch": 2.33879781420765, |
| "grad_norm": 0.2600228190422058, |
| "learning_rate": 1.842490925384604e-05, |
| "loss": 0.6802095174789429, |
| "step": 856 |
| }, |
| { |
| "epoch": 2.3442622950819674, |
| "grad_norm": 0.3579116463661194, |
| "learning_rate": 1.816698345879313e-05, |
| "loss": 0.8223679065704346, |
| "step": 858 |
| }, |
| { |
| "epoch": 2.349726775956284, |
| "grad_norm": 0.2626917064189911, |
| "learning_rate": 1.790843820994548e-05, |
| "loss": 0.6397656202316284, |
| "step": 860 |
| }, |
| { |
| "epoch": 2.3551912568306013, |
| "grad_norm": 0.8502326011657715, |
| "learning_rate": 1.7649369583142763e-05, |
| "loss": 0.9318816065788269, |
| "step": 862 |
| }, |
| { |
| "epoch": 2.360655737704918, |
| "grad_norm": 0.34465688467025757, |
| "learning_rate": 1.738987384871274e-05, |
| "loss": 0.8776544332504272, |
| "step": 864 |
| }, |
| { |
| "epoch": 2.366120218579235, |
| "grad_norm": 0.35162287950515747, |
| "learning_rate": 1.7130047435697118e-05, |
| "loss": 0.6873989105224609, |
| "step": 866 |
| }, |
| { |
| "epoch": 2.371584699453552, |
| "grad_norm": 2.411452054977417, |
| "learning_rate": 1.6869986896018226e-05, |
| "loss": 1.1587547063827515, |
| "step": 868 |
| }, |
| { |
| "epoch": 2.3770491803278686, |
| "grad_norm": 0.40320128202438354, |
| "learning_rate": 1.66097888686003e-05, |
| "loss": 0.7416959404945374, |
| "step": 870 |
| }, |
| { |
| "epoch": 2.3825136612021858, |
| "grad_norm": 0.7894490361213684, |
| "learning_rate": 1.6349550043458252e-05, |
| "loss": 0.5259535312652588, |
| "step": 872 |
| }, |
| { |
| "epoch": 2.387978142076503, |
| "grad_norm": 0.5915789604187012, |
| "learning_rate": 1.608936712576749e-05, |
| "loss": 1.008756160736084, |
| "step": 874 |
| }, |
| { |
| "epoch": 2.3934426229508197, |
| "grad_norm": 0.28783875703811646, |
| "learning_rate": 1.582933679992809e-05, |
| "loss": 0.7991337180137634, |
| "step": 876 |
| }, |
| { |
| "epoch": 2.3989071038251364, |
| "grad_norm": 2.9894583225250244, |
| "learning_rate": 1.556955569363678e-05, |
| "loss": 0.49375149607658386, |
| "step": 878 |
| }, |
| { |
| "epoch": 2.4043715846994536, |
| "grad_norm": 0.4313114285469055, |
| "learning_rate": 1.531012034197988e-05, |
| "loss": 1.1030757427215576, |
| "step": 880 |
| }, |
| { |
| "epoch": 2.4098360655737707, |
| "grad_norm": 0.4268873631954193, |
| "learning_rate": 1.5051127151560745e-05, |
| "loss": 0.7422546148300171, |
| "step": 882 |
| }, |
| { |
| "epoch": 2.4153005464480874, |
| "grad_norm": 0.4147166609764099, |
| "learning_rate": 1.4792672364674816e-05, |
| "loss": 0.7103762030601501, |
| "step": 884 |
| }, |
| { |
| "epoch": 2.420765027322404, |
| "grad_norm": 0.7336945533752441, |
| "learning_rate": 1.4534852023545968e-05, |
| "loss": 0.8475983142852783, |
| "step": 886 |
| }, |
| { |
| "epoch": 2.4262295081967213, |
| "grad_norm": 0.6543511748313904, |
| "learning_rate": 1.4277761934636963e-05, |
| "loss": 0.968014121055603, |
| "step": 888 |
| }, |
| { |
| "epoch": 2.431693989071038, |
| "grad_norm": 0.49377578496932983, |
| "learning_rate": 1.4021497633047664e-05, |
| "loss": 0.6535788774490356, |
| "step": 890 |
| }, |
| { |
| "epoch": 2.4371584699453552, |
| "grad_norm": 0.8991974592208862, |
| "learning_rate": 1.3766154347013933e-05, |
| "loss": 1.167250156402588, |
| "step": 892 |
| }, |
| { |
| "epoch": 2.442622950819672, |
| "grad_norm": 0.5998942255973816, |
| "learning_rate": 1.3511826962520809e-05, |
| "loss": 0.7989946603775024, |
| "step": 894 |
| }, |
| { |
| "epoch": 2.448087431693989, |
| "grad_norm": 0.25032103061676025, |
| "learning_rate": 1.3258609988042627e-05, |
| "loss": 0.4925026595592499, |
| "step": 896 |
| }, |
| { |
| "epoch": 2.453551912568306, |
| "grad_norm": 0.3441256582736969, |
| "learning_rate": 1.300659751942353e-05, |
| "loss": 0.22913937270641327, |
| "step": 898 |
| }, |
| { |
| "epoch": 2.459016393442623, |
| "grad_norm": 0.4925740659236908, |
| "learning_rate": 1.2755883204911305e-05, |
| "loss": 0.9754618406295776, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.4644808743169397, |
| "grad_norm": 0.3265496492385864, |
| "learning_rate": 1.2506560210357541e-05, |
| "loss": 0.47504350543022156, |
| "step": 902 |
| }, |
| { |
| "epoch": 2.469945355191257, |
| "grad_norm": 0.30063438415527344, |
| "learning_rate": 1.225872118459706e-05, |
| "loss": 0.994674026966095, |
| "step": 904 |
| }, |
| { |
| "epoch": 2.4754098360655736, |
| "grad_norm": 3.1096532344818115, |
| "learning_rate": 1.2012458225019375e-05, |
| "loss": 0.41736599802970886, |
| "step": 906 |
| }, |
| { |
| "epoch": 2.480874316939891, |
| "grad_norm": 2.740457773208618, |
| "learning_rate": 1.176786284334528e-05, |
| "loss": 0.6344496011734009, |
| "step": 908 |
| }, |
| { |
| "epoch": 2.4863387978142075, |
| "grad_norm": 0.3145305812358856, |
| "learning_rate": 1.1525025931620855e-05, |
| "loss": 0.9095264077186584, |
| "step": 910 |
| }, |
| { |
| "epoch": 2.4918032786885247, |
| "grad_norm": 1.253567099571228, |
| "learning_rate": 1.1284037728441877e-05, |
| "loss": 0.5218656659126282, |
| "step": 912 |
| }, |
| { |
| "epoch": 2.4972677595628414, |
| "grad_norm": 0.4414713382720947, |
| "learning_rate": 1.1044987785420924e-05, |
| "loss": 0.9621478319168091, |
| "step": 914 |
| }, |
| { |
| "epoch": 2.5027322404371586, |
| "grad_norm": 0.2708357572555542, |
| "learning_rate": 1.0807964933909975e-05, |
| "loss": 1.0340259075164795, |
| "step": 916 |
| }, |
| { |
| "epoch": 2.5081967213114753, |
| "grad_norm": 0.5265946388244629, |
| "learning_rate": 1.0573057251990443e-05, |
| "loss": 0.944985568523407, |
| "step": 918 |
| }, |
| { |
| "epoch": 2.5136612021857925, |
| "grad_norm": 0.2796134948730469, |
| "learning_rate": 1.0340352031743256e-05, |
| "loss": 0.7973819375038147, |
| "step": 920 |
| }, |
| { |
| "epoch": 2.519125683060109, |
| "grad_norm": 0.297129362821579, |
| "learning_rate": 1.010993574681095e-05, |
| "loss": 0.8349511027336121, |
| "step": 922 |
| }, |
| { |
| "epoch": 2.5245901639344264, |
| "grad_norm": 0.8182933330535889, |
| "learning_rate": 9.881894020263938e-06, |
| "loss": 0.3968673050403595, |
| "step": 924 |
| }, |
| { |
| "epoch": 2.530054644808743, |
| "grad_norm": 0.2900335192680359, |
| "learning_rate": 9.656311592782831e-06, |
| "loss": 0.9522480368614197, |
| "step": 926 |
| }, |
| { |
| "epoch": 2.5355191256830603, |
| "grad_norm": 0.7127178311347961, |
| "learning_rate": 9.433272291168689e-06, |
| "loss": 0.9828237295150757, |
| "step": 928 |
| }, |
| { |
| "epoch": 2.540983606557377, |
| "grad_norm": 0.6727446913719177, |
| "learning_rate": 9.212858997192744e-06, |
| "loss": 0.7021880149841309, |
| "step": 930 |
| }, |
| { |
| "epoch": 2.546448087431694, |
| "grad_norm": 0.575061559677124, |
| "learning_rate": 8.995153616797544e-06, |
| "loss": 0.53633713722229, |
| "step": 932 |
| }, |
| { |
| "epoch": 2.551912568306011, |
| "grad_norm": 0.6114161014556885, |
| "learning_rate": 8.78023704966047e-06, |
| "loss": 0.8347498178482056, |
| "step": 934 |
| }, |
| { |
| "epoch": 2.557377049180328, |
| "grad_norm": 0.3482673168182373, |
| "learning_rate": 8.568189159131336e-06, |
| "loss": 0.8326533436775208, |
| "step": 936 |
| }, |
| { |
| "epoch": 2.5628415300546448, |
| "grad_norm": 0.4061831533908844, |
| "learning_rate": 8.359088742554941e-06, |
| "loss": 0.9324700236320496, |
| "step": 938 |
| }, |
| { |
| "epoch": 2.5683060109289615, |
| "grad_norm": 0.2786436676979065, |
| "learning_rate": 8.15301350198999e-06, |
| "loss": 0.8789687752723694, |
| "step": 940 |
| }, |
| { |
| "epoch": 2.5737704918032787, |
| "grad_norm": 0.29912883043289185, |
| "learning_rate": 7.950040015334789e-06, |
| "loss": 0.8741180896759033, |
| "step": 942 |
| }, |
| { |
| "epoch": 2.579234972677596, |
| "grad_norm": 0.23834018409252167, |
| "learning_rate": 7.750243707870748e-06, |
| "loss": 1.01088285446167, |
| "step": 944 |
| }, |
| { |
| "epoch": 2.5846994535519126, |
| "grad_norm": 0.7520560026168823, |
| "learning_rate": 7.553698824234314e-06, |
| "loss": 0.9131101965904236, |
| "step": 946 |
| }, |
| { |
| "epoch": 2.5901639344262293, |
| "grad_norm": 0.3973008096218109, |
| "learning_rate": 7.360478400827475e-06, |
| "loss": 0.6671414971351624, |
| "step": 948 |
| }, |
| { |
| "epoch": 2.5956284153005464, |
| "grad_norm": 1.165856957435608, |
| "learning_rate": 7.170654238677331e-06, |
| "loss": 0.8358378410339355, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.6010928961748636, |
| "grad_norm": 0.6425595879554749, |
| "learning_rate": 6.984296876754711e-06, |
| "loss": 0.876006007194519, |
| "step": 952 |
| }, |
| { |
| "epoch": 2.6065573770491803, |
| "grad_norm": 0.2834312319755554, |
| "learning_rate": 6.801475565761783e-06, |
| "loss": 0.8852624297142029, |
| "step": 954 |
| }, |
| { |
| "epoch": 2.612021857923497, |
| "grad_norm": 0.26827678084373474, |
| "learning_rate": 6.622258242398371e-06, |
| "loss": 0.49164018034935, |
| "step": 956 |
| }, |
| { |
| "epoch": 2.6174863387978142, |
| "grad_norm": 0.3386532962322235, |
| "learning_rate": 6.4467115041165855e-06, |
| "loss": 0.7272903919219971, |
| "step": 958 |
| }, |
| { |
| "epoch": 2.6229508196721314, |
| "grad_norm": 0.2805633544921875, |
| "learning_rate": 6.2749005843730336e-06, |
| "loss": 0.7272099852561951, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.628415300546448, |
| "grad_norm": 0.27282318472862244, |
| "learning_rate": 6.106889328388064e-06, |
| "loss": 1.0292612314224243, |
| "step": 962 |
| }, |
| { |
| "epoch": 2.633879781420765, |
| "grad_norm": 0.302123486995697, |
| "learning_rate": 5.942740169420701e-06, |
| "loss": 0.9444547891616821, |
| "step": 964 |
| }, |
| { |
| "epoch": 2.639344262295082, |
| "grad_norm": 0.37525907158851624, |
| "learning_rate": 5.7825141055683895e-06, |
| "loss": 0.7231637239456177, |
| "step": 966 |
| }, |
| { |
| "epoch": 2.644808743169399, |
| "grad_norm": 0.26892736554145813, |
| "learning_rate": 5.62627067709992e-06, |
| "loss": 0.9579343795776367, |
| "step": 968 |
| }, |
| { |
| "epoch": 2.650273224043716, |
| "grad_norm": 0.3834379017353058, |
| "learning_rate": 5.474067944330285e-06, |
| "loss": 1.0427404642105103, |
| "step": 970 |
| }, |
| { |
| "epoch": 2.6557377049180326, |
| "grad_norm": 0.576501727104187, |
| "learning_rate": 5.325962466045282e-06, |
| "loss": 1.0664690732955933, |
| "step": 972 |
| }, |
| { |
| "epoch": 2.66120218579235, |
| "grad_norm": 0.32818320393562317, |
| "learning_rate": 5.18200927848421e-06, |
| "loss": 0.9675801992416382, |
| "step": 974 |
| }, |
| { |
| "epoch": 2.6666666666666665, |
| "grad_norm": 0.4426153004169464, |
| "learning_rate": 5.042261874888308e-06, |
| "loss": 0.8803547024726868, |
| "step": 976 |
| }, |
| { |
| "epoch": 2.6721311475409837, |
| "grad_norm": 0.6687554717063904, |
| "learning_rate": 4.906772185622572e-06, |
| "loss": 0.4762001633644104, |
| "step": 978 |
| }, |
| { |
| "epoch": 2.6775956284153004, |
| "grad_norm": 0.25917065143585205, |
| "learning_rate": 4.775590558878368e-06, |
| "loss": 0.8584491014480591, |
| "step": 980 |
| }, |
| { |
| "epoch": 2.6830601092896176, |
| "grad_norm": 0.8609603643417358, |
| "learning_rate": 4.648765741963903e-06, |
| "loss": 1.0697994232177734, |
| "step": 982 |
| }, |
| { |
| "epoch": 2.6885245901639343, |
| "grad_norm": 0.26177000999450684, |
| "learning_rate": 4.526344863189724e-06, |
| "loss": 1.0349247455596924, |
| "step": 984 |
| }, |
| { |
| "epoch": 2.6939890710382515, |
| "grad_norm": 0.6148212552070618, |
| "learning_rate": 4.408373414355714e-06, |
| "loss": 1.1050891876220703, |
| "step": 986 |
| }, |
| { |
| "epoch": 2.699453551912568, |
| "grad_norm": 0.3300875425338745, |
| "learning_rate": 4.29489523384628e-06, |
| "loss": 0.9843365550041199, |
| "step": 988 |
| }, |
| { |
| "epoch": 2.7049180327868854, |
| "grad_norm": 0.3000809848308563, |
| "learning_rate": 4.185952490339899e-06, |
| "loss": 0.8345898985862732, |
| "step": 990 |
| }, |
| { |
| "epoch": 2.710382513661202, |
| "grad_norm": 0.45524969696998596, |
| "learning_rate": 4.081585667139231e-06, |
| "loss": 0.8222418427467346, |
| "step": 992 |
| }, |
| { |
| "epoch": 2.7158469945355193, |
| "grad_norm": 0.5290429592132568, |
| "learning_rate": 3.981833547127413e-06, |
| "loss": 0.8905819058418274, |
| "step": 994 |
| }, |
| { |
| "epoch": 2.721311475409836, |
| "grad_norm": 0.9160144925117493, |
| "learning_rate": 3.886733198356298e-06, |
| "loss": 0.8706901669502258, |
| "step": 996 |
| }, |
| { |
| "epoch": 2.726775956284153, |
| "grad_norm": 0.24170571565628052, |
| "learning_rate": 3.7963199602718717e-06, |
| "loss": 0.829027533531189, |
| "step": 998 |
| }, |
| { |
| "epoch": 2.73224043715847, |
| "grad_norm": 0.28936469554901123, |
| "learning_rate": 3.7106274305821034e-06, |
| "loss": 0.9818518161773682, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.737704918032787, |
| "grad_norm": 1.1057279109954834, |
| "learning_rate": 3.6296874527719515e-06, |
| "loss": 0.8876405954360962, |
| "step": 1002 |
| }, |
| { |
| "epoch": 2.7431693989071038, |
| "grad_norm": 0.22955042123794556, |
| "learning_rate": 3.553530104270281e-06, |
| "loss": 0.5577114820480347, |
| "step": 1004 |
| }, |
| { |
| "epoch": 2.748633879781421, |
| "grad_norm": 0.7059992551803589, |
| "learning_rate": 3.4821836852730384e-06, |
| "loss": 0.8629549741744995, |
| "step": 1006 |
| }, |
| { |
| "epoch": 2.7540983606557377, |
| "grad_norm": 0.41986677050590515, |
| "learning_rate": 3.41567470822686e-06, |
| "loss": 1.2205630540847778, |
| "step": 1008 |
| }, |
| { |
| "epoch": 2.7595628415300544, |
| "grad_norm": 0.3675086796283722, |
| "learning_rate": 3.354027887976989e-06, |
| "loss": 0.6176258325576782, |
| "step": 1010 |
| }, |
| { |
| "epoch": 2.7650273224043715, |
| "grad_norm": 0.3218194842338562, |
| "learning_rate": 3.297266132583221e-06, |
| "loss": 0.9429301023483276, |
| "step": 1012 |
| }, |
| { |
| "epoch": 2.7704918032786887, |
| "grad_norm": 6.591115951538086, |
| "learning_rate": 3.245410534807195e-06, |
| "loss": 0.8208173513412476, |
| "step": 1014 |
| }, |
| { |
| "epoch": 2.7759562841530054, |
| "grad_norm": 0.31964442133903503, |
| "learning_rate": 3.1984803642743314e-06, |
| "loss": 0.6169829368591309, |
| "step": 1016 |
| }, |
| { |
| "epoch": 2.781420765027322, |
| "grad_norm": 1.38068425655365, |
| "learning_rate": 3.1564930603131777e-06, |
| "loss": 0.4389287531375885, |
| "step": 1018 |
| }, |
| { |
| "epoch": 2.7868852459016393, |
| "grad_norm": 0.2846753001213074, |
| "learning_rate": 3.1194642254749395e-06, |
| "loss": 0.9181113243103027, |
| "step": 1020 |
| }, |
| { |
| "epoch": 2.7923497267759565, |
| "grad_norm": 0.31778064370155334, |
| "learning_rate": 3.0874076197355317e-06, |
| "loss": 0.9030160903930664, |
| "step": 1022 |
| }, |
| { |
| "epoch": 2.797814207650273, |
| "grad_norm": 0.442088782787323, |
| "learning_rate": 3.0603351553823717e-06, |
| "loss": 0.807093620300293, |
| "step": 1024 |
| }, |
| { |
| "epoch": 2.80327868852459, |
| "grad_norm": 0.3773954510688782, |
| "learning_rate": 3.038256892587734e-06, |
| "loss": 0.8687778115272522, |
| "step": 1026 |
| }, |
| { |
| "epoch": 2.808743169398907, |
| "grad_norm": 0.3855181336402893, |
| "learning_rate": 3.0211810356703803e-06, |
| "loss": 1.0619217157363892, |
| "step": 1028 |
| }, |
| { |
| "epoch": 2.8142076502732243, |
| "grad_norm": 0.2924480736255646, |
| "learning_rate": 3.0091139300468266e-06, |
| "loss": 0.7161194086074829, |
| "step": 1030 |
| }, |
| { |
| "epoch": 2.819672131147541, |
| "grad_norm": 0.2923072278499603, |
| "learning_rate": 3.0020600598733656e-06, |
| "loss": 0.725647509098053, |
| "step": 1032 |
| }, |
| { |
| "epoch": 2.8251366120218577, |
| "grad_norm": 0.36642518639564514, |
| "learning_rate": 3.000022046379753e-06, |
| "loss": 1.2020186185836792, |
| "step": 1034 |
| }, |
| { |
| "epoch": 2.830601092896175, |
| "grad_norm": 0.7068674564361572, |
| "learning_rate": 3.0030006468951557e-06, |
| "loss": 1.0905146598815918, |
| "step": 1036 |
| }, |
| { |
| "epoch": 2.836065573770492, |
| "grad_norm": 0.3369429409503937, |
| "learning_rate": 3.0109947545667246e-06, |
| "loss": 0.25182783603668213, |
| "step": 1038 |
| }, |
| { |
| "epoch": 2.841530054644809, |
| "grad_norm": 0.3530783951282501, |
| "learning_rate": 3.024001398770901e-06, |
| "loss": 1.0270302295684814, |
| "step": 1040 |
| }, |
| { |
| "epoch": 2.8469945355191255, |
| "grad_norm": 0.32281166315078735, |
| "learning_rate": 3.042015746217308e-06, |
| "loss": 0.9714375138282776, |
| "step": 1042 |
| }, |
| { |
| "epoch": 2.8524590163934427, |
| "grad_norm": 0.5103802680969238, |
| "learning_rate": 3.0650311027448116e-06, |
| "loss": 0.9001659154891968, |
| "step": 1044 |
| }, |
| { |
| "epoch": 2.8579234972677594, |
| "grad_norm": 0.39303264021873474, |
| "learning_rate": 3.0930389158090754e-06, |
| "loss": 1.1310542821884155, |
| "step": 1046 |
| }, |
| { |
| "epoch": 2.8633879781420766, |
| "grad_norm": 0.30728888511657715, |
| "learning_rate": 3.1260287776607025e-06, |
| "loss": 0.9291237592697144, |
| "step": 1048 |
| }, |
| { |
| "epoch": 2.8688524590163933, |
| "grad_norm": 0.9881218075752258, |
| "learning_rate": 3.163988429212773e-06, |
| "loss": 0.9141870141029358, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.8743169398907105, |
| "grad_norm": 0.21143440902233124, |
| "learning_rate": 3.206903764596349e-06, |
| "loss": 0.49286749958992004, |
| "step": 1052 |
| }, |
| { |
| "epoch": 2.879781420765027, |
| "grad_norm": 0.32137101888656616, |
| "learning_rate": 3.254758836402225e-06, |
| "loss": 1.0972161293029785, |
| "step": 1054 |
| }, |
| { |
| "epoch": 2.8852459016393444, |
| "grad_norm": 0.33947762846946716, |
| "learning_rate": 3.3075358616070144e-06, |
| "loss": 0.9067559242248535, |
| "step": 1056 |
| }, |
| { |
| "epoch": 2.890710382513661, |
| "grad_norm": 0.32341116666793823, |
| "learning_rate": 3.365215228181358e-06, |
| "loss": 0.7935602068901062, |
| "step": 1058 |
| }, |
| { |
| "epoch": 2.8961748633879782, |
| "grad_norm": 0.3355255424976349, |
| "learning_rate": 3.4277755023777795e-06, |
| "loss": 0.9974086880683899, |
| "step": 1060 |
| }, |
| { |
| "epoch": 2.901639344262295, |
| "grad_norm": 0.27966246008872986, |
| "learning_rate": 3.495193436695504e-06, |
| "loss": 0.6481755375862122, |
| "step": 1062 |
| }, |
| { |
| "epoch": 2.907103825136612, |
| "grad_norm": 1.3332988023757935, |
| "learning_rate": 3.567443978519267e-06, |
| "loss": 0.4348956346511841, |
| "step": 1064 |
| }, |
| { |
| "epoch": 2.912568306010929, |
| "grad_norm": 0.3482104539871216, |
| "learning_rate": 3.6445002794288992e-06, |
| "loss": 0.7822635173797607, |
| "step": 1066 |
| }, |
| { |
| "epoch": 2.918032786885246, |
| "grad_norm": 0.3401924669742584, |
| "learning_rate": 3.7263337051762718e-06, |
| "loss": 0.6737417578697205, |
| "step": 1068 |
| }, |
| { |
| "epoch": 2.9234972677595628, |
| "grad_norm": 0.2622358798980713, |
| "learning_rate": 3.8129138463257943e-06, |
| "loss": 0.818148136138916, |
| "step": 1070 |
| }, |
| { |
| "epoch": 2.92896174863388, |
| "grad_norm": 0.5667299628257751, |
| "learning_rate": 3.904208529554625e-06, |
| "loss": 0.9563145637512207, |
| "step": 1072 |
| }, |
| { |
| "epoch": 2.9344262295081966, |
| "grad_norm": 1.3791451454162598, |
| "learning_rate": 4.000183829608332e-06, |
| "loss": 1.3551911115646362, |
| "step": 1074 |
| }, |
| { |
| "epoch": 2.939890710382514, |
| "grad_norm": 0.43662652373313904, |
| "learning_rate": 4.100804081907595e-06, |
| "loss": 0.38690492510795593, |
| "step": 1076 |
| }, |
| { |
| "epoch": 2.9453551912568305, |
| "grad_norm": 0.31867608428001404, |
| "learning_rate": 4.206031895801176e-06, |
| "loss": 0.9536612033843994, |
| "step": 1078 |
| }, |
| { |
| "epoch": 2.9508196721311473, |
| "grad_norm": 0.5429739356040955, |
| "learning_rate": 4.315828168460367e-06, |
| "loss": 0.5728800296783447, |
| "step": 1080 |
| }, |
| { |
| "epoch": 2.9562841530054644, |
| "grad_norm": 0.21107900142669678, |
| "learning_rate": 4.430152099409704e-06, |
| "loss": 0.1516314148902893, |
| "step": 1082 |
| }, |
| { |
| "epoch": 2.9617486338797816, |
| "grad_norm": 0.22951926290988922, |
| "learning_rate": 4.548961205688424e-06, |
| "loss": 1.0313093662261963, |
| "step": 1084 |
| }, |
| { |
| "epoch": 2.9672131147540983, |
| "grad_norm": 0.28624290227890015, |
| "learning_rate": 4.672211337637246e-06, |
| "loss": 0.69117271900177, |
| "step": 1086 |
| }, |
| { |
| "epoch": 2.972677595628415, |
| "grad_norm": 0.5194671154022217, |
| "learning_rate": 4.7998566953044445e-06, |
| "loss": 0.8698742389678955, |
| "step": 1088 |
| }, |
| { |
| "epoch": 2.978142076502732, |
| "grad_norm": 0.413565993309021, |
| "learning_rate": 4.931849845465193e-06, |
| "loss": 1.0937373638153076, |
| "step": 1090 |
| }, |
| { |
| "epoch": 2.9836065573770494, |
| "grad_norm": 0.9167707562446594, |
| "learning_rate": 5.06814173924782e-06, |
| "loss": 1.0198386907577515, |
| "step": 1092 |
| }, |
| { |
| "epoch": 2.989071038251366, |
| "grad_norm": 0.9425981044769287, |
| "learning_rate": 5.208681730360458e-06, |
| "loss": 0.5489972233772278, |
| "step": 1094 |
| }, |
| { |
| "epoch": 2.994535519125683, |
| "grad_norm": 0.38925519585609436, |
| "learning_rate": 5.3534175939112694e-06, |
| "loss": 0.9535996913909912, |
| "step": 1096 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.33070504665374756, |
| "learning_rate": 5.50229554581536e-06, |
| "loss": 0.64783775806427, |
| "step": 1098 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 1098, |
| "total_flos": 4.929269661905715e+18, |
| "train_loss": 1.00607624289008, |
| "train_runtime": 12927.7338, |
| "train_samples_per_second": 5.096, |
| "train_steps_per_second": 0.085 |
| } |
| ], |
| "logging_steps": 2, |
| "max_steps": 1098, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 99999, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": false, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.929269661905715e+18, |
| "train_batch_size": 3, |
| "trial_name": null, |
| "trial_params": null |
| } |