| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 460, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.004347826086956522, |
| "grad_norm": 209.3180389404297, |
| "learning_rate": 0.0, |
| "loss": 5.8188, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.008695652173913044, |
| "grad_norm": 215.69874572753906, |
| "learning_rate": 4.347826086956522e-06, |
| "loss": 5.9259, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.013043478260869565, |
| "grad_norm": 62.712825775146484, |
| "learning_rate": 8.695652173913044e-06, |
| "loss": 5.4202, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.017391304347826087, |
| "grad_norm": 85.59194946289062, |
| "learning_rate": 1.3043478260869566e-05, |
| "loss": 5.3079, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.021739130434782608, |
| "grad_norm": 22.901897430419922, |
| "learning_rate": 1.739130434782609e-05, |
| "loss": 5.0196, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.02608695652173913, |
| "grad_norm": 22.081829071044922, |
| "learning_rate": 2.173913043478261e-05, |
| "loss": 4.8222, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.030434782608695653, |
| "grad_norm": 11.022245407104492, |
| "learning_rate": 2.608695652173913e-05, |
| "loss": 4.4617, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.034782608695652174, |
| "grad_norm": 7.274469375610352, |
| "learning_rate": 3.0434782608695656e-05, |
| "loss": 4.335, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0391304347826087, |
| "grad_norm": 3.8645834922790527, |
| "learning_rate": 3.478260869565218e-05, |
| "loss": 4.0476, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.043478260869565216, |
| "grad_norm": 2.6724016666412354, |
| "learning_rate": 3.91304347826087e-05, |
| "loss": 3.8387, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.04782608695652174, |
| "grad_norm": 2.258195161819458, |
| "learning_rate": 4.347826086956522e-05, |
| "loss": 3.8144, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.05217391304347826, |
| "grad_norm": 1.8822625875473022, |
| "learning_rate": 4.782608695652174e-05, |
| "loss": 3.4008, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.05652173913043478, |
| "grad_norm": 2.047840118408203, |
| "learning_rate": 5.217391304347826e-05, |
| "loss": 3.2554, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.06086956521739131, |
| "grad_norm": 1.8671568632125854, |
| "learning_rate": 5.652173913043478e-05, |
| "loss": 3.2461, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.06521739130434782, |
| "grad_norm": 1.6069483757019043, |
| "learning_rate": 6.086956521739131e-05, |
| "loss": 2.9738, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.06956521739130435, |
| "grad_norm": 1.3096915483474731, |
| "learning_rate": 6.521739130434783e-05, |
| "loss": 2.7823, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.07391304347826087, |
| "grad_norm": 1.3594956398010254, |
| "learning_rate": 6.956521739130436e-05, |
| "loss": 2.6255, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.0782608695652174, |
| "grad_norm": 1.0210895538330078, |
| "learning_rate": 7.391304347826086e-05, |
| "loss": 2.4501, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.08260869565217391, |
| "grad_norm": 0.8942164182662964, |
| "learning_rate": 7.82608695652174e-05, |
| "loss": 2.2934, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.08695652173913043, |
| "grad_norm": 0.8361735343933105, |
| "learning_rate": 8.260869565217392e-05, |
| "loss": 2.2029, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.09130434782608696, |
| "grad_norm": 0.794482409954071, |
| "learning_rate": 8.695652173913044e-05, |
| "loss": 2.0223, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.09565217391304348, |
| "grad_norm": 0.7513137459754944, |
| "learning_rate": 9.130434782608696e-05, |
| "loss": 1.8504, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.76312655210495, |
| "learning_rate": 9.565217391304348e-05, |
| "loss": 1.6577, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.10434782608695652, |
| "grad_norm": 0.8560758829116821, |
| "learning_rate": 0.0001, |
| "loss": 1.5565, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.10869565217391304, |
| "grad_norm": 0.7479954957962036, |
| "learning_rate": 0.00010434782608695653, |
| "loss": 1.4364, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.11304347826086956, |
| "grad_norm": 0.5951140522956848, |
| "learning_rate": 0.00010869565217391305, |
| "loss": 1.2957, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.11739130434782609, |
| "grad_norm": 0.503224790096283, |
| "learning_rate": 0.00011304347826086956, |
| "loss": 1.1799, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.12173913043478261, |
| "grad_norm": 0.47480374574661255, |
| "learning_rate": 0.0001173913043478261, |
| "loss": 1.1277, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.12608695652173912, |
| "grad_norm": 0.38552260398864746, |
| "learning_rate": 0.00012173913043478263, |
| "loss": 1.0744, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.13043478260869565, |
| "grad_norm": 0.35596558451652527, |
| "learning_rate": 0.00012608695652173915, |
| "loss": 1.0023, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.13478260869565217, |
| "grad_norm": 0.32971665263175964, |
| "learning_rate": 0.00013043478260869567, |
| "loss": 0.9691, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.1391304347826087, |
| "grad_norm": 0.37770169973373413, |
| "learning_rate": 0.0001347826086956522, |
| "loss": 0.9116, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.14347826086956522, |
| "grad_norm": 0.22640736401081085, |
| "learning_rate": 0.0001391304347826087, |
| "loss": 0.8613, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.14782608695652175, |
| "grad_norm": 0.20925410091876984, |
| "learning_rate": 0.0001434782608695652, |
| "loss": 0.8836, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.15217391304347827, |
| "grad_norm": 0.20542123913764954, |
| "learning_rate": 0.00014782608695652173, |
| "loss": 0.8502, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.1565217391304348, |
| "grad_norm": 0.16715222597122192, |
| "learning_rate": 0.00015217391304347827, |
| "loss": 0.8292, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.1608695652173913, |
| "grad_norm": 0.1648133248090744, |
| "learning_rate": 0.0001565217391304348, |
| "loss": 0.8189, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.16521739130434782, |
| "grad_norm": 0.13562779128551483, |
| "learning_rate": 0.00016086956521739132, |
| "loss": 0.8078, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.16956521739130434, |
| "grad_norm": 0.1290610432624817, |
| "learning_rate": 0.00016521739130434784, |
| "loss": 0.7712, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.17391304347826086, |
| "grad_norm": 0.11024343967437744, |
| "learning_rate": 0.00016956521739130436, |
| "loss": 0.7448, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.1782608695652174, |
| "grad_norm": 0.12418993562459946, |
| "learning_rate": 0.00017391304347826088, |
| "loss": 0.7633, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.1826086956521739, |
| "grad_norm": 0.10319849103689194, |
| "learning_rate": 0.0001782608695652174, |
| "loss": 0.7463, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.18695652173913044, |
| "grad_norm": 0.10371455550193787, |
| "learning_rate": 0.00018260869565217392, |
| "loss": 0.7516, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.19130434782608696, |
| "grad_norm": 0.09219090640544891, |
| "learning_rate": 0.00018695652173913045, |
| "loss": 0.7265, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.1956521739130435, |
| "grad_norm": 0.09577666968107224, |
| "learning_rate": 0.00019130434782608697, |
| "loss": 0.7382, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.08755916357040405, |
| "learning_rate": 0.0001956521739130435, |
| "loss": 0.7392, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.20434782608695654, |
| "grad_norm": 0.08335893601179123, |
| "learning_rate": 0.0002, |
| "loss": 0.7182, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.20869565217391303, |
| "grad_norm": 0.08622466027736664, |
| "learning_rate": 0.00019999712083215463, |
| "loss": 0.7196, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.21304347826086956, |
| "grad_norm": 0.07222707569599152, |
| "learning_rate": 0.00019998848349441062, |
| "loss": 0.7014, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.21739130434782608, |
| "grad_norm": 0.07286012172698975, |
| "learning_rate": 0.00019997408848413493, |
| "loss": 0.6986, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.2217391304347826, |
| "grad_norm": 0.07811558246612549, |
| "learning_rate": 0.00019995393663024054, |
| "loss": 0.6922, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.22608695652173913, |
| "grad_norm": 0.07095416635274887, |
| "learning_rate": 0.0001999280290931388, |
| "loss": 0.7188, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.23043478260869565, |
| "grad_norm": 0.0705651044845581, |
| "learning_rate": 0.00019989636736467278, |
| "loss": 0.7135, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.23478260869565218, |
| "grad_norm": 0.0649741142988205, |
| "learning_rate": 0.00019985895326803097, |
| "loss": 0.6833, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.2391304347826087, |
| "grad_norm": 0.07023416459560394, |
| "learning_rate": 0.00019981578895764273, |
| "loss": 0.6902, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.24347826086956523, |
| "grad_norm": 0.065043605864048, |
| "learning_rate": 0.00019976687691905393, |
| "loss": 0.6933, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.24782608695652175, |
| "grad_norm": 0.0647321566939354, |
| "learning_rate": 0.00019971221996878394, |
| "loss": 0.6946, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.25217391304347825, |
| "grad_norm": 0.08214448392391205, |
| "learning_rate": 0.0001996518212541634, |
| "loss": 0.6789, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.2565217391304348, |
| "grad_norm": 0.06106014922261238, |
| "learning_rate": 0.00019958568425315314, |
| "loss": 0.6826, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.2608695652173913, |
| "grad_norm": 0.06052952632308006, |
| "learning_rate": 0.0001995138127741436, |
| "loss": 0.6706, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.26521739130434785, |
| "grad_norm": 0.06265316903591156, |
| "learning_rate": 0.00019943621095573586, |
| "loss": 0.6809, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.26956521739130435, |
| "grad_norm": 0.0603368878364563, |
| "learning_rate": 0.00019935288326650312, |
| "loss": 0.6728, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.27391304347826084, |
| "grad_norm": 0.06611189991235733, |
| "learning_rate": 0.00019926383450473344, |
| "loss": 0.6499, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.2782608695652174, |
| "grad_norm": 0.06278355419635773, |
| "learning_rate": 0.00019916906979815347, |
| "loss": 0.6561, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.2826086956521739, |
| "grad_norm": 0.07379094511270523, |
| "learning_rate": 0.00019906859460363307, |
| "loss": 0.6786, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.28695652173913044, |
| "grad_norm": 0.09574166685342789, |
| "learning_rate": 0.0001989624147068713, |
| "loss": 0.6625, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.29130434782608694, |
| "grad_norm": 0.08743462711572647, |
| "learning_rate": 0.00019885053622206304, |
| "loss": 0.648, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.2956521739130435, |
| "grad_norm": 0.08914034813642502, |
| "learning_rate": 0.00019873296559154698, |
| "loss": 0.6561, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.06804706901311874, |
| "learning_rate": 0.0001986097095854347, |
| "loss": 0.658, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.30434782608695654, |
| "grad_norm": 0.09893489629030228, |
| "learning_rate": 0.00019848077530122083, |
| "loss": 0.6708, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.30869565217391304, |
| "grad_norm": 0.07928409427404404, |
| "learning_rate": 0.0001983461701633742, |
| "loss": 0.6407, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.3130434782608696, |
| "grad_norm": 0.07455449551343918, |
| "learning_rate": 0.0001982059019229106, |
| "loss": 0.676, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.3173913043478261, |
| "grad_norm": 0.0770968496799469, |
| "learning_rate": 0.00019805997865694614, |
| "loss": 0.6639, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.3217391304347826, |
| "grad_norm": 0.06771919876337051, |
| "learning_rate": 0.00019790840876823232, |
| "loss": 0.6486, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.32608695652173914, |
| "grad_norm": 0.07457810640335083, |
| "learning_rate": 0.0001977512009846721, |
| "loss": 0.6681, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.33043478260869563, |
| "grad_norm": 0.0826922208070755, |
| "learning_rate": 0.00019758836435881746, |
| "loss": 0.6356, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.3347826086956522, |
| "grad_norm": 0.07923886179924011, |
| "learning_rate": 0.00019741990826734794, |
| "loss": 0.6682, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.3391304347826087, |
| "grad_norm": 0.11045071482658386, |
| "learning_rate": 0.0001972458424105307, |
| "loss": 0.6203, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.34347826086956523, |
| "grad_norm": 0.11731227487325668, |
| "learning_rate": 0.00019706617681166218, |
| "loss": 0.66, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.34782608695652173, |
| "grad_norm": 0.12649305164813995, |
| "learning_rate": 0.00019688092181649065, |
| "loss": 0.6613, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.3521739130434783, |
| "grad_norm": 0.1144268661737442, |
| "learning_rate": 0.00019669008809262062, |
| "loss": 0.6606, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.3565217391304348, |
| "grad_norm": 0.11361440271139145, |
| "learning_rate": 0.00019649368662889855, |
| "loss": 0.629, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.36086956521739133, |
| "grad_norm": 0.12539249658584595, |
| "learning_rate": 0.00019629172873477995, |
| "loss": 0.6676, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.3652173913043478, |
| "grad_norm": 0.11141279339790344, |
| "learning_rate": 0.00019608422603967836, |
| "loss": 0.6376, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.3695652173913043, |
| "grad_norm": 0.09837634861469269, |
| "learning_rate": 0.00019587119049229557, |
| "loss": 0.6503, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.3739130434782609, |
| "grad_norm": 0.15677575767040253, |
| "learning_rate": 0.0001956526343599335, |
| "loss": 0.6638, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.3782608695652174, |
| "grad_norm": 0.252825528383255, |
| "learning_rate": 0.0001954285702277879, |
| "loss": 0.6713, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.3826086956521739, |
| "grad_norm": 0.3602813482284546, |
| "learning_rate": 0.00019519901099822372, |
| "loss": 0.6596, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.3869565217391304, |
| "grad_norm": 0.3970949053764343, |
| "learning_rate": 0.00019496396989003193, |
| "loss": 0.6617, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.391304347826087, |
| "grad_norm": 0.284343421459198, |
| "learning_rate": 0.00019472346043766865, |
| "loss": 0.6229, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.39565217391304347, |
| "grad_norm": 0.19832171499729156, |
| "learning_rate": 0.00019447749649047542, |
| "loss": 0.6665, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.24541743099689484, |
| "learning_rate": 0.00019422609221188207, |
| "loss": 0.6585, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.4043478260869565, |
| "grad_norm": 0.1915537267923355, |
| "learning_rate": 0.00019396926207859084, |
| "loss": 0.6343, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.40869565217391307, |
| "grad_norm": 0.20492875576019287, |
| "learning_rate": 0.00019370702087974302, |
| "loss": 0.6438, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.41304347826086957, |
| "grad_norm": 0.25835996866226196, |
| "learning_rate": 0.00019343938371606712, |
| "loss": 0.6502, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.41739130434782606, |
| "grad_norm": 0.2585464417934418, |
| "learning_rate": 0.00019316636599900946, |
| "loss": 0.6393, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.4217391304347826, |
| "grad_norm": 0.2317182868719101, |
| "learning_rate": 0.00019288798344984672, |
| "loss": 0.6275, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.4260869565217391, |
| "grad_norm": 0.23632416129112244, |
| "learning_rate": 0.00019260425209878052, |
| "loss": 0.6414, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.43043478260869567, |
| "grad_norm": 0.1801244169473648, |
| "learning_rate": 0.00019231518828401458, |
| "loss": 0.6491, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.43478260869565216, |
| "grad_norm": 0.24871514737606049, |
| "learning_rate": 0.00019202080865081368, |
| "loss": 0.6581, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.4391304347826087, |
| "grad_norm": 0.26276353001594543, |
| "learning_rate": 0.00019172113015054532, |
| "loss": 0.644, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.4434782608695652, |
| "grad_norm": 0.19743724167346954, |
| "learning_rate": 0.0001914161700397035, |
| "loss": 0.6519, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.44782608695652176, |
| "grad_norm": 0.31385916471481323, |
| "learning_rate": 0.00019110594587891519, |
| "loss": 0.6462, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.45217391304347826, |
| "grad_norm": 0.2689647674560547, |
| "learning_rate": 0.0001907904755319289, |
| "loss": 0.6517, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.45652173913043476, |
| "grad_norm": 0.17245543003082275, |
| "learning_rate": 0.00019046977716458626, |
| "loss": 0.6245, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.4608695652173913, |
| "grad_norm": 0.4380849003791809, |
| "learning_rate": 0.00019014386924377582, |
| "loss": 0.6519, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.4652173913043478, |
| "grad_norm": 0.305043488740921, |
| "learning_rate": 0.0001898127705363696, |
| "loss": 0.6606, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.46956521739130436, |
| "grad_norm": 0.20340269804000854, |
| "learning_rate": 0.0001894765001081428, |
| "loss": 0.6359, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.47391304347826085, |
| "grad_norm": 0.15703125298023224, |
| "learning_rate": 0.0001891350773226754, |
| "loss": 0.6461, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.4782608695652174, |
| "grad_norm": 0.16932646930217743, |
| "learning_rate": 0.0001887885218402375, |
| "loss": 0.6413, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.4826086956521739, |
| "grad_norm": 0.1790553480386734, |
| "learning_rate": 0.00018843685361665723, |
| "loss": 0.6378, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.48695652173913045, |
| "grad_norm": 0.24903282523155212, |
| "learning_rate": 0.00018808009290217136, |
| "loss": 0.6308, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.49130434782608695, |
| "grad_norm": 0.20529182255268097, |
| "learning_rate": 0.00018771826024025946, |
| "loss": 0.6315, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.4956521739130435, |
| "grad_norm": 0.18206629157066345, |
| "learning_rate": 0.00018735137646646078, |
| "loss": 0.6409, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.22906547784805298, |
| "learning_rate": 0.00018697946270717467, |
| "loss": 0.6522, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.5043478260869565, |
| "grad_norm": 0.23560722172260284, |
| "learning_rate": 0.00018660254037844388, |
| "loss": 0.6424, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.508695652173913, |
| "grad_norm": 0.3479248881340027, |
| "learning_rate": 0.00018622063118472134, |
| "loss": 0.6591, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.5130434782608696, |
| "grad_norm": 0.48405924439430237, |
| "learning_rate": 0.00018583375711762052, |
| "loss": 0.6312, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.5173913043478261, |
| "grad_norm": 0.6660999655723572, |
| "learning_rate": 0.00018544194045464886, |
| "loss": 0.6492, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.5217391304347826, |
| "grad_norm": 0.6070662140846252, |
| "learning_rate": 0.0001850452037579251, |
| "loss": 0.631, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.5260869565217391, |
| "grad_norm": 0.2432556301355362, |
| "learning_rate": 0.00018464356987288013, |
| "loss": 0.6192, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.5304347826086957, |
| "grad_norm": 0.4718700647354126, |
| "learning_rate": 0.00018423706192694116, |
| "loss": 0.6385, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.5347826086956522, |
| "grad_norm": 0.41220200061798096, |
| "learning_rate": 0.00018382570332820043, |
| "loss": 0.6362, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.5391304347826087, |
| "grad_norm": 0.24313992261886597, |
| "learning_rate": 0.00018340951776406694, |
| "loss": 0.659, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.5434782608695652, |
| "grad_norm": 0.42307668924331665, |
| "learning_rate": 0.00018298852919990252, |
| "loss": 0.6484, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.5478260869565217, |
| "grad_norm": 0.2858572006225586, |
| "learning_rate": 0.00018256276187764197, |
| "loss": 0.6437, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.5521739130434783, |
| "grad_norm": 0.2318851351737976, |
| "learning_rate": 0.0001821322403143969, |
| "loss": 0.6191, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.5565217391304348, |
| "grad_norm": 0.3861188292503357, |
| "learning_rate": 0.0001816969893010442, |
| "loss": 0.639, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.5608695652173913, |
| "grad_norm": 0.2969801127910614, |
| "learning_rate": 0.0001812570339007983, |
| "loss": 0.6624, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.5652173913043478, |
| "grad_norm": 0.29341548681259155, |
| "learning_rate": 0.00018081239944776805, |
| "loss": 0.639, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.5695652173913044, |
| "grad_norm": 0.43678849935531616, |
| "learning_rate": 0.00018036311154549784, |
| "loss": 0.6384, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.5739130434782609, |
| "grad_norm": 0.5248069167137146, |
| "learning_rate": 0.00017990919606549328, |
| "loss": 0.6451, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.5782608695652174, |
| "grad_norm": 0.5387030243873596, |
| "learning_rate": 0.00017945067914573146, |
| "loss": 0.6198, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.5826086956521739, |
| "grad_norm": 0.55666184425354, |
| "learning_rate": 0.00017898758718915586, |
| "loss": 0.6391, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.5869565217391305, |
| "grad_norm": 0.4839560389518738, |
| "learning_rate": 0.0001785199468621559, |
| "loss": 0.6411, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.591304347826087, |
| "grad_norm": 0.5173195004463196, |
| "learning_rate": 0.00017804778509303138, |
| "loss": 0.6318, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.5956521739130435, |
| "grad_norm": 0.341448038816452, |
| "learning_rate": 0.000177571129070442, |
| "loss": 0.6427, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.2654604911804199, |
| "learning_rate": 0.00017709000624184162, |
| "loss": 0.616, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.6043478260869565, |
| "grad_norm": 0.4000408351421356, |
| "learning_rate": 0.0001766044443118978, |
| "loss": 0.611, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.6086956521739131, |
| "grad_norm": 0.2812383770942688, |
| "learning_rate": 0.00017611447124089649, |
| "loss": 0.6508, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.6130434782608696, |
| "grad_norm": 0.30483949184417725, |
| "learning_rate": 0.00017562011524313185, |
| "loss": 0.6628, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.6173913043478261, |
| "grad_norm": 0.4457907974720001, |
| "learning_rate": 0.0001751214047852818, |
| "loss": 0.6274, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.6217391304347826, |
| "grad_norm": 0.38395488262176514, |
| "learning_rate": 0.00017461836858476856, |
| "loss": 0.6528, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.6260869565217392, |
| "grad_norm": 0.573344886302948, |
| "learning_rate": 0.00017411103560810526, |
| "loss": 0.6504, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.6304347826086957, |
| "grad_norm": 0.5133661031723022, |
| "learning_rate": 0.00017359943506922774, |
| "loss": 0.6334, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.6347826086956522, |
| "grad_norm": 0.2995568513870239, |
| "learning_rate": 0.00017308359642781242, |
| "loss": 0.6328, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.6391304347826087, |
| "grad_norm": 0.5677820444107056, |
| "learning_rate": 0.0001725635493875799, |
| "loss": 0.639, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.6434782608695652, |
| "grad_norm": 0.4751092791557312, |
| "learning_rate": 0.00017203932389458454, |
| "loss": 0.6229, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.6478260869565218, |
| "grad_norm": 0.4374710023403168, |
| "learning_rate": 0.00017151095013548994, |
| "loss": 0.6377, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.6521739130434783, |
| "grad_norm": 0.4172927439212799, |
| "learning_rate": 0.0001709784585358309, |
| "loss": 0.6277, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.6565217391304348, |
| "grad_norm": 0.3994798958301544, |
| "learning_rate": 0.00017044187975826124, |
| "loss": 0.637, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.6608695652173913, |
| "grad_norm": 0.34366917610168457, |
| "learning_rate": 0.00016990124470078822, |
| "loss": 0.6556, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.6652173913043479, |
| "grad_norm": 0.533347487449646, |
| "learning_rate": 0.0001693565844949933, |
| "loss": 0.6073, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.6695652173913044, |
| "grad_norm": 0.4292946457862854, |
| "learning_rate": 0.0001688079305042395, |
| "loss": 0.6548, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.6739130434782609, |
| "grad_norm": 0.2770076394081116, |
| "learning_rate": 0.00016825531432186543, |
| "loss": 0.6014, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.6782608695652174, |
| "grad_norm": 0.377838134765625, |
| "learning_rate": 0.0001676987677693659, |
| "loss": 0.6406, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.6826086956521739, |
| "grad_norm": 0.421268492937088, |
| "learning_rate": 0.0001671383228945597, |
| "loss": 0.6288, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.6869565217391305, |
| "grad_norm": 0.4219221770763397, |
| "learning_rate": 0.00016657401196974405, |
| "loss": 0.647, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.691304347826087, |
| "grad_norm": 0.3563760221004486, |
| "learning_rate": 0.00016600586748983641, |
| "loss": 0.6307, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.6956521739130435, |
| "grad_norm": 0.39387866854667664, |
| "learning_rate": 0.00016543392217050314, |
| "loss": 0.631, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.36268243193626404, |
| "learning_rate": 0.0001648582089462756, |
| "loss": 0.6429, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.7043478260869566, |
| "grad_norm": 0.3702019155025482, |
| "learning_rate": 0.00016427876096865394, |
| "loss": 0.6338, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.7086956521739131, |
| "grad_norm": 0.44408297538757324, |
| "learning_rate": 0.00016369561160419784, |
| "loss": 0.6416, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.7130434782608696, |
| "grad_norm": 0.5986080765724182, |
| "learning_rate": 0.00016310879443260528, |
| "loss": 0.6187, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.717391304347826, |
| "grad_norm": 0.7963016629219055, |
| "learning_rate": 0.0001625183432447789, |
| "loss": 0.6365, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.7217391304347827, |
| "grad_norm": 1.2156025171279907, |
| "learning_rate": 0.0001619242920408802, |
| "loss": 0.6625, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.7260869565217392, |
| "grad_norm": 0.7924716472625732, |
| "learning_rate": 0.00016132667502837165, |
| "loss": 0.6276, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.7304347826086957, |
| "grad_norm": 0.29551273584365845, |
| "learning_rate": 0.00016072552662004696, |
| "loss": 0.6159, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.7347826086956522, |
| "grad_norm": 0.7566269040107727, |
| "learning_rate": 0.00016012088143204953, |
| "loss": 0.6485, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.7391304347826086, |
| "grad_norm": 1.001354455947876, |
| "learning_rate": 0.00015951277428187898, |
| "loss": 0.6323, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.7434782608695653, |
| "grad_norm": 0.9103027582168579, |
| "learning_rate": 0.00015890124018638638, |
| "loss": 0.6255, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.7478260869565218, |
| "grad_norm": 0.3885137736797333, |
| "learning_rate": 0.00015828631435975784, |
| "loss": 0.6323, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.7521739130434782, |
| "grad_norm": 0.6141281723976135, |
| "learning_rate": 0.00015766803221148673, |
| "loss": 0.6504, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.7565217391304347, |
| "grad_norm": 0.8024821281433105, |
| "learning_rate": 0.0001570464293443346, |
| "loss": 0.641, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.7608695652173914, |
| "grad_norm": 0.43333736062049866, |
| "learning_rate": 0.00015642154155228122, |
| "loss": 0.627, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.7652173913043478, |
| "grad_norm": 0.649389922618866, |
| "learning_rate": 0.00015579340481846336, |
| "loss": 0.6483, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.7695652173913043, |
| "grad_norm": 1.0359424352645874, |
| "learning_rate": 0.00015516205531310273, |
| "loss": 0.6332, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.7739130434782608, |
| "grad_norm": 0.7209396362304688, |
| "learning_rate": 0.00015452752939142328, |
| "loss": 0.6524, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.7782608695652173, |
| "grad_norm": 0.6178513169288635, |
| "learning_rate": 0.00015388986359155758, |
| "loss": 0.645, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.782608695652174, |
| "grad_norm": 0.9886595606803894, |
| "learning_rate": 0.00015324909463244296, |
| "loss": 0.6642, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.7869565217391304, |
| "grad_norm": 0.7466373443603516, |
| "learning_rate": 0.00015260525941170712, |
| "loss": 0.6315, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.7913043478260869, |
| "grad_norm": 0.5552679896354675, |
| "learning_rate": 0.00015195839500354335, |
| "loss": 0.6207, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.7956521739130434, |
| "grad_norm": 0.5576688647270203, |
| "learning_rate": 0.0001513085386565758, |
| "loss": 0.6421, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.4000707268714905, |
| "learning_rate": 0.00015065572779171432, |
| "loss": 0.6398, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.8043478260869565, |
| "grad_norm": 0.4978863298892975, |
| "learning_rate": 0.00015000000000000001, |
| "loss": 0.6456, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.808695652173913, |
| "grad_norm": 0.4530424177646637, |
| "learning_rate": 0.00014934139304044033, |
| "loss": 0.6453, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.8130434782608695, |
| "grad_norm": 0.29163071513175964, |
| "learning_rate": 0.00014867994483783485, |
| "loss": 0.6558, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.8173913043478261, |
| "grad_norm": 0.33445900678634644, |
| "learning_rate": 0.00014801569348059157, |
| "loss": 0.6291, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.8217391304347826, |
| "grad_norm": 0.3891032934188843, |
| "learning_rate": 0.0001473486772185334, |
| "loss": 0.6458, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.8260869565217391, |
| "grad_norm": 0.4320944845676422, |
| "learning_rate": 0.00014667893446069588, |
| "loss": 0.6275, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.8304347826086956, |
| "grad_norm": 0.3652418553829193, |
| "learning_rate": 0.00014600650377311522, |
| "loss": 0.6434, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.8347826086956521, |
| "grad_norm": 0.2939096689224243, |
| "learning_rate": 0.00014533142387660773, |
| "loss": 0.6462, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.8391304347826087, |
| "grad_norm": 0.36094796657562256, |
| "learning_rate": 0.00014465373364454001, |
| "loss": 0.6259, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.8434782608695652, |
| "grad_norm": 0.503746747970581, |
| "learning_rate": 0.00014397347210059057, |
| "loss": 0.6565, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.8478260869565217, |
| "grad_norm": 0.501377522945404, |
| "learning_rate": 0.00014329067841650274, |
| "loss": 0.6358, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.8521739130434782, |
| "grad_norm": 0.40720251202583313, |
| "learning_rate": 0.00014260539190982886, |
| "loss": 0.636, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.8565217391304348, |
| "grad_norm": 0.3170947730541229, |
| "learning_rate": 0.00014191765204166643, |
| "loss": 0.6343, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.8608695652173913, |
| "grad_norm": 0.43554455041885376, |
| "learning_rate": 0.00014122749841438575, |
| "loss": 0.6319, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.8652173913043478, |
| "grad_norm": 0.5128415822982788, |
| "learning_rate": 0.00014053497076934948, |
| "loss": 0.6326, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.8695652173913043, |
| "grad_norm": 0.44992515444755554, |
| "learning_rate": 0.00013984010898462416, |
| "loss": 0.6343, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.8739130434782608, |
| "grad_norm": 0.506968080997467, |
| "learning_rate": 0.00013914295307268396, |
| "loss": 0.6472, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.8782608695652174, |
| "grad_norm": 0.6257392764091492, |
| "learning_rate": 0.0001384435431781065, |
| "loss": 0.6535, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.8826086956521739, |
| "grad_norm": 0.9480230808258057, |
| "learning_rate": 0.00013774191957526143, |
| "loss": 0.6628, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.8869565217391304, |
| "grad_norm": 1.2171893119812012, |
| "learning_rate": 0.00013703812266599113, |
| "loss": 0.6585, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.8913043478260869, |
| "grad_norm": 0.3134421110153198, |
| "learning_rate": 0.00013633219297728416, |
| "loss": 0.6629, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.8956521739130435, |
| "grad_norm": 1.003349781036377, |
| "learning_rate": 0.00013562417115894172, |
| "loss": 0.6516, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.246419906616211, |
| "learning_rate": 0.00013491409798123687, |
| "loss": 0.6418, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.9043478260869565, |
| "grad_norm": 0.46948862075805664, |
| "learning_rate": 0.00013420201433256689, |
| "loss": 0.6441, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.908695652173913, |
| "grad_norm": 1.628340244293213, |
| "learning_rate": 0.00013348796121709862, |
| "loss": 0.6661, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.9130434782608695, |
| "grad_norm": 0.4027623236179352, |
| "learning_rate": 0.0001327719797524075, |
| "loss": 0.6342, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.9173913043478261, |
| "grad_norm": 1.3196384906768799, |
| "learning_rate": 0.00013205411116710972, |
| "loss": 0.6724, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.9217391304347826, |
| "grad_norm": 0.561631977558136, |
| "learning_rate": 0.00013133439679848823, |
| "loss": 0.6541, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.9260869565217391, |
| "grad_norm": 0.7715569734573364, |
| "learning_rate": 0.00013061287809011242, |
| "loss": 0.6419, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.9304347826086956, |
| "grad_norm": 0.8591257333755493, |
| "learning_rate": 0.0001298895965894516, |
| "loss": 0.6197, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.9347826086956522, |
| "grad_norm": 0.4229847192764282, |
| "learning_rate": 0.0001291645939454825, |
| "loss": 0.6472, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.9391304347826087, |
| "grad_norm": 0.7943733930587769, |
| "learning_rate": 0.0001284379119062912, |
| "loss": 0.6576, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.9434782608695652, |
| "grad_norm": 0.7454273104667664, |
| "learning_rate": 0.0001277095923166689, |
| "loss": 0.6245, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.9478260869565217, |
| "grad_norm": 0.4976602792739868, |
| "learning_rate": 0.00012697967711570242, |
| "loss": 0.644, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.9521739130434783, |
| "grad_norm": 0.6845293641090393, |
| "learning_rate": 0.00012624820833435937, |
| "loss": 0.6412, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.9565217391304348, |
| "grad_norm": 0.7265484929084778, |
| "learning_rate": 0.0001255152280930676, |
| "loss": 0.6438, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.9608695652173913, |
| "grad_norm": 0.4346272647380829, |
| "learning_rate": 0.00012478077859929, |
| "loss": 0.6116, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.9652173913043478, |
| "grad_norm": 0.5768253803253174, |
| "learning_rate": 0.00012404490214509386, |
| "loss": 0.6242, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.9695652173913043, |
| "grad_norm": 0.688556969165802, |
| "learning_rate": 0.00012330764110471566, |
| "loss": 0.6546, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.9739130434782609, |
| "grad_norm": 0.6147114634513855, |
| "learning_rate": 0.00012256903793212107, |
| "loss": 0.6286, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.9782608695652174, |
| "grad_norm": 0.6598117351531982, |
| "learning_rate": 0.00012182913515856015, |
| "loss": 0.65, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.9826086956521739, |
| "grad_norm": 0.6232290863990784, |
| "learning_rate": 0.00012108797539011847, |
| "loss": 0.6465, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.9869565217391304, |
| "grad_norm": 0.3764599561691284, |
| "learning_rate": 0.0001203456013052634, |
| "loss": 0.6397, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.991304347826087, |
| "grad_norm": 0.4177006781101227, |
| "learning_rate": 0.00011960205565238684, |
| "loss": 0.6324, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.9956521739130435, |
| "grad_norm": 0.6632861495018005, |
| "learning_rate": 0.00011885738124734358, |
| "loss": 0.6394, |
| "step": 229 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.8406037092208862, |
| "learning_rate": 0.00011811162097098558, |
| "loss": 0.6563, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.0043478260869565, |
| "grad_norm": 0.5756528973579407, |
| "learning_rate": 0.00011736481776669306, |
| "loss": 0.6087, |
| "step": 231 |
| }, |
| { |
| "epoch": 1.008695652173913, |
| "grad_norm": 0.4710526466369629, |
| "learning_rate": 0.00011661701463790142, |
| "loss": 0.632, |
| "step": 232 |
| }, |
| { |
| "epoch": 1.0130434782608695, |
| "grad_norm": 0.6560250520706177, |
| "learning_rate": 0.00011586825464562514, |
| "loss": 0.6305, |
| "step": 233 |
| }, |
| { |
| "epoch": 1.017391304347826, |
| "grad_norm": 0.7808629870414734, |
| "learning_rate": 0.0001151185809059781, |
| "loss": 0.6374, |
| "step": 234 |
| }, |
| { |
| "epoch": 1.0217391304347827, |
| "grad_norm": 0.7576888799667358, |
| "learning_rate": 0.00011436803658769082, |
| "loss": 0.6436, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.0260869565217392, |
| "grad_norm": 0.5998823046684265, |
| "learning_rate": 0.00011361666490962468, |
| "loss": 0.6245, |
| "step": 236 |
| }, |
| { |
| "epoch": 1.0304347826086957, |
| "grad_norm": 0.4165184795856476, |
| "learning_rate": 0.00011286450913828312, |
| "loss": 0.6307, |
| "step": 237 |
| }, |
| { |
| "epoch": 1.0347826086956522, |
| "grad_norm": 0.7513805031776428, |
| "learning_rate": 0.00011211161258532041, |
| "loss": 0.6327, |
| "step": 238 |
| }, |
| { |
| "epoch": 1.0391304347826087, |
| "grad_norm": 0.6765947341918945, |
| "learning_rate": 0.00011135801860504749, |
| "loss": 0.6412, |
| "step": 239 |
| }, |
| { |
| "epoch": 1.0434782608695652, |
| "grad_norm": 0.6011433005332947, |
| "learning_rate": 0.00011060377059193547, |
| "loss": 0.6521, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.0478260869565217, |
| "grad_norm": 0.4051482677459717, |
| "learning_rate": 0.00010984891197811687, |
| "loss": 0.6419, |
| "step": 241 |
| }, |
| { |
| "epoch": 1.0521739130434782, |
| "grad_norm": 0.7668951153755188, |
| "learning_rate": 0.0001090934862308847, |
| "loss": 0.6264, |
| "step": 242 |
| }, |
| { |
| "epoch": 1.0565217391304347, |
| "grad_norm": 0.6456301212310791, |
| "learning_rate": 0.00010833753685018935, |
| "loss": 0.6509, |
| "step": 243 |
| }, |
| { |
| "epoch": 1.0608695652173914, |
| "grad_norm": 0.3792930841445923, |
| "learning_rate": 0.00010758110736613385, |
| "loss": 0.6132, |
| "step": 244 |
| }, |
| { |
| "epoch": 1.065217391304348, |
| "grad_norm": 0.47149112820625305, |
| "learning_rate": 0.0001068242413364671, |
| "loss": 0.6346, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.0695652173913044, |
| "grad_norm": 0.5918865203857422, |
| "learning_rate": 0.00010606698234407586, |
| "loss": 0.6365, |
| "step": 246 |
| }, |
| { |
| "epoch": 1.0739130434782609, |
| "grad_norm": 0.4131185710430145, |
| "learning_rate": 0.00010530937399447496, |
| "loss": 0.6262, |
| "step": 247 |
| }, |
| { |
| "epoch": 1.0782608695652174, |
| "grad_norm": 0.6192177534103394, |
| "learning_rate": 0.00010455145991329638, |
| "loss": 0.6267, |
| "step": 248 |
| }, |
| { |
| "epoch": 1.0826086956521739, |
| "grad_norm": 0.7231637835502625, |
| "learning_rate": 0.00010379328374377715, |
| "loss": 0.6524, |
| "step": 249 |
| }, |
| { |
| "epoch": 1.0869565217391304, |
| "grad_norm": 0.5949220061302185, |
| "learning_rate": 0.00010303488914424624, |
| "loss": 0.6465, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.0913043478260869, |
| "grad_norm": 0.4544646441936493, |
| "learning_rate": 0.00010227631978561056, |
| "loss": 0.6383, |
| "step": 251 |
| }, |
| { |
| "epoch": 1.0956521739130434, |
| "grad_norm": 0.3706333041191101, |
| "learning_rate": 0.00010151761934884028, |
| "loss": 0.6277, |
| "step": 252 |
| }, |
| { |
| "epoch": 1.1, |
| "grad_norm": 0.4875901937484741, |
| "learning_rate": 0.00010075883152245334, |
| "loss": 0.6119, |
| "step": 253 |
| }, |
| { |
| "epoch": 1.1043478260869566, |
| "grad_norm": 0.3684738576412201, |
| "learning_rate": 0.0001, |
| "loss": 0.6264, |
| "step": 254 |
| }, |
| { |
| "epoch": 1.108695652173913, |
| "grad_norm": 0.42785608768463135, |
| "learning_rate": 9.92411684775467e-05, |
| "loss": 0.6272, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.1130434782608696, |
| "grad_norm": 0.3924098014831543, |
| "learning_rate": 9.848238065115975e-05, |
| "loss": 0.65, |
| "step": 256 |
| }, |
| { |
| "epoch": 1.117391304347826, |
| "grad_norm": 0.3814132809638977, |
| "learning_rate": 9.772368021438943e-05, |
| "loss": 0.6103, |
| "step": 257 |
| }, |
| { |
| "epoch": 1.1217391304347826, |
| "grad_norm": 0.3904854655265808, |
| "learning_rate": 9.696511085575377e-05, |
| "loss": 0.6422, |
| "step": 258 |
| }, |
| { |
| "epoch": 1.126086956521739, |
| "grad_norm": 0.27673909068107605, |
| "learning_rate": 9.620671625622288e-05, |
| "loss": 0.637, |
| "step": 259 |
| }, |
| { |
| "epoch": 1.1304347826086956, |
| "grad_norm": 0.3034502863883972, |
| "learning_rate": 9.544854008670367e-05, |
| "loss": 0.6286, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.134782608695652, |
| "grad_norm": 0.3358616828918457, |
| "learning_rate": 9.469062600552509e-05, |
| "loss": 0.6427, |
| "step": 261 |
| }, |
| { |
| "epoch": 1.1391304347826088, |
| "grad_norm": 0.245226189494133, |
| "learning_rate": 9.393301765592415e-05, |
| "loss": 0.6269, |
| "step": 262 |
| }, |
| { |
| "epoch": 1.1434782608695653, |
| "grad_norm": 0.3370216488838196, |
| "learning_rate": 9.317575866353292e-05, |
| "loss": 0.6093, |
| "step": 263 |
| }, |
| { |
| "epoch": 1.1478260869565218, |
| "grad_norm": 0.27349230647087097, |
| "learning_rate": 9.241889263386618e-05, |
| "loss": 0.6495, |
| "step": 264 |
| }, |
| { |
| "epoch": 1.1521739130434783, |
| "grad_norm": 0.3180980086326599, |
| "learning_rate": 9.166246314981066e-05, |
| "loss": 0.6336, |
| "step": 265 |
| }, |
| { |
| "epoch": 1.1565217391304348, |
| "grad_norm": 0.32506948709487915, |
| "learning_rate": 9.09065137691153e-05, |
| "loss": 0.6264, |
| "step": 266 |
| }, |
| { |
| "epoch": 1.1608695652173913, |
| "grad_norm": 0.30634960532188416, |
| "learning_rate": 9.015108802188313e-05, |
| "loss": 0.6269, |
| "step": 267 |
| }, |
| { |
| "epoch": 1.1652173913043478, |
| "grad_norm": 0.36996349692344666, |
| "learning_rate": 8.939622940806455e-05, |
| "loss": 0.6454, |
| "step": 268 |
| }, |
| { |
| "epoch": 1.1695652173913043, |
| "grad_norm": 0.2220568060874939, |
| "learning_rate": 8.86419813949525e-05, |
| "loss": 0.6106, |
| "step": 269 |
| }, |
| { |
| "epoch": 1.1739130434782608, |
| "grad_norm": 0.26350730657577515, |
| "learning_rate": 8.788838741467962e-05, |
| "loss": 0.5946, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.1782608695652175, |
| "grad_norm": 0.22457756102085114, |
| "learning_rate": 8.713549086171691e-05, |
| "loss": 0.617, |
| "step": 271 |
| }, |
| { |
| "epoch": 1.182608695652174, |
| "grad_norm": 0.28281551599502563, |
| "learning_rate": 8.638333509037536e-05, |
| "loss": 0.618, |
| "step": 272 |
| }, |
| { |
| "epoch": 1.1869565217391305, |
| "grad_norm": 0.3269217908382416, |
| "learning_rate": 8.563196341230919e-05, |
| "loss": 0.6261, |
| "step": 273 |
| }, |
| { |
| "epoch": 1.191304347826087, |
| "grad_norm": 0.2878088355064392, |
| "learning_rate": 8.488141909402191e-05, |
| "loss": 0.6104, |
| "step": 274 |
| }, |
| { |
| "epoch": 1.1956521739130435, |
| "grad_norm": 0.23815755546092987, |
| "learning_rate": 8.413174535437487e-05, |
| "loss": 0.6325, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.2873881459236145, |
| "learning_rate": 8.33829853620986e-05, |
| "loss": 0.6353, |
| "step": 276 |
| }, |
| { |
| "epoch": 1.2043478260869565, |
| "grad_norm": 0.24071787297725677, |
| "learning_rate": 8.263518223330697e-05, |
| "loss": 0.621, |
| "step": 277 |
| }, |
| { |
| "epoch": 1.208695652173913, |
| "grad_norm": 0.2793138921260834, |
| "learning_rate": 8.188837902901442e-05, |
| "loss": 0.6279, |
| "step": 278 |
| }, |
| { |
| "epoch": 1.2130434782608694, |
| "grad_norm": 0.4142613112926483, |
| "learning_rate": 8.114261875265643e-05, |
| "loss": 0.6164, |
| "step": 279 |
| }, |
| { |
| "epoch": 1.2173913043478262, |
| "grad_norm": 0.3821130692958832, |
| "learning_rate": 8.039794434761318e-05, |
| "loss": 0.6191, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.2217391304347827, |
| "grad_norm": 0.24204838275909424, |
| "learning_rate": 7.965439869473664e-05, |
| "loss": 0.6149, |
| "step": 281 |
| }, |
| { |
| "epoch": 1.2260869565217392, |
| "grad_norm": 0.2838297188282013, |
| "learning_rate": 7.891202460988158e-05, |
| "loss": 0.6478, |
| "step": 282 |
| }, |
| { |
| "epoch": 1.2304347826086957, |
| "grad_norm": 0.35510286688804626, |
| "learning_rate": 7.817086484143986e-05, |
| "loss": 0.6418, |
| "step": 283 |
| }, |
| { |
| "epoch": 1.2347826086956522, |
| "grad_norm": 0.37343931198120117, |
| "learning_rate": 7.743096206787894e-05, |
| "loss": 0.6126, |
| "step": 284 |
| }, |
| { |
| "epoch": 1.2391304347826086, |
| "grad_norm": 0.264636754989624, |
| "learning_rate": 7.669235889528436e-05, |
| "loss": 0.6231, |
| "step": 285 |
| }, |
| { |
| "epoch": 1.2434782608695651, |
| "grad_norm": 0.2996392548084259, |
| "learning_rate": 7.595509785490617e-05, |
| "loss": 0.6343, |
| "step": 286 |
| }, |
| { |
| "epoch": 1.2478260869565219, |
| "grad_norm": 0.38490474224090576, |
| "learning_rate": 7.521922140071002e-05, |
| "loss": 0.64, |
| "step": 287 |
| }, |
| { |
| "epoch": 1.2521739130434781, |
| "grad_norm": 0.297821044921875, |
| "learning_rate": 7.448477190693238e-05, |
| "loss": 0.6197, |
| "step": 288 |
| }, |
| { |
| "epoch": 1.2565217391304349, |
| "grad_norm": 0.2390124499797821, |
| "learning_rate": 7.375179166564063e-05, |
| "loss": 0.6283, |
| "step": 289 |
| }, |
| { |
| "epoch": 1.2608695652173914, |
| "grad_norm": 0.27224546670913696, |
| "learning_rate": 7.302032288429756e-05, |
| "loss": 0.6197, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.2652173913043478, |
| "grad_norm": 0.3059544563293457, |
| "learning_rate": 7.229040768333115e-05, |
| "loss": 0.6281, |
| "step": 291 |
| }, |
| { |
| "epoch": 1.2695652173913043, |
| "grad_norm": 0.23116622865200043, |
| "learning_rate": 7.156208809370883e-05, |
| "loss": 0.6235, |
| "step": 292 |
| }, |
| { |
| "epoch": 1.2739130434782608, |
| "grad_norm": 0.28964969515800476, |
| "learning_rate": 7.08354060545175e-05, |
| "loss": 0.6032, |
| "step": 293 |
| }, |
| { |
| "epoch": 1.2782608695652173, |
| "grad_norm": 0.23806549608707428, |
| "learning_rate": 7.011040341054845e-05, |
| "loss": 0.6113, |
| "step": 294 |
| }, |
| { |
| "epoch": 1.2826086956521738, |
| "grad_norm": 0.23752138018608093, |
| "learning_rate": 6.93871219098876e-05, |
| "loss": 0.6359, |
| "step": 295 |
| }, |
| { |
| "epoch": 1.2869565217391306, |
| "grad_norm": 0.23858202993869781, |
| "learning_rate": 6.866560320151179e-05, |
| "loss": 0.6207, |
| "step": 296 |
| }, |
| { |
| "epoch": 1.2913043478260868, |
| "grad_norm": 0.28167223930358887, |
| "learning_rate": 6.79458888328903e-05, |
| "loss": 0.6055, |
| "step": 297 |
| }, |
| { |
| "epoch": 1.2956521739130435, |
| "grad_norm": 0.24841302633285522, |
| "learning_rate": 6.722802024759252e-05, |
| "loss": 0.62, |
| "step": 298 |
| }, |
| { |
| "epoch": 1.3, |
| "grad_norm": 0.2754172682762146, |
| "learning_rate": 6.651203878290139e-05, |
| "loss": 0.6177, |
| "step": 299 |
| }, |
| { |
| "epoch": 1.3043478260869565, |
| "grad_norm": 0.3091265857219696, |
| "learning_rate": 6.579798566743314e-05, |
| "loss": 0.6295, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.308695652173913, |
| "grad_norm": 0.2213958352804184, |
| "learning_rate": 6.508590201876317e-05, |
| "loss": 0.6017, |
| "step": 301 |
| }, |
| { |
| "epoch": 1.3130434782608695, |
| "grad_norm": 0.30063438415527344, |
| "learning_rate": 6.437582884105835e-05, |
| "loss": 0.6455, |
| "step": 302 |
| }, |
| { |
| "epoch": 1.317391304347826, |
| "grad_norm": 0.3458832800388336, |
| "learning_rate": 6.366780702271589e-05, |
| "loss": 0.6326, |
| "step": 303 |
| }, |
| { |
| "epoch": 1.3217391304347825, |
| "grad_norm": 0.24162618815898895, |
| "learning_rate": 6.29618773340089e-05, |
| "loss": 0.6155, |
| "step": 304 |
| }, |
| { |
| "epoch": 1.3260869565217392, |
| "grad_norm": 0.32919561862945557, |
| "learning_rate": 6.225808042473858e-05, |
| "loss": 0.6354, |
| "step": 305 |
| }, |
| { |
| "epoch": 1.3304347826086955, |
| "grad_norm": 0.3182857036590576, |
| "learning_rate": 6.155645682189351e-05, |
| "loss": 0.6007, |
| "step": 306 |
| }, |
| { |
| "epoch": 1.3347826086956522, |
| "grad_norm": 0.2895689308643341, |
| "learning_rate": 6.085704692731609e-05, |
| "loss": 0.6382, |
| "step": 307 |
| }, |
| { |
| "epoch": 1.3391304347826087, |
| "grad_norm": 0.2976822555065155, |
| "learning_rate": 6.015989101537586e-05, |
| "loss": 0.5889, |
| "step": 308 |
| }, |
| { |
| "epoch": 1.3434782608695652, |
| "grad_norm": 0.2826453745365143, |
| "learning_rate": 5.9465029230650534e-05, |
| "loss": 0.6329, |
| "step": 309 |
| }, |
| { |
| "epoch": 1.3478260869565217, |
| "grad_norm": 0.3266195058822632, |
| "learning_rate": 5.877250158561425e-05, |
| "loss": 0.6359, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.3521739130434782, |
| "grad_norm": 0.2917690873146057, |
| "learning_rate": 5.8082347958333625e-05, |
| "loss": 0.6301, |
| "step": 311 |
| }, |
| { |
| "epoch": 1.3565217391304347, |
| "grad_norm": 0.25518476963043213, |
| "learning_rate": 5.73946080901712e-05, |
| "loss": 0.5977, |
| "step": 312 |
| }, |
| { |
| "epoch": 1.3608695652173912, |
| "grad_norm": 0.2891990542411804, |
| "learning_rate": 5.670932158349731e-05, |
| "loss": 0.6394, |
| "step": 313 |
| }, |
| { |
| "epoch": 1.365217391304348, |
| "grad_norm": 0.24494178593158722, |
| "learning_rate": 5.602652789940941e-05, |
| "loss": 0.6104, |
| "step": 314 |
| }, |
| { |
| "epoch": 1.3695652173913042, |
| "grad_norm": 0.2637173533439636, |
| "learning_rate": 5.5346266355459995e-05, |
| "loss": 0.6252, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.373913043478261, |
| "grad_norm": 0.30065053701400757, |
| "learning_rate": 5.466857612339229e-05, |
| "loss": 0.638, |
| "step": 316 |
| }, |
| { |
| "epoch": 1.3782608695652174, |
| "grad_norm": 0.2335294783115387, |
| "learning_rate": 5.399349622688479e-05, |
| "loss": 0.6433, |
| "step": 317 |
| }, |
| { |
| "epoch": 1.382608695652174, |
| "grad_norm": 0.28032875061035156, |
| "learning_rate": 5.332106553930414e-05, |
| "loss": 0.6301, |
| "step": 318 |
| }, |
| { |
| "epoch": 1.3869565217391304, |
| "grad_norm": 0.3166956603527069, |
| "learning_rate": 5.26513227814666e-05, |
| "loss": 0.63, |
| "step": 319 |
| }, |
| { |
| "epoch": 1.391304347826087, |
| "grad_norm": 0.24381506443023682, |
| "learning_rate": 5.1984306519408456e-05, |
| "loss": 0.5954, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.3956521739130434, |
| "grad_norm": 0.38320308923721313, |
| "learning_rate": 5.1320055162165115e-05, |
| "loss": 0.6464, |
| "step": 321 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 0.30799657106399536, |
| "learning_rate": 5.065860695955971e-05, |
| "loss": 0.6356, |
| "step": 322 |
| }, |
| { |
| "epoch": 1.4043478260869566, |
| "grad_norm": 0.5910504460334778, |
| "learning_rate": 5.000000000000002e-05, |
| "loss": 0.6058, |
| "step": 323 |
| }, |
| { |
| "epoch": 1.4086956521739131, |
| "grad_norm": 0.35114794969558716, |
| "learning_rate": 4.934427220828571e-05, |
| "loss": 0.6172, |
| "step": 324 |
| }, |
| { |
| "epoch": 1.4130434782608696, |
| "grad_norm": 0.27586087584495544, |
| "learning_rate": 4.869146134342426e-05, |
| "loss": 0.6249, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.4173913043478261, |
| "grad_norm": 0.2799661457538605, |
| "learning_rate": 4.804160499645667e-05, |
| "loss": 0.6144, |
| "step": 326 |
| }, |
| { |
| "epoch": 1.4217391304347826, |
| "grad_norm": 0.29662200808525085, |
| "learning_rate": 4.739474058829289e-05, |
| "loss": 0.6042, |
| "step": 327 |
| }, |
| { |
| "epoch": 1.4260869565217391, |
| "grad_norm": 0.26697418093681335, |
| "learning_rate": 4.675090536755705e-05, |
| "loss": 0.6164, |
| "step": 328 |
| }, |
| { |
| "epoch": 1.4304347826086956, |
| "grad_norm": 0.2900712490081787, |
| "learning_rate": 4.611013640844245e-05, |
| "loss": 0.6298, |
| "step": 329 |
| }, |
| { |
| "epoch": 1.434782608695652, |
| "grad_norm": 0.26294711232185364, |
| "learning_rate": 4.547247060857675e-05, |
| "loss": 0.6363, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.4391304347826086, |
| "grad_norm": 0.22578993439674377, |
| "learning_rate": 4.483794468689728e-05, |
| "loss": 0.615, |
| "step": 331 |
| }, |
| { |
| "epoch": 1.4434782608695653, |
| "grad_norm": 0.2953335642814636, |
| "learning_rate": 4.420659518153667e-05, |
| "loss": 0.6278, |
| "step": 332 |
| }, |
| { |
| "epoch": 1.4478260869565218, |
| "grad_norm": 0.27388015389442444, |
| "learning_rate": 4.357845844771881e-05, |
| "loss": 0.6222, |
| "step": 333 |
| }, |
| { |
| "epoch": 1.4521739130434783, |
| "grad_norm": 0.23460474610328674, |
| "learning_rate": 4.295357065566543e-05, |
| "loss": 0.6313, |
| "step": 334 |
| }, |
| { |
| "epoch": 1.4565217391304348, |
| "grad_norm": 0.3273029923439026, |
| "learning_rate": 4.2331967788513295e-05, |
| "loss": 0.6038, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.4608695652173913, |
| "grad_norm": 0.27219584584236145, |
| "learning_rate": 4.1713685640242165e-05, |
| "loss": 0.6285, |
| "step": 336 |
| }, |
| { |
| "epoch": 1.4652173913043478, |
| "grad_norm": 0.3417372703552246, |
| "learning_rate": 4.109875981361363e-05, |
| "loss": 0.6361, |
| "step": 337 |
| }, |
| { |
| "epoch": 1.4695652173913043, |
| "grad_norm": 0.26904726028442383, |
| "learning_rate": 4.048722571812105e-05, |
| "loss": 0.6143, |
| "step": 338 |
| }, |
| { |
| "epoch": 1.4739130434782608, |
| "grad_norm": 0.3139411211013794, |
| "learning_rate": 3.987911856795047e-05, |
| "loss": 0.6209, |
| "step": 339 |
| }, |
| { |
| "epoch": 1.4782608695652173, |
| "grad_norm": 0.2561061978340149, |
| "learning_rate": 3.927447337995304e-05, |
| "loss": 0.614, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.482608695652174, |
| "grad_norm": 0.2412693351507187, |
| "learning_rate": 3.8673324971628357e-05, |
| "loss": 0.6154, |
| "step": 341 |
| }, |
| { |
| "epoch": 1.4869565217391305, |
| "grad_norm": 0.20282143354415894, |
| "learning_rate": 3.8075707959119846e-05, |
| "loss": 0.6052, |
| "step": 342 |
| }, |
| { |
| "epoch": 1.491304347826087, |
| "grad_norm": 0.25691401958465576, |
| "learning_rate": 3.7481656755221125e-05, |
| "loss": 0.6081, |
| "step": 343 |
| }, |
| { |
| "epoch": 1.4956521739130435, |
| "grad_norm": 0.26478344202041626, |
| "learning_rate": 3.689120556739475e-05, |
| "loss": 0.6191, |
| "step": 344 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.21863462030887604, |
| "learning_rate": 3.630438839580217e-05, |
| "loss": 0.6319, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.5043478260869565, |
| "grad_norm": 0.22426074743270874, |
| "learning_rate": 3.5721239031346066e-05, |
| "loss": 0.6178, |
| "step": 346 |
| }, |
| { |
| "epoch": 1.508695652173913, |
| "grad_norm": 0.21831241250038147, |
| "learning_rate": 3.5141791053724405e-05, |
| "loss": 0.6303, |
| "step": 347 |
| }, |
| { |
| "epoch": 1.5130434782608697, |
| "grad_norm": 0.21894365549087524, |
| "learning_rate": 3.456607782949689e-05, |
| "loss": 0.6044, |
| "step": 348 |
| }, |
| { |
| "epoch": 1.517391304347826, |
| "grad_norm": 0.25870388746261597, |
| "learning_rate": 3.399413251016359e-05, |
| "loss": 0.6181, |
| "step": 349 |
| }, |
| { |
| "epoch": 1.5217391304347827, |
| "grad_norm": 0.2224799394607544, |
| "learning_rate": 3.342598803025595e-05, |
| "loss": 0.6028, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.526086956521739, |
| "grad_norm": 0.25613147020339966, |
| "learning_rate": 3.2861677105440336e-05, |
| "loss": 0.5982, |
| "step": 351 |
| }, |
| { |
| "epoch": 1.5304347826086957, |
| "grad_norm": 0.24120664596557617, |
| "learning_rate": 3.2301232230634104e-05, |
| "loss": 0.6159, |
| "step": 352 |
| }, |
| { |
| "epoch": 1.5347826086956522, |
| "grad_norm": 0.22223018109798431, |
| "learning_rate": 3.174468567813461e-05, |
| "loss": 0.6113, |
| "step": 353 |
| }, |
| { |
| "epoch": 1.5391304347826087, |
| "grad_norm": 0.25855186581611633, |
| "learning_rate": 3.119206949576052e-05, |
| "loss": 0.6315, |
| "step": 354 |
| }, |
| { |
| "epoch": 1.5434782608695652, |
| "grad_norm": 0.21754255890846252, |
| "learning_rate": 3.0643415505006735e-05, |
| "loss": 0.6202, |
| "step": 355 |
| }, |
| { |
| "epoch": 1.5478260869565217, |
| "grad_norm": 0.2552974224090576, |
| "learning_rate": 3.009875529921181e-05, |
| "loss": 0.6195, |
| "step": 356 |
| }, |
| { |
| "epoch": 1.5521739130434784, |
| "grad_norm": 0.19017580151557922, |
| "learning_rate": 2.9558120241738784e-05, |
| "loss": 0.5913, |
| "step": 357 |
| }, |
| { |
| "epoch": 1.5565217391304347, |
| "grad_norm": 0.2394818663597107, |
| "learning_rate": 2.90215414641691e-05, |
| "loss": 0.6091, |
| "step": 358 |
| }, |
| { |
| "epoch": 1.5608695652173914, |
| "grad_norm": 0.29089704155921936, |
| "learning_rate": 2.8489049864510054e-05, |
| "loss": 0.639, |
| "step": 359 |
| }, |
| { |
| "epoch": 1.5652173913043477, |
| "grad_norm": 0.21274344623088837, |
| "learning_rate": 2.7960676105415472e-05, |
| "loss": 0.6182, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.5695652173913044, |
| "grad_norm": 0.2907910645008087, |
| "learning_rate": 2.7436450612420095e-05, |
| "loss": 0.6111, |
| "step": 361 |
| }, |
| { |
| "epoch": 1.5739130434782609, |
| "grad_norm": 0.19825702905654907, |
| "learning_rate": 2.691640357218759e-05, |
| "loss": 0.6175, |
| "step": 362 |
| }, |
| { |
| "epoch": 1.5782608695652174, |
| "grad_norm": 0.2254411280155182, |
| "learning_rate": 2.640056493077231e-05, |
| "loss": 0.5938, |
| "step": 363 |
| }, |
| { |
| "epoch": 1.5826086956521739, |
| "grad_norm": 0.2379319965839386, |
| "learning_rate": 2.5888964391894766e-05, |
| "loss": 0.6105, |
| "step": 364 |
| }, |
| { |
| "epoch": 1.5869565217391304, |
| "grad_norm": 0.1910097748041153, |
| "learning_rate": 2.5381631415231454e-05, |
| "loss": 0.6088, |
| "step": 365 |
| }, |
| { |
| "epoch": 1.591304347826087, |
| "grad_norm": 0.2428610920906067, |
| "learning_rate": 2.4878595214718236e-05, |
| "loss": 0.5999, |
| "step": 366 |
| }, |
| { |
| "epoch": 1.5956521739130434, |
| "grad_norm": 0.21029497683048248, |
| "learning_rate": 2.4379884756868167e-05, |
| "loss": 0.617, |
| "step": 367 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.22931547462940216, |
| "learning_rate": 2.3885528759103538e-05, |
| "loss": 0.5912, |
| "step": 368 |
| }, |
| { |
| "epoch": 1.6043478260869564, |
| "grad_norm": 0.19771426916122437, |
| "learning_rate": 2.339555568810221e-05, |
| "loss": 0.58, |
| "step": 369 |
| }, |
| { |
| "epoch": 1.608695652173913, |
| "grad_norm": 0.25638020038604736, |
| "learning_rate": 2.2909993758158412e-05, |
| "loss": 0.6217, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.6130434782608696, |
| "grad_norm": 0.2538512945175171, |
| "learning_rate": 2.242887092955801e-05, |
| "loss": 0.6372, |
| "step": 371 |
| }, |
| { |
| "epoch": 1.617391304347826, |
| "grad_norm": 0.19667693972587585, |
| "learning_rate": 2.1952214906968627e-05, |
| "loss": 0.5964, |
| "step": 372 |
| }, |
| { |
| "epoch": 1.6217391304347826, |
| "grad_norm": 0.2506635785102844, |
| "learning_rate": 2.1480053137844115e-05, |
| "loss": 0.6196, |
| "step": 373 |
| }, |
| { |
| "epoch": 1.626086956521739, |
| "grad_norm": 0.19938236474990845, |
| "learning_rate": 2.101241281084416e-05, |
| "loss": 0.6214, |
| "step": 374 |
| }, |
| { |
| "epoch": 1.6304347826086958, |
| "grad_norm": 0.20109523832798004, |
| "learning_rate": 2.054932085426856e-05, |
| "loss": 0.6072, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.634782608695652, |
| "grad_norm": 0.21539539098739624, |
| "learning_rate": 2.0090803934506764e-05, |
| "loss": 0.6062, |
| "step": 376 |
| }, |
| { |
| "epoch": 1.6391304347826088, |
| "grad_norm": 0.21811442077159882, |
| "learning_rate": 1.9636888454502178e-05, |
| "loss": 0.6093, |
| "step": 377 |
| }, |
| { |
| "epoch": 1.643478260869565, |
| "grad_norm": 0.1716393679380417, |
| "learning_rate": 1.9187600552231955e-05, |
| "loss": 0.5951, |
| "step": 378 |
| }, |
| { |
| "epoch": 1.6478260869565218, |
| "grad_norm": 0.22683413326740265, |
| "learning_rate": 1.8742966099201697e-05, |
| "loss": 0.6119, |
| "step": 379 |
| }, |
| { |
| "epoch": 1.6521739130434783, |
| "grad_norm": 0.213649183511734, |
| "learning_rate": 1.8303010698955804e-05, |
| "loss": 0.6013, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.6565217391304348, |
| "grad_norm": 0.1564229130744934, |
| "learning_rate": 1.7867759685603114e-05, |
| "loss": 0.6083, |
| "step": 381 |
| }, |
| { |
| "epoch": 1.6608695652173913, |
| "grad_norm": 0.2164810448884964, |
| "learning_rate": 1.7437238122358057e-05, |
| "loss": 0.6306, |
| "step": 382 |
| }, |
| { |
| "epoch": 1.6652173913043478, |
| "grad_norm": 0.18622331321239471, |
| "learning_rate": 1.7011470800097496e-05, |
| "loss": 0.5776, |
| "step": 383 |
| }, |
| { |
| "epoch": 1.6695652173913045, |
| "grad_norm": 0.1888633519411087, |
| "learning_rate": 1.659048223593308e-05, |
| "loss": 0.6242, |
| "step": 384 |
| }, |
| { |
| "epoch": 1.6739130434782608, |
| "grad_norm": 0.16813671588897705, |
| "learning_rate": 1.6174296671799572e-05, |
| "loss": 0.5759, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.6782608695652175, |
| "grad_norm": 0.20108157396316528, |
| "learning_rate": 1.5762938073058853e-05, |
| "loss": 0.6151, |
| "step": 386 |
| }, |
| { |
| "epoch": 1.6826086956521737, |
| "grad_norm": 0.17089848220348358, |
| "learning_rate": 1.5356430127119913e-05, |
| "loss": 0.5993, |
| "step": 387 |
| }, |
| { |
| "epoch": 1.6869565217391305, |
| "grad_norm": 0.15591366589069366, |
| "learning_rate": 1.4954796242074898e-05, |
| "loss": 0.614, |
| "step": 388 |
| }, |
| { |
| "epoch": 1.691304347826087, |
| "grad_norm": 0.16411159932613373, |
| "learning_rate": 1.4558059545351143e-05, |
| "loss": 0.6023, |
| "step": 389 |
| }, |
| { |
| "epoch": 1.6956521739130435, |
| "grad_norm": 0.17830075323581696, |
| "learning_rate": 1.4166242882379476e-05, |
| "loss": 0.6031, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.7, |
| "grad_norm": 0.15914571285247803, |
| "learning_rate": 1.3779368815278647e-05, |
| "loss": 0.6173, |
| "step": 391 |
| }, |
| { |
| "epoch": 1.7043478260869565, |
| "grad_norm": 0.16238906979560852, |
| "learning_rate": 1.339745962155613e-05, |
| "loss": 0.6085, |
| "step": 392 |
| }, |
| { |
| "epoch": 1.7086956521739132, |
| "grad_norm": 0.1836780607700348, |
| "learning_rate": 1.302053729282533e-05, |
| "loss": 0.6147, |
| "step": 393 |
| }, |
| { |
| "epoch": 1.7130434782608694, |
| "grad_norm": 0.17346879839897156, |
| "learning_rate": 1.2648623533539261e-05, |
| "loss": 0.5877, |
| "step": 394 |
| }, |
| { |
| "epoch": 1.7173913043478262, |
| "grad_norm": 0.17038564383983612, |
| "learning_rate": 1.2281739759740574e-05, |
| "loss": 0.5981, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.7217391304347827, |
| "grad_norm": 0.23689156770706177, |
| "learning_rate": 1.1919907097828653e-05, |
| "loss": 0.6318, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.7260869565217392, |
| "grad_norm": 0.1710849106311798, |
| "learning_rate": 1.1563146383342772e-05, |
| "loss": 0.6007, |
| "step": 397 |
| }, |
| { |
| "epoch": 1.7304347826086957, |
| "grad_norm": 0.150990828871727, |
| "learning_rate": 1.1211478159762478e-05, |
| "loss": 0.5942, |
| "step": 398 |
| }, |
| { |
| "epoch": 1.7347826086956522, |
| "grad_norm": 0.17545339465141296, |
| "learning_rate": 1.0864922677324618e-05, |
| "loss": 0.6205, |
| "step": 399 |
| }, |
| { |
| "epoch": 1.7391304347826086, |
| "grad_norm": 0.15553072094917297, |
| "learning_rate": 1.0523499891857225e-05, |
| "loss": 0.5996, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.7434782608695651, |
| "grad_norm": 0.16285602748394012, |
| "learning_rate": 1.01872294636304e-05, |
| "loss": 0.5937, |
| "step": 401 |
| }, |
| { |
| "epoch": 1.7478260869565219, |
| "grad_norm": 0.16755890846252441, |
| "learning_rate": 9.856130756224213e-06, |
| "loss": 0.6023, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.7521739130434781, |
| "grad_norm": 0.141220822930336, |
| "learning_rate": 9.530222835413738e-06, |
| "loss": 0.6181, |
| "step": 403 |
| }, |
| { |
| "epoch": 1.7565217391304349, |
| "grad_norm": 0.1646806001663208, |
| "learning_rate": 9.209524468071096e-06, |
| "loss": 0.615, |
| "step": 404 |
| }, |
| { |
| "epoch": 1.7608695652173914, |
| "grad_norm": 0.1562896966934204, |
| "learning_rate": 8.894054121084838e-06, |
| "loss": 0.6002, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.7652173913043478, |
| "grad_norm": 0.15589256584644318, |
| "learning_rate": 8.58382996029652e-06, |
| "loss": 0.6098, |
| "step": 406 |
| }, |
| { |
| "epoch": 1.7695652173913043, |
| "grad_norm": 0.14259637892246246, |
| "learning_rate": 8.278869849454718e-06, |
| "loss": 0.6032, |
| "step": 407 |
| }, |
| { |
| "epoch": 1.7739130434782608, |
| "grad_norm": 0.1473761945962906, |
| "learning_rate": 7.97919134918632e-06, |
| "loss": 0.6265, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.7782608695652173, |
| "grad_norm": 0.1384039968252182, |
| "learning_rate": 7.684811715985429e-06, |
| "loss": 0.617, |
| "step": 409 |
| }, |
| { |
| "epoch": 1.7826086956521738, |
| "grad_norm": 0.16400642693042755, |
| "learning_rate": 7.395747901219474e-06, |
| "loss": 0.632, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.7869565217391306, |
| "grad_norm": 0.154324010014534, |
| "learning_rate": 7.1120165501533e-06, |
| "loss": 0.6039, |
| "step": 411 |
| }, |
| { |
| "epoch": 1.7913043478260868, |
| "grad_norm": 0.15824785828590393, |
| "learning_rate": 6.833634000990541e-06, |
| "loss": 0.5953, |
| "step": 412 |
| }, |
| { |
| "epoch": 1.7956521739130435, |
| "grad_norm": 0.14589077234268188, |
| "learning_rate": 6.560616283932897e-06, |
| "loss": 0.6049, |
| "step": 413 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 0.1429314911365509, |
| "learning_rate": 6.292979120256992e-06, |
| "loss": 0.6068, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.8043478260869565, |
| "grad_norm": 0.1345595121383667, |
| "learning_rate": 6.030737921409169e-06, |
| "loss": 0.6233, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.808695652173913, |
| "grad_norm": 0.14544036984443665, |
| "learning_rate": 5.77390778811796e-06, |
| "loss": 0.6229, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.8130434782608695, |
| "grad_norm": 0.14767540991306305, |
| "learning_rate": 5.52250350952459e-06, |
| "loss": 0.6289, |
| "step": 417 |
| }, |
| { |
| "epoch": 1.8173913043478263, |
| "grad_norm": 0.14071600139141083, |
| "learning_rate": 5.276539562331384e-06, |
| "loss": 0.6006, |
| "step": 418 |
| }, |
| { |
| "epoch": 1.8217391304347825, |
| "grad_norm": 0.15143164992332458, |
| "learning_rate": 5.036030109968082e-06, |
| "loss": 0.6162, |
| "step": 419 |
| }, |
| { |
| "epoch": 1.8260869565217392, |
| "grad_norm": 0.1341582089662552, |
| "learning_rate": 4.800989001776324e-06, |
| "loss": 0.6015, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.8304347826086955, |
| "grad_norm": 0.13631945848464966, |
| "learning_rate": 4.5714297722121106e-06, |
| "loss": 0.6197, |
| "step": 421 |
| }, |
| { |
| "epoch": 1.8347826086956522, |
| "grad_norm": 0.15245981514453888, |
| "learning_rate": 4.347365640066525e-06, |
| "loss": 0.6225, |
| "step": 422 |
| }, |
| { |
| "epoch": 1.8391304347826087, |
| "grad_norm": 0.14014090597629547, |
| "learning_rate": 4.128809507704445e-06, |
| "loss": 0.5993, |
| "step": 423 |
| }, |
| { |
| "epoch": 1.8434782608695652, |
| "grad_norm": 0.13918210566043854, |
| "learning_rate": 3.915773960321634e-06, |
| "loss": 0.6243, |
| "step": 424 |
| }, |
| { |
| "epoch": 1.8478260869565217, |
| "grad_norm": 0.13100025057792664, |
| "learning_rate": 3.7082712652200867e-06, |
| "loss": 0.6074, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.8521739130434782, |
| "grad_norm": 0.15842288732528687, |
| "learning_rate": 3.5063133711014882e-06, |
| "loss": 0.6097, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.856521739130435, |
| "grad_norm": 0.1331150382757187, |
| "learning_rate": 3.3099119073793928e-06, |
| "loss": 0.6021, |
| "step": 427 |
| }, |
| { |
| "epoch": 1.8608695652173912, |
| "grad_norm": 0.13728439807891846, |
| "learning_rate": 3.119078183509372e-06, |
| "loss": 0.6015, |
| "step": 428 |
| }, |
| { |
| "epoch": 1.865217391304348, |
| "grad_norm": 0.1390867531299591, |
| "learning_rate": 2.9338231883378366e-06, |
| "loss": 0.603, |
| "step": 429 |
| }, |
| { |
| "epoch": 1.8695652173913042, |
| "grad_norm": 0.15344412624835968, |
| "learning_rate": 2.7541575894693194e-06, |
| "loss": 0.6045, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.873913043478261, |
| "grad_norm": 0.14015014469623566, |
| "learning_rate": 2.580091732652101e-06, |
| "loss": 0.6161, |
| "step": 431 |
| }, |
| { |
| "epoch": 1.8782608695652174, |
| "grad_norm": 0.15435759723186493, |
| "learning_rate": 2.4116356411825525e-06, |
| "loss": 0.6281, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.882608695652174, |
| "grad_norm": 0.14000248908996582, |
| "learning_rate": 2.248799015327907e-06, |
| "loss": 0.6358, |
| "step": 433 |
| }, |
| { |
| "epoch": 1.8869565217391304, |
| "grad_norm": 0.14310961961746216, |
| "learning_rate": 2.091591231767709e-06, |
| "loss": 0.63, |
| "step": 434 |
| }, |
| { |
| "epoch": 1.891304347826087, |
| "grad_norm": 0.14115604758262634, |
| "learning_rate": 1.9400213430538773e-06, |
| "loss": 0.637, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.8956521739130436, |
| "grad_norm": 0.1358548104763031, |
| "learning_rate": 1.7940980770894122e-06, |
| "loss": 0.6241, |
| "step": 436 |
| }, |
| { |
| "epoch": 1.9, |
| "grad_norm": 0.13970565795898438, |
| "learning_rate": 1.6538298366257976e-06, |
| "loss": 0.6118, |
| "step": 437 |
| }, |
| { |
| "epoch": 1.9043478260869566, |
| "grad_norm": 0.1373710334300995, |
| "learning_rate": 1.5192246987791981e-06, |
| "loss": 0.6121, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.908695652173913, |
| "grad_norm": 0.13853000104427338, |
| "learning_rate": 1.3902904145653096e-06, |
| "loss": 0.6433, |
| "step": 439 |
| }, |
| { |
| "epoch": 1.9130434782608696, |
| "grad_norm": 0.16299930214881897, |
| "learning_rate": 1.2670344084530383e-06, |
| "loss": 0.6114, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.9173913043478261, |
| "grad_norm": 0.15240226686000824, |
| "learning_rate": 1.1494637779369766e-06, |
| "loss": 0.6392, |
| "step": 441 |
| }, |
| { |
| "epoch": 1.9217391304347826, |
| "grad_norm": 0.12335983663797379, |
| "learning_rate": 1.0375852931286956e-06, |
| "loss": 0.6224, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.9260869565217391, |
| "grad_norm": 0.1319676637649536, |
| "learning_rate": 9.314053963669245e-07, |
| "loss": 0.618, |
| "step": 443 |
| }, |
| { |
| "epoch": 1.9304347826086956, |
| "grad_norm": 0.11884966492652893, |
| "learning_rate": 8.309302018465581e-07, |
| "loss": 0.5911, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.9347826086956523, |
| "grad_norm": 0.1367439180612564, |
| "learning_rate": 7.361654952665609e-07, |
| "loss": 0.6186, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.9391304347826086, |
| "grad_norm": 0.12649576365947723, |
| "learning_rate": 6.471167334968886e-07, |
| "loss": 0.6249, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.9434782608695653, |
| "grad_norm": 0.1303686797618866, |
| "learning_rate": 5.637890442641402e-07, |
| "loss": 0.5979, |
| "step": 447 |
| }, |
| { |
| "epoch": 1.9478260869565216, |
| "grad_norm": 0.1282416582107544, |
| "learning_rate": 4.861872258564049e-07, |
| "loss": 0.6195, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.9521739130434783, |
| "grad_norm": 0.12191151827573776, |
| "learning_rate": 4.143157468468717e-07, |
| "loss": 0.6097, |
| "step": 449 |
| }, |
| { |
| "epoch": 1.9565217391304348, |
| "grad_norm": 0.11873479187488556, |
| "learning_rate": 3.481787458365915e-07, |
| "loss": 0.6102, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.9608695652173913, |
| "grad_norm": 0.1280432939529419, |
| "learning_rate": 2.877800312160783e-07, |
| "loss": 0.5857, |
| "step": 451 |
| }, |
| { |
| "epoch": 1.9652173913043478, |
| "grad_norm": 0.12122710049152374, |
| "learning_rate": 2.3312308094607382e-07, |
| "loss": 0.5989, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.9695652173913043, |
| "grad_norm": 0.12293203175067902, |
| "learning_rate": 1.8421104235727405e-07, |
| "loss": 0.6275, |
| "step": 453 |
| }, |
| { |
| "epoch": 1.973913043478261, |
| "grad_norm": 0.1292450726032257, |
| "learning_rate": 1.4104673196903005e-07, |
| "loss": 0.6009, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.9782608695652173, |
| "grad_norm": 0.1343158781528473, |
| "learning_rate": 1.0363263532724432e-07, |
| "loss": 0.6207, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.982608695652174, |
| "grad_norm": 0.12888604402542114, |
| "learning_rate": 7.197090686119623e-08, |
| "loss": 0.6183, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.9869565217391303, |
| "grad_norm": 0.12688706815242767, |
| "learning_rate": 4.606336975948589e-08, |
| "loss": 0.6183, |
| "step": 457 |
| }, |
| { |
| "epoch": 1.991304347826087, |
| "grad_norm": 0.1274312287569046, |
| "learning_rate": 2.5911515865084667e-08, |
| "loss": 0.6094, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.9956521739130435, |
| "grad_norm": 0.13021548092365265, |
| "learning_rate": 1.1516505589381776e-08, |
| "loss": 0.6078, |
| "step": 459 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.12913620471954346, |
| "learning_rate": 2.8791678453821135e-09, |
| "loss": 0.6241, |
| "step": 460 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 460, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.7941728521827123e+19, |
| "train_batch_size": 24, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|