{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9998838222480395, "eval_steps": 500, "global_step": 17214, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00011617775196049956, "grad_norm": 1.1400724649429321, "learning_rate": 0.0001, "loss": 1.9564, "step": 1 }, { "epoch": 0.00023235550392099912, "grad_norm": 0.5516552925109863, "learning_rate": 0.0001, "loss": 1.9434, "step": 2 }, { "epoch": 0.0003485332558814987, "grad_norm": 2.07415771484375, "learning_rate": 0.0001, "loss": 1.9873, "step": 3 }, { "epoch": 0.00046471100784199824, "grad_norm": 0.6526162028312683, "learning_rate": 0.0001, "loss": 1.9786, "step": 4 }, { "epoch": 0.0005808887598024978, "grad_norm": 0.5003806352615356, "learning_rate": 0.0001, "loss": 1.8828, "step": 5 }, { "epoch": 0.0006970665117629974, "grad_norm": 0.5392526388168335, "learning_rate": 0.0001, "loss": 1.8913, "step": 6 }, { "epoch": 0.000813244263723497, "grad_norm": 0.4057665765285492, "learning_rate": 0.0001, "loss": 1.7552, "step": 7 }, { "epoch": 0.0009294220156839965, "grad_norm": 0.42006543278694153, "learning_rate": 0.0001, "loss": 2.0082, "step": 8 }, { "epoch": 0.001045599767644496, "grad_norm": 1.1451431512832642, "learning_rate": 0.0001, "loss": 1.8661, "step": 9 }, { "epoch": 0.0011617775196049957, "grad_norm": 0.44178342819213867, "learning_rate": 0.0001, "loss": 1.914, "step": 10 }, { "epoch": 0.0012779552715654952, "grad_norm": 0.3882751762866974, "learning_rate": 0.0001, "loss": 1.902, "step": 11 }, { "epoch": 0.0013941330235259948, "grad_norm": 0.35292503237724304, "learning_rate": 0.0001, "loss": 1.89, "step": 12 }, { "epoch": 0.0015103107754864944, "grad_norm": 0.4178386926651001, "learning_rate": 0.0001, "loss": 1.8888, "step": 13 }, { "epoch": 0.001626488527446994, "grad_norm": 0.39220985770225525, "learning_rate": 0.0001, "loss": 1.7775, "step": 14 }, { "epoch": 0.0017426662794074934, "grad_norm": 0.4134705364704132, "learning_rate": 0.0001, "loss": 1.8794, "step": 15 }, { "epoch": 0.001858844031367993, "grad_norm": 0.3392408788204193, "learning_rate": 0.0001, "loss": 1.6998, "step": 16 }, { "epoch": 0.0019750217833284928, "grad_norm": 0.3672613203525543, "learning_rate": 0.0001, "loss": 2.0741, "step": 17 }, { "epoch": 0.002091199535288992, "grad_norm": 0.4194653630256653, "learning_rate": 0.0001, "loss": 1.9617, "step": 18 }, { "epoch": 0.002207377287249492, "grad_norm": 0.38323554396629333, "learning_rate": 0.0001, "loss": 1.8911, "step": 19 }, { "epoch": 0.0023235550392099913, "grad_norm": 0.35623419284820557, "learning_rate": 0.0001, "loss": 1.8062, "step": 20 }, { "epoch": 0.0024397327911704907, "grad_norm": 0.3997187614440918, "learning_rate": 0.0001, "loss": 2.1459, "step": 21 }, { "epoch": 0.0025559105431309905, "grad_norm": 0.31873300671577454, "learning_rate": 0.0001, "loss": 1.813, "step": 22 }, { "epoch": 0.00267208829509149, "grad_norm": 0.36987119913101196, "learning_rate": 0.0001, "loss": 1.8538, "step": 23 }, { "epoch": 0.0027882660470519897, "grad_norm": 0.38269954919815063, "learning_rate": 0.0001, "loss": 1.9971, "step": 24 }, { "epoch": 0.002904443799012489, "grad_norm": 0.3547232449054718, "learning_rate": 0.0001, "loss": 1.9081, "step": 25 }, { "epoch": 0.003020621550972989, "grad_norm": 0.33162739872932434, "learning_rate": 0.0001, "loss": 1.6442, "step": 26 }, { "epoch": 0.003136799302933488, "grad_norm": 0.3379284143447876, "learning_rate": 0.0001, "loss": 1.856, "step": 27 }, { "epoch": 0.003252977054893988, "grad_norm": 0.35247287154197693, "learning_rate": 0.0001, "loss": 1.7758, "step": 28 }, { "epoch": 0.0033691548068544874, "grad_norm": 0.3893706500530243, "learning_rate": 0.0001, "loss": 2.0282, "step": 29 }, { "epoch": 0.0034853325588149867, "grad_norm": 0.39306116104125977, "learning_rate": 0.0001, "loss": 1.7966, "step": 30 }, { "epoch": 0.0036015103107754865, "grad_norm": 0.36439645290374756, "learning_rate": 0.0001, "loss": 1.9552, "step": 31 }, { "epoch": 0.003717688062735986, "grad_norm": 0.3758845925331116, "learning_rate": 0.0001, "loss": 1.9639, "step": 32 }, { "epoch": 0.0038338658146964857, "grad_norm": 0.3357931971549988, "learning_rate": 0.0001, "loss": 1.8929, "step": 33 }, { "epoch": 0.0039500435666569855, "grad_norm": 0.3393707573413849, "learning_rate": 0.0001, "loss": 1.7972, "step": 34 }, { "epoch": 0.004066221318617485, "grad_norm": 0.3457406163215637, "learning_rate": 0.0001, "loss": 1.7516, "step": 35 }, { "epoch": 0.004182399070577984, "grad_norm": 0.33674880862236023, "learning_rate": 0.0001, "loss": 1.7137, "step": 36 }, { "epoch": 0.004298576822538484, "grad_norm": 0.33614569902420044, "learning_rate": 0.0001, "loss": 1.8855, "step": 37 }, { "epoch": 0.004414754574498984, "grad_norm": 0.34421899914741516, "learning_rate": 0.0001, "loss": 1.8091, "step": 38 }, { "epoch": 0.004530932326459483, "grad_norm": 0.39817526936531067, "learning_rate": 0.0001, "loss": 1.941, "step": 39 }, { "epoch": 0.004647110078419983, "grad_norm": 0.3206139802932739, "learning_rate": 0.0001, "loss": 1.672, "step": 40 }, { "epoch": 0.004763287830380482, "grad_norm": 0.37263521552085876, "learning_rate": 0.0001, "loss": 1.8275, "step": 41 }, { "epoch": 0.004879465582340981, "grad_norm": 0.3837355077266693, "learning_rate": 0.0001, "loss": 1.9595, "step": 42 }, { "epoch": 0.004995643334301482, "grad_norm": 0.33184581995010376, "learning_rate": 0.0001, "loss": 1.6796, "step": 43 }, { "epoch": 0.005111821086261981, "grad_norm": 0.32549911737442017, "learning_rate": 0.0001, "loss": 1.5881, "step": 44 }, { "epoch": 0.00522799883822248, "grad_norm": 0.35189497470855713, "learning_rate": 0.0001, "loss": 1.7428, "step": 45 }, { "epoch": 0.00534417659018298, "grad_norm": 0.34523481130599976, "learning_rate": 0.0001, "loss": 1.9006, "step": 46 }, { "epoch": 0.00546035434214348, "grad_norm": 0.3432101905345917, "learning_rate": 0.0001, "loss": 2.0098, "step": 47 }, { "epoch": 0.005576532094103979, "grad_norm": 0.3387945294380188, "learning_rate": 0.0001, "loss": 1.8139, "step": 48 }, { "epoch": 0.005692709846064479, "grad_norm": 0.33659058809280396, "learning_rate": 0.0001, "loss": 1.8409, "step": 49 }, { "epoch": 0.005808887598024978, "grad_norm": 0.32735776901245117, "learning_rate": 0.0001, "loss": 1.7861, "step": 50 }, { "epoch": 0.005925065349985477, "grad_norm": 0.35173168778419495, "learning_rate": 0.0001, "loss": 1.8177, "step": 51 }, { "epoch": 0.006041243101945978, "grad_norm": 0.3859328031539917, "learning_rate": 0.0001, "loss": 2.0822, "step": 52 }, { "epoch": 0.006157420853906477, "grad_norm": 0.3218703866004944, "learning_rate": 0.0001, "loss": 1.7588, "step": 53 }, { "epoch": 0.006273598605866976, "grad_norm": 0.34024327993392944, "learning_rate": 0.0001, "loss": 1.8432, "step": 54 }, { "epoch": 0.006389776357827476, "grad_norm": 0.3500777781009674, "learning_rate": 0.0001, "loss": 1.946, "step": 55 }, { "epoch": 0.006505954109787976, "grad_norm": 0.35199397802352905, "learning_rate": 0.0001, "loss": 1.7764, "step": 56 }, { "epoch": 0.006622131861748475, "grad_norm": 0.3386881947517395, "learning_rate": 0.0001, "loss": 1.8424, "step": 57 }, { "epoch": 0.006738309613708975, "grad_norm": 0.333283931016922, "learning_rate": 0.0001, "loss": 1.8125, "step": 58 }, { "epoch": 0.006854487365669474, "grad_norm": 0.32521334290504456, "learning_rate": 0.0001, "loss": 1.7354, "step": 59 }, { "epoch": 0.0069706651176299735, "grad_norm": 0.36899396777153015, "learning_rate": 0.0001, "loss": 1.8834, "step": 60 }, { "epoch": 0.007086842869590474, "grad_norm": 0.33825087547302246, "learning_rate": 0.0001, "loss": 1.905, "step": 61 }, { "epoch": 0.007203020621550973, "grad_norm": 0.3499239981174469, "learning_rate": 0.0001, "loss": 1.8053, "step": 62 }, { "epoch": 0.0073191983735114725, "grad_norm": 0.3182366192340851, "learning_rate": 0.0001, "loss": 1.7816, "step": 63 }, { "epoch": 0.007435376125471972, "grad_norm": 0.34978607296943665, "learning_rate": 0.0001, "loss": 1.9504, "step": 64 }, { "epoch": 0.007551553877432472, "grad_norm": 0.3370610177516937, "learning_rate": 0.0001, "loss": 1.8897, "step": 65 }, { "epoch": 0.0076677316293929714, "grad_norm": 0.3260805606842041, "learning_rate": 0.0001, "loss": 1.8774, "step": 66 }, { "epoch": 0.007783909381353471, "grad_norm": 0.32136961817741394, "learning_rate": 0.0001, "loss": 1.7582, "step": 67 }, { "epoch": 0.007900087133313971, "grad_norm": 0.34404799342155457, "learning_rate": 0.0001, "loss": 1.9441, "step": 68 }, { "epoch": 0.00801626488527447, "grad_norm": 0.3686494529247284, "learning_rate": 0.0001, "loss": 2.0653, "step": 69 }, { "epoch": 0.00813244263723497, "grad_norm": 0.34148523211479187, "learning_rate": 0.0001, "loss": 1.9369, "step": 70 }, { "epoch": 0.00824862038919547, "grad_norm": 0.3419267535209656, "learning_rate": 0.0001, "loss": 1.8754, "step": 71 }, { "epoch": 0.008364798141155969, "grad_norm": 0.34724754095077515, "learning_rate": 0.0001, "loss": 1.6847, "step": 72 }, { "epoch": 0.008480975893116468, "grad_norm": 0.3402698338031769, "learning_rate": 0.0001, "loss": 1.8966, "step": 73 }, { "epoch": 0.008597153645076967, "grad_norm": 0.3450843393802643, "learning_rate": 0.0001, "loss": 1.9082, "step": 74 }, { "epoch": 0.008713331397037467, "grad_norm": 0.3541632294654846, "learning_rate": 0.0001, "loss": 1.8932, "step": 75 }, { "epoch": 0.008829509148997968, "grad_norm": 0.3470132648944855, "learning_rate": 0.0001, "loss": 1.7195, "step": 76 }, { "epoch": 0.008945686900958467, "grad_norm": 0.33801767230033875, "learning_rate": 0.0001, "loss": 1.8521, "step": 77 }, { "epoch": 0.009061864652918966, "grad_norm": 0.3319573402404785, "learning_rate": 0.0001, "loss": 1.7786, "step": 78 }, { "epoch": 0.009178042404879466, "grad_norm": 0.35573163628578186, "learning_rate": 0.0001, "loss": 1.8646, "step": 79 }, { "epoch": 0.009294220156839965, "grad_norm": 0.36378416419029236, "learning_rate": 0.0001, "loss": 2.0346, "step": 80 }, { "epoch": 0.009410397908800465, "grad_norm": 0.3243565559387207, "learning_rate": 0.0001, "loss": 1.7084, "step": 81 }, { "epoch": 0.009526575660760964, "grad_norm": 0.32637423276901245, "learning_rate": 0.0001, "loss": 1.8714, "step": 82 }, { "epoch": 0.009642753412721463, "grad_norm": 0.3473580479621887, "learning_rate": 0.0001, "loss": 1.7707, "step": 83 }, { "epoch": 0.009758931164681963, "grad_norm": 0.3237003982067108, "learning_rate": 0.0001, "loss": 1.7546, "step": 84 }, { "epoch": 0.009875108916642464, "grad_norm": 0.3218970000743866, "learning_rate": 0.0001, "loss": 1.7922, "step": 85 }, { "epoch": 0.009991286668602963, "grad_norm": 0.3358110189437866, "learning_rate": 0.0001, "loss": 1.7284, "step": 86 }, { "epoch": 0.010107464420563463, "grad_norm": 0.34907999634742737, "learning_rate": 0.0001, "loss": 1.8581, "step": 87 }, { "epoch": 0.010223642172523962, "grad_norm": 0.3303886950016022, "learning_rate": 0.0001, "loss": 1.8264, "step": 88 }, { "epoch": 0.010339819924484461, "grad_norm": 0.33527591824531555, "learning_rate": 0.0001, "loss": 1.7832, "step": 89 }, { "epoch": 0.01045599767644496, "grad_norm": 0.3530234098434448, "learning_rate": 0.0001, "loss": 1.8076, "step": 90 }, { "epoch": 0.01057217542840546, "grad_norm": 0.35667964816093445, "learning_rate": 0.0001, "loss": 1.9292, "step": 91 }, { "epoch": 0.01068835318036596, "grad_norm": 0.32774579524993896, "learning_rate": 0.0001, "loss": 1.902, "step": 92 }, { "epoch": 0.010804530932326459, "grad_norm": 0.3343551456928253, "learning_rate": 0.0001, "loss": 1.8557, "step": 93 }, { "epoch": 0.01092070868428696, "grad_norm": 0.3439468741416931, "learning_rate": 0.0001, "loss": 1.8196, "step": 94 }, { "epoch": 0.01103688643624746, "grad_norm": 0.31505489349365234, "learning_rate": 0.0001, "loss": 1.7903, "step": 95 }, { "epoch": 0.011153064188207959, "grad_norm": 0.3402290344238281, "learning_rate": 0.0001, "loss": 1.7981, "step": 96 }, { "epoch": 0.011269241940168458, "grad_norm": 0.31825143098831177, "learning_rate": 0.0001, "loss": 1.7931, "step": 97 }, { "epoch": 0.011385419692128957, "grad_norm": 0.32378312945365906, "learning_rate": 0.0001, "loss": 1.6732, "step": 98 }, { "epoch": 0.011501597444089457, "grad_norm": 0.33641722798347473, "learning_rate": 0.0001, "loss": 1.8258, "step": 99 }, { "epoch": 0.011617775196049956, "grad_norm": 0.3626033663749695, "learning_rate": 0.0001, "loss": 1.9063, "step": 100 }, { "epoch": 0.011733952948010455, "grad_norm": 0.3551986813545227, "learning_rate": 0.0001, "loss": 1.876, "step": 101 }, { "epoch": 0.011850130699970955, "grad_norm": 0.364032506942749, "learning_rate": 0.0001, "loss": 1.8289, "step": 102 }, { "epoch": 0.011966308451931456, "grad_norm": 0.3228561282157898, "learning_rate": 0.0001, "loss": 1.7241, "step": 103 }, { "epoch": 0.012082486203891955, "grad_norm": 0.34352198243141174, "learning_rate": 0.0001, "loss": 1.8273, "step": 104 }, { "epoch": 0.012198663955852455, "grad_norm": 0.3475358784198761, "learning_rate": 0.0001, "loss": 1.6767, "step": 105 }, { "epoch": 0.012314841707812954, "grad_norm": 0.3112235367298126, "learning_rate": 0.0001, "loss": 1.635, "step": 106 }, { "epoch": 0.012431019459773453, "grad_norm": 0.3369406759738922, "learning_rate": 0.0001, "loss": 1.9139, "step": 107 }, { "epoch": 0.012547197211733953, "grad_norm": 0.3227371573448181, "learning_rate": 0.0001, "loss": 1.8202, "step": 108 }, { "epoch": 0.012663374963694452, "grad_norm": 0.36206531524658203, "learning_rate": 0.0001, "loss": 1.9441, "step": 109 }, { "epoch": 0.012779552715654952, "grad_norm": 0.3347959518432617, "learning_rate": 0.0001, "loss": 1.9206, "step": 110 }, { "epoch": 0.012895730467615451, "grad_norm": 0.32953107357025146, "learning_rate": 0.0001, "loss": 1.9498, "step": 111 }, { "epoch": 0.013011908219575952, "grad_norm": 0.30254584550857544, "learning_rate": 0.0001, "loss": 1.578, "step": 112 }, { "epoch": 0.013128085971536451, "grad_norm": 0.3403797149658203, "learning_rate": 0.0001, "loss": 1.8821, "step": 113 }, { "epoch": 0.01324426372349695, "grad_norm": 0.31918781995773315, "learning_rate": 0.0001, "loss": 1.7435, "step": 114 }, { "epoch": 0.01336044147545745, "grad_norm": 0.3319108486175537, "learning_rate": 0.0001, "loss": 1.7677, "step": 115 }, { "epoch": 0.01347661922741795, "grad_norm": 0.34339022636413574, "learning_rate": 0.0001, "loss": 1.6114, "step": 116 }, { "epoch": 0.013592796979378449, "grad_norm": 0.33843010663986206, "learning_rate": 0.0001, "loss": 1.8448, "step": 117 }, { "epoch": 0.013708974731338948, "grad_norm": 0.32837411761283875, "learning_rate": 0.0001, "loss": 1.7597, "step": 118 }, { "epoch": 0.013825152483299448, "grad_norm": 0.3620765507221222, "learning_rate": 0.0001, "loss": 1.8675, "step": 119 }, { "epoch": 0.013941330235259947, "grad_norm": 0.33245301246643066, "learning_rate": 0.0001, "loss": 1.8119, "step": 120 }, { "epoch": 0.014057507987220448, "grad_norm": 0.34569036960601807, "learning_rate": 0.0001, "loss": 1.9758, "step": 121 }, { "epoch": 0.014173685739180947, "grad_norm": 0.374098539352417, "learning_rate": 0.0001, "loss": 1.78, "step": 122 }, { "epoch": 0.014289863491141447, "grad_norm": 0.3418053686618805, "learning_rate": 0.0001, "loss": 1.7408, "step": 123 }, { "epoch": 0.014406041243101946, "grad_norm": 0.3384765088558197, "learning_rate": 0.0001, "loss": 1.7649, "step": 124 }, { "epoch": 0.014522218995062446, "grad_norm": 0.3590647280216217, "learning_rate": 0.0001, "loss": 1.9065, "step": 125 }, { "epoch": 0.014638396747022945, "grad_norm": 0.3395555019378662, "learning_rate": 0.0001, "loss": 1.8293, "step": 126 }, { "epoch": 0.014754574498983444, "grad_norm": 0.3610383868217468, "learning_rate": 0.0001, "loss": 1.8426, "step": 127 }, { "epoch": 0.014870752250943944, "grad_norm": 0.33869925141334534, "learning_rate": 0.0001, "loss": 1.7425, "step": 128 }, { "epoch": 0.014986930002904443, "grad_norm": 0.338553249835968, "learning_rate": 0.0001, "loss": 1.8638, "step": 129 }, { "epoch": 0.015103107754864944, "grad_norm": 0.3297845125198364, "learning_rate": 0.0001, "loss": 1.8374, "step": 130 }, { "epoch": 0.015219285506825444, "grad_norm": 0.3697233498096466, "learning_rate": 0.0001, "loss": 1.9652, "step": 131 }, { "epoch": 0.015335463258785943, "grad_norm": 0.3488331139087677, "learning_rate": 0.0001, "loss": 1.6962, "step": 132 }, { "epoch": 0.015451641010746442, "grad_norm": 0.3396114706993103, "learning_rate": 0.0001, "loss": 1.7741, "step": 133 }, { "epoch": 0.015567818762706942, "grad_norm": 0.34215307235717773, "learning_rate": 0.0001, "loss": 1.9031, "step": 134 }, { "epoch": 0.015683996514667443, "grad_norm": 0.32633256912231445, "learning_rate": 0.0001, "loss": 1.794, "step": 135 }, { "epoch": 0.015800174266627942, "grad_norm": 0.3819684088230133, "learning_rate": 0.0001, "loss": 1.7509, "step": 136 }, { "epoch": 0.01591635201858844, "grad_norm": 0.3342839479446411, "learning_rate": 0.0001, "loss": 1.7577, "step": 137 }, { "epoch": 0.01603252977054894, "grad_norm": 0.32045823335647583, "learning_rate": 0.0001, "loss": 1.9446, "step": 138 }, { "epoch": 0.01614870752250944, "grad_norm": 0.3185892701148987, "learning_rate": 0.0001, "loss": 1.6481, "step": 139 }, { "epoch": 0.01626488527446994, "grad_norm": 0.3285033106803894, "learning_rate": 0.0001, "loss": 1.8363, "step": 140 }, { "epoch": 0.01638106302643044, "grad_norm": 0.35822629928588867, "learning_rate": 0.0001, "loss": 1.7425, "step": 141 }, { "epoch": 0.01649724077839094, "grad_norm": 0.3599529266357422, "learning_rate": 0.0001, "loss": 1.7378, "step": 142 }, { "epoch": 0.016613418530351438, "grad_norm": 0.3105633556842804, "learning_rate": 0.0001, "loss": 1.6305, "step": 143 }, { "epoch": 0.016729596282311937, "grad_norm": 0.3310804069042206, "learning_rate": 0.0001, "loss": 1.7228, "step": 144 }, { "epoch": 0.016845774034272436, "grad_norm": 0.32904812693595886, "learning_rate": 0.0001, "loss": 1.7132, "step": 145 }, { "epoch": 0.016961951786232936, "grad_norm": 0.3462965190410614, "learning_rate": 0.0001, "loss": 1.8588, "step": 146 }, { "epoch": 0.017078129538193435, "grad_norm": 0.3536953926086426, "learning_rate": 0.0001, "loss": 1.7871, "step": 147 }, { "epoch": 0.017194307290153935, "grad_norm": 0.3167782723903656, "learning_rate": 0.0001, "loss": 1.7468, "step": 148 }, { "epoch": 0.017310485042114434, "grad_norm": 0.3438095450401306, "learning_rate": 0.0001, "loss": 1.7273, "step": 149 }, { "epoch": 0.017426662794074933, "grad_norm": 0.32976609468460083, "learning_rate": 0.0001, "loss": 1.8282, "step": 150 }, { "epoch": 0.017542840546035433, "grad_norm": 0.36160793900489807, "learning_rate": 0.0001, "loss": 1.982, "step": 151 }, { "epoch": 0.017659018297995936, "grad_norm": 0.31704050302505493, "learning_rate": 0.0001, "loss": 1.6637, "step": 152 }, { "epoch": 0.017775196049956435, "grad_norm": 0.3186262547969818, "learning_rate": 0.0001, "loss": 1.7416, "step": 153 }, { "epoch": 0.017891373801916934, "grad_norm": 0.3476766049861908, "learning_rate": 0.0001, "loss": 1.822, "step": 154 }, { "epoch": 0.018007551553877434, "grad_norm": 0.35061368346214294, "learning_rate": 0.0001, "loss": 1.7696, "step": 155 }, { "epoch": 0.018123729305837933, "grad_norm": 0.33829307556152344, "learning_rate": 0.0001, "loss": 1.791, "step": 156 }, { "epoch": 0.018239907057798432, "grad_norm": 0.3412603735923767, "learning_rate": 0.0001, "loss": 1.8324, "step": 157 }, { "epoch": 0.01835608480975893, "grad_norm": 0.3786666989326477, "learning_rate": 0.0001, "loss": 1.9237, "step": 158 }, { "epoch": 0.01847226256171943, "grad_norm": 0.31077513098716736, "learning_rate": 0.0001, "loss": 1.6433, "step": 159 }, { "epoch": 0.01858844031367993, "grad_norm": 0.32142356038093567, "learning_rate": 0.0001, "loss": 1.6929, "step": 160 }, { "epoch": 0.01870461806564043, "grad_norm": 0.30329957604408264, "learning_rate": 0.0001, "loss": 1.6117, "step": 161 }, { "epoch": 0.01882079581760093, "grad_norm": 0.3338776230812073, "learning_rate": 0.0001, "loss": 1.7708, "step": 162 }, { "epoch": 0.01893697356956143, "grad_norm": 0.32876482605934143, "learning_rate": 0.0001, "loss": 1.8144, "step": 163 }, { "epoch": 0.019053151321521928, "grad_norm": 0.32878580689430237, "learning_rate": 0.0001, "loss": 1.6474, "step": 164 }, { "epoch": 0.019169329073482427, "grad_norm": 0.33372652530670166, "learning_rate": 0.0001, "loss": 1.7709, "step": 165 }, { "epoch": 0.019285506825442927, "grad_norm": 0.3314981460571289, "learning_rate": 0.0001, "loss": 1.828, "step": 166 }, { "epoch": 0.019401684577403426, "grad_norm": 0.3250749707221985, "learning_rate": 0.0001, "loss": 1.7861, "step": 167 }, { "epoch": 0.019517862329363925, "grad_norm": 0.31878861784935, "learning_rate": 0.0001, "loss": 1.9088, "step": 168 }, { "epoch": 0.019634040081324425, "grad_norm": 0.3233966827392578, "learning_rate": 0.0001, "loss": 1.759, "step": 169 }, { "epoch": 0.019750217833284928, "grad_norm": 0.3260466754436493, "learning_rate": 0.0001, "loss": 1.9208, "step": 170 }, { "epoch": 0.019866395585245427, "grad_norm": 0.33691638708114624, "learning_rate": 0.0001, "loss": 1.8521, "step": 171 }, { "epoch": 0.019982573337205926, "grad_norm": 0.33654195070266724, "learning_rate": 0.0001, "loss": 1.7191, "step": 172 }, { "epoch": 0.020098751089166426, "grad_norm": 0.32309457659721375, "learning_rate": 0.0001, "loss": 1.7446, "step": 173 }, { "epoch": 0.020214928841126925, "grad_norm": 0.33408665657043457, "learning_rate": 0.0001, "loss": 1.8275, "step": 174 }, { "epoch": 0.020331106593087424, "grad_norm": 0.32850080728530884, "learning_rate": 0.0001, "loss": 1.8264, "step": 175 }, { "epoch": 0.020447284345047924, "grad_norm": 0.35644981265068054, "learning_rate": 0.0001, "loss": 1.9859, "step": 176 }, { "epoch": 0.020563462097008423, "grad_norm": 0.36484628915786743, "learning_rate": 0.0001, "loss": 1.8343, "step": 177 }, { "epoch": 0.020679639848968923, "grad_norm": 0.3122706413269043, "learning_rate": 0.0001, "loss": 1.748, "step": 178 }, { "epoch": 0.020795817600929422, "grad_norm": 0.35981225967407227, "learning_rate": 0.0001, "loss": 1.9379, "step": 179 }, { "epoch": 0.02091199535288992, "grad_norm": 0.3504844009876251, "learning_rate": 0.0001, "loss": 1.7112, "step": 180 }, { "epoch": 0.02102817310485042, "grad_norm": 0.3608277142047882, "learning_rate": 0.0001, "loss": 1.705, "step": 181 }, { "epoch": 0.02114435085681092, "grad_norm": 0.3393659293651581, "learning_rate": 0.0001, "loss": 1.87, "step": 182 }, { "epoch": 0.02126052860877142, "grad_norm": 0.34214162826538086, "learning_rate": 0.0001, "loss": 1.8054, "step": 183 }, { "epoch": 0.02137670636073192, "grad_norm": 0.33791059255599976, "learning_rate": 0.0001, "loss": 1.9013, "step": 184 }, { "epoch": 0.021492884112692418, "grad_norm": 0.3184012770652771, "learning_rate": 0.0001, "loss": 1.7028, "step": 185 }, { "epoch": 0.021609061864652918, "grad_norm": 0.3612908124923706, "learning_rate": 0.0001, "loss": 1.8917, "step": 186 }, { "epoch": 0.021725239616613417, "grad_norm": 0.3533594608306885, "learning_rate": 0.0001, "loss": 1.8029, "step": 187 }, { "epoch": 0.02184141736857392, "grad_norm": 0.33328378200531006, "learning_rate": 0.0001, "loss": 1.7835, "step": 188 }, { "epoch": 0.02195759512053442, "grad_norm": 0.3383578360080719, "learning_rate": 0.0001, "loss": 1.7839, "step": 189 }, { "epoch": 0.02207377287249492, "grad_norm": 0.3100789785385132, "learning_rate": 0.0001, "loss": 1.6669, "step": 190 }, { "epoch": 0.022189950624455418, "grad_norm": 0.3665405511856079, "learning_rate": 0.0001, "loss": 1.8639, "step": 191 }, { "epoch": 0.022306128376415917, "grad_norm": 0.36163437366485596, "learning_rate": 0.0001, "loss": 1.8558, "step": 192 }, { "epoch": 0.022422306128376417, "grad_norm": 0.367439866065979, "learning_rate": 0.0001, "loss": 1.9491, "step": 193 }, { "epoch": 0.022538483880336916, "grad_norm": 0.3353922963142395, "learning_rate": 0.0001, "loss": 1.8423, "step": 194 }, { "epoch": 0.022654661632297415, "grad_norm": 0.33214300870895386, "learning_rate": 0.0001, "loss": 1.559, "step": 195 }, { "epoch": 0.022770839384257915, "grad_norm": 0.33071082830429077, "learning_rate": 0.0001, "loss": 1.8457, "step": 196 }, { "epoch": 0.022887017136218414, "grad_norm": 0.3185707926750183, "learning_rate": 0.0001, "loss": 1.7457, "step": 197 }, { "epoch": 0.023003194888178913, "grad_norm": 0.32431602478027344, "learning_rate": 0.0001, "loss": 1.7577, "step": 198 }, { "epoch": 0.023119372640139413, "grad_norm": 0.3300994634628296, "learning_rate": 0.0001, "loss": 1.7727, "step": 199 }, { "epoch": 0.023235550392099912, "grad_norm": 0.3367205858230591, "learning_rate": 0.0001, "loss": 1.7845, "step": 200 }, { "epoch": 0.02335172814406041, "grad_norm": 0.3295809328556061, "learning_rate": 0.0001, "loss": 1.703, "step": 201 }, { "epoch": 0.02346790589602091, "grad_norm": 0.3326011598110199, "learning_rate": 0.0001, "loss": 1.7791, "step": 202 }, { "epoch": 0.02358408364798141, "grad_norm": 0.3159317374229431, "learning_rate": 0.0001, "loss": 1.7935, "step": 203 }, { "epoch": 0.02370026139994191, "grad_norm": 0.35444700717926025, "learning_rate": 0.0001, "loss": 1.8029, "step": 204 }, { "epoch": 0.02381643915190241, "grad_norm": 0.35230258107185364, "learning_rate": 0.0001, "loss": 1.7454, "step": 205 }, { "epoch": 0.023932616903862912, "grad_norm": 0.3529949486255646, "learning_rate": 0.0001, "loss": 1.8424, "step": 206 }, { "epoch": 0.02404879465582341, "grad_norm": 0.3865301012992859, "learning_rate": 0.0001, "loss": 1.8076, "step": 207 }, { "epoch": 0.02416497240778391, "grad_norm": 0.32935428619384766, "learning_rate": 0.0001, "loss": 1.7917, "step": 208 }, { "epoch": 0.02428115015974441, "grad_norm": 0.339444100856781, "learning_rate": 0.0001, "loss": 1.8285, "step": 209 }, { "epoch": 0.02439732791170491, "grad_norm": 0.3153437077999115, "learning_rate": 0.0001, "loss": 1.5033, "step": 210 }, { "epoch": 0.02451350566366541, "grad_norm": 0.3516276478767395, "learning_rate": 0.0001, "loss": 1.9109, "step": 211 }, { "epoch": 0.024629683415625908, "grad_norm": 0.3484845757484436, "learning_rate": 0.0001, "loss": 1.6998, "step": 212 }, { "epoch": 0.024745861167586407, "grad_norm": 0.3327982723712921, "learning_rate": 0.0001, "loss": 1.792, "step": 213 }, { "epoch": 0.024862038919546907, "grad_norm": 0.37349483370780945, "learning_rate": 0.0001, "loss": 1.8074, "step": 214 }, { "epoch": 0.024978216671507406, "grad_norm": 0.3740026354789734, "learning_rate": 0.0001, "loss": 1.7827, "step": 215 }, { "epoch": 0.025094394423467906, "grad_norm": 0.34118950366973877, "learning_rate": 0.0001, "loss": 1.7631, "step": 216 }, { "epoch": 0.025210572175428405, "grad_norm": 0.3344680368900299, "learning_rate": 0.0001, "loss": 1.7317, "step": 217 }, { "epoch": 0.025326749927388904, "grad_norm": 0.3705542981624603, "learning_rate": 0.0001, "loss": 1.8239, "step": 218 }, { "epoch": 0.025442927679349404, "grad_norm": 0.3481920063495636, "learning_rate": 0.0001, "loss": 1.8647, "step": 219 }, { "epoch": 0.025559105431309903, "grad_norm": 0.3217613399028778, "learning_rate": 0.0001, "loss": 1.7362, "step": 220 }, { "epoch": 0.025675283183270402, "grad_norm": 0.3721480667591095, "learning_rate": 0.0001, "loss": 1.8672, "step": 221 }, { "epoch": 0.025791460935230902, "grad_norm": 0.32509273290634155, "learning_rate": 0.0001, "loss": 1.7041, "step": 222 }, { "epoch": 0.0259076386871914, "grad_norm": 0.33785101771354675, "learning_rate": 0.0001, "loss": 1.7899, "step": 223 }, { "epoch": 0.026023816439151904, "grad_norm": 0.3636549115180969, "learning_rate": 0.0001, "loss": 1.8676, "step": 224 }, { "epoch": 0.026139994191112403, "grad_norm": 0.31692421436309814, "learning_rate": 0.0001, "loss": 1.6563, "step": 225 }, { "epoch": 0.026256171943072903, "grad_norm": 0.3383740186691284, "learning_rate": 0.0001, "loss": 1.7732, "step": 226 }, { "epoch": 0.026372349695033402, "grad_norm": 0.355059951543808, "learning_rate": 0.0001, "loss": 1.677, "step": 227 }, { "epoch": 0.0264885274469939, "grad_norm": 0.3562302589416504, "learning_rate": 0.0001, "loss": 1.9306, "step": 228 }, { "epoch": 0.0266047051989544, "grad_norm": 0.3225594162940979, "learning_rate": 0.0001, "loss": 1.7609, "step": 229 }, { "epoch": 0.0267208829509149, "grad_norm": 0.33251953125, "learning_rate": 0.0001, "loss": 1.8027, "step": 230 }, { "epoch": 0.0268370607028754, "grad_norm": 0.3734368085861206, "learning_rate": 0.0001, "loss": 1.7417, "step": 231 }, { "epoch": 0.0269532384548359, "grad_norm": 0.33438679575920105, "learning_rate": 0.0001, "loss": 1.7758, "step": 232 }, { "epoch": 0.0270694162067964, "grad_norm": 0.37446722388267517, "learning_rate": 0.0001, "loss": 1.929, "step": 233 }, { "epoch": 0.027185593958756898, "grad_norm": 0.3230035603046417, "learning_rate": 0.0001, "loss": 1.6549, "step": 234 }, { "epoch": 0.027301771710717397, "grad_norm": 0.36308127641677856, "learning_rate": 0.0001, "loss": 1.8119, "step": 235 }, { "epoch": 0.027417949462677896, "grad_norm": 0.31774529814720154, "learning_rate": 0.0001, "loss": 1.7894, "step": 236 }, { "epoch": 0.027534127214638396, "grad_norm": 0.3248523473739624, "learning_rate": 0.0001, "loss": 1.8632, "step": 237 }, { "epoch": 0.027650304966598895, "grad_norm": 0.3736507296562195, "learning_rate": 0.0001, "loss": 1.9092, "step": 238 }, { "epoch": 0.027766482718559395, "grad_norm": 0.3262912631034851, "learning_rate": 0.0001, "loss": 1.697, "step": 239 }, { "epoch": 0.027882660470519894, "grad_norm": 0.3249903619289398, "learning_rate": 0.0001, "loss": 1.7496, "step": 240 }, { "epoch": 0.027998838222480393, "grad_norm": 0.3449850380420685, "learning_rate": 0.0001, "loss": 1.825, "step": 241 }, { "epoch": 0.028115015974440896, "grad_norm": 0.41747432947158813, "learning_rate": 0.0001, "loss": 2.1273, "step": 242 }, { "epoch": 0.028231193726401396, "grad_norm": 0.35569465160369873, "learning_rate": 0.0001, "loss": 1.8643, "step": 243 }, { "epoch": 0.028347371478361895, "grad_norm": 0.3352862596511841, "learning_rate": 0.0001, "loss": 1.8427, "step": 244 }, { "epoch": 0.028463549230322394, "grad_norm": 0.38555145263671875, "learning_rate": 0.0001, "loss": 1.9834, "step": 245 }, { "epoch": 0.028579726982282894, "grad_norm": 0.3578146696090698, "learning_rate": 0.0001, "loss": 1.7827, "step": 246 }, { "epoch": 0.028695904734243393, "grad_norm": 0.3353215754032135, "learning_rate": 0.0001, "loss": 1.6884, "step": 247 }, { "epoch": 0.028812082486203892, "grad_norm": 0.3413390815258026, "learning_rate": 0.0001, "loss": 1.7664, "step": 248 }, { "epoch": 0.028928260238164392, "grad_norm": 0.3437090218067169, "learning_rate": 0.0001, "loss": 1.8693, "step": 249 }, { "epoch": 0.02904443799012489, "grad_norm": 0.3443460762500763, "learning_rate": 0.0001, "loss": 1.8738, "step": 250 }, { "epoch": 0.02916061574208539, "grad_norm": 0.3518659770488739, "learning_rate": 0.0001, "loss": 1.8869, "step": 251 }, { "epoch": 0.02927679349404589, "grad_norm": 0.3433094322681427, "learning_rate": 0.0001, "loss": 1.8771, "step": 252 }, { "epoch": 0.02939297124600639, "grad_norm": 0.3407760262489319, "learning_rate": 0.0001, "loss": 1.914, "step": 253 }, { "epoch": 0.02950914899796689, "grad_norm": 0.3268572688102722, "learning_rate": 0.0001, "loss": 1.8681, "step": 254 }, { "epoch": 0.029625326749927388, "grad_norm": 0.3242436349391937, "learning_rate": 0.0001, "loss": 1.8188, "step": 255 }, { "epoch": 0.029741504501887887, "grad_norm": 0.33246999979019165, "learning_rate": 0.0001, "loss": 1.7859, "step": 256 }, { "epoch": 0.029857682253848387, "grad_norm": 0.3286731541156769, "learning_rate": 0.0001, "loss": 1.782, "step": 257 }, { "epoch": 0.029973860005808886, "grad_norm": 0.3115208148956299, "learning_rate": 0.0001, "loss": 1.6835, "step": 258 }, { "epoch": 0.030090037757769385, "grad_norm": 0.3387967646121979, "learning_rate": 0.0001, "loss": 1.8529, "step": 259 }, { "epoch": 0.03020621550972989, "grad_norm": 0.35752734541893005, "learning_rate": 0.0001, "loss": 1.8307, "step": 260 }, { "epoch": 0.030322393261690388, "grad_norm": 0.3699615001678467, "learning_rate": 0.0001, "loss": 1.7063, "step": 261 }, { "epoch": 0.030438571013650887, "grad_norm": 0.328961044549942, "learning_rate": 0.0001, "loss": 1.625, "step": 262 }, { "epoch": 0.030554748765611386, "grad_norm": 0.36490383744239807, "learning_rate": 0.0001, "loss": 1.8146, "step": 263 }, { "epoch": 0.030670926517571886, "grad_norm": 0.32661300897598267, "learning_rate": 0.0001, "loss": 1.6685, "step": 264 }, { "epoch": 0.030787104269532385, "grad_norm": 0.34228524565696716, "learning_rate": 0.0001, "loss": 1.8087, "step": 265 }, { "epoch": 0.030903282021492885, "grad_norm": 0.35033437609672546, "learning_rate": 0.0001, "loss": 1.8297, "step": 266 }, { "epoch": 0.031019459773453384, "grad_norm": 0.3491237759590149, "learning_rate": 0.0001, "loss": 1.945, "step": 267 }, { "epoch": 0.031135637525413883, "grad_norm": 0.34657391905784607, "learning_rate": 0.0001, "loss": 1.823, "step": 268 }, { "epoch": 0.03125181527737438, "grad_norm": 0.36879056692123413, "learning_rate": 0.0001, "loss": 1.859, "step": 269 }, { "epoch": 0.031367993029334885, "grad_norm": 0.36880430579185486, "learning_rate": 0.0001, "loss": 1.7599, "step": 270 }, { "epoch": 0.03148417078129538, "grad_norm": 0.3530198931694031, "learning_rate": 0.0001, "loss": 1.9935, "step": 271 }, { "epoch": 0.031600348533255884, "grad_norm": 0.35175904631614685, "learning_rate": 0.0001, "loss": 1.8837, "step": 272 }, { "epoch": 0.03171652628521638, "grad_norm": 0.3176674246788025, "learning_rate": 0.0001, "loss": 1.6186, "step": 273 }, { "epoch": 0.03183270403717688, "grad_norm": 0.33333176374435425, "learning_rate": 0.0001, "loss": 1.6463, "step": 274 }, { "epoch": 0.03194888178913738, "grad_norm": 0.31809431314468384, "learning_rate": 0.0001, "loss": 1.5768, "step": 275 }, { "epoch": 0.03206505954109788, "grad_norm": 0.3419245183467865, "learning_rate": 0.0001, "loss": 1.8647, "step": 276 }, { "epoch": 0.03218123729305838, "grad_norm": 0.3137516379356384, "learning_rate": 0.0001, "loss": 1.6757, "step": 277 }, { "epoch": 0.03229741504501888, "grad_norm": 0.3313903510570526, "learning_rate": 0.0001, "loss": 1.7395, "step": 278 }, { "epoch": 0.032413592796979376, "grad_norm": 0.3197391629219055, "learning_rate": 0.0001, "loss": 1.6098, "step": 279 }, { "epoch": 0.03252977054893988, "grad_norm": 0.3287334442138672, "learning_rate": 0.0001, "loss": 1.6124, "step": 280 }, { "epoch": 0.032645948300900375, "grad_norm": 0.3322586715221405, "learning_rate": 0.0001, "loss": 1.8868, "step": 281 }, { "epoch": 0.03276212605286088, "grad_norm": 0.35943326354026794, "learning_rate": 0.0001, "loss": 1.8031, "step": 282 }, { "epoch": 0.032878303804821374, "grad_norm": 0.3296755254268646, "learning_rate": 0.0001, "loss": 1.7838, "step": 283 }, { "epoch": 0.03299448155678188, "grad_norm": 0.3416058123111725, "learning_rate": 0.0001, "loss": 1.7132, "step": 284 }, { "epoch": 0.03311065930874237, "grad_norm": 0.3588326573371887, "learning_rate": 0.0001, "loss": 1.8722, "step": 285 }, { "epoch": 0.033226837060702875, "grad_norm": 0.3292895555496216, "learning_rate": 0.0001, "loss": 1.7415, "step": 286 }, { "epoch": 0.03334301481266338, "grad_norm": 0.3382332921028137, "learning_rate": 0.0001, "loss": 1.8179, "step": 287 }, { "epoch": 0.033459192564623874, "grad_norm": 0.33576157689094543, "learning_rate": 0.0001, "loss": 1.8442, "step": 288 }, { "epoch": 0.03357537031658438, "grad_norm": 0.3294540047645569, "learning_rate": 0.0001, "loss": 1.622, "step": 289 }, { "epoch": 0.03369154806854487, "grad_norm": 0.3160039782524109, "learning_rate": 0.0001, "loss": 1.6648, "step": 290 }, { "epoch": 0.033807725820505376, "grad_norm": 0.3588349223136902, "learning_rate": 0.0001, "loss": 1.8304, "step": 291 }, { "epoch": 0.03392390357246587, "grad_norm": 0.30049145221710205, "learning_rate": 0.0001, "loss": 1.6468, "step": 292 }, { "epoch": 0.034040081324426374, "grad_norm": 0.3043253719806671, "learning_rate": 0.0001, "loss": 1.6615, "step": 293 }, { "epoch": 0.03415625907638687, "grad_norm": 0.377269983291626, "learning_rate": 0.0001, "loss": 1.8306, "step": 294 }, { "epoch": 0.03427243682834737, "grad_norm": 0.378327876329422, "learning_rate": 0.0001, "loss": 1.9481, "step": 295 }, { "epoch": 0.03438861458030787, "grad_norm": 0.3419339060783386, "learning_rate": 0.0001, "loss": 1.8375, "step": 296 }, { "epoch": 0.03450479233226837, "grad_norm": 0.3224831521511078, "learning_rate": 0.0001, "loss": 1.5147, "step": 297 }, { "epoch": 0.03462097008422887, "grad_norm": 0.35075846314430237, "learning_rate": 0.0001, "loss": 1.7834, "step": 298 }, { "epoch": 0.03473714783618937, "grad_norm": 0.345042884349823, "learning_rate": 0.0001, "loss": 1.8939, "step": 299 }, { "epoch": 0.03485332558814987, "grad_norm": 0.3130955100059509, "learning_rate": 0.0001, "loss": 1.6054, "step": 300 }, { "epoch": 0.03496950334011037, "grad_norm": 0.35810473561286926, "learning_rate": 0.0001, "loss": 1.7418, "step": 301 }, { "epoch": 0.035085681092070865, "grad_norm": 0.33703386783599854, "learning_rate": 0.0001, "loss": 1.7681, "step": 302 }, { "epoch": 0.03520185884403137, "grad_norm": 0.3259965777397156, "learning_rate": 0.0001, "loss": 1.7835, "step": 303 }, { "epoch": 0.03531803659599187, "grad_norm": 0.3411068618297577, "learning_rate": 0.0001, "loss": 1.7623, "step": 304 }, { "epoch": 0.03543421434795237, "grad_norm": 0.35380053520202637, "learning_rate": 0.0001, "loss": 1.6731, "step": 305 }, { "epoch": 0.03555039209991287, "grad_norm": 0.32706257700920105, "learning_rate": 0.0001, "loss": 1.7234, "step": 306 }, { "epoch": 0.035666569851873366, "grad_norm": 0.3144983649253845, "learning_rate": 0.0001, "loss": 1.6658, "step": 307 }, { "epoch": 0.03578274760383387, "grad_norm": 0.3492032289505005, "learning_rate": 0.0001, "loss": 1.7184, "step": 308 }, { "epoch": 0.035898925355794364, "grad_norm": 0.34730949997901917, "learning_rate": 0.0001, "loss": 1.7799, "step": 309 }, { "epoch": 0.03601510310775487, "grad_norm": 0.31954050064086914, "learning_rate": 0.0001, "loss": 1.6716, "step": 310 }, { "epoch": 0.03613128085971536, "grad_norm": 0.3348424732685089, "learning_rate": 0.0001, "loss": 1.7981, "step": 311 }, { "epoch": 0.036247458611675866, "grad_norm": 0.3164363503456116, "learning_rate": 0.0001, "loss": 1.5808, "step": 312 }, { "epoch": 0.03636363636363636, "grad_norm": 0.35313692688941956, "learning_rate": 0.0001, "loss": 1.9214, "step": 313 }, { "epoch": 0.036479814115596865, "grad_norm": 0.3127838671207428, "learning_rate": 0.0001, "loss": 1.6572, "step": 314 }, { "epoch": 0.03659599186755736, "grad_norm": 0.33738380670547485, "learning_rate": 0.0001, "loss": 1.8281, "step": 315 }, { "epoch": 0.03671216961951786, "grad_norm": 0.34780019521713257, "learning_rate": 0.0001, "loss": 1.863, "step": 316 }, { "epoch": 0.03682834737147836, "grad_norm": 0.3482603430747986, "learning_rate": 0.0001, "loss": 1.9564, "step": 317 }, { "epoch": 0.03694452512343886, "grad_norm": 0.3411928415298462, "learning_rate": 0.0001, "loss": 1.8199, "step": 318 }, { "epoch": 0.03706070287539936, "grad_norm": 0.33409395813941956, "learning_rate": 0.0001, "loss": 1.8027, "step": 319 }, { "epoch": 0.03717688062735986, "grad_norm": 0.3127480447292328, "learning_rate": 0.0001, "loss": 1.6778, "step": 320 }, { "epoch": 0.03729305837932036, "grad_norm": 0.3322620689868927, "learning_rate": 0.0001, "loss": 1.674, "step": 321 }, { "epoch": 0.03740923613128086, "grad_norm": 0.3270343542098999, "learning_rate": 0.0001, "loss": 1.6199, "step": 322 }, { "epoch": 0.03752541388324136, "grad_norm": 0.3130471110343933, "learning_rate": 0.0001, "loss": 1.6465, "step": 323 }, { "epoch": 0.03764159163520186, "grad_norm": 0.33622002601623535, "learning_rate": 0.0001, "loss": 1.7173, "step": 324 }, { "epoch": 0.03775776938716236, "grad_norm": 0.3530596196651459, "learning_rate": 0.0001, "loss": 1.9313, "step": 325 }, { "epoch": 0.03787394713912286, "grad_norm": 0.38460564613342285, "learning_rate": 0.0001, "loss": 2.0007, "step": 326 }, { "epoch": 0.03799012489108336, "grad_norm": 0.3392890393733978, "learning_rate": 0.0001, "loss": 1.8334, "step": 327 }, { "epoch": 0.038106302643043856, "grad_norm": 0.33205297589302063, "learning_rate": 0.0001, "loss": 1.8434, "step": 328 }, { "epoch": 0.03822248039500436, "grad_norm": 0.34488892555236816, "learning_rate": 0.0001, "loss": 1.7568, "step": 329 }, { "epoch": 0.038338658146964855, "grad_norm": 0.357216477394104, "learning_rate": 0.0001, "loss": 1.776, "step": 330 }, { "epoch": 0.03845483589892536, "grad_norm": 0.31771931052207947, "learning_rate": 0.0001, "loss": 1.6836, "step": 331 }, { "epoch": 0.03857101365088585, "grad_norm": 0.36487412452697754, "learning_rate": 0.0001, "loss": 1.8129, "step": 332 }, { "epoch": 0.038687191402846356, "grad_norm": 0.3407606780529022, "learning_rate": 0.0001, "loss": 1.7241, "step": 333 }, { "epoch": 0.03880336915480685, "grad_norm": 0.31532132625579834, "learning_rate": 0.0001, "loss": 1.6642, "step": 334 }, { "epoch": 0.038919546906767355, "grad_norm": 0.3287792503833771, "learning_rate": 0.0001, "loss": 1.7889, "step": 335 }, { "epoch": 0.03903572465872785, "grad_norm": 0.33964601159095764, "learning_rate": 0.0001, "loss": 1.8163, "step": 336 }, { "epoch": 0.039151902410688354, "grad_norm": 0.34165436029434204, "learning_rate": 0.0001, "loss": 1.8581, "step": 337 }, { "epoch": 0.03926808016264885, "grad_norm": 0.348545640707016, "learning_rate": 0.0001, "loss": 1.7487, "step": 338 }, { "epoch": 0.03938425791460935, "grad_norm": 0.322898268699646, "learning_rate": 0.0001, "loss": 1.641, "step": 339 }, { "epoch": 0.039500435666569855, "grad_norm": 0.3299243748188019, "learning_rate": 0.0001, "loss": 1.7179, "step": 340 }, { "epoch": 0.03961661341853035, "grad_norm": 0.34560680389404297, "learning_rate": 0.0001, "loss": 1.8015, "step": 341 }, { "epoch": 0.039732791170490854, "grad_norm": 0.34533241391181946, "learning_rate": 0.0001, "loss": 1.8813, "step": 342 }, { "epoch": 0.03984896892245135, "grad_norm": 0.3315548598766327, "learning_rate": 0.0001, "loss": 1.849, "step": 343 }, { "epoch": 0.03996514667441185, "grad_norm": 0.31113383173942566, "learning_rate": 0.0001, "loss": 1.7933, "step": 344 }, { "epoch": 0.04008132442637235, "grad_norm": 0.34004050493240356, "learning_rate": 0.0001, "loss": 1.7723, "step": 345 }, { "epoch": 0.04019750217833285, "grad_norm": 0.3573724925518036, "learning_rate": 0.0001, "loss": 1.8572, "step": 346 }, { "epoch": 0.04031367993029335, "grad_norm": 0.3385657072067261, "learning_rate": 0.0001, "loss": 1.7458, "step": 347 }, { "epoch": 0.04042985768225385, "grad_norm": 0.34114423394203186, "learning_rate": 0.0001, "loss": 1.7496, "step": 348 }, { "epoch": 0.040546035434214346, "grad_norm": 0.3590448796749115, "learning_rate": 0.0001, "loss": 1.8671, "step": 349 }, { "epoch": 0.04066221318617485, "grad_norm": 0.34044602513313293, "learning_rate": 0.0001, "loss": 1.751, "step": 350 }, { "epoch": 0.040778390938135345, "grad_norm": 0.34901726245880127, "learning_rate": 0.0001, "loss": 1.7836, "step": 351 }, { "epoch": 0.04089456869009585, "grad_norm": 0.33901697397232056, "learning_rate": 0.0001, "loss": 1.9265, "step": 352 }, { "epoch": 0.041010746442056344, "grad_norm": 0.33621305227279663, "learning_rate": 0.0001, "loss": 1.7498, "step": 353 }, { "epoch": 0.041126924194016846, "grad_norm": 0.3610171973705292, "learning_rate": 0.0001, "loss": 1.9162, "step": 354 }, { "epoch": 0.04124310194597734, "grad_norm": 0.33839908242225647, "learning_rate": 0.0001, "loss": 1.8152, "step": 355 }, { "epoch": 0.041359279697937845, "grad_norm": 0.32283854484558105, "learning_rate": 0.0001, "loss": 1.5406, "step": 356 }, { "epoch": 0.04147545744989834, "grad_norm": 0.3764745891094208, "learning_rate": 0.0001, "loss": 1.9003, "step": 357 }, { "epoch": 0.041591635201858844, "grad_norm": 0.34221282601356506, "learning_rate": 0.0001, "loss": 1.8303, "step": 358 }, { "epoch": 0.04170781295381935, "grad_norm": 0.36195555329322815, "learning_rate": 0.0001, "loss": 1.6955, "step": 359 }, { "epoch": 0.04182399070577984, "grad_norm": 0.33597198128700256, "learning_rate": 0.0001, "loss": 1.7741, "step": 360 }, { "epoch": 0.041940168457740346, "grad_norm": 0.3600618243217468, "learning_rate": 0.0001, "loss": 1.8281, "step": 361 }, { "epoch": 0.04205634620970084, "grad_norm": 0.3321481943130493, "learning_rate": 0.0001, "loss": 1.4949, "step": 362 }, { "epoch": 0.042172523961661344, "grad_norm": 0.34914496541023254, "learning_rate": 0.0001, "loss": 1.6745, "step": 363 }, { "epoch": 0.04228870171362184, "grad_norm": 0.34095922112464905, "learning_rate": 0.0001, "loss": 1.6643, "step": 364 }, { "epoch": 0.04240487946558234, "grad_norm": 0.35311540961265564, "learning_rate": 0.0001, "loss": 1.6882, "step": 365 }, { "epoch": 0.04252105721754284, "grad_norm": 0.383943647146225, "learning_rate": 0.0001, "loss": 1.8434, "step": 366 }, { "epoch": 0.04263723496950334, "grad_norm": 0.33830368518829346, "learning_rate": 0.0001, "loss": 1.8743, "step": 367 }, { "epoch": 0.04275341272146384, "grad_norm": 0.3408838212490082, "learning_rate": 0.0001, "loss": 1.762, "step": 368 }, { "epoch": 0.04286959047342434, "grad_norm": 0.37213000655174255, "learning_rate": 0.0001, "loss": 1.9295, "step": 369 }, { "epoch": 0.042985768225384836, "grad_norm": 0.33618566393852234, "learning_rate": 0.0001, "loss": 1.9901, "step": 370 }, { "epoch": 0.04310194597734534, "grad_norm": 0.3326283097267151, "learning_rate": 0.0001, "loss": 1.7401, "step": 371 }, { "epoch": 0.043218123729305835, "grad_norm": 0.3324650526046753, "learning_rate": 0.0001, "loss": 1.7636, "step": 372 }, { "epoch": 0.04333430148126634, "grad_norm": 0.36444899439811707, "learning_rate": 0.0001, "loss": 1.7127, "step": 373 }, { "epoch": 0.043450479233226834, "grad_norm": 0.32787513732910156, "learning_rate": 0.0001, "loss": 1.7763, "step": 374 }, { "epoch": 0.04356665698518734, "grad_norm": 0.3590793311595917, "learning_rate": 0.0001, "loss": 1.791, "step": 375 }, { "epoch": 0.04368283473714784, "grad_norm": 0.3318205773830414, "learning_rate": 0.0001, "loss": 1.8108, "step": 376 }, { "epoch": 0.043799012489108335, "grad_norm": 0.34203052520751953, "learning_rate": 0.0001, "loss": 1.6802, "step": 377 }, { "epoch": 0.04391519024106884, "grad_norm": 0.3480004072189331, "learning_rate": 0.0001, "loss": 1.8196, "step": 378 }, { "epoch": 0.044031367993029334, "grad_norm": 0.3592599928379059, "learning_rate": 0.0001, "loss": 1.768, "step": 379 }, { "epoch": 0.04414754574498984, "grad_norm": 0.3415685296058655, "learning_rate": 0.0001, "loss": 1.8739, "step": 380 }, { "epoch": 0.04426372349695033, "grad_norm": 0.33502671122550964, "learning_rate": 0.0001, "loss": 1.7937, "step": 381 }, { "epoch": 0.044379901248910836, "grad_norm": 0.3669358193874359, "learning_rate": 0.0001, "loss": 1.6517, "step": 382 }, { "epoch": 0.04449607900087133, "grad_norm": 0.35224342346191406, "learning_rate": 0.0001, "loss": 1.787, "step": 383 }, { "epoch": 0.044612256752831835, "grad_norm": 0.3165675699710846, "learning_rate": 0.0001, "loss": 1.7865, "step": 384 }, { "epoch": 0.04472843450479233, "grad_norm": 0.34537431597709656, "learning_rate": 0.0001, "loss": 1.7218, "step": 385 }, { "epoch": 0.04484461225675283, "grad_norm": 0.3536418080329895, "learning_rate": 0.0001, "loss": 1.8404, "step": 386 }, { "epoch": 0.04496079000871333, "grad_norm": 0.33434730768203735, "learning_rate": 0.0001, "loss": 1.8322, "step": 387 }, { "epoch": 0.04507696776067383, "grad_norm": 0.32901090383529663, "learning_rate": 0.0001, "loss": 1.7684, "step": 388 }, { "epoch": 0.04519314551263433, "grad_norm": 0.3278455436229706, "learning_rate": 0.0001, "loss": 1.6996, "step": 389 }, { "epoch": 0.04530932326459483, "grad_norm": 0.34721484780311584, "learning_rate": 0.0001, "loss": 1.8898, "step": 390 }, { "epoch": 0.04542550101655533, "grad_norm": 0.3322944939136505, "learning_rate": 0.0001, "loss": 1.6575, "step": 391 }, { "epoch": 0.04554167876851583, "grad_norm": 0.34092095494270325, "learning_rate": 0.0001, "loss": 1.8761, "step": 392 }, { "epoch": 0.045657856520476325, "grad_norm": 0.3071598708629608, "learning_rate": 0.0001, "loss": 1.7564, "step": 393 }, { "epoch": 0.04577403427243683, "grad_norm": 0.3361007273197174, "learning_rate": 0.0001, "loss": 1.7425, "step": 394 }, { "epoch": 0.04589021202439733, "grad_norm": 0.38204225897789, "learning_rate": 0.0001, "loss": 1.9099, "step": 395 }, { "epoch": 0.04600638977635783, "grad_norm": 0.32257604598999023, "learning_rate": 0.0001, "loss": 1.6667, "step": 396 }, { "epoch": 0.04612256752831833, "grad_norm": 0.3314444124698639, "learning_rate": 0.0001, "loss": 1.7518, "step": 397 }, { "epoch": 0.046238745280278826, "grad_norm": 0.34747904539108276, "learning_rate": 0.0001, "loss": 1.7717, "step": 398 }, { "epoch": 0.04635492303223933, "grad_norm": 0.3240096867084503, "learning_rate": 0.0001, "loss": 1.7374, "step": 399 }, { "epoch": 0.046471100784199824, "grad_norm": 0.3071964979171753, "learning_rate": 0.0001, "loss": 1.6034, "step": 400 }, { "epoch": 0.04658727853616033, "grad_norm": 0.3766336739063263, "learning_rate": 0.0001, "loss": 1.9295, "step": 401 }, { "epoch": 0.04670345628812082, "grad_norm": 0.35144826769828796, "learning_rate": 0.0001, "loss": 1.7427, "step": 402 }, { "epoch": 0.046819634040081326, "grad_norm": 0.31121134757995605, "learning_rate": 0.0001, "loss": 1.6415, "step": 403 }, { "epoch": 0.04693581179204182, "grad_norm": 0.3237099051475525, "learning_rate": 0.0001, "loss": 1.7629, "step": 404 }, { "epoch": 0.047051989544002325, "grad_norm": 0.3905102610588074, "learning_rate": 0.0001, "loss": 1.9238, "step": 405 }, { "epoch": 0.04716816729596282, "grad_norm": 0.31731948256492615, "learning_rate": 0.0001, "loss": 1.7172, "step": 406 }, { "epoch": 0.047284345047923323, "grad_norm": 0.33582621812820435, "learning_rate": 0.0001, "loss": 1.7687, "step": 407 }, { "epoch": 0.04740052279988382, "grad_norm": 0.3342641592025757, "learning_rate": 0.0001, "loss": 1.7827, "step": 408 }, { "epoch": 0.04751670055184432, "grad_norm": 0.3307628035545349, "learning_rate": 0.0001, "loss": 1.8255, "step": 409 }, { "epoch": 0.04763287830380482, "grad_norm": 0.3463532030582428, "learning_rate": 0.0001, "loss": 1.7545, "step": 410 }, { "epoch": 0.04774905605576532, "grad_norm": 0.35025402903556824, "learning_rate": 0.0001, "loss": 1.7729, "step": 411 }, { "epoch": 0.047865233807725824, "grad_norm": 0.380147784948349, "learning_rate": 0.0001, "loss": 1.6817, "step": 412 }, { "epoch": 0.04798141155968632, "grad_norm": 0.33736342191696167, "learning_rate": 0.0001, "loss": 1.6815, "step": 413 }, { "epoch": 0.04809758931164682, "grad_norm": 0.33364975452423096, "learning_rate": 0.0001, "loss": 1.726, "step": 414 }, { "epoch": 0.04821376706360732, "grad_norm": 0.3768046498298645, "learning_rate": 0.0001, "loss": 1.8859, "step": 415 }, { "epoch": 0.04832994481556782, "grad_norm": 0.3302350342273712, "learning_rate": 0.0001, "loss": 1.669, "step": 416 }, { "epoch": 0.04844612256752832, "grad_norm": 0.3570808172225952, "learning_rate": 0.0001, "loss": 1.8595, "step": 417 }, { "epoch": 0.04856230031948882, "grad_norm": 0.3274199962615967, "learning_rate": 0.0001, "loss": 1.8074, "step": 418 }, { "epoch": 0.048678478071449316, "grad_norm": 0.36040613055229187, "learning_rate": 0.0001, "loss": 1.8473, "step": 419 }, { "epoch": 0.04879465582340982, "grad_norm": 0.3288051187992096, "learning_rate": 0.0001, "loss": 1.7849, "step": 420 }, { "epoch": 0.048910833575370315, "grad_norm": 0.32802271842956543, "learning_rate": 0.0001, "loss": 1.8159, "step": 421 }, { "epoch": 0.04902701132733082, "grad_norm": 0.3353809118270874, "learning_rate": 0.0001, "loss": 1.7419, "step": 422 }, { "epoch": 0.04914318907929131, "grad_norm": 0.325050413608551, "learning_rate": 0.0001, "loss": 1.7534, "step": 423 }, { "epoch": 0.049259366831251816, "grad_norm": 0.34243983030319214, "learning_rate": 0.0001, "loss": 1.7955, "step": 424 }, { "epoch": 0.04937554458321231, "grad_norm": 0.36178770661354065, "learning_rate": 0.0001, "loss": 1.8266, "step": 425 }, { "epoch": 0.049491722335172815, "grad_norm": 0.35593631863594055, "learning_rate": 0.0001, "loss": 1.8078, "step": 426 }, { "epoch": 0.04960790008713331, "grad_norm": 0.3078707158565521, "learning_rate": 0.0001, "loss": 1.6211, "step": 427 }, { "epoch": 0.049724077839093814, "grad_norm": 0.3557792007923126, "learning_rate": 0.0001, "loss": 1.6497, "step": 428 }, { "epoch": 0.04984025559105431, "grad_norm": 0.3614198863506317, "learning_rate": 0.0001, "loss": 1.8574, "step": 429 }, { "epoch": 0.04995643334301481, "grad_norm": 0.3604932427406311, "learning_rate": 0.0001, "loss": 1.8913, "step": 430 }, { "epoch": 0.050072611094975315, "grad_norm": 0.34103018045425415, "learning_rate": 0.0001, "loss": 1.7272, "step": 431 }, { "epoch": 0.05018878884693581, "grad_norm": 0.34125006198883057, "learning_rate": 0.0001, "loss": 1.8007, "step": 432 }, { "epoch": 0.050304966598896314, "grad_norm": 0.3314953148365021, "learning_rate": 0.0001, "loss": 1.6559, "step": 433 }, { "epoch": 0.05042114435085681, "grad_norm": 0.3640129566192627, "learning_rate": 0.0001, "loss": 1.816, "step": 434 }, { "epoch": 0.05053732210281731, "grad_norm": 0.3177947402000427, "learning_rate": 0.0001, "loss": 1.6548, "step": 435 }, { "epoch": 0.05065349985477781, "grad_norm": 0.3107048273086548, "learning_rate": 0.0001, "loss": 1.6143, "step": 436 }, { "epoch": 0.05076967760673831, "grad_norm": 0.3266063332557678, "learning_rate": 0.0001, "loss": 1.7826, "step": 437 }, { "epoch": 0.05088585535869881, "grad_norm": 0.32090339064598083, "learning_rate": 0.0001, "loss": 1.5662, "step": 438 }, { "epoch": 0.05100203311065931, "grad_norm": 0.3660094141960144, "learning_rate": 0.0001, "loss": 1.9414, "step": 439 }, { "epoch": 0.051118210862619806, "grad_norm": 0.3175399899482727, "learning_rate": 0.0001, "loss": 1.6903, "step": 440 }, { "epoch": 0.05123438861458031, "grad_norm": 0.3207061290740967, "learning_rate": 0.0001, "loss": 1.7275, "step": 441 }, { "epoch": 0.051350566366540805, "grad_norm": 0.34974828362464905, "learning_rate": 0.0001, "loss": 1.8292, "step": 442 }, { "epoch": 0.05146674411850131, "grad_norm": 0.40996474027633667, "learning_rate": 0.0001, "loss": 1.8227, "step": 443 }, { "epoch": 0.051582921870461804, "grad_norm": 0.33745077252388, "learning_rate": 0.0001, "loss": 1.7853, "step": 444 }, { "epoch": 0.051699099622422306, "grad_norm": 0.3403610289096832, "learning_rate": 0.0001, "loss": 1.8465, "step": 445 }, { "epoch": 0.0518152773743828, "grad_norm": 0.30970969796180725, "learning_rate": 0.0001, "loss": 1.6728, "step": 446 }, { "epoch": 0.051931455126343305, "grad_norm": 0.3483198583126068, "learning_rate": 0.0001, "loss": 1.7418, "step": 447 }, { "epoch": 0.05204763287830381, "grad_norm": 0.3291124999523163, "learning_rate": 0.0001, "loss": 1.6788, "step": 448 }, { "epoch": 0.052163810630264304, "grad_norm": 0.320839524269104, "learning_rate": 0.0001, "loss": 1.7708, "step": 449 }, { "epoch": 0.05227998838222481, "grad_norm": 0.3379274308681488, "learning_rate": 0.0001, "loss": 1.7447, "step": 450 }, { "epoch": 0.0523961661341853, "grad_norm": 0.31182000041007996, "learning_rate": 0.0001, "loss": 1.6674, "step": 451 }, { "epoch": 0.052512343886145806, "grad_norm": 0.3308543562889099, "learning_rate": 0.0001, "loss": 1.6077, "step": 452 }, { "epoch": 0.0526285216381063, "grad_norm": 0.34902223944664, "learning_rate": 0.0001, "loss": 1.92, "step": 453 }, { "epoch": 0.052744699390066804, "grad_norm": 0.3305058181285858, "learning_rate": 0.0001, "loss": 1.7275, "step": 454 }, { "epoch": 0.0528608771420273, "grad_norm": 0.3399059772491455, "learning_rate": 0.0001, "loss": 1.6418, "step": 455 }, { "epoch": 0.0529770548939878, "grad_norm": 0.36334654688835144, "learning_rate": 0.0001, "loss": 1.8425, "step": 456 }, { "epoch": 0.0530932326459483, "grad_norm": 0.34613272547721863, "learning_rate": 0.0001, "loss": 1.7303, "step": 457 }, { "epoch": 0.0532094103979088, "grad_norm": 0.3351539373397827, "learning_rate": 0.0001, "loss": 1.656, "step": 458 }, { "epoch": 0.0533255881498693, "grad_norm": 0.34070366621017456, "learning_rate": 0.0001, "loss": 1.6501, "step": 459 }, { "epoch": 0.0534417659018298, "grad_norm": 0.3561154305934906, "learning_rate": 0.0001, "loss": 1.8496, "step": 460 }, { "epoch": 0.053557943653790296, "grad_norm": 0.32410141825675964, "learning_rate": 0.0001, "loss": 1.7718, "step": 461 }, { "epoch": 0.0536741214057508, "grad_norm": 0.3558378219604492, "learning_rate": 0.0001, "loss": 1.7761, "step": 462 }, { "epoch": 0.053790299157711295, "grad_norm": 0.3171548843383789, "learning_rate": 0.0001, "loss": 1.7016, "step": 463 }, { "epoch": 0.0539064769096718, "grad_norm": 0.34194016456604004, "learning_rate": 0.0001, "loss": 1.8411, "step": 464 }, { "epoch": 0.0540226546616323, "grad_norm": 0.3174999952316284, "learning_rate": 0.0001, "loss": 1.7361, "step": 465 }, { "epoch": 0.0541388324135928, "grad_norm": 0.33531078696250916, "learning_rate": 0.0001, "loss": 1.8138, "step": 466 }, { "epoch": 0.0542550101655533, "grad_norm": 0.33532780408859253, "learning_rate": 0.0001, "loss": 1.7827, "step": 467 }, { "epoch": 0.054371187917513795, "grad_norm": 0.40104812383651733, "learning_rate": 0.0001, "loss": 2.0425, "step": 468 }, { "epoch": 0.0544873656694743, "grad_norm": 0.33999940752983093, "learning_rate": 0.0001, "loss": 1.7676, "step": 469 }, { "epoch": 0.054603543421434794, "grad_norm": 0.3406895399093628, "learning_rate": 0.0001, "loss": 1.7311, "step": 470 }, { "epoch": 0.0547197211733953, "grad_norm": 0.32375043630599976, "learning_rate": 0.0001, "loss": 1.7128, "step": 471 }, { "epoch": 0.05483589892535579, "grad_norm": 0.3496737480163574, "learning_rate": 0.0001, "loss": 1.8868, "step": 472 }, { "epoch": 0.054952076677316296, "grad_norm": 0.32865509390830994, "learning_rate": 0.0001, "loss": 1.6974, "step": 473 }, { "epoch": 0.05506825442927679, "grad_norm": 0.3332083225250244, "learning_rate": 0.0001, "loss": 1.7202, "step": 474 }, { "epoch": 0.055184432181237295, "grad_norm": 0.3402409851551056, "learning_rate": 0.0001, "loss": 1.8425, "step": 475 }, { "epoch": 0.05530060993319779, "grad_norm": 0.37318024039268494, "learning_rate": 0.0001, "loss": 1.6539, "step": 476 }, { "epoch": 0.05541678768515829, "grad_norm": 0.3607703745365143, "learning_rate": 0.0001, "loss": 1.8452, "step": 477 }, { "epoch": 0.05553296543711879, "grad_norm": 0.3592975437641144, "learning_rate": 0.0001, "loss": 1.8463, "step": 478 }, { "epoch": 0.05564914318907929, "grad_norm": 0.3610447645187378, "learning_rate": 0.0001, "loss": 1.8906, "step": 479 }, { "epoch": 0.05576532094103979, "grad_norm": 0.33733242750167847, "learning_rate": 0.0001, "loss": 1.8464, "step": 480 }, { "epoch": 0.05588149869300029, "grad_norm": 0.35732340812683105, "learning_rate": 0.0001, "loss": 1.8149, "step": 481 }, { "epoch": 0.05599767644496079, "grad_norm": 0.3425617814064026, "learning_rate": 0.0001, "loss": 1.8263, "step": 482 }, { "epoch": 0.05611385419692129, "grad_norm": 0.33323052525520325, "learning_rate": 0.0001, "loss": 1.7538, "step": 483 }, { "epoch": 0.05623003194888179, "grad_norm": 0.35951876640319824, "learning_rate": 0.0001, "loss": 1.8618, "step": 484 }, { "epoch": 0.05634620970084229, "grad_norm": 0.3292408883571625, "learning_rate": 0.0001, "loss": 1.6186, "step": 485 }, { "epoch": 0.05646238745280279, "grad_norm": 0.37031203508377075, "learning_rate": 0.0001, "loss": 1.7308, "step": 486 }, { "epoch": 0.05657856520476329, "grad_norm": 0.32847365736961365, "learning_rate": 0.0001, "loss": 1.7937, "step": 487 }, { "epoch": 0.05669474295672379, "grad_norm": 0.3220025599002838, "learning_rate": 0.0001, "loss": 1.6471, "step": 488 }, { "epoch": 0.056810920708684286, "grad_norm": 0.36746835708618164, "learning_rate": 0.0001, "loss": 1.6696, "step": 489 }, { "epoch": 0.05692709846064479, "grad_norm": 0.3531992733478546, "learning_rate": 0.0001, "loss": 1.7423, "step": 490 }, { "epoch": 0.057043276212605284, "grad_norm": 0.3173719346523285, "learning_rate": 0.0001, "loss": 1.6648, "step": 491 }, { "epoch": 0.05715945396456579, "grad_norm": 0.35342299938201904, "learning_rate": 0.0001, "loss": 1.7122, "step": 492 }, { "epoch": 0.05727563171652628, "grad_norm": 0.3307000696659088, "learning_rate": 0.0001, "loss": 1.7597, "step": 493 }, { "epoch": 0.057391809468486786, "grad_norm": 0.3486880362033844, "learning_rate": 0.0001, "loss": 1.795, "step": 494 }, { "epoch": 0.05750798722044728, "grad_norm": 0.37966904044151306, "learning_rate": 0.0001, "loss": 1.8754, "step": 495 }, { "epoch": 0.057624164972407785, "grad_norm": 0.3487970530986786, "learning_rate": 0.0001, "loss": 1.8312, "step": 496 }, { "epoch": 0.05774034272436828, "grad_norm": 0.30827823281288147, "learning_rate": 0.0001, "loss": 1.6478, "step": 497 }, { "epoch": 0.057856520476328784, "grad_norm": 0.33888596296310425, "learning_rate": 0.0001, "loss": 1.7648, "step": 498 }, { "epoch": 0.05797269822828928, "grad_norm": 0.35201674699783325, "learning_rate": 0.0001, "loss": 1.7952, "step": 499 }, { "epoch": 0.05808887598024978, "grad_norm": 0.35336655378341675, "learning_rate": 0.0001, "loss": 1.808, "step": 500 }, { "epoch": 0.058205053732210285, "grad_norm": 0.36219102144241333, "learning_rate": 0.0001, "loss": 1.7659, "step": 501 }, { "epoch": 0.05832123148417078, "grad_norm": 0.3310224711894989, "learning_rate": 0.0001, "loss": 1.6516, "step": 502 }, { "epoch": 0.058437409236131284, "grad_norm": 0.372923344373703, "learning_rate": 0.0001, "loss": 1.7554, "step": 503 }, { "epoch": 0.05855358698809178, "grad_norm": 0.358759343624115, "learning_rate": 0.0001, "loss": 1.734, "step": 504 }, { "epoch": 0.05866976474005228, "grad_norm": 0.41483983397483826, "learning_rate": 0.0001, "loss": 1.7648, "step": 505 }, { "epoch": 0.05878594249201278, "grad_norm": 0.3552764058113098, "learning_rate": 0.0001, "loss": 1.8973, "step": 506 }, { "epoch": 0.05890212024397328, "grad_norm": 0.3468693494796753, "learning_rate": 0.0001, "loss": 1.7336, "step": 507 }, { "epoch": 0.05901829799593378, "grad_norm": 0.359375536441803, "learning_rate": 0.0001, "loss": 1.8677, "step": 508 }, { "epoch": 0.05913447574789428, "grad_norm": 0.32384392619132996, "learning_rate": 0.0001, "loss": 1.5734, "step": 509 }, { "epoch": 0.059250653499854776, "grad_norm": 0.3563079237937927, "learning_rate": 0.0001, "loss": 1.8236, "step": 510 }, { "epoch": 0.05936683125181528, "grad_norm": 0.32716381549835205, "learning_rate": 0.0001, "loss": 1.8558, "step": 511 }, { "epoch": 0.059483009003775775, "grad_norm": 0.3453499674797058, "learning_rate": 0.0001, "loss": 1.8389, "step": 512 }, { "epoch": 0.05959918675573628, "grad_norm": 0.32241371273994446, "learning_rate": 0.0001, "loss": 1.5502, "step": 513 }, { "epoch": 0.05971536450769677, "grad_norm": 0.34780648350715637, "learning_rate": 0.0001, "loss": 1.7995, "step": 514 }, { "epoch": 0.059831542259657276, "grad_norm": 0.3340502977371216, "learning_rate": 0.0001, "loss": 1.8202, "step": 515 }, { "epoch": 0.05994772001161777, "grad_norm": 0.331016480922699, "learning_rate": 0.0001, "loss": 1.8144, "step": 516 }, { "epoch": 0.060063897763578275, "grad_norm": 0.34053540229797363, "learning_rate": 0.0001, "loss": 1.6881, "step": 517 }, { "epoch": 0.06018007551553877, "grad_norm": 0.31866925954818726, "learning_rate": 0.0001, "loss": 1.541, "step": 518 }, { "epoch": 0.060296253267499274, "grad_norm": 0.36000797152519226, "learning_rate": 0.0001, "loss": 1.92, "step": 519 }, { "epoch": 0.06041243101945978, "grad_norm": 0.37475964426994324, "learning_rate": 0.0001, "loss": 1.8022, "step": 520 }, { "epoch": 0.06052860877142027, "grad_norm": 0.3554963767528534, "learning_rate": 0.0001, "loss": 1.8536, "step": 521 }, { "epoch": 0.060644786523380775, "grad_norm": 0.34274694323539734, "learning_rate": 0.0001, "loss": 1.7868, "step": 522 }, { "epoch": 0.06076096427534127, "grad_norm": 0.31418636441230774, "learning_rate": 0.0001, "loss": 1.7159, "step": 523 }, { "epoch": 0.060877142027301774, "grad_norm": 0.3319328725337982, "learning_rate": 0.0001, "loss": 1.7644, "step": 524 }, { "epoch": 0.06099331977926227, "grad_norm": 0.34280523657798767, "learning_rate": 0.0001, "loss": 1.759, "step": 525 }, { "epoch": 0.06110949753122277, "grad_norm": 0.3405478298664093, "learning_rate": 0.0001, "loss": 1.7908, "step": 526 }, { "epoch": 0.06122567528318327, "grad_norm": 0.34357553720474243, "learning_rate": 0.0001, "loss": 1.6256, "step": 527 }, { "epoch": 0.06134185303514377, "grad_norm": 0.3327886164188385, "learning_rate": 0.0001, "loss": 1.825, "step": 528 }, { "epoch": 0.06145803078710427, "grad_norm": 0.31985750794410706, "learning_rate": 0.0001, "loss": 1.5728, "step": 529 }, { "epoch": 0.06157420853906477, "grad_norm": 0.32863008975982666, "learning_rate": 0.0001, "loss": 1.7012, "step": 530 }, { "epoch": 0.061690386291025266, "grad_norm": 0.33859503269195557, "learning_rate": 0.0001, "loss": 1.7983, "step": 531 }, { "epoch": 0.06180656404298577, "grad_norm": 0.354203999042511, "learning_rate": 0.0001, "loss": 1.801, "step": 532 }, { "epoch": 0.061922741794946265, "grad_norm": 0.3500482439994812, "learning_rate": 0.0001, "loss": 1.7481, "step": 533 }, { "epoch": 0.06203891954690677, "grad_norm": 0.3380871117115021, "learning_rate": 0.0001, "loss": 1.7285, "step": 534 }, { "epoch": 0.062155097298867264, "grad_norm": 0.3638668656349182, "learning_rate": 0.0001, "loss": 1.8013, "step": 535 }, { "epoch": 0.06227127505082777, "grad_norm": 0.3231678903102875, "learning_rate": 0.0001, "loss": 1.6436, "step": 536 }, { "epoch": 0.06238745280278827, "grad_norm": 0.3448564112186432, "learning_rate": 0.0001, "loss": 1.7836, "step": 537 }, { "epoch": 0.06250363055474877, "grad_norm": 0.34526991844177246, "learning_rate": 0.0001, "loss": 1.7609, "step": 538 }, { "epoch": 0.06261980830670927, "grad_norm": 0.351270467042923, "learning_rate": 0.0001, "loss": 1.8093, "step": 539 }, { "epoch": 0.06273598605866977, "grad_norm": 0.33383870124816895, "learning_rate": 0.0001, "loss": 1.7871, "step": 540 }, { "epoch": 0.06285216381063026, "grad_norm": 0.31945285201072693, "learning_rate": 0.0001, "loss": 1.5509, "step": 541 }, { "epoch": 0.06296834156259076, "grad_norm": 0.32655853033065796, "learning_rate": 0.0001, "loss": 1.7056, "step": 542 }, { "epoch": 0.06308451931455127, "grad_norm": 0.3944481313228607, "learning_rate": 0.0001, "loss": 1.8958, "step": 543 }, { "epoch": 0.06320069706651177, "grad_norm": 0.38392776250839233, "learning_rate": 0.0001, "loss": 1.8377, "step": 544 }, { "epoch": 0.06331687481847226, "grad_norm": 0.3638809621334076, "learning_rate": 0.0001, "loss": 1.8184, "step": 545 }, { "epoch": 0.06343305257043276, "grad_norm": 0.3336743116378784, "learning_rate": 0.0001, "loss": 1.6553, "step": 546 }, { "epoch": 0.06354923032239326, "grad_norm": 0.32870176434516907, "learning_rate": 0.0001, "loss": 1.7085, "step": 547 }, { "epoch": 0.06366540807435377, "grad_norm": 0.3251485526561737, "learning_rate": 0.0001, "loss": 1.797, "step": 548 }, { "epoch": 0.06378158582631425, "grad_norm": 0.36087340116500854, "learning_rate": 0.0001, "loss": 1.8023, "step": 549 }, { "epoch": 0.06389776357827476, "grad_norm": 0.35436710715293884, "learning_rate": 0.0001, "loss": 1.7703, "step": 550 }, { "epoch": 0.06401394133023526, "grad_norm": 0.3625239133834839, "learning_rate": 0.0001, "loss": 1.7345, "step": 551 }, { "epoch": 0.06413011908219576, "grad_norm": 0.33559513092041016, "learning_rate": 0.0001, "loss": 1.6253, "step": 552 }, { "epoch": 0.06424629683415625, "grad_norm": 0.3335849642753601, "learning_rate": 0.0001, "loss": 1.832, "step": 553 }, { "epoch": 0.06436247458611676, "grad_norm": 0.3296360969543457, "learning_rate": 0.0001, "loss": 1.7504, "step": 554 }, { "epoch": 0.06447865233807726, "grad_norm": 0.3487899601459503, "learning_rate": 0.0001, "loss": 1.7819, "step": 555 }, { "epoch": 0.06459483009003776, "grad_norm": 0.3731647729873657, "learning_rate": 0.0001, "loss": 2.0035, "step": 556 }, { "epoch": 0.06471100784199826, "grad_norm": 0.33725181221961975, "learning_rate": 0.0001, "loss": 1.8034, "step": 557 }, { "epoch": 0.06482718559395875, "grad_norm": 0.3277061879634857, "learning_rate": 0.0001, "loss": 1.4806, "step": 558 }, { "epoch": 0.06494336334591926, "grad_norm": 0.3473774790763855, "learning_rate": 0.0001, "loss": 1.7392, "step": 559 }, { "epoch": 0.06505954109787976, "grad_norm": 0.35049760341644287, "learning_rate": 0.0001, "loss": 1.6481, "step": 560 }, { "epoch": 0.06517571884984026, "grad_norm": 0.3388431668281555, "learning_rate": 0.0001, "loss": 1.7494, "step": 561 }, { "epoch": 0.06529189660180075, "grad_norm": 0.35445770621299744, "learning_rate": 0.0001, "loss": 1.746, "step": 562 }, { "epoch": 0.06540807435376125, "grad_norm": 0.3392694592475891, "learning_rate": 0.0001, "loss": 1.8481, "step": 563 }, { "epoch": 0.06552425210572176, "grad_norm": 0.3575725257396698, "learning_rate": 0.0001, "loss": 1.7864, "step": 564 }, { "epoch": 0.06564042985768226, "grad_norm": 0.3979930877685547, "learning_rate": 0.0001, "loss": 1.8221, "step": 565 }, { "epoch": 0.06575660760964275, "grad_norm": 0.3467651605606079, "learning_rate": 0.0001, "loss": 1.8548, "step": 566 }, { "epoch": 0.06587278536160325, "grad_norm": 0.3466176390647888, "learning_rate": 0.0001, "loss": 1.9021, "step": 567 }, { "epoch": 0.06598896311356375, "grad_norm": 0.33713239431381226, "learning_rate": 0.0001, "loss": 1.835, "step": 568 }, { "epoch": 0.06610514086552426, "grad_norm": 0.3413597047328949, "learning_rate": 0.0001, "loss": 1.8019, "step": 569 }, { "epoch": 0.06622131861748475, "grad_norm": 0.40804192423820496, "learning_rate": 0.0001, "loss": 1.9077, "step": 570 }, { "epoch": 0.06633749636944525, "grad_norm": 0.36405694484710693, "learning_rate": 0.0001, "loss": 1.7789, "step": 571 }, { "epoch": 0.06645367412140575, "grad_norm": 0.3612009882926941, "learning_rate": 0.0001, "loss": 1.758, "step": 572 }, { "epoch": 0.06656985187336625, "grad_norm": 0.3526972830295563, "learning_rate": 0.0001, "loss": 1.9189, "step": 573 }, { "epoch": 0.06668602962532676, "grad_norm": 0.34476232528686523, "learning_rate": 0.0001, "loss": 1.5721, "step": 574 }, { "epoch": 0.06680220737728725, "grad_norm": 0.35720011591911316, "learning_rate": 0.0001, "loss": 1.9326, "step": 575 }, { "epoch": 0.06691838512924775, "grad_norm": 0.35136792063713074, "learning_rate": 0.0001, "loss": 1.8546, "step": 576 }, { "epoch": 0.06703456288120825, "grad_norm": 0.32413557171821594, "learning_rate": 0.0001, "loss": 1.5629, "step": 577 }, { "epoch": 0.06715074063316875, "grad_norm": 0.321250319480896, "learning_rate": 0.0001, "loss": 1.7531, "step": 578 }, { "epoch": 0.06726691838512924, "grad_norm": 0.34484270215034485, "learning_rate": 0.0001, "loss": 1.7195, "step": 579 }, { "epoch": 0.06738309613708975, "grad_norm": 0.34433478116989136, "learning_rate": 0.0001, "loss": 1.7748, "step": 580 }, { "epoch": 0.06749927388905025, "grad_norm": 0.3812878727912903, "learning_rate": 0.0001, "loss": 1.6951, "step": 581 }, { "epoch": 0.06761545164101075, "grad_norm": 0.344705730676651, "learning_rate": 0.0001, "loss": 1.5685, "step": 582 }, { "epoch": 0.06773162939297124, "grad_norm": 0.3333836793899536, "learning_rate": 0.0001, "loss": 1.6683, "step": 583 }, { "epoch": 0.06784780714493174, "grad_norm": 0.35086333751678467, "learning_rate": 0.0001, "loss": 1.7734, "step": 584 }, { "epoch": 0.06796398489689225, "grad_norm": 0.3325541019439697, "learning_rate": 0.0001, "loss": 1.609, "step": 585 }, { "epoch": 0.06808016264885275, "grad_norm": 0.35840341448783875, "learning_rate": 0.0001, "loss": 1.6111, "step": 586 }, { "epoch": 0.06819634040081324, "grad_norm": 0.33432942628860474, "learning_rate": 0.0001, "loss": 1.8359, "step": 587 }, { "epoch": 0.06831251815277374, "grad_norm": 0.34798067808151245, "learning_rate": 0.0001, "loss": 1.6117, "step": 588 }, { "epoch": 0.06842869590473424, "grad_norm": 0.319845974445343, "learning_rate": 0.0001, "loss": 1.6175, "step": 589 }, { "epoch": 0.06854487365669475, "grad_norm": 0.37829452753067017, "learning_rate": 0.0001, "loss": 1.8517, "step": 590 }, { "epoch": 0.06866105140865525, "grad_norm": 0.35298144817352295, "learning_rate": 0.0001, "loss": 1.801, "step": 591 }, { "epoch": 0.06877722916061574, "grad_norm": 0.40371862053871155, "learning_rate": 0.0001, "loss": 1.5935, "step": 592 }, { "epoch": 0.06889340691257624, "grad_norm": 0.3450127840042114, "learning_rate": 0.0001, "loss": 1.6476, "step": 593 }, { "epoch": 0.06900958466453674, "grad_norm": 0.3465590178966522, "learning_rate": 0.0001, "loss": 1.7291, "step": 594 }, { "epoch": 0.06912576241649725, "grad_norm": 0.3387679159641266, "learning_rate": 0.0001, "loss": 1.7121, "step": 595 }, { "epoch": 0.06924194016845774, "grad_norm": 0.3386651277542114, "learning_rate": 0.0001, "loss": 1.7596, "step": 596 }, { "epoch": 0.06935811792041824, "grad_norm": 0.3551906645298004, "learning_rate": 0.0001, "loss": 1.5952, "step": 597 }, { "epoch": 0.06947429567237874, "grad_norm": 0.37498459219932556, "learning_rate": 0.0001, "loss": 1.8622, "step": 598 }, { "epoch": 0.06959047342433924, "grad_norm": 0.3397311866283417, "learning_rate": 0.0001, "loss": 1.6644, "step": 599 }, { "epoch": 0.06970665117629973, "grad_norm": 0.34366878867149353, "learning_rate": 0.0001, "loss": 1.7838, "step": 600 }, { "epoch": 0.06982282892826024, "grad_norm": 0.3466067910194397, "learning_rate": 0.0001, "loss": 1.8222, "step": 601 }, { "epoch": 0.06993900668022074, "grad_norm": 0.35218632221221924, "learning_rate": 0.0001, "loss": 1.7995, "step": 602 }, { "epoch": 0.07005518443218124, "grad_norm": 0.3305307924747467, "learning_rate": 0.0001, "loss": 1.7099, "step": 603 }, { "epoch": 0.07017136218414173, "grad_norm": 0.3424377143383026, "learning_rate": 0.0001, "loss": 1.8505, "step": 604 }, { "epoch": 0.07028753993610223, "grad_norm": 0.33201947808265686, "learning_rate": 0.0001, "loss": 1.7887, "step": 605 }, { "epoch": 0.07040371768806274, "grad_norm": 0.3492118716239929, "learning_rate": 0.0001, "loss": 1.8554, "step": 606 }, { "epoch": 0.07051989544002324, "grad_norm": 0.3389701843261719, "learning_rate": 0.0001, "loss": 1.7442, "step": 607 }, { "epoch": 0.07063607319198374, "grad_norm": 0.3567183315753937, "learning_rate": 0.0001, "loss": 1.8379, "step": 608 }, { "epoch": 0.07075225094394423, "grad_norm": 0.342669814825058, "learning_rate": 0.0001, "loss": 1.7222, "step": 609 }, { "epoch": 0.07086842869590473, "grad_norm": 0.37096837162971497, "learning_rate": 0.0001, "loss": 1.8548, "step": 610 }, { "epoch": 0.07098460644786524, "grad_norm": 0.3211749494075775, "learning_rate": 0.0001, "loss": 1.7913, "step": 611 }, { "epoch": 0.07110078419982574, "grad_norm": 0.3553162217140198, "learning_rate": 0.0001, "loss": 1.8448, "step": 612 }, { "epoch": 0.07121696195178623, "grad_norm": 0.3596995174884796, "learning_rate": 0.0001, "loss": 1.7092, "step": 613 }, { "epoch": 0.07133313970374673, "grad_norm": 0.32127490639686584, "learning_rate": 0.0001, "loss": 1.6311, "step": 614 }, { "epoch": 0.07144931745570723, "grad_norm": 0.361713707447052, "learning_rate": 0.0001, "loss": 1.8077, "step": 615 }, { "epoch": 0.07156549520766774, "grad_norm": 0.3255080282688141, "learning_rate": 0.0001, "loss": 1.7618, "step": 616 }, { "epoch": 0.07168167295962823, "grad_norm": 0.3420798182487488, "learning_rate": 0.0001, "loss": 1.7544, "step": 617 }, { "epoch": 0.07179785071158873, "grad_norm": 0.39319682121276855, "learning_rate": 0.0001, "loss": 1.797, "step": 618 }, { "epoch": 0.07191402846354923, "grad_norm": 0.33742591738700867, "learning_rate": 0.0001, "loss": 1.4727, "step": 619 }, { "epoch": 0.07203020621550973, "grad_norm": 0.3327065408229828, "learning_rate": 0.0001, "loss": 1.7358, "step": 620 }, { "epoch": 0.07214638396747022, "grad_norm": 0.3351439833641052, "learning_rate": 0.0001, "loss": 1.6805, "step": 621 }, { "epoch": 0.07226256171943073, "grad_norm": 0.3432386815547943, "learning_rate": 0.0001, "loss": 1.7511, "step": 622 }, { "epoch": 0.07237873947139123, "grad_norm": 0.35164207220077515, "learning_rate": 0.0001, "loss": 1.7889, "step": 623 }, { "epoch": 0.07249491722335173, "grad_norm": 0.34505143761634827, "learning_rate": 0.0001, "loss": 1.7717, "step": 624 }, { "epoch": 0.07261109497531222, "grad_norm": 0.34696313738822937, "learning_rate": 0.0001, "loss": 1.7991, "step": 625 }, { "epoch": 0.07272727272727272, "grad_norm": 0.33465588092803955, "learning_rate": 0.0001, "loss": 1.8238, "step": 626 }, { "epoch": 0.07284345047923323, "grad_norm": 0.3633832335472107, "learning_rate": 0.0001, "loss": 1.808, "step": 627 }, { "epoch": 0.07295962823119373, "grad_norm": 0.3349045217037201, "learning_rate": 0.0001, "loss": 1.6797, "step": 628 }, { "epoch": 0.07307580598315423, "grad_norm": 0.3547269105911255, "learning_rate": 0.0001, "loss": 1.7475, "step": 629 }, { "epoch": 0.07319198373511472, "grad_norm": 0.41657668352127075, "learning_rate": 0.0001, "loss": 2.065, "step": 630 }, { "epoch": 0.07330816148707522, "grad_norm": 0.33551225066185, "learning_rate": 0.0001, "loss": 1.6979, "step": 631 }, { "epoch": 0.07342433923903573, "grad_norm": 0.3287172317504883, "learning_rate": 0.0001, "loss": 1.6711, "step": 632 }, { "epoch": 0.07354051699099623, "grad_norm": 0.3462834060192108, "learning_rate": 0.0001, "loss": 1.7681, "step": 633 }, { "epoch": 0.07365669474295672, "grad_norm": 0.32798144221305847, "learning_rate": 0.0001, "loss": 1.7706, "step": 634 }, { "epoch": 0.07377287249491722, "grad_norm": 0.3188357651233673, "learning_rate": 0.0001, "loss": 1.6755, "step": 635 }, { "epoch": 0.07388905024687772, "grad_norm": 0.34357234835624695, "learning_rate": 0.0001, "loss": 1.7105, "step": 636 }, { "epoch": 0.07400522799883823, "grad_norm": 0.3531816005706787, "learning_rate": 0.0001, "loss": 1.8203, "step": 637 }, { "epoch": 0.07412140575079872, "grad_norm": 0.3235273063182831, "learning_rate": 0.0001, "loss": 1.6864, "step": 638 }, { "epoch": 0.07423758350275922, "grad_norm": 0.3293229639530182, "learning_rate": 0.0001, "loss": 1.7596, "step": 639 }, { "epoch": 0.07435376125471972, "grad_norm": 0.3663218021392822, "learning_rate": 0.0001, "loss": 1.8909, "step": 640 }, { "epoch": 0.07446993900668022, "grad_norm": 0.33595624566078186, "learning_rate": 0.0001, "loss": 1.661, "step": 641 }, { "epoch": 0.07458611675864071, "grad_norm": 0.3276788890361786, "learning_rate": 0.0001, "loss": 1.6903, "step": 642 }, { "epoch": 0.07470229451060122, "grad_norm": 0.3766964077949524, "learning_rate": 0.0001, "loss": 1.9409, "step": 643 }, { "epoch": 0.07481847226256172, "grad_norm": 0.3381744623184204, "learning_rate": 0.0001, "loss": 1.6455, "step": 644 }, { "epoch": 0.07493465001452222, "grad_norm": 0.34408965706825256, "learning_rate": 0.0001, "loss": 1.5929, "step": 645 }, { "epoch": 0.07505082776648273, "grad_norm": 0.33645275235176086, "learning_rate": 0.0001, "loss": 1.7188, "step": 646 }, { "epoch": 0.07516700551844321, "grad_norm": 0.3625565469264984, "learning_rate": 0.0001, "loss": 1.798, "step": 647 }, { "epoch": 0.07528318327040372, "grad_norm": 0.3350875973701477, "learning_rate": 0.0001, "loss": 1.7037, "step": 648 }, { "epoch": 0.07539936102236422, "grad_norm": 0.34720659255981445, "learning_rate": 0.0001, "loss": 1.6187, "step": 649 }, { "epoch": 0.07551553877432472, "grad_norm": 0.3373357951641083, "learning_rate": 0.0001, "loss": 1.7226, "step": 650 }, { "epoch": 0.07563171652628521, "grad_norm": 0.3362538516521454, "learning_rate": 0.0001, "loss": 1.6718, "step": 651 }, { "epoch": 0.07574789427824571, "grad_norm": 0.34010645747184753, "learning_rate": 0.0001, "loss": 1.7155, "step": 652 }, { "epoch": 0.07586407203020622, "grad_norm": 0.31828391551971436, "learning_rate": 0.0001, "loss": 1.6605, "step": 653 }, { "epoch": 0.07598024978216672, "grad_norm": 0.32539987564086914, "learning_rate": 0.0001, "loss": 1.7795, "step": 654 }, { "epoch": 0.07609642753412721, "grad_norm": 0.32553842663764954, "learning_rate": 0.0001, "loss": 1.7163, "step": 655 }, { "epoch": 0.07621260528608771, "grad_norm": 0.35710620880126953, "learning_rate": 0.0001, "loss": 1.8839, "step": 656 }, { "epoch": 0.07632878303804821, "grad_norm": 0.33389732241630554, "learning_rate": 0.0001, "loss": 1.7109, "step": 657 }, { "epoch": 0.07644496079000872, "grad_norm": 0.3359222412109375, "learning_rate": 0.0001, "loss": 1.9214, "step": 658 }, { "epoch": 0.0765611385419692, "grad_norm": 0.3710361123085022, "learning_rate": 0.0001, "loss": 1.7439, "step": 659 }, { "epoch": 0.07667731629392971, "grad_norm": 0.34122204780578613, "learning_rate": 0.0001, "loss": 1.8186, "step": 660 }, { "epoch": 0.07679349404589021, "grad_norm": 0.3368107080459595, "learning_rate": 0.0001, "loss": 1.8068, "step": 661 }, { "epoch": 0.07690967179785071, "grad_norm": 0.3254035711288452, "learning_rate": 0.0001, "loss": 1.5137, "step": 662 }, { "epoch": 0.07702584954981122, "grad_norm": 0.3608943223953247, "learning_rate": 0.0001, "loss": 1.9746, "step": 663 }, { "epoch": 0.0771420273017717, "grad_norm": 0.3355536162853241, "learning_rate": 0.0001, "loss": 1.6237, "step": 664 }, { "epoch": 0.07725820505373221, "grad_norm": 0.3630955219268799, "learning_rate": 0.0001, "loss": 1.7642, "step": 665 }, { "epoch": 0.07737438280569271, "grad_norm": 0.3425436019897461, "learning_rate": 0.0001, "loss": 1.8083, "step": 666 }, { "epoch": 0.07749056055765322, "grad_norm": 0.3148491382598877, "learning_rate": 0.0001, "loss": 1.4924, "step": 667 }, { "epoch": 0.0776067383096137, "grad_norm": 0.3225914239883423, "learning_rate": 0.0001, "loss": 1.6458, "step": 668 }, { "epoch": 0.07772291606157421, "grad_norm": 0.37254008650779724, "learning_rate": 0.0001, "loss": 1.8129, "step": 669 }, { "epoch": 0.07783909381353471, "grad_norm": 0.3412669897079468, "learning_rate": 0.0001, "loss": 1.8073, "step": 670 }, { "epoch": 0.07795527156549521, "grad_norm": 0.333635151386261, "learning_rate": 0.0001, "loss": 1.6577, "step": 671 }, { "epoch": 0.0780714493174557, "grad_norm": 0.36147254705429077, "learning_rate": 0.0001, "loss": 1.8375, "step": 672 }, { "epoch": 0.0781876270694162, "grad_norm": 0.3428162932395935, "learning_rate": 0.0001, "loss": 1.7767, "step": 673 }, { "epoch": 0.07830380482137671, "grad_norm": 0.3516107499599457, "learning_rate": 0.0001, "loss": 1.7804, "step": 674 }, { "epoch": 0.07841998257333721, "grad_norm": 0.37563571333885193, "learning_rate": 0.0001, "loss": 1.7975, "step": 675 }, { "epoch": 0.0785361603252977, "grad_norm": 0.32837507128715515, "learning_rate": 0.0001, "loss": 1.6925, "step": 676 }, { "epoch": 0.0786523380772582, "grad_norm": 0.34303468465805054, "learning_rate": 0.0001, "loss": 1.8518, "step": 677 }, { "epoch": 0.0787685158292187, "grad_norm": 0.3297956883907318, "learning_rate": 0.0001, "loss": 1.7057, "step": 678 }, { "epoch": 0.07888469358117921, "grad_norm": 0.34342435002326965, "learning_rate": 0.0001, "loss": 1.636, "step": 679 }, { "epoch": 0.07900087133313971, "grad_norm": 0.3558964729309082, "learning_rate": 0.0001, "loss": 1.7076, "step": 680 }, { "epoch": 0.0791170490851002, "grad_norm": 0.3384302258491516, "learning_rate": 0.0001, "loss": 1.8352, "step": 681 }, { "epoch": 0.0792332268370607, "grad_norm": 0.33917075395584106, "learning_rate": 0.0001, "loss": 1.6065, "step": 682 }, { "epoch": 0.0793494045890212, "grad_norm": 0.37113896012306213, "learning_rate": 0.0001, "loss": 1.7594, "step": 683 }, { "epoch": 0.07946558234098171, "grad_norm": 0.34009361267089844, "learning_rate": 0.0001, "loss": 1.611, "step": 684 }, { "epoch": 0.0795817600929422, "grad_norm": 0.35961294174194336, "learning_rate": 0.0001, "loss": 1.7863, "step": 685 }, { "epoch": 0.0796979378449027, "grad_norm": 0.3599853813648224, "learning_rate": 0.0001, "loss": 1.7754, "step": 686 }, { "epoch": 0.0798141155968632, "grad_norm": 0.33733391761779785, "learning_rate": 0.0001, "loss": 1.6633, "step": 687 }, { "epoch": 0.0799302933488237, "grad_norm": 0.33123722672462463, "learning_rate": 0.0001, "loss": 1.7556, "step": 688 }, { "epoch": 0.0800464711007842, "grad_norm": 0.3545984923839569, "learning_rate": 0.0001, "loss": 1.5852, "step": 689 }, { "epoch": 0.0801626488527447, "grad_norm": 0.3520684242248535, "learning_rate": 0.0001, "loss": 1.7908, "step": 690 }, { "epoch": 0.0802788266047052, "grad_norm": 0.364214152097702, "learning_rate": 0.0001, "loss": 1.8752, "step": 691 }, { "epoch": 0.0803950043566657, "grad_norm": 0.35055190324783325, "learning_rate": 0.0001, "loss": 1.6875, "step": 692 }, { "epoch": 0.08051118210862619, "grad_norm": 0.3547899127006531, "learning_rate": 0.0001, "loss": 1.6266, "step": 693 }, { "epoch": 0.0806273598605867, "grad_norm": 0.31684044003486633, "learning_rate": 0.0001, "loss": 1.4333, "step": 694 }, { "epoch": 0.0807435376125472, "grad_norm": 0.3523019254207611, "learning_rate": 0.0001, "loss": 1.7933, "step": 695 }, { "epoch": 0.0808597153645077, "grad_norm": 0.379930317401886, "learning_rate": 0.0001, "loss": 1.8274, "step": 696 }, { "epoch": 0.08097589311646819, "grad_norm": 0.32600757479667664, "learning_rate": 0.0001, "loss": 1.6605, "step": 697 }, { "epoch": 0.08109207086842869, "grad_norm": 0.33486902713775635, "learning_rate": 0.0001, "loss": 1.7029, "step": 698 }, { "epoch": 0.0812082486203892, "grad_norm": 0.3586275577545166, "learning_rate": 0.0001, "loss": 1.7762, "step": 699 }, { "epoch": 0.0813244263723497, "grad_norm": 0.34406691789627075, "learning_rate": 0.0001, "loss": 1.5871, "step": 700 }, { "epoch": 0.0814406041243102, "grad_norm": 0.35073554515838623, "learning_rate": 0.0001, "loss": 1.776, "step": 701 }, { "epoch": 0.08155678187627069, "grad_norm": 0.4370097517967224, "learning_rate": 0.0001, "loss": 1.9901, "step": 702 }, { "epoch": 0.08167295962823119, "grad_norm": 0.36396339535713196, "learning_rate": 0.0001, "loss": 1.8286, "step": 703 }, { "epoch": 0.0817891373801917, "grad_norm": 0.31740906834602356, "learning_rate": 0.0001, "loss": 1.6781, "step": 704 }, { "epoch": 0.0819053151321522, "grad_norm": 0.36947962641716003, "learning_rate": 0.0001, "loss": 1.8666, "step": 705 }, { "epoch": 0.08202149288411269, "grad_norm": 0.3637601435184479, "learning_rate": 0.0001, "loss": 1.8011, "step": 706 }, { "epoch": 0.08213767063607319, "grad_norm": 0.35673728585243225, "learning_rate": 0.0001, "loss": 1.8199, "step": 707 }, { "epoch": 0.08225384838803369, "grad_norm": 0.3384333550930023, "learning_rate": 0.0001, "loss": 1.7729, "step": 708 }, { "epoch": 0.0823700261399942, "grad_norm": 0.3344199061393738, "learning_rate": 0.0001, "loss": 1.797, "step": 709 }, { "epoch": 0.08248620389195468, "grad_norm": 0.3451163172721863, "learning_rate": 0.0001, "loss": 1.7087, "step": 710 }, { "epoch": 0.08260238164391519, "grad_norm": 0.3421926200389862, "learning_rate": 0.0001, "loss": 1.8762, "step": 711 }, { "epoch": 0.08271855939587569, "grad_norm": 0.32630541920661926, "learning_rate": 0.0001, "loss": 1.6638, "step": 712 }, { "epoch": 0.0828347371478362, "grad_norm": 0.35678330063819885, "learning_rate": 0.0001, "loss": 1.915, "step": 713 }, { "epoch": 0.08295091489979668, "grad_norm": 0.3382868766784668, "learning_rate": 0.0001, "loss": 1.6471, "step": 714 }, { "epoch": 0.08306709265175719, "grad_norm": 0.3485075831413269, "learning_rate": 0.0001, "loss": 1.7365, "step": 715 }, { "epoch": 0.08318327040371769, "grad_norm": 0.31177419424057007, "learning_rate": 0.0001, "loss": 1.6356, "step": 716 }, { "epoch": 0.08329944815567819, "grad_norm": 0.34105831384658813, "learning_rate": 0.0001, "loss": 1.5632, "step": 717 }, { "epoch": 0.0834156259076387, "grad_norm": 0.3517579138278961, "learning_rate": 0.0001, "loss": 1.7773, "step": 718 }, { "epoch": 0.08353180365959918, "grad_norm": 0.3053590953350067, "learning_rate": 0.0001, "loss": 1.5039, "step": 719 }, { "epoch": 0.08364798141155969, "grad_norm": 0.35734882950782776, "learning_rate": 0.0001, "loss": 1.779, "step": 720 }, { "epoch": 0.08376415916352019, "grad_norm": 0.33821558952331543, "learning_rate": 0.0001, "loss": 1.7424, "step": 721 }, { "epoch": 0.08388033691548069, "grad_norm": 0.3388650417327881, "learning_rate": 0.0001, "loss": 1.7397, "step": 722 }, { "epoch": 0.08399651466744118, "grad_norm": 0.36601775884628296, "learning_rate": 0.0001, "loss": 1.8082, "step": 723 }, { "epoch": 0.08411269241940168, "grad_norm": 0.34071359038352966, "learning_rate": 0.0001, "loss": 1.7245, "step": 724 }, { "epoch": 0.08422887017136219, "grad_norm": 0.34587469696998596, "learning_rate": 0.0001, "loss": 1.533, "step": 725 }, { "epoch": 0.08434504792332269, "grad_norm": 0.3538304269313812, "learning_rate": 0.0001, "loss": 1.973, "step": 726 }, { "epoch": 0.08446122567528318, "grad_norm": 0.3469333350658417, "learning_rate": 0.0001, "loss": 1.8881, "step": 727 }, { "epoch": 0.08457740342724368, "grad_norm": 0.31623193621635437, "learning_rate": 0.0001, "loss": 1.7093, "step": 728 }, { "epoch": 0.08469358117920418, "grad_norm": 0.3332175016403198, "learning_rate": 0.0001, "loss": 1.7208, "step": 729 }, { "epoch": 0.08480975893116469, "grad_norm": 0.35500526428222656, "learning_rate": 0.0001, "loss": 1.7005, "step": 730 }, { "epoch": 0.08492593668312517, "grad_norm": 0.35975468158721924, "learning_rate": 0.0001, "loss": 1.7079, "step": 731 }, { "epoch": 0.08504211443508568, "grad_norm": 0.33805137872695923, "learning_rate": 0.0001, "loss": 1.7452, "step": 732 }, { "epoch": 0.08515829218704618, "grad_norm": 0.3446786105632782, "learning_rate": 0.0001, "loss": 1.7168, "step": 733 }, { "epoch": 0.08527446993900668, "grad_norm": 0.3499710261821747, "learning_rate": 0.0001, "loss": 1.8734, "step": 734 }, { "epoch": 0.08539064769096719, "grad_norm": 0.33205246925354004, "learning_rate": 0.0001, "loss": 1.7198, "step": 735 }, { "epoch": 0.08550682544292768, "grad_norm": 0.3275506794452667, "learning_rate": 0.0001, "loss": 1.4777, "step": 736 }, { "epoch": 0.08562300319488818, "grad_norm": 0.291427344083786, "learning_rate": 0.0001, "loss": 1.5407, "step": 737 }, { "epoch": 0.08573918094684868, "grad_norm": 0.3342317044734955, "learning_rate": 0.0001, "loss": 1.8063, "step": 738 }, { "epoch": 0.08585535869880918, "grad_norm": 0.3613170087337494, "learning_rate": 0.0001, "loss": 1.8471, "step": 739 }, { "epoch": 0.08597153645076967, "grad_norm": 0.35327574610710144, "learning_rate": 0.0001, "loss": 1.8022, "step": 740 }, { "epoch": 0.08608771420273018, "grad_norm": 0.32750821113586426, "learning_rate": 0.0001, "loss": 1.6562, "step": 741 }, { "epoch": 0.08620389195469068, "grad_norm": 0.37629932165145874, "learning_rate": 0.0001, "loss": 1.6965, "step": 742 }, { "epoch": 0.08632006970665118, "grad_norm": 0.3730964958667755, "learning_rate": 0.0001, "loss": 1.6513, "step": 743 }, { "epoch": 0.08643624745861167, "grad_norm": 0.36657699942588806, "learning_rate": 0.0001, "loss": 1.8363, "step": 744 }, { "epoch": 0.08655242521057217, "grad_norm": 0.3720366358757019, "learning_rate": 0.0001, "loss": 1.9311, "step": 745 }, { "epoch": 0.08666860296253268, "grad_norm": 0.34417784214019775, "learning_rate": 0.0001, "loss": 1.902, "step": 746 }, { "epoch": 0.08678478071449318, "grad_norm": 0.34215980768203735, "learning_rate": 0.0001, "loss": 1.6812, "step": 747 }, { "epoch": 0.08690095846645367, "grad_norm": 0.37166187167167664, "learning_rate": 0.0001, "loss": 1.7309, "step": 748 }, { "epoch": 0.08701713621841417, "grad_norm": 0.3440292179584503, "learning_rate": 0.0001, "loss": 1.6387, "step": 749 }, { "epoch": 0.08713331397037467, "grad_norm": 0.38115811347961426, "learning_rate": 0.0001, "loss": 1.8192, "step": 750 }, { "epoch": 0.08724949172233518, "grad_norm": 0.38101625442504883, "learning_rate": 0.0001, "loss": 1.8616, "step": 751 }, { "epoch": 0.08736566947429568, "grad_norm": 0.3463743031024933, "learning_rate": 0.0001, "loss": 1.7568, "step": 752 }, { "epoch": 0.08748184722625617, "grad_norm": 0.36165106296539307, "learning_rate": 0.0001, "loss": 1.8383, "step": 753 }, { "epoch": 0.08759802497821667, "grad_norm": 0.31763550639152527, "learning_rate": 0.0001, "loss": 1.5879, "step": 754 }, { "epoch": 0.08771420273017717, "grad_norm": 0.35172948241233826, "learning_rate": 0.0001, "loss": 1.8296, "step": 755 }, { "epoch": 0.08783038048213768, "grad_norm": 0.3546104431152344, "learning_rate": 0.0001, "loss": 1.615, "step": 756 }, { "epoch": 0.08794655823409817, "grad_norm": 0.3244597911834717, "learning_rate": 0.0001, "loss": 1.6416, "step": 757 }, { "epoch": 0.08806273598605867, "grad_norm": 0.33493107557296753, "learning_rate": 0.0001, "loss": 1.6849, "step": 758 }, { "epoch": 0.08817891373801917, "grad_norm": 0.3546777665615082, "learning_rate": 0.0001, "loss": 1.7593, "step": 759 }, { "epoch": 0.08829509148997967, "grad_norm": 0.3528088629245758, "learning_rate": 0.0001, "loss": 1.8915, "step": 760 }, { "epoch": 0.08841126924194016, "grad_norm": 0.32234612107276917, "learning_rate": 0.0001, "loss": 1.6456, "step": 761 }, { "epoch": 0.08852744699390067, "grad_norm": 0.35947954654693604, "learning_rate": 0.0001, "loss": 1.6958, "step": 762 }, { "epoch": 0.08864362474586117, "grad_norm": 0.34787270426750183, "learning_rate": 0.0001, "loss": 1.7837, "step": 763 }, { "epoch": 0.08875980249782167, "grad_norm": 0.3539670407772064, "learning_rate": 0.0001, "loss": 1.8225, "step": 764 }, { "epoch": 0.08887598024978216, "grad_norm": 0.3532291352748871, "learning_rate": 0.0001, "loss": 1.7474, "step": 765 }, { "epoch": 0.08899215800174266, "grad_norm": 0.35020220279693604, "learning_rate": 0.0001, "loss": 1.8047, "step": 766 }, { "epoch": 0.08910833575370317, "grad_norm": 0.3528810441493988, "learning_rate": 0.0001, "loss": 1.5495, "step": 767 }, { "epoch": 0.08922451350566367, "grad_norm": 0.38562941551208496, "learning_rate": 0.0001, "loss": 1.7691, "step": 768 }, { "epoch": 0.08934069125762417, "grad_norm": 0.3530423641204834, "learning_rate": 0.0001, "loss": 1.8129, "step": 769 }, { "epoch": 0.08945686900958466, "grad_norm": 0.35964086651802063, "learning_rate": 0.0001, "loss": 1.8651, "step": 770 }, { "epoch": 0.08957304676154516, "grad_norm": 0.3369607627391815, "learning_rate": 0.0001, "loss": 1.7238, "step": 771 }, { "epoch": 0.08968922451350567, "grad_norm": 0.3420158326625824, "learning_rate": 0.0001, "loss": 1.7426, "step": 772 }, { "epoch": 0.08980540226546617, "grad_norm": 0.3466860353946686, "learning_rate": 0.0001, "loss": 1.7715, "step": 773 }, { "epoch": 0.08992158001742666, "grad_norm": 0.33295726776123047, "learning_rate": 0.0001, "loss": 1.7334, "step": 774 }, { "epoch": 0.09003775776938716, "grad_norm": 0.2985909581184387, "learning_rate": 0.0001, "loss": 1.5965, "step": 775 }, { "epoch": 0.09015393552134766, "grad_norm": 0.334733247756958, "learning_rate": 0.0001, "loss": 1.8092, "step": 776 }, { "epoch": 0.09027011327330817, "grad_norm": 0.3320635259151459, "learning_rate": 0.0001, "loss": 1.6475, "step": 777 }, { "epoch": 0.09038629102526866, "grad_norm": 0.33000028133392334, "learning_rate": 0.0001, "loss": 1.6653, "step": 778 }, { "epoch": 0.09050246877722916, "grad_norm": 0.3425371050834656, "learning_rate": 0.0001, "loss": 1.8284, "step": 779 }, { "epoch": 0.09061864652918966, "grad_norm": 0.3471533954143524, "learning_rate": 0.0001, "loss": 1.824, "step": 780 }, { "epoch": 0.09073482428115016, "grad_norm": 0.34877267479896545, "learning_rate": 0.0001, "loss": 1.7793, "step": 781 }, { "epoch": 0.09085100203311065, "grad_norm": 0.33842357993125916, "learning_rate": 0.0001, "loss": 1.8107, "step": 782 }, { "epoch": 0.09096717978507116, "grad_norm": 0.3466620445251465, "learning_rate": 0.0001, "loss": 1.7385, "step": 783 }, { "epoch": 0.09108335753703166, "grad_norm": 0.3401154577732086, "learning_rate": 0.0001, "loss": 1.8291, "step": 784 }, { "epoch": 0.09119953528899216, "grad_norm": 0.3380959630012512, "learning_rate": 0.0001, "loss": 1.7823, "step": 785 }, { "epoch": 0.09131571304095265, "grad_norm": 0.3219566345214844, "learning_rate": 0.0001, "loss": 1.6534, "step": 786 }, { "epoch": 0.09143189079291315, "grad_norm": 0.36787810921669006, "learning_rate": 0.0001, "loss": 1.8798, "step": 787 }, { "epoch": 0.09154806854487366, "grad_norm": 0.3317922055721283, "learning_rate": 0.0001, "loss": 1.6822, "step": 788 }, { "epoch": 0.09166424629683416, "grad_norm": 0.32760554552078247, "learning_rate": 0.0001, "loss": 1.6927, "step": 789 }, { "epoch": 0.09178042404879466, "grad_norm": 0.3528217375278473, "learning_rate": 0.0001, "loss": 1.8356, "step": 790 }, { "epoch": 0.09189660180075515, "grad_norm": 0.3439396321773529, "learning_rate": 0.0001, "loss": 1.8907, "step": 791 }, { "epoch": 0.09201277955271565, "grad_norm": 0.32898107171058655, "learning_rate": 0.0001, "loss": 1.7463, "step": 792 }, { "epoch": 0.09212895730467616, "grad_norm": 0.35222840309143066, "learning_rate": 0.0001, "loss": 1.6535, "step": 793 }, { "epoch": 0.09224513505663666, "grad_norm": 0.35788872838020325, "learning_rate": 0.0001, "loss": 1.7799, "step": 794 }, { "epoch": 0.09236131280859715, "grad_norm": 0.34906426072120667, "learning_rate": 0.0001, "loss": 1.7877, "step": 795 }, { "epoch": 0.09247749056055765, "grad_norm": 0.3351164162158966, "learning_rate": 0.0001, "loss": 1.7754, "step": 796 }, { "epoch": 0.09259366831251815, "grad_norm": 0.34004396200180054, "learning_rate": 0.0001, "loss": 1.7906, "step": 797 }, { "epoch": 0.09270984606447866, "grad_norm": 0.3525846302509308, "learning_rate": 0.0001, "loss": 1.852, "step": 798 }, { "epoch": 0.09282602381643915, "grad_norm": 0.3507898449897766, "learning_rate": 0.0001, "loss": 1.6618, "step": 799 }, { "epoch": 0.09294220156839965, "grad_norm": 0.33934998512268066, "learning_rate": 0.0001, "loss": 1.8192, "step": 800 }, { "epoch": 0.09305837932036015, "grad_norm": 0.3526393473148346, "learning_rate": 0.0001, "loss": 1.7328, "step": 801 }, { "epoch": 0.09317455707232065, "grad_norm": 0.3338293135166168, "learning_rate": 0.0001, "loss": 1.8076, "step": 802 }, { "epoch": 0.09329073482428114, "grad_norm": 0.3807794451713562, "learning_rate": 0.0001, "loss": 1.9431, "step": 803 }, { "epoch": 0.09340691257624165, "grad_norm": 0.33969688415527344, "learning_rate": 0.0001, "loss": 1.8415, "step": 804 }, { "epoch": 0.09352309032820215, "grad_norm": 0.3313851058483124, "learning_rate": 0.0001, "loss": 1.7324, "step": 805 }, { "epoch": 0.09363926808016265, "grad_norm": 0.3267270624637604, "learning_rate": 0.0001, "loss": 1.7788, "step": 806 }, { "epoch": 0.09375544583212315, "grad_norm": 0.3477146327495575, "learning_rate": 0.0001, "loss": 1.6614, "step": 807 }, { "epoch": 0.09387162358408364, "grad_norm": 0.36601418256759644, "learning_rate": 0.0001, "loss": 1.8103, "step": 808 }, { "epoch": 0.09398780133604415, "grad_norm": 0.33894267678260803, "learning_rate": 0.0001, "loss": 1.7657, "step": 809 }, { "epoch": 0.09410397908800465, "grad_norm": 0.35825762152671814, "learning_rate": 0.0001, "loss": 1.7546, "step": 810 }, { "epoch": 0.09422015683996515, "grad_norm": 0.3400390148162842, "learning_rate": 0.0001, "loss": 1.5386, "step": 811 }, { "epoch": 0.09433633459192564, "grad_norm": 0.32821226119995117, "learning_rate": 0.0001, "loss": 1.6345, "step": 812 }, { "epoch": 0.09445251234388614, "grad_norm": 0.3435446619987488, "learning_rate": 0.0001, "loss": 1.7616, "step": 813 }, { "epoch": 0.09456869009584665, "grad_norm": 0.37115979194641113, "learning_rate": 0.0001, "loss": 1.7555, "step": 814 }, { "epoch": 0.09468486784780715, "grad_norm": 0.3451048731803894, "learning_rate": 0.0001, "loss": 1.6982, "step": 815 }, { "epoch": 0.09480104559976764, "grad_norm": 0.3507342040538788, "learning_rate": 0.0001, "loss": 1.8472, "step": 816 }, { "epoch": 0.09491722335172814, "grad_norm": 0.32728418707847595, "learning_rate": 0.0001, "loss": 1.6649, "step": 817 }, { "epoch": 0.09503340110368864, "grad_norm": 0.3312884271144867, "learning_rate": 0.0001, "loss": 1.6482, "step": 818 }, { "epoch": 0.09514957885564915, "grad_norm": 0.3303951025009155, "learning_rate": 0.0001, "loss": 1.6003, "step": 819 }, { "epoch": 0.09526575660760964, "grad_norm": 0.3290101885795593, "learning_rate": 0.0001, "loss": 1.6972, "step": 820 }, { "epoch": 0.09538193435957014, "grad_norm": 0.36628472805023193, "learning_rate": 0.0001, "loss": 1.7772, "step": 821 }, { "epoch": 0.09549811211153064, "grad_norm": 0.3897988796234131, "learning_rate": 0.0001, "loss": 1.808, "step": 822 }, { "epoch": 0.09561428986349114, "grad_norm": 0.3413255512714386, "learning_rate": 0.0001, "loss": 1.7803, "step": 823 }, { "epoch": 0.09573046761545165, "grad_norm": 0.4127778112888336, "learning_rate": 0.0001, "loss": 1.8115, "step": 824 }, { "epoch": 0.09584664536741214, "grad_norm": 0.3477415442466736, "learning_rate": 0.0001, "loss": 1.6128, "step": 825 }, { "epoch": 0.09596282311937264, "grad_norm": 0.32524755597114563, "learning_rate": 0.0001, "loss": 1.6613, "step": 826 }, { "epoch": 0.09607900087133314, "grad_norm": 0.36483168601989746, "learning_rate": 0.0001, "loss": 1.8151, "step": 827 }, { "epoch": 0.09619517862329365, "grad_norm": 0.34030166268348694, "learning_rate": 0.0001, "loss": 1.6926, "step": 828 }, { "epoch": 0.09631135637525413, "grad_norm": 0.3241477906703949, "learning_rate": 0.0001, "loss": 1.6646, "step": 829 }, { "epoch": 0.09642753412721464, "grad_norm": 0.3371472954750061, "learning_rate": 0.0001, "loss": 1.7769, "step": 830 }, { "epoch": 0.09654371187917514, "grad_norm": 0.3401608169078827, "learning_rate": 0.0001, "loss": 1.7182, "step": 831 }, { "epoch": 0.09665988963113564, "grad_norm": 0.32106491923332214, "learning_rate": 0.0001, "loss": 1.5612, "step": 832 }, { "epoch": 0.09677606738309613, "grad_norm": 0.34740883111953735, "learning_rate": 0.0001, "loss": 1.8344, "step": 833 }, { "epoch": 0.09689224513505663, "grad_norm": 0.3772992789745331, "learning_rate": 0.0001, "loss": 1.7999, "step": 834 }, { "epoch": 0.09700842288701714, "grad_norm": 0.3412041664123535, "learning_rate": 0.0001, "loss": 1.748, "step": 835 }, { "epoch": 0.09712460063897764, "grad_norm": 0.34832799434661865, "learning_rate": 0.0001, "loss": 1.6703, "step": 836 }, { "epoch": 0.09724077839093813, "grad_norm": 0.35847747325897217, "learning_rate": 0.0001, "loss": 1.8176, "step": 837 }, { "epoch": 0.09735695614289863, "grad_norm": 0.36482107639312744, "learning_rate": 0.0001, "loss": 1.8864, "step": 838 }, { "epoch": 0.09747313389485913, "grad_norm": 0.40612149238586426, "learning_rate": 0.0001, "loss": 1.8004, "step": 839 }, { "epoch": 0.09758931164681964, "grad_norm": 0.3522476255893707, "learning_rate": 0.0001, "loss": 1.8136, "step": 840 }, { "epoch": 0.09770548939878014, "grad_norm": 0.37533196806907654, "learning_rate": 0.0001, "loss": 1.8091, "step": 841 }, { "epoch": 0.09782166715074063, "grad_norm": 0.3364860415458679, "learning_rate": 0.0001, "loss": 1.7373, "step": 842 }, { "epoch": 0.09793784490270113, "grad_norm": 0.34495505690574646, "learning_rate": 0.0001, "loss": 1.8331, "step": 843 }, { "epoch": 0.09805402265466164, "grad_norm": 0.3652923107147217, "learning_rate": 0.0001, "loss": 1.8655, "step": 844 }, { "epoch": 0.09817020040662214, "grad_norm": 0.35473141074180603, "learning_rate": 0.0001, "loss": 1.907, "step": 845 }, { "epoch": 0.09828637815858263, "grad_norm": 0.36820659041404724, "learning_rate": 0.0001, "loss": 1.624, "step": 846 }, { "epoch": 0.09840255591054313, "grad_norm": 0.34225067496299744, "learning_rate": 0.0001, "loss": 1.6383, "step": 847 }, { "epoch": 0.09851873366250363, "grad_norm": 0.3609171509742737, "learning_rate": 0.0001, "loss": 1.7822, "step": 848 }, { "epoch": 0.09863491141446414, "grad_norm": 0.3501855731010437, "learning_rate": 0.0001, "loss": 1.6538, "step": 849 }, { "epoch": 0.09875108916642462, "grad_norm": 0.32687613368034363, "learning_rate": 0.0001, "loss": 1.7718, "step": 850 }, { "epoch": 0.09886726691838513, "grad_norm": 0.34075310826301575, "learning_rate": 0.0001, "loss": 1.6748, "step": 851 }, { "epoch": 0.09898344467034563, "grad_norm": 0.3613976836204529, "learning_rate": 0.0001, "loss": 1.9505, "step": 852 }, { "epoch": 0.09909962242230613, "grad_norm": 0.33815905451774597, "learning_rate": 0.0001, "loss": 1.7972, "step": 853 }, { "epoch": 0.09921580017426662, "grad_norm": 0.35017895698547363, "learning_rate": 0.0001, "loss": 1.6505, "step": 854 }, { "epoch": 0.09933197792622712, "grad_norm": 0.3758436441421509, "learning_rate": 0.0001, "loss": 1.6596, "step": 855 }, { "epoch": 0.09944815567818763, "grad_norm": 0.32959380745887756, "learning_rate": 0.0001, "loss": 1.6437, "step": 856 }, { "epoch": 0.09956433343014813, "grad_norm": 0.3575064539909363, "learning_rate": 0.0001, "loss": 1.8109, "step": 857 }, { "epoch": 0.09968051118210862, "grad_norm": 0.3391128182411194, "learning_rate": 0.0001, "loss": 1.7841, "step": 858 }, { "epoch": 0.09979668893406912, "grad_norm": 0.33528846502304077, "learning_rate": 0.0001, "loss": 1.7011, "step": 859 }, { "epoch": 0.09991286668602962, "grad_norm": 0.32930776476860046, "learning_rate": 0.0001, "loss": 1.5906, "step": 860 }, { "epoch": 0.10002904443799013, "grad_norm": 0.34488263726234436, "learning_rate": 0.0001, "loss": 1.7357, "step": 861 }, { "epoch": 0.10014522218995063, "grad_norm": 0.36875104904174805, "learning_rate": 0.0001, "loss": 1.8439, "step": 862 }, { "epoch": 0.10026139994191112, "grad_norm": 0.3462197184562683, "learning_rate": 0.0001, "loss": 1.7203, "step": 863 }, { "epoch": 0.10037757769387162, "grad_norm": 0.38343024253845215, "learning_rate": 0.0001, "loss": 2.0117, "step": 864 }, { "epoch": 0.10049375544583213, "grad_norm": 0.3336431682109833, "learning_rate": 0.0001, "loss": 1.7904, "step": 865 }, { "epoch": 0.10060993319779263, "grad_norm": 0.3539637327194214, "learning_rate": 0.0001, "loss": 1.7501, "step": 866 }, { "epoch": 0.10072611094975312, "grad_norm": 0.38433191180229187, "learning_rate": 0.0001, "loss": 1.8894, "step": 867 }, { "epoch": 0.10084228870171362, "grad_norm": 0.34169065952301025, "learning_rate": 0.0001, "loss": 1.6979, "step": 868 }, { "epoch": 0.10095846645367412, "grad_norm": 0.3778311610221863, "learning_rate": 0.0001, "loss": 1.735, "step": 869 }, { "epoch": 0.10107464420563463, "grad_norm": 0.3747129440307617, "learning_rate": 0.0001, "loss": 1.7026, "step": 870 }, { "epoch": 0.10119082195759511, "grad_norm": 0.34259381890296936, "learning_rate": 0.0001, "loss": 1.7926, "step": 871 }, { "epoch": 0.10130699970955562, "grad_norm": 0.35702332854270935, "learning_rate": 0.0001, "loss": 1.7456, "step": 872 }, { "epoch": 0.10142317746151612, "grad_norm": 0.35375505685806274, "learning_rate": 0.0001, "loss": 1.6864, "step": 873 }, { "epoch": 0.10153935521347662, "grad_norm": 0.35189199447631836, "learning_rate": 0.0001, "loss": 1.6413, "step": 874 }, { "epoch": 0.10165553296543711, "grad_norm": 0.3281596601009369, "learning_rate": 0.0001, "loss": 1.4696, "step": 875 }, { "epoch": 0.10177171071739761, "grad_norm": 0.34264156222343445, "learning_rate": 0.0001, "loss": 1.8293, "step": 876 }, { "epoch": 0.10188788846935812, "grad_norm": 0.3711477518081665, "learning_rate": 0.0001, "loss": 1.7117, "step": 877 }, { "epoch": 0.10200406622131862, "grad_norm": 0.31812041997909546, "learning_rate": 0.0001, "loss": 1.5149, "step": 878 }, { "epoch": 0.10212024397327912, "grad_norm": 0.3646787405014038, "learning_rate": 0.0001, "loss": 1.6864, "step": 879 }, { "epoch": 0.10223642172523961, "grad_norm": 0.3644947111606598, "learning_rate": 0.0001, "loss": 1.8658, "step": 880 }, { "epoch": 0.10235259947720012, "grad_norm": 0.345739483833313, "learning_rate": 0.0001, "loss": 1.7315, "step": 881 }, { "epoch": 0.10246877722916062, "grad_norm": 0.3633933663368225, "learning_rate": 0.0001, "loss": 1.6847, "step": 882 }, { "epoch": 0.10258495498112112, "grad_norm": 0.35784977674484253, "learning_rate": 0.0001, "loss": 1.7953, "step": 883 }, { "epoch": 0.10270113273308161, "grad_norm": 0.34802740812301636, "learning_rate": 0.0001, "loss": 1.7537, "step": 884 }, { "epoch": 0.10281731048504211, "grad_norm": 0.334532231092453, "learning_rate": 0.0001, "loss": 1.6814, "step": 885 }, { "epoch": 0.10293348823700262, "grad_norm": 0.37249666452407837, "learning_rate": 0.0001, "loss": 1.838, "step": 886 }, { "epoch": 0.10304966598896312, "grad_norm": 0.37367671728134155, "learning_rate": 0.0001, "loss": 1.8214, "step": 887 }, { "epoch": 0.10316584374092361, "grad_norm": 0.32655900716781616, "learning_rate": 0.0001, "loss": 1.7381, "step": 888 }, { "epoch": 0.10328202149288411, "grad_norm": 0.33447375893592834, "learning_rate": 0.0001, "loss": 1.6977, "step": 889 }, { "epoch": 0.10339819924484461, "grad_norm": 0.3553028404712677, "learning_rate": 0.0001, "loss": 1.9126, "step": 890 }, { "epoch": 0.10351437699680512, "grad_norm": 0.3429213762283325, "learning_rate": 0.0001, "loss": 1.7968, "step": 891 }, { "epoch": 0.1036305547487656, "grad_norm": 0.3589216470718384, "learning_rate": 0.0001, "loss": 1.87, "step": 892 }, { "epoch": 0.10374673250072611, "grad_norm": 0.35310253500938416, "learning_rate": 0.0001, "loss": 1.7481, "step": 893 }, { "epoch": 0.10386291025268661, "grad_norm": 0.36510980129241943, "learning_rate": 0.0001, "loss": 1.8996, "step": 894 }, { "epoch": 0.10397908800464711, "grad_norm": 0.3604600429534912, "learning_rate": 0.0001, "loss": 1.8, "step": 895 }, { "epoch": 0.10409526575660762, "grad_norm": 0.3504015803337097, "learning_rate": 0.0001, "loss": 1.706, "step": 896 }, { "epoch": 0.1042114435085681, "grad_norm": 0.3666890561580658, "learning_rate": 0.0001, "loss": 1.9185, "step": 897 }, { "epoch": 0.10432762126052861, "grad_norm": 0.32218024134635925, "learning_rate": 0.0001, "loss": 1.5574, "step": 898 }, { "epoch": 0.10444379901248911, "grad_norm": 0.32854628562927246, "learning_rate": 0.0001, "loss": 1.537, "step": 899 }, { "epoch": 0.10455997676444961, "grad_norm": 0.34168779850006104, "learning_rate": 0.0001, "loss": 1.7293, "step": 900 }, { "epoch": 0.1046761545164101, "grad_norm": 0.3389854431152344, "learning_rate": 0.0001, "loss": 1.7061, "step": 901 }, { "epoch": 0.1047923322683706, "grad_norm": 0.3675687611103058, "learning_rate": 0.0001, "loss": 1.9126, "step": 902 }, { "epoch": 0.10490851002033111, "grad_norm": 0.33231931924819946, "learning_rate": 0.0001, "loss": 1.6451, "step": 903 }, { "epoch": 0.10502468777229161, "grad_norm": 0.3507463037967682, "learning_rate": 0.0001, "loss": 1.7924, "step": 904 }, { "epoch": 0.1051408655242521, "grad_norm": 0.35986390709877014, "learning_rate": 0.0001, "loss": 1.7427, "step": 905 }, { "epoch": 0.1052570432762126, "grad_norm": 0.36210981011390686, "learning_rate": 0.0001, "loss": 1.8232, "step": 906 }, { "epoch": 0.1053732210281731, "grad_norm": 0.3577278256416321, "learning_rate": 0.0001, "loss": 1.7484, "step": 907 }, { "epoch": 0.10548939878013361, "grad_norm": 0.3454272449016571, "learning_rate": 0.0001, "loss": 1.6401, "step": 908 }, { "epoch": 0.1056055765320941, "grad_norm": 0.34774741530418396, "learning_rate": 0.0001, "loss": 1.6245, "step": 909 }, { "epoch": 0.1057217542840546, "grad_norm": 0.3502449095249176, "learning_rate": 0.0001, "loss": 1.6245, "step": 910 }, { "epoch": 0.1058379320360151, "grad_norm": 0.36542028188705444, "learning_rate": 0.0001, "loss": 1.8321, "step": 911 }, { "epoch": 0.1059541097879756, "grad_norm": 0.35387906432151794, "learning_rate": 0.0001, "loss": 1.7257, "step": 912 }, { "epoch": 0.10607028753993611, "grad_norm": 0.3482963442802429, "learning_rate": 0.0001, "loss": 1.7885, "step": 913 }, { "epoch": 0.1061864652918966, "grad_norm": 0.3488115668296814, "learning_rate": 0.0001, "loss": 1.6663, "step": 914 }, { "epoch": 0.1063026430438571, "grad_norm": 0.3296864926815033, "learning_rate": 0.0001, "loss": 1.5167, "step": 915 }, { "epoch": 0.1064188207958176, "grad_norm": 0.34132471680641174, "learning_rate": 0.0001, "loss": 1.6633, "step": 916 }, { "epoch": 0.1065349985477781, "grad_norm": 0.3466493487358093, "learning_rate": 0.0001, "loss": 1.6935, "step": 917 }, { "epoch": 0.1066511762997386, "grad_norm": 0.3528394401073456, "learning_rate": 0.0001, "loss": 1.847, "step": 918 }, { "epoch": 0.1067673540516991, "grad_norm": 0.32509297132492065, "learning_rate": 0.0001, "loss": 1.681, "step": 919 }, { "epoch": 0.1068835318036596, "grad_norm": 0.3523486852645874, "learning_rate": 0.0001, "loss": 1.7146, "step": 920 }, { "epoch": 0.1069997095556201, "grad_norm": 0.3355209529399872, "learning_rate": 0.0001, "loss": 1.6507, "step": 921 }, { "epoch": 0.10711588730758059, "grad_norm": 0.3474280834197998, "learning_rate": 0.0001, "loss": 1.6249, "step": 922 }, { "epoch": 0.1072320650595411, "grad_norm": 0.359402596950531, "learning_rate": 0.0001, "loss": 1.8466, "step": 923 }, { "epoch": 0.1073482428115016, "grad_norm": 0.3309195339679718, "learning_rate": 0.0001, "loss": 1.6251, "step": 924 }, { "epoch": 0.1074644205634621, "grad_norm": 0.3497809171676636, "learning_rate": 0.0001, "loss": 1.7625, "step": 925 }, { "epoch": 0.10758059831542259, "grad_norm": 0.34211966395378113, "learning_rate": 0.0001, "loss": 1.7996, "step": 926 }, { "epoch": 0.1076967760673831, "grad_norm": 0.3558671772480011, "learning_rate": 0.0001, "loss": 1.8548, "step": 927 }, { "epoch": 0.1078129538193436, "grad_norm": 0.3554135859012604, "learning_rate": 0.0001, "loss": 1.873, "step": 928 }, { "epoch": 0.1079291315713041, "grad_norm": 0.35786765813827515, "learning_rate": 0.0001, "loss": 1.7973, "step": 929 }, { "epoch": 0.1080453093232646, "grad_norm": 0.3519211709499359, "learning_rate": 0.0001, "loss": 1.7794, "step": 930 }, { "epoch": 0.10816148707522509, "grad_norm": 0.37536898255348206, "learning_rate": 0.0001, "loss": 1.8926, "step": 931 }, { "epoch": 0.1082776648271856, "grad_norm": 0.3422173857688904, "learning_rate": 0.0001, "loss": 1.8172, "step": 932 }, { "epoch": 0.1083938425791461, "grad_norm": 0.35913097858428955, "learning_rate": 0.0001, "loss": 1.8137, "step": 933 }, { "epoch": 0.1085100203311066, "grad_norm": 0.3589080274105072, "learning_rate": 0.0001, "loss": 1.7949, "step": 934 }, { "epoch": 0.10862619808306709, "grad_norm": 0.37230291962623596, "learning_rate": 0.0001, "loss": 1.8148, "step": 935 }, { "epoch": 0.10874237583502759, "grad_norm": 0.3313770294189453, "learning_rate": 0.0001, "loss": 1.6861, "step": 936 }, { "epoch": 0.1088585535869881, "grad_norm": 0.344086229801178, "learning_rate": 0.0001, "loss": 1.7272, "step": 937 }, { "epoch": 0.1089747313389486, "grad_norm": 0.3584043085575104, "learning_rate": 0.0001, "loss": 1.7395, "step": 938 }, { "epoch": 0.10909090909090909, "grad_norm": 0.34426915645599365, "learning_rate": 0.0001, "loss": 1.6909, "step": 939 }, { "epoch": 0.10920708684286959, "grad_norm": 0.340386301279068, "learning_rate": 0.0001, "loss": 1.7741, "step": 940 }, { "epoch": 0.10932326459483009, "grad_norm": 0.3348495662212372, "learning_rate": 0.0001, "loss": 1.7335, "step": 941 }, { "epoch": 0.1094394423467906, "grad_norm": 0.35593461990356445, "learning_rate": 0.0001, "loss": 1.7451, "step": 942 }, { "epoch": 0.10955562009875108, "grad_norm": 0.3561510145664215, "learning_rate": 0.0001, "loss": 1.8853, "step": 943 }, { "epoch": 0.10967179785071159, "grad_norm": 0.36038845777511597, "learning_rate": 0.0001, "loss": 1.6154, "step": 944 }, { "epoch": 0.10978797560267209, "grad_norm": 0.3256826400756836, "learning_rate": 0.0001, "loss": 1.5143, "step": 945 }, { "epoch": 0.10990415335463259, "grad_norm": 0.3304516673088074, "learning_rate": 0.0001, "loss": 1.6441, "step": 946 }, { "epoch": 0.11002033110659308, "grad_norm": 0.34142622351646423, "learning_rate": 0.0001, "loss": 1.7302, "step": 947 }, { "epoch": 0.11013650885855358, "grad_norm": 0.3722458481788635, "learning_rate": 0.0001, "loss": 1.6348, "step": 948 }, { "epoch": 0.11025268661051409, "grad_norm": 0.35679808259010315, "learning_rate": 0.0001, "loss": 1.7921, "step": 949 }, { "epoch": 0.11036886436247459, "grad_norm": 0.3507538139820099, "learning_rate": 0.0001, "loss": 1.7306, "step": 950 }, { "epoch": 0.11048504211443509, "grad_norm": 0.34563156962394714, "learning_rate": 0.0001, "loss": 1.7972, "step": 951 }, { "epoch": 0.11060121986639558, "grad_norm": 0.35172975063323975, "learning_rate": 0.0001, "loss": 1.8513, "step": 952 }, { "epoch": 0.11071739761835608, "grad_norm": 0.3704832196235657, "learning_rate": 0.0001, "loss": 1.7626, "step": 953 }, { "epoch": 0.11083357537031659, "grad_norm": 0.3332252502441406, "learning_rate": 0.0001, "loss": 1.7258, "step": 954 }, { "epoch": 0.11094975312227709, "grad_norm": 0.33239227533340454, "learning_rate": 0.0001, "loss": 1.5098, "step": 955 }, { "epoch": 0.11106593087423758, "grad_norm": 0.35018518567085266, "learning_rate": 0.0001, "loss": 1.8144, "step": 956 }, { "epoch": 0.11118210862619808, "grad_norm": 0.34933680295944214, "learning_rate": 0.0001, "loss": 1.68, "step": 957 }, { "epoch": 0.11129828637815858, "grad_norm": 0.38236263394355774, "learning_rate": 0.0001, "loss": 1.8272, "step": 958 }, { "epoch": 0.11141446413011909, "grad_norm": 0.3630216717720032, "learning_rate": 0.0001, "loss": 1.7154, "step": 959 }, { "epoch": 0.11153064188207958, "grad_norm": 0.3446943461894989, "learning_rate": 0.0001, "loss": 1.8034, "step": 960 }, { "epoch": 0.11164681963404008, "grad_norm": 0.40010347962379456, "learning_rate": 0.0001, "loss": 1.7285, "step": 961 }, { "epoch": 0.11176299738600058, "grad_norm": 0.35464581847190857, "learning_rate": 0.0001, "loss": 1.7943, "step": 962 }, { "epoch": 0.11187917513796108, "grad_norm": 0.38472074270248413, "learning_rate": 0.0001, "loss": 1.8237, "step": 963 }, { "epoch": 0.11199535288992157, "grad_norm": 0.3539893329143524, "learning_rate": 0.0001, "loss": 1.7777, "step": 964 }, { "epoch": 0.11211153064188208, "grad_norm": 0.3627575933933258, "learning_rate": 0.0001, "loss": 1.7519, "step": 965 }, { "epoch": 0.11222770839384258, "grad_norm": 0.3388507664203644, "learning_rate": 0.0001, "loss": 1.7009, "step": 966 }, { "epoch": 0.11234388614580308, "grad_norm": 0.3800560534000397, "learning_rate": 0.0001, "loss": 1.7116, "step": 967 }, { "epoch": 0.11246006389776358, "grad_norm": 0.3563110828399658, "learning_rate": 0.0001, "loss": 1.6944, "step": 968 }, { "epoch": 0.11257624164972407, "grad_norm": 0.34514880180358887, "learning_rate": 0.0001, "loss": 1.7658, "step": 969 }, { "epoch": 0.11269241940168458, "grad_norm": 0.35564523935317993, "learning_rate": 0.0001, "loss": 1.6212, "step": 970 }, { "epoch": 0.11280859715364508, "grad_norm": 0.35224324464797974, "learning_rate": 0.0001, "loss": 1.8569, "step": 971 }, { "epoch": 0.11292477490560558, "grad_norm": 0.344664990901947, "learning_rate": 0.0001, "loss": 1.6649, "step": 972 }, { "epoch": 0.11304095265756607, "grad_norm": 0.3399982452392578, "learning_rate": 0.0001, "loss": 1.6618, "step": 973 }, { "epoch": 0.11315713040952657, "grad_norm": 0.3541862368583679, "learning_rate": 0.0001, "loss": 1.5522, "step": 974 }, { "epoch": 0.11327330816148708, "grad_norm": 0.35132747888565063, "learning_rate": 0.0001, "loss": 1.7683, "step": 975 }, { "epoch": 0.11338948591344758, "grad_norm": 0.3748994469642639, "learning_rate": 0.0001, "loss": 1.6813, "step": 976 }, { "epoch": 0.11350566366540807, "grad_norm": 0.3247508406639099, "learning_rate": 0.0001, "loss": 1.6023, "step": 977 }, { "epoch": 0.11362184141736857, "grad_norm": 0.33404844999313354, "learning_rate": 0.0001, "loss": 1.6346, "step": 978 }, { "epoch": 0.11373801916932907, "grad_norm": 0.36675605177879333, "learning_rate": 0.0001, "loss": 1.7969, "step": 979 }, { "epoch": 0.11385419692128958, "grad_norm": 0.37571048736572266, "learning_rate": 0.0001, "loss": 1.7681, "step": 980 }, { "epoch": 0.11397037467325007, "grad_norm": 0.3701353967189789, "learning_rate": 0.0001, "loss": 1.8767, "step": 981 }, { "epoch": 0.11408655242521057, "grad_norm": 0.34536612033843994, "learning_rate": 0.0001, "loss": 1.616, "step": 982 }, { "epoch": 0.11420273017717107, "grad_norm": 0.43408679962158203, "learning_rate": 0.0001, "loss": 1.9732, "step": 983 }, { "epoch": 0.11431890792913157, "grad_norm": 0.34038683772087097, "learning_rate": 0.0001, "loss": 1.684, "step": 984 }, { "epoch": 0.11443508568109208, "grad_norm": 0.3477528691291809, "learning_rate": 0.0001, "loss": 1.5602, "step": 985 }, { "epoch": 0.11455126343305257, "grad_norm": 0.3574272096157074, "learning_rate": 0.0001, "loss": 1.6587, "step": 986 }, { "epoch": 0.11466744118501307, "grad_norm": 0.3423398733139038, "learning_rate": 0.0001, "loss": 1.6347, "step": 987 }, { "epoch": 0.11478361893697357, "grad_norm": 0.35041436553001404, "learning_rate": 0.0001, "loss": 1.6837, "step": 988 }, { "epoch": 0.11489979668893407, "grad_norm": 0.3532884418964386, "learning_rate": 0.0001, "loss": 1.8723, "step": 989 }, { "epoch": 0.11501597444089456, "grad_norm": 0.3406219482421875, "learning_rate": 0.0001, "loss": 1.6606, "step": 990 }, { "epoch": 0.11513215219285507, "grad_norm": 0.33296358585357666, "learning_rate": 0.0001, "loss": 1.6413, "step": 991 }, { "epoch": 0.11524832994481557, "grad_norm": 0.35060951113700867, "learning_rate": 0.0001, "loss": 1.6228, "step": 992 }, { "epoch": 0.11536450769677607, "grad_norm": 0.369406521320343, "learning_rate": 0.0001, "loss": 1.7544, "step": 993 }, { "epoch": 0.11548068544873656, "grad_norm": 0.3404718041419983, "learning_rate": 0.0001, "loss": 1.735, "step": 994 }, { "epoch": 0.11559686320069706, "grad_norm": 0.3357236385345459, "learning_rate": 0.0001, "loss": 1.7058, "step": 995 }, { "epoch": 0.11571304095265757, "grad_norm": 0.3525453805923462, "learning_rate": 0.0001, "loss": 1.8039, "step": 996 }, { "epoch": 0.11582921870461807, "grad_norm": 0.35840287804603577, "learning_rate": 0.0001, "loss": 1.8253, "step": 997 }, { "epoch": 0.11594539645657856, "grad_norm": 0.36817336082458496, "learning_rate": 0.0001, "loss": 1.8989, "step": 998 }, { "epoch": 0.11606157420853906, "grad_norm": 0.349050372838974, "learning_rate": 0.0001, "loss": 1.7812, "step": 999 }, { "epoch": 0.11617775196049956, "grad_norm": 0.37279772758483887, "learning_rate": 0.0001, "loss": 1.8076, "step": 1000 }, { "epoch": 0.11629392971246007, "grad_norm": 0.36874422430992126, "learning_rate": 0.0001, "loss": 1.7866, "step": 1001 }, { "epoch": 0.11641010746442057, "grad_norm": 0.3826844394207001, "learning_rate": 0.0001, "loss": 1.7474, "step": 1002 }, { "epoch": 0.11652628521638106, "grad_norm": 0.34281986951828003, "learning_rate": 0.0001, "loss": 1.657, "step": 1003 }, { "epoch": 0.11664246296834156, "grad_norm": 0.3320811688899994, "learning_rate": 0.0001, "loss": 1.6287, "step": 1004 }, { "epoch": 0.11675864072030206, "grad_norm": 0.3438369631767273, "learning_rate": 0.0001, "loss": 1.6014, "step": 1005 }, { "epoch": 0.11687481847226257, "grad_norm": 0.35468512773513794, "learning_rate": 0.0001, "loss": 1.65, "step": 1006 }, { "epoch": 0.11699099622422306, "grad_norm": 0.3840721547603607, "learning_rate": 0.0001, "loss": 1.9277, "step": 1007 }, { "epoch": 0.11710717397618356, "grad_norm": 0.3772999346256256, "learning_rate": 0.0001, "loss": 1.8193, "step": 1008 }, { "epoch": 0.11722335172814406, "grad_norm": 0.32721465826034546, "learning_rate": 0.0001, "loss": 1.5581, "step": 1009 }, { "epoch": 0.11733952948010457, "grad_norm": 0.3579975664615631, "learning_rate": 0.0001, "loss": 1.8259, "step": 1010 }, { "epoch": 0.11745570723206505, "grad_norm": 0.35289841890335083, "learning_rate": 0.0001, "loss": 1.6661, "step": 1011 }, { "epoch": 0.11757188498402556, "grad_norm": 0.35894694924354553, "learning_rate": 0.0001, "loss": 1.7533, "step": 1012 }, { "epoch": 0.11768806273598606, "grad_norm": 0.35734298825263977, "learning_rate": 0.0001, "loss": 1.8021, "step": 1013 }, { "epoch": 0.11780424048794656, "grad_norm": 0.3388814330101013, "learning_rate": 0.0001, "loss": 1.7065, "step": 1014 }, { "epoch": 0.11792041823990705, "grad_norm": 0.3703312277793884, "learning_rate": 0.0001, "loss": 1.7811, "step": 1015 }, { "epoch": 0.11803659599186755, "grad_norm": 0.3490375280380249, "learning_rate": 0.0001, "loss": 1.6314, "step": 1016 }, { "epoch": 0.11815277374382806, "grad_norm": 0.36077988147735596, "learning_rate": 0.0001, "loss": 1.7685, "step": 1017 }, { "epoch": 0.11826895149578856, "grad_norm": 0.366590291261673, "learning_rate": 0.0001, "loss": 1.7439, "step": 1018 }, { "epoch": 0.11838512924774906, "grad_norm": 0.3408995568752289, "learning_rate": 0.0001, "loss": 1.6552, "step": 1019 }, { "epoch": 0.11850130699970955, "grad_norm": 0.3684650659561157, "learning_rate": 0.0001, "loss": 1.7644, "step": 1020 }, { "epoch": 0.11861748475167005, "grad_norm": 0.37491750717163086, "learning_rate": 0.0001, "loss": 1.7273, "step": 1021 }, { "epoch": 0.11873366250363056, "grad_norm": 0.34040531516075134, "learning_rate": 0.0001, "loss": 1.7995, "step": 1022 }, { "epoch": 0.11884984025559106, "grad_norm": 0.3536158800125122, "learning_rate": 0.0001, "loss": 1.811, "step": 1023 }, { "epoch": 0.11896601800755155, "grad_norm": 0.38111457228660583, "learning_rate": 0.0001, "loss": 1.8178, "step": 1024 }, { "epoch": 0.11908219575951205, "grad_norm": 0.39054590463638306, "learning_rate": 0.0001, "loss": 1.8101, "step": 1025 }, { "epoch": 0.11919837351147256, "grad_norm": 0.37817317247390747, "learning_rate": 0.0001, "loss": 1.8686, "step": 1026 }, { "epoch": 0.11931455126343306, "grad_norm": 0.38268011808395386, "learning_rate": 0.0001, "loss": 1.8499, "step": 1027 }, { "epoch": 0.11943072901539355, "grad_norm": 0.3847000002861023, "learning_rate": 0.0001, "loss": 1.699, "step": 1028 }, { "epoch": 0.11954690676735405, "grad_norm": 0.3735695481300354, "learning_rate": 0.0001, "loss": 1.7161, "step": 1029 }, { "epoch": 0.11966308451931455, "grad_norm": 0.3305729031562805, "learning_rate": 0.0001, "loss": 1.7738, "step": 1030 }, { "epoch": 0.11977926227127506, "grad_norm": 0.3482242822647095, "learning_rate": 0.0001, "loss": 1.8291, "step": 1031 }, { "epoch": 0.11989544002323554, "grad_norm": 0.359893798828125, "learning_rate": 0.0001, "loss": 1.7426, "step": 1032 }, { "epoch": 0.12001161777519605, "grad_norm": 0.36758822202682495, "learning_rate": 0.0001, "loss": 1.9225, "step": 1033 }, { "epoch": 0.12012779552715655, "grad_norm": 0.3210867643356323, "learning_rate": 0.0001, "loss": 1.5384, "step": 1034 }, { "epoch": 0.12024397327911705, "grad_norm": 0.3599686622619629, "learning_rate": 0.0001, "loss": 1.77, "step": 1035 }, { "epoch": 0.12036015103107754, "grad_norm": 0.3894933760166168, "learning_rate": 0.0001, "loss": 1.7807, "step": 1036 }, { "epoch": 0.12047632878303804, "grad_norm": 0.3600466847419739, "learning_rate": 0.0001, "loss": 1.7304, "step": 1037 }, { "epoch": 0.12059250653499855, "grad_norm": 0.3773282766342163, "learning_rate": 0.0001, "loss": 1.7148, "step": 1038 }, { "epoch": 0.12070868428695905, "grad_norm": 0.342816561460495, "learning_rate": 0.0001, "loss": 1.6856, "step": 1039 }, { "epoch": 0.12082486203891955, "grad_norm": 0.34455814957618713, "learning_rate": 0.0001, "loss": 1.8846, "step": 1040 }, { "epoch": 0.12094103979088004, "grad_norm": 0.3425041437149048, "learning_rate": 0.0001, "loss": 1.5655, "step": 1041 }, { "epoch": 0.12105721754284055, "grad_norm": 0.3462069034576416, "learning_rate": 0.0001, "loss": 1.7432, "step": 1042 }, { "epoch": 0.12117339529480105, "grad_norm": 0.3301829397678375, "learning_rate": 0.0001, "loss": 1.5452, "step": 1043 }, { "epoch": 0.12128957304676155, "grad_norm": 0.32111644744873047, "learning_rate": 0.0001, "loss": 1.724, "step": 1044 }, { "epoch": 0.12140575079872204, "grad_norm": 0.3677181005477905, "learning_rate": 0.0001, "loss": 1.7678, "step": 1045 }, { "epoch": 0.12152192855068254, "grad_norm": 0.3687067925930023, "learning_rate": 0.0001, "loss": 1.6888, "step": 1046 }, { "epoch": 0.12163810630264305, "grad_norm": 0.34119775891304016, "learning_rate": 0.0001, "loss": 1.7466, "step": 1047 }, { "epoch": 0.12175428405460355, "grad_norm": 0.3656751811504364, "learning_rate": 0.0001, "loss": 1.6437, "step": 1048 }, { "epoch": 0.12187046180656404, "grad_norm": 0.35760533809661865, "learning_rate": 0.0001, "loss": 1.8831, "step": 1049 }, { "epoch": 0.12198663955852454, "grad_norm": 0.3624860942363739, "learning_rate": 0.0001, "loss": 1.7621, "step": 1050 }, { "epoch": 0.12210281731048504, "grad_norm": 0.3888024091720581, "learning_rate": 0.0001, "loss": 1.6873, "step": 1051 }, { "epoch": 0.12221899506244555, "grad_norm": 0.39359578490257263, "learning_rate": 0.0001, "loss": 1.9219, "step": 1052 }, { "epoch": 0.12233517281440603, "grad_norm": 0.3813614249229431, "learning_rate": 0.0001, "loss": 1.7615, "step": 1053 }, { "epoch": 0.12245135056636654, "grad_norm": 0.3501231074333191, "learning_rate": 0.0001, "loss": 1.7342, "step": 1054 }, { "epoch": 0.12256752831832704, "grad_norm": 0.32617613673210144, "learning_rate": 0.0001, "loss": 1.6313, "step": 1055 }, { "epoch": 0.12268370607028754, "grad_norm": 0.30450668931007385, "learning_rate": 0.0001, "loss": 1.4726, "step": 1056 }, { "epoch": 0.12279988382224805, "grad_norm": 0.33408787846565247, "learning_rate": 0.0001, "loss": 1.4528, "step": 1057 }, { "epoch": 0.12291606157420853, "grad_norm": 0.3771173357963562, "learning_rate": 0.0001, "loss": 1.8417, "step": 1058 }, { "epoch": 0.12303223932616904, "grad_norm": 0.3441436290740967, "learning_rate": 0.0001, "loss": 1.6334, "step": 1059 }, { "epoch": 0.12314841707812954, "grad_norm": 0.3676146864891052, "learning_rate": 0.0001, "loss": 1.6966, "step": 1060 }, { "epoch": 0.12326459483009004, "grad_norm": 0.35352498292922974, "learning_rate": 0.0001, "loss": 1.5551, "step": 1061 }, { "epoch": 0.12338077258205053, "grad_norm": 0.43162113428115845, "learning_rate": 0.0001, "loss": 1.593, "step": 1062 }, { "epoch": 0.12349695033401104, "grad_norm": 0.370995432138443, "learning_rate": 0.0001, "loss": 1.7928, "step": 1063 }, { "epoch": 0.12361312808597154, "grad_norm": 0.35732775926589966, "learning_rate": 0.0001, "loss": 1.7332, "step": 1064 }, { "epoch": 0.12372930583793204, "grad_norm": 0.40261656045913696, "learning_rate": 0.0001, "loss": 2.0031, "step": 1065 }, { "epoch": 0.12384548358989253, "grad_norm": 0.34159186482429504, "learning_rate": 0.0001, "loss": 1.6979, "step": 1066 }, { "epoch": 0.12396166134185303, "grad_norm": 0.3457348942756653, "learning_rate": 0.0001, "loss": 1.6923, "step": 1067 }, { "epoch": 0.12407783909381354, "grad_norm": 0.36517080664634705, "learning_rate": 0.0001, "loss": 1.7111, "step": 1068 }, { "epoch": 0.12419401684577404, "grad_norm": 0.3570208251476288, "learning_rate": 0.0001, "loss": 1.5761, "step": 1069 }, { "epoch": 0.12431019459773453, "grad_norm": 0.33659952878952026, "learning_rate": 0.0001, "loss": 1.476, "step": 1070 }, { "epoch": 0.12442637234969503, "grad_norm": 0.3287290930747986, "learning_rate": 0.0001, "loss": 1.6783, "step": 1071 }, { "epoch": 0.12454255010165553, "grad_norm": 0.3403220772743225, "learning_rate": 0.0001, "loss": 1.8731, "step": 1072 }, { "epoch": 0.12465872785361604, "grad_norm": 0.38080114126205444, "learning_rate": 0.0001, "loss": 1.6735, "step": 1073 }, { "epoch": 0.12477490560557654, "grad_norm": 0.3732984662055969, "learning_rate": 0.0001, "loss": 1.7634, "step": 1074 }, { "epoch": 0.12489108335753703, "grad_norm": 0.3515911102294922, "learning_rate": 0.0001, "loss": 1.7473, "step": 1075 }, { "epoch": 0.12500726110949753, "grad_norm": 0.36280357837677, "learning_rate": 0.0001, "loss": 1.852, "step": 1076 }, { "epoch": 0.12512343886145802, "grad_norm": 0.4288184344768524, "learning_rate": 0.0001, "loss": 1.9649, "step": 1077 }, { "epoch": 0.12523961661341854, "grad_norm": 0.3732617199420929, "learning_rate": 0.0001, "loss": 1.9636, "step": 1078 }, { "epoch": 0.12535579436537903, "grad_norm": 0.3731965124607086, "learning_rate": 0.0001, "loss": 1.6412, "step": 1079 }, { "epoch": 0.12547197211733954, "grad_norm": 0.33766472339630127, "learning_rate": 0.0001, "loss": 1.6632, "step": 1080 }, { "epoch": 0.12558814986930003, "grad_norm": 0.41299498081207275, "learning_rate": 0.0001, "loss": 2.008, "step": 1081 }, { "epoch": 0.12570432762126052, "grad_norm": 0.332131952047348, "learning_rate": 0.0001, "loss": 1.6935, "step": 1082 }, { "epoch": 0.12582050537322104, "grad_norm": 0.3629733920097351, "learning_rate": 0.0001, "loss": 1.8256, "step": 1083 }, { "epoch": 0.12593668312518153, "grad_norm": 0.35666128993034363, "learning_rate": 0.0001, "loss": 1.6795, "step": 1084 }, { "epoch": 0.12605286087714201, "grad_norm": 0.3693120777606964, "learning_rate": 0.0001, "loss": 1.8276, "step": 1085 }, { "epoch": 0.12616903862910253, "grad_norm": 0.38100001215934753, "learning_rate": 0.0001, "loss": 1.6575, "step": 1086 }, { "epoch": 0.12628521638106302, "grad_norm": 0.35735470056533813, "learning_rate": 0.0001, "loss": 1.8506, "step": 1087 }, { "epoch": 0.12640139413302354, "grad_norm": 0.3401797413825989, "learning_rate": 0.0001, "loss": 1.7468, "step": 1088 }, { "epoch": 0.12651757188498403, "grad_norm": 0.34798330068588257, "learning_rate": 0.0001, "loss": 1.8221, "step": 1089 }, { "epoch": 0.12663374963694451, "grad_norm": 0.3499447703361511, "learning_rate": 0.0001, "loss": 1.6818, "step": 1090 }, { "epoch": 0.12674992738890503, "grad_norm": 0.3714812099933624, "learning_rate": 0.0001, "loss": 1.6802, "step": 1091 }, { "epoch": 0.12686610514086552, "grad_norm": 0.3492056429386139, "learning_rate": 0.0001, "loss": 1.6368, "step": 1092 }, { "epoch": 0.12698228289282604, "grad_norm": 0.3401804566383362, "learning_rate": 0.0001, "loss": 1.7797, "step": 1093 }, { "epoch": 0.12709846064478653, "grad_norm": 0.33436286449432373, "learning_rate": 0.0001, "loss": 1.7528, "step": 1094 }, { "epoch": 0.12721463839674702, "grad_norm": 0.35085633397102356, "learning_rate": 0.0001, "loss": 1.7508, "step": 1095 }, { "epoch": 0.12733081614870753, "grad_norm": 0.3480139970779419, "learning_rate": 0.0001, "loss": 1.7582, "step": 1096 }, { "epoch": 0.12744699390066802, "grad_norm": 0.3390267789363861, "learning_rate": 0.0001, "loss": 1.555, "step": 1097 }, { "epoch": 0.1275631716526285, "grad_norm": 0.3470866084098816, "learning_rate": 0.0001, "loss": 1.7661, "step": 1098 }, { "epoch": 0.12767934940458903, "grad_norm": 0.37509381771087646, "learning_rate": 0.0001, "loss": 1.761, "step": 1099 }, { "epoch": 0.12779552715654952, "grad_norm": 0.35059359669685364, "learning_rate": 0.0001, "loss": 1.5687, "step": 1100 }, { "epoch": 0.12791170490851003, "grad_norm": 0.38559988141059875, "learning_rate": 0.0001, "loss": 1.9108, "step": 1101 }, { "epoch": 0.12802788266047052, "grad_norm": 0.3470633924007416, "learning_rate": 0.0001, "loss": 1.6246, "step": 1102 }, { "epoch": 0.128144060412431, "grad_norm": 0.3547315001487732, "learning_rate": 0.0001, "loss": 1.7726, "step": 1103 }, { "epoch": 0.12826023816439153, "grad_norm": 0.3657218813896179, "learning_rate": 0.0001, "loss": 1.8437, "step": 1104 }, { "epoch": 0.12837641591635202, "grad_norm": 0.3548438847064972, "learning_rate": 0.0001, "loss": 1.5604, "step": 1105 }, { "epoch": 0.1284925936683125, "grad_norm": 0.3553130030632019, "learning_rate": 0.0001, "loss": 1.7507, "step": 1106 }, { "epoch": 0.12860877142027302, "grad_norm": 0.35484910011291504, "learning_rate": 0.0001, "loss": 1.7819, "step": 1107 }, { "epoch": 0.1287249491722335, "grad_norm": 0.3301905691623688, "learning_rate": 0.0001, "loss": 1.6291, "step": 1108 }, { "epoch": 0.12884112692419403, "grad_norm": 0.3722817301750183, "learning_rate": 0.0001, "loss": 1.8194, "step": 1109 }, { "epoch": 0.12895730467615452, "grad_norm": 0.408559113740921, "learning_rate": 0.0001, "loss": 1.8952, "step": 1110 }, { "epoch": 0.129073482428115, "grad_norm": 0.3764549791812897, "learning_rate": 0.0001, "loss": 1.8921, "step": 1111 }, { "epoch": 0.12918966018007552, "grad_norm": 0.3443762958049774, "learning_rate": 0.0001, "loss": 1.7042, "step": 1112 }, { "epoch": 0.129305837932036, "grad_norm": 0.37455350160598755, "learning_rate": 0.0001, "loss": 1.7833, "step": 1113 }, { "epoch": 0.12942201568399653, "grad_norm": 0.35170894861221313, "learning_rate": 0.0001, "loss": 1.6039, "step": 1114 }, { "epoch": 0.12953819343595702, "grad_norm": 0.33991748094558716, "learning_rate": 0.0001, "loss": 1.8047, "step": 1115 }, { "epoch": 0.1296543711879175, "grad_norm": 0.3735693693161011, "learning_rate": 0.0001, "loss": 1.928, "step": 1116 }, { "epoch": 0.12977054893987802, "grad_norm": 0.34423983097076416, "learning_rate": 0.0001, "loss": 1.6854, "step": 1117 }, { "epoch": 0.1298867266918385, "grad_norm": 0.3688075542449951, "learning_rate": 0.0001, "loss": 1.8482, "step": 1118 }, { "epoch": 0.130002904443799, "grad_norm": 0.3608585000038147, "learning_rate": 0.0001, "loss": 1.8143, "step": 1119 }, { "epoch": 0.13011908219575952, "grad_norm": 0.38905078172683716, "learning_rate": 0.0001, "loss": 1.7257, "step": 1120 }, { "epoch": 0.13023525994772, "grad_norm": 0.3615328371524811, "learning_rate": 0.0001, "loss": 1.7845, "step": 1121 }, { "epoch": 0.13035143769968052, "grad_norm": 0.35324275493621826, "learning_rate": 0.0001, "loss": 1.7745, "step": 1122 }, { "epoch": 0.130467615451641, "grad_norm": 0.3279106318950653, "learning_rate": 0.0001, "loss": 1.6848, "step": 1123 }, { "epoch": 0.1305837932036015, "grad_norm": 0.37770041823387146, "learning_rate": 0.0001, "loss": 1.9185, "step": 1124 }, { "epoch": 0.13069997095556202, "grad_norm": 0.3365723192691803, "learning_rate": 0.0001, "loss": 1.5406, "step": 1125 }, { "epoch": 0.1308161487075225, "grad_norm": 0.35211873054504395, "learning_rate": 0.0001, "loss": 1.5533, "step": 1126 }, { "epoch": 0.13093232645948302, "grad_norm": 0.3364546000957489, "learning_rate": 0.0001, "loss": 1.607, "step": 1127 }, { "epoch": 0.1310485042114435, "grad_norm": 0.33624467253685, "learning_rate": 0.0001, "loss": 1.5203, "step": 1128 }, { "epoch": 0.131164681963404, "grad_norm": 0.34484627842903137, "learning_rate": 0.0001, "loss": 1.6558, "step": 1129 }, { "epoch": 0.13128085971536452, "grad_norm": 0.3245522677898407, "learning_rate": 0.0001, "loss": 1.6121, "step": 1130 }, { "epoch": 0.131397037467325, "grad_norm": 0.3584153652191162, "learning_rate": 0.0001, "loss": 1.6107, "step": 1131 }, { "epoch": 0.1315132152192855, "grad_norm": 0.35809946060180664, "learning_rate": 0.0001, "loss": 1.7665, "step": 1132 }, { "epoch": 0.131629392971246, "grad_norm": 0.3418474793434143, "learning_rate": 0.0001, "loss": 1.6811, "step": 1133 }, { "epoch": 0.1317455707232065, "grad_norm": 0.3623388707637787, "learning_rate": 0.0001, "loss": 1.7171, "step": 1134 }, { "epoch": 0.13186174847516702, "grad_norm": 0.3962494730949402, "learning_rate": 0.0001, "loss": 1.9956, "step": 1135 }, { "epoch": 0.1319779262271275, "grad_norm": 0.34545156359672546, "learning_rate": 0.0001, "loss": 1.4836, "step": 1136 }, { "epoch": 0.132094103979088, "grad_norm": 0.3415702283382416, "learning_rate": 0.0001, "loss": 1.7397, "step": 1137 }, { "epoch": 0.1322102817310485, "grad_norm": 0.41906920075416565, "learning_rate": 0.0001, "loss": 1.8653, "step": 1138 }, { "epoch": 0.132326459483009, "grad_norm": 0.3778826892375946, "learning_rate": 0.0001, "loss": 1.696, "step": 1139 }, { "epoch": 0.1324426372349695, "grad_norm": 0.3520076870918274, "learning_rate": 0.0001, "loss": 1.7079, "step": 1140 }, { "epoch": 0.13255881498693, "grad_norm": 0.38221481442451477, "learning_rate": 0.0001, "loss": 1.7196, "step": 1141 }, { "epoch": 0.1326749927388905, "grad_norm": 0.34587883949279785, "learning_rate": 0.0001, "loss": 1.7352, "step": 1142 }, { "epoch": 0.132791170490851, "grad_norm": 0.35534965991973877, "learning_rate": 0.0001, "loss": 1.7051, "step": 1143 }, { "epoch": 0.1329073482428115, "grad_norm": 0.36971423029899597, "learning_rate": 0.0001, "loss": 1.7933, "step": 1144 }, { "epoch": 0.133023525994772, "grad_norm": 0.3652137219905853, "learning_rate": 0.0001, "loss": 1.7395, "step": 1145 }, { "epoch": 0.1331397037467325, "grad_norm": 0.340309739112854, "learning_rate": 0.0001, "loss": 1.75, "step": 1146 }, { "epoch": 0.133255881498693, "grad_norm": 0.3661729693412781, "learning_rate": 0.0001, "loss": 1.7802, "step": 1147 }, { "epoch": 0.1333720592506535, "grad_norm": 0.3509800434112549, "learning_rate": 0.0001, "loss": 1.7748, "step": 1148 }, { "epoch": 0.133488237002614, "grad_norm": 0.34903261065483093, "learning_rate": 0.0001, "loss": 1.6914, "step": 1149 }, { "epoch": 0.1336044147545745, "grad_norm": 0.3474218249320984, "learning_rate": 0.0001, "loss": 1.8942, "step": 1150 }, { "epoch": 0.133720592506535, "grad_norm": 0.3618185818195343, "learning_rate": 0.0001, "loss": 1.7351, "step": 1151 }, { "epoch": 0.1338367702584955, "grad_norm": 0.3447827696800232, "learning_rate": 0.0001, "loss": 1.5644, "step": 1152 }, { "epoch": 0.13395294801045599, "grad_norm": 0.35929834842681885, "learning_rate": 0.0001, "loss": 1.6836, "step": 1153 }, { "epoch": 0.1340691257624165, "grad_norm": 0.378379762172699, "learning_rate": 0.0001, "loss": 1.8789, "step": 1154 }, { "epoch": 0.134185303514377, "grad_norm": 0.3777737319469452, "learning_rate": 0.0001, "loss": 1.6899, "step": 1155 }, { "epoch": 0.1343014812663375, "grad_norm": 0.542852520942688, "learning_rate": 0.0001, "loss": 1.5277, "step": 1156 }, { "epoch": 0.134417659018298, "grad_norm": 0.3426233232021332, "learning_rate": 0.0001, "loss": 1.4822, "step": 1157 }, { "epoch": 0.13453383677025849, "grad_norm": 0.34573081135749817, "learning_rate": 0.0001, "loss": 1.8107, "step": 1158 }, { "epoch": 0.134650014522219, "grad_norm": 0.35474127531051636, "learning_rate": 0.0001, "loss": 1.7912, "step": 1159 }, { "epoch": 0.1347661922741795, "grad_norm": 0.3295106589794159, "learning_rate": 0.0001, "loss": 1.6829, "step": 1160 }, { "epoch": 0.13488237002613998, "grad_norm": 0.380728542804718, "learning_rate": 0.0001, "loss": 1.8133, "step": 1161 }, { "epoch": 0.1349985477781005, "grad_norm": 0.38181304931640625, "learning_rate": 0.0001, "loss": 1.748, "step": 1162 }, { "epoch": 0.135114725530061, "grad_norm": 0.35895970463752747, "learning_rate": 0.0001, "loss": 1.6405, "step": 1163 }, { "epoch": 0.1352309032820215, "grad_norm": 0.3574581742286682, "learning_rate": 0.0001, "loss": 1.7418, "step": 1164 }, { "epoch": 0.135347081033982, "grad_norm": 0.3409847319126129, "learning_rate": 0.0001, "loss": 1.5072, "step": 1165 }, { "epoch": 0.13546325878594248, "grad_norm": 0.36997130513191223, "learning_rate": 0.0001, "loss": 1.8488, "step": 1166 }, { "epoch": 0.135579436537903, "grad_norm": 0.34550440311431885, "learning_rate": 0.0001, "loss": 1.715, "step": 1167 }, { "epoch": 0.1356956142898635, "grad_norm": 0.36882877349853516, "learning_rate": 0.0001, "loss": 1.7311, "step": 1168 }, { "epoch": 0.135811792041824, "grad_norm": 0.34376415610313416, "learning_rate": 0.0001, "loss": 1.6462, "step": 1169 }, { "epoch": 0.1359279697937845, "grad_norm": 0.35478463768959045, "learning_rate": 0.0001, "loss": 1.836, "step": 1170 }, { "epoch": 0.13604414754574498, "grad_norm": 0.3668883442878723, "learning_rate": 0.0001, "loss": 1.8465, "step": 1171 }, { "epoch": 0.1361603252977055, "grad_norm": 0.33655285835266113, "learning_rate": 0.0001, "loss": 1.6395, "step": 1172 }, { "epoch": 0.136276503049666, "grad_norm": 0.34968823194503784, "learning_rate": 0.0001, "loss": 1.6423, "step": 1173 }, { "epoch": 0.13639268080162648, "grad_norm": 0.37849414348602295, "learning_rate": 0.0001, "loss": 1.832, "step": 1174 }, { "epoch": 0.136508858553587, "grad_norm": 0.3569866120815277, "learning_rate": 0.0001, "loss": 1.7077, "step": 1175 }, { "epoch": 0.13662503630554748, "grad_norm": 0.34238025546073914, "learning_rate": 0.0001, "loss": 1.7226, "step": 1176 }, { "epoch": 0.136741214057508, "grad_norm": 0.35172078013420105, "learning_rate": 0.0001, "loss": 1.7818, "step": 1177 }, { "epoch": 0.1368573918094685, "grad_norm": 0.35582435131073, "learning_rate": 0.0001, "loss": 1.6404, "step": 1178 }, { "epoch": 0.13697356956142898, "grad_norm": 0.36293935775756836, "learning_rate": 0.0001, "loss": 1.8143, "step": 1179 }, { "epoch": 0.1370897473133895, "grad_norm": 0.4134126901626587, "learning_rate": 0.0001, "loss": 1.8926, "step": 1180 }, { "epoch": 0.13720592506534998, "grad_norm": 0.34876781702041626, "learning_rate": 0.0001, "loss": 1.7132, "step": 1181 }, { "epoch": 0.1373221028173105, "grad_norm": 0.35906705260276794, "learning_rate": 0.0001, "loss": 1.8285, "step": 1182 }, { "epoch": 0.137438280569271, "grad_norm": 0.3706406354904175, "learning_rate": 0.0001, "loss": 1.8965, "step": 1183 }, { "epoch": 0.13755445832123148, "grad_norm": 0.3602171242237091, "learning_rate": 0.0001, "loss": 1.7168, "step": 1184 }, { "epoch": 0.137670636073192, "grad_norm": 0.3797997534275055, "learning_rate": 0.0001, "loss": 1.8357, "step": 1185 }, { "epoch": 0.13778681382515248, "grad_norm": 0.3507281541824341, "learning_rate": 0.0001, "loss": 1.7615, "step": 1186 }, { "epoch": 0.13790299157711297, "grad_norm": 0.3676402270793915, "learning_rate": 0.0001, "loss": 1.7027, "step": 1187 }, { "epoch": 0.1380191693290735, "grad_norm": 0.36422815918922424, "learning_rate": 0.0001, "loss": 1.7189, "step": 1188 }, { "epoch": 0.13813534708103398, "grad_norm": 0.3710031807422638, "learning_rate": 0.0001, "loss": 1.7097, "step": 1189 }, { "epoch": 0.1382515248329945, "grad_norm": 0.34949737787246704, "learning_rate": 0.0001, "loss": 1.6865, "step": 1190 }, { "epoch": 0.13836770258495498, "grad_norm": 0.38215914368629456, "learning_rate": 0.0001, "loss": 1.9185, "step": 1191 }, { "epoch": 0.13848388033691547, "grad_norm": 0.3533918857574463, "learning_rate": 0.0001, "loss": 1.7133, "step": 1192 }, { "epoch": 0.138600058088876, "grad_norm": 0.36544063687324524, "learning_rate": 0.0001, "loss": 1.5791, "step": 1193 }, { "epoch": 0.13871623584083648, "grad_norm": 0.3464662730693817, "learning_rate": 0.0001, "loss": 1.6104, "step": 1194 }, { "epoch": 0.13883241359279697, "grad_norm": 0.32799604535102844, "learning_rate": 0.0001, "loss": 1.6871, "step": 1195 }, { "epoch": 0.13894859134475748, "grad_norm": 0.37455034255981445, "learning_rate": 0.0001, "loss": 1.895, "step": 1196 }, { "epoch": 0.13906476909671797, "grad_norm": 0.3819703459739685, "learning_rate": 0.0001, "loss": 1.6722, "step": 1197 }, { "epoch": 0.1391809468486785, "grad_norm": 0.33819907903671265, "learning_rate": 0.0001, "loss": 1.7259, "step": 1198 }, { "epoch": 0.13929712460063898, "grad_norm": 0.36918461322784424, "learning_rate": 0.0001, "loss": 1.5488, "step": 1199 }, { "epoch": 0.13941330235259947, "grad_norm": 0.37889495491981506, "learning_rate": 0.0001, "loss": 1.8902, "step": 1200 }, { "epoch": 0.13952948010455998, "grad_norm": 0.34494003653526306, "learning_rate": 0.0001, "loss": 1.6674, "step": 1201 }, { "epoch": 0.13964565785652047, "grad_norm": 0.3527129292488098, "learning_rate": 0.0001, "loss": 1.7551, "step": 1202 }, { "epoch": 0.139761835608481, "grad_norm": 0.35137444734573364, "learning_rate": 0.0001, "loss": 1.8853, "step": 1203 }, { "epoch": 0.13987801336044148, "grad_norm": 0.3461925685405731, "learning_rate": 0.0001, "loss": 1.7065, "step": 1204 }, { "epoch": 0.13999419111240197, "grad_norm": 0.36129575967788696, "learning_rate": 0.0001, "loss": 1.653, "step": 1205 }, { "epoch": 0.14011036886436248, "grad_norm": 0.34536105394363403, "learning_rate": 0.0001, "loss": 1.6792, "step": 1206 }, { "epoch": 0.14022654661632297, "grad_norm": 0.35485297441482544, "learning_rate": 0.0001, "loss": 1.6453, "step": 1207 }, { "epoch": 0.14034272436828346, "grad_norm": 0.3653600811958313, "learning_rate": 0.0001, "loss": 1.7249, "step": 1208 }, { "epoch": 0.14045890212024398, "grad_norm": 0.36751556396484375, "learning_rate": 0.0001, "loss": 1.9142, "step": 1209 }, { "epoch": 0.14057507987220447, "grad_norm": 0.3849860727787018, "learning_rate": 0.0001, "loss": 1.7801, "step": 1210 }, { "epoch": 0.14069125762416498, "grad_norm": 0.3888527452945709, "learning_rate": 0.0001, "loss": 1.7732, "step": 1211 }, { "epoch": 0.14080743537612547, "grad_norm": 0.3473045825958252, "learning_rate": 0.0001, "loss": 1.7279, "step": 1212 }, { "epoch": 0.14092361312808596, "grad_norm": 0.31697720289230347, "learning_rate": 0.0001, "loss": 1.5903, "step": 1213 }, { "epoch": 0.14103979088004648, "grad_norm": 0.36347395181655884, "learning_rate": 0.0001, "loss": 1.8358, "step": 1214 }, { "epoch": 0.14115596863200697, "grad_norm": 0.34787067770957947, "learning_rate": 0.0001, "loss": 1.7855, "step": 1215 }, { "epoch": 0.14127214638396748, "grad_norm": 0.35520100593566895, "learning_rate": 0.0001, "loss": 1.7215, "step": 1216 }, { "epoch": 0.14138832413592797, "grad_norm": 0.3633323609828949, "learning_rate": 0.0001, "loss": 1.7224, "step": 1217 }, { "epoch": 0.14150450188788846, "grad_norm": 0.3367605209350586, "learning_rate": 0.0001, "loss": 1.6469, "step": 1218 }, { "epoch": 0.14162067963984898, "grad_norm": 0.36629173159599304, "learning_rate": 0.0001, "loss": 1.8747, "step": 1219 }, { "epoch": 0.14173685739180947, "grad_norm": 0.3563576340675354, "learning_rate": 0.0001, "loss": 1.8185, "step": 1220 }, { "epoch": 0.14185303514376996, "grad_norm": 0.32516106963157654, "learning_rate": 0.0001, "loss": 1.5362, "step": 1221 }, { "epoch": 0.14196921289573047, "grad_norm": 0.3476703464984894, "learning_rate": 0.0001, "loss": 1.7661, "step": 1222 }, { "epoch": 0.14208539064769096, "grad_norm": 0.3780508041381836, "learning_rate": 0.0001, "loss": 1.7457, "step": 1223 }, { "epoch": 0.14220156839965148, "grad_norm": 0.3513147234916687, "learning_rate": 0.0001, "loss": 1.6291, "step": 1224 }, { "epoch": 0.14231774615161197, "grad_norm": 0.384937584400177, "learning_rate": 0.0001, "loss": 1.8593, "step": 1225 }, { "epoch": 0.14243392390357246, "grad_norm": 0.37124475836753845, "learning_rate": 0.0001, "loss": 1.8555, "step": 1226 }, { "epoch": 0.14255010165553297, "grad_norm": 0.3864074647426605, "learning_rate": 0.0001, "loss": 1.7074, "step": 1227 }, { "epoch": 0.14266627940749346, "grad_norm": 0.34239131212234497, "learning_rate": 0.0001, "loss": 1.668, "step": 1228 }, { "epoch": 0.14278245715945395, "grad_norm": 0.3898089528083801, "learning_rate": 0.0001, "loss": 1.8215, "step": 1229 }, { "epoch": 0.14289863491141447, "grad_norm": 0.3413831889629364, "learning_rate": 0.0001, "loss": 1.6671, "step": 1230 }, { "epoch": 0.14301481266337496, "grad_norm": 0.34374555945396423, "learning_rate": 0.0001, "loss": 1.7268, "step": 1231 }, { "epoch": 0.14313099041533547, "grad_norm": 0.3335866928100586, "learning_rate": 0.0001, "loss": 1.608, "step": 1232 }, { "epoch": 0.14324716816729596, "grad_norm": 0.34190499782562256, "learning_rate": 0.0001, "loss": 1.6609, "step": 1233 }, { "epoch": 0.14336334591925645, "grad_norm": 0.3815557062625885, "learning_rate": 0.0001, "loss": 1.9198, "step": 1234 }, { "epoch": 0.14347952367121697, "grad_norm": 0.3535671830177307, "learning_rate": 0.0001, "loss": 1.6951, "step": 1235 }, { "epoch": 0.14359570142317746, "grad_norm": 0.38413700461387634, "learning_rate": 0.0001, "loss": 1.8972, "step": 1236 }, { "epoch": 0.14371187917513797, "grad_norm": 0.3618411719799042, "learning_rate": 0.0001, "loss": 1.6467, "step": 1237 }, { "epoch": 0.14382805692709846, "grad_norm": 0.34210920333862305, "learning_rate": 0.0001, "loss": 1.634, "step": 1238 }, { "epoch": 0.14394423467905895, "grad_norm": 0.3580038249492645, "learning_rate": 0.0001, "loss": 1.6651, "step": 1239 }, { "epoch": 0.14406041243101947, "grad_norm": 0.36096274852752686, "learning_rate": 0.0001, "loss": 1.7914, "step": 1240 }, { "epoch": 0.14417659018297996, "grad_norm": 0.3829607665538788, "learning_rate": 0.0001, "loss": 1.8332, "step": 1241 }, { "epoch": 0.14429276793494045, "grad_norm": 0.3591415286064148, "learning_rate": 0.0001, "loss": 1.7212, "step": 1242 }, { "epoch": 0.14440894568690096, "grad_norm": 0.36007246375083923, "learning_rate": 0.0001, "loss": 1.7336, "step": 1243 }, { "epoch": 0.14452512343886145, "grad_norm": 0.3532988131046295, "learning_rate": 0.0001, "loss": 1.8504, "step": 1244 }, { "epoch": 0.14464130119082197, "grad_norm": 0.37223345041275024, "learning_rate": 0.0001, "loss": 1.7329, "step": 1245 }, { "epoch": 0.14475747894278246, "grad_norm": 0.34656253457069397, "learning_rate": 0.0001, "loss": 1.679, "step": 1246 }, { "epoch": 0.14487365669474295, "grad_norm": 0.3697267472743988, "learning_rate": 0.0001, "loss": 1.9745, "step": 1247 }, { "epoch": 0.14498983444670346, "grad_norm": 0.3211793899536133, "learning_rate": 0.0001, "loss": 1.5591, "step": 1248 }, { "epoch": 0.14510601219866395, "grad_norm": 0.350603848695755, "learning_rate": 0.0001, "loss": 1.5883, "step": 1249 }, { "epoch": 0.14522218995062444, "grad_norm": 0.3709065318107605, "learning_rate": 0.0001, "loss": 1.7782, "step": 1250 }, { "epoch": 0.14533836770258496, "grad_norm": 0.367072194814682, "learning_rate": 0.0001, "loss": 1.6962, "step": 1251 }, { "epoch": 0.14545454545454545, "grad_norm": 0.35711991786956787, "learning_rate": 0.0001, "loss": 1.8047, "step": 1252 }, { "epoch": 0.14557072320650596, "grad_norm": 0.37235334515571594, "learning_rate": 0.0001, "loss": 1.7157, "step": 1253 }, { "epoch": 0.14568690095846645, "grad_norm": 0.36526671051979065, "learning_rate": 0.0001, "loss": 1.7217, "step": 1254 }, { "epoch": 0.14580307871042694, "grad_norm": 0.35159561038017273, "learning_rate": 0.0001, "loss": 1.7109, "step": 1255 }, { "epoch": 0.14591925646238746, "grad_norm": 0.35681840777397156, "learning_rate": 0.0001, "loss": 1.731, "step": 1256 }, { "epoch": 0.14603543421434795, "grad_norm": 0.3531685471534729, "learning_rate": 0.0001, "loss": 1.714, "step": 1257 }, { "epoch": 0.14615161196630846, "grad_norm": 0.3450145423412323, "learning_rate": 0.0001, "loss": 1.7257, "step": 1258 }, { "epoch": 0.14626778971826895, "grad_norm": 0.3472040891647339, "learning_rate": 0.0001, "loss": 1.7006, "step": 1259 }, { "epoch": 0.14638396747022944, "grad_norm": 0.35504022240638733, "learning_rate": 0.0001, "loss": 1.8701, "step": 1260 }, { "epoch": 0.14650014522218996, "grad_norm": 0.3664184808731079, "learning_rate": 0.0001, "loss": 1.7835, "step": 1261 }, { "epoch": 0.14661632297415045, "grad_norm": 0.32701820135116577, "learning_rate": 0.0001, "loss": 1.6524, "step": 1262 }, { "epoch": 0.14673250072611094, "grad_norm": 0.34154212474823, "learning_rate": 0.0001, "loss": 1.7332, "step": 1263 }, { "epoch": 0.14684867847807145, "grad_norm": 0.3491269052028656, "learning_rate": 0.0001, "loss": 1.6165, "step": 1264 }, { "epoch": 0.14696485623003194, "grad_norm": 0.351469486951828, "learning_rate": 0.0001, "loss": 1.6913, "step": 1265 }, { "epoch": 0.14708103398199246, "grad_norm": 0.3573369085788727, "learning_rate": 0.0001, "loss": 1.7032, "step": 1266 }, { "epoch": 0.14719721173395295, "grad_norm": 0.35862183570861816, "learning_rate": 0.0001, "loss": 1.6222, "step": 1267 }, { "epoch": 0.14731338948591344, "grad_norm": 0.3771260976791382, "learning_rate": 0.0001, "loss": 1.8338, "step": 1268 }, { "epoch": 0.14742956723787395, "grad_norm": 0.36234867572784424, "learning_rate": 0.0001, "loss": 1.5947, "step": 1269 }, { "epoch": 0.14754574498983444, "grad_norm": 0.35228458046913147, "learning_rate": 0.0001, "loss": 1.7534, "step": 1270 }, { "epoch": 0.14766192274179496, "grad_norm": 0.3665032982826233, "learning_rate": 0.0001, "loss": 1.772, "step": 1271 }, { "epoch": 0.14777810049375545, "grad_norm": 0.3650833070278168, "learning_rate": 0.0001, "loss": 1.7147, "step": 1272 }, { "epoch": 0.14789427824571594, "grad_norm": 0.36604440212249756, "learning_rate": 0.0001, "loss": 1.648, "step": 1273 }, { "epoch": 0.14801045599767645, "grad_norm": 0.360029935836792, "learning_rate": 0.0001, "loss": 1.7143, "step": 1274 }, { "epoch": 0.14812663374963694, "grad_norm": 0.3815794885158539, "learning_rate": 0.0001, "loss": 1.8357, "step": 1275 }, { "epoch": 0.14824281150159743, "grad_norm": 0.36898788809776306, "learning_rate": 0.0001, "loss": 1.6458, "step": 1276 }, { "epoch": 0.14835898925355795, "grad_norm": 0.3591456115245819, "learning_rate": 0.0001, "loss": 1.8453, "step": 1277 }, { "epoch": 0.14847516700551844, "grad_norm": 0.3744908273220062, "learning_rate": 0.0001, "loss": 1.6924, "step": 1278 }, { "epoch": 0.14859134475747895, "grad_norm": 0.3827156722545624, "learning_rate": 0.0001, "loss": 1.837, "step": 1279 }, { "epoch": 0.14870752250943944, "grad_norm": 0.34295526146888733, "learning_rate": 0.0001, "loss": 1.5949, "step": 1280 }, { "epoch": 0.14882370026139993, "grad_norm": 0.36582034826278687, "learning_rate": 0.0001, "loss": 1.8293, "step": 1281 }, { "epoch": 0.14893987801336045, "grad_norm": 0.34312430024147034, "learning_rate": 0.0001, "loss": 1.7463, "step": 1282 }, { "epoch": 0.14905605576532094, "grad_norm": 0.3591785728931427, "learning_rate": 0.0001, "loss": 1.6766, "step": 1283 }, { "epoch": 0.14917223351728143, "grad_norm": 0.3512047231197357, "learning_rate": 0.0001, "loss": 1.7607, "step": 1284 }, { "epoch": 0.14928841126924194, "grad_norm": 0.35547614097595215, "learning_rate": 0.0001, "loss": 1.8423, "step": 1285 }, { "epoch": 0.14940458902120243, "grad_norm": 0.3671259582042694, "learning_rate": 0.0001, "loss": 1.7298, "step": 1286 }, { "epoch": 0.14952076677316295, "grad_norm": 0.3785369098186493, "learning_rate": 0.0001, "loss": 1.7921, "step": 1287 }, { "epoch": 0.14963694452512344, "grad_norm": 0.3537178635597229, "learning_rate": 0.0001, "loss": 1.6989, "step": 1288 }, { "epoch": 0.14975312227708393, "grad_norm": 0.35491520166397095, "learning_rate": 0.0001, "loss": 1.7895, "step": 1289 }, { "epoch": 0.14986930002904444, "grad_norm": 0.3567078709602356, "learning_rate": 0.0001, "loss": 1.7894, "step": 1290 }, { "epoch": 0.14998547778100493, "grad_norm": 0.35973167419433594, "learning_rate": 0.0001, "loss": 1.8044, "step": 1291 }, { "epoch": 0.15010165553296545, "grad_norm": 0.3756448030471802, "learning_rate": 0.0001, "loss": 1.9249, "step": 1292 }, { "epoch": 0.15021783328492594, "grad_norm": 0.3457126021385193, "learning_rate": 0.0001, "loss": 1.6851, "step": 1293 }, { "epoch": 0.15033401103688643, "grad_norm": 0.3493401110172272, "learning_rate": 0.0001, "loss": 1.6958, "step": 1294 }, { "epoch": 0.15045018878884694, "grad_norm": 0.38052037358283997, "learning_rate": 0.0001, "loss": 1.8351, "step": 1295 }, { "epoch": 0.15056636654080743, "grad_norm": 0.326164186000824, "learning_rate": 0.0001, "loss": 1.6357, "step": 1296 }, { "epoch": 0.15068254429276792, "grad_norm": 0.37613871693611145, "learning_rate": 0.0001, "loss": 1.7088, "step": 1297 }, { "epoch": 0.15079872204472844, "grad_norm": 0.40426647663116455, "learning_rate": 0.0001, "loss": 1.8863, "step": 1298 }, { "epoch": 0.15091489979668893, "grad_norm": 0.3410615026950836, "learning_rate": 0.0001, "loss": 1.6051, "step": 1299 }, { "epoch": 0.15103107754864945, "grad_norm": 0.36211374402046204, "learning_rate": 0.0001, "loss": 1.7685, "step": 1300 }, { "epoch": 0.15114725530060993, "grad_norm": 0.39096057415008545, "learning_rate": 0.0001, "loss": 1.8656, "step": 1301 }, { "epoch": 0.15126343305257042, "grad_norm": 0.3603553771972656, "learning_rate": 0.0001, "loss": 1.6818, "step": 1302 }, { "epoch": 0.15137961080453094, "grad_norm": 0.3809981048107147, "learning_rate": 0.0001, "loss": 1.7441, "step": 1303 }, { "epoch": 0.15149578855649143, "grad_norm": 0.35240018367767334, "learning_rate": 0.0001, "loss": 1.6923, "step": 1304 }, { "epoch": 0.15161196630845195, "grad_norm": 0.3340260088443756, "learning_rate": 0.0001, "loss": 1.6864, "step": 1305 }, { "epoch": 0.15172814406041243, "grad_norm": 0.3688841164112091, "learning_rate": 0.0001, "loss": 1.7241, "step": 1306 }, { "epoch": 0.15184432181237292, "grad_norm": 0.36552947759628296, "learning_rate": 0.0001, "loss": 1.6414, "step": 1307 }, { "epoch": 0.15196049956433344, "grad_norm": 0.3461878001689911, "learning_rate": 0.0001, "loss": 1.6445, "step": 1308 }, { "epoch": 0.15207667731629393, "grad_norm": 0.35376498103141785, "learning_rate": 0.0001, "loss": 1.7873, "step": 1309 }, { "epoch": 0.15219285506825442, "grad_norm": 0.37778156995773315, "learning_rate": 0.0001, "loss": 1.6798, "step": 1310 }, { "epoch": 0.15230903282021493, "grad_norm": 0.3635299503803253, "learning_rate": 0.0001, "loss": 1.7731, "step": 1311 }, { "epoch": 0.15242521057217542, "grad_norm": 0.37583014369010925, "learning_rate": 0.0001, "loss": 1.8543, "step": 1312 }, { "epoch": 0.15254138832413594, "grad_norm": 0.37398314476013184, "learning_rate": 0.0001, "loss": 1.6894, "step": 1313 }, { "epoch": 0.15265756607609643, "grad_norm": 0.37770211696624756, "learning_rate": 0.0001, "loss": 1.6799, "step": 1314 }, { "epoch": 0.15277374382805692, "grad_norm": 0.3403077721595764, "learning_rate": 0.0001, "loss": 1.782, "step": 1315 }, { "epoch": 0.15288992158001743, "grad_norm": 0.3504425287246704, "learning_rate": 0.0001, "loss": 1.7012, "step": 1316 }, { "epoch": 0.15300609933197792, "grad_norm": 0.3553392291069031, "learning_rate": 0.0001, "loss": 1.5919, "step": 1317 }, { "epoch": 0.1531222770839384, "grad_norm": 0.3788677752017975, "learning_rate": 0.0001, "loss": 1.7182, "step": 1318 }, { "epoch": 0.15323845483589893, "grad_norm": 0.3600800037384033, "learning_rate": 0.0001, "loss": 1.7677, "step": 1319 }, { "epoch": 0.15335463258785942, "grad_norm": 0.38489770889282227, "learning_rate": 0.0001, "loss": 1.6607, "step": 1320 }, { "epoch": 0.15347081033981994, "grad_norm": 0.3559502959251404, "learning_rate": 0.0001, "loss": 1.576, "step": 1321 }, { "epoch": 0.15358698809178042, "grad_norm": 0.3516106605529785, "learning_rate": 0.0001, "loss": 1.7595, "step": 1322 }, { "epoch": 0.1537031658437409, "grad_norm": 0.35282179713249207, "learning_rate": 0.0001, "loss": 1.7575, "step": 1323 }, { "epoch": 0.15381934359570143, "grad_norm": 0.3448900878429413, "learning_rate": 0.0001, "loss": 1.8222, "step": 1324 }, { "epoch": 0.15393552134766192, "grad_norm": 0.3468807339668274, "learning_rate": 0.0001, "loss": 1.6246, "step": 1325 }, { "epoch": 0.15405169909962244, "grad_norm": 0.3733367621898651, "learning_rate": 0.0001, "loss": 1.7449, "step": 1326 }, { "epoch": 0.15416787685158292, "grad_norm": 0.3257894217967987, "learning_rate": 0.0001, "loss": 1.5762, "step": 1327 }, { "epoch": 0.1542840546035434, "grad_norm": 0.3381756842136383, "learning_rate": 0.0001, "loss": 1.6698, "step": 1328 }, { "epoch": 0.15440023235550393, "grad_norm": 0.3703326880931854, "learning_rate": 0.0001, "loss": 1.8643, "step": 1329 }, { "epoch": 0.15451641010746442, "grad_norm": 0.3577940762042999, "learning_rate": 0.0001, "loss": 1.75, "step": 1330 }, { "epoch": 0.1546325878594249, "grad_norm": 0.38018643856048584, "learning_rate": 0.0001, "loss": 1.8148, "step": 1331 }, { "epoch": 0.15474876561138542, "grad_norm": 0.3477984368801117, "learning_rate": 0.0001, "loss": 1.7081, "step": 1332 }, { "epoch": 0.1548649433633459, "grad_norm": 0.35512134432792664, "learning_rate": 0.0001, "loss": 1.6638, "step": 1333 }, { "epoch": 0.15498112111530643, "grad_norm": 0.34343427419662476, "learning_rate": 0.0001, "loss": 1.7468, "step": 1334 }, { "epoch": 0.15509729886726692, "grad_norm": 0.36737141013145447, "learning_rate": 0.0001, "loss": 1.8357, "step": 1335 }, { "epoch": 0.1552134766192274, "grad_norm": 0.3362652659416199, "learning_rate": 0.0001, "loss": 1.6064, "step": 1336 }, { "epoch": 0.15532965437118793, "grad_norm": 0.3790622651576996, "learning_rate": 0.0001, "loss": 1.6913, "step": 1337 }, { "epoch": 0.15544583212314841, "grad_norm": 0.3796531856060028, "learning_rate": 0.0001, "loss": 1.7659, "step": 1338 }, { "epoch": 0.1555620098751089, "grad_norm": 0.3766029477119446, "learning_rate": 0.0001, "loss": 1.7234, "step": 1339 }, { "epoch": 0.15567818762706942, "grad_norm": 0.33512985706329346, "learning_rate": 0.0001, "loss": 1.7591, "step": 1340 }, { "epoch": 0.1557943653790299, "grad_norm": 0.35915273427963257, "learning_rate": 0.0001, "loss": 1.7368, "step": 1341 }, { "epoch": 0.15591054313099043, "grad_norm": 0.3571474254131317, "learning_rate": 0.0001, "loss": 1.7366, "step": 1342 }, { "epoch": 0.15602672088295091, "grad_norm": 0.36832332611083984, "learning_rate": 0.0001, "loss": 1.7109, "step": 1343 }, { "epoch": 0.1561428986349114, "grad_norm": 0.34558358788490295, "learning_rate": 0.0001, "loss": 1.6834, "step": 1344 }, { "epoch": 0.15625907638687192, "grad_norm": 0.3483864665031433, "learning_rate": 0.0001, "loss": 1.6472, "step": 1345 }, { "epoch": 0.1563752541388324, "grad_norm": 0.3357907831668854, "learning_rate": 0.0001, "loss": 1.7206, "step": 1346 }, { "epoch": 0.15649143189079293, "grad_norm": 0.37820810079574585, "learning_rate": 0.0001, "loss": 1.5922, "step": 1347 }, { "epoch": 0.15660760964275341, "grad_norm": 0.4033626616001129, "learning_rate": 0.0001, "loss": 1.8405, "step": 1348 }, { "epoch": 0.1567237873947139, "grad_norm": 0.3691973090171814, "learning_rate": 0.0001, "loss": 1.7431, "step": 1349 }, { "epoch": 0.15683996514667442, "grad_norm": 0.3847627639770508, "learning_rate": 0.0001, "loss": 1.8025, "step": 1350 }, { "epoch": 0.1569561428986349, "grad_norm": 0.4157688617706299, "learning_rate": 0.0001, "loss": 1.8269, "step": 1351 }, { "epoch": 0.1570723206505954, "grad_norm": 0.35120290517807007, "learning_rate": 0.0001, "loss": 1.5718, "step": 1352 }, { "epoch": 0.15718849840255592, "grad_norm": 0.3579387664794922, "learning_rate": 0.0001, "loss": 1.7129, "step": 1353 }, { "epoch": 0.1573046761545164, "grad_norm": 0.3572061359882355, "learning_rate": 0.0001, "loss": 1.7241, "step": 1354 }, { "epoch": 0.15742085390647692, "grad_norm": 0.38334596157073975, "learning_rate": 0.0001, "loss": 1.8802, "step": 1355 }, { "epoch": 0.1575370316584374, "grad_norm": 0.3814426064491272, "learning_rate": 0.0001, "loss": 1.8669, "step": 1356 }, { "epoch": 0.1576532094103979, "grad_norm": 0.3972860276699066, "learning_rate": 0.0001, "loss": 1.8, "step": 1357 }, { "epoch": 0.15776938716235842, "grad_norm": 0.41393837332725525, "learning_rate": 0.0001, "loss": 1.8818, "step": 1358 }, { "epoch": 0.1578855649143189, "grad_norm": 0.35115018486976624, "learning_rate": 0.0001, "loss": 1.6307, "step": 1359 }, { "epoch": 0.15800174266627942, "grad_norm": 0.36894285678863525, "learning_rate": 0.0001, "loss": 1.8274, "step": 1360 }, { "epoch": 0.1581179204182399, "grad_norm": 0.344990074634552, "learning_rate": 0.0001, "loss": 1.7561, "step": 1361 }, { "epoch": 0.1582340981702004, "grad_norm": 0.33179888129234314, "learning_rate": 0.0001, "loss": 1.588, "step": 1362 }, { "epoch": 0.15835027592216092, "grad_norm": 0.3482305407524109, "learning_rate": 0.0001, "loss": 1.5916, "step": 1363 }, { "epoch": 0.1584664536741214, "grad_norm": 0.34571194648742676, "learning_rate": 0.0001, "loss": 1.7024, "step": 1364 }, { "epoch": 0.1585826314260819, "grad_norm": 0.3417724668979645, "learning_rate": 0.0001, "loss": 1.4753, "step": 1365 }, { "epoch": 0.1586988091780424, "grad_norm": 0.36330536007881165, "learning_rate": 0.0001, "loss": 1.7957, "step": 1366 }, { "epoch": 0.1588149869300029, "grad_norm": 0.3438703417778015, "learning_rate": 0.0001, "loss": 1.8058, "step": 1367 }, { "epoch": 0.15893116468196342, "grad_norm": 0.35250309109687805, "learning_rate": 0.0001, "loss": 1.8591, "step": 1368 }, { "epoch": 0.1590473424339239, "grad_norm": 0.3651219606399536, "learning_rate": 0.0001, "loss": 1.6975, "step": 1369 }, { "epoch": 0.1591635201858844, "grad_norm": 0.3463347256183624, "learning_rate": 0.0001, "loss": 1.6508, "step": 1370 }, { "epoch": 0.1592796979378449, "grad_norm": 0.3516363799571991, "learning_rate": 0.0001, "loss": 1.5442, "step": 1371 }, { "epoch": 0.1593958756898054, "grad_norm": 0.37753501534461975, "learning_rate": 0.0001, "loss": 1.8072, "step": 1372 }, { "epoch": 0.1595120534417659, "grad_norm": 0.3713725507259369, "learning_rate": 0.0001, "loss": 1.7824, "step": 1373 }, { "epoch": 0.1596282311937264, "grad_norm": 0.35233864188194275, "learning_rate": 0.0001, "loss": 1.6618, "step": 1374 }, { "epoch": 0.1597444089456869, "grad_norm": 0.3708571195602417, "learning_rate": 0.0001, "loss": 1.6926, "step": 1375 }, { "epoch": 0.1598605866976474, "grad_norm": 0.4148230254650116, "learning_rate": 0.0001, "loss": 1.7946, "step": 1376 }, { "epoch": 0.1599767644496079, "grad_norm": 0.3738183081150055, "learning_rate": 0.0001, "loss": 1.6456, "step": 1377 }, { "epoch": 0.1600929422015684, "grad_norm": 0.40050017833709717, "learning_rate": 0.0001, "loss": 1.8349, "step": 1378 }, { "epoch": 0.1602091199535289, "grad_norm": 0.3961089551448822, "learning_rate": 0.0001, "loss": 1.8826, "step": 1379 }, { "epoch": 0.1603252977054894, "grad_norm": 0.38461560010910034, "learning_rate": 0.0001, "loss": 1.7562, "step": 1380 }, { "epoch": 0.1604414754574499, "grad_norm": 0.3477541208267212, "learning_rate": 0.0001, "loss": 1.6368, "step": 1381 }, { "epoch": 0.1605576532094104, "grad_norm": 0.3460700213909149, "learning_rate": 0.0001, "loss": 1.7069, "step": 1382 }, { "epoch": 0.1606738309613709, "grad_norm": 0.3566724956035614, "learning_rate": 0.0001, "loss": 1.6473, "step": 1383 }, { "epoch": 0.1607900087133314, "grad_norm": 0.36470624804496765, "learning_rate": 0.0001, "loss": 1.8717, "step": 1384 }, { "epoch": 0.1609061864652919, "grad_norm": 0.3292492926120758, "learning_rate": 0.0001, "loss": 1.6557, "step": 1385 }, { "epoch": 0.16102236421725238, "grad_norm": 0.36960333585739136, "learning_rate": 0.0001, "loss": 1.7142, "step": 1386 }, { "epoch": 0.1611385419692129, "grad_norm": 0.36718302965164185, "learning_rate": 0.0001, "loss": 1.8368, "step": 1387 }, { "epoch": 0.1612547197211734, "grad_norm": 0.3775370419025421, "learning_rate": 0.0001, "loss": 1.7769, "step": 1388 }, { "epoch": 0.1613708974731339, "grad_norm": 0.3697305917739868, "learning_rate": 0.0001, "loss": 1.7334, "step": 1389 }, { "epoch": 0.1614870752250944, "grad_norm": 0.37467798590660095, "learning_rate": 0.0001, "loss": 1.5945, "step": 1390 }, { "epoch": 0.16160325297705488, "grad_norm": 0.3733084499835968, "learning_rate": 0.0001, "loss": 1.8647, "step": 1391 }, { "epoch": 0.1617194307290154, "grad_norm": 0.3742946982383728, "learning_rate": 0.0001, "loss": 1.6711, "step": 1392 }, { "epoch": 0.1618356084809759, "grad_norm": 0.3777306079864502, "learning_rate": 0.0001, "loss": 1.7787, "step": 1393 }, { "epoch": 0.16195178623293638, "grad_norm": 0.3901897370815277, "learning_rate": 0.0001, "loss": 1.8356, "step": 1394 }, { "epoch": 0.1620679639848969, "grad_norm": 0.38239678740501404, "learning_rate": 0.0001, "loss": 1.9155, "step": 1395 }, { "epoch": 0.16218414173685738, "grad_norm": 0.3667686879634857, "learning_rate": 0.0001, "loss": 1.7969, "step": 1396 }, { "epoch": 0.1623003194888179, "grad_norm": 0.38420993089675903, "learning_rate": 0.0001, "loss": 1.7801, "step": 1397 }, { "epoch": 0.1624164972407784, "grad_norm": 0.36830443143844604, "learning_rate": 0.0001, "loss": 1.6106, "step": 1398 }, { "epoch": 0.16253267499273888, "grad_norm": 0.35537177324295044, "learning_rate": 0.0001, "loss": 1.6633, "step": 1399 }, { "epoch": 0.1626488527446994, "grad_norm": 0.35182371735572815, "learning_rate": 0.0001, "loss": 1.6809, "step": 1400 }, { "epoch": 0.16276503049665988, "grad_norm": 0.36817070841789246, "learning_rate": 0.0001, "loss": 1.773, "step": 1401 }, { "epoch": 0.1628812082486204, "grad_norm": 0.3580371141433716, "learning_rate": 0.0001, "loss": 1.708, "step": 1402 }, { "epoch": 0.1629973860005809, "grad_norm": 0.37469223141670227, "learning_rate": 0.0001, "loss": 1.7603, "step": 1403 }, { "epoch": 0.16311356375254138, "grad_norm": 0.39260217547416687, "learning_rate": 0.0001, "loss": 1.9347, "step": 1404 }, { "epoch": 0.1632297415045019, "grad_norm": 0.37076276540756226, "learning_rate": 0.0001, "loss": 1.755, "step": 1405 }, { "epoch": 0.16334591925646239, "grad_norm": 0.34179428219795227, "learning_rate": 0.0001, "loss": 1.7104, "step": 1406 }, { "epoch": 0.16346209700842287, "grad_norm": 0.359271377325058, "learning_rate": 0.0001, "loss": 1.688, "step": 1407 }, { "epoch": 0.1635782747603834, "grad_norm": 0.3883320093154907, "learning_rate": 0.0001, "loss": 1.8176, "step": 1408 }, { "epoch": 0.16369445251234388, "grad_norm": 0.35614779591560364, "learning_rate": 0.0001, "loss": 1.6372, "step": 1409 }, { "epoch": 0.1638106302643044, "grad_norm": 0.3655150830745697, "learning_rate": 0.0001, "loss": 1.8285, "step": 1410 }, { "epoch": 0.16392680801626489, "grad_norm": 0.37644287943840027, "learning_rate": 0.0001, "loss": 1.8047, "step": 1411 }, { "epoch": 0.16404298576822537, "grad_norm": 0.33691367506980896, "learning_rate": 0.0001, "loss": 1.6559, "step": 1412 }, { "epoch": 0.1641591635201859, "grad_norm": 0.3288310766220093, "learning_rate": 0.0001, "loss": 1.5953, "step": 1413 }, { "epoch": 0.16427534127214638, "grad_norm": 0.37377673387527466, "learning_rate": 0.0001, "loss": 1.7352, "step": 1414 }, { "epoch": 0.1643915190241069, "grad_norm": 0.3379480242729187, "learning_rate": 0.0001, "loss": 1.6594, "step": 1415 }, { "epoch": 0.16450769677606739, "grad_norm": 0.3620838522911072, "learning_rate": 0.0001, "loss": 1.6778, "step": 1416 }, { "epoch": 0.16462387452802787, "grad_norm": 0.3558915853500366, "learning_rate": 0.0001, "loss": 1.7501, "step": 1417 }, { "epoch": 0.1647400522799884, "grad_norm": 0.36311277747154236, "learning_rate": 0.0001, "loss": 1.6721, "step": 1418 }, { "epoch": 0.16485623003194888, "grad_norm": 0.41347232460975647, "learning_rate": 0.0001, "loss": 1.834, "step": 1419 }, { "epoch": 0.16497240778390937, "grad_norm": 0.3624221086502075, "learning_rate": 0.0001, "loss": 1.6716, "step": 1420 }, { "epoch": 0.1650885855358699, "grad_norm": 0.3721679449081421, "learning_rate": 0.0001, "loss": 1.7655, "step": 1421 }, { "epoch": 0.16520476328783038, "grad_norm": 0.3526135981082916, "learning_rate": 0.0001, "loss": 1.7385, "step": 1422 }, { "epoch": 0.1653209410397909, "grad_norm": 0.4029957354068756, "learning_rate": 0.0001, "loss": 2.0216, "step": 1423 }, { "epoch": 0.16543711879175138, "grad_norm": 0.36750248074531555, "learning_rate": 0.0001, "loss": 1.8155, "step": 1424 }, { "epoch": 0.16555329654371187, "grad_norm": 0.358469158411026, "learning_rate": 0.0001, "loss": 1.7874, "step": 1425 }, { "epoch": 0.1656694742956724, "grad_norm": 0.3403339684009552, "learning_rate": 0.0001, "loss": 1.6348, "step": 1426 }, { "epoch": 0.16578565204763288, "grad_norm": 0.3741562068462372, "learning_rate": 0.0001, "loss": 1.6782, "step": 1427 }, { "epoch": 0.16590182979959336, "grad_norm": 0.34802475571632385, "learning_rate": 0.0001, "loss": 1.7428, "step": 1428 }, { "epoch": 0.16601800755155388, "grad_norm": 0.3652847707271576, "learning_rate": 0.0001, "loss": 1.69, "step": 1429 }, { "epoch": 0.16613418530351437, "grad_norm": 0.37153634428977966, "learning_rate": 0.0001, "loss": 1.8237, "step": 1430 }, { "epoch": 0.1662503630554749, "grad_norm": 0.36057737469673157, "learning_rate": 0.0001, "loss": 1.8384, "step": 1431 }, { "epoch": 0.16636654080743538, "grad_norm": 0.3642016053199768, "learning_rate": 0.0001, "loss": 1.8367, "step": 1432 }, { "epoch": 0.16648271855939586, "grad_norm": 0.36787521839141846, "learning_rate": 0.0001, "loss": 1.7215, "step": 1433 }, { "epoch": 0.16659889631135638, "grad_norm": 0.3940580189228058, "learning_rate": 0.0001, "loss": 1.9512, "step": 1434 }, { "epoch": 0.16671507406331687, "grad_norm": 0.3518722355365753, "learning_rate": 0.0001, "loss": 1.6426, "step": 1435 }, { "epoch": 0.1668312518152774, "grad_norm": 0.3771706223487854, "learning_rate": 0.0001, "loss": 1.7006, "step": 1436 }, { "epoch": 0.16694742956723788, "grad_norm": 0.3364204466342926, "learning_rate": 0.0001, "loss": 1.6458, "step": 1437 }, { "epoch": 0.16706360731919837, "grad_norm": 0.36092275381088257, "learning_rate": 0.0001, "loss": 1.8134, "step": 1438 }, { "epoch": 0.16717978507115888, "grad_norm": 0.3495999276638031, "learning_rate": 0.0001, "loss": 1.6367, "step": 1439 }, { "epoch": 0.16729596282311937, "grad_norm": 0.35832324624061584, "learning_rate": 0.0001, "loss": 1.6631, "step": 1440 }, { "epoch": 0.16741214057507986, "grad_norm": 0.38260895013809204, "learning_rate": 0.0001, "loss": 1.8781, "step": 1441 }, { "epoch": 0.16752831832704038, "grad_norm": 0.37501585483551025, "learning_rate": 0.0001, "loss": 1.7943, "step": 1442 }, { "epoch": 0.16764449607900087, "grad_norm": 0.34276142716407776, "learning_rate": 0.0001, "loss": 1.643, "step": 1443 }, { "epoch": 0.16776067383096138, "grad_norm": 0.32856735587120056, "learning_rate": 0.0001, "loss": 1.6322, "step": 1444 }, { "epoch": 0.16787685158292187, "grad_norm": 0.35961073637008667, "learning_rate": 0.0001, "loss": 1.7903, "step": 1445 }, { "epoch": 0.16799302933488236, "grad_norm": 0.3753131628036499, "learning_rate": 0.0001, "loss": 1.8137, "step": 1446 }, { "epoch": 0.16810920708684288, "grad_norm": 0.3495393693447113, "learning_rate": 0.0001, "loss": 1.7824, "step": 1447 }, { "epoch": 0.16822538483880337, "grad_norm": 0.35757923126220703, "learning_rate": 0.0001, "loss": 1.6898, "step": 1448 }, { "epoch": 0.16834156259076388, "grad_norm": 0.3703269064426422, "learning_rate": 0.0001, "loss": 1.7857, "step": 1449 }, { "epoch": 0.16845774034272437, "grad_norm": 0.3545929789543152, "learning_rate": 0.0001, "loss": 1.5796, "step": 1450 }, { "epoch": 0.16857391809468486, "grad_norm": 0.3971767723560333, "learning_rate": 0.0001, "loss": 1.8604, "step": 1451 }, { "epoch": 0.16869009584664538, "grad_norm": 0.3627997040748596, "learning_rate": 0.0001, "loss": 1.7352, "step": 1452 }, { "epoch": 0.16880627359860587, "grad_norm": 0.37545061111450195, "learning_rate": 0.0001, "loss": 1.7773, "step": 1453 }, { "epoch": 0.16892245135056635, "grad_norm": 0.4215669631958008, "learning_rate": 0.0001, "loss": 1.7495, "step": 1454 }, { "epoch": 0.16903862910252687, "grad_norm": 0.3428536355495453, "learning_rate": 0.0001, "loss": 1.6043, "step": 1455 }, { "epoch": 0.16915480685448736, "grad_norm": 0.3856685161590576, "learning_rate": 0.0001, "loss": 1.8013, "step": 1456 }, { "epoch": 0.16927098460644788, "grad_norm": 0.3408958315849304, "learning_rate": 0.0001, "loss": 1.599, "step": 1457 }, { "epoch": 0.16938716235840837, "grad_norm": 0.3705407381057739, "learning_rate": 0.0001, "loss": 1.7433, "step": 1458 }, { "epoch": 0.16950334011036886, "grad_norm": 0.3559417128562927, "learning_rate": 0.0001, "loss": 1.6172, "step": 1459 }, { "epoch": 0.16961951786232937, "grad_norm": 0.39479878544807434, "learning_rate": 0.0001, "loss": 1.8522, "step": 1460 }, { "epoch": 0.16973569561428986, "grad_norm": 0.36740705370903015, "learning_rate": 0.0001, "loss": 1.7336, "step": 1461 }, { "epoch": 0.16985187336625035, "grad_norm": 0.37933793663978577, "learning_rate": 0.0001, "loss": 1.7353, "step": 1462 }, { "epoch": 0.16996805111821087, "grad_norm": 0.3855569064617157, "learning_rate": 0.0001, "loss": 1.6292, "step": 1463 }, { "epoch": 0.17008422887017136, "grad_norm": 0.36735615134239197, "learning_rate": 0.0001, "loss": 1.721, "step": 1464 }, { "epoch": 0.17020040662213187, "grad_norm": 0.3651230037212372, "learning_rate": 0.0001, "loss": 1.638, "step": 1465 }, { "epoch": 0.17031658437409236, "grad_norm": 0.3359425663948059, "learning_rate": 0.0001, "loss": 1.5297, "step": 1466 }, { "epoch": 0.17043276212605285, "grad_norm": 0.37476974725723267, "learning_rate": 0.0001, "loss": 1.7478, "step": 1467 }, { "epoch": 0.17054893987801337, "grad_norm": 0.3349404036998749, "learning_rate": 0.0001, "loss": 1.6202, "step": 1468 }, { "epoch": 0.17066511762997386, "grad_norm": 0.375108003616333, "learning_rate": 0.0001, "loss": 1.7672, "step": 1469 }, { "epoch": 0.17078129538193437, "grad_norm": 0.36245962977409363, "learning_rate": 0.0001, "loss": 1.7143, "step": 1470 }, { "epoch": 0.17089747313389486, "grad_norm": 0.3471023142337799, "learning_rate": 0.0001, "loss": 1.4378, "step": 1471 }, { "epoch": 0.17101365088585535, "grad_norm": 0.37973228096961975, "learning_rate": 0.0001, "loss": 1.8489, "step": 1472 }, { "epoch": 0.17112982863781587, "grad_norm": 0.3702830374240875, "learning_rate": 0.0001, "loss": 1.7367, "step": 1473 }, { "epoch": 0.17124600638977636, "grad_norm": 0.37454211711883545, "learning_rate": 0.0001, "loss": 1.8488, "step": 1474 }, { "epoch": 0.17136218414173685, "grad_norm": 0.3523843586444855, "learning_rate": 0.0001, "loss": 1.6839, "step": 1475 }, { "epoch": 0.17147836189369736, "grad_norm": 0.38084614276885986, "learning_rate": 0.0001, "loss": 1.8845, "step": 1476 }, { "epoch": 0.17159453964565785, "grad_norm": 0.35280105471611023, "learning_rate": 0.0001, "loss": 1.6205, "step": 1477 }, { "epoch": 0.17171071739761837, "grad_norm": 0.34946003556251526, "learning_rate": 0.0001, "loss": 1.5721, "step": 1478 }, { "epoch": 0.17182689514957886, "grad_norm": 0.3549077808856964, "learning_rate": 0.0001, "loss": 1.6502, "step": 1479 }, { "epoch": 0.17194307290153935, "grad_norm": 0.34895646572113037, "learning_rate": 0.0001, "loss": 1.6272, "step": 1480 }, { "epoch": 0.17205925065349986, "grad_norm": 0.3998420536518097, "learning_rate": 0.0001, "loss": 1.6422, "step": 1481 }, { "epoch": 0.17217542840546035, "grad_norm": 0.3754946291446686, "learning_rate": 0.0001, "loss": 1.6843, "step": 1482 }, { "epoch": 0.17229160615742084, "grad_norm": 0.36129212379455566, "learning_rate": 0.0001, "loss": 1.4891, "step": 1483 }, { "epoch": 0.17240778390938136, "grad_norm": 0.3542667329311371, "learning_rate": 0.0001, "loss": 1.6515, "step": 1484 }, { "epoch": 0.17252396166134185, "grad_norm": 0.3390235900878906, "learning_rate": 0.0001, "loss": 1.5409, "step": 1485 }, { "epoch": 0.17264013941330236, "grad_norm": 0.36193352937698364, "learning_rate": 0.0001, "loss": 1.6311, "step": 1486 }, { "epoch": 0.17275631716526285, "grad_norm": 0.3606919050216675, "learning_rate": 0.0001, "loss": 1.6162, "step": 1487 }, { "epoch": 0.17287249491722334, "grad_norm": 0.4732804000377655, "learning_rate": 0.0001, "loss": 1.7695, "step": 1488 }, { "epoch": 0.17298867266918386, "grad_norm": 0.3804911971092224, "learning_rate": 0.0001, "loss": 1.7707, "step": 1489 }, { "epoch": 0.17310485042114435, "grad_norm": 0.37315547466278076, "learning_rate": 0.0001, "loss": 1.6944, "step": 1490 }, { "epoch": 0.17322102817310486, "grad_norm": 0.3711428642272949, "learning_rate": 0.0001, "loss": 1.6206, "step": 1491 }, { "epoch": 0.17333720592506535, "grad_norm": 0.33480215072631836, "learning_rate": 0.0001, "loss": 1.5577, "step": 1492 }, { "epoch": 0.17345338367702584, "grad_norm": 0.3833867907524109, "learning_rate": 0.0001, "loss": 1.7718, "step": 1493 }, { "epoch": 0.17356956142898636, "grad_norm": 0.35482731461524963, "learning_rate": 0.0001, "loss": 1.6562, "step": 1494 }, { "epoch": 0.17368573918094685, "grad_norm": 0.35908132791519165, "learning_rate": 0.0001, "loss": 1.7193, "step": 1495 }, { "epoch": 0.17380191693290734, "grad_norm": 0.34301432967185974, "learning_rate": 0.0001, "loss": 1.6988, "step": 1496 }, { "epoch": 0.17391809468486785, "grad_norm": 0.34541556239128113, "learning_rate": 0.0001, "loss": 1.6545, "step": 1497 }, { "epoch": 0.17403427243682834, "grad_norm": 0.36009690165519714, "learning_rate": 0.0001, "loss": 1.7187, "step": 1498 }, { "epoch": 0.17415045018878886, "grad_norm": 0.3848399221897125, "learning_rate": 0.0001, "loss": 1.9549, "step": 1499 }, { "epoch": 0.17426662794074935, "grad_norm": 0.35505911707878113, "learning_rate": 0.0001, "loss": 1.7141, "step": 1500 }, { "epoch": 0.17438280569270984, "grad_norm": 0.3311626613140106, "learning_rate": 0.0001, "loss": 1.6282, "step": 1501 }, { "epoch": 0.17449898344467035, "grad_norm": 0.37923452258110046, "learning_rate": 0.0001, "loss": 1.7772, "step": 1502 }, { "epoch": 0.17461516119663084, "grad_norm": 0.3624334931373596, "learning_rate": 0.0001, "loss": 1.716, "step": 1503 }, { "epoch": 0.17473133894859136, "grad_norm": 0.3661384880542755, "learning_rate": 0.0001, "loss": 1.7486, "step": 1504 }, { "epoch": 0.17484751670055185, "grad_norm": 0.3476395308971405, "learning_rate": 0.0001, "loss": 1.8493, "step": 1505 }, { "epoch": 0.17496369445251234, "grad_norm": 0.3515165448188782, "learning_rate": 0.0001, "loss": 1.7228, "step": 1506 }, { "epoch": 0.17507987220447285, "grad_norm": 0.36239245533943176, "learning_rate": 0.0001, "loss": 1.6796, "step": 1507 }, { "epoch": 0.17519604995643334, "grad_norm": 0.3474177420139313, "learning_rate": 0.0001, "loss": 1.5533, "step": 1508 }, { "epoch": 0.17531222770839383, "grad_norm": 0.34168288111686707, "learning_rate": 0.0001, "loss": 1.4709, "step": 1509 }, { "epoch": 0.17542840546035435, "grad_norm": 0.3697127103805542, "learning_rate": 0.0001, "loss": 1.7274, "step": 1510 }, { "epoch": 0.17554458321231484, "grad_norm": 0.36415886878967285, "learning_rate": 0.0001, "loss": 1.6858, "step": 1511 }, { "epoch": 0.17566076096427535, "grad_norm": 0.3710338771343231, "learning_rate": 0.0001, "loss": 1.8252, "step": 1512 }, { "epoch": 0.17577693871623584, "grad_norm": 0.3623411953449249, "learning_rate": 0.0001, "loss": 1.4247, "step": 1513 }, { "epoch": 0.17589311646819633, "grad_norm": 0.3631919026374817, "learning_rate": 0.0001, "loss": 1.7089, "step": 1514 }, { "epoch": 0.17600929422015685, "grad_norm": 0.37533360719680786, "learning_rate": 0.0001, "loss": 1.748, "step": 1515 }, { "epoch": 0.17612547197211734, "grad_norm": 0.36933374404907227, "learning_rate": 0.0001, "loss": 1.8289, "step": 1516 }, { "epoch": 0.17624164972407783, "grad_norm": 0.3541377782821655, "learning_rate": 0.0001, "loss": 1.5451, "step": 1517 }, { "epoch": 0.17635782747603834, "grad_norm": 0.38855910301208496, "learning_rate": 0.0001, "loss": 1.7275, "step": 1518 }, { "epoch": 0.17647400522799883, "grad_norm": 0.3846849203109741, "learning_rate": 0.0001, "loss": 1.81, "step": 1519 }, { "epoch": 0.17659018297995935, "grad_norm": 0.4078463912010193, "learning_rate": 0.0001, "loss": 1.8295, "step": 1520 }, { "epoch": 0.17670636073191984, "grad_norm": 0.3415427803993225, "learning_rate": 0.0001, "loss": 1.6622, "step": 1521 }, { "epoch": 0.17682253848388033, "grad_norm": 0.36249756813049316, "learning_rate": 0.0001, "loss": 1.6939, "step": 1522 }, { "epoch": 0.17693871623584084, "grad_norm": 0.3513442277908325, "learning_rate": 0.0001, "loss": 1.7024, "step": 1523 }, { "epoch": 0.17705489398780133, "grad_norm": 0.42712968587875366, "learning_rate": 0.0001, "loss": 1.7684, "step": 1524 }, { "epoch": 0.17717107173976185, "grad_norm": 0.3688381612300873, "learning_rate": 0.0001, "loss": 1.7726, "step": 1525 }, { "epoch": 0.17728724949172234, "grad_norm": 0.38176625967025757, "learning_rate": 0.0001, "loss": 1.635, "step": 1526 }, { "epoch": 0.17740342724368283, "grad_norm": 0.3425157070159912, "learning_rate": 0.0001, "loss": 1.509, "step": 1527 }, { "epoch": 0.17751960499564334, "grad_norm": 0.362857848405838, "learning_rate": 0.0001, "loss": 1.8357, "step": 1528 }, { "epoch": 0.17763578274760383, "grad_norm": 0.3613092303276062, "learning_rate": 0.0001, "loss": 1.7633, "step": 1529 }, { "epoch": 0.17775196049956432, "grad_norm": 0.35249873995780945, "learning_rate": 0.0001, "loss": 1.7397, "step": 1530 }, { "epoch": 0.17786813825152484, "grad_norm": 0.3414962887763977, "learning_rate": 0.0001, "loss": 1.5691, "step": 1531 }, { "epoch": 0.17798431600348533, "grad_norm": 0.3660277724266052, "learning_rate": 0.0001, "loss": 1.5648, "step": 1532 }, { "epoch": 0.17810049375544584, "grad_norm": 0.344135046005249, "learning_rate": 0.0001, "loss": 1.7237, "step": 1533 }, { "epoch": 0.17821667150740633, "grad_norm": 0.3552722632884979, "learning_rate": 0.0001, "loss": 1.6736, "step": 1534 }, { "epoch": 0.17833284925936682, "grad_norm": 0.3565710186958313, "learning_rate": 0.0001, "loss": 1.6945, "step": 1535 }, { "epoch": 0.17844902701132734, "grad_norm": 0.34754517674446106, "learning_rate": 0.0001, "loss": 1.5476, "step": 1536 }, { "epoch": 0.17856520476328783, "grad_norm": 0.377332478761673, "learning_rate": 0.0001, "loss": 1.771, "step": 1537 }, { "epoch": 0.17868138251524834, "grad_norm": 0.34879541397094727, "learning_rate": 0.0001, "loss": 1.6475, "step": 1538 }, { "epoch": 0.17879756026720883, "grad_norm": 0.3693576753139496, "learning_rate": 0.0001, "loss": 1.6738, "step": 1539 }, { "epoch": 0.17891373801916932, "grad_norm": 0.3447043001651764, "learning_rate": 0.0001, "loss": 1.6958, "step": 1540 }, { "epoch": 0.17902991577112984, "grad_norm": 0.3493332862854004, "learning_rate": 0.0001, "loss": 1.5546, "step": 1541 }, { "epoch": 0.17914609352309033, "grad_norm": 0.3783632814884186, "learning_rate": 0.0001, "loss": 1.7726, "step": 1542 }, { "epoch": 0.17926227127505082, "grad_norm": 0.33536890149116516, "learning_rate": 0.0001, "loss": 1.7176, "step": 1543 }, { "epoch": 0.17937844902701133, "grad_norm": 0.3568625748157501, "learning_rate": 0.0001, "loss": 1.635, "step": 1544 }, { "epoch": 0.17949462677897182, "grad_norm": 0.3375276029109955, "learning_rate": 0.0001, "loss": 1.6558, "step": 1545 }, { "epoch": 0.17961080453093234, "grad_norm": 0.3463954031467438, "learning_rate": 0.0001, "loss": 1.7331, "step": 1546 }, { "epoch": 0.17972698228289283, "grad_norm": 0.38217151165008545, "learning_rate": 0.0001, "loss": 1.7775, "step": 1547 }, { "epoch": 0.17984316003485332, "grad_norm": 0.3745064437389374, "learning_rate": 0.0001, "loss": 1.789, "step": 1548 }, { "epoch": 0.17995933778681383, "grad_norm": 0.3647879660129547, "learning_rate": 0.0001, "loss": 1.5916, "step": 1549 }, { "epoch": 0.18007551553877432, "grad_norm": 0.34927716851234436, "learning_rate": 0.0001, "loss": 1.7875, "step": 1550 }, { "epoch": 0.1801916932907348, "grad_norm": 0.35309305787086487, "learning_rate": 0.0001, "loss": 1.7382, "step": 1551 }, { "epoch": 0.18030787104269533, "grad_norm": 0.385146826505661, "learning_rate": 0.0001, "loss": 1.668, "step": 1552 }, { "epoch": 0.18042404879465582, "grad_norm": 0.345344603061676, "learning_rate": 0.0001, "loss": 1.6421, "step": 1553 }, { "epoch": 0.18054022654661633, "grad_norm": 0.3495193123817444, "learning_rate": 0.0001, "loss": 1.87, "step": 1554 }, { "epoch": 0.18065640429857682, "grad_norm": 0.36495864391326904, "learning_rate": 0.0001, "loss": 1.8289, "step": 1555 }, { "epoch": 0.1807725820505373, "grad_norm": 0.34785377979278564, "learning_rate": 0.0001, "loss": 1.7213, "step": 1556 }, { "epoch": 0.18088875980249783, "grad_norm": 0.36701300740242004, "learning_rate": 0.0001, "loss": 1.716, "step": 1557 }, { "epoch": 0.18100493755445832, "grad_norm": 0.3620215356349945, "learning_rate": 0.0001, "loss": 1.6724, "step": 1558 }, { "epoch": 0.18112111530641883, "grad_norm": 0.34096696972846985, "learning_rate": 0.0001, "loss": 1.4877, "step": 1559 }, { "epoch": 0.18123729305837932, "grad_norm": 0.3897305428981781, "learning_rate": 0.0001, "loss": 1.7062, "step": 1560 }, { "epoch": 0.1813534708103398, "grad_norm": 0.3481612503528595, "learning_rate": 0.0001, "loss": 1.6208, "step": 1561 }, { "epoch": 0.18146964856230033, "grad_norm": 0.3717382550239563, "learning_rate": 0.0001, "loss": 1.781, "step": 1562 }, { "epoch": 0.18158582631426082, "grad_norm": 0.3604913353919983, "learning_rate": 0.0001, "loss": 1.7037, "step": 1563 }, { "epoch": 0.1817020040662213, "grad_norm": 0.36964964866638184, "learning_rate": 0.0001, "loss": 1.7833, "step": 1564 }, { "epoch": 0.18181818181818182, "grad_norm": 0.35544395446777344, "learning_rate": 0.0001, "loss": 1.6162, "step": 1565 }, { "epoch": 0.1819343595701423, "grad_norm": 0.36883848905563354, "learning_rate": 0.0001, "loss": 1.6346, "step": 1566 }, { "epoch": 0.18205053732210283, "grad_norm": 0.36678242683410645, "learning_rate": 0.0001, "loss": 1.727, "step": 1567 }, { "epoch": 0.18216671507406332, "grad_norm": 0.3713185489177704, "learning_rate": 0.0001, "loss": 1.8655, "step": 1568 }, { "epoch": 0.1822828928260238, "grad_norm": 0.32219791412353516, "learning_rate": 0.0001, "loss": 1.6041, "step": 1569 }, { "epoch": 0.18239907057798432, "grad_norm": 0.3948063254356384, "learning_rate": 0.0001, "loss": 1.7762, "step": 1570 }, { "epoch": 0.1825152483299448, "grad_norm": 0.35549992322921753, "learning_rate": 0.0001, "loss": 1.719, "step": 1571 }, { "epoch": 0.1826314260819053, "grad_norm": 0.36282774806022644, "learning_rate": 0.0001, "loss": 1.7864, "step": 1572 }, { "epoch": 0.18274760383386582, "grad_norm": 0.35417771339416504, "learning_rate": 0.0001, "loss": 1.7345, "step": 1573 }, { "epoch": 0.1828637815858263, "grad_norm": 0.406324565410614, "learning_rate": 0.0001, "loss": 1.9048, "step": 1574 }, { "epoch": 0.18297995933778682, "grad_norm": 0.3815106153488159, "learning_rate": 0.0001, "loss": 1.7483, "step": 1575 }, { "epoch": 0.1830961370897473, "grad_norm": 0.3650597929954529, "learning_rate": 0.0001, "loss": 1.8492, "step": 1576 }, { "epoch": 0.1832123148417078, "grad_norm": 0.3681640326976776, "learning_rate": 0.0001, "loss": 1.8177, "step": 1577 }, { "epoch": 0.18332849259366832, "grad_norm": 0.3654261529445648, "learning_rate": 0.0001, "loss": 1.747, "step": 1578 }, { "epoch": 0.1834446703456288, "grad_norm": 0.35047435760498047, "learning_rate": 0.0001, "loss": 1.5378, "step": 1579 }, { "epoch": 0.18356084809758932, "grad_norm": 0.33045920729637146, "learning_rate": 0.0001, "loss": 1.6797, "step": 1580 }, { "epoch": 0.1836770258495498, "grad_norm": 0.3612997233867645, "learning_rate": 0.0001, "loss": 1.5902, "step": 1581 }, { "epoch": 0.1837932036015103, "grad_norm": 0.382269948720932, "learning_rate": 0.0001, "loss": 1.8914, "step": 1582 }, { "epoch": 0.18390938135347082, "grad_norm": 0.348799467086792, "learning_rate": 0.0001, "loss": 1.7365, "step": 1583 }, { "epoch": 0.1840255591054313, "grad_norm": 0.3481920063495636, "learning_rate": 0.0001, "loss": 1.5861, "step": 1584 }, { "epoch": 0.1841417368573918, "grad_norm": 0.38168272376060486, "learning_rate": 0.0001, "loss": 1.8146, "step": 1585 }, { "epoch": 0.1842579146093523, "grad_norm": 0.3492324650287628, "learning_rate": 0.0001, "loss": 1.7394, "step": 1586 }, { "epoch": 0.1843740923613128, "grad_norm": 0.38045451045036316, "learning_rate": 0.0001, "loss": 1.7292, "step": 1587 }, { "epoch": 0.18449027011327332, "grad_norm": 0.3941299617290497, "learning_rate": 0.0001, "loss": 1.9037, "step": 1588 }, { "epoch": 0.1846064478652338, "grad_norm": 0.3451482355594635, "learning_rate": 0.0001, "loss": 1.6042, "step": 1589 }, { "epoch": 0.1847226256171943, "grad_norm": 0.36182719469070435, "learning_rate": 0.0001, "loss": 1.6665, "step": 1590 }, { "epoch": 0.1848388033691548, "grad_norm": 0.3482245206832886, "learning_rate": 0.0001, "loss": 1.7429, "step": 1591 }, { "epoch": 0.1849549811211153, "grad_norm": 0.3704969584941864, "learning_rate": 0.0001, "loss": 1.7674, "step": 1592 }, { "epoch": 0.18507115887307582, "grad_norm": 0.3831556737422943, "learning_rate": 0.0001, "loss": 1.728, "step": 1593 }, { "epoch": 0.1851873366250363, "grad_norm": 0.3729779124259949, "learning_rate": 0.0001, "loss": 1.7315, "step": 1594 }, { "epoch": 0.1853035143769968, "grad_norm": 0.38106775283813477, "learning_rate": 0.0001, "loss": 1.6608, "step": 1595 }, { "epoch": 0.18541969212895731, "grad_norm": 0.3817857801914215, "learning_rate": 0.0001, "loss": 1.5159, "step": 1596 }, { "epoch": 0.1855358698809178, "grad_norm": 0.35032176971435547, "learning_rate": 0.0001, "loss": 1.6233, "step": 1597 }, { "epoch": 0.1856520476328783, "grad_norm": 0.3457838296890259, "learning_rate": 0.0001, "loss": 1.4043, "step": 1598 }, { "epoch": 0.1857682253848388, "grad_norm": 0.37174192070961, "learning_rate": 0.0001, "loss": 1.7605, "step": 1599 }, { "epoch": 0.1858844031367993, "grad_norm": 0.36577579379081726, "learning_rate": 0.0001, "loss": 1.7309, "step": 1600 }, { "epoch": 0.18600058088875981, "grad_norm": 0.3587372899055481, "learning_rate": 0.0001, "loss": 1.6997, "step": 1601 }, { "epoch": 0.1861167586407203, "grad_norm": 0.3637838363647461, "learning_rate": 0.0001, "loss": 1.6007, "step": 1602 }, { "epoch": 0.1862329363926808, "grad_norm": 0.3631284534931183, "learning_rate": 0.0001, "loss": 1.6191, "step": 1603 }, { "epoch": 0.1863491141446413, "grad_norm": 0.3679940104484558, "learning_rate": 0.0001, "loss": 1.7726, "step": 1604 }, { "epoch": 0.1864652918966018, "grad_norm": 0.36848756670951843, "learning_rate": 0.0001, "loss": 1.7786, "step": 1605 }, { "epoch": 0.1865814696485623, "grad_norm": 0.36538413166999817, "learning_rate": 0.0001, "loss": 1.7454, "step": 1606 }, { "epoch": 0.1866976474005228, "grad_norm": 0.382051944732666, "learning_rate": 0.0001, "loss": 1.842, "step": 1607 }, { "epoch": 0.1868138251524833, "grad_norm": 0.33358198404312134, "learning_rate": 0.0001, "loss": 1.5731, "step": 1608 }, { "epoch": 0.1869300029044438, "grad_norm": 0.3623516261577606, "learning_rate": 0.0001, "loss": 1.7093, "step": 1609 }, { "epoch": 0.1870461806564043, "grad_norm": 0.37408748269081116, "learning_rate": 0.0001, "loss": 1.6864, "step": 1610 }, { "epoch": 0.1871623584083648, "grad_norm": 0.38509878516197205, "learning_rate": 0.0001, "loss": 1.935, "step": 1611 }, { "epoch": 0.1872785361603253, "grad_norm": 0.39082857966423035, "learning_rate": 0.0001, "loss": 1.7414, "step": 1612 }, { "epoch": 0.1873947139122858, "grad_norm": 0.3575880825519562, "learning_rate": 0.0001, "loss": 1.6035, "step": 1613 }, { "epoch": 0.1875108916642463, "grad_norm": 0.3531934916973114, "learning_rate": 0.0001, "loss": 1.7231, "step": 1614 }, { "epoch": 0.1876270694162068, "grad_norm": 0.3784133493900299, "learning_rate": 0.0001, "loss": 1.6525, "step": 1615 }, { "epoch": 0.1877432471681673, "grad_norm": 0.37965330481529236, "learning_rate": 0.0001, "loss": 1.6978, "step": 1616 }, { "epoch": 0.1878594249201278, "grad_norm": 0.36382848024368286, "learning_rate": 0.0001, "loss": 1.6383, "step": 1617 }, { "epoch": 0.1879756026720883, "grad_norm": 0.3851543068885803, "learning_rate": 0.0001, "loss": 1.7535, "step": 1618 }, { "epoch": 0.18809178042404878, "grad_norm": 0.3497619926929474, "learning_rate": 0.0001, "loss": 1.6959, "step": 1619 }, { "epoch": 0.1882079581760093, "grad_norm": 0.34661221504211426, "learning_rate": 0.0001, "loss": 1.5884, "step": 1620 }, { "epoch": 0.1883241359279698, "grad_norm": 0.348257452249527, "learning_rate": 0.0001, "loss": 1.7566, "step": 1621 }, { "epoch": 0.1884403136799303, "grad_norm": 0.3511572480201721, "learning_rate": 0.0001, "loss": 1.6716, "step": 1622 }, { "epoch": 0.1885564914318908, "grad_norm": 0.3942771553993225, "learning_rate": 0.0001, "loss": 1.815, "step": 1623 }, { "epoch": 0.18867266918385128, "grad_norm": 0.3480950593948364, "learning_rate": 0.0001, "loss": 1.7237, "step": 1624 }, { "epoch": 0.1887888469358118, "grad_norm": 0.3844303786754608, "learning_rate": 0.0001, "loss": 1.8658, "step": 1625 }, { "epoch": 0.1889050246877723, "grad_norm": 0.36564818024635315, "learning_rate": 0.0001, "loss": 1.7391, "step": 1626 }, { "epoch": 0.1890212024397328, "grad_norm": 0.3397161662578583, "learning_rate": 0.0001, "loss": 1.5765, "step": 1627 }, { "epoch": 0.1891373801916933, "grad_norm": 0.32916903495788574, "learning_rate": 0.0001, "loss": 1.4786, "step": 1628 }, { "epoch": 0.18925355794365378, "grad_norm": 0.37873196601867676, "learning_rate": 0.0001, "loss": 1.754, "step": 1629 }, { "epoch": 0.1893697356956143, "grad_norm": 0.3580092787742615, "learning_rate": 0.0001, "loss": 1.4407, "step": 1630 }, { "epoch": 0.1894859134475748, "grad_norm": 0.38638558983802795, "learning_rate": 0.0001, "loss": 1.769, "step": 1631 }, { "epoch": 0.18960209119953528, "grad_norm": 0.39580628275871277, "learning_rate": 0.0001, "loss": 1.668, "step": 1632 }, { "epoch": 0.1897182689514958, "grad_norm": 0.3963046967983246, "learning_rate": 0.0001, "loss": 1.8863, "step": 1633 }, { "epoch": 0.18983444670345628, "grad_norm": 0.33917540311813354, "learning_rate": 0.0001, "loss": 1.5406, "step": 1634 }, { "epoch": 0.1899506244554168, "grad_norm": 0.4500591456890106, "learning_rate": 0.0001, "loss": 1.7972, "step": 1635 }, { "epoch": 0.1900668022073773, "grad_norm": 0.3522135615348816, "learning_rate": 0.0001, "loss": 1.6265, "step": 1636 }, { "epoch": 0.19018297995933778, "grad_norm": 0.3856748640537262, "learning_rate": 0.0001, "loss": 1.708, "step": 1637 }, { "epoch": 0.1902991577112983, "grad_norm": 0.4033121168613434, "learning_rate": 0.0001, "loss": 1.8365, "step": 1638 }, { "epoch": 0.19041533546325878, "grad_norm": 0.3969012200832367, "learning_rate": 0.0001, "loss": 1.9015, "step": 1639 }, { "epoch": 0.19053151321521927, "grad_norm": 0.3580697178840637, "learning_rate": 0.0001, "loss": 1.6171, "step": 1640 }, { "epoch": 0.1906476909671798, "grad_norm": 0.37800854444503784, "learning_rate": 0.0001, "loss": 1.7347, "step": 1641 }, { "epoch": 0.19076386871914028, "grad_norm": 0.3692120313644409, "learning_rate": 0.0001, "loss": 1.6419, "step": 1642 }, { "epoch": 0.1908800464711008, "grad_norm": 0.380416601896286, "learning_rate": 0.0001, "loss": 1.851, "step": 1643 }, { "epoch": 0.19099622422306128, "grad_norm": 0.3523741364479065, "learning_rate": 0.0001, "loss": 1.6161, "step": 1644 }, { "epoch": 0.19111240197502177, "grad_norm": 0.338820219039917, "learning_rate": 0.0001, "loss": 1.7057, "step": 1645 }, { "epoch": 0.1912285797269823, "grad_norm": 0.356183260679245, "learning_rate": 0.0001, "loss": 1.6747, "step": 1646 }, { "epoch": 0.19134475747894278, "grad_norm": 0.34077268838882446, "learning_rate": 0.0001, "loss": 1.7581, "step": 1647 }, { "epoch": 0.1914609352309033, "grad_norm": 0.3771745562553406, "learning_rate": 0.0001, "loss": 1.7918, "step": 1648 }, { "epoch": 0.19157711298286378, "grad_norm": 0.3605990409851074, "learning_rate": 0.0001, "loss": 1.6994, "step": 1649 }, { "epoch": 0.19169329073482427, "grad_norm": 0.3496057987213135, "learning_rate": 0.0001, "loss": 1.683, "step": 1650 }, { "epoch": 0.1918094684867848, "grad_norm": 0.39877089858055115, "learning_rate": 0.0001, "loss": 1.9872, "step": 1651 }, { "epoch": 0.19192564623874528, "grad_norm": 0.3556186556816101, "learning_rate": 0.0001, "loss": 1.7769, "step": 1652 }, { "epoch": 0.19204182399070577, "grad_norm": 0.3624957501888275, "learning_rate": 0.0001, "loss": 1.6933, "step": 1653 }, { "epoch": 0.19215800174266628, "grad_norm": 0.3758990466594696, "learning_rate": 0.0001, "loss": 1.7976, "step": 1654 }, { "epoch": 0.19227417949462677, "grad_norm": 0.38239556550979614, "learning_rate": 0.0001, "loss": 1.6828, "step": 1655 }, { "epoch": 0.1923903572465873, "grad_norm": 0.3543239235877991, "learning_rate": 0.0001, "loss": 1.667, "step": 1656 }, { "epoch": 0.19250653499854778, "grad_norm": 0.3758254647254944, "learning_rate": 0.0001, "loss": 1.8641, "step": 1657 }, { "epoch": 0.19262271275050827, "grad_norm": 0.40138089656829834, "learning_rate": 0.0001, "loss": 1.8514, "step": 1658 }, { "epoch": 0.19273889050246878, "grad_norm": 0.36116254329681396, "learning_rate": 0.0001, "loss": 1.7876, "step": 1659 }, { "epoch": 0.19285506825442927, "grad_norm": 0.34624597430229187, "learning_rate": 0.0001, "loss": 1.6897, "step": 1660 }, { "epoch": 0.19297124600638976, "grad_norm": 0.3567349910736084, "learning_rate": 0.0001, "loss": 1.7766, "step": 1661 }, { "epoch": 0.19308742375835028, "grad_norm": 0.35692065954208374, "learning_rate": 0.0001, "loss": 1.7122, "step": 1662 }, { "epoch": 0.19320360151031077, "grad_norm": 0.32495301961898804, "learning_rate": 0.0001, "loss": 1.5563, "step": 1663 }, { "epoch": 0.19331977926227129, "grad_norm": 0.3486839830875397, "learning_rate": 0.0001, "loss": 1.739, "step": 1664 }, { "epoch": 0.19343595701423177, "grad_norm": 0.3624487817287445, "learning_rate": 0.0001, "loss": 1.783, "step": 1665 }, { "epoch": 0.19355213476619226, "grad_norm": 0.34828513860702515, "learning_rate": 0.0001, "loss": 1.6163, "step": 1666 }, { "epoch": 0.19366831251815278, "grad_norm": 0.339197039604187, "learning_rate": 0.0001, "loss": 1.6403, "step": 1667 }, { "epoch": 0.19378449027011327, "grad_norm": 0.3725949227809906, "learning_rate": 0.0001, "loss": 1.7417, "step": 1668 }, { "epoch": 0.19390066802207379, "grad_norm": 0.346892386674881, "learning_rate": 0.0001, "loss": 1.4863, "step": 1669 }, { "epoch": 0.19401684577403427, "grad_norm": 0.36845695972442627, "learning_rate": 0.0001, "loss": 1.8, "step": 1670 }, { "epoch": 0.19413302352599476, "grad_norm": 0.33988621830940247, "learning_rate": 0.0001, "loss": 1.664, "step": 1671 }, { "epoch": 0.19424920127795528, "grad_norm": 0.35175544023513794, "learning_rate": 0.0001, "loss": 1.7329, "step": 1672 }, { "epoch": 0.19436537902991577, "grad_norm": 0.35789933800697327, "learning_rate": 0.0001, "loss": 1.6697, "step": 1673 }, { "epoch": 0.19448155678187626, "grad_norm": 0.371448278427124, "learning_rate": 0.0001, "loss": 1.7318, "step": 1674 }, { "epoch": 0.19459773453383677, "grad_norm": 0.3563764989376068, "learning_rate": 0.0001, "loss": 1.7567, "step": 1675 }, { "epoch": 0.19471391228579726, "grad_norm": 0.38420358300209045, "learning_rate": 0.0001, "loss": 1.8371, "step": 1676 }, { "epoch": 0.19483009003775778, "grad_norm": 0.3995816707611084, "learning_rate": 0.0001, "loss": 1.8055, "step": 1677 }, { "epoch": 0.19494626778971827, "grad_norm": 0.3630034923553467, "learning_rate": 0.0001, "loss": 1.6929, "step": 1678 }, { "epoch": 0.19506244554167876, "grad_norm": 0.3776208162307739, "learning_rate": 0.0001, "loss": 1.8491, "step": 1679 }, { "epoch": 0.19517862329363928, "grad_norm": 0.3581395149230957, "learning_rate": 0.0001, "loss": 1.8839, "step": 1680 }, { "epoch": 0.19529480104559976, "grad_norm": 0.3381625711917877, "learning_rate": 0.0001, "loss": 1.7353, "step": 1681 }, { "epoch": 0.19541097879756028, "grad_norm": 0.3696902096271515, "learning_rate": 0.0001, "loss": 1.637, "step": 1682 }, { "epoch": 0.19552715654952077, "grad_norm": 0.3632832169532776, "learning_rate": 0.0001, "loss": 1.6398, "step": 1683 }, { "epoch": 0.19564333430148126, "grad_norm": 0.37248799204826355, "learning_rate": 0.0001, "loss": 1.7679, "step": 1684 }, { "epoch": 0.19575951205344178, "grad_norm": 0.354970782995224, "learning_rate": 0.0001, "loss": 1.6954, "step": 1685 }, { "epoch": 0.19587568980540226, "grad_norm": 0.370175838470459, "learning_rate": 0.0001, "loss": 1.7495, "step": 1686 }, { "epoch": 0.19599186755736275, "grad_norm": 0.38560378551483154, "learning_rate": 0.0001, "loss": 1.8188, "step": 1687 }, { "epoch": 0.19610804530932327, "grad_norm": 0.3574475049972534, "learning_rate": 0.0001, "loss": 1.7771, "step": 1688 }, { "epoch": 0.19622422306128376, "grad_norm": 0.39494848251342773, "learning_rate": 0.0001, "loss": 1.6645, "step": 1689 }, { "epoch": 0.19634040081324428, "grad_norm": 0.36588966846466064, "learning_rate": 0.0001, "loss": 1.7938, "step": 1690 }, { "epoch": 0.19645657856520476, "grad_norm": 0.3459327220916748, "learning_rate": 0.0001, "loss": 1.739, "step": 1691 }, { "epoch": 0.19657275631716525, "grad_norm": 0.36302775144577026, "learning_rate": 0.0001, "loss": 1.741, "step": 1692 }, { "epoch": 0.19668893406912577, "grad_norm": 0.3560570776462555, "learning_rate": 0.0001, "loss": 1.6156, "step": 1693 }, { "epoch": 0.19680511182108626, "grad_norm": 0.35664597153663635, "learning_rate": 0.0001, "loss": 1.5997, "step": 1694 }, { "epoch": 0.19692128957304675, "grad_norm": 0.35501745343208313, "learning_rate": 0.0001, "loss": 1.5578, "step": 1695 }, { "epoch": 0.19703746732500727, "grad_norm": 0.36302104592323303, "learning_rate": 0.0001, "loss": 1.6408, "step": 1696 }, { "epoch": 0.19715364507696775, "grad_norm": 0.3904525637626648, "learning_rate": 0.0001, "loss": 1.7684, "step": 1697 }, { "epoch": 0.19726982282892827, "grad_norm": 0.38676998019218445, "learning_rate": 0.0001, "loss": 1.8969, "step": 1698 }, { "epoch": 0.19738600058088876, "grad_norm": 0.3899071514606476, "learning_rate": 0.0001, "loss": 1.8044, "step": 1699 }, { "epoch": 0.19750217833284925, "grad_norm": 0.38243070244789124, "learning_rate": 0.0001, "loss": 1.9196, "step": 1700 }, { "epoch": 0.19761835608480977, "grad_norm": 0.3663628101348877, "learning_rate": 0.0001, "loss": 1.9099, "step": 1701 }, { "epoch": 0.19773453383677025, "grad_norm": 0.3713766634464264, "learning_rate": 0.0001, "loss": 1.9145, "step": 1702 }, { "epoch": 0.19785071158873077, "grad_norm": 0.36897388100624084, "learning_rate": 0.0001, "loss": 1.7247, "step": 1703 }, { "epoch": 0.19796688934069126, "grad_norm": 0.36905625462532043, "learning_rate": 0.0001, "loss": 1.776, "step": 1704 }, { "epoch": 0.19808306709265175, "grad_norm": 0.35968464612960815, "learning_rate": 0.0001, "loss": 1.7495, "step": 1705 }, { "epoch": 0.19819924484461227, "grad_norm": 0.3755891025066376, "learning_rate": 0.0001, "loss": 1.621, "step": 1706 }, { "epoch": 0.19831542259657275, "grad_norm": 0.3964156210422516, "learning_rate": 0.0001, "loss": 1.6651, "step": 1707 }, { "epoch": 0.19843160034853324, "grad_norm": 0.3733653724193573, "learning_rate": 0.0001, "loss": 1.7051, "step": 1708 }, { "epoch": 0.19854777810049376, "grad_norm": 0.36199355125427246, "learning_rate": 0.0001, "loss": 1.6899, "step": 1709 }, { "epoch": 0.19866395585245425, "grad_norm": 0.37445268034935, "learning_rate": 0.0001, "loss": 1.7155, "step": 1710 }, { "epoch": 0.19878013360441477, "grad_norm": 0.3383803069591522, "learning_rate": 0.0001, "loss": 1.6613, "step": 1711 }, { "epoch": 0.19889631135637525, "grad_norm": 0.3505041301250458, "learning_rate": 0.0001, "loss": 1.7243, "step": 1712 }, { "epoch": 0.19901248910833574, "grad_norm": 0.3884884715080261, "learning_rate": 0.0001, "loss": 1.6943, "step": 1713 }, { "epoch": 0.19912866686029626, "grad_norm": 0.3483599126338959, "learning_rate": 0.0001, "loss": 1.6576, "step": 1714 }, { "epoch": 0.19924484461225675, "grad_norm": 0.36730775237083435, "learning_rate": 0.0001, "loss": 1.8406, "step": 1715 }, { "epoch": 0.19936102236421724, "grad_norm": 0.3688085675239563, "learning_rate": 0.0001, "loss": 1.7211, "step": 1716 }, { "epoch": 0.19947720011617776, "grad_norm": 0.3598758578300476, "learning_rate": 0.0001, "loss": 1.5951, "step": 1717 }, { "epoch": 0.19959337786813824, "grad_norm": 0.3627162277698517, "learning_rate": 0.0001, "loss": 1.8198, "step": 1718 }, { "epoch": 0.19970955562009876, "grad_norm": 0.35776904225349426, "learning_rate": 0.0001, "loss": 1.7034, "step": 1719 }, { "epoch": 0.19982573337205925, "grad_norm": 0.3551950454711914, "learning_rate": 0.0001, "loss": 1.7584, "step": 1720 }, { "epoch": 0.19994191112401974, "grad_norm": 0.3886015713214874, "learning_rate": 0.0001, "loss": 1.8489, "step": 1721 }, { "epoch": 0.20005808887598026, "grad_norm": 0.396438330411911, "learning_rate": 0.0001, "loss": 1.7174, "step": 1722 }, { "epoch": 0.20017426662794074, "grad_norm": 0.38339731097221375, "learning_rate": 0.0001, "loss": 1.5463, "step": 1723 }, { "epoch": 0.20029044437990126, "grad_norm": 0.4310664236545563, "learning_rate": 0.0001, "loss": 1.9913, "step": 1724 }, { "epoch": 0.20040662213186175, "grad_norm": 0.34265899658203125, "learning_rate": 0.0001, "loss": 1.5937, "step": 1725 }, { "epoch": 0.20052279988382224, "grad_norm": 0.33483079075813293, "learning_rate": 0.0001, "loss": 1.5423, "step": 1726 }, { "epoch": 0.20063897763578276, "grad_norm": 0.34570086002349854, "learning_rate": 0.0001, "loss": 1.6765, "step": 1727 }, { "epoch": 0.20075515538774324, "grad_norm": 0.36235955357551575, "learning_rate": 0.0001, "loss": 1.623, "step": 1728 }, { "epoch": 0.20087133313970373, "grad_norm": 0.3711889088153839, "learning_rate": 0.0001, "loss": 1.7069, "step": 1729 }, { "epoch": 0.20098751089166425, "grad_norm": 0.34235769510269165, "learning_rate": 0.0001, "loss": 1.7062, "step": 1730 }, { "epoch": 0.20110368864362474, "grad_norm": 0.35882294178009033, "learning_rate": 0.0001, "loss": 1.6554, "step": 1731 }, { "epoch": 0.20121986639558526, "grad_norm": 0.3647457957267761, "learning_rate": 0.0001, "loss": 1.6424, "step": 1732 }, { "epoch": 0.20133604414754575, "grad_norm": 0.35160166025161743, "learning_rate": 0.0001, "loss": 1.6798, "step": 1733 }, { "epoch": 0.20145222189950623, "grad_norm": 0.38673707842826843, "learning_rate": 0.0001, "loss": 1.7222, "step": 1734 }, { "epoch": 0.20156839965146675, "grad_norm": 0.3855576813220978, "learning_rate": 0.0001, "loss": 1.7457, "step": 1735 }, { "epoch": 0.20168457740342724, "grad_norm": 0.34935352206230164, "learning_rate": 0.0001, "loss": 1.5022, "step": 1736 }, { "epoch": 0.20180075515538776, "grad_norm": 0.3582732379436493, "learning_rate": 0.0001, "loss": 1.7167, "step": 1737 }, { "epoch": 0.20191693290734825, "grad_norm": 0.35151124000549316, "learning_rate": 0.0001, "loss": 1.515, "step": 1738 }, { "epoch": 0.20203311065930873, "grad_norm": 0.3719716966152191, "learning_rate": 0.0001, "loss": 1.8222, "step": 1739 }, { "epoch": 0.20214928841126925, "grad_norm": 0.35581451654434204, "learning_rate": 0.0001, "loss": 1.7917, "step": 1740 }, { "epoch": 0.20226546616322974, "grad_norm": 0.3818107545375824, "learning_rate": 0.0001, "loss": 1.6525, "step": 1741 }, { "epoch": 0.20238164391519023, "grad_norm": 0.3568393588066101, "learning_rate": 0.0001, "loss": 1.6038, "step": 1742 }, { "epoch": 0.20249782166715075, "grad_norm": 0.38510867953300476, "learning_rate": 0.0001, "loss": 1.8498, "step": 1743 }, { "epoch": 0.20261399941911123, "grad_norm": 0.36341622471809387, "learning_rate": 0.0001, "loss": 1.7387, "step": 1744 }, { "epoch": 0.20273017717107175, "grad_norm": 0.38483190536499023, "learning_rate": 0.0001, "loss": 1.8348, "step": 1745 }, { "epoch": 0.20284635492303224, "grad_norm": 0.3468552529811859, "learning_rate": 0.0001, "loss": 1.5533, "step": 1746 }, { "epoch": 0.20296253267499273, "grad_norm": 0.388603150844574, "learning_rate": 0.0001, "loss": 1.7117, "step": 1747 }, { "epoch": 0.20307871042695325, "grad_norm": 0.3520529866218567, "learning_rate": 0.0001, "loss": 1.564, "step": 1748 }, { "epoch": 0.20319488817891374, "grad_norm": 0.39672860503196716, "learning_rate": 0.0001, "loss": 1.6181, "step": 1749 }, { "epoch": 0.20331106593087422, "grad_norm": 0.3811289668083191, "learning_rate": 0.0001, "loss": 1.6476, "step": 1750 }, { "epoch": 0.20342724368283474, "grad_norm": 0.3765932321548462, "learning_rate": 0.0001, "loss": 1.6609, "step": 1751 }, { "epoch": 0.20354342143479523, "grad_norm": 0.33500736951828003, "learning_rate": 0.0001, "loss": 1.6485, "step": 1752 }, { "epoch": 0.20365959918675575, "grad_norm": 0.3680993318557739, "learning_rate": 0.0001, "loss": 1.6946, "step": 1753 }, { "epoch": 0.20377577693871624, "grad_norm": 0.3892023265361786, "learning_rate": 0.0001, "loss": 1.8349, "step": 1754 }, { "epoch": 0.20389195469067672, "grad_norm": 0.3450574576854706, "learning_rate": 0.0001, "loss": 1.7574, "step": 1755 }, { "epoch": 0.20400813244263724, "grad_norm": 0.37310826778411865, "learning_rate": 0.0001, "loss": 1.8353, "step": 1756 }, { "epoch": 0.20412431019459773, "grad_norm": 0.3621986210346222, "learning_rate": 0.0001, "loss": 1.5399, "step": 1757 }, { "epoch": 0.20424048794655825, "grad_norm": 0.3675428628921509, "learning_rate": 0.0001, "loss": 1.5781, "step": 1758 }, { "epoch": 0.20435666569851874, "grad_norm": 0.3777763843536377, "learning_rate": 0.0001, "loss": 1.7072, "step": 1759 }, { "epoch": 0.20447284345047922, "grad_norm": 0.3660062849521637, "learning_rate": 0.0001, "loss": 1.6259, "step": 1760 }, { "epoch": 0.20458902120243974, "grad_norm": 0.3744828402996063, "learning_rate": 0.0001, "loss": 1.6227, "step": 1761 }, { "epoch": 0.20470519895440023, "grad_norm": 0.3762773275375366, "learning_rate": 0.0001, "loss": 1.6717, "step": 1762 }, { "epoch": 0.20482137670636072, "grad_norm": 0.3429649770259857, "learning_rate": 0.0001, "loss": 1.6066, "step": 1763 }, { "epoch": 0.20493755445832124, "grad_norm": 0.35902631282806396, "learning_rate": 0.0001, "loss": 1.7535, "step": 1764 }, { "epoch": 0.20505373221028173, "grad_norm": 0.37041351199150085, "learning_rate": 0.0001, "loss": 1.8098, "step": 1765 }, { "epoch": 0.20516990996224224, "grad_norm": 0.3608233332633972, "learning_rate": 0.0001, "loss": 1.6251, "step": 1766 }, { "epoch": 0.20528608771420273, "grad_norm": 0.39614373445510864, "learning_rate": 0.0001, "loss": 1.7858, "step": 1767 }, { "epoch": 0.20540226546616322, "grad_norm": 0.3881773054599762, "learning_rate": 0.0001, "loss": 1.7105, "step": 1768 }, { "epoch": 0.20551844321812374, "grad_norm": 0.3501490354537964, "learning_rate": 0.0001, "loss": 1.6295, "step": 1769 }, { "epoch": 0.20563462097008423, "grad_norm": 0.36642521619796753, "learning_rate": 0.0001, "loss": 1.6204, "step": 1770 }, { "epoch": 0.20575079872204474, "grad_norm": 0.3428248167037964, "learning_rate": 0.0001, "loss": 1.5848, "step": 1771 }, { "epoch": 0.20586697647400523, "grad_norm": 0.39081087708473206, "learning_rate": 0.0001, "loss": 1.7507, "step": 1772 }, { "epoch": 0.20598315422596572, "grad_norm": 0.3991013467311859, "learning_rate": 0.0001, "loss": 1.9912, "step": 1773 }, { "epoch": 0.20609933197792624, "grad_norm": 0.3668970763683319, "learning_rate": 0.0001, "loss": 1.7479, "step": 1774 }, { "epoch": 0.20621550972988673, "grad_norm": 0.36014431715011597, "learning_rate": 0.0001, "loss": 1.687, "step": 1775 }, { "epoch": 0.20633168748184721, "grad_norm": 0.38143759965896606, "learning_rate": 0.0001, "loss": 1.7137, "step": 1776 }, { "epoch": 0.20644786523380773, "grad_norm": 0.382790207862854, "learning_rate": 0.0001, "loss": 1.6797, "step": 1777 }, { "epoch": 0.20656404298576822, "grad_norm": 0.355749249458313, "learning_rate": 0.0001, "loss": 1.6275, "step": 1778 }, { "epoch": 0.20668022073772874, "grad_norm": 0.3811320662498474, "learning_rate": 0.0001, "loss": 1.8527, "step": 1779 }, { "epoch": 0.20679639848968923, "grad_norm": 0.3613418638706207, "learning_rate": 0.0001, "loss": 1.7679, "step": 1780 }, { "epoch": 0.20691257624164971, "grad_norm": 0.36926668882369995, "learning_rate": 0.0001, "loss": 1.7807, "step": 1781 }, { "epoch": 0.20702875399361023, "grad_norm": 0.3642374277114868, "learning_rate": 0.0001, "loss": 1.6741, "step": 1782 }, { "epoch": 0.20714493174557072, "grad_norm": 0.4205605685710907, "learning_rate": 0.0001, "loss": 1.8032, "step": 1783 }, { "epoch": 0.2072611094975312, "grad_norm": 0.3942674994468689, "learning_rate": 0.0001, "loss": 1.7935, "step": 1784 }, { "epoch": 0.20737728724949173, "grad_norm": 0.3895415663719177, "learning_rate": 0.0001, "loss": 1.7883, "step": 1785 }, { "epoch": 0.20749346500145222, "grad_norm": 0.3891303539276123, "learning_rate": 0.0001, "loss": 1.7201, "step": 1786 }, { "epoch": 0.20760964275341273, "grad_norm": 0.3562510311603546, "learning_rate": 0.0001, "loss": 1.6112, "step": 1787 }, { "epoch": 0.20772582050537322, "grad_norm": 0.35545283555984497, "learning_rate": 0.0001, "loss": 1.6897, "step": 1788 }, { "epoch": 0.2078419982573337, "grad_norm": 0.40169668197631836, "learning_rate": 0.0001, "loss": 1.67, "step": 1789 }, { "epoch": 0.20795817600929423, "grad_norm": 0.36041274666786194, "learning_rate": 0.0001, "loss": 1.5874, "step": 1790 }, { "epoch": 0.20807435376125472, "grad_norm": 0.36906883120536804, "learning_rate": 0.0001, "loss": 1.5506, "step": 1791 }, { "epoch": 0.20819053151321523, "grad_norm": 0.35365477204322815, "learning_rate": 0.0001, "loss": 1.4886, "step": 1792 }, { "epoch": 0.20830670926517572, "grad_norm": 0.3502259850502014, "learning_rate": 0.0001, "loss": 1.4629, "step": 1793 }, { "epoch": 0.2084228870171362, "grad_norm": 0.3471246659755707, "learning_rate": 0.0001, "loss": 1.6308, "step": 1794 }, { "epoch": 0.20853906476909673, "grad_norm": 0.36685308814048767, "learning_rate": 0.0001, "loss": 1.7224, "step": 1795 }, { "epoch": 0.20865524252105722, "grad_norm": 0.40070536732673645, "learning_rate": 0.0001, "loss": 1.7234, "step": 1796 }, { "epoch": 0.2087714202730177, "grad_norm": 0.35480430722236633, "learning_rate": 0.0001, "loss": 1.7181, "step": 1797 }, { "epoch": 0.20888759802497822, "grad_norm": 0.35834869742393494, "learning_rate": 0.0001, "loss": 1.8237, "step": 1798 }, { "epoch": 0.2090037757769387, "grad_norm": 0.521961510181427, "learning_rate": 0.0001, "loss": 1.721, "step": 1799 }, { "epoch": 0.20911995352889923, "grad_norm": 0.36445826292037964, "learning_rate": 0.0001, "loss": 1.6792, "step": 1800 }, { "epoch": 0.20923613128085972, "grad_norm": 0.3719666302204132, "learning_rate": 0.0001, "loss": 1.7106, "step": 1801 }, { "epoch": 0.2093523090328202, "grad_norm": 0.37383589148521423, "learning_rate": 0.0001, "loss": 1.8157, "step": 1802 }, { "epoch": 0.20946848678478072, "grad_norm": 0.3597404956817627, "learning_rate": 0.0001, "loss": 1.7367, "step": 1803 }, { "epoch": 0.2095846645367412, "grad_norm": 0.3522038161754608, "learning_rate": 0.0001, "loss": 1.5681, "step": 1804 }, { "epoch": 0.2097008422887017, "grad_norm": 0.37592774629592896, "learning_rate": 0.0001, "loss": 1.7834, "step": 1805 }, { "epoch": 0.20981702004066222, "grad_norm": 0.3839535117149353, "learning_rate": 0.0001, "loss": 1.7384, "step": 1806 }, { "epoch": 0.2099331977926227, "grad_norm": 0.3627369999885559, "learning_rate": 0.0001, "loss": 1.6661, "step": 1807 }, { "epoch": 0.21004937554458322, "grad_norm": 0.36218005418777466, "learning_rate": 0.0001, "loss": 1.5632, "step": 1808 }, { "epoch": 0.2101655532965437, "grad_norm": 0.3566618263721466, "learning_rate": 0.0001, "loss": 1.673, "step": 1809 }, { "epoch": 0.2102817310485042, "grad_norm": 0.3671146631240845, "learning_rate": 0.0001, "loss": 1.5717, "step": 1810 }, { "epoch": 0.21039790880046472, "grad_norm": 0.3826962113380432, "learning_rate": 0.0001, "loss": 1.6545, "step": 1811 }, { "epoch": 0.2105140865524252, "grad_norm": 0.36169835925102234, "learning_rate": 0.0001, "loss": 1.7124, "step": 1812 }, { "epoch": 0.21063026430438572, "grad_norm": 0.3707777261734009, "learning_rate": 0.0001, "loss": 1.6791, "step": 1813 }, { "epoch": 0.2107464420563462, "grad_norm": 0.3656941056251526, "learning_rate": 0.0001, "loss": 1.704, "step": 1814 }, { "epoch": 0.2108626198083067, "grad_norm": 0.3701861500740051, "learning_rate": 0.0001, "loss": 1.5397, "step": 1815 }, { "epoch": 0.21097879756026722, "grad_norm": 0.37783530354499817, "learning_rate": 0.0001, "loss": 1.7791, "step": 1816 }, { "epoch": 0.2110949753122277, "grad_norm": 0.3402620255947113, "learning_rate": 0.0001, "loss": 1.6113, "step": 1817 }, { "epoch": 0.2112111530641882, "grad_norm": 0.3584825098514557, "learning_rate": 0.0001, "loss": 1.7871, "step": 1818 }, { "epoch": 0.2113273308161487, "grad_norm": 0.37935203313827515, "learning_rate": 0.0001, "loss": 1.7422, "step": 1819 }, { "epoch": 0.2114435085681092, "grad_norm": 0.3971646726131439, "learning_rate": 0.0001, "loss": 1.9348, "step": 1820 }, { "epoch": 0.21155968632006972, "grad_norm": 0.3793870210647583, "learning_rate": 0.0001, "loss": 1.6856, "step": 1821 }, { "epoch": 0.2116758640720302, "grad_norm": 0.34898293018341064, "learning_rate": 0.0001, "loss": 1.6122, "step": 1822 }, { "epoch": 0.2117920418239907, "grad_norm": 0.3546793758869171, "learning_rate": 0.0001, "loss": 1.4422, "step": 1823 }, { "epoch": 0.2119082195759512, "grad_norm": 0.39553505182266235, "learning_rate": 0.0001, "loss": 1.7607, "step": 1824 }, { "epoch": 0.2120243973279117, "grad_norm": 0.38580450415611267, "learning_rate": 0.0001, "loss": 1.7025, "step": 1825 }, { "epoch": 0.21214057507987222, "grad_norm": 0.3958745300769806, "learning_rate": 0.0001, "loss": 1.6341, "step": 1826 }, { "epoch": 0.2122567528318327, "grad_norm": 0.3790923058986664, "learning_rate": 0.0001, "loss": 1.7912, "step": 1827 }, { "epoch": 0.2123729305837932, "grad_norm": 0.3835214376449585, "learning_rate": 0.0001, "loss": 1.7081, "step": 1828 }, { "epoch": 0.2124891083357537, "grad_norm": 0.36152535676956177, "learning_rate": 0.0001, "loss": 1.6533, "step": 1829 }, { "epoch": 0.2126052860877142, "grad_norm": 0.3254132866859436, "learning_rate": 0.0001, "loss": 1.5777, "step": 1830 }, { "epoch": 0.2127214638396747, "grad_norm": 0.38130825757980347, "learning_rate": 0.0001, "loss": 1.8067, "step": 1831 }, { "epoch": 0.2128376415916352, "grad_norm": 0.36673715710639954, "learning_rate": 0.0001, "loss": 1.7599, "step": 1832 }, { "epoch": 0.2129538193435957, "grad_norm": 0.39289578795433044, "learning_rate": 0.0001, "loss": 1.6897, "step": 1833 }, { "epoch": 0.2130699970955562, "grad_norm": 0.35290926694869995, "learning_rate": 0.0001, "loss": 1.6785, "step": 1834 }, { "epoch": 0.2131861748475167, "grad_norm": 0.36013439297676086, "learning_rate": 0.0001, "loss": 1.8015, "step": 1835 }, { "epoch": 0.2133023525994772, "grad_norm": 0.39623382687568665, "learning_rate": 0.0001, "loss": 1.7266, "step": 1836 }, { "epoch": 0.2134185303514377, "grad_norm": 0.37049758434295654, "learning_rate": 0.0001, "loss": 1.8266, "step": 1837 }, { "epoch": 0.2135347081033982, "grad_norm": 0.37480175495147705, "learning_rate": 0.0001, "loss": 1.6796, "step": 1838 }, { "epoch": 0.21365088585535869, "grad_norm": 0.38570842146873474, "learning_rate": 0.0001, "loss": 1.5916, "step": 1839 }, { "epoch": 0.2137670636073192, "grad_norm": 0.4038733243942261, "learning_rate": 0.0001, "loss": 1.7587, "step": 1840 }, { "epoch": 0.2138832413592797, "grad_norm": 0.3932957351207733, "learning_rate": 0.0001, "loss": 1.7695, "step": 1841 }, { "epoch": 0.2139994191112402, "grad_norm": 0.3737775981426239, "learning_rate": 0.0001, "loss": 1.7451, "step": 1842 }, { "epoch": 0.2141155968632007, "grad_norm": 0.39853671193122864, "learning_rate": 0.0001, "loss": 1.7764, "step": 1843 }, { "epoch": 0.21423177461516119, "grad_norm": 0.37162113189697266, "learning_rate": 0.0001, "loss": 1.5834, "step": 1844 }, { "epoch": 0.2143479523671217, "grad_norm": 0.38657307624816895, "learning_rate": 0.0001, "loss": 1.6493, "step": 1845 }, { "epoch": 0.2144641301190822, "grad_norm": 0.40344858169555664, "learning_rate": 0.0001, "loss": 1.6822, "step": 1846 }, { "epoch": 0.2145803078710427, "grad_norm": 0.34257638454437256, "learning_rate": 0.0001, "loss": 1.5296, "step": 1847 }, { "epoch": 0.2146964856230032, "grad_norm": 0.3880448043346405, "learning_rate": 0.0001, "loss": 1.7435, "step": 1848 }, { "epoch": 0.21481266337496369, "grad_norm": 0.3583984971046448, "learning_rate": 0.0001, "loss": 1.596, "step": 1849 }, { "epoch": 0.2149288411269242, "grad_norm": 0.36240407824516296, "learning_rate": 0.0001, "loss": 1.5892, "step": 1850 }, { "epoch": 0.2150450188788847, "grad_norm": 0.3811880648136139, "learning_rate": 0.0001, "loss": 1.7082, "step": 1851 }, { "epoch": 0.21516119663084518, "grad_norm": 0.4095620810985565, "learning_rate": 0.0001, "loss": 1.8439, "step": 1852 }, { "epoch": 0.2152773743828057, "grad_norm": 0.43665236234664917, "learning_rate": 0.0001, "loss": 1.8788, "step": 1853 }, { "epoch": 0.2153935521347662, "grad_norm": 0.37620803713798523, "learning_rate": 0.0001, "loss": 1.7409, "step": 1854 }, { "epoch": 0.2155097298867267, "grad_norm": 0.3814584016799927, "learning_rate": 0.0001, "loss": 1.7707, "step": 1855 }, { "epoch": 0.2156259076386872, "grad_norm": 0.349738210439682, "learning_rate": 0.0001, "loss": 1.5507, "step": 1856 }, { "epoch": 0.21574208539064768, "grad_norm": 0.388741135597229, "learning_rate": 0.0001, "loss": 1.8032, "step": 1857 }, { "epoch": 0.2158582631426082, "grad_norm": 0.37817415595054626, "learning_rate": 0.0001, "loss": 1.7183, "step": 1858 }, { "epoch": 0.2159744408945687, "grad_norm": 0.3564184606075287, "learning_rate": 0.0001, "loss": 1.6542, "step": 1859 }, { "epoch": 0.2160906186465292, "grad_norm": 0.3882390260696411, "learning_rate": 0.0001, "loss": 1.8215, "step": 1860 }, { "epoch": 0.2162067963984897, "grad_norm": 0.3727805018424988, "learning_rate": 0.0001, "loss": 1.7673, "step": 1861 }, { "epoch": 0.21632297415045018, "grad_norm": 0.3581012785434723, "learning_rate": 0.0001, "loss": 1.741, "step": 1862 }, { "epoch": 0.2164391519024107, "grad_norm": 0.4040369391441345, "learning_rate": 0.0001, "loss": 1.8377, "step": 1863 }, { "epoch": 0.2165553296543712, "grad_norm": 0.3850097060203552, "learning_rate": 0.0001, "loss": 1.7752, "step": 1864 }, { "epoch": 0.21667150740633168, "grad_norm": 0.3745688796043396, "learning_rate": 0.0001, "loss": 1.7286, "step": 1865 }, { "epoch": 0.2167876851582922, "grad_norm": 0.3721669018268585, "learning_rate": 0.0001, "loss": 1.7012, "step": 1866 }, { "epoch": 0.21690386291025268, "grad_norm": 0.3597140610218048, "learning_rate": 0.0001, "loss": 1.7579, "step": 1867 }, { "epoch": 0.2170200406622132, "grad_norm": 0.3266042470932007, "learning_rate": 0.0001, "loss": 1.5274, "step": 1868 }, { "epoch": 0.2171362184141737, "grad_norm": 0.3769254982471466, "learning_rate": 0.0001, "loss": 1.9181, "step": 1869 }, { "epoch": 0.21725239616613418, "grad_norm": 0.386996328830719, "learning_rate": 0.0001, "loss": 1.7463, "step": 1870 }, { "epoch": 0.2173685739180947, "grad_norm": 0.3945530354976654, "learning_rate": 0.0001, "loss": 1.7364, "step": 1871 }, { "epoch": 0.21748475167005518, "grad_norm": 0.39462509751319885, "learning_rate": 0.0001, "loss": 1.763, "step": 1872 }, { "epoch": 0.21760092942201567, "grad_norm": 0.35269200801849365, "learning_rate": 0.0001, "loss": 1.4984, "step": 1873 }, { "epoch": 0.2177171071739762, "grad_norm": 0.401889443397522, "learning_rate": 0.0001, "loss": 1.9107, "step": 1874 }, { "epoch": 0.21783328492593668, "grad_norm": 0.3577077090740204, "learning_rate": 0.0001, "loss": 1.7043, "step": 1875 }, { "epoch": 0.2179494626778972, "grad_norm": 0.3647770881652832, "learning_rate": 0.0001, "loss": 1.688, "step": 1876 }, { "epoch": 0.21806564042985768, "grad_norm": 0.37258070707321167, "learning_rate": 0.0001, "loss": 1.6599, "step": 1877 }, { "epoch": 0.21818181818181817, "grad_norm": 0.38503706455230713, "learning_rate": 0.0001, "loss": 1.7757, "step": 1878 }, { "epoch": 0.2182979959337787, "grad_norm": 0.3539654314517975, "learning_rate": 0.0001, "loss": 1.6676, "step": 1879 }, { "epoch": 0.21841417368573918, "grad_norm": 0.3822212517261505, "learning_rate": 0.0001, "loss": 1.8839, "step": 1880 }, { "epoch": 0.2185303514376997, "grad_norm": 0.34567397832870483, "learning_rate": 0.0001, "loss": 1.7168, "step": 1881 }, { "epoch": 0.21864652918966018, "grad_norm": 0.359351247549057, "learning_rate": 0.0001, "loss": 1.6512, "step": 1882 }, { "epoch": 0.21876270694162067, "grad_norm": 0.3686642348766327, "learning_rate": 0.0001, "loss": 1.6674, "step": 1883 }, { "epoch": 0.2188788846935812, "grad_norm": 0.37803003191947937, "learning_rate": 0.0001, "loss": 1.7462, "step": 1884 }, { "epoch": 0.21899506244554168, "grad_norm": 0.3631937503814697, "learning_rate": 0.0001, "loss": 1.6759, "step": 1885 }, { "epoch": 0.21911124019750217, "grad_norm": 0.3699375092983246, "learning_rate": 0.0001, "loss": 1.7432, "step": 1886 }, { "epoch": 0.21922741794946268, "grad_norm": 0.3758851885795593, "learning_rate": 0.0001, "loss": 1.6398, "step": 1887 }, { "epoch": 0.21934359570142317, "grad_norm": 0.38159826397895813, "learning_rate": 0.0001, "loss": 1.6982, "step": 1888 }, { "epoch": 0.2194597734533837, "grad_norm": 0.3970431983470917, "learning_rate": 0.0001, "loss": 1.7361, "step": 1889 }, { "epoch": 0.21957595120534418, "grad_norm": 0.5288736820220947, "learning_rate": 0.0001, "loss": 1.757, "step": 1890 }, { "epoch": 0.21969212895730467, "grad_norm": 0.3814886212348938, "learning_rate": 0.0001, "loss": 1.7492, "step": 1891 }, { "epoch": 0.21980830670926518, "grad_norm": 0.3462470471858978, "learning_rate": 0.0001, "loss": 1.5657, "step": 1892 }, { "epoch": 0.21992448446122567, "grad_norm": 0.41686296463012695, "learning_rate": 0.0001, "loss": 1.7641, "step": 1893 }, { "epoch": 0.22004066221318616, "grad_norm": 0.35188373923301697, "learning_rate": 0.0001, "loss": 1.6406, "step": 1894 }, { "epoch": 0.22015683996514668, "grad_norm": 0.366418719291687, "learning_rate": 0.0001, "loss": 1.7024, "step": 1895 }, { "epoch": 0.22027301771710717, "grad_norm": 0.3827008306980133, "learning_rate": 0.0001, "loss": 1.6493, "step": 1896 }, { "epoch": 0.22038919546906768, "grad_norm": 0.38810524344444275, "learning_rate": 0.0001, "loss": 1.7724, "step": 1897 }, { "epoch": 0.22050537322102817, "grad_norm": 0.3892178237438202, "learning_rate": 0.0001, "loss": 1.7133, "step": 1898 }, { "epoch": 0.22062155097298866, "grad_norm": 0.34898847341537476, "learning_rate": 0.0001, "loss": 1.5637, "step": 1899 }, { "epoch": 0.22073772872494918, "grad_norm": 0.38347798585891724, "learning_rate": 0.0001, "loss": 1.778, "step": 1900 }, { "epoch": 0.22085390647690967, "grad_norm": 0.45999932289123535, "learning_rate": 0.0001, "loss": 1.8044, "step": 1901 }, { "epoch": 0.22097008422887018, "grad_norm": 0.39396339654922485, "learning_rate": 0.0001, "loss": 1.6791, "step": 1902 }, { "epoch": 0.22108626198083067, "grad_norm": 0.3777587413787842, "learning_rate": 0.0001, "loss": 1.885, "step": 1903 }, { "epoch": 0.22120243973279116, "grad_norm": 0.38406991958618164, "learning_rate": 0.0001, "loss": 1.6602, "step": 1904 }, { "epoch": 0.22131861748475168, "grad_norm": 0.4187374413013458, "learning_rate": 0.0001, "loss": 1.714, "step": 1905 }, { "epoch": 0.22143479523671217, "grad_norm": 0.35657206177711487, "learning_rate": 0.0001, "loss": 1.7717, "step": 1906 }, { "epoch": 0.22155097298867266, "grad_norm": 0.3913048803806305, "learning_rate": 0.0001, "loss": 1.6577, "step": 1907 }, { "epoch": 0.22166715074063317, "grad_norm": 0.38149452209472656, "learning_rate": 0.0001, "loss": 1.7873, "step": 1908 }, { "epoch": 0.22178332849259366, "grad_norm": 0.3800641596317291, "learning_rate": 0.0001, "loss": 1.6575, "step": 1909 }, { "epoch": 0.22189950624455418, "grad_norm": 0.3555182218551636, "learning_rate": 0.0001, "loss": 1.5213, "step": 1910 }, { "epoch": 0.22201568399651467, "grad_norm": 0.3945232331752777, "learning_rate": 0.0001, "loss": 1.807, "step": 1911 }, { "epoch": 0.22213186174847516, "grad_norm": 0.38593828678131104, "learning_rate": 0.0001, "loss": 1.8026, "step": 1912 }, { "epoch": 0.22224803950043567, "grad_norm": 0.38784652948379517, "learning_rate": 0.0001, "loss": 1.7526, "step": 1913 }, { "epoch": 0.22236421725239616, "grad_norm": 0.35396912693977356, "learning_rate": 0.0001, "loss": 1.611, "step": 1914 }, { "epoch": 0.22248039500435668, "grad_norm": 0.3825778365135193, "learning_rate": 0.0001, "loss": 1.6673, "step": 1915 }, { "epoch": 0.22259657275631717, "grad_norm": 0.36573922634124756, "learning_rate": 0.0001, "loss": 1.7551, "step": 1916 }, { "epoch": 0.22271275050827766, "grad_norm": 0.37928685545921326, "learning_rate": 0.0001, "loss": 1.6232, "step": 1917 }, { "epoch": 0.22282892826023817, "grad_norm": 0.3511602580547333, "learning_rate": 0.0001, "loss": 1.7165, "step": 1918 }, { "epoch": 0.22294510601219866, "grad_norm": 0.3809853494167328, "learning_rate": 0.0001, "loss": 1.7735, "step": 1919 }, { "epoch": 0.22306128376415915, "grad_norm": 0.36619848012924194, "learning_rate": 0.0001, "loss": 1.6838, "step": 1920 }, { "epoch": 0.22317746151611967, "grad_norm": 0.3719876706600189, "learning_rate": 0.0001, "loss": 1.6385, "step": 1921 }, { "epoch": 0.22329363926808016, "grad_norm": 0.3887604773044586, "learning_rate": 0.0001, "loss": 1.6589, "step": 1922 }, { "epoch": 0.22340981702004067, "grad_norm": 0.36279502511024475, "learning_rate": 0.0001, "loss": 1.7314, "step": 1923 }, { "epoch": 0.22352599477200116, "grad_norm": 0.3789854645729065, "learning_rate": 0.0001, "loss": 1.5644, "step": 1924 }, { "epoch": 0.22364217252396165, "grad_norm": 0.3797144293785095, "learning_rate": 0.0001, "loss": 1.6078, "step": 1925 }, { "epoch": 0.22375835027592217, "grad_norm": 0.35952475666999817, "learning_rate": 0.0001, "loss": 1.5672, "step": 1926 }, { "epoch": 0.22387452802788266, "grad_norm": 0.3811299800872803, "learning_rate": 0.0001, "loss": 1.9059, "step": 1927 }, { "epoch": 0.22399070577984315, "grad_norm": 0.3837408721446991, "learning_rate": 0.0001, "loss": 1.7887, "step": 1928 }, { "epoch": 0.22410688353180366, "grad_norm": 0.4026016891002655, "learning_rate": 0.0001, "loss": 1.8315, "step": 1929 }, { "epoch": 0.22422306128376415, "grad_norm": 0.35901689529418945, "learning_rate": 0.0001, "loss": 1.6643, "step": 1930 }, { "epoch": 0.22433923903572467, "grad_norm": 0.3816909193992615, "learning_rate": 0.0001, "loss": 1.7365, "step": 1931 }, { "epoch": 0.22445541678768516, "grad_norm": 0.3439340889453888, "learning_rate": 0.0001, "loss": 1.6192, "step": 1932 }, { "epoch": 0.22457159453964565, "grad_norm": 0.37772074341773987, "learning_rate": 0.0001, "loss": 1.7376, "step": 1933 }, { "epoch": 0.22468777229160616, "grad_norm": 0.3728993833065033, "learning_rate": 0.0001, "loss": 1.6178, "step": 1934 }, { "epoch": 0.22480395004356665, "grad_norm": 0.3640214502811432, "learning_rate": 0.0001, "loss": 1.6539, "step": 1935 }, { "epoch": 0.22492012779552717, "grad_norm": 0.37045779824256897, "learning_rate": 0.0001, "loss": 1.8218, "step": 1936 }, { "epoch": 0.22503630554748766, "grad_norm": 0.3745948374271393, "learning_rate": 0.0001, "loss": 1.5704, "step": 1937 }, { "epoch": 0.22515248329944815, "grad_norm": 0.36098146438598633, "learning_rate": 0.0001, "loss": 1.6665, "step": 1938 }, { "epoch": 0.22526866105140866, "grad_norm": 0.3912782073020935, "learning_rate": 0.0001, "loss": 1.8078, "step": 1939 }, { "epoch": 0.22538483880336915, "grad_norm": 0.35821422934532166, "learning_rate": 0.0001, "loss": 1.6098, "step": 1940 }, { "epoch": 0.22550101655532964, "grad_norm": 0.3956640958786011, "learning_rate": 0.0001, "loss": 1.709, "step": 1941 }, { "epoch": 0.22561719430729016, "grad_norm": 0.3759971559047699, "learning_rate": 0.0001, "loss": 1.7211, "step": 1942 }, { "epoch": 0.22573337205925065, "grad_norm": 0.42271122336387634, "learning_rate": 0.0001, "loss": 1.8635, "step": 1943 }, { "epoch": 0.22584954981121116, "grad_norm": 0.350124329328537, "learning_rate": 0.0001, "loss": 1.5505, "step": 1944 }, { "epoch": 0.22596572756317165, "grad_norm": 0.35673707723617554, "learning_rate": 0.0001, "loss": 1.6215, "step": 1945 }, { "epoch": 0.22608190531513214, "grad_norm": 0.3845730721950531, "learning_rate": 0.0001, "loss": 1.7524, "step": 1946 }, { "epoch": 0.22619808306709266, "grad_norm": 0.3668268620967865, "learning_rate": 0.0001, "loss": 1.6956, "step": 1947 }, { "epoch": 0.22631426081905315, "grad_norm": 0.36193057894706726, "learning_rate": 0.0001, "loss": 1.6396, "step": 1948 }, { "epoch": 0.22643043857101366, "grad_norm": 0.3776628077030182, "learning_rate": 0.0001, "loss": 1.7175, "step": 1949 }, { "epoch": 0.22654661632297415, "grad_norm": 0.35988494753837585, "learning_rate": 0.0001, "loss": 1.5447, "step": 1950 }, { "epoch": 0.22666279407493464, "grad_norm": 0.3582817614078522, "learning_rate": 0.0001, "loss": 1.6182, "step": 1951 }, { "epoch": 0.22677897182689516, "grad_norm": 0.34842002391815186, "learning_rate": 0.0001, "loss": 1.4597, "step": 1952 }, { "epoch": 0.22689514957885565, "grad_norm": 0.3792864978313446, "learning_rate": 0.0001, "loss": 1.6922, "step": 1953 }, { "epoch": 0.22701132733081614, "grad_norm": 0.3827436566352844, "learning_rate": 0.0001, "loss": 1.6616, "step": 1954 }, { "epoch": 0.22712750508277665, "grad_norm": 0.3784099817276001, "learning_rate": 0.0001, "loss": 1.6977, "step": 1955 }, { "epoch": 0.22724368283473714, "grad_norm": 0.3828442096710205, "learning_rate": 0.0001, "loss": 1.7672, "step": 1956 }, { "epoch": 0.22735986058669766, "grad_norm": 0.3599461019039154, "learning_rate": 0.0001, "loss": 1.7539, "step": 1957 }, { "epoch": 0.22747603833865815, "grad_norm": 0.3842355012893677, "learning_rate": 0.0001, "loss": 1.8452, "step": 1958 }, { "epoch": 0.22759221609061864, "grad_norm": 0.375945508480072, "learning_rate": 0.0001, "loss": 1.8502, "step": 1959 }, { "epoch": 0.22770839384257915, "grad_norm": 0.4141533374786377, "learning_rate": 0.0001, "loss": 1.7301, "step": 1960 }, { "epoch": 0.22782457159453964, "grad_norm": 0.37964141368865967, "learning_rate": 0.0001, "loss": 1.5489, "step": 1961 }, { "epoch": 0.22794074934650013, "grad_norm": 0.39130285382270813, "learning_rate": 0.0001, "loss": 1.8114, "step": 1962 }, { "epoch": 0.22805692709846065, "grad_norm": 0.3492740988731384, "learning_rate": 0.0001, "loss": 1.6302, "step": 1963 }, { "epoch": 0.22817310485042114, "grad_norm": 0.36972182989120483, "learning_rate": 0.0001, "loss": 1.6917, "step": 1964 }, { "epoch": 0.22828928260238165, "grad_norm": 0.3815283179283142, "learning_rate": 0.0001, "loss": 1.5922, "step": 1965 }, { "epoch": 0.22840546035434214, "grad_norm": 0.3695911169052124, "learning_rate": 0.0001, "loss": 1.7524, "step": 1966 }, { "epoch": 0.22852163810630263, "grad_norm": 0.3688741624355316, "learning_rate": 0.0001, "loss": 1.8406, "step": 1967 }, { "epoch": 0.22863781585826315, "grad_norm": 0.3708171248435974, "learning_rate": 0.0001, "loss": 1.7476, "step": 1968 }, { "epoch": 0.22875399361022364, "grad_norm": 0.406654417514801, "learning_rate": 0.0001, "loss": 1.8354, "step": 1969 }, { "epoch": 0.22887017136218415, "grad_norm": 0.3633515536785126, "learning_rate": 0.0001, "loss": 1.6008, "step": 1970 }, { "epoch": 0.22898634911414464, "grad_norm": 0.37781691551208496, "learning_rate": 0.0001, "loss": 1.6574, "step": 1971 }, { "epoch": 0.22910252686610513, "grad_norm": 0.36972054839134216, "learning_rate": 0.0001, "loss": 1.6906, "step": 1972 }, { "epoch": 0.22921870461806565, "grad_norm": 0.40130361914634705, "learning_rate": 0.0001, "loss": 1.7472, "step": 1973 }, { "epoch": 0.22933488237002614, "grad_norm": 0.38525599241256714, "learning_rate": 0.0001, "loss": 1.7806, "step": 1974 }, { "epoch": 0.22945106012198663, "grad_norm": 0.38195157051086426, "learning_rate": 0.0001, "loss": 1.6819, "step": 1975 }, { "epoch": 0.22956723787394714, "grad_norm": 0.36674100160598755, "learning_rate": 0.0001, "loss": 1.5677, "step": 1976 }, { "epoch": 0.22968341562590763, "grad_norm": 0.37068042159080505, "learning_rate": 0.0001, "loss": 1.6381, "step": 1977 }, { "epoch": 0.22979959337786815, "grad_norm": 0.35614022612571716, "learning_rate": 0.0001, "loss": 1.467, "step": 1978 }, { "epoch": 0.22991577112982864, "grad_norm": 0.37935879826545715, "learning_rate": 0.0001, "loss": 1.711, "step": 1979 }, { "epoch": 0.23003194888178913, "grad_norm": 0.3637298047542572, "learning_rate": 0.0001, "loss": 1.6148, "step": 1980 }, { "epoch": 0.23014812663374964, "grad_norm": 0.40035533905029297, "learning_rate": 0.0001, "loss": 1.6534, "step": 1981 }, { "epoch": 0.23026430438571013, "grad_norm": 0.40497007966041565, "learning_rate": 0.0001, "loss": 1.6456, "step": 1982 }, { "epoch": 0.23038048213767062, "grad_norm": 0.3771011233329773, "learning_rate": 0.0001, "loss": 1.6556, "step": 1983 }, { "epoch": 0.23049665988963114, "grad_norm": 0.3938141465187073, "learning_rate": 0.0001, "loss": 1.791, "step": 1984 }, { "epoch": 0.23061283764159163, "grad_norm": 0.3939863443374634, "learning_rate": 0.0001, "loss": 1.7685, "step": 1985 }, { "epoch": 0.23072901539355214, "grad_norm": 0.3473072052001953, "learning_rate": 0.0001, "loss": 1.5541, "step": 1986 }, { "epoch": 0.23084519314551263, "grad_norm": 0.371954083442688, "learning_rate": 0.0001, "loss": 1.7404, "step": 1987 }, { "epoch": 0.23096137089747312, "grad_norm": 0.38401806354522705, "learning_rate": 0.0001, "loss": 1.6277, "step": 1988 }, { "epoch": 0.23107754864943364, "grad_norm": 0.36638298630714417, "learning_rate": 0.0001, "loss": 1.5587, "step": 1989 }, { "epoch": 0.23119372640139413, "grad_norm": 0.365355521440506, "learning_rate": 0.0001, "loss": 1.5955, "step": 1990 }, { "epoch": 0.23130990415335465, "grad_norm": 0.3541325330734253, "learning_rate": 0.0001, "loss": 1.6865, "step": 1991 }, { "epoch": 0.23142608190531513, "grad_norm": 0.3768599033355713, "learning_rate": 0.0001, "loss": 1.8022, "step": 1992 }, { "epoch": 0.23154225965727562, "grad_norm": 0.36274993419647217, "learning_rate": 0.0001, "loss": 1.6444, "step": 1993 }, { "epoch": 0.23165843740923614, "grad_norm": 0.3988153636455536, "learning_rate": 0.0001, "loss": 1.8395, "step": 1994 }, { "epoch": 0.23177461516119663, "grad_norm": 0.3925970494747162, "learning_rate": 0.0001, "loss": 1.6646, "step": 1995 }, { "epoch": 0.23189079291315712, "grad_norm": 0.3741264045238495, "learning_rate": 0.0001, "loss": 1.6348, "step": 1996 }, { "epoch": 0.23200697066511763, "grad_norm": 0.3511716425418854, "learning_rate": 0.0001, "loss": 1.5654, "step": 1997 }, { "epoch": 0.23212314841707812, "grad_norm": 0.3565942645072937, "learning_rate": 0.0001, "loss": 1.7113, "step": 1998 }, { "epoch": 0.23223932616903864, "grad_norm": 0.36350539326667786, "learning_rate": 0.0001, "loss": 1.6942, "step": 1999 }, { "epoch": 0.23235550392099913, "grad_norm": 0.39565804600715637, "learning_rate": 0.0001, "loss": 1.6384, "step": 2000 }, { "epoch": 0.23247168167295962, "grad_norm": 0.39192995429039, "learning_rate": 0.0001, "loss": 1.821, "step": 2001 }, { "epoch": 0.23258785942492013, "grad_norm": 0.3590277433395386, "learning_rate": 0.0001, "loss": 1.68, "step": 2002 }, { "epoch": 0.23270403717688062, "grad_norm": 0.3515608310699463, "learning_rate": 0.0001, "loss": 1.6445, "step": 2003 }, { "epoch": 0.23282021492884114, "grad_norm": 0.3733135759830475, "learning_rate": 0.0001, "loss": 1.567, "step": 2004 }, { "epoch": 0.23293639268080163, "grad_norm": 0.3820706903934479, "learning_rate": 0.0001, "loss": 1.625, "step": 2005 }, { "epoch": 0.23305257043276212, "grad_norm": 0.38101106882095337, "learning_rate": 0.0001, "loss": 1.6528, "step": 2006 }, { "epoch": 0.23316874818472264, "grad_norm": 0.3717200756072998, "learning_rate": 0.0001, "loss": 1.5109, "step": 2007 }, { "epoch": 0.23328492593668312, "grad_norm": 0.3890402913093567, "learning_rate": 0.0001, "loss": 1.8481, "step": 2008 }, { "epoch": 0.2334011036886436, "grad_norm": 0.374593585729599, "learning_rate": 0.0001, "loss": 1.6713, "step": 2009 }, { "epoch": 0.23351728144060413, "grad_norm": 0.37107715010643005, "learning_rate": 0.0001, "loss": 1.6466, "step": 2010 }, { "epoch": 0.23363345919256462, "grad_norm": 0.37940695881843567, "learning_rate": 0.0001, "loss": 1.6465, "step": 2011 }, { "epoch": 0.23374963694452514, "grad_norm": 0.351849228143692, "learning_rate": 0.0001, "loss": 1.6345, "step": 2012 }, { "epoch": 0.23386581469648562, "grad_norm": 0.35233619809150696, "learning_rate": 0.0001, "loss": 1.6328, "step": 2013 }, { "epoch": 0.2339819924484461, "grad_norm": 0.39142873883247375, "learning_rate": 0.0001, "loss": 1.8342, "step": 2014 }, { "epoch": 0.23409817020040663, "grad_norm": 0.36893418431282043, "learning_rate": 0.0001, "loss": 1.7415, "step": 2015 }, { "epoch": 0.23421434795236712, "grad_norm": 0.4079015851020813, "learning_rate": 0.0001, "loss": 1.9148, "step": 2016 }, { "epoch": 0.2343305257043276, "grad_norm": 0.35830172896385193, "learning_rate": 0.0001, "loss": 1.531, "step": 2017 }, { "epoch": 0.23444670345628812, "grad_norm": 0.35987070202827454, "learning_rate": 0.0001, "loss": 1.5225, "step": 2018 }, { "epoch": 0.2345628812082486, "grad_norm": 0.3772944211959839, "learning_rate": 0.0001, "loss": 1.7637, "step": 2019 }, { "epoch": 0.23467905896020913, "grad_norm": 0.36652514338493347, "learning_rate": 0.0001, "loss": 1.786, "step": 2020 }, { "epoch": 0.23479523671216962, "grad_norm": 0.3576715886592865, "learning_rate": 0.0001, "loss": 1.5957, "step": 2021 }, { "epoch": 0.2349114144641301, "grad_norm": 0.3853584825992584, "learning_rate": 0.0001, "loss": 1.7223, "step": 2022 }, { "epoch": 0.23502759221609063, "grad_norm": 0.3581581115722656, "learning_rate": 0.0001, "loss": 1.7853, "step": 2023 }, { "epoch": 0.23514376996805111, "grad_norm": 0.396012544631958, "learning_rate": 0.0001, "loss": 1.4849, "step": 2024 }, { "epoch": 0.23525994772001163, "grad_norm": 0.37956202030181885, "learning_rate": 0.0001, "loss": 1.6308, "step": 2025 }, { "epoch": 0.23537612547197212, "grad_norm": 0.377013623714447, "learning_rate": 0.0001, "loss": 1.7764, "step": 2026 }, { "epoch": 0.2354923032239326, "grad_norm": 0.37704232335090637, "learning_rate": 0.0001, "loss": 1.8591, "step": 2027 }, { "epoch": 0.23560848097589313, "grad_norm": 0.3992510437965393, "learning_rate": 0.0001, "loss": 2.0132, "step": 2028 }, { "epoch": 0.23572465872785361, "grad_norm": 0.36997881531715393, "learning_rate": 0.0001, "loss": 1.6769, "step": 2029 }, { "epoch": 0.2358408364798141, "grad_norm": 0.3613041043281555, "learning_rate": 0.0001, "loss": 1.6496, "step": 2030 }, { "epoch": 0.23595701423177462, "grad_norm": 0.36582064628601074, "learning_rate": 0.0001, "loss": 1.6731, "step": 2031 }, { "epoch": 0.2360731919837351, "grad_norm": 0.3395959436893463, "learning_rate": 0.0001, "loss": 1.687, "step": 2032 }, { "epoch": 0.23618936973569563, "grad_norm": 0.3602873384952545, "learning_rate": 0.0001, "loss": 1.6479, "step": 2033 }, { "epoch": 0.23630554748765611, "grad_norm": 0.3407090902328491, "learning_rate": 0.0001, "loss": 1.6057, "step": 2034 }, { "epoch": 0.2364217252396166, "grad_norm": 0.3737788200378418, "learning_rate": 0.0001, "loss": 1.6555, "step": 2035 }, { "epoch": 0.23653790299157712, "grad_norm": 0.3733769953250885, "learning_rate": 0.0001, "loss": 1.7639, "step": 2036 }, { "epoch": 0.2366540807435376, "grad_norm": 0.3718021512031555, "learning_rate": 0.0001, "loss": 1.8264, "step": 2037 }, { "epoch": 0.23677025849549813, "grad_norm": 0.33534038066864014, "learning_rate": 0.0001, "loss": 1.4352, "step": 2038 }, { "epoch": 0.23688643624745861, "grad_norm": 0.4014089107513428, "learning_rate": 0.0001, "loss": 1.8223, "step": 2039 }, { "epoch": 0.2370026139994191, "grad_norm": 0.37306705117225647, "learning_rate": 0.0001, "loss": 1.8535, "step": 2040 }, { "epoch": 0.23711879175137962, "grad_norm": 0.36598867177963257, "learning_rate": 0.0001, "loss": 1.7722, "step": 2041 }, { "epoch": 0.2372349695033401, "grad_norm": 0.3649059236049652, "learning_rate": 0.0001, "loss": 1.6208, "step": 2042 }, { "epoch": 0.2373511472553006, "grad_norm": 0.3907899558544159, "learning_rate": 0.0001, "loss": 1.6757, "step": 2043 }, { "epoch": 0.23746732500726112, "grad_norm": 0.37756413221359253, "learning_rate": 0.0001, "loss": 1.7135, "step": 2044 }, { "epoch": 0.2375835027592216, "grad_norm": 0.3651416599750519, "learning_rate": 0.0001, "loss": 1.723, "step": 2045 }, { "epoch": 0.23769968051118212, "grad_norm": 0.3615970015525818, "learning_rate": 0.0001, "loss": 1.7378, "step": 2046 }, { "epoch": 0.2378158582631426, "grad_norm": 0.3712145686149597, "learning_rate": 0.0001, "loss": 1.7052, "step": 2047 }, { "epoch": 0.2379320360151031, "grad_norm": 0.39952540397644043, "learning_rate": 0.0001, "loss": 1.6936, "step": 2048 }, { "epoch": 0.23804821376706362, "grad_norm": 0.37439173460006714, "learning_rate": 0.0001, "loss": 1.6468, "step": 2049 }, { "epoch": 0.2381643915190241, "grad_norm": 0.37691599130630493, "learning_rate": 0.0001, "loss": 1.715, "step": 2050 }, { "epoch": 0.2382805692709846, "grad_norm": 0.3760530352592468, "learning_rate": 0.0001, "loss": 1.6155, "step": 2051 }, { "epoch": 0.2383967470229451, "grad_norm": 0.3865487575531006, "learning_rate": 0.0001, "loss": 1.9395, "step": 2052 }, { "epoch": 0.2385129247749056, "grad_norm": 0.3658308982849121, "learning_rate": 0.0001, "loss": 1.5826, "step": 2053 }, { "epoch": 0.23862910252686612, "grad_norm": 0.3813599646091461, "learning_rate": 0.0001, "loss": 1.7707, "step": 2054 }, { "epoch": 0.2387452802788266, "grad_norm": 0.3815939128398895, "learning_rate": 0.0001, "loss": 1.6308, "step": 2055 }, { "epoch": 0.2388614580307871, "grad_norm": 0.3719700872898102, "learning_rate": 0.0001, "loss": 1.6661, "step": 2056 }, { "epoch": 0.2389776357827476, "grad_norm": 0.3591654896736145, "learning_rate": 0.0001, "loss": 1.5548, "step": 2057 }, { "epoch": 0.2390938135347081, "grad_norm": 0.37177640199661255, "learning_rate": 0.0001, "loss": 1.8142, "step": 2058 }, { "epoch": 0.23920999128666862, "grad_norm": 0.36674222350120544, "learning_rate": 0.0001, "loss": 1.7137, "step": 2059 }, { "epoch": 0.2393261690386291, "grad_norm": 0.36910292506217957, "learning_rate": 0.0001, "loss": 1.8695, "step": 2060 }, { "epoch": 0.2394423467905896, "grad_norm": 0.37248843908309937, "learning_rate": 0.0001, "loss": 1.6687, "step": 2061 }, { "epoch": 0.2395585245425501, "grad_norm": 0.3622843027114868, "learning_rate": 0.0001, "loss": 1.7263, "step": 2062 }, { "epoch": 0.2396747022945106, "grad_norm": 0.38794079422950745, "learning_rate": 0.0001, "loss": 1.7049, "step": 2063 }, { "epoch": 0.2397908800464711, "grad_norm": 0.36130279302597046, "learning_rate": 0.0001, "loss": 1.5625, "step": 2064 }, { "epoch": 0.2399070577984316, "grad_norm": 0.3953266441822052, "learning_rate": 0.0001, "loss": 1.7695, "step": 2065 }, { "epoch": 0.2400232355503921, "grad_norm": 0.3774740397930145, "learning_rate": 0.0001, "loss": 1.7106, "step": 2066 }, { "epoch": 0.2401394133023526, "grad_norm": 0.380696564912796, "learning_rate": 0.0001, "loss": 1.7201, "step": 2067 }, { "epoch": 0.2402555910543131, "grad_norm": 0.3862208127975464, "learning_rate": 0.0001, "loss": 1.7663, "step": 2068 }, { "epoch": 0.2403717688062736, "grad_norm": 0.34933963418006897, "learning_rate": 0.0001, "loss": 1.6167, "step": 2069 }, { "epoch": 0.2404879465582341, "grad_norm": 0.37984904646873474, "learning_rate": 0.0001, "loss": 1.6647, "step": 2070 }, { "epoch": 0.2406041243101946, "grad_norm": 0.3804422616958618, "learning_rate": 0.0001, "loss": 1.7629, "step": 2071 }, { "epoch": 0.24072030206215508, "grad_norm": 0.3919108808040619, "learning_rate": 0.0001, "loss": 1.6823, "step": 2072 }, { "epoch": 0.2408364798141156, "grad_norm": 0.4050542712211609, "learning_rate": 0.0001, "loss": 1.6919, "step": 2073 }, { "epoch": 0.2409526575660761, "grad_norm": 0.378836989402771, "learning_rate": 0.0001, "loss": 1.7873, "step": 2074 }, { "epoch": 0.2410688353180366, "grad_norm": 0.39815083146095276, "learning_rate": 0.0001, "loss": 1.8006, "step": 2075 }, { "epoch": 0.2411850130699971, "grad_norm": 0.39791086316108704, "learning_rate": 0.0001, "loss": 1.8538, "step": 2076 }, { "epoch": 0.24130119082195758, "grad_norm": 0.3830665349960327, "learning_rate": 0.0001, "loss": 1.6778, "step": 2077 }, { "epoch": 0.2414173685739181, "grad_norm": 0.38453802466392517, "learning_rate": 0.0001, "loss": 1.7415, "step": 2078 }, { "epoch": 0.2415335463258786, "grad_norm": 0.40375974774360657, "learning_rate": 0.0001, "loss": 1.8335, "step": 2079 }, { "epoch": 0.2416497240778391, "grad_norm": 0.38834068179130554, "learning_rate": 0.0001, "loss": 1.76, "step": 2080 }, { "epoch": 0.2417659018297996, "grad_norm": 0.38506248593330383, "learning_rate": 0.0001, "loss": 1.4974, "step": 2081 }, { "epoch": 0.24188207958176008, "grad_norm": 0.3646622896194458, "learning_rate": 0.0001, "loss": 1.726, "step": 2082 }, { "epoch": 0.2419982573337206, "grad_norm": 0.37850677967071533, "learning_rate": 0.0001, "loss": 1.6372, "step": 2083 }, { "epoch": 0.2421144350856811, "grad_norm": 0.386091411113739, "learning_rate": 0.0001, "loss": 1.8543, "step": 2084 }, { "epoch": 0.24223061283764158, "grad_norm": 0.38737788796424866, "learning_rate": 0.0001, "loss": 1.7008, "step": 2085 }, { "epoch": 0.2423467905896021, "grad_norm": 0.37103384733200073, "learning_rate": 0.0001, "loss": 1.708, "step": 2086 }, { "epoch": 0.24246296834156258, "grad_norm": 0.34921854734420776, "learning_rate": 0.0001, "loss": 1.5562, "step": 2087 }, { "epoch": 0.2425791460935231, "grad_norm": 0.3560514748096466, "learning_rate": 0.0001, "loss": 1.6923, "step": 2088 }, { "epoch": 0.2426953238454836, "grad_norm": 0.38028186559677124, "learning_rate": 0.0001, "loss": 1.6194, "step": 2089 }, { "epoch": 0.24281150159744408, "grad_norm": 0.40691468119621277, "learning_rate": 0.0001, "loss": 1.7285, "step": 2090 }, { "epoch": 0.2429276793494046, "grad_norm": 0.3687189221382141, "learning_rate": 0.0001, "loss": 1.6803, "step": 2091 }, { "epoch": 0.24304385710136509, "grad_norm": 0.35396918654441833, "learning_rate": 0.0001, "loss": 1.7285, "step": 2092 }, { "epoch": 0.2431600348533256, "grad_norm": 0.39168643951416016, "learning_rate": 0.0001, "loss": 1.6745, "step": 2093 }, { "epoch": 0.2432762126052861, "grad_norm": 0.4195053279399872, "learning_rate": 0.0001, "loss": 1.9219, "step": 2094 }, { "epoch": 0.24339239035724658, "grad_norm": 0.3593606948852539, "learning_rate": 0.0001, "loss": 1.6004, "step": 2095 }, { "epoch": 0.2435085681092071, "grad_norm": 0.35588037967681885, "learning_rate": 0.0001, "loss": 1.4985, "step": 2096 }, { "epoch": 0.24362474586116759, "grad_norm": 0.3821358382701874, "learning_rate": 0.0001, "loss": 1.6774, "step": 2097 }, { "epoch": 0.24374092361312807, "grad_norm": 0.36826246976852417, "learning_rate": 0.0001, "loss": 1.623, "step": 2098 }, { "epoch": 0.2438571013650886, "grad_norm": 0.3902747929096222, "learning_rate": 0.0001, "loss": 1.8032, "step": 2099 }, { "epoch": 0.24397327911704908, "grad_norm": 0.3716658651828766, "learning_rate": 0.0001, "loss": 1.6415, "step": 2100 }, { "epoch": 0.2440894568690096, "grad_norm": 0.3505365550518036, "learning_rate": 0.0001, "loss": 1.6708, "step": 2101 }, { "epoch": 0.24420563462097009, "grad_norm": 0.36862871050834656, "learning_rate": 0.0001, "loss": 1.7063, "step": 2102 }, { "epoch": 0.24432181237293057, "grad_norm": 0.35868167877197266, "learning_rate": 0.0001, "loss": 1.6787, "step": 2103 }, { "epoch": 0.2444379901248911, "grad_norm": 0.3973603844642639, "learning_rate": 0.0001, "loss": 1.661, "step": 2104 }, { "epoch": 0.24455416787685158, "grad_norm": 0.37595170736312866, "learning_rate": 0.0001, "loss": 1.7285, "step": 2105 }, { "epoch": 0.24467034562881207, "grad_norm": 0.3656224012374878, "learning_rate": 0.0001, "loss": 1.6813, "step": 2106 }, { "epoch": 0.24478652338077259, "grad_norm": 0.39221543073654175, "learning_rate": 0.0001, "loss": 1.7968, "step": 2107 }, { "epoch": 0.24490270113273307, "grad_norm": 0.36675718426704407, "learning_rate": 0.0001, "loss": 1.634, "step": 2108 }, { "epoch": 0.2450188788846936, "grad_norm": 0.3545796871185303, "learning_rate": 0.0001, "loss": 1.6131, "step": 2109 }, { "epoch": 0.24513505663665408, "grad_norm": 0.40240517258644104, "learning_rate": 0.0001, "loss": 1.9461, "step": 2110 }, { "epoch": 0.24525123438861457, "grad_norm": 0.38813287019729614, "learning_rate": 0.0001, "loss": 1.5576, "step": 2111 }, { "epoch": 0.2453674121405751, "grad_norm": 0.37340447306632996, "learning_rate": 0.0001, "loss": 1.527, "step": 2112 }, { "epoch": 0.24548358989253558, "grad_norm": 0.37866276502609253, "learning_rate": 0.0001, "loss": 1.7589, "step": 2113 }, { "epoch": 0.2455997676444961, "grad_norm": 0.3932071328163147, "learning_rate": 0.0001, "loss": 1.7188, "step": 2114 }, { "epoch": 0.24571594539645658, "grad_norm": 0.37961530685424805, "learning_rate": 0.0001, "loss": 1.6351, "step": 2115 }, { "epoch": 0.24583212314841707, "grad_norm": 0.38449040055274963, "learning_rate": 0.0001, "loss": 1.6648, "step": 2116 }, { "epoch": 0.2459483009003776, "grad_norm": 0.39510199427604675, "learning_rate": 0.0001, "loss": 1.6821, "step": 2117 }, { "epoch": 0.24606447865233808, "grad_norm": 0.40252119302749634, "learning_rate": 0.0001, "loss": 1.7532, "step": 2118 }, { "epoch": 0.24618065640429856, "grad_norm": 0.3880206346511841, "learning_rate": 0.0001, "loss": 1.7703, "step": 2119 }, { "epoch": 0.24629683415625908, "grad_norm": 0.3541505038738251, "learning_rate": 0.0001, "loss": 1.6812, "step": 2120 }, { "epoch": 0.24641301190821957, "grad_norm": 0.3985120952129364, "learning_rate": 0.0001, "loss": 1.7176, "step": 2121 }, { "epoch": 0.2465291896601801, "grad_norm": 0.39708301424980164, "learning_rate": 0.0001, "loss": 1.7669, "step": 2122 }, { "epoch": 0.24664536741214058, "grad_norm": 0.3605371415615082, "learning_rate": 0.0001, "loss": 1.6551, "step": 2123 }, { "epoch": 0.24676154516410106, "grad_norm": 0.38313308358192444, "learning_rate": 0.0001, "loss": 1.4943, "step": 2124 }, { "epoch": 0.24687772291606158, "grad_norm": 0.4088183641433716, "learning_rate": 0.0001, "loss": 1.7498, "step": 2125 }, { "epoch": 0.24699390066802207, "grad_norm": 0.3966735899448395, "learning_rate": 0.0001, "loss": 1.6322, "step": 2126 }, { "epoch": 0.24711007841998256, "grad_norm": 0.3689868748188019, "learning_rate": 0.0001, "loss": 1.689, "step": 2127 }, { "epoch": 0.24722625617194308, "grad_norm": 0.41865813732147217, "learning_rate": 0.0001, "loss": 1.7459, "step": 2128 }, { "epoch": 0.24734243392390357, "grad_norm": 0.39082497358322144, "learning_rate": 0.0001, "loss": 1.694, "step": 2129 }, { "epoch": 0.24745861167586408, "grad_norm": 0.40093672275543213, "learning_rate": 0.0001, "loss": 1.5972, "step": 2130 }, { "epoch": 0.24757478942782457, "grad_norm": 0.37469765543937683, "learning_rate": 0.0001, "loss": 1.6115, "step": 2131 }, { "epoch": 0.24769096717978506, "grad_norm": 0.3949383795261383, "learning_rate": 0.0001, "loss": 1.5865, "step": 2132 }, { "epoch": 0.24780714493174558, "grad_norm": 0.3878518342971802, "learning_rate": 0.0001, "loss": 1.9247, "step": 2133 }, { "epoch": 0.24792332268370607, "grad_norm": 0.38046330213546753, "learning_rate": 0.0001, "loss": 1.7708, "step": 2134 }, { "epoch": 0.24803950043566658, "grad_norm": 0.34667521715164185, "learning_rate": 0.0001, "loss": 1.7178, "step": 2135 }, { "epoch": 0.24815567818762707, "grad_norm": 0.376510888338089, "learning_rate": 0.0001, "loss": 1.8054, "step": 2136 }, { "epoch": 0.24827185593958756, "grad_norm": 0.371574729681015, "learning_rate": 0.0001, "loss": 1.5861, "step": 2137 }, { "epoch": 0.24838803369154808, "grad_norm": 0.4060184955596924, "learning_rate": 0.0001, "loss": 1.6723, "step": 2138 }, { "epoch": 0.24850421144350857, "grad_norm": 0.4174020290374756, "learning_rate": 0.0001, "loss": 1.8037, "step": 2139 }, { "epoch": 0.24862038919546905, "grad_norm": 0.3601975440979004, "learning_rate": 0.0001, "loss": 1.3983, "step": 2140 }, { "epoch": 0.24873656694742957, "grad_norm": 0.3981928825378418, "learning_rate": 0.0001, "loss": 1.7354, "step": 2141 }, { "epoch": 0.24885274469939006, "grad_norm": 0.3899874687194824, "learning_rate": 0.0001, "loss": 1.7262, "step": 2142 }, { "epoch": 0.24896892245135058, "grad_norm": 0.364611953496933, "learning_rate": 0.0001, "loss": 1.71, "step": 2143 }, { "epoch": 0.24908510020331107, "grad_norm": 0.3837345540523529, "learning_rate": 0.0001, "loss": 1.7161, "step": 2144 }, { "epoch": 0.24920127795527156, "grad_norm": 0.3657201826572418, "learning_rate": 0.0001, "loss": 1.7091, "step": 2145 }, { "epoch": 0.24931745570723207, "grad_norm": 0.3665958642959595, "learning_rate": 0.0001, "loss": 1.67, "step": 2146 }, { "epoch": 0.24943363345919256, "grad_norm": 0.35987091064453125, "learning_rate": 0.0001, "loss": 1.6669, "step": 2147 }, { "epoch": 0.24954981121115308, "grad_norm": 0.3838944733142853, "learning_rate": 0.0001, "loss": 1.6725, "step": 2148 }, { "epoch": 0.24966598896311357, "grad_norm": 0.3626435101032257, "learning_rate": 0.0001, "loss": 1.5494, "step": 2149 }, { "epoch": 0.24978216671507406, "grad_norm": 0.37998467683792114, "learning_rate": 0.0001, "loss": 1.7085, "step": 2150 }, { "epoch": 0.24989834446703457, "grad_norm": 0.37865573167800903, "learning_rate": 0.0001, "loss": 1.6637, "step": 2151 }, { "epoch": 0.25001452221899506, "grad_norm": 0.38255107402801514, "learning_rate": 0.0001, "loss": 1.7066, "step": 2152 }, { "epoch": 0.25013069997095555, "grad_norm": 0.375410258769989, "learning_rate": 0.0001, "loss": 1.6712, "step": 2153 }, { "epoch": 0.25024687772291604, "grad_norm": 0.3752192556858063, "learning_rate": 0.0001, "loss": 1.6418, "step": 2154 }, { "epoch": 0.2503630554748766, "grad_norm": 0.3927246034145355, "learning_rate": 0.0001, "loss": 1.6599, "step": 2155 }, { "epoch": 0.2504792332268371, "grad_norm": 0.38156911730766296, "learning_rate": 0.0001, "loss": 1.6593, "step": 2156 }, { "epoch": 0.25059541097879756, "grad_norm": 0.3630543351173401, "learning_rate": 0.0001, "loss": 1.6824, "step": 2157 }, { "epoch": 0.25071158873075805, "grad_norm": 0.38114479184150696, "learning_rate": 0.0001, "loss": 1.7344, "step": 2158 }, { "epoch": 0.25082776648271854, "grad_norm": 0.3578778803348541, "learning_rate": 0.0001, "loss": 1.6656, "step": 2159 }, { "epoch": 0.2509439442346791, "grad_norm": 0.3649848997592926, "learning_rate": 0.0001, "loss": 1.6591, "step": 2160 }, { "epoch": 0.2510601219866396, "grad_norm": 0.39916592836380005, "learning_rate": 0.0001, "loss": 1.7755, "step": 2161 }, { "epoch": 0.25117629973860006, "grad_norm": 0.34567490220069885, "learning_rate": 0.0001, "loss": 1.4154, "step": 2162 }, { "epoch": 0.25129247749056055, "grad_norm": 0.3676474690437317, "learning_rate": 0.0001, "loss": 1.6484, "step": 2163 }, { "epoch": 0.25140865524252104, "grad_norm": 0.3737073838710785, "learning_rate": 0.0001, "loss": 1.6569, "step": 2164 }, { "epoch": 0.2515248329944816, "grad_norm": 0.3624831438064575, "learning_rate": 0.0001, "loss": 1.616, "step": 2165 }, { "epoch": 0.2516410107464421, "grad_norm": 0.4120437800884247, "learning_rate": 0.0001, "loss": 1.9489, "step": 2166 }, { "epoch": 0.25175718849840256, "grad_norm": 0.3748963177204132, "learning_rate": 0.0001, "loss": 1.613, "step": 2167 }, { "epoch": 0.25187336625036305, "grad_norm": 0.38849174976348877, "learning_rate": 0.0001, "loss": 1.6497, "step": 2168 }, { "epoch": 0.25198954400232354, "grad_norm": 0.3984411358833313, "learning_rate": 0.0001, "loss": 1.7911, "step": 2169 }, { "epoch": 0.25210572175428403, "grad_norm": 0.40575408935546875, "learning_rate": 0.0001, "loss": 1.8617, "step": 2170 }, { "epoch": 0.2522218995062446, "grad_norm": 0.3502597212791443, "learning_rate": 0.0001, "loss": 1.5792, "step": 2171 }, { "epoch": 0.25233807725820506, "grad_norm": 0.36587563157081604, "learning_rate": 0.0001, "loss": 1.6778, "step": 2172 }, { "epoch": 0.25245425501016555, "grad_norm": 0.395804226398468, "learning_rate": 0.0001, "loss": 1.5945, "step": 2173 }, { "epoch": 0.25257043276212604, "grad_norm": 0.3610069751739502, "learning_rate": 0.0001, "loss": 1.6106, "step": 2174 }, { "epoch": 0.25268661051408653, "grad_norm": 0.38481786847114563, "learning_rate": 0.0001, "loss": 1.7379, "step": 2175 }, { "epoch": 0.2528027882660471, "grad_norm": 0.4121454656124115, "learning_rate": 0.0001, "loss": 1.7449, "step": 2176 }, { "epoch": 0.25291896601800756, "grad_norm": 0.40824562311172485, "learning_rate": 0.0001, "loss": 1.7024, "step": 2177 }, { "epoch": 0.25303514376996805, "grad_norm": 0.38010165095329285, "learning_rate": 0.0001, "loss": 1.6923, "step": 2178 }, { "epoch": 0.25315132152192854, "grad_norm": 0.3835202753543854, "learning_rate": 0.0001, "loss": 1.7208, "step": 2179 }, { "epoch": 0.25326749927388903, "grad_norm": 0.4025690257549286, "learning_rate": 0.0001, "loss": 1.7204, "step": 2180 }, { "epoch": 0.2533836770258496, "grad_norm": 0.3862835466861725, "learning_rate": 0.0001, "loss": 1.8057, "step": 2181 }, { "epoch": 0.25349985477781006, "grad_norm": 0.3741121292114258, "learning_rate": 0.0001, "loss": 1.7558, "step": 2182 }, { "epoch": 0.25361603252977055, "grad_norm": 0.38978350162506104, "learning_rate": 0.0001, "loss": 1.6125, "step": 2183 }, { "epoch": 0.25373221028173104, "grad_norm": 0.3566723167896271, "learning_rate": 0.0001, "loss": 1.5734, "step": 2184 }, { "epoch": 0.25384838803369153, "grad_norm": 0.4036724269390106, "learning_rate": 0.0001, "loss": 1.751, "step": 2185 }, { "epoch": 0.2539645657856521, "grad_norm": 0.3604584336280823, "learning_rate": 0.0001, "loss": 1.6988, "step": 2186 }, { "epoch": 0.25408074353761256, "grad_norm": 0.3696625232696533, "learning_rate": 0.0001, "loss": 1.5317, "step": 2187 }, { "epoch": 0.25419692128957305, "grad_norm": 0.38797250390052795, "learning_rate": 0.0001, "loss": 1.6658, "step": 2188 }, { "epoch": 0.25431309904153354, "grad_norm": 0.3951641321182251, "learning_rate": 0.0001, "loss": 1.6017, "step": 2189 }, { "epoch": 0.25442927679349403, "grad_norm": 0.37504735589027405, "learning_rate": 0.0001, "loss": 1.6175, "step": 2190 }, { "epoch": 0.2545454545454545, "grad_norm": 0.38757580518722534, "learning_rate": 0.0001, "loss": 1.6747, "step": 2191 }, { "epoch": 0.25466163229741506, "grad_norm": 0.41315603256225586, "learning_rate": 0.0001, "loss": 1.6652, "step": 2192 }, { "epoch": 0.25477781004937555, "grad_norm": 0.36828580498695374, "learning_rate": 0.0001, "loss": 1.7711, "step": 2193 }, { "epoch": 0.25489398780133604, "grad_norm": 0.39044126868247986, "learning_rate": 0.0001, "loss": 1.7516, "step": 2194 }, { "epoch": 0.25501016555329653, "grad_norm": 0.3624008893966675, "learning_rate": 0.0001, "loss": 1.6642, "step": 2195 }, { "epoch": 0.255126343305257, "grad_norm": 0.35610583424568176, "learning_rate": 0.0001, "loss": 1.5775, "step": 2196 }, { "epoch": 0.25524252105721756, "grad_norm": 0.3624838888645172, "learning_rate": 0.0001, "loss": 1.6916, "step": 2197 }, { "epoch": 0.25535869880917805, "grad_norm": 0.3851155638694763, "learning_rate": 0.0001, "loss": 1.6095, "step": 2198 }, { "epoch": 0.25547487656113854, "grad_norm": 0.3829360604286194, "learning_rate": 0.0001, "loss": 1.6852, "step": 2199 }, { "epoch": 0.25559105431309903, "grad_norm": 0.3810238242149353, "learning_rate": 0.0001, "loss": 1.6091, "step": 2200 }, { "epoch": 0.2557072320650595, "grad_norm": 0.384115993976593, "learning_rate": 0.0001, "loss": 1.6728, "step": 2201 }, { "epoch": 0.25582340981702006, "grad_norm": 0.3733699321746826, "learning_rate": 0.0001, "loss": 1.6823, "step": 2202 }, { "epoch": 0.25593958756898055, "grad_norm": 0.37444037199020386, "learning_rate": 0.0001, "loss": 1.7531, "step": 2203 }, { "epoch": 0.25605576532094104, "grad_norm": 0.3865280747413635, "learning_rate": 0.0001, "loss": 1.7525, "step": 2204 }, { "epoch": 0.25617194307290153, "grad_norm": 0.3753882944583893, "learning_rate": 0.0001, "loss": 1.6914, "step": 2205 }, { "epoch": 0.256288120824862, "grad_norm": 0.4033168852329254, "learning_rate": 0.0001, "loss": 1.4647, "step": 2206 }, { "epoch": 0.25640429857682256, "grad_norm": 0.4054439961910248, "learning_rate": 0.0001, "loss": 1.76, "step": 2207 }, { "epoch": 0.25652047632878305, "grad_norm": 0.3870498239994049, "learning_rate": 0.0001, "loss": 1.8224, "step": 2208 }, { "epoch": 0.25663665408074354, "grad_norm": 0.3696228861808777, "learning_rate": 0.0001, "loss": 1.6343, "step": 2209 }, { "epoch": 0.25675283183270403, "grad_norm": 0.36032870411872864, "learning_rate": 0.0001, "loss": 1.5121, "step": 2210 }, { "epoch": 0.2568690095846645, "grad_norm": 0.3826732635498047, "learning_rate": 0.0001, "loss": 1.6123, "step": 2211 }, { "epoch": 0.256985187336625, "grad_norm": 0.3756740391254425, "learning_rate": 0.0001, "loss": 1.6782, "step": 2212 }, { "epoch": 0.25710136508858555, "grad_norm": 0.39152175188064575, "learning_rate": 0.0001, "loss": 1.6711, "step": 2213 }, { "epoch": 0.25721754284054604, "grad_norm": 0.3959135413169861, "learning_rate": 0.0001, "loss": 1.7857, "step": 2214 }, { "epoch": 0.25733372059250653, "grad_norm": 0.3905220627784729, "learning_rate": 0.0001, "loss": 1.6834, "step": 2215 }, { "epoch": 0.257449898344467, "grad_norm": 0.3713771402835846, "learning_rate": 0.0001, "loss": 1.6193, "step": 2216 }, { "epoch": 0.2575660760964275, "grad_norm": 0.39879244565963745, "learning_rate": 0.0001, "loss": 1.7314, "step": 2217 }, { "epoch": 0.25768225384838805, "grad_norm": 0.3479825556278229, "learning_rate": 0.0001, "loss": 1.6179, "step": 2218 }, { "epoch": 0.25779843160034854, "grad_norm": 0.38801833987236023, "learning_rate": 0.0001, "loss": 1.7041, "step": 2219 }, { "epoch": 0.25791460935230903, "grad_norm": 0.37792959809303284, "learning_rate": 0.0001, "loss": 1.6854, "step": 2220 }, { "epoch": 0.2580307871042695, "grad_norm": 0.3557312786579132, "learning_rate": 0.0001, "loss": 1.5168, "step": 2221 }, { "epoch": 0.25814696485623, "grad_norm": 0.37623703479766846, "learning_rate": 0.0001, "loss": 1.7855, "step": 2222 }, { "epoch": 0.25826314260819055, "grad_norm": 0.35475483536720276, "learning_rate": 0.0001, "loss": 1.5467, "step": 2223 }, { "epoch": 0.25837932036015104, "grad_norm": 0.379874587059021, "learning_rate": 0.0001, "loss": 1.6733, "step": 2224 }, { "epoch": 0.25849549811211153, "grad_norm": 0.3651340901851654, "learning_rate": 0.0001, "loss": 1.5503, "step": 2225 }, { "epoch": 0.258611675864072, "grad_norm": 0.37844133377075195, "learning_rate": 0.0001, "loss": 1.7023, "step": 2226 }, { "epoch": 0.2587278536160325, "grad_norm": 0.36854520440101624, "learning_rate": 0.0001, "loss": 1.799, "step": 2227 }, { "epoch": 0.25884403136799305, "grad_norm": 0.4082055389881134, "learning_rate": 0.0001, "loss": 1.8261, "step": 2228 }, { "epoch": 0.25896020911995354, "grad_norm": 0.3727450668811798, "learning_rate": 0.0001, "loss": 1.7122, "step": 2229 }, { "epoch": 0.25907638687191403, "grad_norm": 0.38609281182289124, "learning_rate": 0.0001, "loss": 1.688, "step": 2230 }, { "epoch": 0.2591925646238745, "grad_norm": 0.4126780331134796, "learning_rate": 0.0001, "loss": 1.6359, "step": 2231 }, { "epoch": 0.259308742375835, "grad_norm": 0.3675900399684906, "learning_rate": 0.0001, "loss": 1.7965, "step": 2232 }, { "epoch": 0.2594249201277955, "grad_norm": 0.3680029511451721, "learning_rate": 0.0001, "loss": 1.6735, "step": 2233 }, { "epoch": 0.25954109787975604, "grad_norm": 0.3985327482223511, "learning_rate": 0.0001, "loss": 1.7455, "step": 2234 }, { "epoch": 0.25965727563171653, "grad_norm": 0.4041634798049927, "learning_rate": 0.0001, "loss": 1.9009, "step": 2235 }, { "epoch": 0.259773453383677, "grad_norm": 0.3741016685962677, "learning_rate": 0.0001, "loss": 1.6318, "step": 2236 }, { "epoch": 0.2598896311356375, "grad_norm": 0.38083234429359436, "learning_rate": 0.0001, "loss": 1.7036, "step": 2237 }, { "epoch": 0.260005808887598, "grad_norm": 0.40651369094848633, "learning_rate": 0.0001, "loss": 1.9219, "step": 2238 }, { "epoch": 0.26012198663955854, "grad_norm": 0.3514697849750519, "learning_rate": 0.0001, "loss": 1.5848, "step": 2239 }, { "epoch": 0.26023816439151903, "grad_norm": 0.402826189994812, "learning_rate": 0.0001, "loss": 1.769, "step": 2240 }, { "epoch": 0.2603543421434795, "grad_norm": 0.3742333948612213, "learning_rate": 0.0001, "loss": 1.784, "step": 2241 }, { "epoch": 0.26047051989544, "grad_norm": 0.3999182879924774, "learning_rate": 0.0001, "loss": 1.9366, "step": 2242 }, { "epoch": 0.2605866976474005, "grad_norm": 0.3678368031978607, "learning_rate": 0.0001, "loss": 1.6611, "step": 2243 }, { "epoch": 0.26070287539936104, "grad_norm": 0.36702072620391846, "learning_rate": 0.0001, "loss": 1.4785, "step": 2244 }, { "epoch": 0.26081905315132153, "grad_norm": 0.3562362790107727, "learning_rate": 0.0001, "loss": 1.7157, "step": 2245 }, { "epoch": 0.260935230903282, "grad_norm": 0.40297916531562805, "learning_rate": 0.0001, "loss": 1.7167, "step": 2246 }, { "epoch": 0.2610514086552425, "grad_norm": 0.37424853444099426, "learning_rate": 0.0001, "loss": 1.6965, "step": 2247 }, { "epoch": 0.261167586407203, "grad_norm": 0.36831429600715637, "learning_rate": 0.0001, "loss": 1.5774, "step": 2248 }, { "epoch": 0.26128376415916355, "grad_norm": 0.38578173518180847, "learning_rate": 0.0001, "loss": 1.6241, "step": 2249 }, { "epoch": 0.26139994191112403, "grad_norm": 0.40575075149536133, "learning_rate": 0.0001, "loss": 1.732, "step": 2250 }, { "epoch": 0.2615161196630845, "grad_norm": 0.3883078396320343, "learning_rate": 0.0001, "loss": 1.6865, "step": 2251 }, { "epoch": 0.261632297415045, "grad_norm": 0.3869827687740326, "learning_rate": 0.0001, "loss": 1.7362, "step": 2252 }, { "epoch": 0.2617484751670055, "grad_norm": 0.38627856969833374, "learning_rate": 0.0001, "loss": 1.751, "step": 2253 }, { "epoch": 0.26186465291896605, "grad_norm": 0.3833690285682678, "learning_rate": 0.0001, "loss": 1.7188, "step": 2254 }, { "epoch": 0.26198083067092653, "grad_norm": 0.39400362968444824, "learning_rate": 0.0001, "loss": 1.7143, "step": 2255 }, { "epoch": 0.262097008422887, "grad_norm": 0.3659832775592804, "learning_rate": 0.0001, "loss": 1.6737, "step": 2256 }, { "epoch": 0.2622131861748475, "grad_norm": 0.3808686137199402, "learning_rate": 0.0001, "loss": 1.6575, "step": 2257 }, { "epoch": 0.262329363926808, "grad_norm": 0.36852577328681946, "learning_rate": 0.0001, "loss": 1.6378, "step": 2258 }, { "epoch": 0.2624455416787685, "grad_norm": 0.37290260195732117, "learning_rate": 0.0001, "loss": 1.6818, "step": 2259 }, { "epoch": 0.26256171943072903, "grad_norm": 0.4122512936592102, "learning_rate": 0.0001, "loss": 1.7609, "step": 2260 }, { "epoch": 0.2626778971826895, "grad_norm": 0.3920998275279999, "learning_rate": 0.0001, "loss": 1.7818, "step": 2261 }, { "epoch": 0.26279407493465, "grad_norm": 0.40477001667022705, "learning_rate": 0.0001, "loss": 1.8105, "step": 2262 }, { "epoch": 0.2629102526866105, "grad_norm": 0.3666556179523468, "learning_rate": 0.0001, "loss": 1.5604, "step": 2263 }, { "epoch": 0.263026430438571, "grad_norm": 0.3544174134731293, "learning_rate": 0.0001, "loss": 1.6393, "step": 2264 }, { "epoch": 0.26314260819053154, "grad_norm": 0.3537236154079437, "learning_rate": 0.0001, "loss": 1.5875, "step": 2265 }, { "epoch": 0.263258785942492, "grad_norm": 0.3871021866798401, "learning_rate": 0.0001, "loss": 1.5506, "step": 2266 }, { "epoch": 0.2633749636944525, "grad_norm": 0.3693544566631317, "learning_rate": 0.0001, "loss": 1.6045, "step": 2267 }, { "epoch": 0.263491141446413, "grad_norm": 0.3744482696056366, "learning_rate": 0.0001, "loss": 1.5885, "step": 2268 }, { "epoch": 0.2636073191983735, "grad_norm": 0.38229045271873474, "learning_rate": 0.0001, "loss": 1.7494, "step": 2269 }, { "epoch": 0.26372349695033404, "grad_norm": 0.38412609696388245, "learning_rate": 0.0001, "loss": 1.806, "step": 2270 }, { "epoch": 0.2638396747022945, "grad_norm": 0.3687461018562317, "learning_rate": 0.0001, "loss": 1.5794, "step": 2271 }, { "epoch": 0.263955852454255, "grad_norm": 0.40106597542762756, "learning_rate": 0.0001, "loss": 1.7895, "step": 2272 }, { "epoch": 0.2640720302062155, "grad_norm": 0.3707484304904938, "learning_rate": 0.0001, "loss": 1.6494, "step": 2273 }, { "epoch": 0.264188207958176, "grad_norm": 0.3728141188621521, "learning_rate": 0.0001, "loss": 1.7414, "step": 2274 }, { "epoch": 0.26430438571013654, "grad_norm": 0.3807673752307892, "learning_rate": 0.0001, "loss": 1.6104, "step": 2275 }, { "epoch": 0.264420563462097, "grad_norm": 0.39177513122558594, "learning_rate": 0.0001, "loss": 1.7476, "step": 2276 }, { "epoch": 0.2645367412140575, "grad_norm": 0.3902243971824646, "learning_rate": 0.0001, "loss": 1.7135, "step": 2277 }, { "epoch": 0.264652918966018, "grad_norm": 0.36920416355133057, "learning_rate": 0.0001, "loss": 1.6747, "step": 2278 }, { "epoch": 0.2647690967179785, "grad_norm": 0.37540876865386963, "learning_rate": 0.0001, "loss": 1.5276, "step": 2279 }, { "epoch": 0.264885274469939, "grad_norm": 0.3670298159122467, "learning_rate": 0.0001, "loss": 1.8018, "step": 2280 }, { "epoch": 0.2650014522218995, "grad_norm": 0.344098836183548, "learning_rate": 0.0001, "loss": 1.5979, "step": 2281 }, { "epoch": 0.26511762997386, "grad_norm": 0.36059024930000305, "learning_rate": 0.0001, "loss": 1.5998, "step": 2282 }, { "epoch": 0.2652338077258205, "grad_norm": 0.37826424837112427, "learning_rate": 0.0001, "loss": 1.6737, "step": 2283 }, { "epoch": 0.265349985477781, "grad_norm": 0.40158766508102417, "learning_rate": 0.0001, "loss": 1.585, "step": 2284 }, { "epoch": 0.2654661632297415, "grad_norm": 0.37319740653038025, "learning_rate": 0.0001, "loss": 1.6671, "step": 2285 }, { "epoch": 0.265582340981702, "grad_norm": 0.3656269311904907, "learning_rate": 0.0001, "loss": 1.6144, "step": 2286 }, { "epoch": 0.2656985187336625, "grad_norm": 0.3666156232357025, "learning_rate": 0.0001, "loss": 1.6565, "step": 2287 }, { "epoch": 0.265814696485623, "grad_norm": 0.3868063688278198, "learning_rate": 0.0001, "loss": 1.6494, "step": 2288 }, { "epoch": 0.2659308742375835, "grad_norm": 0.36704185605049133, "learning_rate": 0.0001, "loss": 1.6885, "step": 2289 }, { "epoch": 0.266047051989544, "grad_norm": 0.37085020542144775, "learning_rate": 0.0001, "loss": 1.5913, "step": 2290 }, { "epoch": 0.2661632297415045, "grad_norm": 0.3715968430042267, "learning_rate": 0.0001, "loss": 1.8348, "step": 2291 }, { "epoch": 0.266279407493465, "grad_norm": 0.3815777599811554, "learning_rate": 0.0001, "loss": 1.7571, "step": 2292 }, { "epoch": 0.2663955852454255, "grad_norm": 0.3628351092338562, "learning_rate": 0.0001, "loss": 1.8001, "step": 2293 }, { "epoch": 0.266511762997386, "grad_norm": 0.39187735319137573, "learning_rate": 0.0001, "loss": 1.7246, "step": 2294 }, { "epoch": 0.2666279407493465, "grad_norm": 0.4006575644016266, "learning_rate": 0.0001, "loss": 1.6944, "step": 2295 }, { "epoch": 0.266744118501307, "grad_norm": 0.3559897243976593, "learning_rate": 0.0001, "loss": 1.6132, "step": 2296 }, { "epoch": 0.2668602962532675, "grad_norm": 0.3798125088214874, "learning_rate": 0.0001, "loss": 1.6853, "step": 2297 }, { "epoch": 0.266976474005228, "grad_norm": 0.40822911262512207, "learning_rate": 0.0001, "loss": 1.7918, "step": 2298 }, { "epoch": 0.2670926517571885, "grad_norm": 0.36172357201576233, "learning_rate": 0.0001, "loss": 1.6948, "step": 2299 }, { "epoch": 0.267208829509149, "grad_norm": 0.3883090913295746, "learning_rate": 0.0001, "loss": 1.6766, "step": 2300 }, { "epoch": 0.26732500726110947, "grad_norm": 0.3930845856666565, "learning_rate": 0.0001, "loss": 1.6652, "step": 2301 }, { "epoch": 0.26744118501307, "grad_norm": 0.34050822257995605, "learning_rate": 0.0001, "loss": 1.5192, "step": 2302 }, { "epoch": 0.2675573627650305, "grad_norm": 0.37255436182022095, "learning_rate": 0.0001, "loss": 1.6303, "step": 2303 }, { "epoch": 0.267673540516991, "grad_norm": 0.383784681558609, "learning_rate": 0.0001, "loss": 1.7766, "step": 2304 }, { "epoch": 0.2677897182689515, "grad_norm": 0.3867911100387573, "learning_rate": 0.0001, "loss": 1.855, "step": 2305 }, { "epoch": 0.26790589602091197, "grad_norm": 0.40486153960227966, "learning_rate": 0.0001, "loss": 1.6078, "step": 2306 }, { "epoch": 0.2680220737728725, "grad_norm": 0.42723652720451355, "learning_rate": 0.0001, "loss": 1.909, "step": 2307 }, { "epoch": 0.268138251524833, "grad_norm": 0.3602335751056671, "learning_rate": 0.0001, "loss": 1.6768, "step": 2308 }, { "epoch": 0.2682544292767935, "grad_norm": 0.3916855454444885, "learning_rate": 0.0001, "loss": 1.501, "step": 2309 }, { "epoch": 0.268370607028754, "grad_norm": 0.3967686891555786, "learning_rate": 0.0001, "loss": 1.5217, "step": 2310 }, { "epoch": 0.26848678478071447, "grad_norm": 0.37065213918685913, "learning_rate": 0.0001, "loss": 1.7548, "step": 2311 }, { "epoch": 0.268602962532675, "grad_norm": 0.3448849618434906, "learning_rate": 0.0001, "loss": 1.5527, "step": 2312 }, { "epoch": 0.2687191402846355, "grad_norm": 0.36089155077934265, "learning_rate": 0.0001, "loss": 1.845, "step": 2313 }, { "epoch": 0.268835318036596, "grad_norm": 0.3895665109157562, "learning_rate": 0.0001, "loss": 1.7589, "step": 2314 }, { "epoch": 0.2689514957885565, "grad_norm": 0.3936954736709595, "learning_rate": 0.0001, "loss": 1.6873, "step": 2315 }, { "epoch": 0.26906767354051697, "grad_norm": 0.38149169087409973, "learning_rate": 0.0001, "loss": 1.6693, "step": 2316 }, { "epoch": 0.2691838512924775, "grad_norm": 0.34843528270721436, "learning_rate": 0.0001, "loss": 1.5742, "step": 2317 }, { "epoch": 0.269300029044438, "grad_norm": 0.3937288522720337, "learning_rate": 0.0001, "loss": 1.6785, "step": 2318 }, { "epoch": 0.2694162067963985, "grad_norm": 0.431801438331604, "learning_rate": 0.0001, "loss": 1.7015, "step": 2319 }, { "epoch": 0.269532384548359, "grad_norm": 0.3963301181793213, "learning_rate": 0.0001, "loss": 1.7602, "step": 2320 }, { "epoch": 0.26964856230031947, "grad_norm": 0.39253467321395874, "learning_rate": 0.0001, "loss": 1.8633, "step": 2321 }, { "epoch": 0.26976474005227996, "grad_norm": 0.3785027861595154, "learning_rate": 0.0001, "loss": 1.6881, "step": 2322 }, { "epoch": 0.2698809178042405, "grad_norm": 0.4014844596385956, "learning_rate": 0.0001, "loss": 1.8149, "step": 2323 }, { "epoch": 0.269997095556201, "grad_norm": 0.3676471710205078, "learning_rate": 0.0001, "loss": 1.7278, "step": 2324 }, { "epoch": 0.2701132733081615, "grad_norm": 0.3606375455856323, "learning_rate": 0.0001, "loss": 1.7173, "step": 2325 }, { "epoch": 0.270229451060122, "grad_norm": 0.3681615889072418, "learning_rate": 0.0001, "loss": 1.747, "step": 2326 }, { "epoch": 0.27034562881208246, "grad_norm": 0.3740110397338867, "learning_rate": 0.0001, "loss": 1.593, "step": 2327 }, { "epoch": 0.270461806564043, "grad_norm": 0.39461883902549744, "learning_rate": 0.0001, "loss": 1.7891, "step": 2328 }, { "epoch": 0.2705779843160035, "grad_norm": 0.3810250759124756, "learning_rate": 0.0001, "loss": 1.7613, "step": 2329 }, { "epoch": 0.270694162067964, "grad_norm": 0.39059239625930786, "learning_rate": 0.0001, "loss": 1.7721, "step": 2330 }, { "epoch": 0.2708103398199245, "grad_norm": 0.40021392703056335, "learning_rate": 0.0001, "loss": 1.8333, "step": 2331 }, { "epoch": 0.27092651757188496, "grad_norm": 0.42178237438201904, "learning_rate": 0.0001, "loss": 1.819, "step": 2332 }, { "epoch": 0.2710426953238455, "grad_norm": 0.3722633123397827, "learning_rate": 0.0001, "loss": 1.6485, "step": 2333 }, { "epoch": 0.271158873075806, "grad_norm": 0.3866879940032959, "learning_rate": 0.0001, "loss": 1.8033, "step": 2334 }, { "epoch": 0.2712750508277665, "grad_norm": 0.38638031482696533, "learning_rate": 0.0001, "loss": 1.7147, "step": 2335 }, { "epoch": 0.271391228579727, "grad_norm": 0.37124332785606384, "learning_rate": 0.0001, "loss": 1.6957, "step": 2336 }, { "epoch": 0.27150740633168746, "grad_norm": 0.39737075567245483, "learning_rate": 0.0001, "loss": 1.6314, "step": 2337 }, { "epoch": 0.271623584083648, "grad_norm": 0.41190680861473083, "learning_rate": 0.0001, "loss": 1.7341, "step": 2338 }, { "epoch": 0.2717397618356085, "grad_norm": 0.36484986543655396, "learning_rate": 0.0001, "loss": 1.7156, "step": 2339 }, { "epoch": 0.271855939587569, "grad_norm": 0.38202008605003357, "learning_rate": 0.0001, "loss": 1.7296, "step": 2340 }, { "epoch": 0.2719721173395295, "grad_norm": 0.3791213035583496, "learning_rate": 0.0001, "loss": 1.7189, "step": 2341 }, { "epoch": 0.27208829509148996, "grad_norm": 0.3870936930179596, "learning_rate": 0.0001, "loss": 1.6692, "step": 2342 }, { "epoch": 0.2722044728434505, "grad_norm": 0.3662189543247223, "learning_rate": 0.0001, "loss": 1.7003, "step": 2343 }, { "epoch": 0.272320650595411, "grad_norm": 0.3746212422847748, "learning_rate": 0.0001, "loss": 1.743, "step": 2344 }, { "epoch": 0.2724368283473715, "grad_norm": 0.3724815845489502, "learning_rate": 0.0001, "loss": 1.7269, "step": 2345 }, { "epoch": 0.272553006099332, "grad_norm": 0.390982985496521, "learning_rate": 0.0001, "loss": 1.6358, "step": 2346 }, { "epoch": 0.27266918385129246, "grad_norm": 0.38862210512161255, "learning_rate": 0.0001, "loss": 1.6747, "step": 2347 }, { "epoch": 0.27278536160325295, "grad_norm": 0.3926958441734314, "learning_rate": 0.0001, "loss": 1.7315, "step": 2348 }, { "epoch": 0.2729015393552135, "grad_norm": 0.3797786831855774, "learning_rate": 0.0001, "loss": 1.707, "step": 2349 }, { "epoch": 0.273017717107174, "grad_norm": 0.40166667103767395, "learning_rate": 0.0001, "loss": 1.6808, "step": 2350 }, { "epoch": 0.2731338948591345, "grad_norm": 0.37161746621131897, "learning_rate": 0.0001, "loss": 1.7761, "step": 2351 }, { "epoch": 0.27325007261109496, "grad_norm": 0.3659614324569702, "learning_rate": 0.0001, "loss": 1.5514, "step": 2352 }, { "epoch": 0.27336625036305545, "grad_norm": 0.41991570591926575, "learning_rate": 0.0001, "loss": 1.7652, "step": 2353 }, { "epoch": 0.273482428115016, "grad_norm": 0.41460558772087097, "learning_rate": 0.0001, "loss": 1.7779, "step": 2354 }, { "epoch": 0.2735986058669765, "grad_norm": 0.3913847506046295, "learning_rate": 0.0001, "loss": 1.6966, "step": 2355 }, { "epoch": 0.273714783618937, "grad_norm": 0.3612228035926819, "learning_rate": 0.0001, "loss": 1.4956, "step": 2356 }, { "epoch": 0.27383096137089746, "grad_norm": 0.3820975422859192, "learning_rate": 0.0001, "loss": 1.7256, "step": 2357 }, { "epoch": 0.27394713912285795, "grad_norm": 0.37913262844085693, "learning_rate": 0.0001, "loss": 1.7815, "step": 2358 }, { "epoch": 0.2740633168748185, "grad_norm": 0.3759218454360962, "learning_rate": 0.0001, "loss": 1.5806, "step": 2359 }, { "epoch": 0.274179494626779, "grad_norm": 0.3867158889770508, "learning_rate": 0.0001, "loss": 1.6269, "step": 2360 }, { "epoch": 0.2742956723787395, "grad_norm": 0.36591243743896484, "learning_rate": 0.0001, "loss": 1.7097, "step": 2361 }, { "epoch": 0.27441185013069996, "grad_norm": 0.37191227078437805, "learning_rate": 0.0001, "loss": 1.6653, "step": 2362 }, { "epoch": 0.27452802788266045, "grad_norm": 0.40537264943122864, "learning_rate": 0.0001, "loss": 1.7831, "step": 2363 }, { "epoch": 0.274644205634621, "grad_norm": 0.3994043171405792, "learning_rate": 0.0001, "loss": 1.7722, "step": 2364 }, { "epoch": 0.2747603833865815, "grad_norm": 0.3657456040382385, "learning_rate": 0.0001, "loss": 1.5085, "step": 2365 }, { "epoch": 0.274876561138542, "grad_norm": 0.3876878321170807, "learning_rate": 0.0001, "loss": 1.6583, "step": 2366 }, { "epoch": 0.27499273889050246, "grad_norm": 0.3732263445854187, "learning_rate": 0.0001, "loss": 1.6381, "step": 2367 }, { "epoch": 0.27510891664246295, "grad_norm": 0.3888086974620819, "learning_rate": 0.0001, "loss": 1.8368, "step": 2368 }, { "epoch": 0.27522509439442344, "grad_norm": 0.3704921007156372, "learning_rate": 0.0001, "loss": 1.7033, "step": 2369 }, { "epoch": 0.275341272146384, "grad_norm": 0.4058105945587158, "learning_rate": 0.0001, "loss": 1.907, "step": 2370 }, { "epoch": 0.2754574498983445, "grad_norm": 0.35868632793426514, "learning_rate": 0.0001, "loss": 1.6088, "step": 2371 }, { "epoch": 0.27557362765030496, "grad_norm": 0.3760453462600708, "learning_rate": 0.0001, "loss": 1.5563, "step": 2372 }, { "epoch": 0.27568980540226545, "grad_norm": 0.43682530522346497, "learning_rate": 0.0001, "loss": 1.8795, "step": 2373 }, { "epoch": 0.27580598315422594, "grad_norm": 0.4160868525505066, "learning_rate": 0.0001, "loss": 1.7954, "step": 2374 }, { "epoch": 0.2759221609061865, "grad_norm": 0.3560098707675934, "learning_rate": 0.0001, "loss": 1.6013, "step": 2375 }, { "epoch": 0.276038338658147, "grad_norm": 0.37041234970092773, "learning_rate": 0.0001, "loss": 1.6575, "step": 2376 }, { "epoch": 0.27615451641010746, "grad_norm": 0.3486596345901489, "learning_rate": 0.0001, "loss": 1.5998, "step": 2377 }, { "epoch": 0.27627069416206795, "grad_norm": 0.368912935256958, "learning_rate": 0.0001, "loss": 1.7511, "step": 2378 }, { "epoch": 0.27638687191402844, "grad_norm": 0.41473448276519775, "learning_rate": 0.0001, "loss": 1.8201, "step": 2379 }, { "epoch": 0.276503049665989, "grad_norm": 0.39010536670684814, "learning_rate": 0.0001, "loss": 1.8835, "step": 2380 }, { "epoch": 0.2766192274179495, "grad_norm": 0.44715237617492676, "learning_rate": 0.0001, "loss": 1.6617, "step": 2381 }, { "epoch": 0.27673540516990996, "grad_norm": 0.38639402389526367, "learning_rate": 0.0001, "loss": 1.5988, "step": 2382 }, { "epoch": 0.27685158292187045, "grad_norm": 0.3690156638622284, "learning_rate": 0.0001, "loss": 1.6905, "step": 2383 }, { "epoch": 0.27696776067383094, "grad_norm": 0.380719393491745, "learning_rate": 0.0001, "loss": 1.6926, "step": 2384 }, { "epoch": 0.2770839384257915, "grad_norm": 0.37003418803215027, "learning_rate": 0.0001, "loss": 1.7133, "step": 2385 }, { "epoch": 0.277200116177752, "grad_norm": 0.3689243197441101, "learning_rate": 0.0001, "loss": 1.7334, "step": 2386 }, { "epoch": 0.27731629392971247, "grad_norm": 0.3837697505950928, "learning_rate": 0.0001, "loss": 1.6301, "step": 2387 }, { "epoch": 0.27743247168167295, "grad_norm": 0.39931994676589966, "learning_rate": 0.0001, "loss": 1.7904, "step": 2388 }, { "epoch": 0.27754864943363344, "grad_norm": 0.3684816062450409, "learning_rate": 0.0001, "loss": 1.7226, "step": 2389 }, { "epoch": 0.27766482718559393, "grad_norm": 0.37523365020751953, "learning_rate": 0.0001, "loss": 1.6248, "step": 2390 }, { "epoch": 0.2777810049375545, "grad_norm": 0.38715073466300964, "learning_rate": 0.0001, "loss": 1.8479, "step": 2391 }, { "epoch": 0.27789718268951497, "grad_norm": 0.3796006441116333, "learning_rate": 0.0001, "loss": 1.6852, "step": 2392 }, { "epoch": 0.27801336044147545, "grad_norm": 0.3867599070072174, "learning_rate": 0.0001, "loss": 1.6786, "step": 2393 }, { "epoch": 0.27812953819343594, "grad_norm": 0.36347195506095886, "learning_rate": 0.0001, "loss": 1.5115, "step": 2394 }, { "epoch": 0.27824571594539643, "grad_norm": 0.40452706813812256, "learning_rate": 0.0001, "loss": 1.7461, "step": 2395 }, { "epoch": 0.278361893697357, "grad_norm": 0.37691494822502136, "learning_rate": 0.0001, "loss": 1.6363, "step": 2396 }, { "epoch": 0.27847807144931747, "grad_norm": 0.3693540394306183, "learning_rate": 0.0001, "loss": 1.5973, "step": 2397 }, { "epoch": 0.27859424920127795, "grad_norm": 0.3859393894672394, "learning_rate": 0.0001, "loss": 1.6396, "step": 2398 }, { "epoch": 0.27871042695323844, "grad_norm": 0.36615633964538574, "learning_rate": 0.0001, "loss": 1.5203, "step": 2399 }, { "epoch": 0.27882660470519893, "grad_norm": 0.4025718867778778, "learning_rate": 0.0001, "loss": 1.7692, "step": 2400 }, { "epoch": 0.2789427824571595, "grad_norm": 0.35570523142814636, "learning_rate": 0.0001, "loss": 1.5524, "step": 2401 }, { "epoch": 0.27905896020911997, "grad_norm": 0.4003136456012726, "learning_rate": 0.0001, "loss": 1.6839, "step": 2402 }, { "epoch": 0.27917513796108046, "grad_norm": 0.38738471269607544, "learning_rate": 0.0001, "loss": 1.6605, "step": 2403 }, { "epoch": 0.27929131571304094, "grad_norm": 0.38564881682395935, "learning_rate": 0.0001, "loss": 1.8121, "step": 2404 }, { "epoch": 0.27940749346500143, "grad_norm": 0.4316978454589844, "learning_rate": 0.0001, "loss": 1.6242, "step": 2405 }, { "epoch": 0.279523671216962, "grad_norm": 0.3853331208229065, "learning_rate": 0.0001, "loss": 1.6656, "step": 2406 }, { "epoch": 0.27963984896892247, "grad_norm": 0.3825344443321228, "learning_rate": 0.0001, "loss": 1.623, "step": 2407 }, { "epoch": 0.27975602672088296, "grad_norm": 0.3726043999195099, "learning_rate": 0.0001, "loss": 1.7153, "step": 2408 }, { "epoch": 0.27987220447284344, "grad_norm": 0.36002033948898315, "learning_rate": 0.0001, "loss": 1.4855, "step": 2409 }, { "epoch": 0.27998838222480393, "grad_norm": 0.388629674911499, "learning_rate": 0.0001, "loss": 1.6194, "step": 2410 }, { "epoch": 0.2801045599767644, "grad_norm": 0.3558464050292969, "learning_rate": 0.0001, "loss": 1.3596, "step": 2411 }, { "epoch": 0.28022073772872497, "grad_norm": 0.36634495854377747, "learning_rate": 0.0001, "loss": 1.6079, "step": 2412 }, { "epoch": 0.28033691548068546, "grad_norm": 0.36792120337486267, "learning_rate": 0.0001, "loss": 1.775, "step": 2413 }, { "epoch": 0.28045309323264594, "grad_norm": 0.35753536224365234, "learning_rate": 0.0001, "loss": 1.5954, "step": 2414 }, { "epoch": 0.28056927098460643, "grad_norm": 0.3687507212162018, "learning_rate": 0.0001, "loss": 1.6478, "step": 2415 }, { "epoch": 0.2806854487365669, "grad_norm": 0.35439378023147583, "learning_rate": 0.0001, "loss": 1.6115, "step": 2416 }, { "epoch": 0.28080162648852747, "grad_norm": 0.3909814655780792, "learning_rate": 0.0001, "loss": 1.843, "step": 2417 }, { "epoch": 0.28091780424048796, "grad_norm": 0.4006040096282959, "learning_rate": 0.0001, "loss": 1.7323, "step": 2418 }, { "epoch": 0.28103398199244845, "grad_norm": 0.3820417821407318, "learning_rate": 0.0001, "loss": 1.7021, "step": 2419 }, { "epoch": 0.28115015974440893, "grad_norm": 0.41113728284835815, "learning_rate": 0.0001, "loss": 1.6909, "step": 2420 }, { "epoch": 0.2812663374963694, "grad_norm": 0.37190499901771545, "learning_rate": 0.0001, "loss": 1.5947, "step": 2421 }, { "epoch": 0.28138251524832997, "grad_norm": 0.3800449073314667, "learning_rate": 0.0001, "loss": 1.699, "step": 2422 }, { "epoch": 0.28149869300029046, "grad_norm": 0.3811475932598114, "learning_rate": 0.0001, "loss": 1.7483, "step": 2423 }, { "epoch": 0.28161487075225095, "grad_norm": 0.3777383863925934, "learning_rate": 0.0001, "loss": 1.6842, "step": 2424 }, { "epoch": 0.28173104850421143, "grad_norm": 0.3724311590194702, "learning_rate": 0.0001, "loss": 1.7839, "step": 2425 }, { "epoch": 0.2818472262561719, "grad_norm": 0.38829484581947327, "learning_rate": 0.0001, "loss": 1.8328, "step": 2426 }, { "epoch": 0.28196340400813247, "grad_norm": 0.4069208800792694, "learning_rate": 0.0001, "loss": 1.873, "step": 2427 }, { "epoch": 0.28207958176009296, "grad_norm": 0.3797883093357086, "learning_rate": 0.0001, "loss": 1.6634, "step": 2428 }, { "epoch": 0.28219575951205345, "grad_norm": 0.3935191333293915, "learning_rate": 0.0001, "loss": 1.8087, "step": 2429 }, { "epoch": 0.28231193726401393, "grad_norm": 0.37056753039360046, "learning_rate": 0.0001, "loss": 1.7262, "step": 2430 }, { "epoch": 0.2824281150159744, "grad_norm": 0.3883243799209595, "learning_rate": 0.0001, "loss": 1.7572, "step": 2431 }, { "epoch": 0.28254429276793497, "grad_norm": 0.38896408677101135, "learning_rate": 0.0001, "loss": 1.605, "step": 2432 }, { "epoch": 0.28266047051989546, "grad_norm": 0.3767715096473694, "learning_rate": 0.0001, "loss": 1.7514, "step": 2433 }, { "epoch": 0.28277664827185595, "grad_norm": 0.36977216601371765, "learning_rate": 0.0001, "loss": 1.659, "step": 2434 }, { "epoch": 0.28289282602381643, "grad_norm": 0.38601481914520264, "learning_rate": 0.0001, "loss": 1.7051, "step": 2435 }, { "epoch": 0.2830090037757769, "grad_norm": 0.36568722128868103, "learning_rate": 0.0001, "loss": 1.6103, "step": 2436 }, { "epoch": 0.2831251815277374, "grad_norm": 0.3678687512874603, "learning_rate": 0.0001, "loss": 1.6256, "step": 2437 }, { "epoch": 0.28324135927969796, "grad_norm": 0.4139416813850403, "learning_rate": 0.0001, "loss": 1.7692, "step": 2438 }, { "epoch": 0.28335753703165845, "grad_norm": 0.3862224519252777, "learning_rate": 0.0001, "loss": 1.7017, "step": 2439 }, { "epoch": 0.28347371478361894, "grad_norm": 0.3898821771144867, "learning_rate": 0.0001, "loss": 1.6621, "step": 2440 }, { "epoch": 0.2835898925355794, "grad_norm": 0.35055509209632874, "learning_rate": 0.0001, "loss": 1.6043, "step": 2441 }, { "epoch": 0.2837060702875399, "grad_norm": 0.3779016137123108, "learning_rate": 0.0001, "loss": 1.5587, "step": 2442 }, { "epoch": 0.28382224803950046, "grad_norm": 0.38517260551452637, "learning_rate": 0.0001, "loss": 1.6497, "step": 2443 }, { "epoch": 0.28393842579146095, "grad_norm": 0.3903183043003082, "learning_rate": 0.0001, "loss": 1.7303, "step": 2444 }, { "epoch": 0.28405460354342144, "grad_norm": 0.41626691818237305, "learning_rate": 0.0001, "loss": 1.8199, "step": 2445 }, { "epoch": 0.2841707812953819, "grad_norm": 0.37726128101348877, "learning_rate": 0.0001, "loss": 1.7504, "step": 2446 }, { "epoch": 0.2842869590473424, "grad_norm": 0.3679426908493042, "learning_rate": 0.0001, "loss": 1.6864, "step": 2447 }, { "epoch": 0.28440313679930296, "grad_norm": 0.3660835921764374, "learning_rate": 0.0001, "loss": 1.7442, "step": 2448 }, { "epoch": 0.28451931455126345, "grad_norm": 0.39392563700675964, "learning_rate": 0.0001, "loss": 1.7503, "step": 2449 }, { "epoch": 0.28463549230322394, "grad_norm": 0.38993775844573975, "learning_rate": 0.0001, "loss": 1.6938, "step": 2450 }, { "epoch": 0.2847516700551844, "grad_norm": 0.3619860112667084, "learning_rate": 0.0001, "loss": 1.5416, "step": 2451 }, { "epoch": 0.2848678478071449, "grad_norm": 0.3857851028442383, "learning_rate": 0.0001, "loss": 1.6284, "step": 2452 }, { "epoch": 0.28498402555910546, "grad_norm": 0.3716614544391632, "learning_rate": 0.0001, "loss": 1.5245, "step": 2453 }, { "epoch": 0.28510020331106595, "grad_norm": 0.4253447949886322, "learning_rate": 0.0001, "loss": 1.7602, "step": 2454 }, { "epoch": 0.28521638106302644, "grad_norm": 0.39747342467308044, "learning_rate": 0.0001, "loss": 1.6925, "step": 2455 }, { "epoch": 0.2853325588149869, "grad_norm": 0.3874737322330475, "learning_rate": 0.0001, "loss": 1.763, "step": 2456 }, { "epoch": 0.2854487365669474, "grad_norm": 0.37462908029556274, "learning_rate": 0.0001, "loss": 1.6554, "step": 2457 }, { "epoch": 0.2855649143189079, "grad_norm": 0.4207324981689453, "learning_rate": 0.0001, "loss": 1.6978, "step": 2458 }, { "epoch": 0.28568109207086845, "grad_norm": 0.3763648569583893, "learning_rate": 0.0001, "loss": 1.6805, "step": 2459 }, { "epoch": 0.28579726982282894, "grad_norm": 0.40208232402801514, "learning_rate": 0.0001, "loss": 1.584, "step": 2460 }, { "epoch": 0.2859134475747894, "grad_norm": 0.3689945638179779, "learning_rate": 0.0001, "loss": 1.7245, "step": 2461 }, { "epoch": 0.2860296253267499, "grad_norm": 0.3570806086063385, "learning_rate": 0.0001, "loss": 1.493, "step": 2462 }, { "epoch": 0.2861458030787104, "grad_norm": 0.39865806698799133, "learning_rate": 0.0001, "loss": 1.888, "step": 2463 }, { "epoch": 0.28626198083067095, "grad_norm": 0.3934881389141083, "learning_rate": 0.0001, "loss": 1.6636, "step": 2464 }, { "epoch": 0.28637815858263144, "grad_norm": 0.38317766785621643, "learning_rate": 0.0001, "loss": 1.6417, "step": 2465 }, { "epoch": 0.2864943363345919, "grad_norm": 0.3968998193740845, "learning_rate": 0.0001, "loss": 1.7444, "step": 2466 }, { "epoch": 0.2866105140865524, "grad_norm": 0.3885013461112976, "learning_rate": 0.0001, "loss": 1.7969, "step": 2467 }, { "epoch": 0.2867266918385129, "grad_norm": 0.3675188720226288, "learning_rate": 0.0001, "loss": 1.6485, "step": 2468 }, { "epoch": 0.28684286959047345, "grad_norm": 0.39261195063591003, "learning_rate": 0.0001, "loss": 1.6632, "step": 2469 }, { "epoch": 0.28695904734243394, "grad_norm": 0.3737514615058899, "learning_rate": 0.0001, "loss": 1.6274, "step": 2470 }, { "epoch": 0.2870752250943944, "grad_norm": 0.3958921730518341, "learning_rate": 0.0001, "loss": 1.6123, "step": 2471 }, { "epoch": 0.2871914028463549, "grad_norm": 0.37590277194976807, "learning_rate": 0.0001, "loss": 1.723, "step": 2472 }, { "epoch": 0.2873075805983154, "grad_norm": 0.39498046040534973, "learning_rate": 0.0001, "loss": 1.6826, "step": 2473 }, { "epoch": 0.28742375835027595, "grad_norm": 0.35599952936172485, "learning_rate": 0.0001, "loss": 1.6085, "step": 2474 }, { "epoch": 0.28753993610223644, "grad_norm": 0.3650219440460205, "learning_rate": 0.0001, "loss": 1.5636, "step": 2475 }, { "epoch": 0.2876561138541969, "grad_norm": 0.3618330955505371, "learning_rate": 0.0001, "loss": 1.5798, "step": 2476 }, { "epoch": 0.2877722916061574, "grad_norm": 0.35137006640434265, "learning_rate": 0.0001, "loss": 1.6287, "step": 2477 }, { "epoch": 0.2878884693581179, "grad_norm": 0.3683086931705475, "learning_rate": 0.0001, "loss": 1.5807, "step": 2478 }, { "epoch": 0.2880046471100784, "grad_norm": 0.4060046374797821, "learning_rate": 0.0001, "loss": 1.6742, "step": 2479 }, { "epoch": 0.28812082486203894, "grad_norm": 0.39657512307167053, "learning_rate": 0.0001, "loss": 1.7616, "step": 2480 }, { "epoch": 0.2882370026139994, "grad_norm": 0.38853272795677185, "learning_rate": 0.0001, "loss": 1.7686, "step": 2481 }, { "epoch": 0.2883531803659599, "grad_norm": 0.39028510451316833, "learning_rate": 0.0001, "loss": 1.674, "step": 2482 }, { "epoch": 0.2884693581179204, "grad_norm": 0.3892669379711151, "learning_rate": 0.0001, "loss": 1.7145, "step": 2483 }, { "epoch": 0.2885855358698809, "grad_norm": 0.3654754161834717, "learning_rate": 0.0001, "loss": 1.663, "step": 2484 }, { "epoch": 0.28870171362184144, "grad_norm": 0.3528408110141754, "learning_rate": 0.0001, "loss": 1.4453, "step": 2485 }, { "epoch": 0.2888178913738019, "grad_norm": 0.4061609208583832, "learning_rate": 0.0001, "loss": 1.9495, "step": 2486 }, { "epoch": 0.2889340691257624, "grad_norm": 0.4418545663356781, "learning_rate": 0.0001, "loss": 1.6918, "step": 2487 }, { "epoch": 0.2890502468777229, "grad_norm": 0.3858497142791748, "learning_rate": 0.0001, "loss": 1.6543, "step": 2488 }, { "epoch": 0.2891664246296834, "grad_norm": 0.43995893001556396, "learning_rate": 0.0001, "loss": 1.7826, "step": 2489 }, { "epoch": 0.28928260238164394, "grad_norm": 0.3793260157108307, "learning_rate": 0.0001, "loss": 1.7453, "step": 2490 }, { "epoch": 0.2893987801336044, "grad_norm": 0.3741684556007385, "learning_rate": 0.0001, "loss": 1.6777, "step": 2491 }, { "epoch": 0.2895149578855649, "grad_norm": 0.3923763632774353, "learning_rate": 0.0001, "loss": 1.744, "step": 2492 }, { "epoch": 0.2896311356375254, "grad_norm": 0.3590010702610016, "learning_rate": 0.0001, "loss": 1.5432, "step": 2493 }, { "epoch": 0.2897473133894859, "grad_norm": 0.37135443091392517, "learning_rate": 0.0001, "loss": 1.6868, "step": 2494 }, { "epoch": 0.28986349114144644, "grad_norm": 0.36070337891578674, "learning_rate": 0.0001, "loss": 1.6938, "step": 2495 }, { "epoch": 0.2899796688934069, "grad_norm": 0.3781169652938843, "learning_rate": 0.0001, "loss": 1.5814, "step": 2496 }, { "epoch": 0.2900958466453674, "grad_norm": 0.3880577087402344, "learning_rate": 0.0001, "loss": 1.4875, "step": 2497 }, { "epoch": 0.2902120243973279, "grad_norm": 0.38813501596450806, "learning_rate": 0.0001, "loss": 1.4855, "step": 2498 }, { "epoch": 0.2903282021492884, "grad_norm": 0.4048740267753601, "learning_rate": 0.0001, "loss": 1.6034, "step": 2499 }, { "epoch": 0.2904443799012489, "grad_norm": 0.35958972573280334, "learning_rate": 0.0001, "loss": 1.5821, "step": 2500 }, { "epoch": 0.29056055765320943, "grad_norm": 0.38591668009757996, "learning_rate": 0.0001, "loss": 1.7163, "step": 2501 }, { "epoch": 0.2906767354051699, "grad_norm": 0.37877535820007324, "learning_rate": 0.0001, "loss": 1.6734, "step": 2502 }, { "epoch": 0.2907929131571304, "grad_norm": 0.39159923791885376, "learning_rate": 0.0001, "loss": 1.8899, "step": 2503 }, { "epoch": 0.2909090909090909, "grad_norm": 0.4107246994972229, "learning_rate": 0.0001, "loss": 1.644, "step": 2504 }, { "epoch": 0.2910252686610514, "grad_norm": 0.39638829231262207, "learning_rate": 0.0001, "loss": 1.719, "step": 2505 }, { "epoch": 0.29114144641301193, "grad_norm": 0.3886748254299164, "learning_rate": 0.0001, "loss": 1.6838, "step": 2506 }, { "epoch": 0.2912576241649724, "grad_norm": 0.34036189317703247, "learning_rate": 0.0001, "loss": 1.4912, "step": 2507 }, { "epoch": 0.2913738019169329, "grad_norm": 0.4072575867176056, "learning_rate": 0.0001, "loss": 1.7982, "step": 2508 }, { "epoch": 0.2914899796688934, "grad_norm": 0.38684821128845215, "learning_rate": 0.0001, "loss": 1.6675, "step": 2509 }, { "epoch": 0.2916061574208539, "grad_norm": 0.3691238462924957, "learning_rate": 0.0001, "loss": 1.5981, "step": 2510 }, { "epoch": 0.29172233517281443, "grad_norm": 0.39206942915916443, "learning_rate": 0.0001, "loss": 1.6901, "step": 2511 }, { "epoch": 0.2918385129247749, "grad_norm": 0.3901553452014923, "learning_rate": 0.0001, "loss": 1.7128, "step": 2512 }, { "epoch": 0.2919546906767354, "grad_norm": 0.37073972821235657, "learning_rate": 0.0001, "loss": 1.5624, "step": 2513 }, { "epoch": 0.2920708684286959, "grad_norm": 0.3903072476387024, "learning_rate": 0.0001, "loss": 1.5732, "step": 2514 }, { "epoch": 0.2921870461806564, "grad_norm": 0.4193265438079834, "learning_rate": 0.0001, "loss": 1.7321, "step": 2515 }, { "epoch": 0.29230322393261693, "grad_norm": 0.4069857895374298, "learning_rate": 0.0001, "loss": 1.7801, "step": 2516 }, { "epoch": 0.2924194016845774, "grad_norm": 0.40134382247924805, "learning_rate": 0.0001, "loss": 1.8263, "step": 2517 }, { "epoch": 0.2925355794365379, "grad_norm": 0.39630135893821716, "learning_rate": 0.0001, "loss": 1.7561, "step": 2518 }, { "epoch": 0.2926517571884984, "grad_norm": 0.37301602959632874, "learning_rate": 0.0001, "loss": 1.6711, "step": 2519 }, { "epoch": 0.2927679349404589, "grad_norm": 0.3810461759567261, "learning_rate": 0.0001, "loss": 1.6239, "step": 2520 }, { "epoch": 0.29288411269241943, "grad_norm": 0.36505237221717834, "learning_rate": 0.0001, "loss": 1.6731, "step": 2521 }, { "epoch": 0.2930002904443799, "grad_norm": 0.3974515199661255, "learning_rate": 0.0001, "loss": 1.7363, "step": 2522 }, { "epoch": 0.2931164681963404, "grad_norm": 0.3713068664073944, "learning_rate": 0.0001, "loss": 1.6443, "step": 2523 }, { "epoch": 0.2932326459483009, "grad_norm": 0.4444441497325897, "learning_rate": 0.0001, "loss": 1.9543, "step": 2524 }, { "epoch": 0.2933488237002614, "grad_norm": 0.37668412923812866, "learning_rate": 0.0001, "loss": 1.7126, "step": 2525 }, { "epoch": 0.2934650014522219, "grad_norm": 0.40829259157180786, "learning_rate": 0.0001, "loss": 1.8668, "step": 2526 }, { "epoch": 0.2935811792041824, "grad_norm": 0.3872903287410736, "learning_rate": 0.0001, "loss": 1.5966, "step": 2527 }, { "epoch": 0.2936973569561429, "grad_norm": 0.4001368284225464, "learning_rate": 0.0001, "loss": 1.7448, "step": 2528 }, { "epoch": 0.2938135347081034, "grad_norm": 0.36894676089286804, "learning_rate": 0.0001, "loss": 1.6215, "step": 2529 }, { "epoch": 0.2939297124600639, "grad_norm": 0.4085611402988434, "learning_rate": 0.0001, "loss": 1.8068, "step": 2530 }, { "epoch": 0.2940458902120244, "grad_norm": 0.3942314386367798, "learning_rate": 0.0001, "loss": 1.6788, "step": 2531 }, { "epoch": 0.2941620679639849, "grad_norm": 0.3652056157588959, "learning_rate": 0.0001, "loss": 1.5036, "step": 2532 }, { "epoch": 0.2942782457159454, "grad_norm": 0.39049550890922546, "learning_rate": 0.0001, "loss": 1.7542, "step": 2533 }, { "epoch": 0.2943944234679059, "grad_norm": 0.38500627875328064, "learning_rate": 0.0001, "loss": 1.6621, "step": 2534 }, { "epoch": 0.2945106012198664, "grad_norm": 0.372928649187088, "learning_rate": 0.0001, "loss": 1.5971, "step": 2535 }, { "epoch": 0.2946267789718269, "grad_norm": 0.4132663607597351, "learning_rate": 0.0001, "loss": 1.7871, "step": 2536 }, { "epoch": 0.2947429567237874, "grad_norm": 0.41574445366859436, "learning_rate": 0.0001, "loss": 1.7713, "step": 2537 }, { "epoch": 0.2948591344757479, "grad_norm": 0.3948863744735718, "learning_rate": 0.0001, "loss": 1.6838, "step": 2538 }, { "epoch": 0.2949753122277084, "grad_norm": 0.3811478018760681, "learning_rate": 0.0001, "loss": 1.5245, "step": 2539 }, { "epoch": 0.2950914899796689, "grad_norm": 0.3667415678501129, "learning_rate": 0.0001, "loss": 1.7235, "step": 2540 }, { "epoch": 0.2952076677316294, "grad_norm": 0.41427987813949585, "learning_rate": 0.0001, "loss": 1.8357, "step": 2541 }, { "epoch": 0.2953238454835899, "grad_norm": 0.4144713282585144, "learning_rate": 0.0001, "loss": 1.7562, "step": 2542 }, { "epoch": 0.2954400232355504, "grad_norm": 0.3656145930290222, "learning_rate": 0.0001, "loss": 1.604, "step": 2543 }, { "epoch": 0.2955562009875109, "grad_norm": 0.3743351697921753, "learning_rate": 0.0001, "loss": 1.531, "step": 2544 }, { "epoch": 0.2956723787394714, "grad_norm": 0.4163815379142761, "learning_rate": 0.0001, "loss": 1.7352, "step": 2545 }, { "epoch": 0.2957885564914319, "grad_norm": 0.3973884582519531, "learning_rate": 0.0001, "loss": 1.8309, "step": 2546 }, { "epoch": 0.29590473424339236, "grad_norm": 0.4104848802089691, "learning_rate": 0.0001, "loss": 1.7723, "step": 2547 }, { "epoch": 0.2960209119953529, "grad_norm": 0.38057518005371094, "learning_rate": 0.0001, "loss": 1.5819, "step": 2548 }, { "epoch": 0.2961370897473134, "grad_norm": 0.3594435751438141, "learning_rate": 0.0001, "loss": 1.6023, "step": 2549 }, { "epoch": 0.2962532674992739, "grad_norm": 0.3822750747203827, "learning_rate": 0.0001, "loss": 1.5985, "step": 2550 }, { "epoch": 0.2963694452512344, "grad_norm": 0.4234026372432709, "learning_rate": 0.0001, "loss": 1.7732, "step": 2551 }, { "epoch": 0.29648562300319486, "grad_norm": 0.3969663083553314, "learning_rate": 0.0001, "loss": 1.6452, "step": 2552 }, { "epoch": 0.2966018007551554, "grad_norm": 0.36822569370269775, "learning_rate": 0.0001, "loss": 1.6158, "step": 2553 }, { "epoch": 0.2967179785071159, "grad_norm": 0.4209086298942566, "learning_rate": 0.0001, "loss": 1.9934, "step": 2554 }, { "epoch": 0.2968341562590764, "grad_norm": 0.42897096276283264, "learning_rate": 0.0001, "loss": 1.733, "step": 2555 }, { "epoch": 0.2969503340110369, "grad_norm": 0.36142057180404663, "learning_rate": 0.0001, "loss": 1.5931, "step": 2556 }, { "epoch": 0.29706651176299737, "grad_norm": 0.38266682624816895, "learning_rate": 0.0001, "loss": 1.6411, "step": 2557 }, { "epoch": 0.2971826895149579, "grad_norm": 0.3493916094303131, "learning_rate": 0.0001, "loss": 1.647, "step": 2558 }, { "epoch": 0.2972988672669184, "grad_norm": 0.3982014060020447, "learning_rate": 0.0001, "loss": 1.6872, "step": 2559 }, { "epoch": 0.2974150450188789, "grad_norm": 0.3709663450717926, "learning_rate": 0.0001, "loss": 1.7749, "step": 2560 }, { "epoch": 0.2975312227708394, "grad_norm": 0.3823295831680298, "learning_rate": 0.0001, "loss": 1.6854, "step": 2561 }, { "epoch": 0.29764740052279987, "grad_norm": 0.36725711822509766, "learning_rate": 0.0001, "loss": 1.5369, "step": 2562 }, { "epoch": 0.2977635782747604, "grad_norm": 0.407084584236145, "learning_rate": 0.0001, "loss": 1.7102, "step": 2563 }, { "epoch": 0.2978797560267209, "grad_norm": 0.3887426555156708, "learning_rate": 0.0001, "loss": 1.7267, "step": 2564 }, { "epoch": 0.2979959337786814, "grad_norm": 0.3916597068309784, "learning_rate": 0.0001, "loss": 1.6181, "step": 2565 }, { "epoch": 0.2981121115306419, "grad_norm": 0.3767833113670349, "learning_rate": 0.0001, "loss": 1.6224, "step": 2566 }, { "epoch": 0.29822828928260237, "grad_norm": 0.3760228753089905, "learning_rate": 0.0001, "loss": 1.577, "step": 2567 }, { "epoch": 0.29834446703456285, "grad_norm": 0.3806806802749634, "learning_rate": 0.0001, "loss": 1.5708, "step": 2568 }, { "epoch": 0.2984606447865234, "grad_norm": 0.38834068179130554, "learning_rate": 0.0001, "loss": 1.735, "step": 2569 }, { "epoch": 0.2985768225384839, "grad_norm": 0.4086969196796417, "learning_rate": 0.0001, "loss": 1.7382, "step": 2570 }, { "epoch": 0.2986930002904444, "grad_norm": 0.3873027265071869, "learning_rate": 0.0001, "loss": 1.6157, "step": 2571 }, { "epoch": 0.29880917804240487, "grad_norm": 0.38422465324401855, "learning_rate": 0.0001, "loss": 1.7938, "step": 2572 }, { "epoch": 0.29892535579436535, "grad_norm": 0.3803096115589142, "learning_rate": 0.0001, "loss": 1.7387, "step": 2573 }, { "epoch": 0.2990415335463259, "grad_norm": 0.41523873805999756, "learning_rate": 0.0001, "loss": 1.6704, "step": 2574 }, { "epoch": 0.2991577112982864, "grad_norm": 0.4138016104698181, "learning_rate": 0.0001, "loss": 1.9156, "step": 2575 }, { "epoch": 0.2992738890502469, "grad_norm": 0.3932843506336212, "learning_rate": 0.0001, "loss": 1.8139, "step": 2576 }, { "epoch": 0.29939006680220737, "grad_norm": 0.37751099467277527, "learning_rate": 0.0001, "loss": 1.642, "step": 2577 }, { "epoch": 0.29950624455416786, "grad_norm": 0.37788084149360657, "learning_rate": 0.0001, "loss": 1.6342, "step": 2578 }, { "epoch": 0.2996224223061284, "grad_norm": 0.381740927696228, "learning_rate": 0.0001, "loss": 1.6664, "step": 2579 }, { "epoch": 0.2997386000580889, "grad_norm": 0.38412848114967346, "learning_rate": 0.0001, "loss": 1.6217, "step": 2580 }, { "epoch": 0.2998547778100494, "grad_norm": 0.3746092617511749, "learning_rate": 0.0001, "loss": 1.5903, "step": 2581 }, { "epoch": 0.29997095556200987, "grad_norm": 0.3701321482658386, "learning_rate": 0.0001, "loss": 1.7027, "step": 2582 }, { "epoch": 0.30008713331397036, "grad_norm": 0.3820204436779022, "learning_rate": 0.0001, "loss": 1.6287, "step": 2583 }, { "epoch": 0.3002033110659309, "grad_norm": 0.3748900592327118, "learning_rate": 0.0001, "loss": 1.6212, "step": 2584 }, { "epoch": 0.3003194888178914, "grad_norm": 0.3851977288722992, "learning_rate": 0.0001, "loss": 1.7262, "step": 2585 }, { "epoch": 0.3004356665698519, "grad_norm": 0.3978486955165863, "learning_rate": 0.0001, "loss": 1.8482, "step": 2586 }, { "epoch": 0.30055184432181237, "grad_norm": 0.402482807636261, "learning_rate": 0.0001, "loss": 1.8079, "step": 2587 }, { "epoch": 0.30066802207377286, "grad_norm": 0.41250428557395935, "learning_rate": 0.0001, "loss": 1.7092, "step": 2588 }, { "epoch": 0.30078419982573334, "grad_norm": 0.4030408561229706, "learning_rate": 0.0001, "loss": 1.8394, "step": 2589 }, { "epoch": 0.3009003775776939, "grad_norm": 0.3963468670845032, "learning_rate": 0.0001, "loss": 1.6341, "step": 2590 }, { "epoch": 0.3010165553296544, "grad_norm": 0.37878894805908203, "learning_rate": 0.0001, "loss": 1.6902, "step": 2591 }, { "epoch": 0.30113273308161487, "grad_norm": 0.3783760666847229, "learning_rate": 0.0001, "loss": 1.7441, "step": 2592 }, { "epoch": 0.30124891083357536, "grad_norm": 0.3853001892566681, "learning_rate": 0.0001, "loss": 1.8408, "step": 2593 }, { "epoch": 0.30136508858553585, "grad_norm": 0.37395623326301575, "learning_rate": 0.0001, "loss": 1.6941, "step": 2594 }, { "epoch": 0.3014812663374964, "grad_norm": 0.3772829473018646, "learning_rate": 0.0001, "loss": 1.6901, "step": 2595 }, { "epoch": 0.3015974440894569, "grad_norm": 0.3889354467391968, "learning_rate": 0.0001, "loss": 1.7094, "step": 2596 }, { "epoch": 0.30171362184141737, "grad_norm": 0.4137793481349945, "learning_rate": 0.0001, "loss": 1.5989, "step": 2597 }, { "epoch": 0.30182979959337786, "grad_norm": 0.37697646021842957, "learning_rate": 0.0001, "loss": 1.6379, "step": 2598 }, { "epoch": 0.30194597734533835, "grad_norm": 0.3857576847076416, "learning_rate": 0.0001, "loss": 1.6224, "step": 2599 }, { "epoch": 0.3020621550972989, "grad_norm": 0.41573137044906616, "learning_rate": 0.0001, "loss": 1.8468, "step": 2600 }, { "epoch": 0.3021783328492594, "grad_norm": 0.3891277313232422, "learning_rate": 0.0001, "loss": 1.6498, "step": 2601 }, { "epoch": 0.30229451060121987, "grad_norm": 0.3497909605503082, "learning_rate": 0.0001, "loss": 1.398, "step": 2602 }, { "epoch": 0.30241068835318036, "grad_norm": 0.3606802523136139, "learning_rate": 0.0001, "loss": 1.5158, "step": 2603 }, { "epoch": 0.30252686610514085, "grad_norm": 0.4066804349422455, "learning_rate": 0.0001, "loss": 1.7463, "step": 2604 }, { "epoch": 0.3026430438571014, "grad_norm": 0.4184049367904663, "learning_rate": 0.0001, "loss": 1.7302, "step": 2605 }, { "epoch": 0.3027592216090619, "grad_norm": 0.3844599723815918, "learning_rate": 0.0001, "loss": 1.7257, "step": 2606 }, { "epoch": 0.30287539936102237, "grad_norm": 0.4283202290534973, "learning_rate": 0.0001, "loss": 1.8115, "step": 2607 }, { "epoch": 0.30299157711298286, "grad_norm": 0.4154917895793915, "learning_rate": 0.0001, "loss": 1.7869, "step": 2608 }, { "epoch": 0.30310775486494335, "grad_norm": 0.4059176445007324, "learning_rate": 0.0001, "loss": 1.8573, "step": 2609 }, { "epoch": 0.3032239326169039, "grad_norm": 0.38833245635032654, "learning_rate": 0.0001, "loss": 1.6275, "step": 2610 }, { "epoch": 0.3033401103688644, "grad_norm": 0.3859376013278961, "learning_rate": 0.0001, "loss": 1.7628, "step": 2611 }, { "epoch": 0.30345628812082487, "grad_norm": 0.3899228572845459, "learning_rate": 0.0001, "loss": 1.6193, "step": 2612 }, { "epoch": 0.30357246587278536, "grad_norm": 0.38399478793144226, "learning_rate": 0.0001, "loss": 1.6718, "step": 2613 }, { "epoch": 0.30368864362474585, "grad_norm": 0.440913587808609, "learning_rate": 0.0001, "loss": 1.8794, "step": 2614 }, { "epoch": 0.30380482137670634, "grad_norm": 0.3857106864452362, "learning_rate": 0.0001, "loss": 1.7513, "step": 2615 }, { "epoch": 0.3039209991286669, "grad_norm": 0.3604831099510193, "learning_rate": 0.0001, "loss": 1.6266, "step": 2616 }, { "epoch": 0.30403717688062737, "grad_norm": 0.3680822551250458, "learning_rate": 0.0001, "loss": 1.6749, "step": 2617 }, { "epoch": 0.30415335463258786, "grad_norm": 0.3898998498916626, "learning_rate": 0.0001, "loss": 1.7768, "step": 2618 }, { "epoch": 0.30426953238454835, "grad_norm": 0.3705196678638458, "learning_rate": 0.0001, "loss": 1.6788, "step": 2619 }, { "epoch": 0.30438571013650884, "grad_norm": 0.3996119797229767, "learning_rate": 0.0001, "loss": 1.7945, "step": 2620 }, { "epoch": 0.3045018878884694, "grad_norm": 0.391072541475296, "learning_rate": 0.0001, "loss": 1.6452, "step": 2621 }, { "epoch": 0.30461806564042987, "grad_norm": 0.5729893445968628, "learning_rate": 0.0001, "loss": 1.6585, "step": 2622 }, { "epoch": 0.30473424339239036, "grad_norm": 0.38251304626464844, "learning_rate": 0.0001, "loss": 1.6424, "step": 2623 }, { "epoch": 0.30485042114435085, "grad_norm": 0.3925994634628296, "learning_rate": 0.0001, "loss": 1.7669, "step": 2624 }, { "epoch": 0.30496659889631134, "grad_norm": 0.3850267827510834, "learning_rate": 0.0001, "loss": 1.7181, "step": 2625 }, { "epoch": 0.3050827766482719, "grad_norm": 0.37147024273872375, "learning_rate": 0.0001, "loss": 1.6673, "step": 2626 }, { "epoch": 0.30519895440023237, "grad_norm": 0.41201236844062805, "learning_rate": 0.0001, "loss": 1.7335, "step": 2627 }, { "epoch": 0.30531513215219286, "grad_norm": 0.4249459505081177, "learning_rate": 0.0001, "loss": 1.8026, "step": 2628 }, { "epoch": 0.30543130990415335, "grad_norm": 0.37525978684425354, "learning_rate": 0.0001, "loss": 1.7273, "step": 2629 }, { "epoch": 0.30554748765611384, "grad_norm": 0.3670780658721924, "learning_rate": 0.0001, "loss": 1.6659, "step": 2630 }, { "epoch": 0.3056636654080744, "grad_norm": 0.37397563457489014, "learning_rate": 0.0001, "loss": 1.7848, "step": 2631 }, { "epoch": 0.30577984316003487, "grad_norm": 0.35987600684165955, "learning_rate": 0.0001, "loss": 1.6362, "step": 2632 }, { "epoch": 0.30589602091199536, "grad_norm": 0.3674522340297699, "learning_rate": 0.0001, "loss": 1.6985, "step": 2633 }, { "epoch": 0.30601219866395585, "grad_norm": 0.3551689684391022, "learning_rate": 0.0001, "loss": 1.4699, "step": 2634 }, { "epoch": 0.30612837641591634, "grad_norm": 0.39974987506866455, "learning_rate": 0.0001, "loss": 1.7497, "step": 2635 }, { "epoch": 0.3062445541678768, "grad_norm": 0.37489983439445496, "learning_rate": 0.0001, "loss": 1.6122, "step": 2636 }, { "epoch": 0.30636073191983737, "grad_norm": 0.38141462206840515, "learning_rate": 0.0001, "loss": 1.725, "step": 2637 }, { "epoch": 0.30647690967179786, "grad_norm": 0.38110142946243286, "learning_rate": 0.0001, "loss": 1.7031, "step": 2638 }, { "epoch": 0.30659308742375835, "grad_norm": 0.3580850064754486, "learning_rate": 0.0001, "loss": 1.5362, "step": 2639 }, { "epoch": 0.30670926517571884, "grad_norm": 0.4292284846305847, "learning_rate": 0.0001, "loss": 1.7091, "step": 2640 }, { "epoch": 0.3068254429276793, "grad_norm": 0.37014874815940857, "learning_rate": 0.0001, "loss": 1.665, "step": 2641 }, { "epoch": 0.30694162067963987, "grad_norm": 0.36894306540489197, "learning_rate": 0.0001, "loss": 1.5296, "step": 2642 }, { "epoch": 0.30705779843160036, "grad_norm": 0.37392929196357727, "learning_rate": 0.0001, "loss": 1.5448, "step": 2643 }, { "epoch": 0.30717397618356085, "grad_norm": 0.3983522653579712, "learning_rate": 0.0001, "loss": 1.5856, "step": 2644 }, { "epoch": 0.30729015393552134, "grad_norm": 0.3379395604133606, "learning_rate": 0.0001, "loss": 1.4932, "step": 2645 }, { "epoch": 0.3074063316874818, "grad_norm": 0.35767263174057007, "learning_rate": 0.0001, "loss": 1.6182, "step": 2646 }, { "epoch": 0.30752250943944237, "grad_norm": 0.3817872405052185, "learning_rate": 0.0001, "loss": 1.7924, "step": 2647 }, { "epoch": 0.30763868719140286, "grad_norm": 0.3904268741607666, "learning_rate": 0.0001, "loss": 1.6342, "step": 2648 }, { "epoch": 0.30775486494336335, "grad_norm": 0.37173137068748474, "learning_rate": 0.0001, "loss": 1.5885, "step": 2649 }, { "epoch": 0.30787104269532384, "grad_norm": 0.3717529773712158, "learning_rate": 0.0001, "loss": 1.6096, "step": 2650 }, { "epoch": 0.3079872204472843, "grad_norm": 0.3810470998287201, "learning_rate": 0.0001, "loss": 1.6477, "step": 2651 }, { "epoch": 0.30810339819924487, "grad_norm": 0.3781997263431549, "learning_rate": 0.0001, "loss": 1.5476, "step": 2652 }, { "epoch": 0.30821957595120536, "grad_norm": 0.4403388798236847, "learning_rate": 0.0001, "loss": 1.7052, "step": 2653 }, { "epoch": 0.30833575370316585, "grad_norm": 0.3832712769508362, "learning_rate": 0.0001, "loss": 1.7543, "step": 2654 }, { "epoch": 0.30845193145512634, "grad_norm": 0.3549423813819885, "learning_rate": 0.0001, "loss": 1.4284, "step": 2655 }, { "epoch": 0.3085681092070868, "grad_norm": 0.37209948897361755, "learning_rate": 0.0001, "loss": 1.6505, "step": 2656 }, { "epoch": 0.3086842869590473, "grad_norm": 0.39987820386886597, "learning_rate": 0.0001, "loss": 1.6999, "step": 2657 }, { "epoch": 0.30880046471100786, "grad_norm": 0.40743595361709595, "learning_rate": 0.0001, "loss": 1.8868, "step": 2658 }, { "epoch": 0.30891664246296835, "grad_norm": 0.41529178619384766, "learning_rate": 0.0001, "loss": 1.6629, "step": 2659 }, { "epoch": 0.30903282021492884, "grad_norm": 0.420540988445282, "learning_rate": 0.0001, "loss": 1.6699, "step": 2660 }, { "epoch": 0.3091489979668893, "grad_norm": 0.38582152128219604, "learning_rate": 0.0001, "loss": 1.8365, "step": 2661 }, { "epoch": 0.3092651757188498, "grad_norm": 0.382856547832489, "learning_rate": 0.0001, "loss": 1.7678, "step": 2662 }, { "epoch": 0.30938135347081036, "grad_norm": 0.3981390595436096, "learning_rate": 0.0001, "loss": 1.7592, "step": 2663 }, { "epoch": 0.30949753122277085, "grad_norm": 0.4064244031906128, "learning_rate": 0.0001, "loss": 1.783, "step": 2664 }, { "epoch": 0.30961370897473134, "grad_norm": 0.3797922730445862, "learning_rate": 0.0001, "loss": 1.6889, "step": 2665 }, { "epoch": 0.3097298867266918, "grad_norm": 0.366719514131546, "learning_rate": 0.0001, "loss": 1.5794, "step": 2666 }, { "epoch": 0.3098460644786523, "grad_norm": 0.3816395103931427, "learning_rate": 0.0001, "loss": 1.7305, "step": 2667 }, { "epoch": 0.30996224223061286, "grad_norm": 0.3834119141101837, "learning_rate": 0.0001, "loss": 1.6341, "step": 2668 }, { "epoch": 0.31007841998257335, "grad_norm": 0.3797139525413513, "learning_rate": 0.0001, "loss": 1.683, "step": 2669 }, { "epoch": 0.31019459773453384, "grad_norm": 0.3854397237300873, "learning_rate": 0.0001, "loss": 1.7163, "step": 2670 }, { "epoch": 0.31031077548649433, "grad_norm": 0.3634074330329895, "learning_rate": 0.0001, "loss": 1.6188, "step": 2671 }, { "epoch": 0.3104269532384548, "grad_norm": 0.37497079372406006, "learning_rate": 0.0001, "loss": 1.6898, "step": 2672 }, { "epoch": 0.31054313099041536, "grad_norm": 0.39349761605262756, "learning_rate": 0.0001, "loss": 1.6077, "step": 2673 }, { "epoch": 0.31065930874237585, "grad_norm": 0.4020683765411377, "learning_rate": 0.0001, "loss": 1.6122, "step": 2674 }, { "epoch": 0.31077548649433634, "grad_norm": 0.4030044376850128, "learning_rate": 0.0001, "loss": 1.7939, "step": 2675 }, { "epoch": 0.31089166424629683, "grad_norm": 0.3698487877845764, "learning_rate": 0.0001, "loss": 1.5568, "step": 2676 }, { "epoch": 0.3110078419982573, "grad_norm": 0.39510759711265564, "learning_rate": 0.0001, "loss": 1.4885, "step": 2677 }, { "epoch": 0.3111240197502178, "grad_norm": 0.3986801207065582, "learning_rate": 0.0001, "loss": 1.6593, "step": 2678 }, { "epoch": 0.31124019750217835, "grad_norm": 0.39601030945777893, "learning_rate": 0.0001, "loss": 1.7573, "step": 2679 }, { "epoch": 0.31135637525413884, "grad_norm": 0.42814406752586365, "learning_rate": 0.0001, "loss": 1.7842, "step": 2680 }, { "epoch": 0.31147255300609933, "grad_norm": 0.39685603976249695, "learning_rate": 0.0001, "loss": 1.886, "step": 2681 }, { "epoch": 0.3115887307580598, "grad_norm": 0.4117811322212219, "learning_rate": 0.0001, "loss": 1.7367, "step": 2682 }, { "epoch": 0.3117049085100203, "grad_norm": 0.40006887912750244, "learning_rate": 0.0001, "loss": 1.7719, "step": 2683 }, { "epoch": 0.31182108626198085, "grad_norm": 0.4050239026546478, "learning_rate": 0.0001, "loss": 1.6554, "step": 2684 }, { "epoch": 0.31193726401394134, "grad_norm": 0.38189923763275146, "learning_rate": 0.0001, "loss": 1.6241, "step": 2685 }, { "epoch": 0.31205344176590183, "grad_norm": 0.4016052484512329, "learning_rate": 0.0001, "loss": 1.6297, "step": 2686 }, { "epoch": 0.3121696195178623, "grad_norm": 0.37489327788352966, "learning_rate": 0.0001, "loss": 1.6032, "step": 2687 }, { "epoch": 0.3122857972698228, "grad_norm": 0.3983065187931061, "learning_rate": 0.0001, "loss": 1.5249, "step": 2688 }, { "epoch": 0.31240197502178335, "grad_norm": 0.40546756982803345, "learning_rate": 0.0001, "loss": 1.7134, "step": 2689 }, { "epoch": 0.31251815277374384, "grad_norm": 0.37291330099105835, "learning_rate": 0.0001, "loss": 1.6283, "step": 2690 }, { "epoch": 0.31263433052570433, "grad_norm": 0.3653269410133362, "learning_rate": 0.0001, "loss": 1.5474, "step": 2691 }, { "epoch": 0.3127505082776648, "grad_norm": 0.3700310289859772, "learning_rate": 0.0001, "loss": 1.6161, "step": 2692 }, { "epoch": 0.3128666860296253, "grad_norm": 0.38666659593582153, "learning_rate": 0.0001, "loss": 1.6764, "step": 2693 }, { "epoch": 0.31298286378158585, "grad_norm": 0.3867506980895996, "learning_rate": 0.0001, "loss": 1.7236, "step": 2694 }, { "epoch": 0.31309904153354634, "grad_norm": 0.38117802143096924, "learning_rate": 0.0001, "loss": 1.7902, "step": 2695 }, { "epoch": 0.31321521928550683, "grad_norm": 0.403726726770401, "learning_rate": 0.0001, "loss": 1.8469, "step": 2696 }, { "epoch": 0.3133313970374673, "grad_norm": 0.3811923861503601, "learning_rate": 0.0001, "loss": 1.6764, "step": 2697 }, { "epoch": 0.3134475747894278, "grad_norm": 0.3940551280975342, "learning_rate": 0.0001, "loss": 1.6292, "step": 2698 }, { "epoch": 0.3135637525413883, "grad_norm": 0.3885653614997864, "learning_rate": 0.0001, "loss": 1.7254, "step": 2699 }, { "epoch": 0.31367993029334884, "grad_norm": 0.36595413088798523, "learning_rate": 0.0001, "loss": 1.7143, "step": 2700 }, { "epoch": 0.31379610804530933, "grad_norm": 0.38449594378471375, "learning_rate": 0.0001, "loss": 1.7474, "step": 2701 }, { "epoch": 0.3139122857972698, "grad_norm": 0.3768537640571594, "learning_rate": 0.0001, "loss": 1.6631, "step": 2702 }, { "epoch": 0.3140284635492303, "grad_norm": 0.3723905384540558, "learning_rate": 0.0001, "loss": 1.6857, "step": 2703 }, { "epoch": 0.3141446413011908, "grad_norm": 0.3800404667854309, "learning_rate": 0.0001, "loss": 1.653, "step": 2704 }, { "epoch": 0.31426081905315134, "grad_norm": 0.38999584317207336, "learning_rate": 0.0001, "loss": 1.6284, "step": 2705 }, { "epoch": 0.31437699680511183, "grad_norm": 0.40568429231643677, "learning_rate": 0.0001, "loss": 1.6276, "step": 2706 }, { "epoch": 0.3144931745570723, "grad_norm": 0.3945614993572235, "learning_rate": 0.0001, "loss": 1.6298, "step": 2707 }, { "epoch": 0.3146093523090328, "grad_norm": 0.36772727966308594, "learning_rate": 0.0001, "loss": 1.5668, "step": 2708 }, { "epoch": 0.3147255300609933, "grad_norm": 0.38640451431274414, "learning_rate": 0.0001, "loss": 1.677, "step": 2709 }, { "epoch": 0.31484170781295384, "grad_norm": 0.3605159819126129, "learning_rate": 0.0001, "loss": 1.4943, "step": 2710 }, { "epoch": 0.31495788556491433, "grad_norm": 0.38972237706184387, "learning_rate": 0.0001, "loss": 1.801, "step": 2711 }, { "epoch": 0.3150740633168748, "grad_norm": 0.44629716873168945, "learning_rate": 0.0001, "loss": 1.7858, "step": 2712 }, { "epoch": 0.3151902410688353, "grad_norm": 0.39029544591903687, "learning_rate": 0.0001, "loss": 1.7399, "step": 2713 }, { "epoch": 0.3153064188207958, "grad_norm": 0.37057486176490784, "learning_rate": 0.0001, "loss": 1.5732, "step": 2714 }, { "epoch": 0.31542259657275634, "grad_norm": 0.38961511850357056, "learning_rate": 0.0001, "loss": 1.7939, "step": 2715 }, { "epoch": 0.31553877432471683, "grad_norm": 0.3788926899433136, "learning_rate": 0.0001, "loss": 1.5609, "step": 2716 }, { "epoch": 0.3156549520766773, "grad_norm": 0.3727104961872101, "learning_rate": 0.0001, "loss": 1.5085, "step": 2717 }, { "epoch": 0.3157711298286378, "grad_norm": 0.38792872428894043, "learning_rate": 0.0001, "loss": 1.5825, "step": 2718 }, { "epoch": 0.3158873075805983, "grad_norm": 0.39093390107154846, "learning_rate": 0.0001, "loss": 1.5953, "step": 2719 }, { "epoch": 0.31600348533255884, "grad_norm": 0.39481261372566223, "learning_rate": 0.0001, "loss": 1.7409, "step": 2720 }, { "epoch": 0.31611966308451933, "grad_norm": 0.39427947998046875, "learning_rate": 0.0001, "loss": 1.7673, "step": 2721 }, { "epoch": 0.3162358408364798, "grad_norm": 0.3655182719230652, "learning_rate": 0.0001, "loss": 1.509, "step": 2722 }, { "epoch": 0.3163520185884403, "grad_norm": 0.4002794921398163, "learning_rate": 0.0001, "loss": 1.7199, "step": 2723 }, { "epoch": 0.3164681963404008, "grad_norm": 0.4090864360332489, "learning_rate": 0.0001, "loss": 1.7553, "step": 2724 }, { "epoch": 0.3165843740923613, "grad_norm": 0.3690468370914459, "learning_rate": 0.0001, "loss": 1.6284, "step": 2725 }, { "epoch": 0.31670055184432183, "grad_norm": 0.3945186734199524, "learning_rate": 0.0001, "loss": 1.6574, "step": 2726 }, { "epoch": 0.3168167295962823, "grad_norm": 0.3898504972457886, "learning_rate": 0.0001, "loss": 1.6971, "step": 2727 }, { "epoch": 0.3169329073482428, "grad_norm": 0.38657867908477783, "learning_rate": 0.0001, "loss": 1.8112, "step": 2728 }, { "epoch": 0.3170490851002033, "grad_norm": 0.39374595880508423, "learning_rate": 0.0001, "loss": 1.6534, "step": 2729 }, { "epoch": 0.3171652628521638, "grad_norm": 0.3979948163032532, "learning_rate": 0.0001, "loss": 1.6881, "step": 2730 }, { "epoch": 0.31728144060412433, "grad_norm": 0.3945339620113373, "learning_rate": 0.0001, "loss": 1.7315, "step": 2731 }, { "epoch": 0.3173976183560848, "grad_norm": 0.4169275760650635, "learning_rate": 0.0001, "loss": 1.7789, "step": 2732 }, { "epoch": 0.3175137961080453, "grad_norm": 0.37716567516326904, "learning_rate": 0.0001, "loss": 1.5534, "step": 2733 }, { "epoch": 0.3176299738600058, "grad_norm": 0.38198474049568176, "learning_rate": 0.0001, "loss": 1.7881, "step": 2734 }, { "epoch": 0.3177461516119663, "grad_norm": 0.3758987486362457, "learning_rate": 0.0001, "loss": 1.5993, "step": 2735 }, { "epoch": 0.31786232936392683, "grad_norm": 0.4033893644809723, "learning_rate": 0.0001, "loss": 1.567, "step": 2736 }, { "epoch": 0.3179785071158873, "grad_norm": 0.3776089549064636, "learning_rate": 0.0001, "loss": 1.614, "step": 2737 }, { "epoch": 0.3180946848678478, "grad_norm": 0.38801753520965576, "learning_rate": 0.0001, "loss": 1.6007, "step": 2738 }, { "epoch": 0.3182108626198083, "grad_norm": 0.3814786970615387, "learning_rate": 0.0001, "loss": 1.4875, "step": 2739 }, { "epoch": 0.3183270403717688, "grad_norm": 0.35588279366493225, "learning_rate": 0.0001, "loss": 1.5519, "step": 2740 }, { "epoch": 0.31844321812372933, "grad_norm": 0.378668874502182, "learning_rate": 0.0001, "loss": 1.6447, "step": 2741 }, { "epoch": 0.3185593958756898, "grad_norm": 0.4211976230144501, "learning_rate": 0.0001, "loss": 1.882, "step": 2742 }, { "epoch": 0.3186755736276503, "grad_norm": 0.36686578392982483, "learning_rate": 0.0001, "loss": 1.5626, "step": 2743 }, { "epoch": 0.3187917513796108, "grad_norm": 0.37785688042640686, "learning_rate": 0.0001, "loss": 1.5346, "step": 2744 }, { "epoch": 0.3189079291315713, "grad_norm": 0.3932788372039795, "learning_rate": 0.0001, "loss": 1.7125, "step": 2745 }, { "epoch": 0.3190241068835318, "grad_norm": 0.3749261200428009, "learning_rate": 0.0001, "loss": 1.7368, "step": 2746 }, { "epoch": 0.3191402846354923, "grad_norm": 0.36720603704452515, "learning_rate": 0.0001, "loss": 1.6742, "step": 2747 }, { "epoch": 0.3192564623874528, "grad_norm": 0.38770416378974915, "learning_rate": 0.0001, "loss": 1.6792, "step": 2748 }, { "epoch": 0.3193726401394133, "grad_norm": 0.42165276408195496, "learning_rate": 0.0001, "loss": 1.8394, "step": 2749 }, { "epoch": 0.3194888178913738, "grad_norm": 0.3547378480434418, "learning_rate": 0.0001, "loss": 1.7143, "step": 2750 }, { "epoch": 0.3196049956433343, "grad_norm": 0.3825453519821167, "learning_rate": 0.0001, "loss": 1.5718, "step": 2751 }, { "epoch": 0.3197211733952948, "grad_norm": 0.42200520634651184, "learning_rate": 0.0001, "loss": 1.6849, "step": 2752 }, { "epoch": 0.3198373511472553, "grad_norm": 0.3694903552532196, "learning_rate": 0.0001, "loss": 1.6634, "step": 2753 }, { "epoch": 0.3199535288992158, "grad_norm": 0.38652074337005615, "learning_rate": 0.0001, "loss": 1.673, "step": 2754 }, { "epoch": 0.3200697066511763, "grad_norm": 0.4044737219810486, "learning_rate": 0.0001, "loss": 1.442, "step": 2755 }, { "epoch": 0.3201858844031368, "grad_norm": 0.3883349597454071, "learning_rate": 0.0001, "loss": 1.5372, "step": 2756 }, { "epoch": 0.3203020621550973, "grad_norm": 0.3901846408843994, "learning_rate": 0.0001, "loss": 1.8362, "step": 2757 }, { "epoch": 0.3204182399070578, "grad_norm": 0.37919488549232483, "learning_rate": 0.0001, "loss": 1.6951, "step": 2758 }, { "epoch": 0.3205344176590183, "grad_norm": 0.36535388231277466, "learning_rate": 0.0001, "loss": 1.5061, "step": 2759 }, { "epoch": 0.3206505954109788, "grad_norm": 0.3907409608364105, "learning_rate": 0.0001, "loss": 1.717, "step": 2760 }, { "epoch": 0.3207667731629393, "grad_norm": 0.39692288637161255, "learning_rate": 0.0001, "loss": 1.6884, "step": 2761 }, { "epoch": 0.3208829509148998, "grad_norm": 0.431797593832016, "learning_rate": 0.0001, "loss": 1.6013, "step": 2762 }, { "epoch": 0.3209991286668603, "grad_norm": 0.38208791613578796, "learning_rate": 0.0001, "loss": 1.5427, "step": 2763 }, { "epoch": 0.3211153064188208, "grad_norm": 0.40455830097198486, "learning_rate": 0.0001, "loss": 1.6036, "step": 2764 }, { "epoch": 0.3212314841707813, "grad_norm": 0.3595045506954193, "learning_rate": 0.0001, "loss": 1.53, "step": 2765 }, { "epoch": 0.3213476619227418, "grad_norm": 0.42558008432388306, "learning_rate": 0.0001, "loss": 1.7101, "step": 2766 }, { "epoch": 0.32146383967470227, "grad_norm": 0.39539337158203125, "learning_rate": 0.0001, "loss": 1.5166, "step": 2767 }, { "epoch": 0.3215800174266628, "grad_norm": 0.3601813316345215, "learning_rate": 0.0001, "loss": 1.4853, "step": 2768 }, { "epoch": 0.3216961951786233, "grad_norm": 0.3683129847049713, "learning_rate": 0.0001, "loss": 1.6322, "step": 2769 }, { "epoch": 0.3218123729305838, "grad_norm": 0.4206804633140564, "learning_rate": 0.0001, "loss": 1.7475, "step": 2770 }, { "epoch": 0.3219285506825443, "grad_norm": 0.3908296525478363, "learning_rate": 0.0001, "loss": 1.6926, "step": 2771 }, { "epoch": 0.32204472843450477, "grad_norm": 0.38299795985221863, "learning_rate": 0.0001, "loss": 1.5537, "step": 2772 }, { "epoch": 0.3221609061864653, "grad_norm": 0.41059038043022156, "learning_rate": 0.0001, "loss": 1.6275, "step": 2773 }, { "epoch": 0.3222770839384258, "grad_norm": 0.3887813687324524, "learning_rate": 0.0001, "loss": 1.8025, "step": 2774 }, { "epoch": 0.3223932616903863, "grad_norm": 0.36034008860588074, "learning_rate": 0.0001, "loss": 1.6923, "step": 2775 }, { "epoch": 0.3225094394423468, "grad_norm": 0.3732610046863556, "learning_rate": 0.0001, "loss": 1.6004, "step": 2776 }, { "epoch": 0.32262561719430727, "grad_norm": 0.40316662192344666, "learning_rate": 0.0001, "loss": 1.6442, "step": 2777 }, { "epoch": 0.3227417949462678, "grad_norm": 0.40975120663642883, "learning_rate": 0.0001, "loss": 1.5921, "step": 2778 }, { "epoch": 0.3228579726982283, "grad_norm": 0.38018473982810974, "learning_rate": 0.0001, "loss": 1.711, "step": 2779 }, { "epoch": 0.3229741504501888, "grad_norm": 0.3749701678752899, "learning_rate": 0.0001, "loss": 1.5769, "step": 2780 }, { "epoch": 0.3230903282021493, "grad_norm": 0.37560439109802246, "learning_rate": 0.0001, "loss": 1.5876, "step": 2781 }, { "epoch": 0.32320650595410977, "grad_norm": 0.3560850918292999, "learning_rate": 0.0001, "loss": 1.5187, "step": 2782 }, { "epoch": 0.3233226837060703, "grad_norm": 0.41308125853538513, "learning_rate": 0.0001, "loss": 1.7863, "step": 2783 }, { "epoch": 0.3234388614580308, "grad_norm": 0.35252368450164795, "learning_rate": 0.0001, "loss": 1.6592, "step": 2784 }, { "epoch": 0.3235550392099913, "grad_norm": 0.3724053204059601, "learning_rate": 0.0001, "loss": 1.739, "step": 2785 }, { "epoch": 0.3236712169619518, "grad_norm": 0.4030148684978485, "learning_rate": 0.0001, "loss": 1.6719, "step": 2786 }, { "epoch": 0.32378739471391227, "grad_norm": 0.3721768260002136, "learning_rate": 0.0001, "loss": 1.7063, "step": 2787 }, { "epoch": 0.32390357246587276, "grad_norm": 0.34921830892562866, "learning_rate": 0.0001, "loss": 1.5576, "step": 2788 }, { "epoch": 0.3240197502178333, "grad_norm": 0.39488157629966736, "learning_rate": 0.0001, "loss": 1.7164, "step": 2789 }, { "epoch": 0.3241359279697938, "grad_norm": 0.3864257037639618, "learning_rate": 0.0001, "loss": 1.644, "step": 2790 }, { "epoch": 0.3242521057217543, "grad_norm": 0.41633668541908264, "learning_rate": 0.0001, "loss": 1.7591, "step": 2791 }, { "epoch": 0.32436828347371477, "grad_norm": 0.38308605551719666, "learning_rate": 0.0001, "loss": 1.6398, "step": 2792 }, { "epoch": 0.32448446122567526, "grad_norm": 0.37461498379707336, "learning_rate": 0.0001, "loss": 1.7296, "step": 2793 }, { "epoch": 0.3246006389776358, "grad_norm": 0.4059557020664215, "learning_rate": 0.0001, "loss": 1.8018, "step": 2794 }, { "epoch": 0.3247168167295963, "grad_norm": 0.37268683314323425, "learning_rate": 0.0001, "loss": 1.6274, "step": 2795 }, { "epoch": 0.3248329944815568, "grad_norm": 0.3826591372489929, "learning_rate": 0.0001, "loss": 1.7083, "step": 2796 }, { "epoch": 0.32494917223351727, "grad_norm": 0.38750702142715454, "learning_rate": 0.0001, "loss": 1.7622, "step": 2797 }, { "epoch": 0.32506534998547776, "grad_norm": 0.4062412679195404, "learning_rate": 0.0001, "loss": 1.7934, "step": 2798 }, { "epoch": 0.3251815277374383, "grad_norm": 0.3924409747123718, "learning_rate": 0.0001, "loss": 1.6323, "step": 2799 }, { "epoch": 0.3252977054893988, "grad_norm": 0.3657173216342926, "learning_rate": 0.0001, "loss": 1.5554, "step": 2800 }, { "epoch": 0.3254138832413593, "grad_norm": 0.37579992413520813, "learning_rate": 0.0001, "loss": 1.6206, "step": 2801 }, { "epoch": 0.32553006099331977, "grad_norm": 0.3894909620285034, "learning_rate": 0.0001, "loss": 1.5396, "step": 2802 }, { "epoch": 0.32564623874528026, "grad_norm": 0.3836139440536499, "learning_rate": 0.0001, "loss": 1.6939, "step": 2803 }, { "epoch": 0.3257624164972408, "grad_norm": 0.40788355469703674, "learning_rate": 0.0001, "loss": 1.7918, "step": 2804 }, { "epoch": 0.3258785942492013, "grad_norm": 0.4235386848449707, "learning_rate": 0.0001, "loss": 1.8649, "step": 2805 }, { "epoch": 0.3259947720011618, "grad_norm": 0.39928755164146423, "learning_rate": 0.0001, "loss": 1.7444, "step": 2806 }, { "epoch": 0.32611094975312227, "grad_norm": 0.40659260749816895, "learning_rate": 0.0001, "loss": 1.7038, "step": 2807 }, { "epoch": 0.32622712750508276, "grad_norm": 0.38179177045822144, "learning_rate": 0.0001, "loss": 1.7224, "step": 2808 }, { "epoch": 0.3263433052570433, "grad_norm": 0.38274791836738586, "learning_rate": 0.0001, "loss": 1.4469, "step": 2809 }, { "epoch": 0.3264594830090038, "grad_norm": 0.3890819549560547, "learning_rate": 0.0001, "loss": 1.6585, "step": 2810 }, { "epoch": 0.3265756607609643, "grad_norm": 0.37747570872306824, "learning_rate": 0.0001, "loss": 1.6623, "step": 2811 }, { "epoch": 0.32669183851292477, "grad_norm": 0.38250067830085754, "learning_rate": 0.0001, "loss": 1.6841, "step": 2812 }, { "epoch": 0.32680801626488526, "grad_norm": 0.3761623799800873, "learning_rate": 0.0001, "loss": 1.6855, "step": 2813 }, { "epoch": 0.32692419401684575, "grad_norm": 0.3789752721786499, "learning_rate": 0.0001, "loss": 1.718, "step": 2814 }, { "epoch": 0.3270403717688063, "grad_norm": 0.3886089622974396, "learning_rate": 0.0001, "loss": 1.7111, "step": 2815 }, { "epoch": 0.3271565495207668, "grad_norm": 0.3989008665084839, "learning_rate": 0.0001, "loss": 1.8128, "step": 2816 }, { "epoch": 0.32727272727272727, "grad_norm": 0.4016900360584259, "learning_rate": 0.0001, "loss": 1.682, "step": 2817 }, { "epoch": 0.32738890502468776, "grad_norm": 0.41296061873435974, "learning_rate": 0.0001, "loss": 1.8053, "step": 2818 }, { "epoch": 0.32750508277664825, "grad_norm": 0.3775239586830139, "learning_rate": 0.0001, "loss": 1.6908, "step": 2819 }, { "epoch": 0.3276212605286088, "grad_norm": 0.4092549681663513, "learning_rate": 0.0001, "loss": 1.7844, "step": 2820 }, { "epoch": 0.3277374382805693, "grad_norm": 0.3923521041870117, "learning_rate": 0.0001, "loss": 1.4103, "step": 2821 }, { "epoch": 0.32785361603252977, "grad_norm": 0.3734949231147766, "learning_rate": 0.0001, "loss": 1.5444, "step": 2822 }, { "epoch": 0.32796979378449026, "grad_norm": 0.3700851798057556, "learning_rate": 0.0001, "loss": 1.6773, "step": 2823 }, { "epoch": 0.32808597153645075, "grad_norm": 0.3860493302345276, "learning_rate": 0.0001, "loss": 1.7857, "step": 2824 }, { "epoch": 0.3282021492884113, "grad_norm": 0.4142455756664276, "learning_rate": 0.0001, "loss": 1.6164, "step": 2825 }, { "epoch": 0.3283183270403718, "grad_norm": 0.3860050141811371, "learning_rate": 0.0001, "loss": 1.7274, "step": 2826 }, { "epoch": 0.32843450479233227, "grad_norm": 0.39390870928764343, "learning_rate": 0.0001, "loss": 1.7529, "step": 2827 }, { "epoch": 0.32855068254429276, "grad_norm": 0.3824588656425476, "learning_rate": 0.0001, "loss": 1.7334, "step": 2828 }, { "epoch": 0.32866686029625325, "grad_norm": 0.3569580018520355, "learning_rate": 0.0001, "loss": 1.5416, "step": 2829 }, { "epoch": 0.3287830380482138, "grad_norm": 0.3893423080444336, "learning_rate": 0.0001, "loss": 1.5403, "step": 2830 }, { "epoch": 0.3288992158001743, "grad_norm": 0.3976670503616333, "learning_rate": 0.0001, "loss": 1.7884, "step": 2831 }, { "epoch": 0.32901539355213477, "grad_norm": 0.38111555576324463, "learning_rate": 0.0001, "loss": 1.5406, "step": 2832 }, { "epoch": 0.32913157130409526, "grad_norm": 0.38371211290359497, "learning_rate": 0.0001, "loss": 1.5291, "step": 2833 }, { "epoch": 0.32924774905605575, "grad_norm": 0.4201413691043854, "learning_rate": 0.0001, "loss": 1.7543, "step": 2834 }, { "epoch": 0.32936392680801624, "grad_norm": 0.386960506439209, "learning_rate": 0.0001, "loss": 1.6, "step": 2835 }, { "epoch": 0.3294801045599768, "grad_norm": 0.41088366508483887, "learning_rate": 0.0001, "loss": 1.9636, "step": 2836 }, { "epoch": 0.32959628231193727, "grad_norm": 0.39831310510635376, "learning_rate": 0.0001, "loss": 1.6039, "step": 2837 }, { "epoch": 0.32971246006389776, "grad_norm": 0.3723565638065338, "learning_rate": 0.0001, "loss": 1.6253, "step": 2838 }, { "epoch": 0.32982863781585825, "grad_norm": 0.3974156975746155, "learning_rate": 0.0001, "loss": 1.629, "step": 2839 }, { "epoch": 0.32994481556781874, "grad_norm": 0.3748154640197754, "learning_rate": 0.0001, "loss": 1.5427, "step": 2840 }, { "epoch": 0.3300609933197793, "grad_norm": 0.375637024641037, "learning_rate": 0.0001, "loss": 1.5665, "step": 2841 }, { "epoch": 0.3301771710717398, "grad_norm": 0.397079199552536, "learning_rate": 0.0001, "loss": 1.7209, "step": 2842 }, { "epoch": 0.33029334882370026, "grad_norm": 0.4001121520996094, "learning_rate": 0.0001, "loss": 1.6809, "step": 2843 }, { "epoch": 0.33040952657566075, "grad_norm": 0.3832455277442932, "learning_rate": 0.0001, "loss": 1.7496, "step": 2844 }, { "epoch": 0.33052570432762124, "grad_norm": 0.3800029456615448, "learning_rate": 0.0001, "loss": 1.6107, "step": 2845 }, { "epoch": 0.3306418820795818, "grad_norm": 0.37468817830085754, "learning_rate": 0.0001, "loss": 1.6306, "step": 2846 }, { "epoch": 0.3307580598315423, "grad_norm": 0.3698212802410126, "learning_rate": 0.0001, "loss": 1.6629, "step": 2847 }, { "epoch": 0.33087423758350276, "grad_norm": 0.39800024032592773, "learning_rate": 0.0001, "loss": 1.5955, "step": 2848 }, { "epoch": 0.33099041533546325, "grad_norm": 0.3913809657096863, "learning_rate": 0.0001, "loss": 1.7207, "step": 2849 }, { "epoch": 0.33110659308742374, "grad_norm": 0.3854704201221466, "learning_rate": 0.0001, "loss": 1.696, "step": 2850 }, { "epoch": 0.3312227708393843, "grad_norm": 0.36068427562713623, "learning_rate": 0.0001, "loss": 1.6256, "step": 2851 }, { "epoch": 0.3313389485913448, "grad_norm": 0.4268893003463745, "learning_rate": 0.0001, "loss": 1.7395, "step": 2852 }, { "epoch": 0.33145512634330526, "grad_norm": 0.3733718991279602, "learning_rate": 0.0001, "loss": 1.6515, "step": 2853 }, { "epoch": 0.33157130409526575, "grad_norm": 0.36404022574424744, "learning_rate": 0.0001, "loss": 1.4211, "step": 2854 }, { "epoch": 0.33168748184722624, "grad_norm": 0.4233626425266266, "learning_rate": 0.0001, "loss": 1.6407, "step": 2855 }, { "epoch": 0.33180365959918673, "grad_norm": 0.48992058634757996, "learning_rate": 0.0001, "loss": 1.8077, "step": 2856 }, { "epoch": 0.3319198373511473, "grad_norm": 0.4136912226676941, "learning_rate": 0.0001, "loss": 1.7703, "step": 2857 }, { "epoch": 0.33203601510310776, "grad_norm": 0.36938685178756714, "learning_rate": 0.0001, "loss": 1.6473, "step": 2858 }, { "epoch": 0.33215219285506825, "grad_norm": 0.36779820919036865, "learning_rate": 0.0001, "loss": 1.6147, "step": 2859 }, { "epoch": 0.33226837060702874, "grad_norm": 0.36435508728027344, "learning_rate": 0.0001, "loss": 1.508, "step": 2860 }, { "epoch": 0.33238454835898923, "grad_norm": 0.38518238067626953, "learning_rate": 0.0001, "loss": 1.6981, "step": 2861 }, { "epoch": 0.3325007261109498, "grad_norm": 0.419127881526947, "learning_rate": 0.0001, "loss": 1.8029, "step": 2862 }, { "epoch": 0.33261690386291026, "grad_norm": 0.3840404152870178, "learning_rate": 0.0001, "loss": 1.6177, "step": 2863 }, { "epoch": 0.33273308161487075, "grad_norm": 0.4244234263896942, "learning_rate": 0.0001, "loss": 1.7396, "step": 2864 }, { "epoch": 0.33284925936683124, "grad_norm": 0.40115001797676086, "learning_rate": 0.0001, "loss": 1.6808, "step": 2865 }, { "epoch": 0.33296543711879173, "grad_norm": 0.4338228702545166, "learning_rate": 0.0001, "loss": 1.6853, "step": 2866 }, { "epoch": 0.3330816148707523, "grad_norm": 0.40613770484924316, "learning_rate": 0.0001, "loss": 1.7427, "step": 2867 }, { "epoch": 0.33319779262271276, "grad_norm": 0.4123576283454895, "learning_rate": 0.0001, "loss": 1.5359, "step": 2868 }, { "epoch": 0.33331397037467325, "grad_norm": 0.3951095938682556, "learning_rate": 0.0001, "loss": 1.6141, "step": 2869 }, { "epoch": 0.33343014812663374, "grad_norm": 0.410762220621109, "learning_rate": 0.0001, "loss": 1.82, "step": 2870 }, { "epoch": 0.33354632587859423, "grad_norm": 0.4156797528266907, "learning_rate": 0.0001, "loss": 1.687, "step": 2871 }, { "epoch": 0.3336625036305548, "grad_norm": 0.3592885434627533, "learning_rate": 0.0001, "loss": 1.5042, "step": 2872 }, { "epoch": 0.33377868138251526, "grad_norm": 0.42720362544059753, "learning_rate": 0.0001, "loss": 1.918, "step": 2873 }, { "epoch": 0.33389485913447575, "grad_norm": 0.40637707710266113, "learning_rate": 0.0001, "loss": 1.8286, "step": 2874 }, { "epoch": 0.33401103688643624, "grad_norm": 0.39554738998413086, "learning_rate": 0.0001, "loss": 1.6854, "step": 2875 }, { "epoch": 0.33412721463839673, "grad_norm": 0.37411895394325256, "learning_rate": 0.0001, "loss": 1.5642, "step": 2876 }, { "epoch": 0.3342433923903572, "grad_norm": 0.38688522577285767, "learning_rate": 0.0001, "loss": 1.5817, "step": 2877 }, { "epoch": 0.33435957014231776, "grad_norm": 0.39456331729888916, "learning_rate": 0.0001, "loss": 1.7141, "step": 2878 }, { "epoch": 0.33447574789427825, "grad_norm": 0.37233471870422363, "learning_rate": 0.0001, "loss": 1.5662, "step": 2879 }, { "epoch": 0.33459192564623874, "grad_norm": 0.3917320668697357, "learning_rate": 0.0001, "loss": 1.7551, "step": 2880 }, { "epoch": 0.33470810339819923, "grad_norm": 0.3961198627948761, "learning_rate": 0.0001, "loss": 1.7296, "step": 2881 }, { "epoch": 0.3348242811501597, "grad_norm": 0.4228503108024597, "learning_rate": 0.0001, "loss": 1.8795, "step": 2882 }, { "epoch": 0.33494045890212026, "grad_norm": 0.3861311376094818, "learning_rate": 0.0001, "loss": 1.8777, "step": 2883 }, { "epoch": 0.33505663665408075, "grad_norm": 0.3791095018386841, "learning_rate": 0.0001, "loss": 1.6151, "step": 2884 }, { "epoch": 0.33517281440604124, "grad_norm": 0.45170632004737854, "learning_rate": 0.0001, "loss": 1.8281, "step": 2885 }, { "epoch": 0.33528899215800173, "grad_norm": 0.4086534082889557, "learning_rate": 0.0001, "loss": 1.5911, "step": 2886 }, { "epoch": 0.3354051699099622, "grad_norm": 0.4174976348876953, "learning_rate": 0.0001, "loss": 1.6932, "step": 2887 }, { "epoch": 0.33552134766192276, "grad_norm": 0.39714473485946655, "learning_rate": 0.0001, "loss": 1.683, "step": 2888 }, { "epoch": 0.33563752541388325, "grad_norm": 0.39770638942718506, "learning_rate": 0.0001, "loss": 1.7405, "step": 2889 }, { "epoch": 0.33575370316584374, "grad_norm": 0.4018489122390747, "learning_rate": 0.0001, "loss": 1.621, "step": 2890 }, { "epoch": 0.33586988091780423, "grad_norm": 0.3446825444698334, "learning_rate": 0.0001, "loss": 1.5324, "step": 2891 }, { "epoch": 0.3359860586697647, "grad_norm": 0.41117849946022034, "learning_rate": 0.0001, "loss": 1.7171, "step": 2892 }, { "epoch": 0.33610223642172526, "grad_norm": 0.4037335216999054, "learning_rate": 0.0001, "loss": 1.7018, "step": 2893 }, { "epoch": 0.33621841417368575, "grad_norm": 0.3712430000305176, "learning_rate": 0.0001, "loss": 1.5361, "step": 2894 }, { "epoch": 0.33633459192564624, "grad_norm": 0.4003915786743164, "learning_rate": 0.0001, "loss": 1.5166, "step": 2895 }, { "epoch": 0.33645076967760673, "grad_norm": 0.43774011731147766, "learning_rate": 0.0001, "loss": 1.5873, "step": 2896 }, { "epoch": 0.3365669474295672, "grad_norm": 0.43321821093559265, "learning_rate": 0.0001, "loss": 1.5878, "step": 2897 }, { "epoch": 0.33668312518152776, "grad_norm": 0.37499547004699707, "learning_rate": 0.0001, "loss": 1.5873, "step": 2898 }, { "epoch": 0.33679930293348825, "grad_norm": 0.37291446328163147, "learning_rate": 0.0001, "loss": 1.4856, "step": 2899 }, { "epoch": 0.33691548068544874, "grad_norm": 0.44143107533454895, "learning_rate": 0.0001, "loss": 1.8552, "step": 2900 }, { "epoch": 0.33703165843740923, "grad_norm": 0.3805929720401764, "learning_rate": 0.0001, "loss": 1.7206, "step": 2901 }, { "epoch": 0.3371478361893697, "grad_norm": 0.3873468339443207, "learning_rate": 0.0001, "loss": 1.6746, "step": 2902 }, { "epoch": 0.3372640139413302, "grad_norm": 0.3733051121234894, "learning_rate": 0.0001, "loss": 1.5941, "step": 2903 }, { "epoch": 0.33738019169329075, "grad_norm": 0.40816688537597656, "learning_rate": 0.0001, "loss": 1.6459, "step": 2904 }, { "epoch": 0.33749636944525124, "grad_norm": 0.3908594250679016, "learning_rate": 0.0001, "loss": 1.6518, "step": 2905 }, { "epoch": 0.33761254719721173, "grad_norm": 0.40407541394233704, "learning_rate": 0.0001, "loss": 1.7334, "step": 2906 }, { "epoch": 0.3377287249491722, "grad_norm": 0.4066741168498993, "learning_rate": 0.0001, "loss": 1.6296, "step": 2907 }, { "epoch": 0.3378449027011327, "grad_norm": 0.3964504599571228, "learning_rate": 0.0001, "loss": 1.6915, "step": 2908 }, { "epoch": 0.33796108045309325, "grad_norm": 0.37795713543891907, "learning_rate": 0.0001, "loss": 1.5542, "step": 2909 }, { "epoch": 0.33807725820505374, "grad_norm": 0.37210172414779663, "learning_rate": 0.0001, "loss": 1.6524, "step": 2910 }, { "epoch": 0.33819343595701423, "grad_norm": 0.3974775969982147, "learning_rate": 0.0001, "loss": 1.7075, "step": 2911 }, { "epoch": 0.3383096137089747, "grad_norm": 0.37600451707839966, "learning_rate": 0.0001, "loss": 1.8123, "step": 2912 }, { "epoch": 0.3384257914609352, "grad_norm": 0.3853381276130676, "learning_rate": 0.0001, "loss": 1.7165, "step": 2913 }, { "epoch": 0.33854196921289575, "grad_norm": 0.38297778367996216, "learning_rate": 0.0001, "loss": 1.6564, "step": 2914 }, { "epoch": 0.33865814696485624, "grad_norm": 0.392566978931427, "learning_rate": 0.0001, "loss": 1.6987, "step": 2915 }, { "epoch": 0.33877432471681673, "grad_norm": 0.3868968188762665, "learning_rate": 0.0001, "loss": 1.6981, "step": 2916 }, { "epoch": 0.3388905024687772, "grad_norm": 0.39296168088912964, "learning_rate": 0.0001, "loss": 1.7263, "step": 2917 }, { "epoch": 0.3390066802207377, "grad_norm": 0.3615785837173462, "learning_rate": 0.0001, "loss": 1.5509, "step": 2918 }, { "epoch": 0.33912285797269826, "grad_norm": 0.3897397220134735, "learning_rate": 0.0001, "loss": 1.7675, "step": 2919 }, { "epoch": 0.33923903572465874, "grad_norm": 0.3959611654281616, "learning_rate": 0.0001, "loss": 1.7306, "step": 2920 }, { "epoch": 0.33935521347661923, "grad_norm": 0.39301490783691406, "learning_rate": 0.0001, "loss": 1.6661, "step": 2921 }, { "epoch": 0.3394713912285797, "grad_norm": 0.3942652940750122, "learning_rate": 0.0001, "loss": 1.6023, "step": 2922 }, { "epoch": 0.3395875689805402, "grad_norm": 0.43564826250076294, "learning_rate": 0.0001, "loss": 1.8724, "step": 2923 }, { "epoch": 0.3397037467325007, "grad_norm": 0.39441993832588196, "learning_rate": 0.0001, "loss": 1.7982, "step": 2924 }, { "epoch": 0.33981992448446124, "grad_norm": 0.3842059373855591, "learning_rate": 0.0001, "loss": 1.6407, "step": 2925 }, { "epoch": 0.33993610223642173, "grad_norm": 0.40640154480934143, "learning_rate": 0.0001, "loss": 1.8546, "step": 2926 }, { "epoch": 0.3400522799883822, "grad_norm": 0.41045522689819336, "learning_rate": 0.0001, "loss": 1.8838, "step": 2927 }, { "epoch": 0.3401684577403427, "grad_norm": 0.37923598289489746, "learning_rate": 0.0001, "loss": 1.6066, "step": 2928 }, { "epoch": 0.3402846354923032, "grad_norm": 0.3806171715259552, "learning_rate": 0.0001, "loss": 1.7563, "step": 2929 }, { "epoch": 0.34040081324426374, "grad_norm": 0.3735258877277374, "learning_rate": 0.0001, "loss": 1.627, "step": 2930 }, { "epoch": 0.34051699099622423, "grad_norm": 0.3817983865737915, "learning_rate": 0.0001, "loss": 1.6563, "step": 2931 }, { "epoch": 0.3406331687481847, "grad_norm": 0.4227299690246582, "learning_rate": 0.0001, "loss": 1.78, "step": 2932 }, { "epoch": 0.3407493465001452, "grad_norm": 0.4059399664402008, "learning_rate": 0.0001, "loss": 1.5223, "step": 2933 }, { "epoch": 0.3408655242521057, "grad_norm": 0.3733903169631958, "learning_rate": 0.0001, "loss": 1.5281, "step": 2934 }, { "epoch": 0.34098170200406624, "grad_norm": 0.3693414330482483, "learning_rate": 0.0001, "loss": 1.6979, "step": 2935 }, { "epoch": 0.34109787975602673, "grad_norm": 0.3974681794643402, "learning_rate": 0.0001, "loss": 1.7623, "step": 2936 }, { "epoch": 0.3412140575079872, "grad_norm": 0.40453028678894043, "learning_rate": 0.0001, "loss": 1.6651, "step": 2937 }, { "epoch": 0.3413302352599477, "grad_norm": 0.3921975791454315, "learning_rate": 0.0001, "loss": 1.5821, "step": 2938 }, { "epoch": 0.3414464130119082, "grad_norm": 0.39022400975227356, "learning_rate": 0.0001, "loss": 1.6862, "step": 2939 }, { "epoch": 0.34156259076386875, "grad_norm": 0.39960476756095886, "learning_rate": 0.0001, "loss": 1.7099, "step": 2940 }, { "epoch": 0.34167876851582923, "grad_norm": 0.40952742099761963, "learning_rate": 0.0001, "loss": 1.7774, "step": 2941 }, { "epoch": 0.3417949462677897, "grad_norm": 0.3948822617530823, "learning_rate": 0.0001, "loss": 1.7191, "step": 2942 }, { "epoch": 0.3419111240197502, "grad_norm": 0.4061760902404785, "learning_rate": 0.0001, "loss": 1.7105, "step": 2943 }, { "epoch": 0.3420273017717107, "grad_norm": 0.3945620357990265, "learning_rate": 0.0001, "loss": 1.7056, "step": 2944 }, { "epoch": 0.3421434795236712, "grad_norm": 0.4100196659564972, "learning_rate": 0.0001, "loss": 1.7106, "step": 2945 }, { "epoch": 0.34225965727563173, "grad_norm": 0.3933731019496918, "learning_rate": 0.0001, "loss": 1.6135, "step": 2946 }, { "epoch": 0.3423758350275922, "grad_norm": 0.3884199559688568, "learning_rate": 0.0001, "loss": 1.6829, "step": 2947 }, { "epoch": 0.3424920127795527, "grad_norm": 0.386350154876709, "learning_rate": 0.0001, "loss": 1.7033, "step": 2948 }, { "epoch": 0.3426081905315132, "grad_norm": 0.40476351976394653, "learning_rate": 0.0001, "loss": 1.5197, "step": 2949 }, { "epoch": 0.3427243682834737, "grad_norm": 0.407321572303772, "learning_rate": 0.0001, "loss": 1.7674, "step": 2950 }, { "epoch": 0.34284054603543423, "grad_norm": 0.40488916635513306, "learning_rate": 0.0001, "loss": 1.6427, "step": 2951 }, { "epoch": 0.3429567237873947, "grad_norm": 0.4045671224594116, "learning_rate": 0.0001, "loss": 1.8208, "step": 2952 }, { "epoch": 0.3430729015393552, "grad_norm": 0.4002356231212616, "learning_rate": 0.0001, "loss": 1.6873, "step": 2953 }, { "epoch": 0.3431890792913157, "grad_norm": 0.3586932122707367, "learning_rate": 0.0001, "loss": 1.4746, "step": 2954 }, { "epoch": 0.3433052570432762, "grad_norm": 0.370332807302475, "learning_rate": 0.0001, "loss": 1.6734, "step": 2955 }, { "epoch": 0.34342143479523674, "grad_norm": 0.39355921745300293, "learning_rate": 0.0001, "loss": 1.8061, "step": 2956 }, { "epoch": 0.3435376125471972, "grad_norm": 0.39952757954597473, "learning_rate": 0.0001, "loss": 1.6956, "step": 2957 }, { "epoch": 0.3436537902991577, "grad_norm": 0.3770568370819092, "learning_rate": 0.0001, "loss": 1.724, "step": 2958 }, { "epoch": 0.3437699680511182, "grad_norm": 0.4441458582878113, "learning_rate": 0.0001, "loss": 1.9858, "step": 2959 }, { "epoch": 0.3438861458030787, "grad_norm": 0.38884633779525757, "learning_rate": 0.0001, "loss": 1.6538, "step": 2960 }, { "epoch": 0.34400232355503924, "grad_norm": 0.41551780700683594, "learning_rate": 0.0001, "loss": 1.8076, "step": 2961 }, { "epoch": 0.3441185013069997, "grad_norm": 0.3980303108692169, "learning_rate": 0.0001, "loss": 1.613, "step": 2962 }, { "epoch": 0.3442346790589602, "grad_norm": 0.3553750813007355, "learning_rate": 0.0001, "loss": 1.5749, "step": 2963 }, { "epoch": 0.3443508568109207, "grad_norm": 0.38143518567085266, "learning_rate": 0.0001, "loss": 1.6693, "step": 2964 }, { "epoch": 0.3444670345628812, "grad_norm": 0.40971437096595764, "learning_rate": 0.0001, "loss": 1.65, "step": 2965 }, { "epoch": 0.3445832123148417, "grad_norm": 0.35537075996398926, "learning_rate": 0.0001, "loss": 1.6096, "step": 2966 }, { "epoch": 0.3446993900668022, "grad_norm": 0.38093459606170654, "learning_rate": 0.0001, "loss": 1.7213, "step": 2967 }, { "epoch": 0.3448155678187627, "grad_norm": 0.34431418776512146, "learning_rate": 0.0001, "loss": 1.4065, "step": 2968 }, { "epoch": 0.3449317455707232, "grad_norm": 0.427884042263031, "learning_rate": 0.0001, "loss": 1.7658, "step": 2969 }, { "epoch": 0.3450479233226837, "grad_norm": 0.4321180582046509, "learning_rate": 0.0001, "loss": 1.8182, "step": 2970 }, { "epoch": 0.3451641010746442, "grad_norm": 0.3925512731075287, "learning_rate": 0.0001, "loss": 1.6195, "step": 2971 }, { "epoch": 0.3452802788266047, "grad_norm": 0.3812701106071472, "learning_rate": 0.0001, "loss": 1.5437, "step": 2972 }, { "epoch": 0.3453964565785652, "grad_norm": 0.4410078227519989, "learning_rate": 0.0001, "loss": 1.8109, "step": 2973 }, { "epoch": 0.3455126343305257, "grad_norm": 0.38449546694755554, "learning_rate": 0.0001, "loss": 1.7467, "step": 2974 }, { "epoch": 0.3456288120824862, "grad_norm": 0.3961304724216461, "learning_rate": 0.0001, "loss": 1.5686, "step": 2975 }, { "epoch": 0.3457449898344467, "grad_norm": 0.3921011686325073, "learning_rate": 0.0001, "loss": 1.7288, "step": 2976 }, { "epoch": 0.3458611675864072, "grad_norm": 0.39690279960632324, "learning_rate": 0.0001, "loss": 1.7883, "step": 2977 }, { "epoch": 0.3459773453383677, "grad_norm": 0.4186713397502899, "learning_rate": 0.0001, "loss": 1.7538, "step": 2978 }, { "epoch": 0.3460935230903282, "grad_norm": 0.39231374859809875, "learning_rate": 0.0001, "loss": 1.7383, "step": 2979 }, { "epoch": 0.3462097008422887, "grad_norm": 0.3903275430202484, "learning_rate": 0.0001, "loss": 1.6001, "step": 2980 }, { "epoch": 0.3463258785942492, "grad_norm": 0.4018222689628601, "learning_rate": 0.0001, "loss": 1.5887, "step": 2981 }, { "epoch": 0.3464420563462097, "grad_norm": 0.41252797842025757, "learning_rate": 0.0001, "loss": 1.5982, "step": 2982 }, { "epoch": 0.3465582340981702, "grad_norm": 0.38088223338127136, "learning_rate": 0.0001, "loss": 1.4515, "step": 2983 }, { "epoch": 0.3466744118501307, "grad_norm": 0.3844488859176636, "learning_rate": 0.0001, "loss": 1.6082, "step": 2984 }, { "epoch": 0.3467905896020912, "grad_norm": 0.3818972408771515, "learning_rate": 0.0001, "loss": 1.7069, "step": 2985 }, { "epoch": 0.3469067673540517, "grad_norm": 0.4173097014427185, "learning_rate": 0.0001, "loss": 1.619, "step": 2986 }, { "epoch": 0.3470229451060122, "grad_norm": 0.428475946187973, "learning_rate": 0.0001, "loss": 1.8248, "step": 2987 }, { "epoch": 0.3471391228579727, "grad_norm": 0.40576884150505066, "learning_rate": 0.0001, "loss": 1.6225, "step": 2988 }, { "epoch": 0.3472553006099332, "grad_norm": 0.4049152433872223, "learning_rate": 0.0001, "loss": 1.5798, "step": 2989 }, { "epoch": 0.3473714783618937, "grad_norm": 0.3948923647403717, "learning_rate": 0.0001, "loss": 1.6983, "step": 2990 }, { "epoch": 0.3474876561138542, "grad_norm": 0.3774530291557312, "learning_rate": 0.0001, "loss": 1.7484, "step": 2991 }, { "epoch": 0.34760383386581467, "grad_norm": 0.38777539134025574, "learning_rate": 0.0001, "loss": 1.6732, "step": 2992 }, { "epoch": 0.3477200116177752, "grad_norm": 0.38459205627441406, "learning_rate": 0.0001, "loss": 1.6152, "step": 2993 }, { "epoch": 0.3478361893697357, "grad_norm": 0.44415146112442017, "learning_rate": 0.0001, "loss": 1.8133, "step": 2994 }, { "epoch": 0.3479523671216962, "grad_norm": 0.3697360157966614, "learning_rate": 0.0001, "loss": 1.5783, "step": 2995 }, { "epoch": 0.3480685448736567, "grad_norm": 0.4112284779548645, "learning_rate": 0.0001, "loss": 1.7501, "step": 2996 }, { "epoch": 0.34818472262561717, "grad_norm": 0.42278242111206055, "learning_rate": 0.0001, "loss": 1.7225, "step": 2997 }, { "epoch": 0.3483009003775777, "grad_norm": 0.3720592260360718, "learning_rate": 0.0001, "loss": 1.5809, "step": 2998 }, { "epoch": 0.3484170781295382, "grad_norm": 0.37524881958961487, "learning_rate": 0.0001, "loss": 1.6982, "step": 2999 }, { "epoch": 0.3485332558814987, "grad_norm": 0.38815271854400635, "learning_rate": 0.0001, "loss": 1.6433, "step": 3000 }, { "epoch": 0.3486494336334592, "grad_norm": 0.4013916552066803, "learning_rate": 0.0001, "loss": 1.6074, "step": 3001 }, { "epoch": 0.34876561138541967, "grad_norm": 0.3819652497768402, "learning_rate": 0.0001, "loss": 1.6946, "step": 3002 }, { "epoch": 0.3488817891373802, "grad_norm": 0.37497755885124207, "learning_rate": 0.0001, "loss": 1.5937, "step": 3003 }, { "epoch": 0.3489979668893407, "grad_norm": 0.3700786232948303, "learning_rate": 0.0001, "loss": 1.5815, "step": 3004 }, { "epoch": 0.3491141446413012, "grad_norm": 0.40467193722724915, "learning_rate": 0.0001, "loss": 1.7022, "step": 3005 }, { "epoch": 0.3492303223932617, "grad_norm": 0.3688233494758606, "learning_rate": 0.0001, "loss": 1.5006, "step": 3006 }, { "epoch": 0.34934650014522217, "grad_norm": 0.39277154207229614, "learning_rate": 0.0001, "loss": 1.7189, "step": 3007 }, { "epoch": 0.3494626778971827, "grad_norm": 0.3854866921901703, "learning_rate": 0.0001, "loss": 1.66, "step": 3008 }, { "epoch": 0.3495788556491432, "grad_norm": 0.4092784821987152, "learning_rate": 0.0001, "loss": 1.7302, "step": 3009 }, { "epoch": 0.3496950334011037, "grad_norm": 0.3558938205242157, "learning_rate": 0.0001, "loss": 1.5746, "step": 3010 }, { "epoch": 0.3498112111530642, "grad_norm": 0.4179665148258209, "learning_rate": 0.0001, "loss": 1.7137, "step": 3011 }, { "epoch": 0.34992738890502467, "grad_norm": 0.38339027762413025, "learning_rate": 0.0001, "loss": 1.4907, "step": 3012 }, { "epoch": 0.35004356665698516, "grad_norm": 0.36649930477142334, "learning_rate": 0.0001, "loss": 1.6285, "step": 3013 }, { "epoch": 0.3501597444089457, "grad_norm": 0.4202296733856201, "learning_rate": 0.0001, "loss": 1.6298, "step": 3014 }, { "epoch": 0.3502759221609062, "grad_norm": 0.4036094844341278, "learning_rate": 0.0001, "loss": 1.7476, "step": 3015 }, { "epoch": 0.3503920999128667, "grad_norm": 0.3799827992916107, "learning_rate": 0.0001, "loss": 1.6123, "step": 3016 }, { "epoch": 0.3505082776648272, "grad_norm": 0.40348902344703674, "learning_rate": 0.0001, "loss": 1.7164, "step": 3017 }, { "epoch": 0.35062445541678766, "grad_norm": 0.38102301955223083, "learning_rate": 0.0001, "loss": 1.7244, "step": 3018 }, { "epoch": 0.3507406331687482, "grad_norm": 0.3788668215274811, "learning_rate": 0.0001, "loss": 1.637, "step": 3019 }, { "epoch": 0.3508568109207087, "grad_norm": 0.38864848017692566, "learning_rate": 0.0001, "loss": 1.5131, "step": 3020 }, { "epoch": 0.3509729886726692, "grad_norm": 0.37422239780426025, "learning_rate": 0.0001, "loss": 1.7779, "step": 3021 }, { "epoch": 0.3510891664246297, "grad_norm": 0.37829017639160156, "learning_rate": 0.0001, "loss": 1.7872, "step": 3022 }, { "epoch": 0.35120534417659016, "grad_norm": 0.39269503951072693, "learning_rate": 0.0001, "loss": 1.6837, "step": 3023 }, { "epoch": 0.3513215219285507, "grad_norm": 0.381878137588501, "learning_rate": 0.0001, "loss": 1.6482, "step": 3024 }, { "epoch": 0.3514376996805112, "grad_norm": 0.39613422751426697, "learning_rate": 0.0001, "loss": 1.8325, "step": 3025 }, { "epoch": 0.3515538774324717, "grad_norm": 0.42367449402809143, "learning_rate": 0.0001, "loss": 1.7125, "step": 3026 }, { "epoch": 0.3516700551844322, "grad_norm": 0.3661247789859772, "learning_rate": 0.0001, "loss": 1.6193, "step": 3027 }, { "epoch": 0.35178623293639266, "grad_norm": 0.36878344416618347, "learning_rate": 0.0001, "loss": 1.5813, "step": 3028 }, { "epoch": 0.3519024106883532, "grad_norm": 0.38732531666755676, "learning_rate": 0.0001, "loss": 1.7773, "step": 3029 }, { "epoch": 0.3520185884403137, "grad_norm": 0.4050348103046417, "learning_rate": 0.0001, "loss": 1.5636, "step": 3030 }, { "epoch": 0.3521347661922742, "grad_norm": 0.40039730072021484, "learning_rate": 0.0001, "loss": 1.6083, "step": 3031 }, { "epoch": 0.3522509439442347, "grad_norm": 0.40154829621315, "learning_rate": 0.0001, "loss": 1.6367, "step": 3032 }, { "epoch": 0.35236712169619516, "grad_norm": 0.37400686740875244, "learning_rate": 0.0001, "loss": 1.6594, "step": 3033 }, { "epoch": 0.35248329944815565, "grad_norm": 0.3930714726448059, "learning_rate": 0.0001, "loss": 1.874, "step": 3034 }, { "epoch": 0.3525994772001162, "grad_norm": 0.4327416718006134, "learning_rate": 0.0001, "loss": 1.8191, "step": 3035 }, { "epoch": 0.3527156549520767, "grad_norm": 0.4135274887084961, "learning_rate": 0.0001, "loss": 1.9436, "step": 3036 }, { "epoch": 0.3528318327040372, "grad_norm": 0.36766317486763, "learning_rate": 0.0001, "loss": 1.6381, "step": 3037 }, { "epoch": 0.35294801045599766, "grad_norm": 0.3964691460132599, "learning_rate": 0.0001, "loss": 1.7224, "step": 3038 }, { "epoch": 0.35306418820795815, "grad_norm": 0.4026515781879425, "learning_rate": 0.0001, "loss": 1.8446, "step": 3039 }, { "epoch": 0.3531803659599187, "grad_norm": 0.38630566000938416, "learning_rate": 0.0001, "loss": 1.6358, "step": 3040 }, { "epoch": 0.3532965437118792, "grad_norm": 0.38038820028305054, "learning_rate": 0.0001, "loss": 1.6688, "step": 3041 }, { "epoch": 0.3534127214638397, "grad_norm": 0.379242867231369, "learning_rate": 0.0001, "loss": 1.4822, "step": 3042 }, { "epoch": 0.35352889921580016, "grad_norm": 0.3936004340648651, "learning_rate": 0.0001, "loss": 1.8992, "step": 3043 }, { "epoch": 0.35364507696776065, "grad_norm": 0.3659883737564087, "learning_rate": 0.0001, "loss": 1.5642, "step": 3044 }, { "epoch": 0.3537612547197212, "grad_norm": 0.4306775629520416, "learning_rate": 0.0001, "loss": 1.8698, "step": 3045 }, { "epoch": 0.3538774324716817, "grad_norm": 0.40540704131126404, "learning_rate": 0.0001, "loss": 1.7321, "step": 3046 }, { "epoch": 0.3539936102236422, "grad_norm": 0.3898630440235138, "learning_rate": 0.0001, "loss": 1.7021, "step": 3047 }, { "epoch": 0.35410978797560266, "grad_norm": 0.40471145510673523, "learning_rate": 0.0001, "loss": 1.7801, "step": 3048 }, { "epoch": 0.35422596572756315, "grad_norm": 0.40043899416923523, "learning_rate": 0.0001, "loss": 1.6066, "step": 3049 }, { "epoch": 0.3543421434795237, "grad_norm": 0.3994670808315277, "learning_rate": 0.0001, "loss": 1.6784, "step": 3050 }, { "epoch": 0.3544583212314842, "grad_norm": 0.4123898446559906, "learning_rate": 0.0001, "loss": 1.7, "step": 3051 }, { "epoch": 0.3545744989834447, "grad_norm": 0.3823109567165375, "learning_rate": 0.0001, "loss": 1.6944, "step": 3052 }, { "epoch": 0.35469067673540516, "grad_norm": 0.41799673438072205, "learning_rate": 0.0001, "loss": 1.7098, "step": 3053 }, { "epoch": 0.35480685448736565, "grad_norm": 0.36214837431907654, "learning_rate": 0.0001, "loss": 1.5653, "step": 3054 }, { "epoch": 0.35492303223932614, "grad_norm": 0.4229331910610199, "learning_rate": 0.0001, "loss": 1.6691, "step": 3055 }, { "epoch": 0.3550392099912867, "grad_norm": 0.38769447803497314, "learning_rate": 0.0001, "loss": 1.5539, "step": 3056 }, { "epoch": 0.3551553877432472, "grad_norm": 0.39255595207214355, "learning_rate": 0.0001, "loss": 1.5755, "step": 3057 }, { "epoch": 0.35527156549520766, "grad_norm": 0.4204464852809906, "learning_rate": 0.0001, "loss": 1.7607, "step": 3058 }, { "epoch": 0.35538774324716815, "grad_norm": 0.3666527271270752, "learning_rate": 0.0001, "loss": 1.5986, "step": 3059 }, { "epoch": 0.35550392099912864, "grad_norm": 0.384753942489624, "learning_rate": 0.0001, "loss": 1.5905, "step": 3060 }, { "epoch": 0.3556200987510892, "grad_norm": 0.3891599178314209, "learning_rate": 0.0001, "loss": 1.6803, "step": 3061 }, { "epoch": 0.3557362765030497, "grad_norm": 0.37767064571380615, "learning_rate": 0.0001, "loss": 1.6844, "step": 3062 }, { "epoch": 0.35585245425501016, "grad_norm": 0.40282127261161804, "learning_rate": 0.0001, "loss": 1.8185, "step": 3063 }, { "epoch": 0.35596863200697065, "grad_norm": 0.3778342008590698, "learning_rate": 0.0001, "loss": 1.5302, "step": 3064 }, { "epoch": 0.35608480975893114, "grad_norm": 0.37607836723327637, "learning_rate": 0.0001, "loss": 1.6726, "step": 3065 }, { "epoch": 0.3562009875108917, "grad_norm": 0.3772094249725342, "learning_rate": 0.0001, "loss": 1.7671, "step": 3066 }, { "epoch": 0.3563171652628522, "grad_norm": 0.4059067666530609, "learning_rate": 0.0001, "loss": 1.6747, "step": 3067 }, { "epoch": 0.35643334301481266, "grad_norm": 0.39582398533821106, "learning_rate": 0.0001, "loss": 1.6757, "step": 3068 }, { "epoch": 0.35654952076677315, "grad_norm": 0.3916482627391815, "learning_rate": 0.0001, "loss": 1.5215, "step": 3069 }, { "epoch": 0.35666569851873364, "grad_norm": 0.41407474875450134, "learning_rate": 0.0001, "loss": 1.6777, "step": 3070 }, { "epoch": 0.3567818762706942, "grad_norm": 0.41029298305511475, "learning_rate": 0.0001, "loss": 1.7567, "step": 3071 }, { "epoch": 0.3568980540226547, "grad_norm": 0.4083024859428406, "learning_rate": 0.0001, "loss": 1.6835, "step": 3072 }, { "epoch": 0.35701423177461517, "grad_norm": 0.44555673003196716, "learning_rate": 0.0001, "loss": 1.7028, "step": 3073 }, { "epoch": 0.35713040952657565, "grad_norm": 0.37947797775268555, "learning_rate": 0.0001, "loss": 1.5277, "step": 3074 }, { "epoch": 0.35724658727853614, "grad_norm": 0.3823384940624237, "learning_rate": 0.0001, "loss": 1.673, "step": 3075 }, { "epoch": 0.3573627650304967, "grad_norm": 0.3980938494205475, "learning_rate": 0.0001, "loss": 1.7366, "step": 3076 }, { "epoch": 0.3574789427824572, "grad_norm": 0.39685919880867004, "learning_rate": 0.0001, "loss": 1.7057, "step": 3077 }, { "epoch": 0.35759512053441767, "grad_norm": 0.4064877927303314, "learning_rate": 0.0001, "loss": 1.7869, "step": 3078 }, { "epoch": 0.35771129828637815, "grad_norm": 0.41149288415908813, "learning_rate": 0.0001, "loss": 1.634, "step": 3079 }, { "epoch": 0.35782747603833864, "grad_norm": 0.36738306283950806, "learning_rate": 0.0001, "loss": 1.6363, "step": 3080 }, { "epoch": 0.35794365379029913, "grad_norm": 0.393264502286911, "learning_rate": 0.0001, "loss": 1.6643, "step": 3081 }, { "epoch": 0.3580598315422597, "grad_norm": 0.3866167366504669, "learning_rate": 0.0001, "loss": 1.7136, "step": 3082 }, { "epoch": 0.35817600929422017, "grad_norm": 0.38712388277053833, "learning_rate": 0.0001, "loss": 1.6682, "step": 3083 }, { "epoch": 0.35829218704618065, "grad_norm": 0.401869535446167, "learning_rate": 0.0001, "loss": 1.6885, "step": 3084 }, { "epoch": 0.35840836479814114, "grad_norm": 0.3952792286872864, "learning_rate": 0.0001, "loss": 1.6841, "step": 3085 }, { "epoch": 0.35852454255010163, "grad_norm": 0.36368271708488464, "learning_rate": 0.0001, "loss": 1.422, "step": 3086 }, { "epoch": 0.3586407203020622, "grad_norm": 0.39687228202819824, "learning_rate": 0.0001, "loss": 1.6623, "step": 3087 }, { "epoch": 0.35875689805402267, "grad_norm": 0.41528600454330444, "learning_rate": 0.0001, "loss": 1.472, "step": 3088 }, { "epoch": 0.35887307580598315, "grad_norm": 0.420449435710907, "learning_rate": 0.0001, "loss": 1.7244, "step": 3089 }, { "epoch": 0.35898925355794364, "grad_norm": 0.43798938393592834, "learning_rate": 0.0001, "loss": 1.8541, "step": 3090 }, { "epoch": 0.35910543130990413, "grad_norm": 0.37088409066200256, "learning_rate": 0.0001, "loss": 1.673, "step": 3091 }, { "epoch": 0.3592216090618647, "grad_norm": 0.41478070616722107, "learning_rate": 0.0001, "loss": 1.7114, "step": 3092 }, { "epoch": 0.35933778681382517, "grad_norm": 0.4017499089241028, "learning_rate": 0.0001, "loss": 1.657, "step": 3093 }, { "epoch": 0.35945396456578566, "grad_norm": 0.4266115128993988, "learning_rate": 0.0001, "loss": 1.7716, "step": 3094 }, { "epoch": 0.35957014231774614, "grad_norm": 0.39603403210639954, "learning_rate": 0.0001, "loss": 1.7288, "step": 3095 }, { "epoch": 0.35968632006970663, "grad_norm": 0.4058244824409485, "learning_rate": 0.0001, "loss": 1.6059, "step": 3096 }, { "epoch": 0.3598024978216672, "grad_norm": 0.3646165728569031, "learning_rate": 0.0001, "loss": 1.5943, "step": 3097 }, { "epoch": 0.35991867557362767, "grad_norm": 0.3995044529438019, "learning_rate": 0.0001, "loss": 1.5707, "step": 3098 }, { "epoch": 0.36003485332558816, "grad_norm": 0.3803756535053253, "learning_rate": 0.0001, "loss": 1.5508, "step": 3099 }, { "epoch": 0.36015103107754864, "grad_norm": 0.41200825572013855, "learning_rate": 0.0001, "loss": 1.7208, "step": 3100 }, { "epoch": 0.36026720882950913, "grad_norm": 0.394388347864151, "learning_rate": 0.0001, "loss": 1.6949, "step": 3101 }, { "epoch": 0.3603833865814696, "grad_norm": 0.3949473798274994, "learning_rate": 0.0001, "loss": 1.8397, "step": 3102 }, { "epoch": 0.36049956433343017, "grad_norm": 0.41774412989616394, "learning_rate": 0.0001, "loss": 1.601, "step": 3103 }, { "epoch": 0.36061574208539066, "grad_norm": 0.3581482470035553, "learning_rate": 0.0001, "loss": 1.4879, "step": 3104 }, { "epoch": 0.36073191983735114, "grad_norm": 0.41178280115127563, "learning_rate": 0.0001, "loss": 1.585, "step": 3105 }, { "epoch": 0.36084809758931163, "grad_norm": 0.4004935920238495, "learning_rate": 0.0001, "loss": 1.6611, "step": 3106 }, { "epoch": 0.3609642753412721, "grad_norm": 0.403432697057724, "learning_rate": 0.0001, "loss": 1.6761, "step": 3107 }, { "epoch": 0.36108045309323267, "grad_norm": 0.3860589861869812, "learning_rate": 0.0001, "loss": 1.691, "step": 3108 }, { "epoch": 0.36119663084519316, "grad_norm": 0.37853455543518066, "learning_rate": 0.0001, "loss": 1.6448, "step": 3109 }, { "epoch": 0.36131280859715365, "grad_norm": 0.40292003750801086, "learning_rate": 0.0001, "loss": 1.8344, "step": 3110 }, { "epoch": 0.36142898634911413, "grad_norm": 0.3984355926513672, "learning_rate": 0.0001, "loss": 1.879, "step": 3111 }, { "epoch": 0.3615451641010746, "grad_norm": 0.44067731499671936, "learning_rate": 0.0001, "loss": 1.6588, "step": 3112 }, { "epoch": 0.36166134185303517, "grad_norm": 0.3945563733577728, "learning_rate": 0.0001, "loss": 1.7042, "step": 3113 }, { "epoch": 0.36177751960499566, "grad_norm": 0.4175066649913788, "learning_rate": 0.0001, "loss": 1.6859, "step": 3114 }, { "epoch": 0.36189369735695615, "grad_norm": 0.41401904821395874, "learning_rate": 0.0001, "loss": 1.7809, "step": 3115 }, { "epoch": 0.36200987510891663, "grad_norm": 0.3950018882751465, "learning_rate": 0.0001, "loss": 1.5787, "step": 3116 }, { "epoch": 0.3621260528608771, "grad_norm": 0.3951791524887085, "learning_rate": 0.0001, "loss": 1.5756, "step": 3117 }, { "epoch": 0.36224223061283767, "grad_norm": 0.42133089900016785, "learning_rate": 0.0001, "loss": 1.5944, "step": 3118 }, { "epoch": 0.36235840836479816, "grad_norm": 0.423442542552948, "learning_rate": 0.0001, "loss": 1.9073, "step": 3119 }, { "epoch": 0.36247458611675865, "grad_norm": 0.4069424271583557, "learning_rate": 0.0001, "loss": 1.8604, "step": 3120 }, { "epoch": 0.36259076386871913, "grad_norm": 0.4289453625679016, "learning_rate": 0.0001, "loss": 1.7824, "step": 3121 }, { "epoch": 0.3627069416206796, "grad_norm": 0.36343902349472046, "learning_rate": 0.0001, "loss": 1.6927, "step": 3122 }, { "epoch": 0.3628231193726401, "grad_norm": 0.45105743408203125, "learning_rate": 0.0001, "loss": 1.782, "step": 3123 }, { "epoch": 0.36293929712460066, "grad_norm": 0.3925178050994873, "learning_rate": 0.0001, "loss": 1.6771, "step": 3124 }, { "epoch": 0.36305547487656115, "grad_norm": 0.4135838449001312, "learning_rate": 0.0001, "loss": 1.6086, "step": 3125 }, { "epoch": 0.36317165262852164, "grad_norm": 0.36576542258262634, "learning_rate": 0.0001, "loss": 1.6254, "step": 3126 }, { "epoch": 0.3632878303804821, "grad_norm": 0.4198266565799713, "learning_rate": 0.0001, "loss": 1.8513, "step": 3127 }, { "epoch": 0.3634040081324426, "grad_norm": 0.3670734763145447, "learning_rate": 0.0001, "loss": 1.6557, "step": 3128 }, { "epoch": 0.36352018588440316, "grad_norm": 0.36761337518692017, "learning_rate": 0.0001, "loss": 1.6473, "step": 3129 }, { "epoch": 0.36363636363636365, "grad_norm": 0.40638405084609985, "learning_rate": 0.0001, "loss": 1.6446, "step": 3130 }, { "epoch": 0.36375254138832414, "grad_norm": 0.38102391362190247, "learning_rate": 0.0001, "loss": 1.6785, "step": 3131 }, { "epoch": 0.3638687191402846, "grad_norm": 0.39593052864074707, "learning_rate": 0.0001, "loss": 1.6347, "step": 3132 }, { "epoch": 0.3639848968922451, "grad_norm": 0.4067305624485016, "learning_rate": 0.0001, "loss": 1.7925, "step": 3133 }, { "epoch": 0.36410107464420566, "grad_norm": 0.3692638576030731, "learning_rate": 0.0001, "loss": 1.6906, "step": 3134 }, { "epoch": 0.36421725239616615, "grad_norm": 0.40837207436561584, "learning_rate": 0.0001, "loss": 1.8112, "step": 3135 }, { "epoch": 0.36433343014812664, "grad_norm": 0.41324061155319214, "learning_rate": 0.0001, "loss": 1.6678, "step": 3136 }, { "epoch": 0.3644496079000871, "grad_norm": 0.35416892170906067, "learning_rate": 0.0001, "loss": 1.5757, "step": 3137 }, { "epoch": 0.3645657856520476, "grad_norm": 0.44099685549736023, "learning_rate": 0.0001, "loss": 1.918, "step": 3138 }, { "epoch": 0.36468196340400816, "grad_norm": 0.3850747346878052, "learning_rate": 0.0001, "loss": 1.7345, "step": 3139 }, { "epoch": 0.36479814115596865, "grad_norm": 0.4012189209461212, "learning_rate": 0.0001, "loss": 1.7401, "step": 3140 }, { "epoch": 0.36491431890792914, "grad_norm": 0.41232171654701233, "learning_rate": 0.0001, "loss": 1.7973, "step": 3141 }, { "epoch": 0.3650304966598896, "grad_norm": 0.37721219658851624, "learning_rate": 0.0001, "loss": 1.4974, "step": 3142 }, { "epoch": 0.3651466744118501, "grad_norm": 0.3969804346561432, "learning_rate": 0.0001, "loss": 1.54, "step": 3143 }, { "epoch": 0.3652628521638106, "grad_norm": 0.4444282054901123, "learning_rate": 0.0001, "loss": 1.7726, "step": 3144 }, { "epoch": 0.36537902991577115, "grad_norm": 0.4145006239414215, "learning_rate": 0.0001, "loss": 1.6728, "step": 3145 }, { "epoch": 0.36549520766773164, "grad_norm": 0.3919104337692261, "learning_rate": 0.0001, "loss": 1.5855, "step": 3146 }, { "epoch": 0.3656113854196921, "grad_norm": 0.4502047002315521, "learning_rate": 0.0001, "loss": 1.6506, "step": 3147 }, { "epoch": 0.3657275631716526, "grad_norm": 0.3652731776237488, "learning_rate": 0.0001, "loss": 1.6604, "step": 3148 }, { "epoch": 0.3658437409236131, "grad_norm": 0.37962082028388977, "learning_rate": 0.0001, "loss": 1.5167, "step": 3149 }, { "epoch": 0.36595991867557365, "grad_norm": 0.39860934019088745, "learning_rate": 0.0001, "loss": 1.7724, "step": 3150 }, { "epoch": 0.36607609642753414, "grad_norm": 0.42549842596054077, "learning_rate": 0.0001, "loss": 1.7802, "step": 3151 }, { "epoch": 0.3661922741794946, "grad_norm": 0.37746211886405945, "learning_rate": 0.0001, "loss": 1.5899, "step": 3152 }, { "epoch": 0.3663084519314551, "grad_norm": 0.3802013397216797, "learning_rate": 0.0001, "loss": 1.712, "step": 3153 }, { "epoch": 0.3664246296834156, "grad_norm": 0.426530659198761, "learning_rate": 0.0001, "loss": 1.8369, "step": 3154 }, { "epoch": 0.36654080743537615, "grad_norm": 0.4312988817691803, "learning_rate": 0.0001, "loss": 1.8648, "step": 3155 }, { "epoch": 0.36665698518733664, "grad_norm": 0.41918662190437317, "learning_rate": 0.0001, "loss": 1.6841, "step": 3156 }, { "epoch": 0.3667731629392971, "grad_norm": 0.3956180810928345, "learning_rate": 0.0001, "loss": 1.6398, "step": 3157 }, { "epoch": 0.3668893406912576, "grad_norm": 0.3965786397457123, "learning_rate": 0.0001, "loss": 1.6395, "step": 3158 }, { "epoch": 0.3670055184432181, "grad_norm": 0.38480865955352783, "learning_rate": 0.0001, "loss": 1.5811, "step": 3159 }, { "epoch": 0.36712169619517865, "grad_norm": 0.3683672249317169, "learning_rate": 0.0001, "loss": 1.6798, "step": 3160 }, { "epoch": 0.36723787394713914, "grad_norm": 0.3930373191833496, "learning_rate": 0.0001, "loss": 1.4934, "step": 3161 }, { "epoch": 0.3673540516990996, "grad_norm": 0.4485227167606354, "learning_rate": 0.0001, "loss": 1.8452, "step": 3162 }, { "epoch": 0.3674702294510601, "grad_norm": 0.3868573009967804, "learning_rate": 0.0001, "loss": 1.7911, "step": 3163 }, { "epoch": 0.3675864072030206, "grad_norm": 0.35300782322883606, "learning_rate": 0.0001, "loss": 1.4992, "step": 3164 }, { "epoch": 0.36770258495498115, "grad_norm": 0.38356491923332214, "learning_rate": 0.0001, "loss": 1.7018, "step": 3165 }, { "epoch": 0.36781876270694164, "grad_norm": 0.4126032292842865, "learning_rate": 0.0001, "loss": 1.7307, "step": 3166 }, { "epoch": 0.3679349404589021, "grad_norm": 0.3850405216217041, "learning_rate": 0.0001, "loss": 1.6739, "step": 3167 }, { "epoch": 0.3680511182108626, "grad_norm": 0.40334662795066833, "learning_rate": 0.0001, "loss": 1.4995, "step": 3168 }, { "epoch": 0.3681672959628231, "grad_norm": 0.3768133223056793, "learning_rate": 0.0001, "loss": 1.5328, "step": 3169 }, { "epoch": 0.3682834737147836, "grad_norm": 0.43316081166267395, "learning_rate": 0.0001, "loss": 1.8014, "step": 3170 }, { "epoch": 0.36839965146674414, "grad_norm": 0.39245787262916565, "learning_rate": 0.0001, "loss": 1.6579, "step": 3171 }, { "epoch": 0.3685158292187046, "grad_norm": 0.42442598938941956, "learning_rate": 0.0001, "loss": 1.8705, "step": 3172 }, { "epoch": 0.3686320069706651, "grad_norm": 0.384989857673645, "learning_rate": 0.0001, "loss": 1.8006, "step": 3173 }, { "epoch": 0.3687481847226256, "grad_norm": 0.3639425039291382, "learning_rate": 0.0001, "loss": 1.5607, "step": 3174 }, { "epoch": 0.3688643624745861, "grad_norm": 0.4113941490650177, "learning_rate": 0.0001, "loss": 1.7056, "step": 3175 }, { "epoch": 0.36898054022654664, "grad_norm": 0.39703720808029175, "learning_rate": 0.0001, "loss": 1.6322, "step": 3176 }, { "epoch": 0.3690967179785071, "grad_norm": 0.3902719020843506, "learning_rate": 0.0001, "loss": 1.5417, "step": 3177 }, { "epoch": 0.3692128957304676, "grad_norm": 0.3856205344200134, "learning_rate": 0.0001, "loss": 1.684, "step": 3178 }, { "epoch": 0.3693290734824281, "grad_norm": 0.4505693018436432, "learning_rate": 0.0001, "loss": 1.7766, "step": 3179 }, { "epoch": 0.3694452512343886, "grad_norm": 0.4288894534111023, "learning_rate": 0.0001, "loss": 1.7079, "step": 3180 }, { "epoch": 0.36956142898634914, "grad_norm": 0.39302563667297363, "learning_rate": 0.0001, "loss": 1.6753, "step": 3181 }, { "epoch": 0.3696776067383096, "grad_norm": 0.3827257454395294, "learning_rate": 0.0001, "loss": 1.5735, "step": 3182 }, { "epoch": 0.3697937844902701, "grad_norm": 0.38766050338745117, "learning_rate": 0.0001, "loss": 1.6736, "step": 3183 }, { "epoch": 0.3699099622422306, "grad_norm": 0.38151904940605164, "learning_rate": 0.0001, "loss": 1.6248, "step": 3184 }, { "epoch": 0.3700261399941911, "grad_norm": 0.3981049656867981, "learning_rate": 0.0001, "loss": 1.6537, "step": 3185 }, { "epoch": 0.37014231774615164, "grad_norm": 0.38091933727264404, "learning_rate": 0.0001, "loss": 1.6446, "step": 3186 }, { "epoch": 0.37025849549811213, "grad_norm": 0.34910398721694946, "learning_rate": 0.0001, "loss": 1.4155, "step": 3187 }, { "epoch": 0.3703746732500726, "grad_norm": 0.3882802128791809, "learning_rate": 0.0001, "loss": 1.5564, "step": 3188 }, { "epoch": 0.3704908510020331, "grad_norm": 0.40641340613365173, "learning_rate": 0.0001, "loss": 1.8477, "step": 3189 }, { "epoch": 0.3706070287539936, "grad_norm": 0.3742848038673401, "learning_rate": 0.0001, "loss": 1.5292, "step": 3190 }, { "epoch": 0.3707232065059541, "grad_norm": 0.39867183566093445, "learning_rate": 0.0001, "loss": 1.5682, "step": 3191 }, { "epoch": 0.37083938425791463, "grad_norm": 0.38178515434265137, "learning_rate": 0.0001, "loss": 1.57, "step": 3192 }, { "epoch": 0.3709555620098751, "grad_norm": 0.38367438316345215, "learning_rate": 0.0001, "loss": 1.7399, "step": 3193 }, { "epoch": 0.3710717397618356, "grad_norm": 0.4279497563838959, "learning_rate": 0.0001, "loss": 1.7196, "step": 3194 }, { "epoch": 0.3711879175137961, "grad_norm": 0.4051482379436493, "learning_rate": 0.0001, "loss": 1.667, "step": 3195 }, { "epoch": 0.3713040952657566, "grad_norm": 0.4077185392379761, "learning_rate": 0.0001, "loss": 1.7127, "step": 3196 }, { "epoch": 0.37142027301771713, "grad_norm": 0.4203553795814514, "learning_rate": 0.0001, "loss": 1.8894, "step": 3197 }, { "epoch": 0.3715364507696776, "grad_norm": 0.40069088339805603, "learning_rate": 0.0001, "loss": 1.5871, "step": 3198 }, { "epoch": 0.3716526285216381, "grad_norm": 0.4221431016921997, "learning_rate": 0.0001, "loss": 1.847, "step": 3199 }, { "epoch": 0.3717688062735986, "grad_norm": 0.4118482768535614, "learning_rate": 0.0001, "loss": 1.7249, "step": 3200 }, { "epoch": 0.3718849840255591, "grad_norm": 0.39366352558135986, "learning_rate": 0.0001, "loss": 1.6118, "step": 3201 }, { "epoch": 0.37200116177751963, "grad_norm": 0.38057029247283936, "learning_rate": 0.0001, "loss": 1.5149, "step": 3202 }, { "epoch": 0.3721173395294801, "grad_norm": 0.41096463799476624, "learning_rate": 0.0001, "loss": 1.6949, "step": 3203 }, { "epoch": 0.3722335172814406, "grad_norm": 0.39210009574890137, "learning_rate": 0.0001, "loss": 1.7374, "step": 3204 }, { "epoch": 0.3723496950334011, "grad_norm": 0.404095858335495, "learning_rate": 0.0001, "loss": 1.7286, "step": 3205 }, { "epoch": 0.3724658727853616, "grad_norm": 0.4096939265727997, "learning_rate": 0.0001, "loss": 1.6522, "step": 3206 }, { "epoch": 0.37258205053732213, "grad_norm": 0.41257479786872864, "learning_rate": 0.0001, "loss": 1.6904, "step": 3207 }, { "epoch": 0.3726982282892826, "grad_norm": 0.4033520221710205, "learning_rate": 0.0001, "loss": 1.707, "step": 3208 }, { "epoch": 0.3728144060412431, "grad_norm": 0.38651078939437866, "learning_rate": 0.0001, "loss": 1.7291, "step": 3209 }, { "epoch": 0.3729305837932036, "grad_norm": 0.40973809361457825, "learning_rate": 0.0001, "loss": 1.822, "step": 3210 }, { "epoch": 0.3730467615451641, "grad_norm": 0.40880417823791504, "learning_rate": 0.0001, "loss": 1.6477, "step": 3211 }, { "epoch": 0.3731629392971246, "grad_norm": 0.4003618061542511, "learning_rate": 0.0001, "loss": 1.7002, "step": 3212 }, { "epoch": 0.3732791170490851, "grad_norm": 0.38030943274497986, "learning_rate": 0.0001, "loss": 1.4813, "step": 3213 }, { "epoch": 0.3733952948010456, "grad_norm": 0.4228413999080658, "learning_rate": 0.0001, "loss": 1.6994, "step": 3214 }, { "epoch": 0.3735114725530061, "grad_norm": 0.3931327164173126, "learning_rate": 0.0001, "loss": 1.5443, "step": 3215 }, { "epoch": 0.3736276503049666, "grad_norm": 0.39333489537239075, "learning_rate": 0.0001, "loss": 1.7909, "step": 3216 }, { "epoch": 0.3737438280569271, "grad_norm": 0.4146861433982849, "learning_rate": 0.0001, "loss": 1.9366, "step": 3217 }, { "epoch": 0.3738600058088876, "grad_norm": 0.3847891390323639, "learning_rate": 0.0001, "loss": 1.5637, "step": 3218 }, { "epoch": 0.3739761835608481, "grad_norm": 0.3797701299190521, "learning_rate": 0.0001, "loss": 1.6363, "step": 3219 }, { "epoch": 0.3740923613128086, "grad_norm": 0.38476166129112244, "learning_rate": 0.0001, "loss": 1.6911, "step": 3220 }, { "epoch": 0.3742085390647691, "grad_norm": 0.39565321803092957, "learning_rate": 0.0001, "loss": 1.6827, "step": 3221 }, { "epoch": 0.3743247168167296, "grad_norm": 0.38622844219207764, "learning_rate": 0.0001, "loss": 1.6236, "step": 3222 }, { "epoch": 0.3744408945686901, "grad_norm": 0.39825373888015747, "learning_rate": 0.0001, "loss": 1.716, "step": 3223 }, { "epoch": 0.3745570723206506, "grad_norm": 0.35864919424057007, "learning_rate": 0.0001, "loss": 1.5082, "step": 3224 }, { "epoch": 0.3746732500726111, "grad_norm": 0.40784189105033875, "learning_rate": 0.0001, "loss": 1.7036, "step": 3225 }, { "epoch": 0.3747894278245716, "grad_norm": 0.41927802562713623, "learning_rate": 0.0001, "loss": 1.7068, "step": 3226 }, { "epoch": 0.3749056055765321, "grad_norm": 0.44369855523109436, "learning_rate": 0.0001, "loss": 1.6996, "step": 3227 }, { "epoch": 0.3750217833284926, "grad_norm": 0.36899685859680176, "learning_rate": 0.0001, "loss": 1.4204, "step": 3228 }, { "epoch": 0.3751379610804531, "grad_norm": 0.3842264711856842, "learning_rate": 0.0001, "loss": 1.7682, "step": 3229 }, { "epoch": 0.3752541388324136, "grad_norm": 0.41173362731933594, "learning_rate": 0.0001, "loss": 1.7494, "step": 3230 }, { "epoch": 0.3753703165843741, "grad_norm": 0.37613826990127563, "learning_rate": 0.0001, "loss": 1.5564, "step": 3231 }, { "epoch": 0.3754864943363346, "grad_norm": 0.4120997190475464, "learning_rate": 0.0001, "loss": 1.8258, "step": 3232 }, { "epoch": 0.37560267208829506, "grad_norm": 0.4014083743095398, "learning_rate": 0.0001, "loss": 1.7189, "step": 3233 }, { "epoch": 0.3757188498402556, "grad_norm": 0.38191670179367065, "learning_rate": 0.0001, "loss": 1.5884, "step": 3234 }, { "epoch": 0.3758350275922161, "grad_norm": 0.3927380442619324, "learning_rate": 0.0001, "loss": 1.7031, "step": 3235 }, { "epoch": 0.3759512053441766, "grad_norm": 0.39120250940322876, "learning_rate": 0.0001, "loss": 1.5805, "step": 3236 }, { "epoch": 0.3760673830961371, "grad_norm": 0.36845624446868896, "learning_rate": 0.0001, "loss": 1.6001, "step": 3237 }, { "epoch": 0.37618356084809756, "grad_norm": 0.39836472272872925, "learning_rate": 0.0001, "loss": 1.7282, "step": 3238 }, { "epoch": 0.3762997386000581, "grad_norm": 0.3965427279472351, "learning_rate": 0.0001, "loss": 1.6275, "step": 3239 }, { "epoch": 0.3764159163520186, "grad_norm": 0.4197937846183777, "learning_rate": 0.0001, "loss": 1.6893, "step": 3240 }, { "epoch": 0.3765320941039791, "grad_norm": 0.42778176069259644, "learning_rate": 0.0001, "loss": 1.7027, "step": 3241 }, { "epoch": 0.3766482718559396, "grad_norm": 0.3915843665599823, "learning_rate": 0.0001, "loss": 1.723, "step": 3242 }, { "epoch": 0.37676444960790006, "grad_norm": 0.401483416557312, "learning_rate": 0.0001, "loss": 1.573, "step": 3243 }, { "epoch": 0.3768806273598606, "grad_norm": 0.4040675163269043, "learning_rate": 0.0001, "loss": 1.5794, "step": 3244 }, { "epoch": 0.3769968051118211, "grad_norm": 0.39297956228256226, "learning_rate": 0.0001, "loss": 1.6027, "step": 3245 }, { "epoch": 0.3771129828637816, "grad_norm": 0.3960307240486145, "learning_rate": 0.0001, "loss": 1.5524, "step": 3246 }, { "epoch": 0.3772291606157421, "grad_norm": 0.4098290205001831, "learning_rate": 0.0001, "loss": 1.7013, "step": 3247 }, { "epoch": 0.37734533836770257, "grad_norm": 0.4199242889881134, "learning_rate": 0.0001, "loss": 1.6731, "step": 3248 }, { "epoch": 0.3774615161196631, "grad_norm": 0.4626270830631256, "learning_rate": 0.0001, "loss": 1.9117, "step": 3249 }, { "epoch": 0.3775776938716236, "grad_norm": 0.3856443464756012, "learning_rate": 0.0001, "loss": 1.4409, "step": 3250 }, { "epoch": 0.3776938716235841, "grad_norm": 0.3848719000816345, "learning_rate": 0.0001, "loss": 1.6576, "step": 3251 }, { "epoch": 0.3778100493755446, "grad_norm": 0.4036993384361267, "learning_rate": 0.0001, "loss": 1.8056, "step": 3252 }, { "epoch": 0.37792622712750507, "grad_norm": 0.42733898758888245, "learning_rate": 0.0001, "loss": 1.73, "step": 3253 }, { "epoch": 0.3780424048794656, "grad_norm": 0.3956359922885895, "learning_rate": 0.0001, "loss": 1.7278, "step": 3254 }, { "epoch": 0.3781585826314261, "grad_norm": 0.391928106546402, "learning_rate": 0.0001, "loss": 1.5629, "step": 3255 }, { "epoch": 0.3782747603833866, "grad_norm": 0.4201536774635315, "learning_rate": 0.0001, "loss": 1.8002, "step": 3256 }, { "epoch": 0.3783909381353471, "grad_norm": 0.41449272632598877, "learning_rate": 0.0001, "loss": 1.6748, "step": 3257 }, { "epoch": 0.37850711588730757, "grad_norm": 0.4059070944786072, "learning_rate": 0.0001, "loss": 1.6892, "step": 3258 }, { "epoch": 0.37862329363926805, "grad_norm": 0.40105417370796204, "learning_rate": 0.0001, "loss": 1.6817, "step": 3259 }, { "epoch": 0.3787394713912286, "grad_norm": 0.4251152276992798, "learning_rate": 0.0001, "loss": 1.6597, "step": 3260 }, { "epoch": 0.3788556491431891, "grad_norm": 0.3787403702735901, "learning_rate": 0.0001, "loss": 1.6152, "step": 3261 }, { "epoch": 0.3789718268951496, "grad_norm": 0.39760053157806396, "learning_rate": 0.0001, "loss": 1.7866, "step": 3262 }, { "epoch": 0.37908800464711007, "grad_norm": 0.3865306079387665, "learning_rate": 0.0001, "loss": 1.5825, "step": 3263 }, { "epoch": 0.37920418239907056, "grad_norm": 0.40044572949409485, "learning_rate": 0.0001, "loss": 1.7403, "step": 3264 }, { "epoch": 0.3793203601510311, "grad_norm": 0.36181673407554626, "learning_rate": 0.0001, "loss": 1.5654, "step": 3265 }, { "epoch": 0.3794365379029916, "grad_norm": 0.4151827096939087, "learning_rate": 0.0001, "loss": 1.7351, "step": 3266 }, { "epoch": 0.3795527156549521, "grad_norm": 0.3959139287471771, "learning_rate": 0.0001, "loss": 1.5862, "step": 3267 }, { "epoch": 0.37966889340691257, "grad_norm": 0.40265771746635437, "learning_rate": 0.0001, "loss": 1.7255, "step": 3268 }, { "epoch": 0.37978507115887306, "grad_norm": 0.39795050024986267, "learning_rate": 0.0001, "loss": 1.6619, "step": 3269 }, { "epoch": 0.3799012489108336, "grad_norm": 0.38326337933540344, "learning_rate": 0.0001, "loss": 1.6536, "step": 3270 }, { "epoch": 0.3800174266627941, "grad_norm": 0.4339217245578766, "learning_rate": 0.0001, "loss": 1.7723, "step": 3271 }, { "epoch": 0.3801336044147546, "grad_norm": 0.391989141702652, "learning_rate": 0.0001, "loss": 1.5652, "step": 3272 }, { "epoch": 0.38024978216671507, "grad_norm": 0.3686724901199341, "learning_rate": 0.0001, "loss": 1.4674, "step": 3273 }, { "epoch": 0.38036595991867556, "grad_norm": 0.3889879584312439, "learning_rate": 0.0001, "loss": 1.6417, "step": 3274 }, { "epoch": 0.3804821376706361, "grad_norm": 0.37887483835220337, "learning_rate": 0.0001, "loss": 1.64, "step": 3275 }, { "epoch": 0.3805983154225966, "grad_norm": 0.3940137028694153, "learning_rate": 0.0001, "loss": 1.569, "step": 3276 }, { "epoch": 0.3807144931745571, "grad_norm": 0.38315024971961975, "learning_rate": 0.0001, "loss": 1.654, "step": 3277 }, { "epoch": 0.38083067092651757, "grad_norm": 0.43694132566452026, "learning_rate": 0.0001, "loss": 1.61, "step": 3278 }, { "epoch": 0.38094684867847806, "grad_norm": 0.37439560890197754, "learning_rate": 0.0001, "loss": 1.4972, "step": 3279 }, { "epoch": 0.38106302643043855, "grad_norm": 0.39639008045196533, "learning_rate": 0.0001, "loss": 1.5886, "step": 3280 }, { "epoch": 0.3811792041823991, "grad_norm": 0.41483819484710693, "learning_rate": 0.0001, "loss": 1.7358, "step": 3281 }, { "epoch": 0.3812953819343596, "grad_norm": 0.38614705204963684, "learning_rate": 0.0001, "loss": 1.884, "step": 3282 }, { "epoch": 0.38141155968632007, "grad_norm": 0.4163734018802643, "learning_rate": 0.0001, "loss": 1.7154, "step": 3283 }, { "epoch": 0.38152773743828056, "grad_norm": 0.3871447741985321, "learning_rate": 0.0001, "loss": 1.8271, "step": 3284 }, { "epoch": 0.38164391519024105, "grad_norm": 0.4444115161895752, "learning_rate": 0.0001, "loss": 1.7751, "step": 3285 }, { "epoch": 0.3817600929422016, "grad_norm": 0.3978256285190582, "learning_rate": 0.0001, "loss": 1.7136, "step": 3286 }, { "epoch": 0.3818762706941621, "grad_norm": 0.37365588545799255, "learning_rate": 0.0001, "loss": 1.55, "step": 3287 }, { "epoch": 0.38199244844612257, "grad_norm": 0.39886194467544556, "learning_rate": 0.0001, "loss": 1.7383, "step": 3288 }, { "epoch": 0.38210862619808306, "grad_norm": 0.4288283586502075, "learning_rate": 0.0001, "loss": 1.7196, "step": 3289 }, { "epoch": 0.38222480395004355, "grad_norm": 0.40688732266426086, "learning_rate": 0.0001, "loss": 1.8548, "step": 3290 }, { "epoch": 0.3823409817020041, "grad_norm": 0.3931783437728882, "learning_rate": 0.0001, "loss": 1.7435, "step": 3291 }, { "epoch": 0.3824571594539646, "grad_norm": 0.41494035720825195, "learning_rate": 0.0001, "loss": 1.7333, "step": 3292 }, { "epoch": 0.38257333720592507, "grad_norm": 0.43234965205192566, "learning_rate": 0.0001, "loss": 1.6722, "step": 3293 }, { "epoch": 0.38268951495788556, "grad_norm": 0.3797743022441864, "learning_rate": 0.0001, "loss": 1.6612, "step": 3294 }, { "epoch": 0.38280569270984605, "grad_norm": 0.39665845036506653, "learning_rate": 0.0001, "loss": 1.6871, "step": 3295 }, { "epoch": 0.3829218704618066, "grad_norm": 0.430963933467865, "learning_rate": 0.0001, "loss": 1.757, "step": 3296 }, { "epoch": 0.3830380482137671, "grad_norm": 0.3795839250087738, "learning_rate": 0.0001, "loss": 1.727, "step": 3297 }, { "epoch": 0.38315422596572757, "grad_norm": 0.3861173987388611, "learning_rate": 0.0001, "loss": 1.5847, "step": 3298 }, { "epoch": 0.38327040371768806, "grad_norm": 0.4656057357788086, "learning_rate": 0.0001, "loss": 1.7191, "step": 3299 }, { "epoch": 0.38338658146964855, "grad_norm": 0.42121168971061707, "learning_rate": 0.0001, "loss": 1.7395, "step": 3300 }, { "epoch": 0.38350275922160904, "grad_norm": 0.4215461313724518, "learning_rate": 0.0001, "loss": 1.7149, "step": 3301 }, { "epoch": 0.3836189369735696, "grad_norm": 0.3778843581676483, "learning_rate": 0.0001, "loss": 1.6032, "step": 3302 }, { "epoch": 0.38373511472553007, "grad_norm": 0.372529000043869, "learning_rate": 0.0001, "loss": 1.6222, "step": 3303 }, { "epoch": 0.38385129247749056, "grad_norm": 0.3816990852355957, "learning_rate": 0.0001, "loss": 1.6851, "step": 3304 }, { "epoch": 0.38396747022945105, "grad_norm": 0.3976283669471741, "learning_rate": 0.0001, "loss": 1.7161, "step": 3305 }, { "epoch": 0.38408364798141154, "grad_norm": 0.38981184363365173, "learning_rate": 0.0001, "loss": 1.7524, "step": 3306 }, { "epoch": 0.3841998257333721, "grad_norm": 0.4135308265686035, "learning_rate": 0.0001, "loss": 1.5426, "step": 3307 }, { "epoch": 0.38431600348533257, "grad_norm": 0.39930155873298645, "learning_rate": 0.0001, "loss": 1.785, "step": 3308 }, { "epoch": 0.38443218123729306, "grad_norm": 0.36703500151634216, "learning_rate": 0.0001, "loss": 1.4164, "step": 3309 }, { "epoch": 0.38454835898925355, "grad_norm": 0.396085262298584, "learning_rate": 0.0001, "loss": 1.6961, "step": 3310 }, { "epoch": 0.38466453674121404, "grad_norm": 0.4066247045993805, "learning_rate": 0.0001, "loss": 1.6314, "step": 3311 }, { "epoch": 0.3847807144931746, "grad_norm": 0.4137192368507385, "learning_rate": 0.0001, "loss": 1.6474, "step": 3312 }, { "epoch": 0.38489689224513507, "grad_norm": 0.44450923800468445, "learning_rate": 0.0001, "loss": 1.713, "step": 3313 }, { "epoch": 0.38501306999709556, "grad_norm": 0.4225256145000458, "learning_rate": 0.0001, "loss": 1.5993, "step": 3314 }, { "epoch": 0.38512924774905605, "grad_norm": 0.37783291935920715, "learning_rate": 0.0001, "loss": 1.563, "step": 3315 }, { "epoch": 0.38524542550101654, "grad_norm": 0.3956874907016754, "learning_rate": 0.0001, "loss": 1.6478, "step": 3316 }, { "epoch": 0.3853616032529771, "grad_norm": 0.39644670486450195, "learning_rate": 0.0001, "loss": 1.631, "step": 3317 }, { "epoch": 0.38547778100493757, "grad_norm": 0.4001430869102478, "learning_rate": 0.0001, "loss": 1.5372, "step": 3318 }, { "epoch": 0.38559395875689806, "grad_norm": 0.42300862073898315, "learning_rate": 0.0001, "loss": 1.65, "step": 3319 }, { "epoch": 0.38571013650885855, "grad_norm": 0.4075738191604614, "learning_rate": 0.0001, "loss": 1.5891, "step": 3320 }, { "epoch": 0.38582631426081904, "grad_norm": 0.40944305062294006, "learning_rate": 0.0001, "loss": 1.6537, "step": 3321 }, { "epoch": 0.3859424920127795, "grad_norm": 0.41256004571914673, "learning_rate": 0.0001, "loss": 1.7812, "step": 3322 }, { "epoch": 0.38605866976474007, "grad_norm": 0.45253586769104004, "learning_rate": 0.0001, "loss": 1.9485, "step": 3323 }, { "epoch": 0.38617484751670056, "grad_norm": 0.3973802626132965, "learning_rate": 0.0001, "loss": 1.7537, "step": 3324 }, { "epoch": 0.38629102526866105, "grad_norm": 0.3796943724155426, "learning_rate": 0.0001, "loss": 1.6582, "step": 3325 }, { "epoch": 0.38640720302062154, "grad_norm": 0.38597339391708374, "learning_rate": 0.0001, "loss": 1.7241, "step": 3326 }, { "epoch": 0.386523380772582, "grad_norm": 0.4044647514820099, "learning_rate": 0.0001, "loss": 1.6084, "step": 3327 }, { "epoch": 0.38663955852454257, "grad_norm": 0.38887619972229004, "learning_rate": 0.0001, "loss": 1.6295, "step": 3328 }, { "epoch": 0.38675573627650306, "grad_norm": 0.3996337354183197, "learning_rate": 0.0001, "loss": 1.6442, "step": 3329 }, { "epoch": 0.38687191402846355, "grad_norm": 0.3903794288635254, "learning_rate": 0.0001, "loss": 1.6134, "step": 3330 }, { "epoch": 0.38698809178042404, "grad_norm": 0.41847532987594604, "learning_rate": 0.0001, "loss": 1.7084, "step": 3331 }, { "epoch": 0.3871042695323845, "grad_norm": 0.3963734805583954, "learning_rate": 0.0001, "loss": 1.646, "step": 3332 }, { "epoch": 0.38722044728434507, "grad_norm": 0.3818768858909607, "learning_rate": 0.0001, "loss": 1.6953, "step": 3333 }, { "epoch": 0.38733662503630556, "grad_norm": 0.4042767286300659, "learning_rate": 0.0001, "loss": 1.6731, "step": 3334 }, { "epoch": 0.38745280278826605, "grad_norm": 0.3764187693595886, "learning_rate": 0.0001, "loss": 1.6663, "step": 3335 }, { "epoch": 0.38756898054022654, "grad_norm": 0.41739174723625183, "learning_rate": 0.0001, "loss": 1.771, "step": 3336 }, { "epoch": 0.387685158292187, "grad_norm": 0.3916127383708954, "learning_rate": 0.0001, "loss": 1.5899, "step": 3337 }, { "epoch": 0.38780133604414757, "grad_norm": 0.3758176863193512, "learning_rate": 0.0001, "loss": 1.5736, "step": 3338 }, { "epoch": 0.38791751379610806, "grad_norm": 0.37665247917175293, "learning_rate": 0.0001, "loss": 1.5968, "step": 3339 }, { "epoch": 0.38803369154806855, "grad_norm": 0.38842839002609253, "learning_rate": 0.0001, "loss": 1.6628, "step": 3340 }, { "epoch": 0.38814986930002904, "grad_norm": 0.4044833183288574, "learning_rate": 0.0001, "loss": 1.7784, "step": 3341 }, { "epoch": 0.3882660470519895, "grad_norm": 0.3983111083507538, "learning_rate": 0.0001, "loss": 1.5932, "step": 3342 }, { "epoch": 0.38838222480395007, "grad_norm": 0.3763045072555542, "learning_rate": 0.0001, "loss": 1.6841, "step": 3343 }, { "epoch": 0.38849840255591056, "grad_norm": 0.43065011501312256, "learning_rate": 0.0001, "loss": 1.7841, "step": 3344 }, { "epoch": 0.38861458030787105, "grad_norm": 0.3971543312072754, "learning_rate": 0.0001, "loss": 1.7052, "step": 3345 }, { "epoch": 0.38873075805983154, "grad_norm": 0.4392778277397156, "learning_rate": 0.0001, "loss": 1.7309, "step": 3346 }, { "epoch": 0.388846935811792, "grad_norm": 0.41214969754219055, "learning_rate": 0.0001, "loss": 1.6677, "step": 3347 }, { "epoch": 0.3889631135637525, "grad_norm": 0.3774077296257019, "learning_rate": 0.0001, "loss": 1.594, "step": 3348 }, { "epoch": 0.38907929131571306, "grad_norm": 0.4119464159011841, "learning_rate": 0.0001, "loss": 1.7172, "step": 3349 }, { "epoch": 0.38919546906767355, "grad_norm": 0.4135492146015167, "learning_rate": 0.0001, "loss": 1.7139, "step": 3350 }, { "epoch": 0.38931164681963404, "grad_norm": 0.41081520915031433, "learning_rate": 0.0001, "loss": 1.8803, "step": 3351 }, { "epoch": 0.3894278245715945, "grad_norm": 0.4038920998573303, "learning_rate": 0.0001, "loss": 1.8064, "step": 3352 }, { "epoch": 0.389544002323555, "grad_norm": 0.4117118716239929, "learning_rate": 0.0001, "loss": 1.6438, "step": 3353 }, { "epoch": 0.38966018007551556, "grad_norm": 0.3965054452419281, "learning_rate": 0.0001, "loss": 1.7807, "step": 3354 }, { "epoch": 0.38977635782747605, "grad_norm": 0.3699134886264801, "learning_rate": 0.0001, "loss": 1.6003, "step": 3355 }, { "epoch": 0.38989253557943654, "grad_norm": 0.3890858590602875, "learning_rate": 0.0001, "loss": 1.6699, "step": 3356 }, { "epoch": 0.39000871333139703, "grad_norm": 0.3924883008003235, "learning_rate": 0.0001, "loss": 1.6579, "step": 3357 }, { "epoch": 0.3901248910833575, "grad_norm": 0.40697407722473145, "learning_rate": 0.0001, "loss": 1.743, "step": 3358 }, { "epoch": 0.39024106883531806, "grad_norm": 0.3949110507965088, "learning_rate": 0.0001, "loss": 1.7615, "step": 3359 }, { "epoch": 0.39035724658727855, "grad_norm": 0.4023362100124359, "learning_rate": 0.0001, "loss": 1.7459, "step": 3360 }, { "epoch": 0.39047342433923904, "grad_norm": 0.3989759385585785, "learning_rate": 0.0001, "loss": 1.6347, "step": 3361 }, { "epoch": 0.39058960209119953, "grad_norm": 0.3800547420978546, "learning_rate": 0.0001, "loss": 1.5109, "step": 3362 }, { "epoch": 0.39070577984316, "grad_norm": 0.408544659614563, "learning_rate": 0.0001, "loss": 1.7846, "step": 3363 }, { "epoch": 0.39082195759512056, "grad_norm": 0.41592341661453247, "learning_rate": 0.0001, "loss": 1.6665, "step": 3364 }, { "epoch": 0.39093813534708105, "grad_norm": 0.4047764539718628, "learning_rate": 0.0001, "loss": 1.88, "step": 3365 }, { "epoch": 0.39105431309904154, "grad_norm": 0.3952164649963379, "learning_rate": 0.0001, "loss": 1.5757, "step": 3366 }, { "epoch": 0.39117049085100203, "grad_norm": 0.39583536982536316, "learning_rate": 0.0001, "loss": 1.5236, "step": 3367 }, { "epoch": 0.3912866686029625, "grad_norm": 0.40658921003341675, "learning_rate": 0.0001, "loss": 1.6708, "step": 3368 }, { "epoch": 0.391402846354923, "grad_norm": 0.42151522636413574, "learning_rate": 0.0001, "loss": 1.7088, "step": 3369 }, { "epoch": 0.39151902410688355, "grad_norm": 0.42344412207603455, "learning_rate": 0.0001, "loss": 1.7888, "step": 3370 }, { "epoch": 0.39163520185884404, "grad_norm": 0.3720431625843048, "learning_rate": 0.0001, "loss": 1.6304, "step": 3371 }, { "epoch": 0.39175137961080453, "grad_norm": 0.39411425590515137, "learning_rate": 0.0001, "loss": 1.7272, "step": 3372 }, { "epoch": 0.391867557362765, "grad_norm": 0.3884545862674713, "learning_rate": 0.0001, "loss": 1.6546, "step": 3373 }, { "epoch": 0.3919837351147255, "grad_norm": 0.4163500666618347, "learning_rate": 0.0001, "loss": 1.7583, "step": 3374 }, { "epoch": 0.39209991286668605, "grad_norm": 0.38707318902015686, "learning_rate": 0.0001, "loss": 1.6019, "step": 3375 }, { "epoch": 0.39221609061864654, "grad_norm": 0.4354010820388794, "learning_rate": 0.0001, "loss": 1.5799, "step": 3376 }, { "epoch": 0.39233226837060703, "grad_norm": 0.39656829833984375, "learning_rate": 0.0001, "loss": 1.6507, "step": 3377 }, { "epoch": 0.3924484461225675, "grad_norm": 0.39710286259651184, "learning_rate": 0.0001, "loss": 1.6391, "step": 3378 }, { "epoch": 0.392564623874528, "grad_norm": 0.40320920944213867, "learning_rate": 0.0001, "loss": 1.7816, "step": 3379 }, { "epoch": 0.39268080162648855, "grad_norm": 0.40849626064300537, "learning_rate": 0.0001, "loss": 1.5617, "step": 3380 }, { "epoch": 0.39279697937844904, "grad_norm": 0.37604308128356934, "learning_rate": 0.0001, "loss": 1.6333, "step": 3381 }, { "epoch": 0.39291315713040953, "grad_norm": 0.38765400648117065, "learning_rate": 0.0001, "loss": 1.6481, "step": 3382 }, { "epoch": 0.39302933488237, "grad_norm": 0.38033145666122437, "learning_rate": 0.0001, "loss": 1.7555, "step": 3383 }, { "epoch": 0.3931455126343305, "grad_norm": 0.39232638478279114, "learning_rate": 0.0001, "loss": 1.538, "step": 3384 }, { "epoch": 0.39326169038629105, "grad_norm": 0.41576313972473145, "learning_rate": 0.0001, "loss": 1.6984, "step": 3385 }, { "epoch": 0.39337786813825154, "grad_norm": 0.40268710255622864, "learning_rate": 0.0001, "loss": 1.7465, "step": 3386 }, { "epoch": 0.39349404589021203, "grad_norm": 0.3708171546459198, "learning_rate": 0.0001, "loss": 1.5194, "step": 3387 }, { "epoch": 0.3936102236421725, "grad_norm": 0.3807376027107239, "learning_rate": 0.0001, "loss": 1.5313, "step": 3388 }, { "epoch": 0.393726401394133, "grad_norm": 0.39858368039131165, "learning_rate": 0.0001, "loss": 1.7221, "step": 3389 }, { "epoch": 0.3938425791460935, "grad_norm": 0.3780364692211151, "learning_rate": 0.0001, "loss": 1.7133, "step": 3390 }, { "epoch": 0.39395875689805404, "grad_norm": 0.39685890078544617, "learning_rate": 0.0001, "loss": 1.7051, "step": 3391 }, { "epoch": 0.39407493465001453, "grad_norm": 0.4291624128818512, "learning_rate": 0.0001, "loss": 1.8533, "step": 3392 }, { "epoch": 0.394191112401975, "grad_norm": 0.366272509098053, "learning_rate": 0.0001, "loss": 1.4734, "step": 3393 }, { "epoch": 0.3943072901539355, "grad_norm": 0.3910382091999054, "learning_rate": 0.0001, "loss": 1.6785, "step": 3394 }, { "epoch": 0.394423467905896, "grad_norm": 0.4170825481414795, "learning_rate": 0.0001, "loss": 1.684, "step": 3395 }, { "epoch": 0.39453964565785654, "grad_norm": 0.3881252706050873, "learning_rate": 0.0001, "loss": 1.7354, "step": 3396 }, { "epoch": 0.39465582340981703, "grad_norm": 0.3868289291858673, "learning_rate": 0.0001, "loss": 1.619, "step": 3397 }, { "epoch": 0.3947720011617775, "grad_norm": 0.38081884384155273, "learning_rate": 0.0001, "loss": 1.6079, "step": 3398 }, { "epoch": 0.394888178913738, "grad_norm": 0.39945870637893677, "learning_rate": 0.0001, "loss": 1.5884, "step": 3399 }, { "epoch": 0.3950043566656985, "grad_norm": 0.4090173840522766, "learning_rate": 0.0001, "loss": 1.7628, "step": 3400 }, { "epoch": 0.39512053441765904, "grad_norm": 0.3833993971347809, "learning_rate": 0.0001, "loss": 1.4581, "step": 3401 }, { "epoch": 0.39523671216961953, "grad_norm": 0.4201992154121399, "learning_rate": 0.0001, "loss": 1.713, "step": 3402 }, { "epoch": 0.39535288992158, "grad_norm": 0.4019543528556824, "learning_rate": 0.0001, "loss": 1.6119, "step": 3403 }, { "epoch": 0.3954690676735405, "grad_norm": 0.3776283860206604, "learning_rate": 0.0001, "loss": 1.5757, "step": 3404 }, { "epoch": 0.395585245425501, "grad_norm": 0.38334932923316956, "learning_rate": 0.0001, "loss": 1.3783, "step": 3405 }, { "epoch": 0.39570142317746154, "grad_norm": 0.3982570767402649, "learning_rate": 0.0001, "loss": 1.6886, "step": 3406 }, { "epoch": 0.39581760092942203, "grad_norm": 0.3854517340660095, "learning_rate": 0.0001, "loss": 1.708, "step": 3407 }, { "epoch": 0.3959337786813825, "grad_norm": 0.3867027461528778, "learning_rate": 0.0001, "loss": 1.6259, "step": 3408 }, { "epoch": 0.396049956433343, "grad_norm": 0.39494284987449646, "learning_rate": 0.0001, "loss": 1.5789, "step": 3409 }, { "epoch": 0.3961661341853035, "grad_norm": 0.40102261304855347, "learning_rate": 0.0001, "loss": 1.6244, "step": 3410 }, { "epoch": 0.396282311937264, "grad_norm": 0.38136184215545654, "learning_rate": 0.0001, "loss": 1.5714, "step": 3411 }, { "epoch": 0.39639848968922453, "grad_norm": 0.40026000142097473, "learning_rate": 0.0001, "loss": 1.5351, "step": 3412 }, { "epoch": 0.396514667441185, "grad_norm": 0.4199739694595337, "learning_rate": 0.0001, "loss": 1.6364, "step": 3413 }, { "epoch": 0.3966308451931455, "grad_norm": 0.4212833344936371, "learning_rate": 0.0001, "loss": 1.6712, "step": 3414 }, { "epoch": 0.396747022945106, "grad_norm": 0.39813485741615295, "learning_rate": 0.0001, "loss": 1.4715, "step": 3415 }, { "epoch": 0.3968632006970665, "grad_norm": 0.40672633051872253, "learning_rate": 0.0001, "loss": 1.7394, "step": 3416 }, { "epoch": 0.39697937844902703, "grad_norm": 0.42593738436698914, "learning_rate": 0.0001, "loss": 1.8138, "step": 3417 }, { "epoch": 0.3970955562009875, "grad_norm": 0.39474889636039734, "learning_rate": 0.0001, "loss": 1.5933, "step": 3418 }, { "epoch": 0.397211733952948, "grad_norm": 0.4089927077293396, "learning_rate": 0.0001, "loss": 1.5349, "step": 3419 }, { "epoch": 0.3973279117049085, "grad_norm": 0.3997913897037506, "learning_rate": 0.0001, "loss": 1.8294, "step": 3420 }, { "epoch": 0.397444089456869, "grad_norm": 0.46322256326675415, "learning_rate": 0.0001, "loss": 1.7424, "step": 3421 }, { "epoch": 0.39756026720882953, "grad_norm": 0.3604629337787628, "learning_rate": 0.0001, "loss": 1.4269, "step": 3422 }, { "epoch": 0.39767644496079, "grad_norm": 0.3815920352935791, "learning_rate": 0.0001, "loss": 1.4519, "step": 3423 }, { "epoch": 0.3977926227127505, "grad_norm": 0.37603339552879333, "learning_rate": 0.0001, "loss": 1.5044, "step": 3424 }, { "epoch": 0.397908800464711, "grad_norm": 0.4085451364517212, "learning_rate": 0.0001, "loss": 1.7673, "step": 3425 }, { "epoch": 0.3980249782166715, "grad_norm": 0.46148502826690674, "learning_rate": 0.0001, "loss": 1.9431, "step": 3426 }, { "epoch": 0.39814115596863203, "grad_norm": 0.3642321527004242, "learning_rate": 0.0001, "loss": 1.3867, "step": 3427 }, { "epoch": 0.3982573337205925, "grad_norm": 0.39448490738868713, "learning_rate": 0.0001, "loss": 1.5879, "step": 3428 }, { "epoch": 0.398373511472553, "grad_norm": 0.37852832674980164, "learning_rate": 0.0001, "loss": 1.5359, "step": 3429 }, { "epoch": 0.3984896892245135, "grad_norm": 0.44772058725357056, "learning_rate": 0.0001, "loss": 1.7785, "step": 3430 }, { "epoch": 0.398605866976474, "grad_norm": 0.39221101999282837, "learning_rate": 0.0001, "loss": 1.689, "step": 3431 }, { "epoch": 0.3987220447284345, "grad_norm": 0.38119301199913025, "learning_rate": 0.0001, "loss": 1.6269, "step": 3432 }, { "epoch": 0.398838222480395, "grad_norm": 0.4090498685836792, "learning_rate": 0.0001, "loss": 1.6914, "step": 3433 }, { "epoch": 0.3989544002323555, "grad_norm": 0.37487149238586426, "learning_rate": 0.0001, "loss": 1.4476, "step": 3434 }, { "epoch": 0.399070577984316, "grad_norm": 0.39408043026924133, "learning_rate": 0.0001, "loss": 1.5866, "step": 3435 }, { "epoch": 0.3991867557362765, "grad_norm": 0.38384729623794556, "learning_rate": 0.0001, "loss": 1.627, "step": 3436 }, { "epoch": 0.399302933488237, "grad_norm": 0.4172252416610718, "learning_rate": 0.0001, "loss": 1.6085, "step": 3437 }, { "epoch": 0.3994191112401975, "grad_norm": 0.3903944790363312, "learning_rate": 0.0001, "loss": 1.6583, "step": 3438 }, { "epoch": 0.399535288992158, "grad_norm": 0.3812950849533081, "learning_rate": 0.0001, "loss": 1.6213, "step": 3439 }, { "epoch": 0.3996514667441185, "grad_norm": 0.4021446704864502, "learning_rate": 0.0001, "loss": 1.6177, "step": 3440 }, { "epoch": 0.399767644496079, "grad_norm": 0.3971196711063385, "learning_rate": 0.0001, "loss": 1.7095, "step": 3441 }, { "epoch": 0.3998838222480395, "grad_norm": 0.40641874074935913, "learning_rate": 0.0001, "loss": 1.572, "step": 3442 }, { "epoch": 0.4, "grad_norm": 0.41259273886680603, "learning_rate": 0.0001, "loss": 1.742, "step": 3443 }, { "epoch": 0.4001161777519605, "grad_norm": 0.35140666365623474, "learning_rate": 0.0001, "loss": 1.4164, "step": 3444 }, { "epoch": 0.400232355503921, "grad_norm": 0.3907198905944824, "learning_rate": 0.0001, "loss": 1.5896, "step": 3445 }, { "epoch": 0.4003485332558815, "grad_norm": 0.41215452551841736, "learning_rate": 0.0001, "loss": 1.5177, "step": 3446 }, { "epoch": 0.400464711007842, "grad_norm": 0.41600051522254944, "learning_rate": 0.0001, "loss": 1.9322, "step": 3447 }, { "epoch": 0.4005808887598025, "grad_norm": 0.4017678201198578, "learning_rate": 0.0001, "loss": 1.6239, "step": 3448 }, { "epoch": 0.400697066511763, "grad_norm": 0.4211594760417938, "learning_rate": 0.0001, "loss": 1.6339, "step": 3449 }, { "epoch": 0.4008132442637235, "grad_norm": 0.4117183983325958, "learning_rate": 0.0001, "loss": 1.8139, "step": 3450 }, { "epoch": 0.400929422015684, "grad_norm": 0.39731109142303467, "learning_rate": 0.0001, "loss": 1.6456, "step": 3451 }, { "epoch": 0.4010455997676445, "grad_norm": 0.3891085088253021, "learning_rate": 0.0001, "loss": 1.6854, "step": 3452 }, { "epoch": 0.401161777519605, "grad_norm": 0.3980685770511627, "learning_rate": 0.0001, "loss": 1.7527, "step": 3453 }, { "epoch": 0.4012779552715655, "grad_norm": 0.43081504106521606, "learning_rate": 0.0001, "loss": 1.7178, "step": 3454 }, { "epoch": 0.401394133023526, "grad_norm": 0.38269612193107605, "learning_rate": 0.0001, "loss": 1.6664, "step": 3455 }, { "epoch": 0.4015103107754865, "grad_norm": 0.4062149226665497, "learning_rate": 0.0001, "loss": 1.5922, "step": 3456 }, { "epoch": 0.401626488527447, "grad_norm": 0.38731974363327026, "learning_rate": 0.0001, "loss": 1.6712, "step": 3457 }, { "epoch": 0.40174266627940747, "grad_norm": 0.4293443560600281, "learning_rate": 0.0001, "loss": 1.8078, "step": 3458 }, { "epoch": 0.401858844031368, "grad_norm": 0.3938728868961334, "learning_rate": 0.0001, "loss": 1.5877, "step": 3459 }, { "epoch": 0.4019750217833285, "grad_norm": 0.41334471106529236, "learning_rate": 0.0001, "loss": 1.6082, "step": 3460 }, { "epoch": 0.402091199535289, "grad_norm": 0.39084866642951965, "learning_rate": 0.0001, "loss": 1.6759, "step": 3461 }, { "epoch": 0.4022073772872495, "grad_norm": 0.4102165400981903, "learning_rate": 0.0001, "loss": 1.6758, "step": 3462 }, { "epoch": 0.40232355503920997, "grad_norm": 0.4234514534473419, "learning_rate": 0.0001, "loss": 1.7546, "step": 3463 }, { "epoch": 0.4024397327911705, "grad_norm": 0.38048434257507324, "learning_rate": 0.0001, "loss": 1.551, "step": 3464 }, { "epoch": 0.402555910543131, "grad_norm": 0.39564260840415955, "learning_rate": 0.0001, "loss": 1.7084, "step": 3465 }, { "epoch": 0.4026720882950915, "grad_norm": 0.40055975317955017, "learning_rate": 0.0001, "loss": 1.7346, "step": 3466 }, { "epoch": 0.402788266047052, "grad_norm": 0.424216628074646, "learning_rate": 0.0001, "loss": 1.713, "step": 3467 }, { "epoch": 0.40290444379901247, "grad_norm": 0.38856959342956543, "learning_rate": 0.0001, "loss": 1.6781, "step": 3468 }, { "epoch": 0.403020621550973, "grad_norm": 0.4526919424533844, "learning_rate": 0.0001, "loss": 1.7348, "step": 3469 }, { "epoch": 0.4031367993029335, "grad_norm": 0.4121178686618805, "learning_rate": 0.0001, "loss": 1.634, "step": 3470 }, { "epoch": 0.403252977054894, "grad_norm": 0.41178447008132935, "learning_rate": 0.0001, "loss": 1.6574, "step": 3471 }, { "epoch": 0.4033691548068545, "grad_norm": 0.3908507823944092, "learning_rate": 0.0001, "loss": 1.594, "step": 3472 }, { "epoch": 0.40348533255881497, "grad_norm": 0.4167093336582184, "learning_rate": 0.0001, "loss": 1.7207, "step": 3473 }, { "epoch": 0.4036015103107755, "grad_norm": 0.3978476822376251, "learning_rate": 0.0001, "loss": 1.533, "step": 3474 }, { "epoch": 0.403717688062736, "grad_norm": 0.4014563262462616, "learning_rate": 0.0001, "loss": 1.6501, "step": 3475 }, { "epoch": 0.4038338658146965, "grad_norm": 0.42038458585739136, "learning_rate": 0.0001, "loss": 1.7169, "step": 3476 }, { "epoch": 0.403950043566657, "grad_norm": 0.40541571378707886, "learning_rate": 0.0001, "loss": 1.6219, "step": 3477 }, { "epoch": 0.40406622131861747, "grad_norm": 0.39469483494758606, "learning_rate": 0.0001, "loss": 1.4549, "step": 3478 }, { "epoch": 0.40418239907057796, "grad_norm": 0.38308000564575195, "learning_rate": 0.0001, "loss": 1.502, "step": 3479 }, { "epoch": 0.4042985768225385, "grad_norm": 0.3976595103740692, "learning_rate": 0.0001, "loss": 1.5874, "step": 3480 }, { "epoch": 0.404414754574499, "grad_norm": 0.4284829795360565, "learning_rate": 0.0001, "loss": 1.7053, "step": 3481 }, { "epoch": 0.4045309323264595, "grad_norm": 0.36719101667404175, "learning_rate": 0.0001, "loss": 1.5308, "step": 3482 }, { "epoch": 0.40464711007841997, "grad_norm": 0.4026053845882416, "learning_rate": 0.0001, "loss": 1.6293, "step": 3483 }, { "epoch": 0.40476328783038046, "grad_norm": 0.40550971031188965, "learning_rate": 0.0001, "loss": 1.6716, "step": 3484 }, { "epoch": 0.404879465582341, "grad_norm": 0.3787073493003845, "learning_rate": 0.0001, "loss": 1.6136, "step": 3485 }, { "epoch": 0.4049956433343015, "grad_norm": 0.38911107182502747, "learning_rate": 0.0001, "loss": 1.4699, "step": 3486 }, { "epoch": 0.405111821086262, "grad_norm": 0.420773446559906, "learning_rate": 0.0001, "loss": 1.6514, "step": 3487 }, { "epoch": 0.40522799883822247, "grad_norm": 0.40293148159980774, "learning_rate": 0.0001, "loss": 1.6972, "step": 3488 }, { "epoch": 0.40534417659018296, "grad_norm": 0.40412238240242004, "learning_rate": 0.0001, "loss": 1.4579, "step": 3489 }, { "epoch": 0.4054603543421435, "grad_norm": 0.42694681882858276, "learning_rate": 0.0001, "loss": 1.6944, "step": 3490 }, { "epoch": 0.405576532094104, "grad_norm": 0.3828969895839691, "learning_rate": 0.0001, "loss": 1.6205, "step": 3491 }, { "epoch": 0.4056927098460645, "grad_norm": 0.4304792582988739, "learning_rate": 0.0001, "loss": 1.65, "step": 3492 }, { "epoch": 0.40580888759802497, "grad_norm": 0.40881672501564026, "learning_rate": 0.0001, "loss": 1.6384, "step": 3493 }, { "epoch": 0.40592506534998546, "grad_norm": 0.3886183202266693, "learning_rate": 0.0001, "loss": 1.6737, "step": 3494 }, { "epoch": 0.406041243101946, "grad_norm": 0.3807865083217621, "learning_rate": 0.0001, "loss": 1.6524, "step": 3495 }, { "epoch": 0.4061574208539065, "grad_norm": 0.4547371566295624, "learning_rate": 0.0001, "loss": 2.0104, "step": 3496 }, { "epoch": 0.406273598605867, "grad_norm": 0.43122467398643494, "learning_rate": 0.0001, "loss": 1.5135, "step": 3497 }, { "epoch": 0.40638977635782747, "grad_norm": 0.43641188740730286, "learning_rate": 0.0001, "loss": 1.7342, "step": 3498 }, { "epoch": 0.40650595410978796, "grad_norm": 0.38105788826942444, "learning_rate": 0.0001, "loss": 1.5406, "step": 3499 }, { "epoch": 0.40662213186174845, "grad_norm": 0.38278716802597046, "learning_rate": 0.0001, "loss": 1.6807, "step": 3500 }, { "epoch": 0.406738309613709, "grad_norm": 0.4314371645450592, "learning_rate": 0.0001, "loss": 1.6348, "step": 3501 }, { "epoch": 0.4068544873656695, "grad_norm": 0.39707180857658386, "learning_rate": 0.0001, "loss": 1.6498, "step": 3502 }, { "epoch": 0.40697066511762997, "grad_norm": 0.40061885118484497, "learning_rate": 0.0001, "loss": 1.7397, "step": 3503 }, { "epoch": 0.40708684286959046, "grad_norm": 0.39981821179389954, "learning_rate": 0.0001, "loss": 1.7075, "step": 3504 }, { "epoch": 0.40720302062155095, "grad_norm": 0.41869446635246277, "learning_rate": 0.0001, "loss": 1.7211, "step": 3505 }, { "epoch": 0.4073191983735115, "grad_norm": 0.4269830584526062, "learning_rate": 0.0001, "loss": 1.7423, "step": 3506 }, { "epoch": 0.407435376125472, "grad_norm": 0.44403141736984253, "learning_rate": 0.0001, "loss": 1.7895, "step": 3507 }, { "epoch": 0.40755155387743247, "grad_norm": 0.4114527404308319, "learning_rate": 0.0001, "loss": 1.6789, "step": 3508 }, { "epoch": 0.40766773162939296, "grad_norm": 0.40180087089538574, "learning_rate": 0.0001, "loss": 1.7476, "step": 3509 }, { "epoch": 0.40778390938135345, "grad_norm": 0.4387998580932617, "learning_rate": 0.0001, "loss": 1.6723, "step": 3510 }, { "epoch": 0.407900087133314, "grad_norm": 0.43841442465782166, "learning_rate": 0.0001, "loss": 1.7248, "step": 3511 }, { "epoch": 0.4080162648852745, "grad_norm": 0.41502171754837036, "learning_rate": 0.0001, "loss": 1.7085, "step": 3512 }, { "epoch": 0.40813244263723497, "grad_norm": 0.40264812111854553, "learning_rate": 0.0001, "loss": 1.7709, "step": 3513 }, { "epoch": 0.40824862038919546, "grad_norm": 0.39349353313446045, "learning_rate": 0.0001, "loss": 1.6735, "step": 3514 }, { "epoch": 0.40836479814115595, "grad_norm": 0.42213505506515503, "learning_rate": 0.0001, "loss": 1.5739, "step": 3515 }, { "epoch": 0.4084809758931165, "grad_norm": 0.382960706949234, "learning_rate": 0.0001, "loss": 1.6137, "step": 3516 }, { "epoch": 0.408597153645077, "grad_norm": 0.4227701723575592, "learning_rate": 0.0001, "loss": 1.7499, "step": 3517 }, { "epoch": 0.40871333139703747, "grad_norm": 0.4224139451980591, "learning_rate": 0.0001, "loss": 1.6512, "step": 3518 }, { "epoch": 0.40882950914899796, "grad_norm": 0.3891499638557434, "learning_rate": 0.0001, "loss": 1.5613, "step": 3519 }, { "epoch": 0.40894568690095845, "grad_norm": 0.40314075350761414, "learning_rate": 0.0001, "loss": 1.6407, "step": 3520 }, { "epoch": 0.40906186465291894, "grad_norm": 0.3946097791194916, "learning_rate": 0.0001, "loss": 1.5376, "step": 3521 }, { "epoch": 0.4091780424048795, "grad_norm": 0.41073909401893616, "learning_rate": 0.0001, "loss": 1.7195, "step": 3522 }, { "epoch": 0.40929422015683997, "grad_norm": 0.3862104117870331, "learning_rate": 0.0001, "loss": 1.5824, "step": 3523 }, { "epoch": 0.40941039790880046, "grad_norm": 0.38965824246406555, "learning_rate": 0.0001, "loss": 1.5643, "step": 3524 }, { "epoch": 0.40952657566076095, "grad_norm": 0.4076133370399475, "learning_rate": 0.0001, "loss": 1.7149, "step": 3525 }, { "epoch": 0.40964275341272144, "grad_norm": 0.39782625436782837, "learning_rate": 0.0001, "loss": 1.6744, "step": 3526 }, { "epoch": 0.409758931164682, "grad_norm": 0.4191223680973053, "learning_rate": 0.0001, "loss": 1.6426, "step": 3527 }, { "epoch": 0.40987510891664247, "grad_norm": 0.41905683279037476, "learning_rate": 0.0001, "loss": 1.7366, "step": 3528 }, { "epoch": 0.40999128666860296, "grad_norm": 0.47418078780174255, "learning_rate": 0.0001, "loss": 1.8641, "step": 3529 }, { "epoch": 0.41010746442056345, "grad_norm": 0.4219752252101898, "learning_rate": 0.0001, "loss": 1.773, "step": 3530 }, { "epoch": 0.41022364217252394, "grad_norm": 0.39003801345825195, "learning_rate": 0.0001, "loss": 1.6645, "step": 3531 }, { "epoch": 0.4103398199244845, "grad_norm": 0.39231449365615845, "learning_rate": 0.0001, "loss": 1.7753, "step": 3532 }, { "epoch": 0.410455997676445, "grad_norm": 0.39529526233673096, "learning_rate": 0.0001, "loss": 1.6385, "step": 3533 }, { "epoch": 0.41057217542840546, "grad_norm": 0.4106643497943878, "learning_rate": 0.0001, "loss": 1.7264, "step": 3534 }, { "epoch": 0.41068835318036595, "grad_norm": 0.3763425648212433, "learning_rate": 0.0001, "loss": 1.6384, "step": 3535 }, { "epoch": 0.41080453093232644, "grad_norm": 0.4354191720485687, "learning_rate": 0.0001, "loss": 1.7028, "step": 3536 }, { "epoch": 0.410920708684287, "grad_norm": 0.4091987609863281, "learning_rate": 0.0001, "loss": 1.6875, "step": 3537 }, { "epoch": 0.4110368864362475, "grad_norm": 0.3786587417125702, "learning_rate": 0.0001, "loss": 1.617, "step": 3538 }, { "epoch": 0.41115306418820796, "grad_norm": 0.3957653343677521, "learning_rate": 0.0001, "loss": 1.6741, "step": 3539 }, { "epoch": 0.41126924194016845, "grad_norm": 0.41153082251548767, "learning_rate": 0.0001, "loss": 1.5821, "step": 3540 }, { "epoch": 0.41138541969212894, "grad_norm": 0.408596932888031, "learning_rate": 0.0001, "loss": 1.6669, "step": 3541 }, { "epoch": 0.4115015974440895, "grad_norm": 0.4165953993797302, "learning_rate": 0.0001, "loss": 1.6293, "step": 3542 }, { "epoch": 0.41161777519605, "grad_norm": 0.4159640371799469, "learning_rate": 0.0001, "loss": 1.4411, "step": 3543 }, { "epoch": 0.41173395294801046, "grad_norm": 0.4020317494869232, "learning_rate": 0.0001, "loss": 1.7044, "step": 3544 }, { "epoch": 0.41185013069997095, "grad_norm": 0.40720680356025696, "learning_rate": 0.0001, "loss": 1.659, "step": 3545 }, { "epoch": 0.41196630845193144, "grad_norm": 0.3976491689682007, "learning_rate": 0.0001, "loss": 1.671, "step": 3546 }, { "epoch": 0.41208248620389193, "grad_norm": 0.40788426995277405, "learning_rate": 0.0001, "loss": 1.5899, "step": 3547 }, { "epoch": 0.4121986639558525, "grad_norm": 0.43293899297714233, "learning_rate": 0.0001, "loss": 1.7645, "step": 3548 }, { "epoch": 0.41231484170781296, "grad_norm": 0.4064108729362488, "learning_rate": 0.0001, "loss": 1.6196, "step": 3549 }, { "epoch": 0.41243101945977345, "grad_norm": 0.38206931948661804, "learning_rate": 0.0001, "loss": 1.5335, "step": 3550 }, { "epoch": 0.41254719721173394, "grad_norm": 0.41803139448165894, "learning_rate": 0.0001, "loss": 1.7538, "step": 3551 }, { "epoch": 0.41266337496369443, "grad_norm": 0.4098378121852875, "learning_rate": 0.0001, "loss": 1.7071, "step": 3552 }, { "epoch": 0.412779552715655, "grad_norm": 0.40097564458847046, "learning_rate": 0.0001, "loss": 1.7684, "step": 3553 }, { "epoch": 0.41289573046761546, "grad_norm": 0.4043814241886139, "learning_rate": 0.0001, "loss": 1.6431, "step": 3554 }, { "epoch": 0.41301190821957595, "grad_norm": 0.41799452900886536, "learning_rate": 0.0001, "loss": 1.6355, "step": 3555 }, { "epoch": 0.41312808597153644, "grad_norm": 0.41283276677131653, "learning_rate": 0.0001, "loss": 1.6325, "step": 3556 }, { "epoch": 0.41324426372349693, "grad_norm": 0.3751561641693115, "learning_rate": 0.0001, "loss": 1.599, "step": 3557 }, { "epoch": 0.4133604414754575, "grad_norm": 0.4034547805786133, "learning_rate": 0.0001, "loss": 1.6932, "step": 3558 }, { "epoch": 0.41347661922741796, "grad_norm": 0.3995891511440277, "learning_rate": 0.0001, "loss": 1.7567, "step": 3559 }, { "epoch": 0.41359279697937845, "grad_norm": 0.4040803909301758, "learning_rate": 0.0001, "loss": 1.6819, "step": 3560 }, { "epoch": 0.41370897473133894, "grad_norm": 0.3844342827796936, "learning_rate": 0.0001, "loss": 1.8967, "step": 3561 }, { "epoch": 0.41382515248329943, "grad_norm": 0.41226911544799805, "learning_rate": 0.0001, "loss": 1.7047, "step": 3562 }, { "epoch": 0.41394133023526, "grad_norm": 0.4063775837421417, "learning_rate": 0.0001, "loss": 1.7052, "step": 3563 }, { "epoch": 0.41405750798722046, "grad_norm": 0.3665454089641571, "learning_rate": 0.0001, "loss": 1.5527, "step": 3564 }, { "epoch": 0.41417368573918095, "grad_norm": 0.3731880784034729, "learning_rate": 0.0001, "loss": 1.5557, "step": 3565 }, { "epoch": 0.41428986349114144, "grad_norm": 0.4028816223144531, "learning_rate": 0.0001, "loss": 1.602, "step": 3566 }, { "epoch": 0.41440604124310193, "grad_norm": 0.39572829008102417, "learning_rate": 0.0001, "loss": 1.6978, "step": 3567 }, { "epoch": 0.4145222189950624, "grad_norm": 0.3968917727470398, "learning_rate": 0.0001, "loss": 1.4945, "step": 3568 }, { "epoch": 0.41463839674702296, "grad_norm": 0.42275354266166687, "learning_rate": 0.0001, "loss": 1.6939, "step": 3569 }, { "epoch": 0.41475457449898345, "grad_norm": 0.4133806526660919, "learning_rate": 0.0001, "loss": 1.7573, "step": 3570 }, { "epoch": 0.41487075225094394, "grad_norm": 0.4026901125907898, "learning_rate": 0.0001, "loss": 1.7445, "step": 3571 }, { "epoch": 0.41498693000290443, "grad_norm": 0.40076354146003723, "learning_rate": 0.0001, "loss": 1.6168, "step": 3572 }, { "epoch": 0.4151031077548649, "grad_norm": 0.3952670991420746, "learning_rate": 0.0001, "loss": 1.7818, "step": 3573 }, { "epoch": 0.41521928550682546, "grad_norm": 0.396936297416687, "learning_rate": 0.0001, "loss": 1.6677, "step": 3574 }, { "epoch": 0.41533546325878595, "grad_norm": 0.38129517436027527, "learning_rate": 0.0001, "loss": 1.5803, "step": 3575 }, { "epoch": 0.41545164101074644, "grad_norm": 0.42438849806785583, "learning_rate": 0.0001, "loss": 1.6739, "step": 3576 }, { "epoch": 0.41556781876270693, "grad_norm": 0.41612252593040466, "learning_rate": 0.0001, "loss": 1.8019, "step": 3577 }, { "epoch": 0.4156839965146674, "grad_norm": 0.4326874017715454, "learning_rate": 0.0001, "loss": 1.6919, "step": 3578 }, { "epoch": 0.41580017426662796, "grad_norm": 0.4148086905479431, "learning_rate": 0.0001, "loss": 1.6639, "step": 3579 }, { "epoch": 0.41591635201858845, "grad_norm": 0.4372271001338959, "learning_rate": 0.0001, "loss": 1.8974, "step": 3580 }, { "epoch": 0.41603252977054894, "grad_norm": 0.42456239461898804, "learning_rate": 0.0001, "loss": 1.8067, "step": 3581 }, { "epoch": 0.41614870752250943, "grad_norm": 0.3889367878437042, "learning_rate": 0.0001, "loss": 1.5411, "step": 3582 }, { "epoch": 0.4162648852744699, "grad_norm": 0.4123779833316803, "learning_rate": 0.0001, "loss": 1.7563, "step": 3583 }, { "epoch": 0.41638106302643046, "grad_norm": 0.45006388425827026, "learning_rate": 0.0001, "loss": 1.6411, "step": 3584 }, { "epoch": 0.41649724077839095, "grad_norm": 0.3993321359157562, "learning_rate": 0.0001, "loss": 1.663, "step": 3585 }, { "epoch": 0.41661341853035144, "grad_norm": 0.39892643690109253, "learning_rate": 0.0001, "loss": 1.4789, "step": 3586 }, { "epoch": 0.41672959628231193, "grad_norm": 0.4181758463382721, "learning_rate": 0.0001, "loss": 1.7197, "step": 3587 }, { "epoch": 0.4168457740342724, "grad_norm": 0.3975769877433777, "learning_rate": 0.0001, "loss": 1.6527, "step": 3588 }, { "epoch": 0.4169619517862329, "grad_norm": 0.3831265866756439, "learning_rate": 0.0001, "loss": 1.6703, "step": 3589 }, { "epoch": 0.41707812953819345, "grad_norm": 0.41471192240715027, "learning_rate": 0.0001, "loss": 1.7917, "step": 3590 }, { "epoch": 0.41719430729015394, "grad_norm": 0.3869475722312927, "learning_rate": 0.0001, "loss": 1.7876, "step": 3591 }, { "epoch": 0.41731048504211443, "grad_norm": 0.37546539306640625, "learning_rate": 0.0001, "loss": 1.5797, "step": 3592 }, { "epoch": 0.4174266627940749, "grad_norm": 0.38693931698799133, "learning_rate": 0.0001, "loss": 1.6906, "step": 3593 }, { "epoch": 0.4175428405460354, "grad_norm": 0.40979790687561035, "learning_rate": 0.0001, "loss": 1.7553, "step": 3594 }, { "epoch": 0.41765901829799595, "grad_norm": 0.4143114686012268, "learning_rate": 0.0001, "loss": 1.875, "step": 3595 }, { "epoch": 0.41777519604995644, "grad_norm": 0.4477331340312958, "learning_rate": 0.0001, "loss": 1.7272, "step": 3596 }, { "epoch": 0.41789137380191693, "grad_norm": 0.4350159168243408, "learning_rate": 0.0001, "loss": 1.7577, "step": 3597 }, { "epoch": 0.4180075515538774, "grad_norm": 0.40103811025619507, "learning_rate": 0.0001, "loss": 1.6782, "step": 3598 }, { "epoch": 0.4181237293058379, "grad_norm": 0.42773351073265076, "learning_rate": 0.0001, "loss": 1.8818, "step": 3599 }, { "epoch": 0.41823990705779845, "grad_norm": 0.3894450068473816, "learning_rate": 0.0001, "loss": 1.6622, "step": 3600 }, { "epoch": 0.41835608480975894, "grad_norm": 0.3698801100254059, "learning_rate": 0.0001, "loss": 1.4404, "step": 3601 }, { "epoch": 0.41847226256171943, "grad_norm": 0.3938926160335541, "learning_rate": 0.0001, "loss": 1.5742, "step": 3602 }, { "epoch": 0.4185884403136799, "grad_norm": 0.40313947200775146, "learning_rate": 0.0001, "loss": 1.6071, "step": 3603 }, { "epoch": 0.4187046180656404, "grad_norm": 0.4046363830566406, "learning_rate": 0.0001, "loss": 1.6521, "step": 3604 }, { "epoch": 0.41882079581760095, "grad_norm": 0.4111688435077667, "learning_rate": 0.0001, "loss": 1.6909, "step": 3605 }, { "epoch": 0.41893697356956144, "grad_norm": 0.41641494631767273, "learning_rate": 0.0001, "loss": 1.6157, "step": 3606 }, { "epoch": 0.41905315132152193, "grad_norm": 0.3970726728439331, "learning_rate": 0.0001, "loss": 1.6719, "step": 3607 }, { "epoch": 0.4191693290734824, "grad_norm": 0.40690597891807556, "learning_rate": 0.0001, "loss": 1.7636, "step": 3608 }, { "epoch": 0.4192855068254429, "grad_norm": 0.41410157084465027, "learning_rate": 0.0001, "loss": 1.794, "step": 3609 }, { "epoch": 0.4194016845774034, "grad_norm": 0.38639214634895325, "learning_rate": 0.0001, "loss": 1.6979, "step": 3610 }, { "epoch": 0.41951786232936394, "grad_norm": 0.3689410090446472, "learning_rate": 0.0001, "loss": 1.6076, "step": 3611 }, { "epoch": 0.41963404008132443, "grad_norm": 0.40414369106292725, "learning_rate": 0.0001, "loss": 1.8223, "step": 3612 }, { "epoch": 0.4197502178332849, "grad_norm": 0.38138678669929504, "learning_rate": 0.0001, "loss": 1.3491, "step": 3613 }, { "epoch": 0.4198663955852454, "grad_norm": 0.37447166442871094, "learning_rate": 0.0001, "loss": 1.5359, "step": 3614 }, { "epoch": 0.4199825733372059, "grad_norm": 0.40835610032081604, "learning_rate": 0.0001, "loss": 1.7345, "step": 3615 }, { "epoch": 0.42009875108916644, "grad_norm": 0.4299129545688629, "learning_rate": 0.0001, "loss": 1.7448, "step": 3616 }, { "epoch": 0.42021492884112693, "grad_norm": 0.3932313621044159, "learning_rate": 0.0001, "loss": 1.6607, "step": 3617 }, { "epoch": 0.4203311065930874, "grad_norm": 0.3847653865814209, "learning_rate": 0.0001, "loss": 1.5535, "step": 3618 }, { "epoch": 0.4204472843450479, "grad_norm": 0.435201495885849, "learning_rate": 0.0001, "loss": 1.8684, "step": 3619 }, { "epoch": 0.4205634620970084, "grad_norm": 0.3746712803840637, "learning_rate": 0.0001, "loss": 1.6723, "step": 3620 }, { "epoch": 0.42067963984896894, "grad_norm": 0.37663495540618896, "learning_rate": 0.0001, "loss": 1.5523, "step": 3621 }, { "epoch": 0.42079581760092943, "grad_norm": 0.37208837270736694, "learning_rate": 0.0001, "loss": 1.6111, "step": 3622 }, { "epoch": 0.4209119953528899, "grad_norm": 0.3866986334323883, "learning_rate": 0.0001, "loss": 1.5904, "step": 3623 }, { "epoch": 0.4210281731048504, "grad_norm": 0.40129542350769043, "learning_rate": 0.0001, "loss": 1.6223, "step": 3624 }, { "epoch": 0.4211443508568109, "grad_norm": 0.38859081268310547, "learning_rate": 0.0001, "loss": 1.6547, "step": 3625 }, { "epoch": 0.42126052860877145, "grad_norm": 0.43616414070129395, "learning_rate": 0.0001, "loss": 1.7725, "step": 3626 }, { "epoch": 0.42137670636073193, "grad_norm": 0.43202710151672363, "learning_rate": 0.0001, "loss": 1.4989, "step": 3627 }, { "epoch": 0.4214928841126924, "grad_norm": 0.39463871717453003, "learning_rate": 0.0001, "loss": 1.4335, "step": 3628 }, { "epoch": 0.4216090618646529, "grad_norm": 0.4237414002418518, "learning_rate": 0.0001, "loss": 1.7187, "step": 3629 }, { "epoch": 0.4217252396166134, "grad_norm": 0.39005210995674133, "learning_rate": 0.0001, "loss": 1.48, "step": 3630 }, { "epoch": 0.42184141736857395, "grad_norm": 0.3883463144302368, "learning_rate": 0.0001, "loss": 1.6161, "step": 3631 }, { "epoch": 0.42195759512053443, "grad_norm": 0.3896929621696472, "learning_rate": 0.0001, "loss": 1.6433, "step": 3632 }, { "epoch": 0.4220737728724949, "grad_norm": 0.41953974962234497, "learning_rate": 0.0001, "loss": 1.5526, "step": 3633 }, { "epoch": 0.4221899506244554, "grad_norm": 0.3946000039577484, "learning_rate": 0.0001, "loss": 1.6624, "step": 3634 }, { "epoch": 0.4223061283764159, "grad_norm": 0.4022957384586334, "learning_rate": 0.0001, "loss": 1.5182, "step": 3635 }, { "epoch": 0.4224223061283764, "grad_norm": 0.40276673436164856, "learning_rate": 0.0001, "loss": 1.5075, "step": 3636 }, { "epoch": 0.42253848388033693, "grad_norm": 0.41087186336517334, "learning_rate": 0.0001, "loss": 1.7919, "step": 3637 }, { "epoch": 0.4226546616322974, "grad_norm": 0.4109443426132202, "learning_rate": 0.0001, "loss": 1.7705, "step": 3638 }, { "epoch": 0.4227708393842579, "grad_norm": 0.3984861671924591, "learning_rate": 0.0001, "loss": 1.5405, "step": 3639 }, { "epoch": 0.4228870171362184, "grad_norm": 0.42480790615081787, "learning_rate": 0.0001, "loss": 1.7443, "step": 3640 }, { "epoch": 0.4230031948881789, "grad_norm": 0.37020301818847656, "learning_rate": 0.0001, "loss": 1.5778, "step": 3641 }, { "epoch": 0.42311937264013944, "grad_norm": 0.3878079950809479, "learning_rate": 0.0001, "loss": 1.5914, "step": 3642 }, { "epoch": 0.4232355503920999, "grad_norm": 0.4088906943798065, "learning_rate": 0.0001, "loss": 1.7003, "step": 3643 }, { "epoch": 0.4233517281440604, "grad_norm": 0.39940375089645386, "learning_rate": 0.0001, "loss": 1.673, "step": 3644 }, { "epoch": 0.4234679058960209, "grad_norm": 0.41945314407348633, "learning_rate": 0.0001, "loss": 1.6954, "step": 3645 }, { "epoch": 0.4235840836479814, "grad_norm": 0.43986397981643677, "learning_rate": 0.0001, "loss": 1.7769, "step": 3646 }, { "epoch": 0.42370026139994194, "grad_norm": 0.41708502173423767, "learning_rate": 0.0001, "loss": 1.6622, "step": 3647 }, { "epoch": 0.4238164391519024, "grad_norm": 0.3953828513622284, "learning_rate": 0.0001, "loss": 1.6353, "step": 3648 }, { "epoch": 0.4239326169038629, "grad_norm": 0.3876919746398926, "learning_rate": 0.0001, "loss": 1.475, "step": 3649 }, { "epoch": 0.4240487946558234, "grad_norm": 0.4110132157802582, "learning_rate": 0.0001, "loss": 1.8118, "step": 3650 }, { "epoch": 0.4241649724077839, "grad_norm": 0.4448528587818146, "learning_rate": 0.0001, "loss": 1.8269, "step": 3651 }, { "epoch": 0.42428115015974444, "grad_norm": 0.4393356740474701, "learning_rate": 0.0001, "loss": 1.8647, "step": 3652 }, { "epoch": 0.4243973279117049, "grad_norm": 0.4252372086048126, "learning_rate": 0.0001, "loss": 1.6679, "step": 3653 }, { "epoch": 0.4245135056636654, "grad_norm": 0.41888418793678284, "learning_rate": 0.0001, "loss": 1.7479, "step": 3654 }, { "epoch": 0.4246296834156259, "grad_norm": 0.39156967401504517, "learning_rate": 0.0001, "loss": 1.6443, "step": 3655 }, { "epoch": 0.4247458611675864, "grad_norm": 0.3865427076816559, "learning_rate": 0.0001, "loss": 1.6541, "step": 3656 }, { "epoch": 0.4248620389195469, "grad_norm": 0.37357524037361145, "learning_rate": 0.0001, "loss": 1.6029, "step": 3657 }, { "epoch": 0.4249782166715074, "grad_norm": 0.3771733045578003, "learning_rate": 0.0001, "loss": 1.59, "step": 3658 }, { "epoch": 0.4250943944234679, "grad_norm": 0.40311431884765625, "learning_rate": 0.0001, "loss": 1.6134, "step": 3659 }, { "epoch": 0.4252105721754284, "grad_norm": 0.39886149764060974, "learning_rate": 0.0001, "loss": 1.8507, "step": 3660 }, { "epoch": 0.4253267499273889, "grad_norm": 0.4184694290161133, "learning_rate": 0.0001, "loss": 1.7392, "step": 3661 }, { "epoch": 0.4254429276793494, "grad_norm": 0.4278115928173065, "learning_rate": 0.0001, "loss": 1.8403, "step": 3662 }, { "epoch": 0.4255591054313099, "grad_norm": 0.4227597117424011, "learning_rate": 0.0001, "loss": 1.7257, "step": 3663 }, { "epoch": 0.4256752831832704, "grad_norm": 0.3717440068721771, "learning_rate": 0.0001, "loss": 1.4974, "step": 3664 }, { "epoch": 0.4257914609352309, "grad_norm": 0.3993324935436249, "learning_rate": 0.0001, "loss": 1.6323, "step": 3665 }, { "epoch": 0.4259076386871914, "grad_norm": 0.40730348229408264, "learning_rate": 0.0001, "loss": 1.6303, "step": 3666 }, { "epoch": 0.4260238164391519, "grad_norm": 0.38585323095321655, "learning_rate": 0.0001, "loss": 1.6446, "step": 3667 }, { "epoch": 0.4261399941911124, "grad_norm": 0.4284312129020691, "learning_rate": 0.0001, "loss": 1.542, "step": 3668 }, { "epoch": 0.4262561719430729, "grad_norm": 0.417501300573349, "learning_rate": 0.0001, "loss": 1.6636, "step": 3669 }, { "epoch": 0.4263723496950334, "grad_norm": 0.3896951973438263, "learning_rate": 0.0001, "loss": 1.4807, "step": 3670 }, { "epoch": 0.4264885274469939, "grad_norm": 0.4131647050380707, "learning_rate": 0.0001, "loss": 1.6609, "step": 3671 }, { "epoch": 0.4266047051989544, "grad_norm": 0.36741092801094055, "learning_rate": 0.0001, "loss": 1.5499, "step": 3672 }, { "epoch": 0.4267208829509149, "grad_norm": 0.41601473093032837, "learning_rate": 0.0001, "loss": 1.5905, "step": 3673 }, { "epoch": 0.4268370607028754, "grad_norm": 0.41685929894447327, "learning_rate": 0.0001, "loss": 1.5354, "step": 3674 }, { "epoch": 0.4269532384548359, "grad_norm": 0.41353273391723633, "learning_rate": 0.0001, "loss": 1.7881, "step": 3675 }, { "epoch": 0.4270694162067964, "grad_norm": 0.40456423163414, "learning_rate": 0.0001, "loss": 1.6691, "step": 3676 }, { "epoch": 0.4271855939587569, "grad_norm": 0.457220196723938, "learning_rate": 0.0001, "loss": 1.7856, "step": 3677 }, { "epoch": 0.42730177171071737, "grad_norm": 0.4161781668663025, "learning_rate": 0.0001, "loss": 1.6692, "step": 3678 }, { "epoch": 0.4274179494626779, "grad_norm": 0.40317872166633606, "learning_rate": 0.0001, "loss": 1.6023, "step": 3679 }, { "epoch": 0.4275341272146384, "grad_norm": 0.4100804328918457, "learning_rate": 0.0001, "loss": 1.7285, "step": 3680 }, { "epoch": 0.4276503049665989, "grad_norm": 0.40741920471191406, "learning_rate": 0.0001, "loss": 1.6931, "step": 3681 }, { "epoch": 0.4277664827185594, "grad_norm": 0.3968175947666168, "learning_rate": 0.0001, "loss": 1.5824, "step": 3682 }, { "epoch": 0.42788266047051987, "grad_norm": 0.3978244662284851, "learning_rate": 0.0001, "loss": 1.7198, "step": 3683 }, { "epoch": 0.4279988382224804, "grad_norm": 0.39147892594337463, "learning_rate": 0.0001, "loss": 1.6397, "step": 3684 }, { "epoch": 0.4281150159744409, "grad_norm": 0.410375714302063, "learning_rate": 0.0001, "loss": 1.7164, "step": 3685 }, { "epoch": 0.4282311937264014, "grad_norm": 0.4207121729850769, "learning_rate": 0.0001, "loss": 1.7148, "step": 3686 }, { "epoch": 0.4283473714783619, "grad_norm": 0.446114718914032, "learning_rate": 0.0001, "loss": 1.7685, "step": 3687 }, { "epoch": 0.42846354923032237, "grad_norm": 0.429855614900589, "learning_rate": 0.0001, "loss": 1.8145, "step": 3688 }, { "epoch": 0.4285797269822829, "grad_norm": 0.4075881838798523, "learning_rate": 0.0001, "loss": 1.6954, "step": 3689 }, { "epoch": 0.4286959047342434, "grad_norm": 0.42212194204330444, "learning_rate": 0.0001, "loss": 1.8045, "step": 3690 }, { "epoch": 0.4288120824862039, "grad_norm": 0.3913438022136688, "learning_rate": 0.0001, "loss": 1.541, "step": 3691 }, { "epoch": 0.4289282602381644, "grad_norm": 0.4033333659172058, "learning_rate": 0.0001, "loss": 1.4609, "step": 3692 }, { "epoch": 0.42904443799012487, "grad_norm": 0.41124168038368225, "learning_rate": 0.0001, "loss": 1.7919, "step": 3693 }, { "epoch": 0.4291606157420854, "grad_norm": 0.4082317352294922, "learning_rate": 0.0001, "loss": 1.714, "step": 3694 }, { "epoch": 0.4292767934940459, "grad_norm": 0.43243342638015747, "learning_rate": 0.0001, "loss": 1.8346, "step": 3695 }, { "epoch": 0.4293929712460064, "grad_norm": 0.4029131829738617, "learning_rate": 0.0001, "loss": 1.6845, "step": 3696 }, { "epoch": 0.4295091489979669, "grad_norm": 0.39119669795036316, "learning_rate": 0.0001, "loss": 1.547, "step": 3697 }, { "epoch": 0.42962532674992737, "grad_norm": 0.4251865744590759, "learning_rate": 0.0001, "loss": 1.7231, "step": 3698 }, { "epoch": 0.42974150450188786, "grad_norm": 0.41602757573127747, "learning_rate": 0.0001, "loss": 1.6948, "step": 3699 }, { "epoch": 0.4298576822538484, "grad_norm": 0.41048288345336914, "learning_rate": 0.0001, "loss": 1.7455, "step": 3700 }, { "epoch": 0.4299738600058089, "grad_norm": 0.39157551527023315, "learning_rate": 0.0001, "loss": 1.6849, "step": 3701 }, { "epoch": 0.4300900377577694, "grad_norm": 0.40497899055480957, "learning_rate": 0.0001, "loss": 1.729, "step": 3702 }, { "epoch": 0.4302062155097299, "grad_norm": 0.36876180768013, "learning_rate": 0.0001, "loss": 1.5289, "step": 3703 }, { "epoch": 0.43032239326169036, "grad_norm": 0.39832401275634766, "learning_rate": 0.0001, "loss": 1.5778, "step": 3704 }, { "epoch": 0.4304385710136509, "grad_norm": 0.40576431155204773, "learning_rate": 0.0001, "loss": 1.5792, "step": 3705 }, { "epoch": 0.4305547487656114, "grad_norm": 0.41582056879997253, "learning_rate": 0.0001, "loss": 1.7519, "step": 3706 }, { "epoch": 0.4306709265175719, "grad_norm": 0.37532898783683777, "learning_rate": 0.0001, "loss": 1.4301, "step": 3707 }, { "epoch": 0.4307871042695324, "grad_norm": 0.4363482594490051, "learning_rate": 0.0001, "loss": 1.6907, "step": 3708 }, { "epoch": 0.43090328202149286, "grad_norm": 0.38650989532470703, "learning_rate": 0.0001, "loss": 1.579, "step": 3709 }, { "epoch": 0.4310194597734534, "grad_norm": 0.4108797013759613, "learning_rate": 0.0001, "loss": 1.8012, "step": 3710 }, { "epoch": 0.4311356375254139, "grad_norm": 0.3996245563030243, "learning_rate": 0.0001, "loss": 1.7163, "step": 3711 }, { "epoch": 0.4312518152773744, "grad_norm": 0.40178465843200684, "learning_rate": 0.0001, "loss": 1.744, "step": 3712 }, { "epoch": 0.4313679930293349, "grad_norm": 0.38844192028045654, "learning_rate": 0.0001, "loss": 1.6231, "step": 3713 }, { "epoch": 0.43148417078129536, "grad_norm": 0.38284438848495483, "learning_rate": 0.0001, "loss": 1.6097, "step": 3714 }, { "epoch": 0.4316003485332559, "grad_norm": 0.4122409224510193, "learning_rate": 0.0001, "loss": 1.7502, "step": 3715 }, { "epoch": 0.4317165262852164, "grad_norm": 0.4218509793281555, "learning_rate": 0.0001, "loss": 1.6147, "step": 3716 }, { "epoch": 0.4318327040371769, "grad_norm": 0.4130255877971649, "learning_rate": 0.0001, "loss": 1.5444, "step": 3717 }, { "epoch": 0.4319488817891374, "grad_norm": 0.4026492238044739, "learning_rate": 0.0001, "loss": 1.5865, "step": 3718 }, { "epoch": 0.43206505954109786, "grad_norm": 0.4177059531211853, "learning_rate": 0.0001, "loss": 1.6353, "step": 3719 }, { "epoch": 0.4321812372930584, "grad_norm": 0.4138438105583191, "learning_rate": 0.0001, "loss": 1.616, "step": 3720 }, { "epoch": 0.4322974150450189, "grad_norm": 0.4228832721710205, "learning_rate": 0.0001, "loss": 1.8092, "step": 3721 }, { "epoch": 0.4324135927969794, "grad_norm": 0.4101725220680237, "learning_rate": 0.0001, "loss": 1.6595, "step": 3722 }, { "epoch": 0.4325297705489399, "grad_norm": 0.38750308752059937, "learning_rate": 0.0001, "loss": 1.4348, "step": 3723 }, { "epoch": 0.43264594830090036, "grad_norm": 0.43653422594070435, "learning_rate": 0.0001, "loss": 1.698, "step": 3724 }, { "epoch": 0.43276212605286085, "grad_norm": 0.39200592041015625, "learning_rate": 0.0001, "loss": 1.6315, "step": 3725 }, { "epoch": 0.4328783038048214, "grad_norm": 0.40859901905059814, "learning_rate": 0.0001, "loss": 1.6831, "step": 3726 }, { "epoch": 0.4329944815567819, "grad_norm": 0.41595181822776794, "learning_rate": 0.0001, "loss": 1.6281, "step": 3727 }, { "epoch": 0.4331106593087424, "grad_norm": 0.391353577375412, "learning_rate": 0.0001, "loss": 1.5171, "step": 3728 }, { "epoch": 0.43322683706070286, "grad_norm": 0.40748852491378784, "learning_rate": 0.0001, "loss": 1.639, "step": 3729 }, { "epoch": 0.43334301481266335, "grad_norm": 0.3781512975692749, "learning_rate": 0.0001, "loss": 1.4826, "step": 3730 }, { "epoch": 0.4334591925646239, "grad_norm": 0.36677882075309753, "learning_rate": 0.0001, "loss": 1.3444, "step": 3731 }, { "epoch": 0.4335753703165844, "grad_norm": 0.38588741421699524, "learning_rate": 0.0001, "loss": 1.5981, "step": 3732 }, { "epoch": 0.4336915480685449, "grad_norm": 0.4158620834350586, "learning_rate": 0.0001, "loss": 1.6755, "step": 3733 }, { "epoch": 0.43380772582050536, "grad_norm": 0.4044388234615326, "learning_rate": 0.0001, "loss": 1.6385, "step": 3734 }, { "epoch": 0.43392390357246585, "grad_norm": 0.4197262227535248, "learning_rate": 0.0001, "loss": 1.7444, "step": 3735 }, { "epoch": 0.4340400813244264, "grad_norm": 0.40574586391448975, "learning_rate": 0.0001, "loss": 1.6206, "step": 3736 }, { "epoch": 0.4341562590763869, "grad_norm": 0.397217333316803, "learning_rate": 0.0001, "loss": 1.6766, "step": 3737 }, { "epoch": 0.4342724368283474, "grad_norm": 0.4184318482875824, "learning_rate": 0.0001, "loss": 1.6272, "step": 3738 }, { "epoch": 0.43438861458030786, "grad_norm": 0.3860454857349396, "learning_rate": 0.0001, "loss": 1.5121, "step": 3739 }, { "epoch": 0.43450479233226835, "grad_norm": 0.41592180728912354, "learning_rate": 0.0001, "loss": 1.5661, "step": 3740 }, { "epoch": 0.4346209700842289, "grad_norm": 0.3821113407611847, "learning_rate": 0.0001, "loss": 1.5594, "step": 3741 }, { "epoch": 0.4347371478361894, "grad_norm": 0.4071682095527649, "learning_rate": 0.0001, "loss": 1.5585, "step": 3742 }, { "epoch": 0.4348533255881499, "grad_norm": 0.41301366686820984, "learning_rate": 0.0001, "loss": 1.5946, "step": 3743 }, { "epoch": 0.43496950334011036, "grad_norm": 0.4541078209877014, "learning_rate": 0.0001, "loss": 1.6193, "step": 3744 }, { "epoch": 0.43508568109207085, "grad_norm": 0.4197753369808197, "learning_rate": 0.0001, "loss": 1.7419, "step": 3745 }, { "epoch": 0.43520185884403134, "grad_norm": 0.40982601046562195, "learning_rate": 0.0001, "loss": 1.601, "step": 3746 }, { "epoch": 0.4353180365959919, "grad_norm": 0.439325213432312, "learning_rate": 0.0001, "loss": 1.8748, "step": 3747 }, { "epoch": 0.4354342143479524, "grad_norm": 0.4326147139072418, "learning_rate": 0.0001, "loss": 1.7788, "step": 3748 }, { "epoch": 0.43555039209991286, "grad_norm": 0.43868646025657654, "learning_rate": 0.0001, "loss": 1.63, "step": 3749 }, { "epoch": 0.43566656985187335, "grad_norm": 0.4201911389827728, "learning_rate": 0.0001, "loss": 1.524, "step": 3750 }, { "epoch": 0.43578274760383384, "grad_norm": 0.3979409337043762, "learning_rate": 0.0001, "loss": 1.7081, "step": 3751 }, { "epoch": 0.4358989253557944, "grad_norm": 0.4207451045513153, "learning_rate": 0.0001, "loss": 1.6116, "step": 3752 }, { "epoch": 0.4360151031077549, "grad_norm": 0.3865394592285156, "learning_rate": 0.0001, "loss": 1.6928, "step": 3753 }, { "epoch": 0.43613128085971536, "grad_norm": 0.4036276340484619, "learning_rate": 0.0001, "loss": 1.7083, "step": 3754 }, { "epoch": 0.43624745861167585, "grad_norm": 0.39419230818748474, "learning_rate": 0.0001, "loss": 1.7132, "step": 3755 }, { "epoch": 0.43636363636363634, "grad_norm": 0.42213842272758484, "learning_rate": 0.0001, "loss": 1.7595, "step": 3756 }, { "epoch": 0.4364798141155969, "grad_norm": 0.4202689528465271, "learning_rate": 0.0001, "loss": 1.7564, "step": 3757 }, { "epoch": 0.4365959918675574, "grad_norm": 0.40122315287590027, "learning_rate": 0.0001, "loss": 1.7285, "step": 3758 }, { "epoch": 0.43671216961951786, "grad_norm": 0.41420555114746094, "learning_rate": 0.0001, "loss": 1.7936, "step": 3759 }, { "epoch": 0.43682834737147835, "grad_norm": 0.46364185214042664, "learning_rate": 0.0001, "loss": 1.7911, "step": 3760 }, { "epoch": 0.43694452512343884, "grad_norm": 0.404472678899765, "learning_rate": 0.0001, "loss": 1.6818, "step": 3761 }, { "epoch": 0.4370607028753994, "grad_norm": 0.4069894552230835, "learning_rate": 0.0001, "loss": 1.7287, "step": 3762 }, { "epoch": 0.4371768806273599, "grad_norm": 0.41052114963531494, "learning_rate": 0.0001, "loss": 1.5779, "step": 3763 }, { "epoch": 0.43729305837932037, "grad_norm": 0.41978520154953003, "learning_rate": 0.0001, "loss": 1.5853, "step": 3764 }, { "epoch": 0.43740923613128085, "grad_norm": 0.45879876613616943, "learning_rate": 0.0001, "loss": 1.7743, "step": 3765 }, { "epoch": 0.43752541388324134, "grad_norm": 0.42506903409957886, "learning_rate": 0.0001, "loss": 1.706, "step": 3766 }, { "epoch": 0.43764159163520183, "grad_norm": 0.4226793944835663, "learning_rate": 0.0001, "loss": 1.6838, "step": 3767 }, { "epoch": 0.4377577693871624, "grad_norm": 0.38549569249153137, "learning_rate": 0.0001, "loss": 1.5986, "step": 3768 }, { "epoch": 0.43787394713912287, "grad_norm": 0.3775811791419983, "learning_rate": 0.0001, "loss": 1.5797, "step": 3769 }, { "epoch": 0.43799012489108335, "grad_norm": 0.4261288046836853, "learning_rate": 0.0001, "loss": 1.7852, "step": 3770 }, { "epoch": 0.43810630264304384, "grad_norm": 0.4037782847881317, "learning_rate": 0.0001, "loss": 1.7035, "step": 3771 }, { "epoch": 0.43822248039500433, "grad_norm": 0.3799671530723572, "learning_rate": 0.0001, "loss": 1.5825, "step": 3772 }, { "epoch": 0.4383386581469649, "grad_norm": 0.4238581955432892, "learning_rate": 0.0001, "loss": 1.7204, "step": 3773 }, { "epoch": 0.43845483589892537, "grad_norm": 0.39733898639678955, "learning_rate": 0.0001, "loss": 1.7237, "step": 3774 }, { "epoch": 0.43857101365088585, "grad_norm": 0.386123925447464, "learning_rate": 0.0001, "loss": 1.6263, "step": 3775 }, { "epoch": 0.43868719140284634, "grad_norm": 0.42280828952789307, "learning_rate": 0.0001, "loss": 1.7637, "step": 3776 }, { "epoch": 0.43880336915480683, "grad_norm": 0.4024019241333008, "learning_rate": 0.0001, "loss": 1.7998, "step": 3777 }, { "epoch": 0.4389195469067674, "grad_norm": 0.36499494314193726, "learning_rate": 0.0001, "loss": 1.408, "step": 3778 }, { "epoch": 0.43903572465872787, "grad_norm": 0.4545857906341553, "learning_rate": 0.0001, "loss": 1.9895, "step": 3779 }, { "epoch": 0.43915190241068836, "grad_norm": 0.4315713047981262, "learning_rate": 0.0001, "loss": 1.6808, "step": 3780 }, { "epoch": 0.43926808016264884, "grad_norm": 0.4134562015533447, "learning_rate": 0.0001, "loss": 1.6684, "step": 3781 }, { "epoch": 0.43938425791460933, "grad_norm": 0.39927372336387634, "learning_rate": 0.0001, "loss": 1.5434, "step": 3782 }, { "epoch": 0.4395004356665699, "grad_norm": 0.407842755317688, "learning_rate": 0.0001, "loss": 1.5138, "step": 3783 }, { "epoch": 0.43961661341853037, "grad_norm": 0.4489937126636505, "learning_rate": 0.0001, "loss": 1.7954, "step": 3784 }, { "epoch": 0.43973279117049086, "grad_norm": 0.41381195187568665, "learning_rate": 0.0001, "loss": 1.5945, "step": 3785 }, { "epoch": 0.43984896892245134, "grad_norm": 0.4299207627773285, "learning_rate": 0.0001, "loss": 1.7795, "step": 3786 }, { "epoch": 0.43996514667441183, "grad_norm": 0.4247708320617676, "learning_rate": 0.0001, "loss": 1.5955, "step": 3787 }, { "epoch": 0.4400813244263723, "grad_norm": 0.45429563522338867, "learning_rate": 0.0001, "loss": 1.6822, "step": 3788 }, { "epoch": 0.44019750217833287, "grad_norm": 0.39192894101142883, "learning_rate": 0.0001, "loss": 1.5933, "step": 3789 }, { "epoch": 0.44031367993029336, "grad_norm": 0.4126262664794922, "learning_rate": 0.0001, "loss": 1.6458, "step": 3790 }, { "epoch": 0.44042985768225384, "grad_norm": 0.3963555693626404, "learning_rate": 0.0001, "loss": 1.552, "step": 3791 }, { "epoch": 0.44054603543421433, "grad_norm": 0.3770107924938202, "learning_rate": 0.0001, "loss": 1.3031, "step": 3792 }, { "epoch": 0.4406622131861748, "grad_norm": 0.40233081579208374, "learning_rate": 0.0001, "loss": 1.5341, "step": 3793 }, { "epoch": 0.44077839093813537, "grad_norm": 0.4272160232067108, "learning_rate": 0.0001, "loss": 1.7011, "step": 3794 }, { "epoch": 0.44089456869009586, "grad_norm": 0.4221998453140259, "learning_rate": 0.0001, "loss": 1.7047, "step": 3795 }, { "epoch": 0.44101074644205635, "grad_norm": 0.3974875509738922, "learning_rate": 0.0001, "loss": 1.6049, "step": 3796 }, { "epoch": 0.44112692419401683, "grad_norm": 0.462016224861145, "learning_rate": 0.0001, "loss": 1.8508, "step": 3797 }, { "epoch": 0.4412431019459773, "grad_norm": 0.412485271692276, "learning_rate": 0.0001, "loss": 1.6794, "step": 3798 }, { "epoch": 0.44135927969793787, "grad_norm": 0.40575462579727173, "learning_rate": 0.0001, "loss": 1.6224, "step": 3799 }, { "epoch": 0.44147545744989836, "grad_norm": 0.393636018037796, "learning_rate": 0.0001, "loss": 1.5162, "step": 3800 }, { "epoch": 0.44159163520185885, "grad_norm": 0.39681223034858704, "learning_rate": 0.0001, "loss": 1.6334, "step": 3801 }, { "epoch": 0.44170781295381933, "grad_norm": 0.41064146161079407, "learning_rate": 0.0001, "loss": 1.4694, "step": 3802 }, { "epoch": 0.4418239907057798, "grad_norm": 0.4206371605396271, "learning_rate": 0.0001, "loss": 1.7796, "step": 3803 }, { "epoch": 0.44194016845774037, "grad_norm": 0.41671186685562134, "learning_rate": 0.0001, "loss": 1.7832, "step": 3804 }, { "epoch": 0.44205634620970086, "grad_norm": 0.38577428460121155, "learning_rate": 0.0001, "loss": 1.5598, "step": 3805 }, { "epoch": 0.44217252396166135, "grad_norm": 0.3924373388290405, "learning_rate": 0.0001, "loss": 1.6841, "step": 3806 }, { "epoch": 0.44228870171362183, "grad_norm": 0.44141262769699097, "learning_rate": 0.0001, "loss": 1.7508, "step": 3807 }, { "epoch": 0.4424048794655823, "grad_norm": 0.40393298864364624, "learning_rate": 0.0001, "loss": 1.7571, "step": 3808 }, { "epoch": 0.44252105721754287, "grad_norm": 0.42056989669799805, "learning_rate": 0.0001, "loss": 1.5809, "step": 3809 }, { "epoch": 0.44263723496950336, "grad_norm": 0.38610607385635376, "learning_rate": 0.0001, "loss": 1.5378, "step": 3810 }, { "epoch": 0.44275341272146385, "grad_norm": 0.408483624458313, "learning_rate": 0.0001, "loss": 1.5269, "step": 3811 }, { "epoch": 0.44286959047342433, "grad_norm": 0.3991870880126953, "learning_rate": 0.0001, "loss": 1.6283, "step": 3812 }, { "epoch": 0.4429857682253848, "grad_norm": 0.4130445718765259, "learning_rate": 0.0001, "loss": 1.7266, "step": 3813 }, { "epoch": 0.4431019459773453, "grad_norm": 0.4013391137123108, "learning_rate": 0.0001, "loss": 1.5487, "step": 3814 }, { "epoch": 0.44321812372930586, "grad_norm": 0.4077419638633728, "learning_rate": 0.0001, "loss": 1.7261, "step": 3815 }, { "epoch": 0.44333430148126635, "grad_norm": 0.40052545070648193, "learning_rate": 0.0001, "loss": 1.4593, "step": 3816 }, { "epoch": 0.44345047923322684, "grad_norm": 0.38764074444770813, "learning_rate": 0.0001, "loss": 1.7004, "step": 3817 }, { "epoch": 0.4435666569851873, "grad_norm": 0.41951900720596313, "learning_rate": 0.0001, "loss": 1.6013, "step": 3818 }, { "epoch": 0.4436828347371478, "grad_norm": 0.42698657512664795, "learning_rate": 0.0001, "loss": 1.7343, "step": 3819 }, { "epoch": 0.44379901248910836, "grad_norm": 0.42899012565612793, "learning_rate": 0.0001, "loss": 1.8282, "step": 3820 }, { "epoch": 0.44391519024106885, "grad_norm": 0.3827013373374939, "learning_rate": 0.0001, "loss": 1.5941, "step": 3821 }, { "epoch": 0.44403136799302934, "grad_norm": 0.42411962151527405, "learning_rate": 0.0001, "loss": 1.7373, "step": 3822 }, { "epoch": 0.4441475457449898, "grad_norm": 0.4320610463619232, "learning_rate": 0.0001, "loss": 1.661, "step": 3823 }, { "epoch": 0.4442637234969503, "grad_norm": 0.4369167685508728, "learning_rate": 0.0001, "loss": 1.6977, "step": 3824 }, { "epoch": 0.44437990124891086, "grad_norm": 0.3931221663951874, "learning_rate": 0.0001, "loss": 1.7523, "step": 3825 }, { "epoch": 0.44449607900087135, "grad_norm": 0.4219224750995636, "learning_rate": 0.0001, "loss": 1.6523, "step": 3826 }, { "epoch": 0.44461225675283184, "grad_norm": 0.4289875030517578, "learning_rate": 0.0001, "loss": 1.7104, "step": 3827 }, { "epoch": 0.4447284345047923, "grad_norm": 0.3924271762371063, "learning_rate": 0.0001, "loss": 1.6693, "step": 3828 }, { "epoch": 0.4448446122567528, "grad_norm": 0.3892977237701416, "learning_rate": 0.0001, "loss": 1.674, "step": 3829 }, { "epoch": 0.44496079000871336, "grad_norm": 0.4215613305568695, "learning_rate": 0.0001, "loss": 1.8097, "step": 3830 }, { "epoch": 0.44507696776067385, "grad_norm": 0.38903629779815674, "learning_rate": 0.0001, "loss": 1.5231, "step": 3831 }, { "epoch": 0.44519314551263434, "grad_norm": 0.3972281217575073, "learning_rate": 0.0001, "loss": 1.6644, "step": 3832 }, { "epoch": 0.4453093232645948, "grad_norm": 0.43121159076690674, "learning_rate": 0.0001, "loss": 1.7917, "step": 3833 }, { "epoch": 0.4454255010165553, "grad_norm": 0.3963190019130707, "learning_rate": 0.0001, "loss": 1.6404, "step": 3834 }, { "epoch": 0.4455416787685158, "grad_norm": 0.4058324992656708, "learning_rate": 0.0001, "loss": 1.6382, "step": 3835 }, { "epoch": 0.44565785652047635, "grad_norm": 0.38773831725120544, "learning_rate": 0.0001, "loss": 1.5982, "step": 3836 }, { "epoch": 0.44577403427243684, "grad_norm": 0.4000050723552704, "learning_rate": 0.0001, "loss": 1.6112, "step": 3837 }, { "epoch": 0.4458902120243973, "grad_norm": 0.4279811382293701, "learning_rate": 0.0001, "loss": 1.7576, "step": 3838 }, { "epoch": 0.4460063897763578, "grad_norm": 0.3969043493270874, "learning_rate": 0.0001, "loss": 1.6881, "step": 3839 }, { "epoch": 0.4461225675283183, "grad_norm": 0.3967093825340271, "learning_rate": 0.0001, "loss": 1.5786, "step": 3840 }, { "epoch": 0.44623874528027885, "grad_norm": 0.4337485432624817, "learning_rate": 0.0001, "loss": 1.7033, "step": 3841 }, { "epoch": 0.44635492303223934, "grad_norm": 0.4148968756198883, "learning_rate": 0.0001, "loss": 1.7227, "step": 3842 }, { "epoch": 0.4464711007841998, "grad_norm": 0.4100302457809448, "learning_rate": 0.0001, "loss": 1.7214, "step": 3843 }, { "epoch": 0.4465872785361603, "grad_norm": 0.430477112531662, "learning_rate": 0.0001, "loss": 1.6915, "step": 3844 }, { "epoch": 0.4467034562881208, "grad_norm": 0.424699068069458, "learning_rate": 0.0001, "loss": 1.6179, "step": 3845 }, { "epoch": 0.44681963404008135, "grad_norm": 0.39795196056365967, "learning_rate": 0.0001, "loss": 1.7281, "step": 3846 }, { "epoch": 0.44693581179204184, "grad_norm": 0.4175790548324585, "learning_rate": 0.0001, "loss": 1.5496, "step": 3847 }, { "epoch": 0.4470519895440023, "grad_norm": 0.3961549699306488, "learning_rate": 0.0001, "loss": 1.5253, "step": 3848 }, { "epoch": 0.4471681672959628, "grad_norm": 0.3898450434207916, "learning_rate": 0.0001, "loss": 1.569, "step": 3849 }, { "epoch": 0.4472843450479233, "grad_norm": 0.41821977496147156, "learning_rate": 0.0001, "loss": 1.6888, "step": 3850 }, { "epoch": 0.44740052279988385, "grad_norm": 0.38670074939727783, "learning_rate": 0.0001, "loss": 1.6028, "step": 3851 }, { "epoch": 0.44751670055184434, "grad_norm": 0.4136195182800293, "learning_rate": 0.0001, "loss": 1.8563, "step": 3852 }, { "epoch": 0.4476328783038048, "grad_norm": 0.4174270033836365, "learning_rate": 0.0001, "loss": 1.7291, "step": 3853 }, { "epoch": 0.4477490560557653, "grad_norm": 0.41631799936294556, "learning_rate": 0.0001, "loss": 1.5991, "step": 3854 }, { "epoch": 0.4478652338077258, "grad_norm": 0.443198561668396, "learning_rate": 0.0001, "loss": 1.6342, "step": 3855 }, { "epoch": 0.4479814115596863, "grad_norm": 0.4108307659626007, "learning_rate": 0.0001, "loss": 1.6264, "step": 3856 }, { "epoch": 0.44809758931164684, "grad_norm": 0.36928313970565796, "learning_rate": 0.0001, "loss": 1.5414, "step": 3857 }, { "epoch": 0.4482137670636073, "grad_norm": 0.42408835887908936, "learning_rate": 0.0001, "loss": 1.909, "step": 3858 }, { "epoch": 0.4483299448155678, "grad_norm": 0.4042268991470337, "learning_rate": 0.0001, "loss": 1.6949, "step": 3859 }, { "epoch": 0.4484461225675283, "grad_norm": 0.40864208340644836, "learning_rate": 0.0001, "loss": 1.6614, "step": 3860 }, { "epoch": 0.4485623003194888, "grad_norm": 0.38092851638793945, "learning_rate": 0.0001, "loss": 1.6558, "step": 3861 }, { "epoch": 0.44867847807144934, "grad_norm": 0.3783656656742096, "learning_rate": 0.0001, "loss": 1.5293, "step": 3862 }, { "epoch": 0.4487946558234098, "grad_norm": 0.3975265920162201, "learning_rate": 0.0001, "loss": 1.6055, "step": 3863 }, { "epoch": 0.4489108335753703, "grad_norm": 0.42539462447166443, "learning_rate": 0.0001, "loss": 1.7332, "step": 3864 }, { "epoch": 0.4490270113273308, "grad_norm": 0.4166495203971863, "learning_rate": 0.0001, "loss": 1.6275, "step": 3865 }, { "epoch": 0.4491431890792913, "grad_norm": 0.39668262004852295, "learning_rate": 0.0001, "loss": 1.6647, "step": 3866 }, { "epoch": 0.44925936683125184, "grad_norm": 0.4214429557323456, "learning_rate": 0.0001, "loss": 1.6029, "step": 3867 }, { "epoch": 0.4493755445832123, "grad_norm": 0.43809008598327637, "learning_rate": 0.0001, "loss": 1.4617, "step": 3868 }, { "epoch": 0.4494917223351728, "grad_norm": 0.42034274339675903, "learning_rate": 0.0001, "loss": 1.794, "step": 3869 }, { "epoch": 0.4496079000871333, "grad_norm": 0.3921016454696655, "learning_rate": 0.0001, "loss": 1.5351, "step": 3870 }, { "epoch": 0.4497240778390938, "grad_norm": 0.4140496253967285, "learning_rate": 0.0001, "loss": 1.4723, "step": 3871 }, { "epoch": 0.44984025559105434, "grad_norm": 0.43890661001205444, "learning_rate": 0.0001, "loss": 1.6398, "step": 3872 }, { "epoch": 0.4499564333430148, "grad_norm": 0.40606656670570374, "learning_rate": 0.0001, "loss": 1.5126, "step": 3873 }, { "epoch": 0.4500726110949753, "grad_norm": 0.45156994462013245, "learning_rate": 0.0001, "loss": 1.6816, "step": 3874 }, { "epoch": 0.4501887888469358, "grad_norm": 0.4368317425251007, "learning_rate": 0.0001, "loss": 1.67, "step": 3875 }, { "epoch": 0.4503049665988963, "grad_norm": 0.41252076625823975, "learning_rate": 0.0001, "loss": 1.6612, "step": 3876 }, { "epoch": 0.4504211443508568, "grad_norm": 0.46063879132270813, "learning_rate": 0.0001, "loss": 1.7607, "step": 3877 }, { "epoch": 0.45053732210281733, "grad_norm": 0.4769091308116913, "learning_rate": 0.0001, "loss": 1.6788, "step": 3878 }, { "epoch": 0.4506534998547778, "grad_norm": 0.3981684446334839, "learning_rate": 0.0001, "loss": 1.7009, "step": 3879 }, { "epoch": 0.4507696776067383, "grad_norm": 0.4136994779109955, "learning_rate": 0.0001, "loss": 1.6577, "step": 3880 }, { "epoch": 0.4508858553586988, "grad_norm": 0.394010454416275, "learning_rate": 0.0001, "loss": 1.6413, "step": 3881 }, { "epoch": 0.4510020331106593, "grad_norm": 0.3855630159378052, "learning_rate": 0.0001, "loss": 1.6939, "step": 3882 }, { "epoch": 0.45111821086261983, "grad_norm": 0.41295209527015686, "learning_rate": 0.0001, "loss": 1.7063, "step": 3883 }, { "epoch": 0.4512343886145803, "grad_norm": 0.36102786660194397, "learning_rate": 0.0001, "loss": 1.5307, "step": 3884 }, { "epoch": 0.4513505663665408, "grad_norm": 0.42346733808517456, "learning_rate": 0.0001, "loss": 1.6742, "step": 3885 }, { "epoch": 0.4514667441185013, "grad_norm": 0.40333321690559387, "learning_rate": 0.0001, "loss": 1.6063, "step": 3886 }, { "epoch": 0.4515829218704618, "grad_norm": 0.38405728340148926, "learning_rate": 0.0001, "loss": 1.5387, "step": 3887 }, { "epoch": 0.45169909962242233, "grad_norm": 0.4108724594116211, "learning_rate": 0.0001, "loss": 1.6753, "step": 3888 }, { "epoch": 0.4518152773743828, "grad_norm": 0.42868340015411377, "learning_rate": 0.0001, "loss": 1.7265, "step": 3889 }, { "epoch": 0.4519314551263433, "grad_norm": 0.4091024100780487, "learning_rate": 0.0001, "loss": 1.5311, "step": 3890 }, { "epoch": 0.4520476328783038, "grad_norm": 0.4288122355937958, "learning_rate": 0.0001, "loss": 1.6001, "step": 3891 }, { "epoch": 0.4521638106302643, "grad_norm": 0.42493435740470886, "learning_rate": 0.0001, "loss": 1.8673, "step": 3892 }, { "epoch": 0.45227998838222483, "grad_norm": 0.3990475833415985, "learning_rate": 0.0001, "loss": 1.6854, "step": 3893 }, { "epoch": 0.4523961661341853, "grad_norm": 0.39818140864372253, "learning_rate": 0.0001, "loss": 1.7016, "step": 3894 }, { "epoch": 0.4525123438861458, "grad_norm": 0.39904457330703735, "learning_rate": 0.0001, "loss": 1.648, "step": 3895 }, { "epoch": 0.4526285216381063, "grad_norm": 0.41773471236228943, "learning_rate": 0.0001, "loss": 1.5215, "step": 3896 }, { "epoch": 0.4527446993900668, "grad_norm": 0.42894163727760315, "learning_rate": 0.0001, "loss": 1.805, "step": 3897 }, { "epoch": 0.45286087714202733, "grad_norm": 0.4194585084915161, "learning_rate": 0.0001, "loss": 1.7241, "step": 3898 }, { "epoch": 0.4529770548939878, "grad_norm": 0.40379759669303894, "learning_rate": 0.0001, "loss": 1.6349, "step": 3899 }, { "epoch": 0.4530932326459483, "grad_norm": 0.4571918845176697, "learning_rate": 0.0001, "loss": 1.8804, "step": 3900 }, { "epoch": 0.4532094103979088, "grad_norm": 0.3897266685962677, "learning_rate": 0.0001, "loss": 1.6028, "step": 3901 }, { "epoch": 0.4533255881498693, "grad_norm": 0.4049232304096222, "learning_rate": 0.0001, "loss": 1.6794, "step": 3902 }, { "epoch": 0.4534417659018298, "grad_norm": 0.43378233909606934, "learning_rate": 0.0001, "loss": 1.7152, "step": 3903 }, { "epoch": 0.4535579436537903, "grad_norm": 0.43286213278770447, "learning_rate": 0.0001, "loss": 1.7369, "step": 3904 }, { "epoch": 0.4536741214057508, "grad_norm": 0.41819891333580017, "learning_rate": 0.0001, "loss": 1.6184, "step": 3905 }, { "epoch": 0.4537902991577113, "grad_norm": 0.4100865125656128, "learning_rate": 0.0001, "loss": 1.6647, "step": 3906 }, { "epoch": 0.4539064769096718, "grad_norm": 0.3872487545013428, "learning_rate": 0.0001, "loss": 1.4784, "step": 3907 }, { "epoch": 0.4540226546616323, "grad_norm": 0.4244650602340698, "learning_rate": 0.0001, "loss": 1.5842, "step": 3908 }, { "epoch": 0.4541388324135928, "grad_norm": 0.38467180728912354, "learning_rate": 0.0001, "loss": 1.6559, "step": 3909 }, { "epoch": 0.4542550101655533, "grad_norm": 0.3707321286201477, "learning_rate": 0.0001, "loss": 1.5709, "step": 3910 }, { "epoch": 0.4543711879175138, "grad_norm": 0.412838876247406, "learning_rate": 0.0001, "loss": 1.6973, "step": 3911 }, { "epoch": 0.4544873656694743, "grad_norm": 0.4340794086456299, "learning_rate": 0.0001, "loss": 1.6017, "step": 3912 }, { "epoch": 0.4546035434214348, "grad_norm": 0.38097426295280457, "learning_rate": 0.0001, "loss": 1.4161, "step": 3913 }, { "epoch": 0.4547197211733953, "grad_norm": 0.418896347284317, "learning_rate": 0.0001, "loss": 1.7463, "step": 3914 }, { "epoch": 0.4548358989253558, "grad_norm": 0.427630752325058, "learning_rate": 0.0001, "loss": 1.8143, "step": 3915 }, { "epoch": 0.4549520766773163, "grad_norm": 0.4016280174255371, "learning_rate": 0.0001, "loss": 1.6164, "step": 3916 }, { "epoch": 0.4550682544292768, "grad_norm": 0.46618133783340454, "learning_rate": 0.0001, "loss": 1.6915, "step": 3917 }, { "epoch": 0.4551844321812373, "grad_norm": 0.40906664729118347, "learning_rate": 0.0001, "loss": 1.5765, "step": 3918 }, { "epoch": 0.4553006099331978, "grad_norm": 0.40268227458000183, "learning_rate": 0.0001, "loss": 1.5699, "step": 3919 }, { "epoch": 0.4554167876851583, "grad_norm": 0.408345490694046, "learning_rate": 0.0001, "loss": 1.7256, "step": 3920 }, { "epoch": 0.4555329654371188, "grad_norm": 0.39653435349464417, "learning_rate": 0.0001, "loss": 1.5844, "step": 3921 }, { "epoch": 0.4556491431890793, "grad_norm": 0.39751750230789185, "learning_rate": 0.0001, "loss": 1.6837, "step": 3922 }, { "epoch": 0.4557653209410398, "grad_norm": 0.3973672688007355, "learning_rate": 0.0001, "loss": 1.6071, "step": 3923 }, { "epoch": 0.45588149869300026, "grad_norm": 0.43432149291038513, "learning_rate": 0.0001, "loss": 1.6463, "step": 3924 }, { "epoch": 0.4559976764449608, "grad_norm": 0.43509483337402344, "learning_rate": 0.0001, "loss": 1.7848, "step": 3925 }, { "epoch": 0.4561138541969213, "grad_norm": 0.407279908657074, "learning_rate": 0.0001, "loss": 1.6915, "step": 3926 }, { "epoch": 0.4562300319488818, "grad_norm": 0.40425196290016174, "learning_rate": 0.0001, "loss": 1.6857, "step": 3927 }, { "epoch": 0.4563462097008423, "grad_norm": 0.4185945689678192, "learning_rate": 0.0001, "loss": 1.7103, "step": 3928 }, { "epoch": 0.45646238745280276, "grad_norm": 0.39828792214393616, "learning_rate": 0.0001, "loss": 1.7497, "step": 3929 }, { "epoch": 0.4565785652047633, "grad_norm": 0.40944311022758484, "learning_rate": 0.0001, "loss": 1.6036, "step": 3930 }, { "epoch": 0.4566947429567238, "grad_norm": 0.3925982117652893, "learning_rate": 0.0001, "loss": 1.4826, "step": 3931 }, { "epoch": 0.4568109207086843, "grad_norm": 0.413461834192276, "learning_rate": 0.0001, "loss": 1.7878, "step": 3932 }, { "epoch": 0.4569270984606448, "grad_norm": 0.4017687737941742, "learning_rate": 0.0001, "loss": 1.7589, "step": 3933 }, { "epoch": 0.45704327621260527, "grad_norm": 0.3962691128253937, "learning_rate": 0.0001, "loss": 1.547, "step": 3934 }, { "epoch": 0.4571594539645658, "grad_norm": 0.4170648753643036, "learning_rate": 0.0001, "loss": 1.6969, "step": 3935 }, { "epoch": 0.4572756317165263, "grad_norm": 0.4325627386569977, "learning_rate": 0.0001, "loss": 1.5711, "step": 3936 }, { "epoch": 0.4573918094684868, "grad_norm": 0.40576669573783875, "learning_rate": 0.0001, "loss": 1.5092, "step": 3937 }, { "epoch": 0.4575079872204473, "grad_norm": 0.40341272950172424, "learning_rate": 0.0001, "loss": 1.6716, "step": 3938 }, { "epoch": 0.45762416497240777, "grad_norm": 0.3955957293510437, "learning_rate": 0.0001, "loss": 1.7395, "step": 3939 }, { "epoch": 0.4577403427243683, "grad_norm": 0.3895145356655121, "learning_rate": 0.0001, "loss": 1.6388, "step": 3940 }, { "epoch": 0.4578565204763288, "grad_norm": 0.4147211015224457, "learning_rate": 0.0001, "loss": 1.5953, "step": 3941 }, { "epoch": 0.4579726982282893, "grad_norm": 0.39672261476516724, "learning_rate": 0.0001, "loss": 1.5186, "step": 3942 }, { "epoch": 0.4580888759802498, "grad_norm": 0.4300733506679535, "learning_rate": 0.0001, "loss": 1.6518, "step": 3943 }, { "epoch": 0.45820505373221027, "grad_norm": 0.4062694013118744, "learning_rate": 0.0001, "loss": 1.8041, "step": 3944 }, { "epoch": 0.45832123148417075, "grad_norm": 0.4132828414440155, "learning_rate": 0.0001, "loss": 1.537, "step": 3945 }, { "epoch": 0.4584374092361313, "grad_norm": 0.42365893721580505, "learning_rate": 0.0001, "loss": 1.6487, "step": 3946 }, { "epoch": 0.4585535869880918, "grad_norm": 0.3917977511882782, "learning_rate": 0.0001, "loss": 1.5432, "step": 3947 }, { "epoch": 0.4586697647400523, "grad_norm": 0.4075486361980438, "learning_rate": 0.0001, "loss": 1.6274, "step": 3948 }, { "epoch": 0.45878594249201277, "grad_norm": 0.41571909189224243, "learning_rate": 0.0001, "loss": 1.6735, "step": 3949 }, { "epoch": 0.45890212024397325, "grad_norm": 0.44082123041152954, "learning_rate": 0.0001, "loss": 1.7793, "step": 3950 }, { "epoch": 0.4590182979959338, "grad_norm": 0.40916866064071655, "learning_rate": 0.0001, "loss": 1.7366, "step": 3951 }, { "epoch": 0.4591344757478943, "grad_norm": 0.3818245530128479, "learning_rate": 0.0001, "loss": 1.5146, "step": 3952 }, { "epoch": 0.4592506534998548, "grad_norm": 0.4646684527397156, "learning_rate": 0.0001, "loss": 1.8009, "step": 3953 }, { "epoch": 0.45936683125181527, "grad_norm": 0.4245597720146179, "learning_rate": 0.0001, "loss": 1.6859, "step": 3954 }, { "epoch": 0.45948300900377576, "grad_norm": 0.42128875851631165, "learning_rate": 0.0001, "loss": 1.5136, "step": 3955 }, { "epoch": 0.4595991867557363, "grad_norm": 0.41281989216804504, "learning_rate": 0.0001, "loss": 1.7685, "step": 3956 }, { "epoch": 0.4597153645076968, "grad_norm": 0.4056452810764313, "learning_rate": 0.0001, "loss": 1.6749, "step": 3957 }, { "epoch": 0.4598315422596573, "grad_norm": 0.4163060188293457, "learning_rate": 0.0001, "loss": 1.6496, "step": 3958 }, { "epoch": 0.45994772001161777, "grad_norm": 0.4370429813861847, "learning_rate": 0.0001, "loss": 1.7209, "step": 3959 }, { "epoch": 0.46006389776357826, "grad_norm": 0.4308282136917114, "learning_rate": 0.0001, "loss": 1.7271, "step": 3960 }, { "epoch": 0.4601800755155388, "grad_norm": 0.42895281314849854, "learning_rate": 0.0001, "loss": 1.9235, "step": 3961 }, { "epoch": 0.4602962532674993, "grad_norm": 0.3952750563621521, "learning_rate": 0.0001, "loss": 1.7455, "step": 3962 }, { "epoch": 0.4604124310194598, "grad_norm": 0.4076629877090454, "learning_rate": 0.0001, "loss": 1.7427, "step": 3963 }, { "epoch": 0.46052860877142027, "grad_norm": 0.41401106119155884, "learning_rate": 0.0001, "loss": 1.6593, "step": 3964 }, { "epoch": 0.46064478652338076, "grad_norm": 0.39911845326423645, "learning_rate": 0.0001, "loss": 1.7128, "step": 3965 }, { "epoch": 0.46076096427534124, "grad_norm": 0.3996291160583496, "learning_rate": 0.0001, "loss": 1.7472, "step": 3966 }, { "epoch": 0.4608771420273018, "grad_norm": 0.4146043360233307, "learning_rate": 0.0001, "loss": 1.7652, "step": 3967 }, { "epoch": 0.4609933197792623, "grad_norm": 0.4056430459022522, "learning_rate": 0.0001, "loss": 1.6542, "step": 3968 }, { "epoch": 0.46110949753122277, "grad_norm": 0.3950080871582031, "learning_rate": 0.0001, "loss": 1.6484, "step": 3969 }, { "epoch": 0.46122567528318326, "grad_norm": 0.3902670443058014, "learning_rate": 0.0001, "loss": 1.5769, "step": 3970 }, { "epoch": 0.46134185303514375, "grad_norm": 0.408426970243454, "learning_rate": 0.0001, "loss": 1.5973, "step": 3971 }, { "epoch": 0.4614580307871043, "grad_norm": 0.42066359519958496, "learning_rate": 0.0001, "loss": 1.8631, "step": 3972 }, { "epoch": 0.4615742085390648, "grad_norm": 0.4041798412799835, "learning_rate": 0.0001, "loss": 1.615, "step": 3973 }, { "epoch": 0.46169038629102527, "grad_norm": 0.4431467056274414, "learning_rate": 0.0001, "loss": 1.6752, "step": 3974 }, { "epoch": 0.46180656404298576, "grad_norm": 0.3934585154056549, "learning_rate": 0.0001, "loss": 1.6613, "step": 3975 }, { "epoch": 0.46192274179494625, "grad_norm": 0.40387487411499023, "learning_rate": 0.0001, "loss": 1.5377, "step": 3976 }, { "epoch": 0.4620389195469068, "grad_norm": 0.39581024646759033, "learning_rate": 0.0001, "loss": 1.6956, "step": 3977 }, { "epoch": 0.4621550972988673, "grad_norm": 0.43566346168518066, "learning_rate": 0.0001, "loss": 1.7688, "step": 3978 }, { "epoch": 0.46227127505082777, "grad_norm": 0.399808794260025, "learning_rate": 0.0001, "loss": 1.7104, "step": 3979 }, { "epoch": 0.46238745280278826, "grad_norm": 0.42738527059555054, "learning_rate": 0.0001, "loss": 1.6639, "step": 3980 }, { "epoch": 0.46250363055474875, "grad_norm": 0.3896215260028839, "learning_rate": 0.0001, "loss": 1.605, "step": 3981 }, { "epoch": 0.4626198083067093, "grad_norm": 0.41741159558296204, "learning_rate": 0.0001, "loss": 1.7703, "step": 3982 }, { "epoch": 0.4627359860586698, "grad_norm": 0.40396156907081604, "learning_rate": 0.0001, "loss": 1.5881, "step": 3983 }, { "epoch": 0.46285216381063027, "grad_norm": 0.38009852170944214, "learning_rate": 0.0001, "loss": 1.4823, "step": 3984 }, { "epoch": 0.46296834156259076, "grad_norm": 0.43098655343055725, "learning_rate": 0.0001, "loss": 1.6838, "step": 3985 }, { "epoch": 0.46308451931455125, "grad_norm": 0.461058109998703, "learning_rate": 0.0001, "loss": 1.6804, "step": 3986 }, { "epoch": 0.4632006970665118, "grad_norm": 0.45243731141090393, "learning_rate": 0.0001, "loss": 1.9556, "step": 3987 }, { "epoch": 0.4633168748184723, "grad_norm": 0.41540998220443726, "learning_rate": 0.0001, "loss": 1.5549, "step": 3988 }, { "epoch": 0.46343305257043277, "grad_norm": 0.4656381905078888, "learning_rate": 0.0001, "loss": 1.6438, "step": 3989 }, { "epoch": 0.46354923032239326, "grad_norm": 0.4463013708591461, "learning_rate": 0.0001, "loss": 1.7644, "step": 3990 }, { "epoch": 0.46366540807435375, "grad_norm": 0.40116289258003235, "learning_rate": 0.0001, "loss": 1.6486, "step": 3991 }, { "epoch": 0.46378158582631424, "grad_norm": 0.41295450925827026, "learning_rate": 0.0001, "loss": 1.6462, "step": 3992 }, { "epoch": 0.4638977635782748, "grad_norm": 0.43854475021362305, "learning_rate": 0.0001, "loss": 1.8506, "step": 3993 }, { "epoch": 0.46401394133023527, "grad_norm": 0.39046186208724976, "learning_rate": 0.0001, "loss": 1.6821, "step": 3994 }, { "epoch": 0.46413011908219576, "grad_norm": 0.4226478338241577, "learning_rate": 0.0001, "loss": 1.7984, "step": 3995 }, { "epoch": 0.46424629683415625, "grad_norm": 0.40615102648735046, "learning_rate": 0.0001, "loss": 1.6383, "step": 3996 }, { "epoch": 0.46436247458611674, "grad_norm": 0.41897422075271606, "learning_rate": 0.0001, "loss": 1.5104, "step": 3997 }, { "epoch": 0.4644786523380773, "grad_norm": 0.3868965804576874, "learning_rate": 0.0001, "loss": 1.5209, "step": 3998 }, { "epoch": 0.46459483009003777, "grad_norm": 0.40913960337638855, "learning_rate": 0.0001, "loss": 1.7078, "step": 3999 }, { "epoch": 0.46471100784199826, "grad_norm": 0.44357261061668396, "learning_rate": 0.0001, "loss": 1.5254, "step": 4000 }, { "epoch": 0.46482718559395875, "grad_norm": 0.42295897006988525, "learning_rate": 0.0001, "loss": 1.6681, "step": 4001 }, { "epoch": 0.46494336334591924, "grad_norm": 0.38419994711875916, "learning_rate": 0.0001, "loss": 1.5279, "step": 4002 }, { "epoch": 0.4650595410978798, "grad_norm": 0.4167155623435974, "learning_rate": 0.0001, "loss": 1.7534, "step": 4003 }, { "epoch": 0.46517571884984027, "grad_norm": 0.394581139087677, "learning_rate": 0.0001, "loss": 1.661, "step": 4004 }, { "epoch": 0.46529189660180076, "grad_norm": 0.4233587384223938, "learning_rate": 0.0001, "loss": 1.6249, "step": 4005 }, { "epoch": 0.46540807435376125, "grad_norm": 0.3914411962032318, "learning_rate": 0.0001, "loss": 1.5213, "step": 4006 }, { "epoch": 0.46552425210572174, "grad_norm": 0.4535432457923889, "learning_rate": 0.0001, "loss": 1.798, "step": 4007 }, { "epoch": 0.4656404298576823, "grad_norm": 0.39255577325820923, "learning_rate": 0.0001, "loss": 1.5505, "step": 4008 }, { "epoch": 0.46575660760964277, "grad_norm": 0.38806408643722534, "learning_rate": 0.0001, "loss": 1.5348, "step": 4009 }, { "epoch": 0.46587278536160326, "grad_norm": 0.4313128888607025, "learning_rate": 0.0001, "loss": 1.5856, "step": 4010 }, { "epoch": 0.46598896311356375, "grad_norm": 0.39540693163871765, "learning_rate": 0.0001, "loss": 1.4181, "step": 4011 }, { "epoch": 0.46610514086552424, "grad_norm": 0.40360215306282043, "learning_rate": 0.0001, "loss": 1.6609, "step": 4012 }, { "epoch": 0.4662213186174847, "grad_norm": 0.4235546886920929, "learning_rate": 0.0001, "loss": 1.7763, "step": 4013 }, { "epoch": 0.46633749636944527, "grad_norm": 0.4328942596912384, "learning_rate": 0.0001, "loss": 1.8291, "step": 4014 }, { "epoch": 0.46645367412140576, "grad_norm": 0.40199771523475647, "learning_rate": 0.0001, "loss": 1.4812, "step": 4015 }, { "epoch": 0.46656985187336625, "grad_norm": 0.3917251527309418, "learning_rate": 0.0001, "loss": 1.6395, "step": 4016 }, { "epoch": 0.46668602962532674, "grad_norm": 0.4347023665904999, "learning_rate": 0.0001, "loss": 1.7273, "step": 4017 }, { "epoch": 0.4668022073772872, "grad_norm": 0.41650915145874023, "learning_rate": 0.0001, "loss": 1.6943, "step": 4018 }, { "epoch": 0.46691838512924777, "grad_norm": 0.39249107241630554, "learning_rate": 0.0001, "loss": 1.5723, "step": 4019 }, { "epoch": 0.46703456288120826, "grad_norm": 0.42523854970932007, "learning_rate": 0.0001, "loss": 1.6913, "step": 4020 }, { "epoch": 0.46715074063316875, "grad_norm": 0.385466068983078, "learning_rate": 0.0001, "loss": 1.4976, "step": 4021 }, { "epoch": 0.46726691838512924, "grad_norm": 0.39372915029525757, "learning_rate": 0.0001, "loss": 1.5259, "step": 4022 }, { "epoch": 0.4673830961370897, "grad_norm": 0.41617611050605774, "learning_rate": 0.0001, "loss": 1.6379, "step": 4023 }, { "epoch": 0.46749927388905027, "grad_norm": 0.4050281047821045, "learning_rate": 0.0001, "loss": 1.6093, "step": 4024 }, { "epoch": 0.46761545164101076, "grad_norm": 0.41811496019363403, "learning_rate": 0.0001, "loss": 1.6338, "step": 4025 }, { "epoch": 0.46773162939297125, "grad_norm": 0.4445625841617584, "learning_rate": 0.0001, "loss": 1.6577, "step": 4026 }, { "epoch": 0.46784780714493174, "grad_norm": 0.3947649598121643, "learning_rate": 0.0001, "loss": 1.6823, "step": 4027 }, { "epoch": 0.4679639848968922, "grad_norm": 0.39127346873283386, "learning_rate": 0.0001, "loss": 1.7173, "step": 4028 }, { "epoch": 0.46808016264885277, "grad_norm": 0.4156622588634491, "learning_rate": 0.0001, "loss": 1.782, "step": 4029 }, { "epoch": 0.46819634040081326, "grad_norm": 0.40657055377960205, "learning_rate": 0.0001, "loss": 1.7922, "step": 4030 }, { "epoch": 0.46831251815277375, "grad_norm": 0.4071350693702698, "learning_rate": 0.0001, "loss": 1.546, "step": 4031 }, { "epoch": 0.46842869590473424, "grad_norm": 0.41567331552505493, "learning_rate": 0.0001, "loss": 1.703, "step": 4032 }, { "epoch": 0.4685448736566947, "grad_norm": 0.4297296106815338, "learning_rate": 0.0001, "loss": 1.6237, "step": 4033 }, { "epoch": 0.4686610514086552, "grad_norm": 0.46219146251678467, "learning_rate": 0.0001, "loss": 1.6704, "step": 4034 }, { "epoch": 0.46877722916061576, "grad_norm": 0.38711977005004883, "learning_rate": 0.0001, "loss": 1.5427, "step": 4035 }, { "epoch": 0.46889340691257625, "grad_norm": 0.39784133434295654, "learning_rate": 0.0001, "loss": 1.5922, "step": 4036 }, { "epoch": 0.46900958466453674, "grad_norm": 0.4138612747192383, "learning_rate": 0.0001, "loss": 1.7207, "step": 4037 }, { "epoch": 0.4691257624164972, "grad_norm": 0.42039382457733154, "learning_rate": 0.0001, "loss": 1.6665, "step": 4038 }, { "epoch": 0.4692419401684577, "grad_norm": 0.40157628059387207, "learning_rate": 0.0001, "loss": 1.644, "step": 4039 }, { "epoch": 0.46935811792041826, "grad_norm": 0.40044525265693665, "learning_rate": 0.0001, "loss": 1.7727, "step": 4040 }, { "epoch": 0.46947429567237875, "grad_norm": 0.4081622064113617, "learning_rate": 0.0001, "loss": 1.627, "step": 4041 }, { "epoch": 0.46959047342433924, "grad_norm": 0.40018370747566223, "learning_rate": 0.0001, "loss": 1.6327, "step": 4042 }, { "epoch": 0.4697066511762997, "grad_norm": 0.3724619746208191, "learning_rate": 0.0001, "loss": 1.5894, "step": 4043 }, { "epoch": 0.4698228289282602, "grad_norm": 0.386092871427536, "learning_rate": 0.0001, "loss": 1.5178, "step": 4044 }, { "epoch": 0.46993900668022076, "grad_norm": 0.44518908858299255, "learning_rate": 0.0001, "loss": 1.8437, "step": 4045 }, { "epoch": 0.47005518443218125, "grad_norm": 0.4211956560611725, "learning_rate": 0.0001, "loss": 1.5107, "step": 4046 }, { "epoch": 0.47017136218414174, "grad_norm": 0.4604097306728363, "learning_rate": 0.0001, "loss": 1.6791, "step": 4047 }, { "epoch": 0.47028753993610223, "grad_norm": 0.4125956892967224, "learning_rate": 0.0001, "loss": 1.6793, "step": 4048 }, { "epoch": 0.4704037176880627, "grad_norm": 0.46292823553085327, "learning_rate": 0.0001, "loss": 1.877, "step": 4049 }, { "epoch": 0.47051989544002326, "grad_norm": 0.4355732202529907, "learning_rate": 0.0001, "loss": 1.708, "step": 4050 }, { "epoch": 0.47063607319198375, "grad_norm": 0.4154967665672302, "learning_rate": 0.0001, "loss": 1.7367, "step": 4051 }, { "epoch": 0.47075225094394424, "grad_norm": 0.4346443712711334, "learning_rate": 0.0001, "loss": 1.6655, "step": 4052 }, { "epoch": 0.47086842869590473, "grad_norm": 0.41704896092414856, "learning_rate": 0.0001, "loss": 1.8386, "step": 4053 }, { "epoch": 0.4709846064478652, "grad_norm": 0.4196739196777344, "learning_rate": 0.0001, "loss": 1.6717, "step": 4054 }, { "epoch": 0.4711007841998257, "grad_norm": 0.43008390069007874, "learning_rate": 0.0001, "loss": 1.7694, "step": 4055 }, { "epoch": 0.47121696195178625, "grad_norm": 0.37963777780532837, "learning_rate": 0.0001, "loss": 1.4642, "step": 4056 }, { "epoch": 0.47133313970374674, "grad_norm": 0.40089571475982666, "learning_rate": 0.0001, "loss": 1.5918, "step": 4057 }, { "epoch": 0.47144931745570723, "grad_norm": 0.4169794023036957, "learning_rate": 0.0001, "loss": 1.7457, "step": 4058 }, { "epoch": 0.4715654952076677, "grad_norm": 0.4090938866138458, "learning_rate": 0.0001, "loss": 1.6641, "step": 4059 }, { "epoch": 0.4716816729596282, "grad_norm": 0.4060511589050293, "learning_rate": 0.0001, "loss": 1.5108, "step": 4060 }, { "epoch": 0.47179785071158875, "grad_norm": 0.4162571132183075, "learning_rate": 0.0001, "loss": 1.7383, "step": 4061 }, { "epoch": 0.47191402846354924, "grad_norm": 0.39749133586883545, "learning_rate": 0.0001, "loss": 1.6077, "step": 4062 }, { "epoch": 0.47203020621550973, "grad_norm": 0.4047934412956238, "learning_rate": 0.0001, "loss": 1.5669, "step": 4063 }, { "epoch": 0.4721463839674702, "grad_norm": 0.436172217130661, "learning_rate": 0.0001, "loss": 1.6651, "step": 4064 }, { "epoch": 0.4722625617194307, "grad_norm": 0.4056571125984192, "learning_rate": 0.0001, "loss": 1.5496, "step": 4065 }, { "epoch": 0.47237873947139125, "grad_norm": 0.4182429909706116, "learning_rate": 0.0001, "loss": 1.7614, "step": 4066 }, { "epoch": 0.47249491722335174, "grad_norm": 0.3990522623062134, "learning_rate": 0.0001, "loss": 1.5811, "step": 4067 }, { "epoch": 0.47261109497531223, "grad_norm": 0.4048585593700409, "learning_rate": 0.0001, "loss": 1.6271, "step": 4068 }, { "epoch": 0.4727272727272727, "grad_norm": 0.3930312693119049, "learning_rate": 0.0001, "loss": 1.6553, "step": 4069 }, { "epoch": 0.4728434504792332, "grad_norm": 0.4067966639995575, "learning_rate": 0.0001, "loss": 1.6163, "step": 4070 }, { "epoch": 0.47295962823119375, "grad_norm": 0.4243100881576538, "learning_rate": 0.0001, "loss": 1.639, "step": 4071 }, { "epoch": 0.47307580598315424, "grad_norm": 0.4516909718513489, "learning_rate": 0.0001, "loss": 1.7275, "step": 4072 }, { "epoch": 0.47319198373511473, "grad_norm": 0.4081507921218872, "learning_rate": 0.0001, "loss": 1.6746, "step": 4073 }, { "epoch": 0.4733081614870752, "grad_norm": 0.43128862977027893, "learning_rate": 0.0001, "loss": 1.6588, "step": 4074 }, { "epoch": 0.4734243392390357, "grad_norm": 0.43954023718833923, "learning_rate": 0.0001, "loss": 1.8691, "step": 4075 }, { "epoch": 0.47354051699099625, "grad_norm": 0.422748863697052, "learning_rate": 0.0001, "loss": 1.6295, "step": 4076 }, { "epoch": 0.47365669474295674, "grad_norm": 0.40614694356918335, "learning_rate": 0.0001, "loss": 1.5405, "step": 4077 }, { "epoch": 0.47377287249491723, "grad_norm": 0.4623175263404846, "learning_rate": 0.0001, "loss": 1.8329, "step": 4078 }, { "epoch": 0.4738890502468777, "grad_norm": 0.44638627767562866, "learning_rate": 0.0001, "loss": 1.9259, "step": 4079 }, { "epoch": 0.4740052279988382, "grad_norm": 0.4093332588672638, "learning_rate": 0.0001, "loss": 1.6923, "step": 4080 }, { "epoch": 0.4741214057507987, "grad_norm": 0.4484642446041107, "learning_rate": 0.0001, "loss": 1.7393, "step": 4081 }, { "epoch": 0.47423758350275924, "grad_norm": 0.4261915981769562, "learning_rate": 0.0001, "loss": 1.7479, "step": 4082 }, { "epoch": 0.47435376125471973, "grad_norm": 0.3942796587944031, "learning_rate": 0.0001, "loss": 1.5641, "step": 4083 }, { "epoch": 0.4744699390066802, "grad_norm": 0.4067613184452057, "learning_rate": 0.0001, "loss": 1.6648, "step": 4084 }, { "epoch": 0.4745861167586407, "grad_norm": 0.42234787344932556, "learning_rate": 0.0001, "loss": 1.6912, "step": 4085 }, { "epoch": 0.4747022945106012, "grad_norm": 0.38540762662887573, "learning_rate": 0.0001, "loss": 1.5529, "step": 4086 }, { "epoch": 0.47481847226256174, "grad_norm": 0.386514276266098, "learning_rate": 0.0001, "loss": 1.6059, "step": 4087 }, { "epoch": 0.47493465001452223, "grad_norm": 0.4075110852718353, "learning_rate": 0.0001, "loss": 1.6056, "step": 4088 }, { "epoch": 0.4750508277664827, "grad_norm": 0.39759472012519836, "learning_rate": 0.0001, "loss": 1.54, "step": 4089 }, { "epoch": 0.4751670055184432, "grad_norm": 0.3987995386123657, "learning_rate": 0.0001, "loss": 1.5567, "step": 4090 }, { "epoch": 0.4752831832704037, "grad_norm": 0.3973774313926697, "learning_rate": 0.0001, "loss": 1.5121, "step": 4091 }, { "epoch": 0.47539936102236424, "grad_norm": 0.4317288398742676, "learning_rate": 0.0001, "loss": 1.597, "step": 4092 }, { "epoch": 0.47551553877432473, "grad_norm": 0.4416712522506714, "learning_rate": 0.0001, "loss": 1.7252, "step": 4093 }, { "epoch": 0.4756317165262852, "grad_norm": 0.43052783608436584, "learning_rate": 0.0001, "loss": 1.6964, "step": 4094 }, { "epoch": 0.4757478942782457, "grad_norm": 0.4163714647293091, "learning_rate": 0.0001, "loss": 1.6424, "step": 4095 }, { "epoch": 0.4758640720302062, "grad_norm": 0.41067805886268616, "learning_rate": 0.0001, "loss": 1.5735, "step": 4096 }, { "epoch": 0.47598024978216674, "grad_norm": 0.4482041001319885, "learning_rate": 0.0001, "loss": 1.8553, "step": 4097 }, { "epoch": 0.47609642753412723, "grad_norm": 0.4115692973136902, "learning_rate": 0.0001, "loss": 1.6287, "step": 4098 }, { "epoch": 0.4762126052860877, "grad_norm": 0.4347507953643799, "learning_rate": 0.0001, "loss": 1.7567, "step": 4099 }, { "epoch": 0.4763287830380482, "grad_norm": 0.409196674823761, "learning_rate": 0.0001, "loss": 1.646, "step": 4100 }, { "epoch": 0.4764449607900087, "grad_norm": 0.46702349185943604, "learning_rate": 0.0001, "loss": 1.7937, "step": 4101 }, { "epoch": 0.4765611385419692, "grad_norm": 0.3931210935115814, "learning_rate": 0.0001, "loss": 1.5143, "step": 4102 }, { "epoch": 0.47667731629392973, "grad_norm": 0.42548468708992004, "learning_rate": 0.0001, "loss": 1.7687, "step": 4103 }, { "epoch": 0.4767934940458902, "grad_norm": 0.690558910369873, "learning_rate": 0.0001, "loss": 1.6536, "step": 4104 }, { "epoch": 0.4769096717978507, "grad_norm": 0.41380855441093445, "learning_rate": 0.0001, "loss": 1.6347, "step": 4105 }, { "epoch": 0.4770258495498112, "grad_norm": 0.37842079997062683, "learning_rate": 0.0001, "loss": 1.5828, "step": 4106 }, { "epoch": 0.4771420273017717, "grad_norm": 0.4153887629508972, "learning_rate": 0.0001, "loss": 1.6783, "step": 4107 }, { "epoch": 0.47725820505373223, "grad_norm": 0.408482164144516, "learning_rate": 0.0001, "loss": 1.5271, "step": 4108 }, { "epoch": 0.4773743828056927, "grad_norm": 0.4116607904434204, "learning_rate": 0.0001, "loss": 1.5599, "step": 4109 }, { "epoch": 0.4774905605576532, "grad_norm": 0.3892625570297241, "learning_rate": 0.0001, "loss": 1.6382, "step": 4110 }, { "epoch": 0.4776067383096137, "grad_norm": 0.41425132751464844, "learning_rate": 0.0001, "loss": 1.6396, "step": 4111 }, { "epoch": 0.4777229160615742, "grad_norm": 0.4124647080898285, "learning_rate": 0.0001, "loss": 1.5062, "step": 4112 }, { "epoch": 0.47783909381353473, "grad_norm": 0.43545451760292053, "learning_rate": 0.0001, "loss": 1.7047, "step": 4113 }, { "epoch": 0.4779552715654952, "grad_norm": 0.4274523854255676, "learning_rate": 0.0001, "loss": 1.6518, "step": 4114 }, { "epoch": 0.4780714493174557, "grad_norm": 0.41416046023368835, "learning_rate": 0.0001, "loss": 1.6724, "step": 4115 }, { "epoch": 0.4781876270694162, "grad_norm": 0.4206220805644989, "learning_rate": 0.0001, "loss": 1.7191, "step": 4116 }, { "epoch": 0.4783038048213767, "grad_norm": 0.4083358645439148, "learning_rate": 0.0001, "loss": 1.7273, "step": 4117 }, { "epoch": 0.47841998257333723, "grad_norm": 0.4127695560455322, "learning_rate": 0.0001, "loss": 1.8423, "step": 4118 }, { "epoch": 0.4785361603252977, "grad_norm": 0.441363662481308, "learning_rate": 0.0001, "loss": 1.8589, "step": 4119 }, { "epoch": 0.4786523380772582, "grad_norm": 0.41235068440437317, "learning_rate": 0.0001, "loss": 1.7405, "step": 4120 }, { "epoch": 0.4787685158292187, "grad_norm": 0.41113796830177307, "learning_rate": 0.0001, "loss": 1.6793, "step": 4121 }, { "epoch": 0.4788846935811792, "grad_norm": 0.44025418162345886, "learning_rate": 0.0001, "loss": 1.7407, "step": 4122 }, { "epoch": 0.4790008713331397, "grad_norm": 0.3890633285045624, "learning_rate": 0.0001, "loss": 1.5768, "step": 4123 }, { "epoch": 0.4791170490851002, "grad_norm": 0.40119051933288574, "learning_rate": 0.0001, "loss": 1.6278, "step": 4124 }, { "epoch": 0.4792332268370607, "grad_norm": 0.4198724329471588, "learning_rate": 0.0001, "loss": 1.6017, "step": 4125 }, { "epoch": 0.4793494045890212, "grad_norm": 0.4038214385509491, "learning_rate": 0.0001, "loss": 1.6103, "step": 4126 }, { "epoch": 0.4794655823409817, "grad_norm": 0.4278080463409424, "learning_rate": 0.0001, "loss": 1.5321, "step": 4127 }, { "epoch": 0.4795817600929422, "grad_norm": 0.41188672184944153, "learning_rate": 0.0001, "loss": 1.621, "step": 4128 }, { "epoch": 0.4796979378449027, "grad_norm": 0.43129464983940125, "learning_rate": 0.0001, "loss": 1.7032, "step": 4129 }, { "epoch": 0.4798141155968632, "grad_norm": 0.4218551814556122, "learning_rate": 0.0001, "loss": 1.742, "step": 4130 }, { "epoch": 0.4799302933488237, "grad_norm": 0.37498828768730164, "learning_rate": 0.0001, "loss": 1.4627, "step": 4131 }, { "epoch": 0.4800464711007842, "grad_norm": 0.4059140682220459, "learning_rate": 0.0001, "loss": 1.6074, "step": 4132 }, { "epoch": 0.4801626488527447, "grad_norm": 0.4361055791378021, "learning_rate": 0.0001, "loss": 1.9007, "step": 4133 }, { "epoch": 0.4802788266047052, "grad_norm": 0.40483996272087097, "learning_rate": 0.0001, "loss": 1.6734, "step": 4134 }, { "epoch": 0.4803950043566657, "grad_norm": 0.4151374101638794, "learning_rate": 0.0001, "loss": 1.8005, "step": 4135 }, { "epoch": 0.4805111821086262, "grad_norm": 0.4022291302680969, "learning_rate": 0.0001, "loss": 1.6368, "step": 4136 }, { "epoch": 0.4806273598605867, "grad_norm": 0.40195754170417786, "learning_rate": 0.0001, "loss": 1.5596, "step": 4137 }, { "epoch": 0.4807435376125472, "grad_norm": 0.41277408599853516, "learning_rate": 0.0001, "loss": 1.5831, "step": 4138 }, { "epoch": 0.4808597153645077, "grad_norm": 0.4332813024520874, "learning_rate": 0.0001, "loss": 1.9132, "step": 4139 }, { "epoch": 0.4809758931164682, "grad_norm": 0.3997774124145508, "learning_rate": 0.0001, "loss": 1.5791, "step": 4140 }, { "epoch": 0.4810920708684287, "grad_norm": 0.4188762605190277, "learning_rate": 0.0001, "loss": 1.6811, "step": 4141 }, { "epoch": 0.4812082486203892, "grad_norm": 0.40828222036361694, "learning_rate": 0.0001, "loss": 1.5355, "step": 4142 }, { "epoch": 0.4813244263723497, "grad_norm": 0.4262183904647827, "learning_rate": 0.0001, "loss": 1.7421, "step": 4143 }, { "epoch": 0.48144060412431017, "grad_norm": 0.4175308346748352, "learning_rate": 0.0001, "loss": 1.7043, "step": 4144 }, { "epoch": 0.4815567818762707, "grad_norm": 0.40894895792007446, "learning_rate": 0.0001, "loss": 1.5447, "step": 4145 }, { "epoch": 0.4816729596282312, "grad_norm": 0.4432525038719177, "learning_rate": 0.0001, "loss": 1.6228, "step": 4146 }, { "epoch": 0.4817891373801917, "grad_norm": 0.412423700094223, "learning_rate": 0.0001, "loss": 1.5552, "step": 4147 }, { "epoch": 0.4819053151321522, "grad_norm": 0.3926151990890503, "learning_rate": 0.0001, "loss": 1.697, "step": 4148 }, { "epoch": 0.48202149288411267, "grad_norm": 0.4282263517379761, "learning_rate": 0.0001, "loss": 1.7589, "step": 4149 }, { "epoch": 0.4821376706360732, "grad_norm": 0.3978181481361389, "learning_rate": 0.0001, "loss": 1.4873, "step": 4150 }, { "epoch": 0.4822538483880337, "grad_norm": 0.39814430475234985, "learning_rate": 0.0001, "loss": 1.6877, "step": 4151 }, { "epoch": 0.4823700261399942, "grad_norm": 0.40920761227607727, "learning_rate": 0.0001, "loss": 1.7559, "step": 4152 }, { "epoch": 0.4824862038919547, "grad_norm": 0.43666741251945496, "learning_rate": 0.0001, "loss": 1.6647, "step": 4153 }, { "epoch": 0.48260238164391517, "grad_norm": 0.449294775724411, "learning_rate": 0.0001, "loss": 1.7839, "step": 4154 }, { "epoch": 0.4827185593958757, "grad_norm": 0.4310729205608368, "learning_rate": 0.0001, "loss": 1.5696, "step": 4155 }, { "epoch": 0.4828347371478362, "grad_norm": 0.4184435307979584, "learning_rate": 0.0001, "loss": 1.7023, "step": 4156 }, { "epoch": 0.4829509148997967, "grad_norm": 0.4592064321041107, "learning_rate": 0.0001, "loss": 1.853, "step": 4157 }, { "epoch": 0.4830670926517572, "grad_norm": 0.41902077198028564, "learning_rate": 0.0001, "loss": 1.7134, "step": 4158 }, { "epoch": 0.48318327040371767, "grad_norm": 0.40385156869888306, "learning_rate": 0.0001, "loss": 1.4542, "step": 4159 }, { "epoch": 0.4832994481556782, "grad_norm": 0.4063291549682617, "learning_rate": 0.0001, "loss": 1.444, "step": 4160 }, { "epoch": 0.4834156259076387, "grad_norm": 0.4716438055038452, "learning_rate": 0.0001, "loss": 1.8141, "step": 4161 }, { "epoch": 0.4835318036595992, "grad_norm": 0.3783859312534332, "learning_rate": 0.0001, "loss": 1.3083, "step": 4162 }, { "epoch": 0.4836479814115597, "grad_norm": 0.4354921281337738, "learning_rate": 0.0001, "loss": 1.5088, "step": 4163 }, { "epoch": 0.48376415916352017, "grad_norm": 0.4501488506793976, "learning_rate": 0.0001, "loss": 1.7144, "step": 4164 }, { "epoch": 0.48388033691548066, "grad_norm": 0.42613235116004944, "learning_rate": 0.0001, "loss": 1.6802, "step": 4165 }, { "epoch": 0.4839965146674412, "grad_norm": 0.41294065117836, "learning_rate": 0.0001, "loss": 1.7037, "step": 4166 }, { "epoch": 0.4841126924194017, "grad_norm": 0.399277001619339, "learning_rate": 0.0001, "loss": 1.4283, "step": 4167 }, { "epoch": 0.4842288701713622, "grad_norm": 0.4134821891784668, "learning_rate": 0.0001, "loss": 1.5305, "step": 4168 }, { "epoch": 0.48434504792332267, "grad_norm": 0.4123948812484741, "learning_rate": 0.0001, "loss": 1.6264, "step": 4169 }, { "epoch": 0.48446122567528316, "grad_norm": 0.4469812214374542, "learning_rate": 0.0001, "loss": 1.8222, "step": 4170 }, { "epoch": 0.4845774034272437, "grad_norm": 0.4323446452617645, "learning_rate": 0.0001, "loss": 1.761, "step": 4171 }, { "epoch": 0.4846935811792042, "grad_norm": 0.4458593428134918, "learning_rate": 0.0001, "loss": 1.7806, "step": 4172 }, { "epoch": 0.4848097589311647, "grad_norm": 0.4241911470890045, "learning_rate": 0.0001, "loss": 1.7529, "step": 4173 }, { "epoch": 0.48492593668312517, "grad_norm": 0.43590041995048523, "learning_rate": 0.0001, "loss": 1.6048, "step": 4174 }, { "epoch": 0.48504211443508566, "grad_norm": 0.4520244002342224, "learning_rate": 0.0001, "loss": 1.764, "step": 4175 }, { "epoch": 0.4851582921870462, "grad_norm": 0.40647611021995544, "learning_rate": 0.0001, "loss": 1.5838, "step": 4176 }, { "epoch": 0.4852744699390067, "grad_norm": 0.4175068438053131, "learning_rate": 0.0001, "loss": 1.7583, "step": 4177 }, { "epoch": 0.4853906476909672, "grad_norm": 0.4129279553890228, "learning_rate": 0.0001, "loss": 1.5511, "step": 4178 }, { "epoch": 0.48550682544292767, "grad_norm": 0.42088520526885986, "learning_rate": 0.0001, "loss": 1.756, "step": 4179 }, { "epoch": 0.48562300319488816, "grad_norm": 0.4570264220237732, "learning_rate": 0.0001, "loss": 1.7849, "step": 4180 }, { "epoch": 0.4857391809468487, "grad_norm": 0.4151148200035095, "learning_rate": 0.0001, "loss": 1.7816, "step": 4181 }, { "epoch": 0.4858553586988092, "grad_norm": 0.4177875518798828, "learning_rate": 0.0001, "loss": 1.5103, "step": 4182 }, { "epoch": 0.4859715364507697, "grad_norm": 0.4347212016582489, "learning_rate": 0.0001, "loss": 1.6551, "step": 4183 }, { "epoch": 0.48608771420273017, "grad_norm": 0.406599760055542, "learning_rate": 0.0001, "loss": 1.6424, "step": 4184 }, { "epoch": 0.48620389195469066, "grad_norm": 0.4066656529903412, "learning_rate": 0.0001, "loss": 1.5057, "step": 4185 }, { "epoch": 0.4863200697066512, "grad_norm": 0.4207620918750763, "learning_rate": 0.0001, "loss": 1.5419, "step": 4186 }, { "epoch": 0.4864362474586117, "grad_norm": 0.43291810154914856, "learning_rate": 0.0001, "loss": 1.7537, "step": 4187 }, { "epoch": 0.4865524252105722, "grad_norm": 0.44666996598243713, "learning_rate": 0.0001, "loss": 1.7468, "step": 4188 }, { "epoch": 0.48666860296253267, "grad_norm": 0.394218772649765, "learning_rate": 0.0001, "loss": 1.4794, "step": 4189 }, { "epoch": 0.48678478071449316, "grad_norm": 0.41351521015167236, "learning_rate": 0.0001, "loss": 1.7016, "step": 4190 }, { "epoch": 0.48690095846645365, "grad_norm": 0.412653386592865, "learning_rate": 0.0001, "loss": 1.5221, "step": 4191 }, { "epoch": 0.4870171362184142, "grad_norm": 0.4062814712524414, "learning_rate": 0.0001, "loss": 1.5941, "step": 4192 }, { "epoch": 0.4871333139703747, "grad_norm": 0.41630494594573975, "learning_rate": 0.0001, "loss": 1.7655, "step": 4193 }, { "epoch": 0.48724949172233517, "grad_norm": 0.41663774847984314, "learning_rate": 0.0001, "loss": 1.6558, "step": 4194 }, { "epoch": 0.48736566947429566, "grad_norm": 0.42713698744773865, "learning_rate": 0.0001, "loss": 1.6671, "step": 4195 }, { "epoch": 0.48748184722625615, "grad_norm": 0.43906137347221375, "learning_rate": 0.0001, "loss": 1.7459, "step": 4196 }, { "epoch": 0.4875980249782167, "grad_norm": 0.3749426305294037, "learning_rate": 0.0001, "loss": 1.6356, "step": 4197 }, { "epoch": 0.4877142027301772, "grad_norm": 0.5166771411895752, "learning_rate": 0.0001, "loss": 1.6694, "step": 4198 }, { "epoch": 0.48783038048213767, "grad_norm": 0.42134204506874084, "learning_rate": 0.0001, "loss": 1.505, "step": 4199 }, { "epoch": 0.48794655823409816, "grad_norm": 0.4114912152290344, "learning_rate": 0.0001, "loss": 1.5915, "step": 4200 }, { "epoch": 0.48806273598605865, "grad_norm": 0.4079913794994354, "learning_rate": 0.0001, "loss": 1.55, "step": 4201 }, { "epoch": 0.4881789137380192, "grad_norm": 0.4527676999568939, "learning_rate": 0.0001, "loss": 1.6562, "step": 4202 }, { "epoch": 0.4882950914899797, "grad_norm": 0.4323418438434601, "learning_rate": 0.0001, "loss": 1.6853, "step": 4203 }, { "epoch": 0.48841126924194017, "grad_norm": 0.43545210361480713, "learning_rate": 0.0001, "loss": 1.7446, "step": 4204 }, { "epoch": 0.48852744699390066, "grad_norm": 0.4371805191040039, "learning_rate": 0.0001, "loss": 1.5752, "step": 4205 }, { "epoch": 0.48864362474586115, "grad_norm": 0.42473122477531433, "learning_rate": 0.0001, "loss": 1.6911, "step": 4206 }, { "epoch": 0.4887598024978217, "grad_norm": 0.42178717255592346, "learning_rate": 0.0001, "loss": 1.7745, "step": 4207 }, { "epoch": 0.4888759802497822, "grad_norm": 0.39883777499198914, "learning_rate": 0.0001, "loss": 1.6367, "step": 4208 }, { "epoch": 0.48899215800174267, "grad_norm": 0.39283865690231323, "learning_rate": 0.0001, "loss": 1.5009, "step": 4209 }, { "epoch": 0.48910833575370316, "grad_norm": 0.390767902135849, "learning_rate": 0.0001, "loss": 1.6082, "step": 4210 }, { "epoch": 0.48922451350566365, "grad_norm": 0.40290915966033936, "learning_rate": 0.0001, "loss": 1.539, "step": 4211 }, { "epoch": 0.48934069125762414, "grad_norm": 0.3867753744125366, "learning_rate": 0.0001, "loss": 1.514, "step": 4212 }, { "epoch": 0.4894568690095847, "grad_norm": 0.40270060300827026, "learning_rate": 0.0001, "loss": 1.6378, "step": 4213 }, { "epoch": 0.48957304676154517, "grad_norm": 0.40884700417518616, "learning_rate": 0.0001, "loss": 1.5464, "step": 4214 }, { "epoch": 0.48968922451350566, "grad_norm": 0.4496394395828247, "learning_rate": 0.0001, "loss": 1.7212, "step": 4215 }, { "epoch": 0.48980540226546615, "grad_norm": 0.4228356182575226, "learning_rate": 0.0001, "loss": 1.7374, "step": 4216 }, { "epoch": 0.48992158001742664, "grad_norm": 0.3831014633178711, "learning_rate": 0.0001, "loss": 1.5569, "step": 4217 }, { "epoch": 0.4900377577693872, "grad_norm": 0.4408629834651947, "learning_rate": 0.0001, "loss": 1.661, "step": 4218 }, { "epoch": 0.4901539355213477, "grad_norm": 0.4310610890388489, "learning_rate": 0.0001, "loss": 1.6763, "step": 4219 }, { "epoch": 0.49027011327330816, "grad_norm": 0.4541778266429901, "learning_rate": 0.0001, "loss": 1.6869, "step": 4220 }, { "epoch": 0.49038629102526865, "grad_norm": 0.4351007342338562, "learning_rate": 0.0001, "loss": 1.7975, "step": 4221 }, { "epoch": 0.49050246877722914, "grad_norm": 0.4092995226383209, "learning_rate": 0.0001, "loss": 1.6019, "step": 4222 }, { "epoch": 0.4906186465291897, "grad_norm": 0.41761350631713867, "learning_rate": 0.0001, "loss": 1.6881, "step": 4223 }, { "epoch": 0.4907348242811502, "grad_norm": 0.42286214232444763, "learning_rate": 0.0001, "loss": 1.6648, "step": 4224 }, { "epoch": 0.49085100203311066, "grad_norm": 0.44822847843170166, "learning_rate": 0.0001, "loss": 1.588, "step": 4225 }, { "epoch": 0.49096717978507115, "grad_norm": 0.40160536766052246, "learning_rate": 0.0001, "loss": 1.5714, "step": 4226 }, { "epoch": 0.49108335753703164, "grad_norm": 0.39959850907325745, "learning_rate": 0.0001, "loss": 1.5053, "step": 4227 }, { "epoch": 0.4911995352889922, "grad_norm": 0.4233490228652954, "learning_rate": 0.0001, "loss": 1.7888, "step": 4228 }, { "epoch": 0.4913157130409527, "grad_norm": 0.41371530294418335, "learning_rate": 0.0001, "loss": 1.7971, "step": 4229 }, { "epoch": 0.49143189079291316, "grad_norm": 0.4132305085659027, "learning_rate": 0.0001, "loss": 1.7104, "step": 4230 }, { "epoch": 0.49154806854487365, "grad_norm": 0.4752264618873596, "learning_rate": 0.0001, "loss": 1.7421, "step": 4231 }, { "epoch": 0.49166424629683414, "grad_norm": 0.37759026885032654, "learning_rate": 0.0001, "loss": 1.5019, "step": 4232 }, { "epoch": 0.49178042404879463, "grad_norm": 0.423034131526947, "learning_rate": 0.0001, "loss": 1.7351, "step": 4233 }, { "epoch": 0.4918966018007552, "grad_norm": 0.42618465423583984, "learning_rate": 0.0001, "loss": 1.7726, "step": 4234 }, { "epoch": 0.49201277955271566, "grad_norm": 0.4361681044101715, "learning_rate": 0.0001, "loss": 1.7888, "step": 4235 }, { "epoch": 0.49212895730467615, "grad_norm": 0.4297569692134857, "learning_rate": 0.0001, "loss": 1.8216, "step": 4236 }, { "epoch": 0.49224513505663664, "grad_norm": 0.4081512987613678, "learning_rate": 0.0001, "loss": 1.6625, "step": 4237 }, { "epoch": 0.49236131280859713, "grad_norm": 0.4123179614543915, "learning_rate": 0.0001, "loss": 1.7767, "step": 4238 }, { "epoch": 0.4924774905605577, "grad_norm": 0.42507731914520264, "learning_rate": 0.0001, "loss": 1.7946, "step": 4239 }, { "epoch": 0.49259366831251816, "grad_norm": 0.4431770145893097, "learning_rate": 0.0001, "loss": 1.753, "step": 4240 }, { "epoch": 0.49270984606447865, "grad_norm": 0.4066873788833618, "learning_rate": 0.0001, "loss": 1.6131, "step": 4241 }, { "epoch": 0.49282602381643914, "grad_norm": 0.39505207538604736, "learning_rate": 0.0001, "loss": 1.6114, "step": 4242 }, { "epoch": 0.49294220156839963, "grad_norm": 0.3953791856765747, "learning_rate": 0.0001, "loss": 1.67, "step": 4243 }, { "epoch": 0.4930583793203602, "grad_norm": 0.4070848822593689, "learning_rate": 0.0001, "loss": 1.5279, "step": 4244 }, { "epoch": 0.49317455707232066, "grad_norm": 0.3953931927680969, "learning_rate": 0.0001, "loss": 1.6102, "step": 4245 }, { "epoch": 0.49329073482428115, "grad_norm": 0.41489389538764954, "learning_rate": 0.0001, "loss": 1.6426, "step": 4246 }, { "epoch": 0.49340691257624164, "grad_norm": 0.381734699010849, "learning_rate": 0.0001, "loss": 1.6127, "step": 4247 }, { "epoch": 0.49352309032820213, "grad_norm": 0.4128166139125824, "learning_rate": 0.0001, "loss": 1.7149, "step": 4248 }, { "epoch": 0.4936392680801627, "grad_norm": 0.4323165714740753, "learning_rate": 0.0001, "loss": 1.4858, "step": 4249 }, { "epoch": 0.49375544583212316, "grad_norm": 0.4313991069793701, "learning_rate": 0.0001, "loss": 1.74, "step": 4250 }, { "epoch": 0.49387162358408365, "grad_norm": 0.41360074281692505, "learning_rate": 0.0001, "loss": 1.7439, "step": 4251 }, { "epoch": 0.49398780133604414, "grad_norm": 0.41541731357574463, "learning_rate": 0.0001, "loss": 1.7381, "step": 4252 }, { "epoch": 0.49410397908800463, "grad_norm": 0.3932528495788574, "learning_rate": 0.0001, "loss": 1.5159, "step": 4253 }, { "epoch": 0.4942201568399651, "grad_norm": 0.41322755813598633, "learning_rate": 0.0001, "loss": 1.6621, "step": 4254 }, { "epoch": 0.49433633459192566, "grad_norm": 0.39592114090919495, "learning_rate": 0.0001, "loss": 1.614, "step": 4255 }, { "epoch": 0.49445251234388615, "grad_norm": 0.4194466173648834, "learning_rate": 0.0001, "loss": 1.7409, "step": 4256 }, { "epoch": 0.49456869009584664, "grad_norm": 0.4130099415779114, "learning_rate": 0.0001, "loss": 1.5919, "step": 4257 }, { "epoch": 0.49468486784780713, "grad_norm": 0.39909616112709045, "learning_rate": 0.0001, "loss": 1.3854, "step": 4258 }, { "epoch": 0.4948010455997676, "grad_norm": 0.4121648371219635, "learning_rate": 0.0001, "loss": 1.6729, "step": 4259 }, { "epoch": 0.49491722335172816, "grad_norm": 0.4459930956363678, "learning_rate": 0.0001, "loss": 1.8308, "step": 4260 }, { "epoch": 0.49503340110368865, "grad_norm": 0.42174288630485535, "learning_rate": 0.0001, "loss": 1.7415, "step": 4261 }, { "epoch": 0.49514957885564914, "grad_norm": 0.4145379364490509, "learning_rate": 0.0001, "loss": 1.713, "step": 4262 }, { "epoch": 0.49526575660760963, "grad_norm": 0.40851959586143494, "learning_rate": 0.0001, "loss": 1.7002, "step": 4263 }, { "epoch": 0.4953819343595701, "grad_norm": 0.42193418741226196, "learning_rate": 0.0001, "loss": 1.6732, "step": 4264 }, { "epoch": 0.49549811211153066, "grad_norm": 0.4060492515563965, "learning_rate": 0.0001, "loss": 1.7783, "step": 4265 }, { "epoch": 0.49561428986349115, "grad_norm": 0.3899538815021515, "learning_rate": 0.0001, "loss": 1.6265, "step": 4266 }, { "epoch": 0.49573046761545164, "grad_norm": 0.42852500081062317, "learning_rate": 0.0001, "loss": 1.7705, "step": 4267 }, { "epoch": 0.49584664536741213, "grad_norm": 0.41384875774383545, "learning_rate": 0.0001, "loss": 1.6454, "step": 4268 }, { "epoch": 0.4959628231193726, "grad_norm": 0.41500815749168396, "learning_rate": 0.0001, "loss": 1.6607, "step": 4269 }, { "epoch": 0.49607900087133316, "grad_norm": 0.40487122535705566, "learning_rate": 0.0001, "loss": 1.5961, "step": 4270 }, { "epoch": 0.49619517862329365, "grad_norm": 0.4144662022590637, "learning_rate": 0.0001, "loss": 1.6281, "step": 4271 }, { "epoch": 0.49631135637525414, "grad_norm": 0.40673938393592834, "learning_rate": 0.0001, "loss": 1.633, "step": 4272 }, { "epoch": 0.49642753412721463, "grad_norm": 0.4252079725265503, "learning_rate": 0.0001, "loss": 1.5973, "step": 4273 }, { "epoch": 0.4965437118791751, "grad_norm": 0.4067811071872711, "learning_rate": 0.0001, "loss": 1.5687, "step": 4274 }, { "epoch": 0.49665988963113566, "grad_norm": 0.4027066230773926, "learning_rate": 0.0001, "loss": 1.568, "step": 4275 }, { "epoch": 0.49677606738309615, "grad_norm": 0.4006246030330658, "learning_rate": 0.0001, "loss": 1.5842, "step": 4276 }, { "epoch": 0.49689224513505664, "grad_norm": 0.42645466327667236, "learning_rate": 0.0001, "loss": 1.7451, "step": 4277 }, { "epoch": 0.49700842288701713, "grad_norm": 0.3970547616481781, "learning_rate": 0.0001, "loss": 1.71, "step": 4278 }, { "epoch": 0.4971246006389776, "grad_norm": 0.43710431456565857, "learning_rate": 0.0001, "loss": 1.8507, "step": 4279 }, { "epoch": 0.4972407783909381, "grad_norm": 0.4387757182121277, "learning_rate": 0.0001, "loss": 1.7658, "step": 4280 }, { "epoch": 0.49735695614289865, "grad_norm": 0.3968219757080078, "learning_rate": 0.0001, "loss": 1.6035, "step": 4281 }, { "epoch": 0.49747313389485914, "grad_norm": 0.42177560925483704, "learning_rate": 0.0001, "loss": 1.6699, "step": 4282 }, { "epoch": 0.49758931164681963, "grad_norm": 0.46521419286727905, "learning_rate": 0.0001, "loss": 1.8792, "step": 4283 }, { "epoch": 0.4977054893987801, "grad_norm": 0.45769378542900085, "learning_rate": 0.0001, "loss": 1.9134, "step": 4284 }, { "epoch": 0.4978216671507406, "grad_norm": 0.415170282125473, "learning_rate": 0.0001, "loss": 1.691, "step": 4285 }, { "epoch": 0.49793784490270115, "grad_norm": 0.4076900780200958, "learning_rate": 0.0001, "loss": 1.614, "step": 4286 }, { "epoch": 0.49805402265466164, "grad_norm": 0.42825424671173096, "learning_rate": 0.0001, "loss": 1.6607, "step": 4287 }, { "epoch": 0.49817020040662213, "grad_norm": 0.39369601011276245, "learning_rate": 0.0001, "loss": 1.5995, "step": 4288 }, { "epoch": 0.4982863781585826, "grad_norm": 0.415863037109375, "learning_rate": 0.0001, "loss": 1.5335, "step": 4289 }, { "epoch": 0.4984025559105431, "grad_norm": 0.4149499833583832, "learning_rate": 0.0001, "loss": 1.6372, "step": 4290 }, { "epoch": 0.49851873366250365, "grad_norm": 0.4311138391494751, "learning_rate": 0.0001, "loss": 1.6839, "step": 4291 }, { "epoch": 0.49863491141446414, "grad_norm": 0.4341351389884949, "learning_rate": 0.0001, "loss": 1.6409, "step": 4292 }, { "epoch": 0.49875108916642463, "grad_norm": 0.43341371417045593, "learning_rate": 0.0001, "loss": 1.6568, "step": 4293 }, { "epoch": 0.4988672669183851, "grad_norm": 0.4445037543773651, "learning_rate": 0.0001, "loss": 1.8165, "step": 4294 }, { "epoch": 0.4989834446703456, "grad_norm": 0.4031905233860016, "learning_rate": 0.0001, "loss": 1.5914, "step": 4295 }, { "epoch": 0.49909962242230616, "grad_norm": 0.3780546188354492, "learning_rate": 0.0001, "loss": 1.3956, "step": 4296 }, { "epoch": 0.49921580017426664, "grad_norm": 0.4312121868133545, "learning_rate": 0.0001, "loss": 1.6278, "step": 4297 }, { "epoch": 0.49933197792622713, "grad_norm": 0.42564424872398376, "learning_rate": 0.0001, "loss": 1.6275, "step": 4298 }, { "epoch": 0.4994481556781876, "grad_norm": 0.41907092928886414, "learning_rate": 0.0001, "loss": 1.6452, "step": 4299 }, { "epoch": 0.4995643334301481, "grad_norm": 0.44234657287597656, "learning_rate": 0.0001, "loss": 1.8374, "step": 4300 }, { "epoch": 0.4996805111821086, "grad_norm": 0.43759170174598694, "learning_rate": 0.0001, "loss": 1.7536, "step": 4301 }, { "epoch": 0.49979668893406914, "grad_norm": 0.4182339012622833, "learning_rate": 0.0001, "loss": 1.717, "step": 4302 }, { "epoch": 0.49991286668602963, "grad_norm": 0.4217366874217987, "learning_rate": 0.0001, "loss": 1.7919, "step": 4303 }, { "epoch": 0.5000290444379901, "grad_norm": 0.4132644236087799, "learning_rate": 0.0001, "loss": 1.5728, "step": 4304 }, { "epoch": 0.5001452221899506, "grad_norm": 0.4324856996536255, "learning_rate": 0.0001, "loss": 1.5971, "step": 4305 }, { "epoch": 0.5002613999419111, "grad_norm": 0.4039803445339203, "learning_rate": 0.0001, "loss": 1.6391, "step": 4306 }, { "epoch": 0.5003775776938716, "grad_norm": 0.4950650930404663, "learning_rate": 0.0001, "loss": 1.8132, "step": 4307 }, { "epoch": 0.5004937554458321, "grad_norm": 0.4059164524078369, "learning_rate": 0.0001, "loss": 1.6249, "step": 4308 }, { "epoch": 0.5006099331977927, "grad_norm": 0.4175871014595032, "learning_rate": 0.0001, "loss": 1.575, "step": 4309 }, { "epoch": 0.5007261109497532, "grad_norm": 0.40468019247055054, "learning_rate": 0.0001, "loss": 1.4808, "step": 4310 }, { "epoch": 0.5008422887017137, "grad_norm": 0.45055514574050903, "learning_rate": 0.0001, "loss": 1.8287, "step": 4311 }, { "epoch": 0.5009584664536741, "grad_norm": 0.4032573997974396, "learning_rate": 0.0001, "loss": 1.5997, "step": 4312 }, { "epoch": 0.5010746442056346, "grad_norm": 0.4106045067310333, "learning_rate": 0.0001, "loss": 1.5928, "step": 4313 }, { "epoch": 0.5011908219575951, "grad_norm": 0.4448246955871582, "learning_rate": 0.0001, "loss": 1.6811, "step": 4314 }, { "epoch": 0.5013069997095556, "grad_norm": 0.45284712314605713, "learning_rate": 0.0001, "loss": 1.6662, "step": 4315 }, { "epoch": 0.5014231774615161, "grad_norm": 0.4348433017730713, "learning_rate": 0.0001, "loss": 1.6896, "step": 4316 }, { "epoch": 0.5015393552134766, "grad_norm": 0.4291326701641083, "learning_rate": 0.0001, "loss": 1.6956, "step": 4317 }, { "epoch": 0.5016555329654371, "grad_norm": 0.42308685183525085, "learning_rate": 0.0001, "loss": 1.7336, "step": 4318 }, { "epoch": 0.5017717107173976, "grad_norm": 0.4419673681259155, "learning_rate": 0.0001, "loss": 1.8071, "step": 4319 }, { "epoch": 0.5018878884693582, "grad_norm": 0.4059154987335205, "learning_rate": 0.0001, "loss": 1.7697, "step": 4320 }, { "epoch": 0.5020040662213187, "grad_norm": 0.44748127460479736, "learning_rate": 0.0001, "loss": 1.8027, "step": 4321 }, { "epoch": 0.5021202439732791, "grad_norm": 0.4353194832801819, "learning_rate": 0.0001, "loss": 1.77, "step": 4322 }, { "epoch": 0.5022364217252396, "grad_norm": 0.43615394830703735, "learning_rate": 0.0001, "loss": 1.6816, "step": 4323 }, { "epoch": 0.5023525994772001, "grad_norm": 0.4044416546821594, "learning_rate": 0.0001, "loss": 1.4778, "step": 4324 }, { "epoch": 0.5024687772291606, "grad_norm": 0.4381449222564697, "learning_rate": 0.0001, "loss": 1.6863, "step": 4325 }, { "epoch": 0.5025849549811211, "grad_norm": 0.41832980513572693, "learning_rate": 0.0001, "loss": 1.5382, "step": 4326 }, { "epoch": 0.5027011327330816, "grad_norm": 0.41191017627716064, "learning_rate": 0.0001, "loss": 1.7425, "step": 4327 }, { "epoch": 0.5028173104850421, "grad_norm": 0.4307502508163452, "learning_rate": 0.0001, "loss": 1.7084, "step": 4328 }, { "epoch": 0.5029334882370026, "grad_norm": 0.39051318168640137, "learning_rate": 0.0001, "loss": 1.4484, "step": 4329 }, { "epoch": 0.5030496659889632, "grad_norm": 0.4072178602218628, "learning_rate": 0.0001, "loss": 1.6725, "step": 4330 }, { "epoch": 0.5031658437409237, "grad_norm": 0.4136503040790558, "learning_rate": 0.0001, "loss": 1.5188, "step": 4331 }, { "epoch": 0.5032820214928841, "grad_norm": 0.4073225259780884, "learning_rate": 0.0001, "loss": 1.5818, "step": 4332 }, { "epoch": 0.5033981992448446, "grad_norm": 0.45762255787849426, "learning_rate": 0.0001, "loss": 1.8642, "step": 4333 }, { "epoch": 0.5035143769968051, "grad_norm": 0.3969933092594147, "learning_rate": 0.0001, "loss": 1.4036, "step": 4334 }, { "epoch": 0.5036305547487656, "grad_norm": 0.4575958251953125, "learning_rate": 0.0001, "loss": 1.7049, "step": 4335 }, { "epoch": 0.5037467325007261, "grad_norm": 0.4271569550037384, "learning_rate": 0.0001, "loss": 1.786, "step": 4336 }, { "epoch": 0.5038629102526866, "grad_norm": 0.41905540227890015, "learning_rate": 0.0001, "loss": 1.4688, "step": 4337 }, { "epoch": 0.5039790880046471, "grad_norm": 0.4295593202114105, "learning_rate": 0.0001, "loss": 1.7429, "step": 4338 }, { "epoch": 0.5040952657566076, "grad_norm": 0.4510742425918579, "learning_rate": 0.0001, "loss": 1.6844, "step": 4339 }, { "epoch": 0.5042114435085681, "grad_norm": 0.38910943269729614, "learning_rate": 0.0001, "loss": 1.6087, "step": 4340 }, { "epoch": 0.5043276212605287, "grad_norm": 0.43599119782447815, "learning_rate": 0.0001, "loss": 1.6775, "step": 4341 }, { "epoch": 0.5044437990124891, "grad_norm": 0.4247783124446869, "learning_rate": 0.0001, "loss": 1.7167, "step": 4342 }, { "epoch": 0.5045599767644496, "grad_norm": 0.41217729449272156, "learning_rate": 0.0001, "loss": 1.6514, "step": 4343 }, { "epoch": 0.5046761545164101, "grad_norm": 0.4111458361148834, "learning_rate": 0.0001, "loss": 1.7107, "step": 4344 }, { "epoch": 0.5047923322683706, "grad_norm": 0.44936275482177734, "learning_rate": 0.0001, "loss": 1.7152, "step": 4345 }, { "epoch": 0.5049085100203311, "grad_norm": 0.39809176325798035, "learning_rate": 0.0001, "loss": 1.4638, "step": 4346 }, { "epoch": 0.5050246877722916, "grad_norm": 0.45946526527404785, "learning_rate": 0.0001, "loss": 1.7883, "step": 4347 }, { "epoch": 0.5051408655242521, "grad_norm": 0.4194871485233307, "learning_rate": 0.0001, "loss": 1.6288, "step": 4348 }, { "epoch": 0.5052570432762126, "grad_norm": 0.4171569049358368, "learning_rate": 0.0001, "loss": 1.6859, "step": 4349 }, { "epoch": 0.5053732210281731, "grad_norm": 0.4225428104400635, "learning_rate": 0.0001, "loss": 1.5897, "step": 4350 }, { "epoch": 0.5054893987801337, "grad_norm": 0.410523921251297, "learning_rate": 0.0001, "loss": 1.7041, "step": 4351 }, { "epoch": 0.5056055765320941, "grad_norm": 0.4116719663143158, "learning_rate": 0.0001, "loss": 1.6019, "step": 4352 }, { "epoch": 0.5057217542840546, "grad_norm": 0.405364453792572, "learning_rate": 0.0001, "loss": 1.6812, "step": 4353 }, { "epoch": 0.5058379320360151, "grad_norm": 0.38687193393707275, "learning_rate": 0.0001, "loss": 1.3621, "step": 4354 }, { "epoch": 0.5059541097879756, "grad_norm": 0.4160784184932709, "learning_rate": 0.0001, "loss": 1.6666, "step": 4355 }, { "epoch": 0.5060702875399361, "grad_norm": 0.4402058720588684, "learning_rate": 0.0001, "loss": 1.6338, "step": 4356 }, { "epoch": 0.5061864652918966, "grad_norm": 0.4149058759212494, "learning_rate": 0.0001, "loss": 1.5838, "step": 4357 }, { "epoch": 0.5063026430438571, "grad_norm": 0.39352482557296753, "learning_rate": 0.0001, "loss": 1.5758, "step": 4358 }, { "epoch": 0.5064188207958176, "grad_norm": 0.3932999074459076, "learning_rate": 0.0001, "loss": 1.5528, "step": 4359 }, { "epoch": 0.5065349985477781, "grad_norm": 0.41762053966522217, "learning_rate": 0.0001, "loss": 1.6335, "step": 4360 }, { "epoch": 0.5066511762997385, "grad_norm": 0.39525556564331055, "learning_rate": 0.0001, "loss": 1.5574, "step": 4361 }, { "epoch": 0.5067673540516991, "grad_norm": 0.41103050112724304, "learning_rate": 0.0001, "loss": 1.671, "step": 4362 }, { "epoch": 0.5068835318036596, "grad_norm": 0.4429750144481659, "learning_rate": 0.0001, "loss": 1.7287, "step": 4363 }, { "epoch": 0.5069997095556201, "grad_norm": 0.42924562096595764, "learning_rate": 0.0001, "loss": 1.6523, "step": 4364 }, { "epoch": 0.5071158873075806, "grad_norm": 0.4067068099975586, "learning_rate": 0.0001, "loss": 1.5797, "step": 4365 }, { "epoch": 0.5072320650595411, "grad_norm": 0.42301809787750244, "learning_rate": 0.0001, "loss": 1.5505, "step": 4366 }, { "epoch": 0.5073482428115016, "grad_norm": 0.4366927146911621, "learning_rate": 0.0001, "loss": 1.6521, "step": 4367 }, { "epoch": 0.5074644205634621, "grad_norm": 0.48699912428855896, "learning_rate": 0.0001, "loss": 1.6093, "step": 4368 }, { "epoch": 0.5075805983154226, "grad_norm": 0.4013606309890747, "learning_rate": 0.0001, "loss": 1.6461, "step": 4369 }, { "epoch": 0.5076967760673831, "grad_norm": 0.42257198691368103, "learning_rate": 0.0001, "loss": 1.6119, "step": 4370 }, { "epoch": 0.5078129538193435, "grad_norm": 0.41376587748527527, "learning_rate": 0.0001, "loss": 1.519, "step": 4371 }, { "epoch": 0.5079291315713041, "grad_norm": 0.42543458938598633, "learning_rate": 0.0001, "loss": 1.6674, "step": 4372 }, { "epoch": 0.5080453093232646, "grad_norm": 0.40931135416030884, "learning_rate": 0.0001, "loss": 1.728, "step": 4373 }, { "epoch": 0.5081614870752251, "grad_norm": 0.3892346918582916, "learning_rate": 0.0001, "loss": 1.5749, "step": 4374 }, { "epoch": 0.5082776648271856, "grad_norm": 0.3831053674221039, "learning_rate": 0.0001, "loss": 1.4605, "step": 4375 }, { "epoch": 0.5083938425791461, "grad_norm": 0.3641822338104248, "learning_rate": 0.0001, "loss": 1.5647, "step": 4376 }, { "epoch": 0.5085100203311066, "grad_norm": 0.40960368514060974, "learning_rate": 0.0001, "loss": 1.6555, "step": 4377 }, { "epoch": 0.5086261980830671, "grad_norm": 0.42281374335289, "learning_rate": 0.0001, "loss": 1.6668, "step": 4378 }, { "epoch": 0.5087423758350276, "grad_norm": 0.4280136227607727, "learning_rate": 0.0001, "loss": 1.5184, "step": 4379 }, { "epoch": 0.5088585535869881, "grad_norm": 0.4118686020374298, "learning_rate": 0.0001, "loss": 1.6772, "step": 4380 }, { "epoch": 0.5089747313389485, "grad_norm": 0.45106491446495056, "learning_rate": 0.0001, "loss": 1.5719, "step": 4381 }, { "epoch": 0.509090909090909, "grad_norm": 0.3965935707092285, "learning_rate": 0.0001, "loss": 1.4588, "step": 4382 }, { "epoch": 0.5092070868428696, "grad_norm": 0.3852759003639221, "learning_rate": 0.0001, "loss": 1.3832, "step": 4383 }, { "epoch": 0.5093232645948301, "grad_norm": 0.4219793677330017, "learning_rate": 0.0001, "loss": 1.6145, "step": 4384 }, { "epoch": 0.5094394423467906, "grad_norm": 0.4372718632221222, "learning_rate": 0.0001, "loss": 1.6514, "step": 4385 }, { "epoch": 0.5095556200987511, "grad_norm": 0.40098169445991516, "learning_rate": 0.0001, "loss": 1.6136, "step": 4386 }, { "epoch": 0.5096717978507116, "grad_norm": 0.4264896810054779, "learning_rate": 0.0001, "loss": 1.6914, "step": 4387 }, { "epoch": 0.5097879756026721, "grad_norm": 0.4068213105201721, "learning_rate": 0.0001, "loss": 1.713, "step": 4388 }, { "epoch": 0.5099041533546326, "grad_norm": 0.4088718593120575, "learning_rate": 0.0001, "loss": 1.6382, "step": 4389 }, { "epoch": 0.5100203311065931, "grad_norm": 0.42276012897491455, "learning_rate": 0.0001, "loss": 1.6818, "step": 4390 }, { "epoch": 0.5101365088585536, "grad_norm": 0.4298572540283203, "learning_rate": 0.0001, "loss": 1.67, "step": 4391 }, { "epoch": 0.510252686610514, "grad_norm": 0.4012244939804077, "learning_rate": 0.0001, "loss": 1.4795, "step": 4392 }, { "epoch": 0.5103688643624746, "grad_norm": 0.41358691453933716, "learning_rate": 0.0001, "loss": 1.5749, "step": 4393 }, { "epoch": 0.5104850421144351, "grad_norm": 0.41776931285858154, "learning_rate": 0.0001, "loss": 1.6255, "step": 4394 }, { "epoch": 0.5106012198663956, "grad_norm": 0.4451960325241089, "learning_rate": 0.0001, "loss": 1.6869, "step": 4395 }, { "epoch": 0.5107173976183561, "grad_norm": 0.43334871530532837, "learning_rate": 0.0001, "loss": 1.6803, "step": 4396 }, { "epoch": 0.5108335753703166, "grad_norm": 0.3872377276420593, "learning_rate": 0.0001, "loss": 1.5352, "step": 4397 }, { "epoch": 0.5109497531222771, "grad_norm": 0.3914475440979004, "learning_rate": 0.0001, "loss": 1.6059, "step": 4398 }, { "epoch": 0.5110659308742376, "grad_norm": 0.447561115026474, "learning_rate": 0.0001, "loss": 1.7356, "step": 4399 }, { "epoch": 0.5111821086261981, "grad_norm": 0.45441824197769165, "learning_rate": 0.0001, "loss": 1.6668, "step": 4400 }, { "epoch": 0.5112982863781586, "grad_norm": 0.3847355544567108, "learning_rate": 0.0001, "loss": 1.4718, "step": 4401 }, { "epoch": 0.511414464130119, "grad_norm": 0.41067802906036377, "learning_rate": 0.0001, "loss": 1.6448, "step": 4402 }, { "epoch": 0.5115306418820795, "grad_norm": 0.4514882564544678, "learning_rate": 0.0001, "loss": 1.792, "step": 4403 }, { "epoch": 0.5116468196340401, "grad_norm": 0.4275973439216614, "learning_rate": 0.0001, "loss": 1.7367, "step": 4404 }, { "epoch": 0.5117629973860006, "grad_norm": 0.41547560691833496, "learning_rate": 0.0001, "loss": 1.5478, "step": 4405 }, { "epoch": 0.5118791751379611, "grad_norm": 0.40632376074790955, "learning_rate": 0.0001, "loss": 1.5644, "step": 4406 }, { "epoch": 0.5119953528899216, "grad_norm": 0.4488579034805298, "learning_rate": 0.0001, "loss": 1.5699, "step": 4407 }, { "epoch": 0.5121115306418821, "grad_norm": 0.42902886867523193, "learning_rate": 0.0001, "loss": 1.5679, "step": 4408 }, { "epoch": 0.5122277083938426, "grad_norm": 0.42067989706993103, "learning_rate": 0.0001, "loss": 1.5815, "step": 4409 }, { "epoch": 0.5123438861458031, "grad_norm": 0.4122752845287323, "learning_rate": 0.0001, "loss": 1.4263, "step": 4410 }, { "epoch": 0.5124600638977636, "grad_norm": 0.443730890750885, "learning_rate": 0.0001, "loss": 1.5752, "step": 4411 }, { "epoch": 0.512576241649724, "grad_norm": 0.43061313033103943, "learning_rate": 0.0001, "loss": 1.5591, "step": 4412 }, { "epoch": 0.5126924194016845, "grad_norm": 0.42542704939842224, "learning_rate": 0.0001, "loss": 1.6375, "step": 4413 }, { "epoch": 0.5128085971536451, "grad_norm": 0.41993504762649536, "learning_rate": 0.0001, "loss": 1.5644, "step": 4414 }, { "epoch": 0.5129247749056056, "grad_norm": 0.43750661611557007, "learning_rate": 0.0001, "loss": 1.6251, "step": 4415 }, { "epoch": 0.5130409526575661, "grad_norm": 0.4423817992210388, "learning_rate": 0.0001, "loss": 1.5846, "step": 4416 }, { "epoch": 0.5131571304095266, "grad_norm": 0.44078147411346436, "learning_rate": 0.0001, "loss": 1.7149, "step": 4417 }, { "epoch": 0.5132733081614871, "grad_norm": 0.41512519121170044, "learning_rate": 0.0001, "loss": 1.6267, "step": 4418 }, { "epoch": 0.5133894859134476, "grad_norm": 0.4335169792175293, "learning_rate": 0.0001, "loss": 1.6486, "step": 4419 }, { "epoch": 0.5135056636654081, "grad_norm": 0.39206641912460327, "learning_rate": 0.0001, "loss": 1.5569, "step": 4420 }, { "epoch": 0.5136218414173686, "grad_norm": 0.4225231111049652, "learning_rate": 0.0001, "loss": 1.6272, "step": 4421 }, { "epoch": 0.513738019169329, "grad_norm": 0.4060937762260437, "learning_rate": 0.0001, "loss": 1.5008, "step": 4422 }, { "epoch": 0.5138541969212895, "grad_norm": 0.4321762025356293, "learning_rate": 0.0001, "loss": 1.5683, "step": 4423 }, { "epoch": 0.51397037467325, "grad_norm": 0.4687374234199524, "learning_rate": 0.0001, "loss": 1.7355, "step": 4424 }, { "epoch": 0.5140865524252106, "grad_norm": 0.4362178146839142, "learning_rate": 0.0001, "loss": 1.5629, "step": 4425 }, { "epoch": 0.5142027301771711, "grad_norm": 0.42870602011680603, "learning_rate": 0.0001, "loss": 1.5298, "step": 4426 }, { "epoch": 0.5143189079291316, "grad_norm": 0.4190438985824585, "learning_rate": 0.0001, "loss": 1.6347, "step": 4427 }, { "epoch": 0.5144350856810921, "grad_norm": 0.43848124146461487, "learning_rate": 0.0001, "loss": 1.6039, "step": 4428 }, { "epoch": 0.5145512634330526, "grad_norm": 4.445634365081787, "learning_rate": 0.0001, "loss": 1.5215, "step": 4429 }, { "epoch": 0.5146674411850131, "grad_norm": 0.43199974298477173, "learning_rate": 0.0001, "loss": 1.6709, "step": 4430 }, { "epoch": 0.5147836189369736, "grad_norm": 0.39670631289482117, "learning_rate": 0.0001, "loss": 1.5531, "step": 4431 }, { "epoch": 0.514899796688934, "grad_norm": 0.4087604880332947, "learning_rate": 0.0001, "loss": 1.6337, "step": 4432 }, { "epoch": 0.5150159744408945, "grad_norm": 0.4322669804096222, "learning_rate": 0.0001, "loss": 1.7526, "step": 4433 }, { "epoch": 0.515132152192855, "grad_norm": 0.40673530101776123, "learning_rate": 0.0001, "loss": 1.5269, "step": 4434 }, { "epoch": 0.5152483299448156, "grad_norm": 0.4231918454170227, "learning_rate": 0.0001, "loss": 1.7449, "step": 4435 }, { "epoch": 0.5153645076967761, "grad_norm": 0.42139682173728943, "learning_rate": 0.0001, "loss": 1.6195, "step": 4436 }, { "epoch": 0.5154806854487366, "grad_norm": 0.4375002384185791, "learning_rate": 0.0001, "loss": 1.6319, "step": 4437 }, { "epoch": 0.5155968632006971, "grad_norm": 0.41837188601493835, "learning_rate": 0.0001, "loss": 1.7799, "step": 4438 }, { "epoch": 0.5157130409526576, "grad_norm": 0.36996695399284363, "learning_rate": 0.0001, "loss": 1.5302, "step": 4439 }, { "epoch": 0.5158292187046181, "grad_norm": 0.428469181060791, "learning_rate": 0.0001, "loss": 1.7409, "step": 4440 }, { "epoch": 0.5159453964565786, "grad_norm": 0.3917810320854187, "learning_rate": 0.0001, "loss": 1.619, "step": 4441 }, { "epoch": 0.516061574208539, "grad_norm": 0.41308334469795227, "learning_rate": 0.0001, "loss": 1.845, "step": 4442 }, { "epoch": 0.5161777519604995, "grad_norm": 0.41742420196533203, "learning_rate": 0.0001, "loss": 1.6663, "step": 4443 }, { "epoch": 0.51629392971246, "grad_norm": 0.42206430435180664, "learning_rate": 0.0001, "loss": 1.5815, "step": 4444 }, { "epoch": 0.5164101074644205, "grad_norm": 0.41111844778060913, "learning_rate": 0.0001, "loss": 1.687, "step": 4445 }, { "epoch": 0.5165262852163811, "grad_norm": 0.42969468235969543, "learning_rate": 0.0001, "loss": 1.571, "step": 4446 }, { "epoch": 0.5166424629683416, "grad_norm": 0.4495762288570404, "learning_rate": 0.0001, "loss": 1.5439, "step": 4447 }, { "epoch": 0.5167586407203021, "grad_norm": 0.41785508394241333, "learning_rate": 0.0001, "loss": 1.4878, "step": 4448 }, { "epoch": 0.5168748184722626, "grad_norm": 0.4101484715938568, "learning_rate": 0.0001, "loss": 1.6829, "step": 4449 }, { "epoch": 0.5169909962242231, "grad_norm": 0.422985315322876, "learning_rate": 0.0001, "loss": 1.6234, "step": 4450 }, { "epoch": 0.5171071739761836, "grad_norm": 0.45259571075439453, "learning_rate": 0.0001, "loss": 1.6417, "step": 4451 }, { "epoch": 0.517223351728144, "grad_norm": 0.3940257728099823, "learning_rate": 0.0001, "loss": 1.5267, "step": 4452 }, { "epoch": 0.5173395294801045, "grad_norm": 0.41753649711608887, "learning_rate": 0.0001, "loss": 1.5316, "step": 4453 }, { "epoch": 0.517455707232065, "grad_norm": 0.4366135895252228, "learning_rate": 0.0001, "loss": 1.7993, "step": 4454 }, { "epoch": 0.5175718849840255, "grad_norm": 0.43286582827568054, "learning_rate": 0.0001, "loss": 1.7569, "step": 4455 }, { "epoch": 0.5176880627359861, "grad_norm": 0.4226556420326233, "learning_rate": 0.0001, "loss": 1.6214, "step": 4456 }, { "epoch": 0.5178042404879466, "grad_norm": 0.42732787132263184, "learning_rate": 0.0001, "loss": 1.7325, "step": 4457 }, { "epoch": 0.5179204182399071, "grad_norm": 0.4511219561100006, "learning_rate": 0.0001, "loss": 1.6626, "step": 4458 }, { "epoch": 0.5180365959918676, "grad_norm": 0.418562114238739, "learning_rate": 0.0001, "loss": 1.6329, "step": 4459 }, { "epoch": 0.5181527737438281, "grad_norm": 0.4227406978607178, "learning_rate": 0.0001, "loss": 1.6733, "step": 4460 }, { "epoch": 0.5182689514957886, "grad_norm": 0.4748642146587372, "learning_rate": 0.0001, "loss": 1.8498, "step": 4461 }, { "epoch": 0.518385129247749, "grad_norm": 0.4098576605319977, "learning_rate": 0.0001, "loss": 1.7199, "step": 4462 }, { "epoch": 0.5185013069997095, "grad_norm": 0.3687531054019928, "learning_rate": 0.0001, "loss": 1.4375, "step": 4463 }, { "epoch": 0.51861748475167, "grad_norm": 0.43809184432029724, "learning_rate": 0.0001, "loss": 1.6958, "step": 4464 }, { "epoch": 0.5187336625036305, "grad_norm": 0.4355543851852417, "learning_rate": 0.0001, "loss": 1.529, "step": 4465 }, { "epoch": 0.518849840255591, "grad_norm": 0.40263766050338745, "learning_rate": 0.0001, "loss": 1.624, "step": 4466 }, { "epoch": 0.5189660180075516, "grad_norm": 0.40483129024505615, "learning_rate": 0.0001, "loss": 1.6302, "step": 4467 }, { "epoch": 0.5190821957595121, "grad_norm": 0.4438629150390625, "learning_rate": 0.0001, "loss": 1.7151, "step": 4468 }, { "epoch": 0.5191983735114726, "grad_norm": 0.417834997177124, "learning_rate": 0.0001, "loss": 1.5252, "step": 4469 }, { "epoch": 0.5193145512634331, "grad_norm": 0.4064665138721466, "learning_rate": 0.0001, "loss": 1.6158, "step": 4470 }, { "epoch": 0.5194307290153936, "grad_norm": 0.4300839900970459, "learning_rate": 0.0001, "loss": 1.7744, "step": 4471 }, { "epoch": 0.519546906767354, "grad_norm": 0.43153315782546997, "learning_rate": 0.0001, "loss": 1.6319, "step": 4472 }, { "epoch": 0.5196630845193145, "grad_norm": 0.40210986137390137, "learning_rate": 0.0001, "loss": 1.654, "step": 4473 }, { "epoch": 0.519779262271275, "grad_norm": 0.4261165261268616, "learning_rate": 0.0001, "loss": 1.6603, "step": 4474 }, { "epoch": 0.5198954400232355, "grad_norm": 0.40189406275749207, "learning_rate": 0.0001, "loss": 1.541, "step": 4475 }, { "epoch": 0.520011617775196, "grad_norm": 0.40366873145103455, "learning_rate": 0.0001, "loss": 1.6311, "step": 4476 }, { "epoch": 0.5201277955271566, "grad_norm": 0.39482223987579346, "learning_rate": 0.0001, "loss": 1.5386, "step": 4477 }, { "epoch": 0.5202439732791171, "grad_norm": 0.4213715195655823, "learning_rate": 0.0001, "loss": 1.6938, "step": 4478 }, { "epoch": 0.5203601510310776, "grad_norm": 0.412153959274292, "learning_rate": 0.0001, "loss": 1.6617, "step": 4479 }, { "epoch": 0.5204763287830381, "grad_norm": 0.4511539041996002, "learning_rate": 0.0001, "loss": 1.7117, "step": 4480 }, { "epoch": 0.5205925065349986, "grad_norm": 0.4023229777812958, "learning_rate": 0.0001, "loss": 1.6164, "step": 4481 }, { "epoch": 0.520708684286959, "grad_norm": 0.410552054643631, "learning_rate": 0.0001, "loss": 1.6157, "step": 4482 }, { "epoch": 0.5208248620389195, "grad_norm": 0.4153655767440796, "learning_rate": 0.0001, "loss": 1.7299, "step": 4483 }, { "epoch": 0.52094103979088, "grad_norm": 0.4262380599975586, "learning_rate": 0.0001, "loss": 1.7457, "step": 4484 }, { "epoch": 0.5210572175428405, "grad_norm": 0.38934075832366943, "learning_rate": 0.0001, "loss": 1.5891, "step": 4485 }, { "epoch": 0.521173395294801, "grad_norm": 0.417797327041626, "learning_rate": 0.0001, "loss": 1.5473, "step": 4486 }, { "epoch": 0.5212895730467616, "grad_norm": 0.45602330565452576, "learning_rate": 0.0001, "loss": 1.7248, "step": 4487 }, { "epoch": 0.5214057507987221, "grad_norm": 0.3888321816921234, "learning_rate": 0.0001, "loss": 1.5404, "step": 4488 }, { "epoch": 0.5215219285506826, "grad_norm": 0.4114174246788025, "learning_rate": 0.0001, "loss": 1.623, "step": 4489 }, { "epoch": 0.5216381063026431, "grad_norm": 0.43008658289909363, "learning_rate": 0.0001, "loss": 1.6818, "step": 4490 }, { "epoch": 0.5217542840546036, "grad_norm": 0.40249064564704895, "learning_rate": 0.0001, "loss": 1.6013, "step": 4491 }, { "epoch": 0.521870461806564, "grad_norm": 0.39665487408638, "learning_rate": 0.0001, "loss": 1.6483, "step": 4492 }, { "epoch": 0.5219866395585245, "grad_norm": 0.46716028451919556, "learning_rate": 0.0001, "loss": 1.8122, "step": 4493 }, { "epoch": 0.522102817310485, "grad_norm": 0.4550071954727173, "learning_rate": 0.0001, "loss": 1.6226, "step": 4494 }, { "epoch": 0.5222189950624455, "grad_norm": 0.40407729148864746, "learning_rate": 0.0001, "loss": 1.5972, "step": 4495 }, { "epoch": 0.522335172814406, "grad_norm": 0.4077194035053253, "learning_rate": 0.0001, "loss": 1.534, "step": 4496 }, { "epoch": 0.5224513505663665, "grad_norm": 0.4374626576900482, "learning_rate": 0.0001, "loss": 1.8369, "step": 4497 }, { "epoch": 0.5225675283183271, "grad_norm": 0.44101259112358093, "learning_rate": 0.0001, "loss": 1.6678, "step": 4498 }, { "epoch": 0.5226837060702876, "grad_norm": 0.45485249161720276, "learning_rate": 0.0001, "loss": 1.619, "step": 4499 }, { "epoch": 0.5227998838222481, "grad_norm": 0.4245244860649109, "learning_rate": 0.0001, "loss": 1.709, "step": 4500 }, { "epoch": 0.5229160615742086, "grad_norm": 0.4580254554748535, "learning_rate": 0.0001, "loss": 1.6549, "step": 4501 }, { "epoch": 0.523032239326169, "grad_norm": 0.4223475754261017, "learning_rate": 0.0001, "loss": 1.5295, "step": 4502 }, { "epoch": 0.5231484170781295, "grad_norm": 0.42708471417427063, "learning_rate": 0.0001, "loss": 1.8193, "step": 4503 }, { "epoch": 0.52326459483009, "grad_norm": 0.42466166615486145, "learning_rate": 0.0001, "loss": 1.6805, "step": 4504 }, { "epoch": 0.5233807725820505, "grad_norm": 0.4411070942878723, "learning_rate": 0.0001, "loss": 1.6752, "step": 4505 }, { "epoch": 0.523496950334011, "grad_norm": 0.4360845983028412, "learning_rate": 0.0001, "loss": 1.635, "step": 4506 }, { "epoch": 0.5236131280859715, "grad_norm": 0.4071287512779236, "learning_rate": 0.0001, "loss": 1.6069, "step": 4507 }, { "epoch": 0.5237293058379321, "grad_norm": 0.4129199981689453, "learning_rate": 0.0001, "loss": 1.5767, "step": 4508 }, { "epoch": 0.5238454835898926, "grad_norm": 0.40559443831443787, "learning_rate": 0.0001, "loss": 1.6083, "step": 4509 }, { "epoch": 0.5239616613418531, "grad_norm": 0.4351811707019806, "learning_rate": 0.0001, "loss": 1.7353, "step": 4510 }, { "epoch": 0.5240778390938136, "grad_norm": 0.4436653256416321, "learning_rate": 0.0001, "loss": 1.8253, "step": 4511 }, { "epoch": 0.524194016845774, "grad_norm": 0.4075913727283478, "learning_rate": 0.0001, "loss": 1.4621, "step": 4512 }, { "epoch": 0.5243101945977345, "grad_norm": 0.4155612885951996, "learning_rate": 0.0001, "loss": 1.6053, "step": 4513 }, { "epoch": 0.524426372349695, "grad_norm": 0.4131620526313782, "learning_rate": 0.0001, "loss": 1.6391, "step": 4514 }, { "epoch": 0.5245425501016555, "grad_norm": 0.4026408791542053, "learning_rate": 0.0001, "loss": 1.4066, "step": 4515 }, { "epoch": 0.524658727853616, "grad_norm": 0.42349475622177124, "learning_rate": 0.0001, "loss": 1.7857, "step": 4516 }, { "epoch": 0.5247749056055765, "grad_norm": 0.4265604615211487, "learning_rate": 0.0001, "loss": 1.6916, "step": 4517 }, { "epoch": 0.524891083357537, "grad_norm": 0.4295273423194885, "learning_rate": 0.0001, "loss": 1.7647, "step": 4518 }, { "epoch": 0.5250072611094976, "grad_norm": 0.42609089612960815, "learning_rate": 0.0001, "loss": 1.586, "step": 4519 }, { "epoch": 0.5251234388614581, "grad_norm": 0.42496544122695923, "learning_rate": 0.0001, "loss": 1.6681, "step": 4520 }, { "epoch": 0.5252396166134186, "grad_norm": 0.4378863573074341, "learning_rate": 0.0001, "loss": 1.7169, "step": 4521 }, { "epoch": 0.525355794365379, "grad_norm": 0.43438613414764404, "learning_rate": 0.0001, "loss": 1.7235, "step": 4522 }, { "epoch": 0.5254719721173395, "grad_norm": 0.4248746931552887, "learning_rate": 0.0001, "loss": 1.4961, "step": 4523 }, { "epoch": 0.5255881498693, "grad_norm": 0.43905171751976013, "learning_rate": 0.0001, "loss": 1.7656, "step": 4524 }, { "epoch": 0.5257043276212605, "grad_norm": 0.4113710820674896, "learning_rate": 0.0001, "loss": 1.66, "step": 4525 }, { "epoch": 0.525820505373221, "grad_norm": 0.4203665256500244, "learning_rate": 0.0001, "loss": 1.5595, "step": 4526 }, { "epoch": 0.5259366831251815, "grad_norm": 0.4476137161254883, "learning_rate": 0.0001, "loss": 1.7966, "step": 4527 }, { "epoch": 0.526052860877142, "grad_norm": 0.400177925825119, "learning_rate": 0.0001, "loss": 1.6435, "step": 4528 }, { "epoch": 0.5261690386291026, "grad_norm": 0.4333209991455078, "learning_rate": 0.0001, "loss": 1.8233, "step": 4529 }, { "epoch": 0.5262852163810631, "grad_norm": 0.43465256690979004, "learning_rate": 0.0001, "loss": 1.665, "step": 4530 }, { "epoch": 0.5264013941330236, "grad_norm": 0.4152390658855438, "learning_rate": 0.0001, "loss": 1.6964, "step": 4531 }, { "epoch": 0.526517571884984, "grad_norm": 0.42579424381256104, "learning_rate": 0.0001, "loss": 1.6378, "step": 4532 }, { "epoch": 0.5266337496369445, "grad_norm": 0.4379642903804779, "learning_rate": 0.0001, "loss": 1.6145, "step": 4533 }, { "epoch": 0.526749927388905, "grad_norm": 0.4103189706802368, "learning_rate": 0.0001, "loss": 1.6903, "step": 4534 }, { "epoch": 0.5268661051408655, "grad_norm": 0.45309609174728394, "learning_rate": 0.0001, "loss": 1.7196, "step": 4535 }, { "epoch": 0.526982282892826, "grad_norm": 0.42796722054481506, "learning_rate": 0.0001, "loss": 1.6582, "step": 4536 }, { "epoch": 0.5270984606447865, "grad_norm": 0.41641005873680115, "learning_rate": 0.0001, "loss": 1.6095, "step": 4537 }, { "epoch": 0.527214638396747, "grad_norm": 0.4048060178756714, "learning_rate": 0.0001, "loss": 1.5817, "step": 4538 }, { "epoch": 0.5273308161487075, "grad_norm": 0.41168227791786194, "learning_rate": 0.0001, "loss": 1.6224, "step": 4539 }, { "epoch": 0.5274469939006681, "grad_norm": 0.45252159237861633, "learning_rate": 0.0001, "loss": 1.6104, "step": 4540 }, { "epoch": 0.5275631716526286, "grad_norm": 0.4120829999446869, "learning_rate": 0.0001, "loss": 1.5013, "step": 4541 }, { "epoch": 0.527679349404589, "grad_norm": 0.44721588492393494, "learning_rate": 0.0001, "loss": 1.6587, "step": 4542 }, { "epoch": 0.5277955271565495, "grad_norm": 0.43271827697753906, "learning_rate": 0.0001, "loss": 1.6503, "step": 4543 }, { "epoch": 0.52791170490851, "grad_norm": 0.42740026116371155, "learning_rate": 0.0001, "loss": 1.7174, "step": 4544 }, { "epoch": 0.5280278826604705, "grad_norm": 0.3889728784561157, "learning_rate": 0.0001, "loss": 1.6171, "step": 4545 }, { "epoch": 0.528144060412431, "grad_norm": 0.4437465965747833, "learning_rate": 0.0001, "loss": 1.6956, "step": 4546 }, { "epoch": 0.5282602381643915, "grad_norm": 0.39472874999046326, "learning_rate": 0.0001, "loss": 1.5576, "step": 4547 }, { "epoch": 0.528376415916352, "grad_norm": 0.4104737937450409, "learning_rate": 0.0001, "loss": 1.8144, "step": 4548 }, { "epoch": 0.5284925936683125, "grad_norm": 0.4385277330875397, "learning_rate": 0.0001, "loss": 1.7121, "step": 4549 }, { "epoch": 0.5286087714202731, "grad_norm": 0.3903822600841522, "learning_rate": 0.0001, "loss": 1.547, "step": 4550 }, { "epoch": 0.5287249491722336, "grad_norm": 0.4265904128551483, "learning_rate": 0.0001, "loss": 1.6622, "step": 4551 }, { "epoch": 0.528841126924194, "grad_norm": 0.4125080704689026, "learning_rate": 0.0001, "loss": 1.6587, "step": 4552 }, { "epoch": 0.5289573046761545, "grad_norm": 0.42072439193725586, "learning_rate": 0.0001, "loss": 1.5583, "step": 4553 }, { "epoch": 0.529073482428115, "grad_norm": 0.41168302297592163, "learning_rate": 0.0001, "loss": 1.5291, "step": 4554 }, { "epoch": 0.5291896601800755, "grad_norm": 0.39374563097953796, "learning_rate": 0.0001, "loss": 1.5875, "step": 4555 }, { "epoch": 0.529305837932036, "grad_norm": 0.43736445903778076, "learning_rate": 0.0001, "loss": 1.6175, "step": 4556 }, { "epoch": 0.5294220156839965, "grad_norm": 0.456046462059021, "learning_rate": 0.0001, "loss": 1.6058, "step": 4557 }, { "epoch": 0.529538193435957, "grad_norm": 0.407843679189682, "learning_rate": 0.0001, "loss": 1.6127, "step": 4558 }, { "epoch": 0.5296543711879175, "grad_norm": 0.4293687045574188, "learning_rate": 0.0001, "loss": 1.6207, "step": 4559 }, { "epoch": 0.529770548939878, "grad_norm": 0.42823317646980286, "learning_rate": 0.0001, "loss": 1.8056, "step": 4560 }, { "epoch": 0.5298867266918386, "grad_norm": 0.41989070177078247, "learning_rate": 0.0001, "loss": 1.6296, "step": 4561 }, { "epoch": 0.530002904443799, "grad_norm": 0.41696497797966003, "learning_rate": 0.0001, "loss": 1.5857, "step": 4562 }, { "epoch": 0.5301190821957595, "grad_norm": 0.3901718854904175, "learning_rate": 0.0001, "loss": 1.5489, "step": 4563 }, { "epoch": 0.53023525994772, "grad_norm": 0.42461147904396057, "learning_rate": 0.0001, "loss": 1.605, "step": 4564 }, { "epoch": 0.5303514376996805, "grad_norm": 0.4265443682670593, "learning_rate": 0.0001, "loss": 1.5939, "step": 4565 }, { "epoch": 0.530467615451641, "grad_norm": 0.4418391287326813, "learning_rate": 0.0001, "loss": 1.7592, "step": 4566 }, { "epoch": 0.5305837932036015, "grad_norm": 0.4429027736186981, "learning_rate": 0.0001, "loss": 1.6546, "step": 4567 }, { "epoch": 0.530699970955562, "grad_norm": 0.4483877420425415, "learning_rate": 0.0001, "loss": 1.6595, "step": 4568 }, { "epoch": 0.5308161487075225, "grad_norm": 0.4467184245586395, "learning_rate": 0.0001, "loss": 1.7746, "step": 4569 }, { "epoch": 0.530932326459483, "grad_norm": 0.4503554403781891, "learning_rate": 0.0001, "loss": 1.8263, "step": 4570 }, { "epoch": 0.5310485042114436, "grad_norm": 0.40501412749290466, "learning_rate": 0.0001, "loss": 1.6129, "step": 4571 }, { "epoch": 0.531164681963404, "grad_norm": 0.41153189539909363, "learning_rate": 0.0001, "loss": 1.6277, "step": 4572 }, { "epoch": 0.5312808597153645, "grad_norm": 0.41947031021118164, "learning_rate": 0.0001, "loss": 1.6558, "step": 4573 }, { "epoch": 0.531397037467325, "grad_norm": 0.4129175841808319, "learning_rate": 0.0001, "loss": 1.5436, "step": 4574 }, { "epoch": 0.5315132152192855, "grad_norm": 0.44129297137260437, "learning_rate": 0.0001, "loss": 1.693, "step": 4575 }, { "epoch": 0.531629392971246, "grad_norm": 0.41922008991241455, "learning_rate": 0.0001, "loss": 1.6644, "step": 4576 }, { "epoch": 0.5317455707232065, "grad_norm": 0.41607466340065, "learning_rate": 0.0001, "loss": 1.6542, "step": 4577 }, { "epoch": 0.531861748475167, "grad_norm": 0.42458733916282654, "learning_rate": 0.0001, "loss": 1.6047, "step": 4578 }, { "epoch": 0.5319779262271275, "grad_norm": 0.43366140127182007, "learning_rate": 0.0001, "loss": 1.508, "step": 4579 }, { "epoch": 0.532094103979088, "grad_norm": 0.4278888702392578, "learning_rate": 0.0001, "loss": 1.688, "step": 4580 }, { "epoch": 0.5322102817310485, "grad_norm": 0.4110510051250458, "learning_rate": 0.0001, "loss": 1.5144, "step": 4581 }, { "epoch": 0.532326459483009, "grad_norm": 0.4136632978916168, "learning_rate": 0.0001, "loss": 1.5157, "step": 4582 }, { "epoch": 0.5324426372349695, "grad_norm": 0.3931570053100586, "learning_rate": 0.0001, "loss": 1.4754, "step": 4583 }, { "epoch": 0.53255881498693, "grad_norm": 0.41531267762184143, "learning_rate": 0.0001, "loss": 1.6292, "step": 4584 }, { "epoch": 0.5326749927388905, "grad_norm": 0.4423351585865021, "learning_rate": 0.0001, "loss": 1.847, "step": 4585 }, { "epoch": 0.532791170490851, "grad_norm": 0.401030570268631, "learning_rate": 0.0001, "loss": 1.592, "step": 4586 }, { "epoch": 0.5329073482428115, "grad_norm": 0.42098504304885864, "learning_rate": 0.0001, "loss": 1.7131, "step": 4587 }, { "epoch": 0.533023525994772, "grad_norm": 0.4085943102836609, "learning_rate": 0.0001, "loss": 1.5669, "step": 4588 }, { "epoch": 0.5331397037467325, "grad_norm": 0.39442306756973267, "learning_rate": 0.0001, "loss": 1.4053, "step": 4589 }, { "epoch": 0.533255881498693, "grad_norm": 0.4020472466945648, "learning_rate": 0.0001, "loss": 1.491, "step": 4590 }, { "epoch": 0.5333720592506535, "grad_norm": 0.41288527846336365, "learning_rate": 0.0001, "loss": 1.641, "step": 4591 }, { "epoch": 0.533488237002614, "grad_norm": 0.4358307719230652, "learning_rate": 0.0001, "loss": 1.5793, "step": 4592 }, { "epoch": 0.5336044147545745, "grad_norm": 0.4035166800022125, "learning_rate": 0.0001, "loss": 1.5314, "step": 4593 }, { "epoch": 0.533720592506535, "grad_norm": 0.420097291469574, "learning_rate": 0.0001, "loss": 1.7894, "step": 4594 }, { "epoch": 0.5338367702584955, "grad_norm": 0.47331398725509644, "learning_rate": 0.0001, "loss": 1.5796, "step": 4595 }, { "epoch": 0.533952948010456, "grad_norm": 0.42985647916793823, "learning_rate": 0.0001, "loss": 1.7268, "step": 4596 }, { "epoch": 0.5340691257624165, "grad_norm": 0.3984420895576477, "learning_rate": 0.0001, "loss": 1.4863, "step": 4597 }, { "epoch": 0.534185303514377, "grad_norm": 0.41712093353271484, "learning_rate": 0.0001, "loss": 1.6269, "step": 4598 }, { "epoch": 0.5343014812663375, "grad_norm": 0.42095884680747986, "learning_rate": 0.0001, "loss": 1.7377, "step": 4599 }, { "epoch": 0.534417659018298, "grad_norm": 0.4140506386756897, "learning_rate": 0.0001, "loss": 1.514, "step": 4600 }, { "epoch": 0.5345338367702585, "grad_norm": 0.39841312170028687, "learning_rate": 0.0001, "loss": 1.4846, "step": 4601 }, { "epoch": 0.5346500145222189, "grad_norm": 0.46106958389282227, "learning_rate": 0.0001, "loss": 1.6737, "step": 4602 }, { "epoch": 0.5347661922741795, "grad_norm": 0.4085831344127655, "learning_rate": 0.0001, "loss": 1.5861, "step": 4603 }, { "epoch": 0.53488237002614, "grad_norm": 0.49497655034065247, "learning_rate": 0.0001, "loss": 1.6332, "step": 4604 }, { "epoch": 0.5349985477781005, "grad_norm": 0.43607422709465027, "learning_rate": 0.0001, "loss": 1.7121, "step": 4605 }, { "epoch": 0.535114725530061, "grad_norm": 0.4607904255390167, "learning_rate": 0.0001, "loss": 1.6288, "step": 4606 }, { "epoch": 0.5352309032820215, "grad_norm": 0.4455506205558777, "learning_rate": 0.0001, "loss": 1.6646, "step": 4607 }, { "epoch": 0.535347081033982, "grad_norm": 0.39066752791404724, "learning_rate": 0.0001, "loss": 1.4855, "step": 4608 }, { "epoch": 0.5354632587859425, "grad_norm": 0.4268709421157837, "learning_rate": 0.0001, "loss": 1.8102, "step": 4609 }, { "epoch": 0.535579436537903, "grad_norm": 0.40803661942481995, "learning_rate": 0.0001, "loss": 1.6831, "step": 4610 }, { "epoch": 0.5356956142898635, "grad_norm": 0.4266864061355591, "learning_rate": 0.0001, "loss": 1.5085, "step": 4611 }, { "epoch": 0.5358117920418239, "grad_norm": 0.4282558262348175, "learning_rate": 0.0001, "loss": 1.6198, "step": 4612 }, { "epoch": 0.5359279697937845, "grad_norm": 0.4355599880218506, "learning_rate": 0.0001, "loss": 1.7922, "step": 4613 }, { "epoch": 0.536044147545745, "grad_norm": 0.41130515933036804, "learning_rate": 0.0001, "loss": 1.5415, "step": 4614 }, { "epoch": 0.5361603252977055, "grad_norm": 0.4203796982765198, "learning_rate": 0.0001, "loss": 1.5859, "step": 4615 }, { "epoch": 0.536276503049666, "grad_norm": 0.4113680124282837, "learning_rate": 0.0001, "loss": 1.6055, "step": 4616 }, { "epoch": 0.5363926808016265, "grad_norm": 0.4167897701263428, "learning_rate": 0.0001, "loss": 1.654, "step": 4617 }, { "epoch": 0.536508858553587, "grad_norm": 0.39767855405807495, "learning_rate": 0.0001, "loss": 1.4927, "step": 4618 }, { "epoch": 0.5366250363055475, "grad_norm": 0.42861250042915344, "learning_rate": 0.0001, "loss": 1.7427, "step": 4619 }, { "epoch": 0.536741214057508, "grad_norm": 0.4207315742969513, "learning_rate": 0.0001, "loss": 1.8116, "step": 4620 }, { "epoch": 0.5368573918094685, "grad_norm": 0.41089701652526855, "learning_rate": 0.0001, "loss": 1.7136, "step": 4621 }, { "epoch": 0.5369735695614289, "grad_norm": 0.42728808522224426, "learning_rate": 0.0001, "loss": 1.6571, "step": 4622 }, { "epoch": 0.5370897473133894, "grad_norm": 0.4472593069076538, "learning_rate": 0.0001, "loss": 1.7698, "step": 4623 }, { "epoch": 0.53720592506535, "grad_norm": 0.41069328784942627, "learning_rate": 0.0001, "loss": 1.5528, "step": 4624 }, { "epoch": 0.5373221028173105, "grad_norm": 0.4025200605392456, "learning_rate": 0.0001, "loss": 1.6138, "step": 4625 }, { "epoch": 0.537438280569271, "grad_norm": 0.4271898567676544, "learning_rate": 0.0001, "loss": 1.8612, "step": 4626 }, { "epoch": 0.5375544583212315, "grad_norm": 0.40859630703926086, "learning_rate": 0.0001, "loss": 1.5449, "step": 4627 }, { "epoch": 0.537670636073192, "grad_norm": 0.390266090631485, "learning_rate": 0.0001, "loss": 1.4714, "step": 4628 }, { "epoch": 0.5377868138251525, "grad_norm": 0.44839224219322205, "learning_rate": 0.0001, "loss": 1.5188, "step": 4629 }, { "epoch": 0.537902991577113, "grad_norm": 0.4331499934196472, "learning_rate": 0.0001, "loss": 1.748, "step": 4630 }, { "epoch": 0.5380191693290735, "grad_norm": 0.41848304867744446, "learning_rate": 0.0001, "loss": 1.559, "step": 4631 }, { "epoch": 0.5381353470810339, "grad_norm": 0.4118647873401642, "learning_rate": 0.0001, "loss": 1.5754, "step": 4632 }, { "epoch": 0.5382515248329944, "grad_norm": 0.4422479569911957, "learning_rate": 0.0001, "loss": 1.6402, "step": 4633 }, { "epoch": 0.538367702584955, "grad_norm": 0.4162086844444275, "learning_rate": 0.0001, "loss": 1.6752, "step": 4634 }, { "epoch": 0.5384838803369155, "grad_norm": 0.4036068618297577, "learning_rate": 0.0001, "loss": 1.6282, "step": 4635 }, { "epoch": 0.538600058088876, "grad_norm": 0.4048601984977722, "learning_rate": 0.0001, "loss": 1.5711, "step": 4636 }, { "epoch": 0.5387162358408365, "grad_norm": 0.4456162750720978, "learning_rate": 0.0001, "loss": 1.7916, "step": 4637 }, { "epoch": 0.538832413592797, "grad_norm": 0.44490668177604675, "learning_rate": 0.0001, "loss": 1.6388, "step": 4638 }, { "epoch": 0.5389485913447575, "grad_norm": 0.42898234724998474, "learning_rate": 0.0001, "loss": 1.6908, "step": 4639 }, { "epoch": 0.539064769096718, "grad_norm": 0.41526684165000916, "learning_rate": 0.0001, "loss": 1.6208, "step": 4640 }, { "epoch": 0.5391809468486785, "grad_norm": 0.391984224319458, "learning_rate": 0.0001, "loss": 1.5299, "step": 4641 }, { "epoch": 0.5392971246006389, "grad_norm": 0.4175387918949127, "learning_rate": 0.0001, "loss": 1.5193, "step": 4642 }, { "epoch": 0.5394133023525994, "grad_norm": 0.4540769159793854, "learning_rate": 0.0001, "loss": 1.601, "step": 4643 }, { "epoch": 0.5395294801045599, "grad_norm": 0.43631550669670105, "learning_rate": 0.0001, "loss": 1.578, "step": 4644 }, { "epoch": 0.5396456578565205, "grad_norm": 0.459330290555954, "learning_rate": 0.0001, "loss": 1.7209, "step": 4645 }, { "epoch": 0.539761835608481, "grad_norm": 0.41860130429267883, "learning_rate": 0.0001, "loss": 1.6275, "step": 4646 }, { "epoch": 0.5398780133604415, "grad_norm": 0.5068102478981018, "learning_rate": 0.0001, "loss": 1.8484, "step": 4647 }, { "epoch": 0.539994191112402, "grad_norm": 0.4381267726421356, "learning_rate": 0.0001, "loss": 1.7247, "step": 4648 }, { "epoch": 0.5401103688643625, "grad_norm": 0.426881343126297, "learning_rate": 0.0001, "loss": 1.5756, "step": 4649 }, { "epoch": 0.540226546616323, "grad_norm": 0.44243374466896057, "learning_rate": 0.0001, "loss": 1.8272, "step": 4650 }, { "epoch": 0.5403427243682835, "grad_norm": 0.4320388734340668, "learning_rate": 0.0001, "loss": 1.5522, "step": 4651 }, { "epoch": 0.540458902120244, "grad_norm": 0.418743759393692, "learning_rate": 0.0001, "loss": 1.5871, "step": 4652 }, { "epoch": 0.5405750798722044, "grad_norm": 0.4069058299064636, "learning_rate": 0.0001, "loss": 1.7696, "step": 4653 }, { "epoch": 0.5406912576241649, "grad_norm": 0.4170093536376953, "learning_rate": 0.0001, "loss": 1.6897, "step": 4654 }, { "epoch": 0.5408074353761255, "grad_norm": 0.4032455384731293, "learning_rate": 0.0001, "loss": 1.6103, "step": 4655 }, { "epoch": 0.540923613128086, "grad_norm": 0.3982067108154297, "learning_rate": 0.0001, "loss": 1.6941, "step": 4656 }, { "epoch": 0.5410397908800465, "grad_norm": 0.4389408528804779, "learning_rate": 0.0001, "loss": 1.6428, "step": 4657 }, { "epoch": 0.541155968632007, "grad_norm": 0.41216394305229187, "learning_rate": 0.0001, "loss": 1.724, "step": 4658 }, { "epoch": 0.5412721463839675, "grad_norm": 0.4423290491104126, "learning_rate": 0.0001, "loss": 1.8955, "step": 4659 }, { "epoch": 0.541388324135928, "grad_norm": 0.45288944244384766, "learning_rate": 0.0001, "loss": 1.5609, "step": 4660 }, { "epoch": 0.5415045018878885, "grad_norm": 0.43445900082588196, "learning_rate": 0.0001, "loss": 1.6535, "step": 4661 }, { "epoch": 0.541620679639849, "grad_norm": 0.4164069890975952, "learning_rate": 0.0001, "loss": 1.6999, "step": 4662 }, { "epoch": 0.5417368573918094, "grad_norm": 0.4147018492221832, "learning_rate": 0.0001, "loss": 1.6152, "step": 4663 }, { "epoch": 0.5418530351437699, "grad_norm": 0.4525691568851471, "learning_rate": 0.0001, "loss": 1.674, "step": 4664 }, { "epoch": 0.5419692128957304, "grad_norm": 0.4286227822303772, "learning_rate": 0.0001, "loss": 1.6262, "step": 4665 }, { "epoch": 0.542085390647691, "grad_norm": 0.4411110281944275, "learning_rate": 0.0001, "loss": 1.6274, "step": 4666 }, { "epoch": 0.5422015683996515, "grad_norm": 0.3831879794597626, "learning_rate": 0.0001, "loss": 1.3289, "step": 4667 }, { "epoch": 0.542317746151612, "grad_norm": 0.42957553267478943, "learning_rate": 0.0001, "loss": 1.6977, "step": 4668 }, { "epoch": 0.5424339239035725, "grad_norm": 0.43326959013938904, "learning_rate": 0.0001, "loss": 1.6754, "step": 4669 }, { "epoch": 0.542550101655533, "grad_norm": 0.42409175634384155, "learning_rate": 0.0001, "loss": 1.5158, "step": 4670 }, { "epoch": 0.5426662794074935, "grad_norm": 0.42008155584335327, "learning_rate": 0.0001, "loss": 1.6757, "step": 4671 }, { "epoch": 0.542782457159454, "grad_norm": 0.41546356678009033, "learning_rate": 0.0001, "loss": 1.6692, "step": 4672 }, { "epoch": 0.5428986349114144, "grad_norm": 0.41974398493766785, "learning_rate": 0.0001, "loss": 1.6507, "step": 4673 }, { "epoch": 0.5430148126633749, "grad_norm": 0.39772695302963257, "learning_rate": 0.0001, "loss": 1.6017, "step": 4674 }, { "epoch": 0.5431309904153354, "grad_norm": 0.43353089690208435, "learning_rate": 0.0001, "loss": 1.7021, "step": 4675 }, { "epoch": 0.543247168167296, "grad_norm": 0.4182477295398712, "learning_rate": 0.0001, "loss": 1.5777, "step": 4676 }, { "epoch": 0.5433633459192565, "grad_norm": 0.4173682928085327, "learning_rate": 0.0001, "loss": 1.512, "step": 4677 }, { "epoch": 0.543479523671217, "grad_norm": 0.44657060503959656, "learning_rate": 0.0001, "loss": 1.6167, "step": 4678 }, { "epoch": 0.5435957014231775, "grad_norm": 0.43546125292778015, "learning_rate": 0.0001, "loss": 1.6459, "step": 4679 }, { "epoch": 0.543711879175138, "grad_norm": 0.45045846700668335, "learning_rate": 0.0001, "loss": 1.6601, "step": 4680 }, { "epoch": 0.5438280569270985, "grad_norm": 0.4383883774280548, "learning_rate": 0.0001, "loss": 1.5701, "step": 4681 }, { "epoch": 0.543944234679059, "grad_norm": 0.40852198004722595, "learning_rate": 0.0001, "loss": 1.6311, "step": 4682 }, { "epoch": 0.5440604124310194, "grad_norm": 0.4357270896434784, "learning_rate": 0.0001, "loss": 1.8067, "step": 4683 }, { "epoch": 0.5441765901829799, "grad_norm": 0.4512038826942444, "learning_rate": 0.0001, "loss": 1.6369, "step": 4684 }, { "epoch": 0.5442927679349404, "grad_norm": 0.41331276297569275, "learning_rate": 0.0001, "loss": 1.6546, "step": 4685 }, { "epoch": 0.544408945686901, "grad_norm": 0.4491327702999115, "learning_rate": 0.0001, "loss": 1.7954, "step": 4686 }, { "epoch": 0.5445251234388615, "grad_norm": 0.4428486227989197, "learning_rate": 0.0001, "loss": 1.7398, "step": 4687 }, { "epoch": 0.544641301190822, "grad_norm": 0.416236937046051, "learning_rate": 0.0001, "loss": 1.6693, "step": 4688 }, { "epoch": 0.5447574789427825, "grad_norm": 0.40862324833869934, "learning_rate": 0.0001, "loss": 1.6646, "step": 4689 }, { "epoch": 0.544873656694743, "grad_norm": 0.43137800693511963, "learning_rate": 0.0001, "loss": 1.6398, "step": 4690 }, { "epoch": 0.5449898344467035, "grad_norm": 0.4132535755634308, "learning_rate": 0.0001, "loss": 1.6942, "step": 4691 }, { "epoch": 0.545106012198664, "grad_norm": 0.5874444842338562, "learning_rate": 0.0001, "loss": 1.6515, "step": 4692 }, { "epoch": 0.5452221899506244, "grad_norm": 0.379760205745697, "learning_rate": 0.0001, "loss": 1.3516, "step": 4693 }, { "epoch": 0.5453383677025849, "grad_norm": 0.4385640025138855, "learning_rate": 0.0001, "loss": 1.776, "step": 4694 }, { "epoch": 0.5454545454545454, "grad_norm": 0.4262996315956116, "learning_rate": 0.0001, "loss": 1.5412, "step": 4695 }, { "epoch": 0.5455707232065059, "grad_norm": 0.4239009916782379, "learning_rate": 0.0001, "loss": 1.6814, "step": 4696 }, { "epoch": 0.5456869009584665, "grad_norm": 0.40220028162002563, "learning_rate": 0.0001, "loss": 1.5991, "step": 4697 }, { "epoch": 0.545803078710427, "grad_norm": 0.3948020339012146, "learning_rate": 0.0001, "loss": 1.5745, "step": 4698 }, { "epoch": 0.5459192564623875, "grad_norm": 0.3882817029953003, "learning_rate": 0.0001, "loss": 1.4505, "step": 4699 }, { "epoch": 0.546035434214348, "grad_norm": 0.4421759843826294, "learning_rate": 0.0001, "loss": 1.6698, "step": 4700 }, { "epoch": 0.5461516119663085, "grad_norm": 0.4043938219547272, "learning_rate": 0.0001, "loss": 1.6986, "step": 4701 }, { "epoch": 0.546267789718269, "grad_norm": 0.42511290311813354, "learning_rate": 0.0001, "loss": 1.5415, "step": 4702 }, { "epoch": 0.5463839674702294, "grad_norm": 0.4274735748767853, "learning_rate": 0.0001, "loss": 1.6945, "step": 4703 }, { "epoch": 0.5465001452221899, "grad_norm": 0.4572598934173584, "learning_rate": 0.0001, "loss": 1.754, "step": 4704 }, { "epoch": 0.5466163229741504, "grad_norm": 0.41152650117874146, "learning_rate": 0.0001, "loss": 1.3119, "step": 4705 }, { "epoch": 0.5467325007261109, "grad_norm": 0.4448837637901306, "learning_rate": 0.0001, "loss": 1.6856, "step": 4706 }, { "epoch": 0.5468486784780715, "grad_norm": 0.4235488176345825, "learning_rate": 0.0001, "loss": 1.7473, "step": 4707 }, { "epoch": 0.546964856230032, "grad_norm": 0.4105146527290344, "learning_rate": 0.0001, "loss": 1.6456, "step": 4708 }, { "epoch": 0.5470810339819925, "grad_norm": 0.4490967094898224, "learning_rate": 0.0001, "loss": 1.7984, "step": 4709 }, { "epoch": 0.547197211733953, "grad_norm": 0.4733203947544098, "learning_rate": 0.0001, "loss": 1.6005, "step": 4710 }, { "epoch": 0.5473133894859135, "grad_norm": 0.4103139340877533, "learning_rate": 0.0001, "loss": 1.6986, "step": 4711 }, { "epoch": 0.547429567237874, "grad_norm": 0.41262155771255493, "learning_rate": 0.0001, "loss": 1.5596, "step": 4712 }, { "epoch": 0.5475457449898344, "grad_norm": 0.41517654061317444, "learning_rate": 0.0001, "loss": 1.6688, "step": 4713 }, { "epoch": 0.5476619227417949, "grad_norm": 0.43527457118034363, "learning_rate": 0.0001, "loss": 1.7127, "step": 4714 }, { "epoch": 0.5477781004937554, "grad_norm": 0.3753294348716736, "learning_rate": 0.0001, "loss": 1.4135, "step": 4715 }, { "epoch": 0.5478942782457159, "grad_norm": 0.4435529112815857, "learning_rate": 0.0001, "loss": 1.7554, "step": 4716 }, { "epoch": 0.5480104559976764, "grad_norm": 0.39679768681526184, "learning_rate": 0.0001, "loss": 1.5539, "step": 4717 }, { "epoch": 0.548126633749637, "grad_norm": 0.421624094247818, "learning_rate": 0.0001, "loss": 1.6894, "step": 4718 }, { "epoch": 0.5482428115015975, "grad_norm": 0.42037492990493774, "learning_rate": 0.0001, "loss": 1.6101, "step": 4719 }, { "epoch": 0.548358989253558, "grad_norm": 0.44528070092201233, "learning_rate": 0.0001, "loss": 1.4923, "step": 4720 }, { "epoch": 0.5484751670055185, "grad_norm": 0.41840189695358276, "learning_rate": 0.0001, "loss": 1.6836, "step": 4721 }, { "epoch": 0.548591344757479, "grad_norm": 0.3980635404586792, "learning_rate": 0.0001, "loss": 1.7035, "step": 4722 }, { "epoch": 0.5487075225094394, "grad_norm": 0.42704808712005615, "learning_rate": 0.0001, "loss": 1.6621, "step": 4723 }, { "epoch": 0.5488237002613999, "grad_norm": 0.4258357584476471, "learning_rate": 0.0001, "loss": 1.6625, "step": 4724 }, { "epoch": 0.5489398780133604, "grad_norm": 0.4512523114681244, "learning_rate": 0.0001, "loss": 1.6432, "step": 4725 }, { "epoch": 0.5490560557653209, "grad_norm": 0.4495634138584137, "learning_rate": 0.0001, "loss": 1.7266, "step": 4726 }, { "epoch": 0.5491722335172814, "grad_norm": 0.44394296407699585, "learning_rate": 0.0001, "loss": 1.7735, "step": 4727 }, { "epoch": 0.549288411269242, "grad_norm": 0.4451744258403778, "learning_rate": 0.0001, "loss": 1.8485, "step": 4728 }, { "epoch": 0.5494045890212025, "grad_norm": 0.42420530319213867, "learning_rate": 0.0001, "loss": 1.7562, "step": 4729 }, { "epoch": 0.549520766773163, "grad_norm": 0.4318549931049347, "learning_rate": 0.0001, "loss": 1.5842, "step": 4730 }, { "epoch": 0.5496369445251235, "grad_norm": 0.3942951560020447, "learning_rate": 0.0001, "loss": 1.7032, "step": 4731 }, { "epoch": 0.549753122277084, "grad_norm": 0.42226502299308777, "learning_rate": 0.0001, "loss": 1.723, "step": 4732 }, { "epoch": 0.5498693000290444, "grad_norm": 0.45085522532463074, "learning_rate": 0.0001, "loss": 1.7787, "step": 4733 }, { "epoch": 0.5499854777810049, "grad_norm": 0.40263989567756653, "learning_rate": 0.0001, "loss": 1.6947, "step": 4734 }, { "epoch": 0.5501016555329654, "grad_norm": 0.4361323416233063, "learning_rate": 0.0001, "loss": 1.6209, "step": 4735 }, { "epoch": 0.5502178332849259, "grad_norm": 0.4477275311946869, "learning_rate": 0.0001, "loss": 1.6644, "step": 4736 }, { "epoch": 0.5503340110368864, "grad_norm": 0.490419864654541, "learning_rate": 0.0001, "loss": 1.7794, "step": 4737 }, { "epoch": 0.5504501887888469, "grad_norm": 0.4405496418476105, "learning_rate": 0.0001, "loss": 1.6157, "step": 4738 }, { "epoch": 0.5505663665408075, "grad_norm": 0.38646984100341797, "learning_rate": 0.0001, "loss": 1.5299, "step": 4739 }, { "epoch": 0.550682544292768, "grad_norm": 0.40412217378616333, "learning_rate": 0.0001, "loss": 1.6316, "step": 4740 }, { "epoch": 0.5507987220447285, "grad_norm": 0.41205301880836487, "learning_rate": 0.0001, "loss": 1.6778, "step": 4741 }, { "epoch": 0.550914899796689, "grad_norm": 0.4445815086364746, "learning_rate": 0.0001, "loss": 1.6385, "step": 4742 }, { "epoch": 0.5510310775486494, "grad_norm": 0.3821341395378113, "learning_rate": 0.0001, "loss": 1.364, "step": 4743 }, { "epoch": 0.5511472553006099, "grad_norm": 0.4401394724845886, "learning_rate": 0.0001, "loss": 1.8219, "step": 4744 }, { "epoch": 0.5512634330525704, "grad_norm": 0.4259290397167206, "learning_rate": 0.0001, "loss": 1.7113, "step": 4745 }, { "epoch": 0.5513796108045309, "grad_norm": 0.4446122348308563, "learning_rate": 0.0001, "loss": 1.672, "step": 4746 }, { "epoch": 0.5514957885564914, "grad_norm": 0.4578036367893219, "learning_rate": 0.0001, "loss": 1.7534, "step": 4747 }, { "epoch": 0.5516119663084519, "grad_norm": 0.42073720693588257, "learning_rate": 0.0001, "loss": 1.692, "step": 4748 }, { "epoch": 0.5517281440604125, "grad_norm": 0.4655468761920929, "learning_rate": 0.0001, "loss": 1.6489, "step": 4749 }, { "epoch": 0.551844321812373, "grad_norm": 0.41886764764785767, "learning_rate": 0.0001, "loss": 1.6146, "step": 4750 }, { "epoch": 0.5519604995643335, "grad_norm": 0.42605897784233093, "learning_rate": 0.0001, "loss": 1.5669, "step": 4751 }, { "epoch": 0.552076677316294, "grad_norm": 0.43297725915908813, "learning_rate": 0.0001, "loss": 1.608, "step": 4752 }, { "epoch": 0.5521928550682544, "grad_norm": 0.44777053594589233, "learning_rate": 0.0001, "loss": 1.6371, "step": 4753 }, { "epoch": 0.5523090328202149, "grad_norm": 0.43533116579055786, "learning_rate": 0.0001, "loss": 1.6652, "step": 4754 }, { "epoch": 0.5524252105721754, "grad_norm": 0.4219701290130615, "learning_rate": 0.0001, "loss": 1.6955, "step": 4755 }, { "epoch": 0.5525413883241359, "grad_norm": 0.42743462324142456, "learning_rate": 0.0001, "loss": 1.7591, "step": 4756 }, { "epoch": 0.5526575660760964, "grad_norm": 0.4393671452999115, "learning_rate": 0.0001, "loss": 1.7385, "step": 4757 }, { "epoch": 0.5527737438280569, "grad_norm": 0.43937399983406067, "learning_rate": 0.0001, "loss": 1.6714, "step": 4758 }, { "epoch": 0.5528899215800174, "grad_norm": 0.41498544812202454, "learning_rate": 0.0001, "loss": 1.6554, "step": 4759 }, { "epoch": 0.553006099331978, "grad_norm": 0.3907436430454254, "learning_rate": 0.0001, "loss": 1.4194, "step": 4760 }, { "epoch": 0.5531222770839385, "grad_norm": 0.4304429292678833, "learning_rate": 0.0001, "loss": 1.5683, "step": 4761 }, { "epoch": 0.553238454835899, "grad_norm": 0.41753122210502625, "learning_rate": 0.0001, "loss": 1.5066, "step": 4762 }, { "epoch": 0.5533546325878594, "grad_norm": 0.4009332060813904, "learning_rate": 0.0001, "loss": 1.6122, "step": 4763 }, { "epoch": 0.5534708103398199, "grad_norm": 0.41789358854293823, "learning_rate": 0.0001, "loss": 1.6677, "step": 4764 }, { "epoch": 0.5535869880917804, "grad_norm": 0.42972517013549805, "learning_rate": 0.0001, "loss": 1.7597, "step": 4765 }, { "epoch": 0.5537031658437409, "grad_norm": 0.4440890848636627, "learning_rate": 0.0001, "loss": 1.5651, "step": 4766 }, { "epoch": 0.5538193435957014, "grad_norm": 0.43731603026390076, "learning_rate": 0.0001, "loss": 1.7988, "step": 4767 }, { "epoch": 0.5539355213476619, "grad_norm": 0.4374067485332489, "learning_rate": 0.0001, "loss": 1.6098, "step": 4768 }, { "epoch": 0.5540516990996224, "grad_norm": 0.43468907475471497, "learning_rate": 0.0001, "loss": 1.6439, "step": 4769 }, { "epoch": 0.554167876851583, "grad_norm": 0.44447562098503113, "learning_rate": 0.0001, "loss": 1.6284, "step": 4770 }, { "epoch": 0.5542840546035435, "grad_norm": 0.4318912625312805, "learning_rate": 0.0001, "loss": 1.6811, "step": 4771 }, { "epoch": 0.554400232355504, "grad_norm": 0.4088803827762604, "learning_rate": 0.0001, "loss": 1.506, "step": 4772 }, { "epoch": 0.5545164101074644, "grad_norm": 0.45096081495285034, "learning_rate": 0.0001, "loss": 1.6394, "step": 4773 }, { "epoch": 0.5546325878594249, "grad_norm": 0.45685335993766785, "learning_rate": 0.0001, "loss": 1.6431, "step": 4774 }, { "epoch": 0.5547487656113854, "grad_norm": 0.4274502396583557, "learning_rate": 0.0001, "loss": 1.6178, "step": 4775 }, { "epoch": 0.5548649433633459, "grad_norm": 0.3890495300292969, "learning_rate": 0.0001, "loss": 1.5234, "step": 4776 }, { "epoch": 0.5549811211153064, "grad_norm": 0.4626457095146179, "learning_rate": 0.0001, "loss": 1.5708, "step": 4777 }, { "epoch": 0.5550972988672669, "grad_norm": 0.42439907789230347, "learning_rate": 0.0001, "loss": 1.5759, "step": 4778 }, { "epoch": 0.5552134766192274, "grad_norm": 0.45186173915863037, "learning_rate": 0.0001, "loss": 1.7971, "step": 4779 }, { "epoch": 0.5553296543711879, "grad_norm": 0.4208846688270569, "learning_rate": 0.0001, "loss": 1.5664, "step": 4780 }, { "epoch": 0.5554458321231485, "grad_norm": 0.43783605098724365, "learning_rate": 0.0001, "loss": 1.8354, "step": 4781 }, { "epoch": 0.555562009875109, "grad_norm": 0.4037763178348541, "learning_rate": 0.0001, "loss": 1.5226, "step": 4782 }, { "epoch": 0.5556781876270694, "grad_norm": 0.41722458600997925, "learning_rate": 0.0001, "loss": 1.6663, "step": 4783 }, { "epoch": 0.5557943653790299, "grad_norm": 0.4464002549648285, "learning_rate": 0.0001, "loss": 1.6796, "step": 4784 }, { "epoch": 0.5559105431309904, "grad_norm": 0.4327322840690613, "learning_rate": 0.0001, "loss": 1.829, "step": 4785 }, { "epoch": 0.5560267208829509, "grad_norm": 0.4065304100513458, "learning_rate": 0.0001, "loss": 1.6915, "step": 4786 }, { "epoch": 0.5561428986349114, "grad_norm": 0.4391983449459076, "learning_rate": 0.0001, "loss": 1.6593, "step": 4787 }, { "epoch": 0.5562590763868719, "grad_norm": 0.42863357067108154, "learning_rate": 0.0001, "loss": 1.4858, "step": 4788 }, { "epoch": 0.5563752541388324, "grad_norm": 0.40624338388442993, "learning_rate": 0.0001, "loss": 1.4187, "step": 4789 }, { "epoch": 0.5564914318907929, "grad_norm": 0.4419528543949127, "learning_rate": 0.0001, "loss": 1.5129, "step": 4790 }, { "epoch": 0.5566076096427535, "grad_norm": 0.470945805311203, "learning_rate": 0.0001, "loss": 1.8979, "step": 4791 }, { "epoch": 0.556723787394714, "grad_norm": 0.4236770272254944, "learning_rate": 0.0001, "loss": 1.8773, "step": 4792 }, { "epoch": 0.5568399651466744, "grad_norm": 0.426725834608078, "learning_rate": 0.0001, "loss": 1.6101, "step": 4793 }, { "epoch": 0.5569561428986349, "grad_norm": 0.4224294126033783, "learning_rate": 0.0001, "loss": 1.6533, "step": 4794 }, { "epoch": 0.5570723206505954, "grad_norm": 0.413329541683197, "learning_rate": 0.0001, "loss": 1.5878, "step": 4795 }, { "epoch": 0.5571884984025559, "grad_norm": 0.43398913741111755, "learning_rate": 0.0001, "loss": 1.5168, "step": 4796 }, { "epoch": 0.5573046761545164, "grad_norm": 0.4381754696369171, "learning_rate": 0.0001, "loss": 1.7338, "step": 4797 }, { "epoch": 0.5574208539064769, "grad_norm": 0.44960010051727295, "learning_rate": 0.0001, "loss": 1.7337, "step": 4798 }, { "epoch": 0.5575370316584374, "grad_norm": 0.4516027271747589, "learning_rate": 0.0001, "loss": 1.629, "step": 4799 }, { "epoch": 0.5576532094103979, "grad_norm": 0.4343845844268799, "learning_rate": 0.0001, "loss": 1.7357, "step": 4800 }, { "epoch": 0.5577693871623584, "grad_norm": 0.436753511428833, "learning_rate": 0.0001, "loss": 1.6702, "step": 4801 }, { "epoch": 0.557885564914319, "grad_norm": 0.4462500512599945, "learning_rate": 0.0001, "loss": 1.6267, "step": 4802 }, { "epoch": 0.5580017426662794, "grad_norm": 0.396566778421402, "learning_rate": 0.0001, "loss": 1.3311, "step": 4803 }, { "epoch": 0.5581179204182399, "grad_norm": 0.5002578496932983, "learning_rate": 0.0001, "loss": 1.8922, "step": 4804 }, { "epoch": 0.5582340981702004, "grad_norm": 0.43090498447418213, "learning_rate": 0.0001, "loss": 1.7225, "step": 4805 }, { "epoch": 0.5583502759221609, "grad_norm": 0.4082920253276825, "learning_rate": 0.0001, "loss": 1.6582, "step": 4806 }, { "epoch": 0.5584664536741214, "grad_norm": 0.43145760893821716, "learning_rate": 0.0001, "loss": 1.6492, "step": 4807 }, { "epoch": 0.5585826314260819, "grad_norm": 0.3939366638660431, "learning_rate": 0.0001, "loss": 1.5123, "step": 4808 }, { "epoch": 0.5586988091780424, "grad_norm": 0.45775458216667175, "learning_rate": 0.0001, "loss": 1.7803, "step": 4809 }, { "epoch": 0.5588149869300029, "grad_norm": 0.4209611117839813, "learning_rate": 0.0001, "loss": 1.6339, "step": 4810 }, { "epoch": 0.5589311646819634, "grad_norm": 0.4282268285751343, "learning_rate": 0.0001, "loss": 1.6259, "step": 4811 }, { "epoch": 0.559047342433924, "grad_norm": 0.4249371290206909, "learning_rate": 0.0001, "loss": 1.6085, "step": 4812 }, { "epoch": 0.5591635201858844, "grad_norm": 0.4493142068386078, "learning_rate": 0.0001, "loss": 1.5725, "step": 4813 }, { "epoch": 0.5592796979378449, "grad_norm": 0.43762892484664917, "learning_rate": 0.0001, "loss": 1.79, "step": 4814 }, { "epoch": 0.5593958756898054, "grad_norm": 0.4867606461048126, "learning_rate": 0.0001, "loss": 1.786, "step": 4815 }, { "epoch": 0.5595120534417659, "grad_norm": 0.4017328917980194, "learning_rate": 0.0001, "loss": 1.5433, "step": 4816 }, { "epoch": 0.5596282311937264, "grad_norm": 0.4285069704055786, "learning_rate": 0.0001, "loss": 1.7024, "step": 4817 }, { "epoch": 0.5597444089456869, "grad_norm": 0.4313235580921173, "learning_rate": 0.0001, "loss": 1.6348, "step": 4818 }, { "epoch": 0.5598605866976474, "grad_norm": 0.3793412744998932, "learning_rate": 0.0001, "loss": 1.404, "step": 4819 }, { "epoch": 0.5599767644496079, "grad_norm": 0.4507150948047638, "learning_rate": 0.0001, "loss": 1.8942, "step": 4820 }, { "epoch": 0.5600929422015684, "grad_norm": 0.41890159249305725, "learning_rate": 0.0001, "loss": 1.5985, "step": 4821 }, { "epoch": 0.5602091199535288, "grad_norm": 0.4463735818862915, "learning_rate": 0.0001, "loss": 1.7219, "step": 4822 }, { "epoch": 0.5603252977054894, "grad_norm": 0.42564481496810913, "learning_rate": 0.0001, "loss": 1.7087, "step": 4823 }, { "epoch": 0.5604414754574499, "grad_norm": 0.4581405222415924, "learning_rate": 0.0001, "loss": 1.6817, "step": 4824 }, { "epoch": 0.5605576532094104, "grad_norm": 0.39707350730895996, "learning_rate": 0.0001, "loss": 1.51, "step": 4825 }, { "epoch": 0.5606738309613709, "grad_norm": 0.43881773948669434, "learning_rate": 0.0001, "loss": 1.6627, "step": 4826 }, { "epoch": 0.5607900087133314, "grad_norm": 0.4427170157432556, "learning_rate": 0.0001, "loss": 1.7076, "step": 4827 }, { "epoch": 0.5609061864652919, "grad_norm": 0.41310209035873413, "learning_rate": 0.0001, "loss": 1.5741, "step": 4828 }, { "epoch": 0.5610223642172524, "grad_norm": 0.4155448079109192, "learning_rate": 0.0001, "loss": 1.5655, "step": 4829 }, { "epoch": 0.5611385419692129, "grad_norm": 0.42745399475097656, "learning_rate": 0.0001, "loss": 1.55, "step": 4830 }, { "epoch": 0.5612547197211734, "grad_norm": 0.39938148856163025, "learning_rate": 0.0001, "loss": 1.548, "step": 4831 }, { "epoch": 0.5613708974731338, "grad_norm": 0.4226715862751007, "learning_rate": 0.0001, "loss": 1.6005, "step": 4832 }, { "epoch": 0.5614870752250944, "grad_norm": 0.4088357985019684, "learning_rate": 0.0001, "loss": 1.5315, "step": 4833 }, { "epoch": 0.5616032529770549, "grad_norm": 0.41541755199432373, "learning_rate": 0.0001, "loss": 1.5913, "step": 4834 }, { "epoch": 0.5617194307290154, "grad_norm": 0.3974235951900482, "learning_rate": 0.0001, "loss": 1.523, "step": 4835 }, { "epoch": 0.5618356084809759, "grad_norm": 0.42991939187049866, "learning_rate": 0.0001, "loss": 1.495, "step": 4836 }, { "epoch": 0.5619517862329364, "grad_norm": 0.43215852975845337, "learning_rate": 0.0001, "loss": 1.6715, "step": 4837 }, { "epoch": 0.5620679639848969, "grad_norm": 0.42073243856430054, "learning_rate": 0.0001, "loss": 1.7004, "step": 4838 }, { "epoch": 0.5621841417368574, "grad_norm": 0.40232616662979126, "learning_rate": 0.0001, "loss": 1.5684, "step": 4839 }, { "epoch": 0.5623003194888179, "grad_norm": 0.4098654091358185, "learning_rate": 0.0001, "loss": 1.6134, "step": 4840 }, { "epoch": 0.5624164972407784, "grad_norm": 0.44964614510536194, "learning_rate": 0.0001, "loss": 1.8357, "step": 4841 }, { "epoch": 0.5625326749927388, "grad_norm": 0.45787712931632996, "learning_rate": 0.0001, "loss": 1.6967, "step": 4842 }, { "epoch": 0.5626488527446993, "grad_norm": 0.41997238993644714, "learning_rate": 0.0001, "loss": 1.6079, "step": 4843 }, { "epoch": 0.5627650304966599, "grad_norm": 0.4027771055698395, "learning_rate": 0.0001, "loss": 1.6138, "step": 4844 }, { "epoch": 0.5628812082486204, "grad_norm": 0.3900171220302582, "learning_rate": 0.0001, "loss": 1.5524, "step": 4845 }, { "epoch": 0.5629973860005809, "grad_norm": 0.4267227053642273, "learning_rate": 0.0001, "loss": 1.5865, "step": 4846 }, { "epoch": 0.5631135637525414, "grad_norm": 0.42500099539756775, "learning_rate": 0.0001, "loss": 1.6397, "step": 4847 }, { "epoch": 0.5632297415045019, "grad_norm": 0.40846553444862366, "learning_rate": 0.0001, "loss": 1.6244, "step": 4848 }, { "epoch": 0.5633459192564624, "grad_norm": 0.43166103959083557, "learning_rate": 0.0001, "loss": 1.6879, "step": 4849 }, { "epoch": 0.5634620970084229, "grad_norm": 0.41474416851997375, "learning_rate": 0.0001, "loss": 1.7393, "step": 4850 }, { "epoch": 0.5635782747603834, "grad_norm": 0.4628075361251831, "learning_rate": 0.0001, "loss": 1.6299, "step": 4851 }, { "epoch": 0.5636944525123438, "grad_norm": 0.48346492648124695, "learning_rate": 0.0001, "loss": 1.6858, "step": 4852 }, { "epoch": 0.5638106302643043, "grad_norm": 0.4645657241344452, "learning_rate": 0.0001, "loss": 1.7263, "step": 4853 }, { "epoch": 0.5639268080162649, "grad_norm": 0.4429479241371155, "learning_rate": 0.0001, "loss": 1.7976, "step": 4854 }, { "epoch": 0.5640429857682254, "grad_norm": 0.4967527389526367, "learning_rate": 0.0001, "loss": 1.8657, "step": 4855 }, { "epoch": 0.5641591635201859, "grad_norm": 0.45798930525779724, "learning_rate": 0.0001, "loss": 1.6908, "step": 4856 }, { "epoch": 0.5642753412721464, "grad_norm": 0.4299752116203308, "learning_rate": 0.0001, "loss": 1.7577, "step": 4857 }, { "epoch": 0.5643915190241069, "grad_norm": 0.4171035885810852, "learning_rate": 0.0001, "loss": 1.5997, "step": 4858 }, { "epoch": 0.5645076967760674, "grad_norm": 0.45656847953796387, "learning_rate": 0.0001, "loss": 1.6746, "step": 4859 }, { "epoch": 0.5646238745280279, "grad_norm": 0.4058765172958374, "learning_rate": 0.0001, "loss": 1.6135, "step": 4860 }, { "epoch": 0.5647400522799884, "grad_norm": 0.44879722595214844, "learning_rate": 0.0001, "loss": 1.663, "step": 4861 }, { "epoch": 0.5648562300319488, "grad_norm": 0.4731845259666443, "learning_rate": 0.0001, "loss": 1.713, "step": 4862 }, { "epoch": 0.5649724077839093, "grad_norm": 0.42394986748695374, "learning_rate": 0.0001, "loss": 1.6365, "step": 4863 }, { "epoch": 0.5650885855358699, "grad_norm": 0.4450361728668213, "learning_rate": 0.0001, "loss": 1.626, "step": 4864 }, { "epoch": 0.5652047632878304, "grad_norm": 0.3994229733943939, "learning_rate": 0.0001, "loss": 1.5822, "step": 4865 }, { "epoch": 0.5653209410397909, "grad_norm": 0.40167900919914246, "learning_rate": 0.0001, "loss": 1.6394, "step": 4866 }, { "epoch": 0.5654371187917514, "grad_norm": 0.409967839717865, "learning_rate": 0.0001, "loss": 1.5287, "step": 4867 }, { "epoch": 0.5655532965437119, "grad_norm": 0.4286467432975769, "learning_rate": 0.0001, "loss": 1.7396, "step": 4868 }, { "epoch": 0.5656694742956724, "grad_norm": 0.4050009250640869, "learning_rate": 0.0001, "loss": 1.5667, "step": 4869 }, { "epoch": 0.5657856520476329, "grad_norm": 0.45172083377838135, "learning_rate": 0.0001, "loss": 1.6246, "step": 4870 }, { "epoch": 0.5659018297995934, "grad_norm": 0.45170027017593384, "learning_rate": 0.0001, "loss": 1.6732, "step": 4871 }, { "epoch": 0.5660180075515538, "grad_norm": 0.43326306343078613, "learning_rate": 0.0001, "loss": 1.6344, "step": 4872 }, { "epoch": 0.5661341853035143, "grad_norm": 0.4260367453098297, "learning_rate": 0.0001, "loss": 1.7098, "step": 4873 }, { "epoch": 0.5662503630554748, "grad_norm": 0.4030110836029053, "learning_rate": 0.0001, "loss": 1.5083, "step": 4874 }, { "epoch": 0.5663665408074354, "grad_norm": 0.4576740860939026, "learning_rate": 0.0001, "loss": 1.5193, "step": 4875 }, { "epoch": 0.5664827185593959, "grad_norm": 0.4269350469112396, "learning_rate": 0.0001, "loss": 1.6734, "step": 4876 }, { "epoch": 0.5665988963113564, "grad_norm": 0.3865914046764374, "learning_rate": 0.0001, "loss": 1.4036, "step": 4877 }, { "epoch": 0.5667150740633169, "grad_norm": 0.4161985218524933, "learning_rate": 0.0001, "loss": 1.6248, "step": 4878 }, { "epoch": 0.5668312518152774, "grad_norm": 0.4495078921318054, "learning_rate": 0.0001, "loss": 1.5718, "step": 4879 }, { "epoch": 0.5669474295672379, "grad_norm": 0.40627461671829224, "learning_rate": 0.0001, "loss": 1.5743, "step": 4880 }, { "epoch": 0.5670636073191984, "grad_norm": 0.3978639543056488, "learning_rate": 0.0001, "loss": 1.5106, "step": 4881 }, { "epoch": 0.5671797850711588, "grad_norm": 0.4051436185836792, "learning_rate": 0.0001, "loss": 1.5717, "step": 4882 }, { "epoch": 0.5672959628231193, "grad_norm": 0.44826844334602356, "learning_rate": 0.0001, "loss": 1.6458, "step": 4883 }, { "epoch": 0.5674121405750798, "grad_norm": 0.3976137340068817, "learning_rate": 0.0001, "loss": 1.6192, "step": 4884 }, { "epoch": 0.5675283183270404, "grad_norm": 0.39482155442237854, "learning_rate": 0.0001, "loss": 1.4746, "step": 4885 }, { "epoch": 0.5676444960790009, "grad_norm": 0.43112707138061523, "learning_rate": 0.0001, "loss": 1.697, "step": 4886 }, { "epoch": 0.5677606738309614, "grad_norm": 0.45522767305374146, "learning_rate": 0.0001, "loss": 1.5977, "step": 4887 }, { "epoch": 0.5678768515829219, "grad_norm": 0.425699919462204, "learning_rate": 0.0001, "loss": 1.7145, "step": 4888 }, { "epoch": 0.5679930293348824, "grad_norm": 0.4110868573188782, "learning_rate": 0.0001, "loss": 1.735, "step": 4889 }, { "epoch": 0.5681092070868429, "grad_norm": 0.4198397994041443, "learning_rate": 0.0001, "loss": 1.542, "step": 4890 }, { "epoch": 0.5682253848388034, "grad_norm": 0.41459405422210693, "learning_rate": 0.0001, "loss": 1.6464, "step": 4891 }, { "epoch": 0.5683415625907638, "grad_norm": 0.4587966501712799, "learning_rate": 0.0001, "loss": 1.7476, "step": 4892 }, { "epoch": 0.5684577403427243, "grad_norm": 0.4369155466556549, "learning_rate": 0.0001, "loss": 1.5929, "step": 4893 }, { "epoch": 0.5685739180946848, "grad_norm": 0.4800203740596771, "learning_rate": 0.0001, "loss": 1.7202, "step": 4894 }, { "epoch": 0.5686900958466453, "grad_norm": 0.4436158835887909, "learning_rate": 0.0001, "loss": 1.6576, "step": 4895 }, { "epoch": 0.5688062735986059, "grad_norm": 0.4184040129184723, "learning_rate": 0.0001, "loss": 1.5964, "step": 4896 }, { "epoch": 0.5689224513505664, "grad_norm": 0.3854430913925171, "learning_rate": 0.0001, "loss": 1.5371, "step": 4897 }, { "epoch": 0.5690386291025269, "grad_norm": 0.4409714639186859, "learning_rate": 0.0001, "loss": 1.7056, "step": 4898 }, { "epoch": 0.5691548068544874, "grad_norm": 0.4433159828186035, "learning_rate": 0.0001, "loss": 1.7323, "step": 4899 }, { "epoch": 0.5692709846064479, "grad_norm": 0.4146018326282501, "learning_rate": 0.0001, "loss": 1.6415, "step": 4900 }, { "epoch": 0.5693871623584084, "grad_norm": 0.45392921566963196, "learning_rate": 0.0001, "loss": 1.5877, "step": 4901 }, { "epoch": 0.5695033401103688, "grad_norm": 0.42398884892463684, "learning_rate": 0.0001, "loss": 1.6091, "step": 4902 }, { "epoch": 0.5696195178623293, "grad_norm": 0.478370726108551, "learning_rate": 0.0001, "loss": 1.7515, "step": 4903 }, { "epoch": 0.5697356956142898, "grad_norm": 0.4152291417121887, "learning_rate": 0.0001, "loss": 1.5101, "step": 4904 }, { "epoch": 0.5698518733662503, "grad_norm": 0.4159678816795349, "learning_rate": 0.0001, "loss": 1.5687, "step": 4905 }, { "epoch": 0.5699680511182109, "grad_norm": 0.40162405371665955, "learning_rate": 0.0001, "loss": 1.6491, "step": 4906 }, { "epoch": 0.5700842288701714, "grad_norm": 0.44275495409965515, "learning_rate": 0.0001, "loss": 1.6955, "step": 4907 }, { "epoch": 0.5702004066221319, "grad_norm": 0.43401822447776794, "learning_rate": 0.0001, "loss": 1.6592, "step": 4908 }, { "epoch": 0.5703165843740924, "grad_norm": 0.40605849027633667, "learning_rate": 0.0001, "loss": 1.552, "step": 4909 }, { "epoch": 0.5704327621260529, "grad_norm": 0.4131391644477844, "learning_rate": 0.0001, "loss": 1.6183, "step": 4910 }, { "epoch": 0.5705489398780134, "grad_norm": 0.4179399311542511, "learning_rate": 0.0001, "loss": 1.5991, "step": 4911 }, { "epoch": 0.5706651176299739, "grad_norm": 0.4298674464225769, "learning_rate": 0.0001, "loss": 1.5691, "step": 4912 }, { "epoch": 0.5707812953819343, "grad_norm": 0.4349467158317566, "learning_rate": 0.0001, "loss": 1.6335, "step": 4913 }, { "epoch": 0.5708974731338948, "grad_norm": 0.4512692987918854, "learning_rate": 0.0001, "loss": 1.6647, "step": 4914 }, { "epoch": 0.5710136508858553, "grad_norm": 0.45529913902282715, "learning_rate": 0.0001, "loss": 1.6938, "step": 4915 }, { "epoch": 0.5711298286378158, "grad_norm": 0.4262561798095703, "learning_rate": 0.0001, "loss": 1.493, "step": 4916 }, { "epoch": 0.5712460063897764, "grad_norm": 0.40773990750312805, "learning_rate": 0.0001, "loss": 1.5212, "step": 4917 }, { "epoch": 0.5713621841417369, "grad_norm": 0.43575015664100647, "learning_rate": 0.0001, "loss": 1.7376, "step": 4918 }, { "epoch": 0.5714783618936974, "grad_norm": 0.4189346730709076, "learning_rate": 0.0001, "loss": 1.6159, "step": 4919 }, { "epoch": 0.5715945396456579, "grad_norm": 0.4791758954524994, "learning_rate": 0.0001, "loss": 1.7816, "step": 4920 }, { "epoch": 0.5717107173976184, "grad_norm": 0.42903390526771545, "learning_rate": 0.0001, "loss": 1.6155, "step": 4921 }, { "epoch": 0.5718268951495789, "grad_norm": 0.4235144555568695, "learning_rate": 0.0001, "loss": 1.5611, "step": 4922 }, { "epoch": 0.5719430729015393, "grad_norm": 0.4023747742176056, "learning_rate": 0.0001, "loss": 1.653, "step": 4923 }, { "epoch": 0.5720592506534998, "grad_norm": 0.44118964672088623, "learning_rate": 0.0001, "loss": 1.7021, "step": 4924 }, { "epoch": 0.5721754284054603, "grad_norm": 0.4274747669696808, "learning_rate": 0.0001, "loss": 1.5897, "step": 4925 }, { "epoch": 0.5722916061574208, "grad_norm": 0.41285374760627747, "learning_rate": 0.0001, "loss": 1.4082, "step": 4926 }, { "epoch": 0.5724077839093814, "grad_norm": 0.4146759510040283, "learning_rate": 0.0001, "loss": 1.4328, "step": 4927 }, { "epoch": 0.5725239616613419, "grad_norm": 0.41832008957862854, "learning_rate": 0.0001, "loss": 1.5344, "step": 4928 }, { "epoch": 0.5726401394133024, "grad_norm": 0.4418119192123413, "learning_rate": 0.0001, "loss": 1.7025, "step": 4929 }, { "epoch": 0.5727563171652629, "grad_norm": 0.3945986032485962, "learning_rate": 0.0001, "loss": 1.6492, "step": 4930 }, { "epoch": 0.5728724949172234, "grad_norm": 0.44057559967041016, "learning_rate": 0.0001, "loss": 1.7055, "step": 4931 }, { "epoch": 0.5729886726691839, "grad_norm": 0.4348897933959961, "learning_rate": 0.0001, "loss": 1.5898, "step": 4932 }, { "epoch": 0.5731048504211443, "grad_norm": 0.431241512298584, "learning_rate": 0.0001, "loss": 1.7089, "step": 4933 }, { "epoch": 0.5732210281731048, "grad_norm": 0.4173518419265747, "learning_rate": 0.0001, "loss": 1.6529, "step": 4934 }, { "epoch": 0.5733372059250653, "grad_norm": 0.4776740074157715, "learning_rate": 0.0001, "loss": 1.6631, "step": 4935 }, { "epoch": 0.5734533836770258, "grad_norm": 0.449663907289505, "learning_rate": 0.0001, "loss": 1.6217, "step": 4936 }, { "epoch": 0.5735695614289863, "grad_norm": 0.4311505854129791, "learning_rate": 0.0001, "loss": 1.5797, "step": 4937 }, { "epoch": 0.5736857391809469, "grad_norm": 0.4156731963157654, "learning_rate": 0.0001, "loss": 1.8089, "step": 4938 }, { "epoch": 0.5738019169329074, "grad_norm": 0.4211094379425049, "learning_rate": 0.0001, "loss": 1.4911, "step": 4939 }, { "epoch": 0.5739180946848679, "grad_norm": 0.4334612488746643, "learning_rate": 0.0001, "loss": 1.6352, "step": 4940 }, { "epoch": 0.5740342724368284, "grad_norm": 0.43656647205352783, "learning_rate": 0.0001, "loss": 1.7283, "step": 4941 }, { "epoch": 0.5741504501887889, "grad_norm": 0.3772907555103302, "learning_rate": 0.0001, "loss": 1.4156, "step": 4942 }, { "epoch": 0.5742666279407493, "grad_norm": 0.4248020350933075, "learning_rate": 0.0001, "loss": 1.5932, "step": 4943 }, { "epoch": 0.5743828056927098, "grad_norm": 0.4416458010673523, "learning_rate": 0.0001, "loss": 1.562, "step": 4944 }, { "epoch": 0.5744989834446703, "grad_norm": 0.43282851576805115, "learning_rate": 0.0001, "loss": 1.7414, "step": 4945 }, { "epoch": 0.5746151611966308, "grad_norm": 0.4210634231567383, "learning_rate": 0.0001, "loss": 1.4285, "step": 4946 }, { "epoch": 0.5747313389485913, "grad_norm": 0.4776458740234375, "learning_rate": 0.0001, "loss": 1.7215, "step": 4947 }, { "epoch": 0.5748475167005519, "grad_norm": 0.4351522624492645, "learning_rate": 0.0001, "loss": 1.5761, "step": 4948 }, { "epoch": 0.5749636944525124, "grad_norm": 0.4363928437232971, "learning_rate": 0.0001, "loss": 1.6422, "step": 4949 }, { "epoch": 0.5750798722044729, "grad_norm": 0.4042133390903473, "learning_rate": 0.0001, "loss": 1.5203, "step": 4950 }, { "epoch": 0.5751960499564334, "grad_norm": 0.4090398848056793, "learning_rate": 0.0001, "loss": 1.3931, "step": 4951 }, { "epoch": 0.5753122277083939, "grad_norm": 0.4363382160663605, "learning_rate": 0.0001, "loss": 1.6114, "step": 4952 }, { "epoch": 0.5754284054603543, "grad_norm": 0.43779557943344116, "learning_rate": 0.0001, "loss": 1.657, "step": 4953 }, { "epoch": 0.5755445832123148, "grad_norm": 0.4067518413066864, "learning_rate": 0.0001, "loss": 1.5868, "step": 4954 }, { "epoch": 0.5756607609642753, "grad_norm": 0.45167723298072815, "learning_rate": 0.0001, "loss": 1.726, "step": 4955 }, { "epoch": 0.5757769387162358, "grad_norm": 0.4339980185031891, "learning_rate": 0.0001, "loss": 1.7253, "step": 4956 }, { "epoch": 0.5758931164681963, "grad_norm": 0.41764840483665466, "learning_rate": 0.0001, "loss": 1.636, "step": 4957 }, { "epoch": 0.5760092942201568, "grad_norm": 0.4416648745536804, "learning_rate": 0.0001, "loss": 1.6458, "step": 4958 }, { "epoch": 0.5761254719721174, "grad_norm": 0.42781156301498413, "learning_rate": 0.0001, "loss": 1.6848, "step": 4959 }, { "epoch": 0.5762416497240779, "grad_norm": 0.44320225715637207, "learning_rate": 0.0001, "loss": 1.686, "step": 4960 }, { "epoch": 0.5763578274760384, "grad_norm": 0.3853711485862732, "learning_rate": 0.0001, "loss": 1.4755, "step": 4961 }, { "epoch": 0.5764740052279989, "grad_norm": 0.3990086317062378, "learning_rate": 0.0001, "loss": 1.5151, "step": 4962 }, { "epoch": 0.5765901829799593, "grad_norm": 0.41801634430885315, "learning_rate": 0.0001, "loss": 1.418, "step": 4963 }, { "epoch": 0.5767063607319198, "grad_norm": 0.4076370596885681, "learning_rate": 0.0001, "loss": 1.5114, "step": 4964 }, { "epoch": 0.5768225384838803, "grad_norm": 0.4171597361564636, "learning_rate": 0.0001, "loss": 1.5458, "step": 4965 }, { "epoch": 0.5769387162358408, "grad_norm": 0.4075530171394348, "learning_rate": 0.0001, "loss": 1.58, "step": 4966 }, { "epoch": 0.5770548939878013, "grad_norm": 0.4294959008693695, "learning_rate": 0.0001, "loss": 1.7531, "step": 4967 }, { "epoch": 0.5771710717397618, "grad_norm": 0.4060986340045929, "learning_rate": 0.0001, "loss": 1.5989, "step": 4968 }, { "epoch": 0.5772872494917224, "grad_norm": 0.43765029311180115, "learning_rate": 0.0001, "loss": 1.5315, "step": 4969 }, { "epoch": 0.5774034272436829, "grad_norm": 0.4338558316230774, "learning_rate": 0.0001, "loss": 1.5691, "step": 4970 }, { "epoch": 0.5775196049956434, "grad_norm": 0.42239588499069214, "learning_rate": 0.0001, "loss": 1.64, "step": 4971 }, { "epoch": 0.5776357827476039, "grad_norm": 0.45466890931129456, "learning_rate": 0.0001, "loss": 1.6055, "step": 4972 }, { "epoch": 0.5777519604995643, "grad_norm": 0.43936869502067566, "learning_rate": 0.0001, "loss": 1.6832, "step": 4973 }, { "epoch": 0.5778681382515248, "grad_norm": 0.4495198130607605, "learning_rate": 0.0001, "loss": 1.6946, "step": 4974 }, { "epoch": 0.5779843160034853, "grad_norm": 0.41554975509643555, "learning_rate": 0.0001, "loss": 1.5771, "step": 4975 }, { "epoch": 0.5781004937554458, "grad_norm": 0.3993794620037079, "learning_rate": 0.0001, "loss": 1.4377, "step": 4976 }, { "epoch": 0.5782166715074063, "grad_norm": 0.4077496826648712, "learning_rate": 0.0001, "loss": 1.6203, "step": 4977 }, { "epoch": 0.5783328492593668, "grad_norm": 0.45379477739334106, "learning_rate": 0.0001, "loss": 1.7378, "step": 4978 }, { "epoch": 0.5784490270113273, "grad_norm": 0.4157100319862366, "learning_rate": 0.0001, "loss": 1.6734, "step": 4979 }, { "epoch": 0.5785652047632879, "grad_norm": 0.4228839874267578, "learning_rate": 0.0001, "loss": 1.6136, "step": 4980 }, { "epoch": 0.5786813825152484, "grad_norm": 0.43401455879211426, "learning_rate": 0.0001, "loss": 1.7284, "step": 4981 }, { "epoch": 0.5787975602672089, "grad_norm": 0.43419334292411804, "learning_rate": 0.0001, "loss": 1.5411, "step": 4982 }, { "epoch": 0.5789137380191693, "grad_norm": 0.4071895182132721, "learning_rate": 0.0001, "loss": 1.5022, "step": 4983 }, { "epoch": 0.5790299157711298, "grad_norm": 0.3975391983985901, "learning_rate": 0.0001, "loss": 1.4657, "step": 4984 }, { "epoch": 0.5791460935230903, "grad_norm": 0.4305202066898346, "learning_rate": 0.0001, "loss": 1.6816, "step": 4985 }, { "epoch": 0.5792622712750508, "grad_norm": 0.47176504135131836, "learning_rate": 0.0001, "loss": 1.8323, "step": 4986 }, { "epoch": 0.5793784490270113, "grad_norm": 0.45612940192222595, "learning_rate": 0.0001, "loss": 1.6724, "step": 4987 }, { "epoch": 0.5794946267789718, "grad_norm": 0.4044199585914612, "learning_rate": 0.0001, "loss": 1.5166, "step": 4988 }, { "epoch": 0.5796108045309323, "grad_norm": 0.42781221866607666, "learning_rate": 0.0001, "loss": 1.5931, "step": 4989 }, { "epoch": 0.5797269822828929, "grad_norm": 0.4110262989997864, "learning_rate": 0.0001, "loss": 1.5422, "step": 4990 }, { "epoch": 0.5798431600348534, "grad_norm": 0.43411797285079956, "learning_rate": 0.0001, "loss": 1.6164, "step": 4991 }, { "epoch": 0.5799593377868139, "grad_norm": 0.4334775507450104, "learning_rate": 0.0001, "loss": 1.5883, "step": 4992 }, { "epoch": 0.5800755155387743, "grad_norm": 0.40531855821609497, "learning_rate": 0.0001, "loss": 1.606, "step": 4993 }, { "epoch": 0.5801916932907348, "grad_norm": 0.41528868675231934, "learning_rate": 0.0001, "loss": 1.5316, "step": 4994 }, { "epoch": 0.5803078710426953, "grad_norm": 0.44848141074180603, "learning_rate": 0.0001, "loss": 1.6933, "step": 4995 }, { "epoch": 0.5804240487946558, "grad_norm": 0.4627237021923065, "learning_rate": 0.0001, "loss": 1.6469, "step": 4996 }, { "epoch": 0.5805402265466163, "grad_norm": 0.4355679154396057, "learning_rate": 0.0001, "loss": 1.656, "step": 4997 }, { "epoch": 0.5806564042985768, "grad_norm": 0.4059557318687439, "learning_rate": 0.0001, "loss": 1.533, "step": 4998 }, { "epoch": 0.5807725820505373, "grad_norm": 0.4451749324798584, "learning_rate": 0.0001, "loss": 1.8021, "step": 4999 }, { "epoch": 0.5808887598024978, "grad_norm": 0.47867774963378906, "learning_rate": 0.0001, "loss": 1.7213, "step": 5000 }, { "epoch": 0.5810049375544584, "grad_norm": 0.4432399570941925, "learning_rate": 0.0001, "loss": 1.5957, "step": 5001 }, { "epoch": 0.5811211153064189, "grad_norm": 0.40032443404197693, "learning_rate": 0.0001, "loss": 1.6074, "step": 5002 }, { "epoch": 0.5812372930583793, "grad_norm": 0.4400497376918793, "learning_rate": 0.0001, "loss": 1.7494, "step": 5003 }, { "epoch": 0.5813534708103398, "grad_norm": 0.410427451133728, "learning_rate": 0.0001, "loss": 1.6003, "step": 5004 }, { "epoch": 0.5814696485623003, "grad_norm": 0.3894704580307007, "learning_rate": 0.0001, "loss": 1.6396, "step": 5005 }, { "epoch": 0.5815858263142608, "grad_norm": 0.4130994975566864, "learning_rate": 0.0001, "loss": 1.5854, "step": 5006 }, { "epoch": 0.5817020040662213, "grad_norm": 0.4316181242465973, "learning_rate": 0.0001, "loss": 1.6324, "step": 5007 }, { "epoch": 0.5818181818181818, "grad_norm": 0.43336644768714905, "learning_rate": 0.0001, "loss": 1.5665, "step": 5008 }, { "epoch": 0.5819343595701423, "grad_norm": 0.41586264967918396, "learning_rate": 0.0001, "loss": 1.6583, "step": 5009 }, { "epoch": 0.5820505373221028, "grad_norm": 0.398639053106308, "learning_rate": 0.0001, "loss": 1.6328, "step": 5010 }, { "epoch": 0.5821667150740634, "grad_norm": 0.42765846848487854, "learning_rate": 0.0001, "loss": 1.7781, "step": 5011 }, { "epoch": 0.5822828928260239, "grad_norm": 0.420610249042511, "learning_rate": 0.0001, "loss": 1.6591, "step": 5012 }, { "epoch": 0.5823990705779843, "grad_norm": 0.43733763694763184, "learning_rate": 0.0001, "loss": 1.709, "step": 5013 }, { "epoch": 0.5825152483299448, "grad_norm": 0.46470460295677185, "learning_rate": 0.0001, "loss": 1.6778, "step": 5014 }, { "epoch": 0.5826314260819053, "grad_norm": 0.43464747071266174, "learning_rate": 0.0001, "loss": 1.5551, "step": 5015 }, { "epoch": 0.5827476038338658, "grad_norm": 0.42087873816490173, "learning_rate": 0.0001, "loss": 1.6084, "step": 5016 }, { "epoch": 0.5828637815858263, "grad_norm": 0.4221756160259247, "learning_rate": 0.0001, "loss": 1.5856, "step": 5017 }, { "epoch": 0.5829799593377868, "grad_norm": 0.4236156940460205, "learning_rate": 0.0001, "loss": 1.6823, "step": 5018 }, { "epoch": 0.5830961370897473, "grad_norm": 0.39433997869491577, "learning_rate": 0.0001, "loss": 1.4167, "step": 5019 }, { "epoch": 0.5832123148417078, "grad_norm": 0.4237521290779114, "learning_rate": 0.0001, "loss": 1.6079, "step": 5020 }, { "epoch": 0.5833284925936683, "grad_norm": 0.44210362434387207, "learning_rate": 0.0001, "loss": 1.7102, "step": 5021 }, { "epoch": 0.5834446703456289, "grad_norm": 0.4477832317352295, "learning_rate": 0.0001, "loss": 1.7188, "step": 5022 }, { "epoch": 0.5835608480975893, "grad_norm": 0.39492443203926086, "learning_rate": 0.0001, "loss": 1.3972, "step": 5023 }, { "epoch": 0.5836770258495498, "grad_norm": 0.42422086000442505, "learning_rate": 0.0001, "loss": 1.6085, "step": 5024 }, { "epoch": 0.5837932036015103, "grad_norm": 0.4569999575614929, "learning_rate": 0.0001, "loss": 1.6428, "step": 5025 }, { "epoch": 0.5839093813534708, "grad_norm": 0.38789770007133484, "learning_rate": 0.0001, "loss": 1.4186, "step": 5026 }, { "epoch": 0.5840255591054313, "grad_norm": 0.4335310161113739, "learning_rate": 0.0001, "loss": 1.4464, "step": 5027 }, { "epoch": 0.5841417368573918, "grad_norm": 0.4649960398674011, "learning_rate": 0.0001, "loss": 1.7433, "step": 5028 }, { "epoch": 0.5842579146093523, "grad_norm": 0.41210901737213135, "learning_rate": 0.0001, "loss": 1.5121, "step": 5029 }, { "epoch": 0.5843740923613128, "grad_norm": 0.4519018232822418, "learning_rate": 0.0001, "loss": 1.6782, "step": 5030 }, { "epoch": 0.5844902701132733, "grad_norm": 0.45829200744628906, "learning_rate": 0.0001, "loss": 1.5862, "step": 5031 }, { "epoch": 0.5846064478652339, "grad_norm": 0.4412051737308502, "learning_rate": 0.0001, "loss": 1.6317, "step": 5032 }, { "epoch": 0.5847226256171943, "grad_norm": 0.4298493266105652, "learning_rate": 0.0001, "loss": 1.71, "step": 5033 }, { "epoch": 0.5848388033691548, "grad_norm": 0.47564446926116943, "learning_rate": 0.0001, "loss": 1.7355, "step": 5034 }, { "epoch": 0.5849549811211153, "grad_norm": 0.4598512053489685, "learning_rate": 0.0001, "loss": 1.6587, "step": 5035 }, { "epoch": 0.5850711588730758, "grad_norm": 0.4496704041957855, "learning_rate": 0.0001, "loss": 1.4895, "step": 5036 }, { "epoch": 0.5851873366250363, "grad_norm": 0.4410554766654968, "learning_rate": 0.0001, "loss": 1.626, "step": 5037 }, { "epoch": 0.5853035143769968, "grad_norm": 0.4468580484390259, "learning_rate": 0.0001, "loss": 1.7235, "step": 5038 }, { "epoch": 0.5854196921289573, "grad_norm": 0.41189512610435486, "learning_rate": 0.0001, "loss": 1.5451, "step": 5039 }, { "epoch": 0.5855358698809178, "grad_norm": 0.38999536633491516, "learning_rate": 0.0001, "loss": 1.5278, "step": 5040 }, { "epoch": 0.5856520476328783, "grad_norm": 0.40551015734672546, "learning_rate": 0.0001, "loss": 1.4846, "step": 5041 }, { "epoch": 0.5857682253848389, "grad_norm": 0.4540075957775116, "learning_rate": 0.0001, "loss": 1.7988, "step": 5042 }, { "epoch": 0.5858844031367993, "grad_norm": 0.4358082115650177, "learning_rate": 0.0001, "loss": 1.6716, "step": 5043 }, { "epoch": 0.5860005808887598, "grad_norm": 0.42553818225860596, "learning_rate": 0.0001, "loss": 1.5983, "step": 5044 }, { "epoch": 0.5861167586407203, "grad_norm": 0.4158061444759369, "learning_rate": 0.0001, "loss": 1.5391, "step": 5045 }, { "epoch": 0.5862329363926808, "grad_norm": 0.4467496871948242, "learning_rate": 0.0001, "loss": 1.79, "step": 5046 }, { "epoch": 0.5863491141446413, "grad_norm": 0.4339059293270111, "learning_rate": 0.0001, "loss": 1.6562, "step": 5047 }, { "epoch": 0.5864652918966018, "grad_norm": 0.4463905096054077, "learning_rate": 0.0001, "loss": 1.755, "step": 5048 }, { "epoch": 0.5865814696485623, "grad_norm": 0.4336758553981781, "learning_rate": 0.0001, "loss": 1.7042, "step": 5049 }, { "epoch": 0.5866976474005228, "grad_norm": 0.3911759555339813, "learning_rate": 0.0001, "loss": 1.5634, "step": 5050 }, { "epoch": 0.5868138251524833, "grad_norm": 0.43406540155410767, "learning_rate": 0.0001, "loss": 1.5955, "step": 5051 }, { "epoch": 0.5869300029044437, "grad_norm": 0.40602606534957886, "learning_rate": 0.0001, "loss": 1.5095, "step": 5052 }, { "epoch": 0.5870461806564043, "grad_norm": 0.40257728099823, "learning_rate": 0.0001, "loss": 1.659, "step": 5053 }, { "epoch": 0.5871623584083648, "grad_norm": 0.42640700936317444, "learning_rate": 0.0001, "loss": 1.6454, "step": 5054 }, { "epoch": 0.5872785361603253, "grad_norm": 0.4607278108596802, "learning_rate": 0.0001, "loss": 1.6632, "step": 5055 }, { "epoch": 0.5873947139122858, "grad_norm": 0.456226110458374, "learning_rate": 0.0001, "loss": 1.7054, "step": 5056 }, { "epoch": 0.5875108916642463, "grad_norm": 0.44643908739089966, "learning_rate": 0.0001, "loss": 1.6056, "step": 5057 }, { "epoch": 0.5876270694162068, "grad_norm": 0.4707536995410919, "learning_rate": 0.0001, "loss": 1.7986, "step": 5058 }, { "epoch": 0.5877432471681673, "grad_norm": 0.437148779630661, "learning_rate": 0.0001, "loss": 1.6663, "step": 5059 }, { "epoch": 0.5878594249201278, "grad_norm": 0.4139115810394287, "learning_rate": 0.0001, "loss": 1.6089, "step": 5060 }, { "epoch": 0.5879756026720883, "grad_norm": 0.44146662950515747, "learning_rate": 0.0001, "loss": 1.6923, "step": 5061 }, { "epoch": 0.5880917804240487, "grad_norm": 0.41695713996887207, "learning_rate": 0.0001, "loss": 1.6376, "step": 5062 }, { "epoch": 0.5882079581760093, "grad_norm": 0.45756304264068604, "learning_rate": 0.0001, "loss": 1.622, "step": 5063 }, { "epoch": 0.5883241359279698, "grad_norm": 0.4567687213420868, "learning_rate": 0.0001, "loss": 1.687, "step": 5064 }, { "epoch": 0.5884403136799303, "grad_norm": 0.43768954277038574, "learning_rate": 0.0001, "loss": 1.6733, "step": 5065 }, { "epoch": 0.5885564914318908, "grad_norm": 0.4621203541755676, "learning_rate": 0.0001, "loss": 1.7541, "step": 5066 }, { "epoch": 0.5886726691838513, "grad_norm": 0.420998215675354, "learning_rate": 0.0001, "loss": 1.601, "step": 5067 }, { "epoch": 0.5887888469358118, "grad_norm": 0.45057764649391174, "learning_rate": 0.0001, "loss": 1.763, "step": 5068 }, { "epoch": 0.5889050246877723, "grad_norm": 0.47149190306663513, "learning_rate": 0.0001, "loss": 1.8066, "step": 5069 }, { "epoch": 0.5890212024397328, "grad_norm": 0.4899585545063019, "learning_rate": 0.0001, "loss": 1.7134, "step": 5070 }, { "epoch": 0.5891373801916933, "grad_norm": 0.4497217833995819, "learning_rate": 0.0001, "loss": 1.7211, "step": 5071 }, { "epoch": 0.5892535579436537, "grad_norm": 0.47766372561454773, "learning_rate": 0.0001, "loss": 1.7802, "step": 5072 }, { "epoch": 0.5893697356956142, "grad_norm": 0.4019678831100464, "learning_rate": 0.0001, "loss": 1.413, "step": 5073 }, { "epoch": 0.5894859134475748, "grad_norm": 0.40686938166618347, "learning_rate": 0.0001, "loss": 1.7313, "step": 5074 }, { "epoch": 0.5896020911995353, "grad_norm": 0.417871356010437, "learning_rate": 0.0001, "loss": 1.6024, "step": 5075 }, { "epoch": 0.5897182689514958, "grad_norm": 0.4329603612422943, "learning_rate": 0.0001, "loss": 1.7662, "step": 5076 }, { "epoch": 0.5898344467034563, "grad_norm": 0.41233643889427185, "learning_rate": 0.0001, "loss": 1.4379, "step": 5077 }, { "epoch": 0.5899506244554168, "grad_norm": 0.4218975603580475, "learning_rate": 0.0001, "loss": 1.5749, "step": 5078 }, { "epoch": 0.5900668022073773, "grad_norm": 0.4207758605480194, "learning_rate": 0.0001, "loss": 1.6849, "step": 5079 }, { "epoch": 0.5901829799593378, "grad_norm": 0.4465838670730591, "learning_rate": 0.0001, "loss": 1.4879, "step": 5080 }, { "epoch": 0.5902991577112983, "grad_norm": 0.43389689922332764, "learning_rate": 0.0001, "loss": 1.7137, "step": 5081 }, { "epoch": 0.5904153354632588, "grad_norm": 0.4294486343860626, "learning_rate": 0.0001, "loss": 1.6588, "step": 5082 }, { "epoch": 0.5905315132152192, "grad_norm": 0.45105913281440735, "learning_rate": 0.0001, "loss": 1.7492, "step": 5083 }, { "epoch": 0.5906476909671798, "grad_norm": 0.3978818655014038, "learning_rate": 0.0001, "loss": 1.4812, "step": 5084 }, { "epoch": 0.5907638687191403, "grad_norm": 0.44959375262260437, "learning_rate": 0.0001, "loss": 1.7158, "step": 5085 }, { "epoch": 0.5908800464711008, "grad_norm": 0.40922439098358154, "learning_rate": 0.0001, "loss": 1.6289, "step": 5086 }, { "epoch": 0.5909962242230613, "grad_norm": 0.41393378376960754, "learning_rate": 0.0001, "loss": 1.5888, "step": 5087 }, { "epoch": 0.5911124019750218, "grad_norm": 0.4593432545661926, "learning_rate": 0.0001, "loss": 1.691, "step": 5088 }, { "epoch": 0.5912285797269823, "grad_norm": 0.4352717995643616, "learning_rate": 0.0001, "loss": 1.5997, "step": 5089 }, { "epoch": 0.5913447574789428, "grad_norm": 0.4431297779083252, "learning_rate": 0.0001, "loss": 1.7196, "step": 5090 }, { "epoch": 0.5914609352309033, "grad_norm": 0.4279361069202423, "learning_rate": 0.0001, "loss": 1.5642, "step": 5091 }, { "epoch": 0.5915771129828638, "grad_norm": 0.4590848386287689, "learning_rate": 0.0001, "loss": 1.5416, "step": 5092 }, { "epoch": 0.5916932907348242, "grad_norm": 0.4331355094909668, "learning_rate": 0.0001, "loss": 1.4664, "step": 5093 }, { "epoch": 0.5918094684867847, "grad_norm": 0.4613255262374878, "learning_rate": 0.0001, "loss": 1.8481, "step": 5094 }, { "epoch": 0.5919256462387453, "grad_norm": 0.46412867307662964, "learning_rate": 0.0001, "loss": 1.5394, "step": 5095 }, { "epoch": 0.5920418239907058, "grad_norm": 0.4639713764190674, "learning_rate": 0.0001, "loss": 1.6082, "step": 5096 }, { "epoch": 0.5921580017426663, "grad_norm": 0.48971736431121826, "learning_rate": 0.0001, "loss": 1.6449, "step": 5097 }, { "epoch": 0.5922741794946268, "grad_norm": 0.48425406217575073, "learning_rate": 0.0001, "loss": 1.6604, "step": 5098 }, { "epoch": 0.5923903572465873, "grad_norm": 0.45319536328315735, "learning_rate": 0.0001, "loss": 1.7629, "step": 5099 }, { "epoch": 0.5925065349985478, "grad_norm": 0.436058908700943, "learning_rate": 0.0001, "loss": 1.6002, "step": 5100 }, { "epoch": 0.5926227127505083, "grad_norm": 0.4272605776786804, "learning_rate": 0.0001, "loss": 1.6671, "step": 5101 }, { "epoch": 0.5927388905024688, "grad_norm": 0.4012027978897095, "learning_rate": 0.0001, "loss": 1.5953, "step": 5102 }, { "epoch": 0.5928550682544292, "grad_norm": 0.42485886812210083, "learning_rate": 0.0001, "loss": 1.5558, "step": 5103 }, { "epoch": 0.5929712460063897, "grad_norm": 0.45090624690055847, "learning_rate": 0.0001, "loss": 1.672, "step": 5104 }, { "epoch": 0.5930874237583503, "grad_norm": 0.45741748809814453, "learning_rate": 0.0001, "loss": 1.7975, "step": 5105 }, { "epoch": 0.5932036015103108, "grad_norm": 0.48514753580093384, "learning_rate": 0.0001, "loss": 1.6806, "step": 5106 }, { "epoch": 0.5933197792622713, "grad_norm": 0.41915470361709595, "learning_rate": 0.0001, "loss": 1.6837, "step": 5107 }, { "epoch": 0.5934359570142318, "grad_norm": 0.3943340480327606, "learning_rate": 0.0001, "loss": 1.4051, "step": 5108 }, { "epoch": 0.5935521347661923, "grad_norm": 0.4319881796836853, "learning_rate": 0.0001, "loss": 1.4794, "step": 5109 }, { "epoch": 0.5936683125181528, "grad_norm": 0.41222912073135376, "learning_rate": 0.0001, "loss": 1.5224, "step": 5110 }, { "epoch": 0.5937844902701133, "grad_norm": 0.4626607894897461, "learning_rate": 0.0001, "loss": 1.6452, "step": 5111 }, { "epoch": 0.5939006680220738, "grad_norm": 0.44612380862236023, "learning_rate": 0.0001, "loss": 1.7116, "step": 5112 }, { "epoch": 0.5940168457740342, "grad_norm": 0.44148531556129456, "learning_rate": 0.0001, "loss": 1.6118, "step": 5113 }, { "epoch": 0.5941330235259947, "grad_norm": 0.4143647253513336, "learning_rate": 0.0001, "loss": 1.4093, "step": 5114 }, { "epoch": 0.5942492012779552, "grad_norm": 0.4281814992427826, "learning_rate": 0.0001, "loss": 1.6976, "step": 5115 }, { "epoch": 0.5943653790299158, "grad_norm": 0.44174307584762573, "learning_rate": 0.0001, "loss": 1.5203, "step": 5116 }, { "epoch": 0.5944815567818763, "grad_norm": 0.4306361675262451, "learning_rate": 0.0001, "loss": 1.6543, "step": 5117 }, { "epoch": 0.5945977345338368, "grad_norm": 0.46144962310791016, "learning_rate": 0.0001, "loss": 1.8373, "step": 5118 }, { "epoch": 0.5947139122857973, "grad_norm": 0.43401268124580383, "learning_rate": 0.0001, "loss": 1.6625, "step": 5119 }, { "epoch": 0.5948300900377578, "grad_norm": 0.4165130853652954, "learning_rate": 0.0001, "loss": 1.5347, "step": 5120 }, { "epoch": 0.5949462677897183, "grad_norm": 0.46252015233039856, "learning_rate": 0.0001, "loss": 1.7019, "step": 5121 }, { "epoch": 0.5950624455416788, "grad_norm": 0.40529441833496094, "learning_rate": 0.0001, "loss": 1.5844, "step": 5122 }, { "epoch": 0.5951786232936392, "grad_norm": 0.44284263253211975, "learning_rate": 0.0001, "loss": 1.7003, "step": 5123 }, { "epoch": 0.5952948010455997, "grad_norm": 0.4453653395175934, "learning_rate": 0.0001, "loss": 1.6263, "step": 5124 }, { "epoch": 0.5954109787975602, "grad_norm": 0.4209020137786865, "learning_rate": 0.0001, "loss": 1.4575, "step": 5125 }, { "epoch": 0.5955271565495208, "grad_norm": 0.4327007234096527, "learning_rate": 0.0001, "loss": 1.6253, "step": 5126 }, { "epoch": 0.5956433343014813, "grad_norm": 0.4445003271102905, "learning_rate": 0.0001, "loss": 1.7994, "step": 5127 }, { "epoch": 0.5957595120534418, "grad_norm": 0.40100163221359253, "learning_rate": 0.0001, "loss": 1.6465, "step": 5128 }, { "epoch": 0.5958756898054023, "grad_norm": 0.4560073912143707, "learning_rate": 0.0001, "loss": 1.6552, "step": 5129 }, { "epoch": 0.5959918675573628, "grad_norm": 0.42099544405937195, "learning_rate": 0.0001, "loss": 1.6327, "step": 5130 }, { "epoch": 0.5961080453093233, "grad_norm": 0.40899744629859924, "learning_rate": 0.0001, "loss": 1.5274, "step": 5131 }, { "epoch": 0.5962242230612838, "grad_norm": 0.3647748827934265, "learning_rate": 0.0001, "loss": 1.3157, "step": 5132 }, { "epoch": 0.5963404008132442, "grad_norm": 0.43523362278938293, "learning_rate": 0.0001, "loss": 1.6919, "step": 5133 }, { "epoch": 0.5964565785652047, "grad_norm": 0.40236619114875793, "learning_rate": 0.0001, "loss": 1.5173, "step": 5134 }, { "epoch": 0.5965727563171652, "grad_norm": 0.43228352069854736, "learning_rate": 0.0001, "loss": 1.6709, "step": 5135 }, { "epoch": 0.5966889340691257, "grad_norm": 0.41964632272720337, "learning_rate": 0.0001, "loss": 1.4214, "step": 5136 }, { "epoch": 0.5968051118210863, "grad_norm": 0.5177493691444397, "learning_rate": 0.0001, "loss": 1.7107, "step": 5137 }, { "epoch": 0.5969212895730468, "grad_norm": 0.40368756651878357, "learning_rate": 0.0001, "loss": 1.4657, "step": 5138 }, { "epoch": 0.5970374673250073, "grad_norm": 0.49018216133117676, "learning_rate": 0.0001, "loss": 1.6654, "step": 5139 }, { "epoch": 0.5971536450769678, "grad_norm": 0.4471641480922699, "learning_rate": 0.0001, "loss": 1.4782, "step": 5140 }, { "epoch": 0.5972698228289283, "grad_norm": 0.4672026038169861, "learning_rate": 0.0001, "loss": 1.7613, "step": 5141 }, { "epoch": 0.5973860005808888, "grad_norm": 0.49104636907577515, "learning_rate": 0.0001, "loss": 1.7263, "step": 5142 }, { "epoch": 0.5975021783328492, "grad_norm": 0.4686024487018585, "learning_rate": 0.0001, "loss": 1.5556, "step": 5143 }, { "epoch": 0.5976183560848097, "grad_norm": 0.41973164677619934, "learning_rate": 0.0001, "loss": 1.6068, "step": 5144 }, { "epoch": 0.5977345338367702, "grad_norm": 0.43972551822662354, "learning_rate": 0.0001, "loss": 1.5259, "step": 5145 }, { "epoch": 0.5978507115887307, "grad_norm": 0.4550989270210266, "learning_rate": 0.0001, "loss": 1.8349, "step": 5146 }, { "epoch": 0.5979668893406913, "grad_norm": 0.452958345413208, "learning_rate": 0.0001, "loss": 1.5227, "step": 5147 }, { "epoch": 0.5980830670926518, "grad_norm": 0.4228680431842804, "learning_rate": 0.0001, "loss": 1.6042, "step": 5148 }, { "epoch": 0.5981992448446123, "grad_norm": 0.4337772727012634, "learning_rate": 0.0001, "loss": 1.6816, "step": 5149 }, { "epoch": 0.5983154225965728, "grad_norm": 0.3951295018196106, "learning_rate": 0.0001, "loss": 1.553, "step": 5150 }, { "epoch": 0.5984316003485333, "grad_norm": 0.4213055670261383, "learning_rate": 0.0001, "loss": 1.5837, "step": 5151 }, { "epoch": 0.5985477781004938, "grad_norm": 0.4383963644504547, "learning_rate": 0.0001, "loss": 1.7536, "step": 5152 }, { "epoch": 0.5986639558524542, "grad_norm": 0.43566179275512695, "learning_rate": 0.0001, "loss": 1.6028, "step": 5153 }, { "epoch": 0.5987801336044147, "grad_norm": 0.4275076985359192, "learning_rate": 0.0001, "loss": 1.6243, "step": 5154 }, { "epoch": 0.5988963113563752, "grad_norm": 0.4178537130355835, "learning_rate": 0.0001, "loss": 1.5415, "step": 5155 }, { "epoch": 0.5990124891083357, "grad_norm": 0.4059988558292389, "learning_rate": 0.0001, "loss": 1.6947, "step": 5156 }, { "epoch": 0.5991286668602962, "grad_norm": 0.44386255741119385, "learning_rate": 0.0001, "loss": 1.5318, "step": 5157 }, { "epoch": 0.5992448446122568, "grad_norm": 0.430132657289505, "learning_rate": 0.0001, "loss": 1.7439, "step": 5158 }, { "epoch": 0.5993610223642173, "grad_norm": 0.4542747735977173, "learning_rate": 0.0001, "loss": 1.7855, "step": 5159 }, { "epoch": 0.5994772001161778, "grad_norm": 0.4200115203857422, "learning_rate": 0.0001, "loss": 1.5615, "step": 5160 }, { "epoch": 0.5995933778681383, "grad_norm": 0.4746837615966797, "learning_rate": 0.0001, "loss": 1.758, "step": 5161 }, { "epoch": 0.5997095556200988, "grad_norm": 0.45345309376716614, "learning_rate": 0.0001, "loss": 1.5189, "step": 5162 }, { "epoch": 0.5998257333720592, "grad_norm": 0.4250603914260864, "learning_rate": 0.0001, "loss": 1.5632, "step": 5163 }, { "epoch": 0.5999419111240197, "grad_norm": 0.4513823688030243, "learning_rate": 0.0001, "loss": 1.5504, "step": 5164 }, { "epoch": 0.6000580888759802, "grad_norm": 0.4406159520149231, "learning_rate": 0.0001, "loss": 1.5861, "step": 5165 }, { "epoch": 0.6001742666279407, "grad_norm": 0.45702022314071655, "learning_rate": 0.0001, "loss": 1.716, "step": 5166 }, { "epoch": 0.6002904443799012, "grad_norm": 0.4588814079761505, "learning_rate": 0.0001, "loss": 1.7822, "step": 5167 }, { "epoch": 0.6004066221318618, "grad_norm": 0.4281061291694641, "learning_rate": 0.0001, "loss": 1.6619, "step": 5168 }, { "epoch": 0.6005227998838223, "grad_norm": 0.42194947600364685, "learning_rate": 0.0001, "loss": 1.5524, "step": 5169 }, { "epoch": 0.6006389776357828, "grad_norm": 0.43042877316474915, "learning_rate": 0.0001, "loss": 1.6838, "step": 5170 }, { "epoch": 0.6007551553877433, "grad_norm": 0.40578198432922363, "learning_rate": 0.0001, "loss": 1.6245, "step": 5171 }, { "epoch": 0.6008713331397038, "grad_norm": 0.41954731941223145, "learning_rate": 0.0001, "loss": 1.6543, "step": 5172 }, { "epoch": 0.6009875108916642, "grad_norm": 0.4439033567905426, "learning_rate": 0.0001, "loss": 1.6539, "step": 5173 }, { "epoch": 0.6011036886436247, "grad_norm": 0.41968825459480286, "learning_rate": 0.0001, "loss": 1.3706, "step": 5174 }, { "epoch": 0.6012198663955852, "grad_norm": 0.480570524930954, "learning_rate": 0.0001, "loss": 1.7536, "step": 5175 }, { "epoch": 0.6013360441475457, "grad_norm": 0.45514610409736633, "learning_rate": 0.0001, "loss": 1.541, "step": 5176 }, { "epoch": 0.6014522218995062, "grad_norm": 0.4680652618408203, "learning_rate": 0.0001, "loss": 1.7239, "step": 5177 }, { "epoch": 0.6015683996514667, "grad_norm": 0.47438210248947144, "learning_rate": 0.0001, "loss": 1.6592, "step": 5178 }, { "epoch": 0.6016845774034273, "grad_norm": 0.41302818059921265, "learning_rate": 0.0001, "loss": 1.7152, "step": 5179 }, { "epoch": 0.6018007551553878, "grad_norm": 0.4282575845718384, "learning_rate": 0.0001, "loss": 1.5534, "step": 5180 }, { "epoch": 0.6019169329073483, "grad_norm": 0.4566362202167511, "learning_rate": 0.0001, "loss": 1.7625, "step": 5181 }, { "epoch": 0.6020331106593088, "grad_norm": 0.42509856820106506, "learning_rate": 0.0001, "loss": 1.6779, "step": 5182 }, { "epoch": 0.6021492884112692, "grad_norm": 0.4111645817756653, "learning_rate": 0.0001, "loss": 1.5353, "step": 5183 }, { "epoch": 0.6022654661632297, "grad_norm": 0.45297473669052124, "learning_rate": 0.0001, "loss": 1.7363, "step": 5184 }, { "epoch": 0.6023816439151902, "grad_norm": 0.4520341157913208, "learning_rate": 0.0001, "loss": 1.5046, "step": 5185 }, { "epoch": 0.6024978216671507, "grad_norm": 0.4603181481361389, "learning_rate": 0.0001, "loss": 1.6009, "step": 5186 }, { "epoch": 0.6026139994191112, "grad_norm": 0.4459368884563446, "learning_rate": 0.0001, "loss": 1.6856, "step": 5187 }, { "epoch": 0.6027301771710717, "grad_norm": 0.42652809619903564, "learning_rate": 0.0001, "loss": 1.5666, "step": 5188 }, { "epoch": 0.6028463549230323, "grad_norm": 0.41247275471687317, "learning_rate": 0.0001, "loss": 1.4765, "step": 5189 }, { "epoch": 0.6029625326749928, "grad_norm": 0.48231571912765503, "learning_rate": 0.0001, "loss": 1.8699, "step": 5190 }, { "epoch": 0.6030787104269533, "grad_norm": 0.43310195207595825, "learning_rate": 0.0001, "loss": 1.6091, "step": 5191 }, { "epoch": 0.6031948881789138, "grad_norm": 0.4534681737422943, "learning_rate": 0.0001, "loss": 1.6521, "step": 5192 }, { "epoch": 0.6033110659308742, "grad_norm": 0.41649046540260315, "learning_rate": 0.0001, "loss": 1.5868, "step": 5193 }, { "epoch": 0.6034272436828347, "grad_norm": 0.4298163950443268, "learning_rate": 0.0001, "loss": 1.6492, "step": 5194 }, { "epoch": 0.6035434214347952, "grad_norm": 0.43806689977645874, "learning_rate": 0.0001, "loss": 1.5434, "step": 5195 }, { "epoch": 0.6036595991867557, "grad_norm": 0.4382534325122833, "learning_rate": 0.0001, "loss": 1.5519, "step": 5196 }, { "epoch": 0.6037757769387162, "grad_norm": 0.44435277581214905, "learning_rate": 0.0001, "loss": 1.6435, "step": 5197 }, { "epoch": 0.6038919546906767, "grad_norm": 0.42443951964378357, "learning_rate": 0.0001, "loss": 1.6619, "step": 5198 }, { "epoch": 0.6040081324426372, "grad_norm": 0.43603894114494324, "learning_rate": 0.0001, "loss": 1.7109, "step": 5199 }, { "epoch": 0.6041243101945978, "grad_norm": 0.43126562237739563, "learning_rate": 0.0001, "loss": 1.4945, "step": 5200 }, { "epoch": 0.6042404879465583, "grad_norm": 0.42838260531425476, "learning_rate": 0.0001, "loss": 1.7171, "step": 5201 }, { "epoch": 0.6043566656985188, "grad_norm": 0.43312254548072815, "learning_rate": 0.0001, "loss": 1.6684, "step": 5202 }, { "epoch": 0.6044728434504792, "grad_norm": 0.4641507863998413, "learning_rate": 0.0001, "loss": 1.7897, "step": 5203 }, { "epoch": 0.6045890212024397, "grad_norm": 0.4522157311439514, "learning_rate": 0.0001, "loss": 1.6904, "step": 5204 }, { "epoch": 0.6047051989544002, "grad_norm": 0.4437788128852844, "learning_rate": 0.0001, "loss": 1.655, "step": 5205 }, { "epoch": 0.6048213767063607, "grad_norm": 0.40446487069129944, "learning_rate": 0.0001, "loss": 1.5878, "step": 5206 }, { "epoch": 0.6049375544583212, "grad_norm": 0.47073522210121155, "learning_rate": 0.0001, "loss": 1.92, "step": 5207 }, { "epoch": 0.6050537322102817, "grad_norm": 0.4400515556335449, "learning_rate": 0.0001, "loss": 1.6603, "step": 5208 }, { "epoch": 0.6051699099622422, "grad_norm": 0.41592809557914734, "learning_rate": 0.0001, "loss": 1.4964, "step": 5209 }, { "epoch": 0.6052860877142028, "grad_norm": 0.44804421067237854, "learning_rate": 0.0001, "loss": 1.6943, "step": 5210 }, { "epoch": 0.6054022654661633, "grad_norm": 0.4076915979385376, "learning_rate": 0.0001, "loss": 1.5187, "step": 5211 }, { "epoch": 0.6055184432181238, "grad_norm": 0.42822811007499695, "learning_rate": 0.0001, "loss": 1.6617, "step": 5212 }, { "epoch": 0.6056346209700842, "grad_norm": 0.40919166803359985, "learning_rate": 0.0001, "loss": 1.6142, "step": 5213 }, { "epoch": 0.6057507987220447, "grad_norm": 0.4067077040672302, "learning_rate": 0.0001, "loss": 1.4705, "step": 5214 }, { "epoch": 0.6058669764740052, "grad_norm": 0.44207772612571716, "learning_rate": 0.0001, "loss": 1.63, "step": 5215 }, { "epoch": 0.6059831542259657, "grad_norm": 0.4533270001411438, "learning_rate": 0.0001, "loss": 1.7419, "step": 5216 }, { "epoch": 0.6060993319779262, "grad_norm": 0.4846879839897156, "learning_rate": 0.0001, "loss": 1.8105, "step": 5217 }, { "epoch": 0.6062155097298867, "grad_norm": 0.48439884185791016, "learning_rate": 0.0001, "loss": 1.8875, "step": 5218 }, { "epoch": 0.6063316874818472, "grad_norm": 0.418059766292572, "learning_rate": 0.0001, "loss": 1.4664, "step": 5219 }, { "epoch": 0.6064478652338078, "grad_norm": 0.4088701903820038, "learning_rate": 0.0001, "loss": 1.4301, "step": 5220 }, { "epoch": 0.6065640429857683, "grad_norm": 0.4230385720729828, "learning_rate": 0.0001, "loss": 1.5988, "step": 5221 }, { "epoch": 0.6066802207377288, "grad_norm": 0.4391116797924042, "learning_rate": 0.0001, "loss": 1.4377, "step": 5222 }, { "epoch": 0.6067963984896892, "grad_norm": 0.44503310322761536, "learning_rate": 0.0001, "loss": 1.6902, "step": 5223 }, { "epoch": 0.6069125762416497, "grad_norm": 0.4621829390525818, "learning_rate": 0.0001, "loss": 1.743, "step": 5224 }, { "epoch": 0.6070287539936102, "grad_norm": 0.4562551975250244, "learning_rate": 0.0001, "loss": 1.7225, "step": 5225 }, { "epoch": 0.6071449317455707, "grad_norm": 0.4189031720161438, "learning_rate": 0.0001, "loss": 1.4614, "step": 5226 }, { "epoch": 0.6072611094975312, "grad_norm": 0.45642954111099243, "learning_rate": 0.0001, "loss": 1.6229, "step": 5227 }, { "epoch": 0.6073772872494917, "grad_norm": 0.4381450116634369, "learning_rate": 0.0001, "loss": 1.6006, "step": 5228 }, { "epoch": 0.6074934650014522, "grad_norm": 0.4203230142593384, "learning_rate": 0.0001, "loss": 1.6691, "step": 5229 }, { "epoch": 0.6076096427534127, "grad_norm": 0.41312652826309204, "learning_rate": 0.0001, "loss": 1.6501, "step": 5230 }, { "epoch": 0.6077258205053733, "grad_norm": 0.46580955386161804, "learning_rate": 0.0001, "loss": 1.7905, "step": 5231 }, { "epoch": 0.6078419982573338, "grad_norm": 0.42524558305740356, "learning_rate": 0.0001, "loss": 1.6327, "step": 5232 }, { "epoch": 0.6079581760092942, "grad_norm": 0.43027788400650024, "learning_rate": 0.0001, "loss": 1.7278, "step": 5233 }, { "epoch": 0.6080743537612547, "grad_norm": 0.4252438545227051, "learning_rate": 0.0001, "loss": 1.4463, "step": 5234 }, { "epoch": 0.6081905315132152, "grad_norm": 0.4131147861480713, "learning_rate": 0.0001, "loss": 1.5513, "step": 5235 }, { "epoch": 0.6083067092651757, "grad_norm": 0.48660963773727417, "learning_rate": 0.0001, "loss": 1.6143, "step": 5236 }, { "epoch": 0.6084228870171362, "grad_norm": 0.43840891122817993, "learning_rate": 0.0001, "loss": 1.6438, "step": 5237 }, { "epoch": 0.6085390647690967, "grad_norm": 0.48248180747032166, "learning_rate": 0.0001, "loss": 1.7602, "step": 5238 }, { "epoch": 0.6086552425210572, "grad_norm": 0.45849937200546265, "learning_rate": 0.0001, "loss": 1.7877, "step": 5239 }, { "epoch": 0.6087714202730177, "grad_norm": 0.43766871094703674, "learning_rate": 0.0001, "loss": 1.741, "step": 5240 }, { "epoch": 0.6088875980249783, "grad_norm": 0.41059789061546326, "learning_rate": 0.0001, "loss": 1.7146, "step": 5241 }, { "epoch": 0.6090037757769388, "grad_norm": 0.40786266326904297, "learning_rate": 0.0001, "loss": 1.5118, "step": 5242 }, { "epoch": 0.6091199535288992, "grad_norm": 0.4507090747356415, "learning_rate": 0.0001, "loss": 1.6575, "step": 5243 }, { "epoch": 0.6092361312808597, "grad_norm": 0.45171281695365906, "learning_rate": 0.0001, "loss": 1.6024, "step": 5244 }, { "epoch": 0.6093523090328202, "grad_norm": 0.4520759582519531, "learning_rate": 0.0001, "loss": 1.6238, "step": 5245 }, { "epoch": 0.6094684867847807, "grad_norm": 0.4407515823841095, "learning_rate": 0.0001, "loss": 1.4963, "step": 5246 }, { "epoch": 0.6095846645367412, "grad_norm": 0.49019014835357666, "learning_rate": 0.0001, "loss": 1.7029, "step": 5247 }, { "epoch": 0.6097008422887017, "grad_norm": 0.4346254765987396, "learning_rate": 0.0001, "loss": 1.5592, "step": 5248 }, { "epoch": 0.6098170200406622, "grad_norm": 0.43381422758102417, "learning_rate": 0.0001, "loss": 1.6572, "step": 5249 }, { "epoch": 0.6099331977926227, "grad_norm": 0.43752437829971313, "learning_rate": 0.0001, "loss": 1.5148, "step": 5250 }, { "epoch": 0.6100493755445832, "grad_norm": 0.4531850814819336, "learning_rate": 0.0001, "loss": 1.7279, "step": 5251 }, { "epoch": 0.6101655532965438, "grad_norm": 0.4308491051197052, "learning_rate": 0.0001, "loss": 1.6087, "step": 5252 }, { "epoch": 0.6102817310485043, "grad_norm": 0.45477020740509033, "learning_rate": 0.0001, "loss": 1.7004, "step": 5253 }, { "epoch": 0.6103979088004647, "grad_norm": 0.42559128999710083, "learning_rate": 0.0001, "loss": 1.6461, "step": 5254 }, { "epoch": 0.6105140865524252, "grad_norm": 0.45015910267829895, "learning_rate": 0.0001, "loss": 1.6186, "step": 5255 }, { "epoch": 0.6106302643043857, "grad_norm": 0.44628676772117615, "learning_rate": 0.0001, "loss": 1.7035, "step": 5256 }, { "epoch": 0.6107464420563462, "grad_norm": 0.4328848421573639, "learning_rate": 0.0001, "loss": 1.5298, "step": 5257 }, { "epoch": 0.6108626198083067, "grad_norm": 0.42768430709838867, "learning_rate": 0.0001, "loss": 1.7161, "step": 5258 }, { "epoch": 0.6109787975602672, "grad_norm": 0.4138183891773224, "learning_rate": 0.0001, "loss": 1.5596, "step": 5259 }, { "epoch": 0.6110949753122277, "grad_norm": 0.40088340640068054, "learning_rate": 0.0001, "loss": 1.2582, "step": 5260 }, { "epoch": 0.6112111530641882, "grad_norm": 0.47067928314208984, "learning_rate": 0.0001, "loss": 1.6999, "step": 5261 }, { "epoch": 0.6113273308161488, "grad_norm": 0.4110230803489685, "learning_rate": 0.0001, "loss": 1.5359, "step": 5262 }, { "epoch": 0.6114435085681093, "grad_norm": 0.4310549795627594, "learning_rate": 0.0001, "loss": 1.5655, "step": 5263 }, { "epoch": 0.6115596863200697, "grad_norm": 0.4675642251968384, "learning_rate": 0.0001, "loss": 1.7689, "step": 5264 }, { "epoch": 0.6116758640720302, "grad_norm": 0.4433719217777252, "learning_rate": 0.0001, "loss": 1.6569, "step": 5265 }, { "epoch": 0.6117920418239907, "grad_norm": 0.4305296242237091, "learning_rate": 0.0001, "loss": 1.5723, "step": 5266 }, { "epoch": 0.6119082195759512, "grad_norm": 0.42353424429893494, "learning_rate": 0.0001, "loss": 1.5442, "step": 5267 }, { "epoch": 0.6120243973279117, "grad_norm": 0.43402886390686035, "learning_rate": 0.0001, "loss": 1.7132, "step": 5268 }, { "epoch": 0.6121405750798722, "grad_norm": 0.41518083214759827, "learning_rate": 0.0001, "loss": 1.4871, "step": 5269 }, { "epoch": 0.6122567528318327, "grad_norm": 0.42695778608322144, "learning_rate": 0.0001, "loss": 1.7047, "step": 5270 }, { "epoch": 0.6123729305837932, "grad_norm": 0.41428840160369873, "learning_rate": 0.0001, "loss": 1.5187, "step": 5271 }, { "epoch": 0.6124891083357537, "grad_norm": 0.42342090606689453, "learning_rate": 0.0001, "loss": 1.4672, "step": 5272 }, { "epoch": 0.6126052860877143, "grad_norm": 0.479927122592926, "learning_rate": 0.0001, "loss": 1.838, "step": 5273 }, { "epoch": 0.6127214638396747, "grad_norm": 0.4496026039123535, "learning_rate": 0.0001, "loss": 1.6792, "step": 5274 }, { "epoch": 0.6128376415916352, "grad_norm": 0.4338063895702362, "learning_rate": 0.0001, "loss": 1.6543, "step": 5275 }, { "epoch": 0.6129538193435957, "grad_norm": 0.4147947132587433, "learning_rate": 0.0001, "loss": 1.6407, "step": 5276 }, { "epoch": 0.6130699970955562, "grad_norm": 0.4108210802078247, "learning_rate": 0.0001, "loss": 1.5585, "step": 5277 }, { "epoch": 0.6131861748475167, "grad_norm": 0.40176138281822205, "learning_rate": 0.0001, "loss": 1.4927, "step": 5278 }, { "epoch": 0.6133023525994772, "grad_norm": 0.44862088561058044, "learning_rate": 0.0001, "loss": 1.5002, "step": 5279 }, { "epoch": 0.6134185303514377, "grad_norm": 0.4072551429271698, "learning_rate": 0.0001, "loss": 1.6122, "step": 5280 }, { "epoch": 0.6135347081033982, "grad_norm": 0.47543540596961975, "learning_rate": 0.0001, "loss": 1.7072, "step": 5281 }, { "epoch": 0.6136508858553587, "grad_norm": 0.4115746021270752, "learning_rate": 0.0001, "loss": 1.5711, "step": 5282 }, { "epoch": 0.6137670636073193, "grad_norm": 0.42444461584091187, "learning_rate": 0.0001, "loss": 1.5438, "step": 5283 }, { "epoch": 0.6138832413592797, "grad_norm": 0.4719880223274231, "learning_rate": 0.0001, "loss": 1.6267, "step": 5284 }, { "epoch": 0.6139994191112402, "grad_norm": 0.4645368158817291, "learning_rate": 0.0001, "loss": 1.6634, "step": 5285 }, { "epoch": 0.6141155968632007, "grad_norm": 0.4200696349143982, "learning_rate": 0.0001, "loss": 1.5337, "step": 5286 }, { "epoch": 0.6142317746151612, "grad_norm": 0.439284086227417, "learning_rate": 0.0001, "loss": 1.5819, "step": 5287 }, { "epoch": 0.6143479523671217, "grad_norm": 0.42954742908477783, "learning_rate": 0.0001, "loss": 1.6853, "step": 5288 }, { "epoch": 0.6144641301190822, "grad_norm": 0.4379776120185852, "learning_rate": 0.0001, "loss": 1.7162, "step": 5289 }, { "epoch": 0.6145803078710427, "grad_norm": 0.4193497896194458, "learning_rate": 0.0001, "loss": 1.6855, "step": 5290 }, { "epoch": 0.6146964856230032, "grad_norm": 0.43286967277526855, "learning_rate": 0.0001, "loss": 1.7413, "step": 5291 }, { "epoch": 0.6148126633749637, "grad_norm": 0.43035081028938293, "learning_rate": 0.0001, "loss": 1.5693, "step": 5292 }, { "epoch": 0.6149288411269241, "grad_norm": 0.43507617712020874, "learning_rate": 0.0001, "loss": 1.8054, "step": 5293 }, { "epoch": 0.6150450188788847, "grad_norm": 0.3959396779537201, "learning_rate": 0.0001, "loss": 1.4206, "step": 5294 }, { "epoch": 0.6151611966308452, "grad_norm": 0.45225653052330017, "learning_rate": 0.0001, "loss": 1.5735, "step": 5295 }, { "epoch": 0.6152773743828057, "grad_norm": 0.40767356753349304, "learning_rate": 0.0001, "loss": 1.4859, "step": 5296 }, { "epoch": 0.6153935521347662, "grad_norm": 0.4382137954235077, "learning_rate": 0.0001, "loss": 1.5909, "step": 5297 }, { "epoch": 0.6155097298867267, "grad_norm": 0.44092151522636414, "learning_rate": 0.0001, "loss": 1.7193, "step": 5298 }, { "epoch": 0.6156259076386872, "grad_norm": 0.46326959133148193, "learning_rate": 0.0001, "loss": 1.5849, "step": 5299 }, { "epoch": 0.6157420853906477, "grad_norm": 0.4669477343559265, "learning_rate": 0.0001, "loss": 1.7398, "step": 5300 }, { "epoch": 0.6158582631426082, "grad_norm": 0.42557385563850403, "learning_rate": 0.0001, "loss": 1.5386, "step": 5301 }, { "epoch": 0.6159744408945687, "grad_norm": 0.4492860436439514, "learning_rate": 0.0001, "loss": 1.5424, "step": 5302 }, { "epoch": 0.6160906186465291, "grad_norm": 0.43879732489585876, "learning_rate": 0.0001, "loss": 1.5146, "step": 5303 }, { "epoch": 0.6162067963984897, "grad_norm": 0.43372800946235657, "learning_rate": 0.0001, "loss": 1.6623, "step": 5304 }, { "epoch": 0.6163229741504502, "grad_norm": 0.43956896662712097, "learning_rate": 0.0001, "loss": 1.6077, "step": 5305 }, { "epoch": 0.6164391519024107, "grad_norm": 0.44320589303970337, "learning_rate": 0.0001, "loss": 1.6369, "step": 5306 }, { "epoch": 0.6165553296543712, "grad_norm": 0.44610658288002014, "learning_rate": 0.0001, "loss": 1.6463, "step": 5307 }, { "epoch": 0.6166715074063317, "grad_norm": 0.4208918511867523, "learning_rate": 0.0001, "loss": 1.6308, "step": 5308 }, { "epoch": 0.6167876851582922, "grad_norm": 0.43007153272628784, "learning_rate": 0.0001, "loss": 1.6225, "step": 5309 }, { "epoch": 0.6169038629102527, "grad_norm": 0.4086022973060608, "learning_rate": 0.0001, "loss": 1.5524, "step": 5310 }, { "epoch": 0.6170200406622132, "grad_norm": 0.4730951189994812, "learning_rate": 0.0001, "loss": 1.6534, "step": 5311 }, { "epoch": 0.6171362184141737, "grad_norm": 0.41164976358413696, "learning_rate": 0.0001, "loss": 1.3929, "step": 5312 }, { "epoch": 0.6172523961661341, "grad_norm": 0.43444958329200745, "learning_rate": 0.0001, "loss": 1.6257, "step": 5313 }, { "epoch": 0.6173685739180946, "grad_norm": 0.4134158790111542, "learning_rate": 0.0001, "loss": 1.4962, "step": 5314 }, { "epoch": 0.6174847516700552, "grad_norm": 0.42741191387176514, "learning_rate": 0.0001, "loss": 1.6745, "step": 5315 }, { "epoch": 0.6176009294220157, "grad_norm": 0.42607542872428894, "learning_rate": 0.0001, "loss": 1.5641, "step": 5316 }, { "epoch": 0.6177171071739762, "grad_norm": 0.450953871011734, "learning_rate": 0.0001, "loss": 1.7549, "step": 5317 }, { "epoch": 0.6178332849259367, "grad_norm": 0.4400138556957245, "learning_rate": 0.0001, "loss": 1.6905, "step": 5318 }, { "epoch": 0.6179494626778972, "grad_norm": 0.4217263460159302, "learning_rate": 0.0001, "loss": 1.6168, "step": 5319 }, { "epoch": 0.6180656404298577, "grad_norm": 0.4471561908721924, "learning_rate": 0.0001, "loss": 1.5789, "step": 5320 }, { "epoch": 0.6181818181818182, "grad_norm": 0.45624133944511414, "learning_rate": 0.0001, "loss": 1.6327, "step": 5321 }, { "epoch": 0.6182979959337787, "grad_norm": 0.4240606129169464, "learning_rate": 0.0001, "loss": 1.6738, "step": 5322 }, { "epoch": 0.6184141736857391, "grad_norm": 0.41618114709854126, "learning_rate": 0.0001, "loss": 1.6379, "step": 5323 }, { "epoch": 0.6185303514376996, "grad_norm": 0.4757783114910126, "learning_rate": 0.0001, "loss": 1.6637, "step": 5324 }, { "epoch": 0.6186465291896602, "grad_norm": 0.45356112718582153, "learning_rate": 0.0001, "loss": 1.6294, "step": 5325 }, { "epoch": 0.6187627069416207, "grad_norm": 0.42001453042030334, "learning_rate": 0.0001, "loss": 1.5796, "step": 5326 }, { "epoch": 0.6188788846935812, "grad_norm": 0.4233144521713257, "learning_rate": 0.0001, "loss": 1.8047, "step": 5327 }, { "epoch": 0.6189950624455417, "grad_norm": 0.451232373714447, "learning_rate": 0.0001, "loss": 1.7163, "step": 5328 }, { "epoch": 0.6191112401975022, "grad_norm": 0.4210103750228882, "learning_rate": 0.0001, "loss": 1.6238, "step": 5329 }, { "epoch": 0.6192274179494627, "grad_norm": 0.43561357259750366, "learning_rate": 0.0001, "loss": 1.5469, "step": 5330 }, { "epoch": 0.6193435957014232, "grad_norm": 0.43155673146247864, "learning_rate": 0.0001, "loss": 1.5399, "step": 5331 }, { "epoch": 0.6194597734533837, "grad_norm": 0.45339235663414, "learning_rate": 0.0001, "loss": 1.614, "step": 5332 }, { "epoch": 0.6195759512053441, "grad_norm": 0.5200868248939514, "learning_rate": 0.0001, "loss": 1.6676, "step": 5333 }, { "epoch": 0.6196921289573046, "grad_norm": 0.45999056100845337, "learning_rate": 0.0001, "loss": 1.7309, "step": 5334 }, { "epoch": 0.6198083067092651, "grad_norm": 0.46080732345581055, "learning_rate": 0.0001, "loss": 1.6882, "step": 5335 }, { "epoch": 0.6199244844612257, "grad_norm": 0.4588463008403778, "learning_rate": 0.0001, "loss": 1.7431, "step": 5336 }, { "epoch": 0.6200406622131862, "grad_norm": 0.4329404830932617, "learning_rate": 0.0001, "loss": 1.7793, "step": 5337 }, { "epoch": 0.6201568399651467, "grad_norm": 0.42099177837371826, "learning_rate": 0.0001, "loss": 1.5492, "step": 5338 }, { "epoch": 0.6202730177171072, "grad_norm": 0.4456626772880554, "learning_rate": 0.0001, "loss": 1.5394, "step": 5339 }, { "epoch": 0.6203891954690677, "grad_norm": 0.45711061358451843, "learning_rate": 0.0001, "loss": 1.6839, "step": 5340 }, { "epoch": 0.6205053732210282, "grad_norm": 0.4267137944698334, "learning_rate": 0.0001, "loss": 1.5828, "step": 5341 }, { "epoch": 0.6206215509729887, "grad_norm": 0.438827782869339, "learning_rate": 0.0001, "loss": 1.566, "step": 5342 }, { "epoch": 0.6207377287249491, "grad_norm": 0.43408849835395813, "learning_rate": 0.0001, "loss": 1.5385, "step": 5343 }, { "epoch": 0.6208539064769096, "grad_norm": 0.5134710669517517, "learning_rate": 0.0001, "loss": 1.6072, "step": 5344 }, { "epoch": 0.6209700842288701, "grad_norm": 0.47032633423805237, "learning_rate": 0.0001, "loss": 1.6972, "step": 5345 }, { "epoch": 0.6210862619808307, "grad_norm": 0.47240790724754333, "learning_rate": 0.0001, "loss": 1.6964, "step": 5346 }, { "epoch": 0.6212024397327912, "grad_norm": 0.46300870180130005, "learning_rate": 0.0001, "loss": 1.5826, "step": 5347 }, { "epoch": 0.6213186174847517, "grad_norm": 0.4472779333591461, "learning_rate": 0.0001, "loss": 1.5877, "step": 5348 }, { "epoch": 0.6214347952367122, "grad_norm": 0.45526379346847534, "learning_rate": 0.0001, "loss": 1.676, "step": 5349 }, { "epoch": 0.6215509729886727, "grad_norm": 0.4606863260269165, "learning_rate": 0.0001, "loss": 1.5718, "step": 5350 }, { "epoch": 0.6216671507406332, "grad_norm": 0.4486534297466278, "learning_rate": 0.0001, "loss": 1.6654, "step": 5351 }, { "epoch": 0.6217833284925937, "grad_norm": 0.5138721466064453, "learning_rate": 0.0001, "loss": 1.6765, "step": 5352 }, { "epoch": 0.6218995062445541, "grad_norm": 0.43069109320640564, "learning_rate": 0.0001, "loss": 1.5228, "step": 5353 }, { "epoch": 0.6220156839965146, "grad_norm": 0.4056498408317566, "learning_rate": 0.0001, "loss": 1.5213, "step": 5354 }, { "epoch": 0.6221318617484751, "grad_norm": 0.4548470079898834, "learning_rate": 0.0001, "loss": 1.7058, "step": 5355 }, { "epoch": 0.6222480395004356, "grad_norm": 0.4468959867954254, "learning_rate": 0.0001, "loss": 1.7774, "step": 5356 }, { "epoch": 0.6223642172523962, "grad_norm": 0.4434147775173187, "learning_rate": 0.0001, "loss": 1.4379, "step": 5357 }, { "epoch": 0.6224803950043567, "grad_norm": 0.4365995526313782, "learning_rate": 0.0001, "loss": 1.6554, "step": 5358 }, { "epoch": 0.6225965727563172, "grad_norm": 0.42630013823509216, "learning_rate": 0.0001, "loss": 1.6059, "step": 5359 }, { "epoch": 0.6227127505082777, "grad_norm": 0.4268414378166199, "learning_rate": 0.0001, "loss": 1.727, "step": 5360 }, { "epoch": 0.6228289282602382, "grad_norm": 0.4438273012638092, "learning_rate": 0.0001, "loss": 1.7561, "step": 5361 }, { "epoch": 0.6229451060121987, "grad_norm": 0.4021027088165283, "learning_rate": 0.0001, "loss": 1.3959, "step": 5362 }, { "epoch": 0.6230612837641591, "grad_norm": 0.40190452337265015, "learning_rate": 0.0001, "loss": 1.5664, "step": 5363 }, { "epoch": 0.6231774615161196, "grad_norm": 0.45445725321769714, "learning_rate": 0.0001, "loss": 1.7218, "step": 5364 }, { "epoch": 0.6232936392680801, "grad_norm": 0.41883429884910583, "learning_rate": 0.0001, "loss": 1.44, "step": 5365 }, { "epoch": 0.6234098170200406, "grad_norm": 0.4443962574005127, "learning_rate": 0.0001, "loss": 1.7734, "step": 5366 }, { "epoch": 0.6235259947720012, "grad_norm": 0.4436537027359009, "learning_rate": 0.0001, "loss": 1.7087, "step": 5367 }, { "epoch": 0.6236421725239617, "grad_norm": 0.4414960741996765, "learning_rate": 0.0001, "loss": 1.6531, "step": 5368 }, { "epoch": 0.6237583502759222, "grad_norm": 0.43961644172668457, "learning_rate": 0.0001, "loss": 1.6234, "step": 5369 }, { "epoch": 0.6238745280278827, "grad_norm": 0.42468035221099854, "learning_rate": 0.0001, "loss": 1.656, "step": 5370 }, { "epoch": 0.6239907057798432, "grad_norm": 0.40939033031463623, "learning_rate": 0.0001, "loss": 1.5147, "step": 5371 }, { "epoch": 0.6241068835318037, "grad_norm": 0.43494606018066406, "learning_rate": 0.0001, "loss": 1.5552, "step": 5372 }, { "epoch": 0.6242230612837641, "grad_norm": 0.4493979513645172, "learning_rate": 0.0001, "loss": 1.7368, "step": 5373 }, { "epoch": 0.6243392390357246, "grad_norm": 0.3963182270526886, "learning_rate": 0.0001, "loss": 1.4555, "step": 5374 }, { "epoch": 0.6244554167876851, "grad_norm": 0.4189557731151581, "learning_rate": 0.0001, "loss": 1.568, "step": 5375 }, { "epoch": 0.6245715945396456, "grad_norm": 0.449720174074173, "learning_rate": 0.0001, "loss": 1.662, "step": 5376 }, { "epoch": 0.6246877722916061, "grad_norm": 0.4135220944881439, "learning_rate": 0.0001, "loss": 1.6108, "step": 5377 }, { "epoch": 0.6248039500435667, "grad_norm": 0.47096794843673706, "learning_rate": 0.0001, "loss": 1.6151, "step": 5378 }, { "epoch": 0.6249201277955272, "grad_norm": 0.4510813057422638, "learning_rate": 0.0001, "loss": 1.6438, "step": 5379 }, { "epoch": 0.6250363055474877, "grad_norm": 0.43997013568878174, "learning_rate": 0.0001, "loss": 1.5405, "step": 5380 }, { "epoch": 0.6251524832994482, "grad_norm": 0.4731866419315338, "learning_rate": 0.0001, "loss": 1.7788, "step": 5381 }, { "epoch": 0.6252686610514087, "grad_norm": 0.44134780764579773, "learning_rate": 0.0001, "loss": 1.6662, "step": 5382 }, { "epoch": 0.6253848388033691, "grad_norm": 0.5102000832557678, "learning_rate": 0.0001, "loss": 1.7645, "step": 5383 }, { "epoch": 0.6255010165553296, "grad_norm": 0.43674057722091675, "learning_rate": 0.0001, "loss": 1.5027, "step": 5384 }, { "epoch": 0.6256171943072901, "grad_norm": 0.47883304953575134, "learning_rate": 0.0001, "loss": 1.6244, "step": 5385 }, { "epoch": 0.6257333720592506, "grad_norm": 0.43253079056739807, "learning_rate": 0.0001, "loss": 1.7042, "step": 5386 }, { "epoch": 0.6258495498112111, "grad_norm": 0.41759157180786133, "learning_rate": 0.0001, "loss": 1.6168, "step": 5387 }, { "epoch": 0.6259657275631717, "grad_norm": 0.421507865190506, "learning_rate": 0.0001, "loss": 1.5269, "step": 5388 }, { "epoch": 0.6260819053151322, "grad_norm": 0.4465677738189697, "learning_rate": 0.0001, "loss": 1.8354, "step": 5389 }, { "epoch": 0.6261980830670927, "grad_norm": 0.4306974709033966, "learning_rate": 0.0001, "loss": 1.5394, "step": 5390 }, { "epoch": 0.6263142608190532, "grad_norm": 0.4195205271244049, "learning_rate": 0.0001, "loss": 1.6212, "step": 5391 }, { "epoch": 0.6264304385710137, "grad_norm": 0.44183290004730225, "learning_rate": 0.0001, "loss": 1.5672, "step": 5392 }, { "epoch": 0.6265466163229741, "grad_norm": 0.44616198539733887, "learning_rate": 0.0001, "loss": 1.6277, "step": 5393 }, { "epoch": 0.6266627940749346, "grad_norm": 0.44162172079086304, "learning_rate": 0.0001, "loss": 1.6604, "step": 5394 }, { "epoch": 0.6267789718268951, "grad_norm": 0.46100786328315735, "learning_rate": 0.0001, "loss": 1.8678, "step": 5395 }, { "epoch": 0.6268951495788556, "grad_norm": 0.4163057208061218, "learning_rate": 0.0001, "loss": 1.5349, "step": 5396 }, { "epoch": 0.6270113273308161, "grad_norm": 0.4567055404186249, "learning_rate": 0.0001, "loss": 1.6346, "step": 5397 }, { "epoch": 0.6271275050827766, "grad_norm": 0.4351733326911926, "learning_rate": 0.0001, "loss": 1.7198, "step": 5398 }, { "epoch": 0.6272436828347372, "grad_norm": 0.4696790277957916, "learning_rate": 0.0001, "loss": 1.773, "step": 5399 }, { "epoch": 0.6273598605866977, "grad_norm": 0.4156184792518616, "learning_rate": 0.0001, "loss": 1.4328, "step": 5400 }, { "epoch": 0.6274760383386582, "grad_norm": 0.450946182012558, "learning_rate": 0.0001, "loss": 1.7431, "step": 5401 }, { "epoch": 0.6275922160906187, "grad_norm": 0.48097074031829834, "learning_rate": 0.0001, "loss": 1.622, "step": 5402 }, { "epoch": 0.6277083938425791, "grad_norm": 0.4515106976032257, "learning_rate": 0.0001, "loss": 1.7013, "step": 5403 }, { "epoch": 0.6278245715945396, "grad_norm": 0.4210405647754669, "learning_rate": 0.0001, "loss": 1.5421, "step": 5404 }, { "epoch": 0.6279407493465001, "grad_norm": 0.42682531476020813, "learning_rate": 0.0001, "loss": 1.5663, "step": 5405 }, { "epoch": 0.6280569270984606, "grad_norm": 0.4539082944393158, "learning_rate": 0.0001, "loss": 1.6611, "step": 5406 }, { "epoch": 0.6281731048504211, "grad_norm": 0.43941760063171387, "learning_rate": 0.0001, "loss": 1.5897, "step": 5407 }, { "epoch": 0.6282892826023816, "grad_norm": 0.4232398271560669, "learning_rate": 0.0001, "loss": 1.6926, "step": 5408 }, { "epoch": 0.6284054603543422, "grad_norm": 0.4403201639652252, "learning_rate": 0.0001, "loss": 1.5219, "step": 5409 }, { "epoch": 0.6285216381063027, "grad_norm": 0.45847928524017334, "learning_rate": 0.0001, "loss": 1.7491, "step": 5410 }, { "epoch": 0.6286378158582632, "grad_norm": 0.4543238878250122, "learning_rate": 0.0001, "loss": 1.7062, "step": 5411 }, { "epoch": 0.6287539936102237, "grad_norm": 0.4506990611553192, "learning_rate": 0.0001, "loss": 1.7011, "step": 5412 }, { "epoch": 0.6288701713621841, "grad_norm": 0.38618630170822144, "learning_rate": 0.0001, "loss": 1.5152, "step": 5413 }, { "epoch": 0.6289863491141446, "grad_norm": 0.48193633556365967, "learning_rate": 0.0001, "loss": 1.8388, "step": 5414 }, { "epoch": 0.6291025268661051, "grad_norm": 0.45501387119293213, "learning_rate": 0.0001, "loss": 1.6377, "step": 5415 }, { "epoch": 0.6292187046180656, "grad_norm": 0.43010449409484863, "learning_rate": 0.0001, "loss": 1.5293, "step": 5416 }, { "epoch": 0.6293348823700261, "grad_norm": 0.42179417610168457, "learning_rate": 0.0001, "loss": 1.6207, "step": 5417 }, { "epoch": 0.6294510601219866, "grad_norm": 0.4156195819377899, "learning_rate": 0.0001, "loss": 1.3938, "step": 5418 }, { "epoch": 0.6295672378739472, "grad_norm": 0.4323188364505768, "learning_rate": 0.0001, "loss": 1.612, "step": 5419 }, { "epoch": 0.6296834156259077, "grad_norm": 0.4095346927642822, "learning_rate": 0.0001, "loss": 1.5443, "step": 5420 }, { "epoch": 0.6297995933778682, "grad_norm": 0.4441397190093994, "learning_rate": 0.0001, "loss": 1.6549, "step": 5421 }, { "epoch": 0.6299157711298287, "grad_norm": 0.4608078896999359, "learning_rate": 0.0001, "loss": 1.6716, "step": 5422 }, { "epoch": 0.6300319488817892, "grad_norm": 0.4394521415233612, "learning_rate": 0.0001, "loss": 1.6433, "step": 5423 }, { "epoch": 0.6301481266337496, "grad_norm": 0.4374762773513794, "learning_rate": 0.0001, "loss": 1.6325, "step": 5424 }, { "epoch": 0.6302643043857101, "grad_norm": 0.49020811915397644, "learning_rate": 0.0001, "loss": 1.7044, "step": 5425 }, { "epoch": 0.6303804821376706, "grad_norm": 0.45695215463638306, "learning_rate": 0.0001, "loss": 1.6427, "step": 5426 }, { "epoch": 0.6304966598896311, "grad_norm": 0.4443933367729187, "learning_rate": 0.0001, "loss": 1.6863, "step": 5427 }, { "epoch": 0.6306128376415916, "grad_norm": 0.47112321853637695, "learning_rate": 0.0001, "loss": 1.8038, "step": 5428 }, { "epoch": 0.6307290153935521, "grad_norm": 0.43735185265541077, "learning_rate": 0.0001, "loss": 1.5341, "step": 5429 }, { "epoch": 0.6308451931455127, "grad_norm": 0.42006251215934753, "learning_rate": 0.0001, "loss": 1.462, "step": 5430 }, { "epoch": 0.6309613708974732, "grad_norm": 0.4126074016094208, "learning_rate": 0.0001, "loss": 1.5895, "step": 5431 }, { "epoch": 0.6310775486494337, "grad_norm": 0.4392840564250946, "learning_rate": 0.0001, "loss": 1.7046, "step": 5432 }, { "epoch": 0.6311937264013942, "grad_norm": 0.4268054962158203, "learning_rate": 0.0001, "loss": 1.6143, "step": 5433 }, { "epoch": 0.6313099041533546, "grad_norm": 0.4118890166282654, "learning_rate": 0.0001, "loss": 1.4662, "step": 5434 }, { "epoch": 0.6314260819053151, "grad_norm": 0.4566260874271393, "learning_rate": 0.0001, "loss": 1.5913, "step": 5435 }, { "epoch": 0.6315422596572756, "grad_norm": 0.4307934045791626, "learning_rate": 0.0001, "loss": 1.6546, "step": 5436 }, { "epoch": 0.6316584374092361, "grad_norm": 0.4302327632904053, "learning_rate": 0.0001, "loss": 1.7334, "step": 5437 }, { "epoch": 0.6317746151611966, "grad_norm": 0.4081536531448364, "learning_rate": 0.0001, "loss": 1.5399, "step": 5438 }, { "epoch": 0.6318907929131571, "grad_norm": 0.43301841616630554, "learning_rate": 0.0001, "loss": 1.6369, "step": 5439 }, { "epoch": 0.6320069706651177, "grad_norm": 0.4393984377384186, "learning_rate": 0.0001, "loss": 1.5908, "step": 5440 }, { "epoch": 0.6321231484170782, "grad_norm": 0.4503820240497589, "learning_rate": 0.0001, "loss": 1.7102, "step": 5441 }, { "epoch": 0.6322393261690387, "grad_norm": 0.4837632179260254, "learning_rate": 0.0001, "loss": 1.7753, "step": 5442 }, { "epoch": 0.6323555039209992, "grad_norm": 0.4605303108692169, "learning_rate": 0.0001, "loss": 1.6316, "step": 5443 }, { "epoch": 0.6324716816729596, "grad_norm": 0.4571518003940582, "learning_rate": 0.0001, "loss": 1.6705, "step": 5444 }, { "epoch": 0.6325878594249201, "grad_norm": 0.4223042130470276, "learning_rate": 0.0001, "loss": 1.4927, "step": 5445 }, { "epoch": 0.6327040371768806, "grad_norm": 0.4364268183708191, "learning_rate": 0.0001, "loss": 1.6427, "step": 5446 }, { "epoch": 0.6328202149288411, "grad_norm": 0.43388858437538147, "learning_rate": 0.0001, "loss": 1.6069, "step": 5447 }, { "epoch": 0.6329363926808016, "grad_norm": 0.4458199143409729, "learning_rate": 0.0001, "loss": 1.7514, "step": 5448 }, { "epoch": 0.6330525704327621, "grad_norm": 0.43075671792030334, "learning_rate": 0.0001, "loss": 1.5369, "step": 5449 }, { "epoch": 0.6331687481847226, "grad_norm": 0.42168936133384705, "learning_rate": 0.0001, "loss": 1.4663, "step": 5450 }, { "epoch": 0.6332849259366832, "grad_norm": 0.41019555926322937, "learning_rate": 0.0001, "loss": 1.5724, "step": 5451 }, { "epoch": 0.6334011036886437, "grad_norm": 0.4191059470176697, "learning_rate": 0.0001, "loss": 1.5034, "step": 5452 }, { "epoch": 0.6335172814406042, "grad_norm": 0.4607682526111603, "learning_rate": 0.0001, "loss": 1.6468, "step": 5453 }, { "epoch": 0.6336334591925646, "grad_norm": 0.4602360725402832, "learning_rate": 0.0001, "loss": 1.7269, "step": 5454 }, { "epoch": 0.6337496369445251, "grad_norm": 0.4206385910511017, "learning_rate": 0.0001, "loss": 1.5707, "step": 5455 }, { "epoch": 0.6338658146964856, "grad_norm": 0.427676796913147, "learning_rate": 0.0001, "loss": 1.6279, "step": 5456 }, { "epoch": 0.6339819924484461, "grad_norm": 0.47075173258781433, "learning_rate": 0.0001, "loss": 1.7829, "step": 5457 }, { "epoch": 0.6340981702004066, "grad_norm": 0.45573490858078003, "learning_rate": 0.0001, "loss": 1.6889, "step": 5458 }, { "epoch": 0.6342143479523671, "grad_norm": 0.43033167719841003, "learning_rate": 0.0001, "loss": 1.8298, "step": 5459 }, { "epoch": 0.6343305257043276, "grad_norm": 0.429253488779068, "learning_rate": 0.0001, "loss": 1.5989, "step": 5460 }, { "epoch": 0.6344467034562882, "grad_norm": 0.4369482696056366, "learning_rate": 0.0001, "loss": 1.6974, "step": 5461 }, { "epoch": 0.6345628812082487, "grad_norm": 0.4227111041545868, "learning_rate": 0.0001, "loss": 1.6638, "step": 5462 }, { "epoch": 0.6346790589602092, "grad_norm": 0.42353329062461853, "learning_rate": 0.0001, "loss": 1.6536, "step": 5463 }, { "epoch": 0.6347952367121696, "grad_norm": 0.44350820779800415, "learning_rate": 0.0001, "loss": 1.6899, "step": 5464 }, { "epoch": 0.6349114144641301, "grad_norm": 0.42905986309051514, "learning_rate": 0.0001, "loss": 1.633, "step": 5465 }, { "epoch": 0.6350275922160906, "grad_norm": 0.4360484480857849, "learning_rate": 0.0001, "loss": 1.6686, "step": 5466 }, { "epoch": 0.6351437699680511, "grad_norm": 0.4489285349845886, "learning_rate": 0.0001, "loss": 1.7598, "step": 5467 }, { "epoch": 0.6352599477200116, "grad_norm": 0.43288835883140564, "learning_rate": 0.0001, "loss": 1.6209, "step": 5468 }, { "epoch": 0.6353761254719721, "grad_norm": 0.4269554316997528, "learning_rate": 0.0001, "loss": 1.514, "step": 5469 }, { "epoch": 0.6354923032239326, "grad_norm": 0.43894824385643005, "learning_rate": 0.0001, "loss": 1.6423, "step": 5470 }, { "epoch": 0.6356084809758931, "grad_norm": 0.42554864287376404, "learning_rate": 0.0001, "loss": 1.382, "step": 5471 }, { "epoch": 0.6357246587278537, "grad_norm": 0.43490105867385864, "learning_rate": 0.0001, "loss": 1.5542, "step": 5472 }, { "epoch": 0.6358408364798142, "grad_norm": 0.41975313425064087, "learning_rate": 0.0001, "loss": 1.5971, "step": 5473 }, { "epoch": 0.6359570142317746, "grad_norm": 0.43810316920280457, "learning_rate": 0.0001, "loss": 1.5934, "step": 5474 }, { "epoch": 0.6360731919837351, "grad_norm": 0.4118810296058655, "learning_rate": 0.0001, "loss": 1.5732, "step": 5475 }, { "epoch": 0.6361893697356956, "grad_norm": 0.4668201804161072, "learning_rate": 0.0001, "loss": 1.7682, "step": 5476 }, { "epoch": 0.6363055474876561, "grad_norm": 0.43905314803123474, "learning_rate": 0.0001, "loss": 1.6805, "step": 5477 }, { "epoch": 0.6364217252396166, "grad_norm": 0.43517014384269714, "learning_rate": 0.0001, "loss": 1.6264, "step": 5478 }, { "epoch": 0.6365379029915771, "grad_norm": 0.4257369339466095, "learning_rate": 0.0001, "loss": 1.6548, "step": 5479 }, { "epoch": 0.6366540807435376, "grad_norm": 0.4415474534034729, "learning_rate": 0.0001, "loss": 1.5927, "step": 5480 }, { "epoch": 0.6367702584954981, "grad_norm": 0.48000675439834595, "learning_rate": 0.0001, "loss": 1.8075, "step": 5481 }, { "epoch": 0.6368864362474587, "grad_norm": 0.433585524559021, "learning_rate": 0.0001, "loss": 1.6295, "step": 5482 }, { "epoch": 0.6370026139994192, "grad_norm": 0.46495142579078674, "learning_rate": 0.0001, "loss": 1.5808, "step": 5483 }, { "epoch": 0.6371187917513796, "grad_norm": 0.4620204567909241, "learning_rate": 0.0001, "loss": 1.7422, "step": 5484 }, { "epoch": 0.6372349695033401, "grad_norm": 0.457280695438385, "learning_rate": 0.0001, "loss": 1.5088, "step": 5485 }, { "epoch": 0.6373511472553006, "grad_norm": 0.40286630392074585, "learning_rate": 0.0001, "loss": 1.5871, "step": 5486 }, { "epoch": 0.6374673250072611, "grad_norm": 0.41824090480804443, "learning_rate": 0.0001, "loss": 1.5161, "step": 5487 }, { "epoch": 0.6375835027592216, "grad_norm": 0.4374508261680603, "learning_rate": 0.0001, "loss": 1.4786, "step": 5488 }, { "epoch": 0.6376996805111821, "grad_norm": 0.44496670365333557, "learning_rate": 0.0001, "loss": 1.6769, "step": 5489 }, { "epoch": 0.6378158582631426, "grad_norm": 0.49528950452804565, "learning_rate": 0.0001, "loss": 1.9251, "step": 5490 }, { "epoch": 0.6379320360151031, "grad_norm": 0.4341500401496887, "learning_rate": 0.0001, "loss": 1.6974, "step": 5491 }, { "epoch": 0.6380482137670636, "grad_norm": 0.4738743305206299, "learning_rate": 0.0001, "loss": 1.8318, "step": 5492 }, { "epoch": 0.6381643915190242, "grad_norm": 0.4284060001373291, "learning_rate": 0.0001, "loss": 1.6151, "step": 5493 }, { "epoch": 0.6382805692709846, "grad_norm": 0.4526841938495636, "learning_rate": 0.0001, "loss": 1.7989, "step": 5494 }, { "epoch": 0.6383967470229451, "grad_norm": 0.47774431109428406, "learning_rate": 0.0001, "loss": 1.8372, "step": 5495 }, { "epoch": 0.6385129247749056, "grad_norm": 0.41493383049964905, "learning_rate": 0.0001, "loss": 1.5741, "step": 5496 }, { "epoch": 0.6386291025268661, "grad_norm": 0.3971651494503021, "learning_rate": 0.0001, "loss": 1.3297, "step": 5497 }, { "epoch": 0.6387452802788266, "grad_norm": 0.4250967502593994, "learning_rate": 0.0001, "loss": 1.5869, "step": 5498 }, { "epoch": 0.6388614580307871, "grad_norm": 0.4181077778339386, "learning_rate": 0.0001, "loss": 1.5321, "step": 5499 }, { "epoch": 0.6389776357827476, "grad_norm": 0.45691534876823425, "learning_rate": 0.0001, "loss": 1.6779, "step": 5500 }, { "epoch": 0.6390938135347081, "grad_norm": 0.43789535760879517, "learning_rate": 0.0001, "loss": 1.7341, "step": 5501 }, { "epoch": 0.6392099912866686, "grad_norm": 0.42184701561927795, "learning_rate": 0.0001, "loss": 1.5499, "step": 5502 }, { "epoch": 0.6393261690386292, "grad_norm": 0.4412265717983246, "learning_rate": 0.0001, "loss": 1.5301, "step": 5503 }, { "epoch": 0.6394423467905896, "grad_norm": 0.4428151845932007, "learning_rate": 0.0001, "loss": 1.6518, "step": 5504 }, { "epoch": 0.6395585245425501, "grad_norm": 0.4467543959617615, "learning_rate": 0.0001, "loss": 1.5952, "step": 5505 }, { "epoch": 0.6396747022945106, "grad_norm": 0.40767189860343933, "learning_rate": 0.0001, "loss": 1.5017, "step": 5506 }, { "epoch": 0.6397908800464711, "grad_norm": 0.4650069773197174, "learning_rate": 0.0001, "loss": 1.6277, "step": 5507 }, { "epoch": 0.6399070577984316, "grad_norm": 0.4141228199005127, "learning_rate": 0.0001, "loss": 1.5472, "step": 5508 }, { "epoch": 0.6400232355503921, "grad_norm": 0.4306548833847046, "learning_rate": 0.0001, "loss": 1.652, "step": 5509 }, { "epoch": 0.6401394133023526, "grad_norm": 0.45290401577949524, "learning_rate": 0.0001, "loss": 1.6879, "step": 5510 }, { "epoch": 0.6402555910543131, "grad_norm": 0.43989840149879456, "learning_rate": 0.0001, "loss": 1.5273, "step": 5511 }, { "epoch": 0.6403717688062736, "grad_norm": 0.46134138107299805, "learning_rate": 0.0001, "loss": 1.6924, "step": 5512 }, { "epoch": 0.640487946558234, "grad_norm": 0.4793362617492676, "learning_rate": 0.0001, "loss": 1.7376, "step": 5513 }, { "epoch": 0.6406041243101946, "grad_norm": 0.4687068462371826, "learning_rate": 0.0001, "loss": 1.3945, "step": 5514 }, { "epoch": 0.6407203020621551, "grad_norm": 0.4695538878440857, "learning_rate": 0.0001, "loss": 1.7152, "step": 5515 }, { "epoch": 0.6408364798141156, "grad_norm": 0.4554864168167114, "learning_rate": 0.0001, "loss": 1.6469, "step": 5516 }, { "epoch": 0.6409526575660761, "grad_norm": 0.45408567786216736, "learning_rate": 0.0001, "loss": 1.6828, "step": 5517 }, { "epoch": 0.6410688353180366, "grad_norm": 0.4672776162624359, "learning_rate": 0.0001, "loss": 1.7557, "step": 5518 }, { "epoch": 0.6411850130699971, "grad_norm": 0.43636855483055115, "learning_rate": 0.0001, "loss": 1.6705, "step": 5519 }, { "epoch": 0.6413011908219576, "grad_norm": 0.4704853892326355, "learning_rate": 0.0001, "loss": 1.6469, "step": 5520 }, { "epoch": 0.6414173685739181, "grad_norm": 0.45459243655204773, "learning_rate": 0.0001, "loss": 1.5321, "step": 5521 }, { "epoch": 0.6415335463258786, "grad_norm": 0.4612571895122528, "learning_rate": 0.0001, "loss": 1.6599, "step": 5522 }, { "epoch": 0.641649724077839, "grad_norm": 0.44641435146331787, "learning_rate": 0.0001, "loss": 1.5666, "step": 5523 }, { "epoch": 0.6417659018297996, "grad_norm": 0.4449585974216461, "learning_rate": 0.0001, "loss": 1.5589, "step": 5524 }, { "epoch": 0.6418820795817601, "grad_norm": 0.49576014280319214, "learning_rate": 0.0001, "loss": 1.6711, "step": 5525 }, { "epoch": 0.6419982573337206, "grad_norm": 0.4535317122936249, "learning_rate": 0.0001, "loss": 1.6709, "step": 5526 }, { "epoch": 0.6421144350856811, "grad_norm": 0.43689054250717163, "learning_rate": 0.0001, "loss": 1.5724, "step": 5527 }, { "epoch": 0.6422306128376416, "grad_norm": 0.4465075135231018, "learning_rate": 0.0001, "loss": 1.5477, "step": 5528 }, { "epoch": 0.6423467905896021, "grad_norm": 0.4437648355960846, "learning_rate": 0.0001, "loss": 1.6005, "step": 5529 }, { "epoch": 0.6424629683415626, "grad_norm": 0.43459346890449524, "learning_rate": 0.0001, "loss": 1.5339, "step": 5530 }, { "epoch": 0.6425791460935231, "grad_norm": 0.46662724018096924, "learning_rate": 0.0001, "loss": 1.6315, "step": 5531 }, { "epoch": 0.6426953238454836, "grad_norm": 0.4122602939605713, "learning_rate": 0.0001, "loss": 1.5359, "step": 5532 }, { "epoch": 0.642811501597444, "grad_norm": 0.4282759726047516, "learning_rate": 0.0001, "loss": 1.5484, "step": 5533 }, { "epoch": 0.6429276793494045, "grad_norm": 0.4355936348438263, "learning_rate": 0.0001, "loss": 1.6888, "step": 5534 }, { "epoch": 0.6430438571013651, "grad_norm": 0.4413192570209503, "learning_rate": 0.0001, "loss": 1.5418, "step": 5535 }, { "epoch": 0.6431600348533256, "grad_norm": 0.4456358253955841, "learning_rate": 0.0001, "loss": 1.5277, "step": 5536 }, { "epoch": 0.6432762126052861, "grad_norm": 0.4487074017524719, "learning_rate": 0.0001, "loss": 1.6349, "step": 5537 }, { "epoch": 0.6433923903572466, "grad_norm": 0.4546639323234558, "learning_rate": 0.0001, "loss": 1.7977, "step": 5538 }, { "epoch": 0.6435085681092071, "grad_norm": 0.47125598788261414, "learning_rate": 0.0001, "loss": 1.6502, "step": 5539 }, { "epoch": 0.6436247458611676, "grad_norm": 0.4681374132633209, "learning_rate": 0.0001, "loss": 1.7863, "step": 5540 }, { "epoch": 0.6437409236131281, "grad_norm": 0.41679567098617554, "learning_rate": 0.0001, "loss": 1.6445, "step": 5541 }, { "epoch": 0.6438571013650886, "grad_norm": 0.4643932282924652, "learning_rate": 0.0001, "loss": 1.4515, "step": 5542 }, { "epoch": 0.643973279117049, "grad_norm": 0.42899298667907715, "learning_rate": 0.0001, "loss": 1.5497, "step": 5543 }, { "epoch": 0.6440894568690095, "grad_norm": 0.44600605964660645, "learning_rate": 0.0001, "loss": 1.5991, "step": 5544 }, { "epoch": 0.6442056346209701, "grad_norm": 0.4713478088378906, "learning_rate": 0.0001, "loss": 1.6812, "step": 5545 }, { "epoch": 0.6443218123729306, "grad_norm": 0.48742958903312683, "learning_rate": 0.0001, "loss": 1.5977, "step": 5546 }, { "epoch": 0.6444379901248911, "grad_norm": 0.4531098008155823, "learning_rate": 0.0001, "loss": 1.7098, "step": 5547 }, { "epoch": 0.6445541678768516, "grad_norm": 0.4089454710483551, "learning_rate": 0.0001, "loss": 1.4264, "step": 5548 }, { "epoch": 0.6446703456288121, "grad_norm": 0.4439248740673065, "learning_rate": 0.0001, "loss": 1.5493, "step": 5549 }, { "epoch": 0.6447865233807726, "grad_norm": 0.4419444799423218, "learning_rate": 0.0001, "loss": 1.58, "step": 5550 }, { "epoch": 0.6449027011327331, "grad_norm": 0.43608152866363525, "learning_rate": 0.0001, "loss": 1.547, "step": 5551 }, { "epoch": 0.6450188788846936, "grad_norm": 0.4767807722091675, "learning_rate": 0.0001, "loss": 1.6492, "step": 5552 }, { "epoch": 0.645135056636654, "grad_norm": 0.4356260597705841, "learning_rate": 0.0001, "loss": 1.6679, "step": 5553 }, { "epoch": 0.6452512343886145, "grad_norm": 0.42101210355758667, "learning_rate": 0.0001, "loss": 1.5609, "step": 5554 }, { "epoch": 0.645367412140575, "grad_norm": 0.4493582248687744, "learning_rate": 0.0001, "loss": 1.3819, "step": 5555 }, { "epoch": 0.6454835898925356, "grad_norm": 0.4049502909183502, "learning_rate": 0.0001, "loss": 1.411, "step": 5556 }, { "epoch": 0.6455997676444961, "grad_norm": 0.42648744583129883, "learning_rate": 0.0001, "loss": 1.6125, "step": 5557 }, { "epoch": 0.6457159453964566, "grad_norm": 0.45338109135627747, "learning_rate": 0.0001, "loss": 1.7413, "step": 5558 }, { "epoch": 0.6458321231484171, "grad_norm": 0.4680498540401459, "learning_rate": 0.0001, "loss": 1.7397, "step": 5559 }, { "epoch": 0.6459483009003776, "grad_norm": 0.48937198519706726, "learning_rate": 0.0001, "loss": 1.7993, "step": 5560 }, { "epoch": 0.6460644786523381, "grad_norm": 0.41471385955810547, "learning_rate": 0.0001, "loss": 1.5596, "step": 5561 }, { "epoch": 0.6461806564042986, "grad_norm": 0.4447079002857208, "learning_rate": 0.0001, "loss": 1.6486, "step": 5562 }, { "epoch": 0.646296834156259, "grad_norm": 0.42607253789901733, "learning_rate": 0.0001, "loss": 1.6055, "step": 5563 }, { "epoch": 0.6464130119082195, "grad_norm": 0.4518924653530121, "learning_rate": 0.0001, "loss": 1.7441, "step": 5564 }, { "epoch": 0.64652918966018, "grad_norm": 0.4289340376853943, "learning_rate": 0.0001, "loss": 1.7057, "step": 5565 }, { "epoch": 0.6466453674121406, "grad_norm": 0.45490723848342896, "learning_rate": 0.0001, "loss": 1.6834, "step": 5566 }, { "epoch": 0.6467615451641011, "grad_norm": 0.4541492462158203, "learning_rate": 0.0001, "loss": 1.5296, "step": 5567 }, { "epoch": 0.6468777229160616, "grad_norm": 0.42619723081588745, "learning_rate": 0.0001, "loss": 1.556, "step": 5568 }, { "epoch": 0.6469939006680221, "grad_norm": 0.43783965706825256, "learning_rate": 0.0001, "loss": 1.7282, "step": 5569 }, { "epoch": 0.6471100784199826, "grad_norm": 0.4257659912109375, "learning_rate": 0.0001, "loss": 1.6778, "step": 5570 }, { "epoch": 0.6472262561719431, "grad_norm": 0.4469713270664215, "learning_rate": 0.0001, "loss": 1.6628, "step": 5571 }, { "epoch": 0.6473424339239036, "grad_norm": 0.4462829530239105, "learning_rate": 0.0001, "loss": 1.5517, "step": 5572 }, { "epoch": 0.647458611675864, "grad_norm": 0.4520929455757141, "learning_rate": 0.0001, "loss": 1.647, "step": 5573 }, { "epoch": 0.6475747894278245, "grad_norm": 0.47630277276039124, "learning_rate": 0.0001, "loss": 1.6382, "step": 5574 }, { "epoch": 0.647690967179785, "grad_norm": 0.435032457113266, "learning_rate": 0.0001, "loss": 1.4792, "step": 5575 }, { "epoch": 0.6478071449317455, "grad_norm": 0.46478384733200073, "learning_rate": 0.0001, "loss": 1.6891, "step": 5576 }, { "epoch": 0.6479233226837061, "grad_norm": 0.4479098618030548, "learning_rate": 0.0001, "loss": 1.6942, "step": 5577 }, { "epoch": 0.6480395004356666, "grad_norm": 0.4304809868335724, "learning_rate": 0.0001, "loss": 1.5376, "step": 5578 }, { "epoch": 0.6481556781876271, "grad_norm": 0.43313372135162354, "learning_rate": 0.0001, "loss": 1.6466, "step": 5579 }, { "epoch": 0.6482718559395876, "grad_norm": 0.4252367615699768, "learning_rate": 0.0001, "loss": 1.6563, "step": 5580 }, { "epoch": 0.6483880336915481, "grad_norm": 0.42418304085731506, "learning_rate": 0.0001, "loss": 1.6122, "step": 5581 }, { "epoch": 0.6485042114435086, "grad_norm": 0.45233771204948425, "learning_rate": 0.0001, "loss": 1.779, "step": 5582 }, { "epoch": 0.648620389195469, "grad_norm": 0.43534642457962036, "learning_rate": 0.0001, "loss": 1.5435, "step": 5583 }, { "epoch": 0.6487365669474295, "grad_norm": 0.40678098797798157, "learning_rate": 0.0001, "loss": 1.5975, "step": 5584 }, { "epoch": 0.64885274469939, "grad_norm": 0.4426053762435913, "learning_rate": 0.0001, "loss": 1.599, "step": 5585 }, { "epoch": 0.6489689224513505, "grad_norm": 0.43607258796691895, "learning_rate": 0.0001, "loss": 1.6646, "step": 5586 }, { "epoch": 0.6490851002033111, "grad_norm": 0.43579816818237305, "learning_rate": 0.0001, "loss": 1.4639, "step": 5587 }, { "epoch": 0.6492012779552716, "grad_norm": 0.43486765027046204, "learning_rate": 0.0001, "loss": 1.636, "step": 5588 }, { "epoch": 0.6493174557072321, "grad_norm": 0.42424148321151733, "learning_rate": 0.0001, "loss": 1.5596, "step": 5589 }, { "epoch": 0.6494336334591926, "grad_norm": 0.42990225553512573, "learning_rate": 0.0001, "loss": 1.6196, "step": 5590 }, { "epoch": 0.6495498112111531, "grad_norm": 0.43480220437049866, "learning_rate": 0.0001, "loss": 1.7013, "step": 5591 }, { "epoch": 0.6496659889631136, "grad_norm": 0.4241524040699005, "learning_rate": 0.0001, "loss": 1.6738, "step": 5592 }, { "epoch": 0.649782166715074, "grad_norm": 0.4739820957183838, "learning_rate": 0.0001, "loss": 1.7373, "step": 5593 }, { "epoch": 0.6498983444670345, "grad_norm": 0.44619935750961304, "learning_rate": 0.0001, "loss": 1.5907, "step": 5594 }, { "epoch": 0.650014522218995, "grad_norm": 0.444965660572052, "learning_rate": 0.0001, "loss": 1.721, "step": 5595 }, { "epoch": 0.6501306999709555, "grad_norm": 0.4169558882713318, "learning_rate": 0.0001, "loss": 1.4508, "step": 5596 }, { "epoch": 0.6502468777229161, "grad_norm": 0.44122716784477234, "learning_rate": 0.0001, "loss": 1.612, "step": 5597 }, { "epoch": 0.6503630554748766, "grad_norm": 0.4570583403110504, "learning_rate": 0.0001, "loss": 1.7827, "step": 5598 }, { "epoch": 0.6504792332268371, "grad_norm": 0.4605511426925659, "learning_rate": 0.0001, "loss": 1.6557, "step": 5599 }, { "epoch": 0.6505954109787976, "grad_norm": 0.44260162115097046, "learning_rate": 0.0001, "loss": 1.6296, "step": 5600 }, { "epoch": 0.6507115887307581, "grad_norm": 0.4196053445339203, "learning_rate": 0.0001, "loss": 1.4482, "step": 5601 }, { "epoch": 0.6508277664827186, "grad_norm": 0.44127750396728516, "learning_rate": 0.0001, "loss": 1.6281, "step": 5602 }, { "epoch": 0.650943944234679, "grad_norm": 0.3989742398262024, "learning_rate": 0.0001, "loss": 1.4924, "step": 5603 }, { "epoch": 0.6510601219866395, "grad_norm": 0.4253181517124176, "learning_rate": 0.0001, "loss": 1.533, "step": 5604 }, { "epoch": 0.6511762997386, "grad_norm": 0.43197429180145264, "learning_rate": 0.0001, "loss": 1.5778, "step": 5605 }, { "epoch": 0.6512924774905605, "grad_norm": 0.41109520196914673, "learning_rate": 0.0001, "loss": 1.5876, "step": 5606 }, { "epoch": 0.651408655242521, "grad_norm": 0.41764628887176514, "learning_rate": 0.0001, "loss": 1.5224, "step": 5607 }, { "epoch": 0.6515248329944816, "grad_norm": 0.4168471395969391, "learning_rate": 0.0001, "loss": 1.6006, "step": 5608 }, { "epoch": 0.6516410107464421, "grad_norm": 0.47764724493026733, "learning_rate": 0.0001, "loss": 1.6774, "step": 5609 }, { "epoch": 0.6517571884984026, "grad_norm": 0.432256281375885, "learning_rate": 0.0001, "loss": 1.6464, "step": 5610 }, { "epoch": 0.6518733662503631, "grad_norm": 0.4086950421333313, "learning_rate": 0.0001, "loss": 1.3479, "step": 5611 }, { "epoch": 0.6519895440023236, "grad_norm": 0.4752063453197479, "learning_rate": 0.0001, "loss": 1.6622, "step": 5612 }, { "epoch": 0.652105721754284, "grad_norm": 0.41802459955215454, "learning_rate": 0.0001, "loss": 1.5654, "step": 5613 }, { "epoch": 0.6522218995062445, "grad_norm": 0.43375664949417114, "learning_rate": 0.0001, "loss": 1.4565, "step": 5614 }, { "epoch": 0.652338077258205, "grad_norm": 0.4103739559650421, "learning_rate": 0.0001, "loss": 1.3726, "step": 5615 }, { "epoch": 0.6524542550101655, "grad_norm": 0.4138474464416504, "learning_rate": 0.0001, "loss": 1.5999, "step": 5616 }, { "epoch": 0.652570432762126, "grad_norm": 0.4464971423149109, "learning_rate": 0.0001, "loss": 1.6249, "step": 5617 }, { "epoch": 0.6526866105140866, "grad_norm": 0.4133894741535187, "learning_rate": 0.0001, "loss": 1.5194, "step": 5618 }, { "epoch": 0.6528027882660471, "grad_norm": 0.4327857792377472, "learning_rate": 0.0001, "loss": 1.6527, "step": 5619 }, { "epoch": 0.6529189660180076, "grad_norm": 0.4483093321323395, "learning_rate": 0.0001, "loss": 1.6088, "step": 5620 }, { "epoch": 0.6530351437699681, "grad_norm": 0.43492817878723145, "learning_rate": 0.0001, "loss": 1.6154, "step": 5621 }, { "epoch": 0.6531513215219286, "grad_norm": 0.4094054400920868, "learning_rate": 0.0001, "loss": 1.4924, "step": 5622 }, { "epoch": 0.653267499273889, "grad_norm": 0.4260571599006653, "learning_rate": 0.0001, "loss": 1.5632, "step": 5623 }, { "epoch": 0.6533836770258495, "grad_norm": 0.46979352831840515, "learning_rate": 0.0001, "loss": 1.7291, "step": 5624 }, { "epoch": 0.65349985477781, "grad_norm": 0.4641943871974945, "learning_rate": 0.0001, "loss": 1.5805, "step": 5625 }, { "epoch": 0.6536160325297705, "grad_norm": 0.4234953224658966, "learning_rate": 0.0001, "loss": 1.4507, "step": 5626 }, { "epoch": 0.653732210281731, "grad_norm": 0.4371975064277649, "learning_rate": 0.0001, "loss": 1.5322, "step": 5627 }, { "epoch": 0.6538483880336915, "grad_norm": 0.46449965238571167, "learning_rate": 0.0001, "loss": 1.6554, "step": 5628 }, { "epoch": 0.6539645657856521, "grad_norm": 0.4338880479335785, "learning_rate": 0.0001, "loss": 1.613, "step": 5629 }, { "epoch": 0.6540807435376126, "grad_norm": 0.4431876540184021, "learning_rate": 0.0001, "loss": 1.628, "step": 5630 }, { "epoch": 0.6541969212895731, "grad_norm": 0.4568195641040802, "learning_rate": 0.0001, "loss": 1.7121, "step": 5631 }, { "epoch": 0.6543130990415336, "grad_norm": 0.42718222737312317, "learning_rate": 0.0001, "loss": 1.6245, "step": 5632 }, { "epoch": 0.654429276793494, "grad_norm": 0.4288146197795868, "learning_rate": 0.0001, "loss": 1.4417, "step": 5633 }, { "epoch": 0.6545454545454545, "grad_norm": 0.4955886900424957, "learning_rate": 0.0001, "loss": 1.8059, "step": 5634 }, { "epoch": 0.654661632297415, "grad_norm": 0.46111878752708435, "learning_rate": 0.0001, "loss": 1.6836, "step": 5635 }, { "epoch": 0.6547778100493755, "grad_norm": 0.46014752984046936, "learning_rate": 0.0001, "loss": 1.6274, "step": 5636 }, { "epoch": 0.654893987801336, "grad_norm": 0.4325878322124481, "learning_rate": 0.0001, "loss": 1.4557, "step": 5637 }, { "epoch": 0.6550101655532965, "grad_norm": 0.4380306899547577, "learning_rate": 0.0001, "loss": 1.5515, "step": 5638 }, { "epoch": 0.6551263433052571, "grad_norm": 0.37767666578292847, "learning_rate": 0.0001, "loss": 1.1668, "step": 5639 }, { "epoch": 0.6552425210572176, "grad_norm": 0.4658316969871521, "learning_rate": 0.0001, "loss": 1.6848, "step": 5640 }, { "epoch": 0.6553586988091781, "grad_norm": 0.4257132411003113, "learning_rate": 0.0001, "loss": 1.5541, "step": 5641 }, { "epoch": 0.6554748765611386, "grad_norm": 0.43129682540893555, "learning_rate": 0.0001, "loss": 1.5798, "step": 5642 }, { "epoch": 0.655591054313099, "grad_norm": 0.4526180326938629, "learning_rate": 0.0001, "loss": 1.7933, "step": 5643 }, { "epoch": 0.6557072320650595, "grad_norm": 0.5669481754302979, "learning_rate": 0.0001, "loss": 1.6737, "step": 5644 }, { "epoch": 0.65582340981702, "grad_norm": 0.4606925845146179, "learning_rate": 0.0001, "loss": 1.6747, "step": 5645 }, { "epoch": 0.6559395875689805, "grad_norm": 0.4286077916622162, "learning_rate": 0.0001, "loss": 1.5805, "step": 5646 }, { "epoch": 0.656055765320941, "grad_norm": 0.422355979681015, "learning_rate": 0.0001, "loss": 1.6196, "step": 5647 }, { "epoch": 0.6561719430729015, "grad_norm": 0.4737590551376343, "learning_rate": 0.0001, "loss": 1.7451, "step": 5648 }, { "epoch": 0.656288120824862, "grad_norm": 0.47408369183540344, "learning_rate": 0.0001, "loss": 1.5371, "step": 5649 }, { "epoch": 0.6564042985768226, "grad_norm": 0.4657224416732788, "learning_rate": 0.0001, "loss": 1.714, "step": 5650 }, { "epoch": 0.6565204763287831, "grad_norm": 0.4612825810909271, "learning_rate": 0.0001, "loss": 1.708, "step": 5651 }, { "epoch": 0.6566366540807436, "grad_norm": 0.4196070730686188, "learning_rate": 0.0001, "loss": 1.6494, "step": 5652 }, { "epoch": 0.656752831832704, "grad_norm": 0.43713754415512085, "learning_rate": 0.0001, "loss": 1.6664, "step": 5653 }, { "epoch": 0.6568690095846645, "grad_norm": 0.4284767508506775, "learning_rate": 0.0001, "loss": 1.6073, "step": 5654 }, { "epoch": 0.656985187336625, "grad_norm": 0.4555676281452179, "learning_rate": 0.0001, "loss": 1.7236, "step": 5655 }, { "epoch": 0.6571013650885855, "grad_norm": 0.43046674132347107, "learning_rate": 0.0001, "loss": 1.4605, "step": 5656 }, { "epoch": 0.657217542840546, "grad_norm": 0.4417819678783417, "learning_rate": 0.0001, "loss": 1.5334, "step": 5657 }, { "epoch": 0.6573337205925065, "grad_norm": 0.4391897916793823, "learning_rate": 0.0001, "loss": 1.5996, "step": 5658 }, { "epoch": 0.657449898344467, "grad_norm": 0.4507284164428711, "learning_rate": 0.0001, "loss": 1.6983, "step": 5659 }, { "epoch": 0.6575660760964276, "grad_norm": 0.43838751316070557, "learning_rate": 0.0001, "loss": 1.6327, "step": 5660 }, { "epoch": 0.6576822538483881, "grad_norm": 0.4852067232131958, "learning_rate": 0.0001, "loss": 1.7812, "step": 5661 }, { "epoch": 0.6577984316003486, "grad_norm": 0.416293740272522, "learning_rate": 0.0001, "loss": 1.5342, "step": 5662 }, { "epoch": 0.657914609352309, "grad_norm": 0.45646652579307556, "learning_rate": 0.0001, "loss": 1.5847, "step": 5663 }, { "epoch": 0.6580307871042695, "grad_norm": 0.43007123470306396, "learning_rate": 0.0001, "loss": 1.5599, "step": 5664 }, { "epoch": 0.65814696485623, "grad_norm": 0.4502851963043213, "learning_rate": 0.0001, "loss": 1.5684, "step": 5665 }, { "epoch": 0.6582631426081905, "grad_norm": 0.44901666045188904, "learning_rate": 0.0001, "loss": 1.6163, "step": 5666 }, { "epoch": 0.658379320360151, "grad_norm": 0.4369884431362152, "learning_rate": 0.0001, "loss": 1.5916, "step": 5667 }, { "epoch": 0.6584954981121115, "grad_norm": 0.4983995854854584, "learning_rate": 0.0001, "loss": 1.6471, "step": 5668 }, { "epoch": 0.658611675864072, "grad_norm": 0.5106979608535767, "learning_rate": 0.0001, "loss": 1.7756, "step": 5669 }, { "epoch": 0.6587278536160325, "grad_norm": 0.4607015550136566, "learning_rate": 0.0001, "loss": 1.6902, "step": 5670 }, { "epoch": 0.6588440313679931, "grad_norm": 0.44511786103248596, "learning_rate": 0.0001, "loss": 1.5567, "step": 5671 }, { "epoch": 0.6589602091199536, "grad_norm": 0.44376087188720703, "learning_rate": 0.0001, "loss": 1.5643, "step": 5672 }, { "epoch": 0.659076386871914, "grad_norm": 0.463748574256897, "learning_rate": 0.0001, "loss": 1.6231, "step": 5673 }, { "epoch": 0.6591925646238745, "grad_norm": 0.4355449676513672, "learning_rate": 0.0001, "loss": 1.6533, "step": 5674 }, { "epoch": 0.659308742375835, "grad_norm": 0.4218558967113495, "learning_rate": 0.0001, "loss": 1.3842, "step": 5675 }, { "epoch": 0.6594249201277955, "grad_norm": 0.4563169479370117, "learning_rate": 0.0001, "loss": 1.7053, "step": 5676 }, { "epoch": 0.659541097879756, "grad_norm": 0.4865387976169586, "learning_rate": 0.0001, "loss": 1.975, "step": 5677 }, { "epoch": 0.6596572756317165, "grad_norm": 0.4624812602996826, "learning_rate": 0.0001, "loss": 1.7371, "step": 5678 }, { "epoch": 0.659773453383677, "grad_norm": 0.4570534825325012, "learning_rate": 0.0001, "loss": 1.7626, "step": 5679 }, { "epoch": 0.6598896311356375, "grad_norm": 0.42516836524009705, "learning_rate": 0.0001, "loss": 1.5708, "step": 5680 }, { "epoch": 0.6600058088875981, "grad_norm": 0.4255434572696686, "learning_rate": 0.0001, "loss": 1.6152, "step": 5681 }, { "epoch": 0.6601219866395586, "grad_norm": 0.4116609990596771, "learning_rate": 0.0001, "loss": 1.4613, "step": 5682 }, { "epoch": 0.6602381643915191, "grad_norm": 0.45455899834632874, "learning_rate": 0.0001, "loss": 1.7634, "step": 5683 }, { "epoch": 0.6603543421434795, "grad_norm": 0.4295278787612915, "learning_rate": 0.0001, "loss": 1.7607, "step": 5684 }, { "epoch": 0.66047051989544, "grad_norm": 0.43869319558143616, "learning_rate": 0.0001, "loss": 1.4694, "step": 5685 }, { "epoch": 0.6605866976474005, "grad_norm": 0.4462525546550751, "learning_rate": 0.0001, "loss": 1.7752, "step": 5686 }, { "epoch": 0.660702875399361, "grad_norm": 0.4246017038822174, "learning_rate": 0.0001, "loss": 1.6029, "step": 5687 }, { "epoch": 0.6608190531513215, "grad_norm": 0.43097880482673645, "learning_rate": 0.0001, "loss": 1.5193, "step": 5688 }, { "epoch": 0.660935230903282, "grad_norm": 0.45352938771247864, "learning_rate": 0.0001, "loss": 1.6469, "step": 5689 }, { "epoch": 0.6610514086552425, "grad_norm": 0.46061140298843384, "learning_rate": 0.0001, "loss": 1.5656, "step": 5690 }, { "epoch": 0.661167586407203, "grad_norm": 0.4334444999694824, "learning_rate": 0.0001, "loss": 1.6308, "step": 5691 }, { "epoch": 0.6612837641591636, "grad_norm": 0.43552273511886597, "learning_rate": 0.0001, "loss": 1.7532, "step": 5692 }, { "epoch": 0.6613999419111241, "grad_norm": 0.4349510669708252, "learning_rate": 0.0001, "loss": 1.6089, "step": 5693 }, { "epoch": 0.6615161196630845, "grad_norm": 0.45464491844177246, "learning_rate": 0.0001, "loss": 1.604, "step": 5694 }, { "epoch": 0.661632297415045, "grad_norm": 0.44270920753479004, "learning_rate": 0.0001, "loss": 1.5775, "step": 5695 }, { "epoch": 0.6617484751670055, "grad_norm": 0.4432513117790222, "learning_rate": 0.0001, "loss": 1.6634, "step": 5696 }, { "epoch": 0.661864652918966, "grad_norm": 0.4541747272014618, "learning_rate": 0.0001, "loss": 1.483, "step": 5697 }, { "epoch": 0.6619808306709265, "grad_norm": 0.43986377120018005, "learning_rate": 0.0001, "loss": 1.6965, "step": 5698 }, { "epoch": 0.662097008422887, "grad_norm": 0.463550329208374, "learning_rate": 0.0001, "loss": 1.6621, "step": 5699 }, { "epoch": 0.6622131861748475, "grad_norm": 0.42243123054504395, "learning_rate": 0.0001, "loss": 1.5027, "step": 5700 }, { "epoch": 0.662329363926808, "grad_norm": 0.4550102651119232, "learning_rate": 0.0001, "loss": 1.521, "step": 5701 }, { "epoch": 0.6624455416787686, "grad_norm": 0.48717159032821655, "learning_rate": 0.0001, "loss": 1.8157, "step": 5702 }, { "epoch": 0.6625617194307291, "grad_norm": 0.43235111236572266, "learning_rate": 0.0001, "loss": 1.6943, "step": 5703 }, { "epoch": 0.6626778971826895, "grad_norm": 0.39643558859825134, "learning_rate": 0.0001, "loss": 1.4418, "step": 5704 }, { "epoch": 0.66279407493465, "grad_norm": 0.445264607667923, "learning_rate": 0.0001, "loss": 1.5598, "step": 5705 }, { "epoch": 0.6629102526866105, "grad_norm": 0.45641717314720154, "learning_rate": 0.0001, "loss": 1.7218, "step": 5706 }, { "epoch": 0.663026430438571, "grad_norm": 0.4507564902305603, "learning_rate": 0.0001, "loss": 1.597, "step": 5707 }, { "epoch": 0.6631426081905315, "grad_norm": 0.4877043068408966, "learning_rate": 0.0001, "loss": 1.7286, "step": 5708 }, { "epoch": 0.663258785942492, "grad_norm": 0.42610713839530945, "learning_rate": 0.0001, "loss": 1.6014, "step": 5709 }, { "epoch": 0.6633749636944525, "grad_norm": 0.4246615767478943, "learning_rate": 0.0001, "loss": 1.6489, "step": 5710 }, { "epoch": 0.663491141446413, "grad_norm": 0.46146342158317566, "learning_rate": 0.0001, "loss": 1.7475, "step": 5711 }, { "epoch": 0.6636073191983735, "grad_norm": 0.45187118649482727, "learning_rate": 0.0001, "loss": 1.6847, "step": 5712 }, { "epoch": 0.6637234969503341, "grad_norm": 0.4408842921257019, "learning_rate": 0.0001, "loss": 1.5933, "step": 5713 }, { "epoch": 0.6638396747022945, "grad_norm": 0.4070923626422882, "learning_rate": 0.0001, "loss": 1.5068, "step": 5714 }, { "epoch": 0.663955852454255, "grad_norm": 0.4318831264972687, "learning_rate": 0.0001, "loss": 1.5662, "step": 5715 }, { "epoch": 0.6640720302062155, "grad_norm": 0.48511743545532227, "learning_rate": 0.0001, "loss": 1.7734, "step": 5716 }, { "epoch": 0.664188207958176, "grad_norm": 0.41092416644096375, "learning_rate": 0.0001, "loss": 1.4239, "step": 5717 }, { "epoch": 0.6643043857101365, "grad_norm": 0.4554840922355652, "learning_rate": 0.0001, "loss": 1.6977, "step": 5718 }, { "epoch": 0.664420563462097, "grad_norm": 0.42100241780281067, "learning_rate": 0.0001, "loss": 1.475, "step": 5719 }, { "epoch": 0.6645367412140575, "grad_norm": 0.421871542930603, "learning_rate": 0.0001, "loss": 1.4534, "step": 5720 }, { "epoch": 0.664652918966018, "grad_norm": 0.46472758054733276, "learning_rate": 0.0001, "loss": 1.6236, "step": 5721 }, { "epoch": 0.6647690967179785, "grad_norm": 0.4486168920993805, "learning_rate": 0.0001, "loss": 1.5682, "step": 5722 }, { "epoch": 0.6648852744699391, "grad_norm": 0.44002678990364075, "learning_rate": 0.0001, "loss": 1.6501, "step": 5723 }, { "epoch": 0.6650014522218995, "grad_norm": 0.427792489528656, "learning_rate": 0.0001, "loss": 1.527, "step": 5724 }, { "epoch": 0.66511762997386, "grad_norm": 0.44811704754829407, "learning_rate": 0.0001, "loss": 1.6518, "step": 5725 }, { "epoch": 0.6652338077258205, "grad_norm": 0.4508500099182129, "learning_rate": 0.0001, "loss": 1.7428, "step": 5726 }, { "epoch": 0.665349985477781, "grad_norm": 0.4322895407676697, "learning_rate": 0.0001, "loss": 1.4308, "step": 5727 }, { "epoch": 0.6654661632297415, "grad_norm": 0.4665115177631378, "learning_rate": 0.0001, "loss": 1.7279, "step": 5728 }, { "epoch": 0.665582340981702, "grad_norm": 0.45049217343330383, "learning_rate": 0.0001, "loss": 1.6839, "step": 5729 }, { "epoch": 0.6656985187336625, "grad_norm": 0.4667704701423645, "learning_rate": 0.0001, "loss": 1.553, "step": 5730 }, { "epoch": 0.665814696485623, "grad_norm": 0.43236711621284485, "learning_rate": 0.0001, "loss": 1.5529, "step": 5731 }, { "epoch": 0.6659308742375835, "grad_norm": 0.4469371438026428, "learning_rate": 0.0001, "loss": 1.5488, "step": 5732 }, { "epoch": 0.666047051989544, "grad_norm": 0.4933030307292938, "learning_rate": 0.0001, "loss": 1.7527, "step": 5733 }, { "epoch": 0.6661632297415045, "grad_norm": 0.466074138879776, "learning_rate": 0.0001, "loss": 1.658, "step": 5734 }, { "epoch": 0.666279407493465, "grad_norm": 0.418959379196167, "learning_rate": 0.0001, "loss": 1.6183, "step": 5735 }, { "epoch": 0.6663955852454255, "grad_norm": 0.4188354015350342, "learning_rate": 0.0001, "loss": 1.4785, "step": 5736 }, { "epoch": 0.666511762997386, "grad_norm": 0.46846193075180054, "learning_rate": 0.0001, "loss": 1.6352, "step": 5737 }, { "epoch": 0.6666279407493465, "grad_norm": 0.41429415345191956, "learning_rate": 0.0001, "loss": 1.5134, "step": 5738 }, { "epoch": 0.666744118501307, "grad_norm": 0.4290158748626709, "learning_rate": 0.0001, "loss": 1.4901, "step": 5739 }, { "epoch": 0.6668602962532675, "grad_norm": 0.45462852716445923, "learning_rate": 0.0001, "loss": 1.7486, "step": 5740 }, { "epoch": 0.666976474005228, "grad_norm": 0.4142838716506958, "learning_rate": 0.0001, "loss": 1.5618, "step": 5741 }, { "epoch": 0.6670926517571885, "grad_norm": 0.4205038249492645, "learning_rate": 0.0001, "loss": 1.5088, "step": 5742 }, { "epoch": 0.667208829509149, "grad_norm": 0.45246177911758423, "learning_rate": 0.0001, "loss": 1.6424, "step": 5743 }, { "epoch": 0.6673250072611095, "grad_norm": 0.4968937039375305, "learning_rate": 0.0001, "loss": 1.5755, "step": 5744 }, { "epoch": 0.66744118501307, "grad_norm": 0.44130516052246094, "learning_rate": 0.0001, "loss": 1.6289, "step": 5745 }, { "epoch": 0.6675573627650305, "grad_norm": 0.4321398138999939, "learning_rate": 0.0001, "loss": 1.6267, "step": 5746 }, { "epoch": 0.667673540516991, "grad_norm": 0.43933340907096863, "learning_rate": 0.0001, "loss": 1.5193, "step": 5747 }, { "epoch": 0.6677897182689515, "grad_norm": 0.4424324333667755, "learning_rate": 0.0001, "loss": 1.6544, "step": 5748 }, { "epoch": 0.667905896020912, "grad_norm": 0.44031408429145813, "learning_rate": 0.0001, "loss": 1.6682, "step": 5749 }, { "epoch": 0.6680220737728725, "grad_norm": 0.4373178482055664, "learning_rate": 0.0001, "loss": 1.4932, "step": 5750 }, { "epoch": 0.668138251524833, "grad_norm": 0.517252504825592, "learning_rate": 0.0001, "loss": 1.8808, "step": 5751 }, { "epoch": 0.6682544292767935, "grad_norm": 0.44176408648490906, "learning_rate": 0.0001, "loss": 1.6645, "step": 5752 }, { "epoch": 0.668370607028754, "grad_norm": 0.4362224340438843, "learning_rate": 0.0001, "loss": 1.577, "step": 5753 }, { "epoch": 0.6684867847807144, "grad_norm": 0.44093477725982666, "learning_rate": 0.0001, "loss": 1.529, "step": 5754 }, { "epoch": 0.668602962532675, "grad_norm": 0.42824220657348633, "learning_rate": 0.0001, "loss": 1.6981, "step": 5755 }, { "epoch": 0.6687191402846355, "grad_norm": 0.46376579999923706, "learning_rate": 0.0001, "loss": 1.7126, "step": 5756 }, { "epoch": 0.668835318036596, "grad_norm": 0.47102320194244385, "learning_rate": 0.0001, "loss": 1.8464, "step": 5757 }, { "epoch": 0.6689514957885565, "grad_norm": 0.4689522385597229, "learning_rate": 0.0001, "loss": 1.6374, "step": 5758 }, { "epoch": 0.669067673540517, "grad_norm": 0.43695563077926636, "learning_rate": 0.0001, "loss": 1.567, "step": 5759 }, { "epoch": 0.6691838512924775, "grad_norm": 0.48102685809135437, "learning_rate": 0.0001, "loss": 1.6039, "step": 5760 }, { "epoch": 0.669300029044438, "grad_norm": 0.4178447127342224, "learning_rate": 0.0001, "loss": 1.4713, "step": 5761 }, { "epoch": 0.6694162067963985, "grad_norm": 0.43344780802726746, "learning_rate": 0.0001, "loss": 1.5005, "step": 5762 }, { "epoch": 0.669532384548359, "grad_norm": 0.4385789632797241, "learning_rate": 0.0001, "loss": 1.6053, "step": 5763 }, { "epoch": 0.6696485623003194, "grad_norm": 0.49828559160232544, "learning_rate": 0.0001, "loss": 1.8065, "step": 5764 }, { "epoch": 0.66976474005228, "grad_norm": 0.45839375257492065, "learning_rate": 0.0001, "loss": 1.7687, "step": 5765 }, { "epoch": 0.6698809178042405, "grad_norm": 0.45659172534942627, "learning_rate": 0.0001, "loss": 1.6962, "step": 5766 }, { "epoch": 0.669997095556201, "grad_norm": 0.43861255049705505, "learning_rate": 0.0001, "loss": 1.3606, "step": 5767 }, { "epoch": 0.6701132733081615, "grad_norm": 0.47588106989860535, "learning_rate": 0.0001, "loss": 1.7271, "step": 5768 }, { "epoch": 0.670229451060122, "grad_norm": 0.43281859159469604, "learning_rate": 0.0001, "loss": 1.5828, "step": 5769 }, { "epoch": 0.6703456288120825, "grad_norm": 0.42776212096214294, "learning_rate": 0.0001, "loss": 1.586, "step": 5770 }, { "epoch": 0.670461806564043, "grad_norm": 0.48418179154396057, "learning_rate": 0.0001, "loss": 1.6726, "step": 5771 }, { "epoch": 0.6705779843160035, "grad_norm": 0.47568055987358093, "learning_rate": 0.0001, "loss": 1.6748, "step": 5772 }, { "epoch": 0.670694162067964, "grad_norm": 0.46286889910697937, "learning_rate": 0.0001, "loss": 1.6651, "step": 5773 }, { "epoch": 0.6708103398199244, "grad_norm": 0.42690396308898926, "learning_rate": 0.0001, "loss": 1.5761, "step": 5774 }, { "epoch": 0.670926517571885, "grad_norm": 0.5125055313110352, "learning_rate": 0.0001, "loss": 1.8218, "step": 5775 }, { "epoch": 0.6710426953238455, "grad_norm": 0.4569031894207001, "learning_rate": 0.0001, "loss": 1.7317, "step": 5776 }, { "epoch": 0.671158873075806, "grad_norm": 0.3980884253978729, "learning_rate": 0.0001, "loss": 1.5388, "step": 5777 }, { "epoch": 0.6712750508277665, "grad_norm": 0.450810968875885, "learning_rate": 0.0001, "loss": 1.5016, "step": 5778 }, { "epoch": 0.671391228579727, "grad_norm": 0.47312337160110474, "learning_rate": 0.0001, "loss": 1.7046, "step": 5779 }, { "epoch": 0.6715074063316875, "grad_norm": 0.42309361696243286, "learning_rate": 0.0001, "loss": 1.5981, "step": 5780 }, { "epoch": 0.671623584083648, "grad_norm": 0.4467894434928894, "learning_rate": 0.0001, "loss": 1.6096, "step": 5781 }, { "epoch": 0.6717397618356085, "grad_norm": 0.41931766271591187, "learning_rate": 0.0001, "loss": 1.4433, "step": 5782 }, { "epoch": 0.671855939587569, "grad_norm": 0.49468186497688293, "learning_rate": 0.0001, "loss": 1.5217, "step": 5783 }, { "epoch": 0.6719721173395294, "grad_norm": 0.5051024556159973, "learning_rate": 0.0001, "loss": 1.7906, "step": 5784 }, { "epoch": 0.6720882950914899, "grad_norm": 0.4598732888698578, "learning_rate": 0.0001, "loss": 1.6131, "step": 5785 }, { "epoch": 0.6722044728434505, "grad_norm": 0.4720596671104431, "learning_rate": 0.0001, "loss": 1.4947, "step": 5786 }, { "epoch": 0.672320650595411, "grad_norm": 0.40756291151046753, "learning_rate": 0.0001, "loss": 1.4437, "step": 5787 }, { "epoch": 0.6724368283473715, "grad_norm": 0.44663846492767334, "learning_rate": 0.0001, "loss": 1.6562, "step": 5788 }, { "epoch": 0.672553006099332, "grad_norm": 0.44735023379325867, "learning_rate": 0.0001, "loss": 1.6056, "step": 5789 }, { "epoch": 0.6726691838512925, "grad_norm": 0.4546589255332947, "learning_rate": 0.0001, "loss": 1.6313, "step": 5790 }, { "epoch": 0.672785361603253, "grad_norm": 0.44948139786720276, "learning_rate": 0.0001, "loss": 1.6102, "step": 5791 }, { "epoch": 0.6729015393552135, "grad_norm": 0.45747241377830505, "learning_rate": 0.0001, "loss": 1.6629, "step": 5792 }, { "epoch": 0.673017717107174, "grad_norm": 0.46398770809173584, "learning_rate": 0.0001, "loss": 1.5984, "step": 5793 }, { "epoch": 0.6731338948591344, "grad_norm": 0.4531368017196655, "learning_rate": 0.0001, "loss": 1.5425, "step": 5794 }, { "epoch": 0.6732500726110949, "grad_norm": 0.43872615694999695, "learning_rate": 0.0001, "loss": 1.5689, "step": 5795 }, { "epoch": 0.6733662503630555, "grad_norm": 0.4827875792980194, "learning_rate": 0.0001, "loss": 1.7964, "step": 5796 }, { "epoch": 0.673482428115016, "grad_norm": 0.46272164583206177, "learning_rate": 0.0001, "loss": 1.6332, "step": 5797 }, { "epoch": 0.6735986058669765, "grad_norm": 0.43684569001197815, "learning_rate": 0.0001, "loss": 1.5881, "step": 5798 }, { "epoch": 0.673714783618937, "grad_norm": 0.43560728430747986, "learning_rate": 0.0001, "loss": 1.5473, "step": 5799 }, { "epoch": 0.6738309613708975, "grad_norm": 0.43896791338920593, "learning_rate": 0.0001, "loss": 1.6281, "step": 5800 }, { "epoch": 0.673947139122858, "grad_norm": 0.4776453375816345, "learning_rate": 0.0001, "loss": 1.64, "step": 5801 }, { "epoch": 0.6740633168748185, "grad_norm": 0.44419828057289124, "learning_rate": 0.0001, "loss": 1.6425, "step": 5802 }, { "epoch": 0.674179494626779, "grad_norm": 0.44315245747566223, "learning_rate": 0.0001, "loss": 1.5136, "step": 5803 }, { "epoch": 0.6742956723787394, "grad_norm": 0.4680185914039612, "learning_rate": 0.0001, "loss": 1.7217, "step": 5804 }, { "epoch": 0.6744118501306999, "grad_norm": 0.4504956007003784, "learning_rate": 0.0001, "loss": 1.5078, "step": 5805 }, { "epoch": 0.6745280278826604, "grad_norm": 0.4400990307331085, "learning_rate": 0.0001, "loss": 1.6676, "step": 5806 }, { "epoch": 0.674644205634621, "grad_norm": 0.5055308938026428, "learning_rate": 0.0001, "loss": 1.5901, "step": 5807 }, { "epoch": 0.6747603833865815, "grad_norm": 0.4236086905002594, "learning_rate": 0.0001, "loss": 1.6601, "step": 5808 }, { "epoch": 0.674876561138542, "grad_norm": 0.4559587836265564, "learning_rate": 0.0001, "loss": 1.7122, "step": 5809 }, { "epoch": 0.6749927388905025, "grad_norm": 0.42813077569007874, "learning_rate": 0.0001, "loss": 1.6111, "step": 5810 }, { "epoch": 0.675108916642463, "grad_norm": 0.4201710820198059, "learning_rate": 0.0001, "loss": 1.5335, "step": 5811 }, { "epoch": 0.6752250943944235, "grad_norm": 0.4242464005947113, "learning_rate": 0.0001, "loss": 1.5137, "step": 5812 }, { "epoch": 0.675341272146384, "grad_norm": 0.45076656341552734, "learning_rate": 0.0001, "loss": 1.698, "step": 5813 }, { "epoch": 0.6754574498983444, "grad_norm": 0.4501783549785614, "learning_rate": 0.0001, "loss": 1.7337, "step": 5814 }, { "epoch": 0.6755736276503049, "grad_norm": 0.4316518306732178, "learning_rate": 0.0001, "loss": 1.514, "step": 5815 }, { "epoch": 0.6756898054022654, "grad_norm": 0.4382632076740265, "learning_rate": 0.0001, "loss": 1.8078, "step": 5816 }, { "epoch": 0.675805983154226, "grad_norm": 0.42367810010910034, "learning_rate": 0.0001, "loss": 1.6614, "step": 5817 }, { "epoch": 0.6759221609061865, "grad_norm": 0.434133917093277, "learning_rate": 0.0001, "loss": 1.5825, "step": 5818 }, { "epoch": 0.676038338658147, "grad_norm": 0.4283505976200104, "learning_rate": 0.0001, "loss": 1.6213, "step": 5819 }, { "epoch": 0.6761545164101075, "grad_norm": 0.4194677770137787, "learning_rate": 0.0001, "loss": 1.6286, "step": 5820 }, { "epoch": 0.676270694162068, "grad_norm": 0.43860924243927, "learning_rate": 0.0001, "loss": 1.6455, "step": 5821 }, { "epoch": 0.6763868719140285, "grad_norm": 0.43896040320396423, "learning_rate": 0.0001, "loss": 1.5243, "step": 5822 }, { "epoch": 0.676503049665989, "grad_norm": 0.41808873414993286, "learning_rate": 0.0001, "loss": 1.3372, "step": 5823 }, { "epoch": 0.6766192274179494, "grad_norm": 0.5032152533531189, "learning_rate": 0.0001, "loss": 1.7323, "step": 5824 }, { "epoch": 0.6767354051699099, "grad_norm": 0.48192158341407776, "learning_rate": 0.0001, "loss": 1.7263, "step": 5825 }, { "epoch": 0.6768515829218704, "grad_norm": 0.4845306873321533, "learning_rate": 0.0001, "loss": 1.7389, "step": 5826 }, { "epoch": 0.6769677606738309, "grad_norm": 0.46503588557243347, "learning_rate": 0.0001, "loss": 1.7579, "step": 5827 }, { "epoch": 0.6770839384257915, "grad_norm": 0.42725270986557007, "learning_rate": 0.0001, "loss": 1.5129, "step": 5828 }, { "epoch": 0.677200116177752, "grad_norm": 0.43792206048965454, "learning_rate": 0.0001, "loss": 1.6703, "step": 5829 }, { "epoch": 0.6773162939297125, "grad_norm": 0.42509621381759644, "learning_rate": 0.0001, "loss": 1.5468, "step": 5830 }, { "epoch": 0.677432471681673, "grad_norm": 0.4887797236442566, "learning_rate": 0.0001, "loss": 1.7904, "step": 5831 }, { "epoch": 0.6775486494336335, "grad_norm": 0.4223867356777191, "learning_rate": 0.0001, "loss": 1.4495, "step": 5832 }, { "epoch": 0.677664827185594, "grad_norm": 0.44735440611839294, "learning_rate": 0.0001, "loss": 1.5636, "step": 5833 }, { "epoch": 0.6777810049375544, "grad_norm": 0.4342286288738251, "learning_rate": 0.0001, "loss": 1.4604, "step": 5834 }, { "epoch": 0.6778971826895149, "grad_norm": 0.47264301776885986, "learning_rate": 0.0001, "loss": 1.5672, "step": 5835 }, { "epoch": 0.6780133604414754, "grad_norm": 0.45144325494766235, "learning_rate": 0.0001, "loss": 1.6075, "step": 5836 }, { "epoch": 0.6781295381934359, "grad_norm": 0.45070862770080566, "learning_rate": 0.0001, "loss": 1.7091, "step": 5837 }, { "epoch": 0.6782457159453965, "grad_norm": 0.44926077127456665, "learning_rate": 0.0001, "loss": 1.5651, "step": 5838 }, { "epoch": 0.678361893697357, "grad_norm": 0.42580538988113403, "learning_rate": 0.0001, "loss": 1.6603, "step": 5839 }, { "epoch": 0.6784780714493175, "grad_norm": 0.4390771985054016, "learning_rate": 0.0001, "loss": 1.5282, "step": 5840 }, { "epoch": 0.678594249201278, "grad_norm": 0.43495962023735046, "learning_rate": 0.0001, "loss": 1.527, "step": 5841 }, { "epoch": 0.6787104269532385, "grad_norm": 0.42545855045318604, "learning_rate": 0.0001, "loss": 1.4797, "step": 5842 }, { "epoch": 0.678826604705199, "grad_norm": 0.40862634778022766, "learning_rate": 0.0001, "loss": 1.4629, "step": 5843 }, { "epoch": 0.6789427824571594, "grad_norm": 0.4625224173069, "learning_rate": 0.0001, "loss": 1.6166, "step": 5844 }, { "epoch": 0.6790589602091199, "grad_norm": 0.4572260081768036, "learning_rate": 0.0001, "loss": 1.6402, "step": 5845 }, { "epoch": 0.6791751379610804, "grad_norm": 0.45550736784935, "learning_rate": 0.0001, "loss": 1.725, "step": 5846 }, { "epoch": 0.6792913157130409, "grad_norm": 0.49932238459587097, "learning_rate": 0.0001, "loss": 1.6559, "step": 5847 }, { "epoch": 0.6794074934650014, "grad_norm": 0.43598851561546326, "learning_rate": 0.0001, "loss": 1.6201, "step": 5848 }, { "epoch": 0.679523671216962, "grad_norm": 0.4407789707183838, "learning_rate": 0.0001, "loss": 1.5795, "step": 5849 }, { "epoch": 0.6796398489689225, "grad_norm": 0.5003304481506348, "learning_rate": 0.0001, "loss": 1.9146, "step": 5850 }, { "epoch": 0.679756026720883, "grad_norm": 0.45455658435821533, "learning_rate": 0.0001, "loss": 1.6535, "step": 5851 }, { "epoch": 0.6798722044728435, "grad_norm": 0.429861456155777, "learning_rate": 0.0001, "loss": 1.7047, "step": 5852 }, { "epoch": 0.679988382224804, "grad_norm": 0.41853785514831543, "learning_rate": 0.0001, "loss": 1.4794, "step": 5853 }, { "epoch": 0.6801045599767644, "grad_norm": 0.4636412560939789, "learning_rate": 0.0001, "loss": 1.6509, "step": 5854 }, { "epoch": 0.6802207377287249, "grad_norm": 0.4763379395008087, "learning_rate": 0.0001, "loss": 1.9099, "step": 5855 }, { "epoch": 0.6803369154806854, "grad_norm": 0.40933549404144287, "learning_rate": 0.0001, "loss": 1.4495, "step": 5856 }, { "epoch": 0.6804530932326459, "grad_norm": 0.43275657296180725, "learning_rate": 0.0001, "loss": 1.4485, "step": 5857 }, { "epoch": 0.6805692709846064, "grad_norm": 0.4391117990016937, "learning_rate": 0.0001, "loss": 1.6513, "step": 5858 }, { "epoch": 0.680685448736567, "grad_norm": 0.456015408039093, "learning_rate": 0.0001, "loss": 1.5974, "step": 5859 }, { "epoch": 0.6808016264885275, "grad_norm": 0.4378184378147125, "learning_rate": 0.0001, "loss": 1.5483, "step": 5860 }, { "epoch": 0.680917804240488, "grad_norm": 0.4603559672832489, "learning_rate": 0.0001, "loss": 1.5069, "step": 5861 }, { "epoch": 0.6810339819924485, "grad_norm": 0.46735942363739014, "learning_rate": 0.0001, "loss": 1.5081, "step": 5862 }, { "epoch": 0.681150159744409, "grad_norm": 0.42926350235939026, "learning_rate": 0.0001, "loss": 1.5607, "step": 5863 }, { "epoch": 0.6812663374963694, "grad_norm": 0.4338090717792511, "learning_rate": 0.0001, "loss": 1.5821, "step": 5864 }, { "epoch": 0.6813825152483299, "grad_norm": 0.4733595550060272, "learning_rate": 0.0001, "loss": 1.4723, "step": 5865 }, { "epoch": 0.6814986930002904, "grad_norm": 0.43445518612861633, "learning_rate": 0.0001, "loss": 1.6658, "step": 5866 }, { "epoch": 0.6816148707522509, "grad_norm": 0.4225948750972748, "learning_rate": 0.0001, "loss": 1.5438, "step": 5867 }, { "epoch": 0.6817310485042114, "grad_norm": 0.4454067051410675, "learning_rate": 0.0001, "loss": 1.6634, "step": 5868 }, { "epoch": 0.6818472262561719, "grad_norm": 0.4258386194705963, "learning_rate": 0.0001, "loss": 1.4935, "step": 5869 }, { "epoch": 0.6819634040081325, "grad_norm": 0.5034085512161255, "learning_rate": 0.0001, "loss": 1.6897, "step": 5870 }, { "epoch": 0.682079581760093, "grad_norm": 0.48741772770881653, "learning_rate": 0.0001, "loss": 1.8736, "step": 5871 }, { "epoch": 0.6821957595120535, "grad_norm": 0.4586668908596039, "learning_rate": 0.0001, "loss": 1.6409, "step": 5872 }, { "epoch": 0.682311937264014, "grad_norm": 0.445441335439682, "learning_rate": 0.0001, "loss": 1.5984, "step": 5873 }, { "epoch": 0.6824281150159744, "grad_norm": 0.42984551191329956, "learning_rate": 0.0001, "loss": 1.6407, "step": 5874 }, { "epoch": 0.6825442927679349, "grad_norm": 0.42550551891326904, "learning_rate": 0.0001, "loss": 1.4473, "step": 5875 }, { "epoch": 0.6826604705198954, "grad_norm": 0.43332982063293457, "learning_rate": 0.0001, "loss": 1.4163, "step": 5876 }, { "epoch": 0.6827766482718559, "grad_norm": 0.46797114610671997, "learning_rate": 0.0001, "loss": 1.6737, "step": 5877 }, { "epoch": 0.6828928260238164, "grad_norm": 0.45049670338630676, "learning_rate": 0.0001, "loss": 1.7157, "step": 5878 }, { "epoch": 0.6830090037757769, "grad_norm": 0.4513972997665405, "learning_rate": 0.0001, "loss": 1.5783, "step": 5879 }, { "epoch": 0.6831251815277375, "grad_norm": 0.46550363302230835, "learning_rate": 0.0001, "loss": 1.537, "step": 5880 }, { "epoch": 0.683241359279698, "grad_norm": 0.4839429259300232, "learning_rate": 0.0001, "loss": 1.7258, "step": 5881 }, { "epoch": 0.6833575370316585, "grad_norm": 0.4608222544193268, "learning_rate": 0.0001, "loss": 1.5762, "step": 5882 }, { "epoch": 0.683473714783619, "grad_norm": 0.4843682050704956, "learning_rate": 0.0001, "loss": 1.7465, "step": 5883 }, { "epoch": 0.6835898925355794, "grad_norm": 0.44494950771331787, "learning_rate": 0.0001, "loss": 1.6064, "step": 5884 }, { "epoch": 0.6837060702875399, "grad_norm": 0.4825565814971924, "learning_rate": 0.0001, "loss": 1.6292, "step": 5885 }, { "epoch": 0.6838222480395004, "grad_norm": 0.4432181715965271, "learning_rate": 0.0001, "loss": 1.7055, "step": 5886 }, { "epoch": 0.6839384257914609, "grad_norm": 0.43164384365081787, "learning_rate": 0.0001, "loss": 1.6208, "step": 5887 }, { "epoch": 0.6840546035434214, "grad_norm": 0.4706867039203644, "learning_rate": 0.0001, "loss": 1.7136, "step": 5888 }, { "epoch": 0.6841707812953819, "grad_norm": 0.4217129647731781, "learning_rate": 0.0001, "loss": 1.4834, "step": 5889 }, { "epoch": 0.6842869590473424, "grad_norm": 0.4253685772418976, "learning_rate": 0.0001, "loss": 1.4565, "step": 5890 }, { "epoch": 0.684403136799303, "grad_norm": 0.431268572807312, "learning_rate": 0.0001, "loss": 1.6927, "step": 5891 }, { "epoch": 0.6845193145512635, "grad_norm": 0.44175827503204346, "learning_rate": 0.0001, "loss": 1.6133, "step": 5892 }, { "epoch": 0.684635492303224, "grad_norm": 0.41143807768821716, "learning_rate": 0.0001, "loss": 1.5274, "step": 5893 }, { "epoch": 0.6847516700551844, "grad_norm": 0.4632609784603119, "learning_rate": 0.0001, "loss": 1.6837, "step": 5894 }, { "epoch": 0.6848678478071449, "grad_norm": 0.45641466975212097, "learning_rate": 0.0001, "loss": 1.7331, "step": 5895 }, { "epoch": 0.6849840255591054, "grad_norm": 0.4799252152442932, "learning_rate": 0.0001, "loss": 1.6531, "step": 5896 }, { "epoch": 0.6851002033110659, "grad_norm": 0.42571592330932617, "learning_rate": 0.0001, "loss": 1.6587, "step": 5897 }, { "epoch": 0.6852163810630264, "grad_norm": 0.42581233382225037, "learning_rate": 0.0001, "loss": 1.5199, "step": 5898 }, { "epoch": 0.6853325588149869, "grad_norm": 0.4454191327095032, "learning_rate": 0.0001, "loss": 1.4827, "step": 5899 }, { "epoch": 0.6854487365669474, "grad_norm": 0.4443952143192291, "learning_rate": 0.0001, "loss": 1.3731, "step": 5900 }, { "epoch": 0.685564914318908, "grad_norm": 0.4548780918121338, "learning_rate": 0.0001, "loss": 1.5763, "step": 5901 }, { "epoch": 0.6856810920708685, "grad_norm": 0.4646033048629761, "learning_rate": 0.0001, "loss": 1.6999, "step": 5902 }, { "epoch": 0.685797269822829, "grad_norm": 0.4402289390563965, "learning_rate": 0.0001, "loss": 1.4804, "step": 5903 }, { "epoch": 0.6859134475747894, "grad_norm": 0.462422251701355, "learning_rate": 0.0001, "loss": 1.6013, "step": 5904 }, { "epoch": 0.6860296253267499, "grad_norm": 0.4708074629306793, "learning_rate": 0.0001, "loss": 1.7784, "step": 5905 }, { "epoch": 0.6861458030787104, "grad_norm": 0.4279741942882538, "learning_rate": 0.0001, "loss": 1.577, "step": 5906 }, { "epoch": 0.6862619808306709, "grad_norm": 0.45395079255104065, "learning_rate": 0.0001, "loss": 1.7005, "step": 5907 }, { "epoch": 0.6863781585826314, "grad_norm": 0.46580180525779724, "learning_rate": 0.0001, "loss": 1.7221, "step": 5908 }, { "epoch": 0.6864943363345919, "grad_norm": 0.44675102829933167, "learning_rate": 0.0001, "loss": 1.6527, "step": 5909 }, { "epoch": 0.6866105140865524, "grad_norm": 0.41608941555023193, "learning_rate": 0.0001, "loss": 1.4215, "step": 5910 }, { "epoch": 0.6867266918385129, "grad_norm": 0.4041667878627777, "learning_rate": 0.0001, "loss": 1.3818, "step": 5911 }, { "epoch": 0.6868428695904735, "grad_norm": 0.46621033549308777, "learning_rate": 0.0001, "loss": 1.6305, "step": 5912 }, { "epoch": 0.686959047342434, "grad_norm": 0.42089179158210754, "learning_rate": 0.0001, "loss": 1.3727, "step": 5913 }, { "epoch": 0.6870752250943944, "grad_norm": 0.43382903933525085, "learning_rate": 0.0001, "loss": 1.3902, "step": 5914 }, { "epoch": 0.6871914028463549, "grad_norm": 0.45685744285583496, "learning_rate": 0.0001, "loss": 1.7223, "step": 5915 }, { "epoch": 0.6873075805983154, "grad_norm": 0.4664427936077118, "learning_rate": 0.0001, "loss": 1.5859, "step": 5916 }, { "epoch": 0.6874237583502759, "grad_norm": 0.43341436982154846, "learning_rate": 0.0001, "loss": 1.5792, "step": 5917 }, { "epoch": 0.6875399361022364, "grad_norm": 0.4530865252017975, "learning_rate": 0.0001, "loss": 1.6366, "step": 5918 }, { "epoch": 0.6876561138541969, "grad_norm": 0.4333666265010834, "learning_rate": 0.0001, "loss": 1.4821, "step": 5919 }, { "epoch": 0.6877722916061574, "grad_norm": 0.4299314618110657, "learning_rate": 0.0001, "loss": 1.5067, "step": 5920 }, { "epoch": 0.6878884693581179, "grad_norm": 0.43617698550224304, "learning_rate": 0.0001, "loss": 1.7213, "step": 5921 }, { "epoch": 0.6880046471100785, "grad_norm": 0.4485790431499481, "learning_rate": 0.0001, "loss": 1.4229, "step": 5922 }, { "epoch": 0.688120824862039, "grad_norm": 0.44341012835502625, "learning_rate": 0.0001, "loss": 1.4585, "step": 5923 }, { "epoch": 0.6882370026139994, "grad_norm": 0.4278321862220764, "learning_rate": 0.0001, "loss": 1.5298, "step": 5924 }, { "epoch": 0.6883531803659599, "grad_norm": 0.4742536246776581, "learning_rate": 0.0001, "loss": 1.7254, "step": 5925 }, { "epoch": 0.6884693581179204, "grad_norm": 0.43478327989578247, "learning_rate": 0.0001, "loss": 1.4958, "step": 5926 }, { "epoch": 0.6885855358698809, "grad_norm": 0.4236944317817688, "learning_rate": 0.0001, "loss": 1.4795, "step": 5927 }, { "epoch": 0.6887017136218414, "grad_norm": 0.4370484948158264, "learning_rate": 0.0001, "loss": 1.6045, "step": 5928 }, { "epoch": 0.6888178913738019, "grad_norm": 0.45525872707366943, "learning_rate": 0.0001, "loss": 1.6832, "step": 5929 }, { "epoch": 0.6889340691257624, "grad_norm": 0.4394777715206146, "learning_rate": 0.0001, "loss": 1.542, "step": 5930 }, { "epoch": 0.6890502468777229, "grad_norm": 0.4571029841899872, "learning_rate": 0.0001, "loss": 1.7596, "step": 5931 }, { "epoch": 0.6891664246296834, "grad_norm": 0.4554816782474518, "learning_rate": 0.0001, "loss": 1.6739, "step": 5932 }, { "epoch": 0.689282602381644, "grad_norm": 0.43854042887687683, "learning_rate": 0.0001, "loss": 1.5481, "step": 5933 }, { "epoch": 0.6893987801336044, "grad_norm": 0.4328182339668274, "learning_rate": 0.0001, "loss": 1.644, "step": 5934 }, { "epoch": 0.6895149578855649, "grad_norm": 0.44765332341194153, "learning_rate": 0.0001, "loss": 1.6783, "step": 5935 }, { "epoch": 0.6896311356375254, "grad_norm": 0.43624910712242126, "learning_rate": 0.0001, "loss": 1.6246, "step": 5936 }, { "epoch": 0.6897473133894859, "grad_norm": 0.46665430068969727, "learning_rate": 0.0001, "loss": 1.6061, "step": 5937 }, { "epoch": 0.6898634911414464, "grad_norm": 0.45889371633529663, "learning_rate": 0.0001, "loss": 1.551, "step": 5938 }, { "epoch": 0.6899796688934069, "grad_norm": 0.4659807085990906, "learning_rate": 0.0001, "loss": 1.878, "step": 5939 }, { "epoch": 0.6900958466453674, "grad_norm": 0.45350781083106995, "learning_rate": 0.0001, "loss": 1.6036, "step": 5940 }, { "epoch": 0.6902120243973279, "grad_norm": 0.44755759835243225, "learning_rate": 0.0001, "loss": 1.6358, "step": 5941 }, { "epoch": 0.6903282021492884, "grad_norm": 0.39925143122673035, "learning_rate": 0.0001, "loss": 1.4103, "step": 5942 }, { "epoch": 0.690444379901249, "grad_norm": 0.4676305055618286, "learning_rate": 0.0001, "loss": 1.7827, "step": 5943 }, { "epoch": 0.6905605576532095, "grad_norm": 0.42078080773353577, "learning_rate": 0.0001, "loss": 1.3661, "step": 5944 }, { "epoch": 0.6906767354051699, "grad_norm": 0.4294394254684448, "learning_rate": 0.0001, "loss": 1.5364, "step": 5945 }, { "epoch": 0.6907929131571304, "grad_norm": 0.4754204750061035, "learning_rate": 0.0001, "loss": 1.4769, "step": 5946 }, { "epoch": 0.6909090909090909, "grad_norm": 0.4373209476470947, "learning_rate": 0.0001, "loss": 1.6253, "step": 5947 }, { "epoch": 0.6910252686610514, "grad_norm": 0.4468858242034912, "learning_rate": 0.0001, "loss": 1.4982, "step": 5948 }, { "epoch": 0.6911414464130119, "grad_norm": 0.41958677768707275, "learning_rate": 0.0001, "loss": 1.6706, "step": 5949 }, { "epoch": 0.6912576241649724, "grad_norm": 0.4472271203994751, "learning_rate": 0.0001, "loss": 1.6119, "step": 5950 }, { "epoch": 0.6913738019169329, "grad_norm": 0.4623844623565674, "learning_rate": 0.0001, "loss": 1.6781, "step": 5951 }, { "epoch": 0.6914899796688934, "grad_norm": 0.4381425082683563, "learning_rate": 0.0001, "loss": 1.5165, "step": 5952 }, { "epoch": 0.691606157420854, "grad_norm": 0.4867214858531952, "learning_rate": 0.0001, "loss": 1.7421, "step": 5953 }, { "epoch": 0.6917223351728145, "grad_norm": 0.45404472947120667, "learning_rate": 0.0001, "loss": 1.6872, "step": 5954 }, { "epoch": 0.6918385129247749, "grad_norm": 0.45104196667671204, "learning_rate": 0.0001, "loss": 1.759, "step": 5955 }, { "epoch": 0.6919546906767354, "grad_norm": 0.43430057168006897, "learning_rate": 0.0001, "loss": 1.6374, "step": 5956 }, { "epoch": 0.6920708684286959, "grad_norm": 0.4231088161468506, "learning_rate": 0.0001, "loss": 1.5436, "step": 5957 }, { "epoch": 0.6921870461806564, "grad_norm": 0.43777701258659363, "learning_rate": 0.0001, "loss": 1.5665, "step": 5958 }, { "epoch": 0.6923032239326169, "grad_norm": 0.4415545165538788, "learning_rate": 0.0001, "loss": 1.6971, "step": 5959 }, { "epoch": 0.6924194016845774, "grad_norm": 0.4693138599395752, "learning_rate": 0.0001, "loss": 1.827, "step": 5960 }, { "epoch": 0.6925355794365379, "grad_norm": 0.458947092294693, "learning_rate": 0.0001, "loss": 1.7383, "step": 5961 }, { "epoch": 0.6926517571884984, "grad_norm": 0.47753122448921204, "learning_rate": 0.0001, "loss": 1.6538, "step": 5962 }, { "epoch": 0.6927679349404589, "grad_norm": 0.44865846633911133, "learning_rate": 0.0001, "loss": 1.5345, "step": 5963 }, { "epoch": 0.6928841126924195, "grad_norm": 0.46769386529922485, "learning_rate": 0.0001, "loss": 1.6535, "step": 5964 }, { "epoch": 0.6930002904443799, "grad_norm": 0.46594002842903137, "learning_rate": 0.0001, "loss": 1.5307, "step": 5965 }, { "epoch": 0.6931164681963404, "grad_norm": 0.44469648599624634, "learning_rate": 0.0001, "loss": 1.6999, "step": 5966 }, { "epoch": 0.6932326459483009, "grad_norm": 0.44019195437431335, "learning_rate": 0.0001, "loss": 1.6059, "step": 5967 }, { "epoch": 0.6933488237002614, "grad_norm": 0.4828648567199707, "learning_rate": 0.0001, "loss": 1.734, "step": 5968 }, { "epoch": 0.6934650014522219, "grad_norm": 0.4316853880882263, "learning_rate": 0.0001, "loss": 1.5491, "step": 5969 }, { "epoch": 0.6935811792041824, "grad_norm": 0.45684826374053955, "learning_rate": 0.0001, "loss": 1.496, "step": 5970 }, { "epoch": 0.6936973569561429, "grad_norm": 0.4338705539703369, "learning_rate": 0.0001, "loss": 1.6207, "step": 5971 }, { "epoch": 0.6938135347081034, "grad_norm": 0.46950671076774597, "learning_rate": 0.0001, "loss": 1.6252, "step": 5972 }, { "epoch": 0.6939297124600639, "grad_norm": 0.4339956045150757, "learning_rate": 0.0001, "loss": 1.5908, "step": 5973 }, { "epoch": 0.6940458902120245, "grad_norm": 0.5043813586235046, "learning_rate": 0.0001, "loss": 1.6671, "step": 5974 }, { "epoch": 0.6941620679639849, "grad_norm": 0.4209142029285431, "learning_rate": 0.0001, "loss": 1.6935, "step": 5975 }, { "epoch": 0.6942782457159454, "grad_norm": 0.4390365779399872, "learning_rate": 0.0001, "loss": 1.6409, "step": 5976 }, { "epoch": 0.6943944234679059, "grad_norm": 0.4435301423072815, "learning_rate": 0.0001, "loss": 1.4815, "step": 5977 }, { "epoch": 0.6945106012198664, "grad_norm": 0.4381083548069, "learning_rate": 0.0001, "loss": 1.5671, "step": 5978 }, { "epoch": 0.6946267789718269, "grad_norm": 0.4319905638694763, "learning_rate": 0.0001, "loss": 1.5669, "step": 5979 }, { "epoch": 0.6947429567237874, "grad_norm": 0.42140626907348633, "learning_rate": 0.0001, "loss": 1.6624, "step": 5980 }, { "epoch": 0.6948591344757479, "grad_norm": 0.46980568766593933, "learning_rate": 0.0001, "loss": 1.8191, "step": 5981 }, { "epoch": 0.6949753122277084, "grad_norm": 0.44185250997543335, "learning_rate": 0.0001, "loss": 1.6686, "step": 5982 }, { "epoch": 0.6950914899796689, "grad_norm": 0.4495440423488617, "learning_rate": 0.0001, "loss": 1.7767, "step": 5983 }, { "epoch": 0.6952076677316293, "grad_norm": 0.43711593747138977, "learning_rate": 0.0001, "loss": 1.4925, "step": 5984 }, { "epoch": 0.6953238454835899, "grad_norm": 0.44120272994041443, "learning_rate": 0.0001, "loss": 1.6307, "step": 5985 }, { "epoch": 0.6954400232355504, "grad_norm": 0.46513938903808594, "learning_rate": 0.0001, "loss": 1.7124, "step": 5986 }, { "epoch": 0.6955562009875109, "grad_norm": 0.43529126048088074, "learning_rate": 0.0001, "loss": 1.548, "step": 5987 }, { "epoch": 0.6956723787394714, "grad_norm": 0.4164985120296478, "learning_rate": 0.0001, "loss": 1.6025, "step": 5988 }, { "epoch": 0.6957885564914319, "grad_norm": 0.4299083352088928, "learning_rate": 0.0001, "loss": 1.6278, "step": 5989 }, { "epoch": 0.6959047342433924, "grad_norm": 0.4332889914512634, "learning_rate": 0.0001, "loss": 1.5584, "step": 5990 }, { "epoch": 0.6960209119953529, "grad_norm": 0.4063509404659271, "learning_rate": 0.0001, "loss": 1.529, "step": 5991 }, { "epoch": 0.6961370897473134, "grad_norm": 0.4663069248199463, "learning_rate": 0.0001, "loss": 1.7359, "step": 5992 }, { "epoch": 0.6962532674992739, "grad_norm": 0.46274805068969727, "learning_rate": 0.0001, "loss": 1.6643, "step": 5993 }, { "epoch": 0.6963694452512343, "grad_norm": 0.45924481749534607, "learning_rate": 0.0001, "loss": 1.6665, "step": 5994 }, { "epoch": 0.6964856230031949, "grad_norm": 0.4352482557296753, "learning_rate": 0.0001, "loss": 1.5231, "step": 5995 }, { "epoch": 0.6966018007551554, "grad_norm": 0.4291403591632843, "learning_rate": 0.0001, "loss": 1.5539, "step": 5996 }, { "epoch": 0.6967179785071159, "grad_norm": 0.5188395977020264, "learning_rate": 0.0001, "loss": 1.6233, "step": 5997 }, { "epoch": 0.6968341562590764, "grad_norm": 0.4158150851726532, "learning_rate": 0.0001, "loss": 1.4289, "step": 5998 }, { "epoch": 0.6969503340110369, "grad_norm": 0.461252897977829, "learning_rate": 0.0001, "loss": 1.7773, "step": 5999 }, { "epoch": 0.6970665117629974, "grad_norm": 0.40928980708122253, "learning_rate": 0.0001, "loss": 1.4985, "step": 6000 }, { "epoch": 0.6971826895149579, "grad_norm": 0.4179105758666992, "learning_rate": 0.0001, "loss": 1.6255, "step": 6001 }, { "epoch": 0.6972988672669184, "grad_norm": 0.4326860010623932, "learning_rate": 0.0001, "loss": 1.7069, "step": 6002 }, { "epoch": 0.6974150450188789, "grad_norm": 0.42176535725593567, "learning_rate": 0.0001, "loss": 1.4696, "step": 6003 }, { "epoch": 0.6975312227708393, "grad_norm": 0.4395149350166321, "learning_rate": 0.0001, "loss": 1.5552, "step": 6004 }, { "epoch": 0.6976474005227998, "grad_norm": 0.43788623809814453, "learning_rate": 0.0001, "loss": 1.6544, "step": 6005 }, { "epoch": 0.6977635782747604, "grad_norm": 0.4315040111541748, "learning_rate": 0.0001, "loss": 1.6495, "step": 6006 }, { "epoch": 0.6978797560267209, "grad_norm": 0.43025481700897217, "learning_rate": 0.0001, "loss": 1.6953, "step": 6007 }, { "epoch": 0.6979959337786814, "grad_norm": 0.4490424692630768, "learning_rate": 0.0001, "loss": 1.6173, "step": 6008 }, { "epoch": 0.6981121115306419, "grad_norm": 0.4578996002674103, "learning_rate": 0.0001, "loss": 1.7515, "step": 6009 }, { "epoch": 0.6982282892826024, "grad_norm": 0.42355626821517944, "learning_rate": 0.0001, "loss": 1.5837, "step": 6010 }, { "epoch": 0.6983444670345629, "grad_norm": 0.4342043399810791, "learning_rate": 0.0001, "loss": 1.5084, "step": 6011 }, { "epoch": 0.6984606447865234, "grad_norm": 0.41684943437576294, "learning_rate": 0.0001, "loss": 1.4773, "step": 6012 }, { "epoch": 0.6985768225384839, "grad_norm": 0.4408121705055237, "learning_rate": 0.0001, "loss": 1.4747, "step": 6013 }, { "epoch": 0.6986930002904443, "grad_norm": 0.43644100427627563, "learning_rate": 0.0001, "loss": 1.4511, "step": 6014 }, { "epoch": 0.6988091780424048, "grad_norm": 0.4824478328227997, "learning_rate": 0.0001, "loss": 1.5967, "step": 6015 }, { "epoch": 0.6989253557943654, "grad_norm": 0.4514261782169342, "learning_rate": 0.0001, "loss": 1.6404, "step": 6016 }, { "epoch": 0.6990415335463259, "grad_norm": 0.4663759768009186, "learning_rate": 0.0001, "loss": 1.6674, "step": 6017 }, { "epoch": 0.6991577112982864, "grad_norm": 0.45603466033935547, "learning_rate": 0.0001, "loss": 1.5931, "step": 6018 }, { "epoch": 0.6992738890502469, "grad_norm": 0.44485101103782654, "learning_rate": 0.0001, "loss": 1.614, "step": 6019 }, { "epoch": 0.6993900668022074, "grad_norm": 0.4779071509838104, "learning_rate": 0.0001, "loss": 1.7452, "step": 6020 }, { "epoch": 0.6995062445541679, "grad_norm": 0.4669143259525299, "learning_rate": 0.0001, "loss": 1.6123, "step": 6021 }, { "epoch": 0.6996224223061284, "grad_norm": 0.4332158863544464, "learning_rate": 0.0001, "loss": 1.6481, "step": 6022 }, { "epoch": 0.6997386000580889, "grad_norm": 0.4396936595439911, "learning_rate": 0.0001, "loss": 1.7863, "step": 6023 }, { "epoch": 0.6998547778100493, "grad_norm": 0.43175163865089417, "learning_rate": 0.0001, "loss": 1.5967, "step": 6024 }, { "epoch": 0.6999709555620098, "grad_norm": 0.4532018005847931, "learning_rate": 0.0001, "loss": 1.6209, "step": 6025 }, { "epoch": 0.7000871333139703, "grad_norm": 0.4363815188407898, "learning_rate": 0.0001, "loss": 1.5637, "step": 6026 }, { "epoch": 0.7002033110659309, "grad_norm": 0.4686264097690582, "learning_rate": 0.0001, "loss": 1.6303, "step": 6027 }, { "epoch": 0.7003194888178914, "grad_norm": 0.4810371696949005, "learning_rate": 0.0001, "loss": 1.5524, "step": 6028 }, { "epoch": 0.7004356665698519, "grad_norm": 0.4395377039909363, "learning_rate": 0.0001, "loss": 1.589, "step": 6029 }, { "epoch": 0.7005518443218124, "grad_norm": 0.43942928314208984, "learning_rate": 0.0001, "loss": 1.4534, "step": 6030 }, { "epoch": 0.7006680220737729, "grad_norm": 0.43220555782318115, "learning_rate": 0.0001, "loss": 1.6113, "step": 6031 }, { "epoch": 0.7007841998257334, "grad_norm": 0.44977232813835144, "learning_rate": 0.0001, "loss": 1.4633, "step": 6032 }, { "epoch": 0.7009003775776939, "grad_norm": 0.4994456171989441, "learning_rate": 0.0001, "loss": 1.729, "step": 6033 }, { "epoch": 0.7010165553296543, "grad_norm": 0.44498246908187866, "learning_rate": 0.0001, "loss": 1.6094, "step": 6034 }, { "epoch": 0.7011327330816148, "grad_norm": 0.4909377992153168, "learning_rate": 0.0001, "loss": 1.7201, "step": 6035 }, { "epoch": 0.7012489108335753, "grad_norm": 0.4470892548561096, "learning_rate": 0.0001, "loss": 1.554, "step": 6036 }, { "epoch": 0.7013650885855359, "grad_norm": 0.47130686044692993, "learning_rate": 0.0001, "loss": 1.7369, "step": 6037 }, { "epoch": 0.7014812663374964, "grad_norm": 0.4447838068008423, "learning_rate": 0.0001, "loss": 1.5377, "step": 6038 }, { "epoch": 0.7015974440894569, "grad_norm": 0.48111826181411743, "learning_rate": 0.0001, "loss": 1.7131, "step": 6039 }, { "epoch": 0.7017136218414174, "grad_norm": 0.4512770175933838, "learning_rate": 0.0001, "loss": 1.6969, "step": 6040 }, { "epoch": 0.7018297995933779, "grad_norm": 0.4442228078842163, "learning_rate": 0.0001, "loss": 1.5082, "step": 6041 }, { "epoch": 0.7019459773453384, "grad_norm": 0.49722543358802795, "learning_rate": 0.0001, "loss": 1.7205, "step": 6042 }, { "epoch": 0.7020621550972989, "grad_norm": 0.4556099474430084, "learning_rate": 0.0001, "loss": 1.498, "step": 6043 }, { "epoch": 0.7021783328492593, "grad_norm": 0.465631365776062, "learning_rate": 0.0001, "loss": 1.4306, "step": 6044 }, { "epoch": 0.7022945106012198, "grad_norm": 0.4637199938297272, "learning_rate": 0.0001, "loss": 1.7575, "step": 6045 }, { "epoch": 0.7024106883531803, "grad_norm": 0.5277969241142273, "learning_rate": 0.0001, "loss": 1.6427, "step": 6046 }, { "epoch": 0.7025268661051408, "grad_norm": 0.43430525064468384, "learning_rate": 0.0001, "loss": 1.6548, "step": 6047 }, { "epoch": 0.7026430438571014, "grad_norm": 0.4051744043827057, "learning_rate": 0.0001, "loss": 1.357, "step": 6048 }, { "epoch": 0.7027592216090619, "grad_norm": 0.4759519696235657, "learning_rate": 0.0001, "loss": 1.6906, "step": 6049 }, { "epoch": 0.7028753993610224, "grad_norm": 0.4706842005252838, "learning_rate": 0.0001, "loss": 1.6156, "step": 6050 }, { "epoch": 0.7029915771129829, "grad_norm": 0.46256595849990845, "learning_rate": 0.0001, "loss": 1.7044, "step": 6051 }, { "epoch": 0.7031077548649434, "grad_norm": 0.43394848704338074, "learning_rate": 0.0001, "loss": 1.6269, "step": 6052 }, { "epoch": 0.7032239326169039, "grad_norm": 0.44326069951057434, "learning_rate": 0.0001, "loss": 1.5096, "step": 6053 }, { "epoch": 0.7033401103688643, "grad_norm": 0.47456949949264526, "learning_rate": 0.0001, "loss": 1.7654, "step": 6054 }, { "epoch": 0.7034562881208248, "grad_norm": 0.42411813139915466, "learning_rate": 0.0001, "loss": 1.5732, "step": 6055 }, { "epoch": 0.7035724658727853, "grad_norm": 0.43997639417648315, "learning_rate": 0.0001, "loss": 1.6792, "step": 6056 }, { "epoch": 0.7036886436247458, "grad_norm": 0.3923532962799072, "learning_rate": 0.0001, "loss": 1.4779, "step": 6057 }, { "epoch": 0.7038048213767064, "grad_norm": 0.4113970994949341, "learning_rate": 0.0001, "loss": 1.4705, "step": 6058 }, { "epoch": 0.7039209991286669, "grad_norm": 0.42837658524513245, "learning_rate": 0.0001, "loss": 1.4292, "step": 6059 }, { "epoch": 0.7040371768806274, "grad_norm": 0.44520092010498047, "learning_rate": 0.0001, "loss": 1.5377, "step": 6060 }, { "epoch": 0.7041533546325879, "grad_norm": 0.43117523193359375, "learning_rate": 0.0001, "loss": 1.3792, "step": 6061 }, { "epoch": 0.7042695323845484, "grad_norm": 0.4564533233642578, "learning_rate": 0.0001, "loss": 1.6178, "step": 6062 }, { "epoch": 0.7043857101365089, "grad_norm": 0.4633900225162506, "learning_rate": 0.0001, "loss": 1.6223, "step": 6063 }, { "epoch": 0.7045018878884693, "grad_norm": 0.439037561416626, "learning_rate": 0.0001, "loss": 1.5777, "step": 6064 }, { "epoch": 0.7046180656404298, "grad_norm": 0.42920568585395813, "learning_rate": 0.0001, "loss": 1.6043, "step": 6065 }, { "epoch": 0.7047342433923903, "grad_norm": 0.44558966159820557, "learning_rate": 0.0001, "loss": 1.5141, "step": 6066 }, { "epoch": 0.7048504211443508, "grad_norm": 0.4488900899887085, "learning_rate": 0.0001, "loss": 1.6281, "step": 6067 }, { "epoch": 0.7049665988963113, "grad_norm": 0.4773916006088257, "learning_rate": 0.0001, "loss": 1.6774, "step": 6068 }, { "epoch": 0.7050827766482719, "grad_norm": 0.4205687344074249, "learning_rate": 0.0001, "loss": 1.5134, "step": 6069 }, { "epoch": 0.7051989544002324, "grad_norm": 0.4228312373161316, "learning_rate": 0.0001, "loss": 1.5634, "step": 6070 }, { "epoch": 0.7053151321521929, "grad_norm": 0.42489486932754517, "learning_rate": 0.0001, "loss": 1.5474, "step": 6071 }, { "epoch": 0.7054313099041534, "grad_norm": 0.432058185338974, "learning_rate": 0.0001, "loss": 1.5865, "step": 6072 }, { "epoch": 0.7055474876561139, "grad_norm": 0.4359389543533325, "learning_rate": 0.0001, "loss": 1.5741, "step": 6073 }, { "epoch": 0.7056636654080743, "grad_norm": 0.46697720885276794, "learning_rate": 0.0001, "loss": 1.5813, "step": 6074 }, { "epoch": 0.7057798431600348, "grad_norm": 0.4296872317790985, "learning_rate": 0.0001, "loss": 1.6521, "step": 6075 }, { "epoch": 0.7058960209119953, "grad_norm": 0.4327179491519928, "learning_rate": 0.0001, "loss": 1.5553, "step": 6076 }, { "epoch": 0.7060121986639558, "grad_norm": 0.49642127752304077, "learning_rate": 0.0001, "loss": 1.6104, "step": 6077 }, { "epoch": 0.7061283764159163, "grad_norm": 0.44212982058525085, "learning_rate": 0.0001, "loss": 1.6172, "step": 6078 }, { "epoch": 0.7062445541678769, "grad_norm": 0.45311763882637024, "learning_rate": 0.0001, "loss": 1.6611, "step": 6079 }, { "epoch": 0.7063607319198374, "grad_norm": 0.4574792981147766, "learning_rate": 0.0001, "loss": 1.5746, "step": 6080 }, { "epoch": 0.7064769096717979, "grad_norm": 0.4645473062992096, "learning_rate": 0.0001, "loss": 1.5848, "step": 6081 }, { "epoch": 0.7065930874237584, "grad_norm": 0.4442659318447113, "learning_rate": 0.0001, "loss": 1.58, "step": 6082 }, { "epoch": 0.7067092651757189, "grad_norm": 0.41226938366889954, "learning_rate": 0.0001, "loss": 1.518, "step": 6083 }, { "epoch": 0.7068254429276793, "grad_norm": 0.4444557726383209, "learning_rate": 0.0001, "loss": 1.6688, "step": 6084 }, { "epoch": 0.7069416206796398, "grad_norm": 0.4641963839530945, "learning_rate": 0.0001, "loss": 1.4054, "step": 6085 }, { "epoch": 0.7070577984316003, "grad_norm": 0.44319379329681396, "learning_rate": 0.0001, "loss": 1.5801, "step": 6086 }, { "epoch": 0.7071739761835608, "grad_norm": 0.4771478474140167, "learning_rate": 0.0001, "loss": 1.6775, "step": 6087 }, { "epoch": 0.7072901539355213, "grad_norm": 0.45707952976226807, "learning_rate": 0.0001, "loss": 1.5976, "step": 6088 }, { "epoch": 0.7074063316874818, "grad_norm": 0.49724724888801575, "learning_rate": 0.0001, "loss": 1.6874, "step": 6089 }, { "epoch": 0.7075225094394424, "grad_norm": 0.44829389452934265, "learning_rate": 0.0001, "loss": 1.5349, "step": 6090 }, { "epoch": 0.7076386871914029, "grad_norm": 0.4388028681278229, "learning_rate": 0.0001, "loss": 1.5188, "step": 6091 }, { "epoch": 0.7077548649433634, "grad_norm": 0.41982501745224, "learning_rate": 0.0001, "loss": 1.604, "step": 6092 }, { "epoch": 0.7078710426953239, "grad_norm": 0.4299807846546173, "learning_rate": 0.0001, "loss": 1.55, "step": 6093 }, { "epoch": 0.7079872204472843, "grad_norm": 0.42945340275764465, "learning_rate": 0.0001, "loss": 1.5398, "step": 6094 }, { "epoch": 0.7081033981992448, "grad_norm": 0.4422987401485443, "learning_rate": 0.0001, "loss": 1.6441, "step": 6095 }, { "epoch": 0.7082195759512053, "grad_norm": 0.42367589473724365, "learning_rate": 0.0001, "loss": 1.6466, "step": 6096 }, { "epoch": 0.7083357537031658, "grad_norm": 0.4437691867351532, "learning_rate": 0.0001, "loss": 1.6916, "step": 6097 }, { "epoch": 0.7084519314551263, "grad_norm": 0.434927374124527, "learning_rate": 0.0001, "loss": 1.6145, "step": 6098 }, { "epoch": 0.7085681092070868, "grad_norm": 0.5107674598693848, "learning_rate": 0.0001, "loss": 1.6533, "step": 6099 }, { "epoch": 0.7086842869590474, "grad_norm": 0.41166844964027405, "learning_rate": 0.0001, "loss": 1.5646, "step": 6100 }, { "epoch": 0.7088004647110079, "grad_norm": 0.42547357082366943, "learning_rate": 0.0001, "loss": 1.5394, "step": 6101 }, { "epoch": 0.7089166424629684, "grad_norm": 0.44167038798332214, "learning_rate": 0.0001, "loss": 1.5377, "step": 6102 }, { "epoch": 0.7090328202149289, "grad_norm": 0.4662346839904785, "learning_rate": 0.0001, "loss": 1.5511, "step": 6103 }, { "epoch": 0.7091489979668893, "grad_norm": 0.43853190541267395, "learning_rate": 0.0001, "loss": 1.671, "step": 6104 }, { "epoch": 0.7092651757188498, "grad_norm": 0.4340886175632477, "learning_rate": 0.0001, "loss": 1.6179, "step": 6105 }, { "epoch": 0.7093813534708103, "grad_norm": 0.4345012605190277, "learning_rate": 0.0001, "loss": 1.6163, "step": 6106 }, { "epoch": 0.7094975312227708, "grad_norm": 0.43658679723739624, "learning_rate": 0.0001, "loss": 1.5974, "step": 6107 }, { "epoch": 0.7096137089747313, "grad_norm": 0.42257410287857056, "learning_rate": 0.0001, "loss": 1.5284, "step": 6108 }, { "epoch": 0.7097298867266918, "grad_norm": 0.4617374539375305, "learning_rate": 0.0001, "loss": 1.7341, "step": 6109 }, { "epoch": 0.7098460644786523, "grad_norm": 0.4545523524284363, "learning_rate": 0.0001, "loss": 1.6107, "step": 6110 }, { "epoch": 0.7099622422306129, "grad_norm": 0.4178352355957031, "learning_rate": 0.0001, "loss": 1.5278, "step": 6111 }, { "epoch": 0.7100784199825734, "grad_norm": 0.4775643050670624, "learning_rate": 0.0001, "loss": 1.6867, "step": 6112 }, { "epoch": 0.7101945977345339, "grad_norm": 0.4341333508491516, "learning_rate": 0.0001, "loss": 1.492, "step": 6113 }, { "epoch": 0.7103107754864944, "grad_norm": 0.44754558801651, "learning_rate": 0.0001, "loss": 1.5143, "step": 6114 }, { "epoch": 0.7104269532384548, "grad_norm": 0.40256983041763306, "learning_rate": 0.0001, "loss": 1.5979, "step": 6115 }, { "epoch": 0.7105431309904153, "grad_norm": 0.4549466669559479, "learning_rate": 0.0001, "loss": 1.5838, "step": 6116 }, { "epoch": 0.7106593087423758, "grad_norm": 0.45725107192993164, "learning_rate": 0.0001, "loss": 1.5894, "step": 6117 }, { "epoch": 0.7107754864943363, "grad_norm": 0.4477519094944, "learning_rate": 0.0001, "loss": 1.6944, "step": 6118 }, { "epoch": 0.7108916642462968, "grad_norm": 0.4426574110984802, "learning_rate": 0.0001, "loss": 1.4692, "step": 6119 }, { "epoch": 0.7110078419982573, "grad_norm": 0.42777371406555176, "learning_rate": 0.0001, "loss": 1.4743, "step": 6120 }, { "epoch": 0.7111240197502179, "grad_norm": 0.43052244186401367, "learning_rate": 0.0001, "loss": 1.6027, "step": 6121 }, { "epoch": 0.7112401975021784, "grad_norm": 0.42970481514930725, "learning_rate": 0.0001, "loss": 1.6517, "step": 6122 }, { "epoch": 0.7113563752541389, "grad_norm": 0.4782750606536865, "learning_rate": 0.0001, "loss": 1.5925, "step": 6123 }, { "epoch": 0.7114725530060994, "grad_norm": 0.45479482412338257, "learning_rate": 0.0001, "loss": 1.6684, "step": 6124 }, { "epoch": 0.7115887307580598, "grad_norm": 0.43917152285575867, "learning_rate": 0.0001, "loss": 1.5785, "step": 6125 }, { "epoch": 0.7117049085100203, "grad_norm": 0.4511852562427521, "learning_rate": 0.0001, "loss": 1.6015, "step": 6126 }, { "epoch": 0.7118210862619808, "grad_norm": 0.4351733326911926, "learning_rate": 0.0001, "loss": 1.7038, "step": 6127 }, { "epoch": 0.7119372640139413, "grad_norm": 0.458448588848114, "learning_rate": 0.0001, "loss": 1.588, "step": 6128 }, { "epoch": 0.7120534417659018, "grad_norm": 0.41360458731651306, "learning_rate": 0.0001, "loss": 1.3792, "step": 6129 }, { "epoch": 0.7121696195178623, "grad_norm": 0.45411717891693115, "learning_rate": 0.0001, "loss": 1.6064, "step": 6130 }, { "epoch": 0.7122857972698228, "grad_norm": 0.4399266242980957, "learning_rate": 0.0001, "loss": 1.589, "step": 6131 }, { "epoch": 0.7124019750217834, "grad_norm": 0.49353474378585815, "learning_rate": 0.0001, "loss": 1.7608, "step": 6132 }, { "epoch": 0.7125181527737439, "grad_norm": 0.4594828188419342, "learning_rate": 0.0001, "loss": 1.5687, "step": 6133 }, { "epoch": 0.7126343305257044, "grad_norm": 0.4374282658100128, "learning_rate": 0.0001, "loss": 1.4874, "step": 6134 }, { "epoch": 0.7127505082776648, "grad_norm": 0.45439621806144714, "learning_rate": 0.0001, "loss": 1.5785, "step": 6135 }, { "epoch": 0.7128666860296253, "grad_norm": 0.4368216097354889, "learning_rate": 0.0001, "loss": 1.6585, "step": 6136 }, { "epoch": 0.7129828637815858, "grad_norm": 0.4321901202201843, "learning_rate": 0.0001, "loss": 1.6303, "step": 6137 }, { "epoch": 0.7130990415335463, "grad_norm": 0.4542466402053833, "learning_rate": 0.0001, "loss": 1.5449, "step": 6138 }, { "epoch": 0.7132152192855068, "grad_norm": 0.43192481994628906, "learning_rate": 0.0001, "loss": 1.6529, "step": 6139 }, { "epoch": 0.7133313970374673, "grad_norm": 0.46249476075172424, "learning_rate": 0.0001, "loss": 1.6503, "step": 6140 }, { "epoch": 0.7134475747894278, "grad_norm": 0.4444749653339386, "learning_rate": 0.0001, "loss": 1.7026, "step": 6141 }, { "epoch": 0.7135637525413884, "grad_norm": 0.4751194417476654, "learning_rate": 0.0001, "loss": 1.6593, "step": 6142 }, { "epoch": 0.7136799302933489, "grad_norm": 0.4836347699165344, "learning_rate": 0.0001, "loss": 1.6869, "step": 6143 }, { "epoch": 0.7137961080453094, "grad_norm": 0.4470170736312866, "learning_rate": 0.0001, "loss": 1.3949, "step": 6144 }, { "epoch": 0.7139122857972698, "grad_norm": 0.43671542406082153, "learning_rate": 0.0001, "loss": 1.6338, "step": 6145 }, { "epoch": 0.7140284635492303, "grad_norm": 0.4873625636100769, "learning_rate": 0.0001, "loss": 1.7472, "step": 6146 }, { "epoch": 0.7141446413011908, "grad_norm": 0.43032151460647583, "learning_rate": 0.0001, "loss": 1.5258, "step": 6147 }, { "epoch": 0.7142608190531513, "grad_norm": 0.445576936006546, "learning_rate": 0.0001, "loss": 1.6298, "step": 6148 }, { "epoch": 0.7143769968051118, "grad_norm": 0.450259804725647, "learning_rate": 0.0001, "loss": 1.5595, "step": 6149 }, { "epoch": 0.7144931745570723, "grad_norm": 0.46049565076828003, "learning_rate": 0.0001, "loss": 1.5674, "step": 6150 }, { "epoch": 0.7146093523090328, "grad_norm": 0.44381004571914673, "learning_rate": 0.0001, "loss": 1.5347, "step": 6151 }, { "epoch": 0.7147255300609934, "grad_norm": 0.44434526562690735, "learning_rate": 0.0001, "loss": 1.5329, "step": 6152 }, { "epoch": 0.7148417078129539, "grad_norm": 0.4273211658000946, "learning_rate": 0.0001, "loss": 1.498, "step": 6153 }, { "epoch": 0.7149578855649144, "grad_norm": 0.46949854493141174, "learning_rate": 0.0001, "loss": 1.6709, "step": 6154 }, { "epoch": 0.7150740633168748, "grad_norm": 0.40654122829437256, "learning_rate": 0.0001, "loss": 1.4843, "step": 6155 }, { "epoch": 0.7151902410688353, "grad_norm": 0.4765225946903229, "learning_rate": 0.0001, "loss": 1.7118, "step": 6156 }, { "epoch": 0.7153064188207958, "grad_norm": 0.46587619185447693, "learning_rate": 0.0001, "loss": 1.6022, "step": 6157 }, { "epoch": 0.7154225965727563, "grad_norm": 0.4613794684410095, "learning_rate": 0.0001, "loss": 1.7496, "step": 6158 }, { "epoch": 0.7155387743247168, "grad_norm": 0.4671042263507843, "learning_rate": 0.0001, "loss": 1.6173, "step": 6159 }, { "epoch": 0.7156549520766773, "grad_norm": 0.4612120985984802, "learning_rate": 0.0001, "loss": 1.6127, "step": 6160 }, { "epoch": 0.7157711298286378, "grad_norm": 0.4640682339668274, "learning_rate": 0.0001, "loss": 1.6909, "step": 6161 }, { "epoch": 0.7158873075805983, "grad_norm": 0.43429458141326904, "learning_rate": 0.0001, "loss": 1.6723, "step": 6162 }, { "epoch": 0.7160034853325589, "grad_norm": 0.4491255581378937, "learning_rate": 0.0001, "loss": 1.512, "step": 6163 }, { "epoch": 0.7161196630845194, "grad_norm": 0.39764824509620667, "learning_rate": 0.0001, "loss": 1.344, "step": 6164 }, { "epoch": 0.7162358408364798, "grad_norm": 0.5114927887916565, "learning_rate": 0.0001, "loss": 1.746, "step": 6165 }, { "epoch": 0.7163520185884403, "grad_norm": 0.5151812434196472, "learning_rate": 0.0001, "loss": 1.6123, "step": 6166 }, { "epoch": 0.7164681963404008, "grad_norm": 0.43614163994789124, "learning_rate": 0.0001, "loss": 1.63, "step": 6167 }, { "epoch": 0.7165843740923613, "grad_norm": 0.46181315183639526, "learning_rate": 0.0001, "loss": 1.6494, "step": 6168 }, { "epoch": 0.7167005518443218, "grad_norm": 0.43157726526260376, "learning_rate": 0.0001, "loss": 1.5617, "step": 6169 }, { "epoch": 0.7168167295962823, "grad_norm": 0.4304739534854889, "learning_rate": 0.0001, "loss": 1.5688, "step": 6170 }, { "epoch": 0.7169329073482428, "grad_norm": 0.4536237120628357, "learning_rate": 0.0001, "loss": 1.7147, "step": 6171 }, { "epoch": 0.7170490851002033, "grad_norm": 0.40845513343811035, "learning_rate": 0.0001, "loss": 1.4578, "step": 6172 }, { "epoch": 0.7171652628521639, "grad_norm": 0.45678746700286865, "learning_rate": 0.0001, "loss": 1.6398, "step": 6173 }, { "epoch": 0.7172814406041244, "grad_norm": 0.42773115634918213, "learning_rate": 0.0001, "loss": 1.5271, "step": 6174 }, { "epoch": 0.7173976183560848, "grad_norm": 0.4427812993526459, "learning_rate": 0.0001, "loss": 1.7262, "step": 6175 }, { "epoch": 0.7175137961080453, "grad_norm": 0.42807790637016296, "learning_rate": 0.0001, "loss": 1.6036, "step": 6176 }, { "epoch": 0.7176299738600058, "grad_norm": 0.4445224404335022, "learning_rate": 0.0001, "loss": 1.6919, "step": 6177 }, { "epoch": 0.7177461516119663, "grad_norm": 0.43124210834503174, "learning_rate": 0.0001, "loss": 1.5545, "step": 6178 }, { "epoch": 0.7178623293639268, "grad_norm": 0.41215458512306213, "learning_rate": 0.0001, "loss": 1.6969, "step": 6179 }, { "epoch": 0.7179785071158873, "grad_norm": 0.45972126722335815, "learning_rate": 0.0001, "loss": 1.6359, "step": 6180 }, { "epoch": 0.7180946848678478, "grad_norm": 0.44836103916168213, "learning_rate": 0.0001, "loss": 1.7967, "step": 6181 }, { "epoch": 0.7182108626198083, "grad_norm": 0.4521453082561493, "learning_rate": 0.0001, "loss": 1.657, "step": 6182 }, { "epoch": 0.7183270403717688, "grad_norm": 0.42196735739707947, "learning_rate": 0.0001, "loss": 1.5564, "step": 6183 }, { "epoch": 0.7184432181237294, "grad_norm": 0.4695570170879364, "learning_rate": 0.0001, "loss": 1.6709, "step": 6184 }, { "epoch": 0.7185593958756898, "grad_norm": 0.4804345369338989, "learning_rate": 0.0001, "loss": 1.7857, "step": 6185 }, { "epoch": 0.7186755736276503, "grad_norm": 0.41611528396606445, "learning_rate": 0.0001, "loss": 1.5269, "step": 6186 }, { "epoch": 0.7187917513796108, "grad_norm": 0.4277372658252716, "learning_rate": 0.0001, "loss": 1.4379, "step": 6187 }, { "epoch": 0.7189079291315713, "grad_norm": 0.45039102435112, "learning_rate": 0.0001, "loss": 1.6271, "step": 6188 }, { "epoch": 0.7190241068835318, "grad_norm": 0.4118036925792694, "learning_rate": 0.0001, "loss": 1.4019, "step": 6189 }, { "epoch": 0.7191402846354923, "grad_norm": 0.42078375816345215, "learning_rate": 0.0001, "loss": 1.4847, "step": 6190 }, { "epoch": 0.7192564623874528, "grad_norm": 0.4338337779045105, "learning_rate": 0.0001, "loss": 1.6613, "step": 6191 }, { "epoch": 0.7193726401394133, "grad_norm": 0.440143883228302, "learning_rate": 0.0001, "loss": 1.6647, "step": 6192 }, { "epoch": 0.7194888178913738, "grad_norm": 0.45626693964004517, "learning_rate": 0.0001, "loss": 1.6489, "step": 6193 }, { "epoch": 0.7196049956433344, "grad_norm": 0.48190397024154663, "learning_rate": 0.0001, "loss": 1.6335, "step": 6194 }, { "epoch": 0.7197211733952948, "grad_norm": 0.42678698897361755, "learning_rate": 0.0001, "loss": 1.6169, "step": 6195 }, { "epoch": 0.7198373511472553, "grad_norm": 0.48554766178131104, "learning_rate": 0.0001, "loss": 1.7154, "step": 6196 }, { "epoch": 0.7199535288992158, "grad_norm": 0.4570693075656891, "learning_rate": 0.0001, "loss": 1.622, "step": 6197 }, { "epoch": 0.7200697066511763, "grad_norm": 0.48876166343688965, "learning_rate": 0.0001, "loss": 1.678, "step": 6198 }, { "epoch": 0.7201858844031368, "grad_norm": 0.4803100526332855, "learning_rate": 0.0001, "loss": 1.5687, "step": 6199 }, { "epoch": 0.7203020621550973, "grad_norm": 0.44159939885139465, "learning_rate": 0.0001, "loss": 1.6498, "step": 6200 }, { "epoch": 0.7204182399070578, "grad_norm": 0.45352402329444885, "learning_rate": 0.0001, "loss": 1.7286, "step": 6201 }, { "epoch": 0.7205344176590183, "grad_norm": 0.4505062699317932, "learning_rate": 0.0001, "loss": 1.5318, "step": 6202 }, { "epoch": 0.7206505954109788, "grad_norm": 0.4428583085536957, "learning_rate": 0.0001, "loss": 1.6199, "step": 6203 }, { "epoch": 0.7207667731629392, "grad_norm": 0.43122392892837524, "learning_rate": 0.0001, "loss": 1.5087, "step": 6204 }, { "epoch": 0.7208829509148998, "grad_norm": 0.4514487087726593, "learning_rate": 0.0001, "loss": 1.7102, "step": 6205 }, { "epoch": 0.7209991286668603, "grad_norm": 0.43971630930900574, "learning_rate": 0.0001, "loss": 1.6305, "step": 6206 }, { "epoch": 0.7211153064188208, "grad_norm": 0.424069344997406, "learning_rate": 0.0001, "loss": 1.4566, "step": 6207 }, { "epoch": 0.7212314841707813, "grad_norm": 0.4398737847805023, "learning_rate": 0.0001, "loss": 1.4913, "step": 6208 }, { "epoch": 0.7213476619227418, "grad_norm": 0.4196438491344452, "learning_rate": 0.0001, "loss": 1.5111, "step": 6209 }, { "epoch": 0.7214638396747023, "grad_norm": 0.45629531145095825, "learning_rate": 0.0001, "loss": 1.6255, "step": 6210 }, { "epoch": 0.7215800174266628, "grad_norm": 0.47182023525238037, "learning_rate": 0.0001, "loss": 1.7316, "step": 6211 }, { "epoch": 0.7216961951786233, "grad_norm": 0.460814893245697, "learning_rate": 0.0001, "loss": 1.6633, "step": 6212 }, { "epoch": 0.7218123729305838, "grad_norm": 0.41416749358177185, "learning_rate": 0.0001, "loss": 1.3794, "step": 6213 }, { "epoch": 0.7219285506825442, "grad_norm": 0.4730556607246399, "learning_rate": 0.0001, "loss": 1.6985, "step": 6214 }, { "epoch": 0.7220447284345048, "grad_norm": 0.4644532799720764, "learning_rate": 0.0001, "loss": 1.5189, "step": 6215 }, { "epoch": 0.7221609061864653, "grad_norm": 0.44122570753097534, "learning_rate": 0.0001, "loss": 1.6217, "step": 6216 }, { "epoch": 0.7222770839384258, "grad_norm": 0.46849554777145386, "learning_rate": 0.0001, "loss": 1.4721, "step": 6217 }, { "epoch": 0.7223932616903863, "grad_norm": 0.43822774291038513, "learning_rate": 0.0001, "loss": 1.518, "step": 6218 }, { "epoch": 0.7225094394423468, "grad_norm": 0.42008358240127563, "learning_rate": 0.0001, "loss": 1.529, "step": 6219 }, { "epoch": 0.7226256171943073, "grad_norm": 0.45318713784217834, "learning_rate": 0.0001, "loss": 1.6414, "step": 6220 }, { "epoch": 0.7227417949462678, "grad_norm": 0.4390380084514618, "learning_rate": 0.0001, "loss": 1.5671, "step": 6221 }, { "epoch": 0.7228579726982283, "grad_norm": 0.46329495310783386, "learning_rate": 0.0001, "loss": 1.5951, "step": 6222 }, { "epoch": 0.7229741504501888, "grad_norm": 0.47496578097343445, "learning_rate": 0.0001, "loss": 1.6651, "step": 6223 }, { "epoch": 0.7230903282021492, "grad_norm": 0.46759724617004395, "learning_rate": 0.0001, "loss": 1.7158, "step": 6224 }, { "epoch": 0.7232065059541097, "grad_norm": 0.46540212631225586, "learning_rate": 0.0001, "loss": 1.4931, "step": 6225 }, { "epoch": 0.7233226837060703, "grad_norm": 0.4282924234867096, "learning_rate": 0.0001, "loss": 1.5571, "step": 6226 }, { "epoch": 0.7234388614580308, "grad_norm": 0.44697490334510803, "learning_rate": 0.0001, "loss": 1.5344, "step": 6227 }, { "epoch": 0.7235550392099913, "grad_norm": 0.4610814154148102, "learning_rate": 0.0001, "loss": 1.4993, "step": 6228 }, { "epoch": 0.7236712169619518, "grad_norm": 0.45639610290527344, "learning_rate": 0.0001, "loss": 1.5497, "step": 6229 }, { "epoch": 0.7237873947139123, "grad_norm": 0.4381502568721771, "learning_rate": 0.0001, "loss": 1.573, "step": 6230 }, { "epoch": 0.7239035724658728, "grad_norm": 0.440270334482193, "learning_rate": 0.0001, "loss": 1.4069, "step": 6231 }, { "epoch": 0.7240197502178333, "grad_norm": 0.4624209403991699, "learning_rate": 0.0001, "loss": 1.5338, "step": 6232 }, { "epoch": 0.7241359279697938, "grad_norm": 0.4678003489971161, "learning_rate": 0.0001, "loss": 1.659, "step": 6233 }, { "epoch": 0.7242521057217542, "grad_norm": 0.44897353649139404, "learning_rate": 0.0001, "loss": 1.5219, "step": 6234 }, { "epoch": 0.7243682834737147, "grad_norm": 0.431941419839859, "learning_rate": 0.0001, "loss": 1.6613, "step": 6235 }, { "epoch": 0.7244844612256753, "grad_norm": 0.45239877700805664, "learning_rate": 0.0001, "loss": 1.6443, "step": 6236 }, { "epoch": 0.7246006389776358, "grad_norm": 0.4519427418708801, "learning_rate": 0.0001, "loss": 1.7284, "step": 6237 }, { "epoch": 0.7247168167295963, "grad_norm": 0.45957690477371216, "learning_rate": 0.0001, "loss": 1.5839, "step": 6238 }, { "epoch": 0.7248329944815568, "grad_norm": 0.46946290135383606, "learning_rate": 0.0001, "loss": 1.5579, "step": 6239 }, { "epoch": 0.7249491722335173, "grad_norm": 0.4527909457683563, "learning_rate": 0.0001, "loss": 1.6597, "step": 6240 }, { "epoch": 0.7250653499854778, "grad_norm": 0.4612141251564026, "learning_rate": 0.0001, "loss": 1.6803, "step": 6241 }, { "epoch": 0.7251815277374383, "grad_norm": 0.4190555214881897, "learning_rate": 0.0001, "loss": 1.517, "step": 6242 }, { "epoch": 0.7252977054893988, "grad_norm": 0.4483005702495575, "learning_rate": 0.0001, "loss": 1.5936, "step": 6243 }, { "epoch": 0.7254138832413592, "grad_norm": 0.4509059190750122, "learning_rate": 0.0001, "loss": 1.6389, "step": 6244 }, { "epoch": 0.7255300609933197, "grad_norm": 0.47739991545677185, "learning_rate": 0.0001, "loss": 1.759, "step": 6245 }, { "epoch": 0.7256462387452802, "grad_norm": 0.4522044360637665, "learning_rate": 0.0001, "loss": 1.6458, "step": 6246 }, { "epoch": 0.7257624164972408, "grad_norm": 0.43035435676574707, "learning_rate": 0.0001, "loss": 1.5951, "step": 6247 }, { "epoch": 0.7258785942492013, "grad_norm": 0.4420571029186249, "learning_rate": 0.0001, "loss": 1.5334, "step": 6248 }, { "epoch": 0.7259947720011618, "grad_norm": 0.47003141045570374, "learning_rate": 0.0001, "loss": 1.7234, "step": 6249 }, { "epoch": 0.7261109497531223, "grad_norm": 0.43513643741607666, "learning_rate": 0.0001, "loss": 1.7224, "step": 6250 }, { "epoch": 0.7262271275050828, "grad_norm": 0.4268497824668884, "learning_rate": 0.0001, "loss": 1.6493, "step": 6251 }, { "epoch": 0.7263433052570433, "grad_norm": 0.46211907267570496, "learning_rate": 0.0001, "loss": 1.7306, "step": 6252 }, { "epoch": 0.7264594830090038, "grad_norm": 0.4459565281867981, "learning_rate": 0.0001, "loss": 1.4749, "step": 6253 }, { "epoch": 0.7265756607609642, "grad_norm": 0.44835829734802246, "learning_rate": 0.0001, "loss": 1.5263, "step": 6254 }, { "epoch": 0.7266918385129247, "grad_norm": 0.4384959936141968, "learning_rate": 0.0001, "loss": 1.4294, "step": 6255 }, { "epoch": 0.7268080162648852, "grad_norm": 0.4654468894004822, "learning_rate": 0.0001, "loss": 1.6979, "step": 6256 }, { "epoch": 0.7269241940168458, "grad_norm": 0.4371379613876343, "learning_rate": 0.0001, "loss": 1.6439, "step": 6257 }, { "epoch": 0.7270403717688063, "grad_norm": 0.48108625411987305, "learning_rate": 0.0001, "loss": 1.7885, "step": 6258 }, { "epoch": 0.7271565495207668, "grad_norm": 0.4787601828575134, "learning_rate": 0.0001, "loss": 1.6427, "step": 6259 }, { "epoch": 0.7272727272727273, "grad_norm": 0.43150654435157776, "learning_rate": 0.0001, "loss": 1.5843, "step": 6260 }, { "epoch": 0.7273889050246878, "grad_norm": 0.46279093623161316, "learning_rate": 0.0001, "loss": 1.6501, "step": 6261 }, { "epoch": 0.7275050827766483, "grad_norm": 0.4516332447528839, "learning_rate": 0.0001, "loss": 1.5332, "step": 6262 }, { "epoch": 0.7276212605286088, "grad_norm": 0.43989452719688416, "learning_rate": 0.0001, "loss": 1.6061, "step": 6263 }, { "epoch": 0.7277374382805692, "grad_norm": 0.44632989168167114, "learning_rate": 0.0001, "loss": 1.5898, "step": 6264 }, { "epoch": 0.7278536160325297, "grad_norm": 0.44944363832473755, "learning_rate": 0.0001, "loss": 1.6132, "step": 6265 }, { "epoch": 0.7279697937844902, "grad_norm": 0.4271278977394104, "learning_rate": 0.0001, "loss": 1.48, "step": 6266 }, { "epoch": 0.7280859715364507, "grad_norm": 0.4227433204650879, "learning_rate": 0.0001, "loss": 1.5262, "step": 6267 }, { "epoch": 0.7282021492884113, "grad_norm": 0.42294958233833313, "learning_rate": 0.0001, "loss": 1.6177, "step": 6268 }, { "epoch": 0.7283183270403718, "grad_norm": 0.43251901865005493, "learning_rate": 0.0001, "loss": 1.576, "step": 6269 }, { "epoch": 0.7284345047923323, "grad_norm": 0.47049641609191895, "learning_rate": 0.0001, "loss": 1.7649, "step": 6270 }, { "epoch": 0.7285506825442928, "grad_norm": 0.457476943731308, "learning_rate": 0.0001, "loss": 1.6335, "step": 6271 }, { "epoch": 0.7286668602962533, "grad_norm": 0.4726758599281311, "learning_rate": 0.0001, "loss": 1.6506, "step": 6272 }, { "epoch": 0.7287830380482138, "grad_norm": 0.5140420198440552, "learning_rate": 0.0001, "loss": 1.8907, "step": 6273 }, { "epoch": 0.7288992158001742, "grad_norm": 0.4506530165672302, "learning_rate": 0.0001, "loss": 1.6638, "step": 6274 }, { "epoch": 0.7290153935521347, "grad_norm": 0.4550815522670746, "learning_rate": 0.0001, "loss": 1.7825, "step": 6275 }, { "epoch": 0.7291315713040952, "grad_norm": 0.45727846026420593, "learning_rate": 0.0001, "loss": 1.728, "step": 6276 }, { "epoch": 0.7292477490560557, "grad_norm": 0.4206952154636383, "learning_rate": 0.0001, "loss": 1.4247, "step": 6277 }, { "epoch": 0.7293639268080163, "grad_norm": 0.4411332905292511, "learning_rate": 0.0001, "loss": 1.7024, "step": 6278 }, { "epoch": 0.7294801045599768, "grad_norm": 0.4896695613861084, "learning_rate": 0.0001, "loss": 1.7724, "step": 6279 }, { "epoch": 0.7295962823119373, "grad_norm": 0.463642954826355, "learning_rate": 0.0001, "loss": 1.6887, "step": 6280 }, { "epoch": 0.7297124600638978, "grad_norm": 0.41118043661117554, "learning_rate": 0.0001, "loss": 1.5507, "step": 6281 }, { "epoch": 0.7298286378158583, "grad_norm": 0.4534677565097809, "learning_rate": 0.0001, "loss": 1.6086, "step": 6282 }, { "epoch": 0.7299448155678188, "grad_norm": 0.4662560522556305, "learning_rate": 0.0001, "loss": 1.5533, "step": 6283 }, { "epoch": 0.7300609933197793, "grad_norm": 0.4774499833583832, "learning_rate": 0.0001, "loss": 1.8409, "step": 6284 }, { "epoch": 0.7301771710717397, "grad_norm": 0.45723921060562134, "learning_rate": 0.0001, "loss": 1.6268, "step": 6285 }, { "epoch": 0.7302933488237002, "grad_norm": 0.44660598039627075, "learning_rate": 0.0001, "loss": 1.6364, "step": 6286 }, { "epoch": 0.7304095265756607, "grad_norm": 0.44800204038619995, "learning_rate": 0.0001, "loss": 1.5822, "step": 6287 }, { "epoch": 0.7305257043276212, "grad_norm": 0.45778778195381165, "learning_rate": 0.0001, "loss": 1.5414, "step": 6288 }, { "epoch": 0.7306418820795818, "grad_norm": 0.431100457906723, "learning_rate": 0.0001, "loss": 1.6013, "step": 6289 }, { "epoch": 0.7307580598315423, "grad_norm": 0.4813309907913208, "learning_rate": 0.0001, "loss": 1.5132, "step": 6290 }, { "epoch": 0.7308742375835028, "grad_norm": 0.4887335002422333, "learning_rate": 0.0001, "loss": 1.7038, "step": 6291 }, { "epoch": 0.7309904153354633, "grad_norm": 0.48292210698127747, "learning_rate": 0.0001, "loss": 1.6222, "step": 6292 }, { "epoch": 0.7311065930874238, "grad_norm": 0.42640790343284607, "learning_rate": 0.0001, "loss": 1.5194, "step": 6293 }, { "epoch": 0.7312227708393843, "grad_norm": 0.41172870993614197, "learning_rate": 0.0001, "loss": 1.4218, "step": 6294 }, { "epoch": 0.7313389485913447, "grad_norm": 0.48400574922561646, "learning_rate": 0.0001, "loss": 1.6952, "step": 6295 }, { "epoch": 0.7314551263433052, "grad_norm": 0.47430482506752014, "learning_rate": 0.0001, "loss": 1.5349, "step": 6296 }, { "epoch": 0.7315713040952657, "grad_norm": 0.4179590046405792, "learning_rate": 0.0001, "loss": 1.6036, "step": 6297 }, { "epoch": 0.7316874818472262, "grad_norm": 0.4300008714199066, "learning_rate": 0.0001, "loss": 1.5309, "step": 6298 }, { "epoch": 0.7318036595991868, "grad_norm": 0.4634290933609009, "learning_rate": 0.0001, "loss": 1.7461, "step": 6299 }, { "epoch": 0.7319198373511473, "grad_norm": 0.4361773133277893, "learning_rate": 0.0001, "loss": 1.5926, "step": 6300 }, { "epoch": 0.7320360151031078, "grad_norm": 0.46928146481513977, "learning_rate": 0.0001, "loss": 1.7193, "step": 6301 }, { "epoch": 0.7321521928550683, "grad_norm": 0.47918713092803955, "learning_rate": 0.0001, "loss": 1.7658, "step": 6302 }, { "epoch": 0.7322683706070288, "grad_norm": 0.49823591113090515, "learning_rate": 0.0001, "loss": 1.6058, "step": 6303 }, { "epoch": 0.7323845483589893, "grad_norm": 0.5099037289619446, "learning_rate": 0.0001, "loss": 1.8295, "step": 6304 }, { "epoch": 0.7325007261109497, "grad_norm": 0.49155983328819275, "learning_rate": 0.0001, "loss": 1.6552, "step": 6305 }, { "epoch": 0.7326169038629102, "grad_norm": 0.4120127260684967, "learning_rate": 0.0001, "loss": 1.3311, "step": 6306 }, { "epoch": 0.7327330816148707, "grad_norm": 0.5385419726371765, "learning_rate": 0.0001, "loss": 1.6745, "step": 6307 }, { "epoch": 0.7328492593668312, "grad_norm": 0.4356537461280823, "learning_rate": 0.0001, "loss": 1.6443, "step": 6308 }, { "epoch": 0.7329654371187917, "grad_norm": 0.46573251485824585, "learning_rate": 0.0001, "loss": 1.6368, "step": 6309 }, { "epoch": 0.7330816148707523, "grad_norm": 0.42254379391670227, "learning_rate": 0.0001, "loss": 1.5118, "step": 6310 }, { "epoch": 0.7331977926227128, "grad_norm": 0.42466604709625244, "learning_rate": 0.0001, "loss": 1.5458, "step": 6311 }, { "epoch": 0.7333139703746733, "grad_norm": 0.4290546774864197, "learning_rate": 0.0001, "loss": 1.3726, "step": 6312 }, { "epoch": 0.7334301481266338, "grad_norm": 0.4976603090763092, "learning_rate": 0.0001, "loss": 1.6887, "step": 6313 }, { "epoch": 0.7335463258785943, "grad_norm": 0.457592248916626, "learning_rate": 0.0001, "loss": 1.5581, "step": 6314 }, { "epoch": 0.7336625036305547, "grad_norm": 0.4522661864757538, "learning_rate": 0.0001, "loss": 1.6479, "step": 6315 }, { "epoch": 0.7337786813825152, "grad_norm": 0.43026474118232727, "learning_rate": 0.0001, "loss": 1.4995, "step": 6316 }, { "epoch": 0.7338948591344757, "grad_norm": 0.41863980889320374, "learning_rate": 0.0001, "loss": 1.4759, "step": 6317 }, { "epoch": 0.7340110368864362, "grad_norm": 0.47513189911842346, "learning_rate": 0.0001, "loss": 1.7717, "step": 6318 }, { "epoch": 0.7341272146383967, "grad_norm": 0.46296703815460205, "learning_rate": 0.0001, "loss": 1.5514, "step": 6319 }, { "epoch": 0.7342433923903573, "grad_norm": 0.40901249647140503, "learning_rate": 0.0001, "loss": 1.4381, "step": 6320 }, { "epoch": 0.7343595701423178, "grad_norm": 0.48210811614990234, "learning_rate": 0.0001, "loss": 1.5241, "step": 6321 }, { "epoch": 0.7344757478942783, "grad_norm": 0.4578268826007843, "learning_rate": 0.0001, "loss": 1.5555, "step": 6322 }, { "epoch": 0.7345919256462388, "grad_norm": 0.5248122811317444, "learning_rate": 0.0001, "loss": 1.7845, "step": 6323 }, { "epoch": 0.7347081033981993, "grad_norm": 0.4604836404323578, "learning_rate": 0.0001, "loss": 1.5511, "step": 6324 }, { "epoch": 0.7348242811501597, "grad_norm": 0.4478675425052643, "learning_rate": 0.0001, "loss": 1.5158, "step": 6325 }, { "epoch": 0.7349404589021202, "grad_norm": 0.4665616452693939, "learning_rate": 0.0001, "loss": 1.7363, "step": 6326 }, { "epoch": 0.7350566366540807, "grad_norm": 0.4462968409061432, "learning_rate": 0.0001, "loss": 1.5514, "step": 6327 }, { "epoch": 0.7351728144060412, "grad_norm": 0.4639495313167572, "learning_rate": 0.0001, "loss": 1.6764, "step": 6328 }, { "epoch": 0.7352889921580017, "grad_norm": 0.47614338994026184, "learning_rate": 0.0001, "loss": 1.7773, "step": 6329 }, { "epoch": 0.7354051699099623, "grad_norm": 0.47251033782958984, "learning_rate": 0.0001, "loss": 1.6546, "step": 6330 }, { "epoch": 0.7355213476619228, "grad_norm": 0.4667409062385559, "learning_rate": 0.0001, "loss": 1.6973, "step": 6331 }, { "epoch": 0.7356375254138833, "grad_norm": 0.43964049220085144, "learning_rate": 0.0001, "loss": 1.6167, "step": 6332 }, { "epoch": 0.7357537031658438, "grad_norm": 0.43899863958358765, "learning_rate": 0.0001, "loss": 1.4977, "step": 6333 }, { "epoch": 0.7358698809178043, "grad_norm": 0.4609370231628418, "learning_rate": 0.0001, "loss": 1.5861, "step": 6334 }, { "epoch": 0.7359860586697647, "grad_norm": 0.4471205472946167, "learning_rate": 0.0001, "loss": 1.5519, "step": 6335 }, { "epoch": 0.7361022364217252, "grad_norm": 0.4316443204879761, "learning_rate": 0.0001, "loss": 1.4519, "step": 6336 }, { "epoch": 0.7362184141736857, "grad_norm": 0.4466089606285095, "learning_rate": 0.0001, "loss": 1.6115, "step": 6337 }, { "epoch": 0.7363345919256462, "grad_norm": 0.4607985317707062, "learning_rate": 0.0001, "loss": 1.6619, "step": 6338 }, { "epoch": 0.7364507696776067, "grad_norm": 0.46263134479522705, "learning_rate": 0.0001, "loss": 1.7685, "step": 6339 }, { "epoch": 0.7365669474295672, "grad_norm": 0.4616027772426605, "learning_rate": 0.0001, "loss": 1.553, "step": 6340 }, { "epoch": 0.7366831251815278, "grad_norm": 0.4539996385574341, "learning_rate": 0.0001, "loss": 1.5797, "step": 6341 }, { "epoch": 0.7367993029334883, "grad_norm": 0.4599131643772125, "learning_rate": 0.0001, "loss": 1.5019, "step": 6342 }, { "epoch": 0.7369154806854488, "grad_norm": 0.4772374927997589, "learning_rate": 0.0001, "loss": 1.5946, "step": 6343 }, { "epoch": 0.7370316584374093, "grad_norm": 0.4597277343273163, "learning_rate": 0.0001, "loss": 1.6406, "step": 6344 }, { "epoch": 0.7371478361893697, "grad_norm": 0.4844377934932709, "learning_rate": 0.0001, "loss": 1.6903, "step": 6345 }, { "epoch": 0.7372640139413302, "grad_norm": 0.4434802532196045, "learning_rate": 0.0001, "loss": 1.6391, "step": 6346 }, { "epoch": 0.7373801916932907, "grad_norm": 0.48957550525665283, "learning_rate": 0.0001, "loss": 1.6194, "step": 6347 }, { "epoch": 0.7374963694452512, "grad_norm": 0.4729391932487488, "learning_rate": 0.0001, "loss": 1.5037, "step": 6348 }, { "epoch": 0.7376125471972117, "grad_norm": 0.4368651807308197, "learning_rate": 0.0001, "loss": 1.7193, "step": 6349 }, { "epoch": 0.7377287249491722, "grad_norm": 0.4742048978805542, "learning_rate": 0.0001, "loss": 1.8506, "step": 6350 }, { "epoch": 0.7378449027011328, "grad_norm": 0.4797591269016266, "learning_rate": 0.0001, "loss": 1.6867, "step": 6351 }, { "epoch": 0.7379610804530933, "grad_norm": 0.46037012338638306, "learning_rate": 0.0001, "loss": 1.6366, "step": 6352 }, { "epoch": 0.7380772582050538, "grad_norm": 0.45086851716041565, "learning_rate": 0.0001, "loss": 1.5711, "step": 6353 }, { "epoch": 0.7381934359570143, "grad_norm": 0.4557948410511017, "learning_rate": 0.0001, "loss": 1.5405, "step": 6354 }, { "epoch": 0.7383096137089747, "grad_norm": 0.4523628354072571, "learning_rate": 0.0001, "loss": 1.5482, "step": 6355 }, { "epoch": 0.7384257914609352, "grad_norm": 0.43781834840774536, "learning_rate": 0.0001, "loss": 1.4367, "step": 6356 }, { "epoch": 0.7385419692128957, "grad_norm": 0.5110533833503723, "learning_rate": 0.0001, "loss": 1.6609, "step": 6357 }, { "epoch": 0.7386581469648562, "grad_norm": 0.4702020585536957, "learning_rate": 0.0001, "loss": 1.5689, "step": 6358 }, { "epoch": 0.7387743247168167, "grad_norm": 0.45719245076179504, "learning_rate": 0.0001, "loss": 1.504, "step": 6359 }, { "epoch": 0.7388905024687772, "grad_norm": 0.45897620916366577, "learning_rate": 0.0001, "loss": 1.6057, "step": 6360 }, { "epoch": 0.7390066802207377, "grad_norm": 0.48604246973991394, "learning_rate": 0.0001, "loss": 1.6199, "step": 6361 }, { "epoch": 0.7391228579726983, "grad_norm": 0.4290804862976074, "learning_rate": 0.0001, "loss": 1.5523, "step": 6362 }, { "epoch": 0.7392390357246588, "grad_norm": 0.47816741466522217, "learning_rate": 0.0001, "loss": 1.5016, "step": 6363 }, { "epoch": 0.7393552134766193, "grad_norm": 0.5012046098709106, "learning_rate": 0.0001, "loss": 1.514, "step": 6364 }, { "epoch": 0.7394713912285797, "grad_norm": 0.5031648874282837, "learning_rate": 0.0001, "loss": 1.7799, "step": 6365 }, { "epoch": 0.7395875689805402, "grad_norm": 0.46205514669418335, "learning_rate": 0.0001, "loss": 1.7046, "step": 6366 }, { "epoch": 0.7397037467325007, "grad_norm": 0.4918919801712036, "learning_rate": 0.0001, "loss": 1.6548, "step": 6367 }, { "epoch": 0.7398199244844612, "grad_norm": 0.4417077898979187, "learning_rate": 0.0001, "loss": 1.5443, "step": 6368 }, { "epoch": 0.7399361022364217, "grad_norm": 0.5084665417671204, "learning_rate": 0.0001, "loss": 1.8225, "step": 6369 }, { "epoch": 0.7400522799883822, "grad_norm": 0.4046757221221924, "learning_rate": 0.0001, "loss": 1.3748, "step": 6370 }, { "epoch": 0.7401684577403427, "grad_norm": 0.4671691358089447, "learning_rate": 0.0001, "loss": 1.6038, "step": 6371 }, { "epoch": 0.7402846354923033, "grad_norm": 0.41901758313179016, "learning_rate": 0.0001, "loss": 1.4741, "step": 6372 }, { "epoch": 0.7404008132442638, "grad_norm": 0.4258348345756531, "learning_rate": 0.0001, "loss": 1.4846, "step": 6373 }, { "epoch": 0.7405169909962243, "grad_norm": 0.44732606410980225, "learning_rate": 0.0001, "loss": 1.6255, "step": 6374 }, { "epoch": 0.7406331687481847, "grad_norm": 0.4379178583621979, "learning_rate": 0.0001, "loss": 1.5444, "step": 6375 }, { "epoch": 0.7407493465001452, "grad_norm": 0.44309452176094055, "learning_rate": 0.0001, "loss": 1.4593, "step": 6376 }, { "epoch": 0.7408655242521057, "grad_norm": 0.47775688767433167, "learning_rate": 0.0001, "loss": 1.5678, "step": 6377 }, { "epoch": 0.7409817020040662, "grad_norm": 0.4426514208316803, "learning_rate": 0.0001, "loss": 1.5051, "step": 6378 }, { "epoch": 0.7410978797560267, "grad_norm": 0.45742908120155334, "learning_rate": 0.0001, "loss": 1.5815, "step": 6379 }, { "epoch": 0.7412140575079872, "grad_norm": 0.46088162064552307, "learning_rate": 0.0001, "loss": 1.6739, "step": 6380 }, { "epoch": 0.7413302352599477, "grad_norm": 0.4453122317790985, "learning_rate": 0.0001, "loss": 1.4667, "step": 6381 }, { "epoch": 0.7414464130119082, "grad_norm": 0.4657224714756012, "learning_rate": 0.0001, "loss": 1.7709, "step": 6382 }, { "epoch": 0.7415625907638688, "grad_norm": 0.47661951184272766, "learning_rate": 0.0001, "loss": 1.5963, "step": 6383 }, { "epoch": 0.7416787685158293, "grad_norm": 0.4470673203468323, "learning_rate": 0.0001, "loss": 1.6727, "step": 6384 }, { "epoch": 0.7417949462677897, "grad_norm": 0.4273890554904938, "learning_rate": 0.0001, "loss": 1.3943, "step": 6385 }, { "epoch": 0.7419111240197502, "grad_norm": 0.4307868778705597, "learning_rate": 0.0001, "loss": 1.4627, "step": 6386 }, { "epoch": 0.7420273017717107, "grad_norm": 0.5137743353843689, "learning_rate": 0.0001, "loss": 1.543, "step": 6387 }, { "epoch": 0.7421434795236712, "grad_norm": 0.4485335648059845, "learning_rate": 0.0001, "loss": 1.5651, "step": 6388 }, { "epoch": 0.7422596572756317, "grad_norm": 0.46610766649246216, "learning_rate": 0.0001, "loss": 1.5949, "step": 6389 }, { "epoch": 0.7423758350275922, "grad_norm": 0.4473322629928589, "learning_rate": 0.0001, "loss": 1.4623, "step": 6390 }, { "epoch": 0.7424920127795527, "grad_norm": 0.4895930886268616, "learning_rate": 0.0001, "loss": 1.6431, "step": 6391 }, { "epoch": 0.7426081905315132, "grad_norm": 0.48194587230682373, "learning_rate": 0.0001, "loss": 1.6737, "step": 6392 }, { "epoch": 0.7427243682834738, "grad_norm": 0.44431936740875244, "learning_rate": 0.0001, "loss": 1.5463, "step": 6393 }, { "epoch": 0.7428405460354343, "grad_norm": 0.4284924864768982, "learning_rate": 0.0001, "loss": 1.6245, "step": 6394 }, { "epoch": 0.7429567237873947, "grad_norm": 0.4462593197822571, "learning_rate": 0.0001, "loss": 1.5761, "step": 6395 }, { "epoch": 0.7430729015393552, "grad_norm": 0.4361656606197357, "learning_rate": 0.0001, "loss": 1.5675, "step": 6396 }, { "epoch": 0.7431890792913157, "grad_norm": 0.4379312992095947, "learning_rate": 0.0001, "loss": 1.6233, "step": 6397 }, { "epoch": 0.7433052570432762, "grad_norm": 0.4425080418586731, "learning_rate": 0.0001, "loss": 1.4492, "step": 6398 }, { "epoch": 0.7434214347952367, "grad_norm": 0.4721614718437195, "learning_rate": 0.0001, "loss": 1.7602, "step": 6399 }, { "epoch": 0.7435376125471972, "grad_norm": 0.45762506127357483, "learning_rate": 0.0001, "loss": 1.5522, "step": 6400 }, { "epoch": 0.7436537902991577, "grad_norm": 0.44420626759529114, "learning_rate": 0.0001, "loss": 1.4694, "step": 6401 }, { "epoch": 0.7437699680511182, "grad_norm": 0.4328571856021881, "learning_rate": 0.0001, "loss": 1.5338, "step": 6402 }, { "epoch": 0.7438861458030787, "grad_norm": 0.4925638735294342, "learning_rate": 0.0001, "loss": 1.731, "step": 6403 }, { "epoch": 0.7440023235550393, "grad_norm": 0.43524104356765747, "learning_rate": 0.0001, "loss": 1.6355, "step": 6404 }, { "epoch": 0.7441185013069997, "grad_norm": 0.4498136043548584, "learning_rate": 0.0001, "loss": 1.5421, "step": 6405 }, { "epoch": 0.7442346790589602, "grad_norm": 0.43513327836990356, "learning_rate": 0.0001, "loss": 1.3918, "step": 6406 }, { "epoch": 0.7443508568109207, "grad_norm": 0.4613160789012909, "learning_rate": 0.0001, "loss": 1.6829, "step": 6407 }, { "epoch": 0.7444670345628812, "grad_norm": 0.4584994614124298, "learning_rate": 0.0001, "loss": 1.6791, "step": 6408 }, { "epoch": 0.7445832123148417, "grad_norm": 0.49195122718811035, "learning_rate": 0.0001, "loss": 1.8131, "step": 6409 }, { "epoch": 0.7446993900668022, "grad_norm": 0.4556443393230438, "learning_rate": 0.0001, "loss": 1.6678, "step": 6410 }, { "epoch": 0.7448155678187627, "grad_norm": 0.46693944931030273, "learning_rate": 0.0001, "loss": 1.719, "step": 6411 }, { "epoch": 0.7449317455707232, "grad_norm": 0.46386128664016724, "learning_rate": 0.0001, "loss": 1.6447, "step": 6412 }, { "epoch": 0.7450479233226837, "grad_norm": 0.43832361698150635, "learning_rate": 0.0001, "loss": 1.5779, "step": 6413 }, { "epoch": 0.7451641010746443, "grad_norm": 0.4549694359302521, "learning_rate": 0.0001, "loss": 1.6203, "step": 6414 }, { "epoch": 0.7452802788266047, "grad_norm": 0.44481566548347473, "learning_rate": 0.0001, "loss": 1.5899, "step": 6415 }, { "epoch": 0.7453964565785652, "grad_norm": 0.43813270330429077, "learning_rate": 0.0001, "loss": 1.6835, "step": 6416 }, { "epoch": 0.7455126343305257, "grad_norm": 0.44562163949012756, "learning_rate": 0.0001, "loss": 1.6412, "step": 6417 }, { "epoch": 0.7456288120824862, "grad_norm": 0.4208749532699585, "learning_rate": 0.0001, "loss": 1.4426, "step": 6418 }, { "epoch": 0.7457449898344467, "grad_norm": 0.4573444426059723, "learning_rate": 0.0001, "loss": 1.5261, "step": 6419 }, { "epoch": 0.7458611675864072, "grad_norm": 0.4514586329460144, "learning_rate": 0.0001, "loss": 1.6794, "step": 6420 }, { "epoch": 0.7459773453383677, "grad_norm": 0.427177757024765, "learning_rate": 0.0001, "loss": 1.5954, "step": 6421 }, { "epoch": 0.7460935230903282, "grad_norm": 0.43251994252204895, "learning_rate": 0.0001, "loss": 1.6548, "step": 6422 }, { "epoch": 0.7462097008422887, "grad_norm": 0.4547024369239807, "learning_rate": 0.0001, "loss": 1.4714, "step": 6423 }, { "epoch": 0.7463258785942491, "grad_norm": 0.454458087682724, "learning_rate": 0.0001, "loss": 1.585, "step": 6424 }, { "epoch": 0.7464420563462097, "grad_norm": 0.44266244769096375, "learning_rate": 0.0001, "loss": 1.5443, "step": 6425 }, { "epoch": 0.7465582340981702, "grad_norm": 0.4643990099430084, "learning_rate": 0.0001, "loss": 1.6165, "step": 6426 }, { "epoch": 0.7466744118501307, "grad_norm": 0.4686664938926697, "learning_rate": 0.0001, "loss": 1.5298, "step": 6427 }, { "epoch": 0.7467905896020912, "grad_norm": 0.49029332399368286, "learning_rate": 0.0001, "loss": 1.5791, "step": 6428 }, { "epoch": 0.7469067673540517, "grad_norm": 0.46670180559158325, "learning_rate": 0.0001, "loss": 1.5147, "step": 6429 }, { "epoch": 0.7470229451060122, "grad_norm": 0.44535404443740845, "learning_rate": 0.0001, "loss": 1.6434, "step": 6430 }, { "epoch": 0.7471391228579727, "grad_norm": 0.4496926963329315, "learning_rate": 0.0001, "loss": 1.4151, "step": 6431 }, { "epoch": 0.7472553006099332, "grad_norm": 0.4497212767601013, "learning_rate": 0.0001, "loss": 1.6221, "step": 6432 }, { "epoch": 0.7473714783618937, "grad_norm": 0.44064804911613464, "learning_rate": 0.0001, "loss": 1.5384, "step": 6433 }, { "epoch": 0.7474876561138541, "grad_norm": 0.459143728017807, "learning_rate": 0.0001, "loss": 1.6105, "step": 6434 }, { "epoch": 0.7476038338658147, "grad_norm": 0.4302321672439575, "learning_rate": 0.0001, "loss": 1.5137, "step": 6435 }, { "epoch": 0.7477200116177752, "grad_norm": 0.4803028702735901, "learning_rate": 0.0001, "loss": 1.6813, "step": 6436 }, { "epoch": 0.7478361893697357, "grad_norm": 0.41635861992836, "learning_rate": 0.0001, "loss": 1.4167, "step": 6437 }, { "epoch": 0.7479523671216962, "grad_norm": 0.44405534863471985, "learning_rate": 0.0001, "loss": 1.6584, "step": 6438 }, { "epoch": 0.7480685448736567, "grad_norm": 0.4449010491371155, "learning_rate": 0.0001, "loss": 1.7648, "step": 6439 }, { "epoch": 0.7481847226256172, "grad_norm": 0.45254096388816833, "learning_rate": 0.0001, "loss": 1.6037, "step": 6440 }, { "epoch": 0.7483009003775777, "grad_norm": 0.4546097218990326, "learning_rate": 0.0001, "loss": 1.6735, "step": 6441 }, { "epoch": 0.7484170781295382, "grad_norm": 0.44353100657463074, "learning_rate": 0.0001, "loss": 1.6668, "step": 6442 }, { "epoch": 0.7485332558814987, "grad_norm": 0.4421907365322113, "learning_rate": 0.0001, "loss": 1.6233, "step": 6443 }, { "epoch": 0.7486494336334591, "grad_norm": 0.4550931751728058, "learning_rate": 0.0001, "loss": 1.5141, "step": 6444 }, { "epoch": 0.7487656113854196, "grad_norm": 0.4081590175628662, "learning_rate": 0.0001, "loss": 1.3085, "step": 6445 }, { "epoch": 0.7488817891373802, "grad_norm": 0.41531556844711304, "learning_rate": 0.0001, "loss": 1.5251, "step": 6446 }, { "epoch": 0.7489979668893407, "grad_norm": 0.46945273876190186, "learning_rate": 0.0001, "loss": 1.6785, "step": 6447 }, { "epoch": 0.7491141446413012, "grad_norm": 0.4772491455078125, "learning_rate": 0.0001, "loss": 1.6821, "step": 6448 }, { "epoch": 0.7492303223932617, "grad_norm": 0.4390372335910797, "learning_rate": 0.0001, "loss": 1.5864, "step": 6449 }, { "epoch": 0.7493465001452222, "grad_norm": 0.425656795501709, "learning_rate": 0.0001, "loss": 1.4169, "step": 6450 }, { "epoch": 0.7494626778971827, "grad_norm": 0.44285711646080017, "learning_rate": 0.0001, "loss": 1.4793, "step": 6451 }, { "epoch": 0.7495788556491432, "grad_norm": 0.44809186458587646, "learning_rate": 0.0001, "loss": 1.6826, "step": 6452 }, { "epoch": 0.7496950334011037, "grad_norm": 0.44603654742240906, "learning_rate": 0.0001, "loss": 1.4245, "step": 6453 }, { "epoch": 0.7498112111530641, "grad_norm": 0.45368316769599915, "learning_rate": 0.0001, "loss": 1.5795, "step": 6454 }, { "epoch": 0.7499273889050246, "grad_norm": 0.464532732963562, "learning_rate": 0.0001, "loss": 1.6552, "step": 6455 }, { "epoch": 0.7500435666569852, "grad_norm": 0.43661272525787354, "learning_rate": 0.0001, "loss": 1.4989, "step": 6456 }, { "epoch": 0.7501597444089457, "grad_norm": 0.4373878240585327, "learning_rate": 0.0001, "loss": 1.6482, "step": 6457 }, { "epoch": 0.7502759221609062, "grad_norm": 0.45285502076148987, "learning_rate": 0.0001, "loss": 1.558, "step": 6458 }, { "epoch": 0.7503920999128667, "grad_norm": 0.4704199433326721, "learning_rate": 0.0001, "loss": 1.4876, "step": 6459 }, { "epoch": 0.7505082776648272, "grad_norm": 0.45915988087654114, "learning_rate": 0.0001, "loss": 1.7206, "step": 6460 }, { "epoch": 0.7506244554167877, "grad_norm": 0.46170055866241455, "learning_rate": 0.0001, "loss": 1.6194, "step": 6461 }, { "epoch": 0.7507406331687482, "grad_norm": 0.46552303433418274, "learning_rate": 0.0001, "loss": 1.6316, "step": 6462 }, { "epoch": 0.7508568109207087, "grad_norm": 0.47524699568748474, "learning_rate": 0.0001, "loss": 1.629, "step": 6463 }, { "epoch": 0.7509729886726692, "grad_norm": 0.4851406514644623, "learning_rate": 0.0001, "loss": 1.605, "step": 6464 }, { "epoch": 0.7510891664246296, "grad_norm": 0.46212369203567505, "learning_rate": 0.0001, "loss": 1.6158, "step": 6465 }, { "epoch": 0.7512053441765901, "grad_norm": 0.4637957811355591, "learning_rate": 0.0001, "loss": 1.6477, "step": 6466 }, { "epoch": 0.7513215219285507, "grad_norm": 0.46081292629241943, "learning_rate": 0.0001, "loss": 1.5939, "step": 6467 }, { "epoch": 0.7514376996805112, "grad_norm": 0.418517529964447, "learning_rate": 0.0001, "loss": 1.5286, "step": 6468 }, { "epoch": 0.7515538774324717, "grad_norm": 0.42126187682151794, "learning_rate": 0.0001, "loss": 1.5361, "step": 6469 }, { "epoch": 0.7516700551844322, "grad_norm": 0.45902541279792786, "learning_rate": 0.0001, "loss": 1.6279, "step": 6470 }, { "epoch": 0.7517862329363927, "grad_norm": 0.42754048109054565, "learning_rate": 0.0001, "loss": 1.4632, "step": 6471 }, { "epoch": 0.7519024106883532, "grad_norm": 0.4438011348247528, "learning_rate": 0.0001, "loss": 1.4954, "step": 6472 }, { "epoch": 0.7520185884403137, "grad_norm": 0.46611130237579346, "learning_rate": 0.0001, "loss": 1.7748, "step": 6473 }, { "epoch": 0.7521347661922742, "grad_norm": 0.43576914072036743, "learning_rate": 0.0001, "loss": 1.4486, "step": 6474 }, { "epoch": 0.7522509439442346, "grad_norm": 0.4808332622051239, "learning_rate": 0.0001, "loss": 1.6467, "step": 6475 }, { "epoch": 0.7523671216961951, "grad_norm": 0.4799876809120178, "learning_rate": 0.0001, "loss": 1.6361, "step": 6476 }, { "epoch": 0.7524832994481557, "grad_norm": 0.4838355481624603, "learning_rate": 0.0001, "loss": 1.6259, "step": 6477 }, { "epoch": 0.7525994772001162, "grad_norm": 0.47365084290504456, "learning_rate": 0.0001, "loss": 1.5567, "step": 6478 }, { "epoch": 0.7527156549520767, "grad_norm": 0.4396180510520935, "learning_rate": 0.0001, "loss": 1.6491, "step": 6479 }, { "epoch": 0.7528318327040372, "grad_norm": 0.4406949579715729, "learning_rate": 0.0001, "loss": 1.6357, "step": 6480 }, { "epoch": 0.7529480104559977, "grad_norm": 0.43334612250328064, "learning_rate": 0.0001, "loss": 1.5365, "step": 6481 }, { "epoch": 0.7530641882079582, "grad_norm": 0.4289649426937103, "learning_rate": 0.0001, "loss": 1.4876, "step": 6482 }, { "epoch": 0.7531803659599187, "grad_norm": 0.4250386953353882, "learning_rate": 0.0001, "loss": 1.4657, "step": 6483 }, { "epoch": 0.7532965437118792, "grad_norm": 0.4700089395046234, "learning_rate": 0.0001, "loss": 1.6436, "step": 6484 }, { "epoch": 0.7534127214638396, "grad_norm": 0.4487655460834503, "learning_rate": 0.0001, "loss": 1.5854, "step": 6485 }, { "epoch": 0.7535288992158001, "grad_norm": 0.44160258769989014, "learning_rate": 0.0001, "loss": 1.5324, "step": 6486 }, { "epoch": 0.7536450769677606, "grad_norm": 0.43378591537475586, "learning_rate": 0.0001, "loss": 1.5401, "step": 6487 }, { "epoch": 0.7537612547197212, "grad_norm": 0.4056842029094696, "learning_rate": 0.0001, "loss": 1.5334, "step": 6488 }, { "epoch": 0.7538774324716817, "grad_norm": 0.4529426693916321, "learning_rate": 0.0001, "loss": 1.5559, "step": 6489 }, { "epoch": 0.7539936102236422, "grad_norm": 0.5032495856285095, "learning_rate": 0.0001, "loss": 1.7247, "step": 6490 }, { "epoch": 0.7541097879756027, "grad_norm": 0.4208425283432007, "learning_rate": 0.0001, "loss": 1.4887, "step": 6491 }, { "epoch": 0.7542259657275632, "grad_norm": 0.429240345954895, "learning_rate": 0.0001, "loss": 1.6465, "step": 6492 }, { "epoch": 0.7543421434795237, "grad_norm": 0.45296066999435425, "learning_rate": 0.0001, "loss": 1.5084, "step": 6493 }, { "epoch": 0.7544583212314842, "grad_norm": 0.4730745255947113, "learning_rate": 0.0001, "loss": 1.7686, "step": 6494 }, { "epoch": 0.7545744989834446, "grad_norm": 0.43891751766204834, "learning_rate": 0.0001, "loss": 1.7096, "step": 6495 }, { "epoch": 0.7546906767354051, "grad_norm": 0.4880208671092987, "learning_rate": 0.0001, "loss": 1.7739, "step": 6496 }, { "epoch": 0.7548068544873656, "grad_norm": 0.43888798356056213, "learning_rate": 0.0001, "loss": 1.5535, "step": 6497 }, { "epoch": 0.7549230322393262, "grad_norm": 0.42402908205986023, "learning_rate": 0.0001, "loss": 1.5107, "step": 6498 }, { "epoch": 0.7550392099912867, "grad_norm": 0.43991389870643616, "learning_rate": 0.0001, "loss": 1.5399, "step": 6499 }, { "epoch": 0.7551553877432472, "grad_norm": 0.43535780906677246, "learning_rate": 0.0001, "loss": 1.4413, "step": 6500 }, { "epoch": 0.7552715654952077, "grad_norm": 0.4613569974899292, "learning_rate": 0.0001, "loss": 1.6213, "step": 6501 }, { "epoch": 0.7553877432471682, "grad_norm": 0.4456695020198822, "learning_rate": 0.0001, "loss": 1.5422, "step": 6502 }, { "epoch": 0.7555039209991287, "grad_norm": 0.4481378197669983, "learning_rate": 0.0001, "loss": 1.6161, "step": 6503 }, { "epoch": 0.7556200987510892, "grad_norm": 0.45384448766708374, "learning_rate": 0.0001, "loss": 1.5488, "step": 6504 }, { "epoch": 0.7557362765030496, "grad_norm": 0.474298894405365, "learning_rate": 0.0001, "loss": 1.6841, "step": 6505 }, { "epoch": 0.7558524542550101, "grad_norm": 0.478977233171463, "learning_rate": 0.0001, "loss": 1.757, "step": 6506 }, { "epoch": 0.7559686320069706, "grad_norm": 0.45256179571151733, "learning_rate": 0.0001, "loss": 1.745, "step": 6507 }, { "epoch": 0.7560848097589312, "grad_norm": 0.465461790561676, "learning_rate": 0.0001, "loss": 1.7028, "step": 6508 }, { "epoch": 0.7562009875108917, "grad_norm": 0.4733600318431854, "learning_rate": 0.0001, "loss": 1.7272, "step": 6509 }, { "epoch": 0.7563171652628522, "grad_norm": 0.46702197194099426, "learning_rate": 0.0001, "loss": 1.6782, "step": 6510 }, { "epoch": 0.7564333430148127, "grad_norm": 0.4695473909378052, "learning_rate": 0.0001, "loss": 1.5759, "step": 6511 }, { "epoch": 0.7565495207667732, "grad_norm": 0.48179465532302856, "learning_rate": 0.0001, "loss": 1.7386, "step": 6512 }, { "epoch": 0.7566656985187337, "grad_norm": 0.4340745210647583, "learning_rate": 0.0001, "loss": 1.385, "step": 6513 }, { "epoch": 0.7567818762706942, "grad_norm": 0.4561876058578491, "learning_rate": 0.0001, "loss": 1.7278, "step": 6514 }, { "epoch": 0.7568980540226546, "grad_norm": 0.4735701084136963, "learning_rate": 0.0001, "loss": 1.6604, "step": 6515 }, { "epoch": 0.7570142317746151, "grad_norm": 0.5096227526664734, "learning_rate": 0.0001, "loss": 1.6791, "step": 6516 }, { "epoch": 0.7571304095265756, "grad_norm": 0.5059962868690491, "learning_rate": 0.0001, "loss": 1.6957, "step": 6517 }, { "epoch": 0.7572465872785361, "grad_norm": 0.4230511784553528, "learning_rate": 0.0001, "loss": 1.4676, "step": 6518 }, { "epoch": 0.7573627650304967, "grad_norm": 0.5008574724197388, "learning_rate": 0.0001, "loss": 1.6361, "step": 6519 }, { "epoch": 0.7574789427824572, "grad_norm": 0.45503777265548706, "learning_rate": 0.0001, "loss": 1.582, "step": 6520 }, { "epoch": 0.7575951205344177, "grad_norm": 0.49144798517227173, "learning_rate": 0.0001, "loss": 1.5634, "step": 6521 }, { "epoch": 0.7577112982863782, "grad_norm": 0.450183629989624, "learning_rate": 0.0001, "loss": 1.569, "step": 6522 }, { "epoch": 0.7578274760383387, "grad_norm": 0.4511738717556, "learning_rate": 0.0001, "loss": 1.4578, "step": 6523 }, { "epoch": 0.7579436537902992, "grad_norm": 0.45936381816864014, "learning_rate": 0.0001, "loss": 1.5197, "step": 6524 }, { "epoch": 0.7580598315422596, "grad_norm": 0.4909375309944153, "learning_rate": 0.0001, "loss": 1.6538, "step": 6525 }, { "epoch": 0.7581760092942201, "grad_norm": 0.426647812128067, "learning_rate": 0.0001, "loss": 1.5593, "step": 6526 }, { "epoch": 0.7582921870461806, "grad_norm": 0.42700493335723877, "learning_rate": 0.0001, "loss": 1.4212, "step": 6527 }, { "epoch": 0.7584083647981411, "grad_norm": 0.4363797903060913, "learning_rate": 0.0001, "loss": 1.4909, "step": 6528 }, { "epoch": 0.7585245425501017, "grad_norm": 0.47377872467041016, "learning_rate": 0.0001, "loss": 1.6416, "step": 6529 }, { "epoch": 0.7586407203020622, "grad_norm": 0.45347169041633606, "learning_rate": 0.0001, "loss": 1.5481, "step": 6530 }, { "epoch": 0.7587568980540227, "grad_norm": 0.4400898814201355, "learning_rate": 0.0001, "loss": 1.6557, "step": 6531 }, { "epoch": 0.7588730758059832, "grad_norm": 0.4629836678504944, "learning_rate": 0.0001, "loss": 1.6101, "step": 6532 }, { "epoch": 0.7589892535579437, "grad_norm": 0.43157103657722473, "learning_rate": 0.0001, "loss": 1.3997, "step": 6533 }, { "epoch": 0.7591054313099042, "grad_norm": 0.44874757528305054, "learning_rate": 0.0001, "loss": 1.6097, "step": 6534 }, { "epoch": 0.7592216090618646, "grad_norm": 0.4572241008281708, "learning_rate": 0.0001, "loss": 1.7732, "step": 6535 }, { "epoch": 0.7593377868138251, "grad_norm": 0.509334146976471, "learning_rate": 0.0001, "loss": 1.6985, "step": 6536 }, { "epoch": 0.7594539645657856, "grad_norm": 0.45650917291641235, "learning_rate": 0.0001, "loss": 1.7018, "step": 6537 }, { "epoch": 0.7595701423177461, "grad_norm": 0.4282233715057373, "learning_rate": 0.0001, "loss": 1.5703, "step": 6538 }, { "epoch": 0.7596863200697066, "grad_norm": 0.49342942237854004, "learning_rate": 0.0001, "loss": 1.6612, "step": 6539 }, { "epoch": 0.7598024978216672, "grad_norm": 0.45365840196609497, "learning_rate": 0.0001, "loss": 1.7058, "step": 6540 }, { "epoch": 0.7599186755736277, "grad_norm": 0.4653255343437195, "learning_rate": 0.0001, "loss": 1.5784, "step": 6541 }, { "epoch": 0.7600348533255882, "grad_norm": 0.4376204013824463, "learning_rate": 0.0001, "loss": 1.4713, "step": 6542 }, { "epoch": 0.7601510310775487, "grad_norm": 0.42907100915908813, "learning_rate": 0.0001, "loss": 1.5606, "step": 6543 }, { "epoch": 0.7602672088295092, "grad_norm": 0.4851232171058655, "learning_rate": 0.0001, "loss": 1.6919, "step": 6544 }, { "epoch": 0.7603833865814696, "grad_norm": 0.4842982590198517, "learning_rate": 0.0001, "loss": 1.5926, "step": 6545 }, { "epoch": 0.7604995643334301, "grad_norm": 0.44596225023269653, "learning_rate": 0.0001, "loss": 1.5771, "step": 6546 }, { "epoch": 0.7606157420853906, "grad_norm": 0.42956697940826416, "learning_rate": 0.0001, "loss": 1.5368, "step": 6547 }, { "epoch": 0.7607319198373511, "grad_norm": 0.4747844934463501, "learning_rate": 0.0001, "loss": 1.6931, "step": 6548 }, { "epoch": 0.7608480975893116, "grad_norm": 0.4778608977794647, "learning_rate": 0.0001, "loss": 1.7017, "step": 6549 }, { "epoch": 0.7609642753412722, "grad_norm": 0.4931018352508545, "learning_rate": 0.0001, "loss": 1.7501, "step": 6550 }, { "epoch": 0.7610804530932327, "grad_norm": 0.4602007567882538, "learning_rate": 0.0001, "loss": 1.7131, "step": 6551 }, { "epoch": 0.7611966308451932, "grad_norm": 0.4534691572189331, "learning_rate": 0.0001, "loss": 1.418, "step": 6552 }, { "epoch": 0.7613128085971537, "grad_norm": 0.4759400486946106, "learning_rate": 0.0001, "loss": 1.7773, "step": 6553 }, { "epoch": 0.7614289863491142, "grad_norm": 0.44911110401153564, "learning_rate": 0.0001, "loss": 1.7104, "step": 6554 }, { "epoch": 0.7615451641010746, "grad_norm": 0.4117274582386017, "learning_rate": 0.0001, "loss": 1.5102, "step": 6555 }, { "epoch": 0.7616613418530351, "grad_norm": 0.44577354192733765, "learning_rate": 0.0001, "loss": 1.72, "step": 6556 }, { "epoch": 0.7617775196049956, "grad_norm": 0.4217338562011719, "learning_rate": 0.0001, "loss": 1.4747, "step": 6557 }, { "epoch": 0.7618936973569561, "grad_norm": 0.4698602855205536, "learning_rate": 0.0001, "loss": 1.6707, "step": 6558 }, { "epoch": 0.7620098751089166, "grad_norm": 0.44901999831199646, "learning_rate": 0.0001, "loss": 1.656, "step": 6559 }, { "epoch": 0.7621260528608771, "grad_norm": 0.41744062304496765, "learning_rate": 0.0001, "loss": 1.4392, "step": 6560 }, { "epoch": 0.7622422306128377, "grad_norm": 0.4368325471878052, "learning_rate": 0.0001, "loss": 1.5994, "step": 6561 }, { "epoch": 0.7623584083647982, "grad_norm": 0.43781906366348267, "learning_rate": 0.0001, "loss": 1.547, "step": 6562 }, { "epoch": 0.7624745861167587, "grad_norm": 0.4672465920448303, "learning_rate": 0.0001, "loss": 1.6581, "step": 6563 }, { "epoch": 0.7625907638687192, "grad_norm": 0.44556140899658203, "learning_rate": 0.0001, "loss": 1.6116, "step": 6564 }, { "epoch": 0.7627069416206796, "grad_norm": 0.47782960534095764, "learning_rate": 0.0001, "loss": 1.556, "step": 6565 }, { "epoch": 0.7628231193726401, "grad_norm": 0.4672389328479767, "learning_rate": 0.0001, "loss": 1.6303, "step": 6566 }, { "epoch": 0.7629392971246006, "grad_norm": 0.44168704748153687, "learning_rate": 0.0001, "loss": 1.6007, "step": 6567 }, { "epoch": 0.7630554748765611, "grad_norm": 0.47608694434165955, "learning_rate": 0.0001, "loss": 1.6944, "step": 6568 }, { "epoch": 0.7631716526285216, "grad_norm": 0.4470519423484802, "learning_rate": 0.0001, "loss": 1.6235, "step": 6569 }, { "epoch": 0.7632878303804821, "grad_norm": 0.41022247076034546, "learning_rate": 0.0001, "loss": 1.3155, "step": 6570 }, { "epoch": 0.7634040081324427, "grad_norm": 0.4504031240940094, "learning_rate": 0.0001, "loss": 1.5964, "step": 6571 }, { "epoch": 0.7635201858844032, "grad_norm": 0.48684489727020264, "learning_rate": 0.0001, "loss": 1.6809, "step": 6572 }, { "epoch": 0.7636363636363637, "grad_norm": 0.43762362003326416, "learning_rate": 0.0001, "loss": 1.6745, "step": 6573 }, { "epoch": 0.7637525413883242, "grad_norm": 0.47748515009880066, "learning_rate": 0.0001, "loss": 1.6348, "step": 6574 }, { "epoch": 0.7638687191402846, "grad_norm": 0.4670480191707611, "learning_rate": 0.0001, "loss": 1.7789, "step": 6575 }, { "epoch": 0.7639848968922451, "grad_norm": 0.45016875863075256, "learning_rate": 0.0001, "loss": 1.6367, "step": 6576 }, { "epoch": 0.7641010746442056, "grad_norm": 0.4202631115913391, "learning_rate": 0.0001, "loss": 1.4861, "step": 6577 }, { "epoch": 0.7642172523961661, "grad_norm": 0.4666217565536499, "learning_rate": 0.0001, "loss": 1.4498, "step": 6578 }, { "epoch": 0.7643334301481266, "grad_norm": 0.46688103675842285, "learning_rate": 0.0001, "loss": 1.4261, "step": 6579 }, { "epoch": 0.7644496079000871, "grad_norm": 0.4328000843524933, "learning_rate": 0.0001, "loss": 1.503, "step": 6580 }, { "epoch": 0.7645657856520476, "grad_norm": 0.4551675617694855, "learning_rate": 0.0001, "loss": 1.567, "step": 6581 }, { "epoch": 0.7646819634040082, "grad_norm": 0.4549828767776489, "learning_rate": 0.0001, "loss": 1.5367, "step": 6582 }, { "epoch": 0.7647981411559687, "grad_norm": 0.45938196778297424, "learning_rate": 0.0001, "loss": 1.6921, "step": 6583 }, { "epoch": 0.7649143189079292, "grad_norm": 0.4694269299507141, "learning_rate": 0.0001, "loss": 1.5216, "step": 6584 }, { "epoch": 0.7650304966598896, "grad_norm": 0.47541555762290955, "learning_rate": 0.0001, "loss": 1.6457, "step": 6585 }, { "epoch": 0.7651466744118501, "grad_norm": 0.432059645652771, "learning_rate": 0.0001, "loss": 1.4684, "step": 6586 }, { "epoch": 0.7652628521638106, "grad_norm": 0.48397889733314514, "learning_rate": 0.0001, "loss": 1.5379, "step": 6587 }, { "epoch": 0.7653790299157711, "grad_norm": 0.437933087348938, "learning_rate": 0.0001, "loss": 1.5497, "step": 6588 }, { "epoch": 0.7654952076677316, "grad_norm": 0.4440605044364929, "learning_rate": 0.0001, "loss": 1.663, "step": 6589 }, { "epoch": 0.7656113854196921, "grad_norm": 0.447862833738327, "learning_rate": 0.0001, "loss": 1.6118, "step": 6590 }, { "epoch": 0.7657275631716526, "grad_norm": 0.41971054673194885, "learning_rate": 0.0001, "loss": 1.5061, "step": 6591 }, { "epoch": 0.7658437409236132, "grad_norm": 0.4456073045730591, "learning_rate": 0.0001, "loss": 1.5482, "step": 6592 }, { "epoch": 0.7659599186755737, "grad_norm": 0.43009260296821594, "learning_rate": 0.0001, "loss": 1.5966, "step": 6593 }, { "epoch": 0.7660760964275342, "grad_norm": 0.482006698846817, "learning_rate": 0.0001, "loss": 1.6334, "step": 6594 }, { "epoch": 0.7661922741794946, "grad_norm": 0.4667458236217499, "learning_rate": 0.0001, "loss": 1.5051, "step": 6595 }, { "epoch": 0.7663084519314551, "grad_norm": 0.4783475995063782, "learning_rate": 0.0001, "loss": 1.7254, "step": 6596 }, { "epoch": 0.7664246296834156, "grad_norm": 0.4709714353084564, "learning_rate": 0.0001, "loss": 1.5501, "step": 6597 }, { "epoch": 0.7665408074353761, "grad_norm": 0.45129141211509705, "learning_rate": 0.0001, "loss": 1.573, "step": 6598 }, { "epoch": 0.7666569851873366, "grad_norm": 0.44044631719589233, "learning_rate": 0.0001, "loss": 1.4134, "step": 6599 }, { "epoch": 0.7667731629392971, "grad_norm": 0.4458431899547577, "learning_rate": 0.0001, "loss": 1.4987, "step": 6600 }, { "epoch": 0.7668893406912576, "grad_norm": 0.4668479263782501, "learning_rate": 0.0001, "loss": 1.5884, "step": 6601 }, { "epoch": 0.7670055184432181, "grad_norm": 0.4631541073322296, "learning_rate": 0.0001, "loss": 1.5373, "step": 6602 }, { "epoch": 0.7671216961951787, "grad_norm": 0.47970542311668396, "learning_rate": 0.0001, "loss": 1.6183, "step": 6603 }, { "epoch": 0.7672378739471392, "grad_norm": 0.4707159101963043, "learning_rate": 0.0001, "loss": 1.7697, "step": 6604 }, { "epoch": 0.7673540516990996, "grad_norm": 0.48300158977508545, "learning_rate": 0.0001, "loss": 1.5748, "step": 6605 }, { "epoch": 0.7674702294510601, "grad_norm": 0.47976362705230713, "learning_rate": 0.0001, "loss": 1.5799, "step": 6606 }, { "epoch": 0.7675864072030206, "grad_norm": 0.4959631860256195, "learning_rate": 0.0001, "loss": 1.6045, "step": 6607 }, { "epoch": 0.7677025849549811, "grad_norm": 0.4722089171409607, "learning_rate": 0.0001, "loss": 1.6423, "step": 6608 }, { "epoch": 0.7678187627069416, "grad_norm": 0.5199443101882935, "learning_rate": 0.0001, "loss": 1.642, "step": 6609 }, { "epoch": 0.7679349404589021, "grad_norm": 0.4067780375480652, "learning_rate": 0.0001, "loss": 1.3372, "step": 6610 }, { "epoch": 0.7680511182108626, "grad_norm": 0.43238359689712524, "learning_rate": 0.0001, "loss": 1.5107, "step": 6611 }, { "epoch": 0.7681672959628231, "grad_norm": 0.5341728925704956, "learning_rate": 0.0001, "loss": 1.8739, "step": 6612 }, { "epoch": 0.7682834737147837, "grad_norm": 0.46490055322647095, "learning_rate": 0.0001, "loss": 1.7022, "step": 6613 }, { "epoch": 0.7683996514667442, "grad_norm": 0.4309159219264984, "learning_rate": 0.0001, "loss": 1.6349, "step": 6614 }, { "epoch": 0.7685158292187046, "grad_norm": 0.4642525017261505, "learning_rate": 0.0001, "loss": 1.6468, "step": 6615 }, { "epoch": 0.7686320069706651, "grad_norm": 0.46897363662719727, "learning_rate": 0.0001, "loss": 1.5809, "step": 6616 }, { "epoch": 0.7687481847226256, "grad_norm": 0.43041566014289856, "learning_rate": 0.0001, "loss": 1.5338, "step": 6617 }, { "epoch": 0.7688643624745861, "grad_norm": 0.42853835225105286, "learning_rate": 0.0001, "loss": 1.4978, "step": 6618 }, { "epoch": 0.7689805402265466, "grad_norm": 0.4620231091976166, "learning_rate": 0.0001, "loss": 1.73, "step": 6619 }, { "epoch": 0.7690967179785071, "grad_norm": 0.44504088163375854, "learning_rate": 0.0001, "loss": 1.6048, "step": 6620 }, { "epoch": 0.7692128957304676, "grad_norm": 0.48609089851379395, "learning_rate": 0.0001, "loss": 1.5766, "step": 6621 }, { "epoch": 0.7693290734824281, "grad_norm": 0.483452171087265, "learning_rate": 0.0001, "loss": 1.6595, "step": 6622 }, { "epoch": 0.7694452512343886, "grad_norm": 0.4754545986652374, "learning_rate": 0.0001, "loss": 1.663, "step": 6623 }, { "epoch": 0.7695614289863492, "grad_norm": 0.4533579349517822, "learning_rate": 0.0001, "loss": 1.5474, "step": 6624 }, { "epoch": 0.7696776067383096, "grad_norm": 0.4664144814014435, "learning_rate": 0.0001, "loss": 1.6962, "step": 6625 }, { "epoch": 0.7697937844902701, "grad_norm": 0.48113691806793213, "learning_rate": 0.0001, "loss": 1.5808, "step": 6626 }, { "epoch": 0.7699099622422306, "grad_norm": 0.4325944185256958, "learning_rate": 0.0001, "loss": 1.5159, "step": 6627 }, { "epoch": 0.7700261399941911, "grad_norm": 0.4415172338485718, "learning_rate": 0.0001, "loss": 1.5869, "step": 6628 }, { "epoch": 0.7701423177461516, "grad_norm": 0.41996294260025024, "learning_rate": 0.0001, "loss": 1.497, "step": 6629 }, { "epoch": 0.7702584954981121, "grad_norm": 0.4572359025478363, "learning_rate": 0.0001, "loss": 1.6587, "step": 6630 }, { "epoch": 0.7703746732500726, "grad_norm": 0.4630671441555023, "learning_rate": 0.0001, "loss": 1.6671, "step": 6631 }, { "epoch": 0.7704908510020331, "grad_norm": 0.4352489411830902, "learning_rate": 0.0001, "loss": 1.6456, "step": 6632 }, { "epoch": 0.7706070287539936, "grad_norm": 0.4531479477882385, "learning_rate": 0.0001, "loss": 1.525, "step": 6633 }, { "epoch": 0.7707232065059542, "grad_norm": 0.5102584362030029, "learning_rate": 0.0001, "loss": 1.5636, "step": 6634 }, { "epoch": 0.7708393842579147, "grad_norm": 0.45892634987831116, "learning_rate": 0.0001, "loss": 1.5188, "step": 6635 }, { "epoch": 0.7709555620098751, "grad_norm": 0.4718113839626312, "learning_rate": 0.0001, "loss": 1.7131, "step": 6636 }, { "epoch": 0.7710717397618356, "grad_norm": 0.4592994749546051, "learning_rate": 0.0001, "loss": 1.644, "step": 6637 }, { "epoch": 0.7711879175137961, "grad_norm": 0.4589068293571472, "learning_rate": 0.0001, "loss": 1.5792, "step": 6638 }, { "epoch": 0.7713040952657566, "grad_norm": 0.41312143206596375, "learning_rate": 0.0001, "loss": 1.3861, "step": 6639 }, { "epoch": 0.7714202730177171, "grad_norm": 0.4337233901023865, "learning_rate": 0.0001, "loss": 1.4143, "step": 6640 }, { "epoch": 0.7715364507696776, "grad_norm": 0.4643138349056244, "learning_rate": 0.0001, "loss": 1.6514, "step": 6641 }, { "epoch": 0.7716526285216381, "grad_norm": 0.485331654548645, "learning_rate": 0.0001, "loss": 1.6514, "step": 6642 }, { "epoch": 0.7717688062735986, "grad_norm": 0.4634449779987335, "learning_rate": 0.0001, "loss": 1.6173, "step": 6643 }, { "epoch": 0.771884984025559, "grad_norm": 0.45036226511001587, "learning_rate": 0.0001, "loss": 1.4835, "step": 6644 }, { "epoch": 0.7720011617775197, "grad_norm": 0.45549461245536804, "learning_rate": 0.0001, "loss": 1.6978, "step": 6645 }, { "epoch": 0.7721173395294801, "grad_norm": 0.4745498299598694, "learning_rate": 0.0001, "loss": 1.5631, "step": 6646 }, { "epoch": 0.7722335172814406, "grad_norm": 0.5061119198799133, "learning_rate": 0.0001, "loss": 1.725, "step": 6647 }, { "epoch": 0.7723496950334011, "grad_norm": 0.4423942565917969, "learning_rate": 0.0001, "loss": 1.5582, "step": 6648 }, { "epoch": 0.7724658727853616, "grad_norm": 0.46046459674835205, "learning_rate": 0.0001, "loss": 1.6316, "step": 6649 }, { "epoch": 0.7725820505373221, "grad_norm": 0.4494078755378723, "learning_rate": 0.0001, "loss": 1.5904, "step": 6650 }, { "epoch": 0.7726982282892826, "grad_norm": 0.4689868986606598, "learning_rate": 0.0001, "loss": 1.7184, "step": 6651 }, { "epoch": 0.7728144060412431, "grad_norm": 0.42947614192962646, "learning_rate": 0.0001, "loss": 1.5102, "step": 6652 }, { "epoch": 0.7729305837932036, "grad_norm": 0.424402117729187, "learning_rate": 0.0001, "loss": 1.3695, "step": 6653 }, { "epoch": 0.773046761545164, "grad_norm": 0.44652658700942993, "learning_rate": 0.0001, "loss": 1.6374, "step": 6654 }, { "epoch": 0.7731629392971247, "grad_norm": 0.4878056049346924, "learning_rate": 0.0001, "loss": 1.6091, "step": 6655 }, { "epoch": 0.7732791170490851, "grad_norm": 0.4504278898239136, "learning_rate": 0.0001, "loss": 1.4506, "step": 6656 }, { "epoch": 0.7733952948010456, "grad_norm": 0.43324804306030273, "learning_rate": 0.0001, "loss": 1.4912, "step": 6657 }, { "epoch": 0.7735114725530061, "grad_norm": 0.47235342860221863, "learning_rate": 0.0001, "loss": 1.7402, "step": 6658 }, { "epoch": 0.7736276503049666, "grad_norm": 0.4258509576320648, "learning_rate": 0.0001, "loss": 1.5373, "step": 6659 }, { "epoch": 0.7737438280569271, "grad_norm": 0.47323077917099, "learning_rate": 0.0001, "loss": 1.5615, "step": 6660 }, { "epoch": 0.7738600058088876, "grad_norm": 0.44588038325309753, "learning_rate": 0.0001, "loss": 1.601, "step": 6661 }, { "epoch": 0.7739761835608481, "grad_norm": 0.4483187794685364, "learning_rate": 0.0001, "loss": 1.4728, "step": 6662 }, { "epoch": 0.7740923613128086, "grad_norm": 0.4594082534313202, "learning_rate": 0.0001, "loss": 1.6624, "step": 6663 }, { "epoch": 0.774208539064769, "grad_norm": 0.4679012894630432, "learning_rate": 0.0001, "loss": 1.7395, "step": 6664 }, { "epoch": 0.7743247168167295, "grad_norm": 0.5004777312278748, "learning_rate": 0.0001, "loss": 1.7538, "step": 6665 }, { "epoch": 0.7744408945686901, "grad_norm": 0.4686128795146942, "learning_rate": 0.0001, "loss": 1.6201, "step": 6666 }, { "epoch": 0.7745570723206506, "grad_norm": 0.46381059288978577, "learning_rate": 0.0001, "loss": 1.6585, "step": 6667 }, { "epoch": 0.7746732500726111, "grad_norm": 0.4782971739768982, "learning_rate": 0.0001, "loss": 1.7547, "step": 6668 }, { "epoch": 0.7747894278245716, "grad_norm": 0.48163890838623047, "learning_rate": 0.0001, "loss": 1.7483, "step": 6669 }, { "epoch": 0.7749056055765321, "grad_norm": 0.47830620408058167, "learning_rate": 0.0001, "loss": 1.6716, "step": 6670 }, { "epoch": 0.7750217833284926, "grad_norm": 0.4307786524295807, "learning_rate": 0.0001, "loss": 1.7148, "step": 6671 }, { "epoch": 0.7751379610804531, "grad_norm": 0.4738655090332031, "learning_rate": 0.0001, "loss": 1.6182, "step": 6672 }, { "epoch": 0.7752541388324136, "grad_norm": 0.45517605543136597, "learning_rate": 0.0001, "loss": 1.6803, "step": 6673 }, { "epoch": 0.775370316584374, "grad_norm": 0.44930917024612427, "learning_rate": 0.0001, "loss": 1.5551, "step": 6674 }, { "epoch": 0.7754864943363345, "grad_norm": 0.4695178270339966, "learning_rate": 0.0001, "loss": 1.6834, "step": 6675 }, { "epoch": 0.7756026720882951, "grad_norm": 0.4576238691806793, "learning_rate": 0.0001, "loss": 1.5411, "step": 6676 }, { "epoch": 0.7757188498402556, "grad_norm": 0.44012871384620667, "learning_rate": 0.0001, "loss": 1.4499, "step": 6677 }, { "epoch": 0.7758350275922161, "grad_norm": 0.4941727817058563, "learning_rate": 0.0001, "loss": 1.6882, "step": 6678 }, { "epoch": 0.7759512053441766, "grad_norm": 0.4414857029914856, "learning_rate": 0.0001, "loss": 1.5447, "step": 6679 }, { "epoch": 0.7760673830961371, "grad_norm": 0.468009352684021, "learning_rate": 0.0001, "loss": 1.69, "step": 6680 }, { "epoch": 0.7761835608480976, "grad_norm": 0.514825165271759, "learning_rate": 0.0001, "loss": 1.6376, "step": 6681 }, { "epoch": 0.7762997386000581, "grad_norm": 0.47073277831077576, "learning_rate": 0.0001, "loss": 1.5097, "step": 6682 }, { "epoch": 0.7764159163520186, "grad_norm": 0.46188485622406006, "learning_rate": 0.0001, "loss": 1.5405, "step": 6683 }, { "epoch": 0.776532094103979, "grad_norm": 0.5069568753242493, "learning_rate": 0.0001, "loss": 1.577, "step": 6684 }, { "epoch": 0.7766482718559395, "grad_norm": 0.4475022256374359, "learning_rate": 0.0001, "loss": 1.5666, "step": 6685 }, { "epoch": 0.7767644496079001, "grad_norm": 0.48565706610679626, "learning_rate": 0.0001, "loss": 1.5044, "step": 6686 }, { "epoch": 0.7768806273598606, "grad_norm": 0.4480251967906952, "learning_rate": 0.0001, "loss": 1.5559, "step": 6687 }, { "epoch": 0.7769968051118211, "grad_norm": 0.4224783480167389, "learning_rate": 0.0001, "loss": 1.4649, "step": 6688 }, { "epoch": 0.7771129828637816, "grad_norm": 0.45073410868644714, "learning_rate": 0.0001, "loss": 1.6847, "step": 6689 }, { "epoch": 0.7772291606157421, "grad_norm": 0.4535382390022278, "learning_rate": 0.0001, "loss": 1.6862, "step": 6690 }, { "epoch": 0.7773453383677026, "grad_norm": 0.45997950434684753, "learning_rate": 0.0001, "loss": 1.6649, "step": 6691 }, { "epoch": 0.7774615161196631, "grad_norm": 0.4208465814590454, "learning_rate": 0.0001, "loss": 1.4265, "step": 6692 }, { "epoch": 0.7775776938716236, "grad_norm": 0.4323180317878723, "learning_rate": 0.0001, "loss": 1.4691, "step": 6693 }, { "epoch": 0.777693871623584, "grad_norm": 0.47370851039886475, "learning_rate": 0.0001, "loss": 1.5979, "step": 6694 }, { "epoch": 0.7778100493755445, "grad_norm": 0.46739041805267334, "learning_rate": 0.0001, "loss": 1.6456, "step": 6695 }, { "epoch": 0.777926227127505, "grad_norm": 0.44516703486442566, "learning_rate": 0.0001, "loss": 1.5745, "step": 6696 }, { "epoch": 0.7780424048794656, "grad_norm": 0.4348163306713104, "learning_rate": 0.0001, "loss": 1.5218, "step": 6697 }, { "epoch": 0.7781585826314261, "grad_norm": 0.5044941902160645, "learning_rate": 0.0001, "loss": 1.7077, "step": 6698 }, { "epoch": 0.7782747603833866, "grad_norm": 0.48483940958976746, "learning_rate": 0.0001, "loss": 1.7837, "step": 6699 }, { "epoch": 0.7783909381353471, "grad_norm": 0.46066558361053467, "learning_rate": 0.0001, "loss": 1.6183, "step": 6700 }, { "epoch": 0.7785071158873076, "grad_norm": 0.45025432109832764, "learning_rate": 0.0001, "loss": 1.6095, "step": 6701 }, { "epoch": 0.7786232936392681, "grad_norm": 0.4388742744922638, "learning_rate": 0.0001, "loss": 1.6649, "step": 6702 }, { "epoch": 0.7787394713912286, "grad_norm": 0.44050899147987366, "learning_rate": 0.0001, "loss": 1.5372, "step": 6703 }, { "epoch": 0.778855649143189, "grad_norm": 0.5146862864494324, "learning_rate": 0.0001, "loss": 1.8927, "step": 6704 }, { "epoch": 0.7789718268951495, "grad_norm": 0.43440109491348267, "learning_rate": 0.0001, "loss": 1.5271, "step": 6705 }, { "epoch": 0.77908800464711, "grad_norm": 0.43129873275756836, "learning_rate": 0.0001, "loss": 1.6918, "step": 6706 }, { "epoch": 0.7792041823990706, "grad_norm": 0.4510217308998108, "learning_rate": 0.0001, "loss": 1.532, "step": 6707 }, { "epoch": 0.7793203601510311, "grad_norm": 0.5071493983268738, "learning_rate": 0.0001, "loss": 1.7241, "step": 6708 }, { "epoch": 0.7794365379029916, "grad_norm": 0.4631008505821228, "learning_rate": 0.0001, "loss": 1.5681, "step": 6709 }, { "epoch": 0.7795527156549521, "grad_norm": 0.4254882335662842, "learning_rate": 0.0001, "loss": 1.4675, "step": 6710 }, { "epoch": 0.7796688934069126, "grad_norm": 0.4777772128582001, "learning_rate": 0.0001, "loss": 1.7763, "step": 6711 }, { "epoch": 0.7797850711588731, "grad_norm": 0.4478752017021179, "learning_rate": 0.0001, "loss": 1.6425, "step": 6712 }, { "epoch": 0.7799012489108336, "grad_norm": 0.4405520260334015, "learning_rate": 0.0001, "loss": 1.7456, "step": 6713 }, { "epoch": 0.7800174266627941, "grad_norm": 0.43905889987945557, "learning_rate": 0.0001, "loss": 1.6287, "step": 6714 }, { "epoch": 0.7801336044147545, "grad_norm": 0.46866193413734436, "learning_rate": 0.0001, "loss": 1.6221, "step": 6715 }, { "epoch": 0.780249782166715, "grad_norm": 0.4609702229499817, "learning_rate": 0.0001, "loss": 1.744, "step": 6716 }, { "epoch": 0.7803659599186755, "grad_norm": 0.4288838803768158, "learning_rate": 0.0001, "loss": 1.5726, "step": 6717 }, { "epoch": 0.7804821376706361, "grad_norm": 0.4898937940597534, "learning_rate": 0.0001, "loss": 1.7136, "step": 6718 }, { "epoch": 0.7805983154225966, "grad_norm": 0.46226128935813904, "learning_rate": 0.0001, "loss": 1.6664, "step": 6719 }, { "epoch": 0.7807144931745571, "grad_norm": 0.4708699584007263, "learning_rate": 0.0001, "loss": 1.4289, "step": 6720 }, { "epoch": 0.7808306709265176, "grad_norm": 0.4597412943840027, "learning_rate": 0.0001, "loss": 1.7016, "step": 6721 }, { "epoch": 0.7809468486784781, "grad_norm": 0.44381624460220337, "learning_rate": 0.0001, "loss": 1.6171, "step": 6722 }, { "epoch": 0.7810630264304386, "grad_norm": 0.43926021456718445, "learning_rate": 0.0001, "loss": 1.6971, "step": 6723 }, { "epoch": 0.7811792041823991, "grad_norm": 0.4407069683074951, "learning_rate": 0.0001, "loss": 1.5507, "step": 6724 }, { "epoch": 0.7812953819343595, "grad_norm": 0.44746869802474976, "learning_rate": 0.0001, "loss": 1.5651, "step": 6725 }, { "epoch": 0.78141155968632, "grad_norm": 0.44408538937568665, "learning_rate": 0.0001, "loss": 1.378, "step": 6726 }, { "epoch": 0.7815277374382805, "grad_norm": 0.44895273447036743, "learning_rate": 0.0001, "loss": 1.647, "step": 6727 }, { "epoch": 0.7816439151902411, "grad_norm": 0.4479658901691437, "learning_rate": 0.0001, "loss": 1.5602, "step": 6728 }, { "epoch": 0.7817600929422016, "grad_norm": 0.4522935450077057, "learning_rate": 0.0001, "loss": 1.5872, "step": 6729 }, { "epoch": 0.7818762706941621, "grad_norm": 0.4975508749485016, "learning_rate": 0.0001, "loss": 1.5601, "step": 6730 }, { "epoch": 0.7819924484461226, "grad_norm": 0.4425521194934845, "learning_rate": 0.0001, "loss": 1.6797, "step": 6731 }, { "epoch": 0.7821086261980831, "grad_norm": 0.4268142580986023, "learning_rate": 0.0001, "loss": 1.5329, "step": 6732 }, { "epoch": 0.7822248039500436, "grad_norm": 0.45263275504112244, "learning_rate": 0.0001, "loss": 1.6088, "step": 6733 }, { "epoch": 0.7823409817020041, "grad_norm": 0.4428475797176361, "learning_rate": 0.0001, "loss": 1.4382, "step": 6734 }, { "epoch": 0.7824571594539645, "grad_norm": 0.5304811596870422, "learning_rate": 0.0001, "loss": 1.7288, "step": 6735 }, { "epoch": 0.782573337205925, "grad_norm": 0.43402206897735596, "learning_rate": 0.0001, "loss": 1.5834, "step": 6736 }, { "epoch": 0.7826895149578855, "grad_norm": 0.43947911262512207, "learning_rate": 0.0001, "loss": 1.4528, "step": 6737 }, { "epoch": 0.782805692709846, "grad_norm": 0.4783528447151184, "learning_rate": 0.0001, "loss": 1.6796, "step": 6738 }, { "epoch": 0.7829218704618066, "grad_norm": 0.4822714924812317, "learning_rate": 0.0001, "loss": 1.5938, "step": 6739 }, { "epoch": 0.7830380482137671, "grad_norm": 0.4645395576953888, "learning_rate": 0.0001, "loss": 1.6273, "step": 6740 }, { "epoch": 0.7831542259657276, "grad_norm": 0.47700071334838867, "learning_rate": 0.0001, "loss": 1.6726, "step": 6741 }, { "epoch": 0.7832704037176881, "grad_norm": 0.48509228229522705, "learning_rate": 0.0001, "loss": 1.6509, "step": 6742 }, { "epoch": 0.7833865814696486, "grad_norm": 0.43715542554855347, "learning_rate": 0.0001, "loss": 1.5105, "step": 6743 }, { "epoch": 0.7835027592216091, "grad_norm": 0.5839574933052063, "learning_rate": 0.0001, "loss": 1.7659, "step": 6744 }, { "epoch": 0.7836189369735695, "grad_norm": 0.4410605728626251, "learning_rate": 0.0001, "loss": 1.5191, "step": 6745 }, { "epoch": 0.78373511472553, "grad_norm": 0.44699952006340027, "learning_rate": 0.0001, "loss": 1.4267, "step": 6746 }, { "epoch": 0.7838512924774905, "grad_norm": 0.43901586532592773, "learning_rate": 0.0001, "loss": 1.5008, "step": 6747 }, { "epoch": 0.783967470229451, "grad_norm": 0.43384185433387756, "learning_rate": 0.0001, "loss": 1.4576, "step": 6748 }, { "epoch": 0.7840836479814116, "grad_norm": 0.41548269987106323, "learning_rate": 0.0001, "loss": 1.3953, "step": 6749 }, { "epoch": 0.7841998257333721, "grad_norm": 0.4889026880264282, "learning_rate": 0.0001, "loss": 1.5251, "step": 6750 }, { "epoch": 0.7843160034853326, "grad_norm": 0.46144333481788635, "learning_rate": 0.0001, "loss": 1.6779, "step": 6751 }, { "epoch": 0.7844321812372931, "grad_norm": 0.4552895128726959, "learning_rate": 0.0001, "loss": 1.6212, "step": 6752 }, { "epoch": 0.7845483589892536, "grad_norm": 0.4651104807853699, "learning_rate": 0.0001, "loss": 1.3792, "step": 6753 }, { "epoch": 0.7846645367412141, "grad_norm": 0.4177487790584564, "learning_rate": 0.0001, "loss": 1.5152, "step": 6754 }, { "epoch": 0.7847807144931745, "grad_norm": 0.4314608573913574, "learning_rate": 0.0001, "loss": 1.4307, "step": 6755 }, { "epoch": 0.784896892245135, "grad_norm": 0.4462542235851288, "learning_rate": 0.0001, "loss": 1.687, "step": 6756 }, { "epoch": 0.7850130699970955, "grad_norm": 0.4506712853908539, "learning_rate": 0.0001, "loss": 1.6538, "step": 6757 }, { "epoch": 0.785129247749056, "grad_norm": 0.465562641620636, "learning_rate": 0.0001, "loss": 1.5888, "step": 6758 }, { "epoch": 0.7852454255010165, "grad_norm": 0.5159258842468262, "learning_rate": 0.0001, "loss": 1.7131, "step": 6759 }, { "epoch": 0.7853616032529771, "grad_norm": 0.4616967439651489, "learning_rate": 0.0001, "loss": 1.6249, "step": 6760 }, { "epoch": 0.7854777810049376, "grad_norm": 0.4350794553756714, "learning_rate": 0.0001, "loss": 1.505, "step": 6761 }, { "epoch": 0.7855939587568981, "grad_norm": 0.4288870692253113, "learning_rate": 0.0001, "loss": 1.4676, "step": 6762 }, { "epoch": 0.7857101365088586, "grad_norm": 0.4555493891239166, "learning_rate": 0.0001, "loss": 1.605, "step": 6763 }, { "epoch": 0.7858263142608191, "grad_norm": 0.4390559196472168, "learning_rate": 0.0001, "loss": 1.4557, "step": 6764 }, { "epoch": 0.7859424920127795, "grad_norm": 0.47772979736328125, "learning_rate": 0.0001, "loss": 1.5336, "step": 6765 }, { "epoch": 0.78605866976474, "grad_norm": 0.45631012320518494, "learning_rate": 0.0001, "loss": 1.6223, "step": 6766 }, { "epoch": 0.7861748475167005, "grad_norm": 0.4568979740142822, "learning_rate": 0.0001, "loss": 1.4912, "step": 6767 }, { "epoch": 0.786291025268661, "grad_norm": 0.4683852195739746, "learning_rate": 0.0001, "loss": 1.5739, "step": 6768 }, { "epoch": 0.7864072030206215, "grad_norm": 0.45670732855796814, "learning_rate": 0.0001, "loss": 1.5515, "step": 6769 }, { "epoch": 0.7865233807725821, "grad_norm": 0.4741782248020172, "learning_rate": 0.0001, "loss": 1.6872, "step": 6770 }, { "epoch": 0.7866395585245426, "grad_norm": 0.4552614092826843, "learning_rate": 0.0001, "loss": 1.6347, "step": 6771 }, { "epoch": 0.7867557362765031, "grad_norm": 0.4711034297943115, "learning_rate": 0.0001, "loss": 1.6586, "step": 6772 }, { "epoch": 0.7868719140284636, "grad_norm": 0.4594586491584778, "learning_rate": 0.0001, "loss": 1.652, "step": 6773 }, { "epoch": 0.7869880917804241, "grad_norm": 0.458446204662323, "learning_rate": 0.0001, "loss": 1.6688, "step": 6774 }, { "epoch": 0.7871042695323845, "grad_norm": 0.48481476306915283, "learning_rate": 0.0001, "loss": 1.6635, "step": 6775 }, { "epoch": 0.787220447284345, "grad_norm": 0.5112566947937012, "learning_rate": 0.0001, "loss": 1.7064, "step": 6776 }, { "epoch": 0.7873366250363055, "grad_norm": 0.43871763348579407, "learning_rate": 0.0001, "loss": 1.5638, "step": 6777 }, { "epoch": 0.787452802788266, "grad_norm": 0.47852954268455505, "learning_rate": 0.0001, "loss": 1.5477, "step": 6778 }, { "epoch": 0.7875689805402265, "grad_norm": 0.4647180736064911, "learning_rate": 0.0001, "loss": 1.6743, "step": 6779 }, { "epoch": 0.787685158292187, "grad_norm": 0.454166978597641, "learning_rate": 0.0001, "loss": 1.5399, "step": 6780 }, { "epoch": 0.7878013360441476, "grad_norm": 0.447322279214859, "learning_rate": 0.0001, "loss": 1.5876, "step": 6781 }, { "epoch": 0.7879175137961081, "grad_norm": 0.4487408399581909, "learning_rate": 0.0001, "loss": 1.6681, "step": 6782 }, { "epoch": 0.7880336915480686, "grad_norm": 0.46517065167427063, "learning_rate": 0.0001, "loss": 1.725, "step": 6783 }, { "epoch": 0.7881498693000291, "grad_norm": 0.4516124427318573, "learning_rate": 0.0001, "loss": 1.6411, "step": 6784 }, { "epoch": 0.7882660470519895, "grad_norm": 0.46726909279823303, "learning_rate": 0.0001, "loss": 1.5802, "step": 6785 }, { "epoch": 0.78838222480395, "grad_norm": 0.44458258152008057, "learning_rate": 0.0001, "loss": 1.5214, "step": 6786 }, { "epoch": 0.7884984025559105, "grad_norm": 0.44695961475372314, "learning_rate": 0.0001, "loss": 1.5685, "step": 6787 }, { "epoch": 0.788614580307871, "grad_norm": 0.46932363510131836, "learning_rate": 0.0001, "loss": 1.5508, "step": 6788 }, { "epoch": 0.7887307580598315, "grad_norm": 0.4439738392829895, "learning_rate": 0.0001, "loss": 1.6621, "step": 6789 }, { "epoch": 0.788846935811792, "grad_norm": 0.45402002334594727, "learning_rate": 0.0001, "loss": 1.6997, "step": 6790 }, { "epoch": 0.7889631135637526, "grad_norm": 0.45656514167785645, "learning_rate": 0.0001, "loss": 1.4856, "step": 6791 }, { "epoch": 0.7890792913157131, "grad_norm": 0.4795583486557007, "learning_rate": 0.0001, "loss": 1.5899, "step": 6792 }, { "epoch": 0.7891954690676736, "grad_norm": 0.45822617411613464, "learning_rate": 0.0001, "loss": 1.5305, "step": 6793 }, { "epoch": 0.7893116468196341, "grad_norm": 0.44518178701400757, "learning_rate": 0.0001, "loss": 1.5226, "step": 6794 }, { "epoch": 0.7894278245715945, "grad_norm": 0.4444418251514435, "learning_rate": 0.0001, "loss": 1.6133, "step": 6795 }, { "epoch": 0.789544002323555, "grad_norm": 0.4494422972202301, "learning_rate": 0.0001, "loss": 1.7072, "step": 6796 }, { "epoch": 0.7896601800755155, "grad_norm": 0.43979451060295105, "learning_rate": 0.0001, "loss": 1.5291, "step": 6797 }, { "epoch": 0.789776357827476, "grad_norm": 0.4433092474937439, "learning_rate": 0.0001, "loss": 1.5643, "step": 6798 }, { "epoch": 0.7898925355794365, "grad_norm": 0.44533100724220276, "learning_rate": 0.0001, "loss": 1.7044, "step": 6799 }, { "epoch": 0.790008713331397, "grad_norm": 0.4789431393146515, "learning_rate": 0.0001, "loss": 1.6042, "step": 6800 }, { "epoch": 0.7901248910833575, "grad_norm": 0.4464295506477356, "learning_rate": 0.0001, "loss": 1.573, "step": 6801 }, { "epoch": 0.7902410688353181, "grad_norm": 0.48397591710090637, "learning_rate": 0.0001, "loss": 1.5736, "step": 6802 }, { "epoch": 0.7903572465872786, "grad_norm": 0.47774365544319153, "learning_rate": 0.0001, "loss": 1.6687, "step": 6803 }, { "epoch": 0.7904734243392391, "grad_norm": 0.4871893525123596, "learning_rate": 0.0001, "loss": 1.6478, "step": 6804 }, { "epoch": 0.7905896020911996, "grad_norm": 0.4551542103290558, "learning_rate": 0.0001, "loss": 1.8026, "step": 6805 }, { "epoch": 0.79070577984316, "grad_norm": 0.4858977496623993, "learning_rate": 0.0001, "loss": 1.5087, "step": 6806 }, { "epoch": 0.7908219575951205, "grad_norm": 0.4872024953365326, "learning_rate": 0.0001, "loss": 1.6519, "step": 6807 }, { "epoch": 0.790938135347081, "grad_norm": 0.45094436407089233, "learning_rate": 0.0001, "loss": 1.7071, "step": 6808 }, { "epoch": 0.7910543130990415, "grad_norm": 0.43320778012275696, "learning_rate": 0.0001, "loss": 1.6605, "step": 6809 }, { "epoch": 0.791170490851002, "grad_norm": 0.4422098696231842, "learning_rate": 0.0001, "loss": 1.5932, "step": 6810 }, { "epoch": 0.7912866686029625, "grad_norm": 0.44262856245040894, "learning_rate": 0.0001, "loss": 1.4983, "step": 6811 }, { "epoch": 0.7914028463549231, "grad_norm": 0.4632859230041504, "learning_rate": 0.0001, "loss": 1.6968, "step": 6812 }, { "epoch": 0.7915190241068836, "grad_norm": 0.4445875883102417, "learning_rate": 0.0001, "loss": 1.6205, "step": 6813 }, { "epoch": 0.7916352018588441, "grad_norm": 0.47772374749183655, "learning_rate": 0.0001, "loss": 1.7145, "step": 6814 }, { "epoch": 0.7917513796108046, "grad_norm": 0.4446466863155365, "learning_rate": 0.0001, "loss": 1.5446, "step": 6815 }, { "epoch": 0.791867557362765, "grad_norm": 0.47620153427124023, "learning_rate": 0.0001, "loss": 1.7399, "step": 6816 }, { "epoch": 0.7919837351147255, "grad_norm": 0.496855229139328, "learning_rate": 0.0001, "loss": 1.5951, "step": 6817 }, { "epoch": 0.792099912866686, "grad_norm": 0.44331079721450806, "learning_rate": 0.0001, "loss": 1.5529, "step": 6818 }, { "epoch": 0.7922160906186465, "grad_norm": 0.4731941223144531, "learning_rate": 0.0001, "loss": 1.6266, "step": 6819 }, { "epoch": 0.792332268370607, "grad_norm": 0.507358729839325, "learning_rate": 0.0001, "loss": 1.5982, "step": 6820 }, { "epoch": 0.7924484461225675, "grad_norm": 0.46159496903419495, "learning_rate": 0.0001, "loss": 1.6328, "step": 6821 }, { "epoch": 0.792564623874528, "grad_norm": 0.4719959497451782, "learning_rate": 0.0001, "loss": 1.6799, "step": 6822 }, { "epoch": 0.7926808016264886, "grad_norm": 0.4765816330909729, "learning_rate": 0.0001, "loss": 1.6144, "step": 6823 }, { "epoch": 0.7927969793784491, "grad_norm": 0.43941232562065125, "learning_rate": 0.0001, "loss": 1.5425, "step": 6824 }, { "epoch": 0.7929131571304096, "grad_norm": 0.47497496008872986, "learning_rate": 0.0001, "loss": 1.649, "step": 6825 }, { "epoch": 0.79302933488237, "grad_norm": 0.43414729833602905, "learning_rate": 0.0001, "loss": 1.6018, "step": 6826 }, { "epoch": 0.7931455126343305, "grad_norm": 0.48383721709251404, "learning_rate": 0.0001, "loss": 1.7554, "step": 6827 }, { "epoch": 0.793261690386291, "grad_norm": 0.4651719033718109, "learning_rate": 0.0001, "loss": 1.5808, "step": 6828 }, { "epoch": 0.7933778681382515, "grad_norm": 0.42594683170318604, "learning_rate": 0.0001, "loss": 1.5052, "step": 6829 }, { "epoch": 0.793494045890212, "grad_norm": 0.4392074644565582, "learning_rate": 0.0001, "loss": 1.5447, "step": 6830 }, { "epoch": 0.7936102236421725, "grad_norm": 0.43708086013793945, "learning_rate": 0.0001, "loss": 1.3342, "step": 6831 }, { "epoch": 0.793726401394133, "grad_norm": 0.46385717391967773, "learning_rate": 0.0001, "loss": 1.709, "step": 6832 }, { "epoch": 0.7938425791460936, "grad_norm": 0.4499506652355194, "learning_rate": 0.0001, "loss": 1.5392, "step": 6833 }, { "epoch": 0.7939587568980541, "grad_norm": 0.46915948390960693, "learning_rate": 0.0001, "loss": 1.6962, "step": 6834 }, { "epoch": 0.7940749346500146, "grad_norm": 0.4685263931751251, "learning_rate": 0.0001, "loss": 1.6678, "step": 6835 }, { "epoch": 0.794191112401975, "grad_norm": 0.45696932077407837, "learning_rate": 0.0001, "loss": 1.6566, "step": 6836 }, { "epoch": 0.7943072901539355, "grad_norm": 0.47869807481765747, "learning_rate": 0.0001, "loss": 1.6313, "step": 6837 }, { "epoch": 0.794423467905896, "grad_norm": 0.45035579800605774, "learning_rate": 0.0001, "loss": 1.6116, "step": 6838 }, { "epoch": 0.7945396456578565, "grad_norm": 0.48194530606269836, "learning_rate": 0.0001, "loss": 1.5359, "step": 6839 }, { "epoch": 0.794655823409817, "grad_norm": 0.47445181012153625, "learning_rate": 0.0001, "loss": 1.7277, "step": 6840 }, { "epoch": 0.7947720011617775, "grad_norm": 0.4426630139350891, "learning_rate": 0.0001, "loss": 1.5437, "step": 6841 }, { "epoch": 0.794888178913738, "grad_norm": 0.45886486768722534, "learning_rate": 0.0001, "loss": 1.5772, "step": 6842 }, { "epoch": 0.7950043566656985, "grad_norm": 0.4601069986820221, "learning_rate": 0.0001, "loss": 1.6217, "step": 6843 }, { "epoch": 0.7951205344176591, "grad_norm": 0.42290568351745605, "learning_rate": 0.0001, "loss": 1.4043, "step": 6844 }, { "epoch": 0.7952367121696196, "grad_norm": 0.4962608516216278, "learning_rate": 0.0001, "loss": 1.6887, "step": 6845 }, { "epoch": 0.79535288992158, "grad_norm": 0.47025734186172485, "learning_rate": 0.0001, "loss": 1.4043, "step": 6846 }, { "epoch": 0.7954690676735405, "grad_norm": 0.441499263048172, "learning_rate": 0.0001, "loss": 1.6115, "step": 6847 }, { "epoch": 0.795585245425501, "grad_norm": 0.4636131823062897, "learning_rate": 0.0001, "loss": 1.6929, "step": 6848 }, { "epoch": 0.7957014231774615, "grad_norm": 0.46920451521873474, "learning_rate": 0.0001, "loss": 1.5591, "step": 6849 }, { "epoch": 0.795817600929422, "grad_norm": 0.49390605092048645, "learning_rate": 0.0001, "loss": 1.6649, "step": 6850 }, { "epoch": 0.7959337786813825, "grad_norm": 0.46050357818603516, "learning_rate": 0.0001, "loss": 1.6044, "step": 6851 }, { "epoch": 0.796049956433343, "grad_norm": 0.4840867817401886, "learning_rate": 0.0001, "loss": 1.4195, "step": 6852 }, { "epoch": 0.7961661341853035, "grad_norm": 0.46949562430381775, "learning_rate": 0.0001, "loss": 1.6401, "step": 6853 }, { "epoch": 0.7962823119372641, "grad_norm": 0.5006006360054016, "learning_rate": 0.0001, "loss": 1.8093, "step": 6854 }, { "epoch": 0.7963984896892246, "grad_norm": 0.5085091590881348, "learning_rate": 0.0001, "loss": 1.8903, "step": 6855 }, { "epoch": 0.796514667441185, "grad_norm": 0.4508131742477417, "learning_rate": 0.0001, "loss": 1.5699, "step": 6856 }, { "epoch": 0.7966308451931455, "grad_norm": 0.4634523391723633, "learning_rate": 0.0001, "loss": 1.5805, "step": 6857 }, { "epoch": 0.796747022945106, "grad_norm": 0.4478987753391266, "learning_rate": 0.0001, "loss": 1.6498, "step": 6858 }, { "epoch": 0.7968632006970665, "grad_norm": 0.4543631374835968, "learning_rate": 0.0001, "loss": 1.6481, "step": 6859 }, { "epoch": 0.796979378449027, "grad_norm": 0.48462975025177, "learning_rate": 0.0001, "loss": 1.6535, "step": 6860 }, { "epoch": 0.7970955562009875, "grad_norm": 0.45995092391967773, "learning_rate": 0.0001, "loss": 1.6833, "step": 6861 }, { "epoch": 0.797211733952948, "grad_norm": 0.48978012800216675, "learning_rate": 0.0001, "loss": 1.4784, "step": 6862 }, { "epoch": 0.7973279117049085, "grad_norm": 0.45018213987350464, "learning_rate": 0.0001, "loss": 1.6156, "step": 6863 }, { "epoch": 0.797444089456869, "grad_norm": 0.47435247898101807, "learning_rate": 0.0001, "loss": 1.6635, "step": 6864 }, { "epoch": 0.7975602672088296, "grad_norm": 0.48207005858421326, "learning_rate": 0.0001, "loss": 1.5935, "step": 6865 }, { "epoch": 0.79767644496079, "grad_norm": 0.4846184253692627, "learning_rate": 0.0001, "loss": 1.5241, "step": 6866 }, { "epoch": 0.7977926227127505, "grad_norm": 0.42367056012153625, "learning_rate": 0.0001, "loss": 1.3894, "step": 6867 }, { "epoch": 0.797908800464711, "grad_norm": 0.46958428621292114, "learning_rate": 0.0001, "loss": 1.5891, "step": 6868 }, { "epoch": 0.7980249782166715, "grad_norm": 0.4629022777080536, "learning_rate": 0.0001, "loss": 1.7408, "step": 6869 }, { "epoch": 0.798141155968632, "grad_norm": 0.5330944657325745, "learning_rate": 0.0001, "loss": 1.8337, "step": 6870 }, { "epoch": 0.7982573337205925, "grad_norm": 0.45015132427215576, "learning_rate": 0.0001, "loss": 1.6438, "step": 6871 }, { "epoch": 0.798373511472553, "grad_norm": 0.447221964597702, "learning_rate": 0.0001, "loss": 1.5227, "step": 6872 }, { "epoch": 0.7984896892245135, "grad_norm": 0.4698033928871155, "learning_rate": 0.0001, "loss": 1.6346, "step": 6873 }, { "epoch": 0.798605866976474, "grad_norm": 0.4613732099533081, "learning_rate": 0.0001, "loss": 1.7136, "step": 6874 }, { "epoch": 0.7987220447284346, "grad_norm": 0.45149853825569153, "learning_rate": 0.0001, "loss": 1.53, "step": 6875 }, { "epoch": 0.798838222480395, "grad_norm": 0.44822239875793457, "learning_rate": 0.0001, "loss": 1.569, "step": 6876 }, { "epoch": 0.7989544002323555, "grad_norm": 0.4859652817249298, "learning_rate": 0.0001, "loss": 1.5857, "step": 6877 }, { "epoch": 0.799070577984316, "grad_norm": 0.4371628165245056, "learning_rate": 0.0001, "loss": 1.5781, "step": 6878 }, { "epoch": 0.7991867557362765, "grad_norm": 0.432081013917923, "learning_rate": 0.0001, "loss": 1.6044, "step": 6879 }, { "epoch": 0.799302933488237, "grad_norm": 0.4589148461818695, "learning_rate": 0.0001, "loss": 1.5197, "step": 6880 }, { "epoch": 0.7994191112401975, "grad_norm": 0.43646013736724854, "learning_rate": 0.0001, "loss": 1.5708, "step": 6881 }, { "epoch": 0.799535288992158, "grad_norm": 0.42627108097076416, "learning_rate": 0.0001, "loss": 1.5185, "step": 6882 }, { "epoch": 0.7996514667441185, "grad_norm": 0.4646453559398651, "learning_rate": 0.0001, "loss": 1.5625, "step": 6883 }, { "epoch": 0.799767644496079, "grad_norm": 0.46586647629737854, "learning_rate": 0.0001, "loss": 1.4998, "step": 6884 }, { "epoch": 0.7998838222480396, "grad_norm": 0.512482762336731, "learning_rate": 0.0001, "loss": 1.6869, "step": 6885 }, { "epoch": 0.8, "grad_norm": 0.4631596803665161, "learning_rate": 0.0001, "loss": 1.6069, "step": 6886 }, { "epoch": 0.8001161777519605, "grad_norm": 0.472889244556427, "learning_rate": 0.0001, "loss": 1.7365, "step": 6887 }, { "epoch": 0.800232355503921, "grad_norm": 0.5098186731338501, "learning_rate": 0.0001, "loss": 1.4923, "step": 6888 }, { "epoch": 0.8003485332558815, "grad_norm": 0.48305651545524597, "learning_rate": 0.0001, "loss": 1.777, "step": 6889 }, { "epoch": 0.800464711007842, "grad_norm": 0.48298099637031555, "learning_rate": 0.0001, "loss": 1.7043, "step": 6890 }, { "epoch": 0.8005808887598025, "grad_norm": 0.4996420741081238, "learning_rate": 0.0001, "loss": 1.7451, "step": 6891 }, { "epoch": 0.800697066511763, "grad_norm": 0.44727784395217896, "learning_rate": 0.0001, "loss": 1.6145, "step": 6892 }, { "epoch": 0.8008132442637235, "grad_norm": 0.4710618555545807, "learning_rate": 0.0001, "loss": 1.531, "step": 6893 }, { "epoch": 0.800929422015684, "grad_norm": 0.4605047404766083, "learning_rate": 0.0001, "loss": 1.6003, "step": 6894 }, { "epoch": 0.8010455997676444, "grad_norm": 0.4915280342102051, "learning_rate": 0.0001, "loss": 1.6894, "step": 6895 }, { "epoch": 0.801161777519605, "grad_norm": 0.45687055587768555, "learning_rate": 0.0001, "loss": 1.7078, "step": 6896 }, { "epoch": 0.8012779552715655, "grad_norm": 0.4345172345638275, "learning_rate": 0.0001, "loss": 1.624, "step": 6897 }, { "epoch": 0.801394133023526, "grad_norm": 0.4714672863483429, "learning_rate": 0.0001, "loss": 1.4299, "step": 6898 }, { "epoch": 0.8015103107754865, "grad_norm": 0.46883341670036316, "learning_rate": 0.0001, "loss": 1.5489, "step": 6899 }, { "epoch": 0.801626488527447, "grad_norm": 0.43329375982284546, "learning_rate": 0.0001, "loss": 1.5865, "step": 6900 }, { "epoch": 0.8017426662794075, "grad_norm": 0.5150987505912781, "learning_rate": 0.0001, "loss": 1.8174, "step": 6901 }, { "epoch": 0.801858844031368, "grad_norm": 0.43868762254714966, "learning_rate": 0.0001, "loss": 1.5489, "step": 6902 }, { "epoch": 0.8019750217833285, "grad_norm": 0.4465371072292328, "learning_rate": 0.0001, "loss": 1.5276, "step": 6903 }, { "epoch": 0.802091199535289, "grad_norm": 0.4565187096595764, "learning_rate": 0.0001, "loss": 1.6618, "step": 6904 }, { "epoch": 0.8022073772872494, "grad_norm": 0.5145065784454346, "learning_rate": 0.0001, "loss": 1.7764, "step": 6905 }, { "epoch": 0.80232355503921, "grad_norm": 0.5177064538002014, "learning_rate": 0.0001, "loss": 1.6975, "step": 6906 }, { "epoch": 0.8024397327911705, "grad_norm": 0.42869505286216736, "learning_rate": 0.0001, "loss": 1.4623, "step": 6907 }, { "epoch": 0.802555910543131, "grad_norm": 0.4332997500896454, "learning_rate": 0.0001, "loss": 1.5114, "step": 6908 }, { "epoch": 0.8026720882950915, "grad_norm": 0.4914308786392212, "learning_rate": 0.0001, "loss": 1.5891, "step": 6909 }, { "epoch": 0.802788266047052, "grad_norm": 0.4771595299243927, "learning_rate": 0.0001, "loss": 1.5143, "step": 6910 }, { "epoch": 0.8029044437990125, "grad_norm": 0.45163875818252563, "learning_rate": 0.0001, "loss": 1.7303, "step": 6911 }, { "epoch": 0.803020621550973, "grad_norm": 0.4456539452075958, "learning_rate": 0.0001, "loss": 1.6847, "step": 6912 }, { "epoch": 0.8031367993029335, "grad_norm": 0.42746564745903015, "learning_rate": 0.0001, "loss": 1.5497, "step": 6913 }, { "epoch": 0.803252977054894, "grad_norm": 0.4743218421936035, "learning_rate": 0.0001, "loss": 1.5728, "step": 6914 }, { "epoch": 0.8033691548068544, "grad_norm": 0.5129088759422302, "learning_rate": 0.0001, "loss": 1.6247, "step": 6915 }, { "epoch": 0.8034853325588149, "grad_norm": 0.4551659822463989, "learning_rate": 0.0001, "loss": 1.6385, "step": 6916 }, { "epoch": 0.8036015103107755, "grad_norm": 0.4753081202507019, "learning_rate": 0.0001, "loss": 1.6096, "step": 6917 }, { "epoch": 0.803717688062736, "grad_norm": 0.4394998252391815, "learning_rate": 0.0001, "loss": 1.575, "step": 6918 }, { "epoch": 0.8038338658146965, "grad_norm": 0.45924627780914307, "learning_rate": 0.0001, "loss": 1.5364, "step": 6919 }, { "epoch": 0.803950043566657, "grad_norm": 0.44786056876182556, "learning_rate": 0.0001, "loss": 1.5884, "step": 6920 }, { "epoch": 0.8040662213186175, "grad_norm": 0.4409099519252777, "learning_rate": 0.0001, "loss": 1.6561, "step": 6921 }, { "epoch": 0.804182399070578, "grad_norm": 0.45234692096710205, "learning_rate": 0.0001, "loss": 1.5925, "step": 6922 }, { "epoch": 0.8042985768225385, "grad_norm": 0.5276682376861572, "learning_rate": 0.0001, "loss": 1.7412, "step": 6923 }, { "epoch": 0.804414754574499, "grad_norm": 0.4487822353839874, "learning_rate": 0.0001, "loss": 1.6712, "step": 6924 }, { "epoch": 0.8045309323264594, "grad_norm": 0.4596274495124817, "learning_rate": 0.0001, "loss": 1.687, "step": 6925 }, { "epoch": 0.8046471100784199, "grad_norm": 0.44739964604377747, "learning_rate": 0.0001, "loss": 1.5391, "step": 6926 }, { "epoch": 0.8047632878303805, "grad_norm": 0.46213769912719727, "learning_rate": 0.0001, "loss": 1.4638, "step": 6927 }, { "epoch": 0.804879465582341, "grad_norm": 0.4504436254501343, "learning_rate": 0.0001, "loss": 1.6742, "step": 6928 }, { "epoch": 0.8049956433343015, "grad_norm": 0.4626392722129822, "learning_rate": 0.0001, "loss": 1.4868, "step": 6929 }, { "epoch": 0.805111821086262, "grad_norm": 0.4738352596759796, "learning_rate": 0.0001, "loss": 1.699, "step": 6930 }, { "epoch": 0.8052279988382225, "grad_norm": 0.45543593168258667, "learning_rate": 0.0001, "loss": 1.6072, "step": 6931 }, { "epoch": 0.805344176590183, "grad_norm": 0.4740353226661682, "learning_rate": 0.0001, "loss": 1.6442, "step": 6932 }, { "epoch": 0.8054603543421435, "grad_norm": 0.4481174647808075, "learning_rate": 0.0001, "loss": 1.5732, "step": 6933 }, { "epoch": 0.805576532094104, "grad_norm": 0.461093544960022, "learning_rate": 0.0001, "loss": 1.6113, "step": 6934 }, { "epoch": 0.8056927098460644, "grad_norm": 0.4422491490840912, "learning_rate": 0.0001, "loss": 1.5261, "step": 6935 }, { "epoch": 0.8058088875980249, "grad_norm": 0.4397140443325043, "learning_rate": 0.0001, "loss": 1.5065, "step": 6936 }, { "epoch": 0.8059250653499854, "grad_norm": 0.47454312443733215, "learning_rate": 0.0001, "loss": 1.5382, "step": 6937 }, { "epoch": 0.806041243101946, "grad_norm": 0.46560293436050415, "learning_rate": 0.0001, "loss": 1.6589, "step": 6938 }, { "epoch": 0.8061574208539065, "grad_norm": 0.46185746788978577, "learning_rate": 0.0001, "loss": 1.6063, "step": 6939 }, { "epoch": 0.806273598605867, "grad_norm": 0.4402056634426117, "learning_rate": 0.0001, "loss": 1.5025, "step": 6940 }, { "epoch": 0.8063897763578275, "grad_norm": 0.452150821685791, "learning_rate": 0.0001, "loss": 1.5531, "step": 6941 }, { "epoch": 0.806505954109788, "grad_norm": 0.4527072012424469, "learning_rate": 0.0001, "loss": 1.5323, "step": 6942 }, { "epoch": 0.8066221318617485, "grad_norm": 0.4439299702644348, "learning_rate": 0.0001, "loss": 1.5294, "step": 6943 }, { "epoch": 0.806738309613709, "grad_norm": 0.4649060070514679, "learning_rate": 0.0001, "loss": 1.7965, "step": 6944 }, { "epoch": 0.8068544873656694, "grad_norm": 0.46530160307884216, "learning_rate": 0.0001, "loss": 1.7508, "step": 6945 }, { "epoch": 0.8069706651176299, "grad_norm": 0.456967294216156, "learning_rate": 0.0001, "loss": 1.6391, "step": 6946 }, { "epoch": 0.8070868428695904, "grad_norm": 0.47573670744895935, "learning_rate": 0.0001, "loss": 1.5697, "step": 6947 }, { "epoch": 0.807203020621551, "grad_norm": 0.4661976099014282, "learning_rate": 0.0001, "loss": 1.6118, "step": 6948 }, { "epoch": 0.8073191983735115, "grad_norm": 0.4907025694847107, "learning_rate": 0.0001, "loss": 1.6961, "step": 6949 }, { "epoch": 0.807435376125472, "grad_norm": 0.4918571412563324, "learning_rate": 0.0001, "loss": 1.6054, "step": 6950 }, { "epoch": 0.8075515538774325, "grad_norm": 0.48200660943984985, "learning_rate": 0.0001, "loss": 1.6009, "step": 6951 }, { "epoch": 0.807667731629393, "grad_norm": 0.42808154225349426, "learning_rate": 0.0001, "loss": 1.503, "step": 6952 }, { "epoch": 0.8077839093813535, "grad_norm": 0.473086416721344, "learning_rate": 0.0001, "loss": 1.7153, "step": 6953 }, { "epoch": 0.807900087133314, "grad_norm": 0.44156551361083984, "learning_rate": 0.0001, "loss": 1.5875, "step": 6954 }, { "epoch": 0.8080162648852744, "grad_norm": 0.5178338289260864, "learning_rate": 0.0001, "loss": 1.5546, "step": 6955 }, { "epoch": 0.8081324426372349, "grad_norm": 0.4564967751502991, "learning_rate": 0.0001, "loss": 1.5125, "step": 6956 }, { "epoch": 0.8082486203891954, "grad_norm": 0.44008997082710266, "learning_rate": 0.0001, "loss": 1.5756, "step": 6957 }, { "epoch": 0.8083647981411559, "grad_norm": 0.4413783848285675, "learning_rate": 0.0001, "loss": 1.5241, "step": 6958 }, { "epoch": 0.8084809758931165, "grad_norm": 0.4661567807197571, "learning_rate": 0.0001, "loss": 1.4379, "step": 6959 }, { "epoch": 0.808597153645077, "grad_norm": 0.47281137108802795, "learning_rate": 0.0001, "loss": 1.7149, "step": 6960 }, { "epoch": 0.8087133313970375, "grad_norm": 0.47660407423973083, "learning_rate": 0.0001, "loss": 1.7855, "step": 6961 }, { "epoch": 0.808829509148998, "grad_norm": 0.4914090633392334, "learning_rate": 0.0001, "loss": 1.6549, "step": 6962 }, { "epoch": 0.8089456869009585, "grad_norm": 0.4621322453022003, "learning_rate": 0.0001, "loss": 1.5594, "step": 6963 }, { "epoch": 0.809061864652919, "grad_norm": 0.4495106339454651, "learning_rate": 0.0001, "loss": 1.6727, "step": 6964 }, { "epoch": 0.8091780424048794, "grad_norm": 0.49163323640823364, "learning_rate": 0.0001, "loss": 1.5462, "step": 6965 }, { "epoch": 0.8092942201568399, "grad_norm": 0.47235107421875, "learning_rate": 0.0001, "loss": 1.5385, "step": 6966 }, { "epoch": 0.8094103979088004, "grad_norm": 0.45100662112236023, "learning_rate": 0.0001, "loss": 1.5889, "step": 6967 }, { "epoch": 0.8095265756607609, "grad_norm": 0.46376436948776245, "learning_rate": 0.0001, "loss": 1.7342, "step": 6968 }, { "epoch": 0.8096427534127215, "grad_norm": 0.490784227848053, "learning_rate": 0.0001, "loss": 1.6433, "step": 6969 }, { "epoch": 0.809758931164682, "grad_norm": 0.49541133642196655, "learning_rate": 0.0001, "loss": 1.7031, "step": 6970 }, { "epoch": 0.8098751089166425, "grad_norm": 0.47032544016838074, "learning_rate": 0.0001, "loss": 1.5497, "step": 6971 }, { "epoch": 0.809991286668603, "grad_norm": 0.4594431519508362, "learning_rate": 0.0001, "loss": 1.5522, "step": 6972 }, { "epoch": 0.8101074644205635, "grad_norm": 0.4627406895160675, "learning_rate": 0.0001, "loss": 1.561, "step": 6973 }, { "epoch": 0.810223642172524, "grad_norm": 0.4937112331390381, "learning_rate": 0.0001, "loss": 1.7939, "step": 6974 }, { "epoch": 0.8103398199244845, "grad_norm": 0.4783715605735779, "learning_rate": 0.0001, "loss": 1.6928, "step": 6975 }, { "epoch": 0.8104559976764449, "grad_norm": 0.47671520709991455, "learning_rate": 0.0001, "loss": 1.5699, "step": 6976 }, { "epoch": 0.8105721754284054, "grad_norm": 0.47149521112442017, "learning_rate": 0.0001, "loss": 1.6648, "step": 6977 }, { "epoch": 0.8106883531803659, "grad_norm": 0.45699021220207214, "learning_rate": 0.0001, "loss": 1.5172, "step": 6978 }, { "epoch": 0.8108045309323264, "grad_norm": 0.4693375527858734, "learning_rate": 0.0001, "loss": 1.6806, "step": 6979 }, { "epoch": 0.810920708684287, "grad_norm": 0.4451500475406647, "learning_rate": 0.0001, "loss": 1.6516, "step": 6980 }, { "epoch": 0.8110368864362475, "grad_norm": 0.45721450448036194, "learning_rate": 0.0001, "loss": 1.6198, "step": 6981 }, { "epoch": 0.811153064188208, "grad_norm": 0.44827860593795776, "learning_rate": 0.0001, "loss": 1.6295, "step": 6982 }, { "epoch": 0.8112692419401685, "grad_norm": 0.47101083397865295, "learning_rate": 0.0001, "loss": 1.4988, "step": 6983 }, { "epoch": 0.811385419692129, "grad_norm": 0.4695024788379669, "learning_rate": 0.0001, "loss": 1.6635, "step": 6984 }, { "epoch": 0.8115015974440895, "grad_norm": 0.5033894777297974, "learning_rate": 0.0001, "loss": 1.5087, "step": 6985 }, { "epoch": 0.8116177751960499, "grad_norm": 0.49278390407562256, "learning_rate": 0.0001, "loss": 1.5152, "step": 6986 }, { "epoch": 0.8117339529480104, "grad_norm": 0.5109890699386597, "learning_rate": 0.0001, "loss": 1.6748, "step": 6987 }, { "epoch": 0.8118501306999709, "grad_norm": 0.4835306704044342, "learning_rate": 0.0001, "loss": 1.7316, "step": 6988 }, { "epoch": 0.8119663084519314, "grad_norm": 0.48664847016334534, "learning_rate": 0.0001, "loss": 1.6619, "step": 6989 }, { "epoch": 0.812082486203892, "grad_norm": 0.4511289596557617, "learning_rate": 0.0001, "loss": 1.4578, "step": 6990 }, { "epoch": 0.8121986639558525, "grad_norm": 0.46078622341156006, "learning_rate": 0.0001, "loss": 1.529, "step": 6991 }, { "epoch": 0.812314841707813, "grad_norm": 0.5006919503211975, "learning_rate": 0.0001, "loss": 1.5886, "step": 6992 }, { "epoch": 0.8124310194597735, "grad_norm": 0.46968936920166016, "learning_rate": 0.0001, "loss": 1.7114, "step": 6993 }, { "epoch": 0.812547197211734, "grad_norm": 0.46966198086738586, "learning_rate": 0.0001, "loss": 1.5932, "step": 6994 }, { "epoch": 0.8126633749636945, "grad_norm": 0.5085934400558472, "learning_rate": 0.0001, "loss": 1.7453, "step": 6995 }, { "epoch": 0.8127795527156549, "grad_norm": 0.42895692586898804, "learning_rate": 0.0001, "loss": 1.505, "step": 6996 }, { "epoch": 0.8128957304676154, "grad_norm": 0.46299561858177185, "learning_rate": 0.0001, "loss": 1.6104, "step": 6997 }, { "epoch": 0.8130119082195759, "grad_norm": 0.4561334550380707, "learning_rate": 0.0001, "loss": 1.5164, "step": 6998 }, { "epoch": 0.8131280859715364, "grad_norm": 0.49617594480514526, "learning_rate": 0.0001, "loss": 1.7706, "step": 6999 }, { "epoch": 0.8132442637234969, "grad_norm": 0.4899764955043793, "learning_rate": 0.0001, "loss": 1.6421, "step": 7000 }, { "epoch": 0.8133604414754575, "grad_norm": 0.4907947778701782, "learning_rate": 0.0001, "loss": 1.7775, "step": 7001 }, { "epoch": 0.813476619227418, "grad_norm": 0.45711711049079895, "learning_rate": 0.0001, "loss": 1.6188, "step": 7002 }, { "epoch": 0.8135927969793785, "grad_norm": 0.449201375246048, "learning_rate": 0.0001, "loss": 1.611, "step": 7003 }, { "epoch": 0.813708974731339, "grad_norm": 0.4838063418865204, "learning_rate": 0.0001, "loss": 1.5176, "step": 7004 }, { "epoch": 0.8138251524832995, "grad_norm": 0.46976253390312195, "learning_rate": 0.0001, "loss": 1.6453, "step": 7005 }, { "epoch": 0.8139413302352599, "grad_norm": 0.4605077803134918, "learning_rate": 0.0001, "loss": 1.6659, "step": 7006 }, { "epoch": 0.8140575079872204, "grad_norm": 0.4310915470123291, "learning_rate": 0.0001, "loss": 1.5281, "step": 7007 }, { "epoch": 0.8141736857391809, "grad_norm": 0.49787524342536926, "learning_rate": 0.0001, "loss": 1.6964, "step": 7008 }, { "epoch": 0.8142898634911414, "grad_norm": 0.4633225202560425, "learning_rate": 0.0001, "loss": 1.6641, "step": 7009 }, { "epoch": 0.8144060412431019, "grad_norm": 0.5344363451004028, "learning_rate": 0.0001, "loss": 1.8373, "step": 7010 }, { "epoch": 0.8145222189950625, "grad_norm": 0.4966965615749359, "learning_rate": 0.0001, "loss": 1.6806, "step": 7011 }, { "epoch": 0.814638396747023, "grad_norm": 0.4843759834766388, "learning_rate": 0.0001, "loss": 1.8051, "step": 7012 }, { "epoch": 0.8147545744989835, "grad_norm": 0.46458831429481506, "learning_rate": 0.0001, "loss": 1.4644, "step": 7013 }, { "epoch": 0.814870752250944, "grad_norm": 0.46197426319122314, "learning_rate": 0.0001, "loss": 1.6258, "step": 7014 }, { "epoch": 0.8149869300029045, "grad_norm": 0.4449380934238434, "learning_rate": 0.0001, "loss": 1.5742, "step": 7015 }, { "epoch": 0.8151031077548649, "grad_norm": 0.47970300912857056, "learning_rate": 0.0001, "loss": 1.4273, "step": 7016 }, { "epoch": 0.8152192855068254, "grad_norm": 0.48018476366996765, "learning_rate": 0.0001, "loss": 1.7756, "step": 7017 }, { "epoch": 0.8153354632587859, "grad_norm": 0.45871859788894653, "learning_rate": 0.0001, "loss": 1.7081, "step": 7018 }, { "epoch": 0.8154516410107464, "grad_norm": 0.509146511554718, "learning_rate": 0.0001, "loss": 1.7281, "step": 7019 }, { "epoch": 0.8155678187627069, "grad_norm": 0.4545764625072479, "learning_rate": 0.0001, "loss": 1.5903, "step": 7020 }, { "epoch": 0.8156839965146674, "grad_norm": 0.46723830699920654, "learning_rate": 0.0001, "loss": 1.5216, "step": 7021 }, { "epoch": 0.815800174266628, "grad_norm": 0.4666821360588074, "learning_rate": 0.0001, "loss": 1.6664, "step": 7022 }, { "epoch": 0.8159163520185885, "grad_norm": 0.45362234115600586, "learning_rate": 0.0001, "loss": 1.6185, "step": 7023 }, { "epoch": 0.816032529770549, "grad_norm": 0.5081374049186707, "learning_rate": 0.0001, "loss": 1.7431, "step": 7024 }, { "epoch": 0.8161487075225095, "grad_norm": 0.4551977515220642, "learning_rate": 0.0001, "loss": 1.4208, "step": 7025 }, { "epoch": 0.8162648852744699, "grad_norm": 0.4469984471797943, "learning_rate": 0.0001, "loss": 1.4629, "step": 7026 }, { "epoch": 0.8163810630264304, "grad_norm": 0.4428351819515228, "learning_rate": 0.0001, "loss": 1.5317, "step": 7027 }, { "epoch": 0.8164972407783909, "grad_norm": 0.46681028604507446, "learning_rate": 0.0001, "loss": 1.7313, "step": 7028 }, { "epoch": 0.8166134185303514, "grad_norm": 0.47523221373558044, "learning_rate": 0.0001, "loss": 1.5623, "step": 7029 }, { "epoch": 0.8167295962823119, "grad_norm": 0.42008304595947266, "learning_rate": 0.0001, "loss": 1.2919, "step": 7030 }, { "epoch": 0.8168457740342724, "grad_norm": 0.4836232364177704, "learning_rate": 0.0001, "loss": 1.6328, "step": 7031 }, { "epoch": 0.816961951786233, "grad_norm": 0.49851134419441223, "learning_rate": 0.0001, "loss": 1.4884, "step": 7032 }, { "epoch": 0.8170781295381935, "grad_norm": 0.4806590974330902, "learning_rate": 0.0001, "loss": 1.4057, "step": 7033 }, { "epoch": 0.817194307290154, "grad_norm": 0.46328291296958923, "learning_rate": 0.0001, "loss": 1.5448, "step": 7034 }, { "epoch": 0.8173104850421145, "grad_norm": 0.44353359937667847, "learning_rate": 0.0001, "loss": 1.583, "step": 7035 }, { "epoch": 0.8174266627940749, "grad_norm": 0.5083478689193726, "learning_rate": 0.0001, "loss": 1.7822, "step": 7036 }, { "epoch": 0.8175428405460354, "grad_norm": 0.4776815176010132, "learning_rate": 0.0001, "loss": 1.7168, "step": 7037 }, { "epoch": 0.8176590182979959, "grad_norm": 0.43044784665107727, "learning_rate": 0.0001, "loss": 1.3441, "step": 7038 }, { "epoch": 0.8177751960499564, "grad_norm": 0.4387872815132141, "learning_rate": 0.0001, "loss": 1.5306, "step": 7039 }, { "epoch": 0.8178913738019169, "grad_norm": 0.46325990557670593, "learning_rate": 0.0001, "loss": 1.7339, "step": 7040 }, { "epoch": 0.8180075515538774, "grad_norm": 0.4589924216270447, "learning_rate": 0.0001, "loss": 1.6567, "step": 7041 }, { "epoch": 0.8181237293058379, "grad_norm": 0.4716587960720062, "learning_rate": 0.0001, "loss": 1.6844, "step": 7042 }, { "epoch": 0.8182399070577985, "grad_norm": 0.4652487635612488, "learning_rate": 0.0001, "loss": 1.7618, "step": 7043 }, { "epoch": 0.818356084809759, "grad_norm": 0.4787328839302063, "learning_rate": 0.0001, "loss": 1.5892, "step": 7044 }, { "epoch": 0.8184722625617195, "grad_norm": 0.47911664843559265, "learning_rate": 0.0001, "loss": 1.6959, "step": 7045 }, { "epoch": 0.8185884403136799, "grad_norm": 0.4904523491859436, "learning_rate": 0.0001, "loss": 1.7353, "step": 7046 }, { "epoch": 0.8187046180656404, "grad_norm": 0.4955161213874817, "learning_rate": 0.0001, "loss": 1.5992, "step": 7047 }, { "epoch": 0.8188207958176009, "grad_norm": 0.44562825560569763, "learning_rate": 0.0001, "loss": 1.6544, "step": 7048 }, { "epoch": 0.8189369735695614, "grad_norm": 0.44809067249298096, "learning_rate": 0.0001, "loss": 1.3182, "step": 7049 }, { "epoch": 0.8190531513215219, "grad_norm": 0.4440357983112335, "learning_rate": 0.0001, "loss": 1.6446, "step": 7050 }, { "epoch": 0.8191693290734824, "grad_norm": 0.4690278172492981, "learning_rate": 0.0001, "loss": 1.5354, "step": 7051 }, { "epoch": 0.8192855068254429, "grad_norm": 0.4551823139190674, "learning_rate": 0.0001, "loss": 1.6531, "step": 7052 }, { "epoch": 0.8194016845774035, "grad_norm": 0.48354125022888184, "learning_rate": 0.0001, "loss": 1.7015, "step": 7053 }, { "epoch": 0.819517862329364, "grad_norm": 0.4836772084236145, "learning_rate": 0.0001, "loss": 1.5432, "step": 7054 }, { "epoch": 0.8196340400813245, "grad_norm": 0.4699805974960327, "learning_rate": 0.0001, "loss": 1.6777, "step": 7055 }, { "epoch": 0.8197502178332849, "grad_norm": 0.41965410113334656, "learning_rate": 0.0001, "loss": 1.4799, "step": 7056 }, { "epoch": 0.8198663955852454, "grad_norm": 0.5057850480079651, "learning_rate": 0.0001, "loss": 1.7782, "step": 7057 }, { "epoch": 0.8199825733372059, "grad_norm": 0.4643802046775818, "learning_rate": 0.0001, "loss": 1.6054, "step": 7058 }, { "epoch": 0.8200987510891664, "grad_norm": 0.4550783932209015, "learning_rate": 0.0001, "loss": 1.5912, "step": 7059 }, { "epoch": 0.8202149288411269, "grad_norm": 0.5149709582328796, "learning_rate": 0.0001, "loss": 1.5496, "step": 7060 }, { "epoch": 0.8203311065930874, "grad_norm": 0.46510255336761475, "learning_rate": 0.0001, "loss": 1.6459, "step": 7061 }, { "epoch": 0.8204472843450479, "grad_norm": 0.46841713786125183, "learning_rate": 0.0001, "loss": 1.615, "step": 7062 }, { "epoch": 0.8205634620970085, "grad_norm": 0.4657427966594696, "learning_rate": 0.0001, "loss": 1.6439, "step": 7063 }, { "epoch": 0.820679639848969, "grad_norm": 0.43857282400131226, "learning_rate": 0.0001, "loss": 1.3911, "step": 7064 }, { "epoch": 0.8207958176009295, "grad_norm": 0.457220196723938, "learning_rate": 0.0001, "loss": 1.5525, "step": 7065 }, { "epoch": 0.82091199535289, "grad_norm": 0.462664932012558, "learning_rate": 0.0001, "loss": 1.6042, "step": 7066 }, { "epoch": 0.8210281731048504, "grad_norm": 0.5375829339027405, "learning_rate": 0.0001, "loss": 1.6802, "step": 7067 }, { "epoch": 0.8211443508568109, "grad_norm": 0.47955790162086487, "learning_rate": 0.0001, "loss": 1.6042, "step": 7068 }, { "epoch": 0.8212605286087714, "grad_norm": 0.4525638818740845, "learning_rate": 0.0001, "loss": 1.5566, "step": 7069 }, { "epoch": 0.8213767063607319, "grad_norm": 0.6293335556983948, "learning_rate": 0.0001, "loss": 1.7643, "step": 7070 }, { "epoch": 0.8214928841126924, "grad_norm": 0.46683555841445923, "learning_rate": 0.0001, "loss": 1.615, "step": 7071 }, { "epoch": 0.8216090618646529, "grad_norm": 0.4560864269733429, "learning_rate": 0.0001, "loss": 1.4407, "step": 7072 }, { "epoch": 0.8217252396166134, "grad_norm": 0.4356245696544647, "learning_rate": 0.0001, "loss": 1.5042, "step": 7073 }, { "epoch": 0.821841417368574, "grad_norm": 0.4432215988636017, "learning_rate": 0.0001, "loss": 1.4791, "step": 7074 }, { "epoch": 0.8219575951205345, "grad_norm": 0.43178898096084595, "learning_rate": 0.0001, "loss": 1.5999, "step": 7075 }, { "epoch": 0.822073772872495, "grad_norm": 0.45065784454345703, "learning_rate": 0.0001, "loss": 1.5992, "step": 7076 }, { "epoch": 0.8221899506244554, "grad_norm": 0.44402584433555603, "learning_rate": 0.0001, "loss": 1.645, "step": 7077 }, { "epoch": 0.8223061283764159, "grad_norm": 0.4661881625652313, "learning_rate": 0.0001, "loss": 1.636, "step": 7078 }, { "epoch": 0.8224223061283764, "grad_norm": 0.46382853388786316, "learning_rate": 0.0001, "loss": 1.589, "step": 7079 }, { "epoch": 0.8225384838803369, "grad_norm": 0.4864356219768524, "learning_rate": 0.0001, "loss": 1.5702, "step": 7080 }, { "epoch": 0.8226546616322974, "grad_norm": 0.48990336060523987, "learning_rate": 0.0001, "loss": 1.7913, "step": 7081 }, { "epoch": 0.8227708393842579, "grad_norm": 0.4853106141090393, "learning_rate": 0.0001, "loss": 1.8482, "step": 7082 }, { "epoch": 0.8228870171362184, "grad_norm": 0.42953288555145264, "learning_rate": 0.0001, "loss": 1.3854, "step": 7083 }, { "epoch": 0.823003194888179, "grad_norm": 0.4772704541683197, "learning_rate": 0.0001, "loss": 1.4499, "step": 7084 }, { "epoch": 0.8231193726401395, "grad_norm": 0.4406863749027252, "learning_rate": 0.0001, "loss": 1.5327, "step": 7085 }, { "epoch": 0.8232355503921, "grad_norm": 0.47718164324760437, "learning_rate": 0.0001, "loss": 1.6784, "step": 7086 }, { "epoch": 0.8233517281440604, "grad_norm": 0.4515807628631592, "learning_rate": 0.0001, "loss": 1.5782, "step": 7087 }, { "epoch": 0.8234679058960209, "grad_norm": 0.4711925685405731, "learning_rate": 0.0001, "loss": 1.6348, "step": 7088 }, { "epoch": 0.8235840836479814, "grad_norm": 0.4708985984325409, "learning_rate": 0.0001, "loss": 1.5224, "step": 7089 }, { "epoch": 0.8237002613999419, "grad_norm": 0.48707452416419983, "learning_rate": 0.0001, "loss": 1.5098, "step": 7090 }, { "epoch": 0.8238164391519024, "grad_norm": 0.5250394940376282, "learning_rate": 0.0001, "loss": 1.707, "step": 7091 }, { "epoch": 0.8239326169038629, "grad_norm": 0.4977336525917053, "learning_rate": 0.0001, "loss": 1.6806, "step": 7092 }, { "epoch": 0.8240487946558234, "grad_norm": 0.4466916024684906, "learning_rate": 0.0001, "loss": 1.5158, "step": 7093 }, { "epoch": 0.8241649724077839, "grad_norm": 0.4977428913116455, "learning_rate": 0.0001, "loss": 1.6532, "step": 7094 }, { "epoch": 0.8242811501597445, "grad_norm": 0.4770013093948364, "learning_rate": 0.0001, "loss": 1.629, "step": 7095 }, { "epoch": 0.824397327911705, "grad_norm": 0.4783066511154175, "learning_rate": 0.0001, "loss": 1.6102, "step": 7096 }, { "epoch": 0.8245135056636654, "grad_norm": 0.44045940041542053, "learning_rate": 0.0001, "loss": 1.4502, "step": 7097 }, { "epoch": 0.8246296834156259, "grad_norm": 0.449008584022522, "learning_rate": 0.0001, "loss": 1.566, "step": 7098 }, { "epoch": 0.8247458611675864, "grad_norm": 0.45892423391342163, "learning_rate": 0.0001, "loss": 1.6121, "step": 7099 }, { "epoch": 0.8248620389195469, "grad_norm": 0.4626208245754242, "learning_rate": 0.0001, "loss": 1.6057, "step": 7100 }, { "epoch": 0.8249782166715074, "grad_norm": 0.5038914084434509, "learning_rate": 0.0001, "loss": 1.651, "step": 7101 }, { "epoch": 0.8250943944234679, "grad_norm": 0.4759518802165985, "learning_rate": 0.0001, "loss": 1.7069, "step": 7102 }, { "epoch": 0.8252105721754284, "grad_norm": 0.48517483472824097, "learning_rate": 0.0001, "loss": 1.5147, "step": 7103 }, { "epoch": 0.8253267499273889, "grad_norm": 0.47098508477211, "learning_rate": 0.0001, "loss": 1.6013, "step": 7104 }, { "epoch": 0.8254429276793495, "grad_norm": 0.47671204805374146, "learning_rate": 0.0001, "loss": 1.5827, "step": 7105 }, { "epoch": 0.82555910543131, "grad_norm": 0.4556223750114441, "learning_rate": 0.0001, "loss": 1.5374, "step": 7106 }, { "epoch": 0.8256752831832704, "grad_norm": 0.47399070858955383, "learning_rate": 0.0001, "loss": 1.6283, "step": 7107 }, { "epoch": 0.8257914609352309, "grad_norm": 0.4748283624649048, "learning_rate": 0.0001, "loss": 1.6214, "step": 7108 }, { "epoch": 0.8259076386871914, "grad_norm": 0.48463886976242065, "learning_rate": 0.0001, "loss": 1.6234, "step": 7109 }, { "epoch": 0.8260238164391519, "grad_norm": 0.46149513125419617, "learning_rate": 0.0001, "loss": 1.4978, "step": 7110 }, { "epoch": 0.8261399941911124, "grad_norm": 0.47687479853630066, "learning_rate": 0.0001, "loss": 1.6549, "step": 7111 }, { "epoch": 0.8262561719430729, "grad_norm": 0.46911653876304626, "learning_rate": 0.0001, "loss": 1.5885, "step": 7112 }, { "epoch": 0.8263723496950334, "grad_norm": 0.5078893899917603, "learning_rate": 0.0001, "loss": 1.677, "step": 7113 }, { "epoch": 0.8264885274469939, "grad_norm": 0.455218642950058, "learning_rate": 0.0001, "loss": 1.6192, "step": 7114 }, { "epoch": 0.8266047051989543, "grad_norm": 0.44931599497795105, "learning_rate": 0.0001, "loss": 1.5572, "step": 7115 }, { "epoch": 0.826720882950915, "grad_norm": 0.50384521484375, "learning_rate": 0.0001, "loss": 1.7711, "step": 7116 }, { "epoch": 0.8268370607028754, "grad_norm": 0.48653748631477356, "learning_rate": 0.0001, "loss": 1.6987, "step": 7117 }, { "epoch": 0.8269532384548359, "grad_norm": 0.44801607728004456, "learning_rate": 0.0001, "loss": 1.5793, "step": 7118 }, { "epoch": 0.8270694162067964, "grad_norm": 0.45984333753585815, "learning_rate": 0.0001, "loss": 1.7446, "step": 7119 }, { "epoch": 0.8271855939587569, "grad_norm": 0.49128204584121704, "learning_rate": 0.0001, "loss": 1.6582, "step": 7120 }, { "epoch": 0.8273017717107174, "grad_norm": 0.47482722997665405, "learning_rate": 0.0001, "loss": 1.5669, "step": 7121 }, { "epoch": 0.8274179494626779, "grad_norm": 0.46835750341415405, "learning_rate": 0.0001, "loss": 1.5629, "step": 7122 }, { "epoch": 0.8275341272146384, "grad_norm": 0.4840836524963379, "learning_rate": 0.0001, "loss": 1.7906, "step": 7123 }, { "epoch": 0.8276503049665989, "grad_norm": 0.4466482102870941, "learning_rate": 0.0001, "loss": 1.5372, "step": 7124 }, { "epoch": 0.8277664827185593, "grad_norm": 0.47686582803726196, "learning_rate": 0.0001, "loss": 1.5739, "step": 7125 }, { "epoch": 0.82788266047052, "grad_norm": 0.46509337425231934, "learning_rate": 0.0001, "loss": 1.5728, "step": 7126 }, { "epoch": 0.8279988382224804, "grad_norm": 0.44292381405830383, "learning_rate": 0.0001, "loss": 1.6446, "step": 7127 }, { "epoch": 0.8281150159744409, "grad_norm": 0.42537015676498413, "learning_rate": 0.0001, "loss": 1.5346, "step": 7128 }, { "epoch": 0.8282311937264014, "grad_norm": 0.45168519020080566, "learning_rate": 0.0001, "loss": 1.554, "step": 7129 }, { "epoch": 0.8283473714783619, "grad_norm": 0.48862767219543457, "learning_rate": 0.0001, "loss": 1.632, "step": 7130 }, { "epoch": 0.8284635492303224, "grad_norm": 0.4335499405860901, "learning_rate": 0.0001, "loss": 1.3461, "step": 7131 }, { "epoch": 0.8285797269822829, "grad_norm": 0.4990119934082031, "learning_rate": 0.0001, "loss": 1.7773, "step": 7132 }, { "epoch": 0.8286959047342434, "grad_norm": 0.4781734347343445, "learning_rate": 0.0001, "loss": 1.6872, "step": 7133 }, { "epoch": 0.8288120824862039, "grad_norm": 0.46874862909317017, "learning_rate": 0.0001, "loss": 1.6284, "step": 7134 }, { "epoch": 0.8289282602381643, "grad_norm": 0.49614983797073364, "learning_rate": 0.0001, "loss": 1.5967, "step": 7135 }, { "epoch": 0.8290444379901248, "grad_norm": 0.45961683988571167, "learning_rate": 0.0001, "loss": 1.5863, "step": 7136 }, { "epoch": 0.8291606157420854, "grad_norm": 0.42698389291763306, "learning_rate": 0.0001, "loss": 1.4959, "step": 7137 }, { "epoch": 0.8292767934940459, "grad_norm": 0.5121428370475769, "learning_rate": 0.0001, "loss": 1.6586, "step": 7138 }, { "epoch": 0.8293929712460064, "grad_norm": 0.45941177010536194, "learning_rate": 0.0001, "loss": 1.6187, "step": 7139 }, { "epoch": 0.8295091489979669, "grad_norm": 0.487868994474411, "learning_rate": 0.0001, "loss": 1.5562, "step": 7140 }, { "epoch": 0.8296253267499274, "grad_norm": 0.49762672185897827, "learning_rate": 0.0001, "loss": 1.6841, "step": 7141 }, { "epoch": 0.8297415045018879, "grad_norm": 0.4626729488372803, "learning_rate": 0.0001, "loss": 1.6782, "step": 7142 }, { "epoch": 0.8298576822538484, "grad_norm": 0.4500117301940918, "learning_rate": 0.0001, "loss": 1.44, "step": 7143 }, { "epoch": 0.8299738600058089, "grad_norm": 0.45620059967041016, "learning_rate": 0.0001, "loss": 1.5538, "step": 7144 }, { "epoch": 0.8300900377577694, "grad_norm": 0.48835253715515137, "learning_rate": 0.0001, "loss": 1.6525, "step": 7145 }, { "epoch": 0.8302062155097298, "grad_norm": 0.468001127243042, "learning_rate": 0.0001, "loss": 1.6553, "step": 7146 }, { "epoch": 0.8303223932616904, "grad_norm": 0.45825132727622986, "learning_rate": 0.0001, "loss": 1.6388, "step": 7147 }, { "epoch": 0.8304385710136509, "grad_norm": 0.46828025579452515, "learning_rate": 0.0001, "loss": 1.509, "step": 7148 }, { "epoch": 0.8305547487656114, "grad_norm": 0.49574530124664307, "learning_rate": 0.0001, "loss": 1.6193, "step": 7149 }, { "epoch": 0.8306709265175719, "grad_norm": 0.4577179551124573, "learning_rate": 0.0001, "loss": 1.6214, "step": 7150 }, { "epoch": 0.8307871042695324, "grad_norm": 0.47962459921836853, "learning_rate": 0.0001, "loss": 1.622, "step": 7151 }, { "epoch": 0.8309032820214929, "grad_norm": 0.5886511206626892, "learning_rate": 0.0001, "loss": 1.6682, "step": 7152 }, { "epoch": 0.8310194597734534, "grad_norm": 0.5050860047340393, "learning_rate": 0.0001, "loss": 1.5805, "step": 7153 }, { "epoch": 0.8311356375254139, "grad_norm": 0.48599928617477417, "learning_rate": 0.0001, "loss": 1.6516, "step": 7154 }, { "epoch": 0.8312518152773744, "grad_norm": 0.4886664152145386, "learning_rate": 0.0001, "loss": 1.6722, "step": 7155 }, { "epoch": 0.8313679930293348, "grad_norm": 0.502704381942749, "learning_rate": 0.0001, "loss": 1.6681, "step": 7156 }, { "epoch": 0.8314841707812953, "grad_norm": 0.4662174880504608, "learning_rate": 0.0001, "loss": 1.5118, "step": 7157 }, { "epoch": 0.8316003485332559, "grad_norm": 0.5194843411445618, "learning_rate": 0.0001, "loss": 1.8369, "step": 7158 }, { "epoch": 0.8317165262852164, "grad_norm": 0.458987832069397, "learning_rate": 0.0001, "loss": 1.738, "step": 7159 }, { "epoch": 0.8318327040371769, "grad_norm": 0.4516839385032654, "learning_rate": 0.0001, "loss": 1.4411, "step": 7160 }, { "epoch": 0.8319488817891374, "grad_norm": 0.47868669033050537, "learning_rate": 0.0001, "loss": 1.4402, "step": 7161 }, { "epoch": 0.8320650595410979, "grad_norm": 0.4523623585700989, "learning_rate": 0.0001, "loss": 1.7252, "step": 7162 }, { "epoch": 0.8321812372930584, "grad_norm": 0.4759959876537323, "learning_rate": 0.0001, "loss": 1.6792, "step": 7163 }, { "epoch": 0.8322974150450189, "grad_norm": 0.47161024808883667, "learning_rate": 0.0001, "loss": 1.6595, "step": 7164 }, { "epoch": 0.8324135927969794, "grad_norm": 0.43895629048347473, "learning_rate": 0.0001, "loss": 1.6528, "step": 7165 }, { "epoch": 0.8325297705489398, "grad_norm": 0.47241824865341187, "learning_rate": 0.0001, "loss": 1.4324, "step": 7166 }, { "epoch": 0.8326459483009003, "grad_norm": 0.43697863817214966, "learning_rate": 0.0001, "loss": 1.4951, "step": 7167 }, { "epoch": 0.8327621260528609, "grad_norm": 0.5089771747589111, "learning_rate": 0.0001, "loss": 1.6859, "step": 7168 }, { "epoch": 0.8328783038048214, "grad_norm": 0.47586750984191895, "learning_rate": 0.0001, "loss": 1.4354, "step": 7169 }, { "epoch": 0.8329944815567819, "grad_norm": 0.4991025924682617, "learning_rate": 0.0001, "loss": 1.6361, "step": 7170 }, { "epoch": 0.8331106593087424, "grad_norm": 0.4552896022796631, "learning_rate": 0.0001, "loss": 1.6522, "step": 7171 }, { "epoch": 0.8332268370607029, "grad_norm": 0.4508793354034424, "learning_rate": 0.0001, "loss": 1.7194, "step": 7172 }, { "epoch": 0.8333430148126634, "grad_norm": 0.4660915732383728, "learning_rate": 0.0001, "loss": 1.5309, "step": 7173 }, { "epoch": 0.8334591925646239, "grad_norm": 0.48091113567352295, "learning_rate": 0.0001, "loss": 1.608, "step": 7174 }, { "epoch": 0.8335753703165844, "grad_norm": 0.4672652781009674, "learning_rate": 0.0001, "loss": 1.6219, "step": 7175 }, { "epoch": 0.8336915480685448, "grad_norm": 0.46777546405792236, "learning_rate": 0.0001, "loss": 1.6119, "step": 7176 }, { "epoch": 0.8338077258205053, "grad_norm": 0.422648161649704, "learning_rate": 0.0001, "loss": 1.4262, "step": 7177 }, { "epoch": 0.8339239035724658, "grad_norm": 0.4612855613231659, "learning_rate": 0.0001, "loss": 1.695, "step": 7178 }, { "epoch": 0.8340400813244264, "grad_norm": 0.4481313228607178, "learning_rate": 0.0001, "loss": 1.5589, "step": 7179 }, { "epoch": 0.8341562590763869, "grad_norm": 0.4890344440937042, "learning_rate": 0.0001, "loss": 1.6548, "step": 7180 }, { "epoch": 0.8342724368283474, "grad_norm": 0.4647752046585083, "learning_rate": 0.0001, "loss": 1.5956, "step": 7181 }, { "epoch": 0.8343886145803079, "grad_norm": 0.5206274390220642, "learning_rate": 0.0001, "loss": 1.7667, "step": 7182 }, { "epoch": 0.8345047923322684, "grad_norm": 0.4441780745983124, "learning_rate": 0.0001, "loss": 1.3936, "step": 7183 }, { "epoch": 0.8346209700842289, "grad_norm": 0.4659591019153595, "learning_rate": 0.0001, "loss": 1.4816, "step": 7184 }, { "epoch": 0.8347371478361894, "grad_norm": 0.45973634719848633, "learning_rate": 0.0001, "loss": 1.5349, "step": 7185 }, { "epoch": 0.8348533255881498, "grad_norm": 0.43164822459220886, "learning_rate": 0.0001, "loss": 1.4983, "step": 7186 }, { "epoch": 0.8349695033401103, "grad_norm": 0.4463837742805481, "learning_rate": 0.0001, "loss": 1.5938, "step": 7187 }, { "epoch": 0.8350856810920708, "grad_norm": 0.4984694719314575, "learning_rate": 0.0001, "loss": 1.8769, "step": 7188 }, { "epoch": 0.8352018588440314, "grad_norm": 0.476712167263031, "learning_rate": 0.0001, "loss": 1.6925, "step": 7189 }, { "epoch": 0.8353180365959919, "grad_norm": 0.4878515899181366, "learning_rate": 0.0001, "loss": 1.6762, "step": 7190 }, { "epoch": 0.8354342143479524, "grad_norm": 0.44668495655059814, "learning_rate": 0.0001, "loss": 1.7159, "step": 7191 }, { "epoch": 0.8355503920999129, "grad_norm": 0.46840810775756836, "learning_rate": 0.0001, "loss": 1.6875, "step": 7192 }, { "epoch": 0.8356665698518734, "grad_norm": 0.47092488408088684, "learning_rate": 0.0001, "loss": 1.6715, "step": 7193 }, { "epoch": 0.8357827476038339, "grad_norm": 0.4265216588973999, "learning_rate": 0.0001, "loss": 1.3333, "step": 7194 }, { "epoch": 0.8358989253557944, "grad_norm": 0.4651343822479248, "learning_rate": 0.0001, "loss": 1.7237, "step": 7195 }, { "epoch": 0.8360151031077548, "grad_norm": 0.4326002299785614, "learning_rate": 0.0001, "loss": 1.5349, "step": 7196 }, { "epoch": 0.8361312808597153, "grad_norm": 0.46833327412605286, "learning_rate": 0.0001, "loss": 1.6418, "step": 7197 }, { "epoch": 0.8362474586116758, "grad_norm": 0.46911799907684326, "learning_rate": 0.0001, "loss": 1.5144, "step": 7198 }, { "epoch": 0.8363636363636363, "grad_norm": 0.43018439412117004, "learning_rate": 0.0001, "loss": 1.5022, "step": 7199 }, { "epoch": 0.8364798141155969, "grad_norm": 0.5141427516937256, "learning_rate": 0.0001, "loss": 1.5996, "step": 7200 }, { "epoch": 0.8365959918675574, "grad_norm": 0.4784258306026459, "learning_rate": 0.0001, "loss": 1.7193, "step": 7201 }, { "epoch": 0.8367121696195179, "grad_norm": 0.47277989983558655, "learning_rate": 0.0001, "loss": 1.4349, "step": 7202 }, { "epoch": 0.8368283473714784, "grad_norm": 0.46376681327819824, "learning_rate": 0.0001, "loss": 1.6443, "step": 7203 }, { "epoch": 0.8369445251234389, "grad_norm": 0.4796522855758667, "learning_rate": 0.0001, "loss": 1.7946, "step": 7204 }, { "epoch": 0.8370607028753994, "grad_norm": 0.42169952392578125, "learning_rate": 0.0001, "loss": 1.4923, "step": 7205 }, { "epoch": 0.8371768806273598, "grad_norm": 0.4597862660884857, "learning_rate": 0.0001, "loss": 1.6694, "step": 7206 }, { "epoch": 0.8372930583793203, "grad_norm": 0.4585164189338684, "learning_rate": 0.0001, "loss": 1.5235, "step": 7207 }, { "epoch": 0.8374092361312808, "grad_norm": 0.4696848392486572, "learning_rate": 0.0001, "loss": 1.6361, "step": 7208 }, { "epoch": 0.8375254138832413, "grad_norm": 0.4311649203300476, "learning_rate": 0.0001, "loss": 1.4602, "step": 7209 }, { "epoch": 0.8376415916352019, "grad_norm": 0.48763203620910645, "learning_rate": 0.0001, "loss": 1.5663, "step": 7210 }, { "epoch": 0.8377577693871624, "grad_norm": 0.4315451383590698, "learning_rate": 0.0001, "loss": 1.5024, "step": 7211 }, { "epoch": 0.8378739471391229, "grad_norm": 0.44684144854545593, "learning_rate": 0.0001, "loss": 1.6711, "step": 7212 }, { "epoch": 0.8379901248910834, "grad_norm": 0.4800936281681061, "learning_rate": 0.0001, "loss": 1.4873, "step": 7213 }, { "epoch": 0.8381063026430439, "grad_norm": 0.45008590817451477, "learning_rate": 0.0001, "loss": 1.5139, "step": 7214 }, { "epoch": 0.8382224803950044, "grad_norm": 0.5298113822937012, "learning_rate": 0.0001, "loss": 1.6431, "step": 7215 }, { "epoch": 0.8383386581469648, "grad_norm": 0.4812602400779724, "learning_rate": 0.0001, "loss": 1.6722, "step": 7216 }, { "epoch": 0.8384548358989253, "grad_norm": 0.48588478565216064, "learning_rate": 0.0001, "loss": 1.747, "step": 7217 }, { "epoch": 0.8385710136508858, "grad_norm": 0.5386226177215576, "learning_rate": 0.0001, "loss": 1.6504, "step": 7218 }, { "epoch": 0.8386871914028463, "grad_norm": 0.5037118196487427, "learning_rate": 0.0001, "loss": 1.6933, "step": 7219 }, { "epoch": 0.8388033691548068, "grad_norm": 0.48345088958740234, "learning_rate": 0.0001, "loss": 1.7639, "step": 7220 }, { "epoch": 0.8389195469067674, "grad_norm": 0.4569917321205139, "learning_rate": 0.0001, "loss": 1.6688, "step": 7221 }, { "epoch": 0.8390357246587279, "grad_norm": 0.4810996651649475, "learning_rate": 0.0001, "loss": 1.6776, "step": 7222 }, { "epoch": 0.8391519024106884, "grad_norm": 0.49533677101135254, "learning_rate": 0.0001, "loss": 1.6395, "step": 7223 }, { "epoch": 0.8392680801626489, "grad_norm": 0.4612615704536438, "learning_rate": 0.0001, "loss": 1.7384, "step": 7224 }, { "epoch": 0.8393842579146094, "grad_norm": 0.4266413152217865, "learning_rate": 0.0001, "loss": 1.5239, "step": 7225 }, { "epoch": 0.8395004356665698, "grad_norm": 0.490537166595459, "learning_rate": 0.0001, "loss": 1.8198, "step": 7226 }, { "epoch": 0.8396166134185303, "grad_norm": 0.45766183733940125, "learning_rate": 0.0001, "loss": 1.4652, "step": 7227 }, { "epoch": 0.8397327911704908, "grad_norm": 0.48349621891975403, "learning_rate": 0.0001, "loss": 1.6274, "step": 7228 }, { "epoch": 0.8398489689224513, "grad_norm": 0.44390740990638733, "learning_rate": 0.0001, "loss": 1.5538, "step": 7229 }, { "epoch": 0.8399651466744118, "grad_norm": 0.4712617099285126, "learning_rate": 0.0001, "loss": 1.5777, "step": 7230 }, { "epoch": 0.8400813244263724, "grad_norm": 0.47112375497817993, "learning_rate": 0.0001, "loss": 1.6299, "step": 7231 }, { "epoch": 0.8401975021783329, "grad_norm": 0.491479754447937, "learning_rate": 0.0001, "loss": 1.5637, "step": 7232 }, { "epoch": 0.8403136799302934, "grad_norm": 0.42350316047668457, "learning_rate": 0.0001, "loss": 1.5259, "step": 7233 }, { "epoch": 0.8404298576822539, "grad_norm": 0.4231340289115906, "learning_rate": 0.0001, "loss": 1.6274, "step": 7234 }, { "epoch": 0.8405460354342144, "grad_norm": 0.47028040885925293, "learning_rate": 0.0001, "loss": 1.7254, "step": 7235 }, { "epoch": 0.8406622131861748, "grad_norm": 0.4882940351963043, "learning_rate": 0.0001, "loss": 1.7317, "step": 7236 }, { "epoch": 0.8407783909381353, "grad_norm": 0.4849685728549957, "learning_rate": 0.0001, "loss": 1.6552, "step": 7237 }, { "epoch": 0.8408945686900958, "grad_norm": 0.45504260063171387, "learning_rate": 0.0001, "loss": 1.6153, "step": 7238 }, { "epoch": 0.8410107464420563, "grad_norm": 0.4550893008708954, "learning_rate": 0.0001, "loss": 1.4802, "step": 7239 }, { "epoch": 0.8411269241940168, "grad_norm": 0.5196647047996521, "learning_rate": 0.0001, "loss": 1.6103, "step": 7240 }, { "epoch": 0.8412431019459774, "grad_norm": 0.525698721408844, "learning_rate": 0.0001, "loss": 1.7223, "step": 7241 }, { "epoch": 0.8413592796979379, "grad_norm": 0.46245935559272766, "learning_rate": 0.0001, "loss": 1.7588, "step": 7242 }, { "epoch": 0.8414754574498984, "grad_norm": 0.4684200584888458, "learning_rate": 0.0001, "loss": 1.6076, "step": 7243 }, { "epoch": 0.8415916352018589, "grad_norm": 0.5075876116752625, "learning_rate": 0.0001, "loss": 1.5209, "step": 7244 }, { "epoch": 0.8417078129538194, "grad_norm": 0.4788230061531067, "learning_rate": 0.0001, "loss": 1.6149, "step": 7245 }, { "epoch": 0.8418239907057798, "grad_norm": 0.4579117000102997, "learning_rate": 0.0001, "loss": 1.6237, "step": 7246 }, { "epoch": 0.8419401684577403, "grad_norm": 0.46064794063568115, "learning_rate": 0.0001, "loss": 1.583, "step": 7247 }, { "epoch": 0.8420563462097008, "grad_norm": 0.49048542976379395, "learning_rate": 0.0001, "loss": 1.5957, "step": 7248 }, { "epoch": 0.8421725239616613, "grad_norm": 0.46786949038505554, "learning_rate": 0.0001, "loss": 1.3677, "step": 7249 }, { "epoch": 0.8422887017136218, "grad_norm": 0.4660671651363373, "learning_rate": 0.0001, "loss": 1.5582, "step": 7250 }, { "epoch": 0.8424048794655823, "grad_norm": 0.45976850390434265, "learning_rate": 0.0001, "loss": 1.5745, "step": 7251 }, { "epoch": 0.8425210572175429, "grad_norm": 0.45167800784111023, "learning_rate": 0.0001, "loss": 1.4037, "step": 7252 }, { "epoch": 0.8426372349695034, "grad_norm": 0.45298877358436584, "learning_rate": 0.0001, "loss": 1.5996, "step": 7253 }, { "epoch": 0.8427534127214639, "grad_norm": 0.4448161721229553, "learning_rate": 0.0001, "loss": 1.4936, "step": 7254 }, { "epoch": 0.8428695904734244, "grad_norm": 0.4482094943523407, "learning_rate": 0.0001, "loss": 1.5979, "step": 7255 }, { "epoch": 0.8429857682253848, "grad_norm": 0.45849940180778503, "learning_rate": 0.0001, "loss": 1.5942, "step": 7256 }, { "epoch": 0.8431019459773453, "grad_norm": 0.4591526985168457, "learning_rate": 0.0001, "loss": 1.5527, "step": 7257 }, { "epoch": 0.8432181237293058, "grad_norm": 0.4684211015701294, "learning_rate": 0.0001, "loss": 1.6851, "step": 7258 }, { "epoch": 0.8433343014812663, "grad_norm": 0.4497279226779938, "learning_rate": 0.0001, "loss": 1.4289, "step": 7259 }, { "epoch": 0.8434504792332268, "grad_norm": 0.46544843912124634, "learning_rate": 0.0001, "loss": 1.5504, "step": 7260 }, { "epoch": 0.8435666569851873, "grad_norm": 0.4916938841342926, "learning_rate": 0.0001, "loss": 1.6037, "step": 7261 }, { "epoch": 0.8436828347371479, "grad_norm": 0.4782385230064392, "learning_rate": 0.0001, "loss": 1.6031, "step": 7262 }, { "epoch": 0.8437990124891084, "grad_norm": 0.46342313289642334, "learning_rate": 0.0001, "loss": 1.6774, "step": 7263 }, { "epoch": 0.8439151902410689, "grad_norm": 0.456551194190979, "learning_rate": 0.0001, "loss": 1.7548, "step": 7264 }, { "epoch": 0.8440313679930294, "grad_norm": 0.4580911695957184, "learning_rate": 0.0001, "loss": 1.5583, "step": 7265 }, { "epoch": 0.8441475457449898, "grad_norm": 0.47985199093818665, "learning_rate": 0.0001, "loss": 1.5994, "step": 7266 }, { "epoch": 0.8442637234969503, "grad_norm": 0.47257447242736816, "learning_rate": 0.0001, "loss": 1.5999, "step": 7267 }, { "epoch": 0.8443799012489108, "grad_norm": 0.4965929687023163, "learning_rate": 0.0001, "loss": 1.5904, "step": 7268 }, { "epoch": 0.8444960790008713, "grad_norm": 0.4734271168708801, "learning_rate": 0.0001, "loss": 1.7729, "step": 7269 }, { "epoch": 0.8446122567528318, "grad_norm": 0.4869614839553833, "learning_rate": 0.0001, "loss": 1.7014, "step": 7270 }, { "epoch": 0.8447284345047923, "grad_norm": 0.4773048162460327, "learning_rate": 0.0001, "loss": 1.5696, "step": 7271 }, { "epoch": 0.8448446122567528, "grad_norm": 0.47592759132385254, "learning_rate": 0.0001, "loss": 1.5401, "step": 7272 }, { "epoch": 0.8449607900087134, "grad_norm": 0.4415343105792999, "learning_rate": 0.0001, "loss": 1.3948, "step": 7273 }, { "epoch": 0.8450769677606739, "grad_norm": 0.4446001350879669, "learning_rate": 0.0001, "loss": 1.5169, "step": 7274 }, { "epoch": 0.8451931455126344, "grad_norm": 0.4620625674724579, "learning_rate": 0.0001, "loss": 1.5818, "step": 7275 }, { "epoch": 0.8453093232645948, "grad_norm": 0.4809649884700775, "learning_rate": 0.0001, "loss": 1.5976, "step": 7276 }, { "epoch": 0.8454255010165553, "grad_norm": 0.45134058594703674, "learning_rate": 0.0001, "loss": 1.6033, "step": 7277 }, { "epoch": 0.8455416787685158, "grad_norm": 0.44494450092315674, "learning_rate": 0.0001, "loss": 1.5495, "step": 7278 }, { "epoch": 0.8456578565204763, "grad_norm": 0.5055558085441589, "learning_rate": 0.0001, "loss": 1.4701, "step": 7279 }, { "epoch": 0.8457740342724368, "grad_norm": 0.418148398399353, "learning_rate": 0.0001, "loss": 1.4975, "step": 7280 }, { "epoch": 0.8458902120243973, "grad_norm": 0.4533846378326416, "learning_rate": 0.0001, "loss": 1.6442, "step": 7281 }, { "epoch": 0.8460063897763578, "grad_norm": 0.49870097637176514, "learning_rate": 0.0001, "loss": 1.6239, "step": 7282 }, { "epoch": 0.8461225675283184, "grad_norm": 0.4689449071884155, "learning_rate": 0.0001, "loss": 1.7109, "step": 7283 }, { "epoch": 0.8462387452802789, "grad_norm": 0.466709166765213, "learning_rate": 0.0001, "loss": 1.5115, "step": 7284 }, { "epoch": 0.8463549230322394, "grad_norm": 0.46668291091918945, "learning_rate": 0.0001, "loss": 1.7183, "step": 7285 }, { "epoch": 0.8464711007841998, "grad_norm": 0.43597686290740967, "learning_rate": 0.0001, "loss": 1.5108, "step": 7286 }, { "epoch": 0.8465872785361603, "grad_norm": 0.5049334168434143, "learning_rate": 0.0001, "loss": 1.5353, "step": 7287 }, { "epoch": 0.8467034562881208, "grad_norm": 0.46801841259002686, "learning_rate": 0.0001, "loss": 1.762, "step": 7288 }, { "epoch": 0.8468196340400813, "grad_norm": 0.46203526854515076, "learning_rate": 0.0001, "loss": 1.6308, "step": 7289 }, { "epoch": 0.8469358117920418, "grad_norm": 0.45195382833480835, "learning_rate": 0.0001, "loss": 1.6999, "step": 7290 }, { "epoch": 0.8470519895440023, "grad_norm": 0.4815501868724823, "learning_rate": 0.0001, "loss": 1.5988, "step": 7291 }, { "epoch": 0.8471681672959628, "grad_norm": 0.4791460931301117, "learning_rate": 0.0001, "loss": 1.6911, "step": 7292 }, { "epoch": 0.8472843450479233, "grad_norm": 0.4468856155872345, "learning_rate": 0.0001, "loss": 1.6002, "step": 7293 }, { "epoch": 0.8474005227998839, "grad_norm": 0.45526090264320374, "learning_rate": 0.0001, "loss": 1.5771, "step": 7294 }, { "epoch": 0.8475167005518444, "grad_norm": 0.5207736492156982, "learning_rate": 0.0001, "loss": 1.7663, "step": 7295 }, { "epoch": 0.8476328783038048, "grad_norm": 0.47375261783599854, "learning_rate": 0.0001, "loss": 1.5903, "step": 7296 }, { "epoch": 0.8477490560557653, "grad_norm": 0.4951147139072418, "learning_rate": 0.0001, "loss": 1.5862, "step": 7297 }, { "epoch": 0.8478652338077258, "grad_norm": 0.4843972325325012, "learning_rate": 0.0001, "loss": 1.6032, "step": 7298 }, { "epoch": 0.8479814115596863, "grad_norm": 0.44949018955230713, "learning_rate": 0.0001, "loss": 1.5033, "step": 7299 }, { "epoch": 0.8480975893116468, "grad_norm": 0.467597633600235, "learning_rate": 0.0001, "loss": 1.6343, "step": 7300 }, { "epoch": 0.8482137670636073, "grad_norm": 0.44474077224731445, "learning_rate": 0.0001, "loss": 1.491, "step": 7301 }, { "epoch": 0.8483299448155678, "grad_norm": 0.455203115940094, "learning_rate": 0.0001, "loss": 1.5006, "step": 7302 }, { "epoch": 0.8484461225675283, "grad_norm": 0.44709402322769165, "learning_rate": 0.0001, "loss": 1.3972, "step": 7303 }, { "epoch": 0.8485623003194889, "grad_norm": 0.4874759316444397, "learning_rate": 0.0001, "loss": 1.7365, "step": 7304 }, { "epoch": 0.8486784780714494, "grad_norm": 0.4757806956768036, "learning_rate": 0.0001, "loss": 1.609, "step": 7305 }, { "epoch": 0.8487946558234098, "grad_norm": 0.4443889260292053, "learning_rate": 0.0001, "loss": 1.4886, "step": 7306 }, { "epoch": 0.8489108335753703, "grad_norm": 0.4977072477340698, "learning_rate": 0.0001, "loss": 1.6426, "step": 7307 }, { "epoch": 0.8490270113273308, "grad_norm": 0.5108775496482849, "learning_rate": 0.0001, "loss": 1.7174, "step": 7308 }, { "epoch": 0.8491431890792913, "grad_norm": 0.5324129462242126, "learning_rate": 0.0001, "loss": 1.7534, "step": 7309 }, { "epoch": 0.8492593668312518, "grad_norm": 0.46875059604644775, "learning_rate": 0.0001, "loss": 1.7199, "step": 7310 }, { "epoch": 0.8493755445832123, "grad_norm": 0.4918031096458435, "learning_rate": 0.0001, "loss": 1.5536, "step": 7311 }, { "epoch": 0.8494917223351728, "grad_norm": 0.4679587185382843, "learning_rate": 0.0001, "loss": 1.67, "step": 7312 }, { "epoch": 0.8496079000871333, "grad_norm": 0.4425588846206665, "learning_rate": 0.0001, "loss": 1.4041, "step": 7313 }, { "epoch": 0.8497240778390938, "grad_norm": 0.48238202929496765, "learning_rate": 0.0001, "loss": 1.8244, "step": 7314 }, { "epoch": 0.8498402555910544, "grad_norm": 0.42519932985305786, "learning_rate": 0.0001, "loss": 1.4709, "step": 7315 }, { "epoch": 0.8499564333430148, "grad_norm": 0.4286966919898987, "learning_rate": 0.0001, "loss": 1.518, "step": 7316 }, { "epoch": 0.8500726110949753, "grad_norm": 0.4630890190601349, "learning_rate": 0.0001, "loss": 1.6126, "step": 7317 }, { "epoch": 0.8501887888469358, "grad_norm": 0.4862578809261322, "learning_rate": 0.0001, "loss": 1.5924, "step": 7318 }, { "epoch": 0.8503049665988963, "grad_norm": 0.45061859488487244, "learning_rate": 0.0001, "loss": 1.6935, "step": 7319 }, { "epoch": 0.8504211443508568, "grad_norm": 0.47194933891296387, "learning_rate": 0.0001, "loss": 1.639, "step": 7320 }, { "epoch": 0.8505373221028173, "grad_norm": 0.457969605922699, "learning_rate": 0.0001, "loss": 1.6307, "step": 7321 }, { "epoch": 0.8506534998547778, "grad_norm": 0.4440132677555084, "learning_rate": 0.0001, "loss": 1.6272, "step": 7322 }, { "epoch": 0.8507696776067383, "grad_norm": 0.5148141384124756, "learning_rate": 0.0001, "loss": 1.659, "step": 7323 }, { "epoch": 0.8508858553586988, "grad_norm": 0.481653094291687, "learning_rate": 0.0001, "loss": 1.6465, "step": 7324 }, { "epoch": 0.8510020331106594, "grad_norm": 0.4533710777759552, "learning_rate": 0.0001, "loss": 1.4495, "step": 7325 }, { "epoch": 0.8511182108626199, "grad_norm": 0.4878830909729004, "learning_rate": 0.0001, "loss": 1.6365, "step": 7326 }, { "epoch": 0.8512343886145803, "grad_norm": 0.5046993494033813, "learning_rate": 0.0001, "loss": 1.8043, "step": 7327 }, { "epoch": 0.8513505663665408, "grad_norm": 0.48228132724761963, "learning_rate": 0.0001, "loss": 1.5357, "step": 7328 }, { "epoch": 0.8514667441185013, "grad_norm": 0.47209233045578003, "learning_rate": 0.0001, "loss": 1.6615, "step": 7329 }, { "epoch": 0.8515829218704618, "grad_norm": 0.4597385823726654, "learning_rate": 0.0001, "loss": 1.4711, "step": 7330 }, { "epoch": 0.8516990996224223, "grad_norm": 0.478766530752182, "learning_rate": 0.0001, "loss": 1.6755, "step": 7331 }, { "epoch": 0.8518152773743828, "grad_norm": 0.4749451279640198, "learning_rate": 0.0001, "loss": 1.5885, "step": 7332 }, { "epoch": 0.8519314551263433, "grad_norm": 0.4681585431098938, "learning_rate": 0.0001, "loss": 1.531, "step": 7333 }, { "epoch": 0.8520476328783038, "grad_norm": 0.5120987892150879, "learning_rate": 0.0001, "loss": 1.651, "step": 7334 }, { "epoch": 0.8521638106302643, "grad_norm": 0.4379187822341919, "learning_rate": 0.0001, "loss": 1.4779, "step": 7335 }, { "epoch": 0.8522799883822249, "grad_norm": 0.5244159698486328, "learning_rate": 0.0001, "loss": 1.7338, "step": 7336 }, { "epoch": 0.8523961661341853, "grad_norm": 0.4366852343082428, "learning_rate": 0.0001, "loss": 1.448, "step": 7337 }, { "epoch": 0.8525123438861458, "grad_norm": 0.468220591545105, "learning_rate": 0.0001, "loss": 1.5285, "step": 7338 }, { "epoch": 0.8526285216381063, "grad_norm": 0.43382033705711365, "learning_rate": 0.0001, "loss": 1.4661, "step": 7339 }, { "epoch": 0.8527446993900668, "grad_norm": 0.48206859827041626, "learning_rate": 0.0001, "loss": 1.7871, "step": 7340 }, { "epoch": 0.8528608771420273, "grad_norm": 0.448639839887619, "learning_rate": 0.0001, "loss": 1.3691, "step": 7341 }, { "epoch": 0.8529770548939878, "grad_norm": 0.48015591502189636, "learning_rate": 0.0001, "loss": 1.7805, "step": 7342 }, { "epoch": 0.8530932326459483, "grad_norm": 0.44810763001441956, "learning_rate": 0.0001, "loss": 1.502, "step": 7343 }, { "epoch": 0.8532094103979088, "grad_norm": 0.5089064240455627, "learning_rate": 0.0001, "loss": 1.7226, "step": 7344 }, { "epoch": 0.8533255881498693, "grad_norm": 0.4648088216781616, "learning_rate": 0.0001, "loss": 1.3528, "step": 7345 }, { "epoch": 0.8534417659018299, "grad_norm": 0.46756070852279663, "learning_rate": 0.0001, "loss": 1.6056, "step": 7346 }, { "epoch": 0.8535579436537903, "grad_norm": 0.4743233621120453, "learning_rate": 0.0001, "loss": 1.6762, "step": 7347 }, { "epoch": 0.8536741214057508, "grad_norm": 0.4871358573436737, "learning_rate": 0.0001, "loss": 1.7681, "step": 7348 }, { "epoch": 0.8537902991577113, "grad_norm": 0.4481533467769623, "learning_rate": 0.0001, "loss": 1.5602, "step": 7349 }, { "epoch": 0.8539064769096718, "grad_norm": 0.49137020111083984, "learning_rate": 0.0001, "loss": 1.3881, "step": 7350 }, { "epoch": 0.8540226546616323, "grad_norm": 0.45045387744903564, "learning_rate": 0.0001, "loss": 1.6635, "step": 7351 }, { "epoch": 0.8541388324135928, "grad_norm": 0.45191293954849243, "learning_rate": 0.0001, "loss": 1.5433, "step": 7352 }, { "epoch": 0.8542550101655533, "grad_norm": 0.46247225999832153, "learning_rate": 0.0001, "loss": 1.7451, "step": 7353 }, { "epoch": 0.8543711879175138, "grad_norm": 0.4777053892612457, "learning_rate": 0.0001, "loss": 1.6937, "step": 7354 }, { "epoch": 0.8544873656694743, "grad_norm": 0.44908279180526733, "learning_rate": 0.0001, "loss": 1.5446, "step": 7355 }, { "epoch": 0.8546035434214347, "grad_norm": 0.4714983403682709, "learning_rate": 0.0001, "loss": 1.5421, "step": 7356 }, { "epoch": 0.8547197211733953, "grad_norm": 0.4688902497291565, "learning_rate": 0.0001, "loss": 1.6913, "step": 7357 }, { "epoch": 0.8548358989253558, "grad_norm": 0.46939048171043396, "learning_rate": 0.0001, "loss": 1.67, "step": 7358 }, { "epoch": 0.8549520766773163, "grad_norm": 0.47825509309768677, "learning_rate": 0.0001, "loss": 1.6216, "step": 7359 }, { "epoch": 0.8550682544292768, "grad_norm": 0.49258682131767273, "learning_rate": 0.0001, "loss": 1.6379, "step": 7360 }, { "epoch": 0.8551844321812373, "grad_norm": 0.5022354125976562, "learning_rate": 0.0001, "loss": 1.7366, "step": 7361 }, { "epoch": 0.8553006099331978, "grad_norm": 0.4815436899662018, "learning_rate": 0.0001, "loss": 1.5895, "step": 7362 }, { "epoch": 0.8554167876851583, "grad_norm": 0.4701344668865204, "learning_rate": 0.0001, "loss": 1.6379, "step": 7363 }, { "epoch": 0.8555329654371188, "grad_norm": 0.4595559537410736, "learning_rate": 0.0001, "loss": 1.4641, "step": 7364 }, { "epoch": 0.8556491431890793, "grad_norm": 0.45530787110328674, "learning_rate": 0.0001, "loss": 1.5974, "step": 7365 }, { "epoch": 0.8557653209410397, "grad_norm": 0.45153534412384033, "learning_rate": 0.0001, "loss": 1.3796, "step": 7366 }, { "epoch": 0.8558814986930003, "grad_norm": 0.4568248391151428, "learning_rate": 0.0001, "loss": 1.6289, "step": 7367 }, { "epoch": 0.8559976764449608, "grad_norm": 0.5031229853630066, "learning_rate": 0.0001, "loss": 1.6887, "step": 7368 }, { "epoch": 0.8561138541969213, "grad_norm": 0.5217198729515076, "learning_rate": 0.0001, "loss": 1.7427, "step": 7369 }, { "epoch": 0.8562300319488818, "grad_norm": 0.46275079250335693, "learning_rate": 0.0001, "loss": 1.4803, "step": 7370 }, { "epoch": 0.8563462097008423, "grad_norm": 0.47801512479782104, "learning_rate": 0.0001, "loss": 1.6526, "step": 7371 }, { "epoch": 0.8564623874528028, "grad_norm": 0.48373323678970337, "learning_rate": 0.0001, "loss": 1.5822, "step": 7372 }, { "epoch": 0.8565785652047633, "grad_norm": 0.4380189776420593, "learning_rate": 0.0001, "loss": 1.4553, "step": 7373 }, { "epoch": 0.8566947429567238, "grad_norm": 0.48953357338905334, "learning_rate": 0.0001, "loss": 1.7664, "step": 7374 }, { "epoch": 0.8568109207086843, "grad_norm": 0.4443288743495941, "learning_rate": 0.0001, "loss": 1.4155, "step": 7375 }, { "epoch": 0.8569270984606447, "grad_norm": 0.4462949335575104, "learning_rate": 0.0001, "loss": 1.6234, "step": 7376 }, { "epoch": 0.8570432762126052, "grad_norm": 0.5047136545181274, "learning_rate": 0.0001, "loss": 1.6205, "step": 7377 }, { "epoch": 0.8571594539645658, "grad_norm": 0.440891832113266, "learning_rate": 0.0001, "loss": 1.6558, "step": 7378 }, { "epoch": 0.8572756317165263, "grad_norm": 0.49732422828674316, "learning_rate": 0.0001, "loss": 1.7462, "step": 7379 }, { "epoch": 0.8573918094684868, "grad_norm": 0.47000056505203247, "learning_rate": 0.0001, "loss": 1.6352, "step": 7380 }, { "epoch": 0.8575079872204473, "grad_norm": 0.4483802020549774, "learning_rate": 0.0001, "loss": 1.5411, "step": 7381 }, { "epoch": 0.8576241649724078, "grad_norm": 0.4606070816516876, "learning_rate": 0.0001, "loss": 1.6308, "step": 7382 }, { "epoch": 0.8577403427243683, "grad_norm": 0.4387550950050354, "learning_rate": 0.0001, "loss": 1.5059, "step": 7383 }, { "epoch": 0.8578565204763288, "grad_norm": 0.4888121783733368, "learning_rate": 0.0001, "loss": 1.6821, "step": 7384 }, { "epoch": 0.8579726982282893, "grad_norm": 0.4582982659339905, "learning_rate": 0.0001, "loss": 1.527, "step": 7385 }, { "epoch": 0.8580888759802497, "grad_norm": 0.46754390001296997, "learning_rate": 0.0001, "loss": 1.6641, "step": 7386 }, { "epoch": 0.8582050537322102, "grad_norm": 0.44355496764183044, "learning_rate": 0.0001, "loss": 1.6246, "step": 7387 }, { "epoch": 0.8583212314841708, "grad_norm": 0.628402829170227, "learning_rate": 0.0001, "loss": 1.4312, "step": 7388 }, { "epoch": 0.8584374092361313, "grad_norm": 0.5051394701004028, "learning_rate": 0.0001, "loss": 1.6685, "step": 7389 }, { "epoch": 0.8585535869880918, "grad_norm": 0.4363487660884857, "learning_rate": 0.0001, "loss": 1.3303, "step": 7390 }, { "epoch": 0.8586697647400523, "grad_norm": 0.4447265863418579, "learning_rate": 0.0001, "loss": 1.4897, "step": 7391 }, { "epoch": 0.8587859424920128, "grad_norm": 0.4490847885608673, "learning_rate": 0.0001, "loss": 1.3502, "step": 7392 }, { "epoch": 0.8589021202439733, "grad_norm": 0.6224667429924011, "learning_rate": 0.0001, "loss": 1.5804, "step": 7393 }, { "epoch": 0.8590182979959338, "grad_norm": 0.4750422537326813, "learning_rate": 0.0001, "loss": 1.5267, "step": 7394 }, { "epoch": 0.8591344757478943, "grad_norm": 0.47161757946014404, "learning_rate": 0.0001, "loss": 1.5626, "step": 7395 }, { "epoch": 0.8592506534998547, "grad_norm": 0.4862884283065796, "learning_rate": 0.0001, "loss": 1.5404, "step": 7396 }, { "epoch": 0.8593668312518152, "grad_norm": 0.6144393682479858, "learning_rate": 0.0001, "loss": 1.4849, "step": 7397 }, { "epoch": 0.8594830090037757, "grad_norm": 0.4535844326019287, "learning_rate": 0.0001, "loss": 1.536, "step": 7398 }, { "epoch": 0.8595991867557363, "grad_norm": 0.4707335829734802, "learning_rate": 0.0001, "loss": 1.6917, "step": 7399 }, { "epoch": 0.8597153645076968, "grad_norm": 0.45508313179016113, "learning_rate": 0.0001, "loss": 1.5563, "step": 7400 }, { "epoch": 0.8598315422596573, "grad_norm": 0.45613574981689453, "learning_rate": 0.0001, "loss": 1.5132, "step": 7401 }, { "epoch": 0.8599477200116178, "grad_norm": 0.46452754735946655, "learning_rate": 0.0001, "loss": 1.6491, "step": 7402 }, { "epoch": 0.8600638977635783, "grad_norm": 0.5027807354927063, "learning_rate": 0.0001, "loss": 1.7318, "step": 7403 }, { "epoch": 0.8601800755155388, "grad_norm": 0.4823223352432251, "learning_rate": 0.0001, "loss": 1.6181, "step": 7404 }, { "epoch": 0.8602962532674993, "grad_norm": 0.4800436794757843, "learning_rate": 0.0001, "loss": 1.6692, "step": 7405 }, { "epoch": 0.8604124310194597, "grad_norm": 0.48085838556289673, "learning_rate": 0.0001, "loss": 1.5379, "step": 7406 }, { "epoch": 0.8605286087714202, "grad_norm": 0.45660898089408875, "learning_rate": 0.0001, "loss": 1.5542, "step": 7407 }, { "epoch": 0.8606447865233807, "grad_norm": 0.4891624450683594, "learning_rate": 0.0001, "loss": 1.569, "step": 7408 }, { "epoch": 0.8607609642753413, "grad_norm": 0.4807065427303314, "learning_rate": 0.0001, "loss": 1.7155, "step": 7409 }, { "epoch": 0.8608771420273018, "grad_norm": 0.45160871744155884, "learning_rate": 0.0001, "loss": 1.5249, "step": 7410 }, { "epoch": 0.8609933197792623, "grad_norm": 0.4672984182834625, "learning_rate": 0.0001, "loss": 1.5387, "step": 7411 }, { "epoch": 0.8611094975312228, "grad_norm": 0.4706302881240845, "learning_rate": 0.0001, "loss": 1.7187, "step": 7412 }, { "epoch": 0.8612256752831833, "grad_norm": 0.4920727014541626, "learning_rate": 0.0001, "loss": 1.6299, "step": 7413 }, { "epoch": 0.8613418530351438, "grad_norm": 0.46764302253723145, "learning_rate": 0.0001, "loss": 1.5539, "step": 7414 }, { "epoch": 0.8614580307871043, "grad_norm": 0.4732871651649475, "learning_rate": 0.0001, "loss": 1.5067, "step": 7415 }, { "epoch": 0.8615742085390647, "grad_norm": 0.4615955054759979, "learning_rate": 0.0001, "loss": 1.4017, "step": 7416 }, { "epoch": 0.8616903862910252, "grad_norm": 1.543933629989624, "learning_rate": 0.0001, "loss": 1.7691, "step": 7417 }, { "epoch": 0.8618065640429857, "grad_norm": 0.4970760941505432, "learning_rate": 0.0001, "loss": 1.5598, "step": 7418 }, { "epoch": 0.8619227417949463, "grad_norm": 0.45834749937057495, "learning_rate": 0.0001, "loss": 1.5149, "step": 7419 }, { "epoch": 0.8620389195469068, "grad_norm": 0.49793627858161926, "learning_rate": 0.0001, "loss": 1.7069, "step": 7420 }, { "epoch": 0.8621550972988673, "grad_norm": 0.4896315336227417, "learning_rate": 0.0001, "loss": 1.5341, "step": 7421 }, { "epoch": 0.8622712750508278, "grad_norm": 0.46836981177330017, "learning_rate": 0.0001, "loss": 1.5196, "step": 7422 }, { "epoch": 0.8623874528027883, "grad_norm": 0.47271421551704407, "learning_rate": 0.0001, "loss": 1.6342, "step": 7423 }, { "epoch": 0.8625036305547488, "grad_norm": 0.5026324987411499, "learning_rate": 0.0001, "loss": 1.6324, "step": 7424 }, { "epoch": 0.8626198083067093, "grad_norm": 0.515398383140564, "learning_rate": 0.0001, "loss": 1.813, "step": 7425 }, { "epoch": 0.8627359860586697, "grad_norm": 0.49251848459243774, "learning_rate": 0.0001, "loss": 1.6419, "step": 7426 }, { "epoch": 0.8628521638106302, "grad_norm": 0.44753625988960266, "learning_rate": 0.0001, "loss": 1.5583, "step": 7427 }, { "epoch": 0.8629683415625907, "grad_norm": 0.4866905212402344, "learning_rate": 0.0001, "loss": 1.7084, "step": 7428 }, { "epoch": 0.8630845193145512, "grad_norm": 0.4585065245628357, "learning_rate": 0.0001, "loss": 1.6465, "step": 7429 }, { "epoch": 0.8632006970665118, "grad_norm": 0.47094056010246277, "learning_rate": 0.0001, "loss": 1.7554, "step": 7430 }, { "epoch": 0.8633168748184723, "grad_norm": 0.45176732540130615, "learning_rate": 0.0001, "loss": 1.6076, "step": 7431 }, { "epoch": 0.8634330525704328, "grad_norm": 0.48504602909088135, "learning_rate": 0.0001, "loss": 1.7036, "step": 7432 }, { "epoch": 0.8635492303223933, "grad_norm": 0.49550312757492065, "learning_rate": 0.0001, "loss": 1.6723, "step": 7433 }, { "epoch": 0.8636654080743538, "grad_norm": 0.4960317313671112, "learning_rate": 0.0001, "loss": 1.5014, "step": 7434 }, { "epoch": 0.8637815858263143, "grad_norm": 0.4894813597202301, "learning_rate": 0.0001, "loss": 1.6142, "step": 7435 }, { "epoch": 0.8638977635782747, "grad_norm": 0.47758471965789795, "learning_rate": 0.0001, "loss": 1.6221, "step": 7436 }, { "epoch": 0.8640139413302352, "grad_norm": 0.5021971464157104, "learning_rate": 0.0001, "loss": 1.7103, "step": 7437 }, { "epoch": 0.8641301190821957, "grad_norm": 0.5115821957588196, "learning_rate": 0.0001, "loss": 1.8215, "step": 7438 }, { "epoch": 0.8642462968341562, "grad_norm": 0.45494914054870605, "learning_rate": 0.0001, "loss": 1.5219, "step": 7439 }, { "epoch": 0.8643624745861168, "grad_norm": 0.4635239839553833, "learning_rate": 0.0001, "loss": 1.7132, "step": 7440 }, { "epoch": 0.8644786523380773, "grad_norm": 0.4838850200176239, "learning_rate": 0.0001, "loss": 1.453, "step": 7441 }, { "epoch": 0.8645948300900378, "grad_norm": 0.48782113194465637, "learning_rate": 0.0001, "loss": 1.8607, "step": 7442 }, { "epoch": 0.8647110078419983, "grad_norm": 0.4964597225189209, "learning_rate": 0.0001, "loss": 1.6444, "step": 7443 }, { "epoch": 0.8648271855939588, "grad_norm": 0.4533204734325409, "learning_rate": 0.0001, "loss": 1.4895, "step": 7444 }, { "epoch": 0.8649433633459193, "grad_norm": 0.4995560348033905, "learning_rate": 0.0001, "loss": 1.5984, "step": 7445 }, { "epoch": 0.8650595410978797, "grad_norm": 0.48218008875846863, "learning_rate": 0.0001, "loss": 1.5235, "step": 7446 }, { "epoch": 0.8651757188498402, "grad_norm": 0.4977954626083374, "learning_rate": 0.0001, "loss": 1.6671, "step": 7447 }, { "epoch": 0.8652918966018007, "grad_norm": 0.4879086911678314, "learning_rate": 0.0001, "loss": 1.4763, "step": 7448 }, { "epoch": 0.8654080743537612, "grad_norm": 0.4788166880607605, "learning_rate": 0.0001, "loss": 1.615, "step": 7449 }, { "epoch": 0.8655242521057217, "grad_norm": 0.5052685737609863, "learning_rate": 0.0001, "loss": 1.6799, "step": 7450 }, { "epoch": 0.8656404298576823, "grad_norm": 0.48076626658439636, "learning_rate": 0.0001, "loss": 1.5764, "step": 7451 }, { "epoch": 0.8657566076096428, "grad_norm": 0.481800377368927, "learning_rate": 0.0001, "loss": 1.4718, "step": 7452 }, { "epoch": 0.8658727853616033, "grad_norm": 0.4728825092315674, "learning_rate": 0.0001, "loss": 1.591, "step": 7453 }, { "epoch": 0.8659889631135638, "grad_norm": 0.4901738464832306, "learning_rate": 0.0001, "loss": 1.6488, "step": 7454 }, { "epoch": 0.8661051408655243, "grad_norm": 0.47152256965637207, "learning_rate": 0.0001, "loss": 1.615, "step": 7455 }, { "epoch": 0.8662213186174847, "grad_norm": 0.4627699851989746, "learning_rate": 0.0001, "loss": 1.5407, "step": 7456 }, { "epoch": 0.8663374963694452, "grad_norm": 0.47442302107810974, "learning_rate": 0.0001, "loss": 1.6379, "step": 7457 }, { "epoch": 0.8664536741214057, "grad_norm": 0.45434120297431946, "learning_rate": 0.0001, "loss": 1.6515, "step": 7458 }, { "epoch": 0.8665698518733662, "grad_norm": 0.5147332549095154, "learning_rate": 0.0001, "loss": 1.6957, "step": 7459 }, { "epoch": 0.8666860296253267, "grad_norm": 0.4700101315975189, "learning_rate": 0.0001, "loss": 1.502, "step": 7460 }, { "epoch": 0.8668022073772873, "grad_norm": 0.5192029476165771, "learning_rate": 0.0001, "loss": 1.562, "step": 7461 }, { "epoch": 0.8669183851292478, "grad_norm": 0.44885125756263733, "learning_rate": 0.0001, "loss": 1.6334, "step": 7462 }, { "epoch": 0.8670345628812083, "grad_norm": 0.45993509888648987, "learning_rate": 0.0001, "loss": 1.5642, "step": 7463 }, { "epoch": 0.8671507406331688, "grad_norm": 0.46040210127830505, "learning_rate": 0.0001, "loss": 1.3868, "step": 7464 }, { "epoch": 0.8672669183851293, "grad_norm": 0.47678741812705994, "learning_rate": 0.0001, "loss": 1.6829, "step": 7465 }, { "epoch": 0.8673830961370897, "grad_norm": 0.4918549358844757, "learning_rate": 0.0001, "loss": 1.6523, "step": 7466 }, { "epoch": 0.8674992738890502, "grad_norm": 0.45147234201431274, "learning_rate": 0.0001, "loss": 1.3795, "step": 7467 }, { "epoch": 0.8676154516410107, "grad_norm": 0.4544316530227661, "learning_rate": 0.0001, "loss": 1.475, "step": 7468 }, { "epoch": 0.8677316293929712, "grad_norm": 0.4829142987728119, "learning_rate": 0.0001, "loss": 1.6479, "step": 7469 }, { "epoch": 0.8678478071449317, "grad_norm": 0.5195518136024475, "learning_rate": 0.0001, "loss": 1.5526, "step": 7470 }, { "epoch": 0.8679639848968922, "grad_norm": 0.48266440629959106, "learning_rate": 0.0001, "loss": 1.6076, "step": 7471 }, { "epoch": 0.8680801626488528, "grad_norm": 0.5128846764564514, "learning_rate": 0.0001, "loss": 1.5199, "step": 7472 }, { "epoch": 0.8681963404008133, "grad_norm": 0.48383504152297974, "learning_rate": 0.0001, "loss": 1.6736, "step": 7473 }, { "epoch": 0.8683125181527738, "grad_norm": 0.49934422969818115, "learning_rate": 0.0001, "loss": 1.7374, "step": 7474 }, { "epoch": 0.8684286959047343, "grad_norm": 0.4719254672527313, "learning_rate": 0.0001, "loss": 1.5179, "step": 7475 }, { "epoch": 0.8685448736566947, "grad_norm": 0.45778128504753113, "learning_rate": 0.0001, "loss": 1.4926, "step": 7476 }, { "epoch": 0.8686610514086552, "grad_norm": 0.4514407813549042, "learning_rate": 0.0001, "loss": 1.6567, "step": 7477 }, { "epoch": 0.8687772291606157, "grad_norm": 0.4867977201938629, "learning_rate": 0.0001, "loss": 1.6724, "step": 7478 }, { "epoch": 0.8688934069125762, "grad_norm": 0.4617425799369812, "learning_rate": 0.0001, "loss": 1.684, "step": 7479 }, { "epoch": 0.8690095846645367, "grad_norm": 0.44265344738960266, "learning_rate": 0.0001, "loss": 1.4586, "step": 7480 }, { "epoch": 0.8691257624164972, "grad_norm": 0.4775390625, "learning_rate": 0.0001, "loss": 1.6254, "step": 7481 }, { "epoch": 0.8692419401684578, "grad_norm": 0.5168774127960205, "learning_rate": 0.0001, "loss": 1.7173, "step": 7482 }, { "epoch": 0.8693581179204183, "grad_norm": 0.48702213168144226, "learning_rate": 0.0001, "loss": 1.7377, "step": 7483 }, { "epoch": 0.8694742956723788, "grad_norm": 0.47748100757598877, "learning_rate": 0.0001, "loss": 1.7014, "step": 7484 }, { "epoch": 0.8695904734243393, "grad_norm": 0.45373064279556274, "learning_rate": 0.0001, "loss": 1.5932, "step": 7485 }, { "epoch": 0.8697066511762997, "grad_norm": 0.4954831898212433, "learning_rate": 0.0001, "loss": 1.6765, "step": 7486 }, { "epoch": 0.8698228289282602, "grad_norm": 0.4588451683521271, "learning_rate": 0.0001, "loss": 1.3947, "step": 7487 }, { "epoch": 0.8699390066802207, "grad_norm": 0.4915754199028015, "learning_rate": 0.0001, "loss": 1.6084, "step": 7488 }, { "epoch": 0.8700551844321812, "grad_norm": 0.4902786612510681, "learning_rate": 0.0001, "loss": 1.4462, "step": 7489 }, { "epoch": 0.8701713621841417, "grad_norm": 0.4758920967578888, "learning_rate": 0.0001, "loss": 1.584, "step": 7490 }, { "epoch": 0.8702875399361022, "grad_norm": 0.4647005796432495, "learning_rate": 0.0001, "loss": 1.6616, "step": 7491 }, { "epoch": 0.8704037176880627, "grad_norm": 0.4310697615146637, "learning_rate": 0.0001, "loss": 1.4565, "step": 7492 }, { "epoch": 0.8705198954400233, "grad_norm": 0.4658608138561249, "learning_rate": 0.0001, "loss": 1.6598, "step": 7493 }, { "epoch": 0.8706360731919838, "grad_norm": 0.5647340416908264, "learning_rate": 0.0001, "loss": 1.7218, "step": 7494 }, { "epoch": 0.8707522509439443, "grad_norm": 0.4370400607585907, "learning_rate": 0.0001, "loss": 1.5661, "step": 7495 }, { "epoch": 0.8708684286959048, "grad_norm": 0.5290013551712036, "learning_rate": 0.0001, "loss": 1.7937, "step": 7496 }, { "epoch": 0.8709846064478652, "grad_norm": 0.49788692593574524, "learning_rate": 0.0001, "loss": 1.5633, "step": 7497 }, { "epoch": 0.8711007841998257, "grad_norm": 0.5271779894828796, "learning_rate": 0.0001, "loss": 1.4642, "step": 7498 }, { "epoch": 0.8712169619517862, "grad_norm": 0.5094662308692932, "learning_rate": 0.0001, "loss": 1.766, "step": 7499 }, { "epoch": 0.8713331397037467, "grad_norm": 0.45193493366241455, "learning_rate": 0.0001, "loss": 1.5286, "step": 7500 }, { "epoch": 0.8714493174557072, "grad_norm": 0.4743613600730896, "learning_rate": 0.0001, "loss": 1.5546, "step": 7501 }, { "epoch": 0.8715654952076677, "grad_norm": 0.5035197734832764, "learning_rate": 0.0001, "loss": 1.5991, "step": 7502 }, { "epoch": 0.8716816729596283, "grad_norm": 0.4376752972602844, "learning_rate": 0.0001, "loss": 1.4405, "step": 7503 }, { "epoch": 0.8717978507115888, "grad_norm": 0.5224100947380066, "learning_rate": 0.0001, "loss": 1.7625, "step": 7504 }, { "epoch": 0.8719140284635493, "grad_norm": 0.44658803939819336, "learning_rate": 0.0001, "loss": 1.4969, "step": 7505 }, { "epoch": 0.8720302062155098, "grad_norm": 0.44167301058769226, "learning_rate": 0.0001, "loss": 1.6027, "step": 7506 }, { "epoch": 0.8721463839674702, "grad_norm": 0.45513877272605896, "learning_rate": 0.0001, "loss": 1.6441, "step": 7507 }, { "epoch": 0.8722625617194307, "grad_norm": 0.5717625617980957, "learning_rate": 0.0001, "loss": 1.7618, "step": 7508 }, { "epoch": 0.8723787394713912, "grad_norm": 0.4953731298446655, "learning_rate": 0.0001, "loss": 1.7237, "step": 7509 }, { "epoch": 0.8724949172233517, "grad_norm": 0.49298036098480225, "learning_rate": 0.0001, "loss": 1.532, "step": 7510 }, { "epoch": 0.8726110949753122, "grad_norm": 0.4598557651042938, "learning_rate": 0.0001, "loss": 1.5216, "step": 7511 }, { "epoch": 0.8727272727272727, "grad_norm": 0.46120190620422363, "learning_rate": 0.0001, "loss": 1.587, "step": 7512 }, { "epoch": 0.8728434504792332, "grad_norm": 0.48204609751701355, "learning_rate": 0.0001, "loss": 1.669, "step": 7513 }, { "epoch": 0.8729596282311938, "grad_norm": 0.5170610547065735, "learning_rate": 0.0001, "loss": 1.7495, "step": 7514 }, { "epoch": 0.8730758059831543, "grad_norm": 0.47179120779037476, "learning_rate": 0.0001, "loss": 1.4475, "step": 7515 }, { "epoch": 0.8731919837351148, "grad_norm": 0.5068784952163696, "learning_rate": 0.0001, "loss": 1.5792, "step": 7516 }, { "epoch": 0.8733081614870752, "grad_norm": 0.46290868520736694, "learning_rate": 0.0001, "loss": 1.54, "step": 7517 }, { "epoch": 0.8734243392390357, "grad_norm": 0.45205244421958923, "learning_rate": 0.0001, "loss": 1.3574, "step": 7518 }, { "epoch": 0.8735405169909962, "grad_norm": 0.934231162071228, "learning_rate": 0.0001, "loss": 1.5797, "step": 7519 }, { "epoch": 0.8736566947429567, "grad_norm": 0.45129621028900146, "learning_rate": 0.0001, "loss": 1.4467, "step": 7520 }, { "epoch": 0.8737728724949172, "grad_norm": 0.456668496131897, "learning_rate": 0.0001, "loss": 1.604, "step": 7521 }, { "epoch": 0.8738890502468777, "grad_norm": 0.5230050086975098, "learning_rate": 0.0001, "loss": 1.6754, "step": 7522 }, { "epoch": 0.8740052279988382, "grad_norm": 0.47268834710121155, "learning_rate": 0.0001, "loss": 1.6878, "step": 7523 }, { "epoch": 0.8741214057507988, "grad_norm": 0.5117980241775513, "learning_rate": 0.0001, "loss": 1.5417, "step": 7524 }, { "epoch": 0.8742375835027593, "grad_norm": 0.44424542784690857, "learning_rate": 0.0001, "loss": 1.6092, "step": 7525 }, { "epoch": 0.8743537612547198, "grad_norm": 0.479316771030426, "learning_rate": 0.0001, "loss": 1.7161, "step": 7526 }, { "epoch": 0.8744699390066802, "grad_norm": 0.442326158285141, "learning_rate": 0.0001, "loss": 1.5868, "step": 7527 }, { "epoch": 0.8745861167586407, "grad_norm": 0.4649803936481476, "learning_rate": 0.0001, "loss": 1.5859, "step": 7528 }, { "epoch": 0.8747022945106012, "grad_norm": 0.41753268241882324, "learning_rate": 0.0001, "loss": 1.4568, "step": 7529 }, { "epoch": 0.8748184722625617, "grad_norm": 0.5000750422477722, "learning_rate": 0.0001, "loss": 1.6933, "step": 7530 }, { "epoch": 0.8749346500145222, "grad_norm": 0.4585053026676178, "learning_rate": 0.0001, "loss": 1.4732, "step": 7531 }, { "epoch": 0.8750508277664827, "grad_norm": 0.48473891615867615, "learning_rate": 0.0001, "loss": 1.506, "step": 7532 }, { "epoch": 0.8751670055184432, "grad_norm": 0.4806990325450897, "learning_rate": 0.0001, "loss": 1.6123, "step": 7533 }, { "epoch": 0.8752831832704037, "grad_norm": 0.4711168110370636, "learning_rate": 0.0001, "loss": 1.6303, "step": 7534 }, { "epoch": 0.8753993610223643, "grad_norm": 0.4625207483768463, "learning_rate": 0.0001, "loss": 1.5046, "step": 7535 }, { "epoch": 0.8755155387743248, "grad_norm": 0.47735491394996643, "learning_rate": 0.0001, "loss": 1.5856, "step": 7536 }, { "epoch": 0.8756317165262852, "grad_norm": 0.47820740938186646, "learning_rate": 0.0001, "loss": 1.5657, "step": 7537 }, { "epoch": 0.8757478942782457, "grad_norm": 0.43879976868629456, "learning_rate": 0.0001, "loss": 1.4218, "step": 7538 }, { "epoch": 0.8758640720302062, "grad_norm": 0.5144027471542358, "learning_rate": 0.0001, "loss": 1.6866, "step": 7539 }, { "epoch": 0.8759802497821667, "grad_norm": 0.5422682166099548, "learning_rate": 0.0001, "loss": 1.765, "step": 7540 }, { "epoch": 0.8760964275341272, "grad_norm": 0.49460479617118835, "learning_rate": 0.0001, "loss": 1.5586, "step": 7541 }, { "epoch": 0.8762126052860877, "grad_norm": 0.4671952426433563, "learning_rate": 0.0001, "loss": 1.6071, "step": 7542 }, { "epoch": 0.8763287830380482, "grad_norm": 0.4591098725795746, "learning_rate": 0.0001, "loss": 1.5281, "step": 7543 }, { "epoch": 0.8764449607900087, "grad_norm": 0.49451717734336853, "learning_rate": 0.0001, "loss": 1.6752, "step": 7544 }, { "epoch": 0.8765611385419693, "grad_norm": 0.48347243666648865, "learning_rate": 0.0001, "loss": 1.5652, "step": 7545 }, { "epoch": 0.8766773162939298, "grad_norm": 0.4897051453590393, "learning_rate": 0.0001, "loss": 1.6112, "step": 7546 }, { "epoch": 0.8767934940458902, "grad_norm": 0.45810526609420776, "learning_rate": 0.0001, "loss": 1.5242, "step": 7547 }, { "epoch": 0.8769096717978507, "grad_norm": 0.4647473394870758, "learning_rate": 0.0001, "loss": 1.5803, "step": 7548 }, { "epoch": 0.8770258495498112, "grad_norm": 0.4482647776603699, "learning_rate": 0.0001, "loss": 1.5122, "step": 7549 }, { "epoch": 0.8771420273017717, "grad_norm": 0.44632047414779663, "learning_rate": 0.0001, "loss": 1.4941, "step": 7550 }, { "epoch": 0.8772582050537322, "grad_norm": 0.5201831459999084, "learning_rate": 0.0001, "loss": 1.6024, "step": 7551 }, { "epoch": 0.8773743828056927, "grad_norm": 0.46722298860549927, "learning_rate": 0.0001, "loss": 1.7115, "step": 7552 }, { "epoch": 0.8774905605576532, "grad_norm": 0.5357077121734619, "learning_rate": 0.0001, "loss": 1.7269, "step": 7553 }, { "epoch": 0.8776067383096137, "grad_norm": 0.481431782245636, "learning_rate": 0.0001, "loss": 1.5137, "step": 7554 }, { "epoch": 0.8777229160615742, "grad_norm": 0.4767739474773407, "learning_rate": 0.0001, "loss": 1.5876, "step": 7555 }, { "epoch": 0.8778390938135348, "grad_norm": 0.4820916950702667, "learning_rate": 0.0001, "loss": 1.3223, "step": 7556 }, { "epoch": 0.8779552715654952, "grad_norm": 0.49660539627075195, "learning_rate": 0.0001, "loss": 1.5837, "step": 7557 }, { "epoch": 0.8780714493174557, "grad_norm": 0.4909195303916931, "learning_rate": 0.0001, "loss": 1.6346, "step": 7558 }, { "epoch": 0.8781876270694162, "grad_norm": 0.47243842482566833, "learning_rate": 0.0001, "loss": 1.5658, "step": 7559 }, { "epoch": 0.8783038048213767, "grad_norm": 0.501683235168457, "learning_rate": 0.0001, "loss": 1.7332, "step": 7560 }, { "epoch": 0.8784199825733372, "grad_norm": 0.4446217119693756, "learning_rate": 0.0001, "loss": 1.4357, "step": 7561 }, { "epoch": 0.8785361603252977, "grad_norm": 0.5229566693305969, "learning_rate": 0.0001, "loss": 1.7222, "step": 7562 }, { "epoch": 0.8786523380772582, "grad_norm": 0.479064404964447, "learning_rate": 0.0001, "loss": 1.5351, "step": 7563 }, { "epoch": 0.8787685158292187, "grad_norm": 0.4563969075679779, "learning_rate": 0.0001, "loss": 1.5799, "step": 7564 }, { "epoch": 0.8788846935811792, "grad_norm": 0.47300323843955994, "learning_rate": 0.0001, "loss": 1.4887, "step": 7565 }, { "epoch": 0.8790008713331398, "grad_norm": 0.48597297072410583, "learning_rate": 0.0001, "loss": 1.5567, "step": 7566 }, { "epoch": 0.8791170490851002, "grad_norm": 0.4700767695903778, "learning_rate": 0.0001, "loss": 1.554, "step": 7567 }, { "epoch": 0.8792332268370607, "grad_norm": 0.50390625, "learning_rate": 0.0001, "loss": 1.7064, "step": 7568 }, { "epoch": 0.8793494045890212, "grad_norm": 0.4735111892223358, "learning_rate": 0.0001, "loss": 1.6123, "step": 7569 }, { "epoch": 0.8794655823409817, "grad_norm": 0.43248268961906433, "learning_rate": 0.0001, "loss": 1.4049, "step": 7570 }, { "epoch": 0.8795817600929422, "grad_norm": 0.4565393328666687, "learning_rate": 0.0001, "loss": 1.4616, "step": 7571 }, { "epoch": 0.8796979378449027, "grad_norm": 0.46394291520118713, "learning_rate": 0.0001, "loss": 1.4404, "step": 7572 }, { "epoch": 0.8798141155968632, "grad_norm": 0.47517409920692444, "learning_rate": 0.0001, "loss": 1.6117, "step": 7573 }, { "epoch": 0.8799302933488237, "grad_norm": 0.515457272529602, "learning_rate": 0.0001, "loss": 1.6547, "step": 7574 }, { "epoch": 0.8800464711007842, "grad_norm": 0.4533648192882538, "learning_rate": 0.0001, "loss": 1.5703, "step": 7575 }, { "epoch": 0.8801626488527446, "grad_norm": 0.4539215862751007, "learning_rate": 0.0001, "loss": 1.504, "step": 7576 }, { "epoch": 0.8802788266047052, "grad_norm": 0.5214062333106995, "learning_rate": 0.0001, "loss": 1.5462, "step": 7577 }, { "epoch": 0.8803950043566657, "grad_norm": 0.4697798192501068, "learning_rate": 0.0001, "loss": 1.5674, "step": 7578 }, { "epoch": 0.8805111821086262, "grad_norm": 0.4855616092681885, "learning_rate": 0.0001, "loss": 1.5567, "step": 7579 }, { "epoch": 0.8806273598605867, "grad_norm": 0.49214091897010803, "learning_rate": 0.0001, "loss": 1.6882, "step": 7580 }, { "epoch": 0.8807435376125472, "grad_norm": 0.5328443646430969, "learning_rate": 0.0001, "loss": 1.7461, "step": 7581 }, { "epoch": 0.8808597153645077, "grad_norm": 0.5216013789176941, "learning_rate": 0.0001, "loss": 1.8298, "step": 7582 }, { "epoch": 0.8809758931164682, "grad_norm": 0.46522417664527893, "learning_rate": 0.0001, "loss": 1.5963, "step": 7583 }, { "epoch": 0.8810920708684287, "grad_norm": 0.4373832046985626, "learning_rate": 0.0001, "loss": 1.5107, "step": 7584 }, { "epoch": 0.8812082486203892, "grad_norm": 0.49534451961517334, "learning_rate": 0.0001, "loss": 1.7228, "step": 7585 }, { "epoch": 0.8813244263723496, "grad_norm": 0.42766979336738586, "learning_rate": 0.0001, "loss": 1.3932, "step": 7586 }, { "epoch": 0.8814406041243102, "grad_norm": 0.47507309913635254, "learning_rate": 0.0001, "loss": 1.6604, "step": 7587 }, { "epoch": 0.8815567818762707, "grad_norm": 0.4551963210105896, "learning_rate": 0.0001, "loss": 1.4789, "step": 7588 }, { "epoch": 0.8816729596282312, "grad_norm": 0.46101507544517517, "learning_rate": 0.0001, "loss": 1.7364, "step": 7589 }, { "epoch": 0.8817891373801917, "grad_norm": 0.45468607544898987, "learning_rate": 0.0001, "loss": 1.3932, "step": 7590 }, { "epoch": 0.8819053151321522, "grad_norm": 0.4612409174442291, "learning_rate": 0.0001, "loss": 1.592, "step": 7591 }, { "epoch": 0.8820214928841127, "grad_norm": 0.4820188581943512, "learning_rate": 0.0001, "loss": 1.6734, "step": 7592 }, { "epoch": 0.8821376706360732, "grad_norm": 0.49493300914764404, "learning_rate": 0.0001, "loss": 1.5493, "step": 7593 }, { "epoch": 0.8822538483880337, "grad_norm": 0.45542287826538086, "learning_rate": 0.0001, "loss": 1.5775, "step": 7594 }, { "epoch": 0.8823700261399942, "grad_norm": 0.47837620973587036, "learning_rate": 0.0001, "loss": 1.6641, "step": 7595 }, { "epoch": 0.8824862038919546, "grad_norm": 0.45609262585639954, "learning_rate": 0.0001, "loss": 1.4127, "step": 7596 }, { "epoch": 0.8826023816439151, "grad_norm": 0.46420490741729736, "learning_rate": 0.0001, "loss": 1.56, "step": 7597 }, { "epoch": 0.8827185593958757, "grad_norm": 0.48322582244873047, "learning_rate": 0.0001, "loss": 1.5809, "step": 7598 }, { "epoch": 0.8828347371478362, "grad_norm": 0.4640806317329407, "learning_rate": 0.0001, "loss": 1.5535, "step": 7599 }, { "epoch": 0.8829509148997967, "grad_norm": 0.47058844566345215, "learning_rate": 0.0001, "loss": 1.6128, "step": 7600 }, { "epoch": 0.8830670926517572, "grad_norm": 0.5269159078598022, "learning_rate": 0.0001, "loss": 1.7702, "step": 7601 }, { "epoch": 0.8831832704037177, "grad_norm": 0.4986213445663452, "learning_rate": 0.0001, "loss": 1.6251, "step": 7602 }, { "epoch": 0.8832994481556782, "grad_norm": 0.4375273585319519, "learning_rate": 0.0001, "loss": 1.4266, "step": 7603 }, { "epoch": 0.8834156259076387, "grad_norm": 0.45944470167160034, "learning_rate": 0.0001, "loss": 1.6029, "step": 7604 }, { "epoch": 0.8835318036595992, "grad_norm": 0.4776669442653656, "learning_rate": 0.0001, "loss": 1.6147, "step": 7605 }, { "epoch": 0.8836479814115596, "grad_norm": 0.47542616724967957, "learning_rate": 0.0001, "loss": 1.5673, "step": 7606 }, { "epoch": 0.8837641591635201, "grad_norm": 0.4733422100543976, "learning_rate": 0.0001, "loss": 1.728, "step": 7607 }, { "epoch": 0.8838803369154807, "grad_norm": 0.47752857208251953, "learning_rate": 0.0001, "loss": 1.4314, "step": 7608 }, { "epoch": 0.8839965146674412, "grad_norm": 0.4597974121570587, "learning_rate": 0.0001, "loss": 1.4891, "step": 7609 }, { "epoch": 0.8841126924194017, "grad_norm": 0.46217969059944153, "learning_rate": 0.0001, "loss": 1.4955, "step": 7610 }, { "epoch": 0.8842288701713622, "grad_norm": 0.5063923001289368, "learning_rate": 0.0001, "loss": 1.6053, "step": 7611 }, { "epoch": 0.8843450479233227, "grad_norm": 0.4687163233757019, "learning_rate": 0.0001, "loss": 1.6046, "step": 7612 }, { "epoch": 0.8844612256752832, "grad_norm": 0.45635896921157837, "learning_rate": 0.0001, "loss": 1.4355, "step": 7613 }, { "epoch": 0.8845774034272437, "grad_norm": 0.46440935134887695, "learning_rate": 0.0001, "loss": 1.5972, "step": 7614 }, { "epoch": 0.8846935811792042, "grad_norm": 0.48333290219306946, "learning_rate": 0.0001, "loss": 1.615, "step": 7615 }, { "epoch": 0.8848097589311646, "grad_norm": 0.4907698929309845, "learning_rate": 0.0001, "loss": 1.6455, "step": 7616 }, { "epoch": 0.8849259366831251, "grad_norm": 0.48579859733581543, "learning_rate": 0.0001, "loss": 1.7002, "step": 7617 }, { "epoch": 0.8850421144350857, "grad_norm": 0.46631836891174316, "learning_rate": 0.0001, "loss": 1.5675, "step": 7618 }, { "epoch": 0.8851582921870462, "grad_norm": 0.47939544916152954, "learning_rate": 0.0001, "loss": 1.6185, "step": 7619 }, { "epoch": 0.8852744699390067, "grad_norm": 0.4607776701450348, "learning_rate": 0.0001, "loss": 1.5851, "step": 7620 }, { "epoch": 0.8853906476909672, "grad_norm": 0.47920143604278564, "learning_rate": 0.0001, "loss": 1.7053, "step": 7621 }, { "epoch": 0.8855068254429277, "grad_norm": 0.48284971714019775, "learning_rate": 0.0001, "loss": 1.6131, "step": 7622 }, { "epoch": 0.8856230031948882, "grad_norm": 0.4558715224266052, "learning_rate": 0.0001, "loss": 1.5614, "step": 7623 }, { "epoch": 0.8857391809468487, "grad_norm": 0.47470736503601074, "learning_rate": 0.0001, "loss": 1.669, "step": 7624 }, { "epoch": 0.8858553586988092, "grad_norm": 0.4660652279853821, "learning_rate": 0.0001, "loss": 1.5647, "step": 7625 }, { "epoch": 0.8859715364507696, "grad_norm": 0.5015971064567566, "learning_rate": 0.0001, "loss": 1.8973, "step": 7626 }, { "epoch": 0.8860877142027301, "grad_norm": 0.48925477266311646, "learning_rate": 0.0001, "loss": 1.612, "step": 7627 }, { "epoch": 0.8862038919546906, "grad_norm": 0.4653438925743103, "learning_rate": 0.0001, "loss": 1.5789, "step": 7628 }, { "epoch": 0.8863200697066512, "grad_norm": 0.4613575041294098, "learning_rate": 0.0001, "loss": 1.5857, "step": 7629 }, { "epoch": 0.8864362474586117, "grad_norm": 0.4613388776779175, "learning_rate": 0.0001, "loss": 1.7134, "step": 7630 }, { "epoch": 0.8865524252105722, "grad_norm": 0.4807787835597992, "learning_rate": 0.0001, "loss": 1.6487, "step": 7631 }, { "epoch": 0.8866686029625327, "grad_norm": 0.4956919252872467, "learning_rate": 0.0001, "loss": 1.5629, "step": 7632 }, { "epoch": 0.8867847807144932, "grad_norm": 0.4471133053302765, "learning_rate": 0.0001, "loss": 1.5413, "step": 7633 }, { "epoch": 0.8869009584664537, "grad_norm": 0.42097005248069763, "learning_rate": 0.0001, "loss": 1.4112, "step": 7634 }, { "epoch": 0.8870171362184142, "grad_norm": 0.45755913853645325, "learning_rate": 0.0001, "loss": 1.5275, "step": 7635 }, { "epoch": 0.8871333139703746, "grad_norm": 0.46528616547584534, "learning_rate": 0.0001, "loss": 1.5626, "step": 7636 }, { "epoch": 0.8872494917223351, "grad_norm": 0.44611406326293945, "learning_rate": 0.0001, "loss": 1.4773, "step": 7637 }, { "epoch": 0.8873656694742956, "grad_norm": 0.4587550759315491, "learning_rate": 0.0001, "loss": 1.6289, "step": 7638 }, { "epoch": 0.8874818472262562, "grad_norm": 0.48842063546180725, "learning_rate": 0.0001, "loss": 1.5901, "step": 7639 }, { "epoch": 0.8875980249782167, "grad_norm": 0.4626847505569458, "learning_rate": 0.0001, "loss": 1.5549, "step": 7640 }, { "epoch": 0.8877142027301772, "grad_norm": 0.45964398980140686, "learning_rate": 0.0001, "loss": 1.5335, "step": 7641 }, { "epoch": 0.8878303804821377, "grad_norm": 0.5278240442276001, "learning_rate": 0.0001, "loss": 1.6286, "step": 7642 }, { "epoch": 0.8879465582340982, "grad_norm": 0.4622969329357147, "learning_rate": 0.0001, "loss": 1.716, "step": 7643 }, { "epoch": 0.8880627359860587, "grad_norm": 0.47453299164772034, "learning_rate": 0.0001, "loss": 1.6328, "step": 7644 }, { "epoch": 0.8881789137380192, "grad_norm": 0.4532757103443146, "learning_rate": 0.0001, "loss": 1.5739, "step": 7645 }, { "epoch": 0.8882950914899796, "grad_norm": 0.47530779242515564, "learning_rate": 0.0001, "loss": 1.5514, "step": 7646 }, { "epoch": 0.8884112692419401, "grad_norm": 0.4514038562774658, "learning_rate": 0.0001, "loss": 1.462, "step": 7647 }, { "epoch": 0.8885274469939006, "grad_norm": 0.48598358035087585, "learning_rate": 0.0001, "loss": 1.6245, "step": 7648 }, { "epoch": 0.8886436247458611, "grad_norm": 0.4912118911743164, "learning_rate": 0.0001, "loss": 1.5128, "step": 7649 }, { "epoch": 0.8887598024978217, "grad_norm": 0.4736858010292053, "learning_rate": 0.0001, "loss": 1.6595, "step": 7650 }, { "epoch": 0.8888759802497822, "grad_norm": 0.4599950611591339, "learning_rate": 0.0001, "loss": 1.5116, "step": 7651 }, { "epoch": 0.8889921580017427, "grad_norm": 0.45858752727508545, "learning_rate": 0.0001, "loss": 1.5929, "step": 7652 }, { "epoch": 0.8891083357537032, "grad_norm": 0.4588874280452728, "learning_rate": 0.0001, "loss": 1.3829, "step": 7653 }, { "epoch": 0.8892245135056637, "grad_norm": 0.4724344313144684, "learning_rate": 0.0001, "loss": 1.6388, "step": 7654 }, { "epoch": 0.8893406912576242, "grad_norm": 0.46815189719200134, "learning_rate": 0.0001, "loss": 1.6232, "step": 7655 }, { "epoch": 0.8894568690095846, "grad_norm": 0.47574499249458313, "learning_rate": 0.0001, "loss": 1.6022, "step": 7656 }, { "epoch": 0.8895730467615451, "grad_norm": 0.5131924152374268, "learning_rate": 0.0001, "loss": 1.7868, "step": 7657 }, { "epoch": 0.8896892245135056, "grad_norm": 0.47800272703170776, "learning_rate": 0.0001, "loss": 1.5755, "step": 7658 }, { "epoch": 0.8898054022654661, "grad_norm": 0.4923827052116394, "learning_rate": 0.0001, "loss": 1.6173, "step": 7659 }, { "epoch": 0.8899215800174267, "grad_norm": 0.4540409743785858, "learning_rate": 0.0001, "loss": 1.4549, "step": 7660 }, { "epoch": 0.8900377577693872, "grad_norm": 0.4510956108570099, "learning_rate": 0.0001, "loss": 1.428, "step": 7661 }, { "epoch": 0.8901539355213477, "grad_norm": 0.47217369079589844, "learning_rate": 0.0001, "loss": 1.6506, "step": 7662 }, { "epoch": 0.8902701132733082, "grad_norm": 0.5065828561782837, "learning_rate": 0.0001, "loss": 1.5586, "step": 7663 }, { "epoch": 0.8903862910252687, "grad_norm": 0.47260862588882446, "learning_rate": 0.0001, "loss": 1.5327, "step": 7664 }, { "epoch": 0.8905024687772292, "grad_norm": 0.4974091947078705, "learning_rate": 0.0001, "loss": 1.6739, "step": 7665 }, { "epoch": 0.8906186465291897, "grad_norm": 0.5020241737365723, "learning_rate": 0.0001, "loss": 1.5533, "step": 7666 }, { "epoch": 0.8907348242811501, "grad_norm": 0.47812047600746155, "learning_rate": 0.0001, "loss": 1.573, "step": 7667 }, { "epoch": 0.8908510020331106, "grad_norm": 0.5125158429145813, "learning_rate": 0.0001, "loss": 1.8338, "step": 7668 }, { "epoch": 0.8909671797850711, "grad_norm": 0.4838445484638214, "learning_rate": 0.0001, "loss": 1.6133, "step": 7669 }, { "epoch": 0.8910833575370316, "grad_norm": 0.4530005156993866, "learning_rate": 0.0001, "loss": 1.5775, "step": 7670 }, { "epoch": 0.8911995352889922, "grad_norm": 0.4553331732749939, "learning_rate": 0.0001, "loss": 1.6472, "step": 7671 }, { "epoch": 0.8913157130409527, "grad_norm": 0.46588602662086487, "learning_rate": 0.0001, "loss": 1.3147, "step": 7672 }, { "epoch": 0.8914318907929132, "grad_norm": 0.46226534247398376, "learning_rate": 0.0001, "loss": 1.4333, "step": 7673 }, { "epoch": 0.8915480685448737, "grad_norm": 0.506314754486084, "learning_rate": 0.0001, "loss": 1.6884, "step": 7674 }, { "epoch": 0.8916642462968342, "grad_norm": 0.49548977613449097, "learning_rate": 0.0001, "loss": 1.7037, "step": 7675 }, { "epoch": 0.8917804240487947, "grad_norm": 0.4775904715061188, "learning_rate": 0.0001, "loss": 1.5836, "step": 7676 }, { "epoch": 0.8918966018007551, "grad_norm": 0.4993993639945984, "learning_rate": 0.0001, "loss": 1.5624, "step": 7677 }, { "epoch": 0.8920127795527156, "grad_norm": 0.49439752101898193, "learning_rate": 0.0001, "loss": 1.555, "step": 7678 }, { "epoch": 0.8921289573046761, "grad_norm": 0.4990589916706085, "learning_rate": 0.0001, "loss": 1.6649, "step": 7679 }, { "epoch": 0.8922451350566366, "grad_norm": 0.4424578547477722, "learning_rate": 0.0001, "loss": 1.2997, "step": 7680 }, { "epoch": 0.8923613128085972, "grad_norm": 0.4604630768299103, "learning_rate": 0.0001, "loss": 1.6151, "step": 7681 }, { "epoch": 0.8924774905605577, "grad_norm": 0.4625977575778961, "learning_rate": 0.0001, "loss": 1.5241, "step": 7682 }, { "epoch": 0.8925936683125182, "grad_norm": 0.4803260266780853, "learning_rate": 0.0001, "loss": 1.6191, "step": 7683 }, { "epoch": 0.8927098460644787, "grad_norm": 0.464473694562912, "learning_rate": 0.0001, "loss": 1.5425, "step": 7684 }, { "epoch": 0.8928260238164392, "grad_norm": 0.44447073340415955, "learning_rate": 0.0001, "loss": 1.441, "step": 7685 }, { "epoch": 0.8929422015683997, "grad_norm": 0.44388943910598755, "learning_rate": 0.0001, "loss": 1.6461, "step": 7686 }, { "epoch": 0.8930583793203601, "grad_norm": 0.5310772657394409, "learning_rate": 0.0001, "loss": 1.6304, "step": 7687 }, { "epoch": 0.8931745570723206, "grad_norm": 0.4848250150680542, "learning_rate": 0.0001, "loss": 1.4879, "step": 7688 }, { "epoch": 0.8932907348242811, "grad_norm": 0.4669688940048218, "learning_rate": 0.0001, "loss": 1.5178, "step": 7689 }, { "epoch": 0.8934069125762416, "grad_norm": 0.47490620613098145, "learning_rate": 0.0001, "loss": 1.5267, "step": 7690 }, { "epoch": 0.8935230903282021, "grad_norm": 0.49228766560554504, "learning_rate": 0.0001, "loss": 1.6218, "step": 7691 }, { "epoch": 0.8936392680801627, "grad_norm": 0.5638328790664673, "learning_rate": 0.0001, "loss": 1.9294, "step": 7692 }, { "epoch": 0.8937554458321232, "grad_norm": 0.48162275552749634, "learning_rate": 0.0001, "loss": 1.5827, "step": 7693 }, { "epoch": 0.8938716235840837, "grad_norm": 0.4998082220554352, "learning_rate": 0.0001, "loss": 1.6612, "step": 7694 }, { "epoch": 0.8939878013360442, "grad_norm": 0.4761600196361542, "learning_rate": 0.0001, "loss": 1.5412, "step": 7695 }, { "epoch": 0.8941039790880047, "grad_norm": 0.4868890345096588, "learning_rate": 0.0001, "loss": 1.6922, "step": 7696 }, { "epoch": 0.8942201568399651, "grad_norm": 0.4814538061618805, "learning_rate": 0.0001, "loss": 1.5334, "step": 7697 }, { "epoch": 0.8943363345919256, "grad_norm": 0.46864473819732666, "learning_rate": 0.0001, "loss": 1.7473, "step": 7698 }, { "epoch": 0.8944525123438861, "grad_norm": 0.514991044998169, "learning_rate": 0.0001, "loss": 1.6549, "step": 7699 }, { "epoch": 0.8945686900958466, "grad_norm": 0.4469473361968994, "learning_rate": 0.0001, "loss": 1.5397, "step": 7700 }, { "epoch": 0.8946848678478071, "grad_norm": 0.42899852991104126, "learning_rate": 0.0001, "loss": 1.4535, "step": 7701 }, { "epoch": 0.8948010455997677, "grad_norm": 0.44255563616752625, "learning_rate": 0.0001, "loss": 1.4856, "step": 7702 }, { "epoch": 0.8949172233517282, "grad_norm": 0.4918566942214966, "learning_rate": 0.0001, "loss": 1.578, "step": 7703 }, { "epoch": 0.8950334011036887, "grad_norm": 0.506798505783081, "learning_rate": 0.0001, "loss": 1.6809, "step": 7704 }, { "epoch": 0.8951495788556492, "grad_norm": 0.4588955044746399, "learning_rate": 0.0001, "loss": 1.5369, "step": 7705 }, { "epoch": 0.8952657566076097, "grad_norm": 0.4813918173313141, "learning_rate": 0.0001, "loss": 1.5812, "step": 7706 }, { "epoch": 0.8953819343595701, "grad_norm": 0.4796220064163208, "learning_rate": 0.0001, "loss": 1.5712, "step": 7707 }, { "epoch": 0.8954981121115306, "grad_norm": 0.5280551910400391, "learning_rate": 0.0001, "loss": 1.7209, "step": 7708 }, { "epoch": 0.8956142898634911, "grad_norm": 0.5038082599639893, "learning_rate": 0.0001, "loss": 1.6893, "step": 7709 }, { "epoch": 0.8957304676154516, "grad_norm": 0.45545947551727295, "learning_rate": 0.0001, "loss": 1.4836, "step": 7710 }, { "epoch": 0.8958466453674121, "grad_norm": 0.5030218958854675, "learning_rate": 0.0001, "loss": 1.6816, "step": 7711 }, { "epoch": 0.8959628231193726, "grad_norm": 0.4473370909690857, "learning_rate": 0.0001, "loss": 1.5029, "step": 7712 }, { "epoch": 0.8960790008713332, "grad_norm": 0.7040799856185913, "learning_rate": 0.0001, "loss": 1.4743, "step": 7713 }, { "epoch": 0.8961951786232937, "grad_norm": 0.46077761054039, "learning_rate": 0.0001, "loss": 1.6841, "step": 7714 }, { "epoch": 0.8963113563752542, "grad_norm": 0.44979485869407654, "learning_rate": 0.0001, "loss": 1.5214, "step": 7715 }, { "epoch": 0.8964275341272147, "grad_norm": 0.46903368830680847, "learning_rate": 0.0001, "loss": 1.5326, "step": 7716 }, { "epoch": 0.8965437118791751, "grad_norm": 0.4809918701648712, "learning_rate": 0.0001, "loss": 1.6342, "step": 7717 }, { "epoch": 0.8966598896311356, "grad_norm": 0.4725591838359833, "learning_rate": 0.0001, "loss": 1.5145, "step": 7718 }, { "epoch": 0.8967760673830961, "grad_norm": 0.49973586201667786, "learning_rate": 0.0001, "loss": 1.783, "step": 7719 }, { "epoch": 0.8968922451350566, "grad_norm": 0.4993938207626343, "learning_rate": 0.0001, "loss": 1.4557, "step": 7720 }, { "epoch": 0.8970084228870171, "grad_norm": 0.5148173570632935, "learning_rate": 0.0001, "loss": 1.655, "step": 7721 }, { "epoch": 0.8971246006389776, "grad_norm": 0.49831125140190125, "learning_rate": 0.0001, "loss": 1.5482, "step": 7722 }, { "epoch": 0.8972407783909382, "grad_norm": 0.46448323130607605, "learning_rate": 0.0001, "loss": 1.6107, "step": 7723 }, { "epoch": 0.8973569561428987, "grad_norm": 0.47163113951683044, "learning_rate": 0.0001, "loss": 1.6136, "step": 7724 }, { "epoch": 0.8974731338948592, "grad_norm": 0.45095062255859375, "learning_rate": 0.0001, "loss": 1.5348, "step": 7725 }, { "epoch": 0.8975893116468197, "grad_norm": 0.4865681231021881, "learning_rate": 0.0001, "loss": 1.6234, "step": 7726 }, { "epoch": 0.8977054893987801, "grad_norm": 0.4516942501068115, "learning_rate": 0.0001, "loss": 1.6462, "step": 7727 }, { "epoch": 0.8978216671507406, "grad_norm": 0.45870745182037354, "learning_rate": 0.0001, "loss": 1.5657, "step": 7728 }, { "epoch": 0.8979378449027011, "grad_norm": 0.5079413056373596, "learning_rate": 0.0001, "loss": 1.558, "step": 7729 }, { "epoch": 0.8980540226546616, "grad_norm": 0.46579909324645996, "learning_rate": 0.0001, "loss": 1.497, "step": 7730 }, { "epoch": 0.8981702004066221, "grad_norm": 0.46913209557533264, "learning_rate": 0.0001, "loss": 1.6732, "step": 7731 }, { "epoch": 0.8982863781585826, "grad_norm": 0.46891793608665466, "learning_rate": 0.0001, "loss": 1.5816, "step": 7732 }, { "epoch": 0.8984025559105431, "grad_norm": 0.5008355379104614, "learning_rate": 0.0001, "loss": 1.654, "step": 7733 }, { "epoch": 0.8985187336625037, "grad_norm": 0.48343855142593384, "learning_rate": 0.0001, "loss": 1.5644, "step": 7734 }, { "epoch": 0.8986349114144642, "grad_norm": 0.47268950939178467, "learning_rate": 0.0001, "loss": 1.5458, "step": 7735 }, { "epoch": 0.8987510891664247, "grad_norm": 0.5122753381729126, "learning_rate": 0.0001, "loss": 1.7408, "step": 7736 }, { "epoch": 0.8988672669183851, "grad_norm": 0.45394930243492126, "learning_rate": 0.0001, "loss": 1.6283, "step": 7737 }, { "epoch": 0.8989834446703456, "grad_norm": 0.4667399823665619, "learning_rate": 0.0001, "loss": 1.6136, "step": 7738 }, { "epoch": 0.8990996224223061, "grad_norm": 0.5103424191474915, "learning_rate": 0.0001, "loss": 1.7418, "step": 7739 }, { "epoch": 0.8992158001742666, "grad_norm": 0.5049645304679871, "learning_rate": 0.0001, "loss": 1.6137, "step": 7740 }, { "epoch": 0.8993319779262271, "grad_norm": 0.45100435614585876, "learning_rate": 0.0001, "loss": 1.3736, "step": 7741 }, { "epoch": 0.8994481556781876, "grad_norm": 0.4995354115962982, "learning_rate": 0.0001, "loss": 1.5616, "step": 7742 }, { "epoch": 0.8995643334301481, "grad_norm": 0.5154250264167786, "learning_rate": 0.0001, "loss": 1.4851, "step": 7743 }, { "epoch": 0.8996805111821087, "grad_norm": 0.4704994261264801, "learning_rate": 0.0001, "loss": 1.6447, "step": 7744 }, { "epoch": 0.8997966889340692, "grad_norm": 0.4912912845611572, "learning_rate": 0.0001, "loss": 1.6153, "step": 7745 }, { "epoch": 0.8999128666860297, "grad_norm": 0.4594641327857971, "learning_rate": 0.0001, "loss": 1.4078, "step": 7746 }, { "epoch": 0.9000290444379901, "grad_norm": 0.48515209555625916, "learning_rate": 0.0001, "loss": 1.4787, "step": 7747 }, { "epoch": 0.9001452221899506, "grad_norm": 0.4500701129436493, "learning_rate": 0.0001, "loss": 1.5444, "step": 7748 }, { "epoch": 0.9002613999419111, "grad_norm": 0.5031145811080933, "learning_rate": 0.0001, "loss": 1.6389, "step": 7749 }, { "epoch": 0.9003775776938716, "grad_norm": 0.46528854966163635, "learning_rate": 0.0001, "loss": 1.593, "step": 7750 }, { "epoch": 0.9004937554458321, "grad_norm": 0.4735311269760132, "learning_rate": 0.0001, "loss": 1.5872, "step": 7751 }, { "epoch": 0.9006099331977926, "grad_norm": 0.4692305028438568, "learning_rate": 0.0001, "loss": 1.5068, "step": 7752 }, { "epoch": 0.9007261109497531, "grad_norm": 0.48819053173065186, "learning_rate": 0.0001, "loss": 1.6083, "step": 7753 }, { "epoch": 0.9008422887017136, "grad_norm": 0.494018018245697, "learning_rate": 0.0001, "loss": 1.546, "step": 7754 }, { "epoch": 0.9009584664536742, "grad_norm": 0.48794278502464294, "learning_rate": 0.0001, "loss": 1.6929, "step": 7755 }, { "epoch": 0.9010746442056347, "grad_norm": 0.48159360885620117, "learning_rate": 0.0001, "loss": 1.5261, "step": 7756 }, { "epoch": 0.9011908219575951, "grad_norm": 0.5053829550743103, "learning_rate": 0.0001, "loss": 1.5624, "step": 7757 }, { "epoch": 0.9013069997095556, "grad_norm": 0.47055330872535706, "learning_rate": 0.0001, "loss": 1.4752, "step": 7758 }, { "epoch": 0.9014231774615161, "grad_norm": 0.46060261130332947, "learning_rate": 0.0001, "loss": 1.4367, "step": 7759 }, { "epoch": 0.9015393552134766, "grad_norm": 0.46970701217651367, "learning_rate": 0.0001, "loss": 1.4834, "step": 7760 }, { "epoch": 0.9016555329654371, "grad_norm": 0.45572981238365173, "learning_rate": 0.0001, "loss": 1.4527, "step": 7761 }, { "epoch": 0.9017717107173976, "grad_norm": 0.44444963335990906, "learning_rate": 0.0001, "loss": 1.5054, "step": 7762 }, { "epoch": 0.9018878884693581, "grad_norm": 0.43386489152908325, "learning_rate": 0.0001, "loss": 1.3637, "step": 7763 }, { "epoch": 0.9020040662213186, "grad_norm": 0.4735313057899475, "learning_rate": 0.0001, "loss": 1.4992, "step": 7764 }, { "epoch": 0.9021202439732792, "grad_norm": 0.5182105898857117, "learning_rate": 0.0001, "loss": 1.5125, "step": 7765 }, { "epoch": 0.9022364217252397, "grad_norm": 0.5161397457122803, "learning_rate": 0.0001, "loss": 1.5711, "step": 7766 }, { "epoch": 0.9023525994772001, "grad_norm": 0.45563217997550964, "learning_rate": 0.0001, "loss": 1.4512, "step": 7767 }, { "epoch": 0.9024687772291606, "grad_norm": 0.5249543786048889, "learning_rate": 0.0001, "loss": 1.5754, "step": 7768 }, { "epoch": 0.9025849549811211, "grad_norm": 0.49920180439949036, "learning_rate": 0.0001, "loss": 1.7509, "step": 7769 }, { "epoch": 0.9027011327330816, "grad_norm": 0.43166783452033997, "learning_rate": 0.0001, "loss": 1.5645, "step": 7770 }, { "epoch": 0.9028173104850421, "grad_norm": 0.4483865201473236, "learning_rate": 0.0001, "loss": 1.5911, "step": 7771 }, { "epoch": 0.9029334882370026, "grad_norm": 0.43736976385116577, "learning_rate": 0.0001, "loss": 1.5115, "step": 7772 }, { "epoch": 0.9030496659889631, "grad_norm": 0.48580455780029297, "learning_rate": 0.0001, "loss": 1.5472, "step": 7773 }, { "epoch": 0.9031658437409236, "grad_norm": 0.47388365864753723, "learning_rate": 0.0001, "loss": 1.5931, "step": 7774 }, { "epoch": 0.9032820214928841, "grad_norm": 0.49823474884033203, "learning_rate": 0.0001, "loss": 1.6047, "step": 7775 }, { "epoch": 0.9033981992448447, "grad_norm": 0.451608270406723, "learning_rate": 0.0001, "loss": 1.5266, "step": 7776 }, { "epoch": 0.9035143769968051, "grad_norm": 0.4777389168739319, "learning_rate": 0.0001, "loss": 1.7107, "step": 7777 }, { "epoch": 0.9036305547487656, "grad_norm": 0.4792795777320862, "learning_rate": 0.0001, "loss": 1.655, "step": 7778 }, { "epoch": 0.9037467325007261, "grad_norm": 0.4765215814113617, "learning_rate": 0.0001, "loss": 1.5002, "step": 7779 }, { "epoch": 0.9038629102526866, "grad_norm": 0.46686357259750366, "learning_rate": 0.0001, "loss": 1.4847, "step": 7780 }, { "epoch": 0.9039790880046471, "grad_norm": 0.5087867379188538, "learning_rate": 0.0001, "loss": 1.9357, "step": 7781 }, { "epoch": 0.9040952657566076, "grad_norm": 0.46744218468666077, "learning_rate": 0.0001, "loss": 1.5073, "step": 7782 }, { "epoch": 0.9042114435085681, "grad_norm": 0.5227089524269104, "learning_rate": 0.0001, "loss": 1.5938, "step": 7783 }, { "epoch": 0.9043276212605286, "grad_norm": 0.4557849168777466, "learning_rate": 0.0001, "loss": 1.5522, "step": 7784 }, { "epoch": 0.9044437990124891, "grad_norm": 0.4599103033542633, "learning_rate": 0.0001, "loss": 1.6792, "step": 7785 }, { "epoch": 0.9045599767644497, "grad_norm": 0.5397565364837646, "learning_rate": 0.0001, "loss": 1.8295, "step": 7786 }, { "epoch": 0.9046761545164101, "grad_norm": 0.5359669923782349, "learning_rate": 0.0001, "loss": 1.7149, "step": 7787 }, { "epoch": 0.9047923322683706, "grad_norm": 0.49455010890960693, "learning_rate": 0.0001, "loss": 1.7036, "step": 7788 }, { "epoch": 0.9049085100203311, "grad_norm": 0.4927608072757721, "learning_rate": 0.0001, "loss": 1.668, "step": 7789 }, { "epoch": 0.9050246877722916, "grad_norm": 0.45446282625198364, "learning_rate": 0.0001, "loss": 1.5499, "step": 7790 }, { "epoch": 0.9051408655242521, "grad_norm": 0.4715528190135956, "learning_rate": 0.0001, "loss": 1.5217, "step": 7791 }, { "epoch": 0.9052570432762126, "grad_norm": 0.47455599904060364, "learning_rate": 0.0001, "loss": 1.6861, "step": 7792 }, { "epoch": 0.9053732210281731, "grad_norm": 0.4646841585636139, "learning_rate": 0.0001, "loss": 1.5259, "step": 7793 }, { "epoch": 0.9054893987801336, "grad_norm": 0.4721209406852722, "learning_rate": 0.0001, "loss": 1.6885, "step": 7794 }, { "epoch": 0.9056055765320941, "grad_norm": 0.4585552215576172, "learning_rate": 0.0001, "loss": 1.5124, "step": 7795 }, { "epoch": 0.9057217542840547, "grad_norm": 0.49558770656585693, "learning_rate": 0.0001, "loss": 1.6104, "step": 7796 }, { "epoch": 0.9058379320360151, "grad_norm": 0.45324012637138367, "learning_rate": 0.0001, "loss": 1.6386, "step": 7797 }, { "epoch": 0.9059541097879756, "grad_norm": 0.44756069779396057, "learning_rate": 0.0001, "loss": 1.4753, "step": 7798 }, { "epoch": 0.9060702875399361, "grad_norm": 0.48996299505233765, "learning_rate": 0.0001, "loss": 1.8288, "step": 7799 }, { "epoch": 0.9061864652918966, "grad_norm": 0.47252756357192993, "learning_rate": 0.0001, "loss": 1.6035, "step": 7800 }, { "epoch": 0.9063026430438571, "grad_norm": 0.4882994294166565, "learning_rate": 0.0001, "loss": 1.6112, "step": 7801 }, { "epoch": 0.9064188207958176, "grad_norm": 0.49475234746932983, "learning_rate": 0.0001, "loss": 1.7413, "step": 7802 }, { "epoch": 0.9065349985477781, "grad_norm": 0.467546671628952, "learning_rate": 0.0001, "loss": 1.3964, "step": 7803 }, { "epoch": 0.9066511762997386, "grad_norm": 0.47339338064193726, "learning_rate": 0.0001, "loss": 1.6716, "step": 7804 }, { "epoch": 0.9067673540516991, "grad_norm": 0.4243144094944, "learning_rate": 0.0001, "loss": 1.2828, "step": 7805 }, { "epoch": 0.9068835318036595, "grad_norm": 0.4705776572227478, "learning_rate": 0.0001, "loss": 1.6059, "step": 7806 }, { "epoch": 0.9069997095556201, "grad_norm": 0.48262354731559753, "learning_rate": 0.0001, "loss": 1.5293, "step": 7807 }, { "epoch": 0.9071158873075806, "grad_norm": 0.46224817633628845, "learning_rate": 0.0001, "loss": 1.4478, "step": 7808 }, { "epoch": 0.9072320650595411, "grad_norm": 0.48688849806785583, "learning_rate": 0.0001, "loss": 1.6116, "step": 7809 }, { "epoch": 0.9073482428115016, "grad_norm": 0.47182363271713257, "learning_rate": 0.0001, "loss": 1.6092, "step": 7810 }, { "epoch": 0.9074644205634621, "grad_norm": 0.5122577548027039, "learning_rate": 0.0001, "loss": 1.5642, "step": 7811 }, { "epoch": 0.9075805983154226, "grad_norm": 0.4881529211997986, "learning_rate": 0.0001, "loss": 1.6594, "step": 7812 }, { "epoch": 0.9076967760673831, "grad_norm": 0.5338044166564941, "learning_rate": 0.0001, "loss": 1.7145, "step": 7813 }, { "epoch": 0.9078129538193436, "grad_norm": 0.4970211684703827, "learning_rate": 0.0001, "loss": 1.7056, "step": 7814 }, { "epoch": 0.9079291315713041, "grad_norm": 0.574657142162323, "learning_rate": 0.0001, "loss": 1.3376, "step": 7815 }, { "epoch": 0.9080453093232645, "grad_norm": 0.44657719135284424, "learning_rate": 0.0001, "loss": 1.4165, "step": 7816 }, { "epoch": 0.9081614870752251, "grad_norm": 0.4609968066215515, "learning_rate": 0.0001, "loss": 1.5561, "step": 7817 }, { "epoch": 0.9082776648271856, "grad_norm": 0.4657510221004486, "learning_rate": 0.0001, "loss": 1.6129, "step": 7818 }, { "epoch": 0.9083938425791461, "grad_norm": 0.4992734491825104, "learning_rate": 0.0001, "loss": 1.6048, "step": 7819 }, { "epoch": 0.9085100203311066, "grad_norm": 0.5076075196266174, "learning_rate": 0.0001, "loss": 1.7326, "step": 7820 }, { "epoch": 0.9086261980830671, "grad_norm": 0.4936392605304718, "learning_rate": 0.0001, "loss": 1.7559, "step": 7821 }, { "epoch": 0.9087423758350276, "grad_norm": 0.47953271865844727, "learning_rate": 0.0001, "loss": 1.5399, "step": 7822 }, { "epoch": 0.9088585535869881, "grad_norm": 0.4652204215526581, "learning_rate": 0.0001, "loss": 1.652, "step": 7823 }, { "epoch": 0.9089747313389486, "grad_norm": 0.4872530698776245, "learning_rate": 0.0001, "loss": 1.521, "step": 7824 }, { "epoch": 0.9090909090909091, "grad_norm": 0.4713476300239563, "learning_rate": 0.0001, "loss": 1.5699, "step": 7825 }, { "epoch": 0.9092070868428695, "grad_norm": 0.4624953269958496, "learning_rate": 0.0001, "loss": 1.6234, "step": 7826 }, { "epoch": 0.90932326459483, "grad_norm": 0.49820998311042786, "learning_rate": 0.0001, "loss": 1.6088, "step": 7827 }, { "epoch": 0.9094394423467906, "grad_norm": 0.4489380419254303, "learning_rate": 0.0001, "loss": 1.4541, "step": 7828 }, { "epoch": 0.9095556200987511, "grad_norm": 0.48509761691093445, "learning_rate": 0.0001, "loss": 1.6833, "step": 7829 }, { "epoch": 0.9096717978507116, "grad_norm": 0.5044791102409363, "learning_rate": 0.0001, "loss": 1.5291, "step": 7830 }, { "epoch": 0.9097879756026721, "grad_norm": 0.47124698758125305, "learning_rate": 0.0001, "loss": 1.5421, "step": 7831 }, { "epoch": 0.9099041533546326, "grad_norm": 0.4781220555305481, "learning_rate": 0.0001, "loss": 1.595, "step": 7832 }, { "epoch": 0.9100203311065931, "grad_norm": 0.47968727350234985, "learning_rate": 0.0001, "loss": 1.7076, "step": 7833 }, { "epoch": 0.9101365088585536, "grad_norm": 0.5137500166893005, "learning_rate": 0.0001, "loss": 1.7598, "step": 7834 }, { "epoch": 0.9102526866105141, "grad_norm": 0.4631037414073944, "learning_rate": 0.0001, "loss": 1.5031, "step": 7835 }, { "epoch": 0.9103688643624746, "grad_norm": 0.5219733715057373, "learning_rate": 0.0001, "loss": 1.7391, "step": 7836 }, { "epoch": 0.910485042114435, "grad_norm": 0.4807119071483612, "learning_rate": 0.0001, "loss": 1.6519, "step": 7837 }, { "epoch": 0.9106012198663956, "grad_norm": 0.5032156705856323, "learning_rate": 0.0001, "loss": 1.7154, "step": 7838 }, { "epoch": 0.9107173976183561, "grad_norm": 0.7016561031341553, "learning_rate": 0.0001, "loss": 1.4776, "step": 7839 }, { "epoch": 0.9108335753703166, "grad_norm": 0.4516977071762085, "learning_rate": 0.0001, "loss": 1.5318, "step": 7840 }, { "epoch": 0.9109497531222771, "grad_norm": 0.4732464849948883, "learning_rate": 0.0001, "loss": 1.5465, "step": 7841 }, { "epoch": 0.9110659308742376, "grad_norm": 0.4913138151168823, "learning_rate": 0.0001, "loss": 1.6617, "step": 7842 }, { "epoch": 0.9111821086261981, "grad_norm": 0.5138623118400574, "learning_rate": 0.0001, "loss": 1.7446, "step": 7843 }, { "epoch": 0.9112982863781586, "grad_norm": 0.46417149901390076, "learning_rate": 0.0001, "loss": 1.6198, "step": 7844 }, { "epoch": 0.9114144641301191, "grad_norm": 0.47088223695755005, "learning_rate": 0.0001, "loss": 1.5643, "step": 7845 }, { "epoch": 0.9115306418820796, "grad_norm": 0.45091691613197327, "learning_rate": 0.0001, "loss": 1.5734, "step": 7846 }, { "epoch": 0.91164681963404, "grad_norm": 0.45545732975006104, "learning_rate": 0.0001, "loss": 1.4915, "step": 7847 }, { "epoch": 0.9117629973860005, "grad_norm": 0.45981520414352417, "learning_rate": 0.0001, "loss": 1.4383, "step": 7848 }, { "epoch": 0.9118791751379611, "grad_norm": 0.472409725189209, "learning_rate": 0.0001, "loss": 1.5439, "step": 7849 }, { "epoch": 0.9119953528899216, "grad_norm": 0.5488510131835938, "learning_rate": 0.0001, "loss": 1.7462, "step": 7850 }, { "epoch": 0.9121115306418821, "grad_norm": 0.48794418573379517, "learning_rate": 0.0001, "loss": 1.4728, "step": 7851 }, { "epoch": 0.9122277083938426, "grad_norm": 0.4115673899650574, "learning_rate": 0.0001, "loss": 1.4473, "step": 7852 }, { "epoch": 0.9123438861458031, "grad_norm": 0.46749910712242126, "learning_rate": 0.0001, "loss": 1.6613, "step": 7853 }, { "epoch": 0.9124600638977636, "grad_norm": 0.47917798161506653, "learning_rate": 0.0001, "loss": 1.5905, "step": 7854 }, { "epoch": 0.9125762416497241, "grad_norm": 0.46126171946525574, "learning_rate": 0.0001, "loss": 1.6636, "step": 7855 }, { "epoch": 0.9126924194016846, "grad_norm": 0.4459565281867981, "learning_rate": 0.0001, "loss": 1.4559, "step": 7856 }, { "epoch": 0.912808597153645, "grad_norm": 0.4595984220504761, "learning_rate": 0.0001, "loss": 1.624, "step": 7857 }, { "epoch": 0.9129247749056055, "grad_norm": 0.45594367384910583, "learning_rate": 0.0001, "loss": 1.544, "step": 7858 }, { "epoch": 0.9130409526575661, "grad_norm": 0.4766393303871155, "learning_rate": 0.0001, "loss": 1.5009, "step": 7859 }, { "epoch": 0.9131571304095266, "grad_norm": 0.5518864393234253, "learning_rate": 0.0001, "loss": 1.7039, "step": 7860 }, { "epoch": 0.9132733081614871, "grad_norm": 0.48562371730804443, "learning_rate": 0.0001, "loss": 1.583, "step": 7861 }, { "epoch": 0.9133894859134476, "grad_norm": 0.5005999207496643, "learning_rate": 0.0001, "loss": 1.7962, "step": 7862 }, { "epoch": 0.9135056636654081, "grad_norm": 0.4571722149848938, "learning_rate": 0.0001, "loss": 1.5681, "step": 7863 }, { "epoch": 0.9136218414173686, "grad_norm": 0.4847254753112793, "learning_rate": 0.0001, "loss": 1.5251, "step": 7864 }, { "epoch": 0.9137380191693291, "grad_norm": 0.5055410265922546, "learning_rate": 0.0001, "loss": 1.7978, "step": 7865 }, { "epoch": 0.9138541969212896, "grad_norm": 0.5655259490013123, "learning_rate": 0.0001, "loss": 1.5319, "step": 7866 }, { "epoch": 0.91397037467325, "grad_norm": 0.46798625588417053, "learning_rate": 0.0001, "loss": 1.6612, "step": 7867 }, { "epoch": 0.9140865524252105, "grad_norm": 0.48041674494743347, "learning_rate": 0.0001, "loss": 1.6535, "step": 7868 }, { "epoch": 0.914202730177171, "grad_norm": 0.4875914752483368, "learning_rate": 0.0001, "loss": 1.6126, "step": 7869 }, { "epoch": 0.9143189079291316, "grad_norm": 0.49609968066215515, "learning_rate": 0.0001, "loss": 1.7816, "step": 7870 }, { "epoch": 0.9144350856810921, "grad_norm": 0.43055441975593567, "learning_rate": 0.0001, "loss": 1.4804, "step": 7871 }, { "epoch": 0.9145512634330526, "grad_norm": 0.4403667747974396, "learning_rate": 0.0001, "loss": 1.5251, "step": 7872 }, { "epoch": 0.9146674411850131, "grad_norm": 0.4711524546146393, "learning_rate": 0.0001, "loss": 1.5767, "step": 7873 }, { "epoch": 0.9147836189369736, "grad_norm": 0.49612268805503845, "learning_rate": 0.0001, "loss": 1.7506, "step": 7874 }, { "epoch": 0.9148997966889341, "grad_norm": 0.4640529751777649, "learning_rate": 0.0001, "loss": 1.5309, "step": 7875 }, { "epoch": 0.9150159744408946, "grad_norm": 0.45342904329299927, "learning_rate": 0.0001, "loss": 1.5846, "step": 7876 }, { "epoch": 0.915132152192855, "grad_norm": 0.4637288749217987, "learning_rate": 0.0001, "loss": 1.416, "step": 7877 }, { "epoch": 0.9152483299448155, "grad_norm": 0.5066947937011719, "learning_rate": 0.0001, "loss": 1.486, "step": 7878 }, { "epoch": 0.915364507696776, "grad_norm": 0.47863486409187317, "learning_rate": 0.0001, "loss": 1.5661, "step": 7879 }, { "epoch": 0.9154806854487366, "grad_norm": 0.4799175262451172, "learning_rate": 0.0001, "loss": 1.6418, "step": 7880 }, { "epoch": 0.9155968632006971, "grad_norm": 0.49706557393074036, "learning_rate": 0.0001, "loss": 1.5838, "step": 7881 }, { "epoch": 0.9157130409526576, "grad_norm": 0.49601349234580994, "learning_rate": 0.0001, "loss": 1.6441, "step": 7882 }, { "epoch": 0.9158292187046181, "grad_norm": 0.5072823762893677, "learning_rate": 0.0001, "loss": 1.542, "step": 7883 }, { "epoch": 0.9159453964565786, "grad_norm": 0.5159144401550293, "learning_rate": 0.0001, "loss": 1.6384, "step": 7884 }, { "epoch": 0.9160615742085391, "grad_norm": 0.46649453043937683, "learning_rate": 0.0001, "loss": 1.489, "step": 7885 }, { "epoch": 0.9161777519604996, "grad_norm": 0.48782074451446533, "learning_rate": 0.0001, "loss": 1.6177, "step": 7886 }, { "epoch": 0.91629392971246, "grad_norm": 0.5250930190086365, "learning_rate": 0.0001, "loss": 1.6332, "step": 7887 }, { "epoch": 0.9164101074644205, "grad_norm": 0.4878339469432831, "learning_rate": 0.0001, "loss": 1.5546, "step": 7888 }, { "epoch": 0.916526285216381, "grad_norm": 0.45729678869247437, "learning_rate": 0.0001, "loss": 1.5296, "step": 7889 }, { "epoch": 0.9166424629683415, "grad_norm": 0.47600996494293213, "learning_rate": 0.0001, "loss": 1.5506, "step": 7890 }, { "epoch": 0.9167586407203021, "grad_norm": 0.5157418847084045, "learning_rate": 0.0001, "loss": 1.6465, "step": 7891 }, { "epoch": 0.9168748184722626, "grad_norm": 0.5000462532043457, "learning_rate": 0.0001, "loss": 1.6882, "step": 7892 }, { "epoch": 0.9169909962242231, "grad_norm": 0.48779532313346863, "learning_rate": 0.0001, "loss": 1.3709, "step": 7893 }, { "epoch": 0.9171071739761836, "grad_norm": 0.49037328362464905, "learning_rate": 0.0001, "loss": 1.5719, "step": 7894 }, { "epoch": 0.9172233517281441, "grad_norm": 0.47151613235473633, "learning_rate": 0.0001, "loss": 1.5745, "step": 7895 }, { "epoch": 0.9173395294801046, "grad_norm": 0.5137687921524048, "learning_rate": 0.0001, "loss": 1.6653, "step": 7896 }, { "epoch": 0.917455707232065, "grad_norm": 0.4644807279109955, "learning_rate": 0.0001, "loss": 1.5263, "step": 7897 }, { "epoch": 0.9175718849840255, "grad_norm": 0.45312634110450745, "learning_rate": 0.0001, "loss": 1.5498, "step": 7898 }, { "epoch": 0.917688062735986, "grad_norm": 0.5129163861274719, "learning_rate": 0.0001, "loss": 1.6555, "step": 7899 }, { "epoch": 0.9178042404879465, "grad_norm": 0.474354088306427, "learning_rate": 0.0001, "loss": 1.5625, "step": 7900 }, { "epoch": 0.9179204182399071, "grad_norm": 0.47130098938941956, "learning_rate": 0.0001, "loss": 1.4614, "step": 7901 }, { "epoch": 0.9180365959918676, "grad_norm": 0.4573417901992798, "learning_rate": 0.0001, "loss": 1.5293, "step": 7902 }, { "epoch": 0.9181527737438281, "grad_norm": 0.46235814690589905, "learning_rate": 0.0001, "loss": 1.758, "step": 7903 }, { "epoch": 0.9182689514957886, "grad_norm": 0.5168223977088928, "learning_rate": 0.0001, "loss": 1.2772, "step": 7904 }, { "epoch": 0.9183851292477491, "grad_norm": 0.4721146523952484, "learning_rate": 0.0001, "loss": 1.6981, "step": 7905 }, { "epoch": 0.9185013069997096, "grad_norm": 0.49531880021095276, "learning_rate": 0.0001, "loss": 1.5861, "step": 7906 }, { "epoch": 0.91861748475167, "grad_norm": 0.49574729800224304, "learning_rate": 0.0001, "loss": 1.5242, "step": 7907 }, { "epoch": 0.9187336625036305, "grad_norm": 0.4491695761680603, "learning_rate": 0.0001, "loss": 1.4508, "step": 7908 }, { "epoch": 0.918849840255591, "grad_norm": 0.45107486844062805, "learning_rate": 0.0001, "loss": 1.4855, "step": 7909 }, { "epoch": 0.9189660180075515, "grad_norm": 0.4571503698825836, "learning_rate": 0.0001, "loss": 1.6184, "step": 7910 }, { "epoch": 0.919082195759512, "grad_norm": 0.49854135513305664, "learning_rate": 0.0001, "loss": 1.7238, "step": 7911 }, { "epoch": 0.9191983735114726, "grad_norm": 0.4973702132701874, "learning_rate": 0.0001, "loss": 1.5851, "step": 7912 }, { "epoch": 0.9193145512634331, "grad_norm": 0.47900450229644775, "learning_rate": 0.0001, "loss": 1.4472, "step": 7913 }, { "epoch": 0.9194307290153936, "grad_norm": 0.42754286527633667, "learning_rate": 0.0001, "loss": 1.5078, "step": 7914 }, { "epoch": 0.9195469067673541, "grad_norm": 0.5205299854278564, "learning_rate": 0.0001, "loss": 1.6868, "step": 7915 }, { "epoch": 0.9196630845193146, "grad_norm": 0.49486666917800903, "learning_rate": 0.0001, "loss": 1.685, "step": 7916 }, { "epoch": 0.919779262271275, "grad_norm": 0.47060513496398926, "learning_rate": 0.0001, "loss": 1.4396, "step": 7917 }, { "epoch": 0.9198954400232355, "grad_norm": 0.4912223517894745, "learning_rate": 0.0001, "loss": 1.7563, "step": 7918 }, { "epoch": 0.920011617775196, "grad_norm": 0.46059104800224304, "learning_rate": 0.0001, "loss": 1.3697, "step": 7919 }, { "epoch": 0.9201277955271565, "grad_norm": 0.4507901072502136, "learning_rate": 0.0001, "loss": 1.5831, "step": 7920 }, { "epoch": 0.920243973279117, "grad_norm": 0.49140775203704834, "learning_rate": 0.0001, "loss": 1.4478, "step": 7921 }, { "epoch": 0.9203601510310776, "grad_norm": 0.4491073489189148, "learning_rate": 0.0001, "loss": 1.3941, "step": 7922 }, { "epoch": 0.9204763287830381, "grad_norm": 0.49387529492378235, "learning_rate": 0.0001, "loss": 1.5508, "step": 7923 }, { "epoch": 0.9205925065349986, "grad_norm": 0.4687308669090271, "learning_rate": 0.0001, "loss": 1.5208, "step": 7924 }, { "epoch": 0.9207086842869591, "grad_norm": 0.5228381752967834, "learning_rate": 0.0001, "loss": 1.8851, "step": 7925 }, { "epoch": 0.9208248620389196, "grad_norm": 0.49759143590927124, "learning_rate": 0.0001, "loss": 1.6124, "step": 7926 }, { "epoch": 0.92094103979088, "grad_norm": 0.47675225138664246, "learning_rate": 0.0001, "loss": 1.609, "step": 7927 }, { "epoch": 0.9210572175428405, "grad_norm": 0.45978063344955444, "learning_rate": 0.0001, "loss": 1.5576, "step": 7928 }, { "epoch": 0.921173395294801, "grad_norm": 0.5017638802528381, "learning_rate": 0.0001, "loss": 1.7266, "step": 7929 }, { "epoch": 0.9212895730467615, "grad_norm": 0.467979371547699, "learning_rate": 0.0001, "loss": 1.6362, "step": 7930 }, { "epoch": 0.921405750798722, "grad_norm": 0.4964231848716736, "learning_rate": 0.0001, "loss": 1.4573, "step": 7931 }, { "epoch": 0.9215219285506825, "grad_norm": 0.46611857414245605, "learning_rate": 0.0001, "loss": 1.5287, "step": 7932 }, { "epoch": 0.9216381063026431, "grad_norm": 0.47594428062438965, "learning_rate": 0.0001, "loss": 1.6423, "step": 7933 }, { "epoch": 0.9217542840546036, "grad_norm": 0.4936331510543823, "learning_rate": 0.0001, "loss": 1.6663, "step": 7934 }, { "epoch": 0.9218704618065641, "grad_norm": 0.4947721064090729, "learning_rate": 0.0001, "loss": 1.5737, "step": 7935 }, { "epoch": 0.9219866395585246, "grad_norm": 0.4754369854927063, "learning_rate": 0.0001, "loss": 1.5118, "step": 7936 }, { "epoch": 0.922102817310485, "grad_norm": 0.46831005811691284, "learning_rate": 0.0001, "loss": 1.5884, "step": 7937 }, { "epoch": 0.9222189950624455, "grad_norm": 0.498220294713974, "learning_rate": 0.0001, "loss": 1.5117, "step": 7938 }, { "epoch": 0.922335172814406, "grad_norm": 0.4809699058532715, "learning_rate": 0.0001, "loss": 1.6335, "step": 7939 }, { "epoch": 0.9224513505663665, "grad_norm": 0.4623294174671173, "learning_rate": 0.0001, "loss": 1.4411, "step": 7940 }, { "epoch": 0.922567528318327, "grad_norm": 0.49049609899520874, "learning_rate": 0.0001, "loss": 1.5309, "step": 7941 }, { "epoch": 0.9226837060702875, "grad_norm": 0.47794732451438904, "learning_rate": 0.0001, "loss": 1.5218, "step": 7942 }, { "epoch": 0.9227998838222481, "grad_norm": 0.49489712715148926, "learning_rate": 0.0001, "loss": 1.7071, "step": 7943 }, { "epoch": 0.9229160615742086, "grad_norm": 0.4838644564151764, "learning_rate": 0.0001, "loss": 1.6534, "step": 7944 }, { "epoch": 0.9230322393261691, "grad_norm": 0.542927622795105, "learning_rate": 0.0001, "loss": 1.8534, "step": 7945 }, { "epoch": 0.9231484170781296, "grad_norm": 0.484602153301239, "learning_rate": 0.0001, "loss": 1.7658, "step": 7946 }, { "epoch": 0.92326459483009, "grad_norm": 0.45211806893348694, "learning_rate": 0.0001, "loss": 1.3895, "step": 7947 }, { "epoch": 0.9233807725820505, "grad_norm": 0.5238228440284729, "learning_rate": 0.0001, "loss": 1.7041, "step": 7948 }, { "epoch": 0.923496950334011, "grad_norm": 0.4958471655845642, "learning_rate": 0.0001, "loss": 1.6182, "step": 7949 }, { "epoch": 0.9236131280859715, "grad_norm": 0.4387954771518707, "learning_rate": 0.0001, "loss": 1.3532, "step": 7950 }, { "epoch": 0.923729305837932, "grad_norm": 0.4402277171611786, "learning_rate": 0.0001, "loss": 1.4075, "step": 7951 }, { "epoch": 0.9238454835898925, "grad_norm": 0.5100318193435669, "learning_rate": 0.0001, "loss": 1.5843, "step": 7952 }, { "epoch": 0.923961661341853, "grad_norm": 0.4846263527870178, "learning_rate": 0.0001, "loss": 1.6603, "step": 7953 }, { "epoch": 0.9240778390938136, "grad_norm": 0.4743453562259674, "learning_rate": 0.0001, "loss": 1.5289, "step": 7954 }, { "epoch": 0.9241940168457741, "grad_norm": 0.5055150985717773, "learning_rate": 0.0001, "loss": 1.6125, "step": 7955 }, { "epoch": 0.9243101945977346, "grad_norm": 0.46084272861480713, "learning_rate": 0.0001, "loss": 1.5453, "step": 7956 }, { "epoch": 0.924426372349695, "grad_norm": 0.47752270102500916, "learning_rate": 0.0001, "loss": 1.5842, "step": 7957 }, { "epoch": 0.9245425501016555, "grad_norm": 0.47318798303604126, "learning_rate": 0.0001, "loss": 1.6307, "step": 7958 }, { "epoch": 0.924658727853616, "grad_norm": 0.4867374897003174, "learning_rate": 0.0001, "loss": 1.6222, "step": 7959 }, { "epoch": 0.9247749056055765, "grad_norm": 0.48629096150398254, "learning_rate": 0.0001, "loss": 1.7713, "step": 7960 }, { "epoch": 0.924891083357537, "grad_norm": 0.47836753726005554, "learning_rate": 0.0001, "loss": 1.5697, "step": 7961 }, { "epoch": 0.9250072611094975, "grad_norm": 0.49589991569519043, "learning_rate": 0.0001, "loss": 1.6846, "step": 7962 }, { "epoch": 0.925123438861458, "grad_norm": 0.46878737211227417, "learning_rate": 0.0001, "loss": 1.6223, "step": 7963 }, { "epoch": 0.9252396166134186, "grad_norm": 0.4647105634212494, "learning_rate": 0.0001, "loss": 1.6032, "step": 7964 }, { "epoch": 0.9253557943653791, "grad_norm": 0.46214747428894043, "learning_rate": 0.0001, "loss": 1.5577, "step": 7965 }, { "epoch": 0.9254719721173396, "grad_norm": 0.5062007904052734, "learning_rate": 0.0001, "loss": 1.8203, "step": 7966 }, { "epoch": 0.9255881498693, "grad_norm": 0.5121557116508484, "learning_rate": 0.0001, "loss": 1.7192, "step": 7967 }, { "epoch": 0.9257043276212605, "grad_norm": 0.44176679849624634, "learning_rate": 0.0001, "loss": 1.4522, "step": 7968 }, { "epoch": 0.925820505373221, "grad_norm": 0.47141146659851074, "learning_rate": 0.0001, "loss": 1.4804, "step": 7969 }, { "epoch": 0.9259366831251815, "grad_norm": 0.48892006278038025, "learning_rate": 0.0001, "loss": 1.6353, "step": 7970 }, { "epoch": 0.926052860877142, "grad_norm": 0.512973964214325, "learning_rate": 0.0001, "loss": 1.7151, "step": 7971 }, { "epoch": 0.9261690386291025, "grad_norm": 0.4414759576320648, "learning_rate": 0.0001, "loss": 1.3828, "step": 7972 }, { "epoch": 0.926285216381063, "grad_norm": 0.48887619376182556, "learning_rate": 0.0001, "loss": 1.6999, "step": 7973 }, { "epoch": 0.9264013941330236, "grad_norm": 0.45762237906455994, "learning_rate": 0.0001, "loss": 1.5184, "step": 7974 }, { "epoch": 0.9265175718849841, "grad_norm": 0.45847946405410767, "learning_rate": 0.0001, "loss": 1.6602, "step": 7975 }, { "epoch": 0.9266337496369446, "grad_norm": 0.49288585782051086, "learning_rate": 0.0001, "loss": 1.762, "step": 7976 }, { "epoch": 0.926749927388905, "grad_norm": 0.48561185598373413, "learning_rate": 0.0001, "loss": 1.5289, "step": 7977 }, { "epoch": 0.9268661051408655, "grad_norm": 0.447745680809021, "learning_rate": 0.0001, "loss": 1.5616, "step": 7978 }, { "epoch": 0.926982282892826, "grad_norm": 0.49611517786979675, "learning_rate": 0.0001, "loss": 1.6865, "step": 7979 }, { "epoch": 0.9270984606447865, "grad_norm": 0.46112751960754395, "learning_rate": 0.0001, "loss": 1.6159, "step": 7980 }, { "epoch": 0.927214638396747, "grad_norm": 0.4908156991004944, "learning_rate": 0.0001, "loss": 1.6442, "step": 7981 }, { "epoch": 0.9273308161487075, "grad_norm": 0.4530208706855774, "learning_rate": 0.0001, "loss": 1.5535, "step": 7982 }, { "epoch": 0.927446993900668, "grad_norm": 0.49181050062179565, "learning_rate": 0.0001, "loss": 1.7461, "step": 7983 }, { "epoch": 0.9275631716526285, "grad_norm": 0.49187758564949036, "learning_rate": 0.0001, "loss": 1.7731, "step": 7984 }, { "epoch": 0.9276793494045891, "grad_norm": 0.5021129846572876, "learning_rate": 0.0001, "loss": 1.6372, "step": 7985 }, { "epoch": 0.9277955271565496, "grad_norm": 0.508470892906189, "learning_rate": 0.0001, "loss": 1.552, "step": 7986 }, { "epoch": 0.92791170490851, "grad_norm": 0.5105016231536865, "learning_rate": 0.0001, "loss": 1.7802, "step": 7987 }, { "epoch": 0.9280278826604705, "grad_norm": 0.4812169671058655, "learning_rate": 0.0001, "loss": 1.6524, "step": 7988 }, { "epoch": 0.928144060412431, "grad_norm": 0.4714551568031311, "learning_rate": 0.0001, "loss": 1.5908, "step": 7989 }, { "epoch": 0.9282602381643915, "grad_norm": 0.4296482801437378, "learning_rate": 0.0001, "loss": 1.5015, "step": 7990 }, { "epoch": 0.928376415916352, "grad_norm": 0.45855334401130676, "learning_rate": 0.0001, "loss": 1.574, "step": 7991 }, { "epoch": 0.9284925936683125, "grad_norm": 0.4766031801700592, "learning_rate": 0.0001, "loss": 1.6346, "step": 7992 }, { "epoch": 0.928608771420273, "grad_norm": 0.45450130105018616, "learning_rate": 0.0001, "loss": 1.4737, "step": 7993 }, { "epoch": 0.9287249491722335, "grad_norm": 0.512444794178009, "learning_rate": 0.0001, "loss": 1.6291, "step": 7994 }, { "epoch": 0.9288411269241941, "grad_norm": 0.49401575326919556, "learning_rate": 0.0001, "loss": 1.6025, "step": 7995 }, { "epoch": 0.9289573046761546, "grad_norm": 0.5121031999588013, "learning_rate": 0.0001, "loss": 1.6722, "step": 7996 }, { "epoch": 0.929073482428115, "grad_norm": 0.4602169692516327, "learning_rate": 0.0001, "loss": 1.5312, "step": 7997 }, { "epoch": 0.9291896601800755, "grad_norm": 0.49888116121292114, "learning_rate": 0.0001, "loss": 1.728, "step": 7998 }, { "epoch": 0.929305837932036, "grad_norm": 0.5206019878387451, "learning_rate": 0.0001, "loss": 1.6714, "step": 7999 }, { "epoch": 0.9294220156839965, "grad_norm": 0.4638976454734802, "learning_rate": 0.0001, "loss": 1.5465, "step": 8000 }, { "epoch": 0.929538193435957, "grad_norm": 0.48082998394966125, "learning_rate": 0.0001, "loss": 1.8074, "step": 8001 }, { "epoch": 0.9296543711879175, "grad_norm": 0.4941411018371582, "learning_rate": 0.0001, "loss": 1.8171, "step": 8002 }, { "epoch": 0.929770548939878, "grad_norm": 0.45711395144462585, "learning_rate": 0.0001, "loss": 1.6415, "step": 8003 }, { "epoch": 0.9298867266918385, "grad_norm": 0.4651217758655548, "learning_rate": 0.0001, "loss": 1.5275, "step": 8004 }, { "epoch": 0.930002904443799, "grad_norm": 0.45442432165145874, "learning_rate": 0.0001, "loss": 1.5638, "step": 8005 }, { "epoch": 0.9301190821957596, "grad_norm": 0.4956342577934265, "learning_rate": 0.0001, "loss": 1.4252, "step": 8006 }, { "epoch": 0.93023525994772, "grad_norm": 0.5071079730987549, "learning_rate": 0.0001, "loss": 1.6602, "step": 8007 }, { "epoch": 0.9303514376996805, "grad_norm": 0.4929546117782593, "learning_rate": 0.0001, "loss": 1.6939, "step": 8008 }, { "epoch": 0.930467615451641, "grad_norm": 0.5169224739074707, "learning_rate": 0.0001, "loss": 1.6255, "step": 8009 }, { "epoch": 0.9305837932036015, "grad_norm": 0.5088503956794739, "learning_rate": 0.0001, "loss": 1.5911, "step": 8010 }, { "epoch": 0.930699970955562, "grad_norm": 0.48332953453063965, "learning_rate": 0.0001, "loss": 1.712, "step": 8011 }, { "epoch": 0.9308161487075225, "grad_norm": 0.49917006492614746, "learning_rate": 0.0001, "loss": 1.601, "step": 8012 }, { "epoch": 0.930932326459483, "grad_norm": 0.4794350564479828, "learning_rate": 0.0001, "loss": 1.5455, "step": 8013 }, { "epoch": 0.9310485042114435, "grad_norm": 0.4891558885574341, "learning_rate": 0.0001, "loss": 1.6764, "step": 8014 }, { "epoch": 0.931164681963404, "grad_norm": 0.4725422263145447, "learning_rate": 0.0001, "loss": 1.6397, "step": 8015 }, { "epoch": 0.9312808597153646, "grad_norm": 0.48339328169822693, "learning_rate": 0.0001, "loss": 1.5296, "step": 8016 }, { "epoch": 0.931397037467325, "grad_norm": 0.4951247572898865, "learning_rate": 0.0001, "loss": 1.6353, "step": 8017 }, { "epoch": 0.9315132152192855, "grad_norm": 0.46006613969802856, "learning_rate": 0.0001, "loss": 1.532, "step": 8018 }, { "epoch": 0.931629392971246, "grad_norm": 0.4528697431087494, "learning_rate": 0.0001, "loss": 1.643, "step": 8019 }, { "epoch": 0.9317455707232065, "grad_norm": 0.46240419149398804, "learning_rate": 0.0001, "loss": 1.3742, "step": 8020 }, { "epoch": 0.931861748475167, "grad_norm": 0.4489692449569702, "learning_rate": 0.0001, "loss": 1.5491, "step": 8021 }, { "epoch": 0.9319779262271275, "grad_norm": 0.473308265209198, "learning_rate": 0.0001, "loss": 1.6718, "step": 8022 }, { "epoch": 0.932094103979088, "grad_norm": 0.46006840467453003, "learning_rate": 0.0001, "loss": 1.4633, "step": 8023 }, { "epoch": 0.9322102817310485, "grad_norm": 0.501011848449707, "learning_rate": 0.0001, "loss": 1.645, "step": 8024 }, { "epoch": 0.932326459483009, "grad_norm": 0.46970438957214355, "learning_rate": 0.0001, "loss": 1.7605, "step": 8025 }, { "epoch": 0.9324426372349695, "grad_norm": 0.45778888463974, "learning_rate": 0.0001, "loss": 1.4962, "step": 8026 }, { "epoch": 0.93255881498693, "grad_norm": 0.46483826637268066, "learning_rate": 0.0001, "loss": 1.6317, "step": 8027 }, { "epoch": 0.9326749927388905, "grad_norm": 0.4459386169910431, "learning_rate": 0.0001, "loss": 1.5468, "step": 8028 }, { "epoch": 0.932791170490851, "grad_norm": 0.5162267088890076, "learning_rate": 0.0001, "loss": 1.7774, "step": 8029 }, { "epoch": 0.9329073482428115, "grad_norm": 0.46646633744239807, "learning_rate": 0.0001, "loss": 1.443, "step": 8030 }, { "epoch": 0.933023525994772, "grad_norm": 0.5139045119285583, "learning_rate": 0.0001, "loss": 1.7225, "step": 8031 }, { "epoch": 0.9331397037467325, "grad_norm": 0.44446662068367004, "learning_rate": 0.0001, "loss": 1.3792, "step": 8032 }, { "epoch": 0.933255881498693, "grad_norm": 0.46956273913383484, "learning_rate": 0.0001, "loss": 1.5063, "step": 8033 }, { "epoch": 0.9333720592506535, "grad_norm": 0.4521263837814331, "learning_rate": 0.0001, "loss": 1.5792, "step": 8034 }, { "epoch": 0.933488237002614, "grad_norm": 0.5222106575965881, "learning_rate": 0.0001, "loss": 1.6902, "step": 8035 }, { "epoch": 0.9336044147545745, "grad_norm": 0.44853895902633667, "learning_rate": 0.0001, "loss": 1.5561, "step": 8036 }, { "epoch": 0.933720592506535, "grad_norm": 0.48177504539489746, "learning_rate": 0.0001, "loss": 1.5831, "step": 8037 }, { "epoch": 0.9338367702584955, "grad_norm": 0.4503477215766907, "learning_rate": 0.0001, "loss": 1.3327, "step": 8038 }, { "epoch": 0.933952948010456, "grad_norm": 0.507217288017273, "learning_rate": 0.0001, "loss": 1.7489, "step": 8039 }, { "epoch": 0.9340691257624165, "grad_norm": 0.4814956486225128, "learning_rate": 0.0001, "loss": 1.6036, "step": 8040 }, { "epoch": 0.934185303514377, "grad_norm": 0.4742831885814667, "learning_rate": 0.0001, "loss": 1.5807, "step": 8041 }, { "epoch": 0.9343014812663375, "grad_norm": 0.4570356607437134, "learning_rate": 0.0001, "loss": 1.4408, "step": 8042 }, { "epoch": 0.934417659018298, "grad_norm": 0.46834975481033325, "learning_rate": 0.0001, "loss": 1.4698, "step": 8043 }, { "epoch": 0.9345338367702585, "grad_norm": 0.5158945322036743, "learning_rate": 0.0001, "loss": 1.4868, "step": 8044 }, { "epoch": 0.934650014522219, "grad_norm": 0.5279756188392639, "learning_rate": 0.0001, "loss": 1.5014, "step": 8045 }, { "epoch": 0.9347661922741795, "grad_norm": 0.47560861706733704, "learning_rate": 0.0001, "loss": 1.5755, "step": 8046 }, { "epoch": 0.9348823700261399, "grad_norm": 0.4888511300086975, "learning_rate": 0.0001, "loss": 1.6097, "step": 8047 }, { "epoch": 0.9349985477781005, "grad_norm": 0.4677184820175171, "learning_rate": 0.0001, "loss": 1.6272, "step": 8048 }, { "epoch": 0.935114725530061, "grad_norm": 0.4719541370868683, "learning_rate": 0.0001, "loss": 1.6077, "step": 8049 }, { "epoch": 0.9352309032820215, "grad_norm": 0.477180540561676, "learning_rate": 0.0001, "loss": 1.6941, "step": 8050 }, { "epoch": 0.935347081033982, "grad_norm": 0.46396854519844055, "learning_rate": 0.0001, "loss": 1.3669, "step": 8051 }, { "epoch": 0.9354632587859425, "grad_norm": 0.4677002727985382, "learning_rate": 0.0001, "loss": 1.493, "step": 8052 }, { "epoch": 0.935579436537903, "grad_norm": 0.45009395480155945, "learning_rate": 0.0001, "loss": 1.4174, "step": 8053 }, { "epoch": 0.9356956142898635, "grad_norm": 0.47856685519218445, "learning_rate": 0.0001, "loss": 1.613, "step": 8054 }, { "epoch": 0.935811792041824, "grad_norm": 0.5348851084709167, "learning_rate": 0.0001, "loss": 1.5635, "step": 8055 }, { "epoch": 0.9359279697937845, "grad_norm": 0.4984420835971832, "learning_rate": 0.0001, "loss": 1.6902, "step": 8056 }, { "epoch": 0.9360441475457449, "grad_norm": 0.5199939608573914, "learning_rate": 0.0001, "loss": 1.6925, "step": 8057 }, { "epoch": 0.9361603252977055, "grad_norm": 0.5202213525772095, "learning_rate": 0.0001, "loss": 1.6618, "step": 8058 }, { "epoch": 0.936276503049666, "grad_norm": 0.5361291766166687, "learning_rate": 0.0001, "loss": 1.5374, "step": 8059 }, { "epoch": 0.9363926808016265, "grad_norm": 0.49866026639938354, "learning_rate": 0.0001, "loss": 1.6217, "step": 8060 }, { "epoch": 0.936508858553587, "grad_norm": 0.49306201934814453, "learning_rate": 0.0001, "loss": 1.5924, "step": 8061 }, { "epoch": 0.9366250363055475, "grad_norm": 0.4813399910926819, "learning_rate": 0.0001, "loss": 1.6272, "step": 8062 }, { "epoch": 0.936741214057508, "grad_norm": 0.47636452317237854, "learning_rate": 0.0001, "loss": 1.6794, "step": 8063 }, { "epoch": 0.9368573918094685, "grad_norm": 0.4818163812160492, "learning_rate": 0.0001, "loss": 1.4483, "step": 8064 }, { "epoch": 0.936973569561429, "grad_norm": 0.46799615025520325, "learning_rate": 0.0001, "loss": 1.6217, "step": 8065 }, { "epoch": 0.9370897473133895, "grad_norm": 0.48917245864868164, "learning_rate": 0.0001, "loss": 1.6402, "step": 8066 }, { "epoch": 0.9372059250653499, "grad_norm": 0.47310730814933777, "learning_rate": 0.0001, "loss": 1.6164, "step": 8067 }, { "epoch": 0.9373221028173104, "grad_norm": 0.5221018195152283, "learning_rate": 0.0001, "loss": 1.8971, "step": 8068 }, { "epoch": 0.937438280569271, "grad_norm": 0.48749321699142456, "learning_rate": 0.0001, "loss": 1.5722, "step": 8069 }, { "epoch": 0.9375544583212315, "grad_norm": 0.4679155945777893, "learning_rate": 0.0001, "loss": 1.5243, "step": 8070 }, { "epoch": 0.937670636073192, "grad_norm": 0.47157520055770874, "learning_rate": 0.0001, "loss": 1.4295, "step": 8071 }, { "epoch": 0.9377868138251525, "grad_norm": 0.4769766628742218, "learning_rate": 0.0001, "loss": 1.5411, "step": 8072 }, { "epoch": 0.937902991577113, "grad_norm": 0.47961485385894775, "learning_rate": 0.0001, "loss": 1.7161, "step": 8073 }, { "epoch": 0.9380191693290735, "grad_norm": 0.4806070327758789, "learning_rate": 0.0001, "loss": 1.5829, "step": 8074 }, { "epoch": 0.938135347081034, "grad_norm": 0.4962315261363983, "learning_rate": 0.0001, "loss": 1.658, "step": 8075 }, { "epoch": 0.9382515248329945, "grad_norm": 0.5138833522796631, "learning_rate": 0.0001, "loss": 1.6177, "step": 8076 }, { "epoch": 0.9383677025849549, "grad_norm": 0.4957931935787201, "learning_rate": 0.0001, "loss": 1.6425, "step": 8077 }, { "epoch": 0.9384838803369154, "grad_norm": 0.49878206849098206, "learning_rate": 0.0001, "loss": 1.6026, "step": 8078 }, { "epoch": 0.938600058088876, "grad_norm": 0.49658435583114624, "learning_rate": 0.0001, "loss": 1.5145, "step": 8079 }, { "epoch": 0.9387162358408365, "grad_norm": 0.508560836315155, "learning_rate": 0.0001, "loss": 1.5825, "step": 8080 }, { "epoch": 0.938832413592797, "grad_norm": 0.4853934943675995, "learning_rate": 0.0001, "loss": 1.5385, "step": 8081 }, { "epoch": 0.9389485913447575, "grad_norm": 0.478221595287323, "learning_rate": 0.0001, "loss": 1.5652, "step": 8082 }, { "epoch": 0.939064769096718, "grad_norm": 0.4989098012447357, "learning_rate": 0.0001, "loss": 1.647, "step": 8083 }, { "epoch": 0.9391809468486785, "grad_norm": 0.4413130283355713, "learning_rate": 0.0001, "loss": 1.4044, "step": 8084 }, { "epoch": 0.939297124600639, "grad_norm": 0.448485791683197, "learning_rate": 0.0001, "loss": 1.6516, "step": 8085 }, { "epoch": 0.9394133023525995, "grad_norm": 0.4658021330833435, "learning_rate": 0.0001, "loss": 1.5351, "step": 8086 }, { "epoch": 0.9395294801045599, "grad_norm": 0.48055848479270935, "learning_rate": 0.0001, "loss": 1.556, "step": 8087 }, { "epoch": 0.9396456578565204, "grad_norm": 0.47321316599845886, "learning_rate": 0.0001, "loss": 1.6199, "step": 8088 }, { "epoch": 0.9397618356084809, "grad_norm": 0.44191011786460876, "learning_rate": 0.0001, "loss": 1.3483, "step": 8089 }, { "epoch": 0.9398780133604415, "grad_norm": 0.5220785140991211, "learning_rate": 0.0001, "loss": 1.5912, "step": 8090 }, { "epoch": 0.939994191112402, "grad_norm": 0.4720672070980072, "learning_rate": 0.0001, "loss": 1.6028, "step": 8091 }, { "epoch": 0.9401103688643625, "grad_norm": 0.46814772486686707, "learning_rate": 0.0001, "loss": 1.5003, "step": 8092 }, { "epoch": 0.940226546616323, "grad_norm": 0.5168691873550415, "learning_rate": 0.0001, "loss": 1.8196, "step": 8093 }, { "epoch": 0.9403427243682835, "grad_norm": 0.4674499034881592, "learning_rate": 0.0001, "loss": 1.5951, "step": 8094 }, { "epoch": 0.940458902120244, "grad_norm": 0.49029046297073364, "learning_rate": 0.0001, "loss": 1.7264, "step": 8095 }, { "epoch": 0.9405750798722045, "grad_norm": 0.5169382691383362, "learning_rate": 0.0001, "loss": 1.5545, "step": 8096 }, { "epoch": 0.940691257624165, "grad_norm": 0.48217153549194336, "learning_rate": 0.0001, "loss": 1.5912, "step": 8097 }, { "epoch": 0.9408074353761254, "grad_norm": 0.49331241846084595, "learning_rate": 0.0001, "loss": 1.5199, "step": 8098 }, { "epoch": 0.9409236131280859, "grad_norm": 0.46810704469680786, "learning_rate": 0.0001, "loss": 1.553, "step": 8099 }, { "epoch": 0.9410397908800465, "grad_norm": 0.5021561980247498, "learning_rate": 0.0001, "loss": 1.7799, "step": 8100 }, { "epoch": 0.941155968632007, "grad_norm": 0.44212183356285095, "learning_rate": 0.0001, "loss": 1.5045, "step": 8101 }, { "epoch": 0.9412721463839675, "grad_norm": 0.4898991882801056, "learning_rate": 0.0001, "loss": 1.6766, "step": 8102 }, { "epoch": 0.941388324135928, "grad_norm": 0.45801275968551636, "learning_rate": 0.0001, "loss": 1.6186, "step": 8103 }, { "epoch": 0.9415045018878885, "grad_norm": 0.45079344511032104, "learning_rate": 0.0001, "loss": 1.542, "step": 8104 }, { "epoch": 0.941620679639849, "grad_norm": 0.4727324843406677, "learning_rate": 0.0001, "loss": 1.6742, "step": 8105 }, { "epoch": 0.9417368573918095, "grad_norm": 0.4644958972930908, "learning_rate": 0.0001, "loss": 1.4713, "step": 8106 }, { "epoch": 0.94185303514377, "grad_norm": 0.5391181707382202, "learning_rate": 0.0001, "loss": 1.7521, "step": 8107 }, { "epoch": 0.9419692128957304, "grad_norm": 0.5218836665153503, "learning_rate": 0.0001, "loss": 1.6922, "step": 8108 }, { "epoch": 0.9420853906476909, "grad_norm": 0.486215740442276, "learning_rate": 0.0001, "loss": 1.6442, "step": 8109 }, { "epoch": 0.9422015683996514, "grad_norm": 0.4889783263206482, "learning_rate": 0.0001, "loss": 1.5473, "step": 8110 }, { "epoch": 0.942317746151612, "grad_norm": 0.44267192482948303, "learning_rate": 0.0001, "loss": 1.4231, "step": 8111 }, { "epoch": 0.9424339239035725, "grad_norm": 0.49176719784736633, "learning_rate": 0.0001, "loss": 1.5304, "step": 8112 }, { "epoch": 0.942550101655533, "grad_norm": 0.5220740437507629, "learning_rate": 0.0001, "loss": 1.5346, "step": 8113 }, { "epoch": 0.9426662794074935, "grad_norm": 0.47036683559417725, "learning_rate": 0.0001, "loss": 1.4976, "step": 8114 }, { "epoch": 0.942782457159454, "grad_norm": 0.4946513772010803, "learning_rate": 0.0001, "loss": 1.4836, "step": 8115 }, { "epoch": 0.9428986349114145, "grad_norm": 0.48926377296447754, "learning_rate": 0.0001, "loss": 1.6243, "step": 8116 }, { "epoch": 0.943014812663375, "grad_norm": 0.46401751041412354, "learning_rate": 0.0001, "loss": 1.5319, "step": 8117 }, { "epoch": 0.9431309904153354, "grad_norm": 0.4936821162700653, "learning_rate": 0.0001, "loss": 1.5866, "step": 8118 }, { "epoch": 0.9432471681672959, "grad_norm": 0.5162927508354187, "learning_rate": 0.0001, "loss": 1.611, "step": 8119 }, { "epoch": 0.9433633459192564, "grad_norm": 0.49203792214393616, "learning_rate": 0.0001, "loss": 1.6242, "step": 8120 }, { "epoch": 0.943479523671217, "grad_norm": 0.48828327655792236, "learning_rate": 0.0001, "loss": 1.6006, "step": 8121 }, { "epoch": 0.9435957014231775, "grad_norm": 0.4814288914203644, "learning_rate": 0.0001, "loss": 1.6292, "step": 8122 }, { "epoch": 0.943711879175138, "grad_norm": 0.4744342267513275, "learning_rate": 0.0001, "loss": 1.6242, "step": 8123 }, { "epoch": 0.9438280569270985, "grad_norm": 0.4669947922229767, "learning_rate": 0.0001, "loss": 1.5537, "step": 8124 }, { "epoch": 0.943944234679059, "grad_norm": 0.4419528841972351, "learning_rate": 0.0001, "loss": 1.5611, "step": 8125 }, { "epoch": 0.9440604124310195, "grad_norm": 0.47628259658813477, "learning_rate": 0.0001, "loss": 1.4627, "step": 8126 }, { "epoch": 0.94417659018298, "grad_norm": 0.4776514768600464, "learning_rate": 0.0001, "loss": 1.439, "step": 8127 }, { "epoch": 0.9442927679349404, "grad_norm": 0.4446832537651062, "learning_rate": 0.0001, "loss": 1.5658, "step": 8128 }, { "epoch": 0.9444089456869009, "grad_norm": 0.5115212202072144, "learning_rate": 0.0001, "loss": 1.6909, "step": 8129 }, { "epoch": 0.9445251234388614, "grad_norm": 0.47382161021232605, "learning_rate": 0.0001, "loss": 1.409, "step": 8130 }, { "epoch": 0.9446413011908219, "grad_norm": 0.4738979637622833, "learning_rate": 0.0001, "loss": 1.4764, "step": 8131 }, { "epoch": 0.9447574789427825, "grad_norm": 0.4864945113658905, "learning_rate": 0.0001, "loss": 1.6893, "step": 8132 }, { "epoch": 0.944873656694743, "grad_norm": 0.5098589658737183, "learning_rate": 0.0001, "loss": 1.4883, "step": 8133 }, { "epoch": 0.9449898344467035, "grad_norm": 0.4890642464160919, "learning_rate": 0.0001, "loss": 1.7127, "step": 8134 }, { "epoch": 0.945106012198664, "grad_norm": 0.499066025018692, "learning_rate": 0.0001, "loss": 1.722, "step": 8135 }, { "epoch": 0.9452221899506245, "grad_norm": 0.4744119942188263, "learning_rate": 0.0001, "loss": 1.6039, "step": 8136 }, { "epoch": 0.945338367702585, "grad_norm": 0.45313483476638794, "learning_rate": 0.0001, "loss": 1.5495, "step": 8137 }, { "epoch": 0.9454545454545454, "grad_norm": 0.4957541227340698, "learning_rate": 0.0001, "loss": 1.7737, "step": 8138 }, { "epoch": 0.9455707232065059, "grad_norm": 0.46510589122772217, "learning_rate": 0.0001, "loss": 1.5611, "step": 8139 }, { "epoch": 0.9456869009584664, "grad_norm": 0.49345633387565613, "learning_rate": 0.0001, "loss": 1.5756, "step": 8140 }, { "epoch": 0.9458030787104269, "grad_norm": 0.44916290044784546, "learning_rate": 0.0001, "loss": 1.5718, "step": 8141 }, { "epoch": 0.9459192564623875, "grad_norm": 0.5268651247024536, "learning_rate": 0.0001, "loss": 1.6663, "step": 8142 }, { "epoch": 0.946035434214348, "grad_norm": 0.4611073136329651, "learning_rate": 0.0001, "loss": 1.5264, "step": 8143 }, { "epoch": 0.9461516119663085, "grad_norm": 0.4878610372543335, "learning_rate": 0.0001, "loss": 1.6375, "step": 8144 }, { "epoch": 0.946267789718269, "grad_norm": 0.48265448212623596, "learning_rate": 0.0001, "loss": 1.5268, "step": 8145 }, { "epoch": 0.9463839674702295, "grad_norm": 0.4883013963699341, "learning_rate": 0.0001, "loss": 1.5256, "step": 8146 }, { "epoch": 0.94650014522219, "grad_norm": 0.4902404844760895, "learning_rate": 0.0001, "loss": 1.5719, "step": 8147 }, { "epoch": 0.9466163229741504, "grad_norm": 0.48211053013801575, "learning_rate": 0.0001, "loss": 1.4926, "step": 8148 }, { "epoch": 0.9467325007261109, "grad_norm": 0.4793367087841034, "learning_rate": 0.0001, "loss": 1.5874, "step": 8149 }, { "epoch": 0.9468486784780714, "grad_norm": 0.47337889671325684, "learning_rate": 0.0001, "loss": 1.6667, "step": 8150 }, { "epoch": 0.9469648562300319, "grad_norm": 0.45876795053482056, "learning_rate": 0.0001, "loss": 1.4361, "step": 8151 }, { "epoch": 0.9470810339819925, "grad_norm": 0.5049482583999634, "learning_rate": 0.0001, "loss": 1.7341, "step": 8152 }, { "epoch": 0.947197211733953, "grad_norm": 0.4585045874118805, "learning_rate": 0.0001, "loss": 1.6708, "step": 8153 }, { "epoch": 0.9473133894859135, "grad_norm": 0.48491230607032776, "learning_rate": 0.0001, "loss": 1.6093, "step": 8154 }, { "epoch": 0.947429567237874, "grad_norm": 0.48654916882514954, "learning_rate": 0.0001, "loss": 1.7573, "step": 8155 }, { "epoch": 0.9475457449898345, "grad_norm": 0.4684811234474182, "learning_rate": 0.0001, "loss": 1.5297, "step": 8156 }, { "epoch": 0.947661922741795, "grad_norm": 0.4712132513523102, "learning_rate": 0.0001, "loss": 1.6316, "step": 8157 }, { "epoch": 0.9477781004937554, "grad_norm": 0.4901881217956543, "learning_rate": 0.0001, "loss": 1.8043, "step": 8158 }, { "epoch": 0.9478942782457159, "grad_norm": 0.4755280017852783, "learning_rate": 0.0001, "loss": 1.6087, "step": 8159 }, { "epoch": 0.9480104559976764, "grad_norm": 0.5146501064300537, "learning_rate": 0.0001, "loss": 1.5001, "step": 8160 }, { "epoch": 0.9481266337496369, "grad_norm": 0.4786891043186188, "learning_rate": 0.0001, "loss": 1.6811, "step": 8161 }, { "epoch": 0.9482428115015974, "grad_norm": 0.44507408142089844, "learning_rate": 0.0001, "loss": 1.5259, "step": 8162 }, { "epoch": 0.948358989253558, "grad_norm": 0.4719574749469757, "learning_rate": 0.0001, "loss": 1.6408, "step": 8163 }, { "epoch": 0.9484751670055185, "grad_norm": 0.48670482635498047, "learning_rate": 0.0001, "loss": 1.5036, "step": 8164 }, { "epoch": 0.948591344757479, "grad_norm": 0.4823938012123108, "learning_rate": 0.0001, "loss": 1.5271, "step": 8165 }, { "epoch": 0.9487075225094395, "grad_norm": 0.4881124794483185, "learning_rate": 0.0001, "loss": 1.607, "step": 8166 }, { "epoch": 0.9488237002614, "grad_norm": 0.4872508943080902, "learning_rate": 0.0001, "loss": 1.565, "step": 8167 }, { "epoch": 0.9489398780133604, "grad_norm": 0.5031945109367371, "learning_rate": 0.0001, "loss": 1.6907, "step": 8168 }, { "epoch": 0.9490560557653209, "grad_norm": 0.5044823884963989, "learning_rate": 0.0001, "loss": 1.6954, "step": 8169 }, { "epoch": 0.9491722335172814, "grad_norm": 0.48246756196022034, "learning_rate": 0.0001, "loss": 1.6214, "step": 8170 }, { "epoch": 0.9492884112692419, "grad_norm": 0.4963812232017517, "learning_rate": 0.0001, "loss": 1.531, "step": 8171 }, { "epoch": 0.9494045890212024, "grad_norm": 0.4816388785839081, "learning_rate": 0.0001, "loss": 1.5913, "step": 8172 }, { "epoch": 0.949520766773163, "grad_norm": 0.4748592674732208, "learning_rate": 0.0001, "loss": 1.4296, "step": 8173 }, { "epoch": 0.9496369445251235, "grad_norm": 0.46375009417533875, "learning_rate": 0.0001, "loss": 1.4255, "step": 8174 }, { "epoch": 0.949753122277084, "grad_norm": 0.45289376378059387, "learning_rate": 0.0001, "loss": 1.558, "step": 8175 }, { "epoch": 0.9498693000290445, "grad_norm": 0.48634836077690125, "learning_rate": 0.0001, "loss": 1.6393, "step": 8176 }, { "epoch": 0.949985477781005, "grad_norm": 0.49181076884269714, "learning_rate": 0.0001, "loss": 1.6262, "step": 8177 }, { "epoch": 0.9501016555329654, "grad_norm": 0.45793280005455017, "learning_rate": 0.0001, "loss": 1.4994, "step": 8178 }, { "epoch": 0.9502178332849259, "grad_norm": 0.4974653720855713, "learning_rate": 0.0001, "loss": 1.6517, "step": 8179 }, { "epoch": 0.9503340110368864, "grad_norm": 0.4716089367866516, "learning_rate": 0.0001, "loss": 1.5849, "step": 8180 }, { "epoch": 0.9504501887888469, "grad_norm": 0.45210596919059753, "learning_rate": 0.0001, "loss": 1.4941, "step": 8181 }, { "epoch": 0.9505663665408074, "grad_norm": 0.5031945705413818, "learning_rate": 0.0001, "loss": 1.6215, "step": 8182 }, { "epoch": 0.9506825442927679, "grad_norm": 0.46104729175567627, "learning_rate": 0.0001, "loss": 1.5813, "step": 8183 }, { "epoch": 0.9507987220447285, "grad_norm": 0.45279985666275024, "learning_rate": 0.0001, "loss": 1.6108, "step": 8184 }, { "epoch": 0.950914899796689, "grad_norm": 0.618247389793396, "learning_rate": 0.0001, "loss": 1.635, "step": 8185 }, { "epoch": 0.9510310775486495, "grad_norm": 0.47192826867103577, "learning_rate": 0.0001, "loss": 1.4801, "step": 8186 }, { "epoch": 0.95114725530061, "grad_norm": 0.46734699606895447, "learning_rate": 0.0001, "loss": 1.567, "step": 8187 }, { "epoch": 0.9512634330525704, "grad_norm": 0.46649816632270813, "learning_rate": 0.0001, "loss": 1.3187, "step": 8188 }, { "epoch": 0.9513796108045309, "grad_norm": 0.5192746520042419, "learning_rate": 0.0001, "loss": 1.5849, "step": 8189 }, { "epoch": 0.9514957885564914, "grad_norm": 0.476370632648468, "learning_rate": 0.0001, "loss": 1.568, "step": 8190 }, { "epoch": 0.9516119663084519, "grad_norm": 0.49778425693511963, "learning_rate": 0.0001, "loss": 1.6588, "step": 8191 }, { "epoch": 0.9517281440604124, "grad_norm": 0.4732555150985718, "learning_rate": 0.0001, "loss": 1.5167, "step": 8192 }, { "epoch": 0.9518443218123729, "grad_norm": 0.47929319739341736, "learning_rate": 0.0001, "loss": 1.646, "step": 8193 }, { "epoch": 0.9519604995643335, "grad_norm": 0.44977056980133057, "learning_rate": 0.0001, "loss": 1.4695, "step": 8194 }, { "epoch": 0.952076677316294, "grad_norm": 0.5259702205657959, "learning_rate": 0.0001, "loss": 1.7958, "step": 8195 }, { "epoch": 0.9521928550682545, "grad_norm": 0.4790816307067871, "learning_rate": 0.0001, "loss": 1.4841, "step": 8196 }, { "epoch": 0.952309032820215, "grad_norm": 0.48329922556877136, "learning_rate": 0.0001, "loss": 1.6321, "step": 8197 }, { "epoch": 0.9524252105721754, "grad_norm": 0.451102077960968, "learning_rate": 0.0001, "loss": 1.4469, "step": 8198 }, { "epoch": 0.9525413883241359, "grad_norm": 0.4747765064239502, "learning_rate": 0.0001, "loss": 1.6745, "step": 8199 }, { "epoch": 0.9526575660760964, "grad_norm": 0.5516743063926697, "learning_rate": 0.0001, "loss": 1.7372, "step": 8200 }, { "epoch": 0.9527737438280569, "grad_norm": 0.4885726869106293, "learning_rate": 0.0001, "loss": 1.5667, "step": 8201 }, { "epoch": 0.9528899215800174, "grad_norm": 0.4775313436985016, "learning_rate": 0.0001, "loss": 1.5917, "step": 8202 }, { "epoch": 0.9530060993319779, "grad_norm": 0.4628119170665741, "learning_rate": 0.0001, "loss": 1.4747, "step": 8203 }, { "epoch": 0.9531222770839384, "grad_norm": 0.4696086049079895, "learning_rate": 0.0001, "loss": 1.6371, "step": 8204 }, { "epoch": 0.953238454835899, "grad_norm": 0.4801238179206848, "learning_rate": 0.0001, "loss": 1.6711, "step": 8205 }, { "epoch": 0.9533546325878595, "grad_norm": 0.46630024909973145, "learning_rate": 0.0001, "loss": 1.6602, "step": 8206 }, { "epoch": 0.95347081033982, "grad_norm": 0.5154693722724915, "learning_rate": 0.0001, "loss": 1.7074, "step": 8207 }, { "epoch": 0.9535869880917804, "grad_norm": 0.456986665725708, "learning_rate": 0.0001, "loss": 1.4658, "step": 8208 }, { "epoch": 0.9537031658437409, "grad_norm": 0.4490494430065155, "learning_rate": 0.0001, "loss": 1.4067, "step": 8209 }, { "epoch": 0.9538193435957014, "grad_norm": 0.4910639226436615, "learning_rate": 0.0001, "loss": 1.6069, "step": 8210 }, { "epoch": 0.9539355213476619, "grad_norm": 0.5007901787757874, "learning_rate": 0.0001, "loss": 1.6315, "step": 8211 }, { "epoch": 0.9540516990996224, "grad_norm": 0.45628076791763306, "learning_rate": 0.0001, "loss": 1.5923, "step": 8212 }, { "epoch": 0.9541678768515829, "grad_norm": 0.48491212725639343, "learning_rate": 0.0001, "loss": 1.605, "step": 8213 }, { "epoch": 0.9542840546035434, "grad_norm": 0.48505932092666626, "learning_rate": 0.0001, "loss": 1.4959, "step": 8214 }, { "epoch": 0.954400232355504, "grad_norm": 0.5037823915481567, "learning_rate": 0.0001, "loss": 1.4856, "step": 8215 }, { "epoch": 0.9545164101074645, "grad_norm": 0.5135097503662109, "learning_rate": 0.0001, "loss": 1.7156, "step": 8216 }, { "epoch": 0.954632587859425, "grad_norm": 0.48048651218414307, "learning_rate": 0.0001, "loss": 1.7173, "step": 8217 }, { "epoch": 0.9547487656113854, "grad_norm": 0.48271870613098145, "learning_rate": 0.0001, "loss": 1.5344, "step": 8218 }, { "epoch": 0.9548649433633459, "grad_norm": 0.48280853033065796, "learning_rate": 0.0001, "loss": 1.7241, "step": 8219 }, { "epoch": 0.9549811211153064, "grad_norm": 0.4850626289844513, "learning_rate": 0.0001, "loss": 1.6221, "step": 8220 }, { "epoch": 0.9550972988672669, "grad_norm": 0.48917168378829956, "learning_rate": 0.0001, "loss": 1.6632, "step": 8221 }, { "epoch": 0.9552134766192274, "grad_norm": 0.4904051423072815, "learning_rate": 0.0001, "loss": 1.627, "step": 8222 }, { "epoch": 0.9553296543711879, "grad_norm": 0.47541365027427673, "learning_rate": 0.0001, "loss": 1.5431, "step": 8223 }, { "epoch": 0.9554458321231484, "grad_norm": 0.49688780307769775, "learning_rate": 0.0001, "loss": 1.7412, "step": 8224 }, { "epoch": 0.9555620098751089, "grad_norm": 0.5080480575561523, "learning_rate": 0.0001, "loss": 1.6331, "step": 8225 }, { "epoch": 0.9556781876270695, "grad_norm": 0.48701146245002747, "learning_rate": 0.0001, "loss": 1.5155, "step": 8226 }, { "epoch": 0.95579436537903, "grad_norm": 0.4538536071777344, "learning_rate": 0.0001, "loss": 1.5677, "step": 8227 }, { "epoch": 0.9559105431309904, "grad_norm": 0.47831106185913086, "learning_rate": 0.0001, "loss": 1.5717, "step": 8228 }, { "epoch": 0.9560267208829509, "grad_norm": 0.4827806055545807, "learning_rate": 0.0001, "loss": 1.5685, "step": 8229 }, { "epoch": 0.9561428986349114, "grad_norm": 0.4946243166923523, "learning_rate": 0.0001, "loss": 1.6507, "step": 8230 }, { "epoch": 0.9562590763868719, "grad_norm": 0.5115143060684204, "learning_rate": 0.0001, "loss": 1.6444, "step": 8231 }, { "epoch": 0.9563752541388324, "grad_norm": 0.4955557584762573, "learning_rate": 0.0001, "loss": 1.6565, "step": 8232 }, { "epoch": 0.9564914318907929, "grad_norm": 0.45829346776008606, "learning_rate": 0.0001, "loss": 1.4938, "step": 8233 }, { "epoch": 0.9566076096427534, "grad_norm": 0.4455798864364624, "learning_rate": 0.0001, "loss": 1.5516, "step": 8234 }, { "epoch": 0.9567237873947139, "grad_norm": 0.48835891485214233, "learning_rate": 0.0001, "loss": 1.6687, "step": 8235 }, { "epoch": 0.9568399651466745, "grad_norm": 0.46689534187316895, "learning_rate": 0.0001, "loss": 1.6003, "step": 8236 }, { "epoch": 0.956956142898635, "grad_norm": 0.5193210244178772, "learning_rate": 0.0001, "loss": 1.4655, "step": 8237 }, { "epoch": 0.9570723206505954, "grad_norm": 0.46074503660202026, "learning_rate": 0.0001, "loss": 1.4701, "step": 8238 }, { "epoch": 0.9571884984025559, "grad_norm": 0.48494353890419006, "learning_rate": 0.0001, "loss": 1.5749, "step": 8239 }, { "epoch": 0.9573046761545164, "grad_norm": 0.45777463912963867, "learning_rate": 0.0001, "loss": 1.5426, "step": 8240 }, { "epoch": 0.9574208539064769, "grad_norm": 0.49893805384635925, "learning_rate": 0.0001, "loss": 1.6708, "step": 8241 }, { "epoch": 0.9575370316584374, "grad_norm": 0.500220537185669, "learning_rate": 0.0001, "loss": 1.6171, "step": 8242 }, { "epoch": 0.9576532094103979, "grad_norm": 0.4701831638813019, "learning_rate": 0.0001, "loss": 1.542, "step": 8243 }, { "epoch": 0.9577693871623584, "grad_norm": 0.465260773897171, "learning_rate": 0.0001, "loss": 1.5155, "step": 8244 }, { "epoch": 0.9578855649143189, "grad_norm": 0.4423646628856659, "learning_rate": 0.0001, "loss": 1.5118, "step": 8245 }, { "epoch": 0.9580017426662794, "grad_norm": 0.49591514468193054, "learning_rate": 0.0001, "loss": 1.6665, "step": 8246 }, { "epoch": 0.95811792041824, "grad_norm": 0.4624507427215576, "learning_rate": 0.0001, "loss": 1.5759, "step": 8247 }, { "epoch": 0.9582340981702004, "grad_norm": 0.4537637531757355, "learning_rate": 0.0001, "loss": 1.3326, "step": 8248 }, { "epoch": 0.9583502759221609, "grad_norm": 0.45610177516937256, "learning_rate": 0.0001, "loss": 1.5874, "step": 8249 }, { "epoch": 0.9584664536741214, "grad_norm": 0.5058176517486572, "learning_rate": 0.0001, "loss": 1.5944, "step": 8250 }, { "epoch": 0.9585826314260819, "grad_norm": 0.47740715742111206, "learning_rate": 0.0001, "loss": 1.5308, "step": 8251 }, { "epoch": 0.9586988091780424, "grad_norm": 0.5462493300437927, "learning_rate": 0.0001, "loss": 1.643, "step": 8252 }, { "epoch": 0.9588149869300029, "grad_norm": 0.4755356013774872, "learning_rate": 0.0001, "loss": 1.4913, "step": 8253 }, { "epoch": 0.9589311646819634, "grad_norm": 0.4820975661277771, "learning_rate": 0.0001, "loss": 1.534, "step": 8254 }, { "epoch": 0.9590473424339239, "grad_norm": 0.4660854637622833, "learning_rate": 0.0001, "loss": 1.5667, "step": 8255 }, { "epoch": 0.9591635201858844, "grad_norm": 0.5366430282592773, "learning_rate": 0.0001, "loss": 1.6443, "step": 8256 }, { "epoch": 0.959279697937845, "grad_norm": 0.5088802576065063, "learning_rate": 0.0001, "loss": 1.6137, "step": 8257 }, { "epoch": 0.9593958756898054, "grad_norm": 0.5079680681228638, "learning_rate": 0.0001, "loss": 1.753, "step": 8258 }, { "epoch": 0.9595120534417659, "grad_norm": 0.4622834622859955, "learning_rate": 0.0001, "loss": 1.5732, "step": 8259 }, { "epoch": 0.9596282311937264, "grad_norm": 0.5069734454154968, "learning_rate": 0.0001, "loss": 1.6656, "step": 8260 }, { "epoch": 0.9597444089456869, "grad_norm": 0.493930459022522, "learning_rate": 0.0001, "loss": 1.4827, "step": 8261 }, { "epoch": 0.9598605866976474, "grad_norm": 0.5060780644416809, "learning_rate": 0.0001, "loss": 1.4897, "step": 8262 }, { "epoch": 0.9599767644496079, "grad_norm": 0.48548170924186707, "learning_rate": 0.0001, "loss": 1.6126, "step": 8263 }, { "epoch": 0.9600929422015684, "grad_norm": 0.45788517594337463, "learning_rate": 0.0001, "loss": 1.505, "step": 8264 }, { "epoch": 0.9602091199535289, "grad_norm": 0.46271592378616333, "learning_rate": 0.0001, "loss": 1.7345, "step": 8265 }, { "epoch": 0.9603252977054894, "grad_norm": 0.48142072558403015, "learning_rate": 0.0001, "loss": 1.627, "step": 8266 }, { "epoch": 0.9604414754574498, "grad_norm": 0.47520777583122253, "learning_rate": 0.0001, "loss": 1.6576, "step": 8267 }, { "epoch": 0.9605576532094104, "grad_norm": 0.4597654938697815, "learning_rate": 0.0001, "loss": 1.5501, "step": 8268 }, { "epoch": 0.9606738309613709, "grad_norm": 0.4608597159385681, "learning_rate": 0.0001, "loss": 1.511, "step": 8269 }, { "epoch": 0.9607900087133314, "grad_norm": 0.4679330289363861, "learning_rate": 0.0001, "loss": 1.4058, "step": 8270 }, { "epoch": 0.9609061864652919, "grad_norm": 0.49732252955436707, "learning_rate": 0.0001, "loss": 1.5015, "step": 8271 }, { "epoch": 0.9610223642172524, "grad_norm": 0.4775616526603699, "learning_rate": 0.0001, "loss": 1.5531, "step": 8272 }, { "epoch": 0.9611385419692129, "grad_norm": 0.5359445214271545, "learning_rate": 0.0001, "loss": 1.58, "step": 8273 }, { "epoch": 0.9612547197211734, "grad_norm": 0.4927753508090973, "learning_rate": 0.0001, "loss": 1.6331, "step": 8274 }, { "epoch": 0.9613708974731339, "grad_norm": 0.48681047558784485, "learning_rate": 0.0001, "loss": 1.6162, "step": 8275 }, { "epoch": 0.9614870752250944, "grad_norm": 0.49308332800865173, "learning_rate": 0.0001, "loss": 1.563, "step": 8276 }, { "epoch": 0.9616032529770548, "grad_norm": 0.48825904726982117, "learning_rate": 0.0001, "loss": 1.6006, "step": 8277 }, { "epoch": 0.9617194307290154, "grad_norm": 0.5138652324676514, "learning_rate": 0.0001, "loss": 1.6027, "step": 8278 }, { "epoch": 0.9618356084809759, "grad_norm": 0.5201594233512878, "learning_rate": 0.0001, "loss": 1.6361, "step": 8279 }, { "epoch": 0.9619517862329364, "grad_norm": 0.5101884603500366, "learning_rate": 0.0001, "loss": 1.7062, "step": 8280 }, { "epoch": 0.9620679639848969, "grad_norm": 0.45568764209747314, "learning_rate": 0.0001, "loss": 1.5697, "step": 8281 }, { "epoch": 0.9621841417368574, "grad_norm": 0.46922385692596436, "learning_rate": 0.0001, "loss": 1.5052, "step": 8282 }, { "epoch": 0.9623003194888179, "grad_norm": 0.47019103169441223, "learning_rate": 0.0001, "loss": 1.6064, "step": 8283 }, { "epoch": 0.9624164972407784, "grad_norm": 0.5026654601097107, "learning_rate": 0.0001, "loss": 1.6182, "step": 8284 }, { "epoch": 0.9625326749927389, "grad_norm": 0.5346139669418335, "learning_rate": 0.0001, "loss": 1.6782, "step": 8285 }, { "epoch": 0.9626488527446994, "grad_norm": 0.4690621495246887, "learning_rate": 0.0001, "loss": 1.5121, "step": 8286 }, { "epoch": 0.9627650304966598, "grad_norm": 0.5523574948310852, "learning_rate": 0.0001, "loss": 1.8614, "step": 8287 }, { "epoch": 0.9628812082486203, "grad_norm": 0.5208613872528076, "learning_rate": 0.0001, "loss": 1.69, "step": 8288 }, { "epoch": 0.9629973860005809, "grad_norm": 0.5181357264518738, "learning_rate": 0.0001, "loss": 1.6707, "step": 8289 }, { "epoch": 0.9631135637525414, "grad_norm": 0.49998828768730164, "learning_rate": 0.0001, "loss": 1.57, "step": 8290 }, { "epoch": 0.9632297415045019, "grad_norm": 0.47236523032188416, "learning_rate": 0.0001, "loss": 1.4172, "step": 8291 }, { "epoch": 0.9633459192564624, "grad_norm": 0.49933215975761414, "learning_rate": 0.0001, "loss": 1.6872, "step": 8292 }, { "epoch": 0.9634620970084229, "grad_norm": 0.48672953248023987, "learning_rate": 0.0001, "loss": 1.5394, "step": 8293 }, { "epoch": 0.9635782747603834, "grad_norm": 0.45596054196357727, "learning_rate": 0.0001, "loss": 1.4982, "step": 8294 }, { "epoch": 0.9636944525123439, "grad_norm": 0.47312960028648376, "learning_rate": 0.0001, "loss": 1.505, "step": 8295 }, { "epoch": 0.9638106302643044, "grad_norm": 0.5012199282646179, "learning_rate": 0.0001, "loss": 1.499, "step": 8296 }, { "epoch": 0.9639268080162648, "grad_norm": 0.516236424446106, "learning_rate": 0.0001, "loss": 1.6011, "step": 8297 }, { "epoch": 0.9640429857682253, "grad_norm": 0.44847118854522705, "learning_rate": 0.0001, "loss": 1.5113, "step": 8298 }, { "epoch": 0.9641591635201859, "grad_norm": 0.4940093457698822, "learning_rate": 0.0001, "loss": 1.5627, "step": 8299 }, { "epoch": 0.9642753412721464, "grad_norm": 0.4744294285774231, "learning_rate": 0.0001, "loss": 1.5101, "step": 8300 }, { "epoch": 0.9643915190241069, "grad_norm": 0.4880334436893463, "learning_rate": 0.0001, "loss": 1.5232, "step": 8301 }, { "epoch": 0.9645076967760674, "grad_norm": 0.49763065576553345, "learning_rate": 0.0001, "loss": 1.6985, "step": 8302 }, { "epoch": 0.9646238745280279, "grad_norm": 0.49351394176483154, "learning_rate": 0.0001, "loss": 1.7411, "step": 8303 }, { "epoch": 0.9647400522799884, "grad_norm": 0.4660297930240631, "learning_rate": 0.0001, "loss": 1.5433, "step": 8304 }, { "epoch": 0.9648562300319489, "grad_norm": 0.4791782796382904, "learning_rate": 0.0001, "loss": 1.6004, "step": 8305 }, { "epoch": 0.9649724077839094, "grad_norm": 0.49400556087493896, "learning_rate": 0.0001, "loss": 1.5253, "step": 8306 }, { "epoch": 0.9650885855358698, "grad_norm": 0.4803016483783722, "learning_rate": 0.0001, "loss": 1.5192, "step": 8307 }, { "epoch": 0.9652047632878303, "grad_norm": 0.49062761664390564, "learning_rate": 0.0001, "loss": 1.6898, "step": 8308 }, { "epoch": 0.9653209410397908, "grad_norm": 0.5109800696372986, "learning_rate": 0.0001, "loss": 1.631, "step": 8309 }, { "epoch": 0.9654371187917514, "grad_norm": 0.503459632396698, "learning_rate": 0.0001, "loss": 1.6486, "step": 8310 }, { "epoch": 0.9655532965437119, "grad_norm": 0.47792255878448486, "learning_rate": 0.0001, "loss": 1.5972, "step": 8311 }, { "epoch": 0.9656694742956724, "grad_norm": 0.4999959468841553, "learning_rate": 0.0001, "loss": 1.6538, "step": 8312 }, { "epoch": 0.9657856520476329, "grad_norm": 0.49376481771469116, "learning_rate": 0.0001, "loss": 1.6086, "step": 8313 }, { "epoch": 0.9659018297995934, "grad_norm": 0.4859766662120819, "learning_rate": 0.0001, "loss": 1.5698, "step": 8314 }, { "epoch": 0.9660180075515539, "grad_norm": 0.4656546413898468, "learning_rate": 0.0001, "loss": 1.5478, "step": 8315 }, { "epoch": 0.9661341853035144, "grad_norm": 0.48763415217399597, "learning_rate": 0.0001, "loss": 1.5704, "step": 8316 }, { "epoch": 0.9662503630554748, "grad_norm": 0.479206919670105, "learning_rate": 0.0001, "loss": 1.5964, "step": 8317 }, { "epoch": 0.9663665408074353, "grad_norm": 0.48830246925354004, "learning_rate": 0.0001, "loss": 1.6451, "step": 8318 }, { "epoch": 0.9664827185593958, "grad_norm": 0.49852919578552246, "learning_rate": 0.0001, "loss": 1.5722, "step": 8319 }, { "epoch": 0.9665988963113564, "grad_norm": 0.45353084802627563, "learning_rate": 0.0001, "loss": 1.4958, "step": 8320 }, { "epoch": 0.9667150740633169, "grad_norm": 0.49439936876296997, "learning_rate": 0.0001, "loss": 1.6234, "step": 8321 }, { "epoch": 0.9668312518152774, "grad_norm": 0.48684781789779663, "learning_rate": 0.0001, "loss": 1.5654, "step": 8322 }, { "epoch": 0.9669474295672379, "grad_norm": 0.47375577688217163, "learning_rate": 0.0001, "loss": 1.5868, "step": 8323 }, { "epoch": 0.9670636073191984, "grad_norm": 0.45029014348983765, "learning_rate": 0.0001, "loss": 1.5204, "step": 8324 }, { "epoch": 0.9671797850711589, "grad_norm": 0.49063849449157715, "learning_rate": 0.0001, "loss": 1.6855, "step": 8325 }, { "epoch": 0.9672959628231194, "grad_norm": 0.5382382273674011, "learning_rate": 0.0001, "loss": 1.6313, "step": 8326 }, { "epoch": 0.9674121405750798, "grad_norm": 0.49472776055336, "learning_rate": 0.0001, "loss": 1.5912, "step": 8327 }, { "epoch": 0.9675283183270403, "grad_norm": 0.4821051359176636, "learning_rate": 0.0001, "loss": 1.6096, "step": 8328 }, { "epoch": 0.9676444960790008, "grad_norm": 0.47823530435562134, "learning_rate": 0.0001, "loss": 1.612, "step": 8329 }, { "epoch": 0.9677606738309613, "grad_norm": 0.4891481399536133, "learning_rate": 0.0001, "loss": 1.5234, "step": 8330 }, { "epoch": 0.9678768515829219, "grad_norm": 0.49164843559265137, "learning_rate": 0.0001, "loss": 1.6268, "step": 8331 }, { "epoch": 0.9679930293348824, "grad_norm": 0.473734974861145, "learning_rate": 0.0001, "loss": 1.5407, "step": 8332 }, { "epoch": 0.9681092070868429, "grad_norm": 0.47365325689315796, "learning_rate": 0.0001, "loss": 1.4787, "step": 8333 }, { "epoch": 0.9682253848388034, "grad_norm": 0.4796622395515442, "learning_rate": 0.0001, "loss": 1.4766, "step": 8334 }, { "epoch": 0.9683415625907639, "grad_norm": 0.5099849700927734, "learning_rate": 0.0001, "loss": 1.5258, "step": 8335 }, { "epoch": 0.9684577403427244, "grad_norm": 0.4894762337207794, "learning_rate": 0.0001, "loss": 1.5813, "step": 8336 }, { "epoch": 0.9685739180946848, "grad_norm": 0.5052227973937988, "learning_rate": 0.0001, "loss": 1.633, "step": 8337 }, { "epoch": 0.9686900958466453, "grad_norm": 0.5599405169487, "learning_rate": 0.0001, "loss": 1.7586, "step": 8338 }, { "epoch": 0.9688062735986058, "grad_norm": 0.5493488907814026, "learning_rate": 0.0001, "loss": 1.5712, "step": 8339 }, { "epoch": 0.9689224513505663, "grad_norm": 0.47974830865859985, "learning_rate": 0.0001, "loss": 1.6222, "step": 8340 }, { "epoch": 0.9690386291025269, "grad_norm": 0.46246853470802307, "learning_rate": 0.0001, "loss": 1.6754, "step": 8341 }, { "epoch": 0.9691548068544874, "grad_norm": 0.5237904787063599, "learning_rate": 0.0001, "loss": 1.7271, "step": 8342 }, { "epoch": 0.9692709846064479, "grad_norm": 0.49900978803634644, "learning_rate": 0.0001, "loss": 1.6926, "step": 8343 }, { "epoch": 0.9693871623584084, "grad_norm": 0.45971980690956116, "learning_rate": 0.0001, "loss": 1.4754, "step": 8344 }, { "epoch": 0.9695033401103689, "grad_norm": 0.46642741560935974, "learning_rate": 0.0001, "loss": 1.5411, "step": 8345 }, { "epoch": 0.9696195178623294, "grad_norm": 0.4861880838871002, "learning_rate": 0.0001, "loss": 1.6039, "step": 8346 }, { "epoch": 0.9697356956142898, "grad_norm": 0.49903160333633423, "learning_rate": 0.0001, "loss": 1.6349, "step": 8347 }, { "epoch": 0.9698518733662503, "grad_norm": 0.47082969546318054, "learning_rate": 0.0001, "loss": 1.601, "step": 8348 }, { "epoch": 0.9699680511182108, "grad_norm": 0.48401105403900146, "learning_rate": 0.0001, "loss": 1.5224, "step": 8349 }, { "epoch": 0.9700842288701713, "grad_norm": 0.4795251190662384, "learning_rate": 0.0001, "loss": 1.6493, "step": 8350 }, { "epoch": 0.9702004066221319, "grad_norm": 0.4561566114425659, "learning_rate": 0.0001, "loss": 1.4822, "step": 8351 }, { "epoch": 0.9703165843740924, "grad_norm": 0.48859137296676636, "learning_rate": 0.0001, "loss": 1.5395, "step": 8352 }, { "epoch": 0.9704327621260529, "grad_norm": 0.447553426027298, "learning_rate": 0.0001, "loss": 1.4135, "step": 8353 }, { "epoch": 0.9705489398780134, "grad_norm": 0.4990467429161072, "learning_rate": 0.0001, "loss": 1.561, "step": 8354 }, { "epoch": 0.9706651176299739, "grad_norm": 0.5028551816940308, "learning_rate": 0.0001, "loss": 1.7281, "step": 8355 }, { "epoch": 0.9707812953819344, "grad_norm": 0.49683597683906555, "learning_rate": 0.0001, "loss": 1.5454, "step": 8356 }, { "epoch": 0.9708974731338949, "grad_norm": 0.47198501229286194, "learning_rate": 0.0001, "loss": 1.6287, "step": 8357 }, { "epoch": 0.9710136508858553, "grad_norm": 0.5202149152755737, "learning_rate": 0.0001, "loss": 1.6653, "step": 8358 }, { "epoch": 0.9711298286378158, "grad_norm": 0.505222737789154, "learning_rate": 0.0001, "loss": 1.687, "step": 8359 }, { "epoch": 0.9712460063897763, "grad_norm": 0.5158810615539551, "learning_rate": 0.0001, "loss": 1.6364, "step": 8360 }, { "epoch": 0.9713621841417368, "grad_norm": 0.5185474157333374, "learning_rate": 0.0001, "loss": 1.8292, "step": 8361 }, { "epoch": 0.9714783618936974, "grad_norm": 0.479807049036026, "learning_rate": 0.0001, "loss": 1.5451, "step": 8362 }, { "epoch": 0.9715945396456579, "grad_norm": 0.47190988063812256, "learning_rate": 0.0001, "loss": 1.6801, "step": 8363 }, { "epoch": 0.9717107173976184, "grad_norm": 0.47049620747566223, "learning_rate": 0.0001, "loss": 1.5388, "step": 8364 }, { "epoch": 0.9718268951495789, "grad_norm": 0.49020835757255554, "learning_rate": 0.0001, "loss": 1.6445, "step": 8365 }, { "epoch": 0.9719430729015394, "grad_norm": 0.49755486845970154, "learning_rate": 0.0001, "loss": 1.5634, "step": 8366 }, { "epoch": 0.9720592506534999, "grad_norm": 0.4959685504436493, "learning_rate": 0.0001, "loss": 1.6562, "step": 8367 }, { "epoch": 0.9721754284054603, "grad_norm": 0.5201342701911926, "learning_rate": 0.0001, "loss": 1.5855, "step": 8368 }, { "epoch": 0.9722916061574208, "grad_norm": 0.4995202422142029, "learning_rate": 0.0001, "loss": 1.5136, "step": 8369 }, { "epoch": 0.9724077839093813, "grad_norm": 0.4514632225036621, "learning_rate": 0.0001, "loss": 1.3639, "step": 8370 }, { "epoch": 0.9725239616613418, "grad_norm": 0.46928879618644714, "learning_rate": 0.0001, "loss": 1.4085, "step": 8371 }, { "epoch": 0.9726401394133024, "grad_norm": 0.4802158772945404, "learning_rate": 0.0001, "loss": 1.5368, "step": 8372 }, { "epoch": 0.9727563171652629, "grad_norm": 0.4651092290878296, "learning_rate": 0.0001, "loss": 1.5232, "step": 8373 }, { "epoch": 0.9728724949172234, "grad_norm": 0.46335577964782715, "learning_rate": 0.0001, "loss": 1.4136, "step": 8374 }, { "epoch": 0.9729886726691839, "grad_norm": 0.48499464988708496, "learning_rate": 0.0001, "loss": 1.7017, "step": 8375 }, { "epoch": 0.9731048504211444, "grad_norm": 0.4987207055091858, "learning_rate": 0.0001, "loss": 1.6231, "step": 8376 }, { "epoch": 0.9732210281731049, "grad_norm": 0.505811333656311, "learning_rate": 0.0001, "loss": 1.7212, "step": 8377 }, { "epoch": 0.9733372059250653, "grad_norm": 0.49306151270866394, "learning_rate": 0.0001, "loss": 1.6688, "step": 8378 }, { "epoch": 0.9734533836770258, "grad_norm": 0.4923427700996399, "learning_rate": 0.0001, "loss": 1.6344, "step": 8379 }, { "epoch": 0.9735695614289863, "grad_norm": 0.4896833002567291, "learning_rate": 0.0001, "loss": 1.5661, "step": 8380 }, { "epoch": 0.9736857391809468, "grad_norm": 0.52273029088974, "learning_rate": 0.0001, "loss": 1.5139, "step": 8381 }, { "epoch": 0.9738019169329073, "grad_norm": 0.48786941170692444, "learning_rate": 0.0001, "loss": 1.4738, "step": 8382 }, { "epoch": 0.9739180946848679, "grad_norm": 0.49467676877975464, "learning_rate": 0.0001, "loss": 1.7052, "step": 8383 }, { "epoch": 0.9740342724368284, "grad_norm": 0.49212512373924255, "learning_rate": 0.0001, "loss": 1.6339, "step": 8384 }, { "epoch": 0.9741504501887889, "grad_norm": 0.5036439299583435, "learning_rate": 0.0001, "loss": 1.8137, "step": 8385 }, { "epoch": 0.9742666279407494, "grad_norm": 0.5129532814025879, "learning_rate": 0.0001, "loss": 1.7383, "step": 8386 }, { "epoch": 0.9743828056927099, "grad_norm": 0.5009684562683105, "learning_rate": 0.0001, "loss": 1.6897, "step": 8387 }, { "epoch": 0.9744989834446703, "grad_norm": 0.47049927711486816, "learning_rate": 0.0001, "loss": 1.5429, "step": 8388 }, { "epoch": 0.9746151611966308, "grad_norm": 0.47478753328323364, "learning_rate": 0.0001, "loss": 1.6045, "step": 8389 }, { "epoch": 0.9747313389485913, "grad_norm": 0.46627920866012573, "learning_rate": 0.0001, "loss": 1.5066, "step": 8390 }, { "epoch": 0.9748475167005518, "grad_norm": 0.47100529074668884, "learning_rate": 0.0001, "loss": 1.5564, "step": 8391 }, { "epoch": 0.9749636944525123, "grad_norm": 0.4588838815689087, "learning_rate": 0.0001, "loss": 1.5868, "step": 8392 }, { "epoch": 0.9750798722044729, "grad_norm": 0.5259383916854858, "learning_rate": 0.0001, "loss": 1.6828, "step": 8393 }, { "epoch": 0.9751960499564334, "grad_norm": 0.46649569272994995, "learning_rate": 0.0001, "loss": 1.5351, "step": 8394 }, { "epoch": 0.9753122277083939, "grad_norm": 0.48280128836631775, "learning_rate": 0.0001, "loss": 1.8095, "step": 8395 }, { "epoch": 0.9754284054603544, "grad_norm": 0.510216474533081, "learning_rate": 0.0001, "loss": 1.5507, "step": 8396 }, { "epoch": 0.9755445832123149, "grad_norm": 0.48745256662368774, "learning_rate": 0.0001, "loss": 1.6381, "step": 8397 }, { "epoch": 0.9756607609642753, "grad_norm": 0.5059899091720581, "learning_rate": 0.0001, "loss": 1.6724, "step": 8398 }, { "epoch": 0.9757769387162358, "grad_norm": 0.4656660854816437, "learning_rate": 0.0001, "loss": 1.6712, "step": 8399 }, { "epoch": 0.9758931164681963, "grad_norm": 0.5184774398803711, "learning_rate": 0.0001, "loss": 1.8107, "step": 8400 }, { "epoch": 0.9760092942201568, "grad_norm": 0.4714641273021698, "learning_rate": 0.0001, "loss": 1.4841, "step": 8401 }, { "epoch": 0.9761254719721173, "grad_norm": 0.4753645062446594, "learning_rate": 0.0001, "loss": 1.7, "step": 8402 }, { "epoch": 0.9762416497240778, "grad_norm": 0.5242862701416016, "learning_rate": 0.0001, "loss": 1.8109, "step": 8403 }, { "epoch": 0.9763578274760384, "grad_norm": 0.5038365721702576, "learning_rate": 0.0001, "loss": 1.8278, "step": 8404 }, { "epoch": 0.9764740052279989, "grad_norm": 0.5028054714202881, "learning_rate": 0.0001, "loss": 1.7647, "step": 8405 }, { "epoch": 0.9765901829799594, "grad_norm": 0.4805586636066437, "learning_rate": 0.0001, "loss": 1.5734, "step": 8406 }, { "epoch": 0.9767063607319199, "grad_norm": 0.4857881963253021, "learning_rate": 0.0001, "loss": 1.4969, "step": 8407 }, { "epoch": 0.9768225384838803, "grad_norm": 0.47471508383750916, "learning_rate": 0.0001, "loss": 1.5072, "step": 8408 }, { "epoch": 0.9769387162358408, "grad_norm": 0.4627721309661865, "learning_rate": 0.0001, "loss": 1.4733, "step": 8409 }, { "epoch": 0.9770548939878013, "grad_norm": 0.4718911647796631, "learning_rate": 0.0001, "loss": 1.4798, "step": 8410 }, { "epoch": 0.9771710717397618, "grad_norm": 0.5181329846382141, "learning_rate": 0.0001, "loss": 1.6377, "step": 8411 }, { "epoch": 0.9772872494917223, "grad_norm": 0.4619660973548889, "learning_rate": 0.0001, "loss": 1.4207, "step": 8412 }, { "epoch": 0.9774034272436828, "grad_norm": 0.5440229773521423, "learning_rate": 0.0001, "loss": 1.7901, "step": 8413 }, { "epoch": 0.9775196049956434, "grad_norm": 0.4952663481235504, "learning_rate": 0.0001, "loss": 1.6403, "step": 8414 }, { "epoch": 0.9776357827476039, "grad_norm": 0.4936034083366394, "learning_rate": 0.0001, "loss": 1.6011, "step": 8415 }, { "epoch": 0.9777519604995644, "grad_norm": 0.5232906341552734, "learning_rate": 0.0001, "loss": 1.5554, "step": 8416 }, { "epoch": 0.9778681382515249, "grad_norm": 0.5204361081123352, "learning_rate": 0.0001, "loss": 1.561, "step": 8417 }, { "epoch": 0.9779843160034853, "grad_norm": 0.4749402701854706, "learning_rate": 0.0001, "loss": 1.4276, "step": 8418 }, { "epoch": 0.9781004937554458, "grad_norm": 0.4584287106990814, "learning_rate": 0.0001, "loss": 1.5456, "step": 8419 }, { "epoch": 0.9782166715074063, "grad_norm": 0.4839183986186981, "learning_rate": 0.0001, "loss": 1.62, "step": 8420 }, { "epoch": 0.9783328492593668, "grad_norm": 0.4972332715988159, "learning_rate": 0.0001, "loss": 1.6055, "step": 8421 }, { "epoch": 0.9784490270113273, "grad_norm": 0.5022913813591003, "learning_rate": 0.0001, "loss": 1.5358, "step": 8422 }, { "epoch": 0.9785652047632878, "grad_norm": 0.5144555568695068, "learning_rate": 0.0001, "loss": 1.6239, "step": 8423 }, { "epoch": 0.9786813825152483, "grad_norm": 0.5038486123085022, "learning_rate": 0.0001, "loss": 1.6212, "step": 8424 }, { "epoch": 0.9787975602672089, "grad_norm": 0.45622316002845764, "learning_rate": 0.0001, "loss": 1.4674, "step": 8425 }, { "epoch": 0.9789137380191694, "grad_norm": 0.48347601294517517, "learning_rate": 0.0001, "loss": 1.5393, "step": 8426 }, { "epoch": 0.9790299157711299, "grad_norm": 0.5117625594139099, "learning_rate": 0.0001, "loss": 1.5874, "step": 8427 }, { "epoch": 0.9791460935230903, "grad_norm": 0.47401314973831177, "learning_rate": 0.0001, "loss": 1.6127, "step": 8428 }, { "epoch": 0.9792622712750508, "grad_norm": 0.4935537278652191, "learning_rate": 0.0001, "loss": 1.6038, "step": 8429 }, { "epoch": 0.9793784490270113, "grad_norm": 0.47367969155311584, "learning_rate": 0.0001, "loss": 1.5804, "step": 8430 }, { "epoch": 0.9794946267789718, "grad_norm": 0.4980182647705078, "learning_rate": 0.0001, "loss": 1.6661, "step": 8431 }, { "epoch": 0.9796108045309323, "grad_norm": 0.5033068656921387, "learning_rate": 0.0001, "loss": 1.6927, "step": 8432 }, { "epoch": 0.9797269822828928, "grad_norm": 0.49150750041007996, "learning_rate": 0.0001, "loss": 1.7359, "step": 8433 }, { "epoch": 0.9798431600348533, "grad_norm": 0.4907017648220062, "learning_rate": 0.0001, "loss": 1.6193, "step": 8434 }, { "epoch": 0.9799593377868139, "grad_norm": 0.46434399485588074, "learning_rate": 0.0001, "loss": 1.5551, "step": 8435 }, { "epoch": 0.9800755155387744, "grad_norm": 0.4952322840690613, "learning_rate": 0.0001, "loss": 1.5669, "step": 8436 }, { "epoch": 0.9801916932907349, "grad_norm": 0.4482409358024597, "learning_rate": 0.0001, "loss": 1.4537, "step": 8437 }, { "epoch": 0.9803078710426953, "grad_norm": 0.4462791979312897, "learning_rate": 0.0001, "loss": 1.4427, "step": 8438 }, { "epoch": 0.9804240487946558, "grad_norm": 0.5374612808227539, "learning_rate": 0.0001, "loss": 1.8228, "step": 8439 }, { "epoch": 0.9805402265466163, "grad_norm": 0.5060397386550903, "learning_rate": 0.0001, "loss": 1.693, "step": 8440 }, { "epoch": 0.9806564042985768, "grad_norm": 0.4491170048713684, "learning_rate": 0.0001, "loss": 1.4703, "step": 8441 }, { "epoch": 0.9807725820505373, "grad_norm": 0.49389106035232544, "learning_rate": 0.0001, "loss": 1.5651, "step": 8442 }, { "epoch": 0.9808887598024978, "grad_norm": 0.49476566910743713, "learning_rate": 0.0001, "loss": 1.5014, "step": 8443 }, { "epoch": 0.9810049375544583, "grad_norm": 0.45577380061149597, "learning_rate": 0.0001, "loss": 1.3664, "step": 8444 }, { "epoch": 0.9811211153064188, "grad_norm": 0.48937976360321045, "learning_rate": 0.0001, "loss": 1.5614, "step": 8445 }, { "epoch": 0.9812372930583794, "grad_norm": 0.4766305983066559, "learning_rate": 0.0001, "loss": 1.5832, "step": 8446 }, { "epoch": 0.9813534708103399, "grad_norm": 0.4743248224258423, "learning_rate": 0.0001, "loss": 1.6103, "step": 8447 }, { "epoch": 0.9814696485623003, "grad_norm": 0.504391610622406, "learning_rate": 0.0001, "loss": 1.5322, "step": 8448 }, { "epoch": 0.9815858263142608, "grad_norm": 0.46172428131103516, "learning_rate": 0.0001, "loss": 1.5427, "step": 8449 }, { "epoch": 0.9817020040662213, "grad_norm": 0.4999525249004364, "learning_rate": 0.0001, "loss": 1.6239, "step": 8450 }, { "epoch": 0.9818181818181818, "grad_norm": 0.49794474244117737, "learning_rate": 0.0001, "loss": 1.7356, "step": 8451 }, { "epoch": 0.9819343595701423, "grad_norm": 0.47993290424346924, "learning_rate": 0.0001, "loss": 1.6223, "step": 8452 }, { "epoch": 0.9820505373221028, "grad_norm": 0.5240808725357056, "learning_rate": 0.0001, "loss": 1.7495, "step": 8453 }, { "epoch": 0.9821667150740633, "grad_norm": 0.486801415681839, "learning_rate": 0.0001, "loss": 1.6978, "step": 8454 }, { "epoch": 0.9822828928260238, "grad_norm": 0.5116367936134338, "learning_rate": 0.0001, "loss": 1.6763, "step": 8455 }, { "epoch": 0.9823990705779844, "grad_norm": 0.4694158732891083, "learning_rate": 0.0001, "loss": 1.638, "step": 8456 }, { "epoch": 0.9825152483299449, "grad_norm": 0.4877493679523468, "learning_rate": 0.0001, "loss": 1.5703, "step": 8457 }, { "epoch": 0.9826314260819053, "grad_norm": 0.5048372745513916, "learning_rate": 0.0001, "loss": 1.6968, "step": 8458 }, { "epoch": 0.9827476038338658, "grad_norm": 0.4648038148880005, "learning_rate": 0.0001, "loss": 1.4964, "step": 8459 }, { "epoch": 0.9828637815858263, "grad_norm": 0.48997968435287476, "learning_rate": 0.0001, "loss": 1.4991, "step": 8460 }, { "epoch": 0.9829799593377868, "grad_norm": 0.4944762885570526, "learning_rate": 0.0001, "loss": 1.6226, "step": 8461 }, { "epoch": 0.9830961370897473, "grad_norm": 0.4919930398464203, "learning_rate": 0.0001, "loss": 1.499, "step": 8462 }, { "epoch": 0.9832123148417078, "grad_norm": 0.49641650915145874, "learning_rate": 0.0001, "loss": 1.5649, "step": 8463 }, { "epoch": 0.9833284925936683, "grad_norm": 0.4891132414340973, "learning_rate": 0.0001, "loss": 1.6833, "step": 8464 }, { "epoch": 0.9834446703456288, "grad_norm": 0.4601823687553406, "learning_rate": 0.0001, "loss": 1.531, "step": 8465 }, { "epoch": 0.9835608480975893, "grad_norm": 0.4825168550014496, "learning_rate": 0.0001, "loss": 1.6394, "step": 8466 }, { "epoch": 0.9836770258495499, "grad_norm": 0.5182327032089233, "learning_rate": 0.0001, "loss": 1.5996, "step": 8467 }, { "epoch": 0.9837932036015103, "grad_norm": 0.47299832105636597, "learning_rate": 0.0001, "loss": 1.6853, "step": 8468 }, { "epoch": 0.9839093813534708, "grad_norm": 0.505413293838501, "learning_rate": 0.0001, "loss": 1.664, "step": 8469 }, { "epoch": 0.9840255591054313, "grad_norm": 0.4697488844394684, "learning_rate": 0.0001, "loss": 1.41, "step": 8470 }, { "epoch": 0.9841417368573918, "grad_norm": 0.4823164641857147, "learning_rate": 0.0001, "loss": 1.6351, "step": 8471 }, { "epoch": 0.9842579146093523, "grad_norm": 0.49613675475120544, "learning_rate": 0.0001, "loss": 1.6637, "step": 8472 }, { "epoch": 0.9843740923613128, "grad_norm": 0.49066096544265747, "learning_rate": 0.0001, "loss": 1.3881, "step": 8473 }, { "epoch": 0.9844902701132733, "grad_norm": 0.4947955012321472, "learning_rate": 0.0001, "loss": 1.6372, "step": 8474 }, { "epoch": 0.9846064478652338, "grad_norm": 0.4665144681930542, "learning_rate": 0.0001, "loss": 1.5908, "step": 8475 }, { "epoch": 0.9847226256171943, "grad_norm": 0.5288201570510864, "learning_rate": 0.0001, "loss": 1.8544, "step": 8476 }, { "epoch": 0.9848388033691549, "grad_norm": 0.4513753354549408, "learning_rate": 0.0001, "loss": 1.3675, "step": 8477 }, { "epoch": 0.9849549811211153, "grad_norm": 0.49009808897972107, "learning_rate": 0.0001, "loss": 1.5639, "step": 8478 }, { "epoch": 0.9850711588730758, "grad_norm": 0.5423754453659058, "learning_rate": 0.0001, "loss": 1.7736, "step": 8479 }, { "epoch": 0.9851873366250363, "grad_norm": 0.5365590453147888, "learning_rate": 0.0001, "loss": 1.6318, "step": 8480 }, { "epoch": 0.9853035143769968, "grad_norm": 0.44759395718574524, "learning_rate": 0.0001, "loss": 1.5299, "step": 8481 }, { "epoch": 0.9854196921289573, "grad_norm": 0.5050853490829468, "learning_rate": 0.0001, "loss": 1.5673, "step": 8482 }, { "epoch": 0.9855358698809178, "grad_norm": 0.4968460202217102, "learning_rate": 0.0001, "loss": 1.5135, "step": 8483 }, { "epoch": 0.9856520476328783, "grad_norm": 0.4942067265510559, "learning_rate": 0.0001, "loss": 1.497, "step": 8484 }, { "epoch": 0.9857682253848388, "grad_norm": 0.5043184161186218, "learning_rate": 0.0001, "loss": 1.5682, "step": 8485 }, { "epoch": 0.9858844031367993, "grad_norm": 0.4849433898925781, "learning_rate": 0.0001, "loss": 1.6144, "step": 8486 }, { "epoch": 0.9860005808887597, "grad_norm": 0.5094505548477173, "learning_rate": 0.0001, "loss": 1.6876, "step": 8487 }, { "epoch": 0.9861167586407203, "grad_norm": 0.5001594424247742, "learning_rate": 0.0001, "loss": 1.6043, "step": 8488 }, { "epoch": 0.9862329363926808, "grad_norm": 0.47105178236961365, "learning_rate": 0.0001, "loss": 1.4964, "step": 8489 }, { "epoch": 0.9863491141446413, "grad_norm": 0.4758048951625824, "learning_rate": 0.0001, "loss": 1.3925, "step": 8490 }, { "epoch": 0.9864652918966018, "grad_norm": 0.4678146541118622, "learning_rate": 0.0001, "loss": 1.5739, "step": 8491 }, { "epoch": 0.9865814696485623, "grad_norm": 0.4687998294830322, "learning_rate": 0.0001, "loss": 1.5574, "step": 8492 }, { "epoch": 0.9866976474005228, "grad_norm": 0.5320387482643127, "learning_rate": 0.0001, "loss": 1.6209, "step": 8493 }, { "epoch": 0.9868138251524833, "grad_norm": 0.49444085359573364, "learning_rate": 0.0001, "loss": 1.64, "step": 8494 }, { "epoch": 0.9869300029044438, "grad_norm": 0.4462701678276062, "learning_rate": 0.0001, "loss": 1.491, "step": 8495 }, { "epoch": 0.9870461806564043, "grad_norm": 0.46784061193466187, "learning_rate": 0.0001, "loss": 1.547, "step": 8496 }, { "epoch": 0.9871623584083647, "grad_norm": 0.4919097423553467, "learning_rate": 0.0001, "loss": 1.6808, "step": 8497 }, { "epoch": 0.9872785361603253, "grad_norm": 0.48356717824935913, "learning_rate": 0.0001, "loss": 1.7458, "step": 8498 }, { "epoch": 0.9873947139122858, "grad_norm": 0.4529293477535248, "learning_rate": 0.0001, "loss": 1.6302, "step": 8499 }, { "epoch": 0.9875108916642463, "grad_norm": 0.48209211230278015, "learning_rate": 0.0001, "loss": 1.63, "step": 8500 }, { "epoch": 0.9876270694162068, "grad_norm": 0.4654987156391144, "learning_rate": 0.0001, "loss": 1.6053, "step": 8501 }, { "epoch": 0.9877432471681673, "grad_norm": 0.4693775177001953, "learning_rate": 0.0001, "loss": 1.4176, "step": 8502 }, { "epoch": 0.9878594249201278, "grad_norm": 0.4983534514904022, "learning_rate": 0.0001, "loss": 1.5224, "step": 8503 }, { "epoch": 0.9879756026720883, "grad_norm": 0.4841345250606537, "learning_rate": 0.0001, "loss": 1.5284, "step": 8504 }, { "epoch": 0.9880917804240488, "grad_norm": 0.5288339853286743, "learning_rate": 0.0001, "loss": 1.7306, "step": 8505 }, { "epoch": 0.9882079581760093, "grad_norm": 0.482524037361145, "learning_rate": 0.0001, "loss": 1.5476, "step": 8506 }, { "epoch": 0.9883241359279697, "grad_norm": 0.48337680101394653, "learning_rate": 0.0001, "loss": 1.5168, "step": 8507 }, { "epoch": 0.9884403136799302, "grad_norm": 0.4871644079685211, "learning_rate": 0.0001, "loss": 1.5644, "step": 8508 }, { "epoch": 0.9885564914318908, "grad_norm": 0.47832533717155457, "learning_rate": 0.0001, "loss": 1.6587, "step": 8509 }, { "epoch": 0.9886726691838513, "grad_norm": 0.4998105764389038, "learning_rate": 0.0001, "loss": 1.4837, "step": 8510 }, { "epoch": 0.9887888469358118, "grad_norm": 0.4856521189212799, "learning_rate": 0.0001, "loss": 1.4956, "step": 8511 }, { "epoch": 0.9889050246877723, "grad_norm": 0.4702942967414856, "learning_rate": 0.0001, "loss": 1.51, "step": 8512 }, { "epoch": 0.9890212024397328, "grad_norm": 0.4836369752883911, "learning_rate": 0.0001, "loss": 1.5537, "step": 8513 }, { "epoch": 0.9891373801916933, "grad_norm": 0.4650129973888397, "learning_rate": 0.0001, "loss": 1.4638, "step": 8514 }, { "epoch": 0.9892535579436538, "grad_norm": 0.47725415229797363, "learning_rate": 0.0001, "loss": 1.5804, "step": 8515 }, { "epoch": 0.9893697356956143, "grad_norm": 0.4913800358772278, "learning_rate": 0.0001, "loss": 1.5883, "step": 8516 }, { "epoch": 0.9894859134475747, "grad_norm": 0.5163478851318359, "learning_rate": 0.0001, "loss": 1.6778, "step": 8517 }, { "epoch": 0.9896020911995352, "grad_norm": 0.4827187657356262, "learning_rate": 0.0001, "loss": 1.6692, "step": 8518 }, { "epoch": 0.9897182689514958, "grad_norm": 0.5060868859291077, "learning_rate": 0.0001, "loss": 1.6114, "step": 8519 }, { "epoch": 0.9898344467034563, "grad_norm": 0.5102714896202087, "learning_rate": 0.0001, "loss": 1.6579, "step": 8520 }, { "epoch": 0.9899506244554168, "grad_norm": 0.4683992862701416, "learning_rate": 0.0001, "loss": 1.5495, "step": 8521 }, { "epoch": 0.9900668022073773, "grad_norm": 0.4988732635974884, "learning_rate": 0.0001, "loss": 1.5149, "step": 8522 }, { "epoch": 0.9901829799593378, "grad_norm": 0.48030632734298706, "learning_rate": 0.0001, "loss": 1.5803, "step": 8523 }, { "epoch": 0.9902991577112983, "grad_norm": 0.48717671632766724, "learning_rate": 0.0001, "loss": 1.6383, "step": 8524 }, { "epoch": 0.9904153354632588, "grad_norm": 0.4652939736843109, "learning_rate": 0.0001, "loss": 1.4736, "step": 8525 }, { "epoch": 0.9905315132152193, "grad_norm": 0.4697415232658386, "learning_rate": 0.0001, "loss": 1.5953, "step": 8526 }, { "epoch": 0.9906476909671798, "grad_norm": 0.4873330593109131, "learning_rate": 0.0001, "loss": 1.4776, "step": 8527 }, { "epoch": 0.9907638687191402, "grad_norm": 0.5050543546676636, "learning_rate": 0.0001, "loss": 1.6988, "step": 8528 }, { "epoch": 0.9908800464711008, "grad_norm": 0.499187171459198, "learning_rate": 0.0001, "loss": 1.7552, "step": 8529 }, { "epoch": 0.9909962242230613, "grad_norm": 0.5250015258789062, "learning_rate": 0.0001, "loss": 1.7133, "step": 8530 }, { "epoch": 0.9911124019750218, "grad_norm": 0.48826849460601807, "learning_rate": 0.0001, "loss": 1.5265, "step": 8531 }, { "epoch": 0.9912285797269823, "grad_norm": 0.45894527435302734, "learning_rate": 0.0001, "loss": 1.4252, "step": 8532 }, { "epoch": 0.9913447574789428, "grad_norm": 0.45296138525009155, "learning_rate": 0.0001, "loss": 1.4576, "step": 8533 }, { "epoch": 0.9914609352309033, "grad_norm": 0.5120823383331299, "learning_rate": 0.0001, "loss": 1.5537, "step": 8534 }, { "epoch": 0.9915771129828638, "grad_norm": 0.5219103693962097, "learning_rate": 0.0001, "loss": 1.7421, "step": 8535 }, { "epoch": 0.9916932907348243, "grad_norm": 0.41183364391326904, "learning_rate": 0.0001, "loss": 1.2066, "step": 8536 }, { "epoch": 0.9918094684867848, "grad_norm": 0.49815478920936584, "learning_rate": 0.0001, "loss": 1.671, "step": 8537 }, { "epoch": 0.9919256462387452, "grad_norm": 0.4828040301799774, "learning_rate": 0.0001, "loss": 1.5463, "step": 8538 }, { "epoch": 0.9920418239907057, "grad_norm": 0.48174577951431274, "learning_rate": 0.0001, "loss": 1.6149, "step": 8539 }, { "epoch": 0.9921580017426663, "grad_norm": 0.4945439100265503, "learning_rate": 0.0001, "loss": 1.4874, "step": 8540 }, { "epoch": 0.9922741794946268, "grad_norm": 0.5396121144294739, "learning_rate": 0.0001, "loss": 1.4201, "step": 8541 }, { "epoch": 0.9923903572465873, "grad_norm": 0.4893397390842438, "learning_rate": 0.0001, "loss": 1.6438, "step": 8542 }, { "epoch": 0.9925065349985478, "grad_norm": 0.4512374699115753, "learning_rate": 0.0001, "loss": 1.5482, "step": 8543 }, { "epoch": 0.9926227127505083, "grad_norm": 0.5038131475448608, "learning_rate": 0.0001, "loss": 1.6511, "step": 8544 }, { "epoch": 0.9927388905024688, "grad_norm": 0.47863298654556274, "learning_rate": 0.0001, "loss": 1.6239, "step": 8545 }, { "epoch": 0.9928550682544293, "grad_norm": 0.4746008515357971, "learning_rate": 0.0001, "loss": 1.4573, "step": 8546 }, { "epoch": 0.9929712460063898, "grad_norm": 0.5020937323570251, "learning_rate": 0.0001, "loss": 1.6619, "step": 8547 }, { "epoch": 0.9930874237583502, "grad_norm": 0.5293759703636169, "learning_rate": 0.0001, "loss": 1.6165, "step": 8548 }, { "epoch": 0.9932036015103107, "grad_norm": 0.49491435289382935, "learning_rate": 0.0001, "loss": 1.6167, "step": 8549 }, { "epoch": 0.9933197792622713, "grad_norm": 0.4743204414844513, "learning_rate": 0.0001, "loss": 1.6571, "step": 8550 }, { "epoch": 0.9934359570142318, "grad_norm": 0.47521528601646423, "learning_rate": 0.0001, "loss": 1.527, "step": 8551 }, { "epoch": 0.9935521347661923, "grad_norm": 0.5073684453964233, "learning_rate": 0.0001, "loss": 1.7728, "step": 8552 }, { "epoch": 0.9936683125181528, "grad_norm": 0.5444392561912537, "learning_rate": 0.0001, "loss": 1.6679, "step": 8553 }, { "epoch": 0.9937844902701133, "grad_norm": 0.47420382499694824, "learning_rate": 0.0001, "loss": 1.4998, "step": 8554 }, { "epoch": 0.9939006680220738, "grad_norm": 0.4889985918998718, "learning_rate": 0.0001, "loss": 1.6306, "step": 8555 }, { "epoch": 0.9940168457740343, "grad_norm": 0.49144667387008667, "learning_rate": 0.0001, "loss": 1.6487, "step": 8556 }, { "epoch": 0.9941330235259948, "grad_norm": 0.5177491307258606, "learning_rate": 0.0001, "loss": 1.6854, "step": 8557 }, { "epoch": 0.9942492012779552, "grad_norm": 0.4622519612312317, "learning_rate": 0.0001, "loss": 1.5771, "step": 8558 }, { "epoch": 0.9943653790299157, "grad_norm": 0.463818222284317, "learning_rate": 0.0001, "loss": 1.5265, "step": 8559 }, { "epoch": 0.9944815567818762, "grad_norm": 0.46386709809303284, "learning_rate": 0.0001, "loss": 1.4344, "step": 8560 }, { "epoch": 0.9945977345338368, "grad_norm": 0.4772911071777344, "learning_rate": 0.0001, "loss": 1.4433, "step": 8561 }, { "epoch": 0.9947139122857973, "grad_norm": 0.47931206226348877, "learning_rate": 0.0001, "loss": 1.5446, "step": 8562 }, { "epoch": 0.9948300900377578, "grad_norm": 0.5052580237388611, "learning_rate": 0.0001, "loss": 1.5963, "step": 8563 }, { "epoch": 0.9949462677897183, "grad_norm": 0.46733200550079346, "learning_rate": 0.0001, "loss": 1.3345, "step": 8564 }, { "epoch": 0.9950624455416788, "grad_norm": 0.5169939994812012, "learning_rate": 0.0001, "loss": 1.4962, "step": 8565 }, { "epoch": 0.9951786232936393, "grad_norm": 0.5090740919113159, "learning_rate": 0.0001, "loss": 1.7412, "step": 8566 }, { "epoch": 0.9952948010455998, "grad_norm": 0.4987412691116333, "learning_rate": 0.0001, "loss": 1.4844, "step": 8567 }, { "epoch": 0.9954109787975602, "grad_norm": 0.44403526186943054, "learning_rate": 0.0001, "loss": 1.5212, "step": 8568 }, { "epoch": 0.9955271565495207, "grad_norm": 0.5056188702583313, "learning_rate": 0.0001, "loss": 1.659, "step": 8569 }, { "epoch": 0.9956433343014812, "grad_norm": 0.4768769145011902, "learning_rate": 0.0001, "loss": 1.4187, "step": 8570 }, { "epoch": 0.9957595120534418, "grad_norm": 0.5366693735122681, "learning_rate": 0.0001, "loss": 1.6996, "step": 8571 }, { "epoch": 0.9958756898054023, "grad_norm": 0.49038922786712646, "learning_rate": 0.0001, "loss": 1.6859, "step": 8572 }, { "epoch": 0.9959918675573628, "grad_norm": 0.4839872419834137, "learning_rate": 0.0001, "loss": 1.7012, "step": 8573 }, { "epoch": 0.9961080453093233, "grad_norm": 0.4965837001800537, "learning_rate": 0.0001, "loss": 1.6153, "step": 8574 }, { "epoch": 0.9962242230612838, "grad_norm": 0.4981982111930847, "learning_rate": 0.0001, "loss": 1.7764, "step": 8575 }, { "epoch": 0.9963404008132443, "grad_norm": 0.47916489839553833, "learning_rate": 0.0001, "loss": 1.4184, "step": 8576 }, { "epoch": 0.9964565785652048, "grad_norm": 0.5100079774856567, "learning_rate": 0.0001, "loss": 1.5561, "step": 8577 }, { "epoch": 0.9965727563171652, "grad_norm": 0.46579378843307495, "learning_rate": 0.0001, "loss": 1.4657, "step": 8578 }, { "epoch": 0.9966889340691257, "grad_norm": 0.4757480323314667, "learning_rate": 0.0001, "loss": 1.5739, "step": 8579 }, { "epoch": 0.9968051118210862, "grad_norm": 0.49027368426322937, "learning_rate": 0.0001, "loss": 1.623, "step": 8580 }, { "epoch": 0.9969212895730467, "grad_norm": 0.4813475012779236, "learning_rate": 0.0001, "loss": 1.5207, "step": 8581 }, { "epoch": 0.9970374673250073, "grad_norm": 0.4723621606826782, "learning_rate": 0.0001, "loss": 1.515, "step": 8582 }, { "epoch": 0.9971536450769678, "grad_norm": 0.476460337638855, "learning_rate": 0.0001, "loss": 1.4617, "step": 8583 }, { "epoch": 0.9972698228289283, "grad_norm": 0.4537820518016815, "learning_rate": 0.0001, "loss": 1.4144, "step": 8584 }, { "epoch": 0.9973860005808888, "grad_norm": 0.4895983636379242, "learning_rate": 0.0001, "loss": 1.5439, "step": 8585 }, { "epoch": 0.9975021783328493, "grad_norm": 0.4980904161930084, "learning_rate": 0.0001, "loss": 1.6071, "step": 8586 }, { "epoch": 0.9976183560848098, "grad_norm": 0.4741244912147522, "learning_rate": 0.0001, "loss": 1.5683, "step": 8587 }, { "epoch": 0.9977345338367702, "grad_norm": 0.4763984680175781, "learning_rate": 0.0001, "loss": 1.5386, "step": 8588 }, { "epoch": 0.9978507115887307, "grad_norm": 0.49783235788345337, "learning_rate": 0.0001, "loss": 1.6471, "step": 8589 }, { "epoch": 0.9979668893406912, "grad_norm": 0.5360357165336609, "learning_rate": 0.0001, "loss": 1.555, "step": 8590 }, { "epoch": 0.9980830670926517, "grad_norm": 0.5079225301742554, "learning_rate": 0.0001, "loss": 1.5375, "step": 8591 }, { "epoch": 0.9981992448446123, "grad_norm": 0.5056329965591431, "learning_rate": 0.0001, "loss": 1.6868, "step": 8592 }, { "epoch": 0.9983154225965728, "grad_norm": 0.46236658096313477, "learning_rate": 0.0001, "loss": 1.4826, "step": 8593 }, { "epoch": 0.9984316003485333, "grad_norm": 0.5396600365638733, "learning_rate": 0.0001, "loss": 1.807, "step": 8594 }, { "epoch": 0.9985477781004938, "grad_norm": 0.46349719166755676, "learning_rate": 0.0001, "loss": 1.6027, "step": 8595 }, { "epoch": 0.9986639558524543, "grad_norm": 0.5006660223007202, "learning_rate": 0.0001, "loss": 1.5728, "step": 8596 }, { "epoch": 0.9987801336044148, "grad_norm": 0.4902550280094147, "learning_rate": 0.0001, "loss": 1.567, "step": 8597 }, { "epoch": 0.9988963113563752, "grad_norm": 0.48567691445350647, "learning_rate": 0.0001, "loss": 1.561, "step": 8598 }, { "epoch": 0.9990124891083357, "grad_norm": 0.47548311948776245, "learning_rate": 0.0001, "loss": 1.7234, "step": 8599 }, { "epoch": 0.9991286668602962, "grad_norm": 0.47243523597717285, "learning_rate": 0.0001, "loss": 1.5788, "step": 8600 }, { "epoch": 0.9992448446122567, "grad_norm": 0.4965050518512726, "learning_rate": 0.0001, "loss": 1.6962, "step": 8601 }, { "epoch": 0.9993610223642172, "grad_norm": 0.5007203817367554, "learning_rate": 0.0001, "loss": 1.707, "step": 8602 }, { "epoch": 0.9994772001161778, "grad_norm": 0.48062342405319214, "learning_rate": 0.0001, "loss": 1.5141, "step": 8603 }, { "epoch": 0.9995933778681383, "grad_norm": 0.5184653401374817, "learning_rate": 0.0001, "loss": 1.6188, "step": 8604 }, { "epoch": 0.9997095556200988, "grad_norm": 0.4720516502857208, "learning_rate": 0.0001, "loss": 1.553, "step": 8605 }, { "epoch": 0.9998257333720593, "grad_norm": 0.4640704393386841, "learning_rate": 0.0001, "loss": 1.4149, "step": 8606 }, { "epoch": 0.9999419111240198, "grad_norm": 0.4718928933143616, "learning_rate": 0.0001, "loss": 1.4999, "step": 8607 }, { "epoch": 1.0000580888759802, "grad_norm": 0.4901491701602936, "learning_rate": 0.0001, "loss": 1.4583, "step": 8608 }, { "epoch": 1.0001742666279407, "grad_norm": 0.5047919154167175, "learning_rate": 0.0001, "loss": 1.5183, "step": 8609 }, { "epoch": 1.0002904443799012, "grad_norm": 0.5121291279792786, "learning_rate": 0.0001, "loss": 1.6074, "step": 8610 }, { "epoch": 1.0004066221318617, "grad_norm": 0.46516066789627075, "learning_rate": 0.0001, "loss": 1.3715, "step": 8611 }, { "epoch": 1.0005227998838222, "grad_norm": 0.4786158800125122, "learning_rate": 0.0001, "loss": 1.5568, "step": 8612 }, { "epoch": 1.0006389776357827, "grad_norm": 0.507445752620697, "learning_rate": 0.0001, "loss": 1.4281, "step": 8613 }, { "epoch": 1.0007551553877432, "grad_norm": 0.5036141872406006, "learning_rate": 0.0001, "loss": 1.5231, "step": 8614 }, { "epoch": 1.0008713331397037, "grad_norm": 0.46509867906570435, "learning_rate": 0.0001, "loss": 1.4069, "step": 8615 }, { "epoch": 1.0009875108916642, "grad_norm": 0.5023954510688782, "learning_rate": 0.0001, "loss": 1.5552, "step": 8616 }, { "epoch": 1.0011036886436246, "grad_norm": 0.4837322235107422, "learning_rate": 0.0001, "loss": 1.3964, "step": 8617 }, { "epoch": 1.0012198663955854, "grad_norm": 0.5714748501777649, "learning_rate": 0.0001, "loss": 1.4821, "step": 8618 }, { "epoch": 1.0013360441475458, "grad_norm": 0.4612061381340027, "learning_rate": 0.0001, "loss": 1.3439, "step": 8619 }, { "epoch": 1.0014522218995063, "grad_norm": 0.5135992765426636, "learning_rate": 0.0001, "loss": 1.4263, "step": 8620 }, { "epoch": 1.0015683996514668, "grad_norm": 0.5336510539054871, "learning_rate": 0.0001, "loss": 1.5123, "step": 8621 }, { "epoch": 1.0016845774034273, "grad_norm": 0.531232476234436, "learning_rate": 0.0001, "loss": 1.3025, "step": 8622 }, { "epoch": 1.0018007551553878, "grad_norm": 0.5298891663551331, "learning_rate": 0.0001, "loss": 1.6329, "step": 8623 }, { "epoch": 1.0019169329073483, "grad_norm": 0.5009737014770508, "learning_rate": 0.0001, "loss": 1.3511, "step": 8624 }, { "epoch": 1.0020331106593088, "grad_norm": 0.5414840579032898, "learning_rate": 0.0001, "loss": 1.3387, "step": 8625 }, { "epoch": 1.0021492884112693, "grad_norm": 0.5156089067459106, "learning_rate": 0.0001, "loss": 1.5678, "step": 8626 }, { "epoch": 1.0022654661632298, "grad_norm": 0.48899945616722107, "learning_rate": 0.0001, "loss": 1.3948, "step": 8627 }, { "epoch": 1.0023816439151902, "grad_norm": 0.5076591968536377, "learning_rate": 0.0001, "loss": 1.5157, "step": 8628 }, { "epoch": 1.0024978216671507, "grad_norm": 0.5568975806236267, "learning_rate": 0.0001, "loss": 1.506, "step": 8629 }, { "epoch": 1.0026139994191112, "grad_norm": 0.5163382887840271, "learning_rate": 0.0001, "loss": 1.4018, "step": 8630 }, { "epoch": 1.0027301771710717, "grad_norm": 0.5191821455955505, "learning_rate": 0.0001, "loss": 1.3851, "step": 8631 }, { "epoch": 1.0028463549230322, "grad_norm": 0.5488196015357971, "learning_rate": 0.0001, "loss": 1.5607, "step": 8632 }, { "epoch": 1.0029625326749927, "grad_norm": 0.49666348099708557, "learning_rate": 0.0001, "loss": 1.3315, "step": 8633 }, { "epoch": 1.0030787104269532, "grad_norm": 0.5014142394065857, "learning_rate": 0.0001, "loss": 1.4998, "step": 8634 }, { "epoch": 1.0031948881789137, "grad_norm": 0.48046401143074036, "learning_rate": 0.0001, "loss": 1.4761, "step": 8635 }, { "epoch": 1.0033110659308742, "grad_norm": 0.6161479353904724, "learning_rate": 0.0001, "loss": 1.364, "step": 8636 }, { "epoch": 1.0034272436828346, "grad_norm": 0.5767279267311096, "learning_rate": 0.0001, "loss": 1.5088, "step": 8637 }, { "epoch": 1.0035434214347951, "grad_norm": 0.517012357711792, "learning_rate": 0.0001, "loss": 1.5278, "step": 8638 }, { "epoch": 1.0036595991867558, "grad_norm": 0.5002815127372742, "learning_rate": 0.0001, "loss": 1.5128, "step": 8639 }, { "epoch": 1.0037757769387163, "grad_norm": 0.5128475427627563, "learning_rate": 0.0001, "loss": 1.3022, "step": 8640 }, { "epoch": 1.0038919546906768, "grad_norm": 0.5304717421531677, "learning_rate": 0.0001, "loss": 1.657, "step": 8641 }, { "epoch": 1.0040081324426373, "grad_norm": 0.4929862320423126, "learning_rate": 0.0001, "loss": 1.4718, "step": 8642 }, { "epoch": 1.0041243101945978, "grad_norm": 0.5075370669364929, "learning_rate": 0.0001, "loss": 1.4413, "step": 8643 }, { "epoch": 1.0042404879465583, "grad_norm": 0.5167858004570007, "learning_rate": 0.0001, "loss": 1.6206, "step": 8644 }, { "epoch": 1.0043566656985188, "grad_norm": 0.45497381687164307, "learning_rate": 0.0001, "loss": 1.2107, "step": 8645 }, { "epoch": 1.0044728434504793, "grad_norm": 0.5639305710792542, "learning_rate": 0.0001, "loss": 1.5184, "step": 8646 }, { "epoch": 1.0045890212024398, "grad_norm": 0.558202862739563, "learning_rate": 0.0001, "loss": 1.5464, "step": 8647 }, { "epoch": 1.0047051989544002, "grad_norm": 0.5788266658782959, "learning_rate": 0.0001, "loss": 1.4751, "step": 8648 }, { "epoch": 1.0048213767063607, "grad_norm": 0.5463119745254517, "learning_rate": 0.0001, "loss": 1.5891, "step": 8649 }, { "epoch": 1.0049375544583212, "grad_norm": 0.5739423632621765, "learning_rate": 0.0001, "loss": 1.5901, "step": 8650 }, { "epoch": 1.0050537322102817, "grad_norm": 0.5138026475906372, "learning_rate": 0.0001, "loss": 1.366, "step": 8651 }, { "epoch": 1.0051699099622422, "grad_norm": 0.5081404447555542, "learning_rate": 0.0001, "loss": 1.4853, "step": 8652 }, { "epoch": 1.0052860877142027, "grad_norm": 0.5575280785560608, "learning_rate": 0.0001, "loss": 1.5619, "step": 8653 }, { "epoch": 1.0054022654661632, "grad_norm": 0.49208971858024597, "learning_rate": 0.0001, "loss": 1.4699, "step": 8654 }, { "epoch": 1.0055184432181237, "grad_norm": 0.4909859001636505, "learning_rate": 0.0001, "loss": 1.3758, "step": 8655 }, { "epoch": 1.0056346209700842, "grad_norm": 0.5197198987007141, "learning_rate": 0.0001, "loss": 1.6733, "step": 8656 }, { "epoch": 1.0057507987220446, "grad_norm": 0.4799864888191223, "learning_rate": 0.0001, "loss": 1.4464, "step": 8657 }, { "epoch": 1.0058669764740051, "grad_norm": 0.4831238389015198, "learning_rate": 0.0001, "loss": 1.3808, "step": 8658 }, { "epoch": 1.0059831542259656, "grad_norm": 0.47417446970939636, "learning_rate": 0.0001, "loss": 1.3522, "step": 8659 }, { "epoch": 1.0060993319779263, "grad_norm": 0.48708805441856384, "learning_rate": 0.0001, "loss": 1.4166, "step": 8660 }, { "epoch": 1.0062155097298868, "grad_norm": 0.5076435208320618, "learning_rate": 0.0001, "loss": 1.478, "step": 8661 }, { "epoch": 1.0063316874818473, "grad_norm": 0.4805087745189667, "learning_rate": 0.0001, "loss": 1.3286, "step": 8662 }, { "epoch": 1.0064478652338078, "grad_norm": 0.5486977696418762, "learning_rate": 0.0001, "loss": 1.567, "step": 8663 }, { "epoch": 1.0065640429857683, "grad_norm": 0.5629132390022278, "learning_rate": 0.0001, "loss": 1.5211, "step": 8664 }, { "epoch": 1.0066802207377288, "grad_norm": 0.5157844424247742, "learning_rate": 0.0001, "loss": 1.4976, "step": 8665 }, { "epoch": 1.0067963984896893, "grad_norm": 0.5176374316215515, "learning_rate": 0.0001, "loss": 1.4913, "step": 8666 }, { "epoch": 1.0069125762416498, "grad_norm": 0.5113467574119568, "learning_rate": 0.0001, "loss": 1.3973, "step": 8667 }, { "epoch": 1.0070287539936102, "grad_norm": 0.5078527331352234, "learning_rate": 0.0001, "loss": 1.4826, "step": 8668 }, { "epoch": 1.0071449317455707, "grad_norm": 0.5093343257904053, "learning_rate": 0.0001, "loss": 1.3933, "step": 8669 }, { "epoch": 1.0072611094975312, "grad_norm": 0.5552733540534973, "learning_rate": 0.0001, "loss": 1.5032, "step": 8670 }, { "epoch": 1.0073772872494917, "grad_norm": 0.5340274572372437, "learning_rate": 0.0001, "loss": 1.5547, "step": 8671 }, { "epoch": 1.0074934650014522, "grad_norm": 0.5053258538246155, "learning_rate": 0.0001, "loss": 1.3473, "step": 8672 }, { "epoch": 1.0076096427534127, "grad_norm": 0.5026618838310242, "learning_rate": 0.0001, "loss": 1.3966, "step": 8673 }, { "epoch": 1.0077258205053732, "grad_norm": 0.6316371560096741, "learning_rate": 0.0001, "loss": 1.6261, "step": 8674 }, { "epoch": 1.0078419982573337, "grad_norm": 0.48417550325393677, "learning_rate": 0.0001, "loss": 1.3751, "step": 8675 }, { "epoch": 1.0079581760092942, "grad_norm": 0.4940205514431, "learning_rate": 0.0001, "loss": 1.4245, "step": 8676 }, { "epoch": 1.0080743537612546, "grad_norm": 0.5635389089584351, "learning_rate": 0.0001, "loss": 1.4426, "step": 8677 }, { "epoch": 1.0081905315132151, "grad_norm": 0.5025110840797424, "learning_rate": 0.0001, "loss": 1.4626, "step": 8678 }, { "epoch": 1.0083067092651756, "grad_norm": 0.5255551338195801, "learning_rate": 0.0001, "loss": 1.347, "step": 8679 }, { "epoch": 1.0084228870171361, "grad_norm": 0.5457799434661865, "learning_rate": 0.0001, "loss": 1.6472, "step": 8680 }, { "epoch": 1.0085390647690968, "grad_norm": 0.5311607122421265, "learning_rate": 0.0001, "loss": 1.5476, "step": 8681 }, { "epoch": 1.0086552425210573, "grad_norm": 0.5294631719589233, "learning_rate": 0.0001, "loss": 1.5026, "step": 8682 }, { "epoch": 1.0087714202730178, "grad_norm": 0.5186182856559753, "learning_rate": 0.0001, "loss": 1.498, "step": 8683 }, { "epoch": 1.0088875980249783, "grad_norm": 0.502144455909729, "learning_rate": 0.0001, "loss": 1.4491, "step": 8684 }, { "epoch": 1.0090037757769388, "grad_norm": 0.49139168858528137, "learning_rate": 0.0001, "loss": 1.4062, "step": 8685 }, { "epoch": 1.0091199535288993, "grad_norm": 0.5063096284866333, "learning_rate": 0.0001, "loss": 1.3168, "step": 8686 }, { "epoch": 1.0092361312808598, "grad_norm": 0.539588212966919, "learning_rate": 0.0001, "loss": 1.5566, "step": 8687 }, { "epoch": 1.0093523090328202, "grad_norm": 0.5028448104858398, "learning_rate": 0.0001, "loss": 1.3836, "step": 8688 }, { "epoch": 1.0094684867847807, "grad_norm": 0.5191101431846619, "learning_rate": 0.0001, "loss": 1.4249, "step": 8689 }, { "epoch": 1.0095846645367412, "grad_norm": 0.5024988055229187, "learning_rate": 0.0001, "loss": 1.3312, "step": 8690 }, { "epoch": 1.0097008422887017, "grad_norm": 0.5392231941223145, "learning_rate": 0.0001, "loss": 1.4941, "step": 8691 }, { "epoch": 1.0098170200406622, "grad_norm": 0.5703687071800232, "learning_rate": 0.0001, "loss": 1.5044, "step": 8692 }, { "epoch": 1.0099331977926227, "grad_norm": 0.5280400514602661, "learning_rate": 0.0001, "loss": 1.4647, "step": 8693 }, { "epoch": 1.0100493755445832, "grad_norm": 0.5094478726387024, "learning_rate": 0.0001, "loss": 1.4625, "step": 8694 }, { "epoch": 1.0101655532965437, "grad_norm": 0.5534915328025818, "learning_rate": 0.0001, "loss": 1.4222, "step": 8695 }, { "epoch": 1.0102817310485042, "grad_norm": 0.49775946140289307, "learning_rate": 0.0001, "loss": 1.3503, "step": 8696 }, { "epoch": 1.0103979088004647, "grad_norm": 0.5414407253265381, "learning_rate": 0.0001, "loss": 1.3478, "step": 8697 }, { "epoch": 1.0105140865524251, "grad_norm": 0.5187419652938843, "learning_rate": 0.0001, "loss": 1.3347, "step": 8698 }, { "epoch": 1.0106302643043856, "grad_norm": 0.5660606026649475, "learning_rate": 0.0001, "loss": 1.4956, "step": 8699 }, { "epoch": 1.0107464420563461, "grad_norm": 0.5465399026870728, "learning_rate": 0.0001, "loss": 1.5229, "step": 8700 }, { "epoch": 1.0108626198083066, "grad_norm": 0.5141976475715637, "learning_rate": 0.0001, "loss": 1.3697, "step": 8701 }, { "epoch": 1.0109787975602673, "grad_norm": 0.5536482930183411, "learning_rate": 0.0001, "loss": 1.5109, "step": 8702 }, { "epoch": 1.0110949753122278, "grad_norm": 0.5275995135307312, "learning_rate": 0.0001, "loss": 1.5241, "step": 8703 }, { "epoch": 1.0112111530641883, "grad_norm": 0.5400773286819458, "learning_rate": 0.0001, "loss": 1.4405, "step": 8704 }, { "epoch": 1.0113273308161488, "grad_norm": 0.5452625751495361, "learning_rate": 0.0001, "loss": 1.5017, "step": 8705 }, { "epoch": 1.0114435085681093, "grad_norm": 0.5686435699462891, "learning_rate": 0.0001, "loss": 1.571, "step": 8706 }, { "epoch": 1.0115596863200698, "grad_norm": 0.5052348971366882, "learning_rate": 0.0001, "loss": 1.4301, "step": 8707 }, { "epoch": 1.0116758640720303, "grad_norm": 0.5007266998291016, "learning_rate": 0.0001, "loss": 1.4753, "step": 8708 }, { "epoch": 1.0117920418239907, "grad_norm": 0.5304220914840698, "learning_rate": 0.0001, "loss": 1.5255, "step": 8709 }, { "epoch": 1.0119082195759512, "grad_norm": 0.5678626298904419, "learning_rate": 0.0001, "loss": 1.5826, "step": 8710 }, { "epoch": 1.0120243973279117, "grad_norm": 0.5126900672912598, "learning_rate": 0.0001, "loss": 1.3308, "step": 8711 }, { "epoch": 1.0121405750798722, "grad_norm": 0.543702244758606, "learning_rate": 0.0001, "loss": 1.404, "step": 8712 }, { "epoch": 1.0122567528318327, "grad_norm": 0.5045793056488037, "learning_rate": 0.0001, "loss": 1.5412, "step": 8713 }, { "epoch": 1.0123729305837932, "grad_norm": 0.496380478143692, "learning_rate": 0.0001, "loss": 1.3823, "step": 8714 }, { "epoch": 1.0124891083357537, "grad_norm": 0.5149445533752441, "learning_rate": 0.0001, "loss": 1.2453, "step": 8715 }, { "epoch": 1.0126052860877142, "grad_norm": 0.5445921421051025, "learning_rate": 0.0001, "loss": 1.5304, "step": 8716 }, { "epoch": 1.0127214638396747, "grad_norm": 0.536666214466095, "learning_rate": 0.0001, "loss": 1.6137, "step": 8717 }, { "epoch": 1.0128376415916351, "grad_norm": 0.49998173117637634, "learning_rate": 0.0001, "loss": 1.4359, "step": 8718 }, { "epoch": 1.0129538193435956, "grad_norm": 0.5400097966194153, "learning_rate": 0.0001, "loss": 1.5228, "step": 8719 }, { "epoch": 1.0130699970955561, "grad_norm": 0.5498408079147339, "learning_rate": 0.0001, "loss": 1.5262, "step": 8720 }, { "epoch": 1.0131861748475166, "grad_norm": 0.4792373478412628, "learning_rate": 0.0001, "loss": 1.3712, "step": 8721 }, { "epoch": 1.013302352599477, "grad_norm": 0.49711140990257263, "learning_rate": 0.0001, "loss": 1.4074, "step": 8722 }, { "epoch": 1.0134185303514378, "grad_norm": 0.5019938349723816, "learning_rate": 0.0001, "loss": 1.3793, "step": 8723 }, { "epoch": 1.0135347081033983, "grad_norm": 0.4867321252822876, "learning_rate": 0.0001, "loss": 1.3943, "step": 8724 }, { "epoch": 1.0136508858553588, "grad_norm": 0.5257205367088318, "learning_rate": 0.0001, "loss": 1.4509, "step": 8725 }, { "epoch": 1.0137670636073193, "grad_norm": 0.5636011362075806, "learning_rate": 0.0001, "loss": 1.5382, "step": 8726 }, { "epoch": 1.0138832413592798, "grad_norm": 0.5135155916213989, "learning_rate": 0.0001, "loss": 1.2232, "step": 8727 }, { "epoch": 1.0139994191112403, "grad_norm": 0.524466872215271, "learning_rate": 0.0001, "loss": 1.4098, "step": 8728 }, { "epoch": 1.0141155968632007, "grad_norm": 0.5000982284545898, "learning_rate": 0.0001, "loss": 1.3986, "step": 8729 }, { "epoch": 1.0142317746151612, "grad_norm": 0.5312855243682861, "learning_rate": 0.0001, "loss": 1.4831, "step": 8730 }, { "epoch": 1.0143479523671217, "grad_norm": 0.530696451663971, "learning_rate": 0.0001, "loss": 1.5603, "step": 8731 }, { "epoch": 1.0144641301190822, "grad_norm": 0.5051412582397461, "learning_rate": 0.0001, "loss": 1.2977, "step": 8732 }, { "epoch": 1.0145803078710427, "grad_norm": 0.5274531245231628, "learning_rate": 0.0001, "loss": 1.41, "step": 8733 }, { "epoch": 1.0146964856230032, "grad_norm": 0.5309851169586182, "learning_rate": 0.0001, "loss": 1.47, "step": 8734 }, { "epoch": 1.0148126633749637, "grad_norm": 0.575518012046814, "learning_rate": 0.0001, "loss": 1.6739, "step": 8735 }, { "epoch": 1.0149288411269242, "grad_norm": 0.5956768989562988, "learning_rate": 0.0001, "loss": 1.5394, "step": 8736 }, { "epoch": 1.0150450188788847, "grad_norm": 0.532591700553894, "learning_rate": 0.0001, "loss": 1.5308, "step": 8737 }, { "epoch": 1.0151611966308451, "grad_norm": 0.534724235534668, "learning_rate": 0.0001, "loss": 1.4574, "step": 8738 }, { "epoch": 1.0152773743828056, "grad_norm": 0.5360231995582581, "learning_rate": 0.0001, "loss": 1.2831, "step": 8739 }, { "epoch": 1.0153935521347661, "grad_norm": 0.557018518447876, "learning_rate": 0.0001, "loss": 1.6229, "step": 8740 }, { "epoch": 1.0155097298867266, "grad_norm": 0.6012376546859741, "learning_rate": 0.0001, "loss": 1.5889, "step": 8741 }, { "epoch": 1.015625907638687, "grad_norm": 0.4950985908508301, "learning_rate": 0.0001, "loss": 1.3743, "step": 8742 }, { "epoch": 1.0157420853906476, "grad_norm": 0.5329170227050781, "learning_rate": 0.0001, "loss": 1.3792, "step": 8743 }, { "epoch": 1.0158582631426083, "grad_norm": 0.5413834452629089, "learning_rate": 0.0001, "loss": 1.4078, "step": 8744 }, { "epoch": 1.0159744408945688, "grad_norm": 0.5318317413330078, "learning_rate": 0.0001, "loss": 1.516, "step": 8745 }, { "epoch": 1.0160906186465293, "grad_norm": 0.49791741371154785, "learning_rate": 0.0001, "loss": 1.3419, "step": 8746 }, { "epoch": 1.0162067963984898, "grad_norm": 0.5165941119194031, "learning_rate": 0.0001, "loss": 1.3846, "step": 8747 }, { "epoch": 1.0163229741504503, "grad_norm": 0.5537638068199158, "learning_rate": 0.0001, "loss": 1.5566, "step": 8748 }, { "epoch": 1.0164391519024107, "grad_norm": 0.5026431083679199, "learning_rate": 0.0001, "loss": 1.4458, "step": 8749 }, { "epoch": 1.0165553296543712, "grad_norm": 0.5347225069999695, "learning_rate": 0.0001, "loss": 1.4458, "step": 8750 }, { "epoch": 1.0166715074063317, "grad_norm": 0.5551308393478394, "learning_rate": 0.0001, "loss": 1.5918, "step": 8751 }, { "epoch": 1.0167876851582922, "grad_norm": 0.5152553915977478, "learning_rate": 0.0001, "loss": 1.52, "step": 8752 }, { "epoch": 1.0169038629102527, "grad_norm": 0.5854957699775696, "learning_rate": 0.0001, "loss": 1.7042, "step": 8753 }, { "epoch": 1.0170200406622132, "grad_norm": 0.5672193169593811, "learning_rate": 0.0001, "loss": 1.6815, "step": 8754 }, { "epoch": 1.0171362184141737, "grad_norm": 0.5420659184455872, "learning_rate": 0.0001, "loss": 1.5568, "step": 8755 }, { "epoch": 1.0172523961661342, "grad_norm": 0.5565413236618042, "learning_rate": 0.0001, "loss": 1.5412, "step": 8756 }, { "epoch": 1.0173685739180947, "grad_norm": 0.49962347745895386, "learning_rate": 0.0001, "loss": 1.4468, "step": 8757 }, { "epoch": 1.0174847516700551, "grad_norm": 0.5187664031982422, "learning_rate": 0.0001, "loss": 1.5169, "step": 8758 }, { "epoch": 1.0176009294220156, "grad_norm": 0.5551736354827881, "learning_rate": 0.0001, "loss": 1.5355, "step": 8759 }, { "epoch": 1.0177171071739761, "grad_norm": 0.5217881202697754, "learning_rate": 0.0001, "loss": 1.5438, "step": 8760 }, { "epoch": 1.0178332849259366, "grad_norm": 0.5211256742477417, "learning_rate": 0.0001, "loss": 1.4881, "step": 8761 }, { "epoch": 1.017949462677897, "grad_norm": 0.5577038526535034, "learning_rate": 0.0001, "loss": 1.5617, "step": 8762 }, { "epoch": 1.0180656404298576, "grad_norm": 0.5176113247871399, "learning_rate": 0.0001, "loss": 1.3198, "step": 8763 }, { "epoch": 1.018181818181818, "grad_norm": 0.5276638865470886, "learning_rate": 0.0001, "loss": 1.4202, "step": 8764 }, { "epoch": 1.0182979959337788, "grad_norm": 0.5093098282814026, "learning_rate": 0.0001, "loss": 1.4446, "step": 8765 }, { "epoch": 1.0184141736857393, "grad_norm": 0.5525769591331482, "learning_rate": 0.0001, "loss": 1.5357, "step": 8766 }, { "epoch": 1.0185303514376998, "grad_norm": 0.4998970329761505, "learning_rate": 0.0001, "loss": 1.4808, "step": 8767 }, { "epoch": 1.0186465291896603, "grad_norm": 0.5910646915435791, "learning_rate": 0.0001, "loss": 1.7389, "step": 8768 }, { "epoch": 1.0187627069416207, "grad_norm": 0.5479773879051208, "learning_rate": 0.0001, "loss": 1.6655, "step": 8769 }, { "epoch": 1.0188788846935812, "grad_norm": 0.5090250968933105, "learning_rate": 0.0001, "loss": 1.3965, "step": 8770 }, { "epoch": 1.0189950624455417, "grad_norm": 0.5310608148574829, "learning_rate": 0.0001, "loss": 1.5007, "step": 8771 }, { "epoch": 1.0191112401975022, "grad_norm": 0.4970023036003113, "learning_rate": 0.0001, "loss": 1.3572, "step": 8772 }, { "epoch": 1.0192274179494627, "grad_norm": 0.504270613193512, "learning_rate": 0.0001, "loss": 1.373, "step": 8773 }, { "epoch": 1.0193435957014232, "grad_norm": 0.49610963463783264, "learning_rate": 0.0001, "loss": 1.3157, "step": 8774 }, { "epoch": 1.0194597734533837, "grad_norm": 0.5968664884567261, "learning_rate": 0.0001, "loss": 1.5525, "step": 8775 }, { "epoch": 1.0195759512053442, "grad_norm": 0.4930863082408905, "learning_rate": 0.0001, "loss": 1.4478, "step": 8776 }, { "epoch": 1.0196921289573047, "grad_norm": 0.5297977924346924, "learning_rate": 0.0001, "loss": 1.4772, "step": 8777 }, { "epoch": 1.0198083067092651, "grad_norm": 0.5384182333946228, "learning_rate": 0.0001, "loss": 1.3761, "step": 8778 }, { "epoch": 1.0199244844612256, "grad_norm": 0.5289637446403503, "learning_rate": 0.0001, "loss": 1.3421, "step": 8779 }, { "epoch": 1.0200406622131861, "grad_norm": 0.4995555877685547, "learning_rate": 0.0001, "loss": 1.4968, "step": 8780 }, { "epoch": 1.0201568399651466, "grad_norm": 0.5270565748214722, "learning_rate": 0.0001, "loss": 1.4003, "step": 8781 }, { "epoch": 1.020273017717107, "grad_norm": 0.5530638694763184, "learning_rate": 0.0001, "loss": 1.4916, "step": 8782 }, { "epoch": 1.0203891954690676, "grad_norm": 0.519996702671051, "learning_rate": 0.0001, "loss": 1.4601, "step": 8783 }, { "epoch": 1.020505373221028, "grad_norm": 0.5551260113716125, "learning_rate": 0.0001, "loss": 1.5845, "step": 8784 }, { "epoch": 1.0206215509729886, "grad_norm": 0.5324636697769165, "learning_rate": 0.0001, "loss": 1.5639, "step": 8785 }, { "epoch": 1.0207377287249493, "grad_norm": 0.5127756595611572, "learning_rate": 0.0001, "loss": 1.4244, "step": 8786 }, { "epoch": 1.0208539064769098, "grad_norm": 0.5320945978164673, "learning_rate": 0.0001, "loss": 1.5453, "step": 8787 }, { "epoch": 1.0209700842288703, "grad_norm": 0.5293431282043457, "learning_rate": 0.0001, "loss": 1.5285, "step": 8788 }, { "epoch": 1.0210862619808307, "grad_norm": 0.5258355140686035, "learning_rate": 0.0001, "loss": 1.4553, "step": 8789 }, { "epoch": 1.0212024397327912, "grad_norm": 0.6581287980079651, "learning_rate": 0.0001, "loss": 1.3883, "step": 8790 }, { "epoch": 1.0213186174847517, "grad_norm": 0.5154805779457092, "learning_rate": 0.0001, "loss": 1.4723, "step": 8791 }, { "epoch": 1.0214347952367122, "grad_norm": 0.48728328943252563, "learning_rate": 0.0001, "loss": 1.3606, "step": 8792 }, { "epoch": 1.0215509729886727, "grad_norm": 0.5164970755577087, "learning_rate": 0.0001, "loss": 1.6262, "step": 8793 }, { "epoch": 1.0216671507406332, "grad_norm": 0.5162052512168884, "learning_rate": 0.0001, "loss": 1.573, "step": 8794 }, { "epoch": 1.0217833284925937, "grad_norm": 0.5044278502464294, "learning_rate": 0.0001, "loss": 1.4329, "step": 8795 }, { "epoch": 1.0218995062445542, "grad_norm": 0.5441913604736328, "learning_rate": 0.0001, "loss": 1.474, "step": 8796 }, { "epoch": 1.0220156839965147, "grad_norm": 0.4957961440086365, "learning_rate": 0.0001, "loss": 1.3308, "step": 8797 }, { "epoch": 1.0221318617484751, "grad_norm": 0.49223870038986206, "learning_rate": 0.0001, "loss": 1.2865, "step": 8798 }, { "epoch": 1.0222480395004356, "grad_norm": 0.5398341417312622, "learning_rate": 0.0001, "loss": 1.4021, "step": 8799 }, { "epoch": 1.0223642172523961, "grad_norm": 0.5927699208259583, "learning_rate": 0.0001, "loss": 1.6615, "step": 8800 }, { "epoch": 1.0224803950043566, "grad_norm": 0.5420244932174683, "learning_rate": 0.0001, "loss": 1.3955, "step": 8801 }, { "epoch": 1.022596572756317, "grad_norm": 0.5336340069770813, "learning_rate": 0.0001, "loss": 1.5337, "step": 8802 }, { "epoch": 1.0227127505082776, "grad_norm": 0.5515368580818176, "learning_rate": 0.0001, "loss": 1.4957, "step": 8803 }, { "epoch": 1.022828928260238, "grad_norm": 0.5734601020812988, "learning_rate": 0.0001, "loss": 1.5024, "step": 8804 }, { "epoch": 1.0229451060121986, "grad_norm": 0.4861997067928314, "learning_rate": 0.0001, "loss": 1.346, "step": 8805 }, { "epoch": 1.023061283764159, "grad_norm": 0.501109778881073, "learning_rate": 0.0001, "loss": 1.3825, "step": 8806 }, { "epoch": 1.0231774615161198, "grad_norm": 0.5470065474510193, "learning_rate": 0.0001, "loss": 1.5643, "step": 8807 }, { "epoch": 1.0232936392680803, "grad_norm": 0.5535728335380554, "learning_rate": 0.0001, "loss": 1.4396, "step": 8808 }, { "epoch": 1.0234098170200407, "grad_norm": 0.555833101272583, "learning_rate": 0.0001, "loss": 1.6016, "step": 8809 }, { "epoch": 1.0235259947720012, "grad_norm": 0.48432549834251404, "learning_rate": 0.0001, "loss": 1.2699, "step": 8810 }, { "epoch": 1.0236421725239617, "grad_norm": 0.5172594785690308, "learning_rate": 0.0001, "loss": 1.4708, "step": 8811 }, { "epoch": 1.0237583502759222, "grad_norm": 0.5628485083580017, "learning_rate": 0.0001, "loss": 1.6034, "step": 8812 }, { "epoch": 1.0238745280278827, "grad_norm": 0.6112015247344971, "learning_rate": 0.0001, "loss": 1.6817, "step": 8813 }, { "epoch": 1.0239907057798432, "grad_norm": 0.5038037896156311, "learning_rate": 0.0001, "loss": 1.4484, "step": 8814 }, { "epoch": 1.0241068835318037, "grad_norm": 0.5269456505775452, "learning_rate": 0.0001, "loss": 1.5061, "step": 8815 }, { "epoch": 1.0242230612837642, "grad_norm": 0.48238101601600647, "learning_rate": 0.0001, "loss": 1.3186, "step": 8816 }, { "epoch": 1.0243392390357247, "grad_norm": 0.5162169337272644, "learning_rate": 0.0001, "loss": 1.464, "step": 8817 }, { "epoch": 1.0244554167876851, "grad_norm": 0.5213837027549744, "learning_rate": 0.0001, "loss": 1.4911, "step": 8818 }, { "epoch": 1.0245715945396456, "grad_norm": 0.5037201642990112, "learning_rate": 0.0001, "loss": 1.2898, "step": 8819 }, { "epoch": 1.0246877722916061, "grad_norm": 0.517815113067627, "learning_rate": 0.0001, "loss": 1.5051, "step": 8820 }, { "epoch": 1.0248039500435666, "grad_norm": 0.5647580027580261, "learning_rate": 0.0001, "loss": 1.5806, "step": 8821 }, { "epoch": 1.024920127795527, "grad_norm": 0.549393355846405, "learning_rate": 0.0001, "loss": 1.5951, "step": 8822 }, { "epoch": 1.0250363055474876, "grad_norm": 0.5423721075057983, "learning_rate": 0.0001, "loss": 1.4828, "step": 8823 }, { "epoch": 1.025152483299448, "grad_norm": 0.5369595289230347, "learning_rate": 0.0001, "loss": 1.5219, "step": 8824 }, { "epoch": 1.0252686610514086, "grad_norm": 0.6080834269523621, "learning_rate": 0.0001, "loss": 1.7438, "step": 8825 }, { "epoch": 1.025384838803369, "grad_norm": 0.5140199065208435, "learning_rate": 0.0001, "loss": 1.3756, "step": 8826 }, { "epoch": 1.0255010165553295, "grad_norm": 0.5014476776123047, "learning_rate": 0.0001, "loss": 1.5028, "step": 8827 }, { "epoch": 1.0256171943072903, "grad_norm": 0.5367702841758728, "learning_rate": 0.0001, "loss": 1.3651, "step": 8828 }, { "epoch": 1.0257333720592507, "grad_norm": 0.5167638659477234, "learning_rate": 0.0001, "loss": 1.329, "step": 8829 }, { "epoch": 1.0258495498112112, "grad_norm": 0.5390440225601196, "learning_rate": 0.0001, "loss": 1.4476, "step": 8830 }, { "epoch": 1.0259657275631717, "grad_norm": 0.5453746914863586, "learning_rate": 0.0001, "loss": 1.4783, "step": 8831 }, { "epoch": 1.0260819053151322, "grad_norm": 0.5189757943153381, "learning_rate": 0.0001, "loss": 1.4924, "step": 8832 }, { "epoch": 1.0261980830670927, "grad_norm": 0.5460232496261597, "learning_rate": 0.0001, "loss": 1.496, "step": 8833 }, { "epoch": 1.0263142608190532, "grad_norm": 0.4965575337409973, "learning_rate": 0.0001, "loss": 1.448, "step": 8834 }, { "epoch": 1.0264304385710137, "grad_norm": 0.5070030689239502, "learning_rate": 0.0001, "loss": 1.4164, "step": 8835 }, { "epoch": 1.0265466163229742, "grad_norm": 0.5291348099708557, "learning_rate": 0.0001, "loss": 1.5749, "step": 8836 }, { "epoch": 1.0266627940749347, "grad_norm": 0.49816176295280457, "learning_rate": 0.0001, "loss": 1.3712, "step": 8837 }, { "epoch": 1.0267789718268951, "grad_norm": 0.5235051512718201, "learning_rate": 0.0001, "loss": 1.4987, "step": 8838 }, { "epoch": 1.0268951495788556, "grad_norm": 0.5131754875183105, "learning_rate": 0.0001, "loss": 1.336, "step": 8839 }, { "epoch": 1.0270113273308161, "grad_norm": 0.5183280110359192, "learning_rate": 0.0001, "loss": 1.4141, "step": 8840 }, { "epoch": 1.0271275050827766, "grad_norm": 0.5118065476417542, "learning_rate": 0.0001, "loss": 1.4272, "step": 8841 }, { "epoch": 1.027243682834737, "grad_norm": 0.5284550189971924, "learning_rate": 0.0001, "loss": 1.438, "step": 8842 }, { "epoch": 1.0273598605866976, "grad_norm": 0.5649248957633972, "learning_rate": 0.0001, "loss": 1.5256, "step": 8843 }, { "epoch": 1.027476038338658, "grad_norm": 0.5072284936904907, "learning_rate": 0.0001, "loss": 1.4069, "step": 8844 }, { "epoch": 1.0275922160906186, "grad_norm": 0.5180877447128296, "learning_rate": 0.0001, "loss": 1.3545, "step": 8845 }, { "epoch": 1.027708393842579, "grad_norm": 0.5194306373596191, "learning_rate": 0.0001, "loss": 1.4195, "step": 8846 }, { "epoch": 1.0278245715945395, "grad_norm": 0.5667744278907776, "learning_rate": 0.0001, "loss": 1.5755, "step": 8847 }, { "epoch": 1.0279407493465, "grad_norm": 0.5082692503929138, "learning_rate": 0.0001, "loss": 1.5024, "step": 8848 }, { "epoch": 1.0280569270984607, "grad_norm": 0.6028507947921753, "learning_rate": 0.0001, "loss": 1.4596, "step": 8849 }, { "epoch": 1.0281731048504212, "grad_norm": 0.5387246012687683, "learning_rate": 0.0001, "loss": 1.5774, "step": 8850 }, { "epoch": 1.0282892826023817, "grad_norm": 0.4912740886211395, "learning_rate": 0.0001, "loss": 1.4794, "step": 8851 }, { "epoch": 1.0284054603543422, "grad_norm": 0.5244132876396179, "learning_rate": 0.0001, "loss": 1.3191, "step": 8852 }, { "epoch": 1.0285216381063027, "grad_norm": 0.5031388998031616, "learning_rate": 0.0001, "loss": 1.3729, "step": 8853 }, { "epoch": 1.0286378158582632, "grad_norm": 0.544710636138916, "learning_rate": 0.0001, "loss": 1.5955, "step": 8854 }, { "epoch": 1.0287539936102237, "grad_norm": 0.54068523645401, "learning_rate": 0.0001, "loss": 1.3927, "step": 8855 }, { "epoch": 1.0288701713621842, "grad_norm": 0.5172038078308105, "learning_rate": 0.0001, "loss": 1.4375, "step": 8856 }, { "epoch": 1.0289863491141447, "grad_norm": 0.49291446805000305, "learning_rate": 0.0001, "loss": 1.3361, "step": 8857 }, { "epoch": 1.0291025268661051, "grad_norm": 0.5305606722831726, "learning_rate": 0.0001, "loss": 1.4012, "step": 8858 }, { "epoch": 1.0292187046180656, "grad_norm": 0.4981054365634918, "learning_rate": 0.0001, "loss": 1.5502, "step": 8859 }, { "epoch": 1.0293348823700261, "grad_norm": 0.510947585105896, "learning_rate": 0.0001, "loss": 1.3286, "step": 8860 }, { "epoch": 1.0294510601219866, "grad_norm": 0.5207487940788269, "learning_rate": 0.0001, "loss": 1.4023, "step": 8861 }, { "epoch": 1.029567237873947, "grad_norm": 0.5454730987548828, "learning_rate": 0.0001, "loss": 1.5136, "step": 8862 }, { "epoch": 1.0296834156259076, "grad_norm": 0.5361663699150085, "learning_rate": 0.0001, "loss": 1.6202, "step": 8863 }, { "epoch": 1.029799593377868, "grad_norm": 0.5152962803840637, "learning_rate": 0.0001, "loss": 1.3848, "step": 8864 }, { "epoch": 1.0299157711298286, "grad_norm": 0.5426158905029297, "learning_rate": 0.0001, "loss": 1.2973, "step": 8865 }, { "epoch": 1.030031948881789, "grad_norm": 0.5150105357170105, "learning_rate": 0.0001, "loss": 1.4212, "step": 8866 }, { "epoch": 1.0301481266337496, "grad_norm": 0.5968256592750549, "learning_rate": 0.0001, "loss": 1.6619, "step": 8867 }, { "epoch": 1.03026430438571, "grad_norm": 0.4991264343261719, "learning_rate": 0.0001, "loss": 1.3305, "step": 8868 }, { "epoch": 1.0303804821376705, "grad_norm": 0.5225552916526794, "learning_rate": 0.0001, "loss": 1.5386, "step": 8869 }, { "epoch": 1.0304966598896312, "grad_norm": 0.5236073732376099, "learning_rate": 0.0001, "loss": 1.5996, "step": 8870 }, { "epoch": 1.0306128376415917, "grad_norm": 0.5573896169662476, "learning_rate": 0.0001, "loss": 1.4516, "step": 8871 }, { "epoch": 1.0307290153935522, "grad_norm": 0.47595614194869995, "learning_rate": 0.0001, "loss": 1.312, "step": 8872 }, { "epoch": 1.0308451931455127, "grad_norm": 0.5099783539772034, "learning_rate": 0.0001, "loss": 1.3819, "step": 8873 }, { "epoch": 1.0309613708974732, "grad_norm": 0.5189265608787537, "learning_rate": 0.0001, "loss": 1.5551, "step": 8874 }, { "epoch": 1.0310775486494337, "grad_norm": 0.567966103553772, "learning_rate": 0.0001, "loss": 1.5482, "step": 8875 }, { "epoch": 1.0311937264013942, "grad_norm": 0.5577686429023743, "learning_rate": 0.0001, "loss": 1.674, "step": 8876 }, { "epoch": 1.0313099041533547, "grad_norm": 0.5853254795074463, "learning_rate": 0.0001, "loss": 1.4078, "step": 8877 }, { "epoch": 1.0314260819053152, "grad_norm": 0.5423609018325806, "learning_rate": 0.0001, "loss": 1.4577, "step": 8878 }, { "epoch": 1.0315422596572756, "grad_norm": 0.5053103566169739, "learning_rate": 0.0001, "loss": 1.5071, "step": 8879 }, { "epoch": 1.0316584374092361, "grad_norm": 0.5344913601875305, "learning_rate": 0.0001, "loss": 1.7323, "step": 8880 }, { "epoch": 1.0317746151611966, "grad_norm": 0.5342625379562378, "learning_rate": 0.0001, "loss": 1.5624, "step": 8881 }, { "epoch": 1.031890792913157, "grad_norm": 0.48883843421936035, "learning_rate": 0.0001, "loss": 1.2327, "step": 8882 }, { "epoch": 1.0320069706651176, "grad_norm": 0.49293336272239685, "learning_rate": 0.0001, "loss": 1.4864, "step": 8883 }, { "epoch": 1.032123148417078, "grad_norm": 0.4973764717578888, "learning_rate": 0.0001, "loss": 1.1467, "step": 8884 }, { "epoch": 1.0322393261690386, "grad_norm": 0.49885573983192444, "learning_rate": 0.0001, "loss": 1.4539, "step": 8885 }, { "epoch": 1.032355503920999, "grad_norm": 0.5181958079338074, "learning_rate": 0.0001, "loss": 1.478, "step": 8886 }, { "epoch": 1.0324716816729596, "grad_norm": 0.5104524493217468, "learning_rate": 0.0001, "loss": 1.4491, "step": 8887 }, { "epoch": 1.03258785942492, "grad_norm": 0.5194922089576721, "learning_rate": 0.0001, "loss": 1.4931, "step": 8888 }, { "epoch": 1.0327040371768805, "grad_norm": 0.5585922002792358, "learning_rate": 0.0001, "loss": 1.4493, "step": 8889 }, { "epoch": 1.032820214928841, "grad_norm": 0.49765944480895996, "learning_rate": 0.0001, "loss": 1.3614, "step": 8890 }, { "epoch": 1.0329363926808017, "grad_norm": 0.5335497260093689, "learning_rate": 0.0001, "loss": 1.3586, "step": 8891 }, { "epoch": 1.0330525704327622, "grad_norm": 0.5823356509208679, "learning_rate": 0.0001, "loss": 1.4327, "step": 8892 }, { "epoch": 1.0331687481847227, "grad_norm": 0.5257899165153503, "learning_rate": 0.0001, "loss": 1.5516, "step": 8893 }, { "epoch": 1.0332849259366832, "grad_norm": 0.5384433269500732, "learning_rate": 0.0001, "loss": 1.5017, "step": 8894 }, { "epoch": 1.0334011036886437, "grad_norm": 0.5588715672492981, "learning_rate": 0.0001, "loss": 1.3805, "step": 8895 }, { "epoch": 1.0335172814406042, "grad_norm": 0.5330429673194885, "learning_rate": 0.0001, "loss": 1.482, "step": 8896 }, { "epoch": 1.0336334591925647, "grad_norm": 0.515548050403595, "learning_rate": 0.0001, "loss": 1.447, "step": 8897 }, { "epoch": 1.0337496369445252, "grad_norm": 0.5171681046485901, "learning_rate": 0.0001, "loss": 1.6035, "step": 8898 }, { "epoch": 1.0338658146964856, "grad_norm": 0.5828492045402527, "learning_rate": 0.0001, "loss": 1.6547, "step": 8899 }, { "epoch": 1.0339819924484461, "grad_norm": 0.4846728444099426, "learning_rate": 0.0001, "loss": 1.4961, "step": 8900 }, { "epoch": 1.0340981702004066, "grad_norm": 0.5182422995567322, "learning_rate": 0.0001, "loss": 1.4393, "step": 8901 }, { "epoch": 1.034214347952367, "grad_norm": 0.5431578755378723, "learning_rate": 0.0001, "loss": 1.4924, "step": 8902 }, { "epoch": 1.0343305257043276, "grad_norm": 0.5584523677825928, "learning_rate": 0.0001, "loss": 1.4686, "step": 8903 }, { "epoch": 1.034446703456288, "grad_norm": 0.5252351760864258, "learning_rate": 0.0001, "loss": 1.269, "step": 8904 }, { "epoch": 1.0345628812082486, "grad_norm": 0.575616717338562, "learning_rate": 0.0001, "loss": 1.6217, "step": 8905 }, { "epoch": 1.034679058960209, "grad_norm": 0.6257792115211487, "learning_rate": 0.0001, "loss": 1.7119, "step": 8906 }, { "epoch": 1.0347952367121696, "grad_norm": 0.532507598400116, "learning_rate": 0.0001, "loss": 1.5132, "step": 8907 }, { "epoch": 1.03491141446413, "grad_norm": 0.5430809855461121, "learning_rate": 0.0001, "loss": 1.4875, "step": 8908 }, { "epoch": 1.0350275922160905, "grad_norm": 0.5150136351585388, "learning_rate": 0.0001, "loss": 1.4398, "step": 8909 }, { "epoch": 1.035143769968051, "grad_norm": 0.5215170383453369, "learning_rate": 0.0001, "loss": 1.4457, "step": 8910 }, { "epoch": 1.0352599477200117, "grad_norm": 0.5841111540794373, "learning_rate": 0.0001, "loss": 1.5476, "step": 8911 }, { "epoch": 1.0353761254719722, "grad_norm": 0.5070576667785645, "learning_rate": 0.0001, "loss": 1.5439, "step": 8912 }, { "epoch": 1.0354923032239327, "grad_norm": 0.5849988460540771, "learning_rate": 0.0001, "loss": 1.4834, "step": 8913 }, { "epoch": 1.0356084809758932, "grad_norm": 0.5653141736984253, "learning_rate": 0.0001, "loss": 1.5947, "step": 8914 }, { "epoch": 1.0357246587278537, "grad_norm": 0.5517808198928833, "learning_rate": 0.0001, "loss": 1.5693, "step": 8915 }, { "epoch": 1.0358408364798142, "grad_norm": 0.5292582511901855, "learning_rate": 0.0001, "loss": 1.3979, "step": 8916 }, { "epoch": 1.0359570142317747, "grad_norm": 0.5182827711105347, "learning_rate": 0.0001, "loss": 1.4861, "step": 8917 }, { "epoch": 1.0360731919837352, "grad_norm": 0.5552460551261902, "learning_rate": 0.0001, "loss": 1.4503, "step": 8918 }, { "epoch": 1.0361893697356956, "grad_norm": 0.5514026880264282, "learning_rate": 0.0001, "loss": 1.3159, "step": 8919 }, { "epoch": 1.0363055474876561, "grad_norm": 0.5646169185638428, "learning_rate": 0.0001, "loss": 1.4833, "step": 8920 }, { "epoch": 1.0364217252396166, "grad_norm": 0.5406813621520996, "learning_rate": 0.0001, "loss": 1.5865, "step": 8921 }, { "epoch": 1.036537902991577, "grad_norm": 0.5190636515617371, "learning_rate": 0.0001, "loss": 1.5229, "step": 8922 }, { "epoch": 1.0366540807435376, "grad_norm": 0.538845956325531, "learning_rate": 0.0001, "loss": 1.6992, "step": 8923 }, { "epoch": 1.036770258495498, "grad_norm": 0.5209506154060364, "learning_rate": 0.0001, "loss": 1.3851, "step": 8924 }, { "epoch": 1.0368864362474586, "grad_norm": 0.5400375127792358, "learning_rate": 0.0001, "loss": 1.332, "step": 8925 }, { "epoch": 1.037002613999419, "grad_norm": 0.5286649465560913, "learning_rate": 0.0001, "loss": 1.4175, "step": 8926 }, { "epoch": 1.0371187917513796, "grad_norm": 0.5743568539619446, "learning_rate": 0.0001, "loss": 1.4531, "step": 8927 }, { "epoch": 1.03723496950334, "grad_norm": 0.5251564979553223, "learning_rate": 0.0001, "loss": 1.4803, "step": 8928 }, { "epoch": 1.0373511472553005, "grad_norm": 0.5093726515769958, "learning_rate": 0.0001, "loss": 1.4133, "step": 8929 }, { "epoch": 1.037467325007261, "grad_norm": 0.5467512011528015, "learning_rate": 0.0001, "loss": 1.5559, "step": 8930 }, { "epoch": 1.0375835027592215, "grad_norm": 0.5583242774009705, "learning_rate": 0.0001, "loss": 1.5438, "step": 8931 }, { "epoch": 1.037699680511182, "grad_norm": 0.5613623857498169, "learning_rate": 0.0001, "loss": 1.4814, "step": 8932 }, { "epoch": 1.0378158582631427, "grad_norm": 0.5468906760215759, "learning_rate": 0.0001, "loss": 1.4407, "step": 8933 }, { "epoch": 1.0379320360151032, "grad_norm": 0.5442972183227539, "learning_rate": 0.0001, "loss": 1.3933, "step": 8934 }, { "epoch": 1.0380482137670637, "grad_norm": 0.5066025853157043, "learning_rate": 0.0001, "loss": 1.5188, "step": 8935 }, { "epoch": 1.0381643915190242, "grad_norm": 0.548570990562439, "learning_rate": 0.0001, "loss": 1.5706, "step": 8936 }, { "epoch": 1.0382805692709847, "grad_norm": 0.5386464595794678, "learning_rate": 0.0001, "loss": 1.5915, "step": 8937 }, { "epoch": 1.0383967470229452, "grad_norm": 0.6019245386123657, "learning_rate": 0.0001, "loss": 1.6093, "step": 8938 }, { "epoch": 1.0385129247749056, "grad_norm": 0.5137844085693359, "learning_rate": 0.0001, "loss": 1.3028, "step": 8939 }, { "epoch": 1.0386291025268661, "grad_norm": 0.5185123085975647, "learning_rate": 0.0001, "loss": 1.5192, "step": 8940 }, { "epoch": 1.0387452802788266, "grad_norm": 0.5555708408355713, "learning_rate": 0.0001, "loss": 1.3948, "step": 8941 }, { "epoch": 1.038861458030787, "grad_norm": 0.49165698885917664, "learning_rate": 0.0001, "loss": 1.3102, "step": 8942 }, { "epoch": 1.0389776357827476, "grad_norm": 0.568077564239502, "learning_rate": 0.0001, "loss": 1.6435, "step": 8943 }, { "epoch": 1.039093813534708, "grad_norm": 0.5384320020675659, "learning_rate": 0.0001, "loss": 1.4999, "step": 8944 }, { "epoch": 1.0392099912866686, "grad_norm": 0.5384372472763062, "learning_rate": 0.0001, "loss": 1.3606, "step": 8945 }, { "epoch": 1.039326169038629, "grad_norm": 0.5323529839515686, "learning_rate": 0.0001, "loss": 1.4326, "step": 8946 }, { "epoch": 1.0394423467905896, "grad_norm": 0.5654020309448242, "learning_rate": 0.0001, "loss": 1.6373, "step": 8947 }, { "epoch": 1.03955852454255, "grad_norm": 0.5455036163330078, "learning_rate": 0.0001, "loss": 1.2164, "step": 8948 }, { "epoch": 1.0396747022945105, "grad_norm": 0.5437000393867493, "learning_rate": 0.0001, "loss": 1.4991, "step": 8949 }, { "epoch": 1.039790880046471, "grad_norm": 0.5596999526023865, "learning_rate": 0.0001, "loss": 1.5026, "step": 8950 }, { "epoch": 1.0399070577984315, "grad_norm": 0.5869429707527161, "learning_rate": 0.0001, "loss": 1.5198, "step": 8951 }, { "epoch": 1.040023235550392, "grad_norm": 0.5478681921958923, "learning_rate": 0.0001, "loss": 1.34, "step": 8952 }, { "epoch": 1.0401394133023527, "grad_norm": 0.5384049415588379, "learning_rate": 0.0001, "loss": 1.4315, "step": 8953 }, { "epoch": 1.0402555910543132, "grad_norm": 0.5583698749542236, "learning_rate": 0.0001, "loss": 1.5661, "step": 8954 }, { "epoch": 1.0403717688062737, "grad_norm": 0.5423256754875183, "learning_rate": 0.0001, "loss": 1.4291, "step": 8955 }, { "epoch": 1.0404879465582342, "grad_norm": 0.5298671722412109, "learning_rate": 0.0001, "loss": 1.4724, "step": 8956 }, { "epoch": 1.0406041243101947, "grad_norm": 0.5103389024734497, "learning_rate": 0.0001, "loss": 1.2942, "step": 8957 }, { "epoch": 1.0407203020621552, "grad_norm": 0.5083001852035522, "learning_rate": 0.0001, "loss": 1.4806, "step": 8958 }, { "epoch": 1.0408364798141156, "grad_norm": 0.5489501953125, "learning_rate": 0.0001, "loss": 1.5624, "step": 8959 }, { "epoch": 1.0409526575660761, "grad_norm": 0.5339622497558594, "learning_rate": 0.0001, "loss": 1.4333, "step": 8960 }, { "epoch": 1.0410688353180366, "grad_norm": 0.5265421271324158, "learning_rate": 0.0001, "loss": 1.5101, "step": 8961 }, { "epoch": 1.0411850130699971, "grad_norm": 0.539472222328186, "learning_rate": 0.0001, "loss": 1.4749, "step": 8962 }, { "epoch": 1.0413011908219576, "grad_norm": 0.5840896964073181, "learning_rate": 0.0001, "loss": 1.4922, "step": 8963 }, { "epoch": 1.041417368573918, "grad_norm": 0.5356101393699646, "learning_rate": 0.0001, "loss": 1.4835, "step": 8964 }, { "epoch": 1.0415335463258786, "grad_norm": 0.5115817785263062, "learning_rate": 0.0001, "loss": 1.4016, "step": 8965 }, { "epoch": 1.041649724077839, "grad_norm": 0.5461002588272095, "learning_rate": 0.0001, "loss": 1.4222, "step": 8966 }, { "epoch": 1.0417659018297996, "grad_norm": 0.515606701374054, "learning_rate": 0.0001, "loss": 1.4263, "step": 8967 }, { "epoch": 1.04188207958176, "grad_norm": 0.5375041365623474, "learning_rate": 0.0001, "loss": 1.6334, "step": 8968 }, { "epoch": 1.0419982573337205, "grad_norm": 0.5649837851524353, "learning_rate": 0.0001, "loss": 1.5275, "step": 8969 }, { "epoch": 1.042114435085681, "grad_norm": 0.5564467310905457, "learning_rate": 0.0001, "loss": 1.4987, "step": 8970 }, { "epoch": 1.0422306128376415, "grad_norm": 0.5804434418678284, "learning_rate": 0.0001, "loss": 1.6147, "step": 8971 }, { "epoch": 1.042346790589602, "grad_norm": 0.5008756518363953, "learning_rate": 0.0001, "loss": 1.3786, "step": 8972 }, { "epoch": 1.0424629683415625, "grad_norm": 0.4874401092529297, "learning_rate": 0.0001, "loss": 1.4006, "step": 8973 }, { "epoch": 1.042579146093523, "grad_norm": 0.5213797688484192, "learning_rate": 0.0001, "loss": 1.5882, "step": 8974 }, { "epoch": 1.0426953238454837, "grad_norm": 0.5059088468551636, "learning_rate": 0.0001, "loss": 1.4512, "step": 8975 }, { "epoch": 1.0428115015974442, "grad_norm": 0.5125690698623657, "learning_rate": 0.0001, "loss": 1.3756, "step": 8976 }, { "epoch": 1.0429276793494047, "grad_norm": 0.4973143935203552, "learning_rate": 0.0001, "loss": 1.4351, "step": 8977 }, { "epoch": 1.0430438571013652, "grad_norm": 0.5088719129562378, "learning_rate": 0.0001, "loss": 1.5333, "step": 8978 }, { "epoch": 1.0431600348533256, "grad_norm": 0.5613342523574829, "learning_rate": 0.0001, "loss": 1.5149, "step": 8979 }, { "epoch": 1.0432762126052861, "grad_norm": 0.5364037752151489, "learning_rate": 0.0001, "loss": 1.4992, "step": 8980 }, { "epoch": 1.0433923903572466, "grad_norm": 0.5083293914794922, "learning_rate": 0.0001, "loss": 1.4213, "step": 8981 }, { "epoch": 1.0435085681092071, "grad_norm": 0.5561593174934387, "learning_rate": 0.0001, "loss": 1.4165, "step": 8982 }, { "epoch": 1.0436247458611676, "grad_norm": 0.49919378757476807, "learning_rate": 0.0001, "loss": 1.4196, "step": 8983 }, { "epoch": 1.043740923613128, "grad_norm": 0.5361339449882507, "learning_rate": 0.0001, "loss": 1.3825, "step": 8984 }, { "epoch": 1.0438571013650886, "grad_norm": 0.5248799920082092, "learning_rate": 0.0001, "loss": 1.5932, "step": 8985 }, { "epoch": 1.043973279117049, "grad_norm": 0.5760166049003601, "learning_rate": 0.0001, "loss": 1.5835, "step": 8986 }, { "epoch": 1.0440894568690096, "grad_norm": 0.580518364906311, "learning_rate": 0.0001, "loss": 1.5113, "step": 8987 }, { "epoch": 1.04420563462097, "grad_norm": 0.5657472610473633, "learning_rate": 0.0001, "loss": 1.6004, "step": 8988 }, { "epoch": 1.0443218123729305, "grad_norm": 0.5147629380226135, "learning_rate": 0.0001, "loss": 1.3792, "step": 8989 }, { "epoch": 1.044437990124891, "grad_norm": 0.5364604592323303, "learning_rate": 0.0001, "loss": 1.4996, "step": 8990 }, { "epoch": 1.0445541678768515, "grad_norm": 0.4889623820781708, "learning_rate": 0.0001, "loss": 1.3463, "step": 8991 }, { "epoch": 1.044670345628812, "grad_norm": 0.5112910270690918, "learning_rate": 0.0001, "loss": 1.423, "step": 8992 }, { "epoch": 1.0447865233807725, "grad_norm": 0.589764416217804, "learning_rate": 0.0001, "loss": 1.5348, "step": 8993 }, { "epoch": 1.044902701132733, "grad_norm": 0.5439825057983398, "learning_rate": 0.0001, "loss": 1.5048, "step": 8994 }, { "epoch": 1.0450188788846937, "grad_norm": 0.5245374441146851, "learning_rate": 0.0001, "loss": 1.4242, "step": 8995 }, { "epoch": 1.0451350566366542, "grad_norm": 0.5435076951980591, "learning_rate": 0.0001, "loss": 1.4895, "step": 8996 }, { "epoch": 1.0452512343886147, "grad_norm": 0.6293114423751831, "learning_rate": 0.0001, "loss": 1.6639, "step": 8997 }, { "epoch": 1.0453674121405752, "grad_norm": 0.5348904728889465, "learning_rate": 0.0001, "loss": 1.4794, "step": 8998 }, { "epoch": 1.0454835898925356, "grad_norm": 0.5541291832923889, "learning_rate": 0.0001, "loss": 1.5895, "step": 8999 }, { "epoch": 1.0455997676444961, "grad_norm": 0.6162405610084534, "learning_rate": 0.0001, "loss": 1.8332, "step": 9000 }, { "epoch": 1.0457159453964566, "grad_norm": 0.5342404842376709, "learning_rate": 0.0001, "loss": 1.6188, "step": 9001 }, { "epoch": 1.0458321231484171, "grad_norm": 0.5075395107269287, "learning_rate": 0.0001, "loss": 1.2412, "step": 9002 }, { "epoch": 1.0459483009003776, "grad_norm": 0.529941737651825, "learning_rate": 0.0001, "loss": 1.4548, "step": 9003 }, { "epoch": 1.046064478652338, "grad_norm": 0.5113204717636108, "learning_rate": 0.0001, "loss": 1.4907, "step": 9004 }, { "epoch": 1.0461806564042986, "grad_norm": 0.5787484049797058, "learning_rate": 0.0001, "loss": 1.7276, "step": 9005 }, { "epoch": 1.046296834156259, "grad_norm": 0.5223624110221863, "learning_rate": 0.0001, "loss": 1.4266, "step": 9006 }, { "epoch": 1.0464130119082196, "grad_norm": 0.5437747836112976, "learning_rate": 0.0001, "loss": 1.5004, "step": 9007 }, { "epoch": 1.04652918966018, "grad_norm": 0.5204196572303772, "learning_rate": 0.0001, "loss": 1.43, "step": 9008 }, { "epoch": 1.0466453674121405, "grad_norm": 0.5550678372383118, "learning_rate": 0.0001, "loss": 1.5454, "step": 9009 }, { "epoch": 1.046761545164101, "grad_norm": 0.5623777508735657, "learning_rate": 0.0001, "loss": 1.4201, "step": 9010 }, { "epoch": 1.0468777229160615, "grad_norm": 0.5182955861091614, "learning_rate": 0.0001, "loss": 1.5477, "step": 9011 }, { "epoch": 1.046993900668022, "grad_norm": 0.5236296653747559, "learning_rate": 0.0001, "loss": 1.5087, "step": 9012 }, { "epoch": 1.0471100784199825, "grad_norm": 0.5745497345924377, "learning_rate": 0.0001, "loss": 1.6299, "step": 9013 }, { "epoch": 1.047226256171943, "grad_norm": 0.5346052646636963, "learning_rate": 0.0001, "loss": 1.5206, "step": 9014 }, { "epoch": 1.0473424339239035, "grad_norm": 0.523341178894043, "learning_rate": 0.0001, "loss": 1.5304, "step": 9015 }, { "epoch": 1.047458611675864, "grad_norm": 0.5185964703559875, "learning_rate": 0.0001, "loss": 1.5386, "step": 9016 }, { "epoch": 1.0475747894278247, "grad_norm": 0.5521393418312073, "learning_rate": 0.0001, "loss": 1.7436, "step": 9017 }, { "epoch": 1.0476909671797852, "grad_norm": 0.566999077796936, "learning_rate": 0.0001, "loss": 1.5032, "step": 9018 }, { "epoch": 1.0478071449317456, "grad_norm": 0.5497294068336487, "learning_rate": 0.0001, "loss": 1.4579, "step": 9019 }, { "epoch": 1.0479233226837061, "grad_norm": 0.5116751194000244, "learning_rate": 0.0001, "loss": 1.5127, "step": 9020 }, { "epoch": 1.0480395004356666, "grad_norm": 0.5744738578796387, "learning_rate": 0.0001, "loss": 1.4848, "step": 9021 }, { "epoch": 1.0481556781876271, "grad_norm": 0.4972259998321533, "learning_rate": 0.0001, "loss": 1.3011, "step": 9022 }, { "epoch": 1.0482718559395876, "grad_norm": 0.5275663137435913, "learning_rate": 0.0001, "loss": 1.528, "step": 9023 }, { "epoch": 1.048388033691548, "grad_norm": 0.4991401135921478, "learning_rate": 0.0001, "loss": 1.2919, "step": 9024 }, { "epoch": 1.0485042114435086, "grad_norm": 0.507801353931427, "learning_rate": 0.0001, "loss": 1.4393, "step": 9025 }, { "epoch": 1.048620389195469, "grad_norm": 0.528578519821167, "learning_rate": 0.0001, "loss": 1.522, "step": 9026 }, { "epoch": 1.0487365669474296, "grad_norm": 0.5243181586265564, "learning_rate": 0.0001, "loss": 1.466, "step": 9027 }, { "epoch": 1.04885274469939, "grad_norm": 0.5625602602958679, "learning_rate": 0.0001, "loss": 1.4427, "step": 9028 }, { "epoch": 1.0489689224513505, "grad_norm": 0.539786159992218, "learning_rate": 0.0001, "loss": 1.4553, "step": 9029 }, { "epoch": 1.049085100203311, "grad_norm": 0.5501077175140381, "learning_rate": 0.0001, "loss": 1.269, "step": 9030 }, { "epoch": 1.0492012779552715, "grad_norm": 0.5288931131362915, "learning_rate": 0.0001, "loss": 1.4523, "step": 9031 }, { "epoch": 1.049317455707232, "grad_norm": 0.5670494437217712, "learning_rate": 0.0001, "loss": 1.5632, "step": 9032 }, { "epoch": 1.0494336334591925, "grad_norm": 0.5463694930076599, "learning_rate": 0.0001, "loss": 1.5296, "step": 9033 }, { "epoch": 1.049549811211153, "grad_norm": 0.5625814199447632, "learning_rate": 0.0001, "loss": 1.559, "step": 9034 }, { "epoch": 1.0496659889631135, "grad_norm": 0.5289028286933899, "learning_rate": 0.0001, "loss": 1.4389, "step": 9035 }, { "epoch": 1.049782166715074, "grad_norm": 0.5531147122383118, "learning_rate": 0.0001, "loss": 1.5066, "step": 9036 }, { "epoch": 1.0498983444670347, "grad_norm": 0.5495567917823792, "learning_rate": 0.0001, "loss": 1.3533, "step": 9037 }, { "epoch": 1.0500145222189952, "grad_norm": 0.5757434368133545, "learning_rate": 0.0001, "loss": 1.6329, "step": 9038 }, { "epoch": 1.0501306999709557, "grad_norm": 0.5772126317024231, "learning_rate": 0.0001, "loss": 1.4649, "step": 9039 }, { "epoch": 1.0502468777229161, "grad_norm": 0.5771605968475342, "learning_rate": 0.0001, "loss": 1.3436, "step": 9040 }, { "epoch": 1.0503630554748766, "grad_norm": 0.5607427358627319, "learning_rate": 0.0001, "loss": 1.5775, "step": 9041 }, { "epoch": 1.0504792332268371, "grad_norm": 0.5379540920257568, "learning_rate": 0.0001, "loss": 1.4105, "step": 9042 }, { "epoch": 1.0505954109787976, "grad_norm": 0.5043210387229919, "learning_rate": 0.0001, "loss": 1.4431, "step": 9043 }, { "epoch": 1.050711588730758, "grad_norm": 0.5489790439605713, "learning_rate": 0.0001, "loss": 1.4465, "step": 9044 }, { "epoch": 1.0508277664827186, "grad_norm": 0.5171908736228943, "learning_rate": 0.0001, "loss": 1.4456, "step": 9045 }, { "epoch": 1.050943944234679, "grad_norm": 0.5216082334518433, "learning_rate": 0.0001, "loss": 1.3013, "step": 9046 }, { "epoch": 1.0510601219866396, "grad_norm": 0.5552801489830017, "learning_rate": 0.0001, "loss": 1.5301, "step": 9047 }, { "epoch": 1.0511762997386, "grad_norm": 0.5355478525161743, "learning_rate": 0.0001, "loss": 1.3406, "step": 9048 }, { "epoch": 1.0512924774905605, "grad_norm": 0.5976464748382568, "learning_rate": 0.0001, "loss": 1.6491, "step": 9049 }, { "epoch": 1.051408655242521, "grad_norm": 0.5584911704063416, "learning_rate": 0.0001, "loss": 1.4411, "step": 9050 }, { "epoch": 1.0515248329944815, "grad_norm": 0.5827659964561462, "learning_rate": 0.0001, "loss": 1.5539, "step": 9051 }, { "epoch": 1.051641010746442, "grad_norm": 0.5411894917488098, "learning_rate": 0.0001, "loss": 1.397, "step": 9052 }, { "epoch": 1.0517571884984025, "grad_norm": 0.507165789604187, "learning_rate": 0.0001, "loss": 1.3847, "step": 9053 }, { "epoch": 1.051873366250363, "grad_norm": 0.5528204441070557, "learning_rate": 0.0001, "loss": 1.3638, "step": 9054 }, { "epoch": 1.0519895440023235, "grad_norm": 0.5597609877586365, "learning_rate": 0.0001, "loss": 1.5453, "step": 9055 }, { "epoch": 1.052105721754284, "grad_norm": 0.5310388207435608, "learning_rate": 0.0001, "loss": 1.4972, "step": 9056 }, { "epoch": 1.0522218995062445, "grad_norm": 0.5567108392715454, "learning_rate": 0.0001, "loss": 1.4428, "step": 9057 }, { "epoch": 1.052338077258205, "grad_norm": 0.539630651473999, "learning_rate": 0.0001, "loss": 1.3737, "step": 9058 }, { "epoch": 1.0524542550101657, "grad_norm": 0.5464910864830017, "learning_rate": 0.0001, "loss": 1.4159, "step": 9059 }, { "epoch": 1.0525704327621261, "grad_norm": 0.5596455931663513, "learning_rate": 0.0001, "loss": 1.4302, "step": 9060 }, { "epoch": 1.0526866105140866, "grad_norm": 0.5715380311012268, "learning_rate": 0.0001, "loss": 1.3316, "step": 9061 }, { "epoch": 1.0528027882660471, "grad_norm": 0.5546104907989502, "learning_rate": 0.0001, "loss": 1.3358, "step": 9062 }, { "epoch": 1.0529189660180076, "grad_norm": 0.7018982768058777, "learning_rate": 0.0001, "loss": 1.3523, "step": 9063 }, { "epoch": 1.053035143769968, "grad_norm": 0.5631259679794312, "learning_rate": 0.0001, "loss": 1.3676, "step": 9064 }, { "epoch": 1.0531513215219286, "grad_norm": 0.5323575735092163, "learning_rate": 0.0001, "loss": 1.3987, "step": 9065 }, { "epoch": 1.053267499273889, "grad_norm": 0.5142088532447815, "learning_rate": 0.0001, "loss": 1.4036, "step": 9066 }, { "epoch": 1.0533836770258496, "grad_norm": 0.5592623353004456, "learning_rate": 0.0001, "loss": 1.4503, "step": 9067 }, { "epoch": 1.05349985477781, "grad_norm": 0.5052597522735596, "learning_rate": 0.0001, "loss": 1.4116, "step": 9068 }, { "epoch": 1.0536160325297705, "grad_norm": 0.517086923122406, "learning_rate": 0.0001, "loss": 1.4075, "step": 9069 }, { "epoch": 1.053732210281731, "grad_norm": 0.5268409252166748, "learning_rate": 0.0001, "loss": 1.4975, "step": 9070 }, { "epoch": 1.0538483880336915, "grad_norm": 0.536361038684845, "learning_rate": 0.0001, "loss": 1.5102, "step": 9071 }, { "epoch": 1.053964565785652, "grad_norm": 0.5307349562644958, "learning_rate": 0.0001, "loss": 1.3567, "step": 9072 }, { "epoch": 1.0540807435376125, "grad_norm": 0.5793299078941345, "learning_rate": 0.0001, "loss": 1.5215, "step": 9073 }, { "epoch": 1.054196921289573, "grad_norm": 0.5216554999351501, "learning_rate": 0.0001, "loss": 1.4039, "step": 9074 }, { "epoch": 1.0543130990415335, "grad_norm": 0.49403879046440125, "learning_rate": 0.0001, "loss": 1.2771, "step": 9075 }, { "epoch": 1.054429276793494, "grad_norm": 0.5500255823135376, "learning_rate": 0.0001, "loss": 1.4948, "step": 9076 }, { "epoch": 1.0545454545454545, "grad_norm": 0.5305203795433044, "learning_rate": 0.0001, "loss": 1.5828, "step": 9077 }, { "epoch": 1.054661632297415, "grad_norm": 0.5569433569908142, "learning_rate": 0.0001, "loss": 1.553, "step": 9078 }, { "epoch": 1.0547778100493757, "grad_norm": 0.4997643232345581, "learning_rate": 0.0001, "loss": 1.3793, "step": 9079 }, { "epoch": 1.0548939878013361, "grad_norm": 0.5268417596817017, "learning_rate": 0.0001, "loss": 1.4932, "step": 9080 }, { "epoch": 1.0550101655532966, "grad_norm": 0.5548385381698608, "learning_rate": 0.0001, "loss": 1.5881, "step": 9081 }, { "epoch": 1.0551263433052571, "grad_norm": 0.5406033992767334, "learning_rate": 0.0001, "loss": 1.5279, "step": 9082 }, { "epoch": 1.0552425210572176, "grad_norm": 0.5218345522880554, "learning_rate": 0.0001, "loss": 1.4086, "step": 9083 }, { "epoch": 1.055358698809178, "grad_norm": 0.5393182635307312, "learning_rate": 0.0001, "loss": 1.3487, "step": 9084 }, { "epoch": 1.0554748765611386, "grad_norm": 0.5356934070587158, "learning_rate": 0.0001, "loss": 1.3364, "step": 9085 }, { "epoch": 1.055591054313099, "grad_norm": 0.5603475570678711, "learning_rate": 0.0001, "loss": 1.6026, "step": 9086 }, { "epoch": 1.0557072320650596, "grad_norm": 0.5328572988510132, "learning_rate": 0.0001, "loss": 1.6066, "step": 9087 }, { "epoch": 1.05582340981702, "grad_norm": 0.5267220139503479, "learning_rate": 0.0001, "loss": 1.383, "step": 9088 }, { "epoch": 1.0559395875689805, "grad_norm": 0.5259425640106201, "learning_rate": 0.0001, "loss": 1.4592, "step": 9089 }, { "epoch": 1.056055765320941, "grad_norm": 0.5833221673965454, "learning_rate": 0.0001, "loss": 1.6627, "step": 9090 }, { "epoch": 1.0561719430729015, "grad_norm": 0.5569556951522827, "learning_rate": 0.0001, "loss": 1.4606, "step": 9091 }, { "epoch": 1.056288120824862, "grad_norm": 0.5657941699028015, "learning_rate": 0.0001, "loss": 1.5946, "step": 9092 }, { "epoch": 1.0564042985768225, "grad_norm": 0.5313854813575745, "learning_rate": 0.0001, "loss": 1.4161, "step": 9093 }, { "epoch": 1.056520476328783, "grad_norm": 0.530001699924469, "learning_rate": 0.0001, "loss": 1.431, "step": 9094 }, { "epoch": 1.0566366540807435, "grad_norm": 0.5262467265129089, "learning_rate": 0.0001, "loss": 1.4129, "step": 9095 }, { "epoch": 1.056752831832704, "grad_norm": 0.567335844039917, "learning_rate": 0.0001, "loss": 1.3553, "step": 9096 }, { "epoch": 1.0568690095846645, "grad_norm": 0.5013755559921265, "learning_rate": 0.0001, "loss": 1.4375, "step": 9097 }, { "epoch": 1.056985187336625, "grad_norm": 0.494312047958374, "learning_rate": 0.0001, "loss": 1.445, "step": 9098 }, { "epoch": 1.0571013650885854, "grad_norm": 0.5399910807609558, "learning_rate": 0.0001, "loss": 1.6267, "step": 9099 }, { "epoch": 1.0572175428405461, "grad_norm": 0.5516524314880371, "learning_rate": 0.0001, "loss": 1.3564, "step": 9100 }, { "epoch": 1.0573337205925066, "grad_norm": 0.5304887294769287, "learning_rate": 0.0001, "loss": 1.3785, "step": 9101 }, { "epoch": 1.0574498983444671, "grad_norm": 0.591498851776123, "learning_rate": 0.0001, "loss": 1.542, "step": 9102 }, { "epoch": 1.0575660760964276, "grad_norm": 0.5390315651893616, "learning_rate": 0.0001, "loss": 1.3806, "step": 9103 }, { "epoch": 1.057682253848388, "grad_norm": 0.5507586598396301, "learning_rate": 0.0001, "loss": 1.4722, "step": 9104 }, { "epoch": 1.0577984316003486, "grad_norm": 0.5363011956214905, "learning_rate": 0.0001, "loss": 1.3498, "step": 9105 }, { "epoch": 1.057914609352309, "grad_norm": 0.5194579362869263, "learning_rate": 0.0001, "loss": 1.5218, "step": 9106 }, { "epoch": 1.0580307871042696, "grad_norm": 0.5246658325195312, "learning_rate": 0.0001, "loss": 1.5004, "step": 9107 }, { "epoch": 1.05814696485623, "grad_norm": 0.5200863480567932, "learning_rate": 0.0001, "loss": 1.5608, "step": 9108 }, { "epoch": 1.0582631426081905, "grad_norm": 0.5483285188674927, "learning_rate": 0.0001, "loss": 1.5353, "step": 9109 }, { "epoch": 1.058379320360151, "grad_norm": 0.49789682030677795, "learning_rate": 0.0001, "loss": 1.3264, "step": 9110 }, { "epoch": 1.0584954981121115, "grad_norm": 0.5474060773849487, "learning_rate": 0.0001, "loss": 1.4245, "step": 9111 }, { "epoch": 1.058611675864072, "grad_norm": 0.6262152791023254, "learning_rate": 0.0001, "loss": 1.6001, "step": 9112 }, { "epoch": 1.0587278536160325, "grad_norm": 0.522517740726471, "learning_rate": 0.0001, "loss": 1.5039, "step": 9113 }, { "epoch": 1.058844031367993, "grad_norm": 0.5017138123512268, "learning_rate": 0.0001, "loss": 1.413, "step": 9114 }, { "epoch": 1.0589602091199535, "grad_norm": 0.5721921920776367, "learning_rate": 0.0001, "loss": 1.5267, "step": 9115 }, { "epoch": 1.059076386871914, "grad_norm": 0.5460672378540039, "learning_rate": 0.0001, "loss": 1.3668, "step": 9116 }, { "epoch": 1.0591925646238745, "grad_norm": 0.5231673121452332, "learning_rate": 0.0001, "loss": 1.5067, "step": 9117 }, { "epoch": 1.059308742375835, "grad_norm": 0.5345351099967957, "learning_rate": 0.0001, "loss": 1.521, "step": 9118 }, { "epoch": 1.0594249201277954, "grad_norm": 0.5771772861480713, "learning_rate": 0.0001, "loss": 1.708, "step": 9119 }, { "epoch": 1.059541097879756, "grad_norm": 0.5495395660400391, "learning_rate": 0.0001, "loss": 1.4496, "step": 9120 }, { "epoch": 1.0596572756317166, "grad_norm": 0.5115154981613159, "learning_rate": 0.0001, "loss": 1.2167, "step": 9121 }, { "epoch": 1.0597734533836771, "grad_norm": 0.5737767219543457, "learning_rate": 0.0001, "loss": 1.6987, "step": 9122 }, { "epoch": 1.0598896311356376, "grad_norm": 0.5117427706718445, "learning_rate": 0.0001, "loss": 1.3973, "step": 9123 }, { "epoch": 1.060005808887598, "grad_norm": 0.5852498412132263, "learning_rate": 0.0001, "loss": 1.5844, "step": 9124 }, { "epoch": 1.0601219866395586, "grad_norm": 0.5291174054145813, "learning_rate": 0.0001, "loss": 1.4754, "step": 9125 }, { "epoch": 1.060238164391519, "grad_norm": 0.5075642466545105, "learning_rate": 0.0001, "loss": 1.414, "step": 9126 }, { "epoch": 1.0603543421434796, "grad_norm": 0.47634080052375793, "learning_rate": 0.0001, "loss": 1.3369, "step": 9127 }, { "epoch": 1.06047051989544, "grad_norm": 0.5397040843963623, "learning_rate": 0.0001, "loss": 1.6, "step": 9128 }, { "epoch": 1.0605866976474005, "grad_norm": 0.5401850938796997, "learning_rate": 0.0001, "loss": 1.5148, "step": 9129 }, { "epoch": 1.060702875399361, "grad_norm": 0.5514304041862488, "learning_rate": 0.0001, "loss": 1.5312, "step": 9130 }, { "epoch": 1.0608190531513215, "grad_norm": 0.5192503929138184, "learning_rate": 0.0001, "loss": 1.4662, "step": 9131 }, { "epoch": 1.060935230903282, "grad_norm": 0.5016432404518127, "learning_rate": 0.0001, "loss": 1.6191, "step": 9132 }, { "epoch": 1.0610514086552425, "grad_norm": 0.5191652178764343, "learning_rate": 0.0001, "loss": 1.3155, "step": 9133 }, { "epoch": 1.061167586407203, "grad_norm": 0.5235947966575623, "learning_rate": 0.0001, "loss": 1.5193, "step": 9134 }, { "epoch": 1.0612837641591635, "grad_norm": 0.538102924823761, "learning_rate": 0.0001, "loss": 1.4814, "step": 9135 }, { "epoch": 1.061399941911124, "grad_norm": 0.5283055901527405, "learning_rate": 0.0001, "loss": 1.5145, "step": 9136 }, { "epoch": 1.0615161196630845, "grad_norm": 0.5631850361824036, "learning_rate": 0.0001, "loss": 1.662, "step": 9137 }, { "epoch": 1.061632297415045, "grad_norm": 0.5427637100219727, "learning_rate": 0.0001, "loss": 1.4435, "step": 9138 }, { "epoch": 1.0617484751670054, "grad_norm": 0.5537014603614807, "learning_rate": 0.0001, "loss": 1.5422, "step": 9139 }, { "epoch": 1.061864652918966, "grad_norm": 0.5393050312995911, "learning_rate": 0.0001, "loss": 1.6201, "step": 9140 }, { "epoch": 1.0619808306709264, "grad_norm": 0.5826035141944885, "learning_rate": 0.0001, "loss": 1.6369, "step": 9141 }, { "epoch": 1.0620970084228871, "grad_norm": 0.5376782417297363, "learning_rate": 0.0001, "loss": 1.4487, "step": 9142 }, { "epoch": 1.0622131861748476, "grad_norm": 0.5235744714736938, "learning_rate": 0.0001, "loss": 1.3827, "step": 9143 }, { "epoch": 1.062329363926808, "grad_norm": 0.5324127674102783, "learning_rate": 0.0001, "loss": 1.4812, "step": 9144 }, { "epoch": 1.0624455416787686, "grad_norm": 0.5413472652435303, "learning_rate": 0.0001, "loss": 1.5188, "step": 9145 }, { "epoch": 1.062561719430729, "grad_norm": 0.5227320790290833, "learning_rate": 0.0001, "loss": 1.5676, "step": 9146 }, { "epoch": 1.0626778971826896, "grad_norm": 0.5471850037574768, "learning_rate": 0.0001, "loss": 1.4662, "step": 9147 }, { "epoch": 1.06279407493465, "grad_norm": 0.5257613658905029, "learning_rate": 0.0001, "loss": 1.4821, "step": 9148 }, { "epoch": 1.0629102526866105, "grad_norm": 0.5216736793518066, "learning_rate": 0.0001, "loss": 1.4997, "step": 9149 }, { "epoch": 1.063026430438571, "grad_norm": 0.5594833493232727, "learning_rate": 0.0001, "loss": 1.6263, "step": 9150 }, { "epoch": 1.0631426081905315, "grad_norm": 0.49669763445854187, "learning_rate": 0.0001, "loss": 1.4158, "step": 9151 }, { "epoch": 1.063258785942492, "grad_norm": 0.5265761613845825, "learning_rate": 0.0001, "loss": 1.5632, "step": 9152 }, { "epoch": 1.0633749636944525, "grad_norm": 0.5300150513648987, "learning_rate": 0.0001, "loss": 1.4174, "step": 9153 }, { "epoch": 1.063491141446413, "grad_norm": 0.5201780796051025, "learning_rate": 0.0001, "loss": 1.3244, "step": 9154 }, { "epoch": 1.0636073191983735, "grad_norm": 0.6005831360816956, "learning_rate": 0.0001, "loss": 1.5095, "step": 9155 }, { "epoch": 1.063723496950334, "grad_norm": 0.5342938303947449, "learning_rate": 0.0001, "loss": 1.2851, "step": 9156 }, { "epoch": 1.0638396747022945, "grad_norm": 0.5873286128044128, "learning_rate": 0.0001, "loss": 1.5314, "step": 9157 }, { "epoch": 1.063955852454255, "grad_norm": 0.5283979177474976, "learning_rate": 0.0001, "loss": 1.3008, "step": 9158 }, { "epoch": 1.0640720302062154, "grad_norm": 0.5319207310676575, "learning_rate": 0.0001, "loss": 1.3622, "step": 9159 }, { "epoch": 1.064188207958176, "grad_norm": 0.526910662651062, "learning_rate": 0.0001, "loss": 1.3385, "step": 9160 }, { "epoch": 1.0643043857101364, "grad_norm": 0.5416330099105835, "learning_rate": 0.0001, "loss": 1.369, "step": 9161 }, { "epoch": 1.0644205634620971, "grad_norm": 0.5400428175926208, "learning_rate": 0.0001, "loss": 1.4853, "step": 9162 }, { "epoch": 1.0645367412140576, "grad_norm": 0.5297672152519226, "learning_rate": 0.0001, "loss": 1.4955, "step": 9163 }, { "epoch": 1.064652918966018, "grad_norm": 0.5449459552764893, "learning_rate": 0.0001, "loss": 1.4985, "step": 9164 }, { "epoch": 1.0647690967179786, "grad_norm": 0.5715656876564026, "learning_rate": 0.0001, "loss": 1.6004, "step": 9165 }, { "epoch": 1.064885274469939, "grad_norm": 0.5295259952545166, "learning_rate": 0.0001, "loss": 1.3461, "step": 9166 }, { "epoch": 1.0650014522218996, "grad_norm": 0.5438884496688843, "learning_rate": 0.0001, "loss": 1.3848, "step": 9167 }, { "epoch": 1.06511762997386, "grad_norm": 0.542167603969574, "learning_rate": 0.0001, "loss": 1.5223, "step": 9168 }, { "epoch": 1.0652338077258205, "grad_norm": 0.5348635315895081, "learning_rate": 0.0001, "loss": 1.3742, "step": 9169 }, { "epoch": 1.065349985477781, "grad_norm": 0.5906919836997986, "learning_rate": 0.0001, "loss": 1.6726, "step": 9170 }, { "epoch": 1.0654661632297415, "grad_norm": 0.5702545642852783, "learning_rate": 0.0001, "loss": 1.4587, "step": 9171 }, { "epoch": 1.065582340981702, "grad_norm": 0.5129088759422302, "learning_rate": 0.0001, "loss": 1.2603, "step": 9172 }, { "epoch": 1.0656985187336625, "grad_norm": 0.5603011250495911, "learning_rate": 0.0001, "loss": 1.5184, "step": 9173 }, { "epoch": 1.065814696485623, "grad_norm": 0.6020777821540833, "learning_rate": 0.0001, "loss": 1.5807, "step": 9174 }, { "epoch": 1.0659308742375835, "grad_norm": 0.5032897591590881, "learning_rate": 0.0001, "loss": 1.3745, "step": 9175 }, { "epoch": 1.066047051989544, "grad_norm": 0.5128781199455261, "learning_rate": 0.0001, "loss": 1.2593, "step": 9176 }, { "epoch": 1.0661632297415045, "grad_norm": 0.5254261493682861, "learning_rate": 0.0001, "loss": 1.3345, "step": 9177 }, { "epoch": 1.066279407493465, "grad_norm": 0.508584201335907, "learning_rate": 0.0001, "loss": 1.4429, "step": 9178 }, { "epoch": 1.0663955852454254, "grad_norm": 0.5458223223686218, "learning_rate": 0.0001, "loss": 1.487, "step": 9179 }, { "epoch": 1.066511762997386, "grad_norm": 0.5139407515525818, "learning_rate": 0.0001, "loss": 1.4294, "step": 9180 }, { "epoch": 1.0666279407493464, "grad_norm": 0.505296528339386, "learning_rate": 0.0001, "loss": 1.3642, "step": 9181 }, { "epoch": 1.066744118501307, "grad_norm": 0.5002397298812866, "learning_rate": 0.0001, "loss": 1.3983, "step": 9182 }, { "epoch": 1.0668602962532674, "grad_norm": 0.551529049873352, "learning_rate": 0.0001, "loss": 1.5567, "step": 9183 }, { "epoch": 1.0669764740052279, "grad_norm": 0.578542947769165, "learning_rate": 0.0001, "loss": 1.4973, "step": 9184 }, { "epoch": 1.0670926517571886, "grad_norm": 0.531636655330658, "learning_rate": 0.0001, "loss": 1.5038, "step": 9185 }, { "epoch": 1.067208829509149, "grad_norm": 0.5241780281066895, "learning_rate": 0.0001, "loss": 1.3676, "step": 9186 }, { "epoch": 1.0673250072611096, "grad_norm": 0.5631434321403503, "learning_rate": 0.0001, "loss": 1.5443, "step": 9187 }, { "epoch": 1.06744118501307, "grad_norm": 0.5912564992904663, "learning_rate": 0.0001, "loss": 1.5429, "step": 9188 }, { "epoch": 1.0675573627650305, "grad_norm": 0.4988497197628021, "learning_rate": 0.0001, "loss": 1.5006, "step": 9189 }, { "epoch": 1.067673540516991, "grad_norm": 0.5904607772827148, "learning_rate": 0.0001, "loss": 1.4782, "step": 9190 }, { "epoch": 1.0677897182689515, "grad_norm": 0.5441171526908875, "learning_rate": 0.0001, "loss": 1.3963, "step": 9191 }, { "epoch": 1.067905896020912, "grad_norm": 0.541077196598053, "learning_rate": 0.0001, "loss": 1.5066, "step": 9192 }, { "epoch": 1.0680220737728725, "grad_norm": 0.53225177526474, "learning_rate": 0.0001, "loss": 1.4918, "step": 9193 }, { "epoch": 1.068138251524833, "grad_norm": 0.543021559715271, "learning_rate": 0.0001, "loss": 1.3748, "step": 9194 }, { "epoch": 1.0682544292767935, "grad_norm": 0.6031702756881714, "learning_rate": 0.0001, "loss": 1.5544, "step": 9195 }, { "epoch": 1.068370607028754, "grad_norm": 0.571552574634552, "learning_rate": 0.0001, "loss": 1.3105, "step": 9196 }, { "epoch": 1.0684867847807145, "grad_norm": 0.5475126504898071, "learning_rate": 0.0001, "loss": 1.4106, "step": 9197 }, { "epoch": 1.068602962532675, "grad_norm": 0.5954582691192627, "learning_rate": 0.0001, "loss": 1.6204, "step": 9198 }, { "epoch": 1.0687191402846354, "grad_norm": 0.5108276605606079, "learning_rate": 0.0001, "loss": 1.3587, "step": 9199 }, { "epoch": 1.068835318036596, "grad_norm": 0.5748639106750488, "learning_rate": 0.0001, "loss": 1.6054, "step": 9200 }, { "epoch": 1.0689514957885564, "grad_norm": 0.5265009999275208, "learning_rate": 0.0001, "loss": 1.3955, "step": 9201 }, { "epoch": 1.069067673540517, "grad_norm": 0.5206406116485596, "learning_rate": 0.0001, "loss": 1.4456, "step": 9202 }, { "epoch": 1.0691838512924774, "grad_norm": 0.5493237972259521, "learning_rate": 0.0001, "loss": 1.6656, "step": 9203 }, { "epoch": 1.069300029044438, "grad_norm": 0.5620342493057251, "learning_rate": 0.0001, "loss": 1.4971, "step": 9204 }, { "epoch": 1.0694162067963986, "grad_norm": 0.5706990361213684, "learning_rate": 0.0001, "loss": 1.6995, "step": 9205 }, { "epoch": 1.069532384548359, "grad_norm": 0.5248960852622986, "learning_rate": 0.0001, "loss": 1.4711, "step": 9206 }, { "epoch": 1.0696485623003196, "grad_norm": 0.5431904196739197, "learning_rate": 0.0001, "loss": 1.4966, "step": 9207 }, { "epoch": 1.06976474005228, "grad_norm": 0.5573443174362183, "learning_rate": 0.0001, "loss": 1.5127, "step": 9208 }, { "epoch": 1.0698809178042405, "grad_norm": 0.5166314840316772, "learning_rate": 0.0001, "loss": 1.4974, "step": 9209 }, { "epoch": 1.069997095556201, "grad_norm": 0.5510638356208801, "learning_rate": 0.0001, "loss": 1.4451, "step": 9210 }, { "epoch": 1.0701132733081615, "grad_norm": 0.5057435035705566, "learning_rate": 0.0001, "loss": 1.3959, "step": 9211 }, { "epoch": 1.070229451060122, "grad_norm": 0.5617602467536926, "learning_rate": 0.0001, "loss": 1.5963, "step": 9212 }, { "epoch": 1.0703456288120825, "grad_norm": 0.5629894733428955, "learning_rate": 0.0001, "loss": 1.5919, "step": 9213 }, { "epoch": 1.070461806564043, "grad_norm": 0.5455237030982971, "learning_rate": 0.0001, "loss": 1.4992, "step": 9214 }, { "epoch": 1.0705779843160035, "grad_norm": 0.5817468762397766, "learning_rate": 0.0001, "loss": 1.5759, "step": 9215 }, { "epoch": 1.070694162067964, "grad_norm": 0.5639060735702515, "learning_rate": 0.0001, "loss": 1.4777, "step": 9216 }, { "epoch": 1.0708103398199245, "grad_norm": 0.5920657515525818, "learning_rate": 0.0001, "loss": 1.6202, "step": 9217 }, { "epoch": 1.070926517571885, "grad_norm": 0.513184666633606, "learning_rate": 0.0001, "loss": 1.394, "step": 9218 }, { "epoch": 1.0710426953238454, "grad_norm": 0.5242840647697449, "learning_rate": 0.0001, "loss": 1.5025, "step": 9219 }, { "epoch": 1.071158873075806, "grad_norm": 0.5487340688705444, "learning_rate": 0.0001, "loss": 1.5416, "step": 9220 }, { "epoch": 1.0712750508277664, "grad_norm": 0.7941916584968567, "learning_rate": 0.0001, "loss": 1.7697, "step": 9221 }, { "epoch": 1.071391228579727, "grad_norm": 0.5666763782501221, "learning_rate": 0.0001, "loss": 1.6823, "step": 9222 }, { "epoch": 1.0715074063316874, "grad_norm": 0.5537146925926208, "learning_rate": 0.0001, "loss": 1.3735, "step": 9223 }, { "epoch": 1.0716235840836479, "grad_norm": 0.5601913928985596, "learning_rate": 0.0001, "loss": 1.3579, "step": 9224 }, { "epoch": 1.0717397618356084, "grad_norm": 0.5344251990318298, "learning_rate": 0.0001, "loss": 1.5254, "step": 9225 }, { "epoch": 1.071855939587569, "grad_norm": 0.5359842777252197, "learning_rate": 0.0001, "loss": 1.3922, "step": 9226 }, { "epoch": 1.0719721173395296, "grad_norm": 0.5405870079994202, "learning_rate": 0.0001, "loss": 1.4498, "step": 9227 }, { "epoch": 1.07208829509149, "grad_norm": 0.51329505443573, "learning_rate": 0.0001, "loss": 1.4123, "step": 9228 }, { "epoch": 1.0722044728434506, "grad_norm": 0.5659348368644714, "learning_rate": 0.0001, "loss": 1.3418, "step": 9229 }, { "epoch": 1.072320650595411, "grad_norm": 0.5718405842781067, "learning_rate": 0.0001, "loss": 1.5285, "step": 9230 }, { "epoch": 1.0724368283473715, "grad_norm": 0.5486005544662476, "learning_rate": 0.0001, "loss": 1.6518, "step": 9231 }, { "epoch": 1.072553006099332, "grad_norm": 0.5066012740135193, "learning_rate": 0.0001, "loss": 1.3796, "step": 9232 }, { "epoch": 1.0726691838512925, "grad_norm": 0.5650283098220825, "learning_rate": 0.0001, "loss": 1.5083, "step": 9233 }, { "epoch": 1.072785361603253, "grad_norm": 0.5359314680099487, "learning_rate": 0.0001, "loss": 1.4413, "step": 9234 }, { "epoch": 1.0729015393552135, "grad_norm": 0.5751736164093018, "learning_rate": 0.0001, "loss": 1.6148, "step": 9235 }, { "epoch": 1.073017717107174, "grad_norm": 0.5163309574127197, "learning_rate": 0.0001, "loss": 1.4626, "step": 9236 }, { "epoch": 1.0731338948591345, "grad_norm": 0.550383448600769, "learning_rate": 0.0001, "loss": 1.459, "step": 9237 }, { "epoch": 1.073250072611095, "grad_norm": 0.5238305330276489, "learning_rate": 0.0001, "loss": 1.4432, "step": 9238 }, { "epoch": 1.0733662503630554, "grad_norm": 0.49950873851776123, "learning_rate": 0.0001, "loss": 1.3584, "step": 9239 }, { "epoch": 1.073482428115016, "grad_norm": 0.508315920829773, "learning_rate": 0.0001, "loss": 1.3408, "step": 9240 }, { "epoch": 1.0735986058669764, "grad_norm": 0.5786048173904419, "learning_rate": 0.0001, "loss": 1.27, "step": 9241 }, { "epoch": 1.073714783618937, "grad_norm": 0.5773483514785767, "learning_rate": 0.0001, "loss": 1.5728, "step": 9242 }, { "epoch": 1.0738309613708974, "grad_norm": 0.5315750241279602, "learning_rate": 0.0001, "loss": 1.469, "step": 9243 }, { "epoch": 1.0739471391228579, "grad_norm": 0.6098319292068481, "learning_rate": 0.0001, "loss": 1.5211, "step": 9244 }, { "epoch": 1.0740633168748184, "grad_norm": 0.5994076132774353, "learning_rate": 0.0001, "loss": 1.6424, "step": 9245 }, { "epoch": 1.074179494626779, "grad_norm": 0.5081837177276611, "learning_rate": 0.0001, "loss": 1.3348, "step": 9246 }, { "epoch": 1.0742956723787396, "grad_norm": 0.5301439166069031, "learning_rate": 0.0001, "loss": 1.4837, "step": 9247 }, { "epoch": 1.0744118501307, "grad_norm": 0.5718616247177124, "learning_rate": 0.0001, "loss": 1.5717, "step": 9248 }, { "epoch": 1.0745280278826606, "grad_norm": 0.5080563426017761, "learning_rate": 0.0001, "loss": 1.3807, "step": 9249 }, { "epoch": 1.074644205634621, "grad_norm": 0.5610292553901672, "learning_rate": 0.0001, "loss": 1.4981, "step": 9250 }, { "epoch": 1.0747603833865815, "grad_norm": 0.5332043766975403, "learning_rate": 0.0001, "loss": 1.4046, "step": 9251 }, { "epoch": 1.074876561138542, "grad_norm": 0.5250877737998962, "learning_rate": 0.0001, "loss": 1.4784, "step": 9252 }, { "epoch": 1.0749927388905025, "grad_norm": 0.5090211629867554, "learning_rate": 0.0001, "loss": 1.3705, "step": 9253 }, { "epoch": 1.075108916642463, "grad_norm": 0.5132327079772949, "learning_rate": 0.0001, "loss": 1.4105, "step": 9254 }, { "epoch": 1.0752250943944235, "grad_norm": 0.5518538951873779, "learning_rate": 0.0001, "loss": 1.5959, "step": 9255 }, { "epoch": 1.075341272146384, "grad_norm": 0.5492732524871826, "learning_rate": 0.0001, "loss": 1.6017, "step": 9256 }, { "epoch": 1.0754574498983445, "grad_norm": 0.5491774678230286, "learning_rate": 0.0001, "loss": 1.4346, "step": 9257 }, { "epoch": 1.075573627650305, "grad_norm": 0.5792922377586365, "learning_rate": 0.0001, "loss": 1.5449, "step": 9258 }, { "epoch": 1.0756898054022654, "grad_norm": 0.5162052512168884, "learning_rate": 0.0001, "loss": 1.3871, "step": 9259 }, { "epoch": 1.075805983154226, "grad_norm": 0.5442473292350769, "learning_rate": 0.0001, "loss": 1.5057, "step": 9260 }, { "epoch": 1.0759221609061864, "grad_norm": 0.5718018412590027, "learning_rate": 0.0001, "loss": 1.5434, "step": 9261 }, { "epoch": 1.076038338658147, "grad_norm": 0.53264319896698, "learning_rate": 0.0001, "loss": 1.4633, "step": 9262 }, { "epoch": 1.0761545164101074, "grad_norm": 0.5195859670639038, "learning_rate": 0.0001, "loss": 1.3004, "step": 9263 }, { "epoch": 1.0762706941620679, "grad_norm": 0.5994012951850891, "learning_rate": 0.0001, "loss": 1.6071, "step": 9264 }, { "epoch": 1.0763868719140284, "grad_norm": 0.5237147212028503, "learning_rate": 0.0001, "loss": 1.3221, "step": 9265 }, { "epoch": 1.0765030496659889, "grad_norm": 0.5110024809837341, "learning_rate": 0.0001, "loss": 1.2827, "step": 9266 }, { "epoch": 1.0766192274179494, "grad_norm": 0.5228458642959595, "learning_rate": 0.0001, "loss": 1.4062, "step": 9267 }, { "epoch": 1.07673540516991, "grad_norm": 0.5706833004951477, "learning_rate": 0.0001, "loss": 1.5371, "step": 9268 }, { "epoch": 1.0768515829218706, "grad_norm": 0.5496780276298523, "learning_rate": 0.0001, "loss": 1.5444, "step": 9269 }, { "epoch": 1.076967760673831, "grad_norm": 0.5509944558143616, "learning_rate": 0.0001, "loss": 1.3909, "step": 9270 }, { "epoch": 1.0770839384257915, "grad_norm": 0.5543323755264282, "learning_rate": 0.0001, "loss": 1.4529, "step": 9271 }, { "epoch": 1.077200116177752, "grad_norm": 0.5567429661750793, "learning_rate": 0.0001, "loss": 1.441, "step": 9272 }, { "epoch": 1.0773162939297125, "grad_norm": 0.5358352661132812, "learning_rate": 0.0001, "loss": 1.3017, "step": 9273 }, { "epoch": 1.077432471681673, "grad_norm": 0.5601693987846375, "learning_rate": 0.0001, "loss": 1.5659, "step": 9274 }, { "epoch": 1.0775486494336335, "grad_norm": 0.5503692626953125, "learning_rate": 0.0001, "loss": 1.4401, "step": 9275 }, { "epoch": 1.077664827185594, "grad_norm": 0.5987486839294434, "learning_rate": 0.0001, "loss": 1.459, "step": 9276 }, { "epoch": 1.0777810049375545, "grad_norm": 0.6052455902099609, "learning_rate": 0.0001, "loss": 1.6409, "step": 9277 }, { "epoch": 1.077897182689515, "grad_norm": 0.5339061617851257, "learning_rate": 0.0001, "loss": 1.4148, "step": 9278 }, { "epoch": 1.0780133604414754, "grad_norm": 0.5292863845825195, "learning_rate": 0.0001, "loss": 1.5569, "step": 9279 }, { "epoch": 1.078129538193436, "grad_norm": 0.5491698980331421, "learning_rate": 0.0001, "loss": 1.3836, "step": 9280 }, { "epoch": 1.0782457159453964, "grad_norm": 0.5501977205276489, "learning_rate": 0.0001, "loss": 1.6123, "step": 9281 }, { "epoch": 1.078361893697357, "grad_norm": 0.5370213389396667, "learning_rate": 0.0001, "loss": 1.5334, "step": 9282 }, { "epoch": 1.0784780714493174, "grad_norm": 0.5482035279273987, "learning_rate": 0.0001, "loss": 1.239, "step": 9283 }, { "epoch": 1.0785942492012779, "grad_norm": 0.5296024680137634, "learning_rate": 0.0001, "loss": 1.4803, "step": 9284 }, { "epoch": 1.0787104269532384, "grad_norm": 0.4946173131465912, "learning_rate": 0.0001, "loss": 1.4238, "step": 9285 }, { "epoch": 1.0788266047051989, "grad_norm": 0.5261144042015076, "learning_rate": 0.0001, "loss": 1.4101, "step": 9286 }, { "epoch": 1.0789427824571594, "grad_norm": 0.5421222448348999, "learning_rate": 0.0001, "loss": 1.4615, "step": 9287 }, { "epoch": 1.07905896020912, "grad_norm": 0.5617155432701111, "learning_rate": 0.0001, "loss": 1.4545, "step": 9288 }, { "epoch": 1.0791751379610806, "grad_norm": 0.5525049567222595, "learning_rate": 0.0001, "loss": 1.4768, "step": 9289 }, { "epoch": 1.079291315713041, "grad_norm": 0.5454285740852356, "learning_rate": 0.0001, "loss": 1.559, "step": 9290 }, { "epoch": 1.0794074934650015, "grad_norm": 0.5999751091003418, "learning_rate": 0.0001, "loss": 1.3304, "step": 9291 }, { "epoch": 1.079523671216962, "grad_norm": 0.5454568862915039, "learning_rate": 0.0001, "loss": 1.375, "step": 9292 }, { "epoch": 1.0796398489689225, "grad_norm": 0.5416858792304993, "learning_rate": 0.0001, "loss": 1.4815, "step": 9293 }, { "epoch": 1.079756026720883, "grad_norm": 0.5695158243179321, "learning_rate": 0.0001, "loss": 1.5292, "step": 9294 }, { "epoch": 1.0798722044728435, "grad_norm": 0.5237754583358765, "learning_rate": 0.0001, "loss": 1.2665, "step": 9295 }, { "epoch": 1.079988382224804, "grad_norm": 0.5280129909515381, "learning_rate": 0.0001, "loss": 1.3654, "step": 9296 }, { "epoch": 1.0801045599767645, "grad_norm": 0.5281645655632019, "learning_rate": 0.0001, "loss": 1.2907, "step": 9297 }, { "epoch": 1.080220737728725, "grad_norm": 0.5463537573814392, "learning_rate": 0.0001, "loss": 1.4693, "step": 9298 }, { "epoch": 1.0803369154806854, "grad_norm": 0.5508739948272705, "learning_rate": 0.0001, "loss": 1.304, "step": 9299 }, { "epoch": 1.080453093232646, "grad_norm": 0.6003327369689941, "learning_rate": 0.0001, "loss": 1.4681, "step": 9300 }, { "epoch": 1.0805692709846064, "grad_norm": 0.5155467987060547, "learning_rate": 0.0001, "loss": 1.3432, "step": 9301 }, { "epoch": 1.080685448736567, "grad_norm": 0.5434906482696533, "learning_rate": 0.0001, "loss": 1.4054, "step": 9302 }, { "epoch": 1.0808016264885274, "grad_norm": 0.5202093124389648, "learning_rate": 0.0001, "loss": 1.3913, "step": 9303 }, { "epoch": 1.080917804240488, "grad_norm": 0.5816706418991089, "learning_rate": 0.0001, "loss": 1.4522, "step": 9304 }, { "epoch": 1.0810339819924484, "grad_norm": 0.5725724101066589, "learning_rate": 0.0001, "loss": 1.5505, "step": 9305 }, { "epoch": 1.0811501597444089, "grad_norm": 0.5437831282615662, "learning_rate": 0.0001, "loss": 1.5443, "step": 9306 }, { "epoch": 1.0812663374963694, "grad_norm": 0.5659807920455933, "learning_rate": 0.0001, "loss": 1.4473, "step": 9307 }, { "epoch": 1.0813825152483298, "grad_norm": 0.5198776125907898, "learning_rate": 0.0001, "loss": 1.4128, "step": 9308 }, { "epoch": 1.0814986930002903, "grad_norm": 0.6664596199989319, "learning_rate": 0.0001, "loss": 1.7793, "step": 9309 }, { "epoch": 1.081614870752251, "grad_norm": 0.5590860247612, "learning_rate": 0.0001, "loss": 1.4313, "step": 9310 }, { "epoch": 1.0817310485042115, "grad_norm": 0.5522528290748596, "learning_rate": 0.0001, "loss": 1.378, "step": 9311 }, { "epoch": 1.081847226256172, "grad_norm": 0.5644780397415161, "learning_rate": 0.0001, "loss": 1.3321, "step": 9312 }, { "epoch": 1.0819634040081325, "grad_norm": 0.5319089889526367, "learning_rate": 0.0001, "loss": 1.2627, "step": 9313 }, { "epoch": 1.082079581760093, "grad_norm": 0.5518312454223633, "learning_rate": 0.0001, "loss": 1.3252, "step": 9314 }, { "epoch": 1.0821957595120535, "grad_norm": 0.5743319392204285, "learning_rate": 0.0001, "loss": 1.5075, "step": 9315 }, { "epoch": 1.082311937264014, "grad_norm": 0.5341470241546631, "learning_rate": 0.0001, "loss": 1.348, "step": 9316 }, { "epoch": 1.0824281150159745, "grad_norm": 0.5499274134635925, "learning_rate": 0.0001, "loss": 1.3848, "step": 9317 }, { "epoch": 1.082544292767935, "grad_norm": 0.5466471910476685, "learning_rate": 0.0001, "loss": 1.4814, "step": 9318 }, { "epoch": 1.0826604705198954, "grad_norm": 0.5436463356018066, "learning_rate": 0.0001, "loss": 1.3873, "step": 9319 }, { "epoch": 1.082776648271856, "grad_norm": 0.5741965770721436, "learning_rate": 0.0001, "loss": 1.5742, "step": 9320 }, { "epoch": 1.0828928260238164, "grad_norm": 0.5448581576347351, "learning_rate": 0.0001, "loss": 1.4217, "step": 9321 }, { "epoch": 1.083009003775777, "grad_norm": 0.5822571516036987, "learning_rate": 0.0001, "loss": 1.5581, "step": 9322 }, { "epoch": 1.0831251815277374, "grad_norm": 0.6465715765953064, "learning_rate": 0.0001, "loss": 1.5893, "step": 9323 }, { "epoch": 1.083241359279698, "grad_norm": 0.5584543943405151, "learning_rate": 0.0001, "loss": 1.5302, "step": 9324 }, { "epoch": 1.0833575370316584, "grad_norm": 0.5714010000228882, "learning_rate": 0.0001, "loss": 1.5215, "step": 9325 }, { "epoch": 1.0834737147836189, "grad_norm": 0.5465094447135925, "learning_rate": 0.0001, "loss": 1.4766, "step": 9326 }, { "epoch": 1.0835898925355794, "grad_norm": 0.5097503066062927, "learning_rate": 0.0001, "loss": 1.3755, "step": 9327 }, { "epoch": 1.0837060702875398, "grad_norm": 0.5056722164154053, "learning_rate": 0.0001, "loss": 1.2346, "step": 9328 }, { "epoch": 1.0838222480395003, "grad_norm": 0.565323531627655, "learning_rate": 0.0001, "loss": 1.3642, "step": 9329 }, { "epoch": 1.083938425791461, "grad_norm": 0.5437831878662109, "learning_rate": 0.0001, "loss": 1.3076, "step": 9330 }, { "epoch": 1.0840546035434215, "grad_norm": 0.5402218103408813, "learning_rate": 0.0001, "loss": 1.3891, "step": 9331 }, { "epoch": 1.084170781295382, "grad_norm": 0.5713097453117371, "learning_rate": 0.0001, "loss": 1.4606, "step": 9332 }, { "epoch": 1.0842869590473425, "grad_norm": 0.5773684978485107, "learning_rate": 0.0001, "loss": 1.4461, "step": 9333 }, { "epoch": 1.084403136799303, "grad_norm": 0.4951843023300171, "learning_rate": 0.0001, "loss": 1.1599, "step": 9334 }, { "epoch": 1.0845193145512635, "grad_norm": 0.5116522312164307, "learning_rate": 0.0001, "loss": 1.4187, "step": 9335 }, { "epoch": 1.084635492303224, "grad_norm": 0.5548305511474609, "learning_rate": 0.0001, "loss": 1.6528, "step": 9336 }, { "epoch": 1.0847516700551845, "grad_norm": 0.5454692840576172, "learning_rate": 0.0001, "loss": 1.5651, "step": 9337 }, { "epoch": 1.084867847807145, "grad_norm": 0.5281533598899841, "learning_rate": 0.0001, "loss": 1.385, "step": 9338 }, { "epoch": 1.0849840255591054, "grad_norm": 0.5260929465293884, "learning_rate": 0.0001, "loss": 1.3408, "step": 9339 }, { "epoch": 1.085100203311066, "grad_norm": 0.5246114134788513, "learning_rate": 0.0001, "loss": 1.3622, "step": 9340 }, { "epoch": 1.0852163810630264, "grad_norm": 0.5710988640785217, "learning_rate": 0.0001, "loss": 1.5343, "step": 9341 }, { "epoch": 1.085332558814987, "grad_norm": 0.5941675901412964, "learning_rate": 0.0001, "loss": 1.5731, "step": 9342 }, { "epoch": 1.0854487365669474, "grad_norm": 0.5492114424705505, "learning_rate": 0.0001, "loss": 1.4177, "step": 9343 }, { "epoch": 1.085564914318908, "grad_norm": 0.5595629215240479, "learning_rate": 0.0001, "loss": 1.4012, "step": 9344 }, { "epoch": 1.0856810920708684, "grad_norm": 0.5756628513336182, "learning_rate": 0.0001, "loss": 1.5284, "step": 9345 }, { "epoch": 1.0857972698228289, "grad_norm": 0.5529462099075317, "learning_rate": 0.0001, "loss": 1.5355, "step": 9346 }, { "epoch": 1.0859134475747894, "grad_norm": 0.5427260398864746, "learning_rate": 0.0001, "loss": 1.3664, "step": 9347 }, { "epoch": 1.0860296253267498, "grad_norm": 0.49472326040267944, "learning_rate": 0.0001, "loss": 1.3941, "step": 9348 }, { "epoch": 1.0861458030787103, "grad_norm": 0.5269715189933777, "learning_rate": 0.0001, "loss": 1.4096, "step": 9349 }, { "epoch": 1.0862619808306708, "grad_norm": 0.5339666604995728, "learning_rate": 0.0001, "loss": 1.4698, "step": 9350 }, { "epoch": 1.0863781585826313, "grad_norm": 0.5161392688751221, "learning_rate": 0.0001, "loss": 1.3614, "step": 9351 }, { "epoch": 1.086494336334592, "grad_norm": 0.5255089998245239, "learning_rate": 0.0001, "loss": 1.3307, "step": 9352 }, { "epoch": 1.0866105140865525, "grad_norm": 0.5742126107215881, "learning_rate": 0.0001, "loss": 1.567, "step": 9353 }, { "epoch": 1.086726691838513, "grad_norm": 0.5712875723838806, "learning_rate": 0.0001, "loss": 1.4733, "step": 9354 }, { "epoch": 1.0868428695904735, "grad_norm": 0.5453868508338928, "learning_rate": 0.0001, "loss": 1.5304, "step": 9355 }, { "epoch": 1.086959047342434, "grad_norm": 0.5706861019134521, "learning_rate": 0.0001, "loss": 1.4652, "step": 9356 }, { "epoch": 1.0870752250943945, "grad_norm": 0.5392447710037231, "learning_rate": 0.0001, "loss": 1.45, "step": 9357 }, { "epoch": 1.087191402846355, "grad_norm": 0.5431585907936096, "learning_rate": 0.0001, "loss": 1.484, "step": 9358 }, { "epoch": 1.0873075805983154, "grad_norm": 0.6064168810844421, "learning_rate": 0.0001, "loss": 1.596, "step": 9359 }, { "epoch": 1.087423758350276, "grad_norm": 0.6154705882072449, "learning_rate": 0.0001, "loss": 1.4216, "step": 9360 }, { "epoch": 1.0875399361022364, "grad_norm": 0.5736680030822754, "learning_rate": 0.0001, "loss": 1.5443, "step": 9361 }, { "epoch": 1.087656113854197, "grad_norm": 0.5389428734779358, "learning_rate": 0.0001, "loss": 1.5365, "step": 9362 }, { "epoch": 1.0877722916061574, "grad_norm": 0.5451179146766663, "learning_rate": 0.0001, "loss": 1.4913, "step": 9363 }, { "epoch": 1.087888469358118, "grad_norm": 0.5412275791168213, "learning_rate": 0.0001, "loss": 1.4005, "step": 9364 }, { "epoch": 1.0880046471100784, "grad_norm": 0.5797693729400635, "learning_rate": 0.0001, "loss": 1.5728, "step": 9365 }, { "epoch": 1.0881208248620389, "grad_norm": 0.5813042521476746, "learning_rate": 0.0001, "loss": 1.7605, "step": 9366 }, { "epoch": 1.0882370026139994, "grad_norm": 0.5206052660942078, "learning_rate": 0.0001, "loss": 1.3434, "step": 9367 }, { "epoch": 1.0883531803659598, "grad_norm": 0.5384072661399841, "learning_rate": 0.0001, "loss": 1.5433, "step": 9368 }, { "epoch": 1.0884693581179203, "grad_norm": 0.49630725383758545, "learning_rate": 0.0001, "loss": 1.4529, "step": 9369 }, { "epoch": 1.0885855358698808, "grad_norm": 0.5630286335945129, "learning_rate": 0.0001, "loss": 1.498, "step": 9370 }, { "epoch": 1.0887017136218413, "grad_norm": 0.5383244156837463, "learning_rate": 0.0001, "loss": 1.3553, "step": 9371 }, { "epoch": 1.088817891373802, "grad_norm": 0.6035181879997253, "learning_rate": 0.0001, "loss": 1.489, "step": 9372 }, { "epoch": 1.0889340691257625, "grad_norm": 0.5531001687049866, "learning_rate": 0.0001, "loss": 1.584, "step": 9373 }, { "epoch": 1.089050246877723, "grad_norm": 0.5467984676361084, "learning_rate": 0.0001, "loss": 1.3719, "step": 9374 }, { "epoch": 1.0891664246296835, "grad_norm": 0.5076682567596436, "learning_rate": 0.0001, "loss": 1.4403, "step": 9375 }, { "epoch": 1.089282602381644, "grad_norm": 0.5291927456855774, "learning_rate": 0.0001, "loss": 1.3413, "step": 9376 }, { "epoch": 1.0893987801336045, "grad_norm": 0.4952625632286072, "learning_rate": 0.0001, "loss": 1.3823, "step": 9377 }, { "epoch": 1.089514957885565, "grad_norm": 0.585818350315094, "learning_rate": 0.0001, "loss": 1.656, "step": 9378 }, { "epoch": 1.0896311356375254, "grad_norm": 0.6007453799247742, "learning_rate": 0.0001, "loss": 1.5042, "step": 9379 }, { "epoch": 1.089747313389486, "grad_norm": 0.5968008637428284, "learning_rate": 0.0001, "loss": 1.5623, "step": 9380 }, { "epoch": 1.0898634911414464, "grad_norm": 0.5476697087287903, "learning_rate": 0.0001, "loss": 1.5362, "step": 9381 }, { "epoch": 1.089979668893407, "grad_norm": 0.5096556544303894, "learning_rate": 0.0001, "loss": 1.2194, "step": 9382 }, { "epoch": 1.0900958466453674, "grad_norm": 0.5080416798591614, "learning_rate": 0.0001, "loss": 1.23, "step": 9383 }, { "epoch": 1.090212024397328, "grad_norm": 0.5420169234275818, "learning_rate": 0.0001, "loss": 1.4699, "step": 9384 }, { "epoch": 1.0903282021492884, "grad_norm": 0.543263852596283, "learning_rate": 0.0001, "loss": 1.3452, "step": 9385 }, { "epoch": 1.0904443799012489, "grad_norm": 0.500427782535553, "learning_rate": 0.0001, "loss": 1.4676, "step": 9386 }, { "epoch": 1.0905605576532094, "grad_norm": 0.5219041109085083, "learning_rate": 0.0001, "loss": 1.2534, "step": 9387 }, { "epoch": 1.0906767354051699, "grad_norm": 0.5618671774864197, "learning_rate": 0.0001, "loss": 1.5241, "step": 9388 }, { "epoch": 1.0907929131571303, "grad_norm": 0.567960262298584, "learning_rate": 0.0001, "loss": 1.599, "step": 9389 }, { "epoch": 1.0909090909090908, "grad_norm": 0.5592353940010071, "learning_rate": 0.0001, "loss": 1.4526, "step": 9390 }, { "epoch": 1.0910252686610513, "grad_norm": 0.5943126082420349, "learning_rate": 0.0001, "loss": 1.4938, "step": 9391 }, { "epoch": 1.0911414464130118, "grad_norm": 0.5825600028038025, "learning_rate": 0.0001, "loss": 1.62, "step": 9392 }, { "epoch": 1.0912576241649723, "grad_norm": 0.5191078186035156, "learning_rate": 0.0001, "loss": 1.4428, "step": 9393 }, { "epoch": 1.091373801916933, "grad_norm": 0.5243180394172668, "learning_rate": 0.0001, "loss": 1.4733, "step": 9394 }, { "epoch": 1.0914899796688935, "grad_norm": 0.548667311668396, "learning_rate": 0.0001, "loss": 1.4179, "step": 9395 }, { "epoch": 1.091606157420854, "grad_norm": 0.5515474081039429, "learning_rate": 0.0001, "loss": 1.3246, "step": 9396 }, { "epoch": 1.0917223351728145, "grad_norm": 0.5582205057144165, "learning_rate": 0.0001, "loss": 1.5639, "step": 9397 }, { "epoch": 1.091838512924775, "grad_norm": 0.5735296607017517, "learning_rate": 0.0001, "loss": 1.5022, "step": 9398 }, { "epoch": 1.0919546906767355, "grad_norm": 0.5381777286529541, "learning_rate": 0.0001, "loss": 1.4056, "step": 9399 }, { "epoch": 1.092070868428696, "grad_norm": 0.506504237651825, "learning_rate": 0.0001, "loss": 1.3763, "step": 9400 }, { "epoch": 1.0921870461806564, "grad_norm": 0.5787509679794312, "learning_rate": 0.0001, "loss": 1.4996, "step": 9401 }, { "epoch": 1.092303223932617, "grad_norm": 0.543312132358551, "learning_rate": 0.0001, "loss": 1.3438, "step": 9402 }, { "epoch": 1.0924194016845774, "grad_norm": 0.5725640058517456, "learning_rate": 0.0001, "loss": 1.54, "step": 9403 }, { "epoch": 1.092535579436538, "grad_norm": 0.5353484153747559, "learning_rate": 0.0001, "loss": 1.2541, "step": 9404 }, { "epoch": 1.0926517571884984, "grad_norm": 0.5454126596450806, "learning_rate": 0.0001, "loss": 1.3389, "step": 9405 }, { "epoch": 1.0927679349404589, "grad_norm": 0.549069881439209, "learning_rate": 0.0001, "loss": 1.5135, "step": 9406 }, { "epoch": 1.0928841126924194, "grad_norm": 0.572575032711029, "learning_rate": 0.0001, "loss": 1.5948, "step": 9407 }, { "epoch": 1.0930002904443799, "grad_norm": 0.5477794408798218, "learning_rate": 0.0001, "loss": 1.3677, "step": 9408 }, { "epoch": 1.0931164681963403, "grad_norm": 0.5575615167617798, "learning_rate": 0.0001, "loss": 1.6481, "step": 9409 }, { "epoch": 1.0932326459483008, "grad_norm": 0.502451479434967, "learning_rate": 0.0001, "loss": 1.4112, "step": 9410 }, { "epoch": 1.0933488237002613, "grad_norm": 0.5695579051971436, "learning_rate": 0.0001, "loss": 1.6426, "step": 9411 }, { "epoch": 1.0934650014522218, "grad_norm": 0.5555123686790466, "learning_rate": 0.0001, "loss": 1.4722, "step": 9412 }, { "epoch": 1.0935811792041823, "grad_norm": 0.5110344886779785, "learning_rate": 0.0001, "loss": 1.485, "step": 9413 }, { "epoch": 1.093697356956143, "grad_norm": 0.514350414276123, "learning_rate": 0.0001, "loss": 1.3294, "step": 9414 }, { "epoch": 1.0938135347081035, "grad_norm": 0.570740818977356, "learning_rate": 0.0001, "loss": 1.6172, "step": 9415 }, { "epoch": 1.093929712460064, "grad_norm": 0.5143060684204102, "learning_rate": 0.0001, "loss": 1.5067, "step": 9416 }, { "epoch": 1.0940458902120245, "grad_norm": 0.5782046914100647, "learning_rate": 0.0001, "loss": 1.6283, "step": 9417 }, { "epoch": 1.094162067963985, "grad_norm": 0.5496023893356323, "learning_rate": 0.0001, "loss": 1.3474, "step": 9418 }, { "epoch": 1.0942782457159455, "grad_norm": 0.4826601445674896, "learning_rate": 0.0001, "loss": 1.1978, "step": 9419 }, { "epoch": 1.094394423467906, "grad_norm": 0.5562894940376282, "learning_rate": 0.0001, "loss": 1.4771, "step": 9420 }, { "epoch": 1.0945106012198664, "grad_norm": 0.5985347032546997, "learning_rate": 0.0001, "loss": 1.5084, "step": 9421 }, { "epoch": 1.094626778971827, "grad_norm": 0.5639268755912781, "learning_rate": 0.0001, "loss": 1.359, "step": 9422 }, { "epoch": 1.0947429567237874, "grad_norm": 0.6072770357131958, "learning_rate": 0.0001, "loss": 1.7112, "step": 9423 }, { "epoch": 1.094859134475748, "grad_norm": 0.5154184103012085, "learning_rate": 0.0001, "loss": 1.1247, "step": 9424 }, { "epoch": 1.0949753122277084, "grad_norm": 0.6177548170089722, "learning_rate": 0.0001, "loss": 1.6308, "step": 9425 }, { "epoch": 1.0950914899796689, "grad_norm": 0.5776795148849487, "learning_rate": 0.0001, "loss": 1.4542, "step": 9426 }, { "epoch": 1.0952076677316294, "grad_norm": 0.5506322383880615, "learning_rate": 0.0001, "loss": 1.4099, "step": 9427 }, { "epoch": 1.0953238454835899, "grad_norm": 0.5430233478546143, "learning_rate": 0.0001, "loss": 1.4948, "step": 9428 }, { "epoch": 1.0954400232355503, "grad_norm": 0.5830442309379578, "learning_rate": 0.0001, "loss": 1.4822, "step": 9429 }, { "epoch": 1.0955562009875108, "grad_norm": 0.541569173336029, "learning_rate": 0.0001, "loss": 1.5574, "step": 9430 }, { "epoch": 1.0956723787394713, "grad_norm": 0.556121289730072, "learning_rate": 0.0001, "loss": 1.5205, "step": 9431 }, { "epoch": 1.0957885564914318, "grad_norm": 0.5274482369422913, "learning_rate": 0.0001, "loss": 1.4293, "step": 9432 }, { "epoch": 1.0959047342433923, "grad_norm": 0.5375184416770935, "learning_rate": 0.0001, "loss": 1.4055, "step": 9433 }, { "epoch": 1.0960209119953528, "grad_norm": 0.5496751666069031, "learning_rate": 0.0001, "loss": 1.4816, "step": 9434 }, { "epoch": 1.0961370897473133, "grad_norm": 0.5520657896995544, "learning_rate": 0.0001, "loss": 1.4712, "step": 9435 }, { "epoch": 1.096253267499274, "grad_norm": 0.5120051503181458, "learning_rate": 0.0001, "loss": 1.5101, "step": 9436 }, { "epoch": 1.0963694452512345, "grad_norm": 0.5237596035003662, "learning_rate": 0.0001, "loss": 1.4054, "step": 9437 }, { "epoch": 1.096485623003195, "grad_norm": 0.5413674116134644, "learning_rate": 0.0001, "loss": 1.4558, "step": 9438 }, { "epoch": 1.0966018007551555, "grad_norm": 0.5916610360145569, "learning_rate": 0.0001, "loss": 1.5184, "step": 9439 }, { "epoch": 1.096717978507116, "grad_norm": 0.6033625602722168, "learning_rate": 0.0001, "loss": 1.5428, "step": 9440 }, { "epoch": 1.0968341562590764, "grad_norm": 0.549626886844635, "learning_rate": 0.0001, "loss": 1.4412, "step": 9441 }, { "epoch": 1.096950334011037, "grad_norm": 0.566194474697113, "learning_rate": 0.0001, "loss": 1.637, "step": 9442 }, { "epoch": 1.0970665117629974, "grad_norm": 0.5590158700942993, "learning_rate": 0.0001, "loss": 1.4561, "step": 9443 }, { "epoch": 1.097182689514958, "grad_norm": 0.5413280129432678, "learning_rate": 0.0001, "loss": 1.4254, "step": 9444 }, { "epoch": 1.0972988672669184, "grad_norm": 0.5623098611831665, "learning_rate": 0.0001, "loss": 1.4923, "step": 9445 }, { "epoch": 1.0974150450188789, "grad_norm": 0.6405941843986511, "learning_rate": 0.0001, "loss": 1.4324, "step": 9446 }, { "epoch": 1.0975312227708394, "grad_norm": 0.5057178735733032, "learning_rate": 0.0001, "loss": 1.5107, "step": 9447 }, { "epoch": 1.0976474005227999, "grad_norm": 0.5987020134925842, "learning_rate": 0.0001, "loss": 1.308, "step": 9448 }, { "epoch": 1.0977635782747603, "grad_norm": 0.5348968505859375, "learning_rate": 0.0001, "loss": 1.344, "step": 9449 }, { "epoch": 1.0978797560267208, "grad_norm": 0.5361587405204773, "learning_rate": 0.0001, "loss": 1.4619, "step": 9450 }, { "epoch": 1.0979959337786813, "grad_norm": 0.549802839756012, "learning_rate": 0.0001, "loss": 1.4666, "step": 9451 }, { "epoch": 1.0981121115306418, "grad_norm": 0.5512414574623108, "learning_rate": 0.0001, "loss": 1.5521, "step": 9452 }, { "epoch": 1.0982282892826023, "grad_norm": 0.5330852270126343, "learning_rate": 0.0001, "loss": 1.5781, "step": 9453 }, { "epoch": 1.0983444670345628, "grad_norm": 0.5517197847366333, "learning_rate": 0.0001, "loss": 1.3646, "step": 9454 }, { "epoch": 1.0984606447865233, "grad_norm": 0.5206728577613831, "learning_rate": 0.0001, "loss": 1.3936, "step": 9455 }, { "epoch": 1.098576822538484, "grad_norm": 0.5265716910362244, "learning_rate": 0.0001, "loss": 1.3308, "step": 9456 }, { "epoch": 1.0986930002904445, "grad_norm": 0.5415132641792297, "learning_rate": 0.0001, "loss": 1.385, "step": 9457 }, { "epoch": 1.098809178042405, "grad_norm": 0.5541356801986694, "learning_rate": 0.0001, "loss": 1.4734, "step": 9458 }, { "epoch": 1.0989253557943655, "grad_norm": 0.5190122723579407, "learning_rate": 0.0001, "loss": 1.3846, "step": 9459 }, { "epoch": 1.099041533546326, "grad_norm": 0.554965615272522, "learning_rate": 0.0001, "loss": 1.5115, "step": 9460 }, { "epoch": 1.0991577112982864, "grad_norm": 0.5586269497871399, "learning_rate": 0.0001, "loss": 1.4455, "step": 9461 }, { "epoch": 1.099273889050247, "grad_norm": 0.5546631217002869, "learning_rate": 0.0001, "loss": 1.5013, "step": 9462 }, { "epoch": 1.0993900668022074, "grad_norm": 0.5459680557250977, "learning_rate": 0.0001, "loss": 1.4849, "step": 9463 }, { "epoch": 1.099506244554168, "grad_norm": 0.568166196346283, "learning_rate": 0.0001, "loss": 1.566, "step": 9464 }, { "epoch": 1.0996224223061284, "grad_norm": 0.5397794842720032, "learning_rate": 0.0001, "loss": 1.5538, "step": 9465 }, { "epoch": 1.0997386000580889, "grad_norm": 0.5615697503089905, "learning_rate": 0.0001, "loss": 1.5312, "step": 9466 }, { "epoch": 1.0998547778100494, "grad_norm": 0.6064543128013611, "learning_rate": 0.0001, "loss": 1.5027, "step": 9467 }, { "epoch": 1.0999709555620099, "grad_norm": 0.5744174718856812, "learning_rate": 0.0001, "loss": 1.4792, "step": 9468 }, { "epoch": 1.1000871333139703, "grad_norm": 0.5579218864440918, "learning_rate": 0.0001, "loss": 1.4867, "step": 9469 }, { "epoch": 1.1002033110659308, "grad_norm": 0.5771361589431763, "learning_rate": 0.0001, "loss": 1.508, "step": 9470 }, { "epoch": 1.1003194888178913, "grad_norm": 0.575631856918335, "learning_rate": 0.0001, "loss": 1.5174, "step": 9471 }, { "epoch": 1.1004356665698518, "grad_norm": 0.5344403982162476, "learning_rate": 0.0001, "loss": 1.5082, "step": 9472 }, { "epoch": 1.1005518443218123, "grad_norm": 0.5184001922607422, "learning_rate": 0.0001, "loss": 1.3486, "step": 9473 }, { "epoch": 1.1006680220737728, "grad_norm": 0.5264585018157959, "learning_rate": 0.0001, "loss": 1.3588, "step": 9474 }, { "epoch": 1.1007841998257333, "grad_norm": 0.5590055584907532, "learning_rate": 0.0001, "loss": 1.4734, "step": 9475 }, { "epoch": 1.1009003775776938, "grad_norm": 0.5235846638679504, "learning_rate": 0.0001, "loss": 1.4077, "step": 9476 }, { "epoch": 1.1010165553296543, "grad_norm": 0.5595501661300659, "learning_rate": 0.0001, "loss": 1.4029, "step": 9477 }, { "epoch": 1.101132733081615, "grad_norm": 0.5435571074485779, "learning_rate": 0.0001, "loss": 1.4213, "step": 9478 }, { "epoch": 1.1012489108335755, "grad_norm": 0.5264965295791626, "learning_rate": 0.0001, "loss": 1.455, "step": 9479 }, { "epoch": 1.101365088585536, "grad_norm": 0.529292643070221, "learning_rate": 0.0001, "loss": 1.4103, "step": 9480 }, { "epoch": 1.1014812663374964, "grad_norm": 0.5540322661399841, "learning_rate": 0.0001, "loss": 1.5225, "step": 9481 }, { "epoch": 1.101597444089457, "grad_norm": 0.5076307058334351, "learning_rate": 0.0001, "loss": 1.3656, "step": 9482 }, { "epoch": 1.1017136218414174, "grad_norm": 0.5783825516700745, "learning_rate": 0.0001, "loss": 1.5087, "step": 9483 }, { "epoch": 1.101829799593378, "grad_norm": 0.5533871650695801, "learning_rate": 0.0001, "loss": 1.4969, "step": 9484 }, { "epoch": 1.1019459773453384, "grad_norm": 0.5388155579566956, "learning_rate": 0.0001, "loss": 1.4595, "step": 9485 }, { "epoch": 1.1020621550972989, "grad_norm": 0.5467915534973145, "learning_rate": 0.0001, "loss": 1.5315, "step": 9486 }, { "epoch": 1.1021783328492594, "grad_norm": 0.6151425242424011, "learning_rate": 0.0001, "loss": 1.5872, "step": 9487 }, { "epoch": 1.1022945106012199, "grad_norm": 0.5877282023429871, "learning_rate": 0.0001, "loss": 1.5517, "step": 9488 }, { "epoch": 1.1024106883531803, "grad_norm": 0.5632818937301636, "learning_rate": 0.0001, "loss": 1.5353, "step": 9489 }, { "epoch": 1.1025268661051408, "grad_norm": 0.5703235268592834, "learning_rate": 0.0001, "loss": 1.5007, "step": 9490 }, { "epoch": 1.1026430438571013, "grad_norm": 0.563475489616394, "learning_rate": 0.0001, "loss": 1.5169, "step": 9491 }, { "epoch": 1.1027592216090618, "grad_norm": 0.5732678174972534, "learning_rate": 0.0001, "loss": 1.4732, "step": 9492 }, { "epoch": 1.1028753993610223, "grad_norm": 0.5850802659988403, "learning_rate": 0.0001, "loss": 1.4957, "step": 9493 }, { "epoch": 1.1029915771129828, "grad_norm": 0.5793954133987427, "learning_rate": 0.0001, "loss": 1.583, "step": 9494 }, { "epoch": 1.1031077548649433, "grad_norm": 0.5984706878662109, "learning_rate": 0.0001, "loss": 1.6546, "step": 9495 }, { "epoch": 1.1032239326169038, "grad_norm": 0.5244345664978027, "learning_rate": 0.0001, "loss": 1.4301, "step": 9496 }, { "epoch": 1.1033401103688645, "grad_norm": 0.5405702590942383, "learning_rate": 0.0001, "loss": 1.359, "step": 9497 }, { "epoch": 1.103456288120825, "grad_norm": 0.5445573329925537, "learning_rate": 0.0001, "loss": 1.615, "step": 9498 }, { "epoch": 1.1035724658727855, "grad_norm": 0.5632995367050171, "learning_rate": 0.0001, "loss": 1.4753, "step": 9499 }, { "epoch": 1.103688643624746, "grad_norm": 0.5281904935836792, "learning_rate": 0.0001, "loss": 1.4822, "step": 9500 }, { "epoch": 1.1038048213767064, "grad_norm": 0.5330088138580322, "learning_rate": 0.0001, "loss": 1.5222, "step": 9501 }, { "epoch": 1.103920999128667, "grad_norm": 0.514532744884491, "learning_rate": 0.0001, "loss": 1.4414, "step": 9502 }, { "epoch": 1.1040371768806274, "grad_norm": 0.548172652721405, "learning_rate": 0.0001, "loss": 1.4451, "step": 9503 }, { "epoch": 1.104153354632588, "grad_norm": 0.5549656748771667, "learning_rate": 0.0001, "loss": 1.3786, "step": 9504 }, { "epoch": 1.1042695323845484, "grad_norm": 0.5625692009925842, "learning_rate": 0.0001, "loss": 1.5153, "step": 9505 }, { "epoch": 1.1043857101365089, "grad_norm": 0.5380663871765137, "learning_rate": 0.0001, "loss": 1.3388, "step": 9506 }, { "epoch": 1.1045018878884694, "grad_norm": 0.530598521232605, "learning_rate": 0.0001, "loss": 1.3424, "step": 9507 }, { "epoch": 1.1046180656404299, "grad_norm": 0.5374849438667297, "learning_rate": 0.0001, "loss": 1.4022, "step": 9508 }, { "epoch": 1.1047342433923903, "grad_norm": 0.5946294665336609, "learning_rate": 0.0001, "loss": 1.6452, "step": 9509 }, { "epoch": 1.1048504211443508, "grad_norm": 0.5669423937797546, "learning_rate": 0.0001, "loss": 1.3185, "step": 9510 }, { "epoch": 1.1049665988963113, "grad_norm": 0.6064764261245728, "learning_rate": 0.0001, "loss": 1.4928, "step": 9511 }, { "epoch": 1.1050827766482718, "grad_norm": 0.5889172554016113, "learning_rate": 0.0001, "loss": 1.455, "step": 9512 }, { "epoch": 1.1051989544002323, "grad_norm": 0.5890141725540161, "learning_rate": 0.0001, "loss": 1.4985, "step": 9513 }, { "epoch": 1.1053151321521928, "grad_norm": 0.5455361604690552, "learning_rate": 0.0001, "loss": 1.4995, "step": 9514 }, { "epoch": 1.1054313099041533, "grad_norm": 0.5481176972389221, "learning_rate": 0.0001, "loss": 1.5108, "step": 9515 }, { "epoch": 1.1055474876561138, "grad_norm": 0.602695882320404, "learning_rate": 0.0001, "loss": 1.6609, "step": 9516 }, { "epoch": 1.1056636654080743, "grad_norm": 0.5367211103439331, "learning_rate": 0.0001, "loss": 1.5845, "step": 9517 }, { "epoch": 1.1057798431600347, "grad_norm": 0.5153917670249939, "learning_rate": 0.0001, "loss": 1.5084, "step": 9518 }, { "epoch": 1.1058960209119952, "grad_norm": 0.523540735244751, "learning_rate": 0.0001, "loss": 1.4537, "step": 9519 }, { "epoch": 1.106012198663956, "grad_norm": 0.5269156098365784, "learning_rate": 0.0001, "loss": 1.4749, "step": 9520 }, { "epoch": 1.1061283764159164, "grad_norm": 0.5302053689956665, "learning_rate": 0.0001, "loss": 1.4461, "step": 9521 }, { "epoch": 1.106244554167877, "grad_norm": 0.5808258652687073, "learning_rate": 0.0001, "loss": 1.4169, "step": 9522 }, { "epoch": 1.1063607319198374, "grad_norm": 0.6308403611183167, "learning_rate": 0.0001, "loss": 1.5731, "step": 9523 }, { "epoch": 1.106476909671798, "grad_norm": 0.6031569838523865, "learning_rate": 0.0001, "loss": 1.5984, "step": 9524 }, { "epoch": 1.1065930874237584, "grad_norm": 0.5299091339111328, "learning_rate": 0.0001, "loss": 1.4018, "step": 9525 }, { "epoch": 1.1067092651757189, "grad_norm": 0.5627480745315552, "learning_rate": 0.0001, "loss": 1.6189, "step": 9526 }, { "epoch": 1.1068254429276794, "grad_norm": 0.5670896172523499, "learning_rate": 0.0001, "loss": 1.5035, "step": 9527 }, { "epoch": 1.1069416206796399, "grad_norm": 0.5727272629737854, "learning_rate": 0.0001, "loss": 1.3688, "step": 9528 }, { "epoch": 1.1070577984316003, "grad_norm": 0.4948303699493408, "learning_rate": 0.0001, "loss": 1.3057, "step": 9529 }, { "epoch": 1.1071739761835608, "grad_norm": 0.5203784108161926, "learning_rate": 0.0001, "loss": 1.4601, "step": 9530 }, { "epoch": 1.1072901539355213, "grad_norm": 0.5920235514640808, "learning_rate": 0.0001, "loss": 1.4401, "step": 9531 }, { "epoch": 1.1074063316874818, "grad_norm": 0.5513145923614502, "learning_rate": 0.0001, "loss": 1.4962, "step": 9532 }, { "epoch": 1.1075225094394423, "grad_norm": 0.5774796605110168, "learning_rate": 0.0001, "loss": 1.5884, "step": 9533 }, { "epoch": 1.1076386871914028, "grad_norm": 0.5905603170394897, "learning_rate": 0.0001, "loss": 1.5502, "step": 9534 }, { "epoch": 1.1077548649433633, "grad_norm": 0.5975148677825928, "learning_rate": 0.0001, "loss": 1.5869, "step": 9535 }, { "epoch": 1.1078710426953238, "grad_norm": 0.5394390821456909, "learning_rate": 0.0001, "loss": 1.4733, "step": 9536 }, { "epoch": 1.1079872204472843, "grad_norm": 0.5816001296043396, "learning_rate": 0.0001, "loss": 1.4818, "step": 9537 }, { "epoch": 1.1081033981992447, "grad_norm": 0.5599253177642822, "learning_rate": 0.0001, "loss": 1.3033, "step": 9538 }, { "epoch": 1.1082195759512055, "grad_norm": 0.5270270109176636, "learning_rate": 0.0001, "loss": 1.4092, "step": 9539 }, { "epoch": 1.108335753703166, "grad_norm": 0.5689064264297485, "learning_rate": 0.0001, "loss": 1.536, "step": 9540 }, { "epoch": 1.1084519314551264, "grad_norm": 0.5355569124221802, "learning_rate": 0.0001, "loss": 1.6211, "step": 9541 }, { "epoch": 1.108568109207087, "grad_norm": 0.5691352486610413, "learning_rate": 0.0001, "loss": 1.4697, "step": 9542 }, { "epoch": 1.1086842869590474, "grad_norm": 0.5562692284584045, "learning_rate": 0.0001, "loss": 1.5146, "step": 9543 }, { "epoch": 1.108800464711008, "grad_norm": 0.5737292766571045, "learning_rate": 0.0001, "loss": 1.4986, "step": 9544 }, { "epoch": 1.1089166424629684, "grad_norm": 0.5629547238349915, "learning_rate": 0.0001, "loss": 1.4551, "step": 9545 }, { "epoch": 1.1090328202149289, "grad_norm": 0.5484119057655334, "learning_rate": 0.0001, "loss": 1.4055, "step": 9546 }, { "epoch": 1.1091489979668894, "grad_norm": 0.5538215041160583, "learning_rate": 0.0001, "loss": 1.6544, "step": 9547 }, { "epoch": 1.1092651757188499, "grad_norm": 0.5706968903541565, "learning_rate": 0.0001, "loss": 1.7345, "step": 9548 }, { "epoch": 1.1093813534708103, "grad_norm": 0.5173110961914062, "learning_rate": 0.0001, "loss": 1.4377, "step": 9549 }, { "epoch": 1.1094975312227708, "grad_norm": 0.5889958143234253, "learning_rate": 0.0001, "loss": 1.4377, "step": 9550 }, { "epoch": 1.1096137089747313, "grad_norm": 0.5318642258644104, "learning_rate": 0.0001, "loss": 1.4558, "step": 9551 }, { "epoch": 1.1097298867266918, "grad_norm": 0.5898750424385071, "learning_rate": 0.0001, "loss": 1.5067, "step": 9552 }, { "epoch": 1.1098460644786523, "grad_norm": 0.5130350589752197, "learning_rate": 0.0001, "loss": 1.3356, "step": 9553 }, { "epoch": 1.1099622422306128, "grad_norm": 0.5386868119239807, "learning_rate": 0.0001, "loss": 1.6305, "step": 9554 }, { "epoch": 1.1100784199825733, "grad_norm": 0.5684298872947693, "learning_rate": 0.0001, "loss": 1.4623, "step": 9555 }, { "epoch": 1.1101945977345338, "grad_norm": 0.5544883012771606, "learning_rate": 0.0001, "loss": 1.5781, "step": 9556 }, { "epoch": 1.1103107754864943, "grad_norm": 0.5304974913597107, "learning_rate": 0.0001, "loss": 1.3058, "step": 9557 }, { "epoch": 1.1104269532384548, "grad_norm": 0.5397143363952637, "learning_rate": 0.0001, "loss": 1.4132, "step": 9558 }, { "epoch": 1.1105431309904152, "grad_norm": 0.5635391473770142, "learning_rate": 0.0001, "loss": 1.4487, "step": 9559 }, { "epoch": 1.1106593087423757, "grad_norm": 0.5330328345298767, "learning_rate": 0.0001, "loss": 1.5323, "step": 9560 }, { "epoch": 1.1107754864943362, "grad_norm": 0.5222686529159546, "learning_rate": 0.0001, "loss": 1.2798, "step": 9561 }, { "epoch": 1.110891664246297, "grad_norm": 0.5562769770622253, "learning_rate": 0.0001, "loss": 1.4548, "step": 9562 }, { "epoch": 1.1110078419982574, "grad_norm": 0.5504674911499023, "learning_rate": 0.0001, "loss": 1.5051, "step": 9563 }, { "epoch": 1.111124019750218, "grad_norm": 0.5468124747276306, "learning_rate": 0.0001, "loss": 1.4323, "step": 9564 }, { "epoch": 1.1112401975021784, "grad_norm": 0.5254167914390564, "learning_rate": 0.0001, "loss": 1.5379, "step": 9565 }, { "epoch": 1.1113563752541389, "grad_norm": 0.5691385269165039, "learning_rate": 0.0001, "loss": 1.6049, "step": 9566 }, { "epoch": 1.1114725530060994, "grad_norm": 0.570458710193634, "learning_rate": 0.0001, "loss": 1.4127, "step": 9567 }, { "epoch": 1.1115887307580599, "grad_norm": 0.5755772590637207, "learning_rate": 0.0001, "loss": 1.5589, "step": 9568 }, { "epoch": 1.1117049085100204, "grad_norm": 0.5539082288742065, "learning_rate": 0.0001, "loss": 1.5229, "step": 9569 }, { "epoch": 1.1118210862619808, "grad_norm": 0.5218854546546936, "learning_rate": 0.0001, "loss": 1.2754, "step": 9570 }, { "epoch": 1.1119372640139413, "grad_norm": 0.5608935952186584, "learning_rate": 0.0001, "loss": 1.5962, "step": 9571 }, { "epoch": 1.1120534417659018, "grad_norm": 0.5075331330299377, "learning_rate": 0.0001, "loss": 1.2668, "step": 9572 }, { "epoch": 1.1121696195178623, "grad_norm": 0.5797237753868103, "learning_rate": 0.0001, "loss": 1.4987, "step": 9573 }, { "epoch": 1.1122857972698228, "grad_norm": 0.5733375549316406, "learning_rate": 0.0001, "loss": 1.5519, "step": 9574 }, { "epoch": 1.1124019750217833, "grad_norm": 0.5502446293830872, "learning_rate": 0.0001, "loss": 1.467, "step": 9575 }, { "epoch": 1.1125181527737438, "grad_norm": 0.5844667553901672, "learning_rate": 0.0001, "loss": 1.5373, "step": 9576 }, { "epoch": 1.1126343305257043, "grad_norm": 0.5160120129585266, "learning_rate": 0.0001, "loss": 1.3696, "step": 9577 }, { "epoch": 1.1127505082776648, "grad_norm": 0.5747503042221069, "learning_rate": 0.0001, "loss": 1.4859, "step": 9578 }, { "epoch": 1.1128666860296252, "grad_norm": 0.5804192423820496, "learning_rate": 0.0001, "loss": 1.5198, "step": 9579 }, { "epoch": 1.1129828637815857, "grad_norm": 0.5319089293479919, "learning_rate": 0.0001, "loss": 1.4072, "step": 9580 }, { "epoch": 1.1130990415335464, "grad_norm": 0.5397303104400635, "learning_rate": 0.0001, "loss": 1.3097, "step": 9581 }, { "epoch": 1.113215219285507, "grad_norm": 0.5700457692146301, "learning_rate": 0.0001, "loss": 1.4927, "step": 9582 }, { "epoch": 1.1133313970374674, "grad_norm": 0.5417009592056274, "learning_rate": 0.0001, "loss": 1.4684, "step": 9583 }, { "epoch": 1.113447574789428, "grad_norm": 0.573021650314331, "learning_rate": 0.0001, "loss": 1.4537, "step": 9584 }, { "epoch": 1.1135637525413884, "grad_norm": 0.5706760883331299, "learning_rate": 0.0001, "loss": 1.7165, "step": 9585 }, { "epoch": 1.1136799302933489, "grad_norm": 0.5758902430534363, "learning_rate": 0.0001, "loss": 1.6319, "step": 9586 }, { "epoch": 1.1137961080453094, "grad_norm": 0.541764497756958, "learning_rate": 0.0001, "loss": 1.3593, "step": 9587 }, { "epoch": 1.1139122857972699, "grad_norm": 0.5544206500053406, "learning_rate": 0.0001, "loss": 1.4866, "step": 9588 }, { "epoch": 1.1140284635492304, "grad_norm": 0.6012476086616516, "learning_rate": 0.0001, "loss": 1.6206, "step": 9589 }, { "epoch": 1.1141446413011908, "grad_norm": 0.5391044020652771, "learning_rate": 0.0001, "loss": 1.2987, "step": 9590 }, { "epoch": 1.1142608190531513, "grad_norm": 0.547203540802002, "learning_rate": 0.0001, "loss": 1.5008, "step": 9591 }, { "epoch": 1.1143769968051118, "grad_norm": 0.5447909832000732, "learning_rate": 0.0001, "loss": 1.4603, "step": 9592 }, { "epoch": 1.1144931745570723, "grad_norm": 0.5598756074905396, "learning_rate": 0.0001, "loss": 1.5212, "step": 9593 }, { "epoch": 1.1146093523090328, "grad_norm": 0.5283433198928833, "learning_rate": 0.0001, "loss": 1.2927, "step": 9594 }, { "epoch": 1.1147255300609933, "grad_norm": 0.5749019384384155, "learning_rate": 0.0001, "loss": 1.4112, "step": 9595 }, { "epoch": 1.1148417078129538, "grad_norm": 0.5759902000427246, "learning_rate": 0.0001, "loss": 1.4703, "step": 9596 }, { "epoch": 1.1149578855649143, "grad_norm": 0.5656595230102539, "learning_rate": 0.0001, "loss": 1.5034, "step": 9597 }, { "epoch": 1.1150740633168748, "grad_norm": 0.5748841762542725, "learning_rate": 0.0001, "loss": 1.5227, "step": 9598 }, { "epoch": 1.1151902410688352, "grad_norm": 0.4959535002708435, "learning_rate": 0.0001, "loss": 1.3178, "step": 9599 }, { "epoch": 1.1153064188207957, "grad_norm": 0.5196616053581238, "learning_rate": 0.0001, "loss": 1.36, "step": 9600 }, { "epoch": 1.1154225965727562, "grad_norm": 0.5188265442848206, "learning_rate": 0.0001, "loss": 1.4426, "step": 9601 }, { "epoch": 1.1155387743247167, "grad_norm": 0.5687859058380127, "learning_rate": 0.0001, "loss": 1.5413, "step": 9602 }, { "epoch": 1.1156549520766774, "grad_norm": 0.5094379186630249, "learning_rate": 0.0001, "loss": 1.5168, "step": 9603 }, { "epoch": 1.115771129828638, "grad_norm": 0.5415772199630737, "learning_rate": 0.0001, "loss": 1.5183, "step": 9604 }, { "epoch": 1.1158873075805984, "grad_norm": 0.5665364861488342, "learning_rate": 0.0001, "loss": 1.5004, "step": 9605 }, { "epoch": 1.1160034853325589, "grad_norm": 0.5534007549285889, "learning_rate": 0.0001, "loss": 1.4372, "step": 9606 }, { "epoch": 1.1161196630845194, "grad_norm": 0.5435113310813904, "learning_rate": 0.0001, "loss": 1.3756, "step": 9607 }, { "epoch": 1.1162358408364799, "grad_norm": 0.5332193374633789, "learning_rate": 0.0001, "loss": 1.4215, "step": 9608 }, { "epoch": 1.1163520185884404, "grad_norm": 0.5340940952301025, "learning_rate": 0.0001, "loss": 1.4451, "step": 9609 }, { "epoch": 1.1164681963404008, "grad_norm": 0.5510147213935852, "learning_rate": 0.0001, "loss": 1.4416, "step": 9610 }, { "epoch": 1.1165843740923613, "grad_norm": 0.5450196266174316, "learning_rate": 0.0001, "loss": 1.5096, "step": 9611 }, { "epoch": 1.1167005518443218, "grad_norm": 0.5567142367362976, "learning_rate": 0.0001, "loss": 1.5802, "step": 9612 }, { "epoch": 1.1168167295962823, "grad_norm": 0.5469493865966797, "learning_rate": 0.0001, "loss": 1.6743, "step": 9613 }, { "epoch": 1.1169329073482428, "grad_norm": 0.5908190011978149, "learning_rate": 0.0001, "loss": 1.5183, "step": 9614 }, { "epoch": 1.1170490851002033, "grad_norm": 0.5396865010261536, "learning_rate": 0.0001, "loss": 1.5314, "step": 9615 }, { "epoch": 1.1171652628521638, "grad_norm": 0.5347968339920044, "learning_rate": 0.0001, "loss": 1.5557, "step": 9616 }, { "epoch": 1.1172814406041243, "grad_norm": 0.5435789823532104, "learning_rate": 0.0001, "loss": 1.4568, "step": 9617 }, { "epoch": 1.1173976183560848, "grad_norm": 0.530580461025238, "learning_rate": 0.0001, "loss": 1.3363, "step": 9618 }, { "epoch": 1.1175137961080452, "grad_norm": 0.5703923106193542, "learning_rate": 0.0001, "loss": 1.5131, "step": 9619 }, { "epoch": 1.1176299738600057, "grad_norm": 0.5795918703079224, "learning_rate": 0.0001, "loss": 1.6841, "step": 9620 }, { "epoch": 1.1177461516119662, "grad_norm": 0.5287982225418091, "learning_rate": 0.0001, "loss": 1.4183, "step": 9621 }, { "epoch": 1.1178623293639267, "grad_norm": 0.5755120515823364, "learning_rate": 0.0001, "loss": 1.5835, "step": 9622 }, { "epoch": 1.1179785071158874, "grad_norm": 0.569776713848114, "learning_rate": 0.0001, "loss": 1.6366, "step": 9623 }, { "epoch": 1.118094684867848, "grad_norm": 0.5526320338249207, "learning_rate": 0.0001, "loss": 1.4253, "step": 9624 }, { "epoch": 1.1182108626198084, "grad_norm": 0.5705094337463379, "learning_rate": 0.0001, "loss": 1.4741, "step": 9625 }, { "epoch": 1.1183270403717689, "grad_norm": 0.5555620789527893, "learning_rate": 0.0001, "loss": 1.6051, "step": 9626 }, { "epoch": 1.1184432181237294, "grad_norm": 0.5216465592384338, "learning_rate": 0.0001, "loss": 1.3455, "step": 9627 }, { "epoch": 1.1185593958756899, "grad_norm": 0.5284774899482727, "learning_rate": 0.0001, "loss": 1.4075, "step": 9628 }, { "epoch": 1.1186755736276504, "grad_norm": 0.47694408893585205, "learning_rate": 0.0001, "loss": 1.2213, "step": 9629 }, { "epoch": 1.1187917513796108, "grad_norm": 0.547134280204773, "learning_rate": 0.0001, "loss": 1.5259, "step": 9630 }, { "epoch": 1.1189079291315713, "grad_norm": 0.5496186017990112, "learning_rate": 0.0001, "loss": 1.4448, "step": 9631 }, { "epoch": 1.1190241068835318, "grad_norm": 0.6156900525093079, "learning_rate": 0.0001, "loss": 1.553, "step": 9632 }, { "epoch": 1.1191402846354923, "grad_norm": 0.6084433794021606, "learning_rate": 0.0001, "loss": 1.5547, "step": 9633 }, { "epoch": 1.1192564623874528, "grad_norm": 0.5293398499488831, "learning_rate": 0.0001, "loss": 1.4728, "step": 9634 }, { "epoch": 1.1193726401394133, "grad_norm": 0.550264835357666, "learning_rate": 0.0001, "loss": 1.3892, "step": 9635 }, { "epoch": 1.1194888178913738, "grad_norm": 0.5736042261123657, "learning_rate": 0.0001, "loss": 1.4799, "step": 9636 }, { "epoch": 1.1196049956433343, "grad_norm": 0.5628027319908142, "learning_rate": 0.0001, "loss": 1.5377, "step": 9637 }, { "epoch": 1.1197211733952948, "grad_norm": 0.5793375372886658, "learning_rate": 0.0001, "loss": 1.351, "step": 9638 }, { "epoch": 1.1198373511472552, "grad_norm": 0.5381454229354858, "learning_rate": 0.0001, "loss": 1.4153, "step": 9639 }, { "epoch": 1.1199535288992157, "grad_norm": 0.48907560110092163, "learning_rate": 0.0001, "loss": 1.3104, "step": 9640 }, { "epoch": 1.1200697066511762, "grad_norm": 0.5359677672386169, "learning_rate": 0.0001, "loss": 1.4069, "step": 9641 }, { "epoch": 1.1201858844031367, "grad_norm": 0.5884495377540588, "learning_rate": 0.0001, "loss": 1.4382, "step": 9642 }, { "epoch": 1.1203020621550972, "grad_norm": 0.5695892572402954, "learning_rate": 0.0001, "loss": 1.4316, "step": 9643 }, { "epoch": 1.1204182399070577, "grad_norm": 0.5442866683006287, "learning_rate": 0.0001, "loss": 1.2676, "step": 9644 }, { "epoch": 1.1205344176590184, "grad_norm": 0.5650526881217957, "learning_rate": 0.0001, "loss": 1.5336, "step": 9645 }, { "epoch": 1.120650595410979, "grad_norm": 0.6149001717567444, "learning_rate": 0.0001, "loss": 1.586, "step": 9646 }, { "epoch": 1.1207667731629394, "grad_norm": 0.5208737254142761, "learning_rate": 0.0001, "loss": 1.3818, "step": 9647 }, { "epoch": 1.1208829509148999, "grad_norm": 0.5633329749107361, "learning_rate": 0.0001, "loss": 1.4319, "step": 9648 }, { "epoch": 1.1209991286668604, "grad_norm": 0.524625301361084, "learning_rate": 0.0001, "loss": 1.3997, "step": 9649 }, { "epoch": 1.1211153064188208, "grad_norm": 0.5944722890853882, "learning_rate": 0.0001, "loss": 1.6914, "step": 9650 }, { "epoch": 1.1212314841707813, "grad_norm": 0.5723394155502319, "learning_rate": 0.0001, "loss": 1.5587, "step": 9651 }, { "epoch": 1.1213476619227418, "grad_norm": 0.5389914512634277, "learning_rate": 0.0001, "loss": 1.4158, "step": 9652 }, { "epoch": 1.1214638396747023, "grad_norm": 0.5923259854316711, "learning_rate": 0.0001, "loss": 1.6293, "step": 9653 }, { "epoch": 1.1215800174266628, "grad_norm": 0.562023401260376, "learning_rate": 0.0001, "loss": 1.5501, "step": 9654 }, { "epoch": 1.1216961951786233, "grad_norm": 0.5436960458755493, "learning_rate": 0.0001, "loss": 1.4251, "step": 9655 }, { "epoch": 1.1218123729305838, "grad_norm": 0.5693610310554504, "learning_rate": 0.0001, "loss": 1.517, "step": 9656 }, { "epoch": 1.1219285506825443, "grad_norm": 0.5483703017234802, "learning_rate": 0.0001, "loss": 1.5909, "step": 9657 }, { "epoch": 1.1220447284345048, "grad_norm": 0.5604730844497681, "learning_rate": 0.0001, "loss": 1.5313, "step": 9658 }, { "epoch": 1.1221609061864652, "grad_norm": 0.5464431643486023, "learning_rate": 0.0001, "loss": 1.3452, "step": 9659 }, { "epoch": 1.1222770839384257, "grad_norm": 0.6087496280670166, "learning_rate": 0.0001, "loss": 1.492, "step": 9660 }, { "epoch": 1.1223932616903862, "grad_norm": 0.5438372492790222, "learning_rate": 0.0001, "loss": 1.4823, "step": 9661 }, { "epoch": 1.1225094394423467, "grad_norm": 0.5792588591575623, "learning_rate": 0.0001, "loss": 1.5359, "step": 9662 }, { "epoch": 1.1226256171943072, "grad_norm": 0.5307050347328186, "learning_rate": 0.0001, "loss": 1.3508, "step": 9663 }, { "epoch": 1.1227417949462677, "grad_norm": 0.5792557001113892, "learning_rate": 0.0001, "loss": 1.461, "step": 9664 }, { "epoch": 1.1228579726982284, "grad_norm": 0.568598747253418, "learning_rate": 0.0001, "loss": 1.4901, "step": 9665 }, { "epoch": 1.122974150450189, "grad_norm": 0.5365720987319946, "learning_rate": 0.0001, "loss": 1.3632, "step": 9666 }, { "epoch": 1.1230903282021494, "grad_norm": 0.5844710469245911, "learning_rate": 0.0001, "loss": 1.4948, "step": 9667 }, { "epoch": 1.1232065059541099, "grad_norm": 0.5613291263580322, "learning_rate": 0.0001, "loss": 1.3457, "step": 9668 }, { "epoch": 1.1233226837060704, "grad_norm": 0.5521247386932373, "learning_rate": 0.0001, "loss": 1.5323, "step": 9669 }, { "epoch": 1.1234388614580308, "grad_norm": 0.5522918701171875, "learning_rate": 0.0001, "loss": 1.6036, "step": 9670 }, { "epoch": 1.1235550392099913, "grad_norm": 0.5793119072914124, "learning_rate": 0.0001, "loss": 1.4542, "step": 9671 }, { "epoch": 1.1236712169619518, "grad_norm": 0.5977482199668884, "learning_rate": 0.0001, "loss": 1.5044, "step": 9672 }, { "epoch": 1.1237873947139123, "grad_norm": 0.5508487224578857, "learning_rate": 0.0001, "loss": 1.44, "step": 9673 }, { "epoch": 1.1239035724658728, "grad_norm": 0.5501763820648193, "learning_rate": 0.0001, "loss": 1.5009, "step": 9674 }, { "epoch": 1.1240197502178333, "grad_norm": 0.6087480187416077, "learning_rate": 0.0001, "loss": 1.5273, "step": 9675 }, { "epoch": 1.1241359279697938, "grad_norm": 0.580289900302887, "learning_rate": 0.0001, "loss": 1.5003, "step": 9676 }, { "epoch": 1.1242521057217543, "grad_norm": 0.5434087514877319, "learning_rate": 0.0001, "loss": 1.4947, "step": 9677 }, { "epoch": 1.1243682834737148, "grad_norm": 0.5863969922065735, "learning_rate": 0.0001, "loss": 1.5204, "step": 9678 }, { "epoch": 1.1244844612256752, "grad_norm": 0.5805991291999817, "learning_rate": 0.0001, "loss": 1.5329, "step": 9679 }, { "epoch": 1.1246006389776357, "grad_norm": 0.562703013420105, "learning_rate": 0.0001, "loss": 1.4149, "step": 9680 }, { "epoch": 1.1247168167295962, "grad_norm": 0.5464785695075989, "learning_rate": 0.0001, "loss": 1.3219, "step": 9681 }, { "epoch": 1.1248329944815567, "grad_norm": 0.5555753707885742, "learning_rate": 0.0001, "loss": 1.5344, "step": 9682 }, { "epoch": 1.1249491722335172, "grad_norm": 0.5941157937049866, "learning_rate": 0.0001, "loss": 1.4475, "step": 9683 }, { "epoch": 1.1250653499854777, "grad_norm": 0.6653990745544434, "learning_rate": 0.0001, "loss": 1.4802, "step": 9684 }, { "epoch": 1.1251815277374382, "grad_norm": 0.5832642912864685, "learning_rate": 0.0001, "loss": 1.4649, "step": 9685 }, { "epoch": 1.1252977054893987, "grad_norm": 0.5274986028671265, "learning_rate": 0.0001, "loss": 1.493, "step": 9686 }, { "epoch": 1.1254138832413592, "grad_norm": 0.5797598361968994, "learning_rate": 0.0001, "loss": 1.4681, "step": 9687 }, { "epoch": 1.1255300609933199, "grad_norm": 0.5291623473167419, "learning_rate": 0.0001, "loss": 1.4156, "step": 9688 }, { "epoch": 1.1256462387452804, "grad_norm": 0.5337206125259399, "learning_rate": 0.0001, "loss": 1.4923, "step": 9689 }, { "epoch": 1.1257624164972408, "grad_norm": 0.5710740685462952, "learning_rate": 0.0001, "loss": 1.7177, "step": 9690 }, { "epoch": 1.1258785942492013, "grad_norm": 0.5532470345497131, "learning_rate": 0.0001, "loss": 1.6986, "step": 9691 }, { "epoch": 1.1259947720011618, "grad_norm": 0.5095158219337463, "learning_rate": 0.0001, "loss": 1.423, "step": 9692 }, { "epoch": 1.1261109497531223, "grad_norm": 0.5526217222213745, "learning_rate": 0.0001, "loss": 1.5832, "step": 9693 }, { "epoch": 1.1262271275050828, "grad_norm": 0.5839323997497559, "learning_rate": 0.0001, "loss": 1.6062, "step": 9694 }, { "epoch": 1.1263433052570433, "grad_norm": 0.5310125350952148, "learning_rate": 0.0001, "loss": 1.273, "step": 9695 }, { "epoch": 1.1264594830090038, "grad_norm": 0.5890069007873535, "learning_rate": 0.0001, "loss": 1.4701, "step": 9696 }, { "epoch": 1.1265756607609643, "grad_norm": 0.5359507203102112, "learning_rate": 0.0001, "loss": 1.4048, "step": 9697 }, { "epoch": 1.1266918385129248, "grad_norm": 0.5606240630149841, "learning_rate": 0.0001, "loss": 1.4849, "step": 9698 }, { "epoch": 1.1268080162648852, "grad_norm": 0.534092128276825, "learning_rate": 0.0001, "loss": 1.367, "step": 9699 }, { "epoch": 1.1269241940168457, "grad_norm": 0.553125262260437, "learning_rate": 0.0001, "loss": 1.4268, "step": 9700 }, { "epoch": 1.1270403717688062, "grad_norm": 0.5301721096038818, "learning_rate": 0.0001, "loss": 1.3242, "step": 9701 }, { "epoch": 1.1271565495207667, "grad_norm": 0.6657614707946777, "learning_rate": 0.0001, "loss": 1.6773, "step": 9702 }, { "epoch": 1.1272727272727272, "grad_norm": 0.541265606880188, "learning_rate": 0.0001, "loss": 1.3735, "step": 9703 }, { "epoch": 1.1273889050246877, "grad_norm": 0.5840449333190918, "learning_rate": 0.0001, "loss": 1.4717, "step": 9704 }, { "epoch": 1.1275050827766482, "grad_norm": 0.5938639044761658, "learning_rate": 0.0001, "loss": 1.6064, "step": 9705 }, { "epoch": 1.127621260528609, "grad_norm": 0.5215795040130615, "learning_rate": 0.0001, "loss": 1.5404, "step": 9706 }, { "epoch": 1.1277374382805694, "grad_norm": 0.537142276763916, "learning_rate": 0.0001, "loss": 1.506, "step": 9707 }, { "epoch": 1.1278536160325299, "grad_norm": 0.6039302349090576, "learning_rate": 0.0001, "loss": 1.5017, "step": 9708 }, { "epoch": 1.1279697937844904, "grad_norm": 0.510535478591919, "learning_rate": 0.0001, "loss": 1.4446, "step": 9709 }, { "epoch": 1.1280859715364508, "grad_norm": 0.5227552056312561, "learning_rate": 0.0001, "loss": 1.4608, "step": 9710 }, { "epoch": 1.1282021492884113, "grad_norm": 0.5117825865745544, "learning_rate": 0.0001, "loss": 1.386, "step": 9711 }, { "epoch": 1.1283183270403718, "grad_norm": 0.5713513493537903, "learning_rate": 0.0001, "loss": 1.5172, "step": 9712 }, { "epoch": 1.1284345047923323, "grad_norm": 0.6017709374427795, "learning_rate": 0.0001, "loss": 1.4103, "step": 9713 }, { "epoch": 1.1285506825442928, "grad_norm": 0.5840389728546143, "learning_rate": 0.0001, "loss": 1.4657, "step": 9714 }, { "epoch": 1.1286668602962533, "grad_norm": 0.5689877867698669, "learning_rate": 0.0001, "loss": 1.4169, "step": 9715 }, { "epoch": 1.1287830380482138, "grad_norm": 0.603010356426239, "learning_rate": 0.0001, "loss": 1.5207, "step": 9716 }, { "epoch": 1.1288992158001743, "grad_norm": 0.5397545695304871, "learning_rate": 0.0001, "loss": 1.407, "step": 9717 }, { "epoch": 1.1290153935521348, "grad_norm": 0.5655140280723572, "learning_rate": 0.0001, "loss": 1.5362, "step": 9718 }, { "epoch": 1.1291315713040952, "grad_norm": 0.5146525502204895, "learning_rate": 0.0001, "loss": 1.4001, "step": 9719 }, { "epoch": 1.1292477490560557, "grad_norm": 0.6142404675483704, "learning_rate": 0.0001, "loss": 1.5049, "step": 9720 }, { "epoch": 1.1293639268080162, "grad_norm": 0.5134833455085754, "learning_rate": 0.0001, "loss": 1.3542, "step": 9721 }, { "epoch": 1.1294801045599767, "grad_norm": 0.5315486788749695, "learning_rate": 0.0001, "loss": 1.3618, "step": 9722 }, { "epoch": 1.1295962823119372, "grad_norm": 0.5732195973396301, "learning_rate": 0.0001, "loss": 1.52, "step": 9723 }, { "epoch": 1.1297124600638977, "grad_norm": 0.5358231067657471, "learning_rate": 0.0001, "loss": 1.4426, "step": 9724 }, { "epoch": 1.1298286378158582, "grad_norm": 0.5462802648544312, "learning_rate": 0.0001, "loss": 1.4065, "step": 9725 }, { "epoch": 1.1299448155678187, "grad_norm": 0.5840174555778503, "learning_rate": 0.0001, "loss": 1.5295, "step": 9726 }, { "epoch": 1.1300609933197792, "grad_norm": 0.536839485168457, "learning_rate": 0.0001, "loss": 1.2596, "step": 9727 }, { "epoch": 1.1301771710717397, "grad_norm": 0.5742023587226868, "learning_rate": 0.0001, "loss": 1.534, "step": 9728 }, { "epoch": 1.1302933488237001, "grad_norm": 0.5591979026794434, "learning_rate": 0.0001, "loss": 1.4578, "step": 9729 }, { "epoch": 1.1304095265756609, "grad_norm": 0.5407907366752625, "learning_rate": 0.0001, "loss": 1.368, "step": 9730 }, { "epoch": 1.1305257043276213, "grad_norm": 0.5862224102020264, "learning_rate": 0.0001, "loss": 1.6457, "step": 9731 }, { "epoch": 1.1306418820795818, "grad_norm": 0.5518715977668762, "learning_rate": 0.0001, "loss": 1.421, "step": 9732 }, { "epoch": 1.1307580598315423, "grad_norm": 0.5652369260787964, "learning_rate": 0.0001, "loss": 1.4673, "step": 9733 }, { "epoch": 1.1308742375835028, "grad_norm": 0.5239549279212952, "learning_rate": 0.0001, "loss": 1.4401, "step": 9734 }, { "epoch": 1.1309904153354633, "grad_norm": 0.5143555998802185, "learning_rate": 0.0001, "loss": 1.3138, "step": 9735 }, { "epoch": 1.1311065930874238, "grad_norm": 0.5065417289733887, "learning_rate": 0.0001, "loss": 1.4762, "step": 9736 }, { "epoch": 1.1312227708393843, "grad_norm": 0.5721327662467957, "learning_rate": 0.0001, "loss": 1.5481, "step": 9737 }, { "epoch": 1.1313389485913448, "grad_norm": 0.553887665271759, "learning_rate": 0.0001, "loss": 1.5036, "step": 9738 }, { "epoch": 1.1314551263433053, "grad_norm": 0.5835604667663574, "learning_rate": 0.0001, "loss": 1.6025, "step": 9739 }, { "epoch": 1.1315713040952657, "grad_norm": 0.6082079410552979, "learning_rate": 0.0001, "loss": 1.5311, "step": 9740 }, { "epoch": 1.1316874818472262, "grad_norm": 0.5478276014328003, "learning_rate": 0.0001, "loss": 1.559, "step": 9741 }, { "epoch": 1.1318036595991867, "grad_norm": 0.5668119192123413, "learning_rate": 0.0001, "loss": 1.5674, "step": 9742 }, { "epoch": 1.1319198373511472, "grad_norm": 0.5409606695175171, "learning_rate": 0.0001, "loss": 1.2484, "step": 9743 }, { "epoch": 1.1320360151031077, "grad_norm": 0.5933104157447815, "learning_rate": 0.0001, "loss": 1.5732, "step": 9744 }, { "epoch": 1.1321521928550682, "grad_norm": 0.5886265635490417, "learning_rate": 0.0001, "loss": 1.5903, "step": 9745 }, { "epoch": 1.1322683706070287, "grad_norm": 0.534992516040802, "learning_rate": 0.0001, "loss": 1.4977, "step": 9746 }, { "epoch": 1.1323845483589892, "grad_norm": 0.5970652103424072, "learning_rate": 0.0001, "loss": 1.538, "step": 9747 }, { "epoch": 1.1325007261109499, "grad_norm": 0.5775313377380371, "learning_rate": 0.0001, "loss": 1.5226, "step": 9748 }, { "epoch": 1.1326169038629104, "grad_norm": 0.5551691055297852, "learning_rate": 0.0001, "loss": 1.4154, "step": 9749 }, { "epoch": 1.1327330816148709, "grad_norm": 0.57322096824646, "learning_rate": 0.0001, "loss": 1.5234, "step": 9750 }, { "epoch": 1.1328492593668313, "grad_norm": 0.5926565527915955, "learning_rate": 0.0001, "loss": 1.399, "step": 9751 }, { "epoch": 1.1329654371187918, "grad_norm": 0.5369096398353577, "learning_rate": 0.0001, "loss": 1.3547, "step": 9752 }, { "epoch": 1.1330816148707523, "grad_norm": 0.5695880651473999, "learning_rate": 0.0001, "loss": 1.5324, "step": 9753 }, { "epoch": 1.1331977926227128, "grad_norm": 0.5742299556732178, "learning_rate": 0.0001, "loss": 1.5644, "step": 9754 }, { "epoch": 1.1333139703746733, "grad_norm": 0.5906009674072266, "learning_rate": 0.0001, "loss": 1.5804, "step": 9755 }, { "epoch": 1.1334301481266338, "grad_norm": 0.5348914265632629, "learning_rate": 0.0001, "loss": 1.3925, "step": 9756 }, { "epoch": 1.1335463258785943, "grad_norm": 0.5486701130867004, "learning_rate": 0.0001, "loss": 1.5705, "step": 9757 }, { "epoch": 1.1336625036305548, "grad_norm": 0.5349100828170776, "learning_rate": 0.0001, "loss": 1.3754, "step": 9758 }, { "epoch": 1.1337786813825153, "grad_norm": 0.6003459692001343, "learning_rate": 0.0001, "loss": 1.475, "step": 9759 }, { "epoch": 1.1338948591344757, "grad_norm": 0.545295774936676, "learning_rate": 0.0001, "loss": 1.4824, "step": 9760 }, { "epoch": 1.1340110368864362, "grad_norm": 0.5949148535728455, "learning_rate": 0.0001, "loss": 1.4104, "step": 9761 }, { "epoch": 1.1341272146383967, "grad_norm": 0.6306147575378418, "learning_rate": 0.0001, "loss": 1.6523, "step": 9762 }, { "epoch": 1.1342433923903572, "grad_norm": 0.5710545182228088, "learning_rate": 0.0001, "loss": 1.3827, "step": 9763 }, { "epoch": 1.1343595701423177, "grad_norm": 0.5881208777427673, "learning_rate": 0.0001, "loss": 1.5104, "step": 9764 }, { "epoch": 1.1344757478942782, "grad_norm": 0.5546616315841675, "learning_rate": 0.0001, "loss": 1.3955, "step": 9765 }, { "epoch": 1.1345919256462387, "grad_norm": 0.5268201231956482, "learning_rate": 0.0001, "loss": 1.373, "step": 9766 }, { "epoch": 1.1347081033981992, "grad_norm": 0.5580306053161621, "learning_rate": 0.0001, "loss": 1.5446, "step": 9767 }, { "epoch": 1.1348242811501597, "grad_norm": 0.6127236485481262, "learning_rate": 0.0001, "loss": 1.4528, "step": 9768 }, { "epoch": 1.1349404589021201, "grad_norm": 0.5953782200813293, "learning_rate": 0.0001, "loss": 1.5394, "step": 9769 }, { "epoch": 1.1350566366540806, "grad_norm": 0.5282092690467834, "learning_rate": 0.0001, "loss": 1.4261, "step": 9770 }, { "epoch": 1.1351728144060411, "grad_norm": 0.5678205490112305, "learning_rate": 0.0001, "loss": 1.4748, "step": 9771 }, { "epoch": 1.1352889921580018, "grad_norm": 0.522504448890686, "learning_rate": 0.0001, "loss": 1.4129, "step": 9772 }, { "epoch": 1.1354051699099623, "grad_norm": 0.5682795643806458, "learning_rate": 0.0001, "loss": 1.4627, "step": 9773 }, { "epoch": 1.1355213476619228, "grad_norm": 0.5516985654830933, "learning_rate": 0.0001, "loss": 1.3844, "step": 9774 }, { "epoch": 1.1356375254138833, "grad_norm": 0.5454441905021667, "learning_rate": 0.0001, "loss": 1.6141, "step": 9775 }, { "epoch": 1.1357537031658438, "grad_norm": 0.5516277551651001, "learning_rate": 0.0001, "loss": 1.4229, "step": 9776 }, { "epoch": 1.1358698809178043, "grad_norm": 0.5397228002548218, "learning_rate": 0.0001, "loss": 1.6032, "step": 9777 }, { "epoch": 1.1359860586697648, "grad_norm": 0.56315678358078, "learning_rate": 0.0001, "loss": 1.6077, "step": 9778 }, { "epoch": 1.1361022364217253, "grad_norm": 0.5425570607185364, "learning_rate": 0.0001, "loss": 1.425, "step": 9779 }, { "epoch": 1.1362184141736857, "grad_norm": 0.5313009023666382, "learning_rate": 0.0001, "loss": 1.4678, "step": 9780 }, { "epoch": 1.1363345919256462, "grad_norm": 0.5546362996101379, "learning_rate": 0.0001, "loss": 1.5133, "step": 9781 }, { "epoch": 1.1364507696776067, "grad_norm": 0.5482527017593384, "learning_rate": 0.0001, "loss": 1.5031, "step": 9782 }, { "epoch": 1.1365669474295672, "grad_norm": 0.5520925521850586, "learning_rate": 0.0001, "loss": 1.358, "step": 9783 }, { "epoch": 1.1366831251815277, "grad_norm": 0.5753809809684753, "learning_rate": 0.0001, "loss": 1.5268, "step": 9784 }, { "epoch": 1.1367993029334882, "grad_norm": 0.5345529317855835, "learning_rate": 0.0001, "loss": 1.3329, "step": 9785 }, { "epoch": 1.1369154806854487, "grad_norm": 0.5629556775093079, "learning_rate": 0.0001, "loss": 1.4472, "step": 9786 }, { "epoch": 1.1370316584374092, "grad_norm": 0.524359405040741, "learning_rate": 0.0001, "loss": 1.1784, "step": 9787 }, { "epoch": 1.1371478361893697, "grad_norm": 0.5655604004859924, "learning_rate": 0.0001, "loss": 1.4612, "step": 9788 }, { "epoch": 1.1372640139413301, "grad_norm": 0.5480789542198181, "learning_rate": 0.0001, "loss": 1.3745, "step": 9789 }, { "epoch": 1.1373801916932909, "grad_norm": 0.568131685256958, "learning_rate": 0.0001, "loss": 1.4528, "step": 9790 }, { "epoch": 1.1374963694452513, "grad_norm": 0.5922342538833618, "learning_rate": 0.0001, "loss": 1.5323, "step": 9791 }, { "epoch": 1.1376125471972118, "grad_norm": 0.601453423500061, "learning_rate": 0.0001, "loss": 1.6236, "step": 9792 }, { "epoch": 1.1377287249491723, "grad_norm": 0.5655685663223267, "learning_rate": 0.0001, "loss": 1.5018, "step": 9793 }, { "epoch": 1.1378449027011328, "grad_norm": 0.5532330274581909, "learning_rate": 0.0001, "loss": 1.4919, "step": 9794 }, { "epoch": 1.1379610804530933, "grad_norm": 0.5482434034347534, "learning_rate": 0.0001, "loss": 1.5045, "step": 9795 }, { "epoch": 1.1380772582050538, "grad_norm": 0.5250821113586426, "learning_rate": 0.0001, "loss": 1.3726, "step": 9796 }, { "epoch": 1.1381934359570143, "grad_norm": 0.5487037897109985, "learning_rate": 0.0001, "loss": 1.5452, "step": 9797 }, { "epoch": 1.1383096137089748, "grad_norm": 0.5330802798271179, "learning_rate": 0.0001, "loss": 1.4124, "step": 9798 }, { "epoch": 1.1384257914609353, "grad_norm": 0.5245431065559387, "learning_rate": 0.0001, "loss": 1.5838, "step": 9799 }, { "epoch": 1.1385419692128957, "grad_norm": 0.5072425007820129, "learning_rate": 0.0001, "loss": 1.3497, "step": 9800 }, { "epoch": 1.1386581469648562, "grad_norm": 0.5386658310890198, "learning_rate": 0.0001, "loss": 1.4285, "step": 9801 }, { "epoch": 1.1387743247168167, "grad_norm": 0.5451638698577881, "learning_rate": 0.0001, "loss": 1.5039, "step": 9802 }, { "epoch": 1.1388905024687772, "grad_norm": 0.5327491760253906, "learning_rate": 0.0001, "loss": 1.4741, "step": 9803 }, { "epoch": 1.1390066802207377, "grad_norm": 0.5449811220169067, "learning_rate": 0.0001, "loss": 1.4579, "step": 9804 }, { "epoch": 1.1391228579726982, "grad_norm": 0.5945097208023071, "learning_rate": 0.0001, "loss": 1.5619, "step": 9805 }, { "epoch": 1.1392390357246587, "grad_norm": 0.5785495042800903, "learning_rate": 0.0001, "loss": 1.4953, "step": 9806 }, { "epoch": 1.1393552134766192, "grad_norm": 0.5066744089126587, "learning_rate": 0.0001, "loss": 1.3782, "step": 9807 }, { "epoch": 1.1394713912285797, "grad_norm": 0.6268693208694458, "learning_rate": 0.0001, "loss": 1.601, "step": 9808 }, { "epoch": 1.1395875689805401, "grad_norm": 0.5436332821846008, "learning_rate": 0.0001, "loss": 1.4839, "step": 9809 }, { "epoch": 1.1397037467325006, "grad_norm": 0.5843521356582642, "learning_rate": 0.0001, "loss": 1.4932, "step": 9810 }, { "epoch": 1.1398199244844611, "grad_norm": 0.5359347462654114, "learning_rate": 0.0001, "loss": 1.4952, "step": 9811 }, { "epoch": 1.1399361022364216, "grad_norm": 0.5501775145530701, "learning_rate": 0.0001, "loss": 1.4508, "step": 9812 }, { "epoch": 1.140052279988382, "grad_norm": 0.5738726258277893, "learning_rate": 0.0001, "loss": 1.6331, "step": 9813 }, { "epoch": 1.1401684577403428, "grad_norm": 0.6035877466201782, "learning_rate": 0.0001, "loss": 1.507, "step": 9814 }, { "epoch": 1.1402846354923033, "grad_norm": 0.5534920692443848, "learning_rate": 0.0001, "loss": 1.4699, "step": 9815 }, { "epoch": 1.1404008132442638, "grad_norm": 0.5403549075126648, "learning_rate": 0.0001, "loss": 1.5539, "step": 9816 }, { "epoch": 1.1405169909962243, "grad_norm": 0.5253798961639404, "learning_rate": 0.0001, "loss": 1.3346, "step": 9817 }, { "epoch": 1.1406331687481848, "grad_norm": 0.5829864144325256, "learning_rate": 0.0001, "loss": 1.5772, "step": 9818 }, { "epoch": 1.1407493465001453, "grad_norm": 0.5337379574775696, "learning_rate": 0.0001, "loss": 1.447, "step": 9819 }, { "epoch": 1.1408655242521057, "grad_norm": 0.5743491053581238, "learning_rate": 0.0001, "loss": 1.4244, "step": 9820 }, { "epoch": 1.1409817020040662, "grad_norm": 0.524262011051178, "learning_rate": 0.0001, "loss": 1.3892, "step": 9821 }, { "epoch": 1.1410978797560267, "grad_norm": 0.5501481890678406, "learning_rate": 0.0001, "loss": 1.4874, "step": 9822 }, { "epoch": 1.1412140575079872, "grad_norm": 0.5878981351852417, "learning_rate": 0.0001, "loss": 1.7145, "step": 9823 }, { "epoch": 1.1413302352599477, "grad_norm": 0.5892753601074219, "learning_rate": 0.0001, "loss": 1.4559, "step": 9824 }, { "epoch": 1.1414464130119082, "grad_norm": 0.5729379653930664, "learning_rate": 0.0001, "loss": 1.6213, "step": 9825 }, { "epoch": 1.1415625907638687, "grad_norm": 0.5200401544570923, "learning_rate": 0.0001, "loss": 1.3968, "step": 9826 }, { "epoch": 1.1416787685158292, "grad_norm": 0.5553308725357056, "learning_rate": 0.0001, "loss": 1.3769, "step": 9827 }, { "epoch": 1.1417949462677897, "grad_norm": 0.5183868408203125, "learning_rate": 0.0001, "loss": 1.2838, "step": 9828 }, { "epoch": 1.1419111240197501, "grad_norm": 0.5820907950401306, "learning_rate": 0.0001, "loss": 1.6124, "step": 9829 }, { "epoch": 1.1420273017717106, "grad_norm": 0.589948296546936, "learning_rate": 0.0001, "loss": 1.5892, "step": 9830 }, { "epoch": 1.1421434795236711, "grad_norm": 0.5357127785682678, "learning_rate": 0.0001, "loss": 1.4233, "step": 9831 }, { "epoch": 1.1422596572756318, "grad_norm": 0.5246970057487488, "learning_rate": 0.0001, "loss": 1.3825, "step": 9832 }, { "epoch": 1.1423758350275923, "grad_norm": 0.5680531859397888, "learning_rate": 0.0001, "loss": 1.5088, "step": 9833 }, { "epoch": 1.1424920127795528, "grad_norm": 0.5650409460067749, "learning_rate": 0.0001, "loss": 1.4552, "step": 9834 }, { "epoch": 1.1426081905315133, "grad_norm": 0.5513635873794556, "learning_rate": 0.0001, "loss": 1.471, "step": 9835 }, { "epoch": 1.1427243682834738, "grad_norm": 0.5147875547409058, "learning_rate": 0.0001, "loss": 1.3892, "step": 9836 }, { "epoch": 1.1428405460354343, "grad_norm": 0.5659693479537964, "learning_rate": 0.0001, "loss": 1.4035, "step": 9837 }, { "epoch": 1.1429567237873948, "grad_norm": 0.5677375793457031, "learning_rate": 0.0001, "loss": 1.5084, "step": 9838 }, { "epoch": 1.1430729015393553, "grad_norm": 0.5909645557403564, "learning_rate": 0.0001, "loss": 1.5125, "step": 9839 }, { "epoch": 1.1431890792913157, "grad_norm": 0.603756308555603, "learning_rate": 0.0001, "loss": 1.5208, "step": 9840 }, { "epoch": 1.1433052570432762, "grad_norm": 0.6034809350967407, "learning_rate": 0.0001, "loss": 1.5083, "step": 9841 }, { "epoch": 1.1434214347952367, "grad_norm": 0.5629401803016663, "learning_rate": 0.0001, "loss": 1.5183, "step": 9842 }, { "epoch": 1.1435376125471972, "grad_norm": 0.5638735890388489, "learning_rate": 0.0001, "loss": 1.5628, "step": 9843 }, { "epoch": 1.1436537902991577, "grad_norm": 0.572892427444458, "learning_rate": 0.0001, "loss": 1.4563, "step": 9844 }, { "epoch": 1.1437699680511182, "grad_norm": 0.5544632077217102, "learning_rate": 0.0001, "loss": 1.4904, "step": 9845 }, { "epoch": 1.1438861458030787, "grad_norm": 0.596784770488739, "learning_rate": 0.0001, "loss": 1.5844, "step": 9846 }, { "epoch": 1.1440023235550392, "grad_norm": 0.5610263347625732, "learning_rate": 0.0001, "loss": 1.555, "step": 9847 }, { "epoch": 1.1441185013069997, "grad_norm": 0.5021182298660278, "learning_rate": 0.0001, "loss": 1.4044, "step": 9848 }, { "epoch": 1.1442346790589601, "grad_norm": 0.5500348210334778, "learning_rate": 0.0001, "loss": 1.5557, "step": 9849 }, { "epoch": 1.1443508568109206, "grad_norm": 0.5434955358505249, "learning_rate": 0.0001, "loss": 1.447, "step": 9850 }, { "epoch": 1.1444670345628811, "grad_norm": 0.5625091195106506, "learning_rate": 0.0001, "loss": 1.4034, "step": 9851 }, { "epoch": 1.1445832123148416, "grad_norm": 0.5443983674049377, "learning_rate": 0.0001, "loss": 1.5446, "step": 9852 }, { "epoch": 1.144699390066802, "grad_norm": 0.5581114888191223, "learning_rate": 0.0001, "loss": 1.5058, "step": 9853 }, { "epoch": 1.1448155678187626, "grad_norm": 0.5418434143066406, "learning_rate": 0.0001, "loss": 1.4448, "step": 9854 }, { "epoch": 1.1449317455707233, "grad_norm": 0.5469671487808228, "learning_rate": 0.0001, "loss": 1.5129, "step": 9855 }, { "epoch": 1.1450479233226838, "grad_norm": 0.5789541602134705, "learning_rate": 0.0001, "loss": 1.4371, "step": 9856 }, { "epoch": 1.1451641010746443, "grad_norm": 0.5320298075675964, "learning_rate": 0.0001, "loss": 1.3405, "step": 9857 }, { "epoch": 1.1452802788266048, "grad_norm": 0.563693642616272, "learning_rate": 0.0001, "loss": 1.5974, "step": 9858 }, { "epoch": 1.1453964565785653, "grad_norm": 0.5295918583869934, "learning_rate": 0.0001, "loss": 1.5261, "step": 9859 }, { "epoch": 1.1455126343305257, "grad_norm": 0.558912456035614, "learning_rate": 0.0001, "loss": 1.515, "step": 9860 }, { "epoch": 1.1456288120824862, "grad_norm": 0.5343863368034363, "learning_rate": 0.0001, "loss": 1.5597, "step": 9861 }, { "epoch": 1.1457449898344467, "grad_norm": 0.543212354183197, "learning_rate": 0.0001, "loss": 1.4298, "step": 9862 }, { "epoch": 1.1458611675864072, "grad_norm": 0.5438005924224854, "learning_rate": 0.0001, "loss": 1.4411, "step": 9863 }, { "epoch": 1.1459773453383677, "grad_norm": 0.525090217590332, "learning_rate": 0.0001, "loss": 1.258, "step": 9864 }, { "epoch": 1.1460935230903282, "grad_norm": 0.5800353288650513, "learning_rate": 0.0001, "loss": 1.4973, "step": 9865 }, { "epoch": 1.1462097008422887, "grad_norm": 0.565473198890686, "learning_rate": 0.0001, "loss": 1.4686, "step": 9866 }, { "epoch": 1.1463258785942492, "grad_norm": 0.5632652640342712, "learning_rate": 0.0001, "loss": 1.4616, "step": 9867 }, { "epoch": 1.1464420563462097, "grad_norm": 0.5769978165626526, "learning_rate": 0.0001, "loss": 1.4663, "step": 9868 }, { "epoch": 1.1465582340981701, "grad_norm": 0.6273400187492371, "learning_rate": 0.0001, "loss": 1.6432, "step": 9869 }, { "epoch": 1.1466744118501306, "grad_norm": 0.6044973134994507, "learning_rate": 0.0001, "loss": 1.6373, "step": 9870 }, { "epoch": 1.1467905896020911, "grad_norm": 0.5586897730827332, "learning_rate": 0.0001, "loss": 1.5407, "step": 9871 }, { "epoch": 1.1469067673540516, "grad_norm": 0.5836507081985474, "learning_rate": 0.0001, "loss": 1.4729, "step": 9872 }, { "epoch": 1.147022945106012, "grad_norm": 0.5953487753868103, "learning_rate": 0.0001, "loss": 1.6167, "step": 9873 }, { "epoch": 1.1471391228579728, "grad_norm": 0.5075315833091736, "learning_rate": 0.0001, "loss": 1.336, "step": 9874 }, { "epoch": 1.1472553006099333, "grad_norm": 0.5457214713096619, "learning_rate": 0.0001, "loss": 1.3285, "step": 9875 }, { "epoch": 1.1473714783618938, "grad_norm": 0.5651188492774963, "learning_rate": 0.0001, "loss": 1.6115, "step": 9876 }, { "epoch": 1.1474876561138543, "grad_norm": 0.6411771774291992, "learning_rate": 0.0001, "loss": 1.6225, "step": 9877 }, { "epoch": 1.1476038338658148, "grad_norm": 0.516982913017273, "learning_rate": 0.0001, "loss": 1.4613, "step": 9878 }, { "epoch": 1.1477200116177753, "grad_norm": 0.538935661315918, "learning_rate": 0.0001, "loss": 1.5169, "step": 9879 }, { "epoch": 1.1478361893697357, "grad_norm": 0.6248571872711182, "learning_rate": 0.0001, "loss": 1.5922, "step": 9880 }, { "epoch": 1.1479523671216962, "grad_norm": 0.5510977506637573, "learning_rate": 0.0001, "loss": 1.6002, "step": 9881 }, { "epoch": 1.1480685448736567, "grad_norm": 0.5983824133872986, "learning_rate": 0.0001, "loss": 1.6403, "step": 9882 }, { "epoch": 1.1481847226256172, "grad_norm": 0.5636187195777893, "learning_rate": 0.0001, "loss": 1.4609, "step": 9883 }, { "epoch": 1.1483009003775777, "grad_norm": 0.5564061999320984, "learning_rate": 0.0001, "loss": 1.4619, "step": 9884 }, { "epoch": 1.1484170781295382, "grad_norm": 0.5623925924301147, "learning_rate": 0.0001, "loss": 1.3934, "step": 9885 }, { "epoch": 1.1485332558814987, "grad_norm": 0.5958746075630188, "learning_rate": 0.0001, "loss": 1.391, "step": 9886 }, { "epoch": 1.1486494336334592, "grad_norm": 0.6206726431846619, "learning_rate": 0.0001, "loss": 1.5622, "step": 9887 }, { "epoch": 1.1487656113854197, "grad_norm": 0.5640969276428223, "learning_rate": 0.0001, "loss": 1.5668, "step": 9888 }, { "epoch": 1.1488817891373801, "grad_norm": 0.5939497947692871, "learning_rate": 0.0001, "loss": 1.3998, "step": 9889 }, { "epoch": 1.1489979668893406, "grad_norm": 0.5671570897102356, "learning_rate": 0.0001, "loss": 1.393, "step": 9890 }, { "epoch": 1.1491141446413011, "grad_norm": 0.5769670605659485, "learning_rate": 0.0001, "loss": 1.4192, "step": 9891 }, { "epoch": 1.1492303223932616, "grad_norm": 0.5393675565719604, "learning_rate": 0.0001, "loss": 1.6622, "step": 9892 }, { "epoch": 1.149346500145222, "grad_norm": 0.5987977385520935, "learning_rate": 0.0001, "loss": 1.482, "step": 9893 }, { "epoch": 1.1494626778971826, "grad_norm": 0.5398848652839661, "learning_rate": 0.0001, "loss": 1.3972, "step": 9894 }, { "epoch": 1.149578855649143, "grad_norm": 0.5538201332092285, "learning_rate": 0.0001, "loss": 1.5631, "step": 9895 }, { "epoch": 1.1496950334011036, "grad_norm": 0.533630907535553, "learning_rate": 0.0001, "loss": 1.43, "step": 9896 }, { "epoch": 1.1498112111530643, "grad_norm": 0.5726608037948608, "learning_rate": 0.0001, "loss": 1.5875, "step": 9897 }, { "epoch": 1.1499273889050248, "grad_norm": 0.5075186491012573, "learning_rate": 0.0001, "loss": 1.362, "step": 9898 }, { "epoch": 1.1500435666569853, "grad_norm": 0.539199948310852, "learning_rate": 0.0001, "loss": 1.324, "step": 9899 }, { "epoch": 1.1501597444089458, "grad_norm": 0.5326648950576782, "learning_rate": 0.0001, "loss": 1.5828, "step": 9900 }, { "epoch": 1.1502759221609062, "grad_norm": 0.5526677966117859, "learning_rate": 0.0001, "loss": 1.4872, "step": 9901 }, { "epoch": 1.1503920999128667, "grad_norm": 0.5454682111740112, "learning_rate": 0.0001, "loss": 1.4269, "step": 9902 }, { "epoch": 1.1505082776648272, "grad_norm": 0.5236324071884155, "learning_rate": 0.0001, "loss": 1.3648, "step": 9903 }, { "epoch": 1.1506244554167877, "grad_norm": 0.5452289581298828, "learning_rate": 0.0001, "loss": 1.441, "step": 9904 }, { "epoch": 1.1507406331687482, "grad_norm": 0.5598399639129639, "learning_rate": 0.0001, "loss": 1.5572, "step": 9905 }, { "epoch": 1.1508568109207087, "grad_norm": 0.5843247771263123, "learning_rate": 0.0001, "loss": 1.6318, "step": 9906 }, { "epoch": 1.1509729886726692, "grad_norm": 0.5665671229362488, "learning_rate": 0.0001, "loss": 1.5197, "step": 9907 }, { "epoch": 1.1510891664246297, "grad_norm": 0.515862226486206, "learning_rate": 0.0001, "loss": 1.3599, "step": 9908 }, { "epoch": 1.1512053441765902, "grad_norm": 0.542131781578064, "learning_rate": 0.0001, "loss": 1.5009, "step": 9909 }, { "epoch": 1.1513215219285506, "grad_norm": 0.5564214587211609, "learning_rate": 0.0001, "loss": 1.3456, "step": 9910 }, { "epoch": 1.1514376996805111, "grad_norm": 0.5730974674224854, "learning_rate": 0.0001, "loss": 1.5647, "step": 9911 }, { "epoch": 1.1515538774324716, "grad_norm": 0.5575627088546753, "learning_rate": 0.0001, "loss": 1.4728, "step": 9912 }, { "epoch": 1.151670055184432, "grad_norm": 0.5514092445373535, "learning_rate": 0.0001, "loss": 1.4182, "step": 9913 }, { "epoch": 1.1517862329363926, "grad_norm": 0.5962027311325073, "learning_rate": 0.0001, "loss": 1.465, "step": 9914 }, { "epoch": 1.151902410688353, "grad_norm": 0.5560566186904907, "learning_rate": 0.0001, "loss": 1.4282, "step": 9915 }, { "epoch": 1.1520185884403138, "grad_norm": 0.5740779042243958, "learning_rate": 0.0001, "loss": 1.5807, "step": 9916 }, { "epoch": 1.1521347661922743, "grad_norm": 0.5669369101524353, "learning_rate": 0.0001, "loss": 1.4826, "step": 9917 }, { "epoch": 1.1522509439442348, "grad_norm": 0.5397534370422363, "learning_rate": 0.0001, "loss": 1.3715, "step": 9918 }, { "epoch": 1.1523671216961953, "grad_norm": 0.5681161284446716, "learning_rate": 0.0001, "loss": 1.3601, "step": 9919 }, { "epoch": 1.1524832994481558, "grad_norm": 0.5549933910369873, "learning_rate": 0.0001, "loss": 1.4637, "step": 9920 }, { "epoch": 1.1525994772001162, "grad_norm": 0.5869593024253845, "learning_rate": 0.0001, "loss": 1.4902, "step": 9921 }, { "epoch": 1.1527156549520767, "grad_norm": 0.5578819513320923, "learning_rate": 0.0001, "loss": 1.4433, "step": 9922 }, { "epoch": 1.1528318327040372, "grad_norm": 0.5640743970870972, "learning_rate": 0.0001, "loss": 1.4265, "step": 9923 }, { "epoch": 1.1529480104559977, "grad_norm": 0.5549669861793518, "learning_rate": 0.0001, "loss": 1.5671, "step": 9924 }, { "epoch": 1.1530641882079582, "grad_norm": 0.5610499978065491, "learning_rate": 0.0001, "loss": 1.4156, "step": 9925 }, { "epoch": 1.1531803659599187, "grad_norm": 0.5248560905456543, "learning_rate": 0.0001, "loss": 1.4177, "step": 9926 }, { "epoch": 1.1532965437118792, "grad_norm": 0.5581945180892944, "learning_rate": 0.0001, "loss": 1.4434, "step": 9927 }, { "epoch": 1.1534127214638397, "grad_norm": 0.5940243601799011, "learning_rate": 0.0001, "loss": 1.4916, "step": 9928 }, { "epoch": 1.1535288992158002, "grad_norm": 0.5478717088699341, "learning_rate": 0.0001, "loss": 1.5128, "step": 9929 }, { "epoch": 1.1536450769677606, "grad_norm": 0.6184346079826355, "learning_rate": 0.0001, "loss": 1.4189, "step": 9930 }, { "epoch": 1.1537612547197211, "grad_norm": 0.5686115622520447, "learning_rate": 0.0001, "loss": 1.4612, "step": 9931 }, { "epoch": 1.1538774324716816, "grad_norm": 0.5752643942832947, "learning_rate": 0.0001, "loss": 1.7198, "step": 9932 }, { "epoch": 1.153993610223642, "grad_norm": 0.5206838250160217, "learning_rate": 0.0001, "loss": 1.5202, "step": 9933 }, { "epoch": 1.1541097879756026, "grad_norm": 0.5340604186058044, "learning_rate": 0.0001, "loss": 1.5025, "step": 9934 }, { "epoch": 1.154225965727563, "grad_norm": 0.5354512929916382, "learning_rate": 0.0001, "loss": 1.4083, "step": 9935 }, { "epoch": 1.1543421434795236, "grad_norm": 0.5514809489250183, "learning_rate": 0.0001, "loss": 1.5335, "step": 9936 }, { "epoch": 1.154458321231484, "grad_norm": 0.547510027885437, "learning_rate": 0.0001, "loss": 1.4518, "step": 9937 }, { "epoch": 1.1545744989834446, "grad_norm": 0.5750606060028076, "learning_rate": 0.0001, "loss": 1.6024, "step": 9938 }, { "epoch": 1.1546906767354053, "grad_norm": 0.5391731858253479, "learning_rate": 0.0001, "loss": 1.492, "step": 9939 }, { "epoch": 1.1548068544873658, "grad_norm": 0.5582312345504761, "learning_rate": 0.0001, "loss": 1.4282, "step": 9940 }, { "epoch": 1.1549230322393262, "grad_norm": 0.5333447456359863, "learning_rate": 0.0001, "loss": 1.4328, "step": 9941 }, { "epoch": 1.1550392099912867, "grad_norm": 0.5500908493995667, "learning_rate": 0.0001, "loss": 1.4777, "step": 9942 }, { "epoch": 1.1551553877432472, "grad_norm": 0.5635502934455872, "learning_rate": 0.0001, "loss": 1.5813, "step": 9943 }, { "epoch": 1.1552715654952077, "grad_norm": 0.5841420292854309, "learning_rate": 0.0001, "loss": 1.518, "step": 9944 }, { "epoch": 1.1553877432471682, "grad_norm": 0.5658759474754333, "learning_rate": 0.0001, "loss": 1.492, "step": 9945 }, { "epoch": 1.1555039209991287, "grad_norm": 0.5532236099243164, "learning_rate": 0.0001, "loss": 1.4737, "step": 9946 }, { "epoch": 1.1556200987510892, "grad_norm": 0.5682868361473083, "learning_rate": 0.0001, "loss": 1.5697, "step": 9947 }, { "epoch": 1.1557362765030497, "grad_norm": 0.5833743810653687, "learning_rate": 0.0001, "loss": 1.4586, "step": 9948 }, { "epoch": 1.1558524542550102, "grad_norm": 0.5212302803993225, "learning_rate": 0.0001, "loss": 1.2786, "step": 9949 }, { "epoch": 1.1559686320069706, "grad_norm": 0.6027308702468872, "learning_rate": 0.0001, "loss": 1.4155, "step": 9950 }, { "epoch": 1.1560848097589311, "grad_norm": 0.5450575351715088, "learning_rate": 0.0001, "loss": 1.4678, "step": 9951 }, { "epoch": 1.1562009875108916, "grad_norm": 0.5215844511985779, "learning_rate": 0.0001, "loss": 1.4646, "step": 9952 }, { "epoch": 1.156317165262852, "grad_norm": 0.5741782188415527, "learning_rate": 0.0001, "loss": 1.4504, "step": 9953 }, { "epoch": 1.1564333430148126, "grad_norm": 0.5755845904350281, "learning_rate": 0.0001, "loss": 1.6222, "step": 9954 }, { "epoch": 1.156549520766773, "grad_norm": 0.593981921672821, "learning_rate": 0.0001, "loss": 1.6518, "step": 9955 }, { "epoch": 1.1566656985187336, "grad_norm": 0.5386795401573181, "learning_rate": 0.0001, "loss": 1.4433, "step": 9956 }, { "epoch": 1.156781876270694, "grad_norm": 0.5577713847160339, "learning_rate": 0.0001, "loss": 1.3742, "step": 9957 }, { "epoch": 1.1568980540226548, "grad_norm": 0.5421655178070068, "learning_rate": 0.0001, "loss": 1.3991, "step": 9958 }, { "epoch": 1.1570142317746153, "grad_norm": 0.5427201986312866, "learning_rate": 0.0001, "loss": 1.5449, "step": 9959 }, { "epoch": 1.1571304095265758, "grad_norm": 0.5640281438827515, "learning_rate": 0.0001, "loss": 1.6018, "step": 9960 }, { "epoch": 1.1572465872785362, "grad_norm": 0.5602498054504395, "learning_rate": 0.0001, "loss": 1.5278, "step": 9961 }, { "epoch": 1.1573627650304967, "grad_norm": 0.5524298548698425, "learning_rate": 0.0001, "loss": 1.4721, "step": 9962 }, { "epoch": 1.1574789427824572, "grad_norm": 0.5706125497817993, "learning_rate": 0.0001, "loss": 1.4719, "step": 9963 }, { "epoch": 1.1575951205344177, "grad_norm": 0.5506826639175415, "learning_rate": 0.0001, "loss": 1.3571, "step": 9964 }, { "epoch": 1.1577112982863782, "grad_norm": 0.5594316720962524, "learning_rate": 0.0001, "loss": 1.482, "step": 9965 }, { "epoch": 1.1578274760383387, "grad_norm": 0.5849363207817078, "learning_rate": 0.0001, "loss": 1.6765, "step": 9966 }, { "epoch": 1.1579436537902992, "grad_norm": 0.5251225233078003, "learning_rate": 0.0001, "loss": 1.2302, "step": 9967 }, { "epoch": 1.1580598315422597, "grad_norm": 0.6071436405181885, "learning_rate": 0.0001, "loss": 1.5038, "step": 9968 }, { "epoch": 1.1581760092942202, "grad_norm": 0.5688941478729248, "learning_rate": 0.0001, "loss": 1.6126, "step": 9969 }, { "epoch": 1.1582921870461806, "grad_norm": 0.5586535930633545, "learning_rate": 0.0001, "loss": 1.4697, "step": 9970 }, { "epoch": 1.1584083647981411, "grad_norm": 0.6454538702964783, "learning_rate": 0.0001, "loss": 1.5929, "step": 9971 }, { "epoch": 1.1585245425501016, "grad_norm": 0.5616946816444397, "learning_rate": 0.0001, "loss": 1.389, "step": 9972 }, { "epoch": 1.158640720302062, "grad_norm": 0.6290552616119385, "learning_rate": 0.0001, "loss": 1.6434, "step": 9973 }, { "epoch": 1.1587568980540226, "grad_norm": 0.5599773526191711, "learning_rate": 0.0001, "loss": 1.3618, "step": 9974 }, { "epoch": 1.158873075805983, "grad_norm": 0.5668715834617615, "learning_rate": 0.0001, "loss": 1.403, "step": 9975 }, { "epoch": 1.1589892535579436, "grad_norm": 0.576244056224823, "learning_rate": 0.0001, "loss": 1.5943, "step": 9976 }, { "epoch": 1.159105431309904, "grad_norm": 0.5568404793739319, "learning_rate": 0.0001, "loss": 1.3909, "step": 9977 }, { "epoch": 1.1592216090618646, "grad_norm": 0.5190873146057129, "learning_rate": 0.0001, "loss": 1.3981, "step": 9978 }, { "epoch": 1.159337786813825, "grad_norm": 0.580040693283081, "learning_rate": 0.0001, "loss": 1.3702, "step": 9979 }, { "epoch": 1.1594539645657855, "grad_norm": 0.5603786110877991, "learning_rate": 0.0001, "loss": 1.6677, "step": 9980 }, { "epoch": 1.1595701423177462, "grad_norm": 0.5624469518661499, "learning_rate": 0.0001, "loss": 1.5952, "step": 9981 }, { "epoch": 1.1596863200697067, "grad_norm": 0.5951324701309204, "learning_rate": 0.0001, "loss": 1.6157, "step": 9982 }, { "epoch": 1.1598024978216672, "grad_norm": 0.547521710395813, "learning_rate": 0.0001, "loss": 1.474, "step": 9983 }, { "epoch": 1.1599186755736277, "grad_norm": 0.5596818923950195, "learning_rate": 0.0001, "loss": 1.4823, "step": 9984 }, { "epoch": 1.1600348533255882, "grad_norm": 0.5726192593574524, "learning_rate": 0.0001, "loss": 1.4882, "step": 9985 }, { "epoch": 1.1601510310775487, "grad_norm": 0.5832332968711853, "learning_rate": 0.0001, "loss": 1.7152, "step": 9986 }, { "epoch": 1.1602672088295092, "grad_norm": 0.5925440192222595, "learning_rate": 0.0001, "loss": 1.5852, "step": 9987 }, { "epoch": 1.1603833865814697, "grad_norm": 0.6479099988937378, "learning_rate": 0.0001, "loss": 1.6276, "step": 9988 }, { "epoch": 1.1604995643334302, "grad_norm": 0.5257347226142883, "learning_rate": 0.0001, "loss": 1.4354, "step": 9989 }, { "epoch": 1.1606157420853906, "grad_norm": 0.5414207577705383, "learning_rate": 0.0001, "loss": 1.4025, "step": 9990 }, { "epoch": 1.1607319198373511, "grad_norm": 0.5247325897216797, "learning_rate": 0.0001, "loss": 1.511, "step": 9991 }, { "epoch": 1.1608480975893116, "grad_norm": 0.5498883724212646, "learning_rate": 0.0001, "loss": 1.535, "step": 9992 }, { "epoch": 1.1609642753412721, "grad_norm": 0.5376471877098083, "learning_rate": 0.0001, "loss": 1.4189, "step": 9993 }, { "epoch": 1.1610804530932326, "grad_norm": 0.5790148377418518, "learning_rate": 0.0001, "loss": 1.6122, "step": 9994 }, { "epoch": 1.161196630845193, "grad_norm": 0.5751370191574097, "learning_rate": 0.0001, "loss": 1.4635, "step": 9995 }, { "epoch": 1.1613128085971536, "grad_norm": 0.5805469751358032, "learning_rate": 0.0001, "loss": 1.4589, "step": 9996 }, { "epoch": 1.161428986349114, "grad_norm": 0.5531786680221558, "learning_rate": 0.0001, "loss": 1.4823, "step": 9997 }, { "epoch": 1.1615451641010746, "grad_norm": 0.5185563564300537, "learning_rate": 0.0001, "loss": 1.4648, "step": 9998 }, { "epoch": 1.1616613418530353, "grad_norm": 0.5276340246200562, "learning_rate": 0.0001, "loss": 1.3336, "step": 9999 }, { "epoch": 1.1617775196049958, "grad_norm": 0.5624109506607056, "learning_rate": 0.0001, "loss": 1.5546, "step": 10000 }, { "epoch": 1.1618936973569562, "grad_norm": 0.5590581297874451, "learning_rate": 0.0001, "loss": 1.4464, "step": 10001 }, { "epoch": 1.1620098751089167, "grad_norm": 0.60872483253479, "learning_rate": 0.0001, "loss": 1.502, "step": 10002 }, { "epoch": 1.1621260528608772, "grad_norm": 0.5246327519416809, "learning_rate": 0.0001, "loss": 1.3447, "step": 10003 }, { "epoch": 1.1622422306128377, "grad_norm": 0.5634382963180542, "learning_rate": 0.0001, "loss": 1.4741, "step": 10004 }, { "epoch": 1.1623584083647982, "grad_norm": 0.55689537525177, "learning_rate": 0.0001, "loss": 1.3776, "step": 10005 }, { "epoch": 1.1624745861167587, "grad_norm": 0.5736417174339294, "learning_rate": 0.0001, "loss": 1.4398, "step": 10006 }, { "epoch": 1.1625907638687192, "grad_norm": 0.5619802474975586, "learning_rate": 0.0001, "loss": 1.6099, "step": 10007 }, { "epoch": 1.1627069416206797, "grad_norm": 0.5841330885887146, "learning_rate": 0.0001, "loss": 1.6485, "step": 10008 }, { "epoch": 1.1628231193726402, "grad_norm": 0.5336185693740845, "learning_rate": 0.0001, "loss": 1.2244, "step": 10009 }, { "epoch": 1.1629392971246006, "grad_norm": 0.5390673875808716, "learning_rate": 0.0001, "loss": 1.4239, "step": 10010 }, { "epoch": 1.1630554748765611, "grad_norm": 0.5778940320014954, "learning_rate": 0.0001, "loss": 1.2823, "step": 10011 }, { "epoch": 1.1631716526285216, "grad_norm": 0.5761231184005737, "learning_rate": 0.0001, "loss": 1.4649, "step": 10012 }, { "epoch": 1.1632878303804821, "grad_norm": 0.5847422480583191, "learning_rate": 0.0001, "loss": 1.5658, "step": 10013 }, { "epoch": 1.1634040081324426, "grad_norm": 0.6390685439109802, "learning_rate": 0.0001, "loss": 1.5276, "step": 10014 }, { "epoch": 1.163520185884403, "grad_norm": 0.5886343121528625, "learning_rate": 0.0001, "loss": 1.4341, "step": 10015 }, { "epoch": 1.1636363636363636, "grad_norm": 0.581598699092865, "learning_rate": 0.0001, "loss": 1.4182, "step": 10016 }, { "epoch": 1.163752541388324, "grad_norm": 0.6227822303771973, "learning_rate": 0.0001, "loss": 1.4497, "step": 10017 }, { "epoch": 1.1638687191402846, "grad_norm": 0.5321307182312012, "learning_rate": 0.0001, "loss": 1.4031, "step": 10018 }, { "epoch": 1.163984896892245, "grad_norm": 0.6062250137329102, "learning_rate": 0.0001, "loss": 1.6074, "step": 10019 }, { "epoch": 1.1641010746442055, "grad_norm": 0.5998744964599609, "learning_rate": 0.0001, "loss": 1.4752, "step": 10020 }, { "epoch": 1.164217252396166, "grad_norm": 0.6060172319412231, "learning_rate": 0.0001, "loss": 1.458, "step": 10021 }, { "epoch": 1.1643334301481265, "grad_norm": 0.5388416051864624, "learning_rate": 0.0001, "loss": 1.5048, "step": 10022 }, { "epoch": 1.1644496079000872, "grad_norm": 0.5970954895019531, "learning_rate": 0.0001, "loss": 1.545, "step": 10023 }, { "epoch": 1.1645657856520477, "grad_norm": 0.5507951974868774, "learning_rate": 0.0001, "loss": 1.4816, "step": 10024 }, { "epoch": 1.1646819634040082, "grad_norm": 0.5912706255912781, "learning_rate": 0.0001, "loss": 1.4366, "step": 10025 }, { "epoch": 1.1647981411559687, "grad_norm": 0.5806775093078613, "learning_rate": 0.0001, "loss": 1.4093, "step": 10026 }, { "epoch": 1.1649143189079292, "grad_norm": 0.6133882403373718, "learning_rate": 0.0001, "loss": 1.5617, "step": 10027 }, { "epoch": 1.1650304966598897, "grad_norm": 0.5739568471908569, "learning_rate": 0.0001, "loss": 1.4323, "step": 10028 }, { "epoch": 1.1651466744118502, "grad_norm": 0.5220316052436829, "learning_rate": 0.0001, "loss": 1.3818, "step": 10029 }, { "epoch": 1.1652628521638106, "grad_norm": 0.5443993806838989, "learning_rate": 0.0001, "loss": 1.4489, "step": 10030 }, { "epoch": 1.1653790299157711, "grad_norm": 0.5931938290596008, "learning_rate": 0.0001, "loss": 1.4597, "step": 10031 }, { "epoch": 1.1654952076677316, "grad_norm": 0.5669753551483154, "learning_rate": 0.0001, "loss": 1.3503, "step": 10032 }, { "epoch": 1.1656113854196921, "grad_norm": 0.5309962034225464, "learning_rate": 0.0001, "loss": 1.3674, "step": 10033 }, { "epoch": 1.1657275631716526, "grad_norm": 0.5499364137649536, "learning_rate": 0.0001, "loss": 1.3246, "step": 10034 }, { "epoch": 1.165843740923613, "grad_norm": 0.5379160046577454, "learning_rate": 0.0001, "loss": 1.4529, "step": 10035 }, { "epoch": 1.1659599186755736, "grad_norm": 0.605657696723938, "learning_rate": 0.0001, "loss": 1.5954, "step": 10036 }, { "epoch": 1.166076096427534, "grad_norm": 0.5854047536849976, "learning_rate": 0.0001, "loss": 1.4527, "step": 10037 }, { "epoch": 1.1661922741794946, "grad_norm": 0.6003736853599548, "learning_rate": 0.0001, "loss": 1.4352, "step": 10038 }, { "epoch": 1.166308451931455, "grad_norm": 0.5782053470611572, "learning_rate": 0.0001, "loss": 1.4934, "step": 10039 }, { "epoch": 1.1664246296834155, "grad_norm": 0.5899875164031982, "learning_rate": 0.0001, "loss": 1.689, "step": 10040 }, { "epoch": 1.1665408074353762, "grad_norm": 0.580557107925415, "learning_rate": 0.0001, "loss": 1.5173, "step": 10041 }, { "epoch": 1.1666569851873367, "grad_norm": 0.5655171275138855, "learning_rate": 0.0001, "loss": 1.49, "step": 10042 }, { "epoch": 1.1667731629392972, "grad_norm": 0.5810794830322266, "learning_rate": 0.0001, "loss": 1.5587, "step": 10043 }, { "epoch": 1.1668893406912577, "grad_norm": 0.5902233123779297, "learning_rate": 0.0001, "loss": 1.6072, "step": 10044 }, { "epoch": 1.1670055184432182, "grad_norm": 0.5441036820411682, "learning_rate": 0.0001, "loss": 1.4305, "step": 10045 }, { "epoch": 1.1671216961951787, "grad_norm": 0.5872811675071716, "learning_rate": 0.0001, "loss": 1.499, "step": 10046 }, { "epoch": 1.1672378739471392, "grad_norm": 0.5635554194450378, "learning_rate": 0.0001, "loss": 1.3447, "step": 10047 }, { "epoch": 1.1673540516990997, "grad_norm": 0.5794597864151001, "learning_rate": 0.0001, "loss": 1.4998, "step": 10048 }, { "epoch": 1.1674702294510602, "grad_norm": 0.5093443393707275, "learning_rate": 0.0001, "loss": 1.3911, "step": 10049 }, { "epoch": 1.1675864072030206, "grad_norm": 0.5254181623458862, "learning_rate": 0.0001, "loss": 1.415, "step": 10050 }, { "epoch": 1.1677025849549811, "grad_norm": 0.5460498929023743, "learning_rate": 0.0001, "loss": 1.4379, "step": 10051 }, { "epoch": 1.1678187627069416, "grad_norm": 0.5802798867225647, "learning_rate": 0.0001, "loss": 1.4191, "step": 10052 }, { "epoch": 1.1679349404589021, "grad_norm": 0.5423253178596497, "learning_rate": 0.0001, "loss": 1.4365, "step": 10053 }, { "epoch": 1.1680511182108626, "grad_norm": 0.5617722272872925, "learning_rate": 0.0001, "loss": 1.2869, "step": 10054 }, { "epoch": 1.168167295962823, "grad_norm": 0.542448103427887, "learning_rate": 0.0001, "loss": 1.4043, "step": 10055 }, { "epoch": 1.1682834737147836, "grad_norm": 0.5292364954948425, "learning_rate": 0.0001, "loss": 1.4988, "step": 10056 }, { "epoch": 1.168399651466744, "grad_norm": 0.6243206858634949, "learning_rate": 0.0001, "loss": 1.496, "step": 10057 }, { "epoch": 1.1685158292187046, "grad_norm": 0.5304319858551025, "learning_rate": 0.0001, "loss": 1.36, "step": 10058 }, { "epoch": 1.168632006970665, "grad_norm": 0.5566378831863403, "learning_rate": 0.0001, "loss": 1.5019, "step": 10059 }, { "epoch": 1.1687481847226255, "grad_norm": 0.6295537352561951, "learning_rate": 0.0001, "loss": 1.6282, "step": 10060 }, { "epoch": 1.168864362474586, "grad_norm": 0.5838227868080139, "learning_rate": 0.0001, "loss": 1.5474, "step": 10061 }, { "epoch": 1.1689805402265465, "grad_norm": 0.5756407380104065, "learning_rate": 0.0001, "loss": 1.6067, "step": 10062 }, { "epoch": 1.169096717978507, "grad_norm": 0.5596350431442261, "learning_rate": 0.0001, "loss": 1.4751, "step": 10063 }, { "epoch": 1.1692128957304675, "grad_norm": 0.5502869486808777, "learning_rate": 0.0001, "loss": 1.4222, "step": 10064 }, { "epoch": 1.1693290734824282, "grad_norm": 0.5689765214920044, "learning_rate": 0.0001, "loss": 1.3957, "step": 10065 }, { "epoch": 1.1694452512343887, "grad_norm": 0.5550212264060974, "learning_rate": 0.0001, "loss": 1.5104, "step": 10066 }, { "epoch": 1.1695614289863492, "grad_norm": 0.5285496711730957, "learning_rate": 0.0001, "loss": 1.3828, "step": 10067 }, { "epoch": 1.1696776067383097, "grad_norm": 0.5546186566352844, "learning_rate": 0.0001, "loss": 1.3966, "step": 10068 }, { "epoch": 1.1697937844902702, "grad_norm": 0.5387147068977356, "learning_rate": 0.0001, "loss": 1.4273, "step": 10069 }, { "epoch": 1.1699099622422306, "grad_norm": 0.6305792927742004, "learning_rate": 0.0001, "loss": 1.6158, "step": 10070 }, { "epoch": 1.1700261399941911, "grad_norm": 0.5712059736251831, "learning_rate": 0.0001, "loss": 1.5246, "step": 10071 }, { "epoch": 1.1701423177461516, "grad_norm": 0.5652614831924438, "learning_rate": 0.0001, "loss": 1.5069, "step": 10072 }, { "epoch": 1.1702584954981121, "grad_norm": 0.6206264495849609, "learning_rate": 0.0001, "loss": 1.667, "step": 10073 }, { "epoch": 1.1703746732500726, "grad_norm": 0.5684471726417542, "learning_rate": 0.0001, "loss": 1.3606, "step": 10074 }, { "epoch": 1.170490851002033, "grad_norm": 0.5581884980201721, "learning_rate": 0.0001, "loss": 1.4814, "step": 10075 }, { "epoch": 1.1706070287539936, "grad_norm": 0.568840742111206, "learning_rate": 0.0001, "loss": 1.4518, "step": 10076 }, { "epoch": 1.170723206505954, "grad_norm": 0.5445674061775208, "learning_rate": 0.0001, "loss": 1.4405, "step": 10077 }, { "epoch": 1.1708393842579146, "grad_norm": 0.52695631980896, "learning_rate": 0.0001, "loss": 1.332, "step": 10078 }, { "epoch": 1.170955562009875, "grad_norm": 0.5562939643859863, "learning_rate": 0.0001, "loss": 1.5677, "step": 10079 }, { "epoch": 1.1710717397618355, "grad_norm": 0.5368567705154419, "learning_rate": 0.0001, "loss": 1.3677, "step": 10080 }, { "epoch": 1.171187917513796, "grad_norm": 0.569368839263916, "learning_rate": 0.0001, "loss": 1.5184, "step": 10081 }, { "epoch": 1.1713040952657565, "grad_norm": 0.5831530690193176, "learning_rate": 0.0001, "loss": 1.7276, "step": 10082 }, { "epoch": 1.1714202730177172, "grad_norm": 0.5623541474342346, "learning_rate": 0.0001, "loss": 1.4092, "step": 10083 }, { "epoch": 1.1715364507696777, "grad_norm": 0.5392382144927979, "learning_rate": 0.0001, "loss": 1.3762, "step": 10084 }, { "epoch": 1.1716526285216382, "grad_norm": 0.5557898283004761, "learning_rate": 0.0001, "loss": 1.2674, "step": 10085 }, { "epoch": 1.1717688062735987, "grad_norm": 0.5454236268997192, "learning_rate": 0.0001, "loss": 1.563, "step": 10086 }, { "epoch": 1.1718849840255592, "grad_norm": 0.5669856071472168, "learning_rate": 0.0001, "loss": 1.365, "step": 10087 }, { "epoch": 1.1720011617775197, "grad_norm": 0.5392054319381714, "learning_rate": 0.0001, "loss": 1.4008, "step": 10088 }, { "epoch": 1.1721173395294802, "grad_norm": 0.544360876083374, "learning_rate": 0.0001, "loss": 1.4126, "step": 10089 }, { "epoch": 1.1722335172814407, "grad_norm": 0.5895025730133057, "learning_rate": 0.0001, "loss": 1.5437, "step": 10090 }, { "epoch": 1.1723496950334011, "grad_norm": 0.504751443862915, "learning_rate": 0.0001, "loss": 1.308, "step": 10091 }, { "epoch": 1.1724658727853616, "grad_norm": 0.5912155508995056, "learning_rate": 0.0001, "loss": 1.5424, "step": 10092 }, { "epoch": 1.1725820505373221, "grad_norm": 0.5704768300056458, "learning_rate": 0.0001, "loss": 1.397, "step": 10093 }, { "epoch": 1.1726982282892826, "grad_norm": 0.6013458371162415, "learning_rate": 0.0001, "loss": 1.4999, "step": 10094 }, { "epoch": 1.172814406041243, "grad_norm": 0.5588251352310181, "learning_rate": 0.0001, "loss": 1.4981, "step": 10095 }, { "epoch": 1.1729305837932036, "grad_norm": 0.5580037832260132, "learning_rate": 0.0001, "loss": 1.5068, "step": 10096 }, { "epoch": 1.173046761545164, "grad_norm": 0.5618078708648682, "learning_rate": 0.0001, "loss": 1.4398, "step": 10097 }, { "epoch": 1.1731629392971246, "grad_norm": 0.5638139247894287, "learning_rate": 0.0001, "loss": 1.392, "step": 10098 }, { "epoch": 1.173279117049085, "grad_norm": 0.49057668447494507, "learning_rate": 0.0001, "loss": 1.3168, "step": 10099 }, { "epoch": 1.1733952948010455, "grad_norm": 0.5525259971618652, "learning_rate": 0.0001, "loss": 1.5545, "step": 10100 }, { "epoch": 1.173511472553006, "grad_norm": 0.5700230598449707, "learning_rate": 0.0001, "loss": 1.4463, "step": 10101 }, { "epoch": 1.1736276503049665, "grad_norm": 0.517629861831665, "learning_rate": 0.0001, "loss": 1.4066, "step": 10102 }, { "epoch": 1.173743828056927, "grad_norm": 0.5485448241233826, "learning_rate": 0.0001, "loss": 1.4147, "step": 10103 }, { "epoch": 1.1738600058088875, "grad_norm": 0.5651414394378662, "learning_rate": 0.0001, "loss": 1.4629, "step": 10104 }, { "epoch": 1.173976183560848, "grad_norm": 0.5873313546180725, "learning_rate": 0.0001, "loss": 1.5314, "step": 10105 }, { "epoch": 1.1740923613128085, "grad_norm": 0.5807081460952759, "learning_rate": 0.0001, "loss": 1.4068, "step": 10106 }, { "epoch": 1.1742085390647692, "grad_norm": 0.5531933903694153, "learning_rate": 0.0001, "loss": 1.4888, "step": 10107 }, { "epoch": 1.1743247168167297, "grad_norm": 0.6018572449684143, "learning_rate": 0.0001, "loss": 1.5242, "step": 10108 }, { "epoch": 1.1744408945686902, "grad_norm": 0.5930827260017395, "learning_rate": 0.0001, "loss": 1.5595, "step": 10109 }, { "epoch": 1.1745570723206507, "grad_norm": 0.5503697991371155, "learning_rate": 0.0001, "loss": 1.3225, "step": 10110 }, { "epoch": 1.1746732500726111, "grad_norm": 0.5934546589851379, "learning_rate": 0.0001, "loss": 1.5799, "step": 10111 }, { "epoch": 1.1747894278245716, "grad_norm": 0.6462099552154541, "learning_rate": 0.0001, "loss": 1.6257, "step": 10112 }, { "epoch": 1.1749056055765321, "grad_norm": 0.5270904302597046, "learning_rate": 0.0001, "loss": 1.3637, "step": 10113 }, { "epoch": 1.1750217833284926, "grad_norm": 0.6062031984329224, "learning_rate": 0.0001, "loss": 1.4929, "step": 10114 }, { "epoch": 1.175137961080453, "grad_norm": 0.6364587545394897, "learning_rate": 0.0001, "loss": 1.502, "step": 10115 }, { "epoch": 1.1752541388324136, "grad_norm": 0.6208913326263428, "learning_rate": 0.0001, "loss": 1.5737, "step": 10116 }, { "epoch": 1.175370316584374, "grad_norm": 0.5763484835624695, "learning_rate": 0.0001, "loss": 1.4576, "step": 10117 }, { "epoch": 1.1754864943363346, "grad_norm": 0.6294147968292236, "learning_rate": 0.0001, "loss": 1.6216, "step": 10118 }, { "epoch": 1.175602672088295, "grad_norm": 0.5596899390220642, "learning_rate": 0.0001, "loss": 1.5756, "step": 10119 }, { "epoch": 1.1757188498402555, "grad_norm": 0.6140156984329224, "learning_rate": 0.0001, "loss": 1.6399, "step": 10120 }, { "epoch": 1.175835027592216, "grad_norm": 0.5578920841217041, "learning_rate": 0.0001, "loss": 1.4892, "step": 10121 }, { "epoch": 1.1759512053441765, "grad_norm": 0.5545052886009216, "learning_rate": 0.0001, "loss": 1.41, "step": 10122 }, { "epoch": 1.176067383096137, "grad_norm": 0.5487355589866638, "learning_rate": 0.0001, "loss": 1.3392, "step": 10123 }, { "epoch": 1.1761835608480975, "grad_norm": 0.5947970747947693, "learning_rate": 0.0001, "loss": 1.5874, "step": 10124 }, { "epoch": 1.1762997386000582, "grad_norm": 0.5524084568023682, "learning_rate": 0.0001, "loss": 1.4826, "step": 10125 }, { "epoch": 1.1764159163520187, "grad_norm": 0.5855798125267029, "learning_rate": 0.0001, "loss": 1.5117, "step": 10126 }, { "epoch": 1.1765320941039792, "grad_norm": 0.5738104581832886, "learning_rate": 0.0001, "loss": 1.5542, "step": 10127 }, { "epoch": 1.1766482718559397, "grad_norm": 0.6277796030044556, "learning_rate": 0.0001, "loss": 1.51, "step": 10128 }, { "epoch": 1.1767644496079002, "grad_norm": 0.558943510055542, "learning_rate": 0.0001, "loss": 1.4912, "step": 10129 }, { "epoch": 1.1768806273598607, "grad_norm": 0.5805367231369019, "learning_rate": 0.0001, "loss": 1.6493, "step": 10130 }, { "epoch": 1.1769968051118211, "grad_norm": 0.5456610918045044, "learning_rate": 0.0001, "loss": 1.4819, "step": 10131 }, { "epoch": 1.1771129828637816, "grad_norm": 0.5442765355110168, "learning_rate": 0.0001, "loss": 1.4487, "step": 10132 }, { "epoch": 1.1772291606157421, "grad_norm": 0.5121324062347412, "learning_rate": 0.0001, "loss": 1.3845, "step": 10133 }, { "epoch": 1.1773453383677026, "grad_norm": 0.5197672843933105, "learning_rate": 0.0001, "loss": 1.3748, "step": 10134 }, { "epoch": 1.177461516119663, "grad_norm": 0.5549630522727966, "learning_rate": 0.0001, "loss": 1.4575, "step": 10135 }, { "epoch": 1.1775776938716236, "grad_norm": 0.6009778380393982, "learning_rate": 0.0001, "loss": 1.6594, "step": 10136 }, { "epoch": 1.177693871623584, "grad_norm": 0.6478327512741089, "learning_rate": 0.0001, "loss": 1.3931, "step": 10137 }, { "epoch": 1.1778100493755446, "grad_norm": 0.5748621821403503, "learning_rate": 0.0001, "loss": 1.5719, "step": 10138 }, { "epoch": 1.177926227127505, "grad_norm": 0.5634380578994751, "learning_rate": 0.0001, "loss": 1.4188, "step": 10139 }, { "epoch": 1.1780424048794655, "grad_norm": 0.5881836414337158, "learning_rate": 0.0001, "loss": 1.5994, "step": 10140 }, { "epoch": 1.178158582631426, "grad_norm": 0.5679641962051392, "learning_rate": 0.0001, "loss": 1.5643, "step": 10141 }, { "epoch": 1.1782747603833865, "grad_norm": 0.5754004120826721, "learning_rate": 0.0001, "loss": 1.6292, "step": 10142 }, { "epoch": 1.178390938135347, "grad_norm": 0.6078845858573914, "learning_rate": 0.0001, "loss": 1.5526, "step": 10143 }, { "epoch": 1.1785071158873075, "grad_norm": 0.5382782220840454, "learning_rate": 0.0001, "loss": 1.3716, "step": 10144 }, { "epoch": 1.178623293639268, "grad_norm": 0.565054178237915, "learning_rate": 0.0001, "loss": 1.4706, "step": 10145 }, { "epoch": 1.1787394713912285, "grad_norm": 0.6017858982086182, "learning_rate": 0.0001, "loss": 1.5564, "step": 10146 }, { "epoch": 1.178855649143189, "grad_norm": 0.533458948135376, "learning_rate": 0.0001, "loss": 1.2963, "step": 10147 }, { "epoch": 1.1789718268951495, "grad_norm": 0.6092103719711304, "learning_rate": 0.0001, "loss": 1.5096, "step": 10148 }, { "epoch": 1.1790880046471102, "grad_norm": 0.5930465459823608, "learning_rate": 0.0001, "loss": 1.476, "step": 10149 }, { "epoch": 1.1792041823990707, "grad_norm": 0.5392512083053589, "learning_rate": 0.0001, "loss": 1.4036, "step": 10150 }, { "epoch": 1.1793203601510311, "grad_norm": 0.5717263221740723, "learning_rate": 0.0001, "loss": 1.5386, "step": 10151 }, { "epoch": 1.1794365379029916, "grad_norm": 0.5188798308372498, "learning_rate": 0.0001, "loss": 1.429, "step": 10152 }, { "epoch": 1.1795527156549521, "grad_norm": 0.5673980712890625, "learning_rate": 0.0001, "loss": 1.446, "step": 10153 }, { "epoch": 1.1796688934069126, "grad_norm": 0.5817063450813293, "learning_rate": 0.0001, "loss": 1.7309, "step": 10154 }, { "epoch": 1.179785071158873, "grad_norm": 0.5734279751777649, "learning_rate": 0.0001, "loss": 1.5461, "step": 10155 }, { "epoch": 1.1799012489108336, "grad_norm": 0.540127158164978, "learning_rate": 0.0001, "loss": 1.5124, "step": 10156 }, { "epoch": 1.180017426662794, "grad_norm": 0.6068228483200073, "learning_rate": 0.0001, "loss": 1.5512, "step": 10157 }, { "epoch": 1.1801336044147546, "grad_norm": 0.5281007289886475, "learning_rate": 0.0001, "loss": 1.3727, "step": 10158 }, { "epoch": 1.180249782166715, "grad_norm": 0.57302325963974, "learning_rate": 0.0001, "loss": 1.4104, "step": 10159 }, { "epoch": 1.1803659599186755, "grad_norm": 0.5550638437271118, "learning_rate": 0.0001, "loss": 1.5377, "step": 10160 }, { "epoch": 1.180482137670636, "grad_norm": 0.559135377407074, "learning_rate": 0.0001, "loss": 1.5182, "step": 10161 }, { "epoch": 1.1805983154225965, "grad_norm": 0.5715136528015137, "learning_rate": 0.0001, "loss": 1.481, "step": 10162 }, { "epoch": 1.180714493174557, "grad_norm": 0.5593242645263672, "learning_rate": 0.0001, "loss": 1.536, "step": 10163 }, { "epoch": 1.1808306709265175, "grad_norm": 0.5487082004547119, "learning_rate": 0.0001, "loss": 1.4297, "step": 10164 }, { "epoch": 1.180946848678478, "grad_norm": 0.5988155603408813, "learning_rate": 0.0001, "loss": 1.6336, "step": 10165 }, { "epoch": 1.1810630264304385, "grad_norm": 0.5767937302589417, "learning_rate": 0.0001, "loss": 1.5421, "step": 10166 }, { "epoch": 1.1811792041823992, "grad_norm": 0.6116029620170593, "learning_rate": 0.0001, "loss": 1.5717, "step": 10167 }, { "epoch": 1.1812953819343597, "grad_norm": 0.5582594275474548, "learning_rate": 0.0001, "loss": 1.361, "step": 10168 }, { "epoch": 1.1814115596863202, "grad_norm": 0.5252476930618286, "learning_rate": 0.0001, "loss": 1.4629, "step": 10169 }, { "epoch": 1.1815277374382807, "grad_norm": 0.5445083379745483, "learning_rate": 0.0001, "loss": 1.5312, "step": 10170 }, { "epoch": 1.1816439151902411, "grad_norm": 0.5257118344306946, "learning_rate": 0.0001, "loss": 1.3504, "step": 10171 }, { "epoch": 1.1817600929422016, "grad_norm": 0.5568935871124268, "learning_rate": 0.0001, "loss": 1.4852, "step": 10172 }, { "epoch": 1.1818762706941621, "grad_norm": 0.5395621061325073, "learning_rate": 0.0001, "loss": 1.5846, "step": 10173 }, { "epoch": 1.1819924484461226, "grad_norm": 0.5763841867446899, "learning_rate": 0.0001, "loss": 1.57, "step": 10174 }, { "epoch": 1.182108626198083, "grad_norm": 0.5146093964576721, "learning_rate": 0.0001, "loss": 1.3141, "step": 10175 }, { "epoch": 1.1822248039500436, "grad_norm": 0.5548669099807739, "learning_rate": 0.0001, "loss": 1.4279, "step": 10176 }, { "epoch": 1.182340981702004, "grad_norm": 0.5692338347434998, "learning_rate": 0.0001, "loss": 1.4724, "step": 10177 }, { "epoch": 1.1824571594539646, "grad_norm": 0.5822345614433289, "learning_rate": 0.0001, "loss": 1.5019, "step": 10178 }, { "epoch": 1.182573337205925, "grad_norm": 0.5448129177093506, "learning_rate": 0.0001, "loss": 1.3834, "step": 10179 }, { "epoch": 1.1826895149578855, "grad_norm": 0.6466269493103027, "learning_rate": 0.0001, "loss": 1.4509, "step": 10180 }, { "epoch": 1.182805692709846, "grad_norm": 0.6030110120773315, "learning_rate": 0.0001, "loss": 1.4385, "step": 10181 }, { "epoch": 1.1829218704618065, "grad_norm": 0.5867081880569458, "learning_rate": 0.0001, "loss": 1.6097, "step": 10182 }, { "epoch": 1.183038048213767, "grad_norm": 0.543514609336853, "learning_rate": 0.0001, "loss": 1.4714, "step": 10183 }, { "epoch": 1.1831542259657275, "grad_norm": 0.585824728012085, "learning_rate": 0.0001, "loss": 1.6173, "step": 10184 }, { "epoch": 1.183270403717688, "grad_norm": 0.5779645442962646, "learning_rate": 0.0001, "loss": 1.5448, "step": 10185 }, { "epoch": 1.1833865814696485, "grad_norm": 0.5890458226203918, "learning_rate": 0.0001, "loss": 1.5479, "step": 10186 }, { "epoch": 1.183502759221609, "grad_norm": 0.5767694711685181, "learning_rate": 0.0001, "loss": 1.5161, "step": 10187 }, { "epoch": 1.1836189369735695, "grad_norm": 0.5827814936637878, "learning_rate": 0.0001, "loss": 1.4696, "step": 10188 }, { "epoch": 1.18373511472553, "grad_norm": 0.5493532419204712, "learning_rate": 0.0001, "loss": 1.4297, "step": 10189 }, { "epoch": 1.1838512924774907, "grad_norm": 0.5599081516265869, "learning_rate": 0.0001, "loss": 1.4913, "step": 10190 }, { "epoch": 1.1839674702294511, "grad_norm": 0.5541647672653198, "learning_rate": 0.0001, "loss": 1.5116, "step": 10191 }, { "epoch": 1.1840836479814116, "grad_norm": 0.5816530585289001, "learning_rate": 0.0001, "loss": 1.611, "step": 10192 }, { "epoch": 1.1841998257333721, "grad_norm": 0.6219608187675476, "learning_rate": 0.0001, "loss": 1.5939, "step": 10193 }, { "epoch": 1.1843160034853326, "grad_norm": 0.5906521081924438, "learning_rate": 0.0001, "loss": 1.6049, "step": 10194 }, { "epoch": 1.184432181237293, "grad_norm": 0.5859937071800232, "learning_rate": 0.0001, "loss": 1.5138, "step": 10195 }, { "epoch": 1.1845483589892536, "grad_norm": 0.5565850138664246, "learning_rate": 0.0001, "loss": 1.4176, "step": 10196 }, { "epoch": 1.184664536741214, "grad_norm": 0.5286614298820496, "learning_rate": 0.0001, "loss": 1.2829, "step": 10197 }, { "epoch": 1.1847807144931746, "grad_norm": 0.5622779726982117, "learning_rate": 0.0001, "loss": 1.6416, "step": 10198 }, { "epoch": 1.184896892245135, "grad_norm": 0.5423121452331543, "learning_rate": 0.0001, "loss": 1.5449, "step": 10199 }, { "epoch": 1.1850130699970955, "grad_norm": 0.564444899559021, "learning_rate": 0.0001, "loss": 1.5836, "step": 10200 }, { "epoch": 1.185129247749056, "grad_norm": 0.5337464213371277, "learning_rate": 0.0001, "loss": 1.4449, "step": 10201 }, { "epoch": 1.1852454255010165, "grad_norm": 0.5334920287132263, "learning_rate": 0.0001, "loss": 1.369, "step": 10202 }, { "epoch": 1.185361603252977, "grad_norm": 0.5582081079483032, "learning_rate": 0.0001, "loss": 1.5346, "step": 10203 }, { "epoch": 1.1854777810049375, "grad_norm": 0.5935722589492798, "learning_rate": 0.0001, "loss": 1.5975, "step": 10204 }, { "epoch": 1.185593958756898, "grad_norm": 0.606367290019989, "learning_rate": 0.0001, "loss": 1.6052, "step": 10205 }, { "epoch": 1.1857101365088585, "grad_norm": 0.5262031555175781, "learning_rate": 0.0001, "loss": 1.3391, "step": 10206 }, { "epoch": 1.185826314260819, "grad_norm": 0.5637447237968445, "learning_rate": 0.0001, "loss": 1.2968, "step": 10207 }, { "epoch": 1.1859424920127795, "grad_norm": 0.5710691213607788, "learning_rate": 0.0001, "loss": 1.4793, "step": 10208 }, { "epoch": 1.1860586697647402, "grad_norm": 0.5453031063079834, "learning_rate": 0.0001, "loss": 1.432, "step": 10209 }, { "epoch": 1.1861748475167007, "grad_norm": 0.530640721321106, "learning_rate": 0.0001, "loss": 1.4899, "step": 10210 }, { "epoch": 1.1862910252686611, "grad_norm": 0.5612127184867859, "learning_rate": 0.0001, "loss": 1.4825, "step": 10211 }, { "epoch": 1.1864072030206216, "grad_norm": 0.5393279194831848, "learning_rate": 0.0001, "loss": 1.5267, "step": 10212 }, { "epoch": 1.1865233807725821, "grad_norm": 0.5792052149772644, "learning_rate": 0.0001, "loss": 1.6498, "step": 10213 }, { "epoch": 1.1866395585245426, "grad_norm": 0.5454745292663574, "learning_rate": 0.0001, "loss": 1.4819, "step": 10214 }, { "epoch": 1.186755736276503, "grad_norm": 0.574486255645752, "learning_rate": 0.0001, "loss": 1.5309, "step": 10215 }, { "epoch": 1.1868719140284636, "grad_norm": 0.5667810440063477, "learning_rate": 0.0001, "loss": 1.4863, "step": 10216 }, { "epoch": 1.186988091780424, "grad_norm": 0.540420651435852, "learning_rate": 0.0001, "loss": 1.3731, "step": 10217 }, { "epoch": 1.1871042695323846, "grad_norm": 0.5797957181930542, "learning_rate": 0.0001, "loss": 1.5712, "step": 10218 }, { "epoch": 1.187220447284345, "grad_norm": 0.5845776200294495, "learning_rate": 0.0001, "loss": 1.5492, "step": 10219 }, { "epoch": 1.1873366250363055, "grad_norm": 0.5893095135688782, "learning_rate": 0.0001, "loss": 1.5909, "step": 10220 }, { "epoch": 1.187452802788266, "grad_norm": 0.5685760378837585, "learning_rate": 0.0001, "loss": 1.4096, "step": 10221 }, { "epoch": 1.1875689805402265, "grad_norm": 0.5455953478813171, "learning_rate": 0.0001, "loss": 1.3178, "step": 10222 }, { "epoch": 1.187685158292187, "grad_norm": 0.5952999591827393, "learning_rate": 0.0001, "loss": 1.4628, "step": 10223 }, { "epoch": 1.1878013360441475, "grad_norm": 0.5808957815170288, "learning_rate": 0.0001, "loss": 1.529, "step": 10224 }, { "epoch": 1.187917513796108, "grad_norm": 0.5412543416023254, "learning_rate": 0.0001, "loss": 1.444, "step": 10225 }, { "epoch": 1.1880336915480685, "grad_norm": 0.5611621141433716, "learning_rate": 0.0001, "loss": 1.4994, "step": 10226 }, { "epoch": 1.188149869300029, "grad_norm": 0.5732444524765015, "learning_rate": 0.0001, "loss": 1.5301, "step": 10227 }, { "epoch": 1.1882660470519895, "grad_norm": 0.6303038597106934, "learning_rate": 0.0001, "loss": 1.5166, "step": 10228 }, { "epoch": 1.18838222480395, "grad_norm": 0.5532310009002686, "learning_rate": 0.0001, "loss": 1.4627, "step": 10229 }, { "epoch": 1.1884984025559104, "grad_norm": 0.5555628538131714, "learning_rate": 0.0001, "loss": 1.5655, "step": 10230 }, { "epoch": 1.188614580307871, "grad_norm": 0.5487105846405029, "learning_rate": 0.0001, "loss": 1.3955, "step": 10231 }, { "epoch": 1.1887307580598316, "grad_norm": 0.5543938279151917, "learning_rate": 0.0001, "loss": 1.3153, "step": 10232 }, { "epoch": 1.1888469358117921, "grad_norm": 0.5894563794136047, "learning_rate": 0.0001, "loss": 1.5996, "step": 10233 }, { "epoch": 1.1889631135637526, "grad_norm": 0.5444560647010803, "learning_rate": 0.0001, "loss": 1.5692, "step": 10234 }, { "epoch": 1.189079291315713, "grad_norm": 0.578478991985321, "learning_rate": 0.0001, "loss": 1.4049, "step": 10235 }, { "epoch": 1.1891954690676736, "grad_norm": 0.5442149639129639, "learning_rate": 0.0001, "loss": 1.5456, "step": 10236 }, { "epoch": 1.189311646819634, "grad_norm": 0.5239068269729614, "learning_rate": 0.0001, "loss": 1.2212, "step": 10237 }, { "epoch": 1.1894278245715946, "grad_norm": 0.59259033203125, "learning_rate": 0.0001, "loss": 1.2868, "step": 10238 }, { "epoch": 1.189544002323555, "grad_norm": 0.536356508731842, "learning_rate": 0.0001, "loss": 1.3534, "step": 10239 }, { "epoch": 1.1896601800755155, "grad_norm": 0.6062889099121094, "learning_rate": 0.0001, "loss": 1.5112, "step": 10240 }, { "epoch": 1.189776357827476, "grad_norm": 0.5797139406204224, "learning_rate": 0.0001, "loss": 1.618, "step": 10241 }, { "epoch": 1.1898925355794365, "grad_norm": 0.6242355704307556, "learning_rate": 0.0001, "loss": 1.4998, "step": 10242 }, { "epoch": 1.190008713331397, "grad_norm": 0.5886520147323608, "learning_rate": 0.0001, "loss": 1.4482, "step": 10243 }, { "epoch": 1.1901248910833575, "grad_norm": 0.6011394262313843, "learning_rate": 0.0001, "loss": 1.4937, "step": 10244 }, { "epoch": 1.190241068835318, "grad_norm": 0.5653864145278931, "learning_rate": 0.0001, "loss": 1.5659, "step": 10245 }, { "epoch": 1.1903572465872785, "grad_norm": 0.5808043479919434, "learning_rate": 0.0001, "loss": 1.4681, "step": 10246 }, { "epoch": 1.190473424339239, "grad_norm": 0.5999670028686523, "learning_rate": 0.0001, "loss": 1.5587, "step": 10247 }, { "epoch": 1.1905896020911995, "grad_norm": 0.6177708506584167, "learning_rate": 0.0001, "loss": 1.7287, "step": 10248 }, { "epoch": 1.19070577984316, "grad_norm": 0.6043134331703186, "learning_rate": 0.0001, "loss": 1.6253, "step": 10249 }, { "epoch": 1.1908219575951204, "grad_norm": 0.5593743324279785, "learning_rate": 0.0001, "loss": 1.4044, "step": 10250 }, { "epoch": 1.1909381353470812, "grad_norm": 0.5556023716926575, "learning_rate": 0.0001, "loss": 1.4996, "step": 10251 }, { "epoch": 1.1910543130990416, "grad_norm": 0.5386412143707275, "learning_rate": 0.0001, "loss": 1.521, "step": 10252 }, { "epoch": 1.1911704908510021, "grad_norm": 0.5457038879394531, "learning_rate": 0.0001, "loss": 1.5724, "step": 10253 }, { "epoch": 1.1912866686029626, "grad_norm": 0.5371884107589722, "learning_rate": 0.0001, "loss": 1.5583, "step": 10254 }, { "epoch": 1.191402846354923, "grad_norm": 0.5815069675445557, "learning_rate": 0.0001, "loss": 1.6517, "step": 10255 }, { "epoch": 1.1915190241068836, "grad_norm": 0.5567863583564758, "learning_rate": 0.0001, "loss": 1.6194, "step": 10256 }, { "epoch": 1.191635201858844, "grad_norm": 0.5386295318603516, "learning_rate": 0.0001, "loss": 1.5486, "step": 10257 }, { "epoch": 1.1917513796108046, "grad_norm": 0.5735299587249756, "learning_rate": 0.0001, "loss": 1.4414, "step": 10258 }, { "epoch": 1.191867557362765, "grad_norm": 0.5478100776672363, "learning_rate": 0.0001, "loss": 1.5183, "step": 10259 }, { "epoch": 1.1919837351147256, "grad_norm": 0.5614447593688965, "learning_rate": 0.0001, "loss": 1.4083, "step": 10260 }, { "epoch": 1.192099912866686, "grad_norm": 0.5918525457382202, "learning_rate": 0.0001, "loss": 1.3954, "step": 10261 }, { "epoch": 1.1922160906186465, "grad_norm": 0.5843601822853088, "learning_rate": 0.0001, "loss": 1.4277, "step": 10262 }, { "epoch": 1.192332268370607, "grad_norm": 0.5581550598144531, "learning_rate": 0.0001, "loss": 1.4389, "step": 10263 }, { "epoch": 1.1924484461225675, "grad_norm": 0.6755667328834534, "learning_rate": 0.0001, "loss": 1.6176, "step": 10264 }, { "epoch": 1.192564623874528, "grad_norm": 0.6215943694114685, "learning_rate": 0.0001, "loss": 1.6203, "step": 10265 }, { "epoch": 1.1926808016264885, "grad_norm": 0.6235942840576172, "learning_rate": 0.0001, "loss": 1.5156, "step": 10266 }, { "epoch": 1.192796979378449, "grad_norm": 0.5801181197166443, "learning_rate": 0.0001, "loss": 1.463, "step": 10267 }, { "epoch": 1.1929131571304095, "grad_norm": 0.5831395983695984, "learning_rate": 0.0001, "loss": 1.5152, "step": 10268 }, { "epoch": 1.19302933488237, "grad_norm": 0.5884434580802917, "learning_rate": 0.0001, "loss": 1.5711, "step": 10269 }, { "epoch": 1.1931455126343304, "grad_norm": 0.6119523048400879, "learning_rate": 0.0001, "loss": 1.6301, "step": 10270 }, { "epoch": 1.193261690386291, "grad_norm": 0.5671009421348572, "learning_rate": 0.0001, "loss": 1.393, "step": 10271 }, { "epoch": 1.1933778681382514, "grad_norm": 0.5740747451782227, "learning_rate": 0.0001, "loss": 1.2502, "step": 10272 }, { "epoch": 1.193494045890212, "grad_norm": 0.5850660800933838, "learning_rate": 0.0001, "loss": 1.3729, "step": 10273 }, { "epoch": 1.1936102236421726, "grad_norm": 0.5821871757507324, "learning_rate": 0.0001, "loss": 1.3578, "step": 10274 }, { "epoch": 1.193726401394133, "grad_norm": 0.5659012198448181, "learning_rate": 0.0001, "loss": 1.418, "step": 10275 }, { "epoch": 1.1938425791460936, "grad_norm": 0.6060782074928284, "learning_rate": 0.0001, "loss": 1.6198, "step": 10276 }, { "epoch": 1.193958756898054, "grad_norm": 0.6207935214042664, "learning_rate": 0.0001, "loss": 1.4444, "step": 10277 }, { "epoch": 1.1940749346500146, "grad_norm": 0.549321711063385, "learning_rate": 0.0001, "loss": 1.3837, "step": 10278 }, { "epoch": 1.194191112401975, "grad_norm": 0.5631564259529114, "learning_rate": 0.0001, "loss": 1.5047, "step": 10279 }, { "epoch": 1.1943072901539356, "grad_norm": 0.5477903485298157, "learning_rate": 0.0001, "loss": 1.4314, "step": 10280 }, { "epoch": 1.194423467905896, "grad_norm": 0.6051657199859619, "learning_rate": 0.0001, "loss": 1.4717, "step": 10281 }, { "epoch": 1.1945396456578565, "grad_norm": 0.6054665446281433, "learning_rate": 0.0001, "loss": 1.5794, "step": 10282 }, { "epoch": 1.194655823409817, "grad_norm": 0.57652747631073, "learning_rate": 0.0001, "loss": 1.5908, "step": 10283 }, { "epoch": 1.1947720011617775, "grad_norm": 0.5573504567146301, "learning_rate": 0.0001, "loss": 1.399, "step": 10284 }, { "epoch": 1.194888178913738, "grad_norm": 0.5924124121665955, "learning_rate": 0.0001, "loss": 1.4233, "step": 10285 }, { "epoch": 1.1950043566656985, "grad_norm": 0.5285975337028503, "learning_rate": 0.0001, "loss": 1.4941, "step": 10286 }, { "epoch": 1.195120534417659, "grad_norm": 0.5311710834503174, "learning_rate": 0.0001, "loss": 1.4012, "step": 10287 }, { "epoch": 1.1952367121696195, "grad_norm": 0.5347359776496887, "learning_rate": 0.0001, "loss": 1.4686, "step": 10288 }, { "epoch": 1.19535288992158, "grad_norm": 0.563667356967926, "learning_rate": 0.0001, "loss": 1.4839, "step": 10289 }, { "epoch": 1.1954690676735404, "grad_norm": 0.568260669708252, "learning_rate": 0.0001, "loss": 1.3454, "step": 10290 }, { "epoch": 1.195585245425501, "grad_norm": 0.638574481010437, "learning_rate": 0.0001, "loss": 1.5333, "step": 10291 }, { "epoch": 1.1957014231774614, "grad_norm": 0.6492049098014832, "learning_rate": 0.0001, "loss": 1.6565, "step": 10292 }, { "epoch": 1.1958176009294221, "grad_norm": 0.5210595726966858, "learning_rate": 0.0001, "loss": 1.4179, "step": 10293 }, { "epoch": 1.1959337786813826, "grad_norm": 0.5696560144424438, "learning_rate": 0.0001, "loss": 1.5132, "step": 10294 }, { "epoch": 1.196049956433343, "grad_norm": 0.5875056982040405, "learning_rate": 0.0001, "loss": 1.5027, "step": 10295 }, { "epoch": 1.1961661341853036, "grad_norm": 0.5168067216873169, "learning_rate": 0.0001, "loss": 1.436, "step": 10296 }, { "epoch": 1.196282311937264, "grad_norm": 0.5388482213020325, "learning_rate": 0.0001, "loss": 1.3848, "step": 10297 }, { "epoch": 1.1963984896892246, "grad_norm": 0.595585823059082, "learning_rate": 0.0001, "loss": 1.3237, "step": 10298 }, { "epoch": 1.196514667441185, "grad_norm": 0.5622440576553345, "learning_rate": 0.0001, "loss": 1.5511, "step": 10299 }, { "epoch": 1.1966308451931456, "grad_norm": 0.6017595529556274, "learning_rate": 0.0001, "loss": 1.6546, "step": 10300 }, { "epoch": 1.196747022945106, "grad_norm": 0.5587907433509827, "learning_rate": 0.0001, "loss": 1.419, "step": 10301 }, { "epoch": 1.1968632006970665, "grad_norm": 0.5632277727127075, "learning_rate": 0.0001, "loss": 1.3717, "step": 10302 }, { "epoch": 1.196979378449027, "grad_norm": 0.5504304766654968, "learning_rate": 0.0001, "loss": 1.5479, "step": 10303 }, { "epoch": 1.1970955562009875, "grad_norm": 0.5563431978225708, "learning_rate": 0.0001, "loss": 1.4499, "step": 10304 }, { "epoch": 1.197211733952948, "grad_norm": 0.5547022223472595, "learning_rate": 0.0001, "loss": 1.3948, "step": 10305 }, { "epoch": 1.1973279117049085, "grad_norm": 0.5395157337188721, "learning_rate": 0.0001, "loss": 1.3875, "step": 10306 }, { "epoch": 1.197444089456869, "grad_norm": 0.6372838020324707, "learning_rate": 0.0001, "loss": 1.757, "step": 10307 }, { "epoch": 1.1975602672088295, "grad_norm": 0.5683894157409668, "learning_rate": 0.0001, "loss": 1.5161, "step": 10308 }, { "epoch": 1.19767644496079, "grad_norm": 0.5659453868865967, "learning_rate": 0.0001, "loss": 1.4273, "step": 10309 }, { "epoch": 1.1977926227127504, "grad_norm": 0.5578935146331787, "learning_rate": 0.0001, "loss": 1.4621, "step": 10310 }, { "epoch": 1.197908800464711, "grad_norm": 0.5382188558578491, "learning_rate": 0.0001, "loss": 1.543, "step": 10311 }, { "epoch": 1.1980249782166714, "grad_norm": 0.5252891182899475, "learning_rate": 0.0001, "loss": 1.3211, "step": 10312 }, { "epoch": 1.198141155968632, "grad_norm": 0.545369029045105, "learning_rate": 0.0001, "loss": 1.5007, "step": 10313 }, { "epoch": 1.1982573337205924, "grad_norm": 0.5860430598258972, "learning_rate": 0.0001, "loss": 1.5027, "step": 10314 }, { "epoch": 1.1983735114725529, "grad_norm": 0.5563281774520874, "learning_rate": 0.0001, "loss": 1.5026, "step": 10315 }, { "epoch": 1.1984896892245136, "grad_norm": 0.5620051622390747, "learning_rate": 0.0001, "loss": 1.3899, "step": 10316 }, { "epoch": 1.198605866976474, "grad_norm": 0.5942341685295105, "learning_rate": 0.0001, "loss": 1.5773, "step": 10317 }, { "epoch": 1.1987220447284346, "grad_norm": 0.6117276549339294, "learning_rate": 0.0001, "loss": 1.6346, "step": 10318 }, { "epoch": 1.198838222480395, "grad_norm": 0.5562477707862854, "learning_rate": 0.0001, "loss": 1.4398, "step": 10319 }, { "epoch": 1.1989544002323556, "grad_norm": 0.5761712193489075, "learning_rate": 0.0001, "loss": 1.4883, "step": 10320 }, { "epoch": 1.199070577984316, "grad_norm": 0.552450954914093, "learning_rate": 0.0001, "loss": 1.3354, "step": 10321 }, { "epoch": 1.1991867557362765, "grad_norm": 0.5395271182060242, "learning_rate": 0.0001, "loss": 1.3772, "step": 10322 }, { "epoch": 1.199302933488237, "grad_norm": 0.555846631526947, "learning_rate": 0.0001, "loss": 1.4032, "step": 10323 }, { "epoch": 1.1994191112401975, "grad_norm": 0.5601608157157898, "learning_rate": 0.0001, "loss": 1.591, "step": 10324 }, { "epoch": 1.199535288992158, "grad_norm": 0.5598324537277222, "learning_rate": 0.0001, "loss": 1.4669, "step": 10325 }, { "epoch": 1.1996514667441185, "grad_norm": 0.5127769112586975, "learning_rate": 0.0001, "loss": 1.4241, "step": 10326 }, { "epoch": 1.199767644496079, "grad_norm": 0.5928462147712708, "learning_rate": 0.0001, "loss": 1.6712, "step": 10327 }, { "epoch": 1.1998838222480395, "grad_norm": 0.5600936412811279, "learning_rate": 0.0001, "loss": 1.5328, "step": 10328 }, { "epoch": 1.2, "grad_norm": 0.5994905233383179, "learning_rate": 0.0001, "loss": 1.4261, "step": 10329 }, { "epoch": 1.2001161777519604, "grad_norm": 0.5742284059524536, "learning_rate": 0.0001, "loss": 1.4516, "step": 10330 }, { "epoch": 1.200232355503921, "grad_norm": 0.5460923314094543, "learning_rate": 0.0001, "loss": 1.4823, "step": 10331 }, { "epoch": 1.2003485332558814, "grad_norm": 0.5468488931655884, "learning_rate": 0.0001, "loss": 1.4799, "step": 10332 }, { "epoch": 1.200464711007842, "grad_norm": 0.5500593781471252, "learning_rate": 0.0001, "loss": 1.2928, "step": 10333 }, { "epoch": 1.2005808887598026, "grad_norm": 0.5604000091552734, "learning_rate": 0.0001, "loss": 1.4501, "step": 10334 }, { "epoch": 1.2006970665117631, "grad_norm": 0.5383157730102539, "learning_rate": 0.0001, "loss": 1.414, "step": 10335 }, { "epoch": 1.2008132442637236, "grad_norm": 0.6212254166603088, "learning_rate": 0.0001, "loss": 1.5292, "step": 10336 }, { "epoch": 1.200929422015684, "grad_norm": 0.5845767259597778, "learning_rate": 0.0001, "loss": 1.4383, "step": 10337 }, { "epoch": 1.2010455997676446, "grad_norm": 0.517145574092865, "learning_rate": 0.0001, "loss": 1.2527, "step": 10338 }, { "epoch": 1.201161777519605, "grad_norm": 0.617876410484314, "learning_rate": 0.0001, "loss": 1.4751, "step": 10339 }, { "epoch": 1.2012779552715656, "grad_norm": 0.5485627055168152, "learning_rate": 0.0001, "loss": 1.3546, "step": 10340 }, { "epoch": 1.201394133023526, "grad_norm": 0.5494995713233948, "learning_rate": 0.0001, "loss": 1.4588, "step": 10341 }, { "epoch": 1.2015103107754865, "grad_norm": 0.5931583642959595, "learning_rate": 0.0001, "loss": 1.4738, "step": 10342 }, { "epoch": 1.201626488527447, "grad_norm": 0.57831871509552, "learning_rate": 0.0001, "loss": 1.4587, "step": 10343 }, { "epoch": 1.2017426662794075, "grad_norm": 0.5546940565109253, "learning_rate": 0.0001, "loss": 1.2925, "step": 10344 }, { "epoch": 1.201858844031368, "grad_norm": 0.5821717381477356, "learning_rate": 0.0001, "loss": 1.4944, "step": 10345 }, { "epoch": 1.2019750217833285, "grad_norm": 0.5914159417152405, "learning_rate": 0.0001, "loss": 1.4987, "step": 10346 }, { "epoch": 1.202091199535289, "grad_norm": 0.5691181421279907, "learning_rate": 0.0001, "loss": 1.3892, "step": 10347 }, { "epoch": 1.2022073772872495, "grad_norm": 0.5852946639060974, "learning_rate": 0.0001, "loss": 1.5349, "step": 10348 }, { "epoch": 1.20232355503921, "grad_norm": 0.5942548513412476, "learning_rate": 0.0001, "loss": 1.3972, "step": 10349 }, { "epoch": 1.2024397327911704, "grad_norm": 0.595831036567688, "learning_rate": 0.0001, "loss": 1.5364, "step": 10350 }, { "epoch": 1.202555910543131, "grad_norm": 0.5792760848999023, "learning_rate": 0.0001, "loss": 1.4927, "step": 10351 }, { "epoch": 1.2026720882950914, "grad_norm": 0.5801286101341248, "learning_rate": 0.0001, "loss": 1.3312, "step": 10352 }, { "epoch": 1.202788266047052, "grad_norm": 0.6192400455474854, "learning_rate": 0.0001, "loss": 1.746, "step": 10353 }, { "epoch": 1.2029044437990124, "grad_norm": 0.6081662774085999, "learning_rate": 0.0001, "loss": 1.5822, "step": 10354 }, { "epoch": 1.203020621550973, "grad_norm": 0.6102173924446106, "learning_rate": 0.0001, "loss": 1.6073, "step": 10355 }, { "epoch": 1.2031367993029334, "grad_norm": 0.5657384395599365, "learning_rate": 0.0001, "loss": 1.5184, "step": 10356 }, { "epoch": 1.2032529770548939, "grad_norm": 0.5585178732872009, "learning_rate": 0.0001, "loss": 1.4841, "step": 10357 }, { "epoch": 1.2033691548068546, "grad_norm": 0.5775259137153625, "learning_rate": 0.0001, "loss": 1.5499, "step": 10358 }, { "epoch": 1.203485332558815, "grad_norm": 0.557350754737854, "learning_rate": 0.0001, "loss": 1.5614, "step": 10359 }, { "epoch": 1.2036015103107756, "grad_norm": 0.5576322078704834, "learning_rate": 0.0001, "loss": 1.7029, "step": 10360 }, { "epoch": 1.203717688062736, "grad_norm": 0.5494878888130188, "learning_rate": 0.0001, "loss": 1.4551, "step": 10361 }, { "epoch": 1.2038338658146965, "grad_norm": 0.580441951751709, "learning_rate": 0.0001, "loss": 1.5666, "step": 10362 }, { "epoch": 1.203950043566657, "grad_norm": 0.5665770173072815, "learning_rate": 0.0001, "loss": 1.5302, "step": 10363 }, { "epoch": 1.2040662213186175, "grad_norm": 0.5588644742965698, "learning_rate": 0.0001, "loss": 1.3927, "step": 10364 }, { "epoch": 1.204182399070578, "grad_norm": 0.6142786741256714, "learning_rate": 0.0001, "loss": 1.5917, "step": 10365 }, { "epoch": 1.2042985768225385, "grad_norm": 0.6669145822525024, "learning_rate": 0.0001, "loss": 1.5222, "step": 10366 }, { "epoch": 1.204414754574499, "grad_norm": 0.5337054133415222, "learning_rate": 0.0001, "loss": 1.3741, "step": 10367 }, { "epoch": 1.2045309323264595, "grad_norm": 0.5432060956954956, "learning_rate": 0.0001, "loss": 1.3621, "step": 10368 }, { "epoch": 1.20464711007842, "grad_norm": 0.6041423678398132, "learning_rate": 0.0001, "loss": 1.6736, "step": 10369 }, { "epoch": 1.2047632878303804, "grad_norm": 0.5839071869850159, "learning_rate": 0.0001, "loss": 1.4869, "step": 10370 }, { "epoch": 1.204879465582341, "grad_norm": 0.5747912526130676, "learning_rate": 0.0001, "loss": 1.5618, "step": 10371 }, { "epoch": 1.2049956433343014, "grad_norm": 0.5971471667289734, "learning_rate": 0.0001, "loss": 1.5796, "step": 10372 }, { "epoch": 1.205111821086262, "grad_norm": 0.5386415123939514, "learning_rate": 0.0001, "loss": 1.4433, "step": 10373 }, { "epoch": 1.2052279988382224, "grad_norm": 0.5704327821731567, "learning_rate": 0.0001, "loss": 1.5432, "step": 10374 }, { "epoch": 1.205344176590183, "grad_norm": 0.5206299424171448, "learning_rate": 0.0001, "loss": 1.4509, "step": 10375 }, { "epoch": 1.2054603543421436, "grad_norm": 0.5865551829338074, "learning_rate": 0.0001, "loss": 1.342, "step": 10376 }, { "epoch": 1.205576532094104, "grad_norm": 0.5369005799293518, "learning_rate": 0.0001, "loss": 1.487, "step": 10377 }, { "epoch": 1.2056927098460646, "grad_norm": 0.5041610598564148, "learning_rate": 0.0001, "loss": 1.1966, "step": 10378 }, { "epoch": 1.205808887598025, "grad_norm": 0.5628383755683899, "learning_rate": 0.0001, "loss": 1.6165, "step": 10379 }, { "epoch": 1.2059250653499856, "grad_norm": 0.5555375218391418, "learning_rate": 0.0001, "loss": 1.5333, "step": 10380 }, { "epoch": 1.206041243101946, "grad_norm": 0.563068687915802, "learning_rate": 0.0001, "loss": 1.5383, "step": 10381 }, { "epoch": 1.2061574208539065, "grad_norm": 0.5873087644577026, "learning_rate": 0.0001, "loss": 1.604, "step": 10382 }, { "epoch": 1.206273598605867, "grad_norm": 0.5575566291809082, "learning_rate": 0.0001, "loss": 1.5667, "step": 10383 }, { "epoch": 1.2063897763578275, "grad_norm": 0.58903968334198, "learning_rate": 0.0001, "loss": 1.3605, "step": 10384 }, { "epoch": 1.206505954109788, "grad_norm": 0.5939505696296692, "learning_rate": 0.0001, "loss": 1.7172, "step": 10385 }, { "epoch": 1.2066221318617485, "grad_norm": 0.5417733192443848, "learning_rate": 0.0001, "loss": 1.4175, "step": 10386 }, { "epoch": 1.206738309613709, "grad_norm": 0.52766352891922, "learning_rate": 0.0001, "loss": 1.4536, "step": 10387 }, { "epoch": 1.2068544873656695, "grad_norm": 0.5897431373596191, "learning_rate": 0.0001, "loss": 1.4175, "step": 10388 }, { "epoch": 1.20697066511763, "grad_norm": 0.6240753531455994, "learning_rate": 0.0001, "loss": 1.4608, "step": 10389 }, { "epoch": 1.2070868428695904, "grad_norm": 0.6030203104019165, "learning_rate": 0.0001, "loss": 1.4075, "step": 10390 }, { "epoch": 1.207203020621551, "grad_norm": 0.6183759570121765, "learning_rate": 0.0001, "loss": 1.4371, "step": 10391 }, { "epoch": 1.2073191983735114, "grad_norm": 0.5684889554977417, "learning_rate": 0.0001, "loss": 1.4891, "step": 10392 }, { "epoch": 1.207435376125472, "grad_norm": 0.5397784113883972, "learning_rate": 0.0001, "loss": 1.3721, "step": 10393 }, { "epoch": 1.2075515538774324, "grad_norm": 0.5187193751335144, "learning_rate": 0.0001, "loss": 1.3977, "step": 10394 }, { "epoch": 1.207667731629393, "grad_norm": 0.5413998961448669, "learning_rate": 0.0001, "loss": 1.3461, "step": 10395 }, { "epoch": 1.2077839093813534, "grad_norm": 0.545616626739502, "learning_rate": 0.0001, "loss": 1.3606, "step": 10396 }, { "epoch": 1.2079000871333139, "grad_norm": 0.5437708497047424, "learning_rate": 0.0001, "loss": 1.4803, "step": 10397 }, { "epoch": 1.2080162648852744, "grad_norm": 0.56759113073349, "learning_rate": 0.0001, "loss": 1.4261, "step": 10398 }, { "epoch": 1.2081324426372348, "grad_norm": 0.5678495764732361, "learning_rate": 0.0001, "loss": 1.5097, "step": 10399 }, { "epoch": 1.2082486203891956, "grad_norm": 0.5646939277648926, "learning_rate": 0.0001, "loss": 1.5916, "step": 10400 }, { "epoch": 1.208364798141156, "grad_norm": 0.554522693157196, "learning_rate": 0.0001, "loss": 1.4276, "step": 10401 }, { "epoch": 1.2084809758931165, "grad_norm": 0.5794949531555176, "learning_rate": 0.0001, "loss": 1.438, "step": 10402 }, { "epoch": 1.208597153645077, "grad_norm": 0.5517016053199768, "learning_rate": 0.0001, "loss": 1.5691, "step": 10403 }, { "epoch": 1.2087133313970375, "grad_norm": 0.5888990759849548, "learning_rate": 0.0001, "loss": 1.5009, "step": 10404 }, { "epoch": 1.208829509148998, "grad_norm": 0.5803018808364868, "learning_rate": 0.0001, "loss": 1.4703, "step": 10405 }, { "epoch": 1.2089456869009585, "grad_norm": 0.5619463324546814, "learning_rate": 0.0001, "loss": 1.5176, "step": 10406 }, { "epoch": 1.209061864652919, "grad_norm": 0.5671004056930542, "learning_rate": 0.0001, "loss": 1.7036, "step": 10407 }, { "epoch": 1.2091780424048795, "grad_norm": 0.5472593307495117, "learning_rate": 0.0001, "loss": 1.4578, "step": 10408 }, { "epoch": 1.20929422015684, "grad_norm": 0.5378280878067017, "learning_rate": 0.0001, "loss": 1.4031, "step": 10409 }, { "epoch": 1.2094103979088004, "grad_norm": 0.5641921758651733, "learning_rate": 0.0001, "loss": 1.3906, "step": 10410 }, { "epoch": 1.209526575660761, "grad_norm": 0.5625640749931335, "learning_rate": 0.0001, "loss": 1.4926, "step": 10411 }, { "epoch": 1.2096427534127214, "grad_norm": 0.5496149063110352, "learning_rate": 0.0001, "loss": 1.5037, "step": 10412 }, { "epoch": 1.209758931164682, "grad_norm": 0.5828267931938171, "learning_rate": 0.0001, "loss": 1.6947, "step": 10413 }, { "epoch": 1.2098751089166424, "grad_norm": 0.5525361895561218, "learning_rate": 0.0001, "loss": 1.5652, "step": 10414 }, { "epoch": 1.209991286668603, "grad_norm": 0.524874746799469, "learning_rate": 0.0001, "loss": 1.3639, "step": 10415 }, { "epoch": 1.2101074644205634, "grad_norm": 0.6062628626823425, "learning_rate": 0.0001, "loss": 1.6837, "step": 10416 }, { "epoch": 1.2102236421725239, "grad_norm": 0.5272751450538635, "learning_rate": 0.0001, "loss": 1.4494, "step": 10417 }, { "epoch": 1.2103398199244846, "grad_norm": 0.5667693614959717, "learning_rate": 0.0001, "loss": 1.507, "step": 10418 }, { "epoch": 1.210455997676445, "grad_norm": 0.5765136480331421, "learning_rate": 0.0001, "loss": 1.3549, "step": 10419 }, { "epoch": 1.2105721754284056, "grad_norm": 0.5195456743240356, "learning_rate": 0.0001, "loss": 1.286, "step": 10420 }, { "epoch": 1.210688353180366, "grad_norm": 0.6368436813354492, "learning_rate": 0.0001, "loss": 1.6614, "step": 10421 }, { "epoch": 1.2108045309323265, "grad_norm": 0.5609405040740967, "learning_rate": 0.0001, "loss": 1.4401, "step": 10422 }, { "epoch": 1.210920708684287, "grad_norm": 0.6603508591651917, "learning_rate": 0.0001, "loss": 1.7794, "step": 10423 }, { "epoch": 1.2110368864362475, "grad_norm": 0.5625258684158325, "learning_rate": 0.0001, "loss": 1.4226, "step": 10424 }, { "epoch": 1.211153064188208, "grad_norm": 0.5961827039718628, "learning_rate": 0.0001, "loss": 1.4507, "step": 10425 }, { "epoch": 1.2112692419401685, "grad_norm": 0.5945589542388916, "learning_rate": 0.0001, "loss": 1.7156, "step": 10426 }, { "epoch": 1.211385419692129, "grad_norm": 0.547545850276947, "learning_rate": 0.0001, "loss": 1.513, "step": 10427 }, { "epoch": 1.2115015974440895, "grad_norm": 0.5815989971160889, "learning_rate": 0.0001, "loss": 1.4578, "step": 10428 }, { "epoch": 1.21161777519605, "grad_norm": 0.5638428926467896, "learning_rate": 0.0001, "loss": 1.4489, "step": 10429 }, { "epoch": 1.2117339529480105, "grad_norm": 0.5702305436134338, "learning_rate": 0.0001, "loss": 1.4775, "step": 10430 }, { "epoch": 1.211850130699971, "grad_norm": 0.5992822051048279, "learning_rate": 0.0001, "loss": 1.441, "step": 10431 }, { "epoch": 1.2119663084519314, "grad_norm": 0.5825338363647461, "learning_rate": 0.0001, "loss": 1.5207, "step": 10432 }, { "epoch": 1.212082486203892, "grad_norm": 0.5452916622161865, "learning_rate": 0.0001, "loss": 1.3638, "step": 10433 }, { "epoch": 1.2121986639558524, "grad_norm": 0.6069163084030151, "learning_rate": 0.0001, "loss": 1.4098, "step": 10434 }, { "epoch": 1.212314841707813, "grad_norm": 0.6051025390625, "learning_rate": 0.0001, "loss": 1.608, "step": 10435 }, { "epoch": 1.2124310194597734, "grad_norm": 0.5363568663597107, "learning_rate": 0.0001, "loss": 1.274, "step": 10436 }, { "epoch": 1.2125471972117339, "grad_norm": 0.5586710572242737, "learning_rate": 0.0001, "loss": 1.3582, "step": 10437 }, { "epoch": 1.2126633749636944, "grad_norm": 0.5546153783798218, "learning_rate": 0.0001, "loss": 1.4534, "step": 10438 }, { "epoch": 1.2127795527156549, "grad_norm": 0.5305795073509216, "learning_rate": 0.0001, "loss": 1.4127, "step": 10439 }, { "epoch": 1.2128957304676153, "grad_norm": 0.5443617701530457, "learning_rate": 0.0001, "loss": 1.453, "step": 10440 }, { "epoch": 1.2130119082195758, "grad_norm": 0.6291701197624207, "learning_rate": 0.0001, "loss": 1.7236, "step": 10441 }, { "epoch": 1.2131280859715365, "grad_norm": 0.6024119853973389, "learning_rate": 0.0001, "loss": 1.5023, "step": 10442 }, { "epoch": 1.213244263723497, "grad_norm": 0.566421389579773, "learning_rate": 0.0001, "loss": 1.3462, "step": 10443 }, { "epoch": 1.2133604414754575, "grad_norm": 0.6556349992752075, "learning_rate": 0.0001, "loss": 1.4306, "step": 10444 }, { "epoch": 1.213476619227418, "grad_norm": 0.5445832014083862, "learning_rate": 0.0001, "loss": 1.3331, "step": 10445 }, { "epoch": 1.2135927969793785, "grad_norm": 0.5676811933517456, "learning_rate": 0.0001, "loss": 1.585, "step": 10446 }, { "epoch": 1.213708974731339, "grad_norm": 0.5820676684379578, "learning_rate": 0.0001, "loss": 1.4923, "step": 10447 }, { "epoch": 1.2138251524832995, "grad_norm": 0.5882712602615356, "learning_rate": 0.0001, "loss": 1.4838, "step": 10448 }, { "epoch": 1.21394133023526, "grad_norm": 0.5337011218070984, "learning_rate": 0.0001, "loss": 1.4753, "step": 10449 }, { "epoch": 1.2140575079872205, "grad_norm": 0.6516126394271851, "learning_rate": 0.0001, "loss": 1.4206, "step": 10450 }, { "epoch": 1.214173685739181, "grad_norm": 0.537398099899292, "learning_rate": 0.0001, "loss": 1.3852, "step": 10451 }, { "epoch": 1.2142898634911414, "grad_norm": 0.5600173473358154, "learning_rate": 0.0001, "loss": 1.2863, "step": 10452 }, { "epoch": 1.214406041243102, "grad_norm": 0.5451828241348267, "learning_rate": 0.0001, "loss": 1.3837, "step": 10453 }, { "epoch": 1.2145222189950624, "grad_norm": 0.6120473742485046, "learning_rate": 0.0001, "loss": 1.5573, "step": 10454 }, { "epoch": 1.214638396747023, "grad_norm": 0.5406019687652588, "learning_rate": 0.0001, "loss": 1.4847, "step": 10455 }, { "epoch": 1.2147545744989834, "grad_norm": 0.49340879917144775, "learning_rate": 0.0001, "loss": 1.2085, "step": 10456 }, { "epoch": 1.2148707522509439, "grad_norm": 0.5855630040168762, "learning_rate": 0.0001, "loss": 1.5449, "step": 10457 }, { "epoch": 1.2149869300029044, "grad_norm": 0.5757850408554077, "learning_rate": 0.0001, "loss": 1.4777, "step": 10458 }, { "epoch": 1.2151031077548649, "grad_norm": 0.6049354076385498, "learning_rate": 0.0001, "loss": 1.4986, "step": 10459 }, { "epoch": 1.2152192855068256, "grad_norm": 0.5614561438560486, "learning_rate": 0.0001, "loss": 1.1913, "step": 10460 }, { "epoch": 1.215335463258786, "grad_norm": 0.58977210521698, "learning_rate": 0.0001, "loss": 1.5599, "step": 10461 }, { "epoch": 1.2154516410107465, "grad_norm": 0.5945138335227966, "learning_rate": 0.0001, "loss": 1.3591, "step": 10462 }, { "epoch": 1.215567818762707, "grad_norm": 0.5707786083221436, "learning_rate": 0.0001, "loss": 1.4646, "step": 10463 }, { "epoch": 1.2156839965146675, "grad_norm": 0.6250419020652771, "learning_rate": 0.0001, "loss": 1.5176, "step": 10464 }, { "epoch": 1.215800174266628, "grad_norm": 0.5555652379989624, "learning_rate": 0.0001, "loss": 1.5091, "step": 10465 }, { "epoch": 1.2159163520185885, "grad_norm": 0.5932562947273254, "learning_rate": 0.0001, "loss": 1.3049, "step": 10466 }, { "epoch": 1.216032529770549, "grad_norm": 0.5786982178688049, "learning_rate": 0.0001, "loss": 1.5366, "step": 10467 }, { "epoch": 1.2161487075225095, "grad_norm": 0.5523450970649719, "learning_rate": 0.0001, "loss": 1.5297, "step": 10468 }, { "epoch": 1.21626488527447, "grad_norm": 0.5957953333854675, "learning_rate": 0.0001, "loss": 1.4569, "step": 10469 }, { "epoch": 1.2163810630264305, "grad_norm": 0.6025353074073792, "learning_rate": 0.0001, "loss": 1.6472, "step": 10470 }, { "epoch": 1.216497240778391, "grad_norm": 0.6158897280693054, "learning_rate": 0.0001, "loss": 1.6028, "step": 10471 }, { "epoch": 1.2166134185303514, "grad_norm": 0.5769653916358948, "learning_rate": 0.0001, "loss": 1.4907, "step": 10472 }, { "epoch": 1.216729596282312, "grad_norm": 0.5845011472702026, "learning_rate": 0.0001, "loss": 1.6525, "step": 10473 }, { "epoch": 1.2168457740342724, "grad_norm": 0.5649383068084717, "learning_rate": 0.0001, "loss": 1.3627, "step": 10474 }, { "epoch": 1.216961951786233, "grad_norm": 0.5390584468841553, "learning_rate": 0.0001, "loss": 1.4094, "step": 10475 }, { "epoch": 1.2170781295381934, "grad_norm": 0.5726324319839478, "learning_rate": 0.0001, "loss": 1.55, "step": 10476 }, { "epoch": 1.2171943072901539, "grad_norm": 0.5737690329551697, "learning_rate": 0.0001, "loss": 1.4491, "step": 10477 }, { "epoch": 1.2173104850421144, "grad_norm": 0.5977112650871277, "learning_rate": 0.0001, "loss": 1.5047, "step": 10478 }, { "epoch": 1.2174266627940749, "grad_norm": 0.5659692287445068, "learning_rate": 0.0001, "loss": 1.6041, "step": 10479 }, { "epoch": 1.2175428405460353, "grad_norm": 0.6409531831741333, "learning_rate": 0.0001, "loss": 1.5836, "step": 10480 }, { "epoch": 1.2176590182979958, "grad_norm": 0.5651158094406128, "learning_rate": 0.0001, "loss": 1.6229, "step": 10481 }, { "epoch": 1.2177751960499563, "grad_norm": 0.5669698119163513, "learning_rate": 0.0001, "loss": 1.4251, "step": 10482 }, { "epoch": 1.2178913738019168, "grad_norm": 0.5658094882965088, "learning_rate": 0.0001, "loss": 1.5442, "step": 10483 }, { "epoch": 1.2180075515538775, "grad_norm": 0.5667807459831238, "learning_rate": 0.0001, "loss": 1.4133, "step": 10484 }, { "epoch": 1.218123729305838, "grad_norm": 0.5489072203636169, "learning_rate": 0.0001, "loss": 1.4643, "step": 10485 }, { "epoch": 1.2182399070577985, "grad_norm": 0.6018260717391968, "learning_rate": 0.0001, "loss": 1.7007, "step": 10486 }, { "epoch": 1.218356084809759, "grad_norm": 0.5693395137786865, "learning_rate": 0.0001, "loss": 1.5865, "step": 10487 }, { "epoch": 1.2184722625617195, "grad_norm": 0.5397615432739258, "learning_rate": 0.0001, "loss": 1.4919, "step": 10488 }, { "epoch": 1.21858844031368, "grad_norm": 0.5639338493347168, "learning_rate": 0.0001, "loss": 1.3537, "step": 10489 }, { "epoch": 1.2187046180656405, "grad_norm": 0.5436851382255554, "learning_rate": 0.0001, "loss": 1.4667, "step": 10490 }, { "epoch": 1.218820795817601, "grad_norm": 0.5400302410125732, "learning_rate": 0.0001, "loss": 1.401, "step": 10491 }, { "epoch": 1.2189369735695614, "grad_norm": 0.5486143231391907, "learning_rate": 0.0001, "loss": 1.4215, "step": 10492 }, { "epoch": 1.219053151321522, "grad_norm": 0.6082191467285156, "learning_rate": 0.0001, "loss": 1.4852, "step": 10493 }, { "epoch": 1.2191693290734824, "grad_norm": 0.5857893228530884, "learning_rate": 0.0001, "loss": 1.3528, "step": 10494 }, { "epoch": 1.219285506825443, "grad_norm": 0.592745304107666, "learning_rate": 0.0001, "loss": 1.4238, "step": 10495 }, { "epoch": 1.2194016845774034, "grad_norm": 0.5825994610786438, "learning_rate": 0.0001, "loss": 1.5337, "step": 10496 }, { "epoch": 1.2195178623293639, "grad_norm": 0.5833573341369629, "learning_rate": 0.0001, "loss": 1.437, "step": 10497 }, { "epoch": 1.2196340400813244, "grad_norm": 0.5565202236175537, "learning_rate": 0.0001, "loss": 1.3335, "step": 10498 }, { "epoch": 1.2197502178332849, "grad_norm": 0.5850576758384705, "learning_rate": 0.0001, "loss": 1.5889, "step": 10499 }, { "epoch": 1.2198663955852453, "grad_norm": 0.5984856486320496, "learning_rate": 0.0001, "loss": 1.3888, "step": 10500 }, { "epoch": 1.2199825733372058, "grad_norm": 0.5830519795417786, "learning_rate": 0.0001, "loss": 1.6643, "step": 10501 }, { "epoch": 1.2200987510891665, "grad_norm": 0.5867604613304138, "learning_rate": 0.0001, "loss": 1.2365, "step": 10502 }, { "epoch": 1.220214928841127, "grad_norm": 0.5673847198486328, "learning_rate": 0.0001, "loss": 1.3835, "step": 10503 }, { "epoch": 1.2203311065930875, "grad_norm": 0.5929017066955566, "learning_rate": 0.0001, "loss": 1.3362, "step": 10504 }, { "epoch": 1.220447284345048, "grad_norm": 0.5458045601844788, "learning_rate": 0.0001, "loss": 1.356, "step": 10505 }, { "epoch": 1.2205634620970085, "grad_norm": 0.6023081541061401, "learning_rate": 0.0001, "loss": 1.4702, "step": 10506 }, { "epoch": 1.220679639848969, "grad_norm": 0.6090680956840515, "learning_rate": 0.0001, "loss": 1.5305, "step": 10507 }, { "epoch": 1.2207958176009295, "grad_norm": 0.5901305079460144, "learning_rate": 0.0001, "loss": 1.5119, "step": 10508 }, { "epoch": 1.22091199535289, "grad_norm": 0.6401813626289368, "learning_rate": 0.0001, "loss": 1.6139, "step": 10509 }, { "epoch": 1.2210281731048505, "grad_norm": 0.6004911661148071, "learning_rate": 0.0001, "loss": 1.4363, "step": 10510 }, { "epoch": 1.221144350856811, "grad_norm": 0.5764592289924622, "learning_rate": 0.0001, "loss": 1.4868, "step": 10511 }, { "epoch": 1.2212605286087714, "grad_norm": 0.5585955381393433, "learning_rate": 0.0001, "loss": 1.5034, "step": 10512 }, { "epoch": 1.221376706360732, "grad_norm": 0.5347375273704529, "learning_rate": 0.0001, "loss": 1.3796, "step": 10513 }, { "epoch": 1.2214928841126924, "grad_norm": 0.5225241780281067, "learning_rate": 0.0001, "loss": 1.35, "step": 10514 }, { "epoch": 1.221609061864653, "grad_norm": 0.5799035429954529, "learning_rate": 0.0001, "loss": 1.4947, "step": 10515 }, { "epoch": 1.2217252396166134, "grad_norm": 0.5327029824256897, "learning_rate": 0.0001, "loss": 1.411, "step": 10516 }, { "epoch": 1.2218414173685739, "grad_norm": 0.5418916344642639, "learning_rate": 0.0001, "loss": 1.4701, "step": 10517 }, { "epoch": 1.2219575951205344, "grad_norm": 0.5544294714927673, "learning_rate": 0.0001, "loss": 1.3398, "step": 10518 }, { "epoch": 1.2220737728724949, "grad_norm": 0.6825329065322876, "learning_rate": 0.0001, "loss": 1.4933, "step": 10519 }, { "epoch": 1.2221899506244553, "grad_norm": 0.5987062454223633, "learning_rate": 0.0001, "loss": 1.3924, "step": 10520 }, { "epoch": 1.2223061283764158, "grad_norm": 0.6384679675102234, "learning_rate": 0.0001, "loss": 1.5034, "step": 10521 }, { "epoch": 1.2224223061283763, "grad_norm": 0.5685727000236511, "learning_rate": 0.0001, "loss": 1.3943, "step": 10522 }, { "epoch": 1.2225384838803368, "grad_norm": 0.5603922009468079, "learning_rate": 0.0001, "loss": 1.4228, "step": 10523 }, { "epoch": 1.2226546616322973, "grad_norm": 0.5447299480438232, "learning_rate": 0.0001, "loss": 1.2081, "step": 10524 }, { "epoch": 1.2227708393842578, "grad_norm": 0.6237144470214844, "learning_rate": 0.0001, "loss": 1.7241, "step": 10525 }, { "epoch": 1.2228870171362185, "grad_norm": 0.567017674446106, "learning_rate": 0.0001, "loss": 1.5054, "step": 10526 }, { "epoch": 1.223003194888179, "grad_norm": 0.5752435326576233, "learning_rate": 0.0001, "loss": 1.3276, "step": 10527 }, { "epoch": 1.2231193726401395, "grad_norm": 0.5610300898551941, "learning_rate": 0.0001, "loss": 1.4231, "step": 10528 }, { "epoch": 1.2232355503921, "grad_norm": 0.5971804857254028, "learning_rate": 0.0001, "loss": 1.3758, "step": 10529 }, { "epoch": 1.2233517281440605, "grad_norm": 0.5715692639350891, "learning_rate": 0.0001, "loss": 1.4178, "step": 10530 }, { "epoch": 1.223467905896021, "grad_norm": 0.5297221541404724, "learning_rate": 0.0001, "loss": 1.3276, "step": 10531 }, { "epoch": 1.2235840836479814, "grad_norm": 0.5569010376930237, "learning_rate": 0.0001, "loss": 1.3424, "step": 10532 }, { "epoch": 1.223700261399942, "grad_norm": 0.5953126549720764, "learning_rate": 0.0001, "loss": 1.4072, "step": 10533 }, { "epoch": 1.2238164391519024, "grad_norm": 0.6054908037185669, "learning_rate": 0.0001, "loss": 1.4562, "step": 10534 }, { "epoch": 1.223932616903863, "grad_norm": 0.5700466632843018, "learning_rate": 0.0001, "loss": 1.5383, "step": 10535 }, { "epoch": 1.2240487946558234, "grad_norm": 0.5606374144554138, "learning_rate": 0.0001, "loss": 1.4028, "step": 10536 }, { "epoch": 1.2241649724077839, "grad_norm": 0.5463014245033264, "learning_rate": 0.0001, "loss": 1.5375, "step": 10537 }, { "epoch": 1.2242811501597444, "grad_norm": 0.5794979333877563, "learning_rate": 0.0001, "loss": 1.4477, "step": 10538 }, { "epoch": 1.2243973279117049, "grad_norm": 0.5675046443939209, "learning_rate": 0.0001, "loss": 1.5133, "step": 10539 }, { "epoch": 1.2245135056636653, "grad_norm": 0.5605546236038208, "learning_rate": 0.0001, "loss": 1.4852, "step": 10540 }, { "epoch": 1.2246296834156258, "grad_norm": 0.5566608905792236, "learning_rate": 0.0001, "loss": 1.3657, "step": 10541 }, { "epoch": 1.2247458611675863, "grad_norm": 0.60628741979599, "learning_rate": 0.0001, "loss": 1.6062, "step": 10542 }, { "epoch": 1.2248620389195468, "grad_norm": 0.5989949107170105, "learning_rate": 0.0001, "loss": 1.4082, "step": 10543 }, { "epoch": 1.2249782166715075, "grad_norm": 0.601296067237854, "learning_rate": 0.0001, "loss": 1.5123, "step": 10544 }, { "epoch": 1.225094394423468, "grad_norm": 0.6421616673469543, "learning_rate": 0.0001, "loss": 1.5389, "step": 10545 }, { "epoch": 1.2252105721754285, "grad_norm": 0.5911616086959839, "learning_rate": 0.0001, "loss": 1.7276, "step": 10546 }, { "epoch": 1.225326749927389, "grad_norm": 0.6583091020584106, "learning_rate": 0.0001, "loss": 1.6858, "step": 10547 }, { "epoch": 1.2254429276793495, "grad_norm": 0.575006365776062, "learning_rate": 0.0001, "loss": 1.5305, "step": 10548 }, { "epoch": 1.22555910543131, "grad_norm": 0.5844583511352539, "learning_rate": 0.0001, "loss": 1.5218, "step": 10549 }, { "epoch": 1.2256752831832705, "grad_norm": 0.5701775550842285, "learning_rate": 0.0001, "loss": 1.5935, "step": 10550 }, { "epoch": 1.225791460935231, "grad_norm": 0.5439273118972778, "learning_rate": 0.0001, "loss": 1.3318, "step": 10551 }, { "epoch": 1.2259076386871914, "grad_norm": 0.5806834697723389, "learning_rate": 0.0001, "loss": 1.3744, "step": 10552 }, { "epoch": 1.226023816439152, "grad_norm": 0.5335421562194824, "learning_rate": 0.0001, "loss": 1.1911, "step": 10553 }, { "epoch": 1.2261399941911124, "grad_norm": 0.5800783038139343, "learning_rate": 0.0001, "loss": 1.4885, "step": 10554 }, { "epoch": 1.226256171943073, "grad_norm": 0.5973320007324219, "learning_rate": 0.0001, "loss": 1.3504, "step": 10555 }, { "epoch": 1.2263723496950334, "grad_norm": 0.5879946351051331, "learning_rate": 0.0001, "loss": 1.4086, "step": 10556 }, { "epoch": 1.2264885274469939, "grad_norm": 0.621277928352356, "learning_rate": 0.0001, "loss": 1.5551, "step": 10557 }, { "epoch": 1.2266047051989544, "grad_norm": 0.5970613360404968, "learning_rate": 0.0001, "loss": 1.2918, "step": 10558 }, { "epoch": 1.2267208829509149, "grad_norm": 0.593627393245697, "learning_rate": 0.0001, "loss": 1.4994, "step": 10559 }, { "epoch": 1.2268370607028753, "grad_norm": 0.6126559972763062, "learning_rate": 0.0001, "loss": 1.5531, "step": 10560 }, { "epoch": 1.2269532384548358, "grad_norm": 0.5869115591049194, "learning_rate": 0.0001, "loss": 1.5224, "step": 10561 }, { "epoch": 1.2270694162067963, "grad_norm": 0.587081789970398, "learning_rate": 0.0001, "loss": 1.5067, "step": 10562 }, { "epoch": 1.2271855939587568, "grad_norm": 0.5886223316192627, "learning_rate": 0.0001, "loss": 1.5962, "step": 10563 }, { "epoch": 1.2273017717107173, "grad_norm": 0.5749689936637878, "learning_rate": 0.0001, "loss": 1.4966, "step": 10564 }, { "epoch": 1.2274179494626778, "grad_norm": 0.5669975876808167, "learning_rate": 0.0001, "loss": 1.4723, "step": 10565 }, { "epoch": 1.2275341272146383, "grad_norm": 0.5260981917381287, "learning_rate": 0.0001, "loss": 1.4386, "step": 10566 }, { "epoch": 1.227650304966599, "grad_norm": 0.5275533199310303, "learning_rate": 0.0001, "loss": 1.4732, "step": 10567 }, { "epoch": 1.2277664827185595, "grad_norm": 0.5393403768539429, "learning_rate": 0.0001, "loss": 1.37, "step": 10568 }, { "epoch": 1.22788266047052, "grad_norm": 0.5790442824363708, "learning_rate": 0.0001, "loss": 1.4346, "step": 10569 }, { "epoch": 1.2279988382224805, "grad_norm": 0.579831063747406, "learning_rate": 0.0001, "loss": 1.3978, "step": 10570 }, { "epoch": 1.228115015974441, "grad_norm": 0.5463832020759583, "learning_rate": 0.0001, "loss": 1.4308, "step": 10571 }, { "epoch": 1.2282311937264014, "grad_norm": 0.551559567451477, "learning_rate": 0.0001, "loss": 1.4143, "step": 10572 }, { "epoch": 1.228347371478362, "grad_norm": 0.544151782989502, "learning_rate": 0.0001, "loss": 1.3611, "step": 10573 }, { "epoch": 1.2284635492303224, "grad_norm": 0.5910764932632446, "learning_rate": 0.0001, "loss": 1.4523, "step": 10574 }, { "epoch": 1.228579726982283, "grad_norm": 0.5585051774978638, "learning_rate": 0.0001, "loss": 1.376, "step": 10575 }, { "epoch": 1.2286959047342434, "grad_norm": 0.5787076354026794, "learning_rate": 0.0001, "loss": 1.5203, "step": 10576 }, { "epoch": 1.2288120824862039, "grad_norm": 0.5803587436676025, "learning_rate": 0.0001, "loss": 1.5013, "step": 10577 }, { "epoch": 1.2289282602381644, "grad_norm": 0.5648260116577148, "learning_rate": 0.0001, "loss": 1.4385, "step": 10578 }, { "epoch": 1.2290444379901249, "grad_norm": 0.5887954235076904, "learning_rate": 0.0001, "loss": 1.6515, "step": 10579 }, { "epoch": 1.2291606157420853, "grad_norm": 0.6301035284996033, "learning_rate": 0.0001, "loss": 1.5682, "step": 10580 }, { "epoch": 1.2292767934940458, "grad_norm": 0.5514262914657593, "learning_rate": 0.0001, "loss": 1.4847, "step": 10581 }, { "epoch": 1.2293929712460063, "grad_norm": 0.5427731275558472, "learning_rate": 0.0001, "loss": 1.5574, "step": 10582 }, { "epoch": 1.2295091489979668, "grad_norm": 0.5385409593582153, "learning_rate": 0.0001, "loss": 1.3763, "step": 10583 }, { "epoch": 1.2296253267499273, "grad_norm": 0.5767699480056763, "learning_rate": 0.0001, "loss": 1.5571, "step": 10584 }, { "epoch": 1.2297415045018878, "grad_norm": 0.5255317687988281, "learning_rate": 0.0001, "loss": 1.4605, "step": 10585 }, { "epoch": 1.2298576822538485, "grad_norm": 0.5520825982093811, "learning_rate": 0.0001, "loss": 1.4506, "step": 10586 }, { "epoch": 1.229973860005809, "grad_norm": 0.5681281089782715, "learning_rate": 0.0001, "loss": 1.6923, "step": 10587 }, { "epoch": 1.2300900377577695, "grad_norm": 0.5493902564048767, "learning_rate": 0.0001, "loss": 1.4525, "step": 10588 }, { "epoch": 1.23020621550973, "grad_norm": 0.5706425309181213, "learning_rate": 0.0001, "loss": 1.5089, "step": 10589 }, { "epoch": 1.2303223932616905, "grad_norm": 0.5630264282226562, "learning_rate": 0.0001, "loss": 1.4545, "step": 10590 }, { "epoch": 1.230438571013651, "grad_norm": 0.5919108986854553, "learning_rate": 0.0001, "loss": 1.4042, "step": 10591 }, { "epoch": 1.2305547487656114, "grad_norm": 0.5534734129905701, "learning_rate": 0.0001, "loss": 1.4322, "step": 10592 }, { "epoch": 1.230670926517572, "grad_norm": 0.5363836884498596, "learning_rate": 0.0001, "loss": 1.336, "step": 10593 }, { "epoch": 1.2307871042695324, "grad_norm": 0.5639822483062744, "learning_rate": 0.0001, "loss": 1.3161, "step": 10594 }, { "epoch": 1.230903282021493, "grad_norm": 0.5711953639984131, "learning_rate": 0.0001, "loss": 1.5166, "step": 10595 }, { "epoch": 1.2310194597734534, "grad_norm": 0.5398626327514648, "learning_rate": 0.0001, "loss": 1.3888, "step": 10596 }, { "epoch": 1.2311356375254139, "grad_norm": 0.5533974766731262, "learning_rate": 0.0001, "loss": 1.3355, "step": 10597 }, { "epoch": 1.2312518152773744, "grad_norm": 0.5976875424385071, "learning_rate": 0.0001, "loss": 1.4595, "step": 10598 }, { "epoch": 1.2313679930293349, "grad_norm": 0.5598598122596741, "learning_rate": 0.0001, "loss": 1.3329, "step": 10599 }, { "epoch": 1.2314841707812954, "grad_norm": 0.5962227582931519, "learning_rate": 0.0001, "loss": 1.4619, "step": 10600 }, { "epoch": 1.2316003485332558, "grad_norm": 0.523517370223999, "learning_rate": 0.0001, "loss": 1.3384, "step": 10601 }, { "epoch": 1.2317165262852163, "grad_norm": 0.5865228772163391, "learning_rate": 0.0001, "loss": 1.4522, "step": 10602 }, { "epoch": 1.2318327040371768, "grad_norm": 0.5959430932998657, "learning_rate": 0.0001, "loss": 1.507, "step": 10603 }, { "epoch": 1.2319488817891373, "grad_norm": 0.5698263645172119, "learning_rate": 0.0001, "loss": 1.3024, "step": 10604 }, { "epoch": 1.2320650595410978, "grad_norm": 0.5440550446510315, "learning_rate": 0.0001, "loss": 1.3271, "step": 10605 }, { "epoch": 1.2321812372930583, "grad_norm": 0.5678781270980835, "learning_rate": 0.0001, "loss": 1.468, "step": 10606 }, { "epoch": 1.2322974150450188, "grad_norm": 0.6232162714004517, "learning_rate": 0.0001, "loss": 1.5875, "step": 10607 }, { "epoch": 1.2324135927969793, "grad_norm": 0.5675930380821228, "learning_rate": 0.0001, "loss": 1.4716, "step": 10608 }, { "epoch": 1.23252977054894, "grad_norm": 0.5585689544677734, "learning_rate": 0.0001, "loss": 1.379, "step": 10609 }, { "epoch": 1.2326459483009005, "grad_norm": 0.5632004141807556, "learning_rate": 0.0001, "loss": 1.3636, "step": 10610 }, { "epoch": 1.232762126052861, "grad_norm": 0.5808938145637512, "learning_rate": 0.0001, "loss": 1.6049, "step": 10611 }, { "epoch": 1.2328783038048214, "grad_norm": 0.5879512429237366, "learning_rate": 0.0001, "loss": 1.5081, "step": 10612 }, { "epoch": 1.232994481556782, "grad_norm": 0.5673100352287292, "learning_rate": 0.0001, "loss": 1.5315, "step": 10613 }, { "epoch": 1.2331106593087424, "grad_norm": 0.6437278389930725, "learning_rate": 0.0001, "loss": 1.5295, "step": 10614 }, { "epoch": 1.233226837060703, "grad_norm": 0.554803192615509, "learning_rate": 0.0001, "loss": 1.5641, "step": 10615 }, { "epoch": 1.2333430148126634, "grad_norm": 0.5712867379188538, "learning_rate": 0.0001, "loss": 1.3592, "step": 10616 }, { "epoch": 1.2334591925646239, "grad_norm": 0.5592646598815918, "learning_rate": 0.0001, "loss": 1.4562, "step": 10617 }, { "epoch": 1.2335753703165844, "grad_norm": 0.5389765501022339, "learning_rate": 0.0001, "loss": 1.3461, "step": 10618 }, { "epoch": 1.2336915480685449, "grad_norm": 0.622361958026886, "learning_rate": 0.0001, "loss": 1.6166, "step": 10619 }, { "epoch": 1.2338077258205054, "grad_norm": 0.5598737597465515, "learning_rate": 0.0001, "loss": 1.4707, "step": 10620 }, { "epoch": 1.2339239035724658, "grad_norm": 0.5794302821159363, "learning_rate": 0.0001, "loss": 1.5521, "step": 10621 }, { "epoch": 1.2340400813244263, "grad_norm": 0.5385465025901794, "learning_rate": 0.0001, "loss": 1.3243, "step": 10622 }, { "epoch": 1.2341562590763868, "grad_norm": 0.5621716380119324, "learning_rate": 0.0001, "loss": 1.4385, "step": 10623 }, { "epoch": 1.2342724368283473, "grad_norm": 0.5806053876876831, "learning_rate": 0.0001, "loss": 1.6138, "step": 10624 }, { "epoch": 1.2343886145803078, "grad_norm": 0.5949336290359497, "learning_rate": 0.0001, "loss": 1.3553, "step": 10625 }, { "epoch": 1.2345047923322683, "grad_norm": 0.6420254707336426, "learning_rate": 0.0001, "loss": 1.6207, "step": 10626 }, { "epoch": 1.2346209700842288, "grad_norm": 0.5900982618331909, "learning_rate": 0.0001, "loss": 1.5271, "step": 10627 }, { "epoch": 1.2347371478361895, "grad_norm": 0.5828454494476318, "learning_rate": 0.0001, "loss": 1.5352, "step": 10628 }, { "epoch": 1.23485332558815, "grad_norm": 0.553706705570221, "learning_rate": 0.0001, "loss": 1.3842, "step": 10629 }, { "epoch": 1.2349695033401105, "grad_norm": 0.5648255944252014, "learning_rate": 0.0001, "loss": 1.5772, "step": 10630 }, { "epoch": 1.235085681092071, "grad_norm": 0.5571692585945129, "learning_rate": 0.0001, "loss": 1.4299, "step": 10631 }, { "epoch": 1.2352018588440314, "grad_norm": 0.5914157032966614, "learning_rate": 0.0001, "loss": 1.5363, "step": 10632 }, { "epoch": 1.235318036595992, "grad_norm": 0.5305378437042236, "learning_rate": 0.0001, "loss": 1.4097, "step": 10633 }, { "epoch": 1.2354342143479524, "grad_norm": 0.5221390724182129, "learning_rate": 0.0001, "loss": 1.4712, "step": 10634 }, { "epoch": 1.235550392099913, "grad_norm": 0.5526520013809204, "learning_rate": 0.0001, "loss": 1.5835, "step": 10635 }, { "epoch": 1.2356665698518734, "grad_norm": 0.552923321723938, "learning_rate": 0.0001, "loss": 1.4845, "step": 10636 }, { "epoch": 1.2357827476038339, "grad_norm": 0.5690024495124817, "learning_rate": 0.0001, "loss": 1.5021, "step": 10637 }, { "epoch": 1.2358989253557944, "grad_norm": 0.5636914372444153, "learning_rate": 0.0001, "loss": 1.4861, "step": 10638 }, { "epoch": 1.2360151031077549, "grad_norm": 0.6583706736564636, "learning_rate": 0.0001, "loss": 1.7168, "step": 10639 }, { "epoch": 1.2361312808597154, "grad_norm": 0.600277304649353, "learning_rate": 0.0001, "loss": 1.6215, "step": 10640 }, { "epoch": 1.2362474586116758, "grad_norm": 0.6001637578010559, "learning_rate": 0.0001, "loss": 1.525, "step": 10641 }, { "epoch": 1.2363636363636363, "grad_norm": 0.5948476195335388, "learning_rate": 0.0001, "loss": 1.5354, "step": 10642 }, { "epoch": 1.2364798141155968, "grad_norm": 0.6298750638961792, "learning_rate": 0.0001, "loss": 1.4664, "step": 10643 }, { "epoch": 1.2365959918675573, "grad_norm": 0.578194797039032, "learning_rate": 0.0001, "loss": 1.4185, "step": 10644 }, { "epoch": 1.2367121696195178, "grad_norm": 0.5624285340309143, "learning_rate": 0.0001, "loss": 1.5575, "step": 10645 }, { "epoch": 1.2368283473714783, "grad_norm": 0.5956478714942932, "learning_rate": 0.0001, "loss": 1.5, "step": 10646 }, { "epoch": 1.2369445251234388, "grad_norm": 0.5751087665557861, "learning_rate": 0.0001, "loss": 1.4631, "step": 10647 }, { "epoch": 1.2370607028753993, "grad_norm": 0.5814287066459656, "learning_rate": 0.0001, "loss": 1.4004, "step": 10648 }, { "epoch": 1.2371768806273598, "grad_norm": 0.5077279210090637, "learning_rate": 0.0001, "loss": 1.2642, "step": 10649 }, { "epoch": 1.2372930583793202, "grad_norm": 0.579255223274231, "learning_rate": 0.0001, "loss": 1.5844, "step": 10650 }, { "epoch": 1.237409236131281, "grad_norm": 0.6207264065742493, "learning_rate": 0.0001, "loss": 1.5855, "step": 10651 }, { "epoch": 1.2375254138832414, "grad_norm": 0.6084131598472595, "learning_rate": 0.0001, "loss": 1.4875, "step": 10652 }, { "epoch": 1.237641591635202, "grad_norm": 0.5942187905311584, "learning_rate": 0.0001, "loss": 1.6195, "step": 10653 }, { "epoch": 1.2377577693871624, "grad_norm": 0.5793184638023376, "learning_rate": 0.0001, "loss": 1.5205, "step": 10654 }, { "epoch": 1.237873947139123, "grad_norm": 0.5800839066505432, "learning_rate": 0.0001, "loss": 1.4165, "step": 10655 }, { "epoch": 1.2379901248910834, "grad_norm": 0.5806957483291626, "learning_rate": 0.0001, "loss": 1.4971, "step": 10656 }, { "epoch": 1.2381063026430439, "grad_norm": 0.5428292751312256, "learning_rate": 0.0001, "loss": 1.2147, "step": 10657 }, { "epoch": 1.2382224803950044, "grad_norm": 0.5477049350738525, "learning_rate": 0.0001, "loss": 1.3976, "step": 10658 }, { "epoch": 1.2383386581469649, "grad_norm": 0.5499922037124634, "learning_rate": 0.0001, "loss": 1.3248, "step": 10659 }, { "epoch": 1.2384548358989254, "grad_norm": 0.5535095930099487, "learning_rate": 0.0001, "loss": 1.312, "step": 10660 }, { "epoch": 1.2385710136508858, "grad_norm": 0.5951287150382996, "learning_rate": 0.0001, "loss": 1.5418, "step": 10661 }, { "epoch": 1.2386871914028463, "grad_norm": 0.5869619846343994, "learning_rate": 0.0001, "loss": 1.4575, "step": 10662 }, { "epoch": 1.2388033691548068, "grad_norm": 0.5614014267921448, "learning_rate": 0.0001, "loss": 1.4689, "step": 10663 }, { "epoch": 1.2389195469067673, "grad_norm": 0.5796566605567932, "learning_rate": 0.0001, "loss": 1.4835, "step": 10664 }, { "epoch": 1.2390357246587278, "grad_norm": 0.5910658240318298, "learning_rate": 0.0001, "loss": 1.5578, "step": 10665 }, { "epoch": 1.2391519024106883, "grad_norm": 0.564470112323761, "learning_rate": 0.0001, "loss": 1.4322, "step": 10666 }, { "epoch": 1.2392680801626488, "grad_norm": 0.5840427279472351, "learning_rate": 0.0001, "loss": 1.3944, "step": 10667 }, { "epoch": 1.2393842579146093, "grad_norm": 0.5382651090621948, "learning_rate": 0.0001, "loss": 1.2162, "step": 10668 }, { "epoch": 1.2395004356665698, "grad_norm": 0.5443084836006165, "learning_rate": 0.0001, "loss": 1.3367, "step": 10669 }, { "epoch": 1.2396166134185305, "grad_norm": 0.5765810012817383, "learning_rate": 0.0001, "loss": 1.3894, "step": 10670 }, { "epoch": 1.239732791170491, "grad_norm": 0.5465205907821655, "learning_rate": 0.0001, "loss": 1.3926, "step": 10671 }, { "epoch": 1.2398489689224514, "grad_norm": 0.5790244936943054, "learning_rate": 0.0001, "loss": 1.3868, "step": 10672 }, { "epoch": 1.239965146674412, "grad_norm": 0.5863944888114929, "learning_rate": 0.0001, "loss": 1.5827, "step": 10673 }, { "epoch": 1.2400813244263724, "grad_norm": 0.5954928994178772, "learning_rate": 0.0001, "loss": 1.3925, "step": 10674 }, { "epoch": 1.240197502178333, "grad_norm": 0.6234539747238159, "learning_rate": 0.0001, "loss": 1.5407, "step": 10675 }, { "epoch": 1.2403136799302934, "grad_norm": 0.5883687734603882, "learning_rate": 0.0001, "loss": 1.5754, "step": 10676 }, { "epoch": 1.240429857682254, "grad_norm": 0.5818089842796326, "learning_rate": 0.0001, "loss": 1.4462, "step": 10677 }, { "epoch": 1.2405460354342144, "grad_norm": 0.6155632138252258, "learning_rate": 0.0001, "loss": 1.4489, "step": 10678 }, { "epoch": 1.2406622131861749, "grad_norm": 0.5792834162712097, "learning_rate": 0.0001, "loss": 1.4936, "step": 10679 }, { "epoch": 1.2407783909381354, "grad_norm": 0.5829952359199524, "learning_rate": 0.0001, "loss": 1.5449, "step": 10680 }, { "epoch": 1.2408945686900958, "grad_norm": 0.5667127370834351, "learning_rate": 0.0001, "loss": 1.4372, "step": 10681 }, { "epoch": 1.2410107464420563, "grad_norm": 0.5373808145523071, "learning_rate": 0.0001, "loss": 1.364, "step": 10682 }, { "epoch": 1.2411269241940168, "grad_norm": 0.5681789517402649, "learning_rate": 0.0001, "loss": 1.3148, "step": 10683 }, { "epoch": 1.2412431019459773, "grad_norm": 0.5821676254272461, "learning_rate": 0.0001, "loss": 1.4943, "step": 10684 }, { "epoch": 1.2413592796979378, "grad_norm": 0.583202600479126, "learning_rate": 0.0001, "loss": 1.6383, "step": 10685 }, { "epoch": 1.2414754574498983, "grad_norm": 0.5483458638191223, "learning_rate": 0.0001, "loss": 1.4638, "step": 10686 }, { "epoch": 1.2415916352018588, "grad_norm": 0.5675621032714844, "learning_rate": 0.0001, "loss": 1.4976, "step": 10687 }, { "epoch": 1.2417078129538193, "grad_norm": 0.5624716877937317, "learning_rate": 0.0001, "loss": 1.4133, "step": 10688 }, { "epoch": 1.2418239907057798, "grad_norm": 0.6353493332862854, "learning_rate": 0.0001, "loss": 1.5201, "step": 10689 }, { "epoch": 1.2419401684577402, "grad_norm": 0.5259159803390503, "learning_rate": 0.0001, "loss": 1.3333, "step": 10690 }, { "epoch": 1.2420563462097007, "grad_norm": 0.546409547328949, "learning_rate": 0.0001, "loss": 1.4868, "step": 10691 }, { "epoch": 1.2421725239616612, "grad_norm": 0.5584643483161926, "learning_rate": 0.0001, "loss": 1.4144, "step": 10692 }, { "epoch": 1.242288701713622, "grad_norm": 0.5916255116462708, "learning_rate": 0.0001, "loss": 1.4505, "step": 10693 }, { "epoch": 1.2424048794655824, "grad_norm": 0.6383016705513, "learning_rate": 0.0001, "loss": 1.5633, "step": 10694 }, { "epoch": 1.242521057217543, "grad_norm": 0.5840632319450378, "learning_rate": 0.0001, "loss": 1.4845, "step": 10695 }, { "epoch": 1.2426372349695034, "grad_norm": 0.58888179063797, "learning_rate": 0.0001, "loss": 1.5685, "step": 10696 }, { "epoch": 1.242753412721464, "grad_norm": 0.5815372467041016, "learning_rate": 0.0001, "loss": 1.4579, "step": 10697 }, { "epoch": 1.2428695904734244, "grad_norm": 0.5951624512672424, "learning_rate": 0.0001, "loss": 1.416, "step": 10698 }, { "epoch": 1.2429857682253849, "grad_norm": 0.611923098564148, "learning_rate": 0.0001, "loss": 1.3961, "step": 10699 }, { "epoch": 1.2431019459773454, "grad_norm": 0.5687365531921387, "learning_rate": 0.0001, "loss": 1.5876, "step": 10700 }, { "epoch": 1.2432181237293058, "grad_norm": 0.5626431703567505, "learning_rate": 0.0001, "loss": 1.5305, "step": 10701 }, { "epoch": 1.2433343014812663, "grad_norm": 0.5979207754135132, "learning_rate": 0.0001, "loss": 1.5413, "step": 10702 }, { "epoch": 1.2434504792332268, "grad_norm": 0.5764636993408203, "learning_rate": 0.0001, "loss": 1.6826, "step": 10703 }, { "epoch": 1.2435666569851873, "grad_norm": 0.6030092835426331, "learning_rate": 0.0001, "loss": 1.653, "step": 10704 }, { "epoch": 1.2436828347371478, "grad_norm": 0.5411743521690369, "learning_rate": 0.0001, "loss": 1.3816, "step": 10705 }, { "epoch": 1.2437990124891083, "grad_norm": 0.5814303755760193, "learning_rate": 0.0001, "loss": 1.6629, "step": 10706 }, { "epoch": 1.2439151902410688, "grad_norm": 0.5599769949913025, "learning_rate": 0.0001, "loss": 1.3897, "step": 10707 }, { "epoch": 1.2440313679930293, "grad_norm": 0.5339173674583435, "learning_rate": 0.0001, "loss": 1.4417, "step": 10708 }, { "epoch": 1.2441475457449898, "grad_norm": 0.5466998815536499, "learning_rate": 0.0001, "loss": 1.4545, "step": 10709 }, { "epoch": 1.2442637234969502, "grad_norm": 0.6054586172103882, "learning_rate": 0.0001, "loss": 1.6259, "step": 10710 }, { "epoch": 1.244379901248911, "grad_norm": 0.5586958527565002, "learning_rate": 0.0001, "loss": 1.4248, "step": 10711 }, { "epoch": 1.2444960790008714, "grad_norm": 0.5811334252357483, "learning_rate": 0.0001, "loss": 1.4595, "step": 10712 }, { "epoch": 1.244612256752832, "grad_norm": 0.5410207509994507, "learning_rate": 0.0001, "loss": 1.418, "step": 10713 }, { "epoch": 1.2447284345047924, "grad_norm": 0.5457951426506042, "learning_rate": 0.0001, "loss": 1.5074, "step": 10714 }, { "epoch": 1.244844612256753, "grad_norm": 0.6239075660705566, "learning_rate": 0.0001, "loss": 1.5306, "step": 10715 }, { "epoch": 1.2449607900087134, "grad_norm": 0.6004078984260559, "learning_rate": 0.0001, "loss": 1.6734, "step": 10716 }, { "epoch": 1.245076967760674, "grad_norm": 0.5737530589103699, "learning_rate": 0.0001, "loss": 1.4565, "step": 10717 }, { "epoch": 1.2451931455126344, "grad_norm": 0.5727450847625732, "learning_rate": 0.0001, "loss": 1.6573, "step": 10718 }, { "epoch": 1.2453093232645949, "grad_norm": 0.5446699261665344, "learning_rate": 0.0001, "loss": 1.2936, "step": 10719 }, { "epoch": 1.2454255010165554, "grad_norm": 0.5467056035995483, "learning_rate": 0.0001, "loss": 1.3091, "step": 10720 }, { "epoch": 1.2455416787685158, "grad_norm": 0.5639151334762573, "learning_rate": 0.0001, "loss": 1.4404, "step": 10721 }, { "epoch": 1.2456578565204763, "grad_norm": 0.6179391741752625, "learning_rate": 0.0001, "loss": 1.3933, "step": 10722 }, { "epoch": 1.2457740342724368, "grad_norm": 0.5786750912666321, "learning_rate": 0.0001, "loss": 1.409, "step": 10723 }, { "epoch": 1.2458902120243973, "grad_norm": 0.5844369530677795, "learning_rate": 0.0001, "loss": 1.5618, "step": 10724 }, { "epoch": 1.2460063897763578, "grad_norm": 0.5636518597602844, "learning_rate": 0.0001, "loss": 1.3696, "step": 10725 }, { "epoch": 1.2461225675283183, "grad_norm": 0.5851801633834839, "learning_rate": 0.0001, "loss": 1.4825, "step": 10726 }, { "epoch": 1.2462387452802788, "grad_norm": 0.5837157368659973, "learning_rate": 0.0001, "loss": 1.2587, "step": 10727 }, { "epoch": 1.2463549230322393, "grad_norm": 0.5497305393218994, "learning_rate": 0.0001, "loss": 1.5463, "step": 10728 }, { "epoch": 1.2464711007841998, "grad_norm": 0.6165003180503845, "learning_rate": 0.0001, "loss": 1.637, "step": 10729 }, { "epoch": 1.2465872785361602, "grad_norm": 0.595329999923706, "learning_rate": 0.0001, "loss": 1.4555, "step": 10730 }, { "epoch": 1.2467034562881207, "grad_norm": 0.5779715180397034, "learning_rate": 0.0001, "loss": 1.5749, "step": 10731 }, { "epoch": 1.2468196340400812, "grad_norm": 0.6294297575950623, "learning_rate": 0.0001, "loss": 1.6384, "step": 10732 }, { "epoch": 1.2469358117920417, "grad_norm": 0.5893142223358154, "learning_rate": 0.0001, "loss": 1.4439, "step": 10733 }, { "epoch": 1.2470519895440022, "grad_norm": 0.5959643721580505, "learning_rate": 0.0001, "loss": 1.4772, "step": 10734 }, { "epoch": 1.247168167295963, "grad_norm": 0.6068616509437561, "learning_rate": 0.0001, "loss": 1.4337, "step": 10735 }, { "epoch": 1.2472843450479234, "grad_norm": 0.5508027076721191, "learning_rate": 0.0001, "loss": 1.3589, "step": 10736 }, { "epoch": 1.247400522799884, "grad_norm": 0.5569624900817871, "learning_rate": 0.0001, "loss": 1.4259, "step": 10737 }, { "epoch": 1.2475167005518444, "grad_norm": 0.5653142333030701, "learning_rate": 0.0001, "loss": 1.5091, "step": 10738 }, { "epoch": 1.2476328783038049, "grad_norm": 0.6183950304985046, "learning_rate": 0.0001, "loss": 1.5533, "step": 10739 }, { "epoch": 1.2477490560557654, "grad_norm": 0.5955910086631775, "learning_rate": 0.0001, "loss": 1.3331, "step": 10740 }, { "epoch": 1.2478652338077258, "grad_norm": 0.5446209907531738, "learning_rate": 0.0001, "loss": 1.4491, "step": 10741 }, { "epoch": 1.2479814115596863, "grad_norm": 0.5655883550643921, "learning_rate": 0.0001, "loss": 1.4624, "step": 10742 }, { "epoch": 1.2480975893116468, "grad_norm": 0.5753975510597229, "learning_rate": 0.0001, "loss": 1.5188, "step": 10743 }, { "epoch": 1.2482137670636073, "grad_norm": 0.5425572991371155, "learning_rate": 0.0001, "loss": 1.4076, "step": 10744 }, { "epoch": 1.2483299448155678, "grad_norm": 0.6025961637496948, "learning_rate": 0.0001, "loss": 1.5467, "step": 10745 }, { "epoch": 1.2484461225675283, "grad_norm": 0.5763283967971802, "learning_rate": 0.0001, "loss": 1.5884, "step": 10746 }, { "epoch": 1.2485623003194888, "grad_norm": 0.5742851495742798, "learning_rate": 0.0001, "loss": 1.501, "step": 10747 }, { "epoch": 1.2486784780714493, "grad_norm": 0.5632321834564209, "learning_rate": 0.0001, "loss": 1.539, "step": 10748 }, { "epoch": 1.2487946558234098, "grad_norm": 0.5582688450813293, "learning_rate": 0.0001, "loss": 1.4634, "step": 10749 }, { "epoch": 1.2489108335753702, "grad_norm": 0.5706313848495483, "learning_rate": 0.0001, "loss": 1.4574, "step": 10750 }, { "epoch": 1.2490270113273307, "grad_norm": 0.5375720262527466, "learning_rate": 0.0001, "loss": 1.2735, "step": 10751 }, { "epoch": 1.2491431890792912, "grad_norm": 0.5515146851539612, "learning_rate": 0.0001, "loss": 1.4086, "step": 10752 }, { "epoch": 1.249259366831252, "grad_norm": 0.5534106492996216, "learning_rate": 0.0001, "loss": 1.284, "step": 10753 }, { "epoch": 1.2493755445832124, "grad_norm": 0.5403925776481628, "learning_rate": 0.0001, "loss": 1.2556, "step": 10754 }, { "epoch": 1.249491722335173, "grad_norm": 0.6206772923469543, "learning_rate": 0.0001, "loss": 1.687, "step": 10755 }, { "epoch": 1.2496079000871334, "grad_norm": 0.6456174850463867, "learning_rate": 0.0001, "loss": 1.467, "step": 10756 }, { "epoch": 1.249724077839094, "grad_norm": 0.5558960437774658, "learning_rate": 0.0001, "loss": 1.3112, "step": 10757 }, { "epoch": 1.2498402555910544, "grad_norm": 0.5899320244789124, "learning_rate": 0.0001, "loss": 1.4726, "step": 10758 }, { "epoch": 1.2499564333430149, "grad_norm": 0.5711237192153931, "learning_rate": 0.0001, "loss": 1.5579, "step": 10759 }, { "epoch": 1.2500726110949754, "grad_norm": 0.5839224457740784, "learning_rate": 0.0001, "loss": 1.607, "step": 10760 }, { "epoch": 1.2501887888469359, "grad_norm": 0.5762811899185181, "learning_rate": 0.0001, "loss": 1.4382, "step": 10761 }, { "epoch": 1.2503049665988963, "grad_norm": 0.5910693407058716, "learning_rate": 0.0001, "loss": 1.5329, "step": 10762 }, { "epoch": 1.2504211443508568, "grad_norm": 0.5522376298904419, "learning_rate": 0.0001, "loss": 1.1839, "step": 10763 }, { "epoch": 1.2505373221028173, "grad_norm": 0.6215783357620239, "learning_rate": 0.0001, "loss": 1.5552, "step": 10764 }, { "epoch": 1.2506534998547778, "grad_norm": 0.5663979649543762, "learning_rate": 0.0001, "loss": 1.5441, "step": 10765 }, { "epoch": 1.2507696776067383, "grad_norm": 0.5686525702476501, "learning_rate": 0.0001, "loss": 1.5091, "step": 10766 }, { "epoch": 1.2508858553586988, "grad_norm": 0.5687610507011414, "learning_rate": 0.0001, "loss": 1.3635, "step": 10767 }, { "epoch": 1.2510020331106593, "grad_norm": 0.5779849290847778, "learning_rate": 0.0001, "loss": 1.6153, "step": 10768 }, { "epoch": 1.2511182108626198, "grad_norm": 0.5521991848945618, "learning_rate": 0.0001, "loss": 1.4001, "step": 10769 }, { "epoch": 1.2512343886145803, "grad_norm": 0.5512406229972839, "learning_rate": 0.0001, "loss": 1.3389, "step": 10770 }, { "epoch": 1.2513505663665407, "grad_norm": 0.5759303569793701, "learning_rate": 0.0001, "loss": 1.5774, "step": 10771 }, { "epoch": 1.2514667441185012, "grad_norm": 0.5932089686393738, "learning_rate": 0.0001, "loss": 1.6612, "step": 10772 }, { "epoch": 1.2515829218704617, "grad_norm": 0.5830219388008118, "learning_rate": 0.0001, "loss": 1.3805, "step": 10773 }, { "epoch": 1.2516990996224222, "grad_norm": 0.6111432909965515, "learning_rate": 0.0001, "loss": 1.5502, "step": 10774 }, { "epoch": 1.2518152773743827, "grad_norm": 0.554973840713501, "learning_rate": 0.0001, "loss": 1.4567, "step": 10775 }, { "epoch": 1.2519314551263432, "grad_norm": 0.5329660177230835, "learning_rate": 0.0001, "loss": 1.4075, "step": 10776 }, { "epoch": 1.2520476328783037, "grad_norm": 0.5556033849716187, "learning_rate": 0.0001, "loss": 1.3319, "step": 10777 }, { "epoch": 1.2521638106302644, "grad_norm": 0.5581437349319458, "learning_rate": 0.0001, "loss": 1.4436, "step": 10778 }, { "epoch": 1.2522799883822249, "grad_norm": 0.5687176585197449, "learning_rate": 0.0001, "loss": 1.4248, "step": 10779 }, { "epoch": 1.2523961661341854, "grad_norm": 0.6239218711853027, "learning_rate": 0.0001, "loss": 1.5807, "step": 10780 }, { "epoch": 1.2525123438861459, "grad_norm": 0.542938768863678, "learning_rate": 0.0001, "loss": 1.3693, "step": 10781 }, { "epoch": 1.2526285216381063, "grad_norm": 0.5767273902893066, "learning_rate": 0.0001, "loss": 1.5495, "step": 10782 }, { "epoch": 1.2527446993900668, "grad_norm": 0.5635040998458862, "learning_rate": 0.0001, "loss": 1.6863, "step": 10783 }, { "epoch": 1.2528608771420273, "grad_norm": 0.5268775820732117, "learning_rate": 0.0001, "loss": 1.3677, "step": 10784 }, { "epoch": 1.2529770548939878, "grad_norm": 0.5350583791732788, "learning_rate": 0.0001, "loss": 1.4114, "step": 10785 }, { "epoch": 1.2530932326459483, "grad_norm": 0.5941740870475769, "learning_rate": 0.0001, "loss": 1.5906, "step": 10786 }, { "epoch": 1.2532094103979088, "grad_norm": 0.5417436361312866, "learning_rate": 0.0001, "loss": 1.2903, "step": 10787 }, { "epoch": 1.2533255881498693, "grad_norm": 0.5705372095108032, "learning_rate": 0.0001, "loss": 1.4168, "step": 10788 }, { "epoch": 1.2534417659018298, "grad_norm": 0.5509761571884155, "learning_rate": 0.0001, "loss": 1.4277, "step": 10789 }, { "epoch": 1.2535579436537903, "grad_norm": 0.6663389205932617, "learning_rate": 0.0001, "loss": 1.6093, "step": 10790 }, { "epoch": 1.2536741214057507, "grad_norm": 0.6134310960769653, "learning_rate": 0.0001, "loss": 1.4366, "step": 10791 }, { "epoch": 1.2537902991577112, "grad_norm": 0.5742259621620178, "learning_rate": 0.0001, "loss": 1.4703, "step": 10792 }, { "epoch": 1.2539064769096717, "grad_norm": 0.5614108443260193, "learning_rate": 0.0001, "loss": 1.5148, "step": 10793 }, { "epoch": 1.2540226546616324, "grad_norm": 0.5708277821540833, "learning_rate": 0.0001, "loss": 1.4222, "step": 10794 }, { "epoch": 1.254138832413593, "grad_norm": 0.5575094819068909, "learning_rate": 0.0001, "loss": 1.3619, "step": 10795 }, { "epoch": 1.2542550101655534, "grad_norm": 0.6430823802947998, "learning_rate": 0.0001, "loss": 1.4687, "step": 10796 }, { "epoch": 1.254371187917514, "grad_norm": 0.6255773901939392, "learning_rate": 0.0001, "loss": 1.5583, "step": 10797 }, { "epoch": 1.2544873656694744, "grad_norm": 0.5493278503417969, "learning_rate": 0.0001, "loss": 1.4425, "step": 10798 }, { "epoch": 1.2546035434214349, "grad_norm": 0.5868274569511414, "learning_rate": 0.0001, "loss": 1.4117, "step": 10799 }, { "epoch": 1.2547197211733954, "grad_norm": 0.5614247918128967, "learning_rate": 0.0001, "loss": 1.5633, "step": 10800 }, { "epoch": 1.2548358989253559, "grad_norm": 0.6078110933303833, "learning_rate": 0.0001, "loss": 1.6503, "step": 10801 }, { "epoch": 1.2549520766773163, "grad_norm": 0.5670056939125061, "learning_rate": 0.0001, "loss": 1.3722, "step": 10802 }, { "epoch": 1.2550682544292768, "grad_norm": 0.5822281837463379, "learning_rate": 0.0001, "loss": 1.3983, "step": 10803 }, { "epoch": 1.2551844321812373, "grad_norm": 0.5852466821670532, "learning_rate": 0.0001, "loss": 1.5878, "step": 10804 }, { "epoch": 1.2553006099331978, "grad_norm": 0.5783720016479492, "learning_rate": 0.0001, "loss": 1.2861, "step": 10805 }, { "epoch": 1.2554167876851583, "grad_norm": 0.5860381722450256, "learning_rate": 0.0001, "loss": 1.416, "step": 10806 }, { "epoch": 1.2555329654371188, "grad_norm": 0.5932544469833374, "learning_rate": 0.0001, "loss": 1.6662, "step": 10807 }, { "epoch": 1.2556491431890793, "grad_norm": 0.6420629024505615, "learning_rate": 0.0001, "loss": 1.5633, "step": 10808 }, { "epoch": 1.2557653209410398, "grad_norm": 0.6060267090797424, "learning_rate": 0.0001, "loss": 1.4374, "step": 10809 }, { "epoch": 1.2558814986930003, "grad_norm": 0.5844810605049133, "learning_rate": 0.0001, "loss": 1.4504, "step": 10810 }, { "epoch": 1.2559976764449607, "grad_norm": 0.6256417632102966, "learning_rate": 0.0001, "loss": 1.5777, "step": 10811 }, { "epoch": 1.2561138541969212, "grad_norm": 0.5611162185668945, "learning_rate": 0.0001, "loss": 1.6017, "step": 10812 }, { "epoch": 1.2562300319488817, "grad_norm": 0.5997201204299927, "learning_rate": 0.0001, "loss": 1.682, "step": 10813 }, { "epoch": 1.2563462097008422, "grad_norm": 0.580735981464386, "learning_rate": 0.0001, "loss": 1.5223, "step": 10814 }, { "epoch": 1.2564623874528027, "grad_norm": 0.5708926916122437, "learning_rate": 0.0001, "loss": 1.3772, "step": 10815 }, { "epoch": 1.2565785652047632, "grad_norm": 0.5596855282783508, "learning_rate": 0.0001, "loss": 1.3001, "step": 10816 }, { "epoch": 1.2566947429567237, "grad_norm": 0.6057003736495972, "learning_rate": 0.0001, "loss": 1.4523, "step": 10817 }, { "epoch": 1.2568109207086842, "grad_norm": 0.6439718008041382, "learning_rate": 0.0001, "loss": 1.5285, "step": 10818 }, { "epoch": 1.2569270984606447, "grad_norm": 0.5926083326339722, "learning_rate": 0.0001, "loss": 1.6552, "step": 10819 }, { "epoch": 1.2570432762126054, "grad_norm": 0.5531743764877319, "learning_rate": 0.0001, "loss": 1.5071, "step": 10820 }, { "epoch": 1.2571594539645659, "grad_norm": 0.5597687363624573, "learning_rate": 0.0001, "loss": 1.6024, "step": 10821 }, { "epoch": 1.2572756317165263, "grad_norm": 0.5524884462356567, "learning_rate": 0.0001, "loss": 1.4648, "step": 10822 }, { "epoch": 1.2573918094684868, "grad_norm": 0.6109902262687683, "learning_rate": 0.0001, "loss": 1.4158, "step": 10823 }, { "epoch": 1.2575079872204473, "grad_norm": 0.5356048345565796, "learning_rate": 0.0001, "loss": 1.4295, "step": 10824 }, { "epoch": 1.2576241649724078, "grad_norm": 0.5408940315246582, "learning_rate": 0.0001, "loss": 1.2335, "step": 10825 }, { "epoch": 1.2577403427243683, "grad_norm": 0.5846429467201233, "learning_rate": 0.0001, "loss": 1.5364, "step": 10826 }, { "epoch": 1.2578565204763288, "grad_norm": 0.5699112415313721, "learning_rate": 0.0001, "loss": 1.4308, "step": 10827 }, { "epoch": 1.2579726982282893, "grad_norm": 0.6659561395645142, "learning_rate": 0.0001, "loss": 1.6991, "step": 10828 }, { "epoch": 1.2580888759802498, "grad_norm": 0.6590319871902466, "learning_rate": 0.0001, "loss": 1.6286, "step": 10829 }, { "epoch": 1.2582050537322103, "grad_norm": 0.633607804775238, "learning_rate": 0.0001, "loss": 1.5275, "step": 10830 }, { "epoch": 1.2583212314841707, "grad_norm": 0.5824370980262756, "learning_rate": 0.0001, "loss": 1.4908, "step": 10831 }, { "epoch": 1.2584374092361312, "grad_norm": 0.6036174893379211, "learning_rate": 0.0001, "loss": 1.4916, "step": 10832 }, { "epoch": 1.2585535869880917, "grad_norm": 0.5827155709266663, "learning_rate": 0.0001, "loss": 1.3925, "step": 10833 }, { "epoch": 1.2586697647400522, "grad_norm": 0.579399585723877, "learning_rate": 0.0001, "loss": 1.4674, "step": 10834 }, { "epoch": 1.2587859424920127, "grad_norm": 0.5795319080352783, "learning_rate": 0.0001, "loss": 1.5249, "step": 10835 }, { "epoch": 1.2589021202439734, "grad_norm": 0.5639935731887817, "learning_rate": 0.0001, "loss": 1.3972, "step": 10836 }, { "epoch": 1.259018297995934, "grad_norm": 0.5532496571540833, "learning_rate": 0.0001, "loss": 1.465, "step": 10837 }, { "epoch": 1.2591344757478944, "grad_norm": 0.5931538343429565, "learning_rate": 0.0001, "loss": 1.5833, "step": 10838 }, { "epoch": 1.2592506534998549, "grad_norm": 0.6140592098236084, "learning_rate": 0.0001, "loss": 1.6095, "step": 10839 }, { "epoch": 1.2593668312518154, "grad_norm": 0.5485202074050903, "learning_rate": 0.0001, "loss": 1.4436, "step": 10840 }, { "epoch": 1.2594830090037759, "grad_norm": 0.5648629069328308, "learning_rate": 0.0001, "loss": 1.3829, "step": 10841 }, { "epoch": 1.2595991867557363, "grad_norm": 0.5815029740333557, "learning_rate": 0.0001, "loss": 1.5187, "step": 10842 }, { "epoch": 1.2597153645076968, "grad_norm": 0.5790690183639526, "learning_rate": 0.0001, "loss": 1.4802, "step": 10843 }, { "epoch": 1.2598315422596573, "grad_norm": 0.6052435636520386, "learning_rate": 0.0001, "loss": 1.5367, "step": 10844 }, { "epoch": 1.2599477200116178, "grad_norm": 0.5813129544258118, "learning_rate": 0.0001, "loss": 1.5912, "step": 10845 }, { "epoch": 1.2600638977635783, "grad_norm": 0.5918418169021606, "learning_rate": 0.0001, "loss": 1.7113, "step": 10846 }, { "epoch": 1.2601800755155388, "grad_norm": 0.5554955005645752, "learning_rate": 0.0001, "loss": 1.5289, "step": 10847 }, { "epoch": 1.2602962532674993, "grad_norm": 0.540164589881897, "learning_rate": 0.0001, "loss": 1.3184, "step": 10848 }, { "epoch": 1.2604124310194598, "grad_norm": 0.6040856242179871, "learning_rate": 0.0001, "loss": 1.4184, "step": 10849 }, { "epoch": 1.2605286087714203, "grad_norm": 0.5804588794708252, "learning_rate": 0.0001, "loss": 1.4578, "step": 10850 }, { "epoch": 1.2606447865233807, "grad_norm": 0.5707676410675049, "learning_rate": 0.0001, "loss": 1.5481, "step": 10851 }, { "epoch": 1.2607609642753412, "grad_norm": 0.647530734539032, "learning_rate": 0.0001, "loss": 1.5698, "step": 10852 }, { "epoch": 1.2608771420273017, "grad_norm": 0.5723570585250854, "learning_rate": 0.0001, "loss": 1.3976, "step": 10853 }, { "epoch": 1.2609933197792622, "grad_norm": 0.5460187196731567, "learning_rate": 0.0001, "loss": 1.3676, "step": 10854 }, { "epoch": 1.2611094975312227, "grad_norm": 0.5793501138687134, "learning_rate": 0.0001, "loss": 1.4138, "step": 10855 }, { "epoch": 1.2612256752831832, "grad_norm": 0.6033415198326111, "learning_rate": 0.0001, "loss": 1.5383, "step": 10856 }, { "epoch": 1.2613418530351437, "grad_norm": 0.5997538566589355, "learning_rate": 0.0001, "loss": 1.2872, "step": 10857 }, { "epoch": 1.2614580307871042, "grad_norm": 0.5751636624336243, "learning_rate": 0.0001, "loss": 1.4681, "step": 10858 }, { "epoch": 1.2615742085390647, "grad_norm": 0.6361991167068481, "learning_rate": 0.0001, "loss": 1.4256, "step": 10859 }, { "epoch": 1.2616903862910251, "grad_norm": 0.5734949707984924, "learning_rate": 0.0001, "loss": 1.4381, "step": 10860 }, { "epoch": 1.2618065640429856, "grad_norm": 0.6365008354187012, "learning_rate": 0.0001, "loss": 1.6581, "step": 10861 }, { "epoch": 1.2619227417949463, "grad_norm": 0.6318557858467102, "learning_rate": 0.0001, "loss": 1.715, "step": 10862 }, { "epoch": 1.2620389195469068, "grad_norm": 0.6001318693161011, "learning_rate": 0.0001, "loss": 1.5687, "step": 10863 }, { "epoch": 1.2621550972988673, "grad_norm": 0.6317021250724792, "learning_rate": 0.0001, "loss": 1.6188, "step": 10864 }, { "epoch": 1.2622712750508278, "grad_norm": 0.5643987059593201, "learning_rate": 0.0001, "loss": 1.4726, "step": 10865 }, { "epoch": 1.2623874528027883, "grad_norm": 0.5744731426239014, "learning_rate": 0.0001, "loss": 1.524, "step": 10866 }, { "epoch": 1.2625036305547488, "grad_norm": 0.605496883392334, "learning_rate": 0.0001, "loss": 1.5188, "step": 10867 }, { "epoch": 1.2626198083067093, "grad_norm": 0.5679904222488403, "learning_rate": 0.0001, "loss": 1.4915, "step": 10868 }, { "epoch": 1.2627359860586698, "grad_norm": 0.5534711480140686, "learning_rate": 0.0001, "loss": 1.4292, "step": 10869 }, { "epoch": 1.2628521638106303, "grad_norm": 0.5767245292663574, "learning_rate": 0.0001, "loss": 1.4301, "step": 10870 }, { "epoch": 1.2629683415625907, "grad_norm": 0.5895627737045288, "learning_rate": 0.0001, "loss": 1.6787, "step": 10871 }, { "epoch": 1.2630845193145512, "grad_norm": 0.5530607104301453, "learning_rate": 0.0001, "loss": 1.3382, "step": 10872 }, { "epoch": 1.2632006970665117, "grad_norm": 0.5866522789001465, "learning_rate": 0.0001, "loss": 1.5503, "step": 10873 }, { "epoch": 1.2633168748184722, "grad_norm": 0.5559065937995911, "learning_rate": 0.0001, "loss": 1.3051, "step": 10874 }, { "epoch": 1.2634330525704327, "grad_norm": 0.5616282820701599, "learning_rate": 0.0001, "loss": 1.2669, "step": 10875 }, { "epoch": 1.2635492303223932, "grad_norm": 0.5756585597991943, "learning_rate": 0.0001, "loss": 1.454, "step": 10876 }, { "epoch": 1.2636654080743537, "grad_norm": 0.6078899502754211, "learning_rate": 0.0001, "loss": 1.5251, "step": 10877 }, { "epoch": 1.2637815858263144, "grad_norm": 0.5421556830406189, "learning_rate": 0.0001, "loss": 1.3957, "step": 10878 }, { "epoch": 1.2638977635782749, "grad_norm": 0.6021136045455933, "learning_rate": 0.0001, "loss": 1.4868, "step": 10879 }, { "epoch": 1.2640139413302354, "grad_norm": 0.6049302220344543, "learning_rate": 0.0001, "loss": 1.4672, "step": 10880 }, { "epoch": 1.2641301190821959, "grad_norm": 0.5303308963775635, "learning_rate": 0.0001, "loss": 1.2202, "step": 10881 }, { "epoch": 1.2642462968341563, "grad_norm": 0.5799589157104492, "learning_rate": 0.0001, "loss": 1.4065, "step": 10882 }, { "epoch": 1.2643624745861168, "grad_norm": 0.5389856696128845, "learning_rate": 0.0001, "loss": 1.2409, "step": 10883 }, { "epoch": 1.2644786523380773, "grad_norm": 0.5793135166168213, "learning_rate": 0.0001, "loss": 1.3456, "step": 10884 }, { "epoch": 1.2645948300900378, "grad_norm": 0.665398120880127, "learning_rate": 0.0001, "loss": 1.6816, "step": 10885 }, { "epoch": 1.2647110078419983, "grad_norm": 0.6195827126502991, "learning_rate": 0.0001, "loss": 1.3291, "step": 10886 }, { "epoch": 1.2648271855939588, "grad_norm": 0.603444516658783, "learning_rate": 0.0001, "loss": 1.4637, "step": 10887 }, { "epoch": 1.2649433633459193, "grad_norm": 0.6044740080833435, "learning_rate": 0.0001, "loss": 1.2983, "step": 10888 }, { "epoch": 1.2650595410978798, "grad_norm": 0.5764860510826111, "learning_rate": 0.0001, "loss": 1.4351, "step": 10889 }, { "epoch": 1.2651757188498403, "grad_norm": 0.5791444182395935, "learning_rate": 0.0001, "loss": 1.3424, "step": 10890 }, { "epoch": 1.2652918966018007, "grad_norm": 0.6098731160163879, "learning_rate": 0.0001, "loss": 1.4804, "step": 10891 }, { "epoch": 1.2654080743537612, "grad_norm": 0.5369520783424377, "learning_rate": 0.0001, "loss": 1.3948, "step": 10892 }, { "epoch": 1.2655242521057217, "grad_norm": 0.6197044849395752, "learning_rate": 0.0001, "loss": 1.368, "step": 10893 }, { "epoch": 1.2656404298576822, "grad_norm": 0.588814377784729, "learning_rate": 0.0001, "loss": 1.3916, "step": 10894 }, { "epoch": 1.2657566076096427, "grad_norm": 0.5804957747459412, "learning_rate": 0.0001, "loss": 1.4814, "step": 10895 }, { "epoch": 1.2658727853616032, "grad_norm": 0.6161913275718689, "learning_rate": 0.0001, "loss": 1.641, "step": 10896 }, { "epoch": 1.2659889631135637, "grad_norm": 0.6082305312156677, "learning_rate": 0.0001, "loss": 1.5288, "step": 10897 }, { "epoch": 1.2661051408655242, "grad_norm": 0.5396662950515747, "learning_rate": 0.0001, "loss": 1.5025, "step": 10898 }, { "epoch": 1.2662213186174847, "grad_norm": 0.5761926770210266, "learning_rate": 0.0001, "loss": 1.4168, "step": 10899 }, { "epoch": 1.2663374963694451, "grad_norm": 0.5800683498382568, "learning_rate": 0.0001, "loss": 1.4611, "step": 10900 }, { "epoch": 1.2664536741214056, "grad_norm": 0.5463969111442566, "learning_rate": 0.0001, "loss": 1.4545, "step": 10901 }, { "epoch": 1.2665698518733661, "grad_norm": 0.5822511315345764, "learning_rate": 0.0001, "loss": 1.486, "step": 10902 }, { "epoch": 1.2666860296253266, "grad_norm": 0.6074179410934448, "learning_rate": 0.0001, "loss": 1.7182, "step": 10903 }, { "epoch": 1.2668022073772873, "grad_norm": 0.6279671788215637, "learning_rate": 0.0001, "loss": 1.4481, "step": 10904 }, { "epoch": 1.2669183851292478, "grad_norm": 0.6327800750732422, "learning_rate": 0.0001, "loss": 1.4574, "step": 10905 }, { "epoch": 1.2670345628812083, "grad_norm": 0.6204458475112915, "learning_rate": 0.0001, "loss": 1.483, "step": 10906 }, { "epoch": 1.2671507406331688, "grad_norm": 0.5944911241531372, "learning_rate": 0.0001, "loss": 1.6018, "step": 10907 }, { "epoch": 1.2672669183851293, "grad_norm": 0.5533126592636108, "learning_rate": 0.0001, "loss": 1.5541, "step": 10908 }, { "epoch": 1.2673830961370898, "grad_norm": 0.5814772844314575, "learning_rate": 0.0001, "loss": 1.5974, "step": 10909 }, { "epoch": 1.2674992738890503, "grad_norm": 0.6086983680725098, "learning_rate": 0.0001, "loss": 1.4617, "step": 10910 }, { "epoch": 1.2676154516410107, "grad_norm": 0.6263343691825867, "learning_rate": 0.0001, "loss": 1.5468, "step": 10911 }, { "epoch": 1.2677316293929712, "grad_norm": 0.5537508130073547, "learning_rate": 0.0001, "loss": 1.3299, "step": 10912 }, { "epoch": 1.2678478071449317, "grad_norm": 0.5584046840667725, "learning_rate": 0.0001, "loss": 1.3079, "step": 10913 }, { "epoch": 1.2679639848968922, "grad_norm": 0.576492428779602, "learning_rate": 0.0001, "loss": 1.4632, "step": 10914 }, { "epoch": 1.2680801626488527, "grad_norm": 0.5651898384094238, "learning_rate": 0.0001, "loss": 1.2658, "step": 10915 }, { "epoch": 1.2681963404008132, "grad_norm": 0.5675121545791626, "learning_rate": 0.0001, "loss": 1.4674, "step": 10916 }, { "epoch": 1.2683125181527737, "grad_norm": 0.612667977809906, "learning_rate": 0.0001, "loss": 1.5688, "step": 10917 }, { "epoch": 1.2684286959047342, "grad_norm": 0.6280263066291809, "learning_rate": 0.0001, "loss": 1.694, "step": 10918 }, { "epoch": 1.2685448736566947, "grad_norm": 0.5767539739608765, "learning_rate": 0.0001, "loss": 1.4455, "step": 10919 }, { "epoch": 1.2686610514086554, "grad_norm": 0.543782114982605, "learning_rate": 0.0001, "loss": 1.4729, "step": 10920 }, { "epoch": 1.2687772291606159, "grad_norm": 0.5753729939460754, "learning_rate": 0.0001, "loss": 1.5188, "step": 10921 }, { "epoch": 1.2688934069125763, "grad_norm": 0.5873640775680542, "learning_rate": 0.0001, "loss": 1.4895, "step": 10922 }, { "epoch": 1.2690095846645368, "grad_norm": 0.556169867515564, "learning_rate": 0.0001, "loss": 1.4408, "step": 10923 }, { "epoch": 1.2691257624164973, "grad_norm": 0.588584303855896, "learning_rate": 0.0001, "loss": 1.6303, "step": 10924 }, { "epoch": 1.2692419401684578, "grad_norm": 0.5426774621009827, "learning_rate": 0.0001, "loss": 1.411, "step": 10925 }, { "epoch": 1.2693581179204183, "grad_norm": 0.5747091174125671, "learning_rate": 0.0001, "loss": 1.4987, "step": 10926 }, { "epoch": 1.2694742956723788, "grad_norm": 0.5184979438781738, "learning_rate": 0.0001, "loss": 1.308, "step": 10927 }, { "epoch": 1.2695904734243393, "grad_norm": 0.6157345175743103, "learning_rate": 0.0001, "loss": 1.5492, "step": 10928 }, { "epoch": 1.2697066511762998, "grad_norm": 0.6227047443389893, "learning_rate": 0.0001, "loss": 1.4771, "step": 10929 }, { "epoch": 1.2698228289282603, "grad_norm": 0.5665978193283081, "learning_rate": 0.0001, "loss": 1.4822, "step": 10930 }, { "epoch": 1.2699390066802207, "grad_norm": 0.5821638703346252, "learning_rate": 0.0001, "loss": 1.5109, "step": 10931 }, { "epoch": 1.2700551844321812, "grad_norm": 0.5837578773498535, "learning_rate": 0.0001, "loss": 1.6321, "step": 10932 }, { "epoch": 1.2701713621841417, "grad_norm": 0.5726567506790161, "learning_rate": 0.0001, "loss": 1.464, "step": 10933 }, { "epoch": 1.2702875399361022, "grad_norm": 0.5908870697021484, "learning_rate": 0.0001, "loss": 1.5283, "step": 10934 }, { "epoch": 1.2704037176880627, "grad_norm": 0.5780557990074158, "learning_rate": 0.0001, "loss": 1.539, "step": 10935 }, { "epoch": 1.2705198954400232, "grad_norm": 0.5335168838500977, "learning_rate": 0.0001, "loss": 1.3698, "step": 10936 }, { "epoch": 1.2706360731919837, "grad_norm": 0.5760794281959534, "learning_rate": 0.0001, "loss": 1.4409, "step": 10937 }, { "epoch": 1.2707522509439442, "grad_norm": 0.5667716264724731, "learning_rate": 0.0001, "loss": 1.4142, "step": 10938 }, { "epoch": 1.2708684286959047, "grad_norm": 0.5758441686630249, "learning_rate": 0.0001, "loss": 1.4093, "step": 10939 }, { "epoch": 1.2709846064478652, "grad_norm": 0.5902236104011536, "learning_rate": 0.0001, "loss": 1.616, "step": 10940 }, { "epoch": 1.2711007841998256, "grad_norm": 0.587958037853241, "learning_rate": 0.0001, "loss": 1.4587, "step": 10941 }, { "epoch": 1.2712169619517861, "grad_norm": 0.6102663278579712, "learning_rate": 0.0001, "loss": 1.4048, "step": 10942 }, { "epoch": 1.2713331397037466, "grad_norm": 0.5977857708930969, "learning_rate": 0.0001, "loss": 1.3835, "step": 10943 }, { "epoch": 1.271449317455707, "grad_norm": 0.5918571352958679, "learning_rate": 0.0001, "loss": 1.5698, "step": 10944 }, { "epoch": 1.2715654952076676, "grad_norm": 0.5774389505386353, "learning_rate": 0.0001, "loss": 1.3861, "step": 10945 }, { "epoch": 1.2716816729596283, "grad_norm": 0.6010609865188599, "learning_rate": 0.0001, "loss": 1.4712, "step": 10946 }, { "epoch": 1.2717978507115888, "grad_norm": 0.5825393795967102, "learning_rate": 0.0001, "loss": 1.6338, "step": 10947 }, { "epoch": 1.2719140284635493, "grad_norm": 0.6239941120147705, "learning_rate": 0.0001, "loss": 1.4736, "step": 10948 }, { "epoch": 1.2720302062155098, "grad_norm": 0.5607348680496216, "learning_rate": 0.0001, "loss": 1.3118, "step": 10949 }, { "epoch": 1.2721463839674703, "grad_norm": 0.6177657842636108, "learning_rate": 0.0001, "loss": 1.7027, "step": 10950 }, { "epoch": 1.2722625617194308, "grad_norm": 0.5948476791381836, "learning_rate": 0.0001, "loss": 1.5479, "step": 10951 }, { "epoch": 1.2723787394713912, "grad_norm": 0.5765991806983948, "learning_rate": 0.0001, "loss": 1.6417, "step": 10952 }, { "epoch": 1.2724949172233517, "grad_norm": 0.5915812253952026, "learning_rate": 0.0001, "loss": 1.5304, "step": 10953 }, { "epoch": 1.2726110949753122, "grad_norm": 0.5970553755760193, "learning_rate": 0.0001, "loss": 1.6319, "step": 10954 }, { "epoch": 1.2727272727272727, "grad_norm": 0.5600100755691528, "learning_rate": 0.0001, "loss": 1.3926, "step": 10955 }, { "epoch": 1.2728434504792332, "grad_norm": 0.5822739005088806, "learning_rate": 0.0001, "loss": 1.5323, "step": 10956 }, { "epoch": 1.2729596282311937, "grad_norm": 0.6493605971336365, "learning_rate": 0.0001, "loss": 1.3364, "step": 10957 }, { "epoch": 1.2730758059831542, "grad_norm": 0.5897579193115234, "learning_rate": 0.0001, "loss": 1.5439, "step": 10958 }, { "epoch": 1.2731919837351147, "grad_norm": 0.5744937062263489, "learning_rate": 0.0001, "loss": 1.5039, "step": 10959 }, { "epoch": 1.2733081614870752, "grad_norm": 0.5749931335449219, "learning_rate": 0.0001, "loss": 1.5115, "step": 10960 }, { "epoch": 1.2734243392390359, "grad_norm": 0.5711995363235474, "learning_rate": 0.0001, "loss": 1.5007, "step": 10961 }, { "epoch": 1.2735405169909964, "grad_norm": 0.572600781917572, "learning_rate": 0.0001, "loss": 1.4066, "step": 10962 }, { "epoch": 1.2736566947429568, "grad_norm": 0.5981788635253906, "learning_rate": 0.0001, "loss": 1.652, "step": 10963 }, { "epoch": 1.2737728724949173, "grad_norm": 0.6050378084182739, "learning_rate": 0.0001, "loss": 1.5157, "step": 10964 }, { "epoch": 1.2738890502468778, "grad_norm": 0.5755563974380493, "learning_rate": 0.0001, "loss": 1.6514, "step": 10965 }, { "epoch": 1.2740052279988383, "grad_norm": 0.5697473287582397, "learning_rate": 0.0001, "loss": 1.3886, "step": 10966 }, { "epoch": 1.2741214057507988, "grad_norm": 0.5516510605812073, "learning_rate": 0.0001, "loss": 1.626, "step": 10967 }, { "epoch": 1.2742375835027593, "grad_norm": 0.5390398502349854, "learning_rate": 0.0001, "loss": 1.2317, "step": 10968 }, { "epoch": 1.2743537612547198, "grad_norm": 0.5673388838768005, "learning_rate": 0.0001, "loss": 1.3808, "step": 10969 }, { "epoch": 1.2744699390066803, "grad_norm": 0.5935032367706299, "learning_rate": 0.0001, "loss": 1.5228, "step": 10970 }, { "epoch": 1.2745861167586408, "grad_norm": 0.604706883430481, "learning_rate": 0.0001, "loss": 1.5436, "step": 10971 }, { "epoch": 1.2747022945106012, "grad_norm": 0.643751859664917, "learning_rate": 0.0001, "loss": 1.5331, "step": 10972 }, { "epoch": 1.2748184722625617, "grad_norm": 0.6240712404251099, "learning_rate": 0.0001, "loss": 1.6298, "step": 10973 }, { "epoch": 1.2749346500145222, "grad_norm": 0.5518888831138611, "learning_rate": 0.0001, "loss": 1.4981, "step": 10974 }, { "epoch": 1.2750508277664827, "grad_norm": 0.6049218773841858, "learning_rate": 0.0001, "loss": 1.4118, "step": 10975 }, { "epoch": 1.2751670055184432, "grad_norm": 0.6087144017219543, "learning_rate": 0.0001, "loss": 1.4068, "step": 10976 }, { "epoch": 1.2752831832704037, "grad_norm": 0.5613987445831299, "learning_rate": 0.0001, "loss": 1.6216, "step": 10977 }, { "epoch": 1.2753993610223642, "grad_norm": 0.6147012710571289, "learning_rate": 0.0001, "loss": 1.5426, "step": 10978 }, { "epoch": 1.2755155387743247, "grad_norm": 0.6045247316360474, "learning_rate": 0.0001, "loss": 1.5239, "step": 10979 }, { "epoch": 1.2756317165262852, "grad_norm": 0.5130501389503479, "learning_rate": 0.0001, "loss": 1.2315, "step": 10980 }, { "epoch": 1.2757478942782456, "grad_norm": 0.5664576888084412, "learning_rate": 0.0001, "loss": 1.5506, "step": 10981 }, { "epoch": 1.2758640720302061, "grad_norm": 0.5801703929901123, "learning_rate": 0.0001, "loss": 1.5441, "step": 10982 }, { "epoch": 1.2759802497821666, "grad_norm": 0.6036249399185181, "learning_rate": 0.0001, "loss": 1.3984, "step": 10983 }, { "epoch": 1.276096427534127, "grad_norm": 0.6023778915405273, "learning_rate": 0.0001, "loss": 1.4279, "step": 10984 }, { "epoch": 1.2762126052860876, "grad_norm": 0.5702974200248718, "learning_rate": 0.0001, "loss": 1.5797, "step": 10985 }, { "epoch": 1.276328783038048, "grad_norm": 0.5740309357643127, "learning_rate": 0.0001, "loss": 1.2492, "step": 10986 }, { "epoch": 1.2764449607900086, "grad_norm": 0.6035803556442261, "learning_rate": 0.0001, "loss": 1.6401, "step": 10987 }, { "epoch": 1.2765611385419693, "grad_norm": 0.6096132397651672, "learning_rate": 0.0001, "loss": 1.5221, "step": 10988 }, { "epoch": 1.2766773162939298, "grad_norm": 0.5646277070045471, "learning_rate": 0.0001, "loss": 1.4154, "step": 10989 }, { "epoch": 1.2767934940458903, "grad_norm": 0.572085440158844, "learning_rate": 0.0001, "loss": 1.5288, "step": 10990 }, { "epoch": 1.2769096717978508, "grad_norm": 0.5817086696624756, "learning_rate": 0.0001, "loss": 1.57, "step": 10991 }, { "epoch": 1.2770258495498112, "grad_norm": 0.5460660457611084, "learning_rate": 0.0001, "loss": 1.3342, "step": 10992 }, { "epoch": 1.2771420273017717, "grad_norm": 0.622367262840271, "learning_rate": 0.0001, "loss": 1.6075, "step": 10993 }, { "epoch": 1.2772582050537322, "grad_norm": 0.5549526214599609, "learning_rate": 0.0001, "loss": 1.2616, "step": 10994 }, { "epoch": 1.2773743828056927, "grad_norm": 0.5860145688056946, "learning_rate": 0.0001, "loss": 1.4507, "step": 10995 }, { "epoch": 1.2774905605576532, "grad_norm": 0.6013427972793579, "learning_rate": 0.0001, "loss": 1.4988, "step": 10996 }, { "epoch": 1.2776067383096137, "grad_norm": 0.6159816980361938, "learning_rate": 0.0001, "loss": 1.6728, "step": 10997 }, { "epoch": 1.2777229160615742, "grad_norm": 0.5887131690979004, "learning_rate": 0.0001, "loss": 1.3728, "step": 10998 }, { "epoch": 1.2778390938135347, "grad_norm": 0.555867612361908, "learning_rate": 0.0001, "loss": 1.4287, "step": 10999 }, { "epoch": 1.2779552715654952, "grad_norm": 0.589248538017273, "learning_rate": 0.0001, "loss": 1.5133, "step": 11000 }, { "epoch": 1.2780714493174556, "grad_norm": 0.5850374698638916, "learning_rate": 0.0001, "loss": 1.4953, "step": 11001 }, { "epoch": 1.2781876270694161, "grad_norm": 0.5467734932899475, "learning_rate": 0.0001, "loss": 1.3924, "step": 11002 }, { "epoch": 1.2783038048213768, "grad_norm": 0.54944908618927, "learning_rate": 0.0001, "loss": 1.5287, "step": 11003 }, { "epoch": 1.2784199825733373, "grad_norm": 0.5763708353042603, "learning_rate": 0.0001, "loss": 1.6101, "step": 11004 }, { "epoch": 1.2785361603252978, "grad_norm": 0.5769256353378296, "learning_rate": 0.0001, "loss": 1.4517, "step": 11005 }, { "epoch": 1.2786523380772583, "grad_norm": 0.5926720499992371, "learning_rate": 0.0001, "loss": 1.5032, "step": 11006 }, { "epoch": 1.2787685158292188, "grad_norm": 0.5912777781486511, "learning_rate": 0.0001, "loss": 1.5129, "step": 11007 }, { "epoch": 1.2788846935811793, "grad_norm": 0.545050859451294, "learning_rate": 0.0001, "loss": 1.3467, "step": 11008 }, { "epoch": 1.2790008713331398, "grad_norm": 0.5366930365562439, "learning_rate": 0.0001, "loss": 1.4805, "step": 11009 }, { "epoch": 1.2791170490851003, "grad_norm": 0.5812363028526306, "learning_rate": 0.0001, "loss": 1.4036, "step": 11010 }, { "epoch": 1.2792332268370608, "grad_norm": 0.6034533381462097, "learning_rate": 0.0001, "loss": 1.573, "step": 11011 }, { "epoch": 1.2793494045890212, "grad_norm": 0.5460754632949829, "learning_rate": 0.0001, "loss": 1.5372, "step": 11012 }, { "epoch": 1.2794655823409817, "grad_norm": 0.5772432088851929, "learning_rate": 0.0001, "loss": 1.5513, "step": 11013 }, { "epoch": 1.2795817600929422, "grad_norm": 0.5885570049285889, "learning_rate": 0.0001, "loss": 1.536, "step": 11014 }, { "epoch": 1.2796979378449027, "grad_norm": 0.6416093707084656, "learning_rate": 0.0001, "loss": 1.6099, "step": 11015 }, { "epoch": 1.2798141155968632, "grad_norm": 0.576856791973114, "learning_rate": 0.0001, "loss": 1.5727, "step": 11016 }, { "epoch": 1.2799302933488237, "grad_norm": 0.5491908192634583, "learning_rate": 0.0001, "loss": 1.3706, "step": 11017 }, { "epoch": 1.2800464711007842, "grad_norm": 0.5981287956237793, "learning_rate": 0.0001, "loss": 1.6112, "step": 11018 }, { "epoch": 1.2801626488527447, "grad_norm": 0.5656360387802124, "learning_rate": 0.0001, "loss": 1.3962, "step": 11019 }, { "epoch": 1.2802788266047052, "grad_norm": 0.52436763048172, "learning_rate": 0.0001, "loss": 1.3104, "step": 11020 }, { "epoch": 1.2803950043566656, "grad_norm": 0.5909287929534912, "learning_rate": 0.0001, "loss": 1.5743, "step": 11021 }, { "epoch": 1.2805111821086261, "grad_norm": 0.5850455164909363, "learning_rate": 0.0001, "loss": 1.6079, "step": 11022 }, { "epoch": 1.2806273598605866, "grad_norm": 0.6200546026229858, "learning_rate": 0.0001, "loss": 1.523, "step": 11023 }, { "epoch": 1.2807435376125471, "grad_norm": 0.5266997218132019, "learning_rate": 0.0001, "loss": 1.3889, "step": 11024 }, { "epoch": 1.2808597153645076, "grad_norm": 0.5400038957595825, "learning_rate": 0.0001, "loss": 1.3844, "step": 11025 }, { "epoch": 1.280975893116468, "grad_norm": 0.5605980157852173, "learning_rate": 0.0001, "loss": 1.4657, "step": 11026 }, { "epoch": 1.2810920708684286, "grad_norm": 0.5961278080940247, "learning_rate": 0.0001, "loss": 1.441, "step": 11027 }, { "epoch": 1.281208248620389, "grad_norm": 0.5594688653945923, "learning_rate": 0.0001, "loss": 1.5488, "step": 11028 }, { "epoch": 1.2813244263723496, "grad_norm": 0.5864664912223816, "learning_rate": 0.0001, "loss": 1.2833, "step": 11029 }, { "epoch": 1.2814406041243103, "grad_norm": 0.579258382320404, "learning_rate": 0.0001, "loss": 1.393, "step": 11030 }, { "epoch": 1.2815567818762708, "grad_norm": 0.5354270339012146, "learning_rate": 0.0001, "loss": 1.3188, "step": 11031 }, { "epoch": 1.2816729596282312, "grad_norm": 0.5614170432090759, "learning_rate": 0.0001, "loss": 1.589, "step": 11032 }, { "epoch": 1.2817891373801917, "grad_norm": 0.5893466472625732, "learning_rate": 0.0001, "loss": 1.4084, "step": 11033 }, { "epoch": 1.2819053151321522, "grad_norm": 0.5984696745872498, "learning_rate": 0.0001, "loss": 1.4392, "step": 11034 }, { "epoch": 1.2820214928841127, "grad_norm": 0.5932953357696533, "learning_rate": 0.0001, "loss": 1.6025, "step": 11035 }, { "epoch": 1.2821376706360732, "grad_norm": 0.5950168371200562, "learning_rate": 0.0001, "loss": 1.5867, "step": 11036 }, { "epoch": 1.2822538483880337, "grad_norm": 0.5833539962768555, "learning_rate": 0.0001, "loss": 1.6473, "step": 11037 }, { "epoch": 1.2823700261399942, "grad_norm": 0.6518958210945129, "learning_rate": 0.0001, "loss": 1.276, "step": 11038 }, { "epoch": 1.2824862038919547, "grad_norm": 0.5316762328147888, "learning_rate": 0.0001, "loss": 1.4414, "step": 11039 }, { "epoch": 1.2826023816439152, "grad_norm": 0.6396839022636414, "learning_rate": 0.0001, "loss": 1.6239, "step": 11040 }, { "epoch": 1.2827185593958756, "grad_norm": 0.574507474899292, "learning_rate": 0.0001, "loss": 1.496, "step": 11041 }, { "epoch": 1.2828347371478361, "grad_norm": 0.5620124936103821, "learning_rate": 0.0001, "loss": 1.3733, "step": 11042 }, { "epoch": 1.2829509148997966, "grad_norm": 0.5690659880638123, "learning_rate": 0.0001, "loss": 1.3846, "step": 11043 }, { "epoch": 1.2830670926517571, "grad_norm": 0.5415745973587036, "learning_rate": 0.0001, "loss": 1.3145, "step": 11044 }, { "epoch": 1.2831832704037178, "grad_norm": 0.5873833298683167, "learning_rate": 0.0001, "loss": 1.551, "step": 11045 }, { "epoch": 1.2832994481556783, "grad_norm": 0.5984528064727783, "learning_rate": 0.0001, "loss": 1.5172, "step": 11046 }, { "epoch": 1.2834156259076388, "grad_norm": 0.6160796284675598, "learning_rate": 0.0001, "loss": 1.4018, "step": 11047 }, { "epoch": 1.2835318036595993, "grad_norm": 0.604532778263092, "learning_rate": 0.0001, "loss": 1.4446, "step": 11048 }, { "epoch": 1.2836479814115598, "grad_norm": 0.5491237044334412, "learning_rate": 0.0001, "loss": 1.3932, "step": 11049 }, { "epoch": 1.2837641591635203, "grad_norm": 0.5465075969696045, "learning_rate": 0.0001, "loss": 1.5201, "step": 11050 }, { "epoch": 1.2838803369154808, "grad_norm": 0.6033710837364197, "learning_rate": 0.0001, "loss": 1.5147, "step": 11051 }, { "epoch": 1.2839965146674412, "grad_norm": 0.5667850375175476, "learning_rate": 0.0001, "loss": 1.5165, "step": 11052 }, { "epoch": 1.2841126924194017, "grad_norm": 0.5517511367797852, "learning_rate": 0.0001, "loss": 1.389, "step": 11053 }, { "epoch": 1.2842288701713622, "grad_norm": 0.5938916802406311, "learning_rate": 0.0001, "loss": 1.5116, "step": 11054 }, { "epoch": 1.2843450479233227, "grad_norm": 0.5698368549346924, "learning_rate": 0.0001, "loss": 1.439, "step": 11055 }, { "epoch": 1.2844612256752832, "grad_norm": 0.5910446643829346, "learning_rate": 0.0001, "loss": 1.4874, "step": 11056 }, { "epoch": 1.2845774034272437, "grad_norm": 0.5896273255348206, "learning_rate": 0.0001, "loss": 1.3875, "step": 11057 }, { "epoch": 1.2846935811792042, "grad_norm": 0.5363755226135254, "learning_rate": 0.0001, "loss": 1.3841, "step": 11058 }, { "epoch": 1.2848097589311647, "grad_norm": 0.6186480522155762, "learning_rate": 0.0001, "loss": 1.5301, "step": 11059 }, { "epoch": 1.2849259366831252, "grad_norm": 0.5805513858795166, "learning_rate": 0.0001, "loss": 1.4924, "step": 11060 }, { "epoch": 1.2850421144350856, "grad_norm": 0.6029322743415833, "learning_rate": 0.0001, "loss": 1.3933, "step": 11061 }, { "epoch": 1.2851582921870461, "grad_norm": 0.5690828561782837, "learning_rate": 0.0001, "loss": 1.4572, "step": 11062 }, { "epoch": 1.2852744699390066, "grad_norm": 0.5746386051177979, "learning_rate": 0.0001, "loss": 1.4443, "step": 11063 }, { "epoch": 1.2853906476909671, "grad_norm": 0.5679745674133301, "learning_rate": 0.0001, "loss": 1.3696, "step": 11064 }, { "epoch": 1.2855068254429276, "grad_norm": 0.6062312126159668, "learning_rate": 0.0001, "loss": 1.5775, "step": 11065 }, { "epoch": 1.285623003194888, "grad_norm": 0.5830647945404053, "learning_rate": 0.0001, "loss": 1.4848, "step": 11066 }, { "epoch": 1.2857391809468486, "grad_norm": 0.608407199382782, "learning_rate": 0.0001, "loss": 1.6409, "step": 11067 }, { "epoch": 1.285855358698809, "grad_norm": 0.5620405673980713, "learning_rate": 0.0001, "loss": 1.4291, "step": 11068 }, { "epoch": 1.2859715364507696, "grad_norm": 0.5535077452659607, "learning_rate": 0.0001, "loss": 1.4512, "step": 11069 }, { "epoch": 1.28608771420273, "grad_norm": 0.5676767230033875, "learning_rate": 0.0001, "loss": 1.432, "step": 11070 }, { "epoch": 1.2862038919546908, "grad_norm": 0.572475790977478, "learning_rate": 0.0001, "loss": 1.4525, "step": 11071 }, { "epoch": 1.2863200697066512, "grad_norm": 0.5898261666297913, "learning_rate": 0.0001, "loss": 1.3499, "step": 11072 }, { "epoch": 1.2864362474586117, "grad_norm": 0.5925431251525879, "learning_rate": 0.0001, "loss": 1.4481, "step": 11073 }, { "epoch": 1.2865524252105722, "grad_norm": 0.6155528426170349, "learning_rate": 0.0001, "loss": 1.5104, "step": 11074 }, { "epoch": 1.2866686029625327, "grad_norm": 0.5945338010787964, "learning_rate": 0.0001, "loss": 1.3821, "step": 11075 }, { "epoch": 1.2867847807144932, "grad_norm": 0.5834474563598633, "learning_rate": 0.0001, "loss": 1.4329, "step": 11076 }, { "epoch": 1.2869009584664537, "grad_norm": 0.5647174715995789, "learning_rate": 0.0001, "loss": 1.4092, "step": 11077 }, { "epoch": 1.2870171362184142, "grad_norm": 0.6176016926765442, "learning_rate": 0.0001, "loss": 1.5172, "step": 11078 }, { "epoch": 1.2871333139703747, "grad_norm": 0.5899996161460876, "learning_rate": 0.0001, "loss": 1.55, "step": 11079 }, { "epoch": 1.2872494917223352, "grad_norm": 0.605802595615387, "learning_rate": 0.0001, "loss": 1.4564, "step": 11080 }, { "epoch": 1.2873656694742956, "grad_norm": 0.6284874677658081, "learning_rate": 0.0001, "loss": 1.4376, "step": 11081 }, { "epoch": 1.2874818472262561, "grad_norm": 0.5752767324447632, "learning_rate": 0.0001, "loss": 1.3874, "step": 11082 }, { "epoch": 1.2875980249782166, "grad_norm": 0.5632016062736511, "learning_rate": 0.0001, "loss": 1.5345, "step": 11083 }, { "epoch": 1.2877142027301771, "grad_norm": 0.5655538439750671, "learning_rate": 0.0001, "loss": 1.6066, "step": 11084 }, { "epoch": 1.2878303804821376, "grad_norm": 0.5820789337158203, "learning_rate": 0.0001, "loss": 1.4042, "step": 11085 }, { "epoch": 1.287946558234098, "grad_norm": 0.6027318835258484, "learning_rate": 0.0001, "loss": 1.5787, "step": 11086 }, { "epoch": 1.2880627359860588, "grad_norm": 0.6038578152656555, "learning_rate": 0.0001, "loss": 1.5104, "step": 11087 }, { "epoch": 1.2881789137380193, "grad_norm": 0.5578043460845947, "learning_rate": 0.0001, "loss": 1.4933, "step": 11088 }, { "epoch": 1.2882950914899798, "grad_norm": 0.568867564201355, "learning_rate": 0.0001, "loss": 1.5107, "step": 11089 }, { "epoch": 1.2884112692419403, "grad_norm": 0.6275894641876221, "learning_rate": 0.0001, "loss": 1.6483, "step": 11090 }, { "epoch": 1.2885274469939008, "grad_norm": 0.5560985207557678, "learning_rate": 0.0001, "loss": 1.5027, "step": 11091 }, { "epoch": 1.2886436247458612, "grad_norm": 0.5904410481452942, "learning_rate": 0.0001, "loss": 1.4185, "step": 11092 }, { "epoch": 1.2887598024978217, "grad_norm": 0.5531237125396729, "learning_rate": 0.0001, "loss": 1.4716, "step": 11093 }, { "epoch": 1.2888759802497822, "grad_norm": 0.6044683456420898, "learning_rate": 0.0001, "loss": 1.6057, "step": 11094 }, { "epoch": 1.2889921580017427, "grad_norm": 0.5353182554244995, "learning_rate": 0.0001, "loss": 1.5528, "step": 11095 }, { "epoch": 1.2891083357537032, "grad_norm": 0.5463939905166626, "learning_rate": 0.0001, "loss": 1.3904, "step": 11096 }, { "epoch": 1.2892245135056637, "grad_norm": 0.6040199995040894, "learning_rate": 0.0001, "loss": 1.4816, "step": 11097 }, { "epoch": 1.2893406912576242, "grad_norm": 0.567561686038971, "learning_rate": 0.0001, "loss": 1.5263, "step": 11098 }, { "epoch": 1.2894568690095847, "grad_norm": 0.5605993866920471, "learning_rate": 0.0001, "loss": 1.298, "step": 11099 }, { "epoch": 1.2895730467615452, "grad_norm": 0.5997190475463867, "learning_rate": 0.0001, "loss": 1.4685, "step": 11100 }, { "epoch": 1.2896892245135056, "grad_norm": 0.5995270013809204, "learning_rate": 0.0001, "loss": 1.5044, "step": 11101 }, { "epoch": 1.2898054022654661, "grad_norm": 0.5469571948051453, "learning_rate": 0.0001, "loss": 1.3763, "step": 11102 }, { "epoch": 1.2899215800174266, "grad_norm": 0.5535038709640503, "learning_rate": 0.0001, "loss": 1.4447, "step": 11103 }, { "epoch": 1.2900377577693871, "grad_norm": 0.5944443941116333, "learning_rate": 0.0001, "loss": 1.3665, "step": 11104 }, { "epoch": 1.2901539355213476, "grad_norm": 0.5781884789466858, "learning_rate": 0.0001, "loss": 1.5146, "step": 11105 }, { "epoch": 1.290270113273308, "grad_norm": 0.5380309820175171, "learning_rate": 0.0001, "loss": 1.3409, "step": 11106 }, { "epoch": 1.2903862910252686, "grad_norm": 0.6192278861999512, "learning_rate": 0.0001, "loss": 1.5987, "step": 11107 }, { "epoch": 1.290502468777229, "grad_norm": 0.5789281129837036, "learning_rate": 0.0001, "loss": 1.4082, "step": 11108 }, { "epoch": 1.2906186465291896, "grad_norm": 0.53936368227005, "learning_rate": 0.0001, "loss": 1.4615, "step": 11109 }, { "epoch": 1.29073482428115, "grad_norm": 0.5656925439834595, "learning_rate": 0.0001, "loss": 1.403, "step": 11110 }, { "epoch": 1.2908510020331105, "grad_norm": 0.5613973736763, "learning_rate": 0.0001, "loss": 1.4213, "step": 11111 }, { "epoch": 1.290967179785071, "grad_norm": 0.5758323073387146, "learning_rate": 0.0001, "loss": 1.435, "step": 11112 }, { "epoch": 1.2910833575370317, "grad_norm": 0.5894516706466675, "learning_rate": 0.0001, "loss": 1.4765, "step": 11113 }, { "epoch": 1.2911995352889922, "grad_norm": 0.6146057844161987, "learning_rate": 0.0001, "loss": 1.6059, "step": 11114 }, { "epoch": 1.2913157130409527, "grad_norm": 0.5563119649887085, "learning_rate": 0.0001, "loss": 1.3916, "step": 11115 }, { "epoch": 1.2914318907929132, "grad_norm": 0.5988980531692505, "learning_rate": 0.0001, "loss": 1.3723, "step": 11116 }, { "epoch": 1.2915480685448737, "grad_norm": 0.5595650672912598, "learning_rate": 0.0001, "loss": 1.3951, "step": 11117 }, { "epoch": 1.2916642462968342, "grad_norm": 0.6587527990341187, "learning_rate": 0.0001, "loss": 1.4094, "step": 11118 }, { "epoch": 1.2917804240487947, "grad_norm": 0.5808438062667847, "learning_rate": 0.0001, "loss": 1.475, "step": 11119 }, { "epoch": 1.2918966018007552, "grad_norm": 0.5954582095146179, "learning_rate": 0.0001, "loss": 1.3649, "step": 11120 }, { "epoch": 1.2920127795527157, "grad_norm": 0.5856751799583435, "learning_rate": 0.0001, "loss": 1.4861, "step": 11121 }, { "epoch": 1.2921289573046761, "grad_norm": 0.634069561958313, "learning_rate": 0.0001, "loss": 1.6051, "step": 11122 }, { "epoch": 1.2922451350566366, "grad_norm": 0.5883323550224304, "learning_rate": 0.0001, "loss": 1.4626, "step": 11123 }, { "epoch": 1.2923613128085971, "grad_norm": 0.5847913026809692, "learning_rate": 0.0001, "loss": 1.3908, "step": 11124 }, { "epoch": 1.2924774905605576, "grad_norm": 0.6062279939651489, "learning_rate": 0.0001, "loss": 1.6381, "step": 11125 }, { "epoch": 1.292593668312518, "grad_norm": 0.5873641967773438, "learning_rate": 0.0001, "loss": 1.3793, "step": 11126 }, { "epoch": 1.2927098460644786, "grad_norm": 0.5468490719795227, "learning_rate": 0.0001, "loss": 1.4905, "step": 11127 }, { "epoch": 1.292826023816439, "grad_norm": 0.6617196798324585, "learning_rate": 0.0001, "loss": 1.7155, "step": 11128 }, { "epoch": 1.2929422015683998, "grad_norm": 0.6220079660415649, "learning_rate": 0.0001, "loss": 1.5208, "step": 11129 }, { "epoch": 1.2930583793203603, "grad_norm": 0.5805248022079468, "learning_rate": 0.0001, "loss": 1.5515, "step": 11130 }, { "epoch": 1.2931745570723208, "grad_norm": 0.5914593935012817, "learning_rate": 0.0001, "loss": 1.4297, "step": 11131 }, { "epoch": 1.2932907348242813, "grad_norm": 0.5885531306266785, "learning_rate": 0.0001, "loss": 1.5298, "step": 11132 }, { "epoch": 1.2934069125762417, "grad_norm": 0.5579025149345398, "learning_rate": 0.0001, "loss": 1.4472, "step": 11133 }, { "epoch": 1.2935230903282022, "grad_norm": 0.6001786589622498, "learning_rate": 0.0001, "loss": 1.5589, "step": 11134 }, { "epoch": 1.2936392680801627, "grad_norm": 0.5882461071014404, "learning_rate": 0.0001, "loss": 1.4804, "step": 11135 }, { "epoch": 1.2937554458321232, "grad_norm": 0.5694596767425537, "learning_rate": 0.0001, "loss": 1.3329, "step": 11136 }, { "epoch": 1.2938716235840837, "grad_norm": 0.5779051780700684, "learning_rate": 0.0001, "loss": 1.6199, "step": 11137 }, { "epoch": 1.2939878013360442, "grad_norm": 0.6211315393447876, "learning_rate": 0.0001, "loss": 1.6313, "step": 11138 }, { "epoch": 1.2941039790880047, "grad_norm": 0.5611022710800171, "learning_rate": 0.0001, "loss": 1.4081, "step": 11139 }, { "epoch": 1.2942201568399652, "grad_norm": 0.5747177004814148, "learning_rate": 0.0001, "loss": 1.4622, "step": 11140 }, { "epoch": 1.2943363345919257, "grad_norm": 0.5457985401153564, "learning_rate": 0.0001, "loss": 1.2923, "step": 11141 }, { "epoch": 1.2944525123438861, "grad_norm": 0.5791705846786499, "learning_rate": 0.0001, "loss": 1.4877, "step": 11142 }, { "epoch": 1.2945686900958466, "grad_norm": 0.5527358055114746, "learning_rate": 0.0001, "loss": 1.3722, "step": 11143 }, { "epoch": 1.2946848678478071, "grad_norm": 0.5855079889297485, "learning_rate": 0.0001, "loss": 1.4477, "step": 11144 }, { "epoch": 1.2948010455997676, "grad_norm": 0.5779101252555847, "learning_rate": 0.0001, "loss": 1.559, "step": 11145 }, { "epoch": 1.294917223351728, "grad_norm": 0.6360836029052734, "learning_rate": 0.0001, "loss": 1.81, "step": 11146 }, { "epoch": 1.2950334011036886, "grad_norm": 0.5778235197067261, "learning_rate": 0.0001, "loss": 1.3919, "step": 11147 }, { "epoch": 1.295149578855649, "grad_norm": 0.5182598233222961, "learning_rate": 0.0001, "loss": 1.2329, "step": 11148 }, { "epoch": 1.2952657566076096, "grad_norm": 0.5818975567817688, "learning_rate": 0.0001, "loss": 1.4925, "step": 11149 }, { "epoch": 1.29538193435957, "grad_norm": 0.565624475479126, "learning_rate": 0.0001, "loss": 1.3899, "step": 11150 }, { "epoch": 1.2954981121115305, "grad_norm": 0.5845359563827515, "learning_rate": 0.0001, "loss": 1.6526, "step": 11151 }, { "epoch": 1.295614289863491, "grad_norm": 0.5842732787132263, "learning_rate": 0.0001, "loss": 1.4591, "step": 11152 }, { "epoch": 1.2957304676154515, "grad_norm": 0.5768247842788696, "learning_rate": 0.0001, "loss": 1.4674, "step": 11153 }, { "epoch": 1.295846645367412, "grad_norm": 0.6080706119537354, "learning_rate": 0.0001, "loss": 1.4585, "step": 11154 }, { "epoch": 1.2959628231193727, "grad_norm": 0.6032418608665466, "learning_rate": 0.0001, "loss": 1.5665, "step": 11155 }, { "epoch": 1.2960790008713332, "grad_norm": 0.5942699909210205, "learning_rate": 0.0001, "loss": 1.5165, "step": 11156 }, { "epoch": 1.2961951786232937, "grad_norm": 0.6410212516784668, "learning_rate": 0.0001, "loss": 1.5121, "step": 11157 }, { "epoch": 1.2963113563752542, "grad_norm": 0.576407253742218, "learning_rate": 0.0001, "loss": 1.4459, "step": 11158 }, { "epoch": 1.2964275341272147, "grad_norm": 0.5596346855163574, "learning_rate": 0.0001, "loss": 1.293, "step": 11159 }, { "epoch": 1.2965437118791752, "grad_norm": 0.5900881886482239, "learning_rate": 0.0001, "loss": 1.503, "step": 11160 }, { "epoch": 1.2966598896311357, "grad_norm": 0.622340977191925, "learning_rate": 0.0001, "loss": 1.5566, "step": 11161 }, { "epoch": 1.2967760673830961, "grad_norm": 0.5715858936309814, "learning_rate": 0.0001, "loss": 1.2327, "step": 11162 }, { "epoch": 1.2968922451350566, "grad_norm": 0.5746508836746216, "learning_rate": 0.0001, "loss": 1.4591, "step": 11163 }, { "epoch": 1.2970084228870171, "grad_norm": 0.5897271037101746, "learning_rate": 0.0001, "loss": 1.4323, "step": 11164 }, { "epoch": 1.2971246006389776, "grad_norm": 0.607289731502533, "learning_rate": 0.0001, "loss": 1.6555, "step": 11165 }, { "epoch": 1.297240778390938, "grad_norm": 0.5573462843894958, "learning_rate": 0.0001, "loss": 1.2812, "step": 11166 }, { "epoch": 1.2973569561428986, "grad_norm": 0.5533374547958374, "learning_rate": 0.0001, "loss": 1.3657, "step": 11167 }, { "epoch": 1.297473133894859, "grad_norm": 0.5837119221687317, "learning_rate": 0.0001, "loss": 1.4336, "step": 11168 }, { "epoch": 1.2975893116468196, "grad_norm": 0.6015486121177673, "learning_rate": 0.0001, "loss": 1.4731, "step": 11169 }, { "epoch": 1.29770548939878, "grad_norm": 0.5753357410430908, "learning_rate": 0.0001, "loss": 1.4456, "step": 11170 }, { "epoch": 1.2978216671507408, "grad_norm": 0.578898549079895, "learning_rate": 0.0001, "loss": 1.6588, "step": 11171 }, { "epoch": 1.2979378449027013, "grad_norm": 0.5891339778900146, "learning_rate": 0.0001, "loss": 1.3499, "step": 11172 }, { "epoch": 1.2980540226546617, "grad_norm": 0.6010352373123169, "learning_rate": 0.0001, "loss": 1.4964, "step": 11173 }, { "epoch": 1.2981702004066222, "grad_norm": 0.5747548937797546, "learning_rate": 0.0001, "loss": 1.5188, "step": 11174 }, { "epoch": 1.2982863781585827, "grad_norm": 0.5667596459388733, "learning_rate": 0.0001, "loss": 1.4827, "step": 11175 }, { "epoch": 1.2984025559105432, "grad_norm": 0.611515462398529, "learning_rate": 0.0001, "loss": 1.4193, "step": 11176 }, { "epoch": 1.2985187336625037, "grad_norm": 0.6042912602424622, "learning_rate": 0.0001, "loss": 1.5121, "step": 11177 }, { "epoch": 1.2986349114144642, "grad_norm": 0.5766490697860718, "learning_rate": 0.0001, "loss": 1.4842, "step": 11178 }, { "epoch": 1.2987510891664247, "grad_norm": 0.5529983043670654, "learning_rate": 0.0001, "loss": 1.5855, "step": 11179 }, { "epoch": 1.2988672669183852, "grad_norm": 0.5562178492546082, "learning_rate": 0.0001, "loss": 1.5143, "step": 11180 }, { "epoch": 1.2989834446703457, "grad_norm": 0.6044286489486694, "learning_rate": 0.0001, "loss": 1.4856, "step": 11181 }, { "epoch": 1.2990996224223061, "grad_norm": 0.606543779373169, "learning_rate": 0.0001, "loss": 1.54, "step": 11182 }, { "epoch": 1.2992158001742666, "grad_norm": 0.562369704246521, "learning_rate": 0.0001, "loss": 1.3999, "step": 11183 }, { "epoch": 1.2993319779262271, "grad_norm": 0.5866766571998596, "learning_rate": 0.0001, "loss": 1.4549, "step": 11184 }, { "epoch": 1.2994481556781876, "grad_norm": 0.5652108192443848, "learning_rate": 0.0001, "loss": 1.6506, "step": 11185 }, { "epoch": 1.299564333430148, "grad_norm": 0.5331071615219116, "learning_rate": 0.0001, "loss": 1.3049, "step": 11186 }, { "epoch": 1.2996805111821086, "grad_norm": 0.5862208604812622, "learning_rate": 0.0001, "loss": 1.395, "step": 11187 }, { "epoch": 1.299796688934069, "grad_norm": 0.5830875635147095, "learning_rate": 0.0001, "loss": 1.6407, "step": 11188 }, { "epoch": 1.2999128666860296, "grad_norm": 0.5540904402732849, "learning_rate": 0.0001, "loss": 1.3349, "step": 11189 }, { "epoch": 1.30002904443799, "grad_norm": 0.5949186086654663, "learning_rate": 0.0001, "loss": 1.4834, "step": 11190 }, { "epoch": 1.3001452221899505, "grad_norm": 0.6121503710746765, "learning_rate": 0.0001, "loss": 1.5453, "step": 11191 }, { "epoch": 1.300261399941911, "grad_norm": 0.5854485034942627, "learning_rate": 0.0001, "loss": 1.4894, "step": 11192 }, { "epoch": 1.3003775776938715, "grad_norm": 0.5966970920562744, "learning_rate": 0.0001, "loss": 1.4434, "step": 11193 }, { "epoch": 1.300493755445832, "grad_norm": 0.567981481552124, "learning_rate": 0.0001, "loss": 1.6029, "step": 11194 }, { "epoch": 1.3006099331977925, "grad_norm": 0.568736732006073, "learning_rate": 0.0001, "loss": 1.5138, "step": 11195 }, { "epoch": 1.300726110949753, "grad_norm": 0.5789809823036194, "learning_rate": 0.0001, "loss": 1.4117, "step": 11196 }, { "epoch": 1.3008422887017137, "grad_norm": 0.592512845993042, "learning_rate": 0.0001, "loss": 1.4149, "step": 11197 }, { "epoch": 1.3009584664536742, "grad_norm": 0.564712405204773, "learning_rate": 0.0001, "loss": 1.355, "step": 11198 }, { "epoch": 1.3010746442056347, "grad_norm": 0.5966187715530396, "learning_rate": 0.0001, "loss": 1.4929, "step": 11199 }, { "epoch": 1.3011908219575952, "grad_norm": 0.602142870426178, "learning_rate": 0.0001, "loss": 1.6281, "step": 11200 }, { "epoch": 1.3013069997095557, "grad_norm": 0.5634971857070923, "learning_rate": 0.0001, "loss": 1.338, "step": 11201 }, { "epoch": 1.3014231774615161, "grad_norm": 0.549117922782898, "learning_rate": 0.0001, "loss": 1.4056, "step": 11202 }, { "epoch": 1.3015393552134766, "grad_norm": 0.5619381070137024, "learning_rate": 0.0001, "loss": 1.5767, "step": 11203 }, { "epoch": 1.3016555329654371, "grad_norm": 0.5681606531143188, "learning_rate": 0.0001, "loss": 1.4639, "step": 11204 }, { "epoch": 1.3017717107173976, "grad_norm": 0.5451623797416687, "learning_rate": 0.0001, "loss": 1.5145, "step": 11205 }, { "epoch": 1.301887888469358, "grad_norm": 0.550369381904602, "learning_rate": 0.0001, "loss": 1.4309, "step": 11206 }, { "epoch": 1.3020040662213186, "grad_norm": 0.5932458639144897, "learning_rate": 0.0001, "loss": 1.5924, "step": 11207 }, { "epoch": 1.302120243973279, "grad_norm": 0.529190719127655, "learning_rate": 0.0001, "loss": 1.3753, "step": 11208 }, { "epoch": 1.3022364217252396, "grad_norm": 0.584304928779602, "learning_rate": 0.0001, "loss": 1.3129, "step": 11209 }, { "epoch": 1.3023525994772, "grad_norm": 0.6115213632583618, "learning_rate": 0.0001, "loss": 1.4993, "step": 11210 }, { "epoch": 1.3024687772291605, "grad_norm": 0.5591283440589905, "learning_rate": 0.0001, "loss": 1.4011, "step": 11211 }, { "epoch": 1.302584954981121, "grad_norm": 0.6138569116592407, "learning_rate": 0.0001, "loss": 1.6387, "step": 11212 }, { "epoch": 1.3027011327330817, "grad_norm": 0.5965496897697449, "learning_rate": 0.0001, "loss": 1.5133, "step": 11213 }, { "epoch": 1.3028173104850422, "grad_norm": 0.5974039435386658, "learning_rate": 0.0001, "loss": 1.5243, "step": 11214 }, { "epoch": 1.3029334882370027, "grad_norm": 0.5517904162406921, "learning_rate": 0.0001, "loss": 1.3946, "step": 11215 }, { "epoch": 1.3030496659889632, "grad_norm": 0.6423289775848389, "learning_rate": 0.0001, "loss": 1.5502, "step": 11216 }, { "epoch": 1.3031658437409237, "grad_norm": 0.5717339515686035, "learning_rate": 0.0001, "loss": 1.4246, "step": 11217 }, { "epoch": 1.3032820214928842, "grad_norm": 0.5621806383132935, "learning_rate": 0.0001, "loss": 1.457, "step": 11218 }, { "epoch": 1.3033981992448447, "grad_norm": 0.5442423224449158, "learning_rate": 0.0001, "loss": 1.3835, "step": 11219 }, { "epoch": 1.3035143769968052, "grad_norm": 0.6146840453147888, "learning_rate": 0.0001, "loss": 1.6248, "step": 11220 }, { "epoch": 1.3036305547487657, "grad_norm": 0.54494708776474, "learning_rate": 0.0001, "loss": 1.4485, "step": 11221 }, { "epoch": 1.3037467325007261, "grad_norm": 0.5648915767669678, "learning_rate": 0.0001, "loss": 1.346, "step": 11222 }, { "epoch": 1.3038629102526866, "grad_norm": 0.5836517810821533, "learning_rate": 0.0001, "loss": 1.6378, "step": 11223 }, { "epoch": 1.3039790880046471, "grad_norm": 0.6305371522903442, "learning_rate": 0.0001, "loss": 1.5957, "step": 11224 }, { "epoch": 1.3040952657566076, "grad_norm": 0.5962154865264893, "learning_rate": 0.0001, "loss": 1.6403, "step": 11225 }, { "epoch": 1.304211443508568, "grad_norm": 0.5581847429275513, "learning_rate": 0.0001, "loss": 1.429, "step": 11226 }, { "epoch": 1.3043276212605286, "grad_norm": 0.5733591318130493, "learning_rate": 0.0001, "loss": 1.5465, "step": 11227 }, { "epoch": 1.304443799012489, "grad_norm": 0.5814220309257507, "learning_rate": 0.0001, "loss": 1.4996, "step": 11228 }, { "epoch": 1.3045599767644496, "grad_norm": 0.5474358201026917, "learning_rate": 0.0001, "loss": 1.4572, "step": 11229 }, { "epoch": 1.30467615451641, "grad_norm": 0.5901806354522705, "learning_rate": 0.0001, "loss": 1.4194, "step": 11230 }, { "epoch": 1.3047923322683705, "grad_norm": 0.6499736309051514, "learning_rate": 0.0001, "loss": 1.6479, "step": 11231 }, { "epoch": 1.304908510020331, "grad_norm": 0.5674834251403809, "learning_rate": 0.0001, "loss": 1.2315, "step": 11232 }, { "epoch": 1.3050246877722915, "grad_norm": 0.5817140340805054, "learning_rate": 0.0001, "loss": 1.5433, "step": 11233 }, { "epoch": 1.305140865524252, "grad_norm": 0.5742542147636414, "learning_rate": 0.0001, "loss": 1.5561, "step": 11234 }, { "epoch": 1.3052570432762125, "grad_norm": 0.5343637466430664, "learning_rate": 0.0001, "loss": 1.3944, "step": 11235 }, { "epoch": 1.305373221028173, "grad_norm": 0.623912513256073, "learning_rate": 0.0001, "loss": 1.5761, "step": 11236 }, { "epoch": 1.3054893987801335, "grad_norm": 0.5666230916976929, "learning_rate": 0.0001, "loss": 1.3874, "step": 11237 }, { "epoch": 1.305605576532094, "grad_norm": 0.5873108506202698, "learning_rate": 0.0001, "loss": 1.5621, "step": 11238 }, { "epoch": 1.3057217542840547, "grad_norm": 0.5752112865447998, "learning_rate": 0.0001, "loss": 1.4949, "step": 11239 }, { "epoch": 1.3058379320360152, "grad_norm": 0.528434693813324, "learning_rate": 0.0001, "loss": 1.4461, "step": 11240 }, { "epoch": 1.3059541097879757, "grad_norm": 0.5661198496818542, "learning_rate": 0.0001, "loss": 1.3099, "step": 11241 }, { "epoch": 1.3060702875399361, "grad_norm": 0.5762214064598083, "learning_rate": 0.0001, "loss": 1.4182, "step": 11242 }, { "epoch": 1.3061864652918966, "grad_norm": 0.5601356029510498, "learning_rate": 0.0001, "loss": 1.4627, "step": 11243 }, { "epoch": 1.3063026430438571, "grad_norm": 0.5520750284194946, "learning_rate": 0.0001, "loss": 1.3123, "step": 11244 }, { "epoch": 1.3064188207958176, "grad_norm": 0.5668884515762329, "learning_rate": 0.0001, "loss": 1.3559, "step": 11245 }, { "epoch": 1.306534998547778, "grad_norm": 0.5902964472770691, "learning_rate": 0.0001, "loss": 1.5034, "step": 11246 }, { "epoch": 1.3066511762997386, "grad_norm": 0.6026374697685242, "learning_rate": 0.0001, "loss": 1.3018, "step": 11247 }, { "epoch": 1.306767354051699, "grad_norm": 0.5816320776939392, "learning_rate": 0.0001, "loss": 1.2819, "step": 11248 }, { "epoch": 1.3068835318036596, "grad_norm": 0.5546746850013733, "learning_rate": 0.0001, "loss": 1.4505, "step": 11249 }, { "epoch": 1.30699970955562, "grad_norm": 0.657646119594574, "learning_rate": 0.0001, "loss": 1.5616, "step": 11250 }, { "epoch": 1.3071158873075805, "grad_norm": 0.5970759391784668, "learning_rate": 0.0001, "loss": 1.5271, "step": 11251 }, { "epoch": 1.307232065059541, "grad_norm": 0.6167639493942261, "learning_rate": 0.0001, "loss": 1.665, "step": 11252 }, { "epoch": 1.3073482428115015, "grad_norm": 0.5807110667228699, "learning_rate": 0.0001, "loss": 1.4903, "step": 11253 }, { "epoch": 1.307464420563462, "grad_norm": 0.5864242315292358, "learning_rate": 0.0001, "loss": 1.4656, "step": 11254 }, { "epoch": 1.3075805983154227, "grad_norm": 0.5699091553688049, "learning_rate": 0.0001, "loss": 1.4961, "step": 11255 }, { "epoch": 1.3076967760673832, "grad_norm": 0.5962982177734375, "learning_rate": 0.0001, "loss": 1.5011, "step": 11256 }, { "epoch": 1.3078129538193437, "grad_norm": 0.5681212544441223, "learning_rate": 0.0001, "loss": 1.4549, "step": 11257 }, { "epoch": 1.3079291315713042, "grad_norm": 0.6166642308235168, "learning_rate": 0.0001, "loss": 1.6184, "step": 11258 }, { "epoch": 1.3080453093232647, "grad_norm": 0.5670874714851379, "learning_rate": 0.0001, "loss": 1.4829, "step": 11259 }, { "epoch": 1.3081614870752252, "grad_norm": 0.5790867209434509, "learning_rate": 0.0001, "loss": 1.4262, "step": 11260 }, { "epoch": 1.3082776648271857, "grad_norm": 0.612891674041748, "learning_rate": 0.0001, "loss": 1.537, "step": 11261 }, { "epoch": 1.3083938425791461, "grad_norm": 0.5698578357696533, "learning_rate": 0.0001, "loss": 1.424, "step": 11262 }, { "epoch": 1.3085100203311066, "grad_norm": 0.5675930380821228, "learning_rate": 0.0001, "loss": 1.6215, "step": 11263 }, { "epoch": 1.3086261980830671, "grad_norm": 0.5446262359619141, "learning_rate": 0.0001, "loss": 1.4859, "step": 11264 }, { "epoch": 1.3087423758350276, "grad_norm": 0.5941632986068726, "learning_rate": 0.0001, "loss": 1.5968, "step": 11265 }, { "epoch": 1.308858553586988, "grad_norm": 0.6017045378684998, "learning_rate": 0.0001, "loss": 1.3836, "step": 11266 }, { "epoch": 1.3089747313389486, "grad_norm": 0.6072211265563965, "learning_rate": 0.0001, "loss": 1.7059, "step": 11267 }, { "epoch": 1.309090909090909, "grad_norm": 0.5675247311592102, "learning_rate": 0.0001, "loss": 1.4294, "step": 11268 }, { "epoch": 1.3092070868428696, "grad_norm": 0.6296728849411011, "learning_rate": 0.0001, "loss": 1.556, "step": 11269 }, { "epoch": 1.30932326459483, "grad_norm": 0.5690588355064392, "learning_rate": 0.0001, "loss": 1.3506, "step": 11270 }, { "epoch": 1.3094394423467905, "grad_norm": 0.5606512427330017, "learning_rate": 0.0001, "loss": 1.5288, "step": 11271 }, { "epoch": 1.309555620098751, "grad_norm": 0.6174575686454773, "learning_rate": 0.0001, "loss": 1.4779, "step": 11272 }, { "epoch": 1.3096717978507115, "grad_norm": 0.5738336443901062, "learning_rate": 0.0001, "loss": 1.559, "step": 11273 }, { "epoch": 1.309787975602672, "grad_norm": 0.6263105869293213, "learning_rate": 0.0001, "loss": 1.5608, "step": 11274 }, { "epoch": 1.3099041533546325, "grad_norm": 0.5497341156005859, "learning_rate": 0.0001, "loss": 1.505, "step": 11275 }, { "epoch": 1.310020331106593, "grad_norm": 0.5970504879951477, "learning_rate": 0.0001, "loss": 1.3028, "step": 11276 }, { "epoch": 1.3101365088585535, "grad_norm": 0.5593211054801941, "learning_rate": 0.0001, "loss": 1.5384, "step": 11277 }, { "epoch": 1.310252686610514, "grad_norm": 0.5131186246871948, "learning_rate": 0.0001, "loss": 1.4588, "step": 11278 }, { "epoch": 1.3103688643624745, "grad_norm": 0.5773268938064575, "learning_rate": 0.0001, "loss": 1.5205, "step": 11279 }, { "epoch": 1.310485042114435, "grad_norm": 0.564262330532074, "learning_rate": 0.0001, "loss": 1.5415, "step": 11280 }, { "epoch": 1.3106012198663957, "grad_norm": 0.5280408263206482, "learning_rate": 0.0001, "loss": 1.4061, "step": 11281 }, { "epoch": 1.3107173976183562, "grad_norm": 0.5752106308937073, "learning_rate": 0.0001, "loss": 1.4062, "step": 11282 }, { "epoch": 1.3108335753703166, "grad_norm": 0.5846773982048035, "learning_rate": 0.0001, "loss": 1.3796, "step": 11283 }, { "epoch": 1.3109497531222771, "grad_norm": 0.594781756401062, "learning_rate": 0.0001, "loss": 1.5035, "step": 11284 }, { "epoch": 1.3110659308742376, "grad_norm": 0.5700458288192749, "learning_rate": 0.0001, "loss": 1.4275, "step": 11285 }, { "epoch": 1.311182108626198, "grad_norm": 0.550640344619751, "learning_rate": 0.0001, "loss": 1.2759, "step": 11286 }, { "epoch": 1.3112982863781586, "grad_norm": 0.5917354822158813, "learning_rate": 0.0001, "loss": 1.5185, "step": 11287 }, { "epoch": 1.311414464130119, "grad_norm": 0.6752248406410217, "learning_rate": 0.0001, "loss": 1.4566, "step": 11288 }, { "epoch": 1.3115306418820796, "grad_norm": 0.6515881419181824, "learning_rate": 0.0001, "loss": 1.6851, "step": 11289 }, { "epoch": 1.31164681963404, "grad_norm": 0.6039806008338928, "learning_rate": 0.0001, "loss": 1.5634, "step": 11290 }, { "epoch": 1.3117629973860006, "grad_norm": 0.6334456205368042, "learning_rate": 0.0001, "loss": 1.3471, "step": 11291 }, { "epoch": 1.311879175137961, "grad_norm": 0.574119508266449, "learning_rate": 0.0001, "loss": 1.3821, "step": 11292 }, { "epoch": 1.3119953528899215, "grad_norm": 0.5512613654136658, "learning_rate": 0.0001, "loss": 1.3874, "step": 11293 }, { "epoch": 1.312111530641882, "grad_norm": 0.5976804494857788, "learning_rate": 0.0001, "loss": 1.4928, "step": 11294 }, { "epoch": 1.3122277083938425, "grad_norm": 0.6032594442367554, "learning_rate": 0.0001, "loss": 1.7967, "step": 11295 }, { "epoch": 1.312343886145803, "grad_norm": 0.5688466429710388, "learning_rate": 0.0001, "loss": 1.4937, "step": 11296 }, { "epoch": 1.3124600638977637, "grad_norm": 0.5861960053443909, "learning_rate": 0.0001, "loss": 1.4174, "step": 11297 }, { "epoch": 1.3125762416497242, "grad_norm": 0.5523131489753723, "learning_rate": 0.0001, "loss": 1.3868, "step": 11298 }, { "epoch": 1.3126924194016847, "grad_norm": 0.5910392999649048, "learning_rate": 0.0001, "loss": 1.3825, "step": 11299 }, { "epoch": 1.3128085971536452, "grad_norm": 0.574103832244873, "learning_rate": 0.0001, "loss": 1.4774, "step": 11300 }, { "epoch": 1.3129247749056057, "grad_norm": 0.5659858584403992, "learning_rate": 0.0001, "loss": 1.3085, "step": 11301 }, { "epoch": 1.3130409526575662, "grad_norm": 0.5777335166931152, "learning_rate": 0.0001, "loss": 1.3941, "step": 11302 }, { "epoch": 1.3131571304095266, "grad_norm": 0.6105539202690125, "learning_rate": 0.0001, "loss": 1.6012, "step": 11303 }, { "epoch": 1.3132733081614871, "grad_norm": 0.605769693851471, "learning_rate": 0.0001, "loss": 1.5137, "step": 11304 }, { "epoch": 1.3133894859134476, "grad_norm": 0.5957057476043701, "learning_rate": 0.0001, "loss": 1.412, "step": 11305 }, { "epoch": 1.313505663665408, "grad_norm": 0.5396880507469177, "learning_rate": 0.0001, "loss": 1.2279, "step": 11306 }, { "epoch": 1.3136218414173686, "grad_norm": 0.6096924543380737, "learning_rate": 0.0001, "loss": 1.4824, "step": 11307 }, { "epoch": 1.313738019169329, "grad_norm": 0.5338277816772461, "learning_rate": 0.0001, "loss": 1.3593, "step": 11308 }, { "epoch": 1.3138541969212896, "grad_norm": 0.5803439617156982, "learning_rate": 0.0001, "loss": 1.537, "step": 11309 }, { "epoch": 1.31397037467325, "grad_norm": 0.624612033367157, "learning_rate": 0.0001, "loss": 1.4565, "step": 11310 }, { "epoch": 1.3140865524252106, "grad_norm": 0.6359199285507202, "learning_rate": 0.0001, "loss": 1.4757, "step": 11311 }, { "epoch": 1.314202730177171, "grad_norm": 0.5825530290603638, "learning_rate": 0.0001, "loss": 1.6943, "step": 11312 }, { "epoch": 1.3143189079291315, "grad_norm": 0.5994022488594055, "learning_rate": 0.0001, "loss": 1.3063, "step": 11313 }, { "epoch": 1.314435085681092, "grad_norm": 0.5629523992538452, "learning_rate": 0.0001, "loss": 1.2914, "step": 11314 }, { "epoch": 1.3145512634330525, "grad_norm": 0.5352174639701843, "learning_rate": 0.0001, "loss": 1.3611, "step": 11315 }, { "epoch": 1.314667441185013, "grad_norm": 0.5785360336303711, "learning_rate": 0.0001, "loss": 1.4782, "step": 11316 }, { "epoch": 1.3147836189369735, "grad_norm": 0.5326957106590271, "learning_rate": 0.0001, "loss": 1.3276, "step": 11317 }, { "epoch": 1.314899796688934, "grad_norm": 0.5812473297119141, "learning_rate": 0.0001, "loss": 1.5484, "step": 11318 }, { "epoch": 1.3150159744408945, "grad_norm": 0.567494809627533, "learning_rate": 0.0001, "loss": 1.5323, "step": 11319 }, { "epoch": 1.315132152192855, "grad_norm": 0.6070596575737, "learning_rate": 0.0001, "loss": 1.4541, "step": 11320 }, { "epoch": 1.3152483299448154, "grad_norm": 0.6242623925209045, "learning_rate": 0.0001, "loss": 1.5356, "step": 11321 }, { "epoch": 1.315364507696776, "grad_norm": 0.5908821821212769, "learning_rate": 0.0001, "loss": 1.1826, "step": 11322 }, { "epoch": 1.3154806854487366, "grad_norm": 0.6052210927009583, "learning_rate": 0.0001, "loss": 1.5074, "step": 11323 }, { "epoch": 1.3155968632006971, "grad_norm": 0.5847655534744263, "learning_rate": 0.0001, "loss": 1.3685, "step": 11324 }, { "epoch": 1.3157130409526576, "grad_norm": 0.5557252168655396, "learning_rate": 0.0001, "loss": 1.4326, "step": 11325 }, { "epoch": 1.315829218704618, "grad_norm": 0.6291106939315796, "learning_rate": 0.0001, "loss": 1.4808, "step": 11326 }, { "epoch": 1.3159453964565786, "grad_norm": 0.5765290260314941, "learning_rate": 0.0001, "loss": 1.5763, "step": 11327 }, { "epoch": 1.316061574208539, "grad_norm": 0.5529724359512329, "learning_rate": 0.0001, "loss": 1.3843, "step": 11328 }, { "epoch": 1.3161777519604996, "grad_norm": 0.5492607355117798, "learning_rate": 0.0001, "loss": 1.564, "step": 11329 }, { "epoch": 1.31629392971246, "grad_norm": 0.6425765156745911, "learning_rate": 0.0001, "loss": 1.5878, "step": 11330 }, { "epoch": 1.3164101074644206, "grad_norm": 0.5800068974494934, "learning_rate": 0.0001, "loss": 1.4884, "step": 11331 }, { "epoch": 1.316526285216381, "grad_norm": 0.5752959251403809, "learning_rate": 0.0001, "loss": 1.5756, "step": 11332 }, { "epoch": 1.3166424629683415, "grad_norm": 0.5906381607055664, "learning_rate": 0.0001, "loss": 1.5202, "step": 11333 }, { "epoch": 1.316758640720302, "grad_norm": 0.6053020358085632, "learning_rate": 0.0001, "loss": 1.4883, "step": 11334 }, { "epoch": 1.3168748184722625, "grad_norm": 0.578601598739624, "learning_rate": 0.0001, "loss": 1.5683, "step": 11335 }, { "epoch": 1.316990996224223, "grad_norm": 0.5814728736877441, "learning_rate": 0.0001, "loss": 1.5167, "step": 11336 }, { "epoch": 1.3171071739761835, "grad_norm": 0.5758288502693176, "learning_rate": 0.0001, "loss": 1.4633, "step": 11337 }, { "epoch": 1.3172233517281442, "grad_norm": 0.5635868906974792, "learning_rate": 0.0001, "loss": 1.4216, "step": 11338 }, { "epoch": 1.3173395294801047, "grad_norm": 0.5569899082183838, "learning_rate": 0.0001, "loss": 1.3649, "step": 11339 }, { "epoch": 1.3174557072320652, "grad_norm": 0.5895545482635498, "learning_rate": 0.0001, "loss": 1.4655, "step": 11340 }, { "epoch": 1.3175718849840257, "grad_norm": 0.5705216526985168, "learning_rate": 0.0001, "loss": 1.4427, "step": 11341 }, { "epoch": 1.3176880627359862, "grad_norm": 0.5997105240821838, "learning_rate": 0.0001, "loss": 1.5919, "step": 11342 }, { "epoch": 1.3178042404879466, "grad_norm": 0.566246509552002, "learning_rate": 0.0001, "loss": 1.3959, "step": 11343 }, { "epoch": 1.3179204182399071, "grad_norm": 0.5906617641448975, "learning_rate": 0.0001, "loss": 1.4819, "step": 11344 }, { "epoch": 1.3180365959918676, "grad_norm": 0.6158729791641235, "learning_rate": 0.0001, "loss": 1.4863, "step": 11345 }, { "epoch": 1.318152773743828, "grad_norm": 0.6189556121826172, "learning_rate": 0.0001, "loss": 1.2961, "step": 11346 }, { "epoch": 1.3182689514957886, "grad_norm": 0.6094973683357239, "learning_rate": 0.0001, "loss": 1.5633, "step": 11347 }, { "epoch": 1.318385129247749, "grad_norm": 0.579227864742279, "learning_rate": 0.0001, "loss": 1.5111, "step": 11348 }, { "epoch": 1.3185013069997096, "grad_norm": 0.5919841527938843, "learning_rate": 0.0001, "loss": 1.4532, "step": 11349 }, { "epoch": 1.31861748475167, "grad_norm": 0.6161413192749023, "learning_rate": 0.0001, "loss": 1.5484, "step": 11350 }, { "epoch": 1.3187336625036306, "grad_norm": 0.6276363730430603, "learning_rate": 0.0001, "loss": 1.5114, "step": 11351 }, { "epoch": 1.318849840255591, "grad_norm": 0.5806869864463806, "learning_rate": 0.0001, "loss": 1.4119, "step": 11352 }, { "epoch": 1.3189660180075515, "grad_norm": 0.5905710458755493, "learning_rate": 0.0001, "loss": 1.3384, "step": 11353 }, { "epoch": 1.319082195759512, "grad_norm": 0.6194556355476379, "learning_rate": 0.0001, "loss": 1.6933, "step": 11354 }, { "epoch": 1.3191983735114725, "grad_norm": 0.6072413921356201, "learning_rate": 0.0001, "loss": 1.5567, "step": 11355 }, { "epoch": 1.319314551263433, "grad_norm": 0.6501614451408386, "learning_rate": 0.0001, "loss": 1.5474, "step": 11356 }, { "epoch": 1.3194307290153935, "grad_norm": 0.6481568217277527, "learning_rate": 0.0001, "loss": 1.5349, "step": 11357 }, { "epoch": 1.319546906767354, "grad_norm": 0.5640507340431213, "learning_rate": 0.0001, "loss": 1.4518, "step": 11358 }, { "epoch": 1.3196630845193145, "grad_norm": 0.5995705127716064, "learning_rate": 0.0001, "loss": 1.4165, "step": 11359 }, { "epoch": 1.319779262271275, "grad_norm": 0.561647355556488, "learning_rate": 0.0001, "loss": 1.4802, "step": 11360 }, { "epoch": 1.3198954400232354, "grad_norm": 0.5589419603347778, "learning_rate": 0.0001, "loss": 1.4506, "step": 11361 }, { "epoch": 1.320011617775196, "grad_norm": 0.5721113085746765, "learning_rate": 0.0001, "loss": 1.3061, "step": 11362 }, { "epoch": 1.3201277955271564, "grad_norm": 0.6529408097267151, "learning_rate": 0.0001, "loss": 1.6187, "step": 11363 }, { "epoch": 1.320243973279117, "grad_norm": 0.5891768932342529, "learning_rate": 0.0001, "loss": 1.4214, "step": 11364 }, { "epoch": 1.3203601510310776, "grad_norm": 0.5529371500015259, "learning_rate": 0.0001, "loss": 1.2607, "step": 11365 }, { "epoch": 1.3204763287830381, "grad_norm": 0.6112219095230103, "learning_rate": 0.0001, "loss": 1.6024, "step": 11366 }, { "epoch": 1.3205925065349986, "grad_norm": 0.559587299823761, "learning_rate": 0.0001, "loss": 1.3874, "step": 11367 }, { "epoch": 1.320708684286959, "grad_norm": 0.5865263938903809, "learning_rate": 0.0001, "loss": 1.4663, "step": 11368 }, { "epoch": 1.3208248620389196, "grad_norm": 0.5650449991226196, "learning_rate": 0.0001, "loss": 1.19, "step": 11369 }, { "epoch": 1.32094103979088, "grad_norm": 0.5587924122810364, "learning_rate": 0.0001, "loss": 1.5052, "step": 11370 }, { "epoch": 1.3210572175428406, "grad_norm": 0.6297246217727661, "learning_rate": 0.0001, "loss": 1.4897, "step": 11371 }, { "epoch": 1.321173395294801, "grad_norm": 0.59564608335495, "learning_rate": 0.0001, "loss": 1.3343, "step": 11372 }, { "epoch": 1.3212895730467615, "grad_norm": 0.5825412273406982, "learning_rate": 0.0001, "loss": 1.3953, "step": 11373 }, { "epoch": 1.321405750798722, "grad_norm": 0.6541181206703186, "learning_rate": 0.0001, "loss": 1.6079, "step": 11374 }, { "epoch": 1.3215219285506825, "grad_norm": 0.5667305588722229, "learning_rate": 0.0001, "loss": 1.4322, "step": 11375 }, { "epoch": 1.321638106302643, "grad_norm": 0.6084561347961426, "learning_rate": 0.0001, "loss": 1.4976, "step": 11376 }, { "epoch": 1.3217542840546035, "grad_norm": 0.6270078420639038, "learning_rate": 0.0001, "loss": 1.5412, "step": 11377 }, { "epoch": 1.321870461806564, "grad_norm": 0.576206386089325, "learning_rate": 0.0001, "loss": 1.4318, "step": 11378 }, { "epoch": 1.3219866395585245, "grad_norm": 0.6498734354972839, "learning_rate": 0.0001, "loss": 1.6348, "step": 11379 }, { "epoch": 1.3221028173104852, "grad_norm": 0.5948904752731323, "learning_rate": 0.0001, "loss": 1.6058, "step": 11380 }, { "epoch": 1.3222189950624457, "grad_norm": 0.5957643389701843, "learning_rate": 0.0001, "loss": 1.5562, "step": 11381 }, { "epoch": 1.3223351728144062, "grad_norm": 0.5807846784591675, "learning_rate": 0.0001, "loss": 1.458, "step": 11382 }, { "epoch": 1.3224513505663666, "grad_norm": 0.5830207467079163, "learning_rate": 0.0001, "loss": 1.4322, "step": 11383 }, { "epoch": 1.3225675283183271, "grad_norm": 0.5690310001373291, "learning_rate": 0.0001, "loss": 1.5376, "step": 11384 }, { "epoch": 1.3226837060702876, "grad_norm": 0.5671119689941406, "learning_rate": 0.0001, "loss": 1.445, "step": 11385 }, { "epoch": 1.3227998838222481, "grad_norm": 0.5723904967308044, "learning_rate": 0.0001, "loss": 1.3052, "step": 11386 }, { "epoch": 1.3229160615742086, "grad_norm": 0.5799258947372437, "learning_rate": 0.0001, "loss": 1.3406, "step": 11387 }, { "epoch": 1.323032239326169, "grad_norm": 0.6347507238388062, "learning_rate": 0.0001, "loss": 1.6213, "step": 11388 }, { "epoch": 1.3231484170781296, "grad_norm": 0.6038704514503479, "learning_rate": 0.0001, "loss": 1.5328, "step": 11389 }, { "epoch": 1.32326459483009, "grad_norm": 0.5528449416160583, "learning_rate": 0.0001, "loss": 1.4312, "step": 11390 }, { "epoch": 1.3233807725820506, "grad_norm": 0.6076419949531555, "learning_rate": 0.0001, "loss": 1.507, "step": 11391 }, { "epoch": 1.323496950334011, "grad_norm": 0.5536520481109619, "learning_rate": 0.0001, "loss": 1.5116, "step": 11392 }, { "epoch": 1.3236131280859715, "grad_norm": 0.58294278383255, "learning_rate": 0.0001, "loss": 1.4941, "step": 11393 }, { "epoch": 1.323729305837932, "grad_norm": 0.6274382472038269, "learning_rate": 0.0001, "loss": 1.6235, "step": 11394 }, { "epoch": 1.3238454835898925, "grad_norm": 0.5923705101013184, "learning_rate": 0.0001, "loss": 1.4008, "step": 11395 }, { "epoch": 1.323961661341853, "grad_norm": 0.6048977375030518, "learning_rate": 0.0001, "loss": 1.6093, "step": 11396 }, { "epoch": 1.3240778390938135, "grad_norm": 0.5796459913253784, "learning_rate": 0.0001, "loss": 1.5118, "step": 11397 }, { "epoch": 1.324194016845774, "grad_norm": 0.632941484451294, "learning_rate": 0.0001, "loss": 1.6222, "step": 11398 }, { "epoch": 1.3243101945977345, "grad_norm": 0.582012951374054, "learning_rate": 0.0001, "loss": 1.4579, "step": 11399 }, { "epoch": 1.324426372349695, "grad_norm": 0.5736321210861206, "learning_rate": 0.0001, "loss": 1.333, "step": 11400 }, { "epoch": 1.3245425501016554, "grad_norm": 0.5718238949775696, "learning_rate": 0.0001, "loss": 1.4398, "step": 11401 }, { "epoch": 1.324658727853616, "grad_norm": 0.6074345707893372, "learning_rate": 0.0001, "loss": 1.526, "step": 11402 }, { "epoch": 1.3247749056055764, "grad_norm": 0.6050236225128174, "learning_rate": 0.0001, "loss": 1.4498, "step": 11403 }, { "epoch": 1.324891083357537, "grad_norm": 0.5850708484649658, "learning_rate": 0.0001, "loss": 1.422, "step": 11404 }, { "epoch": 1.3250072611094974, "grad_norm": 0.6051907539367676, "learning_rate": 0.0001, "loss": 1.2887, "step": 11405 }, { "epoch": 1.3251234388614581, "grad_norm": 0.5894731283187866, "learning_rate": 0.0001, "loss": 1.2812, "step": 11406 }, { "epoch": 1.3252396166134186, "grad_norm": 0.5793226957321167, "learning_rate": 0.0001, "loss": 1.5097, "step": 11407 }, { "epoch": 1.325355794365379, "grad_norm": 0.564224898815155, "learning_rate": 0.0001, "loss": 1.2043, "step": 11408 }, { "epoch": 1.3254719721173396, "grad_norm": 0.5819063782691956, "learning_rate": 0.0001, "loss": 1.4239, "step": 11409 }, { "epoch": 1.3255881498693, "grad_norm": 0.539932370185852, "learning_rate": 0.0001, "loss": 1.5193, "step": 11410 }, { "epoch": 1.3257043276212606, "grad_norm": 0.5646186470985413, "learning_rate": 0.0001, "loss": 1.2979, "step": 11411 }, { "epoch": 1.325820505373221, "grad_norm": 0.5890675783157349, "learning_rate": 0.0001, "loss": 1.5787, "step": 11412 }, { "epoch": 1.3259366831251815, "grad_norm": 0.5870331525802612, "learning_rate": 0.0001, "loss": 1.5525, "step": 11413 }, { "epoch": 1.326052860877142, "grad_norm": 0.5778072476387024, "learning_rate": 0.0001, "loss": 1.4487, "step": 11414 }, { "epoch": 1.3261690386291025, "grad_norm": 0.5781219601631165, "learning_rate": 0.0001, "loss": 1.4424, "step": 11415 }, { "epoch": 1.326285216381063, "grad_norm": 0.6099265217781067, "learning_rate": 0.0001, "loss": 1.5679, "step": 11416 }, { "epoch": 1.3264013941330235, "grad_norm": 0.5225432515144348, "learning_rate": 0.0001, "loss": 1.2422, "step": 11417 }, { "epoch": 1.326517571884984, "grad_norm": 0.630908191204071, "learning_rate": 0.0001, "loss": 1.5711, "step": 11418 }, { "epoch": 1.3266337496369445, "grad_norm": 0.5389137268066406, "learning_rate": 0.0001, "loss": 1.2754, "step": 11419 }, { "epoch": 1.326749927388905, "grad_norm": 0.6575531959533691, "learning_rate": 0.0001, "loss": 1.5822, "step": 11420 }, { "epoch": 1.3268661051408654, "grad_norm": 0.6499255299568176, "learning_rate": 0.0001, "loss": 1.558, "step": 11421 }, { "epoch": 1.3269822828928262, "grad_norm": 0.5644387006759644, "learning_rate": 0.0001, "loss": 1.4451, "step": 11422 }, { "epoch": 1.3270984606447866, "grad_norm": 0.6073735356330872, "learning_rate": 0.0001, "loss": 1.5635, "step": 11423 }, { "epoch": 1.3272146383967471, "grad_norm": 0.6502189636230469, "learning_rate": 0.0001, "loss": 1.5142, "step": 11424 }, { "epoch": 1.3273308161487076, "grad_norm": 0.6316292881965637, "learning_rate": 0.0001, "loss": 1.5855, "step": 11425 }, { "epoch": 1.3274469939006681, "grad_norm": 0.5228843092918396, "learning_rate": 0.0001, "loss": 1.3864, "step": 11426 }, { "epoch": 1.3275631716526286, "grad_norm": 0.5909907221794128, "learning_rate": 0.0001, "loss": 1.5358, "step": 11427 }, { "epoch": 1.327679349404589, "grad_norm": 0.6318853497505188, "learning_rate": 0.0001, "loss": 1.569, "step": 11428 }, { "epoch": 1.3277955271565496, "grad_norm": 0.5925455689430237, "learning_rate": 0.0001, "loss": 1.4686, "step": 11429 }, { "epoch": 1.32791170490851, "grad_norm": 0.6002850532531738, "learning_rate": 0.0001, "loss": 1.5035, "step": 11430 }, { "epoch": 1.3280278826604706, "grad_norm": 0.6215521693229675, "learning_rate": 0.0001, "loss": 1.5532, "step": 11431 }, { "epoch": 1.328144060412431, "grad_norm": 0.6084332466125488, "learning_rate": 0.0001, "loss": 1.4333, "step": 11432 }, { "epoch": 1.3282602381643915, "grad_norm": 0.6313285231590271, "learning_rate": 0.0001, "loss": 1.5212, "step": 11433 }, { "epoch": 1.328376415916352, "grad_norm": 0.5687292814254761, "learning_rate": 0.0001, "loss": 1.4264, "step": 11434 }, { "epoch": 1.3284925936683125, "grad_norm": 0.6608239412307739, "learning_rate": 0.0001, "loss": 1.6383, "step": 11435 }, { "epoch": 1.328608771420273, "grad_norm": 0.6085755825042725, "learning_rate": 0.0001, "loss": 1.4592, "step": 11436 }, { "epoch": 1.3287249491722335, "grad_norm": 0.62114417552948, "learning_rate": 0.0001, "loss": 1.4995, "step": 11437 }, { "epoch": 1.328841126924194, "grad_norm": 0.5876638889312744, "learning_rate": 0.0001, "loss": 1.5137, "step": 11438 }, { "epoch": 1.3289573046761545, "grad_norm": 0.5959676504135132, "learning_rate": 0.0001, "loss": 1.5303, "step": 11439 }, { "epoch": 1.329073482428115, "grad_norm": 0.569511890411377, "learning_rate": 0.0001, "loss": 1.4759, "step": 11440 }, { "epoch": 1.3291896601800754, "grad_norm": 0.5644763112068176, "learning_rate": 0.0001, "loss": 1.5487, "step": 11441 }, { "epoch": 1.329305837932036, "grad_norm": 0.5564660429954529, "learning_rate": 0.0001, "loss": 1.5077, "step": 11442 }, { "epoch": 1.3294220156839964, "grad_norm": 0.5818626284599304, "learning_rate": 0.0001, "loss": 1.6736, "step": 11443 }, { "epoch": 1.329538193435957, "grad_norm": 0.6205581426620483, "learning_rate": 0.0001, "loss": 1.3026, "step": 11444 }, { "epoch": 1.3296543711879174, "grad_norm": 0.5764113068580627, "learning_rate": 0.0001, "loss": 1.414, "step": 11445 }, { "epoch": 1.329770548939878, "grad_norm": 0.5862987637519836, "learning_rate": 0.0001, "loss": 1.5675, "step": 11446 }, { "epoch": 1.3298867266918384, "grad_norm": 0.5654096007347107, "learning_rate": 0.0001, "loss": 1.2999, "step": 11447 }, { "epoch": 1.330002904443799, "grad_norm": 0.6210341453552246, "learning_rate": 0.0001, "loss": 1.5713, "step": 11448 }, { "epoch": 1.3301190821957596, "grad_norm": 0.6105040311813354, "learning_rate": 0.0001, "loss": 1.6378, "step": 11449 }, { "epoch": 1.33023525994772, "grad_norm": 0.5692093372344971, "learning_rate": 0.0001, "loss": 1.5327, "step": 11450 }, { "epoch": 1.3303514376996806, "grad_norm": 0.5587416887283325, "learning_rate": 0.0001, "loss": 1.3777, "step": 11451 }, { "epoch": 1.330467615451641, "grad_norm": 0.5672914981842041, "learning_rate": 0.0001, "loss": 1.3449, "step": 11452 }, { "epoch": 1.3305837932036015, "grad_norm": 0.5590872764587402, "learning_rate": 0.0001, "loss": 1.5599, "step": 11453 }, { "epoch": 1.330699970955562, "grad_norm": 0.5552334189414978, "learning_rate": 0.0001, "loss": 1.649, "step": 11454 }, { "epoch": 1.3308161487075225, "grad_norm": 0.6139841079711914, "learning_rate": 0.0001, "loss": 1.5895, "step": 11455 }, { "epoch": 1.330932326459483, "grad_norm": 0.5881022810935974, "learning_rate": 0.0001, "loss": 1.3873, "step": 11456 }, { "epoch": 1.3310485042114435, "grad_norm": 0.5796955227851868, "learning_rate": 0.0001, "loss": 1.598, "step": 11457 }, { "epoch": 1.331164681963404, "grad_norm": 0.5995549559593201, "learning_rate": 0.0001, "loss": 1.437, "step": 11458 }, { "epoch": 1.3312808597153645, "grad_norm": 0.5792880654335022, "learning_rate": 0.0001, "loss": 1.4925, "step": 11459 }, { "epoch": 1.331397037467325, "grad_norm": 0.5754793882369995, "learning_rate": 0.0001, "loss": 1.6644, "step": 11460 }, { "epoch": 1.3315132152192855, "grad_norm": 0.593312680721283, "learning_rate": 0.0001, "loss": 1.5568, "step": 11461 }, { "epoch": 1.331629392971246, "grad_norm": 0.5615260601043701, "learning_rate": 0.0001, "loss": 1.3415, "step": 11462 }, { "epoch": 1.3317455707232064, "grad_norm": 0.5913121104240417, "learning_rate": 0.0001, "loss": 1.2549, "step": 11463 }, { "epoch": 1.3318617484751671, "grad_norm": 0.55987948179245, "learning_rate": 0.0001, "loss": 1.4738, "step": 11464 }, { "epoch": 1.3319779262271276, "grad_norm": 0.6145794987678528, "learning_rate": 0.0001, "loss": 1.5215, "step": 11465 }, { "epoch": 1.3320941039790881, "grad_norm": 0.5672405362129211, "learning_rate": 0.0001, "loss": 1.4475, "step": 11466 }, { "epoch": 1.3322102817310486, "grad_norm": 0.6223773956298828, "learning_rate": 0.0001, "loss": 1.5384, "step": 11467 }, { "epoch": 1.332326459483009, "grad_norm": 0.5505383014678955, "learning_rate": 0.0001, "loss": 1.4953, "step": 11468 }, { "epoch": 1.3324426372349696, "grad_norm": 0.6319714784622192, "learning_rate": 0.0001, "loss": 1.4733, "step": 11469 }, { "epoch": 1.33255881498693, "grad_norm": 0.5813085436820984, "learning_rate": 0.0001, "loss": 1.5044, "step": 11470 }, { "epoch": 1.3326749927388906, "grad_norm": 0.5962019562721252, "learning_rate": 0.0001, "loss": 1.3574, "step": 11471 }, { "epoch": 1.332791170490851, "grad_norm": 0.5934303402900696, "learning_rate": 0.0001, "loss": 1.5318, "step": 11472 }, { "epoch": 1.3329073482428115, "grad_norm": 0.6068841218948364, "learning_rate": 0.0001, "loss": 1.3101, "step": 11473 }, { "epoch": 1.333023525994772, "grad_norm": 0.5842049717903137, "learning_rate": 0.0001, "loss": 1.522, "step": 11474 }, { "epoch": 1.3331397037467325, "grad_norm": 0.6370680928230286, "learning_rate": 0.0001, "loss": 1.4767, "step": 11475 }, { "epoch": 1.333255881498693, "grad_norm": 0.6403306722640991, "learning_rate": 0.0001, "loss": 1.4926, "step": 11476 }, { "epoch": 1.3333720592506535, "grad_norm": 0.5705578327178955, "learning_rate": 0.0001, "loss": 1.387, "step": 11477 }, { "epoch": 1.333488237002614, "grad_norm": 0.5864787101745605, "learning_rate": 0.0001, "loss": 1.623, "step": 11478 }, { "epoch": 1.3336044147545745, "grad_norm": 0.5673004388809204, "learning_rate": 0.0001, "loss": 1.4897, "step": 11479 }, { "epoch": 1.333720592506535, "grad_norm": 0.5407872200012207, "learning_rate": 0.0001, "loss": 1.3614, "step": 11480 }, { "epoch": 1.3338367702584955, "grad_norm": 0.53462815284729, "learning_rate": 0.0001, "loss": 1.3134, "step": 11481 }, { "epoch": 1.333952948010456, "grad_norm": 0.5789211392402649, "learning_rate": 0.0001, "loss": 1.3455, "step": 11482 }, { "epoch": 1.3340691257624164, "grad_norm": 0.6079903244972229, "learning_rate": 0.0001, "loss": 1.3866, "step": 11483 }, { "epoch": 1.334185303514377, "grad_norm": 0.5825889110565186, "learning_rate": 0.0001, "loss": 1.559, "step": 11484 }, { "epoch": 1.3343014812663374, "grad_norm": 0.565239429473877, "learning_rate": 0.0001, "loss": 1.4349, "step": 11485 }, { "epoch": 1.334417659018298, "grad_norm": 0.5922324061393738, "learning_rate": 0.0001, "loss": 1.4571, "step": 11486 }, { "epoch": 1.3345338367702584, "grad_norm": 0.5655604600906372, "learning_rate": 0.0001, "loss": 1.3975, "step": 11487 }, { "epoch": 1.3346500145222189, "grad_norm": 0.6273969411849976, "learning_rate": 0.0001, "loss": 1.7001, "step": 11488 }, { "epoch": 1.3347661922741794, "grad_norm": 0.6035481691360474, "learning_rate": 0.0001, "loss": 1.5982, "step": 11489 }, { "epoch": 1.33488237002614, "grad_norm": 0.5946334004402161, "learning_rate": 0.0001, "loss": 1.51, "step": 11490 }, { "epoch": 1.3349985477781006, "grad_norm": 0.587891697883606, "learning_rate": 0.0001, "loss": 1.558, "step": 11491 }, { "epoch": 1.335114725530061, "grad_norm": 0.6165737509727478, "learning_rate": 0.0001, "loss": 1.5527, "step": 11492 }, { "epoch": 1.3352309032820215, "grad_norm": 0.6015834808349609, "learning_rate": 0.0001, "loss": 1.5087, "step": 11493 }, { "epoch": 1.335347081033982, "grad_norm": 0.5921751856803894, "learning_rate": 0.0001, "loss": 1.4672, "step": 11494 }, { "epoch": 1.3354632587859425, "grad_norm": 0.602714478969574, "learning_rate": 0.0001, "loss": 1.2711, "step": 11495 }, { "epoch": 1.335579436537903, "grad_norm": 0.5740492939949036, "learning_rate": 0.0001, "loss": 1.5967, "step": 11496 }, { "epoch": 1.3356956142898635, "grad_norm": 0.5856906771659851, "learning_rate": 0.0001, "loss": 1.6184, "step": 11497 }, { "epoch": 1.335811792041824, "grad_norm": 0.5492340922355652, "learning_rate": 0.0001, "loss": 1.413, "step": 11498 }, { "epoch": 1.3359279697937845, "grad_norm": 0.6142228245735168, "learning_rate": 0.0001, "loss": 1.4071, "step": 11499 }, { "epoch": 1.336044147545745, "grad_norm": 0.5769044160842896, "learning_rate": 0.0001, "loss": 1.4111, "step": 11500 }, { "epoch": 1.3361603252977055, "grad_norm": 0.6063992381095886, "learning_rate": 0.0001, "loss": 1.5716, "step": 11501 }, { "epoch": 1.336276503049666, "grad_norm": 0.5918049812316895, "learning_rate": 0.0001, "loss": 1.4288, "step": 11502 }, { "epoch": 1.3363926808016264, "grad_norm": 0.6081083416938782, "learning_rate": 0.0001, "loss": 1.4468, "step": 11503 }, { "epoch": 1.336508858553587, "grad_norm": 0.567497730255127, "learning_rate": 0.0001, "loss": 1.4711, "step": 11504 }, { "epoch": 1.3366250363055474, "grad_norm": 0.5840210318565369, "learning_rate": 0.0001, "loss": 1.484, "step": 11505 }, { "epoch": 1.3367412140575081, "grad_norm": 0.5692864656448364, "learning_rate": 0.0001, "loss": 1.3308, "step": 11506 }, { "epoch": 1.3368573918094686, "grad_norm": 0.567046046257019, "learning_rate": 0.0001, "loss": 1.4432, "step": 11507 }, { "epoch": 1.336973569561429, "grad_norm": 0.5424807667732239, "learning_rate": 0.0001, "loss": 1.3719, "step": 11508 }, { "epoch": 1.3370897473133896, "grad_norm": 0.5703045725822449, "learning_rate": 0.0001, "loss": 1.5068, "step": 11509 }, { "epoch": 1.33720592506535, "grad_norm": 0.5481063723564148, "learning_rate": 0.0001, "loss": 1.3803, "step": 11510 }, { "epoch": 1.3373221028173106, "grad_norm": 0.5333367586135864, "learning_rate": 0.0001, "loss": 1.3449, "step": 11511 }, { "epoch": 1.337438280569271, "grad_norm": 0.6329618096351624, "learning_rate": 0.0001, "loss": 1.5148, "step": 11512 }, { "epoch": 1.3375544583212315, "grad_norm": 0.5826229453086853, "learning_rate": 0.0001, "loss": 1.3026, "step": 11513 }, { "epoch": 1.337670636073192, "grad_norm": 0.5669860243797302, "learning_rate": 0.0001, "loss": 1.4939, "step": 11514 }, { "epoch": 1.3377868138251525, "grad_norm": 0.5839524865150452, "learning_rate": 0.0001, "loss": 1.399, "step": 11515 }, { "epoch": 1.337902991577113, "grad_norm": 0.6274355053901672, "learning_rate": 0.0001, "loss": 1.6241, "step": 11516 }, { "epoch": 1.3380191693290735, "grad_norm": 0.5699935555458069, "learning_rate": 0.0001, "loss": 1.3266, "step": 11517 }, { "epoch": 1.338135347081034, "grad_norm": 0.5473787188529968, "learning_rate": 0.0001, "loss": 1.3531, "step": 11518 }, { "epoch": 1.3382515248329945, "grad_norm": 0.605440080165863, "learning_rate": 0.0001, "loss": 1.5424, "step": 11519 }, { "epoch": 1.338367702584955, "grad_norm": 0.558005690574646, "learning_rate": 0.0001, "loss": 1.3049, "step": 11520 }, { "epoch": 1.3384838803369155, "grad_norm": 0.6115478873252869, "learning_rate": 0.0001, "loss": 1.5498, "step": 11521 }, { "epoch": 1.338600058088876, "grad_norm": 0.590194821357727, "learning_rate": 0.0001, "loss": 1.4244, "step": 11522 }, { "epoch": 1.3387162358408364, "grad_norm": 0.5935150980949402, "learning_rate": 0.0001, "loss": 1.4641, "step": 11523 }, { "epoch": 1.338832413592797, "grad_norm": 0.6085209846496582, "learning_rate": 0.0001, "loss": 1.6074, "step": 11524 }, { "epoch": 1.3389485913447574, "grad_norm": 0.5541259050369263, "learning_rate": 0.0001, "loss": 1.332, "step": 11525 }, { "epoch": 1.339064769096718, "grad_norm": 0.5870988368988037, "learning_rate": 0.0001, "loss": 1.5353, "step": 11526 }, { "epoch": 1.3391809468486784, "grad_norm": 0.5646886825561523, "learning_rate": 0.0001, "loss": 1.5916, "step": 11527 }, { "epoch": 1.3392971246006389, "grad_norm": 0.5684593319892883, "learning_rate": 0.0001, "loss": 1.4385, "step": 11528 }, { "epoch": 1.3394133023525994, "grad_norm": 0.6310387849807739, "learning_rate": 0.0001, "loss": 1.7766, "step": 11529 }, { "epoch": 1.3395294801045599, "grad_norm": 0.6121505498886108, "learning_rate": 0.0001, "loss": 1.4929, "step": 11530 }, { "epoch": 1.3396456578565203, "grad_norm": 0.5295522212982178, "learning_rate": 0.0001, "loss": 1.4303, "step": 11531 }, { "epoch": 1.339761835608481, "grad_norm": 0.5718926191329956, "learning_rate": 0.0001, "loss": 1.3464, "step": 11532 }, { "epoch": 1.3398780133604415, "grad_norm": 0.598277747631073, "learning_rate": 0.0001, "loss": 1.5392, "step": 11533 }, { "epoch": 1.339994191112402, "grad_norm": 0.5410280227661133, "learning_rate": 0.0001, "loss": 1.3229, "step": 11534 }, { "epoch": 1.3401103688643625, "grad_norm": 0.545367956161499, "learning_rate": 0.0001, "loss": 1.5294, "step": 11535 }, { "epoch": 1.340226546616323, "grad_norm": 0.5809006690979004, "learning_rate": 0.0001, "loss": 1.5878, "step": 11536 }, { "epoch": 1.3403427243682835, "grad_norm": 0.6075916886329651, "learning_rate": 0.0001, "loss": 1.5287, "step": 11537 }, { "epoch": 1.340458902120244, "grad_norm": 0.5859774351119995, "learning_rate": 0.0001, "loss": 1.214, "step": 11538 }, { "epoch": 1.3405750798722045, "grad_norm": 0.5794239640235901, "learning_rate": 0.0001, "loss": 1.3904, "step": 11539 }, { "epoch": 1.340691257624165, "grad_norm": 0.5825338959693909, "learning_rate": 0.0001, "loss": 1.3614, "step": 11540 }, { "epoch": 1.3408074353761255, "grad_norm": 0.5686221718788147, "learning_rate": 0.0001, "loss": 1.2162, "step": 11541 }, { "epoch": 1.340923613128086, "grad_norm": 0.5766289234161377, "learning_rate": 0.0001, "loss": 1.5009, "step": 11542 }, { "epoch": 1.3410397908800464, "grad_norm": 0.6284175515174866, "learning_rate": 0.0001, "loss": 1.5627, "step": 11543 }, { "epoch": 1.341155968632007, "grad_norm": 0.6178475022315979, "learning_rate": 0.0001, "loss": 1.261, "step": 11544 }, { "epoch": 1.3412721463839674, "grad_norm": 0.6055488586425781, "learning_rate": 0.0001, "loss": 1.5144, "step": 11545 }, { "epoch": 1.341388324135928, "grad_norm": 0.6022990345954895, "learning_rate": 0.0001, "loss": 1.7137, "step": 11546 }, { "epoch": 1.3415045018878884, "grad_norm": 0.6357380151748657, "learning_rate": 0.0001, "loss": 1.5483, "step": 11547 }, { "epoch": 1.341620679639849, "grad_norm": 0.582905650138855, "learning_rate": 0.0001, "loss": 1.405, "step": 11548 }, { "epoch": 1.3417368573918096, "grad_norm": 0.5897303223609924, "learning_rate": 0.0001, "loss": 1.4004, "step": 11549 }, { "epoch": 1.34185303514377, "grad_norm": 0.6201091408729553, "learning_rate": 0.0001, "loss": 1.305, "step": 11550 }, { "epoch": 1.3419692128957306, "grad_norm": 0.5758824348449707, "learning_rate": 0.0001, "loss": 1.4441, "step": 11551 }, { "epoch": 1.342085390647691, "grad_norm": 0.5709869861602783, "learning_rate": 0.0001, "loss": 1.3256, "step": 11552 }, { "epoch": 1.3422015683996515, "grad_norm": 0.6379136443138123, "learning_rate": 0.0001, "loss": 1.4303, "step": 11553 }, { "epoch": 1.342317746151612, "grad_norm": 0.5988776087760925, "learning_rate": 0.0001, "loss": 1.4281, "step": 11554 }, { "epoch": 1.3424339239035725, "grad_norm": 0.597923994064331, "learning_rate": 0.0001, "loss": 1.4695, "step": 11555 }, { "epoch": 1.342550101655533, "grad_norm": 0.578001081943512, "learning_rate": 0.0001, "loss": 1.5161, "step": 11556 }, { "epoch": 1.3426662794074935, "grad_norm": 0.5900245308876038, "learning_rate": 0.0001, "loss": 1.5779, "step": 11557 }, { "epoch": 1.342782457159454, "grad_norm": 0.5991559028625488, "learning_rate": 0.0001, "loss": 1.5152, "step": 11558 }, { "epoch": 1.3428986349114145, "grad_norm": 0.5901554822921753, "learning_rate": 0.0001, "loss": 1.4043, "step": 11559 }, { "epoch": 1.343014812663375, "grad_norm": 0.5958110094070435, "learning_rate": 0.0001, "loss": 1.4813, "step": 11560 }, { "epoch": 1.3431309904153355, "grad_norm": 0.6428405046463013, "learning_rate": 0.0001, "loss": 1.5927, "step": 11561 }, { "epoch": 1.343247168167296, "grad_norm": 0.6413325071334839, "learning_rate": 0.0001, "loss": 1.6529, "step": 11562 }, { "epoch": 1.3433633459192564, "grad_norm": 0.596393346786499, "learning_rate": 0.0001, "loss": 1.4102, "step": 11563 }, { "epoch": 1.343479523671217, "grad_norm": 0.6022195219993591, "learning_rate": 0.0001, "loss": 1.5229, "step": 11564 }, { "epoch": 1.3435957014231774, "grad_norm": 0.5690363049507141, "learning_rate": 0.0001, "loss": 1.4665, "step": 11565 }, { "epoch": 1.343711879175138, "grad_norm": 0.5736088156700134, "learning_rate": 0.0001, "loss": 1.5445, "step": 11566 }, { "epoch": 1.3438280569270984, "grad_norm": 0.551696240901947, "learning_rate": 0.0001, "loss": 1.2572, "step": 11567 }, { "epoch": 1.3439442346790589, "grad_norm": 0.5662961602210999, "learning_rate": 0.0001, "loss": 1.5853, "step": 11568 }, { "epoch": 1.3440604124310194, "grad_norm": 0.5956161022186279, "learning_rate": 0.0001, "loss": 1.4668, "step": 11569 }, { "epoch": 1.3441765901829799, "grad_norm": 0.5635676383972168, "learning_rate": 0.0001, "loss": 1.5237, "step": 11570 }, { "epoch": 1.3442927679349403, "grad_norm": 0.5952593684196472, "learning_rate": 0.0001, "loss": 1.3867, "step": 11571 }, { "epoch": 1.3444089456869008, "grad_norm": 0.5866329073905945, "learning_rate": 0.0001, "loss": 1.5998, "step": 11572 }, { "epoch": 1.3445251234388613, "grad_norm": 0.6334074139595032, "learning_rate": 0.0001, "loss": 1.6183, "step": 11573 }, { "epoch": 1.344641301190822, "grad_norm": 0.5864160656929016, "learning_rate": 0.0001, "loss": 1.3639, "step": 11574 }, { "epoch": 1.3447574789427825, "grad_norm": 0.5770210027694702, "learning_rate": 0.0001, "loss": 1.3357, "step": 11575 }, { "epoch": 1.344873656694743, "grad_norm": 0.5954274535179138, "learning_rate": 0.0001, "loss": 1.4733, "step": 11576 }, { "epoch": 1.3449898344467035, "grad_norm": 0.6258078813552856, "learning_rate": 0.0001, "loss": 1.5786, "step": 11577 }, { "epoch": 1.345106012198664, "grad_norm": 0.62962406873703, "learning_rate": 0.0001, "loss": 1.5211, "step": 11578 }, { "epoch": 1.3452221899506245, "grad_norm": 0.572242259979248, "learning_rate": 0.0001, "loss": 1.2881, "step": 11579 }, { "epoch": 1.345338367702585, "grad_norm": 0.5981161594390869, "learning_rate": 0.0001, "loss": 1.5231, "step": 11580 }, { "epoch": 1.3454545454545455, "grad_norm": 0.5858927369117737, "learning_rate": 0.0001, "loss": 1.5263, "step": 11581 }, { "epoch": 1.345570723206506, "grad_norm": 0.5862258672714233, "learning_rate": 0.0001, "loss": 1.456, "step": 11582 }, { "epoch": 1.3456869009584664, "grad_norm": 0.5800897479057312, "learning_rate": 0.0001, "loss": 1.5431, "step": 11583 }, { "epoch": 1.345803078710427, "grad_norm": 0.5962756276130676, "learning_rate": 0.0001, "loss": 1.5612, "step": 11584 }, { "epoch": 1.3459192564623874, "grad_norm": 0.5521575808525085, "learning_rate": 0.0001, "loss": 1.3611, "step": 11585 }, { "epoch": 1.346035434214348, "grad_norm": 0.5704815983772278, "learning_rate": 0.0001, "loss": 1.4965, "step": 11586 }, { "epoch": 1.3461516119663084, "grad_norm": 0.55373615026474, "learning_rate": 0.0001, "loss": 1.5358, "step": 11587 }, { "epoch": 1.3462677897182689, "grad_norm": 0.5447104573249817, "learning_rate": 0.0001, "loss": 1.3546, "step": 11588 }, { "epoch": 1.3463839674702294, "grad_norm": 0.6218578815460205, "learning_rate": 0.0001, "loss": 1.4718, "step": 11589 }, { "epoch": 1.34650014522219, "grad_norm": 0.5952721238136292, "learning_rate": 0.0001, "loss": 1.3838, "step": 11590 }, { "epoch": 1.3466163229741506, "grad_norm": 0.5739622712135315, "learning_rate": 0.0001, "loss": 1.5094, "step": 11591 }, { "epoch": 1.346732500726111, "grad_norm": 0.5628431439399719, "learning_rate": 0.0001, "loss": 1.3305, "step": 11592 }, { "epoch": 1.3468486784780715, "grad_norm": 0.5847616195678711, "learning_rate": 0.0001, "loss": 1.6257, "step": 11593 }, { "epoch": 1.346964856230032, "grad_norm": 0.5959730744361877, "learning_rate": 0.0001, "loss": 1.4444, "step": 11594 }, { "epoch": 1.3470810339819925, "grad_norm": 0.5944980382919312, "learning_rate": 0.0001, "loss": 1.5047, "step": 11595 }, { "epoch": 1.347197211733953, "grad_norm": 0.6205131411552429, "learning_rate": 0.0001, "loss": 1.4828, "step": 11596 }, { "epoch": 1.3473133894859135, "grad_norm": 0.6303896903991699, "learning_rate": 0.0001, "loss": 1.6072, "step": 11597 }, { "epoch": 1.347429567237874, "grad_norm": 0.5928091406822205, "learning_rate": 0.0001, "loss": 1.5181, "step": 11598 }, { "epoch": 1.3475457449898345, "grad_norm": 0.6262045502662659, "learning_rate": 0.0001, "loss": 1.5543, "step": 11599 }, { "epoch": 1.347661922741795, "grad_norm": 0.6027948260307312, "learning_rate": 0.0001, "loss": 1.4472, "step": 11600 }, { "epoch": 1.3477781004937555, "grad_norm": 0.5992240905761719, "learning_rate": 0.0001, "loss": 1.4503, "step": 11601 }, { "epoch": 1.347894278245716, "grad_norm": 0.5964482426643372, "learning_rate": 0.0001, "loss": 1.4014, "step": 11602 }, { "epoch": 1.3480104559976764, "grad_norm": 0.5847633481025696, "learning_rate": 0.0001, "loss": 1.5462, "step": 11603 }, { "epoch": 1.348126633749637, "grad_norm": 0.5737441778182983, "learning_rate": 0.0001, "loss": 1.4899, "step": 11604 }, { "epoch": 1.3482428115015974, "grad_norm": 0.5500698685646057, "learning_rate": 0.0001, "loss": 1.4647, "step": 11605 }, { "epoch": 1.348358989253558, "grad_norm": 0.5695959329605103, "learning_rate": 0.0001, "loss": 1.484, "step": 11606 }, { "epoch": 1.3484751670055184, "grad_norm": 0.5738762617111206, "learning_rate": 0.0001, "loss": 1.6238, "step": 11607 }, { "epoch": 1.3485913447574789, "grad_norm": 0.5387170314788818, "learning_rate": 0.0001, "loss": 1.402, "step": 11608 }, { "epoch": 1.3487075225094394, "grad_norm": 0.5509867072105408, "learning_rate": 0.0001, "loss": 1.4802, "step": 11609 }, { "epoch": 1.3488237002613999, "grad_norm": 0.5982716679573059, "learning_rate": 0.0001, "loss": 1.4475, "step": 11610 }, { "epoch": 1.3489398780133603, "grad_norm": 0.5490851402282715, "learning_rate": 0.0001, "loss": 1.4278, "step": 11611 }, { "epoch": 1.3490560557653208, "grad_norm": 0.5994747877120972, "learning_rate": 0.0001, "loss": 1.4825, "step": 11612 }, { "epoch": 1.3491722335172813, "grad_norm": 0.6025500297546387, "learning_rate": 0.0001, "loss": 1.6634, "step": 11613 }, { "epoch": 1.3492884112692418, "grad_norm": 0.5859069228172302, "learning_rate": 0.0001, "loss": 1.4316, "step": 11614 }, { "epoch": 1.3494045890212023, "grad_norm": 0.6145164370536804, "learning_rate": 0.0001, "loss": 1.4605, "step": 11615 }, { "epoch": 1.349520766773163, "grad_norm": 0.6078627109527588, "learning_rate": 0.0001, "loss": 1.5687, "step": 11616 }, { "epoch": 1.3496369445251235, "grad_norm": 0.5715606808662415, "learning_rate": 0.0001, "loss": 1.5395, "step": 11617 }, { "epoch": 1.349753122277084, "grad_norm": 0.5815566778182983, "learning_rate": 0.0001, "loss": 1.403, "step": 11618 }, { "epoch": 1.3498693000290445, "grad_norm": 0.5756155252456665, "learning_rate": 0.0001, "loss": 1.3829, "step": 11619 }, { "epoch": 1.349985477781005, "grad_norm": 0.6013368368148804, "learning_rate": 0.0001, "loss": 1.492, "step": 11620 }, { "epoch": 1.3501016555329655, "grad_norm": 0.5688734650611877, "learning_rate": 0.0001, "loss": 1.3319, "step": 11621 }, { "epoch": 1.350217833284926, "grad_norm": 0.5575860738754272, "learning_rate": 0.0001, "loss": 1.3737, "step": 11622 }, { "epoch": 1.3503340110368864, "grad_norm": 0.6282293200492859, "learning_rate": 0.0001, "loss": 1.4909, "step": 11623 }, { "epoch": 1.350450188788847, "grad_norm": 0.5837210416793823, "learning_rate": 0.0001, "loss": 1.3822, "step": 11624 }, { "epoch": 1.3505663665408074, "grad_norm": 0.6083855032920837, "learning_rate": 0.0001, "loss": 1.5915, "step": 11625 }, { "epoch": 1.350682544292768, "grad_norm": 0.5760353803634644, "learning_rate": 0.0001, "loss": 1.4134, "step": 11626 }, { "epoch": 1.3507987220447284, "grad_norm": 0.5956183075904846, "learning_rate": 0.0001, "loss": 1.5215, "step": 11627 }, { "epoch": 1.3509148997966889, "grad_norm": 0.5973549485206604, "learning_rate": 0.0001, "loss": 1.6133, "step": 11628 }, { "epoch": 1.3510310775486494, "grad_norm": 0.5945565104484558, "learning_rate": 0.0001, "loss": 1.564, "step": 11629 }, { "epoch": 1.3511472553006099, "grad_norm": 0.5747148394584656, "learning_rate": 0.0001, "loss": 1.383, "step": 11630 }, { "epoch": 1.3512634330525704, "grad_norm": 0.6089474558830261, "learning_rate": 0.0001, "loss": 1.4009, "step": 11631 }, { "epoch": 1.351379610804531, "grad_norm": 0.5638538599014282, "learning_rate": 0.0001, "loss": 1.4793, "step": 11632 }, { "epoch": 1.3514957885564916, "grad_norm": 0.5719699263572693, "learning_rate": 0.0001, "loss": 1.3883, "step": 11633 }, { "epoch": 1.351611966308452, "grad_norm": 0.6159690618515015, "learning_rate": 0.0001, "loss": 1.4904, "step": 11634 }, { "epoch": 1.3517281440604125, "grad_norm": 0.6014747023582458, "learning_rate": 0.0001, "loss": 1.5219, "step": 11635 }, { "epoch": 1.351844321812373, "grad_norm": 0.5920547246932983, "learning_rate": 0.0001, "loss": 1.4401, "step": 11636 }, { "epoch": 1.3519604995643335, "grad_norm": 0.596462607383728, "learning_rate": 0.0001, "loss": 1.4317, "step": 11637 }, { "epoch": 1.352076677316294, "grad_norm": 0.5985187888145447, "learning_rate": 0.0001, "loss": 1.4105, "step": 11638 }, { "epoch": 1.3521928550682545, "grad_norm": 0.556800901889801, "learning_rate": 0.0001, "loss": 1.31, "step": 11639 }, { "epoch": 1.352309032820215, "grad_norm": 0.5883356332778931, "learning_rate": 0.0001, "loss": 1.584, "step": 11640 }, { "epoch": 1.3524252105721755, "grad_norm": 0.5282686352729797, "learning_rate": 0.0001, "loss": 1.4686, "step": 11641 }, { "epoch": 1.352541388324136, "grad_norm": 0.5937612056732178, "learning_rate": 0.0001, "loss": 1.4411, "step": 11642 }, { "epoch": 1.3526575660760964, "grad_norm": 0.6212623715400696, "learning_rate": 0.0001, "loss": 1.5084, "step": 11643 }, { "epoch": 1.352773743828057, "grad_norm": 0.5802718997001648, "learning_rate": 0.0001, "loss": 1.3355, "step": 11644 }, { "epoch": 1.3528899215800174, "grad_norm": 0.5913172364234924, "learning_rate": 0.0001, "loss": 1.4938, "step": 11645 }, { "epoch": 1.353006099331978, "grad_norm": 0.5834310054779053, "learning_rate": 0.0001, "loss": 1.4286, "step": 11646 }, { "epoch": 1.3531222770839384, "grad_norm": 0.5490809679031372, "learning_rate": 0.0001, "loss": 1.536, "step": 11647 }, { "epoch": 1.3532384548358989, "grad_norm": 0.5814580321311951, "learning_rate": 0.0001, "loss": 1.304, "step": 11648 }, { "epoch": 1.3533546325878594, "grad_norm": 0.6068845391273499, "learning_rate": 0.0001, "loss": 1.4916, "step": 11649 }, { "epoch": 1.3534708103398199, "grad_norm": 0.6316379308700562, "learning_rate": 0.0001, "loss": 1.2805, "step": 11650 }, { "epoch": 1.3535869880917804, "grad_norm": 0.5487270951271057, "learning_rate": 0.0001, "loss": 1.536, "step": 11651 }, { "epoch": 1.3537031658437408, "grad_norm": 0.6253399848937988, "learning_rate": 0.0001, "loss": 1.609, "step": 11652 }, { "epoch": 1.3538193435957013, "grad_norm": 0.5893374085426331, "learning_rate": 0.0001, "loss": 1.5526, "step": 11653 }, { "epoch": 1.3539355213476618, "grad_norm": 0.5669167637825012, "learning_rate": 0.0001, "loss": 1.51, "step": 11654 }, { "epoch": 1.3540516990996223, "grad_norm": 0.601037323474884, "learning_rate": 0.0001, "loss": 1.3515, "step": 11655 }, { "epoch": 1.3541678768515828, "grad_norm": 0.5730093717575073, "learning_rate": 0.0001, "loss": 1.4615, "step": 11656 }, { "epoch": 1.3542840546035433, "grad_norm": 0.5829418301582336, "learning_rate": 0.0001, "loss": 1.486, "step": 11657 }, { "epoch": 1.354400232355504, "grad_norm": 0.5747588872909546, "learning_rate": 0.0001, "loss": 1.5405, "step": 11658 }, { "epoch": 1.3545164101074645, "grad_norm": 0.6172161102294922, "learning_rate": 0.0001, "loss": 1.5367, "step": 11659 }, { "epoch": 1.354632587859425, "grad_norm": 0.5914645195007324, "learning_rate": 0.0001, "loss": 1.37, "step": 11660 }, { "epoch": 1.3547487656113855, "grad_norm": 0.6524379253387451, "learning_rate": 0.0001, "loss": 1.5559, "step": 11661 }, { "epoch": 1.354864943363346, "grad_norm": 0.588579535484314, "learning_rate": 0.0001, "loss": 1.38, "step": 11662 }, { "epoch": 1.3549811211153064, "grad_norm": 0.5805348753929138, "learning_rate": 0.0001, "loss": 1.6023, "step": 11663 }, { "epoch": 1.355097298867267, "grad_norm": 0.5591554641723633, "learning_rate": 0.0001, "loss": 1.4325, "step": 11664 }, { "epoch": 1.3552134766192274, "grad_norm": 0.545655369758606, "learning_rate": 0.0001, "loss": 1.5484, "step": 11665 }, { "epoch": 1.355329654371188, "grad_norm": 0.5984359383583069, "learning_rate": 0.0001, "loss": 1.573, "step": 11666 }, { "epoch": 1.3554458321231484, "grad_norm": 0.5707201361656189, "learning_rate": 0.0001, "loss": 1.5234, "step": 11667 }, { "epoch": 1.3555620098751089, "grad_norm": 0.6246615648269653, "learning_rate": 0.0001, "loss": 1.464, "step": 11668 }, { "epoch": 1.3556781876270694, "grad_norm": 0.5760742425918579, "learning_rate": 0.0001, "loss": 1.4609, "step": 11669 }, { "epoch": 1.3557943653790299, "grad_norm": 0.5490787029266357, "learning_rate": 0.0001, "loss": 1.2988, "step": 11670 }, { "epoch": 1.3559105431309904, "grad_norm": 0.5468339920043945, "learning_rate": 0.0001, "loss": 1.2894, "step": 11671 }, { "epoch": 1.3560267208829508, "grad_norm": 0.6399328708648682, "learning_rate": 0.0001, "loss": 1.4589, "step": 11672 }, { "epoch": 1.3561428986349113, "grad_norm": 0.6269949078559875, "learning_rate": 0.0001, "loss": 1.6827, "step": 11673 }, { "epoch": 1.356259076386872, "grad_norm": 0.5965935587882996, "learning_rate": 0.0001, "loss": 1.4121, "step": 11674 }, { "epoch": 1.3563752541388325, "grad_norm": 0.586742639541626, "learning_rate": 0.0001, "loss": 1.5327, "step": 11675 }, { "epoch": 1.356491431890793, "grad_norm": 0.5911440253257751, "learning_rate": 0.0001, "loss": 1.5497, "step": 11676 }, { "epoch": 1.3566076096427535, "grad_norm": 0.6567511558532715, "learning_rate": 0.0001, "loss": 1.6401, "step": 11677 }, { "epoch": 1.356723787394714, "grad_norm": 0.5498983263969421, "learning_rate": 0.0001, "loss": 1.269, "step": 11678 }, { "epoch": 1.3568399651466745, "grad_norm": 0.5766180753707886, "learning_rate": 0.0001, "loss": 1.4743, "step": 11679 }, { "epoch": 1.356956142898635, "grad_norm": 0.5323266386985779, "learning_rate": 0.0001, "loss": 1.2326, "step": 11680 }, { "epoch": 1.3570723206505955, "grad_norm": 0.5497226119041443, "learning_rate": 0.0001, "loss": 1.5357, "step": 11681 }, { "epoch": 1.357188498402556, "grad_norm": 0.5257942080497742, "learning_rate": 0.0001, "loss": 1.2964, "step": 11682 }, { "epoch": 1.3573046761545164, "grad_norm": 0.6501601338386536, "learning_rate": 0.0001, "loss": 1.5782, "step": 11683 }, { "epoch": 1.357420853906477, "grad_norm": 0.5762137174606323, "learning_rate": 0.0001, "loss": 1.3364, "step": 11684 }, { "epoch": 1.3575370316584374, "grad_norm": 0.6714363694190979, "learning_rate": 0.0001, "loss": 1.5958, "step": 11685 }, { "epoch": 1.357653209410398, "grad_norm": 0.614554762840271, "learning_rate": 0.0001, "loss": 1.5366, "step": 11686 }, { "epoch": 1.3577693871623584, "grad_norm": 0.5594595670700073, "learning_rate": 0.0001, "loss": 1.3527, "step": 11687 }, { "epoch": 1.3578855649143189, "grad_norm": 0.6071390509605408, "learning_rate": 0.0001, "loss": 1.4608, "step": 11688 }, { "epoch": 1.3580017426662794, "grad_norm": 0.5696654319763184, "learning_rate": 0.0001, "loss": 1.5251, "step": 11689 }, { "epoch": 1.3581179204182399, "grad_norm": 0.5189816355705261, "learning_rate": 0.0001, "loss": 1.2097, "step": 11690 }, { "epoch": 1.3582340981702004, "grad_norm": 0.5831829905509949, "learning_rate": 0.0001, "loss": 1.5446, "step": 11691 }, { "epoch": 1.3583502759221608, "grad_norm": 0.6040063500404358, "learning_rate": 0.0001, "loss": 1.597, "step": 11692 }, { "epoch": 1.3584664536741213, "grad_norm": 0.5630427598953247, "learning_rate": 0.0001, "loss": 1.384, "step": 11693 }, { "epoch": 1.3585826314260818, "grad_norm": 0.6305066347122192, "learning_rate": 0.0001, "loss": 1.6258, "step": 11694 }, { "epoch": 1.3586988091780423, "grad_norm": 0.614304780960083, "learning_rate": 0.0001, "loss": 1.4363, "step": 11695 }, { "epoch": 1.3588149869300028, "grad_norm": 0.5854483842849731, "learning_rate": 0.0001, "loss": 1.4955, "step": 11696 }, { "epoch": 1.3589311646819633, "grad_norm": 0.5789347290992737, "learning_rate": 0.0001, "loss": 1.4742, "step": 11697 }, { "epoch": 1.3590473424339238, "grad_norm": 0.5860118269920349, "learning_rate": 0.0001, "loss": 1.4509, "step": 11698 }, { "epoch": 1.3591635201858843, "grad_norm": 0.5637704730033875, "learning_rate": 0.0001, "loss": 1.4924, "step": 11699 }, { "epoch": 1.359279697937845, "grad_norm": 0.6265964508056641, "learning_rate": 0.0001, "loss": 1.5091, "step": 11700 }, { "epoch": 1.3593958756898055, "grad_norm": 0.6148953437805176, "learning_rate": 0.0001, "loss": 1.4862, "step": 11701 }, { "epoch": 1.359512053441766, "grad_norm": 0.6008301973342896, "learning_rate": 0.0001, "loss": 1.4017, "step": 11702 }, { "epoch": 1.3596282311937264, "grad_norm": 0.6362335681915283, "learning_rate": 0.0001, "loss": 1.4398, "step": 11703 }, { "epoch": 1.359744408945687, "grad_norm": 0.6056876182556152, "learning_rate": 0.0001, "loss": 1.4626, "step": 11704 }, { "epoch": 1.3598605866976474, "grad_norm": 0.625529944896698, "learning_rate": 0.0001, "loss": 1.3796, "step": 11705 }, { "epoch": 1.359976764449608, "grad_norm": 0.6198188662528992, "learning_rate": 0.0001, "loss": 1.3204, "step": 11706 }, { "epoch": 1.3600929422015684, "grad_norm": 0.540850818157196, "learning_rate": 0.0001, "loss": 1.3004, "step": 11707 }, { "epoch": 1.360209119953529, "grad_norm": 0.6461174488067627, "learning_rate": 0.0001, "loss": 1.5201, "step": 11708 }, { "epoch": 1.3603252977054894, "grad_norm": 0.5790330767631531, "learning_rate": 0.0001, "loss": 1.4575, "step": 11709 }, { "epoch": 1.3604414754574499, "grad_norm": 0.6312645673751831, "learning_rate": 0.0001, "loss": 1.6026, "step": 11710 }, { "epoch": 1.3605576532094104, "grad_norm": 0.5561741590499878, "learning_rate": 0.0001, "loss": 1.2689, "step": 11711 }, { "epoch": 1.3606738309613708, "grad_norm": 0.5601104497909546, "learning_rate": 0.0001, "loss": 1.4587, "step": 11712 }, { "epoch": 1.3607900087133313, "grad_norm": 0.6619279384613037, "learning_rate": 0.0001, "loss": 1.6191, "step": 11713 }, { "epoch": 1.3609061864652918, "grad_norm": 0.6291702389717102, "learning_rate": 0.0001, "loss": 1.5444, "step": 11714 }, { "epoch": 1.3610223642172525, "grad_norm": 0.6017669439315796, "learning_rate": 0.0001, "loss": 1.3569, "step": 11715 }, { "epoch": 1.361138541969213, "grad_norm": 0.5894443988800049, "learning_rate": 0.0001, "loss": 1.5589, "step": 11716 }, { "epoch": 1.3612547197211735, "grad_norm": 0.6237130165100098, "learning_rate": 0.0001, "loss": 1.5952, "step": 11717 }, { "epoch": 1.361370897473134, "grad_norm": 0.5698156952857971, "learning_rate": 0.0001, "loss": 1.3084, "step": 11718 }, { "epoch": 1.3614870752250945, "grad_norm": 0.5625483393669128, "learning_rate": 0.0001, "loss": 1.3964, "step": 11719 }, { "epoch": 1.361603252977055, "grad_norm": 0.6011530756950378, "learning_rate": 0.0001, "loss": 1.5401, "step": 11720 }, { "epoch": 1.3617194307290155, "grad_norm": 0.5694208741188049, "learning_rate": 0.0001, "loss": 1.4648, "step": 11721 }, { "epoch": 1.361835608480976, "grad_norm": 0.6437340974807739, "learning_rate": 0.0001, "loss": 1.5627, "step": 11722 }, { "epoch": 1.3619517862329364, "grad_norm": 0.5433085560798645, "learning_rate": 0.0001, "loss": 1.2774, "step": 11723 }, { "epoch": 1.362067963984897, "grad_norm": 0.5646154880523682, "learning_rate": 0.0001, "loss": 1.5754, "step": 11724 }, { "epoch": 1.3621841417368574, "grad_norm": 0.5664541125297546, "learning_rate": 0.0001, "loss": 1.3567, "step": 11725 }, { "epoch": 1.362300319488818, "grad_norm": 0.536392867565155, "learning_rate": 0.0001, "loss": 1.4264, "step": 11726 }, { "epoch": 1.3624164972407784, "grad_norm": 0.578348696231842, "learning_rate": 0.0001, "loss": 1.4304, "step": 11727 }, { "epoch": 1.362532674992739, "grad_norm": 0.633996307849884, "learning_rate": 0.0001, "loss": 1.5121, "step": 11728 }, { "epoch": 1.3626488527446994, "grad_norm": 0.5468116998672485, "learning_rate": 0.0001, "loss": 1.4625, "step": 11729 }, { "epoch": 1.3627650304966599, "grad_norm": 0.5580371618270874, "learning_rate": 0.0001, "loss": 1.3462, "step": 11730 }, { "epoch": 1.3628812082486204, "grad_norm": 0.5752867460250854, "learning_rate": 0.0001, "loss": 1.3912, "step": 11731 }, { "epoch": 1.3629973860005808, "grad_norm": 0.626487135887146, "learning_rate": 0.0001, "loss": 1.5946, "step": 11732 }, { "epoch": 1.3631135637525413, "grad_norm": 0.5772953033447266, "learning_rate": 0.0001, "loss": 1.4676, "step": 11733 }, { "epoch": 1.3632297415045018, "grad_norm": 0.5664294362068176, "learning_rate": 0.0001, "loss": 1.3619, "step": 11734 }, { "epoch": 1.3633459192564623, "grad_norm": 0.5935901999473572, "learning_rate": 0.0001, "loss": 1.519, "step": 11735 }, { "epoch": 1.3634620970084228, "grad_norm": 0.6294174194335938, "learning_rate": 0.0001, "loss": 1.6044, "step": 11736 }, { "epoch": 1.3635782747603833, "grad_norm": 0.6048177480697632, "learning_rate": 0.0001, "loss": 1.4796, "step": 11737 }, { "epoch": 1.3636944525123438, "grad_norm": 0.6142577528953552, "learning_rate": 0.0001, "loss": 1.4884, "step": 11738 }, { "epoch": 1.3638106302643043, "grad_norm": 0.549647867679596, "learning_rate": 0.0001, "loss": 1.4506, "step": 11739 }, { "epoch": 1.3639268080162648, "grad_norm": 0.6002432703971863, "learning_rate": 0.0001, "loss": 1.557, "step": 11740 }, { "epoch": 1.3640429857682252, "grad_norm": 0.5926917791366577, "learning_rate": 0.0001, "loss": 1.4598, "step": 11741 }, { "epoch": 1.364159163520186, "grad_norm": 0.5604010224342346, "learning_rate": 0.0001, "loss": 1.5695, "step": 11742 }, { "epoch": 1.3642753412721464, "grad_norm": 0.5525916218757629, "learning_rate": 0.0001, "loss": 1.3864, "step": 11743 }, { "epoch": 1.364391519024107, "grad_norm": 0.5697430968284607, "learning_rate": 0.0001, "loss": 1.3873, "step": 11744 }, { "epoch": 1.3645076967760674, "grad_norm": 0.5901938080787659, "learning_rate": 0.0001, "loss": 1.4719, "step": 11745 }, { "epoch": 1.364623874528028, "grad_norm": 0.6322489976882935, "learning_rate": 0.0001, "loss": 1.4157, "step": 11746 }, { "epoch": 1.3647400522799884, "grad_norm": 0.563325822353363, "learning_rate": 0.0001, "loss": 1.4027, "step": 11747 }, { "epoch": 1.364856230031949, "grad_norm": 0.5789667963981628, "learning_rate": 0.0001, "loss": 1.5618, "step": 11748 }, { "epoch": 1.3649724077839094, "grad_norm": 0.58510422706604, "learning_rate": 0.0001, "loss": 1.4739, "step": 11749 }, { "epoch": 1.3650885855358699, "grad_norm": 0.5673305988311768, "learning_rate": 0.0001, "loss": 1.3469, "step": 11750 }, { "epoch": 1.3652047632878304, "grad_norm": 0.6077521443367004, "learning_rate": 0.0001, "loss": 1.5691, "step": 11751 }, { "epoch": 1.3653209410397908, "grad_norm": 0.6002272963523865, "learning_rate": 0.0001, "loss": 1.6788, "step": 11752 }, { "epoch": 1.3654371187917513, "grad_norm": 0.5982224345207214, "learning_rate": 0.0001, "loss": 1.4948, "step": 11753 }, { "epoch": 1.3655532965437118, "grad_norm": 0.5681218504905701, "learning_rate": 0.0001, "loss": 1.3629, "step": 11754 }, { "epoch": 1.3656694742956723, "grad_norm": 0.6169049739837646, "learning_rate": 0.0001, "loss": 1.5424, "step": 11755 }, { "epoch": 1.3657856520476328, "grad_norm": 0.5828377604484558, "learning_rate": 0.0001, "loss": 1.4077, "step": 11756 }, { "epoch": 1.3659018297995935, "grad_norm": 0.5715786814689636, "learning_rate": 0.0001, "loss": 1.423, "step": 11757 }, { "epoch": 1.366018007551554, "grad_norm": 0.5703264474868774, "learning_rate": 0.0001, "loss": 1.4941, "step": 11758 }, { "epoch": 1.3661341853035145, "grad_norm": 0.6128681302070618, "learning_rate": 0.0001, "loss": 1.532, "step": 11759 }, { "epoch": 1.366250363055475, "grad_norm": 0.5742857456207275, "learning_rate": 0.0001, "loss": 1.3648, "step": 11760 }, { "epoch": 1.3663665408074355, "grad_norm": 0.5583314895629883, "learning_rate": 0.0001, "loss": 1.4086, "step": 11761 }, { "epoch": 1.366482718559396, "grad_norm": 0.5679374933242798, "learning_rate": 0.0001, "loss": 1.5492, "step": 11762 }, { "epoch": 1.3665988963113564, "grad_norm": 0.5741711258888245, "learning_rate": 0.0001, "loss": 1.4876, "step": 11763 }, { "epoch": 1.366715074063317, "grad_norm": 0.5635258555412292, "learning_rate": 0.0001, "loss": 1.4451, "step": 11764 }, { "epoch": 1.3668312518152774, "grad_norm": 0.5916835069656372, "learning_rate": 0.0001, "loss": 1.3363, "step": 11765 }, { "epoch": 1.366947429567238, "grad_norm": 0.5715609192848206, "learning_rate": 0.0001, "loss": 1.3299, "step": 11766 }, { "epoch": 1.3670636073191984, "grad_norm": 0.6283421516418457, "learning_rate": 0.0001, "loss": 1.588, "step": 11767 }, { "epoch": 1.367179785071159, "grad_norm": 0.557995617389679, "learning_rate": 0.0001, "loss": 1.3089, "step": 11768 }, { "epoch": 1.3672959628231194, "grad_norm": 0.5753653049468994, "learning_rate": 0.0001, "loss": 1.4653, "step": 11769 }, { "epoch": 1.3674121405750799, "grad_norm": 0.6110243797302246, "learning_rate": 0.0001, "loss": 1.5119, "step": 11770 }, { "epoch": 1.3675283183270404, "grad_norm": 0.642266571521759, "learning_rate": 0.0001, "loss": 1.4771, "step": 11771 }, { "epoch": 1.3676444960790008, "grad_norm": 0.5759993195533752, "learning_rate": 0.0001, "loss": 1.3839, "step": 11772 }, { "epoch": 1.3677606738309613, "grad_norm": 0.5989052653312683, "learning_rate": 0.0001, "loss": 1.5407, "step": 11773 }, { "epoch": 1.3678768515829218, "grad_norm": 0.5996981263160706, "learning_rate": 0.0001, "loss": 1.4698, "step": 11774 }, { "epoch": 1.3679930293348823, "grad_norm": 0.5985528826713562, "learning_rate": 0.0001, "loss": 1.4157, "step": 11775 }, { "epoch": 1.3681092070868428, "grad_norm": 0.5674298405647278, "learning_rate": 0.0001, "loss": 1.3278, "step": 11776 }, { "epoch": 1.3682253848388033, "grad_norm": 0.6040525436401367, "learning_rate": 0.0001, "loss": 1.5492, "step": 11777 }, { "epoch": 1.3683415625907638, "grad_norm": 0.5432946085929871, "learning_rate": 0.0001, "loss": 1.4631, "step": 11778 }, { "epoch": 1.3684577403427243, "grad_norm": 0.5686826109886169, "learning_rate": 0.0001, "loss": 1.4459, "step": 11779 }, { "epoch": 1.3685739180946848, "grad_norm": 0.5497419238090515, "learning_rate": 0.0001, "loss": 1.2312, "step": 11780 }, { "epoch": 1.3686900958466452, "grad_norm": 0.6301116943359375, "learning_rate": 0.0001, "loss": 1.4836, "step": 11781 }, { "epoch": 1.3688062735986057, "grad_norm": 0.562529981136322, "learning_rate": 0.0001, "loss": 1.4798, "step": 11782 }, { "epoch": 1.3689224513505664, "grad_norm": 0.6351971626281738, "learning_rate": 0.0001, "loss": 1.584, "step": 11783 }, { "epoch": 1.369038629102527, "grad_norm": 0.5856718420982361, "learning_rate": 0.0001, "loss": 1.4204, "step": 11784 }, { "epoch": 1.3691548068544874, "grad_norm": 0.5878714323043823, "learning_rate": 0.0001, "loss": 1.5263, "step": 11785 }, { "epoch": 1.369270984606448, "grad_norm": 0.5866144299507141, "learning_rate": 0.0001, "loss": 1.4446, "step": 11786 }, { "epoch": 1.3693871623584084, "grad_norm": 0.5930546522140503, "learning_rate": 0.0001, "loss": 1.436, "step": 11787 }, { "epoch": 1.369503340110369, "grad_norm": 0.5738999247550964, "learning_rate": 0.0001, "loss": 1.3887, "step": 11788 }, { "epoch": 1.3696195178623294, "grad_norm": 0.6546767950057983, "learning_rate": 0.0001, "loss": 1.7115, "step": 11789 }, { "epoch": 1.3697356956142899, "grad_norm": 0.5498051643371582, "learning_rate": 0.0001, "loss": 1.3995, "step": 11790 }, { "epoch": 1.3698518733662504, "grad_norm": 0.5807487368583679, "learning_rate": 0.0001, "loss": 1.4775, "step": 11791 }, { "epoch": 1.3699680511182108, "grad_norm": 0.582394003868103, "learning_rate": 0.0001, "loss": 1.4096, "step": 11792 }, { "epoch": 1.3700842288701713, "grad_norm": 0.5830745100975037, "learning_rate": 0.0001, "loss": 1.4392, "step": 11793 }, { "epoch": 1.3702004066221318, "grad_norm": 0.6170597672462463, "learning_rate": 0.0001, "loss": 1.6125, "step": 11794 }, { "epoch": 1.3703165843740923, "grad_norm": 0.5893037915229797, "learning_rate": 0.0001, "loss": 1.4202, "step": 11795 }, { "epoch": 1.3704327621260528, "grad_norm": 0.618791401386261, "learning_rate": 0.0001, "loss": 1.5396, "step": 11796 }, { "epoch": 1.3705489398780133, "grad_norm": 0.6027114987373352, "learning_rate": 0.0001, "loss": 1.4953, "step": 11797 }, { "epoch": 1.3706651176299738, "grad_norm": 0.5932108163833618, "learning_rate": 0.0001, "loss": 1.5312, "step": 11798 }, { "epoch": 1.3707812953819345, "grad_norm": 0.6085308790206909, "learning_rate": 0.0001, "loss": 1.4098, "step": 11799 }, { "epoch": 1.370897473133895, "grad_norm": 0.6194184422492981, "learning_rate": 0.0001, "loss": 1.4491, "step": 11800 }, { "epoch": 1.3710136508858555, "grad_norm": 0.5471352934837341, "learning_rate": 0.0001, "loss": 1.2728, "step": 11801 }, { "epoch": 1.371129828637816, "grad_norm": 0.5235902667045593, "learning_rate": 0.0001, "loss": 1.3627, "step": 11802 }, { "epoch": 1.3712460063897765, "grad_norm": 0.6246318817138672, "learning_rate": 0.0001, "loss": 1.393, "step": 11803 }, { "epoch": 1.371362184141737, "grad_norm": 0.6138882637023926, "learning_rate": 0.0001, "loss": 1.2501, "step": 11804 }, { "epoch": 1.3714783618936974, "grad_norm": 0.6174538731575012, "learning_rate": 0.0001, "loss": 1.34, "step": 11805 }, { "epoch": 1.371594539645658, "grad_norm": 0.6327789425849915, "learning_rate": 0.0001, "loss": 1.5747, "step": 11806 }, { "epoch": 1.3717107173976184, "grad_norm": 0.5819458365440369, "learning_rate": 0.0001, "loss": 1.4555, "step": 11807 }, { "epoch": 1.371826895149579, "grad_norm": 0.5566579699516296, "learning_rate": 0.0001, "loss": 1.3598, "step": 11808 }, { "epoch": 1.3719430729015394, "grad_norm": 0.608745813369751, "learning_rate": 0.0001, "loss": 1.7057, "step": 11809 }, { "epoch": 1.3720592506534999, "grad_norm": 0.6066353917121887, "learning_rate": 0.0001, "loss": 1.4786, "step": 11810 }, { "epoch": 1.3721754284054604, "grad_norm": 0.6526094079017639, "learning_rate": 0.0001, "loss": 1.6568, "step": 11811 }, { "epoch": 1.3722916061574209, "grad_norm": 0.5477768778800964, "learning_rate": 0.0001, "loss": 1.4258, "step": 11812 }, { "epoch": 1.3724077839093813, "grad_norm": 0.5935622453689575, "learning_rate": 0.0001, "loss": 1.4168, "step": 11813 }, { "epoch": 1.3725239616613418, "grad_norm": 0.6308596730232239, "learning_rate": 0.0001, "loss": 1.3882, "step": 11814 }, { "epoch": 1.3726401394133023, "grad_norm": 0.5651733875274658, "learning_rate": 0.0001, "loss": 1.2528, "step": 11815 }, { "epoch": 1.3727563171652628, "grad_norm": 0.5938041806221008, "learning_rate": 0.0001, "loss": 1.4036, "step": 11816 }, { "epoch": 1.3728724949172233, "grad_norm": 0.584787905216217, "learning_rate": 0.0001, "loss": 1.5447, "step": 11817 }, { "epoch": 1.3729886726691838, "grad_norm": 0.5955517292022705, "learning_rate": 0.0001, "loss": 1.3351, "step": 11818 }, { "epoch": 1.3731048504211443, "grad_norm": 0.595584511756897, "learning_rate": 0.0001, "loss": 1.5651, "step": 11819 }, { "epoch": 1.3732210281731048, "grad_norm": 0.5970519781112671, "learning_rate": 0.0001, "loss": 1.4131, "step": 11820 }, { "epoch": 1.3733372059250653, "grad_norm": 0.579200029373169, "learning_rate": 0.0001, "loss": 1.4517, "step": 11821 }, { "epoch": 1.3734533836770257, "grad_norm": 0.6147415637969971, "learning_rate": 0.0001, "loss": 1.5784, "step": 11822 }, { "epoch": 1.3735695614289862, "grad_norm": 0.6013969779014587, "learning_rate": 0.0001, "loss": 1.5465, "step": 11823 }, { "epoch": 1.3736857391809467, "grad_norm": 0.5508737564086914, "learning_rate": 0.0001, "loss": 1.4251, "step": 11824 }, { "epoch": 1.3738019169329074, "grad_norm": 0.5326734185218811, "learning_rate": 0.0001, "loss": 1.3177, "step": 11825 }, { "epoch": 1.373918094684868, "grad_norm": 0.5824387073516846, "learning_rate": 0.0001, "loss": 1.4181, "step": 11826 }, { "epoch": 1.3740342724368284, "grad_norm": 0.5780125856399536, "learning_rate": 0.0001, "loss": 1.3633, "step": 11827 }, { "epoch": 1.374150450188789, "grad_norm": 0.56535804271698, "learning_rate": 0.0001, "loss": 1.5986, "step": 11828 }, { "epoch": 1.3742666279407494, "grad_norm": 0.6138978600502014, "learning_rate": 0.0001, "loss": 1.5314, "step": 11829 }, { "epoch": 1.3743828056927099, "grad_norm": 0.6302936673164368, "learning_rate": 0.0001, "loss": 1.5483, "step": 11830 }, { "epoch": 1.3744989834446704, "grad_norm": 0.5425077080726624, "learning_rate": 0.0001, "loss": 1.2737, "step": 11831 }, { "epoch": 1.3746151611966309, "grad_norm": 0.5782576203346252, "learning_rate": 0.0001, "loss": 1.3394, "step": 11832 }, { "epoch": 1.3747313389485913, "grad_norm": 0.5924588441848755, "learning_rate": 0.0001, "loss": 1.4326, "step": 11833 }, { "epoch": 1.3748475167005518, "grad_norm": 0.6358031034469604, "learning_rate": 0.0001, "loss": 1.5883, "step": 11834 }, { "epoch": 1.3749636944525123, "grad_norm": 0.5872380137443542, "learning_rate": 0.0001, "loss": 1.3631, "step": 11835 }, { "epoch": 1.3750798722044728, "grad_norm": 0.5860925316810608, "learning_rate": 0.0001, "loss": 1.5267, "step": 11836 }, { "epoch": 1.3751960499564333, "grad_norm": 0.5498318076133728, "learning_rate": 0.0001, "loss": 1.4259, "step": 11837 }, { "epoch": 1.3753122277083938, "grad_norm": 0.5745248198509216, "learning_rate": 0.0001, "loss": 1.5403, "step": 11838 }, { "epoch": 1.3754284054603543, "grad_norm": 0.6191520690917969, "learning_rate": 0.0001, "loss": 1.4546, "step": 11839 }, { "epoch": 1.3755445832123148, "grad_norm": 0.5659325122833252, "learning_rate": 0.0001, "loss": 1.3464, "step": 11840 }, { "epoch": 1.3756607609642755, "grad_norm": 0.5831794738769531, "learning_rate": 0.0001, "loss": 1.3686, "step": 11841 }, { "epoch": 1.375776938716236, "grad_norm": 0.6090126633644104, "learning_rate": 0.0001, "loss": 1.4903, "step": 11842 }, { "epoch": 1.3758931164681965, "grad_norm": 0.5565717816352844, "learning_rate": 0.0001, "loss": 1.4553, "step": 11843 }, { "epoch": 1.376009294220157, "grad_norm": 0.6339083909988403, "learning_rate": 0.0001, "loss": 1.5181, "step": 11844 }, { "epoch": 1.3761254719721174, "grad_norm": 0.5814673900604248, "learning_rate": 0.0001, "loss": 1.3132, "step": 11845 }, { "epoch": 1.376241649724078, "grad_norm": 0.5672376155853271, "learning_rate": 0.0001, "loss": 1.3554, "step": 11846 }, { "epoch": 1.3763578274760384, "grad_norm": 0.6251681447029114, "learning_rate": 0.0001, "loss": 1.5639, "step": 11847 }, { "epoch": 1.376474005227999, "grad_norm": 0.5934876203536987, "learning_rate": 0.0001, "loss": 1.5827, "step": 11848 }, { "epoch": 1.3765901829799594, "grad_norm": 0.6062328219413757, "learning_rate": 0.0001, "loss": 1.5786, "step": 11849 }, { "epoch": 1.3767063607319199, "grad_norm": 0.6141518354415894, "learning_rate": 0.0001, "loss": 1.7519, "step": 11850 }, { "epoch": 1.3768225384838804, "grad_norm": 0.5661596059799194, "learning_rate": 0.0001, "loss": 1.4716, "step": 11851 }, { "epoch": 1.3769387162358409, "grad_norm": 0.5913867354393005, "learning_rate": 0.0001, "loss": 1.523, "step": 11852 }, { "epoch": 1.3770548939878013, "grad_norm": 0.6132528185844421, "learning_rate": 0.0001, "loss": 1.5667, "step": 11853 }, { "epoch": 1.3771710717397618, "grad_norm": 0.5519713163375854, "learning_rate": 0.0001, "loss": 1.2775, "step": 11854 }, { "epoch": 1.3772872494917223, "grad_norm": 0.586379885673523, "learning_rate": 0.0001, "loss": 1.3913, "step": 11855 }, { "epoch": 1.3774034272436828, "grad_norm": 0.6136939525604248, "learning_rate": 0.0001, "loss": 1.5782, "step": 11856 }, { "epoch": 1.3775196049956433, "grad_norm": 0.6094126105308533, "learning_rate": 0.0001, "loss": 1.4171, "step": 11857 }, { "epoch": 1.3776357827476038, "grad_norm": 0.6217323541641235, "learning_rate": 0.0001, "loss": 1.4317, "step": 11858 }, { "epoch": 1.3777519604995643, "grad_norm": 0.6349629163742065, "learning_rate": 0.0001, "loss": 1.4441, "step": 11859 }, { "epoch": 1.3778681382515248, "grad_norm": 0.565229058265686, "learning_rate": 0.0001, "loss": 1.5085, "step": 11860 }, { "epoch": 1.3779843160034853, "grad_norm": 0.6026855707168579, "learning_rate": 0.0001, "loss": 1.4853, "step": 11861 }, { "epoch": 1.3781004937554457, "grad_norm": 0.5912261009216309, "learning_rate": 0.0001, "loss": 1.5247, "step": 11862 }, { "epoch": 1.3782166715074062, "grad_norm": 0.5594475865364075, "learning_rate": 0.0001, "loss": 1.5227, "step": 11863 }, { "epoch": 1.3783328492593667, "grad_norm": 0.5987929701805115, "learning_rate": 0.0001, "loss": 1.4032, "step": 11864 }, { "epoch": 1.3784490270113272, "grad_norm": 0.5730629563331604, "learning_rate": 0.0001, "loss": 1.338, "step": 11865 }, { "epoch": 1.3785652047632877, "grad_norm": 0.5603897571563721, "learning_rate": 0.0001, "loss": 1.4159, "step": 11866 }, { "epoch": 1.3786813825152484, "grad_norm": 0.6142343878746033, "learning_rate": 0.0001, "loss": 1.5328, "step": 11867 }, { "epoch": 1.378797560267209, "grad_norm": 0.6077200174331665, "learning_rate": 0.0001, "loss": 1.516, "step": 11868 }, { "epoch": 1.3789137380191694, "grad_norm": 0.5924538373947144, "learning_rate": 0.0001, "loss": 1.5671, "step": 11869 }, { "epoch": 1.3790299157711299, "grad_norm": 0.572593092918396, "learning_rate": 0.0001, "loss": 1.3483, "step": 11870 }, { "epoch": 1.3791460935230904, "grad_norm": 0.5781865119934082, "learning_rate": 0.0001, "loss": 1.2929, "step": 11871 }, { "epoch": 1.3792622712750509, "grad_norm": 0.6432904005050659, "learning_rate": 0.0001, "loss": 1.5648, "step": 11872 }, { "epoch": 1.3793784490270113, "grad_norm": 0.5920875072479248, "learning_rate": 0.0001, "loss": 1.3776, "step": 11873 }, { "epoch": 1.3794946267789718, "grad_norm": 0.5507972836494446, "learning_rate": 0.0001, "loss": 1.3918, "step": 11874 }, { "epoch": 1.3796108045309323, "grad_norm": 0.5901596546173096, "learning_rate": 0.0001, "loss": 1.4458, "step": 11875 }, { "epoch": 1.3797269822828928, "grad_norm": 0.6390935182571411, "learning_rate": 0.0001, "loss": 1.6274, "step": 11876 }, { "epoch": 1.3798431600348533, "grad_norm": 0.6109213829040527, "learning_rate": 0.0001, "loss": 1.5234, "step": 11877 }, { "epoch": 1.3799593377868138, "grad_norm": 0.5569701790809631, "learning_rate": 0.0001, "loss": 1.3981, "step": 11878 }, { "epoch": 1.3800755155387743, "grad_norm": 0.5873339772224426, "learning_rate": 0.0001, "loss": 1.4033, "step": 11879 }, { "epoch": 1.3801916932907348, "grad_norm": 0.6170241236686707, "learning_rate": 0.0001, "loss": 1.4489, "step": 11880 }, { "epoch": 1.3803078710426953, "grad_norm": 0.5810169577598572, "learning_rate": 0.0001, "loss": 1.3362, "step": 11881 }, { "epoch": 1.3804240487946557, "grad_norm": 0.6244444251060486, "learning_rate": 0.0001, "loss": 1.5752, "step": 11882 }, { "epoch": 1.3805402265466165, "grad_norm": 0.5936790704727173, "learning_rate": 0.0001, "loss": 1.3589, "step": 11883 }, { "epoch": 1.380656404298577, "grad_norm": 0.5871090888977051, "learning_rate": 0.0001, "loss": 1.4629, "step": 11884 }, { "epoch": 1.3807725820505374, "grad_norm": 0.6491077542304993, "learning_rate": 0.0001, "loss": 1.3812, "step": 11885 }, { "epoch": 1.380888759802498, "grad_norm": 0.5600960850715637, "learning_rate": 0.0001, "loss": 1.2137, "step": 11886 }, { "epoch": 1.3810049375544584, "grad_norm": 0.6018544435501099, "learning_rate": 0.0001, "loss": 1.4018, "step": 11887 }, { "epoch": 1.381121115306419, "grad_norm": 0.58489990234375, "learning_rate": 0.0001, "loss": 1.3715, "step": 11888 }, { "epoch": 1.3812372930583794, "grad_norm": 0.5836353302001953, "learning_rate": 0.0001, "loss": 1.2618, "step": 11889 }, { "epoch": 1.3813534708103399, "grad_norm": 0.6115884184837341, "learning_rate": 0.0001, "loss": 1.58, "step": 11890 }, { "epoch": 1.3814696485623004, "grad_norm": 0.6390863060951233, "learning_rate": 0.0001, "loss": 1.4521, "step": 11891 }, { "epoch": 1.3815858263142609, "grad_norm": 0.5852669477462769, "learning_rate": 0.0001, "loss": 1.4158, "step": 11892 }, { "epoch": 1.3817020040662213, "grad_norm": 0.6284686923027039, "learning_rate": 0.0001, "loss": 1.4269, "step": 11893 }, { "epoch": 1.3818181818181818, "grad_norm": 0.6020316481590271, "learning_rate": 0.0001, "loss": 1.5378, "step": 11894 }, { "epoch": 1.3819343595701423, "grad_norm": 0.650677502155304, "learning_rate": 0.0001, "loss": 1.7152, "step": 11895 }, { "epoch": 1.3820505373221028, "grad_norm": 0.5915738940238953, "learning_rate": 0.0001, "loss": 1.4075, "step": 11896 }, { "epoch": 1.3821667150740633, "grad_norm": 0.6179349422454834, "learning_rate": 0.0001, "loss": 1.5468, "step": 11897 }, { "epoch": 1.3822828928260238, "grad_norm": 0.6398385167121887, "learning_rate": 0.0001, "loss": 1.2884, "step": 11898 }, { "epoch": 1.3823990705779843, "grad_norm": 0.6043710112571716, "learning_rate": 0.0001, "loss": 1.4017, "step": 11899 }, { "epoch": 1.3825152483299448, "grad_norm": 0.5860773921012878, "learning_rate": 0.0001, "loss": 1.4341, "step": 11900 }, { "epoch": 1.3826314260819053, "grad_norm": 0.6333348751068115, "learning_rate": 0.0001, "loss": 1.3162, "step": 11901 }, { "epoch": 1.3827476038338657, "grad_norm": 0.6222668290138245, "learning_rate": 0.0001, "loss": 1.4695, "step": 11902 }, { "epoch": 1.3828637815858262, "grad_norm": 0.5919185280799866, "learning_rate": 0.0001, "loss": 1.3576, "step": 11903 }, { "epoch": 1.3829799593377867, "grad_norm": 0.5540260076522827, "learning_rate": 0.0001, "loss": 1.2607, "step": 11904 }, { "epoch": 1.3830961370897472, "grad_norm": 0.6326545476913452, "learning_rate": 0.0001, "loss": 1.5169, "step": 11905 }, { "epoch": 1.3832123148417077, "grad_norm": 0.5651580691337585, "learning_rate": 0.0001, "loss": 1.3546, "step": 11906 }, { "epoch": 1.3833284925936682, "grad_norm": 0.5845093131065369, "learning_rate": 0.0001, "loss": 1.7022, "step": 11907 }, { "epoch": 1.3834446703456287, "grad_norm": 0.6123130321502686, "learning_rate": 0.0001, "loss": 1.672, "step": 11908 }, { "epoch": 1.3835608480975894, "grad_norm": 0.5754287242889404, "learning_rate": 0.0001, "loss": 1.4662, "step": 11909 }, { "epoch": 1.3836770258495499, "grad_norm": 0.6103564500808716, "learning_rate": 0.0001, "loss": 1.4633, "step": 11910 }, { "epoch": 1.3837932036015104, "grad_norm": 0.6126376390457153, "learning_rate": 0.0001, "loss": 1.4635, "step": 11911 }, { "epoch": 1.3839093813534709, "grad_norm": 0.5791416168212891, "learning_rate": 0.0001, "loss": 1.4217, "step": 11912 }, { "epoch": 1.3840255591054313, "grad_norm": 0.5979540944099426, "learning_rate": 0.0001, "loss": 1.3736, "step": 11913 }, { "epoch": 1.3841417368573918, "grad_norm": 0.5859628319740295, "learning_rate": 0.0001, "loss": 1.2924, "step": 11914 }, { "epoch": 1.3842579146093523, "grad_norm": 0.6160323619842529, "learning_rate": 0.0001, "loss": 1.4061, "step": 11915 }, { "epoch": 1.3843740923613128, "grad_norm": 0.6810671091079712, "learning_rate": 0.0001, "loss": 1.3882, "step": 11916 }, { "epoch": 1.3844902701132733, "grad_norm": 0.6706360578536987, "learning_rate": 0.0001, "loss": 1.5949, "step": 11917 }, { "epoch": 1.3846064478652338, "grad_norm": 0.5953517556190491, "learning_rate": 0.0001, "loss": 1.3044, "step": 11918 }, { "epoch": 1.3847226256171943, "grad_norm": 0.5882486701011658, "learning_rate": 0.0001, "loss": 1.4569, "step": 11919 }, { "epoch": 1.3848388033691548, "grad_norm": 0.6317164301872253, "learning_rate": 0.0001, "loss": 1.5054, "step": 11920 }, { "epoch": 1.3849549811211153, "grad_norm": 0.6241341233253479, "learning_rate": 0.0001, "loss": 1.5809, "step": 11921 }, { "epoch": 1.3850711588730757, "grad_norm": 0.6015408635139465, "learning_rate": 0.0001, "loss": 1.4114, "step": 11922 }, { "epoch": 1.3851873366250362, "grad_norm": 0.5517604351043701, "learning_rate": 0.0001, "loss": 1.4652, "step": 11923 }, { "epoch": 1.3853035143769967, "grad_norm": 0.6593002080917358, "learning_rate": 0.0001, "loss": 1.6226, "step": 11924 }, { "epoch": 1.3854196921289574, "grad_norm": 0.5856340527534485, "learning_rate": 0.0001, "loss": 1.5272, "step": 11925 }, { "epoch": 1.385535869880918, "grad_norm": 0.6336721181869507, "learning_rate": 0.0001, "loss": 1.5932, "step": 11926 }, { "epoch": 1.3856520476328784, "grad_norm": 0.5610465407371521, "learning_rate": 0.0001, "loss": 1.6593, "step": 11927 }, { "epoch": 1.385768225384839, "grad_norm": 0.6065878868103027, "learning_rate": 0.0001, "loss": 1.4454, "step": 11928 }, { "epoch": 1.3858844031367994, "grad_norm": 0.5784077048301697, "learning_rate": 0.0001, "loss": 1.4109, "step": 11929 }, { "epoch": 1.3860005808887599, "grad_norm": 0.5930120348930359, "learning_rate": 0.0001, "loss": 1.2691, "step": 11930 }, { "epoch": 1.3861167586407204, "grad_norm": 0.599595308303833, "learning_rate": 0.0001, "loss": 1.3383, "step": 11931 }, { "epoch": 1.3862329363926809, "grad_norm": 0.6132897734642029, "learning_rate": 0.0001, "loss": 1.5586, "step": 11932 }, { "epoch": 1.3863491141446413, "grad_norm": 0.5871015191078186, "learning_rate": 0.0001, "loss": 1.3863, "step": 11933 }, { "epoch": 1.3864652918966018, "grad_norm": 0.588950514793396, "learning_rate": 0.0001, "loss": 1.3364, "step": 11934 }, { "epoch": 1.3865814696485623, "grad_norm": 0.5841085314750671, "learning_rate": 0.0001, "loss": 1.4215, "step": 11935 }, { "epoch": 1.3866976474005228, "grad_norm": 0.5449914932250977, "learning_rate": 0.0001, "loss": 1.3709, "step": 11936 }, { "epoch": 1.3868138251524833, "grad_norm": 0.6109606623649597, "learning_rate": 0.0001, "loss": 1.3732, "step": 11937 }, { "epoch": 1.3869300029044438, "grad_norm": 0.6314137578010559, "learning_rate": 0.0001, "loss": 1.3493, "step": 11938 }, { "epoch": 1.3870461806564043, "grad_norm": 0.6377010345458984, "learning_rate": 0.0001, "loss": 1.3789, "step": 11939 }, { "epoch": 1.3871623584083648, "grad_norm": 0.6468605399131775, "learning_rate": 0.0001, "loss": 1.5538, "step": 11940 }, { "epoch": 1.3872785361603253, "grad_norm": 0.6271966099739075, "learning_rate": 0.0001, "loss": 1.5676, "step": 11941 }, { "epoch": 1.3873947139122857, "grad_norm": 0.5833338499069214, "learning_rate": 0.0001, "loss": 1.5451, "step": 11942 }, { "epoch": 1.3875108916642462, "grad_norm": 0.5465646386146545, "learning_rate": 0.0001, "loss": 1.2402, "step": 11943 }, { "epoch": 1.3876270694162067, "grad_norm": 0.5553290247917175, "learning_rate": 0.0001, "loss": 1.3301, "step": 11944 }, { "epoch": 1.3877432471681672, "grad_norm": 0.6450720429420471, "learning_rate": 0.0001, "loss": 1.5107, "step": 11945 }, { "epoch": 1.3878594249201277, "grad_norm": 0.6172477602958679, "learning_rate": 0.0001, "loss": 1.5336, "step": 11946 }, { "epoch": 1.3879756026720882, "grad_norm": 0.6228959560394287, "learning_rate": 0.0001, "loss": 1.5618, "step": 11947 }, { "epoch": 1.3880917804240487, "grad_norm": 0.6162193417549133, "learning_rate": 0.0001, "loss": 1.4889, "step": 11948 }, { "epoch": 1.3882079581760092, "grad_norm": 0.6280121803283691, "learning_rate": 0.0001, "loss": 1.5292, "step": 11949 }, { "epoch": 1.3883241359279697, "grad_norm": 0.5984324812889099, "learning_rate": 0.0001, "loss": 1.6882, "step": 11950 }, { "epoch": 1.3884403136799304, "grad_norm": 0.5944661498069763, "learning_rate": 0.0001, "loss": 1.4478, "step": 11951 }, { "epoch": 1.3885564914318909, "grad_norm": 0.6321014165878296, "learning_rate": 0.0001, "loss": 1.4836, "step": 11952 }, { "epoch": 1.3886726691838513, "grad_norm": 0.6064655780792236, "learning_rate": 0.0001, "loss": 1.5876, "step": 11953 }, { "epoch": 1.3887888469358118, "grad_norm": 0.5939200520515442, "learning_rate": 0.0001, "loss": 1.443, "step": 11954 }, { "epoch": 1.3889050246877723, "grad_norm": 0.5834383368492126, "learning_rate": 0.0001, "loss": 1.3573, "step": 11955 }, { "epoch": 1.3890212024397328, "grad_norm": 0.5999382138252258, "learning_rate": 0.0001, "loss": 1.3501, "step": 11956 }, { "epoch": 1.3891373801916933, "grad_norm": 0.652275562286377, "learning_rate": 0.0001, "loss": 1.4214, "step": 11957 }, { "epoch": 1.3892535579436538, "grad_norm": 0.6107578277587891, "learning_rate": 0.0001, "loss": 1.472, "step": 11958 }, { "epoch": 1.3893697356956143, "grad_norm": 0.6266455054283142, "learning_rate": 0.0001, "loss": 1.6335, "step": 11959 }, { "epoch": 1.3894859134475748, "grad_norm": 0.5955777168273926, "learning_rate": 0.0001, "loss": 1.4787, "step": 11960 }, { "epoch": 1.3896020911995353, "grad_norm": 0.6200416684150696, "learning_rate": 0.0001, "loss": 1.4147, "step": 11961 }, { "epoch": 1.3897182689514957, "grad_norm": 0.6102170348167419, "learning_rate": 0.0001, "loss": 1.5664, "step": 11962 }, { "epoch": 1.3898344467034562, "grad_norm": 0.5599600076675415, "learning_rate": 0.0001, "loss": 1.432, "step": 11963 }, { "epoch": 1.3899506244554167, "grad_norm": 0.6110653281211853, "learning_rate": 0.0001, "loss": 1.5714, "step": 11964 }, { "epoch": 1.3900668022073772, "grad_norm": 0.5802760720252991, "learning_rate": 0.0001, "loss": 1.488, "step": 11965 }, { "epoch": 1.3901829799593377, "grad_norm": 0.5832214951515198, "learning_rate": 0.0001, "loss": 1.6663, "step": 11966 }, { "epoch": 1.3902991577112984, "grad_norm": 0.5920937657356262, "learning_rate": 0.0001, "loss": 1.3629, "step": 11967 }, { "epoch": 1.390415335463259, "grad_norm": 0.6126205921173096, "learning_rate": 0.0001, "loss": 1.4929, "step": 11968 }, { "epoch": 1.3905315132152194, "grad_norm": 0.6226344108581543, "learning_rate": 0.0001, "loss": 1.6481, "step": 11969 }, { "epoch": 1.3906476909671799, "grad_norm": 0.5618362426757812, "learning_rate": 0.0001, "loss": 1.3998, "step": 11970 }, { "epoch": 1.3907638687191404, "grad_norm": 0.5985326766967773, "learning_rate": 0.0001, "loss": 1.5428, "step": 11971 }, { "epoch": 1.3908800464711009, "grad_norm": 0.611813485622406, "learning_rate": 0.0001, "loss": 1.5763, "step": 11972 }, { "epoch": 1.3909962242230614, "grad_norm": 0.5800209641456604, "learning_rate": 0.0001, "loss": 1.4741, "step": 11973 }, { "epoch": 1.3911124019750218, "grad_norm": 0.6421018838882446, "learning_rate": 0.0001, "loss": 1.5652, "step": 11974 }, { "epoch": 1.3912285797269823, "grad_norm": 0.5720347166061401, "learning_rate": 0.0001, "loss": 1.4541, "step": 11975 }, { "epoch": 1.3913447574789428, "grad_norm": 0.5714155435562134, "learning_rate": 0.0001, "loss": 1.3498, "step": 11976 }, { "epoch": 1.3914609352309033, "grad_norm": 0.5725839734077454, "learning_rate": 0.0001, "loss": 1.479, "step": 11977 }, { "epoch": 1.3915771129828638, "grad_norm": 0.5719639658927917, "learning_rate": 0.0001, "loss": 1.5502, "step": 11978 }, { "epoch": 1.3916932907348243, "grad_norm": 0.5681836605072021, "learning_rate": 0.0001, "loss": 1.5435, "step": 11979 }, { "epoch": 1.3918094684867848, "grad_norm": 0.5832224488258362, "learning_rate": 0.0001, "loss": 1.5036, "step": 11980 }, { "epoch": 1.3919256462387453, "grad_norm": 0.6468372941017151, "learning_rate": 0.0001, "loss": 1.4172, "step": 11981 }, { "epoch": 1.3920418239907058, "grad_norm": 0.5739614367485046, "learning_rate": 0.0001, "loss": 1.5003, "step": 11982 }, { "epoch": 1.3921580017426662, "grad_norm": 0.6735573410987854, "learning_rate": 0.0001, "loss": 1.6389, "step": 11983 }, { "epoch": 1.3922741794946267, "grad_norm": 0.6430903077125549, "learning_rate": 0.0001, "loss": 1.5639, "step": 11984 }, { "epoch": 1.3923903572465872, "grad_norm": 0.5974782705307007, "learning_rate": 0.0001, "loss": 1.442, "step": 11985 }, { "epoch": 1.3925065349985477, "grad_norm": 0.6093497276306152, "learning_rate": 0.0001, "loss": 1.4616, "step": 11986 }, { "epoch": 1.3926227127505082, "grad_norm": 0.5952013731002808, "learning_rate": 0.0001, "loss": 1.481, "step": 11987 }, { "epoch": 1.3927388905024687, "grad_norm": 0.577219545841217, "learning_rate": 0.0001, "loss": 1.5508, "step": 11988 }, { "epoch": 1.3928550682544292, "grad_norm": 0.5776684284210205, "learning_rate": 0.0001, "loss": 1.4687, "step": 11989 }, { "epoch": 1.3929712460063897, "grad_norm": 0.580201268196106, "learning_rate": 0.0001, "loss": 1.5702, "step": 11990 }, { "epoch": 1.3930874237583502, "grad_norm": 0.5810071229934692, "learning_rate": 0.0001, "loss": 1.4639, "step": 11991 }, { "epoch": 1.3932036015103106, "grad_norm": 0.5787037014961243, "learning_rate": 0.0001, "loss": 1.3579, "step": 11992 }, { "epoch": 1.3933197792622714, "grad_norm": 0.5724977850914001, "learning_rate": 0.0001, "loss": 1.4215, "step": 11993 }, { "epoch": 1.3934359570142318, "grad_norm": 0.5736649632453918, "learning_rate": 0.0001, "loss": 1.454, "step": 11994 }, { "epoch": 1.3935521347661923, "grad_norm": 0.6170047521591187, "learning_rate": 0.0001, "loss": 1.3581, "step": 11995 }, { "epoch": 1.3936683125181528, "grad_norm": 0.5980851650238037, "learning_rate": 0.0001, "loss": 1.5077, "step": 11996 }, { "epoch": 1.3937844902701133, "grad_norm": 0.5532777309417725, "learning_rate": 0.0001, "loss": 1.3881, "step": 11997 }, { "epoch": 1.3939006680220738, "grad_norm": 0.6409569382667542, "learning_rate": 0.0001, "loss": 1.5328, "step": 11998 }, { "epoch": 1.3940168457740343, "grad_norm": 0.564768373966217, "learning_rate": 0.0001, "loss": 1.3573, "step": 11999 }, { "epoch": 1.3941330235259948, "grad_norm": 0.6319162845611572, "learning_rate": 0.0001, "loss": 1.5109, "step": 12000 }, { "epoch": 1.3942492012779553, "grad_norm": 0.537635862827301, "learning_rate": 0.0001, "loss": 1.2859, "step": 12001 }, { "epoch": 1.3943653790299158, "grad_norm": 0.6129380464553833, "learning_rate": 0.0001, "loss": 1.4286, "step": 12002 }, { "epoch": 1.3944815567818762, "grad_norm": 0.5544732809066772, "learning_rate": 0.0001, "loss": 1.328, "step": 12003 }, { "epoch": 1.3945977345338367, "grad_norm": 0.5666175484657288, "learning_rate": 0.0001, "loss": 1.448, "step": 12004 }, { "epoch": 1.3947139122857972, "grad_norm": 0.5639254450798035, "learning_rate": 0.0001, "loss": 1.2761, "step": 12005 }, { "epoch": 1.3948300900377577, "grad_norm": 0.6033602952957153, "learning_rate": 0.0001, "loss": 1.6798, "step": 12006 }, { "epoch": 1.3949462677897182, "grad_norm": 0.6276678442955017, "learning_rate": 0.0001, "loss": 1.4993, "step": 12007 }, { "epoch": 1.3950624455416787, "grad_norm": 0.5830793976783752, "learning_rate": 0.0001, "loss": 1.517, "step": 12008 }, { "epoch": 1.3951786232936394, "grad_norm": 0.5950773358345032, "learning_rate": 0.0001, "loss": 1.4298, "step": 12009 }, { "epoch": 1.3952948010455999, "grad_norm": 0.6116399168968201, "learning_rate": 0.0001, "loss": 1.4268, "step": 12010 }, { "epoch": 1.3954109787975604, "grad_norm": 0.5737411975860596, "learning_rate": 0.0001, "loss": 1.4567, "step": 12011 }, { "epoch": 1.3955271565495209, "grad_norm": 0.5670390129089355, "learning_rate": 0.0001, "loss": 1.3832, "step": 12012 }, { "epoch": 1.3956433343014814, "grad_norm": 0.5643200278282166, "learning_rate": 0.0001, "loss": 1.5224, "step": 12013 }, { "epoch": 1.3957595120534418, "grad_norm": 0.588569700717926, "learning_rate": 0.0001, "loss": 1.4724, "step": 12014 }, { "epoch": 1.3958756898054023, "grad_norm": 0.5916202664375305, "learning_rate": 0.0001, "loss": 1.3856, "step": 12015 }, { "epoch": 1.3959918675573628, "grad_norm": 0.57044517993927, "learning_rate": 0.0001, "loss": 1.4782, "step": 12016 }, { "epoch": 1.3961080453093233, "grad_norm": 0.5924696326255798, "learning_rate": 0.0001, "loss": 1.4195, "step": 12017 }, { "epoch": 1.3962242230612838, "grad_norm": 0.5659184455871582, "learning_rate": 0.0001, "loss": 1.3907, "step": 12018 }, { "epoch": 1.3963404008132443, "grad_norm": 0.6118780970573425, "learning_rate": 0.0001, "loss": 1.4633, "step": 12019 }, { "epoch": 1.3964565785652048, "grad_norm": 0.569577693939209, "learning_rate": 0.0001, "loss": 1.3672, "step": 12020 }, { "epoch": 1.3965727563171653, "grad_norm": 0.5691976547241211, "learning_rate": 0.0001, "loss": 1.3072, "step": 12021 }, { "epoch": 1.3966889340691258, "grad_norm": 0.6030403971672058, "learning_rate": 0.0001, "loss": 1.4615, "step": 12022 }, { "epoch": 1.3968051118210862, "grad_norm": 0.560248851776123, "learning_rate": 0.0001, "loss": 1.2186, "step": 12023 }, { "epoch": 1.3969212895730467, "grad_norm": 0.5866391658782959, "learning_rate": 0.0001, "loss": 1.4396, "step": 12024 }, { "epoch": 1.3970374673250072, "grad_norm": 0.624195396900177, "learning_rate": 0.0001, "loss": 1.6459, "step": 12025 }, { "epoch": 1.3971536450769677, "grad_norm": 0.6529926657676697, "learning_rate": 0.0001, "loss": 1.6331, "step": 12026 }, { "epoch": 1.3972698228289282, "grad_norm": 0.6984851360321045, "learning_rate": 0.0001, "loss": 1.6492, "step": 12027 }, { "epoch": 1.3973860005808887, "grad_norm": 0.6242703795433044, "learning_rate": 0.0001, "loss": 1.4967, "step": 12028 }, { "epoch": 1.3975021783328492, "grad_norm": 0.5590314865112305, "learning_rate": 0.0001, "loss": 1.3682, "step": 12029 }, { "epoch": 1.3976183560848097, "grad_norm": 0.6193909049034119, "learning_rate": 0.0001, "loss": 1.5752, "step": 12030 }, { "epoch": 1.3977345338367702, "grad_norm": 0.5770513415336609, "learning_rate": 0.0001, "loss": 1.563, "step": 12031 }, { "epoch": 1.3978507115887306, "grad_norm": 0.5817797780036926, "learning_rate": 0.0001, "loss": 1.4469, "step": 12032 }, { "epoch": 1.3979668893406911, "grad_norm": 0.5368718504905701, "learning_rate": 0.0001, "loss": 1.5278, "step": 12033 }, { "epoch": 1.3980830670926516, "grad_norm": 0.6158215403556824, "learning_rate": 0.0001, "loss": 1.5191, "step": 12034 }, { "epoch": 1.3981992448446123, "grad_norm": 0.5757713317871094, "learning_rate": 0.0001, "loss": 1.3353, "step": 12035 }, { "epoch": 1.3983154225965728, "grad_norm": 0.5898905992507935, "learning_rate": 0.0001, "loss": 1.4962, "step": 12036 }, { "epoch": 1.3984316003485333, "grad_norm": 0.5727129578590393, "learning_rate": 0.0001, "loss": 1.5522, "step": 12037 }, { "epoch": 1.3985477781004938, "grad_norm": 0.573549747467041, "learning_rate": 0.0001, "loss": 1.4548, "step": 12038 }, { "epoch": 1.3986639558524543, "grad_norm": 0.5966438055038452, "learning_rate": 0.0001, "loss": 1.4093, "step": 12039 }, { "epoch": 1.3987801336044148, "grad_norm": 0.5646287202835083, "learning_rate": 0.0001, "loss": 1.4349, "step": 12040 }, { "epoch": 1.3988963113563753, "grad_norm": 0.6419237852096558, "learning_rate": 0.0001, "loss": 1.6598, "step": 12041 }, { "epoch": 1.3990124891083358, "grad_norm": 0.62077397108078, "learning_rate": 0.0001, "loss": 1.5193, "step": 12042 }, { "epoch": 1.3991286668602962, "grad_norm": 0.5966094136238098, "learning_rate": 0.0001, "loss": 1.6089, "step": 12043 }, { "epoch": 1.3992448446122567, "grad_norm": 0.5946581363677979, "learning_rate": 0.0001, "loss": 1.4991, "step": 12044 }, { "epoch": 1.3993610223642172, "grad_norm": 0.5981970429420471, "learning_rate": 0.0001, "loss": 1.5537, "step": 12045 }, { "epoch": 1.3994772001161777, "grad_norm": 0.5579191446304321, "learning_rate": 0.0001, "loss": 1.3866, "step": 12046 }, { "epoch": 1.3995933778681382, "grad_norm": 0.5763770937919617, "learning_rate": 0.0001, "loss": 1.518, "step": 12047 }, { "epoch": 1.3997095556200987, "grad_norm": 0.5983228087425232, "learning_rate": 0.0001, "loss": 1.3796, "step": 12048 }, { "epoch": 1.3998257333720592, "grad_norm": 0.5548502206802368, "learning_rate": 0.0001, "loss": 1.2765, "step": 12049 }, { "epoch": 1.3999419111240199, "grad_norm": 0.598829984664917, "learning_rate": 0.0001, "loss": 1.4531, "step": 12050 }, { "epoch": 1.4000580888759804, "grad_norm": 0.5747253894805908, "learning_rate": 0.0001, "loss": 1.2931, "step": 12051 }, { "epoch": 1.4001742666279409, "grad_norm": 0.5953359007835388, "learning_rate": 0.0001, "loss": 1.3444, "step": 12052 }, { "epoch": 1.4002904443799014, "grad_norm": 0.5862138271331787, "learning_rate": 0.0001, "loss": 1.6761, "step": 12053 }, { "epoch": 1.4004066221318618, "grad_norm": 0.5845853090286255, "learning_rate": 0.0001, "loss": 1.5413, "step": 12054 }, { "epoch": 1.4005227998838223, "grad_norm": 0.5902648568153381, "learning_rate": 0.0001, "loss": 1.372, "step": 12055 }, { "epoch": 1.4006389776357828, "grad_norm": 0.6484795212745667, "learning_rate": 0.0001, "loss": 1.6057, "step": 12056 }, { "epoch": 1.4007551553877433, "grad_norm": 0.6351671814918518, "learning_rate": 0.0001, "loss": 1.6507, "step": 12057 }, { "epoch": 1.4008713331397038, "grad_norm": 0.5990087389945984, "learning_rate": 0.0001, "loss": 1.5616, "step": 12058 }, { "epoch": 1.4009875108916643, "grad_norm": 0.5999724864959717, "learning_rate": 0.0001, "loss": 1.5903, "step": 12059 }, { "epoch": 1.4011036886436248, "grad_norm": 0.6274200081825256, "learning_rate": 0.0001, "loss": 1.5174, "step": 12060 }, { "epoch": 1.4012198663955853, "grad_norm": 0.5672786831855774, "learning_rate": 0.0001, "loss": 1.4998, "step": 12061 }, { "epoch": 1.4013360441475458, "grad_norm": 0.6059845089912415, "learning_rate": 0.0001, "loss": 1.6055, "step": 12062 }, { "epoch": 1.4014522218995062, "grad_norm": 0.583419144153595, "learning_rate": 0.0001, "loss": 1.3577, "step": 12063 }, { "epoch": 1.4015683996514667, "grad_norm": 0.5932354927062988, "learning_rate": 0.0001, "loss": 1.5363, "step": 12064 }, { "epoch": 1.4016845774034272, "grad_norm": 0.6560299396514893, "learning_rate": 0.0001, "loss": 1.4958, "step": 12065 }, { "epoch": 1.4018007551553877, "grad_norm": 0.5621389150619507, "learning_rate": 0.0001, "loss": 1.5461, "step": 12066 }, { "epoch": 1.4019169329073482, "grad_norm": 0.5786882638931274, "learning_rate": 0.0001, "loss": 1.4813, "step": 12067 }, { "epoch": 1.4020331106593087, "grad_norm": 0.5459783673286438, "learning_rate": 0.0001, "loss": 1.5, "step": 12068 }, { "epoch": 1.4021492884112692, "grad_norm": 0.5643264651298523, "learning_rate": 0.0001, "loss": 1.5432, "step": 12069 }, { "epoch": 1.4022654661632297, "grad_norm": 0.5764142870903015, "learning_rate": 0.0001, "loss": 1.3799, "step": 12070 }, { "epoch": 1.4023816439151902, "grad_norm": 0.5647251009941101, "learning_rate": 0.0001, "loss": 1.4376, "step": 12071 }, { "epoch": 1.4024978216671506, "grad_norm": 0.5865373611450195, "learning_rate": 0.0001, "loss": 1.513, "step": 12072 }, { "epoch": 1.4026139994191111, "grad_norm": 0.5826906561851501, "learning_rate": 0.0001, "loss": 1.4781, "step": 12073 }, { "epoch": 1.4027301771710716, "grad_norm": 0.5692470669746399, "learning_rate": 0.0001, "loss": 1.3101, "step": 12074 }, { "epoch": 1.4028463549230321, "grad_norm": 0.5442948937416077, "learning_rate": 0.0001, "loss": 1.5004, "step": 12075 }, { "epoch": 1.4029625326749926, "grad_norm": 0.5658257007598877, "learning_rate": 0.0001, "loss": 1.3957, "step": 12076 }, { "epoch": 1.4030787104269533, "grad_norm": 0.595670759677887, "learning_rate": 0.0001, "loss": 1.5329, "step": 12077 }, { "epoch": 1.4031948881789138, "grad_norm": 0.6052472591400146, "learning_rate": 0.0001, "loss": 1.6607, "step": 12078 }, { "epoch": 1.4033110659308743, "grad_norm": 0.571075975894928, "learning_rate": 0.0001, "loss": 1.3262, "step": 12079 }, { "epoch": 1.4034272436828348, "grad_norm": 0.5579153299331665, "learning_rate": 0.0001, "loss": 1.4938, "step": 12080 }, { "epoch": 1.4035434214347953, "grad_norm": 0.5987997055053711, "learning_rate": 0.0001, "loss": 1.442, "step": 12081 }, { "epoch": 1.4036595991867558, "grad_norm": 0.5797942280769348, "learning_rate": 0.0001, "loss": 1.5384, "step": 12082 }, { "epoch": 1.4037757769387162, "grad_norm": 0.5952906012535095, "learning_rate": 0.0001, "loss": 1.3313, "step": 12083 }, { "epoch": 1.4038919546906767, "grad_norm": 0.6068854331970215, "learning_rate": 0.0001, "loss": 1.427, "step": 12084 }, { "epoch": 1.4040081324426372, "grad_norm": 0.5405552983283997, "learning_rate": 0.0001, "loss": 1.3378, "step": 12085 }, { "epoch": 1.4041243101945977, "grad_norm": 0.572024405002594, "learning_rate": 0.0001, "loss": 1.425, "step": 12086 }, { "epoch": 1.4042404879465582, "grad_norm": 0.5662723183631897, "learning_rate": 0.0001, "loss": 1.3541, "step": 12087 }, { "epoch": 1.4043566656985187, "grad_norm": 0.6154362559318542, "learning_rate": 0.0001, "loss": 1.5061, "step": 12088 }, { "epoch": 1.4044728434504792, "grad_norm": 0.6339954733848572, "learning_rate": 0.0001, "loss": 1.4753, "step": 12089 }, { "epoch": 1.4045890212024397, "grad_norm": 0.6176583170890808, "learning_rate": 0.0001, "loss": 1.4823, "step": 12090 }, { "epoch": 1.4047051989544002, "grad_norm": 0.6026437878608704, "learning_rate": 0.0001, "loss": 1.5651, "step": 12091 }, { "epoch": 1.4048213767063609, "grad_norm": 0.6249407529830933, "learning_rate": 0.0001, "loss": 1.4091, "step": 12092 }, { "epoch": 1.4049375544583214, "grad_norm": 0.583416223526001, "learning_rate": 0.0001, "loss": 1.4451, "step": 12093 }, { "epoch": 1.4050537322102818, "grad_norm": 0.6077203750610352, "learning_rate": 0.0001, "loss": 1.4855, "step": 12094 }, { "epoch": 1.4051699099622423, "grad_norm": 0.6278978586196899, "learning_rate": 0.0001, "loss": 1.6374, "step": 12095 }, { "epoch": 1.4052860877142028, "grad_norm": 0.5796852707862854, "learning_rate": 0.0001, "loss": 1.4089, "step": 12096 }, { "epoch": 1.4054022654661633, "grad_norm": 0.6815804839134216, "learning_rate": 0.0001, "loss": 1.596, "step": 12097 }, { "epoch": 1.4055184432181238, "grad_norm": 0.5999861359596252, "learning_rate": 0.0001, "loss": 1.4805, "step": 12098 }, { "epoch": 1.4056346209700843, "grad_norm": 0.5877710580825806, "learning_rate": 0.0001, "loss": 1.4686, "step": 12099 }, { "epoch": 1.4057507987220448, "grad_norm": 0.5974360108375549, "learning_rate": 0.0001, "loss": 1.4517, "step": 12100 }, { "epoch": 1.4058669764740053, "grad_norm": 0.6353722810745239, "learning_rate": 0.0001, "loss": 1.5599, "step": 12101 }, { "epoch": 1.4059831542259658, "grad_norm": 0.5967050194740295, "learning_rate": 0.0001, "loss": 1.5928, "step": 12102 }, { "epoch": 1.4060993319779262, "grad_norm": 0.5885502099990845, "learning_rate": 0.0001, "loss": 1.4882, "step": 12103 }, { "epoch": 1.4062155097298867, "grad_norm": 0.5896927118301392, "learning_rate": 0.0001, "loss": 1.5195, "step": 12104 }, { "epoch": 1.4063316874818472, "grad_norm": 0.536248505115509, "learning_rate": 0.0001, "loss": 1.3211, "step": 12105 }, { "epoch": 1.4064478652338077, "grad_norm": 0.5472630858421326, "learning_rate": 0.0001, "loss": 1.43, "step": 12106 }, { "epoch": 1.4065640429857682, "grad_norm": 0.5885692238807678, "learning_rate": 0.0001, "loss": 1.5514, "step": 12107 }, { "epoch": 1.4066802207377287, "grad_norm": 0.5371417999267578, "learning_rate": 0.0001, "loss": 1.187, "step": 12108 }, { "epoch": 1.4067963984896892, "grad_norm": 0.5889664888381958, "learning_rate": 0.0001, "loss": 1.3359, "step": 12109 }, { "epoch": 1.4069125762416497, "grad_norm": 0.6135367751121521, "learning_rate": 0.0001, "loss": 1.4449, "step": 12110 }, { "epoch": 1.4070287539936102, "grad_norm": 0.5663803815841675, "learning_rate": 0.0001, "loss": 1.2486, "step": 12111 }, { "epoch": 1.4071449317455706, "grad_norm": 0.6089535355567932, "learning_rate": 0.0001, "loss": 1.6265, "step": 12112 }, { "epoch": 1.4072611094975311, "grad_norm": 0.6175603270530701, "learning_rate": 0.0001, "loss": 1.6734, "step": 12113 }, { "epoch": 1.4073772872494916, "grad_norm": 0.5911760330200195, "learning_rate": 0.0001, "loss": 1.3986, "step": 12114 }, { "epoch": 1.4074934650014521, "grad_norm": 0.6093399524688721, "learning_rate": 0.0001, "loss": 1.5308, "step": 12115 }, { "epoch": 1.4076096427534126, "grad_norm": 0.5803685188293457, "learning_rate": 0.0001, "loss": 1.3183, "step": 12116 }, { "epoch": 1.407725820505373, "grad_norm": 0.5896522998809814, "learning_rate": 0.0001, "loss": 1.6055, "step": 12117 }, { "epoch": 1.4078419982573336, "grad_norm": 0.573806881904602, "learning_rate": 0.0001, "loss": 1.3718, "step": 12118 }, { "epoch": 1.4079581760092943, "grad_norm": 0.5594179034233093, "learning_rate": 0.0001, "loss": 1.3664, "step": 12119 }, { "epoch": 1.4080743537612548, "grad_norm": 0.6025107502937317, "learning_rate": 0.0001, "loss": 1.3603, "step": 12120 }, { "epoch": 1.4081905315132153, "grad_norm": 0.5635510683059692, "learning_rate": 0.0001, "loss": 1.5932, "step": 12121 }, { "epoch": 1.4083067092651758, "grad_norm": 0.5392552614212036, "learning_rate": 0.0001, "loss": 1.3761, "step": 12122 }, { "epoch": 1.4084228870171362, "grad_norm": 0.5588886141777039, "learning_rate": 0.0001, "loss": 1.4119, "step": 12123 }, { "epoch": 1.4085390647690967, "grad_norm": 0.5739279389381409, "learning_rate": 0.0001, "loss": 1.4097, "step": 12124 }, { "epoch": 1.4086552425210572, "grad_norm": 0.5879178047180176, "learning_rate": 0.0001, "loss": 1.6335, "step": 12125 }, { "epoch": 1.4087714202730177, "grad_norm": 0.5897621512413025, "learning_rate": 0.0001, "loss": 1.5896, "step": 12126 }, { "epoch": 1.4088875980249782, "grad_norm": 0.6275389194488525, "learning_rate": 0.0001, "loss": 1.5319, "step": 12127 }, { "epoch": 1.4090037757769387, "grad_norm": 0.6143684983253479, "learning_rate": 0.0001, "loss": 1.4294, "step": 12128 }, { "epoch": 1.4091199535288992, "grad_norm": 0.5457308292388916, "learning_rate": 0.0001, "loss": 1.305, "step": 12129 }, { "epoch": 1.4092361312808597, "grad_norm": 0.6137149930000305, "learning_rate": 0.0001, "loss": 1.5955, "step": 12130 }, { "epoch": 1.4093523090328202, "grad_norm": 0.5918616056442261, "learning_rate": 0.0001, "loss": 1.3573, "step": 12131 }, { "epoch": 1.4094684867847806, "grad_norm": 0.5807372331619263, "learning_rate": 0.0001, "loss": 1.4503, "step": 12132 }, { "epoch": 1.4095846645367411, "grad_norm": 0.6110495328903198, "learning_rate": 0.0001, "loss": 1.4377, "step": 12133 }, { "epoch": 1.4097008422887018, "grad_norm": 0.5787477493286133, "learning_rate": 0.0001, "loss": 1.3938, "step": 12134 }, { "epoch": 1.4098170200406623, "grad_norm": 0.5760836005210876, "learning_rate": 0.0001, "loss": 1.429, "step": 12135 }, { "epoch": 1.4099331977926228, "grad_norm": 0.6453091502189636, "learning_rate": 0.0001, "loss": 1.6428, "step": 12136 }, { "epoch": 1.4100493755445833, "grad_norm": 0.6404998898506165, "learning_rate": 0.0001, "loss": 1.5553, "step": 12137 }, { "epoch": 1.4101655532965438, "grad_norm": 0.6601923108100891, "learning_rate": 0.0001, "loss": 1.5512, "step": 12138 }, { "epoch": 1.4102817310485043, "grad_norm": 0.6087546944618225, "learning_rate": 0.0001, "loss": 1.4711, "step": 12139 }, { "epoch": 1.4103979088004648, "grad_norm": 0.5570737719535828, "learning_rate": 0.0001, "loss": 1.2991, "step": 12140 }, { "epoch": 1.4105140865524253, "grad_norm": 0.5413463115692139, "learning_rate": 0.0001, "loss": 1.185, "step": 12141 }, { "epoch": 1.4106302643043858, "grad_norm": 0.5919083952903748, "learning_rate": 0.0001, "loss": 1.4681, "step": 12142 }, { "epoch": 1.4107464420563463, "grad_norm": 0.593340277671814, "learning_rate": 0.0001, "loss": 1.7068, "step": 12143 }, { "epoch": 1.4108626198083067, "grad_norm": 0.6711177229881287, "learning_rate": 0.0001, "loss": 1.4589, "step": 12144 }, { "epoch": 1.4109787975602672, "grad_norm": 0.6454046368598938, "learning_rate": 0.0001, "loss": 1.4587, "step": 12145 }, { "epoch": 1.4110949753122277, "grad_norm": 0.576941967010498, "learning_rate": 0.0001, "loss": 1.4441, "step": 12146 }, { "epoch": 1.4112111530641882, "grad_norm": 0.5717135667800903, "learning_rate": 0.0001, "loss": 1.4607, "step": 12147 }, { "epoch": 1.4113273308161487, "grad_norm": 0.5811867117881775, "learning_rate": 0.0001, "loss": 1.4087, "step": 12148 }, { "epoch": 1.4114435085681092, "grad_norm": 0.5930108428001404, "learning_rate": 0.0001, "loss": 1.425, "step": 12149 }, { "epoch": 1.4115596863200697, "grad_norm": 0.6307324767112732, "learning_rate": 0.0001, "loss": 1.4217, "step": 12150 }, { "epoch": 1.4116758640720302, "grad_norm": 0.5676311254501343, "learning_rate": 0.0001, "loss": 1.3643, "step": 12151 }, { "epoch": 1.4117920418239907, "grad_norm": 0.6175429821014404, "learning_rate": 0.0001, "loss": 1.5436, "step": 12152 }, { "epoch": 1.4119082195759511, "grad_norm": 0.6128252744674683, "learning_rate": 0.0001, "loss": 1.5466, "step": 12153 }, { "epoch": 1.4120243973279116, "grad_norm": 0.5954226851463318, "learning_rate": 0.0001, "loss": 1.4271, "step": 12154 }, { "epoch": 1.4121405750798721, "grad_norm": 0.6151418089866638, "learning_rate": 0.0001, "loss": 1.5006, "step": 12155 }, { "epoch": 1.4122567528318326, "grad_norm": 0.6414467096328735, "learning_rate": 0.0001, "loss": 1.4582, "step": 12156 }, { "epoch": 1.412372930583793, "grad_norm": 0.6506933569908142, "learning_rate": 0.0001, "loss": 1.6118, "step": 12157 }, { "epoch": 1.4124891083357536, "grad_norm": 0.603602409362793, "learning_rate": 0.0001, "loss": 1.5034, "step": 12158 }, { "epoch": 1.412605286087714, "grad_norm": 0.5889068245887756, "learning_rate": 0.0001, "loss": 1.5119, "step": 12159 }, { "epoch": 1.4127214638396748, "grad_norm": 0.5658511519432068, "learning_rate": 0.0001, "loss": 1.4418, "step": 12160 }, { "epoch": 1.4128376415916353, "grad_norm": 0.5495668053627014, "learning_rate": 0.0001, "loss": 1.3484, "step": 12161 }, { "epoch": 1.4129538193435958, "grad_norm": 0.5731701254844666, "learning_rate": 0.0001, "loss": 1.3556, "step": 12162 }, { "epoch": 1.4130699970955563, "grad_norm": 0.6092509031295776, "learning_rate": 0.0001, "loss": 1.5438, "step": 12163 }, { "epoch": 1.4131861748475167, "grad_norm": 0.6485771536827087, "learning_rate": 0.0001, "loss": 1.5345, "step": 12164 }, { "epoch": 1.4133023525994772, "grad_norm": 0.5795009136199951, "learning_rate": 0.0001, "loss": 1.492, "step": 12165 }, { "epoch": 1.4134185303514377, "grad_norm": 0.5661119222640991, "learning_rate": 0.0001, "loss": 1.3841, "step": 12166 }, { "epoch": 1.4135347081033982, "grad_norm": 0.5987005829811096, "learning_rate": 0.0001, "loss": 1.4501, "step": 12167 }, { "epoch": 1.4136508858553587, "grad_norm": 0.5951616168022156, "learning_rate": 0.0001, "loss": 1.4909, "step": 12168 }, { "epoch": 1.4137670636073192, "grad_norm": 0.6180946826934814, "learning_rate": 0.0001, "loss": 1.5858, "step": 12169 }, { "epoch": 1.4138832413592797, "grad_norm": 0.5951772928237915, "learning_rate": 0.0001, "loss": 1.5188, "step": 12170 }, { "epoch": 1.4139994191112402, "grad_norm": 0.5945565700531006, "learning_rate": 0.0001, "loss": 1.3671, "step": 12171 }, { "epoch": 1.4141155968632007, "grad_norm": 0.6298597455024719, "learning_rate": 0.0001, "loss": 1.5741, "step": 12172 }, { "epoch": 1.4142317746151611, "grad_norm": 0.5758437514305115, "learning_rate": 0.0001, "loss": 1.478, "step": 12173 }, { "epoch": 1.4143479523671216, "grad_norm": 0.6084062457084656, "learning_rate": 0.0001, "loss": 1.635, "step": 12174 }, { "epoch": 1.4144641301190821, "grad_norm": 0.5881122946739197, "learning_rate": 0.0001, "loss": 1.3891, "step": 12175 }, { "epoch": 1.4145803078710428, "grad_norm": 0.6065901517868042, "learning_rate": 0.0001, "loss": 1.5944, "step": 12176 }, { "epoch": 1.4146964856230033, "grad_norm": 0.57171231508255, "learning_rate": 0.0001, "loss": 1.353, "step": 12177 }, { "epoch": 1.4148126633749638, "grad_norm": 0.606834888458252, "learning_rate": 0.0001, "loss": 1.4338, "step": 12178 }, { "epoch": 1.4149288411269243, "grad_norm": 0.6262538433074951, "learning_rate": 0.0001, "loss": 1.4993, "step": 12179 }, { "epoch": 1.4150450188788848, "grad_norm": 0.6646376252174377, "learning_rate": 0.0001, "loss": 1.6679, "step": 12180 }, { "epoch": 1.4151611966308453, "grad_norm": 0.6121298670768738, "learning_rate": 0.0001, "loss": 1.4422, "step": 12181 }, { "epoch": 1.4152773743828058, "grad_norm": 0.5688852071762085, "learning_rate": 0.0001, "loss": 1.4792, "step": 12182 }, { "epoch": 1.4153935521347663, "grad_norm": 0.5843722820281982, "learning_rate": 0.0001, "loss": 1.3999, "step": 12183 }, { "epoch": 1.4155097298867267, "grad_norm": 0.6013531684875488, "learning_rate": 0.0001, "loss": 1.3599, "step": 12184 }, { "epoch": 1.4156259076386872, "grad_norm": 0.5897423028945923, "learning_rate": 0.0001, "loss": 1.4682, "step": 12185 }, { "epoch": 1.4157420853906477, "grad_norm": 0.6187875270843506, "learning_rate": 0.0001, "loss": 1.4988, "step": 12186 }, { "epoch": 1.4158582631426082, "grad_norm": 0.5957651734352112, "learning_rate": 0.0001, "loss": 1.4374, "step": 12187 }, { "epoch": 1.4159744408945687, "grad_norm": 0.6102905869483948, "learning_rate": 0.0001, "loss": 1.5182, "step": 12188 }, { "epoch": 1.4160906186465292, "grad_norm": 0.5826073288917542, "learning_rate": 0.0001, "loss": 1.3984, "step": 12189 }, { "epoch": 1.4162067963984897, "grad_norm": 0.6257545948028564, "learning_rate": 0.0001, "loss": 1.4765, "step": 12190 }, { "epoch": 1.4163229741504502, "grad_norm": 0.6139261722564697, "learning_rate": 0.0001, "loss": 1.4693, "step": 12191 }, { "epoch": 1.4164391519024107, "grad_norm": 0.6307207942008972, "learning_rate": 0.0001, "loss": 1.5803, "step": 12192 }, { "epoch": 1.4165553296543711, "grad_norm": 0.603281557559967, "learning_rate": 0.0001, "loss": 1.4784, "step": 12193 }, { "epoch": 1.4166715074063316, "grad_norm": 0.5983949303627014, "learning_rate": 0.0001, "loss": 1.4863, "step": 12194 }, { "epoch": 1.4167876851582921, "grad_norm": 0.5481703281402588, "learning_rate": 0.0001, "loss": 1.4079, "step": 12195 }, { "epoch": 1.4169038629102526, "grad_norm": 0.6520917415618896, "learning_rate": 0.0001, "loss": 1.544, "step": 12196 }, { "epoch": 1.417020040662213, "grad_norm": 0.553615927696228, "learning_rate": 0.0001, "loss": 1.3623, "step": 12197 }, { "epoch": 1.4171362184141736, "grad_norm": 0.5994259119033813, "learning_rate": 0.0001, "loss": 1.6403, "step": 12198 }, { "epoch": 1.417252396166134, "grad_norm": 0.5477364659309387, "learning_rate": 0.0001, "loss": 1.3191, "step": 12199 }, { "epoch": 1.4173685739180946, "grad_norm": 0.6125954389572144, "learning_rate": 0.0001, "loss": 1.4055, "step": 12200 }, { "epoch": 1.417484751670055, "grad_norm": 0.642378032207489, "learning_rate": 0.0001, "loss": 1.5665, "step": 12201 }, { "epoch": 1.4176009294220158, "grad_norm": 0.5678790807723999, "learning_rate": 0.0001, "loss": 1.3472, "step": 12202 }, { "epoch": 1.4177171071739763, "grad_norm": 0.6221288442611694, "learning_rate": 0.0001, "loss": 1.5922, "step": 12203 }, { "epoch": 1.4178332849259367, "grad_norm": 0.6019090414047241, "learning_rate": 0.0001, "loss": 1.6035, "step": 12204 }, { "epoch": 1.4179494626778972, "grad_norm": 0.6357331275939941, "learning_rate": 0.0001, "loss": 1.5103, "step": 12205 }, { "epoch": 1.4180656404298577, "grad_norm": 0.5553552508354187, "learning_rate": 0.0001, "loss": 1.2786, "step": 12206 }, { "epoch": 1.4181818181818182, "grad_norm": 0.5938397645950317, "learning_rate": 0.0001, "loss": 1.4726, "step": 12207 }, { "epoch": 1.4182979959337787, "grad_norm": 0.5866955518722534, "learning_rate": 0.0001, "loss": 1.3525, "step": 12208 }, { "epoch": 1.4184141736857392, "grad_norm": 0.5551325678825378, "learning_rate": 0.0001, "loss": 1.4403, "step": 12209 }, { "epoch": 1.4185303514376997, "grad_norm": 0.5848428606987, "learning_rate": 0.0001, "loss": 1.5868, "step": 12210 }, { "epoch": 1.4186465291896602, "grad_norm": 0.5819724798202515, "learning_rate": 0.0001, "loss": 1.288, "step": 12211 }, { "epoch": 1.4187627069416207, "grad_norm": 0.5821203589439392, "learning_rate": 0.0001, "loss": 1.3893, "step": 12212 }, { "epoch": 1.4188788846935811, "grad_norm": 0.6024807691574097, "learning_rate": 0.0001, "loss": 1.6227, "step": 12213 }, { "epoch": 1.4189950624455416, "grad_norm": 0.5902191996574402, "learning_rate": 0.0001, "loss": 1.4969, "step": 12214 }, { "epoch": 1.4191112401975021, "grad_norm": 0.5712905526161194, "learning_rate": 0.0001, "loss": 1.5282, "step": 12215 }, { "epoch": 1.4192274179494626, "grad_norm": 0.5864148139953613, "learning_rate": 0.0001, "loss": 1.5394, "step": 12216 }, { "epoch": 1.419343595701423, "grad_norm": 0.5870895385742188, "learning_rate": 0.0001, "loss": 1.4937, "step": 12217 }, { "epoch": 1.4194597734533838, "grad_norm": 0.6374808549880981, "learning_rate": 0.0001, "loss": 1.6601, "step": 12218 }, { "epoch": 1.4195759512053443, "grad_norm": 0.5821941494941711, "learning_rate": 0.0001, "loss": 1.555, "step": 12219 }, { "epoch": 1.4196921289573048, "grad_norm": 0.6282082796096802, "learning_rate": 0.0001, "loss": 1.6674, "step": 12220 }, { "epoch": 1.4198083067092653, "grad_norm": 0.5595186352729797, "learning_rate": 0.0001, "loss": 1.5652, "step": 12221 }, { "epoch": 1.4199244844612258, "grad_norm": 0.5783095359802246, "learning_rate": 0.0001, "loss": 1.2876, "step": 12222 }, { "epoch": 1.4200406622131863, "grad_norm": 0.6268340349197388, "learning_rate": 0.0001, "loss": 1.2669, "step": 12223 }, { "epoch": 1.4201568399651467, "grad_norm": 0.6090834140777588, "learning_rate": 0.0001, "loss": 1.5892, "step": 12224 }, { "epoch": 1.4202730177171072, "grad_norm": 0.616369903087616, "learning_rate": 0.0001, "loss": 1.4723, "step": 12225 }, { "epoch": 1.4203891954690677, "grad_norm": 0.5462055802345276, "learning_rate": 0.0001, "loss": 1.3814, "step": 12226 }, { "epoch": 1.4205053732210282, "grad_norm": 0.6000543236732483, "learning_rate": 0.0001, "loss": 1.4903, "step": 12227 }, { "epoch": 1.4206215509729887, "grad_norm": 0.5703785419464111, "learning_rate": 0.0001, "loss": 1.3543, "step": 12228 }, { "epoch": 1.4207377287249492, "grad_norm": 0.6079279780387878, "learning_rate": 0.0001, "loss": 1.6149, "step": 12229 }, { "epoch": 1.4208539064769097, "grad_norm": 0.5839101076126099, "learning_rate": 0.0001, "loss": 1.3544, "step": 12230 }, { "epoch": 1.4209700842288702, "grad_norm": 0.5797370672225952, "learning_rate": 0.0001, "loss": 1.3705, "step": 12231 }, { "epoch": 1.4210862619808307, "grad_norm": 0.6282606720924377, "learning_rate": 0.0001, "loss": 1.7211, "step": 12232 }, { "epoch": 1.4212024397327911, "grad_norm": 0.6144933700561523, "learning_rate": 0.0001, "loss": 1.4443, "step": 12233 }, { "epoch": 1.4213186174847516, "grad_norm": 0.5617024898529053, "learning_rate": 0.0001, "loss": 1.4082, "step": 12234 }, { "epoch": 1.4214347952367121, "grad_norm": 0.6399552226066589, "learning_rate": 0.0001, "loss": 1.4947, "step": 12235 }, { "epoch": 1.4215509729886726, "grad_norm": 0.5742717385292053, "learning_rate": 0.0001, "loss": 1.477, "step": 12236 }, { "epoch": 1.421667150740633, "grad_norm": 0.5643975734710693, "learning_rate": 0.0001, "loss": 1.6196, "step": 12237 }, { "epoch": 1.4217833284925936, "grad_norm": 0.593525230884552, "learning_rate": 0.0001, "loss": 1.5359, "step": 12238 }, { "epoch": 1.421899506244554, "grad_norm": 0.5971342325210571, "learning_rate": 0.0001, "loss": 1.488, "step": 12239 }, { "epoch": 1.4220156839965146, "grad_norm": 0.6006376147270203, "learning_rate": 0.0001, "loss": 1.4928, "step": 12240 }, { "epoch": 1.422131861748475, "grad_norm": 0.6138136982917786, "learning_rate": 0.0001, "loss": 1.4406, "step": 12241 }, { "epoch": 1.4222480395004355, "grad_norm": 0.5824870467185974, "learning_rate": 0.0001, "loss": 1.4909, "step": 12242 }, { "epoch": 1.422364217252396, "grad_norm": 0.6046587824821472, "learning_rate": 0.0001, "loss": 1.5849, "step": 12243 }, { "epoch": 1.4224803950043567, "grad_norm": 0.5978199243545532, "learning_rate": 0.0001, "loss": 1.4384, "step": 12244 }, { "epoch": 1.4225965727563172, "grad_norm": 0.5881246328353882, "learning_rate": 0.0001, "loss": 1.4615, "step": 12245 }, { "epoch": 1.4227127505082777, "grad_norm": 0.5994898676872253, "learning_rate": 0.0001, "loss": 1.6306, "step": 12246 }, { "epoch": 1.4228289282602382, "grad_norm": 0.6213389039039612, "learning_rate": 0.0001, "loss": 1.571, "step": 12247 }, { "epoch": 1.4229451060121987, "grad_norm": 0.5992786884307861, "learning_rate": 0.0001, "loss": 1.3701, "step": 12248 }, { "epoch": 1.4230612837641592, "grad_norm": 0.5737839341163635, "learning_rate": 0.0001, "loss": 1.411, "step": 12249 }, { "epoch": 1.4231774615161197, "grad_norm": 0.6159092783927917, "learning_rate": 0.0001, "loss": 1.6564, "step": 12250 }, { "epoch": 1.4232936392680802, "grad_norm": 0.596993625164032, "learning_rate": 0.0001, "loss": 1.3847, "step": 12251 }, { "epoch": 1.4234098170200407, "grad_norm": 0.5880178213119507, "learning_rate": 0.0001, "loss": 1.5226, "step": 12252 }, { "epoch": 1.4235259947720011, "grad_norm": 0.6582285165786743, "learning_rate": 0.0001, "loss": 1.5161, "step": 12253 }, { "epoch": 1.4236421725239616, "grad_norm": 0.608213484287262, "learning_rate": 0.0001, "loss": 1.5923, "step": 12254 }, { "epoch": 1.4237583502759221, "grad_norm": 0.5370960235595703, "learning_rate": 0.0001, "loss": 1.4218, "step": 12255 }, { "epoch": 1.4238745280278826, "grad_norm": 0.6302644610404968, "learning_rate": 0.0001, "loss": 1.6416, "step": 12256 }, { "epoch": 1.423990705779843, "grad_norm": 0.5852667093276978, "learning_rate": 0.0001, "loss": 1.567, "step": 12257 }, { "epoch": 1.4241068835318036, "grad_norm": 0.6241486072540283, "learning_rate": 0.0001, "loss": 1.5519, "step": 12258 }, { "epoch": 1.424223061283764, "grad_norm": 0.5911672711372375, "learning_rate": 0.0001, "loss": 1.597, "step": 12259 }, { "epoch": 1.4243392390357248, "grad_norm": 0.5758203268051147, "learning_rate": 0.0001, "loss": 1.4032, "step": 12260 }, { "epoch": 1.4244554167876853, "grad_norm": 0.6107878088951111, "learning_rate": 0.0001, "loss": 1.5565, "step": 12261 }, { "epoch": 1.4245715945396458, "grad_norm": 0.5618066787719727, "learning_rate": 0.0001, "loss": 1.4639, "step": 12262 }, { "epoch": 1.4246877722916063, "grad_norm": 0.6158115863800049, "learning_rate": 0.0001, "loss": 1.8096, "step": 12263 }, { "epoch": 1.4248039500435667, "grad_norm": 0.6369965672492981, "learning_rate": 0.0001, "loss": 1.6449, "step": 12264 }, { "epoch": 1.4249201277955272, "grad_norm": 0.6416753530502319, "learning_rate": 0.0001, "loss": 1.5327, "step": 12265 }, { "epoch": 1.4250363055474877, "grad_norm": 0.5738315582275391, "learning_rate": 0.0001, "loss": 1.4617, "step": 12266 }, { "epoch": 1.4251524832994482, "grad_norm": 0.559740424156189, "learning_rate": 0.0001, "loss": 1.5697, "step": 12267 }, { "epoch": 1.4252686610514087, "grad_norm": 0.5917907953262329, "learning_rate": 0.0001, "loss": 1.5039, "step": 12268 }, { "epoch": 1.4253848388033692, "grad_norm": 0.5424224734306335, "learning_rate": 0.0001, "loss": 1.3014, "step": 12269 }, { "epoch": 1.4255010165553297, "grad_norm": 0.5904528498649597, "learning_rate": 0.0001, "loss": 1.4219, "step": 12270 }, { "epoch": 1.4256171943072902, "grad_norm": 0.6030588746070862, "learning_rate": 0.0001, "loss": 1.454, "step": 12271 }, { "epoch": 1.4257333720592507, "grad_norm": 0.5945565104484558, "learning_rate": 0.0001, "loss": 1.3429, "step": 12272 }, { "epoch": 1.4258495498112111, "grad_norm": 0.6340063810348511, "learning_rate": 0.0001, "loss": 1.5204, "step": 12273 }, { "epoch": 1.4259657275631716, "grad_norm": 0.5972619652748108, "learning_rate": 0.0001, "loss": 1.2667, "step": 12274 }, { "epoch": 1.4260819053151321, "grad_norm": 0.6026214361190796, "learning_rate": 0.0001, "loss": 1.4078, "step": 12275 }, { "epoch": 1.4261980830670926, "grad_norm": 0.6557515859603882, "learning_rate": 0.0001, "loss": 1.6499, "step": 12276 }, { "epoch": 1.426314260819053, "grad_norm": 0.6378829479217529, "learning_rate": 0.0001, "loss": 1.5011, "step": 12277 }, { "epoch": 1.4264304385710136, "grad_norm": 0.655852735042572, "learning_rate": 0.0001, "loss": 1.6301, "step": 12278 }, { "epoch": 1.426546616322974, "grad_norm": 0.5908859968185425, "learning_rate": 0.0001, "loss": 1.3729, "step": 12279 }, { "epoch": 1.4266627940749346, "grad_norm": 0.6130912899971008, "learning_rate": 0.0001, "loss": 1.4393, "step": 12280 }, { "epoch": 1.426778971826895, "grad_norm": 0.5807083249092102, "learning_rate": 0.0001, "loss": 1.4061, "step": 12281 }, { "epoch": 1.4268951495788555, "grad_norm": 0.6125562191009521, "learning_rate": 0.0001, "loss": 1.3565, "step": 12282 }, { "epoch": 1.427011327330816, "grad_norm": 0.5933031439781189, "learning_rate": 0.0001, "loss": 1.6421, "step": 12283 }, { "epoch": 1.4271275050827765, "grad_norm": 0.5931923389434814, "learning_rate": 0.0001, "loss": 1.4202, "step": 12284 }, { "epoch": 1.427243682834737, "grad_norm": 0.5963932275772095, "learning_rate": 0.0001, "loss": 1.3515, "step": 12285 }, { "epoch": 1.4273598605866977, "grad_norm": 0.5983773469924927, "learning_rate": 0.0001, "loss": 1.4857, "step": 12286 }, { "epoch": 1.4274760383386582, "grad_norm": 0.5747543573379517, "learning_rate": 0.0001, "loss": 1.398, "step": 12287 }, { "epoch": 1.4275922160906187, "grad_norm": 0.6180018782615662, "learning_rate": 0.0001, "loss": 1.5407, "step": 12288 }, { "epoch": 1.4277083938425792, "grad_norm": 0.6087707281112671, "learning_rate": 0.0001, "loss": 1.5047, "step": 12289 }, { "epoch": 1.4278245715945397, "grad_norm": 0.5981317758560181, "learning_rate": 0.0001, "loss": 1.4278, "step": 12290 }, { "epoch": 1.4279407493465002, "grad_norm": 0.5648661255836487, "learning_rate": 0.0001, "loss": 1.2978, "step": 12291 }, { "epoch": 1.4280569270984607, "grad_norm": 0.6909130215644836, "learning_rate": 0.0001, "loss": 1.7132, "step": 12292 }, { "epoch": 1.4281731048504211, "grad_norm": 0.603410005569458, "learning_rate": 0.0001, "loss": 1.4631, "step": 12293 }, { "epoch": 1.4282892826023816, "grad_norm": 0.5939321517944336, "learning_rate": 0.0001, "loss": 1.6074, "step": 12294 }, { "epoch": 1.4284054603543421, "grad_norm": 0.5982928276062012, "learning_rate": 0.0001, "loss": 1.4063, "step": 12295 }, { "epoch": 1.4285216381063026, "grad_norm": 0.6024434566497803, "learning_rate": 0.0001, "loss": 1.3077, "step": 12296 }, { "epoch": 1.428637815858263, "grad_norm": 0.656810998916626, "learning_rate": 0.0001, "loss": 1.4021, "step": 12297 }, { "epoch": 1.4287539936102236, "grad_norm": 0.6280006170272827, "learning_rate": 0.0001, "loss": 1.7002, "step": 12298 }, { "epoch": 1.428870171362184, "grad_norm": 0.6182981729507446, "learning_rate": 0.0001, "loss": 1.4683, "step": 12299 }, { "epoch": 1.4289863491141446, "grad_norm": 0.6073260307312012, "learning_rate": 0.0001, "loss": 1.423, "step": 12300 }, { "epoch": 1.429102526866105, "grad_norm": 0.6266109943389893, "learning_rate": 0.0001, "loss": 1.5458, "step": 12301 }, { "epoch": 1.4292187046180658, "grad_norm": 0.5566325187683105, "learning_rate": 0.0001, "loss": 1.4415, "step": 12302 }, { "epoch": 1.4293348823700263, "grad_norm": 0.6451058387756348, "learning_rate": 0.0001, "loss": 1.5358, "step": 12303 }, { "epoch": 1.4294510601219867, "grad_norm": 0.6131970882415771, "learning_rate": 0.0001, "loss": 1.4633, "step": 12304 }, { "epoch": 1.4295672378739472, "grad_norm": 0.5836344361305237, "learning_rate": 0.0001, "loss": 1.4836, "step": 12305 }, { "epoch": 1.4296834156259077, "grad_norm": 0.6282364726066589, "learning_rate": 0.0001, "loss": 1.4959, "step": 12306 }, { "epoch": 1.4297995933778682, "grad_norm": 0.5855079889297485, "learning_rate": 0.0001, "loss": 1.5406, "step": 12307 }, { "epoch": 1.4299157711298287, "grad_norm": 0.606343150138855, "learning_rate": 0.0001, "loss": 1.5755, "step": 12308 }, { "epoch": 1.4300319488817892, "grad_norm": 0.5874128341674805, "learning_rate": 0.0001, "loss": 1.5141, "step": 12309 }, { "epoch": 1.4301481266337497, "grad_norm": 0.6407783031463623, "learning_rate": 0.0001, "loss": 1.5108, "step": 12310 }, { "epoch": 1.4302643043857102, "grad_norm": 0.6048932671546936, "learning_rate": 0.0001, "loss": 1.4187, "step": 12311 }, { "epoch": 1.4303804821376707, "grad_norm": 0.6171375513076782, "learning_rate": 0.0001, "loss": 1.4019, "step": 12312 }, { "epoch": 1.4304966598896312, "grad_norm": 0.5970510244369507, "learning_rate": 0.0001, "loss": 1.3356, "step": 12313 }, { "epoch": 1.4306128376415916, "grad_norm": 0.5908089280128479, "learning_rate": 0.0001, "loss": 1.4893, "step": 12314 }, { "epoch": 1.4307290153935521, "grad_norm": 0.5906482338905334, "learning_rate": 0.0001, "loss": 1.4824, "step": 12315 }, { "epoch": 1.4308451931455126, "grad_norm": 0.5727223753929138, "learning_rate": 0.0001, "loss": 1.2765, "step": 12316 }, { "epoch": 1.430961370897473, "grad_norm": 0.5993191599845886, "learning_rate": 0.0001, "loss": 1.4062, "step": 12317 }, { "epoch": 1.4310775486494336, "grad_norm": 0.6337776184082031, "learning_rate": 0.0001, "loss": 1.5402, "step": 12318 }, { "epoch": 1.431193726401394, "grad_norm": 0.6048466563224792, "learning_rate": 0.0001, "loss": 1.3284, "step": 12319 }, { "epoch": 1.4313099041533546, "grad_norm": 0.5903589129447937, "learning_rate": 0.0001, "loss": 1.4525, "step": 12320 }, { "epoch": 1.431426081905315, "grad_norm": 0.5887618064880371, "learning_rate": 0.0001, "loss": 1.3265, "step": 12321 }, { "epoch": 1.4315422596572756, "grad_norm": 0.6316114068031311, "learning_rate": 0.0001, "loss": 1.4795, "step": 12322 }, { "epoch": 1.431658437409236, "grad_norm": 0.6685931086540222, "learning_rate": 0.0001, "loss": 1.6197, "step": 12323 }, { "epoch": 1.4317746151611965, "grad_norm": 0.6843975186347961, "learning_rate": 0.0001, "loss": 1.6115, "step": 12324 }, { "epoch": 1.431890792913157, "grad_norm": 0.5919124484062195, "learning_rate": 0.0001, "loss": 1.3703, "step": 12325 }, { "epoch": 1.4320069706651175, "grad_norm": 0.5988925695419312, "learning_rate": 0.0001, "loss": 1.5675, "step": 12326 }, { "epoch": 1.432123148417078, "grad_norm": 0.574201762676239, "learning_rate": 0.0001, "loss": 1.4022, "step": 12327 }, { "epoch": 1.4322393261690387, "grad_norm": 0.6083642840385437, "learning_rate": 0.0001, "loss": 1.3776, "step": 12328 }, { "epoch": 1.4323555039209992, "grad_norm": 0.5715479850769043, "learning_rate": 0.0001, "loss": 1.4453, "step": 12329 }, { "epoch": 1.4324716816729597, "grad_norm": 0.6147144436836243, "learning_rate": 0.0001, "loss": 1.5643, "step": 12330 }, { "epoch": 1.4325878594249202, "grad_norm": 0.6399287581443787, "learning_rate": 0.0001, "loss": 1.2921, "step": 12331 }, { "epoch": 1.4327040371768807, "grad_norm": 0.6057835221290588, "learning_rate": 0.0001, "loss": 1.5514, "step": 12332 }, { "epoch": 1.4328202149288412, "grad_norm": 0.6630198955535889, "learning_rate": 0.0001, "loss": 1.5945, "step": 12333 }, { "epoch": 1.4329363926808016, "grad_norm": 0.6036765575408936, "learning_rate": 0.0001, "loss": 1.4586, "step": 12334 }, { "epoch": 1.4330525704327621, "grad_norm": 0.5870527029037476, "learning_rate": 0.0001, "loss": 1.3427, "step": 12335 }, { "epoch": 1.4331687481847226, "grad_norm": 0.6059634685516357, "learning_rate": 0.0001, "loss": 1.349, "step": 12336 }, { "epoch": 1.433284925936683, "grad_norm": 0.5747280120849609, "learning_rate": 0.0001, "loss": 1.4929, "step": 12337 }, { "epoch": 1.4334011036886436, "grad_norm": 0.6289669275283813, "learning_rate": 0.0001, "loss": 1.4954, "step": 12338 }, { "epoch": 1.433517281440604, "grad_norm": 0.5724921226501465, "learning_rate": 0.0001, "loss": 1.4182, "step": 12339 }, { "epoch": 1.4336334591925646, "grad_norm": 0.6268770098686218, "learning_rate": 0.0001, "loss": 1.4344, "step": 12340 }, { "epoch": 1.433749636944525, "grad_norm": 0.6141247749328613, "learning_rate": 0.0001, "loss": 1.5246, "step": 12341 }, { "epoch": 1.4338658146964856, "grad_norm": 0.702262282371521, "learning_rate": 0.0001, "loss": 1.6, "step": 12342 }, { "epoch": 1.433981992448446, "grad_norm": 0.595066249370575, "learning_rate": 0.0001, "loss": 1.5602, "step": 12343 }, { "epoch": 1.4340981702004068, "grad_norm": 0.6045823693275452, "learning_rate": 0.0001, "loss": 1.4681, "step": 12344 }, { "epoch": 1.4342143479523672, "grad_norm": 0.5839776992797852, "learning_rate": 0.0001, "loss": 1.4221, "step": 12345 }, { "epoch": 1.4343305257043277, "grad_norm": 0.6514957547187805, "learning_rate": 0.0001, "loss": 1.5948, "step": 12346 }, { "epoch": 1.4344467034562882, "grad_norm": 0.5899053812026978, "learning_rate": 0.0001, "loss": 1.3677, "step": 12347 }, { "epoch": 1.4345628812082487, "grad_norm": 0.5787526369094849, "learning_rate": 0.0001, "loss": 1.4326, "step": 12348 }, { "epoch": 1.4346790589602092, "grad_norm": 0.5898358821868896, "learning_rate": 0.0001, "loss": 1.4003, "step": 12349 }, { "epoch": 1.4347952367121697, "grad_norm": 0.6458524465560913, "learning_rate": 0.0001, "loss": 1.5119, "step": 12350 }, { "epoch": 1.4349114144641302, "grad_norm": 0.646244466304779, "learning_rate": 0.0001, "loss": 1.5125, "step": 12351 }, { "epoch": 1.4350275922160907, "grad_norm": 0.5955740213394165, "learning_rate": 0.0001, "loss": 1.3983, "step": 12352 }, { "epoch": 1.4351437699680512, "grad_norm": 0.5988348722457886, "learning_rate": 0.0001, "loss": 1.4731, "step": 12353 }, { "epoch": 1.4352599477200116, "grad_norm": 0.5847843885421753, "learning_rate": 0.0001, "loss": 1.295, "step": 12354 }, { "epoch": 1.4353761254719721, "grad_norm": 0.6146335005760193, "learning_rate": 0.0001, "loss": 1.5285, "step": 12355 }, { "epoch": 1.4354923032239326, "grad_norm": 0.604469895362854, "learning_rate": 0.0001, "loss": 1.4477, "step": 12356 }, { "epoch": 1.435608480975893, "grad_norm": 0.6842860579490662, "learning_rate": 0.0001, "loss": 1.5657, "step": 12357 }, { "epoch": 1.4357246587278536, "grad_norm": 0.5877547860145569, "learning_rate": 0.0001, "loss": 1.4932, "step": 12358 }, { "epoch": 1.435840836479814, "grad_norm": 0.687014102935791, "learning_rate": 0.0001, "loss": 1.5146, "step": 12359 }, { "epoch": 1.4359570142317746, "grad_norm": 0.5968105792999268, "learning_rate": 0.0001, "loss": 1.4974, "step": 12360 }, { "epoch": 1.436073191983735, "grad_norm": 0.5516940355300903, "learning_rate": 0.0001, "loss": 1.3209, "step": 12361 }, { "epoch": 1.4361893697356956, "grad_norm": 0.6110051274299622, "learning_rate": 0.0001, "loss": 1.5701, "step": 12362 }, { "epoch": 1.436305547487656, "grad_norm": 0.5829412937164307, "learning_rate": 0.0001, "loss": 1.4339, "step": 12363 }, { "epoch": 1.4364217252396165, "grad_norm": 0.6736522912979126, "learning_rate": 0.0001, "loss": 1.5634, "step": 12364 }, { "epoch": 1.436537902991577, "grad_norm": 0.5804245471954346, "learning_rate": 0.0001, "loss": 1.3728, "step": 12365 }, { "epoch": 1.4366540807435375, "grad_norm": 0.6237603425979614, "learning_rate": 0.0001, "loss": 1.5145, "step": 12366 }, { "epoch": 1.436770258495498, "grad_norm": 0.6224261522293091, "learning_rate": 0.0001, "loss": 1.4202, "step": 12367 }, { "epoch": 1.4368864362474585, "grad_norm": 0.599617600440979, "learning_rate": 0.0001, "loss": 1.3436, "step": 12368 }, { "epoch": 1.437002613999419, "grad_norm": 0.5778672695159912, "learning_rate": 0.0001, "loss": 1.3741, "step": 12369 }, { "epoch": 1.4371187917513797, "grad_norm": 0.6169531941413879, "learning_rate": 0.0001, "loss": 1.5259, "step": 12370 }, { "epoch": 1.4372349695033402, "grad_norm": 0.6243462562561035, "learning_rate": 0.0001, "loss": 1.4946, "step": 12371 }, { "epoch": 1.4373511472553007, "grad_norm": 0.6082959175109863, "learning_rate": 0.0001, "loss": 1.4367, "step": 12372 }, { "epoch": 1.4374673250072612, "grad_norm": 0.5401190519332886, "learning_rate": 0.0001, "loss": 1.303, "step": 12373 }, { "epoch": 1.4375835027592216, "grad_norm": 0.5593122243881226, "learning_rate": 0.0001, "loss": 1.3716, "step": 12374 }, { "epoch": 1.4376996805111821, "grad_norm": 0.6530990600585938, "learning_rate": 0.0001, "loss": 1.4847, "step": 12375 }, { "epoch": 1.4378158582631426, "grad_norm": 0.5983834266662598, "learning_rate": 0.0001, "loss": 1.4915, "step": 12376 }, { "epoch": 1.437932036015103, "grad_norm": 0.6394164562225342, "learning_rate": 0.0001, "loss": 1.5291, "step": 12377 }, { "epoch": 1.4380482137670636, "grad_norm": 0.5918828845024109, "learning_rate": 0.0001, "loss": 1.593, "step": 12378 }, { "epoch": 1.438164391519024, "grad_norm": 0.5928050875663757, "learning_rate": 0.0001, "loss": 1.6254, "step": 12379 }, { "epoch": 1.4382805692709846, "grad_norm": 0.6321570873260498, "learning_rate": 0.0001, "loss": 1.499, "step": 12380 }, { "epoch": 1.438396747022945, "grad_norm": 0.5853205323219299, "learning_rate": 0.0001, "loss": 1.4488, "step": 12381 }, { "epoch": 1.4385129247749056, "grad_norm": 0.5675798058509827, "learning_rate": 0.0001, "loss": 1.4716, "step": 12382 }, { "epoch": 1.438629102526866, "grad_norm": 0.6351810097694397, "learning_rate": 0.0001, "loss": 1.5845, "step": 12383 }, { "epoch": 1.4387452802788265, "grad_norm": 0.575139582157135, "learning_rate": 0.0001, "loss": 1.4373, "step": 12384 }, { "epoch": 1.438861458030787, "grad_norm": 0.625085711479187, "learning_rate": 0.0001, "loss": 1.5833, "step": 12385 }, { "epoch": 1.4389776357827477, "grad_norm": 0.5660273432731628, "learning_rate": 0.0001, "loss": 1.5369, "step": 12386 }, { "epoch": 1.4390938135347082, "grad_norm": 0.642395555973053, "learning_rate": 0.0001, "loss": 1.6152, "step": 12387 }, { "epoch": 1.4392099912866687, "grad_norm": 0.5889819860458374, "learning_rate": 0.0001, "loss": 1.4482, "step": 12388 }, { "epoch": 1.4393261690386292, "grad_norm": 0.5822515487670898, "learning_rate": 0.0001, "loss": 1.3953, "step": 12389 }, { "epoch": 1.4394423467905897, "grad_norm": 0.5766950249671936, "learning_rate": 0.0001, "loss": 1.4415, "step": 12390 }, { "epoch": 1.4395585245425502, "grad_norm": 0.6112682819366455, "learning_rate": 0.0001, "loss": 1.3334, "step": 12391 }, { "epoch": 1.4396747022945107, "grad_norm": 0.6508562564849854, "learning_rate": 0.0001, "loss": 1.4605, "step": 12392 }, { "epoch": 1.4397908800464712, "grad_norm": 0.6107341647148132, "learning_rate": 0.0001, "loss": 1.5053, "step": 12393 }, { "epoch": 1.4399070577984316, "grad_norm": 0.5927090048789978, "learning_rate": 0.0001, "loss": 1.3687, "step": 12394 }, { "epoch": 1.4400232355503921, "grad_norm": 0.6269469261169434, "learning_rate": 0.0001, "loss": 1.5531, "step": 12395 }, { "epoch": 1.4401394133023526, "grad_norm": 0.6517143249511719, "learning_rate": 0.0001, "loss": 1.4674, "step": 12396 }, { "epoch": 1.4402555910543131, "grad_norm": 0.5869902968406677, "learning_rate": 0.0001, "loss": 1.452, "step": 12397 }, { "epoch": 1.4403717688062736, "grad_norm": 0.6312974691390991, "learning_rate": 0.0001, "loss": 1.4868, "step": 12398 }, { "epoch": 1.440487946558234, "grad_norm": 0.5878767371177673, "learning_rate": 0.0001, "loss": 1.5031, "step": 12399 }, { "epoch": 1.4406041243101946, "grad_norm": 0.6097482442855835, "learning_rate": 0.0001, "loss": 1.5549, "step": 12400 }, { "epoch": 1.440720302062155, "grad_norm": 0.5648863315582275, "learning_rate": 0.0001, "loss": 1.2942, "step": 12401 }, { "epoch": 1.4408364798141156, "grad_norm": 0.5527321100234985, "learning_rate": 0.0001, "loss": 1.2734, "step": 12402 }, { "epoch": 1.440952657566076, "grad_norm": 0.587592601776123, "learning_rate": 0.0001, "loss": 1.5027, "step": 12403 }, { "epoch": 1.4410688353180365, "grad_norm": 0.6088840961456299, "learning_rate": 0.0001, "loss": 1.6002, "step": 12404 }, { "epoch": 1.441185013069997, "grad_norm": 0.5852679014205933, "learning_rate": 0.0001, "loss": 1.3525, "step": 12405 }, { "epoch": 1.4413011908219575, "grad_norm": 0.65325528383255, "learning_rate": 0.0001, "loss": 1.3384, "step": 12406 }, { "epoch": 1.441417368573918, "grad_norm": 0.6041934490203857, "learning_rate": 0.0001, "loss": 1.4435, "step": 12407 }, { "epoch": 1.4415335463258785, "grad_norm": 0.6076403856277466, "learning_rate": 0.0001, "loss": 1.3845, "step": 12408 }, { "epoch": 1.441649724077839, "grad_norm": 0.6228477954864502, "learning_rate": 0.0001, "loss": 1.6537, "step": 12409 }, { "epoch": 1.4417659018297995, "grad_norm": 0.6112052202224731, "learning_rate": 0.0001, "loss": 1.5046, "step": 12410 }, { "epoch": 1.44188207958176, "grad_norm": 0.5732336640357971, "learning_rate": 0.0001, "loss": 1.2829, "step": 12411 }, { "epoch": 1.4419982573337207, "grad_norm": 0.6284494400024414, "learning_rate": 0.0001, "loss": 1.6029, "step": 12412 }, { "epoch": 1.4421144350856812, "grad_norm": 0.5829569101333618, "learning_rate": 0.0001, "loss": 1.4614, "step": 12413 }, { "epoch": 1.4422306128376416, "grad_norm": 0.6057117581367493, "learning_rate": 0.0001, "loss": 1.4082, "step": 12414 }, { "epoch": 1.4423467905896021, "grad_norm": 0.6320570111274719, "learning_rate": 0.0001, "loss": 1.5682, "step": 12415 }, { "epoch": 1.4424629683415626, "grad_norm": 0.6196115612983704, "learning_rate": 0.0001, "loss": 1.5874, "step": 12416 }, { "epoch": 1.4425791460935231, "grad_norm": 0.5623551607131958, "learning_rate": 0.0001, "loss": 1.3886, "step": 12417 }, { "epoch": 1.4426953238454836, "grad_norm": 0.6607520580291748, "learning_rate": 0.0001, "loss": 1.6463, "step": 12418 }, { "epoch": 1.442811501597444, "grad_norm": 0.617299497127533, "learning_rate": 0.0001, "loss": 1.3807, "step": 12419 }, { "epoch": 1.4429276793494046, "grad_norm": 0.6651842594146729, "learning_rate": 0.0001, "loss": 1.7093, "step": 12420 }, { "epoch": 1.443043857101365, "grad_norm": 0.5932329893112183, "learning_rate": 0.0001, "loss": 1.4479, "step": 12421 }, { "epoch": 1.4431600348533256, "grad_norm": 0.6579514145851135, "learning_rate": 0.0001, "loss": 1.4316, "step": 12422 }, { "epoch": 1.443276212605286, "grad_norm": 0.664814829826355, "learning_rate": 0.0001, "loss": 1.4231, "step": 12423 }, { "epoch": 1.4433923903572465, "grad_norm": 0.6053681373596191, "learning_rate": 0.0001, "loss": 1.4038, "step": 12424 }, { "epoch": 1.443508568109207, "grad_norm": 0.5771030187606812, "learning_rate": 0.0001, "loss": 1.4651, "step": 12425 }, { "epoch": 1.4436247458611675, "grad_norm": 0.594408392906189, "learning_rate": 0.0001, "loss": 1.5309, "step": 12426 }, { "epoch": 1.4437409236131282, "grad_norm": 0.6161181330680847, "learning_rate": 0.0001, "loss": 1.3431, "step": 12427 }, { "epoch": 1.4438571013650887, "grad_norm": 0.6196521520614624, "learning_rate": 0.0001, "loss": 1.5768, "step": 12428 }, { "epoch": 1.4439732791170492, "grad_norm": 0.6354605555534363, "learning_rate": 0.0001, "loss": 1.4162, "step": 12429 }, { "epoch": 1.4440894568690097, "grad_norm": 0.6167144179344177, "learning_rate": 0.0001, "loss": 1.4741, "step": 12430 }, { "epoch": 1.4442056346209702, "grad_norm": 0.6271317601203918, "learning_rate": 0.0001, "loss": 1.4724, "step": 12431 }, { "epoch": 1.4443218123729307, "grad_norm": 0.6042360067367554, "learning_rate": 0.0001, "loss": 1.547, "step": 12432 }, { "epoch": 1.4444379901248912, "grad_norm": 0.5653975605964661, "learning_rate": 0.0001, "loss": 1.3804, "step": 12433 }, { "epoch": 1.4445541678768516, "grad_norm": 0.5969522595405579, "learning_rate": 0.0001, "loss": 1.5427, "step": 12434 }, { "epoch": 1.4446703456288121, "grad_norm": 0.5756595730781555, "learning_rate": 0.0001, "loss": 1.4859, "step": 12435 }, { "epoch": 1.4447865233807726, "grad_norm": 0.5508007407188416, "learning_rate": 0.0001, "loss": 1.3658, "step": 12436 }, { "epoch": 1.4449027011327331, "grad_norm": 0.578050971031189, "learning_rate": 0.0001, "loss": 1.4602, "step": 12437 }, { "epoch": 1.4450188788846936, "grad_norm": 0.5895441174507141, "learning_rate": 0.0001, "loss": 1.3945, "step": 12438 }, { "epoch": 1.445135056636654, "grad_norm": 0.5890359282493591, "learning_rate": 0.0001, "loss": 1.3473, "step": 12439 }, { "epoch": 1.4452512343886146, "grad_norm": 0.5464291572570801, "learning_rate": 0.0001, "loss": 1.4685, "step": 12440 }, { "epoch": 1.445367412140575, "grad_norm": 0.6562072038650513, "learning_rate": 0.0001, "loss": 1.5293, "step": 12441 }, { "epoch": 1.4454835898925356, "grad_norm": 0.5997947454452515, "learning_rate": 0.0001, "loss": 1.5211, "step": 12442 }, { "epoch": 1.445599767644496, "grad_norm": 0.5633268356323242, "learning_rate": 0.0001, "loss": 1.2866, "step": 12443 }, { "epoch": 1.4457159453964565, "grad_norm": 0.5751778483390808, "learning_rate": 0.0001, "loss": 1.3926, "step": 12444 }, { "epoch": 1.445832123148417, "grad_norm": 0.6264200210571289, "learning_rate": 0.0001, "loss": 1.4634, "step": 12445 }, { "epoch": 1.4459483009003775, "grad_norm": 0.5811033844947815, "learning_rate": 0.0001, "loss": 1.3373, "step": 12446 }, { "epoch": 1.446064478652338, "grad_norm": 0.5755273103713989, "learning_rate": 0.0001, "loss": 1.4198, "step": 12447 }, { "epoch": 1.4461806564042985, "grad_norm": 0.6127162575721741, "learning_rate": 0.0001, "loss": 1.5854, "step": 12448 }, { "epoch": 1.446296834156259, "grad_norm": 0.631125271320343, "learning_rate": 0.0001, "loss": 1.5738, "step": 12449 }, { "epoch": 1.4464130119082195, "grad_norm": 0.6190559267997742, "learning_rate": 0.0001, "loss": 1.6324, "step": 12450 }, { "epoch": 1.44652918966018, "grad_norm": 0.6076474189758301, "learning_rate": 0.0001, "loss": 1.4934, "step": 12451 }, { "epoch": 1.4466453674121404, "grad_norm": 0.6262378692626953, "learning_rate": 0.0001, "loss": 1.5541, "step": 12452 }, { "epoch": 1.446761545164101, "grad_norm": 0.6727213859558105, "learning_rate": 0.0001, "loss": 1.6884, "step": 12453 }, { "epoch": 1.4468777229160616, "grad_norm": 0.5651970505714417, "learning_rate": 0.0001, "loss": 1.4839, "step": 12454 }, { "epoch": 1.4469939006680221, "grad_norm": 0.5852252244949341, "learning_rate": 0.0001, "loss": 1.5358, "step": 12455 }, { "epoch": 1.4471100784199826, "grad_norm": 0.6040740609169006, "learning_rate": 0.0001, "loss": 1.3637, "step": 12456 }, { "epoch": 1.4472262561719431, "grad_norm": 0.6351321935653687, "learning_rate": 0.0001, "loss": 1.4719, "step": 12457 }, { "epoch": 1.4473424339239036, "grad_norm": 0.5973447561264038, "learning_rate": 0.0001, "loss": 1.5772, "step": 12458 }, { "epoch": 1.447458611675864, "grad_norm": 0.5978641510009766, "learning_rate": 0.0001, "loss": 1.5503, "step": 12459 }, { "epoch": 1.4475747894278246, "grad_norm": 0.6435196995735168, "learning_rate": 0.0001, "loss": 1.6186, "step": 12460 }, { "epoch": 1.447690967179785, "grad_norm": 0.5686025619506836, "learning_rate": 0.0001, "loss": 1.4097, "step": 12461 }, { "epoch": 1.4478071449317456, "grad_norm": 0.5564260482788086, "learning_rate": 0.0001, "loss": 1.2317, "step": 12462 }, { "epoch": 1.447923322683706, "grad_norm": 0.6230119466781616, "learning_rate": 0.0001, "loss": 1.49, "step": 12463 }, { "epoch": 1.4480395004356665, "grad_norm": 0.5975537896156311, "learning_rate": 0.0001, "loss": 1.4125, "step": 12464 }, { "epoch": 1.448155678187627, "grad_norm": 0.5980433821678162, "learning_rate": 0.0001, "loss": 1.444, "step": 12465 }, { "epoch": 1.4482718559395875, "grad_norm": 0.5679630041122437, "learning_rate": 0.0001, "loss": 1.2907, "step": 12466 }, { "epoch": 1.448388033691548, "grad_norm": 0.643408477306366, "learning_rate": 0.0001, "loss": 1.5006, "step": 12467 }, { "epoch": 1.4485042114435085, "grad_norm": 0.6029306054115295, "learning_rate": 0.0001, "loss": 1.4144, "step": 12468 }, { "epoch": 1.4486203891954692, "grad_norm": 0.624788224697113, "learning_rate": 0.0001, "loss": 1.4684, "step": 12469 }, { "epoch": 1.4487365669474297, "grad_norm": 0.6190298199653625, "learning_rate": 0.0001, "loss": 1.2963, "step": 12470 }, { "epoch": 1.4488527446993902, "grad_norm": 0.6062245965003967, "learning_rate": 0.0001, "loss": 1.5758, "step": 12471 }, { "epoch": 1.4489689224513507, "grad_norm": 0.6660163402557373, "learning_rate": 0.0001, "loss": 1.5742, "step": 12472 }, { "epoch": 1.4490851002033112, "grad_norm": 0.5759906768798828, "learning_rate": 0.0001, "loss": 1.3949, "step": 12473 }, { "epoch": 1.4492012779552716, "grad_norm": 0.6217337846755981, "learning_rate": 0.0001, "loss": 1.483, "step": 12474 }, { "epoch": 1.4493174557072321, "grad_norm": 0.5870078802108765, "learning_rate": 0.0001, "loss": 1.4583, "step": 12475 }, { "epoch": 1.4494336334591926, "grad_norm": 0.5740808844566345, "learning_rate": 0.0001, "loss": 1.2145, "step": 12476 }, { "epoch": 1.4495498112111531, "grad_norm": 0.603854775428772, "learning_rate": 0.0001, "loss": 1.5352, "step": 12477 }, { "epoch": 1.4496659889631136, "grad_norm": 0.5887278914451599, "learning_rate": 0.0001, "loss": 1.4273, "step": 12478 }, { "epoch": 1.449782166715074, "grad_norm": 0.5863503217697144, "learning_rate": 0.0001, "loss": 1.4259, "step": 12479 }, { "epoch": 1.4498983444670346, "grad_norm": 0.5903075337409973, "learning_rate": 0.0001, "loss": 1.439, "step": 12480 }, { "epoch": 1.450014522218995, "grad_norm": 0.6201311945915222, "learning_rate": 0.0001, "loss": 1.4628, "step": 12481 }, { "epoch": 1.4501306999709556, "grad_norm": 0.6051203608512878, "learning_rate": 0.0001, "loss": 1.4167, "step": 12482 }, { "epoch": 1.450246877722916, "grad_norm": 0.6963009834289551, "learning_rate": 0.0001, "loss": 1.5327, "step": 12483 }, { "epoch": 1.4503630554748765, "grad_norm": 0.6066775918006897, "learning_rate": 0.0001, "loss": 1.401, "step": 12484 }, { "epoch": 1.450479233226837, "grad_norm": 0.5624590516090393, "learning_rate": 0.0001, "loss": 1.2795, "step": 12485 }, { "epoch": 1.4505954109787975, "grad_norm": 0.5763733983039856, "learning_rate": 0.0001, "loss": 1.3443, "step": 12486 }, { "epoch": 1.450711588730758, "grad_norm": 0.6045727729797363, "learning_rate": 0.0001, "loss": 1.6086, "step": 12487 }, { "epoch": 1.4508277664827185, "grad_norm": 0.6144154667854309, "learning_rate": 0.0001, "loss": 1.4593, "step": 12488 }, { "epoch": 1.450943944234679, "grad_norm": 0.6358152031898499, "learning_rate": 0.0001, "loss": 1.5782, "step": 12489 }, { "epoch": 1.4510601219866395, "grad_norm": 0.5821871161460876, "learning_rate": 0.0001, "loss": 1.4008, "step": 12490 }, { "epoch": 1.4511762997386, "grad_norm": 0.6219722032546997, "learning_rate": 0.0001, "loss": 1.4496, "step": 12491 }, { "epoch": 1.4512924774905605, "grad_norm": 0.5885584354400635, "learning_rate": 0.0001, "loss": 1.3345, "step": 12492 }, { "epoch": 1.451408655242521, "grad_norm": 0.6406071782112122, "learning_rate": 0.0001, "loss": 1.5645, "step": 12493 }, { "epoch": 1.4515248329944814, "grad_norm": 0.5714595317840576, "learning_rate": 0.0001, "loss": 1.2703, "step": 12494 }, { "epoch": 1.451641010746442, "grad_norm": 0.6086097955703735, "learning_rate": 0.0001, "loss": 1.6087, "step": 12495 }, { "epoch": 1.4517571884984026, "grad_norm": 0.6160340309143066, "learning_rate": 0.0001, "loss": 1.7256, "step": 12496 }, { "epoch": 1.4518733662503631, "grad_norm": 0.6223379969596863, "learning_rate": 0.0001, "loss": 1.5614, "step": 12497 }, { "epoch": 1.4519895440023236, "grad_norm": 0.6125460267066956, "learning_rate": 0.0001, "loss": 1.3264, "step": 12498 }, { "epoch": 1.452105721754284, "grad_norm": 0.6102118492126465, "learning_rate": 0.0001, "loss": 1.3831, "step": 12499 }, { "epoch": 1.4522218995062446, "grad_norm": 0.6099421381950378, "learning_rate": 0.0001, "loss": 1.3743, "step": 12500 }, { "epoch": 1.452338077258205, "grad_norm": 0.559723436832428, "learning_rate": 0.0001, "loss": 1.3618, "step": 12501 }, { "epoch": 1.4524542550101656, "grad_norm": 0.6058681011199951, "learning_rate": 0.0001, "loss": 1.552, "step": 12502 }, { "epoch": 1.452570432762126, "grad_norm": 0.6532595753669739, "learning_rate": 0.0001, "loss": 1.6282, "step": 12503 }, { "epoch": 1.4526866105140865, "grad_norm": 0.6153339743614197, "learning_rate": 0.0001, "loss": 1.5724, "step": 12504 }, { "epoch": 1.452802788266047, "grad_norm": 0.5688385963439941, "learning_rate": 0.0001, "loss": 1.2545, "step": 12505 }, { "epoch": 1.4529189660180075, "grad_norm": 0.6670095920562744, "learning_rate": 0.0001, "loss": 1.6536, "step": 12506 }, { "epoch": 1.453035143769968, "grad_norm": 0.6240342259407043, "learning_rate": 0.0001, "loss": 1.4827, "step": 12507 }, { "epoch": 1.4531513215219285, "grad_norm": 0.6448224186897278, "learning_rate": 0.0001, "loss": 1.629, "step": 12508 }, { "epoch": 1.453267499273889, "grad_norm": 0.6262985467910767, "learning_rate": 0.0001, "loss": 1.5922, "step": 12509 }, { "epoch": 1.4533836770258495, "grad_norm": 0.580199122428894, "learning_rate": 0.0001, "loss": 1.3952, "step": 12510 }, { "epoch": 1.4534998547778102, "grad_norm": 0.5817636847496033, "learning_rate": 0.0001, "loss": 1.3002, "step": 12511 }, { "epoch": 1.4536160325297707, "grad_norm": 0.6186819672584534, "learning_rate": 0.0001, "loss": 1.3567, "step": 12512 }, { "epoch": 1.4537322102817312, "grad_norm": 0.6286445260047913, "learning_rate": 0.0001, "loss": 1.4373, "step": 12513 }, { "epoch": 1.4538483880336917, "grad_norm": 0.6268229484558105, "learning_rate": 0.0001, "loss": 1.5821, "step": 12514 }, { "epoch": 1.4539645657856521, "grad_norm": 0.5959900617599487, "learning_rate": 0.0001, "loss": 1.4463, "step": 12515 }, { "epoch": 1.4540807435376126, "grad_norm": 0.6190244555473328, "learning_rate": 0.0001, "loss": 1.3746, "step": 12516 }, { "epoch": 1.4541969212895731, "grad_norm": 0.6077380180358887, "learning_rate": 0.0001, "loss": 1.5158, "step": 12517 }, { "epoch": 1.4543130990415336, "grad_norm": 0.6312915086746216, "learning_rate": 0.0001, "loss": 1.5431, "step": 12518 }, { "epoch": 1.454429276793494, "grad_norm": 0.642156720161438, "learning_rate": 0.0001, "loss": 1.6197, "step": 12519 }, { "epoch": 1.4545454545454546, "grad_norm": 0.6618093252182007, "learning_rate": 0.0001, "loss": 1.4224, "step": 12520 }, { "epoch": 1.454661632297415, "grad_norm": 0.61574786901474, "learning_rate": 0.0001, "loss": 1.5195, "step": 12521 }, { "epoch": 1.4547778100493756, "grad_norm": 0.5520590543746948, "learning_rate": 0.0001, "loss": 1.1745, "step": 12522 }, { "epoch": 1.454893987801336, "grad_norm": 0.6121432185173035, "learning_rate": 0.0001, "loss": 1.4132, "step": 12523 }, { "epoch": 1.4550101655532965, "grad_norm": 0.6063690185546875, "learning_rate": 0.0001, "loss": 1.3249, "step": 12524 }, { "epoch": 1.455126343305257, "grad_norm": 0.6351370811462402, "learning_rate": 0.0001, "loss": 1.3921, "step": 12525 }, { "epoch": 1.4552425210572175, "grad_norm": 0.5611215829849243, "learning_rate": 0.0001, "loss": 1.5057, "step": 12526 }, { "epoch": 1.455358698809178, "grad_norm": 0.6196964383125305, "learning_rate": 0.0001, "loss": 1.4851, "step": 12527 }, { "epoch": 1.4554748765611385, "grad_norm": 0.5891339778900146, "learning_rate": 0.0001, "loss": 1.3492, "step": 12528 }, { "epoch": 1.455591054313099, "grad_norm": 0.634013295173645, "learning_rate": 0.0001, "loss": 1.5088, "step": 12529 }, { "epoch": 1.4557072320650595, "grad_norm": 0.5923905968666077, "learning_rate": 0.0001, "loss": 1.5076, "step": 12530 }, { "epoch": 1.45582340981702, "grad_norm": 0.6321912407875061, "learning_rate": 0.0001, "loss": 1.5716, "step": 12531 }, { "epoch": 1.4559395875689805, "grad_norm": 0.5776755213737488, "learning_rate": 0.0001, "loss": 1.5406, "step": 12532 }, { "epoch": 1.456055765320941, "grad_norm": 0.5691312551498413, "learning_rate": 0.0001, "loss": 1.4853, "step": 12533 }, { "epoch": 1.4561719430729014, "grad_norm": 0.6355810165405273, "learning_rate": 0.0001, "loss": 1.4057, "step": 12534 }, { "epoch": 1.456288120824862, "grad_norm": 0.6114268898963928, "learning_rate": 0.0001, "loss": 1.5544, "step": 12535 }, { "epoch": 1.4564042985768224, "grad_norm": 0.5496631860733032, "learning_rate": 0.0001, "loss": 1.3485, "step": 12536 }, { "epoch": 1.4565204763287831, "grad_norm": 0.6105073094367981, "learning_rate": 0.0001, "loss": 1.5128, "step": 12537 }, { "epoch": 1.4566366540807436, "grad_norm": 0.5859121680259705, "learning_rate": 0.0001, "loss": 1.4094, "step": 12538 }, { "epoch": 1.456752831832704, "grad_norm": 0.6114695072174072, "learning_rate": 0.0001, "loss": 1.4062, "step": 12539 }, { "epoch": 1.4568690095846646, "grad_norm": 0.6075177788734436, "learning_rate": 0.0001, "loss": 1.3643, "step": 12540 }, { "epoch": 1.456985187336625, "grad_norm": 0.5910870432853699, "learning_rate": 0.0001, "loss": 1.4241, "step": 12541 }, { "epoch": 1.4571013650885856, "grad_norm": 0.595967710018158, "learning_rate": 0.0001, "loss": 1.4613, "step": 12542 }, { "epoch": 1.457217542840546, "grad_norm": 0.6669548749923706, "learning_rate": 0.0001, "loss": 1.6616, "step": 12543 }, { "epoch": 1.4573337205925065, "grad_norm": 0.6057718396186829, "learning_rate": 0.0001, "loss": 1.5482, "step": 12544 }, { "epoch": 1.457449898344467, "grad_norm": 0.5876947641372681, "learning_rate": 0.0001, "loss": 1.4503, "step": 12545 }, { "epoch": 1.4575660760964275, "grad_norm": 0.6106549501419067, "learning_rate": 0.0001, "loss": 1.4327, "step": 12546 }, { "epoch": 1.457682253848388, "grad_norm": 0.6266193985939026, "learning_rate": 0.0001, "loss": 1.2981, "step": 12547 }, { "epoch": 1.4577984316003485, "grad_norm": 0.6059079170227051, "learning_rate": 0.0001, "loss": 1.5158, "step": 12548 }, { "epoch": 1.457914609352309, "grad_norm": 0.5994994640350342, "learning_rate": 0.0001, "loss": 1.6222, "step": 12549 }, { "epoch": 1.4580307871042695, "grad_norm": 0.6154032945632935, "learning_rate": 0.0001, "loss": 1.5556, "step": 12550 }, { "epoch": 1.45814696485623, "grad_norm": 0.6104035377502441, "learning_rate": 0.0001, "loss": 1.5981, "step": 12551 }, { "epoch": 1.4582631426081905, "grad_norm": 0.6142469644546509, "learning_rate": 0.0001, "loss": 1.5546, "step": 12552 }, { "epoch": 1.4583793203601512, "grad_norm": 0.574055552482605, "learning_rate": 0.0001, "loss": 1.309, "step": 12553 }, { "epoch": 1.4584954981121117, "grad_norm": 0.5898140668869019, "learning_rate": 0.0001, "loss": 1.3786, "step": 12554 }, { "epoch": 1.4586116758640721, "grad_norm": 0.5797202587127686, "learning_rate": 0.0001, "loss": 1.472, "step": 12555 }, { "epoch": 1.4587278536160326, "grad_norm": 0.554664134979248, "learning_rate": 0.0001, "loss": 1.3166, "step": 12556 }, { "epoch": 1.4588440313679931, "grad_norm": 0.6085032820701599, "learning_rate": 0.0001, "loss": 1.4686, "step": 12557 }, { "epoch": 1.4589602091199536, "grad_norm": 0.625577986240387, "learning_rate": 0.0001, "loss": 1.5754, "step": 12558 }, { "epoch": 1.459076386871914, "grad_norm": 0.6222672462463379, "learning_rate": 0.0001, "loss": 1.5968, "step": 12559 }, { "epoch": 1.4591925646238746, "grad_norm": 0.61932772397995, "learning_rate": 0.0001, "loss": 1.5144, "step": 12560 }, { "epoch": 1.459308742375835, "grad_norm": 0.590408444404602, "learning_rate": 0.0001, "loss": 1.4954, "step": 12561 }, { "epoch": 1.4594249201277956, "grad_norm": 0.6052151918411255, "learning_rate": 0.0001, "loss": 1.5481, "step": 12562 }, { "epoch": 1.459541097879756, "grad_norm": 0.5636199712753296, "learning_rate": 0.0001, "loss": 1.452, "step": 12563 }, { "epoch": 1.4596572756317165, "grad_norm": 0.639377236366272, "learning_rate": 0.0001, "loss": 1.6294, "step": 12564 }, { "epoch": 1.459773453383677, "grad_norm": 0.5761369466781616, "learning_rate": 0.0001, "loss": 1.5082, "step": 12565 }, { "epoch": 1.4598896311356375, "grad_norm": 0.59371018409729, "learning_rate": 0.0001, "loss": 1.53, "step": 12566 }, { "epoch": 1.460005808887598, "grad_norm": 0.6135494112968445, "learning_rate": 0.0001, "loss": 1.5947, "step": 12567 }, { "epoch": 1.4601219866395585, "grad_norm": 0.6341174840927124, "learning_rate": 0.0001, "loss": 1.2997, "step": 12568 }, { "epoch": 1.460238164391519, "grad_norm": 0.6097371578216553, "learning_rate": 0.0001, "loss": 1.579, "step": 12569 }, { "epoch": 1.4603543421434795, "grad_norm": 0.5795385837554932, "learning_rate": 0.0001, "loss": 1.3574, "step": 12570 }, { "epoch": 1.46047051989544, "grad_norm": 0.618484377861023, "learning_rate": 0.0001, "loss": 1.4424, "step": 12571 }, { "epoch": 1.4605866976474005, "grad_norm": 0.5799589157104492, "learning_rate": 0.0001, "loss": 1.4669, "step": 12572 }, { "epoch": 1.460702875399361, "grad_norm": 0.6043416261672974, "learning_rate": 0.0001, "loss": 1.39, "step": 12573 }, { "epoch": 1.4608190531513214, "grad_norm": 0.5822587013244629, "learning_rate": 0.0001, "loss": 1.3416, "step": 12574 }, { "epoch": 1.460935230903282, "grad_norm": 0.6009185910224915, "learning_rate": 0.0001, "loss": 1.3142, "step": 12575 }, { "epoch": 1.4610514086552424, "grad_norm": 0.5679329633712769, "learning_rate": 0.0001, "loss": 1.3666, "step": 12576 }, { "epoch": 1.461167586407203, "grad_norm": 0.6215576529502869, "learning_rate": 0.0001, "loss": 1.5026, "step": 12577 }, { "epoch": 1.4612837641591634, "grad_norm": 0.5975043177604675, "learning_rate": 0.0001, "loss": 1.39, "step": 12578 }, { "epoch": 1.461399941911124, "grad_norm": 0.5994958281517029, "learning_rate": 0.0001, "loss": 1.3617, "step": 12579 }, { "epoch": 1.4615161196630846, "grad_norm": 0.6470338106155396, "learning_rate": 0.0001, "loss": 1.6114, "step": 12580 }, { "epoch": 1.461632297415045, "grad_norm": 0.7016136050224304, "learning_rate": 0.0001, "loss": 1.5484, "step": 12581 }, { "epoch": 1.4617484751670056, "grad_norm": 0.5727640986442566, "learning_rate": 0.0001, "loss": 1.3699, "step": 12582 }, { "epoch": 1.461864652918966, "grad_norm": 0.5754709839820862, "learning_rate": 0.0001, "loss": 1.4004, "step": 12583 }, { "epoch": 1.4619808306709265, "grad_norm": 0.5758077502250671, "learning_rate": 0.0001, "loss": 1.4384, "step": 12584 }, { "epoch": 1.462097008422887, "grad_norm": 0.5902067422866821, "learning_rate": 0.0001, "loss": 1.3025, "step": 12585 }, { "epoch": 1.4622131861748475, "grad_norm": 0.5974416136741638, "learning_rate": 0.0001, "loss": 1.6851, "step": 12586 }, { "epoch": 1.462329363926808, "grad_norm": 0.5915681719779968, "learning_rate": 0.0001, "loss": 1.4601, "step": 12587 }, { "epoch": 1.4624455416787685, "grad_norm": 0.5832463502883911, "learning_rate": 0.0001, "loss": 1.4135, "step": 12588 }, { "epoch": 1.462561719430729, "grad_norm": 0.6045186519622803, "learning_rate": 0.0001, "loss": 1.5844, "step": 12589 }, { "epoch": 1.4626778971826895, "grad_norm": 0.634339451789856, "learning_rate": 0.0001, "loss": 1.61, "step": 12590 }, { "epoch": 1.46279407493465, "grad_norm": 0.5863074660301208, "learning_rate": 0.0001, "loss": 1.4268, "step": 12591 }, { "epoch": 1.4629102526866105, "grad_norm": 0.577804684638977, "learning_rate": 0.0001, "loss": 1.4489, "step": 12592 }, { "epoch": 1.463026430438571, "grad_norm": 0.6123642921447754, "learning_rate": 0.0001, "loss": 1.5616, "step": 12593 }, { "epoch": 1.4631426081905314, "grad_norm": 0.6405830979347229, "learning_rate": 0.0001, "loss": 1.658, "step": 12594 }, { "epoch": 1.4632587859424921, "grad_norm": 0.6347610354423523, "learning_rate": 0.0001, "loss": 1.4584, "step": 12595 }, { "epoch": 1.4633749636944526, "grad_norm": 0.5713246464729309, "learning_rate": 0.0001, "loss": 1.4615, "step": 12596 }, { "epoch": 1.4634911414464131, "grad_norm": 0.5953773260116577, "learning_rate": 0.0001, "loss": 1.4558, "step": 12597 }, { "epoch": 1.4636073191983736, "grad_norm": 0.5882254838943481, "learning_rate": 0.0001, "loss": 1.5515, "step": 12598 }, { "epoch": 1.463723496950334, "grad_norm": 0.5993982553482056, "learning_rate": 0.0001, "loss": 1.3766, "step": 12599 }, { "epoch": 1.4638396747022946, "grad_norm": 0.6374693512916565, "learning_rate": 0.0001, "loss": 1.5259, "step": 12600 }, { "epoch": 1.463955852454255, "grad_norm": 0.6188412308692932, "learning_rate": 0.0001, "loss": 1.2187, "step": 12601 }, { "epoch": 1.4640720302062156, "grad_norm": 0.5834885835647583, "learning_rate": 0.0001, "loss": 1.4844, "step": 12602 }, { "epoch": 1.464188207958176, "grad_norm": 0.6189852356910706, "learning_rate": 0.0001, "loss": 1.5104, "step": 12603 }, { "epoch": 1.4643043857101365, "grad_norm": 0.6446365714073181, "learning_rate": 0.0001, "loss": 1.6013, "step": 12604 }, { "epoch": 1.464420563462097, "grad_norm": 0.6015826463699341, "learning_rate": 0.0001, "loss": 1.4581, "step": 12605 }, { "epoch": 1.4645367412140575, "grad_norm": 0.6048887372016907, "learning_rate": 0.0001, "loss": 1.391, "step": 12606 }, { "epoch": 1.464652918966018, "grad_norm": 0.5923680663108826, "learning_rate": 0.0001, "loss": 1.3832, "step": 12607 }, { "epoch": 1.4647690967179785, "grad_norm": 0.6021586656570435, "learning_rate": 0.0001, "loss": 1.4473, "step": 12608 }, { "epoch": 1.464885274469939, "grad_norm": 0.5922961831092834, "learning_rate": 0.0001, "loss": 1.5496, "step": 12609 }, { "epoch": 1.4650014522218995, "grad_norm": 0.5911400318145752, "learning_rate": 0.0001, "loss": 1.3201, "step": 12610 }, { "epoch": 1.46511762997386, "grad_norm": 0.6118112206459045, "learning_rate": 0.0001, "loss": 1.5522, "step": 12611 }, { "epoch": 1.4652338077258205, "grad_norm": 0.5985094308853149, "learning_rate": 0.0001, "loss": 1.4773, "step": 12612 }, { "epoch": 1.465349985477781, "grad_norm": 0.6050367951393127, "learning_rate": 0.0001, "loss": 1.4864, "step": 12613 }, { "epoch": 1.4654661632297414, "grad_norm": 0.5625033378601074, "learning_rate": 0.0001, "loss": 1.1263, "step": 12614 }, { "epoch": 1.465582340981702, "grad_norm": 0.6526144742965698, "learning_rate": 0.0001, "loss": 1.4681, "step": 12615 }, { "epoch": 1.4656985187336624, "grad_norm": 0.5911290645599365, "learning_rate": 0.0001, "loss": 1.4809, "step": 12616 }, { "epoch": 1.465814696485623, "grad_norm": 0.610272228717804, "learning_rate": 0.0001, "loss": 1.3182, "step": 12617 }, { "epoch": 1.4659308742375834, "grad_norm": 0.6235426068305969, "learning_rate": 0.0001, "loss": 1.6892, "step": 12618 }, { "epoch": 1.4660470519895439, "grad_norm": 0.5913504362106323, "learning_rate": 0.0001, "loss": 1.4667, "step": 12619 }, { "epoch": 1.4661632297415044, "grad_norm": 0.6157997846603394, "learning_rate": 0.0001, "loss": 1.528, "step": 12620 }, { "epoch": 1.466279407493465, "grad_norm": 0.6300349235534668, "learning_rate": 0.0001, "loss": 1.507, "step": 12621 }, { "epoch": 1.4663955852454256, "grad_norm": 0.5744130611419678, "learning_rate": 0.0001, "loss": 1.2242, "step": 12622 }, { "epoch": 1.466511762997386, "grad_norm": 0.6219088435173035, "learning_rate": 0.0001, "loss": 1.5588, "step": 12623 }, { "epoch": 1.4666279407493465, "grad_norm": 0.5827035307884216, "learning_rate": 0.0001, "loss": 1.4531, "step": 12624 }, { "epoch": 1.466744118501307, "grad_norm": 0.6625955104827881, "learning_rate": 0.0001, "loss": 1.6221, "step": 12625 }, { "epoch": 1.4668602962532675, "grad_norm": 0.6402918100357056, "learning_rate": 0.0001, "loss": 1.6257, "step": 12626 }, { "epoch": 1.466976474005228, "grad_norm": 0.5460782647132874, "learning_rate": 0.0001, "loss": 1.3716, "step": 12627 }, { "epoch": 1.4670926517571885, "grad_norm": 0.5440313220024109, "learning_rate": 0.0001, "loss": 1.3949, "step": 12628 }, { "epoch": 1.467208829509149, "grad_norm": 0.6203206181526184, "learning_rate": 0.0001, "loss": 1.6124, "step": 12629 }, { "epoch": 1.4673250072611095, "grad_norm": 0.59822678565979, "learning_rate": 0.0001, "loss": 1.4645, "step": 12630 }, { "epoch": 1.46744118501307, "grad_norm": 0.596192479133606, "learning_rate": 0.0001, "loss": 1.4839, "step": 12631 }, { "epoch": 1.4675573627650305, "grad_norm": 0.567420482635498, "learning_rate": 0.0001, "loss": 1.495, "step": 12632 }, { "epoch": 1.467673540516991, "grad_norm": 0.6413534879684448, "learning_rate": 0.0001, "loss": 1.3717, "step": 12633 }, { "epoch": 1.4677897182689514, "grad_norm": 0.5863707065582275, "learning_rate": 0.0001, "loss": 1.411, "step": 12634 }, { "epoch": 1.467905896020912, "grad_norm": 0.5698891282081604, "learning_rate": 0.0001, "loss": 1.3382, "step": 12635 }, { "epoch": 1.4680220737728724, "grad_norm": 0.6080556511878967, "learning_rate": 0.0001, "loss": 1.4324, "step": 12636 }, { "epoch": 1.4681382515248331, "grad_norm": 0.6110317707061768, "learning_rate": 0.0001, "loss": 1.5286, "step": 12637 }, { "epoch": 1.4682544292767936, "grad_norm": 0.6099866032600403, "learning_rate": 0.0001, "loss": 1.4331, "step": 12638 }, { "epoch": 1.468370607028754, "grad_norm": 0.5823593139648438, "learning_rate": 0.0001, "loss": 1.4508, "step": 12639 }, { "epoch": 1.4684867847807146, "grad_norm": 0.6026428937911987, "learning_rate": 0.0001, "loss": 1.6118, "step": 12640 }, { "epoch": 1.468602962532675, "grad_norm": 0.5801883935928345, "learning_rate": 0.0001, "loss": 1.41, "step": 12641 }, { "epoch": 1.4687191402846356, "grad_norm": 0.5360670685768127, "learning_rate": 0.0001, "loss": 1.3604, "step": 12642 }, { "epoch": 1.468835318036596, "grad_norm": 0.5586822032928467, "learning_rate": 0.0001, "loss": 1.3045, "step": 12643 }, { "epoch": 1.4689514957885565, "grad_norm": 0.5789737701416016, "learning_rate": 0.0001, "loss": 1.5573, "step": 12644 }, { "epoch": 1.469067673540517, "grad_norm": 0.5735986828804016, "learning_rate": 0.0001, "loss": 1.3718, "step": 12645 }, { "epoch": 1.4691838512924775, "grad_norm": 0.7054757475852966, "learning_rate": 0.0001, "loss": 1.5534, "step": 12646 }, { "epoch": 1.469300029044438, "grad_norm": 0.6231496930122375, "learning_rate": 0.0001, "loss": 1.4146, "step": 12647 }, { "epoch": 1.4694162067963985, "grad_norm": 0.5634230375289917, "learning_rate": 0.0001, "loss": 1.3909, "step": 12648 }, { "epoch": 1.469532384548359, "grad_norm": 0.6670734286308289, "learning_rate": 0.0001, "loss": 1.5684, "step": 12649 }, { "epoch": 1.4696485623003195, "grad_norm": 0.592263400554657, "learning_rate": 0.0001, "loss": 1.399, "step": 12650 }, { "epoch": 1.46976474005228, "grad_norm": 0.6188017725944519, "learning_rate": 0.0001, "loss": 1.5407, "step": 12651 }, { "epoch": 1.4698809178042405, "grad_norm": 0.5997496247291565, "learning_rate": 0.0001, "loss": 1.4838, "step": 12652 }, { "epoch": 1.469997095556201, "grad_norm": 0.6102793216705322, "learning_rate": 0.0001, "loss": 1.4263, "step": 12653 }, { "epoch": 1.4701132733081614, "grad_norm": 0.605833888053894, "learning_rate": 0.0001, "loss": 1.5273, "step": 12654 }, { "epoch": 1.470229451060122, "grad_norm": 0.606594443321228, "learning_rate": 0.0001, "loss": 1.4393, "step": 12655 }, { "epoch": 1.4703456288120824, "grad_norm": 0.5853731036186218, "learning_rate": 0.0001, "loss": 1.3908, "step": 12656 }, { "epoch": 1.470461806564043, "grad_norm": 0.59587162733078, "learning_rate": 0.0001, "loss": 1.343, "step": 12657 }, { "epoch": 1.4705779843160034, "grad_norm": 0.6557053327560425, "learning_rate": 0.0001, "loss": 1.7309, "step": 12658 }, { "epoch": 1.4706941620679639, "grad_norm": 0.6285101771354675, "learning_rate": 0.0001, "loss": 1.5067, "step": 12659 }, { "epoch": 1.4708103398199244, "grad_norm": 0.5789119005203247, "learning_rate": 0.0001, "loss": 1.2987, "step": 12660 }, { "epoch": 1.4709265175718849, "grad_norm": 0.5933101177215576, "learning_rate": 0.0001, "loss": 1.3469, "step": 12661 }, { "epoch": 1.4710426953238454, "grad_norm": 0.5343286395072937, "learning_rate": 0.0001, "loss": 1.3598, "step": 12662 }, { "epoch": 1.471158873075806, "grad_norm": 0.6221956610679626, "learning_rate": 0.0001, "loss": 1.4563, "step": 12663 }, { "epoch": 1.4712750508277666, "grad_norm": 0.6174246668815613, "learning_rate": 0.0001, "loss": 1.4157, "step": 12664 }, { "epoch": 1.471391228579727, "grad_norm": 0.6641348600387573, "learning_rate": 0.0001, "loss": 1.4751, "step": 12665 }, { "epoch": 1.4715074063316875, "grad_norm": 0.5612043738365173, "learning_rate": 0.0001, "loss": 1.2723, "step": 12666 }, { "epoch": 1.471623584083648, "grad_norm": 0.6334659457206726, "learning_rate": 0.0001, "loss": 1.5654, "step": 12667 }, { "epoch": 1.4717397618356085, "grad_norm": 0.5860728025436401, "learning_rate": 0.0001, "loss": 1.505, "step": 12668 }, { "epoch": 1.471855939587569, "grad_norm": 0.6046940684318542, "learning_rate": 0.0001, "loss": 1.5854, "step": 12669 }, { "epoch": 1.4719721173395295, "grad_norm": 0.5735712051391602, "learning_rate": 0.0001, "loss": 1.3659, "step": 12670 }, { "epoch": 1.47208829509149, "grad_norm": 0.6405760049819946, "learning_rate": 0.0001, "loss": 1.4848, "step": 12671 }, { "epoch": 1.4722044728434505, "grad_norm": 0.6492209434509277, "learning_rate": 0.0001, "loss": 1.5792, "step": 12672 }, { "epoch": 1.472320650595411, "grad_norm": 0.5880120396614075, "learning_rate": 0.0001, "loss": 1.2946, "step": 12673 }, { "epoch": 1.4724368283473714, "grad_norm": 0.5945394039154053, "learning_rate": 0.0001, "loss": 1.3463, "step": 12674 }, { "epoch": 1.472553006099332, "grad_norm": 0.6104127168655396, "learning_rate": 0.0001, "loss": 1.4624, "step": 12675 }, { "epoch": 1.4726691838512924, "grad_norm": 0.6284909248352051, "learning_rate": 0.0001, "loss": 1.6087, "step": 12676 }, { "epoch": 1.472785361603253, "grad_norm": 0.6103723645210266, "learning_rate": 0.0001, "loss": 1.5014, "step": 12677 }, { "epoch": 1.4729015393552134, "grad_norm": 0.6179032921791077, "learning_rate": 0.0001, "loss": 1.5127, "step": 12678 }, { "epoch": 1.473017717107174, "grad_norm": 0.5750223398208618, "learning_rate": 0.0001, "loss": 1.4715, "step": 12679 }, { "epoch": 1.4731338948591346, "grad_norm": 0.5653707981109619, "learning_rate": 0.0001, "loss": 1.2779, "step": 12680 }, { "epoch": 1.473250072611095, "grad_norm": 0.6165471076965332, "learning_rate": 0.0001, "loss": 1.3156, "step": 12681 }, { "epoch": 1.4733662503630556, "grad_norm": 0.5788655281066895, "learning_rate": 0.0001, "loss": 1.3653, "step": 12682 }, { "epoch": 1.473482428115016, "grad_norm": 0.5805605053901672, "learning_rate": 0.0001, "loss": 1.4103, "step": 12683 }, { "epoch": 1.4735986058669766, "grad_norm": 0.6233782172203064, "learning_rate": 0.0001, "loss": 1.427, "step": 12684 }, { "epoch": 1.473714783618937, "grad_norm": 0.6231675744056702, "learning_rate": 0.0001, "loss": 1.5637, "step": 12685 }, { "epoch": 1.4738309613708975, "grad_norm": 0.5923117399215698, "learning_rate": 0.0001, "loss": 1.3208, "step": 12686 }, { "epoch": 1.473947139122858, "grad_norm": 0.6191889047622681, "learning_rate": 0.0001, "loss": 1.4431, "step": 12687 }, { "epoch": 1.4740633168748185, "grad_norm": 0.5808857083320618, "learning_rate": 0.0001, "loss": 1.4314, "step": 12688 }, { "epoch": 1.474179494626779, "grad_norm": 0.6818951964378357, "learning_rate": 0.0001, "loss": 1.5522, "step": 12689 }, { "epoch": 1.4742956723787395, "grad_norm": 0.6566728353500366, "learning_rate": 0.0001, "loss": 1.5566, "step": 12690 }, { "epoch": 1.4744118501307, "grad_norm": 0.6200664639472961, "learning_rate": 0.0001, "loss": 1.5531, "step": 12691 }, { "epoch": 1.4745280278826605, "grad_norm": 0.5786384344100952, "learning_rate": 0.0001, "loss": 1.4235, "step": 12692 }, { "epoch": 1.474644205634621, "grad_norm": 0.5918937921524048, "learning_rate": 0.0001, "loss": 1.3155, "step": 12693 }, { "epoch": 1.4747603833865814, "grad_norm": 0.6062121987342834, "learning_rate": 0.0001, "loss": 1.4284, "step": 12694 }, { "epoch": 1.474876561138542, "grad_norm": 0.614924967288971, "learning_rate": 0.0001, "loss": 1.6247, "step": 12695 }, { "epoch": 1.4749927388905024, "grad_norm": 0.6098072528839111, "learning_rate": 0.0001, "loss": 1.3263, "step": 12696 }, { "epoch": 1.475108916642463, "grad_norm": 0.6092824935913086, "learning_rate": 0.0001, "loss": 1.4069, "step": 12697 }, { "epoch": 1.4752250943944234, "grad_norm": 0.6376292109489441, "learning_rate": 0.0001, "loss": 1.4499, "step": 12698 }, { "epoch": 1.4753412721463839, "grad_norm": 0.6538935899734497, "learning_rate": 0.0001, "loss": 1.4994, "step": 12699 }, { "epoch": 1.4754574498983444, "grad_norm": 0.6000575423240662, "learning_rate": 0.0001, "loss": 1.3261, "step": 12700 }, { "epoch": 1.4755736276503049, "grad_norm": 0.6823968887329102, "learning_rate": 0.0001, "loss": 1.2802, "step": 12701 }, { "epoch": 1.4756898054022654, "grad_norm": 0.6618595123291016, "learning_rate": 0.0001, "loss": 1.5822, "step": 12702 }, { "epoch": 1.4758059831542258, "grad_norm": 0.6516097187995911, "learning_rate": 0.0001, "loss": 1.5829, "step": 12703 }, { "epoch": 1.4759221609061863, "grad_norm": 0.6231207251548767, "learning_rate": 0.0001, "loss": 1.687, "step": 12704 }, { "epoch": 1.476038338658147, "grad_norm": 0.6442578434944153, "learning_rate": 0.0001, "loss": 1.4111, "step": 12705 }, { "epoch": 1.4761545164101075, "grad_norm": 0.6241633296012878, "learning_rate": 0.0001, "loss": 1.6373, "step": 12706 }, { "epoch": 1.476270694162068, "grad_norm": 0.5882102847099304, "learning_rate": 0.0001, "loss": 1.3378, "step": 12707 }, { "epoch": 1.4763868719140285, "grad_norm": 0.6080202460289001, "learning_rate": 0.0001, "loss": 1.5929, "step": 12708 }, { "epoch": 1.476503049665989, "grad_norm": 0.6191835999488831, "learning_rate": 0.0001, "loss": 1.4236, "step": 12709 }, { "epoch": 1.4766192274179495, "grad_norm": 0.5939556360244751, "learning_rate": 0.0001, "loss": 1.3395, "step": 12710 }, { "epoch": 1.47673540516991, "grad_norm": 0.6249536275863647, "learning_rate": 0.0001, "loss": 1.4709, "step": 12711 }, { "epoch": 1.4768515829218705, "grad_norm": 0.5902261137962341, "learning_rate": 0.0001, "loss": 1.519, "step": 12712 }, { "epoch": 1.476967760673831, "grad_norm": 0.6220241785049438, "learning_rate": 0.0001, "loss": 1.5443, "step": 12713 }, { "epoch": 1.4770839384257914, "grad_norm": 0.5737898349761963, "learning_rate": 0.0001, "loss": 1.5521, "step": 12714 }, { "epoch": 1.477200116177752, "grad_norm": 0.6006242036819458, "learning_rate": 0.0001, "loss": 1.5542, "step": 12715 }, { "epoch": 1.4773162939297124, "grad_norm": 0.6238159537315369, "learning_rate": 0.0001, "loss": 1.3714, "step": 12716 }, { "epoch": 1.477432471681673, "grad_norm": 0.5738762617111206, "learning_rate": 0.0001, "loss": 1.3738, "step": 12717 }, { "epoch": 1.4775486494336334, "grad_norm": 0.573409914970398, "learning_rate": 0.0001, "loss": 1.2755, "step": 12718 }, { "epoch": 1.4776648271855939, "grad_norm": 0.5664563775062561, "learning_rate": 0.0001, "loss": 1.4181, "step": 12719 }, { "epoch": 1.4777810049375544, "grad_norm": 0.6218703389167786, "learning_rate": 0.0001, "loss": 1.6621, "step": 12720 }, { "epoch": 1.477897182689515, "grad_norm": 0.5889036655426025, "learning_rate": 0.0001, "loss": 1.6083, "step": 12721 }, { "epoch": 1.4780133604414756, "grad_norm": 0.6417556405067444, "learning_rate": 0.0001, "loss": 1.6125, "step": 12722 }, { "epoch": 1.478129538193436, "grad_norm": 0.5941647291183472, "learning_rate": 0.0001, "loss": 1.5546, "step": 12723 }, { "epoch": 1.4782457159453966, "grad_norm": 0.5559972524642944, "learning_rate": 0.0001, "loss": 1.4296, "step": 12724 }, { "epoch": 1.478361893697357, "grad_norm": 0.6716883182525635, "learning_rate": 0.0001, "loss": 1.5613, "step": 12725 }, { "epoch": 1.4784780714493175, "grad_norm": 0.582317054271698, "learning_rate": 0.0001, "loss": 1.556, "step": 12726 }, { "epoch": 1.478594249201278, "grad_norm": 0.5578384399414062, "learning_rate": 0.0001, "loss": 1.4072, "step": 12727 }, { "epoch": 1.4787104269532385, "grad_norm": 0.6159401535987854, "learning_rate": 0.0001, "loss": 1.4841, "step": 12728 }, { "epoch": 1.478826604705199, "grad_norm": 0.5790227055549622, "learning_rate": 0.0001, "loss": 1.2968, "step": 12729 }, { "epoch": 1.4789427824571595, "grad_norm": 0.5429813265800476, "learning_rate": 0.0001, "loss": 1.2731, "step": 12730 }, { "epoch": 1.47905896020912, "grad_norm": 0.6098136901855469, "learning_rate": 0.0001, "loss": 1.5287, "step": 12731 }, { "epoch": 1.4791751379610805, "grad_norm": 0.6206817030906677, "learning_rate": 0.0001, "loss": 1.3591, "step": 12732 }, { "epoch": 1.479291315713041, "grad_norm": 0.6183215975761414, "learning_rate": 0.0001, "loss": 1.5038, "step": 12733 }, { "epoch": 1.4794074934650014, "grad_norm": 0.6770209670066833, "learning_rate": 0.0001, "loss": 1.5481, "step": 12734 }, { "epoch": 1.479523671216962, "grad_norm": 0.6122325658798218, "learning_rate": 0.0001, "loss": 1.3449, "step": 12735 }, { "epoch": 1.4796398489689224, "grad_norm": 0.6098016500473022, "learning_rate": 0.0001, "loss": 1.4898, "step": 12736 }, { "epoch": 1.479756026720883, "grad_norm": 0.6338397860527039, "learning_rate": 0.0001, "loss": 1.4301, "step": 12737 }, { "epoch": 1.4798722044728434, "grad_norm": 0.6147565841674805, "learning_rate": 0.0001, "loss": 1.5078, "step": 12738 }, { "epoch": 1.4799883822248039, "grad_norm": 0.622284471988678, "learning_rate": 0.0001, "loss": 1.546, "step": 12739 }, { "epoch": 1.4801045599767644, "grad_norm": 0.5711101293563843, "learning_rate": 0.0001, "loss": 1.4056, "step": 12740 }, { "epoch": 1.4802207377287249, "grad_norm": 0.5917078852653503, "learning_rate": 0.0001, "loss": 1.4603, "step": 12741 }, { "epoch": 1.4803369154806854, "grad_norm": 0.5660672783851624, "learning_rate": 0.0001, "loss": 1.3994, "step": 12742 }, { "epoch": 1.4804530932326458, "grad_norm": 0.586409330368042, "learning_rate": 0.0001, "loss": 1.3446, "step": 12743 }, { "epoch": 1.4805692709846063, "grad_norm": 0.6024650931358337, "learning_rate": 0.0001, "loss": 1.4112, "step": 12744 }, { "epoch": 1.4806854487365668, "grad_norm": 0.5979096293449402, "learning_rate": 0.0001, "loss": 1.4108, "step": 12745 }, { "epoch": 1.4808016264885273, "grad_norm": 0.6005404591560364, "learning_rate": 0.0001, "loss": 1.4058, "step": 12746 }, { "epoch": 1.480917804240488, "grad_norm": 0.6152083873748779, "learning_rate": 0.0001, "loss": 1.4269, "step": 12747 }, { "epoch": 1.4810339819924485, "grad_norm": 0.6299720406532288, "learning_rate": 0.0001, "loss": 1.306, "step": 12748 }, { "epoch": 1.481150159744409, "grad_norm": 0.6793599724769592, "learning_rate": 0.0001, "loss": 1.5236, "step": 12749 }, { "epoch": 1.4812663374963695, "grad_norm": 0.5687341690063477, "learning_rate": 0.0001, "loss": 1.4857, "step": 12750 }, { "epoch": 1.48138251524833, "grad_norm": 0.5625494718551636, "learning_rate": 0.0001, "loss": 1.3357, "step": 12751 }, { "epoch": 1.4814986930002905, "grad_norm": 0.6246379017829895, "learning_rate": 0.0001, "loss": 1.4443, "step": 12752 }, { "epoch": 1.481614870752251, "grad_norm": 0.6253631114959717, "learning_rate": 0.0001, "loss": 1.4476, "step": 12753 }, { "epoch": 1.4817310485042114, "grad_norm": 0.603517472743988, "learning_rate": 0.0001, "loss": 1.3907, "step": 12754 }, { "epoch": 1.481847226256172, "grad_norm": 0.5724433660507202, "learning_rate": 0.0001, "loss": 1.4412, "step": 12755 }, { "epoch": 1.4819634040081324, "grad_norm": 0.6194106340408325, "learning_rate": 0.0001, "loss": 1.4979, "step": 12756 }, { "epoch": 1.482079581760093, "grad_norm": 0.6359567642211914, "learning_rate": 0.0001, "loss": 1.5081, "step": 12757 }, { "epoch": 1.4821957595120534, "grad_norm": 0.5960992574691772, "learning_rate": 0.0001, "loss": 1.4689, "step": 12758 }, { "epoch": 1.482311937264014, "grad_norm": 0.5790872573852539, "learning_rate": 0.0001, "loss": 1.3061, "step": 12759 }, { "epoch": 1.4824281150159744, "grad_norm": 0.5998115539550781, "learning_rate": 0.0001, "loss": 1.5584, "step": 12760 }, { "epoch": 1.4825442927679349, "grad_norm": 0.5446498394012451, "learning_rate": 0.0001, "loss": 1.3484, "step": 12761 }, { "epoch": 1.4826604705198954, "grad_norm": 0.6086790561676025, "learning_rate": 0.0001, "loss": 1.4267, "step": 12762 }, { "epoch": 1.482776648271856, "grad_norm": 0.5855661630630493, "learning_rate": 0.0001, "loss": 1.1741, "step": 12763 }, { "epoch": 1.4828928260238166, "grad_norm": 0.6026800274848938, "learning_rate": 0.0001, "loss": 1.5557, "step": 12764 }, { "epoch": 1.483009003775777, "grad_norm": 0.5826662182807922, "learning_rate": 0.0001, "loss": 1.5116, "step": 12765 }, { "epoch": 1.4831251815277375, "grad_norm": 0.6261777281761169, "learning_rate": 0.0001, "loss": 1.3935, "step": 12766 }, { "epoch": 1.483241359279698, "grad_norm": 0.6069555282592773, "learning_rate": 0.0001, "loss": 1.3201, "step": 12767 }, { "epoch": 1.4833575370316585, "grad_norm": 0.6280861496925354, "learning_rate": 0.0001, "loss": 1.5374, "step": 12768 }, { "epoch": 1.483473714783619, "grad_norm": 0.6123801469802856, "learning_rate": 0.0001, "loss": 1.2891, "step": 12769 }, { "epoch": 1.4835898925355795, "grad_norm": 0.5813351273536682, "learning_rate": 0.0001, "loss": 1.3189, "step": 12770 }, { "epoch": 1.48370607028754, "grad_norm": 0.6434810161590576, "learning_rate": 0.0001, "loss": 1.5881, "step": 12771 }, { "epoch": 1.4838222480395005, "grad_norm": 0.6274320483207703, "learning_rate": 0.0001, "loss": 1.5605, "step": 12772 }, { "epoch": 1.483938425791461, "grad_norm": 0.5794488787651062, "learning_rate": 0.0001, "loss": 1.5702, "step": 12773 }, { "epoch": 1.4840546035434214, "grad_norm": 0.5843713879585266, "learning_rate": 0.0001, "loss": 1.4887, "step": 12774 }, { "epoch": 1.484170781295382, "grad_norm": 0.5706945657730103, "learning_rate": 0.0001, "loss": 1.4438, "step": 12775 }, { "epoch": 1.4842869590473424, "grad_norm": 0.6139296293258667, "learning_rate": 0.0001, "loss": 1.5284, "step": 12776 }, { "epoch": 1.484403136799303, "grad_norm": 0.658415675163269, "learning_rate": 0.0001, "loss": 1.5691, "step": 12777 }, { "epoch": 1.4845193145512634, "grad_norm": 0.6205752491950989, "learning_rate": 0.0001, "loss": 1.4368, "step": 12778 }, { "epoch": 1.484635492303224, "grad_norm": 0.5956551432609558, "learning_rate": 0.0001, "loss": 1.4247, "step": 12779 }, { "epoch": 1.4847516700551844, "grad_norm": 0.5731070637702942, "learning_rate": 0.0001, "loss": 1.4603, "step": 12780 }, { "epoch": 1.4848678478071449, "grad_norm": 0.5833913683891296, "learning_rate": 0.0001, "loss": 1.4769, "step": 12781 }, { "epoch": 1.4849840255591054, "grad_norm": 0.5474773645401001, "learning_rate": 0.0001, "loss": 1.509, "step": 12782 }, { "epoch": 1.4851002033110658, "grad_norm": 0.5622663497924805, "learning_rate": 0.0001, "loss": 1.4723, "step": 12783 }, { "epoch": 1.4852163810630263, "grad_norm": 0.5745351314544678, "learning_rate": 0.0001, "loss": 1.4861, "step": 12784 }, { "epoch": 1.4853325588149868, "grad_norm": 0.5686841607093811, "learning_rate": 0.0001, "loss": 1.4275, "step": 12785 }, { "epoch": 1.4854487365669473, "grad_norm": 0.5676813721656799, "learning_rate": 0.0001, "loss": 1.3021, "step": 12786 }, { "epoch": 1.4855649143189078, "grad_norm": 0.5953998565673828, "learning_rate": 0.0001, "loss": 1.3295, "step": 12787 }, { "epoch": 1.4856810920708683, "grad_norm": 0.5832408666610718, "learning_rate": 0.0001, "loss": 1.4133, "step": 12788 }, { "epoch": 1.485797269822829, "grad_norm": 0.6295830011367798, "learning_rate": 0.0001, "loss": 1.4368, "step": 12789 }, { "epoch": 1.4859134475747895, "grad_norm": 0.595488965511322, "learning_rate": 0.0001, "loss": 1.5149, "step": 12790 }, { "epoch": 1.48602962532675, "grad_norm": 0.5632891654968262, "learning_rate": 0.0001, "loss": 1.3456, "step": 12791 }, { "epoch": 1.4861458030787105, "grad_norm": 0.5827213525772095, "learning_rate": 0.0001, "loss": 1.4786, "step": 12792 }, { "epoch": 1.486261980830671, "grad_norm": 0.58870929479599, "learning_rate": 0.0001, "loss": 1.3798, "step": 12793 }, { "epoch": 1.4863781585826314, "grad_norm": 0.566349983215332, "learning_rate": 0.0001, "loss": 1.2853, "step": 12794 }, { "epoch": 1.486494336334592, "grad_norm": 0.5674836039543152, "learning_rate": 0.0001, "loss": 1.4405, "step": 12795 }, { "epoch": 1.4866105140865524, "grad_norm": 0.6061223745346069, "learning_rate": 0.0001, "loss": 1.5349, "step": 12796 }, { "epoch": 1.486726691838513, "grad_norm": 0.6239713430404663, "learning_rate": 0.0001, "loss": 1.5709, "step": 12797 }, { "epoch": 1.4868428695904734, "grad_norm": 0.5661698579788208, "learning_rate": 0.0001, "loss": 1.3916, "step": 12798 }, { "epoch": 1.486959047342434, "grad_norm": 0.6092931628227234, "learning_rate": 0.0001, "loss": 1.506, "step": 12799 }, { "epoch": 1.4870752250943944, "grad_norm": 0.6277779936790466, "learning_rate": 0.0001, "loss": 1.6359, "step": 12800 }, { "epoch": 1.4871914028463549, "grad_norm": 0.5889933705329895, "learning_rate": 0.0001, "loss": 1.3339, "step": 12801 }, { "epoch": 1.4873075805983154, "grad_norm": 0.5621128082275391, "learning_rate": 0.0001, "loss": 1.2968, "step": 12802 }, { "epoch": 1.4874237583502758, "grad_norm": 0.5749889016151428, "learning_rate": 0.0001, "loss": 1.4791, "step": 12803 }, { "epoch": 1.4875399361022366, "grad_norm": 0.6799257397651672, "learning_rate": 0.0001, "loss": 1.616, "step": 12804 }, { "epoch": 1.487656113854197, "grad_norm": 0.575088381767273, "learning_rate": 0.0001, "loss": 1.5002, "step": 12805 }, { "epoch": 1.4877722916061575, "grad_norm": 0.6447527408599854, "learning_rate": 0.0001, "loss": 1.5963, "step": 12806 }, { "epoch": 1.487888469358118, "grad_norm": 0.6129550337791443, "learning_rate": 0.0001, "loss": 1.4895, "step": 12807 }, { "epoch": 1.4880046471100785, "grad_norm": 0.6152228713035583, "learning_rate": 0.0001, "loss": 1.388, "step": 12808 }, { "epoch": 1.488120824862039, "grad_norm": 0.6193944215774536, "learning_rate": 0.0001, "loss": 1.5167, "step": 12809 }, { "epoch": 1.4882370026139995, "grad_norm": 0.6093383431434631, "learning_rate": 0.0001, "loss": 1.6097, "step": 12810 }, { "epoch": 1.48835318036596, "grad_norm": 0.589266300201416, "learning_rate": 0.0001, "loss": 1.3637, "step": 12811 }, { "epoch": 1.4884693581179205, "grad_norm": 0.6045963764190674, "learning_rate": 0.0001, "loss": 1.4242, "step": 12812 }, { "epoch": 1.488585535869881, "grad_norm": 0.5933640003204346, "learning_rate": 0.0001, "loss": 1.3325, "step": 12813 }, { "epoch": 1.4887017136218414, "grad_norm": 0.5831065773963928, "learning_rate": 0.0001, "loss": 1.4035, "step": 12814 }, { "epoch": 1.488817891373802, "grad_norm": 0.6712548136711121, "learning_rate": 0.0001, "loss": 1.4683, "step": 12815 }, { "epoch": 1.4889340691257624, "grad_norm": 0.5910773277282715, "learning_rate": 0.0001, "loss": 1.4458, "step": 12816 }, { "epoch": 1.489050246877723, "grad_norm": 0.6439294219017029, "learning_rate": 0.0001, "loss": 1.6244, "step": 12817 }, { "epoch": 1.4891664246296834, "grad_norm": 0.5771844387054443, "learning_rate": 0.0001, "loss": 1.4584, "step": 12818 }, { "epoch": 1.489282602381644, "grad_norm": 0.6531968712806702, "learning_rate": 0.0001, "loss": 1.533, "step": 12819 }, { "epoch": 1.4893987801336044, "grad_norm": 0.6017487049102783, "learning_rate": 0.0001, "loss": 1.4118, "step": 12820 }, { "epoch": 1.4895149578855649, "grad_norm": 0.6122699975967407, "learning_rate": 0.0001, "loss": 1.4481, "step": 12821 }, { "epoch": 1.4896311356375254, "grad_norm": 0.6408692598342896, "learning_rate": 0.0001, "loss": 1.5795, "step": 12822 }, { "epoch": 1.4897473133894858, "grad_norm": 0.6655322909355164, "learning_rate": 0.0001, "loss": 1.5516, "step": 12823 }, { "epoch": 1.4898634911414463, "grad_norm": 0.5901119709014893, "learning_rate": 0.0001, "loss": 1.3471, "step": 12824 }, { "epoch": 1.4899796688934068, "grad_norm": 0.574163019657135, "learning_rate": 0.0001, "loss": 1.3641, "step": 12825 }, { "epoch": 1.4900958466453673, "grad_norm": 0.641035795211792, "learning_rate": 0.0001, "loss": 1.4546, "step": 12826 }, { "epoch": 1.4902120243973278, "grad_norm": 0.6070435643196106, "learning_rate": 0.0001, "loss": 1.5669, "step": 12827 }, { "epoch": 1.4903282021492883, "grad_norm": 0.6125596761703491, "learning_rate": 0.0001, "loss": 1.5613, "step": 12828 }, { "epoch": 1.4904443799012488, "grad_norm": 0.5792481899261475, "learning_rate": 0.0001, "loss": 1.5242, "step": 12829 }, { "epoch": 1.4905605576532093, "grad_norm": 0.6352988481521606, "learning_rate": 0.0001, "loss": 1.6555, "step": 12830 }, { "epoch": 1.49067673540517, "grad_norm": 0.6231880187988281, "learning_rate": 0.0001, "loss": 1.6766, "step": 12831 }, { "epoch": 1.4907929131571305, "grad_norm": 0.5636422634124756, "learning_rate": 0.0001, "loss": 1.1924, "step": 12832 }, { "epoch": 1.490909090909091, "grad_norm": 0.6103519797325134, "learning_rate": 0.0001, "loss": 1.5181, "step": 12833 }, { "epoch": 1.4910252686610515, "grad_norm": 0.5947346687316895, "learning_rate": 0.0001, "loss": 1.3394, "step": 12834 }, { "epoch": 1.491141446413012, "grad_norm": 0.6331939101219177, "learning_rate": 0.0001, "loss": 1.4172, "step": 12835 }, { "epoch": 1.4912576241649724, "grad_norm": 0.5931614637374878, "learning_rate": 0.0001, "loss": 1.2726, "step": 12836 }, { "epoch": 1.491373801916933, "grad_norm": 0.6468703150749207, "learning_rate": 0.0001, "loss": 1.4609, "step": 12837 }, { "epoch": 1.4914899796688934, "grad_norm": 0.6426784992218018, "learning_rate": 0.0001, "loss": 1.5128, "step": 12838 }, { "epoch": 1.491606157420854, "grad_norm": 0.6114495992660522, "learning_rate": 0.0001, "loss": 1.3851, "step": 12839 }, { "epoch": 1.4917223351728144, "grad_norm": 0.5668593645095825, "learning_rate": 0.0001, "loss": 1.3923, "step": 12840 }, { "epoch": 1.4918385129247749, "grad_norm": 0.6152085661888123, "learning_rate": 0.0001, "loss": 1.4781, "step": 12841 }, { "epoch": 1.4919546906767354, "grad_norm": 0.6062020063400269, "learning_rate": 0.0001, "loss": 1.4925, "step": 12842 }, { "epoch": 1.4920708684286959, "grad_norm": 0.589048445224762, "learning_rate": 0.0001, "loss": 1.4028, "step": 12843 }, { "epoch": 1.4921870461806563, "grad_norm": 0.6015335917472839, "learning_rate": 0.0001, "loss": 1.3599, "step": 12844 }, { "epoch": 1.4923032239326168, "grad_norm": 0.581270158290863, "learning_rate": 0.0001, "loss": 1.4128, "step": 12845 }, { "epoch": 1.4924194016845775, "grad_norm": 0.5742862820625305, "learning_rate": 0.0001, "loss": 1.4333, "step": 12846 }, { "epoch": 1.492535579436538, "grad_norm": 0.6063739061355591, "learning_rate": 0.0001, "loss": 1.4433, "step": 12847 }, { "epoch": 1.4926517571884985, "grad_norm": 0.5964763760566711, "learning_rate": 0.0001, "loss": 1.3218, "step": 12848 }, { "epoch": 1.492767934940459, "grad_norm": 0.6101694107055664, "learning_rate": 0.0001, "loss": 1.5799, "step": 12849 }, { "epoch": 1.4928841126924195, "grad_norm": 0.6307281851768494, "learning_rate": 0.0001, "loss": 1.5571, "step": 12850 }, { "epoch": 1.49300029044438, "grad_norm": 0.5971539616584778, "learning_rate": 0.0001, "loss": 1.4422, "step": 12851 }, { "epoch": 1.4931164681963405, "grad_norm": 0.6096937656402588, "learning_rate": 0.0001, "loss": 1.4911, "step": 12852 }, { "epoch": 1.493232645948301, "grad_norm": 0.6482102274894714, "learning_rate": 0.0001, "loss": 1.4308, "step": 12853 }, { "epoch": 1.4933488237002615, "grad_norm": 0.6256344318389893, "learning_rate": 0.0001, "loss": 1.5942, "step": 12854 }, { "epoch": 1.493465001452222, "grad_norm": 0.5984079837799072, "learning_rate": 0.0001, "loss": 1.4259, "step": 12855 }, { "epoch": 1.4935811792041824, "grad_norm": 0.5901398658752441, "learning_rate": 0.0001, "loss": 1.4518, "step": 12856 }, { "epoch": 1.493697356956143, "grad_norm": 0.6232024431228638, "learning_rate": 0.0001, "loss": 1.5517, "step": 12857 }, { "epoch": 1.4938135347081034, "grad_norm": 0.5728425979614258, "learning_rate": 0.0001, "loss": 1.2854, "step": 12858 }, { "epoch": 1.493929712460064, "grad_norm": 0.617725670337677, "learning_rate": 0.0001, "loss": 1.447, "step": 12859 }, { "epoch": 1.4940458902120244, "grad_norm": 0.6785517334938049, "learning_rate": 0.0001, "loss": 1.4123, "step": 12860 }, { "epoch": 1.4941620679639849, "grad_norm": 0.6345421075820923, "learning_rate": 0.0001, "loss": 1.5892, "step": 12861 }, { "epoch": 1.4942782457159454, "grad_norm": 0.5922654271125793, "learning_rate": 0.0001, "loss": 1.4769, "step": 12862 }, { "epoch": 1.4943944234679059, "grad_norm": 0.6028105616569519, "learning_rate": 0.0001, "loss": 1.518, "step": 12863 }, { "epoch": 1.4945106012198663, "grad_norm": 0.6089810132980347, "learning_rate": 0.0001, "loss": 1.333, "step": 12864 }, { "epoch": 1.4946267789718268, "grad_norm": 0.6128267645835876, "learning_rate": 0.0001, "loss": 1.5329, "step": 12865 }, { "epoch": 1.4947429567237873, "grad_norm": 0.7104302644729614, "learning_rate": 0.0001, "loss": 1.7867, "step": 12866 }, { "epoch": 1.4948591344757478, "grad_norm": 0.6253650784492493, "learning_rate": 0.0001, "loss": 1.567, "step": 12867 }, { "epoch": 1.4949753122277083, "grad_norm": 0.6499916911125183, "learning_rate": 0.0001, "loss": 1.5073, "step": 12868 }, { "epoch": 1.4950914899796688, "grad_norm": 0.6280500888824463, "learning_rate": 0.0001, "loss": 1.421, "step": 12869 }, { "epoch": 1.4952076677316293, "grad_norm": 0.6782636046409607, "learning_rate": 0.0001, "loss": 1.6919, "step": 12870 }, { "epoch": 1.4953238454835898, "grad_norm": 0.6510661840438843, "learning_rate": 0.0001, "loss": 1.6278, "step": 12871 }, { "epoch": 1.4954400232355505, "grad_norm": 0.6078386306762695, "learning_rate": 0.0001, "loss": 1.2536, "step": 12872 }, { "epoch": 1.495556200987511, "grad_norm": 0.5885645747184753, "learning_rate": 0.0001, "loss": 1.3384, "step": 12873 }, { "epoch": 1.4956723787394715, "grad_norm": 0.560592770576477, "learning_rate": 0.0001, "loss": 1.4449, "step": 12874 }, { "epoch": 1.495788556491432, "grad_norm": 0.6519601345062256, "learning_rate": 0.0001, "loss": 1.4626, "step": 12875 }, { "epoch": 1.4959047342433924, "grad_norm": 0.6099236607551575, "learning_rate": 0.0001, "loss": 1.4496, "step": 12876 }, { "epoch": 1.496020911995353, "grad_norm": 0.5811101794242859, "learning_rate": 0.0001, "loss": 1.4068, "step": 12877 }, { "epoch": 1.4961370897473134, "grad_norm": 0.5691701769828796, "learning_rate": 0.0001, "loss": 1.3746, "step": 12878 }, { "epoch": 1.496253267499274, "grad_norm": 0.568541944026947, "learning_rate": 0.0001, "loss": 1.2409, "step": 12879 }, { "epoch": 1.4963694452512344, "grad_norm": 0.6283146142959595, "learning_rate": 0.0001, "loss": 1.4772, "step": 12880 }, { "epoch": 1.4964856230031949, "grad_norm": 0.6234946250915527, "learning_rate": 0.0001, "loss": 1.5406, "step": 12881 }, { "epoch": 1.4966018007551554, "grad_norm": 0.6640174984931946, "learning_rate": 0.0001, "loss": 1.756, "step": 12882 }, { "epoch": 1.4967179785071159, "grad_norm": 0.645535945892334, "learning_rate": 0.0001, "loss": 1.5076, "step": 12883 }, { "epoch": 1.4968341562590763, "grad_norm": 0.615752637386322, "learning_rate": 0.0001, "loss": 1.5753, "step": 12884 }, { "epoch": 1.4969503340110368, "grad_norm": 0.6437330842018127, "learning_rate": 0.0001, "loss": 1.764, "step": 12885 }, { "epoch": 1.4970665117629973, "grad_norm": 0.633956789970398, "learning_rate": 0.0001, "loss": 1.6816, "step": 12886 }, { "epoch": 1.4971826895149578, "grad_norm": 0.625065803527832, "learning_rate": 0.0001, "loss": 1.6142, "step": 12887 }, { "epoch": 1.4972988672669185, "grad_norm": 0.5906104445457458, "learning_rate": 0.0001, "loss": 1.2654, "step": 12888 }, { "epoch": 1.497415045018879, "grad_norm": 0.6069050431251526, "learning_rate": 0.0001, "loss": 1.5332, "step": 12889 }, { "epoch": 1.4975312227708395, "grad_norm": 0.5757995247840881, "learning_rate": 0.0001, "loss": 1.2766, "step": 12890 }, { "epoch": 1.4976474005228, "grad_norm": 0.6138774752616882, "learning_rate": 0.0001, "loss": 1.4732, "step": 12891 }, { "epoch": 1.4977635782747605, "grad_norm": 0.6091035008430481, "learning_rate": 0.0001, "loss": 1.4733, "step": 12892 }, { "epoch": 1.497879756026721, "grad_norm": 0.5950594544410706, "learning_rate": 0.0001, "loss": 1.524, "step": 12893 }, { "epoch": 1.4979959337786815, "grad_norm": 0.6246311664581299, "learning_rate": 0.0001, "loss": 1.4968, "step": 12894 }, { "epoch": 1.498112111530642, "grad_norm": 0.5842698216438293, "learning_rate": 0.0001, "loss": 1.365, "step": 12895 }, { "epoch": 1.4982282892826024, "grad_norm": 0.6111376881599426, "learning_rate": 0.0001, "loss": 1.3392, "step": 12896 }, { "epoch": 1.498344467034563, "grad_norm": 0.5652458667755127, "learning_rate": 0.0001, "loss": 1.2803, "step": 12897 }, { "epoch": 1.4984606447865234, "grad_norm": 0.600919246673584, "learning_rate": 0.0001, "loss": 1.4203, "step": 12898 }, { "epoch": 1.498576822538484, "grad_norm": 0.5875718593597412, "learning_rate": 0.0001, "loss": 1.2227, "step": 12899 }, { "epoch": 1.4986930002904444, "grad_norm": 0.6111353635787964, "learning_rate": 0.0001, "loss": 1.3412, "step": 12900 }, { "epoch": 1.4988091780424049, "grad_norm": 0.6344109177589417, "learning_rate": 0.0001, "loss": 1.7502, "step": 12901 }, { "epoch": 1.4989253557943654, "grad_norm": 0.6019986867904663, "learning_rate": 0.0001, "loss": 1.4987, "step": 12902 }, { "epoch": 1.4990415335463259, "grad_norm": 0.6078724265098572, "learning_rate": 0.0001, "loss": 1.5648, "step": 12903 }, { "epoch": 1.4991577112982863, "grad_norm": 0.6435593962669373, "learning_rate": 0.0001, "loss": 1.5631, "step": 12904 }, { "epoch": 1.4992738890502468, "grad_norm": 0.661933422088623, "learning_rate": 0.0001, "loss": 1.4889, "step": 12905 }, { "epoch": 1.4993900668022073, "grad_norm": 0.6397930383682251, "learning_rate": 0.0001, "loss": 1.6223, "step": 12906 }, { "epoch": 1.4995062445541678, "grad_norm": 0.5697651505470276, "learning_rate": 0.0001, "loss": 1.4793, "step": 12907 }, { "epoch": 1.4996224223061283, "grad_norm": 0.5920810103416443, "learning_rate": 0.0001, "loss": 1.4955, "step": 12908 }, { "epoch": 1.4997386000580888, "grad_norm": 0.6010572910308838, "learning_rate": 0.0001, "loss": 1.472, "step": 12909 }, { "epoch": 1.4998547778100493, "grad_norm": 0.5660801529884338, "learning_rate": 0.0001, "loss": 1.4184, "step": 12910 }, { "epoch": 1.4999709555620098, "grad_norm": 0.5613225102424622, "learning_rate": 0.0001, "loss": 1.3346, "step": 12911 }, { "epoch": 1.5000871333139703, "grad_norm": 0.6039864420890808, "learning_rate": 0.0001, "loss": 1.495, "step": 12912 }, { "epoch": 1.5002033110659307, "grad_norm": 0.6184871196746826, "learning_rate": 0.0001, "loss": 1.595, "step": 12913 }, { "epoch": 1.5003194888178912, "grad_norm": 0.602561891078949, "learning_rate": 0.0001, "loss": 1.4573, "step": 12914 }, { "epoch": 1.5004356665698517, "grad_norm": 0.597723126411438, "learning_rate": 0.0001, "loss": 1.4666, "step": 12915 }, { "epoch": 1.5005518443218122, "grad_norm": 0.6435630917549133, "learning_rate": 0.0001, "loss": 1.5864, "step": 12916 }, { "epoch": 1.500668022073773, "grad_norm": 0.6061123013496399, "learning_rate": 0.0001, "loss": 1.4316, "step": 12917 }, { "epoch": 1.5007841998257334, "grad_norm": 0.5902183055877686, "learning_rate": 0.0001, "loss": 1.5089, "step": 12918 }, { "epoch": 1.500900377577694, "grad_norm": 0.6924389600753784, "learning_rate": 0.0001, "loss": 1.552, "step": 12919 }, { "epoch": 1.5010165553296544, "grad_norm": 0.6402321457862854, "learning_rate": 0.0001, "loss": 1.4686, "step": 12920 }, { "epoch": 1.5011327330816149, "grad_norm": 0.6038404703140259, "learning_rate": 0.0001, "loss": 1.452, "step": 12921 }, { "epoch": 1.5012489108335754, "grad_norm": 0.5710585713386536, "learning_rate": 0.0001, "loss": 1.5174, "step": 12922 }, { "epoch": 1.5013650885855359, "grad_norm": 0.6276952624320984, "learning_rate": 0.0001, "loss": 1.5144, "step": 12923 }, { "epoch": 1.5014812663374963, "grad_norm": 0.6179443001747131, "learning_rate": 0.0001, "loss": 1.4969, "step": 12924 }, { "epoch": 1.5015974440894568, "grad_norm": 0.648830235004425, "learning_rate": 0.0001, "loss": 1.5558, "step": 12925 }, { "epoch": 1.5017136218414173, "grad_norm": 0.6248328685760498, "learning_rate": 0.0001, "loss": 1.6314, "step": 12926 }, { "epoch": 1.501829799593378, "grad_norm": 0.6510769724845886, "learning_rate": 0.0001, "loss": 1.5131, "step": 12927 }, { "epoch": 1.5019459773453385, "grad_norm": 0.6090167760848999, "learning_rate": 0.0001, "loss": 1.3349, "step": 12928 }, { "epoch": 1.502062155097299, "grad_norm": 0.5935208201408386, "learning_rate": 0.0001, "loss": 1.3589, "step": 12929 }, { "epoch": 1.5021783328492595, "grad_norm": 0.6401615142822266, "learning_rate": 0.0001, "loss": 1.4513, "step": 12930 }, { "epoch": 1.50229451060122, "grad_norm": 0.6500523090362549, "learning_rate": 0.0001, "loss": 1.5076, "step": 12931 }, { "epoch": 1.5024106883531805, "grad_norm": 0.6160486936569214, "learning_rate": 0.0001, "loss": 1.4097, "step": 12932 }, { "epoch": 1.502526866105141, "grad_norm": 0.6370952129364014, "learning_rate": 0.0001, "loss": 1.4829, "step": 12933 }, { "epoch": 1.5026430438571015, "grad_norm": 0.6076738834381104, "learning_rate": 0.0001, "loss": 1.5261, "step": 12934 }, { "epoch": 1.502759221609062, "grad_norm": 0.5521711707115173, "learning_rate": 0.0001, "loss": 1.3567, "step": 12935 }, { "epoch": 1.5028753993610224, "grad_norm": 0.571445643901825, "learning_rate": 0.0001, "loss": 1.198, "step": 12936 }, { "epoch": 1.502991577112983, "grad_norm": 0.6402273178100586, "learning_rate": 0.0001, "loss": 1.5463, "step": 12937 }, { "epoch": 1.5031077548649434, "grad_norm": 0.6269349455833435, "learning_rate": 0.0001, "loss": 1.5242, "step": 12938 }, { "epoch": 1.503223932616904, "grad_norm": 0.6312558054924011, "learning_rate": 0.0001, "loss": 1.4097, "step": 12939 }, { "epoch": 1.5033401103688644, "grad_norm": 0.6417350172996521, "learning_rate": 0.0001, "loss": 1.5901, "step": 12940 }, { "epoch": 1.5034562881208249, "grad_norm": 0.6578287482261658, "learning_rate": 0.0001, "loss": 1.5081, "step": 12941 }, { "epoch": 1.5035724658727854, "grad_norm": 0.5819112062454224, "learning_rate": 0.0001, "loss": 1.3784, "step": 12942 }, { "epoch": 1.5036886436247459, "grad_norm": 0.6250476837158203, "learning_rate": 0.0001, "loss": 1.4396, "step": 12943 }, { "epoch": 1.5038048213767063, "grad_norm": 0.7036281824111938, "learning_rate": 0.0001, "loss": 1.5268, "step": 12944 }, { "epoch": 1.5039209991286668, "grad_norm": 0.5976950526237488, "learning_rate": 0.0001, "loss": 1.3829, "step": 12945 }, { "epoch": 1.5040371768806273, "grad_norm": 0.6492063403129578, "learning_rate": 0.0001, "loss": 1.5779, "step": 12946 }, { "epoch": 1.5041533546325878, "grad_norm": 0.5875254273414612, "learning_rate": 0.0001, "loss": 1.4128, "step": 12947 }, { "epoch": 1.5042695323845483, "grad_norm": 0.6553013324737549, "learning_rate": 0.0001, "loss": 1.513, "step": 12948 }, { "epoch": 1.5043857101365088, "grad_norm": 0.6260391473770142, "learning_rate": 0.0001, "loss": 1.5521, "step": 12949 }, { "epoch": 1.5045018878884693, "grad_norm": 0.7011594772338867, "learning_rate": 0.0001, "loss": 1.5969, "step": 12950 }, { "epoch": 1.5046180656404298, "grad_norm": 0.6216688752174377, "learning_rate": 0.0001, "loss": 1.4793, "step": 12951 }, { "epoch": 1.5047342433923903, "grad_norm": 0.6175325512886047, "learning_rate": 0.0001, "loss": 1.5253, "step": 12952 }, { "epoch": 1.5048504211443507, "grad_norm": 0.657291829586029, "learning_rate": 0.0001, "loss": 1.5744, "step": 12953 }, { "epoch": 1.5049665988963112, "grad_norm": 0.6169841289520264, "learning_rate": 0.0001, "loss": 1.4183, "step": 12954 }, { "epoch": 1.5050827766482717, "grad_norm": 0.6225051879882812, "learning_rate": 0.0001, "loss": 1.3909, "step": 12955 }, { "epoch": 1.5051989544002322, "grad_norm": 0.7212052345275879, "learning_rate": 0.0001, "loss": 1.5745, "step": 12956 }, { "epoch": 1.5053151321521927, "grad_norm": 0.6326794028282166, "learning_rate": 0.0001, "loss": 1.4791, "step": 12957 }, { "epoch": 1.5054313099041532, "grad_norm": 0.6327944397926331, "learning_rate": 0.0001, "loss": 1.5499, "step": 12958 }, { "epoch": 1.505547487656114, "grad_norm": 0.580574095249176, "learning_rate": 0.0001, "loss": 1.3895, "step": 12959 }, { "epoch": 1.5056636654080744, "grad_norm": 0.7401102781295776, "learning_rate": 0.0001, "loss": 1.5773, "step": 12960 }, { "epoch": 1.5057798431600349, "grad_norm": 0.5871754288673401, "learning_rate": 0.0001, "loss": 1.4968, "step": 12961 }, { "epoch": 1.5058960209119954, "grad_norm": 0.6001573801040649, "learning_rate": 0.0001, "loss": 1.4837, "step": 12962 }, { "epoch": 1.5060121986639559, "grad_norm": 0.537681519985199, "learning_rate": 0.0001, "loss": 1.371, "step": 12963 }, { "epoch": 1.5061283764159163, "grad_norm": 0.6001483201980591, "learning_rate": 0.0001, "loss": 1.4206, "step": 12964 }, { "epoch": 1.5062445541678768, "grad_norm": 0.6050352454185486, "learning_rate": 0.0001, "loss": 1.4868, "step": 12965 }, { "epoch": 1.5063607319198373, "grad_norm": 0.5874537825584412, "learning_rate": 0.0001, "loss": 1.3447, "step": 12966 }, { "epoch": 1.5064769096717978, "grad_norm": 0.6636204123497009, "learning_rate": 0.0001, "loss": 1.4919, "step": 12967 }, { "epoch": 1.5065930874237583, "grad_norm": 0.6525164246559143, "learning_rate": 0.0001, "loss": 1.7726, "step": 12968 }, { "epoch": 1.506709265175719, "grad_norm": 0.6085329055786133, "learning_rate": 0.0001, "loss": 1.5039, "step": 12969 }, { "epoch": 1.5068254429276795, "grad_norm": 0.6257724165916443, "learning_rate": 0.0001, "loss": 1.5168, "step": 12970 }, { "epoch": 1.50694162067964, "grad_norm": 0.6245921850204468, "learning_rate": 0.0001, "loss": 1.4173, "step": 12971 }, { "epoch": 1.5070577984316005, "grad_norm": 0.6131578087806702, "learning_rate": 0.0001, "loss": 1.3337, "step": 12972 }, { "epoch": 1.507173976183561, "grad_norm": 0.6182470321655273, "learning_rate": 0.0001, "loss": 1.5111, "step": 12973 }, { "epoch": 1.5072901539355215, "grad_norm": 0.6009563207626343, "learning_rate": 0.0001, "loss": 1.4442, "step": 12974 }, { "epoch": 1.507406331687482, "grad_norm": 0.5851460099220276, "learning_rate": 0.0001, "loss": 1.4297, "step": 12975 }, { "epoch": 1.5075225094394424, "grad_norm": 0.5955979824066162, "learning_rate": 0.0001, "loss": 1.503, "step": 12976 }, { "epoch": 1.507638687191403, "grad_norm": 0.6064378619194031, "learning_rate": 0.0001, "loss": 1.4586, "step": 12977 }, { "epoch": 1.5077548649433634, "grad_norm": 0.5852058529853821, "learning_rate": 0.0001, "loss": 1.2852, "step": 12978 }, { "epoch": 1.507871042695324, "grad_norm": 0.5747120380401611, "learning_rate": 0.0001, "loss": 1.4461, "step": 12979 }, { "epoch": 1.5079872204472844, "grad_norm": 0.566041886806488, "learning_rate": 0.0001, "loss": 1.4831, "step": 12980 }, { "epoch": 1.5081033981992449, "grad_norm": 0.6108693480491638, "learning_rate": 0.0001, "loss": 1.3197, "step": 12981 }, { "epoch": 1.5082195759512054, "grad_norm": 0.6155402660369873, "learning_rate": 0.0001, "loss": 1.5253, "step": 12982 }, { "epoch": 1.5083357537031659, "grad_norm": 0.6545661687850952, "learning_rate": 0.0001, "loss": 1.5458, "step": 12983 }, { "epoch": 1.5084519314551263, "grad_norm": 0.6387633085250854, "learning_rate": 0.0001, "loss": 1.3534, "step": 12984 }, { "epoch": 1.5085681092070868, "grad_norm": 0.6138156056404114, "learning_rate": 0.0001, "loss": 1.4418, "step": 12985 }, { "epoch": 1.5086842869590473, "grad_norm": 0.6812984347343445, "learning_rate": 0.0001, "loss": 1.5453, "step": 12986 }, { "epoch": 1.5088004647110078, "grad_norm": 0.5979045033454895, "learning_rate": 0.0001, "loss": 1.3903, "step": 12987 }, { "epoch": 1.5089166424629683, "grad_norm": 0.5794659852981567, "learning_rate": 0.0001, "loss": 1.4501, "step": 12988 }, { "epoch": 1.5090328202149288, "grad_norm": 0.6213316321372986, "learning_rate": 0.0001, "loss": 1.4512, "step": 12989 }, { "epoch": 1.5091489979668893, "grad_norm": 0.6868020296096802, "learning_rate": 0.0001, "loss": 1.6787, "step": 12990 }, { "epoch": 1.5092651757188498, "grad_norm": 0.6285677552223206, "learning_rate": 0.0001, "loss": 1.5757, "step": 12991 }, { "epoch": 1.5093813534708103, "grad_norm": 0.6092104315757751, "learning_rate": 0.0001, "loss": 1.5592, "step": 12992 }, { "epoch": 1.5094975312227707, "grad_norm": 0.5800586342811584, "learning_rate": 0.0001, "loss": 1.413, "step": 12993 }, { "epoch": 1.5096137089747312, "grad_norm": 0.6098979711532593, "learning_rate": 0.0001, "loss": 1.6554, "step": 12994 }, { "epoch": 1.5097298867266917, "grad_norm": 0.5925366878509521, "learning_rate": 0.0001, "loss": 1.6048, "step": 12995 }, { "epoch": 1.5098460644786522, "grad_norm": 0.5455504059791565, "learning_rate": 0.0001, "loss": 1.3888, "step": 12996 }, { "epoch": 1.5099622422306127, "grad_norm": 0.6356263160705566, "learning_rate": 0.0001, "loss": 1.6256, "step": 12997 }, { "epoch": 1.5100784199825732, "grad_norm": 0.6532312035560608, "learning_rate": 0.0001, "loss": 1.6825, "step": 12998 }, { "epoch": 1.5101945977345337, "grad_norm": 0.6413785219192505, "learning_rate": 0.0001, "loss": 1.5933, "step": 12999 }, { "epoch": 1.5103107754864942, "grad_norm": 0.5889772176742554, "learning_rate": 0.0001, "loss": 1.3492, "step": 13000 }, { "epoch": 1.5104269532384549, "grad_norm": 0.5992516279220581, "learning_rate": 0.0001, "loss": 1.4146, "step": 13001 }, { "epoch": 1.5105431309904154, "grad_norm": 0.5487164855003357, "learning_rate": 0.0001, "loss": 1.393, "step": 13002 }, { "epoch": 1.5106593087423759, "grad_norm": 0.6081210970878601, "learning_rate": 0.0001, "loss": 1.3813, "step": 13003 }, { "epoch": 1.5107754864943364, "grad_norm": 0.6229928731918335, "learning_rate": 0.0001, "loss": 1.4805, "step": 13004 }, { "epoch": 1.5108916642462968, "grad_norm": 0.6931246519088745, "learning_rate": 0.0001, "loss": 1.8908, "step": 13005 }, { "epoch": 1.5110078419982573, "grad_norm": 0.5892548561096191, "learning_rate": 0.0001, "loss": 1.4452, "step": 13006 }, { "epoch": 1.5111240197502178, "grad_norm": 0.6359273791313171, "learning_rate": 0.0001, "loss": 1.4474, "step": 13007 }, { "epoch": 1.5112401975021783, "grad_norm": 0.6264612078666687, "learning_rate": 0.0001, "loss": 1.5436, "step": 13008 }, { "epoch": 1.5113563752541388, "grad_norm": 0.6442233324050903, "learning_rate": 0.0001, "loss": 1.3834, "step": 13009 }, { "epoch": 1.5114725530060993, "grad_norm": 0.6219121217727661, "learning_rate": 0.0001, "loss": 1.2729, "step": 13010 }, { "epoch": 1.51158873075806, "grad_norm": 0.6267338991165161, "learning_rate": 0.0001, "loss": 1.4941, "step": 13011 }, { "epoch": 1.5117049085100205, "grad_norm": 0.5808162689208984, "learning_rate": 0.0001, "loss": 1.4617, "step": 13012 }, { "epoch": 1.511821086261981, "grad_norm": 0.6579829454421997, "learning_rate": 0.0001, "loss": 1.4468, "step": 13013 }, { "epoch": 1.5119372640139415, "grad_norm": 0.593537449836731, "learning_rate": 0.0001, "loss": 1.4952, "step": 13014 }, { "epoch": 1.512053441765902, "grad_norm": 0.538985550403595, "learning_rate": 0.0001, "loss": 1.2365, "step": 13015 }, { "epoch": 1.5121696195178624, "grad_norm": 0.6082078814506531, "learning_rate": 0.0001, "loss": 1.4621, "step": 13016 }, { "epoch": 1.512285797269823, "grad_norm": 0.6273752450942993, "learning_rate": 0.0001, "loss": 1.4828, "step": 13017 }, { "epoch": 1.5124019750217834, "grad_norm": 0.6424401998519897, "learning_rate": 0.0001, "loss": 1.5122, "step": 13018 }, { "epoch": 1.512518152773744, "grad_norm": 0.5700364708900452, "learning_rate": 0.0001, "loss": 1.2343, "step": 13019 }, { "epoch": 1.5126343305257044, "grad_norm": 0.6058292388916016, "learning_rate": 0.0001, "loss": 1.3065, "step": 13020 }, { "epoch": 1.5127505082776649, "grad_norm": 0.5938413143157959, "learning_rate": 0.0001, "loss": 1.4116, "step": 13021 }, { "epoch": 1.5128666860296254, "grad_norm": 0.5882897973060608, "learning_rate": 0.0001, "loss": 1.4321, "step": 13022 }, { "epoch": 1.5129828637815859, "grad_norm": 0.6097550988197327, "learning_rate": 0.0001, "loss": 1.4422, "step": 13023 }, { "epoch": 1.5130990415335464, "grad_norm": 0.6150256395339966, "learning_rate": 0.0001, "loss": 1.4294, "step": 13024 }, { "epoch": 1.5132152192855068, "grad_norm": 0.5661260485649109, "learning_rate": 0.0001, "loss": 1.3101, "step": 13025 }, { "epoch": 1.5133313970374673, "grad_norm": 0.6101265549659729, "learning_rate": 0.0001, "loss": 1.4298, "step": 13026 }, { "epoch": 1.5134475747894278, "grad_norm": 0.5510701537132263, "learning_rate": 0.0001, "loss": 1.2253, "step": 13027 }, { "epoch": 1.5135637525413883, "grad_norm": 0.5895761847496033, "learning_rate": 0.0001, "loss": 1.2103, "step": 13028 }, { "epoch": 1.5136799302933488, "grad_norm": 0.6410093903541565, "learning_rate": 0.0001, "loss": 1.5713, "step": 13029 }, { "epoch": 1.5137961080453093, "grad_norm": 0.6049452424049377, "learning_rate": 0.0001, "loss": 1.4493, "step": 13030 }, { "epoch": 1.5139122857972698, "grad_norm": 0.5991563200950623, "learning_rate": 0.0001, "loss": 1.369, "step": 13031 }, { "epoch": 1.5140284635492303, "grad_norm": 0.6251417398452759, "learning_rate": 0.0001, "loss": 1.4542, "step": 13032 }, { "epoch": 1.5141446413011908, "grad_norm": 0.6023422479629517, "learning_rate": 0.0001, "loss": 1.3901, "step": 13033 }, { "epoch": 1.5142608190531512, "grad_norm": 0.6075770854949951, "learning_rate": 0.0001, "loss": 1.4216, "step": 13034 }, { "epoch": 1.5143769968051117, "grad_norm": 0.5965530872344971, "learning_rate": 0.0001, "loss": 1.544, "step": 13035 }, { "epoch": 1.5144931745570722, "grad_norm": 0.5827516913414001, "learning_rate": 0.0001, "loss": 1.3238, "step": 13036 }, { "epoch": 1.5146093523090327, "grad_norm": 0.5984660387039185, "learning_rate": 0.0001, "loss": 1.5236, "step": 13037 }, { "epoch": 1.5147255300609932, "grad_norm": 0.6355912685394287, "learning_rate": 0.0001, "loss": 1.6367, "step": 13038 }, { "epoch": 1.5148417078129537, "grad_norm": 0.6122537851333618, "learning_rate": 0.0001, "loss": 1.5632, "step": 13039 }, { "epoch": 1.5149578855649142, "grad_norm": 0.5791593790054321, "learning_rate": 0.0001, "loss": 1.3258, "step": 13040 }, { "epoch": 1.5150740633168747, "grad_norm": 0.604680061340332, "learning_rate": 0.0001, "loss": 1.5425, "step": 13041 }, { "epoch": 1.5151902410688352, "grad_norm": 0.610745370388031, "learning_rate": 0.0001, "loss": 1.2757, "step": 13042 }, { "epoch": 1.5153064188207959, "grad_norm": 0.5982139706611633, "learning_rate": 0.0001, "loss": 1.5444, "step": 13043 }, { "epoch": 1.5154225965727564, "grad_norm": 0.5831255316734314, "learning_rate": 0.0001, "loss": 1.366, "step": 13044 }, { "epoch": 1.5155387743247168, "grad_norm": 0.6067568063735962, "learning_rate": 0.0001, "loss": 1.3268, "step": 13045 }, { "epoch": 1.5156549520766773, "grad_norm": 0.575416088104248, "learning_rate": 0.0001, "loss": 1.4809, "step": 13046 }, { "epoch": 1.5157711298286378, "grad_norm": 0.6012206673622131, "learning_rate": 0.0001, "loss": 1.4559, "step": 13047 }, { "epoch": 1.5158873075805983, "grad_norm": 0.5732565522193909, "learning_rate": 0.0001, "loss": 1.3186, "step": 13048 }, { "epoch": 1.5160034853325588, "grad_norm": 0.5650144815444946, "learning_rate": 0.0001, "loss": 1.4331, "step": 13049 }, { "epoch": 1.5161196630845193, "grad_norm": 0.6107412576675415, "learning_rate": 0.0001, "loss": 1.388, "step": 13050 }, { "epoch": 1.5162358408364798, "grad_norm": 0.6341642737388611, "learning_rate": 0.0001, "loss": 1.4583, "step": 13051 }, { "epoch": 1.5163520185884403, "grad_norm": 0.6973785758018494, "learning_rate": 0.0001, "loss": 1.5455, "step": 13052 }, { "epoch": 1.516468196340401, "grad_norm": 0.6058968901634216, "learning_rate": 0.0001, "loss": 1.1763, "step": 13053 }, { "epoch": 1.5165843740923615, "grad_norm": 0.6408393383026123, "learning_rate": 0.0001, "loss": 1.5899, "step": 13054 }, { "epoch": 1.516700551844322, "grad_norm": 0.6071977019309998, "learning_rate": 0.0001, "loss": 1.3627, "step": 13055 }, { "epoch": 1.5168167295962824, "grad_norm": 0.6299028396606445, "learning_rate": 0.0001, "loss": 1.5713, "step": 13056 }, { "epoch": 1.516932907348243, "grad_norm": 0.6032760143280029, "learning_rate": 0.0001, "loss": 1.4244, "step": 13057 }, { "epoch": 1.5170490851002034, "grad_norm": 0.5748342871665955, "learning_rate": 0.0001, "loss": 1.41, "step": 13058 }, { "epoch": 1.517165262852164, "grad_norm": 0.5807141661643982, "learning_rate": 0.0001, "loss": 1.4516, "step": 13059 }, { "epoch": 1.5172814406041244, "grad_norm": 0.6238056421279907, "learning_rate": 0.0001, "loss": 1.3953, "step": 13060 }, { "epoch": 1.5173976183560849, "grad_norm": 0.6162986159324646, "learning_rate": 0.0001, "loss": 1.5245, "step": 13061 }, { "epoch": 1.5175137961080454, "grad_norm": 0.6011757254600525, "learning_rate": 0.0001, "loss": 1.4601, "step": 13062 }, { "epoch": 1.5176299738600059, "grad_norm": 0.6143328547477722, "learning_rate": 0.0001, "loss": 1.4704, "step": 13063 }, { "epoch": 1.5177461516119664, "grad_norm": 0.5976557731628418, "learning_rate": 0.0001, "loss": 1.3611, "step": 13064 }, { "epoch": 1.5178623293639268, "grad_norm": 0.6380662322044373, "learning_rate": 0.0001, "loss": 1.5687, "step": 13065 }, { "epoch": 1.5179785071158873, "grad_norm": 0.6185310482978821, "learning_rate": 0.0001, "loss": 1.5481, "step": 13066 }, { "epoch": 1.5180946848678478, "grad_norm": 0.5588987469673157, "learning_rate": 0.0001, "loss": 1.2781, "step": 13067 }, { "epoch": 1.5182108626198083, "grad_norm": 0.6087438464164734, "learning_rate": 0.0001, "loss": 1.4519, "step": 13068 }, { "epoch": 1.5183270403717688, "grad_norm": 0.6939107775688171, "learning_rate": 0.0001, "loss": 1.6837, "step": 13069 }, { "epoch": 1.5184432181237293, "grad_norm": 0.5763146877288818, "learning_rate": 0.0001, "loss": 1.3932, "step": 13070 }, { "epoch": 1.5185593958756898, "grad_norm": 0.5791681408882141, "learning_rate": 0.0001, "loss": 1.3638, "step": 13071 }, { "epoch": 1.5186755736276503, "grad_norm": 0.6180105209350586, "learning_rate": 0.0001, "loss": 1.5888, "step": 13072 }, { "epoch": 1.5187917513796108, "grad_norm": 0.6183565258979797, "learning_rate": 0.0001, "loss": 1.5131, "step": 13073 }, { "epoch": 1.5189079291315712, "grad_norm": 0.6181787848472595, "learning_rate": 0.0001, "loss": 1.4906, "step": 13074 }, { "epoch": 1.5190241068835317, "grad_norm": 0.6255152225494385, "learning_rate": 0.0001, "loss": 1.472, "step": 13075 }, { "epoch": 1.5191402846354922, "grad_norm": 0.57309889793396, "learning_rate": 0.0001, "loss": 1.3557, "step": 13076 }, { "epoch": 1.5192564623874527, "grad_norm": 0.5992699861526489, "learning_rate": 0.0001, "loss": 1.2967, "step": 13077 }, { "epoch": 1.5193726401394132, "grad_norm": 0.6498000025749207, "learning_rate": 0.0001, "loss": 1.6267, "step": 13078 }, { "epoch": 1.5194888178913737, "grad_norm": 0.6142323017120361, "learning_rate": 0.0001, "loss": 1.3953, "step": 13079 }, { "epoch": 1.5196049956433342, "grad_norm": 0.5629411935806274, "learning_rate": 0.0001, "loss": 1.4722, "step": 13080 }, { "epoch": 1.5197211733952947, "grad_norm": 0.6026464104652405, "learning_rate": 0.0001, "loss": 1.5025, "step": 13081 }, { "epoch": 1.5198373511472552, "grad_norm": 0.5876480340957642, "learning_rate": 0.0001, "loss": 1.42, "step": 13082 }, { "epoch": 1.5199535288992156, "grad_norm": 0.6540488600730896, "learning_rate": 0.0001, "loss": 1.5638, "step": 13083 }, { "epoch": 1.5200697066511761, "grad_norm": 0.5790266990661621, "learning_rate": 0.0001, "loss": 1.4935, "step": 13084 }, { "epoch": 1.5201858844031368, "grad_norm": 0.5915881991386414, "learning_rate": 0.0001, "loss": 1.5506, "step": 13085 }, { "epoch": 1.5203020621550973, "grad_norm": 0.601015567779541, "learning_rate": 0.0001, "loss": 1.554, "step": 13086 }, { "epoch": 1.5204182399070578, "grad_norm": 0.607069730758667, "learning_rate": 0.0001, "loss": 1.4829, "step": 13087 }, { "epoch": 1.5205344176590183, "grad_norm": 0.5643183588981628, "learning_rate": 0.0001, "loss": 1.4889, "step": 13088 }, { "epoch": 1.5206505954109788, "grad_norm": 0.6405245065689087, "learning_rate": 0.0001, "loss": 1.4864, "step": 13089 }, { "epoch": 1.5207667731629393, "grad_norm": 0.618649959564209, "learning_rate": 0.0001, "loss": 1.4619, "step": 13090 }, { "epoch": 1.5208829509148998, "grad_norm": 0.596312403678894, "learning_rate": 0.0001, "loss": 1.4631, "step": 13091 }, { "epoch": 1.5209991286668603, "grad_norm": 0.6227704286575317, "learning_rate": 0.0001, "loss": 1.6598, "step": 13092 }, { "epoch": 1.5211153064188208, "grad_norm": 0.5877798795700073, "learning_rate": 0.0001, "loss": 1.5022, "step": 13093 }, { "epoch": 1.5212314841707812, "grad_norm": 0.5980353355407715, "learning_rate": 0.0001, "loss": 1.5944, "step": 13094 }, { "epoch": 1.521347661922742, "grad_norm": 0.6470990777015686, "learning_rate": 0.0001, "loss": 1.6384, "step": 13095 }, { "epoch": 1.5214638396747024, "grad_norm": 0.6021008491516113, "learning_rate": 0.0001, "loss": 1.5108, "step": 13096 }, { "epoch": 1.521580017426663, "grad_norm": 0.5772629380226135, "learning_rate": 0.0001, "loss": 1.4815, "step": 13097 }, { "epoch": 1.5216961951786234, "grad_norm": 0.5909640192985535, "learning_rate": 0.0001, "loss": 1.4743, "step": 13098 }, { "epoch": 1.521812372930584, "grad_norm": 0.6111878752708435, "learning_rate": 0.0001, "loss": 1.5198, "step": 13099 }, { "epoch": 1.5219285506825444, "grad_norm": 0.5904512405395508, "learning_rate": 0.0001, "loss": 1.5612, "step": 13100 }, { "epoch": 1.522044728434505, "grad_norm": 0.5810577869415283, "learning_rate": 0.0001, "loss": 1.3642, "step": 13101 }, { "epoch": 1.5221609061864654, "grad_norm": 0.6800036430358887, "learning_rate": 0.0001, "loss": 1.5471, "step": 13102 }, { "epoch": 1.5222770839384259, "grad_norm": 0.6236268877983093, "learning_rate": 0.0001, "loss": 1.6704, "step": 13103 }, { "epoch": 1.5223932616903864, "grad_norm": 0.6043499708175659, "learning_rate": 0.0001, "loss": 1.4506, "step": 13104 }, { "epoch": 1.5225094394423468, "grad_norm": 0.609677255153656, "learning_rate": 0.0001, "loss": 1.45, "step": 13105 }, { "epoch": 1.5226256171943073, "grad_norm": 0.5774016976356506, "learning_rate": 0.0001, "loss": 1.4479, "step": 13106 }, { "epoch": 1.5227417949462678, "grad_norm": 0.6218743324279785, "learning_rate": 0.0001, "loss": 1.5468, "step": 13107 }, { "epoch": 1.5228579726982283, "grad_norm": 0.5599686503410339, "learning_rate": 0.0001, "loss": 1.3028, "step": 13108 }, { "epoch": 1.5229741504501888, "grad_norm": 0.5793375372886658, "learning_rate": 0.0001, "loss": 1.3442, "step": 13109 }, { "epoch": 1.5230903282021493, "grad_norm": 0.5650421380996704, "learning_rate": 0.0001, "loss": 1.1701, "step": 13110 }, { "epoch": 1.5232065059541098, "grad_norm": 0.628027617931366, "learning_rate": 0.0001, "loss": 1.4018, "step": 13111 }, { "epoch": 1.5233226837060703, "grad_norm": 0.6404212117195129, "learning_rate": 0.0001, "loss": 1.5248, "step": 13112 }, { "epoch": 1.5234388614580308, "grad_norm": 0.6501476168632507, "learning_rate": 0.0001, "loss": 1.6086, "step": 13113 }, { "epoch": 1.5235550392099912, "grad_norm": 0.6205679774284363, "learning_rate": 0.0001, "loss": 1.5886, "step": 13114 }, { "epoch": 1.5236712169619517, "grad_norm": 0.6271020174026489, "learning_rate": 0.0001, "loss": 1.4695, "step": 13115 }, { "epoch": 1.5237873947139122, "grad_norm": 0.6364188194274902, "learning_rate": 0.0001, "loss": 1.6018, "step": 13116 }, { "epoch": 1.5239035724658727, "grad_norm": 0.6291508674621582, "learning_rate": 0.0001, "loss": 1.4255, "step": 13117 }, { "epoch": 1.5240197502178332, "grad_norm": 0.6832680106163025, "learning_rate": 0.0001, "loss": 1.566, "step": 13118 }, { "epoch": 1.5241359279697937, "grad_norm": 0.6287549138069153, "learning_rate": 0.0001, "loss": 1.6068, "step": 13119 }, { "epoch": 1.5242521057217542, "grad_norm": 0.6037324070930481, "learning_rate": 0.0001, "loss": 1.3475, "step": 13120 }, { "epoch": 1.5243682834737147, "grad_norm": 0.6374009847640991, "learning_rate": 0.0001, "loss": 1.544, "step": 13121 }, { "epoch": 1.5244844612256752, "grad_norm": 0.5533431172370911, "learning_rate": 0.0001, "loss": 1.4256, "step": 13122 }, { "epoch": 1.5246006389776356, "grad_norm": 0.6008569002151489, "learning_rate": 0.0001, "loss": 1.4351, "step": 13123 }, { "epoch": 1.5247168167295961, "grad_norm": 0.5492884516716003, "learning_rate": 0.0001, "loss": 1.4973, "step": 13124 }, { "epoch": 1.5248329944815566, "grad_norm": 0.6348086595535278, "learning_rate": 0.0001, "loss": 1.6349, "step": 13125 }, { "epoch": 1.5249491722335173, "grad_norm": 0.5985419750213623, "learning_rate": 0.0001, "loss": 1.3882, "step": 13126 }, { "epoch": 1.5250653499854778, "grad_norm": 0.588337242603302, "learning_rate": 0.0001, "loss": 1.4547, "step": 13127 }, { "epoch": 1.5251815277374383, "grad_norm": 0.6453753113746643, "learning_rate": 0.0001, "loss": 1.593, "step": 13128 }, { "epoch": 1.5252977054893988, "grad_norm": 0.6236133575439453, "learning_rate": 0.0001, "loss": 1.3412, "step": 13129 }, { "epoch": 1.5254138832413593, "grad_norm": 0.6044201850891113, "learning_rate": 0.0001, "loss": 1.4729, "step": 13130 }, { "epoch": 1.5255300609933198, "grad_norm": 0.5731863975524902, "learning_rate": 0.0001, "loss": 1.3612, "step": 13131 }, { "epoch": 1.5256462387452803, "grad_norm": 0.6380339860916138, "learning_rate": 0.0001, "loss": 1.5127, "step": 13132 }, { "epoch": 1.5257624164972408, "grad_norm": 0.6082126498222351, "learning_rate": 0.0001, "loss": 1.5667, "step": 13133 }, { "epoch": 1.5258785942492012, "grad_norm": 0.6267526745796204, "learning_rate": 0.0001, "loss": 1.6277, "step": 13134 }, { "epoch": 1.5259947720011617, "grad_norm": 0.5912509560585022, "learning_rate": 0.0001, "loss": 1.2835, "step": 13135 }, { "epoch": 1.5261109497531222, "grad_norm": 0.6406939029693604, "learning_rate": 0.0001, "loss": 1.6174, "step": 13136 }, { "epoch": 1.526227127505083, "grad_norm": 0.6724519729614258, "learning_rate": 0.0001, "loss": 1.4591, "step": 13137 }, { "epoch": 1.5263433052570434, "grad_norm": 0.6085923314094543, "learning_rate": 0.0001, "loss": 1.4962, "step": 13138 }, { "epoch": 1.526459483009004, "grad_norm": 0.5566753149032593, "learning_rate": 0.0001, "loss": 1.1431, "step": 13139 }, { "epoch": 1.5265756607609644, "grad_norm": 0.5759755969047546, "learning_rate": 0.0001, "loss": 1.3858, "step": 13140 }, { "epoch": 1.526691838512925, "grad_norm": 0.5898522138595581, "learning_rate": 0.0001, "loss": 1.4425, "step": 13141 }, { "epoch": 1.5268080162648854, "grad_norm": 0.614996075630188, "learning_rate": 0.0001, "loss": 1.5029, "step": 13142 }, { "epoch": 1.5269241940168459, "grad_norm": 0.5954774022102356, "learning_rate": 0.0001, "loss": 1.4092, "step": 13143 }, { "epoch": 1.5270403717688064, "grad_norm": 0.6323831081390381, "learning_rate": 0.0001, "loss": 1.4987, "step": 13144 }, { "epoch": 1.5271565495207668, "grad_norm": 0.6423251628875732, "learning_rate": 0.0001, "loss": 1.7558, "step": 13145 }, { "epoch": 1.5272727272727273, "grad_norm": 0.6106740236282349, "learning_rate": 0.0001, "loss": 1.566, "step": 13146 }, { "epoch": 1.5273889050246878, "grad_norm": 0.5833632349967957, "learning_rate": 0.0001, "loss": 1.4143, "step": 13147 }, { "epoch": 1.5275050827766483, "grad_norm": 0.580480694770813, "learning_rate": 0.0001, "loss": 1.4606, "step": 13148 }, { "epoch": 1.5276212605286088, "grad_norm": 0.6620073318481445, "learning_rate": 0.0001, "loss": 1.5736, "step": 13149 }, { "epoch": 1.5277374382805693, "grad_norm": 0.6580905914306641, "learning_rate": 0.0001, "loss": 1.6311, "step": 13150 }, { "epoch": 1.5278536160325298, "grad_norm": 0.5711208581924438, "learning_rate": 0.0001, "loss": 1.4917, "step": 13151 }, { "epoch": 1.5279697937844903, "grad_norm": 0.5903010964393616, "learning_rate": 0.0001, "loss": 1.3864, "step": 13152 }, { "epoch": 1.5280859715364508, "grad_norm": 0.5728796720504761, "learning_rate": 0.0001, "loss": 1.4043, "step": 13153 }, { "epoch": 1.5282021492884112, "grad_norm": 0.591552197933197, "learning_rate": 0.0001, "loss": 1.5419, "step": 13154 }, { "epoch": 1.5283183270403717, "grad_norm": 0.5879071950912476, "learning_rate": 0.0001, "loss": 1.2898, "step": 13155 }, { "epoch": 1.5284345047923322, "grad_norm": 0.6146317720413208, "learning_rate": 0.0001, "loss": 1.4951, "step": 13156 }, { "epoch": 1.5285506825442927, "grad_norm": 0.6422489285469055, "learning_rate": 0.0001, "loss": 1.5282, "step": 13157 }, { "epoch": 1.5286668602962532, "grad_norm": 0.6233516335487366, "learning_rate": 0.0001, "loss": 1.5457, "step": 13158 }, { "epoch": 1.5287830380482137, "grad_norm": 0.7108170986175537, "learning_rate": 0.0001, "loss": 1.7262, "step": 13159 }, { "epoch": 1.5288992158001742, "grad_norm": 0.6578541398048401, "learning_rate": 0.0001, "loss": 1.6322, "step": 13160 }, { "epoch": 1.5290153935521347, "grad_norm": 0.6104475259780884, "learning_rate": 0.0001, "loss": 1.461, "step": 13161 }, { "epoch": 1.5291315713040952, "grad_norm": 0.6033722758293152, "learning_rate": 0.0001, "loss": 1.68, "step": 13162 }, { "epoch": 1.5292477490560556, "grad_norm": 0.5982484817504883, "learning_rate": 0.0001, "loss": 1.5397, "step": 13163 }, { "epoch": 1.5293639268080161, "grad_norm": 0.5699301958084106, "learning_rate": 0.0001, "loss": 1.3097, "step": 13164 }, { "epoch": 1.5294801045599766, "grad_norm": 0.5736358165740967, "learning_rate": 0.0001, "loss": 1.5139, "step": 13165 }, { "epoch": 1.5295962823119371, "grad_norm": 0.5809690952301025, "learning_rate": 0.0001, "loss": 1.2927, "step": 13166 }, { "epoch": 1.5297124600638976, "grad_norm": 0.64310622215271, "learning_rate": 0.0001, "loss": 1.6676, "step": 13167 }, { "epoch": 1.5298286378158583, "grad_norm": 0.6130411624908447, "learning_rate": 0.0001, "loss": 1.3156, "step": 13168 }, { "epoch": 1.5299448155678188, "grad_norm": 0.611953854560852, "learning_rate": 0.0001, "loss": 1.6393, "step": 13169 }, { "epoch": 1.5300609933197793, "grad_norm": 0.6218355894088745, "learning_rate": 0.0001, "loss": 1.3941, "step": 13170 }, { "epoch": 1.5301771710717398, "grad_norm": 0.6101202964782715, "learning_rate": 0.0001, "loss": 1.405, "step": 13171 }, { "epoch": 1.5302933488237003, "grad_norm": 0.6130668520927429, "learning_rate": 0.0001, "loss": 1.6009, "step": 13172 }, { "epoch": 1.5304095265756608, "grad_norm": 0.5833741426467896, "learning_rate": 0.0001, "loss": 1.5686, "step": 13173 }, { "epoch": 1.5305257043276213, "grad_norm": 0.5890129208564758, "learning_rate": 0.0001, "loss": 1.4987, "step": 13174 }, { "epoch": 1.5306418820795817, "grad_norm": 0.5778335928916931, "learning_rate": 0.0001, "loss": 1.3429, "step": 13175 }, { "epoch": 1.5307580598315422, "grad_norm": 0.6423119306564331, "learning_rate": 0.0001, "loss": 1.58, "step": 13176 }, { "epoch": 1.5308742375835027, "grad_norm": 0.594713032245636, "learning_rate": 0.0001, "loss": 1.566, "step": 13177 }, { "epoch": 1.5309904153354632, "grad_norm": 0.662578821182251, "learning_rate": 0.0001, "loss": 1.6117, "step": 13178 }, { "epoch": 1.531106593087424, "grad_norm": 0.6052879095077515, "learning_rate": 0.0001, "loss": 1.4994, "step": 13179 }, { "epoch": 1.5312227708393844, "grad_norm": 0.580970287322998, "learning_rate": 0.0001, "loss": 1.5377, "step": 13180 }, { "epoch": 1.531338948591345, "grad_norm": 0.6133270859718323, "learning_rate": 0.0001, "loss": 1.5026, "step": 13181 }, { "epoch": 1.5314551263433054, "grad_norm": 0.5756634473800659, "learning_rate": 0.0001, "loss": 1.4228, "step": 13182 }, { "epoch": 1.5315713040952659, "grad_norm": 0.588685154914856, "learning_rate": 0.0001, "loss": 1.3341, "step": 13183 }, { "epoch": 1.5316874818472264, "grad_norm": 0.6559744477272034, "learning_rate": 0.0001, "loss": 1.4987, "step": 13184 }, { "epoch": 1.5318036595991869, "grad_norm": 0.6924941539764404, "learning_rate": 0.0001, "loss": 1.6968, "step": 13185 }, { "epoch": 1.5319198373511473, "grad_norm": 0.5851516723632812, "learning_rate": 0.0001, "loss": 1.4279, "step": 13186 }, { "epoch": 1.5320360151031078, "grad_norm": 0.6089817881584167, "learning_rate": 0.0001, "loss": 1.4143, "step": 13187 }, { "epoch": 1.5321521928550683, "grad_norm": 0.6336711049079895, "learning_rate": 0.0001, "loss": 1.528, "step": 13188 }, { "epoch": 1.5322683706070288, "grad_norm": 0.5794046521186829, "learning_rate": 0.0001, "loss": 1.4202, "step": 13189 }, { "epoch": 1.5323845483589893, "grad_norm": 0.614953875541687, "learning_rate": 0.0001, "loss": 1.4362, "step": 13190 }, { "epoch": 1.5325007261109498, "grad_norm": 0.6357384324073792, "learning_rate": 0.0001, "loss": 1.3925, "step": 13191 }, { "epoch": 1.5326169038629103, "grad_norm": 0.617169201374054, "learning_rate": 0.0001, "loss": 1.4264, "step": 13192 }, { "epoch": 1.5327330816148708, "grad_norm": 0.5910585522651672, "learning_rate": 0.0001, "loss": 1.4205, "step": 13193 }, { "epoch": 1.5328492593668313, "grad_norm": 0.5722532868385315, "learning_rate": 0.0001, "loss": 1.3988, "step": 13194 }, { "epoch": 1.5329654371187917, "grad_norm": 0.5971508622169495, "learning_rate": 0.0001, "loss": 1.4986, "step": 13195 }, { "epoch": 1.5330816148707522, "grad_norm": 0.6081166863441467, "learning_rate": 0.0001, "loss": 1.4698, "step": 13196 }, { "epoch": 1.5331977926227127, "grad_norm": 0.6401707530021667, "learning_rate": 0.0001, "loss": 1.3302, "step": 13197 }, { "epoch": 1.5333139703746732, "grad_norm": 0.6085423827171326, "learning_rate": 0.0001, "loss": 1.3921, "step": 13198 }, { "epoch": 1.5334301481266337, "grad_norm": 0.6252439618110657, "learning_rate": 0.0001, "loss": 1.4668, "step": 13199 }, { "epoch": 1.5335463258785942, "grad_norm": 0.6168279051780701, "learning_rate": 0.0001, "loss": 1.4045, "step": 13200 }, { "epoch": 1.5336625036305547, "grad_norm": 0.5795454382896423, "learning_rate": 0.0001, "loss": 1.4236, "step": 13201 }, { "epoch": 1.5337786813825152, "grad_norm": 0.6378520131111145, "learning_rate": 0.0001, "loss": 1.6387, "step": 13202 }, { "epoch": 1.5338948591344757, "grad_norm": 0.616070568561554, "learning_rate": 0.0001, "loss": 1.5371, "step": 13203 }, { "epoch": 1.5340110368864361, "grad_norm": 0.670217752456665, "learning_rate": 0.0001, "loss": 1.5229, "step": 13204 }, { "epoch": 1.5341272146383966, "grad_norm": 0.6514032483100891, "learning_rate": 0.0001, "loss": 1.6283, "step": 13205 }, { "epoch": 1.5342433923903571, "grad_norm": 0.5641196966171265, "learning_rate": 0.0001, "loss": 1.4328, "step": 13206 }, { "epoch": 1.5343595701423176, "grad_norm": 0.5916898250579834, "learning_rate": 0.0001, "loss": 1.4845, "step": 13207 }, { "epoch": 1.534475747894278, "grad_norm": 0.5729571580886841, "learning_rate": 0.0001, "loss": 1.3437, "step": 13208 }, { "epoch": 1.5345919256462386, "grad_norm": 0.6877140402793884, "learning_rate": 0.0001, "loss": 1.5928, "step": 13209 }, { "epoch": 1.5347081033981993, "grad_norm": 0.628453254699707, "learning_rate": 0.0001, "loss": 1.3626, "step": 13210 }, { "epoch": 1.5348242811501598, "grad_norm": 0.5816517472267151, "learning_rate": 0.0001, "loss": 1.3589, "step": 13211 }, { "epoch": 1.5349404589021203, "grad_norm": 0.5863345861434937, "learning_rate": 0.0001, "loss": 1.4668, "step": 13212 }, { "epoch": 1.5350566366540808, "grad_norm": 0.5848140120506287, "learning_rate": 0.0001, "loss": 1.4771, "step": 13213 }, { "epoch": 1.5351728144060413, "grad_norm": 0.6075370907783508, "learning_rate": 0.0001, "loss": 1.4385, "step": 13214 }, { "epoch": 1.5352889921580017, "grad_norm": 0.6305372714996338, "learning_rate": 0.0001, "loss": 1.5449, "step": 13215 }, { "epoch": 1.5354051699099622, "grad_norm": 0.5978954434394836, "learning_rate": 0.0001, "loss": 1.514, "step": 13216 }, { "epoch": 1.5355213476619227, "grad_norm": 0.6035651564598083, "learning_rate": 0.0001, "loss": 1.5109, "step": 13217 }, { "epoch": 1.5356375254138832, "grad_norm": 0.5722531080245972, "learning_rate": 0.0001, "loss": 1.5771, "step": 13218 }, { "epoch": 1.5357537031658437, "grad_norm": 0.5960158705711365, "learning_rate": 0.0001, "loss": 1.4061, "step": 13219 }, { "epoch": 1.5358698809178042, "grad_norm": 0.6274595260620117, "learning_rate": 0.0001, "loss": 1.5707, "step": 13220 }, { "epoch": 1.535986058669765, "grad_norm": 0.6137160062789917, "learning_rate": 0.0001, "loss": 1.1475, "step": 13221 }, { "epoch": 1.5361022364217254, "grad_norm": 0.6096351742744446, "learning_rate": 0.0001, "loss": 1.2318, "step": 13222 }, { "epoch": 1.5362184141736859, "grad_norm": 0.642091691493988, "learning_rate": 0.0001, "loss": 1.5027, "step": 13223 }, { "epoch": 1.5363345919256464, "grad_norm": 0.6222463250160217, "learning_rate": 0.0001, "loss": 1.4155, "step": 13224 }, { "epoch": 1.5364507696776069, "grad_norm": 0.5922936201095581, "learning_rate": 0.0001, "loss": 1.3679, "step": 13225 }, { "epoch": 1.5365669474295673, "grad_norm": 0.565569281578064, "learning_rate": 0.0001, "loss": 1.2068, "step": 13226 }, { "epoch": 1.5366831251815278, "grad_norm": 0.6304237842559814, "learning_rate": 0.0001, "loss": 1.4687, "step": 13227 }, { "epoch": 1.5367993029334883, "grad_norm": 0.6497960686683655, "learning_rate": 0.0001, "loss": 1.5075, "step": 13228 }, { "epoch": 1.5369154806854488, "grad_norm": 0.6448734402656555, "learning_rate": 0.0001, "loss": 1.4774, "step": 13229 }, { "epoch": 1.5370316584374093, "grad_norm": 0.5980844497680664, "learning_rate": 0.0001, "loss": 1.5099, "step": 13230 }, { "epoch": 1.5371478361893698, "grad_norm": 0.5994159579277039, "learning_rate": 0.0001, "loss": 1.5558, "step": 13231 }, { "epoch": 1.5372640139413303, "grad_norm": 0.58444744348526, "learning_rate": 0.0001, "loss": 1.4855, "step": 13232 }, { "epoch": 1.5373801916932908, "grad_norm": 0.6289106607437134, "learning_rate": 0.0001, "loss": 1.5868, "step": 13233 }, { "epoch": 1.5374963694452513, "grad_norm": 0.6375524997711182, "learning_rate": 0.0001, "loss": 1.6756, "step": 13234 }, { "epoch": 1.5376125471972117, "grad_norm": 0.6511433720588684, "learning_rate": 0.0001, "loss": 1.4251, "step": 13235 }, { "epoch": 1.5377287249491722, "grad_norm": 0.5981631875038147, "learning_rate": 0.0001, "loss": 1.4202, "step": 13236 }, { "epoch": 1.5378449027011327, "grad_norm": 0.5719684958457947, "learning_rate": 0.0001, "loss": 1.4421, "step": 13237 }, { "epoch": 1.5379610804530932, "grad_norm": 0.6303753852844238, "learning_rate": 0.0001, "loss": 1.2953, "step": 13238 }, { "epoch": 1.5380772582050537, "grad_norm": 0.6014121174812317, "learning_rate": 0.0001, "loss": 1.4532, "step": 13239 }, { "epoch": 1.5381934359570142, "grad_norm": 0.5993248820304871, "learning_rate": 0.0001, "loss": 1.4158, "step": 13240 }, { "epoch": 1.5383096137089747, "grad_norm": 0.6195975542068481, "learning_rate": 0.0001, "loss": 1.4652, "step": 13241 }, { "epoch": 1.5384257914609352, "grad_norm": 0.5984340906143188, "learning_rate": 0.0001, "loss": 1.5031, "step": 13242 }, { "epoch": 1.5385419692128957, "grad_norm": 0.610115647315979, "learning_rate": 0.0001, "loss": 1.4412, "step": 13243 }, { "epoch": 1.5386581469648561, "grad_norm": 0.6320290565490723, "learning_rate": 0.0001, "loss": 1.5107, "step": 13244 }, { "epoch": 1.5387743247168166, "grad_norm": 0.6114384531974792, "learning_rate": 0.0001, "loss": 1.569, "step": 13245 }, { "epoch": 1.5388905024687771, "grad_norm": 0.6696942448616028, "learning_rate": 0.0001, "loss": 1.5754, "step": 13246 }, { "epoch": 1.5390066802207376, "grad_norm": 0.6278484463691711, "learning_rate": 0.0001, "loss": 1.3396, "step": 13247 }, { "epoch": 1.539122857972698, "grad_norm": 0.5679048895835876, "learning_rate": 0.0001, "loss": 1.3174, "step": 13248 }, { "epoch": 1.5392390357246586, "grad_norm": 0.6171903610229492, "learning_rate": 0.0001, "loss": 1.4796, "step": 13249 }, { "epoch": 1.539355213476619, "grad_norm": 0.6222214698791504, "learning_rate": 0.0001, "loss": 1.439, "step": 13250 }, { "epoch": 1.5394713912285796, "grad_norm": 0.5891657471656799, "learning_rate": 0.0001, "loss": 1.4478, "step": 13251 }, { "epoch": 1.5395875689805403, "grad_norm": 0.6132618188858032, "learning_rate": 0.0001, "loss": 1.4395, "step": 13252 }, { "epoch": 1.5397037467325008, "grad_norm": 0.5770408511161804, "learning_rate": 0.0001, "loss": 1.443, "step": 13253 }, { "epoch": 1.5398199244844613, "grad_norm": 0.6150637269020081, "learning_rate": 0.0001, "loss": 1.4207, "step": 13254 }, { "epoch": 1.5399361022364217, "grad_norm": 0.6168022155761719, "learning_rate": 0.0001, "loss": 1.3776, "step": 13255 }, { "epoch": 1.5400522799883822, "grad_norm": 0.6376373767852783, "learning_rate": 0.0001, "loss": 1.6387, "step": 13256 }, { "epoch": 1.5401684577403427, "grad_norm": 0.5829399824142456, "learning_rate": 0.0001, "loss": 1.2685, "step": 13257 }, { "epoch": 1.5402846354923032, "grad_norm": 0.6367193460464478, "learning_rate": 0.0001, "loss": 1.5424, "step": 13258 }, { "epoch": 1.5404008132442637, "grad_norm": 0.5960088968276978, "learning_rate": 0.0001, "loss": 1.3126, "step": 13259 }, { "epoch": 1.5405169909962242, "grad_norm": 0.6211891770362854, "learning_rate": 0.0001, "loss": 1.5361, "step": 13260 }, { "epoch": 1.5406331687481847, "grad_norm": 0.5986420512199402, "learning_rate": 0.0001, "loss": 1.5322, "step": 13261 }, { "epoch": 1.5407493465001452, "grad_norm": 0.6235995888710022, "learning_rate": 0.0001, "loss": 1.4277, "step": 13262 }, { "epoch": 1.5408655242521059, "grad_norm": 0.613019585609436, "learning_rate": 0.0001, "loss": 1.5626, "step": 13263 }, { "epoch": 1.5409817020040664, "grad_norm": 0.630034327507019, "learning_rate": 0.0001, "loss": 1.6762, "step": 13264 }, { "epoch": 1.5410978797560269, "grad_norm": 0.5918728709220886, "learning_rate": 0.0001, "loss": 1.4057, "step": 13265 }, { "epoch": 1.5412140575079873, "grad_norm": 0.5738247036933899, "learning_rate": 0.0001, "loss": 1.4588, "step": 13266 }, { "epoch": 1.5413302352599478, "grad_norm": 0.6175058484077454, "learning_rate": 0.0001, "loss": 1.457, "step": 13267 }, { "epoch": 1.5414464130119083, "grad_norm": 0.5984748601913452, "learning_rate": 0.0001, "loss": 1.5045, "step": 13268 }, { "epoch": 1.5415625907638688, "grad_norm": 0.6336756944656372, "learning_rate": 0.0001, "loss": 1.5041, "step": 13269 }, { "epoch": 1.5416787685158293, "grad_norm": 0.6561615467071533, "learning_rate": 0.0001, "loss": 1.4334, "step": 13270 }, { "epoch": 1.5417949462677898, "grad_norm": 0.643224835395813, "learning_rate": 0.0001, "loss": 1.5183, "step": 13271 }, { "epoch": 1.5419111240197503, "grad_norm": 0.5880582928657532, "learning_rate": 0.0001, "loss": 1.3817, "step": 13272 }, { "epoch": 1.5420273017717108, "grad_norm": 0.6055521368980408, "learning_rate": 0.0001, "loss": 1.3452, "step": 13273 }, { "epoch": 1.5421434795236713, "grad_norm": 0.590501070022583, "learning_rate": 0.0001, "loss": 1.3997, "step": 13274 }, { "epoch": 1.5422596572756317, "grad_norm": 0.5980383157730103, "learning_rate": 0.0001, "loss": 1.5425, "step": 13275 }, { "epoch": 1.5423758350275922, "grad_norm": 0.5758526921272278, "learning_rate": 0.0001, "loss": 1.348, "step": 13276 }, { "epoch": 1.5424920127795527, "grad_norm": 0.5835914611816406, "learning_rate": 0.0001, "loss": 1.2751, "step": 13277 }, { "epoch": 1.5426081905315132, "grad_norm": 0.5609144568443298, "learning_rate": 0.0001, "loss": 1.2751, "step": 13278 }, { "epoch": 1.5427243682834737, "grad_norm": 0.5979585647583008, "learning_rate": 0.0001, "loss": 1.4066, "step": 13279 }, { "epoch": 1.5428405460354342, "grad_norm": 0.6307063698768616, "learning_rate": 0.0001, "loss": 1.4208, "step": 13280 }, { "epoch": 1.5429567237873947, "grad_norm": 0.6205479502677917, "learning_rate": 0.0001, "loss": 1.4094, "step": 13281 }, { "epoch": 1.5430729015393552, "grad_norm": 0.6695151925086975, "learning_rate": 0.0001, "loss": 1.5445, "step": 13282 }, { "epoch": 1.5431890792913157, "grad_norm": 0.5992878675460815, "learning_rate": 0.0001, "loss": 1.3733, "step": 13283 }, { "epoch": 1.5433052570432761, "grad_norm": 0.5989102125167847, "learning_rate": 0.0001, "loss": 1.3398, "step": 13284 }, { "epoch": 1.5434214347952366, "grad_norm": 0.6177709698677063, "learning_rate": 0.0001, "loss": 1.5911, "step": 13285 }, { "epoch": 1.5435376125471971, "grad_norm": 0.5789451599121094, "learning_rate": 0.0001, "loss": 1.431, "step": 13286 }, { "epoch": 1.5436537902991576, "grad_norm": 0.6461418271064758, "learning_rate": 0.0001, "loss": 1.4955, "step": 13287 }, { "epoch": 1.543769968051118, "grad_norm": 0.5908834934234619, "learning_rate": 0.0001, "loss": 1.4299, "step": 13288 }, { "epoch": 1.5438861458030786, "grad_norm": 0.6150990724563599, "learning_rate": 0.0001, "loss": 1.5879, "step": 13289 }, { "epoch": 1.544002323555039, "grad_norm": 0.5878303050994873, "learning_rate": 0.0001, "loss": 1.2382, "step": 13290 }, { "epoch": 1.5441185013069996, "grad_norm": 0.6185941100120544, "learning_rate": 0.0001, "loss": 1.542, "step": 13291 }, { "epoch": 1.54423467905896, "grad_norm": 0.7063065767288208, "learning_rate": 0.0001, "loss": 1.6306, "step": 13292 }, { "epoch": 1.5443508568109205, "grad_norm": 0.6151609420776367, "learning_rate": 0.0001, "loss": 1.4068, "step": 13293 }, { "epoch": 1.5444670345628813, "grad_norm": 0.5908089280128479, "learning_rate": 0.0001, "loss": 1.5094, "step": 13294 }, { "epoch": 1.5445832123148417, "grad_norm": 0.5625834465026855, "learning_rate": 0.0001, "loss": 1.3472, "step": 13295 }, { "epoch": 1.5446993900668022, "grad_norm": 0.6102145314216614, "learning_rate": 0.0001, "loss": 1.4495, "step": 13296 }, { "epoch": 1.5448155678187627, "grad_norm": 0.6253021359443665, "learning_rate": 0.0001, "loss": 1.4255, "step": 13297 }, { "epoch": 1.5449317455707232, "grad_norm": 0.588062047958374, "learning_rate": 0.0001, "loss": 1.3395, "step": 13298 }, { "epoch": 1.5450479233226837, "grad_norm": 0.6264815926551819, "learning_rate": 0.0001, "loss": 1.3836, "step": 13299 }, { "epoch": 1.5451641010746442, "grad_norm": 0.6290935277938843, "learning_rate": 0.0001, "loss": 1.4261, "step": 13300 }, { "epoch": 1.5452802788266047, "grad_norm": 0.6513729691505432, "learning_rate": 0.0001, "loss": 1.4928, "step": 13301 }, { "epoch": 1.5453964565785652, "grad_norm": 0.6446203589439392, "learning_rate": 0.0001, "loss": 1.4939, "step": 13302 }, { "epoch": 1.5455126343305257, "grad_norm": 0.6368615031242371, "learning_rate": 0.0001, "loss": 1.5529, "step": 13303 }, { "epoch": 1.5456288120824864, "grad_norm": 0.5730301141738892, "learning_rate": 0.0001, "loss": 1.5144, "step": 13304 }, { "epoch": 1.5457449898344469, "grad_norm": 0.5296486020088196, "learning_rate": 0.0001, "loss": 1.4107, "step": 13305 }, { "epoch": 1.5458611675864073, "grad_norm": 0.6226763725280762, "learning_rate": 0.0001, "loss": 1.4552, "step": 13306 }, { "epoch": 1.5459773453383678, "grad_norm": 0.6259723901748657, "learning_rate": 0.0001, "loss": 1.4037, "step": 13307 }, { "epoch": 1.5460935230903283, "grad_norm": 0.5425748825073242, "learning_rate": 0.0001, "loss": 1.4097, "step": 13308 }, { "epoch": 1.5462097008422888, "grad_norm": 0.6267858743667603, "learning_rate": 0.0001, "loss": 1.2792, "step": 13309 }, { "epoch": 1.5463258785942493, "grad_norm": 0.5841253399848938, "learning_rate": 0.0001, "loss": 1.5139, "step": 13310 }, { "epoch": 1.5464420563462098, "grad_norm": 0.660430908203125, "learning_rate": 0.0001, "loss": 1.5256, "step": 13311 }, { "epoch": 1.5465582340981703, "grad_norm": 0.5816801190376282, "learning_rate": 0.0001, "loss": 1.3198, "step": 13312 }, { "epoch": 1.5466744118501308, "grad_norm": 0.6387170553207397, "learning_rate": 0.0001, "loss": 1.5019, "step": 13313 }, { "epoch": 1.5467905896020913, "grad_norm": 0.6249637603759766, "learning_rate": 0.0001, "loss": 1.41, "step": 13314 }, { "epoch": 1.5469067673540517, "grad_norm": 0.6024580597877502, "learning_rate": 0.0001, "loss": 1.4078, "step": 13315 }, { "epoch": 1.5470229451060122, "grad_norm": 0.6384835243225098, "learning_rate": 0.0001, "loss": 1.4738, "step": 13316 }, { "epoch": 1.5471391228579727, "grad_norm": 0.5695124864578247, "learning_rate": 0.0001, "loss": 1.364, "step": 13317 }, { "epoch": 1.5472553006099332, "grad_norm": 0.6180372834205627, "learning_rate": 0.0001, "loss": 1.5373, "step": 13318 }, { "epoch": 1.5473714783618937, "grad_norm": 0.5775349140167236, "learning_rate": 0.0001, "loss": 1.3818, "step": 13319 }, { "epoch": 1.5474876561138542, "grad_norm": 0.6106045842170715, "learning_rate": 0.0001, "loss": 1.3383, "step": 13320 }, { "epoch": 1.5476038338658147, "grad_norm": 0.6502373814582825, "learning_rate": 0.0001, "loss": 1.5179, "step": 13321 }, { "epoch": 1.5477200116177752, "grad_norm": 0.6484719514846802, "learning_rate": 0.0001, "loss": 1.4393, "step": 13322 }, { "epoch": 1.5478361893697357, "grad_norm": 0.6394400000572205, "learning_rate": 0.0001, "loss": 1.646, "step": 13323 }, { "epoch": 1.5479523671216961, "grad_norm": 0.5985630750656128, "learning_rate": 0.0001, "loss": 1.4505, "step": 13324 }, { "epoch": 1.5480685448736566, "grad_norm": 0.616834282875061, "learning_rate": 0.0001, "loss": 1.5475, "step": 13325 }, { "epoch": 1.5481847226256171, "grad_norm": 0.6069583892822266, "learning_rate": 0.0001, "loss": 1.5224, "step": 13326 }, { "epoch": 1.5483009003775776, "grad_norm": 0.6398360133171082, "learning_rate": 0.0001, "loss": 1.4967, "step": 13327 }, { "epoch": 1.548417078129538, "grad_norm": 0.5988773107528687, "learning_rate": 0.0001, "loss": 1.3364, "step": 13328 }, { "epoch": 1.5485332558814986, "grad_norm": 0.6219581365585327, "learning_rate": 0.0001, "loss": 1.3877, "step": 13329 }, { "epoch": 1.548649433633459, "grad_norm": 0.5958396792411804, "learning_rate": 0.0001, "loss": 1.3614, "step": 13330 }, { "epoch": 1.5487656113854196, "grad_norm": 0.5914923548698425, "learning_rate": 0.0001, "loss": 1.4747, "step": 13331 }, { "epoch": 1.54888178913738, "grad_norm": 0.6485493779182434, "learning_rate": 0.0001, "loss": 1.4707, "step": 13332 }, { "epoch": 1.5489979668893405, "grad_norm": 0.6255422830581665, "learning_rate": 0.0001, "loss": 1.4731, "step": 13333 }, { "epoch": 1.549114144641301, "grad_norm": 0.6094285845756531, "learning_rate": 0.0001, "loss": 1.503, "step": 13334 }, { "epoch": 1.5492303223932615, "grad_norm": 0.5891302824020386, "learning_rate": 0.0001, "loss": 1.4598, "step": 13335 }, { "epoch": 1.5493465001452222, "grad_norm": 0.6349563002586365, "learning_rate": 0.0001, "loss": 1.5865, "step": 13336 }, { "epoch": 1.5494626778971827, "grad_norm": 0.6088860034942627, "learning_rate": 0.0001, "loss": 1.4814, "step": 13337 }, { "epoch": 1.5495788556491432, "grad_norm": 0.7181675434112549, "learning_rate": 0.0001, "loss": 1.6944, "step": 13338 }, { "epoch": 1.5496950334011037, "grad_norm": 0.6011210083961487, "learning_rate": 0.0001, "loss": 1.4959, "step": 13339 }, { "epoch": 1.5498112111530642, "grad_norm": 0.6319872140884399, "learning_rate": 0.0001, "loss": 1.4118, "step": 13340 }, { "epoch": 1.5499273889050247, "grad_norm": 0.6266433596611023, "learning_rate": 0.0001, "loss": 1.5924, "step": 13341 }, { "epoch": 1.5500435666569852, "grad_norm": 0.6468966603279114, "learning_rate": 0.0001, "loss": 1.5942, "step": 13342 }, { "epoch": 1.5501597444089457, "grad_norm": 0.5942563414573669, "learning_rate": 0.0001, "loss": 1.5652, "step": 13343 }, { "epoch": 1.5502759221609062, "grad_norm": 0.5953789949417114, "learning_rate": 0.0001, "loss": 1.2761, "step": 13344 }, { "epoch": 1.5503920999128666, "grad_norm": 0.5696388483047485, "learning_rate": 0.0001, "loss": 1.3045, "step": 13345 }, { "epoch": 1.5505082776648274, "grad_norm": 0.5958232879638672, "learning_rate": 0.0001, "loss": 1.4762, "step": 13346 }, { "epoch": 1.5506244554167878, "grad_norm": 0.6455214619636536, "learning_rate": 0.0001, "loss": 1.5647, "step": 13347 }, { "epoch": 1.5507406331687483, "grad_norm": 0.7005032896995544, "learning_rate": 0.0001, "loss": 1.4845, "step": 13348 }, { "epoch": 1.5508568109207088, "grad_norm": 0.6717994809150696, "learning_rate": 0.0001, "loss": 1.6779, "step": 13349 }, { "epoch": 1.5509729886726693, "grad_norm": 0.5751558542251587, "learning_rate": 0.0001, "loss": 1.3095, "step": 13350 }, { "epoch": 1.5510891664246298, "grad_norm": 0.5958260297775269, "learning_rate": 0.0001, "loss": 1.4298, "step": 13351 }, { "epoch": 1.5512053441765903, "grad_norm": 0.5926149487495422, "learning_rate": 0.0001, "loss": 1.4678, "step": 13352 }, { "epoch": 1.5513215219285508, "grad_norm": 0.621460497379303, "learning_rate": 0.0001, "loss": 1.3896, "step": 13353 }, { "epoch": 1.5514376996805113, "grad_norm": 0.6164433360099792, "learning_rate": 0.0001, "loss": 1.5632, "step": 13354 }, { "epoch": 1.5515538774324718, "grad_norm": 0.6684780120849609, "learning_rate": 0.0001, "loss": 1.6347, "step": 13355 }, { "epoch": 1.5516700551844322, "grad_norm": 0.6255985498428345, "learning_rate": 0.0001, "loss": 1.3428, "step": 13356 }, { "epoch": 1.5517862329363927, "grad_norm": 0.571209728717804, "learning_rate": 0.0001, "loss": 1.3975, "step": 13357 }, { "epoch": 1.5519024106883532, "grad_norm": 0.5906187891960144, "learning_rate": 0.0001, "loss": 1.4001, "step": 13358 }, { "epoch": 1.5520185884403137, "grad_norm": 0.6360796093940735, "learning_rate": 0.0001, "loss": 1.6595, "step": 13359 }, { "epoch": 1.5521347661922742, "grad_norm": 0.5564336180686951, "learning_rate": 0.0001, "loss": 1.2847, "step": 13360 }, { "epoch": 1.5522509439442347, "grad_norm": 0.6803907155990601, "learning_rate": 0.0001, "loss": 1.5776, "step": 13361 }, { "epoch": 1.5523671216961952, "grad_norm": 0.6163176894187927, "learning_rate": 0.0001, "loss": 1.5737, "step": 13362 }, { "epoch": 1.5524832994481557, "grad_norm": 0.5890237092971802, "learning_rate": 0.0001, "loss": 1.3819, "step": 13363 }, { "epoch": 1.5525994772001162, "grad_norm": 0.6154002547264099, "learning_rate": 0.0001, "loss": 1.2708, "step": 13364 }, { "epoch": 1.5527156549520766, "grad_norm": 0.6088365316390991, "learning_rate": 0.0001, "loss": 1.5037, "step": 13365 }, { "epoch": 1.5528318327040371, "grad_norm": 0.5878404378890991, "learning_rate": 0.0001, "loss": 1.3299, "step": 13366 }, { "epoch": 1.5529480104559976, "grad_norm": 0.6358885765075684, "learning_rate": 0.0001, "loss": 1.6242, "step": 13367 }, { "epoch": 1.553064188207958, "grad_norm": 0.6308332681655884, "learning_rate": 0.0001, "loss": 1.4362, "step": 13368 }, { "epoch": 1.5531803659599186, "grad_norm": 0.5902178287506104, "learning_rate": 0.0001, "loss": 1.4866, "step": 13369 }, { "epoch": 1.553296543711879, "grad_norm": 0.6629870533943176, "learning_rate": 0.0001, "loss": 1.4724, "step": 13370 }, { "epoch": 1.5534127214638396, "grad_norm": 0.6822353601455688, "learning_rate": 0.0001, "loss": 1.4084, "step": 13371 }, { "epoch": 1.5535288992158, "grad_norm": 0.5877573490142822, "learning_rate": 0.0001, "loss": 1.2979, "step": 13372 }, { "epoch": 1.5536450769677606, "grad_norm": 0.5882243514060974, "learning_rate": 0.0001, "loss": 1.5042, "step": 13373 }, { "epoch": 1.553761254719721, "grad_norm": 0.6118847727775574, "learning_rate": 0.0001, "loss": 1.464, "step": 13374 }, { "epoch": 1.5538774324716815, "grad_norm": 0.6584449410438538, "learning_rate": 0.0001, "loss": 1.6281, "step": 13375 }, { "epoch": 1.553993610223642, "grad_norm": 0.6288265585899353, "learning_rate": 0.0001, "loss": 1.4938, "step": 13376 }, { "epoch": 1.5541097879756025, "grad_norm": 0.637592077255249, "learning_rate": 0.0001, "loss": 1.5458, "step": 13377 }, { "epoch": 1.5542259657275632, "grad_norm": 0.642940104007721, "learning_rate": 0.0001, "loss": 1.4834, "step": 13378 }, { "epoch": 1.5543421434795237, "grad_norm": 0.6421939730644226, "learning_rate": 0.0001, "loss": 1.4557, "step": 13379 }, { "epoch": 1.5544583212314842, "grad_norm": 0.5893421173095703, "learning_rate": 0.0001, "loss": 1.4721, "step": 13380 }, { "epoch": 1.5545744989834447, "grad_norm": 0.6120336055755615, "learning_rate": 0.0001, "loss": 1.5363, "step": 13381 }, { "epoch": 1.5546906767354052, "grad_norm": 0.6369379162788391, "learning_rate": 0.0001, "loss": 1.4525, "step": 13382 }, { "epoch": 1.5548068544873657, "grad_norm": 0.6311773657798767, "learning_rate": 0.0001, "loss": 1.4666, "step": 13383 }, { "epoch": 1.5549230322393262, "grad_norm": 0.6140812039375305, "learning_rate": 0.0001, "loss": 1.4224, "step": 13384 }, { "epoch": 1.5550392099912866, "grad_norm": 0.6031202673912048, "learning_rate": 0.0001, "loss": 1.4273, "step": 13385 }, { "epoch": 1.5551553877432471, "grad_norm": 0.6218429803848267, "learning_rate": 0.0001, "loss": 1.5215, "step": 13386 }, { "epoch": 1.5552715654952076, "grad_norm": 0.6086384654045105, "learning_rate": 0.0001, "loss": 1.6033, "step": 13387 }, { "epoch": 1.5553877432471683, "grad_norm": 0.6052895188331604, "learning_rate": 0.0001, "loss": 1.4336, "step": 13388 }, { "epoch": 1.5555039209991288, "grad_norm": 0.6349013447761536, "learning_rate": 0.0001, "loss": 1.5928, "step": 13389 }, { "epoch": 1.5556200987510893, "grad_norm": 0.6184483766555786, "learning_rate": 0.0001, "loss": 1.61, "step": 13390 }, { "epoch": 1.5557362765030498, "grad_norm": 0.601908266544342, "learning_rate": 0.0001, "loss": 1.5301, "step": 13391 }, { "epoch": 1.5558524542550103, "grad_norm": 0.6823452115058899, "learning_rate": 0.0001, "loss": 1.7363, "step": 13392 }, { "epoch": 1.5559686320069708, "grad_norm": 0.6129056811332703, "learning_rate": 0.0001, "loss": 1.41, "step": 13393 }, { "epoch": 1.5560848097589313, "grad_norm": 0.5627692937850952, "learning_rate": 0.0001, "loss": 1.3706, "step": 13394 }, { "epoch": 1.5562009875108918, "grad_norm": 0.5908641815185547, "learning_rate": 0.0001, "loss": 1.3288, "step": 13395 }, { "epoch": 1.5563171652628522, "grad_norm": 0.6511195302009583, "learning_rate": 0.0001, "loss": 1.4648, "step": 13396 }, { "epoch": 1.5564333430148127, "grad_norm": 0.619094729423523, "learning_rate": 0.0001, "loss": 1.4406, "step": 13397 }, { "epoch": 1.5565495207667732, "grad_norm": 0.6720662117004395, "learning_rate": 0.0001, "loss": 1.5678, "step": 13398 }, { "epoch": 1.5566656985187337, "grad_norm": 0.7280980944633484, "learning_rate": 0.0001, "loss": 1.6025, "step": 13399 }, { "epoch": 1.5567818762706942, "grad_norm": 0.612970232963562, "learning_rate": 0.0001, "loss": 1.2731, "step": 13400 }, { "epoch": 1.5568980540226547, "grad_norm": 0.6179816722869873, "learning_rate": 0.0001, "loss": 1.4997, "step": 13401 }, { "epoch": 1.5570142317746152, "grad_norm": 0.6261540651321411, "learning_rate": 0.0001, "loss": 1.3036, "step": 13402 }, { "epoch": 1.5571304095265757, "grad_norm": 0.6171663403511047, "learning_rate": 0.0001, "loss": 1.5464, "step": 13403 }, { "epoch": 1.5572465872785362, "grad_norm": 0.5964109301567078, "learning_rate": 0.0001, "loss": 1.538, "step": 13404 }, { "epoch": 1.5573627650304966, "grad_norm": 0.6069731712341309, "learning_rate": 0.0001, "loss": 1.3542, "step": 13405 }, { "epoch": 1.5574789427824571, "grad_norm": 0.61017906665802, "learning_rate": 0.0001, "loss": 1.5481, "step": 13406 }, { "epoch": 1.5575951205344176, "grad_norm": 0.6241382956504822, "learning_rate": 0.0001, "loss": 1.5264, "step": 13407 }, { "epoch": 1.557711298286378, "grad_norm": 0.6097595691680908, "learning_rate": 0.0001, "loss": 1.4519, "step": 13408 }, { "epoch": 1.5578274760383386, "grad_norm": 0.6461699604988098, "learning_rate": 0.0001, "loss": 1.4854, "step": 13409 }, { "epoch": 1.557943653790299, "grad_norm": 0.6478632688522339, "learning_rate": 0.0001, "loss": 1.6612, "step": 13410 }, { "epoch": 1.5580598315422596, "grad_norm": 0.5949918627738953, "learning_rate": 0.0001, "loss": 1.3795, "step": 13411 }, { "epoch": 1.55817600929422, "grad_norm": 0.6056655645370483, "learning_rate": 0.0001, "loss": 1.4087, "step": 13412 }, { "epoch": 1.5582921870461806, "grad_norm": 0.5610243082046509, "learning_rate": 0.0001, "loss": 1.3303, "step": 13413 }, { "epoch": 1.558408364798141, "grad_norm": 0.5745115280151367, "learning_rate": 0.0001, "loss": 1.3136, "step": 13414 }, { "epoch": 1.5585245425501015, "grad_norm": 0.587050199508667, "learning_rate": 0.0001, "loss": 1.4801, "step": 13415 }, { "epoch": 1.558640720302062, "grad_norm": 0.6332523226737976, "learning_rate": 0.0001, "loss": 1.5378, "step": 13416 }, { "epoch": 1.5587568980540225, "grad_norm": 0.6112111210823059, "learning_rate": 0.0001, "loss": 1.5639, "step": 13417 }, { "epoch": 1.558873075805983, "grad_norm": 0.6478002667427063, "learning_rate": 0.0001, "loss": 1.6473, "step": 13418 }, { "epoch": 1.5589892535579435, "grad_norm": 0.6234623789787292, "learning_rate": 0.0001, "loss": 1.5801, "step": 13419 }, { "epoch": 1.5591054313099042, "grad_norm": 0.6207946538925171, "learning_rate": 0.0001, "loss": 1.4796, "step": 13420 }, { "epoch": 1.5592216090618647, "grad_norm": 0.6182363629341125, "learning_rate": 0.0001, "loss": 1.4773, "step": 13421 }, { "epoch": 1.5593377868138252, "grad_norm": 0.6172978281974792, "learning_rate": 0.0001, "loss": 1.4579, "step": 13422 }, { "epoch": 1.5594539645657857, "grad_norm": 0.59696364402771, "learning_rate": 0.0001, "loss": 1.5049, "step": 13423 }, { "epoch": 1.5595701423177462, "grad_norm": 0.58649742603302, "learning_rate": 0.0001, "loss": 1.4482, "step": 13424 }, { "epoch": 1.5596863200697066, "grad_norm": 0.6304432153701782, "learning_rate": 0.0001, "loss": 1.6529, "step": 13425 }, { "epoch": 1.5598024978216671, "grad_norm": 0.5944958329200745, "learning_rate": 0.0001, "loss": 1.4783, "step": 13426 }, { "epoch": 1.5599186755736276, "grad_norm": 0.5643553137779236, "learning_rate": 0.0001, "loss": 1.4529, "step": 13427 }, { "epoch": 1.5600348533255881, "grad_norm": 0.6088239550590515, "learning_rate": 0.0001, "loss": 1.4445, "step": 13428 }, { "epoch": 1.5601510310775486, "grad_norm": 0.6248722672462463, "learning_rate": 0.0001, "loss": 1.4446, "step": 13429 }, { "epoch": 1.5602672088295093, "grad_norm": 0.6209125518798828, "learning_rate": 0.0001, "loss": 1.4512, "step": 13430 }, { "epoch": 1.5603833865814698, "grad_norm": 0.6119517683982849, "learning_rate": 0.0001, "loss": 1.498, "step": 13431 }, { "epoch": 1.5604995643334303, "grad_norm": 0.6059420704841614, "learning_rate": 0.0001, "loss": 1.3094, "step": 13432 }, { "epoch": 1.5606157420853908, "grad_norm": 0.635593593120575, "learning_rate": 0.0001, "loss": 1.5286, "step": 13433 }, { "epoch": 1.5607319198373513, "grad_norm": 0.63407963514328, "learning_rate": 0.0001, "loss": 1.6207, "step": 13434 }, { "epoch": 1.5608480975893118, "grad_norm": 0.5982295274734497, "learning_rate": 0.0001, "loss": 1.4909, "step": 13435 }, { "epoch": 1.5609642753412722, "grad_norm": 0.6256076097488403, "learning_rate": 0.0001, "loss": 1.4428, "step": 13436 }, { "epoch": 1.5610804530932327, "grad_norm": 0.600486159324646, "learning_rate": 0.0001, "loss": 1.5336, "step": 13437 }, { "epoch": 1.5611966308451932, "grad_norm": 0.6342299580574036, "learning_rate": 0.0001, "loss": 1.4709, "step": 13438 }, { "epoch": 1.5613128085971537, "grad_norm": 0.625291109085083, "learning_rate": 0.0001, "loss": 1.679, "step": 13439 }, { "epoch": 1.5614289863491142, "grad_norm": 0.6260406970977783, "learning_rate": 0.0001, "loss": 1.5073, "step": 13440 }, { "epoch": 1.5615451641010747, "grad_norm": 0.6225281953811646, "learning_rate": 0.0001, "loss": 1.4857, "step": 13441 }, { "epoch": 1.5616613418530352, "grad_norm": 0.5835169553756714, "learning_rate": 0.0001, "loss": 1.5368, "step": 13442 }, { "epoch": 1.5617775196049957, "grad_norm": 0.6170945167541504, "learning_rate": 0.0001, "loss": 1.53, "step": 13443 }, { "epoch": 1.5618936973569562, "grad_norm": 0.594443142414093, "learning_rate": 0.0001, "loss": 1.3711, "step": 13444 }, { "epoch": 1.5620098751089166, "grad_norm": 0.6158294081687927, "learning_rate": 0.0001, "loss": 1.4973, "step": 13445 }, { "epoch": 1.5621260528608771, "grad_norm": 0.6171496510505676, "learning_rate": 0.0001, "loss": 1.5523, "step": 13446 }, { "epoch": 1.5622422306128376, "grad_norm": 0.6249264478683472, "learning_rate": 0.0001, "loss": 1.4543, "step": 13447 }, { "epoch": 1.5623584083647981, "grad_norm": 0.6244696974754333, "learning_rate": 0.0001, "loss": 1.4413, "step": 13448 }, { "epoch": 1.5624745861167586, "grad_norm": 0.6428230404853821, "learning_rate": 0.0001, "loss": 1.5077, "step": 13449 }, { "epoch": 1.562590763868719, "grad_norm": 0.6612055897712708, "learning_rate": 0.0001, "loss": 1.5691, "step": 13450 }, { "epoch": 1.5627069416206796, "grad_norm": 0.6030257344245911, "learning_rate": 0.0001, "loss": 1.458, "step": 13451 }, { "epoch": 1.56282311937264, "grad_norm": 0.5865484476089478, "learning_rate": 0.0001, "loss": 1.2867, "step": 13452 }, { "epoch": 1.5629392971246006, "grad_norm": 0.6694547533988953, "learning_rate": 0.0001, "loss": 1.5695, "step": 13453 }, { "epoch": 1.563055474876561, "grad_norm": 0.6105194687843323, "learning_rate": 0.0001, "loss": 1.3006, "step": 13454 }, { "epoch": 1.5631716526285215, "grad_norm": 0.5813074707984924, "learning_rate": 0.0001, "loss": 1.3504, "step": 13455 }, { "epoch": 1.563287830380482, "grad_norm": 0.6537028551101685, "learning_rate": 0.0001, "loss": 1.3702, "step": 13456 }, { "epoch": 1.5634040081324425, "grad_norm": 0.6405291557312012, "learning_rate": 0.0001, "loss": 1.3698, "step": 13457 }, { "epoch": 1.563520185884403, "grad_norm": 0.644826352596283, "learning_rate": 0.0001, "loss": 1.5245, "step": 13458 }, { "epoch": 1.5636363636363635, "grad_norm": 0.6287594437599182, "learning_rate": 0.0001, "loss": 1.4445, "step": 13459 }, { "epoch": 1.563752541388324, "grad_norm": 0.6116755604743958, "learning_rate": 0.0001, "loss": 1.42, "step": 13460 }, { "epoch": 1.5638687191402847, "grad_norm": 0.6694396734237671, "learning_rate": 0.0001, "loss": 1.528, "step": 13461 }, { "epoch": 1.5639848968922452, "grad_norm": 0.6518230438232422, "learning_rate": 0.0001, "loss": 1.5947, "step": 13462 }, { "epoch": 1.5641010746442057, "grad_norm": 0.5904227495193481, "learning_rate": 0.0001, "loss": 1.484, "step": 13463 }, { "epoch": 1.5642172523961662, "grad_norm": 0.6056810617446899, "learning_rate": 0.0001, "loss": 1.5085, "step": 13464 }, { "epoch": 1.5643334301481266, "grad_norm": 0.606863260269165, "learning_rate": 0.0001, "loss": 1.4822, "step": 13465 }, { "epoch": 1.5644496079000871, "grad_norm": 0.61651211977005, "learning_rate": 0.0001, "loss": 1.4381, "step": 13466 }, { "epoch": 1.5645657856520476, "grad_norm": 0.6035972833633423, "learning_rate": 0.0001, "loss": 1.4649, "step": 13467 }, { "epoch": 1.5646819634040081, "grad_norm": 0.6274685859680176, "learning_rate": 0.0001, "loss": 1.4341, "step": 13468 }, { "epoch": 1.5647981411559686, "grad_norm": 0.6115575432777405, "learning_rate": 0.0001, "loss": 1.5034, "step": 13469 }, { "epoch": 1.564914318907929, "grad_norm": 0.6543807983398438, "learning_rate": 0.0001, "loss": 1.3965, "step": 13470 }, { "epoch": 1.5650304966598896, "grad_norm": 0.6324952244758606, "learning_rate": 0.0001, "loss": 1.4441, "step": 13471 }, { "epoch": 1.5651466744118503, "grad_norm": 0.6065995693206787, "learning_rate": 0.0001, "loss": 1.3974, "step": 13472 }, { "epoch": 1.5652628521638108, "grad_norm": 0.6735640168190002, "learning_rate": 0.0001, "loss": 1.4644, "step": 13473 }, { "epoch": 1.5653790299157713, "grad_norm": 0.6395497918128967, "learning_rate": 0.0001, "loss": 1.5344, "step": 13474 }, { "epoch": 1.5654952076677318, "grad_norm": 0.6460095643997192, "learning_rate": 0.0001, "loss": 1.5333, "step": 13475 }, { "epoch": 1.5656113854196922, "grad_norm": 0.6567825078964233, "learning_rate": 0.0001, "loss": 1.6167, "step": 13476 }, { "epoch": 1.5657275631716527, "grad_norm": 0.6820036768913269, "learning_rate": 0.0001, "loss": 1.6258, "step": 13477 }, { "epoch": 1.5658437409236132, "grad_norm": 0.6071247458457947, "learning_rate": 0.0001, "loss": 1.3688, "step": 13478 }, { "epoch": 1.5659599186755737, "grad_norm": 0.608846127986908, "learning_rate": 0.0001, "loss": 1.3601, "step": 13479 }, { "epoch": 1.5660760964275342, "grad_norm": 0.7005351781845093, "learning_rate": 0.0001, "loss": 1.535, "step": 13480 }, { "epoch": 1.5661922741794947, "grad_norm": 0.6527760624885559, "learning_rate": 0.0001, "loss": 1.5133, "step": 13481 }, { "epoch": 1.5663084519314552, "grad_norm": 0.593977153301239, "learning_rate": 0.0001, "loss": 1.4874, "step": 13482 }, { "epoch": 1.5664246296834157, "grad_norm": 0.6437960267066956, "learning_rate": 0.0001, "loss": 1.4985, "step": 13483 }, { "epoch": 1.5665408074353762, "grad_norm": 0.608905553817749, "learning_rate": 0.0001, "loss": 1.4795, "step": 13484 }, { "epoch": 1.5666569851873366, "grad_norm": 0.5913368463516235, "learning_rate": 0.0001, "loss": 1.3551, "step": 13485 }, { "epoch": 1.5667731629392971, "grad_norm": 0.6186370849609375, "learning_rate": 0.0001, "loss": 1.4108, "step": 13486 }, { "epoch": 1.5668893406912576, "grad_norm": 0.6638941168785095, "learning_rate": 0.0001, "loss": 1.5195, "step": 13487 }, { "epoch": 1.5670055184432181, "grad_norm": 0.7029228806495667, "learning_rate": 0.0001, "loss": 1.6166, "step": 13488 }, { "epoch": 1.5671216961951786, "grad_norm": 0.6374664902687073, "learning_rate": 0.0001, "loss": 1.5843, "step": 13489 }, { "epoch": 1.567237873947139, "grad_norm": 0.6297853589057922, "learning_rate": 0.0001, "loss": 1.4802, "step": 13490 }, { "epoch": 1.5673540516990996, "grad_norm": 0.6086907386779785, "learning_rate": 0.0001, "loss": 1.3929, "step": 13491 }, { "epoch": 1.56747022945106, "grad_norm": 0.6632092595100403, "learning_rate": 0.0001, "loss": 1.5099, "step": 13492 }, { "epoch": 1.5675864072030206, "grad_norm": 0.6230993270874023, "learning_rate": 0.0001, "loss": 1.497, "step": 13493 }, { "epoch": 1.567702584954981, "grad_norm": 0.6114823818206787, "learning_rate": 0.0001, "loss": 1.4794, "step": 13494 }, { "epoch": 1.5678187627069415, "grad_norm": 0.645427405834198, "learning_rate": 0.0001, "loss": 1.3659, "step": 13495 }, { "epoch": 1.567934940458902, "grad_norm": 0.5928539633750916, "learning_rate": 0.0001, "loss": 1.3554, "step": 13496 }, { "epoch": 1.5680511182108625, "grad_norm": 0.6389637589454651, "learning_rate": 0.0001, "loss": 1.3468, "step": 13497 }, { "epoch": 1.568167295962823, "grad_norm": 0.6235938668251038, "learning_rate": 0.0001, "loss": 1.4206, "step": 13498 }, { "epoch": 1.5682834737147835, "grad_norm": 0.6196651458740234, "learning_rate": 0.0001, "loss": 1.5044, "step": 13499 }, { "epoch": 1.568399651466744, "grad_norm": 0.6908482313156128, "learning_rate": 0.0001, "loss": 1.6682, "step": 13500 }, { "epoch": 1.5685158292187045, "grad_norm": 0.5792660713195801, "learning_rate": 0.0001, "loss": 1.4432, "step": 13501 }, { "epoch": 1.568632006970665, "grad_norm": 0.6126808524131775, "learning_rate": 0.0001, "loss": 1.5117, "step": 13502 }, { "epoch": 1.5687481847226257, "grad_norm": 0.6406491994857788, "learning_rate": 0.0001, "loss": 1.484, "step": 13503 }, { "epoch": 1.5688643624745862, "grad_norm": 0.6253640055656433, "learning_rate": 0.0001, "loss": 1.5793, "step": 13504 }, { "epoch": 1.5689805402265466, "grad_norm": 0.7741358280181885, "learning_rate": 0.0001, "loss": 1.5336, "step": 13505 }, { "epoch": 1.5690967179785071, "grad_norm": 0.5788688659667969, "learning_rate": 0.0001, "loss": 1.397, "step": 13506 }, { "epoch": 1.5692128957304676, "grad_norm": 0.6431933045387268, "learning_rate": 0.0001, "loss": 1.5296, "step": 13507 }, { "epoch": 1.5693290734824281, "grad_norm": 0.6005233526229858, "learning_rate": 0.0001, "loss": 1.5396, "step": 13508 }, { "epoch": 1.5694452512343886, "grad_norm": 0.6058092713356018, "learning_rate": 0.0001, "loss": 1.4367, "step": 13509 }, { "epoch": 1.569561428986349, "grad_norm": 0.6385200023651123, "learning_rate": 0.0001, "loss": 1.6869, "step": 13510 }, { "epoch": 1.5696776067383096, "grad_norm": 0.5717422366142273, "learning_rate": 0.0001, "loss": 1.4981, "step": 13511 }, { "epoch": 1.56979378449027, "grad_norm": 0.5632378458976746, "learning_rate": 0.0001, "loss": 1.324, "step": 13512 }, { "epoch": 1.5699099622422306, "grad_norm": 0.5892371535301208, "learning_rate": 0.0001, "loss": 1.4671, "step": 13513 }, { "epoch": 1.5700261399941913, "grad_norm": 0.5963519811630249, "learning_rate": 0.0001, "loss": 1.4721, "step": 13514 }, { "epoch": 1.5701423177461518, "grad_norm": 0.6520595550537109, "learning_rate": 0.0001, "loss": 1.6544, "step": 13515 }, { "epoch": 1.5702584954981123, "grad_norm": 0.6093313097953796, "learning_rate": 0.0001, "loss": 1.3183, "step": 13516 }, { "epoch": 1.5703746732500727, "grad_norm": 0.6415348052978516, "learning_rate": 0.0001, "loss": 1.5086, "step": 13517 }, { "epoch": 1.5704908510020332, "grad_norm": 0.6013089418411255, "learning_rate": 0.0001, "loss": 1.4176, "step": 13518 }, { "epoch": 1.5706070287539937, "grad_norm": 0.6108344197273254, "learning_rate": 0.0001, "loss": 1.4794, "step": 13519 }, { "epoch": 1.5707232065059542, "grad_norm": 0.6556594371795654, "learning_rate": 0.0001, "loss": 1.6093, "step": 13520 }, { "epoch": 1.5708393842579147, "grad_norm": 0.6048537492752075, "learning_rate": 0.0001, "loss": 1.4716, "step": 13521 }, { "epoch": 1.5709555620098752, "grad_norm": 0.5792707800865173, "learning_rate": 0.0001, "loss": 1.3146, "step": 13522 }, { "epoch": 1.5710717397618357, "grad_norm": 0.6412919163703918, "learning_rate": 0.0001, "loss": 1.4789, "step": 13523 }, { "epoch": 1.5711879175137962, "grad_norm": 0.6530932188034058, "learning_rate": 0.0001, "loss": 1.4638, "step": 13524 }, { "epoch": 1.5713040952657567, "grad_norm": 0.6409628391265869, "learning_rate": 0.0001, "loss": 1.4587, "step": 13525 }, { "epoch": 1.5714202730177171, "grad_norm": 0.6507309675216675, "learning_rate": 0.0001, "loss": 1.6477, "step": 13526 }, { "epoch": 1.5715364507696776, "grad_norm": 0.6563456654548645, "learning_rate": 0.0001, "loss": 1.3904, "step": 13527 }, { "epoch": 1.5716526285216381, "grad_norm": 0.6711177825927734, "learning_rate": 0.0001, "loss": 1.4924, "step": 13528 }, { "epoch": 1.5717688062735986, "grad_norm": 0.6010395884513855, "learning_rate": 0.0001, "loss": 1.4131, "step": 13529 }, { "epoch": 1.571884984025559, "grad_norm": 0.609821617603302, "learning_rate": 0.0001, "loss": 1.3046, "step": 13530 }, { "epoch": 1.5720011617775196, "grad_norm": 0.6318057179450989, "learning_rate": 0.0001, "loss": 1.4899, "step": 13531 }, { "epoch": 1.57211733952948, "grad_norm": 0.6473124027252197, "learning_rate": 0.0001, "loss": 1.5493, "step": 13532 }, { "epoch": 1.5722335172814406, "grad_norm": 0.6723019480705261, "learning_rate": 0.0001, "loss": 1.6904, "step": 13533 }, { "epoch": 1.572349695033401, "grad_norm": 0.6439656019210815, "learning_rate": 0.0001, "loss": 1.4742, "step": 13534 }, { "epoch": 1.5724658727853615, "grad_norm": 0.5960809588432312, "learning_rate": 0.0001, "loss": 1.4667, "step": 13535 }, { "epoch": 1.572582050537322, "grad_norm": 0.6096289753913879, "learning_rate": 0.0001, "loss": 1.4864, "step": 13536 }, { "epoch": 1.5726982282892825, "grad_norm": 0.5937155485153198, "learning_rate": 0.0001, "loss": 1.4994, "step": 13537 }, { "epoch": 1.572814406041243, "grad_norm": 0.6248606443405151, "learning_rate": 0.0001, "loss": 1.4783, "step": 13538 }, { "epoch": 1.5729305837932035, "grad_norm": 0.6048620343208313, "learning_rate": 0.0001, "loss": 1.5025, "step": 13539 }, { "epoch": 1.573046761545164, "grad_norm": 0.5833286643028259, "learning_rate": 0.0001, "loss": 1.4685, "step": 13540 }, { "epoch": 1.5731629392971245, "grad_norm": 0.6231339573860168, "learning_rate": 0.0001, "loss": 1.5243, "step": 13541 }, { "epoch": 1.573279117049085, "grad_norm": 0.641281247138977, "learning_rate": 0.0001, "loss": 1.6371, "step": 13542 }, { "epoch": 1.5733952948010455, "grad_norm": 0.6249711513519287, "learning_rate": 0.0001, "loss": 1.573, "step": 13543 }, { "epoch": 1.573511472553006, "grad_norm": 0.5779685378074646, "learning_rate": 0.0001, "loss": 1.3116, "step": 13544 }, { "epoch": 1.5736276503049667, "grad_norm": 0.6345327496528625, "learning_rate": 0.0001, "loss": 1.4915, "step": 13545 }, { "epoch": 1.5737438280569271, "grad_norm": 0.6199609637260437, "learning_rate": 0.0001, "loss": 1.2796, "step": 13546 }, { "epoch": 1.5738600058088876, "grad_norm": 0.5937657952308655, "learning_rate": 0.0001, "loss": 1.408, "step": 13547 }, { "epoch": 1.5739761835608481, "grad_norm": 0.6459031701087952, "learning_rate": 0.0001, "loss": 1.4237, "step": 13548 }, { "epoch": 1.5740923613128086, "grad_norm": 0.6150230169296265, "learning_rate": 0.0001, "loss": 1.3998, "step": 13549 }, { "epoch": 1.574208539064769, "grad_norm": 0.6067149043083191, "learning_rate": 0.0001, "loss": 1.4518, "step": 13550 }, { "epoch": 1.5743247168167296, "grad_norm": 0.5723419785499573, "learning_rate": 0.0001, "loss": 1.3533, "step": 13551 }, { "epoch": 1.57444089456869, "grad_norm": 0.5707582831382751, "learning_rate": 0.0001, "loss": 1.255, "step": 13552 }, { "epoch": 1.5745570723206506, "grad_norm": 0.6069430112838745, "learning_rate": 0.0001, "loss": 1.3843, "step": 13553 }, { "epoch": 1.574673250072611, "grad_norm": 0.6458742022514343, "learning_rate": 0.0001, "loss": 1.4396, "step": 13554 }, { "epoch": 1.5747894278245715, "grad_norm": 0.6966086030006409, "learning_rate": 0.0001, "loss": 1.5593, "step": 13555 }, { "epoch": 1.5749056055765323, "grad_norm": 0.6121847033500671, "learning_rate": 0.0001, "loss": 1.5018, "step": 13556 }, { "epoch": 1.5750217833284927, "grad_norm": 0.6447102427482605, "learning_rate": 0.0001, "loss": 1.5895, "step": 13557 }, { "epoch": 1.5751379610804532, "grad_norm": 0.6316473484039307, "learning_rate": 0.0001, "loss": 1.4707, "step": 13558 }, { "epoch": 1.5752541388324137, "grad_norm": 0.623142659664154, "learning_rate": 0.0001, "loss": 1.4751, "step": 13559 }, { "epoch": 1.5753703165843742, "grad_norm": 0.5784343481063843, "learning_rate": 0.0001, "loss": 1.3394, "step": 13560 }, { "epoch": 1.5754864943363347, "grad_norm": 0.5949280261993408, "learning_rate": 0.0001, "loss": 1.3922, "step": 13561 }, { "epoch": 1.5756026720882952, "grad_norm": 0.5954093933105469, "learning_rate": 0.0001, "loss": 1.519, "step": 13562 }, { "epoch": 1.5757188498402557, "grad_norm": 0.6469232439994812, "learning_rate": 0.0001, "loss": 1.6181, "step": 13563 }, { "epoch": 1.5758350275922162, "grad_norm": 0.5921434164047241, "learning_rate": 0.0001, "loss": 1.302, "step": 13564 }, { "epoch": 1.5759512053441767, "grad_norm": 0.5979151725769043, "learning_rate": 0.0001, "loss": 1.4764, "step": 13565 }, { "epoch": 1.5760673830961371, "grad_norm": 0.5948922634124756, "learning_rate": 0.0001, "loss": 1.367, "step": 13566 }, { "epoch": 1.5761835608480976, "grad_norm": 0.6355971097946167, "learning_rate": 0.0001, "loss": 1.5362, "step": 13567 }, { "epoch": 1.5762997386000581, "grad_norm": 0.654316782951355, "learning_rate": 0.0001, "loss": 1.5292, "step": 13568 }, { "epoch": 1.5764159163520186, "grad_norm": 0.6081578731536865, "learning_rate": 0.0001, "loss": 1.4942, "step": 13569 }, { "epoch": 1.576532094103979, "grad_norm": 0.6357074975967407, "learning_rate": 0.0001, "loss": 1.3794, "step": 13570 }, { "epoch": 1.5766482718559396, "grad_norm": 0.6234276294708252, "learning_rate": 0.0001, "loss": 1.4295, "step": 13571 }, { "epoch": 1.5767644496079, "grad_norm": 0.6558229327201843, "learning_rate": 0.0001, "loss": 1.5862, "step": 13572 }, { "epoch": 1.5768806273598606, "grad_norm": 0.6135905981063843, "learning_rate": 0.0001, "loss": 1.5521, "step": 13573 }, { "epoch": 1.576996805111821, "grad_norm": 0.5907683372497559, "learning_rate": 0.0001, "loss": 1.3218, "step": 13574 }, { "epoch": 1.5771129828637815, "grad_norm": 0.5935102701187134, "learning_rate": 0.0001, "loss": 1.4558, "step": 13575 }, { "epoch": 1.577229160615742, "grad_norm": 0.5960171222686768, "learning_rate": 0.0001, "loss": 1.3446, "step": 13576 }, { "epoch": 1.5773453383677025, "grad_norm": 0.617599606513977, "learning_rate": 0.0001, "loss": 1.4829, "step": 13577 }, { "epoch": 1.577461516119663, "grad_norm": 0.611723005771637, "learning_rate": 0.0001, "loss": 1.5532, "step": 13578 }, { "epoch": 1.5775776938716235, "grad_norm": 0.6049669981002808, "learning_rate": 0.0001, "loss": 1.3626, "step": 13579 }, { "epoch": 1.577693871623584, "grad_norm": 0.6668212413787842, "learning_rate": 0.0001, "loss": 1.5093, "step": 13580 }, { "epoch": 1.5778100493755445, "grad_norm": 0.5572206377983093, "learning_rate": 0.0001, "loss": 1.1698, "step": 13581 }, { "epoch": 1.577926227127505, "grad_norm": 0.6364170908927917, "learning_rate": 0.0001, "loss": 1.3022, "step": 13582 }, { "epoch": 1.5780424048794655, "grad_norm": 0.6507368087768555, "learning_rate": 0.0001, "loss": 1.5794, "step": 13583 }, { "epoch": 1.578158582631426, "grad_norm": 0.6161065697669983, "learning_rate": 0.0001, "loss": 1.54, "step": 13584 }, { "epoch": 1.5782747603833864, "grad_norm": 0.6118278503417969, "learning_rate": 0.0001, "loss": 1.5709, "step": 13585 }, { "epoch": 1.578390938135347, "grad_norm": 0.6194911599159241, "learning_rate": 0.0001, "loss": 1.5626, "step": 13586 }, { "epoch": 1.5785071158873076, "grad_norm": 0.6314056515693665, "learning_rate": 0.0001, "loss": 1.4473, "step": 13587 }, { "epoch": 1.5786232936392681, "grad_norm": 0.6135434508323669, "learning_rate": 0.0001, "loss": 1.3764, "step": 13588 }, { "epoch": 1.5787394713912286, "grad_norm": 0.6144410371780396, "learning_rate": 0.0001, "loss": 1.5922, "step": 13589 }, { "epoch": 1.578855649143189, "grad_norm": 0.6232594847679138, "learning_rate": 0.0001, "loss": 1.5534, "step": 13590 }, { "epoch": 1.5789718268951496, "grad_norm": 0.6463111639022827, "learning_rate": 0.0001, "loss": 1.5343, "step": 13591 }, { "epoch": 1.57908800464711, "grad_norm": 0.6009104251861572, "learning_rate": 0.0001, "loss": 1.4659, "step": 13592 }, { "epoch": 1.5792041823990706, "grad_norm": 0.6553391218185425, "learning_rate": 0.0001, "loss": 1.5536, "step": 13593 }, { "epoch": 1.579320360151031, "grad_norm": 0.5963552594184875, "learning_rate": 0.0001, "loss": 1.221, "step": 13594 }, { "epoch": 1.5794365379029915, "grad_norm": 0.5866657495498657, "learning_rate": 0.0001, "loss": 1.3527, "step": 13595 }, { "epoch": 1.579552715654952, "grad_norm": 0.6316760182380676, "learning_rate": 0.0001, "loss": 1.3974, "step": 13596 }, { "epoch": 1.5796688934069125, "grad_norm": 0.5814557671546936, "learning_rate": 0.0001, "loss": 1.257, "step": 13597 }, { "epoch": 1.5797850711588732, "grad_norm": 0.5807773470878601, "learning_rate": 0.0001, "loss": 1.29, "step": 13598 }, { "epoch": 1.5799012489108337, "grad_norm": 0.6384397745132446, "learning_rate": 0.0001, "loss": 1.5735, "step": 13599 }, { "epoch": 1.5800174266627942, "grad_norm": 0.5958534479141235, "learning_rate": 0.0001, "loss": 1.378, "step": 13600 }, { "epoch": 1.5801336044147547, "grad_norm": 0.633190929889679, "learning_rate": 0.0001, "loss": 1.5265, "step": 13601 }, { "epoch": 1.5802497821667152, "grad_norm": 0.6206573843955994, "learning_rate": 0.0001, "loss": 1.4828, "step": 13602 }, { "epoch": 1.5803659599186757, "grad_norm": 0.6325139999389648, "learning_rate": 0.0001, "loss": 1.3249, "step": 13603 }, { "epoch": 1.5804821376706362, "grad_norm": 0.618319571018219, "learning_rate": 0.0001, "loss": 1.6177, "step": 13604 }, { "epoch": 1.5805983154225967, "grad_norm": 0.6615894436836243, "learning_rate": 0.0001, "loss": 1.4182, "step": 13605 }, { "epoch": 1.5807144931745571, "grad_norm": 0.632692813873291, "learning_rate": 0.0001, "loss": 1.4902, "step": 13606 }, { "epoch": 1.5808306709265176, "grad_norm": 0.6061463356018066, "learning_rate": 0.0001, "loss": 1.3473, "step": 13607 }, { "epoch": 1.5809468486784781, "grad_norm": 0.6018291711807251, "learning_rate": 0.0001, "loss": 1.4807, "step": 13608 }, { "epoch": 1.5810630264304386, "grad_norm": 0.6714172959327698, "learning_rate": 0.0001, "loss": 1.4028, "step": 13609 }, { "epoch": 1.581179204182399, "grad_norm": 0.5730470418930054, "learning_rate": 0.0001, "loss": 1.3864, "step": 13610 }, { "epoch": 1.5812953819343596, "grad_norm": 0.5907166004180908, "learning_rate": 0.0001, "loss": 1.4745, "step": 13611 }, { "epoch": 1.58141155968632, "grad_norm": 0.6612714529037476, "learning_rate": 0.0001, "loss": 1.4357, "step": 13612 }, { "epoch": 1.5815277374382806, "grad_norm": 0.6622298359870911, "learning_rate": 0.0001, "loss": 1.5119, "step": 13613 }, { "epoch": 1.581643915190241, "grad_norm": 0.6330438852310181, "learning_rate": 0.0001, "loss": 1.5316, "step": 13614 }, { "epoch": 1.5817600929422015, "grad_norm": 0.6070156693458557, "learning_rate": 0.0001, "loss": 1.4341, "step": 13615 }, { "epoch": 1.581876270694162, "grad_norm": 0.6398180723190308, "learning_rate": 0.0001, "loss": 1.3261, "step": 13616 }, { "epoch": 1.5819924484461225, "grad_norm": 0.6727312803268433, "learning_rate": 0.0001, "loss": 1.6721, "step": 13617 }, { "epoch": 1.582108626198083, "grad_norm": 0.5788384079933167, "learning_rate": 0.0001, "loss": 1.3515, "step": 13618 }, { "epoch": 1.5822248039500435, "grad_norm": 0.5994517803192139, "learning_rate": 0.0001, "loss": 1.5195, "step": 13619 }, { "epoch": 1.582340981702004, "grad_norm": 0.661238431930542, "learning_rate": 0.0001, "loss": 1.5039, "step": 13620 }, { "epoch": 1.5824571594539645, "grad_norm": 0.6708731055259705, "learning_rate": 0.0001, "loss": 1.423, "step": 13621 }, { "epoch": 1.582573337205925, "grad_norm": 0.6710119843482971, "learning_rate": 0.0001, "loss": 1.6503, "step": 13622 }, { "epoch": 1.5826895149578855, "grad_norm": 0.632266640663147, "learning_rate": 0.0001, "loss": 1.7291, "step": 13623 }, { "epoch": 1.582805692709846, "grad_norm": 0.6530042290687561, "learning_rate": 0.0001, "loss": 1.4614, "step": 13624 }, { "epoch": 1.5829218704618064, "grad_norm": 0.5772076845169067, "learning_rate": 0.0001, "loss": 1.4532, "step": 13625 }, { "epoch": 1.583038048213767, "grad_norm": 0.6056697368621826, "learning_rate": 0.0001, "loss": 1.5996, "step": 13626 }, { "epoch": 1.5831542259657274, "grad_norm": 0.6514211893081665, "learning_rate": 0.0001, "loss": 1.6922, "step": 13627 }, { "epoch": 1.583270403717688, "grad_norm": 0.5848867297172546, "learning_rate": 0.0001, "loss": 1.2466, "step": 13628 }, { "epoch": 1.5833865814696486, "grad_norm": 0.5798253417015076, "learning_rate": 0.0001, "loss": 1.334, "step": 13629 }, { "epoch": 1.583502759221609, "grad_norm": 0.6590627431869507, "learning_rate": 0.0001, "loss": 1.6691, "step": 13630 }, { "epoch": 1.5836189369735696, "grad_norm": 0.5774937272071838, "learning_rate": 0.0001, "loss": 1.4867, "step": 13631 }, { "epoch": 1.58373511472553, "grad_norm": 0.6019846200942993, "learning_rate": 0.0001, "loss": 1.3748, "step": 13632 }, { "epoch": 1.5838512924774906, "grad_norm": 0.581792414188385, "learning_rate": 0.0001, "loss": 1.3973, "step": 13633 }, { "epoch": 1.583967470229451, "grad_norm": 0.6952236890792847, "learning_rate": 0.0001, "loss": 1.7609, "step": 13634 }, { "epoch": 1.5840836479814115, "grad_norm": 0.5895935297012329, "learning_rate": 0.0001, "loss": 1.4348, "step": 13635 }, { "epoch": 1.584199825733372, "grad_norm": 0.6527734994888306, "learning_rate": 0.0001, "loss": 1.4164, "step": 13636 }, { "epoch": 1.5843160034853325, "grad_norm": 0.5872225761413574, "learning_rate": 0.0001, "loss": 1.4422, "step": 13637 }, { "epoch": 1.584432181237293, "grad_norm": 0.5942081212997437, "learning_rate": 0.0001, "loss": 1.3844, "step": 13638 }, { "epoch": 1.5845483589892535, "grad_norm": 0.5980642437934875, "learning_rate": 0.0001, "loss": 1.717, "step": 13639 }, { "epoch": 1.5846645367412142, "grad_norm": 0.5560755729675293, "learning_rate": 0.0001, "loss": 1.2702, "step": 13640 }, { "epoch": 1.5847807144931747, "grad_norm": 0.6350087523460388, "learning_rate": 0.0001, "loss": 1.555, "step": 13641 }, { "epoch": 1.5848968922451352, "grad_norm": 0.6073043346405029, "learning_rate": 0.0001, "loss": 1.5905, "step": 13642 }, { "epoch": 1.5850130699970957, "grad_norm": 0.6224149465560913, "learning_rate": 0.0001, "loss": 1.3986, "step": 13643 }, { "epoch": 1.5851292477490562, "grad_norm": 0.6422401070594788, "learning_rate": 0.0001, "loss": 1.5798, "step": 13644 }, { "epoch": 1.5852454255010167, "grad_norm": 0.59529048204422, "learning_rate": 0.0001, "loss": 1.3998, "step": 13645 }, { "epoch": 1.5853616032529771, "grad_norm": 0.6929322481155396, "learning_rate": 0.0001, "loss": 1.5899, "step": 13646 }, { "epoch": 1.5854777810049376, "grad_norm": 0.5858911871910095, "learning_rate": 0.0001, "loss": 1.4672, "step": 13647 }, { "epoch": 1.5855939587568981, "grad_norm": 0.6204400658607483, "learning_rate": 0.0001, "loss": 1.3553, "step": 13648 }, { "epoch": 1.5857101365088586, "grad_norm": 0.5746094584465027, "learning_rate": 0.0001, "loss": 1.2365, "step": 13649 }, { "epoch": 1.585826314260819, "grad_norm": 0.6758047938346863, "learning_rate": 0.0001, "loss": 1.5019, "step": 13650 }, { "epoch": 1.5859424920127796, "grad_norm": 0.6562144756317139, "learning_rate": 0.0001, "loss": 1.4617, "step": 13651 }, { "epoch": 1.58605866976474, "grad_norm": 0.6426905989646912, "learning_rate": 0.0001, "loss": 1.4973, "step": 13652 }, { "epoch": 1.5861748475167006, "grad_norm": 0.5699962973594666, "learning_rate": 0.0001, "loss": 1.3537, "step": 13653 }, { "epoch": 1.586291025268661, "grad_norm": 0.5969005227088928, "learning_rate": 0.0001, "loss": 1.3264, "step": 13654 }, { "epoch": 1.5864072030206215, "grad_norm": 0.6157596111297607, "learning_rate": 0.0001, "loss": 1.4763, "step": 13655 }, { "epoch": 1.586523380772582, "grad_norm": 0.5986966490745544, "learning_rate": 0.0001, "loss": 1.4321, "step": 13656 }, { "epoch": 1.5866395585245425, "grad_norm": 0.6195567846298218, "learning_rate": 0.0001, "loss": 1.5363, "step": 13657 }, { "epoch": 1.586755736276503, "grad_norm": 0.6283451914787292, "learning_rate": 0.0001, "loss": 1.3878, "step": 13658 }, { "epoch": 1.5868719140284635, "grad_norm": 0.6001632809638977, "learning_rate": 0.0001, "loss": 1.4915, "step": 13659 }, { "epoch": 1.586988091780424, "grad_norm": 0.6226193308830261, "learning_rate": 0.0001, "loss": 1.5789, "step": 13660 }, { "epoch": 1.5871042695323845, "grad_norm": 0.6030400991439819, "learning_rate": 0.0001, "loss": 1.4806, "step": 13661 }, { "epoch": 1.587220447284345, "grad_norm": 0.5935747623443604, "learning_rate": 0.0001, "loss": 1.337, "step": 13662 }, { "epoch": 1.5873366250363055, "grad_norm": 0.6464823484420776, "learning_rate": 0.0001, "loss": 1.612, "step": 13663 }, { "epoch": 1.587452802788266, "grad_norm": 0.6798169612884521, "learning_rate": 0.0001, "loss": 1.277, "step": 13664 }, { "epoch": 1.5875689805402264, "grad_norm": 0.5947486162185669, "learning_rate": 0.0001, "loss": 1.3965, "step": 13665 }, { "epoch": 1.587685158292187, "grad_norm": 0.6533806324005127, "learning_rate": 0.0001, "loss": 1.431, "step": 13666 }, { "epoch": 1.5878013360441474, "grad_norm": 0.6578819155693054, "learning_rate": 0.0001, "loss": 1.5538, "step": 13667 }, { "epoch": 1.587917513796108, "grad_norm": 0.6176968216896057, "learning_rate": 0.0001, "loss": 1.413, "step": 13668 }, { "epoch": 1.5880336915480684, "grad_norm": 0.6410322785377502, "learning_rate": 0.0001, "loss": 1.546, "step": 13669 }, { "epoch": 1.5881498693000289, "grad_norm": 0.6015816330909729, "learning_rate": 0.0001, "loss": 1.464, "step": 13670 }, { "epoch": 1.5882660470519896, "grad_norm": 0.6452057957649231, "learning_rate": 0.0001, "loss": 1.5803, "step": 13671 }, { "epoch": 1.58838222480395, "grad_norm": 0.6403473019599915, "learning_rate": 0.0001, "loss": 1.5298, "step": 13672 }, { "epoch": 1.5884984025559106, "grad_norm": 0.5736103653907776, "learning_rate": 0.0001, "loss": 1.3283, "step": 13673 }, { "epoch": 1.588614580307871, "grad_norm": 0.5940598845481873, "learning_rate": 0.0001, "loss": 1.588, "step": 13674 }, { "epoch": 1.5887307580598315, "grad_norm": 0.6314423084259033, "learning_rate": 0.0001, "loss": 1.4163, "step": 13675 }, { "epoch": 1.588846935811792, "grad_norm": 0.5856815576553345, "learning_rate": 0.0001, "loss": 1.4124, "step": 13676 }, { "epoch": 1.5889631135637525, "grad_norm": 0.6191335916519165, "learning_rate": 0.0001, "loss": 1.5927, "step": 13677 }, { "epoch": 1.589079291315713, "grad_norm": 0.5804166793823242, "learning_rate": 0.0001, "loss": 1.3609, "step": 13678 }, { "epoch": 1.5891954690676735, "grad_norm": 0.6077446937561035, "learning_rate": 0.0001, "loss": 1.4106, "step": 13679 }, { "epoch": 1.589311646819634, "grad_norm": 0.668630838394165, "learning_rate": 0.0001, "loss": 1.3604, "step": 13680 }, { "epoch": 1.5894278245715947, "grad_norm": 0.6589301824569702, "learning_rate": 0.0001, "loss": 1.7169, "step": 13681 }, { "epoch": 1.5895440023235552, "grad_norm": 0.62638920545578, "learning_rate": 0.0001, "loss": 1.5183, "step": 13682 }, { "epoch": 1.5896601800755157, "grad_norm": 0.626444935798645, "learning_rate": 0.0001, "loss": 1.5418, "step": 13683 }, { "epoch": 1.5897763578274762, "grad_norm": 0.6382818818092346, "learning_rate": 0.0001, "loss": 1.5035, "step": 13684 }, { "epoch": 1.5898925355794367, "grad_norm": 0.6530130505561829, "learning_rate": 0.0001, "loss": 1.534, "step": 13685 }, { "epoch": 1.5900087133313971, "grad_norm": 0.5917280316352844, "learning_rate": 0.0001, "loss": 1.2945, "step": 13686 }, { "epoch": 1.5901248910833576, "grad_norm": 0.601091206073761, "learning_rate": 0.0001, "loss": 1.4798, "step": 13687 }, { "epoch": 1.5902410688353181, "grad_norm": 0.6319622993469238, "learning_rate": 0.0001, "loss": 1.4709, "step": 13688 }, { "epoch": 1.5903572465872786, "grad_norm": 0.5939752459526062, "learning_rate": 0.0001, "loss": 1.3826, "step": 13689 }, { "epoch": 1.590473424339239, "grad_norm": 0.6311963200569153, "learning_rate": 0.0001, "loss": 1.3094, "step": 13690 }, { "epoch": 1.5905896020911996, "grad_norm": 0.6844353079795837, "learning_rate": 0.0001, "loss": 1.6273, "step": 13691 }, { "epoch": 1.59070577984316, "grad_norm": 0.5649846196174622, "learning_rate": 0.0001, "loss": 1.4596, "step": 13692 }, { "epoch": 1.5908219575951206, "grad_norm": 0.6137500405311584, "learning_rate": 0.0001, "loss": 1.4631, "step": 13693 }, { "epoch": 1.590938135347081, "grad_norm": 0.6590669751167297, "learning_rate": 0.0001, "loss": 1.5914, "step": 13694 }, { "epoch": 1.5910543130990416, "grad_norm": 0.6377047896385193, "learning_rate": 0.0001, "loss": 1.5466, "step": 13695 }, { "epoch": 1.591170490851002, "grad_norm": 0.5739707946777344, "learning_rate": 0.0001, "loss": 1.4131, "step": 13696 }, { "epoch": 1.5912866686029625, "grad_norm": 0.5823712944984436, "learning_rate": 0.0001, "loss": 1.3342, "step": 13697 }, { "epoch": 1.591402846354923, "grad_norm": 0.6388639807701111, "learning_rate": 0.0001, "loss": 1.6015, "step": 13698 }, { "epoch": 1.5915190241068835, "grad_norm": 0.5976583361625671, "learning_rate": 0.0001, "loss": 1.4843, "step": 13699 }, { "epoch": 1.591635201858844, "grad_norm": 0.5898172855377197, "learning_rate": 0.0001, "loss": 1.4665, "step": 13700 }, { "epoch": 1.5917513796108045, "grad_norm": 0.6254405379295349, "learning_rate": 0.0001, "loss": 1.4318, "step": 13701 }, { "epoch": 1.591867557362765, "grad_norm": 0.6013739109039307, "learning_rate": 0.0001, "loss": 1.6765, "step": 13702 }, { "epoch": 1.5919837351147255, "grad_norm": 0.6028892993927002, "learning_rate": 0.0001, "loss": 1.5473, "step": 13703 }, { "epoch": 1.592099912866686, "grad_norm": 0.5645247101783752, "learning_rate": 0.0001, "loss": 1.3645, "step": 13704 }, { "epoch": 1.5922160906186464, "grad_norm": 0.5704538822174072, "learning_rate": 0.0001, "loss": 1.4181, "step": 13705 }, { "epoch": 1.592332268370607, "grad_norm": 0.6046001315116882, "learning_rate": 0.0001, "loss": 1.4436, "step": 13706 }, { "epoch": 1.5924484461225674, "grad_norm": 0.6235845685005188, "learning_rate": 0.0001, "loss": 1.4548, "step": 13707 }, { "epoch": 1.592564623874528, "grad_norm": 0.6004418730735779, "learning_rate": 0.0001, "loss": 1.3711, "step": 13708 }, { "epoch": 1.5926808016264884, "grad_norm": 0.6089489459991455, "learning_rate": 0.0001, "loss": 1.4301, "step": 13709 }, { "epoch": 1.5927969793784489, "grad_norm": 0.5987474322319031, "learning_rate": 0.0001, "loss": 1.6283, "step": 13710 }, { "epoch": 1.5929131571304094, "grad_norm": 0.6150535345077515, "learning_rate": 0.0001, "loss": 1.6291, "step": 13711 }, { "epoch": 1.5930293348823699, "grad_norm": 0.6323658227920532, "learning_rate": 0.0001, "loss": 1.5579, "step": 13712 }, { "epoch": 1.5931455126343306, "grad_norm": 0.6383227705955505, "learning_rate": 0.0001, "loss": 1.4719, "step": 13713 }, { "epoch": 1.593261690386291, "grad_norm": 0.6205024719238281, "learning_rate": 0.0001, "loss": 1.5389, "step": 13714 }, { "epoch": 1.5933778681382516, "grad_norm": 0.6311460733413696, "learning_rate": 0.0001, "loss": 1.5924, "step": 13715 }, { "epoch": 1.593494045890212, "grad_norm": 0.650108277797699, "learning_rate": 0.0001, "loss": 1.3354, "step": 13716 }, { "epoch": 1.5936102236421725, "grad_norm": 0.5857402682304382, "learning_rate": 0.0001, "loss": 1.3433, "step": 13717 }, { "epoch": 1.593726401394133, "grad_norm": 0.5971586108207703, "learning_rate": 0.0001, "loss": 1.455, "step": 13718 }, { "epoch": 1.5938425791460935, "grad_norm": 0.6388098001480103, "learning_rate": 0.0001, "loss": 1.4883, "step": 13719 }, { "epoch": 1.593958756898054, "grad_norm": 0.6415640115737915, "learning_rate": 0.0001, "loss": 1.4228, "step": 13720 }, { "epoch": 1.5940749346500145, "grad_norm": 0.5820626616477966, "learning_rate": 0.0001, "loss": 1.4049, "step": 13721 }, { "epoch": 1.594191112401975, "grad_norm": 0.569054126739502, "learning_rate": 0.0001, "loss": 1.3599, "step": 13722 }, { "epoch": 1.5943072901539357, "grad_norm": 0.6303136348724365, "learning_rate": 0.0001, "loss": 1.4897, "step": 13723 }, { "epoch": 1.5944234679058962, "grad_norm": 0.5948778986930847, "learning_rate": 0.0001, "loss": 1.5478, "step": 13724 }, { "epoch": 1.5945396456578567, "grad_norm": 0.6140435338020325, "learning_rate": 0.0001, "loss": 1.4991, "step": 13725 }, { "epoch": 1.5946558234098172, "grad_norm": 0.5883561372756958, "learning_rate": 0.0001, "loss": 1.5103, "step": 13726 }, { "epoch": 1.5947720011617776, "grad_norm": 0.5896515846252441, "learning_rate": 0.0001, "loss": 1.4454, "step": 13727 }, { "epoch": 1.5948881789137381, "grad_norm": 0.5697752833366394, "learning_rate": 0.0001, "loss": 1.4407, "step": 13728 }, { "epoch": 1.5950043566656986, "grad_norm": 0.6240254640579224, "learning_rate": 0.0001, "loss": 1.525, "step": 13729 }, { "epoch": 1.595120534417659, "grad_norm": 0.6201429963111877, "learning_rate": 0.0001, "loss": 1.5679, "step": 13730 }, { "epoch": 1.5952367121696196, "grad_norm": 0.590234637260437, "learning_rate": 0.0001, "loss": 1.3998, "step": 13731 }, { "epoch": 1.59535288992158, "grad_norm": 0.5969529747962952, "learning_rate": 0.0001, "loss": 1.3655, "step": 13732 }, { "epoch": 1.5954690676735406, "grad_norm": 0.6133432984352112, "learning_rate": 0.0001, "loss": 1.1459, "step": 13733 }, { "epoch": 1.595585245425501, "grad_norm": 0.6062743067741394, "learning_rate": 0.0001, "loss": 1.4189, "step": 13734 }, { "epoch": 1.5957014231774616, "grad_norm": 0.658184826374054, "learning_rate": 0.0001, "loss": 1.6716, "step": 13735 }, { "epoch": 1.595817600929422, "grad_norm": 0.599326491355896, "learning_rate": 0.0001, "loss": 1.3884, "step": 13736 }, { "epoch": 1.5959337786813825, "grad_norm": 0.6448878049850464, "learning_rate": 0.0001, "loss": 1.4524, "step": 13737 }, { "epoch": 1.596049956433343, "grad_norm": 0.6201961636543274, "learning_rate": 0.0001, "loss": 1.3097, "step": 13738 }, { "epoch": 1.5961661341853035, "grad_norm": 0.6305376291275024, "learning_rate": 0.0001, "loss": 1.4291, "step": 13739 }, { "epoch": 1.596282311937264, "grad_norm": 0.6596425175666809, "learning_rate": 0.0001, "loss": 1.557, "step": 13740 }, { "epoch": 1.5963984896892245, "grad_norm": 0.6207994818687439, "learning_rate": 0.0001, "loss": 1.4566, "step": 13741 }, { "epoch": 1.596514667441185, "grad_norm": 0.6085087060928345, "learning_rate": 0.0001, "loss": 1.5105, "step": 13742 }, { "epoch": 1.5966308451931455, "grad_norm": 0.6169493794441223, "learning_rate": 0.0001, "loss": 1.5793, "step": 13743 }, { "epoch": 1.596747022945106, "grad_norm": 0.6025657653808594, "learning_rate": 0.0001, "loss": 1.28, "step": 13744 }, { "epoch": 1.5968632006970664, "grad_norm": 0.6102682948112488, "learning_rate": 0.0001, "loss": 1.6037, "step": 13745 }, { "epoch": 1.596979378449027, "grad_norm": 0.5944964289665222, "learning_rate": 0.0001, "loss": 1.6677, "step": 13746 }, { "epoch": 1.5970955562009874, "grad_norm": 0.6884152889251709, "learning_rate": 0.0001, "loss": 1.6509, "step": 13747 }, { "epoch": 1.597211733952948, "grad_norm": 0.600307285785675, "learning_rate": 0.0001, "loss": 1.4579, "step": 13748 }, { "epoch": 1.5973279117049084, "grad_norm": 0.5742710828781128, "learning_rate": 0.0001, "loss": 1.2788, "step": 13749 }, { "epoch": 1.5974440894568689, "grad_norm": 0.5742681622505188, "learning_rate": 0.0001, "loss": 1.4084, "step": 13750 }, { "epoch": 1.5975602672088294, "grad_norm": 0.5895428657531738, "learning_rate": 0.0001, "loss": 1.5114, "step": 13751 }, { "epoch": 1.5976764449607899, "grad_norm": 0.6041052937507629, "learning_rate": 0.0001, "loss": 1.4984, "step": 13752 }, { "epoch": 1.5977926227127504, "grad_norm": 0.6442006826400757, "learning_rate": 0.0001, "loss": 1.6977, "step": 13753 }, { "epoch": 1.5979088004647108, "grad_norm": 0.5974199175834656, "learning_rate": 0.0001, "loss": 1.3967, "step": 13754 }, { "epoch": 1.5980249782166716, "grad_norm": 0.6257326602935791, "learning_rate": 0.0001, "loss": 1.5538, "step": 13755 }, { "epoch": 1.598141155968632, "grad_norm": 0.6209074258804321, "learning_rate": 0.0001, "loss": 1.4854, "step": 13756 }, { "epoch": 1.5982573337205925, "grad_norm": 0.6362159252166748, "learning_rate": 0.0001, "loss": 1.5634, "step": 13757 }, { "epoch": 1.598373511472553, "grad_norm": 0.611994743347168, "learning_rate": 0.0001, "loss": 1.4347, "step": 13758 }, { "epoch": 1.5984896892245135, "grad_norm": 0.6070793867111206, "learning_rate": 0.0001, "loss": 1.3342, "step": 13759 }, { "epoch": 1.598605866976474, "grad_norm": 0.6359820365905762, "learning_rate": 0.0001, "loss": 1.4627, "step": 13760 }, { "epoch": 1.5987220447284345, "grad_norm": 0.6544227004051208, "learning_rate": 0.0001, "loss": 1.4379, "step": 13761 }, { "epoch": 1.598838222480395, "grad_norm": 0.669750452041626, "learning_rate": 0.0001, "loss": 1.3736, "step": 13762 }, { "epoch": 1.5989544002323555, "grad_norm": 0.6785888075828552, "learning_rate": 0.0001, "loss": 1.6779, "step": 13763 }, { "epoch": 1.599070577984316, "grad_norm": 0.6282570958137512, "learning_rate": 0.0001, "loss": 1.4536, "step": 13764 }, { "epoch": 1.5991867557362767, "grad_norm": 0.5843489170074463, "learning_rate": 0.0001, "loss": 1.4549, "step": 13765 }, { "epoch": 1.5993029334882372, "grad_norm": 0.6315165758132935, "learning_rate": 0.0001, "loss": 1.4352, "step": 13766 }, { "epoch": 1.5994191112401976, "grad_norm": 0.6136773824691772, "learning_rate": 0.0001, "loss": 1.5576, "step": 13767 }, { "epoch": 1.5995352889921581, "grad_norm": 0.6138652563095093, "learning_rate": 0.0001, "loss": 1.5366, "step": 13768 }, { "epoch": 1.5996514667441186, "grad_norm": 0.6372418999671936, "learning_rate": 0.0001, "loss": 1.6065, "step": 13769 }, { "epoch": 1.599767644496079, "grad_norm": 0.6061596870422363, "learning_rate": 0.0001, "loss": 1.416, "step": 13770 }, { "epoch": 1.5998838222480396, "grad_norm": 0.6062629818916321, "learning_rate": 0.0001, "loss": 1.2875, "step": 13771 }, { "epoch": 1.6, "grad_norm": 0.6032814979553223, "learning_rate": 0.0001, "loss": 1.4574, "step": 13772 }, { "epoch": 1.6001161777519606, "grad_norm": 0.6498891115188599, "learning_rate": 0.0001, "loss": 1.5472, "step": 13773 }, { "epoch": 1.600232355503921, "grad_norm": 0.6103582978248596, "learning_rate": 0.0001, "loss": 1.4621, "step": 13774 }, { "epoch": 1.6003485332558816, "grad_norm": 0.5915868282318115, "learning_rate": 0.0001, "loss": 1.4204, "step": 13775 }, { "epoch": 1.600464711007842, "grad_norm": 0.5753716826438904, "learning_rate": 0.0001, "loss": 1.2423, "step": 13776 }, { "epoch": 1.6005808887598025, "grad_norm": 0.5940613746643066, "learning_rate": 0.0001, "loss": 1.5206, "step": 13777 }, { "epoch": 1.600697066511763, "grad_norm": 0.5621521472930908, "learning_rate": 0.0001, "loss": 1.3927, "step": 13778 }, { "epoch": 1.6008132442637235, "grad_norm": 0.6004639267921448, "learning_rate": 0.0001, "loss": 1.3794, "step": 13779 }, { "epoch": 1.600929422015684, "grad_norm": 0.6216353178024292, "learning_rate": 0.0001, "loss": 1.559, "step": 13780 }, { "epoch": 1.6010455997676445, "grad_norm": 0.5929364562034607, "learning_rate": 0.0001, "loss": 1.5074, "step": 13781 }, { "epoch": 1.601161777519605, "grad_norm": 0.6302357316017151, "learning_rate": 0.0001, "loss": 1.3182, "step": 13782 }, { "epoch": 1.6012779552715655, "grad_norm": 0.6794602274894714, "learning_rate": 0.0001, "loss": 1.6703, "step": 13783 }, { "epoch": 1.601394133023526, "grad_norm": 0.5782144069671631, "learning_rate": 0.0001, "loss": 1.36, "step": 13784 }, { "epoch": 1.6015103107754864, "grad_norm": 0.659773588180542, "learning_rate": 0.0001, "loss": 1.4868, "step": 13785 }, { "epoch": 1.601626488527447, "grad_norm": 0.6725043058395386, "learning_rate": 0.0001, "loss": 1.4797, "step": 13786 }, { "epoch": 1.6017426662794074, "grad_norm": 0.6366990208625793, "learning_rate": 0.0001, "loss": 1.4486, "step": 13787 }, { "epoch": 1.601858844031368, "grad_norm": 0.6560510993003845, "learning_rate": 0.0001, "loss": 1.4995, "step": 13788 }, { "epoch": 1.6019750217833284, "grad_norm": 0.6620891690254211, "learning_rate": 0.0001, "loss": 1.3972, "step": 13789 }, { "epoch": 1.602091199535289, "grad_norm": 0.6221482157707214, "learning_rate": 0.0001, "loss": 1.5887, "step": 13790 }, { "epoch": 1.6022073772872494, "grad_norm": 0.6068333983421326, "learning_rate": 0.0001, "loss": 1.4633, "step": 13791 }, { "epoch": 1.6023235550392099, "grad_norm": 0.6149522066116333, "learning_rate": 0.0001, "loss": 1.5298, "step": 13792 }, { "epoch": 1.6024397327911704, "grad_norm": 0.6315502524375916, "learning_rate": 0.0001, "loss": 1.3288, "step": 13793 }, { "epoch": 1.6025559105431308, "grad_norm": 0.6193204522132874, "learning_rate": 0.0001, "loss": 1.5412, "step": 13794 }, { "epoch": 1.6026720882950913, "grad_norm": 0.6118963956832886, "learning_rate": 0.0001, "loss": 1.6071, "step": 13795 }, { "epoch": 1.6027882660470518, "grad_norm": 0.5986089110374451, "learning_rate": 0.0001, "loss": 1.4186, "step": 13796 }, { "epoch": 1.6029044437990125, "grad_norm": 0.5849127769470215, "learning_rate": 0.0001, "loss": 1.6019, "step": 13797 }, { "epoch": 1.603020621550973, "grad_norm": 0.6488816738128662, "learning_rate": 0.0001, "loss": 1.592, "step": 13798 }, { "epoch": 1.6031367993029335, "grad_norm": 0.5963447690010071, "learning_rate": 0.0001, "loss": 1.4233, "step": 13799 }, { "epoch": 1.603252977054894, "grad_norm": 0.5969744920730591, "learning_rate": 0.0001, "loss": 1.4197, "step": 13800 }, { "epoch": 1.6033691548068545, "grad_norm": 0.5776385068893433, "learning_rate": 0.0001, "loss": 1.4639, "step": 13801 }, { "epoch": 1.603485332558815, "grad_norm": 0.568734347820282, "learning_rate": 0.0001, "loss": 1.384, "step": 13802 }, { "epoch": 1.6036015103107755, "grad_norm": 0.6355451345443726, "learning_rate": 0.0001, "loss": 1.5034, "step": 13803 }, { "epoch": 1.603717688062736, "grad_norm": 0.5923163294792175, "learning_rate": 0.0001, "loss": 1.4938, "step": 13804 }, { "epoch": 1.6038338658146964, "grad_norm": 0.6059244275093079, "learning_rate": 0.0001, "loss": 1.4256, "step": 13805 }, { "epoch": 1.603950043566657, "grad_norm": 0.5898007750511169, "learning_rate": 0.0001, "loss": 1.4724, "step": 13806 }, { "epoch": 1.6040662213186176, "grad_norm": 0.5829443335533142, "learning_rate": 0.0001, "loss": 1.3609, "step": 13807 }, { "epoch": 1.6041823990705781, "grad_norm": 0.6079580783843994, "learning_rate": 0.0001, "loss": 1.3347, "step": 13808 }, { "epoch": 1.6042985768225386, "grad_norm": 0.6549359560012817, "learning_rate": 0.0001, "loss": 1.5499, "step": 13809 }, { "epoch": 1.6044147545744991, "grad_norm": 0.5998852252960205, "learning_rate": 0.0001, "loss": 1.3346, "step": 13810 }, { "epoch": 1.6045309323264596, "grad_norm": 0.6005247831344604, "learning_rate": 0.0001, "loss": 1.4291, "step": 13811 }, { "epoch": 1.60464711007842, "grad_norm": 0.6408963799476624, "learning_rate": 0.0001, "loss": 1.5221, "step": 13812 }, { "epoch": 1.6047632878303806, "grad_norm": 0.589667022228241, "learning_rate": 0.0001, "loss": 1.334, "step": 13813 }, { "epoch": 1.604879465582341, "grad_norm": 0.5643407702445984, "learning_rate": 0.0001, "loss": 1.3689, "step": 13814 }, { "epoch": 1.6049956433343016, "grad_norm": 0.659737229347229, "learning_rate": 0.0001, "loss": 1.4327, "step": 13815 }, { "epoch": 1.605111821086262, "grad_norm": 0.6060417890548706, "learning_rate": 0.0001, "loss": 1.4205, "step": 13816 }, { "epoch": 1.6052279988382225, "grad_norm": 0.6752500534057617, "learning_rate": 0.0001, "loss": 1.4827, "step": 13817 }, { "epoch": 1.605344176590183, "grad_norm": 0.6170288324356079, "learning_rate": 0.0001, "loss": 1.2098, "step": 13818 }, { "epoch": 1.6054603543421435, "grad_norm": 0.6077424883842468, "learning_rate": 0.0001, "loss": 1.5053, "step": 13819 }, { "epoch": 1.605576532094104, "grad_norm": 0.5606176257133484, "learning_rate": 0.0001, "loss": 1.3151, "step": 13820 }, { "epoch": 1.6056927098460645, "grad_norm": 0.6182757019996643, "learning_rate": 0.0001, "loss": 1.503, "step": 13821 }, { "epoch": 1.605808887598025, "grad_norm": 0.6296382546424866, "learning_rate": 0.0001, "loss": 1.5476, "step": 13822 }, { "epoch": 1.6059250653499855, "grad_norm": 0.6217150688171387, "learning_rate": 0.0001, "loss": 1.3125, "step": 13823 }, { "epoch": 1.606041243101946, "grad_norm": 0.6524258852005005, "learning_rate": 0.0001, "loss": 1.5611, "step": 13824 }, { "epoch": 1.6061574208539064, "grad_norm": 0.6134025454521179, "learning_rate": 0.0001, "loss": 1.4981, "step": 13825 }, { "epoch": 1.606273598605867, "grad_norm": 0.6530399918556213, "learning_rate": 0.0001, "loss": 1.4917, "step": 13826 }, { "epoch": 1.6063897763578274, "grad_norm": 0.5791024565696716, "learning_rate": 0.0001, "loss": 1.2731, "step": 13827 }, { "epoch": 1.606505954109788, "grad_norm": 0.6186858415603638, "learning_rate": 0.0001, "loss": 1.3552, "step": 13828 }, { "epoch": 1.6066221318617484, "grad_norm": 0.5937260985374451, "learning_rate": 0.0001, "loss": 1.3172, "step": 13829 }, { "epoch": 1.606738309613709, "grad_norm": 0.6377929449081421, "learning_rate": 0.0001, "loss": 1.5637, "step": 13830 }, { "epoch": 1.6068544873656694, "grad_norm": 0.5948269367218018, "learning_rate": 0.0001, "loss": 1.3598, "step": 13831 }, { "epoch": 1.6069706651176299, "grad_norm": 0.5641445517539978, "learning_rate": 0.0001, "loss": 1.2045, "step": 13832 }, { "epoch": 1.6070868428695904, "grad_norm": 0.638268232345581, "learning_rate": 0.0001, "loss": 1.4597, "step": 13833 }, { "epoch": 1.6072030206215508, "grad_norm": 0.5887224674224854, "learning_rate": 0.0001, "loss": 1.4827, "step": 13834 }, { "epoch": 1.6073191983735113, "grad_norm": 0.6423467397689819, "learning_rate": 0.0001, "loss": 1.7015, "step": 13835 }, { "epoch": 1.6074353761254718, "grad_norm": 0.6366564631462097, "learning_rate": 0.0001, "loss": 1.6689, "step": 13836 }, { "epoch": 1.6075515538774323, "grad_norm": 0.5624208450317383, "learning_rate": 0.0001, "loss": 1.3148, "step": 13837 }, { "epoch": 1.607667731629393, "grad_norm": 0.5844243168830872, "learning_rate": 0.0001, "loss": 1.4741, "step": 13838 }, { "epoch": 1.6077839093813535, "grad_norm": 0.6397031545639038, "learning_rate": 0.0001, "loss": 1.6976, "step": 13839 }, { "epoch": 1.607900087133314, "grad_norm": 0.6005331873893738, "learning_rate": 0.0001, "loss": 1.3997, "step": 13840 }, { "epoch": 1.6080162648852745, "grad_norm": 0.6438605189323425, "learning_rate": 0.0001, "loss": 1.6067, "step": 13841 }, { "epoch": 1.608132442637235, "grad_norm": 0.5851279497146606, "learning_rate": 0.0001, "loss": 1.2065, "step": 13842 }, { "epoch": 1.6082486203891955, "grad_norm": 0.567348301410675, "learning_rate": 0.0001, "loss": 1.3517, "step": 13843 }, { "epoch": 1.608364798141156, "grad_norm": 0.6039325594902039, "learning_rate": 0.0001, "loss": 1.4571, "step": 13844 }, { "epoch": 1.6084809758931164, "grad_norm": 0.5768235921859741, "learning_rate": 0.0001, "loss": 1.3531, "step": 13845 }, { "epoch": 1.608597153645077, "grad_norm": 0.5796616077423096, "learning_rate": 0.0001, "loss": 1.3958, "step": 13846 }, { "epoch": 1.6087133313970374, "grad_norm": 0.603740930557251, "learning_rate": 0.0001, "loss": 1.4034, "step": 13847 }, { "epoch": 1.608829509148998, "grad_norm": 0.6364991068840027, "learning_rate": 0.0001, "loss": 1.5609, "step": 13848 }, { "epoch": 1.6089456869009586, "grad_norm": 0.6351603269577026, "learning_rate": 0.0001, "loss": 1.564, "step": 13849 }, { "epoch": 1.6090618646529191, "grad_norm": 0.6068862676620483, "learning_rate": 0.0001, "loss": 1.4097, "step": 13850 }, { "epoch": 1.6091780424048796, "grad_norm": 0.642614483833313, "learning_rate": 0.0001, "loss": 1.4754, "step": 13851 }, { "epoch": 1.60929422015684, "grad_norm": 0.6429895162582397, "learning_rate": 0.0001, "loss": 1.6018, "step": 13852 }, { "epoch": 1.6094103979088006, "grad_norm": 0.5926845073699951, "learning_rate": 0.0001, "loss": 1.458, "step": 13853 }, { "epoch": 1.609526575660761, "grad_norm": 0.658845067024231, "learning_rate": 0.0001, "loss": 1.3906, "step": 13854 }, { "epoch": 1.6096427534127216, "grad_norm": 0.5958040952682495, "learning_rate": 0.0001, "loss": 1.3695, "step": 13855 }, { "epoch": 1.609758931164682, "grad_norm": 0.6442962288856506, "learning_rate": 0.0001, "loss": 1.6451, "step": 13856 }, { "epoch": 1.6098751089166425, "grad_norm": 0.5802430510520935, "learning_rate": 0.0001, "loss": 1.4404, "step": 13857 }, { "epoch": 1.609991286668603, "grad_norm": 0.6123525500297546, "learning_rate": 0.0001, "loss": 1.3439, "step": 13858 }, { "epoch": 1.6101074644205635, "grad_norm": 0.6517568230628967, "learning_rate": 0.0001, "loss": 1.4508, "step": 13859 }, { "epoch": 1.610223642172524, "grad_norm": 0.634818434715271, "learning_rate": 0.0001, "loss": 1.6394, "step": 13860 }, { "epoch": 1.6103398199244845, "grad_norm": 0.5639699697494507, "learning_rate": 0.0001, "loss": 1.5309, "step": 13861 }, { "epoch": 1.610455997676445, "grad_norm": 0.5527640581130981, "learning_rate": 0.0001, "loss": 1.39, "step": 13862 }, { "epoch": 1.6105721754284055, "grad_norm": 0.6287981271743774, "learning_rate": 0.0001, "loss": 1.4817, "step": 13863 }, { "epoch": 1.610688353180366, "grad_norm": 0.5936501622200012, "learning_rate": 0.0001, "loss": 1.5035, "step": 13864 }, { "epoch": 1.6108045309323265, "grad_norm": 0.6397016644477844, "learning_rate": 0.0001, "loss": 1.3037, "step": 13865 }, { "epoch": 1.610920708684287, "grad_norm": 0.6394860744476318, "learning_rate": 0.0001, "loss": 1.4609, "step": 13866 }, { "epoch": 1.6110368864362474, "grad_norm": 0.6565226316452026, "learning_rate": 0.0001, "loss": 1.5441, "step": 13867 }, { "epoch": 1.611153064188208, "grad_norm": 0.6310607194900513, "learning_rate": 0.0001, "loss": 1.4312, "step": 13868 }, { "epoch": 1.6112692419401684, "grad_norm": 0.6256000995635986, "learning_rate": 0.0001, "loss": 1.6485, "step": 13869 }, { "epoch": 1.611385419692129, "grad_norm": 0.6283453106880188, "learning_rate": 0.0001, "loss": 1.4648, "step": 13870 }, { "epoch": 1.6115015974440894, "grad_norm": 0.6144470572471619, "learning_rate": 0.0001, "loss": 1.3494, "step": 13871 }, { "epoch": 1.6116177751960499, "grad_norm": 0.6374574303627014, "learning_rate": 0.0001, "loss": 1.6428, "step": 13872 }, { "epoch": 1.6117339529480104, "grad_norm": 0.6335989236831665, "learning_rate": 0.0001, "loss": 1.5362, "step": 13873 }, { "epoch": 1.6118501306999709, "grad_norm": 0.6003391146659851, "learning_rate": 0.0001, "loss": 1.4554, "step": 13874 }, { "epoch": 1.6119663084519313, "grad_norm": 0.6658709645271301, "learning_rate": 0.0001, "loss": 1.6036, "step": 13875 }, { "epoch": 1.6120824862038918, "grad_norm": 0.6419057250022888, "learning_rate": 0.0001, "loss": 1.4394, "step": 13876 }, { "epoch": 1.6121986639558523, "grad_norm": 0.737707793712616, "learning_rate": 0.0001, "loss": 1.3447, "step": 13877 }, { "epoch": 1.6123148417078128, "grad_norm": 0.6373167634010315, "learning_rate": 0.0001, "loss": 1.5011, "step": 13878 }, { "epoch": 1.6124310194597733, "grad_norm": 0.6646495461463928, "learning_rate": 0.0001, "loss": 1.6556, "step": 13879 }, { "epoch": 1.612547197211734, "grad_norm": 0.6386552453041077, "learning_rate": 0.0001, "loss": 1.5203, "step": 13880 }, { "epoch": 1.6126633749636945, "grad_norm": 0.5788254141807556, "learning_rate": 0.0001, "loss": 1.5247, "step": 13881 }, { "epoch": 1.612779552715655, "grad_norm": 0.6018937826156616, "learning_rate": 0.0001, "loss": 1.4407, "step": 13882 }, { "epoch": 1.6128957304676155, "grad_norm": 0.6025969386100769, "learning_rate": 0.0001, "loss": 1.3986, "step": 13883 }, { "epoch": 1.613011908219576, "grad_norm": 0.5800029635429382, "learning_rate": 0.0001, "loss": 1.4766, "step": 13884 }, { "epoch": 1.6131280859715365, "grad_norm": 0.5896215438842773, "learning_rate": 0.0001, "loss": 1.3036, "step": 13885 }, { "epoch": 1.613244263723497, "grad_norm": 0.6163521409034729, "learning_rate": 0.0001, "loss": 1.4066, "step": 13886 }, { "epoch": 1.6133604414754574, "grad_norm": 0.6672067642211914, "learning_rate": 0.0001, "loss": 1.5426, "step": 13887 }, { "epoch": 1.613476619227418, "grad_norm": 0.5820779204368591, "learning_rate": 0.0001, "loss": 1.3377, "step": 13888 }, { "epoch": 1.6135927969793784, "grad_norm": 0.6226430535316467, "learning_rate": 0.0001, "loss": 1.4441, "step": 13889 }, { "epoch": 1.613708974731339, "grad_norm": 0.6409672498703003, "learning_rate": 0.0001, "loss": 1.5232, "step": 13890 }, { "epoch": 1.6138251524832996, "grad_norm": 0.6150544881820679, "learning_rate": 0.0001, "loss": 1.4207, "step": 13891 }, { "epoch": 1.61394133023526, "grad_norm": 0.6505727767944336, "learning_rate": 0.0001, "loss": 1.5336, "step": 13892 }, { "epoch": 1.6140575079872206, "grad_norm": 0.6071352958679199, "learning_rate": 0.0001, "loss": 1.5639, "step": 13893 }, { "epoch": 1.614173685739181, "grad_norm": 0.5638280510902405, "learning_rate": 0.0001, "loss": 1.4476, "step": 13894 }, { "epoch": 1.6142898634911416, "grad_norm": 0.6587737798690796, "learning_rate": 0.0001, "loss": 1.6427, "step": 13895 }, { "epoch": 1.614406041243102, "grad_norm": 0.6675108075141907, "learning_rate": 0.0001, "loss": 1.5899, "step": 13896 }, { "epoch": 1.6145222189950625, "grad_norm": 0.6368352770805359, "learning_rate": 0.0001, "loss": 1.6209, "step": 13897 }, { "epoch": 1.614638396747023, "grad_norm": 0.6211996674537659, "learning_rate": 0.0001, "loss": 1.416, "step": 13898 }, { "epoch": 1.6147545744989835, "grad_norm": 0.6183685660362244, "learning_rate": 0.0001, "loss": 1.4282, "step": 13899 }, { "epoch": 1.614870752250944, "grad_norm": 0.615385115146637, "learning_rate": 0.0001, "loss": 1.4632, "step": 13900 }, { "epoch": 1.6149869300029045, "grad_norm": 0.6182390451431274, "learning_rate": 0.0001, "loss": 1.5878, "step": 13901 }, { "epoch": 1.615103107754865, "grad_norm": 0.6461263298988342, "learning_rate": 0.0001, "loss": 1.4021, "step": 13902 }, { "epoch": 1.6152192855068255, "grad_norm": 0.5660906434059143, "learning_rate": 0.0001, "loss": 1.2598, "step": 13903 }, { "epoch": 1.615335463258786, "grad_norm": 0.6474366188049316, "learning_rate": 0.0001, "loss": 1.4905, "step": 13904 }, { "epoch": 1.6154516410107465, "grad_norm": 0.6442524790763855, "learning_rate": 0.0001, "loss": 1.4119, "step": 13905 }, { "epoch": 1.615567818762707, "grad_norm": 0.6138772964477539, "learning_rate": 0.0001, "loss": 1.4201, "step": 13906 }, { "epoch": 1.6156839965146674, "grad_norm": 0.6902275085449219, "learning_rate": 0.0001, "loss": 1.7217, "step": 13907 }, { "epoch": 1.615800174266628, "grad_norm": 0.5896263122558594, "learning_rate": 0.0001, "loss": 1.3702, "step": 13908 }, { "epoch": 1.6159163520185884, "grad_norm": 0.6327207684516907, "learning_rate": 0.0001, "loss": 1.5585, "step": 13909 }, { "epoch": 1.616032529770549, "grad_norm": 0.6263841986656189, "learning_rate": 0.0001, "loss": 1.371, "step": 13910 }, { "epoch": 1.6161487075225094, "grad_norm": 0.6256563067436218, "learning_rate": 0.0001, "loss": 1.3766, "step": 13911 }, { "epoch": 1.6162648852744699, "grad_norm": 0.6188286542892456, "learning_rate": 0.0001, "loss": 1.3662, "step": 13912 }, { "epoch": 1.6163810630264304, "grad_norm": 0.5888686180114746, "learning_rate": 0.0001, "loss": 1.328, "step": 13913 }, { "epoch": 1.6164972407783909, "grad_norm": 0.5912812352180481, "learning_rate": 0.0001, "loss": 1.3469, "step": 13914 }, { "epoch": 1.6166134185303513, "grad_norm": 0.58259516954422, "learning_rate": 0.0001, "loss": 1.3371, "step": 13915 }, { "epoch": 1.6167295962823118, "grad_norm": 0.636556088924408, "learning_rate": 0.0001, "loss": 1.5573, "step": 13916 }, { "epoch": 1.6168457740342723, "grad_norm": 0.5752967596054077, "learning_rate": 0.0001, "loss": 1.3001, "step": 13917 }, { "epoch": 1.6169619517862328, "grad_norm": 0.6357734203338623, "learning_rate": 0.0001, "loss": 1.5934, "step": 13918 }, { "epoch": 1.6170781295381933, "grad_norm": 0.6322953701019287, "learning_rate": 0.0001, "loss": 1.5612, "step": 13919 }, { "epoch": 1.6171943072901538, "grad_norm": 0.6408698558807373, "learning_rate": 0.0001, "loss": 1.5034, "step": 13920 }, { "epoch": 1.6173104850421143, "grad_norm": 0.6438761949539185, "learning_rate": 0.0001, "loss": 1.5634, "step": 13921 }, { "epoch": 1.617426662794075, "grad_norm": 0.6261228322982788, "learning_rate": 0.0001, "loss": 1.4357, "step": 13922 }, { "epoch": 1.6175428405460355, "grad_norm": 0.6390764713287354, "learning_rate": 0.0001, "loss": 1.458, "step": 13923 }, { "epoch": 1.617659018297996, "grad_norm": 0.5800904631614685, "learning_rate": 0.0001, "loss": 1.2348, "step": 13924 }, { "epoch": 1.6177751960499565, "grad_norm": 0.5801419615745544, "learning_rate": 0.0001, "loss": 1.4536, "step": 13925 }, { "epoch": 1.617891373801917, "grad_norm": 0.6229808926582336, "learning_rate": 0.0001, "loss": 1.4334, "step": 13926 }, { "epoch": 1.6180075515538774, "grad_norm": 0.5984506011009216, "learning_rate": 0.0001, "loss": 1.3765, "step": 13927 }, { "epoch": 1.618123729305838, "grad_norm": 0.6117129325866699, "learning_rate": 0.0001, "loss": 1.4251, "step": 13928 }, { "epoch": 1.6182399070577984, "grad_norm": 0.6036126613616943, "learning_rate": 0.0001, "loss": 1.3457, "step": 13929 }, { "epoch": 1.618356084809759, "grad_norm": 0.6293709874153137, "learning_rate": 0.0001, "loss": 1.5344, "step": 13930 }, { "epoch": 1.6184722625617194, "grad_norm": 0.6404412388801575, "learning_rate": 0.0001, "loss": 1.5842, "step": 13931 }, { "epoch": 1.6185884403136799, "grad_norm": 0.6405262351036072, "learning_rate": 0.0001, "loss": 1.5674, "step": 13932 }, { "epoch": 1.6187046180656406, "grad_norm": 0.6383447647094727, "learning_rate": 0.0001, "loss": 1.5493, "step": 13933 }, { "epoch": 1.618820795817601, "grad_norm": 0.6693515181541443, "learning_rate": 0.0001, "loss": 1.6526, "step": 13934 }, { "epoch": 1.6189369735695616, "grad_norm": 0.6073635220527649, "learning_rate": 0.0001, "loss": 1.5188, "step": 13935 }, { "epoch": 1.619053151321522, "grad_norm": 0.6150029301643372, "learning_rate": 0.0001, "loss": 1.4316, "step": 13936 }, { "epoch": 1.6191693290734825, "grad_norm": 0.6156079173088074, "learning_rate": 0.0001, "loss": 1.3389, "step": 13937 }, { "epoch": 1.619285506825443, "grad_norm": 0.6327203512191772, "learning_rate": 0.0001, "loss": 1.5262, "step": 13938 }, { "epoch": 1.6194016845774035, "grad_norm": 0.5985980033874512, "learning_rate": 0.0001, "loss": 1.3391, "step": 13939 }, { "epoch": 1.619517862329364, "grad_norm": 0.5909581780433655, "learning_rate": 0.0001, "loss": 1.4643, "step": 13940 }, { "epoch": 1.6196340400813245, "grad_norm": 0.6321226358413696, "learning_rate": 0.0001, "loss": 1.5628, "step": 13941 }, { "epoch": 1.619750217833285, "grad_norm": 0.6735860109329224, "learning_rate": 0.0001, "loss": 1.3241, "step": 13942 }, { "epoch": 1.6198663955852455, "grad_norm": 0.6376870274543762, "learning_rate": 0.0001, "loss": 1.463, "step": 13943 }, { "epoch": 1.619982573337206, "grad_norm": 0.6346738338470459, "learning_rate": 0.0001, "loss": 1.4896, "step": 13944 }, { "epoch": 1.6200987510891665, "grad_norm": 0.6026695966720581, "learning_rate": 0.0001, "loss": 1.3668, "step": 13945 }, { "epoch": 1.620214928841127, "grad_norm": 0.5611673593521118, "learning_rate": 0.0001, "loss": 1.3694, "step": 13946 }, { "epoch": 1.6203311065930874, "grad_norm": 0.6367985606193542, "learning_rate": 0.0001, "loss": 1.5537, "step": 13947 }, { "epoch": 1.620447284345048, "grad_norm": 0.6661987900733948, "learning_rate": 0.0001, "loss": 1.5108, "step": 13948 }, { "epoch": 1.6205634620970084, "grad_norm": 0.6102688908576965, "learning_rate": 0.0001, "loss": 1.4665, "step": 13949 }, { "epoch": 1.620679639848969, "grad_norm": 0.5848194360733032, "learning_rate": 0.0001, "loss": 1.4065, "step": 13950 }, { "epoch": 1.6207958176009294, "grad_norm": 0.5830379128456116, "learning_rate": 0.0001, "loss": 1.513, "step": 13951 }, { "epoch": 1.6209119953528899, "grad_norm": 0.6410690546035767, "learning_rate": 0.0001, "loss": 1.4485, "step": 13952 }, { "epoch": 1.6210281731048504, "grad_norm": 0.6042836904525757, "learning_rate": 0.0001, "loss": 1.4449, "step": 13953 }, { "epoch": 1.6211443508568109, "grad_norm": 0.6346921920776367, "learning_rate": 0.0001, "loss": 1.5349, "step": 13954 }, { "epoch": 1.6212605286087713, "grad_norm": 0.5908709764480591, "learning_rate": 0.0001, "loss": 1.3898, "step": 13955 }, { "epoch": 1.6213767063607318, "grad_norm": 0.6253523230552673, "learning_rate": 0.0001, "loss": 1.455, "step": 13956 }, { "epoch": 1.6214928841126923, "grad_norm": 0.6313303112983704, "learning_rate": 0.0001, "loss": 1.4671, "step": 13957 }, { "epoch": 1.6216090618646528, "grad_norm": 0.6551414728164673, "learning_rate": 0.0001, "loss": 1.5872, "step": 13958 }, { "epoch": 1.6217252396166133, "grad_norm": 0.602879524230957, "learning_rate": 0.0001, "loss": 1.4072, "step": 13959 }, { "epoch": 1.6218414173685738, "grad_norm": 0.6179625988006592, "learning_rate": 0.0001, "loss": 1.4204, "step": 13960 }, { "epoch": 1.6219575951205343, "grad_norm": 0.5867196321487427, "learning_rate": 0.0001, "loss": 1.3604, "step": 13961 }, { "epoch": 1.6220737728724948, "grad_norm": 0.6081652045249939, "learning_rate": 0.0001, "loss": 1.5271, "step": 13962 }, { "epoch": 1.6221899506244553, "grad_norm": 0.5890710353851318, "learning_rate": 0.0001, "loss": 1.3407, "step": 13963 }, { "epoch": 1.622306128376416, "grad_norm": 0.6529542207717896, "learning_rate": 0.0001, "loss": 1.5192, "step": 13964 }, { "epoch": 1.6224223061283765, "grad_norm": 0.6106531023979187, "learning_rate": 0.0001, "loss": 1.4651, "step": 13965 }, { "epoch": 1.622538483880337, "grad_norm": 0.6239684224128723, "learning_rate": 0.0001, "loss": 1.4949, "step": 13966 }, { "epoch": 1.6226546616322974, "grad_norm": 0.6258800625801086, "learning_rate": 0.0001, "loss": 1.4508, "step": 13967 }, { "epoch": 1.622770839384258, "grad_norm": 0.5880980491638184, "learning_rate": 0.0001, "loss": 1.4816, "step": 13968 }, { "epoch": 1.6228870171362184, "grad_norm": 0.588895320892334, "learning_rate": 0.0001, "loss": 1.2776, "step": 13969 }, { "epoch": 1.623003194888179, "grad_norm": 0.5825314521789551, "learning_rate": 0.0001, "loss": 1.3614, "step": 13970 }, { "epoch": 1.6231193726401394, "grad_norm": 0.6523398756980896, "learning_rate": 0.0001, "loss": 1.4717, "step": 13971 }, { "epoch": 1.6232355503920999, "grad_norm": 0.5812602639198303, "learning_rate": 0.0001, "loss": 1.3427, "step": 13972 }, { "epoch": 1.6233517281440604, "grad_norm": 0.6783197522163391, "learning_rate": 0.0001, "loss": 1.4434, "step": 13973 }, { "epoch": 1.6234679058960209, "grad_norm": 0.6416046619415283, "learning_rate": 0.0001, "loss": 1.3776, "step": 13974 }, { "epoch": 1.6235840836479816, "grad_norm": 0.6453458666801453, "learning_rate": 0.0001, "loss": 1.5991, "step": 13975 }, { "epoch": 1.623700261399942, "grad_norm": 0.6448045969009399, "learning_rate": 0.0001, "loss": 1.4237, "step": 13976 }, { "epoch": 1.6238164391519025, "grad_norm": 0.6527649760246277, "learning_rate": 0.0001, "loss": 1.431, "step": 13977 }, { "epoch": 1.623932616903863, "grad_norm": 0.6334816217422485, "learning_rate": 0.0001, "loss": 1.323, "step": 13978 }, { "epoch": 1.6240487946558235, "grad_norm": 0.6312617659568787, "learning_rate": 0.0001, "loss": 1.5421, "step": 13979 }, { "epoch": 1.624164972407784, "grad_norm": 0.6112910509109497, "learning_rate": 0.0001, "loss": 1.5255, "step": 13980 }, { "epoch": 1.6242811501597445, "grad_norm": 0.6557388305664062, "learning_rate": 0.0001, "loss": 1.582, "step": 13981 }, { "epoch": 1.624397327911705, "grad_norm": 0.6169906854629517, "learning_rate": 0.0001, "loss": 1.4738, "step": 13982 }, { "epoch": 1.6245135056636655, "grad_norm": 0.6463947296142578, "learning_rate": 0.0001, "loss": 1.4123, "step": 13983 }, { "epoch": 1.624629683415626, "grad_norm": 0.5652637481689453, "learning_rate": 0.0001, "loss": 1.2282, "step": 13984 }, { "epoch": 1.6247458611675865, "grad_norm": 0.6067284941673279, "learning_rate": 0.0001, "loss": 1.5313, "step": 13985 }, { "epoch": 1.624862038919547, "grad_norm": 0.6332645416259766, "learning_rate": 0.0001, "loss": 1.4048, "step": 13986 }, { "epoch": 1.6249782166715074, "grad_norm": 0.5808968544006348, "learning_rate": 0.0001, "loss": 1.4871, "step": 13987 }, { "epoch": 1.625094394423468, "grad_norm": 0.5867791771888733, "learning_rate": 0.0001, "loss": 1.44, "step": 13988 }, { "epoch": 1.6252105721754284, "grad_norm": 0.643679678440094, "learning_rate": 0.0001, "loss": 1.5416, "step": 13989 }, { "epoch": 1.625326749927389, "grad_norm": 0.6203275322914124, "learning_rate": 0.0001, "loss": 1.4913, "step": 13990 }, { "epoch": 1.6254429276793494, "grad_norm": 0.5824859738349915, "learning_rate": 0.0001, "loss": 1.3436, "step": 13991 }, { "epoch": 1.6255591054313099, "grad_norm": 0.6150936484336853, "learning_rate": 0.0001, "loss": 1.3999, "step": 13992 }, { "epoch": 1.6256752831832704, "grad_norm": 0.579694926738739, "learning_rate": 0.0001, "loss": 1.6301, "step": 13993 }, { "epoch": 1.6257914609352309, "grad_norm": 0.5742935538291931, "learning_rate": 0.0001, "loss": 1.4251, "step": 13994 }, { "epoch": 1.6259076386871913, "grad_norm": 0.5996615886688232, "learning_rate": 0.0001, "loss": 1.4744, "step": 13995 }, { "epoch": 1.6260238164391518, "grad_norm": 0.7094278931617737, "learning_rate": 0.0001, "loss": 1.4994, "step": 13996 }, { "epoch": 1.6261399941911123, "grad_norm": 0.6233917474746704, "learning_rate": 0.0001, "loss": 1.4866, "step": 13997 }, { "epoch": 1.6262561719430728, "grad_norm": 0.6166983246803284, "learning_rate": 0.0001, "loss": 1.375, "step": 13998 }, { "epoch": 1.6263723496950333, "grad_norm": 0.638420581817627, "learning_rate": 0.0001, "loss": 1.4988, "step": 13999 }, { "epoch": 1.6264885274469938, "grad_norm": 0.6417514681816101, "learning_rate": 0.0001, "loss": 1.4279, "step": 14000 }, { "epoch": 1.6266047051989543, "grad_norm": 0.6283998489379883, "learning_rate": 0.0001, "loss": 1.5945, "step": 14001 }, { "epoch": 1.6267208829509148, "grad_norm": 0.6124157309532166, "learning_rate": 0.0001, "loss": 1.4929, "step": 14002 }, { "epoch": 1.6268370607028753, "grad_norm": 0.594475269317627, "learning_rate": 0.0001, "loss": 1.4148, "step": 14003 }, { "epoch": 1.6269532384548357, "grad_norm": 0.6385263204574585, "learning_rate": 0.0001, "loss": 1.575, "step": 14004 }, { "epoch": 1.6270694162067962, "grad_norm": 0.6499114036560059, "learning_rate": 0.0001, "loss": 1.4152, "step": 14005 }, { "epoch": 1.627185593958757, "grad_norm": 0.6124439239501953, "learning_rate": 0.0001, "loss": 1.5092, "step": 14006 }, { "epoch": 1.6273017717107174, "grad_norm": 0.6168376803398132, "learning_rate": 0.0001, "loss": 1.6337, "step": 14007 }, { "epoch": 1.627417949462678, "grad_norm": 0.5698392987251282, "learning_rate": 0.0001, "loss": 1.4545, "step": 14008 }, { "epoch": 1.6275341272146384, "grad_norm": 0.6328918933868408, "learning_rate": 0.0001, "loss": 1.5705, "step": 14009 }, { "epoch": 1.627650304966599, "grad_norm": 0.6024948954582214, "learning_rate": 0.0001, "loss": 1.4952, "step": 14010 }, { "epoch": 1.6277664827185594, "grad_norm": 0.6581865549087524, "learning_rate": 0.0001, "loss": 1.5492, "step": 14011 }, { "epoch": 1.6278826604705199, "grad_norm": 0.5633354187011719, "learning_rate": 0.0001, "loss": 1.2757, "step": 14012 }, { "epoch": 1.6279988382224804, "grad_norm": 0.6289613842964172, "learning_rate": 0.0001, "loss": 1.4402, "step": 14013 }, { "epoch": 1.6281150159744409, "grad_norm": 0.6051575541496277, "learning_rate": 0.0001, "loss": 1.5605, "step": 14014 }, { "epoch": 1.6282311937264013, "grad_norm": 0.5795895457267761, "learning_rate": 0.0001, "loss": 1.5407, "step": 14015 }, { "epoch": 1.628347371478362, "grad_norm": 0.6840769052505493, "learning_rate": 0.0001, "loss": 1.7659, "step": 14016 }, { "epoch": 1.6284635492303225, "grad_norm": 0.6221969723701477, "learning_rate": 0.0001, "loss": 1.4444, "step": 14017 }, { "epoch": 1.628579726982283, "grad_norm": 0.6639615297317505, "learning_rate": 0.0001, "loss": 1.551, "step": 14018 }, { "epoch": 1.6286959047342435, "grad_norm": 0.6530728936195374, "learning_rate": 0.0001, "loss": 1.5048, "step": 14019 }, { "epoch": 1.628812082486204, "grad_norm": 0.6423136591911316, "learning_rate": 0.0001, "loss": 1.5412, "step": 14020 }, { "epoch": 1.6289282602381645, "grad_norm": 0.6494227647781372, "learning_rate": 0.0001, "loss": 1.4474, "step": 14021 }, { "epoch": 1.629044437990125, "grad_norm": 0.5999130606651306, "learning_rate": 0.0001, "loss": 1.4138, "step": 14022 }, { "epoch": 1.6291606157420855, "grad_norm": 0.6521318554878235, "learning_rate": 0.0001, "loss": 1.6509, "step": 14023 }, { "epoch": 1.629276793494046, "grad_norm": 0.6327621936798096, "learning_rate": 0.0001, "loss": 1.5195, "step": 14024 }, { "epoch": 1.6293929712460065, "grad_norm": 0.6055527329444885, "learning_rate": 0.0001, "loss": 1.6212, "step": 14025 }, { "epoch": 1.629509148997967, "grad_norm": 0.5985307097434998, "learning_rate": 0.0001, "loss": 1.5282, "step": 14026 }, { "epoch": 1.6296253267499274, "grad_norm": 0.6053721308708191, "learning_rate": 0.0001, "loss": 1.5571, "step": 14027 }, { "epoch": 1.629741504501888, "grad_norm": 0.5892758965492249, "learning_rate": 0.0001, "loss": 1.5048, "step": 14028 }, { "epoch": 1.6298576822538484, "grad_norm": 0.6331409215927124, "learning_rate": 0.0001, "loss": 1.4201, "step": 14029 }, { "epoch": 1.629973860005809, "grad_norm": 0.588765025138855, "learning_rate": 0.0001, "loss": 1.2788, "step": 14030 }, { "epoch": 1.6300900377577694, "grad_norm": 0.5880612134933472, "learning_rate": 0.0001, "loss": 1.5298, "step": 14031 }, { "epoch": 1.6302062155097299, "grad_norm": 0.6789817810058594, "learning_rate": 0.0001, "loss": 1.4995, "step": 14032 }, { "epoch": 1.6303223932616904, "grad_norm": 0.6591566205024719, "learning_rate": 0.0001, "loss": 1.4953, "step": 14033 }, { "epoch": 1.6304385710136509, "grad_norm": 0.6221017241477966, "learning_rate": 0.0001, "loss": 1.3042, "step": 14034 }, { "epoch": 1.6305547487656114, "grad_norm": 0.6314887404441833, "learning_rate": 0.0001, "loss": 1.3127, "step": 14035 }, { "epoch": 1.6306709265175718, "grad_norm": 0.6149263381958008, "learning_rate": 0.0001, "loss": 1.399, "step": 14036 }, { "epoch": 1.6307871042695323, "grad_norm": 0.5955255031585693, "learning_rate": 0.0001, "loss": 1.331, "step": 14037 }, { "epoch": 1.6309032820214928, "grad_norm": 0.5955930352210999, "learning_rate": 0.0001, "loss": 1.3982, "step": 14038 }, { "epoch": 1.6310194597734533, "grad_norm": 0.7224507927894592, "learning_rate": 0.0001, "loss": 1.5178, "step": 14039 }, { "epoch": 1.6311356375254138, "grad_norm": 0.6202871203422546, "learning_rate": 0.0001, "loss": 1.3921, "step": 14040 }, { "epoch": 1.6312518152773743, "grad_norm": 0.6397614479064941, "learning_rate": 0.0001, "loss": 1.5403, "step": 14041 }, { "epoch": 1.6313679930293348, "grad_norm": 0.5635595917701721, "learning_rate": 0.0001, "loss": 1.4285, "step": 14042 }, { "epoch": 1.6314841707812953, "grad_norm": 0.5631869435310364, "learning_rate": 0.0001, "loss": 1.3056, "step": 14043 }, { "epoch": 1.6316003485332558, "grad_norm": 0.6522062420845032, "learning_rate": 0.0001, "loss": 1.5501, "step": 14044 }, { "epoch": 1.6317165262852162, "grad_norm": 0.6932905316352844, "learning_rate": 0.0001, "loss": 1.596, "step": 14045 }, { "epoch": 1.6318327040371767, "grad_norm": 0.6221775412559509, "learning_rate": 0.0001, "loss": 1.4737, "step": 14046 }, { "epoch": 1.6319488817891372, "grad_norm": 0.6478981375694275, "learning_rate": 0.0001, "loss": 1.426, "step": 14047 }, { "epoch": 1.632065059541098, "grad_norm": 0.6464584469795227, "learning_rate": 0.0001, "loss": 1.2716, "step": 14048 }, { "epoch": 1.6321812372930584, "grad_norm": 0.5974857211112976, "learning_rate": 0.0001, "loss": 1.3202, "step": 14049 }, { "epoch": 1.632297415045019, "grad_norm": 0.6420817375183105, "learning_rate": 0.0001, "loss": 1.4625, "step": 14050 }, { "epoch": 1.6324135927969794, "grad_norm": 0.5880864858627319, "learning_rate": 0.0001, "loss": 1.3173, "step": 14051 }, { "epoch": 1.6325297705489399, "grad_norm": 0.6501123905181885, "learning_rate": 0.0001, "loss": 1.6799, "step": 14052 }, { "epoch": 1.6326459483009004, "grad_norm": 0.622643768787384, "learning_rate": 0.0001, "loss": 1.6575, "step": 14053 }, { "epoch": 1.6327621260528609, "grad_norm": 0.5698688626289368, "learning_rate": 0.0001, "loss": 1.3591, "step": 14054 }, { "epoch": 1.6328783038048214, "grad_norm": 0.6086718440055847, "learning_rate": 0.0001, "loss": 1.5633, "step": 14055 }, { "epoch": 1.6329944815567818, "grad_norm": 0.6424548029899597, "learning_rate": 0.0001, "loss": 1.389, "step": 14056 }, { "epoch": 1.6331106593087423, "grad_norm": 0.5952888131141663, "learning_rate": 0.0001, "loss": 1.4085, "step": 14057 }, { "epoch": 1.633226837060703, "grad_norm": 0.5975378155708313, "learning_rate": 0.0001, "loss": 1.4426, "step": 14058 }, { "epoch": 1.6333430148126635, "grad_norm": 0.615254282951355, "learning_rate": 0.0001, "loss": 1.4592, "step": 14059 }, { "epoch": 1.633459192564624, "grad_norm": 0.684297502040863, "learning_rate": 0.0001, "loss": 1.6907, "step": 14060 }, { "epoch": 1.6335753703165845, "grad_norm": 0.628893256187439, "learning_rate": 0.0001, "loss": 1.5178, "step": 14061 }, { "epoch": 1.633691548068545, "grad_norm": 0.6183853149414062, "learning_rate": 0.0001, "loss": 1.3585, "step": 14062 }, { "epoch": 1.6338077258205055, "grad_norm": 0.6302970051765442, "learning_rate": 0.0001, "loss": 1.4764, "step": 14063 }, { "epoch": 1.633923903572466, "grad_norm": 0.6082267165184021, "learning_rate": 0.0001, "loss": 1.3908, "step": 14064 }, { "epoch": 1.6340400813244265, "grad_norm": 0.5841929912567139, "learning_rate": 0.0001, "loss": 1.5005, "step": 14065 }, { "epoch": 1.634156259076387, "grad_norm": 0.6122580170631409, "learning_rate": 0.0001, "loss": 1.3528, "step": 14066 }, { "epoch": 1.6342724368283474, "grad_norm": 0.5823848843574524, "learning_rate": 0.0001, "loss": 1.3313, "step": 14067 }, { "epoch": 1.634388614580308, "grad_norm": 0.6329461932182312, "learning_rate": 0.0001, "loss": 1.5848, "step": 14068 }, { "epoch": 1.6345047923322684, "grad_norm": 0.6053112745285034, "learning_rate": 0.0001, "loss": 1.3889, "step": 14069 }, { "epoch": 1.634620970084229, "grad_norm": 0.5855569839477539, "learning_rate": 0.0001, "loss": 1.3969, "step": 14070 }, { "epoch": 1.6347371478361894, "grad_norm": 0.669511079788208, "learning_rate": 0.0001, "loss": 1.5187, "step": 14071 }, { "epoch": 1.6348533255881499, "grad_norm": 0.6059605479240417, "learning_rate": 0.0001, "loss": 1.4615, "step": 14072 }, { "epoch": 1.6349695033401104, "grad_norm": 0.6450716853141785, "learning_rate": 0.0001, "loss": 1.5312, "step": 14073 }, { "epoch": 1.6350856810920709, "grad_norm": 0.5897365808486938, "learning_rate": 0.0001, "loss": 1.383, "step": 14074 }, { "epoch": 1.6352018588440314, "grad_norm": 0.6256240606307983, "learning_rate": 0.0001, "loss": 1.5765, "step": 14075 }, { "epoch": 1.6353180365959918, "grad_norm": 0.6321964263916016, "learning_rate": 0.0001, "loss": 1.5481, "step": 14076 }, { "epoch": 1.6354342143479523, "grad_norm": 0.6097416281700134, "learning_rate": 0.0001, "loss": 1.5983, "step": 14077 }, { "epoch": 1.6355503920999128, "grad_norm": 0.6388082504272461, "learning_rate": 0.0001, "loss": 1.5051, "step": 14078 }, { "epoch": 1.6356665698518733, "grad_norm": 0.5849140882492065, "learning_rate": 0.0001, "loss": 1.3683, "step": 14079 }, { "epoch": 1.6357827476038338, "grad_norm": 0.6164708137512207, "learning_rate": 0.0001, "loss": 1.3723, "step": 14080 }, { "epoch": 1.6358989253557943, "grad_norm": 0.6650832295417786, "learning_rate": 0.0001, "loss": 1.6466, "step": 14081 }, { "epoch": 1.6360151031077548, "grad_norm": 0.6212082505226135, "learning_rate": 0.0001, "loss": 1.5207, "step": 14082 }, { "epoch": 1.6361312808597153, "grad_norm": 0.6258420348167419, "learning_rate": 0.0001, "loss": 1.5162, "step": 14083 }, { "epoch": 1.6362474586116758, "grad_norm": 0.5431529879570007, "learning_rate": 0.0001, "loss": 1.2823, "step": 14084 }, { "epoch": 1.6363636363636362, "grad_norm": 0.5796236991882324, "learning_rate": 0.0001, "loss": 1.357, "step": 14085 }, { "epoch": 1.6364798141155967, "grad_norm": 0.6135089993476868, "learning_rate": 0.0001, "loss": 1.3273, "step": 14086 }, { "epoch": 1.6365959918675572, "grad_norm": 0.6038573980331421, "learning_rate": 0.0001, "loss": 1.3763, "step": 14087 }, { "epoch": 1.6367121696195177, "grad_norm": 0.6328492164611816, "learning_rate": 0.0001, "loss": 1.5462, "step": 14088 }, { "epoch": 1.6368283473714782, "grad_norm": 0.64963698387146, "learning_rate": 0.0001, "loss": 1.4086, "step": 14089 }, { "epoch": 1.636944525123439, "grad_norm": 0.654787540435791, "learning_rate": 0.0001, "loss": 1.5029, "step": 14090 }, { "epoch": 1.6370607028753994, "grad_norm": 0.6167252063751221, "learning_rate": 0.0001, "loss": 1.5282, "step": 14091 }, { "epoch": 1.6371768806273599, "grad_norm": 0.6117875576019287, "learning_rate": 0.0001, "loss": 1.4823, "step": 14092 }, { "epoch": 1.6372930583793204, "grad_norm": 0.5955542325973511, "learning_rate": 0.0001, "loss": 1.4145, "step": 14093 }, { "epoch": 1.6374092361312809, "grad_norm": 0.6241755485534668, "learning_rate": 0.0001, "loss": 1.484, "step": 14094 }, { "epoch": 1.6375254138832414, "grad_norm": 0.6314031481742859, "learning_rate": 0.0001, "loss": 1.495, "step": 14095 }, { "epoch": 1.6376415916352018, "grad_norm": 0.6061991453170776, "learning_rate": 0.0001, "loss": 1.5289, "step": 14096 }, { "epoch": 1.6377577693871623, "grad_norm": 0.6209096312522888, "learning_rate": 0.0001, "loss": 1.4582, "step": 14097 }, { "epoch": 1.6378739471391228, "grad_norm": 0.5920364856719971, "learning_rate": 0.0001, "loss": 1.4785, "step": 14098 }, { "epoch": 1.6379901248910833, "grad_norm": 0.6609426140785217, "learning_rate": 0.0001, "loss": 1.6022, "step": 14099 }, { "epoch": 1.638106302643044, "grad_norm": 0.5915592312812805, "learning_rate": 0.0001, "loss": 1.4119, "step": 14100 }, { "epoch": 1.6382224803950045, "grad_norm": 0.616982102394104, "learning_rate": 0.0001, "loss": 1.4499, "step": 14101 }, { "epoch": 1.638338658146965, "grad_norm": 0.6017442941665649, "learning_rate": 0.0001, "loss": 1.2596, "step": 14102 }, { "epoch": 1.6384548358989255, "grad_norm": 0.6194157004356384, "learning_rate": 0.0001, "loss": 1.3425, "step": 14103 }, { "epoch": 1.638571013650886, "grad_norm": 0.6266767382621765, "learning_rate": 0.0001, "loss": 1.3835, "step": 14104 }, { "epoch": 1.6386871914028465, "grad_norm": 0.5987579226493835, "learning_rate": 0.0001, "loss": 1.366, "step": 14105 }, { "epoch": 1.638803369154807, "grad_norm": 0.651887059211731, "learning_rate": 0.0001, "loss": 1.5016, "step": 14106 }, { "epoch": 1.6389195469067674, "grad_norm": 0.6444787383079529, "learning_rate": 0.0001, "loss": 1.5132, "step": 14107 }, { "epoch": 1.639035724658728, "grad_norm": 0.6272260546684265, "learning_rate": 0.0001, "loss": 1.5259, "step": 14108 }, { "epoch": 1.6391519024106884, "grad_norm": 0.6479817032814026, "learning_rate": 0.0001, "loss": 1.4818, "step": 14109 }, { "epoch": 1.639268080162649, "grad_norm": 0.6236165761947632, "learning_rate": 0.0001, "loss": 1.5658, "step": 14110 }, { "epoch": 1.6393842579146094, "grad_norm": 0.6055958867073059, "learning_rate": 0.0001, "loss": 1.4548, "step": 14111 }, { "epoch": 1.6395004356665699, "grad_norm": 0.6537360548973083, "learning_rate": 0.0001, "loss": 1.4727, "step": 14112 }, { "epoch": 1.6396166134185304, "grad_norm": 0.5849602222442627, "learning_rate": 0.0001, "loss": 1.3445, "step": 14113 }, { "epoch": 1.6397327911704909, "grad_norm": 0.6079145669937134, "learning_rate": 0.0001, "loss": 1.4493, "step": 14114 }, { "epoch": 1.6398489689224514, "grad_norm": 0.6336851716041565, "learning_rate": 0.0001, "loss": 1.5739, "step": 14115 }, { "epoch": 1.6399651466744118, "grad_norm": 0.6342026591300964, "learning_rate": 0.0001, "loss": 1.5609, "step": 14116 }, { "epoch": 1.6400813244263723, "grad_norm": 0.6126139163970947, "learning_rate": 0.0001, "loss": 1.5301, "step": 14117 }, { "epoch": 1.6401975021783328, "grad_norm": 0.6149883270263672, "learning_rate": 0.0001, "loss": 1.4362, "step": 14118 }, { "epoch": 1.6403136799302933, "grad_norm": 0.6330409646034241, "learning_rate": 0.0001, "loss": 1.4284, "step": 14119 }, { "epoch": 1.6404298576822538, "grad_norm": 0.6454080939292908, "learning_rate": 0.0001, "loss": 1.4864, "step": 14120 }, { "epoch": 1.6405460354342143, "grad_norm": 0.6485667824745178, "learning_rate": 0.0001, "loss": 1.5837, "step": 14121 }, { "epoch": 1.6406622131861748, "grad_norm": 0.6031750440597534, "learning_rate": 0.0001, "loss": 1.5467, "step": 14122 }, { "epoch": 1.6407783909381353, "grad_norm": 0.6269805431365967, "learning_rate": 0.0001, "loss": 1.5209, "step": 14123 }, { "epoch": 1.6408945686900958, "grad_norm": 0.6257645487785339, "learning_rate": 0.0001, "loss": 1.6409, "step": 14124 }, { "epoch": 1.6410107464420562, "grad_norm": 0.6060915589332581, "learning_rate": 0.0001, "loss": 1.4809, "step": 14125 }, { "epoch": 1.6411269241940167, "grad_norm": 0.6223340034484863, "learning_rate": 0.0001, "loss": 1.4791, "step": 14126 }, { "epoch": 1.6412431019459772, "grad_norm": 0.6407727599143982, "learning_rate": 0.0001, "loss": 1.3167, "step": 14127 }, { "epoch": 1.6413592796979377, "grad_norm": 0.622276246547699, "learning_rate": 0.0001, "loss": 1.434, "step": 14128 }, { "epoch": 1.6414754574498982, "grad_norm": 0.640952467918396, "learning_rate": 0.0001, "loss": 1.5844, "step": 14129 }, { "epoch": 1.6415916352018587, "grad_norm": 0.5898902416229248, "learning_rate": 0.0001, "loss": 1.3305, "step": 14130 }, { "epoch": 1.6417078129538192, "grad_norm": 0.5807786583900452, "learning_rate": 0.0001, "loss": 1.4521, "step": 14131 }, { "epoch": 1.64182399070578, "grad_norm": 0.6199413537979126, "learning_rate": 0.0001, "loss": 1.3459, "step": 14132 }, { "epoch": 1.6419401684577404, "grad_norm": 0.5780317187309265, "learning_rate": 0.0001, "loss": 1.4143, "step": 14133 }, { "epoch": 1.6420563462097009, "grad_norm": 0.6026042699813843, "learning_rate": 0.0001, "loss": 1.4828, "step": 14134 }, { "epoch": 1.6421725239616614, "grad_norm": 0.6366655230522156, "learning_rate": 0.0001, "loss": 1.4533, "step": 14135 }, { "epoch": 1.6422887017136218, "grad_norm": 0.6139160394668579, "learning_rate": 0.0001, "loss": 1.3117, "step": 14136 }, { "epoch": 1.6424048794655823, "grad_norm": 0.6157143712043762, "learning_rate": 0.0001, "loss": 1.3066, "step": 14137 }, { "epoch": 1.6425210572175428, "grad_norm": 0.6278376579284668, "learning_rate": 0.0001, "loss": 1.542, "step": 14138 }, { "epoch": 1.6426372349695033, "grad_norm": 0.6848769783973694, "learning_rate": 0.0001, "loss": 1.437, "step": 14139 }, { "epoch": 1.6427534127214638, "grad_norm": 0.6353069543838501, "learning_rate": 0.0001, "loss": 1.4935, "step": 14140 }, { "epoch": 1.6428695904734243, "grad_norm": 0.618973433971405, "learning_rate": 0.0001, "loss": 1.5368, "step": 14141 }, { "epoch": 1.642985768225385, "grad_norm": 0.6311565637588501, "learning_rate": 0.0001, "loss": 1.354, "step": 14142 }, { "epoch": 1.6431019459773455, "grad_norm": 0.6276684999465942, "learning_rate": 0.0001, "loss": 1.4216, "step": 14143 }, { "epoch": 1.643218123729306, "grad_norm": 0.667702317237854, "learning_rate": 0.0001, "loss": 1.4672, "step": 14144 }, { "epoch": 1.6433343014812665, "grad_norm": 0.6167467832565308, "learning_rate": 0.0001, "loss": 1.5276, "step": 14145 }, { "epoch": 1.643450479233227, "grad_norm": 0.6455250978469849, "learning_rate": 0.0001, "loss": 1.4224, "step": 14146 }, { "epoch": 1.6435666569851874, "grad_norm": 0.6006497144699097, "learning_rate": 0.0001, "loss": 1.3413, "step": 14147 }, { "epoch": 1.643682834737148, "grad_norm": 0.6262154579162598, "learning_rate": 0.0001, "loss": 1.4936, "step": 14148 }, { "epoch": 1.6437990124891084, "grad_norm": 0.638005793094635, "learning_rate": 0.0001, "loss": 1.5503, "step": 14149 }, { "epoch": 1.643915190241069, "grad_norm": 0.6695378422737122, "learning_rate": 0.0001, "loss": 1.4534, "step": 14150 }, { "epoch": 1.6440313679930294, "grad_norm": 0.651477575302124, "learning_rate": 0.0001, "loss": 1.3342, "step": 14151 }, { "epoch": 1.64414754574499, "grad_norm": 0.6691502332687378, "learning_rate": 0.0001, "loss": 1.3718, "step": 14152 }, { "epoch": 1.6442637234969504, "grad_norm": 0.602782666683197, "learning_rate": 0.0001, "loss": 1.3341, "step": 14153 }, { "epoch": 1.6443799012489109, "grad_norm": 0.610246479511261, "learning_rate": 0.0001, "loss": 1.2958, "step": 14154 }, { "epoch": 1.6444960790008714, "grad_norm": 0.594747006893158, "learning_rate": 0.0001, "loss": 1.421, "step": 14155 }, { "epoch": 1.6446122567528318, "grad_norm": 0.6084997057914734, "learning_rate": 0.0001, "loss": 1.5756, "step": 14156 }, { "epoch": 1.6447284345047923, "grad_norm": 0.6012604236602783, "learning_rate": 0.0001, "loss": 1.5527, "step": 14157 }, { "epoch": 1.6448446122567528, "grad_norm": 0.5681290030479431, "learning_rate": 0.0001, "loss": 1.2684, "step": 14158 }, { "epoch": 1.6449607900087133, "grad_norm": 0.6119375228881836, "learning_rate": 0.0001, "loss": 1.4624, "step": 14159 }, { "epoch": 1.6450769677606738, "grad_norm": 0.6131313443183899, "learning_rate": 0.0001, "loss": 1.5145, "step": 14160 }, { "epoch": 1.6451931455126343, "grad_norm": 0.6260764598846436, "learning_rate": 0.0001, "loss": 1.3354, "step": 14161 }, { "epoch": 1.6453093232645948, "grad_norm": 0.6539373397827148, "learning_rate": 0.0001, "loss": 1.5717, "step": 14162 }, { "epoch": 1.6454255010165553, "grad_norm": 0.655584990978241, "learning_rate": 0.0001, "loss": 1.4643, "step": 14163 }, { "epoch": 1.6455416787685158, "grad_norm": 0.6203681230545044, "learning_rate": 0.0001, "loss": 1.4141, "step": 14164 }, { "epoch": 1.6456578565204762, "grad_norm": 0.5968455672264099, "learning_rate": 0.0001, "loss": 1.4032, "step": 14165 }, { "epoch": 1.6457740342724367, "grad_norm": 0.6745409369468689, "learning_rate": 0.0001, "loss": 1.6126, "step": 14166 }, { "epoch": 1.6458902120243972, "grad_norm": 0.6292099952697754, "learning_rate": 0.0001, "loss": 1.4283, "step": 14167 }, { "epoch": 1.6460063897763577, "grad_norm": 0.5843009948730469, "learning_rate": 0.0001, "loss": 1.343, "step": 14168 }, { "epoch": 1.6461225675283182, "grad_norm": 0.5960466265678406, "learning_rate": 0.0001, "loss": 1.2639, "step": 14169 }, { "epoch": 1.6462387452802787, "grad_norm": 0.6020562648773193, "learning_rate": 0.0001, "loss": 1.2151, "step": 14170 }, { "epoch": 1.6463549230322392, "grad_norm": 0.6152138710021973, "learning_rate": 0.0001, "loss": 1.5185, "step": 14171 }, { "epoch": 1.6464711007841997, "grad_norm": 0.6108376383781433, "learning_rate": 0.0001, "loss": 1.3952, "step": 14172 }, { "epoch": 1.6465872785361602, "grad_norm": 0.6300705075263977, "learning_rate": 0.0001, "loss": 1.3714, "step": 14173 }, { "epoch": 1.6467034562881209, "grad_norm": 0.6430031657218933, "learning_rate": 0.0001, "loss": 1.4847, "step": 14174 }, { "epoch": 1.6468196340400814, "grad_norm": 0.6210929155349731, "learning_rate": 0.0001, "loss": 1.4261, "step": 14175 }, { "epoch": 1.6469358117920418, "grad_norm": 0.6436059474945068, "learning_rate": 0.0001, "loss": 1.4513, "step": 14176 }, { "epoch": 1.6470519895440023, "grad_norm": 0.6212117671966553, "learning_rate": 0.0001, "loss": 1.5061, "step": 14177 }, { "epoch": 1.6471681672959628, "grad_norm": 0.5962768793106079, "learning_rate": 0.0001, "loss": 1.3985, "step": 14178 }, { "epoch": 1.6472843450479233, "grad_norm": 0.6103590130805969, "learning_rate": 0.0001, "loss": 1.4262, "step": 14179 }, { "epoch": 1.6474005227998838, "grad_norm": 0.5891841650009155, "learning_rate": 0.0001, "loss": 1.3377, "step": 14180 }, { "epoch": 1.6475167005518443, "grad_norm": 0.5608420372009277, "learning_rate": 0.0001, "loss": 1.5066, "step": 14181 }, { "epoch": 1.6476328783038048, "grad_norm": 0.6272050738334656, "learning_rate": 0.0001, "loss": 1.5563, "step": 14182 }, { "epoch": 1.6477490560557653, "grad_norm": 0.6144561767578125, "learning_rate": 0.0001, "loss": 1.4184, "step": 14183 }, { "epoch": 1.647865233807726, "grad_norm": 0.6115341186523438, "learning_rate": 0.0001, "loss": 1.5254, "step": 14184 }, { "epoch": 1.6479814115596865, "grad_norm": 0.6426036357879639, "learning_rate": 0.0001, "loss": 1.5782, "step": 14185 }, { "epoch": 1.648097589311647, "grad_norm": 0.6239418983459473, "learning_rate": 0.0001, "loss": 1.5812, "step": 14186 }, { "epoch": 1.6482137670636074, "grad_norm": 0.6172148585319519, "learning_rate": 0.0001, "loss": 1.3512, "step": 14187 }, { "epoch": 1.648329944815568, "grad_norm": 0.6364747285842896, "learning_rate": 0.0001, "loss": 1.4703, "step": 14188 }, { "epoch": 1.6484461225675284, "grad_norm": 0.6305439472198486, "learning_rate": 0.0001, "loss": 1.574, "step": 14189 }, { "epoch": 1.648562300319489, "grad_norm": 0.6381820440292358, "learning_rate": 0.0001, "loss": 1.4599, "step": 14190 }, { "epoch": 1.6486784780714494, "grad_norm": 0.630121111869812, "learning_rate": 0.0001, "loss": 1.4585, "step": 14191 }, { "epoch": 1.64879465582341, "grad_norm": 0.6274852752685547, "learning_rate": 0.0001, "loss": 1.6236, "step": 14192 }, { "epoch": 1.6489108335753704, "grad_norm": 0.6383571624755859, "learning_rate": 0.0001, "loss": 1.4349, "step": 14193 }, { "epoch": 1.6490270113273309, "grad_norm": 0.659835696220398, "learning_rate": 0.0001, "loss": 1.5278, "step": 14194 }, { "epoch": 1.6491431890792914, "grad_norm": 0.5725560188293457, "learning_rate": 0.0001, "loss": 1.3176, "step": 14195 }, { "epoch": 1.6492593668312518, "grad_norm": 0.6228959560394287, "learning_rate": 0.0001, "loss": 1.5462, "step": 14196 }, { "epoch": 1.6493755445832123, "grad_norm": 0.5721942186355591, "learning_rate": 0.0001, "loss": 1.2445, "step": 14197 }, { "epoch": 1.6494917223351728, "grad_norm": 0.6114285588264465, "learning_rate": 0.0001, "loss": 1.4016, "step": 14198 }, { "epoch": 1.6496079000871333, "grad_norm": 0.6663209199905396, "learning_rate": 0.0001, "loss": 1.5092, "step": 14199 }, { "epoch": 1.6497240778390938, "grad_norm": 0.6566654443740845, "learning_rate": 0.0001, "loss": 1.5143, "step": 14200 }, { "epoch": 1.6498402555910543, "grad_norm": 0.622334897518158, "learning_rate": 0.0001, "loss": 1.3745, "step": 14201 }, { "epoch": 1.6499564333430148, "grad_norm": 0.588445246219635, "learning_rate": 0.0001, "loss": 1.3504, "step": 14202 }, { "epoch": 1.6500726110949753, "grad_norm": 0.6347247958183289, "learning_rate": 0.0001, "loss": 1.507, "step": 14203 }, { "epoch": 1.6501887888469358, "grad_norm": 0.6108758449554443, "learning_rate": 0.0001, "loss": 1.4755, "step": 14204 }, { "epoch": 1.6503049665988963, "grad_norm": 0.6030958294868469, "learning_rate": 0.0001, "loss": 1.4607, "step": 14205 }, { "epoch": 1.6504211443508567, "grad_norm": 0.5850770473480225, "learning_rate": 0.0001, "loss": 1.4278, "step": 14206 }, { "epoch": 1.6505373221028172, "grad_norm": 0.6222158074378967, "learning_rate": 0.0001, "loss": 1.3877, "step": 14207 }, { "epoch": 1.6506534998547777, "grad_norm": 0.5812451243400574, "learning_rate": 0.0001, "loss": 1.3789, "step": 14208 }, { "epoch": 1.6507696776067382, "grad_norm": 0.635403573513031, "learning_rate": 0.0001, "loss": 1.4684, "step": 14209 }, { "epoch": 1.6508858553586987, "grad_norm": 0.643618106842041, "learning_rate": 0.0001, "loss": 1.4236, "step": 14210 }, { "epoch": 1.6510020331106592, "grad_norm": 0.6475852131843567, "learning_rate": 0.0001, "loss": 1.4558, "step": 14211 }, { "epoch": 1.6511182108626197, "grad_norm": 0.6677451729774475, "learning_rate": 0.0001, "loss": 1.3397, "step": 14212 }, { "epoch": 1.6512343886145802, "grad_norm": 0.6744155883789062, "learning_rate": 0.0001, "loss": 1.4089, "step": 14213 }, { "epoch": 1.6513505663665407, "grad_norm": 0.6262295842170715, "learning_rate": 0.0001, "loss": 1.4235, "step": 14214 }, { "epoch": 1.6514667441185014, "grad_norm": 0.6039606332778931, "learning_rate": 0.0001, "loss": 1.4584, "step": 14215 }, { "epoch": 1.6515829218704619, "grad_norm": 0.5976904630661011, "learning_rate": 0.0001, "loss": 1.4723, "step": 14216 }, { "epoch": 1.6516990996224223, "grad_norm": 0.5894930958747864, "learning_rate": 0.0001, "loss": 1.509, "step": 14217 }, { "epoch": 1.6518152773743828, "grad_norm": 0.6000151038169861, "learning_rate": 0.0001, "loss": 1.4843, "step": 14218 }, { "epoch": 1.6519314551263433, "grad_norm": 0.6081972718238831, "learning_rate": 0.0001, "loss": 1.5429, "step": 14219 }, { "epoch": 1.6520476328783038, "grad_norm": 0.59024977684021, "learning_rate": 0.0001, "loss": 1.3025, "step": 14220 }, { "epoch": 1.6521638106302643, "grad_norm": 0.6456470489501953, "learning_rate": 0.0001, "loss": 1.5966, "step": 14221 }, { "epoch": 1.6522799883822248, "grad_norm": 0.62722247838974, "learning_rate": 0.0001, "loss": 1.3141, "step": 14222 }, { "epoch": 1.6523961661341853, "grad_norm": 0.5566750168800354, "learning_rate": 0.0001, "loss": 1.3946, "step": 14223 }, { "epoch": 1.6525123438861458, "grad_norm": 0.6187589764595032, "learning_rate": 0.0001, "loss": 1.5082, "step": 14224 }, { "epoch": 1.6526285216381063, "grad_norm": 0.6611448526382446, "learning_rate": 0.0001, "loss": 1.5175, "step": 14225 }, { "epoch": 1.652744699390067, "grad_norm": 0.6645335555076599, "learning_rate": 0.0001, "loss": 1.563, "step": 14226 }, { "epoch": 1.6528608771420275, "grad_norm": 0.6388023495674133, "learning_rate": 0.0001, "loss": 1.4908, "step": 14227 }, { "epoch": 1.652977054893988, "grad_norm": 0.6315258741378784, "learning_rate": 0.0001, "loss": 1.3784, "step": 14228 }, { "epoch": 1.6530932326459484, "grad_norm": 0.6409275531768799, "learning_rate": 0.0001, "loss": 1.3888, "step": 14229 }, { "epoch": 1.653209410397909, "grad_norm": 0.6418370008468628, "learning_rate": 0.0001, "loss": 1.4947, "step": 14230 }, { "epoch": 1.6533255881498694, "grad_norm": 0.62156742811203, "learning_rate": 0.0001, "loss": 1.3787, "step": 14231 }, { "epoch": 1.65344176590183, "grad_norm": 0.6330835223197937, "learning_rate": 0.0001, "loss": 1.5093, "step": 14232 }, { "epoch": 1.6535579436537904, "grad_norm": 0.6426683068275452, "learning_rate": 0.0001, "loss": 1.5612, "step": 14233 }, { "epoch": 1.6536741214057509, "grad_norm": 0.5917912125587463, "learning_rate": 0.0001, "loss": 1.3273, "step": 14234 }, { "epoch": 1.6537902991577114, "grad_norm": 0.6813544034957886, "learning_rate": 0.0001, "loss": 1.5226, "step": 14235 }, { "epoch": 1.6539064769096719, "grad_norm": 0.6685516238212585, "learning_rate": 0.0001, "loss": 1.6258, "step": 14236 }, { "epoch": 1.6540226546616323, "grad_norm": 0.6967253088951111, "learning_rate": 0.0001, "loss": 1.6616, "step": 14237 }, { "epoch": 1.6541388324135928, "grad_norm": 0.6105125546455383, "learning_rate": 0.0001, "loss": 1.4309, "step": 14238 }, { "epoch": 1.6542550101655533, "grad_norm": 0.6094537973403931, "learning_rate": 0.0001, "loss": 1.5657, "step": 14239 }, { "epoch": 1.6543711879175138, "grad_norm": 0.5908210873603821, "learning_rate": 0.0001, "loss": 1.3088, "step": 14240 }, { "epoch": 1.6544873656694743, "grad_norm": 0.6080873012542725, "learning_rate": 0.0001, "loss": 1.2218, "step": 14241 }, { "epoch": 1.6546035434214348, "grad_norm": 0.691250205039978, "learning_rate": 0.0001, "loss": 1.4719, "step": 14242 }, { "epoch": 1.6547197211733953, "grad_norm": 0.6376727223396301, "learning_rate": 0.0001, "loss": 1.4352, "step": 14243 }, { "epoch": 1.6548358989253558, "grad_norm": 0.6390055418014526, "learning_rate": 0.0001, "loss": 1.5822, "step": 14244 }, { "epoch": 1.6549520766773163, "grad_norm": 0.6597655415534973, "learning_rate": 0.0001, "loss": 1.4584, "step": 14245 }, { "epoch": 1.6550682544292767, "grad_norm": 0.6162733435630798, "learning_rate": 0.0001, "loss": 1.5395, "step": 14246 }, { "epoch": 1.6551844321812372, "grad_norm": 0.6516062617301941, "learning_rate": 0.0001, "loss": 1.5161, "step": 14247 }, { "epoch": 1.6553006099331977, "grad_norm": 0.5855607986450195, "learning_rate": 0.0001, "loss": 1.4858, "step": 14248 }, { "epoch": 1.6554167876851582, "grad_norm": 0.619767963886261, "learning_rate": 0.0001, "loss": 1.2785, "step": 14249 }, { "epoch": 1.6555329654371187, "grad_norm": 0.6079853177070618, "learning_rate": 0.0001, "loss": 1.4298, "step": 14250 }, { "epoch": 1.6556491431890792, "grad_norm": 0.6627675294876099, "learning_rate": 0.0001, "loss": 1.6715, "step": 14251 }, { "epoch": 1.6557653209410397, "grad_norm": 0.6213913559913635, "learning_rate": 0.0001, "loss": 1.4026, "step": 14252 }, { "epoch": 1.6558814986930002, "grad_norm": 0.6121188998222351, "learning_rate": 0.0001, "loss": 1.5758, "step": 14253 }, { "epoch": 1.6559976764449607, "grad_norm": 0.6337873339653015, "learning_rate": 0.0001, "loss": 1.3376, "step": 14254 }, { "epoch": 1.6561138541969211, "grad_norm": 0.6308068037033081, "learning_rate": 0.0001, "loss": 1.4225, "step": 14255 }, { "epoch": 1.6562300319488816, "grad_norm": 0.6675533652305603, "learning_rate": 0.0001, "loss": 1.4606, "step": 14256 }, { "epoch": 1.6563462097008423, "grad_norm": 0.6582982540130615, "learning_rate": 0.0001, "loss": 1.5031, "step": 14257 }, { "epoch": 1.6564623874528028, "grad_norm": 0.6784161329269409, "learning_rate": 0.0001, "loss": 1.5038, "step": 14258 }, { "epoch": 1.6565785652047633, "grad_norm": 0.6543930172920227, "learning_rate": 0.0001, "loss": 1.4541, "step": 14259 }, { "epoch": 1.6566947429567238, "grad_norm": 0.6294996738433838, "learning_rate": 0.0001, "loss": 1.542, "step": 14260 }, { "epoch": 1.6568109207086843, "grad_norm": 0.636631965637207, "learning_rate": 0.0001, "loss": 1.3751, "step": 14261 }, { "epoch": 1.6569270984606448, "grad_norm": 0.6085163354873657, "learning_rate": 0.0001, "loss": 1.5421, "step": 14262 }, { "epoch": 1.6570432762126053, "grad_norm": 0.6034590601921082, "learning_rate": 0.0001, "loss": 1.3749, "step": 14263 }, { "epoch": 1.6571594539645658, "grad_norm": 0.6328452825546265, "learning_rate": 0.0001, "loss": 1.4622, "step": 14264 }, { "epoch": 1.6572756317165263, "grad_norm": 0.5842518210411072, "learning_rate": 0.0001, "loss": 1.4548, "step": 14265 }, { "epoch": 1.6573918094684867, "grad_norm": 0.5987873673439026, "learning_rate": 0.0001, "loss": 1.341, "step": 14266 }, { "epoch": 1.6575079872204472, "grad_norm": 0.6453771591186523, "learning_rate": 0.0001, "loss": 1.4977, "step": 14267 }, { "epoch": 1.657624164972408, "grad_norm": 0.5941254496574402, "learning_rate": 0.0001, "loss": 1.4271, "step": 14268 }, { "epoch": 1.6577403427243684, "grad_norm": 0.651914656162262, "learning_rate": 0.0001, "loss": 1.5811, "step": 14269 }, { "epoch": 1.657856520476329, "grad_norm": 0.6220420598983765, "learning_rate": 0.0001, "loss": 1.7155, "step": 14270 }, { "epoch": 1.6579726982282894, "grad_norm": 0.6234059929847717, "learning_rate": 0.0001, "loss": 1.315, "step": 14271 }, { "epoch": 1.65808887598025, "grad_norm": 0.5947203040122986, "learning_rate": 0.0001, "loss": 1.5306, "step": 14272 }, { "epoch": 1.6582050537322104, "grad_norm": 0.5515488386154175, "learning_rate": 0.0001, "loss": 1.3878, "step": 14273 }, { "epoch": 1.6583212314841709, "grad_norm": 0.5935400128364563, "learning_rate": 0.0001, "loss": 1.5038, "step": 14274 }, { "epoch": 1.6584374092361314, "grad_norm": 0.5995380878448486, "learning_rate": 0.0001, "loss": 1.4896, "step": 14275 }, { "epoch": 1.6585535869880919, "grad_norm": 0.7497626543045044, "learning_rate": 0.0001, "loss": 1.6789, "step": 14276 }, { "epoch": 1.6586697647400523, "grad_norm": 0.6365568041801453, "learning_rate": 0.0001, "loss": 1.587, "step": 14277 }, { "epoch": 1.6587859424920128, "grad_norm": 0.6294962763786316, "learning_rate": 0.0001, "loss": 1.4069, "step": 14278 }, { "epoch": 1.6589021202439733, "grad_norm": 0.605728805065155, "learning_rate": 0.0001, "loss": 1.4664, "step": 14279 }, { "epoch": 1.6590182979959338, "grad_norm": 0.5881183743476868, "learning_rate": 0.0001, "loss": 1.3909, "step": 14280 }, { "epoch": 1.6591344757478943, "grad_norm": 0.5991416573524475, "learning_rate": 0.0001, "loss": 1.3881, "step": 14281 }, { "epoch": 1.6592506534998548, "grad_norm": 0.6086195111274719, "learning_rate": 0.0001, "loss": 1.4557, "step": 14282 }, { "epoch": 1.6593668312518153, "grad_norm": 0.6455889344215393, "learning_rate": 0.0001, "loss": 1.5323, "step": 14283 }, { "epoch": 1.6594830090037758, "grad_norm": 0.5819340944290161, "learning_rate": 0.0001, "loss": 1.3684, "step": 14284 }, { "epoch": 1.6595991867557363, "grad_norm": 0.6517034769058228, "learning_rate": 0.0001, "loss": 1.527, "step": 14285 }, { "epoch": 1.6597153645076967, "grad_norm": 0.6496723890304565, "learning_rate": 0.0001, "loss": 1.44, "step": 14286 }, { "epoch": 1.6598315422596572, "grad_norm": 0.6734347343444824, "learning_rate": 0.0001, "loss": 1.4147, "step": 14287 }, { "epoch": 1.6599477200116177, "grad_norm": 0.6803379654884338, "learning_rate": 0.0001, "loss": 1.468, "step": 14288 }, { "epoch": 1.6600638977635782, "grad_norm": 0.6400073766708374, "learning_rate": 0.0001, "loss": 1.6689, "step": 14289 }, { "epoch": 1.6601800755155387, "grad_norm": 0.629406750202179, "learning_rate": 0.0001, "loss": 1.5885, "step": 14290 }, { "epoch": 1.6602962532674992, "grad_norm": 0.5946850180625916, "learning_rate": 0.0001, "loss": 1.3803, "step": 14291 }, { "epoch": 1.6604124310194597, "grad_norm": 0.5918585658073425, "learning_rate": 0.0001, "loss": 1.4861, "step": 14292 }, { "epoch": 1.6605286087714202, "grad_norm": 0.6482305526733398, "learning_rate": 0.0001, "loss": 1.5553, "step": 14293 }, { "epoch": 1.6606447865233807, "grad_norm": 0.670337438583374, "learning_rate": 0.0001, "loss": 1.4408, "step": 14294 }, { "epoch": 1.6607609642753411, "grad_norm": 0.6169812679290771, "learning_rate": 0.0001, "loss": 1.4298, "step": 14295 }, { "epoch": 1.6608771420273016, "grad_norm": 0.6292743682861328, "learning_rate": 0.0001, "loss": 1.4992, "step": 14296 }, { "epoch": 1.6609933197792621, "grad_norm": 0.6713395714759827, "learning_rate": 0.0001, "loss": 1.6314, "step": 14297 }, { "epoch": 1.6611094975312226, "grad_norm": 0.6211674809455872, "learning_rate": 0.0001, "loss": 1.4799, "step": 14298 }, { "epoch": 1.6612256752831833, "grad_norm": 0.6177234649658203, "learning_rate": 0.0001, "loss": 1.3664, "step": 14299 }, { "epoch": 1.6613418530351438, "grad_norm": 0.6477454304695129, "learning_rate": 0.0001, "loss": 1.6223, "step": 14300 }, { "epoch": 1.6614580307871043, "grad_norm": 0.6138026714324951, "learning_rate": 0.0001, "loss": 1.5107, "step": 14301 }, { "epoch": 1.6615742085390648, "grad_norm": 0.5738349556922913, "learning_rate": 0.0001, "loss": 1.4068, "step": 14302 }, { "epoch": 1.6616903862910253, "grad_norm": 0.6114820837974548, "learning_rate": 0.0001, "loss": 1.6262, "step": 14303 }, { "epoch": 1.6618065640429858, "grad_norm": 0.6038328409194946, "learning_rate": 0.0001, "loss": 1.5271, "step": 14304 }, { "epoch": 1.6619227417949463, "grad_norm": 0.5927251577377319, "learning_rate": 0.0001, "loss": 1.3334, "step": 14305 }, { "epoch": 1.6620389195469067, "grad_norm": 0.6357162594795227, "learning_rate": 0.0001, "loss": 1.4604, "step": 14306 }, { "epoch": 1.6621550972988672, "grad_norm": 0.6379348635673523, "learning_rate": 0.0001, "loss": 1.4317, "step": 14307 }, { "epoch": 1.6622712750508277, "grad_norm": 0.6593014001846313, "learning_rate": 0.0001, "loss": 1.6546, "step": 14308 }, { "epoch": 1.6623874528027882, "grad_norm": 0.5913210511207581, "learning_rate": 0.0001, "loss": 1.2818, "step": 14309 }, { "epoch": 1.662503630554749, "grad_norm": 0.614392101764679, "learning_rate": 0.0001, "loss": 1.4769, "step": 14310 }, { "epoch": 1.6626198083067094, "grad_norm": 0.6020192503929138, "learning_rate": 0.0001, "loss": 1.3537, "step": 14311 }, { "epoch": 1.66273598605867, "grad_norm": 0.6114288568496704, "learning_rate": 0.0001, "loss": 1.5126, "step": 14312 }, { "epoch": 1.6628521638106304, "grad_norm": 0.6113401651382446, "learning_rate": 0.0001, "loss": 1.409, "step": 14313 }, { "epoch": 1.6629683415625909, "grad_norm": 0.6201659440994263, "learning_rate": 0.0001, "loss": 1.4371, "step": 14314 }, { "epoch": 1.6630845193145514, "grad_norm": 0.5865699052810669, "learning_rate": 0.0001, "loss": 1.3168, "step": 14315 }, { "epoch": 1.6632006970665119, "grad_norm": 0.6772154569625854, "learning_rate": 0.0001, "loss": 1.5464, "step": 14316 }, { "epoch": 1.6633168748184723, "grad_norm": 0.6310399174690247, "learning_rate": 0.0001, "loss": 1.6112, "step": 14317 }, { "epoch": 1.6634330525704328, "grad_norm": 0.6301345229148865, "learning_rate": 0.0001, "loss": 1.4676, "step": 14318 }, { "epoch": 1.6635492303223933, "grad_norm": 0.6077814102172852, "learning_rate": 0.0001, "loss": 1.3477, "step": 14319 }, { "epoch": 1.6636654080743538, "grad_norm": 0.6315916776657104, "learning_rate": 0.0001, "loss": 1.4851, "step": 14320 }, { "epoch": 1.6637815858263143, "grad_norm": 0.6211605072021484, "learning_rate": 0.0001, "loss": 1.4077, "step": 14321 }, { "epoch": 1.6638977635782748, "grad_norm": 0.6359567642211914, "learning_rate": 0.0001, "loss": 1.3831, "step": 14322 }, { "epoch": 1.6640139413302353, "grad_norm": 0.5998718738555908, "learning_rate": 0.0001, "loss": 1.5675, "step": 14323 }, { "epoch": 1.6641301190821958, "grad_norm": 0.6033626794815063, "learning_rate": 0.0001, "loss": 1.4181, "step": 14324 }, { "epoch": 1.6642462968341563, "grad_norm": 0.5701342821121216, "learning_rate": 0.0001, "loss": 1.3663, "step": 14325 }, { "epoch": 1.6643624745861167, "grad_norm": 0.6412046551704407, "learning_rate": 0.0001, "loss": 1.5007, "step": 14326 }, { "epoch": 1.6644786523380772, "grad_norm": 0.6366773247718811, "learning_rate": 0.0001, "loss": 1.4257, "step": 14327 }, { "epoch": 1.6645948300900377, "grad_norm": 0.6343765258789062, "learning_rate": 0.0001, "loss": 1.4524, "step": 14328 }, { "epoch": 1.6647110078419982, "grad_norm": 0.6956954002380371, "learning_rate": 0.0001, "loss": 1.6713, "step": 14329 }, { "epoch": 1.6648271855939587, "grad_norm": 0.666972279548645, "learning_rate": 0.0001, "loss": 1.5215, "step": 14330 }, { "epoch": 1.6649433633459192, "grad_norm": 0.6471752524375916, "learning_rate": 0.0001, "loss": 1.5754, "step": 14331 }, { "epoch": 1.6650595410978797, "grad_norm": 0.6297165751457214, "learning_rate": 0.0001, "loss": 1.5005, "step": 14332 }, { "epoch": 1.6651757188498402, "grad_norm": 0.6866233944892883, "learning_rate": 0.0001, "loss": 1.4758, "step": 14333 }, { "epoch": 1.6652918966018007, "grad_norm": 0.6554885506629944, "learning_rate": 0.0001, "loss": 1.545, "step": 14334 }, { "epoch": 1.6654080743537611, "grad_norm": 0.6167072653770447, "learning_rate": 0.0001, "loss": 1.4427, "step": 14335 }, { "epoch": 1.6655242521057216, "grad_norm": 0.6200105547904968, "learning_rate": 0.0001, "loss": 1.4971, "step": 14336 }, { "epoch": 1.6656404298576821, "grad_norm": 0.6375522017478943, "learning_rate": 0.0001, "loss": 1.6341, "step": 14337 }, { "epoch": 1.6657566076096426, "grad_norm": 0.6104230880737305, "learning_rate": 0.0001, "loss": 1.5022, "step": 14338 }, { "epoch": 1.665872785361603, "grad_norm": 0.6164869070053101, "learning_rate": 0.0001, "loss": 1.4598, "step": 14339 }, { "epoch": 1.6659889631135636, "grad_norm": 0.6452317833900452, "learning_rate": 0.0001, "loss": 1.4099, "step": 14340 }, { "epoch": 1.6661051408655243, "grad_norm": 0.6302358508110046, "learning_rate": 0.0001, "loss": 1.535, "step": 14341 }, { "epoch": 1.6662213186174848, "grad_norm": 0.6247379183769226, "learning_rate": 0.0001, "loss": 1.5035, "step": 14342 }, { "epoch": 1.6663374963694453, "grad_norm": 0.6550397872924805, "learning_rate": 0.0001, "loss": 1.5491, "step": 14343 }, { "epoch": 1.6664536741214058, "grad_norm": 0.5882265567779541, "learning_rate": 0.0001, "loss": 1.5051, "step": 14344 }, { "epoch": 1.6665698518733663, "grad_norm": 0.7321701049804688, "learning_rate": 0.0001, "loss": 1.2877, "step": 14345 }, { "epoch": 1.6666860296253267, "grad_norm": 0.690442681312561, "learning_rate": 0.0001, "loss": 1.6333, "step": 14346 }, { "epoch": 1.6668022073772872, "grad_norm": 0.6370790004730225, "learning_rate": 0.0001, "loss": 1.5198, "step": 14347 }, { "epoch": 1.6669183851292477, "grad_norm": 0.5971081256866455, "learning_rate": 0.0001, "loss": 1.3959, "step": 14348 }, { "epoch": 1.6670345628812082, "grad_norm": 0.6645078659057617, "learning_rate": 0.0001, "loss": 1.4953, "step": 14349 }, { "epoch": 1.6671507406331687, "grad_norm": 0.6524665355682373, "learning_rate": 0.0001, "loss": 1.5207, "step": 14350 }, { "epoch": 1.6672669183851292, "grad_norm": 0.6136947274208069, "learning_rate": 0.0001, "loss": 1.4084, "step": 14351 }, { "epoch": 1.66738309613709, "grad_norm": 0.689610481262207, "learning_rate": 0.0001, "loss": 1.6417, "step": 14352 }, { "epoch": 1.6674992738890504, "grad_norm": 0.645634114742279, "learning_rate": 0.0001, "loss": 1.6085, "step": 14353 }, { "epoch": 1.6676154516410109, "grad_norm": 0.5941215753555298, "learning_rate": 0.0001, "loss": 1.3769, "step": 14354 }, { "epoch": 1.6677316293929714, "grad_norm": 0.6498944163322449, "learning_rate": 0.0001, "loss": 1.5569, "step": 14355 }, { "epoch": 1.6678478071449319, "grad_norm": 0.5884348154067993, "learning_rate": 0.0001, "loss": 1.4579, "step": 14356 }, { "epoch": 1.6679639848968923, "grad_norm": 0.5931845307350159, "learning_rate": 0.0001, "loss": 1.3808, "step": 14357 }, { "epoch": 1.6680801626488528, "grad_norm": 0.647567093372345, "learning_rate": 0.0001, "loss": 1.4115, "step": 14358 }, { "epoch": 1.6681963404008133, "grad_norm": 0.654813826084137, "learning_rate": 0.0001, "loss": 1.5908, "step": 14359 }, { "epoch": 1.6683125181527738, "grad_norm": 0.6201080679893494, "learning_rate": 0.0001, "loss": 1.4361, "step": 14360 }, { "epoch": 1.6684286959047343, "grad_norm": 0.6302205324172974, "learning_rate": 0.0001, "loss": 1.5178, "step": 14361 }, { "epoch": 1.6685448736566948, "grad_norm": 0.6126688122749329, "learning_rate": 0.0001, "loss": 1.3765, "step": 14362 }, { "epoch": 1.6686610514086553, "grad_norm": 0.673390805721283, "learning_rate": 0.0001, "loss": 1.6705, "step": 14363 }, { "epoch": 1.6687772291606158, "grad_norm": 0.6542450189590454, "learning_rate": 0.0001, "loss": 1.4918, "step": 14364 }, { "epoch": 1.6688934069125763, "grad_norm": 0.6602783799171448, "learning_rate": 0.0001, "loss": 1.3765, "step": 14365 }, { "epoch": 1.6690095846645367, "grad_norm": 0.595958948135376, "learning_rate": 0.0001, "loss": 1.5278, "step": 14366 }, { "epoch": 1.6691257624164972, "grad_norm": 0.5857497453689575, "learning_rate": 0.0001, "loss": 1.431, "step": 14367 }, { "epoch": 1.6692419401684577, "grad_norm": 0.5567007064819336, "learning_rate": 0.0001, "loss": 1.3577, "step": 14368 }, { "epoch": 1.6693581179204182, "grad_norm": 0.5964413285255432, "learning_rate": 0.0001, "loss": 1.4436, "step": 14369 }, { "epoch": 1.6694742956723787, "grad_norm": 0.6263625025749207, "learning_rate": 0.0001, "loss": 1.5634, "step": 14370 }, { "epoch": 1.6695904734243392, "grad_norm": 0.595973551273346, "learning_rate": 0.0001, "loss": 1.4063, "step": 14371 }, { "epoch": 1.6697066511762997, "grad_norm": 0.6597710847854614, "learning_rate": 0.0001, "loss": 1.6195, "step": 14372 }, { "epoch": 1.6698228289282602, "grad_norm": 0.6427208185195923, "learning_rate": 0.0001, "loss": 1.4949, "step": 14373 }, { "epoch": 1.6699390066802207, "grad_norm": 0.6536876559257507, "learning_rate": 0.0001, "loss": 1.3513, "step": 14374 }, { "epoch": 1.6700551844321812, "grad_norm": 0.6567574739456177, "learning_rate": 0.0001, "loss": 1.5253, "step": 14375 }, { "epoch": 1.6701713621841416, "grad_norm": 0.6249970197677612, "learning_rate": 0.0001, "loss": 1.4188, "step": 14376 }, { "epoch": 1.6702875399361021, "grad_norm": 0.6292693614959717, "learning_rate": 0.0001, "loss": 1.4584, "step": 14377 }, { "epoch": 1.6704037176880626, "grad_norm": 0.5975127816200256, "learning_rate": 0.0001, "loss": 1.437, "step": 14378 }, { "epoch": 1.670519895440023, "grad_norm": 0.6499278545379639, "learning_rate": 0.0001, "loss": 1.3752, "step": 14379 }, { "epoch": 1.6706360731919836, "grad_norm": 0.6347719430923462, "learning_rate": 0.0001, "loss": 1.5053, "step": 14380 }, { "epoch": 1.670752250943944, "grad_norm": 0.6124584078788757, "learning_rate": 0.0001, "loss": 1.5064, "step": 14381 }, { "epoch": 1.6708684286959046, "grad_norm": 0.586243748664856, "learning_rate": 0.0001, "loss": 1.4781, "step": 14382 }, { "epoch": 1.6709846064478653, "grad_norm": 0.6304342150688171, "learning_rate": 0.0001, "loss": 1.573, "step": 14383 }, { "epoch": 1.6711007841998258, "grad_norm": 0.6746019721031189, "learning_rate": 0.0001, "loss": 1.4781, "step": 14384 }, { "epoch": 1.6712169619517863, "grad_norm": 0.6391258835792542, "learning_rate": 0.0001, "loss": 1.5186, "step": 14385 }, { "epoch": 1.6713331397037468, "grad_norm": 0.6446275115013123, "learning_rate": 0.0001, "loss": 1.3664, "step": 14386 }, { "epoch": 1.6714493174557072, "grad_norm": 0.5888086557388306, "learning_rate": 0.0001, "loss": 1.3029, "step": 14387 }, { "epoch": 1.6715654952076677, "grad_norm": 0.613270103931427, "learning_rate": 0.0001, "loss": 1.425, "step": 14388 }, { "epoch": 1.6716816729596282, "grad_norm": 0.5924221277236938, "learning_rate": 0.0001, "loss": 1.4812, "step": 14389 }, { "epoch": 1.6717978507115887, "grad_norm": 0.5716918706893921, "learning_rate": 0.0001, "loss": 1.3345, "step": 14390 }, { "epoch": 1.6719140284635492, "grad_norm": 0.6300333142280579, "learning_rate": 0.0001, "loss": 1.4296, "step": 14391 }, { "epoch": 1.6720302062155097, "grad_norm": 0.6733264327049255, "learning_rate": 0.0001, "loss": 1.4657, "step": 14392 }, { "epoch": 1.6721463839674704, "grad_norm": 0.5939809083938599, "learning_rate": 0.0001, "loss": 1.3232, "step": 14393 }, { "epoch": 1.6722625617194309, "grad_norm": 0.5994482040405273, "learning_rate": 0.0001, "loss": 1.4336, "step": 14394 }, { "epoch": 1.6723787394713914, "grad_norm": 0.6925269961357117, "learning_rate": 0.0001, "loss": 1.3146, "step": 14395 }, { "epoch": 1.6724949172233519, "grad_norm": 0.5802856683731079, "learning_rate": 0.0001, "loss": 1.2919, "step": 14396 }, { "epoch": 1.6726110949753124, "grad_norm": 0.6189543008804321, "learning_rate": 0.0001, "loss": 1.4859, "step": 14397 }, { "epoch": 1.6727272727272728, "grad_norm": 0.6009461283683777, "learning_rate": 0.0001, "loss": 1.3418, "step": 14398 }, { "epoch": 1.6728434504792333, "grad_norm": 0.6374339461326599, "learning_rate": 0.0001, "loss": 1.5892, "step": 14399 }, { "epoch": 1.6729596282311938, "grad_norm": 0.6215534806251526, "learning_rate": 0.0001, "loss": 1.5573, "step": 14400 }, { "epoch": 1.6730758059831543, "grad_norm": 0.6279752254486084, "learning_rate": 0.0001, "loss": 1.4585, "step": 14401 }, { "epoch": 1.6731919837351148, "grad_norm": 0.6301735639572144, "learning_rate": 0.0001, "loss": 1.3269, "step": 14402 }, { "epoch": 1.6733081614870753, "grad_norm": 0.6672055125236511, "learning_rate": 0.0001, "loss": 1.4341, "step": 14403 }, { "epoch": 1.6734243392390358, "grad_norm": 0.6005990505218506, "learning_rate": 0.0001, "loss": 1.4152, "step": 14404 }, { "epoch": 1.6735405169909963, "grad_norm": 0.6211845874786377, "learning_rate": 0.0001, "loss": 1.5257, "step": 14405 }, { "epoch": 1.6736566947429568, "grad_norm": 0.6211841702461243, "learning_rate": 0.0001, "loss": 1.4342, "step": 14406 }, { "epoch": 1.6737728724949172, "grad_norm": 0.5766007900238037, "learning_rate": 0.0001, "loss": 1.5273, "step": 14407 }, { "epoch": 1.6738890502468777, "grad_norm": 0.5810567736625671, "learning_rate": 0.0001, "loss": 1.562, "step": 14408 }, { "epoch": 1.6740052279988382, "grad_norm": 0.6212894916534424, "learning_rate": 0.0001, "loss": 1.4144, "step": 14409 }, { "epoch": 1.6741214057507987, "grad_norm": 0.6278946399688721, "learning_rate": 0.0001, "loss": 1.3985, "step": 14410 }, { "epoch": 1.6742375835027592, "grad_norm": 0.7318649291992188, "learning_rate": 0.0001, "loss": 1.5434, "step": 14411 }, { "epoch": 1.6743537612547197, "grad_norm": 0.6377938985824585, "learning_rate": 0.0001, "loss": 1.4921, "step": 14412 }, { "epoch": 1.6744699390066802, "grad_norm": 0.6529183387756348, "learning_rate": 0.0001, "loss": 1.5631, "step": 14413 }, { "epoch": 1.6745861167586407, "grad_norm": 0.6350656747817993, "learning_rate": 0.0001, "loss": 1.5096, "step": 14414 }, { "epoch": 1.6747022945106012, "grad_norm": 0.6154967546463013, "learning_rate": 0.0001, "loss": 1.347, "step": 14415 }, { "epoch": 1.6748184722625616, "grad_norm": 0.6126163601875305, "learning_rate": 0.0001, "loss": 1.3978, "step": 14416 }, { "epoch": 1.6749346500145221, "grad_norm": 0.5958592891693115, "learning_rate": 0.0001, "loss": 1.3257, "step": 14417 }, { "epoch": 1.6750508277664826, "grad_norm": 0.5840135812759399, "learning_rate": 0.0001, "loss": 1.3233, "step": 14418 }, { "epoch": 1.675167005518443, "grad_norm": 0.6193830370903015, "learning_rate": 0.0001, "loss": 1.2989, "step": 14419 }, { "epoch": 1.6752831832704036, "grad_norm": 0.647566020488739, "learning_rate": 0.0001, "loss": 1.5286, "step": 14420 }, { "epoch": 1.675399361022364, "grad_norm": 0.6288964748382568, "learning_rate": 0.0001, "loss": 1.5184, "step": 14421 }, { "epoch": 1.6755155387743246, "grad_norm": 0.6111547350883484, "learning_rate": 0.0001, "loss": 1.4075, "step": 14422 }, { "epoch": 1.675631716526285, "grad_norm": 0.6194759607315063, "learning_rate": 0.0001, "loss": 1.605, "step": 14423 }, { "epoch": 1.6757478942782456, "grad_norm": 0.6172354221343994, "learning_rate": 0.0001, "loss": 1.6095, "step": 14424 }, { "epoch": 1.6758640720302063, "grad_norm": 0.6327816247940063, "learning_rate": 0.0001, "loss": 1.4344, "step": 14425 }, { "epoch": 1.6759802497821668, "grad_norm": 0.6195693612098694, "learning_rate": 0.0001, "loss": 1.331, "step": 14426 }, { "epoch": 1.6760964275341272, "grad_norm": 0.6367574334144592, "learning_rate": 0.0001, "loss": 1.5253, "step": 14427 }, { "epoch": 1.6762126052860877, "grad_norm": 0.6310869455337524, "learning_rate": 0.0001, "loss": 1.5844, "step": 14428 }, { "epoch": 1.6763287830380482, "grad_norm": 0.6287874579429626, "learning_rate": 0.0001, "loss": 1.5016, "step": 14429 }, { "epoch": 1.6764449607900087, "grad_norm": 0.6596211194992065, "learning_rate": 0.0001, "loss": 1.5228, "step": 14430 }, { "epoch": 1.6765611385419692, "grad_norm": 0.5899932980537415, "learning_rate": 0.0001, "loss": 1.4023, "step": 14431 }, { "epoch": 1.6766773162939297, "grad_norm": 0.6191056370735168, "learning_rate": 0.0001, "loss": 1.3938, "step": 14432 }, { "epoch": 1.6767934940458902, "grad_norm": 0.5562541484832764, "learning_rate": 0.0001, "loss": 1.361, "step": 14433 }, { "epoch": 1.6769096717978507, "grad_norm": 0.5942580103874207, "learning_rate": 0.0001, "loss": 1.3855, "step": 14434 }, { "epoch": 1.6770258495498114, "grad_norm": 0.6190808415412903, "learning_rate": 0.0001, "loss": 1.5886, "step": 14435 }, { "epoch": 1.6771420273017719, "grad_norm": 0.6279035806655884, "learning_rate": 0.0001, "loss": 1.3705, "step": 14436 }, { "epoch": 1.6772582050537324, "grad_norm": 0.7129959464073181, "learning_rate": 0.0001, "loss": 1.4056, "step": 14437 }, { "epoch": 1.6773743828056928, "grad_norm": 0.5795397162437439, "learning_rate": 0.0001, "loss": 1.315, "step": 14438 }, { "epoch": 1.6774905605576533, "grad_norm": 0.6081341505050659, "learning_rate": 0.0001, "loss": 1.4835, "step": 14439 }, { "epoch": 1.6776067383096138, "grad_norm": 0.6048169136047363, "learning_rate": 0.0001, "loss": 1.3653, "step": 14440 }, { "epoch": 1.6777229160615743, "grad_norm": 0.5845487117767334, "learning_rate": 0.0001, "loss": 1.2882, "step": 14441 }, { "epoch": 1.6778390938135348, "grad_norm": 0.6477097868919373, "learning_rate": 0.0001, "loss": 1.4798, "step": 14442 }, { "epoch": 1.6779552715654953, "grad_norm": 0.5940166711807251, "learning_rate": 0.0001, "loss": 1.5485, "step": 14443 }, { "epoch": 1.6780714493174558, "grad_norm": 0.5957334637641907, "learning_rate": 0.0001, "loss": 1.5316, "step": 14444 }, { "epoch": 1.6781876270694163, "grad_norm": 0.5919507741928101, "learning_rate": 0.0001, "loss": 1.3858, "step": 14445 }, { "epoch": 1.6783038048213768, "grad_norm": 0.6454141736030579, "learning_rate": 0.0001, "loss": 1.5197, "step": 14446 }, { "epoch": 1.6784199825733372, "grad_norm": 0.5964515805244446, "learning_rate": 0.0001, "loss": 1.5337, "step": 14447 }, { "epoch": 1.6785361603252977, "grad_norm": 0.5786939859390259, "learning_rate": 0.0001, "loss": 1.3801, "step": 14448 }, { "epoch": 1.6786523380772582, "grad_norm": 0.6106777191162109, "learning_rate": 0.0001, "loss": 1.4501, "step": 14449 }, { "epoch": 1.6787685158292187, "grad_norm": 0.6513943076133728, "learning_rate": 0.0001, "loss": 1.5596, "step": 14450 }, { "epoch": 1.6788846935811792, "grad_norm": 0.606070339679718, "learning_rate": 0.0001, "loss": 1.5155, "step": 14451 }, { "epoch": 1.6790008713331397, "grad_norm": 0.5917849540710449, "learning_rate": 0.0001, "loss": 1.4776, "step": 14452 }, { "epoch": 1.6791170490851002, "grad_norm": 0.6083816885948181, "learning_rate": 0.0001, "loss": 1.5489, "step": 14453 }, { "epoch": 1.6792332268370607, "grad_norm": 0.5685683488845825, "learning_rate": 0.0001, "loss": 1.4569, "step": 14454 }, { "epoch": 1.6793494045890212, "grad_norm": 0.578198254108429, "learning_rate": 0.0001, "loss": 1.4338, "step": 14455 }, { "epoch": 1.6794655823409816, "grad_norm": 0.57418292760849, "learning_rate": 0.0001, "loss": 1.2322, "step": 14456 }, { "epoch": 1.6795817600929421, "grad_norm": 0.6263592839241028, "learning_rate": 0.0001, "loss": 1.4322, "step": 14457 }, { "epoch": 1.6796979378449026, "grad_norm": 0.6080293655395508, "learning_rate": 0.0001, "loss": 1.4922, "step": 14458 }, { "epoch": 1.6798141155968631, "grad_norm": 0.6497699618339539, "learning_rate": 0.0001, "loss": 1.6174, "step": 14459 }, { "epoch": 1.6799302933488236, "grad_norm": 0.655951201915741, "learning_rate": 0.0001, "loss": 1.4868, "step": 14460 }, { "epoch": 1.680046471100784, "grad_norm": 0.7133967280387878, "learning_rate": 0.0001, "loss": 1.7067, "step": 14461 }, { "epoch": 1.6801626488527446, "grad_norm": 0.6153315901756287, "learning_rate": 0.0001, "loss": 1.2878, "step": 14462 }, { "epoch": 1.680278826604705, "grad_norm": 0.6728612184524536, "learning_rate": 0.0001, "loss": 1.5437, "step": 14463 }, { "epoch": 1.6803950043566656, "grad_norm": 0.6321450471878052, "learning_rate": 0.0001, "loss": 1.578, "step": 14464 }, { "epoch": 1.680511182108626, "grad_norm": 0.597470223903656, "learning_rate": 0.0001, "loss": 1.3383, "step": 14465 }, { "epoch": 1.6806273598605865, "grad_norm": 0.6447314620018005, "learning_rate": 0.0001, "loss": 1.5533, "step": 14466 }, { "epoch": 1.6807435376125472, "grad_norm": 0.5593234896659851, "learning_rate": 0.0001, "loss": 1.3489, "step": 14467 }, { "epoch": 1.6808597153645077, "grad_norm": 0.601919949054718, "learning_rate": 0.0001, "loss": 1.4423, "step": 14468 }, { "epoch": 1.6809758931164682, "grad_norm": 0.6480042934417725, "learning_rate": 0.0001, "loss": 1.6247, "step": 14469 }, { "epoch": 1.6810920708684287, "grad_norm": 0.6055094599723816, "learning_rate": 0.0001, "loss": 1.5045, "step": 14470 }, { "epoch": 1.6812082486203892, "grad_norm": 0.634349524974823, "learning_rate": 0.0001, "loss": 1.3836, "step": 14471 }, { "epoch": 1.6813244263723497, "grad_norm": 0.6338309049606323, "learning_rate": 0.0001, "loss": 1.5357, "step": 14472 }, { "epoch": 1.6814406041243102, "grad_norm": 0.6654948592185974, "learning_rate": 0.0001, "loss": 1.7227, "step": 14473 }, { "epoch": 1.6815567818762707, "grad_norm": 0.5978288650512695, "learning_rate": 0.0001, "loss": 1.3984, "step": 14474 }, { "epoch": 1.6816729596282312, "grad_norm": 0.5887945890426636, "learning_rate": 0.0001, "loss": 1.3474, "step": 14475 }, { "epoch": 1.6817891373801916, "grad_norm": 0.595877468585968, "learning_rate": 0.0001, "loss": 1.5483, "step": 14476 }, { "epoch": 1.6819053151321524, "grad_norm": 0.5978279113769531, "learning_rate": 0.0001, "loss": 1.209, "step": 14477 }, { "epoch": 1.6820214928841128, "grad_norm": 0.5670945048332214, "learning_rate": 0.0001, "loss": 1.3614, "step": 14478 }, { "epoch": 1.6821376706360733, "grad_norm": 0.6024394035339355, "learning_rate": 0.0001, "loss": 1.3852, "step": 14479 }, { "epoch": 1.6822538483880338, "grad_norm": 0.6266058087348938, "learning_rate": 0.0001, "loss": 1.4178, "step": 14480 }, { "epoch": 1.6823700261399943, "grad_norm": 0.5809116959571838, "learning_rate": 0.0001, "loss": 1.5169, "step": 14481 }, { "epoch": 1.6824862038919548, "grad_norm": 0.6381387710571289, "learning_rate": 0.0001, "loss": 1.367, "step": 14482 }, { "epoch": 1.6826023816439153, "grad_norm": 0.635148286819458, "learning_rate": 0.0001, "loss": 1.5516, "step": 14483 }, { "epoch": 1.6827185593958758, "grad_norm": 0.649366021156311, "learning_rate": 0.0001, "loss": 1.5219, "step": 14484 }, { "epoch": 1.6828347371478363, "grad_norm": 0.5853772163391113, "learning_rate": 0.0001, "loss": 1.3994, "step": 14485 }, { "epoch": 1.6829509148997968, "grad_norm": 0.604891300201416, "learning_rate": 0.0001, "loss": 1.5446, "step": 14486 }, { "epoch": 1.6830670926517572, "grad_norm": 0.6487552523612976, "learning_rate": 0.0001, "loss": 1.4734, "step": 14487 }, { "epoch": 1.6831832704037177, "grad_norm": 0.6208043098449707, "learning_rate": 0.0001, "loss": 1.4336, "step": 14488 }, { "epoch": 1.6832994481556782, "grad_norm": 0.5875146389007568, "learning_rate": 0.0001, "loss": 1.4897, "step": 14489 }, { "epoch": 1.6834156259076387, "grad_norm": 0.6026572585105896, "learning_rate": 0.0001, "loss": 1.4709, "step": 14490 }, { "epoch": 1.6835318036595992, "grad_norm": 0.6388230919837952, "learning_rate": 0.0001, "loss": 1.622, "step": 14491 }, { "epoch": 1.6836479814115597, "grad_norm": 0.6272510290145874, "learning_rate": 0.0001, "loss": 1.3593, "step": 14492 }, { "epoch": 1.6837641591635202, "grad_norm": 0.6031495928764343, "learning_rate": 0.0001, "loss": 1.3271, "step": 14493 }, { "epoch": 1.6838803369154807, "grad_norm": 0.6953699588775635, "learning_rate": 0.0001, "loss": 1.5505, "step": 14494 }, { "epoch": 1.6839965146674412, "grad_norm": 0.6376732587814331, "learning_rate": 0.0001, "loss": 1.5482, "step": 14495 }, { "epoch": 1.6841126924194016, "grad_norm": 0.6579051613807678, "learning_rate": 0.0001, "loss": 1.3625, "step": 14496 }, { "epoch": 1.6842288701713621, "grad_norm": 0.6580008864402771, "learning_rate": 0.0001, "loss": 1.5499, "step": 14497 }, { "epoch": 1.6843450479233226, "grad_norm": 0.6870502233505249, "learning_rate": 0.0001, "loss": 1.5442, "step": 14498 }, { "epoch": 1.6844612256752831, "grad_norm": 0.5963420271873474, "learning_rate": 0.0001, "loss": 1.5402, "step": 14499 }, { "epoch": 1.6845774034272436, "grad_norm": 0.5947056412696838, "learning_rate": 0.0001, "loss": 1.3172, "step": 14500 }, { "epoch": 1.684693581179204, "grad_norm": 0.6432857513427734, "learning_rate": 0.0001, "loss": 1.4623, "step": 14501 }, { "epoch": 1.6848097589311646, "grad_norm": 0.6341150999069214, "learning_rate": 0.0001, "loss": 1.5529, "step": 14502 }, { "epoch": 1.684925936683125, "grad_norm": 0.5731436610221863, "learning_rate": 0.0001, "loss": 1.3317, "step": 14503 }, { "epoch": 1.6850421144350856, "grad_norm": 0.6460700631141663, "learning_rate": 0.0001, "loss": 1.4949, "step": 14504 }, { "epoch": 1.685158292187046, "grad_norm": 0.6254189610481262, "learning_rate": 0.0001, "loss": 1.409, "step": 14505 }, { "epoch": 1.6852744699390065, "grad_norm": 0.6591849327087402, "learning_rate": 0.0001, "loss": 1.4723, "step": 14506 }, { "epoch": 1.685390647690967, "grad_norm": 0.6324242949485779, "learning_rate": 0.0001, "loss": 1.5383, "step": 14507 }, { "epoch": 1.6855068254429275, "grad_norm": 0.5943838357925415, "learning_rate": 0.0001, "loss": 1.3313, "step": 14508 }, { "epoch": 1.6856230031948882, "grad_norm": 0.5703464150428772, "learning_rate": 0.0001, "loss": 1.2683, "step": 14509 }, { "epoch": 1.6857391809468487, "grad_norm": 0.6419578790664673, "learning_rate": 0.0001, "loss": 1.459, "step": 14510 }, { "epoch": 1.6858553586988092, "grad_norm": 0.6053417921066284, "learning_rate": 0.0001, "loss": 1.504, "step": 14511 }, { "epoch": 1.6859715364507697, "grad_norm": 0.5826777219772339, "learning_rate": 0.0001, "loss": 1.3442, "step": 14512 }, { "epoch": 1.6860877142027302, "grad_norm": 0.5953052639961243, "learning_rate": 0.0001, "loss": 1.4257, "step": 14513 }, { "epoch": 1.6862038919546907, "grad_norm": 0.6599135398864746, "learning_rate": 0.0001, "loss": 1.4593, "step": 14514 }, { "epoch": 1.6863200697066512, "grad_norm": 0.6383476853370667, "learning_rate": 0.0001, "loss": 1.4106, "step": 14515 }, { "epoch": 1.6864362474586116, "grad_norm": 0.6103217005729675, "learning_rate": 0.0001, "loss": 1.5003, "step": 14516 }, { "epoch": 1.6865524252105721, "grad_norm": 0.6277769207954407, "learning_rate": 0.0001, "loss": 1.3357, "step": 14517 }, { "epoch": 1.6866686029625326, "grad_norm": 0.6884340047836304, "learning_rate": 0.0001, "loss": 1.4431, "step": 14518 }, { "epoch": 1.6867847807144933, "grad_norm": 0.6325324177742004, "learning_rate": 0.0001, "loss": 1.3521, "step": 14519 }, { "epoch": 1.6869009584664538, "grad_norm": 0.609632134437561, "learning_rate": 0.0001, "loss": 1.3016, "step": 14520 }, { "epoch": 1.6870171362184143, "grad_norm": 0.7071484923362732, "learning_rate": 0.0001, "loss": 1.6922, "step": 14521 }, { "epoch": 1.6871333139703748, "grad_norm": 0.5926123857498169, "learning_rate": 0.0001, "loss": 1.4296, "step": 14522 }, { "epoch": 1.6872494917223353, "grad_norm": 0.6333919167518616, "learning_rate": 0.0001, "loss": 1.4905, "step": 14523 }, { "epoch": 1.6873656694742958, "grad_norm": 0.6357964873313904, "learning_rate": 0.0001, "loss": 1.5479, "step": 14524 }, { "epoch": 1.6874818472262563, "grad_norm": 0.5926110148429871, "learning_rate": 0.0001, "loss": 1.5907, "step": 14525 }, { "epoch": 1.6875980249782168, "grad_norm": 0.5912199020385742, "learning_rate": 0.0001, "loss": 1.3935, "step": 14526 }, { "epoch": 1.6877142027301772, "grad_norm": 0.6824315190315247, "learning_rate": 0.0001, "loss": 1.6578, "step": 14527 }, { "epoch": 1.6878303804821377, "grad_norm": 0.6213096380233765, "learning_rate": 0.0001, "loss": 1.477, "step": 14528 }, { "epoch": 1.6879465582340982, "grad_norm": 0.6405655741691589, "learning_rate": 0.0001, "loss": 1.3823, "step": 14529 }, { "epoch": 1.6880627359860587, "grad_norm": 0.658589780330658, "learning_rate": 0.0001, "loss": 1.4905, "step": 14530 }, { "epoch": 1.6881789137380192, "grad_norm": 0.6120070219039917, "learning_rate": 0.0001, "loss": 1.3881, "step": 14531 }, { "epoch": 1.6882950914899797, "grad_norm": 0.6297619938850403, "learning_rate": 0.0001, "loss": 1.3099, "step": 14532 }, { "epoch": 1.6884112692419402, "grad_norm": 0.5902721285820007, "learning_rate": 0.0001, "loss": 1.3845, "step": 14533 }, { "epoch": 1.6885274469939007, "grad_norm": 0.604314923286438, "learning_rate": 0.0001, "loss": 1.4333, "step": 14534 }, { "epoch": 1.6886436247458612, "grad_norm": 0.6077500581741333, "learning_rate": 0.0001, "loss": 1.4104, "step": 14535 }, { "epoch": 1.6887598024978216, "grad_norm": 0.5967190861701965, "learning_rate": 0.0001, "loss": 1.3915, "step": 14536 }, { "epoch": 1.6888759802497821, "grad_norm": 0.6104722023010254, "learning_rate": 0.0001, "loss": 1.3176, "step": 14537 }, { "epoch": 1.6889921580017426, "grad_norm": 0.6314585208892822, "learning_rate": 0.0001, "loss": 1.5521, "step": 14538 }, { "epoch": 1.6891083357537031, "grad_norm": 0.6697160601615906, "learning_rate": 0.0001, "loss": 1.5445, "step": 14539 }, { "epoch": 1.6892245135056636, "grad_norm": 0.6182875633239746, "learning_rate": 0.0001, "loss": 1.5825, "step": 14540 }, { "epoch": 1.689340691257624, "grad_norm": 0.6394671201705933, "learning_rate": 0.0001, "loss": 1.4096, "step": 14541 }, { "epoch": 1.6894568690095846, "grad_norm": 0.68181973695755, "learning_rate": 0.0001, "loss": 1.4496, "step": 14542 }, { "epoch": 1.689573046761545, "grad_norm": 0.583557665348053, "learning_rate": 0.0001, "loss": 1.2828, "step": 14543 }, { "epoch": 1.6896892245135056, "grad_norm": 0.6751927137374878, "learning_rate": 0.0001, "loss": 1.6908, "step": 14544 }, { "epoch": 1.689805402265466, "grad_norm": 0.6294887065887451, "learning_rate": 0.0001, "loss": 1.4344, "step": 14545 }, { "epoch": 1.6899215800174265, "grad_norm": 0.6275957822799683, "learning_rate": 0.0001, "loss": 1.5149, "step": 14546 }, { "epoch": 1.690037757769387, "grad_norm": 0.6664263010025024, "learning_rate": 0.0001, "loss": 1.6413, "step": 14547 }, { "epoch": 1.6901539355213475, "grad_norm": 0.5797239542007446, "learning_rate": 0.0001, "loss": 1.4193, "step": 14548 }, { "epoch": 1.690270113273308, "grad_norm": 0.6231329441070557, "learning_rate": 0.0001, "loss": 1.292, "step": 14549 }, { "epoch": 1.6903862910252685, "grad_norm": 0.6177236437797546, "learning_rate": 0.0001, "loss": 1.5413, "step": 14550 }, { "epoch": 1.6905024687772292, "grad_norm": 0.6350299715995789, "learning_rate": 0.0001, "loss": 1.4295, "step": 14551 }, { "epoch": 1.6906186465291897, "grad_norm": 0.6407362222671509, "learning_rate": 0.0001, "loss": 1.5018, "step": 14552 }, { "epoch": 1.6907348242811502, "grad_norm": 0.6423758268356323, "learning_rate": 0.0001, "loss": 1.4251, "step": 14553 }, { "epoch": 1.6908510020331107, "grad_norm": 0.6132572889328003, "learning_rate": 0.0001, "loss": 1.4823, "step": 14554 }, { "epoch": 1.6909671797850712, "grad_norm": 0.5959659814834595, "learning_rate": 0.0001, "loss": 1.3683, "step": 14555 }, { "epoch": 1.6910833575370317, "grad_norm": 0.6442015767097473, "learning_rate": 0.0001, "loss": 1.3915, "step": 14556 }, { "epoch": 1.6911995352889921, "grad_norm": 0.66871178150177, "learning_rate": 0.0001, "loss": 1.4989, "step": 14557 }, { "epoch": 1.6913157130409526, "grad_norm": 0.6081305146217346, "learning_rate": 0.0001, "loss": 1.5511, "step": 14558 }, { "epoch": 1.6914318907929131, "grad_norm": 0.6531521081924438, "learning_rate": 0.0001, "loss": 1.2818, "step": 14559 }, { "epoch": 1.6915480685448736, "grad_norm": 0.5845082998275757, "learning_rate": 0.0001, "loss": 1.4252, "step": 14560 }, { "epoch": 1.6916642462968343, "grad_norm": 0.6385669708251953, "learning_rate": 0.0001, "loss": 1.4509, "step": 14561 }, { "epoch": 1.6917804240487948, "grad_norm": 0.6152339577674866, "learning_rate": 0.0001, "loss": 1.4737, "step": 14562 }, { "epoch": 1.6918966018007553, "grad_norm": 0.6045849323272705, "learning_rate": 0.0001, "loss": 1.3673, "step": 14563 }, { "epoch": 1.6920127795527158, "grad_norm": 0.6111593246459961, "learning_rate": 0.0001, "loss": 1.4622, "step": 14564 }, { "epoch": 1.6921289573046763, "grad_norm": 0.6448667645454407, "learning_rate": 0.0001, "loss": 1.4205, "step": 14565 }, { "epoch": 1.6922451350566368, "grad_norm": 0.5893568396568298, "learning_rate": 0.0001, "loss": 1.3223, "step": 14566 }, { "epoch": 1.6923613128085973, "grad_norm": 0.6245533227920532, "learning_rate": 0.0001, "loss": 1.3715, "step": 14567 }, { "epoch": 1.6924774905605577, "grad_norm": 0.6759933233261108, "learning_rate": 0.0001, "loss": 1.7051, "step": 14568 }, { "epoch": 1.6925936683125182, "grad_norm": 0.5837852954864502, "learning_rate": 0.0001, "loss": 1.4091, "step": 14569 }, { "epoch": 1.6927098460644787, "grad_norm": 0.5810626149177551, "learning_rate": 0.0001, "loss": 1.3708, "step": 14570 }, { "epoch": 1.6928260238164392, "grad_norm": 0.6138414144515991, "learning_rate": 0.0001, "loss": 1.3406, "step": 14571 }, { "epoch": 1.6929422015683997, "grad_norm": 0.6352119445800781, "learning_rate": 0.0001, "loss": 1.5335, "step": 14572 }, { "epoch": 1.6930583793203602, "grad_norm": 0.6615945100784302, "learning_rate": 0.0001, "loss": 1.545, "step": 14573 }, { "epoch": 1.6931745570723207, "grad_norm": 0.6328340768814087, "learning_rate": 0.0001, "loss": 1.4159, "step": 14574 }, { "epoch": 1.6932907348242812, "grad_norm": 0.6434016227722168, "learning_rate": 0.0001, "loss": 1.6526, "step": 14575 }, { "epoch": 1.6934069125762417, "grad_norm": 0.6185423731803894, "learning_rate": 0.0001, "loss": 1.4689, "step": 14576 }, { "epoch": 1.6935230903282021, "grad_norm": 0.5980252623558044, "learning_rate": 0.0001, "loss": 1.4311, "step": 14577 }, { "epoch": 1.6936392680801626, "grad_norm": 0.6386241316795349, "learning_rate": 0.0001, "loss": 1.5584, "step": 14578 }, { "epoch": 1.6937554458321231, "grad_norm": 0.5935457348823547, "learning_rate": 0.0001, "loss": 1.2232, "step": 14579 }, { "epoch": 1.6938716235840836, "grad_norm": 0.6672040224075317, "learning_rate": 0.0001, "loss": 1.3887, "step": 14580 }, { "epoch": 1.693987801336044, "grad_norm": 0.6680154800415039, "learning_rate": 0.0001, "loss": 1.4414, "step": 14581 }, { "epoch": 1.6941039790880046, "grad_norm": 0.6375916600227356, "learning_rate": 0.0001, "loss": 1.5892, "step": 14582 }, { "epoch": 1.694220156839965, "grad_norm": 0.6327572464942932, "learning_rate": 0.0001, "loss": 1.438, "step": 14583 }, { "epoch": 1.6943363345919256, "grad_norm": 0.6473349928855896, "learning_rate": 0.0001, "loss": 1.4295, "step": 14584 }, { "epoch": 1.694452512343886, "grad_norm": 0.659527063369751, "learning_rate": 0.0001, "loss": 1.5361, "step": 14585 }, { "epoch": 1.6945686900958465, "grad_norm": 0.6704113483428955, "learning_rate": 0.0001, "loss": 1.5204, "step": 14586 }, { "epoch": 1.694684867847807, "grad_norm": 0.638503909111023, "learning_rate": 0.0001, "loss": 1.5778, "step": 14587 }, { "epoch": 1.6948010455997675, "grad_norm": 0.6061193346977234, "learning_rate": 0.0001, "loss": 1.4811, "step": 14588 }, { "epoch": 1.694917223351728, "grad_norm": 0.6526625752449036, "learning_rate": 0.0001, "loss": 1.4969, "step": 14589 }, { "epoch": 1.6950334011036885, "grad_norm": 0.5856937170028687, "learning_rate": 0.0001, "loss": 1.3898, "step": 14590 }, { "epoch": 1.695149578855649, "grad_norm": 0.608001172542572, "learning_rate": 0.0001, "loss": 1.3862, "step": 14591 }, { "epoch": 1.6952657566076097, "grad_norm": 0.6142505407333374, "learning_rate": 0.0001, "loss": 1.4282, "step": 14592 }, { "epoch": 1.6953819343595702, "grad_norm": 0.6172985434532166, "learning_rate": 0.0001, "loss": 1.6078, "step": 14593 }, { "epoch": 1.6954981121115307, "grad_norm": 0.606876015663147, "learning_rate": 0.0001, "loss": 1.2548, "step": 14594 }, { "epoch": 1.6956142898634912, "grad_norm": 0.5838636159896851, "learning_rate": 0.0001, "loss": 1.4818, "step": 14595 }, { "epoch": 1.6957304676154517, "grad_norm": 0.6177676916122437, "learning_rate": 0.0001, "loss": 1.4402, "step": 14596 }, { "epoch": 1.6958466453674121, "grad_norm": 0.604118824005127, "learning_rate": 0.0001, "loss": 1.4383, "step": 14597 }, { "epoch": 1.6959628231193726, "grad_norm": 0.5880645513534546, "learning_rate": 0.0001, "loss": 1.3653, "step": 14598 }, { "epoch": 1.6960790008713331, "grad_norm": 0.618308961391449, "learning_rate": 0.0001, "loss": 1.5468, "step": 14599 }, { "epoch": 1.6961951786232936, "grad_norm": 0.6678215265274048, "learning_rate": 0.0001, "loss": 1.5065, "step": 14600 }, { "epoch": 1.696311356375254, "grad_norm": 0.6460365056991577, "learning_rate": 0.0001, "loss": 1.5934, "step": 14601 }, { "epoch": 1.6964275341272146, "grad_norm": 0.6705929040908813, "learning_rate": 0.0001, "loss": 1.7046, "step": 14602 }, { "epoch": 1.6965437118791753, "grad_norm": 0.6509013772010803, "learning_rate": 0.0001, "loss": 1.4563, "step": 14603 }, { "epoch": 1.6966598896311358, "grad_norm": 0.6307360529899597, "learning_rate": 0.0001, "loss": 1.496, "step": 14604 }, { "epoch": 1.6967760673830963, "grad_norm": 0.6434224843978882, "learning_rate": 0.0001, "loss": 1.4823, "step": 14605 }, { "epoch": 1.6968922451350568, "grad_norm": 0.5928567051887512, "learning_rate": 0.0001, "loss": 1.4717, "step": 14606 }, { "epoch": 1.6970084228870173, "grad_norm": 0.6560643911361694, "learning_rate": 0.0001, "loss": 1.6004, "step": 14607 }, { "epoch": 1.6971246006389777, "grad_norm": 0.6457347869873047, "learning_rate": 0.0001, "loss": 1.3097, "step": 14608 }, { "epoch": 1.6972407783909382, "grad_norm": 0.6389618515968323, "learning_rate": 0.0001, "loss": 1.5092, "step": 14609 }, { "epoch": 1.6973569561428987, "grad_norm": 0.6506479978561401, "learning_rate": 0.0001, "loss": 1.6275, "step": 14610 }, { "epoch": 1.6974731338948592, "grad_norm": 0.6570099592208862, "learning_rate": 0.0001, "loss": 1.5231, "step": 14611 }, { "epoch": 1.6975893116468197, "grad_norm": 0.6314353942871094, "learning_rate": 0.0001, "loss": 1.5215, "step": 14612 }, { "epoch": 1.6977054893987802, "grad_norm": 0.5876086354255676, "learning_rate": 0.0001, "loss": 1.4018, "step": 14613 }, { "epoch": 1.6978216671507407, "grad_norm": 0.7521069645881653, "learning_rate": 0.0001, "loss": 1.4478, "step": 14614 }, { "epoch": 1.6979378449027012, "grad_norm": 0.7189734578132629, "learning_rate": 0.0001, "loss": 1.4591, "step": 14615 }, { "epoch": 1.6980540226546617, "grad_norm": 0.66774982213974, "learning_rate": 0.0001, "loss": 1.522, "step": 14616 }, { "epoch": 1.6981702004066221, "grad_norm": 0.6416917443275452, "learning_rate": 0.0001, "loss": 1.4685, "step": 14617 }, { "epoch": 1.6982863781585826, "grad_norm": 0.6286454796791077, "learning_rate": 0.0001, "loss": 1.4376, "step": 14618 }, { "epoch": 1.6984025559105431, "grad_norm": 0.6092660427093506, "learning_rate": 0.0001, "loss": 1.5246, "step": 14619 }, { "epoch": 1.6985187336625036, "grad_norm": 0.6334081888198853, "learning_rate": 0.0001, "loss": 1.584, "step": 14620 }, { "epoch": 1.698634911414464, "grad_norm": 0.591025173664093, "learning_rate": 0.0001, "loss": 1.5467, "step": 14621 }, { "epoch": 1.6987510891664246, "grad_norm": 0.6813499331474304, "learning_rate": 0.0001, "loss": 1.4357, "step": 14622 }, { "epoch": 1.698867266918385, "grad_norm": 0.5753002166748047, "learning_rate": 0.0001, "loss": 1.2696, "step": 14623 }, { "epoch": 1.6989834446703456, "grad_norm": 0.5820756554603577, "learning_rate": 0.0001, "loss": 1.45, "step": 14624 }, { "epoch": 1.699099622422306, "grad_norm": 0.6272836923599243, "learning_rate": 0.0001, "loss": 1.2814, "step": 14625 }, { "epoch": 1.6992158001742665, "grad_norm": 0.5866975784301758, "learning_rate": 0.0001, "loss": 1.5529, "step": 14626 }, { "epoch": 1.699331977926227, "grad_norm": 0.6598595380783081, "learning_rate": 0.0001, "loss": 1.417, "step": 14627 }, { "epoch": 1.6994481556781875, "grad_norm": 0.635231077671051, "learning_rate": 0.0001, "loss": 1.4982, "step": 14628 }, { "epoch": 1.699564333430148, "grad_norm": 0.6380534172058105, "learning_rate": 0.0001, "loss": 1.5566, "step": 14629 }, { "epoch": 1.6996805111821085, "grad_norm": 0.6780301332473755, "learning_rate": 0.0001, "loss": 1.6176, "step": 14630 }, { "epoch": 1.699796688934069, "grad_norm": 0.5861924290657043, "learning_rate": 0.0001, "loss": 1.285, "step": 14631 }, { "epoch": 1.6999128666860295, "grad_norm": 0.6486508846282959, "learning_rate": 0.0001, "loss": 1.5365, "step": 14632 }, { "epoch": 1.70002904443799, "grad_norm": 0.6988244652748108, "learning_rate": 0.0001, "loss": 1.5485, "step": 14633 }, { "epoch": 1.7001452221899507, "grad_norm": 0.6290296316146851, "learning_rate": 0.0001, "loss": 1.4669, "step": 14634 }, { "epoch": 1.7002613999419112, "grad_norm": 0.6649961471557617, "learning_rate": 0.0001, "loss": 1.3898, "step": 14635 }, { "epoch": 1.7003775776938717, "grad_norm": 0.5966437458992004, "learning_rate": 0.0001, "loss": 1.2483, "step": 14636 }, { "epoch": 1.7004937554458321, "grad_norm": 0.6070742011070251, "learning_rate": 0.0001, "loss": 1.5061, "step": 14637 }, { "epoch": 1.7006099331977926, "grad_norm": 0.6484366059303284, "learning_rate": 0.0001, "loss": 1.4458, "step": 14638 }, { "epoch": 1.7007261109497531, "grad_norm": 0.6129118204116821, "learning_rate": 0.0001, "loss": 1.425, "step": 14639 }, { "epoch": 1.7008422887017136, "grad_norm": 0.6081497669219971, "learning_rate": 0.0001, "loss": 1.4826, "step": 14640 }, { "epoch": 1.700958466453674, "grad_norm": 0.5963031053543091, "learning_rate": 0.0001, "loss": 1.3579, "step": 14641 }, { "epoch": 1.7010746442056346, "grad_norm": 0.635315477848053, "learning_rate": 0.0001, "loss": 1.5403, "step": 14642 }, { "epoch": 1.701190821957595, "grad_norm": 0.6180382370948792, "learning_rate": 0.0001, "loss": 1.4034, "step": 14643 }, { "epoch": 1.7013069997095556, "grad_norm": 0.6645064353942871, "learning_rate": 0.0001, "loss": 1.401, "step": 14644 }, { "epoch": 1.7014231774615163, "grad_norm": 0.6183170080184937, "learning_rate": 0.0001, "loss": 1.3872, "step": 14645 }, { "epoch": 1.7015393552134768, "grad_norm": 0.627678632736206, "learning_rate": 0.0001, "loss": 1.3773, "step": 14646 }, { "epoch": 1.7016555329654373, "grad_norm": 0.6067113280296326, "learning_rate": 0.0001, "loss": 1.3639, "step": 14647 }, { "epoch": 1.7017717107173977, "grad_norm": 0.6415846347808838, "learning_rate": 0.0001, "loss": 1.4816, "step": 14648 }, { "epoch": 1.7018878884693582, "grad_norm": 0.5976949334144592, "learning_rate": 0.0001, "loss": 1.4375, "step": 14649 }, { "epoch": 1.7020040662213187, "grad_norm": 0.6254173517227173, "learning_rate": 0.0001, "loss": 1.4246, "step": 14650 }, { "epoch": 1.7021202439732792, "grad_norm": 0.6585585474967957, "learning_rate": 0.0001, "loss": 1.4439, "step": 14651 }, { "epoch": 1.7022364217252397, "grad_norm": 0.6446869969367981, "learning_rate": 0.0001, "loss": 1.5057, "step": 14652 }, { "epoch": 1.7023525994772002, "grad_norm": 0.654296875, "learning_rate": 0.0001, "loss": 1.2977, "step": 14653 }, { "epoch": 1.7024687772291607, "grad_norm": 0.674474835395813, "learning_rate": 0.0001, "loss": 1.4322, "step": 14654 }, { "epoch": 1.7025849549811212, "grad_norm": 0.6400251388549805, "learning_rate": 0.0001, "loss": 1.4546, "step": 14655 }, { "epoch": 1.7027011327330817, "grad_norm": 0.6343820691108704, "learning_rate": 0.0001, "loss": 1.4035, "step": 14656 }, { "epoch": 1.7028173104850421, "grad_norm": 0.6097686290740967, "learning_rate": 0.0001, "loss": 1.3933, "step": 14657 }, { "epoch": 1.7029334882370026, "grad_norm": 0.6226780414581299, "learning_rate": 0.0001, "loss": 1.6598, "step": 14658 }, { "epoch": 1.7030496659889631, "grad_norm": 0.6007325053215027, "learning_rate": 0.0001, "loss": 1.3065, "step": 14659 }, { "epoch": 1.7031658437409236, "grad_norm": 0.6372803449630737, "learning_rate": 0.0001, "loss": 1.5234, "step": 14660 }, { "epoch": 1.703282021492884, "grad_norm": 0.625385046005249, "learning_rate": 0.0001, "loss": 1.4079, "step": 14661 }, { "epoch": 1.7033981992448446, "grad_norm": 0.6250273585319519, "learning_rate": 0.0001, "loss": 1.5111, "step": 14662 }, { "epoch": 1.703514376996805, "grad_norm": 0.5870633125305176, "learning_rate": 0.0001, "loss": 1.3509, "step": 14663 }, { "epoch": 1.7036305547487656, "grad_norm": 0.620680034160614, "learning_rate": 0.0001, "loss": 1.4227, "step": 14664 }, { "epoch": 1.703746732500726, "grad_norm": 0.6551097631454468, "learning_rate": 0.0001, "loss": 1.5411, "step": 14665 }, { "epoch": 1.7038629102526865, "grad_norm": 0.6445282697677612, "learning_rate": 0.0001, "loss": 1.5992, "step": 14666 }, { "epoch": 1.703979088004647, "grad_norm": 0.6146022081375122, "learning_rate": 0.0001, "loss": 1.4884, "step": 14667 }, { "epoch": 1.7040952657566075, "grad_norm": 0.5587998628616333, "learning_rate": 0.0001, "loss": 1.2187, "step": 14668 }, { "epoch": 1.704211443508568, "grad_norm": 0.6643266081809998, "learning_rate": 0.0001, "loss": 1.5298, "step": 14669 }, { "epoch": 1.7043276212605285, "grad_norm": 0.6104164719581604, "learning_rate": 0.0001, "loss": 1.4932, "step": 14670 }, { "epoch": 1.704443799012489, "grad_norm": 0.620539665222168, "learning_rate": 0.0001, "loss": 1.4916, "step": 14671 }, { "epoch": 1.7045599767644495, "grad_norm": 0.704810380935669, "learning_rate": 0.0001, "loss": 1.567, "step": 14672 }, { "epoch": 1.70467615451641, "grad_norm": 0.6111543774604797, "learning_rate": 0.0001, "loss": 1.4634, "step": 14673 }, { "epoch": 1.7047923322683705, "grad_norm": 0.6791646480560303, "learning_rate": 0.0001, "loss": 1.5672, "step": 14674 }, { "epoch": 1.704908510020331, "grad_norm": 0.6171687841415405, "learning_rate": 0.0001, "loss": 1.4182, "step": 14675 }, { "epoch": 1.7050246877722917, "grad_norm": 0.6004462838172913, "learning_rate": 0.0001, "loss": 1.5018, "step": 14676 }, { "epoch": 1.7051408655242521, "grad_norm": 0.5968261361122131, "learning_rate": 0.0001, "loss": 1.5018, "step": 14677 }, { "epoch": 1.7052570432762126, "grad_norm": 0.609222948551178, "learning_rate": 0.0001, "loss": 1.311, "step": 14678 }, { "epoch": 1.7053732210281731, "grad_norm": 0.5788531303405762, "learning_rate": 0.0001, "loss": 1.4782, "step": 14679 }, { "epoch": 1.7054893987801336, "grad_norm": 0.5904691815376282, "learning_rate": 0.0001, "loss": 1.4116, "step": 14680 }, { "epoch": 1.705605576532094, "grad_norm": 0.600645899772644, "learning_rate": 0.0001, "loss": 1.459, "step": 14681 }, { "epoch": 1.7057217542840546, "grad_norm": 0.6605891585350037, "learning_rate": 0.0001, "loss": 1.4127, "step": 14682 }, { "epoch": 1.705837932036015, "grad_norm": 0.6406998038291931, "learning_rate": 0.0001, "loss": 1.5132, "step": 14683 }, { "epoch": 1.7059541097879756, "grad_norm": 0.6493116617202759, "learning_rate": 0.0001, "loss": 1.4353, "step": 14684 }, { "epoch": 1.706070287539936, "grad_norm": 0.6372137069702148, "learning_rate": 0.0001, "loss": 1.6287, "step": 14685 }, { "epoch": 1.7061864652918965, "grad_norm": 0.6333807110786438, "learning_rate": 0.0001, "loss": 1.3945, "step": 14686 }, { "epoch": 1.7063026430438573, "grad_norm": 0.5930798649787903, "learning_rate": 0.0001, "loss": 1.4274, "step": 14687 }, { "epoch": 1.7064188207958177, "grad_norm": 0.5937562584877014, "learning_rate": 0.0001, "loss": 1.4179, "step": 14688 }, { "epoch": 1.7065349985477782, "grad_norm": 0.6222439408302307, "learning_rate": 0.0001, "loss": 1.4929, "step": 14689 }, { "epoch": 1.7066511762997387, "grad_norm": 0.6252455115318298, "learning_rate": 0.0001, "loss": 1.5824, "step": 14690 }, { "epoch": 1.7067673540516992, "grad_norm": 0.6360738277435303, "learning_rate": 0.0001, "loss": 1.5338, "step": 14691 }, { "epoch": 1.7068835318036597, "grad_norm": 0.600849449634552, "learning_rate": 0.0001, "loss": 1.3596, "step": 14692 }, { "epoch": 1.7069997095556202, "grad_norm": 0.6560570001602173, "learning_rate": 0.0001, "loss": 1.5934, "step": 14693 }, { "epoch": 1.7071158873075807, "grad_norm": 0.5728269219398499, "learning_rate": 0.0001, "loss": 1.4888, "step": 14694 }, { "epoch": 1.7072320650595412, "grad_norm": 0.6428949236869812, "learning_rate": 0.0001, "loss": 1.5043, "step": 14695 }, { "epoch": 1.7073482428115017, "grad_norm": 0.6195924878120422, "learning_rate": 0.0001, "loss": 1.4842, "step": 14696 }, { "epoch": 1.7074644205634621, "grad_norm": 0.6460603475570679, "learning_rate": 0.0001, "loss": 1.5995, "step": 14697 }, { "epoch": 1.7075805983154226, "grad_norm": 0.6422379612922668, "learning_rate": 0.0001, "loss": 1.599, "step": 14698 }, { "epoch": 1.7076967760673831, "grad_norm": 0.6654634475708008, "learning_rate": 0.0001, "loss": 1.644, "step": 14699 }, { "epoch": 1.7078129538193436, "grad_norm": 0.6639367341995239, "learning_rate": 0.0001, "loss": 1.5017, "step": 14700 }, { "epoch": 1.707929131571304, "grad_norm": 0.5767974853515625, "learning_rate": 0.0001, "loss": 1.2133, "step": 14701 }, { "epoch": 1.7080453093232646, "grad_norm": 0.594628632068634, "learning_rate": 0.0001, "loss": 1.5327, "step": 14702 }, { "epoch": 1.708161487075225, "grad_norm": 0.642102062702179, "learning_rate": 0.0001, "loss": 1.5452, "step": 14703 }, { "epoch": 1.7082776648271856, "grad_norm": 0.6706504225730896, "learning_rate": 0.0001, "loss": 1.5864, "step": 14704 }, { "epoch": 1.708393842579146, "grad_norm": 0.6253349184989929, "learning_rate": 0.0001, "loss": 1.2945, "step": 14705 }, { "epoch": 1.7085100203311065, "grad_norm": 0.6329401135444641, "learning_rate": 0.0001, "loss": 1.295, "step": 14706 }, { "epoch": 1.708626198083067, "grad_norm": 0.6025615930557251, "learning_rate": 0.0001, "loss": 1.5696, "step": 14707 }, { "epoch": 1.7087423758350275, "grad_norm": 0.5967482328414917, "learning_rate": 0.0001, "loss": 1.3766, "step": 14708 }, { "epoch": 1.708858553586988, "grad_norm": 0.6471050381660461, "learning_rate": 0.0001, "loss": 1.4146, "step": 14709 }, { "epoch": 1.7089747313389485, "grad_norm": 0.6468651294708252, "learning_rate": 0.0001, "loss": 1.5212, "step": 14710 }, { "epoch": 1.709090909090909, "grad_norm": 0.6520069241523743, "learning_rate": 0.0001, "loss": 1.4368, "step": 14711 }, { "epoch": 1.7092070868428695, "grad_norm": 0.6526609659194946, "learning_rate": 0.0001, "loss": 1.5241, "step": 14712 }, { "epoch": 1.70932326459483, "grad_norm": 0.605928361415863, "learning_rate": 0.0001, "loss": 1.3165, "step": 14713 }, { "epoch": 1.7094394423467905, "grad_norm": 0.6068093776702881, "learning_rate": 0.0001, "loss": 1.5312, "step": 14714 }, { "epoch": 1.709555620098751, "grad_norm": 0.6246966123580933, "learning_rate": 0.0001, "loss": 1.4664, "step": 14715 }, { "epoch": 1.7096717978507114, "grad_norm": 0.6444641947746277, "learning_rate": 0.0001, "loss": 1.3781, "step": 14716 }, { "epoch": 1.709787975602672, "grad_norm": 0.6108652353286743, "learning_rate": 0.0001, "loss": 1.6248, "step": 14717 }, { "epoch": 1.7099041533546326, "grad_norm": 0.6027578115463257, "learning_rate": 0.0001, "loss": 1.3968, "step": 14718 }, { "epoch": 1.7100203311065931, "grad_norm": 0.6105266213417053, "learning_rate": 0.0001, "loss": 1.403, "step": 14719 }, { "epoch": 1.7101365088585536, "grad_norm": 0.5987253189086914, "learning_rate": 0.0001, "loss": 1.3818, "step": 14720 }, { "epoch": 1.710252686610514, "grad_norm": 0.6370313167572021, "learning_rate": 0.0001, "loss": 1.5368, "step": 14721 }, { "epoch": 1.7103688643624746, "grad_norm": 0.6336929798126221, "learning_rate": 0.0001, "loss": 1.4582, "step": 14722 }, { "epoch": 1.710485042114435, "grad_norm": 0.6062333583831787, "learning_rate": 0.0001, "loss": 1.4079, "step": 14723 }, { "epoch": 1.7106012198663956, "grad_norm": 0.5995748043060303, "learning_rate": 0.0001, "loss": 1.4158, "step": 14724 }, { "epoch": 1.710717397618356, "grad_norm": 0.5926563143730164, "learning_rate": 0.0001, "loss": 1.3622, "step": 14725 }, { "epoch": 1.7108335753703166, "grad_norm": 0.6350700259208679, "learning_rate": 0.0001, "loss": 1.4151, "step": 14726 }, { "epoch": 1.710949753122277, "grad_norm": 0.7133452296257019, "learning_rate": 0.0001, "loss": 1.5686, "step": 14727 }, { "epoch": 1.7110659308742375, "grad_norm": 0.791262686252594, "learning_rate": 0.0001, "loss": 1.6826, "step": 14728 }, { "epoch": 1.7111821086261982, "grad_norm": 0.6219238042831421, "learning_rate": 0.0001, "loss": 1.5004, "step": 14729 }, { "epoch": 1.7112982863781587, "grad_norm": 0.6142376065254211, "learning_rate": 0.0001, "loss": 1.3113, "step": 14730 }, { "epoch": 1.7114144641301192, "grad_norm": 0.617604672908783, "learning_rate": 0.0001, "loss": 1.4467, "step": 14731 }, { "epoch": 1.7115306418820797, "grad_norm": 0.6121389865875244, "learning_rate": 0.0001, "loss": 1.4675, "step": 14732 }, { "epoch": 1.7116468196340402, "grad_norm": 0.6028082966804504, "learning_rate": 0.0001, "loss": 1.4003, "step": 14733 }, { "epoch": 1.7117629973860007, "grad_norm": 0.5863094329833984, "learning_rate": 0.0001, "loss": 1.3999, "step": 14734 }, { "epoch": 1.7118791751379612, "grad_norm": 0.6222500205039978, "learning_rate": 0.0001, "loss": 1.3348, "step": 14735 }, { "epoch": 1.7119953528899217, "grad_norm": 0.7160225510597229, "learning_rate": 0.0001, "loss": 1.5907, "step": 14736 }, { "epoch": 1.7121115306418822, "grad_norm": 0.5955639481544495, "learning_rate": 0.0001, "loss": 1.4233, "step": 14737 }, { "epoch": 1.7122277083938426, "grad_norm": 0.654880166053772, "learning_rate": 0.0001, "loss": 1.3942, "step": 14738 }, { "epoch": 1.7123438861458031, "grad_norm": 0.6375676393508911, "learning_rate": 0.0001, "loss": 1.499, "step": 14739 }, { "epoch": 1.7124600638977636, "grad_norm": 0.654710590839386, "learning_rate": 0.0001, "loss": 1.5934, "step": 14740 }, { "epoch": 1.712576241649724, "grad_norm": 0.5975367426872253, "learning_rate": 0.0001, "loss": 1.3241, "step": 14741 }, { "epoch": 1.7126924194016846, "grad_norm": 0.6368477940559387, "learning_rate": 0.0001, "loss": 1.4124, "step": 14742 }, { "epoch": 1.712808597153645, "grad_norm": 0.6215337514877319, "learning_rate": 0.0001, "loss": 1.4595, "step": 14743 }, { "epoch": 1.7129247749056056, "grad_norm": 0.5887765884399414, "learning_rate": 0.0001, "loss": 1.4359, "step": 14744 }, { "epoch": 1.713040952657566, "grad_norm": 0.6563401222229004, "learning_rate": 0.0001, "loss": 1.3508, "step": 14745 }, { "epoch": 1.7131571304095266, "grad_norm": 0.6423262357711792, "learning_rate": 0.0001, "loss": 1.3729, "step": 14746 }, { "epoch": 1.713273308161487, "grad_norm": 0.6273072957992554, "learning_rate": 0.0001, "loss": 1.4554, "step": 14747 }, { "epoch": 1.7133894859134475, "grad_norm": 0.6396011710166931, "learning_rate": 0.0001, "loss": 1.3666, "step": 14748 }, { "epoch": 1.713505663665408, "grad_norm": 0.6402760744094849, "learning_rate": 0.0001, "loss": 1.3723, "step": 14749 }, { "epoch": 1.7136218414173685, "grad_norm": 0.598962128162384, "learning_rate": 0.0001, "loss": 1.3962, "step": 14750 }, { "epoch": 1.713738019169329, "grad_norm": 0.6096288561820984, "learning_rate": 0.0001, "loss": 1.4383, "step": 14751 }, { "epoch": 1.7138541969212895, "grad_norm": 0.6256985068321228, "learning_rate": 0.0001, "loss": 1.4633, "step": 14752 }, { "epoch": 1.71397037467325, "grad_norm": 0.6655079126358032, "learning_rate": 0.0001, "loss": 1.4659, "step": 14753 }, { "epoch": 1.7140865524252105, "grad_norm": 0.6243584752082825, "learning_rate": 0.0001, "loss": 1.5277, "step": 14754 }, { "epoch": 1.714202730177171, "grad_norm": 0.6163287162780762, "learning_rate": 0.0001, "loss": 1.3551, "step": 14755 }, { "epoch": 1.7143189079291314, "grad_norm": 0.6366534233093262, "learning_rate": 0.0001, "loss": 1.5268, "step": 14756 }, { "epoch": 1.714435085681092, "grad_norm": 0.6282738447189331, "learning_rate": 0.0001, "loss": 1.4281, "step": 14757 }, { "epoch": 1.7145512634330524, "grad_norm": 0.6380648016929626, "learning_rate": 0.0001, "loss": 1.5557, "step": 14758 }, { "epoch": 1.714667441185013, "grad_norm": 0.6344602704048157, "learning_rate": 0.0001, "loss": 1.3978, "step": 14759 }, { "epoch": 1.7147836189369736, "grad_norm": 0.5779582858085632, "learning_rate": 0.0001, "loss": 1.4086, "step": 14760 }, { "epoch": 1.714899796688934, "grad_norm": 0.6426671743392944, "learning_rate": 0.0001, "loss": 1.3922, "step": 14761 }, { "epoch": 1.7150159744408946, "grad_norm": 0.6149613261222839, "learning_rate": 0.0001, "loss": 1.5402, "step": 14762 }, { "epoch": 1.715132152192855, "grad_norm": 0.6725655794143677, "learning_rate": 0.0001, "loss": 1.5138, "step": 14763 }, { "epoch": 1.7152483299448156, "grad_norm": 0.6449666619300842, "learning_rate": 0.0001, "loss": 1.5189, "step": 14764 }, { "epoch": 1.715364507696776, "grad_norm": 0.6810455918312073, "learning_rate": 0.0001, "loss": 1.6213, "step": 14765 }, { "epoch": 1.7154806854487366, "grad_norm": 0.6730927228927612, "learning_rate": 0.0001, "loss": 1.6243, "step": 14766 }, { "epoch": 1.715596863200697, "grad_norm": 0.6302074193954468, "learning_rate": 0.0001, "loss": 1.4939, "step": 14767 }, { "epoch": 1.7157130409526575, "grad_norm": 0.6140825152397156, "learning_rate": 0.0001, "loss": 1.4821, "step": 14768 }, { "epoch": 1.715829218704618, "grad_norm": 0.5870632529258728, "learning_rate": 0.0001, "loss": 1.3058, "step": 14769 }, { "epoch": 1.7159453964565787, "grad_norm": 0.5881654024124146, "learning_rate": 0.0001, "loss": 1.5019, "step": 14770 }, { "epoch": 1.7160615742085392, "grad_norm": 0.6072990894317627, "learning_rate": 0.0001, "loss": 1.3868, "step": 14771 }, { "epoch": 1.7161777519604997, "grad_norm": 0.6161254048347473, "learning_rate": 0.0001, "loss": 1.3681, "step": 14772 }, { "epoch": 1.7162939297124602, "grad_norm": 0.5859989523887634, "learning_rate": 0.0001, "loss": 1.3696, "step": 14773 }, { "epoch": 1.7164101074644207, "grad_norm": 0.6155399680137634, "learning_rate": 0.0001, "loss": 1.442, "step": 14774 }, { "epoch": 1.7165262852163812, "grad_norm": 0.6258164048194885, "learning_rate": 0.0001, "loss": 1.4765, "step": 14775 }, { "epoch": 1.7166424629683417, "grad_norm": 0.7166014313697815, "learning_rate": 0.0001, "loss": 1.4442, "step": 14776 }, { "epoch": 1.7167586407203022, "grad_norm": 0.6677807569503784, "learning_rate": 0.0001, "loss": 1.4843, "step": 14777 }, { "epoch": 1.7168748184722626, "grad_norm": 0.6242325305938721, "learning_rate": 0.0001, "loss": 1.4131, "step": 14778 }, { "epoch": 1.7169909962242231, "grad_norm": 0.6411528587341309, "learning_rate": 0.0001, "loss": 1.4636, "step": 14779 }, { "epoch": 1.7171071739761836, "grad_norm": 0.6611375212669373, "learning_rate": 0.0001, "loss": 1.5358, "step": 14780 }, { "epoch": 1.717223351728144, "grad_norm": 0.6296682953834534, "learning_rate": 0.0001, "loss": 1.4925, "step": 14781 }, { "epoch": 1.7173395294801046, "grad_norm": 0.5982662439346313, "learning_rate": 0.0001, "loss": 1.4455, "step": 14782 }, { "epoch": 1.717455707232065, "grad_norm": 0.6289569735527039, "learning_rate": 0.0001, "loss": 1.5707, "step": 14783 }, { "epoch": 1.7175718849840256, "grad_norm": 0.610512912273407, "learning_rate": 0.0001, "loss": 1.5083, "step": 14784 }, { "epoch": 1.717688062735986, "grad_norm": 0.56607985496521, "learning_rate": 0.0001, "loss": 1.3818, "step": 14785 }, { "epoch": 1.7178042404879466, "grad_norm": 0.6096919775009155, "learning_rate": 0.0001, "loss": 1.5831, "step": 14786 }, { "epoch": 1.717920418239907, "grad_norm": 0.6714848279953003, "learning_rate": 0.0001, "loss": 1.5206, "step": 14787 }, { "epoch": 1.7180365959918675, "grad_norm": 0.6003017425537109, "learning_rate": 0.0001, "loss": 1.376, "step": 14788 }, { "epoch": 1.718152773743828, "grad_norm": 0.7086442112922668, "learning_rate": 0.0001, "loss": 1.4837, "step": 14789 }, { "epoch": 1.7182689514957885, "grad_norm": 0.6242117881774902, "learning_rate": 0.0001, "loss": 1.4953, "step": 14790 }, { "epoch": 1.718385129247749, "grad_norm": 0.6298418641090393, "learning_rate": 0.0001, "loss": 1.5567, "step": 14791 }, { "epoch": 1.7185013069997095, "grad_norm": 0.6413167119026184, "learning_rate": 0.0001, "loss": 1.5655, "step": 14792 }, { "epoch": 1.71861748475167, "grad_norm": 0.6316781640052795, "learning_rate": 0.0001, "loss": 1.3297, "step": 14793 }, { "epoch": 1.7187336625036305, "grad_norm": 0.6167172193527222, "learning_rate": 0.0001, "loss": 1.4939, "step": 14794 }, { "epoch": 1.718849840255591, "grad_norm": 0.5856084227561951, "learning_rate": 0.0001, "loss": 1.3353, "step": 14795 }, { "epoch": 1.7189660180075514, "grad_norm": 0.5927707552909851, "learning_rate": 0.0001, "loss": 1.3833, "step": 14796 }, { "epoch": 1.719082195759512, "grad_norm": 0.6630589962005615, "learning_rate": 0.0001, "loss": 1.6138, "step": 14797 }, { "epoch": 1.7191983735114724, "grad_norm": 0.6281614303588867, "learning_rate": 0.0001, "loss": 1.4676, "step": 14798 }, { "epoch": 1.719314551263433, "grad_norm": 0.6465421319007874, "learning_rate": 0.0001, "loss": 1.4422, "step": 14799 }, { "epoch": 1.7194307290153934, "grad_norm": 0.6694326400756836, "learning_rate": 0.0001, "loss": 1.4792, "step": 14800 }, { "epoch": 1.7195469067673539, "grad_norm": 0.6777458190917969, "learning_rate": 0.0001, "loss": 1.5794, "step": 14801 }, { "epoch": 1.7196630845193146, "grad_norm": 0.6127638220787048, "learning_rate": 0.0001, "loss": 1.5829, "step": 14802 }, { "epoch": 1.719779262271275, "grad_norm": 0.657163679599762, "learning_rate": 0.0001, "loss": 1.4248, "step": 14803 }, { "epoch": 1.7198954400232356, "grad_norm": 0.5938178300857544, "learning_rate": 0.0001, "loss": 1.2876, "step": 14804 }, { "epoch": 1.720011617775196, "grad_norm": 0.5804201364517212, "learning_rate": 0.0001, "loss": 1.3832, "step": 14805 }, { "epoch": 1.7201277955271566, "grad_norm": 0.5739307999610901, "learning_rate": 0.0001, "loss": 1.4022, "step": 14806 }, { "epoch": 1.720243973279117, "grad_norm": 0.6426794528961182, "learning_rate": 0.0001, "loss": 1.4875, "step": 14807 }, { "epoch": 1.7203601510310775, "grad_norm": 0.6012753248214722, "learning_rate": 0.0001, "loss": 1.531, "step": 14808 }, { "epoch": 1.720476328783038, "grad_norm": 0.6352126002311707, "learning_rate": 0.0001, "loss": 1.3829, "step": 14809 }, { "epoch": 1.7205925065349985, "grad_norm": 0.6401197910308838, "learning_rate": 0.0001, "loss": 1.5839, "step": 14810 }, { "epoch": 1.720708684286959, "grad_norm": 0.6192115545272827, "learning_rate": 0.0001, "loss": 1.403, "step": 14811 }, { "epoch": 1.7208248620389197, "grad_norm": 0.5834882259368896, "learning_rate": 0.0001, "loss": 1.4002, "step": 14812 }, { "epoch": 1.7209410397908802, "grad_norm": 0.6181933283805847, "learning_rate": 0.0001, "loss": 1.3734, "step": 14813 }, { "epoch": 1.7210572175428407, "grad_norm": 0.645168662071228, "learning_rate": 0.0001, "loss": 1.6408, "step": 14814 }, { "epoch": 1.7211733952948012, "grad_norm": 0.7209157943725586, "learning_rate": 0.0001, "loss": 1.4086, "step": 14815 }, { "epoch": 1.7212895730467617, "grad_norm": 0.6558918356895447, "learning_rate": 0.0001, "loss": 1.505, "step": 14816 }, { "epoch": 1.7214057507987222, "grad_norm": 0.5995926260948181, "learning_rate": 0.0001, "loss": 1.5445, "step": 14817 }, { "epoch": 1.7215219285506826, "grad_norm": 0.5940266847610474, "learning_rate": 0.0001, "loss": 1.4528, "step": 14818 }, { "epoch": 1.7216381063026431, "grad_norm": 0.5744787454605103, "learning_rate": 0.0001, "loss": 1.405, "step": 14819 }, { "epoch": 1.7217542840546036, "grad_norm": 0.606278657913208, "learning_rate": 0.0001, "loss": 1.4874, "step": 14820 }, { "epoch": 1.7218704618065641, "grad_norm": 0.6264839172363281, "learning_rate": 0.0001, "loss": 1.5653, "step": 14821 }, { "epoch": 1.7219866395585246, "grad_norm": 0.627369225025177, "learning_rate": 0.0001, "loss": 1.5094, "step": 14822 }, { "epoch": 1.722102817310485, "grad_norm": 0.6821233034133911, "learning_rate": 0.0001, "loss": 1.4753, "step": 14823 }, { "epoch": 1.7222189950624456, "grad_norm": 0.6599948406219482, "learning_rate": 0.0001, "loss": 1.5193, "step": 14824 }, { "epoch": 1.722335172814406, "grad_norm": 0.6415464282035828, "learning_rate": 0.0001, "loss": 1.5414, "step": 14825 }, { "epoch": 1.7224513505663666, "grad_norm": 0.6503273844718933, "learning_rate": 0.0001, "loss": 1.4006, "step": 14826 }, { "epoch": 1.722567528318327, "grad_norm": 0.6336853504180908, "learning_rate": 0.0001, "loss": 1.4252, "step": 14827 }, { "epoch": 1.7226837060702875, "grad_norm": 0.6365883946418762, "learning_rate": 0.0001, "loss": 1.6283, "step": 14828 }, { "epoch": 1.722799883822248, "grad_norm": 0.6308516263961792, "learning_rate": 0.0001, "loss": 1.5141, "step": 14829 }, { "epoch": 1.7229160615742085, "grad_norm": 0.6111534237861633, "learning_rate": 0.0001, "loss": 1.4043, "step": 14830 }, { "epoch": 1.723032239326169, "grad_norm": 0.6594417095184326, "learning_rate": 0.0001, "loss": 1.2859, "step": 14831 }, { "epoch": 1.7231484170781295, "grad_norm": 0.6144202351570129, "learning_rate": 0.0001, "loss": 1.5216, "step": 14832 }, { "epoch": 1.72326459483009, "grad_norm": 0.6571009159088135, "learning_rate": 0.0001, "loss": 1.6701, "step": 14833 }, { "epoch": 1.7233807725820505, "grad_norm": 0.5851426720619202, "learning_rate": 0.0001, "loss": 1.4278, "step": 14834 }, { "epoch": 1.723496950334011, "grad_norm": 0.6420098543167114, "learning_rate": 0.0001, "loss": 1.5496, "step": 14835 }, { "epoch": 1.7236131280859714, "grad_norm": 0.6545212268829346, "learning_rate": 0.0001, "loss": 1.5927, "step": 14836 }, { "epoch": 1.723729305837932, "grad_norm": 0.5943119525909424, "learning_rate": 0.0001, "loss": 1.3249, "step": 14837 }, { "epoch": 1.7238454835898924, "grad_norm": 0.5567991733551025, "learning_rate": 0.0001, "loss": 1.3278, "step": 14838 }, { "epoch": 1.723961661341853, "grad_norm": 0.6717585325241089, "learning_rate": 0.0001, "loss": 1.4613, "step": 14839 }, { "epoch": 1.7240778390938134, "grad_norm": 0.656572163105011, "learning_rate": 0.0001, "loss": 1.5494, "step": 14840 }, { "epoch": 1.724194016845774, "grad_norm": 0.6454892754554749, "learning_rate": 0.0001, "loss": 1.438, "step": 14841 }, { "epoch": 1.7243101945977344, "grad_norm": 0.6074561476707458, "learning_rate": 0.0001, "loss": 1.5371, "step": 14842 }, { "epoch": 1.7244263723496949, "grad_norm": 0.6063897013664246, "learning_rate": 0.0001, "loss": 1.5064, "step": 14843 }, { "epoch": 1.7245425501016556, "grad_norm": 0.5994085073471069, "learning_rate": 0.0001, "loss": 1.4391, "step": 14844 }, { "epoch": 1.724658727853616, "grad_norm": 0.631662905216217, "learning_rate": 0.0001, "loss": 1.6205, "step": 14845 }, { "epoch": 1.7247749056055766, "grad_norm": 0.6281533241271973, "learning_rate": 0.0001, "loss": 1.5942, "step": 14846 }, { "epoch": 1.724891083357537, "grad_norm": 0.6609871983528137, "learning_rate": 0.0001, "loss": 1.3702, "step": 14847 }, { "epoch": 1.7250072611094975, "grad_norm": 0.648665726184845, "learning_rate": 0.0001, "loss": 1.5237, "step": 14848 }, { "epoch": 1.725123438861458, "grad_norm": 0.6376709342002869, "learning_rate": 0.0001, "loss": 1.4291, "step": 14849 }, { "epoch": 1.7252396166134185, "grad_norm": 0.6555134057998657, "learning_rate": 0.0001, "loss": 1.6408, "step": 14850 }, { "epoch": 1.725355794365379, "grad_norm": 0.6427141427993774, "learning_rate": 0.0001, "loss": 1.4558, "step": 14851 }, { "epoch": 1.7254719721173395, "grad_norm": 0.641374409198761, "learning_rate": 0.0001, "loss": 1.3376, "step": 14852 }, { "epoch": 1.7255881498693, "grad_norm": 0.6612485647201538, "learning_rate": 0.0001, "loss": 1.5375, "step": 14853 }, { "epoch": 1.7257043276212607, "grad_norm": 0.6559956669807434, "learning_rate": 0.0001, "loss": 1.6083, "step": 14854 }, { "epoch": 1.7258205053732212, "grad_norm": 0.6144281625747681, "learning_rate": 0.0001, "loss": 1.4464, "step": 14855 }, { "epoch": 1.7259366831251817, "grad_norm": 0.6333773136138916, "learning_rate": 0.0001, "loss": 1.4129, "step": 14856 }, { "epoch": 1.7260528608771422, "grad_norm": 0.6108373999595642, "learning_rate": 0.0001, "loss": 1.3981, "step": 14857 }, { "epoch": 1.7261690386291026, "grad_norm": 0.6192029118537903, "learning_rate": 0.0001, "loss": 1.4526, "step": 14858 }, { "epoch": 1.7262852163810631, "grad_norm": 0.6309804320335388, "learning_rate": 0.0001, "loss": 1.2411, "step": 14859 }, { "epoch": 1.7264013941330236, "grad_norm": 0.588129997253418, "learning_rate": 0.0001, "loss": 1.371, "step": 14860 }, { "epoch": 1.7265175718849841, "grad_norm": 0.5779204964637756, "learning_rate": 0.0001, "loss": 1.3219, "step": 14861 }, { "epoch": 1.7266337496369446, "grad_norm": 0.6525713205337524, "learning_rate": 0.0001, "loss": 1.4946, "step": 14862 }, { "epoch": 1.726749927388905, "grad_norm": 0.6475061774253845, "learning_rate": 0.0001, "loss": 1.3756, "step": 14863 }, { "epoch": 1.7268661051408656, "grad_norm": 0.6767624020576477, "learning_rate": 0.0001, "loss": 1.4313, "step": 14864 }, { "epoch": 1.726982282892826, "grad_norm": 0.6144096851348877, "learning_rate": 0.0001, "loss": 1.5719, "step": 14865 }, { "epoch": 1.7270984606447866, "grad_norm": 0.5616995692253113, "learning_rate": 0.0001, "loss": 1.2873, "step": 14866 }, { "epoch": 1.727214638396747, "grad_norm": 0.5927960872650146, "learning_rate": 0.0001, "loss": 1.2225, "step": 14867 }, { "epoch": 1.7273308161487075, "grad_norm": 0.6717016696929932, "learning_rate": 0.0001, "loss": 1.5557, "step": 14868 }, { "epoch": 1.727446993900668, "grad_norm": 0.6246774196624756, "learning_rate": 0.0001, "loss": 1.4958, "step": 14869 }, { "epoch": 1.7275631716526285, "grad_norm": 0.6078056693077087, "learning_rate": 0.0001, "loss": 1.4331, "step": 14870 }, { "epoch": 1.727679349404589, "grad_norm": 0.6438655257225037, "learning_rate": 0.0001, "loss": 1.4329, "step": 14871 }, { "epoch": 1.7277955271565495, "grad_norm": 0.5785349011421204, "learning_rate": 0.0001, "loss": 1.3317, "step": 14872 }, { "epoch": 1.72791170490851, "grad_norm": 0.6208078265190125, "learning_rate": 0.0001, "loss": 1.4352, "step": 14873 }, { "epoch": 1.7280278826604705, "grad_norm": 0.6939451694488525, "learning_rate": 0.0001, "loss": 1.5787, "step": 14874 }, { "epoch": 1.728144060412431, "grad_norm": 0.6345509886741638, "learning_rate": 0.0001, "loss": 1.4445, "step": 14875 }, { "epoch": 1.7282602381643914, "grad_norm": 0.6041543483734131, "learning_rate": 0.0001, "loss": 1.3317, "step": 14876 }, { "epoch": 1.728376415916352, "grad_norm": 0.610222339630127, "learning_rate": 0.0001, "loss": 1.3053, "step": 14877 }, { "epoch": 1.7284925936683124, "grad_norm": 0.6278032660484314, "learning_rate": 0.0001, "loss": 1.4083, "step": 14878 }, { "epoch": 1.728608771420273, "grad_norm": 0.6465734243392944, "learning_rate": 0.0001, "loss": 1.4495, "step": 14879 }, { "epoch": 1.7287249491722334, "grad_norm": 0.6702896952629089, "learning_rate": 0.0001, "loss": 1.5436, "step": 14880 }, { "epoch": 1.728841126924194, "grad_norm": 0.6279995441436768, "learning_rate": 0.0001, "loss": 1.5054, "step": 14881 }, { "epoch": 1.7289573046761544, "grad_norm": 0.5991693735122681, "learning_rate": 0.0001, "loss": 1.3988, "step": 14882 }, { "epoch": 1.7290734824281149, "grad_norm": 0.6270583868026733, "learning_rate": 0.0001, "loss": 1.4969, "step": 14883 }, { "epoch": 1.7291896601800754, "grad_norm": 0.6589519381523132, "learning_rate": 0.0001, "loss": 1.4226, "step": 14884 }, { "epoch": 1.7293058379320358, "grad_norm": 0.6318827271461487, "learning_rate": 0.0001, "loss": 1.561, "step": 14885 }, { "epoch": 1.7294220156839966, "grad_norm": 0.6211879253387451, "learning_rate": 0.0001, "loss": 1.4565, "step": 14886 }, { "epoch": 1.729538193435957, "grad_norm": 0.5783466696739197, "learning_rate": 0.0001, "loss": 1.3544, "step": 14887 }, { "epoch": 1.7296543711879175, "grad_norm": 0.5930356979370117, "learning_rate": 0.0001, "loss": 1.2345, "step": 14888 }, { "epoch": 1.729770548939878, "grad_norm": 0.6086358428001404, "learning_rate": 0.0001, "loss": 1.5209, "step": 14889 }, { "epoch": 1.7298867266918385, "grad_norm": 0.6617630124092102, "learning_rate": 0.0001, "loss": 1.4126, "step": 14890 }, { "epoch": 1.730002904443799, "grad_norm": 0.634216845035553, "learning_rate": 0.0001, "loss": 1.47, "step": 14891 }, { "epoch": 1.7301190821957595, "grad_norm": 0.5907754302024841, "learning_rate": 0.0001, "loss": 1.4448, "step": 14892 }, { "epoch": 1.73023525994772, "grad_norm": 0.6396786570549011, "learning_rate": 0.0001, "loss": 1.4374, "step": 14893 }, { "epoch": 1.7303514376996805, "grad_norm": 0.6407013535499573, "learning_rate": 0.0001, "loss": 1.3586, "step": 14894 }, { "epoch": 1.730467615451641, "grad_norm": 0.6141471862792969, "learning_rate": 0.0001, "loss": 1.388, "step": 14895 }, { "epoch": 1.7305837932036017, "grad_norm": 0.6699461936950684, "learning_rate": 0.0001, "loss": 1.6199, "step": 14896 }, { "epoch": 1.7306999709555622, "grad_norm": 0.6322328448295593, "learning_rate": 0.0001, "loss": 1.5543, "step": 14897 }, { "epoch": 1.7308161487075227, "grad_norm": 0.5870790481567383, "learning_rate": 0.0001, "loss": 1.3775, "step": 14898 }, { "epoch": 1.7309323264594831, "grad_norm": 0.6236059665679932, "learning_rate": 0.0001, "loss": 1.3759, "step": 14899 }, { "epoch": 1.7310485042114436, "grad_norm": 0.6963167190551758, "learning_rate": 0.0001, "loss": 1.7131, "step": 14900 }, { "epoch": 1.7311646819634041, "grad_norm": 0.5722723007202148, "learning_rate": 0.0001, "loss": 1.3605, "step": 14901 }, { "epoch": 1.7312808597153646, "grad_norm": 0.5907723903656006, "learning_rate": 0.0001, "loss": 1.4767, "step": 14902 }, { "epoch": 1.731397037467325, "grad_norm": 0.6472699046134949, "learning_rate": 0.0001, "loss": 1.5775, "step": 14903 }, { "epoch": 1.7315132152192856, "grad_norm": 0.596060574054718, "learning_rate": 0.0001, "loss": 1.5383, "step": 14904 }, { "epoch": 1.731629392971246, "grad_norm": 0.6367079019546509, "learning_rate": 0.0001, "loss": 1.5159, "step": 14905 }, { "epoch": 1.7317455707232066, "grad_norm": 0.6356344223022461, "learning_rate": 0.0001, "loss": 1.5387, "step": 14906 }, { "epoch": 1.731861748475167, "grad_norm": 0.6061190366744995, "learning_rate": 0.0001, "loss": 1.3663, "step": 14907 }, { "epoch": 1.7319779262271275, "grad_norm": 0.6087552905082703, "learning_rate": 0.0001, "loss": 1.4722, "step": 14908 }, { "epoch": 1.732094103979088, "grad_norm": 0.5894343256950378, "learning_rate": 0.0001, "loss": 1.5388, "step": 14909 }, { "epoch": 1.7322102817310485, "grad_norm": 0.6816506385803223, "learning_rate": 0.0001, "loss": 1.4844, "step": 14910 }, { "epoch": 1.732326459483009, "grad_norm": 0.589027464389801, "learning_rate": 0.0001, "loss": 1.5099, "step": 14911 }, { "epoch": 1.7324426372349695, "grad_norm": 0.6024441123008728, "learning_rate": 0.0001, "loss": 1.5363, "step": 14912 }, { "epoch": 1.73255881498693, "grad_norm": 0.6175540089607239, "learning_rate": 0.0001, "loss": 1.3875, "step": 14913 }, { "epoch": 1.7326749927388905, "grad_norm": 0.6664891242980957, "learning_rate": 0.0001, "loss": 1.3915, "step": 14914 }, { "epoch": 1.732791170490851, "grad_norm": 0.5898049473762512, "learning_rate": 0.0001, "loss": 1.504, "step": 14915 }, { "epoch": 1.7329073482428115, "grad_norm": 0.6187012195587158, "learning_rate": 0.0001, "loss": 1.35, "step": 14916 }, { "epoch": 1.733023525994772, "grad_norm": 0.6249679327011108, "learning_rate": 0.0001, "loss": 1.5201, "step": 14917 }, { "epoch": 1.7331397037467324, "grad_norm": 0.674238920211792, "learning_rate": 0.0001, "loss": 1.4897, "step": 14918 }, { "epoch": 1.733255881498693, "grad_norm": 0.5915499329566956, "learning_rate": 0.0001, "loss": 1.4503, "step": 14919 }, { "epoch": 1.7333720592506534, "grad_norm": 0.5969433784484863, "learning_rate": 0.0001, "loss": 1.4824, "step": 14920 }, { "epoch": 1.733488237002614, "grad_norm": 0.6584394574165344, "learning_rate": 0.0001, "loss": 1.5596, "step": 14921 }, { "epoch": 1.7336044147545744, "grad_norm": 0.5716344118118286, "learning_rate": 0.0001, "loss": 1.3798, "step": 14922 }, { "epoch": 1.7337205925065349, "grad_norm": 0.6250728368759155, "learning_rate": 0.0001, "loss": 1.474, "step": 14923 }, { "epoch": 1.7338367702584954, "grad_norm": 0.6489406228065491, "learning_rate": 0.0001, "loss": 1.3943, "step": 14924 }, { "epoch": 1.7339529480104559, "grad_norm": 0.5895406007766724, "learning_rate": 0.0001, "loss": 1.3282, "step": 14925 }, { "epoch": 1.7340691257624163, "grad_norm": 0.6469747424125671, "learning_rate": 0.0001, "loss": 1.539, "step": 14926 }, { "epoch": 1.734185303514377, "grad_norm": 0.6923781037330627, "learning_rate": 0.0001, "loss": 1.584, "step": 14927 }, { "epoch": 1.7343014812663375, "grad_norm": 0.6293679475784302, "learning_rate": 0.0001, "loss": 1.3905, "step": 14928 }, { "epoch": 1.734417659018298, "grad_norm": 0.6348400712013245, "learning_rate": 0.0001, "loss": 1.4043, "step": 14929 }, { "epoch": 1.7345338367702585, "grad_norm": 0.6766868829727173, "learning_rate": 0.0001, "loss": 1.4581, "step": 14930 }, { "epoch": 1.734650014522219, "grad_norm": 0.6041527986526489, "learning_rate": 0.0001, "loss": 1.452, "step": 14931 }, { "epoch": 1.7347661922741795, "grad_norm": 0.5944593548774719, "learning_rate": 0.0001, "loss": 1.3284, "step": 14932 }, { "epoch": 1.73488237002614, "grad_norm": 0.5816593170166016, "learning_rate": 0.0001, "loss": 1.3934, "step": 14933 }, { "epoch": 1.7349985477781005, "grad_norm": 0.610496461391449, "learning_rate": 0.0001, "loss": 1.5364, "step": 14934 }, { "epoch": 1.735114725530061, "grad_norm": 0.6939106583595276, "learning_rate": 0.0001, "loss": 1.5775, "step": 14935 }, { "epoch": 1.7352309032820215, "grad_norm": 0.6299450993537903, "learning_rate": 0.0001, "loss": 1.5211, "step": 14936 }, { "epoch": 1.735347081033982, "grad_norm": 0.6612195372581482, "learning_rate": 0.0001, "loss": 1.4804, "step": 14937 }, { "epoch": 1.7354632587859427, "grad_norm": 0.6642609238624573, "learning_rate": 0.0001, "loss": 1.469, "step": 14938 }, { "epoch": 1.7355794365379031, "grad_norm": 0.6344380378723145, "learning_rate": 0.0001, "loss": 1.261, "step": 14939 }, { "epoch": 1.7356956142898636, "grad_norm": 0.6331685781478882, "learning_rate": 0.0001, "loss": 1.4629, "step": 14940 }, { "epoch": 1.7358117920418241, "grad_norm": 0.6545323729515076, "learning_rate": 0.0001, "loss": 1.4304, "step": 14941 }, { "epoch": 1.7359279697937846, "grad_norm": 0.6539961695671082, "learning_rate": 0.0001, "loss": 1.682, "step": 14942 }, { "epoch": 1.736044147545745, "grad_norm": 0.6771539449691772, "learning_rate": 0.0001, "loss": 1.2817, "step": 14943 }, { "epoch": 1.7361603252977056, "grad_norm": 0.6527926921844482, "learning_rate": 0.0001, "loss": 1.5158, "step": 14944 }, { "epoch": 1.736276503049666, "grad_norm": 0.6027122139930725, "learning_rate": 0.0001, "loss": 1.4582, "step": 14945 }, { "epoch": 1.7363926808016266, "grad_norm": 0.6119851469993591, "learning_rate": 0.0001, "loss": 1.4296, "step": 14946 }, { "epoch": 1.736508858553587, "grad_norm": 0.5789207816123962, "learning_rate": 0.0001, "loss": 1.3547, "step": 14947 }, { "epoch": 1.7366250363055475, "grad_norm": 0.6395784616470337, "learning_rate": 0.0001, "loss": 1.3039, "step": 14948 }, { "epoch": 1.736741214057508, "grad_norm": 0.7226824164390564, "learning_rate": 0.0001, "loss": 1.5902, "step": 14949 }, { "epoch": 1.7368573918094685, "grad_norm": 0.677904486656189, "learning_rate": 0.0001, "loss": 1.533, "step": 14950 }, { "epoch": 1.736973569561429, "grad_norm": 0.7338805198669434, "learning_rate": 0.0001, "loss": 1.619, "step": 14951 }, { "epoch": 1.7370897473133895, "grad_norm": 0.6601492762565613, "learning_rate": 0.0001, "loss": 1.5001, "step": 14952 }, { "epoch": 1.73720592506535, "grad_norm": 0.6641669273376465, "learning_rate": 0.0001, "loss": 1.4123, "step": 14953 }, { "epoch": 1.7373221028173105, "grad_norm": 0.6902362108230591, "learning_rate": 0.0001, "loss": 1.5876, "step": 14954 }, { "epoch": 1.737438280569271, "grad_norm": 0.6217620968818665, "learning_rate": 0.0001, "loss": 1.4348, "step": 14955 }, { "epoch": 1.7375544583212315, "grad_norm": 0.6086151599884033, "learning_rate": 0.0001, "loss": 1.4338, "step": 14956 }, { "epoch": 1.737670636073192, "grad_norm": 0.583915114402771, "learning_rate": 0.0001, "loss": 1.325, "step": 14957 }, { "epoch": 1.7377868138251524, "grad_norm": 0.5941867828369141, "learning_rate": 0.0001, "loss": 1.4816, "step": 14958 }, { "epoch": 1.737902991577113, "grad_norm": 0.6316176652908325, "learning_rate": 0.0001, "loss": 1.4773, "step": 14959 }, { "epoch": 1.7380191693290734, "grad_norm": 0.6003541350364685, "learning_rate": 0.0001, "loss": 1.3213, "step": 14960 }, { "epoch": 1.738135347081034, "grad_norm": 0.6288439035415649, "learning_rate": 0.0001, "loss": 1.3297, "step": 14961 }, { "epoch": 1.7382515248329944, "grad_norm": 0.6566857099533081, "learning_rate": 0.0001, "loss": 1.5911, "step": 14962 }, { "epoch": 1.7383677025849549, "grad_norm": 0.6032436490058899, "learning_rate": 0.0001, "loss": 1.4093, "step": 14963 }, { "epoch": 1.7384838803369154, "grad_norm": 0.6279575228691101, "learning_rate": 0.0001, "loss": 1.513, "step": 14964 }, { "epoch": 1.7386000580888759, "grad_norm": 0.6450163722038269, "learning_rate": 0.0001, "loss": 1.5388, "step": 14965 }, { "epoch": 1.7387162358408363, "grad_norm": 0.6442850828170776, "learning_rate": 0.0001, "loss": 1.493, "step": 14966 }, { "epoch": 1.7388324135927968, "grad_norm": 0.6041399836540222, "learning_rate": 0.0001, "loss": 1.4484, "step": 14967 }, { "epoch": 1.7389485913447573, "grad_norm": 0.6500686407089233, "learning_rate": 0.0001, "loss": 1.4989, "step": 14968 }, { "epoch": 1.739064769096718, "grad_norm": 0.6627811789512634, "learning_rate": 0.0001, "loss": 1.718, "step": 14969 }, { "epoch": 1.7391809468486785, "grad_norm": 0.6354086399078369, "learning_rate": 0.0001, "loss": 1.4982, "step": 14970 }, { "epoch": 1.739297124600639, "grad_norm": 0.606508195400238, "learning_rate": 0.0001, "loss": 1.3115, "step": 14971 }, { "epoch": 1.7394133023525995, "grad_norm": 0.6003134846687317, "learning_rate": 0.0001, "loss": 1.4044, "step": 14972 }, { "epoch": 1.73952948010456, "grad_norm": 0.6335530877113342, "learning_rate": 0.0001, "loss": 1.5405, "step": 14973 }, { "epoch": 1.7396456578565205, "grad_norm": 0.6900773644447327, "learning_rate": 0.0001, "loss": 1.491, "step": 14974 }, { "epoch": 1.739761835608481, "grad_norm": 0.6731414794921875, "learning_rate": 0.0001, "loss": 1.3901, "step": 14975 }, { "epoch": 1.7398780133604415, "grad_norm": 0.601018488407135, "learning_rate": 0.0001, "loss": 1.3836, "step": 14976 }, { "epoch": 1.739994191112402, "grad_norm": 0.61836177110672, "learning_rate": 0.0001, "loss": 1.4741, "step": 14977 }, { "epoch": 1.7401103688643624, "grad_norm": 0.6186898946762085, "learning_rate": 0.0001, "loss": 1.5577, "step": 14978 }, { "epoch": 1.740226546616323, "grad_norm": 0.6342140436172485, "learning_rate": 0.0001, "loss": 1.5403, "step": 14979 }, { "epoch": 1.7403427243682836, "grad_norm": 0.7394945621490479, "learning_rate": 0.0001, "loss": 1.608, "step": 14980 }, { "epoch": 1.7404589021202441, "grad_norm": 0.6118188500404358, "learning_rate": 0.0001, "loss": 1.568, "step": 14981 }, { "epoch": 1.7405750798722046, "grad_norm": 0.5938742756843567, "learning_rate": 0.0001, "loss": 1.489, "step": 14982 }, { "epoch": 1.740691257624165, "grad_norm": 0.635901153087616, "learning_rate": 0.0001, "loss": 1.4667, "step": 14983 }, { "epoch": 1.7408074353761256, "grad_norm": 0.5821294188499451, "learning_rate": 0.0001, "loss": 1.3741, "step": 14984 }, { "epoch": 1.740923613128086, "grad_norm": 0.5809999704360962, "learning_rate": 0.0001, "loss": 1.2669, "step": 14985 }, { "epoch": 1.7410397908800466, "grad_norm": 0.6173656582832336, "learning_rate": 0.0001, "loss": 1.4666, "step": 14986 }, { "epoch": 1.741155968632007, "grad_norm": 0.6136579513549805, "learning_rate": 0.0001, "loss": 1.4284, "step": 14987 }, { "epoch": 1.7412721463839675, "grad_norm": 0.587314784526825, "learning_rate": 0.0001, "loss": 1.3173, "step": 14988 }, { "epoch": 1.741388324135928, "grad_norm": 0.6620569825172424, "learning_rate": 0.0001, "loss": 1.3517, "step": 14989 }, { "epoch": 1.7415045018878885, "grad_norm": 0.6463390588760376, "learning_rate": 0.0001, "loss": 1.4413, "step": 14990 }, { "epoch": 1.741620679639849, "grad_norm": 0.6573072671890259, "learning_rate": 0.0001, "loss": 1.3351, "step": 14991 }, { "epoch": 1.7417368573918095, "grad_norm": 0.6141982674598694, "learning_rate": 0.0001, "loss": 1.5339, "step": 14992 }, { "epoch": 1.74185303514377, "grad_norm": 0.6553406119346619, "learning_rate": 0.0001, "loss": 1.6102, "step": 14993 }, { "epoch": 1.7419692128957305, "grad_norm": 0.6405891180038452, "learning_rate": 0.0001, "loss": 1.5675, "step": 14994 }, { "epoch": 1.742085390647691, "grad_norm": 0.5946596264839172, "learning_rate": 0.0001, "loss": 1.3917, "step": 14995 }, { "epoch": 1.7422015683996515, "grad_norm": 0.6346911787986755, "learning_rate": 0.0001, "loss": 1.478, "step": 14996 }, { "epoch": 1.742317746151612, "grad_norm": 0.6111404895782471, "learning_rate": 0.0001, "loss": 1.3964, "step": 14997 }, { "epoch": 1.7424339239035724, "grad_norm": 0.5915775299072266, "learning_rate": 0.0001, "loss": 1.2769, "step": 14998 }, { "epoch": 1.742550101655533, "grad_norm": 0.5770561099052429, "learning_rate": 0.0001, "loss": 1.4304, "step": 14999 }, { "epoch": 1.7426662794074934, "grad_norm": 0.6432168483734131, "learning_rate": 0.0001, "loss": 1.5803, "step": 15000 }, { "epoch": 1.742782457159454, "grad_norm": 0.6395055651664734, "learning_rate": 0.0001, "loss": 1.4534, "step": 15001 }, { "epoch": 1.7428986349114144, "grad_norm": 0.6546747088432312, "learning_rate": 0.0001, "loss": 1.3898, "step": 15002 }, { "epoch": 1.7430148126633749, "grad_norm": 0.6467830538749695, "learning_rate": 0.0001, "loss": 1.4269, "step": 15003 }, { "epoch": 1.7431309904153354, "grad_norm": 0.6273411512374878, "learning_rate": 0.0001, "loss": 1.5364, "step": 15004 }, { "epoch": 1.7432471681672959, "grad_norm": 0.5850337147712708, "learning_rate": 0.0001, "loss": 1.4392, "step": 15005 }, { "epoch": 1.7433633459192563, "grad_norm": 0.6644369959831238, "learning_rate": 0.0001, "loss": 1.5125, "step": 15006 }, { "epoch": 1.7434795236712168, "grad_norm": 0.6572169661521912, "learning_rate": 0.0001, "loss": 1.4615, "step": 15007 }, { "epoch": 1.7435957014231773, "grad_norm": 0.6588318943977356, "learning_rate": 0.0001, "loss": 1.5521, "step": 15008 }, { "epoch": 1.7437118791751378, "grad_norm": 0.6325780153274536, "learning_rate": 0.0001, "loss": 1.4485, "step": 15009 }, { "epoch": 1.7438280569270983, "grad_norm": 0.586868941783905, "learning_rate": 0.0001, "loss": 1.3655, "step": 15010 }, { "epoch": 1.743944234679059, "grad_norm": 0.597052812576294, "learning_rate": 0.0001, "loss": 1.4304, "step": 15011 }, { "epoch": 1.7440604124310195, "grad_norm": 0.5788699388504028, "learning_rate": 0.0001, "loss": 1.2966, "step": 15012 }, { "epoch": 1.74417659018298, "grad_norm": 0.5919994115829468, "learning_rate": 0.0001, "loss": 1.4554, "step": 15013 }, { "epoch": 1.7442927679349405, "grad_norm": 0.6199740767478943, "learning_rate": 0.0001, "loss": 1.5435, "step": 15014 }, { "epoch": 1.744408945686901, "grad_norm": 0.6682386994361877, "learning_rate": 0.0001, "loss": 1.5001, "step": 15015 }, { "epoch": 1.7445251234388615, "grad_norm": 0.6738241910934448, "learning_rate": 0.0001, "loss": 1.451, "step": 15016 }, { "epoch": 1.744641301190822, "grad_norm": 0.5972993969917297, "learning_rate": 0.0001, "loss": 1.2828, "step": 15017 }, { "epoch": 1.7447574789427824, "grad_norm": 0.6440829038619995, "learning_rate": 0.0001, "loss": 1.3913, "step": 15018 }, { "epoch": 1.744873656694743, "grad_norm": 0.6381629705429077, "learning_rate": 0.0001, "loss": 1.5157, "step": 15019 }, { "epoch": 1.7449898344467034, "grad_norm": 0.5952252149581909, "learning_rate": 0.0001, "loss": 1.4531, "step": 15020 }, { "epoch": 1.745106012198664, "grad_norm": 0.6799747943878174, "learning_rate": 0.0001, "loss": 1.3397, "step": 15021 }, { "epoch": 1.7452221899506246, "grad_norm": 0.6936914920806885, "learning_rate": 0.0001, "loss": 1.5184, "step": 15022 }, { "epoch": 1.745338367702585, "grad_norm": 0.5949373841285706, "learning_rate": 0.0001, "loss": 1.4549, "step": 15023 }, { "epoch": 1.7454545454545456, "grad_norm": 0.5943772196769714, "learning_rate": 0.0001, "loss": 1.2775, "step": 15024 }, { "epoch": 1.745570723206506, "grad_norm": 0.6044946908950806, "learning_rate": 0.0001, "loss": 1.3416, "step": 15025 }, { "epoch": 1.7456869009584666, "grad_norm": 0.6261039972305298, "learning_rate": 0.0001, "loss": 1.4467, "step": 15026 }, { "epoch": 1.745803078710427, "grad_norm": 0.6201652884483337, "learning_rate": 0.0001, "loss": 1.2549, "step": 15027 }, { "epoch": 1.7459192564623875, "grad_norm": 0.6456770896911621, "learning_rate": 0.0001, "loss": 1.4033, "step": 15028 }, { "epoch": 1.746035434214348, "grad_norm": 0.6587963700294495, "learning_rate": 0.0001, "loss": 1.4745, "step": 15029 }, { "epoch": 1.7461516119663085, "grad_norm": 0.615346372127533, "learning_rate": 0.0001, "loss": 1.4984, "step": 15030 }, { "epoch": 1.746267789718269, "grad_norm": 0.649808943271637, "learning_rate": 0.0001, "loss": 1.4235, "step": 15031 }, { "epoch": 1.7463839674702295, "grad_norm": 0.6134951710700989, "learning_rate": 0.0001, "loss": 1.4333, "step": 15032 }, { "epoch": 1.74650014522219, "grad_norm": 0.6592127680778503, "learning_rate": 0.0001, "loss": 1.3805, "step": 15033 }, { "epoch": 1.7466163229741505, "grad_norm": 0.5940577983856201, "learning_rate": 0.0001, "loss": 1.335, "step": 15034 }, { "epoch": 1.746732500726111, "grad_norm": 0.6388440728187561, "learning_rate": 0.0001, "loss": 1.4598, "step": 15035 }, { "epoch": 1.7468486784780715, "grad_norm": 0.6137976050376892, "learning_rate": 0.0001, "loss": 1.4523, "step": 15036 }, { "epoch": 1.746964856230032, "grad_norm": 0.6836861968040466, "learning_rate": 0.0001, "loss": 1.4933, "step": 15037 }, { "epoch": 1.7470810339819924, "grad_norm": 0.6250720024108887, "learning_rate": 0.0001, "loss": 1.4868, "step": 15038 }, { "epoch": 1.747197211733953, "grad_norm": 0.6634293794631958, "learning_rate": 0.0001, "loss": 1.5837, "step": 15039 }, { "epoch": 1.7473133894859134, "grad_norm": 0.5937802791595459, "learning_rate": 0.0001, "loss": 1.4692, "step": 15040 }, { "epoch": 1.747429567237874, "grad_norm": 0.6354300379753113, "learning_rate": 0.0001, "loss": 1.4147, "step": 15041 }, { "epoch": 1.7475457449898344, "grad_norm": 0.6555377840995789, "learning_rate": 0.0001, "loss": 1.6544, "step": 15042 }, { "epoch": 1.7476619227417949, "grad_norm": 0.588962972164154, "learning_rate": 0.0001, "loss": 1.4207, "step": 15043 }, { "epoch": 1.7477781004937554, "grad_norm": 0.5658825635910034, "learning_rate": 0.0001, "loss": 1.287, "step": 15044 }, { "epoch": 1.7478942782457159, "grad_norm": 0.6380483508110046, "learning_rate": 0.0001, "loss": 1.4252, "step": 15045 }, { "epoch": 1.7480104559976763, "grad_norm": 0.6003624200820923, "learning_rate": 0.0001, "loss": 1.4379, "step": 15046 }, { "epoch": 1.7481266337496368, "grad_norm": 0.6264920830726624, "learning_rate": 0.0001, "loss": 1.3841, "step": 15047 }, { "epoch": 1.7482428115015973, "grad_norm": 0.5998390316963196, "learning_rate": 0.0001, "loss": 1.4459, "step": 15048 }, { "epoch": 1.7483589892535578, "grad_norm": 0.5849918127059937, "learning_rate": 0.0001, "loss": 1.2989, "step": 15049 }, { "epoch": 1.7484751670055183, "grad_norm": 0.6505645513534546, "learning_rate": 0.0001, "loss": 1.4034, "step": 15050 }, { "epoch": 1.7485913447574788, "grad_norm": 0.6195028424263, "learning_rate": 0.0001, "loss": 1.5051, "step": 15051 }, { "epoch": 1.7487075225094393, "grad_norm": 0.6650033593177795, "learning_rate": 0.0001, "loss": 1.4214, "step": 15052 }, { "epoch": 1.7488237002614, "grad_norm": 0.56318598985672, "learning_rate": 0.0001, "loss": 1.2997, "step": 15053 }, { "epoch": 1.7489398780133605, "grad_norm": 0.6114184856414795, "learning_rate": 0.0001, "loss": 1.4699, "step": 15054 }, { "epoch": 1.749056055765321, "grad_norm": 0.600736141204834, "learning_rate": 0.0001, "loss": 1.4202, "step": 15055 }, { "epoch": 1.7491722335172815, "grad_norm": 0.6152333617210388, "learning_rate": 0.0001, "loss": 1.5808, "step": 15056 }, { "epoch": 1.749288411269242, "grad_norm": 0.648831307888031, "learning_rate": 0.0001, "loss": 1.4797, "step": 15057 }, { "epoch": 1.7494045890212024, "grad_norm": 0.6709495782852173, "learning_rate": 0.0001, "loss": 1.486, "step": 15058 }, { "epoch": 1.749520766773163, "grad_norm": 0.6742035150527954, "learning_rate": 0.0001, "loss": 1.4963, "step": 15059 }, { "epoch": 1.7496369445251234, "grad_norm": 0.6638956665992737, "learning_rate": 0.0001, "loss": 1.3844, "step": 15060 }, { "epoch": 1.749753122277084, "grad_norm": 0.6610105633735657, "learning_rate": 0.0001, "loss": 1.4468, "step": 15061 }, { "epoch": 1.7498693000290444, "grad_norm": 0.6239005923271179, "learning_rate": 0.0001, "loss": 1.4541, "step": 15062 }, { "epoch": 1.7499854777810049, "grad_norm": 0.6245071887969971, "learning_rate": 0.0001, "loss": 1.4137, "step": 15063 }, { "epoch": 1.7501016555329656, "grad_norm": 0.607805609703064, "learning_rate": 0.0001, "loss": 1.2952, "step": 15064 }, { "epoch": 1.750217833284926, "grad_norm": 0.6441650986671448, "learning_rate": 0.0001, "loss": 1.3018, "step": 15065 }, { "epoch": 1.7503340110368866, "grad_norm": 0.618821918964386, "learning_rate": 0.0001, "loss": 1.4018, "step": 15066 }, { "epoch": 1.750450188788847, "grad_norm": 0.639229953289032, "learning_rate": 0.0001, "loss": 1.4329, "step": 15067 }, { "epoch": 1.7505663665408076, "grad_norm": 0.6427684426307678, "learning_rate": 0.0001, "loss": 1.3651, "step": 15068 }, { "epoch": 1.750682544292768, "grad_norm": 0.6033851504325867, "learning_rate": 0.0001, "loss": 1.2519, "step": 15069 }, { "epoch": 1.7507987220447285, "grad_norm": 0.6717852354049683, "learning_rate": 0.0001, "loss": 1.6181, "step": 15070 }, { "epoch": 1.750914899796689, "grad_norm": 0.6119966506958008, "learning_rate": 0.0001, "loss": 1.2561, "step": 15071 }, { "epoch": 1.7510310775486495, "grad_norm": 0.6730672121047974, "learning_rate": 0.0001, "loss": 1.5001, "step": 15072 }, { "epoch": 1.75114725530061, "grad_norm": 0.6353880763053894, "learning_rate": 0.0001, "loss": 1.4568, "step": 15073 }, { "epoch": 1.7512634330525705, "grad_norm": 0.6133871674537659, "learning_rate": 0.0001, "loss": 1.3442, "step": 15074 }, { "epoch": 1.751379610804531, "grad_norm": 0.6594220399856567, "learning_rate": 0.0001, "loss": 1.4641, "step": 15075 }, { "epoch": 1.7514957885564915, "grad_norm": 0.6014381647109985, "learning_rate": 0.0001, "loss": 1.5058, "step": 15076 }, { "epoch": 1.751611966308452, "grad_norm": 0.6819438338279724, "learning_rate": 0.0001, "loss": 1.6147, "step": 15077 }, { "epoch": 1.7517281440604124, "grad_norm": 0.6183732748031616, "learning_rate": 0.0001, "loss": 1.329, "step": 15078 }, { "epoch": 1.751844321812373, "grad_norm": 0.641448974609375, "learning_rate": 0.0001, "loss": 1.4084, "step": 15079 }, { "epoch": 1.7519604995643334, "grad_norm": 0.5996828079223633, "learning_rate": 0.0001, "loss": 1.3851, "step": 15080 }, { "epoch": 1.752076677316294, "grad_norm": 0.6433569192886353, "learning_rate": 0.0001, "loss": 1.3972, "step": 15081 }, { "epoch": 1.7521928550682544, "grad_norm": 0.6291333436965942, "learning_rate": 0.0001, "loss": 1.5722, "step": 15082 }, { "epoch": 1.7523090328202149, "grad_norm": 0.6067669987678528, "learning_rate": 0.0001, "loss": 1.5167, "step": 15083 }, { "epoch": 1.7524252105721754, "grad_norm": 0.6297567486763, "learning_rate": 0.0001, "loss": 1.5581, "step": 15084 }, { "epoch": 1.7525413883241359, "grad_norm": 0.6200578212738037, "learning_rate": 0.0001, "loss": 1.4387, "step": 15085 }, { "epoch": 1.7526575660760964, "grad_norm": 0.662356972694397, "learning_rate": 0.0001, "loss": 1.6797, "step": 15086 }, { "epoch": 1.7527737438280568, "grad_norm": 0.6195921301841736, "learning_rate": 0.0001, "loss": 1.4765, "step": 15087 }, { "epoch": 1.7528899215800173, "grad_norm": 0.6161373257637024, "learning_rate": 0.0001, "loss": 1.4572, "step": 15088 }, { "epoch": 1.7530060993319778, "grad_norm": 0.6230433583259583, "learning_rate": 0.0001, "loss": 1.4347, "step": 15089 }, { "epoch": 1.7531222770839383, "grad_norm": 0.6298375725746155, "learning_rate": 0.0001, "loss": 1.3929, "step": 15090 }, { "epoch": 1.7532384548358988, "grad_norm": 0.6077491641044617, "learning_rate": 0.0001, "loss": 1.2395, "step": 15091 }, { "epoch": 1.7533546325878593, "grad_norm": 0.6149680018424988, "learning_rate": 0.0001, "loss": 1.4589, "step": 15092 }, { "epoch": 1.7534708103398198, "grad_norm": 0.6441075205802917, "learning_rate": 0.0001, "loss": 1.4822, "step": 15093 }, { "epoch": 1.7535869880917803, "grad_norm": 0.6269232630729675, "learning_rate": 0.0001, "loss": 1.375, "step": 15094 }, { "epoch": 1.753703165843741, "grad_norm": 0.6457510590553284, "learning_rate": 0.0001, "loss": 1.536, "step": 15095 }, { "epoch": 1.7538193435957015, "grad_norm": 0.6038615703582764, "learning_rate": 0.0001, "loss": 1.5117, "step": 15096 }, { "epoch": 1.753935521347662, "grad_norm": 0.6739659905433655, "learning_rate": 0.0001, "loss": 1.5519, "step": 15097 }, { "epoch": 1.7540516990996224, "grad_norm": 0.6151448488235474, "learning_rate": 0.0001, "loss": 1.5084, "step": 15098 }, { "epoch": 1.754167876851583, "grad_norm": 0.6436981558799744, "learning_rate": 0.0001, "loss": 1.6158, "step": 15099 }, { "epoch": 1.7542840546035434, "grad_norm": 0.5808581113815308, "learning_rate": 0.0001, "loss": 1.3385, "step": 15100 }, { "epoch": 1.754400232355504, "grad_norm": 0.6201916933059692, "learning_rate": 0.0001, "loss": 1.5146, "step": 15101 }, { "epoch": 1.7545164101074644, "grad_norm": 0.6582393646240234, "learning_rate": 0.0001, "loss": 1.3327, "step": 15102 }, { "epoch": 1.7546325878594249, "grad_norm": 0.6628432869911194, "learning_rate": 0.0001, "loss": 1.6235, "step": 15103 }, { "epoch": 1.7547487656113854, "grad_norm": 0.6497442722320557, "learning_rate": 0.0001, "loss": 1.4938, "step": 15104 }, { "epoch": 1.7548649433633459, "grad_norm": 0.5993771553039551, "learning_rate": 0.0001, "loss": 1.4852, "step": 15105 }, { "epoch": 1.7549811211153066, "grad_norm": 0.6184335947036743, "learning_rate": 0.0001, "loss": 1.6169, "step": 15106 }, { "epoch": 1.755097298867267, "grad_norm": 0.6989839673042297, "learning_rate": 0.0001, "loss": 1.6119, "step": 15107 }, { "epoch": 1.7552134766192276, "grad_norm": 0.6278969645500183, "learning_rate": 0.0001, "loss": 1.3826, "step": 15108 }, { "epoch": 1.755329654371188, "grad_norm": 0.6574743390083313, "learning_rate": 0.0001, "loss": 1.4077, "step": 15109 }, { "epoch": 1.7554458321231485, "grad_norm": 0.6639965176582336, "learning_rate": 0.0001, "loss": 1.5295, "step": 15110 }, { "epoch": 1.755562009875109, "grad_norm": 0.6918113827705383, "learning_rate": 0.0001, "loss": 1.4271, "step": 15111 }, { "epoch": 1.7556781876270695, "grad_norm": 0.6522295475006104, "learning_rate": 0.0001, "loss": 1.4183, "step": 15112 }, { "epoch": 1.75579436537903, "grad_norm": 0.6580625772476196, "learning_rate": 0.0001, "loss": 1.4975, "step": 15113 }, { "epoch": 1.7559105431309905, "grad_norm": 0.677513062953949, "learning_rate": 0.0001, "loss": 1.5059, "step": 15114 }, { "epoch": 1.756026720882951, "grad_norm": 0.6215130090713501, "learning_rate": 0.0001, "loss": 1.4397, "step": 15115 }, { "epoch": 1.7561428986349115, "grad_norm": 0.6136471033096313, "learning_rate": 0.0001, "loss": 1.3608, "step": 15116 }, { "epoch": 1.756259076386872, "grad_norm": 0.633785605430603, "learning_rate": 0.0001, "loss": 1.4681, "step": 15117 }, { "epoch": 1.7563752541388324, "grad_norm": 0.6462593078613281, "learning_rate": 0.0001, "loss": 1.4939, "step": 15118 }, { "epoch": 1.756491431890793, "grad_norm": 0.6808847188949585, "learning_rate": 0.0001, "loss": 1.3673, "step": 15119 }, { "epoch": 1.7566076096427534, "grad_norm": 0.6496431231498718, "learning_rate": 0.0001, "loss": 1.401, "step": 15120 }, { "epoch": 1.756723787394714, "grad_norm": 0.6454607844352722, "learning_rate": 0.0001, "loss": 1.5417, "step": 15121 }, { "epoch": 1.7568399651466744, "grad_norm": 0.5999384522438049, "learning_rate": 0.0001, "loss": 1.4212, "step": 15122 }, { "epoch": 1.7569561428986349, "grad_norm": 0.6660782694816589, "learning_rate": 0.0001, "loss": 1.5721, "step": 15123 }, { "epoch": 1.7570723206505954, "grad_norm": 0.6537553071975708, "learning_rate": 0.0001, "loss": 1.5435, "step": 15124 }, { "epoch": 1.7571884984025559, "grad_norm": 0.6898263096809387, "learning_rate": 0.0001, "loss": 1.3527, "step": 15125 }, { "epoch": 1.7573046761545164, "grad_norm": 0.6758860349655151, "learning_rate": 0.0001, "loss": 1.5896, "step": 15126 }, { "epoch": 1.7574208539064768, "grad_norm": 0.5965571999549866, "learning_rate": 0.0001, "loss": 1.4576, "step": 15127 }, { "epoch": 1.7575370316584373, "grad_norm": 0.6066791415214539, "learning_rate": 0.0001, "loss": 1.3818, "step": 15128 }, { "epoch": 1.7576532094103978, "grad_norm": 0.5797308683395386, "learning_rate": 0.0001, "loss": 1.4762, "step": 15129 }, { "epoch": 1.7577693871623583, "grad_norm": 0.5891377329826355, "learning_rate": 0.0001, "loss": 1.358, "step": 15130 }, { "epoch": 1.7578855649143188, "grad_norm": 0.5882729291915894, "learning_rate": 0.0001, "loss": 1.3172, "step": 15131 }, { "epoch": 1.7580017426662793, "grad_norm": 0.5878888964653015, "learning_rate": 0.0001, "loss": 1.2588, "step": 15132 }, { "epoch": 1.7581179204182398, "grad_norm": 0.5813704133033752, "learning_rate": 0.0001, "loss": 1.3899, "step": 15133 }, { "epoch": 1.7582340981702003, "grad_norm": 0.6040433049201965, "learning_rate": 0.0001, "loss": 1.4627, "step": 15134 }, { "epoch": 1.7583502759221608, "grad_norm": 0.6528154015541077, "learning_rate": 0.0001, "loss": 1.3874, "step": 15135 }, { "epoch": 1.7584664536741212, "grad_norm": 0.6494193077087402, "learning_rate": 0.0001, "loss": 1.5437, "step": 15136 }, { "epoch": 1.758582631426082, "grad_norm": 0.619196355342865, "learning_rate": 0.0001, "loss": 1.3111, "step": 15137 }, { "epoch": 1.7586988091780424, "grad_norm": 0.6160910129547119, "learning_rate": 0.0001, "loss": 1.2636, "step": 15138 }, { "epoch": 1.758814986930003, "grad_norm": 0.6449258923530579, "learning_rate": 0.0001, "loss": 1.5567, "step": 15139 }, { "epoch": 1.7589311646819634, "grad_norm": 0.6017787456512451, "learning_rate": 0.0001, "loss": 1.3471, "step": 15140 }, { "epoch": 1.759047342433924, "grad_norm": 0.7070938348770142, "learning_rate": 0.0001, "loss": 1.6235, "step": 15141 }, { "epoch": 1.7591635201858844, "grad_norm": 0.6401433348655701, "learning_rate": 0.0001, "loss": 1.4894, "step": 15142 }, { "epoch": 1.7592796979378449, "grad_norm": 0.6522578597068787, "learning_rate": 0.0001, "loss": 1.3684, "step": 15143 }, { "epoch": 1.7593958756898054, "grad_norm": 0.6095269322395325, "learning_rate": 0.0001, "loss": 1.4612, "step": 15144 }, { "epoch": 1.7595120534417659, "grad_norm": 0.7137290835380554, "learning_rate": 0.0001, "loss": 1.6957, "step": 15145 }, { "epoch": 1.7596282311937264, "grad_norm": 0.6265722513198853, "learning_rate": 0.0001, "loss": 1.6093, "step": 15146 }, { "epoch": 1.759744408945687, "grad_norm": 0.6235710978507996, "learning_rate": 0.0001, "loss": 1.4391, "step": 15147 }, { "epoch": 1.7598605866976476, "grad_norm": 0.5946484804153442, "learning_rate": 0.0001, "loss": 1.4924, "step": 15148 }, { "epoch": 1.759976764449608, "grad_norm": 0.6407091021537781, "learning_rate": 0.0001, "loss": 1.3607, "step": 15149 }, { "epoch": 1.7600929422015685, "grad_norm": 0.5906693935394287, "learning_rate": 0.0001, "loss": 1.2804, "step": 15150 }, { "epoch": 1.760209119953529, "grad_norm": 0.6209679841995239, "learning_rate": 0.0001, "loss": 1.4671, "step": 15151 }, { "epoch": 1.7603252977054895, "grad_norm": 0.6487011909484863, "learning_rate": 0.0001, "loss": 1.3589, "step": 15152 }, { "epoch": 1.76044147545745, "grad_norm": 0.6216895580291748, "learning_rate": 0.0001, "loss": 1.4931, "step": 15153 }, { "epoch": 1.7605576532094105, "grad_norm": 0.5904422402381897, "learning_rate": 0.0001, "loss": 1.3922, "step": 15154 }, { "epoch": 1.760673830961371, "grad_norm": 0.6489174365997314, "learning_rate": 0.0001, "loss": 1.5609, "step": 15155 }, { "epoch": 1.7607900087133315, "grad_norm": 0.6204991936683655, "learning_rate": 0.0001, "loss": 1.4795, "step": 15156 }, { "epoch": 1.760906186465292, "grad_norm": 0.6347008943557739, "learning_rate": 0.0001, "loss": 1.4584, "step": 15157 }, { "epoch": 1.7610223642172524, "grad_norm": 0.6178375482559204, "learning_rate": 0.0001, "loss": 1.6082, "step": 15158 }, { "epoch": 1.761138541969213, "grad_norm": 0.6166771650314331, "learning_rate": 0.0001, "loss": 1.5743, "step": 15159 }, { "epoch": 1.7612547197211734, "grad_norm": 0.636630654335022, "learning_rate": 0.0001, "loss": 1.502, "step": 15160 }, { "epoch": 1.761370897473134, "grad_norm": 0.6614100933074951, "learning_rate": 0.0001, "loss": 1.4299, "step": 15161 }, { "epoch": 1.7614870752250944, "grad_norm": 0.6548051238059998, "learning_rate": 0.0001, "loss": 1.472, "step": 15162 }, { "epoch": 1.761603252977055, "grad_norm": 0.6765132546424866, "learning_rate": 0.0001, "loss": 1.5299, "step": 15163 }, { "epoch": 1.7617194307290154, "grad_norm": 0.6402645111083984, "learning_rate": 0.0001, "loss": 1.4741, "step": 15164 }, { "epoch": 1.7618356084809759, "grad_norm": 0.6402096152305603, "learning_rate": 0.0001, "loss": 1.5683, "step": 15165 }, { "epoch": 1.7619517862329364, "grad_norm": 0.6129283308982849, "learning_rate": 0.0001, "loss": 1.5653, "step": 15166 }, { "epoch": 1.7620679639848968, "grad_norm": 0.6220570206642151, "learning_rate": 0.0001, "loss": 1.4425, "step": 15167 }, { "epoch": 1.7621841417368573, "grad_norm": 0.6414580345153809, "learning_rate": 0.0001, "loss": 1.5567, "step": 15168 }, { "epoch": 1.7623003194888178, "grad_norm": 0.6086570024490356, "learning_rate": 0.0001, "loss": 1.5355, "step": 15169 }, { "epoch": 1.7624164972407783, "grad_norm": 0.6041181683540344, "learning_rate": 0.0001, "loss": 1.3614, "step": 15170 }, { "epoch": 1.7625326749927388, "grad_norm": 0.7212263941764832, "learning_rate": 0.0001, "loss": 1.5506, "step": 15171 }, { "epoch": 1.7626488527446993, "grad_norm": 0.6596695780754089, "learning_rate": 0.0001, "loss": 1.4614, "step": 15172 }, { "epoch": 1.7627650304966598, "grad_norm": 0.6024173498153687, "learning_rate": 0.0001, "loss": 1.3578, "step": 15173 }, { "epoch": 1.7628812082486203, "grad_norm": 0.6716031432151794, "learning_rate": 0.0001, "loss": 1.4653, "step": 15174 }, { "epoch": 1.7629973860005808, "grad_norm": 0.6493299603462219, "learning_rate": 0.0001, "loss": 1.4089, "step": 15175 }, { "epoch": 1.7631135637525412, "grad_norm": 0.651054859161377, "learning_rate": 0.0001, "loss": 1.4232, "step": 15176 }, { "epoch": 1.7632297415045017, "grad_norm": 0.6091866493225098, "learning_rate": 0.0001, "loss": 1.4645, "step": 15177 }, { "epoch": 1.7633459192564622, "grad_norm": 0.6780480146408081, "learning_rate": 0.0001, "loss": 1.4562, "step": 15178 }, { "epoch": 1.763462097008423, "grad_norm": 0.6240149140357971, "learning_rate": 0.0001, "loss": 1.4626, "step": 15179 }, { "epoch": 1.7635782747603834, "grad_norm": 0.646800696849823, "learning_rate": 0.0001, "loss": 1.5647, "step": 15180 }, { "epoch": 1.763694452512344, "grad_norm": 0.5924319624900818, "learning_rate": 0.0001, "loss": 1.3453, "step": 15181 }, { "epoch": 1.7638106302643044, "grad_norm": 0.5899021029472351, "learning_rate": 0.0001, "loss": 1.3861, "step": 15182 }, { "epoch": 1.763926808016265, "grad_norm": 0.679233193397522, "learning_rate": 0.0001, "loss": 1.6765, "step": 15183 }, { "epoch": 1.7640429857682254, "grad_norm": 0.6055246591567993, "learning_rate": 0.0001, "loss": 1.3876, "step": 15184 }, { "epoch": 1.7641591635201859, "grad_norm": 0.6095439195632935, "learning_rate": 0.0001, "loss": 1.3983, "step": 15185 }, { "epoch": 1.7642753412721464, "grad_norm": 0.6251705288887024, "learning_rate": 0.0001, "loss": 1.5069, "step": 15186 }, { "epoch": 1.7643915190241068, "grad_norm": 0.6146259903907776, "learning_rate": 0.0001, "loss": 1.5028, "step": 15187 }, { "epoch": 1.7645076967760673, "grad_norm": 0.6173810362815857, "learning_rate": 0.0001, "loss": 1.4804, "step": 15188 }, { "epoch": 1.764623874528028, "grad_norm": 0.6138263940811157, "learning_rate": 0.0001, "loss": 1.4347, "step": 15189 }, { "epoch": 1.7647400522799885, "grad_norm": 0.6156290173530579, "learning_rate": 0.0001, "loss": 1.4232, "step": 15190 }, { "epoch": 1.764856230031949, "grad_norm": 0.6088510155677795, "learning_rate": 0.0001, "loss": 1.2664, "step": 15191 }, { "epoch": 1.7649724077839095, "grad_norm": 0.6944965720176697, "learning_rate": 0.0001, "loss": 1.4426, "step": 15192 }, { "epoch": 1.76508858553587, "grad_norm": 0.600951075553894, "learning_rate": 0.0001, "loss": 1.4428, "step": 15193 }, { "epoch": 1.7652047632878305, "grad_norm": 0.6315292716026306, "learning_rate": 0.0001, "loss": 1.503, "step": 15194 }, { "epoch": 1.765320941039791, "grad_norm": 0.6163005828857422, "learning_rate": 0.0001, "loss": 1.5804, "step": 15195 }, { "epoch": 1.7654371187917515, "grad_norm": 0.6311103105545044, "learning_rate": 0.0001, "loss": 1.4576, "step": 15196 }, { "epoch": 1.765553296543712, "grad_norm": 0.6568660736083984, "learning_rate": 0.0001, "loss": 1.5567, "step": 15197 }, { "epoch": 1.7656694742956724, "grad_norm": 0.6219604015350342, "learning_rate": 0.0001, "loss": 1.447, "step": 15198 }, { "epoch": 1.765785652047633, "grad_norm": 0.615856409072876, "learning_rate": 0.0001, "loss": 1.422, "step": 15199 }, { "epoch": 1.7659018297995934, "grad_norm": 0.5975033044815063, "learning_rate": 0.0001, "loss": 1.4522, "step": 15200 }, { "epoch": 1.766018007551554, "grad_norm": 0.6199268102645874, "learning_rate": 0.0001, "loss": 1.3942, "step": 15201 }, { "epoch": 1.7661341853035144, "grad_norm": 0.6389952898025513, "learning_rate": 0.0001, "loss": 1.2439, "step": 15202 }, { "epoch": 1.766250363055475, "grad_norm": 0.6434933543205261, "learning_rate": 0.0001, "loss": 1.2923, "step": 15203 }, { "epoch": 1.7663665408074354, "grad_norm": 0.6037079095840454, "learning_rate": 0.0001, "loss": 1.3685, "step": 15204 }, { "epoch": 1.7664827185593959, "grad_norm": 0.6691433191299438, "learning_rate": 0.0001, "loss": 1.4527, "step": 15205 }, { "epoch": 1.7665988963113564, "grad_norm": 0.6160171627998352, "learning_rate": 0.0001, "loss": 1.4963, "step": 15206 }, { "epoch": 1.7667150740633168, "grad_norm": 0.6590574383735657, "learning_rate": 0.0001, "loss": 1.5282, "step": 15207 }, { "epoch": 1.7668312518152773, "grad_norm": 0.6321625113487244, "learning_rate": 0.0001, "loss": 1.5395, "step": 15208 }, { "epoch": 1.7669474295672378, "grad_norm": 0.61099773645401, "learning_rate": 0.0001, "loss": 1.4993, "step": 15209 }, { "epoch": 1.7670636073191983, "grad_norm": 0.6425243020057678, "learning_rate": 0.0001, "loss": 1.4927, "step": 15210 }, { "epoch": 1.7671797850711588, "grad_norm": 0.6273446083068848, "learning_rate": 0.0001, "loss": 1.5778, "step": 15211 }, { "epoch": 1.7672959628231193, "grad_norm": 0.6233547925949097, "learning_rate": 0.0001, "loss": 1.545, "step": 15212 }, { "epoch": 1.7674121405750798, "grad_norm": 0.6338528394699097, "learning_rate": 0.0001, "loss": 1.5318, "step": 15213 }, { "epoch": 1.7675283183270403, "grad_norm": 0.6094504594802856, "learning_rate": 0.0001, "loss": 1.4414, "step": 15214 }, { "epoch": 1.7676444960790008, "grad_norm": 0.6037681102752686, "learning_rate": 0.0001, "loss": 1.2316, "step": 15215 }, { "epoch": 1.7677606738309612, "grad_norm": 0.6513276100158691, "learning_rate": 0.0001, "loss": 1.4963, "step": 15216 }, { "epoch": 1.7678768515829217, "grad_norm": 0.6128390431404114, "learning_rate": 0.0001, "loss": 1.4387, "step": 15217 }, { "epoch": 1.7679930293348822, "grad_norm": 0.602202832698822, "learning_rate": 0.0001, "loss": 1.4465, "step": 15218 }, { "epoch": 1.7681092070868427, "grad_norm": 0.6091821789741516, "learning_rate": 0.0001, "loss": 1.4265, "step": 15219 }, { "epoch": 1.7682253848388032, "grad_norm": 0.6470639109611511, "learning_rate": 0.0001, "loss": 1.4598, "step": 15220 }, { "epoch": 1.768341562590764, "grad_norm": 0.6129840016365051, "learning_rate": 0.0001, "loss": 1.5435, "step": 15221 }, { "epoch": 1.7684577403427244, "grad_norm": 0.6531546115875244, "learning_rate": 0.0001, "loss": 1.5679, "step": 15222 }, { "epoch": 1.768573918094685, "grad_norm": 0.6650546193122864, "learning_rate": 0.0001, "loss": 1.6495, "step": 15223 }, { "epoch": 1.7686900958466454, "grad_norm": 0.6529395580291748, "learning_rate": 0.0001, "loss": 1.3365, "step": 15224 }, { "epoch": 1.7688062735986059, "grad_norm": 0.636663556098938, "learning_rate": 0.0001, "loss": 1.4164, "step": 15225 }, { "epoch": 1.7689224513505664, "grad_norm": 0.6182118058204651, "learning_rate": 0.0001, "loss": 1.497, "step": 15226 }, { "epoch": 1.7690386291025268, "grad_norm": 0.6166336536407471, "learning_rate": 0.0001, "loss": 1.42, "step": 15227 }, { "epoch": 1.7691548068544873, "grad_norm": 0.5651601552963257, "learning_rate": 0.0001, "loss": 1.4086, "step": 15228 }, { "epoch": 1.7692709846064478, "grad_norm": 0.6268301606178284, "learning_rate": 0.0001, "loss": 1.4596, "step": 15229 }, { "epoch": 1.7693871623584083, "grad_norm": 0.6724717617034912, "learning_rate": 0.0001, "loss": 1.7388, "step": 15230 }, { "epoch": 1.769503340110369, "grad_norm": 0.5851798057556152, "learning_rate": 0.0001, "loss": 1.3092, "step": 15231 }, { "epoch": 1.7696195178623295, "grad_norm": 0.6190688014030457, "learning_rate": 0.0001, "loss": 1.4238, "step": 15232 }, { "epoch": 1.76973569561429, "grad_norm": 0.6267878413200378, "learning_rate": 0.0001, "loss": 1.5207, "step": 15233 }, { "epoch": 1.7698518733662505, "grad_norm": 0.5954955816268921, "learning_rate": 0.0001, "loss": 1.4273, "step": 15234 }, { "epoch": 1.769968051118211, "grad_norm": 0.6041606068611145, "learning_rate": 0.0001, "loss": 1.439, "step": 15235 }, { "epoch": 1.7700842288701715, "grad_norm": 0.6317799091339111, "learning_rate": 0.0001, "loss": 1.4598, "step": 15236 }, { "epoch": 1.770200406622132, "grad_norm": 0.6325985789299011, "learning_rate": 0.0001, "loss": 1.4497, "step": 15237 }, { "epoch": 1.7703165843740925, "grad_norm": 0.6945986747741699, "learning_rate": 0.0001, "loss": 1.523, "step": 15238 }, { "epoch": 1.770432762126053, "grad_norm": 0.6552503108978271, "learning_rate": 0.0001, "loss": 1.2562, "step": 15239 }, { "epoch": 1.7705489398780134, "grad_norm": 0.6622961163520813, "learning_rate": 0.0001, "loss": 1.324, "step": 15240 }, { "epoch": 1.770665117629974, "grad_norm": 0.6506061553955078, "learning_rate": 0.0001, "loss": 1.3427, "step": 15241 }, { "epoch": 1.7707812953819344, "grad_norm": 0.6822132468223572, "learning_rate": 0.0001, "loss": 1.4515, "step": 15242 }, { "epoch": 1.770897473133895, "grad_norm": 0.6006519794464111, "learning_rate": 0.0001, "loss": 1.4778, "step": 15243 }, { "epoch": 1.7710136508858554, "grad_norm": 0.6622036099433899, "learning_rate": 0.0001, "loss": 1.2741, "step": 15244 }, { "epoch": 1.7711298286378159, "grad_norm": 0.6481319069862366, "learning_rate": 0.0001, "loss": 1.3735, "step": 15245 }, { "epoch": 1.7712460063897764, "grad_norm": 0.626197338104248, "learning_rate": 0.0001, "loss": 1.3566, "step": 15246 }, { "epoch": 1.7713621841417369, "grad_norm": 0.6185934543609619, "learning_rate": 0.0001, "loss": 1.4342, "step": 15247 }, { "epoch": 1.7714783618936973, "grad_norm": 0.6000529527664185, "learning_rate": 0.0001, "loss": 1.4596, "step": 15248 }, { "epoch": 1.7715945396456578, "grad_norm": 0.6031962633132935, "learning_rate": 0.0001, "loss": 1.3454, "step": 15249 }, { "epoch": 1.7717107173976183, "grad_norm": 0.6442926526069641, "learning_rate": 0.0001, "loss": 1.4174, "step": 15250 }, { "epoch": 1.7718268951495788, "grad_norm": 0.6320897340774536, "learning_rate": 0.0001, "loss": 1.46, "step": 15251 }, { "epoch": 1.7719430729015393, "grad_norm": 0.5655400156974792, "learning_rate": 0.0001, "loss": 1.4628, "step": 15252 }, { "epoch": 1.7720592506534998, "grad_norm": 0.6556128859519958, "learning_rate": 0.0001, "loss": 1.4857, "step": 15253 }, { "epoch": 1.7721754284054603, "grad_norm": 0.6669363379478455, "learning_rate": 0.0001, "loss": 1.7047, "step": 15254 }, { "epoch": 1.7722916061574208, "grad_norm": 0.623723030090332, "learning_rate": 0.0001, "loss": 1.5303, "step": 15255 }, { "epoch": 1.7724077839093813, "grad_norm": 0.631964385509491, "learning_rate": 0.0001, "loss": 1.4779, "step": 15256 }, { "epoch": 1.7725239616613417, "grad_norm": 0.6440684199333191, "learning_rate": 0.0001, "loss": 1.5772, "step": 15257 }, { "epoch": 1.7726401394133022, "grad_norm": 0.63379967212677, "learning_rate": 0.0001, "loss": 1.5415, "step": 15258 }, { "epoch": 1.7727563171652627, "grad_norm": 0.6206088066101074, "learning_rate": 0.0001, "loss": 1.3873, "step": 15259 }, { "epoch": 1.7728724949172232, "grad_norm": 0.6498738527297974, "learning_rate": 0.0001, "loss": 1.5142, "step": 15260 }, { "epoch": 1.7729886726691837, "grad_norm": 0.6904322504997253, "learning_rate": 0.0001, "loss": 1.5715, "step": 15261 }, { "epoch": 1.7731048504211442, "grad_norm": 0.6221197247505188, "learning_rate": 0.0001, "loss": 1.5061, "step": 15262 }, { "epoch": 1.773221028173105, "grad_norm": 0.6042959094047546, "learning_rate": 0.0001, "loss": 1.4654, "step": 15263 }, { "epoch": 1.7733372059250654, "grad_norm": 0.6099536418914795, "learning_rate": 0.0001, "loss": 1.3479, "step": 15264 }, { "epoch": 1.7734533836770259, "grad_norm": 0.5856276154518127, "learning_rate": 0.0001, "loss": 1.6446, "step": 15265 }, { "epoch": 1.7735695614289864, "grad_norm": 0.621751606464386, "learning_rate": 0.0001, "loss": 1.4287, "step": 15266 }, { "epoch": 1.7736857391809469, "grad_norm": 0.5653685331344604, "learning_rate": 0.0001, "loss": 1.2917, "step": 15267 }, { "epoch": 1.7738019169329073, "grad_norm": 0.7086547613143921, "learning_rate": 0.0001, "loss": 1.3996, "step": 15268 }, { "epoch": 1.7739180946848678, "grad_norm": 0.6452550888061523, "learning_rate": 0.0001, "loss": 1.4397, "step": 15269 }, { "epoch": 1.7740342724368283, "grad_norm": 0.6548465490341187, "learning_rate": 0.0001, "loss": 1.4688, "step": 15270 }, { "epoch": 1.7741504501887888, "grad_norm": 0.669599175453186, "learning_rate": 0.0001, "loss": 1.4927, "step": 15271 }, { "epoch": 1.7742666279407493, "grad_norm": 0.6198776364326477, "learning_rate": 0.0001, "loss": 1.4836, "step": 15272 }, { "epoch": 1.77438280569271, "grad_norm": 0.6791712641716003, "learning_rate": 0.0001, "loss": 1.6155, "step": 15273 }, { "epoch": 1.7744989834446705, "grad_norm": 0.63079833984375, "learning_rate": 0.0001, "loss": 1.5732, "step": 15274 }, { "epoch": 1.774615161196631, "grad_norm": 0.6533926725387573, "learning_rate": 0.0001, "loss": 1.3501, "step": 15275 }, { "epoch": 1.7747313389485915, "grad_norm": 0.6267770528793335, "learning_rate": 0.0001, "loss": 1.4863, "step": 15276 }, { "epoch": 1.774847516700552, "grad_norm": 0.5884193778038025, "learning_rate": 0.0001, "loss": 1.477, "step": 15277 }, { "epoch": 1.7749636944525125, "grad_norm": 0.6586154699325562, "learning_rate": 0.0001, "loss": 1.5167, "step": 15278 }, { "epoch": 1.775079872204473, "grad_norm": 0.6086314916610718, "learning_rate": 0.0001, "loss": 1.404, "step": 15279 }, { "epoch": 1.7751960499564334, "grad_norm": 0.5791851282119751, "learning_rate": 0.0001, "loss": 1.291, "step": 15280 }, { "epoch": 1.775312227708394, "grad_norm": 0.6863628625869751, "learning_rate": 0.0001, "loss": 1.5406, "step": 15281 }, { "epoch": 1.7754284054603544, "grad_norm": 0.581861674785614, "learning_rate": 0.0001, "loss": 1.3177, "step": 15282 }, { "epoch": 1.775544583212315, "grad_norm": 0.6596324443817139, "learning_rate": 0.0001, "loss": 1.6688, "step": 15283 }, { "epoch": 1.7756607609642754, "grad_norm": 0.6306443214416504, "learning_rate": 0.0001, "loss": 1.3862, "step": 15284 }, { "epoch": 1.7757769387162359, "grad_norm": 0.6120966076850891, "learning_rate": 0.0001, "loss": 1.488, "step": 15285 }, { "epoch": 1.7758931164681964, "grad_norm": 0.6473104953765869, "learning_rate": 0.0001, "loss": 1.4156, "step": 15286 }, { "epoch": 1.7760092942201569, "grad_norm": 0.6253796219825745, "learning_rate": 0.0001, "loss": 1.5138, "step": 15287 }, { "epoch": 1.7761254719721173, "grad_norm": 0.5919986963272095, "learning_rate": 0.0001, "loss": 1.2642, "step": 15288 }, { "epoch": 1.7762416497240778, "grad_norm": 0.6327444314956665, "learning_rate": 0.0001, "loss": 1.499, "step": 15289 }, { "epoch": 1.7763578274760383, "grad_norm": 0.6434443593025208, "learning_rate": 0.0001, "loss": 1.3832, "step": 15290 }, { "epoch": 1.7764740052279988, "grad_norm": 0.678210437297821, "learning_rate": 0.0001, "loss": 1.5793, "step": 15291 }, { "epoch": 1.7765901829799593, "grad_norm": 0.6756482124328613, "learning_rate": 0.0001, "loss": 1.5003, "step": 15292 }, { "epoch": 1.7767063607319198, "grad_norm": 0.6156312227249146, "learning_rate": 0.0001, "loss": 1.2463, "step": 15293 }, { "epoch": 1.7768225384838803, "grad_norm": 0.6035404205322266, "learning_rate": 0.0001, "loss": 1.407, "step": 15294 }, { "epoch": 1.7769387162358408, "grad_norm": 0.579347550868988, "learning_rate": 0.0001, "loss": 1.4089, "step": 15295 }, { "epoch": 1.7770548939878013, "grad_norm": 0.6338497996330261, "learning_rate": 0.0001, "loss": 1.4386, "step": 15296 }, { "epoch": 1.7771710717397617, "grad_norm": 0.6521024703979492, "learning_rate": 0.0001, "loss": 1.4465, "step": 15297 }, { "epoch": 1.7772872494917222, "grad_norm": 0.6606890559196472, "learning_rate": 0.0001, "loss": 1.4526, "step": 15298 }, { "epoch": 1.7774034272436827, "grad_norm": 0.6389157176017761, "learning_rate": 0.0001, "loss": 1.4403, "step": 15299 }, { "epoch": 1.7775196049956432, "grad_norm": 0.623223066329956, "learning_rate": 0.0001, "loss": 1.3796, "step": 15300 }, { "epoch": 1.7776357827476037, "grad_norm": 0.6090806126594543, "learning_rate": 0.0001, "loss": 1.3068, "step": 15301 }, { "epoch": 1.7777519604995642, "grad_norm": 0.6257072687149048, "learning_rate": 0.0001, "loss": 1.4551, "step": 15302 }, { "epoch": 1.7778681382515247, "grad_norm": 0.6237324476242065, "learning_rate": 0.0001, "loss": 1.4583, "step": 15303 }, { "epoch": 1.7779843160034854, "grad_norm": 0.5848506689071655, "learning_rate": 0.0001, "loss": 1.2779, "step": 15304 }, { "epoch": 1.7781004937554459, "grad_norm": 0.6606771945953369, "learning_rate": 0.0001, "loss": 1.4422, "step": 15305 }, { "epoch": 1.7782166715074064, "grad_norm": 0.6356858611106873, "learning_rate": 0.0001, "loss": 1.3737, "step": 15306 }, { "epoch": 1.7783328492593669, "grad_norm": 0.646452009677887, "learning_rate": 0.0001, "loss": 1.4217, "step": 15307 }, { "epoch": 1.7784490270113273, "grad_norm": 0.6183162927627563, "learning_rate": 0.0001, "loss": 1.3692, "step": 15308 }, { "epoch": 1.7785652047632878, "grad_norm": 0.6706176400184631, "learning_rate": 0.0001, "loss": 1.5664, "step": 15309 }, { "epoch": 1.7786813825152483, "grad_norm": 0.6701763272285461, "learning_rate": 0.0001, "loss": 1.5171, "step": 15310 }, { "epoch": 1.7787975602672088, "grad_norm": 0.5740065574645996, "learning_rate": 0.0001, "loss": 1.3704, "step": 15311 }, { "epoch": 1.7789137380191693, "grad_norm": 0.6892088651657104, "learning_rate": 0.0001, "loss": 1.5917, "step": 15312 }, { "epoch": 1.7790299157711298, "grad_norm": 0.6434208154678345, "learning_rate": 0.0001, "loss": 1.4718, "step": 15313 }, { "epoch": 1.7791460935230903, "grad_norm": 0.6382471919059753, "learning_rate": 0.0001, "loss": 1.5909, "step": 15314 }, { "epoch": 1.779262271275051, "grad_norm": 0.6431048512458801, "learning_rate": 0.0001, "loss": 1.5161, "step": 15315 }, { "epoch": 1.7793784490270115, "grad_norm": 0.6321643590927124, "learning_rate": 0.0001, "loss": 1.324, "step": 15316 }, { "epoch": 1.779494626778972, "grad_norm": 0.6527208089828491, "learning_rate": 0.0001, "loss": 1.3941, "step": 15317 }, { "epoch": 1.7796108045309325, "grad_norm": 0.6331608295440674, "learning_rate": 0.0001, "loss": 1.3747, "step": 15318 }, { "epoch": 1.779726982282893, "grad_norm": 0.6525262594223022, "learning_rate": 0.0001, "loss": 1.5938, "step": 15319 }, { "epoch": 1.7798431600348534, "grad_norm": 0.6555960178375244, "learning_rate": 0.0001, "loss": 1.4205, "step": 15320 }, { "epoch": 1.779959337786814, "grad_norm": 0.6114421486854553, "learning_rate": 0.0001, "loss": 1.3179, "step": 15321 }, { "epoch": 1.7800755155387744, "grad_norm": 0.6588555574417114, "learning_rate": 0.0001, "loss": 1.3956, "step": 15322 }, { "epoch": 1.780191693290735, "grad_norm": 0.6319722533226013, "learning_rate": 0.0001, "loss": 1.394, "step": 15323 }, { "epoch": 1.7803078710426954, "grad_norm": 0.6400730609893799, "learning_rate": 0.0001, "loss": 1.5056, "step": 15324 }, { "epoch": 1.7804240487946559, "grad_norm": 0.6702159643173218, "learning_rate": 0.0001, "loss": 1.535, "step": 15325 }, { "epoch": 1.7805402265466164, "grad_norm": 0.676764965057373, "learning_rate": 0.0001, "loss": 1.5695, "step": 15326 }, { "epoch": 1.7806564042985769, "grad_norm": 0.6130290627479553, "learning_rate": 0.0001, "loss": 1.5965, "step": 15327 }, { "epoch": 1.7807725820505373, "grad_norm": 0.5740941762924194, "learning_rate": 0.0001, "loss": 1.3587, "step": 15328 }, { "epoch": 1.7808887598024978, "grad_norm": 0.6307185292243958, "learning_rate": 0.0001, "loss": 1.5705, "step": 15329 }, { "epoch": 1.7810049375544583, "grad_norm": 0.6241540312767029, "learning_rate": 0.0001, "loss": 1.4833, "step": 15330 }, { "epoch": 1.7811211153064188, "grad_norm": 0.6415241956710815, "learning_rate": 0.0001, "loss": 1.5564, "step": 15331 }, { "epoch": 1.7812372930583793, "grad_norm": 0.5836394429206848, "learning_rate": 0.0001, "loss": 1.1773, "step": 15332 }, { "epoch": 1.7813534708103398, "grad_norm": 0.6343079805374146, "learning_rate": 0.0001, "loss": 1.5003, "step": 15333 }, { "epoch": 1.7814696485623003, "grad_norm": 0.6596034169197083, "learning_rate": 0.0001, "loss": 1.5347, "step": 15334 }, { "epoch": 1.7815858263142608, "grad_norm": 0.6426047086715698, "learning_rate": 0.0001, "loss": 1.414, "step": 15335 }, { "epoch": 1.7817020040662213, "grad_norm": 0.6495097875595093, "learning_rate": 0.0001, "loss": 1.4471, "step": 15336 }, { "epoch": 1.7818181818181817, "grad_norm": 0.5877517461776733, "learning_rate": 0.0001, "loss": 1.3095, "step": 15337 }, { "epoch": 1.7819343595701422, "grad_norm": 0.6389449834823608, "learning_rate": 0.0001, "loss": 1.4296, "step": 15338 }, { "epoch": 1.7820505373221027, "grad_norm": 0.6318106055259705, "learning_rate": 0.0001, "loss": 1.5441, "step": 15339 }, { "epoch": 1.7821667150740632, "grad_norm": 0.6248120665550232, "learning_rate": 0.0001, "loss": 1.3682, "step": 15340 }, { "epoch": 1.7822828928260237, "grad_norm": 0.6103470325469971, "learning_rate": 0.0001, "loss": 1.5816, "step": 15341 }, { "epoch": 1.7823990705779842, "grad_norm": 0.7223705649375916, "learning_rate": 0.0001, "loss": 1.7042, "step": 15342 }, { "epoch": 1.7825152483299447, "grad_norm": 0.6277883052825928, "learning_rate": 0.0001, "loss": 1.4815, "step": 15343 }, { "epoch": 1.7826314260819052, "grad_norm": 0.6787412166595459, "learning_rate": 0.0001, "loss": 1.5379, "step": 15344 }, { "epoch": 1.7827476038338657, "grad_norm": 0.6357549428939819, "learning_rate": 0.0001, "loss": 1.4805, "step": 15345 }, { "epoch": 1.7828637815858264, "grad_norm": 0.6067598462104797, "learning_rate": 0.0001, "loss": 1.3747, "step": 15346 }, { "epoch": 1.7829799593377869, "grad_norm": 0.6521841883659363, "learning_rate": 0.0001, "loss": 1.4286, "step": 15347 }, { "epoch": 1.7830961370897473, "grad_norm": 0.6672656536102295, "learning_rate": 0.0001, "loss": 1.533, "step": 15348 }, { "epoch": 1.7832123148417078, "grad_norm": 0.6569178700447083, "learning_rate": 0.0001, "loss": 1.3805, "step": 15349 }, { "epoch": 1.7833284925936683, "grad_norm": 0.6453930735588074, "learning_rate": 0.0001, "loss": 1.61, "step": 15350 }, { "epoch": 1.7834446703456288, "grad_norm": 0.5926080346107483, "learning_rate": 0.0001, "loss": 1.2944, "step": 15351 }, { "epoch": 1.7835608480975893, "grad_norm": 0.6266493201255798, "learning_rate": 0.0001, "loss": 1.4033, "step": 15352 }, { "epoch": 1.7836770258495498, "grad_norm": 0.6241078972816467, "learning_rate": 0.0001, "loss": 1.5682, "step": 15353 }, { "epoch": 1.7837932036015103, "grad_norm": 0.655592679977417, "learning_rate": 0.0001, "loss": 1.2847, "step": 15354 }, { "epoch": 1.7839093813534708, "grad_norm": 0.6183730959892273, "learning_rate": 0.0001, "loss": 1.7057, "step": 15355 }, { "epoch": 1.7840255591054313, "grad_norm": 0.6165468692779541, "learning_rate": 0.0001, "loss": 1.5667, "step": 15356 }, { "epoch": 1.784141736857392, "grad_norm": 0.626423716545105, "learning_rate": 0.0001, "loss": 1.4701, "step": 15357 }, { "epoch": 1.7842579146093525, "grad_norm": 0.6840025782585144, "learning_rate": 0.0001, "loss": 1.4414, "step": 15358 }, { "epoch": 1.784374092361313, "grad_norm": 0.6352068781852722, "learning_rate": 0.0001, "loss": 1.5339, "step": 15359 }, { "epoch": 1.7844902701132734, "grad_norm": 0.6823697686195374, "learning_rate": 0.0001, "loss": 1.6172, "step": 15360 }, { "epoch": 1.784606447865234, "grad_norm": 0.6007603406906128, "learning_rate": 0.0001, "loss": 1.4924, "step": 15361 }, { "epoch": 1.7847226256171944, "grad_norm": 0.6556808352470398, "learning_rate": 0.0001, "loss": 1.476, "step": 15362 }, { "epoch": 1.784838803369155, "grad_norm": 0.641976535320282, "learning_rate": 0.0001, "loss": 1.3711, "step": 15363 }, { "epoch": 1.7849549811211154, "grad_norm": 0.6192817687988281, "learning_rate": 0.0001, "loss": 1.4762, "step": 15364 }, { "epoch": 1.7850711588730759, "grad_norm": 0.6530466079711914, "learning_rate": 0.0001, "loss": 1.3754, "step": 15365 }, { "epoch": 1.7851873366250364, "grad_norm": 0.6647409796714783, "learning_rate": 0.0001, "loss": 1.4856, "step": 15366 }, { "epoch": 1.7853035143769969, "grad_norm": 0.6130949854850769, "learning_rate": 0.0001, "loss": 1.4127, "step": 15367 }, { "epoch": 1.7854196921289573, "grad_norm": 0.5865651965141296, "learning_rate": 0.0001, "loss": 1.4363, "step": 15368 }, { "epoch": 1.7855358698809178, "grad_norm": 0.6390106081962585, "learning_rate": 0.0001, "loss": 1.4961, "step": 15369 }, { "epoch": 1.7856520476328783, "grad_norm": 0.625084400177002, "learning_rate": 0.0001, "loss": 1.3161, "step": 15370 }, { "epoch": 1.7857682253848388, "grad_norm": 0.6631665825843811, "learning_rate": 0.0001, "loss": 1.5103, "step": 15371 }, { "epoch": 1.7858844031367993, "grad_norm": 0.6234285831451416, "learning_rate": 0.0001, "loss": 1.3618, "step": 15372 }, { "epoch": 1.7860005808887598, "grad_norm": 0.6130627393722534, "learning_rate": 0.0001, "loss": 1.3851, "step": 15373 }, { "epoch": 1.7861167586407203, "grad_norm": 0.6541336178779602, "learning_rate": 0.0001, "loss": 1.4747, "step": 15374 }, { "epoch": 1.7862329363926808, "grad_norm": 0.6418958306312561, "learning_rate": 0.0001, "loss": 1.4473, "step": 15375 }, { "epoch": 1.7863491141446413, "grad_norm": 0.6898660659790039, "learning_rate": 0.0001, "loss": 1.6252, "step": 15376 }, { "epoch": 1.7864652918966017, "grad_norm": 0.6710512042045593, "learning_rate": 0.0001, "loss": 1.5709, "step": 15377 }, { "epoch": 1.7865814696485622, "grad_norm": 0.6682336926460266, "learning_rate": 0.0001, "loss": 1.5381, "step": 15378 }, { "epoch": 1.7866976474005227, "grad_norm": 0.6305358409881592, "learning_rate": 0.0001, "loss": 1.5759, "step": 15379 }, { "epoch": 1.7868138251524832, "grad_norm": 0.6466816067695618, "learning_rate": 0.0001, "loss": 1.574, "step": 15380 }, { "epoch": 1.7869300029044437, "grad_norm": 0.6368044018745422, "learning_rate": 0.0001, "loss": 1.4986, "step": 15381 }, { "epoch": 1.7870461806564042, "grad_norm": 0.6059060096740723, "learning_rate": 0.0001, "loss": 1.4969, "step": 15382 }, { "epoch": 1.7871623584083647, "grad_norm": 0.6261503100395203, "learning_rate": 0.0001, "loss": 1.4372, "step": 15383 }, { "epoch": 1.7872785361603252, "grad_norm": 0.6143785119056702, "learning_rate": 0.0001, "loss": 1.4458, "step": 15384 }, { "epoch": 1.7873947139122857, "grad_norm": 0.6497848629951477, "learning_rate": 0.0001, "loss": 1.4957, "step": 15385 }, { "epoch": 1.7875108916642461, "grad_norm": 0.6345806121826172, "learning_rate": 0.0001, "loss": 1.4309, "step": 15386 }, { "epoch": 1.7876270694162066, "grad_norm": 0.6604867577552795, "learning_rate": 0.0001, "loss": 1.53, "step": 15387 }, { "epoch": 1.7877432471681673, "grad_norm": 0.6458450555801392, "learning_rate": 0.0001, "loss": 1.5442, "step": 15388 }, { "epoch": 1.7878594249201278, "grad_norm": 0.6687461733818054, "learning_rate": 0.0001, "loss": 1.4473, "step": 15389 }, { "epoch": 1.7879756026720883, "grad_norm": 0.6317610144615173, "learning_rate": 0.0001, "loss": 1.4254, "step": 15390 }, { "epoch": 1.7880917804240488, "grad_norm": 0.6526240110397339, "learning_rate": 0.0001, "loss": 1.526, "step": 15391 }, { "epoch": 1.7882079581760093, "grad_norm": 0.6347101926803589, "learning_rate": 0.0001, "loss": 1.5397, "step": 15392 }, { "epoch": 1.7883241359279698, "grad_norm": 0.6390734314918518, "learning_rate": 0.0001, "loss": 1.3856, "step": 15393 }, { "epoch": 1.7884403136799303, "grad_norm": 0.6033388376235962, "learning_rate": 0.0001, "loss": 1.2651, "step": 15394 }, { "epoch": 1.7885564914318908, "grad_norm": 0.6391220092773438, "learning_rate": 0.0001, "loss": 1.488, "step": 15395 }, { "epoch": 1.7886726691838513, "grad_norm": 0.6067914366722107, "learning_rate": 0.0001, "loss": 1.4141, "step": 15396 }, { "epoch": 1.7887888469358117, "grad_norm": 0.5883461833000183, "learning_rate": 0.0001, "loss": 1.4539, "step": 15397 }, { "epoch": 1.7889050246877722, "grad_norm": 0.6306256055831909, "learning_rate": 0.0001, "loss": 1.3757, "step": 15398 }, { "epoch": 1.789021202439733, "grad_norm": 0.6178750395774841, "learning_rate": 0.0001, "loss": 1.3581, "step": 15399 }, { "epoch": 1.7891373801916934, "grad_norm": 0.6314160227775574, "learning_rate": 0.0001, "loss": 1.4617, "step": 15400 }, { "epoch": 1.789253557943654, "grad_norm": 0.5842681527137756, "learning_rate": 0.0001, "loss": 1.2558, "step": 15401 }, { "epoch": 1.7893697356956144, "grad_norm": 0.740821361541748, "learning_rate": 0.0001, "loss": 1.7731, "step": 15402 }, { "epoch": 1.789485913447575, "grad_norm": 0.6980586051940918, "learning_rate": 0.0001, "loss": 1.5099, "step": 15403 }, { "epoch": 1.7896020911995354, "grad_norm": 0.6374906897544861, "learning_rate": 0.0001, "loss": 1.5132, "step": 15404 }, { "epoch": 1.7897182689514959, "grad_norm": 0.6395952105522156, "learning_rate": 0.0001, "loss": 1.3812, "step": 15405 }, { "epoch": 1.7898344467034564, "grad_norm": 0.6942700147628784, "learning_rate": 0.0001, "loss": 1.5105, "step": 15406 }, { "epoch": 1.7899506244554169, "grad_norm": 0.6455872654914856, "learning_rate": 0.0001, "loss": 1.5927, "step": 15407 }, { "epoch": 1.7900668022073773, "grad_norm": 0.6241863965988159, "learning_rate": 0.0001, "loss": 1.3523, "step": 15408 }, { "epoch": 1.7901829799593378, "grad_norm": 0.6423774361610413, "learning_rate": 0.0001, "loss": 1.5663, "step": 15409 }, { "epoch": 1.7902991577112983, "grad_norm": 0.6053903102874756, "learning_rate": 0.0001, "loss": 1.3915, "step": 15410 }, { "epoch": 1.7904153354632588, "grad_norm": 0.634202241897583, "learning_rate": 0.0001, "loss": 1.3752, "step": 15411 }, { "epoch": 1.7905315132152193, "grad_norm": 0.6020644903182983, "learning_rate": 0.0001, "loss": 1.3854, "step": 15412 }, { "epoch": 1.7906476909671798, "grad_norm": 0.5868170261383057, "learning_rate": 0.0001, "loss": 1.3431, "step": 15413 }, { "epoch": 1.7907638687191403, "grad_norm": 0.67093425989151, "learning_rate": 0.0001, "loss": 1.5123, "step": 15414 }, { "epoch": 1.7908800464711008, "grad_norm": 0.6786069869995117, "learning_rate": 0.0001, "loss": 1.3537, "step": 15415 }, { "epoch": 1.7909962242230613, "grad_norm": 0.6926303505897522, "learning_rate": 0.0001, "loss": 1.5992, "step": 15416 }, { "epoch": 1.7911124019750218, "grad_norm": 0.6421206593513489, "learning_rate": 0.0001, "loss": 1.5509, "step": 15417 }, { "epoch": 1.7912285797269822, "grad_norm": 0.6365699172019958, "learning_rate": 0.0001, "loss": 1.4309, "step": 15418 }, { "epoch": 1.7913447574789427, "grad_norm": 0.6576679944992065, "learning_rate": 0.0001, "loss": 1.2789, "step": 15419 }, { "epoch": 1.7914609352309032, "grad_norm": 0.5954288244247437, "learning_rate": 0.0001, "loss": 1.3752, "step": 15420 }, { "epoch": 1.7915771129828637, "grad_norm": 0.6105135083198547, "learning_rate": 0.0001, "loss": 1.3923, "step": 15421 }, { "epoch": 1.7916932907348242, "grad_norm": 0.6383089423179626, "learning_rate": 0.0001, "loss": 1.5883, "step": 15422 }, { "epoch": 1.7918094684867847, "grad_norm": 0.6352864503860474, "learning_rate": 0.0001, "loss": 1.405, "step": 15423 }, { "epoch": 1.7919256462387452, "grad_norm": 0.6778745651245117, "learning_rate": 0.0001, "loss": 1.6501, "step": 15424 }, { "epoch": 1.7920418239907057, "grad_norm": 0.7053214907646179, "learning_rate": 0.0001, "loss": 1.5731, "step": 15425 }, { "epoch": 1.7921580017426662, "grad_norm": 0.6709882616996765, "learning_rate": 0.0001, "loss": 1.5546, "step": 15426 }, { "epoch": 1.7922741794946266, "grad_norm": 0.6401253342628479, "learning_rate": 0.0001, "loss": 1.3315, "step": 15427 }, { "epoch": 1.7923903572465871, "grad_norm": 0.6264938116073608, "learning_rate": 0.0001, "loss": 1.3477, "step": 15428 }, { "epoch": 1.7925065349985476, "grad_norm": 0.6280655264854431, "learning_rate": 0.0001, "loss": 1.4361, "step": 15429 }, { "epoch": 1.7926227127505083, "grad_norm": 0.6435604691505432, "learning_rate": 0.0001, "loss": 1.4996, "step": 15430 }, { "epoch": 1.7927388905024688, "grad_norm": 0.6186904907226562, "learning_rate": 0.0001, "loss": 1.379, "step": 15431 }, { "epoch": 1.7928550682544293, "grad_norm": 0.6186133027076721, "learning_rate": 0.0001, "loss": 1.3483, "step": 15432 }, { "epoch": 1.7929712460063898, "grad_norm": 0.5829854011535645, "learning_rate": 0.0001, "loss": 1.4142, "step": 15433 }, { "epoch": 1.7930874237583503, "grad_norm": 0.578281581401825, "learning_rate": 0.0001, "loss": 1.2689, "step": 15434 }, { "epoch": 1.7932036015103108, "grad_norm": 0.6309124231338501, "learning_rate": 0.0001, "loss": 1.442, "step": 15435 }, { "epoch": 1.7933197792622713, "grad_norm": 0.6733617782592773, "learning_rate": 0.0001, "loss": 1.6304, "step": 15436 }, { "epoch": 1.7934359570142318, "grad_norm": 0.6470215916633606, "learning_rate": 0.0001, "loss": 1.4962, "step": 15437 }, { "epoch": 1.7935521347661922, "grad_norm": 0.6718212962150574, "learning_rate": 0.0001, "loss": 1.4045, "step": 15438 }, { "epoch": 1.7936683125181527, "grad_norm": 0.6350439190864563, "learning_rate": 0.0001, "loss": 1.3802, "step": 15439 }, { "epoch": 1.7937844902701132, "grad_norm": 0.6237597465515137, "learning_rate": 0.0001, "loss": 1.4137, "step": 15440 }, { "epoch": 1.793900668022074, "grad_norm": 0.6106988191604614, "learning_rate": 0.0001, "loss": 1.2524, "step": 15441 }, { "epoch": 1.7940168457740344, "grad_norm": 0.6247336864471436, "learning_rate": 0.0001, "loss": 1.4243, "step": 15442 }, { "epoch": 1.794133023525995, "grad_norm": 0.5693892240524292, "learning_rate": 0.0001, "loss": 1.3875, "step": 15443 }, { "epoch": 1.7942492012779554, "grad_norm": 0.6251781582832336, "learning_rate": 0.0001, "loss": 1.3681, "step": 15444 }, { "epoch": 1.7943653790299159, "grad_norm": 0.632365882396698, "learning_rate": 0.0001, "loss": 1.4629, "step": 15445 }, { "epoch": 1.7944815567818764, "grad_norm": 0.6545054316520691, "learning_rate": 0.0001, "loss": 1.4322, "step": 15446 }, { "epoch": 1.7945977345338369, "grad_norm": 0.6065900921821594, "learning_rate": 0.0001, "loss": 1.4026, "step": 15447 }, { "epoch": 1.7947139122857974, "grad_norm": 0.6570457220077515, "learning_rate": 0.0001, "loss": 1.4182, "step": 15448 }, { "epoch": 1.7948300900377578, "grad_norm": 0.6142979860305786, "learning_rate": 0.0001, "loss": 1.4313, "step": 15449 }, { "epoch": 1.7949462677897183, "grad_norm": 0.6414238810539246, "learning_rate": 0.0001, "loss": 1.3371, "step": 15450 }, { "epoch": 1.7950624455416788, "grad_norm": 0.6449700593948364, "learning_rate": 0.0001, "loss": 1.5037, "step": 15451 }, { "epoch": 1.7951786232936393, "grad_norm": 0.6076501607894897, "learning_rate": 0.0001, "loss": 1.3256, "step": 15452 }, { "epoch": 1.7952948010455998, "grad_norm": 0.6959519982337952, "learning_rate": 0.0001, "loss": 1.5431, "step": 15453 }, { "epoch": 1.7954109787975603, "grad_norm": 0.6095200777053833, "learning_rate": 0.0001, "loss": 1.324, "step": 15454 }, { "epoch": 1.7955271565495208, "grad_norm": 0.6639478206634521, "learning_rate": 0.0001, "loss": 1.5005, "step": 15455 }, { "epoch": 1.7956433343014813, "grad_norm": 0.6733502745628357, "learning_rate": 0.0001, "loss": 1.4839, "step": 15456 }, { "epoch": 1.7957595120534418, "grad_norm": 0.6602219939231873, "learning_rate": 0.0001, "loss": 1.5317, "step": 15457 }, { "epoch": 1.7958756898054022, "grad_norm": 0.63166344165802, "learning_rate": 0.0001, "loss": 1.5148, "step": 15458 }, { "epoch": 1.7959918675573627, "grad_norm": 0.6733032464981079, "learning_rate": 0.0001, "loss": 1.6834, "step": 15459 }, { "epoch": 1.7961080453093232, "grad_norm": 0.6030856370925903, "learning_rate": 0.0001, "loss": 1.3604, "step": 15460 }, { "epoch": 1.7962242230612837, "grad_norm": 0.5861091017723083, "learning_rate": 0.0001, "loss": 1.2447, "step": 15461 }, { "epoch": 1.7963404008132442, "grad_norm": 0.6066709756851196, "learning_rate": 0.0001, "loss": 1.3957, "step": 15462 }, { "epoch": 1.7964565785652047, "grad_norm": 0.6612598299980164, "learning_rate": 0.0001, "loss": 1.3733, "step": 15463 }, { "epoch": 1.7965727563171652, "grad_norm": 0.6284952163696289, "learning_rate": 0.0001, "loss": 1.4436, "step": 15464 }, { "epoch": 1.7966889340691257, "grad_norm": 0.6146871447563171, "learning_rate": 0.0001, "loss": 1.5799, "step": 15465 }, { "epoch": 1.7968051118210862, "grad_norm": 0.660258948802948, "learning_rate": 0.0001, "loss": 1.478, "step": 15466 }, { "epoch": 1.7969212895730466, "grad_norm": 0.6201002597808838, "learning_rate": 0.0001, "loss": 1.4572, "step": 15467 }, { "epoch": 1.7970374673250071, "grad_norm": 0.5912632346153259, "learning_rate": 0.0001, "loss": 1.2779, "step": 15468 }, { "epoch": 1.7971536450769676, "grad_norm": 0.6016855239868164, "learning_rate": 0.0001, "loss": 1.5056, "step": 15469 }, { "epoch": 1.797269822828928, "grad_norm": 0.6806309819221497, "learning_rate": 0.0001, "loss": 1.4069, "step": 15470 }, { "epoch": 1.7973860005808886, "grad_norm": 0.6250790953636169, "learning_rate": 0.0001, "loss": 1.2914, "step": 15471 }, { "epoch": 1.7975021783328493, "grad_norm": 0.604945957660675, "learning_rate": 0.0001, "loss": 1.4136, "step": 15472 }, { "epoch": 1.7976183560848098, "grad_norm": 0.6098234057426453, "learning_rate": 0.0001, "loss": 1.5328, "step": 15473 }, { "epoch": 1.7977345338367703, "grad_norm": 0.643011212348938, "learning_rate": 0.0001, "loss": 1.489, "step": 15474 }, { "epoch": 1.7978507115887308, "grad_norm": 0.6149110198020935, "learning_rate": 0.0001, "loss": 1.4462, "step": 15475 }, { "epoch": 1.7979668893406913, "grad_norm": 0.6385002136230469, "learning_rate": 0.0001, "loss": 1.5133, "step": 15476 }, { "epoch": 1.7980830670926518, "grad_norm": 0.6435854434967041, "learning_rate": 0.0001, "loss": 1.5288, "step": 15477 }, { "epoch": 1.7981992448446122, "grad_norm": 0.5845692753791809, "learning_rate": 0.0001, "loss": 1.3883, "step": 15478 }, { "epoch": 1.7983154225965727, "grad_norm": 0.6132542490959167, "learning_rate": 0.0001, "loss": 1.4788, "step": 15479 }, { "epoch": 1.7984316003485332, "grad_norm": 0.691087007522583, "learning_rate": 0.0001, "loss": 1.4946, "step": 15480 }, { "epoch": 1.7985477781004937, "grad_norm": 0.5728273391723633, "learning_rate": 0.0001, "loss": 1.3909, "step": 15481 }, { "epoch": 1.7986639558524544, "grad_norm": 0.6824169754981995, "learning_rate": 0.0001, "loss": 1.4081, "step": 15482 }, { "epoch": 1.798780133604415, "grad_norm": 0.6408066153526306, "learning_rate": 0.0001, "loss": 1.4195, "step": 15483 }, { "epoch": 1.7988963113563754, "grad_norm": 0.6111366748809814, "learning_rate": 0.0001, "loss": 1.3786, "step": 15484 }, { "epoch": 1.7990124891083359, "grad_norm": 0.615522027015686, "learning_rate": 0.0001, "loss": 1.3679, "step": 15485 }, { "epoch": 1.7991286668602964, "grad_norm": 0.6301548480987549, "learning_rate": 0.0001, "loss": 1.2316, "step": 15486 }, { "epoch": 1.7992448446122569, "grad_norm": 0.6690608859062195, "learning_rate": 0.0001, "loss": 1.4182, "step": 15487 }, { "epoch": 1.7993610223642174, "grad_norm": 0.6183565855026245, "learning_rate": 0.0001, "loss": 1.2364, "step": 15488 }, { "epoch": 1.7994772001161778, "grad_norm": 0.648381769657135, "learning_rate": 0.0001, "loss": 1.4054, "step": 15489 }, { "epoch": 1.7995933778681383, "grad_norm": 0.6269338726997375, "learning_rate": 0.0001, "loss": 1.5028, "step": 15490 }, { "epoch": 1.7997095556200988, "grad_norm": 0.6279770135879517, "learning_rate": 0.0001, "loss": 1.4225, "step": 15491 }, { "epoch": 1.7998257333720593, "grad_norm": 0.6257197260856628, "learning_rate": 0.0001, "loss": 1.4381, "step": 15492 }, { "epoch": 1.7999419111240198, "grad_norm": 0.6717484593391418, "learning_rate": 0.0001, "loss": 1.5658, "step": 15493 }, { "epoch": 1.8000580888759803, "grad_norm": 0.6743965148925781, "learning_rate": 0.0001, "loss": 1.4372, "step": 15494 }, { "epoch": 1.8001742666279408, "grad_norm": 0.6533719897270203, "learning_rate": 0.0001, "loss": 1.3257, "step": 15495 }, { "epoch": 1.8002904443799013, "grad_norm": 0.6260417103767395, "learning_rate": 0.0001, "loss": 1.4907, "step": 15496 }, { "epoch": 1.8004066221318618, "grad_norm": 0.6232183575630188, "learning_rate": 0.0001, "loss": 1.4011, "step": 15497 }, { "epoch": 1.8005227998838222, "grad_norm": 0.6195414662361145, "learning_rate": 0.0001, "loss": 1.5305, "step": 15498 }, { "epoch": 1.8006389776357827, "grad_norm": 0.6116890907287598, "learning_rate": 0.0001, "loss": 1.515, "step": 15499 }, { "epoch": 1.8007551553877432, "grad_norm": 0.5852633118629456, "learning_rate": 0.0001, "loss": 1.3992, "step": 15500 }, { "epoch": 1.8008713331397037, "grad_norm": 0.6426783204078674, "learning_rate": 0.0001, "loss": 1.4188, "step": 15501 }, { "epoch": 1.8009875108916642, "grad_norm": 0.6477175951004028, "learning_rate": 0.0001, "loss": 1.4329, "step": 15502 }, { "epoch": 1.8011036886436247, "grad_norm": 0.68708336353302, "learning_rate": 0.0001, "loss": 1.4541, "step": 15503 }, { "epoch": 1.8012198663955852, "grad_norm": 0.5975637435913086, "learning_rate": 0.0001, "loss": 1.4188, "step": 15504 }, { "epoch": 1.8013360441475457, "grad_norm": 0.5850353240966797, "learning_rate": 0.0001, "loss": 1.3189, "step": 15505 }, { "epoch": 1.8014522218995062, "grad_norm": 0.624815046787262, "learning_rate": 0.0001, "loss": 1.5784, "step": 15506 }, { "epoch": 1.8015683996514666, "grad_norm": 0.6753899455070496, "learning_rate": 0.0001, "loss": 1.6026, "step": 15507 }, { "epoch": 1.8016845774034271, "grad_norm": 0.5973823666572571, "learning_rate": 0.0001, "loss": 1.3672, "step": 15508 }, { "epoch": 1.8018007551553876, "grad_norm": 0.6421844959259033, "learning_rate": 0.0001, "loss": 1.4048, "step": 15509 }, { "epoch": 1.8019169329073481, "grad_norm": 0.6462374925613403, "learning_rate": 0.0001, "loss": 1.5591, "step": 15510 }, { "epoch": 1.8020331106593086, "grad_norm": 0.6470414400100708, "learning_rate": 0.0001, "loss": 1.4676, "step": 15511 }, { "epoch": 1.802149288411269, "grad_norm": 0.6577094793319702, "learning_rate": 0.0001, "loss": 1.463, "step": 15512 }, { "epoch": 1.8022654661632296, "grad_norm": 0.6221495866775513, "learning_rate": 0.0001, "loss": 1.532, "step": 15513 }, { "epoch": 1.8023816439151903, "grad_norm": 0.6168953776359558, "learning_rate": 0.0001, "loss": 1.3635, "step": 15514 }, { "epoch": 1.8024978216671508, "grad_norm": 0.6322457790374756, "learning_rate": 0.0001, "loss": 1.397, "step": 15515 }, { "epoch": 1.8026139994191113, "grad_norm": 0.649965226650238, "learning_rate": 0.0001, "loss": 1.4338, "step": 15516 }, { "epoch": 1.8027301771710718, "grad_norm": 0.6152699589729309, "learning_rate": 0.0001, "loss": 1.4006, "step": 15517 }, { "epoch": 1.8028463549230322, "grad_norm": 0.6808486580848694, "learning_rate": 0.0001, "loss": 1.6205, "step": 15518 }, { "epoch": 1.8029625326749927, "grad_norm": 0.6583698391914368, "learning_rate": 0.0001, "loss": 1.4805, "step": 15519 }, { "epoch": 1.8030787104269532, "grad_norm": 0.6418282985687256, "learning_rate": 0.0001, "loss": 1.3415, "step": 15520 }, { "epoch": 1.8031948881789137, "grad_norm": 0.594459593296051, "learning_rate": 0.0001, "loss": 1.268, "step": 15521 }, { "epoch": 1.8033110659308742, "grad_norm": 0.634330689907074, "learning_rate": 0.0001, "loss": 1.2682, "step": 15522 }, { "epoch": 1.8034272436828347, "grad_norm": 0.6618805527687073, "learning_rate": 0.0001, "loss": 1.5349, "step": 15523 }, { "epoch": 1.8035434214347954, "grad_norm": 0.6131069660186768, "learning_rate": 0.0001, "loss": 1.3872, "step": 15524 }, { "epoch": 1.803659599186756, "grad_norm": 0.6344172954559326, "learning_rate": 0.0001, "loss": 1.5199, "step": 15525 }, { "epoch": 1.8037757769387164, "grad_norm": 0.6468499302864075, "learning_rate": 0.0001, "loss": 1.5414, "step": 15526 }, { "epoch": 1.8038919546906769, "grad_norm": 0.6068350672721863, "learning_rate": 0.0001, "loss": 1.4575, "step": 15527 }, { "epoch": 1.8040081324426374, "grad_norm": 0.6041995882987976, "learning_rate": 0.0001, "loss": 1.4324, "step": 15528 }, { "epoch": 1.8041243101945978, "grad_norm": 0.5866186022758484, "learning_rate": 0.0001, "loss": 1.4376, "step": 15529 }, { "epoch": 1.8042404879465583, "grad_norm": 0.654376745223999, "learning_rate": 0.0001, "loss": 1.4937, "step": 15530 }, { "epoch": 1.8043566656985188, "grad_norm": 0.6172172427177429, "learning_rate": 0.0001, "loss": 1.4292, "step": 15531 }, { "epoch": 1.8044728434504793, "grad_norm": 0.6342853307723999, "learning_rate": 0.0001, "loss": 1.5803, "step": 15532 }, { "epoch": 1.8045890212024398, "grad_norm": 0.6920246481895447, "learning_rate": 0.0001, "loss": 1.563, "step": 15533 }, { "epoch": 1.8047051989544003, "grad_norm": 0.6475127935409546, "learning_rate": 0.0001, "loss": 1.4816, "step": 15534 }, { "epoch": 1.8048213767063608, "grad_norm": 0.6755061149597168, "learning_rate": 0.0001, "loss": 1.6452, "step": 15535 }, { "epoch": 1.8049375544583213, "grad_norm": 0.6353265047073364, "learning_rate": 0.0001, "loss": 1.4436, "step": 15536 }, { "epoch": 1.8050537322102818, "grad_norm": 0.6212906837463379, "learning_rate": 0.0001, "loss": 1.6024, "step": 15537 }, { "epoch": 1.8051699099622422, "grad_norm": 0.629300057888031, "learning_rate": 0.0001, "loss": 1.4043, "step": 15538 }, { "epoch": 1.8052860877142027, "grad_norm": 0.5710582137107849, "learning_rate": 0.0001, "loss": 1.3679, "step": 15539 }, { "epoch": 1.8054022654661632, "grad_norm": 0.6515181064605713, "learning_rate": 0.0001, "loss": 1.4918, "step": 15540 }, { "epoch": 1.8055184432181237, "grad_norm": 0.6114649772644043, "learning_rate": 0.0001, "loss": 1.4319, "step": 15541 }, { "epoch": 1.8056346209700842, "grad_norm": 0.6710079908370972, "learning_rate": 0.0001, "loss": 1.3816, "step": 15542 }, { "epoch": 1.8057507987220447, "grad_norm": 0.6572760343551636, "learning_rate": 0.0001, "loss": 1.3643, "step": 15543 }, { "epoch": 1.8058669764740052, "grad_norm": 0.6663978099822998, "learning_rate": 0.0001, "loss": 1.4864, "step": 15544 }, { "epoch": 1.8059831542259657, "grad_norm": 0.6454578638076782, "learning_rate": 0.0001, "loss": 1.4032, "step": 15545 }, { "epoch": 1.8060993319779262, "grad_norm": 0.6383687257766724, "learning_rate": 0.0001, "loss": 1.4363, "step": 15546 }, { "epoch": 1.8062155097298866, "grad_norm": 0.6202837228775024, "learning_rate": 0.0001, "loss": 1.2674, "step": 15547 }, { "epoch": 1.8063316874818471, "grad_norm": 0.6706852316856384, "learning_rate": 0.0001, "loss": 1.4464, "step": 15548 }, { "epoch": 1.8064478652338076, "grad_norm": 0.6259382367134094, "learning_rate": 0.0001, "loss": 1.5115, "step": 15549 }, { "epoch": 1.8065640429857681, "grad_norm": 0.6482195854187012, "learning_rate": 0.0001, "loss": 1.5831, "step": 15550 }, { "epoch": 1.8066802207377286, "grad_norm": 0.6392964124679565, "learning_rate": 0.0001, "loss": 1.5778, "step": 15551 }, { "epoch": 1.806796398489689, "grad_norm": 0.611294686794281, "learning_rate": 0.0001, "loss": 1.2595, "step": 15552 }, { "epoch": 1.8069125762416496, "grad_norm": 0.6573194265365601, "learning_rate": 0.0001, "loss": 1.4941, "step": 15553 }, { "epoch": 1.80702875399361, "grad_norm": 0.6529243588447571, "learning_rate": 0.0001, "loss": 1.4349, "step": 15554 }, { "epoch": 1.8071449317455706, "grad_norm": 0.6027690172195435, "learning_rate": 0.0001, "loss": 1.4156, "step": 15555 }, { "epoch": 1.8072611094975313, "grad_norm": 0.6437543034553528, "learning_rate": 0.0001, "loss": 1.5338, "step": 15556 }, { "epoch": 1.8073772872494918, "grad_norm": 0.6517318487167358, "learning_rate": 0.0001, "loss": 1.4367, "step": 15557 }, { "epoch": 1.8074934650014522, "grad_norm": 0.632622241973877, "learning_rate": 0.0001, "loss": 1.479, "step": 15558 }, { "epoch": 1.8076096427534127, "grad_norm": 0.6319211721420288, "learning_rate": 0.0001, "loss": 1.4903, "step": 15559 }, { "epoch": 1.8077258205053732, "grad_norm": 0.6070552468299866, "learning_rate": 0.0001, "loss": 1.2887, "step": 15560 }, { "epoch": 1.8078419982573337, "grad_norm": 0.6796751022338867, "learning_rate": 0.0001, "loss": 1.4545, "step": 15561 }, { "epoch": 1.8079581760092942, "grad_norm": 0.6342692971229553, "learning_rate": 0.0001, "loss": 1.472, "step": 15562 }, { "epoch": 1.8080743537612547, "grad_norm": 0.6401798725128174, "learning_rate": 0.0001, "loss": 1.554, "step": 15563 }, { "epoch": 1.8081905315132152, "grad_norm": 0.5917170643806458, "learning_rate": 0.0001, "loss": 1.3645, "step": 15564 }, { "epoch": 1.8083067092651757, "grad_norm": 0.6267076134681702, "learning_rate": 0.0001, "loss": 1.427, "step": 15565 }, { "epoch": 1.8084228870171364, "grad_norm": 0.6384696960449219, "learning_rate": 0.0001, "loss": 1.5426, "step": 15566 }, { "epoch": 1.8085390647690969, "grad_norm": 0.6659607887268066, "learning_rate": 0.0001, "loss": 1.4492, "step": 15567 }, { "epoch": 1.8086552425210574, "grad_norm": 0.609930694103241, "learning_rate": 0.0001, "loss": 1.5741, "step": 15568 }, { "epoch": 1.8087714202730178, "grad_norm": 0.6543862819671631, "learning_rate": 0.0001, "loss": 1.5129, "step": 15569 }, { "epoch": 1.8088875980249783, "grad_norm": 0.6350655555725098, "learning_rate": 0.0001, "loss": 1.4226, "step": 15570 }, { "epoch": 1.8090037757769388, "grad_norm": 0.6013209819793701, "learning_rate": 0.0001, "loss": 1.2865, "step": 15571 }, { "epoch": 1.8091199535288993, "grad_norm": 0.6370180249214172, "learning_rate": 0.0001, "loss": 1.4923, "step": 15572 }, { "epoch": 1.8092361312808598, "grad_norm": 0.5925559997558594, "learning_rate": 0.0001, "loss": 1.3867, "step": 15573 }, { "epoch": 1.8093523090328203, "grad_norm": 0.6409415602684021, "learning_rate": 0.0001, "loss": 1.385, "step": 15574 }, { "epoch": 1.8094684867847808, "grad_norm": 0.6258260011672974, "learning_rate": 0.0001, "loss": 1.5361, "step": 15575 }, { "epoch": 1.8095846645367413, "grad_norm": 0.6165568828582764, "learning_rate": 0.0001, "loss": 1.6987, "step": 15576 }, { "epoch": 1.8097008422887018, "grad_norm": 0.5947027802467346, "learning_rate": 0.0001, "loss": 1.3532, "step": 15577 }, { "epoch": 1.8098170200406622, "grad_norm": 0.6363356709480286, "learning_rate": 0.0001, "loss": 1.3476, "step": 15578 }, { "epoch": 1.8099331977926227, "grad_norm": 0.6468795537948608, "learning_rate": 0.0001, "loss": 1.5388, "step": 15579 }, { "epoch": 1.8100493755445832, "grad_norm": 0.6045858263969421, "learning_rate": 0.0001, "loss": 1.4873, "step": 15580 }, { "epoch": 1.8101655532965437, "grad_norm": 0.6008251309394836, "learning_rate": 0.0001, "loss": 1.4593, "step": 15581 }, { "epoch": 1.8102817310485042, "grad_norm": 0.6392335891723633, "learning_rate": 0.0001, "loss": 1.4743, "step": 15582 }, { "epoch": 1.8103979088004647, "grad_norm": 0.6000946164131165, "learning_rate": 0.0001, "loss": 1.5287, "step": 15583 }, { "epoch": 1.8105140865524252, "grad_norm": 0.6234345436096191, "learning_rate": 0.0001, "loss": 1.433, "step": 15584 }, { "epoch": 1.8106302643043857, "grad_norm": 0.6106148958206177, "learning_rate": 0.0001, "loss": 1.4892, "step": 15585 }, { "epoch": 1.8107464420563462, "grad_norm": 0.5977939367294312, "learning_rate": 0.0001, "loss": 1.3901, "step": 15586 }, { "epoch": 1.8108626198083067, "grad_norm": 0.6329689621925354, "learning_rate": 0.0001, "loss": 1.3955, "step": 15587 }, { "epoch": 1.8109787975602671, "grad_norm": 0.6138922572135925, "learning_rate": 0.0001, "loss": 1.3091, "step": 15588 }, { "epoch": 1.8110949753122276, "grad_norm": 0.6133901476860046, "learning_rate": 0.0001, "loss": 1.364, "step": 15589 }, { "epoch": 1.8112111530641881, "grad_norm": 0.6312717795372009, "learning_rate": 0.0001, "loss": 1.4335, "step": 15590 }, { "epoch": 1.8113273308161486, "grad_norm": 0.6562632322311401, "learning_rate": 0.0001, "loss": 1.4884, "step": 15591 }, { "epoch": 1.811443508568109, "grad_norm": 0.6676362752914429, "learning_rate": 0.0001, "loss": 1.4489, "step": 15592 }, { "epoch": 1.8115596863200696, "grad_norm": 0.5867359638214111, "learning_rate": 0.0001, "loss": 1.4699, "step": 15593 }, { "epoch": 1.81167586407203, "grad_norm": 0.610526442527771, "learning_rate": 0.0001, "loss": 1.5134, "step": 15594 }, { "epoch": 1.8117920418239906, "grad_norm": 0.6464741826057434, "learning_rate": 0.0001, "loss": 1.3157, "step": 15595 }, { "epoch": 1.811908219575951, "grad_norm": 0.6477694511413574, "learning_rate": 0.0001, "loss": 1.4665, "step": 15596 }, { "epoch": 1.8120243973279115, "grad_norm": 0.6339976191520691, "learning_rate": 0.0001, "loss": 1.5231, "step": 15597 }, { "epoch": 1.8121405750798723, "grad_norm": 0.5875985026359558, "learning_rate": 0.0001, "loss": 1.311, "step": 15598 }, { "epoch": 1.8122567528318327, "grad_norm": 0.6327051520347595, "learning_rate": 0.0001, "loss": 1.4672, "step": 15599 }, { "epoch": 1.8123729305837932, "grad_norm": 0.5891144275665283, "learning_rate": 0.0001, "loss": 1.353, "step": 15600 }, { "epoch": 1.8124891083357537, "grad_norm": 0.6597616672515869, "learning_rate": 0.0001, "loss": 1.5702, "step": 15601 }, { "epoch": 1.8126052860877142, "grad_norm": 0.6248520016670227, "learning_rate": 0.0001, "loss": 1.4086, "step": 15602 }, { "epoch": 1.8127214638396747, "grad_norm": 0.6467711925506592, "learning_rate": 0.0001, "loss": 1.3948, "step": 15603 }, { "epoch": 1.8128376415916352, "grad_norm": 0.5930083394050598, "learning_rate": 0.0001, "loss": 1.375, "step": 15604 }, { "epoch": 1.8129538193435957, "grad_norm": 0.6492817401885986, "learning_rate": 0.0001, "loss": 1.48, "step": 15605 }, { "epoch": 1.8130699970955562, "grad_norm": 0.6129246354103088, "learning_rate": 0.0001, "loss": 1.3686, "step": 15606 }, { "epoch": 1.8131861748475167, "grad_norm": 0.6140215396881104, "learning_rate": 0.0001, "loss": 1.4095, "step": 15607 }, { "epoch": 1.8133023525994774, "grad_norm": 0.6427494287490845, "learning_rate": 0.0001, "loss": 1.498, "step": 15608 }, { "epoch": 1.8134185303514379, "grad_norm": 0.6968186497688293, "learning_rate": 0.0001, "loss": 1.3832, "step": 15609 }, { "epoch": 1.8135347081033983, "grad_norm": 0.6444759368896484, "learning_rate": 0.0001, "loss": 1.5395, "step": 15610 }, { "epoch": 1.8136508858553588, "grad_norm": 0.6709272265434265, "learning_rate": 0.0001, "loss": 1.3628, "step": 15611 }, { "epoch": 1.8137670636073193, "grad_norm": 0.5977835655212402, "learning_rate": 0.0001, "loss": 1.4287, "step": 15612 }, { "epoch": 1.8138832413592798, "grad_norm": 0.6589576005935669, "learning_rate": 0.0001, "loss": 1.5726, "step": 15613 }, { "epoch": 1.8139994191112403, "grad_norm": 0.6188105940818787, "learning_rate": 0.0001, "loss": 1.2478, "step": 15614 }, { "epoch": 1.8141155968632008, "grad_norm": 0.6436295509338379, "learning_rate": 0.0001, "loss": 1.5318, "step": 15615 }, { "epoch": 1.8142317746151613, "grad_norm": 0.644077718257904, "learning_rate": 0.0001, "loss": 1.3944, "step": 15616 }, { "epoch": 1.8143479523671218, "grad_norm": 0.637717604637146, "learning_rate": 0.0001, "loss": 1.3111, "step": 15617 }, { "epoch": 1.8144641301190823, "grad_norm": 0.6236617565155029, "learning_rate": 0.0001, "loss": 1.5477, "step": 15618 }, { "epoch": 1.8145803078710427, "grad_norm": 0.6345205903053284, "learning_rate": 0.0001, "loss": 1.5212, "step": 15619 }, { "epoch": 1.8146964856230032, "grad_norm": 0.6268531680107117, "learning_rate": 0.0001, "loss": 1.472, "step": 15620 }, { "epoch": 1.8148126633749637, "grad_norm": 0.6145265698432922, "learning_rate": 0.0001, "loss": 1.3223, "step": 15621 }, { "epoch": 1.8149288411269242, "grad_norm": 0.6577031016349792, "learning_rate": 0.0001, "loss": 1.4009, "step": 15622 }, { "epoch": 1.8150450188788847, "grad_norm": 0.6210200190544128, "learning_rate": 0.0001, "loss": 1.4791, "step": 15623 }, { "epoch": 1.8151611966308452, "grad_norm": 0.6395127177238464, "learning_rate": 0.0001, "loss": 1.5141, "step": 15624 }, { "epoch": 1.8152773743828057, "grad_norm": 0.6661720871925354, "learning_rate": 0.0001, "loss": 1.5582, "step": 15625 }, { "epoch": 1.8153935521347662, "grad_norm": 0.5990541577339172, "learning_rate": 0.0001, "loss": 1.4719, "step": 15626 }, { "epoch": 1.8155097298867267, "grad_norm": 0.6122716665267944, "learning_rate": 0.0001, "loss": 1.4799, "step": 15627 }, { "epoch": 1.8156259076386871, "grad_norm": 0.6129891872406006, "learning_rate": 0.0001, "loss": 1.3344, "step": 15628 }, { "epoch": 1.8157420853906476, "grad_norm": 0.6177826523780823, "learning_rate": 0.0001, "loss": 1.3427, "step": 15629 }, { "epoch": 1.8158582631426081, "grad_norm": 0.6290244460105896, "learning_rate": 0.0001, "loss": 1.4393, "step": 15630 }, { "epoch": 1.8159744408945686, "grad_norm": 0.5964085459709167, "learning_rate": 0.0001, "loss": 1.4937, "step": 15631 }, { "epoch": 1.816090618646529, "grad_norm": 0.674884021282196, "learning_rate": 0.0001, "loss": 1.5005, "step": 15632 }, { "epoch": 1.8162067963984896, "grad_norm": 0.6360381245613098, "learning_rate": 0.0001, "loss": 1.3226, "step": 15633 }, { "epoch": 1.81632297415045, "grad_norm": 0.6837267875671387, "learning_rate": 0.0001, "loss": 1.4333, "step": 15634 }, { "epoch": 1.8164391519024106, "grad_norm": 0.6174556016921997, "learning_rate": 0.0001, "loss": 1.3367, "step": 15635 }, { "epoch": 1.816555329654371, "grad_norm": 0.6203545331954956, "learning_rate": 0.0001, "loss": 1.2864, "step": 15636 }, { "epoch": 1.8166715074063315, "grad_norm": 0.6508125066757202, "learning_rate": 0.0001, "loss": 1.511, "step": 15637 }, { "epoch": 1.816787685158292, "grad_norm": 0.6309399604797363, "learning_rate": 0.0001, "loss": 1.5726, "step": 15638 }, { "epoch": 1.8169038629102525, "grad_norm": 0.6121013760566711, "learning_rate": 0.0001, "loss": 1.3792, "step": 15639 }, { "epoch": 1.8170200406622132, "grad_norm": 0.6283285021781921, "learning_rate": 0.0001, "loss": 1.3344, "step": 15640 }, { "epoch": 1.8171362184141737, "grad_norm": 0.7028416395187378, "learning_rate": 0.0001, "loss": 1.4236, "step": 15641 }, { "epoch": 1.8172523961661342, "grad_norm": 0.6156495809555054, "learning_rate": 0.0001, "loss": 1.4386, "step": 15642 }, { "epoch": 1.8173685739180947, "grad_norm": 0.665553629398346, "learning_rate": 0.0001, "loss": 1.553, "step": 15643 }, { "epoch": 1.8174847516700552, "grad_norm": 0.6277129650115967, "learning_rate": 0.0001, "loss": 1.5514, "step": 15644 }, { "epoch": 1.8176009294220157, "grad_norm": 0.6554896235466003, "learning_rate": 0.0001, "loss": 1.6498, "step": 15645 }, { "epoch": 1.8177171071739762, "grad_norm": 0.6066895723342896, "learning_rate": 0.0001, "loss": 1.3731, "step": 15646 }, { "epoch": 1.8178332849259367, "grad_norm": 0.6454988718032837, "learning_rate": 0.0001, "loss": 1.5711, "step": 15647 }, { "epoch": 1.8179494626778971, "grad_norm": 0.621356189250946, "learning_rate": 0.0001, "loss": 1.4564, "step": 15648 }, { "epoch": 1.8180656404298576, "grad_norm": 0.6455363631248474, "learning_rate": 0.0001, "loss": 1.3961, "step": 15649 }, { "epoch": 1.8181818181818183, "grad_norm": 0.6525305509567261, "learning_rate": 0.0001, "loss": 1.4999, "step": 15650 }, { "epoch": 1.8182979959337788, "grad_norm": 0.6415418386459351, "learning_rate": 0.0001, "loss": 1.6698, "step": 15651 }, { "epoch": 1.8184141736857393, "grad_norm": 0.6238638758659363, "learning_rate": 0.0001, "loss": 1.4671, "step": 15652 }, { "epoch": 1.8185303514376998, "grad_norm": 0.6095195412635803, "learning_rate": 0.0001, "loss": 1.3642, "step": 15653 }, { "epoch": 1.8186465291896603, "grad_norm": 0.6489900946617126, "learning_rate": 0.0001, "loss": 1.3327, "step": 15654 }, { "epoch": 1.8187627069416208, "grad_norm": 0.6455105543136597, "learning_rate": 0.0001, "loss": 1.5854, "step": 15655 }, { "epoch": 1.8188788846935813, "grad_norm": 0.625741183757782, "learning_rate": 0.0001, "loss": 1.516, "step": 15656 }, { "epoch": 1.8189950624455418, "grad_norm": 0.6658532023429871, "learning_rate": 0.0001, "loss": 1.4335, "step": 15657 }, { "epoch": 1.8191112401975023, "grad_norm": 0.6668034195899963, "learning_rate": 0.0001, "loss": 1.4892, "step": 15658 }, { "epoch": 1.8192274179494627, "grad_norm": 0.7984752058982849, "learning_rate": 0.0001, "loss": 1.1825, "step": 15659 }, { "epoch": 1.8193435957014232, "grad_norm": 0.6774497032165527, "learning_rate": 0.0001, "loss": 1.5137, "step": 15660 }, { "epoch": 1.8194597734533837, "grad_norm": 0.6689207553863525, "learning_rate": 0.0001, "loss": 1.4904, "step": 15661 }, { "epoch": 1.8195759512053442, "grad_norm": 0.6524935364723206, "learning_rate": 0.0001, "loss": 1.4814, "step": 15662 }, { "epoch": 1.8196921289573047, "grad_norm": 0.6597123146057129, "learning_rate": 0.0001, "loss": 1.4268, "step": 15663 }, { "epoch": 1.8198083067092652, "grad_norm": 0.6401806473731995, "learning_rate": 0.0001, "loss": 1.4742, "step": 15664 }, { "epoch": 1.8199244844612257, "grad_norm": 0.6572660803794861, "learning_rate": 0.0001, "loss": 1.5616, "step": 15665 }, { "epoch": 1.8200406622131862, "grad_norm": 0.6930816769599915, "learning_rate": 0.0001, "loss": 1.4826, "step": 15666 }, { "epoch": 1.8201568399651467, "grad_norm": 0.6326063275337219, "learning_rate": 0.0001, "loss": 1.4518, "step": 15667 }, { "epoch": 1.8202730177171071, "grad_norm": 0.6219976544380188, "learning_rate": 0.0001, "loss": 1.4319, "step": 15668 }, { "epoch": 1.8203891954690676, "grad_norm": 0.6230618953704834, "learning_rate": 0.0001, "loss": 1.5437, "step": 15669 }, { "epoch": 1.8205053732210281, "grad_norm": 0.599841833114624, "learning_rate": 0.0001, "loss": 1.4642, "step": 15670 }, { "epoch": 1.8206215509729886, "grad_norm": 0.6312829852104187, "learning_rate": 0.0001, "loss": 1.3916, "step": 15671 }, { "epoch": 1.820737728724949, "grad_norm": 0.584986686706543, "learning_rate": 0.0001, "loss": 1.2353, "step": 15672 }, { "epoch": 1.8208539064769096, "grad_norm": 0.625332772731781, "learning_rate": 0.0001, "loss": 1.3221, "step": 15673 }, { "epoch": 1.82097008422887, "grad_norm": 0.7025689482688904, "learning_rate": 0.0001, "loss": 1.5841, "step": 15674 }, { "epoch": 1.8210862619808306, "grad_norm": 0.6292913556098938, "learning_rate": 0.0001, "loss": 1.3456, "step": 15675 }, { "epoch": 1.821202439732791, "grad_norm": 0.634003758430481, "learning_rate": 0.0001, "loss": 1.5375, "step": 15676 }, { "epoch": 1.8213186174847515, "grad_norm": 0.6472622156143188, "learning_rate": 0.0001, "loss": 1.2689, "step": 15677 }, { "epoch": 1.821434795236712, "grad_norm": 0.6431424021720886, "learning_rate": 0.0001, "loss": 1.5079, "step": 15678 }, { "epoch": 1.8215509729886725, "grad_norm": 0.6438504457473755, "learning_rate": 0.0001, "loss": 1.3573, "step": 15679 }, { "epoch": 1.821667150740633, "grad_norm": 0.6260543465614319, "learning_rate": 0.0001, "loss": 1.4517, "step": 15680 }, { "epoch": 1.8217833284925937, "grad_norm": 0.6657220721244812, "learning_rate": 0.0001, "loss": 1.5435, "step": 15681 }, { "epoch": 1.8218995062445542, "grad_norm": 0.6194844245910645, "learning_rate": 0.0001, "loss": 1.3552, "step": 15682 }, { "epoch": 1.8220156839965147, "grad_norm": 0.6261272430419922, "learning_rate": 0.0001, "loss": 1.4463, "step": 15683 }, { "epoch": 1.8221318617484752, "grad_norm": 0.6646369695663452, "learning_rate": 0.0001, "loss": 1.2688, "step": 15684 }, { "epoch": 1.8222480395004357, "grad_norm": 0.661354660987854, "learning_rate": 0.0001, "loss": 1.4943, "step": 15685 }, { "epoch": 1.8223642172523962, "grad_norm": 0.6553549766540527, "learning_rate": 0.0001, "loss": 1.5064, "step": 15686 }, { "epoch": 1.8224803950043567, "grad_norm": 0.6306678056716919, "learning_rate": 0.0001, "loss": 1.4184, "step": 15687 }, { "epoch": 1.8225965727563171, "grad_norm": 0.6394689679145813, "learning_rate": 0.0001, "loss": 1.439, "step": 15688 }, { "epoch": 1.8227127505082776, "grad_norm": 0.6229515075683594, "learning_rate": 0.0001, "loss": 1.439, "step": 15689 }, { "epoch": 1.8228289282602381, "grad_norm": 0.669452428817749, "learning_rate": 0.0001, "loss": 1.4198, "step": 15690 }, { "epoch": 1.8229451060121986, "grad_norm": 0.6298848986625671, "learning_rate": 0.0001, "loss": 1.3489, "step": 15691 }, { "epoch": 1.8230612837641593, "grad_norm": 0.6125050187110901, "learning_rate": 0.0001, "loss": 1.5542, "step": 15692 }, { "epoch": 1.8231774615161198, "grad_norm": 0.6353245377540588, "learning_rate": 0.0001, "loss": 1.3483, "step": 15693 }, { "epoch": 1.8232936392680803, "grad_norm": 0.6046391129493713, "learning_rate": 0.0001, "loss": 1.4707, "step": 15694 }, { "epoch": 1.8234098170200408, "grad_norm": 0.6517807245254517, "learning_rate": 0.0001, "loss": 1.3653, "step": 15695 }, { "epoch": 1.8235259947720013, "grad_norm": 0.6398533582687378, "learning_rate": 0.0001, "loss": 1.4802, "step": 15696 }, { "epoch": 1.8236421725239618, "grad_norm": 0.6782436370849609, "learning_rate": 0.0001, "loss": 1.4821, "step": 15697 }, { "epoch": 1.8237583502759223, "grad_norm": 0.665037989616394, "learning_rate": 0.0001, "loss": 1.5208, "step": 15698 }, { "epoch": 1.8238745280278827, "grad_norm": 0.6023517847061157, "learning_rate": 0.0001, "loss": 1.313, "step": 15699 }, { "epoch": 1.8239907057798432, "grad_norm": 0.7003962993621826, "learning_rate": 0.0001, "loss": 1.5367, "step": 15700 }, { "epoch": 1.8241068835318037, "grad_norm": 0.6215689182281494, "learning_rate": 0.0001, "loss": 1.4324, "step": 15701 }, { "epoch": 1.8242230612837642, "grad_norm": 0.663047730922699, "learning_rate": 0.0001, "loss": 1.5801, "step": 15702 }, { "epoch": 1.8243392390357247, "grad_norm": 0.603195071220398, "learning_rate": 0.0001, "loss": 1.3298, "step": 15703 }, { "epoch": 1.8244554167876852, "grad_norm": 0.6194412708282471, "learning_rate": 0.0001, "loss": 1.4135, "step": 15704 }, { "epoch": 1.8245715945396457, "grad_norm": 0.6398873329162598, "learning_rate": 0.0001, "loss": 1.4196, "step": 15705 }, { "epoch": 1.8246877722916062, "grad_norm": 0.5934845209121704, "learning_rate": 0.0001, "loss": 1.2589, "step": 15706 }, { "epoch": 1.8248039500435667, "grad_norm": 0.6291519999504089, "learning_rate": 0.0001, "loss": 1.4262, "step": 15707 }, { "epoch": 1.8249201277955271, "grad_norm": 0.6169574856758118, "learning_rate": 0.0001, "loss": 1.5321, "step": 15708 }, { "epoch": 1.8250363055474876, "grad_norm": 0.640806257724762, "learning_rate": 0.0001, "loss": 1.5679, "step": 15709 }, { "epoch": 1.8251524832994481, "grad_norm": 0.6423100233078003, "learning_rate": 0.0001, "loss": 1.4409, "step": 15710 }, { "epoch": 1.8252686610514086, "grad_norm": 0.651190996170044, "learning_rate": 0.0001, "loss": 1.4781, "step": 15711 }, { "epoch": 1.825384838803369, "grad_norm": 0.607464611530304, "learning_rate": 0.0001, "loss": 1.49, "step": 15712 }, { "epoch": 1.8255010165553296, "grad_norm": 0.5951231122016907, "learning_rate": 0.0001, "loss": 1.3891, "step": 15713 }, { "epoch": 1.82561719430729, "grad_norm": 0.6105263829231262, "learning_rate": 0.0001, "loss": 1.3204, "step": 15714 }, { "epoch": 1.8257333720592506, "grad_norm": 0.624480128288269, "learning_rate": 0.0001, "loss": 1.3137, "step": 15715 }, { "epoch": 1.825849549811211, "grad_norm": 0.6641981601715088, "learning_rate": 0.0001, "loss": 1.434, "step": 15716 }, { "epoch": 1.8259657275631715, "grad_norm": 0.6716280579566956, "learning_rate": 0.0001, "loss": 1.5697, "step": 15717 }, { "epoch": 1.826081905315132, "grad_norm": 0.6153289675712585, "learning_rate": 0.0001, "loss": 1.3179, "step": 15718 }, { "epoch": 1.8261980830670925, "grad_norm": 0.6423638463020325, "learning_rate": 0.0001, "loss": 1.5095, "step": 15719 }, { "epoch": 1.826314260819053, "grad_norm": 0.7097392082214355, "learning_rate": 0.0001, "loss": 1.5316, "step": 15720 }, { "epoch": 1.8264304385710135, "grad_norm": 0.6601358652114868, "learning_rate": 0.0001, "loss": 1.5746, "step": 15721 }, { "epoch": 1.826546616322974, "grad_norm": 0.6218417882919312, "learning_rate": 0.0001, "loss": 1.456, "step": 15722 }, { "epoch": 1.8266627940749347, "grad_norm": 0.5989342927932739, "learning_rate": 0.0001, "loss": 1.349, "step": 15723 }, { "epoch": 1.8267789718268952, "grad_norm": 0.6521559953689575, "learning_rate": 0.0001, "loss": 1.4588, "step": 15724 }, { "epoch": 1.8268951495788557, "grad_norm": 0.6307593584060669, "learning_rate": 0.0001, "loss": 1.5444, "step": 15725 }, { "epoch": 1.8270113273308162, "grad_norm": 0.6584532856941223, "learning_rate": 0.0001, "loss": 1.5694, "step": 15726 }, { "epoch": 1.8271275050827767, "grad_norm": 0.7067667245864868, "learning_rate": 0.0001, "loss": 1.6617, "step": 15727 }, { "epoch": 1.8272436828347371, "grad_norm": 0.6160309314727783, "learning_rate": 0.0001, "loss": 1.3623, "step": 15728 }, { "epoch": 1.8273598605866976, "grad_norm": 0.6524335741996765, "learning_rate": 0.0001, "loss": 1.5656, "step": 15729 }, { "epoch": 1.8274760383386581, "grad_norm": 0.6545636057853699, "learning_rate": 0.0001, "loss": 1.4816, "step": 15730 }, { "epoch": 1.8275922160906186, "grad_norm": 0.6238372325897217, "learning_rate": 0.0001, "loss": 1.4544, "step": 15731 }, { "epoch": 1.827708393842579, "grad_norm": 0.6156944632530212, "learning_rate": 0.0001, "loss": 1.4125, "step": 15732 }, { "epoch": 1.8278245715945396, "grad_norm": 0.6655493974685669, "learning_rate": 0.0001, "loss": 1.4263, "step": 15733 }, { "epoch": 1.8279407493465003, "grad_norm": 0.646045446395874, "learning_rate": 0.0001, "loss": 1.5402, "step": 15734 }, { "epoch": 1.8280569270984608, "grad_norm": 0.6110576391220093, "learning_rate": 0.0001, "loss": 1.4161, "step": 15735 }, { "epoch": 1.8281731048504213, "grad_norm": 0.6814568042755127, "learning_rate": 0.0001, "loss": 1.546, "step": 15736 }, { "epoch": 1.8282892826023818, "grad_norm": 0.6145970225334167, "learning_rate": 0.0001, "loss": 1.3518, "step": 15737 }, { "epoch": 1.8284054603543423, "grad_norm": 0.6477583646774292, "learning_rate": 0.0001, "loss": 1.4436, "step": 15738 }, { "epoch": 1.8285216381063027, "grad_norm": 0.6140140891075134, "learning_rate": 0.0001, "loss": 1.437, "step": 15739 }, { "epoch": 1.8286378158582632, "grad_norm": 0.6034733653068542, "learning_rate": 0.0001, "loss": 1.3864, "step": 15740 }, { "epoch": 1.8287539936102237, "grad_norm": 0.6174204349517822, "learning_rate": 0.0001, "loss": 1.3747, "step": 15741 }, { "epoch": 1.8288701713621842, "grad_norm": 0.6232085824012756, "learning_rate": 0.0001, "loss": 1.3464, "step": 15742 }, { "epoch": 1.8289863491141447, "grad_norm": 0.6628851890563965, "learning_rate": 0.0001, "loss": 1.4494, "step": 15743 }, { "epoch": 1.8291025268661052, "grad_norm": 0.5966079831123352, "learning_rate": 0.0001, "loss": 1.469, "step": 15744 }, { "epoch": 1.8292187046180657, "grad_norm": 0.6103048324584961, "learning_rate": 0.0001, "loss": 1.5032, "step": 15745 }, { "epoch": 1.8293348823700262, "grad_norm": 0.6263560652732849, "learning_rate": 0.0001, "loss": 1.452, "step": 15746 }, { "epoch": 1.8294510601219867, "grad_norm": 0.6290574073791504, "learning_rate": 0.0001, "loss": 1.4978, "step": 15747 }, { "epoch": 1.8295672378739471, "grad_norm": 0.6744107007980347, "learning_rate": 0.0001, "loss": 1.4742, "step": 15748 }, { "epoch": 1.8296834156259076, "grad_norm": 0.6555504202842712, "learning_rate": 0.0001, "loss": 1.4864, "step": 15749 }, { "epoch": 1.8297995933778681, "grad_norm": 0.5998446345329285, "learning_rate": 0.0001, "loss": 1.4323, "step": 15750 }, { "epoch": 1.8299157711298286, "grad_norm": 0.6517525315284729, "learning_rate": 0.0001, "loss": 1.4494, "step": 15751 }, { "epoch": 1.830031948881789, "grad_norm": 0.6083842515945435, "learning_rate": 0.0001, "loss": 1.4295, "step": 15752 }, { "epoch": 1.8301481266337496, "grad_norm": 0.599166750907898, "learning_rate": 0.0001, "loss": 1.5188, "step": 15753 }, { "epoch": 1.83026430438571, "grad_norm": 0.6017733812332153, "learning_rate": 0.0001, "loss": 1.5186, "step": 15754 }, { "epoch": 1.8303804821376706, "grad_norm": 0.6161291003227234, "learning_rate": 0.0001, "loss": 1.4985, "step": 15755 }, { "epoch": 1.830496659889631, "grad_norm": 0.6510352492332458, "learning_rate": 0.0001, "loss": 1.4695, "step": 15756 }, { "epoch": 1.8306128376415916, "grad_norm": 0.6702868938446045, "learning_rate": 0.0001, "loss": 1.5777, "step": 15757 }, { "epoch": 1.830729015393552, "grad_norm": 0.6573750972747803, "learning_rate": 0.0001, "loss": 1.5759, "step": 15758 }, { "epoch": 1.8308451931455125, "grad_norm": 0.6256064772605896, "learning_rate": 0.0001, "loss": 1.3602, "step": 15759 }, { "epoch": 1.830961370897473, "grad_norm": 0.6097702383995056, "learning_rate": 0.0001, "loss": 1.3819, "step": 15760 }, { "epoch": 1.8310775486494335, "grad_norm": 0.6192917823791504, "learning_rate": 0.0001, "loss": 1.465, "step": 15761 }, { "epoch": 1.831193726401394, "grad_norm": 0.6619765162467957, "learning_rate": 0.0001, "loss": 1.4104, "step": 15762 }, { "epoch": 1.8313099041533545, "grad_norm": 0.6467078924179077, "learning_rate": 0.0001, "loss": 1.5349, "step": 15763 }, { "epoch": 1.831426081905315, "grad_norm": 0.6213657259941101, "learning_rate": 0.0001, "loss": 1.3376, "step": 15764 }, { "epoch": 1.8315422596572757, "grad_norm": 0.6226456165313721, "learning_rate": 0.0001, "loss": 1.504, "step": 15765 }, { "epoch": 1.8316584374092362, "grad_norm": 0.6544266939163208, "learning_rate": 0.0001, "loss": 1.5886, "step": 15766 }, { "epoch": 1.8317746151611967, "grad_norm": 0.6587969660758972, "learning_rate": 0.0001, "loss": 1.539, "step": 15767 }, { "epoch": 1.8318907929131572, "grad_norm": 0.558379054069519, "learning_rate": 0.0001, "loss": 1.2244, "step": 15768 }, { "epoch": 1.8320069706651176, "grad_norm": 0.6884554028511047, "learning_rate": 0.0001, "loss": 1.3835, "step": 15769 }, { "epoch": 1.8321231484170781, "grad_norm": 0.6165851950645447, "learning_rate": 0.0001, "loss": 1.3442, "step": 15770 }, { "epoch": 1.8322393261690386, "grad_norm": 0.6421175003051758, "learning_rate": 0.0001, "loss": 1.4531, "step": 15771 }, { "epoch": 1.832355503920999, "grad_norm": 0.6422439813613892, "learning_rate": 0.0001, "loss": 1.5082, "step": 15772 }, { "epoch": 1.8324716816729596, "grad_norm": 0.6669280529022217, "learning_rate": 0.0001, "loss": 1.6315, "step": 15773 }, { "epoch": 1.83258785942492, "grad_norm": 0.6927198171615601, "learning_rate": 0.0001, "loss": 1.6928, "step": 15774 }, { "epoch": 1.8327040371768806, "grad_norm": 0.6718189716339111, "learning_rate": 0.0001, "loss": 1.5182, "step": 15775 }, { "epoch": 1.8328202149288413, "grad_norm": 0.6268951892852783, "learning_rate": 0.0001, "loss": 1.4821, "step": 15776 }, { "epoch": 1.8329363926808018, "grad_norm": 0.6476660966873169, "learning_rate": 0.0001, "loss": 1.4125, "step": 15777 }, { "epoch": 1.8330525704327623, "grad_norm": 0.6505872011184692, "learning_rate": 0.0001, "loss": 1.5279, "step": 15778 }, { "epoch": 1.8331687481847228, "grad_norm": 0.6079415082931519, "learning_rate": 0.0001, "loss": 1.4143, "step": 15779 }, { "epoch": 1.8332849259366832, "grad_norm": 0.6487735509872437, "learning_rate": 0.0001, "loss": 1.4838, "step": 15780 }, { "epoch": 1.8334011036886437, "grad_norm": 0.6747827529907227, "learning_rate": 0.0001, "loss": 1.6398, "step": 15781 }, { "epoch": 1.8335172814406042, "grad_norm": 0.6179137229919434, "learning_rate": 0.0001, "loss": 1.2477, "step": 15782 }, { "epoch": 1.8336334591925647, "grad_norm": 0.6353632211685181, "learning_rate": 0.0001, "loss": 1.6035, "step": 15783 }, { "epoch": 1.8337496369445252, "grad_norm": 0.6235594153404236, "learning_rate": 0.0001, "loss": 1.423, "step": 15784 }, { "epoch": 1.8338658146964857, "grad_norm": 0.6635370254516602, "learning_rate": 0.0001, "loss": 1.5815, "step": 15785 }, { "epoch": 1.8339819924484462, "grad_norm": 0.6491432189941406, "learning_rate": 0.0001, "loss": 1.3535, "step": 15786 }, { "epoch": 1.8340981702004067, "grad_norm": 0.6436711549758911, "learning_rate": 0.0001, "loss": 1.6146, "step": 15787 }, { "epoch": 1.8342143479523672, "grad_norm": 0.6430501937866211, "learning_rate": 0.0001, "loss": 1.6028, "step": 15788 }, { "epoch": 1.8343305257043276, "grad_norm": 0.5883323550224304, "learning_rate": 0.0001, "loss": 1.4384, "step": 15789 }, { "epoch": 1.8344467034562881, "grad_norm": 0.6654613018035889, "learning_rate": 0.0001, "loss": 1.4959, "step": 15790 }, { "epoch": 1.8345628812082486, "grad_norm": 0.6218181252479553, "learning_rate": 0.0001, "loss": 1.3452, "step": 15791 }, { "epoch": 1.834679058960209, "grad_norm": 0.6197909712791443, "learning_rate": 0.0001, "loss": 1.4516, "step": 15792 }, { "epoch": 1.8347952367121696, "grad_norm": 0.6160784959793091, "learning_rate": 0.0001, "loss": 1.3933, "step": 15793 }, { "epoch": 1.83491141446413, "grad_norm": 0.6177665591239929, "learning_rate": 0.0001, "loss": 1.5776, "step": 15794 }, { "epoch": 1.8350275922160906, "grad_norm": 0.5998832583427429, "learning_rate": 0.0001, "loss": 1.4851, "step": 15795 }, { "epoch": 1.835143769968051, "grad_norm": 0.6603788733482361, "learning_rate": 0.0001, "loss": 1.4121, "step": 15796 }, { "epoch": 1.8352599477200116, "grad_norm": 0.6297788023948669, "learning_rate": 0.0001, "loss": 1.3603, "step": 15797 }, { "epoch": 1.835376125471972, "grad_norm": 0.6391573548316956, "learning_rate": 0.0001, "loss": 1.419, "step": 15798 }, { "epoch": 1.8354923032239325, "grad_norm": 0.6628352999687195, "learning_rate": 0.0001, "loss": 1.5928, "step": 15799 }, { "epoch": 1.835608480975893, "grad_norm": 0.6142341494560242, "learning_rate": 0.0001, "loss": 1.3791, "step": 15800 }, { "epoch": 1.8357246587278535, "grad_norm": 0.6423048377037048, "learning_rate": 0.0001, "loss": 1.4444, "step": 15801 }, { "epoch": 1.835840836479814, "grad_norm": 0.6642759442329407, "learning_rate": 0.0001, "loss": 1.6493, "step": 15802 }, { "epoch": 1.8359570142317745, "grad_norm": 0.6206764578819275, "learning_rate": 0.0001, "loss": 1.4107, "step": 15803 }, { "epoch": 1.836073191983735, "grad_norm": 0.624851644039154, "learning_rate": 0.0001, "loss": 1.3283, "step": 15804 }, { "epoch": 1.8361893697356955, "grad_norm": 0.6127082109451294, "learning_rate": 0.0001, "loss": 1.5061, "step": 15805 }, { "epoch": 1.836305547487656, "grad_norm": 0.6388171315193176, "learning_rate": 0.0001, "loss": 1.4454, "step": 15806 }, { "epoch": 1.8364217252396167, "grad_norm": 0.6365912556648254, "learning_rate": 0.0001, "loss": 1.4164, "step": 15807 }, { "epoch": 1.8365379029915772, "grad_norm": 0.6378763914108276, "learning_rate": 0.0001, "loss": 1.4261, "step": 15808 }, { "epoch": 1.8366540807435376, "grad_norm": 0.6192729473114014, "learning_rate": 0.0001, "loss": 1.4165, "step": 15809 }, { "epoch": 1.8367702584954981, "grad_norm": 0.5974169969558716, "learning_rate": 0.0001, "loss": 1.278, "step": 15810 }, { "epoch": 1.8368864362474586, "grad_norm": 0.676320493221283, "learning_rate": 0.0001, "loss": 1.4383, "step": 15811 }, { "epoch": 1.837002613999419, "grad_norm": 0.6627748012542725, "learning_rate": 0.0001, "loss": 1.4345, "step": 15812 }, { "epoch": 1.8371187917513796, "grad_norm": 0.6659048199653625, "learning_rate": 0.0001, "loss": 1.6619, "step": 15813 }, { "epoch": 1.83723496950334, "grad_norm": 0.6555460691452026, "learning_rate": 0.0001, "loss": 1.5172, "step": 15814 }, { "epoch": 1.8373511472553006, "grad_norm": 0.6790602803230286, "learning_rate": 0.0001, "loss": 1.5644, "step": 15815 }, { "epoch": 1.837467325007261, "grad_norm": 0.6389950513839722, "learning_rate": 0.0001, "loss": 1.3585, "step": 15816 }, { "epoch": 1.8375835027592216, "grad_norm": 0.6223099827766418, "learning_rate": 0.0001, "loss": 1.5151, "step": 15817 }, { "epoch": 1.8376996805111823, "grad_norm": 0.6905158758163452, "learning_rate": 0.0001, "loss": 1.4353, "step": 15818 }, { "epoch": 1.8378158582631428, "grad_norm": 0.6519301533699036, "learning_rate": 0.0001, "loss": 1.5341, "step": 15819 }, { "epoch": 1.8379320360151032, "grad_norm": 0.5987849235534668, "learning_rate": 0.0001, "loss": 1.4681, "step": 15820 }, { "epoch": 1.8380482137670637, "grad_norm": 0.6022413372993469, "learning_rate": 0.0001, "loss": 1.1909, "step": 15821 }, { "epoch": 1.8381643915190242, "grad_norm": 0.676166832447052, "learning_rate": 0.0001, "loss": 1.5692, "step": 15822 }, { "epoch": 1.8382805692709847, "grad_norm": 0.6689494252204895, "learning_rate": 0.0001, "loss": 1.4347, "step": 15823 }, { "epoch": 1.8383967470229452, "grad_norm": 0.6943590044975281, "learning_rate": 0.0001, "loss": 1.5028, "step": 15824 }, { "epoch": 1.8385129247749057, "grad_norm": 0.6813721060752869, "learning_rate": 0.0001, "loss": 1.3688, "step": 15825 }, { "epoch": 1.8386291025268662, "grad_norm": 0.6406850814819336, "learning_rate": 0.0001, "loss": 1.394, "step": 15826 }, { "epoch": 1.8387452802788267, "grad_norm": 0.657934308052063, "learning_rate": 0.0001, "loss": 1.4567, "step": 15827 }, { "epoch": 1.8388614580307872, "grad_norm": 0.6139489412307739, "learning_rate": 0.0001, "loss": 1.3562, "step": 15828 }, { "epoch": 1.8389776357827476, "grad_norm": 0.6442635655403137, "learning_rate": 0.0001, "loss": 1.6105, "step": 15829 }, { "epoch": 1.8390938135347081, "grad_norm": 0.6636027097702026, "learning_rate": 0.0001, "loss": 1.5311, "step": 15830 }, { "epoch": 1.8392099912866686, "grad_norm": 0.6013785004615784, "learning_rate": 0.0001, "loss": 1.3251, "step": 15831 }, { "epoch": 1.839326169038629, "grad_norm": 0.6119763255119324, "learning_rate": 0.0001, "loss": 1.3852, "step": 15832 }, { "epoch": 1.8394423467905896, "grad_norm": 0.7098992466926575, "learning_rate": 0.0001, "loss": 1.4956, "step": 15833 }, { "epoch": 1.83955852454255, "grad_norm": 0.6479992270469666, "learning_rate": 0.0001, "loss": 1.3366, "step": 15834 }, { "epoch": 1.8396747022945106, "grad_norm": 0.7065369486808777, "learning_rate": 0.0001, "loss": 1.298, "step": 15835 }, { "epoch": 1.839790880046471, "grad_norm": 0.6130120158195496, "learning_rate": 0.0001, "loss": 1.3877, "step": 15836 }, { "epoch": 1.8399070577984316, "grad_norm": 0.6558318138122559, "learning_rate": 0.0001, "loss": 1.4868, "step": 15837 }, { "epoch": 1.840023235550392, "grad_norm": 0.6193309426307678, "learning_rate": 0.0001, "loss": 1.2886, "step": 15838 }, { "epoch": 1.8401394133023525, "grad_norm": 0.6239991784095764, "learning_rate": 0.0001, "loss": 1.3477, "step": 15839 }, { "epoch": 1.840255591054313, "grad_norm": 0.6400113105773926, "learning_rate": 0.0001, "loss": 1.5957, "step": 15840 }, { "epoch": 1.8403717688062735, "grad_norm": 0.6171450614929199, "learning_rate": 0.0001, "loss": 1.3954, "step": 15841 }, { "epoch": 1.840487946558234, "grad_norm": 0.6365968585014343, "learning_rate": 0.0001, "loss": 1.3949, "step": 15842 }, { "epoch": 1.8406041243101945, "grad_norm": 0.6223557591438293, "learning_rate": 0.0001, "loss": 1.4757, "step": 15843 }, { "epoch": 1.840720302062155, "grad_norm": 0.5846434235572815, "learning_rate": 0.0001, "loss": 1.416, "step": 15844 }, { "epoch": 1.8408364798141155, "grad_norm": 0.6284165382385254, "learning_rate": 0.0001, "loss": 1.4623, "step": 15845 }, { "epoch": 1.840952657566076, "grad_norm": 0.6345427632331848, "learning_rate": 0.0001, "loss": 1.3014, "step": 15846 }, { "epoch": 1.8410688353180364, "grad_norm": 0.6641936302185059, "learning_rate": 0.0001, "loss": 1.5752, "step": 15847 }, { "epoch": 1.841185013069997, "grad_norm": 0.6104840636253357, "learning_rate": 0.0001, "loss": 1.3778, "step": 15848 }, { "epoch": 1.8413011908219576, "grad_norm": 0.6078286170959473, "learning_rate": 0.0001, "loss": 1.3278, "step": 15849 }, { "epoch": 1.8414173685739181, "grad_norm": 0.6374439597129822, "learning_rate": 0.0001, "loss": 1.5076, "step": 15850 }, { "epoch": 1.8415335463258786, "grad_norm": 0.6510405540466309, "learning_rate": 0.0001, "loss": 1.412, "step": 15851 }, { "epoch": 1.8416497240778391, "grad_norm": 0.651776909828186, "learning_rate": 0.0001, "loss": 1.5319, "step": 15852 }, { "epoch": 1.8417659018297996, "grad_norm": 0.6916422843933105, "learning_rate": 0.0001, "loss": 1.557, "step": 15853 }, { "epoch": 1.84188207958176, "grad_norm": 0.588064968585968, "learning_rate": 0.0001, "loss": 1.4961, "step": 15854 }, { "epoch": 1.8419982573337206, "grad_norm": 0.6474186778068542, "learning_rate": 0.0001, "loss": 1.491, "step": 15855 }, { "epoch": 1.842114435085681, "grad_norm": 0.6321966648101807, "learning_rate": 0.0001, "loss": 1.5578, "step": 15856 }, { "epoch": 1.8422306128376416, "grad_norm": 0.627600371837616, "learning_rate": 0.0001, "loss": 1.4618, "step": 15857 }, { "epoch": 1.842346790589602, "grad_norm": 0.6926926970481873, "learning_rate": 0.0001, "loss": 1.5241, "step": 15858 }, { "epoch": 1.8424629683415628, "grad_norm": 0.6751333475112915, "learning_rate": 0.0001, "loss": 1.536, "step": 15859 }, { "epoch": 1.8425791460935232, "grad_norm": 0.6097075343132019, "learning_rate": 0.0001, "loss": 1.3156, "step": 15860 }, { "epoch": 1.8426953238454837, "grad_norm": 0.6458491683006287, "learning_rate": 0.0001, "loss": 1.5339, "step": 15861 }, { "epoch": 1.8428115015974442, "grad_norm": 0.6323471665382385, "learning_rate": 0.0001, "loss": 1.4383, "step": 15862 }, { "epoch": 1.8429276793494047, "grad_norm": 0.6339277625083923, "learning_rate": 0.0001, "loss": 1.5904, "step": 15863 }, { "epoch": 1.8430438571013652, "grad_norm": 0.6184143424034119, "learning_rate": 0.0001, "loss": 1.4286, "step": 15864 }, { "epoch": 1.8431600348533257, "grad_norm": 0.662568986415863, "learning_rate": 0.0001, "loss": 1.3606, "step": 15865 }, { "epoch": 1.8432762126052862, "grad_norm": 0.5960211753845215, "learning_rate": 0.0001, "loss": 1.3503, "step": 15866 }, { "epoch": 1.8433923903572467, "grad_norm": 0.662533700466156, "learning_rate": 0.0001, "loss": 1.4761, "step": 15867 }, { "epoch": 1.8435085681092072, "grad_norm": 0.6247810125350952, "learning_rate": 0.0001, "loss": 1.4299, "step": 15868 }, { "epoch": 1.8436247458611676, "grad_norm": 0.6141956448554993, "learning_rate": 0.0001, "loss": 1.3269, "step": 15869 }, { "epoch": 1.8437409236131281, "grad_norm": 0.6622874140739441, "learning_rate": 0.0001, "loss": 1.4268, "step": 15870 }, { "epoch": 1.8438571013650886, "grad_norm": 0.647631049156189, "learning_rate": 0.0001, "loss": 1.3943, "step": 15871 }, { "epoch": 1.8439732791170491, "grad_norm": 0.6533973217010498, "learning_rate": 0.0001, "loss": 1.4422, "step": 15872 }, { "epoch": 1.8440894568690096, "grad_norm": 0.7556509971618652, "learning_rate": 0.0001, "loss": 1.4924, "step": 15873 }, { "epoch": 1.84420563462097, "grad_norm": 0.6466307044029236, "learning_rate": 0.0001, "loss": 1.4235, "step": 15874 }, { "epoch": 1.8443218123729306, "grad_norm": 0.6324759125709534, "learning_rate": 0.0001, "loss": 1.4354, "step": 15875 }, { "epoch": 1.844437990124891, "grad_norm": 0.6951972842216492, "learning_rate": 0.0001, "loss": 1.3743, "step": 15876 }, { "epoch": 1.8445541678768516, "grad_norm": 0.6317431330680847, "learning_rate": 0.0001, "loss": 1.4027, "step": 15877 }, { "epoch": 1.844670345628812, "grad_norm": 0.6691324710845947, "learning_rate": 0.0001, "loss": 1.4926, "step": 15878 }, { "epoch": 1.8447865233807725, "grad_norm": 0.6198418140411377, "learning_rate": 0.0001, "loss": 1.3961, "step": 15879 }, { "epoch": 1.844902701132733, "grad_norm": 0.688801109790802, "learning_rate": 0.0001, "loss": 1.6658, "step": 15880 }, { "epoch": 1.8450188788846935, "grad_norm": 0.6224777102470398, "learning_rate": 0.0001, "loss": 1.478, "step": 15881 }, { "epoch": 1.845135056636654, "grad_norm": 0.6318206787109375, "learning_rate": 0.0001, "loss": 1.4193, "step": 15882 }, { "epoch": 1.8452512343886145, "grad_norm": 0.6100471019744873, "learning_rate": 0.0001, "loss": 1.3605, "step": 15883 }, { "epoch": 1.845367412140575, "grad_norm": 0.67999666929245, "learning_rate": 0.0001, "loss": 1.5098, "step": 15884 }, { "epoch": 1.8454835898925355, "grad_norm": 0.6518880128860474, "learning_rate": 0.0001, "loss": 1.4426, "step": 15885 }, { "epoch": 1.845599767644496, "grad_norm": 0.5936741828918457, "learning_rate": 0.0001, "loss": 1.2343, "step": 15886 }, { "epoch": 1.8457159453964564, "grad_norm": 0.59494948387146, "learning_rate": 0.0001, "loss": 1.3919, "step": 15887 }, { "epoch": 1.845832123148417, "grad_norm": 0.6592521071434021, "learning_rate": 0.0001, "loss": 1.5441, "step": 15888 }, { "epoch": 1.8459483009003774, "grad_norm": 0.637743353843689, "learning_rate": 0.0001, "loss": 1.4972, "step": 15889 }, { "epoch": 1.846064478652338, "grad_norm": 0.672650933265686, "learning_rate": 0.0001, "loss": 1.5107, "step": 15890 }, { "epoch": 1.8461806564042986, "grad_norm": 0.5832641124725342, "learning_rate": 0.0001, "loss": 1.3632, "step": 15891 }, { "epoch": 1.8462968341562591, "grad_norm": 0.653222382068634, "learning_rate": 0.0001, "loss": 1.5397, "step": 15892 }, { "epoch": 1.8464130119082196, "grad_norm": 0.6585376858711243, "learning_rate": 0.0001, "loss": 1.4225, "step": 15893 }, { "epoch": 1.84652918966018, "grad_norm": 0.602841317653656, "learning_rate": 0.0001, "loss": 1.3514, "step": 15894 }, { "epoch": 1.8466453674121406, "grad_norm": 0.6117048263549805, "learning_rate": 0.0001, "loss": 1.4297, "step": 15895 }, { "epoch": 1.846761545164101, "grad_norm": 0.6555029153823853, "learning_rate": 0.0001, "loss": 1.5485, "step": 15896 }, { "epoch": 1.8468777229160616, "grad_norm": 0.6498346328735352, "learning_rate": 0.0001, "loss": 1.3554, "step": 15897 }, { "epoch": 1.846993900668022, "grad_norm": 0.6486692428588867, "learning_rate": 0.0001, "loss": 1.4703, "step": 15898 }, { "epoch": 1.8471100784199825, "grad_norm": 0.6454839706420898, "learning_rate": 0.0001, "loss": 1.4445, "step": 15899 }, { "epoch": 1.847226256171943, "grad_norm": 0.6514415144920349, "learning_rate": 0.0001, "loss": 1.4667, "step": 15900 }, { "epoch": 1.8473424339239037, "grad_norm": 0.7295227646827698, "learning_rate": 0.0001, "loss": 1.5415, "step": 15901 }, { "epoch": 1.8474586116758642, "grad_norm": 0.7171356678009033, "learning_rate": 0.0001, "loss": 1.6124, "step": 15902 }, { "epoch": 1.8475747894278247, "grad_norm": 0.631712019443512, "learning_rate": 0.0001, "loss": 1.3553, "step": 15903 }, { "epoch": 1.8476909671797852, "grad_norm": 0.6527529954910278, "learning_rate": 0.0001, "loss": 1.5835, "step": 15904 }, { "epoch": 1.8478071449317457, "grad_norm": 0.6119389533996582, "learning_rate": 0.0001, "loss": 1.3865, "step": 15905 }, { "epoch": 1.8479233226837062, "grad_norm": 0.6549842953681946, "learning_rate": 0.0001, "loss": 1.5724, "step": 15906 }, { "epoch": 1.8480395004356667, "grad_norm": 0.622146487236023, "learning_rate": 0.0001, "loss": 1.3403, "step": 15907 }, { "epoch": 1.8481556781876272, "grad_norm": 0.6111570596694946, "learning_rate": 0.0001, "loss": 1.2168, "step": 15908 }, { "epoch": 1.8482718559395876, "grad_norm": 0.658610999584198, "learning_rate": 0.0001, "loss": 1.4611, "step": 15909 }, { "epoch": 1.8483880336915481, "grad_norm": 0.6580610871315002, "learning_rate": 0.0001, "loss": 1.428, "step": 15910 }, { "epoch": 1.8485042114435086, "grad_norm": 0.6814003586769104, "learning_rate": 0.0001, "loss": 1.494, "step": 15911 }, { "epoch": 1.8486203891954691, "grad_norm": 0.6960819363594055, "learning_rate": 0.0001, "loss": 1.52, "step": 15912 }, { "epoch": 1.8487365669474296, "grad_norm": 0.6664005517959595, "learning_rate": 0.0001, "loss": 1.591, "step": 15913 }, { "epoch": 1.84885274469939, "grad_norm": 0.6293574571609497, "learning_rate": 0.0001, "loss": 1.4252, "step": 15914 }, { "epoch": 1.8489689224513506, "grad_norm": 0.5990114212036133, "learning_rate": 0.0001, "loss": 1.1662, "step": 15915 }, { "epoch": 1.849085100203311, "grad_norm": 0.5898028016090393, "learning_rate": 0.0001, "loss": 1.4397, "step": 15916 }, { "epoch": 1.8492012779552716, "grad_norm": 0.6499435901641846, "learning_rate": 0.0001, "loss": 1.5067, "step": 15917 }, { "epoch": 1.849317455707232, "grad_norm": 0.598627507686615, "learning_rate": 0.0001, "loss": 1.2612, "step": 15918 }, { "epoch": 1.8494336334591925, "grad_norm": 0.6359940767288208, "learning_rate": 0.0001, "loss": 1.4969, "step": 15919 }, { "epoch": 1.849549811211153, "grad_norm": 0.6298830509185791, "learning_rate": 0.0001, "loss": 1.5886, "step": 15920 }, { "epoch": 1.8496659889631135, "grad_norm": 0.6625968217849731, "learning_rate": 0.0001, "loss": 1.4717, "step": 15921 }, { "epoch": 1.849782166715074, "grad_norm": 0.6397876143455505, "learning_rate": 0.0001, "loss": 1.3486, "step": 15922 }, { "epoch": 1.8498983444670345, "grad_norm": 0.6491879224777222, "learning_rate": 0.0001, "loss": 1.5525, "step": 15923 }, { "epoch": 1.850014522218995, "grad_norm": 0.6469165682792664, "learning_rate": 0.0001, "loss": 1.4168, "step": 15924 }, { "epoch": 1.8501306999709555, "grad_norm": 0.6507518291473389, "learning_rate": 0.0001, "loss": 1.4388, "step": 15925 }, { "epoch": 1.850246877722916, "grad_norm": 0.6447193622589111, "learning_rate": 0.0001, "loss": 1.5766, "step": 15926 }, { "epoch": 1.8503630554748765, "grad_norm": 0.6330906748771667, "learning_rate": 0.0001, "loss": 1.4083, "step": 15927 }, { "epoch": 1.850479233226837, "grad_norm": 0.6252586841583252, "learning_rate": 0.0001, "loss": 1.4948, "step": 15928 }, { "epoch": 1.8505954109787974, "grad_norm": 0.6731130480766296, "learning_rate": 0.0001, "loss": 1.4928, "step": 15929 }, { "epoch": 1.850711588730758, "grad_norm": 0.6255606412887573, "learning_rate": 0.0001, "loss": 1.6288, "step": 15930 }, { "epoch": 1.8508277664827184, "grad_norm": 0.6344792246818542, "learning_rate": 0.0001, "loss": 1.4916, "step": 15931 }, { "epoch": 1.850943944234679, "grad_norm": 0.6789487600326538, "learning_rate": 0.0001, "loss": 1.5331, "step": 15932 }, { "epoch": 1.8510601219866396, "grad_norm": 0.633052408695221, "learning_rate": 0.0001, "loss": 1.4782, "step": 15933 }, { "epoch": 1.8511762997386, "grad_norm": 0.5799365639686584, "learning_rate": 0.0001, "loss": 1.2961, "step": 15934 }, { "epoch": 1.8512924774905606, "grad_norm": 0.6157287955284119, "learning_rate": 0.0001, "loss": 1.4431, "step": 15935 }, { "epoch": 1.851408655242521, "grad_norm": 0.6475369334220886, "learning_rate": 0.0001, "loss": 1.6849, "step": 15936 }, { "epoch": 1.8515248329944816, "grad_norm": 0.6590321063995361, "learning_rate": 0.0001, "loss": 1.5023, "step": 15937 }, { "epoch": 1.851641010746442, "grad_norm": 0.5794409513473511, "learning_rate": 0.0001, "loss": 1.3523, "step": 15938 }, { "epoch": 1.8517571884984025, "grad_norm": 0.6696737408638, "learning_rate": 0.0001, "loss": 1.3531, "step": 15939 }, { "epoch": 1.851873366250363, "grad_norm": 0.6289454102516174, "learning_rate": 0.0001, "loss": 1.3364, "step": 15940 }, { "epoch": 1.8519895440023235, "grad_norm": 0.6849252581596375, "learning_rate": 0.0001, "loss": 1.6824, "step": 15941 }, { "epoch": 1.852105721754284, "grad_norm": 0.6129252314567566, "learning_rate": 0.0001, "loss": 1.4256, "step": 15942 }, { "epoch": 1.8522218995062447, "grad_norm": 0.6236092448234558, "learning_rate": 0.0001, "loss": 1.2727, "step": 15943 }, { "epoch": 1.8523380772582052, "grad_norm": 0.6409513354301453, "learning_rate": 0.0001, "loss": 1.6124, "step": 15944 }, { "epoch": 1.8524542550101657, "grad_norm": 0.6730403304100037, "learning_rate": 0.0001, "loss": 1.3428, "step": 15945 }, { "epoch": 1.8525704327621262, "grad_norm": 0.5990225672721863, "learning_rate": 0.0001, "loss": 1.5518, "step": 15946 }, { "epoch": 1.8526866105140867, "grad_norm": 0.6200845837593079, "learning_rate": 0.0001, "loss": 1.6408, "step": 15947 }, { "epoch": 1.8528027882660472, "grad_norm": 0.6110101938247681, "learning_rate": 0.0001, "loss": 1.284, "step": 15948 }, { "epoch": 1.8529189660180077, "grad_norm": 0.6141749024391174, "learning_rate": 0.0001, "loss": 1.413, "step": 15949 }, { "epoch": 1.8530351437699681, "grad_norm": 0.7095697522163391, "learning_rate": 0.0001, "loss": 1.3614, "step": 15950 }, { "epoch": 1.8531513215219286, "grad_norm": 0.6521300673484802, "learning_rate": 0.0001, "loss": 1.4526, "step": 15951 }, { "epoch": 1.8532674992738891, "grad_norm": 0.6550735235214233, "learning_rate": 0.0001, "loss": 1.4943, "step": 15952 }, { "epoch": 1.8533836770258496, "grad_norm": 0.6534093022346497, "learning_rate": 0.0001, "loss": 1.3861, "step": 15953 }, { "epoch": 1.85349985477781, "grad_norm": 0.6369327306747437, "learning_rate": 0.0001, "loss": 1.5907, "step": 15954 }, { "epoch": 1.8536160325297706, "grad_norm": 0.628036618232727, "learning_rate": 0.0001, "loss": 1.6018, "step": 15955 }, { "epoch": 1.853732210281731, "grad_norm": 0.675891101360321, "learning_rate": 0.0001, "loss": 1.384, "step": 15956 }, { "epoch": 1.8538483880336916, "grad_norm": 0.6261839270591736, "learning_rate": 0.0001, "loss": 1.3933, "step": 15957 }, { "epoch": 1.853964565785652, "grad_norm": 0.633547306060791, "learning_rate": 0.0001, "loss": 1.3736, "step": 15958 }, { "epoch": 1.8540807435376125, "grad_norm": 0.6268337368965149, "learning_rate": 0.0001, "loss": 1.347, "step": 15959 }, { "epoch": 1.854196921289573, "grad_norm": 0.6326743364334106, "learning_rate": 0.0001, "loss": 1.3714, "step": 15960 }, { "epoch": 1.8543130990415335, "grad_norm": 0.6467177867889404, "learning_rate": 0.0001, "loss": 1.3624, "step": 15961 }, { "epoch": 1.854429276793494, "grad_norm": 0.5943117737770081, "learning_rate": 0.0001, "loss": 1.2638, "step": 15962 }, { "epoch": 1.8545454545454545, "grad_norm": 0.6902331113815308, "learning_rate": 0.0001, "loss": 1.4712, "step": 15963 }, { "epoch": 1.854661632297415, "grad_norm": 0.6605142951011658, "learning_rate": 0.0001, "loss": 1.5047, "step": 15964 }, { "epoch": 1.8547778100493755, "grad_norm": 0.6251471042633057, "learning_rate": 0.0001, "loss": 1.5201, "step": 15965 }, { "epoch": 1.854893987801336, "grad_norm": 0.6003806591033936, "learning_rate": 0.0001, "loss": 1.3556, "step": 15966 }, { "epoch": 1.8550101655532965, "grad_norm": 0.5969613790512085, "learning_rate": 0.0001, "loss": 1.2569, "step": 15967 }, { "epoch": 1.855126343305257, "grad_norm": 0.6904743313789368, "learning_rate": 0.0001, "loss": 1.5101, "step": 15968 }, { "epoch": 1.8552425210572174, "grad_norm": 0.6145458221435547, "learning_rate": 0.0001, "loss": 1.5171, "step": 15969 }, { "epoch": 1.855358698809178, "grad_norm": 0.5802835822105408, "learning_rate": 0.0001, "loss": 1.3716, "step": 15970 }, { "epoch": 1.8554748765611384, "grad_norm": 0.6366599798202515, "learning_rate": 0.0001, "loss": 1.3407, "step": 15971 }, { "epoch": 1.855591054313099, "grad_norm": 0.6242477893829346, "learning_rate": 0.0001, "loss": 1.4578, "step": 15972 }, { "epoch": 1.8557072320650594, "grad_norm": 0.659819483757019, "learning_rate": 0.0001, "loss": 1.5271, "step": 15973 }, { "epoch": 1.8558234098170199, "grad_norm": 0.6689221262931824, "learning_rate": 0.0001, "loss": 1.4303, "step": 15974 }, { "epoch": 1.8559395875689806, "grad_norm": 0.6268795132637024, "learning_rate": 0.0001, "loss": 1.7171, "step": 15975 }, { "epoch": 1.856055765320941, "grad_norm": 0.6311727166175842, "learning_rate": 0.0001, "loss": 1.4042, "step": 15976 }, { "epoch": 1.8561719430729016, "grad_norm": 0.5696510672569275, "learning_rate": 0.0001, "loss": 1.2505, "step": 15977 }, { "epoch": 1.856288120824862, "grad_norm": 0.6313110589981079, "learning_rate": 0.0001, "loss": 1.4215, "step": 15978 }, { "epoch": 1.8564042985768225, "grad_norm": 0.630997359752655, "learning_rate": 0.0001, "loss": 1.3868, "step": 15979 }, { "epoch": 1.856520476328783, "grad_norm": 0.7307471632957458, "learning_rate": 0.0001, "loss": 1.4431, "step": 15980 }, { "epoch": 1.8566366540807435, "grad_norm": 0.5916454792022705, "learning_rate": 0.0001, "loss": 1.3303, "step": 15981 }, { "epoch": 1.856752831832704, "grad_norm": 0.6538284420967102, "learning_rate": 0.0001, "loss": 1.4165, "step": 15982 }, { "epoch": 1.8568690095846645, "grad_norm": 0.6251320242881775, "learning_rate": 0.0001, "loss": 1.5548, "step": 15983 }, { "epoch": 1.856985187336625, "grad_norm": 0.6046530604362488, "learning_rate": 0.0001, "loss": 1.53, "step": 15984 }, { "epoch": 1.8571013650885857, "grad_norm": 0.6331608295440674, "learning_rate": 0.0001, "loss": 1.2712, "step": 15985 }, { "epoch": 1.8572175428405462, "grad_norm": 0.6148213744163513, "learning_rate": 0.0001, "loss": 1.2921, "step": 15986 }, { "epoch": 1.8573337205925067, "grad_norm": 0.6705180406570435, "learning_rate": 0.0001, "loss": 1.5069, "step": 15987 }, { "epoch": 1.8574498983444672, "grad_norm": 0.6756179928779602, "learning_rate": 0.0001, "loss": 1.5096, "step": 15988 }, { "epoch": 1.8575660760964277, "grad_norm": 0.6396727561950684, "learning_rate": 0.0001, "loss": 1.2108, "step": 15989 }, { "epoch": 1.8576822538483881, "grad_norm": 0.5996811985969543, "learning_rate": 0.0001, "loss": 1.2902, "step": 15990 }, { "epoch": 1.8577984316003486, "grad_norm": 0.6880459189414978, "learning_rate": 0.0001, "loss": 1.4609, "step": 15991 }, { "epoch": 1.8579146093523091, "grad_norm": 0.7272287607192993, "learning_rate": 0.0001, "loss": 1.5791, "step": 15992 }, { "epoch": 1.8580307871042696, "grad_norm": 0.6769748330116272, "learning_rate": 0.0001, "loss": 1.5794, "step": 15993 }, { "epoch": 1.85814696485623, "grad_norm": 0.6352269649505615, "learning_rate": 0.0001, "loss": 1.3752, "step": 15994 }, { "epoch": 1.8582631426081906, "grad_norm": 0.6746150851249695, "learning_rate": 0.0001, "loss": 1.6153, "step": 15995 }, { "epoch": 1.858379320360151, "grad_norm": 0.6321988701820374, "learning_rate": 0.0001, "loss": 1.3948, "step": 15996 }, { "epoch": 1.8584954981121116, "grad_norm": 0.6436606049537659, "learning_rate": 0.0001, "loss": 1.3095, "step": 15997 }, { "epoch": 1.858611675864072, "grad_norm": 0.6479673385620117, "learning_rate": 0.0001, "loss": 1.5992, "step": 15998 }, { "epoch": 1.8587278536160325, "grad_norm": 0.6009630560874939, "learning_rate": 0.0001, "loss": 1.2282, "step": 15999 }, { "epoch": 1.858844031367993, "grad_norm": 0.665545642375946, "learning_rate": 0.0001, "loss": 1.544, "step": 16000 }, { "epoch": 1.8589602091199535, "grad_norm": 0.643004298210144, "learning_rate": 0.0001, "loss": 1.3991, "step": 16001 }, { "epoch": 1.859076386871914, "grad_norm": 0.6295541524887085, "learning_rate": 0.0001, "loss": 1.4687, "step": 16002 }, { "epoch": 1.8591925646238745, "grad_norm": 0.6386839151382446, "learning_rate": 0.0001, "loss": 1.39, "step": 16003 }, { "epoch": 1.859308742375835, "grad_norm": 0.6171324253082275, "learning_rate": 0.0001, "loss": 1.3447, "step": 16004 }, { "epoch": 1.8594249201277955, "grad_norm": 0.6610198020935059, "learning_rate": 0.0001, "loss": 1.422, "step": 16005 }, { "epoch": 1.859541097879756, "grad_norm": 0.6207103133201599, "learning_rate": 0.0001, "loss": 1.3946, "step": 16006 }, { "epoch": 1.8596572756317165, "grad_norm": 0.6465338468551636, "learning_rate": 0.0001, "loss": 1.4222, "step": 16007 }, { "epoch": 1.859773453383677, "grad_norm": 0.6289892196655273, "learning_rate": 0.0001, "loss": 1.4562, "step": 16008 }, { "epoch": 1.8598896311356374, "grad_norm": 0.6309964656829834, "learning_rate": 0.0001, "loss": 1.6264, "step": 16009 }, { "epoch": 1.860005808887598, "grad_norm": 0.6338998079299927, "learning_rate": 0.0001, "loss": 1.4188, "step": 16010 }, { "epoch": 1.8601219866395584, "grad_norm": 0.6893547773361206, "learning_rate": 0.0001, "loss": 1.5243, "step": 16011 }, { "epoch": 1.860238164391519, "grad_norm": 0.6893996596336365, "learning_rate": 0.0001, "loss": 1.5066, "step": 16012 }, { "epoch": 1.8603543421434794, "grad_norm": 0.6099063158035278, "learning_rate": 0.0001, "loss": 1.5613, "step": 16013 }, { "epoch": 1.8604705198954399, "grad_norm": 0.671953022480011, "learning_rate": 0.0001, "loss": 1.4714, "step": 16014 }, { "epoch": 1.8605866976474004, "grad_norm": 0.6011316776275635, "learning_rate": 0.0001, "loss": 1.3394, "step": 16015 }, { "epoch": 1.8607028753993609, "grad_norm": 0.687449038028717, "learning_rate": 0.0001, "loss": 1.5649, "step": 16016 }, { "epoch": 1.8608190531513216, "grad_norm": 0.6525353789329529, "learning_rate": 0.0001, "loss": 1.6455, "step": 16017 }, { "epoch": 1.860935230903282, "grad_norm": 0.6548526287078857, "learning_rate": 0.0001, "loss": 1.4141, "step": 16018 }, { "epoch": 1.8610514086552425, "grad_norm": 0.6427944302558899, "learning_rate": 0.0001, "loss": 1.5082, "step": 16019 }, { "epoch": 1.861167586407203, "grad_norm": 0.6129639744758606, "learning_rate": 0.0001, "loss": 1.5673, "step": 16020 }, { "epoch": 1.8612837641591635, "grad_norm": 0.6827815771102905, "learning_rate": 0.0001, "loss": 1.6364, "step": 16021 }, { "epoch": 1.861399941911124, "grad_norm": 0.6350533962249756, "learning_rate": 0.0001, "loss": 1.3449, "step": 16022 }, { "epoch": 1.8615161196630845, "grad_norm": 0.6233449578285217, "learning_rate": 0.0001, "loss": 1.5625, "step": 16023 }, { "epoch": 1.861632297415045, "grad_norm": 0.6747365593910217, "learning_rate": 0.0001, "loss": 1.5287, "step": 16024 }, { "epoch": 1.8617484751670055, "grad_norm": 0.6383475065231323, "learning_rate": 0.0001, "loss": 1.4225, "step": 16025 }, { "epoch": 1.861864652918966, "grad_norm": 0.6553027033805847, "learning_rate": 0.0001, "loss": 1.5483, "step": 16026 }, { "epoch": 1.8619808306709267, "grad_norm": 0.6364275217056274, "learning_rate": 0.0001, "loss": 1.4228, "step": 16027 }, { "epoch": 1.8620970084228872, "grad_norm": 0.6255744695663452, "learning_rate": 0.0001, "loss": 1.6231, "step": 16028 }, { "epoch": 1.8622131861748477, "grad_norm": 0.6555817127227783, "learning_rate": 0.0001, "loss": 1.5468, "step": 16029 }, { "epoch": 1.8623293639268081, "grad_norm": 0.6397342085838318, "learning_rate": 0.0001, "loss": 1.3878, "step": 16030 }, { "epoch": 1.8624455416787686, "grad_norm": 0.6352092623710632, "learning_rate": 0.0001, "loss": 1.4146, "step": 16031 }, { "epoch": 1.8625617194307291, "grad_norm": 0.6633145213127136, "learning_rate": 0.0001, "loss": 1.4537, "step": 16032 }, { "epoch": 1.8626778971826896, "grad_norm": 0.6188063025474548, "learning_rate": 0.0001, "loss": 1.4692, "step": 16033 }, { "epoch": 1.86279407493465, "grad_norm": 0.6493058800697327, "learning_rate": 0.0001, "loss": 1.4305, "step": 16034 }, { "epoch": 1.8629102526866106, "grad_norm": 0.631112277507782, "learning_rate": 0.0001, "loss": 1.5089, "step": 16035 }, { "epoch": 1.863026430438571, "grad_norm": 0.6346030831336975, "learning_rate": 0.0001, "loss": 1.4847, "step": 16036 }, { "epoch": 1.8631426081905316, "grad_norm": 0.6686719059944153, "learning_rate": 0.0001, "loss": 1.3899, "step": 16037 }, { "epoch": 1.863258785942492, "grad_norm": 0.6608430743217468, "learning_rate": 0.0001, "loss": 1.543, "step": 16038 }, { "epoch": 1.8633749636944525, "grad_norm": 0.6154099702835083, "learning_rate": 0.0001, "loss": 1.2944, "step": 16039 }, { "epoch": 1.863491141446413, "grad_norm": 0.6072275638580322, "learning_rate": 0.0001, "loss": 1.4723, "step": 16040 }, { "epoch": 1.8636073191983735, "grad_norm": 0.6457508206367493, "learning_rate": 0.0001, "loss": 1.5102, "step": 16041 }, { "epoch": 1.863723496950334, "grad_norm": 0.6489636898040771, "learning_rate": 0.0001, "loss": 1.3776, "step": 16042 }, { "epoch": 1.8638396747022945, "grad_norm": 0.6392422318458557, "learning_rate": 0.0001, "loss": 1.4857, "step": 16043 }, { "epoch": 1.863955852454255, "grad_norm": 0.6000247597694397, "learning_rate": 0.0001, "loss": 1.4462, "step": 16044 }, { "epoch": 1.8640720302062155, "grad_norm": 0.6613923907279968, "learning_rate": 0.0001, "loss": 1.4059, "step": 16045 }, { "epoch": 1.864188207958176, "grad_norm": 0.5896721482276917, "learning_rate": 0.0001, "loss": 1.4278, "step": 16046 }, { "epoch": 1.8643043857101365, "grad_norm": 0.6610058546066284, "learning_rate": 0.0001, "loss": 1.5905, "step": 16047 }, { "epoch": 1.864420563462097, "grad_norm": 0.6179382801055908, "learning_rate": 0.0001, "loss": 1.3009, "step": 16048 }, { "epoch": 1.8645367412140574, "grad_norm": 0.6016153693199158, "learning_rate": 0.0001, "loss": 1.4749, "step": 16049 }, { "epoch": 1.864652918966018, "grad_norm": 0.7349487543106079, "learning_rate": 0.0001, "loss": 1.6547, "step": 16050 }, { "epoch": 1.8647690967179784, "grad_norm": 0.6561573147773743, "learning_rate": 0.0001, "loss": 1.3527, "step": 16051 }, { "epoch": 1.864885274469939, "grad_norm": 0.6679237484931946, "learning_rate": 0.0001, "loss": 1.5864, "step": 16052 }, { "epoch": 1.8650014522218994, "grad_norm": 0.6512364745140076, "learning_rate": 0.0001, "loss": 1.3981, "step": 16053 }, { "epoch": 1.8651176299738599, "grad_norm": 0.6438536643981934, "learning_rate": 0.0001, "loss": 1.4861, "step": 16054 }, { "epoch": 1.8652338077258204, "grad_norm": 0.597125768661499, "learning_rate": 0.0001, "loss": 1.2542, "step": 16055 }, { "epoch": 1.8653499854777809, "grad_norm": 0.6511043310165405, "learning_rate": 0.0001, "loss": 1.6802, "step": 16056 }, { "epoch": 1.8654661632297413, "grad_norm": 0.6484615802764893, "learning_rate": 0.0001, "loss": 1.5774, "step": 16057 }, { "epoch": 1.865582340981702, "grad_norm": 0.6066306233406067, "learning_rate": 0.0001, "loss": 1.444, "step": 16058 }, { "epoch": 1.8656985187336625, "grad_norm": 0.687473475933075, "learning_rate": 0.0001, "loss": 1.3532, "step": 16059 }, { "epoch": 1.865814696485623, "grad_norm": 0.6569384932518005, "learning_rate": 0.0001, "loss": 1.5606, "step": 16060 }, { "epoch": 1.8659308742375835, "grad_norm": 0.6482222080230713, "learning_rate": 0.0001, "loss": 1.5765, "step": 16061 }, { "epoch": 1.866047051989544, "grad_norm": 0.6548320055007935, "learning_rate": 0.0001, "loss": 1.4455, "step": 16062 }, { "epoch": 1.8661632297415045, "grad_norm": 0.6764519214630127, "learning_rate": 0.0001, "loss": 1.4629, "step": 16063 }, { "epoch": 1.866279407493465, "grad_norm": 0.6106156706809998, "learning_rate": 0.0001, "loss": 1.411, "step": 16064 }, { "epoch": 1.8663955852454255, "grad_norm": 0.599441647529602, "learning_rate": 0.0001, "loss": 1.3843, "step": 16065 }, { "epoch": 1.866511762997386, "grad_norm": 0.64780193567276, "learning_rate": 0.0001, "loss": 1.372, "step": 16066 }, { "epoch": 1.8666279407493465, "grad_norm": 0.6867490410804749, "learning_rate": 0.0001, "loss": 1.607, "step": 16067 }, { "epoch": 1.866744118501307, "grad_norm": 0.6278959512710571, "learning_rate": 0.0001, "loss": 1.3381, "step": 16068 }, { "epoch": 1.8668602962532677, "grad_norm": 0.6293548941612244, "learning_rate": 0.0001, "loss": 1.3661, "step": 16069 }, { "epoch": 1.8669764740052281, "grad_norm": 0.6394202709197998, "learning_rate": 0.0001, "loss": 1.517, "step": 16070 }, { "epoch": 1.8670926517571886, "grad_norm": 0.5861350893974304, "learning_rate": 0.0001, "loss": 1.3924, "step": 16071 }, { "epoch": 1.8672088295091491, "grad_norm": 0.6635555624961853, "learning_rate": 0.0001, "loss": 1.3656, "step": 16072 }, { "epoch": 1.8673250072611096, "grad_norm": 0.5992189049720764, "learning_rate": 0.0001, "loss": 1.3305, "step": 16073 }, { "epoch": 1.86744118501307, "grad_norm": 0.6643315553665161, "learning_rate": 0.0001, "loss": 1.4766, "step": 16074 }, { "epoch": 1.8675573627650306, "grad_norm": 0.664090633392334, "learning_rate": 0.0001, "loss": 1.6722, "step": 16075 }, { "epoch": 1.867673540516991, "grad_norm": 0.6654585003852844, "learning_rate": 0.0001, "loss": 1.4078, "step": 16076 }, { "epoch": 1.8677897182689516, "grad_norm": 0.675710916519165, "learning_rate": 0.0001, "loss": 1.5654, "step": 16077 }, { "epoch": 1.867905896020912, "grad_norm": 0.6081445217132568, "learning_rate": 0.0001, "loss": 1.3443, "step": 16078 }, { "epoch": 1.8680220737728725, "grad_norm": 0.6614611148834229, "learning_rate": 0.0001, "loss": 1.3373, "step": 16079 }, { "epoch": 1.868138251524833, "grad_norm": 0.6432121992111206, "learning_rate": 0.0001, "loss": 1.4906, "step": 16080 }, { "epoch": 1.8682544292767935, "grad_norm": 0.6689320802688599, "learning_rate": 0.0001, "loss": 1.5123, "step": 16081 }, { "epoch": 1.868370607028754, "grad_norm": 0.6432765126228333, "learning_rate": 0.0001, "loss": 1.4508, "step": 16082 }, { "epoch": 1.8684867847807145, "grad_norm": 0.6370273232460022, "learning_rate": 0.0001, "loss": 1.3359, "step": 16083 }, { "epoch": 1.868602962532675, "grad_norm": 0.6843346357345581, "learning_rate": 0.0001, "loss": 1.4115, "step": 16084 }, { "epoch": 1.8687191402846355, "grad_norm": 0.6531260013580322, "learning_rate": 0.0001, "loss": 1.6021, "step": 16085 }, { "epoch": 1.868835318036596, "grad_norm": 0.6587830185890198, "learning_rate": 0.0001, "loss": 1.4589, "step": 16086 }, { "epoch": 1.8689514957885565, "grad_norm": 0.6717437505722046, "learning_rate": 0.0001, "loss": 1.4421, "step": 16087 }, { "epoch": 1.869067673540517, "grad_norm": 0.6399134993553162, "learning_rate": 0.0001, "loss": 1.5111, "step": 16088 }, { "epoch": 1.8691838512924774, "grad_norm": 0.6350154876708984, "learning_rate": 0.0001, "loss": 1.4613, "step": 16089 }, { "epoch": 1.869300029044438, "grad_norm": 0.6754201650619507, "learning_rate": 0.0001, "loss": 1.4803, "step": 16090 }, { "epoch": 1.8694162067963984, "grad_norm": 0.6215840578079224, "learning_rate": 0.0001, "loss": 1.3367, "step": 16091 }, { "epoch": 1.869532384548359, "grad_norm": 0.6685314178466797, "learning_rate": 0.0001, "loss": 1.4121, "step": 16092 }, { "epoch": 1.8696485623003194, "grad_norm": 0.6347002983093262, "learning_rate": 0.0001, "loss": 1.4441, "step": 16093 }, { "epoch": 1.8697647400522799, "grad_norm": 0.6363687515258789, "learning_rate": 0.0001, "loss": 1.4009, "step": 16094 }, { "epoch": 1.8698809178042404, "grad_norm": 0.7349709868431091, "learning_rate": 0.0001, "loss": 1.6166, "step": 16095 }, { "epoch": 1.8699970955562009, "grad_norm": 0.6402686238288879, "learning_rate": 0.0001, "loss": 1.3914, "step": 16096 }, { "epoch": 1.8701132733081614, "grad_norm": 0.6475024223327637, "learning_rate": 0.0001, "loss": 1.409, "step": 16097 }, { "epoch": 1.8702294510601218, "grad_norm": 0.6470693349838257, "learning_rate": 0.0001, "loss": 1.4921, "step": 16098 }, { "epoch": 1.8703456288120823, "grad_norm": 0.6139314770698547, "learning_rate": 0.0001, "loss": 1.4649, "step": 16099 }, { "epoch": 1.870461806564043, "grad_norm": 0.6222054958343506, "learning_rate": 0.0001, "loss": 1.4842, "step": 16100 }, { "epoch": 1.8705779843160035, "grad_norm": 0.6488921046257019, "learning_rate": 0.0001, "loss": 1.5249, "step": 16101 }, { "epoch": 1.870694162067964, "grad_norm": 0.6102682948112488, "learning_rate": 0.0001, "loss": 1.3227, "step": 16102 }, { "epoch": 1.8708103398199245, "grad_norm": 0.6389469504356384, "learning_rate": 0.0001, "loss": 1.493, "step": 16103 }, { "epoch": 1.870926517571885, "grad_norm": 0.6223317384719849, "learning_rate": 0.0001, "loss": 1.3478, "step": 16104 }, { "epoch": 1.8710426953238455, "grad_norm": 0.6686277389526367, "learning_rate": 0.0001, "loss": 1.431, "step": 16105 }, { "epoch": 1.871158873075806, "grad_norm": 0.6599236726760864, "learning_rate": 0.0001, "loss": 1.4075, "step": 16106 }, { "epoch": 1.8712750508277665, "grad_norm": 0.681799590587616, "learning_rate": 0.0001, "loss": 1.6223, "step": 16107 }, { "epoch": 1.871391228579727, "grad_norm": 0.6067466735839844, "learning_rate": 0.0001, "loss": 1.3714, "step": 16108 }, { "epoch": 1.8715074063316874, "grad_norm": 0.6384574174880981, "learning_rate": 0.0001, "loss": 1.3321, "step": 16109 }, { "epoch": 1.871623584083648, "grad_norm": 0.613704264163971, "learning_rate": 0.0001, "loss": 1.4495, "step": 16110 }, { "epoch": 1.8717397618356086, "grad_norm": 0.6118010878562927, "learning_rate": 0.0001, "loss": 1.4425, "step": 16111 }, { "epoch": 1.8718559395875691, "grad_norm": 0.6611287593841553, "learning_rate": 0.0001, "loss": 1.4609, "step": 16112 }, { "epoch": 1.8719721173395296, "grad_norm": 0.6075910925865173, "learning_rate": 0.0001, "loss": 1.4266, "step": 16113 }, { "epoch": 1.87208829509149, "grad_norm": 0.604663610458374, "learning_rate": 0.0001, "loss": 1.2994, "step": 16114 }, { "epoch": 1.8722044728434506, "grad_norm": 0.6526971459388733, "learning_rate": 0.0001, "loss": 1.5681, "step": 16115 }, { "epoch": 1.872320650595411, "grad_norm": 0.6362418532371521, "learning_rate": 0.0001, "loss": 1.4114, "step": 16116 }, { "epoch": 1.8724368283473716, "grad_norm": 0.6441003680229187, "learning_rate": 0.0001, "loss": 1.4133, "step": 16117 }, { "epoch": 1.872553006099332, "grad_norm": 0.6294057965278625, "learning_rate": 0.0001, "loss": 1.2939, "step": 16118 }, { "epoch": 1.8726691838512926, "grad_norm": 0.6961047649383545, "learning_rate": 0.0001, "loss": 1.5681, "step": 16119 }, { "epoch": 1.872785361603253, "grad_norm": 0.6391631364822388, "learning_rate": 0.0001, "loss": 1.4166, "step": 16120 }, { "epoch": 1.8729015393552135, "grad_norm": 0.6639420986175537, "learning_rate": 0.0001, "loss": 1.4992, "step": 16121 }, { "epoch": 1.873017717107174, "grad_norm": 0.5933699607849121, "learning_rate": 0.0001, "loss": 1.3688, "step": 16122 }, { "epoch": 1.8731338948591345, "grad_norm": 0.642113208770752, "learning_rate": 0.0001, "loss": 1.3547, "step": 16123 }, { "epoch": 1.873250072611095, "grad_norm": 0.6226654052734375, "learning_rate": 0.0001, "loss": 1.3821, "step": 16124 }, { "epoch": 1.8733662503630555, "grad_norm": 0.6105073094367981, "learning_rate": 0.0001, "loss": 1.3144, "step": 16125 }, { "epoch": 1.873482428115016, "grad_norm": 0.6547592878341675, "learning_rate": 0.0001, "loss": 1.3156, "step": 16126 }, { "epoch": 1.8735986058669765, "grad_norm": 0.6428502798080444, "learning_rate": 0.0001, "loss": 1.4213, "step": 16127 }, { "epoch": 1.873714783618937, "grad_norm": 0.6893073916435242, "learning_rate": 0.0001, "loss": 1.5834, "step": 16128 }, { "epoch": 1.8738309613708974, "grad_norm": 0.7040899395942688, "learning_rate": 0.0001, "loss": 1.4901, "step": 16129 }, { "epoch": 1.873947139122858, "grad_norm": 0.6426632404327393, "learning_rate": 0.0001, "loss": 1.503, "step": 16130 }, { "epoch": 1.8740633168748184, "grad_norm": 0.6567462682723999, "learning_rate": 0.0001, "loss": 1.5078, "step": 16131 }, { "epoch": 1.874179494626779, "grad_norm": 0.6113812327384949, "learning_rate": 0.0001, "loss": 1.4698, "step": 16132 }, { "epoch": 1.8742956723787394, "grad_norm": 0.6160392761230469, "learning_rate": 0.0001, "loss": 1.3995, "step": 16133 }, { "epoch": 1.8744118501306999, "grad_norm": 0.6506639719009399, "learning_rate": 0.0001, "loss": 1.4755, "step": 16134 }, { "epoch": 1.8745280278826604, "grad_norm": 0.5986200571060181, "learning_rate": 0.0001, "loss": 1.4129, "step": 16135 }, { "epoch": 1.8746442056346209, "grad_norm": 0.6255586743354797, "learning_rate": 0.0001, "loss": 1.5394, "step": 16136 }, { "epoch": 1.8747603833865814, "grad_norm": 0.728040874004364, "learning_rate": 0.0001, "loss": 1.4589, "step": 16137 }, { "epoch": 1.8748765611385418, "grad_norm": 0.6328973770141602, "learning_rate": 0.0001, "loss": 1.47, "step": 16138 }, { "epoch": 1.8749927388905023, "grad_norm": 0.6382498145103455, "learning_rate": 0.0001, "loss": 1.4841, "step": 16139 }, { "epoch": 1.8751089166424628, "grad_norm": 0.5920639634132385, "learning_rate": 0.0001, "loss": 1.4326, "step": 16140 }, { "epoch": 1.8752250943944233, "grad_norm": 0.6232799887657166, "learning_rate": 0.0001, "loss": 1.5424, "step": 16141 }, { "epoch": 1.875341272146384, "grad_norm": 0.6544450521469116, "learning_rate": 0.0001, "loss": 1.441, "step": 16142 }, { "epoch": 1.8754574498983445, "grad_norm": 0.6664350032806396, "learning_rate": 0.0001, "loss": 1.516, "step": 16143 }, { "epoch": 1.875573627650305, "grad_norm": 0.561137855052948, "learning_rate": 0.0001, "loss": 1.2147, "step": 16144 }, { "epoch": 1.8756898054022655, "grad_norm": 0.6579391956329346, "learning_rate": 0.0001, "loss": 1.4285, "step": 16145 }, { "epoch": 1.875805983154226, "grad_norm": 0.6529465317726135, "learning_rate": 0.0001, "loss": 1.573, "step": 16146 }, { "epoch": 1.8759221609061865, "grad_norm": 0.6378617286682129, "learning_rate": 0.0001, "loss": 1.5657, "step": 16147 }, { "epoch": 1.876038338658147, "grad_norm": 0.6672477722167969, "learning_rate": 0.0001, "loss": 1.6109, "step": 16148 }, { "epoch": 1.8761545164101074, "grad_norm": 0.6656404733657837, "learning_rate": 0.0001, "loss": 1.597, "step": 16149 }, { "epoch": 1.876270694162068, "grad_norm": 0.6094094514846802, "learning_rate": 0.0001, "loss": 1.2944, "step": 16150 }, { "epoch": 1.8763868719140284, "grad_norm": 0.6074898838996887, "learning_rate": 0.0001, "loss": 1.4135, "step": 16151 }, { "epoch": 1.876503049665989, "grad_norm": 0.6635422110557556, "learning_rate": 0.0001, "loss": 1.486, "step": 16152 }, { "epoch": 1.8766192274179496, "grad_norm": 0.6677186489105225, "learning_rate": 0.0001, "loss": 1.4001, "step": 16153 }, { "epoch": 1.87673540516991, "grad_norm": 0.6453947424888611, "learning_rate": 0.0001, "loss": 1.3866, "step": 16154 }, { "epoch": 1.8768515829218706, "grad_norm": 0.6214307546615601, "learning_rate": 0.0001, "loss": 1.5212, "step": 16155 }, { "epoch": 1.876967760673831, "grad_norm": 0.6326675415039062, "learning_rate": 0.0001, "loss": 1.3973, "step": 16156 }, { "epoch": 1.8770839384257916, "grad_norm": 0.6758370399475098, "learning_rate": 0.0001, "loss": 1.6191, "step": 16157 }, { "epoch": 1.877200116177752, "grad_norm": 0.6559527516365051, "learning_rate": 0.0001, "loss": 1.486, "step": 16158 }, { "epoch": 1.8773162939297126, "grad_norm": 0.6483144164085388, "learning_rate": 0.0001, "loss": 1.5145, "step": 16159 }, { "epoch": 1.877432471681673, "grad_norm": 0.6223169565200806, "learning_rate": 0.0001, "loss": 1.5614, "step": 16160 }, { "epoch": 1.8775486494336335, "grad_norm": 0.6081398129463196, "learning_rate": 0.0001, "loss": 1.3007, "step": 16161 }, { "epoch": 1.877664827185594, "grad_norm": 0.6108434200286865, "learning_rate": 0.0001, "loss": 1.5354, "step": 16162 }, { "epoch": 1.8777810049375545, "grad_norm": 0.6521387100219727, "learning_rate": 0.0001, "loss": 1.446, "step": 16163 }, { "epoch": 1.877897182689515, "grad_norm": 0.6595948934555054, "learning_rate": 0.0001, "loss": 1.4916, "step": 16164 }, { "epoch": 1.8780133604414755, "grad_norm": 0.6371844410896301, "learning_rate": 0.0001, "loss": 1.3303, "step": 16165 }, { "epoch": 1.878129538193436, "grad_norm": 0.6075495481491089, "learning_rate": 0.0001, "loss": 1.3591, "step": 16166 }, { "epoch": 1.8782457159453965, "grad_norm": 0.6520854830741882, "learning_rate": 0.0001, "loss": 1.3767, "step": 16167 }, { "epoch": 1.878361893697357, "grad_norm": 0.6664291024208069, "learning_rate": 0.0001, "loss": 1.4965, "step": 16168 }, { "epoch": 1.8784780714493174, "grad_norm": 0.613292396068573, "learning_rate": 0.0001, "loss": 1.2729, "step": 16169 }, { "epoch": 1.878594249201278, "grad_norm": 0.6292546391487122, "learning_rate": 0.0001, "loss": 1.4637, "step": 16170 }, { "epoch": 1.8787104269532384, "grad_norm": 0.6244401931762695, "learning_rate": 0.0001, "loss": 1.419, "step": 16171 }, { "epoch": 1.878826604705199, "grad_norm": 0.6185139417648315, "learning_rate": 0.0001, "loss": 1.4135, "step": 16172 }, { "epoch": 1.8789427824571594, "grad_norm": 0.6202735304832458, "learning_rate": 0.0001, "loss": 1.2438, "step": 16173 }, { "epoch": 1.8790589602091199, "grad_norm": 0.6984301805496216, "learning_rate": 0.0001, "loss": 1.3728, "step": 16174 }, { "epoch": 1.8791751379610804, "grad_norm": 0.6947263479232788, "learning_rate": 0.0001, "loss": 1.5079, "step": 16175 }, { "epoch": 1.8792913157130409, "grad_norm": 0.660307765007019, "learning_rate": 0.0001, "loss": 1.5195, "step": 16176 }, { "epoch": 1.8794074934650014, "grad_norm": 0.718999445438385, "learning_rate": 0.0001, "loss": 1.3681, "step": 16177 }, { "epoch": 1.8795236712169618, "grad_norm": 0.5916824340820312, "learning_rate": 0.0001, "loss": 1.4181, "step": 16178 }, { "epoch": 1.8796398489689223, "grad_norm": 0.6681329607963562, "learning_rate": 0.0001, "loss": 1.514, "step": 16179 }, { "epoch": 1.8797560267208828, "grad_norm": 0.7035693526268005, "learning_rate": 0.0001, "loss": 1.4902, "step": 16180 }, { "epoch": 1.8798722044728433, "grad_norm": 0.6438646912574768, "learning_rate": 0.0001, "loss": 1.4211, "step": 16181 }, { "epoch": 1.8799883822248038, "grad_norm": 0.6208763718605042, "learning_rate": 0.0001, "loss": 1.4726, "step": 16182 }, { "epoch": 1.8801045599767643, "grad_norm": 0.5957739353179932, "learning_rate": 0.0001, "loss": 1.3845, "step": 16183 }, { "epoch": 1.880220737728725, "grad_norm": 0.6270644664764404, "learning_rate": 0.0001, "loss": 1.4788, "step": 16184 }, { "epoch": 1.8803369154806855, "grad_norm": 0.7408553957939148, "learning_rate": 0.0001, "loss": 1.4042, "step": 16185 }, { "epoch": 1.880453093232646, "grad_norm": 0.5753775835037231, "learning_rate": 0.0001, "loss": 1.3737, "step": 16186 }, { "epoch": 1.8805692709846065, "grad_norm": 0.6169424653053284, "learning_rate": 0.0001, "loss": 1.3774, "step": 16187 }, { "epoch": 1.880685448736567, "grad_norm": 0.6537294983863831, "learning_rate": 0.0001, "loss": 1.4537, "step": 16188 }, { "epoch": 1.8808016264885274, "grad_norm": 0.6403899192810059, "learning_rate": 0.0001, "loss": 1.3771, "step": 16189 }, { "epoch": 1.880917804240488, "grad_norm": 0.6702489256858826, "learning_rate": 0.0001, "loss": 1.4289, "step": 16190 }, { "epoch": 1.8810339819924484, "grad_norm": 0.6810214519500732, "learning_rate": 0.0001, "loss": 1.3772, "step": 16191 }, { "epoch": 1.881150159744409, "grad_norm": 0.694662868976593, "learning_rate": 0.0001, "loss": 1.5094, "step": 16192 }, { "epoch": 1.8812663374963694, "grad_norm": 0.6301172971725464, "learning_rate": 0.0001, "loss": 1.5233, "step": 16193 }, { "epoch": 1.88138251524833, "grad_norm": 0.7032142877578735, "learning_rate": 0.0001, "loss": 1.5058, "step": 16194 }, { "epoch": 1.8814986930002906, "grad_norm": 0.6602553129196167, "learning_rate": 0.0001, "loss": 1.4575, "step": 16195 }, { "epoch": 1.881614870752251, "grad_norm": 0.622805655002594, "learning_rate": 0.0001, "loss": 1.4459, "step": 16196 }, { "epoch": 1.8817310485042116, "grad_norm": 0.5976136922836304, "learning_rate": 0.0001, "loss": 1.3166, "step": 16197 }, { "epoch": 1.881847226256172, "grad_norm": 0.6232274770736694, "learning_rate": 0.0001, "loss": 1.5234, "step": 16198 }, { "epoch": 1.8819634040081326, "grad_norm": 0.6553215384483337, "learning_rate": 0.0001, "loss": 1.4395, "step": 16199 }, { "epoch": 1.882079581760093, "grad_norm": 0.6179276704788208, "learning_rate": 0.0001, "loss": 1.4126, "step": 16200 }, { "epoch": 1.8821957595120535, "grad_norm": 0.6998701095581055, "learning_rate": 0.0001, "loss": 1.586, "step": 16201 }, { "epoch": 1.882311937264014, "grad_norm": 0.6230928897857666, "learning_rate": 0.0001, "loss": 1.4469, "step": 16202 }, { "epoch": 1.8824281150159745, "grad_norm": 0.6606556177139282, "learning_rate": 0.0001, "loss": 1.5392, "step": 16203 }, { "epoch": 1.882544292767935, "grad_norm": 0.6373953223228455, "learning_rate": 0.0001, "loss": 1.5168, "step": 16204 }, { "epoch": 1.8826604705198955, "grad_norm": 0.6109699010848999, "learning_rate": 0.0001, "loss": 1.43, "step": 16205 }, { "epoch": 1.882776648271856, "grad_norm": 0.6007639765739441, "learning_rate": 0.0001, "loss": 1.219, "step": 16206 }, { "epoch": 1.8828928260238165, "grad_norm": 0.678024411201477, "learning_rate": 0.0001, "loss": 1.602, "step": 16207 }, { "epoch": 1.883009003775777, "grad_norm": 0.7188958525657654, "learning_rate": 0.0001, "loss": 1.6122, "step": 16208 }, { "epoch": 1.8831251815277374, "grad_norm": 0.6568261384963989, "learning_rate": 0.0001, "loss": 1.4127, "step": 16209 }, { "epoch": 1.883241359279698, "grad_norm": 0.6506110429763794, "learning_rate": 0.0001, "loss": 1.3864, "step": 16210 }, { "epoch": 1.8833575370316584, "grad_norm": 0.6438487768173218, "learning_rate": 0.0001, "loss": 1.4388, "step": 16211 }, { "epoch": 1.883473714783619, "grad_norm": 0.6122698187828064, "learning_rate": 0.0001, "loss": 1.2368, "step": 16212 }, { "epoch": 1.8835898925355794, "grad_norm": 0.6352079510688782, "learning_rate": 0.0001, "loss": 1.3854, "step": 16213 }, { "epoch": 1.88370607028754, "grad_norm": 0.6468987464904785, "learning_rate": 0.0001, "loss": 1.4283, "step": 16214 }, { "epoch": 1.8838222480395004, "grad_norm": 0.6842536330223083, "learning_rate": 0.0001, "loss": 1.4713, "step": 16215 }, { "epoch": 1.8839384257914609, "grad_norm": 0.63325434923172, "learning_rate": 0.0001, "loss": 1.43, "step": 16216 }, { "epoch": 1.8840546035434214, "grad_norm": 0.6556114554405212, "learning_rate": 0.0001, "loss": 1.4115, "step": 16217 }, { "epoch": 1.8841707812953818, "grad_norm": 0.6330429315567017, "learning_rate": 0.0001, "loss": 1.4792, "step": 16218 }, { "epoch": 1.8842869590473423, "grad_norm": 0.603271484375, "learning_rate": 0.0001, "loss": 1.353, "step": 16219 }, { "epoch": 1.8844031367993028, "grad_norm": 0.6156960129737854, "learning_rate": 0.0001, "loss": 1.3952, "step": 16220 }, { "epoch": 1.8845193145512633, "grad_norm": 0.6567127704620361, "learning_rate": 0.0001, "loss": 1.4617, "step": 16221 }, { "epoch": 1.8846354923032238, "grad_norm": 0.6550294756889343, "learning_rate": 0.0001, "loss": 1.4128, "step": 16222 }, { "epoch": 1.8847516700551843, "grad_norm": 0.69513338804245, "learning_rate": 0.0001, "loss": 1.6556, "step": 16223 }, { "epoch": 1.8848678478071448, "grad_norm": 0.6480852365493774, "learning_rate": 0.0001, "loss": 1.5386, "step": 16224 }, { "epoch": 1.8849840255591053, "grad_norm": 0.623177707195282, "learning_rate": 0.0001, "loss": 1.5206, "step": 16225 }, { "epoch": 1.885100203311066, "grad_norm": 0.6310771107673645, "learning_rate": 0.0001, "loss": 1.5955, "step": 16226 }, { "epoch": 1.8852163810630265, "grad_norm": 0.6477039456367493, "learning_rate": 0.0001, "loss": 1.4276, "step": 16227 }, { "epoch": 1.885332558814987, "grad_norm": 0.634675145149231, "learning_rate": 0.0001, "loss": 1.376, "step": 16228 }, { "epoch": 1.8854487365669474, "grad_norm": 0.6803785562515259, "learning_rate": 0.0001, "loss": 1.4586, "step": 16229 }, { "epoch": 1.885564914318908, "grad_norm": 0.6482465267181396, "learning_rate": 0.0001, "loss": 1.2912, "step": 16230 }, { "epoch": 1.8856810920708684, "grad_norm": 0.6558801531791687, "learning_rate": 0.0001, "loss": 1.536, "step": 16231 }, { "epoch": 1.885797269822829, "grad_norm": 0.6634625792503357, "learning_rate": 0.0001, "loss": 1.6405, "step": 16232 }, { "epoch": 1.8859134475747894, "grad_norm": 0.6628672480583191, "learning_rate": 0.0001, "loss": 1.5729, "step": 16233 }, { "epoch": 1.88602962532675, "grad_norm": 0.6206861138343811, "learning_rate": 0.0001, "loss": 1.2763, "step": 16234 }, { "epoch": 1.8861458030787104, "grad_norm": 0.6830229163169861, "learning_rate": 0.0001, "loss": 1.673, "step": 16235 }, { "epoch": 1.886261980830671, "grad_norm": 0.6225918531417847, "learning_rate": 0.0001, "loss": 1.3825, "step": 16236 }, { "epoch": 1.8863781585826316, "grad_norm": 0.6204468607902527, "learning_rate": 0.0001, "loss": 1.4252, "step": 16237 }, { "epoch": 1.886494336334592, "grad_norm": 0.6406263709068298, "learning_rate": 0.0001, "loss": 1.4315, "step": 16238 }, { "epoch": 1.8866105140865526, "grad_norm": 0.594034731388092, "learning_rate": 0.0001, "loss": 1.4297, "step": 16239 }, { "epoch": 1.886726691838513, "grad_norm": 0.5932945013046265, "learning_rate": 0.0001, "loss": 1.3535, "step": 16240 }, { "epoch": 1.8868428695904735, "grad_norm": 0.6160834431648254, "learning_rate": 0.0001, "loss": 1.4294, "step": 16241 }, { "epoch": 1.886959047342434, "grad_norm": 0.6335605382919312, "learning_rate": 0.0001, "loss": 1.526, "step": 16242 }, { "epoch": 1.8870752250943945, "grad_norm": 0.6654402613639832, "learning_rate": 0.0001, "loss": 1.4996, "step": 16243 }, { "epoch": 1.887191402846355, "grad_norm": 0.699084997177124, "learning_rate": 0.0001, "loss": 1.542, "step": 16244 }, { "epoch": 1.8873075805983155, "grad_norm": 0.5810077786445618, "learning_rate": 0.0001, "loss": 1.316, "step": 16245 }, { "epoch": 1.887423758350276, "grad_norm": 0.6725503206253052, "learning_rate": 0.0001, "loss": 1.5925, "step": 16246 }, { "epoch": 1.8875399361022365, "grad_norm": 0.6317938566207886, "learning_rate": 0.0001, "loss": 1.5725, "step": 16247 }, { "epoch": 1.887656113854197, "grad_norm": 0.6254867315292358, "learning_rate": 0.0001, "loss": 1.4515, "step": 16248 }, { "epoch": 1.8877722916061574, "grad_norm": 0.6530857682228088, "learning_rate": 0.0001, "loss": 1.5262, "step": 16249 }, { "epoch": 1.887888469358118, "grad_norm": 0.6241989135742188, "learning_rate": 0.0001, "loss": 1.3866, "step": 16250 }, { "epoch": 1.8880046471100784, "grad_norm": 0.6058340072631836, "learning_rate": 0.0001, "loss": 1.5298, "step": 16251 }, { "epoch": 1.888120824862039, "grad_norm": 0.6978906393051147, "learning_rate": 0.0001, "loss": 1.6112, "step": 16252 }, { "epoch": 1.8882370026139994, "grad_norm": 0.6833487153053284, "learning_rate": 0.0001, "loss": 1.5619, "step": 16253 }, { "epoch": 1.88835318036596, "grad_norm": 0.6531192660331726, "learning_rate": 0.0001, "loss": 1.558, "step": 16254 }, { "epoch": 1.8884693581179204, "grad_norm": 0.6346166133880615, "learning_rate": 0.0001, "loss": 1.5398, "step": 16255 }, { "epoch": 1.8885855358698809, "grad_norm": 0.6082544326782227, "learning_rate": 0.0001, "loss": 1.4792, "step": 16256 }, { "epoch": 1.8887017136218414, "grad_norm": 0.686316192150116, "learning_rate": 0.0001, "loss": 1.4781, "step": 16257 }, { "epoch": 1.8888178913738018, "grad_norm": 0.6406739950180054, "learning_rate": 0.0001, "loss": 1.5061, "step": 16258 }, { "epoch": 1.8889340691257623, "grad_norm": 0.6082045435905457, "learning_rate": 0.0001, "loss": 1.3622, "step": 16259 }, { "epoch": 1.8890502468777228, "grad_norm": 0.6095808744430542, "learning_rate": 0.0001, "loss": 1.481, "step": 16260 }, { "epoch": 1.8891664246296833, "grad_norm": 0.6595603823661804, "learning_rate": 0.0001, "loss": 1.4074, "step": 16261 }, { "epoch": 1.8892826023816438, "grad_norm": 0.6486496925354004, "learning_rate": 0.0001, "loss": 1.4188, "step": 16262 }, { "epoch": 1.8893987801336043, "grad_norm": 0.607620894908905, "learning_rate": 0.0001, "loss": 1.4437, "step": 16263 }, { "epoch": 1.8895149578855648, "grad_norm": 0.6697260737419128, "learning_rate": 0.0001, "loss": 1.3941, "step": 16264 }, { "epoch": 1.8896311356375253, "grad_norm": 0.6638875007629395, "learning_rate": 0.0001, "loss": 1.4645, "step": 16265 }, { "epoch": 1.8897473133894858, "grad_norm": 0.7261951565742493, "learning_rate": 0.0001, "loss": 1.5499, "step": 16266 }, { "epoch": 1.8898634911414463, "grad_norm": 0.6128770112991333, "learning_rate": 0.0001, "loss": 1.5662, "step": 16267 }, { "epoch": 1.889979668893407, "grad_norm": 0.6858522891998291, "learning_rate": 0.0001, "loss": 1.4384, "step": 16268 }, { "epoch": 1.8900958466453675, "grad_norm": 0.6504489183425903, "learning_rate": 0.0001, "loss": 1.4228, "step": 16269 }, { "epoch": 1.890212024397328, "grad_norm": 0.6010448932647705, "learning_rate": 0.0001, "loss": 1.3066, "step": 16270 }, { "epoch": 1.8903282021492884, "grad_norm": 0.6370309591293335, "learning_rate": 0.0001, "loss": 1.4773, "step": 16271 }, { "epoch": 1.890444379901249, "grad_norm": 0.6368116736412048, "learning_rate": 0.0001, "loss": 1.3728, "step": 16272 }, { "epoch": 1.8905605576532094, "grad_norm": 0.6693791747093201, "learning_rate": 0.0001, "loss": 1.616, "step": 16273 }, { "epoch": 1.89067673540517, "grad_norm": 0.6338540315628052, "learning_rate": 0.0001, "loss": 1.2589, "step": 16274 }, { "epoch": 1.8907929131571304, "grad_norm": 0.6232069134712219, "learning_rate": 0.0001, "loss": 1.3826, "step": 16275 }, { "epoch": 1.8909090909090909, "grad_norm": 0.5853868126869202, "learning_rate": 0.0001, "loss": 1.1918, "step": 16276 }, { "epoch": 1.8910252686610514, "grad_norm": 0.6741273999214172, "learning_rate": 0.0001, "loss": 1.4104, "step": 16277 }, { "epoch": 1.891141446413012, "grad_norm": 0.6685637831687927, "learning_rate": 0.0001, "loss": 1.4751, "step": 16278 }, { "epoch": 1.8912576241649726, "grad_norm": 0.6453445553779602, "learning_rate": 0.0001, "loss": 1.4783, "step": 16279 }, { "epoch": 1.891373801916933, "grad_norm": 0.6201800107955933, "learning_rate": 0.0001, "loss": 1.1419, "step": 16280 }, { "epoch": 1.8914899796688935, "grad_norm": 0.6624548435211182, "learning_rate": 0.0001, "loss": 1.4411, "step": 16281 }, { "epoch": 1.891606157420854, "grad_norm": 0.6203185319900513, "learning_rate": 0.0001, "loss": 1.3103, "step": 16282 }, { "epoch": 1.8917223351728145, "grad_norm": 0.6690462231636047, "learning_rate": 0.0001, "loss": 1.4472, "step": 16283 }, { "epoch": 1.891838512924775, "grad_norm": 0.6256049871444702, "learning_rate": 0.0001, "loss": 1.4034, "step": 16284 }, { "epoch": 1.8919546906767355, "grad_norm": 0.6361051797866821, "learning_rate": 0.0001, "loss": 1.4706, "step": 16285 }, { "epoch": 1.892070868428696, "grad_norm": 0.6839278936386108, "learning_rate": 0.0001, "loss": 1.5615, "step": 16286 }, { "epoch": 1.8921870461806565, "grad_norm": 0.6685923337936401, "learning_rate": 0.0001, "loss": 1.5528, "step": 16287 }, { "epoch": 1.892303223932617, "grad_norm": 0.6188735365867615, "learning_rate": 0.0001, "loss": 1.5113, "step": 16288 }, { "epoch": 1.8924194016845775, "grad_norm": 0.6116786599159241, "learning_rate": 0.0001, "loss": 1.3746, "step": 16289 }, { "epoch": 1.892535579436538, "grad_norm": 0.5917659997940063, "learning_rate": 0.0001, "loss": 1.3512, "step": 16290 }, { "epoch": 1.8926517571884984, "grad_norm": 0.6804871559143066, "learning_rate": 0.0001, "loss": 1.5389, "step": 16291 }, { "epoch": 1.892767934940459, "grad_norm": 0.6188212633132935, "learning_rate": 0.0001, "loss": 1.3892, "step": 16292 }, { "epoch": 1.8928841126924194, "grad_norm": 0.6414658427238464, "learning_rate": 0.0001, "loss": 1.4581, "step": 16293 }, { "epoch": 1.89300029044438, "grad_norm": 0.6274229288101196, "learning_rate": 0.0001, "loss": 1.41, "step": 16294 }, { "epoch": 1.8931164681963404, "grad_norm": 0.6137471795082092, "learning_rate": 0.0001, "loss": 1.4156, "step": 16295 }, { "epoch": 1.8932326459483009, "grad_norm": 0.6135362386703491, "learning_rate": 0.0001, "loss": 1.2233, "step": 16296 }, { "epoch": 1.8933488237002614, "grad_norm": 0.581027626991272, "learning_rate": 0.0001, "loss": 1.3152, "step": 16297 }, { "epoch": 1.8934650014522219, "grad_norm": 0.6401405334472656, "learning_rate": 0.0001, "loss": 1.2997, "step": 16298 }, { "epoch": 1.8935811792041823, "grad_norm": 0.6054320335388184, "learning_rate": 0.0001, "loss": 1.4294, "step": 16299 }, { "epoch": 1.8936973569561428, "grad_norm": 0.6537542939186096, "learning_rate": 0.0001, "loss": 1.4964, "step": 16300 }, { "epoch": 1.8938135347081033, "grad_norm": 0.6604767441749573, "learning_rate": 0.0001, "loss": 1.4648, "step": 16301 }, { "epoch": 1.8939297124600638, "grad_norm": 0.6958973407745361, "learning_rate": 0.0001, "loss": 1.451, "step": 16302 }, { "epoch": 1.8940458902120243, "grad_norm": 0.6734423637390137, "learning_rate": 0.0001, "loss": 1.5381, "step": 16303 }, { "epoch": 1.8941620679639848, "grad_norm": 0.6259022355079651, "learning_rate": 0.0001, "loss": 1.4018, "step": 16304 }, { "epoch": 1.8942782457159453, "grad_norm": 0.6697198152542114, "learning_rate": 0.0001, "loss": 1.5401, "step": 16305 }, { "epoch": 1.8943944234679058, "grad_norm": 0.6363210678100586, "learning_rate": 0.0001, "loss": 1.3552, "step": 16306 }, { "epoch": 1.8945106012198663, "grad_norm": 0.6797921061515808, "learning_rate": 0.0001, "loss": 1.5345, "step": 16307 }, { "epoch": 1.8946267789718267, "grad_norm": 0.6048640012741089, "learning_rate": 0.0001, "loss": 1.4204, "step": 16308 }, { "epoch": 1.8947429567237872, "grad_norm": 0.6270192861557007, "learning_rate": 0.0001, "loss": 1.4924, "step": 16309 }, { "epoch": 1.894859134475748, "grad_norm": 0.6300553679466248, "learning_rate": 0.0001, "loss": 1.4843, "step": 16310 }, { "epoch": 1.8949753122277084, "grad_norm": 0.6239210367202759, "learning_rate": 0.0001, "loss": 1.454, "step": 16311 }, { "epoch": 1.895091489979669, "grad_norm": 0.6402313113212585, "learning_rate": 0.0001, "loss": 1.426, "step": 16312 }, { "epoch": 1.8952076677316294, "grad_norm": 0.6168700456619263, "learning_rate": 0.0001, "loss": 1.3988, "step": 16313 }, { "epoch": 1.89532384548359, "grad_norm": 0.636816143989563, "learning_rate": 0.0001, "loss": 1.446, "step": 16314 }, { "epoch": 1.8954400232355504, "grad_norm": 0.6350388526916504, "learning_rate": 0.0001, "loss": 1.3877, "step": 16315 }, { "epoch": 1.8955562009875109, "grad_norm": 0.6476380228996277, "learning_rate": 0.0001, "loss": 1.347, "step": 16316 }, { "epoch": 1.8956723787394714, "grad_norm": 0.649406909942627, "learning_rate": 0.0001, "loss": 1.485, "step": 16317 }, { "epoch": 1.8957885564914319, "grad_norm": 0.6723828911781311, "learning_rate": 0.0001, "loss": 1.4107, "step": 16318 }, { "epoch": 1.8959047342433923, "grad_norm": 0.6643217206001282, "learning_rate": 0.0001, "loss": 1.2987, "step": 16319 }, { "epoch": 1.896020911995353, "grad_norm": 0.640669584274292, "learning_rate": 0.0001, "loss": 1.3199, "step": 16320 }, { "epoch": 1.8961370897473135, "grad_norm": 0.6841109395027161, "learning_rate": 0.0001, "loss": 1.4943, "step": 16321 }, { "epoch": 1.896253267499274, "grad_norm": 0.6322958469390869, "learning_rate": 0.0001, "loss": 1.4514, "step": 16322 }, { "epoch": 1.8963694452512345, "grad_norm": 0.6450737714767456, "learning_rate": 0.0001, "loss": 1.5285, "step": 16323 }, { "epoch": 1.896485623003195, "grad_norm": 0.6351325511932373, "learning_rate": 0.0001, "loss": 1.3975, "step": 16324 }, { "epoch": 1.8966018007551555, "grad_norm": 0.6209524273872375, "learning_rate": 0.0001, "loss": 1.33, "step": 16325 }, { "epoch": 1.896717978507116, "grad_norm": 0.6339280009269714, "learning_rate": 0.0001, "loss": 1.3828, "step": 16326 }, { "epoch": 1.8968341562590765, "grad_norm": 0.6543813943862915, "learning_rate": 0.0001, "loss": 1.6232, "step": 16327 }, { "epoch": 1.896950334011037, "grad_norm": 0.5659906268119812, "learning_rate": 0.0001, "loss": 1.2805, "step": 16328 }, { "epoch": 1.8970665117629975, "grad_norm": 0.6913331151008606, "learning_rate": 0.0001, "loss": 1.5897, "step": 16329 }, { "epoch": 1.897182689514958, "grad_norm": 0.6378936171531677, "learning_rate": 0.0001, "loss": 1.4734, "step": 16330 }, { "epoch": 1.8972988672669184, "grad_norm": 0.7173873782157898, "learning_rate": 0.0001, "loss": 1.5018, "step": 16331 }, { "epoch": 1.897415045018879, "grad_norm": 0.6787568926811218, "learning_rate": 0.0001, "loss": 1.4606, "step": 16332 }, { "epoch": 1.8975312227708394, "grad_norm": 0.6129149794578552, "learning_rate": 0.0001, "loss": 1.3964, "step": 16333 }, { "epoch": 1.8976474005228, "grad_norm": 0.6134177446365356, "learning_rate": 0.0001, "loss": 1.302, "step": 16334 }, { "epoch": 1.8977635782747604, "grad_norm": 0.7047176957130432, "learning_rate": 0.0001, "loss": 1.4386, "step": 16335 }, { "epoch": 1.8978797560267209, "grad_norm": 0.6540737152099609, "learning_rate": 0.0001, "loss": 1.5279, "step": 16336 }, { "epoch": 1.8979959337786814, "grad_norm": 0.6591337323188782, "learning_rate": 0.0001, "loss": 1.6603, "step": 16337 }, { "epoch": 1.8981121115306419, "grad_norm": 0.673383355140686, "learning_rate": 0.0001, "loss": 1.6106, "step": 16338 }, { "epoch": 1.8982282892826023, "grad_norm": 0.6349223256111145, "learning_rate": 0.0001, "loss": 1.4935, "step": 16339 }, { "epoch": 1.8983444670345628, "grad_norm": 0.6121570467948914, "learning_rate": 0.0001, "loss": 1.5218, "step": 16340 }, { "epoch": 1.8984606447865233, "grad_norm": 0.6060134768486023, "learning_rate": 0.0001, "loss": 1.286, "step": 16341 }, { "epoch": 1.8985768225384838, "grad_norm": 0.6281922459602356, "learning_rate": 0.0001, "loss": 1.5581, "step": 16342 }, { "epoch": 1.8986930002904443, "grad_norm": 0.649915337562561, "learning_rate": 0.0001, "loss": 1.5569, "step": 16343 }, { "epoch": 1.8988091780424048, "grad_norm": 0.6597468256950378, "learning_rate": 0.0001, "loss": 1.3918, "step": 16344 }, { "epoch": 1.8989253557943653, "grad_norm": 0.6173076033592224, "learning_rate": 0.0001, "loss": 1.2681, "step": 16345 }, { "epoch": 1.8990415335463258, "grad_norm": 0.6370591521263123, "learning_rate": 0.0001, "loss": 1.3584, "step": 16346 }, { "epoch": 1.8991577112982863, "grad_norm": 0.6387348175048828, "learning_rate": 0.0001, "loss": 1.4748, "step": 16347 }, { "epoch": 1.8992738890502467, "grad_norm": 0.6306506991386414, "learning_rate": 0.0001, "loss": 1.4194, "step": 16348 }, { "epoch": 1.8993900668022072, "grad_norm": 0.7107334136962891, "learning_rate": 0.0001, "loss": 1.7063, "step": 16349 }, { "epoch": 1.8995062445541677, "grad_norm": 0.6545239686965942, "learning_rate": 0.0001, "loss": 1.3276, "step": 16350 }, { "epoch": 1.8996224223061282, "grad_norm": 0.6358075737953186, "learning_rate": 0.0001, "loss": 1.4117, "step": 16351 }, { "epoch": 1.899738600058089, "grad_norm": 0.6022694706916809, "learning_rate": 0.0001, "loss": 1.4806, "step": 16352 }, { "epoch": 1.8998547778100494, "grad_norm": 0.6779773831367493, "learning_rate": 0.0001, "loss": 1.446, "step": 16353 }, { "epoch": 1.89997095556201, "grad_norm": 0.6681201457977295, "learning_rate": 0.0001, "loss": 1.4598, "step": 16354 }, { "epoch": 1.9000871333139704, "grad_norm": 0.5996114611625671, "learning_rate": 0.0001, "loss": 1.3639, "step": 16355 }, { "epoch": 1.9002033110659309, "grad_norm": 0.5931739807128906, "learning_rate": 0.0001, "loss": 1.2355, "step": 16356 }, { "epoch": 1.9003194888178914, "grad_norm": 0.6598568558692932, "learning_rate": 0.0001, "loss": 1.3796, "step": 16357 }, { "epoch": 1.9004356665698519, "grad_norm": 0.6512686014175415, "learning_rate": 0.0001, "loss": 1.4236, "step": 16358 }, { "epoch": 1.9005518443218123, "grad_norm": 0.6606178879737854, "learning_rate": 0.0001, "loss": 1.4219, "step": 16359 }, { "epoch": 1.9006680220737728, "grad_norm": 0.6541381478309631, "learning_rate": 0.0001, "loss": 1.3472, "step": 16360 }, { "epoch": 1.9007841998257333, "grad_norm": 0.6701364517211914, "learning_rate": 0.0001, "loss": 1.4914, "step": 16361 }, { "epoch": 1.900900377577694, "grad_norm": 0.6149517297744751, "learning_rate": 0.0001, "loss": 1.4922, "step": 16362 }, { "epoch": 1.9010165553296545, "grad_norm": 0.6869191527366638, "learning_rate": 0.0001, "loss": 1.4663, "step": 16363 }, { "epoch": 1.901132733081615, "grad_norm": 0.6002347469329834, "learning_rate": 0.0001, "loss": 1.3809, "step": 16364 }, { "epoch": 1.9012489108335755, "grad_norm": 0.6327499151229858, "learning_rate": 0.0001, "loss": 1.5218, "step": 16365 }, { "epoch": 1.901365088585536, "grad_norm": 0.6414229273796082, "learning_rate": 0.0001, "loss": 1.4855, "step": 16366 }, { "epoch": 1.9014812663374965, "grad_norm": 0.6490059494972229, "learning_rate": 0.0001, "loss": 1.462, "step": 16367 }, { "epoch": 1.901597444089457, "grad_norm": 0.6510499119758606, "learning_rate": 0.0001, "loss": 1.4496, "step": 16368 }, { "epoch": 1.9017136218414175, "grad_norm": 0.6222279071807861, "learning_rate": 0.0001, "loss": 1.4961, "step": 16369 }, { "epoch": 1.901829799593378, "grad_norm": 0.6428414583206177, "learning_rate": 0.0001, "loss": 1.5145, "step": 16370 }, { "epoch": 1.9019459773453384, "grad_norm": 0.6488329172134399, "learning_rate": 0.0001, "loss": 1.287, "step": 16371 }, { "epoch": 1.902062155097299, "grad_norm": 0.6108402609825134, "learning_rate": 0.0001, "loss": 1.3416, "step": 16372 }, { "epoch": 1.9021783328492594, "grad_norm": 0.6613602042198181, "learning_rate": 0.0001, "loss": 1.4814, "step": 16373 }, { "epoch": 1.90229451060122, "grad_norm": 0.6397709846496582, "learning_rate": 0.0001, "loss": 1.5218, "step": 16374 }, { "epoch": 1.9024106883531804, "grad_norm": 0.6187673807144165, "learning_rate": 0.0001, "loss": 1.5013, "step": 16375 }, { "epoch": 1.9025268661051409, "grad_norm": 0.6692391037940979, "learning_rate": 0.0001, "loss": 1.4704, "step": 16376 }, { "epoch": 1.9026430438571014, "grad_norm": 0.6933709979057312, "learning_rate": 0.0001, "loss": 1.5263, "step": 16377 }, { "epoch": 1.9027592216090619, "grad_norm": 0.6626425385475159, "learning_rate": 0.0001, "loss": 1.4743, "step": 16378 }, { "epoch": 1.9028753993610223, "grad_norm": 0.6111414432525635, "learning_rate": 0.0001, "loss": 1.5256, "step": 16379 }, { "epoch": 1.9029915771129828, "grad_norm": 0.6156660914421082, "learning_rate": 0.0001, "loss": 1.3754, "step": 16380 }, { "epoch": 1.9031077548649433, "grad_norm": 0.6141976714134216, "learning_rate": 0.0001, "loss": 1.4556, "step": 16381 }, { "epoch": 1.9032239326169038, "grad_norm": 0.6272478699684143, "learning_rate": 0.0001, "loss": 1.5962, "step": 16382 }, { "epoch": 1.9033401103688643, "grad_norm": 0.6356613636016846, "learning_rate": 0.0001, "loss": 1.4228, "step": 16383 }, { "epoch": 1.9034562881208248, "grad_norm": 0.5686226487159729, "learning_rate": 0.0001, "loss": 1.3646, "step": 16384 }, { "epoch": 1.9035724658727853, "grad_norm": 0.6323666572570801, "learning_rate": 0.0001, "loss": 1.4161, "step": 16385 }, { "epoch": 1.9036886436247458, "grad_norm": 0.677195131778717, "learning_rate": 0.0001, "loss": 1.4928, "step": 16386 }, { "epoch": 1.9038048213767063, "grad_norm": 0.6431178450584412, "learning_rate": 0.0001, "loss": 1.4681, "step": 16387 }, { "epoch": 1.9039209991286667, "grad_norm": 0.6128330826759338, "learning_rate": 0.0001, "loss": 1.3627, "step": 16388 }, { "epoch": 1.9040371768806272, "grad_norm": 0.5830598473548889, "learning_rate": 0.0001, "loss": 1.2389, "step": 16389 }, { "epoch": 1.9041533546325877, "grad_norm": 0.6621609926223755, "learning_rate": 0.0001, "loss": 1.5467, "step": 16390 }, { "epoch": 1.9042695323845482, "grad_norm": 0.6513555645942688, "learning_rate": 0.0001, "loss": 1.4729, "step": 16391 }, { "epoch": 1.9043857101365087, "grad_norm": 0.650956392288208, "learning_rate": 0.0001, "loss": 1.534, "step": 16392 }, { "epoch": 1.9045018878884694, "grad_norm": 0.6090638637542725, "learning_rate": 0.0001, "loss": 1.2517, "step": 16393 }, { "epoch": 1.90461806564043, "grad_norm": 0.6661187410354614, "learning_rate": 0.0001, "loss": 1.5124, "step": 16394 }, { "epoch": 1.9047342433923904, "grad_norm": 0.6757855415344238, "learning_rate": 0.0001, "loss": 1.3899, "step": 16395 }, { "epoch": 1.9048504211443509, "grad_norm": 0.6431171894073486, "learning_rate": 0.0001, "loss": 1.3244, "step": 16396 }, { "epoch": 1.9049665988963114, "grad_norm": 0.608852744102478, "learning_rate": 0.0001, "loss": 1.2734, "step": 16397 }, { "epoch": 1.9050827766482719, "grad_norm": 0.6646301746368408, "learning_rate": 0.0001, "loss": 1.4388, "step": 16398 }, { "epoch": 1.9051989544002323, "grad_norm": 0.6384854316711426, "learning_rate": 0.0001, "loss": 1.4841, "step": 16399 }, { "epoch": 1.9053151321521928, "grad_norm": 0.6452166438102722, "learning_rate": 0.0001, "loss": 1.4734, "step": 16400 }, { "epoch": 1.9054313099041533, "grad_norm": 0.659691333770752, "learning_rate": 0.0001, "loss": 1.4667, "step": 16401 }, { "epoch": 1.9055474876561138, "grad_norm": 0.6143450736999512, "learning_rate": 0.0001, "loss": 1.4592, "step": 16402 }, { "epoch": 1.9056636654080743, "grad_norm": 0.6507795453071594, "learning_rate": 0.0001, "loss": 1.4863, "step": 16403 }, { "epoch": 1.905779843160035, "grad_norm": 0.6729721426963806, "learning_rate": 0.0001, "loss": 1.4766, "step": 16404 }, { "epoch": 1.9058960209119955, "grad_norm": 0.6447899341583252, "learning_rate": 0.0001, "loss": 1.4141, "step": 16405 }, { "epoch": 1.906012198663956, "grad_norm": 0.6360199451446533, "learning_rate": 0.0001, "loss": 1.5541, "step": 16406 }, { "epoch": 1.9061283764159165, "grad_norm": 0.6306689381599426, "learning_rate": 0.0001, "loss": 1.4581, "step": 16407 }, { "epoch": 1.906244554167877, "grad_norm": 0.6508102416992188, "learning_rate": 0.0001, "loss": 1.4423, "step": 16408 }, { "epoch": 1.9063607319198375, "grad_norm": 0.625827431678772, "learning_rate": 0.0001, "loss": 1.5005, "step": 16409 }, { "epoch": 1.906476909671798, "grad_norm": 0.6415421962738037, "learning_rate": 0.0001, "loss": 1.3501, "step": 16410 }, { "epoch": 1.9065930874237584, "grad_norm": 0.6382960081100464, "learning_rate": 0.0001, "loss": 1.2997, "step": 16411 }, { "epoch": 1.906709265175719, "grad_norm": 0.6310703754425049, "learning_rate": 0.0001, "loss": 1.3511, "step": 16412 }, { "epoch": 1.9068254429276794, "grad_norm": 0.6558092832565308, "learning_rate": 0.0001, "loss": 1.4212, "step": 16413 }, { "epoch": 1.90694162067964, "grad_norm": 0.6157481670379639, "learning_rate": 0.0001, "loss": 1.1823, "step": 16414 }, { "epoch": 1.9070577984316004, "grad_norm": 0.6207016110420227, "learning_rate": 0.0001, "loss": 1.3306, "step": 16415 }, { "epoch": 1.9071739761835609, "grad_norm": 0.7037265300750732, "learning_rate": 0.0001, "loss": 1.4956, "step": 16416 }, { "epoch": 1.9072901539355214, "grad_norm": 0.73262619972229, "learning_rate": 0.0001, "loss": 1.5791, "step": 16417 }, { "epoch": 1.9074063316874819, "grad_norm": 0.6703060865402222, "learning_rate": 0.0001, "loss": 1.4617, "step": 16418 }, { "epoch": 1.9075225094394423, "grad_norm": 0.671227216720581, "learning_rate": 0.0001, "loss": 1.3935, "step": 16419 }, { "epoch": 1.9076386871914028, "grad_norm": 0.6156341433525085, "learning_rate": 0.0001, "loss": 1.2633, "step": 16420 }, { "epoch": 1.9077548649433633, "grad_norm": 0.6529107689857483, "learning_rate": 0.0001, "loss": 1.5176, "step": 16421 }, { "epoch": 1.9078710426953238, "grad_norm": 0.6322326064109802, "learning_rate": 0.0001, "loss": 1.2583, "step": 16422 }, { "epoch": 1.9079872204472843, "grad_norm": 0.6221580505371094, "learning_rate": 0.0001, "loss": 1.4591, "step": 16423 }, { "epoch": 1.9081033981992448, "grad_norm": 0.6376168727874756, "learning_rate": 0.0001, "loss": 1.4316, "step": 16424 }, { "epoch": 1.9082195759512053, "grad_norm": 0.6658594608306885, "learning_rate": 0.0001, "loss": 1.4973, "step": 16425 }, { "epoch": 1.9083357537031658, "grad_norm": 0.6233656406402588, "learning_rate": 0.0001, "loss": 1.3629, "step": 16426 }, { "epoch": 1.9084519314551263, "grad_norm": 0.6500493884086609, "learning_rate": 0.0001, "loss": 1.4272, "step": 16427 }, { "epoch": 1.9085681092070867, "grad_norm": 0.6260415315628052, "learning_rate": 0.0001, "loss": 1.4441, "step": 16428 }, { "epoch": 1.9086842869590472, "grad_norm": 0.712968647480011, "learning_rate": 0.0001, "loss": 1.5315, "step": 16429 }, { "epoch": 1.9088004647110077, "grad_norm": 0.6700369715690613, "learning_rate": 0.0001, "loss": 1.4169, "step": 16430 }, { "epoch": 1.9089166424629682, "grad_norm": 0.6498188972473145, "learning_rate": 0.0001, "loss": 1.3518, "step": 16431 }, { "epoch": 1.9090328202149287, "grad_norm": 0.6367303133010864, "learning_rate": 0.0001, "loss": 1.5907, "step": 16432 }, { "epoch": 1.9091489979668892, "grad_norm": 0.6747793555259705, "learning_rate": 0.0001, "loss": 1.4709, "step": 16433 }, { "epoch": 1.9092651757188497, "grad_norm": 0.6583126783370972, "learning_rate": 0.0001, "loss": 1.3844, "step": 16434 }, { "epoch": 1.9093813534708104, "grad_norm": 0.673969030380249, "learning_rate": 0.0001, "loss": 1.3528, "step": 16435 }, { "epoch": 1.9094975312227709, "grad_norm": 0.6629801988601685, "learning_rate": 0.0001, "loss": 1.5156, "step": 16436 }, { "epoch": 1.9096137089747314, "grad_norm": 0.6816200613975525, "learning_rate": 0.0001, "loss": 1.4524, "step": 16437 }, { "epoch": 1.9097298867266919, "grad_norm": 0.6301851868629456, "learning_rate": 0.0001, "loss": 1.4537, "step": 16438 }, { "epoch": 1.9098460644786523, "grad_norm": 0.6251214146614075, "learning_rate": 0.0001, "loss": 1.3369, "step": 16439 }, { "epoch": 1.9099622422306128, "grad_norm": 0.6370289921760559, "learning_rate": 0.0001, "loss": 1.3452, "step": 16440 }, { "epoch": 1.9100784199825733, "grad_norm": 0.5976542234420776, "learning_rate": 0.0001, "loss": 1.3639, "step": 16441 }, { "epoch": 1.9101945977345338, "grad_norm": 0.6388048529624939, "learning_rate": 0.0001, "loss": 1.313, "step": 16442 }, { "epoch": 1.9103107754864943, "grad_norm": 0.6384835243225098, "learning_rate": 0.0001, "loss": 1.4141, "step": 16443 }, { "epoch": 1.9104269532384548, "grad_norm": 0.6970011591911316, "learning_rate": 0.0001, "loss": 1.44, "step": 16444 }, { "epoch": 1.9105431309904153, "grad_norm": 0.655815064907074, "learning_rate": 0.0001, "loss": 1.6069, "step": 16445 }, { "epoch": 1.910659308742376, "grad_norm": 0.6334776878356934, "learning_rate": 0.0001, "loss": 1.3036, "step": 16446 }, { "epoch": 1.9107754864943365, "grad_norm": 0.631655216217041, "learning_rate": 0.0001, "loss": 1.3927, "step": 16447 }, { "epoch": 1.910891664246297, "grad_norm": 0.687410295009613, "learning_rate": 0.0001, "loss": 1.429, "step": 16448 }, { "epoch": 1.9110078419982575, "grad_norm": 0.6505168080329895, "learning_rate": 0.0001, "loss": 1.4476, "step": 16449 }, { "epoch": 1.911124019750218, "grad_norm": 0.6531510949134827, "learning_rate": 0.0001, "loss": 1.3969, "step": 16450 }, { "epoch": 1.9112401975021784, "grad_norm": 0.6344730257987976, "learning_rate": 0.0001, "loss": 1.6063, "step": 16451 }, { "epoch": 1.911356375254139, "grad_norm": 0.6963967084884644, "learning_rate": 0.0001, "loss": 1.4602, "step": 16452 }, { "epoch": 1.9114725530060994, "grad_norm": 0.6708447933197021, "learning_rate": 0.0001, "loss": 1.5405, "step": 16453 }, { "epoch": 1.91158873075806, "grad_norm": 0.5894824266433716, "learning_rate": 0.0001, "loss": 1.3037, "step": 16454 }, { "epoch": 1.9117049085100204, "grad_norm": 0.643746018409729, "learning_rate": 0.0001, "loss": 1.4683, "step": 16455 }, { "epoch": 1.9118210862619809, "grad_norm": 0.6645706295967102, "learning_rate": 0.0001, "loss": 1.4337, "step": 16456 }, { "epoch": 1.9119372640139414, "grad_norm": 0.6779530644416809, "learning_rate": 0.0001, "loss": 1.4623, "step": 16457 }, { "epoch": 1.9120534417659019, "grad_norm": 0.6800106167793274, "learning_rate": 0.0001, "loss": 1.4098, "step": 16458 }, { "epoch": 1.9121696195178624, "grad_norm": 0.6622534394264221, "learning_rate": 0.0001, "loss": 1.6359, "step": 16459 }, { "epoch": 1.9122857972698228, "grad_norm": 0.6480720043182373, "learning_rate": 0.0001, "loss": 1.3999, "step": 16460 }, { "epoch": 1.9124019750217833, "grad_norm": 0.6823915243148804, "learning_rate": 0.0001, "loss": 1.7256, "step": 16461 }, { "epoch": 1.9125181527737438, "grad_norm": 0.6224784255027771, "learning_rate": 0.0001, "loss": 1.4071, "step": 16462 }, { "epoch": 1.9126343305257043, "grad_norm": 0.651973307132721, "learning_rate": 0.0001, "loss": 1.5549, "step": 16463 }, { "epoch": 1.9127505082776648, "grad_norm": 0.6376694440841675, "learning_rate": 0.0001, "loss": 1.5085, "step": 16464 }, { "epoch": 1.9128666860296253, "grad_norm": 0.5933566093444824, "learning_rate": 0.0001, "loss": 1.3447, "step": 16465 }, { "epoch": 1.9129828637815858, "grad_norm": 0.6027122735977173, "learning_rate": 0.0001, "loss": 1.3501, "step": 16466 }, { "epoch": 1.9130990415335463, "grad_norm": 0.7018294930458069, "learning_rate": 0.0001, "loss": 1.8245, "step": 16467 }, { "epoch": 1.9132152192855068, "grad_norm": 0.6423582434654236, "learning_rate": 0.0001, "loss": 1.4504, "step": 16468 }, { "epoch": 1.9133313970374672, "grad_norm": 0.6462240815162659, "learning_rate": 0.0001, "loss": 1.4605, "step": 16469 }, { "epoch": 1.9134475747894277, "grad_norm": 0.6385015845298767, "learning_rate": 0.0001, "loss": 1.4592, "step": 16470 }, { "epoch": 1.9135637525413882, "grad_norm": 0.6264072060585022, "learning_rate": 0.0001, "loss": 1.5117, "step": 16471 }, { "epoch": 1.9136799302933487, "grad_norm": 0.7070044279098511, "learning_rate": 0.0001, "loss": 1.4738, "step": 16472 }, { "epoch": 1.9137961080453092, "grad_norm": 0.6259687542915344, "learning_rate": 0.0001, "loss": 1.2971, "step": 16473 }, { "epoch": 1.9139122857972697, "grad_norm": 0.5981330871582031, "learning_rate": 0.0001, "loss": 1.4731, "step": 16474 }, { "epoch": 1.9140284635492302, "grad_norm": 0.6566482186317444, "learning_rate": 0.0001, "loss": 1.5107, "step": 16475 }, { "epoch": 1.9141446413011907, "grad_norm": 0.6393707990646362, "learning_rate": 0.0001, "loss": 1.4469, "step": 16476 }, { "epoch": 1.9142608190531514, "grad_norm": 0.6416871547698975, "learning_rate": 0.0001, "loss": 1.3932, "step": 16477 }, { "epoch": 1.9143769968051119, "grad_norm": 0.6457273364067078, "learning_rate": 0.0001, "loss": 1.3904, "step": 16478 }, { "epoch": 1.9144931745570724, "grad_norm": 0.6534112691879272, "learning_rate": 0.0001, "loss": 1.5536, "step": 16479 }, { "epoch": 1.9146093523090328, "grad_norm": 0.649128794670105, "learning_rate": 0.0001, "loss": 1.3712, "step": 16480 }, { "epoch": 1.9147255300609933, "grad_norm": 0.5857309699058533, "learning_rate": 0.0001, "loss": 1.3925, "step": 16481 }, { "epoch": 1.9148417078129538, "grad_norm": 0.6419894695281982, "learning_rate": 0.0001, "loss": 1.3888, "step": 16482 }, { "epoch": 1.9149578855649143, "grad_norm": 0.6613510251045227, "learning_rate": 0.0001, "loss": 1.4637, "step": 16483 }, { "epoch": 1.9150740633168748, "grad_norm": 0.6239970326423645, "learning_rate": 0.0001, "loss": 1.2183, "step": 16484 }, { "epoch": 1.9151902410688353, "grad_norm": 0.6346543431282043, "learning_rate": 0.0001, "loss": 1.3542, "step": 16485 }, { "epoch": 1.9153064188207958, "grad_norm": 0.6834646463394165, "learning_rate": 0.0001, "loss": 1.5549, "step": 16486 }, { "epoch": 1.9154225965727563, "grad_norm": 0.6116211414337158, "learning_rate": 0.0001, "loss": 1.3389, "step": 16487 }, { "epoch": 1.915538774324717, "grad_norm": 0.6757547855377197, "learning_rate": 0.0001, "loss": 1.7654, "step": 16488 }, { "epoch": 1.9156549520766775, "grad_norm": 0.6641934514045715, "learning_rate": 0.0001, "loss": 1.3815, "step": 16489 }, { "epoch": 1.915771129828638, "grad_norm": 0.6570766568183899, "learning_rate": 0.0001, "loss": 1.4266, "step": 16490 }, { "epoch": 1.9158873075805984, "grad_norm": 0.6635448932647705, "learning_rate": 0.0001, "loss": 1.4384, "step": 16491 }, { "epoch": 1.916003485332559, "grad_norm": 0.6745858192443848, "learning_rate": 0.0001, "loss": 1.5666, "step": 16492 }, { "epoch": 1.9161196630845194, "grad_norm": 0.6747871041297913, "learning_rate": 0.0001, "loss": 1.5974, "step": 16493 }, { "epoch": 1.91623584083648, "grad_norm": 0.588080644607544, "learning_rate": 0.0001, "loss": 1.4265, "step": 16494 }, { "epoch": 1.9163520185884404, "grad_norm": 0.5993366241455078, "learning_rate": 0.0001, "loss": 1.3325, "step": 16495 }, { "epoch": 1.9164681963404009, "grad_norm": 0.6158087849617004, "learning_rate": 0.0001, "loss": 1.2855, "step": 16496 }, { "epoch": 1.9165843740923614, "grad_norm": 0.6308013796806335, "learning_rate": 0.0001, "loss": 1.4351, "step": 16497 }, { "epoch": 1.9167005518443219, "grad_norm": 0.6267406344413757, "learning_rate": 0.0001, "loss": 1.319, "step": 16498 }, { "epoch": 1.9168167295962824, "grad_norm": 0.6696361303329468, "learning_rate": 0.0001, "loss": 1.4078, "step": 16499 }, { "epoch": 1.9169329073482428, "grad_norm": 0.6355911493301392, "learning_rate": 0.0001, "loss": 1.3845, "step": 16500 }, { "epoch": 1.9170490851002033, "grad_norm": 0.711129903793335, "learning_rate": 0.0001, "loss": 1.5678, "step": 16501 }, { "epoch": 1.9171652628521638, "grad_norm": 0.6613529324531555, "learning_rate": 0.0001, "loss": 1.448, "step": 16502 }, { "epoch": 1.9172814406041243, "grad_norm": 0.6712475419044495, "learning_rate": 0.0001, "loss": 1.5966, "step": 16503 }, { "epoch": 1.9173976183560848, "grad_norm": 0.6463940739631653, "learning_rate": 0.0001, "loss": 1.4042, "step": 16504 }, { "epoch": 1.9175137961080453, "grad_norm": 0.6091834306716919, "learning_rate": 0.0001, "loss": 1.375, "step": 16505 }, { "epoch": 1.9176299738600058, "grad_norm": 0.6414788365364075, "learning_rate": 0.0001, "loss": 1.4512, "step": 16506 }, { "epoch": 1.9177461516119663, "grad_norm": 0.6184314489364624, "learning_rate": 0.0001, "loss": 1.4082, "step": 16507 }, { "epoch": 1.9178623293639268, "grad_norm": 0.6536719799041748, "learning_rate": 0.0001, "loss": 1.4861, "step": 16508 }, { "epoch": 1.9179785071158872, "grad_norm": 0.613323986530304, "learning_rate": 0.0001, "loss": 1.4641, "step": 16509 }, { "epoch": 1.9180946848678477, "grad_norm": 0.6691395044326782, "learning_rate": 0.0001, "loss": 1.5018, "step": 16510 }, { "epoch": 1.9182108626198082, "grad_norm": 0.6589152216911316, "learning_rate": 0.0001, "loss": 1.3652, "step": 16511 }, { "epoch": 1.9183270403717687, "grad_norm": 0.678765058517456, "learning_rate": 0.0001, "loss": 1.4853, "step": 16512 }, { "epoch": 1.9184432181237292, "grad_norm": 0.6601305603981018, "learning_rate": 0.0001, "loss": 1.3382, "step": 16513 }, { "epoch": 1.9185593958756897, "grad_norm": 0.6544720530509949, "learning_rate": 0.0001, "loss": 1.4215, "step": 16514 }, { "epoch": 1.9186755736276502, "grad_norm": 0.6246581077575684, "learning_rate": 0.0001, "loss": 1.2082, "step": 16515 }, { "epoch": 1.9187917513796107, "grad_norm": 0.638567328453064, "learning_rate": 0.0001, "loss": 1.3809, "step": 16516 }, { "epoch": 1.9189079291315712, "grad_norm": 0.6571439504623413, "learning_rate": 0.0001, "loss": 1.3691, "step": 16517 }, { "epoch": 1.9190241068835316, "grad_norm": 0.6592364311218262, "learning_rate": 0.0001, "loss": 1.3693, "step": 16518 }, { "epoch": 1.9191402846354924, "grad_norm": 0.7054256796836853, "learning_rate": 0.0001, "loss": 1.4266, "step": 16519 }, { "epoch": 1.9192564623874528, "grad_norm": 0.6537622809410095, "learning_rate": 0.0001, "loss": 1.4761, "step": 16520 }, { "epoch": 1.9193726401394133, "grad_norm": 0.6173102855682373, "learning_rate": 0.0001, "loss": 1.383, "step": 16521 }, { "epoch": 1.9194888178913738, "grad_norm": 0.6511791348457336, "learning_rate": 0.0001, "loss": 1.5018, "step": 16522 }, { "epoch": 1.9196049956433343, "grad_norm": 0.6183764338493347, "learning_rate": 0.0001, "loss": 1.3614, "step": 16523 }, { "epoch": 1.9197211733952948, "grad_norm": 0.6822580099105835, "learning_rate": 0.0001, "loss": 1.529, "step": 16524 }, { "epoch": 1.9198373511472553, "grad_norm": 0.6584765315055847, "learning_rate": 0.0001, "loss": 1.2673, "step": 16525 }, { "epoch": 1.9199535288992158, "grad_norm": 0.6487628817558289, "learning_rate": 0.0001, "loss": 1.4574, "step": 16526 }, { "epoch": 1.9200697066511763, "grad_norm": 2.0107524394989014, "learning_rate": 0.0001, "loss": 1.4705, "step": 16527 }, { "epoch": 1.9201858844031368, "grad_norm": 0.6716618537902832, "learning_rate": 0.0001, "loss": 1.468, "step": 16528 }, { "epoch": 1.9203020621550972, "grad_norm": 0.6928927898406982, "learning_rate": 0.0001, "loss": 1.4584, "step": 16529 }, { "epoch": 1.920418239907058, "grad_norm": 0.624709963798523, "learning_rate": 0.0001, "loss": 1.4645, "step": 16530 }, { "epoch": 1.9205344176590184, "grad_norm": 0.6557425260543823, "learning_rate": 0.0001, "loss": 1.3485, "step": 16531 }, { "epoch": 1.920650595410979, "grad_norm": 0.6358258724212646, "learning_rate": 0.0001, "loss": 1.5765, "step": 16532 }, { "epoch": 1.9207667731629394, "grad_norm": 0.6697930693626404, "learning_rate": 0.0001, "loss": 1.5745, "step": 16533 }, { "epoch": 1.9208829509149, "grad_norm": 0.6308974623680115, "learning_rate": 0.0001, "loss": 1.5374, "step": 16534 }, { "epoch": 1.9209991286668604, "grad_norm": 0.6196278929710388, "learning_rate": 0.0001, "loss": 1.4763, "step": 16535 }, { "epoch": 1.921115306418821, "grad_norm": 0.6348267793655396, "learning_rate": 0.0001, "loss": 1.3527, "step": 16536 }, { "epoch": 1.9212314841707814, "grad_norm": 0.6334741711616516, "learning_rate": 0.0001, "loss": 1.4053, "step": 16537 }, { "epoch": 1.9213476619227419, "grad_norm": 0.6073527932167053, "learning_rate": 0.0001, "loss": 1.4377, "step": 16538 }, { "epoch": 1.9214638396747024, "grad_norm": 0.6385539770126343, "learning_rate": 0.0001, "loss": 1.3456, "step": 16539 }, { "epoch": 1.9215800174266628, "grad_norm": 0.6245630383491516, "learning_rate": 0.0001, "loss": 1.5885, "step": 16540 }, { "epoch": 1.9216961951786233, "grad_norm": 0.648451566696167, "learning_rate": 0.0001, "loss": 1.3498, "step": 16541 }, { "epoch": 1.9218123729305838, "grad_norm": 0.6779778003692627, "learning_rate": 0.0001, "loss": 1.5927, "step": 16542 }, { "epoch": 1.9219285506825443, "grad_norm": 0.6593450307846069, "learning_rate": 0.0001, "loss": 1.4002, "step": 16543 }, { "epoch": 1.9220447284345048, "grad_norm": 0.6867441534996033, "learning_rate": 0.0001, "loss": 1.5662, "step": 16544 }, { "epoch": 1.9221609061864653, "grad_norm": 0.6424154043197632, "learning_rate": 0.0001, "loss": 1.5885, "step": 16545 }, { "epoch": 1.9222770839384258, "grad_norm": 0.6318874359130859, "learning_rate": 0.0001, "loss": 1.5969, "step": 16546 }, { "epoch": 1.9223932616903863, "grad_norm": 0.6468585729598999, "learning_rate": 0.0001, "loss": 1.6082, "step": 16547 }, { "epoch": 1.9225094394423468, "grad_norm": 0.6427927017211914, "learning_rate": 0.0001, "loss": 1.4825, "step": 16548 }, { "epoch": 1.9226256171943072, "grad_norm": 0.6428377032279968, "learning_rate": 0.0001, "loss": 1.4448, "step": 16549 }, { "epoch": 1.9227417949462677, "grad_norm": 0.6127558946609497, "learning_rate": 0.0001, "loss": 1.5147, "step": 16550 }, { "epoch": 1.9228579726982282, "grad_norm": 0.599165141582489, "learning_rate": 0.0001, "loss": 1.4993, "step": 16551 }, { "epoch": 1.9229741504501887, "grad_norm": 0.5830439329147339, "learning_rate": 0.0001, "loss": 1.3145, "step": 16552 }, { "epoch": 1.9230903282021492, "grad_norm": 0.6550738215446472, "learning_rate": 0.0001, "loss": 1.4451, "step": 16553 }, { "epoch": 1.9232065059541097, "grad_norm": 0.7119544148445129, "learning_rate": 0.0001, "loss": 1.58, "step": 16554 }, { "epoch": 1.9233226837060702, "grad_norm": 0.628628134727478, "learning_rate": 0.0001, "loss": 1.347, "step": 16555 }, { "epoch": 1.9234388614580307, "grad_norm": 0.644616425037384, "learning_rate": 0.0001, "loss": 1.3814, "step": 16556 }, { "epoch": 1.9235550392099912, "grad_norm": 0.6717808246612549, "learning_rate": 0.0001, "loss": 1.5756, "step": 16557 }, { "epoch": 1.9236712169619516, "grad_norm": 0.6062647700309753, "learning_rate": 0.0001, "loss": 1.2932, "step": 16558 }, { "epoch": 1.9237873947139121, "grad_norm": 0.6366165280342102, "learning_rate": 0.0001, "loss": 1.3493, "step": 16559 }, { "epoch": 1.9239035724658726, "grad_norm": 0.7009989023208618, "learning_rate": 0.0001, "loss": 1.6042, "step": 16560 }, { "epoch": 1.9240197502178333, "grad_norm": 0.7381427884101868, "learning_rate": 0.0001, "loss": 1.5097, "step": 16561 }, { "epoch": 1.9241359279697938, "grad_norm": 0.6532899141311646, "learning_rate": 0.0001, "loss": 1.587, "step": 16562 }, { "epoch": 1.9242521057217543, "grad_norm": 0.6630954146385193, "learning_rate": 0.0001, "loss": 1.4144, "step": 16563 }, { "epoch": 1.9243682834737148, "grad_norm": 0.7093004584312439, "learning_rate": 0.0001, "loss": 1.6205, "step": 16564 }, { "epoch": 1.9244844612256753, "grad_norm": 0.604897677898407, "learning_rate": 0.0001, "loss": 1.288, "step": 16565 }, { "epoch": 1.9246006389776358, "grad_norm": 0.6201945543289185, "learning_rate": 0.0001, "loss": 1.4695, "step": 16566 }, { "epoch": 1.9247168167295963, "grad_norm": 0.6748204827308655, "learning_rate": 0.0001, "loss": 1.3134, "step": 16567 }, { "epoch": 1.9248329944815568, "grad_norm": 0.5957321524620056, "learning_rate": 0.0001, "loss": 1.318, "step": 16568 }, { "epoch": 1.9249491722335172, "grad_norm": 0.6583741307258606, "learning_rate": 0.0001, "loss": 1.5621, "step": 16569 }, { "epoch": 1.9250653499854777, "grad_norm": 0.690685510635376, "learning_rate": 0.0001, "loss": 1.6489, "step": 16570 }, { "epoch": 1.9251815277374382, "grad_norm": 0.6293180584907532, "learning_rate": 0.0001, "loss": 1.4948, "step": 16571 }, { "epoch": 1.925297705489399, "grad_norm": 0.6288379430770874, "learning_rate": 0.0001, "loss": 1.5085, "step": 16572 }, { "epoch": 1.9254138832413594, "grad_norm": 0.6467605829238892, "learning_rate": 0.0001, "loss": 1.6035, "step": 16573 }, { "epoch": 1.92553006099332, "grad_norm": 0.6469237804412842, "learning_rate": 0.0001, "loss": 1.6073, "step": 16574 }, { "epoch": 1.9256462387452804, "grad_norm": 0.6825864315032959, "learning_rate": 0.0001, "loss": 1.4161, "step": 16575 }, { "epoch": 1.925762416497241, "grad_norm": 0.6452011466026306, "learning_rate": 0.0001, "loss": 1.5312, "step": 16576 }, { "epoch": 1.9258785942492014, "grad_norm": 0.6895046234130859, "learning_rate": 0.0001, "loss": 1.3833, "step": 16577 }, { "epoch": 1.9259947720011619, "grad_norm": 0.7602998614311218, "learning_rate": 0.0001, "loss": 1.4156, "step": 16578 }, { "epoch": 1.9261109497531224, "grad_norm": 0.6322417259216309, "learning_rate": 0.0001, "loss": 1.477, "step": 16579 }, { "epoch": 1.9262271275050828, "grad_norm": 0.623116672039032, "learning_rate": 0.0001, "loss": 1.4306, "step": 16580 }, { "epoch": 1.9263433052570433, "grad_norm": 0.6488289833068848, "learning_rate": 0.0001, "loss": 1.4004, "step": 16581 }, { "epoch": 1.9264594830090038, "grad_norm": 0.6724238991737366, "learning_rate": 0.0001, "loss": 1.4477, "step": 16582 }, { "epoch": 1.9265756607609643, "grad_norm": 0.6455715298652649, "learning_rate": 0.0001, "loss": 1.4589, "step": 16583 }, { "epoch": 1.9266918385129248, "grad_norm": 0.6559097170829773, "learning_rate": 0.0001, "loss": 1.6034, "step": 16584 }, { "epoch": 1.9268080162648853, "grad_norm": 0.7313292622566223, "learning_rate": 0.0001, "loss": 1.7648, "step": 16585 }, { "epoch": 1.9269241940168458, "grad_norm": 0.6808985471725464, "learning_rate": 0.0001, "loss": 1.5344, "step": 16586 }, { "epoch": 1.9270403717688063, "grad_norm": 0.6397284865379333, "learning_rate": 0.0001, "loss": 1.475, "step": 16587 }, { "epoch": 1.9271565495207668, "grad_norm": 3.1031129360198975, "learning_rate": 0.0001, "loss": 1.3417, "step": 16588 }, { "epoch": 1.9272727272727272, "grad_norm": 0.6621960401535034, "learning_rate": 0.0001, "loss": 1.4989, "step": 16589 }, { "epoch": 1.9273889050246877, "grad_norm": 0.6568571925163269, "learning_rate": 0.0001, "loss": 1.3734, "step": 16590 }, { "epoch": 1.9275050827766482, "grad_norm": 0.6704007387161255, "learning_rate": 0.0001, "loss": 1.4917, "step": 16591 }, { "epoch": 1.9276212605286087, "grad_norm": 0.6791861653327942, "learning_rate": 0.0001, "loss": 1.7039, "step": 16592 }, { "epoch": 1.9277374382805692, "grad_norm": 0.632156252861023, "learning_rate": 0.0001, "loss": 1.4476, "step": 16593 }, { "epoch": 1.9278536160325297, "grad_norm": 0.6251896619796753, "learning_rate": 0.0001, "loss": 1.3316, "step": 16594 }, { "epoch": 1.9279697937844902, "grad_norm": 0.6218069791793823, "learning_rate": 0.0001, "loss": 1.2969, "step": 16595 }, { "epoch": 1.9280859715364507, "grad_norm": 0.6654088497161865, "learning_rate": 0.0001, "loss": 1.3541, "step": 16596 }, { "epoch": 1.9282021492884112, "grad_norm": 0.6630351543426514, "learning_rate": 0.0001, "loss": 1.3614, "step": 16597 }, { "epoch": 1.9283183270403716, "grad_norm": 0.742335319519043, "learning_rate": 0.0001, "loss": 1.5037, "step": 16598 }, { "epoch": 1.9284345047923321, "grad_norm": 0.6404957175254822, "learning_rate": 0.0001, "loss": 1.4045, "step": 16599 }, { "epoch": 1.9285506825442926, "grad_norm": 0.6033278703689575, "learning_rate": 0.0001, "loss": 1.2491, "step": 16600 }, { "epoch": 1.9286668602962531, "grad_norm": 0.6421386003494263, "learning_rate": 0.0001, "loss": 1.5497, "step": 16601 }, { "epoch": 1.9287830380482136, "grad_norm": 0.6442969441413879, "learning_rate": 0.0001, "loss": 1.4552, "step": 16602 }, { "epoch": 1.9288992158001743, "grad_norm": 0.6578024625778198, "learning_rate": 0.0001, "loss": 1.4672, "step": 16603 }, { "epoch": 1.9290153935521348, "grad_norm": 0.7395846247673035, "learning_rate": 0.0001, "loss": 1.4534, "step": 16604 }, { "epoch": 1.9291315713040953, "grad_norm": 36.97671890258789, "learning_rate": 0.0001, "loss": 2.9429, "step": 16605 }, { "epoch": 1.9292477490560558, "grad_norm": 0.665990948677063, "learning_rate": 0.0001, "loss": 1.4611, "step": 16606 }, { "epoch": 1.9293639268080163, "grad_norm": 0.7356756329536438, "learning_rate": 0.0001, "loss": 1.4058, "step": 16607 }, { "epoch": 1.9294801045599768, "grad_norm": 0.6247438192367554, "learning_rate": 0.0001, "loss": 1.4404, "step": 16608 }, { "epoch": 1.9295962823119372, "grad_norm": 0.6352943778038025, "learning_rate": 0.0001, "loss": 1.4259, "step": 16609 }, { "epoch": 1.9297124600638977, "grad_norm": 0.8352517485618591, "learning_rate": 0.0001, "loss": 1.4044, "step": 16610 }, { "epoch": 1.9298286378158582, "grad_norm": 0.6350531578063965, "learning_rate": 0.0001, "loss": 1.3762, "step": 16611 }, { "epoch": 1.9299448155678187, "grad_norm": 0.6718446612358093, "learning_rate": 0.0001, "loss": 1.4418, "step": 16612 }, { "epoch": 1.9300609933197794, "grad_norm": 0.6531091928482056, "learning_rate": 0.0001, "loss": 1.4229, "step": 16613 }, { "epoch": 1.93017717107174, "grad_norm": 0.6925991773605347, "learning_rate": 0.0001, "loss": 1.5397, "step": 16614 }, { "epoch": 1.9302933488237004, "grad_norm": 0.6539130210876465, "learning_rate": 0.0001, "loss": 1.3099, "step": 16615 }, { "epoch": 1.930409526575661, "grad_norm": 1.3421820402145386, "learning_rate": 0.0001, "loss": 1.3973, "step": 16616 }, { "epoch": 1.9305257043276214, "grad_norm": 0.6287830471992493, "learning_rate": 0.0001, "loss": 1.3885, "step": 16617 }, { "epoch": 1.9306418820795819, "grad_norm": 0.6415571570396423, "learning_rate": 0.0001, "loss": 1.49, "step": 16618 }, { "epoch": 1.9307580598315424, "grad_norm": 0.6518732309341431, "learning_rate": 0.0001, "loss": 1.4067, "step": 16619 }, { "epoch": 1.9308742375835029, "grad_norm": 0.6404312252998352, "learning_rate": 0.0001, "loss": 1.5293, "step": 16620 }, { "epoch": 1.9309904153354633, "grad_norm": 1.2512699365615845, "learning_rate": 0.0001, "loss": 1.3367, "step": 16621 }, { "epoch": 1.9311065930874238, "grad_norm": 0.7849844098091125, "learning_rate": 0.0001, "loss": 1.6012, "step": 16622 }, { "epoch": 1.9312227708393843, "grad_norm": 0.9194017052650452, "learning_rate": 0.0001, "loss": 1.6028, "step": 16623 }, { "epoch": 1.9313389485913448, "grad_norm": 0.7389204502105713, "learning_rate": 0.0001, "loss": 1.4939, "step": 16624 }, { "epoch": 1.9314551263433053, "grad_norm": 0.6778737306594849, "learning_rate": 0.0001, "loss": 1.4501, "step": 16625 }, { "epoch": 1.9315713040952658, "grad_norm": 2.7290234565734863, "learning_rate": 0.0001, "loss": 1.4903, "step": 16626 }, { "epoch": 1.9316874818472263, "grad_norm": 0.6387723088264465, "learning_rate": 0.0001, "loss": 1.407, "step": 16627 }, { "epoch": 1.9318036595991868, "grad_norm": 0.8926582932472229, "learning_rate": 0.0001, "loss": 1.319, "step": 16628 }, { "epoch": 1.9319198373511473, "grad_norm": 0.670853853225708, "learning_rate": 0.0001, "loss": 1.4273, "step": 16629 }, { "epoch": 1.9320360151031077, "grad_norm": 0.7137992978096008, "learning_rate": 0.0001, "loss": 1.4747, "step": 16630 }, { "epoch": 1.9321521928550682, "grad_norm": 4.574283123016357, "learning_rate": 0.0001, "loss": 1.599, "step": 16631 }, { "epoch": 1.9322683706070287, "grad_norm": 0.6734709143638611, "learning_rate": 0.0001, "loss": 1.5688, "step": 16632 }, { "epoch": 1.9323845483589892, "grad_norm": 3.495939016342163, "learning_rate": 0.0001, "loss": 1.4874, "step": 16633 }, { "epoch": 1.9325007261109497, "grad_norm": 0.7935895323753357, "learning_rate": 0.0001, "loss": 1.3891, "step": 16634 }, { "epoch": 1.9326169038629102, "grad_norm": 0.7920544147491455, "learning_rate": 0.0001, "loss": 1.4431, "step": 16635 }, { "epoch": 1.9327330816148707, "grad_norm": 0.7319844365119934, "learning_rate": 0.0001, "loss": 1.4172, "step": 16636 }, { "epoch": 1.9328492593668312, "grad_norm": 0.803978443145752, "learning_rate": 0.0001, "loss": 1.4854, "step": 16637 }, { "epoch": 1.9329654371187917, "grad_norm": 0.6874574422836304, "learning_rate": 0.0001, "loss": 1.5287, "step": 16638 }, { "epoch": 1.9330816148707521, "grad_norm": 0.7260401844978333, "learning_rate": 0.0001, "loss": 1.4336, "step": 16639 }, { "epoch": 1.9331977926227126, "grad_norm": 0.7048704624176025, "learning_rate": 0.0001, "loss": 1.4758, "step": 16640 }, { "epoch": 1.9333139703746731, "grad_norm": 0.6527437567710876, "learning_rate": 0.0001, "loss": 1.443, "step": 16641 }, { "epoch": 1.9334301481266336, "grad_norm": 0.6349430084228516, "learning_rate": 0.0001, "loss": 1.4942, "step": 16642 }, { "epoch": 1.933546325878594, "grad_norm": 0.623433530330658, "learning_rate": 0.0001, "loss": 1.6122, "step": 16643 }, { "epoch": 1.9336625036305546, "grad_norm": 0.7340916991233826, "learning_rate": 0.0001, "loss": 1.5012, "step": 16644 }, { "epoch": 1.9337786813825153, "grad_norm": 0.6529496908187866, "learning_rate": 0.0001, "loss": 1.3324, "step": 16645 }, { "epoch": 1.9338948591344758, "grad_norm": 0.6408762335777283, "learning_rate": 0.0001, "loss": 1.343, "step": 16646 }, { "epoch": 1.9340110368864363, "grad_norm": 0.7193260192871094, "learning_rate": 0.0001, "loss": 1.5036, "step": 16647 }, { "epoch": 1.9341272146383968, "grad_norm": 0.6456512212753296, "learning_rate": 0.0001, "loss": 1.4098, "step": 16648 }, { "epoch": 1.9342433923903573, "grad_norm": 0.6565035581588745, "learning_rate": 0.0001, "loss": 1.423, "step": 16649 }, { "epoch": 1.9343595701423177, "grad_norm": 0.7637086510658264, "learning_rate": 0.0001, "loss": 1.618, "step": 16650 }, { "epoch": 1.9344757478942782, "grad_norm": 0.6454724669456482, "learning_rate": 0.0001, "loss": 1.4092, "step": 16651 }, { "epoch": 1.9345919256462387, "grad_norm": 0.7209746837615967, "learning_rate": 0.0001, "loss": 1.5369, "step": 16652 }, { "epoch": 1.9347081033981992, "grad_norm": 0.6487703323364258, "learning_rate": 0.0001, "loss": 1.4266, "step": 16653 }, { "epoch": 1.9348242811501597, "grad_norm": 0.6998302936553955, "learning_rate": 0.0001, "loss": 1.5737, "step": 16654 }, { "epoch": 1.9349404589021204, "grad_norm": 0.6754437685012817, "learning_rate": 0.0001, "loss": 1.4423, "step": 16655 }, { "epoch": 1.935056636654081, "grad_norm": 0.7228525280952454, "learning_rate": 0.0001, "loss": 1.6053, "step": 16656 }, { "epoch": 1.9351728144060414, "grad_norm": 0.6906144618988037, "learning_rate": 0.0001, "loss": 1.509, "step": 16657 }, { "epoch": 1.9352889921580019, "grad_norm": 0.6010711789131165, "learning_rate": 0.0001, "loss": 1.4005, "step": 16658 }, { "epoch": 1.9354051699099624, "grad_norm": 0.6514776945114136, "learning_rate": 0.0001, "loss": 1.3454, "step": 16659 }, { "epoch": 1.9355213476619229, "grad_norm": 0.6675294041633606, "learning_rate": 0.0001, "loss": 1.5585, "step": 16660 }, { "epoch": 1.9356375254138833, "grad_norm": 0.6100136041641235, "learning_rate": 0.0001, "loss": 1.4755, "step": 16661 }, { "epoch": 1.9357537031658438, "grad_norm": 0.6834390759468079, "learning_rate": 0.0001, "loss": 1.4561, "step": 16662 }, { "epoch": 1.9358698809178043, "grad_norm": 0.6633105278015137, "learning_rate": 0.0001, "loss": 1.5259, "step": 16663 }, { "epoch": 1.9359860586697648, "grad_norm": 0.6935020089149475, "learning_rate": 0.0001, "loss": 1.4903, "step": 16664 }, { "epoch": 1.9361022364217253, "grad_norm": 0.6353487372398376, "learning_rate": 0.0001, "loss": 1.5369, "step": 16665 }, { "epoch": 1.9362184141736858, "grad_norm": 0.6704708337783813, "learning_rate": 0.0001, "loss": 1.4686, "step": 16666 }, { "epoch": 1.9363345919256463, "grad_norm": 0.6549214720726013, "learning_rate": 0.0001, "loss": 1.4692, "step": 16667 }, { "epoch": 1.9364507696776068, "grad_norm": 0.6901536583900452, "learning_rate": 0.0001, "loss": 1.3832, "step": 16668 }, { "epoch": 1.9365669474295673, "grad_norm": 0.6834099292755127, "learning_rate": 0.0001, "loss": 1.5058, "step": 16669 }, { "epoch": 1.9366831251815277, "grad_norm": 0.6013579964637756, "learning_rate": 0.0001, "loss": 1.3678, "step": 16670 }, { "epoch": 1.9367993029334882, "grad_norm": 0.6654561161994934, "learning_rate": 0.0001, "loss": 1.5921, "step": 16671 }, { "epoch": 1.9369154806854487, "grad_norm": 0.7015004754066467, "learning_rate": 0.0001, "loss": 1.5054, "step": 16672 }, { "epoch": 1.9370316584374092, "grad_norm": 0.6722459197044373, "learning_rate": 0.0001, "loss": 1.4925, "step": 16673 }, { "epoch": 1.9371478361893697, "grad_norm": 0.6341900825500488, "learning_rate": 0.0001, "loss": 1.453, "step": 16674 }, { "epoch": 1.9372640139413302, "grad_norm": 0.6493798494338989, "learning_rate": 0.0001, "loss": 1.4069, "step": 16675 }, { "epoch": 1.9373801916932907, "grad_norm": 0.6679549813270569, "learning_rate": 0.0001, "loss": 1.519, "step": 16676 }, { "epoch": 1.9374963694452512, "grad_norm": 0.6709537506103516, "learning_rate": 0.0001, "loss": 1.3854, "step": 16677 }, { "epoch": 1.9376125471972117, "grad_norm": 0.7255121469497681, "learning_rate": 0.0001, "loss": 1.6107, "step": 16678 }, { "epoch": 1.9377287249491721, "grad_norm": 0.7132481336593628, "learning_rate": 0.0001, "loss": 1.5287, "step": 16679 }, { "epoch": 1.9378449027011326, "grad_norm": 0.6523537039756775, "learning_rate": 0.0001, "loss": 1.3599, "step": 16680 }, { "epoch": 1.9379610804530931, "grad_norm": 0.6540292501449585, "learning_rate": 0.0001, "loss": 1.5858, "step": 16681 }, { "epoch": 1.9380772582050536, "grad_norm": 0.6346926093101501, "learning_rate": 0.0001, "loss": 1.477, "step": 16682 }, { "epoch": 1.938193435957014, "grad_norm": 0.6806980967521667, "learning_rate": 0.0001, "loss": 1.4415, "step": 16683 }, { "epoch": 1.9383096137089746, "grad_norm": 0.652722179889679, "learning_rate": 0.0001, "loss": 1.4638, "step": 16684 }, { "epoch": 1.938425791460935, "grad_norm": 0.6343298554420471, "learning_rate": 0.0001, "loss": 1.4014, "step": 16685 }, { "epoch": 1.9385419692128956, "grad_norm": 0.5906456112861633, "learning_rate": 0.0001, "loss": 1.357, "step": 16686 }, { "epoch": 1.9386581469648563, "grad_norm": 0.6251915693283081, "learning_rate": 0.0001, "loss": 1.4759, "step": 16687 }, { "epoch": 1.9387743247168168, "grad_norm": 0.659336507320404, "learning_rate": 0.0001, "loss": 1.4253, "step": 16688 }, { "epoch": 1.9388905024687773, "grad_norm": 0.7129247188568115, "learning_rate": 0.0001, "loss": 1.3947, "step": 16689 }, { "epoch": 1.9390066802207377, "grad_norm": 0.6155510544776917, "learning_rate": 0.0001, "loss": 1.3926, "step": 16690 }, { "epoch": 1.9391228579726982, "grad_norm": 0.6242903470993042, "learning_rate": 0.0001, "loss": 1.4449, "step": 16691 }, { "epoch": 1.9392390357246587, "grad_norm": 0.7008059024810791, "learning_rate": 0.0001, "loss": 1.4612, "step": 16692 }, { "epoch": 1.9393552134766192, "grad_norm": 0.6275923848152161, "learning_rate": 0.0001, "loss": 1.2577, "step": 16693 }, { "epoch": 1.9394713912285797, "grad_norm": 0.6566643118858337, "learning_rate": 0.0001, "loss": 1.5035, "step": 16694 }, { "epoch": 1.9395875689805402, "grad_norm": 0.6245930194854736, "learning_rate": 0.0001, "loss": 1.6921, "step": 16695 }, { "epoch": 1.9397037467325007, "grad_norm": 0.6758073568344116, "learning_rate": 0.0001, "loss": 1.4437, "step": 16696 }, { "epoch": 1.9398199244844614, "grad_norm": 0.640375554561615, "learning_rate": 0.0001, "loss": 1.4807, "step": 16697 }, { "epoch": 1.9399361022364219, "grad_norm": 0.6732568144798279, "learning_rate": 0.0001, "loss": 1.4437, "step": 16698 }, { "epoch": 1.9400522799883824, "grad_norm": 0.6728057265281677, "learning_rate": 0.0001, "loss": 1.3904, "step": 16699 }, { "epoch": 1.9401684577403429, "grad_norm": 0.6272763013839722, "learning_rate": 0.0001, "loss": 1.2958, "step": 16700 }, { "epoch": 1.9402846354923033, "grad_norm": 0.6336491107940674, "learning_rate": 0.0001, "loss": 1.3129, "step": 16701 }, { "epoch": 1.9404008132442638, "grad_norm": 0.6731768250465393, "learning_rate": 0.0001, "loss": 1.5489, "step": 16702 }, { "epoch": 1.9405169909962243, "grad_norm": 0.6434739828109741, "learning_rate": 0.0001, "loss": 1.5446, "step": 16703 }, { "epoch": 1.9406331687481848, "grad_norm": 0.6862572431564331, "learning_rate": 0.0001, "loss": 1.4547, "step": 16704 }, { "epoch": 1.9407493465001453, "grad_norm": 0.6229750514030457, "learning_rate": 0.0001, "loss": 1.3661, "step": 16705 }, { "epoch": 1.9408655242521058, "grad_norm": 0.632831871509552, "learning_rate": 0.0001, "loss": 1.2303, "step": 16706 }, { "epoch": 1.9409817020040663, "grad_norm": 0.6478464603424072, "learning_rate": 0.0001, "loss": 1.3536, "step": 16707 }, { "epoch": 1.9410978797560268, "grad_norm": 0.7153877019882202, "learning_rate": 0.0001, "loss": 1.5317, "step": 16708 }, { "epoch": 1.9412140575079873, "grad_norm": 0.742243230342865, "learning_rate": 0.0001, "loss": 1.6074, "step": 16709 }, { "epoch": 1.9413302352599477, "grad_norm": 0.6956560015678406, "learning_rate": 0.0001, "loss": 1.6345, "step": 16710 }, { "epoch": 1.9414464130119082, "grad_norm": 0.6485413312911987, "learning_rate": 0.0001, "loss": 1.4414, "step": 16711 }, { "epoch": 1.9415625907638687, "grad_norm": 0.6522170305252075, "learning_rate": 0.0001, "loss": 1.6141, "step": 16712 }, { "epoch": 1.9416787685158292, "grad_norm": 0.6709471344947815, "learning_rate": 0.0001, "loss": 1.5683, "step": 16713 }, { "epoch": 1.9417949462677897, "grad_norm": 0.6498940587043762, "learning_rate": 0.0001, "loss": 1.4917, "step": 16714 }, { "epoch": 1.9419111240197502, "grad_norm": 0.6017136573791504, "learning_rate": 0.0001, "loss": 1.3147, "step": 16715 }, { "epoch": 1.9420273017717107, "grad_norm": 0.6413711309432983, "learning_rate": 0.0001, "loss": 1.317, "step": 16716 }, { "epoch": 1.9421434795236712, "grad_norm": 0.7011211514472961, "learning_rate": 0.0001, "loss": 1.5437, "step": 16717 }, { "epoch": 1.9422596572756317, "grad_norm": 0.6307085752487183, "learning_rate": 0.0001, "loss": 1.3766, "step": 16718 }, { "epoch": 1.9423758350275921, "grad_norm": 0.6387877464294434, "learning_rate": 0.0001, "loss": 1.3187, "step": 16719 }, { "epoch": 1.9424920127795526, "grad_norm": 0.6452335119247437, "learning_rate": 0.0001, "loss": 1.5065, "step": 16720 }, { "epoch": 1.9426081905315131, "grad_norm": 0.6727257370948792, "learning_rate": 0.0001, "loss": 1.4629, "step": 16721 }, { "epoch": 1.9427243682834736, "grad_norm": 0.6422793865203857, "learning_rate": 0.0001, "loss": 1.4756, "step": 16722 }, { "epoch": 1.942840546035434, "grad_norm": 0.7160427570343018, "learning_rate": 0.0001, "loss": 1.4688, "step": 16723 }, { "epoch": 1.9429567237873946, "grad_norm": 0.6088557243347168, "learning_rate": 0.0001, "loss": 1.3098, "step": 16724 }, { "epoch": 1.943072901539355, "grad_norm": 0.6456435322761536, "learning_rate": 0.0001, "loss": 1.3003, "step": 16725 }, { "epoch": 1.9431890792913156, "grad_norm": 0.689880907535553, "learning_rate": 0.0001, "loss": 1.5717, "step": 16726 }, { "epoch": 1.943305257043276, "grad_norm": 0.6574771404266357, "learning_rate": 0.0001, "loss": 1.4055, "step": 16727 }, { "epoch": 1.9434214347952365, "grad_norm": 0.7070064544677734, "learning_rate": 0.0001, "loss": 1.483, "step": 16728 }, { "epoch": 1.9435376125471973, "grad_norm": 0.6926152110099792, "learning_rate": 0.0001, "loss": 1.4891, "step": 16729 }, { "epoch": 1.9436537902991577, "grad_norm": 0.6049273610115051, "learning_rate": 0.0001, "loss": 1.4354, "step": 16730 }, { "epoch": 1.9437699680511182, "grad_norm": 0.623616099357605, "learning_rate": 0.0001, "loss": 1.3255, "step": 16731 }, { "epoch": 1.9438861458030787, "grad_norm": 0.7186086177825928, "learning_rate": 0.0001, "loss": 1.4911, "step": 16732 }, { "epoch": 1.9440023235550392, "grad_norm": 0.707463264465332, "learning_rate": 0.0001, "loss": 1.5816, "step": 16733 }, { "epoch": 1.9441185013069997, "grad_norm": 0.6389947533607483, "learning_rate": 0.0001, "loss": 1.43, "step": 16734 }, { "epoch": 1.9442346790589602, "grad_norm": 0.6669544577598572, "learning_rate": 0.0001, "loss": 1.3785, "step": 16735 }, { "epoch": 1.9443508568109207, "grad_norm": 0.7142645120620728, "learning_rate": 0.0001, "loss": 1.4587, "step": 16736 }, { "epoch": 1.9444670345628812, "grad_norm": 0.6553264856338501, "learning_rate": 0.0001, "loss": 1.4373, "step": 16737 }, { "epoch": 1.9445832123148417, "grad_norm": 0.6745234131813049, "learning_rate": 0.0001, "loss": 1.5387, "step": 16738 }, { "epoch": 1.9446993900668024, "grad_norm": 0.6603965163230896, "learning_rate": 0.0001, "loss": 1.3655, "step": 16739 }, { "epoch": 1.9448155678187629, "grad_norm": 0.6330799460411072, "learning_rate": 0.0001, "loss": 1.5178, "step": 16740 }, { "epoch": 1.9449317455707233, "grad_norm": 0.6976838111877441, "learning_rate": 0.0001, "loss": 1.6003, "step": 16741 }, { "epoch": 1.9450479233226838, "grad_norm": 0.6364608407020569, "learning_rate": 0.0001, "loss": 1.6802, "step": 16742 }, { "epoch": 1.9451641010746443, "grad_norm": 0.6515650153160095, "learning_rate": 0.0001, "loss": 1.4906, "step": 16743 }, { "epoch": 1.9452802788266048, "grad_norm": 0.6282598972320557, "learning_rate": 0.0001, "loss": 1.3712, "step": 16744 }, { "epoch": 1.9453964565785653, "grad_norm": 0.6360607147216797, "learning_rate": 0.0001, "loss": 1.4749, "step": 16745 }, { "epoch": 1.9455126343305258, "grad_norm": 0.6735727190971375, "learning_rate": 0.0001, "loss": 1.2862, "step": 16746 }, { "epoch": 1.9456288120824863, "grad_norm": 0.6471088528633118, "learning_rate": 0.0001, "loss": 1.427, "step": 16747 }, { "epoch": 1.9457449898344468, "grad_norm": 0.6112599968910217, "learning_rate": 0.0001, "loss": 1.2313, "step": 16748 }, { "epoch": 1.9458611675864073, "grad_norm": 0.6612389087677002, "learning_rate": 0.0001, "loss": 1.4742, "step": 16749 }, { "epoch": 1.9459773453383677, "grad_norm": 0.6832696199417114, "learning_rate": 0.0001, "loss": 1.5614, "step": 16750 }, { "epoch": 1.9460935230903282, "grad_norm": 0.6629716157913208, "learning_rate": 0.0001, "loss": 1.5292, "step": 16751 }, { "epoch": 1.9462097008422887, "grad_norm": 0.7395675778388977, "learning_rate": 0.0001, "loss": 1.4324, "step": 16752 }, { "epoch": 1.9463258785942492, "grad_norm": 0.6757746338844299, "learning_rate": 0.0001, "loss": 1.5063, "step": 16753 }, { "epoch": 1.9464420563462097, "grad_norm": 0.6616039872169495, "learning_rate": 0.0001, "loss": 1.4183, "step": 16754 }, { "epoch": 1.9465582340981702, "grad_norm": 0.7018517255783081, "learning_rate": 0.0001, "loss": 1.4651, "step": 16755 }, { "epoch": 1.9466744118501307, "grad_norm": 0.6375793814659119, "learning_rate": 0.0001, "loss": 1.2257, "step": 16756 }, { "epoch": 1.9467905896020912, "grad_norm": 0.6641897559165955, "learning_rate": 0.0001, "loss": 1.2862, "step": 16757 }, { "epoch": 1.9469067673540517, "grad_norm": 0.6264728307723999, "learning_rate": 0.0001, "loss": 1.3409, "step": 16758 }, { "epoch": 1.9470229451060121, "grad_norm": 0.7059872150421143, "learning_rate": 0.0001, "loss": 1.3881, "step": 16759 }, { "epoch": 1.9471391228579726, "grad_norm": 0.6805691719055176, "learning_rate": 0.0001, "loss": 1.3762, "step": 16760 }, { "epoch": 1.9472553006099331, "grad_norm": 0.6974397301673889, "learning_rate": 0.0001, "loss": 1.5568, "step": 16761 }, { "epoch": 1.9473714783618936, "grad_norm": 0.7048595547676086, "learning_rate": 0.0001, "loss": 1.6313, "step": 16762 }, { "epoch": 1.947487656113854, "grad_norm": 0.6319378018379211, "learning_rate": 0.0001, "loss": 1.393, "step": 16763 }, { "epoch": 1.9476038338658146, "grad_norm": 0.6213495135307312, "learning_rate": 0.0001, "loss": 1.3844, "step": 16764 }, { "epoch": 1.947720011617775, "grad_norm": 0.6406894326210022, "learning_rate": 0.0001, "loss": 1.2891, "step": 16765 }, { "epoch": 1.9478361893697356, "grad_norm": 0.6391229033470154, "learning_rate": 0.0001, "loss": 1.5439, "step": 16766 }, { "epoch": 1.947952367121696, "grad_norm": 0.640232503414154, "learning_rate": 0.0001, "loss": 1.3612, "step": 16767 }, { "epoch": 1.9480685448736565, "grad_norm": 0.717464029788971, "learning_rate": 0.0001, "loss": 1.336, "step": 16768 }, { "epoch": 1.948184722625617, "grad_norm": 0.6109762191772461, "learning_rate": 0.0001, "loss": 1.2314, "step": 16769 }, { "epoch": 1.9483009003775777, "grad_norm": 0.6487881541252136, "learning_rate": 0.0001, "loss": 1.3061, "step": 16770 }, { "epoch": 1.9484170781295382, "grad_norm": 0.6883452534675598, "learning_rate": 0.0001, "loss": 1.5796, "step": 16771 }, { "epoch": 1.9485332558814987, "grad_norm": 0.7152904272079468, "learning_rate": 0.0001, "loss": 1.4994, "step": 16772 }, { "epoch": 1.9486494336334592, "grad_norm": 0.6602367758750916, "learning_rate": 0.0001, "loss": 1.3372, "step": 16773 }, { "epoch": 1.9487656113854197, "grad_norm": 0.7148573994636536, "learning_rate": 0.0001, "loss": 1.5183, "step": 16774 }, { "epoch": 1.9488817891373802, "grad_norm": 0.6948972344398499, "learning_rate": 0.0001, "loss": 1.6269, "step": 16775 }, { "epoch": 1.9489979668893407, "grad_norm": 0.679164707660675, "learning_rate": 0.0001, "loss": 1.4542, "step": 16776 }, { "epoch": 1.9491141446413012, "grad_norm": 0.6517925262451172, "learning_rate": 0.0001, "loss": 1.4632, "step": 16777 }, { "epoch": 1.9492303223932617, "grad_norm": 0.7487609386444092, "learning_rate": 0.0001, "loss": 1.627, "step": 16778 }, { "epoch": 1.9493465001452221, "grad_norm": 0.6588004231452942, "learning_rate": 0.0001, "loss": 1.5956, "step": 16779 }, { "epoch": 1.9494626778971826, "grad_norm": 0.6407721042633057, "learning_rate": 0.0001, "loss": 1.4962, "step": 16780 }, { "epoch": 1.9495788556491433, "grad_norm": 0.6370645761489868, "learning_rate": 0.0001, "loss": 1.578, "step": 16781 }, { "epoch": 1.9496950334011038, "grad_norm": 0.6277661919593811, "learning_rate": 0.0001, "loss": 1.3723, "step": 16782 }, { "epoch": 1.9498112111530643, "grad_norm": 0.6160290837287903, "learning_rate": 0.0001, "loss": 1.5611, "step": 16783 }, { "epoch": 1.9499273889050248, "grad_norm": 0.6957546472549438, "learning_rate": 0.0001, "loss": 1.5229, "step": 16784 }, { "epoch": 1.9500435666569853, "grad_norm": 0.5949047803878784, "learning_rate": 0.0001, "loss": 1.3998, "step": 16785 }, { "epoch": 1.9501597444089458, "grad_norm": 0.6545408368110657, "learning_rate": 0.0001, "loss": 1.2842, "step": 16786 }, { "epoch": 1.9502759221609063, "grad_norm": 0.633139967918396, "learning_rate": 0.0001, "loss": 1.4277, "step": 16787 }, { "epoch": 1.9503920999128668, "grad_norm": 0.6589480042457581, "learning_rate": 0.0001, "loss": 1.4215, "step": 16788 }, { "epoch": 1.9505082776648273, "grad_norm": 0.6899810433387756, "learning_rate": 0.0001, "loss": 1.4357, "step": 16789 }, { "epoch": 1.9506244554167878, "grad_norm": 0.6233296394348145, "learning_rate": 0.0001, "loss": 1.4207, "step": 16790 }, { "epoch": 1.9507406331687482, "grad_norm": 0.6625037789344788, "learning_rate": 0.0001, "loss": 1.5143, "step": 16791 }, { "epoch": 1.9508568109207087, "grad_norm": 0.6384428143501282, "learning_rate": 0.0001, "loss": 1.4038, "step": 16792 }, { "epoch": 1.9509729886726692, "grad_norm": 0.6763142943382263, "learning_rate": 0.0001, "loss": 1.5289, "step": 16793 }, { "epoch": 1.9510891664246297, "grad_norm": 0.6408162117004395, "learning_rate": 0.0001, "loss": 1.3448, "step": 16794 }, { "epoch": 1.9512053441765902, "grad_norm": 0.6953721642494202, "learning_rate": 0.0001, "loss": 1.534, "step": 16795 }, { "epoch": 1.9513215219285507, "grad_norm": 0.6348594427108765, "learning_rate": 0.0001, "loss": 1.4733, "step": 16796 }, { "epoch": 1.9514376996805112, "grad_norm": 0.6523205637931824, "learning_rate": 0.0001, "loss": 1.4079, "step": 16797 }, { "epoch": 1.9515538774324717, "grad_norm": 1.2986875772476196, "learning_rate": 0.0001, "loss": 1.2656, "step": 16798 }, { "epoch": 1.9516700551844322, "grad_norm": 0.6754353642463684, "learning_rate": 0.0001, "loss": 1.4341, "step": 16799 }, { "epoch": 1.9517862329363926, "grad_norm": 0.6407897472381592, "learning_rate": 0.0001, "loss": 1.4318, "step": 16800 }, { "epoch": 1.9519024106883531, "grad_norm": 0.6710324883460999, "learning_rate": 0.0001, "loss": 1.4721, "step": 16801 }, { "epoch": 1.9520185884403136, "grad_norm": 0.6537321209907532, "learning_rate": 0.0001, "loss": 1.473, "step": 16802 }, { "epoch": 1.952134766192274, "grad_norm": 0.6465702056884766, "learning_rate": 0.0001, "loss": 1.2741, "step": 16803 }, { "epoch": 1.9522509439442346, "grad_norm": 0.6942079067230225, "learning_rate": 0.0001, "loss": 1.4423, "step": 16804 }, { "epoch": 1.952367121696195, "grad_norm": 0.6149597764015198, "learning_rate": 0.0001, "loss": 1.3385, "step": 16805 }, { "epoch": 1.9524832994481556, "grad_norm": 0.6097157597541809, "learning_rate": 0.0001, "loss": 1.3638, "step": 16806 }, { "epoch": 1.952599477200116, "grad_norm": 0.6873170137405396, "learning_rate": 0.0001, "loss": 1.484, "step": 16807 }, { "epoch": 1.9527156549520766, "grad_norm": 0.6243611574172974, "learning_rate": 0.0001, "loss": 1.4864, "step": 16808 }, { "epoch": 1.952831832704037, "grad_norm": 0.6626017689704895, "learning_rate": 0.0001, "loss": 1.5465, "step": 16809 }, { "epoch": 1.9529480104559975, "grad_norm": 0.6952168345451355, "learning_rate": 0.0001, "loss": 1.3906, "step": 16810 }, { "epoch": 1.953064188207958, "grad_norm": 0.6453093886375427, "learning_rate": 0.0001, "loss": 1.4495, "step": 16811 }, { "epoch": 1.9531803659599187, "grad_norm": 0.6364574432373047, "learning_rate": 0.0001, "loss": 1.5159, "step": 16812 }, { "epoch": 1.9532965437118792, "grad_norm": 0.651314377784729, "learning_rate": 0.0001, "loss": 1.4037, "step": 16813 }, { "epoch": 1.9534127214638397, "grad_norm": 0.6500648260116577, "learning_rate": 0.0001, "loss": 1.3321, "step": 16814 }, { "epoch": 1.9535288992158002, "grad_norm": 0.6901101469993591, "learning_rate": 0.0001, "loss": 1.532, "step": 16815 }, { "epoch": 1.9536450769677607, "grad_norm": 0.7526983618736267, "learning_rate": 0.0001, "loss": 1.5513, "step": 16816 }, { "epoch": 1.9537612547197212, "grad_norm": 0.7502654790878296, "learning_rate": 0.0001, "loss": 1.5387, "step": 16817 }, { "epoch": 1.9538774324716817, "grad_norm": 0.6962020993232727, "learning_rate": 0.0001, "loss": 1.4833, "step": 16818 }, { "epoch": 1.9539936102236422, "grad_norm": 0.6374176144599915, "learning_rate": 0.0001, "loss": 1.5032, "step": 16819 }, { "epoch": 1.9541097879756026, "grad_norm": 0.6267789602279663, "learning_rate": 0.0001, "loss": 1.4648, "step": 16820 }, { "epoch": 1.9542259657275631, "grad_norm": 0.6276360750198364, "learning_rate": 0.0001, "loss": 1.5265, "step": 16821 }, { "epoch": 1.9543421434795236, "grad_norm": 0.6632639169692993, "learning_rate": 0.0001, "loss": 1.3138, "step": 16822 }, { "epoch": 1.9544583212314843, "grad_norm": 0.6547974348068237, "learning_rate": 0.0001, "loss": 1.5944, "step": 16823 }, { "epoch": 1.9545744989834448, "grad_norm": 0.6429327726364136, "learning_rate": 0.0001, "loss": 1.5236, "step": 16824 }, { "epoch": 1.9546906767354053, "grad_norm": 0.6531368494033813, "learning_rate": 0.0001, "loss": 1.4964, "step": 16825 }, { "epoch": 1.9548068544873658, "grad_norm": 0.6469924449920654, "learning_rate": 0.0001, "loss": 1.5122, "step": 16826 }, { "epoch": 1.9549230322393263, "grad_norm": 0.6485582590103149, "learning_rate": 0.0001, "loss": 1.4679, "step": 16827 }, { "epoch": 1.9550392099912868, "grad_norm": 0.6716512441635132, "learning_rate": 0.0001, "loss": 1.5425, "step": 16828 }, { "epoch": 1.9551553877432473, "grad_norm": 0.6563982367515564, "learning_rate": 0.0001, "loss": 1.4804, "step": 16829 }, { "epoch": 1.9552715654952078, "grad_norm": 0.7062771916389465, "learning_rate": 0.0001, "loss": 1.5701, "step": 16830 }, { "epoch": 1.9553877432471682, "grad_norm": 0.6591980457305908, "learning_rate": 0.0001, "loss": 1.3091, "step": 16831 }, { "epoch": 1.9555039209991287, "grad_norm": 0.6411164999008179, "learning_rate": 0.0001, "loss": 1.4556, "step": 16832 }, { "epoch": 1.9556200987510892, "grad_norm": 0.614369809627533, "learning_rate": 0.0001, "loss": 1.4085, "step": 16833 }, { "epoch": 1.9557362765030497, "grad_norm": 0.6417170763015747, "learning_rate": 0.0001, "loss": 1.4896, "step": 16834 }, { "epoch": 1.9558524542550102, "grad_norm": 0.6103700399398804, "learning_rate": 0.0001, "loss": 1.4056, "step": 16835 }, { "epoch": 1.9559686320069707, "grad_norm": 0.6278691291809082, "learning_rate": 0.0001, "loss": 1.2524, "step": 16836 }, { "epoch": 1.9560848097589312, "grad_norm": 0.6249340176582336, "learning_rate": 0.0001, "loss": 1.3754, "step": 16837 }, { "epoch": 1.9562009875108917, "grad_norm": 0.6604050993919373, "learning_rate": 0.0001, "loss": 1.4911, "step": 16838 }, { "epoch": 1.9563171652628522, "grad_norm": 0.6540094614028931, "learning_rate": 0.0001, "loss": 1.5495, "step": 16839 }, { "epoch": 1.9564333430148126, "grad_norm": 0.6436858177185059, "learning_rate": 0.0001, "loss": 1.441, "step": 16840 }, { "epoch": 1.9565495207667731, "grad_norm": 0.6557092070579529, "learning_rate": 0.0001, "loss": 1.4295, "step": 16841 }, { "epoch": 1.9566656985187336, "grad_norm": 0.618324875831604, "learning_rate": 0.0001, "loss": 1.308, "step": 16842 }, { "epoch": 1.956781876270694, "grad_norm": 0.6567499041557312, "learning_rate": 0.0001, "loss": 1.3384, "step": 16843 }, { "epoch": 1.9568980540226546, "grad_norm": 0.6852730512619019, "learning_rate": 0.0001, "loss": 1.445, "step": 16844 }, { "epoch": 1.957014231774615, "grad_norm": 0.6480897068977356, "learning_rate": 0.0001, "loss": 1.4174, "step": 16845 }, { "epoch": 1.9571304095265756, "grad_norm": 0.70030277967453, "learning_rate": 0.0001, "loss": 1.5602, "step": 16846 }, { "epoch": 1.957246587278536, "grad_norm": 0.6975461840629578, "learning_rate": 0.0001, "loss": 1.4783, "step": 16847 }, { "epoch": 1.9573627650304966, "grad_norm": 0.680759072303772, "learning_rate": 0.0001, "loss": 1.4557, "step": 16848 }, { "epoch": 1.957478942782457, "grad_norm": 0.6764029264450073, "learning_rate": 0.0001, "loss": 1.5008, "step": 16849 }, { "epoch": 1.9575951205344175, "grad_norm": 0.6500774025917053, "learning_rate": 0.0001, "loss": 1.4906, "step": 16850 }, { "epoch": 1.957711298286378, "grad_norm": 0.6653610467910767, "learning_rate": 0.0001, "loss": 1.3679, "step": 16851 }, { "epoch": 1.9578274760383385, "grad_norm": 0.6707155108451843, "learning_rate": 0.0001, "loss": 1.5521, "step": 16852 }, { "epoch": 1.957943653790299, "grad_norm": 0.6617424488067627, "learning_rate": 0.0001, "loss": 1.4245, "step": 16853 }, { "epoch": 1.9580598315422597, "grad_norm": 0.6398313045501709, "learning_rate": 0.0001, "loss": 1.3766, "step": 16854 }, { "epoch": 1.9581760092942202, "grad_norm": 0.6342032551765442, "learning_rate": 0.0001, "loss": 1.5159, "step": 16855 }, { "epoch": 1.9582921870461807, "grad_norm": 0.6205247640609741, "learning_rate": 0.0001, "loss": 1.4364, "step": 16856 }, { "epoch": 1.9584083647981412, "grad_norm": 0.6303980350494385, "learning_rate": 0.0001, "loss": 1.2779, "step": 16857 }, { "epoch": 1.9585245425501017, "grad_norm": 0.6580673456192017, "learning_rate": 0.0001, "loss": 1.4954, "step": 16858 }, { "epoch": 1.9586407203020622, "grad_norm": 0.7108595371246338, "learning_rate": 0.0001, "loss": 1.4085, "step": 16859 }, { "epoch": 1.9587568980540226, "grad_norm": 0.6796523928642273, "learning_rate": 0.0001, "loss": 1.5066, "step": 16860 }, { "epoch": 1.9588730758059831, "grad_norm": 0.6843320727348328, "learning_rate": 0.0001, "loss": 1.579, "step": 16861 }, { "epoch": 1.9589892535579436, "grad_norm": 0.6194405555725098, "learning_rate": 0.0001, "loss": 1.5073, "step": 16862 }, { "epoch": 1.959105431309904, "grad_norm": 0.6883471608161926, "learning_rate": 0.0001, "loss": 1.5245, "step": 16863 }, { "epoch": 1.9592216090618646, "grad_norm": 0.6587563157081604, "learning_rate": 0.0001, "loss": 1.412, "step": 16864 }, { "epoch": 1.9593377868138253, "grad_norm": 0.6583654880523682, "learning_rate": 0.0001, "loss": 1.3675, "step": 16865 }, { "epoch": 1.9594539645657858, "grad_norm": 0.6586498618125916, "learning_rate": 0.0001, "loss": 1.445, "step": 16866 }, { "epoch": 1.9595701423177463, "grad_norm": 0.6924668550491333, "learning_rate": 0.0001, "loss": 1.5131, "step": 16867 }, { "epoch": 1.9596863200697068, "grad_norm": 0.629725456237793, "learning_rate": 0.0001, "loss": 1.4377, "step": 16868 }, { "epoch": 1.9598024978216673, "grad_norm": 0.6179175972938538, "learning_rate": 0.0001, "loss": 1.4449, "step": 16869 }, { "epoch": 1.9599186755736278, "grad_norm": 0.6352236270904541, "learning_rate": 0.0001, "loss": 1.4486, "step": 16870 }, { "epoch": 1.9600348533255882, "grad_norm": 0.61089026927948, "learning_rate": 0.0001, "loss": 1.4137, "step": 16871 }, { "epoch": 1.9601510310775487, "grad_norm": 0.6087163686752319, "learning_rate": 0.0001, "loss": 1.3499, "step": 16872 }, { "epoch": 1.9602672088295092, "grad_norm": 0.6716894507408142, "learning_rate": 0.0001, "loss": 1.5299, "step": 16873 }, { "epoch": 1.9603833865814697, "grad_norm": 0.66105055809021, "learning_rate": 0.0001, "loss": 1.406, "step": 16874 }, { "epoch": 1.9604995643334302, "grad_norm": 0.7295058965682983, "learning_rate": 0.0001, "loss": 1.4398, "step": 16875 }, { "epoch": 1.9606157420853907, "grad_norm": 0.7296388149261475, "learning_rate": 0.0001, "loss": 1.3707, "step": 16876 }, { "epoch": 1.9607319198373512, "grad_norm": 0.6589450836181641, "learning_rate": 0.0001, "loss": 1.4186, "step": 16877 }, { "epoch": 1.9608480975893117, "grad_norm": 0.66725093126297, "learning_rate": 0.0001, "loss": 1.5247, "step": 16878 }, { "epoch": 1.9609642753412722, "grad_norm": 0.6829125881195068, "learning_rate": 0.0001, "loss": 1.3611, "step": 16879 }, { "epoch": 1.9610804530932326, "grad_norm": 0.6833091378211975, "learning_rate": 0.0001, "loss": 1.4371, "step": 16880 }, { "epoch": 1.9611966308451931, "grad_norm": 0.6404169201850891, "learning_rate": 0.0001, "loss": 1.5802, "step": 16881 }, { "epoch": 1.9613128085971536, "grad_norm": 0.71299147605896, "learning_rate": 0.0001, "loss": 1.4624, "step": 16882 }, { "epoch": 1.9614289863491141, "grad_norm": 0.6363828182220459, "learning_rate": 0.0001, "loss": 1.4265, "step": 16883 }, { "epoch": 1.9615451641010746, "grad_norm": 0.6519455313682556, "learning_rate": 0.0001, "loss": 1.6392, "step": 16884 }, { "epoch": 1.961661341853035, "grad_norm": 0.6451375484466553, "learning_rate": 0.0001, "loss": 1.3823, "step": 16885 }, { "epoch": 1.9617775196049956, "grad_norm": 0.6893901228904724, "learning_rate": 0.0001, "loss": 1.4184, "step": 16886 }, { "epoch": 1.961893697356956, "grad_norm": 0.6673083901405334, "learning_rate": 0.0001, "loss": 1.4758, "step": 16887 }, { "epoch": 1.9620098751089166, "grad_norm": 0.6170085668563843, "learning_rate": 0.0001, "loss": 1.2767, "step": 16888 }, { "epoch": 1.962126052860877, "grad_norm": 0.6572393178939819, "learning_rate": 0.0001, "loss": 1.517, "step": 16889 }, { "epoch": 1.9622422306128375, "grad_norm": 0.6947939395904541, "learning_rate": 0.0001, "loss": 1.4409, "step": 16890 }, { "epoch": 1.962358408364798, "grad_norm": 0.6807284951210022, "learning_rate": 0.0001, "loss": 1.4447, "step": 16891 }, { "epoch": 1.9624745861167585, "grad_norm": 0.6697346568107605, "learning_rate": 0.0001, "loss": 1.3247, "step": 16892 }, { "epoch": 1.962590763868719, "grad_norm": 0.6470866799354553, "learning_rate": 0.0001, "loss": 1.4273, "step": 16893 }, { "epoch": 1.9627069416206795, "grad_norm": 0.6352959871292114, "learning_rate": 0.0001, "loss": 1.4789, "step": 16894 }, { "epoch": 1.96282311937264, "grad_norm": 0.6530041694641113, "learning_rate": 0.0001, "loss": 1.5937, "step": 16895 }, { "epoch": 1.9629392971246007, "grad_norm": 0.7219769358634949, "learning_rate": 0.0001, "loss": 1.4694, "step": 16896 }, { "epoch": 1.9630554748765612, "grad_norm": 0.6419088840484619, "learning_rate": 0.0001, "loss": 1.4983, "step": 16897 }, { "epoch": 1.9631716526285217, "grad_norm": 0.6872606873512268, "learning_rate": 0.0001, "loss": 1.5483, "step": 16898 }, { "epoch": 1.9632878303804822, "grad_norm": 0.6950446963310242, "learning_rate": 0.0001, "loss": 1.6492, "step": 16899 }, { "epoch": 1.9634040081324426, "grad_norm": 0.6722810864448547, "learning_rate": 0.0001, "loss": 1.4951, "step": 16900 }, { "epoch": 1.9635201858844031, "grad_norm": 0.6293094754219055, "learning_rate": 0.0001, "loss": 1.3009, "step": 16901 }, { "epoch": 1.9636363636363636, "grad_norm": 0.653597354888916, "learning_rate": 0.0001, "loss": 1.391, "step": 16902 }, { "epoch": 1.9637525413883241, "grad_norm": 0.6247639656066895, "learning_rate": 0.0001, "loss": 1.4896, "step": 16903 }, { "epoch": 1.9638687191402846, "grad_norm": 0.6629366874694824, "learning_rate": 0.0001, "loss": 1.526, "step": 16904 }, { "epoch": 1.963984896892245, "grad_norm": 0.6465187072753906, "learning_rate": 0.0001, "loss": 1.4604, "step": 16905 }, { "epoch": 1.9641010746442056, "grad_norm": 0.6469520330429077, "learning_rate": 0.0001, "loss": 1.4567, "step": 16906 }, { "epoch": 1.9642172523961663, "grad_norm": 0.592936635017395, "learning_rate": 0.0001, "loss": 1.3207, "step": 16907 }, { "epoch": 1.9643334301481268, "grad_norm": 0.6297258734703064, "learning_rate": 0.0001, "loss": 1.518, "step": 16908 }, { "epoch": 1.9644496079000873, "grad_norm": 0.6411937475204468, "learning_rate": 0.0001, "loss": 1.4784, "step": 16909 }, { "epoch": 1.9645657856520478, "grad_norm": 0.6432691812515259, "learning_rate": 0.0001, "loss": 1.4424, "step": 16910 }, { "epoch": 1.9646819634040082, "grad_norm": 0.705985426902771, "learning_rate": 0.0001, "loss": 1.534, "step": 16911 }, { "epoch": 1.9647981411559687, "grad_norm": 0.6686820387840271, "learning_rate": 0.0001, "loss": 1.4685, "step": 16912 }, { "epoch": 1.9649143189079292, "grad_norm": 0.6793909668922424, "learning_rate": 0.0001, "loss": 1.511, "step": 16913 }, { "epoch": 1.9650304966598897, "grad_norm": 0.672163724899292, "learning_rate": 0.0001, "loss": 1.4878, "step": 16914 }, { "epoch": 1.9651466744118502, "grad_norm": 0.6782339811325073, "learning_rate": 0.0001, "loss": 1.4215, "step": 16915 }, { "epoch": 1.9652628521638107, "grad_norm": 0.6329590082168579, "learning_rate": 0.0001, "loss": 1.4281, "step": 16916 }, { "epoch": 1.9653790299157712, "grad_norm": 0.5860947966575623, "learning_rate": 0.0001, "loss": 1.3698, "step": 16917 }, { "epoch": 1.9654952076677317, "grad_norm": 0.6451777815818787, "learning_rate": 0.0001, "loss": 1.4817, "step": 16918 }, { "epoch": 1.9656113854196922, "grad_norm": 0.6499130725860596, "learning_rate": 0.0001, "loss": 1.49, "step": 16919 }, { "epoch": 1.9657275631716526, "grad_norm": 0.6585314273834229, "learning_rate": 0.0001, "loss": 1.6237, "step": 16920 }, { "epoch": 1.9658437409236131, "grad_norm": 0.6975468993186951, "learning_rate": 0.0001, "loss": 1.5319, "step": 16921 }, { "epoch": 1.9659599186755736, "grad_norm": 0.6544103026390076, "learning_rate": 0.0001, "loss": 1.4636, "step": 16922 }, { "epoch": 1.9660760964275341, "grad_norm": 0.6451906561851501, "learning_rate": 0.0001, "loss": 1.3416, "step": 16923 }, { "epoch": 1.9661922741794946, "grad_norm": 0.715205729007721, "learning_rate": 0.0001, "loss": 1.5243, "step": 16924 }, { "epoch": 1.966308451931455, "grad_norm": 0.6527731418609619, "learning_rate": 0.0001, "loss": 1.4943, "step": 16925 }, { "epoch": 1.9664246296834156, "grad_norm": 0.701113760471344, "learning_rate": 0.0001, "loss": 1.6541, "step": 16926 }, { "epoch": 1.966540807435376, "grad_norm": 0.6210044622421265, "learning_rate": 0.0001, "loss": 1.2759, "step": 16927 }, { "epoch": 1.9666569851873366, "grad_norm": 0.6458300352096558, "learning_rate": 0.0001, "loss": 1.331, "step": 16928 }, { "epoch": 1.966773162939297, "grad_norm": 0.6208284497261047, "learning_rate": 0.0001, "loss": 1.5049, "step": 16929 }, { "epoch": 1.9668893406912575, "grad_norm": 0.6257730722427368, "learning_rate": 0.0001, "loss": 1.4962, "step": 16930 }, { "epoch": 1.967005518443218, "grad_norm": 0.5953629612922668, "learning_rate": 0.0001, "loss": 1.3791, "step": 16931 }, { "epoch": 1.9671216961951785, "grad_norm": 0.6429222822189331, "learning_rate": 0.0001, "loss": 1.3927, "step": 16932 }, { "epoch": 1.967237873947139, "grad_norm": 0.6672798991203308, "learning_rate": 0.0001, "loss": 1.4684, "step": 16933 }, { "epoch": 1.9673540516990995, "grad_norm": 0.6431074738502502, "learning_rate": 0.0001, "loss": 1.4785, "step": 16934 }, { "epoch": 1.96747022945106, "grad_norm": 0.6146492958068848, "learning_rate": 0.0001, "loss": 1.3951, "step": 16935 }, { "epoch": 1.9675864072030205, "grad_norm": 0.6087470054626465, "learning_rate": 0.0001, "loss": 1.3709, "step": 16936 }, { "epoch": 1.967702584954981, "grad_norm": 0.6495168209075928, "learning_rate": 0.0001, "loss": 1.4305, "step": 16937 }, { "epoch": 1.9678187627069417, "grad_norm": 0.6350211501121521, "learning_rate": 0.0001, "loss": 1.4812, "step": 16938 }, { "epoch": 1.9679349404589022, "grad_norm": 0.5701435208320618, "learning_rate": 0.0001, "loss": 1.0747, "step": 16939 }, { "epoch": 1.9680511182108626, "grad_norm": 0.624546468257904, "learning_rate": 0.0001, "loss": 1.5752, "step": 16940 }, { "epoch": 1.9681672959628231, "grad_norm": 0.6842914819717407, "learning_rate": 0.0001, "loss": 1.6304, "step": 16941 }, { "epoch": 1.9682834737147836, "grad_norm": 0.6859439015388489, "learning_rate": 0.0001, "loss": 1.535, "step": 16942 }, { "epoch": 1.9683996514667441, "grad_norm": 0.6349547505378723, "learning_rate": 0.0001, "loss": 1.4002, "step": 16943 }, { "epoch": 1.9685158292187046, "grad_norm": 0.6811142563819885, "learning_rate": 0.0001, "loss": 1.5053, "step": 16944 }, { "epoch": 1.968632006970665, "grad_norm": 0.8298106789588928, "learning_rate": 0.0001, "loss": 1.5151, "step": 16945 }, { "epoch": 1.9687481847226256, "grad_norm": 0.6081829071044922, "learning_rate": 0.0001, "loss": 1.5359, "step": 16946 }, { "epoch": 1.968864362474586, "grad_norm": 0.6739099621772766, "learning_rate": 0.0001, "loss": 1.4548, "step": 16947 }, { "epoch": 1.9689805402265468, "grad_norm": 0.6355230212211609, "learning_rate": 0.0001, "loss": 1.374, "step": 16948 }, { "epoch": 1.9690967179785073, "grad_norm": 0.621856153011322, "learning_rate": 0.0001, "loss": 1.4004, "step": 16949 }, { "epoch": 1.9692128957304678, "grad_norm": 0.6464223861694336, "learning_rate": 0.0001, "loss": 1.4747, "step": 16950 }, { "epoch": 1.9693290734824282, "grad_norm": 0.6148073673248291, "learning_rate": 0.0001, "loss": 1.436, "step": 16951 }, { "epoch": 1.9694452512343887, "grad_norm": 0.6290232539176941, "learning_rate": 0.0001, "loss": 1.5126, "step": 16952 }, { "epoch": 1.9695614289863492, "grad_norm": 0.7354368567466736, "learning_rate": 0.0001, "loss": 1.6942, "step": 16953 }, { "epoch": 1.9696776067383097, "grad_norm": 0.6189380288124084, "learning_rate": 0.0001, "loss": 1.3827, "step": 16954 }, { "epoch": 1.9697937844902702, "grad_norm": 0.6174877285957336, "learning_rate": 0.0001, "loss": 1.2695, "step": 16955 }, { "epoch": 1.9699099622422307, "grad_norm": 0.6455286145210266, "learning_rate": 0.0001, "loss": 1.3894, "step": 16956 }, { "epoch": 1.9700261399941912, "grad_norm": 0.6408966779708862, "learning_rate": 0.0001, "loss": 1.5463, "step": 16957 }, { "epoch": 1.9701423177461517, "grad_norm": 0.647350549697876, "learning_rate": 0.0001, "loss": 1.4139, "step": 16958 }, { "epoch": 1.9702584954981122, "grad_norm": 0.6626277565956116, "learning_rate": 0.0001, "loss": 1.5518, "step": 16959 }, { "epoch": 1.9703746732500727, "grad_norm": 0.6524257659912109, "learning_rate": 0.0001, "loss": 1.4471, "step": 16960 }, { "epoch": 1.9704908510020331, "grad_norm": 0.6187669634819031, "learning_rate": 0.0001, "loss": 1.3361, "step": 16961 }, { "epoch": 1.9706070287539936, "grad_norm": 0.6824855208396912, "learning_rate": 0.0001, "loss": 1.5166, "step": 16962 }, { "epoch": 1.9707232065059541, "grad_norm": 0.6160680055618286, "learning_rate": 0.0001, "loss": 1.3571, "step": 16963 }, { "epoch": 1.9708393842579146, "grad_norm": 0.6632078886032104, "learning_rate": 0.0001, "loss": 1.302, "step": 16964 }, { "epoch": 1.970955562009875, "grad_norm": 0.6774406433105469, "learning_rate": 0.0001, "loss": 1.3987, "step": 16965 }, { "epoch": 1.9710717397618356, "grad_norm": 0.6735078692436218, "learning_rate": 0.0001, "loss": 1.4162, "step": 16966 }, { "epoch": 1.971187917513796, "grad_norm": 0.6581439971923828, "learning_rate": 0.0001, "loss": 1.5193, "step": 16967 }, { "epoch": 1.9713040952657566, "grad_norm": 0.6638876795768738, "learning_rate": 0.0001, "loss": 1.4699, "step": 16968 }, { "epoch": 1.971420273017717, "grad_norm": 0.6290700435638428, "learning_rate": 0.0001, "loss": 1.3326, "step": 16969 }, { "epoch": 1.9715364507696775, "grad_norm": 0.6683380007743835, "learning_rate": 0.0001, "loss": 1.5612, "step": 16970 }, { "epoch": 1.971652628521638, "grad_norm": 0.6308155059814453, "learning_rate": 0.0001, "loss": 1.4848, "step": 16971 }, { "epoch": 1.9717688062735985, "grad_norm": 0.6389683485031128, "learning_rate": 0.0001, "loss": 1.4831, "step": 16972 }, { "epoch": 1.971884984025559, "grad_norm": 0.6721927523612976, "learning_rate": 0.0001, "loss": 1.6259, "step": 16973 }, { "epoch": 1.9720011617775195, "grad_norm": 0.6064475774765015, "learning_rate": 0.0001, "loss": 1.3934, "step": 16974 }, { "epoch": 1.97211733952948, "grad_norm": 0.6965947151184082, "learning_rate": 0.0001, "loss": 1.4247, "step": 16975 }, { "epoch": 1.9722335172814405, "grad_norm": 0.6857799887657166, "learning_rate": 0.0001, "loss": 1.655, "step": 16976 }, { "epoch": 1.972349695033401, "grad_norm": 0.6697593331336975, "learning_rate": 0.0001, "loss": 1.4758, "step": 16977 }, { "epoch": 1.9724658727853615, "grad_norm": 0.698186993598938, "learning_rate": 0.0001, "loss": 1.2919, "step": 16978 }, { "epoch": 1.972582050537322, "grad_norm": 0.6767157912254333, "learning_rate": 0.0001, "loss": 1.3035, "step": 16979 }, { "epoch": 1.9726982282892827, "grad_norm": 0.6248701214790344, "learning_rate": 0.0001, "loss": 1.5073, "step": 16980 }, { "epoch": 1.9728144060412431, "grad_norm": 0.64948570728302, "learning_rate": 0.0001, "loss": 1.6151, "step": 16981 }, { "epoch": 1.9729305837932036, "grad_norm": 0.6856604814529419, "learning_rate": 0.0001, "loss": 1.4656, "step": 16982 }, { "epoch": 1.9730467615451641, "grad_norm": 0.6624447703361511, "learning_rate": 0.0001, "loss": 1.4982, "step": 16983 }, { "epoch": 1.9731629392971246, "grad_norm": 0.6743104457855225, "learning_rate": 0.0001, "loss": 1.4052, "step": 16984 }, { "epoch": 1.973279117049085, "grad_norm": 0.6608320474624634, "learning_rate": 0.0001, "loss": 1.4765, "step": 16985 }, { "epoch": 1.9733952948010456, "grad_norm": 0.6632779836654663, "learning_rate": 0.0001, "loss": 1.5586, "step": 16986 }, { "epoch": 1.973511472553006, "grad_norm": 0.6260380744934082, "learning_rate": 0.0001, "loss": 1.4673, "step": 16987 }, { "epoch": 1.9736276503049666, "grad_norm": 0.6172561049461365, "learning_rate": 0.0001, "loss": 1.3626, "step": 16988 }, { "epoch": 1.973743828056927, "grad_norm": 0.6249331831932068, "learning_rate": 0.0001, "loss": 1.2641, "step": 16989 }, { "epoch": 1.9738600058088878, "grad_norm": 0.6231138706207275, "learning_rate": 0.0001, "loss": 1.4441, "step": 16990 }, { "epoch": 1.9739761835608483, "grad_norm": 0.6399810314178467, "learning_rate": 0.0001, "loss": 1.468, "step": 16991 }, { "epoch": 1.9740923613128087, "grad_norm": 0.6135619282722473, "learning_rate": 0.0001, "loss": 1.3092, "step": 16992 }, { "epoch": 1.9742085390647692, "grad_norm": 0.7376452684402466, "learning_rate": 0.0001, "loss": 1.5916, "step": 16993 }, { "epoch": 1.9743247168167297, "grad_norm": 0.6568403244018555, "learning_rate": 0.0001, "loss": 1.3851, "step": 16994 }, { "epoch": 1.9744408945686902, "grad_norm": 0.6743078827857971, "learning_rate": 0.0001, "loss": 1.4832, "step": 16995 }, { "epoch": 1.9745570723206507, "grad_norm": 0.6016098260879517, "learning_rate": 0.0001, "loss": 1.2484, "step": 16996 }, { "epoch": 1.9746732500726112, "grad_norm": 0.5985375642776489, "learning_rate": 0.0001, "loss": 1.3817, "step": 16997 }, { "epoch": 1.9747894278245717, "grad_norm": 0.6559426188468933, "learning_rate": 0.0001, "loss": 1.3547, "step": 16998 }, { "epoch": 1.9749056055765322, "grad_norm": 0.6842581629753113, "learning_rate": 0.0001, "loss": 1.4568, "step": 16999 }, { "epoch": 1.9750217833284927, "grad_norm": 0.6296683549880981, "learning_rate": 0.0001, "loss": 1.3842, "step": 17000 }, { "epoch": 1.9751379610804531, "grad_norm": 0.7106660604476929, "learning_rate": 0.0001, "loss": 1.5716, "step": 17001 }, { "epoch": 1.9752541388324136, "grad_norm": 0.6240237355232239, "learning_rate": 0.0001, "loss": 1.3953, "step": 17002 }, { "epoch": 1.9753703165843741, "grad_norm": 0.6408183574676514, "learning_rate": 0.0001, "loss": 1.6497, "step": 17003 }, { "epoch": 1.9754864943363346, "grad_norm": 0.6275613307952881, "learning_rate": 0.0001, "loss": 1.3472, "step": 17004 }, { "epoch": 1.975602672088295, "grad_norm": 0.6482100486755371, "learning_rate": 0.0001, "loss": 1.4357, "step": 17005 }, { "epoch": 1.9757188498402556, "grad_norm": 0.5851816534996033, "learning_rate": 0.0001, "loss": 1.3379, "step": 17006 }, { "epoch": 1.975835027592216, "grad_norm": 0.7043350338935852, "learning_rate": 0.0001, "loss": 1.5951, "step": 17007 }, { "epoch": 1.9759512053441766, "grad_norm": 0.6993297934532166, "learning_rate": 0.0001, "loss": 1.6174, "step": 17008 }, { "epoch": 1.976067383096137, "grad_norm": 0.6678404808044434, "learning_rate": 0.0001, "loss": 1.5669, "step": 17009 }, { "epoch": 1.9761835608480975, "grad_norm": 0.6476895213127136, "learning_rate": 0.0001, "loss": 1.5321, "step": 17010 }, { "epoch": 1.976299738600058, "grad_norm": 0.6467980742454529, "learning_rate": 0.0001, "loss": 1.5057, "step": 17011 }, { "epoch": 1.9764159163520185, "grad_norm": 0.6289312243461609, "learning_rate": 0.0001, "loss": 1.381, "step": 17012 }, { "epoch": 1.976532094103979, "grad_norm": 0.6213926672935486, "learning_rate": 0.0001, "loss": 1.3531, "step": 17013 }, { "epoch": 1.9766482718559395, "grad_norm": 0.6215076446533203, "learning_rate": 0.0001, "loss": 1.3858, "step": 17014 }, { "epoch": 1.9767644496079, "grad_norm": 0.6818844676017761, "learning_rate": 0.0001, "loss": 1.5352, "step": 17015 }, { "epoch": 1.9768806273598605, "grad_norm": 0.6505844593048096, "learning_rate": 0.0001, "loss": 1.5296, "step": 17016 }, { "epoch": 1.976996805111821, "grad_norm": 0.6538925170898438, "learning_rate": 0.0001, "loss": 1.4698, "step": 17017 }, { "epoch": 1.9771129828637815, "grad_norm": 0.6166635155677795, "learning_rate": 0.0001, "loss": 1.3459, "step": 17018 }, { "epoch": 1.977229160615742, "grad_norm": 0.6817828416824341, "learning_rate": 0.0001, "loss": 1.4773, "step": 17019 }, { "epoch": 1.9773453383677024, "grad_norm": 0.6032022833824158, "learning_rate": 0.0001, "loss": 1.3998, "step": 17020 }, { "epoch": 1.977461516119663, "grad_norm": 0.6282936930656433, "learning_rate": 0.0001, "loss": 1.4675, "step": 17021 }, { "epoch": 1.9775776938716236, "grad_norm": 0.6419928669929504, "learning_rate": 0.0001, "loss": 1.5802, "step": 17022 }, { "epoch": 1.9776938716235841, "grad_norm": 0.6373183131217957, "learning_rate": 0.0001, "loss": 1.2775, "step": 17023 }, { "epoch": 1.9778100493755446, "grad_norm": 0.6078613996505737, "learning_rate": 0.0001, "loss": 1.3375, "step": 17024 }, { "epoch": 1.977926227127505, "grad_norm": 0.5793294906616211, "learning_rate": 0.0001, "loss": 1.4685, "step": 17025 }, { "epoch": 1.9780424048794656, "grad_norm": 0.6440683603286743, "learning_rate": 0.0001, "loss": 1.4115, "step": 17026 }, { "epoch": 1.978158582631426, "grad_norm": 0.6319810748100281, "learning_rate": 0.0001, "loss": 1.2384, "step": 17027 }, { "epoch": 1.9782747603833866, "grad_norm": 0.7178165316581726, "learning_rate": 0.0001, "loss": 1.5436, "step": 17028 }, { "epoch": 1.978390938135347, "grad_norm": 0.6456764936447144, "learning_rate": 0.0001, "loss": 1.3396, "step": 17029 }, { "epoch": 1.9785071158873075, "grad_norm": 0.7021783590316772, "learning_rate": 0.0001, "loss": 1.4093, "step": 17030 }, { "epoch": 1.978623293639268, "grad_norm": 0.6536981463432312, "learning_rate": 0.0001, "loss": 1.2879, "step": 17031 }, { "epoch": 1.9787394713912287, "grad_norm": 0.7266817092895508, "learning_rate": 0.0001, "loss": 1.5658, "step": 17032 }, { "epoch": 1.9788556491431892, "grad_norm": 0.696922779083252, "learning_rate": 0.0001, "loss": 1.5476, "step": 17033 }, { "epoch": 1.9789718268951497, "grad_norm": 0.6734854578971863, "learning_rate": 0.0001, "loss": 1.1581, "step": 17034 }, { "epoch": 1.9790880046471102, "grad_norm": 0.622714102268219, "learning_rate": 0.0001, "loss": 1.4293, "step": 17035 }, { "epoch": 1.9792041823990707, "grad_norm": 0.6082803010940552, "learning_rate": 0.0001, "loss": 1.5153, "step": 17036 }, { "epoch": 1.9793203601510312, "grad_norm": 0.6169379353523254, "learning_rate": 0.0001, "loss": 1.3079, "step": 17037 }, { "epoch": 1.9794365379029917, "grad_norm": 0.6123185753822327, "learning_rate": 0.0001, "loss": 1.2963, "step": 17038 }, { "epoch": 1.9795527156549522, "grad_norm": 0.6298595070838928, "learning_rate": 0.0001, "loss": 1.4766, "step": 17039 }, { "epoch": 1.9796688934069127, "grad_norm": 0.6781691312789917, "learning_rate": 0.0001, "loss": 1.3926, "step": 17040 }, { "epoch": 1.9797850711588731, "grad_norm": 0.7230692505836487, "learning_rate": 0.0001, "loss": 1.5122, "step": 17041 }, { "epoch": 1.9799012489108336, "grad_norm": 0.6615117192268372, "learning_rate": 0.0001, "loss": 1.3093, "step": 17042 }, { "epoch": 1.9800174266627941, "grad_norm": 0.7139205932617188, "learning_rate": 0.0001, "loss": 1.5679, "step": 17043 }, { "epoch": 1.9801336044147546, "grad_norm": 0.7162491679191589, "learning_rate": 0.0001, "loss": 1.5496, "step": 17044 }, { "epoch": 1.980249782166715, "grad_norm": 0.6666257977485657, "learning_rate": 0.0001, "loss": 1.4485, "step": 17045 }, { "epoch": 1.9803659599186756, "grad_norm": 0.6200940608978271, "learning_rate": 0.0001, "loss": 1.4066, "step": 17046 }, { "epoch": 1.980482137670636, "grad_norm": 0.6406752467155457, "learning_rate": 0.0001, "loss": 1.496, "step": 17047 }, { "epoch": 1.9805983154225966, "grad_norm": 0.6268723011016846, "learning_rate": 0.0001, "loss": 1.2797, "step": 17048 }, { "epoch": 1.980714493174557, "grad_norm": 0.6737634539604187, "learning_rate": 0.0001, "loss": 1.4473, "step": 17049 }, { "epoch": 1.9808306709265175, "grad_norm": 0.6537479162216187, "learning_rate": 0.0001, "loss": 1.3614, "step": 17050 }, { "epoch": 1.980946848678478, "grad_norm": 0.6656580567359924, "learning_rate": 0.0001, "loss": 1.5317, "step": 17051 }, { "epoch": 1.9810630264304385, "grad_norm": 0.6043115258216858, "learning_rate": 0.0001, "loss": 1.4585, "step": 17052 }, { "epoch": 1.981179204182399, "grad_norm": 0.6332020163536072, "learning_rate": 0.0001, "loss": 1.3589, "step": 17053 }, { "epoch": 1.9812953819343595, "grad_norm": 0.6296618580818176, "learning_rate": 0.0001, "loss": 1.4216, "step": 17054 }, { "epoch": 1.98141155968632, "grad_norm": 0.6167466044425964, "learning_rate": 0.0001, "loss": 1.3932, "step": 17055 }, { "epoch": 1.9815277374382805, "grad_norm": 0.6547901034355164, "learning_rate": 0.0001, "loss": 1.5378, "step": 17056 }, { "epoch": 1.981643915190241, "grad_norm": 0.6633114814758301, "learning_rate": 0.0001, "loss": 1.4001, "step": 17057 }, { "epoch": 1.9817600929422015, "grad_norm": 0.6907011270523071, "learning_rate": 0.0001, "loss": 1.4164, "step": 17058 }, { "epoch": 1.981876270694162, "grad_norm": 0.7314690947532654, "learning_rate": 0.0001, "loss": 1.5204, "step": 17059 }, { "epoch": 1.9819924484461224, "grad_norm": 0.6343308091163635, "learning_rate": 0.0001, "loss": 1.4447, "step": 17060 }, { "epoch": 1.982108626198083, "grad_norm": 0.6838437914848328, "learning_rate": 0.0001, "loss": 1.4216, "step": 17061 }, { "epoch": 1.9822248039500434, "grad_norm": 0.6828415989875793, "learning_rate": 0.0001, "loss": 1.3946, "step": 17062 }, { "epoch": 1.982340981702004, "grad_norm": 0.6134259700775146, "learning_rate": 0.0001, "loss": 1.3255, "step": 17063 }, { "epoch": 1.9824571594539646, "grad_norm": 0.6687304973602295, "learning_rate": 0.0001, "loss": 1.382, "step": 17064 }, { "epoch": 1.982573337205925, "grad_norm": 0.7394691109657288, "learning_rate": 0.0001, "loss": 1.4039, "step": 17065 }, { "epoch": 1.9826895149578856, "grad_norm": 0.634257435798645, "learning_rate": 0.0001, "loss": 1.5533, "step": 17066 }, { "epoch": 1.982805692709846, "grad_norm": 0.6890622973442078, "learning_rate": 0.0001, "loss": 1.4102, "step": 17067 }, { "epoch": 1.9829218704618066, "grad_norm": 0.6342288255691528, "learning_rate": 0.0001, "loss": 1.3857, "step": 17068 }, { "epoch": 1.983038048213767, "grad_norm": 0.6564890146255493, "learning_rate": 0.0001, "loss": 1.5479, "step": 17069 }, { "epoch": 1.9831542259657275, "grad_norm": 0.6686529517173767, "learning_rate": 0.0001, "loss": 1.5521, "step": 17070 }, { "epoch": 1.983270403717688, "grad_norm": 0.6676340699195862, "learning_rate": 0.0001, "loss": 1.3848, "step": 17071 }, { "epoch": 1.9833865814696485, "grad_norm": 0.6322485208511353, "learning_rate": 0.0001, "loss": 1.2408, "step": 17072 }, { "epoch": 1.983502759221609, "grad_norm": 0.6894069314002991, "learning_rate": 0.0001, "loss": 1.5586, "step": 17073 }, { "epoch": 1.9836189369735697, "grad_norm": 0.6519978046417236, "learning_rate": 0.0001, "loss": 1.4406, "step": 17074 }, { "epoch": 1.9837351147255302, "grad_norm": 0.6524668335914612, "learning_rate": 0.0001, "loss": 1.4319, "step": 17075 }, { "epoch": 1.9838512924774907, "grad_norm": 0.6815724968910217, "learning_rate": 0.0001, "loss": 1.4021, "step": 17076 }, { "epoch": 1.9839674702294512, "grad_norm": 0.6381662487983704, "learning_rate": 0.0001, "loss": 1.3992, "step": 17077 }, { "epoch": 1.9840836479814117, "grad_norm": 0.714050829410553, "learning_rate": 0.0001, "loss": 1.4636, "step": 17078 }, { "epoch": 1.9841998257333722, "grad_norm": 0.6428562998771667, "learning_rate": 0.0001, "loss": 1.5339, "step": 17079 }, { "epoch": 1.9843160034853327, "grad_norm": 0.6447545289993286, "learning_rate": 0.0001, "loss": 1.5999, "step": 17080 }, { "epoch": 1.9844321812372931, "grad_norm": 0.637629508972168, "learning_rate": 0.0001, "loss": 1.3339, "step": 17081 }, { "epoch": 1.9845483589892536, "grad_norm": 0.6734089255332947, "learning_rate": 0.0001, "loss": 1.5578, "step": 17082 }, { "epoch": 1.9846645367412141, "grad_norm": 0.6303187608718872, "learning_rate": 0.0001, "loss": 1.502, "step": 17083 }, { "epoch": 1.9847807144931746, "grad_norm": 0.602295994758606, "learning_rate": 0.0001, "loss": 1.429, "step": 17084 }, { "epoch": 1.984896892245135, "grad_norm": 0.6558414697647095, "learning_rate": 0.0001, "loss": 1.5493, "step": 17085 }, { "epoch": 1.9850130699970956, "grad_norm": 0.713664710521698, "learning_rate": 0.0001, "loss": 1.5285, "step": 17086 }, { "epoch": 1.985129247749056, "grad_norm": 0.6422772407531738, "learning_rate": 0.0001, "loss": 1.4591, "step": 17087 }, { "epoch": 1.9852454255010166, "grad_norm": 0.6631743907928467, "learning_rate": 0.0001, "loss": 1.4708, "step": 17088 }, { "epoch": 1.985361603252977, "grad_norm": 0.6412901282310486, "learning_rate": 0.0001, "loss": 1.3196, "step": 17089 }, { "epoch": 1.9854777810049375, "grad_norm": 0.6073976159095764, "learning_rate": 0.0001, "loss": 1.3891, "step": 17090 }, { "epoch": 1.985593958756898, "grad_norm": 0.6807288527488708, "learning_rate": 0.0001, "loss": 1.6215, "step": 17091 }, { "epoch": 1.9857101365088585, "grad_norm": 0.645380437374115, "learning_rate": 0.0001, "loss": 1.5162, "step": 17092 }, { "epoch": 1.985826314260819, "grad_norm": 0.6473852396011353, "learning_rate": 0.0001, "loss": 1.512, "step": 17093 }, { "epoch": 1.9859424920127795, "grad_norm": 0.6923903822898865, "learning_rate": 0.0001, "loss": 1.551, "step": 17094 }, { "epoch": 1.98605866976474, "grad_norm": 0.6725409030914307, "learning_rate": 0.0001, "loss": 1.4869, "step": 17095 }, { "epoch": 1.9861748475167005, "grad_norm": 0.7083355784416199, "learning_rate": 0.0001, "loss": 1.5071, "step": 17096 }, { "epoch": 1.986291025268661, "grad_norm": 0.6550217270851135, "learning_rate": 0.0001, "loss": 1.3578, "step": 17097 }, { "epoch": 1.9864072030206215, "grad_norm": 0.7128257751464844, "learning_rate": 0.0001, "loss": 1.5812, "step": 17098 }, { "epoch": 1.986523380772582, "grad_norm": 0.6236690878868103, "learning_rate": 0.0001, "loss": 1.3195, "step": 17099 }, { "epoch": 1.9866395585245424, "grad_norm": 0.6178159713745117, "learning_rate": 0.0001, "loss": 1.3216, "step": 17100 }, { "epoch": 1.986755736276503, "grad_norm": 0.6030391454696655, "learning_rate": 0.0001, "loss": 1.3621, "step": 17101 }, { "epoch": 1.9868719140284634, "grad_norm": 0.6141607165336609, "learning_rate": 0.0001, "loss": 1.2788, "step": 17102 }, { "epoch": 1.986988091780424, "grad_norm": 0.6988334059715271, "learning_rate": 0.0001, "loss": 1.579, "step": 17103 }, { "epoch": 1.9871042695323844, "grad_norm": 0.6904895305633545, "learning_rate": 0.0001, "loss": 1.3966, "step": 17104 }, { "epoch": 1.9872204472843449, "grad_norm": 0.6659129858016968, "learning_rate": 0.0001, "loss": 1.3251, "step": 17105 }, { "epoch": 1.9873366250363056, "grad_norm": 0.6418099999427795, "learning_rate": 0.0001, "loss": 1.4371, "step": 17106 }, { "epoch": 1.987452802788266, "grad_norm": 0.6404750943183899, "learning_rate": 0.0001, "loss": 1.4876, "step": 17107 }, { "epoch": 1.9875689805402266, "grad_norm": 0.6369061470031738, "learning_rate": 0.0001, "loss": 1.3047, "step": 17108 }, { "epoch": 1.987685158292187, "grad_norm": 0.6830401420593262, "learning_rate": 0.0001, "loss": 1.4756, "step": 17109 }, { "epoch": 1.9878013360441475, "grad_norm": 0.6465064883232117, "learning_rate": 0.0001, "loss": 1.4359, "step": 17110 }, { "epoch": 1.987917513796108, "grad_norm": 0.7394779324531555, "learning_rate": 0.0001, "loss": 1.5612, "step": 17111 }, { "epoch": 1.9880336915480685, "grad_norm": 0.6628512144088745, "learning_rate": 0.0001, "loss": 1.4864, "step": 17112 }, { "epoch": 1.988149869300029, "grad_norm": 0.6167359948158264, "learning_rate": 0.0001, "loss": 1.3137, "step": 17113 }, { "epoch": 1.9882660470519895, "grad_norm": 0.646038830280304, "learning_rate": 0.0001, "loss": 1.4589, "step": 17114 }, { "epoch": 1.98838222480395, "grad_norm": 0.6481981873512268, "learning_rate": 0.0001, "loss": 1.4275, "step": 17115 }, { "epoch": 1.9884984025559107, "grad_norm": 0.6686795353889465, "learning_rate": 0.0001, "loss": 1.5354, "step": 17116 }, { "epoch": 1.9886145803078712, "grad_norm": 0.7088003754615784, "learning_rate": 0.0001, "loss": 1.4154, "step": 17117 }, { "epoch": 1.9887307580598317, "grad_norm": 0.651827335357666, "learning_rate": 0.0001, "loss": 1.329, "step": 17118 }, { "epoch": 1.9888469358117922, "grad_norm": 0.5897656083106995, "learning_rate": 0.0001, "loss": 1.3881, "step": 17119 }, { "epoch": 1.9889631135637527, "grad_norm": 0.7151718735694885, "learning_rate": 0.0001, "loss": 1.5464, "step": 17120 }, { "epoch": 1.9890792913157131, "grad_norm": 0.6732050776481628, "learning_rate": 0.0001, "loss": 1.3434, "step": 17121 }, { "epoch": 1.9891954690676736, "grad_norm": 0.5970642566680908, "learning_rate": 0.0001, "loss": 1.2934, "step": 17122 }, { "epoch": 1.9893116468196341, "grad_norm": 0.6443027257919312, "learning_rate": 0.0001, "loss": 1.5488, "step": 17123 }, { "epoch": 1.9894278245715946, "grad_norm": 0.6954662203788757, "learning_rate": 0.0001, "loss": 1.4859, "step": 17124 }, { "epoch": 1.989544002323555, "grad_norm": 0.6400409936904907, "learning_rate": 0.0001, "loss": 1.5544, "step": 17125 }, { "epoch": 1.9896601800755156, "grad_norm": 0.6106079816818237, "learning_rate": 0.0001, "loss": 1.459, "step": 17126 }, { "epoch": 1.989776357827476, "grad_norm": 0.659103274345398, "learning_rate": 0.0001, "loss": 1.5242, "step": 17127 }, { "epoch": 1.9898925355794366, "grad_norm": 0.6506654024124146, "learning_rate": 0.0001, "loss": 1.4329, "step": 17128 }, { "epoch": 1.990008713331397, "grad_norm": 0.6776611804962158, "learning_rate": 0.0001, "loss": 1.3965, "step": 17129 }, { "epoch": 1.9901248910833576, "grad_norm": 0.6105184555053711, "learning_rate": 0.0001, "loss": 1.2564, "step": 17130 }, { "epoch": 1.990241068835318, "grad_norm": 0.6182636618614197, "learning_rate": 0.0001, "loss": 1.2754, "step": 17131 }, { "epoch": 1.9903572465872785, "grad_norm": 0.60297030210495, "learning_rate": 0.0001, "loss": 1.377, "step": 17132 }, { "epoch": 1.990473424339239, "grad_norm": 0.6124786138534546, "learning_rate": 0.0001, "loss": 1.6733, "step": 17133 }, { "epoch": 1.9905896020911995, "grad_norm": 0.6822877526283264, "learning_rate": 0.0001, "loss": 1.4878, "step": 17134 }, { "epoch": 1.99070577984316, "grad_norm": 0.6614628434181213, "learning_rate": 0.0001, "loss": 1.395, "step": 17135 }, { "epoch": 1.9908219575951205, "grad_norm": 0.6281396746635437, "learning_rate": 0.0001, "loss": 1.433, "step": 17136 }, { "epoch": 1.990938135347081, "grad_norm": 0.6177956461906433, "learning_rate": 0.0001, "loss": 1.4154, "step": 17137 }, { "epoch": 1.9910543130990415, "grad_norm": 0.6248018741607666, "learning_rate": 0.0001, "loss": 1.5407, "step": 17138 }, { "epoch": 1.991170490851002, "grad_norm": 0.6084890961647034, "learning_rate": 0.0001, "loss": 1.3777, "step": 17139 }, { "epoch": 1.9912866686029624, "grad_norm": 0.6869462132453918, "learning_rate": 0.0001, "loss": 1.3761, "step": 17140 }, { "epoch": 1.991402846354923, "grad_norm": 0.6378315091133118, "learning_rate": 0.0001, "loss": 1.4141, "step": 17141 }, { "epoch": 1.9915190241068834, "grad_norm": 0.6057592034339905, "learning_rate": 0.0001, "loss": 1.3414, "step": 17142 }, { "epoch": 1.991635201858844, "grad_norm": 0.6446013450622559, "learning_rate": 0.0001, "loss": 1.5048, "step": 17143 }, { "epoch": 1.9917513796108044, "grad_norm": 0.611735999584198, "learning_rate": 0.0001, "loss": 1.4286, "step": 17144 }, { "epoch": 1.9918675573627649, "grad_norm": 0.6325556635856628, "learning_rate": 0.0001, "loss": 1.3464, "step": 17145 }, { "epoch": 1.9919837351147254, "grad_norm": 0.6586650609970093, "learning_rate": 0.0001, "loss": 1.4922, "step": 17146 }, { "epoch": 1.992099912866686, "grad_norm": 0.6672866940498352, "learning_rate": 0.0001, "loss": 1.523, "step": 17147 }, { "epoch": 1.9922160906186466, "grad_norm": 0.6891683340072632, "learning_rate": 0.0001, "loss": 1.3276, "step": 17148 }, { "epoch": 1.992332268370607, "grad_norm": 0.7425206899642944, "learning_rate": 0.0001, "loss": 1.3574, "step": 17149 }, { "epoch": 1.9924484461225676, "grad_norm": 0.6936492323875427, "learning_rate": 0.0001, "loss": 1.5141, "step": 17150 }, { "epoch": 1.992564623874528, "grad_norm": 0.7799718976020813, "learning_rate": 0.0001, "loss": 1.5688, "step": 17151 }, { "epoch": 1.9926808016264885, "grad_norm": 0.6783624887466431, "learning_rate": 0.0001, "loss": 1.3393, "step": 17152 }, { "epoch": 1.992796979378449, "grad_norm": 0.6209844350814819, "learning_rate": 0.0001, "loss": 1.4792, "step": 17153 }, { "epoch": 1.9929131571304095, "grad_norm": 0.6372485160827637, "learning_rate": 0.0001, "loss": 1.4507, "step": 17154 }, { "epoch": 1.99302933488237, "grad_norm": 0.6525490880012512, "learning_rate": 0.0001, "loss": 1.5328, "step": 17155 }, { "epoch": 1.9931455126343305, "grad_norm": 0.6811994314193726, "learning_rate": 0.0001, "loss": 1.6285, "step": 17156 }, { "epoch": 1.993261690386291, "grad_norm": 0.5977483987808228, "learning_rate": 0.0001, "loss": 1.4758, "step": 17157 }, { "epoch": 1.9933778681382517, "grad_norm": 0.6188771724700928, "learning_rate": 0.0001, "loss": 1.2736, "step": 17158 }, { "epoch": 1.9934940458902122, "grad_norm": 0.6444458365440369, "learning_rate": 0.0001, "loss": 1.4228, "step": 17159 }, { "epoch": 1.9936102236421727, "grad_norm": 0.6674790382385254, "learning_rate": 0.0001, "loss": 1.5831, "step": 17160 }, { "epoch": 1.9937264013941332, "grad_norm": 0.6228296160697937, "learning_rate": 0.0001, "loss": 1.4781, "step": 17161 }, { "epoch": 1.9938425791460936, "grad_norm": 0.6649051308631897, "learning_rate": 0.0001, "loss": 1.3831, "step": 17162 }, { "epoch": 1.9939587568980541, "grad_norm": 0.6507700085639954, "learning_rate": 0.0001, "loss": 1.3792, "step": 17163 }, { "epoch": 1.9940749346500146, "grad_norm": 0.6499350070953369, "learning_rate": 0.0001, "loss": 1.4951, "step": 17164 }, { "epoch": 1.994191112401975, "grad_norm": 0.6582063436508179, "learning_rate": 0.0001, "loss": 1.518, "step": 17165 }, { "epoch": 1.9943072901539356, "grad_norm": 0.668849527835846, "learning_rate": 0.0001, "loss": 1.5382, "step": 17166 }, { "epoch": 1.994423467905896, "grad_norm": 0.6882129907608032, "learning_rate": 0.0001, "loss": 1.4627, "step": 17167 }, { "epoch": 1.9945396456578566, "grad_norm": 0.6902349591255188, "learning_rate": 0.0001, "loss": 1.5089, "step": 17168 }, { "epoch": 1.994655823409817, "grad_norm": 0.6716760993003845, "learning_rate": 0.0001, "loss": 1.5113, "step": 17169 }, { "epoch": 1.9947720011617776, "grad_norm": 0.6998204588890076, "learning_rate": 0.0001, "loss": 1.5356, "step": 17170 }, { "epoch": 1.994888178913738, "grad_norm": 0.6393367648124695, "learning_rate": 0.0001, "loss": 1.3626, "step": 17171 }, { "epoch": 1.9950043566656985, "grad_norm": 0.6380053162574768, "learning_rate": 0.0001, "loss": 1.3207, "step": 17172 }, { "epoch": 1.995120534417659, "grad_norm": 0.6351662874221802, "learning_rate": 0.0001, "loss": 1.4096, "step": 17173 }, { "epoch": 1.9952367121696195, "grad_norm": 0.6423876881599426, "learning_rate": 0.0001, "loss": 1.3573, "step": 17174 }, { "epoch": 1.99535288992158, "grad_norm": 0.6613436341285706, "learning_rate": 0.0001, "loss": 1.3775, "step": 17175 }, { "epoch": 1.9954690676735405, "grad_norm": 0.7249255776405334, "learning_rate": 0.0001, "loss": 1.5306, "step": 17176 }, { "epoch": 1.995585245425501, "grad_norm": 0.6434426307678223, "learning_rate": 0.0001, "loss": 1.4091, "step": 17177 }, { "epoch": 1.9957014231774615, "grad_norm": 0.6899266839027405, "learning_rate": 0.0001, "loss": 1.3094, "step": 17178 }, { "epoch": 1.995817600929422, "grad_norm": 0.6469667553901672, "learning_rate": 0.0001, "loss": 1.4073, "step": 17179 }, { "epoch": 1.9959337786813824, "grad_norm": 0.6696066856384277, "learning_rate": 0.0001, "loss": 1.5468, "step": 17180 }, { "epoch": 1.996049956433343, "grad_norm": 0.6202344298362732, "learning_rate": 0.0001, "loss": 1.498, "step": 17181 }, { "epoch": 1.9961661341853034, "grad_norm": 0.6750481128692627, "learning_rate": 0.0001, "loss": 1.4697, "step": 17182 }, { "epoch": 1.996282311937264, "grad_norm": 0.6343403458595276, "learning_rate": 0.0001, "loss": 1.3338, "step": 17183 }, { "epoch": 1.9963984896892244, "grad_norm": 0.6457793116569519, "learning_rate": 0.0001, "loss": 1.4936, "step": 17184 }, { "epoch": 1.9965146674411849, "grad_norm": 0.6378026008605957, "learning_rate": 0.0001, "loss": 1.5401, "step": 17185 }, { "epoch": 1.9966308451931454, "grad_norm": 0.6804471015930176, "learning_rate": 0.0001, "loss": 1.5089, "step": 17186 }, { "epoch": 1.9967470229451059, "grad_norm": 0.6273019313812256, "learning_rate": 0.0001, "loss": 1.3339, "step": 17187 }, { "epoch": 1.9968632006970664, "grad_norm": 0.6540704369544983, "learning_rate": 0.0001, "loss": 1.4048, "step": 17188 }, { "epoch": 1.996979378449027, "grad_norm": 0.6998521089553833, "learning_rate": 0.0001, "loss": 1.4601, "step": 17189 }, { "epoch": 1.9970955562009876, "grad_norm": 0.646367609500885, "learning_rate": 0.0001, "loss": 1.473, "step": 17190 }, { "epoch": 1.997211733952948, "grad_norm": 0.6185293197631836, "learning_rate": 0.0001, "loss": 1.3793, "step": 17191 }, { "epoch": 1.9973279117049085, "grad_norm": 0.6536652445793152, "learning_rate": 0.0001, "loss": 1.4956, "step": 17192 }, { "epoch": 1.997444089456869, "grad_norm": 0.6189572215080261, "learning_rate": 0.0001, "loss": 1.3354, "step": 17193 }, { "epoch": 1.9975602672088295, "grad_norm": 0.6570109128952026, "learning_rate": 0.0001, "loss": 1.4278, "step": 17194 }, { "epoch": 1.99767644496079, "grad_norm": 0.6795138716697693, "learning_rate": 0.0001, "loss": 1.3985, "step": 17195 }, { "epoch": 1.9977926227127505, "grad_norm": 0.6193387508392334, "learning_rate": 0.0001, "loss": 1.3896, "step": 17196 }, { "epoch": 1.997908800464711, "grad_norm": 0.6169037818908691, "learning_rate": 0.0001, "loss": 1.4765, "step": 17197 }, { "epoch": 1.9980249782166715, "grad_norm": 0.6367796063423157, "learning_rate": 0.0001, "loss": 1.4163, "step": 17198 }, { "epoch": 1.998141155968632, "grad_norm": 0.6608498096466064, "learning_rate": 0.0001, "loss": 1.474, "step": 17199 }, { "epoch": 1.9982573337205927, "grad_norm": 0.6829690337181091, "learning_rate": 0.0001, "loss": 1.4992, "step": 17200 }, { "epoch": 1.9983735114725532, "grad_norm": 0.6340316534042358, "learning_rate": 0.0001, "loss": 1.4231, "step": 17201 }, { "epoch": 1.9984896892245136, "grad_norm": 0.6269450187683105, "learning_rate": 0.0001, "loss": 1.3459, "step": 17202 }, { "epoch": 1.9986058669764741, "grad_norm": 0.5962265133857727, "learning_rate": 0.0001, "loss": 1.3543, "step": 17203 }, { "epoch": 1.9987220447284346, "grad_norm": 0.6556157469749451, "learning_rate": 0.0001, "loss": 1.5697, "step": 17204 }, { "epoch": 1.998838222480395, "grad_norm": 0.677021861076355, "learning_rate": 0.0001, "loss": 1.5225, "step": 17205 }, { "epoch": 1.9989544002323556, "grad_norm": 0.6665991544723511, "learning_rate": 0.0001, "loss": 1.37, "step": 17206 }, { "epoch": 1.999070577984316, "grad_norm": 0.7138615846633911, "learning_rate": 0.0001, "loss": 1.5723, "step": 17207 }, { "epoch": 1.9991867557362766, "grad_norm": 0.6186869740486145, "learning_rate": 0.0001, "loss": 1.4157, "step": 17208 }, { "epoch": 1.999302933488237, "grad_norm": 0.6989908218383789, "learning_rate": 0.0001, "loss": 1.5856, "step": 17209 }, { "epoch": 1.9994191112401976, "grad_norm": 0.6486788988113403, "learning_rate": 0.0001, "loss": 1.4328, "step": 17210 }, { "epoch": 1.999535288992158, "grad_norm": 0.6686538457870483, "learning_rate": 0.0001, "loss": 1.3504, "step": 17211 }, { "epoch": 1.9996514667441185, "grad_norm": 0.6192347407341003, "learning_rate": 0.0001, "loss": 1.3796, "step": 17212 }, { "epoch": 1.999767644496079, "grad_norm": 0.625481128692627, "learning_rate": 0.0001, "loss": 1.4848, "step": 17213 }, { "epoch": 1.9998838222480395, "grad_norm": 0.679990828037262, "learning_rate": 0.0001, "loss": 1.5458, "step": 17214 }, { "epoch": 1.9998838222480395, "step": 17214, "total_flos": 2.5967552744531165e+19, "train_loss": 1.5602576382985387, "train_runtime": 339052.3659, "train_samples_per_second": 0.203, "train_steps_per_second": 0.051 } ], "logging_steps": 1.0, "max_steps": 17214, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.5967552744531165e+19, "train_batch_size": 2, "trial_name": null, "trial_params": null }