| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.3662979830839297, |
| "eval_steps": 500, |
| "global_step": 2100, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0006506180871828237, |
| "grad_norm": 3.778571605682373, |
| "learning_rate": 0.0001, |
| "loss": 4.706, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0013012361743656475, |
| "grad_norm": 0.7331739068031311, |
| "learning_rate": 0.0001, |
| "loss": 2.6402, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.001951854261548471, |
| "grad_norm": 0.5679969191551208, |
| "learning_rate": 0.0001, |
| "loss": 2.5315, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.002602472348731295, |
| "grad_norm": 0.6543067693710327, |
| "learning_rate": 0.0001, |
| "loss": 2.5226, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0032530904359141183, |
| "grad_norm": 0.42487671971321106, |
| "learning_rate": 0.0001, |
| "loss": 2.1375, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.003903708523096942, |
| "grad_norm": 0.48795655369758606, |
| "learning_rate": 0.0001, |
| "loss": 2.253, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.004554326610279766, |
| "grad_norm": 0.6054234504699707, |
| "learning_rate": 0.0001, |
| "loss": 2.3411, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.00520494469746259, |
| "grad_norm": 0.3039970397949219, |
| "learning_rate": 0.0001, |
| "loss": 2.1293, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.005855562784645413, |
| "grad_norm": 0.6592361330986023, |
| "learning_rate": 0.0001, |
| "loss": 3.1615, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.006506180871828237, |
| "grad_norm": 0.4017999470233917, |
| "learning_rate": 0.0001, |
| "loss": 2.5068, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0071567989590110605, |
| "grad_norm": 0.31507641077041626, |
| "learning_rate": 0.0001, |
| "loss": 2.1894, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.007807417046193884, |
| "grad_norm": 0.33226895332336426, |
| "learning_rate": 0.0001, |
| "loss": 2.2006, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.008458035133376708, |
| "grad_norm": 0.2632739841938019, |
| "learning_rate": 0.0001, |
| "loss": 2.0998, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.009108653220559532, |
| "grad_norm": 0.2794795036315918, |
| "learning_rate": 0.0001, |
| "loss": 2.113, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.009759271307742356, |
| "grad_norm": 0.29168492555618286, |
| "learning_rate": 0.0001, |
| "loss": 2.354, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.01040988939492518, |
| "grad_norm": 0.2537970244884491, |
| "learning_rate": 0.0001, |
| "loss": 2.2939, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.011060507482108002, |
| "grad_norm": 0.5140053033828735, |
| "learning_rate": 0.0001, |
| "loss": 2.6237, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.011711125569290826, |
| "grad_norm": 0.3093675971031189, |
| "learning_rate": 0.0001, |
| "loss": 2.3502, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.01236174365647365, |
| "grad_norm": 0.29241421818733215, |
| "learning_rate": 0.0001, |
| "loss": 2.5365, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.013012361743656473, |
| "grad_norm": 0.3164322078227997, |
| "learning_rate": 0.0001, |
| "loss": 2.396, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.013662979830839297, |
| "grad_norm": 0.24512743949890137, |
| "learning_rate": 0.0001, |
| "loss": 2.2759, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.014313597918022121, |
| "grad_norm": 0.24328342080116272, |
| "learning_rate": 0.0001, |
| "loss": 2.2103, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.014964216005204945, |
| "grad_norm": 0.2563220262527466, |
| "learning_rate": 0.0001, |
| "loss": 2.4836, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.015614834092387769, |
| "grad_norm": 0.33601588010787964, |
| "learning_rate": 0.0001, |
| "loss": 2.4446, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.01626545217957059, |
| "grad_norm": 0.28699007630348206, |
| "learning_rate": 0.0001, |
| "loss": 2.8504, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.016916070266753416, |
| "grad_norm": 0.3181653618812561, |
| "learning_rate": 0.0001, |
| "loss": 2.3042, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.01756668835393624, |
| "grad_norm": 0.2349390834569931, |
| "learning_rate": 0.0001, |
| "loss": 2.1024, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.018217306441119064, |
| "grad_norm": 0.2751820981502533, |
| "learning_rate": 0.0001, |
| "loss": 2.2646, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.018867924528301886, |
| "grad_norm": 0.25547271966934204, |
| "learning_rate": 0.0001, |
| "loss": 2.1928, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.01951854261548471, |
| "grad_norm": 0.283507764339447, |
| "learning_rate": 0.0001, |
| "loss": 2.3073, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.020169160702667534, |
| "grad_norm": 0.3354213237762451, |
| "learning_rate": 0.0001, |
| "loss": 2.6273, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.02081977878985036, |
| "grad_norm": 0.40484553575515747, |
| "learning_rate": 0.0001, |
| "loss": 2.4919, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.02147039687703318, |
| "grad_norm": 0.34319421648979187, |
| "learning_rate": 0.0001, |
| "loss": 2.8381, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.022121014964216004, |
| "grad_norm": 0.32958984375, |
| "learning_rate": 0.0001, |
| "loss": 2.3062, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.02277163305139883, |
| "grad_norm": 0.4503105878829956, |
| "learning_rate": 0.0001, |
| "loss": 2.4647, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.02342225113858165, |
| "grad_norm": 0.5084238052368164, |
| "learning_rate": 0.0001, |
| "loss": 3.0047, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.024072869225764477, |
| "grad_norm": 0.5192400813102722, |
| "learning_rate": 0.0001, |
| "loss": 2.2899, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.0247234873129473, |
| "grad_norm": 0.4197874665260315, |
| "learning_rate": 0.0001, |
| "loss": 2.4057, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.025374105400130124, |
| "grad_norm": 0.5170285105705261, |
| "learning_rate": 0.0001, |
| "loss": 3.2918, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.026024723487312947, |
| "grad_norm": 0.2491147667169571, |
| "learning_rate": 0.0001, |
| "loss": 2.1957, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.026675341574495772, |
| "grad_norm": 0.6597635746002197, |
| "learning_rate": 0.0001, |
| "loss": 2.7474, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.027325959661678594, |
| "grad_norm": 0.40205034613609314, |
| "learning_rate": 0.0001, |
| "loss": 2.4561, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.02797657774886142, |
| "grad_norm": 0.27388331294059753, |
| "learning_rate": 0.0001, |
| "loss": 2.0477, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.028627195836044242, |
| "grad_norm": 0.9163908958435059, |
| "learning_rate": 0.0001, |
| "loss": 3.334, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.029277813923227064, |
| "grad_norm": 0.2747696042060852, |
| "learning_rate": 0.0001, |
| "loss": 2.1604, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.02992843201040989, |
| "grad_norm": 0.36308085918426514, |
| "learning_rate": 0.0001, |
| "loss": 2.693, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.03057905009759271, |
| "grad_norm": 0.6159886121749878, |
| "learning_rate": 0.0001, |
| "loss": 2.5515, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.031229668184775537, |
| "grad_norm": 0.4801373779773712, |
| "learning_rate": 0.0001, |
| "loss": 2.809, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.03188028627195836, |
| "grad_norm": 0.32580915093421936, |
| "learning_rate": 0.0001, |
| "loss": 2.5236, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.03253090435914118, |
| "grad_norm": 0.3028671443462372, |
| "learning_rate": 0.0001, |
| "loss": 2.2685, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.03318152244632401, |
| "grad_norm": 0.5660931468009949, |
| "learning_rate": 0.0001, |
| "loss": 2.2564, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.03383214053350683, |
| "grad_norm": 0.24634602665901184, |
| "learning_rate": 0.0001, |
| "loss": 2.1355, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.034482758620689655, |
| "grad_norm": 0.24830913543701172, |
| "learning_rate": 0.0001, |
| "loss": 2.0425, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.03513337670787248, |
| "grad_norm": 0.23614570498466492, |
| "learning_rate": 0.0001, |
| "loss": 2.1975, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.035783994795055306, |
| "grad_norm": 0.2624325156211853, |
| "learning_rate": 0.0001, |
| "loss": 2.3071, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.03643461288223813, |
| "grad_norm": 0.3967755436897278, |
| "learning_rate": 0.0001, |
| "loss": 2.6088, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.03708523096942095, |
| "grad_norm": 0.22147373855113983, |
| "learning_rate": 0.0001, |
| "loss": 2.003, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.03773584905660377, |
| "grad_norm": 0.47795867919921875, |
| "learning_rate": 0.0001, |
| "loss": 2.1473, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.038386467143786594, |
| "grad_norm": 0.43953707814216614, |
| "learning_rate": 0.0001, |
| "loss": 2.6595, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.03903708523096942, |
| "grad_norm": 0.29031845927238464, |
| "learning_rate": 0.0001, |
| "loss": 2.3173, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.039687703318152245, |
| "grad_norm": 0.2491024285554886, |
| "learning_rate": 0.0001, |
| "loss": 2.0575, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.04033832140533507, |
| "grad_norm": 0.3025687634944916, |
| "learning_rate": 0.0001, |
| "loss": 2.0965, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.04098893949251789, |
| "grad_norm": 0.26097819209098816, |
| "learning_rate": 0.0001, |
| "loss": 2.2583, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.04163955757970072, |
| "grad_norm": 0.2413238286972046, |
| "learning_rate": 0.0001, |
| "loss": 2.2441, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.04229017566688354, |
| "grad_norm": 0.2332315295934677, |
| "learning_rate": 0.0001, |
| "loss": 2.185, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.04294079375406636, |
| "grad_norm": 0.4037252366542816, |
| "learning_rate": 0.0001, |
| "loss": 2.3875, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.043591411841249185, |
| "grad_norm": 0.34149354696273804, |
| "learning_rate": 0.0001, |
| "loss": 2.3835, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.04424202992843201, |
| "grad_norm": 0.23793481290340424, |
| "learning_rate": 0.0001, |
| "loss": 2.3521, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.044892648015614836, |
| "grad_norm": 0.24252744019031525, |
| "learning_rate": 0.0001, |
| "loss": 2.0984, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.04554326610279766, |
| "grad_norm": 0.2870447635650635, |
| "learning_rate": 0.0001, |
| "loss": 2.5408, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.04619388418998048, |
| "grad_norm": 0.5050077438354492, |
| "learning_rate": 0.0001, |
| "loss": 2.7091, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.0468445022771633, |
| "grad_norm": 0.2391565591096878, |
| "learning_rate": 0.0001, |
| "loss": 2.1601, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.04749512036434613, |
| "grad_norm": 0.20647507905960083, |
| "learning_rate": 0.0001, |
| "loss": 1.9582, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.048145738451528954, |
| "grad_norm": 0.26072338223457336, |
| "learning_rate": 0.0001, |
| "loss": 2.3577, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.048796356538711776, |
| "grad_norm": 0.28378504514694214, |
| "learning_rate": 0.0001, |
| "loss": 2.349, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.0494469746258946, |
| "grad_norm": 0.2536943256855011, |
| "learning_rate": 0.0001, |
| "loss": 2.375, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.05009759271307743, |
| "grad_norm": 0.29276445508003235, |
| "learning_rate": 0.0001, |
| "loss": 2.5003, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.05074821080026025, |
| "grad_norm": 0.2649310231208801, |
| "learning_rate": 0.0001, |
| "loss": 2.3247, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.05139882888744307, |
| "grad_norm": 0.38125383853912354, |
| "learning_rate": 0.0001, |
| "loss": 2.5405, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.05204944697462589, |
| "grad_norm": 0.40980008244514465, |
| "learning_rate": 0.0001, |
| "loss": 2.212, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.052700065061808715, |
| "grad_norm": 0.5363492965698242, |
| "learning_rate": 0.0001, |
| "loss": 2.6499, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.053350683148991544, |
| "grad_norm": 0.34647300839424133, |
| "learning_rate": 0.0001, |
| "loss": 2.6302, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.054001301236174366, |
| "grad_norm": 0.27607980370521545, |
| "learning_rate": 0.0001, |
| "loss": 2.1819, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.05465191932335719, |
| "grad_norm": 0.27654680609703064, |
| "learning_rate": 0.0001, |
| "loss": 2.1763, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.05530253741054001, |
| "grad_norm": 0.24596217274665833, |
| "learning_rate": 0.0001, |
| "loss": 2.2585, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.05595315549772284, |
| "grad_norm": 0.24279890954494476, |
| "learning_rate": 0.0001, |
| "loss": 2.4247, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.05660377358490566, |
| "grad_norm": 0.2918747365474701, |
| "learning_rate": 0.0001, |
| "loss": 2.3986, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.057254391672088484, |
| "grad_norm": 0.26778745651245117, |
| "learning_rate": 0.0001, |
| "loss": 2.3592, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.057905009759271306, |
| "grad_norm": 0.39637815952301025, |
| "learning_rate": 0.0001, |
| "loss": 2.8006, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.05855562784645413, |
| "grad_norm": 0.2676962614059448, |
| "learning_rate": 0.0001, |
| "loss": 2.2384, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.05920624593363696, |
| "grad_norm": 0.3044937252998352, |
| "learning_rate": 0.0001, |
| "loss": 2.7762, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.05985686402081978, |
| "grad_norm": 0.23922136425971985, |
| "learning_rate": 0.0001, |
| "loss": 2.0873, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.0605074821080026, |
| "grad_norm": 0.25385046005249023, |
| "learning_rate": 0.0001, |
| "loss": 2.2708, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.06115810019518542, |
| "grad_norm": 0.378401517868042, |
| "learning_rate": 0.0001, |
| "loss": 3.0583, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.06180871828236825, |
| "grad_norm": 0.37193092703819275, |
| "learning_rate": 0.0001, |
| "loss": 2.3632, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.062459336369551074, |
| "grad_norm": 0.3757643699645996, |
| "learning_rate": 0.0001, |
| "loss": 2.4071, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.0631099544567339, |
| "grad_norm": 0.272833913564682, |
| "learning_rate": 0.0001, |
| "loss": 2.3989, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.06376057254391672, |
| "grad_norm": 0.26533326506614685, |
| "learning_rate": 0.0001, |
| "loss": 2.1716, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.06441119063109954, |
| "grad_norm": 0.5787199139595032, |
| "learning_rate": 0.0001, |
| "loss": 2.9445, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.06506180871828236, |
| "grad_norm": 0.29046157002449036, |
| "learning_rate": 0.0001, |
| "loss": 2.3325, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.06571242680546518, |
| "grad_norm": 0.531452476978302, |
| "learning_rate": 0.0001, |
| "loss": 2.7445, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.06636304489264802, |
| "grad_norm": 0.3969165086746216, |
| "learning_rate": 0.0001, |
| "loss": 2.7126, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.06701366297983084, |
| "grad_norm": 0.24183356761932373, |
| "learning_rate": 0.0001, |
| "loss": 1.9971, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.06766428106701367, |
| "grad_norm": 0.3268399238586426, |
| "learning_rate": 0.0001, |
| "loss": 2.1055, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.06831489915419649, |
| "grad_norm": 0.2625877559185028, |
| "learning_rate": 0.0001, |
| "loss": 1.9946, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.06896551724137931, |
| "grad_norm": 0.2720443308353424, |
| "learning_rate": 0.0001, |
| "loss": 2.0764, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.06961613532856213, |
| "grad_norm": 0.20969334244728088, |
| "learning_rate": 0.0001, |
| "loss": 1.8687, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.07026675341574495, |
| "grad_norm": 0.26211223006248474, |
| "learning_rate": 0.0001, |
| "loss": 2.2042, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.07091737150292778, |
| "grad_norm": 0.27889683842658997, |
| "learning_rate": 0.0001, |
| "loss": 2.3146, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.07156798959011061, |
| "grad_norm": 0.2657179832458496, |
| "learning_rate": 0.0001, |
| "loss": 2.1021, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.07221860767729343, |
| "grad_norm": 0.26620885729789734, |
| "learning_rate": 0.0001, |
| "loss": 2.3488, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.07286922576447626, |
| "grad_norm": 0.4223373830318451, |
| "learning_rate": 0.0001, |
| "loss": 2.5289, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.07351984385165908, |
| "grad_norm": 0.35398781299591064, |
| "learning_rate": 0.0001, |
| "loss": 2.5702, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.0741704619388419, |
| "grad_norm": 0.23328129947185516, |
| "learning_rate": 0.0001, |
| "loss": 2.1292, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.07482108002602472, |
| "grad_norm": 0.33508536219596863, |
| "learning_rate": 0.0001, |
| "loss": 2.2049, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.07547169811320754, |
| "grad_norm": 0.2646953761577606, |
| "learning_rate": 0.0001, |
| "loss": 2.3445, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.07612231620039037, |
| "grad_norm": 0.27866706252098083, |
| "learning_rate": 0.0001, |
| "loss": 2.2472, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.07677293428757319, |
| "grad_norm": 0.35688602924346924, |
| "learning_rate": 0.0001, |
| "loss": 2.5045, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.07742355237475602, |
| "grad_norm": 0.24262933433055878, |
| "learning_rate": 0.0001, |
| "loss": 2.4565, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.07807417046193885, |
| "grad_norm": 0.44757333397865295, |
| "learning_rate": 0.0001, |
| "loss": 2.1619, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.07872478854912167, |
| "grad_norm": 0.3279111385345459, |
| "learning_rate": 0.0001, |
| "loss": 2.3996, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.07937540663630449, |
| "grad_norm": 0.25862693786621094, |
| "learning_rate": 0.0001, |
| "loss": 2.3214, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.08002602472348731, |
| "grad_norm": 0.30093592405319214, |
| "learning_rate": 0.0001, |
| "loss": 2.6446, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.08067664281067013, |
| "grad_norm": 0.25440871715545654, |
| "learning_rate": 0.0001, |
| "loss": 2.1181, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.08132726089785296, |
| "grad_norm": 0.19935627281665802, |
| "learning_rate": 0.0001, |
| "loss": 2.0904, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.08197787898503578, |
| "grad_norm": 0.27385473251342773, |
| "learning_rate": 0.0001, |
| "loss": 2.0829, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.0826284970722186, |
| "grad_norm": 0.24417711794376373, |
| "learning_rate": 0.0001, |
| "loss": 2.0019, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.08327911515940144, |
| "grad_norm": 0.27386653423309326, |
| "learning_rate": 0.0001, |
| "loss": 2.2743, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.08392973324658426, |
| "grad_norm": 0.22413575649261475, |
| "learning_rate": 0.0001, |
| "loss": 2.1584, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.08458035133376708, |
| "grad_norm": 0.27748343348503113, |
| "learning_rate": 0.0001, |
| "loss": 2.1428, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.0852309694209499, |
| "grad_norm": 0.18890976905822754, |
| "learning_rate": 0.0001, |
| "loss": 1.9474, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.08588158750813273, |
| "grad_norm": 0.3067719340324402, |
| "learning_rate": 0.0001, |
| "loss": 2.287, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.08653220559531555, |
| "grad_norm": 0.35126858949661255, |
| "learning_rate": 0.0001, |
| "loss": 2.5086, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.08718282368249837, |
| "grad_norm": 0.19619591534137726, |
| "learning_rate": 0.0001, |
| "loss": 2.0132, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.08783344176968119, |
| "grad_norm": 0.360569566488266, |
| "learning_rate": 0.0001, |
| "loss": 2.607, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.08848405985686401, |
| "grad_norm": 0.22566738724708557, |
| "learning_rate": 0.0001, |
| "loss": 2.0942, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.08913467794404685, |
| "grad_norm": 0.27346086502075195, |
| "learning_rate": 0.0001, |
| "loss": 2.3139, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.08978529603122967, |
| "grad_norm": 0.2500152289867401, |
| "learning_rate": 0.0001, |
| "loss": 2.0815, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.0904359141184125, |
| "grad_norm": 0.22101153433322906, |
| "learning_rate": 0.0001, |
| "loss": 2.374, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.09108653220559532, |
| "grad_norm": 0.2173723727464676, |
| "learning_rate": 0.0001, |
| "loss": 2.0084, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.09173715029277814, |
| "grad_norm": 0.28956499695777893, |
| "learning_rate": 0.0001, |
| "loss": 2.6283, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.09238776837996096, |
| "grad_norm": 0.27032795548439026, |
| "learning_rate": 0.0001, |
| "loss": 2.142, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.09303838646714378, |
| "grad_norm": 0.24320480227470398, |
| "learning_rate": 0.0001, |
| "loss": 2.1402, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.0936890045543266, |
| "grad_norm": 0.3127799332141876, |
| "learning_rate": 0.0001, |
| "loss": 2.6671, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.09433962264150944, |
| "grad_norm": 0.30706024169921875, |
| "learning_rate": 0.0001, |
| "loss": 2.3026, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.09499024072869226, |
| "grad_norm": 0.2378646731376648, |
| "learning_rate": 0.0001, |
| "loss": 2.0422, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.09564085881587508, |
| "grad_norm": 0.24755406379699707, |
| "learning_rate": 0.0001, |
| "loss": 2.2574, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.09629147690305791, |
| "grad_norm": 0.34464696049690247, |
| "learning_rate": 0.0001, |
| "loss": 2.2817, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.09694209499024073, |
| "grad_norm": 0.30485469102859497, |
| "learning_rate": 0.0001, |
| "loss": 2.7303, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.09759271307742355, |
| "grad_norm": 0.1860698163509369, |
| "learning_rate": 0.0001, |
| "loss": 1.8582, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.09824333116460637, |
| "grad_norm": 0.23853841423988342, |
| "learning_rate": 0.0001, |
| "loss": 2.1378, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.0988939492517892, |
| "grad_norm": 0.20248261094093323, |
| "learning_rate": 0.0001, |
| "loss": 2.1888, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.09954456733897202, |
| "grad_norm": 0.3582792282104492, |
| "learning_rate": 0.0001, |
| "loss": 2.6726, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.10019518542615485, |
| "grad_norm": 0.2576686441898346, |
| "learning_rate": 0.0001, |
| "loss": 2.4494, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.10084580351333768, |
| "grad_norm": 0.306029349565506, |
| "learning_rate": 0.0001, |
| "loss": 2.2273, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.1014964216005205, |
| "grad_norm": 0.31375500559806824, |
| "learning_rate": 0.0001, |
| "loss": 2.2474, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.10214703968770332, |
| "grad_norm": 0.253250390291214, |
| "learning_rate": 0.0001, |
| "loss": 2.0142, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.10279765777488614, |
| "grad_norm": 0.3098273277282715, |
| "learning_rate": 0.0001, |
| "loss": 2.2516, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.10344827586206896, |
| "grad_norm": 0.3239591717720032, |
| "learning_rate": 0.0001, |
| "loss": 2.2432, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.10409889394925179, |
| "grad_norm": 0.24929773807525635, |
| "learning_rate": 0.0001, |
| "loss": 2.2495, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.10474951203643461, |
| "grad_norm": 0.3203783929347992, |
| "learning_rate": 0.0001, |
| "loss": 2.68, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.10540013012361743, |
| "grad_norm": 0.38844674825668335, |
| "learning_rate": 0.0001, |
| "loss": 2.7457, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.10605074821080027, |
| "grad_norm": 0.21753644943237305, |
| "learning_rate": 0.0001, |
| "loss": 2.1284, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.10670136629798309, |
| "grad_norm": 0.20610418915748596, |
| "learning_rate": 0.0001, |
| "loss": 1.8377, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.10735198438516591, |
| "grad_norm": 0.3555772304534912, |
| "learning_rate": 0.0001, |
| "loss": 2.3599, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.10800260247234873, |
| "grad_norm": 0.3971005380153656, |
| "learning_rate": 0.0001, |
| "loss": 2.2771, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.10865322055953155, |
| "grad_norm": 0.28628769516944885, |
| "learning_rate": 0.0001, |
| "loss": 2.2438, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.10930383864671438, |
| "grad_norm": 0.38728833198547363, |
| "learning_rate": 0.0001, |
| "loss": 2.4103, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.1099544567338972, |
| "grad_norm": 0.26340189576148987, |
| "learning_rate": 0.0001, |
| "loss": 2.6832, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.11060507482108002, |
| "grad_norm": 0.20119386911392212, |
| "learning_rate": 0.0001, |
| "loss": 1.9622, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.11125569290826284, |
| "grad_norm": 0.2929171621799469, |
| "learning_rate": 0.0001, |
| "loss": 2.2762, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.11190631099544568, |
| "grad_norm": 0.422146201133728, |
| "learning_rate": 0.0001, |
| "loss": 2.4015, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.1125569290826285, |
| "grad_norm": 0.29050537943840027, |
| "learning_rate": 0.0001, |
| "loss": 2.4399, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.11320754716981132, |
| "grad_norm": 0.2646816074848175, |
| "learning_rate": 0.0001, |
| "loss": 2.3058, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.11385816525699415, |
| "grad_norm": 0.2643061578273773, |
| "learning_rate": 0.0001, |
| "loss": 2.1892, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.11450878334417697, |
| "grad_norm": 0.5878323316574097, |
| "learning_rate": 0.0001, |
| "loss": 3.2198, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.11515940143135979, |
| "grad_norm": 0.36881884932518005, |
| "learning_rate": 0.0001, |
| "loss": 2.4112, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.11581001951854261, |
| "grad_norm": 0.25198304653167725, |
| "learning_rate": 0.0001, |
| "loss": 2.1667, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.11646063760572543, |
| "grad_norm": 0.34164664149284363, |
| "learning_rate": 0.0001, |
| "loss": 2.6248, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.11711125569290826, |
| "grad_norm": 0.41471973061561584, |
| "learning_rate": 0.0001, |
| "loss": 2.5616, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.11776187378009109, |
| "grad_norm": 0.26372480392456055, |
| "learning_rate": 0.0001, |
| "loss": 2.2904, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.11841249186727391, |
| "grad_norm": 0.2271176278591156, |
| "learning_rate": 0.0001, |
| "loss": 2.0312, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.11906310995445674, |
| "grad_norm": 0.2106996774673462, |
| "learning_rate": 0.0001, |
| "loss": 1.9661, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.11971372804163956, |
| "grad_norm": 0.22870291769504547, |
| "learning_rate": 0.0001, |
| "loss": 1.9052, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.12036434612882238, |
| "grad_norm": 0.41253864765167236, |
| "learning_rate": 0.0001, |
| "loss": 2.3747, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.1210149642160052, |
| "grad_norm": 0.3258817791938782, |
| "learning_rate": 0.0001, |
| "loss": 2.5401, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.12166558230318802, |
| "grad_norm": 0.3461870551109314, |
| "learning_rate": 0.0001, |
| "loss": 2.8027, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.12231620039037085, |
| "grad_norm": 0.3704046607017517, |
| "learning_rate": 0.0001, |
| "loss": 2.799, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.12296681847755368, |
| "grad_norm": 0.30265969038009644, |
| "learning_rate": 0.0001, |
| "loss": 2.4287, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.1236174365647365, |
| "grad_norm": 0.4215582013130188, |
| "learning_rate": 0.0001, |
| "loss": 2.6857, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.12426805465191933, |
| "grad_norm": 0.3003520965576172, |
| "learning_rate": 0.0001, |
| "loss": 2.4155, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.12491867273910215, |
| "grad_norm": 0.412749320268631, |
| "learning_rate": 0.0001, |
| "loss": 2.6352, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.12556929082628496, |
| "grad_norm": 0.2772350013256073, |
| "learning_rate": 0.0001, |
| "loss": 2.2452, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.1262199089134678, |
| "grad_norm": 0.21457143127918243, |
| "learning_rate": 0.0001, |
| "loss": 2.0172, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.12687052700065063, |
| "grad_norm": 0.40995845198631287, |
| "learning_rate": 0.0001, |
| "loss": 2.6218, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.12752114508783344, |
| "grad_norm": 0.2253209501504898, |
| "learning_rate": 0.0001, |
| "loss": 2.2319, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.12817176317501627, |
| "grad_norm": 0.36564287543296814, |
| "learning_rate": 0.0001, |
| "loss": 2.4585, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.12882238126219908, |
| "grad_norm": 0.41084784269332886, |
| "learning_rate": 0.0001, |
| "loss": 2.6326, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.12947299934938192, |
| "grad_norm": 0.36012157797813416, |
| "learning_rate": 0.0001, |
| "loss": 2.0168, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.13012361743656473, |
| "grad_norm": 0.5138425230979919, |
| "learning_rate": 0.0001, |
| "loss": 2.3377, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.13077423552374756, |
| "grad_norm": 0.2799031436443329, |
| "learning_rate": 0.0001, |
| "loss": 2.532, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.13142485361093037, |
| "grad_norm": 0.3078779876232147, |
| "learning_rate": 0.0001, |
| "loss": 2.044, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.1320754716981132, |
| "grad_norm": 0.31270912289619446, |
| "learning_rate": 0.0001, |
| "loss": 1.8576, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.13272608978529604, |
| "grad_norm": 0.23117204010486603, |
| "learning_rate": 0.0001, |
| "loss": 2.1908, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.13337670787247885, |
| "grad_norm": 0.2531285285949707, |
| "learning_rate": 0.0001, |
| "loss": 2.143, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.1340273259596617, |
| "grad_norm": 0.28053218126296997, |
| "learning_rate": 0.0001, |
| "loss": 2.6902, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.1346779440468445, |
| "grad_norm": 0.2600589692592621, |
| "learning_rate": 0.0001, |
| "loss": 2.0355, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.13532856213402733, |
| "grad_norm": 0.2725912630558014, |
| "learning_rate": 0.0001, |
| "loss": 2.3949, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.13597918022121014, |
| "grad_norm": 0.6166338324546814, |
| "learning_rate": 0.0001, |
| "loss": 2.8146, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.13662979830839297, |
| "grad_norm": 0.4028575122356415, |
| "learning_rate": 0.0001, |
| "loss": 2.888, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.1372804163955758, |
| "grad_norm": 0.23181548714637756, |
| "learning_rate": 0.0001, |
| "loss": 2.1406, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.13793103448275862, |
| "grad_norm": 0.24338063597679138, |
| "learning_rate": 0.0001, |
| "loss": 2.1564, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.13858165256994145, |
| "grad_norm": 0.233146533370018, |
| "learning_rate": 0.0001, |
| "loss": 2.1695, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.13923227065712426, |
| "grad_norm": 0.21236726641654968, |
| "learning_rate": 0.0001, |
| "loss": 1.9272, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.1398828887443071, |
| "grad_norm": 0.25471317768096924, |
| "learning_rate": 0.0001, |
| "loss": 2.3447, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.1405335068314899, |
| "grad_norm": 0.35532835125923157, |
| "learning_rate": 0.0001, |
| "loss": 2.4328, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.14118412491867274, |
| "grad_norm": 0.32900944352149963, |
| "learning_rate": 0.0001, |
| "loss": 2.385, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.14183474300585555, |
| "grad_norm": 0.45404863357543945, |
| "learning_rate": 0.0001, |
| "loss": 2.8053, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.1424853610930384, |
| "grad_norm": 0.33968400955200195, |
| "learning_rate": 0.0001, |
| "loss": 2.4524, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.14313597918022122, |
| "grad_norm": 0.3250170946121216, |
| "learning_rate": 0.0001, |
| "loss": 2.6173, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.14378659726740403, |
| "grad_norm": 0.34765559434890747, |
| "learning_rate": 0.0001, |
| "loss": 2.8468, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.14443721535458687, |
| "grad_norm": 0.2274564653635025, |
| "learning_rate": 0.0001, |
| "loss": 2.1305, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.14508783344176968, |
| "grad_norm": 0.42719507217407227, |
| "learning_rate": 0.0001, |
| "loss": 2.3682, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.1457384515289525, |
| "grad_norm": 0.2848481833934784, |
| "learning_rate": 0.0001, |
| "loss": 2.0923, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.14638906961613532, |
| "grad_norm": 0.266548752784729, |
| "learning_rate": 0.0001, |
| "loss": 2.0393, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.14703968770331816, |
| "grad_norm": 0.24076099693775177, |
| "learning_rate": 0.0001, |
| "loss": 2.2674, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.14769030579050096, |
| "grad_norm": 0.23347622156143188, |
| "learning_rate": 0.0001, |
| "loss": 1.9455, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.1483409238776838, |
| "grad_norm": 0.3925648033618927, |
| "learning_rate": 0.0001, |
| "loss": 2.7117, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.14899154196486664, |
| "grad_norm": 0.27654924988746643, |
| "learning_rate": 0.0001, |
| "loss": 2.1306, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.14964216005204944, |
| "grad_norm": 0.2853853702545166, |
| "learning_rate": 0.0001, |
| "loss": 2.4369, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.15029277813923228, |
| "grad_norm": 0.4509859085083008, |
| "learning_rate": 0.0001, |
| "loss": 2.6047, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.1509433962264151, |
| "grad_norm": 0.2515909671783447, |
| "learning_rate": 0.0001, |
| "loss": 2.2065, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.15159401431359792, |
| "grad_norm": 0.5977367162704468, |
| "learning_rate": 0.0001, |
| "loss": 2.7133, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.15224463240078073, |
| "grad_norm": 0.30381399393081665, |
| "learning_rate": 0.0001, |
| "loss": 2.343, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.15289525048796357, |
| "grad_norm": 0.27204832434654236, |
| "learning_rate": 0.0001, |
| "loss": 2.2908, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.15354586857514638, |
| "grad_norm": 0.6246710419654846, |
| "learning_rate": 0.0001, |
| "loss": 2.7862, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.1541964866623292, |
| "grad_norm": 0.4803178012371063, |
| "learning_rate": 0.0001, |
| "loss": 3.4388, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.15484710474951205, |
| "grad_norm": 0.3038940727710724, |
| "learning_rate": 0.0001, |
| "loss": 2.7409, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.15549772283669486, |
| "grad_norm": 0.2494591474533081, |
| "learning_rate": 0.0001, |
| "loss": 2.2601, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.1561483409238777, |
| "grad_norm": 0.23808616399765015, |
| "learning_rate": 0.0001, |
| "loss": 2.1319, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.1567989590110605, |
| "grad_norm": 0.3111306130886078, |
| "learning_rate": 0.0001, |
| "loss": 2.7414, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.15744957709824334, |
| "grad_norm": 0.22197599709033966, |
| "learning_rate": 0.0001, |
| "loss": 2.1346, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.15810019518542615, |
| "grad_norm": 0.2681500315666199, |
| "learning_rate": 0.0001, |
| "loss": 2.3779, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.15875081327260898, |
| "grad_norm": 0.2612643241882324, |
| "learning_rate": 0.0001, |
| "loss": 2.5743, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.1594014313597918, |
| "grad_norm": 0.201397106051445, |
| "learning_rate": 0.0001, |
| "loss": 2.0312, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.16005204944697463, |
| "grad_norm": 0.25662410259246826, |
| "learning_rate": 0.0001, |
| "loss": 2.5085, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.16070266753415746, |
| "grad_norm": 0.21460294723510742, |
| "learning_rate": 0.0001, |
| "loss": 2.1099, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.16135328562134027, |
| "grad_norm": 0.19971312582492828, |
| "learning_rate": 0.0001, |
| "loss": 2.1024, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.1620039037085231, |
| "grad_norm": 0.1986059844493866, |
| "learning_rate": 0.0001, |
| "loss": 1.9306, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.16265452179570591, |
| "grad_norm": 0.21961884200572968, |
| "learning_rate": 0.0001, |
| "loss": 2.1218, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.16330513988288875, |
| "grad_norm": 0.20071017742156982, |
| "learning_rate": 0.0001, |
| "loss": 2.0581, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.16395575797007156, |
| "grad_norm": 0.32734909653663635, |
| "learning_rate": 0.0001, |
| "loss": 2.6229, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.1646063760572544, |
| "grad_norm": 0.21822451055049896, |
| "learning_rate": 0.0001, |
| "loss": 1.9954, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.1652569941444372, |
| "grad_norm": 0.3013177216053009, |
| "learning_rate": 0.0001, |
| "loss": 2.454, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.16590761223162004, |
| "grad_norm": 0.31199347972869873, |
| "learning_rate": 0.0001, |
| "loss": 2.815, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.16655823031880287, |
| "grad_norm": 0.2255464345216751, |
| "learning_rate": 0.0001, |
| "loss": 2.0232, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.16720884840598568, |
| "grad_norm": 0.21208804845809937, |
| "learning_rate": 0.0001, |
| "loss": 1.9663, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.16785946649316852, |
| "grad_norm": 0.2432132512331009, |
| "learning_rate": 0.0001, |
| "loss": 2.4189, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.16851008458035133, |
| "grad_norm": 0.21116623282432556, |
| "learning_rate": 0.0001, |
| "loss": 2.0761, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.16916070266753416, |
| "grad_norm": 0.18722975254058838, |
| "learning_rate": 0.0001, |
| "loss": 1.9537, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.16981132075471697, |
| "grad_norm": 0.2683362662792206, |
| "learning_rate": 0.0001, |
| "loss": 2.4483, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.1704619388418998, |
| "grad_norm": 0.2739648222923279, |
| "learning_rate": 0.0001, |
| "loss": 2.3754, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.17111255692908262, |
| "grad_norm": 0.1836375594139099, |
| "learning_rate": 0.0001, |
| "loss": 2.0103, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.17176317501626545, |
| "grad_norm": 0.34002602100372314, |
| "learning_rate": 0.0001, |
| "loss": 2.2626, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.1724137931034483, |
| "grad_norm": 0.19341516494750977, |
| "learning_rate": 0.0001, |
| "loss": 1.9751, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.1730644111906311, |
| "grad_norm": 0.25080743432044983, |
| "learning_rate": 0.0001, |
| "loss": 2.2162, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.17371502927781393, |
| "grad_norm": 0.2362661212682724, |
| "learning_rate": 0.0001, |
| "loss": 2.0226, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.17436564736499674, |
| "grad_norm": 0.25844064354896545, |
| "learning_rate": 0.0001, |
| "loss": 2.3176, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.17501626545217958, |
| "grad_norm": 0.3904498517513275, |
| "learning_rate": 0.0001, |
| "loss": 2.4871, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.17566688353936238, |
| "grad_norm": 0.22143317759037018, |
| "learning_rate": 0.0001, |
| "loss": 2.2073, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.17631750162654522, |
| "grad_norm": 0.20974211394786835, |
| "learning_rate": 0.0001, |
| "loss": 2.1393, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.17696811971372803, |
| "grad_norm": 0.24463056027889252, |
| "learning_rate": 0.0001, |
| "loss": 2.0203, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.17761873780091086, |
| "grad_norm": 0.23296399414539337, |
| "learning_rate": 0.0001, |
| "loss": 2.1096, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.1782693558880937, |
| "grad_norm": 0.4122619926929474, |
| "learning_rate": 0.0001, |
| "loss": 3.1512, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.1789199739752765, |
| "grad_norm": 0.2744470536708832, |
| "learning_rate": 0.0001, |
| "loss": 2.2211, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.17957059206245934, |
| "grad_norm": 0.21010619401931763, |
| "learning_rate": 0.0001, |
| "loss": 2.2203, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.18022121014964215, |
| "grad_norm": 0.27855056524276733, |
| "learning_rate": 0.0001, |
| "loss": 2.2903, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.180871828236825, |
| "grad_norm": 0.2909989058971405, |
| "learning_rate": 0.0001, |
| "loss": 2.237, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.1815224463240078, |
| "grad_norm": 0.21754448115825653, |
| "learning_rate": 0.0001, |
| "loss": 2.0138, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.18217306441119063, |
| "grad_norm": 0.35209745168685913, |
| "learning_rate": 0.0001, |
| "loss": 2.652, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.18282368249837344, |
| "grad_norm": 0.29994750022888184, |
| "learning_rate": 0.0001, |
| "loss": 2.1868, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.18347430058555628, |
| "grad_norm": 0.2645902633666992, |
| "learning_rate": 0.0001, |
| "loss": 2.2925, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.1841249186727391, |
| "grad_norm": 0.3492202162742615, |
| "learning_rate": 0.0001, |
| "loss": 2.4176, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.18477553675992192, |
| "grad_norm": 0.256651371717453, |
| "learning_rate": 0.0001, |
| "loss": 2.3414, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.18542615484710476, |
| "grad_norm": 0.23287786543369293, |
| "learning_rate": 0.0001, |
| "loss": 2.5488, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.18607677293428757, |
| "grad_norm": 0.26059290766716003, |
| "learning_rate": 0.0001, |
| "loss": 2.4551, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.1867273910214704, |
| "grad_norm": 0.2482365071773529, |
| "learning_rate": 0.0001, |
| "loss": 2.0818, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.1873780091086532, |
| "grad_norm": 0.23024773597717285, |
| "learning_rate": 0.0001, |
| "loss": 2.2592, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.18802862719583605, |
| "grad_norm": 0.2590011656284332, |
| "learning_rate": 0.0001, |
| "loss": 2.4177, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.18867924528301888, |
| "grad_norm": 0.19760870933532715, |
| "learning_rate": 0.0001, |
| "loss": 2.0731, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.1893298633702017, |
| "grad_norm": 0.20266428589820862, |
| "learning_rate": 0.0001, |
| "loss": 2.1221, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.18998048145738453, |
| "grad_norm": 0.20199884474277496, |
| "learning_rate": 0.0001, |
| "loss": 2.0489, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.19063109954456733, |
| "grad_norm": 0.23876360058784485, |
| "learning_rate": 0.0001, |
| "loss": 2.1392, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.19128171763175017, |
| "grad_norm": 0.23555997014045715, |
| "learning_rate": 0.0001, |
| "loss": 2.4116, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.19193233571893298, |
| "grad_norm": 0.5010725259780884, |
| "learning_rate": 0.0001, |
| "loss": 2.7444, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.19258295380611581, |
| "grad_norm": 0.37809622287750244, |
| "learning_rate": 0.0001, |
| "loss": 2.2635, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.19323357189329862, |
| "grad_norm": 0.499888151884079, |
| "learning_rate": 0.0001, |
| "loss": 2.1984, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.19388418998048146, |
| "grad_norm": 0.43810585141181946, |
| "learning_rate": 0.0001, |
| "loss": 3.084, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.1945348080676643, |
| "grad_norm": 0.35633769631385803, |
| "learning_rate": 0.0001, |
| "loss": 2.0351, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.1951854261548471, |
| "grad_norm": 0.3693079650402069, |
| "learning_rate": 0.0001, |
| "loss": 1.9525, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.19583604424202994, |
| "grad_norm": 0.36550503969192505, |
| "learning_rate": 0.0001, |
| "loss": 2.2469, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.19648666232921275, |
| "grad_norm": 0.2579827308654785, |
| "learning_rate": 0.0001, |
| "loss": 2.3585, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.19713728041639558, |
| "grad_norm": 0.2603841722011566, |
| "learning_rate": 0.0001, |
| "loss": 2.3959, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.1977878985035784, |
| "grad_norm": 0.33103683590888977, |
| "learning_rate": 0.0001, |
| "loss": 2.2197, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.19843851659076123, |
| "grad_norm": 0.2977697551250458, |
| "learning_rate": 0.0001, |
| "loss": 2.2569, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.19908913467794404, |
| "grad_norm": 0.2085130512714386, |
| "learning_rate": 0.0001, |
| "loss": 2.2284, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.19973975276512687, |
| "grad_norm": 0.409212201833725, |
| "learning_rate": 0.0001, |
| "loss": 2.7014, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.2003903708523097, |
| "grad_norm": 0.2447553277015686, |
| "learning_rate": 0.0001, |
| "loss": 2.2826, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.20104098893949252, |
| "grad_norm": 0.21881726384162903, |
| "learning_rate": 0.0001, |
| "loss": 1.8573, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.20169160702667535, |
| "grad_norm": 0.24484936892986298, |
| "learning_rate": 0.0001, |
| "loss": 2.318, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.20234222511385816, |
| "grad_norm": 0.3251173198223114, |
| "learning_rate": 0.0001, |
| "loss": 2.3346, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.202992843201041, |
| "grad_norm": 0.22313712537288666, |
| "learning_rate": 0.0001, |
| "loss": 1.9119, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.2036434612882238, |
| "grad_norm": 0.3086949288845062, |
| "learning_rate": 0.0001, |
| "loss": 2.1809, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.20429407937540664, |
| "grad_norm": 0.28272122144699097, |
| "learning_rate": 0.0001, |
| "loss": 2.3335, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.20494469746258945, |
| "grad_norm": 0.208637535572052, |
| "learning_rate": 0.0001, |
| "loss": 2.1947, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.20559531554977228, |
| "grad_norm": 0.2913041114807129, |
| "learning_rate": 0.0001, |
| "loss": 2.3009, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.20624593363695512, |
| "grad_norm": 0.2813785970211029, |
| "learning_rate": 0.0001, |
| "loss": 2.0133, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.20689655172413793, |
| "grad_norm": 0.2324337363243103, |
| "learning_rate": 0.0001, |
| "loss": 2.0827, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.20754716981132076, |
| "grad_norm": 0.25195491313934326, |
| "learning_rate": 0.0001, |
| "loss": 2.5201, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.20819778789850357, |
| "grad_norm": 0.3435034453868866, |
| "learning_rate": 0.0001, |
| "loss": 2.321, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.2088484059856864, |
| "grad_norm": 0.2735581696033478, |
| "learning_rate": 0.0001, |
| "loss": 2.2218, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.20949902407286922, |
| "grad_norm": 0.2250661551952362, |
| "learning_rate": 0.0001, |
| "loss": 1.9416, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.21014964216005205, |
| "grad_norm": 0.3160262107849121, |
| "learning_rate": 0.0001, |
| "loss": 2.5494, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.21080026024723486, |
| "grad_norm": 0.3669279217720032, |
| "learning_rate": 0.0001, |
| "loss": 2.7751, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.2114508783344177, |
| "grad_norm": 0.2052752673625946, |
| "learning_rate": 0.0001, |
| "loss": 2.0139, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.21210149642160053, |
| "grad_norm": 0.2906612455844879, |
| "learning_rate": 0.0001, |
| "loss": 2.227, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.21275211450878334, |
| "grad_norm": 0.30327048897743225, |
| "learning_rate": 0.0001, |
| "loss": 2.2905, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.21340273259596618, |
| "grad_norm": 0.33950623869895935, |
| "learning_rate": 0.0001, |
| "loss": 3.0731, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.21405335068314899, |
| "grad_norm": 0.31319788098335266, |
| "learning_rate": 0.0001, |
| "loss": 2.1374, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.21470396877033182, |
| "grad_norm": 0.21442054212093353, |
| "learning_rate": 0.0001, |
| "loss": 1.7588, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.21535458685751463, |
| "grad_norm": 0.23125174641609192, |
| "learning_rate": 0.0001, |
| "loss": 1.9295, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.21600520494469747, |
| "grad_norm": 0.23220308125019073, |
| "learning_rate": 0.0001, |
| "loss": 2.2606, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.21665582303188027, |
| "grad_norm": 0.24599219858646393, |
| "learning_rate": 0.0001, |
| "loss": 2.2687, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.2173064411190631, |
| "grad_norm": 0.22226236760616302, |
| "learning_rate": 0.0001, |
| "loss": 2.1428, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.21795705920624595, |
| "grad_norm": 0.2653510570526123, |
| "learning_rate": 0.0001, |
| "loss": 2.4381, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.21860767729342875, |
| "grad_norm": 0.23770929872989655, |
| "learning_rate": 0.0001, |
| "loss": 1.9655, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.2192582953806116, |
| "grad_norm": 0.1932332068681717, |
| "learning_rate": 0.0001, |
| "loss": 1.9465, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.2199089134677944, |
| "grad_norm": 0.181661456823349, |
| "learning_rate": 0.0001, |
| "loss": 1.9912, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.22055953155497723, |
| "grad_norm": 0.22275297343730927, |
| "learning_rate": 0.0001, |
| "loss": 2.1964, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.22121014964216004, |
| "grad_norm": 0.22086840867996216, |
| "learning_rate": 0.0001, |
| "loss": 2.2216, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.22186076772934288, |
| "grad_norm": 0.22807130217552185, |
| "learning_rate": 0.0001, |
| "loss": 2.2434, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.2225113858165257, |
| "grad_norm": 0.26616647839546204, |
| "learning_rate": 0.0001, |
| "loss": 2.442, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.22316200390370852, |
| "grad_norm": 0.2841719388961792, |
| "learning_rate": 0.0001, |
| "loss": 2.2358, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.22381262199089136, |
| "grad_norm": 0.23251943290233612, |
| "learning_rate": 0.0001, |
| "loss": 2.3436, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.22446324007807417, |
| "grad_norm": 0.20406994223594666, |
| "learning_rate": 0.0001, |
| "loss": 2.101, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.225113858165257, |
| "grad_norm": 0.18677304685115814, |
| "learning_rate": 0.0001, |
| "loss": 2.0596, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.2257644762524398, |
| "grad_norm": 0.22367873787879944, |
| "learning_rate": 0.0001, |
| "loss": 2.2051, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.22641509433962265, |
| "grad_norm": 0.2521246671676636, |
| "learning_rate": 0.0001, |
| "loss": 2.1718, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.22706571242680545, |
| "grad_norm": 0.23043319582939148, |
| "learning_rate": 0.0001, |
| "loss": 2.2818, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.2277163305139883, |
| "grad_norm": 0.22021251916885376, |
| "learning_rate": 0.0001, |
| "loss": 2.0337, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.2283669486011711, |
| "grad_norm": 0.18043603003025055, |
| "learning_rate": 0.0001, |
| "loss": 1.9434, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.22901756668835394, |
| "grad_norm": 0.4757142961025238, |
| "learning_rate": 0.0001, |
| "loss": 2.2467, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.22966818477553677, |
| "grad_norm": 0.30740290880203247, |
| "learning_rate": 0.0001, |
| "loss": 2.5296, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.23031880286271958, |
| "grad_norm": 0.23037666082382202, |
| "learning_rate": 0.0001, |
| "loss": 2.311, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.23096942094990242, |
| "grad_norm": 0.22314564883708954, |
| "learning_rate": 0.0001, |
| "loss": 2.0494, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.23162003903708522, |
| "grad_norm": 0.21417242288589478, |
| "learning_rate": 0.0001, |
| "loss": 2.2459, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.23227065712426806, |
| "grad_norm": 0.2895831763744354, |
| "learning_rate": 0.0001, |
| "loss": 2.2705, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.23292127521145087, |
| "grad_norm": 0.2110838145017624, |
| "learning_rate": 0.0001, |
| "loss": 2.1175, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.2335718932986337, |
| "grad_norm": 0.3999682664871216, |
| "learning_rate": 0.0001, |
| "loss": 2.6891, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.2342225113858165, |
| "grad_norm": 0.5169201493263245, |
| "learning_rate": 0.0001, |
| "loss": 2.5764, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.23487312947299935, |
| "grad_norm": 0.24382548034191132, |
| "learning_rate": 0.0001, |
| "loss": 2.1065, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.23552374756018218, |
| "grad_norm": 0.2830081582069397, |
| "learning_rate": 0.0001, |
| "loss": 2.1186, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.236174365647365, |
| "grad_norm": 0.23680554330348969, |
| "learning_rate": 0.0001, |
| "loss": 2.118, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.23682498373454783, |
| "grad_norm": 0.3790690302848816, |
| "learning_rate": 0.0001, |
| "loss": 2.3566, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.23747560182173064, |
| "grad_norm": 0.2664685845375061, |
| "learning_rate": 0.0001, |
| "loss": 2.2118, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.23812621990891347, |
| "grad_norm": 0.22439126670360565, |
| "learning_rate": 0.0001, |
| "loss": 2.0897, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.23877683799609628, |
| "grad_norm": 0.2559892237186432, |
| "learning_rate": 0.0001, |
| "loss": 2.2559, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.23942745608327912, |
| "grad_norm": 0.43989577889442444, |
| "learning_rate": 0.0001, |
| "loss": 2.5208, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.24007807417046195, |
| "grad_norm": 0.24543894827365875, |
| "learning_rate": 0.0001, |
| "loss": 2.1692, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.24072869225764476, |
| "grad_norm": 0.37020954489707947, |
| "learning_rate": 0.0001, |
| "loss": 2.1287, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.2413793103448276, |
| "grad_norm": 0.41815564036369324, |
| "learning_rate": 0.0001, |
| "loss": 2.5952, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.2420299284320104, |
| "grad_norm": 0.22579136490821838, |
| "learning_rate": 0.0001, |
| "loss": 2.2427, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.24268054651919324, |
| "grad_norm": 0.3004798889160156, |
| "learning_rate": 0.0001, |
| "loss": 2.2767, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.24333116460637605, |
| "grad_norm": 0.27470141649246216, |
| "learning_rate": 0.0001, |
| "loss": 2.092, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.24398178269355889, |
| "grad_norm": 0.25301867723464966, |
| "learning_rate": 0.0001, |
| "loss": 2.1816, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.2446324007807417, |
| "grad_norm": 0.21194620430469513, |
| "learning_rate": 0.0001, |
| "loss": 2.1322, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.24528301886792453, |
| "grad_norm": 0.28737103939056396, |
| "learning_rate": 0.0001, |
| "loss": 2.6685, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.24593363695510737, |
| "grad_norm": 0.28857922554016113, |
| "learning_rate": 0.0001, |
| "loss": 2.2219, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.24658425504229017, |
| "grad_norm": 0.29493409395217896, |
| "learning_rate": 0.0001, |
| "loss": 2.717, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.247234873129473, |
| "grad_norm": 0.33975929021835327, |
| "learning_rate": 0.0001, |
| "loss": 2.3499, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.24788549121665582, |
| "grad_norm": 0.21486152708530426, |
| "learning_rate": 0.0001, |
| "loss": 2.306, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.24853610930383865, |
| "grad_norm": 0.2686431109905243, |
| "learning_rate": 0.0001, |
| "loss": 2.0942, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.24918672739102146, |
| "grad_norm": 0.2812007963657379, |
| "learning_rate": 0.0001, |
| "loss": 2.3729, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.2498373454782043, |
| "grad_norm": 0.31875330209732056, |
| "learning_rate": 0.0001, |
| "loss": 2.5766, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.2504879635653871, |
| "grad_norm": 0.2624376714229584, |
| "learning_rate": 0.0001, |
| "loss": 2.2057, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.2511385816525699, |
| "grad_norm": 0.265286386013031, |
| "learning_rate": 0.0001, |
| "loss": 2.2405, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.2517891997397528, |
| "grad_norm": 0.3202246129512787, |
| "learning_rate": 0.0001, |
| "loss": 2.2817, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.2524398178269356, |
| "grad_norm": 0.22770161926746368, |
| "learning_rate": 0.0001, |
| "loss": 1.9564, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.2530904359141184, |
| "grad_norm": 0.3313138484954834, |
| "learning_rate": 0.0001, |
| "loss": 2.4424, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.25374105400130126, |
| "grad_norm": 0.2961839437484741, |
| "learning_rate": 0.0001, |
| "loss": 2.4122, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.25439167208848407, |
| "grad_norm": 0.24270308017730713, |
| "learning_rate": 0.0001, |
| "loss": 1.99, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.2550422901756669, |
| "grad_norm": 0.2306670844554901, |
| "learning_rate": 0.0001, |
| "loss": 2.3529, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.2556929082628497, |
| "grad_norm": 0.28387176990509033, |
| "learning_rate": 0.0001, |
| "loss": 2.0824, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.25634352635003255, |
| "grad_norm": 0.3105824291706085, |
| "learning_rate": 0.0001, |
| "loss": 2.437, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.25699414443721535, |
| "grad_norm": 0.1932361125946045, |
| "learning_rate": 0.0001, |
| "loss": 1.9747, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.25764476252439816, |
| "grad_norm": 0.31146278977394104, |
| "learning_rate": 0.0001, |
| "loss": 2.263, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.258295380611581, |
| "grad_norm": 0.24420365691184998, |
| "learning_rate": 0.0001, |
| "loss": 2.015, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.25894599869876384, |
| "grad_norm": 0.24144989252090454, |
| "learning_rate": 0.0001, |
| "loss": 2.2536, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.25959661678594664, |
| "grad_norm": 0.3478517532348633, |
| "learning_rate": 0.0001, |
| "loss": 2.5835, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.26024723487312945, |
| "grad_norm": 0.24381348490715027, |
| "learning_rate": 0.0001, |
| "loss": 2.2439, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.2608978529603123, |
| "grad_norm": 0.2834983468055725, |
| "learning_rate": 0.0001, |
| "loss": 2.3991, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.2615484710474951, |
| "grad_norm": 0.28689858317375183, |
| "learning_rate": 0.0001, |
| "loss": 1.9156, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.26219908913467793, |
| "grad_norm": 0.23692357540130615, |
| "learning_rate": 0.0001, |
| "loss": 2.0189, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.26284970722186074, |
| "grad_norm": 0.30104926228523254, |
| "learning_rate": 0.0001, |
| "loss": 2.4945, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.2635003253090436, |
| "grad_norm": 0.23472270369529724, |
| "learning_rate": 0.0001, |
| "loss": 1.8892, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.2641509433962264, |
| "grad_norm": 0.31508034467697144, |
| "learning_rate": 0.0001, |
| "loss": 2.4935, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.2648015614834092, |
| "grad_norm": 0.25103551149368286, |
| "learning_rate": 0.0001, |
| "loss": 2.4428, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.2654521795705921, |
| "grad_norm": 0.2387259602546692, |
| "learning_rate": 0.0001, |
| "loss": 2.0989, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.2661027976577749, |
| "grad_norm": 0.2606028616428375, |
| "learning_rate": 0.0001, |
| "loss": 1.9494, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.2667534157449577, |
| "grad_norm": 0.25114724040031433, |
| "learning_rate": 0.0001, |
| "loss": 2.2432, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.2674040338321405, |
| "grad_norm": 0.3072582483291626, |
| "learning_rate": 0.0001, |
| "loss": 2.3506, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.2680546519193234, |
| "grad_norm": 0.23917561769485474, |
| "learning_rate": 0.0001, |
| "loss": 2.2665, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.2687052700065062, |
| "grad_norm": 0.2120814174413681, |
| "learning_rate": 0.0001, |
| "loss": 1.9625, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.269355888093689, |
| "grad_norm": 0.22003813087940216, |
| "learning_rate": 0.0001, |
| "loss": 2.1179, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.27000650618087185, |
| "grad_norm": 0.33217060565948486, |
| "learning_rate": 0.0001, |
| "loss": 2.6353, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.27065712426805466, |
| "grad_norm": 0.2260630577802658, |
| "learning_rate": 0.0001, |
| "loss": 2.0355, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.27130774235523747, |
| "grad_norm": 0.30081093311309814, |
| "learning_rate": 0.0001, |
| "loss": 2.1825, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.2719583604424203, |
| "grad_norm": 0.27275893092155457, |
| "learning_rate": 0.0001, |
| "loss": 2.6183, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.27260897852960314, |
| "grad_norm": 0.4902358651161194, |
| "learning_rate": 0.0001, |
| "loss": 3.0888, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.27325959661678595, |
| "grad_norm": 0.21213112771511078, |
| "learning_rate": 0.0001, |
| "loss": 2.1172, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.27391021470396876, |
| "grad_norm": 0.35953450202941895, |
| "learning_rate": 0.0001, |
| "loss": 2.5109, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.2745608327911516, |
| "grad_norm": 0.2081584334373474, |
| "learning_rate": 0.0001, |
| "loss": 2.0894, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.27521145087833443, |
| "grad_norm": 0.20892906188964844, |
| "learning_rate": 0.0001, |
| "loss": 1.9643, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.27586206896551724, |
| "grad_norm": 0.30058735609054565, |
| "learning_rate": 0.0001, |
| "loss": 2.6503, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.27651268705270005, |
| "grad_norm": 0.32902124524116516, |
| "learning_rate": 0.0001, |
| "loss": 2.3271, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.2771633051398829, |
| "grad_norm": 0.2003614902496338, |
| "learning_rate": 0.0001, |
| "loss": 1.9881, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.2778139232270657, |
| "grad_norm": 0.33349111676216125, |
| "learning_rate": 0.0001, |
| "loss": 2.7625, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.2784645413142485, |
| "grad_norm": 0.25051257014274597, |
| "learning_rate": 0.0001, |
| "loss": 2.0825, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.27911515940143133, |
| "grad_norm": 0.3301559388637543, |
| "learning_rate": 0.0001, |
| "loss": 2.85, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.2797657774886142, |
| "grad_norm": 0.18224254250526428, |
| "learning_rate": 0.0001, |
| "loss": 1.9687, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.280416395575797, |
| "grad_norm": 0.21809989213943481, |
| "learning_rate": 0.0001, |
| "loss": 2.2596, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.2810670136629798, |
| "grad_norm": 0.2473779171705246, |
| "learning_rate": 0.0001, |
| "loss": 2.2042, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.2817176317501627, |
| "grad_norm": 0.20744885504245758, |
| "learning_rate": 0.0001, |
| "loss": 2.1546, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.2823682498373455, |
| "grad_norm": 0.2620698809623718, |
| "learning_rate": 0.0001, |
| "loss": 2.5195, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.2830188679245283, |
| "grad_norm": 0.291421115398407, |
| "learning_rate": 0.0001, |
| "loss": 2.4983, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.2836694860117111, |
| "grad_norm": 0.3294708728790283, |
| "learning_rate": 0.0001, |
| "loss": 2.3146, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.28432010409889397, |
| "grad_norm": 0.26191362738609314, |
| "learning_rate": 0.0001, |
| "loss": 2.2818, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.2849707221860768, |
| "grad_norm": 0.29155483841896057, |
| "learning_rate": 0.0001, |
| "loss": 2.4888, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.2856213402732596, |
| "grad_norm": 0.19482360780239105, |
| "learning_rate": 0.0001, |
| "loss": 2.0061, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.28627195836044245, |
| "grad_norm": 0.2594612240791321, |
| "learning_rate": 0.0001, |
| "loss": 2.1891, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.28692257644762525, |
| "grad_norm": 0.21656309068202972, |
| "learning_rate": 0.0001, |
| "loss": 1.7911, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.28757319453480806, |
| "grad_norm": 0.18664829432964325, |
| "learning_rate": 0.0001, |
| "loss": 1.9634, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.28822381262199087, |
| "grad_norm": 0.2178332507610321, |
| "learning_rate": 0.0001, |
| "loss": 2.32, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.28887443070917374, |
| "grad_norm": 0.351418673992157, |
| "learning_rate": 0.0001, |
| "loss": 3.0873, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.28952504879635654, |
| "grad_norm": 0.23604457080364227, |
| "learning_rate": 0.0001, |
| "loss": 2.46, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.29017566688353935, |
| "grad_norm": 0.2599848806858063, |
| "learning_rate": 0.0001, |
| "loss": 2.0207, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.29082628497072216, |
| "grad_norm": 0.340314120054245, |
| "learning_rate": 0.0001, |
| "loss": 2.279, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.291476903057905, |
| "grad_norm": 0.23228399455547333, |
| "learning_rate": 0.0001, |
| "loss": 2.3561, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.29212752114508783, |
| "grad_norm": 0.25504687428474426, |
| "learning_rate": 0.0001, |
| "loss": 2.2251, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.29277813923227064, |
| "grad_norm": 0.2465014010667801, |
| "learning_rate": 0.0001, |
| "loss": 2.1031, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2934287573194535, |
| "grad_norm": 0.2188328504562378, |
| "learning_rate": 0.0001, |
| "loss": 2.1483, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.2940793754066363, |
| "grad_norm": 0.24546551704406738, |
| "learning_rate": 0.0001, |
| "loss": 2.2334, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.2947299934938191, |
| "grad_norm": 0.23416215181350708, |
| "learning_rate": 0.0001, |
| "loss": 2.1846, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.29538061158100193, |
| "grad_norm": 0.25267231464385986, |
| "learning_rate": 0.0001, |
| "loss": 2.2134, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.2960312296681848, |
| "grad_norm": 0.26632416248321533, |
| "learning_rate": 0.0001, |
| "loss": 2.5012, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.2966818477553676, |
| "grad_norm": 0.18289139866828918, |
| "learning_rate": 0.0001, |
| "loss": 2.0524, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.2973324658425504, |
| "grad_norm": 0.19033563137054443, |
| "learning_rate": 0.0001, |
| "loss": 2.0165, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.2979830839297333, |
| "grad_norm": 0.200730562210083, |
| "learning_rate": 0.0001, |
| "loss": 1.8021, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.2986337020169161, |
| "grad_norm": 0.2109062522649765, |
| "learning_rate": 0.0001, |
| "loss": 2.0655, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.2992843201040989, |
| "grad_norm": 0.23461318016052246, |
| "learning_rate": 0.0001, |
| "loss": 2.3335, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.2999349381912817, |
| "grad_norm": 0.2085726112127304, |
| "learning_rate": 0.0001, |
| "loss": 2.0061, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.30058555627846456, |
| "grad_norm": 0.2938329875469208, |
| "learning_rate": 0.0001, |
| "loss": 2.5245, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.30123617436564737, |
| "grad_norm": 0.22131232917308807, |
| "learning_rate": 0.0001, |
| "loss": 2.4115, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.3018867924528302, |
| "grad_norm": 0.3459152579307556, |
| "learning_rate": 0.0001, |
| "loss": 2.3896, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.302537410540013, |
| "grad_norm": 0.27464184165000916, |
| "learning_rate": 0.0001, |
| "loss": 2.6592, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.30318802862719585, |
| "grad_norm": 0.28379327058792114, |
| "learning_rate": 0.0001, |
| "loss": 2.1453, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.30383864671437866, |
| "grad_norm": 0.28283926844596863, |
| "learning_rate": 0.0001, |
| "loss": 2.1704, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.30448926480156147, |
| "grad_norm": 0.22243599593639374, |
| "learning_rate": 0.0001, |
| "loss": 2.1175, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.30513988288874433, |
| "grad_norm": 0.22331124544143677, |
| "learning_rate": 0.0001, |
| "loss": 1.8857, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.30579050097592714, |
| "grad_norm": 0.21995989978313446, |
| "learning_rate": 0.0001, |
| "loss": 2.1316, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.30644111906310995, |
| "grad_norm": 0.21140341460704803, |
| "learning_rate": 0.0001, |
| "loss": 2.0742, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.30709173715029275, |
| "grad_norm": 0.31053757667541504, |
| "learning_rate": 0.0001, |
| "loss": 2.615, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.3077423552374756, |
| "grad_norm": 0.2768484354019165, |
| "learning_rate": 0.0001, |
| "loss": 2.713, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.3083929733246584, |
| "grad_norm": 0.2538318336009979, |
| "learning_rate": 0.0001, |
| "loss": 2.1917, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.30904359141184123, |
| "grad_norm": 0.2105240672826767, |
| "learning_rate": 0.0001, |
| "loss": 2.2741, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.3096942094990241, |
| "grad_norm": 0.2915903925895691, |
| "learning_rate": 0.0001, |
| "loss": 2.115, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.3103448275862069, |
| "grad_norm": 0.30282047390937805, |
| "learning_rate": 0.0001, |
| "loss": 2.7806, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.3109954456733897, |
| "grad_norm": 0.2707601487636566, |
| "learning_rate": 0.0001, |
| "loss": 2.6137, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.3116460637605725, |
| "grad_norm": 0.34574300050735474, |
| "learning_rate": 0.0001, |
| "loss": 2.5957, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.3122966818477554, |
| "grad_norm": 0.22767509520053864, |
| "learning_rate": 0.0001, |
| "loss": 2.3543, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.3129472999349382, |
| "grad_norm": 0.25194215774536133, |
| "learning_rate": 0.0001, |
| "loss": 2.6586, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.313597918022121, |
| "grad_norm": 0.20427219569683075, |
| "learning_rate": 0.0001, |
| "loss": 1.9091, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.3142485361093038, |
| "grad_norm": 0.2993704378604889, |
| "learning_rate": 0.0001, |
| "loss": 2.4704, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.3148991541964867, |
| "grad_norm": 0.18951758742332458, |
| "learning_rate": 0.0001, |
| "loss": 2.1108, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.3155497722836695, |
| "grad_norm": 0.2622709572315216, |
| "learning_rate": 0.0001, |
| "loss": 2.4144, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.3162003903708523, |
| "grad_norm": 0.20735126733779907, |
| "learning_rate": 0.0001, |
| "loss": 2.3065, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.31685100845803515, |
| "grad_norm": 0.22782085835933685, |
| "learning_rate": 0.0001, |
| "loss": 2.4377, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.31750162654521796, |
| "grad_norm": 0.2568935453891754, |
| "learning_rate": 0.0001, |
| "loss": 2.1199, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.31815224463240077, |
| "grad_norm": 0.23917409777641296, |
| "learning_rate": 0.0001, |
| "loss": 2.2457, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.3188028627195836, |
| "grad_norm": 0.21531902253627777, |
| "learning_rate": 0.0001, |
| "loss": 2.0489, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.31945348080676644, |
| "grad_norm": 0.21461109817028046, |
| "learning_rate": 0.0001, |
| "loss": 2.1915, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.32010409889394925, |
| "grad_norm": 0.2458680123090744, |
| "learning_rate": 0.0001, |
| "loss": 2.3939, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.32075471698113206, |
| "grad_norm": 0.2617323696613312, |
| "learning_rate": 0.0001, |
| "loss": 2.5611, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.3214053350683149, |
| "grad_norm": 0.22562618553638458, |
| "learning_rate": 0.0001, |
| "loss": 2.2703, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.32205595315549773, |
| "grad_norm": 0.2290688008069992, |
| "learning_rate": 0.0001, |
| "loss": 2.3049, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.32270657124268054, |
| "grad_norm": 0.4118833541870117, |
| "learning_rate": 0.0001, |
| "loss": 2.9194, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.32335718932986335, |
| "grad_norm": 0.22502999007701874, |
| "learning_rate": 0.0001, |
| "loss": 2.2362, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.3240078074170462, |
| "grad_norm": 0.23599191009998322, |
| "learning_rate": 0.0001, |
| "loss": 2.35, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.324658425504229, |
| "grad_norm": 0.3065047860145569, |
| "learning_rate": 0.0001, |
| "loss": 2.3984, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.32530904359141183, |
| "grad_norm": 0.19241982698440552, |
| "learning_rate": 0.0001, |
| "loss": 1.8787, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.3259596616785947, |
| "grad_norm": 0.20695632696151733, |
| "learning_rate": 0.0001, |
| "loss": 1.9397, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.3266102797657775, |
| "grad_norm": 0.1998564749956131, |
| "learning_rate": 0.0001, |
| "loss": 2.1463, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.3272608978529603, |
| "grad_norm": 0.27775317430496216, |
| "learning_rate": 0.0001, |
| "loss": 2.7956, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.3279115159401431, |
| "grad_norm": 0.2393936961889267, |
| "learning_rate": 0.0001, |
| "loss": 2.3785, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.328562134027326, |
| "grad_norm": 0.20921163260936737, |
| "learning_rate": 0.0001, |
| "loss": 2.1909, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.3292127521145088, |
| "grad_norm": 0.25875911116600037, |
| "learning_rate": 0.0001, |
| "loss": 2.129, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.3298633702016916, |
| "grad_norm": 0.2382909208536148, |
| "learning_rate": 0.0001, |
| "loss": 2.3786, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.3305139882888744, |
| "grad_norm": 0.19657136499881744, |
| "learning_rate": 0.0001, |
| "loss": 1.951, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.33116460637605727, |
| "grad_norm": 0.23688004910945892, |
| "learning_rate": 0.0001, |
| "loss": 2.4348, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.3318152244632401, |
| "grad_norm": 0.1988734006881714, |
| "learning_rate": 0.0001, |
| "loss": 2.2352, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.3324658425504229, |
| "grad_norm": 0.2078763097524643, |
| "learning_rate": 0.0001, |
| "loss": 2.1376, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.33311646063760575, |
| "grad_norm": 0.18860888481140137, |
| "learning_rate": 0.0001, |
| "loss": 1.9367, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.33376707872478856, |
| "grad_norm": 0.30205249786376953, |
| "learning_rate": 0.0001, |
| "loss": 2.6822, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.33441769681197137, |
| "grad_norm": 0.2146618664264679, |
| "learning_rate": 0.0001, |
| "loss": 2.1927, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.3350683148991542, |
| "grad_norm": 0.19332504272460938, |
| "learning_rate": 0.0001, |
| "loss": 2.0442, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.33571893298633704, |
| "grad_norm": 0.2289431244134903, |
| "learning_rate": 0.0001, |
| "loss": 2.0152, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.33636955107351985, |
| "grad_norm": 0.21815945208072662, |
| "learning_rate": 0.0001, |
| "loss": 2.0015, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.33702016916070265, |
| "grad_norm": 0.2226189821958542, |
| "learning_rate": 0.0001, |
| "loss": 2.2989, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.3376707872478855, |
| "grad_norm": 0.22195078432559967, |
| "learning_rate": 0.0001, |
| "loss": 2.2237, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.3383214053350683, |
| "grad_norm": 0.1946515589952469, |
| "learning_rate": 0.0001, |
| "loss": 1.9459, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.33897202342225113, |
| "grad_norm": 0.21510568261146545, |
| "learning_rate": 0.0001, |
| "loss": 2.1305, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.33962264150943394, |
| "grad_norm": 0.23448903858661652, |
| "learning_rate": 0.0001, |
| "loss": 2.1838, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.3402732595966168, |
| "grad_norm": 0.19046911597251892, |
| "learning_rate": 0.0001, |
| "loss": 1.9739, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.3409238776837996, |
| "grad_norm": 0.2314033806324005, |
| "learning_rate": 0.0001, |
| "loss": 2.2053, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.3415744957709824, |
| "grad_norm": 0.2206612378358841, |
| "learning_rate": 0.0001, |
| "loss": 2.2566, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.34222511385816523, |
| "grad_norm": 0.19578076899051666, |
| "learning_rate": 0.0001, |
| "loss": 2.045, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.3428757319453481, |
| "grad_norm": 0.1787755936384201, |
| "learning_rate": 0.0001, |
| "loss": 1.8942, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.3435263500325309, |
| "grad_norm": 0.20091751217842102, |
| "learning_rate": 0.0001, |
| "loss": 2.1576, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.3441769681197137, |
| "grad_norm": 0.21869762241840363, |
| "learning_rate": 0.0001, |
| "loss": 2.1938, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.3448275862068966, |
| "grad_norm": 0.26101449131965637, |
| "learning_rate": 0.0001, |
| "loss": 2.3642, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.3454782042940794, |
| "grad_norm": 0.21874766051769257, |
| "learning_rate": 0.0001, |
| "loss": 2.4553, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.3461288223812622, |
| "grad_norm": 0.224325492978096, |
| "learning_rate": 0.0001, |
| "loss": 2.2959, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.346779440468445, |
| "grad_norm": 0.21268363296985626, |
| "learning_rate": 0.0001, |
| "loss": 2.1021, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.34743005855562786, |
| "grad_norm": 0.20979231595993042, |
| "learning_rate": 0.0001, |
| "loss": 2.0304, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.34808067664281067, |
| "grad_norm": 0.19552691280841827, |
| "learning_rate": 0.0001, |
| "loss": 1.9747, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.3487312947299935, |
| "grad_norm": 0.27929842472076416, |
| "learning_rate": 0.0001, |
| "loss": 2.445, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.34938191281717634, |
| "grad_norm": 0.19953188300132751, |
| "learning_rate": 0.0001, |
| "loss": 1.9766, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.35003253090435915, |
| "grad_norm": 0.29898926615715027, |
| "learning_rate": 0.0001, |
| "loss": 2.4818, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.35068314899154196, |
| "grad_norm": 0.18719644844532013, |
| "learning_rate": 0.0001, |
| "loss": 1.9046, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.35133376707872477, |
| "grad_norm": 0.2602563798427582, |
| "learning_rate": 0.0001, |
| "loss": 2.1539, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.35198438516590763, |
| "grad_norm": 0.23460406064987183, |
| "learning_rate": 0.0001, |
| "loss": 2.3826, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.35263500325309044, |
| "grad_norm": 0.2821134328842163, |
| "learning_rate": 0.0001, |
| "loss": 2.223, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.35328562134027325, |
| "grad_norm": 0.2641044557094574, |
| "learning_rate": 0.0001, |
| "loss": 2.2402, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.35393623942745606, |
| "grad_norm": 0.21963565051555634, |
| "learning_rate": 0.0001, |
| "loss": 2.3988, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.3545868575146389, |
| "grad_norm": 0.26475685834884644, |
| "learning_rate": 0.0001, |
| "loss": 2.3046, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.35523747560182173, |
| "grad_norm": 0.27148157358169556, |
| "learning_rate": 0.0001, |
| "loss": 2.5076, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.35588809368900454, |
| "grad_norm": 0.28925588726997375, |
| "learning_rate": 0.0001, |
| "loss": 2.8395, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.3565387117761874, |
| "grad_norm": 0.22953632473945618, |
| "learning_rate": 0.0001, |
| "loss": 2.1198, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.3571893298633702, |
| "grad_norm": 0.23960557579994202, |
| "learning_rate": 0.0001, |
| "loss": 2.3064, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.357839947950553, |
| "grad_norm": 0.3133333921432495, |
| "learning_rate": 0.0001, |
| "loss": 2.6034, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.3584905660377358, |
| "grad_norm": 0.21745215356349945, |
| "learning_rate": 0.0001, |
| "loss": 2.4553, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.3591411841249187, |
| "grad_norm": 0.23547130823135376, |
| "learning_rate": 0.0001, |
| "loss": 2.0469, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.3597918022121015, |
| "grad_norm": 0.2646094262599945, |
| "learning_rate": 0.0001, |
| "loss": 1.9016, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.3604424202992843, |
| "grad_norm": 0.3079530596733093, |
| "learning_rate": 0.0001, |
| "loss": 2.8979, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.36109303838646717, |
| "grad_norm": 0.38223740458488464, |
| "learning_rate": 0.0001, |
| "loss": 3.066, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.36174365647365, |
| "grad_norm": 0.2535337209701538, |
| "learning_rate": 0.0001, |
| "loss": 2.1327, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.3623942745608328, |
| "grad_norm": 0.2373637855052948, |
| "learning_rate": 0.0001, |
| "loss": 2.1141, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.3630448926480156, |
| "grad_norm": 0.19437271356582642, |
| "learning_rate": 0.0001, |
| "loss": 1.9753, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.36369551073519846, |
| "grad_norm": 0.20236878097057343, |
| "learning_rate": 0.0001, |
| "loss": 2.2516, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.36434612882238127, |
| "grad_norm": 0.21252363920211792, |
| "learning_rate": 0.0001, |
| "loss": 2.3645, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.3649967469095641, |
| "grad_norm": 0.21689258515834808, |
| "learning_rate": 0.0001, |
| "loss": 2.1145, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.3656473649967469, |
| "grad_norm": 0.22365228831768036, |
| "learning_rate": 0.0001, |
| "loss": 2.3083, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.36629798308392975, |
| "grad_norm": 0.21607807278633118, |
| "learning_rate": 0.0001, |
| "loss": 2.3199, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.36694860117111255, |
| "grad_norm": 0.1885683536529541, |
| "learning_rate": 0.0001, |
| "loss": 1.9303, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.36759921925829536, |
| "grad_norm": 0.20064905285835266, |
| "learning_rate": 0.0001, |
| "loss": 2.0661, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.3682498373454782, |
| "grad_norm": 0.23532240092754364, |
| "learning_rate": 0.0001, |
| "loss": 2.6942, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.36890045543266103, |
| "grad_norm": 0.22937807440757751, |
| "learning_rate": 0.0001, |
| "loss": 2.1962, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.36955107351984384, |
| "grad_norm": 0.2540866732597351, |
| "learning_rate": 0.0001, |
| "loss": 2.5012, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.37020169160702665, |
| "grad_norm": 0.23405294120311737, |
| "learning_rate": 0.0001, |
| "loss": 2.2439, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.3708523096942095, |
| "grad_norm": 0.24394820630550385, |
| "learning_rate": 0.0001, |
| "loss": 2.0741, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.3715029277813923, |
| "grad_norm": 0.2063736468553543, |
| "learning_rate": 0.0001, |
| "loss": 2.0864, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.37215354586857513, |
| "grad_norm": 0.3300686180591583, |
| "learning_rate": 0.0001, |
| "loss": 2.4983, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.372804163955758, |
| "grad_norm": 0.21294772624969482, |
| "learning_rate": 0.0001, |
| "loss": 2.2273, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.3734547820429408, |
| "grad_norm": 0.2629190981388092, |
| "learning_rate": 0.0001, |
| "loss": 2.1732, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.3741054001301236, |
| "grad_norm": 0.2141999751329422, |
| "learning_rate": 0.0001, |
| "loss": 2.3038, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.3747560182173064, |
| "grad_norm": 0.3467566668987274, |
| "learning_rate": 0.0001, |
| "loss": 2.7748, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.3754066363044893, |
| "grad_norm": 0.3112248182296753, |
| "learning_rate": 0.0001, |
| "loss": 2.2376, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.3760572543916721, |
| "grad_norm": 0.21217738091945648, |
| "learning_rate": 0.0001, |
| "loss": 1.9146, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.3767078724788549, |
| "grad_norm": 0.19359458982944489, |
| "learning_rate": 0.0001, |
| "loss": 2.0913, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.37735849056603776, |
| "grad_norm": 0.27635738253593445, |
| "learning_rate": 0.0001, |
| "loss": 2.2855, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.37800910865322057, |
| "grad_norm": 0.19366882741451263, |
| "learning_rate": 0.0001, |
| "loss": 2.0194, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.3786597267404034, |
| "grad_norm": 0.2016839236021042, |
| "learning_rate": 0.0001, |
| "loss": 2.1519, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.3793103448275862, |
| "grad_norm": 0.22154097259044647, |
| "learning_rate": 0.0001, |
| "loss": 1.9849, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.37996096291476905, |
| "grad_norm": 0.2089187502861023, |
| "learning_rate": 0.0001, |
| "loss": 2.3624, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.38061158100195186, |
| "grad_norm": 0.25050756335258484, |
| "learning_rate": 0.0001, |
| "loss": 2.1773, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.38126219908913467, |
| "grad_norm": 0.23007918894290924, |
| "learning_rate": 0.0001, |
| "loss": 2.2054, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.3819128171763175, |
| "grad_norm": 0.25022968649864197, |
| "learning_rate": 0.0001, |
| "loss": 2.219, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.38256343526350034, |
| "grad_norm": 0.2205193042755127, |
| "learning_rate": 0.0001, |
| "loss": 2.2049, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.38321405335068315, |
| "grad_norm": 0.21454961597919464, |
| "learning_rate": 0.0001, |
| "loss": 2.0683, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.38386467143786596, |
| "grad_norm": 0.2088347226381302, |
| "learning_rate": 0.0001, |
| "loss": 2.1301, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.3845152895250488, |
| "grad_norm": 0.20322394371032715, |
| "learning_rate": 0.0001, |
| "loss": 2.2098, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.38516590761223163, |
| "grad_norm": 0.231514111161232, |
| "learning_rate": 0.0001, |
| "loss": 2.5523, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.38581652569941444, |
| "grad_norm": 0.24791982769966125, |
| "learning_rate": 0.0001, |
| "loss": 2.2259, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.38646714378659724, |
| "grad_norm": 0.21148578822612762, |
| "learning_rate": 0.0001, |
| "loss": 2.0834, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.3871177618737801, |
| "grad_norm": 0.263713538646698, |
| "learning_rate": 0.0001, |
| "loss": 2.3101, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.3877683799609629, |
| "grad_norm": 0.22197774052619934, |
| "learning_rate": 0.0001, |
| "loss": 2.1173, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.3884189980481457, |
| "grad_norm": 0.2237439900636673, |
| "learning_rate": 0.0001, |
| "loss": 2.1109, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.3890696161353286, |
| "grad_norm": 0.27451419830322266, |
| "learning_rate": 0.0001, |
| "loss": 2.5311, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.3897202342225114, |
| "grad_norm": 0.18475750088691711, |
| "learning_rate": 0.0001, |
| "loss": 1.9241, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.3903708523096942, |
| "grad_norm": 0.20120149850845337, |
| "learning_rate": 0.0001, |
| "loss": 2.1033, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.391021470396877, |
| "grad_norm": 0.19626259803771973, |
| "learning_rate": 0.0001, |
| "loss": 2.1223, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.3916720884840599, |
| "grad_norm": 0.22795897722244263, |
| "learning_rate": 0.0001, |
| "loss": 2.2021, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.3923227065712427, |
| "grad_norm": 0.5195867419242859, |
| "learning_rate": 0.0001, |
| "loss": 3.1849, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.3929733246584255, |
| "grad_norm": 0.2636241614818573, |
| "learning_rate": 0.0001, |
| "loss": 2.0739, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.3936239427456083, |
| "grad_norm": 0.33922895789146423, |
| "learning_rate": 0.0001, |
| "loss": 2.31, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.39427456083279117, |
| "grad_norm": 0.17467042803764343, |
| "learning_rate": 0.0001, |
| "loss": 1.9201, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.394925178919974, |
| "grad_norm": 0.22457371652126312, |
| "learning_rate": 0.0001, |
| "loss": 1.9783, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.3955757970071568, |
| "grad_norm": 0.5104444026947021, |
| "learning_rate": 0.0001, |
| "loss": 2.3777, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.39622641509433965, |
| "grad_norm": 0.4531616270542145, |
| "learning_rate": 0.0001, |
| "loss": 2.8208, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.39687703318152245, |
| "grad_norm": 0.20649151504039764, |
| "learning_rate": 0.0001, |
| "loss": 2.1377, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.39752765126870526, |
| "grad_norm": 0.39769667387008667, |
| "learning_rate": 0.0001, |
| "loss": 2.2228, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.39817826935588807, |
| "grad_norm": 0.2832731008529663, |
| "learning_rate": 0.0001, |
| "loss": 1.9664, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.39882888744307093, |
| "grad_norm": 0.2754386067390442, |
| "learning_rate": 0.0001, |
| "loss": 2.5595, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.39947950553025374, |
| "grad_norm": 0.404364675283432, |
| "learning_rate": 0.0001, |
| "loss": 2.8133, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.40013012361743655, |
| "grad_norm": 0.30304789543151855, |
| "learning_rate": 0.0001, |
| "loss": 2.2729, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.4007807417046194, |
| "grad_norm": 0.2519910931587219, |
| "learning_rate": 0.0001, |
| "loss": 2.3655, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.4014313597918022, |
| "grad_norm": 0.2863995134830475, |
| "learning_rate": 0.0001, |
| "loss": 2.0774, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.40208197787898503, |
| "grad_norm": 0.393622487783432, |
| "learning_rate": 0.0001, |
| "loss": 2.5082, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.40273259596616784, |
| "grad_norm": 0.21836060285568237, |
| "learning_rate": 0.0001, |
| "loss": 1.9548, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.4033832140533507, |
| "grad_norm": 0.358052521944046, |
| "learning_rate": 0.0001, |
| "loss": 2.5158, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.4040338321405335, |
| "grad_norm": 0.237140953540802, |
| "learning_rate": 0.0001, |
| "loss": 2.2111, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.4046844502277163, |
| "grad_norm": 0.20998883247375488, |
| "learning_rate": 0.0001, |
| "loss": 2.1351, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.4053350683148991, |
| "grad_norm": 0.18059247732162476, |
| "learning_rate": 0.0001, |
| "loss": 1.9451, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.405985686402082, |
| "grad_norm": 0.17532669007778168, |
| "learning_rate": 0.0001, |
| "loss": 1.8591, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.4066363044892648, |
| "grad_norm": 0.24097976088523865, |
| "learning_rate": 0.0001, |
| "loss": 2.6534, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.4072869225764476, |
| "grad_norm": 0.19505445659160614, |
| "learning_rate": 0.0001, |
| "loss": 1.8952, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.40793754066363047, |
| "grad_norm": 0.232722207903862, |
| "learning_rate": 0.0001, |
| "loss": 2.2055, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.4085881587508133, |
| "grad_norm": 0.23899732530117035, |
| "learning_rate": 0.0001, |
| "loss": 2.5848, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.4092387768379961, |
| "grad_norm": 0.2411729097366333, |
| "learning_rate": 0.0001, |
| "loss": 2.5315, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.4098893949251789, |
| "grad_norm": 0.25042012333869934, |
| "learning_rate": 0.0001, |
| "loss": 2.4154, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.41054001301236176, |
| "grad_norm": 0.2764488160610199, |
| "learning_rate": 0.0001, |
| "loss": 2.0564, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.41119063109954457, |
| "grad_norm": 0.24761155247688293, |
| "learning_rate": 0.0001, |
| "loss": 2.3245, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.4118412491867274, |
| "grad_norm": 0.22376200556755066, |
| "learning_rate": 0.0001, |
| "loss": 2.1881, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.41249186727391024, |
| "grad_norm": 0.19060148298740387, |
| "learning_rate": 0.0001, |
| "loss": 1.9588, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.41314248536109305, |
| "grad_norm": 0.4157400131225586, |
| "learning_rate": 0.0001, |
| "loss": 2.9024, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.41379310344827586, |
| "grad_norm": 0.2557002007961273, |
| "learning_rate": 0.0001, |
| "loss": 1.9819, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.41444372153545866, |
| "grad_norm": 0.2908417880535126, |
| "learning_rate": 0.0001, |
| "loss": 2.112, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.41509433962264153, |
| "grad_norm": 0.32937270402908325, |
| "learning_rate": 0.0001, |
| "loss": 2.4976, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.41574495770982434, |
| "grad_norm": 0.20382268726825714, |
| "learning_rate": 0.0001, |
| "loss": 2.0448, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.41639557579700714, |
| "grad_norm": 0.23484939336776733, |
| "learning_rate": 0.0001, |
| "loss": 1.9514, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.41704619388418995, |
| "grad_norm": 0.23023058474063873, |
| "learning_rate": 0.0001, |
| "loss": 2.0768, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.4176968119713728, |
| "grad_norm": 0.22951190173625946, |
| "learning_rate": 0.0001, |
| "loss": 2.0764, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.4183474300585556, |
| "grad_norm": 0.18971513211727142, |
| "learning_rate": 0.0001, |
| "loss": 1.9693, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.41899804814573843, |
| "grad_norm": 0.24955709278583527, |
| "learning_rate": 0.0001, |
| "loss": 2.4898, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.4196486662329213, |
| "grad_norm": 0.3344306945800781, |
| "learning_rate": 0.0001, |
| "loss": 2.4779, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.4202992843201041, |
| "grad_norm": 0.21661825478076935, |
| "learning_rate": 0.0001, |
| "loss": 2.0472, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.4209499024072869, |
| "grad_norm": 0.1972419023513794, |
| "learning_rate": 0.0001, |
| "loss": 2.1712, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.4216005204944697, |
| "grad_norm": 0.21619470417499542, |
| "learning_rate": 0.0001, |
| "loss": 2.0739, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.4222511385816526, |
| "grad_norm": 0.2329091727733612, |
| "learning_rate": 0.0001, |
| "loss": 2.1362, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.4229017566688354, |
| "grad_norm": 0.22971969842910767, |
| "learning_rate": 0.0001, |
| "loss": 1.9898, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.4235523747560182, |
| "grad_norm": 0.20185063779354095, |
| "learning_rate": 0.0001, |
| "loss": 2.1008, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.42420299284320107, |
| "grad_norm": 0.2658546566963196, |
| "learning_rate": 0.0001, |
| "loss": 2.5734, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.4248536109303839, |
| "grad_norm": 0.23109374940395355, |
| "learning_rate": 0.0001, |
| "loss": 2.2569, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.4255042290175667, |
| "grad_norm": 0.25115352869033813, |
| "learning_rate": 0.0001, |
| "loss": 2.5967, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.4261548471047495, |
| "grad_norm": 0.20470669865608215, |
| "learning_rate": 0.0001, |
| "loss": 2.0302, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.42680546519193235, |
| "grad_norm": 0.2151513546705246, |
| "learning_rate": 0.0001, |
| "loss": 2.5183, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.42745608327911516, |
| "grad_norm": 0.2571411728858948, |
| "learning_rate": 0.0001, |
| "loss": 2.255, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.42810670136629797, |
| "grad_norm": 0.2414022833108902, |
| "learning_rate": 0.0001, |
| "loss": 2.4076, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.42875731945348083, |
| "grad_norm": 0.21041014790534973, |
| "learning_rate": 0.0001, |
| "loss": 2.0091, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.42940793754066364, |
| "grad_norm": 0.21241822838783264, |
| "learning_rate": 0.0001, |
| "loss": 2.355, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.43005855562784645, |
| "grad_norm": 0.21031403541564941, |
| "learning_rate": 0.0001, |
| "loss": 1.9887, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.43070917371502926, |
| "grad_norm": 0.19765952229499817, |
| "learning_rate": 0.0001, |
| "loss": 2.1555, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.4313597918022121, |
| "grad_norm": 0.24740834534168243, |
| "learning_rate": 0.0001, |
| "loss": 2.2349, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.43201040988939493, |
| "grad_norm": 0.22086234390735626, |
| "learning_rate": 0.0001, |
| "loss": 2.0948, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.43266102797657774, |
| "grad_norm": 0.21949239075183868, |
| "learning_rate": 0.0001, |
| "loss": 2.3905, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.43331164606376055, |
| "grad_norm": 0.20536834001541138, |
| "learning_rate": 0.0001, |
| "loss": 2.0547, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.4339622641509434, |
| "grad_norm": 0.2570655941963196, |
| "learning_rate": 0.0001, |
| "loss": 2.0261, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.4346128822381262, |
| "grad_norm": 0.3293687701225281, |
| "learning_rate": 0.0001, |
| "loss": 2.344, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.435263500325309, |
| "grad_norm": 0.22947120666503906, |
| "learning_rate": 0.0001, |
| "loss": 2.232, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.4359141184124919, |
| "grad_norm": 0.2425599992275238, |
| "learning_rate": 0.0001, |
| "loss": 2.309, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.4365647364996747, |
| "grad_norm": 0.2506352663040161, |
| "learning_rate": 0.0001, |
| "loss": 2.1249, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.4372153545868575, |
| "grad_norm": 0.19457192718982697, |
| "learning_rate": 0.0001, |
| "loss": 1.9461, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.4378659726740403, |
| "grad_norm": 0.3749271035194397, |
| "learning_rate": 0.0001, |
| "loss": 2.8532, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.4385165907612232, |
| "grad_norm": 0.25384366512298584, |
| "learning_rate": 0.0001, |
| "loss": 2.6495, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.439167208848406, |
| "grad_norm": 0.21413469314575195, |
| "learning_rate": 0.0001, |
| "loss": 2.084, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.4398178269355888, |
| "grad_norm": 0.228125661611557, |
| "learning_rate": 0.0001, |
| "loss": 2.2175, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.44046844502277166, |
| "grad_norm": 0.1948491632938385, |
| "learning_rate": 0.0001, |
| "loss": 1.9702, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.44111906310995447, |
| "grad_norm": 0.307992547750473, |
| "learning_rate": 0.0001, |
| "loss": 2.5884, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.4417696811971373, |
| "grad_norm": 0.23681728541851044, |
| "learning_rate": 0.0001, |
| "loss": 2.2104, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.4424202992843201, |
| "grad_norm": 0.23185166716575623, |
| "learning_rate": 0.0001, |
| "loss": 2.0823, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.44307091737150295, |
| "grad_norm": 0.2772667109966278, |
| "learning_rate": 0.0001, |
| "loss": 2.3729, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.44372153545868576, |
| "grad_norm": 0.18908965587615967, |
| "learning_rate": 0.0001, |
| "loss": 2.0585, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.44437215354586856, |
| "grad_norm": 0.2063988745212555, |
| "learning_rate": 0.0001, |
| "loss": 1.9474, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.4450227716330514, |
| "grad_norm": 0.19444917142391205, |
| "learning_rate": 0.0001, |
| "loss": 1.9269, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.44567338972023424, |
| "grad_norm": 0.2866727113723755, |
| "learning_rate": 0.0001, |
| "loss": 2.5145, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.44632400780741704, |
| "grad_norm": 0.24801641702651978, |
| "learning_rate": 0.0001, |
| "loss": 2.2954, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.44697462589459985, |
| "grad_norm": 0.2115658074617386, |
| "learning_rate": 0.0001, |
| "loss": 2.1956, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.4476252439817827, |
| "grad_norm": 0.3155558109283447, |
| "learning_rate": 0.0001, |
| "loss": 2.7396, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.4482758620689655, |
| "grad_norm": 0.22418133914470673, |
| "learning_rate": 0.0001, |
| "loss": 2.1066, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.44892648015614833, |
| "grad_norm": 0.2707614600658417, |
| "learning_rate": 0.0001, |
| "loss": 2.3353, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.44957709824333114, |
| "grad_norm": 0.22262880206108093, |
| "learning_rate": 0.0001, |
| "loss": 2.2143, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.450227716330514, |
| "grad_norm": 0.25256767868995667, |
| "learning_rate": 0.0001, |
| "loss": 2.2786, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.4508783344176968, |
| "grad_norm": 0.20360921323299408, |
| "learning_rate": 0.0001, |
| "loss": 2.0059, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.4515289525048796, |
| "grad_norm": 0.20573420822620392, |
| "learning_rate": 0.0001, |
| "loss": 2.0884, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.4521795705920625, |
| "grad_norm": 0.31812623143196106, |
| "learning_rate": 0.0001, |
| "loss": 2.5905, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.4528301886792453, |
| "grad_norm": 0.24690969288349152, |
| "learning_rate": 0.0001, |
| "loss": 2.5157, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.4534808067664281, |
| "grad_norm": 0.256793737411499, |
| "learning_rate": 0.0001, |
| "loss": 2.1548, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.4541314248536109, |
| "grad_norm": 0.2659960985183716, |
| "learning_rate": 0.0001, |
| "loss": 2.2977, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.4547820429407938, |
| "grad_norm": 0.23824195563793182, |
| "learning_rate": 0.0001, |
| "loss": 2.5946, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.4554326610279766, |
| "grad_norm": 0.2580608129501343, |
| "learning_rate": 0.0001, |
| "loss": 2.2608, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.4560832791151594, |
| "grad_norm": 0.270622193813324, |
| "learning_rate": 0.0001, |
| "loss": 2.5848, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.4567338972023422, |
| "grad_norm": 0.2170489877462387, |
| "learning_rate": 0.0001, |
| "loss": 2.4315, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.45738451528952506, |
| "grad_norm": 0.20716050267219543, |
| "learning_rate": 0.0001, |
| "loss": 2.1592, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.45803513337670787, |
| "grad_norm": 0.24847671389579773, |
| "learning_rate": 0.0001, |
| "loss": 2.3202, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.4586857514638907, |
| "grad_norm": 0.24049146473407745, |
| "learning_rate": 0.0001, |
| "loss": 2.1968, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.45933636955107354, |
| "grad_norm": 0.2079533487558365, |
| "learning_rate": 0.0001, |
| "loss": 2.2966, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.45998698763825635, |
| "grad_norm": 0.18255428969860077, |
| "learning_rate": 0.0001, |
| "loss": 1.9931, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.46063760572543916, |
| "grad_norm": 0.28015655279159546, |
| "learning_rate": 0.0001, |
| "loss": 2.2605, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.46128822381262197, |
| "grad_norm": 0.27453094720840454, |
| "learning_rate": 0.0001, |
| "loss": 2.2835, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.46193884189980483, |
| "grad_norm": 0.2751506268978119, |
| "learning_rate": 0.0001, |
| "loss": 2.665, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.46258945998698764, |
| "grad_norm": 0.2759210169315338, |
| "learning_rate": 0.0001, |
| "loss": 2.3593, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.46324007807417045, |
| "grad_norm": 0.2902829051017761, |
| "learning_rate": 0.0001, |
| "loss": 2.7421, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.4638906961613533, |
| "grad_norm": 0.24083854258060455, |
| "learning_rate": 0.0001, |
| "loss": 2.4644, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.4645413142485361, |
| "grad_norm": 0.23614934086799622, |
| "learning_rate": 0.0001, |
| "loss": 2.2939, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.4651919323357189, |
| "grad_norm": 0.1972537487745285, |
| "learning_rate": 0.0001, |
| "loss": 1.9391, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.46584255042290174, |
| "grad_norm": 0.2227838933467865, |
| "learning_rate": 0.0001, |
| "loss": 1.9396, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.4664931685100846, |
| "grad_norm": 0.3672918379306793, |
| "learning_rate": 0.0001, |
| "loss": 2.7508, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.4671437865972674, |
| "grad_norm": 0.2712246775627136, |
| "learning_rate": 0.0001, |
| "loss": 2.2838, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.4677944046844502, |
| "grad_norm": 0.2337927669286728, |
| "learning_rate": 0.0001, |
| "loss": 1.9807, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.468445022771633, |
| "grad_norm": 0.2051180601119995, |
| "learning_rate": 0.0001, |
| "loss": 2.0311, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.4690956408588159, |
| "grad_norm": 0.1965889185667038, |
| "learning_rate": 0.0001, |
| "loss": 2.1114, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.4697462589459987, |
| "grad_norm": 0.2106337547302246, |
| "learning_rate": 0.0001, |
| "loss": 2.0792, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.4703968770331815, |
| "grad_norm": 0.19918356835842133, |
| "learning_rate": 0.0001, |
| "loss": 2.1323, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.47104749512036437, |
| "grad_norm": 0.20124401152133942, |
| "learning_rate": 0.0001, |
| "loss": 2.0008, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.4716981132075472, |
| "grad_norm": 0.2172473967075348, |
| "learning_rate": 0.0001, |
| "loss": 2.3891, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.47234873129473, |
| "grad_norm": 0.2524811029434204, |
| "learning_rate": 0.0001, |
| "loss": 2.3343, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.4729993493819128, |
| "grad_norm": 0.22882957756519318, |
| "learning_rate": 0.0001, |
| "loss": 2.6723, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.47364996746909566, |
| "grad_norm": 0.2434161901473999, |
| "learning_rate": 0.0001, |
| "loss": 1.9549, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.47430058555627846, |
| "grad_norm": 0.19140364229679108, |
| "learning_rate": 0.0001, |
| "loss": 2.0468, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.4749512036434613, |
| "grad_norm": 0.22166937589645386, |
| "learning_rate": 0.0001, |
| "loss": 2.3432, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.47560182173064414, |
| "grad_norm": 0.2005748748779297, |
| "learning_rate": 0.0001, |
| "loss": 2.0616, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.47625243981782694, |
| "grad_norm": 0.3115980923175812, |
| "learning_rate": 0.0001, |
| "loss": 2.6153, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.47690305790500975, |
| "grad_norm": 0.27135169506073, |
| "learning_rate": 0.0001, |
| "loss": 2.3225, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.47755367599219256, |
| "grad_norm": 0.20748727023601532, |
| "learning_rate": 0.0001, |
| "loss": 1.834, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.4782042940793754, |
| "grad_norm": 0.4031495153903961, |
| "learning_rate": 0.0001, |
| "loss": 2.8177, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.47885491216655823, |
| "grad_norm": 0.2978368401527405, |
| "learning_rate": 0.0001, |
| "loss": 2.6178, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.47950553025374104, |
| "grad_norm": 0.3466270864009857, |
| "learning_rate": 0.0001, |
| "loss": 2.6031, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.4801561483409239, |
| "grad_norm": 0.20074127614498138, |
| "learning_rate": 0.0001, |
| "loss": 2.247, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.4808067664281067, |
| "grad_norm": 0.2393479198217392, |
| "learning_rate": 0.0001, |
| "loss": 2.1265, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.4814573845152895, |
| "grad_norm": 0.27758634090423584, |
| "learning_rate": 0.0001, |
| "loss": 2.5025, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.48210800260247233, |
| "grad_norm": 0.20123820006847382, |
| "learning_rate": 0.0001, |
| "loss": 2.0083, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.4827586206896552, |
| "grad_norm": 0.19012506306171417, |
| "learning_rate": 0.0001, |
| "loss": 2.0212, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.483409238776838, |
| "grad_norm": 0.19451047480106354, |
| "learning_rate": 0.0001, |
| "loss": 2.0295, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.4840598568640208, |
| "grad_norm": 0.3339052200317383, |
| "learning_rate": 0.0001, |
| "loss": 2.4813, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.4847104749512036, |
| "grad_norm": 0.2646152973175049, |
| "learning_rate": 0.0001, |
| "loss": 2.4302, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.4853610930383865, |
| "grad_norm": 0.23590324819087982, |
| "learning_rate": 0.0001, |
| "loss": 2.1723, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.4860117111255693, |
| "grad_norm": 0.28924039006233215, |
| "learning_rate": 0.0001, |
| "loss": 2.8005, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.4866623292127521, |
| "grad_norm": 0.21145464479923248, |
| "learning_rate": 0.0001, |
| "loss": 2.3501, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.48731294729993496, |
| "grad_norm": 0.22815656661987305, |
| "learning_rate": 0.0001, |
| "loss": 2.1997, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.48796356538711777, |
| "grad_norm": 0.24325215816497803, |
| "learning_rate": 0.0001, |
| "loss": 2.039, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.4886141834743006, |
| "grad_norm": 0.3235335052013397, |
| "learning_rate": 0.0001, |
| "loss": 2.4533, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.4892648015614834, |
| "grad_norm": 0.25513559579849243, |
| "learning_rate": 0.0001, |
| "loss": 2.3779, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.48991541964866625, |
| "grad_norm": 0.2905427813529968, |
| "learning_rate": 0.0001, |
| "loss": 1.9843, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.49056603773584906, |
| "grad_norm": 0.23760183155536652, |
| "learning_rate": 0.0001, |
| "loss": 2.1825, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.49121665582303187, |
| "grad_norm": 0.2170071303844452, |
| "learning_rate": 0.0001, |
| "loss": 1.9877, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.49186727391021473, |
| "grad_norm": 0.2555190920829773, |
| "learning_rate": 0.0001, |
| "loss": 2.457, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.49251789199739754, |
| "grad_norm": 0.2571033835411072, |
| "learning_rate": 0.0001, |
| "loss": 2.1152, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.49316851008458035, |
| "grad_norm": 0.23969238996505737, |
| "learning_rate": 0.0001, |
| "loss": 2.3439, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.49381912817176316, |
| "grad_norm": 0.1900262087583542, |
| "learning_rate": 0.0001, |
| "loss": 1.8999, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.494469746258946, |
| "grad_norm": 0.19621430337429047, |
| "learning_rate": 0.0001, |
| "loss": 2.0658, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.4951203643461288, |
| "grad_norm": 0.21956481039524078, |
| "learning_rate": 0.0001, |
| "loss": 2.5427, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.49577098243331164, |
| "grad_norm": 0.22567258775234222, |
| "learning_rate": 0.0001, |
| "loss": 2.2777, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.49642160052049444, |
| "grad_norm": 0.20233570039272308, |
| "learning_rate": 0.0001, |
| "loss": 2.0342, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.4970722186076773, |
| "grad_norm": 0.23662947118282318, |
| "learning_rate": 0.0001, |
| "loss": 2.3668, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.4977228366948601, |
| "grad_norm": 0.2625278830528259, |
| "learning_rate": 0.0001, |
| "loss": 2.6536, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.4983734547820429, |
| "grad_norm": 0.23235228657722473, |
| "learning_rate": 0.0001, |
| "loss": 2.1891, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.4990240728692258, |
| "grad_norm": 0.19439217448234558, |
| "learning_rate": 0.0001, |
| "loss": 1.9647, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.4996746909564086, |
| "grad_norm": 0.19810114800930023, |
| "learning_rate": 0.0001, |
| "loss": 1.9965, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.5003253090435914, |
| "grad_norm": 0.2525380253791809, |
| "learning_rate": 0.0001, |
| "loss": 2.2444, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.5009759271307742, |
| "grad_norm": 0.2409314513206482, |
| "learning_rate": 0.0001, |
| "loss": 2.1717, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.501626545217957, |
| "grad_norm": 0.25244686007499695, |
| "learning_rate": 0.0001, |
| "loss": 2.0126, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.5022771633051398, |
| "grad_norm": 0.19767141342163086, |
| "learning_rate": 0.0001, |
| "loss": 2.1384, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.5029277813923227, |
| "grad_norm": 0.39446812868118286, |
| "learning_rate": 0.0001, |
| "loss": 2.8039, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.5035783994795056, |
| "grad_norm": 0.2643390893936157, |
| "learning_rate": 0.0001, |
| "loss": 2.1524, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.5042290175666884, |
| "grad_norm": 0.27606508135795593, |
| "learning_rate": 0.0001, |
| "loss": 2.1802, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.5048796356538712, |
| "grad_norm": 0.364106148481369, |
| "learning_rate": 0.0001, |
| "loss": 2.9694, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.505530253741054, |
| "grad_norm": 0.23091645538806915, |
| "learning_rate": 0.0001, |
| "loss": 2.5471, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.5061808718282368, |
| "grad_norm": 0.19318193197250366, |
| "learning_rate": 0.0001, |
| "loss": 2.2082, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.5068314899154196, |
| "grad_norm": 0.28997862339019775, |
| "learning_rate": 0.0001, |
| "loss": 2.4399, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.5074821080026025, |
| "grad_norm": 0.22487197816371918, |
| "learning_rate": 0.0001, |
| "loss": 2.1946, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.5081327260897853, |
| "grad_norm": 0.24430596828460693, |
| "learning_rate": 0.0001, |
| "loss": 2.4456, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.5087833441769681, |
| "grad_norm": 0.21677151322364807, |
| "learning_rate": 0.0001, |
| "loss": 2.2082, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.5094339622641509, |
| "grad_norm": 0.47995632886886597, |
| "learning_rate": 0.0001, |
| "loss": 3.1358, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.5100845803513337, |
| "grad_norm": 0.19044414162635803, |
| "learning_rate": 0.0001, |
| "loss": 1.8924, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.5107351984385166, |
| "grad_norm": 0.19143608212471008, |
| "learning_rate": 0.0001, |
| "loss": 2.0459, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.5113858165256994, |
| "grad_norm": 0.22588413953781128, |
| "learning_rate": 0.0001, |
| "loss": 2.1369, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.5120364346128823, |
| "grad_norm": 0.2786167860031128, |
| "learning_rate": 0.0001, |
| "loss": 2.2029, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.5126870527000651, |
| "grad_norm": 0.24471627175807953, |
| "learning_rate": 0.0001, |
| "loss": 2.1248, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.5133376707872479, |
| "grad_norm": 0.17795225977897644, |
| "learning_rate": 0.0001, |
| "loss": 1.7926, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.5139882888744307, |
| "grad_norm": 0.2173709124326706, |
| "learning_rate": 0.0001, |
| "loss": 2.0538, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.5146389069616135, |
| "grad_norm": 0.2027692049741745, |
| "learning_rate": 0.0001, |
| "loss": 1.8568, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.5152895250487963, |
| "grad_norm": 0.2013595849275589, |
| "learning_rate": 0.0001, |
| "loss": 2.0501, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.5159401431359791, |
| "grad_norm": 0.21996662020683289, |
| "learning_rate": 0.0001, |
| "loss": 2.0374, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.516590761223162, |
| "grad_norm": 0.21435722708702087, |
| "learning_rate": 0.0001, |
| "loss": 2.1907, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.5172413793103449, |
| "grad_norm": 0.21512284874916077, |
| "learning_rate": 0.0001, |
| "loss": 2.315, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.5178919973975277, |
| "grad_norm": 0.19432400166988373, |
| "learning_rate": 0.0001, |
| "loss": 2.103, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.5185426154847105, |
| "grad_norm": 0.23112992942333221, |
| "learning_rate": 0.0001, |
| "loss": 2.328, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.5191932335718933, |
| "grad_norm": 0.19719737768173218, |
| "learning_rate": 0.0001, |
| "loss": 1.9569, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.5198438516590761, |
| "grad_norm": 0.2115892618894577, |
| "learning_rate": 0.0001, |
| "loss": 2.2533, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.5204944697462589, |
| "grad_norm": 0.24321842193603516, |
| "learning_rate": 0.0001, |
| "loss": 2.6597, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.5211450878334418, |
| "grad_norm": 0.18219350278377533, |
| "learning_rate": 0.0001, |
| "loss": 1.8709, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.5217957059206246, |
| "grad_norm": 0.18715021014213562, |
| "learning_rate": 0.0001, |
| "loss": 2.0021, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.5224463240078074, |
| "grad_norm": 0.25940024852752686, |
| "learning_rate": 0.0001, |
| "loss": 2.3742, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.5230969420949902, |
| "grad_norm": 0.18714728951454163, |
| "learning_rate": 0.0001, |
| "loss": 2.211, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.523747560182173, |
| "grad_norm": 0.20145951211452484, |
| "learning_rate": 0.0001, |
| "loss": 2.0047, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.5243981782693559, |
| "grad_norm": 0.18992845714092255, |
| "learning_rate": 0.0001, |
| "loss": 1.8559, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.5250487963565387, |
| "grad_norm": 0.2682324945926666, |
| "learning_rate": 0.0001, |
| "loss": 2.4791, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.5256994144437215, |
| "grad_norm": 0.33034664392471313, |
| "learning_rate": 0.0001, |
| "loss": 2.3089, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.5263500325309044, |
| "grad_norm": 0.18838956952095032, |
| "learning_rate": 0.0001, |
| "loss": 1.9462, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.5270006506180872, |
| "grad_norm": 0.42872169613838196, |
| "learning_rate": 0.0001, |
| "loss": 2.6874, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.52765126870527, |
| "grad_norm": 0.2108643501996994, |
| "learning_rate": 0.0001, |
| "loss": 2.3627, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.5283018867924528, |
| "grad_norm": 0.21745599806308746, |
| "learning_rate": 0.0001, |
| "loss": 2.1204, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.5289525048796356, |
| "grad_norm": 0.2577585279941559, |
| "learning_rate": 0.0001, |
| "loss": 1.9746, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.5296031229668184, |
| "grad_norm": 0.372471421957016, |
| "learning_rate": 0.0001, |
| "loss": 2.688, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.5302537410540012, |
| "grad_norm": 0.2425181120634079, |
| "learning_rate": 0.0001, |
| "loss": 2.1377, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.5309043591411842, |
| "grad_norm": 0.2638307511806488, |
| "learning_rate": 0.0001, |
| "loss": 2.1088, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.531554977228367, |
| "grad_norm": 0.2356933355331421, |
| "learning_rate": 0.0001, |
| "loss": 2.2291, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.5322055953155498, |
| "grad_norm": 0.23714864253997803, |
| "learning_rate": 0.0001, |
| "loss": 2.0929, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.5328562134027326, |
| "grad_norm": 0.19541950523853302, |
| "learning_rate": 0.0001, |
| "loss": 2.0883, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.5335068314899154, |
| "grad_norm": 0.3091617822647095, |
| "learning_rate": 0.0001, |
| "loss": 3.0127, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.5341574495770982, |
| "grad_norm": 0.2592740058898926, |
| "learning_rate": 0.0001, |
| "loss": 1.8307, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.534808067664281, |
| "grad_norm": 0.22505807876586914, |
| "learning_rate": 0.0001, |
| "loss": 2.462, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.5354586857514639, |
| "grad_norm": 0.22032824158668518, |
| "learning_rate": 0.0001, |
| "loss": 2.2718, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.5361093038386467, |
| "grad_norm": 0.2457459270954132, |
| "learning_rate": 0.0001, |
| "loss": 2.4213, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.5367599219258296, |
| "grad_norm": 0.24181683361530304, |
| "learning_rate": 0.0001, |
| "loss": 1.9347, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.5374105400130124, |
| "grad_norm": 0.29988738894462585, |
| "learning_rate": 0.0001, |
| "loss": 2.7697, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.5380611581001952, |
| "grad_norm": 0.24946388602256775, |
| "learning_rate": 0.0001, |
| "loss": 2.2117, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.538711776187378, |
| "grad_norm": 0.20339331030845642, |
| "learning_rate": 0.0001, |
| "loss": 1.9936, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.5393623942745608, |
| "grad_norm": 0.22250457108020782, |
| "learning_rate": 0.0001, |
| "loss": 2.0785, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.5400130123617437, |
| "grad_norm": 0.1869298666715622, |
| "learning_rate": 0.0001, |
| "loss": 2.0406, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.5406636304489265, |
| "grad_norm": 0.1873755156993866, |
| "learning_rate": 0.0001, |
| "loss": 1.9126, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.5413142485361093, |
| "grad_norm": 0.3135535418987274, |
| "learning_rate": 0.0001, |
| "loss": 2.2881, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.5419648666232921, |
| "grad_norm": 0.20596185326576233, |
| "learning_rate": 0.0001, |
| "loss": 2.0682, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.5426154847104749, |
| "grad_norm": 0.25786712765693665, |
| "learning_rate": 0.0001, |
| "loss": 2.0591, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.5432661027976577, |
| "grad_norm": 0.2592066824436188, |
| "learning_rate": 0.0001, |
| "loss": 2.052, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.5439167208848406, |
| "grad_norm": 0.20738951861858368, |
| "learning_rate": 0.0001, |
| "loss": 1.9726, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.5445673389720235, |
| "grad_norm": 0.21384763717651367, |
| "learning_rate": 0.0001, |
| "loss": 2.1897, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.5452179570592063, |
| "grad_norm": 0.22050943970680237, |
| "learning_rate": 0.0001, |
| "loss": 2.3597, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.5458685751463891, |
| "grad_norm": 0.1996280699968338, |
| "learning_rate": 0.0001, |
| "loss": 2.0492, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.5465191932335719, |
| "grad_norm": 0.2430533468723297, |
| "learning_rate": 0.0001, |
| "loss": 2.2774, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.5471698113207547, |
| "grad_norm": 0.22777177393436432, |
| "learning_rate": 0.0001, |
| "loss": 2.0779, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.5478204294079375, |
| "grad_norm": 0.22464539110660553, |
| "learning_rate": 0.0001, |
| "loss": 2.3316, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.5484710474951203, |
| "grad_norm": 0.17759400606155396, |
| "learning_rate": 0.0001, |
| "loss": 1.8407, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.5491216655823032, |
| "grad_norm": 0.22264355421066284, |
| "learning_rate": 0.0001, |
| "loss": 2.2869, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.549772283669486, |
| "grad_norm": 0.20819737017154694, |
| "learning_rate": 0.0001, |
| "loss": 2.1209, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.5504229017566689, |
| "grad_norm": 0.2194463461637497, |
| "learning_rate": 0.0001, |
| "loss": 2.1457, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.5510735198438517, |
| "grad_norm": 0.19314661622047424, |
| "learning_rate": 0.0001, |
| "loss": 2.1063, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.5517241379310345, |
| "grad_norm": 0.186354860663414, |
| "learning_rate": 0.0001, |
| "loss": 2.0833, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.5523747560182173, |
| "grad_norm": 0.1862732619047165, |
| "learning_rate": 0.0001, |
| "loss": 1.9441, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.5530253741054001, |
| "grad_norm": 0.24664181470870972, |
| "learning_rate": 0.0001, |
| "loss": 2.3277, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.5536759921925829, |
| "grad_norm": 0.20182165503501892, |
| "learning_rate": 0.0001, |
| "loss": 2.1902, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.5543266102797658, |
| "grad_norm": 0.2108999788761139, |
| "learning_rate": 0.0001, |
| "loss": 2.0826, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.5549772283669486, |
| "grad_norm": 0.25388890504837036, |
| "learning_rate": 0.0001, |
| "loss": 2.5149, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.5556278464541314, |
| "grad_norm": 0.2074718177318573, |
| "learning_rate": 0.0001, |
| "loss": 1.9135, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.5562784645413142, |
| "grad_norm": 0.1992723047733307, |
| "learning_rate": 0.0001, |
| "loss": 2.186, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.556929082628497, |
| "grad_norm": 0.18721085786819458, |
| "learning_rate": 0.0001, |
| "loss": 1.9453, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.5575797007156799, |
| "grad_norm": 0.21606992185115814, |
| "learning_rate": 0.0001, |
| "loss": 2.1703, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.5582303188028627, |
| "grad_norm": 0.2854723334312439, |
| "learning_rate": 0.0001, |
| "loss": 2.9538, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.5588809368900456, |
| "grad_norm": 0.21503040194511414, |
| "learning_rate": 0.0001, |
| "loss": 2.0194, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.5595315549772284, |
| "grad_norm": 0.2690679430961609, |
| "learning_rate": 0.0001, |
| "loss": 2.1562, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.5601821730644112, |
| "grad_norm": 0.2811613976955414, |
| "learning_rate": 0.0001, |
| "loss": 2.2475, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.560832791151594, |
| "grad_norm": 0.2551681697368622, |
| "learning_rate": 0.0001, |
| "loss": 2.5585, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.5614834092387768, |
| "grad_norm": 0.21423856914043427, |
| "learning_rate": 0.0001, |
| "loss": 2.1194, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.5621340273259596, |
| "grad_norm": 0.22121264040470123, |
| "learning_rate": 0.0001, |
| "loss": 1.9257, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.5627846454131424, |
| "grad_norm": 0.38684332370758057, |
| "learning_rate": 0.0001, |
| "loss": 2.5203, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.5634352635003254, |
| "grad_norm": 0.20299634337425232, |
| "learning_rate": 0.0001, |
| "loss": 2.0868, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.5640858815875082, |
| "grad_norm": 0.33485493063926697, |
| "learning_rate": 0.0001, |
| "loss": 2.457, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.564736499674691, |
| "grad_norm": 0.23778866231441498, |
| "learning_rate": 0.0001, |
| "loss": 1.9863, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.5653871177618738, |
| "grad_norm": 0.18562458455562592, |
| "learning_rate": 0.0001, |
| "loss": 1.915, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.5660377358490566, |
| "grad_norm": 0.3780176341533661, |
| "learning_rate": 0.0001, |
| "loss": 2.5518, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.5666883539362394, |
| "grad_norm": 0.1924014538526535, |
| "learning_rate": 0.0001, |
| "loss": 2.0665, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.5673389720234222, |
| "grad_norm": 0.19788160920143127, |
| "learning_rate": 0.0001, |
| "loss": 1.9408, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.5679895901106051, |
| "grad_norm": 0.2435147911310196, |
| "learning_rate": 0.0001, |
| "loss": 2.3716, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.5686402081977879, |
| "grad_norm": 0.2023211270570755, |
| "learning_rate": 0.0001, |
| "loss": 2.2786, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.5692908262849707, |
| "grad_norm": 0.29936715960502625, |
| "learning_rate": 0.0001, |
| "loss": 2.6689, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.5699414443721535, |
| "grad_norm": 0.18846483528614044, |
| "learning_rate": 0.0001, |
| "loss": 1.9436, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.5705920624593364, |
| "grad_norm": 0.44592785835266113, |
| "learning_rate": 0.0001, |
| "loss": 2.8648, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.5712426805465192, |
| "grad_norm": 0.221640944480896, |
| "learning_rate": 0.0001, |
| "loss": 2.1613, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.571893298633702, |
| "grad_norm": 0.22345726191997528, |
| "learning_rate": 0.0001, |
| "loss": 2.076, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.5725439167208849, |
| "grad_norm": 0.20094214379787445, |
| "learning_rate": 0.0001, |
| "loss": 2.0474, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.5731945348080677, |
| "grad_norm": 0.1997043937444687, |
| "learning_rate": 0.0001, |
| "loss": 1.9812, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.5738451528952505, |
| "grad_norm": 0.3758605420589447, |
| "learning_rate": 0.0001, |
| "loss": 2.8357, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.5744957709824333, |
| "grad_norm": 0.2940578758716583, |
| "learning_rate": 0.0001, |
| "loss": 2.4955, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.5751463890696161, |
| "grad_norm": 0.2434762865304947, |
| "learning_rate": 0.0001, |
| "loss": 2.0011, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.5757970071567989, |
| "grad_norm": 0.24335308372974396, |
| "learning_rate": 0.0001, |
| "loss": 2.5458, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.5764476252439817, |
| "grad_norm": 0.2063351422548294, |
| "learning_rate": 0.0001, |
| "loss": 1.9801, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.5770982433311646, |
| "grad_norm": 0.35102301836013794, |
| "learning_rate": 0.0001, |
| "loss": 2.5647, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.5777488614183475, |
| "grad_norm": 0.22332875430583954, |
| "learning_rate": 0.0001, |
| "loss": 2.0542, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.5783994795055303, |
| "grad_norm": 0.2073124796152115, |
| "learning_rate": 0.0001, |
| "loss": 1.9348, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.5790500975927131, |
| "grad_norm": 0.21079733967781067, |
| "learning_rate": 0.0001, |
| "loss": 1.9829, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.5797007156798959, |
| "grad_norm": 0.2842913866043091, |
| "learning_rate": 0.0001, |
| "loss": 2.7215, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.5803513337670787, |
| "grad_norm": 0.2807595133781433, |
| "learning_rate": 0.0001, |
| "loss": 2.1827, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.5810019518542615, |
| "grad_norm": 0.24955599009990692, |
| "learning_rate": 0.0001, |
| "loss": 2.6246, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.5816525699414443, |
| "grad_norm": 0.23281241953372955, |
| "learning_rate": 0.0001, |
| "loss": 2.3944, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.5823031880286272, |
| "grad_norm": 0.2617682218551636, |
| "learning_rate": 0.0001, |
| "loss": 2.6147, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.58295380611581, |
| "grad_norm": 0.1915360391139984, |
| "learning_rate": 0.0001, |
| "loss": 2.0095, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.5836044242029929, |
| "grad_norm": 0.20270249247550964, |
| "learning_rate": 0.0001, |
| "loss": 1.8983, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.5842550422901757, |
| "grad_norm": 0.21804624795913696, |
| "learning_rate": 0.0001, |
| "loss": 2.0425, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.5849056603773585, |
| "grad_norm": 0.25326576828956604, |
| "learning_rate": 0.0001, |
| "loss": 2.4875, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.5855562784645413, |
| "grad_norm": 0.21714434027671814, |
| "learning_rate": 0.0001, |
| "loss": 2.269, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.5862068965517241, |
| "grad_norm": 0.22771766781806946, |
| "learning_rate": 0.0001, |
| "loss": 2.3039, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.586857514638907, |
| "grad_norm": 0.3638748824596405, |
| "learning_rate": 0.0001, |
| "loss": 2.7448, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.5875081327260898, |
| "grad_norm": 0.20194686949253082, |
| "learning_rate": 0.0001, |
| "loss": 2.0141, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.5881587508132726, |
| "grad_norm": 0.187494158744812, |
| "learning_rate": 0.0001, |
| "loss": 2.1188, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.5888093689004554, |
| "grad_norm": 0.23371635377407074, |
| "learning_rate": 0.0001, |
| "loss": 2.6014, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.5894599869876382, |
| "grad_norm": 0.2642146050930023, |
| "learning_rate": 0.0001, |
| "loss": 2.2053, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.590110605074821, |
| "grad_norm": 0.20045514404773712, |
| "learning_rate": 0.0001, |
| "loss": 2.1828, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.5907612231620039, |
| "grad_norm": 0.22904321551322937, |
| "learning_rate": 0.0001, |
| "loss": 2.3128, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.5914118412491868, |
| "grad_norm": 0.36857542395591736, |
| "learning_rate": 0.0001, |
| "loss": 3.3891, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.5920624593363696, |
| "grad_norm": 0.3417764902114868, |
| "learning_rate": 0.0001, |
| "loss": 2.6737, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.5927130774235524, |
| "grad_norm": 0.46861669421195984, |
| "learning_rate": 0.0001, |
| "loss": 2.5329, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.5933636955107352, |
| "grad_norm": 0.32909440994262695, |
| "learning_rate": 0.0001, |
| "loss": 2.4894, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.594014313597918, |
| "grad_norm": 0.2176060974597931, |
| "learning_rate": 0.0001, |
| "loss": 1.9696, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.5946649316851008, |
| "grad_norm": 0.27317941188812256, |
| "learning_rate": 0.0001, |
| "loss": 2.2179, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.5953155497722836, |
| "grad_norm": 0.267123281955719, |
| "learning_rate": 0.0001, |
| "loss": 2.5464, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.5959661678594665, |
| "grad_norm": 0.320402055978775, |
| "learning_rate": 0.0001, |
| "loss": 2.5021, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.5966167859466494, |
| "grad_norm": 0.20610998570919037, |
| "learning_rate": 0.0001, |
| "loss": 2.0586, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.5972674040338322, |
| "grad_norm": 0.2108345478773117, |
| "learning_rate": 0.0001, |
| "loss": 2.3278, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.597918022121015, |
| "grad_norm": 0.18368126451969147, |
| "learning_rate": 0.0001, |
| "loss": 2.1026, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.5985686402081978, |
| "grad_norm": 0.20730890333652496, |
| "learning_rate": 0.0001, |
| "loss": 2.1936, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.5992192582953806, |
| "grad_norm": 0.2921161651611328, |
| "learning_rate": 0.0001, |
| "loss": 2.5618, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.5998698763825634, |
| "grad_norm": 0.23977220058441162, |
| "learning_rate": 0.0001, |
| "loss": 2.533, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.6005204944697463, |
| "grad_norm": 0.25839105248451233, |
| "learning_rate": 0.0001, |
| "loss": 2.7033, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.6011711125569291, |
| "grad_norm": 0.214335098862648, |
| "learning_rate": 0.0001, |
| "loss": 1.9153, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.6018217306441119, |
| "grad_norm": 0.19577006995677948, |
| "learning_rate": 0.0001, |
| "loss": 1.8612, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.6024723487312947, |
| "grad_norm": 0.22480078041553497, |
| "learning_rate": 0.0001, |
| "loss": 2.2383, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.6031229668184775, |
| "grad_norm": 0.2090427577495575, |
| "learning_rate": 0.0001, |
| "loss": 1.9532, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.6037735849056604, |
| "grad_norm": 0.21045666933059692, |
| "learning_rate": 0.0001, |
| "loss": 2.1285, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.6044242029928432, |
| "grad_norm": 0.2302238792181015, |
| "learning_rate": 0.0001, |
| "loss": 2.5368, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.605074821080026, |
| "grad_norm": 0.22230245172977448, |
| "learning_rate": 0.0001, |
| "loss": 2.0551, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.6057254391672089, |
| "grad_norm": 0.2619292140007019, |
| "learning_rate": 0.0001, |
| "loss": 2.5149, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.6063760572543917, |
| "grad_norm": 0.20247308909893036, |
| "learning_rate": 0.0001, |
| "loss": 2.0032, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.6070266753415745, |
| "grad_norm": 0.19772449135780334, |
| "learning_rate": 0.0001, |
| "loss": 1.9627, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.6076772934287573, |
| "grad_norm": 0.1917680948972702, |
| "learning_rate": 0.0001, |
| "loss": 1.9659, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.6083279115159401, |
| "grad_norm": 0.3457018733024597, |
| "learning_rate": 0.0001, |
| "loss": 2.4537, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.6089785296031229, |
| "grad_norm": 0.2027028501033783, |
| "learning_rate": 0.0001, |
| "loss": 2.1681, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.6096291476903057, |
| "grad_norm": 0.24525637924671173, |
| "learning_rate": 0.0001, |
| "loss": 2.0816, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.6102797657774887, |
| "grad_norm": 0.2690584659576416, |
| "learning_rate": 0.0001, |
| "loss": 2.7011, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.6109303838646715, |
| "grad_norm": 0.20961976051330566, |
| "learning_rate": 0.0001, |
| "loss": 2.576, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.6115810019518543, |
| "grad_norm": 0.21827319264411926, |
| "learning_rate": 0.0001, |
| "loss": 2.2605, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.6122316200390371, |
| "grad_norm": 0.20448362827301025, |
| "learning_rate": 0.0001, |
| "loss": 1.9963, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.6128822381262199, |
| "grad_norm": 0.2513864040374756, |
| "learning_rate": 0.0001, |
| "loss": 2.4111, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.6135328562134027, |
| "grad_norm": 0.28347763419151306, |
| "learning_rate": 0.0001, |
| "loss": 2.3459, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.6141834743005855, |
| "grad_norm": 0.20679716765880585, |
| "learning_rate": 0.0001, |
| "loss": 1.9423, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.6148340923877684, |
| "grad_norm": 0.20072445273399353, |
| "learning_rate": 0.0001, |
| "loss": 2.2, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.6154847104749512, |
| "grad_norm": 0.2190425843000412, |
| "learning_rate": 0.0001, |
| "loss": 2.358, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.616135328562134, |
| "grad_norm": 0.2672726511955261, |
| "learning_rate": 0.0001, |
| "loss": 2.5034, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.6167859466493169, |
| "grad_norm": 0.20329232513904572, |
| "learning_rate": 0.0001, |
| "loss": 2.2972, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.6174365647364997, |
| "grad_norm": 0.21593444049358368, |
| "learning_rate": 0.0001, |
| "loss": 2.8221, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.6180871828236825, |
| "grad_norm": 0.22062361240386963, |
| "learning_rate": 0.0001, |
| "loss": 2.2051, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.6187378009108653, |
| "grad_norm": 0.20640413463115692, |
| "learning_rate": 0.0001, |
| "loss": 2.1973, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.6193884189980482, |
| "grad_norm": 0.18919388949871063, |
| "learning_rate": 0.0001, |
| "loss": 2.1166, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.620039037085231, |
| "grad_norm": 0.18566597998142242, |
| "learning_rate": 0.0001, |
| "loss": 1.9342, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.6206896551724138, |
| "grad_norm": 0.3724953234195709, |
| "learning_rate": 0.0001, |
| "loss": 3.0303, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.6213402732595966, |
| "grad_norm": 0.24559584259986877, |
| "learning_rate": 0.0001, |
| "loss": 2.387, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.6219908913467794, |
| "grad_norm": 0.20384235680103302, |
| "learning_rate": 0.0001, |
| "loss": 2.1224, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.6226415094339622, |
| "grad_norm": 0.3225831687450409, |
| "learning_rate": 0.0001, |
| "loss": 2.4856, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.623292127521145, |
| "grad_norm": 0.21676267683506012, |
| "learning_rate": 0.0001, |
| "loss": 2.3457, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.623942745608328, |
| "grad_norm": 0.21707187592983246, |
| "learning_rate": 0.0001, |
| "loss": 2.3985, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.6245933636955108, |
| "grad_norm": 0.311277836561203, |
| "learning_rate": 0.0001, |
| "loss": 2.3087, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.6252439817826936, |
| "grad_norm": 0.18904085457324982, |
| "learning_rate": 0.0001, |
| "loss": 1.9421, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.6258945998698764, |
| "grad_norm": 0.39046210050582886, |
| "learning_rate": 0.0001, |
| "loss": 2.7524, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.6265452179570592, |
| "grad_norm": 0.18455897271633148, |
| "learning_rate": 0.0001, |
| "loss": 1.7536, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.627195836044242, |
| "grad_norm": 0.1874053180217743, |
| "learning_rate": 0.0001, |
| "loss": 2.0853, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.6278464541314248, |
| "grad_norm": 0.24766068160533905, |
| "learning_rate": 0.0001, |
| "loss": 2.8099, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.6284970722186076, |
| "grad_norm": 0.20977729558944702, |
| "learning_rate": 0.0001, |
| "loss": 2.0339, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.6291476903057905, |
| "grad_norm": 0.2659202516078949, |
| "learning_rate": 0.0001, |
| "loss": 2.1282, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.6297983083929733, |
| "grad_norm": 0.23760046064853668, |
| "learning_rate": 0.0001, |
| "loss": 2.4225, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.6304489264801562, |
| "grad_norm": 0.1884511113166809, |
| "learning_rate": 0.0001, |
| "loss": 1.972, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.631099544567339, |
| "grad_norm": 0.2816404402256012, |
| "learning_rate": 0.0001, |
| "loss": 2.6831, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.6317501626545218, |
| "grad_norm": 0.1874386966228485, |
| "learning_rate": 0.0001, |
| "loss": 2.0042, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.6324007807417046, |
| "grad_norm": 0.21592558920383453, |
| "learning_rate": 0.0001, |
| "loss": 2.338, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.6330513988288874, |
| "grad_norm": 0.22190915048122406, |
| "learning_rate": 0.0001, |
| "loss": 2.23, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.6337020169160703, |
| "grad_norm": 0.23270365595817566, |
| "learning_rate": 0.0001, |
| "loss": 2.1849, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.6343526350032531, |
| "grad_norm": 0.20524165034294128, |
| "learning_rate": 0.0001, |
| "loss": 1.8509, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.6350032530904359, |
| "grad_norm": 0.27826493978500366, |
| "learning_rate": 0.0001, |
| "loss": 2.6736, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.6356538711776187, |
| "grad_norm": 0.19887575507164001, |
| "learning_rate": 0.0001, |
| "loss": 2.1369, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.6363044892648015, |
| "grad_norm": 0.3760605752468109, |
| "learning_rate": 0.0001, |
| "loss": 2.7617, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.6369551073519844, |
| "grad_norm": 0.2116486132144928, |
| "learning_rate": 0.0001, |
| "loss": 2.1353, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.6376057254391672, |
| "grad_norm": 0.20685400068759918, |
| "learning_rate": 0.0001, |
| "loss": 2.2221, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.6382563435263501, |
| "grad_norm": 0.25631460547447205, |
| "learning_rate": 0.0001, |
| "loss": 2.2755, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.6389069616135329, |
| "grad_norm": 0.2831932604312897, |
| "learning_rate": 0.0001, |
| "loss": 2.2544, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.6395575797007157, |
| "grad_norm": 0.19301310181617737, |
| "learning_rate": 0.0001, |
| "loss": 2.1736, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.6402081977878985, |
| "grad_norm": 0.18511143326759338, |
| "learning_rate": 0.0001, |
| "loss": 1.8847, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.6408588158750813, |
| "grad_norm": 0.23753167688846588, |
| "learning_rate": 0.0001, |
| "loss": 2.131, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.6415094339622641, |
| "grad_norm": 0.24566152691841125, |
| "learning_rate": 0.0001, |
| "loss": 2.2071, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.6421600520494469, |
| "grad_norm": 0.21481812000274658, |
| "learning_rate": 0.0001, |
| "loss": 2.0292, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.6428106701366298, |
| "grad_norm": 0.3042278587818146, |
| "learning_rate": 0.0001, |
| "loss": 2.6444, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.6434612882238127, |
| "grad_norm": 0.30741778016090393, |
| "learning_rate": 0.0001, |
| "loss": 2.5146, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.6441119063109955, |
| "grad_norm": 0.40835896134376526, |
| "learning_rate": 0.0001, |
| "loss": 2.9053, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.6447625243981783, |
| "grad_norm": 0.21121574938297272, |
| "learning_rate": 0.0001, |
| "loss": 2.4513, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.6454131424853611, |
| "grad_norm": 0.2634606659412384, |
| "learning_rate": 0.0001, |
| "loss": 2.3141, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.6460637605725439, |
| "grad_norm": 0.2463708072900772, |
| "learning_rate": 0.0001, |
| "loss": 2.4421, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.6467143786597267, |
| "grad_norm": 0.25485244393348694, |
| "learning_rate": 0.0001, |
| "loss": 2.3788, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.6473649967469096, |
| "grad_norm": 0.20773370563983917, |
| "learning_rate": 0.0001, |
| "loss": 1.9861, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.6480156148340924, |
| "grad_norm": 0.20728078484535217, |
| "learning_rate": 0.0001, |
| "loss": 2.3341, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.6486662329212752, |
| "grad_norm": 0.26925981044769287, |
| "learning_rate": 0.0001, |
| "loss": 2.9172, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.649316851008458, |
| "grad_norm": 0.21403877437114716, |
| "learning_rate": 0.0001, |
| "loss": 2.1318, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.6499674690956408, |
| "grad_norm": 0.2597064673900604, |
| "learning_rate": 0.0001, |
| "loss": 2.4316, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.6506180871828237, |
| "grad_norm": 0.26858747005462646, |
| "learning_rate": 0.0001, |
| "loss": 2.2716, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.6512687052700065, |
| "grad_norm": 0.5603036880493164, |
| "learning_rate": 0.0001, |
| "loss": 3.1137, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.6519193233571894, |
| "grad_norm": 0.2423018366098404, |
| "learning_rate": 0.0001, |
| "loss": 2.2346, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.6525699414443722, |
| "grad_norm": 0.22914621233940125, |
| "learning_rate": 0.0001, |
| "loss": 2.2852, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.653220559531555, |
| "grad_norm": 0.22781658172607422, |
| "learning_rate": 0.0001, |
| "loss": 2.1961, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.6538711776187378, |
| "grad_norm": 0.2614092528820038, |
| "learning_rate": 0.0001, |
| "loss": 2.0631, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.6545217957059206, |
| "grad_norm": 0.23658867180347443, |
| "learning_rate": 0.0001, |
| "loss": 2.0379, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.6551724137931034, |
| "grad_norm": 0.20862211287021637, |
| "learning_rate": 0.0001, |
| "loss": 2.2786, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.6558230318802862, |
| "grad_norm": 0.2251960188150406, |
| "learning_rate": 0.0001, |
| "loss": 2.06, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.656473649967469, |
| "grad_norm": 0.2885074317455292, |
| "learning_rate": 0.0001, |
| "loss": 2.2583, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.657124268054652, |
| "grad_norm": 0.20309656858444214, |
| "learning_rate": 0.0001, |
| "loss": 2.1557, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.6577748861418348, |
| "grad_norm": 0.20139531791210175, |
| "learning_rate": 0.0001, |
| "loss": 2.3419, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.6584255042290176, |
| "grad_norm": 0.2853332757949829, |
| "learning_rate": 0.0001, |
| "loss": 2.1415, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.6590761223162004, |
| "grad_norm": 0.2907620966434479, |
| "learning_rate": 0.0001, |
| "loss": 2.4452, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.6597267404033832, |
| "grad_norm": 0.18982461094856262, |
| "learning_rate": 0.0001, |
| "loss": 2.0215, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.660377358490566, |
| "grad_norm": 0.20890061557292938, |
| "learning_rate": 0.0001, |
| "loss": 2.0383, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.6610279765777488, |
| "grad_norm": 0.21294118463993073, |
| "learning_rate": 0.0001, |
| "loss": 1.7722, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.6616785946649317, |
| "grad_norm": 0.22494040429592133, |
| "learning_rate": 0.0001, |
| "loss": 2.034, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.6623292127521145, |
| "grad_norm": 0.25089555978775024, |
| "learning_rate": 0.0001, |
| "loss": 2.3322, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.6629798308392973, |
| "grad_norm": 0.18898023664951324, |
| "learning_rate": 0.0001, |
| "loss": 1.9914, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.6636304489264802, |
| "grad_norm": 0.221091166138649, |
| "learning_rate": 0.0001, |
| "loss": 2.1613, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.664281067013663, |
| "grad_norm": 0.22317297756671906, |
| "learning_rate": 0.0001, |
| "loss": 2.3438, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.6649316851008458, |
| "grad_norm": 0.18826670944690704, |
| "learning_rate": 0.0001, |
| "loss": 2.0218, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.6655823031880286, |
| "grad_norm": 0.22612391412258148, |
| "learning_rate": 0.0001, |
| "loss": 2.2931, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.6662329212752115, |
| "grad_norm": 0.3006114959716797, |
| "learning_rate": 0.0001, |
| "loss": 2.4949, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.6668835393623943, |
| "grad_norm": 0.1835569143295288, |
| "learning_rate": 0.0001, |
| "loss": 1.9396, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.6675341574495771, |
| "grad_norm": 0.19352416694164276, |
| "learning_rate": 0.0001, |
| "loss": 2.0038, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.6681847755367599, |
| "grad_norm": 0.2259102463722229, |
| "learning_rate": 0.0001, |
| "loss": 2.1818, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.6688353936239427, |
| "grad_norm": 0.20237034559249878, |
| "learning_rate": 0.0001, |
| "loss": 2.3196, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.6694860117111255, |
| "grad_norm": 0.1844060719013214, |
| "learning_rate": 0.0001, |
| "loss": 2.1389, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.6701366297983083, |
| "grad_norm": 0.21057841181755066, |
| "learning_rate": 0.0001, |
| "loss": 2.0058, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.6707872478854913, |
| "grad_norm": 0.20054426789283752, |
| "learning_rate": 0.0001, |
| "loss": 2.2874, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.6714378659726741, |
| "grad_norm": 0.2507307529449463, |
| "learning_rate": 0.0001, |
| "loss": 2.4245, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.6720884840598569, |
| "grad_norm": 0.21066251397132874, |
| "learning_rate": 0.0001, |
| "loss": 2.1688, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.6727391021470397, |
| "grad_norm": 0.22210632264614105, |
| "learning_rate": 0.0001, |
| "loss": 2.1985, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.6733897202342225, |
| "grad_norm": 0.21617744863033295, |
| "learning_rate": 0.0001, |
| "loss": 2.5918, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.6740403383214053, |
| "grad_norm": 0.46473971009254456, |
| "learning_rate": 0.0001, |
| "loss": 2.9341, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.6746909564085881, |
| "grad_norm": 0.20464558899402618, |
| "learning_rate": 0.0001, |
| "loss": 2.1654, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.675341574495771, |
| "grad_norm": 0.212956503033638, |
| "learning_rate": 0.0001, |
| "loss": 2.1959, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.6759921925829538, |
| "grad_norm": 0.2572340667247772, |
| "learning_rate": 0.0001, |
| "loss": 2.4918, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.6766428106701367, |
| "grad_norm": 0.3264685273170471, |
| "learning_rate": 0.0001, |
| "loss": 2.8708, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.6772934287573195, |
| "grad_norm": 0.22119931876659393, |
| "learning_rate": 0.0001, |
| "loss": 2.2222, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.6779440468445023, |
| "grad_norm": 0.24374569952487946, |
| "learning_rate": 0.0001, |
| "loss": 2.2457, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.6785946649316851, |
| "grad_norm": 0.2548108696937561, |
| "learning_rate": 0.0001, |
| "loss": 2.485, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.6792452830188679, |
| "grad_norm": 0.20976418256759644, |
| "learning_rate": 0.0001, |
| "loss": 2.3068, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.6798959011060507, |
| "grad_norm": 0.25135618448257446, |
| "learning_rate": 0.0001, |
| "loss": 2.1083, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.6805465191932336, |
| "grad_norm": 0.2677728831768036, |
| "learning_rate": 0.0001, |
| "loss": 2.4257, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.6811971372804164, |
| "grad_norm": 0.20250125229358673, |
| "learning_rate": 0.0001, |
| "loss": 2.0643, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.6818477553675992, |
| "grad_norm": 0.20850299298763275, |
| "learning_rate": 0.0001, |
| "loss": 2.0383, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.682498373454782, |
| "grad_norm": 0.21116970479488373, |
| "learning_rate": 0.0001, |
| "loss": 2.0259, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.6831489915419648, |
| "grad_norm": 0.2572707235813141, |
| "learning_rate": 0.0001, |
| "loss": 2.1982, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.6837996096291477, |
| "grad_norm": 0.2010831981897354, |
| "learning_rate": 0.0001, |
| "loss": 2.0687, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.6844502277163305, |
| "grad_norm": 0.23995356261730194, |
| "learning_rate": 0.0001, |
| "loss": 2.1938, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.6851008458035134, |
| "grad_norm": 0.21428103744983673, |
| "learning_rate": 0.0001, |
| "loss": 2.2514, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.6857514638906962, |
| "grad_norm": 0.21370433270931244, |
| "learning_rate": 0.0001, |
| "loss": 2.2523, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.686402081977879, |
| "grad_norm": 0.2131800800561905, |
| "learning_rate": 0.0001, |
| "loss": 2.2413, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.6870527000650618, |
| "grad_norm": 0.20007681846618652, |
| "learning_rate": 0.0001, |
| "loss": 2.176, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.6877033181522446, |
| "grad_norm": 0.2108153998851776, |
| "learning_rate": 0.0001, |
| "loss": 2.1081, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.6883539362394274, |
| "grad_norm": 0.19952858984470367, |
| "learning_rate": 0.0001, |
| "loss": 2.0249, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.6890045543266102, |
| "grad_norm": 0.20590882003307343, |
| "learning_rate": 0.0001, |
| "loss": 2.1949, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.6896551724137931, |
| "grad_norm": 0.2126530408859253, |
| "learning_rate": 0.0001, |
| "loss": 2.2726, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.690305790500976, |
| "grad_norm": 0.30162468552589417, |
| "learning_rate": 0.0001, |
| "loss": 2.5032, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.6909564085881588, |
| "grad_norm": 0.24452462792396545, |
| "learning_rate": 0.0001, |
| "loss": 2.3021, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.6916070266753416, |
| "grad_norm": 0.17819760739803314, |
| "learning_rate": 0.0001, |
| "loss": 1.9628, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.6922576447625244, |
| "grad_norm": 0.17437471449375153, |
| "learning_rate": 0.0001, |
| "loss": 1.879, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.6929082628497072, |
| "grad_norm": 0.3003963232040405, |
| "learning_rate": 0.0001, |
| "loss": 2.4695, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.69355888093689, |
| "grad_norm": 0.2007562667131424, |
| "learning_rate": 0.0001, |
| "loss": 1.9754, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.6942094990240729, |
| "grad_norm": 0.21425336599349976, |
| "learning_rate": 0.0001, |
| "loss": 2.1767, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.6948601171112557, |
| "grad_norm": 0.20287302136421204, |
| "learning_rate": 0.0001, |
| "loss": 1.9933, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.6955107351984385, |
| "grad_norm": 0.2762700021266937, |
| "learning_rate": 0.0001, |
| "loss": 2.1079, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.6961613532856213, |
| "grad_norm": 0.18358288705348969, |
| "learning_rate": 0.0001, |
| "loss": 1.9445, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.6968119713728042, |
| "grad_norm": 0.21157526969909668, |
| "learning_rate": 0.0001, |
| "loss": 2.169, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.697462589459987, |
| "grad_norm": 0.1847715675830841, |
| "learning_rate": 0.0001, |
| "loss": 2.0757, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.6981132075471698, |
| "grad_norm": 0.1923181712627411, |
| "learning_rate": 0.0001, |
| "loss": 2.2365, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.6987638256343527, |
| "grad_norm": 0.26491835713386536, |
| "learning_rate": 0.0001, |
| "loss": 2.4613, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.6994144437215355, |
| "grad_norm": 0.17674419283866882, |
| "learning_rate": 0.0001, |
| "loss": 1.9706, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.7000650618087183, |
| "grad_norm": 0.19894379377365112, |
| "learning_rate": 0.0001, |
| "loss": 1.9227, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.7007156798959011, |
| "grad_norm": 0.19496971368789673, |
| "learning_rate": 0.0001, |
| "loss": 2.1783, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.7013662979830839, |
| "grad_norm": 0.20685461163520813, |
| "learning_rate": 0.0001, |
| "loss": 2.1542, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.7020169160702667, |
| "grad_norm": 0.23061524331569672, |
| "learning_rate": 0.0001, |
| "loss": 2.3346, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.7026675341574495, |
| "grad_norm": 0.2044321447610855, |
| "learning_rate": 0.0001, |
| "loss": 2.0157, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.7033181522446325, |
| "grad_norm": 0.18851466476917267, |
| "learning_rate": 0.0001, |
| "loss": 2.2045, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.7039687703318153, |
| "grad_norm": 0.18530018627643585, |
| "learning_rate": 0.0001, |
| "loss": 2.0695, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.7046193884189981, |
| "grad_norm": 0.23562023043632507, |
| "learning_rate": 0.0001, |
| "loss": 2.3919, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.7052700065061809, |
| "grad_norm": 0.22246116399765015, |
| "learning_rate": 0.0001, |
| "loss": 2.5821, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.7059206245933637, |
| "grad_norm": 0.2134729027748108, |
| "learning_rate": 0.0001, |
| "loss": 2.2181, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.7065712426805465, |
| "grad_norm": 0.29674917459487915, |
| "learning_rate": 0.0001, |
| "loss": 2.5069, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.7072218607677293, |
| "grad_norm": 0.2098974883556366, |
| "learning_rate": 0.0001, |
| "loss": 2.3307, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.7078724788549121, |
| "grad_norm": 0.27041876316070557, |
| "learning_rate": 0.0001, |
| "loss": 2.8081, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.708523096942095, |
| "grad_norm": 0.19734299182891846, |
| "learning_rate": 0.0001, |
| "loss": 2.0588, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.7091737150292778, |
| "grad_norm": 0.22952257096767426, |
| "learning_rate": 0.0001, |
| "loss": 2.2607, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.7098243331164606, |
| "grad_norm": 0.20846691727638245, |
| "learning_rate": 0.0001, |
| "loss": 2.1657, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.7104749512036435, |
| "grad_norm": 0.19664259254932404, |
| "learning_rate": 0.0001, |
| "loss": 2.1256, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.7111255692908263, |
| "grad_norm": 0.23994791507720947, |
| "learning_rate": 0.0001, |
| "loss": 2.5377, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.7117761873780091, |
| "grad_norm": 0.22439789772033691, |
| "learning_rate": 0.0001, |
| "loss": 2.6225, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.7124268054651919, |
| "grad_norm": 0.20211316645145416, |
| "learning_rate": 0.0001, |
| "loss": 2.0582, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.7130774235523748, |
| "grad_norm": 0.23308198153972626, |
| "learning_rate": 0.0001, |
| "loss": 2.4341, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.7137280416395576, |
| "grad_norm": 0.17806245386600494, |
| "learning_rate": 0.0001, |
| "loss": 2.0211, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.7143786597267404, |
| "grad_norm": 0.20525243878364563, |
| "learning_rate": 0.0001, |
| "loss": 2.1248, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.7150292778139232, |
| "grad_norm": 0.22835716605186462, |
| "learning_rate": 0.0001, |
| "loss": 2.2993, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.715679895901106, |
| "grad_norm": 0.37078213691711426, |
| "learning_rate": 0.0001, |
| "loss": 3.1289, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.7163305139882888, |
| "grad_norm": 0.22253082692623138, |
| "learning_rate": 0.0001, |
| "loss": 2.2304, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.7169811320754716, |
| "grad_norm": 0.20494401454925537, |
| "learning_rate": 0.0001, |
| "loss": 1.9473, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.7176317501626546, |
| "grad_norm": 0.22128112614154816, |
| "learning_rate": 0.0001, |
| "loss": 1.993, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.7182823682498374, |
| "grad_norm": 0.20786182582378387, |
| "learning_rate": 0.0001, |
| "loss": 2.0048, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.7189329863370202, |
| "grad_norm": 0.27697819471359253, |
| "learning_rate": 0.0001, |
| "loss": 2.372, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.719583604424203, |
| "grad_norm": 0.26237788796424866, |
| "learning_rate": 0.0001, |
| "loss": 1.9573, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.7202342225113858, |
| "grad_norm": 0.2544906437397003, |
| "learning_rate": 0.0001, |
| "loss": 2.2805, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.7208848405985686, |
| "grad_norm": 0.2175043374300003, |
| "learning_rate": 0.0001, |
| "loss": 2.3201, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.7215354586857514, |
| "grad_norm": 0.19637277722358704, |
| "learning_rate": 0.0001, |
| "loss": 1.8868, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.7221860767729343, |
| "grad_norm": 0.19888024032115936, |
| "learning_rate": 0.0001, |
| "loss": 2.0324, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.7228366948601171, |
| "grad_norm": 0.20008981227874756, |
| "learning_rate": 0.0001, |
| "loss": 2.2898, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.7234873129473, |
| "grad_norm": 0.25185343623161316, |
| "learning_rate": 0.0001, |
| "loss": 2.2424, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.7241379310344828, |
| "grad_norm": 0.2434062957763672, |
| "learning_rate": 0.0001, |
| "loss": 2.2884, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.7247885491216656, |
| "grad_norm": 0.2278825044631958, |
| "learning_rate": 0.0001, |
| "loss": 2.1751, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.7254391672088484, |
| "grad_norm": 0.23180316388607025, |
| "learning_rate": 0.0001, |
| "loss": 2.6033, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.7260897852960312, |
| "grad_norm": 0.18574117124080658, |
| "learning_rate": 0.0001, |
| "loss": 2.3172, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.7267404033832141, |
| "grad_norm": 0.286155641078949, |
| "learning_rate": 0.0001, |
| "loss": 2.0482, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.7273910214703969, |
| "grad_norm": 0.1757357120513916, |
| "learning_rate": 0.0001, |
| "loss": 1.8881, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.7280416395575797, |
| "grad_norm": 0.25008201599121094, |
| "learning_rate": 0.0001, |
| "loss": 2.3797, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.7286922576447625, |
| "grad_norm": 0.29816892743110657, |
| "learning_rate": 0.0001, |
| "loss": 2.9163, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.7293428757319453, |
| "grad_norm": 0.1951293647289276, |
| "learning_rate": 0.0001, |
| "loss": 2.0613, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.7299934938191281, |
| "grad_norm": 0.23593062162399292, |
| "learning_rate": 0.0001, |
| "loss": 2.2103, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.730644111906311, |
| "grad_norm": 0.18619036674499512, |
| "learning_rate": 0.0001, |
| "loss": 1.9223, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.7312947299934938, |
| "grad_norm": 0.20853224396705627, |
| "learning_rate": 0.0001, |
| "loss": 2.2651, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.7319453480806767, |
| "grad_norm": 0.27427271008491516, |
| "learning_rate": 0.0001, |
| "loss": 2.3866, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.7325959661678595, |
| "grad_norm": 0.35531318187713623, |
| "learning_rate": 0.0001, |
| "loss": 2.8333, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.7332465842550423, |
| "grad_norm": 0.21375155448913574, |
| "learning_rate": 0.0001, |
| "loss": 2.0703, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.7338972023422251, |
| "grad_norm": 0.24240247905254364, |
| "learning_rate": 0.0001, |
| "loss": 2.3032, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.7345478204294079, |
| "grad_norm": 0.2277136594057083, |
| "learning_rate": 0.0001, |
| "loss": 2.585, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.7351984385165907, |
| "grad_norm": 0.20665140450000763, |
| "learning_rate": 0.0001, |
| "loss": 2.1351, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.7358490566037735, |
| "grad_norm": 0.2534540891647339, |
| "learning_rate": 0.0001, |
| "loss": 2.5023, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.7364996746909565, |
| "grad_norm": 0.19695554673671722, |
| "learning_rate": 0.0001, |
| "loss": 1.9286, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.7371502927781393, |
| "grad_norm": 0.18500645458698273, |
| "learning_rate": 0.0001, |
| "loss": 2.0609, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.7378009108653221, |
| "grad_norm": 0.2103162556886673, |
| "learning_rate": 0.0001, |
| "loss": 2.2247, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.7384515289525049, |
| "grad_norm": 0.20303300023078918, |
| "learning_rate": 0.0001, |
| "loss": 2.1164, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.7391021470396877, |
| "grad_norm": 0.23574739694595337, |
| "learning_rate": 0.0001, |
| "loss": 2.6325, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.7397527651268705, |
| "grad_norm": 0.2764929234981537, |
| "learning_rate": 0.0001, |
| "loss": 2.3049, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.7404033832140533, |
| "grad_norm": 0.23995018005371094, |
| "learning_rate": 0.0001, |
| "loss": 2.3196, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.7410540013012362, |
| "grad_norm": 0.19074063003063202, |
| "learning_rate": 0.0001, |
| "loss": 2.1566, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.741704619388419, |
| "grad_norm": 0.18186306953430176, |
| "learning_rate": 0.0001, |
| "loss": 1.9629, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.7423552374756018, |
| "grad_norm": 0.23841345310211182, |
| "learning_rate": 0.0001, |
| "loss": 2.1942, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.7430058555627846, |
| "grad_norm": 0.19697019457817078, |
| "learning_rate": 0.0001, |
| "loss": 2.0186, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.7436564736499675, |
| "grad_norm": 0.2117876410484314, |
| "learning_rate": 0.0001, |
| "loss": 2.4395, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.7443070917371503, |
| "grad_norm": 0.26921918988227844, |
| "learning_rate": 0.0001, |
| "loss": 2.4332, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.7449577098243331, |
| "grad_norm": 0.18999671936035156, |
| "learning_rate": 0.0001, |
| "loss": 2.0209, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.745608327911516, |
| "grad_norm": 0.22686484456062317, |
| "learning_rate": 0.0001, |
| "loss": 2.4369, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.7462589459986988, |
| "grad_norm": 0.22974656522274017, |
| "learning_rate": 0.0001, |
| "loss": 2.3737, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.7469095640858816, |
| "grad_norm": 0.19007977843284607, |
| "learning_rate": 0.0001, |
| "loss": 2.145, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.7475601821730644, |
| "grad_norm": 0.23000845313072205, |
| "learning_rate": 0.0001, |
| "loss": 2.0555, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.7482108002602472, |
| "grad_norm": 0.33339783549308777, |
| "learning_rate": 0.0001, |
| "loss": 2.7318, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.74886141834743, |
| "grad_norm": 0.18458595871925354, |
| "learning_rate": 0.0001, |
| "loss": 1.7868, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.7495120364346128, |
| "grad_norm": 0.2283509373664856, |
| "learning_rate": 0.0001, |
| "loss": 2.2609, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.7501626545217958, |
| "grad_norm": 0.31175729632377625, |
| "learning_rate": 0.0001, |
| "loss": 2.5524, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.7508132726089786, |
| "grad_norm": 0.18617112934589386, |
| "learning_rate": 0.0001, |
| "loss": 2.2029, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.7514638906961614, |
| "grad_norm": 0.28690317273139954, |
| "learning_rate": 0.0001, |
| "loss": 2.4705, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.7521145087833442, |
| "grad_norm": 0.2267671674489975, |
| "learning_rate": 0.0001, |
| "loss": 2.1093, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.752765126870527, |
| "grad_norm": 0.21956512331962585, |
| "learning_rate": 0.0001, |
| "loss": 2.0962, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.7534157449577098, |
| "grad_norm": 0.2681393027305603, |
| "learning_rate": 0.0001, |
| "loss": 2.35, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.7540663630448926, |
| "grad_norm": 0.23306699097156525, |
| "learning_rate": 0.0001, |
| "loss": 2.4911, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.7547169811320755, |
| "grad_norm": 0.3148876428604126, |
| "learning_rate": 0.0001, |
| "loss": 2.8802, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.7553675992192583, |
| "grad_norm": 0.2260347157716751, |
| "learning_rate": 0.0001, |
| "loss": 1.9286, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.7560182173064411, |
| "grad_norm": 0.24939195811748505, |
| "learning_rate": 0.0001, |
| "loss": 2.3544, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.756668835393624, |
| "grad_norm": 0.21007601916790009, |
| "learning_rate": 0.0001, |
| "loss": 2.0132, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.7573194534808068, |
| "grad_norm": 0.2570975720882416, |
| "learning_rate": 0.0001, |
| "loss": 1.9665, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.7579700715679896, |
| "grad_norm": 0.2818357050418854, |
| "learning_rate": 0.0001, |
| "loss": 2.2252, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.7586206896551724, |
| "grad_norm": 0.22388941049575806, |
| "learning_rate": 0.0001, |
| "loss": 2.4553, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.7592713077423552, |
| "grad_norm": 0.22799374163150787, |
| "learning_rate": 0.0001, |
| "loss": 2.4447, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.7599219258295381, |
| "grad_norm": 0.2610357105731964, |
| "learning_rate": 0.0001, |
| "loss": 2.4024, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.7605725439167209, |
| "grad_norm": 0.39793217182159424, |
| "learning_rate": 0.0001, |
| "loss": 3.1529, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.7612231620039037, |
| "grad_norm": 0.19805116951465607, |
| "learning_rate": 0.0001, |
| "loss": 1.9483, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.7618737800910865, |
| "grad_norm": 0.208368182182312, |
| "learning_rate": 0.0001, |
| "loss": 2.1785, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.7625243981782693, |
| "grad_norm": 0.25101637840270996, |
| "learning_rate": 0.0001, |
| "loss": 2.2517, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.7631750162654521, |
| "grad_norm": 0.27432793378829956, |
| "learning_rate": 0.0001, |
| "loss": 2.4759, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.763825634352635, |
| "grad_norm": 0.18746371567249298, |
| "learning_rate": 0.0001, |
| "loss": 2.0188, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.7644762524398179, |
| "grad_norm": 0.2882263958454132, |
| "learning_rate": 0.0001, |
| "loss": 2.2948, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.7651268705270007, |
| "grad_norm": 0.22075092792510986, |
| "learning_rate": 0.0001, |
| "loss": 2.4894, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.7657774886141835, |
| "grad_norm": 0.20792776346206665, |
| "learning_rate": 0.0001, |
| "loss": 1.8502, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.7664281067013663, |
| "grad_norm": 0.2436477392911911, |
| "learning_rate": 0.0001, |
| "loss": 2.1296, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.7670787247885491, |
| "grad_norm": 0.2839182913303375, |
| "learning_rate": 0.0001, |
| "loss": 2.8409, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.7677293428757319, |
| "grad_norm": 0.1826743334531784, |
| "learning_rate": 0.0001, |
| "loss": 1.941, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.7683799609629147, |
| "grad_norm": 0.2757255434989929, |
| "learning_rate": 0.0001, |
| "loss": 2.7297, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.7690305790500976, |
| "grad_norm": 0.23313826322555542, |
| "learning_rate": 0.0001, |
| "loss": 2.8796, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.7696811971372804, |
| "grad_norm": 0.28900882601737976, |
| "learning_rate": 0.0001, |
| "loss": 2.313, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.7703318152244633, |
| "grad_norm": 0.32883039116859436, |
| "learning_rate": 0.0001, |
| "loss": 3.041, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.7709824333116461, |
| "grad_norm": 0.2116912454366684, |
| "learning_rate": 0.0001, |
| "loss": 1.9891, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.7716330513988289, |
| "grad_norm": 0.2055017203092575, |
| "learning_rate": 0.0001, |
| "loss": 1.9567, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.7722836694860117, |
| "grad_norm": 0.2978801131248474, |
| "learning_rate": 0.0001, |
| "loss": 2.3322, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.7729342875731945, |
| "grad_norm": 0.21910034120082855, |
| "learning_rate": 0.0001, |
| "loss": 2.0262, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.7735849056603774, |
| "grad_norm": 0.19952894747257233, |
| "learning_rate": 0.0001, |
| "loss": 2.0621, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.7742355237475602, |
| "grad_norm": 0.20744554698467255, |
| "learning_rate": 0.0001, |
| "loss": 2.1154, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.774886141834743, |
| "grad_norm": 0.23886847496032715, |
| "learning_rate": 0.0001, |
| "loss": 2.3023, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.7755367599219258, |
| "grad_norm": 0.20722374320030212, |
| "learning_rate": 0.0001, |
| "loss": 2.2384, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.7761873780091086, |
| "grad_norm": 0.23317816853523254, |
| "learning_rate": 0.0001, |
| "loss": 2.6381, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.7768379960962914, |
| "grad_norm": 0.2527480125427246, |
| "learning_rate": 0.0001, |
| "loss": 2.1711, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.7774886141834743, |
| "grad_norm": 0.23817451298236847, |
| "learning_rate": 0.0001, |
| "loss": 2.6561, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.7781392322706572, |
| "grad_norm": 0.2609005570411682, |
| "learning_rate": 0.0001, |
| "loss": 2.5488, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.77878985035784, |
| "grad_norm": 0.19870908558368683, |
| "learning_rate": 0.0001, |
| "loss": 2.0435, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.7794404684450228, |
| "grad_norm": 0.20385386049747467, |
| "learning_rate": 0.0001, |
| "loss": 1.9711, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.7800910865322056, |
| "grad_norm": 0.20179738104343414, |
| "learning_rate": 0.0001, |
| "loss": 2.0247, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.7807417046193884, |
| "grad_norm": 0.40090981125831604, |
| "learning_rate": 0.0001, |
| "loss": 2.795, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.7813923227065712, |
| "grad_norm": 0.1885748654603958, |
| "learning_rate": 0.0001, |
| "loss": 2.1588, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.782042940793754, |
| "grad_norm": 0.21952667832374573, |
| "learning_rate": 0.0001, |
| "loss": 2.0901, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.7826935588809368, |
| "grad_norm": 0.2344968616962433, |
| "learning_rate": 0.0001, |
| "loss": 1.9943, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.7833441769681198, |
| "grad_norm": 0.3153589069843292, |
| "learning_rate": 0.0001, |
| "loss": 2.59, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.7839947950553026, |
| "grad_norm": 0.1870599389076233, |
| "learning_rate": 0.0001, |
| "loss": 1.9435, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.7846454131424854, |
| "grad_norm": 0.189214825630188, |
| "learning_rate": 0.0001, |
| "loss": 2.128, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.7852960312296682, |
| "grad_norm": 0.22551633417606354, |
| "learning_rate": 0.0001, |
| "loss": 2.3913, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.785946649316851, |
| "grad_norm": 0.19963033497333527, |
| "learning_rate": 0.0001, |
| "loss": 2.1456, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.7865972674040338, |
| "grad_norm": 0.2087828814983368, |
| "learning_rate": 0.0001, |
| "loss": 2.3486, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.7872478854912166, |
| "grad_norm": 0.19814416766166687, |
| "learning_rate": 0.0001, |
| "loss": 2.0208, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.7878985035783995, |
| "grad_norm": 0.20670342445373535, |
| "learning_rate": 0.0001, |
| "loss": 2.1276, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.7885491216655823, |
| "grad_norm": 0.1881658136844635, |
| "learning_rate": 0.0001, |
| "loss": 2.0502, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.7891997397527651, |
| "grad_norm": 0.2015887349843979, |
| "learning_rate": 0.0001, |
| "loss": 2.2935, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.789850357839948, |
| "grad_norm": 0.23532694578170776, |
| "learning_rate": 0.0001, |
| "loss": 2.8046, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.7905009759271308, |
| "grad_norm": 0.18583200871944427, |
| "learning_rate": 0.0001, |
| "loss": 1.7999, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.7911515940143136, |
| "grad_norm": 0.23056970536708832, |
| "learning_rate": 0.0001, |
| "loss": 2.126, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.7918022121014964, |
| "grad_norm": 0.3166569471359253, |
| "learning_rate": 0.0001, |
| "loss": 3.0332, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.7924528301886793, |
| "grad_norm": 0.273381769657135, |
| "learning_rate": 0.0001, |
| "loss": 2.2258, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.7931034482758621, |
| "grad_norm": 0.3166522979736328, |
| "learning_rate": 0.0001, |
| "loss": 2.35, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.7937540663630449, |
| "grad_norm": 0.1906355321407318, |
| "learning_rate": 0.0001, |
| "loss": 1.9739, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.7944046844502277, |
| "grad_norm": 0.2339126616716385, |
| "learning_rate": 0.0001, |
| "loss": 2.3575, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.7950553025374105, |
| "grad_norm": 0.2760171592235565, |
| "learning_rate": 0.0001, |
| "loss": 2.4708, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.7957059206245933, |
| "grad_norm": 0.17487159371376038, |
| "learning_rate": 0.0001, |
| "loss": 1.7924, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.7963565387117761, |
| "grad_norm": 0.19386877119541168, |
| "learning_rate": 0.0001, |
| "loss": 2.3044, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.7970071567989591, |
| "grad_norm": 0.18056143820285797, |
| "learning_rate": 0.0001, |
| "loss": 1.9543, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.7976577748861419, |
| "grad_norm": 0.3085278868675232, |
| "learning_rate": 0.0001, |
| "loss": 2.2131, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.7983083929733247, |
| "grad_norm": 0.1960904896259308, |
| "learning_rate": 0.0001, |
| "loss": 2.0918, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.7989590110605075, |
| "grad_norm": 0.19437837600708008, |
| "learning_rate": 0.0001, |
| "loss": 2.2241, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.7996096291476903, |
| "grad_norm": 0.2129238396883011, |
| "learning_rate": 0.0001, |
| "loss": 2.1891, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.8002602472348731, |
| "grad_norm": 0.20101650059223175, |
| "learning_rate": 0.0001, |
| "loss": 2.1341, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.8009108653220559, |
| "grad_norm": 0.20897014439105988, |
| "learning_rate": 0.0001, |
| "loss": 2.0937, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.8015614834092388, |
| "grad_norm": 0.2693694829940796, |
| "learning_rate": 0.0001, |
| "loss": 2.7406, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.8022121014964216, |
| "grad_norm": 0.2322738617658615, |
| "learning_rate": 0.0001, |
| "loss": 2.8483, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.8028627195836044, |
| "grad_norm": 0.21177823841571808, |
| "learning_rate": 0.0001, |
| "loss": 2.2315, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.8035133376707873, |
| "grad_norm": 0.2920454442501068, |
| "learning_rate": 0.0001, |
| "loss": 3.0264, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.8041639557579701, |
| "grad_norm": 0.2331319898366928, |
| "learning_rate": 0.0001, |
| "loss": 2.4574, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.8048145738451529, |
| "grad_norm": 0.2339990735054016, |
| "learning_rate": 0.0001, |
| "loss": 2.2752, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.8054651919323357, |
| "grad_norm": 0.22823981940746307, |
| "learning_rate": 0.0001, |
| "loss": 1.9615, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.8061158100195186, |
| "grad_norm": 0.20435038208961487, |
| "learning_rate": 0.0001, |
| "loss": 1.9989, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.8067664281067014, |
| "grad_norm": 0.32488611340522766, |
| "learning_rate": 0.0001, |
| "loss": 2.4791, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.8074170461938842, |
| "grad_norm": 0.27227675914764404, |
| "learning_rate": 0.0001, |
| "loss": 2.6443, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.808067664281067, |
| "grad_norm": 0.20864960551261902, |
| "learning_rate": 0.0001, |
| "loss": 2.2324, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.8087182823682498, |
| "grad_norm": 0.22645455598831177, |
| "learning_rate": 0.0001, |
| "loss": 2.0199, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.8093689004554326, |
| "grad_norm": 0.22091244161128998, |
| "learning_rate": 0.0001, |
| "loss": 2.1145, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.8100195185426154, |
| "grad_norm": 0.20442111790180206, |
| "learning_rate": 0.0001, |
| "loss": 2.1277, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.8106701366297983, |
| "grad_norm": 0.19400720298290253, |
| "learning_rate": 0.0001, |
| "loss": 1.951, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.8113207547169812, |
| "grad_norm": 0.474490225315094, |
| "learning_rate": 0.0001, |
| "loss": 3.0206, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.811971372804164, |
| "grad_norm": 0.23634073138237, |
| "learning_rate": 0.0001, |
| "loss": 2.2556, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.8126219908913468, |
| "grad_norm": 0.23998601734638214, |
| "learning_rate": 0.0001, |
| "loss": 2.3201, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.8132726089785296, |
| "grad_norm": 0.19258932769298553, |
| "learning_rate": 0.0001, |
| "loss": 1.9719, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.8139232270657124, |
| "grad_norm": 0.21039240062236786, |
| "learning_rate": 0.0001, |
| "loss": 2.3617, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.8145738451528952, |
| "grad_norm": 0.37176814675331116, |
| "learning_rate": 0.0001, |
| "loss": 2.7183, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.815224463240078, |
| "grad_norm": 0.24739331007003784, |
| "learning_rate": 0.0001, |
| "loss": 2.0098, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.8158750813272609, |
| "grad_norm": 0.32313254475593567, |
| "learning_rate": 0.0001, |
| "loss": 2.062, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.8165256994144438, |
| "grad_norm": 0.2571156322956085, |
| "learning_rate": 0.0001, |
| "loss": 2.3973, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.8171763175016266, |
| "grad_norm": 0.266369491815567, |
| "learning_rate": 0.0001, |
| "loss": 2.6019, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.8178269355888094, |
| "grad_norm": 0.3770993649959564, |
| "learning_rate": 0.0001, |
| "loss": 2.5413, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.8184775536759922, |
| "grad_norm": 0.24964609742164612, |
| "learning_rate": 0.0001, |
| "loss": 1.7407, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.819128171763175, |
| "grad_norm": 0.208835169672966, |
| "learning_rate": 0.0001, |
| "loss": 2.36, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.8197787898503578, |
| "grad_norm": 0.19789732992649078, |
| "learning_rate": 0.0001, |
| "loss": 2.0967, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.8204294079375407, |
| "grad_norm": 0.4847930669784546, |
| "learning_rate": 0.0001, |
| "loss": 2.9673, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.8210800260247235, |
| "grad_norm": 0.277960866689682, |
| "learning_rate": 0.0001, |
| "loss": 2.2165, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.8217306441119063, |
| "grad_norm": 0.20278669893741608, |
| "learning_rate": 0.0001, |
| "loss": 2.5098, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.8223812621990891, |
| "grad_norm": 0.3295345604419708, |
| "learning_rate": 0.0001, |
| "loss": 2.4451, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.8230318802862719, |
| "grad_norm": 0.25482621788978577, |
| "learning_rate": 0.0001, |
| "loss": 2.3178, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.8236824983734548, |
| "grad_norm": 0.21955101191997528, |
| "learning_rate": 0.0001, |
| "loss": 2.3245, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.8243331164606376, |
| "grad_norm": 0.19811898469924927, |
| "learning_rate": 0.0001, |
| "loss": 2.1608, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.8249837345478205, |
| "grad_norm": 0.20357833802700043, |
| "learning_rate": 0.0001, |
| "loss": 2.0502, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.8256343526350033, |
| "grad_norm": 0.25111669301986694, |
| "learning_rate": 0.0001, |
| "loss": 2.9059, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.8262849707221861, |
| "grad_norm": 0.20970256626605988, |
| "learning_rate": 0.0001, |
| "loss": 2.3496, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.8269355888093689, |
| "grad_norm": 0.19146494567394257, |
| "learning_rate": 0.0001, |
| "loss": 2.0773, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.8275862068965517, |
| "grad_norm": 0.2083313763141632, |
| "learning_rate": 0.0001, |
| "loss": 2.0031, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.8282368249837345, |
| "grad_norm": 0.19460196793079376, |
| "learning_rate": 0.0001, |
| "loss": 2.0411, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.8288874430709173, |
| "grad_norm": 0.1900896281003952, |
| "learning_rate": 0.0001, |
| "loss": 1.9517, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.8295380611581002, |
| "grad_norm": 0.20020513236522675, |
| "learning_rate": 0.0001, |
| "loss": 2.2062, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.8301886792452831, |
| "grad_norm": 0.21990856528282166, |
| "learning_rate": 0.0001, |
| "loss": 2.0837, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.8308392973324659, |
| "grad_norm": 0.1966349482536316, |
| "learning_rate": 0.0001, |
| "loss": 2.0407, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.8314899154196487, |
| "grad_norm": 0.19897864758968353, |
| "learning_rate": 0.0001, |
| "loss": 2.1639, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.8321405335068315, |
| "grad_norm": 0.21094024181365967, |
| "learning_rate": 0.0001, |
| "loss": 2.2158, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.8327911515940143, |
| "grad_norm": 0.1989631950855255, |
| "learning_rate": 0.0001, |
| "loss": 1.9578, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.8334417696811971, |
| "grad_norm": 0.1953240931034088, |
| "learning_rate": 0.0001, |
| "loss": 2.0365, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.8340923877683799, |
| "grad_norm": 0.33914485573768616, |
| "learning_rate": 0.0001, |
| "loss": 2.3676, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.8347430058555628, |
| "grad_norm": 0.17135807871818542, |
| "learning_rate": 0.0001, |
| "loss": 1.821, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.8353936239427456, |
| "grad_norm": 0.1993912309408188, |
| "learning_rate": 0.0001, |
| "loss": 2.4103, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.8360442420299284, |
| "grad_norm": 0.21222157776355743, |
| "learning_rate": 0.0001, |
| "loss": 2.3443, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.8366948601171112, |
| "grad_norm": 0.22162573039531708, |
| "learning_rate": 0.0001, |
| "loss": 2.1757, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.8373454782042941, |
| "grad_norm": 0.22677986323833466, |
| "learning_rate": 0.0001, |
| "loss": 2.0542, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.8379960962914769, |
| "grad_norm": 0.1974060982465744, |
| "learning_rate": 0.0001, |
| "loss": 2.1686, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.8386467143786597, |
| "grad_norm": 0.30552592873573303, |
| "learning_rate": 0.0001, |
| "loss": 2.5467, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.8392973324658426, |
| "grad_norm": 0.24357165396213531, |
| "learning_rate": 0.0001, |
| "loss": 2.3276, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.8399479505530254, |
| "grad_norm": 0.1960456818342209, |
| "learning_rate": 0.0001, |
| "loss": 2.0956, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.8405985686402082, |
| "grad_norm": 0.24264569580554962, |
| "learning_rate": 0.0001, |
| "loss": 2.0666, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.841249186727391, |
| "grad_norm": 0.25320202112197876, |
| "learning_rate": 0.0001, |
| "loss": 2.033, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.8418998048145738, |
| "grad_norm": 0.2313191145658493, |
| "learning_rate": 0.0001, |
| "loss": 2.0571, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.8425504229017566, |
| "grad_norm": 0.42846229672431946, |
| "learning_rate": 0.0001, |
| "loss": 2.0875, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.8432010409889394, |
| "grad_norm": 0.19277000427246094, |
| "learning_rate": 0.0001, |
| "loss": 1.9303, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.8438516590761224, |
| "grad_norm": 0.1947111338376999, |
| "learning_rate": 0.0001, |
| "loss": 1.9482, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.8445022771633052, |
| "grad_norm": 0.30196627974510193, |
| "learning_rate": 0.0001, |
| "loss": 2.3238, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.845152895250488, |
| "grad_norm": 0.21137486398220062, |
| "learning_rate": 0.0001, |
| "loss": 2.1962, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.8458035133376708, |
| "grad_norm": 0.2568284571170807, |
| "learning_rate": 0.0001, |
| "loss": 2.3231, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.8464541314248536, |
| "grad_norm": 0.2092464715242386, |
| "learning_rate": 0.0001, |
| "loss": 1.8074, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.8471047495120364, |
| "grad_norm": 0.2112191617488861, |
| "learning_rate": 0.0001, |
| "loss": 2.169, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.8477553675992192, |
| "grad_norm": 0.17425194382667542, |
| "learning_rate": 0.0001, |
| "loss": 1.8025, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.8484059856864021, |
| "grad_norm": 0.20808906853199005, |
| "learning_rate": 0.0001, |
| "loss": 2.0869, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.8490566037735849, |
| "grad_norm": 0.25200703740119934, |
| "learning_rate": 0.0001, |
| "loss": 2.4963, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.8497072218607677, |
| "grad_norm": 0.23948469758033752, |
| "learning_rate": 0.0001, |
| "loss": 2.3028, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.8503578399479506, |
| "grad_norm": 0.185250923037529, |
| "learning_rate": 0.0001, |
| "loss": 1.7409, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.8510084580351334, |
| "grad_norm": 0.18948182463645935, |
| "learning_rate": 0.0001, |
| "loss": 1.8922, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.8516590761223162, |
| "grad_norm": 0.2027200311422348, |
| "learning_rate": 0.0001, |
| "loss": 2.0922, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.852309694209499, |
| "grad_norm": 0.28325602412223816, |
| "learning_rate": 0.0001, |
| "loss": 2.0428, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.8529603122966819, |
| "grad_norm": 0.1829916387796402, |
| "learning_rate": 0.0001, |
| "loss": 1.9518, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.8536109303838647, |
| "grad_norm": 0.1982378512620926, |
| "learning_rate": 0.0001, |
| "loss": 2.0209, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.8542615484710475, |
| "grad_norm": 0.18915079534053802, |
| "learning_rate": 0.0001, |
| "loss": 1.9291, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.8549121665582303, |
| "grad_norm": 0.1832190752029419, |
| "learning_rate": 0.0001, |
| "loss": 1.9818, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.8555627846454131, |
| "grad_norm": 0.2646237313747406, |
| "learning_rate": 0.0001, |
| "loss": 2.4418, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.8562134027325959, |
| "grad_norm": 0.2831929326057434, |
| "learning_rate": 0.0001, |
| "loss": 2.8355, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.8568640208197787, |
| "grad_norm": 0.2711881995201111, |
| "learning_rate": 0.0001, |
| "loss": 2.1963, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.8575146389069617, |
| "grad_norm": 0.25786513090133667, |
| "learning_rate": 0.0001, |
| "loss": 3.002, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.8581652569941445, |
| "grad_norm": 0.26838061213493347, |
| "learning_rate": 0.0001, |
| "loss": 3.1155, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.8588158750813273, |
| "grad_norm": 0.2220889776945114, |
| "learning_rate": 0.0001, |
| "loss": 2.0535, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.8594664931685101, |
| "grad_norm": 0.2008647471666336, |
| "learning_rate": 0.0001, |
| "loss": 2.0515, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.8601171112556929, |
| "grad_norm": 0.22017711400985718, |
| "learning_rate": 0.0001, |
| "loss": 2.289, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.8607677293428757, |
| "grad_norm": 0.19674621522426605, |
| "learning_rate": 0.0001, |
| "loss": 1.9414, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.8614183474300585, |
| "grad_norm": 0.191552072763443, |
| "learning_rate": 0.0001, |
| "loss": 1.9939, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.8620689655172413, |
| "grad_norm": 0.20212143659591675, |
| "learning_rate": 0.0001, |
| "loss": 1.8938, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.8627195836044242, |
| "grad_norm": 0.22502020001411438, |
| "learning_rate": 0.0001, |
| "loss": 2.13, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.863370201691607, |
| "grad_norm": 0.2504305839538574, |
| "learning_rate": 0.0001, |
| "loss": 2.7666, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.8640208197787899, |
| "grad_norm": 0.19481819868087769, |
| "learning_rate": 0.0001, |
| "loss": 2.1141, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.8646714378659727, |
| "grad_norm": 0.21994583308696747, |
| "learning_rate": 0.0001, |
| "loss": 2.7615, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.8653220559531555, |
| "grad_norm": 0.19281654059886932, |
| "learning_rate": 0.0001, |
| "loss": 2.0864, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.8659726740403383, |
| "grad_norm": 0.20329228043556213, |
| "learning_rate": 0.0001, |
| "loss": 2.1002, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.8666232921275211, |
| "grad_norm": 0.19484490156173706, |
| "learning_rate": 0.0001, |
| "loss": 2.0519, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.867273910214704, |
| "grad_norm": 0.1867295801639557, |
| "learning_rate": 0.0001, |
| "loss": 1.9208, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.8679245283018868, |
| "grad_norm": 0.30128392577171326, |
| "learning_rate": 0.0001, |
| "loss": 2.7527, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.8685751463890696, |
| "grad_norm": 0.22880543768405914, |
| "learning_rate": 0.0001, |
| "loss": 2.449, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.8692257644762524, |
| "grad_norm": 0.23333753645420074, |
| "learning_rate": 0.0001, |
| "loss": 2.0425, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.8698763825634352, |
| "grad_norm": 0.34176793694496155, |
| "learning_rate": 0.0001, |
| "loss": 2.8857, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.870527000650618, |
| "grad_norm": 0.19983690977096558, |
| "learning_rate": 0.0001, |
| "loss": 2.2466, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.8711776187378009, |
| "grad_norm": 0.21883231401443481, |
| "learning_rate": 0.0001, |
| "loss": 2.1262, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.8718282368249838, |
| "grad_norm": 0.19143971800804138, |
| "learning_rate": 0.0001, |
| "loss": 2.0119, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.8724788549121666, |
| "grad_norm": 0.25845617055892944, |
| "learning_rate": 0.0001, |
| "loss": 2.6315, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.8731294729993494, |
| "grad_norm": 0.1914021521806717, |
| "learning_rate": 0.0001, |
| "loss": 1.8571, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.8737800910865322, |
| "grad_norm": 0.2742185592651367, |
| "learning_rate": 0.0001, |
| "loss": 2.1467, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.874430709173715, |
| "grad_norm": 0.19927754998207092, |
| "learning_rate": 0.0001, |
| "loss": 1.9877, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.8750813272608978, |
| "grad_norm": 0.2340778261423111, |
| "learning_rate": 0.0001, |
| "loss": 2.2476, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.8757319453480806, |
| "grad_norm": 0.2931828498840332, |
| "learning_rate": 0.0001, |
| "loss": 2.4643, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.8763825634352636, |
| "grad_norm": 0.18637506663799286, |
| "learning_rate": 0.0001, |
| "loss": 1.7933, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.8770331815224464, |
| "grad_norm": 0.1898747682571411, |
| "learning_rate": 0.0001, |
| "loss": 1.9781, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.8776837996096292, |
| "grad_norm": 0.229608952999115, |
| "learning_rate": 0.0001, |
| "loss": 2.1293, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.878334417696812, |
| "grad_norm": 0.31374409794807434, |
| "learning_rate": 0.0001, |
| "loss": 2.5436, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.8789850357839948, |
| "grad_norm": 0.22544679045677185, |
| "learning_rate": 0.0001, |
| "loss": 2.0882, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.8796356538711776, |
| "grad_norm": 0.2415180653333664, |
| "learning_rate": 0.0001, |
| "loss": 2.3193, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.8802862719583604, |
| "grad_norm": 0.28355568647384644, |
| "learning_rate": 0.0001, |
| "loss": 2.5994, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.8809368900455433, |
| "grad_norm": 0.19143925607204437, |
| "learning_rate": 0.0001, |
| "loss": 2.0546, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.8815875081327261, |
| "grad_norm": 0.2990890443325043, |
| "learning_rate": 0.0001, |
| "loss": 2.7388, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.8822381262199089, |
| "grad_norm": 0.28672561049461365, |
| "learning_rate": 0.0001, |
| "loss": 1.915, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.8828887443070917, |
| "grad_norm": 0.20137082040309906, |
| "learning_rate": 0.0001, |
| "loss": 2.5376, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.8835393623942746, |
| "grad_norm": 0.2175220251083374, |
| "learning_rate": 0.0001, |
| "loss": 1.9055, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.8841899804814574, |
| "grad_norm": 0.2790168523788452, |
| "learning_rate": 0.0001, |
| "loss": 2.0223, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.8848405985686402, |
| "grad_norm": 0.22070975601673126, |
| "learning_rate": 0.0001, |
| "loss": 2.4071, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.885491216655823, |
| "grad_norm": 0.22505122423171997, |
| "learning_rate": 0.0001, |
| "loss": 2.2988, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.8861418347430059, |
| "grad_norm": 0.2231319099664688, |
| "learning_rate": 0.0001, |
| "loss": 2.0156, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.8867924528301887, |
| "grad_norm": 0.2921566665172577, |
| "learning_rate": 0.0001, |
| "loss": 2.7166, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.8874430709173715, |
| "grad_norm": 0.19267822802066803, |
| "learning_rate": 0.0001, |
| "loss": 2.0485, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.8880936890045543, |
| "grad_norm": 0.28789597749710083, |
| "learning_rate": 0.0001, |
| "loss": 2.7656, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.8887443070917371, |
| "grad_norm": 0.3205803334712982, |
| "learning_rate": 0.0001, |
| "loss": 2.5545, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.8893949251789199, |
| "grad_norm": 0.20888707041740417, |
| "learning_rate": 0.0001, |
| "loss": 1.8906, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.8900455432661027, |
| "grad_norm": 0.18200016021728516, |
| "learning_rate": 0.0001, |
| "loss": 1.8483, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.8906961613532857, |
| "grad_norm": 0.2367328256368637, |
| "learning_rate": 0.0001, |
| "loss": 2.3351, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.8913467794404685, |
| "grad_norm": 0.28111082315444946, |
| "learning_rate": 0.0001, |
| "loss": 2.5511, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.8919973975276513, |
| "grad_norm": 0.19744041562080383, |
| "learning_rate": 0.0001, |
| "loss": 1.9521, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.8926480156148341, |
| "grad_norm": 0.2166965901851654, |
| "learning_rate": 0.0001, |
| "loss": 2.1205, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.8932986337020169, |
| "grad_norm": 0.20931009948253632, |
| "learning_rate": 0.0001, |
| "loss": 2.1394, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.8939492517891997, |
| "grad_norm": 0.2102230191230774, |
| "learning_rate": 0.0001, |
| "loss": 1.9695, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.8945998698763825, |
| "grad_norm": 0.22161559760570526, |
| "learning_rate": 0.0001, |
| "loss": 2.4084, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.8952504879635654, |
| "grad_norm": 0.22104842960834503, |
| "learning_rate": 0.0001, |
| "loss": 2.6029, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.8959011060507482, |
| "grad_norm": 0.2125016152858734, |
| "learning_rate": 0.0001, |
| "loss": 2.0576, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.896551724137931, |
| "grad_norm": 0.2626838684082031, |
| "learning_rate": 0.0001, |
| "loss": 2.5907, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.8972023422251139, |
| "grad_norm": 0.19114330410957336, |
| "learning_rate": 0.0001, |
| "loss": 2.2824, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.8978529603122967, |
| "grad_norm": 0.24731865525245667, |
| "learning_rate": 0.0001, |
| "loss": 2.5292, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.8985035783994795, |
| "grad_norm": 0.23787495493888855, |
| "learning_rate": 0.0001, |
| "loss": 2.1433, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.8991541964866623, |
| "grad_norm": 0.2028874158859253, |
| "learning_rate": 0.0001, |
| "loss": 2.2726, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.8998048145738452, |
| "grad_norm": 0.22940067946910858, |
| "learning_rate": 0.0001, |
| "loss": 2.3222, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.900455432661028, |
| "grad_norm": 0.20267997682094574, |
| "learning_rate": 0.0001, |
| "loss": 2.2875, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.9011060507482108, |
| "grad_norm": 0.21694517135620117, |
| "learning_rate": 0.0001, |
| "loss": 2.3674, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.9017566688353936, |
| "grad_norm": 0.1904231160879135, |
| "learning_rate": 0.0001, |
| "loss": 1.996, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.9024072869225764, |
| "grad_norm": 0.2630701959133148, |
| "learning_rate": 0.0001, |
| "loss": 2.4881, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.9030579050097592, |
| "grad_norm": 0.19993318617343903, |
| "learning_rate": 0.0001, |
| "loss": 1.9409, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.903708523096942, |
| "grad_norm": 0.19389230012893677, |
| "learning_rate": 0.0001, |
| "loss": 2.1121, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.904359141184125, |
| "grad_norm": 0.20352298021316528, |
| "learning_rate": 0.0001, |
| "loss": 1.9887, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.9050097592713078, |
| "grad_norm": 0.17967310547828674, |
| "learning_rate": 0.0001, |
| "loss": 1.8068, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.9056603773584906, |
| "grad_norm": 0.2310938984155655, |
| "learning_rate": 0.0001, |
| "loss": 2.2666, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.9063109954456734, |
| "grad_norm": 0.18979041278362274, |
| "learning_rate": 0.0001, |
| "loss": 2.0004, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.9069616135328562, |
| "grad_norm": 0.26813068985939026, |
| "learning_rate": 0.0001, |
| "loss": 2.4142, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.907612231620039, |
| "grad_norm": 0.23549699783325195, |
| "learning_rate": 0.0001, |
| "loss": 2.3059, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.9082628497072218, |
| "grad_norm": 0.2435377985239029, |
| "learning_rate": 0.0001, |
| "loss": 2.1919, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.9089134677944047, |
| "grad_norm": 0.21723680198192596, |
| "learning_rate": 0.0001, |
| "loss": 2.2244, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.9095640858815875, |
| "grad_norm": 0.20665475726127625, |
| "learning_rate": 0.0001, |
| "loss": 2.1907, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.9102147039687704, |
| "grad_norm": 0.26172783970832825, |
| "learning_rate": 0.0001, |
| "loss": 2.5632, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.9108653220559532, |
| "grad_norm": 0.22065763175487518, |
| "learning_rate": 0.0001, |
| "loss": 2.287, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.911515940143136, |
| "grad_norm": 0.260623574256897, |
| "learning_rate": 0.0001, |
| "loss": 2.7247, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.9121665582303188, |
| "grad_norm": 0.1967797726392746, |
| "learning_rate": 0.0001, |
| "loss": 2.3431, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.9128171763175016, |
| "grad_norm": 0.19779254496097565, |
| "learning_rate": 0.0001, |
| "loss": 1.8389, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.9134677944046844, |
| "grad_norm": 0.20970992743968964, |
| "learning_rate": 0.0001, |
| "loss": 2.1884, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.9141184124918673, |
| "grad_norm": 0.22229008376598358, |
| "learning_rate": 0.0001, |
| "loss": 2.2673, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.9147690305790501, |
| "grad_norm": 0.2208055853843689, |
| "learning_rate": 0.0001, |
| "loss": 2.1967, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.9154196486662329, |
| "grad_norm": 0.2209876924753189, |
| "learning_rate": 0.0001, |
| "loss": 2.2027, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.9160702667534157, |
| "grad_norm": 0.19158391654491425, |
| "learning_rate": 0.0001, |
| "loss": 1.9069, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.9167208848405985, |
| "grad_norm": 0.2156110256910324, |
| "learning_rate": 0.0001, |
| "loss": 2.2712, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.9173715029277814, |
| "grad_norm": 0.2610962390899658, |
| "learning_rate": 0.0001, |
| "loss": 1.8294, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.9180221210149642, |
| "grad_norm": 0.18197974562644958, |
| "learning_rate": 0.0001, |
| "loss": 1.9715, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.9186727391021471, |
| "grad_norm": 0.19082801043987274, |
| "learning_rate": 0.0001, |
| "loss": 2.1091, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.9193233571893299, |
| "grad_norm": 0.26832160353660583, |
| "learning_rate": 0.0001, |
| "loss": 2.7021, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.9199739752765127, |
| "grad_norm": 0.3070698082447052, |
| "learning_rate": 0.0001, |
| "loss": 2.4547, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.9206245933636955, |
| "grad_norm": 0.25139206647872925, |
| "learning_rate": 0.0001, |
| "loss": 2.5873, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.9212752114508783, |
| "grad_norm": 0.2131306529045105, |
| "learning_rate": 0.0001, |
| "loss": 2.3841, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.9219258295380611, |
| "grad_norm": 0.24531540274620056, |
| "learning_rate": 0.0001, |
| "loss": 1.9666, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.9225764476252439, |
| "grad_norm": 0.1986437737941742, |
| "learning_rate": 0.0001, |
| "loss": 1.9241, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.9232270657124269, |
| "grad_norm": 0.23614904284477234, |
| "learning_rate": 0.0001, |
| "loss": 2.5824, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.9238776837996097, |
| "grad_norm": 0.2782133221626282, |
| "learning_rate": 0.0001, |
| "loss": 2.1812, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.9245283018867925, |
| "grad_norm": 0.2232246845960617, |
| "learning_rate": 0.0001, |
| "loss": 2.3204, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.9251789199739753, |
| "grad_norm": 0.22002846002578735, |
| "learning_rate": 0.0001, |
| "loss": 1.8228, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.9258295380611581, |
| "grad_norm": 0.30900144577026367, |
| "learning_rate": 0.0001, |
| "loss": 2.4824, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.9264801561483409, |
| "grad_norm": 0.262989342212677, |
| "learning_rate": 0.0001, |
| "loss": 2.8719, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.9271307742355237, |
| "grad_norm": 0.5406531095504761, |
| "learning_rate": 0.0001, |
| "loss": 2.6984, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.9277813923227066, |
| "grad_norm": 0.2415890246629715, |
| "learning_rate": 0.0001, |
| "loss": 2.2543, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.9284320104098894, |
| "grad_norm": 0.21261392533779144, |
| "learning_rate": 0.0001, |
| "loss": 1.9761, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.9290826284970722, |
| "grad_norm": 0.23223569989204407, |
| "learning_rate": 0.0001, |
| "loss": 1.821, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.929733246584255, |
| "grad_norm": 0.2846924960613251, |
| "learning_rate": 0.0001, |
| "loss": 1.9886, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.9303838646714379, |
| "grad_norm": 0.2527627646923065, |
| "learning_rate": 0.0001, |
| "loss": 2.373, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.9310344827586207, |
| "grad_norm": 0.19917793571949005, |
| "learning_rate": 0.0001, |
| "loss": 2.0111, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.9316851008458035, |
| "grad_norm": 0.19021449983119965, |
| "learning_rate": 0.0001, |
| "loss": 2.0373, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.9323357189329864, |
| "grad_norm": 0.24929922819137573, |
| "learning_rate": 0.0001, |
| "loss": 2.3885, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.9329863370201692, |
| "grad_norm": 0.2533571124076843, |
| "learning_rate": 0.0001, |
| "loss": 2.544, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.933636955107352, |
| "grad_norm": 0.23931783437728882, |
| "learning_rate": 0.0001, |
| "loss": 2.418, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.9342875731945348, |
| "grad_norm": 0.30167070031166077, |
| "learning_rate": 0.0001, |
| "loss": 2.6513, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.9349381912817176, |
| "grad_norm": 0.1971869319677353, |
| "learning_rate": 0.0001, |
| "loss": 2.4016, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.9355888093689004, |
| "grad_norm": 0.21331265568733215, |
| "learning_rate": 0.0001, |
| "loss": 2.1524, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.9362394274560832, |
| "grad_norm": 0.26298433542251587, |
| "learning_rate": 0.0001, |
| "loss": 2.9442, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.936890045543266, |
| "grad_norm": 0.245792955160141, |
| "learning_rate": 0.0001, |
| "loss": 2.2055, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.937540663630449, |
| "grad_norm": 0.23703397810459137, |
| "learning_rate": 0.0001, |
| "loss": 2.5616, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.9381912817176318, |
| "grad_norm": 0.18641355633735657, |
| "learning_rate": 0.0001, |
| "loss": 1.8982, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.9388418998048146, |
| "grad_norm": 0.3551875650882721, |
| "learning_rate": 0.0001, |
| "loss": 2.7802, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.9394925178919974, |
| "grad_norm": 0.2278834879398346, |
| "learning_rate": 0.0001, |
| "loss": 2.175, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.9401431359791802, |
| "grad_norm": 0.26398956775665283, |
| "learning_rate": 0.0001, |
| "loss": 2.643, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.940793754066363, |
| "grad_norm": 0.31316065788269043, |
| "learning_rate": 0.0001, |
| "loss": 2.5662, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.9414443721535458, |
| "grad_norm": 0.22769761085510254, |
| "learning_rate": 0.0001, |
| "loss": 2.8677, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.9420949902407287, |
| "grad_norm": 0.2069929838180542, |
| "learning_rate": 0.0001, |
| "loss": 2.4393, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.9427456083279115, |
| "grad_norm": 0.23500226438045502, |
| "learning_rate": 0.0001, |
| "loss": 2.0914, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.9433962264150944, |
| "grad_norm": 0.2312425971031189, |
| "learning_rate": 0.0001, |
| "loss": 2.1085, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.9440468445022772, |
| "grad_norm": 0.20859290659427643, |
| "learning_rate": 0.0001, |
| "loss": 2.0653, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.94469746258946, |
| "grad_norm": 0.23336270451545715, |
| "learning_rate": 0.0001, |
| "loss": 2.1047, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.9453480806766428, |
| "grad_norm": 0.2613270580768585, |
| "learning_rate": 0.0001, |
| "loss": 2.3179, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.9459986987638256, |
| "grad_norm": 0.2182740867137909, |
| "learning_rate": 0.0001, |
| "loss": 2.0625, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.9466493168510085, |
| "grad_norm": 0.28436079621315, |
| "learning_rate": 0.0001, |
| "loss": 1.8766, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.9472999349381913, |
| "grad_norm": 0.1998225450515747, |
| "learning_rate": 0.0001, |
| "loss": 2.3157, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.9479505530253741, |
| "grad_norm": 0.19695498049259186, |
| "learning_rate": 0.0001, |
| "loss": 1.7501, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.9486011711125569, |
| "grad_norm": 0.1972542405128479, |
| "learning_rate": 0.0001, |
| "loss": 1.956, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.9492517891997397, |
| "grad_norm": 0.18410329520702362, |
| "learning_rate": 0.0001, |
| "loss": 1.8403, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.9499024072869225, |
| "grad_norm": 0.3675645887851715, |
| "learning_rate": 0.0001, |
| "loss": 2.9161, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.9505530253741054, |
| "grad_norm": 0.2620394229888916, |
| "learning_rate": 0.0001, |
| "loss": 2.4318, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.9512036434612883, |
| "grad_norm": 0.28973767161369324, |
| "learning_rate": 0.0001, |
| "loss": 2.0047, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.9518542615484711, |
| "grad_norm": 0.31598249077796936, |
| "learning_rate": 0.0001, |
| "loss": 2.4517, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.9525048796356539, |
| "grad_norm": 0.18546514213085175, |
| "learning_rate": 0.0001, |
| "loss": 1.8551, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.9531554977228367, |
| "grad_norm": 0.32123416662216187, |
| "learning_rate": 0.0001, |
| "loss": 2.7277, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.9538061158100195, |
| "grad_norm": 0.25180497765541077, |
| "learning_rate": 0.0001, |
| "loss": 1.7946, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.9544567338972023, |
| "grad_norm": 0.24950966238975525, |
| "learning_rate": 0.0001, |
| "loss": 2.0796, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.9551073519843851, |
| "grad_norm": 0.20496372878551483, |
| "learning_rate": 0.0001, |
| "loss": 2.0713, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.955757970071568, |
| "grad_norm": 0.20856817066669464, |
| "learning_rate": 0.0001, |
| "loss": 2.1812, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.9564085881587508, |
| "grad_norm": 0.26053234934806824, |
| "learning_rate": 0.0001, |
| "loss": 2.3234, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.9570592062459337, |
| "grad_norm": 0.3086039125919342, |
| "learning_rate": 0.0001, |
| "loss": 2.3745, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.9577098243331165, |
| "grad_norm": 0.19647593796253204, |
| "learning_rate": 0.0001, |
| "loss": 1.8883, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.9583604424202993, |
| "grad_norm": 0.20327430963516235, |
| "learning_rate": 0.0001, |
| "loss": 2.125, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.9590110605074821, |
| "grad_norm": 0.22550363838672638, |
| "learning_rate": 0.0001, |
| "loss": 2.1609, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.9596616785946649, |
| "grad_norm": 0.2369288206100464, |
| "learning_rate": 0.0001, |
| "loss": 1.9352, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.9603122966818478, |
| "grad_norm": 0.21195881068706512, |
| "learning_rate": 0.0001, |
| "loss": 2.0275, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.9609629147690306, |
| "grad_norm": 0.17060896754264832, |
| "learning_rate": 0.0001, |
| "loss": 1.9566, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.9616135328562134, |
| "grad_norm": 0.23335829377174377, |
| "learning_rate": 0.0001, |
| "loss": 2.296, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.9622641509433962, |
| "grad_norm": 0.34170275926589966, |
| "learning_rate": 0.0001, |
| "loss": 2.0079, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.962914769030579, |
| "grad_norm": 0.2187998741865158, |
| "learning_rate": 0.0001, |
| "loss": 2.0203, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.9635653871177619, |
| "grad_norm": 0.1877596378326416, |
| "learning_rate": 0.0001, |
| "loss": 1.9496, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.9642160052049447, |
| "grad_norm": 0.18515220284461975, |
| "learning_rate": 0.0001, |
| "loss": 2.0025, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.9648666232921275, |
| "grad_norm": 0.21251696348190308, |
| "learning_rate": 0.0001, |
| "loss": 1.8843, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.9655172413793104, |
| "grad_norm": 0.19280041754245758, |
| "learning_rate": 0.0001, |
| "loss": 2.0726, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.9661678594664932, |
| "grad_norm": 0.1977832317352295, |
| "learning_rate": 0.0001, |
| "loss": 2.0546, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.966818477553676, |
| "grad_norm": 0.19019471108913422, |
| "learning_rate": 0.0001, |
| "loss": 1.9825, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.9674690956408588, |
| "grad_norm": 0.20381596684455872, |
| "learning_rate": 0.0001, |
| "loss": 2.3339, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.9681197137280416, |
| "grad_norm": 0.1899532973766327, |
| "learning_rate": 0.0001, |
| "loss": 2.2962, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.9687703318152244, |
| "grad_norm": 0.20524102449417114, |
| "learning_rate": 0.0001, |
| "loss": 2.0874, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.9694209499024072, |
| "grad_norm": 0.179798424243927, |
| "learning_rate": 0.0001, |
| "loss": 1.8875, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.9700715679895902, |
| "grad_norm": 0.19358840584754944, |
| "learning_rate": 0.0001, |
| "loss": 2.1539, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.970722186076773, |
| "grad_norm": 0.2686682343482971, |
| "learning_rate": 0.0001, |
| "loss": 2.3412, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.9713728041639558, |
| "grad_norm": 0.2146061509847641, |
| "learning_rate": 0.0001, |
| "loss": 2.476, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.9720234222511386, |
| "grad_norm": 0.26737329363822937, |
| "learning_rate": 0.0001, |
| "loss": 2.8003, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.9726740403383214, |
| "grad_norm": 0.23344694077968597, |
| "learning_rate": 0.0001, |
| "loss": 2.1174, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.9733246584255042, |
| "grad_norm": 0.1991250365972519, |
| "learning_rate": 0.0001, |
| "loss": 2.5734, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.973975276512687, |
| "grad_norm": 0.21246576309204102, |
| "learning_rate": 0.0001, |
| "loss": 2.5597, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.9746258945998699, |
| "grad_norm": 0.1873084306716919, |
| "learning_rate": 0.0001, |
| "loss": 1.9547, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.9752765126870527, |
| "grad_norm": 0.17600129544734955, |
| "learning_rate": 0.0001, |
| "loss": 1.7255, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.9759271307742355, |
| "grad_norm": 0.19860287010669708, |
| "learning_rate": 0.0001, |
| "loss": 2.5043, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.9765777488614183, |
| "grad_norm": 0.1887977123260498, |
| "learning_rate": 0.0001, |
| "loss": 2.091, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.9772283669486012, |
| "grad_norm": 0.1981416791677475, |
| "learning_rate": 0.0001, |
| "loss": 1.968, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.977878985035784, |
| "grad_norm": 0.22598034143447876, |
| "learning_rate": 0.0001, |
| "loss": 2.2569, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.9785296031229668, |
| "grad_norm": 0.18924662470817566, |
| "learning_rate": 0.0001, |
| "loss": 2.3823, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.9791802212101497, |
| "grad_norm": 0.2178531438112259, |
| "learning_rate": 0.0001, |
| "loss": 2.0824, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.9798308392973325, |
| "grad_norm": 0.2125057578086853, |
| "learning_rate": 0.0001, |
| "loss": 2.196, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.9804814573845153, |
| "grad_norm": 0.19958944618701935, |
| "learning_rate": 0.0001, |
| "loss": 1.8752, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.9811320754716981, |
| "grad_norm": 0.23179121315479279, |
| "learning_rate": 0.0001, |
| "loss": 2.0539, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.9817826935588809, |
| "grad_norm": 0.19006481766700745, |
| "learning_rate": 0.0001, |
| "loss": 2.0125, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.9824333116460637, |
| "grad_norm": 0.1952325403690338, |
| "learning_rate": 0.0001, |
| "loss": 2.1829, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.9830839297332465, |
| "grad_norm": 0.24362123012542725, |
| "learning_rate": 0.0001, |
| "loss": 2.4628, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.9837345478204295, |
| "grad_norm": 0.20148973166942596, |
| "learning_rate": 0.0001, |
| "loss": 1.9869, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.9843851659076123, |
| "grad_norm": 0.19783656299114227, |
| "learning_rate": 0.0001, |
| "loss": 2.1447, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.9850357839947951, |
| "grad_norm": 0.2120031863451004, |
| "learning_rate": 0.0001, |
| "loss": 2.1149, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.9856864020819779, |
| "grad_norm": 0.2673274278640747, |
| "learning_rate": 0.0001, |
| "loss": 2.3755, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.9863370201691607, |
| "grad_norm": 0.31493106484413147, |
| "learning_rate": 0.0001, |
| "loss": 2.8462, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.9869876382563435, |
| "grad_norm": 0.24251258373260498, |
| "learning_rate": 0.0001, |
| "loss": 2.6499, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.9876382563435263, |
| "grad_norm": 0.19818106293678284, |
| "learning_rate": 0.0001, |
| "loss": 2.1229, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.9882888744307091, |
| "grad_norm": 0.2608949542045593, |
| "learning_rate": 0.0001, |
| "loss": 2.7848, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.988939492517892, |
| "grad_norm": 0.19214370846748352, |
| "learning_rate": 0.0001, |
| "loss": 2.0514, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.9895901106050748, |
| "grad_norm": 0.21454864740371704, |
| "learning_rate": 0.0001, |
| "loss": 1.8879, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.9902407286922577, |
| "grad_norm": 0.22206801176071167, |
| "learning_rate": 0.0001, |
| "loss": 2.0008, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.9908913467794405, |
| "grad_norm": 0.19270485639572144, |
| "learning_rate": 0.0001, |
| "loss": 1.9491, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.9915419648666233, |
| "grad_norm": 0.27471333742141724, |
| "learning_rate": 0.0001, |
| "loss": 2.4914, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.9921925829538061, |
| "grad_norm": 0.2767917513847351, |
| "learning_rate": 0.0001, |
| "loss": 2.3733, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.9928432010409889, |
| "grad_norm": 0.222362220287323, |
| "learning_rate": 0.0001, |
| "loss": 2.1563, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.9934938191281718, |
| "grad_norm": 0.2520142197608948, |
| "learning_rate": 0.0001, |
| "loss": 2.2877, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.9941444372153546, |
| "grad_norm": 0.20014792680740356, |
| "learning_rate": 0.0001, |
| "loss": 2.087, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.9947950553025374, |
| "grad_norm": 0.18027350306510925, |
| "learning_rate": 0.0001, |
| "loss": 1.9049, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.9954456733897202, |
| "grad_norm": 0.20437590777873993, |
| "learning_rate": 0.0001, |
| "loss": 1.9805, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.996096291476903, |
| "grad_norm": 0.38628190755844116, |
| "learning_rate": 0.0001, |
| "loss": 2.5385, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.9967469095640858, |
| "grad_norm": 0.24987295269966125, |
| "learning_rate": 0.0001, |
| "loss": 2.0762, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.9973975276512687, |
| "grad_norm": 0.2631097733974457, |
| "learning_rate": 0.0001, |
| "loss": 2.1693, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.9980481457384516, |
| "grad_norm": 0.21323037147521973, |
| "learning_rate": 0.0001, |
| "loss": 1.8547, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.9986987638256344, |
| "grad_norm": 0.19627395272254944, |
| "learning_rate": 0.0001, |
| "loss": 1.9524, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.9993493819128172, |
| "grad_norm": 0.23723964393138885, |
| "learning_rate": 0.0001, |
| "loss": 2.2301, |
| "step": 1536 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.2651236355304718, |
| "learning_rate": 0.0001, |
| "loss": 2.3068, |
| "step": 1537 |
| }, |
| { |
| "epoch": 1.0006506180871828, |
| "grad_norm": 0.21211975812911987, |
| "learning_rate": 0.0001, |
| "loss": 1.9403, |
| "step": 1538 |
| }, |
| { |
| "epoch": 1.0013012361743656, |
| "grad_norm": 0.21474523842334747, |
| "learning_rate": 0.0001, |
| "loss": 2.0487, |
| "step": 1539 |
| }, |
| { |
| "epoch": 1.0019518542615484, |
| "grad_norm": 0.1983027458190918, |
| "learning_rate": 0.0001, |
| "loss": 1.9984, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.0026024723487312, |
| "grad_norm": 0.18911167979240417, |
| "learning_rate": 0.0001, |
| "loss": 1.9451, |
| "step": 1541 |
| }, |
| { |
| "epoch": 1.003253090435914, |
| "grad_norm": 0.260123074054718, |
| "learning_rate": 0.0001, |
| "loss": 2.3423, |
| "step": 1542 |
| }, |
| { |
| "epoch": 1.0039037085230968, |
| "grad_norm": 0.2620505094528198, |
| "learning_rate": 0.0001, |
| "loss": 2.515, |
| "step": 1543 |
| }, |
| { |
| "epoch": 1.0045543266102797, |
| "grad_norm": 0.20050612092018127, |
| "learning_rate": 0.0001, |
| "loss": 1.9606, |
| "step": 1544 |
| }, |
| { |
| "epoch": 1.0052049446974627, |
| "grad_norm": 0.1960921436548233, |
| "learning_rate": 0.0001, |
| "loss": 1.8877, |
| "step": 1545 |
| }, |
| { |
| "epoch": 1.0058555627846455, |
| "grad_norm": 0.18536420166492462, |
| "learning_rate": 0.0001, |
| "loss": 1.7578, |
| "step": 1546 |
| }, |
| { |
| "epoch": 1.0065061808718283, |
| "grad_norm": 0.2662370800971985, |
| "learning_rate": 0.0001, |
| "loss": 2.3314, |
| "step": 1547 |
| }, |
| { |
| "epoch": 1.0071567989590111, |
| "grad_norm": 0.24542543292045593, |
| "learning_rate": 0.0001, |
| "loss": 1.9693, |
| "step": 1548 |
| }, |
| { |
| "epoch": 1.007807417046194, |
| "grad_norm": 0.20731844007968903, |
| "learning_rate": 0.0001, |
| "loss": 2.2239, |
| "step": 1549 |
| }, |
| { |
| "epoch": 1.0084580351333767, |
| "grad_norm": 0.20431263744831085, |
| "learning_rate": 0.0001, |
| "loss": 2.0081, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.0091086532205595, |
| "grad_norm": 0.21462121605873108, |
| "learning_rate": 0.0001, |
| "loss": 2.1085, |
| "step": 1551 |
| }, |
| { |
| "epoch": 1.0097592713077423, |
| "grad_norm": 0.21525529026985168, |
| "learning_rate": 0.0001, |
| "loss": 2.0028, |
| "step": 1552 |
| }, |
| { |
| "epoch": 1.0104098893949252, |
| "grad_norm": 0.229073628783226, |
| "learning_rate": 0.0001, |
| "loss": 2.1239, |
| "step": 1553 |
| }, |
| { |
| "epoch": 1.011060507482108, |
| "grad_norm": 0.17880584299564362, |
| "learning_rate": 0.0001, |
| "loss": 1.7847, |
| "step": 1554 |
| }, |
| { |
| "epoch": 1.0117111255692908, |
| "grad_norm": 0.20676289498806, |
| "learning_rate": 0.0001, |
| "loss": 2.1814, |
| "step": 1555 |
| }, |
| { |
| "epoch": 1.0123617436564736, |
| "grad_norm": 0.1960391253232956, |
| "learning_rate": 0.0001, |
| "loss": 2.031, |
| "step": 1556 |
| }, |
| { |
| "epoch": 1.0130123617436564, |
| "grad_norm": 0.21578386425971985, |
| "learning_rate": 0.0001, |
| "loss": 2.1994, |
| "step": 1557 |
| }, |
| { |
| "epoch": 1.0136629798308392, |
| "grad_norm": 0.3862130343914032, |
| "learning_rate": 0.0001, |
| "loss": 2.8472, |
| "step": 1558 |
| }, |
| { |
| "epoch": 1.0143135979180222, |
| "grad_norm": 0.2248488962650299, |
| "learning_rate": 0.0001, |
| "loss": 2.1641, |
| "step": 1559 |
| }, |
| { |
| "epoch": 1.014964216005205, |
| "grad_norm": 0.2818978428840637, |
| "learning_rate": 0.0001, |
| "loss": 2.3423, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.0156148340923878, |
| "grad_norm": 0.3258184790611267, |
| "learning_rate": 0.0001, |
| "loss": 2.812, |
| "step": 1561 |
| }, |
| { |
| "epoch": 1.0162654521795706, |
| "grad_norm": 0.22914977371692657, |
| "learning_rate": 0.0001, |
| "loss": 2.602, |
| "step": 1562 |
| }, |
| { |
| "epoch": 1.0169160702667535, |
| "grad_norm": 0.21586816012859344, |
| "learning_rate": 0.0001, |
| "loss": 2.2282, |
| "step": 1563 |
| }, |
| { |
| "epoch": 1.0175666883539363, |
| "grad_norm": 0.22111280262470245, |
| "learning_rate": 0.0001, |
| "loss": 1.9599, |
| "step": 1564 |
| }, |
| { |
| "epoch": 1.018217306441119, |
| "grad_norm": 0.22815647721290588, |
| "learning_rate": 0.0001, |
| "loss": 2.101, |
| "step": 1565 |
| }, |
| { |
| "epoch": 1.0188679245283019, |
| "grad_norm": 0.2073185294866562, |
| "learning_rate": 0.0001, |
| "loss": 2.0752, |
| "step": 1566 |
| }, |
| { |
| "epoch": 1.0195185426154847, |
| "grad_norm": 0.23528914153575897, |
| "learning_rate": 0.0001, |
| "loss": 2.3239, |
| "step": 1567 |
| }, |
| { |
| "epoch": 1.0201691607026675, |
| "grad_norm": 0.2299623340368271, |
| "learning_rate": 0.0001, |
| "loss": 1.9261, |
| "step": 1568 |
| }, |
| { |
| "epoch": 1.0208197787898503, |
| "grad_norm": 0.21713495254516602, |
| "learning_rate": 0.0001, |
| "loss": 2.1048, |
| "step": 1569 |
| }, |
| { |
| "epoch": 1.0214703968770331, |
| "grad_norm": 0.22250354290008545, |
| "learning_rate": 0.0001, |
| "loss": 2.2006, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.022121014964216, |
| "grad_norm": 0.21191568672657013, |
| "learning_rate": 0.0001, |
| "loss": 2.0713, |
| "step": 1571 |
| }, |
| { |
| "epoch": 1.0227716330513987, |
| "grad_norm": 0.2935731112957001, |
| "learning_rate": 0.0001, |
| "loss": 2.4078, |
| "step": 1572 |
| }, |
| { |
| "epoch": 1.0234222511385815, |
| "grad_norm": 0.19087597727775574, |
| "learning_rate": 0.0001, |
| "loss": 1.9585, |
| "step": 1573 |
| }, |
| { |
| "epoch": 1.0240728692257646, |
| "grad_norm": 0.23968839645385742, |
| "learning_rate": 0.0001, |
| "loss": 2.1732, |
| "step": 1574 |
| }, |
| { |
| "epoch": 1.0247234873129474, |
| "grad_norm": 0.25471219420433044, |
| "learning_rate": 0.0001, |
| "loss": 2.239, |
| "step": 1575 |
| }, |
| { |
| "epoch": 1.0253741054001302, |
| "grad_norm": 0.23481620848178864, |
| "learning_rate": 0.0001, |
| "loss": 1.988, |
| "step": 1576 |
| }, |
| { |
| "epoch": 1.026024723487313, |
| "grad_norm": 0.18225711584091187, |
| "learning_rate": 0.0001, |
| "loss": 1.7759, |
| "step": 1577 |
| }, |
| { |
| "epoch": 1.0266753415744958, |
| "grad_norm": 0.21290135383605957, |
| "learning_rate": 0.0001, |
| "loss": 2.4749, |
| "step": 1578 |
| }, |
| { |
| "epoch": 1.0273259596616786, |
| "grad_norm": 0.19437038898468018, |
| "learning_rate": 0.0001, |
| "loss": 2.1476, |
| "step": 1579 |
| }, |
| { |
| "epoch": 1.0279765777488614, |
| "grad_norm": 0.19912205636501312, |
| "learning_rate": 0.0001, |
| "loss": 1.8947, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.0286271958360442, |
| "grad_norm": 0.23415659368038177, |
| "learning_rate": 0.0001, |
| "loss": 1.9114, |
| "step": 1581 |
| }, |
| { |
| "epoch": 1.029277813923227, |
| "grad_norm": 0.20934872329235077, |
| "learning_rate": 0.0001, |
| "loss": 1.8594, |
| "step": 1582 |
| }, |
| { |
| "epoch": 1.0299284320104098, |
| "grad_norm": 0.34651368856430054, |
| "learning_rate": 0.0001, |
| "loss": 2.6035, |
| "step": 1583 |
| }, |
| { |
| "epoch": 1.0305790500975927, |
| "grad_norm": 0.19000445306301117, |
| "learning_rate": 0.0001, |
| "loss": 1.6682, |
| "step": 1584 |
| }, |
| { |
| "epoch": 1.0312296681847755, |
| "grad_norm": 0.20575547218322754, |
| "learning_rate": 0.0001, |
| "loss": 1.9983, |
| "step": 1585 |
| }, |
| { |
| "epoch": 1.0318802862719583, |
| "grad_norm": 0.21148836612701416, |
| "learning_rate": 0.0001, |
| "loss": 1.9058, |
| "step": 1586 |
| }, |
| { |
| "epoch": 1.032530904359141, |
| "grad_norm": 0.23416981101036072, |
| "learning_rate": 0.0001, |
| "loss": 2.1149, |
| "step": 1587 |
| }, |
| { |
| "epoch": 1.033181522446324, |
| "grad_norm": 0.20623871684074402, |
| "learning_rate": 0.0001, |
| "loss": 1.958, |
| "step": 1588 |
| }, |
| { |
| "epoch": 1.033832140533507, |
| "grad_norm": 0.21522465348243713, |
| "learning_rate": 0.0001, |
| "loss": 2.2985, |
| "step": 1589 |
| }, |
| { |
| "epoch": 1.0344827586206897, |
| "grad_norm": 0.20438739657402039, |
| "learning_rate": 0.0001, |
| "loss": 2.0288, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.0351333767078725, |
| "grad_norm": 0.1987256109714508, |
| "learning_rate": 0.0001, |
| "loss": 1.907, |
| "step": 1591 |
| }, |
| { |
| "epoch": 1.0357839947950553, |
| "grad_norm": 0.21431812644004822, |
| "learning_rate": 0.0001, |
| "loss": 2.0281, |
| "step": 1592 |
| }, |
| { |
| "epoch": 1.0364346128822381, |
| "grad_norm": 0.19776973128318787, |
| "learning_rate": 0.0001, |
| "loss": 1.8285, |
| "step": 1593 |
| }, |
| { |
| "epoch": 1.037085230969421, |
| "grad_norm": 0.1931617558002472, |
| "learning_rate": 0.0001, |
| "loss": 2.0006, |
| "step": 1594 |
| }, |
| { |
| "epoch": 1.0377358490566038, |
| "grad_norm": 0.21659119427204132, |
| "learning_rate": 0.0001, |
| "loss": 1.9848, |
| "step": 1595 |
| }, |
| { |
| "epoch": 1.0383864671437866, |
| "grad_norm": 0.220927432179451, |
| "learning_rate": 0.0001, |
| "loss": 2.0241, |
| "step": 1596 |
| }, |
| { |
| "epoch": 1.0390370852309694, |
| "grad_norm": 0.2014313042163849, |
| "learning_rate": 0.0001, |
| "loss": 1.9998, |
| "step": 1597 |
| }, |
| { |
| "epoch": 1.0396877033181522, |
| "grad_norm": 0.18325099349021912, |
| "learning_rate": 0.0001, |
| "loss": 1.8917, |
| "step": 1598 |
| }, |
| { |
| "epoch": 1.040338321405335, |
| "grad_norm": 0.2919682264328003, |
| "learning_rate": 0.0001, |
| "loss": 2.2153, |
| "step": 1599 |
| }, |
| { |
| "epoch": 1.0409889394925178, |
| "grad_norm": 0.24260954558849335, |
| "learning_rate": 0.0001, |
| "loss": 2.1984, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.0416395575797006, |
| "grad_norm": 0.19430296123027802, |
| "learning_rate": 0.0001, |
| "loss": 1.8837, |
| "step": 1601 |
| }, |
| { |
| "epoch": 1.0422901756668836, |
| "grad_norm": 0.2837108373641968, |
| "learning_rate": 0.0001, |
| "loss": 2.2499, |
| "step": 1602 |
| }, |
| { |
| "epoch": 1.0429407937540665, |
| "grad_norm": 0.3835313320159912, |
| "learning_rate": 0.0001, |
| "loss": 2.8099, |
| "step": 1603 |
| }, |
| { |
| "epoch": 1.0435914118412493, |
| "grad_norm": 0.20192676782608032, |
| "learning_rate": 0.0001, |
| "loss": 2.0169, |
| "step": 1604 |
| }, |
| { |
| "epoch": 1.044242029928432, |
| "grad_norm": 0.23236776888370514, |
| "learning_rate": 0.0001, |
| "loss": 2.1706, |
| "step": 1605 |
| }, |
| { |
| "epoch": 1.0448926480156149, |
| "grad_norm": 0.20029866695404053, |
| "learning_rate": 0.0001, |
| "loss": 1.89, |
| "step": 1606 |
| }, |
| { |
| "epoch": 1.0455432661027977, |
| "grad_norm": 0.19408684968948364, |
| "learning_rate": 0.0001, |
| "loss": 2.1653, |
| "step": 1607 |
| }, |
| { |
| "epoch": 1.0461938841899805, |
| "grad_norm": 0.18135976791381836, |
| "learning_rate": 0.0001, |
| "loss": 1.8118, |
| "step": 1608 |
| }, |
| { |
| "epoch": 1.0468445022771633, |
| "grad_norm": 0.21076172590255737, |
| "learning_rate": 0.0001, |
| "loss": 2.3655, |
| "step": 1609 |
| }, |
| { |
| "epoch": 1.047495120364346, |
| "grad_norm": 0.21254242956638336, |
| "learning_rate": 0.0001, |
| "loss": 2.1535, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.048145738451529, |
| "grad_norm": 0.21650992333889008, |
| "learning_rate": 0.0001, |
| "loss": 2.4061, |
| "step": 1611 |
| }, |
| { |
| "epoch": 1.0487963565387117, |
| "grad_norm": 0.19958347082138062, |
| "learning_rate": 0.0001, |
| "loss": 2.2802, |
| "step": 1612 |
| }, |
| { |
| "epoch": 1.0494469746258945, |
| "grad_norm": 0.22608724236488342, |
| "learning_rate": 0.0001, |
| "loss": 2.1614, |
| "step": 1613 |
| }, |
| { |
| "epoch": 1.0500975927130773, |
| "grad_norm": 0.2256132811307907, |
| "learning_rate": 0.0001, |
| "loss": 2.3517, |
| "step": 1614 |
| }, |
| { |
| "epoch": 1.0507482108002602, |
| "grad_norm": 0.20183439552783966, |
| "learning_rate": 0.0001, |
| "loss": 2.017, |
| "step": 1615 |
| }, |
| { |
| "epoch": 1.051398828887443, |
| "grad_norm": 0.18718242645263672, |
| "learning_rate": 0.0001, |
| "loss": 1.9489, |
| "step": 1616 |
| }, |
| { |
| "epoch": 1.052049446974626, |
| "grad_norm": 0.19910266995429993, |
| "learning_rate": 0.0001, |
| "loss": 1.8821, |
| "step": 1617 |
| }, |
| { |
| "epoch": 1.0527000650618088, |
| "grad_norm": 0.23922882974147797, |
| "learning_rate": 0.0001, |
| "loss": 2.5673, |
| "step": 1618 |
| }, |
| { |
| "epoch": 1.0533506831489916, |
| "grad_norm": 0.19010673463344574, |
| "learning_rate": 0.0001, |
| "loss": 1.918, |
| "step": 1619 |
| }, |
| { |
| "epoch": 1.0540013012361744, |
| "grad_norm": 0.24004904925823212, |
| "learning_rate": 0.0001, |
| "loss": 2.4854, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.0546519193233572, |
| "grad_norm": 0.29694491624832153, |
| "learning_rate": 0.0001, |
| "loss": 2.6428, |
| "step": 1621 |
| }, |
| { |
| "epoch": 1.05530253741054, |
| "grad_norm": 0.19973435997962952, |
| "learning_rate": 0.0001, |
| "loss": 2.1678, |
| "step": 1622 |
| }, |
| { |
| "epoch": 1.0559531554977228, |
| "grad_norm": 0.2725414037704468, |
| "learning_rate": 0.0001, |
| "loss": 2.3829, |
| "step": 1623 |
| }, |
| { |
| "epoch": 1.0566037735849056, |
| "grad_norm": 0.2413836568593979, |
| "learning_rate": 0.0001, |
| "loss": 2.442, |
| "step": 1624 |
| }, |
| { |
| "epoch": 1.0572543916720885, |
| "grad_norm": 0.2060219645500183, |
| "learning_rate": 0.0001, |
| "loss": 2.1697, |
| "step": 1625 |
| }, |
| { |
| "epoch": 1.0579050097592713, |
| "grad_norm": 0.23440484702587128, |
| "learning_rate": 0.0001, |
| "loss": 1.9699, |
| "step": 1626 |
| }, |
| { |
| "epoch": 1.058555627846454, |
| "grad_norm": 0.2511712610721588, |
| "learning_rate": 0.0001, |
| "loss": 1.963, |
| "step": 1627 |
| }, |
| { |
| "epoch": 1.0592062459336369, |
| "grad_norm": 0.20453818142414093, |
| "learning_rate": 0.0001, |
| "loss": 2.311, |
| "step": 1628 |
| }, |
| { |
| "epoch": 1.0598568640208197, |
| "grad_norm": 0.2451258897781372, |
| "learning_rate": 0.0001, |
| "loss": 2.2684, |
| "step": 1629 |
| }, |
| { |
| "epoch": 1.0605074821080025, |
| "grad_norm": 0.23648382723331451, |
| "learning_rate": 0.0001, |
| "loss": 2.7005, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.0611581001951855, |
| "grad_norm": 0.22232408821582794, |
| "learning_rate": 0.0001, |
| "loss": 2.2906, |
| "step": 1631 |
| }, |
| { |
| "epoch": 1.0618087182823683, |
| "grad_norm": 0.25329262018203735, |
| "learning_rate": 0.0001, |
| "loss": 2.3645, |
| "step": 1632 |
| }, |
| { |
| "epoch": 1.0624593363695511, |
| "grad_norm": 0.1991291046142578, |
| "learning_rate": 0.0001, |
| "loss": 2.2074, |
| "step": 1633 |
| }, |
| { |
| "epoch": 1.063109954456734, |
| "grad_norm": 0.19225656986236572, |
| "learning_rate": 0.0001, |
| "loss": 1.668, |
| "step": 1634 |
| }, |
| { |
| "epoch": 1.0637605725439168, |
| "grad_norm": 0.4304276704788208, |
| "learning_rate": 0.0001, |
| "loss": 2.9806, |
| "step": 1635 |
| }, |
| { |
| "epoch": 1.0644111906310996, |
| "grad_norm": 0.26432791352272034, |
| "learning_rate": 0.0001, |
| "loss": 2.4741, |
| "step": 1636 |
| }, |
| { |
| "epoch": 1.0650618087182824, |
| "grad_norm": 0.25258371233940125, |
| "learning_rate": 0.0001, |
| "loss": 2.1187, |
| "step": 1637 |
| }, |
| { |
| "epoch": 1.0657124268054652, |
| "grad_norm": 0.22779172658920288, |
| "learning_rate": 0.0001, |
| "loss": 2.1152, |
| "step": 1638 |
| }, |
| { |
| "epoch": 1.066363044892648, |
| "grad_norm": 0.19665396213531494, |
| "learning_rate": 0.0001, |
| "loss": 2.009, |
| "step": 1639 |
| }, |
| { |
| "epoch": 1.0670136629798308, |
| "grad_norm": 0.27610522508621216, |
| "learning_rate": 0.0001, |
| "loss": 2.5154, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.0676642810670136, |
| "grad_norm": 0.20003536343574524, |
| "learning_rate": 0.0001, |
| "loss": 1.9189, |
| "step": 1641 |
| }, |
| { |
| "epoch": 1.0683148991541964, |
| "grad_norm": 0.22261562943458557, |
| "learning_rate": 0.0001, |
| "loss": 2.4746, |
| "step": 1642 |
| }, |
| { |
| "epoch": 1.0689655172413792, |
| "grad_norm": 0.2206978052854538, |
| "learning_rate": 0.0001, |
| "loss": 2.2714, |
| "step": 1643 |
| }, |
| { |
| "epoch": 1.069616135328562, |
| "grad_norm": 0.2205539345741272, |
| "learning_rate": 0.0001, |
| "loss": 2.0337, |
| "step": 1644 |
| }, |
| { |
| "epoch": 1.070266753415745, |
| "grad_norm": 0.22209426760673523, |
| "learning_rate": 0.0001, |
| "loss": 2.1957, |
| "step": 1645 |
| }, |
| { |
| "epoch": 1.0709173715029279, |
| "grad_norm": 0.19629520177841187, |
| "learning_rate": 0.0001, |
| "loss": 1.8585, |
| "step": 1646 |
| }, |
| { |
| "epoch": 1.0715679895901107, |
| "grad_norm": 0.21181420981884003, |
| "learning_rate": 0.0001, |
| "loss": 2.073, |
| "step": 1647 |
| }, |
| { |
| "epoch": 1.0722186076772935, |
| "grad_norm": 0.24869917333126068, |
| "learning_rate": 0.0001, |
| "loss": 2.3575, |
| "step": 1648 |
| }, |
| { |
| "epoch": 1.0728692257644763, |
| "grad_norm": 0.21665319800376892, |
| "learning_rate": 0.0001, |
| "loss": 1.9634, |
| "step": 1649 |
| }, |
| { |
| "epoch": 1.073519843851659, |
| "grad_norm": 0.22750093042850494, |
| "learning_rate": 0.0001, |
| "loss": 2.1718, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.074170461938842, |
| "grad_norm": 0.3381316661834717, |
| "learning_rate": 0.0001, |
| "loss": 2.5657, |
| "step": 1651 |
| }, |
| { |
| "epoch": 1.0748210800260247, |
| "grad_norm": 0.21388553082942963, |
| "learning_rate": 0.0001, |
| "loss": 1.9223, |
| "step": 1652 |
| }, |
| { |
| "epoch": 1.0754716981132075, |
| "grad_norm": 0.2441757768392563, |
| "learning_rate": 0.0001, |
| "loss": 2.3226, |
| "step": 1653 |
| }, |
| { |
| "epoch": 1.0761223162003903, |
| "grad_norm": 0.22729694843292236, |
| "learning_rate": 0.0001, |
| "loss": 1.9919, |
| "step": 1654 |
| }, |
| { |
| "epoch": 1.0767729342875731, |
| "grad_norm": 0.19755782186985016, |
| "learning_rate": 0.0001, |
| "loss": 1.851, |
| "step": 1655 |
| }, |
| { |
| "epoch": 1.077423552374756, |
| "grad_norm": 0.22772987186908722, |
| "learning_rate": 0.0001, |
| "loss": 2.235, |
| "step": 1656 |
| }, |
| { |
| "epoch": 1.0780741704619388, |
| "grad_norm": 0.20212537050247192, |
| "learning_rate": 0.0001, |
| "loss": 1.9563, |
| "step": 1657 |
| }, |
| { |
| "epoch": 1.0787247885491216, |
| "grad_norm": 0.28667473793029785, |
| "learning_rate": 0.0001, |
| "loss": 2.7498, |
| "step": 1658 |
| }, |
| { |
| "epoch": 1.0793754066363044, |
| "grad_norm": 0.19072775542736053, |
| "learning_rate": 0.0001, |
| "loss": 2.1144, |
| "step": 1659 |
| }, |
| { |
| "epoch": 1.0800260247234874, |
| "grad_norm": 0.22677192091941833, |
| "learning_rate": 0.0001, |
| "loss": 2.1637, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.0806766428106702, |
| "grad_norm": 0.2249511182308197, |
| "learning_rate": 0.0001, |
| "loss": 1.9901, |
| "step": 1661 |
| }, |
| { |
| "epoch": 1.081327260897853, |
| "grad_norm": 0.20563147962093353, |
| "learning_rate": 0.0001, |
| "loss": 1.9635, |
| "step": 1662 |
| }, |
| { |
| "epoch": 1.0819778789850358, |
| "grad_norm": 0.20936359465122223, |
| "learning_rate": 0.0001, |
| "loss": 2.1167, |
| "step": 1663 |
| }, |
| { |
| "epoch": 1.0826284970722186, |
| "grad_norm": 0.22262610495090485, |
| "learning_rate": 0.0001, |
| "loss": 2.3993, |
| "step": 1664 |
| }, |
| { |
| "epoch": 1.0832791151594015, |
| "grad_norm": 0.2985728085041046, |
| "learning_rate": 0.0001, |
| "loss": 2.3795, |
| "step": 1665 |
| }, |
| { |
| "epoch": 1.0839297332465843, |
| "grad_norm": 0.1936149299144745, |
| "learning_rate": 0.0001, |
| "loss": 1.8922, |
| "step": 1666 |
| }, |
| { |
| "epoch": 1.084580351333767, |
| "grad_norm": 0.21391011774539948, |
| "learning_rate": 0.0001, |
| "loss": 2.2033, |
| "step": 1667 |
| }, |
| { |
| "epoch": 1.0852309694209499, |
| "grad_norm": 0.2065243273973465, |
| "learning_rate": 0.0001, |
| "loss": 1.8489, |
| "step": 1668 |
| }, |
| { |
| "epoch": 1.0858815875081327, |
| "grad_norm": 0.20983067154884338, |
| "learning_rate": 0.0001, |
| "loss": 2.0898, |
| "step": 1669 |
| }, |
| { |
| "epoch": 1.0865322055953155, |
| "grad_norm": 0.2056410312652588, |
| "learning_rate": 0.0001, |
| "loss": 1.9988, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.0871828236824983, |
| "grad_norm": 0.20099294185638428, |
| "learning_rate": 0.0001, |
| "loss": 2.0809, |
| "step": 1671 |
| }, |
| { |
| "epoch": 1.087833441769681, |
| "grad_norm": 0.21519562602043152, |
| "learning_rate": 0.0001, |
| "loss": 1.8996, |
| "step": 1672 |
| }, |
| { |
| "epoch": 1.088484059856864, |
| "grad_norm": 0.236520454287529, |
| "learning_rate": 0.0001, |
| "loss": 2.3744, |
| "step": 1673 |
| }, |
| { |
| "epoch": 1.089134677944047, |
| "grad_norm": 0.25001972913742065, |
| "learning_rate": 0.0001, |
| "loss": 2.3235, |
| "step": 1674 |
| }, |
| { |
| "epoch": 1.0897852960312298, |
| "grad_norm": 0.22644248604774475, |
| "learning_rate": 0.0001, |
| "loss": 2.1194, |
| "step": 1675 |
| }, |
| { |
| "epoch": 1.0904359141184126, |
| "grad_norm": 0.2943194508552551, |
| "learning_rate": 0.0001, |
| "loss": 2.6793, |
| "step": 1676 |
| }, |
| { |
| "epoch": 1.0910865322055954, |
| "grad_norm": 0.21737070381641388, |
| "learning_rate": 0.0001, |
| "loss": 2.3041, |
| "step": 1677 |
| }, |
| { |
| "epoch": 1.0917371502927782, |
| "grad_norm": 0.3601996898651123, |
| "learning_rate": 0.0001, |
| "loss": 2.2822, |
| "step": 1678 |
| }, |
| { |
| "epoch": 1.092387768379961, |
| "grad_norm": 0.2125779092311859, |
| "learning_rate": 0.0001, |
| "loss": 2.0889, |
| "step": 1679 |
| }, |
| { |
| "epoch": 1.0930383864671438, |
| "grad_norm": 0.35293838381767273, |
| "learning_rate": 0.0001, |
| "loss": 2.5618, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.0936890045543266, |
| "grad_norm": 0.2183585911989212, |
| "learning_rate": 0.0001, |
| "loss": 2.1648, |
| "step": 1681 |
| }, |
| { |
| "epoch": 1.0943396226415094, |
| "grad_norm": 0.2021835297346115, |
| "learning_rate": 0.0001, |
| "loss": 1.7622, |
| "step": 1682 |
| }, |
| { |
| "epoch": 1.0949902407286922, |
| "grad_norm": 0.23878879845142365, |
| "learning_rate": 0.0001, |
| "loss": 2.2226, |
| "step": 1683 |
| }, |
| { |
| "epoch": 1.095640858815875, |
| "grad_norm": 0.2424585223197937, |
| "learning_rate": 0.0001, |
| "loss": 2.1034, |
| "step": 1684 |
| }, |
| { |
| "epoch": 1.0962914769030578, |
| "grad_norm": 0.2282852977514267, |
| "learning_rate": 0.0001, |
| "loss": 2.1175, |
| "step": 1685 |
| }, |
| { |
| "epoch": 1.0969420949902406, |
| "grad_norm": 0.2147156298160553, |
| "learning_rate": 0.0001, |
| "loss": 2.0009, |
| "step": 1686 |
| }, |
| { |
| "epoch": 1.0975927130774235, |
| "grad_norm": 0.28743499517440796, |
| "learning_rate": 0.0001, |
| "loss": 2.8995, |
| "step": 1687 |
| }, |
| { |
| "epoch": 1.0982433311646065, |
| "grad_norm": 0.21453475952148438, |
| "learning_rate": 0.0001, |
| "loss": 2.2031, |
| "step": 1688 |
| }, |
| { |
| "epoch": 1.0988939492517893, |
| "grad_norm": 0.24577857553958893, |
| "learning_rate": 0.0001, |
| "loss": 2.4955, |
| "step": 1689 |
| }, |
| { |
| "epoch": 1.099544567338972, |
| "grad_norm": 0.1992902010679245, |
| "learning_rate": 0.0001, |
| "loss": 1.9768, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.100195185426155, |
| "grad_norm": 0.20476333796977997, |
| "learning_rate": 0.0001, |
| "loss": 1.9382, |
| "step": 1691 |
| }, |
| { |
| "epoch": 1.1008458035133377, |
| "grad_norm": 0.2085767537355423, |
| "learning_rate": 0.0001, |
| "loss": 2.1366, |
| "step": 1692 |
| }, |
| { |
| "epoch": 1.1014964216005205, |
| "grad_norm": 0.21662667393684387, |
| "learning_rate": 0.0001, |
| "loss": 2.1346, |
| "step": 1693 |
| }, |
| { |
| "epoch": 1.1021470396877033, |
| "grad_norm": 0.22070181369781494, |
| "learning_rate": 0.0001, |
| "loss": 2.0913, |
| "step": 1694 |
| }, |
| { |
| "epoch": 1.1027976577748861, |
| "grad_norm": 0.1995183378458023, |
| "learning_rate": 0.0001, |
| "loss": 1.8566, |
| "step": 1695 |
| }, |
| { |
| "epoch": 1.103448275862069, |
| "grad_norm": 0.4109169840812683, |
| "learning_rate": 0.0001, |
| "loss": 2.7357, |
| "step": 1696 |
| }, |
| { |
| "epoch": 1.1040988939492518, |
| "grad_norm": 0.2846473455429077, |
| "learning_rate": 0.0001, |
| "loss": 2.6316, |
| "step": 1697 |
| }, |
| { |
| "epoch": 1.1047495120364346, |
| "grad_norm": 0.2119186669588089, |
| "learning_rate": 0.0001, |
| "loss": 2.277, |
| "step": 1698 |
| }, |
| { |
| "epoch": 1.1054001301236174, |
| "grad_norm": 0.22267405688762665, |
| "learning_rate": 0.0001, |
| "loss": 2.6095, |
| "step": 1699 |
| }, |
| { |
| "epoch": 1.1060507482108002, |
| "grad_norm": 0.24612359702587128, |
| "learning_rate": 0.0001, |
| "loss": 2.0949, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.106701366297983, |
| "grad_norm": 0.2180100530385971, |
| "learning_rate": 0.0001, |
| "loss": 2.1434, |
| "step": 1701 |
| }, |
| { |
| "epoch": 1.1073519843851658, |
| "grad_norm": 0.2079351395368576, |
| "learning_rate": 0.0001, |
| "loss": 2.142, |
| "step": 1702 |
| }, |
| { |
| "epoch": 1.1080026024723488, |
| "grad_norm": 0.2950500249862671, |
| "learning_rate": 0.0001, |
| "loss": 2.2223, |
| "step": 1703 |
| }, |
| { |
| "epoch": 1.1086532205595316, |
| "grad_norm": 0.25519388914108276, |
| "learning_rate": 0.0001, |
| "loss": 2.2642, |
| "step": 1704 |
| }, |
| { |
| "epoch": 1.1093038386467144, |
| "grad_norm": 0.21185335516929626, |
| "learning_rate": 0.0001, |
| "loss": 1.9126, |
| "step": 1705 |
| }, |
| { |
| "epoch": 1.1099544567338973, |
| "grad_norm": 0.23266835510730743, |
| "learning_rate": 0.0001, |
| "loss": 2.4049, |
| "step": 1706 |
| }, |
| { |
| "epoch": 1.11060507482108, |
| "grad_norm": 0.2577219307422638, |
| "learning_rate": 0.0001, |
| "loss": 1.9254, |
| "step": 1707 |
| }, |
| { |
| "epoch": 1.1112556929082629, |
| "grad_norm": 0.2381918579339981, |
| "learning_rate": 0.0001, |
| "loss": 1.9373, |
| "step": 1708 |
| }, |
| { |
| "epoch": 1.1119063109954457, |
| "grad_norm": 0.215755432844162, |
| "learning_rate": 0.0001, |
| "loss": 1.865, |
| "step": 1709 |
| }, |
| { |
| "epoch": 1.1125569290826285, |
| "grad_norm": 0.2784242630004883, |
| "learning_rate": 0.0001, |
| "loss": 2.3655, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.1132075471698113, |
| "grad_norm": 0.21846503019332886, |
| "learning_rate": 0.0001, |
| "loss": 1.9513, |
| "step": 1711 |
| }, |
| { |
| "epoch": 1.113858165256994, |
| "grad_norm": 0.234720379114151, |
| "learning_rate": 0.0001, |
| "loss": 2.0833, |
| "step": 1712 |
| }, |
| { |
| "epoch": 1.114508783344177, |
| "grad_norm": 0.20331884920597076, |
| "learning_rate": 0.0001, |
| "loss": 1.9819, |
| "step": 1713 |
| }, |
| { |
| "epoch": 1.1151594014313597, |
| "grad_norm": 0.22917625308036804, |
| "learning_rate": 0.0001, |
| "loss": 1.9423, |
| "step": 1714 |
| }, |
| { |
| "epoch": 1.1158100195185425, |
| "grad_norm": 0.2647688388824463, |
| "learning_rate": 0.0001, |
| "loss": 2.4246, |
| "step": 1715 |
| }, |
| { |
| "epoch": 1.1164606376057253, |
| "grad_norm": 0.2142096906900406, |
| "learning_rate": 0.0001, |
| "loss": 1.9986, |
| "step": 1716 |
| }, |
| { |
| "epoch": 1.1171112556929081, |
| "grad_norm": 0.2264833152294159, |
| "learning_rate": 0.0001, |
| "loss": 1.8382, |
| "step": 1717 |
| }, |
| { |
| "epoch": 1.1177618737800912, |
| "grad_norm": 0.19746141135692596, |
| "learning_rate": 0.0001, |
| "loss": 2.0013, |
| "step": 1718 |
| }, |
| { |
| "epoch": 1.118412491867274, |
| "grad_norm": 0.3226877748966217, |
| "learning_rate": 0.0001, |
| "loss": 2.6326, |
| "step": 1719 |
| }, |
| { |
| "epoch": 1.1190631099544568, |
| "grad_norm": 0.20398877561092377, |
| "learning_rate": 0.0001, |
| "loss": 1.9253, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.1197137280416396, |
| "grad_norm": 0.277138352394104, |
| "learning_rate": 0.0001, |
| "loss": 2.8379, |
| "step": 1721 |
| }, |
| { |
| "epoch": 1.1203643461288224, |
| "grad_norm": 0.22668658196926117, |
| "learning_rate": 0.0001, |
| "loss": 1.9818, |
| "step": 1722 |
| }, |
| { |
| "epoch": 1.1210149642160052, |
| "grad_norm": 0.3012169897556305, |
| "learning_rate": 0.0001, |
| "loss": 2.803, |
| "step": 1723 |
| }, |
| { |
| "epoch": 1.121665582303188, |
| "grad_norm": 0.30012813210487366, |
| "learning_rate": 0.0001, |
| "loss": 2.8278, |
| "step": 1724 |
| }, |
| { |
| "epoch": 1.1223162003903708, |
| "grad_norm": 0.19938437640666962, |
| "learning_rate": 0.0001, |
| "loss": 1.8033, |
| "step": 1725 |
| }, |
| { |
| "epoch": 1.1229668184775536, |
| "grad_norm": 0.21210303902626038, |
| "learning_rate": 0.0001, |
| "loss": 2.1325, |
| "step": 1726 |
| }, |
| { |
| "epoch": 1.1236174365647364, |
| "grad_norm": 0.199849933385849, |
| "learning_rate": 0.0001, |
| "loss": 1.9711, |
| "step": 1727 |
| }, |
| { |
| "epoch": 1.1242680546519193, |
| "grad_norm": 0.20615191757678986, |
| "learning_rate": 0.0001, |
| "loss": 1.9243, |
| "step": 1728 |
| }, |
| { |
| "epoch": 1.124918672739102, |
| "grad_norm": 0.24273328483104706, |
| "learning_rate": 0.0001, |
| "loss": 2.151, |
| "step": 1729 |
| }, |
| { |
| "epoch": 1.1255692908262849, |
| "grad_norm": 0.21164491772651672, |
| "learning_rate": 0.0001, |
| "loss": 2.0024, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.126219908913468, |
| "grad_norm": 0.19444367289543152, |
| "learning_rate": 0.0001, |
| "loss": 1.7912, |
| "step": 1731 |
| }, |
| { |
| "epoch": 1.1268705270006507, |
| "grad_norm": 0.2064955085515976, |
| "learning_rate": 0.0001, |
| "loss": 1.9474, |
| "step": 1732 |
| }, |
| { |
| "epoch": 1.1275211450878335, |
| "grad_norm": 0.21510273218154907, |
| "learning_rate": 0.0001, |
| "loss": 2.4043, |
| "step": 1733 |
| }, |
| { |
| "epoch": 1.1281717631750163, |
| "grad_norm": 0.20456860959529877, |
| "learning_rate": 0.0001, |
| "loss": 1.8206, |
| "step": 1734 |
| }, |
| { |
| "epoch": 1.1288223812621991, |
| "grad_norm": 0.23858770728111267, |
| "learning_rate": 0.0001, |
| "loss": 1.9911, |
| "step": 1735 |
| }, |
| { |
| "epoch": 1.129472999349382, |
| "grad_norm": 0.2044251412153244, |
| "learning_rate": 0.0001, |
| "loss": 1.9787, |
| "step": 1736 |
| }, |
| { |
| "epoch": 1.1301236174365648, |
| "grad_norm": 0.20165155827999115, |
| "learning_rate": 0.0001, |
| "loss": 1.9325, |
| "step": 1737 |
| }, |
| { |
| "epoch": 1.1307742355237476, |
| "grad_norm": 0.3002881705760956, |
| "learning_rate": 0.0001, |
| "loss": 2.2711, |
| "step": 1738 |
| }, |
| { |
| "epoch": 1.1314248536109304, |
| "grad_norm": 0.2876165807247162, |
| "learning_rate": 0.0001, |
| "loss": 2.6178, |
| "step": 1739 |
| }, |
| { |
| "epoch": 1.1320754716981132, |
| "grad_norm": 0.19614382088184357, |
| "learning_rate": 0.0001, |
| "loss": 2.059, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.132726089785296, |
| "grad_norm": 0.30128970742225647, |
| "learning_rate": 0.0001, |
| "loss": 3.1129, |
| "step": 1741 |
| }, |
| { |
| "epoch": 1.1333767078724788, |
| "grad_norm": 0.21897728741168976, |
| "learning_rate": 0.0001, |
| "loss": 2.4768, |
| "step": 1742 |
| }, |
| { |
| "epoch": 1.1340273259596616, |
| "grad_norm": 0.3420639932155609, |
| "learning_rate": 0.0001, |
| "loss": 2.8371, |
| "step": 1743 |
| }, |
| { |
| "epoch": 1.1346779440468444, |
| "grad_norm": 0.20743510127067566, |
| "learning_rate": 0.0001, |
| "loss": 1.8603, |
| "step": 1744 |
| }, |
| { |
| "epoch": 1.1353285621340272, |
| "grad_norm": 0.20907175540924072, |
| "learning_rate": 0.0001, |
| "loss": 2.3143, |
| "step": 1745 |
| }, |
| { |
| "epoch": 1.1359791802212102, |
| "grad_norm": 0.21130706369876862, |
| "learning_rate": 0.0001, |
| "loss": 1.8725, |
| "step": 1746 |
| }, |
| { |
| "epoch": 1.136629798308393, |
| "grad_norm": 0.25445109605789185, |
| "learning_rate": 0.0001, |
| "loss": 2.3608, |
| "step": 1747 |
| }, |
| { |
| "epoch": 1.1372804163955759, |
| "grad_norm": 0.2993278503417969, |
| "learning_rate": 0.0001, |
| "loss": 2.3245, |
| "step": 1748 |
| }, |
| { |
| "epoch": 1.1379310344827587, |
| "grad_norm": 0.22779369354248047, |
| "learning_rate": 0.0001, |
| "loss": 2.1358, |
| "step": 1749 |
| }, |
| { |
| "epoch": 1.1385816525699415, |
| "grad_norm": 0.24406792223453522, |
| "learning_rate": 0.0001, |
| "loss": 2.2637, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.1392322706571243, |
| "grad_norm": 0.21420061588287354, |
| "learning_rate": 0.0001, |
| "loss": 2.0766, |
| "step": 1751 |
| }, |
| { |
| "epoch": 1.139882888744307, |
| "grad_norm": 0.25235506892204285, |
| "learning_rate": 0.0001, |
| "loss": 2.142, |
| "step": 1752 |
| }, |
| { |
| "epoch": 1.14053350683149, |
| "grad_norm": 0.23644569516181946, |
| "learning_rate": 0.0001, |
| "loss": 2.2236, |
| "step": 1753 |
| }, |
| { |
| "epoch": 1.1411841249186727, |
| "grad_norm": 0.19039738178253174, |
| "learning_rate": 0.0001, |
| "loss": 1.7089, |
| "step": 1754 |
| }, |
| { |
| "epoch": 1.1418347430058555, |
| "grad_norm": 0.20842482149600983, |
| "learning_rate": 0.0001, |
| "loss": 2.0404, |
| "step": 1755 |
| }, |
| { |
| "epoch": 1.1424853610930383, |
| "grad_norm": 0.39517682790756226, |
| "learning_rate": 0.0001, |
| "loss": 2.8021, |
| "step": 1756 |
| }, |
| { |
| "epoch": 1.1431359791802211, |
| "grad_norm": 0.244680717587471, |
| "learning_rate": 0.0001, |
| "loss": 2.4391, |
| "step": 1757 |
| }, |
| { |
| "epoch": 1.143786597267404, |
| "grad_norm": 0.2605243921279907, |
| "learning_rate": 0.0001, |
| "loss": 1.9974, |
| "step": 1758 |
| }, |
| { |
| "epoch": 1.144437215354587, |
| "grad_norm": 0.25239503383636475, |
| "learning_rate": 0.0001, |
| "loss": 2.3303, |
| "step": 1759 |
| }, |
| { |
| "epoch": 1.1450878334417696, |
| "grad_norm": 0.22976724803447723, |
| "learning_rate": 0.0001, |
| "loss": 2.1712, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.1457384515289526, |
| "grad_norm": 0.23868423700332642, |
| "learning_rate": 0.0001, |
| "loss": 1.8314, |
| "step": 1761 |
| }, |
| { |
| "epoch": 1.1463890696161354, |
| "grad_norm": 0.19508770108222961, |
| "learning_rate": 0.0001, |
| "loss": 1.8656, |
| "step": 1762 |
| }, |
| { |
| "epoch": 1.1470396877033182, |
| "grad_norm": 0.20572414994239807, |
| "learning_rate": 0.0001, |
| "loss": 1.9845, |
| "step": 1763 |
| }, |
| { |
| "epoch": 1.147690305790501, |
| "grad_norm": 0.21730579435825348, |
| "learning_rate": 0.0001, |
| "loss": 2.2111, |
| "step": 1764 |
| }, |
| { |
| "epoch": 1.1483409238776838, |
| "grad_norm": 0.21196532249450684, |
| "learning_rate": 0.0001, |
| "loss": 1.9421, |
| "step": 1765 |
| }, |
| { |
| "epoch": 1.1489915419648666, |
| "grad_norm": 0.21068131923675537, |
| "learning_rate": 0.0001, |
| "loss": 2.3136, |
| "step": 1766 |
| }, |
| { |
| "epoch": 1.1496421600520494, |
| "grad_norm": 0.20362576842308044, |
| "learning_rate": 0.0001, |
| "loss": 1.8275, |
| "step": 1767 |
| }, |
| { |
| "epoch": 1.1502927781392323, |
| "grad_norm": 0.2940424978733063, |
| "learning_rate": 0.0001, |
| "loss": 2.8365, |
| "step": 1768 |
| }, |
| { |
| "epoch": 1.150943396226415, |
| "grad_norm": 0.20834600925445557, |
| "learning_rate": 0.0001, |
| "loss": 2.09, |
| "step": 1769 |
| }, |
| { |
| "epoch": 1.1515940143135979, |
| "grad_norm": 0.21456307172775269, |
| "learning_rate": 0.0001, |
| "loss": 2.3048, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.1522446324007807, |
| "grad_norm": 0.2233203500509262, |
| "learning_rate": 0.0001, |
| "loss": 2.4119, |
| "step": 1771 |
| }, |
| { |
| "epoch": 1.1528952504879635, |
| "grad_norm": 0.20665253698825836, |
| "learning_rate": 0.0001, |
| "loss": 2.1773, |
| "step": 1772 |
| }, |
| { |
| "epoch": 1.1535458685751463, |
| "grad_norm": 0.23972131311893463, |
| "learning_rate": 0.0001, |
| "loss": 2.2826, |
| "step": 1773 |
| }, |
| { |
| "epoch": 1.1541964866623293, |
| "grad_norm": 0.21282705664634705, |
| "learning_rate": 0.0001, |
| "loss": 1.9999, |
| "step": 1774 |
| }, |
| { |
| "epoch": 1.1548471047495121, |
| "grad_norm": 0.240117147564888, |
| "learning_rate": 0.0001, |
| "loss": 2.8108, |
| "step": 1775 |
| }, |
| { |
| "epoch": 1.155497722836695, |
| "grad_norm": 0.19697974622249603, |
| "learning_rate": 0.0001, |
| "loss": 1.7676, |
| "step": 1776 |
| }, |
| { |
| "epoch": 1.1561483409238777, |
| "grad_norm": 0.2750251889228821, |
| "learning_rate": 0.0001, |
| "loss": 2.2249, |
| "step": 1777 |
| }, |
| { |
| "epoch": 1.1567989590110606, |
| "grad_norm": 0.22119766473770142, |
| "learning_rate": 0.0001, |
| "loss": 2.3459, |
| "step": 1778 |
| }, |
| { |
| "epoch": 1.1574495770982434, |
| "grad_norm": 0.20489272475242615, |
| "learning_rate": 0.0001, |
| "loss": 1.9987, |
| "step": 1779 |
| }, |
| { |
| "epoch": 1.1581001951854262, |
| "grad_norm": 0.24086709320545197, |
| "learning_rate": 0.0001, |
| "loss": 2.4173, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.158750813272609, |
| "grad_norm": 0.1924763172864914, |
| "learning_rate": 0.0001, |
| "loss": 1.9418, |
| "step": 1781 |
| }, |
| { |
| "epoch": 1.1594014313597918, |
| "grad_norm": 0.19156090915203094, |
| "learning_rate": 0.0001, |
| "loss": 1.8935, |
| "step": 1782 |
| }, |
| { |
| "epoch": 1.1600520494469746, |
| "grad_norm": 0.19609065353870392, |
| "learning_rate": 0.0001, |
| "loss": 2.066, |
| "step": 1783 |
| }, |
| { |
| "epoch": 1.1607026675341574, |
| "grad_norm": 0.23869499564170837, |
| "learning_rate": 0.0001, |
| "loss": 2.1297, |
| "step": 1784 |
| }, |
| { |
| "epoch": 1.1613532856213402, |
| "grad_norm": 0.2007722407579422, |
| "learning_rate": 0.0001, |
| "loss": 2.233, |
| "step": 1785 |
| }, |
| { |
| "epoch": 1.162003903708523, |
| "grad_norm": 0.2520548105239868, |
| "learning_rate": 0.0001, |
| "loss": 2.3152, |
| "step": 1786 |
| }, |
| { |
| "epoch": 1.1626545217957058, |
| "grad_norm": 0.2287060022354126, |
| "learning_rate": 0.0001, |
| "loss": 2.1939, |
| "step": 1787 |
| }, |
| { |
| "epoch": 1.1633051398828886, |
| "grad_norm": 0.2276492863893509, |
| "learning_rate": 0.0001, |
| "loss": 2.3642, |
| "step": 1788 |
| }, |
| { |
| "epoch": 1.1639557579700717, |
| "grad_norm": 0.21467982232570648, |
| "learning_rate": 0.0001, |
| "loss": 2.0627, |
| "step": 1789 |
| }, |
| { |
| "epoch": 1.1646063760572545, |
| "grad_norm": 0.29250141978263855, |
| "learning_rate": 0.0001, |
| "loss": 2.5733, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.1652569941444373, |
| "grad_norm": 0.1959691047668457, |
| "learning_rate": 0.0001, |
| "loss": 1.9551, |
| "step": 1791 |
| }, |
| { |
| "epoch": 1.16590761223162, |
| "grad_norm": 0.2442537546157837, |
| "learning_rate": 0.0001, |
| "loss": 2.1335, |
| "step": 1792 |
| }, |
| { |
| "epoch": 1.166558230318803, |
| "grad_norm": 0.24290277063846588, |
| "learning_rate": 0.0001, |
| "loss": 2.0621, |
| "step": 1793 |
| }, |
| { |
| "epoch": 1.1672088484059857, |
| "grad_norm": 0.22018510103225708, |
| "learning_rate": 0.0001, |
| "loss": 1.887, |
| "step": 1794 |
| }, |
| { |
| "epoch": 1.1678594664931685, |
| "grad_norm": 0.1958644986152649, |
| "learning_rate": 0.0001, |
| "loss": 1.8225, |
| "step": 1795 |
| }, |
| { |
| "epoch": 1.1685100845803513, |
| "grad_norm": 0.20498071610927582, |
| "learning_rate": 0.0001, |
| "loss": 1.7655, |
| "step": 1796 |
| }, |
| { |
| "epoch": 1.1691607026675341, |
| "grad_norm": 0.2889087498188019, |
| "learning_rate": 0.0001, |
| "loss": 2.6927, |
| "step": 1797 |
| }, |
| { |
| "epoch": 1.169811320754717, |
| "grad_norm": 0.19342635571956635, |
| "learning_rate": 0.0001, |
| "loss": 1.8491, |
| "step": 1798 |
| }, |
| { |
| "epoch": 1.1704619388418998, |
| "grad_norm": 0.21176034212112427, |
| "learning_rate": 0.0001, |
| "loss": 1.9023, |
| "step": 1799 |
| }, |
| { |
| "epoch": 1.1711125569290826, |
| "grad_norm": 0.20182453095912933, |
| "learning_rate": 0.0001, |
| "loss": 2.1102, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.1717631750162654, |
| "grad_norm": 0.19678173959255219, |
| "learning_rate": 0.0001, |
| "loss": 1.8946, |
| "step": 1801 |
| }, |
| { |
| "epoch": 1.1724137931034484, |
| "grad_norm": 0.2111876755952835, |
| "learning_rate": 0.0001, |
| "loss": 1.9776, |
| "step": 1802 |
| }, |
| { |
| "epoch": 1.173064411190631, |
| "grad_norm": 0.20064996182918549, |
| "learning_rate": 0.0001, |
| "loss": 1.9964, |
| "step": 1803 |
| }, |
| { |
| "epoch": 1.173715029277814, |
| "grad_norm": 0.21519283950328827, |
| "learning_rate": 0.0001, |
| "loss": 2.3182, |
| "step": 1804 |
| }, |
| { |
| "epoch": 1.1743656473649968, |
| "grad_norm": 0.20647580921649933, |
| "learning_rate": 0.0001, |
| "loss": 2.1749, |
| "step": 1805 |
| }, |
| { |
| "epoch": 1.1750162654521796, |
| "grad_norm": 0.23677599430084229, |
| "learning_rate": 0.0001, |
| "loss": 2.3327, |
| "step": 1806 |
| }, |
| { |
| "epoch": 1.1756668835393624, |
| "grad_norm": 0.4442680776119232, |
| "learning_rate": 0.0001, |
| "loss": 3.0767, |
| "step": 1807 |
| }, |
| { |
| "epoch": 1.1763175016265452, |
| "grad_norm": 0.22199684381484985, |
| "learning_rate": 0.0001, |
| "loss": 2.0433, |
| "step": 1808 |
| }, |
| { |
| "epoch": 1.176968119713728, |
| "grad_norm": 0.25266921520233154, |
| "learning_rate": 0.0001, |
| "loss": 2.0896, |
| "step": 1809 |
| }, |
| { |
| "epoch": 1.1776187378009109, |
| "grad_norm": 0.2544354200363159, |
| "learning_rate": 0.0001, |
| "loss": 2.3238, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.1782693558880937, |
| "grad_norm": 0.22011153399944305, |
| "learning_rate": 0.0001, |
| "loss": 2.081, |
| "step": 1811 |
| }, |
| { |
| "epoch": 1.1789199739752765, |
| "grad_norm": 0.2130918800830841, |
| "learning_rate": 0.0001, |
| "loss": 2.1746, |
| "step": 1812 |
| }, |
| { |
| "epoch": 1.1795705920624593, |
| "grad_norm": 0.27029815316200256, |
| "learning_rate": 0.0001, |
| "loss": 2.347, |
| "step": 1813 |
| }, |
| { |
| "epoch": 1.180221210149642, |
| "grad_norm": 0.2451375275850296, |
| "learning_rate": 0.0001, |
| "loss": 2.3811, |
| "step": 1814 |
| }, |
| { |
| "epoch": 1.180871828236825, |
| "grad_norm": 0.23932473361492157, |
| "learning_rate": 0.0001, |
| "loss": 2.3487, |
| "step": 1815 |
| }, |
| { |
| "epoch": 1.1815224463240077, |
| "grad_norm": 0.27545246481895447, |
| "learning_rate": 0.0001, |
| "loss": 2.8006, |
| "step": 1816 |
| }, |
| { |
| "epoch": 1.1821730644111907, |
| "grad_norm": 0.23789924383163452, |
| "learning_rate": 0.0001, |
| "loss": 2.412, |
| "step": 1817 |
| }, |
| { |
| "epoch": 1.1828236824983733, |
| "grad_norm": 0.21146753430366516, |
| "learning_rate": 0.0001, |
| "loss": 1.77, |
| "step": 1818 |
| }, |
| { |
| "epoch": 1.1834743005855564, |
| "grad_norm": 0.22426052391529083, |
| "learning_rate": 0.0001, |
| "loss": 2.8208, |
| "step": 1819 |
| }, |
| { |
| "epoch": 1.1841249186727392, |
| "grad_norm": 0.20795080065727234, |
| "learning_rate": 0.0001, |
| "loss": 1.9873, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.184775536759922, |
| "grad_norm": 0.2502988576889038, |
| "learning_rate": 0.0001, |
| "loss": 2.1848, |
| "step": 1821 |
| }, |
| { |
| "epoch": 1.1854261548471048, |
| "grad_norm": 0.2513422667980194, |
| "learning_rate": 0.0001, |
| "loss": 2.2672, |
| "step": 1822 |
| }, |
| { |
| "epoch": 1.1860767729342876, |
| "grad_norm": 0.2905328571796417, |
| "learning_rate": 0.0001, |
| "loss": 2.6693, |
| "step": 1823 |
| }, |
| { |
| "epoch": 1.1867273910214704, |
| "grad_norm": 0.24283568561077118, |
| "learning_rate": 0.0001, |
| "loss": 2.2195, |
| "step": 1824 |
| }, |
| { |
| "epoch": 1.1873780091086532, |
| "grad_norm": 0.4796983003616333, |
| "learning_rate": 0.0001, |
| "loss": 2.9053, |
| "step": 1825 |
| }, |
| { |
| "epoch": 1.188028627195836, |
| "grad_norm": 0.21809592843055725, |
| "learning_rate": 0.0001, |
| "loss": 2.3144, |
| "step": 1826 |
| }, |
| { |
| "epoch": 1.1886792452830188, |
| "grad_norm": 0.20797866582870483, |
| "learning_rate": 0.0001, |
| "loss": 2.0944, |
| "step": 1827 |
| }, |
| { |
| "epoch": 1.1893298633702016, |
| "grad_norm": 0.33743757009506226, |
| "learning_rate": 0.0001, |
| "loss": 2.5127, |
| "step": 1828 |
| }, |
| { |
| "epoch": 1.1899804814573844, |
| "grad_norm": 0.20291081070899963, |
| "learning_rate": 0.0001, |
| "loss": 1.8319, |
| "step": 1829 |
| }, |
| { |
| "epoch": 1.1906310995445673, |
| "grad_norm": 0.22974629700183868, |
| "learning_rate": 0.0001, |
| "loss": 2.5051, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.19128171763175, |
| "grad_norm": 0.19236895442008972, |
| "learning_rate": 0.0001, |
| "loss": 2.0788, |
| "step": 1831 |
| }, |
| { |
| "epoch": 1.191932335718933, |
| "grad_norm": 0.23548506200313568, |
| "learning_rate": 0.0001, |
| "loss": 2.2978, |
| "step": 1832 |
| }, |
| { |
| "epoch": 1.192582953806116, |
| "grad_norm": 0.21966218948364258, |
| "learning_rate": 0.0001, |
| "loss": 2.0079, |
| "step": 1833 |
| }, |
| { |
| "epoch": 1.1932335718932987, |
| "grad_norm": 0.2185019999742508, |
| "learning_rate": 0.0001, |
| "loss": 2.5881, |
| "step": 1834 |
| }, |
| { |
| "epoch": 1.1938841899804815, |
| "grad_norm": 0.2392290085554123, |
| "learning_rate": 0.0001, |
| "loss": 2.8093, |
| "step": 1835 |
| }, |
| { |
| "epoch": 1.1945348080676643, |
| "grad_norm": 0.2351524531841278, |
| "learning_rate": 0.0001, |
| "loss": 2.193, |
| "step": 1836 |
| }, |
| { |
| "epoch": 1.1951854261548471, |
| "grad_norm": 0.23001527786254883, |
| "learning_rate": 0.0001, |
| "loss": 2.1557, |
| "step": 1837 |
| }, |
| { |
| "epoch": 1.19583604424203, |
| "grad_norm": 0.26272302865982056, |
| "learning_rate": 0.0001, |
| "loss": 2.4233, |
| "step": 1838 |
| }, |
| { |
| "epoch": 1.1964866623292127, |
| "grad_norm": 0.22908174991607666, |
| "learning_rate": 0.0001, |
| "loss": 2.1755, |
| "step": 1839 |
| }, |
| { |
| "epoch": 1.1971372804163956, |
| "grad_norm": 0.24464088678359985, |
| "learning_rate": 0.0001, |
| "loss": 1.9405, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.1977878985035784, |
| "grad_norm": 0.20972700417041779, |
| "learning_rate": 0.0001, |
| "loss": 1.8764, |
| "step": 1841 |
| }, |
| { |
| "epoch": 1.1984385165907612, |
| "grad_norm": 0.20880474150180817, |
| "learning_rate": 0.0001, |
| "loss": 2.3584, |
| "step": 1842 |
| }, |
| { |
| "epoch": 1.199089134677944, |
| "grad_norm": 0.1944800466299057, |
| "learning_rate": 0.0001, |
| "loss": 1.8642, |
| "step": 1843 |
| }, |
| { |
| "epoch": 1.1997397527651268, |
| "grad_norm": 0.2051180899143219, |
| "learning_rate": 0.0001, |
| "loss": 1.986, |
| "step": 1844 |
| }, |
| { |
| "epoch": 1.2003903708523098, |
| "grad_norm": 0.2157822549343109, |
| "learning_rate": 0.0001, |
| "loss": 2.0517, |
| "step": 1845 |
| }, |
| { |
| "epoch": 1.2010409889394924, |
| "grad_norm": 0.3041553199291229, |
| "learning_rate": 0.0001, |
| "loss": 2.311, |
| "step": 1846 |
| }, |
| { |
| "epoch": 1.2016916070266754, |
| "grad_norm": 0.21936780214309692, |
| "learning_rate": 0.0001, |
| "loss": 1.9142, |
| "step": 1847 |
| }, |
| { |
| "epoch": 1.2023422251138582, |
| "grad_norm": 0.209041029214859, |
| "learning_rate": 0.0001, |
| "loss": 1.8511, |
| "step": 1848 |
| }, |
| { |
| "epoch": 1.202992843201041, |
| "grad_norm": 0.23482000827789307, |
| "learning_rate": 0.0001, |
| "loss": 1.9374, |
| "step": 1849 |
| }, |
| { |
| "epoch": 1.2036434612882239, |
| "grad_norm": 0.19679374992847443, |
| "learning_rate": 0.0001, |
| "loss": 2.0476, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.2042940793754067, |
| "grad_norm": 0.2212909758090973, |
| "learning_rate": 0.0001, |
| "loss": 2.2312, |
| "step": 1851 |
| }, |
| { |
| "epoch": 1.2049446974625895, |
| "grad_norm": 0.2555689513683319, |
| "learning_rate": 0.0001, |
| "loss": 2.0423, |
| "step": 1852 |
| }, |
| { |
| "epoch": 1.2055953155497723, |
| "grad_norm": 0.20109978318214417, |
| "learning_rate": 0.0001, |
| "loss": 1.7819, |
| "step": 1853 |
| }, |
| { |
| "epoch": 1.206245933636955, |
| "grad_norm": 0.2877024710178375, |
| "learning_rate": 0.0001, |
| "loss": 2.2722, |
| "step": 1854 |
| }, |
| { |
| "epoch": 1.206896551724138, |
| "grad_norm": 0.2399614006280899, |
| "learning_rate": 0.0001, |
| "loss": 2.3574, |
| "step": 1855 |
| }, |
| { |
| "epoch": 1.2075471698113207, |
| "grad_norm": 0.21033065021038055, |
| "learning_rate": 0.0001, |
| "loss": 1.9241, |
| "step": 1856 |
| }, |
| { |
| "epoch": 1.2081977878985035, |
| "grad_norm": 0.24114003777503967, |
| "learning_rate": 0.0001, |
| "loss": 2.219, |
| "step": 1857 |
| }, |
| { |
| "epoch": 1.2088484059856863, |
| "grad_norm": 0.2185184806585312, |
| "learning_rate": 0.0001, |
| "loss": 2.172, |
| "step": 1858 |
| }, |
| { |
| "epoch": 1.2094990240728691, |
| "grad_norm": 0.2120138555765152, |
| "learning_rate": 0.0001, |
| "loss": 1.9399, |
| "step": 1859 |
| }, |
| { |
| "epoch": 1.2101496421600522, |
| "grad_norm": 0.30281567573547363, |
| "learning_rate": 0.0001, |
| "loss": 2.0764, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.2108002602472347, |
| "grad_norm": 0.29093682765960693, |
| "learning_rate": 0.0001, |
| "loss": 2.2486, |
| "step": 1861 |
| }, |
| { |
| "epoch": 1.2114508783344178, |
| "grad_norm": 0.26967954635620117, |
| "learning_rate": 0.0001, |
| "loss": 2.577, |
| "step": 1862 |
| }, |
| { |
| "epoch": 1.2121014964216006, |
| "grad_norm": 0.29868391156196594, |
| "learning_rate": 0.0001, |
| "loss": 2.1684, |
| "step": 1863 |
| }, |
| { |
| "epoch": 1.2127521145087834, |
| "grad_norm": 0.2983965277671814, |
| "learning_rate": 0.0001, |
| "loss": 2.1385, |
| "step": 1864 |
| }, |
| { |
| "epoch": 1.2134027325959662, |
| "grad_norm": 0.21981745958328247, |
| "learning_rate": 0.0001, |
| "loss": 2.0178, |
| "step": 1865 |
| }, |
| { |
| "epoch": 1.214053350683149, |
| "grad_norm": 0.2781940996646881, |
| "learning_rate": 0.0001, |
| "loss": 1.9141, |
| "step": 1866 |
| }, |
| { |
| "epoch": 1.2147039687703318, |
| "grad_norm": 0.2705937623977661, |
| "learning_rate": 0.0001, |
| "loss": 2.2618, |
| "step": 1867 |
| }, |
| { |
| "epoch": 1.2153545868575146, |
| "grad_norm": 0.26933443546295166, |
| "learning_rate": 0.0001, |
| "loss": 2.1254, |
| "step": 1868 |
| }, |
| { |
| "epoch": 1.2160052049446974, |
| "grad_norm": 0.33348095417022705, |
| "learning_rate": 0.0001, |
| "loss": 2.4108, |
| "step": 1869 |
| }, |
| { |
| "epoch": 1.2166558230318802, |
| "grad_norm": 0.2542361319065094, |
| "learning_rate": 0.0001, |
| "loss": 2.3244, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.217306441119063, |
| "grad_norm": 0.2907015383243561, |
| "learning_rate": 0.0001, |
| "loss": 1.88, |
| "step": 1871 |
| }, |
| { |
| "epoch": 1.2179570592062459, |
| "grad_norm": 0.3185560405254364, |
| "learning_rate": 0.0001, |
| "loss": 2.2781, |
| "step": 1872 |
| }, |
| { |
| "epoch": 1.2186076772934287, |
| "grad_norm": 0.2869360148906708, |
| "learning_rate": 0.0001, |
| "loss": 2.402, |
| "step": 1873 |
| }, |
| { |
| "epoch": 1.2192582953806115, |
| "grad_norm": 0.20484799146652222, |
| "learning_rate": 0.0001, |
| "loss": 1.8669, |
| "step": 1874 |
| }, |
| { |
| "epoch": 1.2199089134677945, |
| "grad_norm": 0.32604745030403137, |
| "learning_rate": 0.0001, |
| "loss": 2.2388, |
| "step": 1875 |
| }, |
| { |
| "epoch": 1.2205595315549773, |
| "grad_norm": 0.23423580825328827, |
| "learning_rate": 0.0001, |
| "loss": 2.0514, |
| "step": 1876 |
| }, |
| { |
| "epoch": 1.2212101496421601, |
| "grad_norm": 0.2595270872116089, |
| "learning_rate": 0.0001, |
| "loss": 2.3477, |
| "step": 1877 |
| }, |
| { |
| "epoch": 1.221860767729343, |
| "grad_norm": 0.1947423815727234, |
| "learning_rate": 0.0001, |
| "loss": 2.0059, |
| "step": 1878 |
| }, |
| { |
| "epoch": 1.2225113858165257, |
| "grad_norm": 0.2795493006706238, |
| "learning_rate": 0.0001, |
| "loss": 1.8822, |
| "step": 1879 |
| }, |
| { |
| "epoch": 1.2231620039037086, |
| "grad_norm": 0.2808840870857239, |
| "learning_rate": 0.0001, |
| "loss": 2.3531, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.2238126219908914, |
| "grad_norm": 0.2068590670824051, |
| "learning_rate": 0.0001, |
| "loss": 1.912, |
| "step": 1881 |
| }, |
| { |
| "epoch": 1.2244632400780742, |
| "grad_norm": 0.22908970713615417, |
| "learning_rate": 0.0001, |
| "loss": 2.2255, |
| "step": 1882 |
| }, |
| { |
| "epoch": 1.225113858165257, |
| "grad_norm": 0.25003886222839355, |
| "learning_rate": 0.0001, |
| "loss": 2.1466, |
| "step": 1883 |
| }, |
| { |
| "epoch": 1.2257644762524398, |
| "grad_norm": 0.22825029492378235, |
| "learning_rate": 0.0001, |
| "loss": 1.992, |
| "step": 1884 |
| }, |
| { |
| "epoch": 1.2264150943396226, |
| "grad_norm": 0.2613295316696167, |
| "learning_rate": 0.0001, |
| "loss": 2.374, |
| "step": 1885 |
| }, |
| { |
| "epoch": 1.2270657124268054, |
| "grad_norm": 0.4435082674026489, |
| "learning_rate": 0.0001, |
| "loss": 3.1849, |
| "step": 1886 |
| }, |
| { |
| "epoch": 1.2277163305139882, |
| "grad_norm": 0.24659159779548645, |
| "learning_rate": 0.0001, |
| "loss": 2.1769, |
| "step": 1887 |
| }, |
| { |
| "epoch": 1.228366948601171, |
| "grad_norm": 0.4549017548561096, |
| "learning_rate": 0.0001, |
| "loss": 2.7894, |
| "step": 1888 |
| }, |
| { |
| "epoch": 1.2290175666883538, |
| "grad_norm": 0.25427743792533875, |
| "learning_rate": 0.0001, |
| "loss": 1.8263, |
| "step": 1889 |
| }, |
| { |
| "epoch": 1.2296681847755369, |
| "grad_norm": 0.22596922516822815, |
| "learning_rate": 0.0001, |
| "loss": 2.5682, |
| "step": 1890 |
| }, |
| { |
| "epoch": 1.2303188028627197, |
| "grad_norm": 0.2284218668937683, |
| "learning_rate": 0.0001, |
| "loss": 2.0553, |
| "step": 1891 |
| }, |
| { |
| "epoch": 1.2309694209499025, |
| "grad_norm": 0.23732468485832214, |
| "learning_rate": 0.0001, |
| "loss": 1.9246, |
| "step": 1892 |
| }, |
| { |
| "epoch": 1.2316200390370853, |
| "grad_norm": 0.23811183869838715, |
| "learning_rate": 0.0001, |
| "loss": 2.0138, |
| "step": 1893 |
| }, |
| { |
| "epoch": 1.232270657124268, |
| "grad_norm": 0.2370694875717163, |
| "learning_rate": 0.0001, |
| "loss": 2.0182, |
| "step": 1894 |
| }, |
| { |
| "epoch": 1.232921275211451, |
| "grad_norm": 0.28930938243865967, |
| "learning_rate": 0.0001, |
| "loss": 2.5753, |
| "step": 1895 |
| }, |
| { |
| "epoch": 1.2335718932986337, |
| "grad_norm": 0.25352779030799866, |
| "learning_rate": 0.0001, |
| "loss": 1.8375, |
| "step": 1896 |
| }, |
| { |
| "epoch": 1.2342225113858165, |
| "grad_norm": 0.30172285437583923, |
| "learning_rate": 0.0001, |
| "loss": 1.9245, |
| "step": 1897 |
| }, |
| { |
| "epoch": 1.2348731294729993, |
| "grad_norm": 0.2180672585964203, |
| "learning_rate": 0.0001, |
| "loss": 2.2914, |
| "step": 1898 |
| }, |
| { |
| "epoch": 1.2355237475601821, |
| "grad_norm": 0.2281951755285263, |
| "learning_rate": 0.0001, |
| "loss": 2.1435, |
| "step": 1899 |
| }, |
| { |
| "epoch": 1.236174365647365, |
| "grad_norm": 0.20682668685913086, |
| "learning_rate": 0.0001, |
| "loss": 1.7615, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.2368249837345477, |
| "grad_norm": 0.25352394580841064, |
| "learning_rate": 0.0001, |
| "loss": 2.5008, |
| "step": 1901 |
| }, |
| { |
| "epoch": 1.2374756018217306, |
| "grad_norm": 0.2721845507621765, |
| "learning_rate": 0.0001, |
| "loss": 2.4793, |
| "step": 1902 |
| }, |
| { |
| "epoch": 1.2381262199089136, |
| "grad_norm": 0.26155340671539307, |
| "learning_rate": 0.0001, |
| "loss": 2.4213, |
| "step": 1903 |
| }, |
| { |
| "epoch": 1.2387768379960962, |
| "grad_norm": 0.21231123805046082, |
| "learning_rate": 0.0001, |
| "loss": 2.3406, |
| "step": 1904 |
| }, |
| { |
| "epoch": 1.2394274560832792, |
| "grad_norm": 0.33180317282676697, |
| "learning_rate": 0.0001, |
| "loss": 2.6539, |
| "step": 1905 |
| }, |
| { |
| "epoch": 1.240078074170462, |
| "grad_norm": 0.316821813583374, |
| "learning_rate": 0.0001, |
| "loss": 1.9675, |
| "step": 1906 |
| }, |
| { |
| "epoch": 1.2407286922576448, |
| "grad_norm": 0.21710284054279327, |
| "learning_rate": 0.0001, |
| "loss": 2.0029, |
| "step": 1907 |
| }, |
| { |
| "epoch": 1.2413793103448276, |
| "grad_norm": 0.21154941618442535, |
| "learning_rate": 0.0001, |
| "loss": 2.0212, |
| "step": 1908 |
| }, |
| { |
| "epoch": 1.2420299284320104, |
| "grad_norm": 0.2514655590057373, |
| "learning_rate": 0.0001, |
| "loss": 1.9502, |
| "step": 1909 |
| }, |
| { |
| "epoch": 1.2426805465191932, |
| "grad_norm": 0.22810599207878113, |
| "learning_rate": 0.0001, |
| "loss": 1.9665, |
| "step": 1910 |
| }, |
| { |
| "epoch": 1.243331164606376, |
| "grad_norm": 0.235860675573349, |
| "learning_rate": 0.0001, |
| "loss": 2.0841, |
| "step": 1911 |
| }, |
| { |
| "epoch": 1.2439817826935589, |
| "grad_norm": 0.1910456418991089, |
| "learning_rate": 0.0001, |
| "loss": 1.8166, |
| "step": 1912 |
| }, |
| { |
| "epoch": 1.2446324007807417, |
| "grad_norm": 0.22014285624027252, |
| "learning_rate": 0.0001, |
| "loss": 2.1829, |
| "step": 1913 |
| }, |
| { |
| "epoch": 1.2452830188679245, |
| "grad_norm": 0.2244740128517151, |
| "learning_rate": 0.0001, |
| "loss": 2.0285, |
| "step": 1914 |
| }, |
| { |
| "epoch": 1.2459336369551073, |
| "grad_norm": 0.24731022119522095, |
| "learning_rate": 0.0001, |
| "loss": 1.9732, |
| "step": 1915 |
| }, |
| { |
| "epoch": 1.24658425504229, |
| "grad_norm": 0.23868077993392944, |
| "learning_rate": 0.0001, |
| "loss": 2.5917, |
| "step": 1916 |
| }, |
| { |
| "epoch": 1.247234873129473, |
| "grad_norm": 0.1961744874715805, |
| "learning_rate": 0.0001, |
| "loss": 2.166, |
| "step": 1917 |
| }, |
| { |
| "epoch": 1.247885491216656, |
| "grad_norm": 0.21610315144062042, |
| "learning_rate": 0.0001, |
| "loss": 2.3458, |
| "step": 1918 |
| }, |
| { |
| "epoch": 1.2485361093038387, |
| "grad_norm": 0.21530687808990479, |
| "learning_rate": 0.0001, |
| "loss": 2.6029, |
| "step": 1919 |
| }, |
| { |
| "epoch": 1.2491867273910215, |
| "grad_norm": 0.20758795738220215, |
| "learning_rate": 0.0001, |
| "loss": 1.9906, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.2498373454782044, |
| "grad_norm": 0.19392041862010956, |
| "learning_rate": 0.0001, |
| "loss": 1.9262, |
| "step": 1921 |
| }, |
| { |
| "epoch": 1.2504879635653872, |
| "grad_norm": 0.2207522839307785, |
| "learning_rate": 0.0001, |
| "loss": 2.4632, |
| "step": 1922 |
| }, |
| { |
| "epoch": 1.25113858165257, |
| "grad_norm": 0.2563434839248657, |
| "learning_rate": 0.0001, |
| "loss": 2.1078, |
| "step": 1923 |
| }, |
| { |
| "epoch": 1.2517891997397528, |
| "grad_norm": 0.20730890333652496, |
| "learning_rate": 0.0001, |
| "loss": 2.0233, |
| "step": 1924 |
| }, |
| { |
| "epoch": 1.2524398178269356, |
| "grad_norm": 0.2376047819852829, |
| "learning_rate": 0.0001, |
| "loss": 2.8245, |
| "step": 1925 |
| }, |
| { |
| "epoch": 1.2530904359141184, |
| "grad_norm": 0.23899880051612854, |
| "learning_rate": 0.0001, |
| "loss": 2.1696, |
| "step": 1926 |
| }, |
| { |
| "epoch": 1.2537410540013012, |
| "grad_norm": 0.23044680058956146, |
| "learning_rate": 0.0001, |
| "loss": 2.4687, |
| "step": 1927 |
| }, |
| { |
| "epoch": 1.254391672088484, |
| "grad_norm": 0.2099095731973648, |
| "learning_rate": 0.0001, |
| "loss": 1.9455, |
| "step": 1928 |
| }, |
| { |
| "epoch": 1.2550422901756668, |
| "grad_norm": 0.29073843359947205, |
| "learning_rate": 0.0001, |
| "loss": 2.4355, |
| "step": 1929 |
| }, |
| { |
| "epoch": 1.2556929082628496, |
| "grad_norm": 0.22105714678764343, |
| "learning_rate": 0.0001, |
| "loss": 2.2517, |
| "step": 1930 |
| }, |
| { |
| "epoch": 1.2563435263500327, |
| "grad_norm": 0.2283870428800583, |
| "learning_rate": 0.0001, |
| "loss": 2.3068, |
| "step": 1931 |
| }, |
| { |
| "epoch": 1.2569941444372152, |
| "grad_norm": 0.2354276329278946, |
| "learning_rate": 0.0001, |
| "loss": 1.942, |
| "step": 1932 |
| }, |
| { |
| "epoch": 1.2576447625243983, |
| "grad_norm": 0.2266663908958435, |
| "learning_rate": 0.0001, |
| "loss": 2.1488, |
| "step": 1933 |
| }, |
| { |
| "epoch": 1.258295380611581, |
| "grad_norm": 0.22540217638015747, |
| "learning_rate": 0.0001, |
| "loss": 2.2396, |
| "step": 1934 |
| }, |
| { |
| "epoch": 1.258945998698764, |
| "grad_norm": 0.21390073001384735, |
| "learning_rate": 0.0001, |
| "loss": 2.1606, |
| "step": 1935 |
| }, |
| { |
| "epoch": 1.2595966167859467, |
| "grad_norm": 0.23373158276081085, |
| "learning_rate": 0.0001, |
| "loss": 1.9439, |
| "step": 1936 |
| }, |
| { |
| "epoch": 1.2602472348731295, |
| "grad_norm": 0.23048003017902374, |
| "learning_rate": 0.0001, |
| "loss": 2.1865, |
| "step": 1937 |
| }, |
| { |
| "epoch": 1.2608978529603123, |
| "grad_norm": 0.22000010311603546, |
| "learning_rate": 0.0001, |
| "loss": 1.9846, |
| "step": 1938 |
| }, |
| { |
| "epoch": 1.2615484710474951, |
| "grad_norm": 0.21622230112552643, |
| "learning_rate": 0.0001, |
| "loss": 2.1252, |
| "step": 1939 |
| }, |
| { |
| "epoch": 1.262199089134678, |
| "grad_norm": 0.2605228126049042, |
| "learning_rate": 0.0001, |
| "loss": 2.1473, |
| "step": 1940 |
| }, |
| { |
| "epoch": 1.2628497072218607, |
| "grad_norm": 0.26679491996765137, |
| "learning_rate": 0.0001, |
| "loss": 2.6146, |
| "step": 1941 |
| }, |
| { |
| "epoch": 1.2635003253090435, |
| "grad_norm": 0.2634236514568329, |
| "learning_rate": 0.0001, |
| "loss": 2.346, |
| "step": 1942 |
| }, |
| { |
| "epoch": 1.2641509433962264, |
| "grad_norm": 0.24117980897426605, |
| "learning_rate": 0.0001, |
| "loss": 2.2245, |
| "step": 1943 |
| }, |
| { |
| "epoch": 1.2648015614834092, |
| "grad_norm": 0.24401871860027313, |
| "learning_rate": 0.0001, |
| "loss": 2.114, |
| "step": 1944 |
| }, |
| { |
| "epoch": 1.265452179570592, |
| "grad_norm": 0.22441592812538147, |
| "learning_rate": 0.0001, |
| "loss": 2.1032, |
| "step": 1945 |
| }, |
| { |
| "epoch": 1.266102797657775, |
| "grad_norm": 0.29374176263809204, |
| "learning_rate": 0.0001, |
| "loss": 2.1147, |
| "step": 1946 |
| }, |
| { |
| "epoch": 1.2667534157449576, |
| "grad_norm": 0.2995739281177521, |
| "learning_rate": 0.0001, |
| "loss": 2.115, |
| "step": 1947 |
| }, |
| { |
| "epoch": 1.2674040338321406, |
| "grad_norm": 0.20426590740680695, |
| "learning_rate": 0.0001, |
| "loss": 1.8542, |
| "step": 1948 |
| }, |
| { |
| "epoch": 1.2680546519193234, |
| "grad_norm": 0.1977882981300354, |
| "learning_rate": 0.0001, |
| "loss": 2.0438, |
| "step": 1949 |
| }, |
| { |
| "epoch": 1.2687052700065062, |
| "grad_norm": 0.21864080429077148, |
| "learning_rate": 0.0001, |
| "loss": 1.9422, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.269355888093689, |
| "grad_norm": 0.2340763509273529, |
| "learning_rate": 0.0001, |
| "loss": 1.8877, |
| "step": 1951 |
| }, |
| { |
| "epoch": 1.2700065061808719, |
| "grad_norm": 0.2309611737728119, |
| "learning_rate": 0.0001, |
| "loss": 2.2566, |
| "step": 1952 |
| }, |
| { |
| "epoch": 1.2706571242680547, |
| "grad_norm": 0.23693449795246124, |
| "learning_rate": 0.0001, |
| "loss": 2.3951, |
| "step": 1953 |
| }, |
| { |
| "epoch": 1.2713077423552375, |
| "grad_norm": 0.22100651264190674, |
| "learning_rate": 0.0001, |
| "loss": 2.5818, |
| "step": 1954 |
| }, |
| { |
| "epoch": 1.2719583604424203, |
| "grad_norm": 0.1990489661693573, |
| "learning_rate": 0.0001, |
| "loss": 1.9465, |
| "step": 1955 |
| }, |
| { |
| "epoch": 1.272608978529603, |
| "grad_norm": 0.20941214263439178, |
| "learning_rate": 0.0001, |
| "loss": 1.937, |
| "step": 1956 |
| }, |
| { |
| "epoch": 1.273259596616786, |
| "grad_norm": 0.21483126282691956, |
| "learning_rate": 0.0001, |
| "loss": 2.1043, |
| "step": 1957 |
| }, |
| { |
| "epoch": 1.2739102147039687, |
| "grad_norm": 0.21301157772541046, |
| "learning_rate": 0.0001, |
| "loss": 1.8593, |
| "step": 1958 |
| }, |
| { |
| "epoch": 1.2745608327911517, |
| "grad_norm": 0.21957635879516602, |
| "learning_rate": 0.0001, |
| "loss": 2.1362, |
| "step": 1959 |
| }, |
| { |
| "epoch": 1.2752114508783343, |
| "grad_norm": 0.2250145524740219, |
| "learning_rate": 0.0001, |
| "loss": 2.3579, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.2758620689655173, |
| "grad_norm": 0.20241393148899078, |
| "learning_rate": 0.0001, |
| "loss": 1.9527, |
| "step": 1961 |
| }, |
| { |
| "epoch": 1.2765126870527, |
| "grad_norm": 0.19743886590003967, |
| "learning_rate": 0.0001, |
| "loss": 1.7161, |
| "step": 1962 |
| }, |
| { |
| "epoch": 1.277163305139883, |
| "grad_norm": 0.19684547185897827, |
| "learning_rate": 0.0001, |
| "loss": 2.0134, |
| "step": 1963 |
| }, |
| { |
| "epoch": 1.2778139232270658, |
| "grad_norm": 0.23819373548030853, |
| "learning_rate": 0.0001, |
| "loss": 2.2772, |
| "step": 1964 |
| }, |
| { |
| "epoch": 1.2784645413142486, |
| "grad_norm": 0.1951412856578827, |
| "learning_rate": 0.0001, |
| "loss": 2.0478, |
| "step": 1965 |
| }, |
| { |
| "epoch": 1.2791151594014314, |
| "grad_norm": 0.20699426531791687, |
| "learning_rate": 0.0001, |
| "loss": 1.9329, |
| "step": 1966 |
| }, |
| { |
| "epoch": 1.2797657774886142, |
| "grad_norm": 0.2319498509168625, |
| "learning_rate": 0.0001, |
| "loss": 2.1841, |
| "step": 1967 |
| }, |
| { |
| "epoch": 1.280416395575797, |
| "grad_norm": 0.2663379907608032, |
| "learning_rate": 0.0001, |
| "loss": 2.2784, |
| "step": 1968 |
| }, |
| { |
| "epoch": 1.2810670136629798, |
| "grad_norm": 0.21082288026809692, |
| "learning_rate": 0.0001, |
| "loss": 2.0624, |
| "step": 1969 |
| }, |
| { |
| "epoch": 1.2817176317501626, |
| "grad_norm": 0.2609255313873291, |
| "learning_rate": 0.0001, |
| "loss": 2.2635, |
| "step": 1970 |
| }, |
| { |
| "epoch": 1.2823682498373454, |
| "grad_norm": 0.22040636837482452, |
| "learning_rate": 0.0001, |
| "loss": 2.5703, |
| "step": 1971 |
| }, |
| { |
| "epoch": 1.2830188679245282, |
| "grad_norm": 0.20903989672660828, |
| "learning_rate": 0.0001, |
| "loss": 2.3724, |
| "step": 1972 |
| }, |
| { |
| "epoch": 1.283669486011711, |
| "grad_norm": 0.23229360580444336, |
| "learning_rate": 0.0001, |
| "loss": 1.8577, |
| "step": 1973 |
| }, |
| { |
| "epoch": 1.284320104098894, |
| "grad_norm": 0.20225417613983154, |
| "learning_rate": 0.0001, |
| "loss": 1.7368, |
| "step": 1974 |
| }, |
| { |
| "epoch": 1.2849707221860767, |
| "grad_norm": 0.2678045332431793, |
| "learning_rate": 0.0001, |
| "loss": 2.3385, |
| "step": 1975 |
| }, |
| { |
| "epoch": 1.2856213402732597, |
| "grad_norm": 0.21571967005729675, |
| "learning_rate": 0.0001, |
| "loss": 2.4601, |
| "step": 1976 |
| }, |
| { |
| "epoch": 1.2862719583604425, |
| "grad_norm": 0.23303121328353882, |
| "learning_rate": 0.0001, |
| "loss": 2.3677, |
| "step": 1977 |
| }, |
| { |
| "epoch": 1.2869225764476253, |
| "grad_norm": 0.21977032721042633, |
| "learning_rate": 0.0001, |
| "loss": 1.9767, |
| "step": 1978 |
| }, |
| { |
| "epoch": 1.2875731945348081, |
| "grad_norm": 0.2213941365480423, |
| "learning_rate": 0.0001, |
| "loss": 2.1814, |
| "step": 1979 |
| }, |
| { |
| "epoch": 1.288223812621991, |
| "grad_norm": 0.2326277643442154, |
| "learning_rate": 0.0001, |
| "loss": 2.1136, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.2888744307091737, |
| "grad_norm": 0.2191379815340042, |
| "learning_rate": 0.0001, |
| "loss": 2.1645, |
| "step": 1981 |
| }, |
| { |
| "epoch": 1.2895250487963565, |
| "grad_norm": 0.2279415875673294, |
| "learning_rate": 0.0001, |
| "loss": 2.4334, |
| "step": 1982 |
| }, |
| { |
| "epoch": 1.2901756668835394, |
| "grad_norm": 0.22115138173103333, |
| "learning_rate": 0.0001, |
| "loss": 1.805, |
| "step": 1983 |
| }, |
| { |
| "epoch": 1.2908262849707222, |
| "grad_norm": 0.19766473770141602, |
| "learning_rate": 0.0001, |
| "loss": 1.9245, |
| "step": 1984 |
| }, |
| { |
| "epoch": 1.291476903057905, |
| "grad_norm": 0.20559804141521454, |
| "learning_rate": 0.0001, |
| "loss": 1.9477, |
| "step": 1985 |
| }, |
| { |
| "epoch": 1.2921275211450878, |
| "grad_norm": 0.2404945194721222, |
| "learning_rate": 0.0001, |
| "loss": 2.1143, |
| "step": 1986 |
| }, |
| { |
| "epoch": 1.2927781392322706, |
| "grad_norm": 0.2673717141151428, |
| "learning_rate": 0.0001, |
| "loss": 2.3239, |
| "step": 1987 |
| }, |
| { |
| "epoch": 1.2934287573194534, |
| "grad_norm": 0.2186470478773117, |
| "learning_rate": 0.0001, |
| "loss": 2.1699, |
| "step": 1988 |
| }, |
| { |
| "epoch": 1.2940793754066364, |
| "grad_norm": 0.20722255110740662, |
| "learning_rate": 0.0001, |
| "loss": 2.0015, |
| "step": 1989 |
| }, |
| { |
| "epoch": 1.294729993493819, |
| "grad_norm": 0.21739724278450012, |
| "learning_rate": 0.0001, |
| "loss": 2.6129, |
| "step": 1990 |
| }, |
| { |
| "epoch": 1.295380611581002, |
| "grad_norm": 0.20067770779132843, |
| "learning_rate": 0.0001, |
| "loss": 1.8162, |
| "step": 1991 |
| }, |
| { |
| "epoch": 1.2960312296681848, |
| "grad_norm": 0.239694282412529, |
| "learning_rate": 0.0001, |
| "loss": 2.3964, |
| "step": 1992 |
| }, |
| { |
| "epoch": 1.2966818477553677, |
| "grad_norm": 0.23374702036380768, |
| "learning_rate": 0.0001, |
| "loss": 1.9621, |
| "step": 1993 |
| }, |
| { |
| "epoch": 1.2973324658425505, |
| "grad_norm": 0.25045931339263916, |
| "learning_rate": 0.0001, |
| "loss": 2.581, |
| "step": 1994 |
| }, |
| { |
| "epoch": 1.2979830839297333, |
| "grad_norm": 0.236837238073349, |
| "learning_rate": 0.0001, |
| "loss": 2.5016, |
| "step": 1995 |
| }, |
| { |
| "epoch": 1.298633702016916, |
| "grad_norm": 0.1992330402135849, |
| "learning_rate": 0.0001, |
| "loss": 1.9391, |
| "step": 1996 |
| }, |
| { |
| "epoch": 1.2992843201040989, |
| "grad_norm": 0.2329629510641098, |
| "learning_rate": 0.0001, |
| "loss": 2.195, |
| "step": 1997 |
| }, |
| { |
| "epoch": 1.2999349381912817, |
| "grad_norm": 0.2705642580986023, |
| "learning_rate": 0.0001, |
| "loss": 2.9097, |
| "step": 1998 |
| }, |
| { |
| "epoch": 1.3005855562784645, |
| "grad_norm": 0.20443053543567657, |
| "learning_rate": 0.0001, |
| "loss": 2.4092, |
| "step": 1999 |
| }, |
| { |
| "epoch": 1.3012361743656473, |
| "grad_norm": 0.21766828000545502, |
| "learning_rate": 0.0001, |
| "loss": 2.0486, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.3018867924528301, |
| "grad_norm": 0.2439257651567459, |
| "learning_rate": 0.0001, |
| "loss": 2.1628, |
| "step": 2001 |
| }, |
| { |
| "epoch": 1.302537410540013, |
| "grad_norm": 0.27675187587738037, |
| "learning_rate": 0.0001, |
| "loss": 2.2286, |
| "step": 2002 |
| }, |
| { |
| "epoch": 1.3031880286271957, |
| "grad_norm": 0.21681272983551025, |
| "learning_rate": 0.0001, |
| "loss": 2.096, |
| "step": 2003 |
| }, |
| { |
| "epoch": 1.3038386467143788, |
| "grad_norm": 0.21704308688640594, |
| "learning_rate": 0.0001, |
| "loss": 1.9099, |
| "step": 2004 |
| }, |
| { |
| "epoch": 1.3044892648015614, |
| "grad_norm": 0.2061903178691864, |
| "learning_rate": 0.0001, |
| "loss": 1.9555, |
| "step": 2005 |
| }, |
| { |
| "epoch": 1.3051398828887444, |
| "grad_norm": 0.21564461290836334, |
| "learning_rate": 0.0001, |
| "loss": 2.002, |
| "step": 2006 |
| }, |
| { |
| "epoch": 1.3057905009759272, |
| "grad_norm": 0.2480417639017105, |
| "learning_rate": 0.0001, |
| "loss": 2.5662, |
| "step": 2007 |
| }, |
| { |
| "epoch": 1.30644111906311, |
| "grad_norm": 0.28024598956108093, |
| "learning_rate": 0.0001, |
| "loss": 2.6073, |
| "step": 2008 |
| }, |
| { |
| "epoch": 1.3070917371502928, |
| "grad_norm": 0.20935572683811188, |
| "learning_rate": 0.0001, |
| "loss": 2.0091, |
| "step": 2009 |
| }, |
| { |
| "epoch": 1.3077423552374756, |
| "grad_norm": 0.2737996578216553, |
| "learning_rate": 0.0001, |
| "loss": 2.6924, |
| "step": 2010 |
| }, |
| { |
| "epoch": 1.3083929733246584, |
| "grad_norm": 0.22892630100250244, |
| "learning_rate": 0.0001, |
| "loss": 2.1194, |
| "step": 2011 |
| }, |
| { |
| "epoch": 1.3090435914118412, |
| "grad_norm": 0.20866556465625763, |
| "learning_rate": 0.0001, |
| "loss": 2.018, |
| "step": 2012 |
| }, |
| { |
| "epoch": 1.309694209499024, |
| "grad_norm": 0.23603831231594086, |
| "learning_rate": 0.0001, |
| "loss": 2.1704, |
| "step": 2013 |
| }, |
| { |
| "epoch": 1.3103448275862069, |
| "grad_norm": 0.22809506952762604, |
| "learning_rate": 0.0001, |
| "loss": 2.1033, |
| "step": 2014 |
| }, |
| { |
| "epoch": 1.3109954456733897, |
| "grad_norm": 0.2483893483877182, |
| "learning_rate": 0.0001, |
| "loss": 2.2567, |
| "step": 2015 |
| }, |
| { |
| "epoch": 1.3116460637605725, |
| "grad_norm": 0.2553653419017792, |
| "learning_rate": 0.0001, |
| "loss": 2.1329, |
| "step": 2016 |
| }, |
| { |
| "epoch": 1.3122966818477555, |
| "grad_norm": 0.21949157118797302, |
| "learning_rate": 0.0001, |
| "loss": 2.0384, |
| "step": 2017 |
| }, |
| { |
| "epoch": 1.312947299934938, |
| "grad_norm": 0.21853339672088623, |
| "learning_rate": 0.0001, |
| "loss": 2.0058, |
| "step": 2018 |
| }, |
| { |
| "epoch": 1.3135979180221211, |
| "grad_norm": 0.27126333117485046, |
| "learning_rate": 0.0001, |
| "loss": 2.3404, |
| "step": 2019 |
| }, |
| { |
| "epoch": 1.3142485361093037, |
| "grad_norm": 0.22831089794635773, |
| "learning_rate": 0.0001, |
| "loss": 2.3679, |
| "step": 2020 |
| }, |
| { |
| "epoch": 1.3148991541964867, |
| "grad_norm": 0.22495043277740479, |
| "learning_rate": 0.0001, |
| "loss": 2.0592, |
| "step": 2021 |
| }, |
| { |
| "epoch": 1.3155497722836695, |
| "grad_norm": 0.21031904220581055, |
| "learning_rate": 0.0001, |
| "loss": 2.3091, |
| "step": 2022 |
| }, |
| { |
| "epoch": 1.3162003903708523, |
| "grad_norm": 0.21309922635555267, |
| "learning_rate": 0.0001, |
| "loss": 2.1981, |
| "step": 2023 |
| }, |
| { |
| "epoch": 1.3168510084580352, |
| "grad_norm": 0.21420729160308838, |
| "learning_rate": 0.0001, |
| "loss": 1.9456, |
| "step": 2024 |
| }, |
| { |
| "epoch": 1.317501626545218, |
| "grad_norm": 0.20996251702308655, |
| "learning_rate": 0.0001, |
| "loss": 1.8834, |
| "step": 2025 |
| }, |
| { |
| "epoch": 1.3181522446324008, |
| "grad_norm": 0.23981128633022308, |
| "learning_rate": 0.0001, |
| "loss": 2.4616, |
| "step": 2026 |
| }, |
| { |
| "epoch": 1.3188028627195836, |
| "grad_norm": 0.22735144197940826, |
| "learning_rate": 0.0001, |
| "loss": 2.1437, |
| "step": 2027 |
| }, |
| { |
| "epoch": 1.3194534808067664, |
| "grad_norm": 0.2444891333580017, |
| "learning_rate": 0.0001, |
| "loss": 2.2023, |
| "step": 2028 |
| }, |
| { |
| "epoch": 1.3201040988939492, |
| "grad_norm": 0.21967968344688416, |
| "learning_rate": 0.0001, |
| "loss": 2.0158, |
| "step": 2029 |
| }, |
| { |
| "epoch": 1.320754716981132, |
| "grad_norm": 0.22170864045619965, |
| "learning_rate": 0.0001, |
| "loss": 2.0339, |
| "step": 2030 |
| }, |
| { |
| "epoch": 1.3214053350683148, |
| "grad_norm": 0.22921305894851685, |
| "learning_rate": 0.0001, |
| "loss": 2.5155, |
| "step": 2031 |
| }, |
| { |
| "epoch": 1.3220559531554978, |
| "grad_norm": 0.19958047568798065, |
| "learning_rate": 0.0001, |
| "loss": 1.9231, |
| "step": 2032 |
| }, |
| { |
| "epoch": 1.3227065712426804, |
| "grad_norm": 0.2367618829011917, |
| "learning_rate": 0.0001, |
| "loss": 2.6605, |
| "step": 2033 |
| }, |
| { |
| "epoch": 1.3233571893298635, |
| "grad_norm": 0.2292090654373169, |
| "learning_rate": 0.0001, |
| "loss": 2.3329, |
| "step": 2034 |
| }, |
| { |
| "epoch": 1.3240078074170463, |
| "grad_norm": 0.20533816516399384, |
| "learning_rate": 0.0001, |
| "loss": 2.0238, |
| "step": 2035 |
| }, |
| { |
| "epoch": 1.324658425504229, |
| "grad_norm": 0.23811018466949463, |
| "learning_rate": 0.0001, |
| "loss": 2.4302, |
| "step": 2036 |
| }, |
| { |
| "epoch": 1.3253090435914119, |
| "grad_norm": 0.24907754361629486, |
| "learning_rate": 0.0001, |
| "loss": 2.261, |
| "step": 2037 |
| }, |
| { |
| "epoch": 1.3259596616785947, |
| "grad_norm": 0.20944790542125702, |
| "learning_rate": 0.0001, |
| "loss": 2.244, |
| "step": 2038 |
| }, |
| { |
| "epoch": 1.3266102797657775, |
| "grad_norm": 0.2156013548374176, |
| "learning_rate": 0.0001, |
| "loss": 2.0102, |
| "step": 2039 |
| }, |
| { |
| "epoch": 1.3272608978529603, |
| "grad_norm": 0.23466533422470093, |
| "learning_rate": 0.0001, |
| "loss": 2.2055, |
| "step": 2040 |
| }, |
| { |
| "epoch": 1.3279115159401431, |
| "grad_norm": 0.22344645857810974, |
| "learning_rate": 0.0001, |
| "loss": 2.0995, |
| "step": 2041 |
| }, |
| { |
| "epoch": 1.328562134027326, |
| "grad_norm": 0.23891085386276245, |
| "learning_rate": 0.0001, |
| "loss": 2.2788, |
| "step": 2042 |
| }, |
| { |
| "epoch": 1.3292127521145087, |
| "grad_norm": 0.2156655639410019, |
| "learning_rate": 0.0001, |
| "loss": 2.225, |
| "step": 2043 |
| }, |
| { |
| "epoch": 1.3298633702016915, |
| "grad_norm": 0.2561625838279724, |
| "learning_rate": 0.0001, |
| "loss": 2.3127, |
| "step": 2044 |
| }, |
| { |
| "epoch": 1.3305139882888743, |
| "grad_norm": 0.21961228549480438, |
| "learning_rate": 0.0001, |
| "loss": 2.2426, |
| "step": 2045 |
| }, |
| { |
| "epoch": 1.3311646063760572, |
| "grad_norm": 0.2692696452140808, |
| "learning_rate": 0.0001, |
| "loss": 2.1736, |
| "step": 2046 |
| }, |
| { |
| "epoch": 1.3318152244632402, |
| "grad_norm": 0.21352826058864594, |
| "learning_rate": 0.0001, |
| "loss": 1.8925, |
| "step": 2047 |
| }, |
| { |
| "epoch": 1.3324658425504228, |
| "grad_norm": 0.22041746973991394, |
| "learning_rate": 0.0001, |
| "loss": 2.1569, |
| "step": 2048 |
| }, |
| { |
| "epoch": 1.3331164606376058, |
| "grad_norm": 0.1909833401441574, |
| "learning_rate": 0.0001, |
| "loss": 1.8923, |
| "step": 2049 |
| }, |
| { |
| "epoch": 1.3337670787247886, |
| "grad_norm": 0.19885796308517456, |
| "learning_rate": 0.0001, |
| "loss": 1.9348, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.3344176968119714, |
| "grad_norm": 0.19970738887786865, |
| "learning_rate": 0.0001, |
| "loss": 1.8038, |
| "step": 2051 |
| }, |
| { |
| "epoch": 1.3350683148991542, |
| "grad_norm": 0.2591457962989807, |
| "learning_rate": 0.0001, |
| "loss": 2.2384, |
| "step": 2052 |
| }, |
| { |
| "epoch": 1.335718932986337, |
| "grad_norm": 0.19714346528053284, |
| "learning_rate": 0.0001, |
| "loss": 1.9811, |
| "step": 2053 |
| }, |
| { |
| "epoch": 1.3363695510735198, |
| "grad_norm": 0.2116885930299759, |
| "learning_rate": 0.0001, |
| "loss": 2.0122, |
| "step": 2054 |
| }, |
| { |
| "epoch": 1.3370201691607027, |
| "grad_norm": 0.2214263379573822, |
| "learning_rate": 0.0001, |
| "loss": 2.2859, |
| "step": 2055 |
| }, |
| { |
| "epoch": 1.3376707872478855, |
| "grad_norm": 0.2294740378856659, |
| "learning_rate": 0.0001, |
| "loss": 1.9794, |
| "step": 2056 |
| }, |
| { |
| "epoch": 1.3383214053350683, |
| "grad_norm": 0.2349557727575302, |
| "learning_rate": 0.0001, |
| "loss": 2.3355, |
| "step": 2057 |
| }, |
| { |
| "epoch": 1.338972023422251, |
| "grad_norm": 0.22130267322063446, |
| "learning_rate": 0.0001, |
| "loss": 1.8275, |
| "step": 2058 |
| }, |
| { |
| "epoch": 1.3396226415094339, |
| "grad_norm": 0.23748932778835297, |
| "learning_rate": 0.0001, |
| "loss": 1.8789, |
| "step": 2059 |
| }, |
| { |
| "epoch": 1.340273259596617, |
| "grad_norm": 0.2707836925983429, |
| "learning_rate": 0.0001, |
| "loss": 2.4778, |
| "step": 2060 |
| }, |
| { |
| "epoch": 1.3409238776837995, |
| "grad_norm": 0.2117568403482437, |
| "learning_rate": 0.0001, |
| "loss": 2.191, |
| "step": 2061 |
| }, |
| { |
| "epoch": 1.3415744957709825, |
| "grad_norm": 0.23183830082416534, |
| "learning_rate": 0.0001, |
| "loss": 1.753, |
| "step": 2062 |
| }, |
| { |
| "epoch": 1.3422251138581651, |
| "grad_norm": 0.2858640253543854, |
| "learning_rate": 0.0001, |
| "loss": 2.2033, |
| "step": 2063 |
| }, |
| { |
| "epoch": 1.3428757319453482, |
| "grad_norm": 0.2193751335144043, |
| "learning_rate": 0.0001, |
| "loss": 2.1896, |
| "step": 2064 |
| }, |
| { |
| "epoch": 1.343526350032531, |
| "grad_norm": 0.25401735305786133, |
| "learning_rate": 0.0001, |
| "loss": 2.0509, |
| "step": 2065 |
| }, |
| { |
| "epoch": 1.3441769681197138, |
| "grad_norm": 0.2629140615463257, |
| "learning_rate": 0.0001, |
| "loss": 2.2691, |
| "step": 2066 |
| }, |
| { |
| "epoch": 1.3448275862068966, |
| "grad_norm": 0.23819613456726074, |
| "learning_rate": 0.0001, |
| "loss": 1.9778, |
| "step": 2067 |
| }, |
| { |
| "epoch": 1.3454782042940794, |
| "grad_norm": 0.20642292499542236, |
| "learning_rate": 0.0001, |
| "loss": 1.8448, |
| "step": 2068 |
| }, |
| { |
| "epoch": 1.3461288223812622, |
| "grad_norm": 0.21848630905151367, |
| "learning_rate": 0.0001, |
| "loss": 1.8877, |
| "step": 2069 |
| }, |
| { |
| "epoch": 1.346779440468445, |
| "grad_norm": 0.21507719159126282, |
| "learning_rate": 0.0001, |
| "loss": 2.2834, |
| "step": 2070 |
| }, |
| { |
| "epoch": 1.3474300585556278, |
| "grad_norm": 0.24163243174552917, |
| "learning_rate": 0.0001, |
| "loss": 2.2033, |
| "step": 2071 |
| }, |
| { |
| "epoch": 1.3480806766428106, |
| "grad_norm": 0.19395868480205536, |
| "learning_rate": 0.0001, |
| "loss": 1.7733, |
| "step": 2072 |
| }, |
| { |
| "epoch": 1.3487312947299934, |
| "grad_norm": 0.22578482329845428, |
| "learning_rate": 0.0001, |
| "loss": 1.8876, |
| "step": 2073 |
| }, |
| { |
| "epoch": 1.3493819128171762, |
| "grad_norm": 0.21374450623989105, |
| "learning_rate": 0.0001, |
| "loss": 2.3039, |
| "step": 2074 |
| }, |
| { |
| "epoch": 1.3500325309043593, |
| "grad_norm": 0.21701087057590485, |
| "learning_rate": 0.0001, |
| "loss": 2.0551, |
| "step": 2075 |
| }, |
| { |
| "epoch": 1.3506831489915418, |
| "grad_norm": 0.22045759856700897, |
| "learning_rate": 0.0001, |
| "loss": 2.1285, |
| "step": 2076 |
| }, |
| { |
| "epoch": 1.3513337670787249, |
| "grad_norm": 0.223333477973938, |
| "learning_rate": 0.0001, |
| "loss": 2.0754, |
| "step": 2077 |
| }, |
| { |
| "epoch": 1.3519843851659077, |
| "grad_norm": 0.2512180507183075, |
| "learning_rate": 0.0001, |
| "loss": 2.4994, |
| "step": 2078 |
| }, |
| { |
| "epoch": 1.3526350032530905, |
| "grad_norm": 0.21181026101112366, |
| "learning_rate": 0.0001, |
| "loss": 2.1654, |
| "step": 2079 |
| }, |
| { |
| "epoch": 1.3532856213402733, |
| "grad_norm": 0.19490301609039307, |
| "learning_rate": 0.0001, |
| "loss": 1.8798, |
| "step": 2080 |
| }, |
| { |
| "epoch": 1.3539362394274561, |
| "grad_norm": 0.23965950310230255, |
| "learning_rate": 0.0001, |
| "loss": 2.2435, |
| "step": 2081 |
| }, |
| { |
| "epoch": 1.354586857514639, |
| "grad_norm": 0.24850183725357056, |
| "learning_rate": 0.0001, |
| "loss": 2.4788, |
| "step": 2082 |
| }, |
| { |
| "epoch": 1.3552374756018217, |
| "grad_norm": 0.25557541847229004, |
| "learning_rate": 0.0001, |
| "loss": 1.991, |
| "step": 2083 |
| }, |
| { |
| "epoch": 1.3558880936890045, |
| "grad_norm": 0.24425360560417175, |
| "learning_rate": 0.0001, |
| "loss": 2.4066, |
| "step": 2084 |
| }, |
| { |
| "epoch": 1.3565387117761873, |
| "grad_norm": 0.2026103287935257, |
| "learning_rate": 0.0001, |
| "loss": 1.97, |
| "step": 2085 |
| }, |
| { |
| "epoch": 1.3571893298633702, |
| "grad_norm": 0.2177918255329132, |
| "learning_rate": 0.0001, |
| "loss": 2.1209, |
| "step": 2086 |
| }, |
| { |
| "epoch": 1.357839947950553, |
| "grad_norm": 0.23414911329746246, |
| "learning_rate": 0.0001, |
| "loss": 2.6153, |
| "step": 2087 |
| }, |
| { |
| "epoch": 1.3584905660377358, |
| "grad_norm": 0.2161582112312317, |
| "learning_rate": 0.0001, |
| "loss": 2.0944, |
| "step": 2088 |
| }, |
| { |
| "epoch": 1.3591411841249186, |
| "grad_norm": 0.2110084444284439, |
| "learning_rate": 0.0001, |
| "loss": 2.2967, |
| "step": 2089 |
| }, |
| { |
| "epoch": 1.3597918022121016, |
| "grad_norm": 0.20428837835788727, |
| "learning_rate": 0.0001, |
| "loss": 1.7769, |
| "step": 2090 |
| }, |
| { |
| "epoch": 1.3604424202992842, |
| "grad_norm": 0.27536797523498535, |
| "learning_rate": 0.0001, |
| "loss": 2.8292, |
| "step": 2091 |
| }, |
| { |
| "epoch": 1.3610930383864672, |
| "grad_norm": 0.2233758419752121, |
| "learning_rate": 0.0001, |
| "loss": 2.3466, |
| "step": 2092 |
| }, |
| { |
| "epoch": 1.36174365647365, |
| "grad_norm": 0.24668963253498077, |
| "learning_rate": 0.0001, |
| "loss": 2.0451, |
| "step": 2093 |
| }, |
| { |
| "epoch": 1.3623942745608328, |
| "grad_norm": 0.21915104985237122, |
| "learning_rate": 0.0001, |
| "loss": 2.2835, |
| "step": 2094 |
| }, |
| { |
| "epoch": 1.3630448926480156, |
| "grad_norm": 0.22096975147724152, |
| "learning_rate": 0.0001, |
| "loss": 2.0873, |
| "step": 2095 |
| }, |
| { |
| "epoch": 1.3636955107351985, |
| "grad_norm": 0.19787731766700745, |
| "learning_rate": 0.0001, |
| "loss": 1.8287, |
| "step": 2096 |
| }, |
| { |
| "epoch": 1.3643461288223813, |
| "grad_norm": 0.23780184984207153, |
| "learning_rate": 0.0001, |
| "loss": 2.492, |
| "step": 2097 |
| }, |
| { |
| "epoch": 1.364996746909564, |
| "grad_norm": 0.1953575760126114, |
| "learning_rate": 0.0001, |
| "loss": 1.8464, |
| "step": 2098 |
| }, |
| { |
| "epoch": 1.3656473649967469, |
| "grad_norm": 0.23375852406024933, |
| "learning_rate": 0.0001, |
| "loss": 2.0762, |
| "step": 2099 |
| }, |
| { |
| "epoch": 1.3662979830839297, |
| "grad_norm": 0.22975870966911316, |
| "learning_rate": 0.0001, |
| "loss": 2.5536, |
| "step": 2100 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 3074, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 300, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.1678785153662976e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|