| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.5855562784645413, |
| "eval_steps": 500, |
| "global_step": 900, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0006506180871828237, |
| "grad_norm": 3.778571605682373, |
| "learning_rate": 0.0001, |
| "loss": 4.706, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0013012361743656475, |
| "grad_norm": 0.7331739068031311, |
| "learning_rate": 0.0001, |
| "loss": 2.6402, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.001951854261548471, |
| "grad_norm": 0.5679969191551208, |
| "learning_rate": 0.0001, |
| "loss": 2.5315, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.002602472348731295, |
| "grad_norm": 0.6543067693710327, |
| "learning_rate": 0.0001, |
| "loss": 2.5226, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0032530904359141183, |
| "grad_norm": 0.42487671971321106, |
| "learning_rate": 0.0001, |
| "loss": 2.1375, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.003903708523096942, |
| "grad_norm": 0.48795655369758606, |
| "learning_rate": 0.0001, |
| "loss": 2.253, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.004554326610279766, |
| "grad_norm": 0.6054234504699707, |
| "learning_rate": 0.0001, |
| "loss": 2.3411, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.00520494469746259, |
| "grad_norm": 0.3039970397949219, |
| "learning_rate": 0.0001, |
| "loss": 2.1293, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.005855562784645413, |
| "grad_norm": 0.6592361330986023, |
| "learning_rate": 0.0001, |
| "loss": 3.1615, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.006506180871828237, |
| "grad_norm": 0.4017999470233917, |
| "learning_rate": 0.0001, |
| "loss": 2.5068, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0071567989590110605, |
| "grad_norm": 0.31507641077041626, |
| "learning_rate": 0.0001, |
| "loss": 2.1894, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.007807417046193884, |
| "grad_norm": 0.33226895332336426, |
| "learning_rate": 0.0001, |
| "loss": 2.2006, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.008458035133376708, |
| "grad_norm": 0.2632739841938019, |
| "learning_rate": 0.0001, |
| "loss": 2.0998, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.009108653220559532, |
| "grad_norm": 0.2794795036315918, |
| "learning_rate": 0.0001, |
| "loss": 2.113, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.009759271307742356, |
| "grad_norm": 0.29168492555618286, |
| "learning_rate": 0.0001, |
| "loss": 2.354, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.01040988939492518, |
| "grad_norm": 0.2537970244884491, |
| "learning_rate": 0.0001, |
| "loss": 2.2939, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.011060507482108002, |
| "grad_norm": 0.5140053033828735, |
| "learning_rate": 0.0001, |
| "loss": 2.6237, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.011711125569290826, |
| "grad_norm": 0.3093675971031189, |
| "learning_rate": 0.0001, |
| "loss": 2.3502, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.01236174365647365, |
| "grad_norm": 0.29241421818733215, |
| "learning_rate": 0.0001, |
| "loss": 2.5365, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.013012361743656473, |
| "grad_norm": 0.3164322078227997, |
| "learning_rate": 0.0001, |
| "loss": 2.396, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.013662979830839297, |
| "grad_norm": 0.24512743949890137, |
| "learning_rate": 0.0001, |
| "loss": 2.2759, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.014313597918022121, |
| "grad_norm": 0.24328342080116272, |
| "learning_rate": 0.0001, |
| "loss": 2.2103, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.014964216005204945, |
| "grad_norm": 0.2563220262527466, |
| "learning_rate": 0.0001, |
| "loss": 2.4836, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.015614834092387769, |
| "grad_norm": 0.33601588010787964, |
| "learning_rate": 0.0001, |
| "loss": 2.4446, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.01626545217957059, |
| "grad_norm": 0.28699007630348206, |
| "learning_rate": 0.0001, |
| "loss": 2.8504, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.016916070266753416, |
| "grad_norm": 0.3181653618812561, |
| "learning_rate": 0.0001, |
| "loss": 2.3042, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.01756668835393624, |
| "grad_norm": 0.2349390834569931, |
| "learning_rate": 0.0001, |
| "loss": 2.1024, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.018217306441119064, |
| "grad_norm": 0.2751820981502533, |
| "learning_rate": 0.0001, |
| "loss": 2.2646, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.018867924528301886, |
| "grad_norm": 0.25547271966934204, |
| "learning_rate": 0.0001, |
| "loss": 2.1928, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.01951854261548471, |
| "grad_norm": 0.283507764339447, |
| "learning_rate": 0.0001, |
| "loss": 2.3073, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.020169160702667534, |
| "grad_norm": 0.3354213237762451, |
| "learning_rate": 0.0001, |
| "loss": 2.6273, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.02081977878985036, |
| "grad_norm": 0.40484553575515747, |
| "learning_rate": 0.0001, |
| "loss": 2.4919, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.02147039687703318, |
| "grad_norm": 0.34319421648979187, |
| "learning_rate": 0.0001, |
| "loss": 2.8381, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.022121014964216004, |
| "grad_norm": 0.32958984375, |
| "learning_rate": 0.0001, |
| "loss": 2.3062, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.02277163305139883, |
| "grad_norm": 0.4503105878829956, |
| "learning_rate": 0.0001, |
| "loss": 2.4647, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.02342225113858165, |
| "grad_norm": 0.5084238052368164, |
| "learning_rate": 0.0001, |
| "loss": 3.0047, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.024072869225764477, |
| "grad_norm": 0.5192400813102722, |
| "learning_rate": 0.0001, |
| "loss": 2.2899, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.0247234873129473, |
| "grad_norm": 0.4197874665260315, |
| "learning_rate": 0.0001, |
| "loss": 2.4057, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.025374105400130124, |
| "grad_norm": 0.5170285105705261, |
| "learning_rate": 0.0001, |
| "loss": 3.2918, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.026024723487312947, |
| "grad_norm": 0.2491147667169571, |
| "learning_rate": 0.0001, |
| "loss": 2.1957, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.026675341574495772, |
| "grad_norm": 0.6597635746002197, |
| "learning_rate": 0.0001, |
| "loss": 2.7474, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.027325959661678594, |
| "grad_norm": 0.40205034613609314, |
| "learning_rate": 0.0001, |
| "loss": 2.4561, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.02797657774886142, |
| "grad_norm": 0.27388331294059753, |
| "learning_rate": 0.0001, |
| "loss": 2.0477, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.028627195836044242, |
| "grad_norm": 0.9163908958435059, |
| "learning_rate": 0.0001, |
| "loss": 3.334, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.029277813923227064, |
| "grad_norm": 0.2747696042060852, |
| "learning_rate": 0.0001, |
| "loss": 2.1604, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.02992843201040989, |
| "grad_norm": 0.36308085918426514, |
| "learning_rate": 0.0001, |
| "loss": 2.693, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.03057905009759271, |
| "grad_norm": 0.6159886121749878, |
| "learning_rate": 0.0001, |
| "loss": 2.5515, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.031229668184775537, |
| "grad_norm": 0.4801373779773712, |
| "learning_rate": 0.0001, |
| "loss": 2.809, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.03188028627195836, |
| "grad_norm": 0.32580915093421936, |
| "learning_rate": 0.0001, |
| "loss": 2.5236, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.03253090435914118, |
| "grad_norm": 0.3028671443462372, |
| "learning_rate": 0.0001, |
| "loss": 2.2685, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.03318152244632401, |
| "grad_norm": 0.5660931468009949, |
| "learning_rate": 0.0001, |
| "loss": 2.2564, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.03383214053350683, |
| "grad_norm": 0.24634602665901184, |
| "learning_rate": 0.0001, |
| "loss": 2.1355, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.034482758620689655, |
| "grad_norm": 0.24830913543701172, |
| "learning_rate": 0.0001, |
| "loss": 2.0425, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.03513337670787248, |
| "grad_norm": 0.23614570498466492, |
| "learning_rate": 0.0001, |
| "loss": 2.1975, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.035783994795055306, |
| "grad_norm": 0.2624325156211853, |
| "learning_rate": 0.0001, |
| "loss": 2.3071, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.03643461288223813, |
| "grad_norm": 0.3967755436897278, |
| "learning_rate": 0.0001, |
| "loss": 2.6088, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.03708523096942095, |
| "grad_norm": 0.22147373855113983, |
| "learning_rate": 0.0001, |
| "loss": 2.003, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.03773584905660377, |
| "grad_norm": 0.47795867919921875, |
| "learning_rate": 0.0001, |
| "loss": 2.1473, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.038386467143786594, |
| "grad_norm": 0.43953707814216614, |
| "learning_rate": 0.0001, |
| "loss": 2.6595, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.03903708523096942, |
| "grad_norm": 0.29031845927238464, |
| "learning_rate": 0.0001, |
| "loss": 2.3173, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.039687703318152245, |
| "grad_norm": 0.2491024285554886, |
| "learning_rate": 0.0001, |
| "loss": 2.0575, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.04033832140533507, |
| "grad_norm": 0.3025687634944916, |
| "learning_rate": 0.0001, |
| "loss": 2.0965, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.04098893949251789, |
| "grad_norm": 0.26097819209098816, |
| "learning_rate": 0.0001, |
| "loss": 2.2583, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.04163955757970072, |
| "grad_norm": 0.2413238286972046, |
| "learning_rate": 0.0001, |
| "loss": 2.2441, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.04229017566688354, |
| "grad_norm": 0.2332315295934677, |
| "learning_rate": 0.0001, |
| "loss": 2.185, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.04294079375406636, |
| "grad_norm": 0.4037252366542816, |
| "learning_rate": 0.0001, |
| "loss": 2.3875, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.043591411841249185, |
| "grad_norm": 0.34149354696273804, |
| "learning_rate": 0.0001, |
| "loss": 2.3835, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.04424202992843201, |
| "grad_norm": 0.23793481290340424, |
| "learning_rate": 0.0001, |
| "loss": 2.3521, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.044892648015614836, |
| "grad_norm": 0.24252744019031525, |
| "learning_rate": 0.0001, |
| "loss": 2.0984, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.04554326610279766, |
| "grad_norm": 0.2870447635650635, |
| "learning_rate": 0.0001, |
| "loss": 2.5408, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.04619388418998048, |
| "grad_norm": 0.5050077438354492, |
| "learning_rate": 0.0001, |
| "loss": 2.7091, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.0468445022771633, |
| "grad_norm": 0.2391565591096878, |
| "learning_rate": 0.0001, |
| "loss": 2.1601, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.04749512036434613, |
| "grad_norm": 0.20647507905960083, |
| "learning_rate": 0.0001, |
| "loss": 1.9582, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.048145738451528954, |
| "grad_norm": 0.26072338223457336, |
| "learning_rate": 0.0001, |
| "loss": 2.3577, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.048796356538711776, |
| "grad_norm": 0.28378504514694214, |
| "learning_rate": 0.0001, |
| "loss": 2.349, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.0494469746258946, |
| "grad_norm": 0.2536943256855011, |
| "learning_rate": 0.0001, |
| "loss": 2.375, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.05009759271307743, |
| "grad_norm": 0.29276445508003235, |
| "learning_rate": 0.0001, |
| "loss": 2.5003, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.05074821080026025, |
| "grad_norm": 0.2649310231208801, |
| "learning_rate": 0.0001, |
| "loss": 2.3247, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.05139882888744307, |
| "grad_norm": 0.38125383853912354, |
| "learning_rate": 0.0001, |
| "loss": 2.5405, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.05204944697462589, |
| "grad_norm": 0.40980008244514465, |
| "learning_rate": 0.0001, |
| "loss": 2.212, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.052700065061808715, |
| "grad_norm": 0.5363492965698242, |
| "learning_rate": 0.0001, |
| "loss": 2.6499, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.053350683148991544, |
| "grad_norm": 0.34647300839424133, |
| "learning_rate": 0.0001, |
| "loss": 2.6302, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.054001301236174366, |
| "grad_norm": 0.27607980370521545, |
| "learning_rate": 0.0001, |
| "loss": 2.1819, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.05465191932335719, |
| "grad_norm": 0.27654680609703064, |
| "learning_rate": 0.0001, |
| "loss": 2.1763, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.05530253741054001, |
| "grad_norm": 0.24596217274665833, |
| "learning_rate": 0.0001, |
| "loss": 2.2585, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.05595315549772284, |
| "grad_norm": 0.24279890954494476, |
| "learning_rate": 0.0001, |
| "loss": 2.4247, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.05660377358490566, |
| "grad_norm": 0.2918747365474701, |
| "learning_rate": 0.0001, |
| "loss": 2.3986, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.057254391672088484, |
| "grad_norm": 0.26778745651245117, |
| "learning_rate": 0.0001, |
| "loss": 2.3592, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.057905009759271306, |
| "grad_norm": 0.39637815952301025, |
| "learning_rate": 0.0001, |
| "loss": 2.8006, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.05855562784645413, |
| "grad_norm": 0.2676962614059448, |
| "learning_rate": 0.0001, |
| "loss": 2.2384, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.05920624593363696, |
| "grad_norm": 0.3044937252998352, |
| "learning_rate": 0.0001, |
| "loss": 2.7762, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.05985686402081978, |
| "grad_norm": 0.23922136425971985, |
| "learning_rate": 0.0001, |
| "loss": 2.0873, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.0605074821080026, |
| "grad_norm": 0.25385046005249023, |
| "learning_rate": 0.0001, |
| "loss": 2.2708, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.06115810019518542, |
| "grad_norm": 0.378401517868042, |
| "learning_rate": 0.0001, |
| "loss": 3.0583, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.06180871828236825, |
| "grad_norm": 0.37193092703819275, |
| "learning_rate": 0.0001, |
| "loss": 2.3632, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.062459336369551074, |
| "grad_norm": 0.3757643699645996, |
| "learning_rate": 0.0001, |
| "loss": 2.4071, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.0631099544567339, |
| "grad_norm": 0.272833913564682, |
| "learning_rate": 0.0001, |
| "loss": 2.3989, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.06376057254391672, |
| "grad_norm": 0.26533326506614685, |
| "learning_rate": 0.0001, |
| "loss": 2.1716, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.06441119063109954, |
| "grad_norm": 0.5787199139595032, |
| "learning_rate": 0.0001, |
| "loss": 2.9445, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.06506180871828236, |
| "grad_norm": 0.29046157002449036, |
| "learning_rate": 0.0001, |
| "loss": 2.3325, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.06571242680546518, |
| "grad_norm": 0.531452476978302, |
| "learning_rate": 0.0001, |
| "loss": 2.7445, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.06636304489264802, |
| "grad_norm": 0.3969165086746216, |
| "learning_rate": 0.0001, |
| "loss": 2.7126, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.06701366297983084, |
| "grad_norm": 0.24183356761932373, |
| "learning_rate": 0.0001, |
| "loss": 1.9971, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.06766428106701367, |
| "grad_norm": 0.3268399238586426, |
| "learning_rate": 0.0001, |
| "loss": 2.1055, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.06831489915419649, |
| "grad_norm": 0.2625877559185028, |
| "learning_rate": 0.0001, |
| "loss": 1.9946, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.06896551724137931, |
| "grad_norm": 0.2720443308353424, |
| "learning_rate": 0.0001, |
| "loss": 2.0764, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.06961613532856213, |
| "grad_norm": 0.20969334244728088, |
| "learning_rate": 0.0001, |
| "loss": 1.8687, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.07026675341574495, |
| "grad_norm": 0.26211223006248474, |
| "learning_rate": 0.0001, |
| "loss": 2.2042, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.07091737150292778, |
| "grad_norm": 0.27889683842658997, |
| "learning_rate": 0.0001, |
| "loss": 2.3146, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.07156798959011061, |
| "grad_norm": 0.2657179832458496, |
| "learning_rate": 0.0001, |
| "loss": 2.1021, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.07221860767729343, |
| "grad_norm": 0.26620885729789734, |
| "learning_rate": 0.0001, |
| "loss": 2.3488, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.07286922576447626, |
| "grad_norm": 0.4223373830318451, |
| "learning_rate": 0.0001, |
| "loss": 2.5289, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.07351984385165908, |
| "grad_norm": 0.35398781299591064, |
| "learning_rate": 0.0001, |
| "loss": 2.5702, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.0741704619388419, |
| "grad_norm": 0.23328129947185516, |
| "learning_rate": 0.0001, |
| "loss": 2.1292, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.07482108002602472, |
| "grad_norm": 0.33508536219596863, |
| "learning_rate": 0.0001, |
| "loss": 2.2049, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.07547169811320754, |
| "grad_norm": 0.2646953761577606, |
| "learning_rate": 0.0001, |
| "loss": 2.3445, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.07612231620039037, |
| "grad_norm": 0.27866706252098083, |
| "learning_rate": 0.0001, |
| "loss": 2.2472, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.07677293428757319, |
| "grad_norm": 0.35688602924346924, |
| "learning_rate": 0.0001, |
| "loss": 2.5045, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.07742355237475602, |
| "grad_norm": 0.24262933433055878, |
| "learning_rate": 0.0001, |
| "loss": 2.4565, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.07807417046193885, |
| "grad_norm": 0.44757333397865295, |
| "learning_rate": 0.0001, |
| "loss": 2.1619, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.07872478854912167, |
| "grad_norm": 0.3279111385345459, |
| "learning_rate": 0.0001, |
| "loss": 2.3996, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.07937540663630449, |
| "grad_norm": 0.25862693786621094, |
| "learning_rate": 0.0001, |
| "loss": 2.3214, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.08002602472348731, |
| "grad_norm": 0.30093592405319214, |
| "learning_rate": 0.0001, |
| "loss": 2.6446, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.08067664281067013, |
| "grad_norm": 0.25440871715545654, |
| "learning_rate": 0.0001, |
| "loss": 2.1181, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.08132726089785296, |
| "grad_norm": 0.19935627281665802, |
| "learning_rate": 0.0001, |
| "loss": 2.0904, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.08197787898503578, |
| "grad_norm": 0.27385473251342773, |
| "learning_rate": 0.0001, |
| "loss": 2.0829, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.0826284970722186, |
| "grad_norm": 0.24417711794376373, |
| "learning_rate": 0.0001, |
| "loss": 2.0019, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.08327911515940144, |
| "grad_norm": 0.27386653423309326, |
| "learning_rate": 0.0001, |
| "loss": 2.2743, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.08392973324658426, |
| "grad_norm": 0.22413575649261475, |
| "learning_rate": 0.0001, |
| "loss": 2.1584, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.08458035133376708, |
| "grad_norm": 0.27748343348503113, |
| "learning_rate": 0.0001, |
| "loss": 2.1428, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.0852309694209499, |
| "grad_norm": 0.18890976905822754, |
| "learning_rate": 0.0001, |
| "loss": 1.9474, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.08588158750813273, |
| "grad_norm": 0.3067719340324402, |
| "learning_rate": 0.0001, |
| "loss": 2.287, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.08653220559531555, |
| "grad_norm": 0.35126858949661255, |
| "learning_rate": 0.0001, |
| "loss": 2.5086, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.08718282368249837, |
| "grad_norm": 0.19619591534137726, |
| "learning_rate": 0.0001, |
| "loss": 2.0132, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.08783344176968119, |
| "grad_norm": 0.360569566488266, |
| "learning_rate": 0.0001, |
| "loss": 2.607, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.08848405985686401, |
| "grad_norm": 0.22566738724708557, |
| "learning_rate": 0.0001, |
| "loss": 2.0942, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.08913467794404685, |
| "grad_norm": 0.27346086502075195, |
| "learning_rate": 0.0001, |
| "loss": 2.3139, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.08978529603122967, |
| "grad_norm": 0.2500152289867401, |
| "learning_rate": 0.0001, |
| "loss": 2.0815, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.0904359141184125, |
| "grad_norm": 0.22101153433322906, |
| "learning_rate": 0.0001, |
| "loss": 2.374, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.09108653220559532, |
| "grad_norm": 0.2173723727464676, |
| "learning_rate": 0.0001, |
| "loss": 2.0084, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.09173715029277814, |
| "grad_norm": 0.28956499695777893, |
| "learning_rate": 0.0001, |
| "loss": 2.6283, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.09238776837996096, |
| "grad_norm": 0.27032795548439026, |
| "learning_rate": 0.0001, |
| "loss": 2.142, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.09303838646714378, |
| "grad_norm": 0.24320480227470398, |
| "learning_rate": 0.0001, |
| "loss": 2.1402, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.0936890045543266, |
| "grad_norm": 0.3127799332141876, |
| "learning_rate": 0.0001, |
| "loss": 2.6671, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.09433962264150944, |
| "grad_norm": 0.30706024169921875, |
| "learning_rate": 0.0001, |
| "loss": 2.3026, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.09499024072869226, |
| "grad_norm": 0.2378646731376648, |
| "learning_rate": 0.0001, |
| "loss": 2.0422, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.09564085881587508, |
| "grad_norm": 0.24755406379699707, |
| "learning_rate": 0.0001, |
| "loss": 2.2574, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.09629147690305791, |
| "grad_norm": 0.34464696049690247, |
| "learning_rate": 0.0001, |
| "loss": 2.2817, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.09694209499024073, |
| "grad_norm": 0.30485469102859497, |
| "learning_rate": 0.0001, |
| "loss": 2.7303, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.09759271307742355, |
| "grad_norm": 0.1860698163509369, |
| "learning_rate": 0.0001, |
| "loss": 1.8582, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.09824333116460637, |
| "grad_norm": 0.23853841423988342, |
| "learning_rate": 0.0001, |
| "loss": 2.1378, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.0988939492517892, |
| "grad_norm": 0.20248261094093323, |
| "learning_rate": 0.0001, |
| "loss": 2.1888, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.09954456733897202, |
| "grad_norm": 0.3582792282104492, |
| "learning_rate": 0.0001, |
| "loss": 2.6726, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.10019518542615485, |
| "grad_norm": 0.2576686441898346, |
| "learning_rate": 0.0001, |
| "loss": 2.4494, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.10084580351333768, |
| "grad_norm": 0.306029349565506, |
| "learning_rate": 0.0001, |
| "loss": 2.2273, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.1014964216005205, |
| "grad_norm": 0.31375500559806824, |
| "learning_rate": 0.0001, |
| "loss": 2.2474, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.10214703968770332, |
| "grad_norm": 0.253250390291214, |
| "learning_rate": 0.0001, |
| "loss": 2.0142, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.10279765777488614, |
| "grad_norm": 0.3098273277282715, |
| "learning_rate": 0.0001, |
| "loss": 2.2516, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.10344827586206896, |
| "grad_norm": 0.3239591717720032, |
| "learning_rate": 0.0001, |
| "loss": 2.2432, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.10409889394925179, |
| "grad_norm": 0.24929773807525635, |
| "learning_rate": 0.0001, |
| "loss": 2.2495, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.10474951203643461, |
| "grad_norm": 0.3203783929347992, |
| "learning_rate": 0.0001, |
| "loss": 2.68, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.10540013012361743, |
| "grad_norm": 0.38844674825668335, |
| "learning_rate": 0.0001, |
| "loss": 2.7457, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.10605074821080027, |
| "grad_norm": 0.21753644943237305, |
| "learning_rate": 0.0001, |
| "loss": 2.1284, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.10670136629798309, |
| "grad_norm": 0.20610418915748596, |
| "learning_rate": 0.0001, |
| "loss": 1.8377, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.10735198438516591, |
| "grad_norm": 0.3555772304534912, |
| "learning_rate": 0.0001, |
| "loss": 2.3599, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.10800260247234873, |
| "grad_norm": 0.3971005380153656, |
| "learning_rate": 0.0001, |
| "loss": 2.2771, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.10865322055953155, |
| "grad_norm": 0.28628769516944885, |
| "learning_rate": 0.0001, |
| "loss": 2.2438, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.10930383864671438, |
| "grad_norm": 0.38728833198547363, |
| "learning_rate": 0.0001, |
| "loss": 2.4103, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.1099544567338972, |
| "grad_norm": 0.26340189576148987, |
| "learning_rate": 0.0001, |
| "loss": 2.6832, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.11060507482108002, |
| "grad_norm": 0.20119386911392212, |
| "learning_rate": 0.0001, |
| "loss": 1.9622, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.11125569290826284, |
| "grad_norm": 0.2929171621799469, |
| "learning_rate": 0.0001, |
| "loss": 2.2762, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.11190631099544568, |
| "grad_norm": 0.422146201133728, |
| "learning_rate": 0.0001, |
| "loss": 2.4015, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.1125569290826285, |
| "grad_norm": 0.29050537943840027, |
| "learning_rate": 0.0001, |
| "loss": 2.4399, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.11320754716981132, |
| "grad_norm": 0.2646816074848175, |
| "learning_rate": 0.0001, |
| "loss": 2.3058, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.11385816525699415, |
| "grad_norm": 0.2643061578273773, |
| "learning_rate": 0.0001, |
| "loss": 2.1892, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.11450878334417697, |
| "grad_norm": 0.5878323316574097, |
| "learning_rate": 0.0001, |
| "loss": 3.2198, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.11515940143135979, |
| "grad_norm": 0.36881884932518005, |
| "learning_rate": 0.0001, |
| "loss": 2.4112, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.11581001951854261, |
| "grad_norm": 0.25198304653167725, |
| "learning_rate": 0.0001, |
| "loss": 2.1667, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.11646063760572543, |
| "grad_norm": 0.34164664149284363, |
| "learning_rate": 0.0001, |
| "loss": 2.6248, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.11711125569290826, |
| "grad_norm": 0.41471973061561584, |
| "learning_rate": 0.0001, |
| "loss": 2.5616, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.11776187378009109, |
| "grad_norm": 0.26372480392456055, |
| "learning_rate": 0.0001, |
| "loss": 2.2904, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.11841249186727391, |
| "grad_norm": 0.2271176278591156, |
| "learning_rate": 0.0001, |
| "loss": 2.0312, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.11906310995445674, |
| "grad_norm": 0.2106996774673462, |
| "learning_rate": 0.0001, |
| "loss": 1.9661, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.11971372804163956, |
| "grad_norm": 0.22870291769504547, |
| "learning_rate": 0.0001, |
| "loss": 1.9052, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.12036434612882238, |
| "grad_norm": 0.41253864765167236, |
| "learning_rate": 0.0001, |
| "loss": 2.3747, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.1210149642160052, |
| "grad_norm": 0.3258817791938782, |
| "learning_rate": 0.0001, |
| "loss": 2.5401, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.12166558230318802, |
| "grad_norm": 0.3461870551109314, |
| "learning_rate": 0.0001, |
| "loss": 2.8027, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.12231620039037085, |
| "grad_norm": 0.3704046607017517, |
| "learning_rate": 0.0001, |
| "loss": 2.799, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.12296681847755368, |
| "grad_norm": 0.30265969038009644, |
| "learning_rate": 0.0001, |
| "loss": 2.4287, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.1236174365647365, |
| "grad_norm": 0.4215582013130188, |
| "learning_rate": 0.0001, |
| "loss": 2.6857, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.12426805465191933, |
| "grad_norm": 0.3003520965576172, |
| "learning_rate": 0.0001, |
| "loss": 2.4155, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.12491867273910215, |
| "grad_norm": 0.412749320268631, |
| "learning_rate": 0.0001, |
| "loss": 2.6352, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.12556929082628496, |
| "grad_norm": 0.2772350013256073, |
| "learning_rate": 0.0001, |
| "loss": 2.2452, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.1262199089134678, |
| "grad_norm": 0.21457143127918243, |
| "learning_rate": 0.0001, |
| "loss": 2.0172, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.12687052700065063, |
| "grad_norm": 0.40995845198631287, |
| "learning_rate": 0.0001, |
| "loss": 2.6218, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.12752114508783344, |
| "grad_norm": 0.2253209501504898, |
| "learning_rate": 0.0001, |
| "loss": 2.2319, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.12817176317501627, |
| "grad_norm": 0.36564287543296814, |
| "learning_rate": 0.0001, |
| "loss": 2.4585, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.12882238126219908, |
| "grad_norm": 0.41084784269332886, |
| "learning_rate": 0.0001, |
| "loss": 2.6326, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.12947299934938192, |
| "grad_norm": 0.36012157797813416, |
| "learning_rate": 0.0001, |
| "loss": 2.0168, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.13012361743656473, |
| "grad_norm": 0.5138425230979919, |
| "learning_rate": 0.0001, |
| "loss": 2.3377, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.13077423552374756, |
| "grad_norm": 0.2799031436443329, |
| "learning_rate": 0.0001, |
| "loss": 2.532, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.13142485361093037, |
| "grad_norm": 0.3078779876232147, |
| "learning_rate": 0.0001, |
| "loss": 2.044, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.1320754716981132, |
| "grad_norm": 0.31270912289619446, |
| "learning_rate": 0.0001, |
| "loss": 1.8576, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.13272608978529604, |
| "grad_norm": 0.23117204010486603, |
| "learning_rate": 0.0001, |
| "loss": 2.1908, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.13337670787247885, |
| "grad_norm": 0.2531285285949707, |
| "learning_rate": 0.0001, |
| "loss": 2.143, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.1340273259596617, |
| "grad_norm": 0.28053218126296997, |
| "learning_rate": 0.0001, |
| "loss": 2.6902, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.1346779440468445, |
| "grad_norm": 0.2600589692592621, |
| "learning_rate": 0.0001, |
| "loss": 2.0355, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.13532856213402733, |
| "grad_norm": 0.2725912630558014, |
| "learning_rate": 0.0001, |
| "loss": 2.3949, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.13597918022121014, |
| "grad_norm": 0.6166338324546814, |
| "learning_rate": 0.0001, |
| "loss": 2.8146, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.13662979830839297, |
| "grad_norm": 0.4028575122356415, |
| "learning_rate": 0.0001, |
| "loss": 2.888, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.1372804163955758, |
| "grad_norm": 0.23181548714637756, |
| "learning_rate": 0.0001, |
| "loss": 2.1406, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.13793103448275862, |
| "grad_norm": 0.24338063597679138, |
| "learning_rate": 0.0001, |
| "loss": 2.1564, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.13858165256994145, |
| "grad_norm": 0.233146533370018, |
| "learning_rate": 0.0001, |
| "loss": 2.1695, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.13923227065712426, |
| "grad_norm": 0.21236726641654968, |
| "learning_rate": 0.0001, |
| "loss": 1.9272, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.1398828887443071, |
| "grad_norm": 0.25471317768096924, |
| "learning_rate": 0.0001, |
| "loss": 2.3447, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.1405335068314899, |
| "grad_norm": 0.35532835125923157, |
| "learning_rate": 0.0001, |
| "loss": 2.4328, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.14118412491867274, |
| "grad_norm": 0.32900944352149963, |
| "learning_rate": 0.0001, |
| "loss": 2.385, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.14183474300585555, |
| "grad_norm": 0.45404863357543945, |
| "learning_rate": 0.0001, |
| "loss": 2.8053, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.1424853610930384, |
| "grad_norm": 0.33968400955200195, |
| "learning_rate": 0.0001, |
| "loss": 2.4524, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.14313597918022122, |
| "grad_norm": 0.3250170946121216, |
| "learning_rate": 0.0001, |
| "loss": 2.6173, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.14378659726740403, |
| "grad_norm": 0.34765559434890747, |
| "learning_rate": 0.0001, |
| "loss": 2.8468, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.14443721535458687, |
| "grad_norm": 0.2274564653635025, |
| "learning_rate": 0.0001, |
| "loss": 2.1305, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.14508783344176968, |
| "grad_norm": 0.42719507217407227, |
| "learning_rate": 0.0001, |
| "loss": 2.3682, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.1457384515289525, |
| "grad_norm": 0.2848481833934784, |
| "learning_rate": 0.0001, |
| "loss": 2.0923, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.14638906961613532, |
| "grad_norm": 0.266548752784729, |
| "learning_rate": 0.0001, |
| "loss": 2.0393, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.14703968770331816, |
| "grad_norm": 0.24076099693775177, |
| "learning_rate": 0.0001, |
| "loss": 2.2674, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.14769030579050096, |
| "grad_norm": 0.23347622156143188, |
| "learning_rate": 0.0001, |
| "loss": 1.9455, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.1483409238776838, |
| "grad_norm": 0.3925648033618927, |
| "learning_rate": 0.0001, |
| "loss": 2.7117, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.14899154196486664, |
| "grad_norm": 0.27654924988746643, |
| "learning_rate": 0.0001, |
| "loss": 2.1306, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.14964216005204944, |
| "grad_norm": 0.2853853702545166, |
| "learning_rate": 0.0001, |
| "loss": 2.4369, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.15029277813923228, |
| "grad_norm": 0.4509859085083008, |
| "learning_rate": 0.0001, |
| "loss": 2.6047, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.1509433962264151, |
| "grad_norm": 0.2515909671783447, |
| "learning_rate": 0.0001, |
| "loss": 2.2065, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.15159401431359792, |
| "grad_norm": 0.5977367162704468, |
| "learning_rate": 0.0001, |
| "loss": 2.7133, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.15224463240078073, |
| "grad_norm": 0.30381399393081665, |
| "learning_rate": 0.0001, |
| "loss": 2.343, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.15289525048796357, |
| "grad_norm": 0.27204832434654236, |
| "learning_rate": 0.0001, |
| "loss": 2.2908, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.15354586857514638, |
| "grad_norm": 0.6246710419654846, |
| "learning_rate": 0.0001, |
| "loss": 2.7862, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.1541964866623292, |
| "grad_norm": 0.4803178012371063, |
| "learning_rate": 0.0001, |
| "loss": 3.4388, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.15484710474951205, |
| "grad_norm": 0.3038940727710724, |
| "learning_rate": 0.0001, |
| "loss": 2.7409, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.15549772283669486, |
| "grad_norm": 0.2494591474533081, |
| "learning_rate": 0.0001, |
| "loss": 2.2601, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.1561483409238777, |
| "grad_norm": 0.23808616399765015, |
| "learning_rate": 0.0001, |
| "loss": 2.1319, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.1567989590110605, |
| "grad_norm": 0.3111306130886078, |
| "learning_rate": 0.0001, |
| "loss": 2.7414, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.15744957709824334, |
| "grad_norm": 0.22197599709033966, |
| "learning_rate": 0.0001, |
| "loss": 2.1346, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.15810019518542615, |
| "grad_norm": 0.2681500315666199, |
| "learning_rate": 0.0001, |
| "loss": 2.3779, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.15875081327260898, |
| "grad_norm": 0.2612643241882324, |
| "learning_rate": 0.0001, |
| "loss": 2.5743, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.1594014313597918, |
| "grad_norm": 0.201397106051445, |
| "learning_rate": 0.0001, |
| "loss": 2.0312, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.16005204944697463, |
| "grad_norm": 0.25662410259246826, |
| "learning_rate": 0.0001, |
| "loss": 2.5085, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.16070266753415746, |
| "grad_norm": 0.21460294723510742, |
| "learning_rate": 0.0001, |
| "loss": 2.1099, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.16135328562134027, |
| "grad_norm": 0.19971312582492828, |
| "learning_rate": 0.0001, |
| "loss": 2.1024, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.1620039037085231, |
| "grad_norm": 0.1986059844493866, |
| "learning_rate": 0.0001, |
| "loss": 1.9306, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.16265452179570591, |
| "grad_norm": 0.21961884200572968, |
| "learning_rate": 0.0001, |
| "loss": 2.1218, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.16330513988288875, |
| "grad_norm": 0.20071017742156982, |
| "learning_rate": 0.0001, |
| "loss": 2.0581, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.16395575797007156, |
| "grad_norm": 0.32734909653663635, |
| "learning_rate": 0.0001, |
| "loss": 2.6229, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.1646063760572544, |
| "grad_norm": 0.21822451055049896, |
| "learning_rate": 0.0001, |
| "loss": 1.9954, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.1652569941444372, |
| "grad_norm": 0.3013177216053009, |
| "learning_rate": 0.0001, |
| "loss": 2.454, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.16590761223162004, |
| "grad_norm": 0.31199347972869873, |
| "learning_rate": 0.0001, |
| "loss": 2.815, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.16655823031880287, |
| "grad_norm": 0.2255464345216751, |
| "learning_rate": 0.0001, |
| "loss": 2.0232, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.16720884840598568, |
| "grad_norm": 0.21208804845809937, |
| "learning_rate": 0.0001, |
| "loss": 1.9663, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.16785946649316852, |
| "grad_norm": 0.2432132512331009, |
| "learning_rate": 0.0001, |
| "loss": 2.4189, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.16851008458035133, |
| "grad_norm": 0.21116623282432556, |
| "learning_rate": 0.0001, |
| "loss": 2.0761, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.16916070266753416, |
| "grad_norm": 0.18722975254058838, |
| "learning_rate": 0.0001, |
| "loss": 1.9537, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.16981132075471697, |
| "grad_norm": 0.2683362662792206, |
| "learning_rate": 0.0001, |
| "loss": 2.4483, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.1704619388418998, |
| "grad_norm": 0.2739648222923279, |
| "learning_rate": 0.0001, |
| "loss": 2.3754, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.17111255692908262, |
| "grad_norm": 0.1836375594139099, |
| "learning_rate": 0.0001, |
| "loss": 2.0103, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.17176317501626545, |
| "grad_norm": 0.34002602100372314, |
| "learning_rate": 0.0001, |
| "loss": 2.2626, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.1724137931034483, |
| "grad_norm": 0.19341516494750977, |
| "learning_rate": 0.0001, |
| "loss": 1.9751, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.1730644111906311, |
| "grad_norm": 0.25080743432044983, |
| "learning_rate": 0.0001, |
| "loss": 2.2162, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.17371502927781393, |
| "grad_norm": 0.2362661212682724, |
| "learning_rate": 0.0001, |
| "loss": 2.0226, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.17436564736499674, |
| "grad_norm": 0.25844064354896545, |
| "learning_rate": 0.0001, |
| "loss": 2.3176, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.17501626545217958, |
| "grad_norm": 0.3904498517513275, |
| "learning_rate": 0.0001, |
| "loss": 2.4871, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.17566688353936238, |
| "grad_norm": 0.22143317759037018, |
| "learning_rate": 0.0001, |
| "loss": 2.2073, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.17631750162654522, |
| "grad_norm": 0.20974211394786835, |
| "learning_rate": 0.0001, |
| "loss": 2.1393, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.17696811971372803, |
| "grad_norm": 0.24463056027889252, |
| "learning_rate": 0.0001, |
| "loss": 2.0203, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.17761873780091086, |
| "grad_norm": 0.23296399414539337, |
| "learning_rate": 0.0001, |
| "loss": 2.1096, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.1782693558880937, |
| "grad_norm": 0.4122619926929474, |
| "learning_rate": 0.0001, |
| "loss": 3.1512, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.1789199739752765, |
| "grad_norm": 0.2744470536708832, |
| "learning_rate": 0.0001, |
| "loss": 2.2211, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.17957059206245934, |
| "grad_norm": 0.21010619401931763, |
| "learning_rate": 0.0001, |
| "loss": 2.2203, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.18022121014964215, |
| "grad_norm": 0.27855056524276733, |
| "learning_rate": 0.0001, |
| "loss": 2.2903, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.180871828236825, |
| "grad_norm": 0.2909989058971405, |
| "learning_rate": 0.0001, |
| "loss": 2.237, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.1815224463240078, |
| "grad_norm": 0.21754448115825653, |
| "learning_rate": 0.0001, |
| "loss": 2.0138, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.18217306441119063, |
| "grad_norm": 0.35209745168685913, |
| "learning_rate": 0.0001, |
| "loss": 2.652, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.18282368249837344, |
| "grad_norm": 0.29994750022888184, |
| "learning_rate": 0.0001, |
| "loss": 2.1868, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.18347430058555628, |
| "grad_norm": 0.2645902633666992, |
| "learning_rate": 0.0001, |
| "loss": 2.2925, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.1841249186727391, |
| "grad_norm": 0.3492202162742615, |
| "learning_rate": 0.0001, |
| "loss": 2.4176, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.18477553675992192, |
| "grad_norm": 0.256651371717453, |
| "learning_rate": 0.0001, |
| "loss": 2.3414, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.18542615484710476, |
| "grad_norm": 0.23287786543369293, |
| "learning_rate": 0.0001, |
| "loss": 2.5488, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.18607677293428757, |
| "grad_norm": 0.26059290766716003, |
| "learning_rate": 0.0001, |
| "loss": 2.4551, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.1867273910214704, |
| "grad_norm": 0.2482365071773529, |
| "learning_rate": 0.0001, |
| "loss": 2.0818, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.1873780091086532, |
| "grad_norm": 0.23024773597717285, |
| "learning_rate": 0.0001, |
| "loss": 2.2592, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.18802862719583605, |
| "grad_norm": 0.2590011656284332, |
| "learning_rate": 0.0001, |
| "loss": 2.4177, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.18867924528301888, |
| "grad_norm": 0.19760870933532715, |
| "learning_rate": 0.0001, |
| "loss": 2.0731, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.1893298633702017, |
| "grad_norm": 0.20266428589820862, |
| "learning_rate": 0.0001, |
| "loss": 2.1221, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.18998048145738453, |
| "grad_norm": 0.20199884474277496, |
| "learning_rate": 0.0001, |
| "loss": 2.0489, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.19063109954456733, |
| "grad_norm": 0.23876360058784485, |
| "learning_rate": 0.0001, |
| "loss": 2.1392, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.19128171763175017, |
| "grad_norm": 0.23555997014045715, |
| "learning_rate": 0.0001, |
| "loss": 2.4116, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.19193233571893298, |
| "grad_norm": 0.5010725259780884, |
| "learning_rate": 0.0001, |
| "loss": 2.7444, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.19258295380611581, |
| "grad_norm": 0.37809622287750244, |
| "learning_rate": 0.0001, |
| "loss": 2.2635, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.19323357189329862, |
| "grad_norm": 0.499888151884079, |
| "learning_rate": 0.0001, |
| "loss": 2.1984, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.19388418998048146, |
| "grad_norm": 0.43810585141181946, |
| "learning_rate": 0.0001, |
| "loss": 3.084, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.1945348080676643, |
| "grad_norm": 0.35633769631385803, |
| "learning_rate": 0.0001, |
| "loss": 2.0351, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.1951854261548471, |
| "grad_norm": 0.3693079650402069, |
| "learning_rate": 0.0001, |
| "loss": 1.9525, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.19583604424202994, |
| "grad_norm": 0.36550503969192505, |
| "learning_rate": 0.0001, |
| "loss": 2.2469, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.19648666232921275, |
| "grad_norm": 0.2579827308654785, |
| "learning_rate": 0.0001, |
| "loss": 2.3585, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.19713728041639558, |
| "grad_norm": 0.2603841722011566, |
| "learning_rate": 0.0001, |
| "loss": 2.3959, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.1977878985035784, |
| "grad_norm": 0.33103683590888977, |
| "learning_rate": 0.0001, |
| "loss": 2.2197, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.19843851659076123, |
| "grad_norm": 0.2977697551250458, |
| "learning_rate": 0.0001, |
| "loss": 2.2569, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.19908913467794404, |
| "grad_norm": 0.2085130512714386, |
| "learning_rate": 0.0001, |
| "loss": 2.2284, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.19973975276512687, |
| "grad_norm": 0.409212201833725, |
| "learning_rate": 0.0001, |
| "loss": 2.7014, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.2003903708523097, |
| "grad_norm": 0.2447553277015686, |
| "learning_rate": 0.0001, |
| "loss": 2.2826, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.20104098893949252, |
| "grad_norm": 0.21881726384162903, |
| "learning_rate": 0.0001, |
| "loss": 1.8573, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.20169160702667535, |
| "grad_norm": 0.24484936892986298, |
| "learning_rate": 0.0001, |
| "loss": 2.318, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.20234222511385816, |
| "grad_norm": 0.3251173198223114, |
| "learning_rate": 0.0001, |
| "loss": 2.3346, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.202992843201041, |
| "grad_norm": 0.22313712537288666, |
| "learning_rate": 0.0001, |
| "loss": 1.9119, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.2036434612882238, |
| "grad_norm": 0.3086949288845062, |
| "learning_rate": 0.0001, |
| "loss": 2.1809, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.20429407937540664, |
| "grad_norm": 0.28272122144699097, |
| "learning_rate": 0.0001, |
| "loss": 2.3335, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.20494469746258945, |
| "grad_norm": 0.208637535572052, |
| "learning_rate": 0.0001, |
| "loss": 2.1947, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.20559531554977228, |
| "grad_norm": 0.2913041114807129, |
| "learning_rate": 0.0001, |
| "loss": 2.3009, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.20624593363695512, |
| "grad_norm": 0.2813785970211029, |
| "learning_rate": 0.0001, |
| "loss": 2.0133, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.20689655172413793, |
| "grad_norm": 0.2324337363243103, |
| "learning_rate": 0.0001, |
| "loss": 2.0827, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.20754716981132076, |
| "grad_norm": 0.25195491313934326, |
| "learning_rate": 0.0001, |
| "loss": 2.5201, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.20819778789850357, |
| "grad_norm": 0.3435034453868866, |
| "learning_rate": 0.0001, |
| "loss": 2.321, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.2088484059856864, |
| "grad_norm": 0.2735581696033478, |
| "learning_rate": 0.0001, |
| "loss": 2.2218, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.20949902407286922, |
| "grad_norm": 0.2250661551952362, |
| "learning_rate": 0.0001, |
| "loss": 1.9416, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.21014964216005205, |
| "grad_norm": 0.3160262107849121, |
| "learning_rate": 0.0001, |
| "loss": 2.5494, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.21080026024723486, |
| "grad_norm": 0.3669279217720032, |
| "learning_rate": 0.0001, |
| "loss": 2.7751, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.2114508783344177, |
| "grad_norm": 0.2052752673625946, |
| "learning_rate": 0.0001, |
| "loss": 2.0139, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.21210149642160053, |
| "grad_norm": 0.2906612455844879, |
| "learning_rate": 0.0001, |
| "loss": 2.227, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.21275211450878334, |
| "grad_norm": 0.30327048897743225, |
| "learning_rate": 0.0001, |
| "loss": 2.2905, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.21340273259596618, |
| "grad_norm": 0.33950623869895935, |
| "learning_rate": 0.0001, |
| "loss": 3.0731, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.21405335068314899, |
| "grad_norm": 0.31319788098335266, |
| "learning_rate": 0.0001, |
| "loss": 2.1374, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.21470396877033182, |
| "grad_norm": 0.21442054212093353, |
| "learning_rate": 0.0001, |
| "loss": 1.7588, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.21535458685751463, |
| "grad_norm": 0.23125174641609192, |
| "learning_rate": 0.0001, |
| "loss": 1.9295, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.21600520494469747, |
| "grad_norm": 0.23220308125019073, |
| "learning_rate": 0.0001, |
| "loss": 2.2606, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.21665582303188027, |
| "grad_norm": 0.24599219858646393, |
| "learning_rate": 0.0001, |
| "loss": 2.2687, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.2173064411190631, |
| "grad_norm": 0.22226236760616302, |
| "learning_rate": 0.0001, |
| "loss": 2.1428, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.21795705920624595, |
| "grad_norm": 0.2653510570526123, |
| "learning_rate": 0.0001, |
| "loss": 2.4381, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.21860767729342875, |
| "grad_norm": 0.23770929872989655, |
| "learning_rate": 0.0001, |
| "loss": 1.9655, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.2192582953806116, |
| "grad_norm": 0.1932332068681717, |
| "learning_rate": 0.0001, |
| "loss": 1.9465, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.2199089134677944, |
| "grad_norm": 0.181661456823349, |
| "learning_rate": 0.0001, |
| "loss": 1.9912, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.22055953155497723, |
| "grad_norm": 0.22275297343730927, |
| "learning_rate": 0.0001, |
| "loss": 2.1964, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.22121014964216004, |
| "grad_norm": 0.22086840867996216, |
| "learning_rate": 0.0001, |
| "loss": 2.2216, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.22186076772934288, |
| "grad_norm": 0.22807130217552185, |
| "learning_rate": 0.0001, |
| "loss": 2.2434, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.2225113858165257, |
| "grad_norm": 0.26616647839546204, |
| "learning_rate": 0.0001, |
| "loss": 2.442, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.22316200390370852, |
| "grad_norm": 0.2841719388961792, |
| "learning_rate": 0.0001, |
| "loss": 2.2358, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.22381262199089136, |
| "grad_norm": 0.23251943290233612, |
| "learning_rate": 0.0001, |
| "loss": 2.3436, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.22446324007807417, |
| "grad_norm": 0.20406994223594666, |
| "learning_rate": 0.0001, |
| "loss": 2.101, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.225113858165257, |
| "grad_norm": 0.18677304685115814, |
| "learning_rate": 0.0001, |
| "loss": 2.0596, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.2257644762524398, |
| "grad_norm": 0.22367873787879944, |
| "learning_rate": 0.0001, |
| "loss": 2.2051, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.22641509433962265, |
| "grad_norm": 0.2521246671676636, |
| "learning_rate": 0.0001, |
| "loss": 2.1718, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.22706571242680545, |
| "grad_norm": 0.23043319582939148, |
| "learning_rate": 0.0001, |
| "loss": 2.2818, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.2277163305139883, |
| "grad_norm": 0.22021251916885376, |
| "learning_rate": 0.0001, |
| "loss": 2.0337, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.2283669486011711, |
| "grad_norm": 0.18043603003025055, |
| "learning_rate": 0.0001, |
| "loss": 1.9434, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.22901756668835394, |
| "grad_norm": 0.4757142961025238, |
| "learning_rate": 0.0001, |
| "loss": 2.2467, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.22966818477553677, |
| "grad_norm": 0.30740290880203247, |
| "learning_rate": 0.0001, |
| "loss": 2.5296, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.23031880286271958, |
| "grad_norm": 0.23037666082382202, |
| "learning_rate": 0.0001, |
| "loss": 2.311, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.23096942094990242, |
| "grad_norm": 0.22314564883708954, |
| "learning_rate": 0.0001, |
| "loss": 2.0494, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.23162003903708522, |
| "grad_norm": 0.21417242288589478, |
| "learning_rate": 0.0001, |
| "loss": 2.2459, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.23227065712426806, |
| "grad_norm": 0.2895831763744354, |
| "learning_rate": 0.0001, |
| "loss": 2.2705, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.23292127521145087, |
| "grad_norm": 0.2110838145017624, |
| "learning_rate": 0.0001, |
| "loss": 2.1175, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.2335718932986337, |
| "grad_norm": 0.3999682664871216, |
| "learning_rate": 0.0001, |
| "loss": 2.6891, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.2342225113858165, |
| "grad_norm": 0.5169201493263245, |
| "learning_rate": 0.0001, |
| "loss": 2.5764, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.23487312947299935, |
| "grad_norm": 0.24382548034191132, |
| "learning_rate": 0.0001, |
| "loss": 2.1065, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.23552374756018218, |
| "grad_norm": 0.2830081582069397, |
| "learning_rate": 0.0001, |
| "loss": 2.1186, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.236174365647365, |
| "grad_norm": 0.23680554330348969, |
| "learning_rate": 0.0001, |
| "loss": 2.118, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.23682498373454783, |
| "grad_norm": 0.3790690302848816, |
| "learning_rate": 0.0001, |
| "loss": 2.3566, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.23747560182173064, |
| "grad_norm": 0.2664685845375061, |
| "learning_rate": 0.0001, |
| "loss": 2.2118, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.23812621990891347, |
| "grad_norm": 0.22439126670360565, |
| "learning_rate": 0.0001, |
| "loss": 2.0897, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.23877683799609628, |
| "grad_norm": 0.2559892237186432, |
| "learning_rate": 0.0001, |
| "loss": 2.2559, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.23942745608327912, |
| "grad_norm": 0.43989577889442444, |
| "learning_rate": 0.0001, |
| "loss": 2.5208, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.24007807417046195, |
| "grad_norm": 0.24543894827365875, |
| "learning_rate": 0.0001, |
| "loss": 2.1692, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.24072869225764476, |
| "grad_norm": 0.37020954489707947, |
| "learning_rate": 0.0001, |
| "loss": 2.1287, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.2413793103448276, |
| "grad_norm": 0.41815564036369324, |
| "learning_rate": 0.0001, |
| "loss": 2.5952, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.2420299284320104, |
| "grad_norm": 0.22579136490821838, |
| "learning_rate": 0.0001, |
| "loss": 2.2427, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.24268054651919324, |
| "grad_norm": 0.3004798889160156, |
| "learning_rate": 0.0001, |
| "loss": 2.2767, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.24333116460637605, |
| "grad_norm": 0.27470141649246216, |
| "learning_rate": 0.0001, |
| "loss": 2.092, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.24398178269355889, |
| "grad_norm": 0.25301867723464966, |
| "learning_rate": 0.0001, |
| "loss": 2.1816, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.2446324007807417, |
| "grad_norm": 0.21194620430469513, |
| "learning_rate": 0.0001, |
| "loss": 2.1322, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.24528301886792453, |
| "grad_norm": 0.28737103939056396, |
| "learning_rate": 0.0001, |
| "loss": 2.6685, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.24593363695510737, |
| "grad_norm": 0.28857922554016113, |
| "learning_rate": 0.0001, |
| "loss": 2.2219, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.24658425504229017, |
| "grad_norm": 0.29493409395217896, |
| "learning_rate": 0.0001, |
| "loss": 2.717, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.247234873129473, |
| "grad_norm": 0.33975929021835327, |
| "learning_rate": 0.0001, |
| "loss": 2.3499, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.24788549121665582, |
| "grad_norm": 0.21486152708530426, |
| "learning_rate": 0.0001, |
| "loss": 2.306, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.24853610930383865, |
| "grad_norm": 0.2686431109905243, |
| "learning_rate": 0.0001, |
| "loss": 2.0942, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.24918672739102146, |
| "grad_norm": 0.2812007963657379, |
| "learning_rate": 0.0001, |
| "loss": 2.3729, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.2498373454782043, |
| "grad_norm": 0.31875330209732056, |
| "learning_rate": 0.0001, |
| "loss": 2.5766, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.2504879635653871, |
| "grad_norm": 0.2624376714229584, |
| "learning_rate": 0.0001, |
| "loss": 2.2057, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.2511385816525699, |
| "grad_norm": 0.265286386013031, |
| "learning_rate": 0.0001, |
| "loss": 2.2405, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.2517891997397528, |
| "grad_norm": 0.3202246129512787, |
| "learning_rate": 0.0001, |
| "loss": 2.2817, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.2524398178269356, |
| "grad_norm": 0.22770161926746368, |
| "learning_rate": 0.0001, |
| "loss": 1.9564, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.2530904359141184, |
| "grad_norm": 0.3313138484954834, |
| "learning_rate": 0.0001, |
| "loss": 2.4424, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.25374105400130126, |
| "grad_norm": 0.2961839437484741, |
| "learning_rate": 0.0001, |
| "loss": 2.4122, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.25439167208848407, |
| "grad_norm": 0.24270308017730713, |
| "learning_rate": 0.0001, |
| "loss": 1.99, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.2550422901756669, |
| "grad_norm": 0.2306670844554901, |
| "learning_rate": 0.0001, |
| "loss": 2.3529, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.2556929082628497, |
| "grad_norm": 0.28387176990509033, |
| "learning_rate": 0.0001, |
| "loss": 2.0824, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.25634352635003255, |
| "grad_norm": 0.3105824291706085, |
| "learning_rate": 0.0001, |
| "loss": 2.437, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.25699414443721535, |
| "grad_norm": 0.1932361125946045, |
| "learning_rate": 0.0001, |
| "loss": 1.9747, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.25764476252439816, |
| "grad_norm": 0.31146278977394104, |
| "learning_rate": 0.0001, |
| "loss": 2.263, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.258295380611581, |
| "grad_norm": 0.24420365691184998, |
| "learning_rate": 0.0001, |
| "loss": 2.015, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.25894599869876384, |
| "grad_norm": 0.24144989252090454, |
| "learning_rate": 0.0001, |
| "loss": 2.2536, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.25959661678594664, |
| "grad_norm": 0.3478517532348633, |
| "learning_rate": 0.0001, |
| "loss": 2.5835, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.26024723487312945, |
| "grad_norm": 0.24381348490715027, |
| "learning_rate": 0.0001, |
| "loss": 2.2439, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.2608978529603123, |
| "grad_norm": 0.2834983468055725, |
| "learning_rate": 0.0001, |
| "loss": 2.3991, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.2615484710474951, |
| "grad_norm": 0.28689858317375183, |
| "learning_rate": 0.0001, |
| "loss": 1.9156, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.26219908913467793, |
| "grad_norm": 0.23692357540130615, |
| "learning_rate": 0.0001, |
| "loss": 2.0189, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.26284970722186074, |
| "grad_norm": 0.30104926228523254, |
| "learning_rate": 0.0001, |
| "loss": 2.4945, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.2635003253090436, |
| "grad_norm": 0.23472270369529724, |
| "learning_rate": 0.0001, |
| "loss": 1.8892, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.2641509433962264, |
| "grad_norm": 0.31508034467697144, |
| "learning_rate": 0.0001, |
| "loss": 2.4935, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.2648015614834092, |
| "grad_norm": 0.25103551149368286, |
| "learning_rate": 0.0001, |
| "loss": 2.4428, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.2654521795705921, |
| "grad_norm": 0.2387259602546692, |
| "learning_rate": 0.0001, |
| "loss": 2.0989, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.2661027976577749, |
| "grad_norm": 0.2606028616428375, |
| "learning_rate": 0.0001, |
| "loss": 1.9494, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.2667534157449577, |
| "grad_norm": 0.25114724040031433, |
| "learning_rate": 0.0001, |
| "loss": 2.2432, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.2674040338321405, |
| "grad_norm": 0.3072582483291626, |
| "learning_rate": 0.0001, |
| "loss": 2.3506, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.2680546519193234, |
| "grad_norm": 0.23917561769485474, |
| "learning_rate": 0.0001, |
| "loss": 2.2665, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.2687052700065062, |
| "grad_norm": 0.2120814174413681, |
| "learning_rate": 0.0001, |
| "loss": 1.9625, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.269355888093689, |
| "grad_norm": 0.22003813087940216, |
| "learning_rate": 0.0001, |
| "loss": 2.1179, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.27000650618087185, |
| "grad_norm": 0.33217060565948486, |
| "learning_rate": 0.0001, |
| "loss": 2.6353, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.27065712426805466, |
| "grad_norm": 0.2260630577802658, |
| "learning_rate": 0.0001, |
| "loss": 2.0355, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.27130774235523747, |
| "grad_norm": 0.30081093311309814, |
| "learning_rate": 0.0001, |
| "loss": 2.1825, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.2719583604424203, |
| "grad_norm": 0.27275893092155457, |
| "learning_rate": 0.0001, |
| "loss": 2.6183, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.27260897852960314, |
| "grad_norm": 0.4902358651161194, |
| "learning_rate": 0.0001, |
| "loss": 3.0888, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.27325959661678595, |
| "grad_norm": 0.21213112771511078, |
| "learning_rate": 0.0001, |
| "loss": 2.1172, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.27391021470396876, |
| "grad_norm": 0.35953450202941895, |
| "learning_rate": 0.0001, |
| "loss": 2.5109, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.2745608327911516, |
| "grad_norm": 0.2081584334373474, |
| "learning_rate": 0.0001, |
| "loss": 2.0894, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.27521145087833443, |
| "grad_norm": 0.20892906188964844, |
| "learning_rate": 0.0001, |
| "loss": 1.9643, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.27586206896551724, |
| "grad_norm": 0.30058735609054565, |
| "learning_rate": 0.0001, |
| "loss": 2.6503, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.27651268705270005, |
| "grad_norm": 0.32902124524116516, |
| "learning_rate": 0.0001, |
| "loss": 2.3271, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.2771633051398829, |
| "grad_norm": 0.2003614902496338, |
| "learning_rate": 0.0001, |
| "loss": 1.9881, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.2778139232270657, |
| "grad_norm": 0.33349111676216125, |
| "learning_rate": 0.0001, |
| "loss": 2.7625, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.2784645413142485, |
| "grad_norm": 0.25051257014274597, |
| "learning_rate": 0.0001, |
| "loss": 2.0825, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.27911515940143133, |
| "grad_norm": 0.3301559388637543, |
| "learning_rate": 0.0001, |
| "loss": 2.85, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.2797657774886142, |
| "grad_norm": 0.18224254250526428, |
| "learning_rate": 0.0001, |
| "loss": 1.9687, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.280416395575797, |
| "grad_norm": 0.21809989213943481, |
| "learning_rate": 0.0001, |
| "loss": 2.2596, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.2810670136629798, |
| "grad_norm": 0.2473779171705246, |
| "learning_rate": 0.0001, |
| "loss": 2.2042, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.2817176317501627, |
| "grad_norm": 0.20744885504245758, |
| "learning_rate": 0.0001, |
| "loss": 2.1546, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.2823682498373455, |
| "grad_norm": 0.2620698809623718, |
| "learning_rate": 0.0001, |
| "loss": 2.5195, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.2830188679245283, |
| "grad_norm": 0.291421115398407, |
| "learning_rate": 0.0001, |
| "loss": 2.4983, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.2836694860117111, |
| "grad_norm": 0.3294708728790283, |
| "learning_rate": 0.0001, |
| "loss": 2.3146, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.28432010409889397, |
| "grad_norm": 0.26191362738609314, |
| "learning_rate": 0.0001, |
| "loss": 2.2818, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.2849707221860768, |
| "grad_norm": 0.29155483841896057, |
| "learning_rate": 0.0001, |
| "loss": 2.4888, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.2856213402732596, |
| "grad_norm": 0.19482360780239105, |
| "learning_rate": 0.0001, |
| "loss": 2.0061, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.28627195836044245, |
| "grad_norm": 0.2594612240791321, |
| "learning_rate": 0.0001, |
| "loss": 2.1891, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.28692257644762525, |
| "grad_norm": 0.21656309068202972, |
| "learning_rate": 0.0001, |
| "loss": 1.7911, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.28757319453480806, |
| "grad_norm": 0.18664829432964325, |
| "learning_rate": 0.0001, |
| "loss": 1.9634, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.28822381262199087, |
| "grad_norm": 0.2178332507610321, |
| "learning_rate": 0.0001, |
| "loss": 2.32, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.28887443070917374, |
| "grad_norm": 0.351418673992157, |
| "learning_rate": 0.0001, |
| "loss": 3.0873, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.28952504879635654, |
| "grad_norm": 0.23604457080364227, |
| "learning_rate": 0.0001, |
| "loss": 2.46, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.29017566688353935, |
| "grad_norm": 0.2599848806858063, |
| "learning_rate": 0.0001, |
| "loss": 2.0207, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.29082628497072216, |
| "grad_norm": 0.340314120054245, |
| "learning_rate": 0.0001, |
| "loss": 2.279, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.291476903057905, |
| "grad_norm": 0.23228399455547333, |
| "learning_rate": 0.0001, |
| "loss": 2.3561, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.29212752114508783, |
| "grad_norm": 0.25504687428474426, |
| "learning_rate": 0.0001, |
| "loss": 2.2251, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.29277813923227064, |
| "grad_norm": 0.2465014010667801, |
| "learning_rate": 0.0001, |
| "loss": 2.1031, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2934287573194535, |
| "grad_norm": 0.2188328504562378, |
| "learning_rate": 0.0001, |
| "loss": 2.1483, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.2940793754066363, |
| "grad_norm": 0.24546551704406738, |
| "learning_rate": 0.0001, |
| "loss": 2.2334, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.2947299934938191, |
| "grad_norm": 0.23416215181350708, |
| "learning_rate": 0.0001, |
| "loss": 2.1846, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.29538061158100193, |
| "grad_norm": 0.25267231464385986, |
| "learning_rate": 0.0001, |
| "loss": 2.2134, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.2960312296681848, |
| "grad_norm": 0.26632416248321533, |
| "learning_rate": 0.0001, |
| "loss": 2.5012, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.2966818477553676, |
| "grad_norm": 0.18289139866828918, |
| "learning_rate": 0.0001, |
| "loss": 2.0524, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.2973324658425504, |
| "grad_norm": 0.19033563137054443, |
| "learning_rate": 0.0001, |
| "loss": 2.0165, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.2979830839297333, |
| "grad_norm": 0.200730562210083, |
| "learning_rate": 0.0001, |
| "loss": 1.8021, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.2986337020169161, |
| "grad_norm": 0.2109062522649765, |
| "learning_rate": 0.0001, |
| "loss": 2.0655, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.2992843201040989, |
| "grad_norm": 0.23461318016052246, |
| "learning_rate": 0.0001, |
| "loss": 2.3335, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.2999349381912817, |
| "grad_norm": 0.2085726112127304, |
| "learning_rate": 0.0001, |
| "loss": 2.0061, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.30058555627846456, |
| "grad_norm": 0.2938329875469208, |
| "learning_rate": 0.0001, |
| "loss": 2.5245, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.30123617436564737, |
| "grad_norm": 0.22131232917308807, |
| "learning_rate": 0.0001, |
| "loss": 2.4115, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.3018867924528302, |
| "grad_norm": 0.3459152579307556, |
| "learning_rate": 0.0001, |
| "loss": 2.3896, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.302537410540013, |
| "grad_norm": 0.27464184165000916, |
| "learning_rate": 0.0001, |
| "loss": 2.6592, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.30318802862719585, |
| "grad_norm": 0.28379327058792114, |
| "learning_rate": 0.0001, |
| "loss": 2.1453, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.30383864671437866, |
| "grad_norm": 0.28283926844596863, |
| "learning_rate": 0.0001, |
| "loss": 2.1704, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.30448926480156147, |
| "grad_norm": 0.22243599593639374, |
| "learning_rate": 0.0001, |
| "loss": 2.1175, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.30513988288874433, |
| "grad_norm": 0.22331124544143677, |
| "learning_rate": 0.0001, |
| "loss": 1.8857, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.30579050097592714, |
| "grad_norm": 0.21995989978313446, |
| "learning_rate": 0.0001, |
| "loss": 2.1316, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.30644111906310995, |
| "grad_norm": 0.21140341460704803, |
| "learning_rate": 0.0001, |
| "loss": 2.0742, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.30709173715029275, |
| "grad_norm": 0.31053757667541504, |
| "learning_rate": 0.0001, |
| "loss": 2.615, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.3077423552374756, |
| "grad_norm": 0.2768484354019165, |
| "learning_rate": 0.0001, |
| "loss": 2.713, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.3083929733246584, |
| "grad_norm": 0.2538318336009979, |
| "learning_rate": 0.0001, |
| "loss": 2.1917, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.30904359141184123, |
| "grad_norm": 0.2105240672826767, |
| "learning_rate": 0.0001, |
| "loss": 2.2741, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.3096942094990241, |
| "grad_norm": 0.2915903925895691, |
| "learning_rate": 0.0001, |
| "loss": 2.115, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.3103448275862069, |
| "grad_norm": 0.30282047390937805, |
| "learning_rate": 0.0001, |
| "loss": 2.7806, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.3109954456733897, |
| "grad_norm": 0.2707601487636566, |
| "learning_rate": 0.0001, |
| "loss": 2.6137, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.3116460637605725, |
| "grad_norm": 0.34574300050735474, |
| "learning_rate": 0.0001, |
| "loss": 2.5957, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.3122966818477554, |
| "grad_norm": 0.22767509520053864, |
| "learning_rate": 0.0001, |
| "loss": 2.3543, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.3129472999349382, |
| "grad_norm": 0.25194215774536133, |
| "learning_rate": 0.0001, |
| "loss": 2.6586, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.313597918022121, |
| "grad_norm": 0.20427219569683075, |
| "learning_rate": 0.0001, |
| "loss": 1.9091, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.3142485361093038, |
| "grad_norm": 0.2993704378604889, |
| "learning_rate": 0.0001, |
| "loss": 2.4704, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.3148991541964867, |
| "grad_norm": 0.18951758742332458, |
| "learning_rate": 0.0001, |
| "loss": 2.1108, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.3155497722836695, |
| "grad_norm": 0.2622709572315216, |
| "learning_rate": 0.0001, |
| "loss": 2.4144, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.3162003903708523, |
| "grad_norm": 0.20735126733779907, |
| "learning_rate": 0.0001, |
| "loss": 2.3065, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.31685100845803515, |
| "grad_norm": 0.22782085835933685, |
| "learning_rate": 0.0001, |
| "loss": 2.4377, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.31750162654521796, |
| "grad_norm": 0.2568935453891754, |
| "learning_rate": 0.0001, |
| "loss": 2.1199, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.31815224463240077, |
| "grad_norm": 0.23917409777641296, |
| "learning_rate": 0.0001, |
| "loss": 2.2457, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.3188028627195836, |
| "grad_norm": 0.21531902253627777, |
| "learning_rate": 0.0001, |
| "loss": 2.0489, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.31945348080676644, |
| "grad_norm": 0.21461109817028046, |
| "learning_rate": 0.0001, |
| "loss": 2.1915, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.32010409889394925, |
| "grad_norm": 0.2458680123090744, |
| "learning_rate": 0.0001, |
| "loss": 2.3939, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.32075471698113206, |
| "grad_norm": 0.2617323696613312, |
| "learning_rate": 0.0001, |
| "loss": 2.5611, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.3214053350683149, |
| "grad_norm": 0.22562618553638458, |
| "learning_rate": 0.0001, |
| "loss": 2.2703, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.32205595315549773, |
| "grad_norm": 0.2290688008069992, |
| "learning_rate": 0.0001, |
| "loss": 2.3049, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.32270657124268054, |
| "grad_norm": 0.4118833541870117, |
| "learning_rate": 0.0001, |
| "loss": 2.9194, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.32335718932986335, |
| "grad_norm": 0.22502999007701874, |
| "learning_rate": 0.0001, |
| "loss": 2.2362, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.3240078074170462, |
| "grad_norm": 0.23599191009998322, |
| "learning_rate": 0.0001, |
| "loss": 2.35, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.324658425504229, |
| "grad_norm": 0.3065047860145569, |
| "learning_rate": 0.0001, |
| "loss": 2.3984, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.32530904359141183, |
| "grad_norm": 0.19241982698440552, |
| "learning_rate": 0.0001, |
| "loss": 1.8787, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.3259596616785947, |
| "grad_norm": 0.20695632696151733, |
| "learning_rate": 0.0001, |
| "loss": 1.9397, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.3266102797657775, |
| "grad_norm": 0.1998564749956131, |
| "learning_rate": 0.0001, |
| "loss": 2.1463, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.3272608978529603, |
| "grad_norm": 0.27775317430496216, |
| "learning_rate": 0.0001, |
| "loss": 2.7956, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.3279115159401431, |
| "grad_norm": 0.2393936961889267, |
| "learning_rate": 0.0001, |
| "loss": 2.3785, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.328562134027326, |
| "grad_norm": 0.20921163260936737, |
| "learning_rate": 0.0001, |
| "loss": 2.1909, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.3292127521145088, |
| "grad_norm": 0.25875911116600037, |
| "learning_rate": 0.0001, |
| "loss": 2.129, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.3298633702016916, |
| "grad_norm": 0.2382909208536148, |
| "learning_rate": 0.0001, |
| "loss": 2.3786, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.3305139882888744, |
| "grad_norm": 0.19657136499881744, |
| "learning_rate": 0.0001, |
| "loss": 1.951, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.33116460637605727, |
| "grad_norm": 0.23688004910945892, |
| "learning_rate": 0.0001, |
| "loss": 2.4348, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.3318152244632401, |
| "grad_norm": 0.1988734006881714, |
| "learning_rate": 0.0001, |
| "loss": 2.2352, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.3324658425504229, |
| "grad_norm": 0.2078763097524643, |
| "learning_rate": 0.0001, |
| "loss": 2.1376, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.33311646063760575, |
| "grad_norm": 0.18860888481140137, |
| "learning_rate": 0.0001, |
| "loss": 1.9367, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.33376707872478856, |
| "grad_norm": 0.30205249786376953, |
| "learning_rate": 0.0001, |
| "loss": 2.6822, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.33441769681197137, |
| "grad_norm": 0.2146618664264679, |
| "learning_rate": 0.0001, |
| "loss": 2.1927, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.3350683148991542, |
| "grad_norm": 0.19332504272460938, |
| "learning_rate": 0.0001, |
| "loss": 2.0442, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.33571893298633704, |
| "grad_norm": 0.2289431244134903, |
| "learning_rate": 0.0001, |
| "loss": 2.0152, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.33636955107351985, |
| "grad_norm": 0.21815945208072662, |
| "learning_rate": 0.0001, |
| "loss": 2.0015, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.33702016916070265, |
| "grad_norm": 0.2226189821958542, |
| "learning_rate": 0.0001, |
| "loss": 2.2989, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.3376707872478855, |
| "grad_norm": 0.22195078432559967, |
| "learning_rate": 0.0001, |
| "loss": 2.2237, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.3383214053350683, |
| "grad_norm": 0.1946515589952469, |
| "learning_rate": 0.0001, |
| "loss": 1.9459, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.33897202342225113, |
| "grad_norm": 0.21510568261146545, |
| "learning_rate": 0.0001, |
| "loss": 2.1305, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.33962264150943394, |
| "grad_norm": 0.23448903858661652, |
| "learning_rate": 0.0001, |
| "loss": 2.1838, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.3402732595966168, |
| "grad_norm": 0.19046911597251892, |
| "learning_rate": 0.0001, |
| "loss": 1.9739, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.3409238776837996, |
| "grad_norm": 0.2314033806324005, |
| "learning_rate": 0.0001, |
| "loss": 2.2053, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.3415744957709824, |
| "grad_norm": 0.2206612378358841, |
| "learning_rate": 0.0001, |
| "loss": 2.2566, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.34222511385816523, |
| "grad_norm": 0.19578076899051666, |
| "learning_rate": 0.0001, |
| "loss": 2.045, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.3428757319453481, |
| "grad_norm": 0.1787755936384201, |
| "learning_rate": 0.0001, |
| "loss": 1.8942, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.3435263500325309, |
| "grad_norm": 0.20091751217842102, |
| "learning_rate": 0.0001, |
| "loss": 2.1576, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.3441769681197137, |
| "grad_norm": 0.21869762241840363, |
| "learning_rate": 0.0001, |
| "loss": 2.1938, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.3448275862068966, |
| "grad_norm": 0.26101449131965637, |
| "learning_rate": 0.0001, |
| "loss": 2.3642, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.3454782042940794, |
| "grad_norm": 0.21874766051769257, |
| "learning_rate": 0.0001, |
| "loss": 2.4553, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.3461288223812622, |
| "grad_norm": 0.224325492978096, |
| "learning_rate": 0.0001, |
| "loss": 2.2959, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.346779440468445, |
| "grad_norm": 0.21268363296985626, |
| "learning_rate": 0.0001, |
| "loss": 2.1021, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.34743005855562786, |
| "grad_norm": 0.20979231595993042, |
| "learning_rate": 0.0001, |
| "loss": 2.0304, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.34808067664281067, |
| "grad_norm": 0.19552691280841827, |
| "learning_rate": 0.0001, |
| "loss": 1.9747, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.3487312947299935, |
| "grad_norm": 0.27929842472076416, |
| "learning_rate": 0.0001, |
| "loss": 2.445, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.34938191281717634, |
| "grad_norm": 0.19953188300132751, |
| "learning_rate": 0.0001, |
| "loss": 1.9766, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.35003253090435915, |
| "grad_norm": 0.29898926615715027, |
| "learning_rate": 0.0001, |
| "loss": 2.4818, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.35068314899154196, |
| "grad_norm": 0.18719644844532013, |
| "learning_rate": 0.0001, |
| "loss": 1.9046, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.35133376707872477, |
| "grad_norm": 0.2602563798427582, |
| "learning_rate": 0.0001, |
| "loss": 2.1539, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.35198438516590763, |
| "grad_norm": 0.23460406064987183, |
| "learning_rate": 0.0001, |
| "loss": 2.3826, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.35263500325309044, |
| "grad_norm": 0.2821134328842163, |
| "learning_rate": 0.0001, |
| "loss": 2.223, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.35328562134027325, |
| "grad_norm": 0.2641044557094574, |
| "learning_rate": 0.0001, |
| "loss": 2.2402, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.35393623942745606, |
| "grad_norm": 0.21963565051555634, |
| "learning_rate": 0.0001, |
| "loss": 2.3988, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.3545868575146389, |
| "grad_norm": 0.26475685834884644, |
| "learning_rate": 0.0001, |
| "loss": 2.3046, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.35523747560182173, |
| "grad_norm": 0.27148157358169556, |
| "learning_rate": 0.0001, |
| "loss": 2.5076, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.35588809368900454, |
| "grad_norm": 0.28925588726997375, |
| "learning_rate": 0.0001, |
| "loss": 2.8395, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.3565387117761874, |
| "grad_norm": 0.22953632473945618, |
| "learning_rate": 0.0001, |
| "loss": 2.1198, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.3571893298633702, |
| "grad_norm": 0.23960557579994202, |
| "learning_rate": 0.0001, |
| "loss": 2.3064, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.357839947950553, |
| "grad_norm": 0.3133333921432495, |
| "learning_rate": 0.0001, |
| "loss": 2.6034, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.3584905660377358, |
| "grad_norm": 0.21745215356349945, |
| "learning_rate": 0.0001, |
| "loss": 2.4553, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.3591411841249187, |
| "grad_norm": 0.23547130823135376, |
| "learning_rate": 0.0001, |
| "loss": 2.0469, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.3597918022121015, |
| "grad_norm": 0.2646094262599945, |
| "learning_rate": 0.0001, |
| "loss": 1.9016, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.3604424202992843, |
| "grad_norm": 0.3079530596733093, |
| "learning_rate": 0.0001, |
| "loss": 2.8979, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.36109303838646717, |
| "grad_norm": 0.38223740458488464, |
| "learning_rate": 0.0001, |
| "loss": 3.066, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.36174365647365, |
| "grad_norm": 0.2535337209701538, |
| "learning_rate": 0.0001, |
| "loss": 2.1327, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.3623942745608328, |
| "grad_norm": 0.2373637855052948, |
| "learning_rate": 0.0001, |
| "loss": 2.1141, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.3630448926480156, |
| "grad_norm": 0.19437271356582642, |
| "learning_rate": 0.0001, |
| "loss": 1.9753, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.36369551073519846, |
| "grad_norm": 0.20236878097057343, |
| "learning_rate": 0.0001, |
| "loss": 2.2516, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.36434612882238127, |
| "grad_norm": 0.21252363920211792, |
| "learning_rate": 0.0001, |
| "loss": 2.3645, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.3649967469095641, |
| "grad_norm": 0.21689258515834808, |
| "learning_rate": 0.0001, |
| "loss": 2.1145, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.3656473649967469, |
| "grad_norm": 0.22365228831768036, |
| "learning_rate": 0.0001, |
| "loss": 2.3083, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.36629798308392975, |
| "grad_norm": 0.21607807278633118, |
| "learning_rate": 0.0001, |
| "loss": 2.3199, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.36694860117111255, |
| "grad_norm": 0.1885683536529541, |
| "learning_rate": 0.0001, |
| "loss": 1.9303, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.36759921925829536, |
| "grad_norm": 0.20064905285835266, |
| "learning_rate": 0.0001, |
| "loss": 2.0661, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.3682498373454782, |
| "grad_norm": 0.23532240092754364, |
| "learning_rate": 0.0001, |
| "loss": 2.6942, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.36890045543266103, |
| "grad_norm": 0.22937807440757751, |
| "learning_rate": 0.0001, |
| "loss": 2.1962, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.36955107351984384, |
| "grad_norm": 0.2540866732597351, |
| "learning_rate": 0.0001, |
| "loss": 2.5012, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.37020169160702665, |
| "grad_norm": 0.23405294120311737, |
| "learning_rate": 0.0001, |
| "loss": 2.2439, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.3708523096942095, |
| "grad_norm": 0.24394820630550385, |
| "learning_rate": 0.0001, |
| "loss": 2.0741, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.3715029277813923, |
| "grad_norm": 0.2063736468553543, |
| "learning_rate": 0.0001, |
| "loss": 2.0864, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.37215354586857513, |
| "grad_norm": 0.3300686180591583, |
| "learning_rate": 0.0001, |
| "loss": 2.4983, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.372804163955758, |
| "grad_norm": 0.21294772624969482, |
| "learning_rate": 0.0001, |
| "loss": 2.2273, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.3734547820429408, |
| "grad_norm": 0.2629190981388092, |
| "learning_rate": 0.0001, |
| "loss": 2.1732, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.3741054001301236, |
| "grad_norm": 0.2141999751329422, |
| "learning_rate": 0.0001, |
| "loss": 2.3038, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.3747560182173064, |
| "grad_norm": 0.3467566668987274, |
| "learning_rate": 0.0001, |
| "loss": 2.7748, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.3754066363044893, |
| "grad_norm": 0.3112248182296753, |
| "learning_rate": 0.0001, |
| "loss": 2.2376, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.3760572543916721, |
| "grad_norm": 0.21217738091945648, |
| "learning_rate": 0.0001, |
| "loss": 1.9146, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.3767078724788549, |
| "grad_norm": 0.19359458982944489, |
| "learning_rate": 0.0001, |
| "loss": 2.0913, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.37735849056603776, |
| "grad_norm": 0.27635738253593445, |
| "learning_rate": 0.0001, |
| "loss": 2.2855, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.37800910865322057, |
| "grad_norm": 0.19366882741451263, |
| "learning_rate": 0.0001, |
| "loss": 2.0194, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.3786597267404034, |
| "grad_norm": 0.2016839236021042, |
| "learning_rate": 0.0001, |
| "loss": 2.1519, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.3793103448275862, |
| "grad_norm": 0.22154097259044647, |
| "learning_rate": 0.0001, |
| "loss": 1.9849, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.37996096291476905, |
| "grad_norm": 0.2089187502861023, |
| "learning_rate": 0.0001, |
| "loss": 2.3624, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.38061158100195186, |
| "grad_norm": 0.25050756335258484, |
| "learning_rate": 0.0001, |
| "loss": 2.1773, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.38126219908913467, |
| "grad_norm": 0.23007918894290924, |
| "learning_rate": 0.0001, |
| "loss": 2.2054, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.3819128171763175, |
| "grad_norm": 0.25022968649864197, |
| "learning_rate": 0.0001, |
| "loss": 2.219, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.38256343526350034, |
| "grad_norm": 0.2205193042755127, |
| "learning_rate": 0.0001, |
| "loss": 2.2049, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.38321405335068315, |
| "grad_norm": 0.21454961597919464, |
| "learning_rate": 0.0001, |
| "loss": 2.0683, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.38386467143786596, |
| "grad_norm": 0.2088347226381302, |
| "learning_rate": 0.0001, |
| "loss": 2.1301, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.3845152895250488, |
| "grad_norm": 0.20322394371032715, |
| "learning_rate": 0.0001, |
| "loss": 2.2098, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.38516590761223163, |
| "grad_norm": 0.231514111161232, |
| "learning_rate": 0.0001, |
| "loss": 2.5523, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.38581652569941444, |
| "grad_norm": 0.24791982769966125, |
| "learning_rate": 0.0001, |
| "loss": 2.2259, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.38646714378659724, |
| "grad_norm": 0.21148578822612762, |
| "learning_rate": 0.0001, |
| "loss": 2.0834, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.3871177618737801, |
| "grad_norm": 0.263713538646698, |
| "learning_rate": 0.0001, |
| "loss": 2.3101, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.3877683799609629, |
| "grad_norm": 0.22197774052619934, |
| "learning_rate": 0.0001, |
| "loss": 2.1173, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.3884189980481457, |
| "grad_norm": 0.2237439900636673, |
| "learning_rate": 0.0001, |
| "loss": 2.1109, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.3890696161353286, |
| "grad_norm": 0.27451419830322266, |
| "learning_rate": 0.0001, |
| "loss": 2.5311, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.3897202342225114, |
| "grad_norm": 0.18475750088691711, |
| "learning_rate": 0.0001, |
| "loss": 1.9241, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.3903708523096942, |
| "grad_norm": 0.20120149850845337, |
| "learning_rate": 0.0001, |
| "loss": 2.1033, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.391021470396877, |
| "grad_norm": 0.19626259803771973, |
| "learning_rate": 0.0001, |
| "loss": 2.1223, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.3916720884840599, |
| "grad_norm": 0.22795897722244263, |
| "learning_rate": 0.0001, |
| "loss": 2.2021, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.3923227065712427, |
| "grad_norm": 0.5195867419242859, |
| "learning_rate": 0.0001, |
| "loss": 3.1849, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.3929733246584255, |
| "grad_norm": 0.2636241614818573, |
| "learning_rate": 0.0001, |
| "loss": 2.0739, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.3936239427456083, |
| "grad_norm": 0.33922895789146423, |
| "learning_rate": 0.0001, |
| "loss": 2.31, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.39427456083279117, |
| "grad_norm": 0.17467042803764343, |
| "learning_rate": 0.0001, |
| "loss": 1.9201, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.394925178919974, |
| "grad_norm": 0.22457371652126312, |
| "learning_rate": 0.0001, |
| "loss": 1.9783, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.3955757970071568, |
| "grad_norm": 0.5104444026947021, |
| "learning_rate": 0.0001, |
| "loss": 2.3777, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.39622641509433965, |
| "grad_norm": 0.4531616270542145, |
| "learning_rate": 0.0001, |
| "loss": 2.8208, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.39687703318152245, |
| "grad_norm": 0.20649151504039764, |
| "learning_rate": 0.0001, |
| "loss": 2.1377, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.39752765126870526, |
| "grad_norm": 0.39769667387008667, |
| "learning_rate": 0.0001, |
| "loss": 2.2228, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.39817826935588807, |
| "grad_norm": 0.2832731008529663, |
| "learning_rate": 0.0001, |
| "loss": 1.9664, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.39882888744307093, |
| "grad_norm": 0.2754386067390442, |
| "learning_rate": 0.0001, |
| "loss": 2.5595, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.39947950553025374, |
| "grad_norm": 0.404364675283432, |
| "learning_rate": 0.0001, |
| "loss": 2.8133, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.40013012361743655, |
| "grad_norm": 0.30304789543151855, |
| "learning_rate": 0.0001, |
| "loss": 2.2729, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.4007807417046194, |
| "grad_norm": 0.2519910931587219, |
| "learning_rate": 0.0001, |
| "loss": 2.3655, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.4014313597918022, |
| "grad_norm": 0.2863995134830475, |
| "learning_rate": 0.0001, |
| "loss": 2.0774, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.40208197787898503, |
| "grad_norm": 0.393622487783432, |
| "learning_rate": 0.0001, |
| "loss": 2.5082, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.40273259596616784, |
| "grad_norm": 0.21836060285568237, |
| "learning_rate": 0.0001, |
| "loss": 1.9548, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.4033832140533507, |
| "grad_norm": 0.358052521944046, |
| "learning_rate": 0.0001, |
| "loss": 2.5158, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.4040338321405335, |
| "grad_norm": 0.237140953540802, |
| "learning_rate": 0.0001, |
| "loss": 2.2111, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.4046844502277163, |
| "grad_norm": 0.20998883247375488, |
| "learning_rate": 0.0001, |
| "loss": 2.1351, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.4053350683148991, |
| "grad_norm": 0.18059247732162476, |
| "learning_rate": 0.0001, |
| "loss": 1.9451, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.405985686402082, |
| "grad_norm": 0.17532669007778168, |
| "learning_rate": 0.0001, |
| "loss": 1.8591, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.4066363044892648, |
| "grad_norm": 0.24097976088523865, |
| "learning_rate": 0.0001, |
| "loss": 2.6534, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.4072869225764476, |
| "grad_norm": 0.19505445659160614, |
| "learning_rate": 0.0001, |
| "loss": 1.8952, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.40793754066363047, |
| "grad_norm": 0.232722207903862, |
| "learning_rate": 0.0001, |
| "loss": 2.2055, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.4085881587508133, |
| "grad_norm": 0.23899732530117035, |
| "learning_rate": 0.0001, |
| "loss": 2.5848, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.4092387768379961, |
| "grad_norm": 0.2411729097366333, |
| "learning_rate": 0.0001, |
| "loss": 2.5315, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.4098893949251789, |
| "grad_norm": 0.25042012333869934, |
| "learning_rate": 0.0001, |
| "loss": 2.4154, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.41054001301236176, |
| "grad_norm": 0.2764488160610199, |
| "learning_rate": 0.0001, |
| "loss": 2.0564, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.41119063109954457, |
| "grad_norm": 0.24761155247688293, |
| "learning_rate": 0.0001, |
| "loss": 2.3245, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.4118412491867274, |
| "grad_norm": 0.22376200556755066, |
| "learning_rate": 0.0001, |
| "loss": 2.1881, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.41249186727391024, |
| "grad_norm": 0.19060148298740387, |
| "learning_rate": 0.0001, |
| "loss": 1.9588, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.41314248536109305, |
| "grad_norm": 0.4157400131225586, |
| "learning_rate": 0.0001, |
| "loss": 2.9024, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.41379310344827586, |
| "grad_norm": 0.2557002007961273, |
| "learning_rate": 0.0001, |
| "loss": 1.9819, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.41444372153545866, |
| "grad_norm": 0.2908417880535126, |
| "learning_rate": 0.0001, |
| "loss": 2.112, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.41509433962264153, |
| "grad_norm": 0.32937270402908325, |
| "learning_rate": 0.0001, |
| "loss": 2.4976, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.41574495770982434, |
| "grad_norm": 0.20382268726825714, |
| "learning_rate": 0.0001, |
| "loss": 2.0448, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.41639557579700714, |
| "grad_norm": 0.23484939336776733, |
| "learning_rate": 0.0001, |
| "loss": 1.9514, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.41704619388418995, |
| "grad_norm": 0.23023058474063873, |
| "learning_rate": 0.0001, |
| "loss": 2.0768, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.4176968119713728, |
| "grad_norm": 0.22951190173625946, |
| "learning_rate": 0.0001, |
| "loss": 2.0764, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.4183474300585556, |
| "grad_norm": 0.18971513211727142, |
| "learning_rate": 0.0001, |
| "loss": 1.9693, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.41899804814573843, |
| "grad_norm": 0.24955709278583527, |
| "learning_rate": 0.0001, |
| "loss": 2.4898, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.4196486662329213, |
| "grad_norm": 0.3344306945800781, |
| "learning_rate": 0.0001, |
| "loss": 2.4779, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.4202992843201041, |
| "grad_norm": 0.21661825478076935, |
| "learning_rate": 0.0001, |
| "loss": 2.0472, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.4209499024072869, |
| "grad_norm": 0.1972419023513794, |
| "learning_rate": 0.0001, |
| "loss": 2.1712, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.4216005204944697, |
| "grad_norm": 0.21619470417499542, |
| "learning_rate": 0.0001, |
| "loss": 2.0739, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.4222511385816526, |
| "grad_norm": 0.2329091727733612, |
| "learning_rate": 0.0001, |
| "loss": 2.1362, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.4229017566688354, |
| "grad_norm": 0.22971969842910767, |
| "learning_rate": 0.0001, |
| "loss": 1.9898, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.4235523747560182, |
| "grad_norm": 0.20185063779354095, |
| "learning_rate": 0.0001, |
| "loss": 2.1008, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.42420299284320107, |
| "grad_norm": 0.2658546566963196, |
| "learning_rate": 0.0001, |
| "loss": 2.5734, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.4248536109303839, |
| "grad_norm": 0.23109374940395355, |
| "learning_rate": 0.0001, |
| "loss": 2.2569, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.4255042290175667, |
| "grad_norm": 0.25115352869033813, |
| "learning_rate": 0.0001, |
| "loss": 2.5967, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.4261548471047495, |
| "grad_norm": 0.20470669865608215, |
| "learning_rate": 0.0001, |
| "loss": 2.0302, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.42680546519193235, |
| "grad_norm": 0.2151513546705246, |
| "learning_rate": 0.0001, |
| "loss": 2.5183, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.42745608327911516, |
| "grad_norm": 0.2571411728858948, |
| "learning_rate": 0.0001, |
| "loss": 2.255, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.42810670136629797, |
| "grad_norm": 0.2414022833108902, |
| "learning_rate": 0.0001, |
| "loss": 2.4076, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.42875731945348083, |
| "grad_norm": 0.21041014790534973, |
| "learning_rate": 0.0001, |
| "loss": 2.0091, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.42940793754066364, |
| "grad_norm": 0.21241822838783264, |
| "learning_rate": 0.0001, |
| "loss": 2.355, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.43005855562784645, |
| "grad_norm": 0.21031403541564941, |
| "learning_rate": 0.0001, |
| "loss": 1.9887, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.43070917371502926, |
| "grad_norm": 0.19765952229499817, |
| "learning_rate": 0.0001, |
| "loss": 2.1555, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.4313597918022121, |
| "grad_norm": 0.24740834534168243, |
| "learning_rate": 0.0001, |
| "loss": 2.2349, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.43201040988939493, |
| "grad_norm": 0.22086234390735626, |
| "learning_rate": 0.0001, |
| "loss": 2.0948, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.43266102797657774, |
| "grad_norm": 0.21949239075183868, |
| "learning_rate": 0.0001, |
| "loss": 2.3905, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.43331164606376055, |
| "grad_norm": 0.20536834001541138, |
| "learning_rate": 0.0001, |
| "loss": 2.0547, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.4339622641509434, |
| "grad_norm": 0.2570655941963196, |
| "learning_rate": 0.0001, |
| "loss": 2.0261, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.4346128822381262, |
| "grad_norm": 0.3293687701225281, |
| "learning_rate": 0.0001, |
| "loss": 2.344, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.435263500325309, |
| "grad_norm": 0.22947120666503906, |
| "learning_rate": 0.0001, |
| "loss": 2.232, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.4359141184124919, |
| "grad_norm": 0.2425599992275238, |
| "learning_rate": 0.0001, |
| "loss": 2.309, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.4365647364996747, |
| "grad_norm": 0.2506352663040161, |
| "learning_rate": 0.0001, |
| "loss": 2.1249, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.4372153545868575, |
| "grad_norm": 0.19457192718982697, |
| "learning_rate": 0.0001, |
| "loss": 1.9461, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.4378659726740403, |
| "grad_norm": 0.3749271035194397, |
| "learning_rate": 0.0001, |
| "loss": 2.8532, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.4385165907612232, |
| "grad_norm": 0.25384366512298584, |
| "learning_rate": 0.0001, |
| "loss": 2.6495, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.439167208848406, |
| "grad_norm": 0.21413469314575195, |
| "learning_rate": 0.0001, |
| "loss": 2.084, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.4398178269355888, |
| "grad_norm": 0.228125661611557, |
| "learning_rate": 0.0001, |
| "loss": 2.2175, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.44046844502277166, |
| "grad_norm": 0.1948491632938385, |
| "learning_rate": 0.0001, |
| "loss": 1.9702, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.44111906310995447, |
| "grad_norm": 0.307992547750473, |
| "learning_rate": 0.0001, |
| "loss": 2.5884, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.4417696811971373, |
| "grad_norm": 0.23681728541851044, |
| "learning_rate": 0.0001, |
| "loss": 2.2104, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.4424202992843201, |
| "grad_norm": 0.23185166716575623, |
| "learning_rate": 0.0001, |
| "loss": 2.0823, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.44307091737150295, |
| "grad_norm": 0.2772667109966278, |
| "learning_rate": 0.0001, |
| "loss": 2.3729, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.44372153545868576, |
| "grad_norm": 0.18908965587615967, |
| "learning_rate": 0.0001, |
| "loss": 2.0585, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.44437215354586856, |
| "grad_norm": 0.2063988745212555, |
| "learning_rate": 0.0001, |
| "loss": 1.9474, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.4450227716330514, |
| "grad_norm": 0.19444917142391205, |
| "learning_rate": 0.0001, |
| "loss": 1.9269, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.44567338972023424, |
| "grad_norm": 0.2866727113723755, |
| "learning_rate": 0.0001, |
| "loss": 2.5145, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.44632400780741704, |
| "grad_norm": 0.24801641702651978, |
| "learning_rate": 0.0001, |
| "loss": 2.2954, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.44697462589459985, |
| "grad_norm": 0.2115658074617386, |
| "learning_rate": 0.0001, |
| "loss": 2.1956, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.4476252439817827, |
| "grad_norm": 0.3155558109283447, |
| "learning_rate": 0.0001, |
| "loss": 2.7396, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.4482758620689655, |
| "grad_norm": 0.22418133914470673, |
| "learning_rate": 0.0001, |
| "loss": 2.1066, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.44892648015614833, |
| "grad_norm": 0.2707614600658417, |
| "learning_rate": 0.0001, |
| "loss": 2.3353, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.44957709824333114, |
| "grad_norm": 0.22262880206108093, |
| "learning_rate": 0.0001, |
| "loss": 2.2143, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.450227716330514, |
| "grad_norm": 0.25256767868995667, |
| "learning_rate": 0.0001, |
| "loss": 2.2786, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.4508783344176968, |
| "grad_norm": 0.20360921323299408, |
| "learning_rate": 0.0001, |
| "loss": 2.0059, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.4515289525048796, |
| "grad_norm": 0.20573420822620392, |
| "learning_rate": 0.0001, |
| "loss": 2.0884, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.4521795705920625, |
| "grad_norm": 0.31812623143196106, |
| "learning_rate": 0.0001, |
| "loss": 2.5905, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.4528301886792453, |
| "grad_norm": 0.24690969288349152, |
| "learning_rate": 0.0001, |
| "loss": 2.5157, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.4534808067664281, |
| "grad_norm": 0.256793737411499, |
| "learning_rate": 0.0001, |
| "loss": 2.1548, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.4541314248536109, |
| "grad_norm": 0.2659960985183716, |
| "learning_rate": 0.0001, |
| "loss": 2.2977, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.4547820429407938, |
| "grad_norm": 0.23824195563793182, |
| "learning_rate": 0.0001, |
| "loss": 2.5946, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.4554326610279766, |
| "grad_norm": 0.2580608129501343, |
| "learning_rate": 0.0001, |
| "loss": 2.2608, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.4560832791151594, |
| "grad_norm": 0.270622193813324, |
| "learning_rate": 0.0001, |
| "loss": 2.5848, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.4567338972023422, |
| "grad_norm": 0.2170489877462387, |
| "learning_rate": 0.0001, |
| "loss": 2.4315, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.45738451528952506, |
| "grad_norm": 0.20716050267219543, |
| "learning_rate": 0.0001, |
| "loss": 2.1592, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.45803513337670787, |
| "grad_norm": 0.24847671389579773, |
| "learning_rate": 0.0001, |
| "loss": 2.3202, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.4586857514638907, |
| "grad_norm": 0.24049146473407745, |
| "learning_rate": 0.0001, |
| "loss": 2.1968, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.45933636955107354, |
| "grad_norm": 0.2079533487558365, |
| "learning_rate": 0.0001, |
| "loss": 2.2966, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.45998698763825635, |
| "grad_norm": 0.18255428969860077, |
| "learning_rate": 0.0001, |
| "loss": 1.9931, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.46063760572543916, |
| "grad_norm": 0.28015655279159546, |
| "learning_rate": 0.0001, |
| "loss": 2.2605, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.46128822381262197, |
| "grad_norm": 0.27453094720840454, |
| "learning_rate": 0.0001, |
| "loss": 2.2835, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.46193884189980483, |
| "grad_norm": 0.2751506268978119, |
| "learning_rate": 0.0001, |
| "loss": 2.665, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.46258945998698764, |
| "grad_norm": 0.2759210169315338, |
| "learning_rate": 0.0001, |
| "loss": 2.3593, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.46324007807417045, |
| "grad_norm": 0.2902829051017761, |
| "learning_rate": 0.0001, |
| "loss": 2.7421, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.4638906961613533, |
| "grad_norm": 0.24083854258060455, |
| "learning_rate": 0.0001, |
| "loss": 2.4644, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.4645413142485361, |
| "grad_norm": 0.23614934086799622, |
| "learning_rate": 0.0001, |
| "loss": 2.2939, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.4651919323357189, |
| "grad_norm": 0.1972537487745285, |
| "learning_rate": 0.0001, |
| "loss": 1.9391, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.46584255042290174, |
| "grad_norm": 0.2227838933467865, |
| "learning_rate": 0.0001, |
| "loss": 1.9396, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.4664931685100846, |
| "grad_norm": 0.3672918379306793, |
| "learning_rate": 0.0001, |
| "loss": 2.7508, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.4671437865972674, |
| "grad_norm": 0.2712246775627136, |
| "learning_rate": 0.0001, |
| "loss": 2.2838, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.4677944046844502, |
| "grad_norm": 0.2337927669286728, |
| "learning_rate": 0.0001, |
| "loss": 1.9807, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.468445022771633, |
| "grad_norm": 0.2051180601119995, |
| "learning_rate": 0.0001, |
| "loss": 2.0311, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.4690956408588159, |
| "grad_norm": 0.1965889185667038, |
| "learning_rate": 0.0001, |
| "loss": 2.1114, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.4697462589459987, |
| "grad_norm": 0.2106337547302246, |
| "learning_rate": 0.0001, |
| "loss": 2.0792, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.4703968770331815, |
| "grad_norm": 0.19918356835842133, |
| "learning_rate": 0.0001, |
| "loss": 2.1323, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.47104749512036437, |
| "grad_norm": 0.20124401152133942, |
| "learning_rate": 0.0001, |
| "loss": 2.0008, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.4716981132075472, |
| "grad_norm": 0.2172473967075348, |
| "learning_rate": 0.0001, |
| "loss": 2.3891, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.47234873129473, |
| "grad_norm": 0.2524811029434204, |
| "learning_rate": 0.0001, |
| "loss": 2.3343, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.4729993493819128, |
| "grad_norm": 0.22882957756519318, |
| "learning_rate": 0.0001, |
| "loss": 2.6723, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.47364996746909566, |
| "grad_norm": 0.2434161901473999, |
| "learning_rate": 0.0001, |
| "loss": 1.9549, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.47430058555627846, |
| "grad_norm": 0.19140364229679108, |
| "learning_rate": 0.0001, |
| "loss": 2.0468, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.4749512036434613, |
| "grad_norm": 0.22166937589645386, |
| "learning_rate": 0.0001, |
| "loss": 2.3432, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.47560182173064414, |
| "grad_norm": 0.2005748748779297, |
| "learning_rate": 0.0001, |
| "loss": 2.0616, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.47625243981782694, |
| "grad_norm": 0.3115980923175812, |
| "learning_rate": 0.0001, |
| "loss": 2.6153, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.47690305790500975, |
| "grad_norm": 0.27135169506073, |
| "learning_rate": 0.0001, |
| "loss": 2.3225, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.47755367599219256, |
| "grad_norm": 0.20748727023601532, |
| "learning_rate": 0.0001, |
| "loss": 1.834, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.4782042940793754, |
| "grad_norm": 0.4031495153903961, |
| "learning_rate": 0.0001, |
| "loss": 2.8177, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.47885491216655823, |
| "grad_norm": 0.2978368401527405, |
| "learning_rate": 0.0001, |
| "loss": 2.6178, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.47950553025374104, |
| "grad_norm": 0.3466270864009857, |
| "learning_rate": 0.0001, |
| "loss": 2.6031, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.4801561483409239, |
| "grad_norm": 0.20074127614498138, |
| "learning_rate": 0.0001, |
| "loss": 2.247, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.4808067664281067, |
| "grad_norm": 0.2393479198217392, |
| "learning_rate": 0.0001, |
| "loss": 2.1265, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.4814573845152895, |
| "grad_norm": 0.27758634090423584, |
| "learning_rate": 0.0001, |
| "loss": 2.5025, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.48210800260247233, |
| "grad_norm": 0.20123820006847382, |
| "learning_rate": 0.0001, |
| "loss": 2.0083, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.4827586206896552, |
| "grad_norm": 0.19012506306171417, |
| "learning_rate": 0.0001, |
| "loss": 2.0212, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.483409238776838, |
| "grad_norm": 0.19451047480106354, |
| "learning_rate": 0.0001, |
| "loss": 2.0295, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.4840598568640208, |
| "grad_norm": 0.3339052200317383, |
| "learning_rate": 0.0001, |
| "loss": 2.4813, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.4847104749512036, |
| "grad_norm": 0.2646152973175049, |
| "learning_rate": 0.0001, |
| "loss": 2.4302, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.4853610930383865, |
| "grad_norm": 0.23590324819087982, |
| "learning_rate": 0.0001, |
| "loss": 2.1723, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.4860117111255693, |
| "grad_norm": 0.28924039006233215, |
| "learning_rate": 0.0001, |
| "loss": 2.8005, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.4866623292127521, |
| "grad_norm": 0.21145464479923248, |
| "learning_rate": 0.0001, |
| "loss": 2.3501, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.48731294729993496, |
| "grad_norm": 0.22815656661987305, |
| "learning_rate": 0.0001, |
| "loss": 2.1997, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.48796356538711777, |
| "grad_norm": 0.24325215816497803, |
| "learning_rate": 0.0001, |
| "loss": 2.039, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.4886141834743006, |
| "grad_norm": 0.3235335052013397, |
| "learning_rate": 0.0001, |
| "loss": 2.4533, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.4892648015614834, |
| "grad_norm": 0.25513559579849243, |
| "learning_rate": 0.0001, |
| "loss": 2.3779, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.48991541964866625, |
| "grad_norm": 0.2905427813529968, |
| "learning_rate": 0.0001, |
| "loss": 1.9843, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.49056603773584906, |
| "grad_norm": 0.23760183155536652, |
| "learning_rate": 0.0001, |
| "loss": 2.1825, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.49121665582303187, |
| "grad_norm": 0.2170071303844452, |
| "learning_rate": 0.0001, |
| "loss": 1.9877, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.49186727391021473, |
| "grad_norm": 0.2555190920829773, |
| "learning_rate": 0.0001, |
| "loss": 2.457, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.49251789199739754, |
| "grad_norm": 0.2571033835411072, |
| "learning_rate": 0.0001, |
| "loss": 2.1152, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.49316851008458035, |
| "grad_norm": 0.23969238996505737, |
| "learning_rate": 0.0001, |
| "loss": 2.3439, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.49381912817176316, |
| "grad_norm": 0.1900262087583542, |
| "learning_rate": 0.0001, |
| "loss": 1.8999, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.494469746258946, |
| "grad_norm": 0.19621430337429047, |
| "learning_rate": 0.0001, |
| "loss": 2.0658, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.4951203643461288, |
| "grad_norm": 0.21956481039524078, |
| "learning_rate": 0.0001, |
| "loss": 2.5427, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.49577098243331164, |
| "grad_norm": 0.22567258775234222, |
| "learning_rate": 0.0001, |
| "loss": 2.2777, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.49642160052049444, |
| "grad_norm": 0.20233570039272308, |
| "learning_rate": 0.0001, |
| "loss": 2.0342, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.4970722186076773, |
| "grad_norm": 0.23662947118282318, |
| "learning_rate": 0.0001, |
| "loss": 2.3668, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.4977228366948601, |
| "grad_norm": 0.2625278830528259, |
| "learning_rate": 0.0001, |
| "loss": 2.6536, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.4983734547820429, |
| "grad_norm": 0.23235228657722473, |
| "learning_rate": 0.0001, |
| "loss": 2.1891, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.4990240728692258, |
| "grad_norm": 0.19439217448234558, |
| "learning_rate": 0.0001, |
| "loss": 1.9647, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.4996746909564086, |
| "grad_norm": 0.19810114800930023, |
| "learning_rate": 0.0001, |
| "loss": 1.9965, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.5003253090435914, |
| "grad_norm": 0.2525380253791809, |
| "learning_rate": 0.0001, |
| "loss": 2.2444, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.5009759271307742, |
| "grad_norm": 0.2409314513206482, |
| "learning_rate": 0.0001, |
| "loss": 2.1717, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.501626545217957, |
| "grad_norm": 0.25244686007499695, |
| "learning_rate": 0.0001, |
| "loss": 2.0126, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.5022771633051398, |
| "grad_norm": 0.19767141342163086, |
| "learning_rate": 0.0001, |
| "loss": 2.1384, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.5029277813923227, |
| "grad_norm": 0.39446812868118286, |
| "learning_rate": 0.0001, |
| "loss": 2.8039, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.5035783994795056, |
| "grad_norm": 0.2643390893936157, |
| "learning_rate": 0.0001, |
| "loss": 2.1524, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.5042290175666884, |
| "grad_norm": 0.27606508135795593, |
| "learning_rate": 0.0001, |
| "loss": 2.1802, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.5048796356538712, |
| "grad_norm": 0.364106148481369, |
| "learning_rate": 0.0001, |
| "loss": 2.9694, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.505530253741054, |
| "grad_norm": 0.23091645538806915, |
| "learning_rate": 0.0001, |
| "loss": 2.5471, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.5061808718282368, |
| "grad_norm": 0.19318193197250366, |
| "learning_rate": 0.0001, |
| "loss": 2.2082, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.5068314899154196, |
| "grad_norm": 0.28997862339019775, |
| "learning_rate": 0.0001, |
| "loss": 2.4399, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.5074821080026025, |
| "grad_norm": 0.22487197816371918, |
| "learning_rate": 0.0001, |
| "loss": 2.1946, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.5081327260897853, |
| "grad_norm": 0.24430596828460693, |
| "learning_rate": 0.0001, |
| "loss": 2.4456, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.5087833441769681, |
| "grad_norm": 0.21677151322364807, |
| "learning_rate": 0.0001, |
| "loss": 2.2082, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.5094339622641509, |
| "grad_norm": 0.47995632886886597, |
| "learning_rate": 0.0001, |
| "loss": 3.1358, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.5100845803513337, |
| "grad_norm": 0.19044414162635803, |
| "learning_rate": 0.0001, |
| "loss": 1.8924, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.5107351984385166, |
| "grad_norm": 0.19143608212471008, |
| "learning_rate": 0.0001, |
| "loss": 2.0459, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.5113858165256994, |
| "grad_norm": 0.22588413953781128, |
| "learning_rate": 0.0001, |
| "loss": 2.1369, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.5120364346128823, |
| "grad_norm": 0.2786167860031128, |
| "learning_rate": 0.0001, |
| "loss": 2.2029, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.5126870527000651, |
| "grad_norm": 0.24471627175807953, |
| "learning_rate": 0.0001, |
| "loss": 2.1248, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.5133376707872479, |
| "grad_norm": 0.17795225977897644, |
| "learning_rate": 0.0001, |
| "loss": 1.7926, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.5139882888744307, |
| "grad_norm": 0.2173709124326706, |
| "learning_rate": 0.0001, |
| "loss": 2.0538, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.5146389069616135, |
| "grad_norm": 0.2027692049741745, |
| "learning_rate": 0.0001, |
| "loss": 1.8568, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.5152895250487963, |
| "grad_norm": 0.2013595849275589, |
| "learning_rate": 0.0001, |
| "loss": 2.0501, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.5159401431359791, |
| "grad_norm": 0.21996662020683289, |
| "learning_rate": 0.0001, |
| "loss": 2.0374, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.516590761223162, |
| "grad_norm": 0.21435722708702087, |
| "learning_rate": 0.0001, |
| "loss": 2.1907, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.5172413793103449, |
| "grad_norm": 0.21512284874916077, |
| "learning_rate": 0.0001, |
| "loss": 2.315, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.5178919973975277, |
| "grad_norm": 0.19432400166988373, |
| "learning_rate": 0.0001, |
| "loss": 2.103, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.5185426154847105, |
| "grad_norm": 0.23112992942333221, |
| "learning_rate": 0.0001, |
| "loss": 2.328, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.5191932335718933, |
| "grad_norm": 0.19719737768173218, |
| "learning_rate": 0.0001, |
| "loss": 1.9569, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.5198438516590761, |
| "grad_norm": 0.2115892618894577, |
| "learning_rate": 0.0001, |
| "loss": 2.2533, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.5204944697462589, |
| "grad_norm": 0.24321842193603516, |
| "learning_rate": 0.0001, |
| "loss": 2.6597, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.5211450878334418, |
| "grad_norm": 0.18219350278377533, |
| "learning_rate": 0.0001, |
| "loss": 1.8709, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.5217957059206246, |
| "grad_norm": 0.18715021014213562, |
| "learning_rate": 0.0001, |
| "loss": 2.0021, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.5224463240078074, |
| "grad_norm": 0.25940024852752686, |
| "learning_rate": 0.0001, |
| "loss": 2.3742, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.5230969420949902, |
| "grad_norm": 0.18714728951454163, |
| "learning_rate": 0.0001, |
| "loss": 2.211, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.523747560182173, |
| "grad_norm": 0.20145951211452484, |
| "learning_rate": 0.0001, |
| "loss": 2.0047, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.5243981782693559, |
| "grad_norm": 0.18992845714092255, |
| "learning_rate": 0.0001, |
| "loss": 1.8559, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.5250487963565387, |
| "grad_norm": 0.2682324945926666, |
| "learning_rate": 0.0001, |
| "loss": 2.4791, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.5256994144437215, |
| "grad_norm": 0.33034664392471313, |
| "learning_rate": 0.0001, |
| "loss": 2.3089, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.5263500325309044, |
| "grad_norm": 0.18838956952095032, |
| "learning_rate": 0.0001, |
| "loss": 1.9462, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.5270006506180872, |
| "grad_norm": 0.42872169613838196, |
| "learning_rate": 0.0001, |
| "loss": 2.6874, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.52765126870527, |
| "grad_norm": 0.2108643501996994, |
| "learning_rate": 0.0001, |
| "loss": 2.3627, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.5283018867924528, |
| "grad_norm": 0.21745599806308746, |
| "learning_rate": 0.0001, |
| "loss": 2.1204, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.5289525048796356, |
| "grad_norm": 0.2577585279941559, |
| "learning_rate": 0.0001, |
| "loss": 1.9746, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.5296031229668184, |
| "grad_norm": 0.372471421957016, |
| "learning_rate": 0.0001, |
| "loss": 2.688, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.5302537410540012, |
| "grad_norm": 0.2425181120634079, |
| "learning_rate": 0.0001, |
| "loss": 2.1377, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.5309043591411842, |
| "grad_norm": 0.2638307511806488, |
| "learning_rate": 0.0001, |
| "loss": 2.1088, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.531554977228367, |
| "grad_norm": 0.2356933355331421, |
| "learning_rate": 0.0001, |
| "loss": 2.2291, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.5322055953155498, |
| "grad_norm": 0.23714864253997803, |
| "learning_rate": 0.0001, |
| "loss": 2.0929, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.5328562134027326, |
| "grad_norm": 0.19541950523853302, |
| "learning_rate": 0.0001, |
| "loss": 2.0883, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.5335068314899154, |
| "grad_norm": 0.3091617822647095, |
| "learning_rate": 0.0001, |
| "loss": 3.0127, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.5341574495770982, |
| "grad_norm": 0.2592740058898926, |
| "learning_rate": 0.0001, |
| "loss": 1.8307, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.534808067664281, |
| "grad_norm": 0.22505807876586914, |
| "learning_rate": 0.0001, |
| "loss": 2.462, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.5354586857514639, |
| "grad_norm": 0.22032824158668518, |
| "learning_rate": 0.0001, |
| "loss": 2.2718, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.5361093038386467, |
| "grad_norm": 0.2457459270954132, |
| "learning_rate": 0.0001, |
| "loss": 2.4213, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.5367599219258296, |
| "grad_norm": 0.24181683361530304, |
| "learning_rate": 0.0001, |
| "loss": 1.9347, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.5374105400130124, |
| "grad_norm": 0.29988738894462585, |
| "learning_rate": 0.0001, |
| "loss": 2.7697, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.5380611581001952, |
| "grad_norm": 0.24946388602256775, |
| "learning_rate": 0.0001, |
| "loss": 2.2117, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.538711776187378, |
| "grad_norm": 0.20339331030845642, |
| "learning_rate": 0.0001, |
| "loss": 1.9936, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.5393623942745608, |
| "grad_norm": 0.22250457108020782, |
| "learning_rate": 0.0001, |
| "loss": 2.0785, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.5400130123617437, |
| "grad_norm": 0.1869298666715622, |
| "learning_rate": 0.0001, |
| "loss": 2.0406, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.5406636304489265, |
| "grad_norm": 0.1873755156993866, |
| "learning_rate": 0.0001, |
| "loss": 1.9126, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.5413142485361093, |
| "grad_norm": 0.3135535418987274, |
| "learning_rate": 0.0001, |
| "loss": 2.2881, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.5419648666232921, |
| "grad_norm": 0.20596185326576233, |
| "learning_rate": 0.0001, |
| "loss": 2.0682, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.5426154847104749, |
| "grad_norm": 0.25786712765693665, |
| "learning_rate": 0.0001, |
| "loss": 2.0591, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.5432661027976577, |
| "grad_norm": 0.2592066824436188, |
| "learning_rate": 0.0001, |
| "loss": 2.052, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.5439167208848406, |
| "grad_norm": 0.20738951861858368, |
| "learning_rate": 0.0001, |
| "loss": 1.9726, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.5445673389720235, |
| "grad_norm": 0.21384763717651367, |
| "learning_rate": 0.0001, |
| "loss": 2.1897, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.5452179570592063, |
| "grad_norm": 0.22050943970680237, |
| "learning_rate": 0.0001, |
| "loss": 2.3597, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.5458685751463891, |
| "grad_norm": 0.1996280699968338, |
| "learning_rate": 0.0001, |
| "loss": 2.0492, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.5465191932335719, |
| "grad_norm": 0.2430533468723297, |
| "learning_rate": 0.0001, |
| "loss": 2.2774, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.5471698113207547, |
| "grad_norm": 0.22777177393436432, |
| "learning_rate": 0.0001, |
| "loss": 2.0779, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.5478204294079375, |
| "grad_norm": 0.22464539110660553, |
| "learning_rate": 0.0001, |
| "loss": 2.3316, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.5484710474951203, |
| "grad_norm": 0.17759400606155396, |
| "learning_rate": 0.0001, |
| "loss": 1.8407, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.5491216655823032, |
| "grad_norm": 0.22264355421066284, |
| "learning_rate": 0.0001, |
| "loss": 2.2869, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.549772283669486, |
| "grad_norm": 0.20819737017154694, |
| "learning_rate": 0.0001, |
| "loss": 2.1209, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.5504229017566689, |
| "grad_norm": 0.2194463461637497, |
| "learning_rate": 0.0001, |
| "loss": 2.1457, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.5510735198438517, |
| "grad_norm": 0.19314661622047424, |
| "learning_rate": 0.0001, |
| "loss": 2.1063, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.5517241379310345, |
| "grad_norm": 0.186354860663414, |
| "learning_rate": 0.0001, |
| "loss": 2.0833, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.5523747560182173, |
| "grad_norm": 0.1862732619047165, |
| "learning_rate": 0.0001, |
| "loss": 1.9441, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.5530253741054001, |
| "grad_norm": 0.24664181470870972, |
| "learning_rate": 0.0001, |
| "loss": 2.3277, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.5536759921925829, |
| "grad_norm": 0.20182165503501892, |
| "learning_rate": 0.0001, |
| "loss": 2.1902, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.5543266102797658, |
| "grad_norm": 0.2108999788761139, |
| "learning_rate": 0.0001, |
| "loss": 2.0826, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.5549772283669486, |
| "grad_norm": 0.25388890504837036, |
| "learning_rate": 0.0001, |
| "loss": 2.5149, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.5556278464541314, |
| "grad_norm": 0.2074718177318573, |
| "learning_rate": 0.0001, |
| "loss": 1.9135, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.5562784645413142, |
| "grad_norm": 0.1992723047733307, |
| "learning_rate": 0.0001, |
| "loss": 2.186, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.556929082628497, |
| "grad_norm": 0.18721085786819458, |
| "learning_rate": 0.0001, |
| "loss": 1.9453, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.5575797007156799, |
| "grad_norm": 0.21606992185115814, |
| "learning_rate": 0.0001, |
| "loss": 2.1703, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.5582303188028627, |
| "grad_norm": 0.2854723334312439, |
| "learning_rate": 0.0001, |
| "loss": 2.9538, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.5588809368900456, |
| "grad_norm": 0.21503040194511414, |
| "learning_rate": 0.0001, |
| "loss": 2.0194, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.5595315549772284, |
| "grad_norm": 0.2690679430961609, |
| "learning_rate": 0.0001, |
| "loss": 2.1562, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.5601821730644112, |
| "grad_norm": 0.2811613976955414, |
| "learning_rate": 0.0001, |
| "loss": 2.2475, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.560832791151594, |
| "grad_norm": 0.2551681697368622, |
| "learning_rate": 0.0001, |
| "loss": 2.5585, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.5614834092387768, |
| "grad_norm": 0.21423856914043427, |
| "learning_rate": 0.0001, |
| "loss": 2.1194, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.5621340273259596, |
| "grad_norm": 0.22121264040470123, |
| "learning_rate": 0.0001, |
| "loss": 1.9257, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.5627846454131424, |
| "grad_norm": 0.38684332370758057, |
| "learning_rate": 0.0001, |
| "loss": 2.5203, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.5634352635003254, |
| "grad_norm": 0.20299634337425232, |
| "learning_rate": 0.0001, |
| "loss": 2.0868, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.5640858815875082, |
| "grad_norm": 0.33485493063926697, |
| "learning_rate": 0.0001, |
| "loss": 2.457, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.564736499674691, |
| "grad_norm": 0.23778866231441498, |
| "learning_rate": 0.0001, |
| "loss": 1.9863, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.5653871177618738, |
| "grad_norm": 0.18562458455562592, |
| "learning_rate": 0.0001, |
| "loss": 1.915, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.5660377358490566, |
| "grad_norm": 0.3780176341533661, |
| "learning_rate": 0.0001, |
| "loss": 2.5518, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.5666883539362394, |
| "grad_norm": 0.1924014538526535, |
| "learning_rate": 0.0001, |
| "loss": 2.0665, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.5673389720234222, |
| "grad_norm": 0.19788160920143127, |
| "learning_rate": 0.0001, |
| "loss": 1.9408, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.5679895901106051, |
| "grad_norm": 0.2435147911310196, |
| "learning_rate": 0.0001, |
| "loss": 2.3716, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.5686402081977879, |
| "grad_norm": 0.2023211270570755, |
| "learning_rate": 0.0001, |
| "loss": 2.2786, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.5692908262849707, |
| "grad_norm": 0.29936715960502625, |
| "learning_rate": 0.0001, |
| "loss": 2.6689, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.5699414443721535, |
| "grad_norm": 0.18846483528614044, |
| "learning_rate": 0.0001, |
| "loss": 1.9436, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.5705920624593364, |
| "grad_norm": 0.44592785835266113, |
| "learning_rate": 0.0001, |
| "loss": 2.8648, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.5712426805465192, |
| "grad_norm": 0.221640944480896, |
| "learning_rate": 0.0001, |
| "loss": 2.1613, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.571893298633702, |
| "grad_norm": 0.22345726191997528, |
| "learning_rate": 0.0001, |
| "loss": 2.076, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.5725439167208849, |
| "grad_norm": 0.20094214379787445, |
| "learning_rate": 0.0001, |
| "loss": 2.0474, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.5731945348080677, |
| "grad_norm": 0.1997043937444687, |
| "learning_rate": 0.0001, |
| "loss": 1.9812, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.5738451528952505, |
| "grad_norm": 0.3758605420589447, |
| "learning_rate": 0.0001, |
| "loss": 2.8357, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.5744957709824333, |
| "grad_norm": 0.2940578758716583, |
| "learning_rate": 0.0001, |
| "loss": 2.4955, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.5751463890696161, |
| "grad_norm": 0.2434762865304947, |
| "learning_rate": 0.0001, |
| "loss": 2.0011, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.5757970071567989, |
| "grad_norm": 0.24335308372974396, |
| "learning_rate": 0.0001, |
| "loss": 2.5458, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.5764476252439817, |
| "grad_norm": 0.2063351422548294, |
| "learning_rate": 0.0001, |
| "loss": 1.9801, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.5770982433311646, |
| "grad_norm": 0.35102301836013794, |
| "learning_rate": 0.0001, |
| "loss": 2.5647, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.5777488614183475, |
| "grad_norm": 0.22332875430583954, |
| "learning_rate": 0.0001, |
| "loss": 2.0542, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.5783994795055303, |
| "grad_norm": 0.2073124796152115, |
| "learning_rate": 0.0001, |
| "loss": 1.9348, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.5790500975927131, |
| "grad_norm": 0.21079733967781067, |
| "learning_rate": 0.0001, |
| "loss": 1.9829, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.5797007156798959, |
| "grad_norm": 0.2842913866043091, |
| "learning_rate": 0.0001, |
| "loss": 2.7215, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.5803513337670787, |
| "grad_norm": 0.2807595133781433, |
| "learning_rate": 0.0001, |
| "loss": 2.1827, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.5810019518542615, |
| "grad_norm": 0.24955599009990692, |
| "learning_rate": 0.0001, |
| "loss": 2.6246, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.5816525699414443, |
| "grad_norm": 0.23281241953372955, |
| "learning_rate": 0.0001, |
| "loss": 2.3944, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.5823031880286272, |
| "grad_norm": 0.2617682218551636, |
| "learning_rate": 0.0001, |
| "loss": 2.6147, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.58295380611581, |
| "grad_norm": 0.1915360391139984, |
| "learning_rate": 0.0001, |
| "loss": 2.0095, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.5836044242029929, |
| "grad_norm": 0.20270249247550964, |
| "learning_rate": 0.0001, |
| "loss": 1.8983, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.5842550422901757, |
| "grad_norm": 0.21804624795913696, |
| "learning_rate": 0.0001, |
| "loss": 2.0425, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.5849056603773585, |
| "grad_norm": 0.25326576828956604, |
| "learning_rate": 0.0001, |
| "loss": 2.4875, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.5855562784645413, |
| "grad_norm": 0.21714434027671814, |
| "learning_rate": 0.0001, |
| "loss": 2.269, |
| "step": 900 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1537, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 300, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.3576622208712704e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|