| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 1537, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0006506180871828237, | |
| "grad_norm": 3.778571605682373, | |
| "learning_rate": 0.0001, | |
| "loss": 4.706, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0013012361743656475, | |
| "grad_norm": 0.7331739068031311, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6402, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.001951854261548471, | |
| "grad_norm": 0.5679969191551208, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5315, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.002602472348731295, | |
| "grad_norm": 0.6543067693710327, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5226, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0032530904359141183, | |
| "grad_norm": 0.42487671971321106, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1375, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.003903708523096942, | |
| "grad_norm": 0.48795655369758606, | |
| "learning_rate": 0.0001, | |
| "loss": 2.253, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.004554326610279766, | |
| "grad_norm": 0.6054234504699707, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3411, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.00520494469746259, | |
| "grad_norm": 0.3039970397949219, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1293, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.005855562784645413, | |
| "grad_norm": 0.6592361330986023, | |
| "learning_rate": 0.0001, | |
| "loss": 3.1615, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.006506180871828237, | |
| "grad_norm": 0.4017999470233917, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5068, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0071567989590110605, | |
| "grad_norm": 0.31507641077041626, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1894, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.007807417046193884, | |
| "grad_norm": 0.33226895332336426, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2006, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.008458035133376708, | |
| "grad_norm": 0.2632739841938019, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0998, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.009108653220559532, | |
| "grad_norm": 0.2794795036315918, | |
| "learning_rate": 0.0001, | |
| "loss": 2.113, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.009759271307742356, | |
| "grad_norm": 0.29168492555618286, | |
| "learning_rate": 0.0001, | |
| "loss": 2.354, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.01040988939492518, | |
| "grad_norm": 0.2537970244884491, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2939, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.011060507482108002, | |
| "grad_norm": 0.5140053033828735, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6237, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.011711125569290826, | |
| "grad_norm": 0.3093675971031189, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3502, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.01236174365647365, | |
| "grad_norm": 0.29241421818733215, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5365, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.013012361743656473, | |
| "grad_norm": 0.3164322078227997, | |
| "learning_rate": 0.0001, | |
| "loss": 2.396, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.013662979830839297, | |
| "grad_norm": 0.24512743949890137, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2759, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.014313597918022121, | |
| "grad_norm": 0.24328342080116272, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2103, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.014964216005204945, | |
| "grad_norm": 0.2563220262527466, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4836, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.015614834092387769, | |
| "grad_norm": 0.33601588010787964, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4446, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.01626545217957059, | |
| "grad_norm": 0.28699007630348206, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8504, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.016916070266753416, | |
| "grad_norm": 0.3181653618812561, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3042, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.01756668835393624, | |
| "grad_norm": 0.2349390834569931, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1024, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.018217306441119064, | |
| "grad_norm": 0.2751820981502533, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2646, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.018867924528301886, | |
| "grad_norm": 0.25547271966934204, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1928, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.01951854261548471, | |
| "grad_norm": 0.283507764339447, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3073, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.020169160702667534, | |
| "grad_norm": 0.3354213237762451, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6273, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.02081977878985036, | |
| "grad_norm": 0.40484553575515747, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4919, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.02147039687703318, | |
| "grad_norm": 0.34319421648979187, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8381, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.022121014964216004, | |
| "grad_norm": 0.32958984375, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3062, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.02277163305139883, | |
| "grad_norm": 0.4503105878829956, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4647, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.02342225113858165, | |
| "grad_norm": 0.5084238052368164, | |
| "learning_rate": 0.0001, | |
| "loss": 3.0047, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.024072869225764477, | |
| "grad_norm": 0.5192400813102722, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2899, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.0247234873129473, | |
| "grad_norm": 0.4197874665260315, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4057, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.025374105400130124, | |
| "grad_norm": 0.5170285105705261, | |
| "learning_rate": 0.0001, | |
| "loss": 3.2918, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.026024723487312947, | |
| "grad_norm": 0.2491147667169571, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1957, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.026675341574495772, | |
| "grad_norm": 0.6597635746002197, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7474, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.027325959661678594, | |
| "grad_norm": 0.40205034613609314, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4561, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.02797657774886142, | |
| "grad_norm": 0.27388331294059753, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0477, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.028627195836044242, | |
| "grad_norm": 0.9163908958435059, | |
| "learning_rate": 0.0001, | |
| "loss": 3.334, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.029277813923227064, | |
| "grad_norm": 0.2747696042060852, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1604, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.02992843201040989, | |
| "grad_norm": 0.36308085918426514, | |
| "learning_rate": 0.0001, | |
| "loss": 2.693, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.03057905009759271, | |
| "grad_norm": 0.6159886121749878, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5515, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.031229668184775537, | |
| "grad_norm": 0.4801373779773712, | |
| "learning_rate": 0.0001, | |
| "loss": 2.809, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.03188028627195836, | |
| "grad_norm": 0.32580915093421936, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5236, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.03253090435914118, | |
| "grad_norm": 0.3028671443462372, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2685, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.03318152244632401, | |
| "grad_norm": 0.5660931468009949, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2564, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.03383214053350683, | |
| "grad_norm": 0.24634602665901184, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1355, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.034482758620689655, | |
| "grad_norm": 0.24830913543701172, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0425, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.03513337670787248, | |
| "grad_norm": 0.23614570498466492, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1975, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.035783994795055306, | |
| "grad_norm": 0.2624325156211853, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3071, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.03643461288223813, | |
| "grad_norm": 0.3967755436897278, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6088, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.03708523096942095, | |
| "grad_norm": 0.22147373855113983, | |
| "learning_rate": 0.0001, | |
| "loss": 2.003, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.03773584905660377, | |
| "grad_norm": 0.47795867919921875, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1473, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.038386467143786594, | |
| "grad_norm": 0.43953707814216614, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6595, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.03903708523096942, | |
| "grad_norm": 0.29031845927238464, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3173, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.039687703318152245, | |
| "grad_norm": 0.2491024285554886, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0575, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.04033832140533507, | |
| "grad_norm": 0.3025687634944916, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0965, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.04098893949251789, | |
| "grad_norm": 0.26097819209098816, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2583, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.04163955757970072, | |
| "grad_norm": 0.2413238286972046, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2441, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.04229017566688354, | |
| "grad_norm": 0.2332315295934677, | |
| "learning_rate": 0.0001, | |
| "loss": 2.185, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.04294079375406636, | |
| "grad_norm": 0.4037252366542816, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3875, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.043591411841249185, | |
| "grad_norm": 0.34149354696273804, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3835, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.04424202992843201, | |
| "grad_norm": 0.23793481290340424, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3521, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.044892648015614836, | |
| "grad_norm": 0.24252744019031525, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0984, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.04554326610279766, | |
| "grad_norm": 0.2870447635650635, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5408, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.04619388418998048, | |
| "grad_norm": 0.5050077438354492, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7091, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.0468445022771633, | |
| "grad_norm": 0.2391565591096878, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1601, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.04749512036434613, | |
| "grad_norm": 0.20647507905960083, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9582, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.048145738451528954, | |
| "grad_norm": 0.26072338223457336, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3577, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.048796356538711776, | |
| "grad_norm": 0.28378504514694214, | |
| "learning_rate": 0.0001, | |
| "loss": 2.349, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.0494469746258946, | |
| "grad_norm": 0.2536943256855011, | |
| "learning_rate": 0.0001, | |
| "loss": 2.375, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.05009759271307743, | |
| "grad_norm": 0.29276445508003235, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5003, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.05074821080026025, | |
| "grad_norm": 0.2649310231208801, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3247, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.05139882888744307, | |
| "grad_norm": 0.38125383853912354, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5405, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.05204944697462589, | |
| "grad_norm": 0.40980008244514465, | |
| "learning_rate": 0.0001, | |
| "loss": 2.212, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.052700065061808715, | |
| "grad_norm": 0.5363492965698242, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6499, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.053350683148991544, | |
| "grad_norm": 0.34647300839424133, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6302, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.054001301236174366, | |
| "grad_norm": 0.27607980370521545, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1819, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.05465191932335719, | |
| "grad_norm": 0.27654680609703064, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1763, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.05530253741054001, | |
| "grad_norm": 0.24596217274665833, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2585, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.05595315549772284, | |
| "grad_norm": 0.24279890954494476, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4247, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.05660377358490566, | |
| "grad_norm": 0.2918747365474701, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3986, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.057254391672088484, | |
| "grad_norm": 0.26778745651245117, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3592, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.057905009759271306, | |
| "grad_norm": 0.39637815952301025, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8006, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.05855562784645413, | |
| "grad_norm": 0.2676962614059448, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2384, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.05920624593363696, | |
| "grad_norm": 0.3044937252998352, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7762, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.05985686402081978, | |
| "grad_norm": 0.23922136425971985, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0873, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.0605074821080026, | |
| "grad_norm": 0.25385046005249023, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2708, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.06115810019518542, | |
| "grad_norm": 0.378401517868042, | |
| "learning_rate": 0.0001, | |
| "loss": 3.0583, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.06180871828236825, | |
| "grad_norm": 0.37193092703819275, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3632, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.062459336369551074, | |
| "grad_norm": 0.3757643699645996, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4071, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.0631099544567339, | |
| "grad_norm": 0.272833913564682, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3989, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.06376057254391672, | |
| "grad_norm": 0.26533326506614685, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1716, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.06441119063109954, | |
| "grad_norm": 0.5787199139595032, | |
| "learning_rate": 0.0001, | |
| "loss": 2.9445, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.06506180871828236, | |
| "grad_norm": 0.29046157002449036, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3325, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.06571242680546518, | |
| "grad_norm": 0.531452476978302, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7445, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.06636304489264802, | |
| "grad_norm": 0.3969165086746216, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7126, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.06701366297983084, | |
| "grad_norm": 0.24183356761932373, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9971, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.06766428106701367, | |
| "grad_norm": 0.3268399238586426, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1055, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.06831489915419649, | |
| "grad_norm": 0.2625877559185028, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9946, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.06896551724137931, | |
| "grad_norm": 0.2720443308353424, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0764, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.06961613532856213, | |
| "grad_norm": 0.20969334244728088, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8687, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.07026675341574495, | |
| "grad_norm": 0.26211223006248474, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2042, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.07091737150292778, | |
| "grad_norm": 0.27889683842658997, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3146, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.07156798959011061, | |
| "grad_norm": 0.2657179832458496, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1021, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.07221860767729343, | |
| "grad_norm": 0.26620885729789734, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3488, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.07286922576447626, | |
| "grad_norm": 0.4223373830318451, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5289, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.07351984385165908, | |
| "grad_norm": 0.35398781299591064, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5702, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.0741704619388419, | |
| "grad_norm": 0.23328129947185516, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1292, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.07482108002602472, | |
| "grad_norm": 0.33508536219596863, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2049, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.07547169811320754, | |
| "grad_norm": 0.2646953761577606, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3445, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.07612231620039037, | |
| "grad_norm": 0.27866706252098083, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2472, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.07677293428757319, | |
| "grad_norm": 0.35688602924346924, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5045, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.07742355237475602, | |
| "grad_norm": 0.24262933433055878, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4565, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.07807417046193885, | |
| "grad_norm": 0.44757333397865295, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1619, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.07872478854912167, | |
| "grad_norm": 0.3279111385345459, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3996, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.07937540663630449, | |
| "grad_norm": 0.25862693786621094, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3214, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.08002602472348731, | |
| "grad_norm": 0.30093592405319214, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6446, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.08067664281067013, | |
| "grad_norm": 0.25440871715545654, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1181, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.08132726089785296, | |
| "grad_norm": 0.19935627281665802, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0904, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.08197787898503578, | |
| "grad_norm": 0.27385473251342773, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0829, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.0826284970722186, | |
| "grad_norm": 0.24417711794376373, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0019, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.08327911515940144, | |
| "grad_norm": 0.27386653423309326, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2743, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.08392973324658426, | |
| "grad_norm": 0.22413575649261475, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1584, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.08458035133376708, | |
| "grad_norm": 0.27748343348503113, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1428, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.0852309694209499, | |
| "grad_norm": 0.18890976905822754, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9474, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.08588158750813273, | |
| "grad_norm": 0.3067719340324402, | |
| "learning_rate": 0.0001, | |
| "loss": 2.287, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.08653220559531555, | |
| "grad_norm": 0.35126858949661255, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5086, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.08718282368249837, | |
| "grad_norm": 0.19619591534137726, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0132, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.08783344176968119, | |
| "grad_norm": 0.360569566488266, | |
| "learning_rate": 0.0001, | |
| "loss": 2.607, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.08848405985686401, | |
| "grad_norm": 0.22566738724708557, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0942, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.08913467794404685, | |
| "grad_norm": 0.27346086502075195, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3139, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.08978529603122967, | |
| "grad_norm": 0.2500152289867401, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0815, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.0904359141184125, | |
| "grad_norm": 0.22101153433322906, | |
| "learning_rate": 0.0001, | |
| "loss": 2.374, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.09108653220559532, | |
| "grad_norm": 0.2173723727464676, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0084, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.09173715029277814, | |
| "grad_norm": 0.28956499695777893, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6283, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.09238776837996096, | |
| "grad_norm": 0.27032795548439026, | |
| "learning_rate": 0.0001, | |
| "loss": 2.142, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.09303838646714378, | |
| "grad_norm": 0.24320480227470398, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1402, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.0936890045543266, | |
| "grad_norm": 0.3127799332141876, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6671, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.09433962264150944, | |
| "grad_norm": 0.30706024169921875, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3026, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.09499024072869226, | |
| "grad_norm": 0.2378646731376648, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0422, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.09564085881587508, | |
| "grad_norm": 0.24755406379699707, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2574, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.09629147690305791, | |
| "grad_norm": 0.34464696049690247, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2817, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.09694209499024073, | |
| "grad_norm": 0.30485469102859497, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7303, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.09759271307742355, | |
| "grad_norm": 0.1860698163509369, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8582, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.09824333116460637, | |
| "grad_norm": 0.23853841423988342, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1378, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.0988939492517892, | |
| "grad_norm": 0.20248261094093323, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1888, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.09954456733897202, | |
| "grad_norm": 0.3582792282104492, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6726, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.10019518542615485, | |
| "grad_norm": 0.2576686441898346, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4494, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.10084580351333768, | |
| "grad_norm": 0.306029349565506, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2273, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.1014964216005205, | |
| "grad_norm": 0.31375500559806824, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2474, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.10214703968770332, | |
| "grad_norm": 0.253250390291214, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0142, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.10279765777488614, | |
| "grad_norm": 0.3098273277282715, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2516, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.10344827586206896, | |
| "grad_norm": 0.3239591717720032, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2432, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.10409889394925179, | |
| "grad_norm": 0.24929773807525635, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2495, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.10474951203643461, | |
| "grad_norm": 0.3203783929347992, | |
| "learning_rate": 0.0001, | |
| "loss": 2.68, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.10540013012361743, | |
| "grad_norm": 0.38844674825668335, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7457, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.10605074821080027, | |
| "grad_norm": 0.21753644943237305, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1284, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.10670136629798309, | |
| "grad_norm": 0.20610418915748596, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8377, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.10735198438516591, | |
| "grad_norm": 0.3555772304534912, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3599, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.10800260247234873, | |
| "grad_norm": 0.3971005380153656, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2771, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.10865322055953155, | |
| "grad_norm": 0.28628769516944885, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2438, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.10930383864671438, | |
| "grad_norm": 0.38728833198547363, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4103, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.1099544567338972, | |
| "grad_norm": 0.26340189576148987, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6832, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.11060507482108002, | |
| "grad_norm": 0.20119386911392212, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9622, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.11125569290826284, | |
| "grad_norm": 0.2929171621799469, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2762, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.11190631099544568, | |
| "grad_norm": 0.422146201133728, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4015, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.1125569290826285, | |
| "grad_norm": 0.29050537943840027, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4399, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.11320754716981132, | |
| "grad_norm": 0.2646816074848175, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3058, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.11385816525699415, | |
| "grad_norm": 0.2643061578273773, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1892, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.11450878334417697, | |
| "grad_norm": 0.5878323316574097, | |
| "learning_rate": 0.0001, | |
| "loss": 3.2198, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.11515940143135979, | |
| "grad_norm": 0.36881884932518005, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4112, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.11581001951854261, | |
| "grad_norm": 0.25198304653167725, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1667, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.11646063760572543, | |
| "grad_norm": 0.34164664149284363, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6248, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.11711125569290826, | |
| "grad_norm": 0.41471973061561584, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5616, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.11776187378009109, | |
| "grad_norm": 0.26372480392456055, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2904, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.11841249186727391, | |
| "grad_norm": 0.2271176278591156, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0312, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.11906310995445674, | |
| "grad_norm": 0.2106996774673462, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9661, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.11971372804163956, | |
| "grad_norm": 0.22870291769504547, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9052, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.12036434612882238, | |
| "grad_norm": 0.41253864765167236, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3747, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.1210149642160052, | |
| "grad_norm": 0.3258817791938782, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5401, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.12166558230318802, | |
| "grad_norm": 0.3461870551109314, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8027, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.12231620039037085, | |
| "grad_norm": 0.3704046607017517, | |
| "learning_rate": 0.0001, | |
| "loss": 2.799, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.12296681847755368, | |
| "grad_norm": 0.30265969038009644, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4287, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.1236174365647365, | |
| "grad_norm": 0.4215582013130188, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6857, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.12426805465191933, | |
| "grad_norm": 0.3003520965576172, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4155, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.12491867273910215, | |
| "grad_norm": 0.412749320268631, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6352, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.12556929082628496, | |
| "grad_norm": 0.2772350013256073, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2452, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.1262199089134678, | |
| "grad_norm": 0.21457143127918243, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0172, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.12687052700065063, | |
| "grad_norm": 0.40995845198631287, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6218, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.12752114508783344, | |
| "grad_norm": 0.2253209501504898, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2319, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.12817176317501627, | |
| "grad_norm": 0.36564287543296814, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4585, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.12882238126219908, | |
| "grad_norm": 0.41084784269332886, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6326, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.12947299934938192, | |
| "grad_norm": 0.36012157797813416, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0168, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.13012361743656473, | |
| "grad_norm": 0.5138425230979919, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3377, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.13077423552374756, | |
| "grad_norm": 0.2799031436443329, | |
| "learning_rate": 0.0001, | |
| "loss": 2.532, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.13142485361093037, | |
| "grad_norm": 0.3078779876232147, | |
| "learning_rate": 0.0001, | |
| "loss": 2.044, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.1320754716981132, | |
| "grad_norm": 0.31270912289619446, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8576, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.13272608978529604, | |
| "grad_norm": 0.23117204010486603, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1908, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.13337670787247885, | |
| "grad_norm": 0.2531285285949707, | |
| "learning_rate": 0.0001, | |
| "loss": 2.143, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.1340273259596617, | |
| "grad_norm": 0.28053218126296997, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6902, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.1346779440468445, | |
| "grad_norm": 0.2600589692592621, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0355, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.13532856213402733, | |
| "grad_norm": 0.2725912630558014, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3949, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.13597918022121014, | |
| "grad_norm": 0.6166338324546814, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8146, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.13662979830839297, | |
| "grad_norm": 0.4028575122356415, | |
| "learning_rate": 0.0001, | |
| "loss": 2.888, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.1372804163955758, | |
| "grad_norm": 0.23181548714637756, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1406, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.13793103448275862, | |
| "grad_norm": 0.24338063597679138, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1564, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.13858165256994145, | |
| "grad_norm": 0.233146533370018, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1695, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.13923227065712426, | |
| "grad_norm": 0.21236726641654968, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9272, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.1398828887443071, | |
| "grad_norm": 0.25471317768096924, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3447, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.1405335068314899, | |
| "grad_norm": 0.35532835125923157, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4328, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.14118412491867274, | |
| "grad_norm": 0.32900944352149963, | |
| "learning_rate": 0.0001, | |
| "loss": 2.385, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.14183474300585555, | |
| "grad_norm": 0.45404863357543945, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8053, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.1424853610930384, | |
| "grad_norm": 0.33968400955200195, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4524, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.14313597918022122, | |
| "grad_norm": 0.3250170946121216, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6173, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.14378659726740403, | |
| "grad_norm": 0.34765559434890747, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8468, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.14443721535458687, | |
| "grad_norm": 0.2274564653635025, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1305, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.14508783344176968, | |
| "grad_norm": 0.42719507217407227, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3682, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.1457384515289525, | |
| "grad_norm": 0.2848481833934784, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0923, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.14638906961613532, | |
| "grad_norm": 0.266548752784729, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0393, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.14703968770331816, | |
| "grad_norm": 0.24076099693775177, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2674, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.14769030579050096, | |
| "grad_norm": 0.23347622156143188, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9455, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.1483409238776838, | |
| "grad_norm": 0.3925648033618927, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7117, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.14899154196486664, | |
| "grad_norm": 0.27654924988746643, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1306, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.14964216005204944, | |
| "grad_norm": 0.2853853702545166, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4369, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.15029277813923228, | |
| "grad_norm": 0.4509859085083008, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6047, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.1509433962264151, | |
| "grad_norm": 0.2515909671783447, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2065, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.15159401431359792, | |
| "grad_norm": 0.5977367162704468, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7133, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.15224463240078073, | |
| "grad_norm": 0.30381399393081665, | |
| "learning_rate": 0.0001, | |
| "loss": 2.343, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.15289525048796357, | |
| "grad_norm": 0.27204832434654236, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2908, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.15354586857514638, | |
| "grad_norm": 0.6246710419654846, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7862, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.1541964866623292, | |
| "grad_norm": 0.4803178012371063, | |
| "learning_rate": 0.0001, | |
| "loss": 3.4388, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.15484710474951205, | |
| "grad_norm": 0.3038940727710724, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7409, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.15549772283669486, | |
| "grad_norm": 0.2494591474533081, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2601, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.1561483409238777, | |
| "grad_norm": 0.23808616399765015, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1319, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.1567989590110605, | |
| "grad_norm": 0.3111306130886078, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7414, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.15744957709824334, | |
| "grad_norm": 0.22197599709033966, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1346, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.15810019518542615, | |
| "grad_norm": 0.2681500315666199, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3779, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.15875081327260898, | |
| "grad_norm": 0.2612643241882324, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5743, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.1594014313597918, | |
| "grad_norm": 0.201397106051445, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0312, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.16005204944697463, | |
| "grad_norm": 0.25662410259246826, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5085, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.16070266753415746, | |
| "grad_norm": 0.21460294723510742, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1099, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.16135328562134027, | |
| "grad_norm": 0.19971312582492828, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1024, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.1620039037085231, | |
| "grad_norm": 0.1986059844493866, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9306, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.16265452179570591, | |
| "grad_norm": 0.21961884200572968, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1218, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.16330513988288875, | |
| "grad_norm": 0.20071017742156982, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0581, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.16395575797007156, | |
| "grad_norm": 0.32734909653663635, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6229, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.1646063760572544, | |
| "grad_norm": 0.21822451055049896, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9954, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.1652569941444372, | |
| "grad_norm": 0.3013177216053009, | |
| "learning_rate": 0.0001, | |
| "loss": 2.454, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.16590761223162004, | |
| "grad_norm": 0.31199347972869873, | |
| "learning_rate": 0.0001, | |
| "loss": 2.815, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.16655823031880287, | |
| "grad_norm": 0.2255464345216751, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0232, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.16720884840598568, | |
| "grad_norm": 0.21208804845809937, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9663, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.16785946649316852, | |
| "grad_norm": 0.2432132512331009, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4189, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.16851008458035133, | |
| "grad_norm": 0.21116623282432556, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0761, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.16916070266753416, | |
| "grad_norm": 0.18722975254058838, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9537, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.16981132075471697, | |
| "grad_norm": 0.2683362662792206, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4483, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.1704619388418998, | |
| "grad_norm": 0.2739648222923279, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3754, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.17111255692908262, | |
| "grad_norm": 0.1836375594139099, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0103, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.17176317501626545, | |
| "grad_norm": 0.34002602100372314, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2626, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.1724137931034483, | |
| "grad_norm": 0.19341516494750977, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9751, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.1730644111906311, | |
| "grad_norm": 0.25080743432044983, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2162, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.17371502927781393, | |
| "grad_norm": 0.2362661212682724, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0226, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.17436564736499674, | |
| "grad_norm": 0.25844064354896545, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3176, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.17501626545217958, | |
| "grad_norm": 0.3904498517513275, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4871, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.17566688353936238, | |
| "grad_norm": 0.22143317759037018, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2073, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.17631750162654522, | |
| "grad_norm": 0.20974211394786835, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1393, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.17696811971372803, | |
| "grad_norm": 0.24463056027889252, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0203, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.17761873780091086, | |
| "grad_norm": 0.23296399414539337, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1096, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.1782693558880937, | |
| "grad_norm": 0.4122619926929474, | |
| "learning_rate": 0.0001, | |
| "loss": 3.1512, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.1789199739752765, | |
| "grad_norm": 0.2744470536708832, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2211, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.17957059206245934, | |
| "grad_norm": 0.21010619401931763, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2203, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.18022121014964215, | |
| "grad_norm": 0.27855056524276733, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2903, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.180871828236825, | |
| "grad_norm": 0.2909989058971405, | |
| "learning_rate": 0.0001, | |
| "loss": 2.237, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.1815224463240078, | |
| "grad_norm": 0.21754448115825653, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0138, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.18217306441119063, | |
| "grad_norm": 0.35209745168685913, | |
| "learning_rate": 0.0001, | |
| "loss": 2.652, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.18282368249837344, | |
| "grad_norm": 0.29994750022888184, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1868, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.18347430058555628, | |
| "grad_norm": 0.2645902633666992, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2925, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.1841249186727391, | |
| "grad_norm": 0.3492202162742615, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4176, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.18477553675992192, | |
| "grad_norm": 0.256651371717453, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3414, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.18542615484710476, | |
| "grad_norm": 0.23287786543369293, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5488, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.18607677293428757, | |
| "grad_norm": 0.26059290766716003, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4551, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.1867273910214704, | |
| "grad_norm": 0.2482365071773529, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0818, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.1873780091086532, | |
| "grad_norm": 0.23024773597717285, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2592, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.18802862719583605, | |
| "grad_norm": 0.2590011656284332, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4177, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.18867924528301888, | |
| "grad_norm": 0.19760870933532715, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0731, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.1893298633702017, | |
| "grad_norm": 0.20266428589820862, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1221, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.18998048145738453, | |
| "grad_norm": 0.20199884474277496, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0489, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.19063109954456733, | |
| "grad_norm": 0.23876360058784485, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1392, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.19128171763175017, | |
| "grad_norm": 0.23555997014045715, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4116, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.19193233571893298, | |
| "grad_norm": 0.5010725259780884, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7444, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.19258295380611581, | |
| "grad_norm": 0.37809622287750244, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2635, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.19323357189329862, | |
| "grad_norm": 0.499888151884079, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1984, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.19388418998048146, | |
| "grad_norm": 0.43810585141181946, | |
| "learning_rate": 0.0001, | |
| "loss": 3.084, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.1945348080676643, | |
| "grad_norm": 0.35633769631385803, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0351, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.1951854261548471, | |
| "grad_norm": 0.3693079650402069, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9525, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.19583604424202994, | |
| "grad_norm": 0.36550503969192505, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2469, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.19648666232921275, | |
| "grad_norm": 0.2579827308654785, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3585, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.19713728041639558, | |
| "grad_norm": 0.2603841722011566, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3959, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.1977878985035784, | |
| "grad_norm": 0.33103683590888977, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2197, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.19843851659076123, | |
| "grad_norm": 0.2977697551250458, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2569, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.19908913467794404, | |
| "grad_norm": 0.2085130512714386, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2284, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.19973975276512687, | |
| "grad_norm": 0.409212201833725, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7014, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.2003903708523097, | |
| "grad_norm": 0.2447553277015686, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2826, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.20104098893949252, | |
| "grad_norm": 0.21881726384162903, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8573, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.20169160702667535, | |
| "grad_norm": 0.24484936892986298, | |
| "learning_rate": 0.0001, | |
| "loss": 2.318, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.20234222511385816, | |
| "grad_norm": 0.3251173198223114, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3346, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.202992843201041, | |
| "grad_norm": 0.22313712537288666, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9119, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.2036434612882238, | |
| "grad_norm": 0.3086949288845062, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1809, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.20429407937540664, | |
| "grad_norm": 0.28272122144699097, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3335, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.20494469746258945, | |
| "grad_norm": 0.208637535572052, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1947, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.20559531554977228, | |
| "grad_norm": 0.2913041114807129, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3009, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.20624593363695512, | |
| "grad_norm": 0.2813785970211029, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0133, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.20689655172413793, | |
| "grad_norm": 0.2324337363243103, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0827, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.20754716981132076, | |
| "grad_norm": 0.25195491313934326, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5201, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.20819778789850357, | |
| "grad_norm": 0.3435034453868866, | |
| "learning_rate": 0.0001, | |
| "loss": 2.321, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.2088484059856864, | |
| "grad_norm": 0.2735581696033478, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2218, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.20949902407286922, | |
| "grad_norm": 0.2250661551952362, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9416, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.21014964216005205, | |
| "grad_norm": 0.3160262107849121, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5494, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.21080026024723486, | |
| "grad_norm": 0.3669279217720032, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7751, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.2114508783344177, | |
| "grad_norm": 0.2052752673625946, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0139, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.21210149642160053, | |
| "grad_norm": 0.2906612455844879, | |
| "learning_rate": 0.0001, | |
| "loss": 2.227, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.21275211450878334, | |
| "grad_norm": 0.30327048897743225, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2905, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.21340273259596618, | |
| "grad_norm": 0.33950623869895935, | |
| "learning_rate": 0.0001, | |
| "loss": 3.0731, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.21405335068314899, | |
| "grad_norm": 0.31319788098335266, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1374, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.21470396877033182, | |
| "grad_norm": 0.21442054212093353, | |
| "learning_rate": 0.0001, | |
| "loss": 1.7588, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.21535458685751463, | |
| "grad_norm": 0.23125174641609192, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9295, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.21600520494469747, | |
| "grad_norm": 0.23220308125019073, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2606, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.21665582303188027, | |
| "grad_norm": 0.24599219858646393, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2687, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.2173064411190631, | |
| "grad_norm": 0.22226236760616302, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1428, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.21795705920624595, | |
| "grad_norm": 0.2653510570526123, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4381, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.21860767729342875, | |
| "grad_norm": 0.23770929872989655, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9655, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.2192582953806116, | |
| "grad_norm": 0.1932332068681717, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9465, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.2199089134677944, | |
| "grad_norm": 0.181661456823349, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9912, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.22055953155497723, | |
| "grad_norm": 0.22275297343730927, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1964, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.22121014964216004, | |
| "grad_norm": 0.22086840867996216, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2216, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.22186076772934288, | |
| "grad_norm": 0.22807130217552185, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2434, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.2225113858165257, | |
| "grad_norm": 0.26616647839546204, | |
| "learning_rate": 0.0001, | |
| "loss": 2.442, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.22316200390370852, | |
| "grad_norm": 0.2841719388961792, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2358, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.22381262199089136, | |
| "grad_norm": 0.23251943290233612, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3436, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.22446324007807417, | |
| "grad_norm": 0.20406994223594666, | |
| "learning_rate": 0.0001, | |
| "loss": 2.101, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.225113858165257, | |
| "grad_norm": 0.18677304685115814, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0596, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.2257644762524398, | |
| "grad_norm": 0.22367873787879944, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2051, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.22641509433962265, | |
| "grad_norm": 0.2521246671676636, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1718, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.22706571242680545, | |
| "grad_norm": 0.23043319582939148, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2818, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.2277163305139883, | |
| "grad_norm": 0.22021251916885376, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0337, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.2283669486011711, | |
| "grad_norm": 0.18043603003025055, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9434, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.22901756668835394, | |
| "grad_norm": 0.4757142961025238, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2467, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.22966818477553677, | |
| "grad_norm": 0.30740290880203247, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5296, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.23031880286271958, | |
| "grad_norm": 0.23037666082382202, | |
| "learning_rate": 0.0001, | |
| "loss": 2.311, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.23096942094990242, | |
| "grad_norm": 0.22314564883708954, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0494, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.23162003903708522, | |
| "grad_norm": 0.21417242288589478, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2459, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.23227065712426806, | |
| "grad_norm": 0.2895831763744354, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2705, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.23292127521145087, | |
| "grad_norm": 0.2110838145017624, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1175, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.2335718932986337, | |
| "grad_norm": 0.3999682664871216, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6891, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.2342225113858165, | |
| "grad_norm": 0.5169201493263245, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5764, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.23487312947299935, | |
| "grad_norm": 0.24382548034191132, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1065, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.23552374756018218, | |
| "grad_norm": 0.2830081582069397, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1186, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.236174365647365, | |
| "grad_norm": 0.23680554330348969, | |
| "learning_rate": 0.0001, | |
| "loss": 2.118, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.23682498373454783, | |
| "grad_norm": 0.3790690302848816, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3566, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.23747560182173064, | |
| "grad_norm": 0.2664685845375061, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2118, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.23812621990891347, | |
| "grad_norm": 0.22439126670360565, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0897, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.23877683799609628, | |
| "grad_norm": 0.2559892237186432, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2559, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.23942745608327912, | |
| "grad_norm": 0.43989577889442444, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5208, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.24007807417046195, | |
| "grad_norm": 0.24543894827365875, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1692, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.24072869225764476, | |
| "grad_norm": 0.37020954489707947, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1287, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.2413793103448276, | |
| "grad_norm": 0.41815564036369324, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5952, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.2420299284320104, | |
| "grad_norm": 0.22579136490821838, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2427, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.24268054651919324, | |
| "grad_norm": 0.3004798889160156, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2767, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.24333116460637605, | |
| "grad_norm": 0.27470141649246216, | |
| "learning_rate": 0.0001, | |
| "loss": 2.092, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.24398178269355889, | |
| "grad_norm": 0.25301867723464966, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1816, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.2446324007807417, | |
| "grad_norm": 0.21194620430469513, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1322, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.24528301886792453, | |
| "grad_norm": 0.28737103939056396, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6685, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.24593363695510737, | |
| "grad_norm": 0.28857922554016113, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2219, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.24658425504229017, | |
| "grad_norm": 0.29493409395217896, | |
| "learning_rate": 0.0001, | |
| "loss": 2.717, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.247234873129473, | |
| "grad_norm": 0.33975929021835327, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3499, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.24788549121665582, | |
| "grad_norm": 0.21486152708530426, | |
| "learning_rate": 0.0001, | |
| "loss": 2.306, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.24853610930383865, | |
| "grad_norm": 0.2686431109905243, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0942, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.24918672739102146, | |
| "grad_norm": 0.2812007963657379, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3729, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.2498373454782043, | |
| "grad_norm": 0.31875330209732056, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5766, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.2504879635653871, | |
| "grad_norm": 0.2624376714229584, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2057, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.2511385816525699, | |
| "grad_norm": 0.265286386013031, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2405, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.2517891997397528, | |
| "grad_norm": 0.3202246129512787, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2817, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.2524398178269356, | |
| "grad_norm": 0.22770161926746368, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9564, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.2530904359141184, | |
| "grad_norm": 0.3313138484954834, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4424, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.25374105400130126, | |
| "grad_norm": 0.2961839437484741, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4122, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.25439167208848407, | |
| "grad_norm": 0.24270308017730713, | |
| "learning_rate": 0.0001, | |
| "loss": 1.99, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.2550422901756669, | |
| "grad_norm": 0.2306670844554901, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3529, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.2556929082628497, | |
| "grad_norm": 0.28387176990509033, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0824, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.25634352635003255, | |
| "grad_norm": 0.3105824291706085, | |
| "learning_rate": 0.0001, | |
| "loss": 2.437, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.25699414443721535, | |
| "grad_norm": 0.1932361125946045, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9747, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.25764476252439816, | |
| "grad_norm": 0.31146278977394104, | |
| "learning_rate": 0.0001, | |
| "loss": 2.263, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.258295380611581, | |
| "grad_norm": 0.24420365691184998, | |
| "learning_rate": 0.0001, | |
| "loss": 2.015, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.25894599869876384, | |
| "grad_norm": 0.24144989252090454, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2536, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.25959661678594664, | |
| "grad_norm": 0.3478517532348633, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5835, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.26024723487312945, | |
| "grad_norm": 0.24381348490715027, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2439, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.2608978529603123, | |
| "grad_norm": 0.2834983468055725, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3991, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.2615484710474951, | |
| "grad_norm": 0.28689858317375183, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9156, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.26219908913467793, | |
| "grad_norm": 0.23692357540130615, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0189, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.26284970722186074, | |
| "grad_norm": 0.30104926228523254, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4945, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.2635003253090436, | |
| "grad_norm": 0.23472270369529724, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8892, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.2641509433962264, | |
| "grad_norm": 0.31508034467697144, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4935, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.2648015614834092, | |
| "grad_norm": 0.25103551149368286, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4428, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.2654521795705921, | |
| "grad_norm": 0.2387259602546692, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0989, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.2661027976577749, | |
| "grad_norm": 0.2606028616428375, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9494, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.2667534157449577, | |
| "grad_norm": 0.25114724040031433, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2432, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.2674040338321405, | |
| "grad_norm": 0.3072582483291626, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3506, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.2680546519193234, | |
| "grad_norm": 0.23917561769485474, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2665, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.2687052700065062, | |
| "grad_norm": 0.2120814174413681, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9625, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.269355888093689, | |
| "grad_norm": 0.22003813087940216, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1179, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.27000650618087185, | |
| "grad_norm": 0.33217060565948486, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6353, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.27065712426805466, | |
| "grad_norm": 0.2260630577802658, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0355, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.27130774235523747, | |
| "grad_norm": 0.30081093311309814, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1825, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.2719583604424203, | |
| "grad_norm": 0.27275893092155457, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6183, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.27260897852960314, | |
| "grad_norm": 0.4902358651161194, | |
| "learning_rate": 0.0001, | |
| "loss": 3.0888, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.27325959661678595, | |
| "grad_norm": 0.21213112771511078, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1172, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.27391021470396876, | |
| "grad_norm": 0.35953450202941895, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5109, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.2745608327911516, | |
| "grad_norm": 0.2081584334373474, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0894, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.27521145087833443, | |
| "grad_norm": 0.20892906188964844, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9643, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.27586206896551724, | |
| "grad_norm": 0.30058735609054565, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6503, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.27651268705270005, | |
| "grad_norm": 0.32902124524116516, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3271, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.2771633051398829, | |
| "grad_norm": 0.2003614902496338, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9881, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.2778139232270657, | |
| "grad_norm": 0.33349111676216125, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7625, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.2784645413142485, | |
| "grad_norm": 0.25051257014274597, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0825, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.27911515940143133, | |
| "grad_norm": 0.3301559388637543, | |
| "learning_rate": 0.0001, | |
| "loss": 2.85, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.2797657774886142, | |
| "grad_norm": 0.18224254250526428, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9687, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.280416395575797, | |
| "grad_norm": 0.21809989213943481, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2596, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.2810670136629798, | |
| "grad_norm": 0.2473779171705246, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2042, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.2817176317501627, | |
| "grad_norm": 0.20744885504245758, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1546, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.2823682498373455, | |
| "grad_norm": 0.2620698809623718, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5195, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.2830188679245283, | |
| "grad_norm": 0.291421115398407, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4983, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.2836694860117111, | |
| "grad_norm": 0.3294708728790283, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3146, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.28432010409889397, | |
| "grad_norm": 0.26191362738609314, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2818, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.2849707221860768, | |
| "grad_norm": 0.29155483841896057, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4888, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.2856213402732596, | |
| "grad_norm": 0.19482360780239105, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0061, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.28627195836044245, | |
| "grad_norm": 0.2594612240791321, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1891, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.28692257644762525, | |
| "grad_norm": 0.21656309068202972, | |
| "learning_rate": 0.0001, | |
| "loss": 1.7911, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.28757319453480806, | |
| "grad_norm": 0.18664829432964325, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9634, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.28822381262199087, | |
| "grad_norm": 0.2178332507610321, | |
| "learning_rate": 0.0001, | |
| "loss": 2.32, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.28887443070917374, | |
| "grad_norm": 0.351418673992157, | |
| "learning_rate": 0.0001, | |
| "loss": 3.0873, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.28952504879635654, | |
| "grad_norm": 0.23604457080364227, | |
| "learning_rate": 0.0001, | |
| "loss": 2.46, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.29017566688353935, | |
| "grad_norm": 0.2599848806858063, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0207, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.29082628497072216, | |
| "grad_norm": 0.340314120054245, | |
| "learning_rate": 0.0001, | |
| "loss": 2.279, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.291476903057905, | |
| "grad_norm": 0.23228399455547333, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3561, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.29212752114508783, | |
| "grad_norm": 0.25504687428474426, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2251, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.29277813923227064, | |
| "grad_norm": 0.2465014010667801, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1031, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.2934287573194535, | |
| "grad_norm": 0.2188328504562378, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1483, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.2940793754066363, | |
| "grad_norm": 0.24546551704406738, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2334, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.2947299934938191, | |
| "grad_norm": 0.23416215181350708, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1846, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.29538061158100193, | |
| "grad_norm": 0.25267231464385986, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2134, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.2960312296681848, | |
| "grad_norm": 0.26632416248321533, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5012, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.2966818477553676, | |
| "grad_norm": 0.18289139866828918, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0524, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.2973324658425504, | |
| "grad_norm": 0.19033563137054443, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0165, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.2979830839297333, | |
| "grad_norm": 0.200730562210083, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8021, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.2986337020169161, | |
| "grad_norm": 0.2109062522649765, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0655, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.2992843201040989, | |
| "grad_norm": 0.23461318016052246, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3335, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.2999349381912817, | |
| "grad_norm": 0.2085726112127304, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0061, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.30058555627846456, | |
| "grad_norm": 0.2938329875469208, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5245, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.30123617436564737, | |
| "grad_norm": 0.22131232917308807, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4115, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.3018867924528302, | |
| "grad_norm": 0.3459152579307556, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3896, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.302537410540013, | |
| "grad_norm": 0.27464184165000916, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6592, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.30318802862719585, | |
| "grad_norm": 0.28379327058792114, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1453, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.30383864671437866, | |
| "grad_norm": 0.28283926844596863, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1704, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.30448926480156147, | |
| "grad_norm": 0.22243599593639374, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1175, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.30513988288874433, | |
| "grad_norm": 0.22331124544143677, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8857, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.30579050097592714, | |
| "grad_norm": 0.21995989978313446, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1316, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.30644111906310995, | |
| "grad_norm": 0.21140341460704803, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0742, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.30709173715029275, | |
| "grad_norm": 0.31053757667541504, | |
| "learning_rate": 0.0001, | |
| "loss": 2.615, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.3077423552374756, | |
| "grad_norm": 0.2768484354019165, | |
| "learning_rate": 0.0001, | |
| "loss": 2.713, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.3083929733246584, | |
| "grad_norm": 0.2538318336009979, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1917, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.30904359141184123, | |
| "grad_norm": 0.2105240672826767, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2741, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.3096942094990241, | |
| "grad_norm": 0.2915903925895691, | |
| "learning_rate": 0.0001, | |
| "loss": 2.115, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.3103448275862069, | |
| "grad_norm": 0.30282047390937805, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7806, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.3109954456733897, | |
| "grad_norm": 0.2707601487636566, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6137, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.3116460637605725, | |
| "grad_norm": 0.34574300050735474, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5957, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.3122966818477554, | |
| "grad_norm": 0.22767509520053864, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3543, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.3129472999349382, | |
| "grad_norm": 0.25194215774536133, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6586, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.313597918022121, | |
| "grad_norm": 0.20427219569683075, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9091, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.3142485361093038, | |
| "grad_norm": 0.2993704378604889, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4704, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.3148991541964867, | |
| "grad_norm": 0.18951758742332458, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1108, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.3155497722836695, | |
| "grad_norm": 0.2622709572315216, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4144, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.3162003903708523, | |
| "grad_norm": 0.20735126733779907, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3065, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.31685100845803515, | |
| "grad_norm": 0.22782085835933685, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4377, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.31750162654521796, | |
| "grad_norm": 0.2568935453891754, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1199, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.31815224463240077, | |
| "grad_norm": 0.23917409777641296, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2457, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.3188028627195836, | |
| "grad_norm": 0.21531902253627777, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0489, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.31945348080676644, | |
| "grad_norm": 0.21461109817028046, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1915, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.32010409889394925, | |
| "grad_norm": 0.2458680123090744, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3939, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.32075471698113206, | |
| "grad_norm": 0.2617323696613312, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5611, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.3214053350683149, | |
| "grad_norm": 0.22562618553638458, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2703, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.32205595315549773, | |
| "grad_norm": 0.2290688008069992, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3049, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.32270657124268054, | |
| "grad_norm": 0.4118833541870117, | |
| "learning_rate": 0.0001, | |
| "loss": 2.9194, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.32335718932986335, | |
| "grad_norm": 0.22502999007701874, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2362, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.3240078074170462, | |
| "grad_norm": 0.23599191009998322, | |
| "learning_rate": 0.0001, | |
| "loss": 2.35, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.324658425504229, | |
| "grad_norm": 0.3065047860145569, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3984, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.32530904359141183, | |
| "grad_norm": 0.19241982698440552, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8787, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.3259596616785947, | |
| "grad_norm": 0.20695632696151733, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9397, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.3266102797657775, | |
| "grad_norm": 0.1998564749956131, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1463, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.3272608978529603, | |
| "grad_norm": 0.27775317430496216, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7956, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.3279115159401431, | |
| "grad_norm": 0.2393936961889267, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3785, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.328562134027326, | |
| "grad_norm": 0.20921163260936737, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1909, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.3292127521145088, | |
| "grad_norm": 0.25875911116600037, | |
| "learning_rate": 0.0001, | |
| "loss": 2.129, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.3298633702016916, | |
| "grad_norm": 0.2382909208536148, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3786, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.3305139882888744, | |
| "grad_norm": 0.19657136499881744, | |
| "learning_rate": 0.0001, | |
| "loss": 1.951, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.33116460637605727, | |
| "grad_norm": 0.23688004910945892, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4348, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.3318152244632401, | |
| "grad_norm": 0.1988734006881714, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2352, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.3324658425504229, | |
| "grad_norm": 0.2078763097524643, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1376, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.33311646063760575, | |
| "grad_norm": 0.18860888481140137, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9367, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.33376707872478856, | |
| "grad_norm": 0.30205249786376953, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6822, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.33441769681197137, | |
| "grad_norm": 0.2146618664264679, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1927, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.3350683148991542, | |
| "grad_norm": 0.19332504272460938, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0442, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.33571893298633704, | |
| "grad_norm": 0.2289431244134903, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0152, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.33636955107351985, | |
| "grad_norm": 0.21815945208072662, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0015, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.33702016916070265, | |
| "grad_norm": 0.2226189821958542, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2989, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.3376707872478855, | |
| "grad_norm": 0.22195078432559967, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2237, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.3383214053350683, | |
| "grad_norm": 0.1946515589952469, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9459, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.33897202342225113, | |
| "grad_norm": 0.21510568261146545, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1305, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.33962264150943394, | |
| "grad_norm": 0.23448903858661652, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1838, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.3402732595966168, | |
| "grad_norm": 0.19046911597251892, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9739, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.3409238776837996, | |
| "grad_norm": 0.2314033806324005, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2053, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.3415744957709824, | |
| "grad_norm": 0.2206612378358841, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2566, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.34222511385816523, | |
| "grad_norm": 0.19578076899051666, | |
| "learning_rate": 0.0001, | |
| "loss": 2.045, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.3428757319453481, | |
| "grad_norm": 0.1787755936384201, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8942, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.3435263500325309, | |
| "grad_norm": 0.20091751217842102, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1576, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.3441769681197137, | |
| "grad_norm": 0.21869762241840363, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1938, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.3448275862068966, | |
| "grad_norm": 0.26101449131965637, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3642, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.3454782042940794, | |
| "grad_norm": 0.21874766051769257, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4553, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.3461288223812622, | |
| "grad_norm": 0.224325492978096, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2959, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.346779440468445, | |
| "grad_norm": 0.21268363296985626, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1021, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.34743005855562786, | |
| "grad_norm": 0.20979231595993042, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0304, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.34808067664281067, | |
| "grad_norm": 0.19552691280841827, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9747, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.3487312947299935, | |
| "grad_norm": 0.27929842472076416, | |
| "learning_rate": 0.0001, | |
| "loss": 2.445, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.34938191281717634, | |
| "grad_norm": 0.19953188300132751, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9766, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.35003253090435915, | |
| "grad_norm": 0.29898926615715027, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4818, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.35068314899154196, | |
| "grad_norm": 0.18719644844532013, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9046, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.35133376707872477, | |
| "grad_norm": 0.2602563798427582, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1539, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.35198438516590763, | |
| "grad_norm": 0.23460406064987183, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3826, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.35263500325309044, | |
| "grad_norm": 0.2821134328842163, | |
| "learning_rate": 0.0001, | |
| "loss": 2.223, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.35328562134027325, | |
| "grad_norm": 0.2641044557094574, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2402, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.35393623942745606, | |
| "grad_norm": 0.21963565051555634, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3988, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.3545868575146389, | |
| "grad_norm": 0.26475685834884644, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3046, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.35523747560182173, | |
| "grad_norm": 0.27148157358169556, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5076, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.35588809368900454, | |
| "grad_norm": 0.28925588726997375, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8395, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.3565387117761874, | |
| "grad_norm": 0.22953632473945618, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1198, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.3571893298633702, | |
| "grad_norm": 0.23960557579994202, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3064, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.357839947950553, | |
| "grad_norm": 0.3133333921432495, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6034, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.3584905660377358, | |
| "grad_norm": 0.21745215356349945, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4553, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.3591411841249187, | |
| "grad_norm": 0.23547130823135376, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0469, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.3597918022121015, | |
| "grad_norm": 0.2646094262599945, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9016, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.3604424202992843, | |
| "grad_norm": 0.3079530596733093, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8979, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.36109303838646717, | |
| "grad_norm": 0.38223740458488464, | |
| "learning_rate": 0.0001, | |
| "loss": 3.066, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.36174365647365, | |
| "grad_norm": 0.2535337209701538, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1327, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.3623942745608328, | |
| "grad_norm": 0.2373637855052948, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1141, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.3630448926480156, | |
| "grad_norm": 0.19437271356582642, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9753, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.36369551073519846, | |
| "grad_norm": 0.20236878097057343, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2516, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.36434612882238127, | |
| "grad_norm": 0.21252363920211792, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3645, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.3649967469095641, | |
| "grad_norm": 0.21689258515834808, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1145, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.3656473649967469, | |
| "grad_norm": 0.22365228831768036, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3083, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.36629798308392975, | |
| "grad_norm": 0.21607807278633118, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3199, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.36694860117111255, | |
| "grad_norm": 0.1885683536529541, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9303, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.36759921925829536, | |
| "grad_norm": 0.20064905285835266, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0661, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.3682498373454782, | |
| "grad_norm": 0.23532240092754364, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6942, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.36890045543266103, | |
| "grad_norm": 0.22937807440757751, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1962, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.36955107351984384, | |
| "grad_norm": 0.2540866732597351, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5012, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.37020169160702665, | |
| "grad_norm": 0.23405294120311737, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2439, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.3708523096942095, | |
| "grad_norm": 0.24394820630550385, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0741, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.3715029277813923, | |
| "grad_norm": 0.2063736468553543, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0864, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.37215354586857513, | |
| "grad_norm": 0.3300686180591583, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4983, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.372804163955758, | |
| "grad_norm": 0.21294772624969482, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2273, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.3734547820429408, | |
| "grad_norm": 0.2629190981388092, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1732, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.3741054001301236, | |
| "grad_norm": 0.2141999751329422, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3038, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.3747560182173064, | |
| "grad_norm": 0.3467566668987274, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7748, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.3754066363044893, | |
| "grad_norm": 0.3112248182296753, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2376, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.3760572543916721, | |
| "grad_norm": 0.21217738091945648, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9146, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.3767078724788549, | |
| "grad_norm": 0.19359458982944489, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0913, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.37735849056603776, | |
| "grad_norm": 0.27635738253593445, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2855, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.37800910865322057, | |
| "grad_norm": 0.19366882741451263, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0194, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.3786597267404034, | |
| "grad_norm": 0.2016839236021042, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1519, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.3793103448275862, | |
| "grad_norm": 0.22154097259044647, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9849, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.37996096291476905, | |
| "grad_norm": 0.2089187502861023, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3624, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.38061158100195186, | |
| "grad_norm": 0.25050756335258484, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1773, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.38126219908913467, | |
| "grad_norm": 0.23007918894290924, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2054, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.3819128171763175, | |
| "grad_norm": 0.25022968649864197, | |
| "learning_rate": 0.0001, | |
| "loss": 2.219, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.38256343526350034, | |
| "grad_norm": 0.2205193042755127, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2049, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.38321405335068315, | |
| "grad_norm": 0.21454961597919464, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0683, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.38386467143786596, | |
| "grad_norm": 0.2088347226381302, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1301, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.3845152895250488, | |
| "grad_norm": 0.20322394371032715, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2098, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.38516590761223163, | |
| "grad_norm": 0.231514111161232, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5523, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.38581652569941444, | |
| "grad_norm": 0.24791982769966125, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2259, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.38646714378659724, | |
| "grad_norm": 0.21148578822612762, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0834, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.3871177618737801, | |
| "grad_norm": 0.263713538646698, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3101, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.3877683799609629, | |
| "grad_norm": 0.22197774052619934, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1173, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.3884189980481457, | |
| "grad_norm": 0.2237439900636673, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1109, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.3890696161353286, | |
| "grad_norm": 0.27451419830322266, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5311, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.3897202342225114, | |
| "grad_norm": 0.18475750088691711, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9241, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.3903708523096942, | |
| "grad_norm": 0.20120149850845337, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1033, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.391021470396877, | |
| "grad_norm": 0.19626259803771973, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1223, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.3916720884840599, | |
| "grad_norm": 0.22795897722244263, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2021, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.3923227065712427, | |
| "grad_norm": 0.5195867419242859, | |
| "learning_rate": 0.0001, | |
| "loss": 3.1849, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.3929733246584255, | |
| "grad_norm": 0.2636241614818573, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0739, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.3936239427456083, | |
| "grad_norm": 0.33922895789146423, | |
| "learning_rate": 0.0001, | |
| "loss": 2.31, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.39427456083279117, | |
| "grad_norm": 0.17467042803764343, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9201, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.394925178919974, | |
| "grad_norm": 0.22457371652126312, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9783, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.3955757970071568, | |
| "grad_norm": 0.5104444026947021, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3777, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.39622641509433965, | |
| "grad_norm": 0.4531616270542145, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8208, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.39687703318152245, | |
| "grad_norm": 0.20649151504039764, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1377, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.39752765126870526, | |
| "grad_norm": 0.39769667387008667, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2228, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.39817826935588807, | |
| "grad_norm": 0.2832731008529663, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9664, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.39882888744307093, | |
| "grad_norm": 0.2754386067390442, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5595, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.39947950553025374, | |
| "grad_norm": 0.404364675283432, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8133, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.40013012361743655, | |
| "grad_norm": 0.30304789543151855, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2729, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.4007807417046194, | |
| "grad_norm": 0.2519910931587219, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3655, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.4014313597918022, | |
| "grad_norm": 0.2863995134830475, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0774, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.40208197787898503, | |
| "grad_norm": 0.393622487783432, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5082, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.40273259596616784, | |
| "grad_norm": 0.21836060285568237, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9548, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.4033832140533507, | |
| "grad_norm": 0.358052521944046, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5158, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.4040338321405335, | |
| "grad_norm": 0.237140953540802, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2111, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.4046844502277163, | |
| "grad_norm": 0.20998883247375488, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1351, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.4053350683148991, | |
| "grad_norm": 0.18059247732162476, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9451, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.405985686402082, | |
| "grad_norm": 0.17532669007778168, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8591, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.4066363044892648, | |
| "grad_norm": 0.24097976088523865, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6534, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.4072869225764476, | |
| "grad_norm": 0.19505445659160614, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8952, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.40793754066363047, | |
| "grad_norm": 0.232722207903862, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2055, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.4085881587508133, | |
| "grad_norm": 0.23899732530117035, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5848, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.4092387768379961, | |
| "grad_norm": 0.2411729097366333, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5315, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.4098893949251789, | |
| "grad_norm": 0.25042012333869934, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4154, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.41054001301236176, | |
| "grad_norm": 0.2764488160610199, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0564, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.41119063109954457, | |
| "grad_norm": 0.24761155247688293, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3245, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.4118412491867274, | |
| "grad_norm": 0.22376200556755066, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1881, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.41249186727391024, | |
| "grad_norm": 0.19060148298740387, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9588, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.41314248536109305, | |
| "grad_norm": 0.4157400131225586, | |
| "learning_rate": 0.0001, | |
| "loss": 2.9024, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.41379310344827586, | |
| "grad_norm": 0.2557002007961273, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9819, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.41444372153545866, | |
| "grad_norm": 0.2908417880535126, | |
| "learning_rate": 0.0001, | |
| "loss": 2.112, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.41509433962264153, | |
| "grad_norm": 0.32937270402908325, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4976, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.41574495770982434, | |
| "grad_norm": 0.20382268726825714, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0448, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.41639557579700714, | |
| "grad_norm": 0.23484939336776733, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9514, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.41704619388418995, | |
| "grad_norm": 0.23023058474063873, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0768, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.4176968119713728, | |
| "grad_norm": 0.22951190173625946, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0764, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.4183474300585556, | |
| "grad_norm": 0.18971513211727142, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9693, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.41899804814573843, | |
| "grad_norm": 0.24955709278583527, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4898, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.4196486662329213, | |
| "grad_norm": 0.3344306945800781, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4779, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.4202992843201041, | |
| "grad_norm": 0.21661825478076935, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0472, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.4209499024072869, | |
| "grad_norm": 0.1972419023513794, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1712, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.4216005204944697, | |
| "grad_norm": 0.21619470417499542, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0739, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.4222511385816526, | |
| "grad_norm": 0.2329091727733612, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1362, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.4229017566688354, | |
| "grad_norm": 0.22971969842910767, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9898, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.4235523747560182, | |
| "grad_norm": 0.20185063779354095, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1008, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.42420299284320107, | |
| "grad_norm": 0.2658546566963196, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5734, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.4248536109303839, | |
| "grad_norm": 0.23109374940395355, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2569, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.4255042290175667, | |
| "grad_norm": 0.25115352869033813, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5967, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.4261548471047495, | |
| "grad_norm": 0.20470669865608215, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0302, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.42680546519193235, | |
| "grad_norm": 0.2151513546705246, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5183, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.42745608327911516, | |
| "grad_norm": 0.2571411728858948, | |
| "learning_rate": 0.0001, | |
| "loss": 2.255, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.42810670136629797, | |
| "grad_norm": 0.2414022833108902, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4076, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.42875731945348083, | |
| "grad_norm": 0.21041014790534973, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0091, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.42940793754066364, | |
| "grad_norm": 0.21241822838783264, | |
| "learning_rate": 0.0001, | |
| "loss": 2.355, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.43005855562784645, | |
| "grad_norm": 0.21031403541564941, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9887, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.43070917371502926, | |
| "grad_norm": 0.19765952229499817, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1555, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.4313597918022121, | |
| "grad_norm": 0.24740834534168243, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2349, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.43201040988939493, | |
| "grad_norm": 0.22086234390735626, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0948, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.43266102797657774, | |
| "grad_norm": 0.21949239075183868, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3905, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.43331164606376055, | |
| "grad_norm": 0.20536834001541138, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0547, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.4339622641509434, | |
| "grad_norm": 0.2570655941963196, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0261, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.4346128822381262, | |
| "grad_norm": 0.3293687701225281, | |
| "learning_rate": 0.0001, | |
| "loss": 2.344, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.435263500325309, | |
| "grad_norm": 0.22947120666503906, | |
| "learning_rate": 0.0001, | |
| "loss": 2.232, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.4359141184124919, | |
| "grad_norm": 0.2425599992275238, | |
| "learning_rate": 0.0001, | |
| "loss": 2.309, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.4365647364996747, | |
| "grad_norm": 0.2506352663040161, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1249, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.4372153545868575, | |
| "grad_norm": 0.19457192718982697, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9461, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.4378659726740403, | |
| "grad_norm": 0.3749271035194397, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8532, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.4385165907612232, | |
| "grad_norm": 0.25384366512298584, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6495, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.439167208848406, | |
| "grad_norm": 0.21413469314575195, | |
| "learning_rate": 0.0001, | |
| "loss": 2.084, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.4398178269355888, | |
| "grad_norm": 0.228125661611557, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2175, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.44046844502277166, | |
| "grad_norm": 0.1948491632938385, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9702, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.44111906310995447, | |
| "grad_norm": 0.307992547750473, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5884, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.4417696811971373, | |
| "grad_norm": 0.23681728541851044, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2104, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.4424202992843201, | |
| "grad_norm": 0.23185166716575623, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0823, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.44307091737150295, | |
| "grad_norm": 0.2772667109966278, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3729, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.44372153545868576, | |
| "grad_norm": 0.18908965587615967, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0585, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.44437215354586856, | |
| "grad_norm": 0.2063988745212555, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9474, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.4450227716330514, | |
| "grad_norm": 0.19444917142391205, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9269, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.44567338972023424, | |
| "grad_norm": 0.2866727113723755, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5145, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.44632400780741704, | |
| "grad_norm": 0.24801641702651978, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2954, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.44697462589459985, | |
| "grad_norm": 0.2115658074617386, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1956, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.4476252439817827, | |
| "grad_norm": 0.3155558109283447, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7396, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.4482758620689655, | |
| "grad_norm": 0.22418133914470673, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1066, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.44892648015614833, | |
| "grad_norm": 0.2707614600658417, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3353, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.44957709824333114, | |
| "grad_norm": 0.22262880206108093, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2143, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.450227716330514, | |
| "grad_norm": 0.25256767868995667, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2786, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.4508783344176968, | |
| "grad_norm": 0.20360921323299408, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0059, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.4515289525048796, | |
| "grad_norm": 0.20573420822620392, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0884, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.4521795705920625, | |
| "grad_norm": 0.31812623143196106, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5905, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.4528301886792453, | |
| "grad_norm": 0.24690969288349152, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5157, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.4534808067664281, | |
| "grad_norm": 0.256793737411499, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1548, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.4541314248536109, | |
| "grad_norm": 0.2659960985183716, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2977, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.4547820429407938, | |
| "grad_norm": 0.23824195563793182, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5946, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.4554326610279766, | |
| "grad_norm": 0.2580608129501343, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2608, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.4560832791151594, | |
| "grad_norm": 0.270622193813324, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5848, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.4567338972023422, | |
| "grad_norm": 0.2170489877462387, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4315, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.45738451528952506, | |
| "grad_norm": 0.20716050267219543, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1592, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.45803513337670787, | |
| "grad_norm": 0.24847671389579773, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3202, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.4586857514638907, | |
| "grad_norm": 0.24049146473407745, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1968, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.45933636955107354, | |
| "grad_norm": 0.2079533487558365, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2966, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.45998698763825635, | |
| "grad_norm": 0.18255428969860077, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9931, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.46063760572543916, | |
| "grad_norm": 0.28015655279159546, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2605, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.46128822381262197, | |
| "grad_norm": 0.27453094720840454, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2835, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.46193884189980483, | |
| "grad_norm": 0.2751506268978119, | |
| "learning_rate": 0.0001, | |
| "loss": 2.665, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.46258945998698764, | |
| "grad_norm": 0.2759210169315338, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3593, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.46324007807417045, | |
| "grad_norm": 0.2902829051017761, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7421, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.4638906961613533, | |
| "grad_norm": 0.24083854258060455, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4644, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.4645413142485361, | |
| "grad_norm": 0.23614934086799622, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2939, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.4651919323357189, | |
| "grad_norm": 0.1972537487745285, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9391, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.46584255042290174, | |
| "grad_norm": 0.2227838933467865, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9396, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.4664931685100846, | |
| "grad_norm": 0.3672918379306793, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7508, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.4671437865972674, | |
| "grad_norm": 0.2712246775627136, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2838, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.4677944046844502, | |
| "grad_norm": 0.2337927669286728, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9807, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.468445022771633, | |
| "grad_norm": 0.2051180601119995, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0311, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.4690956408588159, | |
| "grad_norm": 0.1965889185667038, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1114, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.4697462589459987, | |
| "grad_norm": 0.2106337547302246, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0792, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.4703968770331815, | |
| "grad_norm": 0.19918356835842133, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1323, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.47104749512036437, | |
| "grad_norm": 0.20124401152133942, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0008, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.4716981132075472, | |
| "grad_norm": 0.2172473967075348, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3891, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.47234873129473, | |
| "grad_norm": 0.2524811029434204, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3343, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.4729993493819128, | |
| "grad_norm": 0.22882957756519318, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6723, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.47364996746909566, | |
| "grad_norm": 0.2434161901473999, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9549, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.47430058555627846, | |
| "grad_norm": 0.19140364229679108, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0468, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.4749512036434613, | |
| "grad_norm": 0.22166937589645386, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3432, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.47560182173064414, | |
| "grad_norm": 0.2005748748779297, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0616, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.47625243981782694, | |
| "grad_norm": 0.3115980923175812, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6153, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.47690305790500975, | |
| "grad_norm": 0.27135169506073, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3225, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.47755367599219256, | |
| "grad_norm": 0.20748727023601532, | |
| "learning_rate": 0.0001, | |
| "loss": 1.834, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.4782042940793754, | |
| "grad_norm": 0.4031495153903961, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8177, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.47885491216655823, | |
| "grad_norm": 0.2978368401527405, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6178, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.47950553025374104, | |
| "grad_norm": 0.3466270864009857, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6031, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.4801561483409239, | |
| "grad_norm": 0.20074127614498138, | |
| "learning_rate": 0.0001, | |
| "loss": 2.247, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.4808067664281067, | |
| "grad_norm": 0.2393479198217392, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1265, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.4814573845152895, | |
| "grad_norm": 0.27758634090423584, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5025, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.48210800260247233, | |
| "grad_norm": 0.20123820006847382, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0083, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.4827586206896552, | |
| "grad_norm": 0.19012506306171417, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0212, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.483409238776838, | |
| "grad_norm": 0.19451047480106354, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0295, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.4840598568640208, | |
| "grad_norm": 0.3339052200317383, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4813, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.4847104749512036, | |
| "grad_norm": 0.2646152973175049, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4302, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.4853610930383865, | |
| "grad_norm": 0.23590324819087982, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1723, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.4860117111255693, | |
| "grad_norm": 0.28924039006233215, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8005, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.4866623292127521, | |
| "grad_norm": 0.21145464479923248, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3501, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.48731294729993496, | |
| "grad_norm": 0.22815656661987305, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1997, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.48796356538711777, | |
| "grad_norm": 0.24325215816497803, | |
| "learning_rate": 0.0001, | |
| "loss": 2.039, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.4886141834743006, | |
| "grad_norm": 0.3235335052013397, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4533, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.4892648015614834, | |
| "grad_norm": 0.25513559579849243, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3779, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.48991541964866625, | |
| "grad_norm": 0.2905427813529968, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9843, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.49056603773584906, | |
| "grad_norm": 0.23760183155536652, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1825, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.49121665582303187, | |
| "grad_norm": 0.2170071303844452, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9877, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.49186727391021473, | |
| "grad_norm": 0.2555190920829773, | |
| "learning_rate": 0.0001, | |
| "loss": 2.457, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.49251789199739754, | |
| "grad_norm": 0.2571033835411072, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1152, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.49316851008458035, | |
| "grad_norm": 0.23969238996505737, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3439, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.49381912817176316, | |
| "grad_norm": 0.1900262087583542, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8999, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.494469746258946, | |
| "grad_norm": 0.19621430337429047, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0658, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.4951203643461288, | |
| "grad_norm": 0.21956481039524078, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5427, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.49577098243331164, | |
| "grad_norm": 0.22567258775234222, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2777, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.49642160052049444, | |
| "grad_norm": 0.20233570039272308, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0342, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.4970722186076773, | |
| "grad_norm": 0.23662947118282318, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3668, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.4977228366948601, | |
| "grad_norm": 0.2625278830528259, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6536, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.4983734547820429, | |
| "grad_norm": 0.23235228657722473, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1891, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.4990240728692258, | |
| "grad_norm": 0.19439217448234558, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9647, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.4996746909564086, | |
| "grad_norm": 0.19810114800930023, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9965, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.5003253090435914, | |
| "grad_norm": 0.2525380253791809, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2444, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.5009759271307742, | |
| "grad_norm": 0.2409314513206482, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1717, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.501626545217957, | |
| "grad_norm": 0.25244686007499695, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0126, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.5022771633051398, | |
| "grad_norm": 0.19767141342163086, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1384, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.5029277813923227, | |
| "grad_norm": 0.39446812868118286, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8039, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.5035783994795056, | |
| "grad_norm": 0.2643390893936157, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1524, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.5042290175666884, | |
| "grad_norm": 0.27606508135795593, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1802, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.5048796356538712, | |
| "grad_norm": 0.364106148481369, | |
| "learning_rate": 0.0001, | |
| "loss": 2.9694, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.505530253741054, | |
| "grad_norm": 0.23091645538806915, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5471, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.5061808718282368, | |
| "grad_norm": 0.19318193197250366, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2082, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.5068314899154196, | |
| "grad_norm": 0.28997862339019775, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4399, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.5074821080026025, | |
| "grad_norm": 0.22487197816371918, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1946, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.5081327260897853, | |
| "grad_norm": 0.24430596828460693, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4456, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.5087833441769681, | |
| "grad_norm": 0.21677151322364807, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2082, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.5094339622641509, | |
| "grad_norm": 0.47995632886886597, | |
| "learning_rate": 0.0001, | |
| "loss": 3.1358, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.5100845803513337, | |
| "grad_norm": 0.19044414162635803, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8924, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.5107351984385166, | |
| "grad_norm": 0.19143608212471008, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0459, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.5113858165256994, | |
| "grad_norm": 0.22588413953781128, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1369, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.5120364346128823, | |
| "grad_norm": 0.2786167860031128, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2029, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.5126870527000651, | |
| "grad_norm": 0.24471627175807953, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1248, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.5133376707872479, | |
| "grad_norm": 0.17795225977897644, | |
| "learning_rate": 0.0001, | |
| "loss": 1.7926, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.5139882888744307, | |
| "grad_norm": 0.2173709124326706, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0538, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.5146389069616135, | |
| "grad_norm": 0.2027692049741745, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8568, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.5152895250487963, | |
| "grad_norm": 0.2013595849275589, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0501, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.5159401431359791, | |
| "grad_norm": 0.21996662020683289, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0374, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.516590761223162, | |
| "grad_norm": 0.21435722708702087, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1907, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.5172413793103449, | |
| "grad_norm": 0.21512284874916077, | |
| "learning_rate": 0.0001, | |
| "loss": 2.315, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.5178919973975277, | |
| "grad_norm": 0.19432400166988373, | |
| "learning_rate": 0.0001, | |
| "loss": 2.103, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.5185426154847105, | |
| "grad_norm": 0.23112992942333221, | |
| "learning_rate": 0.0001, | |
| "loss": 2.328, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.5191932335718933, | |
| "grad_norm": 0.19719737768173218, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9569, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.5198438516590761, | |
| "grad_norm": 0.2115892618894577, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2533, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.5204944697462589, | |
| "grad_norm": 0.24321842193603516, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6597, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.5211450878334418, | |
| "grad_norm": 0.18219350278377533, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8709, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.5217957059206246, | |
| "grad_norm": 0.18715021014213562, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0021, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.5224463240078074, | |
| "grad_norm": 0.25940024852752686, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3742, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.5230969420949902, | |
| "grad_norm": 0.18714728951454163, | |
| "learning_rate": 0.0001, | |
| "loss": 2.211, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.523747560182173, | |
| "grad_norm": 0.20145951211452484, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0047, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.5243981782693559, | |
| "grad_norm": 0.18992845714092255, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8559, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.5250487963565387, | |
| "grad_norm": 0.2682324945926666, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4791, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.5256994144437215, | |
| "grad_norm": 0.33034664392471313, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3089, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.5263500325309044, | |
| "grad_norm": 0.18838956952095032, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9462, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.5270006506180872, | |
| "grad_norm": 0.42872169613838196, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6874, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.52765126870527, | |
| "grad_norm": 0.2108643501996994, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3627, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.5283018867924528, | |
| "grad_norm": 0.21745599806308746, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1204, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.5289525048796356, | |
| "grad_norm": 0.2577585279941559, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9746, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.5296031229668184, | |
| "grad_norm": 0.372471421957016, | |
| "learning_rate": 0.0001, | |
| "loss": 2.688, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.5302537410540012, | |
| "grad_norm": 0.2425181120634079, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1377, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.5309043591411842, | |
| "grad_norm": 0.2638307511806488, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1088, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.531554977228367, | |
| "grad_norm": 0.2356933355331421, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2291, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.5322055953155498, | |
| "grad_norm": 0.23714864253997803, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0929, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.5328562134027326, | |
| "grad_norm": 0.19541950523853302, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0883, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.5335068314899154, | |
| "grad_norm": 0.3091617822647095, | |
| "learning_rate": 0.0001, | |
| "loss": 3.0127, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.5341574495770982, | |
| "grad_norm": 0.2592740058898926, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8307, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.534808067664281, | |
| "grad_norm": 0.22505807876586914, | |
| "learning_rate": 0.0001, | |
| "loss": 2.462, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.5354586857514639, | |
| "grad_norm": 0.22032824158668518, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2718, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.5361093038386467, | |
| "grad_norm": 0.2457459270954132, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4213, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.5367599219258296, | |
| "grad_norm": 0.24181683361530304, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9347, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.5374105400130124, | |
| "grad_norm": 0.29988738894462585, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7697, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.5380611581001952, | |
| "grad_norm": 0.24946388602256775, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2117, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.538711776187378, | |
| "grad_norm": 0.20339331030845642, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9936, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.5393623942745608, | |
| "grad_norm": 0.22250457108020782, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0785, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.5400130123617437, | |
| "grad_norm": 0.1869298666715622, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0406, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.5406636304489265, | |
| "grad_norm": 0.1873755156993866, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9126, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.5413142485361093, | |
| "grad_norm": 0.3135535418987274, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2881, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.5419648666232921, | |
| "grad_norm": 0.20596185326576233, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0682, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.5426154847104749, | |
| "grad_norm": 0.25786712765693665, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0591, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.5432661027976577, | |
| "grad_norm": 0.2592066824436188, | |
| "learning_rate": 0.0001, | |
| "loss": 2.052, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.5439167208848406, | |
| "grad_norm": 0.20738951861858368, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9726, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.5445673389720235, | |
| "grad_norm": 0.21384763717651367, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1897, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.5452179570592063, | |
| "grad_norm": 0.22050943970680237, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3597, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.5458685751463891, | |
| "grad_norm": 0.1996280699968338, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0492, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.5465191932335719, | |
| "grad_norm": 0.2430533468723297, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2774, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.5471698113207547, | |
| "grad_norm": 0.22777177393436432, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0779, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.5478204294079375, | |
| "grad_norm": 0.22464539110660553, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3316, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.5484710474951203, | |
| "grad_norm": 0.17759400606155396, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8407, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.5491216655823032, | |
| "grad_norm": 0.22264355421066284, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2869, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.549772283669486, | |
| "grad_norm": 0.20819737017154694, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1209, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.5504229017566689, | |
| "grad_norm": 0.2194463461637497, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1457, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.5510735198438517, | |
| "grad_norm": 0.19314661622047424, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1063, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.5517241379310345, | |
| "grad_norm": 0.186354860663414, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0833, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.5523747560182173, | |
| "grad_norm": 0.1862732619047165, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9441, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.5530253741054001, | |
| "grad_norm": 0.24664181470870972, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3277, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.5536759921925829, | |
| "grad_norm": 0.20182165503501892, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1902, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.5543266102797658, | |
| "grad_norm": 0.2108999788761139, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0826, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.5549772283669486, | |
| "grad_norm": 0.25388890504837036, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5149, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.5556278464541314, | |
| "grad_norm": 0.2074718177318573, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9135, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.5562784645413142, | |
| "grad_norm": 0.1992723047733307, | |
| "learning_rate": 0.0001, | |
| "loss": 2.186, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.556929082628497, | |
| "grad_norm": 0.18721085786819458, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9453, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.5575797007156799, | |
| "grad_norm": 0.21606992185115814, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1703, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.5582303188028627, | |
| "grad_norm": 0.2854723334312439, | |
| "learning_rate": 0.0001, | |
| "loss": 2.9538, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.5588809368900456, | |
| "grad_norm": 0.21503040194511414, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0194, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.5595315549772284, | |
| "grad_norm": 0.2690679430961609, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1562, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.5601821730644112, | |
| "grad_norm": 0.2811613976955414, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2475, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.560832791151594, | |
| "grad_norm": 0.2551681697368622, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5585, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.5614834092387768, | |
| "grad_norm": 0.21423856914043427, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1194, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.5621340273259596, | |
| "grad_norm": 0.22121264040470123, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9257, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.5627846454131424, | |
| "grad_norm": 0.38684332370758057, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5203, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.5634352635003254, | |
| "grad_norm": 0.20299634337425232, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0868, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.5640858815875082, | |
| "grad_norm": 0.33485493063926697, | |
| "learning_rate": 0.0001, | |
| "loss": 2.457, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.564736499674691, | |
| "grad_norm": 0.23778866231441498, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9863, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.5653871177618738, | |
| "grad_norm": 0.18562458455562592, | |
| "learning_rate": 0.0001, | |
| "loss": 1.915, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.5660377358490566, | |
| "grad_norm": 0.3780176341533661, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5518, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.5666883539362394, | |
| "grad_norm": 0.1924014538526535, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0665, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.5673389720234222, | |
| "grad_norm": 0.19788160920143127, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9408, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.5679895901106051, | |
| "grad_norm": 0.2435147911310196, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3716, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.5686402081977879, | |
| "grad_norm": 0.2023211270570755, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2786, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.5692908262849707, | |
| "grad_norm": 0.29936715960502625, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6689, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.5699414443721535, | |
| "grad_norm": 0.18846483528614044, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9436, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.5705920624593364, | |
| "grad_norm": 0.44592785835266113, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8648, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.5712426805465192, | |
| "grad_norm": 0.221640944480896, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1613, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.571893298633702, | |
| "grad_norm": 0.22345726191997528, | |
| "learning_rate": 0.0001, | |
| "loss": 2.076, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.5725439167208849, | |
| "grad_norm": 0.20094214379787445, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0474, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.5731945348080677, | |
| "grad_norm": 0.1997043937444687, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9812, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.5738451528952505, | |
| "grad_norm": 0.3758605420589447, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8357, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.5744957709824333, | |
| "grad_norm": 0.2940578758716583, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4955, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.5751463890696161, | |
| "grad_norm": 0.2434762865304947, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0011, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.5757970071567989, | |
| "grad_norm": 0.24335308372974396, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5458, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.5764476252439817, | |
| "grad_norm": 0.2063351422548294, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9801, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.5770982433311646, | |
| "grad_norm": 0.35102301836013794, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5647, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.5777488614183475, | |
| "grad_norm": 0.22332875430583954, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0542, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.5783994795055303, | |
| "grad_norm": 0.2073124796152115, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9348, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.5790500975927131, | |
| "grad_norm": 0.21079733967781067, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9829, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.5797007156798959, | |
| "grad_norm": 0.2842913866043091, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7215, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.5803513337670787, | |
| "grad_norm": 0.2807595133781433, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1827, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.5810019518542615, | |
| "grad_norm": 0.24955599009990692, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6246, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.5816525699414443, | |
| "grad_norm": 0.23281241953372955, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3944, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.5823031880286272, | |
| "grad_norm": 0.2617682218551636, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6147, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.58295380611581, | |
| "grad_norm": 0.1915360391139984, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0095, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.5836044242029929, | |
| "grad_norm": 0.20270249247550964, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8983, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.5842550422901757, | |
| "grad_norm": 0.21804624795913696, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0425, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.5849056603773585, | |
| "grad_norm": 0.25326576828956604, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4875, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.5855562784645413, | |
| "grad_norm": 0.21714434027671814, | |
| "learning_rate": 0.0001, | |
| "loss": 2.269, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.5862068965517241, | |
| "grad_norm": 0.22771766781806946, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3039, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.586857514638907, | |
| "grad_norm": 0.3638748824596405, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7448, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.5875081327260898, | |
| "grad_norm": 0.20194686949253082, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0141, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.5881587508132726, | |
| "grad_norm": 0.187494158744812, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1188, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.5888093689004554, | |
| "grad_norm": 0.23371635377407074, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6014, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.5894599869876382, | |
| "grad_norm": 0.2642146050930023, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2053, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.590110605074821, | |
| "grad_norm": 0.20045514404773712, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1828, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.5907612231620039, | |
| "grad_norm": 0.22904321551322937, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3128, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.5914118412491868, | |
| "grad_norm": 0.36857542395591736, | |
| "learning_rate": 0.0001, | |
| "loss": 3.3891, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.5920624593363696, | |
| "grad_norm": 0.3417764902114868, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6737, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.5927130774235524, | |
| "grad_norm": 0.46861669421195984, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5329, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.5933636955107352, | |
| "grad_norm": 0.32909440994262695, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4894, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.594014313597918, | |
| "grad_norm": 0.2176060974597931, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9696, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.5946649316851008, | |
| "grad_norm": 0.27317941188812256, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2179, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.5953155497722836, | |
| "grad_norm": 0.267123281955719, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5464, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.5959661678594665, | |
| "grad_norm": 0.320402055978775, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5021, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.5966167859466494, | |
| "grad_norm": 0.20610998570919037, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0586, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.5972674040338322, | |
| "grad_norm": 0.2108345478773117, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3278, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.597918022121015, | |
| "grad_norm": 0.18368126451969147, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1026, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.5985686402081978, | |
| "grad_norm": 0.20730890333652496, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1936, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.5992192582953806, | |
| "grad_norm": 0.2921161651611328, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5618, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.5998698763825634, | |
| "grad_norm": 0.23977220058441162, | |
| "learning_rate": 0.0001, | |
| "loss": 2.533, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.6005204944697463, | |
| "grad_norm": 0.25839105248451233, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7033, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.6011711125569291, | |
| "grad_norm": 0.214335098862648, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9153, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.6018217306441119, | |
| "grad_norm": 0.19577006995677948, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8612, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.6024723487312947, | |
| "grad_norm": 0.22480078041553497, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2383, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.6031229668184775, | |
| "grad_norm": 0.2090427577495575, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9532, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.6037735849056604, | |
| "grad_norm": 0.21045666933059692, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1285, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.6044242029928432, | |
| "grad_norm": 0.2302238792181015, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5368, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.605074821080026, | |
| "grad_norm": 0.22230245172977448, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0551, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.6057254391672089, | |
| "grad_norm": 0.2619292140007019, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5149, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.6063760572543917, | |
| "grad_norm": 0.20247308909893036, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0032, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.6070266753415745, | |
| "grad_norm": 0.19772449135780334, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9627, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.6076772934287573, | |
| "grad_norm": 0.1917680948972702, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9659, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.6083279115159401, | |
| "grad_norm": 0.3457018733024597, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4537, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.6089785296031229, | |
| "grad_norm": 0.2027028501033783, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1681, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.6096291476903057, | |
| "grad_norm": 0.24525637924671173, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0816, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 0.6102797657774887, | |
| "grad_norm": 0.2690584659576416, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7011, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.6109303838646715, | |
| "grad_norm": 0.20961976051330566, | |
| "learning_rate": 0.0001, | |
| "loss": 2.576, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.6115810019518543, | |
| "grad_norm": 0.21827319264411926, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2605, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.6122316200390371, | |
| "grad_norm": 0.20448362827301025, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9963, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.6128822381262199, | |
| "grad_norm": 0.2513864040374756, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4111, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.6135328562134027, | |
| "grad_norm": 0.28347763419151306, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3459, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.6141834743005855, | |
| "grad_norm": 0.20679716765880585, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9423, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.6148340923877684, | |
| "grad_norm": 0.20072445273399353, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.6154847104749512, | |
| "grad_norm": 0.2190425843000412, | |
| "learning_rate": 0.0001, | |
| "loss": 2.358, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.616135328562134, | |
| "grad_norm": 0.2672726511955261, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5034, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.6167859466493169, | |
| "grad_norm": 0.20329232513904572, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2972, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.6174365647364997, | |
| "grad_norm": 0.21593444049358368, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8221, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 0.6180871828236825, | |
| "grad_norm": 0.22062361240386963, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2051, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.6187378009108653, | |
| "grad_norm": 0.20640413463115692, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1973, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 0.6193884189980482, | |
| "grad_norm": 0.18919388949871063, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1166, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.620039037085231, | |
| "grad_norm": 0.18566597998142242, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9342, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 0.6206896551724138, | |
| "grad_norm": 0.3724953234195709, | |
| "learning_rate": 0.0001, | |
| "loss": 3.0303, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.6213402732595966, | |
| "grad_norm": 0.24559584259986877, | |
| "learning_rate": 0.0001, | |
| "loss": 2.387, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.6219908913467794, | |
| "grad_norm": 0.20384235680103302, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1224, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.6226415094339622, | |
| "grad_norm": 0.3225831687450409, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4856, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 0.623292127521145, | |
| "grad_norm": 0.21676267683506012, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3457, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.623942745608328, | |
| "grad_norm": 0.21707187592983246, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3985, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.6245933636955108, | |
| "grad_norm": 0.311277836561203, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3087, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.6252439817826936, | |
| "grad_norm": 0.18904085457324982, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9421, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 0.6258945998698764, | |
| "grad_norm": 0.39046210050582886, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7524, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.6265452179570592, | |
| "grad_norm": 0.18455897271633148, | |
| "learning_rate": 0.0001, | |
| "loss": 1.7536, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 0.627195836044242, | |
| "grad_norm": 0.1874053180217743, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0853, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.6278464541314248, | |
| "grad_norm": 0.24766068160533905, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8099, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.6284970722186076, | |
| "grad_norm": 0.20977729558944702, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0339, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.6291476903057905, | |
| "grad_norm": 0.2659202516078949, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1282, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 0.6297983083929733, | |
| "grad_norm": 0.23760046064853668, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4225, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.6304489264801562, | |
| "grad_norm": 0.1884511113166809, | |
| "learning_rate": 0.0001, | |
| "loss": 1.972, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 0.631099544567339, | |
| "grad_norm": 0.2816404402256012, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6831, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.6317501626545218, | |
| "grad_norm": 0.1874386966228485, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0042, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 0.6324007807417046, | |
| "grad_norm": 0.21592558920383453, | |
| "learning_rate": 0.0001, | |
| "loss": 2.338, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.6330513988288874, | |
| "grad_norm": 0.22190915048122406, | |
| "learning_rate": 0.0001, | |
| "loss": 2.23, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 0.6337020169160703, | |
| "grad_norm": 0.23270365595817566, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1849, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.6343526350032531, | |
| "grad_norm": 0.20524165034294128, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8509, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.6350032530904359, | |
| "grad_norm": 0.27826493978500366, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6736, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.6356538711776187, | |
| "grad_norm": 0.19887575507164001, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1369, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 0.6363044892648015, | |
| "grad_norm": 0.3760605752468109, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7617, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.6369551073519844, | |
| "grad_norm": 0.2116486132144928, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1353, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 0.6376057254391672, | |
| "grad_norm": 0.20685400068759918, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2221, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.6382563435263501, | |
| "grad_norm": 0.25631460547447205, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2755, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 0.6389069616135329, | |
| "grad_norm": 0.2831932604312897, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2544, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.6395575797007157, | |
| "grad_norm": 0.19301310181617737, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1736, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 0.6402081977878985, | |
| "grad_norm": 0.18511143326759338, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8847, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.6408588158750813, | |
| "grad_norm": 0.23753167688846588, | |
| "learning_rate": 0.0001, | |
| "loss": 2.131, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.6415094339622641, | |
| "grad_norm": 0.24566152691841125, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2071, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.6421600520494469, | |
| "grad_norm": 0.21481812000274658, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0292, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 0.6428106701366298, | |
| "grad_norm": 0.3042278587818146, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6444, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.6434612882238127, | |
| "grad_norm": 0.30741778016090393, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5146, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 0.6441119063109955, | |
| "grad_norm": 0.40835896134376526, | |
| "learning_rate": 0.0001, | |
| "loss": 2.9053, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.6447625243981783, | |
| "grad_norm": 0.21121574938297272, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4513, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 0.6454131424853611, | |
| "grad_norm": 0.2634606659412384, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3141, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.6460637605725439, | |
| "grad_norm": 0.2463708072900772, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4421, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 0.6467143786597267, | |
| "grad_norm": 0.25485244393348694, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3788, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.6473649967469096, | |
| "grad_norm": 0.20773370563983917, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9861, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.6480156148340924, | |
| "grad_norm": 0.20728078484535217, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3341, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.6486662329212752, | |
| "grad_norm": 0.26925981044769287, | |
| "learning_rate": 0.0001, | |
| "loss": 2.9172, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 0.649316851008458, | |
| "grad_norm": 0.21403877437114716, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1318, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.6499674690956408, | |
| "grad_norm": 0.2597064673900604, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4316, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 0.6506180871828237, | |
| "grad_norm": 0.26858747005462646, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2716, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.6512687052700065, | |
| "grad_norm": 0.5603036880493164, | |
| "learning_rate": 0.0001, | |
| "loss": 3.1137, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 0.6519193233571894, | |
| "grad_norm": 0.2423018366098404, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2346, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.6525699414443722, | |
| "grad_norm": 0.22914621233940125, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2852, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 0.653220559531555, | |
| "grad_norm": 0.22781658172607422, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1961, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.6538711776187378, | |
| "grad_norm": 0.2614092528820038, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0631, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.6545217957059206, | |
| "grad_norm": 0.23658867180347443, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0379, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.6551724137931034, | |
| "grad_norm": 0.20862211287021637, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2786, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 0.6558230318802862, | |
| "grad_norm": 0.2251960188150406, | |
| "learning_rate": 0.0001, | |
| "loss": 2.06, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.656473649967469, | |
| "grad_norm": 0.2885074317455292, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2583, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 0.657124268054652, | |
| "grad_norm": 0.20309656858444214, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1557, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.6577748861418348, | |
| "grad_norm": 0.20139531791210175, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3419, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 0.6584255042290176, | |
| "grad_norm": 0.2853332757949829, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1415, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.6590761223162004, | |
| "grad_norm": 0.2907620966434479, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4452, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 0.6597267404033832, | |
| "grad_norm": 0.18982461094856262, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0215, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.660377358490566, | |
| "grad_norm": 0.20890061557292938, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0383, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.6610279765777488, | |
| "grad_norm": 0.21294118463993073, | |
| "learning_rate": 0.0001, | |
| "loss": 1.7722, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.6616785946649317, | |
| "grad_norm": 0.22494040429592133, | |
| "learning_rate": 0.0001, | |
| "loss": 2.034, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 0.6623292127521145, | |
| "grad_norm": 0.25089555978775024, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3322, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.6629798308392973, | |
| "grad_norm": 0.18898023664951324, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9914, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 0.6636304489264802, | |
| "grad_norm": 0.221091166138649, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1613, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.664281067013663, | |
| "grad_norm": 0.22317297756671906, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3438, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 0.6649316851008458, | |
| "grad_norm": 0.18826670944690704, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0218, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.6655823031880286, | |
| "grad_norm": 0.22612391412258148, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2931, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 0.6662329212752115, | |
| "grad_norm": 0.3006114959716797, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4949, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.6668835393623943, | |
| "grad_norm": 0.1835569143295288, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9396, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.6675341574495771, | |
| "grad_norm": 0.19352416694164276, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0038, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.6681847755367599, | |
| "grad_norm": 0.2259102463722229, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1818, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 0.6688353936239427, | |
| "grad_norm": 0.20237034559249878, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3196, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.6694860117111255, | |
| "grad_norm": 0.1844060719013214, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1389, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 0.6701366297983083, | |
| "grad_norm": 0.21057841181755066, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0058, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.6707872478854913, | |
| "grad_norm": 0.20054426789283752, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2874, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 0.6714378659726741, | |
| "grad_norm": 0.2507307529449463, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4245, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.6720884840598569, | |
| "grad_norm": 0.21066251397132874, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1688, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 0.6727391021470397, | |
| "grad_norm": 0.22210632264614105, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1985, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.6733897202342225, | |
| "grad_norm": 0.21617744863033295, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5918, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.6740403383214053, | |
| "grad_norm": 0.46473971009254456, | |
| "learning_rate": 0.0001, | |
| "loss": 2.9341, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.6746909564085881, | |
| "grad_norm": 0.20464558899402618, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1654, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 0.675341574495771, | |
| "grad_norm": 0.212956503033638, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1959, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.6759921925829538, | |
| "grad_norm": 0.2572340667247772, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4918, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 0.6766428106701367, | |
| "grad_norm": 0.3264685273170471, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8708, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.6772934287573195, | |
| "grad_norm": 0.22119931876659393, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2222, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 0.6779440468445023, | |
| "grad_norm": 0.24374569952487946, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2457, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.6785946649316851, | |
| "grad_norm": 0.2548108696937561, | |
| "learning_rate": 0.0001, | |
| "loss": 2.485, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 0.6792452830188679, | |
| "grad_norm": 0.20976418256759644, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3068, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.6798959011060507, | |
| "grad_norm": 0.25135618448257446, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1083, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.6805465191932336, | |
| "grad_norm": 0.2677728831768036, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4257, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.6811971372804164, | |
| "grad_norm": 0.20250125229358673, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0643, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 0.6818477553675992, | |
| "grad_norm": 0.20850299298763275, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0383, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.682498373454782, | |
| "grad_norm": 0.21116970479488373, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0259, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 0.6831489915419648, | |
| "grad_norm": 0.2572707235813141, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1982, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.6837996096291477, | |
| "grad_norm": 0.2010831981897354, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0687, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 0.6844502277163305, | |
| "grad_norm": 0.23995356261730194, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1938, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.6851008458035134, | |
| "grad_norm": 0.21428103744983673, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2514, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 0.6857514638906962, | |
| "grad_norm": 0.21370433270931244, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2523, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.686402081977879, | |
| "grad_norm": 0.2131800800561905, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2413, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.6870527000650618, | |
| "grad_norm": 0.20007681846618652, | |
| "learning_rate": 0.0001, | |
| "loss": 2.176, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.6877033181522446, | |
| "grad_norm": 0.2108153998851776, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1081, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 0.6883539362394274, | |
| "grad_norm": 0.19952858984470367, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0249, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.6890045543266102, | |
| "grad_norm": 0.20590882003307343, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1949, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 0.6896551724137931, | |
| "grad_norm": 0.2126530408859253, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2726, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.690305790500976, | |
| "grad_norm": 0.30162468552589417, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5032, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 0.6909564085881588, | |
| "grad_norm": 0.24452462792396545, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3021, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.6916070266753416, | |
| "grad_norm": 0.17819760739803314, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9628, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 0.6922576447625244, | |
| "grad_norm": 0.17437471449375153, | |
| "learning_rate": 0.0001, | |
| "loss": 1.879, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.6929082628497072, | |
| "grad_norm": 0.3003963232040405, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4695, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.69355888093689, | |
| "grad_norm": 0.2007562667131424, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9754, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.6942094990240729, | |
| "grad_norm": 0.21425336599349976, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1767, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 0.6948601171112557, | |
| "grad_norm": 0.20287302136421204, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9933, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.6955107351984385, | |
| "grad_norm": 0.2762700021266937, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1079, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 0.6961613532856213, | |
| "grad_norm": 0.18358288705348969, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9445, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.6968119713728042, | |
| "grad_norm": 0.21157526969909668, | |
| "learning_rate": 0.0001, | |
| "loss": 2.169, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 0.697462589459987, | |
| "grad_norm": 0.1847715675830841, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0757, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.6981132075471698, | |
| "grad_norm": 0.1923181712627411, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2365, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 0.6987638256343527, | |
| "grad_norm": 0.26491835713386536, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4613, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.6994144437215355, | |
| "grad_norm": 0.17674419283866882, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9706, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.7000650618087183, | |
| "grad_norm": 0.19894379377365112, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9227, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.7007156798959011, | |
| "grad_norm": 0.19496971368789673, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1783, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 0.7013662979830839, | |
| "grad_norm": 0.20685461163520813, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1542, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.7020169160702667, | |
| "grad_norm": 0.23061524331569672, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3346, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 0.7026675341574495, | |
| "grad_norm": 0.2044321447610855, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0157, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.7033181522446325, | |
| "grad_norm": 0.18851466476917267, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2045, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 0.7039687703318153, | |
| "grad_norm": 0.18530018627643585, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0695, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.7046193884189981, | |
| "grad_norm": 0.23562023043632507, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3919, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 0.7052700065061809, | |
| "grad_norm": 0.22246116399765015, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5821, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.7059206245933637, | |
| "grad_norm": 0.2134729027748108, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2181, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.7065712426805465, | |
| "grad_norm": 0.29674917459487915, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5069, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.7072218607677293, | |
| "grad_norm": 0.2098974883556366, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3307, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 0.7078724788549121, | |
| "grad_norm": 0.27041876316070557, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8081, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.708523096942095, | |
| "grad_norm": 0.19734299182891846, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0588, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 0.7091737150292778, | |
| "grad_norm": 0.22952257096767426, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2607, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.7098243331164606, | |
| "grad_norm": 0.20846691727638245, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1657, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 0.7104749512036435, | |
| "grad_norm": 0.19664259254932404, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1256, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.7111255692908263, | |
| "grad_norm": 0.23994791507720947, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5377, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 0.7117761873780091, | |
| "grad_norm": 0.22439789772033691, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6225, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 0.7124268054651919, | |
| "grad_norm": 0.20211316645145416, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0582, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.7130774235523748, | |
| "grad_norm": 0.23308198153972626, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4341, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.7137280416395576, | |
| "grad_norm": 0.17806245386600494, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0211, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 0.7143786597267404, | |
| "grad_norm": 0.20525243878364563, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1248, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 0.7150292778139232, | |
| "grad_norm": 0.22835716605186462, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2993, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 0.715679895901106, | |
| "grad_norm": 0.37078213691711426, | |
| "learning_rate": 0.0001, | |
| "loss": 3.1289, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.7163305139882888, | |
| "grad_norm": 0.22253082692623138, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2304, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 0.7169811320754716, | |
| "grad_norm": 0.20494401454925537, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9473, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 0.7176317501626546, | |
| "grad_norm": 0.22128112614154816, | |
| "learning_rate": 0.0001, | |
| "loss": 1.993, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 0.7182823682498374, | |
| "grad_norm": 0.20786182582378387, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0048, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.7189329863370202, | |
| "grad_norm": 0.27697819471359253, | |
| "learning_rate": 0.0001, | |
| "loss": 2.372, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.719583604424203, | |
| "grad_norm": 0.26237788796424866, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9573, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 0.7202342225113858, | |
| "grad_norm": 0.2544906437397003, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2805, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 0.7208848405985686, | |
| "grad_norm": 0.2175043374300003, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3201, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 0.7215354586857514, | |
| "grad_norm": 0.19637277722358704, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8868, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 0.7221860767729343, | |
| "grad_norm": 0.19888024032115936, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0324, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.7228366948601171, | |
| "grad_norm": 0.20008981227874756, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2898, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 0.7234873129473, | |
| "grad_norm": 0.25185343623161316, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2424, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 0.7241379310344828, | |
| "grad_norm": 0.2434062957763672, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2884, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 0.7247885491216656, | |
| "grad_norm": 0.2278825044631958, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1751, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 0.7254391672088484, | |
| "grad_norm": 0.23180316388607025, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6033, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.7260897852960312, | |
| "grad_norm": 0.18574117124080658, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3172, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 0.7267404033832141, | |
| "grad_norm": 0.286155641078949, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0482, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 0.7273910214703969, | |
| "grad_norm": 0.1757357120513916, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8881, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 0.7280416395575797, | |
| "grad_norm": 0.25008201599121094, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3797, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 0.7286922576447625, | |
| "grad_norm": 0.29816892743110657, | |
| "learning_rate": 0.0001, | |
| "loss": 2.9163, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.7293428757319453, | |
| "grad_norm": 0.1951293647289276, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0613, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 0.7299934938191281, | |
| "grad_norm": 0.23593062162399292, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2103, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 0.730644111906311, | |
| "grad_norm": 0.18619036674499512, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9223, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 0.7312947299934938, | |
| "grad_norm": 0.20853224396705627, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2651, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 0.7319453480806767, | |
| "grad_norm": 0.27427271008491516, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3866, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.7325959661678595, | |
| "grad_norm": 0.35531318187713623, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8333, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 0.7332465842550423, | |
| "grad_norm": 0.21375155448913574, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0703, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 0.7338972023422251, | |
| "grad_norm": 0.24240247905254364, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3032, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 0.7345478204294079, | |
| "grad_norm": 0.2277136594057083, | |
| "learning_rate": 0.0001, | |
| "loss": 2.585, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 0.7351984385165907, | |
| "grad_norm": 0.20665140450000763, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1351, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.7358490566037735, | |
| "grad_norm": 0.2534540891647339, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5023, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 0.7364996746909565, | |
| "grad_norm": 0.19695554673671722, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9286, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 0.7371502927781393, | |
| "grad_norm": 0.18500645458698273, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0609, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 0.7378009108653221, | |
| "grad_norm": 0.2103162556886673, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2247, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 0.7384515289525049, | |
| "grad_norm": 0.20303300023078918, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1164, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.7391021470396877, | |
| "grad_norm": 0.23574739694595337, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6325, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 0.7397527651268705, | |
| "grad_norm": 0.2764929234981537, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3049, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 0.7404033832140533, | |
| "grad_norm": 0.23995018005371094, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3196, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 0.7410540013012362, | |
| "grad_norm": 0.19074063003063202, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1566, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 0.741704619388419, | |
| "grad_norm": 0.18186306953430176, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9629, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.7423552374756018, | |
| "grad_norm": 0.23841345310211182, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1942, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 0.7430058555627846, | |
| "grad_norm": 0.19697019457817078, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0186, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 0.7436564736499675, | |
| "grad_norm": 0.2117876410484314, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4395, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 0.7443070917371503, | |
| "grad_norm": 0.26921918988227844, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4332, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 0.7449577098243331, | |
| "grad_norm": 0.18999671936035156, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0209, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.745608327911516, | |
| "grad_norm": 0.22686484456062317, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4369, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 0.7462589459986988, | |
| "grad_norm": 0.22974656522274017, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3737, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 0.7469095640858816, | |
| "grad_norm": 0.19007977843284607, | |
| "learning_rate": 0.0001, | |
| "loss": 2.145, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 0.7475601821730644, | |
| "grad_norm": 0.23000845313072205, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0555, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 0.7482108002602472, | |
| "grad_norm": 0.33339783549308777, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7318, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.74886141834743, | |
| "grad_norm": 0.18458595871925354, | |
| "learning_rate": 0.0001, | |
| "loss": 1.7868, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 0.7495120364346128, | |
| "grad_norm": 0.2283509373664856, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2609, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.7501626545217958, | |
| "grad_norm": 0.31175729632377625, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5524, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 0.7508132726089786, | |
| "grad_norm": 0.18617112934589386, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2029, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 0.7514638906961614, | |
| "grad_norm": 0.28690317273139954, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4705, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.7521145087833442, | |
| "grad_norm": 0.2267671674489975, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1093, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 0.752765126870527, | |
| "grad_norm": 0.21956512331962585, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0962, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 0.7534157449577098, | |
| "grad_norm": 0.2681393027305603, | |
| "learning_rate": 0.0001, | |
| "loss": 2.35, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 0.7540663630448926, | |
| "grad_norm": 0.23306699097156525, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4911, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 0.7547169811320755, | |
| "grad_norm": 0.3148876428604126, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8802, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.7553675992192583, | |
| "grad_norm": 0.2260347157716751, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9286, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 0.7560182173064411, | |
| "grad_norm": 0.24939195811748505, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3544, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 0.756668835393624, | |
| "grad_norm": 0.21007601916790009, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0132, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 0.7573194534808068, | |
| "grad_norm": 0.2570975720882416, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9665, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 0.7579700715679896, | |
| "grad_norm": 0.2818357050418854, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2252, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.7586206896551724, | |
| "grad_norm": 0.22388941049575806, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4553, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 0.7592713077423552, | |
| "grad_norm": 0.22799374163150787, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4447, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 0.7599219258295381, | |
| "grad_norm": 0.2610357105731964, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4024, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 0.7605725439167209, | |
| "grad_norm": 0.39793217182159424, | |
| "learning_rate": 0.0001, | |
| "loss": 3.1529, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 0.7612231620039037, | |
| "grad_norm": 0.19805116951465607, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9483, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.7618737800910865, | |
| "grad_norm": 0.208368182182312, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1785, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 0.7625243981782693, | |
| "grad_norm": 0.25101637840270996, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2517, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 0.7631750162654521, | |
| "grad_norm": 0.27432793378829956, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4759, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 0.763825634352635, | |
| "grad_norm": 0.18746371567249298, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0188, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 0.7644762524398179, | |
| "grad_norm": 0.2882263958454132, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2948, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.7651268705270007, | |
| "grad_norm": 0.22075092792510986, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4894, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.7657774886141835, | |
| "grad_norm": 0.20792776346206665, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8502, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 0.7664281067013663, | |
| "grad_norm": 0.2436477392911911, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1296, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 0.7670787247885491, | |
| "grad_norm": 0.2839182913303375, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8409, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 0.7677293428757319, | |
| "grad_norm": 0.1826743334531784, | |
| "learning_rate": 0.0001, | |
| "loss": 1.941, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.7683799609629147, | |
| "grad_norm": 0.2757255434989929, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7297, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 0.7690305790500976, | |
| "grad_norm": 0.23313826322555542, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8796, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 0.7696811971372804, | |
| "grad_norm": 0.28900882601737976, | |
| "learning_rate": 0.0001, | |
| "loss": 2.313, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 0.7703318152244633, | |
| "grad_norm": 0.32883039116859436, | |
| "learning_rate": 0.0001, | |
| "loss": 3.041, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 0.7709824333116461, | |
| "grad_norm": 0.2116912454366684, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9891, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.7716330513988289, | |
| "grad_norm": 0.2055017203092575, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9567, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 0.7722836694860117, | |
| "grad_norm": 0.2978801131248474, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3322, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 0.7729342875731945, | |
| "grad_norm": 0.21910034120082855, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0262, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 0.7735849056603774, | |
| "grad_norm": 0.19952894747257233, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0621, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 0.7742355237475602, | |
| "grad_norm": 0.20744554698467255, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1154, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.774886141834743, | |
| "grad_norm": 0.23886847496032715, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3023, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 0.7755367599219258, | |
| "grad_norm": 0.20722374320030212, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2384, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 0.7761873780091086, | |
| "grad_norm": 0.23317816853523254, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6381, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 0.7768379960962914, | |
| "grad_norm": 0.2527480125427246, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1711, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 0.7774886141834743, | |
| "grad_norm": 0.23817451298236847, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6561, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.7781392322706572, | |
| "grad_norm": 0.2609005570411682, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5488, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 0.77878985035784, | |
| "grad_norm": 0.19870908558368683, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0435, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 0.7794404684450228, | |
| "grad_norm": 0.20385386049747467, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9711, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 0.7800910865322056, | |
| "grad_norm": 0.20179738104343414, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0247, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 0.7807417046193884, | |
| "grad_norm": 0.40090981125831604, | |
| "learning_rate": 0.0001, | |
| "loss": 2.795, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.7813923227065712, | |
| "grad_norm": 0.1885748654603958, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1588, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 0.782042940793754, | |
| "grad_norm": 0.21952667832374573, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0901, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 0.7826935588809368, | |
| "grad_norm": 0.2344968616962433, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9943, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 0.7833441769681198, | |
| "grad_norm": 0.3153589069843292, | |
| "learning_rate": 0.0001, | |
| "loss": 2.59, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 0.7839947950553026, | |
| "grad_norm": 0.1870599389076233, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9435, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.7846454131424854, | |
| "grad_norm": 0.189214825630188, | |
| "learning_rate": 0.0001, | |
| "loss": 2.128, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 0.7852960312296682, | |
| "grad_norm": 0.22551633417606354, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3913, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 0.785946649316851, | |
| "grad_norm": 0.19963033497333527, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1456, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 0.7865972674040338, | |
| "grad_norm": 0.2087828814983368, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3486, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 0.7872478854912166, | |
| "grad_norm": 0.19814416766166687, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0208, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.7878985035783995, | |
| "grad_norm": 0.20670342445373535, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1276, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 0.7885491216655823, | |
| "grad_norm": 0.1881658136844635, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0502, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 0.7891997397527651, | |
| "grad_norm": 0.2015887349843979, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2935, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 0.789850357839948, | |
| "grad_norm": 0.23532694578170776, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8046, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 0.7905009759271308, | |
| "grad_norm": 0.18583200871944427, | |
| "learning_rate": 0.0001, | |
| "loss": 1.7999, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.7911515940143136, | |
| "grad_norm": 0.23056970536708832, | |
| "learning_rate": 0.0001, | |
| "loss": 2.126, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 0.7918022121014964, | |
| "grad_norm": 0.3166569471359253, | |
| "learning_rate": 0.0001, | |
| "loss": 3.0332, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 0.7924528301886793, | |
| "grad_norm": 0.273381769657135, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2258, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 0.7931034482758621, | |
| "grad_norm": 0.3166522979736328, | |
| "learning_rate": 0.0001, | |
| "loss": 2.35, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 0.7937540663630449, | |
| "grad_norm": 0.1906355321407318, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9739, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.7944046844502277, | |
| "grad_norm": 0.2339126616716385, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3575, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 0.7950553025374105, | |
| "grad_norm": 0.2760171592235565, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4708, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 0.7957059206245933, | |
| "grad_norm": 0.17487159371376038, | |
| "learning_rate": 0.0001, | |
| "loss": 1.7924, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 0.7963565387117761, | |
| "grad_norm": 0.19386877119541168, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3044, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 0.7970071567989591, | |
| "grad_norm": 0.18056143820285797, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9543, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.7976577748861419, | |
| "grad_norm": 0.3085278868675232, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2131, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 0.7983083929733247, | |
| "grad_norm": 0.1960904896259308, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0918, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 0.7989590110605075, | |
| "grad_norm": 0.19437837600708008, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2241, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 0.7996096291476903, | |
| "grad_norm": 0.2129238396883011, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1891, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 0.8002602472348731, | |
| "grad_norm": 0.20101650059223175, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1341, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.8009108653220559, | |
| "grad_norm": 0.20897014439105988, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0937, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 0.8015614834092388, | |
| "grad_norm": 0.2693694829940796, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7406, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 0.8022121014964216, | |
| "grad_norm": 0.2322738617658615, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8483, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 0.8028627195836044, | |
| "grad_norm": 0.21177823841571808, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2315, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 0.8035133376707873, | |
| "grad_norm": 0.2920454442501068, | |
| "learning_rate": 0.0001, | |
| "loss": 3.0264, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.8041639557579701, | |
| "grad_norm": 0.2331319898366928, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4574, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 0.8048145738451529, | |
| "grad_norm": 0.2339990735054016, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2752, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 0.8054651919323357, | |
| "grad_norm": 0.22823981940746307, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9615, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 0.8061158100195186, | |
| "grad_norm": 0.20435038208961487, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9989, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 0.8067664281067014, | |
| "grad_norm": 0.32488611340522766, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4791, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.8074170461938842, | |
| "grad_norm": 0.27227675914764404, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6443, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 0.808067664281067, | |
| "grad_norm": 0.20864960551261902, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2324, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 0.8087182823682498, | |
| "grad_norm": 0.22645455598831177, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0199, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 0.8093689004554326, | |
| "grad_norm": 0.22091244161128998, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1145, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 0.8100195185426154, | |
| "grad_norm": 0.20442111790180206, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1277, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.8106701366297983, | |
| "grad_norm": 0.19400720298290253, | |
| "learning_rate": 0.0001, | |
| "loss": 1.951, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 0.8113207547169812, | |
| "grad_norm": 0.474490225315094, | |
| "learning_rate": 0.0001, | |
| "loss": 3.0206, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 0.811971372804164, | |
| "grad_norm": 0.23634073138237, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2556, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 0.8126219908913468, | |
| "grad_norm": 0.23998601734638214, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3201, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 0.8132726089785296, | |
| "grad_norm": 0.19258932769298553, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9719, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.8139232270657124, | |
| "grad_norm": 0.21039240062236786, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3617, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 0.8145738451528952, | |
| "grad_norm": 0.37176814675331116, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7183, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 0.815224463240078, | |
| "grad_norm": 0.24739331007003784, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0098, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 0.8158750813272609, | |
| "grad_norm": 0.32313254475593567, | |
| "learning_rate": 0.0001, | |
| "loss": 2.062, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 0.8165256994144438, | |
| "grad_norm": 0.2571156322956085, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3973, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.8171763175016266, | |
| "grad_norm": 0.266369491815567, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6019, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 0.8178269355888094, | |
| "grad_norm": 0.3770993649959564, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5413, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 0.8184775536759922, | |
| "grad_norm": 0.24964609742164612, | |
| "learning_rate": 0.0001, | |
| "loss": 1.7407, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 0.819128171763175, | |
| "grad_norm": 0.208835169672966, | |
| "learning_rate": 0.0001, | |
| "loss": 2.36, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 0.8197787898503578, | |
| "grad_norm": 0.19789732992649078, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0967, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.8204294079375407, | |
| "grad_norm": 0.4847930669784546, | |
| "learning_rate": 0.0001, | |
| "loss": 2.9673, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 0.8210800260247235, | |
| "grad_norm": 0.277960866689682, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2165, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 0.8217306441119063, | |
| "grad_norm": 0.20278669893741608, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5098, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 0.8223812621990891, | |
| "grad_norm": 0.3295345604419708, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4451, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 0.8230318802862719, | |
| "grad_norm": 0.25482621788978577, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3178, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.8236824983734548, | |
| "grad_norm": 0.21955101191997528, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3245, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 0.8243331164606376, | |
| "grad_norm": 0.19811898469924927, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1608, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 0.8249837345478205, | |
| "grad_norm": 0.20357833802700043, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0502, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 0.8256343526350033, | |
| "grad_norm": 0.25111669301986694, | |
| "learning_rate": 0.0001, | |
| "loss": 2.9059, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 0.8262849707221861, | |
| "grad_norm": 0.20970256626605988, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3496, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.8269355888093689, | |
| "grad_norm": 0.19146494567394257, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0773, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 0.8275862068965517, | |
| "grad_norm": 0.2083313763141632, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0031, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 0.8282368249837345, | |
| "grad_norm": 0.19460196793079376, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0411, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 0.8288874430709173, | |
| "grad_norm": 0.1900896281003952, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9517, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 0.8295380611581002, | |
| "grad_norm": 0.20020513236522675, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2062, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.8301886792452831, | |
| "grad_norm": 0.21990856528282166, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0837, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 0.8308392973324659, | |
| "grad_norm": 0.1966349482536316, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0407, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 0.8314899154196487, | |
| "grad_norm": 0.19897864758968353, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1639, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 0.8321405335068315, | |
| "grad_norm": 0.21094024181365967, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2158, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 0.8327911515940143, | |
| "grad_norm": 0.1989631950855255, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9578, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.8334417696811971, | |
| "grad_norm": 0.1953240931034088, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0365, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 0.8340923877683799, | |
| "grad_norm": 0.33914485573768616, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3676, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 0.8347430058555628, | |
| "grad_norm": 0.17135807871818542, | |
| "learning_rate": 0.0001, | |
| "loss": 1.821, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 0.8353936239427456, | |
| "grad_norm": 0.1993912309408188, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4103, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 0.8360442420299284, | |
| "grad_norm": 0.21222157776355743, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3443, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 0.8366948601171112, | |
| "grad_norm": 0.22162573039531708, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1757, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 0.8373454782042941, | |
| "grad_norm": 0.22677986323833466, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0542, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 0.8379960962914769, | |
| "grad_norm": 0.1974060982465744, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1686, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 0.8386467143786597, | |
| "grad_norm": 0.30552592873573303, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5467, | |
| "step": 1289 | |
| }, | |
| { | |
| "epoch": 0.8392973324658426, | |
| "grad_norm": 0.24357165396213531, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3276, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.8399479505530254, | |
| "grad_norm": 0.1960456818342209, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0956, | |
| "step": 1291 | |
| }, | |
| { | |
| "epoch": 0.8405985686402082, | |
| "grad_norm": 0.24264569580554962, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0666, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 0.841249186727391, | |
| "grad_norm": 0.25320202112197876, | |
| "learning_rate": 0.0001, | |
| "loss": 2.033, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 0.8418998048145738, | |
| "grad_norm": 0.2313191145658493, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0571, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 0.8425504229017566, | |
| "grad_norm": 0.42846229672431946, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0875, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 0.8432010409889394, | |
| "grad_norm": 0.19277000427246094, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9303, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 0.8438516590761224, | |
| "grad_norm": 0.1947111338376999, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9482, | |
| "step": 1297 | |
| }, | |
| { | |
| "epoch": 0.8445022771633052, | |
| "grad_norm": 0.30196627974510193, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3238, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 0.845152895250488, | |
| "grad_norm": 0.21137486398220062, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1962, | |
| "step": 1299 | |
| }, | |
| { | |
| "epoch": 0.8458035133376708, | |
| "grad_norm": 0.2568284571170807, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3231, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.8464541314248536, | |
| "grad_norm": 0.2092464715242386, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8074, | |
| "step": 1301 | |
| }, | |
| { | |
| "epoch": 0.8471047495120364, | |
| "grad_norm": 0.2112191617488861, | |
| "learning_rate": 0.0001, | |
| "loss": 2.169, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 0.8477553675992192, | |
| "grad_norm": 0.17425194382667542, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8025, | |
| "step": 1303 | |
| }, | |
| { | |
| "epoch": 0.8484059856864021, | |
| "grad_norm": 0.20808906853199005, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0869, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 0.8490566037735849, | |
| "grad_norm": 0.25200703740119934, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4963, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 0.8497072218607677, | |
| "grad_norm": 0.23948469758033752, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3028, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 0.8503578399479506, | |
| "grad_norm": 0.185250923037529, | |
| "learning_rate": 0.0001, | |
| "loss": 1.7409, | |
| "step": 1307 | |
| }, | |
| { | |
| "epoch": 0.8510084580351334, | |
| "grad_norm": 0.18948182463645935, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8922, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 0.8516590761223162, | |
| "grad_norm": 0.2027200311422348, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0922, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 0.852309694209499, | |
| "grad_norm": 0.28325602412223816, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0428, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.8529603122966819, | |
| "grad_norm": 0.1829916387796402, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9518, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 0.8536109303838647, | |
| "grad_norm": 0.1982378512620926, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0209, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 0.8542615484710475, | |
| "grad_norm": 0.18915079534053802, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9291, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 0.8549121665582303, | |
| "grad_norm": 0.1832190752029419, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9818, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 0.8555627846454131, | |
| "grad_norm": 0.2646237313747406, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4418, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 0.8562134027325959, | |
| "grad_norm": 0.2831929326057434, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8355, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 0.8568640208197787, | |
| "grad_norm": 0.2711881995201111, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1963, | |
| "step": 1317 | |
| }, | |
| { | |
| "epoch": 0.8575146389069617, | |
| "grad_norm": 0.25786513090133667, | |
| "learning_rate": 0.0001, | |
| "loss": 3.002, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 0.8581652569941445, | |
| "grad_norm": 0.26838061213493347, | |
| "learning_rate": 0.0001, | |
| "loss": 3.1155, | |
| "step": 1319 | |
| }, | |
| { | |
| "epoch": 0.8588158750813273, | |
| "grad_norm": 0.2220889776945114, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0535, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.8594664931685101, | |
| "grad_norm": 0.2008647471666336, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0515, | |
| "step": 1321 | |
| }, | |
| { | |
| "epoch": 0.8601171112556929, | |
| "grad_norm": 0.22017711400985718, | |
| "learning_rate": 0.0001, | |
| "loss": 2.289, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 0.8607677293428757, | |
| "grad_norm": 0.19674621522426605, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9414, | |
| "step": 1323 | |
| }, | |
| { | |
| "epoch": 0.8614183474300585, | |
| "grad_norm": 0.191552072763443, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9939, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 0.8620689655172413, | |
| "grad_norm": 0.20212143659591675, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8938, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.8627195836044242, | |
| "grad_norm": 0.22502020001411438, | |
| "learning_rate": 0.0001, | |
| "loss": 2.13, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 0.863370201691607, | |
| "grad_norm": 0.2504305839538574, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7666, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 0.8640208197787899, | |
| "grad_norm": 0.19481819868087769, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1141, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 0.8646714378659727, | |
| "grad_norm": 0.21994583308696747, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7615, | |
| "step": 1329 | |
| }, | |
| { | |
| "epoch": 0.8653220559531555, | |
| "grad_norm": 0.19281654059886932, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0864, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.8659726740403383, | |
| "grad_norm": 0.20329228043556213, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1002, | |
| "step": 1331 | |
| }, | |
| { | |
| "epoch": 0.8666232921275211, | |
| "grad_norm": 0.19484490156173706, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0519, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 0.867273910214704, | |
| "grad_norm": 0.1867295801639557, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9208, | |
| "step": 1333 | |
| }, | |
| { | |
| "epoch": 0.8679245283018868, | |
| "grad_norm": 0.30128392577171326, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7527, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 0.8685751463890696, | |
| "grad_norm": 0.22880543768405914, | |
| "learning_rate": 0.0001, | |
| "loss": 2.449, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 0.8692257644762524, | |
| "grad_norm": 0.23333753645420074, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0425, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 0.8698763825634352, | |
| "grad_norm": 0.34176793694496155, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8857, | |
| "step": 1337 | |
| }, | |
| { | |
| "epoch": 0.870527000650618, | |
| "grad_norm": 0.19983690977096558, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2466, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 0.8711776187378009, | |
| "grad_norm": 0.21883231401443481, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1262, | |
| "step": 1339 | |
| }, | |
| { | |
| "epoch": 0.8718282368249838, | |
| "grad_norm": 0.19143971800804138, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0119, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.8724788549121666, | |
| "grad_norm": 0.25845617055892944, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6315, | |
| "step": 1341 | |
| }, | |
| { | |
| "epoch": 0.8731294729993494, | |
| "grad_norm": 0.1914021521806717, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8571, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 0.8737800910865322, | |
| "grad_norm": 0.2742185592651367, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1467, | |
| "step": 1343 | |
| }, | |
| { | |
| "epoch": 0.874430709173715, | |
| "grad_norm": 0.19927754998207092, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9877, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 0.8750813272608978, | |
| "grad_norm": 0.2340778261423111, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2476, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 0.8757319453480806, | |
| "grad_norm": 0.2931828498840332, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4643, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 0.8763825634352636, | |
| "grad_norm": 0.18637506663799286, | |
| "learning_rate": 0.0001, | |
| "loss": 1.7933, | |
| "step": 1347 | |
| }, | |
| { | |
| "epoch": 0.8770331815224464, | |
| "grad_norm": 0.1898747682571411, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9781, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 0.8776837996096292, | |
| "grad_norm": 0.229608952999115, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1293, | |
| "step": 1349 | |
| }, | |
| { | |
| "epoch": 0.878334417696812, | |
| "grad_norm": 0.31374409794807434, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5436, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.8789850357839948, | |
| "grad_norm": 0.22544679045677185, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0882, | |
| "step": 1351 | |
| }, | |
| { | |
| "epoch": 0.8796356538711776, | |
| "grad_norm": 0.2415180653333664, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3193, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 0.8802862719583604, | |
| "grad_norm": 0.28355568647384644, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5994, | |
| "step": 1353 | |
| }, | |
| { | |
| "epoch": 0.8809368900455433, | |
| "grad_norm": 0.19143925607204437, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0546, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 0.8815875081327261, | |
| "grad_norm": 0.2990890443325043, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7388, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 0.8822381262199089, | |
| "grad_norm": 0.28672561049461365, | |
| "learning_rate": 0.0001, | |
| "loss": 1.915, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 0.8828887443070917, | |
| "grad_norm": 0.20137082040309906, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5376, | |
| "step": 1357 | |
| }, | |
| { | |
| "epoch": 0.8835393623942746, | |
| "grad_norm": 0.2175220251083374, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9055, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 0.8841899804814574, | |
| "grad_norm": 0.2790168523788452, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0223, | |
| "step": 1359 | |
| }, | |
| { | |
| "epoch": 0.8848405985686402, | |
| "grad_norm": 0.22070975601673126, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4071, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.885491216655823, | |
| "grad_norm": 0.22505122423171997, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2988, | |
| "step": 1361 | |
| }, | |
| { | |
| "epoch": 0.8861418347430059, | |
| "grad_norm": 0.2231319099664688, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0156, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 0.8867924528301887, | |
| "grad_norm": 0.2921566665172577, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7166, | |
| "step": 1363 | |
| }, | |
| { | |
| "epoch": 0.8874430709173715, | |
| "grad_norm": 0.19267822802066803, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0485, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 0.8880936890045543, | |
| "grad_norm": 0.28789597749710083, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7656, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 0.8887443070917371, | |
| "grad_norm": 0.3205803334712982, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5545, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 0.8893949251789199, | |
| "grad_norm": 0.20888707041740417, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8906, | |
| "step": 1367 | |
| }, | |
| { | |
| "epoch": 0.8900455432661027, | |
| "grad_norm": 0.18200016021728516, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8483, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 0.8906961613532857, | |
| "grad_norm": 0.2367328256368637, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3351, | |
| "step": 1369 | |
| }, | |
| { | |
| "epoch": 0.8913467794404685, | |
| "grad_norm": 0.28111082315444946, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5511, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.8919973975276513, | |
| "grad_norm": 0.19744041562080383, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9521, | |
| "step": 1371 | |
| }, | |
| { | |
| "epoch": 0.8926480156148341, | |
| "grad_norm": 0.2166965901851654, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1205, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 0.8932986337020169, | |
| "grad_norm": 0.20931009948253632, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1394, | |
| "step": 1373 | |
| }, | |
| { | |
| "epoch": 0.8939492517891997, | |
| "grad_norm": 0.2102230191230774, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9695, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 0.8945998698763825, | |
| "grad_norm": 0.22161559760570526, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4084, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.8952504879635654, | |
| "grad_norm": 0.22104842960834503, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6029, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 0.8959011060507482, | |
| "grad_norm": 0.2125016152858734, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0576, | |
| "step": 1377 | |
| }, | |
| { | |
| "epoch": 0.896551724137931, | |
| "grad_norm": 0.2626838684082031, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5907, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 0.8972023422251139, | |
| "grad_norm": 0.19114330410957336, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2824, | |
| "step": 1379 | |
| }, | |
| { | |
| "epoch": 0.8978529603122967, | |
| "grad_norm": 0.24731865525245667, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5292, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.8985035783994795, | |
| "grad_norm": 0.23787495493888855, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1433, | |
| "step": 1381 | |
| }, | |
| { | |
| "epoch": 0.8991541964866623, | |
| "grad_norm": 0.2028874158859253, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2726, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 0.8998048145738452, | |
| "grad_norm": 0.22940067946910858, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3222, | |
| "step": 1383 | |
| }, | |
| { | |
| "epoch": 0.900455432661028, | |
| "grad_norm": 0.20267997682094574, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2875, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 0.9011060507482108, | |
| "grad_norm": 0.21694517135620117, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3674, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 0.9017566688353936, | |
| "grad_norm": 0.1904231160879135, | |
| "learning_rate": 0.0001, | |
| "loss": 1.996, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 0.9024072869225764, | |
| "grad_norm": 0.2630701959133148, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4881, | |
| "step": 1387 | |
| }, | |
| { | |
| "epoch": 0.9030579050097592, | |
| "grad_norm": 0.19993318617343903, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9409, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 0.903708523096942, | |
| "grad_norm": 0.19389230012893677, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1121, | |
| "step": 1389 | |
| }, | |
| { | |
| "epoch": 0.904359141184125, | |
| "grad_norm": 0.20352298021316528, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9887, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.9050097592713078, | |
| "grad_norm": 0.17967310547828674, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8068, | |
| "step": 1391 | |
| }, | |
| { | |
| "epoch": 0.9056603773584906, | |
| "grad_norm": 0.2310938984155655, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2666, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 0.9063109954456734, | |
| "grad_norm": 0.18979041278362274, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0004, | |
| "step": 1393 | |
| }, | |
| { | |
| "epoch": 0.9069616135328562, | |
| "grad_norm": 0.26813068985939026, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4142, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 0.907612231620039, | |
| "grad_norm": 0.23549699783325195, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3059, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 0.9082628497072218, | |
| "grad_norm": 0.2435377985239029, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1919, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 0.9089134677944047, | |
| "grad_norm": 0.21723680198192596, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2244, | |
| "step": 1397 | |
| }, | |
| { | |
| "epoch": 0.9095640858815875, | |
| "grad_norm": 0.20665475726127625, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1907, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 0.9102147039687704, | |
| "grad_norm": 0.26172783970832825, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5632, | |
| "step": 1399 | |
| }, | |
| { | |
| "epoch": 0.9108653220559532, | |
| "grad_norm": 0.22065763175487518, | |
| "learning_rate": 0.0001, | |
| "loss": 2.287, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.911515940143136, | |
| "grad_norm": 0.260623574256897, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7247, | |
| "step": 1401 | |
| }, | |
| { | |
| "epoch": 0.9121665582303188, | |
| "grad_norm": 0.1967797726392746, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3431, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 0.9128171763175016, | |
| "grad_norm": 0.19779254496097565, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8389, | |
| "step": 1403 | |
| }, | |
| { | |
| "epoch": 0.9134677944046844, | |
| "grad_norm": 0.20970992743968964, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1884, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 0.9141184124918673, | |
| "grad_norm": 0.22229008376598358, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2673, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 0.9147690305790501, | |
| "grad_norm": 0.2208055853843689, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1967, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 0.9154196486662329, | |
| "grad_norm": 0.2209876924753189, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2027, | |
| "step": 1407 | |
| }, | |
| { | |
| "epoch": 0.9160702667534157, | |
| "grad_norm": 0.19158391654491425, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9069, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 0.9167208848405985, | |
| "grad_norm": 0.2156110256910324, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2712, | |
| "step": 1409 | |
| }, | |
| { | |
| "epoch": 0.9173715029277814, | |
| "grad_norm": 0.2610962390899658, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8294, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.9180221210149642, | |
| "grad_norm": 0.18197974562644958, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9715, | |
| "step": 1411 | |
| }, | |
| { | |
| "epoch": 0.9186727391021471, | |
| "grad_norm": 0.19082801043987274, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1091, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 0.9193233571893299, | |
| "grad_norm": 0.26832160353660583, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7021, | |
| "step": 1413 | |
| }, | |
| { | |
| "epoch": 0.9199739752765127, | |
| "grad_norm": 0.3070698082447052, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4547, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 0.9206245933636955, | |
| "grad_norm": 0.25139206647872925, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5873, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 0.9212752114508783, | |
| "grad_norm": 0.2131306529045105, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3841, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 0.9219258295380611, | |
| "grad_norm": 0.24531540274620056, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9666, | |
| "step": 1417 | |
| }, | |
| { | |
| "epoch": 0.9225764476252439, | |
| "grad_norm": 0.1986437737941742, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9241, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 0.9232270657124269, | |
| "grad_norm": 0.23614904284477234, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5824, | |
| "step": 1419 | |
| }, | |
| { | |
| "epoch": 0.9238776837996097, | |
| "grad_norm": 0.2782133221626282, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1812, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.9245283018867925, | |
| "grad_norm": 0.2232246845960617, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3204, | |
| "step": 1421 | |
| }, | |
| { | |
| "epoch": 0.9251789199739753, | |
| "grad_norm": 0.22002846002578735, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8228, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 0.9258295380611581, | |
| "grad_norm": 0.30900144577026367, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4824, | |
| "step": 1423 | |
| }, | |
| { | |
| "epoch": 0.9264801561483409, | |
| "grad_norm": 0.262989342212677, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8719, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 0.9271307742355237, | |
| "grad_norm": 0.5406531095504761, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6984, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.9277813923227066, | |
| "grad_norm": 0.2415890246629715, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2543, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 0.9284320104098894, | |
| "grad_norm": 0.21261392533779144, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9761, | |
| "step": 1427 | |
| }, | |
| { | |
| "epoch": 0.9290826284970722, | |
| "grad_norm": 0.23223569989204407, | |
| "learning_rate": 0.0001, | |
| "loss": 1.821, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 0.929733246584255, | |
| "grad_norm": 0.2846924960613251, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9886, | |
| "step": 1429 | |
| }, | |
| { | |
| "epoch": 0.9303838646714379, | |
| "grad_norm": 0.2527627646923065, | |
| "learning_rate": 0.0001, | |
| "loss": 2.373, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.9310344827586207, | |
| "grad_norm": 0.19917793571949005, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0111, | |
| "step": 1431 | |
| }, | |
| { | |
| "epoch": 0.9316851008458035, | |
| "grad_norm": 0.19021449983119965, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0373, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 0.9323357189329864, | |
| "grad_norm": 0.24929922819137573, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3885, | |
| "step": 1433 | |
| }, | |
| { | |
| "epoch": 0.9329863370201692, | |
| "grad_norm": 0.2533571124076843, | |
| "learning_rate": 0.0001, | |
| "loss": 2.544, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 0.933636955107352, | |
| "grad_norm": 0.23931783437728882, | |
| "learning_rate": 0.0001, | |
| "loss": 2.418, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 0.9342875731945348, | |
| "grad_norm": 0.30167070031166077, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6513, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 0.9349381912817176, | |
| "grad_norm": 0.1971869319677353, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4016, | |
| "step": 1437 | |
| }, | |
| { | |
| "epoch": 0.9355888093689004, | |
| "grad_norm": 0.21331265568733215, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1524, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 0.9362394274560832, | |
| "grad_norm": 0.26298433542251587, | |
| "learning_rate": 0.0001, | |
| "loss": 2.9442, | |
| "step": 1439 | |
| }, | |
| { | |
| "epoch": 0.936890045543266, | |
| "grad_norm": 0.245792955160141, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2055, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.937540663630449, | |
| "grad_norm": 0.23703397810459137, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5616, | |
| "step": 1441 | |
| }, | |
| { | |
| "epoch": 0.9381912817176318, | |
| "grad_norm": 0.18641355633735657, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8982, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 0.9388418998048146, | |
| "grad_norm": 0.3551875650882721, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7802, | |
| "step": 1443 | |
| }, | |
| { | |
| "epoch": 0.9394925178919974, | |
| "grad_norm": 0.2278834879398346, | |
| "learning_rate": 0.0001, | |
| "loss": 2.175, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 0.9401431359791802, | |
| "grad_norm": 0.26398956775665283, | |
| "learning_rate": 0.0001, | |
| "loss": 2.643, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 0.940793754066363, | |
| "grad_norm": 0.31316065788269043, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5662, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 0.9414443721535458, | |
| "grad_norm": 0.22769761085510254, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8677, | |
| "step": 1447 | |
| }, | |
| { | |
| "epoch": 0.9420949902407287, | |
| "grad_norm": 0.2069929838180542, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4393, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 0.9427456083279115, | |
| "grad_norm": 0.23500226438045502, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0914, | |
| "step": 1449 | |
| }, | |
| { | |
| "epoch": 0.9433962264150944, | |
| "grad_norm": 0.2312425971031189, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1085, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.9440468445022772, | |
| "grad_norm": 0.20859290659427643, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0653, | |
| "step": 1451 | |
| }, | |
| { | |
| "epoch": 0.94469746258946, | |
| "grad_norm": 0.23336270451545715, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1047, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 0.9453480806766428, | |
| "grad_norm": 0.2613270580768585, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3179, | |
| "step": 1453 | |
| }, | |
| { | |
| "epoch": 0.9459986987638256, | |
| "grad_norm": 0.2182740867137909, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0625, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 0.9466493168510085, | |
| "grad_norm": 0.28436079621315, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8766, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 0.9472999349381913, | |
| "grad_norm": 0.1998225450515747, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3157, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 0.9479505530253741, | |
| "grad_norm": 0.19695498049259186, | |
| "learning_rate": 0.0001, | |
| "loss": 1.7501, | |
| "step": 1457 | |
| }, | |
| { | |
| "epoch": 0.9486011711125569, | |
| "grad_norm": 0.1972542405128479, | |
| "learning_rate": 0.0001, | |
| "loss": 1.956, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 0.9492517891997397, | |
| "grad_norm": 0.18410329520702362, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8403, | |
| "step": 1459 | |
| }, | |
| { | |
| "epoch": 0.9499024072869225, | |
| "grad_norm": 0.3675645887851715, | |
| "learning_rate": 0.0001, | |
| "loss": 2.9161, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.9505530253741054, | |
| "grad_norm": 0.2620394229888916, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4318, | |
| "step": 1461 | |
| }, | |
| { | |
| "epoch": 0.9512036434612883, | |
| "grad_norm": 0.28973767161369324, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0047, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 0.9518542615484711, | |
| "grad_norm": 0.31598249077796936, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4517, | |
| "step": 1463 | |
| }, | |
| { | |
| "epoch": 0.9525048796356539, | |
| "grad_norm": 0.18546514213085175, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8551, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 0.9531554977228367, | |
| "grad_norm": 0.32123416662216187, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7277, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 0.9538061158100195, | |
| "grad_norm": 0.25180497765541077, | |
| "learning_rate": 0.0001, | |
| "loss": 1.7946, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 0.9544567338972023, | |
| "grad_norm": 0.24950966238975525, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0796, | |
| "step": 1467 | |
| }, | |
| { | |
| "epoch": 0.9551073519843851, | |
| "grad_norm": 0.20496372878551483, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0713, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 0.955757970071568, | |
| "grad_norm": 0.20856817066669464, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1812, | |
| "step": 1469 | |
| }, | |
| { | |
| "epoch": 0.9564085881587508, | |
| "grad_norm": 0.26053234934806824, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3234, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.9570592062459337, | |
| "grad_norm": 0.3086039125919342, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3745, | |
| "step": 1471 | |
| }, | |
| { | |
| "epoch": 0.9577098243331165, | |
| "grad_norm": 0.19647593796253204, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8883, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 0.9583604424202993, | |
| "grad_norm": 0.20327430963516235, | |
| "learning_rate": 0.0001, | |
| "loss": 2.125, | |
| "step": 1473 | |
| }, | |
| { | |
| "epoch": 0.9590110605074821, | |
| "grad_norm": 0.22550363838672638, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1609, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 0.9596616785946649, | |
| "grad_norm": 0.2369288206100464, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9352, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.9603122966818478, | |
| "grad_norm": 0.21195881068706512, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0275, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 0.9609629147690306, | |
| "grad_norm": 0.17060896754264832, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9566, | |
| "step": 1477 | |
| }, | |
| { | |
| "epoch": 0.9616135328562134, | |
| "grad_norm": 0.23335829377174377, | |
| "learning_rate": 0.0001, | |
| "loss": 2.296, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 0.9622641509433962, | |
| "grad_norm": 0.34170275926589966, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0079, | |
| "step": 1479 | |
| }, | |
| { | |
| "epoch": 0.962914769030579, | |
| "grad_norm": 0.2187998741865158, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0203, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.9635653871177619, | |
| "grad_norm": 0.1877596378326416, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9496, | |
| "step": 1481 | |
| }, | |
| { | |
| "epoch": 0.9642160052049447, | |
| "grad_norm": 0.18515220284461975, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0025, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 0.9648666232921275, | |
| "grad_norm": 0.21251696348190308, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8843, | |
| "step": 1483 | |
| }, | |
| { | |
| "epoch": 0.9655172413793104, | |
| "grad_norm": 0.19280041754245758, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0726, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 0.9661678594664932, | |
| "grad_norm": 0.1977832317352295, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0546, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 0.966818477553676, | |
| "grad_norm": 0.19019471108913422, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9825, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 0.9674690956408588, | |
| "grad_norm": 0.20381596684455872, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3339, | |
| "step": 1487 | |
| }, | |
| { | |
| "epoch": 0.9681197137280416, | |
| "grad_norm": 0.1899532973766327, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2962, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 0.9687703318152244, | |
| "grad_norm": 0.20524102449417114, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0874, | |
| "step": 1489 | |
| }, | |
| { | |
| "epoch": 0.9694209499024072, | |
| "grad_norm": 0.179798424243927, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8875, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.9700715679895902, | |
| "grad_norm": 0.19358840584754944, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1539, | |
| "step": 1491 | |
| }, | |
| { | |
| "epoch": 0.970722186076773, | |
| "grad_norm": 0.2686682343482971, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3412, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 0.9713728041639558, | |
| "grad_norm": 0.2146061509847641, | |
| "learning_rate": 0.0001, | |
| "loss": 2.476, | |
| "step": 1493 | |
| }, | |
| { | |
| "epoch": 0.9720234222511386, | |
| "grad_norm": 0.26737329363822937, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8003, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 0.9726740403383214, | |
| "grad_norm": 0.23344694077968597, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1174, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 0.9733246584255042, | |
| "grad_norm": 0.1991250365972519, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5734, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 0.973975276512687, | |
| "grad_norm": 0.21246576309204102, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5597, | |
| "step": 1497 | |
| }, | |
| { | |
| "epoch": 0.9746258945998699, | |
| "grad_norm": 0.1873084306716919, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9547, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 0.9752765126870527, | |
| "grad_norm": 0.17600129544734955, | |
| "learning_rate": 0.0001, | |
| "loss": 1.7255, | |
| "step": 1499 | |
| }, | |
| { | |
| "epoch": 0.9759271307742355, | |
| "grad_norm": 0.19860287010669708, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5043, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.9765777488614183, | |
| "grad_norm": 0.1887977123260498, | |
| "learning_rate": 0.0001, | |
| "loss": 2.091, | |
| "step": 1501 | |
| }, | |
| { | |
| "epoch": 0.9772283669486012, | |
| "grad_norm": 0.1981416791677475, | |
| "learning_rate": 0.0001, | |
| "loss": 1.968, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 0.977878985035784, | |
| "grad_norm": 0.22598034143447876, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2569, | |
| "step": 1503 | |
| }, | |
| { | |
| "epoch": 0.9785296031229668, | |
| "grad_norm": 0.18924662470817566, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3823, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 0.9791802212101497, | |
| "grad_norm": 0.2178531438112259, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0824, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 0.9798308392973325, | |
| "grad_norm": 0.2125057578086853, | |
| "learning_rate": 0.0001, | |
| "loss": 2.196, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 0.9804814573845153, | |
| "grad_norm": 0.19958944618701935, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8752, | |
| "step": 1507 | |
| }, | |
| { | |
| "epoch": 0.9811320754716981, | |
| "grad_norm": 0.23179121315479279, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0539, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 0.9817826935588809, | |
| "grad_norm": 0.19006481766700745, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0125, | |
| "step": 1509 | |
| }, | |
| { | |
| "epoch": 0.9824333116460637, | |
| "grad_norm": 0.1952325403690338, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1829, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.9830839297332465, | |
| "grad_norm": 0.24362123012542725, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4628, | |
| "step": 1511 | |
| }, | |
| { | |
| "epoch": 0.9837345478204295, | |
| "grad_norm": 0.20148973166942596, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9869, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 0.9843851659076123, | |
| "grad_norm": 0.19783656299114227, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1447, | |
| "step": 1513 | |
| }, | |
| { | |
| "epoch": 0.9850357839947951, | |
| "grad_norm": 0.2120031863451004, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1149, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 0.9856864020819779, | |
| "grad_norm": 0.2673274278640747, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3755, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 0.9863370201691607, | |
| "grad_norm": 0.31493106484413147, | |
| "learning_rate": 0.0001, | |
| "loss": 2.8462, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 0.9869876382563435, | |
| "grad_norm": 0.24251258373260498, | |
| "learning_rate": 0.0001, | |
| "loss": 2.6499, | |
| "step": 1517 | |
| }, | |
| { | |
| "epoch": 0.9876382563435263, | |
| "grad_norm": 0.19818106293678284, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1229, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 0.9882888744307091, | |
| "grad_norm": 0.2608949542045593, | |
| "learning_rate": 0.0001, | |
| "loss": 2.7848, | |
| "step": 1519 | |
| }, | |
| { | |
| "epoch": 0.988939492517892, | |
| "grad_norm": 0.19214370846748352, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0514, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.9895901106050748, | |
| "grad_norm": 0.21454864740371704, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8879, | |
| "step": 1521 | |
| }, | |
| { | |
| "epoch": 0.9902407286922577, | |
| "grad_norm": 0.22206801176071167, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0008, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 0.9908913467794405, | |
| "grad_norm": 0.19270485639572144, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9491, | |
| "step": 1523 | |
| }, | |
| { | |
| "epoch": 0.9915419648666233, | |
| "grad_norm": 0.27471333742141724, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4914, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 0.9921925829538061, | |
| "grad_norm": 0.2767917513847351, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3733, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 0.9928432010409889, | |
| "grad_norm": 0.222362220287323, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1563, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 0.9934938191281718, | |
| "grad_norm": 0.2520142197608948, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2877, | |
| "step": 1527 | |
| }, | |
| { | |
| "epoch": 0.9941444372153546, | |
| "grad_norm": 0.20014792680740356, | |
| "learning_rate": 0.0001, | |
| "loss": 2.087, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 0.9947950553025374, | |
| "grad_norm": 0.18027350306510925, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9049, | |
| "step": 1529 | |
| }, | |
| { | |
| "epoch": 0.9954456733897202, | |
| "grad_norm": 0.20437590777873993, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9805, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.996096291476903, | |
| "grad_norm": 0.38628190755844116, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5385, | |
| "step": 1531 | |
| }, | |
| { | |
| "epoch": 0.9967469095640858, | |
| "grad_norm": 0.24987295269966125, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0762, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 0.9973975276512687, | |
| "grad_norm": 0.2631097733974457, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1693, | |
| "step": 1533 | |
| }, | |
| { | |
| "epoch": 0.9980481457384516, | |
| "grad_norm": 0.21323037147521973, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8547, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 0.9986987638256344, | |
| "grad_norm": 0.19627395272254944, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9524, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 0.9993493819128172, | |
| "grad_norm": 0.23723964393138885, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2301, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.2651236355304718, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3068, | |
| "step": 1537 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 1537, | |
| "total_flos": 2.3185853705323807e+18, | |
| "train_loss": 2.2698031654587485, | |
| "train_runtime": 18789.3101, | |
| "train_samples_per_second": 0.327, | |
| "train_steps_per_second": 0.082 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1537, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 300, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.3185853705323807e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |