| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 296, |
| "global_step": 2954, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0003386171721233413, |
| "grad_norm": 29.75, |
| "learning_rate": 0.0, |
| "loss": 0.5027, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0006772343442466826, |
| "grad_norm": 35.25, |
| "learning_rate": 1.3513513513513515e-07, |
| "loss": 0.5494, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.001015851516370024, |
| "grad_norm": 28.5, |
| "learning_rate": 2.702702702702703e-07, |
| "loss": 0.4482, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0013544686884933651, |
| "grad_norm": 33.5, |
| "learning_rate": 4.0540540540540546e-07, |
| "loss": 0.5661, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0016930858606167066, |
| "grad_norm": 30.25, |
| "learning_rate": 5.405405405405406e-07, |
| "loss": 0.4796, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.002031703032740048, |
| "grad_norm": 34.25, |
| "learning_rate": 6.756756756756758e-07, |
| "loss": 0.515, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0023703202048633893, |
| "grad_norm": 31.875, |
| "learning_rate": 8.108108108108109e-07, |
| "loss": 0.4466, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0027089373769867303, |
| "grad_norm": 30.0, |
| "learning_rate": 9.459459459459461e-07, |
| "loss": 0.4354, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0030475545491100717, |
| "grad_norm": 28.0, |
| "learning_rate": 1.0810810810810812e-06, |
| "loss": 0.4231, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.003386171721233413, |
| "grad_norm": 19.375, |
| "learning_rate": 1.2162162162162164e-06, |
| "loss": 0.3424, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.003724788893356754, |
| "grad_norm": 21.875, |
| "learning_rate": 1.3513513513513515e-06, |
| "loss": 0.37, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.004063406065480096, |
| "grad_norm": 19.0, |
| "learning_rate": 1.4864864864864868e-06, |
| "loss": 0.3439, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.004402023237603437, |
| "grad_norm": 15.0625, |
| "learning_rate": 1.6216216216216219e-06, |
| "loss": 0.2812, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.0047406404097267785, |
| "grad_norm": 16.0, |
| "learning_rate": 1.756756756756757e-06, |
| "loss": 0.335, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.0050792575818501195, |
| "grad_norm": 12.5, |
| "learning_rate": 1.8918918918918922e-06, |
| "loss": 0.2898, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0054178747539734605, |
| "grad_norm": 11.6875, |
| "learning_rate": 2.0270270270270273e-06, |
| "loss": 0.2427, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.005756491926096802, |
| "grad_norm": 9.625, |
| "learning_rate": 2.1621621621621623e-06, |
| "loss": 0.2346, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.006095109098220143, |
| "grad_norm": 8.0625, |
| "learning_rate": 2.297297297297298e-06, |
| "loss": 0.2305, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.006433726270343484, |
| "grad_norm": 6.46875, |
| "learning_rate": 2.432432432432433e-06, |
| "loss": 0.1882, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.006772343442466826, |
| "grad_norm": 5.125, |
| "learning_rate": 2.5675675675675675e-06, |
| "loss": 0.1815, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.007110960614590167, |
| "grad_norm": 4.15625, |
| "learning_rate": 2.702702702702703e-06, |
| "loss": 0.1453, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.007449577786713508, |
| "grad_norm": 4.3125, |
| "learning_rate": 2.837837837837838e-06, |
| "loss": 0.1595, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.00778819495883685, |
| "grad_norm": 4.4375, |
| "learning_rate": 2.9729729729729736e-06, |
| "loss": 0.1914, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.008126812130960191, |
| "grad_norm": 3.703125, |
| "learning_rate": 3.1081081081081082e-06, |
| "loss": 0.1724, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.008465429303083532, |
| "grad_norm": 2.65625, |
| "learning_rate": 3.2432432432432437e-06, |
| "loss": 0.1183, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.008804046475206873, |
| "grad_norm": 3.09375, |
| "learning_rate": 3.3783783783783788e-06, |
| "loss": 0.165, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.009142663647330216, |
| "grad_norm": 2.546875, |
| "learning_rate": 3.513513513513514e-06, |
| "loss": 0.1775, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.009481280819453557, |
| "grad_norm": 1.71875, |
| "learning_rate": 3.648648648648649e-06, |
| "loss": 0.1276, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.009819897991576898, |
| "grad_norm": 1.953125, |
| "learning_rate": 3.7837837837837844e-06, |
| "loss": 0.1605, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.010158515163700239, |
| "grad_norm": 1.703125, |
| "learning_rate": 3.918918918918919e-06, |
| "loss": 0.1486, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.01049713233582358, |
| "grad_norm": 1.90625, |
| "learning_rate": 4.0540540540540545e-06, |
| "loss": 0.1535, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.010835749507946921, |
| "grad_norm": 1.5625, |
| "learning_rate": 4.189189189189189e-06, |
| "loss": 0.1189, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.011174366680070264, |
| "grad_norm": 1.828125, |
| "learning_rate": 4.324324324324325e-06, |
| "loss": 0.1359, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.011512983852193605, |
| "grad_norm": 1.328125, |
| "learning_rate": 4.45945945945946e-06, |
| "loss": 0.1289, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.011851601024316946, |
| "grad_norm": 1.09375, |
| "learning_rate": 4.594594594594596e-06, |
| "loss": 0.1033, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.012190218196440287, |
| "grad_norm": 1.3671875, |
| "learning_rate": 4.72972972972973e-06, |
| "loss": 0.149, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.012528835368563628, |
| "grad_norm": 1.03125, |
| "learning_rate": 4.864864864864866e-06, |
| "loss": 0.0975, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.012867452540686969, |
| "grad_norm": 1.0859375, |
| "learning_rate": 5e-06, |
| "loss": 0.0938, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.013206069712810312, |
| "grad_norm": 1.0625, |
| "learning_rate": 5.135135135135135e-06, |
| "loss": 0.1031, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.013544686884933653, |
| "grad_norm": 1.1796875, |
| "learning_rate": 5.2702702702702705e-06, |
| "loss": 0.1088, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.013883304057056994, |
| "grad_norm": 1.2421875, |
| "learning_rate": 5.405405405405406e-06, |
| "loss": 0.1207, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.014221921229180335, |
| "grad_norm": 1.703125, |
| "learning_rate": 5.540540540540541e-06, |
| "loss": 0.2261, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.014560538401303676, |
| "grad_norm": 1.4765625, |
| "learning_rate": 5.675675675675676e-06, |
| "loss": 0.12, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.014899155573427017, |
| "grad_norm": 1.21875, |
| "learning_rate": 5.810810810810811e-06, |
| "loss": 0.1243, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.01523777274555036, |
| "grad_norm": 1.140625, |
| "learning_rate": 5.945945945945947e-06, |
| "loss": 0.1028, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.0155763899176737, |
| "grad_norm": 1.1171875, |
| "learning_rate": 6.081081081081082e-06, |
| "loss": 0.1044, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.01591500708979704, |
| "grad_norm": 1.25, |
| "learning_rate": 6.2162162162162164e-06, |
| "loss": 0.1267, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.016253624261920382, |
| "grad_norm": 0.90625, |
| "learning_rate": 6.351351351351351e-06, |
| "loss": 0.0914, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.016592241434043725, |
| "grad_norm": 1.0546875, |
| "learning_rate": 6.486486486486487e-06, |
| "loss": 0.1085, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.016930858606167064, |
| "grad_norm": 0.87109375, |
| "learning_rate": 6.621621621621622e-06, |
| "loss": 0.0949, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.017269475778290407, |
| "grad_norm": 0.8671875, |
| "learning_rate": 6.7567567567567575e-06, |
| "loss": 0.0934, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.017608092950413747, |
| "grad_norm": 1.2734375, |
| "learning_rate": 6.891891891891892e-06, |
| "loss": 0.1231, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.01794671012253709, |
| "grad_norm": 0.84375, |
| "learning_rate": 7.027027027027028e-06, |
| "loss": 0.0946, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.018285327294660432, |
| "grad_norm": 1.3671875, |
| "learning_rate": 7.162162162162163e-06, |
| "loss": 0.1202, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.01862394446678377, |
| "grad_norm": 0.79296875, |
| "learning_rate": 7.297297297297298e-06, |
| "loss": 0.0802, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.018962561638907114, |
| "grad_norm": 1.2109375, |
| "learning_rate": 7.4324324324324324e-06, |
| "loss": 0.1052, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.019301178811030453, |
| "grad_norm": 0.91796875, |
| "learning_rate": 7.567567567567569e-06, |
| "loss": 0.0898, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.019639795983153796, |
| "grad_norm": 0.9921875, |
| "learning_rate": 7.702702702702704e-06, |
| "loss": 0.1046, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.019978413155277135, |
| "grad_norm": 0.87890625, |
| "learning_rate": 7.837837837837838e-06, |
| "loss": 0.0983, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.020317030327400478, |
| "grad_norm": 0.97265625, |
| "learning_rate": 7.972972972972974e-06, |
| "loss": 0.0832, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.02065564749952382, |
| "grad_norm": 1.0, |
| "learning_rate": 8.108108108108109e-06, |
| "loss": 0.1012, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.02099426467164716, |
| "grad_norm": 1.0, |
| "learning_rate": 8.243243243243245e-06, |
| "loss": 0.1124, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.021332881843770503, |
| "grad_norm": 0.73828125, |
| "learning_rate": 8.378378378378378e-06, |
| "loss": 0.1053, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.021671499015893842, |
| "grad_norm": 0.828125, |
| "learning_rate": 8.513513513513514e-06, |
| "loss": 0.0782, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.022010116188017185, |
| "grad_norm": 0.74609375, |
| "learning_rate": 8.64864864864865e-06, |
| "loss": 0.0739, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.022348733360140528, |
| "grad_norm": 0.89453125, |
| "learning_rate": 8.783783783783785e-06, |
| "loss": 0.0715, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.022687350532263867, |
| "grad_norm": 0.9765625, |
| "learning_rate": 8.91891891891892e-06, |
| "loss": 0.0913, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.02302596770438721, |
| "grad_norm": 0.8359375, |
| "learning_rate": 9.054054054054054e-06, |
| "loss": 0.0844, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.02336458487651055, |
| "grad_norm": 0.82421875, |
| "learning_rate": 9.189189189189191e-06, |
| "loss": 0.0825, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.02370320204863389, |
| "grad_norm": 1.0234375, |
| "learning_rate": 9.324324324324325e-06, |
| "loss": 0.084, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.02404181922075723, |
| "grad_norm": 0.9375, |
| "learning_rate": 9.45945945945946e-06, |
| "loss": 0.0933, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.024380436392880574, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.594594594594594e-06, |
| "loss": 0.0835, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.024719053565003916, |
| "grad_norm": 0.8203125, |
| "learning_rate": 9.729729729729732e-06, |
| "loss": 0.0814, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.025057670737127256, |
| "grad_norm": 0.796875, |
| "learning_rate": 9.864864864864865e-06, |
| "loss": 0.079, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.0253962879092506, |
| "grad_norm": 1.046875, |
| "learning_rate": 1e-05, |
| "loss": 0.0984, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.025734905081373938, |
| "grad_norm": 0.73828125, |
| "learning_rate": 1.0135135135135136e-05, |
| "loss": 0.079, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.02607352225349728, |
| "grad_norm": 0.765625, |
| "learning_rate": 1.027027027027027e-05, |
| "loss": 0.0811, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.026412139425620623, |
| "grad_norm": 0.796875, |
| "learning_rate": 1.0405405405405407e-05, |
| "loss": 0.0819, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.026750756597743963, |
| "grad_norm": 0.84765625, |
| "learning_rate": 1.0540540540540541e-05, |
| "loss": 0.0828, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.027089373769867305, |
| "grad_norm": 0.86328125, |
| "learning_rate": 1.0675675675675677e-05, |
| "loss": 0.1061, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.027427990941990645, |
| "grad_norm": 0.74609375, |
| "learning_rate": 1.0810810810810812e-05, |
| "loss": 0.0912, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.027766608114113987, |
| "grad_norm": 0.78515625, |
| "learning_rate": 1.0945945945945946e-05, |
| "loss": 0.0702, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.02810522528623733, |
| "grad_norm": 0.80078125, |
| "learning_rate": 1.1081081081081081e-05, |
| "loss": 0.0769, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.02844384245836067, |
| "grad_norm": 0.73046875, |
| "learning_rate": 1.1216216216216219e-05, |
| "loss": 0.0786, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.028782459630484012, |
| "grad_norm": 0.75, |
| "learning_rate": 1.1351351351351352e-05, |
| "loss": 0.0837, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.02912107680260735, |
| "grad_norm": 0.75390625, |
| "learning_rate": 1.1486486486486488e-05, |
| "loss": 0.0993, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.029459693974730694, |
| "grad_norm": 0.72265625, |
| "learning_rate": 1.1621621621621622e-05, |
| "loss": 0.0806, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.029798311146854033, |
| "grad_norm": 0.8046875, |
| "learning_rate": 1.1756756756756757e-05, |
| "loss": 0.0988, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.030136928318977376, |
| "grad_norm": 0.78515625, |
| "learning_rate": 1.1891891891891894e-05, |
| "loss": 0.0947, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.03047554549110072, |
| "grad_norm": 0.859375, |
| "learning_rate": 1.2027027027027028e-05, |
| "loss": 0.0726, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.030814162663224058, |
| "grad_norm": 0.90625, |
| "learning_rate": 1.2162162162162164e-05, |
| "loss": 0.0958, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.0311527798353474, |
| "grad_norm": 0.65234375, |
| "learning_rate": 1.2297297297297299e-05, |
| "loss": 0.0637, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.031491397007470744, |
| "grad_norm": 0.765625, |
| "learning_rate": 1.2432432432432433e-05, |
| "loss": 0.0935, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.03183001417959408, |
| "grad_norm": 0.67578125, |
| "learning_rate": 1.2567567567567568e-05, |
| "loss": 0.0661, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.03216863135171742, |
| "grad_norm": 0.8671875, |
| "learning_rate": 1.2702702702702702e-05, |
| "loss": 0.0871, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.032507248523840765, |
| "grad_norm": 1.5234375, |
| "learning_rate": 1.283783783783784e-05, |
| "loss": 0.1231, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.03284586569596411, |
| "grad_norm": 0.91015625, |
| "learning_rate": 1.2972972972972975e-05, |
| "loss": 0.0777, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.03318448286808745, |
| "grad_norm": 1.09375, |
| "learning_rate": 1.3108108108108109e-05, |
| "loss": 0.1009, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.033523100040210786, |
| "grad_norm": 0.78515625, |
| "learning_rate": 1.3243243243243244e-05, |
| "loss": 0.0944, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.03386171721233413, |
| "grad_norm": 0.734375, |
| "learning_rate": 1.3378378378378381e-05, |
| "loss": 0.0649, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.03420033438445747, |
| "grad_norm": 0.71875, |
| "learning_rate": 1.3513513513513515e-05, |
| "loss": 0.0813, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.034538951556580814, |
| "grad_norm": 0.76171875, |
| "learning_rate": 1.364864864864865e-05, |
| "loss": 0.0726, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.03487756872870416, |
| "grad_norm": 0.75, |
| "learning_rate": 1.3783783783783784e-05, |
| "loss": 0.0783, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.03521618590082749, |
| "grad_norm": 0.7109375, |
| "learning_rate": 1.391891891891892e-05, |
| "loss": 0.0656, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.035554803072950836, |
| "grad_norm": 0.73046875, |
| "learning_rate": 1.4054054054054055e-05, |
| "loss": 0.0719, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.03589342024507418, |
| "grad_norm": 0.79296875, |
| "learning_rate": 1.4189189189189189e-05, |
| "loss": 0.0821, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.03623203741719752, |
| "grad_norm": 0.609375, |
| "learning_rate": 1.4324324324324326e-05, |
| "loss": 0.075, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.036570654589320864, |
| "grad_norm": 0.7109375, |
| "learning_rate": 1.4459459459459462e-05, |
| "loss": 0.0753, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.0369092717614442, |
| "grad_norm": 0.796875, |
| "learning_rate": 1.4594594594594596e-05, |
| "loss": 0.0976, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.03724788893356754, |
| "grad_norm": 0.6953125, |
| "learning_rate": 1.4729729729729731e-05, |
| "loss": 0.0781, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.037586506105690885, |
| "grad_norm": 0.74609375, |
| "learning_rate": 1.4864864864864865e-05, |
| "loss": 0.0808, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.03792512327781423, |
| "grad_norm": 0.90625, |
| "learning_rate": 1.5000000000000002e-05, |
| "loss": 0.0816, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.03826374044993757, |
| "grad_norm": 0.828125, |
| "learning_rate": 1.5135135135135138e-05, |
| "loss": 0.082, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.03860235762206091, |
| "grad_norm": 0.703125, |
| "learning_rate": 1.527027027027027e-05, |
| "loss": 0.0811, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.03894097479418425, |
| "grad_norm": 0.73046875, |
| "learning_rate": 1.540540540540541e-05, |
| "loss": 0.0754, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.03927959196630759, |
| "grad_norm": 0.76171875, |
| "learning_rate": 1.554054054054054e-05, |
| "loss": 0.0891, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.039618209138430935, |
| "grad_norm": 0.77734375, |
| "learning_rate": 1.5675675675675676e-05, |
| "loss": 0.1031, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.03995682631055427, |
| "grad_norm": 0.71875, |
| "learning_rate": 1.581081081081081e-05, |
| "loss": 0.0718, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.04029544348267761, |
| "grad_norm": 0.64453125, |
| "learning_rate": 1.5945945945945947e-05, |
| "loss": 0.0749, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.040634060654800956, |
| "grad_norm": 0.73046875, |
| "learning_rate": 1.6081081081081083e-05, |
| "loss": 0.0815, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.0409726778269243, |
| "grad_norm": 0.74609375, |
| "learning_rate": 1.6216216216216218e-05, |
| "loss": 0.0832, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.04131129499904764, |
| "grad_norm": 0.6875, |
| "learning_rate": 1.6351351351351354e-05, |
| "loss": 0.0703, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.04164991217117098, |
| "grad_norm": 0.81640625, |
| "learning_rate": 1.648648648648649e-05, |
| "loss": 0.0965, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.04198852934329432, |
| "grad_norm": 0.8671875, |
| "learning_rate": 1.662162162162162e-05, |
| "loss": 0.0811, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.04232714651541766, |
| "grad_norm": 0.578125, |
| "learning_rate": 1.6756756756756757e-05, |
| "loss": 0.0647, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.042665763687541006, |
| "grad_norm": 0.64453125, |
| "learning_rate": 1.6891891891891896e-05, |
| "loss": 0.0717, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.04300438085966435, |
| "grad_norm": 1.125, |
| "learning_rate": 1.7027027027027028e-05, |
| "loss": 0.1835, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.043342998031787684, |
| "grad_norm": 0.66015625, |
| "learning_rate": 1.7162162162162163e-05, |
| "loss": 0.0627, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.04368161520391103, |
| "grad_norm": 0.59375, |
| "learning_rate": 1.72972972972973e-05, |
| "loss": 0.0673, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.04402023237603437, |
| "grad_norm": 0.7421875, |
| "learning_rate": 1.7432432432432434e-05, |
| "loss": 0.0917, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.04435884954815771, |
| "grad_norm": 0.828125, |
| "learning_rate": 1.756756756756757e-05, |
| "loss": 0.094, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.044697466720281055, |
| "grad_norm": 0.86328125, |
| "learning_rate": 1.7702702702702702e-05, |
| "loss": 0.0912, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.04503608389240439, |
| "grad_norm": 0.9140625, |
| "learning_rate": 1.783783783783784e-05, |
| "loss": 0.1102, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.045374701064527734, |
| "grad_norm": 0.6953125, |
| "learning_rate": 1.7972972972972976e-05, |
| "loss": 0.0581, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.045713318236651077, |
| "grad_norm": 0.66015625, |
| "learning_rate": 1.8108108108108108e-05, |
| "loss": 0.0591, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.04605193540877442, |
| "grad_norm": 0.765625, |
| "learning_rate": 1.8243243243243244e-05, |
| "loss": 0.0896, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.04639055258089776, |
| "grad_norm": 0.83203125, |
| "learning_rate": 1.8378378378378383e-05, |
| "loss": 0.0733, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.0467291697530211, |
| "grad_norm": 1.234375, |
| "learning_rate": 1.8513513513513515e-05, |
| "loss": 0.0883, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.04706778692514444, |
| "grad_norm": 0.84765625, |
| "learning_rate": 1.864864864864865e-05, |
| "loss": 0.0886, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.04740640409726778, |
| "grad_norm": 0.85546875, |
| "learning_rate": 1.8783783783783786e-05, |
| "loss": 0.1031, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.047745021269391126, |
| "grad_norm": 0.86328125, |
| "learning_rate": 1.891891891891892e-05, |
| "loss": 0.0936, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.04808363844151446, |
| "grad_norm": 0.640625, |
| "learning_rate": 1.9054054054054057e-05, |
| "loss": 0.0706, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.048422255613637805, |
| "grad_norm": 0.84375, |
| "learning_rate": 1.918918918918919e-05, |
| "loss": 0.1006, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.04876087278576115, |
| "grad_norm": 0.62109375, |
| "learning_rate": 1.9324324324324328e-05, |
| "loss": 0.0715, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.04909948995788449, |
| "grad_norm": 0.7421875, |
| "learning_rate": 1.9459459459459463e-05, |
| "loss": 0.0856, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.04943810713000783, |
| "grad_norm": 0.83984375, |
| "learning_rate": 1.9594594594594595e-05, |
| "loss": 0.0923, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.04977672430213117, |
| "grad_norm": 0.75, |
| "learning_rate": 1.972972972972973e-05, |
| "loss": 0.0825, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.05011534147425451, |
| "grad_norm": 0.62890625, |
| "learning_rate": 1.9864864864864866e-05, |
| "loss": 0.0588, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.050453958646377854, |
| "grad_norm": 0.81640625, |
| "learning_rate": 2e-05, |
| "loss": 0.1127, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.0507925758185012, |
| "grad_norm": 0.5703125, |
| "learning_rate": 1.9999993732499594e-05, |
| "loss": 0.0628, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.05113119299062454, |
| "grad_norm": 0.64453125, |
| "learning_rate": 1.9999974930006222e-05, |
| "loss": 0.0637, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.051469810162747875, |
| "grad_norm": 0.6875, |
| "learning_rate": 1.999994359254346e-05, |
| "loss": 0.0706, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.05180842733487122, |
| "grad_norm": 0.71875, |
| "learning_rate": 1.999989972015058e-05, |
| "loss": 0.0876, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.05214704450699456, |
| "grad_norm": 0.59765625, |
| "learning_rate": 1.9999843312882592e-05, |
| "loss": 0.0708, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.052485661679117904, |
| "grad_norm": 0.59765625, |
| "learning_rate": 1.9999774370810187e-05, |
| "loss": 0.0571, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.052824278851241246, |
| "grad_norm": 0.8203125, |
| "learning_rate": 1.9999692894019792e-05, |
| "loss": 0.0943, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.05316289602336458, |
| "grad_norm": 0.5546875, |
| "learning_rate": 1.9999598882613537e-05, |
| "loss": 0.0712, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.053501513195487925, |
| "grad_norm": 0.69921875, |
| "learning_rate": 1.9999492336709263e-05, |
| "loss": 0.0836, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.05384013036761127, |
| "grad_norm": 0.6328125, |
| "learning_rate": 1.999937325644053e-05, |
| "loss": 0.0626, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.05417874753973461, |
| "grad_norm": 1.890625, |
| "learning_rate": 1.99992416419566e-05, |
| "loss": 0.0855, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.05451736471185795, |
| "grad_norm": 0.75, |
| "learning_rate": 1.9999097493422453e-05, |
| "loss": 0.082, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.05485598188398129, |
| "grad_norm": 0.6640625, |
| "learning_rate": 1.9998940811018782e-05, |
| "loss": 0.0769, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.05519459905610463, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.9998771594941983e-05, |
| "loss": 0.0691, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.055533216228227975, |
| "grad_norm": 0.68359375, |
| "learning_rate": 1.9998589845404176e-05, |
| "loss": 0.0687, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.05587183340035132, |
| "grad_norm": 0.703125, |
| "learning_rate": 1.9998395562633176e-05, |
| "loss": 0.0738, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.05621045057247466, |
| "grad_norm": 1.5234375, |
| "learning_rate": 1.9998188746872523e-05, |
| "loss": 0.0891, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.056549067744597996, |
| "grad_norm": 0.62890625, |
| "learning_rate": 1.9997969398381454e-05, |
| "loss": 0.0911, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.05688768491672134, |
| "grad_norm": 0.76171875, |
| "learning_rate": 1.9997737517434932e-05, |
| "loss": 0.0826, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.05722630208884468, |
| "grad_norm": 0.81640625, |
| "learning_rate": 1.9997493104323607e-05, |
| "loss": 0.0955, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.057564919260968024, |
| "grad_norm": 0.640625, |
| "learning_rate": 1.9997236159353864e-05, |
| "loss": 0.0804, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.05790353643309136, |
| "grad_norm": 0.67578125, |
| "learning_rate": 1.9996966682847776e-05, |
| "loss": 0.0777, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.0582421536052147, |
| "grad_norm": 0.703125, |
| "learning_rate": 1.9996684675143132e-05, |
| "loss": 0.0823, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.058580770777338045, |
| "grad_norm": 0.640625, |
| "learning_rate": 1.999639013659343e-05, |
| "loss": 0.09, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.05891938794946139, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.9996083067567876e-05, |
| "loss": 0.108, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.05925800512158473, |
| "grad_norm": 0.78515625, |
| "learning_rate": 1.9995763468451376e-05, |
| "loss": 0.0663, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.05959662229370807, |
| "grad_norm": 0.64453125, |
| "learning_rate": 1.9995431339644552e-05, |
| "loss": 0.0856, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.05993523946583141, |
| "grad_norm": 0.76171875, |
| "learning_rate": 1.9995086681563725e-05, |
| "loss": 0.0879, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.06027385663795475, |
| "grad_norm": 0.671875, |
| "learning_rate": 1.999472949464093e-05, |
| "loss": 0.0747, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.060612473810078095, |
| "grad_norm": 0.8359375, |
| "learning_rate": 1.9994359779323892e-05, |
| "loss": 0.1313, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.06095109098220144, |
| "grad_norm": 0.66796875, |
| "learning_rate": 1.9993977536076052e-05, |
| "loss": 0.0781, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.061289708154324773, |
| "grad_norm": 0.68359375, |
| "learning_rate": 1.999358276537655e-05, |
| "loss": 0.0731, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.061628325326448116, |
| "grad_norm": 0.67578125, |
| "learning_rate": 1.9993175467720242e-05, |
| "loss": 0.0756, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.06196694249857146, |
| "grad_norm": 0.61328125, |
| "learning_rate": 1.9992755643617663e-05, |
| "loss": 0.0743, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.0623055596706948, |
| "grad_norm": 0.62109375, |
| "learning_rate": 1.9992323293595065e-05, |
| "loss": 0.0743, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.06264417684281814, |
| "grad_norm": 0.69921875, |
| "learning_rate": 1.9991878418194407e-05, |
| "loss": 0.0809, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.06298279401494149, |
| "grad_norm": 0.66015625, |
| "learning_rate": 1.9991421017973328e-05, |
| "loss": 0.0751, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.06332141118706483, |
| "grad_norm": 0.765625, |
| "learning_rate": 1.999095109350519e-05, |
| "loss": 0.0868, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.06366002835918816, |
| "grad_norm": 0.58203125, |
| "learning_rate": 1.9990468645379038e-05, |
| "loss": 0.0625, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.0639986455313115, |
| "grad_norm": 0.7421875, |
| "learning_rate": 1.998997367419962e-05, |
| "loss": 0.0829, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.06433726270343484, |
| "grad_norm": 0.65234375, |
| "learning_rate": 1.9989466180587386e-05, |
| "loss": 0.0729, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.06467587987555819, |
| "grad_norm": 0.84375, |
| "learning_rate": 1.998894616517848e-05, |
| "loss": 0.1061, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.06501449704768153, |
| "grad_norm": 0.62109375, |
| "learning_rate": 1.998841362862473e-05, |
| "loss": 0.0675, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.06535311421980487, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.998786857159369e-05, |
| "loss": 0.0581, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.06569173139192822, |
| "grad_norm": 0.640625, |
| "learning_rate": 1.9987310994768573e-05, |
| "loss": 0.0743, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.06603034856405156, |
| "grad_norm": 0.57421875, |
| "learning_rate": 1.9986740898848306e-05, |
| "loss": 0.0655, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.0663689657361749, |
| "grad_norm": 0.73046875, |
| "learning_rate": 1.998615828454751e-05, |
| "loss": 0.0885, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.06670758290829824, |
| "grad_norm": 0.62890625, |
| "learning_rate": 1.998556315259648e-05, |
| "loss": 0.0657, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.06704620008042157, |
| "grad_norm": 0.65234375, |
| "learning_rate": 1.9984955503741227e-05, |
| "loss": 0.0742, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.06738481725254492, |
| "grad_norm": 0.73046875, |
| "learning_rate": 1.998433533874343e-05, |
| "loss": 0.0862, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.06772343442466826, |
| "grad_norm": 0.77734375, |
| "learning_rate": 1.9983702658380474e-05, |
| "loss": 0.1001, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.0680620515967916, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.9983057463445415e-05, |
| "loss": 0.0688, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.06840066876891494, |
| "grad_norm": 0.6953125, |
| "learning_rate": 1.998239975474701e-05, |
| "loss": 0.0882, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.06873928594103829, |
| "grad_norm": 0.64453125, |
| "learning_rate": 1.9981729533109694e-05, |
| "loss": 0.064, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.06907790311316163, |
| "grad_norm": 0.5703125, |
| "learning_rate": 1.9981046799373595e-05, |
| "loss": 0.0665, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.06941652028528497, |
| "grad_norm": 0.66796875, |
| "learning_rate": 1.9980351554394514e-05, |
| "loss": 0.0861, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.06975513745740831, |
| "grad_norm": 0.57421875, |
| "learning_rate": 1.9979643799043945e-05, |
| "loss": 0.0691, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.07009375462953164, |
| "grad_norm": 0.85546875, |
| "learning_rate": 1.9978923534209052e-05, |
| "loss": 0.1439, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.07043237180165499, |
| "grad_norm": 0.59765625, |
| "learning_rate": 1.9978190760792698e-05, |
| "loss": 0.0725, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.07077098897377833, |
| "grad_norm": 0.55859375, |
| "learning_rate": 1.997744547971341e-05, |
| "loss": 0.0732, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.07110960614590167, |
| "grad_norm": 0.60546875, |
| "learning_rate": 1.9976687691905394e-05, |
| "loss": 0.0718, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.07144822331802501, |
| "grad_norm": 0.78125, |
| "learning_rate": 1.997591739831854e-05, |
| "loss": 0.0774, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.07178684049014836, |
| "grad_norm": 0.57421875, |
| "learning_rate": 1.9975134599918414e-05, |
| "loss": 0.0717, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.0721254576622717, |
| "grad_norm": 0.478515625, |
| "learning_rate": 1.9974339297686246e-05, |
| "loss": 0.0601, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.07246407483439504, |
| "grad_norm": 0.6484375, |
| "learning_rate": 1.9973531492618956e-05, |
| "loss": 0.0813, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.07280269200651839, |
| "grad_norm": 0.578125, |
| "learning_rate": 1.9972711185729124e-05, |
| "loss": 0.0679, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.07314130917864173, |
| "grad_norm": 0.6484375, |
| "learning_rate": 1.9971878378045005e-05, |
| "loss": 0.0735, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.07347992635076506, |
| "grad_norm": 0.640625, |
| "learning_rate": 1.997103307061052e-05, |
| "loss": 0.0756, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.0738185435228884, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.9970175264485268e-05, |
| "loss": 0.0679, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.07415716069501174, |
| "grad_norm": 0.5390625, |
| "learning_rate": 1.9969304960744508e-05, |
| "loss": 0.0684, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.07449577786713509, |
| "grad_norm": 0.73828125, |
| "learning_rate": 1.996842216047916e-05, |
| "loss": 0.0952, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.07483439503925843, |
| "grad_norm": 0.6328125, |
| "learning_rate": 1.996752686479582e-05, |
| "loss": 0.0888, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.07517301221138177, |
| "grad_norm": 0.49609375, |
| "learning_rate": 1.996661907481674e-05, |
| "loss": 0.0633, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.07551162938350511, |
| "grad_norm": 0.82421875, |
| "learning_rate": 1.9965698791679834e-05, |
| "loss": 0.0985, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.07585024655562846, |
| "grad_norm": 0.69921875, |
| "learning_rate": 1.996476601653868e-05, |
| "loss": 0.0888, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.0761888637277518, |
| "grad_norm": 0.76171875, |
| "learning_rate": 1.9963820750562506e-05, |
| "loss": 0.0905, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.07652748089987514, |
| "grad_norm": 0.67578125, |
| "learning_rate": 1.9962862994936207e-05, |
| "loss": 0.0802, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.07686609807199847, |
| "grad_norm": 0.625, |
| "learning_rate": 1.996189275086033e-05, |
| "loss": 0.0809, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.07720471524412181, |
| "grad_norm": 0.60546875, |
| "learning_rate": 1.9960910019551073e-05, |
| "loss": 0.0823, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.07754333241624516, |
| "grad_norm": 0.6171875, |
| "learning_rate": 1.9959914802240293e-05, |
| "loss": 0.0713, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.0778819495883685, |
| "grad_norm": 0.59375, |
| "learning_rate": 1.9958907100175492e-05, |
| "loss": 0.0754, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.07822056676049184, |
| "grad_norm": 0.796875, |
| "learning_rate": 1.9957886914619826e-05, |
| "loss": 0.0887, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.07855918393261518, |
| "grad_norm": 0.5546875, |
| "learning_rate": 1.99568542468521e-05, |
| "loss": 0.0667, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.07889780110473853, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.995580909816676e-05, |
| "loss": 0.0622, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.07923641827686187, |
| "grad_norm": 0.62109375, |
| "learning_rate": 1.99547514698739e-05, |
| "loss": 0.0765, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.07957503544898521, |
| "grad_norm": 0.68359375, |
| "learning_rate": 1.9953681363299258e-05, |
| "loss": 0.0936, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.07991365262110854, |
| "grad_norm": 0.48046875, |
| "learning_rate": 1.9952598779784214e-05, |
| "loss": 0.0574, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.08025226979323188, |
| "grad_norm": 0.59765625, |
| "learning_rate": 1.9951503720685784e-05, |
| "loss": 0.0716, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.08059088696535523, |
| "grad_norm": 0.5859375, |
| "learning_rate": 1.9950396187376628e-05, |
| "loss": 0.0781, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.08092950413747857, |
| "grad_norm": 0.58203125, |
| "learning_rate": 1.9949276181245037e-05, |
| "loss": 0.0779, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.08126812130960191, |
| "grad_norm": 0.703125, |
| "learning_rate": 1.994814370369494e-05, |
| "loss": 0.1049, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.08160673848172526, |
| "grad_norm": 0.734375, |
| "learning_rate": 1.9946998756145894e-05, |
| "loss": 0.0857, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.0819453556538486, |
| "grad_norm": 0.59765625, |
| "learning_rate": 1.9945841340033093e-05, |
| "loss": 0.0722, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.08228397282597194, |
| "grad_norm": 0.6640625, |
| "learning_rate": 1.994467145680736e-05, |
| "loss": 0.083, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.08262258999809528, |
| "grad_norm": 0.8203125, |
| "learning_rate": 1.994348910793514e-05, |
| "loss": 0.0852, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.08296120717021863, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.9942294294898513e-05, |
| "loss": 0.063, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.08329982434234195, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.994108701919517e-05, |
| "loss": 0.0701, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.0836384415144653, |
| "grad_norm": 0.6953125, |
| "learning_rate": 1.993986728233844e-05, |
| "loss": 0.0869, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.08397705868658864, |
| "grad_norm": 0.52734375, |
| "learning_rate": 1.9938635085857257e-05, |
| "loss": 0.0619, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.08431567585871198, |
| "grad_norm": 0.578125, |
| "learning_rate": 1.993739043129618e-05, |
| "loss": 0.0643, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.08465429303083533, |
| "grad_norm": 0.6484375, |
| "learning_rate": 1.9936133320215385e-05, |
| "loss": 0.0753, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.08499291020295867, |
| "grad_norm": 0.6484375, |
| "learning_rate": 1.9934863754190662e-05, |
| "loss": 0.0535, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.08533152737508201, |
| "grad_norm": 0.671875, |
| "learning_rate": 1.9933581734813404e-05, |
| "loss": 0.0759, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.08567014454720535, |
| "grad_norm": 0.67578125, |
| "learning_rate": 1.9932287263690637e-05, |
| "loss": 0.0852, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.0860087617193287, |
| "grad_norm": 0.64453125, |
| "learning_rate": 1.9930980342444966e-05, |
| "loss": 0.07, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.08634737889145204, |
| "grad_norm": 0.81640625, |
| "learning_rate": 1.9929660972714626e-05, |
| "loss": 0.1072, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.08668599606357537, |
| "grad_norm": 2.0, |
| "learning_rate": 1.9928329156153444e-05, |
| "loss": 0.0896, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.08702461323569871, |
| "grad_norm": 0.63671875, |
| "learning_rate": 1.992698489443085e-05, |
| "loss": 0.0809, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.08736323040782205, |
| "grad_norm": 0.91015625, |
| "learning_rate": 1.9925628189231885e-05, |
| "loss": 0.1163, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.0877018475799454, |
| "grad_norm": 0.64453125, |
| "learning_rate": 1.992425904225717e-05, |
| "loss": 0.0763, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.08804046475206874, |
| "grad_norm": 0.8046875, |
| "learning_rate": 1.9922877455222932e-05, |
| "loss": 0.069, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.08837908192419208, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.992148342986099e-05, |
| "loss": 0.0689, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.08871769909631542, |
| "grad_norm": 0.625, |
| "learning_rate": 1.9920076967918762e-05, |
| "loss": 0.0634, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.08905631626843877, |
| "grad_norm": 0.72265625, |
| "learning_rate": 1.9918658071159243e-05, |
| "loss": 0.0745, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.08939493344056211, |
| "grad_norm": 0.68359375, |
| "learning_rate": 1.9917226741361014e-05, |
| "loss": 0.0769, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.08973355061268544, |
| "grad_norm": 0.671875, |
| "learning_rate": 1.991578298031826e-05, |
| "loss": 0.0794, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.09007216778480878, |
| "grad_norm": 0.6640625, |
| "learning_rate": 1.9914326789840728e-05, |
| "loss": 0.0771, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.09041078495693212, |
| "grad_norm": 0.90234375, |
| "learning_rate": 1.991285817175375e-05, |
| "loss": 0.0841, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.09074940212905547, |
| "grad_norm": 0.64453125, |
| "learning_rate": 1.991137712789825e-05, |
| "loss": 0.0692, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.09108801930117881, |
| "grad_norm": 0.81640625, |
| "learning_rate": 1.9909883660130703e-05, |
| "loss": 0.0986, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.09142663647330215, |
| "grad_norm": 0.8203125, |
| "learning_rate": 1.9908377770323178e-05, |
| "loss": 0.0774, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.0917652536454255, |
| "grad_norm": 0.5859375, |
| "learning_rate": 1.9906859460363307e-05, |
| "loss": 0.0663, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.09210387081754884, |
| "grad_norm": 0.69921875, |
| "learning_rate": 1.9905328732154294e-05, |
| "loss": 0.0812, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.09244248798967218, |
| "grad_norm": 0.69140625, |
| "learning_rate": 1.9903785587614907e-05, |
| "loss": 0.0833, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.09278110516179552, |
| "grad_norm": 0.63671875, |
| "learning_rate": 1.990223002867947e-05, |
| "loss": 0.0854, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.09311972233391885, |
| "grad_norm": 1.375, |
| "learning_rate": 1.9900662057297886e-05, |
| "loss": 0.0741, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.0934583395060422, |
| "grad_norm": 0.69921875, |
| "learning_rate": 1.9899081675435604e-05, |
| "loss": 0.0868, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.09379695667816554, |
| "grad_norm": 0.56640625, |
| "learning_rate": 1.989748888507363e-05, |
| "loss": 0.0632, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.09413557385028888, |
| "grad_norm": 0.6015625, |
| "learning_rate": 1.9895883688208527e-05, |
| "loss": 0.0696, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.09447419102241222, |
| "grad_norm": 0.5546875, |
| "learning_rate": 1.9894266086852414e-05, |
| "loss": 0.0657, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.09481280819453557, |
| "grad_norm": 0.6484375, |
| "learning_rate": 1.989263608303295e-05, |
| "loss": 0.0742, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.09515142536665891, |
| "grad_norm": 0.734375, |
| "learning_rate": 1.989099367879335e-05, |
| "loss": 0.0787, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.09549004253878225, |
| "grad_norm": 0.5625, |
| "learning_rate": 1.9889338876192365e-05, |
| "loss": 0.0675, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.0958286597109056, |
| "grad_norm": 0.5859375, |
| "learning_rate": 1.9887671677304285e-05, |
| "loss": 0.073, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.09616727688302892, |
| "grad_norm": 0.6171875, |
| "learning_rate": 1.9885992084218948e-05, |
| "loss": 0.0574, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.09650589405515227, |
| "grad_norm": 0.8671875, |
| "learning_rate": 1.9884300099041728e-05, |
| "loss": 0.1312, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.09684451122727561, |
| "grad_norm": 0.69140625, |
| "learning_rate": 1.9882595723893525e-05, |
| "loss": 0.0594, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.09718312839939895, |
| "grad_norm": 0.51171875, |
| "learning_rate": 1.9880878960910772e-05, |
| "loss": 0.0683, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.0975217455715223, |
| "grad_norm": 0.6484375, |
| "learning_rate": 1.9879149812245434e-05, |
| "loss": 0.0887, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.09786036274364564, |
| "grad_norm": 0.57421875, |
| "learning_rate": 1.9877408280065e-05, |
| "loss": 0.0788, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.09819897991576898, |
| "grad_norm": 0.578125, |
| "learning_rate": 1.9875654366552476e-05, |
| "loss": 0.0716, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.09853759708789232, |
| "grad_norm": 0.67578125, |
| "learning_rate": 1.9873888073906396e-05, |
| "loss": 0.0902, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.09887621426001567, |
| "grad_norm": 3.21875, |
| "learning_rate": 1.987210940434081e-05, |
| "loss": 0.0776, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.09921483143213901, |
| "grad_norm": 0.6171875, |
| "learning_rate": 1.9870318360085277e-05, |
| "loss": 0.0828, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.09955344860426234, |
| "grad_norm": 0.5703125, |
| "learning_rate": 1.9868514943384872e-05, |
| "loss": 0.0727, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.09989206577638568, |
| "grad_norm": 0.6484375, |
| "learning_rate": 1.9866699156500177e-05, |
| "loss": 0.0831, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.10023068294850902, |
| "grad_norm": 0.73828125, |
| "learning_rate": 1.986487100170728e-05, |
| "loss": 0.0895, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.10023068294850902, |
| "eval_loss": 0.0773262232542038, |
| "eval_runtime": 833.2157, |
| "eval_samples_per_second": 11.939, |
| "eval_steps_per_second": 2.985, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.10056930012063237, |
| "grad_norm": 0.60546875, |
| "learning_rate": 1.986303048129778e-05, |
| "loss": 0.0779, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.10090791729275571, |
| "grad_norm": 0.8515625, |
| "learning_rate": 1.9861177597578765e-05, |
| "loss": 0.0699, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.10124653446487905, |
| "grad_norm": 0.60546875, |
| "learning_rate": 1.9859312352872822e-05, |
| "loss": 0.0706, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.1015851516370024, |
| "grad_norm": 0.76953125, |
| "learning_rate": 1.985743474951804e-05, |
| "loss": 0.0926, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.10192376880912574, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.985554478986799e-05, |
| "loss": 0.0685, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.10226238598124908, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.9853642476291743e-05, |
| "loss": 0.0623, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.10260100315337242, |
| "grad_norm": 0.5625, |
| "learning_rate": 1.9851727811173844e-05, |
| "loss": 0.0708, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.10293962032549575, |
| "grad_norm": 0.71484375, |
| "learning_rate": 1.984980079691433e-05, |
| "loss": 0.0816, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.1032782374976191, |
| "grad_norm": 0.609375, |
| "learning_rate": 1.9847861435928708e-05, |
| "loss": 0.0685, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.10361685466974244, |
| "grad_norm": 0.66015625, |
| "learning_rate": 1.984590973064797e-05, |
| "loss": 0.0951, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.10395547184186578, |
| "grad_norm": 0.62890625, |
| "learning_rate": 1.984394568351858e-05, |
| "loss": 0.0931, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.10429408901398912, |
| "grad_norm": 0.6015625, |
| "learning_rate": 1.9841969297002473e-05, |
| "loss": 0.0701, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.10463270618611246, |
| "grad_norm": 0.6484375, |
| "learning_rate": 1.9839980573577046e-05, |
| "loss": 0.0865, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.10497132335823581, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.9837979515735168e-05, |
| "loss": 0.0716, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.10530994053035915, |
| "grad_norm": 0.80859375, |
| "learning_rate": 1.9835966125985155e-05, |
| "loss": 0.0832, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.10564855770248249, |
| "grad_norm": 0.62890625, |
| "learning_rate": 1.9833940406850805e-05, |
| "loss": 0.0777, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.10598717487460582, |
| "grad_norm": 0.6015625, |
| "learning_rate": 1.9831902360871344e-05, |
| "loss": 0.0747, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.10632579204672916, |
| "grad_norm": 0.59375, |
| "learning_rate": 1.9829851990601475e-05, |
| "loss": 0.0761, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.10666440921885251, |
| "grad_norm": 0.484375, |
| "learning_rate": 1.982778929861133e-05, |
| "loss": 0.0632, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.10700302639097585, |
| "grad_norm": 0.6796875, |
| "learning_rate": 1.9825714287486493e-05, |
| "loss": 0.0886, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.10734164356309919, |
| "grad_norm": 0.4921875, |
| "learning_rate": 1.9823626959827997e-05, |
| "loss": 0.0639, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.10768026073522254, |
| "grad_norm": 0.671875, |
| "learning_rate": 1.98215273182523e-05, |
| "loss": 0.0787, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.10801887790734588, |
| "grad_norm": 0.73046875, |
| "learning_rate": 1.9819415365391307e-05, |
| "loss": 0.1027, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.10835749507946922, |
| "grad_norm": 0.6328125, |
| "learning_rate": 1.9817291103892348e-05, |
| "loss": 0.0793, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.10869611225159256, |
| "grad_norm": 0.6796875, |
| "learning_rate": 1.981515453641819e-05, |
| "loss": 0.0799, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.1090347294237159, |
| "grad_norm": 0.81640625, |
| "learning_rate": 1.9813005665647017e-05, |
| "loss": 0.1096, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.10937334659583924, |
| "grad_norm": 0.48828125, |
| "learning_rate": 1.981084449427244e-05, |
| "loss": 0.0687, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.10971196376796258, |
| "grad_norm": 0.65234375, |
| "learning_rate": 1.9808671025003487e-05, |
| "loss": 0.0751, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.11005058094008592, |
| "grad_norm": 0.64453125, |
| "learning_rate": 1.9806485260564597e-05, |
| "loss": 0.0686, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.11038919811220926, |
| "grad_norm": 0.57421875, |
| "learning_rate": 1.9804287203695636e-05, |
| "loss": 0.0608, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.1107278152843326, |
| "grad_norm": 0.67578125, |
| "learning_rate": 1.9802076857151863e-05, |
| "loss": 0.1027, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.11106643245645595, |
| "grad_norm": 0.640625, |
| "learning_rate": 1.9799854223703943e-05, |
| "loss": 0.0796, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.11140504962857929, |
| "grad_norm": 0.71484375, |
| "learning_rate": 1.9797619306137958e-05, |
| "loss": 0.0817, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.11174366680070263, |
| "grad_norm": 0.56640625, |
| "learning_rate": 1.9795372107255368e-05, |
| "loss": 0.0582, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.11208228397282598, |
| "grad_norm": 0.703125, |
| "learning_rate": 1.979311262987304e-05, |
| "loss": 0.0996, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.11242090114494932, |
| "grad_norm": 0.494140625, |
| "learning_rate": 1.979084087682323e-05, |
| "loss": 0.0598, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.11275951831707265, |
| "grad_norm": 0.5078125, |
| "learning_rate": 1.978855685095358e-05, |
| "loss": 0.0623, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.11309813548919599, |
| "grad_norm": 0.490234375, |
| "learning_rate": 1.9786260555127116e-05, |
| "loss": 0.0582, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.11343675266131933, |
| "grad_norm": 0.71875, |
| "learning_rate": 1.9783951992222246e-05, |
| "loss": 0.091, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.11377536983344268, |
| "grad_norm": 0.5859375, |
| "learning_rate": 1.9781631165132755e-05, |
| "loss": 0.0793, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.11411398700556602, |
| "grad_norm": 0.51953125, |
| "learning_rate": 1.9779298076767795e-05, |
| "loss": 0.0565, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.11445260417768936, |
| "grad_norm": 0.59765625, |
| "learning_rate": 1.9776952730051896e-05, |
| "loss": 0.0736, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.1147912213498127, |
| "grad_norm": 0.67578125, |
| "learning_rate": 1.9774595127924955e-05, |
| "loss": 0.0834, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.11512983852193605, |
| "grad_norm": 0.5390625, |
| "learning_rate": 1.9772225273342216e-05, |
| "loss": 0.0604, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.11546845569405939, |
| "grad_norm": 0.65234375, |
| "learning_rate": 1.97698431692743e-05, |
| "loss": 0.0674, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.11580707286618272, |
| "grad_norm": 0.68359375, |
| "learning_rate": 1.976744881870717e-05, |
| "loss": 0.0729, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.11614569003830606, |
| "grad_norm": 0.64453125, |
| "learning_rate": 1.9765042224642146e-05, |
| "loss": 0.0758, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.1164843072104294, |
| "grad_norm": 0.5859375, |
| "learning_rate": 1.9762623390095897e-05, |
| "loss": 0.0778, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.11682292438255275, |
| "grad_norm": 0.6875, |
| "learning_rate": 1.976019231810043e-05, |
| "loss": 0.079, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.11716154155467609, |
| "grad_norm": 0.5703125, |
| "learning_rate": 1.9757749011703095e-05, |
| "loss": 0.0729, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.11750015872679943, |
| "grad_norm": 0.625, |
| "learning_rate": 1.9755293473966574e-05, |
| "loss": 0.069, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.11783877589892278, |
| "grad_norm": 0.60546875, |
| "learning_rate": 1.9752825707968884e-05, |
| "loss": 0.0707, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.11817739307104612, |
| "grad_norm": 0.546875, |
| "learning_rate": 1.975034571680337e-05, |
| "loss": 0.0582, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.11851601024316946, |
| "grad_norm": 0.65625, |
| "learning_rate": 1.9747853503578708e-05, |
| "loss": 0.073, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.1188546274152928, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.9745349071418877e-05, |
| "loss": 0.0576, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.11919324458741613, |
| "grad_norm": 0.68359375, |
| "learning_rate": 1.974283242346319e-05, |
| "loss": 0.0855, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.11953186175953948, |
| "grad_norm": 0.7890625, |
| "learning_rate": 1.974030356286626e-05, |
| "loss": 0.0982, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.11987047893166282, |
| "grad_norm": 0.546875, |
| "learning_rate": 1.9737762492798018e-05, |
| "loss": 0.0632, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.12020909610378616, |
| "grad_norm": 0.53125, |
| "learning_rate": 1.97352092164437e-05, |
| "loss": 0.0637, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.1205477132759095, |
| "grad_norm": 0.6328125, |
| "learning_rate": 1.9732643737003827e-05, |
| "loss": 0.0851, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.12088633044803285, |
| "grad_norm": 0.62109375, |
| "learning_rate": 1.9730066057694236e-05, |
| "loss": 0.0726, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.12122494762015619, |
| "grad_norm": 0.64453125, |
| "learning_rate": 1.9727476181746045e-05, |
| "loss": 0.0977, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.12156356479227953, |
| "grad_norm": 0.70703125, |
| "learning_rate": 1.9724874112405663e-05, |
| "loss": 0.0807, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.12190218196440288, |
| "grad_norm": 0.51171875, |
| "learning_rate": 1.9722259852934785e-05, |
| "loss": 0.0616, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.1222407991365262, |
| "grad_norm": 0.5546875, |
| "learning_rate": 1.971963340661039e-05, |
| "loss": 0.0783, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.12257941630864955, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.971699477672472e-05, |
| "loss": 0.0684, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.12291803348077289, |
| "grad_norm": 0.5390625, |
| "learning_rate": 1.9714343966585308e-05, |
| "loss": 0.0636, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.12325665065289623, |
| "grad_norm": 0.51171875, |
| "learning_rate": 1.9711680979514936e-05, |
| "loss": 0.0655, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.12359526782501958, |
| "grad_norm": 0.60546875, |
| "learning_rate": 1.970900581885166e-05, |
| "loss": 0.0897, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.12393388499714292, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.97063184879488e-05, |
| "loss": 0.0594, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.12427250216926626, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.9703618990174917e-05, |
| "loss": 0.0733, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.1246111193413896, |
| "grad_norm": 0.671875, |
| "learning_rate": 1.970090732891384e-05, |
| "loss": 0.1027, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.12494973651351295, |
| "grad_norm": 0.56640625, |
| "learning_rate": 1.9698183507564626e-05, |
| "loss": 0.0769, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.1252883536856363, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.96954475295416e-05, |
| "loss": 0.0639, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.12562697085775962, |
| "grad_norm": 0.498046875, |
| "learning_rate": 1.9692699398274298e-05, |
| "loss": 0.0608, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.12596558802988297, |
| "grad_norm": 0.53515625, |
| "learning_rate": 1.968993911720751e-05, |
| "loss": 0.0676, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.1263042052020063, |
| "grad_norm": 0.5078125, |
| "learning_rate": 1.9687166689801244e-05, |
| "loss": 0.065, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.12664282237412966, |
| "grad_norm": 0.76171875, |
| "learning_rate": 1.968438211953074e-05, |
| "loss": 0.1108, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.126981439546253, |
| "grad_norm": 0.6171875, |
| "learning_rate": 1.9681585409886454e-05, |
| "loss": 0.0755, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.12732005671837632, |
| "grad_norm": 0.5, |
| "learning_rate": 1.9678776564374068e-05, |
| "loss": 0.0649, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.12765867389049967, |
| "grad_norm": 0.68359375, |
| "learning_rate": 1.967595558651447e-05, |
| "loss": 0.081, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.127997291062623, |
| "grad_norm": 0.5703125, |
| "learning_rate": 1.9673122479843748e-05, |
| "loss": 0.0675, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.12833590823474636, |
| "grad_norm": 0.640625, |
| "learning_rate": 1.9670277247913205e-05, |
| "loss": 0.0803, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.1286745254068697, |
| "grad_norm": 0.58203125, |
| "learning_rate": 1.9667419894289345e-05, |
| "loss": 0.0778, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.12901314257899305, |
| "grad_norm": 0.48828125, |
| "learning_rate": 1.9664550422553852e-05, |
| "loss": 0.0565, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.12935175975111637, |
| "grad_norm": 0.66015625, |
| "learning_rate": 1.966166883630362e-05, |
| "loss": 0.0802, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.12969037692323973, |
| "grad_norm": 0.486328125, |
| "learning_rate": 1.9658775139150705e-05, |
| "loss": 0.0626, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.13002899409536306, |
| "grad_norm": 0.546875, |
| "learning_rate": 1.9655869334722363e-05, |
| "loss": 0.0667, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.13036761126748642, |
| "grad_norm": 0.53515625, |
| "learning_rate": 1.9652951426661025e-05, |
| "loss": 0.0552, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.13070622843960975, |
| "grad_norm": 0.66015625, |
| "learning_rate": 1.965002141862428e-05, |
| "loss": 0.06, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.13104484561173307, |
| "grad_norm": 0.5625, |
| "learning_rate": 1.9647079314284897e-05, |
| "loss": 0.0681, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.13138346278385643, |
| "grad_norm": 0.625, |
| "learning_rate": 1.9644125117330806e-05, |
| "loss": 0.0949, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.13172207995597976, |
| "grad_norm": 0.578125, |
| "learning_rate": 1.964115883146509e-05, |
| "loss": 0.0591, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.13206069712810312, |
| "grad_norm": 0.63671875, |
| "learning_rate": 1.9638180460405995e-05, |
| "loss": 0.0798, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.13239931430022644, |
| "grad_norm": 0.6484375, |
| "learning_rate": 1.96351900078869e-05, |
| "loss": 0.0617, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.1327379314723498, |
| "grad_norm": 0.53515625, |
| "learning_rate": 1.9632187477656342e-05, |
| "loss": 0.0765, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.13307654864447313, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.9629172873477995e-05, |
| "loss": 0.0888, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.1334151658165965, |
| "grad_norm": 0.56640625, |
| "learning_rate": 1.9626146199130664e-05, |
| "loss": 0.0678, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.13375378298871982, |
| "grad_norm": 0.640625, |
| "learning_rate": 1.962310745840828e-05, |
| "loss": 0.0658, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.13409240016084314, |
| "grad_norm": 0.4921875, |
| "learning_rate": 1.962005665511991e-05, |
| "loss": 0.0577, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.1344310173329665, |
| "grad_norm": 0.59765625, |
| "learning_rate": 1.961699379308974e-05, |
| "loss": 0.0693, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.13476963450508983, |
| "grad_norm": 0.58203125, |
| "learning_rate": 1.9613918876157062e-05, |
| "loss": 0.0795, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.1351082516772132, |
| "grad_norm": 0.65234375, |
| "learning_rate": 1.9610831908176285e-05, |
| "loss": 0.0647, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.13544686884933652, |
| "grad_norm": 0.546875, |
| "learning_rate": 1.9607732893016926e-05, |
| "loss": 0.0737, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.13578548602145987, |
| "grad_norm": 0.59765625, |
| "learning_rate": 1.9604621834563602e-05, |
| "loss": 0.0687, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.1361241031935832, |
| "grad_norm": 0.486328125, |
| "learning_rate": 1.960149873671602e-05, |
| "loss": 0.0583, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.13646272036570656, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.9598363603388986e-05, |
| "loss": 0.0702, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.1368013375378299, |
| "grad_norm": 0.64453125, |
| "learning_rate": 1.959521643851239e-05, |
| "loss": 0.0803, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.13713995470995322, |
| "grad_norm": 0.8515625, |
| "learning_rate": 1.9592057246031203e-05, |
| "loss": 0.0896, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.13747857188207657, |
| "grad_norm": 0.8046875, |
| "learning_rate": 1.9588886029905474e-05, |
| "loss": 0.0889, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.1378171890541999, |
| "grad_norm": 0.53515625, |
| "learning_rate": 1.9585702794110322e-05, |
| "loss": 0.0684, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.13815580622632326, |
| "grad_norm": 0.65625, |
| "learning_rate": 1.9582507542635933e-05, |
| "loss": 0.0822, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.1384944233984466, |
| "grad_norm": 0.4765625, |
| "learning_rate": 1.9579300279487558e-05, |
| "loss": 0.0572, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.13883304057056994, |
| "grad_norm": 0.609375, |
| "learning_rate": 1.9576081008685495e-05, |
| "loss": 0.0897, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.13917165774269327, |
| "grad_norm": 0.52734375, |
| "learning_rate": 1.9572849734265107e-05, |
| "loss": 0.0655, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.13951027491481663, |
| "grad_norm": 0.63671875, |
| "learning_rate": 1.956960646027679e-05, |
| "loss": 0.0831, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.13984889208693996, |
| "grad_norm": 0.765625, |
| "learning_rate": 1.9566351190785998e-05, |
| "loss": 0.0684, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.1401875092590633, |
| "grad_norm": 0.52734375, |
| "learning_rate": 1.9563083929873202e-05, |
| "loss": 0.0739, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.14052612643118664, |
| "grad_norm": 0.46875, |
| "learning_rate": 1.9559804681633918e-05, |
| "loss": 0.0624, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.14086474360330997, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.9556513450178683e-05, |
| "loss": 0.0775, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.14120336077543333, |
| "grad_norm": 0.671875, |
| "learning_rate": 1.955321023963306e-05, |
| "loss": 0.1071, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.14154197794755666, |
| "grad_norm": 0.61328125, |
| "learning_rate": 1.9549895054137616e-05, |
| "loss": 0.0705, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.14188059511968001, |
| "grad_norm": 0.70703125, |
| "learning_rate": 1.954656789784794e-05, |
| "loss": 0.0901, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.14221921229180334, |
| "grad_norm": 0.68359375, |
| "learning_rate": 1.9543228774934627e-05, |
| "loss": 0.0946, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.1425578294639267, |
| "grad_norm": 0.484375, |
| "learning_rate": 1.953987768958326e-05, |
| "loss": 0.0663, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.14289644663605003, |
| "grad_norm": 0.51953125, |
| "learning_rate": 1.953651464599443e-05, |
| "loss": 0.0736, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.14323506380817339, |
| "grad_norm": 0.71484375, |
| "learning_rate": 1.9533139648383712e-05, |
| "loss": 0.0952, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.14357368098029671, |
| "grad_norm": 0.5390625, |
| "learning_rate": 1.9529752700981664e-05, |
| "loss": 0.0701, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.14391229815242004, |
| "grad_norm": 0.60546875, |
| "learning_rate": 1.9526353808033827e-05, |
| "loss": 0.0776, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.1442509153245434, |
| "grad_norm": 0.46484375, |
| "learning_rate": 1.9522942973800712e-05, |
| "loss": 0.0644, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.14458953249666673, |
| "grad_norm": 0.69140625, |
| "learning_rate": 1.95195202025578e-05, |
| "loss": 0.0784, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.14492814966879008, |
| "grad_norm": 0.490234375, |
| "learning_rate": 1.9516085498595533e-05, |
| "loss": 0.0623, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.1452667668409134, |
| "grad_norm": 0.447265625, |
| "learning_rate": 1.951263886621932e-05, |
| "loss": 0.0529, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.14560538401303677, |
| "grad_norm": 0.61328125, |
| "learning_rate": 1.9509180309749505e-05, |
| "loss": 0.0828, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.1459440011851601, |
| "grad_norm": 0.4921875, |
| "learning_rate": 1.9505709833521396e-05, |
| "loss": 0.0572, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.14628261835728346, |
| "grad_norm": 0.6328125, |
| "learning_rate": 1.9502227441885232e-05, |
| "loss": 0.0668, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.14662123552940678, |
| "grad_norm": 0.60546875, |
| "learning_rate": 1.9498733139206193e-05, |
| "loss": 0.0878, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.1469598527015301, |
| "grad_norm": 0.5625, |
| "learning_rate": 1.9495226929864384e-05, |
| "loss": 0.0672, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.14729846987365347, |
| "grad_norm": 0.62109375, |
| "learning_rate": 1.9491708818254847e-05, |
| "loss": 0.078, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.1476370870457768, |
| "grad_norm": 0.66796875, |
| "learning_rate": 1.9488178808787527e-05, |
| "loss": 0.0633, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.14797570421790016, |
| "grad_norm": 0.76953125, |
| "learning_rate": 1.94846369058873e-05, |
| "loss": 0.1021, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.14831432139002348, |
| "grad_norm": 0.55859375, |
| "learning_rate": 1.9481083113993927e-05, |
| "loss": 0.0724, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.14865293856214684, |
| "grad_norm": 0.6484375, |
| "learning_rate": 1.9477517437562097e-05, |
| "loss": 0.0714, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.14899155573427017, |
| "grad_norm": 0.6484375, |
| "learning_rate": 1.9473939881061385e-05, |
| "loss": 0.0857, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.14933017290639353, |
| "grad_norm": 0.6171875, |
| "learning_rate": 1.9470350448976257e-05, |
| "loss": 0.0953, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.14966879007851686, |
| "grad_norm": 0.859375, |
| "learning_rate": 1.9466749145806065e-05, |
| "loss": 0.0685, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.15000740725064018, |
| "grad_norm": 0.671875, |
| "learning_rate": 1.9463135976065043e-05, |
| "loss": 0.0905, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.15034602442276354, |
| "grad_norm": 0.6015625, |
| "learning_rate": 1.9459510944282307e-05, |
| "loss": 0.0771, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.15068464159488687, |
| "grad_norm": 0.57421875, |
| "learning_rate": 1.9455874055001824e-05, |
| "loss": 0.0682, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.15102325876701023, |
| "grad_norm": 0.451171875, |
| "learning_rate": 1.945222531278244e-05, |
| "loss": 0.0599, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.15136187593913356, |
| "grad_norm": 0.546875, |
| "learning_rate": 1.9448564722197855e-05, |
| "loss": 0.0668, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.1517004931112569, |
| "grad_norm": 0.53515625, |
| "learning_rate": 1.9444892287836614e-05, |
| "loss": 0.0712, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.15203911028338024, |
| "grad_norm": 0.5390625, |
| "learning_rate": 1.944120801430212e-05, |
| "loss": 0.0727, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.1523777274555036, |
| "grad_norm": 0.546875, |
| "learning_rate": 1.9437511906212607e-05, |
| "loss": 0.0698, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.15271634462762693, |
| "grad_norm": 0.69140625, |
| "learning_rate": 1.9433803968201148e-05, |
| "loss": 0.0945, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.15305496179975028, |
| "grad_norm": 0.57421875, |
| "learning_rate": 1.9430084204915642e-05, |
| "loss": 0.069, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.1533935789718736, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.9426352621018817e-05, |
| "loss": 0.071, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.15373219614399694, |
| "grad_norm": 0.609375, |
| "learning_rate": 1.9422609221188208e-05, |
| "loss": 0.0809, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.1540708133161203, |
| "grad_norm": 0.6484375, |
| "learning_rate": 1.9418854010116168e-05, |
| "loss": 0.085, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.15440943048824363, |
| "grad_norm": 0.609375, |
| "learning_rate": 1.9415086992509858e-05, |
| "loss": 0.0916, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.15474804766036698, |
| "grad_norm": 0.59765625, |
| "learning_rate": 1.941130817309123e-05, |
| "loss": 0.0544, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.1550866648324903, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.940751755659704e-05, |
| "loss": 0.073, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.15542528200461367, |
| "grad_norm": 0.61328125, |
| "learning_rate": 1.9403715147778822e-05, |
| "loss": 0.0854, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.155763899176737, |
| "grad_norm": 0.765625, |
| "learning_rate": 1.9399900951402897e-05, |
| "loss": 0.0612, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.15610251634886035, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.939607497225036e-05, |
| "loss": 0.079, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.15644113352098368, |
| "grad_norm": 0.46484375, |
| "learning_rate": 1.9392237215117076e-05, |
| "loss": 0.0544, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.156779750693107, |
| "grad_norm": 0.466796875, |
| "learning_rate": 1.9388387684813676e-05, |
| "loss": 0.0535, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.15711836786523037, |
| "grad_norm": 0.546875, |
| "learning_rate": 1.9384526386165548e-05, |
| "loss": 0.081, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.1574569850373537, |
| "grad_norm": 0.66015625, |
| "learning_rate": 1.938065332401282e-05, |
| "loss": 0.0916, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.15779560220947705, |
| "grad_norm": 0.453125, |
| "learning_rate": 1.9376768503210388e-05, |
| "loss": 0.0584, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.15813421938160038, |
| "grad_norm": 0.65234375, |
| "learning_rate": 1.937287192862787e-05, |
| "loss": 0.0942, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.15847283655372374, |
| "grad_norm": 0.6015625, |
| "learning_rate": 1.9368963605149624e-05, |
| "loss": 0.0744, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.15881145372584707, |
| "grad_norm": 0.416015625, |
| "learning_rate": 1.936504353767473e-05, |
| "loss": 0.0531, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.15915007089797042, |
| "grad_norm": 0.5546875, |
| "learning_rate": 1.9361111731116993e-05, |
| "loss": 0.0792, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.15948868807009375, |
| "grad_norm": 0.67578125, |
| "learning_rate": 1.9357168190404937e-05, |
| "loss": 0.0809, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.15982730524221708, |
| "grad_norm": 0.51953125, |
| "learning_rate": 1.9353212920481792e-05, |
| "loss": 0.0707, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.16016592241434044, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.934924592630548e-05, |
| "loss": 0.0847, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.16050453958646377, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.9345267212848638e-05, |
| "loss": 0.0683, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.16084315675858712, |
| "grad_norm": 0.59765625, |
| "learning_rate": 1.9341276785098584e-05, |
| "loss": 0.081, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.16118177393071045, |
| "grad_norm": 0.6640625, |
| "learning_rate": 1.9337274648057313e-05, |
| "loss": 0.0843, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.1615203911028338, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.93332608067415e-05, |
| "loss": 0.0621, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.16185900827495714, |
| "grad_norm": 0.5078125, |
| "learning_rate": 1.932923526618251e-05, |
| "loss": 0.0625, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.1621976254470805, |
| "grad_norm": 0.63671875, |
| "learning_rate": 1.932519803142635e-05, |
| "loss": 0.0812, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.16253624261920382, |
| "grad_norm": 0.470703125, |
| "learning_rate": 1.9321149107533693e-05, |
| "loss": 0.0565, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.16287485979132718, |
| "grad_norm": 0.6875, |
| "learning_rate": 1.931708849957987e-05, |
| "loss": 0.0996, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.1632134769634505, |
| "grad_norm": 0.455078125, |
| "learning_rate": 1.9313016212654845e-05, |
| "loss": 0.0621, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.16355209413557384, |
| "grad_norm": 0.6015625, |
| "learning_rate": 1.9308932251863243e-05, |
| "loss": 0.0792, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.1638907113076972, |
| "grad_norm": 0.57421875, |
| "learning_rate": 1.9304836622324295e-05, |
| "loss": 0.0705, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.16422932847982052, |
| "grad_norm": 0.65625, |
| "learning_rate": 1.930072932917188e-05, |
| "loss": 0.0944, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.16456794565194388, |
| "grad_norm": 0.609375, |
| "learning_rate": 1.9296610377554496e-05, |
| "loss": 0.0713, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.1649065628240672, |
| "grad_norm": 0.51953125, |
| "learning_rate": 1.9292479772635236e-05, |
| "loss": 0.0654, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.16524517999619057, |
| "grad_norm": 0.458984375, |
| "learning_rate": 1.9288337519591827e-05, |
| "loss": 0.0673, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.1655837971683139, |
| "grad_norm": 0.71484375, |
| "learning_rate": 1.9284183623616573e-05, |
| "loss": 0.0963, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.16592241434043725, |
| "grad_norm": 0.8125, |
| "learning_rate": 1.9280018089916387e-05, |
| "loss": 0.083, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.16626103151256058, |
| "grad_norm": 0.5703125, |
| "learning_rate": 1.927584092371277e-05, |
| "loss": 0.0769, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.1665996486846839, |
| "grad_norm": 0.62109375, |
| "learning_rate": 1.9271652130241794e-05, |
| "loss": 0.0801, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.16693826585680727, |
| "grad_norm": 0.5703125, |
| "learning_rate": 1.9267451714754113e-05, |
| "loss": 0.0599, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.1672768830289306, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.9263239682514953e-05, |
| "loss": 0.0793, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.16761550020105395, |
| "grad_norm": 0.578125, |
| "learning_rate": 1.925901603880409e-05, |
| "loss": 0.0641, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.16795411737317728, |
| "grad_norm": 0.71484375, |
| "learning_rate": 1.9254780788915865e-05, |
| "loss": 0.0641, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.16829273454530064, |
| "grad_norm": 0.490234375, |
| "learning_rate": 1.9250533938159166e-05, |
| "loss": 0.0575, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.16863135171742397, |
| "grad_norm": 0.5546875, |
| "learning_rate": 1.9246275491857417e-05, |
| "loss": 0.0695, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.16896996888954732, |
| "grad_norm": 0.58203125, |
| "learning_rate": 1.9242005455348582e-05, |
| "loss": 0.0702, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.16930858606167065, |
| "grad_norm": 0.57421875, |
| "learning_rate": 1.9237723833985154e-05, |
| "loss": 0.0819, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.16964720323379398, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.9233430633134146e-05, |
| "loss": 0.0699, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.16998582040591734, |
| "grad_norm": 0.51953125, |
| "learning_rate": 1.922912585817708e-05, |
| "loss": 0.0708, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.17032443757804067, |
| "grad_norm": 0.5546875, |
| "learning_rate": 1.9224809514509998e-05, |
| "loss": 0.0752, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.17066305475016402, |
| "grad_norm": 0.58203125, |
| "learning_rate": 1.9220481607543436e-05, |
| "loss": 0.0789, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.17100167192228735, |
| "grad_norm": 0.55859375, |
| "learning_rate": 1.9216142142702424e-05, |
| "loss": 0.0735, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.1713402890944107, |
| "grad_norm": 0.6015625, |
| "learning_rate": 1.921179112542648e-05, |
| "loss": 0.0773, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.17167890626653404, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.920742856116961e-05, |
| "loss": 0.0579, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.1720175234386574, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.920305445540028e-05, |
| "loss": 0.0578, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.17235614061078072, |
| "grad_norm": 0.53515625, |
| "learning_rate": 1.9198668813601443e-05, |
| "loss": 0.0664, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.17269475778290408, |
| "grad_norm": 0.57421875, |
| "learning_rate": 1.919427164127049e-05, |
| "loss": 0.0685, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.1730333749550274, |
| "grad_norm": 0.66015625, |
| "learning_rate": 1.918986294391929e-05, |
| "loss": 0.0815, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.17337199212715074, |
| "grad_norm": 0.76171875, |
| "learning_rate": 1.918544272707413e-05, |
| "loss": 0.0878, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.1737106092992741, |
| "grad_norm": 0.578125, |
| "learning_rate": 1.9181010996275767e-05, |
| "loss": 0.0727, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.17404922647139742, |
| "grad_norm": 0.47265625, |
| "learning_rate": 1.9176567757079368e-05, |
| "loss": 0.0583, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.17438784364352078, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.917211301505453e-05, |
| "loss": 0.073, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.1747264608156441, |
| "grad_norm": 0.65234375, |
| "learning_rate": 1.916764677578528e-05, |
| "loss": 0.0841, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.17506507798776746, |
| "grad_norm": 0.412109375, |
| "learning_rate": 1.916316904487005e-05, |
| "loss": 0.0486, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.1754036951598908, |
| "grad_norm": 0.64453125, |
| "learning_rate": 1.9158679827921667e-05, |
| "loss": 0.088, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.17574231233201415, |
| "grad_norm": 0.484375, |
| "learning_rate": 1.9154179130567374e-05, |
| "loss": 0.0673, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.17608092950413748, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.9149666958448792e-05, |
| "loss": 0.0723, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.1764195466762608, |
| "grad_norm": 0.86328125, |
| "learning_rate": 1.9145143317221925e-05, |
| "loss": 0.0824, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.17675816384838416, |
| "grad_norm": 0.53125, |
| "learning_rate": 1.9140608212557165e-05, |
| "loss": 0.0802, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.1770967810205075, |
| "grad_norm": 1.9375, |
| "learning_rate": 1.9136061650139262e-05, |
| "loss": 0.0781, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.17743539819263085, |
| "grad_norm": 0.57421875, |
| "learning_rate": 1.9131503635667337e-05, |
| "loss": 0.0737, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.17777401536475418, |
| "grad_norm": 0.5390625, |
| "learning_rate": 1.9126934174854856e-05, |
| "loss": 0.0691, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.17811263253687754, |
| "grad_norm": 0.66796875, |
| "learning_rate": 1.9122353273429635e-05, |
| "loss": 0.0804, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.17845124970900086, |
| "grad_norm": 0.6484375, |
| "learning_rate": 1.9117760937133843e-05, |
| "loss": 0.0839, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.17878986688112422, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.911315717172397e-05, |
| "loss": 0.0671, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.17912848405324755, |
| "grad_norm": 0.458984375, |
| "learning_rate": 1.910854198297084e-05, |
| "loss": 0.061, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.17946710122537088, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.9103915376659583e-05, |
| "loss": 0.0598, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.17980571839749424, |
| "grad_norm": 0.48046875, |
| "learning_rate": 1.909927735858966e-05, |
| "loss": 0.0592, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.18014433556961756, |
| "grad_norm": 0.5859375, |
| "learning_rate": 1.9094627934574825e-05, |
| "loss": 0.0601, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.18048295274174092, |
| "grad_norm": 0.828125, |
| "learning_rate": 1.9089967110443127e-05, |
| "loss": 0.0756, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.18082156991386425, |
| "grad_norm": 0.53515625, |
| "learning_rate": 1.9085294892036914e-05, |
| "loss": 0.0741, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.1811601870859876, |
| "grad_norm": 0.5625, |
| "learning_rate": 1.908061128521281e-05, |
| "loss": 0.0654, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.18149880425811094, |
| "grad_norm": 0.5390625, |
| "learning_rate": 1.907591629584172e-05, |
| "loss": 0.0712, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.1818374214302343, |
| "grad_norm": 0.51953125, |
| "learning_rate": 1.9071209929808808e-05, |
| "loss": 0.0643, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.18217603860235762, |
| "grad_norm": 0.6171875, |
| "learning_rate": 1.9066492193013505e-05, |
| "loss": 0.0861, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.18251465577448098, |
| "grad_norm": 0.52734375, |
| "learning_rate": 1.9061763091369498e-05, |
| "loss": 0.0656, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.1828532729466043, |
| "grad_norm": 0.48828125, |
| "learning_rate": 1.9057022630804715e-05, |
| "loss": 0.0592, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.18319189011872763, |
| "grad_norm": 0.6640625, |
| "learning_rate": 1.9052270817261323e-05, |
| "loss": 0.0877, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.183530507290851, |
| "grad_norm": 0.51171875, |
| "learning_rate": 1.9047507656695722e-05, |
| "loss": 0.0686, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.18386912446297432, |
| "grad_norm": 0.53515625, |
| "learning_rate": 1.9042733155078536e-05, |
| "loss": 0.0651, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.18420774163509768, |
| "grad_norm": 0.46484375, |
| "learning_rate": 1.9037947318394594e-05, |
| "loss": 0.0624, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.184546358807221, |
| "grad_norm": 0.609375, |
| "learning_rate": 1.9033150152642953e-05, |
| "loss": 0.073, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.18488497597934436, |
| "grad_norm": 0.419921875, |
| "learning_rate": 1.9028341663836855e-05, |
| "loss": 0.0587, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.1852235931514677, |
| "grad_norm": 0.53125, |
| "learning_rate": 1.9023521858003744e-05, |
| "loss": 0.0741, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.18556221032359105, |
| "grad_norm": 0.6328125, |
| "learning_rate": 1.9018690741185244e-05, |
| "loss": 0.0801, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.18590082749571438, |
| "grad_norm": 0.55859375, |
| "learning_rate": 1.9013848319437163e-05, |
| "loss": 0.0627, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.1862394446678377, |
| "grad_norm": 0.92578125, |
| "learning_rate": 1.900899459882948e-05, |
| "loss": 0.0828, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.18657806183996106, |
| "grad_norm": 0.5390625, |
| "learning_rate": 1.9004129585446326e-05, |
| "loss": 0.0765, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.1869166790120844, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.8999253285386e-05, |
| "loss": 0.0803, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.18725529618420775, |
| "grad_norm": 0.60546875, |
| "learning_rate": 1.8994365704760946e-05, |
| "loss": 0.0785, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.18759391335633108, |
| "grad_norm": 0.66015625, |
| "learning_rate": 1.8989466849697745e-05, |
| "loss": 0.0949, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.18793253052845443, |
| "grad_norm": 0.486328125, |
| "learning_rate": 1.8984556726337113e-05, |
| "loss": 0.062, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.18827114770057776, |
| "grad_norm": 0.56640625, |
| "learning_rate": 1.8979635340833887e-05, |
| "loss": 0.0739, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.18860976487270112, |
| "grad_norm": 0.455078125, |
| "learning_rate": 1.897470269935703e-05, |
| "loss": 0.0574, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.18894838204482445, |
| "grad_norm": 0.5625, |
| "learning_rate": 1.8969758808089602e-05, |
| "loss": 0.0689, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.18928699921694778, |
| "grad_norm": 0.59375, |
| "learning_rate": 1.8964803673228776e-05, |
| "loss": 0.0719, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.18962561638907113, |
| "grad_norm": 0.58203125, |
| "learning_rate": 1.895983730098581e-05, |
| "loss": 0.0746, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.18996423356119446, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.8954859697586057e-05, |
| "loss": 0.07, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.19030285073331782, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.8949870869268942e-05, |
| "loss": 0.076, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.19064146790544115, |
| "grad_norm": 0.462890625, |
| "learning_rate": 1.8944870822287957e-05, |
| "loss": 0.0698, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.1909800850775645, |
| "grad_norm": 0.466796875, |
| "learning_rate": 1.893985956291067e-05, |
| "loss": 0.0552, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.19131870224968783, |
| "grad_norm": 0.5390625, |
| "learning_rate": 1.893483709741868e-05, |
| "loss": 0.0708, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.1916573194218112, |
| "grad_norm": 0.62109375, |
| "learning_rate": 1.8929803432107662e-05, |
| "loss": 0.0855, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.19199593659393452, |
| "grad_norm": 0.5390625, |
| "learning_rate": 1.8924758573287315e-05, |
| "loss": 0.0745, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.19233455376605785, |
| "grad_norm": 0.51171875, |
| "learning_rate": 1.891970252728136e-05, |
| "loss": 0.07, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.1926731709381812, |
| "grad_norm": 0.56640625, |
| "learning_rate": 1.8914635300427563e-05, |
| "loss": 0.0778, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.19301178811030453, |
| "grad_norm": 0.46484375, |
| "learning_rate": 1.8909556899077683e-05, |
| "loss": 0.0545, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.1933504052824279, |
| "grad_norm": 0.435546875, |
| "learning_rate": 1.8904467329597503e-05, |
| "loss": 0.0503, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.19368902245455122, |
| "grad_norm": 0.7734375, |
| "learning_rate": 1.8899366598366796e-05, |
| "loss": 0.0593, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.19402763962667458, |
| "grad_norm": 0.6640625, |
| "learning_rate": 1.8894254711779333e-05, |
| "loss": 0.1005, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.1943662567987979, |
| "grad_norm": 0.51953125, |
| "learning_rate": 1.8889131676242858e-05, |
| "loss": 0.0604, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.19470487397092126, |
| "grad_norm": 1.5859375, |
| "learning_rate": 1.8883997498179103e-05, |
| "loss": 0.0908, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.1950434911430446, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.8878852184023754e-05, |
| "loss": 0.0736, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.19538210831516795, |
| "grad_norm": 0.52734375, |
| "learning_rate": 1.8873695740226468e-05, |
| "loss": 0.0734, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.19572072548729127, |
| "grad_norm": 0.53125, |
| "learning_rate": 1.8868528173250846e-05, |
| "loss": 0.0574, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.1960593426594146, |
| "grad_norm": 0.63671875, |
| "learning_rate": 1.886334948957443e-05, |
| "loss": 0.0719, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.19639795983153796, |
| "grad_norm": 0.462890625, |
| "learning_rate": 1.8858159695688708e-05, |
| "loss": 0.0642, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.1967365770036613, |
| "grad_norm": 0.62109375, |
| "learning_rate": 1.885295879809908e-05, |
| "loss": 0.0721, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.19707519417578465, |
| "grad_norm": 0.5625, |
| "learning_rate": 1.884774680332487e-05, |
| "loss": 0.0739, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.19741381134790797, |
| "grad_norm": 0.5859375, |
| "learning_rate": 1.8842523717899326e-05, |
| "loss": 0.0791, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.19775242852003133, |
| "grad_norm": 0.5, |
| "learning_rate": 1.8837289548369574e-05, |
| "loss": 0.0719, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.19809104569215466, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.8832044301296652e-05, |
| "loss": 0.0706, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.19842966286427802, |
| "grad_norm": 0.60546875, |
| "learning_rate": 1.8826787983255474e-05, |
| "loss": 0.0736, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.19876828003640135, |
| "grad_norm": 0.63671875, |
| "learning_rate": 1.882152060083484e-05, |
| "loss": 0.0699, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.19910689720852467, |
| "grad_norm": 0.388671875, |
| "learning_rate": 1.881624216063741e-05, |
| "loss": 0.0479, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.19944551438064803, |
| "grad_norm": 0.49609375, |
| "learning_rate": 1.8810952669279707e-05, |
| "loss": 0.0669, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.19978413155277136, |
| "grad_norm": 0.71484375, |
| "learning_rate": 1.8805652133392115e-05, |
| "loss": 0.0875, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.20012274872489472, |
| "grad_norm": 0.494140625, |
| "learning_rate": 1.8800340559618855e-05, |
| "loss": 0.0666, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.20046136589701805, |
| "grad_norm": 0.62109375, |
| "learning_rate": 1.8795017954617982e-05, |
| "loss": 0.0774, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.20046136589701805, |
| "eval_loss": 0.07367200404405594, |
| "eval_runtime": 815.492, |
| "eval_samples_per_second": 12.199, |
| "eval_steps_per_second": 3.05, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.2007999830691414, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.8789684325061382e-05, |
| "loss": 0.0784, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.20113860024126473, |
| "grad_norm": 0.6796875, |
| "learning_rate": 1.8784339677634763e-05, |
| "loss": 0.0774, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.2014772174133881, |
| "grad_norm": 0.56640625, |
| "learning_rate": 1.8778984019037642e-05, |
| "loss": 0.0737, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.20181583458551142, |
| "grad_norm": 0.62109375, |
| "learning_rate": 1.8773617355983332e-05, |
| "loss": 0.0823, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.20215445175763475, |
| "grad_norm": 0.5078125, |
| "learning_rate": 1.8768239695198945e-05, |
| "loss": 0.0601, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.2024930689297581, |
| "grad_norm": 0.48828125, |
| "learning_rate": 1.876285104342539e-05, |
| "loss": 0.0671, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.20283168610188143, |
| "grad_norm": 0.5703125, |
| "learning_rate": 1.8757451407417332e-05, |
| "loss": 0.0679, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.2031703032740048, |
| "grad_norm": 0.6796875, |
| "learning_rate": 1.8752040793943215e-05, |
| "loss": 0.0969, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.20350892044612812, |
| "grad_norm": 0.5546875, |
| "learning_rate": 1.8746619209785253e-05, |
| "loss": 0.0729, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.20384753761825147, |
| "grad_norm": 0.73828125, |
| "learning_rate": 1.874118666173939e-05, |
| "loss": 0.1034, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.2041861547903748, |
| "grad_norm": 0.52734375, |
| "learning_rate": 1.8735743156615337e-05, |
| "loss": 0.0666, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.20452477196249816, |
| "grad_norm": 0.474609375, |
| "learning_rate": 1.873028870123652e-05, |
| "loss": 0.0677, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.2048633891346215, |
| "grad_norm": 0.66015625, |
| "learning_rate": 1.87248233024401e-05, |
| "loss": 0.0931, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.20520200630674484, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.871934696707696e-05, |
| "loss": 0.0632, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.20554062347886817, |
| "grad_norm": 0.56640625, |
| "learning_rate": 1.871385970201168e-05, |
| "loss": 0.0587, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.2058792406509915, |
| "grad_norm": 0.60546875, |
| "learning_rate": 1.870836151412255e-05, |
| "loss": 0.0776, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.20621785782311486, |
| "grad_norm": 0.52734375, |
| "learning_rate": 1.8702852410301556e-05, |
| "loss": 0.0557, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.2065564749952382, |
| "grad_norm": 0.6328125, |
| "learning_rate": 1.869733239745435e-05, |
| "loss": 0.0801, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.20689509216736154, |
| "grad_norm": 0.498046875, |
| "learning_rate": 1.869180148250027e-05, |
| "loss": 0.0632, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.20723370933948487, |
| "grad_norm": 0.474609375, |
| "learning_rate": 1.8686259672372323e-05, |
| "loss": 0.0592, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.20757232651160823, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.8680706974017164e-05, |
| "loss": 0.0714, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.20791094368373156, |
| "grad_norm": 0.5625, |
| "learning_rate": 1.8675143394395106e-05, |
| "loss": 0.066, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.20824956085585491, |
| "grad_norm": 1.15625, |
| "learning_rate": 1.8669568940480093e-05, |
| "loss": 0.0525, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.20858817802797824, |
| "grad_norm": 0.478515625, |
| "learning_rate": 1.86639836192597e-05, |
| "loss": 0.0637, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.20892679520010157, |
| "grad_norm": 0.494140625, |
| "learning_rate": 1.8658387437735137e-05, |
| "loss": 0.0581, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.20926541237222493, |
| "grad_norm": 0.4296875, |
| "learning_rate": 1.865278040292121e-05, |
| "loss": 0.0503, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.20960402954434826, |
| "grad_norm": 0.455078125, |
| "learning_rate": 1.864716252184634e-05, |
| "loss": 0.0602, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.20994264671647161, |
| "grad_norm": 0.62109375, |
| "learning_rate": 1.864153380155254e-05, |
| "loss": 0.0762, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.21028126388859494, |
| "grad_norm": 0.458984375, |
| "learning_rate": 1.863589424909541e-05, |
| "loss": 0.0517, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.2106198810607183, |
| "grad_norm": 0.4921875, |
| "learning_rate": 1.863024387154414e-05, |
| "loss": 0.0551, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.21095849823284163, |
| "grad_norm": 0.71875, |
| "learning_rate": 1.8624582675981466e-05, |
| "loss": 0.0596, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.21129711540496499, |
| "grad_norm": 0.48828125, |
| "learning_rate": 1.8618910669503704e-05, |
| "loss": 0.0525, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.21163573257708831, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.861322785922071e-05, |
| "loss": 0.0619, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.21197434974921164, |
| "grad_norm": 0.578125, |
| "learning_rate": 1.8607534252255896e-05, |
| "loss": 0.0728, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.212312966921335, |
| "grad_norm": 0.435546875, |
| "learning_rate": 1.8601829855746187e-05, |
| "loss": 0.0583, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.21265158409345833, |
| "grad_norm": 0.478515625, |
| "learning_rate": 1.8596114676842054e-05, |
| "loss": 0.0659, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.21299020126558169, |
| "grad_norm": 0.56640625, |
| "learning_rate": 1.8590388722707465e-05, |
| "loss": 0.08, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.21332881843770501, |
| "grad_norm": 0.60546875, |
| "learning_rate": 1.8584652000519913e-05, |
| "loss": 0.0701, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.21366743560982837, |
| "grad_norm": 0.65625, |
| "learning_rate": 1.8578904517470375e-05, |
| "loss": 0.0718, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.2140060527819517, |
| "grad_norm": 0.69921875, |
| "learning_rate": 1.8573146280763327e-05, |
| "loss": 0.1271, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.21434466995407506, |
| "grad_norm": 0.59375, |
| "learning_rate": 1.856737729761671e-05, |
| "loss": 0.0677, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.21468328712619839, |
| "grad_norm": 0.6953125, |
| "learning_rate": 1.856159757526195e-05, |
| "loss": 0.0763, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.21502190429832174, |
| "grad_norm": 0.478515625, |
| "learning_rate": 1.8555807120943927e-05, |
| "loss": 0.0588, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.21536052147044507, |
| "grad_norm": 0.78125, |
| "learning_rate": 1.8550005941920984e-05, |
| "loss": 0.1482, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.2156991386425684, |
| "grad_norm": 0.6640625, |
| "learning_rate": 1.8544194045464888e-05, |
| "loss": 0.0953, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.21603775581469176, |
| "grad_norm": 0.60546875, |
| "learning_rate": 1.8538371438860858e-05, |
| "loss": 0.0685, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.21637637298681509, |
| "grad_norm": 0.51171875, |
| "learning_rate": 1.8532538129407532e-05, |
| "loss": 0.0665, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.21671499015893844, |
| "grad_norm": 0.45703125, |
| "learning_rate": 1.8526694124416963e-05, |
| "loss": 0.049, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.21705360733106177, |
| "grad_norm": 0.78125, |
| "learning_rate": 1.852083943121461e-05, |
| "loss": 0.086, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.21739222450318513, |
| "grad_norm": 0.5390625, |
| "learning_rate": 1.8514974057139335e-05, |
| "loss": 0.0779, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.21773084167530846, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.8509098009543378e-05, |
| "loss": 0.0581, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.2180694588474318, |
| "grad_norm": 0.5625, |
| "learning_rate": 1.8503211295792375e-05, |
| "loss": 0.0687, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.21840807601955514, |
| "grad_norm": 0.8125, |
| "learning_rate": 1.8497313923265315e-05, |
| "loss": 0.1094, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.21874669319167847, |
| "grad_norm": 0.63671875, |
| "learning_rate": 1.8491405899354556e-05, |
| "loss": 0.0887, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.21908531036380183, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.848548723146581e-05, |
| "loss": 0.0721, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.21942392753592516, |
| "grad_norm": 0.51171875, |
| "learning_rate": 1.8479557927018127e-05, |
| "loss": 0.0684, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.2197625447080485, |
| "grad_norm": 0.5078125, |
| "learning_rate": 1.8473617993443885e-05, |
| "loss": 0.0704, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.22010116188017184, |
| "grad_norm": 0.52734375, |
| "learning_rate": 1.8467667438188794e-05, |
| "loss": 0.0695, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.2204397790522952, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.8461706268711878e-05, |
| "loss": 0.0717, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.22077839622441853, |
| "grad_norm": 0.4375, |
| "learning_rate": 1.8455734492485464e-05, |
| "loss": 0.0598, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.22111701339654188, |
| "grad_norm": 0.486328125, |
| "learning_rate": 1.844975211699517e-05, |
| "loss": 0.0602, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.2214556305686652, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.8443759149739906e-05, |
| "loss": 0.0675, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.22179424774078854, |
| "grad_norm": 0.48828125, |
| "learning_rate": 1.8437755598231857e-05, |
| "loss": 0.0677, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.2221328649129119, |
| "grad_norm": 0.57421875, |
| "learning_rate": 1.8431741469996475e-05, |
| "loss": 0.0745, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.22247148208503523, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.8425716772572472e-05, |
| "loss": 0.0688, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.22281009925715858, |
| "grad_norm": 0.85546875, |
| "learning_rate": 1.8419681513511807e-05, |
| "loss": 0.0683, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.2231487164292819, |
| "grad_norm": 1.0, |
| "learning_rate": 1.8413635700379674e-05, |
| "loss": 0.0793, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.22348733360140527, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.84075793407545e-05, |
| "loss": 0.0661, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.2238259507735286, |
| "grad_norm": 0.66015625, |
| "learning_rate": 1.840151244222794e-05, |
| "loss": 0.087, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.22416456794565195, |
| "grad_norm": 0.43359375, |
| "learning_rate": 1.8395435012404837e-05, |
| "loss": 0.0571, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.22450318511777528, |
| "grad_norm": 0.49609375, |
| "learning_rate": 1.838934705890327e-05, |
| "loss": 0.0709, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.22484180228989864, |
| "grad_norm": 0.48828125, |
| "learning_rate": 1.838324858935447e-05, |
| "loss": 0.0707, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.22518041946202197, |
| "grad_norm": 0.51171875, |
| "learning_rate": 1.8377139611402883e-05, |
| "loss": 0.0706, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.2255190366341453, |
| "grad_norm": 0.43359375, |
| "learning_rate": 1.8371020132706104e-05, |
| "loss": 0.0537, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.22585765380626865, |
| "grad_norm": 0.7265625, |
| "learning_rate": 1.8364890160934905e-05, |
| "loss": 0.0909, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.22619627097839198, |
| "grad_norm": 0.45703125, |
| "learning_rate": 1.8358749703773206e-05, |
| "loss": 0.0635, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.22653488815051534, |
| "grad_norm": 0.671875, |
| "learning_rate": 1.835259876891807e-05, |
| "loss": 0.0939, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.22687350532263867, |
| "grad_norm": 0.56640625, |
| "learning_rate": 1.8346437364079693e-05, |
| "loss": 0.0852, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.22721212249476203, |
| "grad_norm": 0.70703125, |
| "learning_rate": 1.8340265496981395e-05, |
| "loss": 0.0639, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.22755073966688535, |
| "grad_norm": 0.48828125, |
| "learning_rate": 1.8334083175359616e-05, |
| "loss": 0.0598, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.2278893568390087, |
| "grad_norm": 0.6484375, |
| "learning_rate": 1.8327890406963895e-05, |
| "loss": 0.0872, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.22822797401113204, |
| "grad_norm": 0.55859375, |
| "learning_rate": 1.8321687199556872e-05, |
| "loss": 0.0835, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.22856659118325537, |
| "grad_norm": 0.478515625, |
| "learning_rate": 1.8315473560914258e-05, |
| "loss": 0.0586, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.22890520835537873, |
| "grad_norm": 0.40234375, |
| "learning_rate": 1.8309249498824853e-05, |
| "loss": 0.0586, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.22924382552750205, |
| "grad_norm": 0.474609375, |
| "learning_rate": 1.8303015021090526e-05, |
| "loss": 0.0627, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.2295824426996254, |
| "grad_norm": 0.640625, |
| "learning_rate": 1.829677013552619e-05, |
| "loss": 0.0771, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.22992105987174874, |
| "grad_norm": 0.55859375, |
| "learning_rate": 1.829051484995981e-05, |
| "loss": 0.0759, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.2302596770438721, |
| "grad_norm": 0.546875, |
| "learning_rate": 1.828424917223239e-05, |
| "loss": 0.0696, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.23059829421599543, |
| "grad_norm": 0.5859375, |
| "learning_rate": 1.827797311019795e-05, |
| "loss": 0.078, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.23093691138811878, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.8271686671723543e-05, |
| "loss": 0.0612, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.2312755285602421, |
| "grad_norm": 0.62109375, |
| "learning_rate": 1.8265389864689213e-05, |
| "loss": 0.0886, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.23161414573236544, |
| "grad_norm": 0.58203125, |
| "learning_rate": 1.8259082696988013e-05, |
| "loss": 0.0824, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.2319527629044888, |
| "grad_norm": 0.5859375, |
| "learning_rate": 1.8252765176525976e-05, |
| "loss": 0.0776, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.23229138007661213, |
| "grad_norm": 0.64453125, |
| "learning_rate": 1.8246437311222117e-05, |
| "loss": 0.0831, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.23262999724873548, |
| "grad_norm": 2.171875, |
| "learning_rate": 1.8240099109008413e-05, |
| "loss": 0.0765, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.2329686144208588, |
| "grad_norm": 0.5390625, |
| "learning_rate": 1.82337505778298e-05, |
| "loss": 0.0721, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.23330723159298217, |
| "grad_norm": 0.578125, |
| "learning_rate": 1.8227391725644167e-05, |
| "loss": 0.0893, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.2336458487651055, |
| "grad_norm": 0.451171875, |
| "learning_rate": 1.822102256042233e-05, |
| "loss": 0.0597, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.23398446593722885, |
| "grad_norm": 0.73046875, |
| "learning_rate": 1.8214643090148044e-05, |
| "loss": 0.0805, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.23432308310935218, |
| "grad_norm": 0.55859375, |
| "learning_rate": 1.820825332281797e-05, |
| "loss": 0.0646, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.23466170028147554, |
| "grad_norm": 0.5703125, |
| "learning_rate": 1.820185326644169e-05, |
| "loss": 0.0747, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.23500031745359887, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.819544292904166e-05, |
| "loss": 0.0687, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.2353389346257222, |
| "grad_norm": 0.486328125, |
| "learning_rate": 1.8189022318653254e-05, |
| "loss": 0.0573, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.23567755179784555, |
| "grad_norm": 0.427734375, |
| "learning_rate": 1.81825914433247e-05, |
| "loss": 0.0576, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.23601616896996888, |
| "grad_norm": 0.671875, |
| "learning_rate": 1.8176150311117103e-05, |
| "loss": 0.0783, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.23635478614209224, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.816969893010442e-05, |
| "loss": 0.063, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.23669340331421557, |
| "grad_norm": 0.59375, |
| "learning_rate": 1.8163237308373465e-05, |
| "loss": 0.084, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.23703202048633892, |
| "grad_norm": 0.44921875, |
| "learning_rate": 1.8156765454023873e-05, |
| "loss": 0.0549, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.23737063765846225, |
| "grad_norm": 0.61328125, |
| "learning_rate": 1.8150283375168112e-05, |
| "loss": 0.0821, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.2377092548305856, |
| "grad_norm": 0.4921875, |
| "learning_rate": 1.814379107993148e-05, |
| "loss": 0.0675, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.23804787200270894, |
| "grad_norm": 0.578125, |
| "learning_rate": 1.8137288576452064e-05, |
| "loss": 0.0852, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.23838648917483227, |
| "grad_norm": 0.478515625, |
| "learning_rate": 1.8130775872880748e-05, |
| "loss": 0.0743, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.23872510634695562, |
| "grad_norm": 0.56640625, |
| "learning_rate": 1.812425297738121e-05, |
| "loss": 0.0767, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.23906372351907895, |
| "grad_norm": 0.671875, |
| "learning_rate": 1.81177198981299e-05, |
| "loss": 0.0984, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.2394023406912023, |
| "grad_norm": 0.435546875, |
| "learning_rate": 1.811117664331604e-05, |
| "loss": 0.0517, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.23974095786332564, |
| "grad_norm": 0.451171875, |
| "learning_rate": 1.810462322114159e-05, |
| "loss": 0.0606, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.240079575035449, |
| "grad_norm": 0.462890625, |
| "learning_rate": 1.8098059639821265e-05, |
| "loss": 0.0588, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.24041819220757232, |
| "grad_norm": 0.546875, |
| "learning_rate": 1.809148590758252e-05, |
| "loss": 0.0773, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.24075680937969568, |
| "grad_norm": 0.439453125, |
| "learning_rate": 1.8084902032665533e-05, |
| "loss": 0.0607, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.241095426551819, |
| "grad_norm": 0.5625, |
| "learning_rate": 1.8078308023323186e-05, |
| "loss": 0.0862, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.24143404372394234, |
| "grad_norm": 0.62109375, |
| "learning_rate": 1.8071703887821067e-05, |
| "loss": 0.0735, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.2417726608960657, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.8065089634437467e-05, |
| "loss": 0.0684, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.24211127806818902, |
| "grad_norm": 0.53515625, |
| "learning_rate": 1.805846527146335e-05, |
| "loss": 0.0843, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.24244989524031238, |
| "grad_norm": 0.53125, |
| "learning_rate": 1.8051830807202355e-05, |
| "loss": 0.0703, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.2427885124124357, |
| "grad_norm": 0.52734375, |
| "learning_rate": 1.8045186249970786e-05, |
| "loss": 0.0828, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.24312712958455907, |
| "grad_norm": 0.80859375, |
| "learning_rate": 1.8038531608097592e-05, |
| "loss": 0.1078, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.2434657467566824, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.803186688992437e-05, |
| "loss": 0.0585, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.24380436392880575, |
| "grad_norm": 0.5546875, |
| "learning_rate": 1.8025192103805348e-05, |
| "loss": 0.0646, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.24414298110092908, |
| "grad_norm": 0.7109375, |
| "learning_rate": 1.8018507258107364e-05, |
| "loss": 0.0928, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.2444815982730524, |
| "grad_norm": 0.546875, |
| "learning_rate": 1.801181236120988e-05, |
| "loss": 0.0747, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.24482021544517577, |
| "grad_norm": 0.47265625, |
| "learning_rate": 1.800510742150494e-05, |
| "loss": 0.057, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.2451588326172991, |
| "grad_norm": 0.5078125, |
| "learning_rate": 1.7998392447397197e-05, |
| "loss": 0.0711, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.24549744978942245, |
| "grad_norm": 0.56640625, |
| "learning_rate": 1.7991667447303865e-05, |
| "loss": 0.0806, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.24583606696154578, |
| "grad_norm": 0.498046875, |
| "learning_rate": 1.7984932429654734e-05, |
| "loss": 0.0787, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.24617468413366914, |
| "grad_norm": 0.53125, |
| "learning_rate": 1.7978187402892148e-05, |
| "loss": 0.0801, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.24651330130579246, |
| "grad_norm": 0.5, |
| "learning_rate": 1.7971432375471e-05, |
| "loss": 0.0636, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.24685191847791582, |
| "grad_norm": 0.51171875, |
| "learning_rate": 1.7964667355858718e-05, |
| "loss": 0.0683, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.24719053565003915, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.7957892352535253e-05, |
| "loss": 0.0845, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.2475291528221625, |
| "grad_norm": 0.89453125, |
| "learning_rate": 1.7951107373993074e-05, |
| "loss": 0.1793, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.24786776999428584, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.7944312428737154e-05, |
| "loss": 0.0657, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.24820638716640916, |
| "grad_norm": 0.53515625, |
| "learning_rate": 1.793750752528495e-05, |
| "loss": 0.0679, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.24854500433853252, |
| "grad_norm": 0.56640625, |
| "learning_rate": 1.7930692672166416e-05, |
| "loss": 0.0831, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.24888362151065585, |
| "grad_norm": 0.59375, |
| "learning_rate": 1.7923867877923967e-05, |
| "loss": 0.0764, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.2492222386827792, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.791703315111249e-05, |
| "loss": 0.0583, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.24956085585490254, |
| "grad_norm": 0.49609375, |
| "learning_rate": 1.7910188500299303e-05, |
| "loss": 0.0661, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.2498994730270259, |
| "grad_norm": 2.953125, |
| "learning_rate": 1.7903333934064185e-05, |
| "loss": 0.0654, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.2502380901991492, |
| "grad_norm": 0.482421875, |
| "learning_rate": 1.789646946099934e-05, |
| "loss": 0.0655, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.2505767073712726, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.7889595089709377e-05, |
| "loss": 0.074, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.25091532454339593, |
| "grad_norm": 0.51171875, |
| "learning_rate": 1.7882710828811322e-05, |
| "loss": 0.0676, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.25125394171551924, |
| "grad_norm": 0.44921875, |
| "learning_rate": 1.7875816686934596e-05, |
| "loss": 0.053, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.2515925588876426, |
| "grad_norm": 0.490234375, |
| "learning_rate": 1.7868912672721014e-05, |
| "loss": 0.0705, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.25193117605976595, |
| "grad_norm": 0.431640625, |
| "learning_rate": 1.7861998794824747e-05, |
| "loss": 0.0544, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.25226979323188925, |
| "grad_norm": 0.458984375, |
| "learning_rate": 1.785507506191235e-05, |
| "loss": 0.063, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.2526084104040126, |
| "grad_norm": 0.921875, |
| "learning_rate": 1.7848141482662726e-05, |
| "loss": 0.065, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.25294702757613596, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.7841198065767107e-05, |
| "loss": 0.0687, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.2532856447482593, |
| "grad_norm": 0.5, |
| "learning_rate": 1.783424481992907e-05, |
| "loss": 0.0679, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.2536242619203826, |
| "grad_norm": 0.4921875, |
| "learning_rate": 1.782728175386451e-05, |
| "loss": 0.0764, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.253962879092506, |
| "grad_norm": 0.494140625, |
| "learning_rate": 1.7820308876301633e-05, |
| "loss": 0.0632, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.25430149626462933, |
| "grad_norm": 0.52734375, |
| "learning_rate": 1.781332619598094e-05, |
| "loss": 0.0694, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.25464011343675264, |
| "grad_norm": 0.546875, |
| "learning_rate": 1.780633372165522e-05, |
| "loss": 0.0661, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.254978730608876, |
| "grad_norm": 0.41796875, |
| "learning_rate": 1.7799331462089543e-05, |
| "loss": 0.0546, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.25531734778099935, |
| "grad_norm": 0.44140625, |
| "learning_rate": 1.7792319426061236e-05, |
| "loss": 0.0567, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.2556559649531227, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.7785297622359893e-05, |
| "loss": 0.0569, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.255994582125246, |
| "grad_norm": 0.5859375, |
| "learning_rate": 1.7778266059787345e-05, |
| "loss": 0.0831, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.25633319929736936, |
| "grad_norm": 0.7734375, |
| "learning_rate": 1.7771224747157655e-05, |
| "loss": 0.0997, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.2566718164694927, |
| "grad_norm": 6.3125, |
| "learning_rate": 1.7764173693297106e-05, |
| "loss": 0.0747, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.2570104336416161, |
| "grad_norm": 0.5625, |
| "learning_rate": 1.77571129070442e-05, |
| "loss": 0.0697, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.2573490508137394, |
| "grad_norm": 0.57421875, |
| "learning_rate": 1.775004239724963e-05, |
| "loss": 0.0685, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.25768766798586273, |
| "grad_norm": 0.46875, |
| "learning_rate": 1.774296217277628e-05, |
| "loss": 0.0603, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.2580262851579861, |
| "grad_norm": 0.474609375, |
| "learning_rate": 1.773587224249921e-05, |
| "loss": 0.0621, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.2583649023301094, |
| "grad_norm": 0.5625, |
| "learning_rate": 1.7728772615305657e-05, |
| "loss": 0.061, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.25870351950223275, |
| "grad_norm": 0.56640625, |
| "learning_rate": 1.7721663300094997e-05, |
| "loss": 0.0644, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.2590421366743561, |
| "grad_norm": 0.447265625, |
| "learning_rate": 1.7714544305778757e-05, |
| "loss": 0.0581, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.25938075384647946, |
| "grad_norm": 0.5546875, |
| "learning_rate": 1.7707415641280598e-05, |
| "loss": 0.0812, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.25971937101860276, |
| "grad_norm": 0.5, |
| "learning_rate": 1.7700277315536305e-05, |
| "loss": 0.0754, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.2600579881907261, |
| "grad_norm": 0.494140625, |
| "learning_rate": 1.7693129337493764e-05, |
| "loss": 0.059, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.2603966053628495, |
| "grad_norm": 0.6953125, |
| "learning_rate": 1.768597171611297e-05, |
| "loss": 0.0896, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.26073522253497283, |
| "grad_norm": 0.65234375, |
| "learning_rate": 1.7678804460366e-05, |
| "loss": 0.0651, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.26107383970709613, |
| "grad_norm": 0.953125, |
| "learning_rate": 1.7671627579237016e-05, |
| "loss": 0.0634, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.2614124568792195, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.766444108172223e-05, |
| "loss": 0.079, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.26175107405134285, |
| "grad_norm": 0.55859375, |
| "learning_rate": 1.765724497682992e-05, |
| "loss": 0.0723, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.26208969122346615, |
| "grad_norm": 0.75, |
| "learning_rate": 1.7650039273580406e-05, |
| "loss": 0.0871, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.2624283083955895, |
| "grad_norm": 0.45703125, |
| "learning_rate": 1.7642823981006037e-05, |
| "loss": 0.065, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.26276692556771286, |
| "grad_norm": 0.5078125, |
| "learning_rate": 1.763559910815118e-05, |
| "loss": 0.072, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.2631055427398362, |
| "grad_norm": 0.7109375, |
| "learning_rate": 1.7628364664072218e-05, |
| "loss": 0.0845, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.2634441599119595, |
| "grad_norm": 0.5625, |
| "learning_rate": 1.7621120657837528e-05, |
| "loss": 0.0778, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.2637827770840829, |
| "grad_norm": 0.48046875, |
| "learning_rate": 1.761386709852747e-05, |
| "loss": 0.0744, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.26412139425620623, |
| "grad_norm": 0.6640625, |
| "learning_rate": 1.760660399523438e-05, |
| "loss": 0.0994, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.26446001142832953, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.759933135706256e-05, |
| "loss": 0.0787, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.2647986286004529, |
| "grad_norm": 0.451171875, |
| "learning_rate": 1.759204919312826e-05, |
| "loss": 0.0614, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.26513724577257625, |
| "grad_norm": 0.58203125, |
| "learning_rate": 1.7584757512559674e-05, |
| "loss": 0.0776, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.2654758629446996, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.757745632449693e-05, |
| "loss": 0.0646, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.2658144801168229, |
| "grad_norm": 0.6796875, |
| "learning_rate": 1.757014563809206e-05, |
| "loss": 0.0648, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.26615309728894626, |
| "grad_norm": 0.447265625, |
| "learning_rate": 1.7562825462509018e-05, |
| "loss": 0.0566, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.2664917144610696, |
| "grad_norm": 0.62109375, |
| "learning_rate": 1.7555495806923635e-05, |
| "loss": 0.0736, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.266830331633193, |
| "grad_norm": 0.56640625, |
| "learning_rate": 1.754815668052364e-05, |
| "loss": 0.0691, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.2671689488053163, |
| "grad_norm": 0.546875, |
| "learning_rate": 1.754080809250863e-05, |
| "loss": 0.069, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.26750756597743963, |
| "grad_norm": 0.55859375, |
| "learning_rate": 1.753345005209006e-05, |
| "loss": 0.0768, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.267846183149563, |
| "grad_norm": 0.64453125, |
| "learning_rate": 1.7526082568491233e-05, |
| "loss": 0.0748, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.2681848003216863, |
| "grad_norm": 0.5625, |
| "learning_rate": 1.7518705650947292e-05, |
| "loss": 0.07, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.26852341749380965, |
| "grad_norm": 0.59765625, |
| "learning_rate": 1.7511319308705198e-05, |
| "loss": 0.075, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.268862034665933, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.750392355102374e-05, |
| "loss": 0.0648, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.26920065183805636, |
| "grad_norm": 0.55859375, |
| "learning_rate": 1.74965183871735e-05, |
| "loss": 0.0848, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.26953926901017966, |
| "grad_norm": 0.48828125, |
| "learning_rate": 1.7489103826436843e-05, |
| "loss": 0.067, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.269877886182303, |
| "grad_norm": 0.431640625, |
| "learning_rate": 1.7481679878107928e-05, |
| "loss": 0.0608, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.2702165033544264, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.7474246551492674e-05, |
| "loss": 0.0584, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.2705551205265497, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.7466803855908753e-05, |
| "loss": 0.0558, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.27089373769867303, |
| "grad_norm": 0.423828125, |
| "learning_rate": 1.745935180068559e-05, |
| "loss": 0.057, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.2712323548707964, |
| "grad_norm": 0.69140625, |
| "learning_rate": 1.745189039516434e-05, |
| "loss": 0.0831, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.27157097204291974, |
| "grad_norm": 0.671875, |
| "learning_rate": 1.7444419648697866e-05, |
| "loss": 0.0964, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.27190958921504305, |
| "grad_norm": 0.53125, |
| "learning_rate": 1.7436939570650754e-05, |
| "loss": 0.0753, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.2722482063871664, |
| "grad_norm": 0.5546875, |
| "learning_rate": 1.7429450170399278e-05, |
| "loss": 0.0524, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.27258682355928976, |
| "grad_norm": 0.4921875, |
| "learning_rate": 1.742195145733141e-05, |
| "loss": 0.0638, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.2729254407314131, |
| "grad_norm": 0.62109375, |
| "learning_rate": 1.741444344084678e-05, |
| "loss": 0.0813, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.2732640579035364, |
| "grad_norm": 0.94921875, |
| "learning_rate": 1.7406926130356692e-05, |
| "loss": 0.0662, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.2736026750756598, |
| "grad_norm": 0.53515625, |
| "learning_rate": 1.7399399535284093e-05, |
| "loss": 0.0566, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.27394129224778313, |
| "grad_norm": 0.5546875, |
| "learning_rate": 1.7391863665063572e-05, |
| "loss": 0.0858, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.27427990941990643, |
| "grad_norm": 0.734375, |
| "learning_rate": 1.738431852914134e-05, |
| "loss": 0.0827, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.2746185265920298, |
| "grad_norm": 0.478515625, |
| "learning_rate": 1.737676413697523e-05, |
| "loss": 0.0644, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.27495714376415314, |
| "grad_norm": 0.640625, |
| "learning_rate": 1.736920049803467e-05, |
| "loss": 0.0772, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.2752957609362765, |
| "grad_norm": 0.44921875, |
| "learning_rate": 1.7361627621800683e-05, |
| "loss": 0.0561, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.2756343781083998, |
| "grad_norm": 0.734375, |
| "learning_rate": 1.735404551776587e-05, |
| "loss": 0.0853, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.27597299528052316, |
| "grad_norm": 0.486328125, |
| "learning_rate": 1.73464541954344e-05, |
| "loss": 0.0618, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.2763116124526465, |
| "grad_norm": 0.69921875, |
| "learning_rate": 1.7338853664321993e-05, |
| "loss": 0.079, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.27665022962476987, |
| "grad_norm": 0.62890625, |
| "learning_rate": 1.7331243933955918e-05, |
| "loss": 0.0579, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.2769888467968932, |
| "grad_norm": 0.53125, |
| "learning_rate": 1.7323625013874972e-05, |
| "loss": 0.0667, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.27732746396901653, |
| "grad_norm": 0.5390625, |
| "learning_rate": 1.731599691362947e-05, |
| "loss": 0.0661, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.2776660811411399, |
| "grad_norm": 0.68359375, |
| "learning_rate": 1.730835964278124e-05, |
| "loss": 0.117, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.2780046983132632, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.7300713210903605e-05, |
| "loss": 0.0619, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.27834331548538654, |
| "grad_norm": 0.466796875, |
| "learning_rate": 1.7293057627581355e-05, |
| "loss": 0.0645, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.2786819326575099, |
| "grad_norm": 0.48828125, |
| "learning_rate": 1.7285392902410776e-05, |
| "loss": 0.0636, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.27902054982963326, |
| "grad_norm": 0.47265625, |
| "learning_rate": 1.7277719044999595e-05, |
| "loss": 0.0543, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.27935916700175656, |
| "grad_norm": 0.423828125, |
| "learning_rate": 1.7270036064967e-05, |
| "loss": 0.06, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.2796977841738799, |
| "grad_norm": 0.455078125, |
| "learning_rate": 1.7262343971943602e-05, |
| "loss": 0.0598, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.28003640134600327, |
| "grad_norm": 0.486328125, |
| "learning_rate": 1.725464277557144e-05, |
| "loss": 0.0667, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.2803750185181266, |
| "grad_norm": 0.439453125, |
| "learning_rate": 1.7246932485503964e-05, |
| "loss": 0.0554, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.28071363569024993, |
| "grad_norm": 0.4609375, |
| "learning_rate": 1.7239213111406027e-05, |
| "loss": 0.0648, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.2810522528623733, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.7231484662953862e-05, |
| "loss": 0.063, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.28139087003449664, |
| "grad_norm": 0.63671875, |
| "learning_rate": 1.7223747149835078e-05, |
| "loss": 0.0752, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.28172948720661994, |
| "grad_norm": 0.52734375, |
| "learning_rate": 1.7216000581748655e-05, |
| "loss": 0.0745, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.2820681043787433, |
| "grad_norm": 0.59765625, |
| "learning_rate": 1.7208244968404904e-05, |
| "loss": 0.0526, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.28240672155086666, |
| "grad_norm": 0.47265625, |
| "learning_rate": 1.7200480319525505e-05, |
| "loss": 0.0644, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.28274533872299, |
| "grad_norm": 0.55859375, |
| "learning_rate": 1.719270664484343e-05, |
| "loss": 0.0788, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.2830839558951133, |
| "grad_norm": 0.66796875, |
| "learning_rate": 1.7184923954102992e-05, |
| "loss": 0.0718, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.28342257306723667, |
| "grad_norm": 0.490234375, |
| "learning_rate": 1.7177132257059788e-05, |
| "loss": 0.0729, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.28376119023936003, |
| "grad_norm": 0.546875, |
| "learning_rate": 1.7169331563480713e-05, |
| "loss": 0.0568, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.28409980741148333, |
| "grad_norm": 0.59375, |
| "learning_rate": 1.7161521883143936e-05, |
| "loss": 0.0644, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.2844384245836067, |
| "grad_norm": 0.6484375, |
| "learning_rate": 1.7153703225838892e-05, |
| "loss": 0.0567, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.28477704175573004, |
| "grad_norm": 0.62109375, |
| "learning_rate": 1.714587560136627e-05, |
| "loss": 0.0855, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.2851156589278534, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.7138039019538e-05, |
| "loss": 0.0765, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.2854542760999767, |
| "grad_norm": 0.6484375, |
| "learning_rate": 1.713019349017723e-05, |
| "loss": 0.0771, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.28579289327210006, |
| "grad_norm": 0.52734375, |
| "learning_rate": 1.7122339023118338e-05, |
| "loss": 0.0754, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.2861315104442234, |
| "grad_norm": 0.40234375, |
| "learning_rate": 1.7114475628206897e-05, |
| "loss": 0.0429, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.28647012761634677, |
| "grad_norm": 0.5390625, |
| "learning_rate": 1.7106603315299674e-05, |
| "loss": 0.0673, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.28680874478847007, |
| "grad_norm": 0.640625, |
| "learning_rate": 1.7098722094264616e-05, |
| "loss": 0.0862, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.28714736196059343, |
| "grad_norm": 0.51953125, |
| "learning_rate": 1.7090831974980832e-05, |
| "loss": 0.0655, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.2874859791327168, |
| "grad_norm": 0.53515625, |
| "learning_rate": 1.7082932967338588e-05, |
| "loss": 0.0658, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.2878245963048401, |
| "grad_norm": 0.69921875, |
| "learning_rate": 1.7075025081239286e-05, |
| "loss": 0.0895, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.28816321347696344, |
| "grad_norm": 0.53515625, |
| "learning_rate": 1.706710832659547e-05, |
| "loss": 0.0735, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.2885018306490868, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.7059182713330787e-05, |
| "loss": 0.0594, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.28884044782121016, |
| "grad_norm": 0.4375, |
| "learning_rate": 1.7051248251379997e-05, |
| "loss": 0.0557, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.28917906499333346, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.7043304950688947e-05, |
| "loss": 0.065, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.2895176821654568, |
| "grad_norm": 0.60546875, |
| "learning_rate": 1.703535282121456e-05, |
| "loss": 0.0784, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.28985629933758017, |
| "grad_norm": 0.4453125, |
| "learning_rate": 1.702739187292484e-05, |
| "loss": 0.0567, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.29019491650970347, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.7019422115798835e-05, |
| "loss": 0.073, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.2905335336818268, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.7011443559826632e-05, |
| "loss": 0.079, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.2908721508539502, |
| "grad_norm": 0.46484375, |
| "learning_rate": 1.700345621500935e-05, |
| "loss": 0.0667, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.29121076802607354, |
| "grad_norm": 0.640625, |
| "learning_rate": 1.699546009135913e-05, |
| "loss": 0.1011, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.29154938519819684, |
| "grad_norm": 0.46875, |
| "learning_rate": 1.6987455198899118e-05, |
| "loss": 0.0662, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.2918880023703202, |
| "grad_norm": 0.53515625, |
| "learning_rate": 1.6979441547663434e-05, |
| "loss": 0.0675, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.29222661954244356, |
| "grad_norm": 0.5078125, |
| "learning_rate": 1.6971419147697206e-05, |
| "loss": 0.0607, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.2925652367145669, |
| "grad_norm": 0.578125, |
| "learning_rate": 1.6963388009056505e-05, |
| "loss": 0.075, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.2929038538866902, |
| "grad_norm": 0.431640625, |
| "learning_rate": 1.6955348141808367e-05, |
| "loss": 0.0512, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.29324247105881357, |
| "grad_norm": 0.53125, |
| "learning_rate": 1.694729955603076e-05, |
| "loss": 0.0719, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.2935810882309369, |
| "grad_norm": 3.921875, |
| "learning_rate": 1.6939242261812592e-05, |
| "loss": 0.0996, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.2939197054030602, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.693117626925368e-05, |
| "loss": 0.066, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.2942583225751836, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.6923101588464753e-05, |
| "loss": 0.0758, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.29459693974730694, |
| "grad_norm": 0.578125, |
| "learning_rate": 1.6915018229567412e-05, |
| "loss": 0.0675, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.2949355569194303, |
| "grad_norm": 1.0546875, |
| "learning_rate": 1.6906926202694158e-05, |
| "loss": 0.2418, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.2952741740915536, |
| "grad_norm": 0.71875, |
| "learning_rate": 1.6898825517988342e-05, |
| "loss": 0.1013, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.29561279126367696, |
| "grad_norm": 0.40625, |
| "learning_rate": 1.6890716185604178e-05, |
| "loss": 0.053, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.2959514084358003, |
| "grad_norm": 0.365234375, |
| "learning_rate": 1.688259821570671e-05, |
| "loss": 0.0407, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.29629002560792367, |
| "grad_norm": 0.74609375, |
| "learning_rate": 1.6874471618471813e-05, |
| "loss": 0.1096, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.29662864278004697, |
| "grad_norm": 0.42578125, |
| "learning_rate": 1.6866336404086185e-05, |
| "loss": 0.0607, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.2969672599521703, |
| "grad_norm": 0.5078125, |
| "learning_rate": 1.6858192582747306e-05, |
| "loss": 0.0732, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.2973058771242937, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.685004016466347e-05, |
| "loss": 0.0591, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.297644494296417, |
| "grad_norm": 0.455078125, |
| "learning_rate": 1.6841879160053724e-05, |
| "loss": 0.0513, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.29798311146854034, |
| "grad_norm": 0.427734375, |
| "learning_rate": 1.683370957914789e-05, |
| "loss": 0.056, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.2983217286406637, |
| "grad_norm": 0.494140625, |
| "learning_rate": 1.6825531432186545e-05, |
| "loss": 0.0679, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.29866034581278705, |
| "grad_norm": 0.90625, |
| "learning_rate": 1.6817344729420985e-05, |
| "loss": 0.0666, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.29899896298491035, |
| "grad_norm": 0.52734375, |
| "learning_rate": 1.6809149481113252e-05, |
| "loss": 0.0664, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.2993375801570337, |
| "grad_norm": 0.625, |
| "learning_rate": 1.6800945697536088e-05, |
| "loss": 0.081, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.29967619732915707, |
| "grad_norm": 0.482421875, |
| "learning_rate": 1.679273338897293e-05, |
| "loss": 0.0667, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.30001481450128037, |
| "grad_norm": 0.5078125, |
| "learning_rate": 1.678451256571792e-05, |
| "loss": 0.0587, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.3003534316734037, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.6776283238075853e-05, |
| "loss": 0.0736, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.3006920488455271, |
| "grad_norm": 0.66015625, |
| "learning_rate": 1.6768045416362192e-05, |
| "loss": 0.0947, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.3006920488455271, |
| "eval_loss": 0.07113409042358398, |
| "eval_runtime": 815.5638, |
| "eval_samples_per_second": 12.198, |
| "eval_steps_per_second": 3.049, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.30103066601765044, |
| "grad_norm": 0.46875, |
| "learning_rate": 1.6759799110903046e-05, |
| "loss": 0.0615, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.30136928318977374, |
| "grad_norm": 0.52734375, |
| "learning_rate": 1.6751544332035164e-05, |
| "loss": 0.0656, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.3017079003618971, |
| "grad_norm": 0.57421875, |
| "learning_rate": 1.674328109010591e-05, |
| "loss": 0.0781, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.30204651753402045, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.6735009395473252e-05, |
| "loss": 0.0776, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.3023851347061438, |
| "grad_norm": 0.5625, |
| "learning_rate": 1.672672925850577e-05, |
| "loss": 0.079, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.3027237518782671, |
| "grad_norm": 0.416015625, |
| "learning_rate": 1.6718440689582613e-05, |
| "loss": 0.0536, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.30306236905039047, |
| "grad_norm": 0.58203125, |
| "learning_rate": 1.67101436990935e-05, |
| "loss": 0.0796, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.3034009862225138, |
| "grad_norm": 0.474609375, |
| "learning_rate": 1.6701838297438713e-05, |
| "loss": 0.0662, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.3037396033946371, |
| "grad_norm": 0.486328125, |
| "learning_rate": 1.669352449502907e-05, |
| "loss": 0.0663, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.3040782205667605, |
| "grad_norm": 0.474609375, |
| "learning_rate": 1.6685202302285926e-05, |
| "loss": 0.0546, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.30441683773888384, |
| "grad_norm": 0.486328125, |
| "learning_rate": 1.667687172964115e-05, |
| "loss": 0.0613, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.3047554549110072, |
| "grad_norm": 0.55859375, |
| "learning_rate": 1.6668532787537115e-05, |
| "loss": 0.077, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.3050940720831305, |
| "grad_norm": 0.478515625, |
| "learning_rate": 1.6660185486426684e-05, |
| "loss": 0.0601, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.30543268925525385, |
| "grad_norm": 0.46875, |
| "learning_rate": 1.66518298367732e-05, |
| "loss": 0.0648, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.3057713064273772, |
| "grad_norm": 0.42578125, |
| "learning_rate": 1.6643465849050473e-05, |
| "loss": 0.0603, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.30610992359950057, |
| "grad_norm": 0.5859375, |
| "learning_rate": 1.6635093533742762e-05, |
| "loss": 0.0758, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.30644854077162387, |
| "grad_norm": 0.5, |
| "learning_rate": 1.662671290134476e-05, |
| "loss": 0.0569, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.3067871579437472, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.6618323962361595e-05, |
| "loss": 0.0667, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.3071257751158706, |
| "grad_norm": 0.5078125, |
| "learning_rate": 1.6609926727308804e-05, |
| "loss": 0.0652, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.3074643922879939, |
| "grad_norm": 0.59765625, |
| "learning_rate": 1.660152120671232e-05, |
| "loss": 0.0823, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.30780300946011724, |
| "grad_norm": 0.72265625, |
| "learning_rate": 1.6593107411108462e-05, |
| "loss": 0.0695, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.3081416266322406, |
| "grad_norm": 0.61328125, |
| "learning_rate": 1.6584685351043924e-05, |
| "loss": 0.0791, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.30848024380436395, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.657625503707576e-05, |
| "loss": 0.0716, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.30881886097648725, |
| "grad_norm": 0.55859375, |
| "learning_rate": 1.6567816479771372e-05, |
| "loss": 0.0772, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.3091574781486106, |
| "grad_norm": 0.55859375, |
| "learning_rate": 1.655936968970848e-05, |
| "loss": 0.0743, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.30949609532073397, |
| "grad_norm": 0.61328125, |
| "learning_rate": 1.6550914677475155e-05, |
| "loss": 0.0842, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.30983471249285727, |
| "grad_norm": 0.57421875, |
| "learning_rate": 1.654245145366974e-05, |
| "loss": 0.0685, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.3101733296649806, |
| "grad_norm": 0.63671875, |
| "learning_rate": 1.6533980028900896e-05, |
| "loss": 0.0904, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.310511946837104, |
| "grad_norm": 0.671875, |
| "learning_rate": 1.6525500413787554e-05, |
| "loss": 0.0768, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.31085056400922734, |
| "grad_norm": 0.58203125, |
| "learning_rate": 1.6517012618958905e-05, |
| "loss": 0.0794, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.31118918118135064, |
| "grad_norm": 0.58203125, |
| "learning_rate": 1.6508516655054404e-05, |
| "loss": 0.0797, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.311527798353474, |
| "grad_norm": 0.55859375, |
| "learning_rate": 1.6500012532723748e-05, |
| "loss": 0.0678, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.31186641552559735, |
| "grad_norm": 0.51953125, |
| "learning_rate": 1.6491500262626847e-05, |
| "loss": 0.0588, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.3122050326977207, |
| "grad_norm": 0.45703125, |
| "learning_rate": 1.6482979855433837e-05, |
| "loss": 0.0647, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.312543649869844, |
| "grad_norm": 0.609375, |
| "learning_rate": 1.6474451321825048e-05, |
| "loss": 0.0774, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.31288226704196737, |
| "grad_norm": 0.478515625, |
| "learning_rate": 1.6465914672491e-05, |
| "loss": 0.062, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.3132208842140907, |
| "grad_norm": 0.75, |
| "learning_rate": 1.6457369918132376e-05, |
| "loss": 0.0995, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.313559501386214, |
| "grad_norm": 0.56640625, |
| "learning_rate": 1.6448817069460033e-05, |
| "loss": 0.0756, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.3138981185583374, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.6440256137194965e-05, |
| "loss": 0.0769, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.31423673573046074, |
| "grad_norm": 0.56640625, |
| "learning_rate": 1.6431687132068305e-05, |
| "loss": 0.0754, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.3145753529025841, |
| "grad_norm": 0.5625, |
| "learning_rate": 1.6423110064821296e-05, |
| "loss": 0.0838, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.3149139700747074, |
| "grad_norm": 0.5703125, |
| "learning_rate": 1.64145249462053e-05, |
| "loss": 0.0665, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.31525258724683075, |
| "grad_norm": 0.640625, |
| "learning_rate": 1.6405931786981753e-05, |
| "loss": 0.0765, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.3155912044189541, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.639733059792219e-05, |
| "loss": 0.0623, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.31592982159107746, |
| "grad_norm": 0.6953125, |
| "learning_rate": 1.63887213898082e-05, |
| "loss": 0.0968, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.31626843876320077, |
| "grad_norm": 0.439453125, |
| "learning_rate": 1.6380104173431423e-05, |
| "loss": 0.0638, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.3166070559353241, |
| "grad_norm": 0.51953125, |
| "learning_rate": 1.6371478959593543e-05, |
| "loss": 0.0689, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.3169456731074475, |
| "grad_norm": 0.63671875, |
| "learning_rate": 1.6362845759106267e-05, |
| "loss": 0.0748, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.3172842902795708, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.635420458279131e-05, |
| "loss": 0.0638, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.31762290745169414, |
| "grad_norm": 0.5078125, |
| "learning_rate": 1.634555544148039e-05, |
| "loss": 0.0695, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.3179615246238175, |
| "grad_norm": 0.4609375, |
| "learning_rate": 1.6336898346015202e-05, |
| "loss": 0.0657, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.31830014179594085, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.6328233307247426e-05, |
| "loss": 0.0808, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.31863875896806415, |
| "grad_norm": 0.51953125, |
| "learning_rate": 1.6319560336038678e-05, |
| "loss": 0.0642, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.3189773761401875, |
| "grad_norm": 0.4609375, |
| "learning_rate": 1.631087944326053e-05, |
| "loss": 0.0618, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.31931599331231086, |
| "grad_norm": 0.46875, |
| "learning_rate": 1.6302190639794486e-05, |
| "loss": 0.0638, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.31965461048443417, |
| "grad_norm": 0.49609375, |
| "learning_rate": 1.6293493936531956e-05, |
| "loss": 0.0731, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.3199932276565575, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.6284789344374266e-05, |
| "loss": 0.0728, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.3203318448286809, |
| "grad_norm": 0.52734375, |
| "learning_rate": 1.6276076874232614e-05, |
| "loss": 0.0491, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.32067046200080424, |
| "grad_norm": 0.453125, |
| "learning_rate": 1.626735653702809e-05, |
| "loss": 0.059, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.32100907917292754, |
| "grad_norm": 0.458984375, |
| "learning_rate": 1.6258628343691635e-05, |
| "loss": 0.0598, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.3213476963450509, |
| "grad_norm": 0.47265625, |
| "learning_rate": 1.6249892305164036e-05, |
| "loss": 0.0661, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.32168631351717425, |
| "grad_norm": 0.578125, |
| "learning_rate": 1.624114843239592e-05, |
| "loss": 0.0594, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.3220249306892976, |
| "grad_norm": 0.46875, |
| "learning_rate": 1.6232396736347736e-05, |
| "loss": 0.0663, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.3223635478614209, |
| "grad_norm": 0.53125, |
| "learning_rate": 1.6223637227989736e-05, |
| "loss": 0.059, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.32270216503354426, |
| "grad_norm": 0.44921875, |
| "learning_rate": 1.621486991830196e-05, |
| "loss": 0.0622, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.3230407822056676, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.6206094818274228e-05, |
| "loss": 0.0461, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.3233793993777909, |
| "grad_norm": 0.4375, |
| "learning_rate": 1.619731193890614e-05, |
| "loss": 0.0483, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.3237180165499143, |
| "grad_norm": 0.5390625, |
| "learning_rate": 1.6188521291207027e-05, |
| "loss": 0.0747, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.32405663372203763, |
| "grad_norm": 0.57421875, |
| "learning_rate": 1.6179722886195967e-05, |
| "loss": 0.0733, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.324395250894161, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.6170916734901765e-05, |
| "loss": 0.0702, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.3247338680662843, |
| "grad_norm": 0.48828125, |
| "learning_rate": 1.6162102848362932e-05, |
| "loss": 0.0655, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.32507248523840765, |
| "grad_norm": 0.49609375, |
| "learning_rate": 1.6153281237627675e-05, |
| "loss": 0.0657, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.325411102410531, |
| "grad_norm": 0.546875, |
| "learning_rate": 1.6144451913753882e-05, |
| "loss": 0.0588, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.32574971958265436, |
| "grad_norm": 0.51171875, |
| "learning_rate": 1.6135614887809113e-05, |
| "loss": 0.0687, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.32608833675477766, |
| "grad_norm": 0.828125, |
| "learning_rate": 1.612677017087058e-05, |
| "loss": 0.0764, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.326426953926901, |
| "grad_norm": 0.443359375, |
| "learning_rate": 1.6117917774025138e-05, |
| "loss": 0.0539, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.3267655710990244, |
| "grad_norm": 0.6171875, |
| "learning_rate": 1.6109057708369263e-05, |
| "loss": 0.0853, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.3271041882711477, |
| "grad_norm": 0.72265625, |
| "learning_rate": 1.6100189985009053e-05, |
| "loss": 0.0523, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.32744280544327103, |
| "grad_norm": 0.5625, |
| "learning_rate": 1.6091314615060196e-05, |
| "loss": 0.0599, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.3277814226153944, |
| "grad_norm": 0.4609375, |
| "learning_rate": 1.608243160964797e-05, |
| "loss": 0.0625, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.32812003978751775, |
| "grad_norm": 0.42578125, |
| "learning_rate": 1.6073540979907227e-05, |
| "loss": 0.0561, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.32845865695964105, |
| "grad_norm": 0.65625, |
| "learning_rate": 1.6064642736982368e-05, |
| "loss": 0.08, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.3287972741317644, |
| "grad_norm": 0.51171875, |
| "learning_rate": 1.6055736892027342e-05, |
| "loss": 0.0691, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.32913589130388776, |
| "grad_norm": 0.53515625, |
| "learning_rate": 1.6046823456205623e-05, |
| "loss": 0.0728, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.32947450847601106, |
| "grad_norm": 0.482421875, |
| "learning_rate": 1.6037902440690212e-05, |
| "loss": 0.0604, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.3298131256481344, |
| "grad_norm": 0.51171875, |
| "learning_rate": 1.6028973856663595e-05, |
| "loss": 0.0723, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.3301517428202578, |
| "grad_norm": 0.486328125, |
| "learning_rate": 1.6020037715317756e-05, |
| "loss": 0.0836, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.33049035999238113, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.6011094027854147e-05, |
| "loss": 0.0603, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.33082897716450443, |
| "grad_norm": 0.4921875, |
| "learning_rate": 1.6002142805483686e-05, |
| "loss": 0.0717, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.3311675943366278, |
| "grad_norm": 0.7421875, |
| "learning_rate": 1.5993184059426725e-05, |
| "loss": 0.068, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.33150621150875115, |
| "grad_norm": 0.52734375, |
| "learning_rate": 1.5984217800913052e-05, |
| "loss": 0.0738, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.3318448286808745, |
| "grad_norm": 0.4765625, |
| "learning_rate": 1.5975244041181877e-05, |
| "loss": 0.0627, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.3321834458529978, |
| "grad_norm": 0.4765625, |
| "learning_rate": 1.5966262791481812e-05, |
| "loss": 0.0633, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.33252206302512116, |
| "grad_norm": 0.453125, |
| "learning_rate": 1.5957274063070845e-05, |
| "loss": 0.0576, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.3328606801972445, |
| "grad_norm": 0.58203125, |
| "learning_rate": 1.5948277867216355e-05, |
| "loss": 0.0802, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.3331992973693678, |
| "grad_norm": 0.4765625, |
| "learning_rate": 1.5939274215195074e-05, |
| "loss": 0.0643, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.3335379145414912, |
| "grad_norm": 1.6796875, |
| "learning_rate": 1.5930263118293075e-05, |
| "loss": 0.0746, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.33387653171361453, |
| "grad_norm": 0.61328125, |
| "learning_rate": 1.5921244587805774e-05, |
| "loss": 0.0764, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.3342151488857379, |
| "grad_norm": 0.48046875, |
| "learning_rate": 1.5912218635037896e-05, |
| "loss": 0.0583, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.3345537660578612, |
| "grad_norm": 0.6171875, |
| "learning_rate": 1.5903185271303477e-05, |
| "loss": 0.0803, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.33489238322998455, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.5894144507925836e-05, |
| "loss": 0.077, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.3352310004021079, |
| "grad_norm": 0.462890625, |
| "learning_rate": 1.5885096356237572e-05, |
| "loss": 0.0668, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.33556961757423126, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.5876040827580545e-05, |
| "loss": 0.0635, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.33590823474635456, |
| "grad_norm": 0.69921875, |
| "learning_rate": 1.586697793330586e-05, |
| "loss": 0.058, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.3362468519184779, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.5857907684773858e-05, |
| "loss": 0.0653, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.3365854690906013, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.584883009335409e-05, |
| "loss": 0.0474, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.3369240862627246, |
| "grad_norm": 0.490234375, |
| "learning_rate": 1.5839745170425326e-05, |
| "loss": 0.0607, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.33726270343484793, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.5830652927375506e-05, |
| "loss": 0.0584, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.3376013206069713, |
| "grad_norm": 0.53125, |
| "learning_rate": 1.582155337560177e-05, |
| "loss": 0.0642, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.33793993777909465, |
| "grad_norm": 0.51953125, |
| "learning_rate": 1.58124465265104e-05, |
| "loss": 0.0747, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.33827855495121795, |
| "grad_norm": 0.68359375, |
| "learning_rate": 1.5803332391516832e-05, |
| "loss": 0.0634, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.3386171721233413, |
| "grad_norm": 0.51953125, |
| "learning_rate": 1.5794210982045638e-05, |
| "loss": 0.0548, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.33895578929546466, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.5785082309530504e-05, |
| "loss": 0.0659, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.33929440646758796, |
| "grad_norm": 0.6171875, |
| "learning_rate": 1.577594638541422e-05, |
| "loss": 0.0745, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.3396330236397113, |
| "grad_norm": 0.5390625, |
| "learning_rate": 1.5766803221148676e-05, |
| "loss": 0.0689, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.3399716408118347, |
| "grad_norm": 0.55859375, |
| "learning_rate": 1.5757652828194815e-05, |
| "loss": 0.0874, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.34031025798395803, |
| "grad_norm": 0.439453125, |
| "learning_rate": 1.5748495218022665e-05, |
| "loss": 0.0583, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.34064887515608133, |
| "grad_norm": 0.443359375, |
| "learning_rate": 1.573933040211129e-05, |
| "loss": 0.0639, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.3409874923282047, |
| "grad_norm": 0.435546875, |
| "learning_rate": 1.5730158391948785e-05, |
| "loss": 0.0566, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.34132610950032805, |
| "grad_norm": 0.66796875, |
| "learning_rate": 1.5720979199032268e-05, |
| "loss": 0.0807, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.3416647266724514, |
| "grad_norm": 0.419921875, |
| "learning_rate": 1.5711792834867856e-05, |
| "loss": 0.0633, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.3420033438445747, |
| "grad_norm": 0.5078125, |
| "learning_rate": 1.570259931097066e-05, |
| "loss": 0.0722, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.34234196101669806, |
| "grad_norm": 0.421875, |
| "learning_rate": 1.569339863886476e-05, |
| "loss": 0.0596, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.3426805781888214, |
| "grad_norm": 0.423828125, |
| "learning_rate": 1.56841908300832e-05, |
| "loss": 0.0618, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.3430191953609447, |
| "grad_norm": 0.458984375, |
| "learning_rate": 1.567497589616797e-05, |
| "loss": 0.0626, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.3433578125330681, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.5665753848669987e-05, |
| "loss": 0.0731, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.34369642970519143, |
| "grad_norm": 0.5390625, |
| "learning_rate": 1.5656524699149096e-05, |
| "loss": 0.0723, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.3440350468773148, |
| "grad_norm": 0.62109375, |
| "learning_rate": 1.5647288459174032e-05, |
| "loss": 0.0949, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.3443736640494381, |
| "grad_norm": 0.431640625, |
| "learning_rate": 1.563804514032242e-05, |
| "loss": 0.0491, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.34471228122156145, |
| "grad_norm": 0.68359375, |
| "learning_rate": 1.5628794754180764e-05, |
| "loss": 0.0886, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.3450508983936848, |
| "grad_norm": 0.6015625, |
| "learning_rate": 1.5619537312344422e-05, |
| "loss": 0.0659, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.34538951556580816, |
| "grad_norm": 0.455078125, |
| "learning_rate": 1.56102728264176e-05, |
| "loss": 0.0632, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.34572813273793146, |
| "grad_norm": 1.1953125, |
| "learning_rate": 1.560100130801333e-05, |
| "loss": 0.0618, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.3460667499100548, |
| "grad_norm": 0.6015625, |
| "learning_rate": 1.5591722768753464e-05, |
| "loss": 0.0721, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.3464053670821782, |
| "grad_norm": 0.408203125, |
| "learning_rate": 1.5582437220268648e-05, |
| "loss": 0.0518, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.3467439842543015, |
| "grad_norm": 0.48828125, |
| "learning_rate": 1.5573144674198323e-05, |
| "loss": 0.0599, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.34708260142642483, |
| "grad_norm": 0.453125, |
| "learning_rate": 1.5563845142190687e-05, |
| "loss": 0.0601, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.3474212185985482, |
| "grad_norm": 0.486328125, |
| "learning_rate": 1.555453863590272e-05, |
| "loss": 0.0596, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.34775983577067154, |
| "grad_norm": 0.41015625, |
| "learning_rate": 1.554522516700011e-05, |
| "loss": 0.0496, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.34809845294279484, |
| "grad_norm": 0.5703125, |
| "learning_rate": 1.5535904747157303e-05, |
| "loss": 0.0744, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.3484370701149182, |
| "grad_norm": 0.43359375, |
| "learning_rate": 1.5526577388057444e-05, |
| "loss": 0.0532, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.34877568728704156, |
| "grad_norm": 0.63671875, |
| "learning_rate": 1.5517243101392373e-05, |
| "loss": 0.08, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.34911430445916486, |
| "grad_norm": 0.4921875, |
| "learning_rate": 1.5507901898862623e-05, |
| "loss": 0.0623, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.3494529216312882, |
| "grad_norm": 0.484375, |
| "learning_rate": 1.5498553792177395e-05, |
| "loss": 0.0582, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.3497915388034116, |
| "grad_norm": 0.59765625, |
| "learning_rate": 1.5489198793054535e-05, |
| "loss": 0.0806, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.35013015597553493, |
| "grad_norm": 0.4765625, |
| "learning_rate": 1.5479836913220544e-05, |
| "loss": 0.0691, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.35046877314765823, |
| "grad_norm": 0.47265625, |
| "learning_rate": 1.547046816441053e-05, |
| "loss": 0.0724, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.3508073903197816, |
| "grad_norm": 0.470703125, |
| "learning_rate": 1.5461092558368223e-05, |
| "loss": 0.0604, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.35114600749190494, |
| "grad_norm": 0.625, |
| "learning_rate": 1.5451710106845953e-05, |
| "loss": 0.0606, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.3514846246640283, |
| "grad_norm": 0.52734375, |
| "learning_rate": 1.5442320821604616e-05, |
| "loss": 0.0774, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.3518232418361516, |
| "grad_norm": 0.486328125, |
| "learning_rate": 1.5432924714413685e-05, |
| "loss": 0.0642, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.35216185900827496, |
| "grad_norm": 0.74609375, |
| "learning_rate": 1.5423521797051176e-05, |
| "loss": 0.1061, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.3525004761803983, |
| "grad_norm": 0.447265625, |
| "learning_rate": 1.541411208130365e-05, |
| "loss": 0.0601, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.3528390933525216, |
| "grad_norm": 0.5859375, |
| "learning_rate": 1.540469557896619e-05, |
| "loss": 0.0713, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.35317771052464497, |
| "grad_norm": 0.51953125, |
| "learning_rate": 1.539527230184238e-05, |
| "loss": 0.0591, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.35351632769676833, |
| "grad_norm": 0.52734375, |
| "learning_rate": 1.5385842261744296e-05, |
| "loss": 0.0738, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.3538549448688917, |
| "grad_norm": 0.56640625, |
| "learning_rate": 1.5376405470492502e-05, |
| "loss": 0.0825, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.354193562041015, |
| "grad_norm": 0.39453125, |
| "learning_rate": 1.536696193991601e-05, |
| "loss": 0.0502, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.35453217921313834, |
| "grad_norm": 0.56640625, |
| "learning_rate": 1.535751168185228e-05, |
| "loss": 0.0713, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.3548707963852617, |
| "grad_norm": 0.53515625, |
| "learning_rate": 1.5348054708147225e-05, |
| "loss": 0.0786, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.35520941355738506, |
| "grad_norm": 0.5078125, |
| "learning_rate": 1.5338591030655154e-05, |
| "loss": 0.0689, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.35554803072950836, |
| "grad_norm": 0.4921875, |
| "learning_rate": 1.5329120661238788e-05, |
| "loss": 0.0645, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.3558866479016317, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.5319643611769237e-05, |
| "loss": 0.0825, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.35622526507375507, |
| "grad_norm": 0.458984375, |
| "learning_rate": 1.5310159894125986e-05, |
| "loss": 0.0608, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.35656388224587837, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.530066952019687e-05, |
| "loss": 0.0677, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.35690249941800173, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.529117250187808e-05, |
| "loss": 0.078, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.3572411165901251, |
| "grad_norm": 0.4921875, |
| "learning_rate": 1.5281668851074123e-05, |
| "loss": 0.0656, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.35757973376224844, |
| "grad_norm": 0.4609375, |
| "learning_rate": 1.527215857969783e-05, |
| "loss": 0.0576, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.35791835093437174, |
| "grad_norm": 0.484375, |
| "learning_rate": 1.526264169967033e-05, |
| "loss": 0.0565, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.3582569681064951, |
| "grad_norm": 0.609375, |
| "learning_rate": 1.5253118222921024e-05, |
| "loss": 0.0859, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.35859558527861846, |
| "grad_norm": 0.458984375, |
| "learning_rate": 1.5243588161387596e-05, |
| "loss": 0.0602, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.35893420245074176, |
| "grad_norm": 0.51953125, |
| "learning_rate": 1.5234051527015983e-05, |
| "loss": 0.0625, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.3592728196228651, |
| "grad_norm": 0.443359375, |
| "learning_rate": 1.522450833176035e-05, |
| "loss": 0.0579, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.35961143679498847, |
| "grad_norm": 0.87890625, |
| "learning_rate": 1.5214958587583092e-05, |
| "loss": 0.0713, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.3599500539671118, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.5205402306454823e-05, |
| "loss": 0.0696, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.36028867113923513, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.5195839500354337e-05, |
| "loss": 0.0755, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.3606272883113585, |
| "grad_norm": 0.439453125, |
| "learning_rate": 1.5186270181268612e-05, |
| "loss": 0.0575, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.36096590548348184, |
| "grad_norm": 0.59765625, |
| "learning_rate": 1.5176694361192787e-05, |
| "loss": 0.0814, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.3613045226556052, |
| "grad_norm": 0.56640625, |
| "learning_rate": 1.516711205213016e-05, |
| "loss": 0.0819, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.3616431398277285, |
| "grad_norm": 0.56640625, |
| "learning_rate": 1.5157523266092153e-05, |
| "loss": 0.0777, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.36198175699985186, |
| "grad_norm": 0.419921875, |
| "learning_rate": 1.5147928015098309e-05, |
| "loss": 0.0584, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.3623203741719752, |
| "grad_norm": 0.52734375, |
| "learning_rate": 1.5138326311176278e-05, |
| "loss": 0.0745, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.3626589913440985, |
| "grad_norm": 0.490234375, |
| "learning_rate": 1.5128718166361793e-05, |
| "loss": 0.0671, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.36299760851622187, |
| "grad_norm": 0.423828125, |
| "learning_rate": 1.511910359269867e-05, |
| "loss": 0.0531, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.3633362256883452, |
| "grad_norm": 0.51171875, |
| "learning_rate": 1.5109482602238773e-05, |
| "loss": 0.0624, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.3636748428604686, |
| "grad_norm": 0.66015625, |
| "learning_rate": 1.5099855207042016e-05, |
| "loss": 0.0907, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.3640134600325919, |
| "grad_norm": 0.53125, |
| "learning_rate": 1.509022141917634e-05, |
| "loss": 0.0673, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.36435207720471524, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.5080581250717699e-05, |
| "loss": 0.0671, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.3646906943768386, |
| "grad_norm": 0.6796875, |
| "learning_rate": 1.5070934713750043e-05, |
| "loss": 0.0854, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.36502931154896195, |
| "grad_norm": 0.6171875, |
| "learning_rate": 1.5061281820365308e-05, |
| "loss": 0.0592, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.36536792872108526, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.50516225826634e-05, |
| "loss": 0.0659, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.3657065458932086, |
| "grad_norm": 0.4921875, |
| "learning_rate": 1.5041957012752173e-05, |
| "loss": 0.0522, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.36604516306533197, |
| "grad_norm": 0.60546875, |
| "learning_rate": 1.5032285122747414e-05, |
| "loss": 0.0703, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.36638378023745527, |
| "grad_norm": 0.4375, |
| "learning_rate": 1.5022606924772842e-05, |
| "loss": 0.0595, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.3667223974095786, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.5012922430960082e-05, |
| "loss": 0.059, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.367061014581702, |
| "grad_norm": 0.453125, |
| "learning_rate": 1.5003231653448645e-05, |
| "loss": 0.0567, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.36739963175382534, |
| "grad_norm": 0.462890625, |
| "learning_rate": 1.4993534604385917e-05, |
| "loss": 0.0622, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.36773824892594864, |
| "grad_norm": 0.625, |
| "learning_rate": 1.4983831295927154e-05, |
| "loss": 0.0874, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.368076866098072, |
| "grad_norm": 0.51171875, |
| "learning_rate": 1.4974121740235457e-05, |
| "loss": 0.0616, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.36841548327019535, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.496440594948175e-05, |
| "loss": 0.0776, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.36875410044231866, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.495468393584478e-05, |
| "loss": 0.0603, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.369092717614442, |
| "grad_norm": 0.412109375, |
| "learning_rate": 1.4944955711511091e-05, |
| "loss": 0.0549, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.36943133478656537, |
| "grad_norm": 0.60546875, |
| "learning_rate": 1.4935221288675013e-05, |
| "loss": 0.0634, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.3697699519586887, |
| "grad_norm": 0.49609375, |
| "learning_rate": 1.4925480679538646e-05, |
| "loss": 0.0627, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.370108569130812, |
| "grad_norm": 0.408203125, |
| "learning_rate": 1.4915733896311844e-05, |
| "loss": 0.052, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.3704471863029354, |
| "grad_norm": 0.53125, |
| "learning_rate": 1.49059809512122e-05, |
| "loss": 0.0806, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.37078580347505874, |
| "grad_norm": 0.58203125, |
| "learning_rate": 1.4896221856465034e-05, |
| "loss": 0.0737, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.3711244206471821, |
| "grad_norm": 0.63671875, |
| "learning_rate": 1.4886456624303369e-05, |
| "loss": 0.0788, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.3714630378193054, |
| "grad_norm": 0.5390625, |
| "learning_rate": 1.4876685266967926e-05, |
| "loss": 0.0535, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.37180165499142875, |
| "grad_norm": 0.44140625, |
| "learning_rate": 1.4866907796707102e-05, |
| "loss": 0.0557, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.3721402721635521, |
| "grad_norm": 0.65625, |
| "learning_rate": 1.4857124225776955e-05, |
| "loss": 0.0696, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.3724788893356754, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.4847334566441199e-05, |
| "loss": 0.0639, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.37281750650779877, |
| "grad_norm": 0.71875, |
| "learning_rate": 1.4837538830971162e-05, |
| "loss": 0.0792, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.3731561236799221, |
| "grad_norm": 0.453125, |
| "learning_rate": 1.4827737031645808e-05, |
| "loss": 0.0613, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.3734947408520455, |
| "grad_norm": 0.490234375, |
| "learning_rate": 1.481792918075169e-05, |
| "loss": 0.0628, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.3738333580241688, |
| "grad_norm": 0.58203125, |
| "learning_rate": 1.4808115290582947e-05, |
| "loss": 0.0682, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.37417197519629214, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.4798295373441293e-05, |
| "loss": 0.0762, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.3745105923684155, |
| "grad_norm": 0.451171875, |
| "learning_rate": 1.4788469441635997e-05, |
| "loss": 0.0504, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.3748492095405388, |
| "grad_norm": 0.42578125, |
| "learning_rate": 1.4778637507483867e-05, |
| "loss": 0.0547, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.37518782671266215, |
| "grad_norm": 0.44140625, |
| "learning_rate": 1.4768799583309228e-05, |
| "loss": 0.0602, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.3755264438847855, |
| "grad_norm": 0.466796875, |
| "learning_rate": 1.475895568144392e-05, |
| "loss": 0.0622, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.37586506105690887, |
| "grad_norm": 0.44140625, |
| "learning_rate": 1.4749105814227278e-05, |
| "loss": 0.0564, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.37620367822903217, |
| "grad_norm": 0.5546875, |
| "learning_rate": 1.4739249994006111e-05, |
| "loss": 0.0762, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.3765422954011555, |
| "grad_norm": 0.44140625, |
| "learning_rate": 1.4729388233134684e-05, |
| "loss": 0.059, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.3768809125732789, |
| "grad_norm": 0.48828125, |
| "learning_rate": 1.4719520543974723e-05, |
| "loss": 0.0712, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.37721952974540224, |
| "grad_norm": 0.41796875, |
| "learning_rate": 1.4709646938895374e-05, |
| "loss": 0.0532, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.37755814691752554, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.4699767430273202e-05, |
| "loss": 0.0628, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.3778967640896489, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.468988203049217e-05, |
| "loss": 0.0845, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.37823538126177225, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.4679990751943632e-05, |
| "loss": 0.0704, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.37857399843389555, |
| "grad_norm": 0.4453125, |
| "learning_rate": 1.4670093607026302e-05, |
| "loss": 0.05, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.3789126156060189, |
| "grad_norm": 0.65234375, |
| "learning_rate": 1.4660190608146253e-05, |
| "loss": 0.0856, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.37925123277814227, |
| "grad_norm": 0.484375, |
| "learning_rate": 1.4650281767716895e-05, |
| "loss": 0.0504, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.3795898499502656, |
| "grad_norm": 0.4765625, |
| "learning_rate": 1.4640367098158961e-05, |
| "loss": 0.0704, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.3799284671223889, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.4630446611900493e-05, |
| "loss": 0.0601, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.3802670842945123, |
| "grad_norm": 0.451171875, |
| "learning_rate": 1.4620520321376814e-05, |
| "loss": 0.0665, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.38060570146663564, |
| "grad_norm": 0.59375, |
| "learning_rate": 1.4610588239030537e-05, |
| "loss": 0.0776, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.380944318638759, |
| "grad_norm": 0.474609375, |
| "learning_rate": 1.4600650377311523e-05, |
| "loss": 0.0622, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.3812829358108823, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.4590706748676886e-05, |
| "loss": 0.0618, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.38162155298300565, |
| "grad_norm": 0.484375, |
| "learning_rate": 1.4580757365590965e-05, |
| "loss": 0.0694, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.381960170155129, |
| "grad_norm": 0.486328125, |
| "learning_rate": 1.4570802240525309e-05, |
| "loss": 0.0619, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.3822987873272523, |
| "grad_norm": 0.609375, |
| "learning_rate": 1.456084138595867e-05, |
| "loss": 0.0651, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.38263740449937567, |
| "grad_norm": 0.53125, |
| "learning_rate": 1.4550874814376983e-05, |
| "loss": 0.0668, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.382976021671499, |
| "grad_norm": 0.51171875, |
| "learning_rate": 1.4540902538273343e-05, |
| "loss": 0.0639, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.3833146388436224, |
| "grad_norm": 0.51171875, |
| "learning_rate": 1.4530924570147998e-05, |
| "loss": 0.0626, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.3836532560157457, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.452094092250834e-05, |
| "loss": 0.0724, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.38399187318786904, |
| "grad_norm": 0.64453125, |
| "learning_rate": 1.451095160786886e-05, |
| "loss": 0.1049, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.3843304903599924, |
| "grad_norm": 0.921875, |
| "learning_rate": 1.450095663875117e-05, |
| "loss": 0.0733, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.3846691075321157, |
| "grad_norm": 0.7734375, |
| "learning_rate": 1.449095602768397e-05, |
| "loss": 0.077, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.38500772470423905, |
| "grad_norm": 0.6640625, |
| "learning_rate": 1.4480949787203015e-05, |
| "loss": 0.0811, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.3853463418763624, |
| "grad_norm": 0.82421875, |
| "learning_rate": 1.4470937929851142e-05, |
| "loss": 0.0932, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.38568495904848576, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.4460920468178204e-05, |
| "loss": 0.0565, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.38602357622060907, |
| "grad_norm": 0.6796875, |
| "learning_rate": 1.4450897414741095e-05, |
| "loss": 0.0705, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.3863621933927324, |
| "grad_norm": 0.5, |
| "learning_rate": 1.4440868782103711e-05, |
| "loss": 0.064, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.3867008105648558, |
| "grad_norm": 0.5078125, |
| "learning_rate": 1.443083458283695e-05, |
| "loss": 0.055, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.38703942773697914, |
| "grad_norm": 0.40234375, |
| "learning_rate": 1.4420794829518674e-05, |
| "loss": 0.0502, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.38737804490910244, |
| "grad_norm": 0.466796875, |
| "learning_rate": 1.4410749534733719e-05, |
| "loss": 0.0673, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.3877166620812258, |
| "grad_norm": 0.75, |
| "learning_rate": 1.440069871107386e-05, |
| "loss": 0.0594, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.38805527925334915, |
| "grad_norm": 0.48046875, |
| "learning_rate": 1.4390642371137807e-05, |
| "loss": 0.059, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.38839389642547245, |
| "grad_norm": 2.171875, |
| "learning_rate": 1.438058052753118e-05, |
| "loss": 0.099, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.3887325135975958, |
| "grad_norm": 0.439453125, |
| "learning_rate": 1.4370513192866507e-05, |
| "loss": 0.0603, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.38907113076971916, |
| "grad_norm": 0.46875, |
| "learning_rate": 1.4360440379763187e-05, |
| "loss": 0.059, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.3894097479418425, |
| "grad_norm": 0.482421875, |
| "learning_rate": 1.4350362100847495e-05, |
| "loss": 0.0636, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.3897483651139658, |
| "grad_norm": 0.6171875, |
| "learning_rate": 1.4340278368752553e-05, |
| "loss": 0.0746, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.3900869822860892, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.4330189196118323e-05, |
| "loss": 0.0642, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.39042559945821254, |
| "grad_norm": 0.68359375, |
| "learning_rate": 1.4320094595591578e-05, |
| "loss": 0.0929, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.3907642166303359, |
| "grad_norm": 0.462890625, |
| "learning_rate": 1.4309994579825908e-05, |
| "loss": 0.0529, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.3911028338024592, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.4299889161481676e-05, |
| "loss": 0.0702, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.39144145097458255, |
| "grad_norm": 0.62890625, |
| "learning_rate": 1.4289778353226032e-05, |
| "loss": 0.0706, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.3917800681467059, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.4279662167732869e-05, |
| "loss": 0.0684, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.3921186853188292, |
| "grad_norm": 0.4765625, |
| "learning_rate": 1.4269540617682826e-05, |
| "loss": 0.0706, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.39245730249095256, |
| "grad_norm": 0.66015625, |
| "learning_rate": 1.4259413715763276e-05, |
| "loss": 0.0789, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.3927959196630759, |
| "grad_norm": 0.55859375, |
| "learning_rate": 1.4249281474668279e-05, |
| "loss": 0.07, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.3931345368351993, |
| "grad_norm": 0.671875, |
| "learning_rate": 1.423914390709861e-05, |
| "loss": 0.0906, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.3934731540073226, |
| "grad_norm": 0.44921875, |
| "learning_rate": 1.4229001025761704e-05, |
| "loss": 0.0536, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.39381177117944594, |
| "grad_norm": 0.466796875, |
| "learning_rate": 1.4218852843371665e-05, |
| "loss": 0.0557, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.3941503883515693, |
| "grad_norm": 0.49609375, |
| "learning_rate": 1.4208699372649244e-05, |
| "loss": 0.0668, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.3944890055236926, |
| "grad_norm": 0.47265625, |
| "learning_rate": 1.4198540626321817e-05, |
| "loss": 0.0609, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.39482762269581595, |
| "grad_norm": 0.453125, |
| "learning_rate": 1.4188376617123368e-05, |
| "loss": 0.0655, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.3951662398679393, |
| "grad_norm": 0.484375, |
| "learning_rate": 1.4178207357794486e-05, |
| "loss": 0.0662, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.39550485704006266, |
| "grad_norm": 0.49609375, |
| "learning_rate": 1.4168032861082344e-05, |
| "loss": 0.0721, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.39584347421218596, |
| "grad_norm": 0.458984375, |
| "learning_rate": 1.4157853139740665e-05, |
| "loss": 0.0676, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.3961820913843093, |
| "grad_norm": 0.6015625, |
| "learning_rate": 1.4147668206529737e-05, |
| "loss": 0.0768, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.3965207085564327, |
| "grad_norm": 0.486328125, |
| "learning_rate": 1.413747807421637e-05, |
| "loss": 0.0727, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.39685932572855603, |
| "grad_norm": 0.435546875, |
| "learning_rate": 1.4127282755573903e-05, |
| "loss": 0.062, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.39719794290067933, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.4117082263382162e-05, |
| "loss": 0.0696, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.3975365600728027, |
| "grad_norm": 0.45703125, |
| "learning_rate": 1.4106876610427466e-05, |
| "loss": 0.0592, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.39787517724492605, |
| "grad_norm": 0.484375, |
| "learning_rate": 1.4096665809502607e-05, |
| "loss": 0.0607, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.39821379441704935, |
| "grad_norm": 0.49609375, |
| "learning_rate": 1.408644987340682e-05, |
| "loss": 0.0619, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.3985524115891727, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.4076228814945778e-05, |
| "loss": 0.0537, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.39889102876129606, |
| "grad_norm": 0.443359375, |
| "learning_rate": 1.4066002646931587e-05, |
| "loss": 0.0623, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.3992296459334194, |
| "grad_norm": 0.49609375, |
| "learning_rate": 1.4055771382182744e-05, |
| "loss": 0.0812, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.3995682631055427, |
| "grad_norm": 0.5078125, |
| "learning_rate": 1.404553503352414e-05, |
| "loss": 0.0687, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.3999068802776661, |
| "grad_norm": 0.45703125, |
| "learning_rate": 1.4035293613787042e-05, |
| "loss": 0.0612, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.40024549744978943, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.4025047135809069e-05, |
| "loss": 0.0767, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.4005841146219128, |
| "grad_norm": 0.57421875, |
| "learning_rate": 1.4014795612434182e-05, |
| "loss": 0.085, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.4009227317940361, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.4004539056512667e-05, |
| "loss": 0.0683, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.4009227317940361, |
| "eval_loss": 0.06940508633852005, |
| "eval_runtime": 816.0342, |
| "eval_samples_per_second": 12.191, |
| "eval_steps_per_second": 3.048, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.40126134896615945, |
| "grad_norm": 0.494140625, |
| "learning_rate": 1.3994277480901116e-05, |
| "loss": 0.0512, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.4015999661382828, |
| "grad_norm": 0.56640625, |
| "learning_rate": 1.3984010898462417e-05, |
| "loss": 0.0746, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.4019385833104061, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.397373932206573e-05, |
| "loss": 0.0604, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.40227720048252946, |
| "grad_norm": 0.53515625, |
| "learning_rate": 1.3963462764586479e-05, |
| "loss": 0.0658, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.4026158176546528, |
| "grad_norm": 0.49609375, |
| "learning_rate": 1.3953181238906326e-05, |
| "loss": 0.0674, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.4029544348267762, |
| "grad_norm": 0.47265625, |
| "learning_rate": 1.3942894757913169e-05, |
| "loss": 0.0628, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.4032930519988995, |
| "grad_norm": 0.494140625, |
| "learning_rate": 1.3932603334501106e-05, |
| "loss": 0.0657, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.40363166917102283, |
| "grad_norm": 0.44921875, |
| "learning_rate": 1.3922306981570447e-05, |
| "loss": 0.0588, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.4039702863431462, |
| "grad_norm": 0.404296875, |
| "learning_rate": 1.3912005712027661e-05, |
| "loss": 0.0558, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.4043089035152695, |
| "grad_norm": 0.375, |
| "learning_rate": 1.3901699538785398e-05, |
| "loss": 0.0519, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.40464752068739285, |
| "grad_norm": 0.671875, |
| "learning_rate": 1.3891388474762444e-05, |
| "loss": 0.0755, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.4049861378595162, |
| "grad_norm": 0.41796875, |
| "learning_rate": 1.388107253288372e-05, |
| "loss": 0.0614, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.40532475503163956, |
| "grad_norm": 0.7109375, |
| "learning_rate": 1.3870751726080256e-05, |
| "loss": 0.1036, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.40566337220376286, |
| "grad_norm": 0.349609375, |
| "learning_rate": 1.3860426067289185e-05, |
| "loss": 0.0498, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.4060019893758862, |
| "grad_norm": 0.43359375, |
| "learning_rate": 1.3850095569453728e-05, |
| "loss": 0.0602, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.4063406065480096, |
| "grad_norm": 0.59765625, |
| "learning_rate": 1.3839760245523155e-05, |
| "loss": 0.0664, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.40667922372013293, |
| "grad_norm": 0.6953125, |
| "learning_rate": 1.38294201084528e-05, |
| "loss": 0.089, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.40701784089225623, |
| "grad_norm": 0.7265625, |
| "learning_rate": 1.3819075171204028e-05, |
| "loss": 0.0595, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.4073564580643796, |
| "grad_norm": 0.765625, |
| "learning_rate": 1.3808725446744218e-05, |
| "loss": 0.0571, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.40769507523650295, |
| "grad_norm": 0.431640625, |
| "learning_rate": 1.3798370948046747e-05, |
| "loss": 0.0537, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.40803369240862625, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.3788011688090978e-05, |
| "loss": 0.0609, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.4083723095807496, |
| "grad_norm": 0.48828125, |
| "learning_rate": 1.3777647679862254e-05, |
| "loss": 0.0642, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.40871092675287296, |
| "grad_norm": 0.625, |
| "learning_rate": 1.3767278936351853e-05, |
| "loss": 0.0814, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.4090495439249963, |
| "grad_norm": 0.57421875, |
| "learning_rate": 1.3756905470556996e-05, |
| "loss": 0.099, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.4093881610971196, |
| "grad_norm": 0.4375, |
| "learning_rate": 1.3746527295480825e-05, |
| "loss": 0.0597, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.409726778269243, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.3736144424132383e-05, |
| "loss": 0.0622, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.41006539544136633, |
| "grad_norm": 0.6328125, |
| "learning_rate": 1.3725756869526598e-05, |
| "loss": 0.0794, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.4104040126134897, |
| "grad_norm": 0.48828125, |
| "learning_rate": 1.3715364644684273e-05, |
| "loss": 0.0525, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.410742629785613, |
| "grad_norm": 0.51171875, |
| "learning_rate": 1.370496776263206e-05, |
| "loss": 0.064, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.41108124695773635, |
| "grad_norm": 0.828125, |
| "learning_rate": 1.3694566236402458e-05, |
| "loss": 0.0815, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.4114198641298597, |
| "grad_norm": 0.48046875, |
| "learning_rate": 1.3684160079033772e-05, |
| "loss": 0.0638, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.411758481301983, |
| "grad_norm": 0.56640625, |
| "learning_rate": 1.3673749303570127e-05, |
| "loss": 0.0801, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.41209709847410636, |
| "grad_norm": 0.5703125, |
| "learning_rate": 1.366333392306143e-05, |
| "loss": 0.0628, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.4124357156462297, |
| "grad_norm": 0.46875, |
| "learning_rate": 1.3652913950563362e-05, |
| "loss": 0.062, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.4127743328183531, |
| "grad_norm": 0.7265625, |
| "learning_rate": 1.3642489399137358e-05, |
| "loss": 0.079, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.4131129499904764, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.3632060281850593e-05, |
| "loss": 0.0634, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.41345156716259973, |
| "grad_norm": 0.3984375, |
| "learning_rate": 1.3621626611775966e-05, |
| "loss": 0.0488, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.4137901843347231, |
| "grad_norm": 0.5625, |
| "learning_rate": 1.3611188401992087e-05, |
| "loss": 0.0813, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.4141288015068464, |
| "grad_norm": 0.91015625, |
| "learning_rate": 1.360074566558325e-05, |
| "loss": 0.0672, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.41446741867896975, |
| "grad_norm": 0.57421875, |
| "learning_rate": 1.3590298415639427e-05, |
| "loss": 0.0753, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.4148060358510931, |
| "grad_norm": 0.462890625, |
| "learning_rate": 1.3579846665256244e-05, |
| "loss": 0.0703, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.41514465302321646, |
| "grad_norm": 0.72265625, |
| "learning_rate": 1.3569390427534976e-05, |
| "loss": 0.0963, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.41548327019533976, |
| "grad_norm": 0.5859375, |
| "learning_rate": 1.3558929715582517e-05, |
| "loss": 0.07, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.4158218873674631, |
| "grad_norm": 0.47265625, |
| "learning_rate": 1.3548464542511364e-05, |
| "loss": 0.0673, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.4161605045395865, |
| "grad_norm": 0.57421875, |
| "learning_rate": 1.353799492143962e-05, |
| "loss": 0.0684, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.41649912171170983, |
| "grad_norm": 0.609375, |
| "learning_rate": 1.352752086549095e-05, |
| "loss": 0.0755, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.41683773888383313, |
| "grad_norm": 0.53125, |
| "learning_rate": 1.3517042387794585e-05, |
| "loss": 0.0698, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.4171763560559565, |
| "grad_norm": 0.5, |
| "learning_rate": 1.3506559501485304e-05, |
| "loss": 0.058, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.41751497322807984, |
| "grad_norm": 0.640625, |
| "learning_rate": 1.3496072219703399e-05, |
| "loss": 0.0792, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.41785359040020315, |
| "grad_norm": 0.4296875, |
| "learning_rate": 1.3485580555594679e-05, |
| "loss": 0.0639, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.4181922075723265, |
| "grad_norm": 0.66796875, |
| "learning_rate": 1.3475084522310451e-05, |
| "loss": 0.0783, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.41853082474444986, |
| "grad_norm": 0.64453125, |
| "learning_rate": 1.3464584133007486e-05, |
| "loss": 0.0711, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.4188694419165732, |
| "grad_norm": 0.58203125, |
| "learning_rate": 1.3454079400848029e-05, |
| "loss": 0.0688, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.4192080590886965, |
| "grad_norm": 0.486328125, |
| "learning_rate": 1.3443570338999759e-05, |
| "loss": 0.0748, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.4195466762608199, |
| "grad_norm": 0.56640625, |
| "learning_rate": 1.3433056960635788e-05, |
| "loss": 0.0767, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.41988529343294323, |
| "grad_norm": 0.5546875, |
| "learning_rate": 1.3422539278934637e-05, |
| "loss": 0.0543, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.4202239106050666, |
| "grad_norm": 0.81640625, |
| "learning_rate": 1.341201730708022e-05, |
| "loss": 0.0847, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.4205625277771899, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.3401491058261829e-05, |
| "loss": 0.0803, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.42090114494931324, |
| "grad_norm": 0.435546875, |
| "learning_rate": 1.3390960545674117e-05, |
| "loss": 0.058, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.4212397621214366, |
| "grad_norm": 0.53515625, |
| "learning_rate": 1.3380425782517084e-05, |
| "loss": 0.0666, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.4215783792935599, |
| "grad_norm": 0.63671875, |
| "learning_rate": 1.3369886781996056e-05, |
| "loss": 0.0741, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.42191699646568326, |
| "grad_norm": 0.453125, |
| "learning_rate": 1.335934355732167e-05, |
| "loss": 0.052, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.4222556136378066, |
| "grad_norm": 0.52734375, |
| "learning_rate": 1.3348796121709862e-05, |
| "loss": 0.0629, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.42259423080992997, |
| "grad_norm": 0.43359375, |
| "learning_rate": 1.3338244488381843e-05, |
| "loss": 0.0573, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.4229328479820533, |
| "grad_norm": 0.40625, |
| "learning_rate": 1.332768867056408e-05, |
| "loss": 0.0526, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.42327146515417663, |
| "grad_norm": 0.470703125, |
| "learning_rate": 1.3317128681488301e-05, |
| "loss": 0.0641, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.4236100823263, |
| "grad_norm": 0.5078125, |
| "learning_rate": 1.3306564534391447e-05, |
| "loss": 0.0617, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.4239486994984233, |
| "grad_norm": 0.91796875, |
| "learning_rate": 1.3295996242515679e-05, |
| "loss": 0.0626, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.42428731667054664, |
| "grad_norm": 0.46875, |
| "learning_rate": 1.3285423819108349e-05, |
| "loss": 0.0596, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.42462593384267, |
| "grad_norm": 0.375, |
| "learning_rate": 1.3274847277421997e-05, |
| "loss": 0.0488, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.42496455101479336, |
| "grad_norm": 0.42578125, |
| "learning_rate": 1.3264266630714308e-05, |
| "loss": 0.0614, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.42530316818691666, |
| "grad_norm": 0.455078125, |
| "learning_rate": 1.3253681892248136e-05, |
| "loss": 0.0623, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.42564178535904, |
| "grad_norm": 1.234375, |
| "learning_rate": 1.3243093075291444e-05, |
| "loss": 0.0729, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.42598040253116337, |
| "grad_norm": 0.4609375, |
| "learning_rate": 1.3232500193117318e-05, |
| "loss": 0.0576, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.42631901970328673, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.3221903259003935e-05, |
| "loss": 0.0782, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.42665763687541003, |
| "grad_norm": 0.462890625, |
| "learning_rate": 1.3211302286234553e-05, |
| "loss": 0.0638, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.4269962540475334, |
| "grad_norm": 0.431640625, |
| "learning_rate": 1.3200697288097492e-05, |
| "loss": 0.0536, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.42733487121965674, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.3190088277886119e-05, |
| "loss": 0.073, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.42767348839178004, |
| "grad_norm": 0.416015625, |
| "learning_rate": 1.3179475268898828e-05, |
| "loss": 0.0467, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.4280121055639034, |
| "grad_norm": 0.361328125, |
| "learning_rate": 1.316885827443903e-05, |
| "loss": 0.0436, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.42835072273602676, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.3158237307815122e-05, |
| "loss": 0.0731, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.4286893399081501, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.3147612382340493e-05, |
| "loss": 0.0736, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.4290279570802734, |
| "grad_norm": 0.59375, |
| "learning_rate": 1.3136983511333483e-05, |
| "loss": 0.0812, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.42936657425239677, |
| "grad_norm": 0.51171875, |
| "learning_rate": 1.3126350708117387e-05, |
| "loss": 0.0731, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.4297051914245201, |
| "grad_norm": 0.5078125, |
| "learning_rate": 1.3115713986020421e-05, |
| "loss": 0.0622, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.4300438085966435, |
| "grad_norm": 0.388671875, |
| "learning_rate": 1.3105073358375719e-05, |
| "loss": 0.0519, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.4303824257687668, |
| "grad_norm": 0.75390625, |
| "learning_rate": 1.309442883852131e-05, |
| "loss": 0.0535, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.43072104294089014, |
| "grad_norm": 0.53515625, |
| "learning_rate": 1.30837804398001e-05, |
| "loss": 0.0714, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.4310596601130135, |
| "grad_norm": 0.51953125, |
| "learning_rate": 1.3073128175559852e-05, |
| "loss": 0.0685, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.4313982772851368, |
| "grad_norm": 0.5390625, |
| "learning_rate": 1.3062472059153185e-05, |
| "loss": 0.0976, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.43173689445726016, |
| "grad_norm": 0.6171875, |
| "learning_rate": 1.3051812103937545e-05, |
| "loss": 0.0825, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.4320755116293835, |
| "grad_norm": 0.44921875, |
| "learning_rate": 1.3041148323275182e-05, |
| "loss": 0.0525, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.43241412880150687, |
| "grad_norm": 0.4609375, |
| "learning_rate": 1.3030480730533146e-05, |
| "loss": 0.0686, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.43275274597363017, |
| "grad_norm": 0.59375, |
| "learning_rate": 1.3019809339083262e-05, |
| "loss": 0.0872, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.4330913631457535, |
| "grad_norm": 0.62109375, |
| "learning_rate": 1.3009134162302131e-05, |
| "loss": 0.0991, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.4334299803178769, |
| "grad_norm": 0.4921875, |
| "learning_rate": 1.299845521357108e-05, |
| "loss": 0.0583, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.4337685974900002, |
| "grad_norm": 0.5703125, |
| "learning_rate": 1.2987772506276173e-05, |
| "loss": 0.0625, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.43410721466212354, |
| "grad_norm": 0.4765625, |
| "learning_rate": 1.2977086053808183e-05, |
| "loss": 0.0614, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.4344458318342469, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.2966395869562582e-05, |
| "loss": 0.0513, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.43478444900637025, |
| "grad_norm": 0.486328125, |
| "learning_rate": 1.2955701966939517e-05, |
| "loss": 0.0637, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.43512306617849356, |
| "grad_norm": 0.453125, |
| "learning_rate": 1.2945004359343794e-05, |
| "loss": 0.0661, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.4354616833506169, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.2934303060184865e-05, |
| "loss": 0.0694, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.43580030052274027, |
| "grad_norm": 0.40625, |
| "learning_rate": 1.2923598082876811e-05, |
| "loss": 0.0542, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.4361389176948636, |
| "grad_norm": 0.419921875, |
| "learning_rate": 1.291288944083832e-05, |
| "loss": 0.0567, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.4364775348669869, |
| "grad_norm": 0.494140625, |
| "learning_rate": 1.2902177147492677e-05, |
| "loss": 0.0662, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.4368161520391103, |
| "grad_norm": 0.51953125, |
| "learning_rate": 1.2891461216267742e-05, |
| "loss": 0.0785, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.43715476921123364, |
| "grad_norm": 0.37890625, |
| "learning_rate": 1.2880741660595936e-05, |
| "loss": 0.0521, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.43749338638335694, |
| "grad_norm": 0.412109375, |
| "learning_rate": 1.2870018493914227e-05, |
| "loss": 0.0479, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.4378320035554803, |
| "grad_norm": 0.4765625, |
| "learning_rate": 1.2859291729664094e-05, |
| "loss": 0.0694, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.43817062072760365, |
| "grad_norm": 0.76953125, |
| "learning_rate": 1.2848561381291547e-05, |
| "loss": 0.0655, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.438509237899727, |
| "grad_norm": 0.609375, |
| "learning_rate": 1.2837827462247077e-05, |
| "loss": 0.0711, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.4388478550718503, |
| "grad_norm": 0.72265625, |
| "learning_rate": 1.2827089985985647e-05, |
| "loss": 0.1055, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.43918647224397367, |
| "grad_norm": 0.55859375, |
| "learning_rate": 1.2816348965966693e-05, |
| "loss": 0.053, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.439525089416097, |
| "grad_norm": 0.46875, |
| "learning_rate": 1.2805604415654076e-05, |
| "loss": 0.0567, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.4398637065882204, |
| "grad_norm": 0.57421875, |
| "learning_rate": 1.2794856348516095e-05, |
| "loss": 0.0856, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.4402023237603437, |
| "grad_norm": 0.69140625, |
| "learning_rate": 1.2784104778025455e-05, |
| "loss": 0.0913, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.44054094093246704, |
| "grad_norm": 0.46875, |
| "learning_rate": 1.2773349717659245e-05, |
| "loss": 0.0607, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.4408795581045904, |
| "grad_norm": 0.76953125, |
| "learning_rate": 1.2762591180898938e-05, |
| "loss": 0.1013, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.4412181752767137, |
| "grad_norm": 0.5390625, |
| "learning_rate": 1.2751829181230364e-05, |
| "loss": 0.0565, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.44155679244883705, |
| "grad_norm": 0.46875, |
| "learning_rate": 1.274106373214368e-05, |
| "loss": 0.0702, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.4418954096209604, |
| "grad_norm": 0.490234375, |
| "learning_rate": 1.2730294847133386e-05, |
| "loss": 0.0666, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.44223402679308377, |
| "grad_norm": 1.0546875, |
| "learning_rate": 1.2719522539698277e-05, |
| "loss": 0.0646, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.44257264396520707, |
| "grad_norm": 0.58203125, |
| "learning_rate": 1.2708746823341444e-05, |
| "loss": 0.0871, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.4429112611373304, |
| "grad_norm": 0.5390625, |
| "learning_rate": 1.2697967711570243e-05, |
| "loss": 0.0495, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.4432498783094538, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.2687185217896297e-05, |
| "loss": 0.0733, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.4435884954815771, |
| "grad_norm": 0.5625, |
| "learning_rate": 1.267639935583546e-05, |
| "loss": 0.072, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.44392711265370044, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.2665610138907813e-05, |
| "loss": 0.0742, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.4442657298258238, |
| "grad_norm": 0.73046875, |
| "learning_rate": 1.2654817580637637e-05, |
| "loss": 0.1116, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.44460434699794715, |
| "grad_norm": 0.43359375, |
| "learning_rate": 1.264402169455341e-05, |
| "loss": 0.0562, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.44494296417007045, |
| "grad_norm": 0.408203125, |
| "learning_rate": 1.263322249418777e-05, |
| "loss": 0.052, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.4452815813421938, |
| "grad_norm": 0.56640625, |
| "learning_rate": 1.2622419993077518e-05, |
| "loss": 0.0801, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.44562019851431717, |
| "grad_norm": 0.46484375, |
| "learning_rate": 1.2611614204763587e-05, |
| "loss": 0.0588, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.4459588156864405, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.2600805142791042e-05, |
| "loss": 0.0619, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.4462974328585638, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.2589992820709033e-05, |
| "loss": 0.0616, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.4466360500306872, |
| "grad_norm": 0.78515625, |
| "learning_rate": 1.2579177252070815e-05, |
| "loss": 0.0718, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.44697466720281054, |
| "grad_norm": 0.439453125, |
| "learning_rate": 1.2568358450433698e-05, |
| "loss": 0.0587, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.44731328437493384, |
| "grad_norm": 0.408203125, |
| "learning_rate": 1.2557536429359054e-05, |
| "loss": 0.0561, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.4476519015470572, |
| "grad_norm": 0.474609375, |
| "learning_rate": 1.2546711202412287e-05, |
| "loss": 0.0559, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.44799051871918055, |
| "grad_norm": 0.41015625, |
| "learning_rate": 1.2535882783162823e-05, |
| "loss": 0.0528, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.4483291358913039, |
| "grad_norm": 0.38671875, |
| "learning_rate": 1.2525051185184078e-05, |
| "loss": 0.0451, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.4486677530634272, |
| "grad_norm": 0.421875, |
| "learning_rate": 1.2514216422053468e-05, |
| "loss": 0.0545, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.44900637023555057, |
| "grad_norm": 0.48828125, |
| "learning_rate": 1.2503378507352365e-05, |
| "loss": 0.071, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.4493449874076739, |
| "grad_norm": 0.87890625, |
| "learning_rate": 1.24925374546661e-05, |
| "loss": 0.0632, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.4496836045797973, |
| "grad_norm": 0.58203125, |
| "learning_rate": 1.2481693277583932e-05, |
| "loss": 0.0858, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.4500222217519206, |
| "grad_norm": 0.5546875, |
| "learning_rate": 1.2470845989699036e-05, |
| "loss": 0.0668, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.45036083892404394, |
| "grad_norm": 0.47265625, |
| "learning_rate": 1.2459995604608493e-05, |
| "loss": 0.066, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.4506994560961673, |
| "grad_norm": 0.56640625, |
| "learning_rate": 1.2449142135913254e-05, |
| "loss": 0.0731, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.4510380732682906, |
| "grad_norm": 0.46484375, |
| "learning_rate": 1.243828559721815e-05, |
| "loss": 0.0638, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.45137669044041395, |
| "grad_norm": 0.6171875, |
| "learning_rate": 1.2427426002131848e-05, |
| "loss": 0.0645, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.4517153076125373, |
| "grad_norm": 0.640625, |
| "learning_rate": 1.2416563364266859e-05, |
| "loss": 0.0873, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.45205392478466067, |
| "grad_norm": 0.53515625, |
| "learning_rate": 1.240569769723949e-05, |
| "loss": 0.0606, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.45239254195678397, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.2394829014669863e-05, |
| "loss": 0.0785, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.4527311591289073, |
| "grad_norm": 0.5, |
| "learning_rate": 1.238395733018187e-05, |
| "loss": 0.0489, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.4530697763010307, |
| "grad_norm": 0.44921875, |
| "learning_rate": 1.2373082657403168e-05, |
| "loss": 0.0622, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.453408393473154, |
| "grad_norm": 0.5390625, |
| "learning_rate": 1.236220500996516e-05, |
| "loss": 0.0688, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.45374701064527734, |
| "grad_norm": 0.4765625, |
| "learning_rate": 1.235132440150298e-05, |
| "loss": 0.0579, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.4540856278174007, |
| "grad_norm": 0.494140625, |
| "learning_rate": 1.234044084565547e-05, |
| "loss": 0.0592, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.45442424498952405, |
| "grad_norm": 0.46875, |
| "learning_rate": 1.232955435606517e-05, |
| "loss": 0.0614, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.45476286216164735, |
| "grad_norm": 0.671875, |
| "learning_rate": 1.2318664946378292e-05, |
| "loss": 0.0752, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.4551014793337707, |
| "grad_norm": 0.53515625, |
| "learning_rate": 1.2307772630244715e-05, |
| "loss": 0.0526, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.45544009650589407, |
| "grad_norm": 0.55859375, |
| "learning_rate": 1.2296877421317958e-05, |
| "loss": 0.0691, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.4557787136780174, |
| "grad_norm": 0.59765625, |
| "learning_rate": 1.2285979333255165e-05, |
| "loss": 0.0796, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.4561173308501407, |
| "grad_norm": 0.447265625, |
| "learning_rate": 1.227507837971709e-05, |
| "loss": 0.0538, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.4564559480222641, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.2264174574368079e-05, |
| "loss": 0.0668, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.45679456519438744, |
| "grad_norm": 0.46484375, |
| "learning_rate": 1.2253267930876056e-05, |
| "loss": 0.0635, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.45713318236651074, |
| "grad_norm": 0.63671875, |
| "learning_rate": 1.2242358462912496e-05, |
| "loss": 0.0826, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.4574717995386341, |
| "grad_norm": 0.458984375, |
| "learning_rate": 1.2231446184152419e-05, |
| "loss": 0.0538, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.45781041671075745, |
| "grad_norm": 0.42578125, |
| "learning_rate": 1.2220531108274367e-05, |
| "loss": 0.0596, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.4581490338828808, |
| "grad_norm": 0.53125, |
| "learning_rate": 1.220961324896039e-05, |
| "loss": 0.0715, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.4584876510550041, |
| "grad_norm": 0.58203125, |
| "learning_rate": 1.2198692619896026e-05, |
| "loss": 0.0625, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.45882626822712747, |
| "grad_norm": 0.453125, |
| "learning_rate": 1.218776923477028e-05, |
| "loss": 0.0606, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.4591648853992508, |
| "grad_norm": 0.388671875, |
| "learning_rate": 1.2176843107275624e-05, |
| "loss": 0.0471, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.4595035025713742, |
| "grad_norm": 0.83984375, |
| "learning_rate": 1.2165914251107953e-05, |
| "loss": 0.0775, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.4598421197434975, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.215498267996659e-05, |
| "loss": 0.0786, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.46018073691562084, |
| "grad_norm": 0.51171875, |
| "learning_rate": 1.214404840755426e-05, |
| "loss": 0.0682, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.4605193540877442, |
| "grad_norm": 0.494140625, |
| "learning_rate": 1.2133111447577077e-05, |
| "loss": 0.0661, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.4608579712598675, |
| "grad_norm": 0.62109375, |
| "learning_rate": 1.2122171813744522e-05, |
| "loss": 0.0905, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.46119658843199085, |
| "grad_norm": 0.4375, |
| "learning_rate": 1.2111229519769421e-05, |
| "loss": 0.0615, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.4615352056041142, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.2100284579367947e-05, |
| "loss": 0.0636, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.46187382277623756, |
| "grad_norm": 0.44140625, |
| "learning_rate": 1.2089337006259581e-05, |
| "loss": 0.0617, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.46221243994836086, |
| "grad_norm": 0.51953125, |
| "learning_rate": 1.2078386814167106e-05, |
| "loss": 0.0522, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.4625510571204842, |
| "grad_norm": 0.59765625, |
| "learning_rate": 1.2067434016816591e-05, |
| "loss": 0.0824, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.4628896742926076, |
| "grad_norm": 0.578125, |
| "learning_rate": 1.2056478627937364e-05, |
| "loss": 0.0736, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.4632282914647309, |
| "grad_norm": 0.47265625, |
| "learning_rate": 1.2045520661262011e-05, |
| "loss": 0.0579, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.46356690863685424, |
| "grad_norm": 0.765625, |
| "learning_rate": 1.2034560130526341e-05, |
| "loss": 0.0649, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.4639055258089776, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.2023597049469378e-05, |
| "loss": 0.0666, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.46424414298110095, |
| "grad_norm": 0.47265625, |
| "learning_rate": 1.201263143183335e-05, |
| "loss": 0.0632, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.46458276015322425, |
| "grad_norm": 0.5078125, |
| "learning_rate": 1.2001663291363661e-05, |
| "loss": 0.0655, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.4649213773253476, |
| "grad_norm": 0.451171875, |
| "learning_rate": 1.199069264180887e-05, |
| "loss": 0.0583, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.46525999449747096, |
| "grad_norm": 0.5625, |
| "learning_rate": 1.1979719496920686e-05, |
| "loss": 0.0851, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.4655986116695943, |
| "grad_norm": 0.67578125, |
| "learning_rate": 1.1968743870453956e-05, |
| "loss": 0.0895, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.4659372288417176, |
| "grad_norm": 0.392578125, |
| "learning_rate": 1.195776577616662e-05, |
| "loss": 0.0533, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.466275846013841, |
| "grad_norm": 0.52734375, |
| "learning_rate": 1.1946785227819726e-05, |
| "loss": 0.0661, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.46661446318596433, |
| "grad_norm": 0.5, |
| "learning_rate": 1.1935802239177387e-05, |
| "loss": 0.0636, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.46695308035808764, |
| "grad_norm": 0.435546875, |
| "learning_rate": 1.1924816824006787e-05, |
| "loss": 0.0596, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.467291697530211, |
| "grad_norm": 0.443359375, |
| "learning_rate": 1.1913828996078136e-05, |
| "loss": 0.054, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.46763031470233435, |
| "grad_norm": 0.494140625, |
| "learning_rate": 1.1902838769164685e-05, |
| "loss": 0.0634, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.4679689318744577, |
| "grad_norm": 0.63671875, |
| "learning_rate": 1.1891846157042678e-05, |
| "loss": 0.0675, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.468307549046581, |
| "grad_norm": 0.59765625, |
| "learning_rate": 1.1880851173491361e-05, |
| "loss": 0.0691, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.46864616621870436, |
| "grad_norm": 0.71484375, |
| "learning_rate": 1.1869853832292944e-05, |
| "loss": 0.1164, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.4689847833908277, |
| "grad_norm": 0.74609375, |
| "learning_rate": 1.1858854147232595e-05, |
| "loss": 0.0892, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.4693234005629511, |
| "grad_norm": 0.5703125, |
| "learning_rate": 1.184785213209842e-05, |
| "loss": 0.0762, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.4696620177350744, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.1836847800681443e-05, |
| "loss": 0.0613, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.47000063490719773, |
| "grad_norm": 1.1796875, |
| "learning_rate": 1.1825841166775605e-05, |
| "loss": 0.0655, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.4703392520793211, |
| "grad_norm": 0.42578125, |
| "learning_rate": 1.181483224417771e-05, |
| "loss": 0.052, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.4706778692514444, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.180382104668745e-05, |
| "loss": 0.045, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.47101648642356775, |
| "grad_norm": 0.41796875, |
| "learning_rate": 1.1792807588107358e-05, |
| "loss": 0.0532, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.4713551035956911, |
| "grad_norm": 0.890625, |
| "learning_rate": 1.1781791882242811e-05, |
| "loss": 0.0719, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.47169372076781446, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.177077394290199e-05, |
| "loss": 0.0679, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.47203233793993776, |
| "grad_norm": 0.56640625, |
| "learning_rate": 1.175975378389589e-05, |
| "loss": 0.0782, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.4723709551120611, |
| "grad_norm": 0.419921875, |
| "learning_rate": 1.1748731419038278e-05, |
| "loss": 0.0547, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.4727095722841845, |
| "grad_norm": 0.609375, |
| "learning_rate": 1.1737706862145688e-05, |
| "loss": 0.0719, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.4730481894563078, |
| "grad_norm": 0.431640625, |
| "learning_rate": 1.1726680127037403e-05, |
| "loss": 0.063, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.47338680662843113, |
| "grad_norm": 0.490234375, |
| "learning_rate": 1.1715651227535441e-05, |
| "loss": 0.0681, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.4737254238005545, |
| "grad_norm": 0.494140625, |
| "learning_rate": 1.170462017746452e-05, |
| "loss": 0.0679, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.47406404097267785, |
| "grad_norm": 0.56640625, |
| "learning_rate": 1.169358699065207e-05, |
| "loss": 0.0749, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.47440265814480115, |
| "grad_norm": 0.51171875, |
| "learning_rate": 1.1682551680928189e-05, |
| "loss": 0.0639, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.4747412753169245, |
| "grad_norm": 0.52734375, |
| "learning_rate": 1.1671514262125638e-05, |
| "loss": 0.07, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.47507989248904786, |
| "grad_norm": 0.458984375, |
| "learning_rate": 1.1660474748079823e-05, |
| "loss": 0.0539, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.4754185096611712, |
| "grad_norm": 0.53515625, |
| "learning_rate": 1.1649433152628775e-05, |
| "loss": 0.0699, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.4757571268332945, |
| "grad_norm": 0.60546875, |
| "learning_rate": 1.1638389489613133e-05, |
| "loss": 0.0785, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.4760957440054179, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.1627343772876133e-05, |
| "loss": 0.0577, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.47643436117754123, |
| "grad_norm": 0.48828125, |
| "learning_rate": 1.1616296016263581e-05, |
| "loss": 0.0617, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.47677297834966453, |
| "grad_norm": 0.498046875, |
| "learning_rate": 1.1605246233623843e-05, |
| "loss": 0.0687, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.4771115955217879, |
| "grad_norm": 0.546875, |
| "learning_rate": 1.1594194438807817e-05, |
| "loss": 0.0702, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.47745021269391125, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.1583140645668933e-05, |
| "loss": 0.0706, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.4777888298660346, |
| "grad_norm": 0.546875, |
| "learning_rate": 1.157208486806312e-05, |
| "loss": 0.0633, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.4781274470381579, |
| "grad_norm": 0.419921875, |
| "learning_rate": 1.1561027119848793e-05, |
| "loss": 0.0517, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.47846606421028126, |
| "grad_norm": 0.5078125, |
| "learning_rate": 1.1549967414886847e-05, |
| "loss": 0.073, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.4788046813824046, |
| "grad_norm": 0.578125, |
| "learning_rate": 1.153890576704062e-05, |
| "loss": 0.0749, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.4791432985545279, |
| "grad_norm": 0.498046875, |
| "learning_rate": 1.1527842190175886e-05, |
| "loss": 0.0569, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.4794819157266513, |
| "grad_norm": 0.5703125, |
| "learning_rate": 1.1516776698160841e-05, |
| "loss": 0.0752, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.47982053289877463, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.1505709304866084e-05, |
| "loss": 0.0677, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.480159150070898, |
| "grad_norm": 0.43359375, |
| "learning_rate": 1.1494640024164587e-05, |
| "loss": 0.0518, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.4804977672430213, |
| "grad_norm": 0.4296875, |
| "learning_rate": 1.14835688699317e-05, |
| "loss": 0.055, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.48083638441514465, |
| "grad_norm": 0.5625, |
| "learning_rate": 1.1472495856045112e-05, |
| "loss": 0.073, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.481175001587268, |
| "grad_norm": 0.51953125, |
| "learning_rate": 1.1461420996384849e-05, |
| "loss": 0.0762, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.48151361875939136, |
| "grad_norm": 0.470703125, |
| "learning_rate": 1.1450344304833248e-05, |
| "loss": 0.0513, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.48185223593151466, |
| "grad_norm": 0.6015625, |
| "learning_rate": 1.1439265795274941e-05, |
| "loss": 0.0863, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.482190853103638, |
| "grad_norm": 0.45703125, |
| "learning_rate": 1.142818548159684e-05, |
| "loss": 0.0618, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.4825294702757614, |
| "grad_norm": 0.59765625, |
| "learning_rate": 1.1417103377688121e-05, |
| "loss": 0.0715, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.4828680874478847, |
| "grad_norm": 0.47265625, |
| "learning_rate": 1.1406019497440206e-05, |
| "loss": 0.0583, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.48320670462000803, |
| "grad_norm": 0.578125, |
| "learning_rate": 1.1394933854746733e-05, |
| "loss": 0.078, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.4835453217921314, |
| "grad_norm": 0.5, |
| "learning_rate": 1.1383846463503558e-05, |
| "loss": 0.0681, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.48388393896425475, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.1372757337608732e-05, |
| "loss": 0.0879, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.48422255613637805, |
| "grad_norm": 0.53125, |
| "learning_rate": 1.1361666490962468e-05, |
| "loss": 0.0716, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.4845611733085014, |
| "grad_norm": 0.5546875, |
| "learning_rate": 1.1350573937467147e-05, |
| "loss": 0.0754, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.48489979048062476, |
| "grad_norm": 0.439453125, |
| "learning_rate": 1.1339479691027284e-05, |
| "loss": 0.0527, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.4852384076527481, |
| "grad_norm": 0.40234375, |
| "learning_rate": 1.132838376554952e-05, |
| "loss": 0.0522, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.4855770248248714, |
| "grad_norm": 0.46484375, |
| "learning_rate": 1.1317286174942596e-05, |
| "loss": 0.0715, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.4859156419969948, |
| "grad_norm": 0.48828125, |
| "learning_rate": 1.1306186933117343e-05, |
| "loss": 0.0668, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.48625425916911813, |
| "grad_norm": 0.44921875, |
| "learning_rate": 1.1295086053986664e-05, |
| "loss": 0.0657, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.48659287634124143, |
| "grad_norm": 0.47265625, |
| "learning_rate": 1.1283983551465512e-05, |
| "loss": 0.058, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.4869314935133648, |
| "grad_norm": 0.490234375, |
| "learning_rate": 1.127287943947087e-05, |
| "loss": 0.0614, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.48727011068548814, |
| "grad_norm": 0.62109375, |
| "learning_rate": 1.1261773731921746e-05, |
| "loss": 0.0736, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.4876087278576115, |
| "grad_norm": 0.46484375, |
| "learning_rate": 1.1250666442739149e-05, |
| "loss": 0.0513, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.4879473450297348, |
| "grad_norm": 0.640625, |
| "learning_rate": 1.1239557585846066e-05, |
| "loss": 0.0689, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.48828596220185816, |
| "grad_norm": 0.6171875, |
| "learning_rate": 1.1228447175167443e-05, |
| "loss": 0.065, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.4886245793739815, |
| "grad_norm": 0.470703125, |
| "learning_rate": 1.1217335224630186e-05, |
| "loss": 0.054, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.4889631965461048, |
| "grad_norm": 0.53515625, |
| "learning_rate": 1.1206221748163127e-05, |
| "loss": 0.0709, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.4893018137182282, |
| "grad_norm": 2.390625, |
| "learning_rate": 1.1195106759697005e-05, |
| "loss": 0.0699, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.48964043089035153, |
| "grad_norm": 0.53125, |
| "learning_rate": 1.1183990273164464e-05, |
| "loss": 0.0593, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.4899790480624749, |
| "grad_norm": 0.4609375, |
| "learning_rate": 1.1172872302500017e-05, |
| "loss": 0.0554, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.4903176652345982, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.1161752861640046e-05, |
| "loss": 0.0639, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.49065628240672154, |
| "grad_norm": 0.5703125, |
| "learning_rate": 1.1150631964522767e-05, |
| "loss": 0.0659, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.4909948995788449, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.1139509625088225e-05, |
| "loss": 0.0659, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.49133351675096826, |
| "grad_norm": 0.494140625, |
| "learning_rate": 1.1128385857278274e-05, |
| "loss": 0.0584, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.49167213392309156, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.1117260675036563e-05, |
| "loss": 0.0791, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.4920107510952149, |
| "grad_norm": 0.482421875, |
| "learning_rate": 1.1106134092308502e-05, |
| "loss": 0.0626, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.49234936826733827, |
| "grad_norm": 0.5390625, |
| "learning_rate": 1.1095006123041262e-05, |
| "loss": 0.0627, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.4926879854394616, |
| "grad_norm": 0.73046875, |
| "learning_rate": 1.1083876781183762e-05, |
| "loss": 0.047, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.49302660261158493, |
| "grad_norm": 1.1328125, |
| "learning_rate": 1.1072746080686628e-05, |
| "loss": 0.0471, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.4933652197837083, |
| "grad_norm": 0.625, |
| "learning_rate": 1.1061614035502193e-05, |
| "loss": 0.0858, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.49370383695583164, |
| "grad_norm": 0.462890625, |
| "learning_rate": 1.1050480659584475e-05, |
| "loss": 0.0583, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.49404245412795494, |
| "grad_norm": 0.41796875, |
| "learning_rate": 1.1039345966889167e-05, |
| "loss": 0.0484, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.4943810713000783, |
| "grad_norm": 0.59765625, |
| "learning_rate": 1.1028209971373605e-05, |
| "loss": 0.0672, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.49471968847220166, |
| "grad_norm": 0.4921875, |
| "learning_rate": 1.101707268699676e-05, |
| "loss": 0.063, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.495058305644325, |
| "grad_norm": 0.4609375, |
| "learning_rate": 1.1005934127719218e-05, |
| "loss": 0.0549, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.4953969228164483, |
| "grad_norm": 0.62109375, |
| "learning_rate": 1.0994794307503162e-05, |
| "loss": 0.0881, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.49573553998857167, |
| "grad_norm": 0.5078125, |
| "learning_rate": 1.0983653240312364e-05, |
| "loss": 0.0701, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.49607415716069503, |
| "grad_norm": 0.5078125, |
| "learning_rate": 1.0972510940112149e-05, |
| "loss": 0.0641, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.49641277433281833, |
| "grad_norm": 0.466796875, |
| "learning_rate": 1.0961367420869387e-05, |
| "loss": 0.0599, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.4967513915049417, |
| "grad_norm": 0.5546875, |
| "learning_rate": 1.0950222696552487e-05, |
| "loss": 0.0651, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.49709000867706504, |
| "grad_norm": 0.51953125, |
| "learning_rate": 1.0939076781131357e-05, |
| "loss": 0.0631, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.4974286258491884, |
| "grad_norm": 0.5546875, |
| "learning_rate": 1.0927929688577408e-05, |
| "loss": 0.0606, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.4977672430213117, |
| "grad_norm": 0.5859375, |
| "learning_rate": 1.0916781432863514e-05, |
| "loss": 0.064, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.49810586019343506, |
| "grad_norm": 0.40625, |
| "learning_rate": 1.0905632027964024e-05, |
| "loss": 0.0527, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.4984444773655584, |
| "grad_norm": 0.359375, |
| "learning_rate": 1.0894481487854711e-05, |
| "loss": 0.0429, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.4987830945376817, |
| "grad_norm": 0.60546875, |
| "learning_rate": 1.0883329826512779e-05, |
| "loss": 0.0731, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.49912171170980507, |
| "grad_norm": 0.65625, |
| "learning_rate": 1.087217705791684e-05, |
| "loss": 0.088, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.49946032888192843, |
| "grad_norm": 0.390625, |
| "learning_rate": 1.0861023196046885e-05, |
| "loss": 0.0539, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.4997989460540518, |
| "grad_norm": 0.431640625, |
| "learning_rate": 1.0849868254884284e-05, |
| "loss": 0.0572, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.5001375632261751, |
| "grad_norm": 0.427734375, |
| "learning_rate": 1.0838712248411754e-05, |
| "loss": 0.0495, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.5004761803982984, |
| "grad_norm": 0.455078125, |
| "learning_rate": 1.0827555190613353e-05, |
| "loss": 0.0592, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.5008147975704218, |
| "grad_norm": 0.57421875, |
| "learning_rate": 1.0816397095474454e-05, |
| "loss": 0.0719, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.5011534147425452, |
| "grad_norm": 0.408203125, |
| "learning_rate": 1.0805237976981729e-05, |
| "loss": 0.0547, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.5011534147425452, |
| "eval_loss": 0.06768392771482468, |
| "eval_runtime": 815.5247, |
| "eval_samples_per_second": 12.198, |
| "eval_steps_per_second": 3.05, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.5014920319146685, |
| "grad_norm": 0.482421875, |
| "learning_rate": 1.0794077849123134e-05, |
| "loss": 0.0581, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.5018306490867919, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.0782916725887888e-05, |
| "loss": 0.0647, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.5021692662589151, |
| "grad_norm": 0.4765625, |
| "learning_rate": 1.0771754621266466e-05, |
| "loss": 0.0725, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.5025078834310385, |
| "grad_norm": 0.494140625, |
| "learning_rate": 1.0760591549250561e-05, |
| "loss": 0.0648, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.5028465006031618, |
| "grad_norm": 0.5859375, |
| "learning_rate": 1.0749427523833084e-05, |
| "loss": 0.0707, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.5031851177752852, |
| "grad_norm": 0.5546875, |
| "learning_rate": 1.0738262559008148e-05, |
| "loss": 0.0649, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.5035237349474085, |
| "grad_norm": 0.39453125, |
| "learning_rate": 1.0727096668771035e-05, |
| "loss": 0.0522, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.5038623521195319, |
| "grad_norm": 0.92578125, |
| "learning_rate": 1.0715929867118187e-05, |
| "loss": 0.0691, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.5042009692916553, |
| "grad_norm": 0.447265625, |
| "learning_rate": 1.0704762168047189e-05, |
| "loss": 0.0571, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.5045395864637785, |
| "grad_norm": 0.5, |
| "learning_rate": 1.069359358555676e-05, |
| "loss": 0.0701, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.5048782036359019, |
| "grad_norm": 0.5390625, |
| "learning_rate": 1.0682424133646712e-05, |
| "loss": 0.0739, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.5052168208080252, |
| "grad_norm": 0.443359375, |
| "learning_rate": 1.0671253826317957e-05, |
| "loss": 0.0613, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.5055554379801486, |
| "grad_norm": 0.56640625, |
| "learning_rate": 1.0660082677572474e-05, |
| "loss": 0.0781, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.5058940551522719, |
| "grad_norm": 0.5625, |
| "learning_rate": 1.0648910701413306e-05, |
| "loss": 0.0718, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.5062326723243953, |
| "grad_norm": 0.5546875, |
| "learning_rate": 1.0637737911844516e-05, |
| "loss": 0.0781, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.5065712894965186, |
| "grad_norm": 0.57421875, |
| "learning_rate": 1.0626564322871205e-05, |
| "loss": 0.09, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.5069099066686419, |
| "grad_norm": 0.39453125, |
| "learning_rate": 1.061538994849946e-05, |
| "loss": 0.0554, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.5072485238407652, |
| "grad_norm": 0.52734375, |
| "learning_rate": 1.0604214802736366e-05, |
| "loss": 0.0735, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.5075871410128886, |
| "grad_norm": 0.462890625, |
| "learning_rate": 1.0593038899589968e-05, |
| "loss": 0.0592, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.507925758185012, |
| "grad_norm": 0.38671875, |
| "learning_rate": 1.0581862253069262e-05, |
| "loss": 0.0484, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5082643753571353, |
| "grad_norm": 0.62109375, |
| "learning_rate": 1.0570684877184169e-05, |
| "loss": 0.0938, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.5086029925292587, |
| "grad_norm": 0.640625, |
| "learning_rate": 1.0559506785945538e-05, |
| "loss": 0.0768, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.508941609701382, |
| "grad_norm": 0.48828125, |
| "learning_rate": 1.0548327993365108e-05, |
| "loss": 0.0552, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.5092802268735053, |
| "grad_norm": 0.412109375, |
| "learning_rate": 1.0537148513455493e-05, |
| "loss": 0.0519, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.5096188440456286, |
| "grad_norm": 0.51171875, |
| "learning_rate": 1.0525968360230173e-05, |
| "loss": 0.0869, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.509957461217752, |
| "grad_norm": 0.59375, |
| "learning_rate": 1.0514787547703466e-05, |
| "loss": 0.0748, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.5102960783898753, |
| "grad_norm": 0.5546875, |
| "learning_rate": 1.050360608989053e-05, |
| "loss": 0.0689, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.5106346955619987, |
| "grad_norm": 0.490234375, |
| "learning_rate": 1.0492424000807316e-05, |
| "loss": 0.0596, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.510973312734122, |
| "grad_norm": 0.361328125, |
| "learning_rate": 1.0481241294470578e-05, |
| "loss": 0.0427, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.5113119299062454, |
| "grad_norm": 0.5390625, |
| "learning_rate": 1.047005798489784e-05, |
| "loss": 0.0608, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.5116505470783688, |
| "grad_norm": 0.474609375, |
| "learning_rate": 1.0458874086107379e-05, |
| "loss": 0.0565, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.511989164250492, |
| "grad_norm": 0.44140625, |
| "learning_rate": 1.0447689612118208e-05, |
| "loss": 0.0595, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.5123277814226154, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.0436504576950077e-05, |
| "loss": 0.05, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.5126663985947387, |
| "grad_norm": 0.421875, |
| "learning_rate": 1.0425318994623423e-05, |
| "loss": 0.0583, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.5130050157668621, |
| "grad_norm": 0.56640625, |
| "learning_rate": 1.0414132879159375e-05, |
| "loss": 0.0612, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.5133436329389854, |
| "grad_norm": 0.7421875, |
| "learning_rate": 1.0402946244579726e-05, |
| "loss": 0.1383, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.5136822501111088, |
| "grad_norm": 0.4140625, |
| "learning_rate": 1.0391759104906928e-05, |
| "loss": 0.0571, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.5140208672832322, |
| "grad_norm": 0.52734375, |
| "learning_rate": 1.038057147416406e-05, |
| "loss": 0.0564, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.5143594844553554, |
| "grad_norm": 0.43359375, |
| "learning_rate": 1.0369383366374819e-05, |
| "loss": 0.0551, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.5146981016274788, |
| "grad_norm": 0.466796875, |
| "learning_rate": 1.0358194795563497e-05, |
| "loss": 0.0617, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.5150367187996021, |
| "grad_norm": 0.53515625, |
| "learning_rate": 1.0347005775754969e-05, |
| "loss": 0.0756, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.5153753359717255, |
| "grad_norm": 0.46875, |
| "learning_rate": 1.0335816320974672e-05, |
| "loss": 0.0606, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.5157139531438488, |
| "grad_norm": 0.41796875, |
| "learning_rate": 1.0324626445248592e-05, |
| "loss": 0.0454, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.5160525703159722, |
| "grad_norm": 0.53125, |
| "learning_rate": 1.0313436162603231e-05, |
| "loss": 0.0752, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.5163911874880955, |
| "grad_norm": 0.546875, |
| "learning_rate": 1.0302245487065621e-05, |
| "loss": 0.0705, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.5167298046602188, |
| "grad_norm": 0.498046875, |
| "learning_rate": 1.0291054432663267e-05, |
| "loss": 0.0666, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.5170684218323421, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.0279863013424154e-05, |
| "loss": 0.0596, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.5174070390044655, |
| "grad_norm": 0.578125, |
| "learning_rate": 1.0268671243376733e-05, |
| "loss": 0.0686, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.5177456561765889, |
| "grad_norm": 0.53125, |
| "learning_rate": 1.0257479136549889e-05, |
| "loss": 0.0569, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.5180842733487122, |
| "grad_norm": 0.458984375, |
| "learning_rate": 1.0246286706972923e-05, |
| "loss": 0.0582, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.5184228905208356, |
| "grad_norm": 0.5078125, |
| "learning_rate": 1.023509396867555e-05, |
| "loss": 0.072, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.5187615076929589, |
| "grad_norm": 1.078125, |
| "learning_rate": 1.0223900935687866e-05, |
| "loss": 0.076, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.5191001248650822, |
| "grad_norm": 0.45703125, |
| "learning_rate": 1.0212707622040345e-05, |
| "loss": 0.0651, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.5194387420372055, |
| "grad_norm": 0.51953125, |
| "learning_rate": 1.02015140417638e-05, |
| "loss": 0.0795, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.5197773592093289, |
| "grad_norm": 0.3984375, |
| "learning_rate": 1.0190320208889388e-05, |
| "loss": 0.0507, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.5201159763814522, |
| "grad_norm": 0.5, |
| "learning_rate": 1.0179126137448577e-05, |
| "loss": 0.0691, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.5204545935535756, |
| "grad_norm": 0.462890625, |
| "learning_rate": 1.0167931841473143e-05, |
| "loss": 0.0529, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.520793210725699, |
| "grad_norm": 0.51953125, |
| "learning_rate": 1.0156737334995129e-05, |
| "loss": 0.0722, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.5211318278978223, |
| "grad_norm": 0.4140625, |
| "learning_rate": 1.014554263204685e-05, |
| "loss": 0.0621, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.5214704450699457, |
| "grad_norm": 0.474609375, |
| "learning_rate": 1.013434774666087e-05, |
| "loss": 0.0425, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.5218090622420689, |
| "grad_norm": 0.47265625, |
| "learning_rate": 1.0123152692869981e-05, |
| "loss": 0.056, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.5221476794141923, |
| "grad_norm": 0.451171875, |
| "learning_rate": 1.0111957484707182e-05, |
| "loss": 0.0616, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.5224862965863156, |
| "grad_norm": 0.482421875, |
| "learning_rate": 1.0100762136205664e-05, |
| "loss": 0.0521, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.522824913758439, |
| "grad_norm": 0.61328125, |
| "learning_rate": 1.0089566661398802e-05, |
| "loss": 0.0845, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.5231635309305623, |
| "grad_norm": 0.53125, |
| "learning_rate": 1.0078371074320123e-05, |
| "loss": 0.0735, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.5235021481026857, |
| "grad_norm": 0.53125, |
| "learning_rate": 1.0067175389003297e-05, |
| "loss": 0.0699, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.523840765274809, |
| "grad_norm": 0.63671875, |
| "learning_rate": 1.0055979619482112e-05, |
| "loss": 0.0785, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.5241793824469323, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.0044783779790472e-05, |
| "loss": 0.0614, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.5245179996190557, |
| "grad_norm": 0.60546875, |
| "learning_rate": 1.0033587883962362e-05, |
| "loss": 0.0635, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.524856616791179, |
| "grad_norm": 0.478515625, |
| "learning_rate": 1.0022391946031832e-05, |
| "loss": 0.0542, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.5251952339633024, |
| "grad_norm": 0.53125, |
| "learning_rate": 1.0011195980032996e-05, |
| "loss": 0.067, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.5255338511354257, |
| "grad_norm": 0.53125, |
| "learning_rate": 1e-05, |
| "loss": 0.0592, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.5258724683075491, |
| "grad_norm": 0.578125, |
| "learning_rate": 9.988804019967005e-06, |
| "loss": 0.0721, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.5262110854796724, |
| "grad_norm": 0.48046875, |
| "learning_rate": 9.977608053968172e-06, |
| "loss": 0.064, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.5265497026517957, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.966412116037643e-06, |
| "loss": 0.0672, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.526888319823919, |
| "grad_norm": 0.423828125, |
| "learning_rate": 9.95521622020953e-06, |
| "loss": 0.0526, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.5272269369960424, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.94402038051789e-06, |
| "loss": 0.0733, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.5275655541681658, |
| "grad_norm": 0.4609375, |
| "learning_rate": 9.932824610996706e-06, |
| "loss": 0.0561, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.5279041713402891, |
| "grad_norm": 0.5546875, |
| "learning_rate": 9.921628925679877e-06, |
| "loss": 0.072, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.5282427885124125, |
| "grad_norm": 0.44921875, |
| "learning_rate": 9.910433338601198e-06, |
| "loss": 0.0561, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.5285814056845358, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.899237863794336e-06, |
| "loss": 0.0679, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.5289200228566591, |
| "grad_norm": 0.427734375, |
| "learning_rate": 9.888042515292821e-06, |
| "loss": 0.0552, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.5292586400287824, |
| "grad_norm": 0.5859375, |
| "learning_rate": 9.876847307130024e-06, |
| "loss": 0.0788, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.5295972572009058, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.865652253339133e-06, |
| "loss": 0.0774, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.5299358743730291, |
| "grad_norm": 0.427734375, |
| "learning_rate": 9.854457367953155e-06, |
| "loss": 0.0599, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.5302744915451525, |
| "grad_norm": 0.498046875, |
| "learning_rate": 9.843262665004876e-06, |
| "loss": 0.062, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.5306131087172758, |
| "grad_norm": 0.57421875, |
| "learning_rate": 9.832068158526862e-06, |
| "loss": 0.0831, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.5309517258893992, |
| "grad_norm": 0.404296875, |
| "learning_rate": 9.820873862551425e-06, |
| "loss": 0.053, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.5312903430615225, |
| "grad_norm": 1.28125, |
| "learning_rate": 9.809679791110615e-06, |
| "loss": 0.0688, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.5316289602336458, |
| "grad_norm": 0.58203125, |
| "learning_rate": 9.798485958236203e-06, |
| "loss": 0.0557, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.5319675774057692, |
| "grad_norm": 0.51953125, |
| "learning_rate": 9.787292377959659e-06, |
| "loss": 0.0671, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.5323061945778925, |
| "grad_norm": 0.546875, |
| "learning_rate": 9.776099064312135e-06, |
| "loss": 0.0679, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.5326448117500159, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.764906031324454e-06, |
| "loss": 0.0996, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.5329834289221392, |
| "grad_norm": 0.56640625, |
| "learning_rate": 9.75371329302708e-06, |
| "loss": 0.0634, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.5333220460942626, |
| "grad_norm": 0.41796875, |
| "learning_rate": 9.742520863450116e-06, |
| "loss": 0.054, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.533660663266386, |
| "grad_norm": 0.451171875, |
| "learning_rate": 9.731328756623269e-06, |
| "loss": 0.059, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.5339992804385092, |
| "grad_norm": 0.5, |
| "learning_rate": 9.720136986575849e-06, |
| "loss": 0.0614, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.5343378976106326, |
| "grad_norm": 0.4140625, |
| "learning_rate": 9.708945567336736e-06, |
| "loss": 0.0475, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.5346765147827559, |
| "grad_norm": 0.5234375, |
| "learning_rate": 9.69775451293438e-06, |
| "loss": 0.0628, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.5350151319548793, |
| "grad_norm": 0.455078125, |
| "learning_rate": 9.686563837396769e-06, |
| "loss": 0.0635, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.5353537491270026, |
| "grad_norm": 0.7890625, |
| "learning_rate": 9.675373554751412e-06, |
| "loss": 0.0987, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.535692366299126, |
| "grad_norm": 0.453125, |
| "learning_rate": 9.664183679025327e-06, |
| "loss": 0.061, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.5360309834712493, |
| "grad_norm": 0.5546875, |
| "learning_rate": 9.652994224245033e-06, |
| "loss": 0.0729, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.5363696006433726, |
| "grad_norm": 0.462890625, |
| "learning_rate": 9.641805204436508e-06, |
| "loss": 0.0598, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.5367082178154959, |
| "grad_norm": 0.5703125, |
| "learning_rate": 9.630616633625186e-06, |
| "loss": 0.0672, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.5370468349876193, |
| "grad_norm": 0.466796875, |
| "learning_rate": 9.619428525835944e-06, |
| "loss": 0.0625, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.5373854521597426, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.608240895093077e-06, |
| "loss": 0.0487, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.537724069331866, |
| "grad_norm": 0.498046875, |
| "learning_rate": 9.597053755420277e-06, |
| "loss": 0.0708, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.5380626865039894, |
| "grad_norm": 0.52734375, |
| "learning_rate": 9.58586712084063e-06, |
| "loss": 0.0683, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.5384013036761127, |
| "grad_norm": 0.427734375, |
| "learning_rate": 9.57468100537658e-06, |
| "loss": 0.0504, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.538739920848236, |
| "grad_norm": 0.453125, |
| "learning_rate": 9.563495423049925e-06, |
| "loss": 0.0582, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.5390785380203593, |
| "grad_norm": 0.50390625, |
| "learning_rate": 9.552310387881793e-06, |
| "loss": 0.0629, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.5394171551924827, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.541125913892625e-06, |
| "loss": 0.0937, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.539755772364606, |
| "grad_norm": 0.5625, |
| "learning_rate": 9.529942015102164e-06, |
| "loss": 0.079, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.5400943895367294, |
| "grad_norm": 0.5078125, |
| "learning_rate": 9.518758705529423e-06, |
| "loss": 0.0697, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.5404330067088527, |
| "grad_norm": 0.4375, |
| "learning_rate": 9.507575999192686e-06, |
| "loss": 0.0548, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.5407716238809761, |
| "grad_norm": 0.384765625, |
| "learning_rate": 9.496393910109473e-06, |
| "loss": 0.0503, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.5411102410530994, |
| "grad_norm": 0.5859375, |
| "learning_rate": 9.485212452296535e-06, |
| "loss": 0.0829, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.5414488582252227, |
| "grad_norm": 0.498046875, |
| "learning_rate": 9.474031639769832e-06, |
| "loss": 0.058, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.5417874753973461, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.46285148654451e-06, |
| "loss": 0.0779, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.5421260925694694, |
| "grad_norm": 0.498046875, |
| "learning_rate": 9.451672006634892e-06, |
| "loss": 0.0568, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.5424647097415928, |
| "grad_norm": 0.478515625, |
| "learning_rate": 9.44049321405446e-06, |
| "loss": 0.0697, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.5428033269137161, |
| "grad_norm": 0.6015625, |
| "learning_rate": 9.429315122815831e-06, |
| "loss": 0.0661, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.5431419440858395, |
| "grad_norm": 0.396484375, |
| "learning_rate": 9.418137746930743e-06, |
| "loss": 0.0526, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.5434805612579628, |
| "grad_norm": 0.546875, |
| "learning_rate": 9.406961100410033e-06, |
| "loss": 0.0715, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.5438191784300861, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.395785197263638e-06, |
| "loss": 0.0763, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.5441577956022094, |
| "grad_norm": 0.56640625, |
| "learning_rate": 9.384610051500546e-06, |
| "loss": 0.0883, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.5444964127743328, |
| "grad_norm": 0.470703125, |
| "learning_rate": 9.3734356771288e-06, |
| "loss": 0.0611, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.5448350299464562, |
| "grad_norm": 0.4140625, |
| "learning_rate": 9.362262088155487e-06, |
| "loss": 0.0593, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.5451736471185795, |
| "grad_norm": 0.408203125, |
| "learning_rate": 9.351089298586699e-06, |
| "loss": 0.0573, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.5455122642907029, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.339917322427528e-06, |
| "loss": 0.0757, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.5458508814628262, |
| "grad_norm": 0.51171875, |
| "learning_rate": 9.328746173682046e-06, |
| "loss": 0.0641, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.5461894986349495, |
| "grad_norm": 0.50390625, |
| "learning_rate": 9.317575866353293e-06, |
| "loss": 0.0635, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.5465281158070728, |
| "grad_norm": 0.51953125, |
| "learning_rate": 9.306406414443246e-06, |
| "loss": 0.073, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.5468667329791962, |
| "grad_norm": 0.578125, |
| "learning_rate": 9.295237831952815e-06, |
| "loss": 0.0737, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.5472053501513195, |
| "grad_norm": 0.546875, |
| "learning_rate": 9.284070132881817e-06, |
| "loss": 0.0773, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.5475439673234429, |
| "grad_norm": 0.54296875, |
| "learning_rate": 9.272903331228968e-06, |
| "loss": 0.0576, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.5478825844955663, |
| "grad_norm": 0.498046875, |
| "learning_rate": 9.261737440991854e-06, |
| "loss": 0.0701, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.5482212016676896, |
| "grad_norm": 0.484375, |
| "learning_rate": 9.250572476166918e-06, |
| "loss": 0.0601, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.5485598188398129, |
| "grad_norm": 1.1640625, |
| "learning_rate": 9.239408450749442e-06, |
| "loss": 0.0674, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.5488984360119362, |
| "grad_norm": 0.43359375, |
| "learning_rate": 9.228245378733537e-06, |
| "loss": 0.0615, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.5492370531840596, |
| "grad_norm": 0.458984375, |
| "learning_rate": 9.217083274112114e-06, |
| "loss": 0.061, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.5495756703561829, |
| "grad_norm": 0.486328125, |
| "learning_rate": 9.20592215087687e-06, |
| "loss": 0.0649, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.5499142875283063, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.194762023018271e-06, |
| "loss": 0.0715, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.5502529047004296, |
| "grad_norm": 0.427734375, |
| "learning_rate": 9.183602904525546e-06, |
| "loss": 0.0529, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.550591521872553, |
| "grad_norm": 0.55078125, |
| "learning_rate": 9.172444809386647e-06, |
| "loss": 0.0841, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.5509301390446762, |
| "grad_norm": 0.578125, |
| "learning_rate": 9.161287751588249e-06, |
| "loss": 0.0757, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.5512687562167996, |
| "grad_norm": 0.447265625, |
| "learning_rate": 9.150131745115721e-06, |
| "loss": 0.0556, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.551607373388923, |
| "grad_norm": 0.435546875, |
| "learning_rate": 9.138976803953122e-06, |
| "loss": 0.0578, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.5519459905610463, |
| "grad_norm": 0.49609375, |
| "learning_rate": 9.127822942083167e-06, |
| "loss": 0.064, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.5522846077331697, |
| "grad_norm": 0.47265625, |
| "learning_rate": 9.116670173487223e-06, |
| "loss": 0.059, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.552623224905293, |
| "grad_norm": 0.9921875, |
| "learning_rate": 9.105518512145292e-06, |
| "loss": 0.1812, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.5529618420774164, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.09436797203598e-06, |
| "loss": 0.0754, |
| "step": 1633 |
| }, |
| { |
| "epoch": 0.5533004592495397, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.083218567136487e-06, |
| "loss": 0.0926, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.553639076421663, |
| "grad_norm": 0.42578125, |
| "learning_rate": 9.072070311422595e-06, |
| "loss": 0.0527, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.5539776935937863, |
| "grad_norm": 0.51171875, |
| "learning_rate": 9.060923218868644e-06, |
| "loss": 0.0603, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.5543163107659097, |
| "grad_norm": 0.5546875, |
| "learning_rate": 9.049777303447517e-06, |
| "loss": 0.0782, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.5546549279380331, |
| "grad_norm": 0.5859375, |
| "learning_rate": 9.038632579130617e-06, |
| "loss": 0.0807, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.5549935451101564, |
| "grad_norm": 0.625, |
| "learning_rate": 9.027489059887855e-06, |
| "loss": 0.071, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.5553321622822798, |
| "grad_norm": 0.466796875, |
| "learning_rate": 9.01634675968764e-06, |
| "loss": 0.0492, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.5556707794544031, |
| "grad_norm": 0.5078125, |
| "learning_rate": 9.00520569249684e-06, |
| "loss": 0.0619, |
| "step": 1641 |
| }, |
| { |
| "epoch": 0.5560093966265264, |
| "grad_norm": 0.412109375, |
| "learning_rate": 8.994065872280785e-06, |
| "loss": 0.0572, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.5563480137986497, |
| "grad_norm": 0.46484375, |
| "learning_rate": 8.982927313003242e-06, |
| "loss": 0.069, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.5566866309707731, |
| "grad_norm": 0.52734375, |
| "learning_rate": 8.971790028626395e-06, |
| "loss": 0.0644, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.5570252481428964, |
| "grad_norm": 0.46484375, |
| "learning_rate": 8.960654033110834e-06, |
| "loss": 0.0668, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.5573638653150198, |
| "grad_norm": 0.427734375, |
| "learning_rate": 8.949519340415526e-06, |
| "loss": 0.0586, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.5577024824871432, |
| "grad_norm": 0.375, |
| "learning_rate": 8.938385964497807e-06, |
| "loss": 0.0506, |
| "step": 1647 |
| }, |
| { |
| "epoch": 0.5580410996592665, |
| "grad_norm": 0.65625, |
| "learning_rate": 8.927253919313377e-06, |
| "loss": 0.0757, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.5583797168313898, |
| "grad_norm": 0.5390625, |
| "learning_rate": 8.916123218816243e-06, |
| "loss": 0.0689, |
| "step": 1649 |
| }, |
| { |
| "epoch": 0.5587183340035131, |
| "grad_norm": 0.53125, |
| "learning_rate": 8.90499387695874e-06, |
| "loss": 0.078, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.5590569511756365, |
| "grad_norm": 0.40625, |
| "learning_rate": 8.893865907691503e-06, |
| "loss": 0.0516, |
| "step": 1651 |
| }, |
| { |
| "epoch": 0.5593955683477598, |
| "grad_norm": 0.61328125, |
| "learning_rate": 8.882739324963442e-06, |
| "loss": 0.0698, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.5597341855198832, |
| "grad_norm": 0.443359375, |
| "learning_rate": 8.871614142721728e-06, |
| "loss": 0.0616, |
| "step": 1653 |
| }, |
| { |
| "epoch": 0.5600728026920065, |
| "grad_norm": 0.66015625, |
| "learning_rate": 8.860490374911777e-06, |
| "loss": 0.0799, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.5604114198641299, |
| "grad_norm": 0.83203125, |
| "learning_rate": 8.849368035477236e-06, |
| "loss": 0.0669, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.5607500370362531, |
| "grad_norm": 0.6953125, |
| "learning_rate": 8.838247138359957e-06, |
| "loss": 0.1207, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.5610886542083765, |
| "grad_norm": 0.494140625, |
| "learning_rate": 8.827127697499985e-06, |
| "loss": 0.0637, |
| "step": 1657 |
| }, |
| { |
| "epoch": 0.5614272713804999, |
| "grad_norm": 0.4296875, |
| "learning_rate": 8.816009726835538e-06, |
| "loss": 0.0543, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.5617658885526232, |
| "grad_norm": 0.46484375, |
| "learning_rate": 8.804893240302997e-06, |
| "loss": 0.0566, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.5621045057247466, |
| "grad_norm": 0.431640625, |
| "learning_rate": 8.793778251836878e-06, |
| "loss": 0.0618, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.5624431228968699, |
| "grad_norm": 0.59765625, |
| "learning_rate": 8.782664775369818e-06, |
| "loss": 0.0639, |
| "step": 1661 |
| }, |
| { |
| "epoch": 0.5627817400689933, |
| "grad_norm": 0.443359375, |
| "learning_rate": 8.771552824832559e-06, |
| "loss": 0.0619, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.5631203572411166, |
| "grad_norm": 0.609375, |
| "learning_rate": 8.760442414153937e-06, |
| "loss": 0.0627, |
| "step": 1663 |
| }, |
| { |
| "epoch": 0.5634589744132399, |
| "grad_norm": 0.62109375, |
| "learning_rate": 8.749333557260851e-06, |
| "loss": 0.0621, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.5637975915853632, |
| "grad_norm": 0.52734375, |
| "learning_rate": 8.738226268078254e-06, |
| "loss": 0.0725, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.5641362087574866, |
| "grad_norm": 0.67578125, |
| "learning_rate": 8.72712056052913e-06, |
| "loss": 0.0559, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.56447482592961, |
| "grad_norm": 0.515625, |
| "learning_rate": 8.71601644853449e-06, |
| "loss": 0.0639, |
| "step": 1667 |
| }, |
| { |
| "epoch": 0.5648134431017333, |
| "grad_norm": 0.53125, |
| "learning_rate": 8.704913946013337e-06, |
| "loss": 0.0652, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.5651520602738567, |
| "grad_norm": 0.55078125, |
| "learning_rate": 8.69381306688266e-06, |
| "loss": 0.0588, |
| "step": 1669 |
| }, |
| { |
| "epoch": 0.56549067744598, |
| "grad_norm": 0.703125, |
| "learning_rate": 8.682713825057409e-06, |
| "loss": 0.0987, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.5658292946181033, |
| "grad_norm": 0.55859375, |
| "learning_rate": 8.671616234450486e-06, |
| "loss": 0.0794, |
| "step": 1671 |
| }, |
| { |
| "epoch": 0.5661679117902266, |
| "grad_norm": 0.390625, |
| "learning_rate": 8.660520308972722e-06, |
| "loss": 0.0537, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.56650652896235, |
| "grad_norm": 0.416015625, |
| "learning_rate": 8.649426062532858e-06, |
| "loss": 0.0569, |
| "step": 1673 |
| }, |
| { |
| "epoch": 0.5668451461344733, |
| "grad_norm": 0.45703125, |
| "learning_rate": 8.638333509037537e-06, |
| "loss": 0.0588, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.5671837633065967, |
| "grad_norm": 0.42578125, |
| "learning_rate": 8.627242662391273e-06, |
| "loss": 0.0688, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.5675223804787201, |
| "grad_norm": 0.482421875, |
| "learning_rate": 8.616153536496444e-06, |
| "loss": 0.0627, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.5678609976508434, |
| "grad_norm": 0.45703125, |
| "learning_rate": 8.605066145253269e-06, |
| "loss": 0.0622, |
| "step": 1677 |
| }, |
| { |
| "epoch": 0.5681996148229667, |
| "grad_norm": 1.1484375, |
| "learning_rate": 8.593980502559797e-06, |
| "loss": 0.1008, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.56853823199509, |
| "grad_norm": 0.5078125, |
| "learning_rate": 8.58289662231188e-06, |
| "loss": 0.0611, |
| "step": 1679 |
| }, |
| { |
| "epoch": 0.5688768491672134, |
| "grad_norm": 0.43359375, |
| "learning_rate": 8.571814518403162e-06, |
| "loss": 0.0609, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.5692154663393367, |
| "grad_norm": 0.466796875, |
| "learning_rate": 8.560734204725064e-06, |
| "loss": 0.0711, |
| "step": 1681 |
| }, |
| { |
| "epoch": 0.5695540835114601, |
| "grad_norm": 0.46484375, |
| "learning_rate": 8.549655695166756e-06, |
| "loss": 0.0548, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.5698927006835834, |
| "grad_norm": 0.43359375, |
| "learning_rate": 8.538579003615154e-06, |
| "loss": 0.0634, |
| "step": 1683 |
| }, |
| { |
| "epoch": 0.5702313178557068, |
| "grad_norm": 0.50390625, |
| "learning_rate": 8.52750414395489e-06, |
| "loss": 0.059, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.57056993502783, |
| "grad_norm": 0.37890625, |
| "learning_rate": 8.516431130068303e-06, |
| "loss": 0.0496, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.5709085521999534, |
| "grad_norm": 0.5078125, |
| "learning_rate": 8.505359975835413e-06, |
| "loss": 0.0686, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.5712471693720768, |
| "grad_norm": 0.53515625, |
| "learning_rate": 8.494290695133918e-06, |
| "loss": 0.0561, |
| "step": 1687 |
| }, |
| { |
| "epoch": 0.5715857865442001, |
| "grad_norm": 0.4296875, |
| "learning_rate": 8.483223301839159e-06, |
| "loss": 0.0549, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.5719244037163235, |
| "grad_norm": 0.447265625, |
| "learning_rate": 8.472157809824115e-06, |
| "loss": 0.0581, |
| "step": 1689 |
| }, |
| { |
| "epoch": 0.5722630208884468, |
| "grad_norm": 0.5, |
| "learning_rate": 8.461094232959381e-06, |
| "loss": 0.0655, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.5726016380605702, |
| "grad_norm": 0.44921875, |
| "learning_rate": 8.450032585113156e-06, |
| "loss": 0.0554, |
| "step": 1691 |
| }, |
| { |
| "epoch": 0.5729402552326935, |
| "grad_norm": 0.45703125, |
| "learning_rate": 8.438972880151209e-06, |
| "loss": 0.0606, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.5732788724048168, |
| "grad_norm": 0.5390625, |
| "learning_rate": 8.427915131936885e-06, |
| "loss": 0.0702, |
| "step": 1693 |
| }, |
| { |
| "epoch": 0.5736174895769401, |
| "grad_norm": 0.52734375, |
| "learning_rate": 8.416859354331072e-06, |
| "loss": 0.0659, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.5739561067490635, |
| "grad_norm": 0.435546875, |
| "learning_rate": 8.405805561192188e-06, |
| "loss": 0.0478, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.5742947239211869, |
| "grad_norm": 0.48046875, |
| "learning_rate": 8.39475376637616e-06, |
| "loss": 0.0667, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.5746333410933102, |
| "grad_norm": 0.486328125, |
| "learning_rate": 8.38370398373642e-06, |
| "loss": 0.0618, |
| "step": 1697 |
| }, |
| { |
| "epoch": 0.5749719582654336, |
| "grad_norm": 0.5078125, |
| "learning_rate": 8.372656227123868e-06, |
| "loss": 0.0609, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.5753105754375569, |
| "grad_norm": 0.494140625, |
| "learning_rate": 8.36161051038687e-06, |
| "loss": 0.0585, |
| "step": 1699 |
| }, |
| { |
| "epoch": 0.5756491926096802, |
| "grad_norm": 0.515625, |
| "learning_rate": 8.350566847371228e-06, |
| "loss": 0.0544, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.5759878097818035, |
| "grad_norm": 0.5390625, |
| "learning_rate": 8.33952525192018e-06, |
| "loss": 0.0759, |
| "step": 1701 |
| }, |
| { |
| "epoch": 0.5763264269539269, |
| "grad_norm": 0.796875, |
| "learning_rate": 8.328485737874365e-06, |
| "loss": 0.0673, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.5766650441260502, |
| "grad_norm": 0.51953125, |
| "learning_rate": 8.317448319071815e-06, |
| "loss": 0.0662, |
| "step": 1703 |
| }, |
| { |
| "epoch": 0.5770036612981736, |
| "grad_norm": 0.60546875, |
| "learning_rate": 8.306413009347933e-06, |
| "loss": 0.0805, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.577342278470297, |
| "grad_norm": 0.45703125, |
| "learning_rate": 8.295379822535482e-06, |
| "loss": 0.0578, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.5776808956424203, |
| "grad_norm": 0.458984375, |
| "learning_rate": 8.284348772464564e-06, |
| "loss": 0.0575, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.5780195128145436, |
| "grad_norm": 0.50390625, |
| "learning_rate": 8.273319872962599e-06, |
| "loss": 0.0549, |
| "step": 1707 |
| }, |
| { |
| "epoch": 0.5783581299866669, |
| "grad_norm": 0.6015625, |
| "learning_rate": 8.262293137854315e-06, |
| "loss": 0.0598, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.5786967471587903, |
| "grad_norm": 0.5703125, |
| "learning_rate": 8.251268580961724e-06, |
| "loss": 0.0611, |
| "step": 1709 |
| }, |
| { |
| "epoch": 0.5790353643309136, |
| "grad_norm": 0.44921875, |
| "learning_rate": 8.24024621610411e-06, |
| "loss": 0.0528, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.579373981503037, |
| "grad_norm": 0.52734375, |
| "learning_rate": 8.229226057098012e-06, |
| "loss": 0.0724, |
| "step": 1711 |
| }, |
| { |
| "epoch": 0.5797125986751603, |
| "grad_norm": 0.4609375, |
| "learning_rate": 8.218208117757194e-06, |
| "loss": 0.0598, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.5800512158472837, |
| "grad_norm": 0.58984375, |
| "learning_rate": 8.207192411892645e-06, |
| "loss": 0.0767, |
| "step": 1713 |
| }, |
| { |
| "epoch": 0.5803898330194069, |
| "grad_norm": 0.369140625, |
| "learning_rate": 8.196178953312557e-06, |
| "loss": 0.0515, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.5807284501915303, |
| "grad_norm": 0.484375, |
| "learning_rate": 8.185167755822294e-06, |
| "loss": 0.0664, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.5810670673636537, |
| "grad_norm": 0.474609375, |
| "learning_rate": 8.1741588332244e-06, |
| "loss": 0.06, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.581405684535777, |
| "grad_norm": 0.5546875, |
| "learning_rate": 8.163152199318559e-06, |
| "loss": 0.0656, |
| "step": 1717 |
| }, |
| { |
| "epoch": 0.5817443017079004, |
| "grad_norm": 0.49609375, |
| "learning_rate": 8.152147867901586e-06, |
| "loss": 0.059, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.5820829188800237, |
| "grad_norm": 0.470703125, |
| "learning_rate": 8.141145852767408e-06, |
| "loss": 0.0609, |
| "step": 1719 |
| }, |
| { |
| "epoch": 0.5824215360521471, |
| "grad_norm": 0.46484375, |
| "learning_rate": 8.13014616770706e-06, |
| "loss": 0.0649, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.5827601532242704, |
| "grad_norm": 0.41796875, |
| "learning_rate": 8.119148826508642e-06, |
| "loss": 0.055, |
| "step": 1721 |
| }, |
| { |
| "epoch": 0.5830987703963937, |
| "grad_norm": 0.58984375, |
| "learning_rate": 8.108153842957324e-06, |
| "loss": 0.0735, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.583437387568517, |
| "grad_norm": 0.51171875, |
| "learning_rate": 8.09716123083532e-06, |
| "loss": 0.06, |
| "step": 1723 |
| }, |
| { |
| "epoch": 0.5837760047406404, |
| "grad_norm": 0.8046875, |
| "learning_rate": 8.086171003921865e-06, |
| "loss": 0.0472, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.5841146219127638, |
| "grad_norm": 0.515625, |
| "learning_rate": 8.075183175993218e-06, |
| "loss": 0.0706, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.5844532390848871, |
| "grad_norm": 0.578125, |
| "learning_rate": 8.064197760822615e-06, |
| "loss": 0.0567, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.5847918562570105, |
| "grad_norm": 0.609375, |
| "learning_rate": 8.053214772180277e-06, |
| "loss": 0.0868, |
| "step": 1727 |
| }, |
| { |
| "epoch": 0.5851304734291338, |
| "grad_norm": 0.59765625, |
| "learning_rate": 8.042234223833381e-06, |
| "loss": 0.0503, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.5854690906012571, |
| "grad_norm": 0.439453125, |
| "learning_rate": 8.031256129546046e-06, |
| "loss": 0.0617, |
| "step": 1729 |
| }, |
| { |
| "epoch": 0.5858077077733804, |
| "grad_norm": 0.3671875, |
| "learning_rate": 8.020280503079314e-06, |
| "loss": 0.0443, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.5861463249455038, |
| "grad_norm": 0.48046875, |
| "learning_rate": 8.009307358191133e-06, |
| "loss": 0.0642, |
| "step": 1731 |
| }, |
| { |
| "epoch": 0.5864849421176271, |
| "grad_norm": 0.50390625, |
| "learning_rate": 7.99833670863634e-06, |
| "loss": 0.0677, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.5868235592897505, |
| "grad_norm": 0.52734375, |
| "learning_rate": 7.987368568166653e-06, |
| "loss": 0.0724, |
| "step": 1733 |
| }, |
| { |
| "epoch": 0.5871621764618739, |
| "grad_norm": 0.421875, |
| "learning_rate": 7.976402950530623e-06, |
| "loss": 0.0529, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.5875007936339972, |
| "grad_norm": 0.5, |
| "learning_rate": 7.965439869473664e-06, |
| "loss": 0.067, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.5878394108061205, |
| "grad_norm": 0.421875, |
| "learning_rate": 7.954479338737995e-06, |
| "loss": 0.0582, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.5881780279782438, |
| "grad_norm": 0.609375, |
| "learning_rate": 7.943521372062641e-06, |
| "loss": 0.0765, |
| "step": 1737 |
| }, |
| { |
| "epoch": 0.5885166451503672, |
| "grad_norm": 0.59375, |
| "learning_rate": 7.932565983183416e-06, |
| "loss": 0.0745, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.5888552623224905, |
| "grad_norm": 0.45703125, |
| "learning_rate": 7.921613185832897e-06, |
| "loss": 0.0624, |
| "step": 1739 |
| }, |
| { |
| "epoch": 0.5891938794946139, |
| "grad_norm": 0.455078125, |
| "learning_rate": 7.910662993740422e-06, |
| "loss": 0.0571, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.5895324966667372, |
| "grad_norm": 0.5078125, |
| "learning_rate": 7.899715420632056e-06, |
| "loss": 0.0677, |
| "step": 1741 |
| }, |
| { |
| "epoch": 0.5898711138388606, |
| "grad_norm": 0.44140625, |
| "learning_rate": 7.888770480230582e-06, |
| "loss": 0.0539, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.5902097310109838, |
| "grad_norm": 0.5703125, |
| "learning_rate": 7.87782818625548e-06, |
| "loss": 0.0742, |
| "step": 1743 |
| }, |
| { |
| "epoch": 0.5905483481831072, |
| "grad_norm": 0.51953125, |
| "learning_rate": 7.866888552422924e-06, |
| "loss": 0.0653, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.5908869653552306, |
| "grad_norm": 0.4609375, |
| "learning_rate": 7.855951592445743e-06, |
| "loss": 0.0559, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.5912255825273539, |
| "grad_norm": 0.470703125, |
| "learning_rate": 7.845017320033415e-06, |
| "loss": 0.0502, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.5915641996994773, |
| "grad_norm": 0.53125, |
| "learning_rate": 7.834085748892052e-06, |
| "loss": 0.064, |
| "step": 1747 |
| }, |
| { |
| "epoch": 0.5919028168716006, |
| "grad_norm": 0.4921875, |
| "learning_rate": 7.823156892724379e-06, |
| "loss": 0.0646, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.592241434043724, |
| "grad_norm": 0.7109375, |
| "learning_rate": 7.81223076522972e-06, |
| "loss": 0.0848, |
| "step": 1749 |
| }, |
| { |
| "epoch": 0.5925800512158473, |
| "grad_norm": 0.546875, |
| "learning_rate": 7.801307380103977e-06, |
| "loss": 0.0657, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.5929186683879706, |
| "grad_norm": 0.458984375, |
| "learning_rate": 7.790386751039609e-06, |
| "loss": 0.0562, |
| "step": 1751 |
| }, |
| { |
| "epoch": 0.5932572855600939, |
| "grad_norm": 0.59375, |
| "learning_rate": 7.779468891725633e-06, |
| "loss": 0.0803, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.5935959027322173, |
| "grad_norm": 0.515625, |
| "learning_rate": 7.768553815847583e-06, |
| "loss": 0.0589, |
| "step": 1753 |
| }, |
| { |
| "epoch": 0.5939345199043407, |
| "grad_norm": 0.5859375, |
| "learning_rate": 7.757641537087509e-06, |
| "loss": 0.0716, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.594273137076464, |
| "grad_norm": 0.52734375, |
| "learning_rate": 7.74673206912395e-06, |
| "loss": 0.0715, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.5946117542485874, |
| "grad_norm": 0.50390625, |
| "learning_rate": 7.735825425631926e-06, |
| "loss": 0.0671, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.5949503714207107, |
| "grad_norm": 0.396484375, |
| "learning_rate": 7.724921620282917e-06, |
| "loss": 0.0529, |
| "step": 1757 |
| }, |
| { |
| "epoch": 0.595288988592834, |
| "grad_norm": 0.453125, |
| "learning_rate": 7.71402066674484e-06, |
| "loss": 0.0607, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.5956276057649573, |
| "grad_norm": 0.5390625, |
| "learning_rate": 7.703122578682047e-06, |
| "loss": 0.0687, |
| "step": 1759 |
| }, |
| { |
| "epoch": 0.5959662229370807, |
| "grad_norm": 0.390625, |
| "learning_rate": 7.69222736975529e-06, |
| "loss": 0.0491, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.596304840109204, |
| "grad_norm": 0.64453125, |
| "learning_rate": 7.681335053621712e-06, |
| "loss": 0.0563, |
| "step": 1761 |
| }, |
| { |
| "epoch": 0.5966434572813274, |
| "grad_norm": 0.43359375, |
| "learning_rate": 7.670445643934833e-06, |
| "loss": 0.0574, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.5969820744534508, |
| "grad_norm": 0.5390625, |
| "learning_rate": 7.659559154344533e-06, |
| "loss": 0.0558, |
| "step": 1763 |
| }, |
| { |
| "epoch": 0.5973206916255741, |
| "grad_norm": 0.59765625, |
| "learning_rate": 7.648675598497023e-06, |
| "loss": 0.0637, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.5976593087976974, |
| "grad_norm": 0.69140625, |
| "learning_rate": 7.637794990034843e-06, |
| "loss": 0.0891, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.5979979259698207, |
| "grad_norm": 0.51953125, |
| "learning_rate": 7.626917342596833e-06, |
| "loss": 0.0642, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.5983365431419441, |
| "grad_norm": 0.546875, |
| "learning_rate": 7.616042669818133e-06, |
| "loss": 0.0673, |
| "step": 1767 |
| }, |
| { |
| "epoch": 0.5986751603140674, |
| "grad_norm": 0.6328125, |
| "learning_rate": 7.605170985330139e-06, |
| "loss": 0.0731, |
| "step": 1768 |
| }, |
| { |
| "epoch": 0.5990137774861908, |
| "grad_norm": 0.462890625, |
| "learning_rate": 7.594302302760512e-06, |
| "loss": 0.0545, |
| "step": 1769 |
| }, |
| { |
| "epoch": 0.5993523946583141, |
| "grad_norm": 0.53515625, |
| "learning_rate": 7.5834366357331436e-06, |
| "loss": 0.0648, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.5996910118304375, |
| "grad_norm": 0.466796875, |
| "learning_rate": 7.572573997868151e-06, |
| "loss": 0.0583, |
| "step": 1771 |
| }, |
| { |
| "epoch": 0.6000296290025607, |
| "grad_norm": 0.66015625, |
| "learning_rate": 7.5617144027818515e-06, |
| "loss": 0.1069, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.6003682461746841, |
| "grad_norm": 0.474609375, |
| "learning_rate": 7.550857864086747e-06, |
| "loss": 0.0693, |
| "step": 1773 |
| }, |
| { |
| "epoch": 0.6007068633468075, |
| "grad_norm": 0.427734375, |
| "learning_rate": 7.540004395391509e-06, |
| "loss": 0.0567, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.6010454805189308, |
| "grad_norm": 0.4140625, |
| "learning_rate": 7.529154010300963e-06, |
| "loss": 0.0502, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.6013840976910542, |
| "grad_norm": 0.470703125, |
| "learning_rate": 7.518306722416074e-06, |
| "loss": 0.0619, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.6013840976910542, |
| "eval_loss": 0.06656693667173386, |
| "eval_runtime": 815.1123, |
| "eval_samples_per_second": 12.204, |
| "eval_steps_per_second": 3.051, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.6017227148631775, |
| "grad_norm": 0.486328125, |
| "learning_rate": 7.5074625453339034e-06, |
| "loss": 0.0615, |
| "step": 1777 |
| }, |
| { |
| "epoch": 0.6020613320353009, |
| "grad_norm": 0.51953125, |
| "learning_rate": 7.496621492647638e-06, |
| "loss": 0.0651, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.6023999492074242, |
| "grad_norm": 0.62109375, |
| "learning_rate": 7.485783577946537e-06, |
| "loss": 0.0694, |
| "step": 1779 |
| }, |
| { |
| "epoch": 0.6027385663795475, |
| "grad_norm": 0.46484375, |
| "learning_rate": 7.474948814815927e-06, |
| "loss": 0.0644, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.6030771835516708, |
| "grad_norm": 0.408203125, |
| "learning_rate": 7.464117216837181e-06, |
| "loss": 0.055, |
| "step": 1781 |
| }, |
| { |
| "epoch": 0.6034158007237942, |
| "grad_norm": 0.5078125, |
| "learning_rate": 7.453288797587714e-06, |
| "loss": 0.0585, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.6037544178959176, |
| "grad_norm": 0.45703125, |
| "learning_rate": 7.442463570640947e-06, |
| "loss": 0.0593, |
| "step": 1783 |
| }, |
| { |
| "epoch": 0.6040930350680409, |
| "grad_norm": 0.412109375, |
| "learning_rate": 7.431641549566304e-06, |
| "loss": 0.0542, |
| "step": 1784 |
| }, |
| { |
| "epoch": 0.6044316522401643, |
| "grad_norm": 0.54296875, |
| "learning_rate": 7.420822747929187e-06, |
| "loss": 0.0711, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.6047702694122876, |
| "grad_norm": 0.98046875, |
| "learning_rate": 7.410007179290968e-06, |
| "loss": 0.0832, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.6051088865844109, |
| "grad_norm": 0.49609375, |
| "learning_rate": 7.399194857208962e-06, |
| "loss": 0.0673, |
| "step": 1787 |
| }, |
| { |
| "epoch": 0.6054475037565342, |
| "grad_norm": 0.51171875, |
| "learning_rate": 7.388385795236415e-06, |
| "loss": 0.0717, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.6057861209286576, |
| "grad_norm": 0.53515625, |
| "learning_rate": 7.377580006922486e-06, |
| "loss": 0.0606, |
| "step": 1789 |
| }, |
| { |
| "epoch": 0.6061247381007809, |
| "grad_norm": 0.50390625, |
| "learning_rate": 7.366777505812234e-06, |
| "loss": 0.0667, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.6064633552729043, |
| "grad_norm": 0.421875, |
| "learning_rate": 7.355978305446594e-06, |
| "loss": 0.0498, |
| "step": 1791 |
| }, |
| { |
| "epoch": 0.6068019724450276, |
| "grad_norm": 0.7265625, |
| "learning_rate": 7.345182419362364e-06, |
| "loss": 0.1045, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.607140589617151, |
| "grad_norm": 2.0, |
| "learning_rate": 7.334389861092187e-06, |
| "loss": 0.0706, |
| "step": 1793 |
| }, |
| { |
| "epoch": 0.6074792067892743, |
| "grad_norm": 0.44140625, |
| "learning_rate": 7.323600644164539e-06, |
| "loss": 0.0613, |
| "step": 1794 |
| }, |
| { |
| "epoch": 0.6078178239613976, |
| "grad_norm": 0.66015625, |
| "learning_rate": 7.312814782103703e-06, |
| "loss": 0.0837, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.608156441133521, |
| "grad_norm": 0.48828125, |
| "learning_rate": 7.3020322884297565e-06, |
| "loss": 0.073, |
| "step": 1796 |
| }, |
| { |
| "epoch": 0.6084950583056443, |
| "grad_norm": 0.353515625, |
| "learning_rate": 7.291253176658562e-06, |
| "loss": 0.046, |
| "step": 1797 |
| }, |
| { |
| "epoch": 0.6088336754777677, |
| "grad_norm": 0.5546875, |
| "learning_rate": 7.280477460301727e-06, |
| "loss": 0.0621, |
| "step": 1798 |
| }, |
| { |
| "epoch": 0.609172292649891, |
| "grad_norm": 0.546875, |
| "learning_rate": 7.26970515286662e-06, |
| "loss": 0.0646, |
| "step": 1799 |
| }, |
| { |
| "epoch": 0.6095109098220144, |
| "grad_norm": 0.427734375, |
| "learning_rate": 7.258936267856323e-06, |
| "loss": 0.0622, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.6098495269941376, |
| "grad_norm": 0.435546875, |
| "learning_rate": 7.248170818769642e-06, |
| "loss": 0.0426, |
| "step": 1801 |
| }, |
| { |
| "epoch": 0.610188144166261, |
| "grad_norm": 0.51171875, |
| "learning_rate": 7.237408819101064e-06, |
| "loss": 0.0586, |
| "step": 1802 |
| }, |
| { |
| "epoch": 0.6105267613383843, |
| "grad_norm": 0.64453125, |
| "learning_rate": 7.2266502823407584e-06, |
| "loss": 0.0624, |
| "step": 1803 |
| }, |
| { |
| "epoch": 0.6108653785105077, |
| "grad_norm": 0.474609375, |
| "learning_rate": 7.215895221974548e-06, |
| "loss": 0.062, |
| "step": 1804 |
| }, |
| { |
| "epoch": 0.6112039956826311, |
| "grad_norm": 0.51953125, |
| "learning_rate": 7.2051436514839064e-06, |
| "loss": 0.0654, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.6115426128547544, |
| "grad_norm": 0.47265625, |
| "learning_rate": 7.194395584345927e-06, |
| "loss": 0.0551, |
| "step": 1806 |
| }, |
| { |
| "epoch": 0.6118812300268778, |
| "grad_norm": 0.55078125, |
| "learning_rate": 7.1836510340333125e-06, |
| "loss": 0.0641, |
| "step": 1807 |
| }, |
| { |
| "epoch": 0.6122198471990011, |
| "grad_norm": 0.39453125, |
| "learning_rate": 7.1729100140143535e-06, |
| "loss": 0.0479, |
| "step": 1808 |
| }, |
| { |
| "epoch": 0.6125584643711244, |
| "grad_norm": 0.6328125, |
| "learning_rate": 7.162172537752927e-06, |
| "loss": 0.0776, |
| "step": 1809 |
| }, |
| { |
| "epoch": 0.6128970815432477, |
| "grad_norm": 0.486328125, |
| "learning_rate": 7.151438618708455e-06, |
| "loss": 0.065, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.6132356987153711, |
| "grad_norm": 0.58984375, |
| "learning_rate": 7.1407082703359085e-06, |
| "loss": 0.0835, |
| "step": 1811 |
| }, |
| { |
| "epoch": 0.6135743158874944, |
| "grad_norm": 0.54296875, |
| "learning_rate": 7.129981506085777e-06, |
| "loss": 0.0665, |
| "step": 1812 |
| }, |
| { |
| "epoch": 0.6139129330596178, |
| "grad_norm": 0.48046875, |
| "learning_rate": 7.119258339404065e-06, |
| "loss": 0.0605, |
| "step": 1813 |
| }, |
| { |
| "epoch": 0.6142515502317412, |
| "grad_norm": 0.39453125, |
| "learning_rate": 7.1085387837322595e-06, |
| "loss": 0.0528, |
| "step": 1814 |
| }, |
| { |
| "epoch": 0.6145901674038645, |
| "grad_norm": 0.65625, |
| "learning_rate": 7.097822852507325e-06, |
| "loss": 0.0892, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.6149287845759878, |
| "grad_norm": 0.5625, |
| "learning_rate": 7.087110559161681e-06, |
| "loss": 0.079, |
| "step": 1816 |
| }, |
| { |
| "epoch": 0.6152674017481111, |
| "grad_norm": 0.671875, |
| "learning_rate": 7.0764019171231906e-06, |
| "loss": 0.0519, |
| "step": 1817 |
| }, |
| { |
| "epoch": 0.6156060189202345, |
| "grad_norm": 0.53125, |
| "learning_rate": 7.06569693981514e-06, |
| "loss": 0.0608, |
| "step": 1818 |
| }, |
| { |
| "epoch": 0.6159446360923578, |
| "grad_norm": 0.5546875, |
| "learning_rate": 7.0549956406562105e-06, |
| "loss": 0.0784, |
| "step": 1819 |
| }, |
| { |
| "epoch": 0.6162832532644812, |
| "grad_norm": 0.47265625, |
| "learning_rate": 7.044298033060487e-06, |
| "loss": 0.0667, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.6166218704366045, |
| "grad_norm": 0.447265625, |
| "learning_rate": 7.033604130437422e-06, |
| "loss": 0.0612, |
| "step": 1821 |
| }, |
| { |
| "epoch": 0.6169604876087279, |
| "grad_norm": 0.61328125, |
| "learning_rate": 7.022913946191821e-06, |
| "loss": 0.0698, |
| "step": 1822 |
| }, |
| { |
| "epoch": 0.6172991047808511, |
| "grad_norm": 0.5625, |
| "learning_rate": 7.012227493723831e-06, |
| "loss": 0.0673, |
| "step": 1823 |
| }, |
| { |
| "epoch": 0.6176377219529745, |
| "grad_norm": 0.5546875, |
| "learning_rate": 7.001544786428924e-06, |
| "loss": 0.0601, |
| "step": 1824 |
| }, |
| { |
| "epoch": 0.6179763391250979, |
| "grad_norm": 0.5390625, |
| "learning_rate": 6.990865837697872e-06, |
| "loss": 0.0562, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.6183149562972212, |
| "grad_norm": 0.46875, |
| "learning_rate": 6.980190660916739e-06, |
| "loss": 0.0658, |
| "step": 1826 |
| }, |
| { |
| "epoch": 0.6186535734693446, |
| "grad_norm": 0.42578125, |
| "learning_rate": 6.969519269466858e-06, |
| "loss": 0.055, |
| "step": 1827 |
| }, |
| { |
| "epoch": 0.6189921906414679, |
| "grad_norm": 0.490234375, |
| "learning_rate": 6.958851676724823e-06, |
| "loss": 0.0652, |
| "step": 1828 |
| }, |
| { |
| "epoch": 0.6193308078135913, |
| "grad_norm": 0.578125, |
| "learning_rate": 6.9481878960624585e-06, |
| "loss": 0.0715, |
| "step": 1829 |
| }, |
| { |
| "epoch": 0.6196694249857145, |
| "grad_norm": 0.74609375, |
| "learning_rate": 6.937527940846816e-06, |
| "loss": 0.1297, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.6200080421578379, |
| "grad_norm": 0.43359375, |
| "learning_rate": 6.926871824440149e-06, |
| "loss": 0.0607, |
| "step": 1831 |
| }, |
| { |
| "epoch": 0.6203466593299612, |
| "grad_norm": 0.470703125, |
| "learning_rate": 6.916219560199904e-06, |
| "loss": 0.0621, |
| "step": 1832 |
| }, |
| { |
| "epoch": 0.6206852765020846, |
| "grad_norm": 0.46484375, |
| "learning_rate": 6.905571161478692e-06, |
| "loss": 0.0516, |
| "step": 1833 |
| }, |
| { |
| "epoch": 0.621023893674208, |
| "grad_norm": 0.5546875, |
| "learning_rate": 6.894926641624282e-06, |
| "loss": 0.0806, |
| "step": 1834 |
| }, |
| { |
| "epoch": 0.6213625108463313, |
| "grad_norm": 0.453125, |
| "learning_rate": 6.8842860139795795e-06, |
| "loss": 0.0625, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.6217011280184547, |
| "grad_norm": 0.490234375, |
| "learning_rate": 6.873649291882613e-06, |
| "loss": 0.0609, |
| "step": 1836 |
| }, |
| { |
| "epoch": 0.622039745190578, |
| "grad_norm": 0.478515625, |
| "learning_rate": 6.8630164886665165e-06, |
| "loss": 0.0683, |
| "step": 1837 |
| }, |
| { |
| "epoch": 0.6223783623627013, |
| "grad_norm": 0.43359375, |
| "learning_rate": 6.8523876176595084e-06, |
| "loss": 0.0567, |
| "step": 1838 |
| }, |
| { |
| "epoch": 0.6227169795348246, |
| "grad_norm": 0.41796875, |
| "learning_rate": 6.841762692184881e-06, |
| "loss": 0.0535, |
| "step": 1839 |
| }, |
| { |
| "epoch": 0.623055596706948, |
| "grad_norm": 0.57421875, |
| "learning_rate": 6.831141725560975e-06, |
| "loss": 0.0775, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.6233942138790713, |
| "grad_norm": 0.5, |
| "learning_rate": 6.820524731101176e-06, |
| "loss": 0.0621, |
| "step": 1841 |
| }, |
| { |
| "epoch": 0.6237328310511947, |
| "grad_norm": 0.490234375, |
| "learning_rate": 6.809911722113884e-06, |
| "loss": 0.0549, |
| "step": 1842 |
| }, |
| { |
| "epoch": 0.6240714482233181, |
| "grad_norm": 0.54296875, |
| "learning_rate": 6.7993027119025115e-06, |
| "loss": 0.0683, |
| "step": 1843 |
| }, |
| { |
| "epoch": 0.6244100653954414, |
| "grad_norm": 0.4453125, |
| "learning_rate": 6.7886977137654505e-06, |
| "loss": 0.0644, |
| "step": 1844 |
| }, |
| { |
| "epoch": 0.6247486825675647, |
| "grad_norm": 0.52734375, |
| "learning_rate": 6.778096740996069e-06, |
| "loss": 0.0677, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.625087299739688, |
| "grad_norm": 1.734375, |
| "learning_rate": 6.767499806882685e-06, |
| "loss": 0.0645, |
| "step": 1846 |
| }, |
| { |
| "epoch": 0.6254259169118114, |
| "grad_norm": 0.486328125, |
| "learning_rate": 6.756906924708558e-06, |
| "loss": 0.069, |
| "step": 1847 |
| }, |
| { |
| "epoch": 0.6257645340839347, |
| "grad_norm": 0.4765625, |
| "learning_rate": 6.746318107751867e-06, |
| "loss": 0.064, |
| "step": 1848 |
| }, |
| { |
| "epoch": 0.6261031512560581, |
| "grad_norm": 0.48828125, |
| "learning_rate": 6.735733369285694e-06, |
| "loss": 0.0662, |
| "step": 1849 |
| }, |
| { |
| "epoch": 0.6264417684281814, |
| "grad_norm": 0.55078125, |
| "learning_rate": 6.7251527225780075e-06, |
| "loss": 0.0766, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.6267803856003048, |
| "grad_norm": 0.51171875, |
| "learning_rate": 6.714576180891653e-06, |
| "loss": 0.0681, |
| "step": 1851 |
| }, |
| { |
| "epoch": 0.627119002772428, |
| "grad_norm": 0.52734375, |
| "learning_rate": 6.7040037574843255e-06, |
| "loss": 0.0711, |
| "step": 1852 |
| }, |
| { |
| "epoch": 0.6274576199445514, |
| "grad_norm": 0.58203125, |
| "learning_rate": 6.693435465608556e-06, |
| "loss": 0.0742, |
| "step": 1853 |
| }, |
| { |
| "epoch": 0.6277962371166748, |
| "grad_norm": 0.36328125, |
| "learning_rate": 6.682871318511702e-06, |
| "loss": 0.0477, |
| "step": 1854 |
| }, |
| { |
| "epoch": 0.6281348542887981, |
| "grad_norm": 0.498046875, |
| "learning_rate": 6.672311329435919e-06, |
| "loss": 0.0624, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.6284734714609215, |
| "grad_norm": 0.5078125, |
| "learning_rate": 6.66175551161816e-06, |
| "loss": 0.0648, |
| "step": 1856 |
| }, |
| { |
| "epoch": 0.6288120886330448, |
| "grad_norm": 0.53125, |
| "learning_rate": 6.651203878290139e-06, |
| "loss": 0.0629, |
| "step": 1857 |
| }, |
| { |
| "epoch": 0.6291507058051682, |
| "grad_norm": 0.4375, |
| "learning_rate": 6.64065644267833e-06, |
| "loss": 0.059, |
| "step": 1858 |
| }, |
| { |
| "epoch": 0.6294893229772914, |
| "grad_norm": 0.5390625, |
| "learning_rate": 6.630113218003944e-06, |
| "loss": 0.0675, |
| "step": 1859 |
| }, |
| { |
| "epoch": 0.6298279401494148, |
| "grad_norm": 0.59375, |
| "learning_rate": 6.619574217482918e-06, |
| "loss": 0.0798, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.6301665573215381, |
| "grad_norm": 0.51171875, |
| "learning_rate": 6.609039454325887e-06, |
| "loss": 0.0715, |
| "step": 1861 |
| }, |
| { |
| "epoch": 0.6305051744936615, |
| "grad_norm": 0.59375, |
| "learning_rate": 6.598508941738176e-06, |
| "loss": 0.0674, |
| "step": 1862 |
| }, |
| { |
| "epoch": 0.6308437916657849, |
| "grad_norm": 0.427734375, |
| "learning_rate": 6.587982692919785e-06, |
| "loss": 0.045, |
| "step": 1863 |
| }, |
| { |
| "epoch": 0.6311824088379082, |
| "grad_norm": 0.53515625, |
| "learning_rate": 6.5774607210653675e-06, |
| "loss": 0.0666, |
| "step": 1864 |
| }, |
| { |
| "epoch": 0.6315210260100316, |
| "grad_norm": 0.5859375, |
| "learning_rate": 6.566943039364215e-06, |
| "loss": 0.0581, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.6318596431821549, |
| "grad_norm": 0.5546875, |
| "learning_rate": 6.556429661000244e-06, |
| "loss": 0.0726, |
| "step": 1866 |
| }, |
| { |
| "epoch": 0.6321982603542782, |
| "grad_norm": 1.3203125, |
| "learning_rate": 6.545920599151976e-06, |
| "loss": 0.0555, |
| "step": 1867 |
| }, |
| { |
| "epoch": 0.6325368775264015, |
| "grad_norm": 0.51171875, |
| "learning_rate": 6.535415866992518e-06, |
| "loss": 0.0702, |
| "step": 1868 |
| }, |
| { |
| "epoch": 0.6328754946985249, |
| "grad_norm": 0.51171875, |
| "learning_rate": 6.524915477689553e-06, |
| "loss": 0.0601, |
| "step": 1869 |
| }, |
| { |
| "epoch": 0.6332141118706482, |
| "grad_norm": 0.384765625, |
| "learning_rate": 6.5144194444053235e-06, |
| "loss": 0.0561, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.6335527290427716, |
| "grad_norm": 0.59765625, |
| "learning_rate": 6.503927780296605e-06, |
| "loss": 0.0833, |
| "step": 1871 |
| }, |
| { |
| "epoch": 0.633891346214895, |
| "grad_norm": 0.48828125, |
| "learning_rate": 6.4934404985147e-06, |
| "loss": 0.0658, |
| "step": 1872 |
| }, |
| { |
| "epoch": 0.6342299633870183, |
| "grad_norm": 0.44140625, |
| "learning_rate": 6.482957612205416e-06, |
| "loss": 0.0476, |
| "step": 1873 |
| }, |
| { |
| "epoch": 0.6345685805591416, |
| "grad_norm": 1.03125, |
| "learning_rate": 6.472479134509052e-06, |
| "loss": 0.094, |
| "step": 1874 |
| }, |
| { |
| "epoch": 0.6349071977312649, |
| "grad_norm": 0.6484375, |
| "learning_rate": 6.4620050785603836e-06, |
| "loss": 0.0744, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.6352458149033883, |
| "grad_norm": 0.462890625, |
| "learning_rate": 6.451535457488638e-06, |
| "loss": 0.0597, |
| "step": 1876 |
| }, |
| { |
| "epoch": 0.6355844320755116, |
| "grad_norm": 0.42578125, |
| "learning_rate": 6.4410702844174875e-06, |
| "loss": 0.0638, |
| "step": 1877 |
| }, |
| { |
| "epoch": 0.635923049247635, |
| "grad_norm": 0.4765625, |
| "learning_rate": 6.430609572465024e-06, |
| "loss": 0.0623, |
| "step": 1878 |
| }, |
| { |
| "epoch": 0.6362616664197583, |
| "grad_norm": 0.4375, |
| "learning_rate": 6.420153334743755e-06, |
| "loss": 0.0562, |
| "step": 1879 |
| }, |
| { |
| "epoch": 0.6366002835918817, |
| "grad_norm": 1.125, |
| "learning_rate": 6.409701584360575e-06, |
| "loss": 0.0465, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.636938900764005, |
| "grad_norm": 0.416015625, |
| "learning_rate": 6.399254334416752e-06, |
| "loss": 0.0492, |
| "step": 1881 |
| }, |
| { |
| "epoch": 0.6372775179361283, |
| "grad_norm": 0.45703125, |
| "learning_rate": 6.388811598007918e-06, |
| "loss": 0.0583, |
| "step": 1882 |
| }, |
| { |
| "epoch": 0.6376161351082517, |
| "grad_norm": 0.53125, |
| "learning_rate": 6.378373388224039e-06, |
| "loss": 0.0709, |
| "step": 1883 |
| }, |
| { |
| "epoch": 0.637954752280375, |
| "grad_norm": 0.46484375, |
| "learning_rate": 6.3679397181494115e-06, |
| "loss": 0.0598, |
| "step": 1884 |
| }, |
| { |
| "epoch": 0.6382933694524984, |
| "grad_norm": 0.41796875, |
| "learning_rate": 6.357510600862646e-06, |
| "loss": 0.0561, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.6386319866246217, |
| "grad_norm": 0.5703125, |
| "learning_rate": 6.3470860494366415e-06, |
| "loss": 0.0637, |
| "step": 1886 |
| }, |
| { |
| "epoch": 0.6389706037967451, |
| "grad_norm": 0.5390625, |
| "learning_rate": 6.336666076938573e-06, |
| "loss": 0.0627, |
| "step": 1887 |
| }, |
| { |
| "epoch": 0.6393092209688683, |
| "grad_norm": 0.5703125, |
| "learning_rate": 6.326250696429877e-06, |
| "loss": 0.0742, |
| "step": 1888 |
| }, |
| { |
| "epoch": 0.6396478381409917, |
| "grad_norm": 0.474609375, |
| "learning_rate": 6.315839920966229e-06, |
| "loss": 0.0568, |
| "step": 1889 |
| }, |
| { |
| "epoch": 0.639986455313115, |
| "grad_norm": 0.51953125, |
| "learning_rate": 6.305433763597546e-06, |
| "loss": 0.0758, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.6403250724852384, |
| "grad_norm": 0.462890625, |
| "learning_rate": 6.295032237367942e-06, |
| "loss": 0.0552, |
| "step": 1891 |
| }, |
| { |
| "epoch": 0.6406636896573618, |
| "grad_norm": 0.470703125, |
| "learning_rate": 6.284635355315731e-06, |
| "loss": 0.0675, |
| "step": 1892 |
| }, |
| { |
| "epoch": 0.6410023068294851, |
| "grad_norm": 0.52734375, |
| "learning_rate": 6.274243130473405e-06, |
| "loss": 0.063, |
| "step": 1893 |
| }, |
| { |
| "epoch": 0.6413409240016085, |
| "grad_norm": 0.40625, |
| "learning_rate": 6.2638555758676215e-06, |
| "loss": 0.0549, |
| "step": 1894 |
| }, |
| { |
| "epoch": 0.6416795411737318, |
| "grad_norm": 0.478515625, |
| "learning_rate": 6.253472704519179e-06, |
| "loss": 0.0589, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.6420181583458551, |
| "grad_norm": 0.62109375, |
| "learning_rate": 6.243094529443008e-06, |
| "loss": 0.0856, |
| "step": 1896 |
| }, |
| { |
| "epoch": 0.6423567755179784, |
| "grad_norm": 0.380859375, |
| "learning_rate": 6.232721063648148e-06, |
| "loss": 0.0506, |
| "step": 1897 |
| }, |
| { |
| "epoch": 0.6426953926901018, |
| "grad_norm": 0.55859375, |
| "learning_rate": 6.222352320137748e-06, |
| "loss": 0.0758, |
| "step": 1898 |
| }, |
| { |
| "epoch": 0.6430340098622251, |
| "grad_norm": 0.427734375, |
| "learning_rate": 6.211988311909021e-06, |
| "loss": 0.0528, |
| "step": 1899 |
| }, |
| { |
| "epoch": 0.6433726270343485, |
| "grad_norm": 0.4296875, |
| "learning_rate": 6.201629051953257e-06, |
| "loss": 0.0562, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.6437112442064719, |
| "grad_norm": 0.44140625, |
| "learning_rate": 6.1912745532557834e-06, |
| "loss": 0.0583, |
| "step": 1901 |
| }, |
| { |
| "epoch": 0.6440498613785952, |
| "grad_norm": 0.56640625, |
| "learning_rate": 6.180924828795972e-06, |
| "loss": 0.0687, |
| "step": 1902 |
| }, |
| { |
| "epoch": 0.6443884785507185, |
| "grad_norm": 0.53125, |
| "learning_rate": 6.170579891547202e-06, |
| "loss": 0.0623, |
| "step": 1903 |
| }, |
| { |
| "epoch": 0.6447270957228418, |
| "grad_norm": 0.4921875, |
| "learning_rate": 6.160239754476849e-06, |
| "loss": 0.0695, |
| "step": 1904 |
| }, |
| { |
| "epoch": 0.6450657128949652, |
| "grad_norm": 0.46484375, |
| "learning_rate": 6.149904430546278e-06, |
| "loss": 0.0585, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.6454043300670885, |
| "grad_norm": 0.44921875, |
| "learning_rate": 6.1395739327108185e-06, |
| "loss": 0.0655, |
| "step": 1906 |
| }, |
| { |
| "epoch": 0.6457429472392119, |
| "grad_norm": 0.56640625, |
| "learning_rate": 6.12924827391975e-06, |
| "loss": 0.0727, |
| "step": 1907 |
| }, |
| { |
| "epoch": 0.6460815644113352, |
| "grad_norm": 0.46484375, |
| "learning_rate": 6.118927467116285e-06, |
| "loss": 0.0523, |
| "step": 1908 |
| }, |
| { |
| "epoch": 0.6464201815834586, |
| "grad_norm": 0.625, |
| "learning_rate": 6.1086115252375585e-06, |
| "loss": 0.0855, |
| "step": 1909 |
| }, |
| { |
| "epoch": 0.6467587987555818, |
| "grad_norm": 0.6328125, |
| "learning_rate": 6.098300461214605e-06, |
| "loss": 0.0866, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.6470974159277052, |
| "grad_norm": 0.4765625, |
| "learning_rate": 6.087994287972341e-06, |
| "loss": 0.0627, |
| "step": 1911 |
| }, |
| { |
| "epoch": 0.6474360330998286, |
| "grad_norm": 0.7109375, |
| "learning_rate": 6.077693018429556e-06, |
| "loss": 0.0702, |
| "step": 1912 |
| }, |
| { |
| "epoch": 0.6477746502719519, |
| "grad_norm": 0.578125, |
| "learning_rate": 6.0673966654988946e-06, |
| "loss": 0.0679, |
| "step": 1913 |
| }, |
| { |
| "epoch": 0.6481132674440753, |
| "grad_norm": 0.51953125, |
| "learning_rate": 6.057105242086836e-06, |
| "loss": 0.0615, |
| "step": 1914 |
| }, |
| { |
| "epoch": 0.6484518846161986, |
| "grad_norm": 0.462890625, |
| "learning_rate": 6.046818761093678e-06, |
| "loss": 0.0506, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.648790501788322, |
| "grad_norm": 0.76171875, |
| "learning_rate": 6.036537235413524e-06, |
| "loss": 0.1215, |
| "step": 1916 |
| }, |
| { |
| "epoch": 0.6491291189604452, |
| "grad_norm": 0.4921875, |
| "learning_rate": 6.026260677934273e-06, |
| "loss": 0.0574, |
| "step": 1917 |
| }, |
| { |
| "epoch": 0.6494677361325686, |
| "grad_norm": 0.43359375, |
| "learning_rate": 6.015989101537586e-06, |
| "loss": 0.0573, |
| "step": 1918 |
| }, |
| { |
| "epoch": 0.6498063533046919, |
| "grad_norm": 0.384765625, |
| "learning_rate": 6.005722519098887e-06, |
| "loss": 0.0501, |
| "step": 1919 |
| }, |
| { |
| "epoch": 0.6501449704768153, |
| "grad_norm": 0.46875, |
| "learning_rate": 5.995460943487334e-06, |
| "loss": 0.0666, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.6504835876489387, |
| "grad_norm": 0.50390625, |
| "learning_rate": 5.9852043875658195e-06, |
| "loss": 0.0678, |
| "step": 1921 |
| }, |
| { |
| "epoch": 0.650822204821062, |
| "grad_norm": 0.58203125, |
| "learning_rate": 5.974952864190933e-06, |
| "loss": 0.0809, |
| "step": 1922 |
| }, |
| { |
| "epoch": 0.6511608219931854, |
| "grad_norm": 0.453125, |
| "learning_rate": 5.964706386212959e-06, |
| "loss": 0.0657, |
| "step": 1923 |
| }, |
| { |
| "epoch": 0.6514994391653087, |
| "grad_norm": 0.443359375, |
| "learning_rate": 5.95446496647586e-06, |
| "loss": 0.0508, |
| "step": 1924 |
| }, |
| { |
| "epoch": 0.651838056337432, |
| "grad_norm": 0.427734375, |
| "learning_rate": 5.944228617817263e-06, |
| "loss": 0.0598, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.6521766735095553, |
| "grad_norm": 0.470703125, |
| "learning_rate": 5.933997353068419e-06, |
| "loss": 0.0699, |
| "step": 1926 |
| }, |
| { |
| "epoch": 0.6525152906816787, |
| "grad_norm": 0.609375, |
| "learning_rate": 5.923771185054224e-06, |
| "loss": 0.0726, |
| "step": 1927 |
| }, |
| { |
| "epoch": 0.652853907853802, |
| "grad_norm": 0.5078125, |
| "learning_rate": 5.913550126593186e-06, |
| "loss": 0.0721, |
| "step": 1928 |
| }, |
| { |
| "epoch": 0.6531925250259254, |
| "grad_norm": 0.37890625, |
| "learning_rate": 5.903334190497396e-06, |
| "loss": 0.0483, |
| "step": 1929 |
| }, |
| { |
| "epoch": 0.6535311421980488, |
| "grad_norm": 0.462890625, |
| "learning_rate": 5.8931233895725345e-06, |
| "loss": 0.0528, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.6538697593701721, |
| "grad_norm": 0.482421875, |
| "learning_rate": 5.882917736617839e-06, |
| "loss": 0.0751, |
| "step": 1931 |
| }, |
| { |
| "epoch": 0.6542083765422954, |
| "grad_norm": 0.4296875, |
| "learning_rate": 5.872717244426099e-06, |
| "loss": 0.0562, |
| "step": 1932 |
| }, |
| { |
| "epoch": 0.6545469937144187, |
| "grad_norm": 0.498046875, |
| "learning_rate": 5.862521925783631e-06, |
| "loss": 0.0628, |
| "step": 1933 |
| }, |
| { |
| "epoch": 0.6548856108865421, |
| "grad_norm": 0.44140625, |
| "learning_rate": 5.852331793470267e-06, |
| "loss": 0.0523, |
| "step": 1934 |
| }, |
| { |
| "epoch": 0.6552242280586654, |
| "grad_norm": 0.5390625, |
| "learning_rate": 5.842146860259337e-06, |
| "loss": 0.0563, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.6555628452307888, |
| "grad_norm": 0.41796875, |
| "learning_rate": 5.8319671389176605e-06, |
| "loss": 0.0523, |
| "step": 1936 |
| }, |
| { |
| "epoch": 0.6559014624029121, |
| "grad_norm": 0.439453125, |
| "learning_rate": 5.821792642205512e-06, |
| "loss": 0.0534, |
| "step": 1937 |
| }, |
| { |
| "epoch": 0.6562400795750355, |
| "grad_norm": 0.8984375, |
| "learning_rate": 5.811623382876636e-06, |
| "loss": 0.0865, |
| "step": 1938 |
| }, |
| { |
| "epoch": 0.6565786967471587, |
| "grad_norm": 0.7734375, |
| "learning_rate": 5.8014593736781864e-06, |
| "loss": 0.0701, |
| "step": 1939 |
| }, |
| { |
| "epoch": 0.6569173139192821, |
| "grad_norm": 0.458984375, |
| "learning_rate": 5.791300627350759e-06, |
| "loss": 0.052, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.6572559310914055, |
| "grad_norm": 0.51953125, |
| "learning_rate": 5.781147156628336e-06, |
| "loss": 0.0633, |
| "step": 1941 |
| }, |
| { |
| "epoch": 0.6575945482635288, |
| "grad_norm": 0.54296875, |
| "learning_rate": 5.770998974238298e-06, |
| "loss": 0.0629, |
| "step": 1942 |
| }, |
| { |
| "epoch": 0.6579331654356522, |
| "grad_norm": 0.48828125, |
| "learning_rate": 5.760856092901394e-06, |
| "loss": 0.0605, |
| "step": 1943 |
| }, |
| { |
| "epoch": 0.6582717826077755, |
| "grad_norm": 0.51953125, |
| "learning_rate": 5.750718525331722e-06, |
| "loss": 0.0576, |
| "step": 1944 |
| }, |
| { |
| "epoch": 0.6586103997798989, |
| "grad_norm": 0.373046875, |
| "learning_rate": 5.740586284236724e-06, |
| "loss": 0.0499, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.6589490169520221, |
| "grad_norm": 0.5078125, |
| "learning_rate": 5.730459382317177e-06, |
| "loss": 0.0711, |
| "step": 1946 |
| }, |
| { |
| "epoch": 0.6592876341241455, |
| "grad_norm": 0.412109375, |
| "learning_rate": 5.720337832267136e-06, |
| "loss": 0.06, |
| "step": 1947 |
| }, |
| { |
| "epoch": 0.6596262512962688, |
| "grad_norm": 0.52734375, |
| "learning_rate": 5.710221646773971e-06, |
| "loss": 0.0605, |
| "step": 1948 |
| }, |
| { |
| "epoch": 0.6599648684683922, |
| "grad_norm": 0.48046875, |
| "learning_rate": 5.700110838518327e-06, |
| "loss": 0.0567, |
| "step": 1949 |
| }, |
| { |
| "epoch": 0.6603034856405156, |
| "grad_norm": 0.376953125, |
| "learning_rate": 5.690005420174095e-06, |
| "loss": 0.0477, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.6606421028126389, |
| "grad_norm": 0.486328125, |
| "learning_rate": 5.679905404408426e-06, |
| "loss": 0.0622, |
| "step": 1951 |
| }, |
| { |
| "epoch": 0.6609807199847623, |
| "grad_norm": 0.6015625, |
| "learning_rate": 5.6698108038816815e-06, |
| "loss": 0.0638, |
| "step": 1952 |
| }, |
| { |
| "epoch": 0.6613193371568856, |
| "grad_norm": 0.400390625, |
| "learning_rate": 5.6597216312474476e-06, |
| "loss": 0.054, |
| "step": 1953 |
| }, |
| { |
| "epoch": 0.6616579543290089, |
| "grad_norm": 0.453125, |
| "learning_rate": 5.649637899152509e-06, |
| "loss": 0.0533, |
| "step": 1954 |
| }, |
| { |
| "epoch": 0.6619965715011322, |
| "grad_norm": 0.455078125, |
| "learning_rate": 5.639559620236815e-06, |
| "loss": 0.0573, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.6623351886732556, |
| "grad_norm": 0.5, |
| "learning_rate": 5.629486807133495e-06, |
| "loss": 0.0699, |
| "step": 1956 |
| }, |
| { |
| "epoch": 0.6626738058453789, |
| "grad_norm": 0.44921875, |
| "learning_rate": 5.619419472468824e-06, |
| "loss": 0.0509, |
| "step": 1957 |
| }, |
| { |
| "epoch": 0.6630124230175023, |
| "grad_norm": 0.625, |
| "learning_rate": 5.609357628862197e-06, |
| "loss": 0.0755, |
| "step": 1958 |
| }, |
| { |
| "epoch": 0.6633510401896257, |
| "grad_norm": 0.451171875, |
| "learning_rate": 5.599301288926145e-06, |
| "loss": 0.0509, |
| "step": 1959 |
| }, |
| { |
| "epoch": 0.663689657361749, |
| "grad_norm": 0.478515625, |
| "learning_rate": 5.5892504652662845e-06, |
| "loss": 0.0623, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.6640282745338723, |
| "grad_norm": 0.478515625, |
| "learning_rate": 5.579205170481328e-06, |
| "loss": 0.0578, |
| "step": 1961 |
| }, |
| { |
| "epoch": 0.6643668917059956, |
| "grad_norm": 0.498046875, |
| "learning_rate": 5.569165417163054e-06, |
| "loss": 0.0685, |
| "step": 1962 |
| }, |
| { |
| "epoch": 0.664705508878119, |
| "grad_norm": 0.515625, |
| "learning_rate": 5.559131217896288e-06, |
| "loss": 0.0699, |
| "step": 1963 |
| }, |
| { |
| "epoch": 0.6650441260502423, |
| "grad_norm": 0.470703125, |
| "learning_rate": 5.549102585258904e-06, |
| "loss": 0.0572, |
| "step": 1964 |
| }, |
| { |
| "epoch": 0.6653827432223657, |
| "grad_norm": 0.48046875, |
| "learning_rate": 5.539079531821799e-06, |
| "loss": 0.0532, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.665721360394489, |
| "grad_norm": 0.47265625, |
| "learning_rate": 5.529062070148859e-06, |
| "loss": 0.0597, |
| "step": 1966 |
| }, |
| { |
| "epoch": 0.6660599775666124, |
| "grad_norm": 0.470703125, |
| "learning_rate": 5.519050212796986e-06, |
| "loss": 0.0668, |
| "step": 1967 |
| }, |
| { |
| "epoch": 0.6663985947387356, |
| "grad_norm": 0.498046875, |
| "learning_rate": 5.509043972316037e-06, |
| "loss": 0.0614, |
| "step": 1968 |
| }, |
| { |
| "epoch": 0.666737211910859, |
| "grad_norm": 0.56640625, |
| "learning_rate": 5.499043361248832e-06, |
| "loss": 0.0561, |
| "step": 1969 |
| }, |
| { |
| "epoch": 0.6670758290829824, |
| "grad_norm": 0.51953125, |
| "learning_rate": 5.489048392131147e-06, |
| "loss": 0.0859, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.6674144462551057, |
| "grad_norm": 0.46875, |
| "learning_rate": 5.4790590774916665e-06, |
| "loss": 0.0537, |
| "step": 1971 |
| }, |
| { |
| "epoch": 0.6677530634272291, |
| "grad_norm": 0.435546875, |
| "learning_rate": 5.469075429852002e-06, |
| "loss": 0.0555, |
| "step": 1972 |
| }, |
| { |
| "epoch": 0.6680916805993524, |
| "grad_norm": 0.51953125, |
| "learning_rate": 5.459097461726661e-06, |
| "loss": 0.0719, |
| "step": 1973 |
| }, |
| { |
| "epoch": 0.6684302977714758, |
| "grad_norm": 0.55078125, |
| "learning_rate": 5.44912518562302e-06, |
| "loss": 0.0772, |
| "step": 1974 |
| }, |
| { |
| "epoch": 0.668768914943599, |
| "grad_norm": 0.60546875, |
| "learning_rate": 5.439158614041331e-06, |
| "loss": 0.06, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.6691075321157224, |
| "grad_norm": 0.3984375, |
| "learning_rate": 5.4291977594746955e-06, |
| "loss": 0.0536, |
| "step": 1976 |
| }, |
| { |
| "epoch": 0.6694461492878457, |
| "grad_norm": 0.5859375, |
| "learning_rate": 5.419242634409039e-06, |
| "loss": 0.113, |
| "step": 1977 |
| }, |
| { |
| "epoch": 0.6697847664599691, |
| "grad_norm": 0.6484375, |
| "learning_rate": 5.409293251323119e-06, |
| "loss": 0.0825, |
| "step": 1978 |
| }, |
| { |
| "epoch": 0.6701233836320925, |
| "grad_norm": 0.5390625, |
| "learning_rate": 5.399349622688479e-06, |
| "loss": 0.0676, |
| "step": 1979 |
| }, |
| { |
| "epoch": 0.6704620008042158, |
| "grad_norm": 0.609375, |
| "learning_rate": 5.3894117609694655e-06, |
| "loss": 0.0731, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.6708006179763392, |
| "grad_norm": 0.4765625, |
| "learning_rate": 5.379479678623189e-06, |
| "loss": 0.0647, |
| "step": 1981 |
| }, |
| { |
| "epoch": 0.6711392351484625, |
| "grad_norm": 0.5078125, |
| "learning_rate": 5.3695533880995096e-06, |
| "loss": 0.0873, |
| "step": 1982 |
| }, |
| { |
| "epoch": 0.6714778523205858, |
| "grad_norm": 0.478515625, |
| "learning_rate": 5.359632901841038e-06, |
| "loss": 0.0594, |
| "step": 1983 |
| }, |
| { |
| "epoch": 0.6718164694927091, |
| "grad_norm": 0.404296875, |
| "learning_rate": 5.349718232283106e-06, |
| "loss": 0.0601, |
| "step": 1984 |
| }, |
| { |
| "epoch": 0.6721550866648325, |
| "grad_norm": 0.63671875, |
| "learning_rate": 5.339809391853747e-06, |
| "loss": 0.0798, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.6724937038369558, |
| "grad_norm": 0.494140625, |
| "learning_rate": 5.3299063929737015e-06, |
| "loss": 0.0687, |
| "step": 1986 |
| }, |
| { |
| "epoch": 0.6728323210090792, |
| "grad_norm": 0.4140625, |
| "learning_rate": 5.3200092480563704e-06, |
| "loss": 0.0536, |
| "step": 1987 |
| }, |
| { |
| "epoch": 0.6731709381812025, |
| "grad_norm": 0.609375, |
| "learning_rate": 5.310117969507833e-06, |
| "loss": 0.0457, |
| "step": 1988 |
| }, |
| { |
| "epoch": 0.6735095553533259, |
| "grad_norm": 0.51953125, |
| "learning_rate": 5.300232569726805e-06, |
| "loss": 0.0617, |
| "step": 1989 |
| }, |
| { |
| "epoch": 0.6738481725254492, |
| "grad_norm": 0.490234375, |
| "learning_rate": 5.29035306110463e-06, |
| "loss": 0.0634, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.6741867896975725, |
| "grad_norm": 0.53515625, |
| "learning_rate": 5.2804794560252785e-06, |
| "loss": 0.0601, |
| "step": 1991 |
| }, |
| { |
| "epoch": 0.6745254068696959, |
| "grad_norm": 0.61328125, |
| "learning_rate": 5.270611766865319e-06, |
| "loss": 0.0957, |
| "step": 1992 |
| }, |
| { |
| "epoch": 0.6748640240418192, |
| "grad_norm": 0.6328125, |
| "learning_rate": 5.2607500059938935e-06, |
| "loss": 0.1005, |
| "step": 1993 |
| }, |
| { |
| "epoch": 0.6752026412139426, |
| "grad_norm": 0.458984375, |
| "learning_rate": 5.250894185772724e-06, |
| "loss": 0.0555, |
| "step": 1994 |
| }, |
| { |
| "epoch": 0.6755412583860659, |
| "grad_norm": 0.44921875, |
| "learning_rate": 5.241044318556083e-06, |
| "loss": 0.0605, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.6758798755581893, |
| "grad_norm": 0.490234375, |
| "learning_rate": 5.231200416690775e-06, |
| "loss": 0.0753, |
| "step": 1996 |
| }, |
| { |
| "epoch": 0.6762184927303125, |
| "grad_norm": 0.515625, |
| "learning_rate": 5.221362492516139e-06, |
| "loss": 0.0718, |
| "step": 1997 |
| }, |
| { |
| "epoch": 0.6765571099024359, |
| "grad_norm": 0.50390625, |
| "learning_rate": 5.211530558364005e-06, |
| "loss": 0.0645, |
| "step": 1998 |
| }, |
| { |
| "epoch": 0.6768957270745593, |
| "grad_norm": 0.5, |
| "learning_rate": 5.201704626558708e-06, |
| "loss": 0.0597, |
| "step": 1999 |
| }, |
| { |
| "epoch": 0.6772343442466826, |
| "grad_norm": 1.109375, |
| "learning_rate": 5.191884709417058e-06, |
| "loss": 0.0725, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.677572961418806, |
| "grad_norm": 0.45703125, |
| "learning_rate": 5.1820708192483145e-06, |
| "loss": 0.0579, |
| "step": 2001 |
| }, |
| { |
| "epoch": 0.6779115785909293, |
| "grad_norm": 0.71484375, |
| "learning_rate": 5.172262968354198e-06, |
| "loss": 0.087, |
| "step": 2002 |
| }, |
| { |
| "epoch": 0.6782501957630527, |
| "grad_norm": 0.404296875, |
| "learning_rate": 5.162461169028841e-06, |
| "loss": 0.0513, |
| "step": 2003 |
| }, |
| { |
| "epoch": 0.6785888129351759, |
| "grad_norm": 0.58984375, |
| "learning_rate": 5.152665433558803e-06, |
| "loss": 0.0824, |
| "step": 2004 |
| }, |
| { |
| "epoch": 0.6789274301072993, |
| "grad_norm": 0.5, |
| "learning_rate": 5.1428757742230466e-06, |
| "loss": 0.0706, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.6792660472794226, |
| "grad_norm": 0.5078125, |
| "learning_rate": 5.1330922032928996e-06, |
| "loss": 0.0718, |
| "step": 2006 |
| }, |
| { |
| "epoch": 0.679604664451546, |
| "grad_norm": 0.59765625, |
| "learning_rate": 5.123314733032074e-06, |
| "loss": 0.0998, |
| "step": 2007 |
| }, |
| { |
| "epoch": 0.6799432816236693, |
| "grad_norm": 0.37890625, |
| "learning_rate": 5.113543375696633e-06, |
| "loss": 0.052, |
| "step": 2008 |
| }, |
| { |
| "epoch": 0.6802818987957927, |
| "grad_norm": 0.46484375, |
| "learning_rate": 5.1037781435349676e-06, |
| "loss": 0.065, |
| "step": 2009 |
| }, |
| { |
| "epoch": 0.6806205159679161, |
| "grad_norm": 0.3984375, |
| "learning_rate": 5.094019048787802e-06, |
| "loss": 0.051, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.6809591331400394, |
| "grad_norm": 0.60546875, |
| "learning_rate": 5.084266103688161e-06, |
| "loss": 0.0822, |
| "step": 2011 |
| }, |
| { |
| "epoch": 0.6812977503121627, |
| "grad_norm": 0.482421875, |
| "learning_rate": 5.074519320461358e-06, |
| "loss": 0.0605, |
| "step": 2012 |
| }, |
| { |
| "epoch": 0.681636367484286, |
| "grad_norm": 0.484375, |
| "learning_rate": 5.064778711324989e-06, |
| "loss": 0.0494, |
| "step": 2013 |
| }, |
| { |
| "epoch": 0.6819749846564094, |
| "grad_norm": 0.88671875, |
| "learning_rate": 5.055044288488913e-06, |
| "loss": 0.0791, |
| "step": 2014 |
| }, |
| { |
| "epoch": 0.6823136018285327, |
| "grad_norm": 0.41015625, |
| "learning_rate": 5.045316064155221e-06, |
| "loss": 0.054, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.6826522190006561, |
| "grad_norm": 0.4453125, |
| "learning_rate": 5.035594050518254e-06, |
| "loss": 0.0535, |
| "step": 2016 |
| }, |
| { |
| "epoch": 0.6829908361727794, |
| "grad_norm": 0.5078125, |
| "learning_rate": 5.025878259764545e-06, |
| "loss": 0.0676, |
| "step": 2017 |
| }, |
| { |
| "epoch": 0.6833294533449028, |
| "grad_norm": 0.427734375, |
| "learning_rate": 5.016168704072846e-06, |
| "loss": 0.0536, |
| "step": 2018 |
| }, |
| { |
| "epoch": 0.683668070517026, |
| "grad_norm": 0.4453125, |
| "learning_rate": 5.006465395614086e-06, |
| "loss": 0.0603, |
| "step": 2019 |
| }, |
| { |
| "epoch": 0.6840066876891494, |
| "grad_norm": 0.498046875, |
| "learning_rate": 4.9967683465513595e-06, |
| "loss": 0.0726, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.6843453048612728, |
| "grad_norm": 0.453125, |
| "learning_rate": 4.987077569039922e-06, |
| "loss": 0.0645, |
| "step": 2021 |
| }, |
| { |
| "epoch": 0.6846839220333961, |
| "grad_norm": 0.453125, |
| "learning_rate": 4.977393075227159e-06, |
| "loss": 0.0542, |
| "step": 2022 |
| }, |
| { |
| "epoch": 0.6850225392055195, |
| "grad_norm": 0.4453125, |
| "learning_rate": 4.967714877252587e-06, |
| "loss": 0.0515, |
| "step": 2023 |
| }, |
| { |
| "epoch": 0.6853611563776428, |
| "grad_norm": 0.458984375, |
| "learning_rate": 4.958042987247832e-06, |
| "loss": 0.0684, |
| "step": 2024 |
| }, |
| { |
| "epoch": 0.6856997735497662, |
| "grad_norm": 0.490234375, |
| "learning_rate": 4.9483774173366e-06, |
| "loss": 0.0718, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.6860383907218894, |
| "grad_norm": 0.41796875, |
| "learning_rate": 4.938718179634689e-06, |
| "loss": 0.0558, |
| "step": 2026 |
| }, |
| { |
| "epoch": 0.6863770078940128, |
| "grad_norm": 0.52734375, |
| "learning_rate": 4.929065286249959e-06, |
| "loss": 0.0724, |
| "step": 2027 |
| }, |
| { |
| "epoch": 0.6867156250661361, |
| "grad_norm": 0.859375, |
| "learning_rate": 4.919418749282302e-06, |
| "loss": 0.1876, |
| "step": 2028 |
| }, |
| { |
| "epoch": 0.6870542422382595, |
| "grad_norm": 0.38671875, |
| "learning_rate": 4.909778580823663e-06, |
| "loss": 0.0502, |
| "step": 2029 |
| }, |
| { |
| "epoch": 0.6873928594103829, |
| "grad_norm": 0.65234375, |
| "learning_rate": 4.9001447929579855e-06, |
| "loss": 0.0814, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.6877314765825062, |
| "grad_norm": 0.5703125, |
| "learning_rate": 4.890517397761232e-06, |
| "loss": 0.0727, |
| "step": 2031 |
| }, |
| { |
| "epoch": 0.6880700937546296, |
| "grad_norm": 0.482421875, |
| "learning_rate": 4.880896407301333e-06, |
| "loss": 0.064, |
| "step": 2032 |
| }, |
| { |
| "epoch": 0.6884087109267528, |
| "grad_norm": 0.75, |
| "learning_rate": 4.8712818336382104e-06, |
| "loss": 0.0589, |
| "step": 2033 |
| }, |
| { |
| "epoch": 0.6887473280988762, |
| "grad_norm": 0.5703125, |
| "learning_rate": 4.861673688823726e-06, |
| "loss": 0.0676, |
| "step": 2034 |
| }, |
| { |
| "epoch": 0.6890859452709995, |
| "grad_norm": 0.46484375, |
| "learning_rate": 4.852071984901696e-06, |
| "loss": 0.0677, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.6894245624431229, |
| "grad_norm": 0.67578125, |
| "learning_rate": 4.842476733907851e-06, |
| "loss": 0.0656, |
| "step": 2036 |
| }, |
| { |
| "epoch": 0.6897631796152462, |
| "grad_norm": 0.3984375, |
| "learning_rate": 4.832887947869841e-06, |
| "loss": 0.0561, |
| "step": 2037 |
| }, |
| { |
| "epoch": 0.6901017967873696, |
| "grad_norm": 0.46875, |
| "learning_rate": 4.823305638807215e-06, |
| "loss": 0.0559, |
| "step": 2038 |
| }, |
| { |
| "epoch": 0.690440413959493, |
| "grad_norm": 0.70703125, |
| "learning_rate": 4.813729818731391e-06, |
| "loss": 0.0806, |
| "step": 2039 |
| }, |
| { |
| "epoch": 0.6907790311316163, |
| "grad_norm": 0.55859375, |
| "learning_rate": 4.804160499645667e-06, |
| "loss": 0.0692, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.6911176483037396, |
| "grad_norm": 0.546875, |
| "learning_rate": 4.794597693545179e-06, |
| "loss": 0.0532, |
| "step": 2041 |
| }, |
| { |
| "epoch": 0.6914562654758629, |
| "grad_norm": 0.435546875, |
| "learning_rate": 4.785041412416906e-06, |
| "loss": 0.0625, |
| "step": 2042 |
| }, |
| { |
| "epoch": 0.6917948826479863, |
| "grad_norm": 0.49609375, |
| "learning_rate": 4.7754916682396545e-06, |
| "loss": 0.0646, |
| "step": 2043 |
| }, |
| { |
| "epoch": 0.6921334998201096, |
| "grad_norm": 0.38671875, |
| "learning_rate": 4.76594847298402e-06, |
| "loss": 0.0437, |
| "step": 2044 |
| }, |
| { |
| "epoch": 0.692472116992233, |
| "grad_norm": 0.494140625, |
| "learning_rate": 4.756411838612402e-06, |
| "loss": 0.0618, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.6928107341643563, |
| "grad_norm": 0.515625, |
| "learning_rate": 4.746881777078979e-06, |
| "loss": 0.0675, |
| "step": 2046 |
| }, |
| { |
| "epoch": 0.6931493513364797, |
| "grad_norm": 0.63671875, |
| "learning_rate": 4.737358300329673e-06, |
| "loss": 0.0711, |
| "step": 2047 |
| }, |
| { |
| "epoch": 0.693487968508603, |
| "grad_norm": 0.443359375, |
| "learning_rate": 4.727841420302172e-06, |
| "loss": 0.0549, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.6938265856807263, |
| "grad_norm": 0.412109375, |
| "learning_rate": 4.7183311489258774e-06, |
| "loss": 0.0567, |
| "step": 2049 |
| }, |
| { |
| "epoch": 0.6941652028528497, |
| "grad_norm": 0.482421875, |
| "learning_rate": 4.70882749812192e-06, |
| "loss": 0.058, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.694503820024973, |
| "grad_norm": 0.48046875, |
| "learning_rate": 4.699330479803131e-06, |
| "loss": 0.0677, |
| "step": 2051 |
| }, |
| { |
| "epoch": 0.6948424371970964, |
| "grad_norm": 0.388671875, |
| "learning_rate": 4.68984010587402e-06, |
| "loss": 0.0471, |
| "step": 2052 |
| }, |
| { |
| "epoch": 0.6951810543692197, |
| "grad_norm": 0.47265625, |
| "learning_rate": 4.6803563882307655e-06, |
| "loss": 0.06, |
| "step": 2053 |
| }, |
| { |
| "epoch": 0.6955196715413431, |
| "grad_norm": 0.482421875, |
| "learning_rate": 4.670879338761218e-06, |
| "loss": 0.0604, |
| "step": 2054 |
| }, |
| { |
| "epoch": 0.6958582887134663, |
| "grad_norm": 0.51171875, |
| "learning_rate": 4.6614089693448515e-06, |
| "loss": 0.0571, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.6961969058855897, |
| "grad_norm": 0.50390625, |
| "learning_rate": 4.651945291852779e-06, |
| "loss": 0.0746, |
| "step": 2056 |
| }, |
| { |
| "epoch": 0.696535523057713, |
| "grad_norm": 0.60546875, |
| "learning_rate": 4.642488318147723e-06, |
| "loss": 0.0734, |
| "step": 2057 |
| }, |
| { |
| "epoch": 0.6968741402298364, |
| "grad_norm": 0.384765625, |
| "learning_rate": 4.633038060083996e-06, |
| "loss": 0.0513, |
| "step": 2058 |
| }, |
| { |
| "epoch": 0.6972127574019598, |
| "grad_norm": 0.5859375, |
| "learning_rate": 4.623594529507503e-06, |
| "loss": 0.0631, |
| "step": 2059 |
| }, |
| { |
| "epoch": 0.6975513745740831, |
| "grad_norm": 0.5859375, |
| "learning_rate": 4.6141577382557044e-06, |
| "loss": 0.0805, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.6978899917462065, |
| "grad_norm": 0.55859375, |
| "learning_rate": 4.604727698157621e-06, |
| "loss": 0.0771, |
| "step": 2061 |
| }, |
| { |
| "epoch": 0.6982286089183297, |
| "grad_norm": 0.46484375, |
| "learning_rate": 4.5953044210338116e-06, |
| "loss": 0.0578, |
| "step": 2062 |
| }, |
| { |
| "epoch": 0.6985672260904531, |
| "grad_norm": 0.474609375, |
| "learning_rate": 4.58588791869635e-06, |
| "loss": 0.0691, |
| "step": 2063 |
| }, |
| { |
| "epoch": 0.6989058432625764, |
| "grad_norm": 0.439453125, |
| "learning_rate": 4.576478202948826e-06, |
| "loss": 0.0568, |
| "step": 2064 |
| }, |
| { |
| "epoch": 0.6992444604346998, |
| "grad_norm": 0.427734375, |
| "learning_rate": 4.567075285586321e-06, |
| "loss": 0.0491, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.6995830776068231, |
| "grad_norm": 0.88671875, |
| "learning_rate": 4.557679178395387e-06, |
| "loss": 0.0596, |
| "step": 2066 |
| }, |
| { |
| "epoch": 0.6999216947789465, |
| "grad_norm": 0.458984375, |
| "learning_rate": 4.5482898931540505e-06, |
| "loss": 0.0626, |
| "step": 2067 |
| }, |
| { |
| "epoch": 0.7002603119510699, |
| "grad_norm": 0.52734375, |
| "learning_rate": 4.538907441631776e-06, |
| "loss": 0.0592, |
| "step": 2068 |
| }, |
| { |
| "epoch": 0.7005989291231932, |
| "grad_norm": 0.43359375, |
| "learning_rate": 4.5295318355894705e-06, |
| "loss": 0.0555, |
| "step": 2069 |
| }, |
| { |
| "epoch": 0.7009375462953165, |
| "grad_norm": 0.4296875, |
| "learning_rate": 4.52016308677946e-06, |
| "loss": 0.0569, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.7012761634674398, |
| "grad_norm": 0.55859375, |
| "learning_rate": 4.5108012069454645e-06, |
| "loss": 0.066, |
| "step": 2071 |
| }, |
| { |
| "epoch": 0.7016147806395632, |
| "grad_norm": 0.46484375, |
| "learning_rate": 4.5014462078226064e-06, |
| "loss": 0.0519, |
| "step": 2072 |
| }, |
| { |
| "epoch": 0.7016147806395632, |
| "eval_loss": 0.06592338532209396, |
| "eval_runtime": 815.5749, |
| "eval_samples_per_second": 12.198, |
| "eval_steps_per_second": 3.049, |
| "step": 2072 |
| }, |
| { |
| "epoch": 0.7019533978116865, |
| "grad_norm": 0.41796875, |
| "learning_rate": 4.492098101137382e-06, |
| "loss": 0.0626, |
| "step": 2073 |
| }, |
| { |
| "epoch": 0.7022920149838099, |
| "grad_norm": 0.52734375, |
| "learning_rate": 4.482756898607633e-06, |
| "loss": 0.072, |
| "step": 2074 |
| }, |
| { |
| "epoch": 0.7026306321559332, |
| "grad_norm": 0.44140625, |
| "learning_rate": 4.4734226119425615e-06, |
| "loss": 0.0637, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.7029692493280566, |
| "grad_norm": 0.42578125, |
| "learning_rate": 4.464095252842703e-06, |
| "loss": 0.0534, |
| "step": 2076 |
| }, |
| { |
| "epoch": 0.7033078665001798, |
| "grad_norm": 0.37890625, |
| "learning_rate": 4.454774832999893e-06, |
| "loss": 0.0472, |
| "step": 2077 |
| }, |
| { |
| "epoch": 0.7036464836723032, |
| "grad_norm": 0.640625, |
| "learning_rate": 4.445461364097288e-06, |
| "loss": 0.0701, |
| "step": 2078 |
| }, |
| { |
| "epoch": 0.7039851008444266, |
| "grad_norm": 0.78125, |
| "learning_rate": 4.436154857809314e-06, |
| "loss": 0.0544, |
| "step": 2079 |
| }, |
| { |
| "epoch": 0.7043237180165499, |
| "grad_norm": 0.6171875, |
| "learning_rate": 4.42685532580168e-06, |
| "loss": 0.0479, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.7046623351886733, |
| "grad_norm": 0.60546875, |
| "learning_rate": 4.417562779731355e-06, |
| "loss": 0.0743, |
| "step": 2081 |
| }, |
| { |
| "epoch": 0.7050009523607966, |
| "grad_norm": 0.41796875, |
| "learning_rate": 4.408277231246539e-06, |
| "loss": 0.0463, |
| "step": 2082 |
| }, |
| { |
| "epoch": 0.70533956953292, |
| "grad_norm": 0.439453125, |
| "learning_rate": 4.3989986919866716e-06, |
| "loss": 0.0552, |
| "step": 2083 |
| }, |
| { |
| "epoch": 0.7056781867050432, |
| "grad_norm": 0.478515625, |
| "learning_rate": 4.3897271735824045e-06, |
| "loss": 0.0654, |
| "step": 2084 |
| }, |
| { |
| "epoch": 0.7060168038771666, |
| "grad_norm": 0.404296875, |
| "learning_rate": 4.380462687655581e-06, |
| "loss": 0.053, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.7063554210492899, |
| "grad_norm": 0.474609375, |
| "learning_rate": 4.371205245819241e-06, |
| "loss": 0.0636, |
| "step": 2086 |
| }, |
| { |
| "epoch": 0.7066940382214133, |
| "grad_norm": 0.46875, |
| "learning_rate": 4.361954859677584e-06, |
| "loss": 0.0645, |
| "step": 2087 |
| }, |
| { |
| "epoch": 0.7070326553935367, |
| "grad_norm": 0.609375, |
| "learning_rate": 4.35271154082597e-06, |
| "loss": 0.0726, |
| "step": 2088 |
| }, |
| { |
| "epoch": 0.70737127256566, |
| "grad_norm": 0.578125, |
| "learning_rate": 4.343475300850907e-06, |
| "loss": 0.0656, |
| "step": 2089 |
| }, |
| { |
| "epoch": 0.7077098897377834, |
| "grad_norm": 0.55859375, |
| "learning_rate": 4.334246151330012e-06, |
| "loss": 0.0644, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.7080485069099066, |
| "grad_norm": 0.515625, |
| "learning_rate": 4.32502410383203e-06, |
| "loss": 0.0722, |
| "step": 2091 |
| }, |
| { |
| "epoch": 0.70838712408203, |
| "grad_norm": 0.4375, |
| "learning_rate": 4.315809169916802e-06, |
| "loss": 0.0505, |
| "step": 2092 |
| }, |
| { |
| "epoch": 0.7087257412541533, |
| "grad_norm": 0.4140625, |
| "learning_rate": 4.306601361135241e-06, |
| "loss": 0.0484, |
| "step": 2093 |
| }, |
| { |
| "epoch": 0.7090643584262767, |
| "grad_norm": 0.42578125, |
| "learning_rate": 4.297400689029344e-06, |
| "loss": 0.0606, |
| "step": 2094 |
| }, |
| { |
| "epoch": 0.7094029755984, |
| "grad_norm": 0.6484375, |
| "learning_rate": 4.2882071651321485e-06, |
| "loss": 0.0702, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.7097415927705234, |
| "grad_norm": 0.43359375, |
| "learning_rate": 4.279020800967736e-06, |
| "loss": 0.0488, |
| "step": 2096 |
| }, |
| { |
| "epoch": 0.7100802099426468, |
| "grad_norm": 0.4140625, |
| "learning_rate": 4.2698416080512204e-06, |
| "loss": 0.0486, |
| "step": 2097 |
| }, |
| { |
| "epoch": 0.7104188271147701, |
| "grad_norm": 0.462890625, |
| "learning_rate": 4.260669597888715e-06, |
| "loss": 0.0501, |
| "step": 2098 |
| }, |
| { |
| "epoch": 0.7107574442868934, |
| "grad_norm": 0.62890625, |
| "learning_rate": 4.251504781977337e-06, |
| "loss": 0.0779, |
| "step": 2099 |
| }, |
| { |
| "epoch": 0.7110960614590167, |
| "grad_norm": 0.4453125, |
| "learning_rate": 4.24234717180519e-06, |
| "loss": 0.0576, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.7114346786311401, |
| "grad_norm": 0.49609375, |
| "learning_rate": 4.2331967788513295e-06, |
| "loss": 0.0669, |
| "step": 2101 |
| }, |
| { |
| "epoch": 0.7117732958032634, |
| "grad_norm": 0.451171875, |
| "learning_rate": 4.224053614585779e-06, |
| "loss": 0.0635, |
| "step": 2102 |
| }, |
| { |
| "epoch": 0.7121119129753868, |
| "grad_norm": 0.482421875, |
| "learning_rate": 4.214917690469499e-06, |
| "loss": 0.0612, |
| "step": 2103 |
| }, |
| { |
| "epoch": 0.7124505301475101, |
| "grad_norm": 0.55078125, |
| "learning_rate": 4.205789017954364e-06, |
| "loss": 0.0592, |
| "step": 2104 |
| }, |
| { |
| "epoch": 0.7127891473196335, |
| "grad_norm": 0.52734375, |
| "learning_rate": 4.1966676084831715e-06, |
| "loss": 0.065, |
| "step": 2105 |
| }, |
| { |
| "epoch": 0.7131277644917567, |
| "grad_norm": 0.380859375, |
| "learning_rate": 4.187553473489604e-06, |
| "loss": 0.0527, |
| "step": 2106 |
| }, |
| { |
| "epoch": 0.7134663816638801, |
| "grad_norm": 0.4296875, |
| "learning_rate": 4.178446624398233e-06, |
| "loss": 0.0521, |
| "step": 2107 |
| }, |
| { |
| "epoch": 0.7138049988360035, |
| "grad_norm": 0.486328125, |
| "learning_rate": 4.169347072624497e-06, |
| "loss": 0.071, |
| "step": 2108 |
| }, |
| { |
| "epoch": 0.7141436160081268, |
| "grad_norm": 0.392578125, |
| "learning_rate": 4.160254829574679e-06, |
| "loss": 0.0548, |
| "step": 2109 |
| }, |
| { |
| "epoch": 0.7144822331802502, |
| "grad_norm": 0.423828125, |
| "learning_rate": 4.15116990664591e-06, |
| "loss": 0.0579, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.7148208503523735, |
| "grad_norm": 0.400390625, |
| "learning_rate": 4.142092315226146e-06, |
| "loss": 0.0456, |
| "step": 2111 |
| }, |
| { |
| "epoch": 0.7151594675244969, |
| "grad_norm": 0.62890625, |
| "learning_rate": 4.13302206669414e-06, |
| "loss": 0.0613, |
| "step": 2112 |
| }, |
| { |
| "epoch": 0.7154980846966201, |
| "grad_norm": 0.46875, |
| "learning_rate": 4.123959172419456e-06, |
| "loss": 0.0577, |
| "step": 2113 |
| }, |
| { |
| "epoch": 0.7158367018687435, |
| "grad_norm": 0.421875, |
| "learning_rate": 4.114903643762428e-06, |
| "loss": 0.0641, |
| "step": 2114 |
| }, |
| { |
| "epoch": 0.7161753190408668, |
| "grad_norm": 0.64453125, |
| "learning_rate": 4.1058554920741635e-06, |
| "loss": 0.079, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.7165139362129902, |
| "grad_norm": 0.42578125, |
| "learning_rate": 4.096814728696529e-06, |
| "loss": 0.0563, |
| "step": 2116 |
| }, |
| { |
| "epoch": 0.7168525533851136, |
| "grad_norm": 0.4921875, |
| "learning_rate": 4.087781364962108e-06, |
| "loss": 0.0606, |
| "step": 2117 |
| }, |
| { |
| "epoch": 0.7171911705572369, |
| "grad_norm": 0.5078125, |
| "learning_rate": 4.078755412194228e-06, |
| "loss": 0.0593, |
| "step": 2118 |
| }, |
| { |
| "epoch": 0.7175297877293603, |
| "grad_norm": 0.51171875, |
| "learning_rate": 4.069736881706929e-06, |
| "loss": 0.0645, |
| "step": 2119 |
| }, |
| { |
| "epoch": 0.7178684049014835, |
| "grad_norm": 0.455078125, |
| "learning_rate": 4.06072578480493e-06, |
| "loss": 0.048, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.7182070220736069, |
| "grad_norm": 0.46875, |
| "learning_rate": 4.051722132783644e-06, |
| "loss": 0.0683, |
| "step": 2121 |
| }, |
| { |
| "epoch": 0.7185456392457302, |
| "grad_norm": 0.39453125, |
| "learning_rate": 4.042725936929157e-06, |
| "loss": 0.0465, |
| "step": 2122 |
| }, |
| { |
| "epoch": 0.7188842564178536, |
| "grad_norm": 0.50390625, |
| "learning_rate": 4.0337372085181905e-06, |
| "loss": 0.0717, |
| "step": 2123 |
| }, |
| { |
| "epoch": 0.7192228735899769, |
| "grad_norm": 0.55859375, |
| "learning_rate": 4.024755958818125e-06, |
| "loss": 0.0725, |
| "step": 2124 |
| }, |
| { |
| "epoch": 0.7195614907621003, |
| "grad_norm": 0.478515625, |
| "learning_rate": 4.0157821990869505e-06, |
| "loss": 0.0528, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.7199001079342237, |
| "grad_norm": 0.498046875, |
| "learning_rate": 4.006815940573279e-06, |
| "loss": 0.0793, |
| "step": 2126 |
| }, |
| { |
| "epoch": 0.720238725106347, |
| "grad_norm": 0.625, |
| "learning_rate": 3.997857194516319e-06, |
| "loss": 0.0728, |
| "step": 2127 |
| }, |
| { |
| "epoch": 0.7205773422784703, |
| "grad_norm": 0.5546875, |
| "learning_rate": 3.988905972145854e-06, |
| "loss": 0.0728, |
| "step": 2128 |
| }, |
| { |
| "epoch": 0.7209159594505936, |
| "grad_norm": 0.5234375, |
| "learning_rate": 3.979962284682245e-06, |
| "loss": 0.0724, |
| "step": 2129 |
| }, |
| { |
| "epoch": 0.721254576622717, |
| "grad_norm": 0.51953125, |
| "learning_rate": 3.971026143336409e-06, |
| "loss": 0.0748, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.7215931937948403, |
| "grad_norm": 0.3984375, |
| "learning_rate": 3.96209755930979e-06, |
| "loss": 0.0569, |
| "step": 2131 |
| }, |
| { |
| "epoch": 0.7219318109669637, |
| "grad_norm": 0.51953125, |
| "learning_rate": 3.953176543794378e-06, |
| "loss": 0.0673, |
| "step": 2132 |
| }, |
| { |
| "epoch": 0.722270428139087, |
| "grad_norm": 0.458984375, |
| "learning_rate": 3.94426310797266e-06, |
| "loss": 0.0556, |
| "step": 2133 |
| }, |
| { |
| "epoch": 0.7226090453112104, |
| "grad_norm": 0.447265625, |
| "learning_rate": 3.935357263017633e-06, |
| "loss": 0.0616, |
| "step": 2134 |
| }, |
| { |
| "epoch": 0.7229476624833336, |
| "grad_norm": 0.51171875, |
| "learning_rate": 3.926459020092774e-06, |
| "loss": 0.066, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.723286279655457, |
| "grad_norm": 1.4296875, |
| "learning_rate": 3.917568390352029e-06, |
| "loss": 0.0712, |
| "step": 2136 |
| }, |
| { |
| "epoch": 0.7236248968275804, |
| "grad_norm": 0.55859375, |
| "learning_rate": 3.908685384939807e-06, |
| "loss": 0.0741, |
| "step": 2137 |
| }, |
| { |
| "epoch": 0.7239635139997037, |
| "grad_norm": 0.51953125, |
| "learning_rate": 3.899810014990953e-06, |
| "loss": 0.0728, |
| "step": 2138 |
| }, |
| { |
| "epoch": 0.7243021311718271, |
| "grad_norm": 0.52734375, |
| "learning_rate": 3.890942291630739e-06, |
| "loss": 0.0746, |
| "step": 2139 |
| }, |
| { |
| "epoch": 0.7246407483439504, |
| "grad_norm": 0.484375, |
| "learning_rate": 3.8820822259748645e-06, |
| "loss": 0.0595, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.7249793655160738, |
| "grad_norm": 0.455078125, |
| "learning_rate": 3.873229829129423e-06, |
| "loss": 0.053, |
| "step": 2141 |
| }, |
| { |
| "epoch": 0.725317982688197, |
| "grad_norm": 0.41015625, |
| "learning_rate": 3.864385112190889e-06, |
| "loss": 0.0526, |
| "step": 2142 |
| }, |
| { |
| "epoch": 0.7256565998603204, |
| "grad_norm": 0.6171875, |
| "learning_rate": 3.8555480862461214e-06, |
| "loss": 0.0773, |
| "step": 2143 |
| }, |
| { |
| "epoch": 0.7259952170324437, |
| "grad_norm": 0.5078125, |
| "learning_rate": 3.846718762372328e-06, |
| "loss": 0.0595, |
| "step": 2144 |
| }, |
| { |
| "epoch": 0.7263338342045671, |
| "grad_norm": 0.58203125, |
| "learning_rate": 3.837897151637069e-06, |
| "loss": 0.073, |
| "step": 2145 |
| }, |
| { |
| "epoch": 0.7266724513766905, |
| "grad_norm": 0.4140625, |
| "learning_rate": 3.829083265098236e-06, |
| "loss": 0.0546, |
| "step": 2146 |
| }, |
| { |
| "epoch": 0.7270110685488138, |
| "grad_norm": 0.4921875, |
| "learning_rate": 3.820277113804034e-06, |
| "loss": 0.0585, |
| "step": 2147 |
| }, |
| { |
| "epoch": 0.7273496857209372, |
| "grad_norm": 0.67578125, |
| "learning_rate": 3.811478708792975e-06, |
| "loss": 0.0918, |
| "step": 2148 |
| }, |
| { |
| "epoch": 0.7276883028930604, |
| "grad_norm": 0.43359375, |
| "learning_rate": 3.802688061093864e-06, |
| "loss": 0.0533, |
| "step": 2149 |
| }, |
| { |
| "epoch": 0.7280269200651838, |
| "grad_norm": 0.466796875, |
| "learning_rate": 3.793905181725772e-06, |
| "loss": 0.0574, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.7283655372373071, |
| "grad_norm": 0.640625, |
| "learning_rate": 3.785130081698045e-06, |
| "loss": 0.0713, |
| "step": 2151 |
| }, |
| { |
| "epoch": 0.7287041544094305, |
| "grad_norm": 0.47265625, |
| "learning_rate": 3.776362772010267e-06, |
| "loss": 0.0664, |
| "step": 2152 |
| }, |
| { |
| "epoch": 0.7290427715815538, |
| "grad_norm": 0.515625, |
| "learning_rate": 3.767603263652263e-06, |
| "loss": 0.0622, |
| "step": 2153 |
| }, |
| { |
| "epoch": 0.7293813887536772, |
| "grad_norm": 0.50390625, |
| "learning_rate": 3.7588515676040805e-06, |
| "loss": 0.0673, |
| "step": 2154 |
| }, |
| { |
| "epoch": 0.7297200059258006, |
| "grad_norm": 0.4765625, |
| "learning_rate": 3.750107694835966e-06, |
| "loss": 0.0663, |
| "step": 2155 |
| }, |
| { |
| "epoch": 0.7300586230979239, |
| "grad_norm": 0.54296875, |
| "learning_rate": 3.7413716563083704e-06, |
| "loss": 0.0625, |
| "step": 2156 |
| }, |
| { |
| "epoch": 0.7303972402700472, |
| "grad_norm": 0.42578125, |
| "learning_rate": 3.7326434629719122e-06, |
| "loss": 0.0558, |
| "step": 2157 |
| }, |
| { |
| "epoch": 0.7307358574421705, |
| "grad_norm": 0.546875, |
| "learning_rate": 3.723923125767389e-06, |
| "loss": 0.0678, |
| "step": 2158 |
| }, |
| { |
| "epoch": 0.7310744746142939, |
| "grad_norm": 0.447265625, |
| "learning_rate": 3.715210655625738e-06, |
| "loss": 0.0477, |
| "step": 2159 |
| }, |
| { |
| "epoch": 0.7314130917864172, |
| "grad_norm": 0.474609375, |
| "learning_rate": 3.7065060634680485e-06, |
| "loss": 0.0604, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.7317517089585406, |
| "grad_norm": 0.67578125, |
| "learning_rate": 3.6978093602055186e-06, |
| "loss": 0.0876, |
| "step": 2161 |
| }, |
| { |
| "epoch": 0.7320903261306639, |
| "grad_norm": 0.54296875, |
| "learning_rate": 3.689120556739475e-06, |
| "loss": 0.073, |
| "step": 2162 |
| }, |
| { |
| "epoch": 0.7324289433027873, |
| "grad_norm": 0.455078125, |
| "learning_rate": 3.6804396639613273e-06, |
| "loss": 0.0456, |
| "step": 2163 |
| }, |
| { |
| "epoch": 0.7327675604749105, |
| "grad_norm": 0.734375, |
| "learning_rate": 3.6717666927525765e-06, |
| "loss": 0.1512, |
| "step": 2164 |
| }, |
| { |
| "epoch": 0.7331061776470339, |
| "grad_norm": 0.51953125, |
| "learning_rate": 3.6631016539847987e-06, |
| "loss": 0.0597, |
| "step": 2165 |
| }, |
| { |
| "epoch": 0.7334447948191573, |
| "grad_norm": 0.5, |
| "learning_rate": 3.654444558519612e-06, |
| "loss": 0.059, |
| "step": 2166 |
| }, |
| { |
| "epoch": 0.7337834119912806, |
| "grad_norm": 0.546875, |
| "learning_rate": 3.6457954172086895e-06, |
| "loss": 0.0734, |
| "step": 2167 |
| }, |
| { |
| "epoch": 0.734122029163404, |
| "grad_norm": 0.53125, |
| "learning_rate": 3.6371542408937355e-06, |
| "loss": 0.0575, |
| "step": 2168 |
| }, |
| { |
| "epoch": 0.7344606463355273, |
| "grad_norm": 0.4140625, |
| "learning_rate": 3.6285210404064587e-06, |
| "loss": 0.0573, |
| "step": 2169 |
| }, |
| { |
| "epoch": 0.7347992635076507, |
| "grad_norm": 0.431640625, |
| "learning_rate": 3.619895826568581e-06, |
| "loss": 0.0489, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.7351378806797739, |
| "grad_norm": 0.48828125, |
| "learning_rate": 3.611278610191804e-06, |
| "loss": 0.0538, |
| "step": 2171 |
| }, |
| { |
| "epoch": 0.7354764978518973, |
| "grad_norm": 0.55859375, |
| "learning_rate": 3.602669402077811e-06, |
| "loss": 0.0678, |
| "step": 2172 |
| }, |
| { |
| "epoch": 0.7358151150240206, |
| "grad_norm": 0.54296875, |
| "learning_rate": 3.594068213018249e-06, |
| "loss": 0.052, |
| "step": 2173 |
| }, |
| { |
| "epoch": 0.736153732196144, |
| "grad_norm": 0.43359375, |
| "learning_rate": 3.5854750537947035e-06, |
| "loss": 0.0622, |
| "step": 2174 |
| }, |
| { |
| "epoch": 0.7364923493682674, |
| "grad_norm": 0.6953125, |
| "learning_rate": 3.5768899351787066e-06, |
| "loss": 0.0634, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.7368309665403907, |
| "grad_norm": 0.59765625, |
| "learning_rate": 3.568312867931697e-06, |
| "loss": 0.1019, |
| "step": 2176 |
| }, |
| { |
| "epoch": 0.7371695837125141, |
| "grad_norm": 0.5078125, |
| "learning_rate": 3.559743862805034e-06, |
| "loss": 0.0662, |
| "step": 2177 |
| }, |
| { |
| "epoch": 0.7375082008846373, |
| "grad_norm": 0.51953125, |
| "learning_rate": 3.551182930539969e-06, |
| "loss": 0.0743, |
| "step": 2178 |
| }, |
| { |
| "epoch": 0.7378468180567607, |
| "grad_norm": 0.5703125, |
| "learning_rate": 3.5426300818676264e-06, |
| "loss": 0.072, |
| "step": 2179 |
| }, |
| { |
| "epoch": 0.738185435228884, |
| "grad_norm": 0.466796875, |
| "learning_rate": 3.534085327509006e-06, |
| "loss": 0.0677, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.7385240524010074, |
| "grad_norm": 0.4609375, |
| "learning_rate": 3.525548678174957e-06, |
| "loss": 0.0604, |
| "step": 2181 |
| }, |
| { |
| "epoch": 0.7388626695731307, |
| "grad_norm": 0.5078125, |
| "learning_rate": 3.5170201445661655e-06, |
| "loss": 0.0628, |
| "step": 2182 |
| }, |
| { |
| "epoch": 0.7392012867452541, |
| "grad_norm": 0.38671875, |
| "learning_rate": 3.5084997373731546e-06, |
| "loss": 0.0482, |
| "step": 2183 |
| }, |
| { |
| "epoch": 0.7395399039173775, |
| "grad_norm": 0.498046875, |
| "learning_rate": 3.4999874672762567e-06, |
| "loss": 0.0587, |
| "step": 2184 |
| }, |
| { |
| "epoch": 0.7398785210895007, |
| "grad_norm": 2.1875, |
| "learning_rate": 3.4914833449455963e-06, |
| "loss": 0.0638, |
| "step": 2185 |
| }, |
| { |
| "epoch": 0.740217138261624, |
| "grad_norm": 0.5546875, |
| "learning_rate": 3.482987381041096e-06, |
| "loss": 0.0692, |
| "step": 2186 |
| }, |
| { |
| "epoch": 0.7405557554337474, |
| "grad_norm": 0.4140625, |
| "learning_rate": 3.4744995862124498e-06, |
| "loss": 0.0501, |
| "step": 2187 |
| }, |
| { |
| "epoch": 0.7408943726058708, |
| "grad_norm": 0.48046875, |
| "learning_rate": 3.4660199710991038e-06, |
| "loss": 0.0731, |
| "step": 2188 |
| }, |
| { |
| "epoch": 0.7412329897779941, |
| "grad_norm": 0.62890625, |
| "learning_rate": 3.4575485463302603e-06, |
| "loss": 0.1051, |
| "step": 2189 |
| }, |
| { |
| "epoch": 0.7415716069501175, |
| "grad_norm": 0.44140625, |
| "learning_rate": 3.449085322524848e-06, |
| "loss": 0.0553, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.7419102241222408, |
| "grad_norm": 0.55078125, |
| "learning_rate": 3.440630310291517e-06, |
| "loss": 0.0543, |
| "step": 2191 |
| }, |
| { |
| "epoch": 0.7422488412943642, |
| "grad_norm": 0.51953125, |
| "learning_rate": 3.432183520228635e-06, |
| "loss": 0.0701, |
| "step": 2192 |
| }, |
| { |
| "epoch": 0.7425874584664874, |
| "grad_norm": 0.53125, |
| "learning_rate": 3.4237449629242427e-06, |
| "loss": 0.0757, |
| "step": 2193 |
| }, |
| { |
| "epoch": 0.7429260756386108, |
| "grad_norm": 0.42578125, |
| "learning_rate": 3.4153146489560807e-06, |
| "loss": 0.0497, |
| "step": 2194 |
| }, |
| { |
| "epoch": 0.7432646928107342, |
| "grad_norm": 0.5625, |
| "learning_rate": 3.4068925888915417e-06, |
| "loss": 0.0708, |
| "step": 2195 |
| }, |
| { |
| "epoch": 0.7436033099828575, |
| "grad_norm": 0.4765625, |
| "learning_rate": 3.398478793287682e-06, |
| "loss": 0.0616, |
| "step": 2196 |
| }, |
| { |
| "epoch": 0.7439419271549809, |
| "grad_norm": 0.44140625, |
| "learning_rate": 3.390073272691198e-06, |
| "loss": 0.0545, |
| "step": 2197 |
| }, |
| { |
| "epoch": 0.7442805443271042, |
| "grad_norm": 0.396484375, |
| "learning_rate": 3.381676037638404e-06, |
| "loss": 0.0548, |
| "step": 2198 |
| }, |
| { |
| "epoch": 0.7446191614992276, |
| "grad_norm": 0.50390625, |
| "learning_rate": 3.3732870986552392e-06, |
| "loss": 0.0593, |
| "step": 2199 |
| }, |
| { |
| "epoch": 0.7449577786713508, |
| "grad_norm": 0.6953125, |
| "learning_rate": 3.3649064662572406e-06, |
| "loss": 0.0843, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.7452963958434742, |
| "grad_norm": 0.578125, |
| "learning_rate": 3.35653415094953e-06, |
| "loss": 0.0733, |
| "step": 2201 |
| }, |
| { |
| "epoch": 0.7456350130155975, |
| "grad_norm": 0.56640625, |
| "learning_rate": 3.3481701632268014e-06, |
| "loss": 0.0623, |
| "step": 2202 |
| }, |
| { |
| "epoch": 0.7459736301877209, |
| "grad_norm": 0.439453125, |
| "learning_rate": 3.339814513573321e-06, |
| "loss": 0.059, |
| "step": 2203 |
| }, |
| { |
| "epoch": 0.7463122473598442, |
| "grad_norm": 0.37890625, |
| "learning_rate": 3.3314672124628877e-06, |
| "loss": 0.0464, |
| "step": 2204 |
| }, |
| { |
| "epoch": 0.7466508645319676, |
| "grad_norm": 0.55078125, |
| "learning_rate": 3.323128270358851e-06, |
| "loss": 0.0573, |
| "step": 2205 |
| }, |
| { |
| "epoch": 0.746989481704091, |
| "grad_norm": 0.51171875, |
| "learning_rate": 3.3147976977140763e-06, |
| "loss": 0.0692, |
| "step": 2206 |
| }, |
| { |
| "epoch": 0.7473280988762142, |
| "grad_norm": 0.423828125, |
| "learning_rate": 3.3064755049709307e-06, |
| "loss": 0.0537, |
| "step": 2207 |
| }, |
| { |
| "epoch": 0.7476667160483376, |
| "grad_norm": 0.4375, |
| "learning_rate": 3.2981617025612913e-06, |
| "loss": 0.0586, |
| "step": 2208 |
| }, |
| { |
| "epoch": 0.7480053332204609, |
| "grad_norm": 0.58203125, |
| "learning_rate": 3.289856300906502e-06, |
| "loss": 0.0716, |
| "step": 2209 |
| }, |
| { |
| "epoch": 0.7483439503925843, |
| "grad_norm": 0.515625, |
| "learning_rate": 3.2815593104173882e-06, |
| "loss": 0.0656, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.7486825675647076, |
| "grad_norm": 0.41796875, |
| "learning_rate": 3.273270741494232e-06, |
| "loss": 0.055, |
| "step": 2211 |
| }, |
| { |
| "epoch": 0.749021184736831, |
| "grad_norm": 0.462890625, |
| "learning_rate": 3.264990604526749e-06, |
| "loss": 0.0665, |
| "step": 2212 |
| }, |
| { |
| "epoch": 0.7493598019089543, |
| "grad_norm": 0.48046875, |
| "learning_rate": 3.2567189098940966e-06, |
| "loss": 0.0582, |
| "step": 2213 |
| }, |
| { |
| "epoch": 0.7496984190810776, |
| "grad_norm": 0.5703125, |
| "learning_rate": 3.2484556679648393e-06, |
| "loss": 0.084, |
| "step": 2214 |
| }, |
| { |
| "epoch": 0.750037036253201, |
| "grad_norm": 0.51171875, |
| "learning_rate": 3.240200889096955e-06, |
| "loss": 0.0749, |
| "step": 2215 |
| }, |
| { |
| "epoch": 0.7503756534253243, |
| "grad_norm": 0.4921875, |
| "learning_rate": 3.231954583637812e-06, |
| "loss": 0.0605, |
| "step": 2216 |
| }, |
| { |
| "epoch": 0.7507142705974477, |
| "grad_norm": 0.4609375, |
| "learning_rate": 3.2237167619241492e-06, |
| "loss": 0.0609, |
| "step": 2217 |
| }, |
| { |
| "epoch": 0.751052887769571, |
| "grad_norm": 0.44921875, |
| "learning_rate": 3.2154874342820797e-06, |
| "loss": 0.0622, |
| "step": 2218 |
| }, |
| { |
| "epoch": 0.7513915049416944, |
| "grad_norm": 0.3828125, |
| "learning_rate": 3.207266611027069e-06, |
| "loss": 0.0463, |
| "step": 2219 |
| }, |
| { |
| "epoch": 0.7517301221138177, |
| "grad_norm": 0.5078125, |
| "learning_rate": 3.199054302463914e-06, |
| "loss": 0.0745, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.7520687392859411, |
| "grad_norm": 0.453125, |
| "learning_rate": 3.1908505188867513e-06, |
| "loss": 0.0582, |
| "step": 2221 |
| }, |
| { |
| "epoch": 0.7524073564580643, |
| "grad_norm": 0.5, |
| "learning_rate": 3.1826552705790192e-06, |
| "loss": 0.0577, |
| "step": 2222 |
| }, |
| { |
| "epoch": 0.7527459736301877, |
| "grad_norm": 0.486328125, |
| "learning_rate": 3.174468567813461e-06, |
| "loss": 0.0602, |
| "step": 2223 |
| }, |
| { |
| "epoch": 0.753084590802311, |
| "grad_norm": 0.51171875, |
| "learning_rate": 3.166290420852114e-06, |
| "loss": 0.0631, |
| "step": 2224 |
| }, |
| { |
| "epoch": 0.7534232079744344, |
| "grad_norm": 1.5078125, |
| "learning_rate": 3.1581208399462804e-06, |
| "loss": 0.0609, |
| "step": 2225 |
| }, |
| { |
| "epoch": 0.7537618251465578, |
| "grad_norm": 0.369140625, |
| "learning_rate": 3.1499598353365334e-06, |
| "loss": 0.0447, |
| "step": 2226 |
| }, |
| { |
| "epoch": 0.7541004423186811, |
| "grad_norm": 0.47265625, |
| "learning_rate": 3.141807417252697e-06, |
| "loss": 0.0606, |
| "step": 2227 |
| }, |
| { |
| "epoch": 0.7544390594908045, |
| "grad_norm": 0.396484375, |
| "learning_rate": 3.1336635959138197e-06, |
| "loss": 0.054, |
| "step": 2228 |
| }, |
| { |
| "epoch": 0.7547776766629277, |
| "grad_norm": 0.453125, |
| "learning_rate": 3.1255283815281876e-06, |
| "loss": 0.0674, |
| "step": 2229 |
| }, |
| { |
| "epoch": 0.7551162938350511, |
| "grad_norm": 0.466796875, |
| "learning_rate": 3.1174017842932946e-06, |
| "loss": 0.0645, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.7554549110071744, |
| "grad_norm": 0.51953125, |
| "learning_rate": 3.109283814395825e-06, |
| "loss": 0.0789, |
| "step": 2231 |
| }, |
| { |
| "epoch": 0.7557935281792978, |
| "grad_norm": 0.71875, |
| "learning_rate": 3.1011744820116607e-06, |
| "loss": 0.1046, |
| "step": 2232 |
| }, |
| { |
| "epoch": 0.7561321453514211, |
| "grad_norm": 0.56640625, |
| "learning_rate": 3.0930737973058443e-06, |
| "loss": 0.076, |
| "step": 2233 |
| }, |
| { |
| "epoch": 0.7564707625235445, |
| "grad_norm": 0.48828125, |
| "learning_rate": 3.084981770432588e-06, |
| "loss": 0.0705, |
| "step": 2234 |
| }, |
| { |
| "epoch": 0.7568093796956679, |
| "grad_norm": 0.62109375, |
| "learning_rate": 3.076898411535252e-06, |
| "loss": 0.0654, |
| "step": 2235 |
| }, |
| { |
| "epoch": 0.7571479968677911, |
| "grad_norm": 0.4609375, |
| "learning_rate": 3.06882373074632e-06, |
| "loss": 0.0583, |
| "step": 2236 |
| }, |
| { |
| "epoch": 0.7574866140399145, |
| "grad_norm": 0.484375, |
| "learning_rate": 3.0607577381874088e-06, |
| "loss": 0.0562, |
| "step": 2237 |
| }, |
| { |
| "epoch": 0.7578252312120378, |
| "grad_norm": 0.53125, |
| "learning_rate": 3.0527004439692433e-06, |
| "loss": 0.0584, |
| "step": 2238 |
| }, |
| { |
| "epoch": 0.7581638483841612, |
| "grad_norm": 0.5, |
| "learning_rate": 3.044651858191636e-06, |
| "loss": 0.0621, |
| "step": 2239 |
| }, |
| { |
| "epoch": 0.7585024655562845, |
| "grad_norm": 0.80078125, |
| "learning_rate": 3.0366119909434977e-06, |
| "loss": 0.0545, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.7588410827284079, |
| "grad_norm": 0.470703125, |
| "learning_rate": 3.0285808523027936e-06, |
| "loss": 0.0627, |
| "step": 2241 |
| }, |
| { |
| "epoch": 0.7591796999005312, |
| "grad_norm": 0.482421875, |
| "learning_rate": 3.0205584523365626e-06, |
| "loss": 0.0729, |
| "step": 2242 |
| }, |
| { |
| "epoch": 0.7595183170726545, |
| "grad_norm": 0.453125, |
| "learning_rate": 3.0125448011008894e-06, |
| "loss": 0.0605, |
| "step": 2243 |
| }, |
| { |
| "epoch": 0.7598569342447778, |
| "grad_norm": 0.59765625, |
| "learning_rate": 3.004539908640872e-06, |
| "loss": 0.0855, |
| "step": 2244 |
| }, |
| { |
| "epoch": 0.7601955514169012, |
| "grad_norm": 0.466796875, |
| "learning_rate": 2.996543784990653e-06, |
| "loss": 0.0587, |
| "step": 2245 |
| }, |
| { |
| "epoch": 0.7605341685890246, |
| "grad_norm": 0.412109375, |
| "learning_rate": 2.9885564401733745e-06, |
| "loss": 0.0519, |
| "step": 2246 |
| }, |
| { |
| "epoch": 0.7608727857611479, |
| "grad_norm": 0.5, |
| "learning_rate": 2.980577884201169e-06, |
| "loss": 0.0668, |
| "step": 2247 |
| }, |
| { |
| "epoch": 0.7612114029332713, |
| "grad_norm": 0.46484375, |
| "learning_rate": 2.9726081270751594e-06, |
| "loss": 0.0552, |
| "step": 2248 |
| }, |
| { |
| "epoch": 0.7615500201053946, |
| "grad_norm": 0.443359375, |
| "learning_rate": 2.9646471787854416e-06, |
| "loss": 0.0611, |
| "step": 2249 |
| }, |
| { |
| "epoch": 0.761888637277518, |
| "grad_norm": 0.50390625, |
| "learning_rate": 2.956695049311057e-06, |
| "loss": 0.0522, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.7622272544496412, |
| "grad_norm": 0.462890625, |
| "learning_rate": 2.948751748620007e-06, |
| "loss": 0.0615, |
| "step": 2251 |
| }, |
| { |
| "epoch": 0.7625658716217646, |
| "grad_norm": 0.322265625, |
| "learning_rate": 2.940817286669214e-06, |
| "loss": 0.0447, |
| "step": 2252 |
| }, |
| { |
| "epoch": 0.762904488793888, |
| "grad_norm": 0.5, |
| "learning_rate": 2.93289167340453e-06, |
| "loss": 0.0689, |
| "step": 2253 |
| }, |
| { |
| "epoch": 0.7632431059660113, |
| "grad_norm": 0.51171875, |
| "learning_rate": 2.9249749187607146e-06, |
| "loss": 0.0608, |
| "step": 2254 |
| }, |
| { |
| "epoch": 0.7635817231381347, |
| "grad_norm": 0.62109375, |
| "learning_rate": 2.917067032661415e-06, |
| "loss": 0.0734, |
| "step": 2255 |
| }, |
| { |
| "epoch": 0.763920340310258, |
| "grad_norm": 0.50390625, |
| "learning_rate": 2.909168025019168e-06, |
| "loss": 0.071, |
| "step": 2256 |
| }, |
| { |
| "epoch": 0.7642589574823814, |
| "grad_norm": 0.51953125, |
| "learning_rate": 2.901277905735386e-06, |
| "loss": 0.0604, |
| "step": 2257 |
| }, |
| { |
| "epoch": 0.7645975746545046, |
| "grad_norm": 0.6484375, |
| "learning_rate": 2.893396684700326e-06, |
| "loss": 0.0887, |
| "step": 2258 |
| }, |
| { |
| "epoch": 0.764936191826628, |
| "grad_norm": 0.50390625, |
| "learning_rate": 2.885524371793106e-06, |
| "loss": 0.0768, |
| "step": 2259 |
| }, |
| { |
| "epoch": 0.7652748089987513, |
| "grad_norm": 0.353515625, |
| "learning_rate": 2.8776609768816655e-06, |
| "loss": 0.0521, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.7656134261708747, |
| "grad_norm": 0.474609375, |
| "learning_rate": 2.8698065098227725e-06, |
| "loss": 0.0669, |
| "step": 2261 |
| }, |
| { |
| "epoch": 0.765952043342998, |
| "grad_norm": 0.4765625, |
| "learning_rate": 2.8619609804620063e-06, |
| "loss": 0.0602, |
| "step": 2262 |
| }, |
| { |
| "epoch": 0.7662906605151214, |
| "grad_norm": 0.44140625, |
| "learning_rate": 2.854124398633732e-06, |
| "loss": 0.0546, |
| "step": 2263 |
| }, |
| { |
| "epoch": 0.7666292776872448, |
| "grad_norm": 0.451171875, |
| "learning_rate": 2.846296774161108e-06, |
| "loss": 0.0598, |
| "step": 2264 |
| }, |
| { |
| "epoch": 0.766967894859368, |
| "grad_norm": 0.421875, |
| "learning_rate": 2.8384781168560693e-06, |
| "loss": 0.06, |
| "step": 2265 |
| }, |
| { |
| "epoch": 0.7673065120314914, |
| "grad_norm": 0.4921875, |
| "learning_rate": 2.8306684365192915e-06, |
| "loss": 0.0638, |
| "step": 2266 |
| }, |
| { |
| "epoch": 0.7676451292036147, |
| "grad_norm": 0.5, |
| "learning_rate": 2.822867742940214e-06, |
| "loss": 0.0603, |
| "step": 2267 |
| }, |
| { |
| "epoch": 0.7679837463757381, |
| "grad_norm": 0.431640625, |
| "learning_rate": 2.8150760458970115e-06, |
| "loss": 0.0627, |
| "step": 2268 |
| }, |
| { |
| "epoch": 0.7683223635478614, |
| "grad_norm": 0.5546875, |
| "learning_rate": 2.8072933551565706e-06, |
| "loss": 0.0726, |
| "step": 2269 |
| }, |
| { |
| "epoch": 0.7686609807199848, |
| "grad_norm": 0.56640625, |
| "learning_rate": 2.7995196804745005e-06, |
| "loss": 0.0813, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.7689995978921081, |
| "grad_norm": 0.5703125, |
| "learning_rate": 2.791755031595096e-06, |
| "loss": 0.072, |
| "step": 2271 |
| }, |
| { |
| "epoch": 0.7693382150642314, |
| "grad_norm": 0.66796875, |
| "learning_rate": 2.7839994182513496e-06, |
| "loss": 0.0566, |
| "step": 2272 |
| }, |
| { |
| "epoch": 0.7696768322363547, |
| "grad_norm": 0.58203125, |
| "learning_rate": 2.7762528501649256e-06, |
| "loss": 0.0811, |
| "step": 2273 |
| }, |
| { |
| "epoch": 0.7700154494084781, |
| "grad_norm": 0.46484375, |
| "learning_rate": 2.7685153370461424e-06, |
| "loss": 0.0523, |
| "step": 2274 |
| }, |
| { |
| "epoch": 0.7703540665806015, |
| "grad_norm": 0.40234375, |
| "learning_rate": 2.760786888593975e-06, |
| "loss": 0.0529, |
| "step": 2275 |
| }, |
| { |
| "epoch": 0.7706926837527248, |
| "grad_norm": 0.359375, |
| "learning_rate": 2.7530675144960382e-06, |
| "loss": 0.0425, |
| "step": 2276 |
| }, |
| { |
| "epoch": 0.7710313009248482, |
| "grad_norm": 0.7578125, |
| "learning_rate": 2.745357224428563e-06, |
| "loss": 0.0685, |
| "step": 2277 |
| }, |
| { |
| "epoch": 0.7713699180969715, |
| "grad_norm": 1.9296875, |
| "learning_rate": 2.7376560280564025e-06, |
| "loss": 0.0716, |
| "step": 2278 |
| }, |
| { |
| "epoch": 0.7717085352690949, |
| "grad_norm": 0.6015625, |
| "learning_rate": 2.729963935033002e-06, |
| "loss": 0.0661, |
| "step": 2279 |
| }, |
| { |
| "epoch": 0.7720471524412181, |
| "grad_norm": 0.57421875, |
| "learning_rate": 2.722280955000404e-06, |
| "loss": 0.0642, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.7723857696133415, |
| "grad_norm": 0.625, |
| "learning_rate": 2.714607097589226e-06, |
| "loss": 0.1111, |
| "step": 2281 |
| }, |
| { |
| "epoch": 0.7727243867854648, |
| "grad_norm": 0.59375, |
| "learning_rate": 2.706942372418645e-06, |
| "loss": 0.0496, |
| "step": 2282 |
| }, |
| { |
| "epoch": 0.7730630039575882, |
| "grad_norm": 0.55859375, |
| "learning_rate": 2.699286789096397e-06, |
| "loss": 0.0549, |
| "step": 2283 |
| }, |
| { |
| "epoch": 0.7734016211297116, |
| "grad_norm": 0.5, |
| "learning_rate": 2.691640357218759e-06, |
| "loss": 0.0606, |
| "step": 2284 |
| }, |
| { |
| "epoch": 0.7737402383018349, |
| "grad_norm": 0.5234375, |
| "learning_rate": 2.684003086370528e-06, |
| "loss": 0.0644, |
| "step": 2285 |
| }, |
| { |
| "epoch": 0.7740788554739583, |
| "grad_norm": 0.52734375, |
| "learning_rate": 2.6763749861250297e-06, |
| "loss": 0.0697, |
| "step": 2286 |
| }, |
| { |
| "epoch": 0.7744174726460815, |
| "grad_norm": 0.52734375, |
| "learning_rate": 2.6687560660440858e-06, |
| "loss": 0.0635, |
| "step": 2287 |
| }, |
| { |
| "epoch": 0.7747560898182049, |
| "grad_norm": 0.41796875, |
| "learning_rate": 2.66114633567801e-06, |
| "loss": 0.0387, |
| "step": 2288 |
| }, |
| { |
| "epoch": 0.7750947069903282, |
| "grad_norm": 0.478515625, |
| "learning_rate": 2.653545804565606e-06, |
| "loss": 0.071, |
| "step": 2289 |
| }, |
| { |
| "epoch": 0.7754333241624516, |
| "grad_norm": 0.55078125, |
| "learning_rate": 2.645954482234133e-06, |
| "loss": 0.0768, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.7757719413345749, |
| "grad_norm": 0.53515625, |
| "learning_rate": 2.6383723781993187e-06, |
| "loss": 0.0642, |
| "step": 2291 |
| }, |
| { |
| "epoch": 0.7761105585066983, |
| "grad_norm": 0.484375, |
| "learning_rate": 2.630799501965333e-06, |
| "loss": 0.0548, |
| "step": 2292 |
| }, |
| { |
| "epoch": 0.7764491756788217, |
| "grad_norm": 0.451171875, |
| "learning_rate": 2.6232358630247722e-06, |
| "loss": 0.0597, |
| "step": 2293 |
| }, |
| { |
| "epoch": 0.7767877928509449, |
| "grad_norm": 0.5078125, |
| "learning_rate": 2.61568147085866e-06, |
| "loss": 0.0518, |
| "step": 2294 |
| }, |
| { |
| "epoch": 0.7771264100230683, |
| "grad_norm": 0.53515625, |
| "learning_rate": 2.6081363349364317e-06, |
| "loss": 0.0703, |
| "step": 2295 |
| }, |
| { |
| "epoch": 0.7774650271951916, |
| "grad_norm": 0.443359375, |
| "learning_rate": 2.600600464715909e-06, |
| "loss": 0.0616, |
| "step": 2296 |
| }, |
| { |
| "epoch": 0.777803644367315, |
| "grad_norm": 0.474609375, |
| "learning_rate": 2.5930738696433124e-06, |
| "loss": 0.0609, |
| "step": 2297 |
| }, |
| { |
| "epoch": 0.7781422615394383, |
| "grad_norm": 0.423828125, |
| "learning_rate": 2.5855565591532227e-06, |
| "loss": 0.0559, |
| "step": 2298 |
| }, |
| { |
| "epoch": 0.7784808787115617, |
| "grad_norm": 0.474609375, |
| "learning_rate": 2.578048542668593e-06, |
| "loss": 0.058, |
| "step": 2299 |
| }, |
| { |
| "epoch": 0.778819495883685, |
| "grad_norm": 0.6015625, |
| "learning_rate": 2.5705498296007247e-06, |
| "loss": 0.0668, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.7791581130558083, |
| "grad_norm": 0.48046875, |
| "learning_rate": 2.56306042934925e-06, |
| "loss": 0.0635, |
| "step": 2301 |
| }, |
| { |
| "epoch": 0.7794967302279316, |
| "grad_norm": 0.6171875, |
| "learning_rate": 2.5555803513021393e-06, |
| "loss": 0.0563, |
| "step": 2302 |
| }, |
| { |
| "epoch": 0.779835347400055, |
| "grad_norm": 0.58203125, |
| "learning_rate": 2.5481096048356636e-06, |
| "loss": 0.0667, |
| "step": 2303 |
| }, |
| { |
| "epoch": 0.7801739645721784, |
| "grad_norm": 0.451171875, |
| "learning_rate": 2.5406481993144084e-06, |
| "loss": 0.0589, |
| "step": 2304 |
| }, |
| { |
| "epoch": 0.7805125817443017, |
| "grad_norm": 0.515625, |
| "learning_rate": 2.5331961440912476e-06, |
| "loss": 0.0657, |
| "step": 2305 |
| }, |
| { |
| "epoch": 0.7808511989164251, |
| "grad_norm": 0.5, |
| "learning_rate": 2.525753448507329e-06, |
| "loss": 0.0664, |
| "step": 2306 |
| }, |
| { |
| "epoch": 0.7811898160885484, |
| "grad_norm": 0.462890625, |
| "learning_rate": 2.518320121892076e-06, |
| "loss": 0.0628, |
| "step": 2307 |
| }, |
| { |
| "epoch": 0.7815284332606718, |
| "grad_norm": 0.5546875, |
| "learning_rate": 2.5108961735631634e-06, |
| "loss": 0.0737, |
| "step": 2308 |
| }, |
| { |
| "epoch": 0.781867050432795, |
| "grad_norm": 0.67578125, |
| "learning_rate": 2.503481612826506e-06, |
| "loss": 0.0841, |
| "step": 2309 |
| }, |
| { |
| "epoch": 0.7822056676049184, |
| "grad_norm": 0.5234375, |
| "learning_rate": 2.496076448976261e-06, |
| "loss": 0.0647, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.7825442847770417, |
| "grad_norm": 0.50390625, |
| "learning_rate": 2.4886806912948034e-06, |
| "loss": 0.0586, |
| "step": 2311 |
| }, |
| { |
| "epoch": 0.7828829019491651, |
| "grad_norm": 0.447265625, |
| "learning_rate": 2.481294349052711e-06, |
| "loss": 0.0621, |
| "step": 2312 |
| }, |
| { |
| "epoch": 0.7832215191212885, |
| "grad_norm": 0.455078125, |
| "learning_rate": 2.4739174315087678e-06, |
| "loss": 0.0668, |
| "step": 2313 |
| }, |
| { |
| "epoch": 0.7835601362934118, |
| "grad_norm": 0.5703125, |
| "learning_rate": 2.466549947909942e-06, |
| "loss": 0.0792, |
| "step": 2314 |
| }, |
| { |
| "epoch": 0.7838987534655352, |
| "grad_norm": 0.439453125, |
| "learning_rate": 2.4591919074913707e-06, |
| "loss": 0.06, |
| "step": 2315 |
| }, |
| { |
| "epoch": 0.7842373706376584, |
| "grad_norm": 0.4765625, |
| "learning_rate": 2.4518433194763625e-06, |
| "loss": 0.0653, |
| "step": 2316 |
| }, |
| { |
| "epoch": 0.7845759878097818, |
| "grad_norm": 0.482421875, |
| "learning_rate": 2.444504193076368e-06, |
| "loss": 0.0655, |
| "step": 2317 |
| }, |
| { |
| "epoch": 0.7849146049819051, |
| "grad_norm": 0.5, |
| "learning_rate": 2.437174537490985e-06, |
| "loss": 0.0754, |
| "step": 2318 |
| }, |
| { |
| "epoch": 0.7852532221540285, |
| "grad_norm": 0.453125, |
| "learning_rate": 2.429854361907942e-06, |
| "loss": 0.0545, |
| "step": 2319 |
| }, |
| { |
| "epoch": 0.7855918393261518, |
| "grad_norm": 0.68359375, |
| "learning_rate": 2.4225436755030717e-06, |
| "loss": 0.0695, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.7859304564982752, |
| "grad_norm": 0.390625, |
| "learning_rate": 2.415242487440328e-06, |
| "loss": 0.0421, |
| "step": 2321 |
| }, |
| { |
| "epoch": 0.7862690736703986, |
| "grad_norm": 0.53515625, |
| "learning_rate": 2.4079508068717427e-06, |
| "loss": 0.0666, |
| "step": 2322 |
| }, |
| { |
| "epoch": 0.7866076908425218, |
| "grad_norm": 0.365234375, |
| "learning_rate": 2.4006686429374437e-06, |
| "loss": 0.0405, |
| "step": 2323 |
| }, |
| { |
| "epoch": 0.7869463080146452, |
| "grad_norm": 0.462890625, |
| "learning_rate": 2.3933960047656235e-06, |
| "loss": 0.0497, |
| "step": 2324 |
| }, |
| { |
| "epoch": 0.7872849251867685, |
| "grad_norm": 0.419921875, |
| "learning_rate": 2.386132901472532e-06, |
| "loss": 0.0493, |
| "step": 2325 |
| }, |
| { |
| "epoch": 0.7876235423588919, |
| "grad_norm": 0.5390625, |
| "learning_rate": 2.378879342162471e-06, |
| "loss": 0.0699, |
| "step": 2326 |
| }, |
| { |
| "epoch": 0.7879621595310152, |
| "grad_norm": 0.451171875, |
| "learning_rate": 2.371635335927781e-06, |
| "loss": 0.0606, |
| "step": 2327 |
| }, |
| { |
| "epoch": 0.7883007767031386, |
| "grad_norm": 0.61328125, |
| "learning_rate": 2.3644008918488216e-06, |
| "loss": 0.0653, |
| "step": 2328 |
| }, |
| { |
| "epoch": 0.7886393938752619, |
| "grad_norm": 0.53125, |
| "learning_rate": 2.357176018993966e-06, |
| "loss": 0.0768, |
| "step": 2329 |
| }, |
| { |
| "epoch": 0.7889780110473852, |
| "grad_norm": 0.44140625, |
| "learning_rate": 2.349960726419599e-06, |
| "loss": 0.0589, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.7893166282195085, |
| "grad_norm": 0.52734375, |
| "learning_rate": 2.3427550231700836e-06, |
| "loss": 0.0645, |
| "step": 2331 |
| }, |
| { |
| "epoch": 0.7896552453916319, |
| "grad_norm": 0.443359375, |
| "learning_rate": 2.335558918277774e-06, |
| "loss": 0.0513, |
| "step": 2332 |
| }, |
| { |
| "epoch": 0.7899938625637553, |
| "grad_norm": 0.490234375, |
| "learning_rate": 2.3283724207629886e-06, |
| "loss": 0.0674, |
| "step": 2333 |
| }, |
| { |
| "epoch": 0.7903324797358786, |
| "grad_norm": 0.44140625, |
| "learning_rate": 2.3211955396340003e-06, |
| "loss": 0.0536, |
| "step": 2334 |
| }, |
| { |
| "epoch": 0.790671096908002, |
| "grad_norm": 0.63671875, |
| "learning_rate": 2.3140282838870332e-06, |
| "loss": 0.074, |
| "step": 2335 |
| }, |
| { |
| "epoch": 0.7910097140801253, |
| "grad_norm": 0.625, |
| "learning_rate": 2.3068706625062385e-06, |
| "loss": 0.0729, |
| "step": 2336 |
| }, |
| { |
| "epoch": 0.7913483312522487, |
| "grad_norm": 0.439453125, |
| "learning_rate": 2.299722684463698e-06, |
| "loss": 0.0594, |
| "step": 2337 |
| }, |
| { |
| "epoch": 0.7916869484243719, |
| "grad_norm": 0.40234375, |
| "learning_rate": 2.2925843587194042e-06, |
| "loss": 0.0524, |
| "step": 2338 |
| }, |
| { |
| "epoch": 0.7920255655964953, |
| "grad_norm": 0.44921875, |
| "learning_rate": 2.285455694221246e-06, |
| "loss": 0.0622, |
| "step": 2339 |
| }, |
| { |
| "epoch": 0.7923641827686186, |
| "grad_norm": 0.41015625, |
| "learning_rate": 2.2783366999050074e-06, |
| "loss": 0.0543, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.792702799940742, |
| "grad_norm": 0.5859375, |
| "learning_rate": 2.2712273846943457e-06, |
| "loss": 0.0729, |
| "step": 2341 |
| }, |
| { |
| "epoch": 0.7930414171128654, |
| "grad_norm": 0.515625, |
| "learning_rate": 2.264127757500789e-06, |
| "loss": 0.0587, |
| "step": 2342 |
| }, |
| { |
| "epoch": 0.7933800342849887, |
| "grad_norm": 0.37109375, |
| "learning_rate": 2.2570378272237237e-06, |
| "loss": 0.046, |
| "step": 2343 |
| }, |
| { |
| "epoch": 0.7937186514571121, |
| "grad_norm": 0.49609375, |
| "learning_rate": 2.2499576027503723e-06, |
| "loss": 0.0689, |
| "step": 2344 |
| }, |
| { |
| "epoch": 0.7940572686292353, |
| "grad_norm": 0.443359375, |
| "learning_rate": 2.2428870929558012e-06, |
| "loss": 0.0569, |
| "step": 2345 |
| }, |
| { |
| "epoch": 0.7943958858013587, |
| "grad_norm": 0.484375, |
| "learning_rate": 2.2358263067028952e-06, |
| "loss": 0.0631, |
| "step": 2346 |
| }, |
| { |
| "epoch": 0.794734502973482, |
| "grad_norm": 0.4609375, |
| "learning_rate": 2.228775252842347e-06, |
| "loss": 0.064, |
| "step": 2347 |
| }, |
| { |
| "epoch": 0.7950731201456054, |
| "grad_norm": 0.470703125, |
| "learning_rate": 2.221733940212657e-06, |
| "loss": 0.0628, |
| "step": 2348 |
| }, |
| { |
| "epoch": 0.7954117373177287, |
| "grad_norm": 0.49609375, |
| "learning_rate": 2.2147023776401077e-06, |
| "loss": 0.0629, |
| "step": 2349 |
| }, |
| { |
| "epoch": 0.7957503544898521, |
| "grad_norm": 0.49609375, |
| "learning_rate": 2.2076805739387664e-06, |
| "loss": 0.0649, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.7960889716619755, |
| "grad_norm": 0.40625, |
| "learning_rate": 2.200668537910461e-06, |
| "loss": 0.054, |
| "step": 2351 |
| }, |
| { |
| "epoch": 0.7964275888340987, |
| "grad_norm": 0.4140625, |
| "learning_rate": 2.1936662783447836e-06, |
| "loss": 0.0542, |
| "step": 2352 |
| }, |
| { |
| "epoch": 0.796766206006222, |
| "grad_norm": 0.44140625, |
| "learning_rate": 2.1866738040190638e-06, |
| "loss": 0.0587, |
| "step": 2353 |
| }, |
| { |
| "epoch": 0.7971048231783454, |
| "grad_norm": 1.265625, |
| "learning_rate": 2.1796911236983708e-06, |
| "loss": 0.0656, |
| "step": 2354 |
| }, |
| { |
| "epoch": 0.7974434403504688, |
| "grad_norm": 0.498046875, |
| "learning_rate": 2.172718246135492e-06, |
| "loss": 0.0612, |
| "step": 2355 |
| }, |
| { |
| "epoch": 0.7977820575225921, |
| "grad_norm": 0.431640625, |
| "learning_rate": 2.165755180070932e-06, |
| "loss": 0.0623, |
| "step": 2356 |
| }, |
| { |
| "epoch": 0.7981206746947155, |
| "grad_norm": 0.59375, |
| "learning_rate": 2.158801934232897e-06, |
| "loss": 0.0772, |
| "step": 2357 |
| }, |
| { |
| "epoch": 0.7984592918668388, |
| "grad_norm": 0.5078125, |
| "learning_rate": 2.1518585173372774e-06, |
| "loss": 0.0662, |
| "step": 2358 |
| }, |
| { |
| "epoch": 0.7987979090389621, |
| "grad_norm": 0.53125, |
| "learning_rate": 2.14492493808765e-06, |
| "loss": 0.0785, |
| "step": 2359 |
| }, |
| { |
| "epoch": 0.7991365262110854, |
| "grad_norm": 0.55859375, |
| "learning_rate": 2.138001205175253e-06, |
| "loss": 0.0744, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.7994751433832088, |
| "grad_norm": 0.5390625, |
| "learning_rate": 2.1310873272789878e-06, |
| "loss": 0.0661, |
| "step": 2361 |
| }, |
| { |
| "epoch": 0.7998137605553322, |
| "grad_norm": 0.52734375, |
| "learning_rate": 2.1241833130654056e-06, |
| "loss": 0.0665, |
| "step": 2362 |
| }, |
| { |
| "epoch": 0.8001523777274555, |
| "grad_norm": 0.5625, |
| "learning_rate": 2.117289171188681e-06, |
| "loss": 0.0626, |
| "step": 2363 |
| }, |
| { |
| "epoch": 0.8004909948995789, |
| "grad_norm": 0.51171875, |
| "learning_rate": 2.1104049102906254e-06, |
| "loss": 0.0716, |
| "step": 2364 |
| }, |
| { |
| "epoch": 0.8008296120717022, |
| "grad_norm": 0.40234375, |
| "learning_rate": 2.103530539000662e-06, |
| "loss": 0.0494, |
| "step": 2365 |
| }, |
| { |
| "epoch": 0.8011682292438256, |
| "grad_norm": 0.56640625, |
| "learning_rate": 2.096666065935813e-06, |
| "loss": 0.0471, |
| "step": 2366 |
| }, |
| { |
| "epoch": 0.8015068464159488, |
| "grad_norm": 0.51953125, |
| "learning_rate": 2.089811499700699e-06, |
| "loss": 0.0715, |
| "step": 2367 |
| }, |
| { |
| "epoch": 0.8018454635880722, |
| "grad_norm": 0.458984375, |
| "learning_rate": 2.082966848887514e-06, |
| "loss": 0.0612, |
| "step": 2368 |
| }, |
| { |
| "epoch": 0.8018454635880722, |
| "eval_loss": 0.06570233404636383, |
| "eval_runtime": 815.4749, |
| "eval_samples_per_second": 12.199, |
| "eval_steps_per_second": 3.05, |
| "step": 2368 |
| }, |
| { |
| "epoch": 0.8021840807601955, |
| "grad_norm": 0.435546875, |
| "learning_rate": 2.0761321220760324e-06, |
| "loss": 0.0557, |
| "step": 2369 |
| }, |
| { |
| "epoch": 0.8025226979323189, |
| "grad_norm": 0.40625, |
| "learning_rate": 2.069307327833586e-06, |
| "loss": 0.0535, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.8028613151044423, |
| "grad_norm": 0.431640625, |
| "learning_rate": 2.062492474715053e-06, |
| "loss": 0.0586, |
| "step": 2371 |
| }, |
| { |
| "epoch": 0.8031999322765656, |
| "grad_norm": 0.451171875, |
| "learning_rate": 2.05568757126285e-06, |
| "loss": 0.058, |
| "step": 2372 |
| }, |
| { |
| "epoch": 0.803538549448689, |
| "grad_norm": 0.423828125, |
| "learning_rate": 2.0488926260069284e-06, |
| "loss": 0.0479, |
| "step": 2373 |
| }, |
| { |
| "epoch": 0.8038771666208122, |
| "grad_norm": 0.546875, |
| "learning_rate": 2.042107647464748e-06, |
| "loss": 0.0671, |
| "step": 2374 |
| }, |
| { |
| "epoch": 0.8042157837929356, |
| "grad_norm": 0.57421875, |
| "learning_rate": 2.0353326441412835e-06, |
| "loss": 0.0799, |
| "step": 2375 |
| }, |
| { |
| "epoch": 0.8045544009650589, |
| "grad_norm": 0.6640625, |
| "learning_rate": 2.0285676245290032e-06, |
| "loss": 0.0755, |
| "step": 2376 |
| }, |
| { |
| "epoch": 0.8048930181371823, |
| "grad_norm": 0.48046875, |
| "learning_rate": 2.021812597107855e-06, |
| "loss": 0.0677, |
| "step": 2377 |
| }, |
| { |
| "epoch": 0.8052316353093056, |
| "grad_norm": 0.51953125, |
| "learning_rate": 2.0150675703452717e-06, |
| "loss": 0.0649, |
| "step": 2378 |
| }, |
| { |
| "epoch": 0.805570252481429, |
| "grad_norm": 0.466796875, |
| "learning_rate": 2.0083325526961394e-06, |
| "loss": 0.0591, |
| "step": 2379 |
| }, |
| { |
| "epoch": 0.8059088696535524, |
| "grad_norm": 0.474609375, |
| "learning_rate": 2.0016075526028066e-06, |
| "loss": 0.0642, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.8062474868256756, |
| "grad_norm": 0.3984375, |
| "learning_rate": 1.9948925784950625e-06, |
| "loss": 0.0564, |
| "step": 2381 |
| }, |
| { |
| "epoch": 0.806586103997799, |
| "grad_norm": 0.43359375, |
| "learning_rate": 1.9881876387901243e-06, |
| "loss": 0.0556, |
| "step": 2382 |
| }, |
| { |
| "epoch": 0.8069247211699223, |
| "grad_norm": 0.5, |
| "learning_rate": 1.9814927418926366e-06, |
| "loss": 0.0643, |
| "step": 2383 |
| }, |
| { |
| "epoch": 0.8072633383420457, |
| "grad_norm": 0.546875, |
| "learning_rate": 1.974807896194655e-06, |
| "loss": 0.0687, |
| "step": 2384 |
| }, |
| { |
| "epoch": 0.807601955514169, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.9681331100756298e-06, |
| "loss": 0.073, |
| "step": 2385 |
| }, |
| { |
| "epoch": 0.8079405726862924, |
| "grad_norm": 0.55859375, |
| "learning_rate": 1.9614683919024103e-06, |
| "loss": 0.0685, |
| "step": 2386 |
| }, |
| { |
| "epoch": 0.8082791898584157, |
| "grad_norm": 0.4296875, |
| "learning_rate": 1.9548137500292163e-06, |
| "loss": 0.059, |
| "step": 2387 |
| }, |
| { |
| "epoch": 0.808617807030539, |
| "grad_norm": 0.48046875, |
| "learning_rate": 1.9481691927976453e-06, |
| "loss": 0.0592, |
| "step": 2388 |
| }, |
| { |
| "epoch": 0.8089564242026623, |
| "grad_norm": 0.546875, |
| "learning_rate": 1.9415347285366527e-06, |
| "loss": 0.0778, |
| "step": 2389 |
| }, |
| { |
| "epoch": 0.8092950413747857, |
| "grad_norm": 0.671875, |
| "learning_rate": 1.9349103655625346e-06, |
| "loss": 0.0807, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.809633658546909, |
| "grad_norm": 0.65625, |
| "learning_rate": 1.9282961121789324e-06, |
| "loss": 0.0714, |
| "step": 2391 |
| }, |
| { |
| "epoch": 0.8099722757190324, |
| "grad_norm": 0.46484375, |
| "learning_rate": 1.9216919766768194e-06, |
| "loss": 0.0507, |
| "step": 2392 |
| }, |
| { |
| "epoch": 0.8103108928911558, |
| "grad_norm": 0.59375, |
| "learning_rate": 1.915097967334469e-06, |
| "loss": 0.0808, |
| "step": 2393 |
| }, |
| { |
| "epoch": 0.8106495100632791, |
| "grad_norm": 0.60546875, |
| "learning_rate": 1.9085140924174783e-06, |
| "loss": 0.0563, |
| "step": 2394 |
| }, |
| { |
| "epoch": 0.8109881272354025, |
| "grad_norm": 0.48046875, |
| "learning_rate": 1.9019403601787377e-06, |
| "loss": 0.0615, |
| "step": 2395 |
| }, |
| { |
| "epoch": 0.8113267444075257, |
| "grad_norm": 0.51953125, |
| "learning_rate": 1.8953767788584155e-06, |
| "loss": 0.0671, |
| "step": 2396 |
| }, |
| { |
| "epoch": 0.8116653615796491, |
| "grad_norm": 0.466796875, |
| "learning_rate": 1.8888233566839654e-06, |
| "loss": 0.0672, |
| "step": 2397 |
| }, |
| { |
| "epoch": 0.8120039787517724, |
| "grad_norm": 0.4765625, |
| "learning_rate": 1.8822801018700999e-06, |
| "loss": 0.0612, |
| "step": 2398 |
| }, |
| { |
| "epoch": 0.8123425959238958, |
| "grad_norm": 0.6796875, |
| "learning_rate": 1.8757470226187902e-06, |
| "loss": 0.0745, |
| "step": 2399 |
| }, |
| { |
| "epoch": 0.8126812130960192, |
| "grad_norm": 0.40625, |
| "learning_rate": 1.8692241271192557e-06, |
| "loss": 0.0505, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.8130198302681425, |
| "grad_norm": 0.4609375, |
| "learning_rate": 1.8627114235479393e-06, |
| "loss": 0.0616, |
| "step": 2401 |
| }, |
| { |
| "epoch": 0.8133584474402659, |
| "grad_norm": 0.431640625, |
| "learning_rate": 1.8562089200685195e-06, |
| "loss": 0.0558, |
| "step": 2402 |
| }, |
| { |
| "epoch": 0.8136970646123891, |
| "grad_norm": 0.474609375, |
| "learning_rate": 1.8497166248318876e-06, |
| "loss": 0.0664, |
| "step": 2403 |
| }, |
| { |
| "epoch": 0.8140356817845125, |
| "grad_norm": 0.57421875, |
| "learning_rate": 1.8432345459761303e-06, |
| "loss": 0.0728, |
| "step": 2404 |
| }, |
| { |
| "epoch": 0.8143742989566358, |
| "grad_norm": 0.5546875, |
| "learning_rate": 1.8367626916265401e-06, |
| "loss": 0.0737, |
| "step": 2405 |
| }, |
| { |
| "epoch": 0.8147129161287592, |
| "grad_norm": 0.63671875, |
| "learning_rate": 1.8303010698955803e-06, |
| "loss": 0.0819, |
| "step": 2406 |
| }, |
| { |
| "epoch": 0.8150515333008825, |
| "grad_norm": 0.3828125, |
| "learning_rate": 1.8238496888828983e-06, |
| "loss": 0.0435, |
| "step": 2407 |
| }, |
| { |
| "epoch": 0.8153901504730059, |
| "grad_norm": 0.609375, |
| "learning_rate": 1.817408556675302e-06, |
| "loss": 0.0885, |
| "step": 2408 |
| }, |
| { |
| "epoch": 0.8157287676451292, |
| "grad_norm": 0.6015625, |
| "learning_rate": 1.8109776813467473e-06, |
| "loss": 0.0799, |
| "step": 2409 |
| }, |
| { |
| "epoch": 0.8160673848172525, |
| "grad_norm": 0.4296875, |
| "learning_rate": 1.8045570709583394e-06, |
| "loss": 0.0674, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.8164060019893759, |
| "grad_norm": 0.46875, |
| "learning_rate": 1.7981467335583158e-06, |
| "loss": 0.0574, |
| "step": 2411 |
| }, |
| { |
| "epoch": 0.8167446191614992, |
| "grad_norm": 0.46875, |
| "learning_rate": 1.7917466771820303e-06, |
| "loss": 0.064, |
| "step": 2412 |
| }, |
| { |
| "epoch": 0.8170832363336226, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.7853569098519586e-06, |
| "loss": 0.0681, |
| "step": 2413 |
| }, |
| { |
| "epoch": 0.8174218535057459, |
| "grad_norm": 0.412109375, |
| "learning_rate": 1.7789774395776716e-06, |
| "loss": 0.053, |
| "step": 2414 |
| }, |
| { |
| "epoch": 0.8177604706778693, |
| "grad_norm": 0.56640625, |
| "learning_rate": 1.7726082743558349e-06, |
| "loss": 0.0633, |
| "step": 2415 |
| }, |
| { |
| "epoch": 0.8180990878499926, |
| "grad_norm": 0.482421875, |
| "learning_rate": 1.766249422170202e-06, |
| "loss": 0.0689, |
| "step": 2416 |
| }, |
| { |
| "epoch": 0.8184377050221159, |
| "grad_norm": 0.6015625, |
| "learning_rate": 1.7599008909915894e-06, |
| "loss": 0.0711, |
| "step": 2417 |
| }, |
| { |
| "epoch": 0.8187763221942392, |
| "grad_norm": 0.70703125, |
| "learning_rate": 1.7535626887778846e-06, |
| "loss": 0.0525, |
| "step": 2418 |
| }, |
| { |
| "epoch": 0.8191149393663626, |
| "grad_norm": 0.4140625, |
| "learning_rate": 1.7472348234740255e-06, |
| "loss": 0.0505, |
| "step": 2419 |
| }, |
| { |
| "epoch": 0.819453556538486, |
| "grad_norm": 0.53125, |
| "learning_rate": 1.7409173030119886e-06, |
| "loss": 0.0696, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.8197921737106093, |
| "grad_norm": 0.5703125, |
| "learning_rate": 1.734610135310788e-06, |
| "loss": 0.0605, |
| "step": 2421 |
| }, |
| { |
| "epoch": 0.8201307908827327, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.7283133282764609e-06, |
| "loss": 0.0601, |
| "step": 2422 |
| }, |
| { |
| "epoch": 0.820469408054856, |
| "grad_norm": 0.76953125, |
| "learning_rate": 1.722026889802052e-06, |
| "loss": 0.0807, |
| "step": 2423 |
| }, |
| { |
| "epoch": 0.8208080252269794, |
| "grad_norm": 0.458984375, |
| "learning_rate": 1.715750827767615e-06, |
| "loss": 0.0635, |
| "step": 2424 |
| }, |
| { |
| "epoch": 0.8211466423991026, |
| "grad_norm": 0.49609375, |
| "learning_rate": 1.7094851500401922e-06, |
| "loss": 0.0611, |
| "step": 2425 |
| }, |
| { |
| "epoch": 0.821485259571226, |
| "grad_norm": 0.5859375, |
| "learning_rate": 1.703229864473811e-06, |
| "loss": 0.0772, |
| "step": 2426 |
| }, |
| { |
| "epoch": 0.8218238767433493, |
| "grad_norm": 0.408203125, |
| "learning_rate": 1.6969849789094762e-06, |
| "loss": 0.0508, |
| "step": 2427 |
| }, |
| { |
| "epoch": 0.8221624939154727, |
| "grad_norm": 0.51171875, |
| "learning_rate": 1.6907505011751468e-06, |
| "loss": 0.0776, |
| "step": 2428 |
| }, |
| { |
| "epoch": 0.822501111087596, |
| "grad_norm": 0.5859375, |
| "learning_rate": 1.684526439085744e-06, |
| "loss": 0.073, |
| "step": 2429 |
| }, |
| { |
| "epoch": 0.8228397282597194, |
| "grad_norm": 0.51171875, |
| "learning_rate": 1.6783128004431326e-06, |
| "loss": 0.0656, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.8231783454318428, |
| "grad_norm": 0.66015625, |
| "learning_rate": 1.6721095930361042e-06, |
| "loss": 0.0916, |
| "step": 2431 |
| }, |
| { |
| "epoch": 0.823516962603966, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.6659168246403855e-06, |
| "loss": 0.0689, |
| "step": 2432 |
| }, |
| { |
| "epoch": 0.8238555797760894, |
| "grad_norm": 0.455078125, |
| "learning_rate": 1.6597345030186052e-06, |
| "loss": 0.0594, |
| "step": 2433 |
| }, |
| { |
| "epoch": 0.8241941969482127, |
| "grad_norm": 0.546875, |
| "learning_rate": 1.6535626359203083e-06, |
| "loss": 0.0555, |
| "step": 2434 |
| }, |
| { |
| "epoch": 0.8245328141203361, |
| "grad_norm": 0.875, |
| "learning_rate": 1.6474012310819354e-06, |
| "loss": 0.0823, |
| "step": 2435 |
| }, |
| { |
| "epoch": 0.8248714312924594, |
| "grad_norm": 0.40234375, |
| "learning_rate": 1.6412502962267973e-06, |
| "loss": 0.0519, |
| "step": 2436 |
| }, |
| { |
| "epoch": 0.8252100484645828, |
| "grad_norm": 0.447265625, |
| "learning_rate": 1.6351098390650966e-06, |
| "loss": 0.0576, |
| "step": 2437 |
| }, |
| { |
| "epoch": 0.8255486656367061, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.6289798672938994e-06, |
| "loss": 0.0627, |
| "step": 2438 |
| }, |
| { |
| "epoch": 0.8258872828088294, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.6228603885971206e-06, |
| "loss": 0.0665, |
| "step": 2439 |
| }, |
| { |
| "epoch": 0.8262258999809527, |
| "grad_norm": 3.921875, |
| "learning_rate": 1.6167514106455306e-06, |
| "loss": 0.0593, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.8265645171530761, |
| "grad_norm": 0.466796875, |
| "learning_rate": 1.6106529410967354e-06, |
| "loss": 0.0556, |
| "step": 2441 |
| }, |
| { |
| "epoch": 0.8269031343251995, |
| "grad_norm": 0.6015625, |
| "learning_rate": 1.604564987595162e-06, |
| "loss": 0.059, |
| "step": 2442 |
| }, |
| { |
| "epoch": 0.8272417514973228, |
| "grad_norm": 0.41796875, |
| "learning_rate": 1.598487557772066e-06, |
| "loss": 0.0548, |
| "step": 2443 |
| }, |
| { |
| "epoch": 0.8275803686694462, |
| "grad_norm": 0.373046875, |
| "learning_rate": 1.5924206592455016e-06, |
| "loss": 0.0506, |
| "step": 2444 |
| }, |
| { |
| "epoch": 0.8279189858415695, |
| "grad_norm": 0.578125, |
| "learning_rate": 1.5863642996203288e-06, |
| "loss": 0.0797, |
| "step": 2445 |
| }, |
| { |
| "epoch": 0.8282576030136928, |
| "grad_norm": 0.4453125, |
| "learning_rate": 1.580318486488197e-06, |
| "loss": 0.0668, |
| "step": 2446 |
| }, |
| { |
| "epoch": 0.8285962201858161, |
| "grad_norm": 0.44921875, |
| "learning_rate": 1.5742832274275288e-06, |
| "loss": 0.0522, |
| "step": 2447 |
| }, |
| { |
| "epoch": 0.8289348373579395, |
| "grad_norm": 0.359375, |
| "learning_rate": 1.5682585300035237e-06, |
| "loss": 0.0387, |
| "step": 2448 |
| }, |
| { |
| "epoch": 0.8292734545300628, |
| "grad_norm": 0.57421875, |
| "learning_rate": 1.5622444017681438e-06, |
| "loss": 0.0626, |
| "step": 2449 |
| }, |
| { |
| "epoch": 0.8296120717021862, |
| "grad_norm": 0.44140625, |
| "learning_rate": 1.5562408502600946e-06, |
| "loss": 0.0529, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.8299506888743096, |
| "grad_norm": 0.4296875, |
| "learning_rate": 1.550247883004833e-06, |
| "loss": 0.0554, |
| "step": 2451 |
| }, |
| { |
| "epoch": 0.8302893060464329, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.5442655075145375e-06, |
| "loss": 0.059, |
| "step": 2452 |
| }, |
| { |
| "epoch": 0.8306279232185563, |
| "grad_norm": 0.486328125, |
| "learning_rate": 1.5382937312881208e-06, |
| "loss": 0.0573, |
| "step": 2453 |
| }, |
| { |
| "epoch": 0.8309665403906795, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.5323325618112072e-06, |
| "loss": 0.0677, |
| "step": 2454 |
| }, |
| { |
| "epoch": 0.8313051575628029, |
| "grad_norm": 0.408203125, |
| "learning_rate": 1.5263820065561174e-06, |
| "loss": 0.0532, |
| "step": 2455 |
| }, |
| { |
| "epoch": 0.8316437747349262, |
| "grad_norm": 0.5546875, |
| "learning_rate": 1.520442072981877e-06, |
| "loss": 0.0751, |
| "step": 2456 |
| }, |
| { |
| "epoch": 0.8319823919070496, |
| "grad_norm": 0.51953125, |
| "learning_rate": 1.5145127685341932e-06, |
| "loss": 0.0475, |
| "step": 2457 |
| }, |
| { |
| "epoch": 0.832321009079173, |
| "grad_norm": 0.609375, |
| "learning_rate": 1.5085941006454453e-06, |
| "loss": 0.0856, |
| "step": 2458 |
| }, |
| { |
| "epoch": 0.8326596262512963, |
| "grad_norm": 0.4140625, |
| "learning_rate": 1.5026860767346862e-06, |
| "loss": 0.0559, |
| "step": 2459 |
| }, |
| { |
| "epoch": 0.8329982434234197, |
| "grad_norm": 0.4609375, |
| "learning_rate": 1.4967887042076278e-06, |
| "loss": 0.0658, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.8333368605955429, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.4909019904566223e-06, |
| "loss": 0.0674, |
| "step": 2461 |
| }, |
| { |
| "epoch": 0.8336754777676663, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.4850259428606707e-06, |
| "loss": 0.0607, |
| "step": 2462 |
| }, |
| { |
| "epoch": 0.8340140949397896, |
| "grad_norm": 0.40625, |
| "learning_rate": 1.4791605687853927e-06, |
| "loss": 0.0569, |
| "step": 2463 |
| }, |
| { |
| "epoch": 0.834352712111913, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.4733058755830399e-06, |
| "loss": 0.0584, |
| "step": 2464 |
| }, |
| { |
| "epoch": 0.8346913292840363, |
| "grad_norm": 0.46484375, |
| "learning_rate": 1.4674618705924715e-06, |
| "loss": 0.0637, |
| "step": 2465 |
| }, |
| { |
| "epoch": 0.8350299464561597, |
| "grad_norm": 0.41015625, |
| "learning_rate": 1.4616285611391445e-06, |
| "loss": 0.0515, |
| "step": 2466 |
| }, |
| { |
| "epoch": 0.835368563628283, |
| "grad_norm": 0.58203125, |
| "learning_rate": 1.4558059545351144e-06, |
| "loss": 0.0595, |
| "step": 2467 |
| }, |
| { |
| "epoch": 0.8357071808004063, |
| "grad_norm": 1.4453125, |
| "learning_rate": 1.4499940580790207e-06, |
| "loss": 0.0883, |
| "step": 2468 |
| }, |
| { |
| "epoch": 0.8360457979725296, |
| "grad_norm": 0.419921875, |
| "learning_rate": 1.4441928790560733e-06, |
| "loss": 0.059, |
| "step": 2469 |
| }, |
| { |
| "epoch": 0.836384415144653, |
| "grad_norm": 0.69921875, |
| "learning_rate": 1.4384024247380534e-06, |
| "loss": 0.0896, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.8367230323167764, |
| "grad_norm": 0.61328125, |
| "learning_rate": 1.4326227023832928e-06, |
| "loss": 0.0609, |
| "step": 2471 |
| }, |
| { |
| "epoch": 0.8370616494888997, |
| "grad_norm": 0.486328125, |
| "learning_rate": 1.426853719236676e-06, |
| "loss": 0.0617, |
| "step": 2472 |
| }, |
| { |
| "epoch": 0.8374002666610231, |
| "grad_norm": 0.6875, |
| "learning_rate": 1.4210954825296253e-06, |
| "loss": 0.0609, |
| "step": 2473 |
| }, |
| { |
| "epoch": 0.8377388838331464, |
| "grad_norm": 0.58203125, |
| "learning_rate": 1.4153479994800868e-06, |
| "loss": 0.0683, |
| "step": 2474 |
| }, |
| { |
| "epoch": 0.8380775010052697, |
| "grad_norm": 0.41796875, |
| "learning_rate": 1.4096112772925353e-06, |
| "loss": 0.0581, |
| "step": 2475 |
| }, |
| { |
| "epoch": 0.838416118177393, |
| "grad_norm": 0.478515625, |
| "learning_rate": 1.4038853231579486e-06, |
| "loss": 0.0599, |
| "step": 2476 |
| }, |
| { |
| "epoch": 0.8387547353495164, |
| "grad_norm": 0.6953125, |
| "learning_rate": 1.3981701442538155e-06, |
| "loss": 0.1391, |
| "step": 2477 |
| }, |
| { |
| "epoch": 0.8390933525216397, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.3924657477441072e-06, |
| "loss": 0.0586, |
| "step": 2478 |
| }, |
| { |
| "epoch": 0.8394319696937631, |
| "grad_norm": 0.45703125, |
| "learning_rate": 1.38677214077929e-06, |
| "loss": 0.0528, |
| "step": 2479 |
| }, |
| { |
| "epoch": 0.8397705868658865, |
| "grad_norm": 0.83984375, |
| "learning_rate": 1.381089330496297e-06, |
| "loss": 0.0666, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.8401092040380098, |
| "grad_norm": 0.66015625, |
| "learning_rate": 1.3754173240185364e-06, |
| "loss": 0.0938, |
| "step": 2481 |
| }, |
| { |
| "epoch": 0.8404478212101332, |
| "grad_norm": 0.3984375, |
| "learning_rate": 1.3697561284558624e-06, |
| "loss": 0.0529, |
| "step": 2482 |
| }, |
| { |
| "epoch": 0.8407864383822564, |
| "grad_norm": 0.451171875, |
| "learning_rate": 1.3641057509045885e-06, |
| "loss": 0.0661, |
| "step": 2483 |
| }, |
| { |
| "epoch": 0.8411250555543798, |
| "grad_norm": 0.421875, |
| "learning_rate": 1.3584661984474634e-06, |
| "loss": 0.0582, |
| "step": 2484 |
| }, |
| { |
| "epoch": 0.8414636727265031, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.3528374781536634e-06, |
| "loss": 0.0646, |
| "step": 2485 |
| }, |
| { |
| "epoch": 0.8418022898986265, |
| "grad_norm": 0.6640625, |
| "learning_rate": 1.3472195970787927e-06, |
| "loss": 0.0813, |
| "step": 2486 |
| }, |
| { |
| "epoch": 0.8421409070707498, |
| "grad_norm": 0.423828125, |
| "learning_rate": 1.3416125622648668e-06, |
| "loss": 0.0589, |
| "step": 2487 |
| }, |
| { |
| "epoch": 0.8424795242428732, |
| "grad_norm": 0.39453125, |
| "learning_rate": 1.3360163807403004e-06, |
| "loss": 0.0494, |
| "step": 2488 |
| }, |
| { |
| "epoch": 0.8428181414149966, |
| "grad_norm": 0.6640625, |
| "learning_rate": 1.3304310595199121e-06, |
| "loss": 0.0604, |
| "step": 2489 |
| }, |
| { |
| "epoch": 0.8431567585871198, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.3248566056048972e-06, |
| "loss": 0.0608, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.8434953757592432, |
| "grad_norm": 0.47265625, |
| "learning_rate": 1.3192930259828363e-06, |
| "loss": 0.0544, |
| "step": 2491 |
| }, |
| { |
| "epoch": 0.8438339929313665, |
| "grad_norm": 0.55859375, |
| "learning_rate": 1.3137403276276805e-06, |
| "loss": 0.0719, |
| "step": 2492 |
| }, |
| { |
| "epoch": 0.8441726101034899, |
| "grad_norm": 0.41796875, |
| "learning_rate": 1.3081985174997325e-06, |
| "loss": 0.0548, |
| "step": 2493 |
| }, |
| { |
| "epoch": 0.8445112272756132, |
| "grad_norm": 0.57421875, |
| "learning_rate": 1.3026676025456553e-06, |
| "loss": 0.0585, |
| "step": 2494 |
| }, |
| { |
| "epoch": 0.8448498444477366, |
| "grad_norm": 0.4765625, |
| "learning_rate": 1.2971475896984475e-06, |
| "loss": 0.0612, |
| "step": 2495 |
| }, |
| { |
| "epoch": 0.8451884616198599, |
| "grad_norm": 0.392578125, |
| "learning_rate": 1.2916384858774488e-06, |
| "loss": 0.0516, |
| "step": 2496 |
| }, |
| { |
| "epoch": 0.8455270787919832, |
| "grad_norm": 0.48046875, |
| "learning_rate": 1.2861402979883231e-06, |
| "loss": 0.0614, |
| "step": 2497 |
| }, |
| { |
| "epoch": 0.8458656959641065, |
| "grad_norm": 0.474609375, |
| "learning_rate": 1.280653032923046e-06, |
| "loss": 0.0543, |
| "step": 2498 |
| }, |
| { |
| "epoch": 0.8462043131362299, |
| "grad_norm": 0.455078125, |
| "learning_rate": 1.2751766975599033e-06, |
| "loss": 0.0527, |
| "step": 2499 |
| }, |
| { |
| "epoch": 0.8465429303083533, |
| "grad_norm": 0.56640625, |
| "learning_rate": 1.2697112987634852e-06, |
| "loss": 0.0711, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.8468815474804766, |
| "grad_norm": 0.470703125, |
| "learning_rate": 1.264256843384668e-06, |
| "loss": 0.0619, |
| "step": 2501 |
| }, |
| { |
| "epoch": 0.8472201646526, |
| "grad_norm": 0.48828125, |
| "learning_rate": 1.2588133382606105e-06, |
| "loss": 0.0684, |
| "step": 2502 |
| }, |
| { |
| "epoch": 0.8475587818247233, |
| "grad_norm": 0.466796875, |
| "learning_rate": 1.2533807902147522e-06, |
| "loss": 0.062, |
| "step": 2503 |
| }, |
| { |
| "epoch": 0.8478973989968466, |
| "grad_norm": 0.447265625, |
| "learning_rate": 1.2479592060567857e-06, |
| "loss": 0.0526, |
| "step": 2504 |
| }, |
| { |
| "epoch": 0.8482360161689699, |
| "grad_norm": 0.486328125, |
| "learning_rate": 1.2425485925826708e-06, |
| "loss": 0.0605, |
| "step": 2505 |
| }, |
| { |
| "epoch": 0.8485746333410933, |
| "grad_norm": 0.48828125, |
| "learning_rate": 1.2371489565746141e-06, |
| "loss": 0.0743, |
| "step": 2506 |
| }, |
| { |
| "epoch": 0.8489132505132166, |
| "grad_norm": 0.494140625, |
| "learning_rate": 1.231760304801054e-06, |
| "loss": 0.0679, |
| "step": 2507 |
| }, |
| { |
| "epoch": 0.84925186768534, |
| "grad_norm": 0.466796875, |
| "learning_rate": 1.2263826440166725e-06, |
| "loss": 0.0572, |
| "step": 2508 |
| }, |
| { |
| "epoch": 0.8495904848574634, |
| "grad_norm": 0.53515625, |
| "learning_rate": 1.2210159809623622e-06, |
| "loss": 0.0659, |
| "step": 2509 |
| }, |
| { |
| "epoch": 0.8499291020295867, |
| "grad_norm": 0.47265625, |
| "learning_rate": 1.2156603223652376e-06, |
| "loss": 0.0493, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.8502677192017101, |
| "grad_norm": 0.458984375, |
| "learning_rate": 1.2103156749386192e-06, |
| "loss": 0.0545, |
| "step": 2511 |
| }, |
| { |
| "epoch": 0.8506063363738333, |
| "grad_norm": 0.62109375, |
| "learning_rate": 1.2049820453820194e-06, |
| "loss": 0.0697, |
| "step": 2512 |
| }, |
| { |
| "epoch": 0.8509449535459567, |
| "grad_norm": 0.578125, |
| "learning_rate": 1.1996594403811478e-06, |
| "loss": 0.0584, |
| "step": 2513 |
| }, |
| { |
| "epoch": 0.85128357071808, |
| "grad_norm": 0.55859375, |
| "learning_rate": 1.1943478666078856e-06, |
| "loss": 0.0706, |
| "step": 2514 |
| }, |
| { |
| "epoch": 0.8516221878902034, |
| "grad_norm": 0.498046875, |
| "learning_rate": 1.1890473307202922e-06, |
| "loss": 0.0624, |
| "step": 2515 |
| }, |
| { |
| "epoch": 0.8519608050623267, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.1837578393625937e-06, |
| "loss": 0.0638, |
| "step": 2516 |
| }, |
| { |
| "epoch": 0.8522994222344501, |
| "grad_norm": 0.5546875, |
| "learning_rate": 1.1784793991651623e-06, |
| "loss": 0.0716, |
| "step": 2517 |
| }, |
| { |
| "epoch": 0.8526380394065735, |
| "grad_norm": 0.384765625, |
| "learning_rate": 1.1732120167445248e-06, |
| "loss": 0.0467, |
| "step": 2518 |
| }, |
| { |
| "epoch": 0.8529766565786967, |
| "grad_norm": 0.48046875, |
| "learning_rate": 1.1679556987033492e-06, |
| "loss": 0.0635, |
| "step": 2519 |
| }, |
| { |
| "epoch": 0.8533152737508201, |
| "grad_norm": 0.396484375, |
| "learning_rate": 1.1627104516304278e-06, |
| "loss": 0.0492, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.8536538909229434, |
| "grad_norm": 0.443359375, |
| "learning_rate": 1.157476282100677e-06, |
| "loss": 0.0493, |
| "step": 2521 |
| }, |
| { |
| "epoch": 0.8539925080950668, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.1522531966751304e-06, |
| "loss": 0.0641, |
| "step": 2522 |
| }, |
| { |
| "epoch": 0.8543311252671901, |
| "grad_norm": 0.62890625, |
| "learning_rate": 1.1470412019009246e-06, |
| "loss": 0.0554, |
| "step": 2523 |
| }, |
| { |
| "epoch": 0.8546697424393135, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.141840304311298e-06, |
| "loss": 0.0642, |
| "step": 2524 |
| }, |
| { |
| "epoch": 0.8550083596114368, |
| "grad_norm": 0.7109375, |
| "learning_rate": 1.1366505104255732e-06, |
| "loss": 0.0578, |
| "step": 2525 |
| }, |
| { |
| "epoch": 0.8553469767835601, |
| "grad_norm": 0.435546875, |
| "learning_rate": 1.1314718267491587e-06, |
| "loss": 0.0582, |
| "step": 2526 |
| }, |
| { |
| "epoch": 0.8556855939556834, |
| "grad_norm": 0.546875, |
| "learning_rate": 1.1263042597735363e-06, |
| "loss": 0.0638, |
| "step": 2527 |
| }, |
| { |
| "epoch": 0.8560242111278068, |
| "grad_norm": 0.451171875, |
| "learning_rate": 1.121147815976248e-06, |
| "loss": 0.0628, |
| "step": 2528 |
| }, |
| { |
| "epoch": 0.8563628282999302, |
| "grad_norm": 0.52734375, |
| "learning_rate": 1.1160025018208997e-06, |
| "loss": 0.056, |
| "step": 2529 |
| }, |
| { |
| "epoch": 0.8567014454720535, |
| "grad_norm": 0.478515625, |
| "learning_rate": 1.110868323757144e-06, |
| "loss": 0.0598, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.8570400626441769, |
| "grad_norm": 0.41015625, |
| "learning_rate": 1.1057452882206688e-06, |
| "loss": 0.0579, |
| "step": 2531 |
| }, |
| { |
| "epoch": 0.8573786798163002, |
| "grad_norm": 0.52734375, |
| "learning_rate": 1.1006334016332054e-06, |
| "loss": 0.072, |
| "step": 2532 |
| }, |
| { |
| "epoch": 0.8577172969884235, |
| "grad_norm": 0.4453125, |
| "learning_rate": 1.0955326704024983e-06, |
| "loss": 0.0652, |
| "step": 2533 |
| }, |
| { |
| "epoch": 0.8580559141605468, |
| "grad_norm": 0.423828125, |
| "learning_rate": 1.090443100922317e-06, |
| "loss": 0.059, |
| "step": 2534 |
| }, |
| { |
| "epoch": 0.8583945313326702, |
| "grad_norm": 0.453125, |
| "learning_rate": 1.085364699572441e-06, |
| "loss": 0.0618, |
| "step": 2535 |
| }, |
| { |
| "epoch": 0.8587331485047935, |
| "grad_norm": 0.52734375, |
| "learning_rate": 1.08029747271864e-06, |
| "loss": 0.0659, |
| "step": 2536 |
| }, |
| { |
| "epoch": 0.8590717656769169, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.0752414267126876e-06, |
| "loss": 0.074, |
| "step": 2537 |
| }, |
| { |
| "epoch": 0.8594103828490403, |
| "grad_norm": 0.490234375, |
| "learning_rate": 1.0701965678923387e-06, |
| "loss": 0.0665, |
| "step": 2538 |
| }, |
| { |
| "epoch": 0.8597490000211636, |
| "grad_norm": 0.482421875, |
| "learning_rate": 1.0651629025813203e-06, |
| "loss": 0.0722, |
| "step": 2539 |
| }, |
| { |
| "epoch": 0.860087617193287, |
| "grad_norm": 0.412109375, |
| "learning_rate": 1.0601404370893364e-06, |
| "loss": 0.0595, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.8604262343654102, |
| "grad_norm": 0.53515625, |
| "learning_rate": 1.0551291777120465e-06, |
| "loss": 0.0733, |
| "step": 2541 |
| }, |
| { |
| "epoch": 0.8607648515375336, |
| "grad_norm": 0.474609375, |
| "learning_rate": 1.0501291307310613e-06, |
| "loss": 0.0652, |
| "step": 2542 |
| }, |
| { |
| "epoch": 0.8611034687096569, |
| "grad_norm": 0.53515625, |
| "learning_rate": 1.045140302413945e-06, |
| "loss": 0.0781, |
| "step": 2543 |
| }, |
| { |
| "epoch": 0.8614420858817803, |
| "grad_norm": 0.48046875, |
| "learning_rate": 1.040162699014191e-06, |
| "loss": 0.0637, |
| "step": 2544 |
| }, |
| { |
| "epoch": 0.8617807030539036, |
| "grad_norm": 0.5390625, |
| "learning_rate": 1.0351963267712261e-06, |
| "loss": 0.0651, |
| "step": 2545 |
| }, |
| { |
| "epoch": 0.862119320226027, |
| "grad_norm": 0.5078125, |
| "learning_rate": 1.0302411919104005e-06, |
| "loss": 0.0532, |
| "step": 2546 |
| }, |
| { |
| "epoch": 0.8624579373981504, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.0252973006429733e-06, |
| "loss": 0.0686, |
| "step": 2547 |
| }, |
| { |
| "epoch": 0.8627965545702736, |
| "grad_norm": 0.46484375, |
| "learning_rate": 1.0203646591661142e-06, |
| "loss": 0.059, |
| "step": 2548 |
| }, |
| { |
| "epoch": 0.863135171742397, |
| "grad_norm": 0.5859375, |
| "learning_rate": 1.0154432736628916e-06, |
| "loss": 0.09, |
| "step": 2549 |
| }, |
| { |
| "epoch": 0.8634737889145203, |
| "grad_norm": 0.447265625, |
| "learning_rate": 1.0105331503022574e-06, |
| "loss": 0.0472, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.8638124060866437, |
| "grad_norm": 0.427734375, |
| "learning_rate": 1.0056342952390574e-06, |
| "loss": 0.0524, |
| "step": 2551 |
| }, |
| { |
| "epoch": 0.864151023258767, |
| "grad_norm": 0.482421875, |
| "learning_rate": 1.0007467146140026e-06, |
| "loss": 0.061, |
| "step": 2552 |
| }, |
| { |
| "epoch": 0.8644896404308904, |
| "grad_norm": 0.58203125, |
| "learning_rate": 9.958704145536767e-07, |
| "loss": 0.0603, |
| "step": 2553 |
| }, |
| { |
| "epoch": 0.8648282576030137, |
| "grad_norm": 0.498046875, |
| "learning_rate": 9.91005401170524e-07, |
| "loss": 0.0645, |
| "step": 2554 |
| }, |
| { |
| "epoch": 0.865166874775137, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.86151680562837e-07, |
| "loss": 0.0801, |
| "step": 2555 |
| }, |
| { |
| "epoch": 0.8655054919472603, |
| "grad_norm": 0.55078125, |
| "learning_rate": 9.813092588147554e-07, |
| "loss": 0.0743, |
| "step": 2556 |
| }, |
| { |
| "epoch": 0.8658441091193837, |
| "grad_norm": 0.45703125, |
| "learning_rate": 9.764781419962576e-07, |
| "loss": 0.066, |
| "step": 2557 |
| }, |
| { |
| "epoch": 0.866182726291507, |
| "grad_norm": 0.390625, |
| "learning_rate": 9.71658336163146e-07, |
| "loss": 0.051, |
| "step": 2558 |
| }, |
| { |
| "epoch": 0.8665213434636304, |
| "grad_norm": 0.54296875, |
| "learning_rate": 9.668498473570499e-07, |
| "loss": 0.0704, |
| "step": 2559 |
| }, |
| { |
| "epoch": 0.8668599606357538, |
| "grad_norm": 0.55859375, |
| "learning_rate": 9.620526816054065e-07, |
| "loss": 0.0629, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.8671985778078771, |
| "grad_norm": 0.482421875, |
| "learning_rate": 9.572668449214672e-07, |
| "loss": 0.0703, |
| "step": 2561 |
| }, |
| { |
| "epoch": 0.8675371949800004, |
| "grad_norm": 0.56640625, |
| "learning_rate": 9.52492343304281e-07, |
| "loss": 0.0715, |
| "step": 2562 |
| }, |
| { |
| "epoch": 0.8678758121521237, |
| "grad_norm": 0.53515625, |
| "learning_rate": 9.477291827386781e-07, |
| "loss": 0.0736, |
| "step": 2563 |
| }, |
| { |
| "epoch": 0.8682144293242471, |
| "grad_norm": 0.51171875, |
| "learning_rate": 9.42977369195286e-07, |
| "loss": 0.0627, |
| "step": 2564 |
| }, |
| { |
| "epoch": 0.8685530464963704, |
| "grad_norm": 0.53515625, |
| "learning_rate": 9.382369086305043e-07, |
| "loss": 0.0759, |
| "step": 2565 |
| }, |
| { |
| "epoch": 0.8688916636684938, |
| "grad_norm": 0.44140625, |
| "learning_rate": 9.335078069864967e-07, |
| "loss": 0.0654, |
| "step": 2566 |
| }, |
| { |
| "epoch": 0.8692302808406172, |
| "grad_norm": 0.4375, |
| "learning_rate": 9.287900701911945e-07, |
| "loss": 0.0488, |
| "step": 2567 |
| }, |
| { |
| "epoch": 0.8695688980127405, |
| "grad_norm": 0.44140625, |
| "learning_rate": 9.240837041582839e-07, |
| "loss": 0.0575, |
| "step": 2568 |
| }, |
| { |
| "epoch": 0.8699075151848639, |
| "grad_norm": 0.51171875, |
| "learning_rate": 9.193887147871905e-07, |
| "loss": 0.0628, |
| "step": 2569 |
| }, |
| { |
| "epoch": 0.8702461323569871, |
| "grad_norm": 0.474609375, |
| "learning_rate": 9.147051079630886e-07, |
| "loss": 0.0584, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.8705847495291105, |
| "grad_norm": 0.453125, |
| "learning_rate": 9.100328895568745e-07, |
| "loss": 0.0583, |
| "step": 2571 |
| }, |
| { |
| "epoch": 0.8709233667012338, |
| "grad_norm": 0.6015625, |
| "learning_rate": 9.053720654251774e-07, |
| "loss": 0.0696, |
| "step": 2572 |
| }, |
| { |
| "epoch": 0.8712619838733572, |
| "grad_norm": 0.65234375, |
| "learning_rate": 9.00722641410342e-07, |
| "loss": 0.0825, |
| "step": 2573 |
| }, |
| { |
| "epoch": 0.8716006010454805, |
| "grad_norm": 0.447265625, |
| "learning_rate": 8.960846233404175e-07, |
| "loss": 0.0647, |
| "step": 2574 |
| }, |
| { |
| "epoch": 0.8719392182176039, |
| "grad_norm": 0.83984375, |
| "learning_rate": 8.914580170291632e-07, |
| "loss": 0.1008, |
| "step": 2575 |
| }, |
| { |
| "epoch": 0.8722778353897273, |
| "grad_norm": 0.5, |
| "learning_rate": 8.86842828276031e-07, |
| "loss": 0.0718, |
| "step": 2576 |
| }, |
| { |
| "epoch": 0.8726164525618505, |
| "grad_norm": 0.462890625, |
| "learning_rate": 8.822390628661581e-07, |
| "loss": 0.0611, |
| "step": 2577 |
| }, |
| { |
| "epoch": 0.8729550697339739, |
| "grad_norm": 0.53515625, |
| "learning_rate": 8.77646726570367e-07, |
| "loss": 0.0616, |
| "step": 2578 |
| }, |
| { |
| "epoch": 0.8732936869060972, |
| "grad_norm": 0.421875, |
| "learning_rate": 8.730658251451485e-07, |
| "loss": 0.0514, |
| "step": 2579 |
| }, |
| { |
| "epoch": 0.8736323040782206, |
| "grad_norm": 0.3828125, |
| "learning_rate": 8.68496364332665e-07, |
| "loss": 0.0523, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.8739709212503439, |
| "grad_norm": 0.59375, |
| "learning_rate": 8.639383498607379e-07, |
| "loss": 0.0746, |
| "step": 2581 |
| }, |
| { |
| "epoch": 0.8743095384224673, |
| "grad_norm": 0.41796875, |
| "learning_rate": 8.593917874428348e-07, |
| "loss": 0.0556, |
| "step": 2582 |
| }, |
| { |
| "epoch": 0.8746481555945906, |
| "grad_norm": 0.41015625, |
| "learning_rate": 8.548566827780747e-07, |
| "loss": 0.0481, |
| "step": 2583 |
| }, |
| { |
| "epoch": 0.8749867727667139, |
| "grad_norm": 0.63671875, |
| "learning_rate": 8.503330415512123e-07, |
| "loss": 0.072, |
| "step": 2584 |
| }, |
| { |
| "epoch": 0.8753253899388372, |
| "grad_norm": 0.431640625, |
| "learning_rate": 8.458208694326287e-07, |
| "loss": 0.054, |
| "step": 2585 |
| }, |
| { |
| "epoch": 0.8756640071109606, |
| "grad_norm": 0.734375, |
| "learning_rate": 8.413201720783337e-07, |
| "loss": 0.1025, |
| "step": 2586 |
| }, |
| { |
| "epoch": 0.876002624283084, |
| "grad_norm": 0.58984375, |
| "learning_rate": 8.368309551299536e-07, |
| "loss": 0.0694, |
| "step": 2587 |
| }, |
| { |
| "epoch": 0.8763412414552073, |
| "grad_norm": 0.515625, |
| "learning_rate": 8.323532242147203e-07, |
| "loss": 0.067, |
| "step": 2588 |
| }, |
| { |
| "epoch": 0.8766798586273307, |
| "grad_norm": 0.48828125, |
| "learning_rate": 8.278869849454718e-07, |
| "loss": 0.0664, |
| "step": 2589 |
| }, |
| { |
| "epoch": 0.877018475799454, |
| "grad_norm": 0.578125, |
| "learning_rate": 8.234322429206354e-07, |
| "loss": 0.0697, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.8773570929715773, |
| "grad_norm": 0.376953125, |
| "learning_rate": 8.189890037242343e-07, |
| "loss": 0.0443, |
| "step": 2591 |
| }, |
| { |
| "epoch": 0.8776957101437006, |
| "grad_norm": 0.6015625, |
| "learning_rate": 8.145572729258689e-07, |
| "loss": 0.0524, |
| "step": 2592 |
| }, |
| { |
| "epoch": 0.878034327315824, |
| "grad_norm": 0.380859375, |
| "learning_rate": 8.101370560807132e-07, |
| "loss": 0.0537, |
| "step": 2593 |
| }, |
| { |
| "epoch": 0.8783729444879473, |
| "grad_norm": 0.439453125, |
| "learning_rate": 8.057283587295084e-07, |
| "loss": 0.0652, |
| "step": 2594 |
| }, |
| { |
| "epoch": 0.8787115616600707, |
| "grad_norm": 0.447265625, |
| "learning_rate": 8.013311863985596e-07, |
| "loss": 0.0605, |
| "step": 2595 |
| }, |
| { |
| "epoch": 0.879050178832194, |
| "grad_norm": 0.59375, |
| "learning_rate": 7.969455445997198e-07, |
| "loss": 0.0819, |
| "step": 2596 |
| }, |
| { |
| "epoch": 0.8793887960043174, |
| "grad_norm": 0.47265625, |
| "learning_rate": 7.92571438830394e-07, |
| "loss": 0.0681, |
| "step": 2597 |
| }, |
| { |
| "epoch": 0.8797274131764408, |
| "grad_norm": 0.478515625, |
| "learning_rate": 7.882088745735217e-07, |
| "loss": 0.0554, |
| "step": 2598 |
| }, |
| { |
| "epoch": 0.880066030348564, |
| "grad_norm": 0.69140625, |
| "learning_rate": 7.838578572975786e-07, |
| "loss": 0.0827, |
| "step": 2599 |
| }, |
| { |
| "epoch": 0.8804046475206874, |
| "grad_norm": 0.48046875, |
| "learning_rate": 7.795183924565675e-07, |
| "loss": 0.0565, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.8807432646928107, |
| "grad_norm": 0.48828125, |
| "learning_rate": 7.751904854900027e-07, |
| "loss": 0.0599, |
| "step": 2601 |
| }, |
| { |
| "epoch": 0.8810818818649341, |
| "grad_norm": 0.498046875, |
| "learning_rate": 7.708741418229215e-07, |
| "loss": 0.0602, |
| "step": 2602 |
| }, |
| { |
| "epoch": 0.8814204990370574, |
| "grad_norm": 0.4609375, |
| "learning_rate": 7.665693668658569e-07, |
| "loss": 0.0624, |
| "step": 2603 |
| }, |
| { |
| "epoch": 0.8817591162091808, |
| "grad_norm": 0.6015625, |
| "learning_rate": 7.62276166014847e-07, |
| "loss": 0.066, |
| "step": 2604 |
| }, |
| { |
| "epoch": 0.8820977333813041, |
| "grad_norm": 0.412109375, |
| "learning_rate": 7.579945446514192e-07, |
| "loss": 0.0527, |
| "step": 2605 |
| }, |
| { |
| "epoch": 0.8824363505534274, |
| "grad_norm": 0.474609375, |
| "learning_rate": 7.53724508142587e-07, |
| "loss": 0.0625, |
| "step": 2606 |
| }, |
| { |
| "epoch": 0.8827749677255508, |
| "grad_norm": 0.46875, |
| "learning_rate": 7.494660618408379e-07, |
| "loss": 0.0601, |
| "step": 2607 |
| }, |
| { |
| "epoch": 0.8831135848976741, |
| "grad_norm": 0.51171875, |
| "learning_rate": 7.452192110841383e-07, |
| "loss": 0.0722, |
| "step": 2608 |
| }, |
| { |
| "epoch": 0.8834522020697975, |
| "grad_norm": 0.447265625, |
| "learning_rate": 7.409839611959136e-07, |
| "loss": 0.0582, |
| "step": 2609 |
| }, |
| { |
| "epoch": 0.8837908192419208, |
| "grad_norm": 0.515625, |
| "learning_rate": 7.367603174850502e-07, |
| "loss": 0.0681, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.8841294364140442, |
| "grad_norm": 0.43359375, |
| "learning_rate": 7.325482852458887e-07, |
| "loss": 0.0551, |
| "step": 2611 |
| }, |
| { |
| "epoch": 0.8844680535861675, |
| "grad_norm": 0.6796875, |
| "learning_rate": 7.283478697582091e-07, |
| "loss": 0.0829, |
| "step": 2612 |
| }, |
| { |
| "epoch": 0.8848066707582908, |
| "grad_norm": 0.48828125, |
| "learning_rate": 7.241590762872319e-07, |
| "loss": 0.062, |
| "step": 2613 |
| }, |
| { |
| "epoch": 0.8851452879304141, |
| "grad_norm": 0.5078125, |
| "learning_rate": 7.199819100836136e-07, |
| "loss": 0.0635, |
| "step": 2614 |
| }, |
| { |
| "epoch": 0.8854839051025375, |
| "grad_norm": 0.6171875, |
| "learning_rate": 7.158163763834292e-07, |
| "loss": 0.0689, |
| "step": 2615 |
| }, |
| { |
| "epoch": 0.8858225222746609, |
| "grad_norm": 0.408203125, |
| "learning_rate": 7.116624804081773e-07, |
| "loss": 0.052, |
| "step": 2616 |
| }, |
| { |
| "epoch": 0.8861611394467842, |
| "grad_norm": 0.4296875, |
| "learning_rate": 7.075202273647652e-07, |
| "loss": 0.0523, |
| "step": 2617 |
| }, |
| { |
| "epoch": 0.8864997566189076, |
| "grad_norm": 0.5234375, |
| "learning_rate": 7.033896224455072e-07, |
| "loss": 0.0745, |
| "step": 2618 |
| }, |
| { |
| "epoch": 0.8868383737910309, |
| "grad_norm": 0.416015625, |
| "learning_rate": 6.992706708281205e-07, |
| "loss": 0.0497, |
| "step": 2619 |
| }, |
| { |
| "epoch": 0.8871769909631542, |
| "grad_norm": 0.427734375, |
| "learning_rate": 6.951633776757071e-07, |
| "loss": 0.0559, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.8875156081352775, |
| "grad_norm": 0.470703125, |
| "learning_rate": 6.910677481367623e-07, |
| "loss": 0.0584, |
| "step": 2621 |
| }, |
| { |
| "epoch": 0.8878542253074009, |
| "grad_norm": 0.392578125, |
| "learning_rate": 6.869837873451557e-07, |
| "loss": 0.0469, |
| "step": 2622 |
| }, |
| { |
| "epoch": 0.8881928424795242, |
| "grad_norm": 0.5703125, |
| "learning_rate": 6.829115004201325e-07, |
| "loss": 0.0613, |
| "step": 2623 |
| }, |
| { |
| "epoch": 0.8885314596516476, |
| "grad_norm": 0.484375, |
| "learning_rate": 6.788508924663084e-07, |
| "loss": 0.0785, |
| "step": 2624 |
| }, |
| { |
| "epoch": 0.888870076823771, |
| "grad_norm": 0.71875, |
| "learning_rate": 6.748019685736507e-07, |
| "loss": 0.0639, |
| "step": 2625 |
| }, |
| { |
| "epoch": 0.8892086939958943, |
| "grad_norm": 0.48046875, |
| "learning_rate": 6.707647338174905e-07, |
| "loss": 0.0623, |
| "step": 2626 |
| }, |
| { |
| "epoch": 0.8895473111680177, |
| "grad_norm": 0.63671875, |
| "learning_rate": 6.667391932584999e-07, |
| "loss": 0.047, |
| "step": 2627 |
| }, |
| { |
| "epoch": 0.8898859283401409, |
| "grad_norm": 0.609375, |
| "learning_rate": 6.627253519426913e-07, |
| "loss": 0.0521, |
| "step": 2628 |
| }, |
| { |
| "epoch": 0.8902245455122643, |
| "grad_norm": 0.455078125, |
| "learning_rate": 6.587232149014189e-07, |
| "loss": 0.0527, |
| "step": 2629 |
| }, |
| { |
| "epoch": 0.8905631626843876, |
| "grad_norm": 0.4296875, |
| "learning_rate": 6.54732787151362e-07, |
| "loss": 0.0537, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.890901779856511, |
| "grad_norm": 0.76953125, |
| "learning_rate": 6.507540736945195e-07, |
| "loss": 0.0994, |
| "step": 2631 |
| }, |
| { |
| "epoch": 0.8912403970286343, |
| "grad_norm": 0.58203125, |
| "learning_rate": 6.467870795182108e-07, |
| "loss": 0.0807, |
| "step": 2632 |
| }, |
| { |
| "epoch": 0.8915790142007577, |
| "grad_norm": 0.447265625, |
| "learning_rate": 6.428318095950648e-07, |
| "loss": 0.0555, |
| "step": 2633 |
| }, |
| { |
| "epoch": 0.891917631372881, |
| "grad_norm": 0.53515625, |
| "learning_rate": 6.388882688830089e-07, |
| "loss": 0.0607, |
| "step": 2634 |
| }, |
| { |
| "epoch": 0.8922562485450043, |
| "grad_norm": 0.5859375, |
| "learning_rate": 6.349564623252746e-07, |
| "loss": 0.0594, |
| "step": 2635 |
| }, |
| { |
| "epoch": 0.8925948657171277, |
| "grad_norm": 0.4140625, |
| "learning_rate": 6.310363948503806e-07, |
| "loss": 0.0569, |
| "step": 2636 |
| }, |
| { |
| "epoch": 0.892933482889251, |
| "grad_norm": 0.484375, |
| "learning_rate": 6.271280713721317e-07, |
| "loss": 0.0663, |
| "step": 2637 |
| }, |
| { |
| "epoch": 0.8932721000613744, |
| "grad_norm": 0.45703125, |
| "learning_rate": 6.232314967896136e-07, |
| "loss": 0.0572, |
| "step": 2638 |
| }, |
| { |
| "epoch": 0.8936107172334977, |
| "grad_norm": 0.443359375, |
| "learning_rate": 6.193466759871792e-07, |
| "loss": 0.0492, |
| "step": 2639 |
| }, |
| { |
| "epoch": 0.8939493344056211, |
| "grad_norm": 0.5546875, |
| "learning_rate": 6.154736138344564e-07, |
| "loss": 0.0611, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.8942879515777444, |
| "grad_norm": 0.53125, |
| "learning_rate": 6.11612315186324e-07, |
| "loss": 0.0669, |
| "step": 2641 |
| }, |
| { |
| "epoch": 0.8946265687498677, |
| "grad_norm": 0.45703125, |
| "learning_rate": 6.077627848829238e-07, |
| "loss": 0.0627, |
| "step": 2642 |
| }, |
| { |
| "epoch": 0.894965185921991, |
| "grad_norm": 0.443359375, |
| "learning_rate": 6.039250277496411e-07, |
| "loss": 0.0535, |
| "step": 2643 |
| }, |
| { |
| "epoch": 0.8953038030941144, |
| "grad_norm": 0.55078125, |
| "learning_rate": 6.000990485971048e-07, |
| "loss": 0.0703, |
| "step": 2644 |
| }, |
| { |
| "epoch": 0.8956424202662377, |
| "grad_norm": 0.47265625, |
| "learning_rate": 5.962848522211784e-07, |
| "loss": 0.0602, |
| "step": 2645 |
| }, |
| { |
| "epoch": 0.8959810374383611, |
| "grad_norm": 0.6640625, |
| "learning_rate": 5.924824434029619e-07, |
| "loss": 0.0655, |
| "step": 2646 |
| }, |
| { |
| "epoch": 0.8963196546104845, |
| "grad_norm": 0.466796875, |
| "learning_rate": 5.886918269087716e-07, |
| "loss": 0.0624, |
| "step": 2647 |
| }, |
| { |
| "epoch": 0.8966582717826078, |
| "grad_norm": 0.48046875, |
| "learning_rate": 5.849130074901444e-07, |
| "loss": 0.0661, |
| "step": 2648 |
| }, |
| { |
| "epoch": 0.8969968889547311, |
| "grad_norm": 0.67578125, |
| "learning_rate": 5.811459898838345e-07, |
| "loss": 0.0686, |
| "step": 2649 |
| }, |
| { |
| "epoch": 0.8973355061268544, |
| "grad_norm": 0.38671875, |
| "learning_rate": 5.77390778811796e-07, |
| "loss": 0.0509, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.8976741232989778, |
| "grad_norm": 0.5703125, |
| "learning_rate": 5.736473789811858e-07, |
| "loss": 0.0724, |
| "step": 2651 |
| }, |
| { |
| "epoch": 0.8980127404711011, |
| "grad_norm": 0.62109375, |
| "learning_rate": 5.699157950843592e-07, |
| "loss": 0.0741, |
| "step": 2652 |
| }, |
| { |
| "epoch": 0.8983513576432245, |
| "grad_norm": 0.38671875, |
| "learning_rate": 5.661960317988535e-07, |
| "loss": 0.05, |
| "step": 2653 |
| }, |
| { |
| "epoch": 0.8986899748153478, |
| "grad_norm": 0.54296875, |
| "learning_rate": 5.624880937873956e-07, |
| "loss": 0.063, |
| "step": 2654 |
| }, |
| { |
| "epoch": 0.8990285919874712, |
| "grad_norm": 0.46875, |
| "learning_rate": 5.587919856978819e-07, |
| "loss": 0.053, |
| "step": 2655 |
| }, |
| { |
| "epoch": 0.8993672091595946, |
| "grad_norm": 0.474609375, |
| "learning_rate": 5.551077121633875e-07, |
| "loss": 0.0588, |
| "step": 2656 |
| }, |
| { |
| "epoch": 0.8997058263317178, |
| "grad_norm": 0.6953125, |
| "learning_rate": 5.514352778021492e-07, |
| "loss": 0.0755, |
| "step": 2657 |
| }, |
| { |
| "epoch": 0.9000444435038412, |
| "grad_norm": 0.57421875, |
| "learning_rate": 5.477746872175615e-07, |
| "loss": 0.0687, |
| "step": 2658 |
| }, |
| { |
| "epoch": 0.9003830606759645, |
| "grad_norm": 0.470703125, |
| "learning_rate": 5.441259449981795e-07, |
| "loss": 0.0619, |
| "step": 2659 |
| }, |
| { |
| "epoch": 0.9007216778480879, |
| "grad_norm": 0.45703125, |
| "learning_rate": 5.404890557176967e-07, |
| "loss": 0.0589, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.9010602950202112, |
| "grad_norm": 0.53515625, |
| "learning_rate": 5.368640239349554e-07, |
| "loss": 0.0579, |
| "step": 2661 |
| }, |
| { |
| "epoch": 0.9013989121923346, |
| "grad_norm": 0.515625, |
| "learning_rate": 5.332508541939374e-07, |
| "loss": 0.0655, |
| "step": 2662 |
| }, |
| { |
| "epoch": 0.901737529364458, |
| "grad_norm": 0.515625, |
| "learning_rate": 5.296495510237453e-07, |
| "loss": 0.0809, |
| "step": 2663 |
| }, |
| { |
| "epoch": 0.9020761465365812, |
| "grad_norm": 0.65625, |
| "learning_rate": 5.26060118938616e-07, |
| "loss": 0.0621, |
| "step": 2664 |
| }, |
| { |
| "epoch": 0.9020761465365812, |
| "eval_loss": 0.06565158814191818, |
| "eval_runtime": 818.5534, |
| "eval_samples_per_second": 12.153, |
| "eval_steps_per_second": 3.038, |
| "step": 2664 |
| }, |
| { |
| "epoch": 0.9024147637087045, |
| "grad_norm": 0.640625, |
| "learning_rate": 5.224825624379048e-07, |
| "loss": 0.0672, |
| "step": 2665 |
| }, |
| { |
| "epoch": 0.9027533808808279, |
| "grad_norm": 0.51953125, |
| "learning_rate": 5.189168860060756e-07, |
| "loss": 0.0757, |
| "step": 2666 |
| }, |
| { |
| "epoch": 0.9030919980529513, |
| "grad_norm": 0.52734375, |
| "learning_rate": 5.153630941127063e-07, |
| "loss": 0.0584, |
| "step": 2667 |
| }, |
| { |
| "epoch": 0.9034306152250746, |
| "grad_norm": 0.84765625, |
| "learning_rate": 5.118211912124726e-07, |
| "loss": 0.0992, |
| "step": 2668 |
| }, |
| { |
| "epoch": 0.903769232397198, |
| "grad_norm": 0.5390625, |
| "learning_rate": 5.082911817451541e-07, |
| "loss": 0.0662, |
| "step": 2669 |
| }, |
| { |
| "epoch": 0.9041078495693213, |
| "grad_norm": 0.47265625, |
| "learning_rate": 5.047730701356146e-07, |
| "loss": 0.0505, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.9044464667414446, |
| "grad_norm": 0.6875, |
| "learning_rate": 5.012668607938087e-07, |
| "loss": 0.0694, |
| "step": 2671 |
| }, |
| { |
| "epoch": 0.9047850839135679, |
| "grad_norm": 0.578125, |
| "learning_rate": 4.977725581147697e-07, |
| "loss": 0.0784, |
| "step": 2672 |
| }, |
| { |
| "epoch": 0.9051237010856913, |
| "grad_norm": 0.453125, |
| "learning_rate": 4.942901664786071e-07, |
| "loss": 0.0589, |
| "step": 2673 |
| }, |
| { |
| "epoch": 0.9054623182578146, |
| "grad_norm": 0.482421875, |
| "learning_rate": 4.90819690250497e-07, |
| "loss": 0.0609, |
| "step": 2674 |
| }, |
| { |
| "epoch": 0.905800935429938, |
| "grad_norm": 0.455078125, |
| "learning_rate": 4.873611337806838e-07, |
| "loss": 0.0631, |
| "step": 2675 |
| }, |
| { |
| "epoch": 0.9061395526020614, |
| "grad_norm": 0.53125, |
| "learning_rate": 4.839145014044688e-07, |
| "loss": 0.0775, |
| "step": 2676 |
| }, |
| { |
| "epoch": 0.9064781697741847, |
| "grad_norm": 0.515625, |
| "learning_rate": 4.804797974422026e-07, |
| "loss": 0.0666, |
| "step": 2677 |
| }, |
| { |
| "epoch": 0.906816786946308, |
| "grad_norm": 0.486328125, |
| "learning_rate": 4.770570261992913e-07, |
| "loss": 0.0547, |
| "step": 2678 |
| }, |
| { |
| "epoch": 0.9071554041184313, |
| "grad_norm": 0.408203125, |
| "learning_rate": 4.73646191966175e-07, |
| "loss": 0.0487, |
| "step": 2679 |
| }, |
| { |
| "epoch": 0.9074940212905547, |
| "grad_norm": 0.48828125, |
| "learning_rate": 4.70247299018336e-07, |
| "loss": 0.0698, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.907832638462678, |
| "grad_norm": 0.427734375, |
| "learning_rate": 4.668603516162895e-07, |
| "loss": 0.0562, |
| "step": 2681 |
| }, |
| { |
| "epoch": 0.9081712556348014, |
| "grad_norm": 0.5078125, |
| "learning_rate": 4.634853540055706e-07, |
| "loss": 0.0682, |
| "step": 2682 |
| }, |
| { |
| "epoch": 0.9085098728069247, |
| "grad_norm": 0.65234375, |
| "learning_rate": 4.601223104167407e-07, |
| "loss": 0.0755, |
| "step": 2683 |
| }, |
| { |
| "epoch": 0.9088484899790481, |
| "grad_norm": 0.490234375, |
| "learning_rate": 4.567712250653755e-07, |
| "loss": 0.0657, |
| "step": 2684 |
| }, |
| { |
| "epoch": 0.9091871071511715, |
| "grad_norm": 0.9375, |
| "learning_rate": 4.5343210215206047e-07, |
| "loss": 0.0488, |
| "step": 2685 |
| }, |
| { |
| "epoch": 0.9095257243232947, |
| "grad_norm": 0.57421875, |
| "learning_rate": 4.501049458623863e-07, |
| "loss": 0.0672, |
| "step": 2686 |
| }, |
| { |
| "epoch": 0.9098643414954181, |
| "grad_norm": 0.423828125, |
| "learning_rate": 4.4678976036694354e-07, |
| "loss": 0.0471, |
| "step": 2687 |
| }, |
| { |
| "epoch": 0.9102029586675414, |
| "grad_norm": 0.447265625, |
| "learning_rate": 4.43486549821317e-07, |
| "loss": 0.0542, |
| "step": 2688 |
| }, |
| { |
| "epoch": 0.9105415758396648, |
| "grad_norm": 0.57421875, |
| "learning_rate": 4.401953183660834e-07, |
| "loss": 0.0772, |
| "step": 2689 |
| }, |
| { |
| "epoch": 0.9108801930117881, |
| "grad_norm": 0.52734375, |
| "learning_rate": 4.369160701268016e-07, |
| "loss": 0.0738, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.9112188101839115, |
| "grad_norm": 0.86328125, |
| "learning_rate": 4.3364880921400567e-07, |
| "loss": 0.1494, |
| "step": 2691 |
| }, |
| { |
| "epoch": 0.9115574273560348, |
| "grad_norm": 0.578125, |
| "learning_rate": 4.303935397232117e-07, |
| "loss": 0.081, |
| "step": 2692 |
| }, |
| { |
| "epoch": 0.9118960445281581, |
| "grad_norm": 0.59765625, |
| "learning_rate": 4.271502657348969e-07, |
| "loss": 0.0663, |
| "step": 2693 |
| }, |
| { |
| "epoch": 0.9122346617002814, |
| "grad_norm": 0.435546875, |
| "learning_rate": 4.23918991314507e-07, |
| "loss": 0.0502, |
| "step": 2694 |
| }, |
| { |
| "epoch": 0.9125732788724048, |
| "grad_norm": 0.431640625, |
| "learning_rate": 4.2069972051244635e-07, |
| "loss": 0.055, |
| "step": 2695 |
| }, |
| { |
| "epoch": 0.9129118960445282, |
| "grad_norm": 0.671875, |
| "learning_rate": 4.174924573640682e-07, |
| "loss": 0.0958, |
| "step": 2696 |
| }, |
| { |
| "epoch": 0.9132505132166515, |
| "grad_norm": 0.52734375, |
| "learning_rate": 4.14297205889681e-07, |
| "loss": 0.0754, |
| "step": 2697 |
| }, |
| { |
| "epoch": 0.9135891303887749, |
| "grad_norm": 0.37890625, |
| "learning_rate": 4.111139700945277e-07, |
| "loss": 0.052, |
| "step": 2698 |
| }, |
| { |
| "epoch": 0.9139277475608982, |
| "grad_norm": 0.5, |
| "learning_rate": 4.0794275396879856e-07, |
| "loss": 0.0686, |
| "step": 2699 |
| }, |
| { |
| "epoch": 0.9142663647330215, |
| "grad_norm": 0.53515625, |
| "learning_rate": 4.047835614876128e-07, |
| "loss": 0.0685, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.9146049819051448, |
| "grad_norm": 0.59765625, |
| "learning_rate": 4.0163639661101594e-07, |
| "loss": 0.0812, |
| "step": 2701 |
| }, |
| { |
| "epoch": 0.9149435990772682, |
| "grad_norm": 0.5234375, |
| "learning_rate": 3.985012632839824e-07, |
| "loss": 0.0745, |
| "step": 2702 |
| }, |
| { |
| "epoch": 0.9152822162493915, |
| "grad_norm": 0.47265625, |
| "learning_rate": 3.9537816543640085e-07, |
| "loss": 0.0658, |
| "step": 2703 |
| }, |
| { |
| "epoch": 0.9156208334215149, |
| "grad_norm": 0.427734375, |
| "learning_rate": 3.9226710698307416e-07, |
| "loss": 0.0452, |
| "step": 2704 |
| }, |
| { |
| "epoch": 0.9159594505936383, |
| "grad_norm": 0.609375, |
| "learning_rate": 3.891680918237151e-07, |
| "loss": 0.0815, |
| "step": 2705 |
| }, |
| { |
| "epoch": 0.9162980677657616, |
| "grad_norm": 0.384765625, |
| "learning_rate": 3.8608112384293963e-07, |
| "loss": 0.0468, |
| "step": 2706 |
| }, |
| { |
| "epoch": 0.9166366849378849, |
| "grad_norm": 0.45703125, |
| "learning_rate": 3.8300620691026024e-07, |
| "loss": 0.0508, |
| "step": 2707 |
| }, |
| { |
| "epoch": 0.9169753021100082, |
| "grad_norm": 0.54296875, |
| "learning_rate": 3.799433448800893e-07, |
| "loss": 0.0618, |
| "step": 2708 |
| }, |
| { |
| "epoch": 0.9173139192821316, |
| "grad_norm": 0.5390625, |
| "learning_rate": 3.7689254159172127e-07, |
| "loss": 0.0641, |
| "step": 2709 |
| }, |
| { |
| "epoch": 0.9176525364542549, |
| "grad_norm": 0.7578125, |
| "learning_rate": 3.738538008693393e-07, |
| "loss": 0.0743, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.9179911536263783, |
| "grad_norm": 0.4140625, |
| "learning_rate": 3.708271265220087e-07, |
| "loss": 0.0496, |
| "step": 2711 |
| }, |
| { |
| "epoch": 0.9183297707985016, |
| "grad_norm": 0.4375, |
| "learning_rate": 3.6781252234365905e-07, |
| "loss": 0.058, |
| "step": 2712 |
| }, |
| { |
| "epoch": 0.918668387970625, |
| "grad_norm": 0.50390625, |
| "learning_rate": 3.64809992113101e-07, |
| "loss": 0.0674, |
| "step": 2713 |
| }, |
| { |
| "epoch": 0.9190070051427484, |
| "grad_norm": 0.53125, |
| "learning_rate": 3.618195395940083e-07, |
| "loss": 0.0584, |
| "step": 2714 |
| }, |
| { |
| "epoch": 0.9193456223148716, |
| "grad_norm": 0.59375, |
| "learning_rate": 3.5884116853490915e-07, |
| "loss": 0.0713, |
| "step": 2715 |
| }, |
| { |
| "epoch": 0.919684239486995, |
| "grad_norm": 0.416015625, |
| "learning_rate": 3.558748826691949e-07, |
| "loss": 0.0544, |
| "step": 2716 |
| }, |
| { |
| "epoch": 0.9200228566591183, |
| "grad_norm": 0.51171875, |
| "learning_rate": 3.529206857151035e-07, |
| "loss": 0.0735, |
| "step": 2717 |
| }, |
| { |
| "epoch": 0.9203614738312417, |
| "grad_norm": 0.51171875, |
| "learning_rate": 3.4997858137572174e-07, |
| "loss": 0.0596, |
| "step": 2718 |
| }, |
| { |
| "epoch": 0.920700091003365, |
| "grad_norm": 0.453125, |
| "learning_rate": 3.4704857333897834e-07, |
| "loss": 0.0601, |
| "step": 2719 |
| }, |
| { |
| "epoch": 0.9210387081754884, |
| "grad_norm": 0.6015625, |
| "learning_rate": 3.4413066527763774e-07, |
| "loss": 0.0785, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.9213773253476117, |
| "grad_norm": 0.42578125, |
| "learning_rate": 3.412248608492974e-07, |
| "loss": 0.0552, |
| "step": 2721 |
| }, |
| { |
| "epoch": 0.921715942519735, |
| "grad_norm": 0.462890625, |
| "learning_rate": 3.38331163696386e-07, |
| "loss": 0.0571, |
| "step": 2722 |
| }, |
| { |
| "epoch": 0.9220545596918583, |
| "grad_norm": 0.48046875, |
| "learning_rate": 3.354495774461497e-07, |
| "loss": 0.063, |
| "step": 2723 |
| }, |
| { |
| "epoch": 0.9223931768639817, |
| "grad_norm": 0.5625, |
| "learning_rate": 3.3258010571065925e-07, |
| "loss": 0.0796, |
| "step": 2724 |
| }, |
| { |
| "epoch": 0.9227317940361051, |
| "grad_norm": 0.61328125, |
| "learning_rate": 3.2972275208679625e-07, |
| "loss": 0.0554, |
| "step": 2725 |
| }, |
| { |
| "epoch": 0.9230704112082284, |
| "grad_norm": 0.46484375, |
| "learning_rate": 3.2687752015625574e-07, |
| "loss": 0.0585, |
| "step": 2726 |
| }, |
| { |
| "epoch": 0.9234090283803518, |
| "grad_norm": 0.53125, |
| "learning_rate": 3.2404441348553475e-07, |
| "loss": 0.0628, |
| "step": 2727 |
| }, |
| { |
| "epoch": 0.9237476455524751, |
| "grad_norm": 0.65625, |
| "learning_rate": 3.212234356259325e-07, |
| "loss": 0.0557, |
| "step": 2728 |
| }, |
| { |
| "epoch": 0.9240862627245984, |
| "grad_norm": 0.5078125, |
| "learning_rate": 3.18414590113546e-07, |
| "loss": 0.0617, |
| "step": 2729 |
| }, |
| { |
| "epoch": 0.9244248798967217, |
| "grad_norm": 0.44921875, |
| "learning_rate": 3.1561788046926335e-07, |
| "loss": 0.0522, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.9247634970688451, |
| "grad_norm": 0.5625, |
| "learning_rate": 3.1283331019875905e-07, |
| "loss": 0.0849, |
| "step": 2731 |
| }, |
| { |
| "epoch": 0.9251021142409684, |
| "grad_norm": 0.486328125, |
| "learning_rate": 3.100608827924934e-07, |
| "loss": 0.063, |
| "step": 2732 |
| }, |
| { |
| "epoch": 0.9254407314130918, |
| "grad_norm": 0.494140625, |
| "learning_rate": 3.0730060172570407e-07, |
| "loss": 0.0636, |
| "step": 2733 |
| }, |
| { |
| "epoch": 0.9257793485852152, |
| "grad_norm": 0.5703125, |
| "learning_rate": 3.045524704584024e-07, |
| "loss": 0.0786, |
| "step": 2734 |
| }, |
| { |
| "epoch": 0.9261179657573385, |
| "grad_norm": 0.4375, |
| "learning_rate": 3.018164924353739e-07, |
| "loss": 0.0595, |
| "step": 2735 |
| }, |
| { |
| "epoch": 0.9264565829294618, |
| "grad_norm": 0.515625, |
| "learning_rate": 2.990926710861641e-07, |
| "loss": 0.0659, |
| "step": 2736 |
| }, |
| { |
| "epoch": 0.9267952001015851, |
| "grad_norm": 0.6484375, |
| "learning_rate": 2.963810098250841e-07, |
| "loss": 0.0725, |
| "step": 2737 |
| }, |
| { |
| "epoch": 0.9271338172737085, |
| "grad_norm": 0.5390625, |
| "learning_rate": 2.936815120512038e-07, |
| "loss": 0.0688, |
| "step": 2738 |
| }, |
| { |
| "epoch": 0.9274724344458318, |
| "grad_norm": 0.546875, |
| "learning_rate": 2.909941811483408e-07, |
| "loss": 0.0729, |
| "step": 2739 |
| }, |
| { |
| "epoch": 0.9278110516179552, |
| "grad_norm": 0.44921875, |
| "learning_rate": 2.883190204850661e-07, |
| "loss": 0.0586, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.9281496687900785, |
| "grad_norm": 0.62109375, |
| "learning_rate": 2.8565603341469514e-07, |
| "loss": 0.0993, |
| "step": 2741 |
| }, |
| { |
| "epoch": 0.9284882859622019, |
| "grad_norm": 0.5, |
| "learning_rate": 2.8300522327528e-07, |
| "loss": 0.0586, |
| "step": 2742 |
| }, |
| { |
| "epoch": 0.9288269031343253, |
| "grad_norm": 0.58984375, |
| "learning_rate": 2.803665933896127e-07, |
| "loss": 0.0638, |
| "step": 2743 |
| }, |
| { |
| "epoch": 0.9291655203064485, |
| "grad_norm": 0.41796875, |
| "learning_rate": 2.7774014706521524e-07, |
| "loss": 0.0539, |
| "step": 2744 |
| }, |
| { |
| "epoch": 0.9295041374785719, |
| "grad_norm": 0.40625, |
| "learning_rate": 2.7512588759433857e-07, |
| "loss": 0.0481, |
| "step": 2745 |
| }, |
| { |
| "epoch": 0.9298427546506952, |
| "grad_norm": 0.5625, |
| "learning_rate": 2.7252381825395804e-07, |
| "loss": 0.0726, |
| "step": 2746 |
| }, |
| { |
| "epoch": 0.9301813718228186, |
| "grad_norm": 0.423828125, |
| "learning_rate": 2.6993394230576676e-07, |
| "loss": 0.0488, |
| "step": 2747 |
| }, |
| { |
| "epoch": 0.9305199889949419, |
| "grad_norm": 0.3828125, |
| "learning_rate": 2.6735626299617456e-07, |
| "loss": 0.0516, |
| "step": 2748 |
| }, |
| { |
| "epoch": 0.9308586061670653, |
| "grad_norm": 0.56640625, |
| "learning_rate": 2.647907835563035e-07, |
| "loss": 0.077, |
| "step": 2749 |
| }, |
| { |
| "epoch": 0.9311972233391886, |
| "grad_norm": 5.59375, |
| "learning_rate": 2.6223750720198115e-07, |
| "loss": 0.079, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.9315358405113119, |
| "grad_norm": 0.59765625, |
| "learning_rate": 2.596964371337418e-07, |
| "loss": 0.0828, |
| "step": 2751 |
| }, |
| { |
| "epoch": 0.9318744576834352, |
| "grad_norm": 0.47265625, |
| "learning_rate": 2.5716757653681313e-07, |
| "loss": 0.0627, |
| "step": 2752 |
| }, |
| { |
| "epoch": 0.9322130748555586, |
| "grad_norm": 0.478515625, |
| "learning_rate": 2.5465092858112495e-07, |
| "loss": 0.0546, |
| "step": 2753 |
| }, |
| { |
| "epoch": 0.932551692027682, |
| "grad_norm": 0.453125, |
| "learning_rate": 2.521464964212972e-07, |
| "loss": 0.0481, |
| "step": 2754 |
| }, |
| { |
| "epoch": 0.9328903091998053, |
| "grad_norm": 0.55078125, |
| "learning_rate": 2.4965428319663085e-07, |
| "loss": 0.0664, |
| "step": 2755 |
| }, |
| { |
| "epoch": 0.9332289263719287, |
| "grad_norm": 0.58984375, |
| "learning_rate": 2.471742920311193e-07, |
| "loss": 0.0811, |
| "step": 2756 |
| }, |
| { |
| "epoch": 0.933567543544052, |
| "grad_norm": 0.48828125, |
| "learning_rate": 2.4470652603343024e-07, |
| "loss": 0.0636, |
| "step": 2757 |
| }, |
| { |
| "epoch": 0.9339061607161753, |
| "grad_norm": 0.5234375, |
| "learning_rate": 2.422509882969093e-07, |
| "loss": 0.0657, |
| "step": 2758 |
| }, |
| { |
| "epoch": 0.9342447778882986, |
| "grad_norm": 0.439453125, |
| "learning_rate": 2.3980768189957205e-07, |
| "loss": 0.0632, |
| "step": 2759 |
| }, |
| { |
| "epoch": 0.934583395060422, |
| "grad_norm": 1.0546875, |
| "learning_rate": 2.3737660990410415e-07, |
| "loss": 0.0615, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.9349220122325453, |
| "grad_norm": 0.40234375, |
| "learning_rate": 2.349577753578547e-07, |
| "loss": 0.043, |
| "step": 2761 |
| }, |
| { |
| "epoch": 0.9352606294046687, |
| "grad_norm": 0.55078125, |
| "learning_rate": 2.325511812928327e-07, |
| "loss": 0.0684, |
| "step": 2762 |
| }, |
| { |
| "epoch": 0.935599246576792, |
| "grad_norm": 0.4296875, |
| "learning_rate": 2.3015683072570406e-07, |
| "loss": 0.0581, |
| "step": 2763 |
| }, |
| { |
| "epoch": 0.9359378637489154, |
| "grad_norm": 0.5078125, |
| "learning_rate": 2.2777472665778678e-07, |
| "loss": 0.0654, |
| "step": 2764 |
| }, |
| { |
| "epoch": 0.9362764809210387, |
| "grad_norm": 0.51171875, |
| "learning_rate": 2.2540487207505012e-07, |
| "loss": 0.0574, |
| "step": 2765 |
| }, |
| { |
| "epoch": 0.936615098093162, |
| "grad_norm": 0.4375, |
| "learning_rate": 2.2304726994810454e-07, |
| "loss": 0.0502, |
| "step": 2766 |
| }, |
| { |
| "epoch": 0.9369537152652854, |
| "grad_norm": 0.765625, |
| "learning_rate": 2.2070192323220606e-07, |
| "loss": 0.0884, |
| "step": 2767 |
| }, |
| { |
| "epoch": 0.9372923324374087, |
| "grad_norm": 0.453125, |
| "learning_rate": 2.1836883486724857e-07, |
| "loss": 0.0496, |
| "step": 2768 |
| }, |
| { |
| "epoch": 0.9376309496095321, |
| "grad_norm": 0.46875, |
| "learning_rate": 2.1604800777775492e-07, |
| "loss": 0.0553, |
| "step": 2769 |
| }, |
| { |
| "epoch": 0.9379695667816554, |
| "grad_norm": 0.400390625, |
| "learning_rate": 2.1373944487288577e-07, |
| "loss": 0.0578, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.9383081839537788, |
| "grad_norm": 0.62109375, |
| "learning_rate": 2.1144314904642194e-07, |
| "loss": 0.0569, |
| "step": 2771 |
| }, |
| { |
| "epoch": 0.9386468011259022, |
| "grad_norm": 0.423828125, |
| "learning_rate": 2.091591231767709e-07, |
| "loss": 0.0565, |
| "step": 2772 |
| }, |
| { |
| "epoch": 0.9389854182980254, |
| "grad_norm": 0.49609375, |
| "learning_rate": 2.0688737012696136e-07, |
| "loss": 0.0561, |
| "step": 2773 |
| }, |
| { |
| "epoch": 0.9393240354701488, |
| "grad_norm": 0.51171875, |
| "learning_rate": 2.0462789274463323e-07, |
| "loss": 0.069, |
| "step": 2774 |
| }, |
| { |
| "epoch": 0.9396626526422721, |
| "grad_norm": 0.55859375, |
| "learning_rate": 2.023806938620443e-07, |
| "loss": 0.0771, |
| "step": 2775 |
| }, |
| { |
| "epoch": 0.9400012698143955, |
| "grad_norm": 0.44921875, |
| "learning_rate": 2.0014577629605681e-07, |
| "loss": 0.0607, |
| "step": 2776 |
| }, |
| { |
| "epoch": 0.9403398869865188, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.9792314284813984e-07, |
| "loss": 0.0706, |
| "step": 2777 |
| }, |
| { |
| "epoch": 0.9406785041586422, |
| "grad_norm": 0.734375, |
| "learning_rate": 1.957127963043648e-07, |
| "loss": 0.0555, |
| "step": 2778 |
| }, |
| { |
| "epoch": 0.9410171213307655, |
| "grad_norm": 0.408203125, |
| "learning_rate": 1.93514739435402e-07, |
| "loss": 0.0445, |
| "step": 2779 |
| }, |
| { |
| "epoch": 0.9413557385028888, |
| "grad_norm": 0.53515625, |
| "learning_rate": 1.9132897499651636e-07, |
| "loss": 0.0706, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.9416943556750121, |
| "grad_norm": 0.4765625, |
| "learning_rate": 1.8915550572756293e-07, |
| "loss": 0.0628, |
| "step": 2781 |
| }, |
| { |
| "epoch": 0.9420329728471355, |
| "grad_norm": 0.78125, |
| "learning_rate": 1.8699433435298452e-07, |
| "loss": 0.0882, |
| "step": 2782 |
| }, |
| { |
| "epoch": 0.9423715900192589, |
| "grad_norm": 0.48828125, |
| "learning_rate": 1.848454635818109e-07, |
| "loss": 0.0704, |
| "step": 2783 |
| }, |
| { |
| "epoch": 0.9427102071913822, |
| "grad_norm": 0.6171875, |
| "learning_rate": 1.8270889610765285e-07, |
| "loss": 0.0689, |
| "step": 2784 |
| }, |
| { |
| "epoch": 0.9430488243635056, |
| "grad_norm": 0.671875, |
| "learning_rate": 1.8058463460869478e-07, |
| "loss": 0.0878, |
| "step": 2785 |
| }, |
| { |
| "epoch": 0.9433874415356289, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.7847268174770226e-07, |
| "loss": 0.0701, |
| "step": 2786 |
| }, |
| { |
| "epoch": 0.9437260587077522, |
| "grad_norm": 0.6640625, |
| "learning_rate": 1.763730401720065e-07, |
| "loss": 0.0942, |
| "step": 2787 |
| }, |
| { |
| "epoch": 0.9440646758798755, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.7428571251350779e-07, |
| "loss": 0.0697, |
| "step": 2788 |
| }, |
| { |
| "epoch": 0.9444032930519989, |
| "grad_norm": 0.51171875, |
| "learning_rate": 1.7221070138867312e-07, |
| "loss": 0.0664, |
| "step": 2789 |
| }, |
| { |
| "epoch": 0.9447419102241222, |
| "grad_norm": 0.5, |
| "learning_rate": 1.701480093985275e-07, |
| "loss": 0.0477, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.9450805273962456, |
| "grad_norm": 0.390625, |
| "learning_rate": 1.6809763912865596e-07, |
| "loss": 0.0492, |
| "step": 2791 |
| }, |
| { |
| "epoch": 0.945419144568369, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.660595931491993e-07, |
| "loss": 0.063, |
| "step": 2792 |
| }, |
| { |
| "epoch": 0.9457577617404923, |
| "grad_norm": 0.5625, |
| "learning_rate": 1.6403387401484506e-07, |
| "loss": 0.076, |
| "step": 2793 |
| }, |
| { |
| "epoch": 0.9460963789126156, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.6202048426483652e-07, |
| "loss": 0.067, |
| "step": 2794 |
| }, |
| { |
| "epoch": 0.9464349960847389, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.6001942642295487e-07, |
| "loss": 0.0734, |
| "step": 2795 |
| }, |
| { |
| "epoch": 0.9467736132568623, |
| "grad_norm": 0.419921875, |
| "learning_rate": 1.580307029975281e-07, |
| "loss": 0.0465, |
| "step": 2796 |
| }, |
| { |
| "epoch": 0.9471122304289856, |
| "grad_norm": 0.435546875, |
| "learning_rate": 1.5605431648141878e-07, |
| "loss": 0.0428, |
| "step": 2797 |
| }, |
| { |
| "epoch": 0.947450847601109, |
| "grad_norm": 0.58203125, |
| "learning_rate": 1.5409026935203075e-07, |
| "loss": 0.0825, |
| "step": 2798 |
| }, |
| { |
| "epoch": 0.9477894647732323, |
| "grad_norm": 0.49609375, |
| "learning_rate": 1.5213856407129467e-07, |
| "loss": 0.0632, |
| "step": 2799 |
| }, |
| { |
| "epoch": 0.9481280819453557, |
| "grad_norm": 0.33203125, |
| "learning_rate": 1.501992030856736e-07, |
| "loss": 0.0405, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.9484666991174789, |
| "grad_norm": 0.64453125, |
| "learning_rate": 1.4827218882615847e-07, |
| "loss": 0.0767, |
| "step": 2801 |
| }, |
| { |
| "epoch": 0.9488053162896023, |
| "grad_norm": 0.61328125, |
| "learning_rate": 1.463575237082593e-07, |
| "loss": 0.1085, |
| "step": 2802 |
| }, |
| { |
| "epoch": 0.9491439334617257, |
| "grad_norm": 0.427734375, |
| "learning_rate": 1.444552101320107e-07, |
| "loss": 0.0514, |
| "step": 2803 |
| }, |
| { |
| "epoch": 0.949482550633849, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.42565250481963e-07, |
| "loss": 0.068, |
| "step": 2804 |
| }, |
| { |
| "epoch": 0.9498211678059724, |
| "grad_norm": 0.458984375, |
| "learning_rate": 1.4068764712717897e-07, |
| "loss": 0.0566, |
| "step": 2805 |
| }, |
| { |
| "epoch": 0.9501597849780957, |
| "grad_norm": 0.4609375, |
| "learning_rate": 1.3882240242123811e-07, |
| "loss": 0.0567, |
| "step": 2806 |
| }, |
| { |
| "epoch": 0.9504984021502191, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.3696951870222018e-07, |
| "loss": 0.0671, |
| "step": 2807 |
| }, |
| { |
| "epoch": 0.9508370193223424, |
| "grad_norm": 0.51953125, |
| "learning_rate": 1.3512899829271954e-07, |
| "loss": 0.0617, |
| "step": 2808 |
| }, |
| { |
| "epoch": 0.9511756364944657, |
| "grad_norm": 0.46875, |
| "learning_rate": 1.3330084349982509e-07, |
| "loss": 0.0632, |
| "step": 2809 |
| }, |
| { |
| "epoch": 0.951514253666589, |
| "grad_norm": 1.1171875, |
| "learning_rate": 1.3148505661513045e-07, |
| "loss": 0.0709, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.9518528708387124, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.2968163991472493e-07, |
| "loss": 0.0774, |
| "step": 2811 |
| }, |
| { |
| "epoch": 0.9521914880108358, |
| "grad_norm": 0.392578125, |
| "learning_rate": 1.2789059565919138e-07, |
| "loss": 0.0542, |
| "step": 2812 |
| }, |
| { |
| "epoch": 0.9525301051829591, |
| "grad_norm": 0.484375, |
| "learning_rate": 1.261119260936039e-07, |
| "loss": 0.0662, |
| "step": 2813 |
| }, |
| { |
| "epoch": 0.9528687223550825, |
| "grad_norm": 0.455078125, |
| "learning_rate": 1.243456334475246e-07, |
| "loss": 0.0544, |
| "step": 2814 |
| }, |
| { |
| "epoch": 0.9532073395272058, |
| "grad_norm": 0.47265625, |
| "learning_rate": 1.225917199350013e-07, |
| "loss": 0.0614, |
| "step": 2815 |
| }, |
| { |
| "epoch": 0.9535459566993291, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.2085018775456648e-07, |
| "loss": 0.0535, |
| "step": 2816 |
| }, |
| { |
| "epoch": 0.9538845738714524, |
| "grad_norm": 0.375, |
| "learning_rate": 1.1912103908922945e-07, |
| "loss": 0.0468, |
| "step": 2817 |
| }, |
| { |
| "epoch": 0.9542231910435758, |
| "grad_norm": 0.6328125, |
| "learning_rate": 1.1740427610647643e-07, |
| "loss": 0.0739, |
| "step": 2818 |
| }, |
| { |
| "epoch": 0.9545618082156991, |
| "grad_norm": 0.423828125, |
| "learning_rate": 1.1569990095827378e-07, |
| "loss": 0.0539, |
| "step": 2819 |
| }, |
| { |
| "epoch": 0.9549004253878225, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.1400791578105253e-07, |
| "loss": 0.0921, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.9552390425599459, |
| "grad_norm": 0.94140625, |
| "learning_rate": 1.1232832269571725e-07, |
| "loss": 0.1924, |
| "step": 2821 |
| }, |
| { |
| "epoch": 0.9555776597320692, |
| "grad_norm": 0.703125, |
| "learning_rate": 1.1066112380763939e-07, |
| "loss": 0.0674, |
| "step": 2822 |
| }, |
| { |
| "epoch": 0.9559162769041925, |
| "grad_norm": 0.5, |
| "learning_rate": 1.0900632120665166e-07, |
| "loss": 0.0646, |
| "step": 2823 |
| }, |
| { |
| "epoch": 0.9562548940763158, |
| "grad_norm": 0.5625, |
| "learning_rate": 1.073639169670504e-07, |
| "loss": 0.0756, |
| "step": 2824 |
| }, |
| { |
| "epoch": 0.9565935112484392, |
| "grad_norm": 0.58203125, |
| "learning_rate": 1.0573391314758652e-07, |
| "loss": 0.0681, |
| "step": 2825 |
| }, |
| { |
| "epoch": 0.9569321284205625, |
| "grad_norm": 0.55859375, |
| "learning_rate": 1.0411631179147342e-07, |
| "loss": 0.0694, |
| "step": 2826 |
| }, |
| { |
| "epoch": 0.9572707455926859, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.0251111492637245e-07, |
| "loss": 0.0779, |
| "step": 2827 |
| }, |
| { |
| "epoch": 0.9576093627648092, |
| "grad_norm": 0.451171875, |
| "learning_rate": 1.0091832456439854e-07, |
| "loss": 0.0551, |
| "step": 2828 |
| }, |
| { |
| "epoch": 0.9579479799369326, |
| "grad_norm": 0.494140625, |
| "learning_rate": 9.933794270211461e-08, |
| "loss": 0.0679, |
| "step": 2829 |
| }, |
| { |
| "epoch": 0.9582865971090558, |
| "grad_norm": 0.51171875, |
| "learning_rate": 9.776997132052935e-08, |
| "loss": 0.0604, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.9586252142811792, |
| "grad_norm": 0.44140625, |
| "learning_rate": 9.621441238509611e-08, |
| "loss": 0.0617, |
| "step": 2831 |
| }, |
| { |
| "epoch": 0.9589638314533026, |
| "grad_norm": 0.53515625, |
| "learning_rate": 9.467126784570623e-08, |
| "loss": 0.0703, |
| "step": 2832 |
| }, |
| { |
| "epoch": 0.9593024486254259, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.314053963669245e-08, |
| "loss": 0.0632, |
| "step": 2833 |
| }, |
| { |
| "epoch": 0.9596410657975493, |
| "grad_norm": 0.431640625, |
| "learning_rate": 9.162222967682322e-08, |
| "loss": 0.0564, |
| "step": 2834 |
| }, |
| { |
| "epoch": 0.9599796829696726, |
| "grad_norm": 0.50390625, |
| "learning_rate": 9.011633986929947e-08, |
| "loss": 0.0722, |
| "step": 2835 |
| }, |
| { |
| "epoch": 0.960318300141796, |
| "grad_norm": 0.490234375, |
| "learning_rate": 8.862287210175347e-08, |
| "loss": 0.0665, |
| "step": 2836 |
| }, |
| { |
| "epoch": 0.9606569173139193, |
| "grad_norm": 0.494140625, |
| "learning_rate": 8.714182824624883e-08, |
| "loss": 0.0717, |
| "step": 2837 |
| }, |
| { |
| "epoch": 0.9609955344860426, |
| "grad_norm": 0.46875, |
| "learning_rate": 8.567321015927387e-08, |
| "loss": 0.0603, |
| "step": 2838 |
| }, |
| { |
| "epoch": 0.9613341516581659, |
| "grad_norm": 0.5703125, |
| "learning_rate": 8.421701968174156e-08, |
| "loss": 0.0772, |
| "step": 2839 |
| }, |
| { |
| "epoch": 0.9616727688302893, |
| "grad_norm": 0.5234375, |
| "learning_rate": 8.27732586389851e-08, |
| "loss": 0.0668, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.9620113860024126, |
| "grad_norm": 0.52734375, |
| "learning_rate": 8.134192884076131e-08, |
| "loss": 0.0734, |
| "step": 2841 |
| }, |
| { |
| "epoch": 0.962350003174536, |
| "grad_norm": 0.423828125, |
| "learning_rate": 7.992303208123941e-08, |
| "loss": 0.0504, |
| "step": 2842 |
| }, |
| { |
| "epoch": 0.9626886203466594, |
| "grad_norm": 0.51171875, |
| "learning_rate": 7.851657013901003e-08, |
| "loss": 0.0743, |
| "step": 2843 |
| }, |
| { |
| "epoch": 0.9630272375187827, |
| "grad_norm": 0.478515625, |
| "learning_rate": 7.712254477707071e-08, |
| "loss": 0.0614, |
| "step": 2844 |
| }, |
| { |
| "epoch": 0.963365854690906, |
| "grad_norm": 0.470703125, |
| "learning_rate": 7.574095774283363e-08, |
| "loss": 0.0666, |
| "step": 2845 |
| }, |
| { |
| "epoch": 0.9637044718630293, |
| "grad_norm": 0.48046875, |
| "learning_rate": 7.437181076811794e-08, |
| "loss": 0.06, |
| "step": 2846 |
| }, |
| { |
| "epoch": 0.9640430890351527, |
| "grad_norm": 0.427734375, |
| "learning_rate": 7.301510556914859e-08, |
| "loss": 0.0535, |
| "step": 2847 |
| }, |
| { |
| "epoch": 0.964381706207276, |
| "grad_norm": 0.70703125, |
| "learning_rate": 7.167084384655742e-08, |
| "loss": 0.0815, |
| "step": 2848 |
| }, |
| { |
| "epoch": 0.9647203233793994, |
| "grad_norm": 0.439453125, |
| "learning_rate": 7.033902728537546e-08, |
| "loss": 0.0635, |
| "step": 2849 |
| }, |
| { |
| "epoch": 0.9650589405515227, |
| "grad_norm": 0.443359375, |
| "learning_rate": 6.901965755503503e-08, |
| "loss": 0.0566, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.9653975577236461, |
| "grad_norm": 0.51171875, |
| "learning_rate": 6.77127363093666e-08, |
| "loss": 0.0646, |
| "step": 2851 |
| }, |
| { |
| "epoch": 0.9657361748957694, |
| "grad_norm": 0.41796875, |
| "learning_rate": 6.641826518659633e-08, |
| "loss": 0.0575, |
| "step": 2852 |
| }, |
| { |
| "epoch": 0.9660747920678927, |
| "grad_norm": 0.43359375, |
| "learning_rate": 6.513624580934186e-08, |
| "loss": 0.0448, |
| "step": 2853 |
| }, |
| { |
| "epoch": 0.9664134092400161, |
| "grad_norm": 0.462890625, |
| "learning_rate": 6.386667978461658e-08, |
| "loss": 0.0509, |
| "step": 2854 |
| }, |
| { |
| "epoch": 0.9667520264121394, |
| "grad_norm": 0.55078125, |
| "learning_rate": 6.260956870382196e-08, |
| "loss": 0.0709, |
| "step": 2855 |
| }, |
| { |
| "epoch": 0.9670906435842628, |
| "grad_norm": 0.55078125, |
| "learning_rate": 6.136491414274415e-08, |
| "loss": 0.0599, |
| "step": 2856 |
| }, |
| { |
| "epoch": 0.9674292607563861, |
| "grad_norm": 0.478515625, |
| "learning_rate": 6.01327176615607e-08, |
| "loss": 0.0645, |
| "step": 2857 |
| }, |
| { |
| "epoch": 0.9677678779285095, |
| "grad_norm": 0.494140625, |
| "learning_rate": 5.891298080482943e-08, |
| "loss": 0.0707, |
| "step": 2858 |
| }, |
| { |
| "epoch": 0.9681064951006327, |
| "grad_norm": 0.71875, |
| "learning_rate": 5.770570510148954e-08, |
| "loss": 0.0616, |
| "step": 2859 |
| }, |
| { |
| "epoch": 0.9684451122727561, |
| "grad_norm": 0.59375, |
| "learning_rate": 5.65108920648616e-08, |
| "loss": 0.087, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.9687837294448794, |
| "grad_norm": 0.625, |
| "learning_rate": 5.5328543192643134e-08, |
| "loss": 0.0885, |
| "step": 2861 |
| }, |
| { |
| "epoch": 0.9691223466170028, |
| "grad_norm": 0.50390625, |
| "learning_rate": 5.4158659966909724e-08, |
| "loss": 0.0677, |
| "step": 2862 |
| }, |
| { |
| "epoch": 0.9694609637891262, |
| "grad_norm": 0.498046875, |
| "learning_rate": 5.300124385410943e-08, |
| "loss": 0.0629, |
| "step": 2863 |
| }, |
| { |
| "epoch": 0.9697995809612495, |
| "grad_norm": 0.5390625, |
| "learning_rate": 5.1856296305063945e-08, |
| "loss": 0.0759, |
| "step": 2864 |
| }, |
| { |
| "epoch": 0.9701381981333729, |
| "grad_norm": 0.451171875, |
| "learning_rate": 5.072381875496524e-08, |
| "loss": 0.065, |
| "step": 2865 |
| }, |
| { |
| "epoch": 0.9704768153054962, |
| "grad_norm": 0.447265625, |
| "learning_rate": 4.960381262337333e-08, |
| "loss": 0.0499, |
| "step": 2866 |
| }, |
| { |
| "epoch": 0.9708154324776195, |
| "grad_norm": 0.431640625, |
| "learning_rate": 4.84962793142163e-08, |
| "loss": 0.0571, |
| "step": 2867 |
| }, |
| { |
| "epoch": 0.9711540496497428, |
| "grad_norm": 0.5859375, |
| "learning_rate": 4.740122021578808e-08, |
| "loss": 0.0695, |
| "step": 2868 |
| }, |
| { |
| "epoch": 0.9714926668218662, |
| "grad_norm": 0.435546875, |
| "learning_rate": 4.6318636700743994e-08, |
| "loss": 0.0598, |
| "step": 2869 |
| }, |
| { |
| "epoch": 0.9718312839939895, |
| "grad_norm": 0.412109375, |
| "learning_rate": 4.5248530126102976e-08, |
| "loss": 0.0446, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.9721699011661129, |
| "grad_norm": 0.515625, |
| "learning_rate": 4.419090183324315e-08, |
| "loss": 0.0603, |
| "step": 2871 |
| }, |
| { |
| "epoch": 0.9725085183382363, |
| "grad_norm": 0.5546875, |
| "learning_rate": 4.314575314790292e-08, |
| "loss": 0.074, |
| "step": 2872 |
| }, |
| { |
| "epoch": 0.9728471355103596, |
| "grad_norm": 0.53515625, |
| "learning_rate": 4.2113085380176556e-08, |
| "loss": 0.0664, |
| "step": 2873 |
| }, |
| { |
| "epoch": 0.9731857526824829, |
| "grad_norm": 0.515625, |
| "learning_rate": 4.109289982451081e-08, |
| "loss": 0.0668, |
| "step": 2874 |
| }, |
| { |
| "epoch": 0.9735243698546062, |
| "grad_norm": 0.52734375, |
| "learning_rate": 4.008519775971054e-08, |
| "loss": 0.0689, |
| "step": 2875 |
| }, |
| { |
| "epoch": 0.9738629870267296, |
| "grad_norm": 0.376953125, |
| "learning_rate": 3.908998044892975e-08, |
| "loss": 0.0483, |
| "step": 2876 |
| }, |
| { |
| "epoch": 0.9742016041988529, |
| "grad_norm": 0.53125, |
| "learning_rate": 3.810724913967278e-08, |
| "loss": 0.0719, |
| "step": 2877 |
| }, |
| { |
| "epoch": 0.9745402213709763, |
| "grad_norm": 0.4609375, |
| "learning_rate": 3.713700506379536e-08, |
| "loss": 0.0687, |
| "step": 2878 |
| }, |
| { |
| "epoch": 0.9748788385430996, |
| "grad_norm": 0.58984375, |
| "learning_rate": 3.617924943749573e-08, |
| "loss": 0.0788, |
| "step": 2879 |
| }, |
| { |
| "epoch": 0.975217455715223, |
| "grad_norm": 0.609375, |
| "learning_rate": 3.5233983461322453e-08, |
| "loss": 0.0752, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.9755560728873462, |
| "grad_norm": 0.5234375, |
| "learning_rate": 3.430120832016659e-08, |
| "loss": 0.0585, |
| "step": 2881 |
| }, |
| { |
| "epoch": 0.9758946900594696, |
| "grad_norm": 0.58203125, |
| "learning_rate": 3.338092518326064e-08, |
| "loss": 0.0634, |
| "step": 2882 |
| }, |
| { |
| "epoch": 0.976233307231593, |
| "grad_norm": 0.474609375, |
| "learning_rate": 3.2473135204180715e-08, |
| "loss": 0.0662, |
| "step": 2883 |
| }, |
| { |
| "epoch": 0.9765719244037163, |
| "grad_norm": 0.58203125, |
| "learning_rate": 3.1577839520841034e-08, |
| "loss": 0.0625, |
| "step": 2884 |
| }, |
| { |
| "epoch": 0.9769105415758397, |
| "grad_norm": 0.5, |
| "learning_rate": 3.0695039255494995e-08, |
| "loss": 0.0629, |
| "step": 2885 |
| }, |
| { |
| "epoch": 0.977249158747963, |
| "grad_norm": 0.578125, |
| "learning_rate": 2.982473551473297e-08, |
| "loss": 0.0783, |
| "step": 2886 |
| }, |
| { |
| "epoch": 0.9775877759200864, |
| "grad_norm": 0.435546875, |
| "learning_rate": 2.8966929389481202e-08, |
| "loss": 0.0542, |
| "step": 2887 |
| }, |
| { |
| "epoch": 0.9779263930922096, |
| "grad_norm": 0.515625, |
| "learning_rate": 2.8121621954998457e-08, |
| "loss": 0.0662, |
| "step": 2888 |
| }, |
| { |
| "epoch": 0.978265010264333, |
| "grad_norm": 0.5390625, |
| "learning_rate": 2.7288814270878262e-08, |
| "loss": 0.0559, |
| "step": 2889 |
| }, |
| { |
| "epoch": 0.9786036274364563, |
| "grad_norm": 0.80078125, |
| "learning_rate": 2.6468507381045562e-08, |
| "loss": 0.0583, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.9789422446085797, |
| "grad_norm": 0.7109375, |
| "learning_rate": 2.5660702313754505e-08, |
| "loss": 0.0789, |
| "step": 2891 |
| }, |
| { |
| "epoch": 0.9792808617807031, |
| "grad_norm": 1.1015625, |
| "learning_rate": 2.4865400081589552e-08, |
| "loss": 0.0491, |
| "step": 2892 |
| }, |
| { |
| "epoch": 0.9796194789528264, |
| "grad_norm": 0.375, |
| "learning_rate": 2.4082601681461038e-08, |
| "loss": 0.0448, |
| "step": 2893 |
| }, |
| { |
| "epoch": 0.9799580961249498, |
| "grad_norm": 0.5546875, |
| "learning_rate": 2.3312308094607382e-08, |
| "loss": 0.063, |
| "step": 2894 |
| }, |
| { |
| "epoch": 0.9802967132970731, |
| "grad_norm": 0.67578125, |
| "learning_rate": 2.2554520286592885e-08, |
| "loss": 0.0595, |
| "step": 2895 |
| }, |
| { |
| "epoch": 0.9806353304691964, |
| "grad_norm": 0.455078125, |
| "learning_rate": 2.180923920730216e-08, |
| "loss": 0.0575, |
| "step": 2896 |
| }, |
| { |
| "epoch": 0.9809739476413197, |
| "grad_norm": 0.546875, |
| "learning_rate": 2.10764657909468e-08, |
| "loss": 0.0802, |
| "step": 2897 |
| }, |
| { |
| "epoch": 0.9813125648134431, |
| "grad_norm": 0.451171875, |
| "learning_rate": 2.0356200956058725e-08, |
| "loss": 0.0639, |
| "step": 2898 |
| }, |
| { |
| "epoch": 0.9816511819855664, |
| "grad_norm": 0.373046875, |
| "learning_rate": 1.9648445605487954e-08, |
| "loss": 0.044, |
| "step": 2899 |
| }, |
| { |
| "epoch": 0.9819897991576898, |
| "grad_norm": 0.6484375, |
| "learning_rate": 1.8953200626408153e-08, |
| "loss": 0.0908, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.9823284163298132, |
| "grad_norm": 0.5703125, |
| "learning_rate": 1.827046689030665e-08, |
| "loss": 0.0594, |
| "step": 2901 |
| }, |
| { |
| "epoch": 0.9826670335019365, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.76002452529922e-08, |
| "loss": 0.051, |
| "step": 2902 |
| }, |
| { |
| "epoch": 0.9830056506740598, |
| "grad_norm": 0.59765625, |
| "learning_rate": 1.6942536554587218e-08, |
| "loss": 0.0653, |
| "step": 2903 |
| }, |
| { |
| "epoch": 0.9833442678461831, |
| "grad_norm": 0.41796875, |
| "learning_rate": 1.6297341619528894e-08, |
| "loss": 0.0472, |
| "step": 2904 |
| }, |
| { |
| "epoch": 0.9836828850183065, |
| "grad_norm": 0.53515625, |
| "learning_rate": 1.566466125656918e-08, |
| "loss": 0.0653, |
| "step": 2905 |
| }, |
| { |
| "epoch": 0.9840215021904298, |
| "grad_norm": 0.5546875, |
| "learning_rate": 1.50444962587748e-08, |
| "loss": 0.079, |
| "step": 2906 |
| }, |
| { |
| "epoch": 0.9843601193625532, |
| "grad_norm": 0.578125, |
| "learning_rate": 1.4436847403519471e-08, |
| "loss": 0.0816, |
| "step": 2907 |
| }, |
| { |
| "epoch": 0.9846987365346765, |
| "grad_norm": 0.57421875, |
| "learning_rate": 1.3841715452493908e-08, |
| "loss": 0.0587, |
| "step": 2908 |
| }, |
| { |
| "epoch": 0.9850373537067999, |
| "grad_norm": 1.1875, |
| "learning_rate": 1.325910115169471e-08, |
| "loss": 0.0831, |
| "step": 2909 |
| }, |
| { |
| "epoch": 0.9853759708789231, |
| "grad_norm": 0.455078125, |
| "learning_rate": 1.2689005231429907e-08, |
| "loss": 0.0584, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.9857145880510465, |
| "grad_norm": 0.44140625, |
| "learning_rate": 1.2131428406313428e-08, |
| "loss": 0.0553, |
| "step": 2911 |
| }, |
| { |
| "epoch": 0.9860532052231699, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.1586371375268413e-08, |
| "loss": 0.0721, |
| "step": 2912 |
| }, |
| { |
| "epoch": 0.9863918223952932, |
| "grad_norm": 0.4140625, |
| "learning_rate": 1.105383482152389e-08, |
| "loss": 0.0446, |
| "step": 2913 |
| }, |
| { |
| "epoch": 0.9867304395674166, |
| "grad_norm": 0.51953125, |
| "learning_rate": 1.0533819412614776e-08, |
| "loss": 0.0634, |
| "step": 2914 |
| }, |
| { |
| "epoch": 0.9870690567395399, |
| "grad_norm": 0.66015625, |
| "learning_rate": 1.0026325800380766e-08, |
| "loss": 0.0712, |
| "step": 2915 |
| }, |
| { |
| "epoch": 0.9874076739116633, |
| "grad_norm": 0.443359375, |
| "learning_rate": 9.531354620964107e-09, |
| "loss": 0.0529, |
| "step": 2916 |
| }, |
| { |
| "epoch": 0.9877462910837865, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.048906494811826e-09, |
| "loss": 0.0603, |
| "step": 2917 |
| }, |
| { |
| "epoch": 0.9880849082559099, |
| "grad_norm": 0.486328125, |
| "learning_rate": 8.5789820266724e-09, |
| "loss": 0.0652, |
| "step": 2918 |
| }, |
| { |
| "epoch": 0.9884235254280332, |
| "grad_norm": 0.515625, |
| "learning_rate": 8.121581805596857e-09, |
| "loss": 0.0653, |
| "step": 2919 |
| }, |
| { |
| "epoch": 0.9887621426001566, |
| "grad_norm": 0.423828125, |
| "learning_rate": 7.676706404935453e-09, |
| "loss": 0.0558, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.98910075977228, |
| "grad_norm": 0.490234375, |
| "learning_rate": 7.24435638233989e-09, |
| "loss": 0.066, |
| "step": 2921 |
| }, |
| { |
| "epoch": 0.9894393769444033, |
| "grad_norm": 0.4140625, |
| "learning_rate": 6.824532279761098e-09, |
| "loss": 0.0527, |
| "step": 2922 |
| }, |
| { |
| "epoch": 0.9897779941165267, |
| "grad_norm": 0.42578125, |
| "learning_rate": 6.417234623449231e-09, |
| "loss": 0.0532, |
| "step": 2923 |
| }, |
| { |
| "epoch": 0.99011661128865, |
| "grad_norm": 0.4375, |
| "learning_rate": 6.02246392395145e-09, |
| "loss": 0.0528, |
| "step": 2924 |
| }, |
| { |
| "epoch": 0.9904552284607733, |
| "grad_norm": 0.421875, |
| "learning_rate": 5.6402206761119185e-09, |
| "loss": 0.0547, |
| "step": 2925 |
| }, |
| { |
| "epoch": 0.9907938456328966, |
| "grad_norm": 0.671875, |
| "learning_rate": 5.27050535907403e-09, |
| "loss": 0.0707, |
| "step": 2926 |
| }, |
| { |
| "epoch": 0.99113246280502, |
| "grad_norm": 0.49609375, |
| "learning_rate": 4.91331843627485e-09, |
| "loss": 0.0661, |
| "step": 2927 |
| }, |
| { |
| "epoch": 0.9914710799771433, |
| "grad_norm": 0.4765625, |
| "learning_rate": 4.568660355448451e-09, |
| "loss": 0.0526, |
| "step": 2928 |
| }, |
| { |
| "epoch": 0.9918096971492667, |
| "grad_norm": 0.490234375, |
| "learning_rate": 4.2365315486248e-09, |
| "loss": 0.0686, |
| "step": 2929 |
| }, |
| { |
| "epoch": 0.9921483143213901, |
| "grad_norm": 0.41015625, |
| "learning_rate": 3.91693243212643e-09, |
| "loss": 0.0554, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.9924869314935134, |
| "grad_norm": 0.419921875, |
| "learning_rate": 3.609863406570657e-09, |
| "loss": 0.0495, |
| "step": 2931 |
| }, |
| { |
| "epoch": 0.9928255486656367, |
| "grad_norm": 0.5078125, |
| "learning_rate": 3.315324856869584e-09, |
| "loss": 0.0686, |
| "step": 2932 |
| }, |
| { |
| "epoch": 0.99316416583776, |
| "grad_norm": 0.423828125, |
| "learning_rate": 3.0333171522256568e-09, |
| "loss": 0.0586, |
| "step": 2933 |
| }, |
| { |
| "epoch": 0.9935027830098834, |
| "grad_norm": 0.462890625, |
| "learning_rate": 2.7638406461372167e-09, |
| "loss": 0.065, |
| "step": 2934 |
| }, |
| { |
| "epoch": 0.9938414001820067, |
| "grad_norm": 0.62109375, |
| "learning_rate": 2.5068956763918405e-09, |
| "loss": 0.0688, |
| "step": 2935 |
| }, |
| { |
| "epoch": 0.9941800173541301, |
| "grad_norm": 0.66796875, |
| "learning_rate": 2.262482565070778e-09, |
| "loss": 0.0933, |
| "step": 2936 |
| }, |
| { |
| "epoch": 0.9945186345262534, |
| "grad_norm": 0.83984375, |
| "learning_rate": 2.0306016185456243e-09, |
| "loss": 0.0699, |
| "step": 2937 |
| }, |
| { |
| "epoch": 0.9948572516983768, |
| "grad_norm": 0.4375, |
| "learning_rate": 1.8112531274794287e-09, |
| "loss": 0.0583, |
| "step": 2938 |
| }, |
| { |
| "epoch": 0.9951958688705, |
| "grad_norm": 0.5, |
| "learning_rate": 1.6044373668255841e-09, |
| "loss": 0.0712, |
| "step": 2939 |
| }, |
| { |
| "epoch": 0.9955344860426234, |
| "grad_norm": 0.640625, |
| "learning_rate": 1.4101545958267183e-09, |
| "loss": 0.0991, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.9958731032147468, |
| "grad_norm": 0.53125, |
| "learning_rate": 1.228405058018023e-09, |
| "loss": 0.0565, |
| "step": 2941 |
| }, |
| { |
| "epoch": 0.9962117203868701, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.0591889812205934e-09, |
| "loss": 0.067, |
| "step": 2942 |
| }, |
| { |
| "epoch": 0.9965503375589935, |
| "grad_norm": 0.380859375, |
| "learning_rate": 9.025065775492003e-10, |
| "loss": 0.0483, |
| "step": 2943 |
| }, |
| { |
| "epoch": 0.9968889547311168, |
| "grad_norm": 0.431640625, |
| "learning_rate": 7.583580434022963e-10, |
| "loss": 0.064, |
| "step": 2944 |
| }, |
| { |
| "epoch": 0.9972275719032402, |
| "grad_norm": 0.494140625, |
| "learning_rate": 6.267435594720095e-10, |
| "loss": 0.0585, |
| "step": 2945 |
| }, |
| { |
| "epoch": 0.9975661890753634, |
| "grad_norm": 0.451171875, |
| "learning_rate": 5.076632907374812e-10, |
| "loss": 0.0568, |
| "step": 2946 |
| }, |
| { |
| "epoch": 0.9979048062474868, |
| "grad_norm": 0.486328125, |
| "learning_rate": 4.011173864637563e-10, |
| "loss": 0.0697, |
| "step": 2947 |
| }, |
| { |
| "epoch": 0.9982434234196101, |
| "grad_norm": 0.5234375, |
| "learning_rate": 3.0710598020844416e-10, |
| "loss": 0.0631, |
| "step": 2948 |
| }, |
| { |
| "epoch": 0.9985820405917335, |
| "grad_norm": 0.44921875, |
| "learning_rate": 2.2562918981394732e-10, |
| "loss": 0.0585, |
| "step": 2949 |
| }, |
| { |
| "epoch": 0.9989206577638569, |
| "grad_norm": 0.52734375, |
| "learning_rate": 1.5668711741079202e-10, |
| "loss": 0.0752, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.9992592749359802, |
| "grad_norm": 0.48046875, |
| "learning_rate": 1.0027984941873847e-10, |
| "loss": 0.0674, |
| "step": 2951 |
| }, |
| { |
| "epoch": 0.9995978921081036, |
| "grad_norm": 0.609375, |
| "learning_rate": 5.640745654345026e-11, |
| "loss": 0.0774, |
| "step": 2952 |
| }, |
| { |
| "epoch": 0.9999365092802269, |
| "grad_norm": 0.515625, |
| "learning_rate": 2.5069993779824887e-11, |
| "loss": 0.0685, |
| "step": 2953 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.98046875, |
| "learning_rate": 6.267500408663196e-12, |
| "loss": 0.0576, |
| "step": 2954 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 2954, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 296, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8.726211900679385e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|