|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0017714791851196,
  "eval_steps": 500,
  "global_step": 1131,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.0, "grad_norm": 15.385598322874909, "learning_rate": 5.000000000000001e-07, "loss": 0.7627, "step": 1},
    {"epoch": 0.0, "grad_norm": 15.42812332406859, "learning_rate": 1.0000000000000002e-06, "loss": 0.794, "step": 2},
    {"epoch": 0.0, "grad_norm": 13.76934599903778, "learning_rate": 1.5e-06, "loss": 0.7894, "step": 3},
    {"epoch": 0.0, "grad_norm": 7.9055471186770685, "learning_rate": 2.0000000000000003e-06, "loss": 0.7346, "step": 4},
    {"epoch": 0.0, "grad_norm": 8.624179170790118, "learning_rate": 2.5e-06, "loss": 0.7458, "step": 5},
    {"epoch": 0.01, "grad_norm": 37.14544394485457, "learning_rate": 3e-06, "loss": 0.8249, "step": 6},
    {"epoch": 0.01, "grad_norm": 13.413192499879626, "learning_rate": 3.5e-06, "loss": 0.7692, "step": 7},
    {"epoch": 0.01, "grad_norm": 11.194156755277431, "learning_rate": 4.000000000000001e-06, "loss": 0.7724, "step": 8},
    {"epoch": 0.01, "grad_norm": 8.569279640169995, "learning_rate": 4.5e-06, "loss": 0.7851, "step": 9},
    {"epoch": 0.01, "grad_norm": 18.113903622060178, "learning_rate": 5e-06, "loss": 0.7874, "step": 10},
    {"epoch": 0.01, "grad_norm": 7.486914001687124, "learning_rate": 4.999997558722919e-06, "loss": 0.7553, "step": 11},
    {"epoch": 0.01, "grad_norm": 7.280219682440894, "learning_rate": 4.999990234896445e-06, "loss": 0.7095, "step": 12},
    {"epoch": 0.01, "grad_norm": 4.3413734180304155, "learning_rate": 4.99997802853488e-06, "loss": 0.6916, "step": 13},
    {"epoch": 0.01, "grad_norm": 5.756315245615391, "learning_rate": 4.999960939662063e-06, "loss": 0.7407, "step": 14},
    {"epoch": 0.01, "grad_norm": 5.090553047874293, "learning_rate": 4.999938968311371e-06, "loss": 0.7387, "step": 15},
    {"epoch": 0.01, "grad_norm": 5.8370558847287075, "learning_rate": 4.9999121145257126e-06, "loss": 0.7051, "step": 16},
    {"epoch": 0.02, "grad_norm": 3.986658012877664, "learning_rate": 4.999880378357535e-06, "loss": 0.6871, "step": 17},
    {"epoch": 0.02, "grad_norm": 4.141716122521651, "learning_rate": 4.9998437598688195e-06, "loss": 0.6694, "step": 18},
    {"epoch": 0.02, "grad_norm": 4.729722439630604, "learning_rate": 4.9998022591310815e-06, "loss": 0.716, "step": 19},
    {"epoch": 0.02, "grad_norm": 2.9486336901615497, "learning_rate": 4.999755876225375e-06, "loss": 0.6387, "step": 20},
    {"epoch": 0.02, "grad_norm": 2.8336874650575745, "learning_rate": 4.999704611242285e-06, "loss": 0.6542, "step": 21},
    {"epoch": 0.02, "grad_norm": 3.6724374918638905, "learning_rate": 4.999648464281934e-06, "loss": 0.6617, "step": 22},
    {"epoch": 0.02, "grad_norm": 2.941494127880678, "learning_rate": 4.999587435453979e-06, "loss": 0.6687, "step": 23},
    {"epoch": 0.02, "grad_norm": 2.6261822206464744, "learning_rate": 4.999521524877608e-06, "loss": 0.6634, "step": 24},
    {"epoch": 0.02, "grad_norm": 2.8059947014946305, "learning_rate": 4.999450732681549e-06, "loss": 0.6901, "step": 25},
    {"epoch": 0.02, "grad_norm": 3.131537494217822, "learning_rate": 4.999375059004058e-06, "loss": 0.6407, "step": 26},
    {"epoch": 0.02, "grad_norm": 2.7893212245465837, "learning_rate": 4.99929450399293e-06, "loss": 0.6638, "step": 27},
    {"epoch": 0.02, "grad_norm": 2.4411586751746, "learning_rate": 4.999209067805487e-06, "loss": 0.6196, "step": 28},
    {"epoch": 0.03, "grad_norm": 2.8807261299944082, "learning_rate": 4.999118750608591e-06, "loss": 0.6839, "step": 29},
    {"epoch": 0.03, "grad_norm": 2.879993804839069, "learning_rate": 4.9990235525786326e-06, "loss": 0.6484, "step": 30},
    {"epoch": 0.03, "grad_norm": 2.604360711268946, "learning_rate": 4.998923473901535e-06, "loss": 0.6313, "step": 31},
    {"epoch": 0.03, "grad_norm": 2.403225544767816, "learning_rate": 4.9988185147727544e-06, "loss": 0.6209, "step": 32},
    {"epoch": 0.03, "grad_norm": 2.669567772543462, "learning_rate": 4.998708675397278e-06, "loss": 0.6068, "step": 33},
    {"epoch": 0.03, "grad_norm": 2.443946495915797, "learning_rate": 4.998593955989626e-06, "loss": 0.6731, "step": 34},
    {"epoch": 0.03, "grad_norm": 2.2104680876118317, "learning_rate": 4.998474356773845e-06, "loss": 0.6243, "step": 35},
    {"epoch": 0.03, "grad_norm": 2.3602199264043957, "learning_rate": 4.9983498779835175e-06, "loss": 0.6649, "step": 36},
    {"epoch": 0.03, "grad_norm": 2.4676911263240844, "learning_rate": 4.998220519861752e-06, "loss": 0.6174, "step": 37},
    {"epoch": 0.03, "grad_norm": 2.3419026099030282, "learning_rate": 4.998086282661188e-06, "loss": 0.6123, "step": 38},
    {"epoch": 0.03, "grad_norm": 2.14900736954254, "learning_rate": 4.997947166643993e-06, "loss": 0.63, "step": 39},
    {"epoch": 0.04, "grad_norm": 2.570907426799795, "learning_rate": 4.997803172081864e-06, "loss": 0.6249, "step": 40},
    {"epoch": 0.04, "grad_norm": 2.516952735669967, "learning_rate": 4.997654299256026e-06, "loss": 0.6727, "step": 41},
    {"epoch": 0.04, "grad_norm": 2.1600457198543874, "learning_rate": 4.997500548457231e-06, "loss": 0.6719, "step": 42},
    {"epoch": 0.04, "grad_norm": 2.2177572033934743, "learning_rate": 4.997341919985756e-06, "loss": 0.6148, "step": 43},
    {"epoch": 0.04, "grad_norm": 2.397105205209689, "learning_rate": 4.997178414151409e-06, "loss": 0.6167, "step": 44},
    {"epoch": 0.04, "grad_norm": 2.1254940534972167, "learning_rate": 4.997010031273517e-06, "loss": 0.6446, "step": 45},
    {"epoch": 0.04, "grad_norm": 2.2113023791837194, "learning_rate": 4.996836771680937e-06, "loss": 0.6304, "step": 46},
    {"epoch": 0.04, "grad_norm": 2.386446316275664, "learning_rate": 4.99665863571205e-06, "loss": 0.6621, "step": 47},
    {"epoch": 0.04, "grad_norm": 2.1838934384314483, "learning_rate": 4.996475623714756e-06, "loss": 0.6214, "step": 48},
    {"epoch": 0.04, "grad_norm": 2.2047933657923586, "learning_rate": 4.996287736046485e-06, "loss": 0.6478, "step": 49},
    {"epoch": 0.04, "grad_norm": 2.208809457983808, "learning_rate": 4.996094973074183e-06, "loss": 0.6097, "step": 50},
    {"epoch": 0.05, "grad_norm": 2.1318377198138267, "learning_rate": 4.995897335174322e-06, "loss": 0.622, "step": 51},
    {"epoch": 0.05, "grad_norm": 2.0673034122993537, "learning_rate": 4.995694822732893e-06, "loss": 0.6036, "step": 52},
    {"epoch": 0.05, "grad_norm": 2.195105312645423, "learning_rate": 4.9954874361454055e-06, "loss": 0.6052, "step": 53},
    {"epoch": 0.05, "grad_norm": 2.157855029176061, "learning_rate": 4.995275175816892e-06, "loss": 0.6455, "step": 54},
    {"epoch": 0.05, "grad_norm": 2.0500405783991043, "learning_rate": 4.9950580421619e-06, "loss": 0.6353, "step": 55},
    {"epoch": 0.05, "grad_norm": 2.199629904296075, "learning_rate": 4.9948360356044965e-06, "loss": 0.6122, "step": 56},
    {"epoch": 0.05, "grad_norm": 2.186847580161491, "learning_rate": 4.994609156578267e-06, "loss": 0.6073, "step": 57},
    {"epoch": 0.05, "grad_norm": 2.0207512037097835, "learning_rate": 4.994377405526308e-06, "loss": 0.61, "step": 58},
    {"epoch": 0.05, "grad_norm": 2.3170193964114976, "learning_rate": 4.994140782901237e-06, "loss": 0.6322, "step": 59},
    {"epoch": 0.05, "grad_norm": 2.014785890436746, "learning_rate": 4.9938992891651825e-06, "loss": 0.6205, "step": 60},
    {"epoch": 0.05, "grad_norm": 1.9538385063221935, "learning_rate": 4.9936529247897854e-06, "loss": 0.5992, "step": 61},
    {"epoch": 0.05, "grad_norm": 2.084943826856202, "learning_rate": 4.993401690256203e-06, "loss": 0.6148, "step": 62},
    {"epoch": 0.06, "grad_norm": 2.135158856581583, "learning_rate": 4.9931455860551e-06, "loss": 0.5937, "step": 63},
    {"epoch": 0.06, "grad_norm": 1.982621418518698, "learning_rate": 4.992884612686655e-06, "loss": 0.6091, "step": 64},
    {"epoch": 0.06, "grad_norm": 2.1030931953494956, "learning_rate": 4.992618770660553e-06, "loss": 0.6034, "step": 65},
    {"epoch": 0.06, "grad_norm": 2.1994634556563994, "learning_rate": 4.992348060495989e-06, "loss": 0.5846, "step": 66},
    {"epoch": 0.06, "grad_norm": 2.410691403277427, "learning_rate": 4.992072482721669e-06, "loss": 0.6294, "step": 67},
    {"epoch": 0.06, "grad_norm": 1.9720494401999067, "learning_rate": 4.991792037875799e-06, "loss": 0.591, "step": 68},
    {"epoch": 0.06, "grad_norm": 2.147504025949435, "learning_rate": 4.991506726506094e-06, "loss": 0.5689, "step": 69},
    {"epoch": 0.06, "grad_norm": 2.1837702519904223, "learning_rate": 4.991216549169776e-06, "loss": 0.6422, "step": 70},
    {"epoch": 0.06, "grad_norm": 2.0883865330274958, "learning_rate": 4.9909215064335655e-06, "loss": 0.6076, "step": 71},
    {"epoch": 0.06, "grad_norm": 2.20727863923846, "learning_rate": 4.990621598873687e-06, "loss": 0.5974, "step": 72},
    {"epoch": 0.06, "grad_norm": 2.0735330806418464, "learning_rate": 4.990316827075868e-06, "loss": 0.6809, "step": 73},
    {"epoch": 0.07, "grad_norm": 2.0203203347538774, "learning_rate": 4.990007191635334e-06, "loss": 0.6107, "step": 74},
    {"epoch": 0.07, "grad_norm": 2.234889365362174, "learning_rate": 4.989692693156809e-06, "loss": 0.6218, "step": 75},
    {"epoch": 0.07, "grad_norm": 1.9902503343433904, "learning_rate": 4.989373332254516e-06, "loss": 0.6257, "step": 76},
    {"epoch": 0.07, "grad_norm": 2.1041971507252466, "learning_rate": 4.989049109552173e-06, "loss": 0.5888, "step": 77},
    {"epoch": 0.07, "grad_norm": 2.1151685783302123, "learning_rate": 4.988720025682995e-06, "loss": 0.6333, "step": 78},
    {"epoch": 0.07, "grad_norm": 1.9223819269893592, "learning_rate": 4.988386081289689e-06, "loss": 0.6442, "step": 79},
    {"epoch": 0.07, "grad_norm": 2.139676463756265, "learning_rate": 4.988047277024456e-06, "loss": 0.5966, "step": 80},
    {"epoch": 0.07, "grad_norm": 2.1665820212993068, "learning_rate": 4.987703613548988e-06, "loss": 0.603, "step": 81},
    {"epoch": 0.07, "grad_norm": 1.931456975470041, "learning_rate": 4.987355091534467e-06, "loss": 0.6122, "step": 82},
    {"epoch": 0.07, "grad_norm": 2.134995092135601, "learning_rate": 4.987001711661566e-06, "loss": 0.6213, "step": 83},
    {"epoch": 0.07, "grad_norm": 2.0173352657570818, "learning_rate": 4.98664347462044e-06, "loss": 0.5966, "step": 84},
    {"epoch": 0.08, "grad_norm": 2.0816939924571183, "learning_rate": 4.986280381110737e-06, "loss": 0.5575, "step": 85},
    {"epoch": 0.08, "grad_norm": 2.0072477771163357, "learning_rate": 4.985912431841584e-06, "loss": 0.6225, "step": 86},
    {"epoch": 0.08, "grad_norm": 2.1895945454214507, "learning_rate": 4.985539627531596e-06, "loss": 0.6169, "step": 87},
    {"epoch": 0.08, "grad_norm": 2.84518214074801, "learning_rate": 4.985161968908866e-06, "loss": 0.6317, "step": 88},
    {"epoch": 0.08, "grad_norm": 2.194209857089938, "learning_rate": 4.984779456710971e-06, "loss": 0.6205, "step": 89},
    {"epoch": 0.08, "grad_norm": 2.1604595364123083, "learning_rate": 4.9843920916849645e-06, "loss": 0.6176, "step": 90},
    {"epoch": 0.08, "grad_norm": 2.039087518829079, "learning_rate": 4.9839998745873795e-06, "loss": 0.5842, "step": 91},
    {"epoch": 0.08, "grad_norm": 2.0148570016863334, "learning_rate": 4.983602806184225e-06, "loss": 0.5936, "step": 92},
    {"epoch": 0.08, "grad_norm": 2.073137159272384, "learning_rate": 4.983200887250982e-06, "loss": 0.6317, "step": 93},
    {"epoch": 0.08, "grad_norm": 2.045469602089007, "learning_rate": 4.9827941185726095e-06, "loss": 0.5338, "step": 94},
    {"epoch": 0.08, "grad_norm": 2.1201743116757417, "learning_rate": 4.982382500943533e-06, "loss": 0.6133, "step": 95},
    {"epoch": 0.09, "grad_norm": 2.0637214917996363, "learning_rate": 4.981966035167654e-06, "loss": 0.6483, "step": 96},
    {"epoch": 0.09, "grad_norm": 2.155574452675582, "learning_rate": 4.981544722058336e-06, "loss": 0.6001, "step": 97},
    {"epoch": 0.09, "grad_norm": 1.9347601392775928, "learning_rate": 4.981118562438414e-06, "loss": 0.5954, "step": 98},
    {"epoch": 0.09, "grad_norm": 2.3054537863874756, "learning_rate": 4.980687557140187e-06, "loss": 0.6338, "step": 99},
    {"epoch": 0.09, "grad_norm": 2.0421104909837338, "learning_rate": 4.980251707005417e-06, "loss": 0.6166, "step": 100},
    {"epoch": 0.09, "grad_norm": 2.023167301994367, "learning_rate": 4.979811012885329e-06, "loss": 0.5682, "step": 101},
    {"epoch": 0.09, "grad_norm": 2.0583654213007967, "learning_rate": 4.979365475640609e-06, "loss": 0.5759, "step": 102},
    {"epoch": 0.09, "grad_norm": 2.008917223929121, "learning_rate": 4.9789150961414e-06, "loss": 0.6324, "step": 103},
    {"epoch": 0.09, "grad_norm": 2.1111479338304306, "learning_rate": 4.978459875267303e-06, "loss": 0.5821, "step": 104},
    {"epoch": 0.09, "grad_norm": 2.400366962461983, "learning_rate": 4.977999813907375e-06, "loss": 0.5699, "step": 105},
    {"epoch": 0.09, "grad_norm": 2.090668061316384, "learning_rate": 4.977534912960124e-06, "loss": 0.5754, "step": 106},
    {"epoch": 0.09, "grad_norm": 2.2103419288491466, "learning_rate": 4.977065173333515e-06, "loss": 0.6005, "step": 107},
    {"epoch": 0.1, "grad_norm": 2.1332380447628294, "learning_rate": 4.9765905959449565e-06, "loss": 0.6178, "step": 108},
    {"epoch": 0.1, "grad_norm": 2.1372224949542464, "learning_rate": 4.976111181721309e-06, "loss": 0.6021, "step": 109},
    {"epoch": 0.1, "grad_norm": 2.636052326949506, "learning_rate": 4.97562693159888e-06, "loss": 0.6418, "step": 110},
    {"epoch": 0.1, "grad_norm": 2.1234423477493443, "learning_rate": 4.975137846523419e-06, "loss": 0.6231, "step": 111},
    {"epoch": 0.1, "grad_norm": 2.2817790529425315, "learning_rate": 4.974643927450121e-06, "loss": 0.5681, "step": 112},
    {"epoch": 0.1, "grad_norm": 2.2605060344304713, "learning_rate": 4.9741451753436205e-06, "loss": 0.5803, "step": 113},
    {"epoch": 0.1, "grad_norm": 2.0355236974665876, "learning_rate": 4.973641591177991e-06, "loss": 0.6003, "step": 114},
    {"epoch": 0.1, "grad_norm": 2.4343221170301415, "learning_rate": 4.973133175936743e-06, "loss": 0.5882, "step": 115},
    {"epoch": 0.1, "grad_norm": 2.2135760843199734, "learning_rate": 4.972619930612822e-06, "loss": 0.5886, "step": 116},
    {"epoch": 0.1, "grad_norm": 2.161909448676307, "learning_rate": 4.972101856208609e-06, "loss": 0.5792, "step": 117},
    {"epoch": 0.1, "grad_norm": 2.0871148781401927, "learning_rate": 4.9715789537359126e-06, "loss": 0.6383, "step": 118},
    {"epoch": 0.11, "grad_norm": 2.1159018206478626, "learning_rate": 4.971051224215973e-06, "loss": 0.5865, "step": 119},
    {"epoch": 0.11, "grad_norm": 2.2036428070670375, "learning_rate": 4.970518668679459e-06, "loss": 0.5905, "step": 120},
    {"epoch": 0.11, "grad_norm": 2.22262007661876, "learning_rate": 4.969981288166461e-06, "loss": 0.5951, "step": 121},
    {"epoch": 0.11, "grad_norm": 2.0713458839382786, "learning_rate": 4.969439083726496e-06, "loss": 0.6011, "step": 122},
    {"epoch": 0.11, "grad_norm": 2.0686060725186897, "learning_rate": 4.9688920564185e-06, "loss": 0.6038, "step": 123},
    {"epoch": 0.11, "grad_norm": 2.1825376161159964, "learning_rate": 4.968340207310832e-06, "loss": 0.6098, "step": 124},
    {"epoch": 0.11, "grad_norm": 2.142436541976576, "learning_rate": 4.967783537481262e-06, "loss": 0.6119, "step": 125},
    {"epoch": 0.11, "grad_norm": 2.330044622755397, "learning_rate": 4.967222048016979e-06, "loss": 0.6057, "step": 126},
    {"epoch": 0.11, "grad_norm": 2.109116942854107, "learning_rate": 4.966655740014585e-06, "loss": 0.5958, "step": 127},
    {"epoch": 0.11, "grad_norm": 2.174219068914296, "learning_rate": 4.9660846145800914e-06, "loss": 0.6276, "step": 128},
    {"epoch": 0.11, "grad_norm": 2.135736248304593, "learning_rate": 4.965508672828918e-06, "loss": 0.6309, "step": 129},
    {"epoch": 0.12, "grad_norm": 2.2339234058672885, "learning_rate": 4.964927915885893e-06, "loss": 0.5879, "step": 130},
    {"epoch": 0.12, "grad_norm": 2.0960660335616224, "learning_rate": 4.9643423448852455e-06, "loss": 0.6218, "step": 131},
    {"epoch": 0.12, "grad_norm": 1.9468729925472703, "learning_rate": 4.963751960970609e-06, "loss": 0.5998, "step": 132},
    {"epoch": 0.12, "grad_norm": 2.1623168252289915, "learning_rate": 4.9631567652950164e-06, "loss": 0.6885, "step": 133},
    {"epoch": 0.12, "grad_norm": 2.084420579583794, "learning_rate": 4.962556759020898e-06, "loss": 0.5758, "step": 134},
    {"epoch": 0.12, "grad_norm": 2.1082890389844713, "learning_rate": 4.961951943320078e-06, "loss": 0.6116, "step": 135},
    {"epoch": 0.12, "grad_norm": 2.006123424806457, "learning_rate": 4.9613423193737754e-06, "loss": 0.5708, "step": 136},
    {"epoch": 0.12, "grad_norm": 2.309431970929405, "learning_rate": 4.960727888372599e-06, "loss": 0.621, "step": 137},
    {"epoch": 0.12, "grad_norm": 2.226488524758773, "learning_rate": 4.9601086515165456e-06, "loss": 0.5896, "step": 138},
    {"epoch": 0.12, "grad_norm": 2.1242070778655253, "learning_rate": 4.959484610014997e-06, "loss": 0.624, "step": 139},
    {"epoch": 0.12, "grad_norm": 2.2147491445730516, "learning_rate": 4.958855765086722e-06, "loss": 0.6064, "step": 140},
    {"epoch": 0.12, "grad_norm": 2.1818004600393, "learning_rate": 4.958222117959868e-06, "loss": 0.6252, "step": 141},
    {"epoch": 0.13, "grad_norm": 2.1094535889409696, "learning_rate": 4.95758366987196e-06, "loss": 0.5779, "step": 142},
    {"epoch": 0.13, "grad_norm": 2.2043056809252577, "learning_rate": 4.9569404220699025e-06, "loss": 0.6156, "step": 143},
    {"epoch": 0.13, "grad_norm": 2.158056342799238, "learning_rate": 4.956292375809971e-06, "loss": 0.5662, "step": 144},
    {"epoch": 0.13, "grad_norm": 1.987581635345228, "learning_rate": 4.955639532357815e-06, "loss": 0.6148, "step": 145},
    {"epoch": 0.13, "grad_norm": 2.266145451051948, "learning_rate": 4.954981892988451e-06, "loss": 0.5867, "step": 146},
    {"epoch": 0.13, "grad_norm": 2.071082600205798, "learning_rate": 4.954319458986264e-06, "loss": 0.5976, "step": 147},
    {"epoch": 0.13, "grad_norm": 2.1615342548575374, "learning_rate": 4.953652231645002e-06, "loss": 0.5643, "step": 148},
    {"epoch": 0.13, "grad_norm": 2.145126231371731, "learning_rate": 4.952980212267773e-06, "loss": 0.5592, "step": 149},
    {"epoch": 0.13, "grad_norm": 1.9161750244434461, "learning_rate": 4.952303402167047e-06, "loss": 0.5547, "step": 150},
    {"epoch": 0.13, "grad_norm": 2.234370958372018, "learning_rate": 4.9516218026646475e-06, "loss": 0.578, "step": 151},
    {"epoch": 0.13, "grad_norm": 2.149553338429868, "learning_rate": 4.950935415091753e-06, "loss": 0.5952, "step": 152},
    {"epoch": 0.14, "grad_norm": 2.1021801657048016, "learning_rate": 4.950244240788895e-06, "loss": 0.573, "step": 153},
    {"epoch": 0.14, "grad_norm": 2.488711367210497, "learning_rate": 4.949548281105951e-06, "loss": 0.5776, "step": 154},
    {"epoch": 0.14, "grad_norm": 2.0302393290147167, "learning_rate": 4.948847537402145e-06, "loss": 0.5685, "step": 155},
    {"epoch": 0.14, "grad_norm": 2.1563261797248043, "learning_rate": 4.948142011046044e-06, "loss": 0.6185, "step": 156},
    {"epoch": 0.14, "grad_norm": 2.1308303224609997, "learning_rate": 4.947431703415558e-06, "loss": 0.6229, "step": 157},
    {"epoch": 0.14, "grad_norm": 2.0988414912992273, "learning_rate": 4.946716615897932e-06, "loss": 0.6167, "step": 158},
    {"epoch": 0.14, "grad_norm": 2.3558302474583095, "learning_rate": 4.9459967498897485e-06, "loss": 0.5903, "step": 159},
    {"epoch": 0.14, "grad_norm": 2.1505555405055223, "learning_rate": 4.945272106796919e-06, "loss": 0.5709, "step": 160},
    {"epoch": 0.14, "grad_norm": 2.0604140956574635, "learning_rate": 4.94454268803469e-06, "loss": 0.635, "step": 161},
    {"epoch": 0.14, "grad_norm": 2.3699836246614696, "learning_rate": 4.943808495027631e-06, "loss": 0.581, "step": 162},
    {"epoch": 0.14, "grad_norm": 1.9809907136859368, "learning_rate": 4.9430695292096365e-06, "loss": 0.5703, "step": 163},
    {"epoch": 0.15, "grad_norm": 2.213101907296851, "learning_rate": 4.942325792023922e-06, "loss": 0.5915, "step": 164},
    {"epoch": 0.15, "grad_norm": 2.3778783149383944, "learning_rate": 4.941577284923025e-06, "loss": 0.537, "step": 165},
    {"epoch": 0.15, "grad_norm": 1.9283694807512721, "learning_rate": 4.9408240093687934e-06, "loss": 0.579, "step": 166},
    {"epoch": 0.15, "grad_norm": 2.083087334039033, "learning_rate": 4.940065966832392e-06, "loss": 0.5612, "step": 167},
    {"epoch": 0.15, "grad_norm": 2.314684793845775, "learning_rate": 4.939303158794294e-06, "loss": 0.6001, "step": 168},
    {"epoch": 0.15, "grad_norm": 2.131977461745334, "learning_rate": 4.93853558674428e-06, "loss": 0.5809, "step": 169},
    {"epoch": 0.15, "grad_norm": 2.1291924932946755, "learning_rate": 4.937763252181434e-06, "loss": 0.6216, "step": 170},
    {"epoch": 0.15, "grad_norm": 1.9366549866764742, "learning_rate": 4.936986156614144e-06, "loss": 0.5888, "step": 171},
    {"epoch": 0.15, "grad_norm": 2.231889540095555, "learning_rate": 4.9362043015600934e-06, "loss": 0.6437, "step": 172},
    {"epoch": 0.15, "grad_norm": 2.0696023557568233, "learning_rate": 4.9354176885462626e-06, "loss": 0.5951, "step": 173},
    {"epoch": 0.15, "grad_norm": 2.10974806039572, "learning_rate": 4.934626319108923e-06, "loss": 0.5817, "step": 174},
    {"epoch": 0.16, "grad_norm": 2.0633698321381946, "learning_rate": 4.933830194793636e-06, "loss": 0.5692, "step": 175},
    {"epoch": 0.16, "grad_norm": 2.0163693967733423, "learning_rate": 4.933029317155251e-06, "loss": 0.5322, "step": 176},
    {"epoch": 0.16, "grad_norm": 2.1118176135699813, "learning_rate": 4.932223687757899e-06, "loss": 0.5809, "step": 177},
    {"epoch": 0.16, "grad_norm": 2.181431947183138, "learning_rate": 4.9314133081749906e-06, "loss": 0.5444, "step": 178},
    {"epoch": 0.16, "grad_norm": 2.2055197469621386, "learning_rate": 4.930598179989215e-06, "loss": 0.6063, "step": 179},
    {"epoch": 0.16, "grad_norm": 2.1103699877035638, "learning_rate": 4.929778304792537e-06, "loss": 0.5908, "step": 180},
    {"epoch": 0.16, "grad_norm": 2.01692648335164, "learning_rate": 4.928953684186189e-06, "loss": 0.5729, "step": 181},
    {"epoch": 0.16, "grad_norm": 1.990744003423107, "learning_rate": 4.928124319780673e-06, "loss": 0.5935, "step": 182},
    {"epoch": 0.16, "grad_norm": 1.9898687560952446, "learning_rate": 4.9272902131957555e-06, "loss": 0.6008, "step": 183},
    {"epoch": 0.16, "grad_norm": 1.9499116832570582, "learning_rate": 4.926451366060465e-06, "loss": 0.5731, "step": 184},
    {"epoch": 0.16, "grad_norm": 1.8933258467243923, "learning_rate": 4.925607780013088e-06, "loss": 0.5822, "step": 185},
    {"epoch": 0.16, "grad_norm": 1.9711936623837691, "learning_rate": 4.924759456701167e-06, "loss": 0.5433, "step": 186},
    {"epoch": 0.17, "grad_norm": 1.9981254191144715, "learning_rate": 4.923906397781495e-06, "loss": 0.5603, "step": 187},
    {"epoch": 0.17, "grad_norm": 1.9489584101682442, "learning_rate": 4.923048604920115e-06, "loss": 0.592, "step": 188},
    {"epoch": 0.17, "grad_norm": 2.14587896098926, "learning_rate": 4.922186079792315e-06, "loss": 0.5861, "step": 189},
    {"epoch": 0.17, "grad_norm": 2.093505234897306, "learning_rate": 4.921318824082625e-06, "loss": 0.5756, "step": 190},
    {"epoch": 0.17, "grad_norm": 1.9726924068956073, "learning_rate": 4.920446839484814e-06, "loss": 0.5954, "step": 191},
    {"epoch": 0.17, "grad_norm": 2.0009011296035886, "learning_rate": 4.919570127701888e-06, "loss": 0.5185, "step": 192},
    {"epoch": 0.17, "grad_norm": 2.0801246171281993, "learning_rate": 4.9186886904460826e-06, "loss": 0.5788, "step": 193},
    {"epoch": 0.17, "grad_norm": 2.7712602468155096, "learning_rate": 4.917802529438865e-06, "loss": 0.6637, "step": 194},
    {"epoch": 0.17, "grad_norm": 1.9721040372060654, "learning_rate": 4.916911646410926e-06, "loss": 0.5926, "step": 195},
    {"epoch": 0.17, "grad_norm": 2.1199089061376855, "learning_rate": 4.91601604310218e-06, "loss": 0.5854, "step": 196},
    {"epoch": 0.17, "grad_norm": 1.9518281461372036, "learning_rate": 4.915115721261759e-06, "loss": 0.5456, "step": 197},
    {"epoch": 0.18, "grad_norm": 2.1537515435847734, "learning_rate": 4.9142106826480114e-06, "loss": 0.6152, "step": 198},
    {"epoch": 0.18, "grad_norm": 2.3461320565666344, "learning_rate": 4.913300929028498e-06, "loss": 0.617, "step": 199},
    {"epoch": 0.18, "grad_norm": 1.9789785575462193, "learning_rate": 4.912386462179987e-06, "loss": 0.5845, "step": 200},
    {"epoch": 0.18, "grad_norm": 2.0705337307209253, "learning_rate": 4.9114672838884515e-06, "loss": 0.6062, "step": 201},
    {"epoch": 0.18, "grad_norm": 1.9972918925367322, "learning_rate": 4.910543395949066e-06, "loss": 0.6318, "step": 202},
    {"epoch": 0.18, "grad_norm": 2.03173534028091, "learning_rate": 4.9096148001662055e-06, "loss": 0.64, "step": 203},
    {"epoch": 0.18, "grad_norm": 2.0861416304602356, "learning_rate": 4.908681498353436e-06, "loss": 0.5859, "step": 204},
    {"epoch": 0.18, "grad_norm": 1.932510611788884, "learning_rate": 4.907743492333517e-06, "loss": 0.5483, "step": 205},
    {"epoch": 0.18, "grad_norm": 1.9618471764126828, "learning_rate": 4.906800783938395e-06, "loss": 0.5767, "step": 206},
    {"epoch": 0.18, "grad_norm": 2.3557796360921786, "learning_rate": 4.905853375009198e-06, "loss": 0.5934, "step": 207},
    {"epoch": 0.18, "grad_norm": 2.0993364379712784, "learning_rate": 4.9049012673962385e-06, "loss": 0.5879, "step": 208},
    {"epoch": 0.19, "grad_norm": 2.2015612636555155, "learning_rate": 4.903944462959001e-06, "loss": 0.5598, "step": 209},
    {"epoch": 0.19, "grad_norm": 2.0374544745406062, "learning_rate": 4.902982963566147e-06, "loss": 0.577, "step": 210},
    {"epoch": 0.19, "grad_norm": 2.194866218807, "learning_rate": 4.902016771095506e-06, "loss": 0.5848, "step": 211},
    {"epoch": 0.19, "grad_norm": 2.2545375351308614, "learning_rate": 4.901045887434072e-06, "loss": 0.5846, "step": 212},
    {"epoch": 0.19, "grad_norm": 2.017012770131601, "learning_rate": 4.900070314478001e-06, "loss": 0.5651, "step": 213},
    {"epoch": 0.19, "grad_norm": 2.150900326654639, "learning_rate": 4.899090054132609e-06, "loss": 0.568, "step": 214},
    {"epoch": 0.19, "grad_norm": 2.0404886979870454, "learning_rate": 4.898105108312366e-06, "loss": 0.5277, "step": 215},
    {"epoch": 0.19, "grad_norm": 2.036614254190257, "learning_rate": 4.897115478940892e-06, "loss": 0.5754, "step": 216},
    {"epoch": 0.19, "grad_norm": 2.041133008809928, "learning_rate": 4.896121167950954e-06, "loss": 0.6294, "step": 217},
    {"epoch": 0.19, "grad_norm": 2.0029503409054885, "learning_rate": 4.895122177284465e-06, "loss": 0.5531, "step": 218},
    {"epoch": 0.19, "grad_norm": 2.0303439698174754, "learning_rate": 4.894118508892474e-06, "loss": 0.6008, "step": 219},
    {"epoch": 0.19, "grad_norm": 1.899982778272908, "learning_rate": 4.893110164735167e-06, "loss": 0.6076, "step": 220},
    {"epoch": 0.2, "grad_norm": 2.170640326694132, "learning_rate": 4.892097146781862e-06, "loss": 0.5806, "step": 221},
    {"epoch": 0.2, "grad_norm": 1.961802557992624, "learning_rate": 4.8910794570110055e-06, "loss": 0.5456, "step": 222},
    {"epoch": 0.2, "grad_norm": 2.1149182672715807, "learning_rate": 4.890057097410167e-06, "loss": 0.5683, "step": 223},
    {"epoch": 0.2, "grad_norm": 1.9988574008443096, "learning_rate": 4.889030069976038e-06, "loss": 0.5603, "step": 224},
    {"epoch": 0.2, "grad_norm": 2.137840782586502, "learning_rate": 4.887998376714424e-06, "loss": 0.5713, "step": 225},
    {"epoch": 0.2, "grad_norm": 2.2956357234771634, "learning_rate": 4.886962019640244e-06, "loss": 0.5635, "step": 226},
    {"epoch": 0.2, "grad_norm": 2.2175517801056346, "learning_rate": 4.885921000777528e-06, "loss": 0.631, "step": 227},
    {"epoch": 0.2, "grad_norm": 2.0861966792656546, "learning_rate": 4.884875322159407e-06, "loss": 0.5521, "step": 228},
    {"epoch": 0.2, "grad_norm": 2.170862650134145, "learning_rate": 4.883824985828114e-06, "loss": 0.5953, "step": 229},
    {"epoch": 0.2, "grad_norm": 2.016871028914906, "learning_rate": 4.882769993834978e-06, "loss": 0.5745, "step": 230},
    {"epoch": 0.2, "grad_norm": 2.4069309610367107, "learning_rate": 4.8817103482404236e-06, "loss": 0.5752, "step": 231},
    {"epoch": 0.21, "grad_norm": 1.9834780557891722, "learning_rate": 4.880646051113959e-06, "loss": 0.5619, "step": 232},
    {"epoch": 0.21, "grad_norm": 2.1221686040256005, "learning_rate": 4.87957710453418e-06, "loss": 0.561, "step": 233},
    {"epoch": 0.21, "grad_norm": 2.1497751964139002, "learning_rate": 4.878503510588764e-06, "loss": 0.5754, "step": 234},
    {"epoch": 0.21, "grad_norm": 1.8535318318419167, "learning_rate": 4.877425271374462e-06, "loss": 0.5551, "step": 235},
    {"epoch": 0.21, "grad_norm": 2.1537345489224404, "learning_rate": 4.876342388997099e-06, "loss": 0.544, "step": 236},
    {"epoch": 0.21, "grad_norm": 1.9695512744073471, "learning_rate": 4.875254865571567e-06, "loss": 0.6003, "step": 237},
    {"epoch": 0.21, "grad_norm": 2.2550853928957193, "learning_rate": 4.874162703221823e-06, "loss": 0.5968, "step": 238},
    {"epoch": 0.21, "grad_norm": 2.0658630166795917, "learning_rate": 4.873065904080884e-06, "loss": 0.5658, "step": 239},
    {"epoch": 0.21, "grad_norm": 2.0821280326495524, "learning_rate": 4.871964470290823e-06, "loss": 0.5711, "step": 240},
    {"epoch": 0.21, "grad_norm": 1.9833074137024158, "learning_rate": 4.8708584040027636e-06, "loss": 0.5899, "step": 241},
    {"epoch": 0.21, "grad_norm": 2.0288963441502195, "learning_rate": 4.869747707376877e-06, "loss": 0.5601, "step": 242},
    {"epoch": 0.22, "grad_norm": 2.0970435875726463, "learning_rate": 4.868632382582378e-06, "loss": 0.6381, "step": 243},
    {"epoch": 0.22, "grad_norm": 2.1303280408644194, "learning_rate": 4.86751243179752e-06, "loss": 0.5495, "step": 244},
    {"epoch": 0.22, "grad_norm": 2.0851781018580584, "learning_rate": 4.866387857209591e-06, "loss": 0.5901, "step": 245},
    {"epoch": 0.22, "grad_norm": 1.8310760160854438, "learning_rate": 4.86525866101491e-06, "loss": 0.5513, "step": 246},
    {"epoch": 0.22, "grad_norm": 2.199726167537497, "learning_rate": 4.8641248454188205e-06, "loss": 0.5873, "step": 247},
    {"epoch": 0.22, "grad_norm": 1.9776691221978735, "learning_rate": 4.862986412635691e-06, "loss": 0.6143, "step": 248},
    {"epoch": 0.22, "grad_norm": 2.0663231641830873, "learning_rate": 4.8618433648889034e-06, "loss": 0.5937, "step": 249},
    {"epoch": 0.22, "grad_norm": 2.170520506577784, "learning_rate": 4.860695704410856e-06, "loss": 0.5374, "step": 250},
    {"epoch": 0.22, "grad_norm": 1.9685756224067419, "learning_rate": 4.8595434334429535e-06, "loss": 0.5139, "step": 251},
    {"epoch": 0.22, "grad_norm": 1.9668205539999677, "learning_rate": 4.8583865542356065e-06, "loss": 0.5459, "step": 252},
    {"epoch": 0.22, "grad_norm": 2.0793578279258704, "learning_rate": 4.857225069048226e-06, "loss": 0.593, "step": 253},
    {"epoch": 0.22, "grad_norm": 1.9265474492849337, "learning_rate": 4.8560589801492165e-06, "loss": 0.5559, "step": 254},
    {"epoch": 0.23, "grad_norm": 2.8555278122830696, "learning_rate": 4.854888289815976e-06, "loss": 0.5949, "step": 255},
    {"epoch": 0.23, "grad_norm": 2.063838630196542, "learning_rate": 4.853713000334887e-06, "loss": 0.5712, "step": 256},
    {"epoch": 0.23, "grad_norm": 2.168668910730517, "learning_rate": 4.852533114001316e-06, "loss": 0.5475, "step": 257},
    {"epoch": 0.23, "grad_norm": 2.064042820960706, "learning_rate": 4.8513486331196055e-06, "loss": 0.5616, "step": 258},
    {"epoch": 0.23, "grad_norm": 2.026751060346143, "learning_rate": 4.850159560003074e-06, "loss": 0.5997, "step": 259},
    {"epoch": 0.23, "grad_norm": 2.1228129299875254, "learning_rate": 4.848965896974006e-06, "loss": 0.5622, "step": 260},
    {"epoch": 0.23, "grad_norm": 1.9418510365881214, "learning_rate": 4.847767646363652e-06, "loss": 0.5741, "step": 261},
    {"epoch": 0.23, "grad_norm": 2.070611833895483, "learning_rate": 4.846564810512221e-06, "loss": 0.5729, "step": 262},
    {"epoch": 0.23, "grad_norm": 1.8833621440375596, "learning_rate": 4.845357391768877e-06, "loss": 0.5503, "step": 263},
    {"epoch": 0.23, "grad_norm": 2.1022924907055387, "learning_rate": 4.844145392491735e-06, "loss": 0.6204, "step": 264},
    {"epoch": 0.23, "grad_norm": 2.024625007813473, "learning_rate": 4.842928815047856e-06, "loss": 0.5776, "step": 265},
    {"epoch": 0.24, "grad_norm": 1.9123739071371275, "learning_rate": 4.8417076618132434e-06, "loss": 0.5417, "step": 266},
    {"epoch": 0.24, "grad_norm": 2.062879186086598, "learning_rate": 4.8404819351728336e-06, "loss": 0.5387, "step": 267},
    {"epoch": 0.24, "grad_norm": 1.9944627549250884, "learning_rate": 4.8392516375204986e-06, "loss": 0.5731, "step": 268},
    {"epoch": 0.24, "grad_norm": 1.9859912626846585, "learning_rate": 4.838016771259037e-06, "loss": 0.5969, "step": 269},
    {"epoch": 0.24, "grad_norm": 2.043069520519082, "learning_rate": 4.836777338800168e-06, "loss": 0.6217, "step": 270},
    {"epoch": 0.24, "grad_norm": 1.913212451622778, "learning_rate": 4.835533342564531e-06, "loss": 0.5527, "step": 271},
    {"epoch": 0.24, "grad_norm": 1.978858281238778, "learning_rate": 4.834284784981678e-06, "loss": 0.5997, "step": 272},
    {"epoch": 0.24, "grad_norm": 2.004628826916504, "learning_rate": 4.833031668490067e-06, "loss": 0.551, "step": 273},
    {"epoch": 0.24, "grad_norm": 2.164370107566024, "learning_rate": 4.8317739955370645e-06, "loss": 0.5537, "step": 274},
    {"epoch": 0.24, "grad_norm": 1.891772326146366, "learning_rate": 4.83051176857893e-06, "loss": 0.6075, "step": 275},
    {"epoch": 0.24, "grad_norm": 2.0553128913886645, "learning_rate": 4.8292449900808216e-06, "loss": 0.5854, "step": 276},
    {"epoch": 0.25, "grad_norm": 2.009000622167072, "learning_rate": 4.827973662516786e-06, "loss": 0.5503, "step": 277},
    {"epoch": 0.25, "grad_norm": 1.9385043396652537, "learning_rate": 4.826697788369752e-06, "loss": 0.5704, "step": 278},
    {"epoch": 0.25, "grad_norm": 2.3263786060073826, "learning_rate": 4.8254173701315295e-06, "loss": 0.5604, "step": 279},
    {"epoch": 0.25, "grad_norm": 1.9251504140774536, "learning_rate": 4.8241324103028055e-06, "loss": 0.5647, "step": 280},
    {"epoch": 0.25, "grad_norm": 1.9714117964729747, "learning_rate": 4.822842911393131e-06, "loss": 0.604, "step": 281},
    {"epoch": 0.25, "grad_norm": 2.034372279161665, "learning_rate": 4.821548875920927e-06, "loss": 0.5803, "step": 282},
    {"epoch": 0.25, "grad_norm": 1.9849114644945505, "learning_rate": 4.8202503064134725e-06, "loss": 0.5854, "step": 283},
    {"epoch": 0.25, "grad_norm": 2.3435998455971343, "learning_rate": 4.818947205406902e-06, "loss": 0.4988, "step": 284},
    {"epoch": 0.25, "grad_norm": 2.0672779732760924, "learning_rate": 4.8176395754462e-06, "loss": 0.5734, "step": 285},
    {"epoch": 0.25, "grad_norm": 2.1206384205127544, "learning_rate": 4.816327419085197e-06, "loss": 0.563, "step": 286},
    {"epoch": 0.25, "grad_norm": 2.1105254841893095, "learning_rate": 4.815010738886561e-06, "loss": 0.5765, "step": 287},
    {"epoch": 0.26, "grad_norm": 2.072546090747287, "learning_rate": 4.813689537421798e-06, "loss": 0.6003, "step": 288},
    {"epoch": 0.26, "grad_norm": 2.1131138426394442, "learning_rate": 4.812363817271243e-06, "loss": 0.6097, "step": 289},
    {"epoch": 0.26, "grad_norm": 1.9218545344238502, "learning_rate": 4.811033581024056e-06, "loss": 0.6272, "step": 290},
    {"epoch": 0.26, "grad_norm": 2.235420687671868, "learning_rate": 4.809698831278217e-06, "loss": 0.5519, "step": 291},
    {"epoch": 0.26, "grad_norm": 1.8915062282224397, "learning_rate": 4.808359570640522e-06, "loss": 0.5832, "step": 292},
    {"epoch": 0.26, "grad_norm": 1.9185231023206675, "learning_rate": 4.8070158017265755e-06, "loss": 0.5854, "step": 293},
    {"epoch": 0.26, "grad_norm": 2.086526046887808, "learning_rate": 4.805667527160788e-06, "loss": 0.5314, "step": 294},
    {"epoch": 0.26, "grad_norm": 1.9995370937944454, "learning_rate": 4.804314749576368e-06, "loss": 0.5749, "step": 295},
    {"epoch": 0.26, "grad_norm": 2.099313489806141, "learning_rate": 4.802957471615319e-06, "loss": 0.5173, "step": 296},
    {"epoch": 0.26, "grad_norm": 2.067736275086448, "learning_rate": 4.8015956959284346e-06, "loss": 0.5434, "step": 297},
    {"epoch": 0.26, "grad_norm": 2.005525416579935, "learning_rate": 4.800229425175294e-06, "loss": 0.5589, "step": 298},
    {"epoch": 0.26, "grad_norm": 2.172708847484724, "learning_rate": 4.7988586620242515e-06, "loss": 0.5919, "step": 299},
    {"epoch": 0.27, "grad_norm": 2.0010542748493823, "learning_rate": 4.797483409152438e-06, "loss": 0.5803, "step": 300},
    {"epoch": 0.27, "grad_norm": 2.1169505971764506, "learning_rate": 4.7961036692457516e-06, "loss": 0.5763, "step": 301},
    {"epoch": 0.27, "grad_norm": 2.202849419501746, "learning_rate": 4.794719444998856e-06, "loss": 0.5691, "step": 302},
    {"epoch": 0.27, "grad_norm": 1.9765013761990564, "learning_rate": 4.793330739115169e-06, "loss": 0.5657, "step": 303},
    {"epoch": 0.27, "grad_norm": 2.0404392238791136, "learning_rate": 4.791937554306863e-06, "loss": 0.5648, "step": 304},
    {"epoch": 0.27, "grad_norm": 2.0298920886210516, "learning_rate": 4.790539893294861e-06, "loss": 0.5353, "step": 305},
    {"epoch": 0.27, "grad_norm": 2.03157486915788, "learning_rate": 4.789137758808823e-06, "loss": 0.5716, "step": 306},
    {"epoch": 0.27, "grad_norm": 2.060346338513047, "learning_rate": 4.787731153587149e-06, "loss": 0.5502, "step": 307},
    {"epoch": 0.27, "grad_norm": 1.9286831590091769, "learning_rate": 4.786320080376968e-06, "loss": 0.5646, "step": 308},
    {"epoch": 0.27, "grad_norm": 2.042346254905274, "learning_rate": 4.7849045419341376e-06, "loss": 0.6085, "step": 309},
    {"epoch": 0.27, "grad_norm": 2.0758243469708293, "learning_rate": 4.7834845410232356e-06, "loss": 0.5452, "step": 310},
    {"epoch": 0.28, "grad_norm": 2.0454965773706553, "learning_rate": 4.782060080417553e-06, "loss": 0.514, "step": 311},
    {"epoch": 0.28, "grad_norm": 2.073931876222572, "learning_rate": 4.780631162899094e-06, "loss": 0.5884, "step": 312},
    {"epoch": 0.28, "grad_norm": 1.9699688248650635, "learning_rate": 4.7791977912585645e-06, "loss": 0.529, "step": 313},
    {"epoch": 0.28, "grad_norm": 1.9886162974888701, "learning_rate": 4.7777599682953696e-06, "loss": 0.5796, "step": 314},
    {"epoch": 0.28, "grad_norm": 1.9579685029739566, "learning_rate": 4.7763176968176106e-06, "loss": 0.5553, "step": 315},
    {"epoch": 0.28, "grad_norm": 2.2181861411036086, "learning_rate": 4.7748709796420735e-06, "loss": 0.5806, "step": 316},
    {"epoch": 0.28, "grad_norm": 2.0345738930041777, "learning_rate": 4.773419819594228e-06, "loss": 0.6059, "step": 317},
    {"epoch": 0.28, "grad_norm": 2.0710385535524902, "learning_rate": 4.7719642195082224e-06, "loss": 0.5539, "step": 318},
    {"epoch": 0.28, "grad_norm": 2.1239710444371442, "learning_rate": 4.770504182226875e-06, "loss": 0.5655, "step": 319},
    {"epoch": 0.28, "grad_norm": 1.9564631444382952, "learning_rate": 4.769039710601669e-06, "loss": 0.5914, "step": 320},
    {"epoch": 0.28, "grad_norm": 1.9969926160116234, "learning_rate": 4.767570807492752e-06, "loss": 0.55, "step": 321},
    {"epoch": 0.29, "grad_norm": 1.9650736880864492, "learning_rate": 4.766097475768919e-06, "loss": 0.5804, "step": 322},
    {"epoch": 0.29, "grad_norm": 2.1946368157969194, "learning_rate": 4.7646197183076236e-06, "loss": 0.5631, "step": 323},
    {"epoch": 0.29, "grad_norm": 1.9834181085585831, "learning_rate": 4.763137537994955e-06, "loss": 0.5779, "step": 324},
    {"epoch": 0.29, "grad_norm": 2.1081651164417057, "learning_rate": 4.7616509377256445e-06, "loss": 0.5375, "step": 325},
    {"epoch": 0.29, "grad_norm": 1.9972027344990544, "learning_rate": 4.760159920403055e-06, "loss": 0.5608, "step": 326},
    {"epoch": 0.29, "grad_norm": 1.9554967826543683, "learning_rate": 4.758664488939174e-06, "loss": 0.5613, "step": 327},
    {"epoch": 0.29, "grad_norm": 2.211716512822424, "learning_rate": 4.757164646254614e-06, "loss": 0.5863, "step": 328},
    {"epoch": 0.29, "grad_norm": 1.9203184200502181, "learning_rate": 4.755660395278598e-06, "loss": 0.5275, "step": 329},
    {"epoch": 0.29, "grad_norm": 2.0355308159742505, "learning_rate": 4.7541517389489626e-06, "loss": 0.5304, "step": 330},
    {"epoch": 0.29, "grad_norm": 2.005680103405306, "learning_rate": 4.752638680212145e-06, "loss": 0.5782, "step": 331},
    {"epoch": 0.29, "grad_norm": 1.9930094995522492, "learning_rate": 4.751121222023183e-06, "loss": 0.5197, "step": 332},
    {"epoch": 0.29, "grad_norm": 2.130907347619711, "learning_rate": 4.749599367345703e-06, "loss": 0.5453, "step": 333},
    {"epoch": 0.3, "grad_norm": 2.0380649677356715, "learning_rate": 4.748073119151923e-06, "loss": 0.5394, "step": 334},
    {"epoch": 0.3, "grad_norm": 2.02655053696048, "learning_rate": 4.7465424804226366e-06, "loss": 0.5359, "step": 335},
    {"epoch": 0.3, "grad_norm": 2.108255877778432, "learning_rate": 4.745007454147215e-06, "loss": 0.5262, "step": 336},
    {"epoch": 0.3, "grad_norm": 1.8422966312136684, "learning_rate": 4.7434680433235986e-06, "loss": 0.529, "step": 337},
    {"epoch": 0.3, "grad_norm": 2.1387816386921004, "learning_rate": 4.741924250958289e-06, "loss": 0.5599, "step": 338},
    {"epoch": 0.3, "grad_norm": 2.2063774820548794, "learning_rate": 4.740376080066346e-06, "loss": 0.6014, "step": 339},
    {"epoch": 0.3, "grad_norm": 1.917696303327652, "learning_rate": 4.738823533671383e-06, "loss": 0.615, "step": 340},
    {"epoch": 0.3, "grad_norm": 2.0283765999277916, "learning_rate": 4.737266614805554e-06, "loss": 0.5802, "step": 341},
    {"epoch": 0.3, "grad_norm": 2.0340264609590437, "learning_rate": 4.7357053265095575e-06, "loss": 0.5331, "step": 342},
    {"epoch": 0.3, "grad_norm": 2.102037194450825, "learning_rate": 4.734139671832622e-06, "loss": 0.5534, "step": 343},
    {"epoch": 0.3, "grad_norm": 2.4389875670618113, "learning_rate": 4.732569653832505e-06, "loss": 0.5637, "step": 344},
    {"epoch": 0.31, "grad_norm": 2.1143521053252012, "learning_rate": 4.730995275575486e-06, "loss": 0.6539, "step": 345},
    {"epoch": 0.31, "grad_norm": 2.6240136232872064, "learning_rate": 4.7294165401363616e-06, "loss": 0.5515, "step": 346},
    {"epoch": 0.31, "grad_norm": 2.037602072097695, "learning_rate": 4.727833450598433e-06, "loss": 0.5609, "step": 347},
    {"epoch": 0.31, "grad_norm": 2.10711733636797, "learning_rate": 4.72624601005351e-06, "loss": 0.5719, "step": 348},
    {"epoch": 0.31, "grad_norm": 2.277613433738313, "learning_rate": 4.724654221601899e-06, "loss": 0.5815, "step": 349},
    {"epoch": 0.31, "grad_norm": 2.0082624113337824, "learning_rate": 4.7230580883523955e-06, "loss": 0.5524, "step": 350},
    {"epoch": 0.31, "grad_norm": 1.8922591374161477, "learning_rate": 4.721457613422285e-06, "loss": 0.5981, "step": 351},
    {"epoch": 0.31, "grad_norm": 2.108229047424278, "learning_rate": 4.7198527999373266e-06, "loss": 0.57, "step": 352},
    {"epoch": 0.31, "grad_norm": 2.152965480400126, "learning_rate": 4.718243651031759e-06, "loss": 0.5996, "step": 353},
    {"epoch": 0.31, "grad_norm": 1.8885994019827148, "learning_rate": 4.716630169848282e-06, "loss": 0.5543, "step": 354},
    {"epoch": 0.31, "grad_norm": 2.221396082747074, "learning_rate": 4.715012359538062e-06, "loss": 0.5423, "step": 355},
    {"epoch": 0.32, "grad_norm": 2.247525651087526, "learning_rate": 4.7133902232607145e-06, "loss": 0.6049, "step": 356},
    {"epoch": 0.32, "grad_norm": 1.905837742487114, "learning_rate": 4.711763764184309e-06, "loss": 0.5523, "step": 357},
    {"epoch": 0.32, "grad_norm": 2.117965067814315, "learning_rate": 4.710132985485355e-06, "loss": 0.5682, "step": 358},
    {"epoch": 0.32, "grad_norm": 2.1530948606389373, "learning_rate": 4.7084978903487985e-06, "loss": 0.5506, "step": 359},
    {"epoch": 0.32, "grad_norm": 1.8738866858316863, "learning_rate": 4.706858481968017e-06, "loss": 0.5426, "step": 360},
    {"epoch": 0.32, "grad_norm": 1.9967053512246618, "learning_rate": 4.705214763544806e-06, "loss": 0.5555, "step": 361},
    {"epoch": 0.32, "grad_norm": 2.352080896364055, "learning_rate": 4.703566738289389e-06, "loss": 0.587, "step": 362},
    {"epoch": 0.32, "grad_norm": 2.031696719881503, "learning_rate": 4.701914409420392e-06, "loss": 0.6088, "step": 363},
    {"epoch": 0.32, "grad_norm": 2.140107830595095, "learning_rate": 4.700257780164849e-06, "loss": 0.5596, "step": 364},
    {"epoch": 0.32, "grad_norm": 2.125236417141067, "learning_rate": 4.698596853758194e-06, "loss": 0.5513, "step": 365},
    {"epoch": 0.32, "grad_norm": 1.8878623518397697, "learning_rate": 4.696931633444251e-06, "loss": 0.5557, "step": 366},
    {"epoch": 0.33, "grad_norm": 1.9523463678463824, "learning_rate": 4.695262122475232e-06, "loss": 0.5317, "step": 367},
    {"epoch": 0.33, "grad_norm": 2.3748547328434455, "learning_rate": 4.6935883241117286e-06, "loss": 0.5733, "step": 368},
    {"epoch": 0.33, "grad_norm": 1.9248854873148575, "learning_rate": 4.691910241622704e-06, "loss": 0.5523, "step": 369},
    {"epoch": 0.33, "grad_norm": 2.1731794693383923, "learning_rate": 4.69022787828549e-06, "loss": 0.6489, "step": 370},
    {"epoch": 0.33, "grad_norm": 1.996570702327501, "learning_rate": 4.688541237385781e-06, "loss": 0.584, "step": 371},
    {"epoch": 0.33, "grad_norm": 2.0272036390008097, "learning_rate": 4.68685032221762e-06, "loss": 0.554, "step": 372},
    {"epoch": 0.33, "grad_norm": 1.9986403184037858, "learning_rate": 4.685155136083401e-06, "loss": 0.5798, "step": 373},
    {"epoch": 0.33, "grad_norm": 2.24642442330448, "learning_rate": 4.683455682293863e-06, "loss": 0.5486, "step": 374},
    {"epoch": 0.33, "grad_norm": 2.916261956844043, "learning_rate": 4.681751964168071e-06, "loss": 0.5678, "step": 375},
    {"epoch": 0.33, "grad_norm": 2.1597492287443396, "learning_rate": 4.680043985033427e-06, "loss": 0.5801, "step": 376},
    {"epoch": 0.33, "grad_norm": 1.9634034606261326, "learning_rate": 4.6783317482256506e-06, "loss": 0.5412, "step": 377},
    {"epoch": 0.33, "grad_norm": 2.0128604293697263, "learning_rate": 4.676615257088777e-06, "loss": 0.5538, "step": 378},
    {"epoch": 0.34, "grad_norm": 2.2205659530523976, "learning_rate": 4.674894514975149e-06, "loss": 0.494, "step": 379},
    {"epoch": 0.34, "grad_norm": 2.000557085172021, "learning_rate": 4.673169525245416e-06, "loss": 0.5459, "step": 380},
    {"epoch": 0.34, "grad_norm": 2.0089256125274826, "learning_rate": 4.671440291268518e-06, "loss": 0.5729, "step": 381},
    {"epoch": 0.34, "grad_norm": 2.076112293053539, "learning_rate": 4.66970681642169e-06, "loss": 0.5277, "step": 382},
    {"epoch": 0.34, "grad_norm": 1.996445627957894, "learning_rate": 4.667969104090441e-06, "loss": 0.5879, "step": 383},
    {"epoch": 0.34, "grad_norm": 2.379165029211644, "learning_rate": 4.666227157668564e-06, "loss": 0.5924, "step": 384},
    {"epoch": 0.34, "grad_norm": 2.101190475222136, "learning_rate": 4.664480980558118e-06, "loss": 0.6466, "step": 385},
    {"epoch": 0.34, "grad_norm": 2.035159570620747, "learning_rate": 4.662730576169423e-06, "loss": 0.5979, "step": 386},
    {"epoch": 0.34, "grad_norm": 2.1034174780447814, "learning_rate": 4.660975947921058e-06, "loss": 0.5635, "step": 387},
    {"epoch": 0.34, "grad_norm": 2.131573174129039, "learning_rate": 4.65921709923985e-06, "loss": 0.5602, "step": 388},
    {"epoch": 0.34, "grad_norm": 1.9282515780121203, "learning_rate": 4.657454033560868e-06, "loss": 0.5292, "step": 389},
    {"epoch": 0.35, "grad_norm": 1.922997066030009, "learning_rate": 4.655686754327419e-06, "loss": 0.5475, "step": 390},
    {"epoch": 0.35, "grad_norm": 1.9692624098665525, "learning_rate": 4.653915264991035e-06, "loss": 0.5529, "step": 391},
    {"epoch": 0.35, "grad_norm": 1.976011234185068, "learning_rate": 4.652139569011475e-06, "loss": 0.5439, "step": 392},
    {"epoch": 0.35, "grad_norm": 1.909657950321316, "learning_rate": 4.650359669856711e-06, "loss": 0.5558, "step": 393},
    {"epoch": 0.35, "grad_norm": 1.9134183734362904, "learning_rate": 4.648575571002926e-06, "loss": 0.5428, "step": 394},
    {"epoch": 0.35, "grad_norm": 2.067168876792994, "learning_rate": 4.646787275934501e-06, "loss": 0.6261, "step": 395},
    {"epoch": 0.35, "grad_norm": 1.9358304010171785, "learning_rate": 4.644994788144017e-06, "loss": 0.5698, "step": 396},
    {"epoch": 0.35, "grad_norm": 1.9671634072657547, "learning_rate": 4.643198111132241e-06, "loss": 0.5345, "step": 397},
    {"epoch": 0.35, "grad_norm": 2.0176052011599133, "learning_rate": 4.641397248408122e-06, "loss": 0.5028, "step": 398},
    {"epoch": 0.35, "grad_norm": 1.9960700483606102, "learning_rate": 4.639592203488784e-06, "loss": 0.5253, "step": 399},
    {"epoch": 0.35, "grad_norm": 1.9329472749401087, "learning_rate": 4.63778297989952e-06, "loss": 0.615, "step": 400},
    {"epoch": 0.36, "grad_norm": 1.9689526846990402, "learning_rate": 4.6359695811737805e-06, "loss": 0.5558, "step": 401},
    {"epoch": 0.36, "grad_norm": 2.043494453339269, "learning_rate": 4.634152010853175e-06, "loss": 0.5955, "step": 402},
    {"epoch": 0.36, "grad_norm": 1.9251519214200417, "learning_rate": 4.632330272487455e-06, "loss": 0.5587, "step": 403},
    {"epoch": 0.36, "grad_norm": 2.2049650629169495, "learning_rate": 4.6305043696345175e-06, "loss": 0.5633, "step": 404},
    {"epoch": 0.36, "grad_norm": 1.8971004366601951, "learning_rate": 4.628674305860389e-06, "loss": 0.5147, "step": 405},
    {"epoch": 0.36, "grad_norm": 1.958131978242853, "learning_rate": 4.626840084739224e-06, "loss": 0.558, "step": 406},
    {"epoch": 0.36, "grad_norm": 1.8809187299789303, "learning_rate": 4.625001709853296e-06,
|
"loss": 0.6029, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 2.07376704403877, |
|
"learning_rate": 4.623159184792992e-06, |
|
"loss": 0.5985, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.9773215118384355, |
|
"learning_rate": 4.621312513156801e-06, |
|
"loss": 0.5592, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 2.2454931529711373, |
|
"learning_rate": 4.6194616985513144e-06, |
|
"loss": 0.5265, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.917266484743525, |
|
"learning_rate": 4.617606744591214e-06, |
|
"loss": 0.5579, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.9196448264725143, |
|
"learning_rate": 4.615747654899263e-06, |
|
"loss": 0.5345, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.9733157447209138, |
|
"learning_rate": 4.613884433106306e-06, |
|
"loss": 0.528, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.994664364309963, |
|
"learning_rate": 4.612017082851253e-06, |
|
"loss": 0.5489, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.8266904473141898, |
|
"learning_rate": 4.610145607781081e-06, |
|
"loss": 0.5411, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 2.0294108873934364, |
|
"learning_rate": 4.608270011550823e-06, |
|
"loss": 0.5963, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.9735002273071562, |
|
"learning_rate": 4.606390297823555e-06, |
|
"loss": 0.5858, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.8987568737188125, |
|
"learning_rate": 4.604506470270403e-06, |
|
"loss": 0.493, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.9371998611194052, |
|
"learning_rate": 4.6026185325705195e-06, |
|
"loss": 0.521, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.8926221916061328, |
|
"learning_rate": 4.60072648841109e-06, |
|
"loss": 0.4922, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.8759546163633927, |
|
"learning_rate": 4.598830341487317e-06, |
|
"loss": 0.5487, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.9425705301229708, |
|
"learning_rate": 4.596930095502416e-06, |
|
"loss": 0.5155, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.8718904454318124, |
|
"learning_rate": 4.59502575416761e-06, |
|
"loss": 0.5372, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.8361742824749525, |
|
"learning_rate": 4.593117321202117e-06, |
|
"loss": 0.556, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.8520540031413573, |
|
"learning_rate": 4.59120480033315e-06, |
|
"loss": 0.6213, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.9670746741442957, |
|
"learning_rate": 4.5892881952959015e-06, |
|
"loss": 0.5685, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.969557039139786, |
|
"learning_rate": 4.587367509833543e-06, |
|
"loss": 0.5472, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.9873217018861624, |
|
"learning_rate": 4.585442747697218e-06, |
|
"loss": 0.5419, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.9508580236237527, |
|
"learning_rate": 4.5835139126460234e-06, |
|
"loss": 0.566, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.8929503262145966, |
|
"learning_rate": 4.58158100844702e-06, |
|
"loss": 0.5526, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.9394545018501204, |
|
"learning_rate": 4.57964403887521e-06, |
|
"loss": 0.5469, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 2.1045619298179927, |
|
"learning_rate": 4.577703007713538e-06, |
|
"loss": 0.5397, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.8886665443222683, |
|
"learning_rate": 4.575757918752879e-06, |
|
"loss": 0.5174, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.849256286655662, |
|
"learning_rate": 4.573808775792033e-06, |
|
"loss": 0.558, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.89537230772545, |
|
"learning_rate": 4.5718555826377195e-06, |
|
"loss": 0.6155, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 2.028600611269796, |
|
"learning_rate": 4.569898343104568e-06, |
|
"loss": 0.5639, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 2.1153787641168273, |
|
"learning_rate": 4.567937061015107e-06, |
|
"loss": 0.5883, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 2.0217937777574075, |
|
"learning_rate": 4.5659717401997655e-06, |
|
"loss": 0.5936, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 2.248716610859176, |
|
"learning_rate": 4.564002384496856e-06, |
|
"loss": 0.5539, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.9689879082294663, |
|
"learning_rate": 4.562028997752574e-06, |
|
"loss": 0.5636, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.763292547062648, |
|
"learning_rate": 4.560051583820987e-06, |
|
"loss": 0.5402, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 2.129235681815295, |
|
"learning_rate": 4.558070146564025e-06, |
|
"loss": 0.5279, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.987329959970642, |
|
"learning_rate": 4.55608468985148e-06, |
|
"loss": 0.5597, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.8223595251951752, |
|
"learning_rate": 4.554095217560991e-06, |
|
"loss": 0.5523, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.8945373677348296, |
|
"learning_rate": 4.55210173357804e-06, |
|
"loss": 0.5611, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.8010628987468362, |
|
"learning_rate": 4.550104241795946e-06, |
|
"loss": 0.5406, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.7680591979019162, |
|
"learning_rate": 4.548102746115852e-06, |
|
"loss": 0.5392, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.9894409183828397, |
|
"learning_rate": 4.546097250446724e-06, |
|
"loss": 0.568, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.9527217933389673, |
|
"learning_rate": 4.544087758705338e-06, |
|
"loss": 0.5616, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.8813970745759399, |
|
"learning_rate": 4.5420742748162735e-06, |
|
"loss": 0.5857, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.9697471415378363, |
|
"learning_rate": 4.540056802711911e-06, |
|
"loss": 0.5563, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.8610261764458738, |
|
"learning_rate": 4.5380353463324135e-06, |
|
"loss": 0.5414, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 2.0760585222699075, |
|
"learning_rate": 4.536009909625733e-06, |
|
"loss": 0.6113, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.9376608369819073, |
|
"learning_rate": 4.533980496547588e-06, |
|
"loss": 0.5567, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.9360208325717025, |
|
"learning_rate": 4.5319471110614676e-06, |
|
"loss": 0.5637, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.9103146510774847, |
|
"learning_rate": 4.529909757138619e-06, |
|
"loss": 0.5049, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.9645365532954322, |
|
"learning_rate": 4.5278684387580356e-06, |
|
"loss": 0.5424, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 2.0430691701895065, |
|
"learning_rate": 4.52582315990646e-06, |
|
"loss": 0.547, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.995685349345533, |
|
"learning_rate": 4.523773924578362e-06, |
|
"loss": 0.6005, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.9830544751269077, |
|
"learning_rate": 4.521720736775947e-06, |
|
"loss": 0.5563, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.8473463212841006, |
|
"learning_rate": 4.519663600509131e-06, |
|
"loss": 0.5913, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.8993140839815026, |
|
"learning_rate": 4.5176025197955495e-06, |
|
"loss": 0.5653, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.8179551662772986, |
|
"learning_rate": 4.515537498660535e-06, |
|
"loss": 0.5485, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.9275228062086758, |
|
"learning_rate": 4.51346854113712e-06, |
|
"loss": 0.5248, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.9668428438048349, |
|
"learning_rate": 4.511395651266023e-06, |
|
"loss": 0.5939, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.9602042152930792, |
|
"learning_rate": 4.509318833095642e-06, |
|
"loss": 0.5452, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.8348566721600683, |
|
"learning_rate": 4.507238090682049e-06, |
|
"loss": 0.5514, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.938525142403929, |
|
"learning_rate": 4.505153428088979e-06, |
|
"loss": 0.5822, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 2.008973560332548, |
|
"learning_rate": 4.503064849387822e-06, |
|
"loss": 0.5765, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.8911779425902009, |
|
"learning_rate": 4.500972358657618e-06, |
|
"loss": 0.5465, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.9224818772820709, |
|
"learning_rate": 4.4988759599850485e-06, |
|
"loss": 0.5897, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.990817812633161, |
|
"learning_rate": 4.496775657464423e-06, |
|
"loss": 0.5505, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.9167562026803746, |
|
"learning_rate": 4.4946714551976795e-06, |
|
"loss": 0.5779, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.9388400892712594, |
|
"learning_rate": 4.492563357294369e-06, |
|
"loss": 0.574, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 2.0140312788131762, |
|
"learning_rate": 4.490451367871655e-06, |
|
"loss": 0.4928, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 2.074902721101316, |
|
"learning_rate": 4.488335491054296e-06, |
|
"loss": 0.5366, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.8245504149698855, |
|
"learning_rate": 4.486215730974646e-06, |
|
"loss": 0.581, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 2.1100306515160656, |
|
"learning_rate": 4.4840920917726425e-06, |
|
"loss": 0.5677, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.9560380000004616, |
|
"learning_rate": 4.4819645775958e-06, |
|
"loss": 0.5426, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.721267171163405, |
|
"learning_rate": 4.479833192599198e-06, |
|
"loss": 0.5868, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.0001169229847124, |
|
"learning_rate": 4.477697940945478e-06, |
|
"loss": 0.5667, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.0111322894409134, |
|
"learning_rate": 4.475558826804833e-06, |
|
"loss": 0.5707, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.8179588699061133, |
|
"learning_rate": 4.473415854355e-06, |
|
"loss": 0.5484, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.0491236128150345, |
|
"learning_rate": 4.47126902778125e-06, |
|
"loss": 0.5575, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.049676347036571, |
|
"learning_rate": 4.469118351276381e-06, |
|
"loss": 0.5807, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.8999028972772445, |
|
"learning_rate": 4.4669638290407115e-06, |
|
"loss": 0.5447, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.0754807768031687, |
|
"learning_rate": 4.464805465282071e-06, |
|
"loss": 0.503, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.9532719169013661, |
|
"learning_rate": 4.462643264215789e-06, |
|
"loss": 0.5304, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.038547881198709, |
|
"learning_rate": 4.460477230064693e-06, |
|
"loss": 0.6116, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.1342568039197136, |
|
"learning_rate": 4.458307367059092e-06, |
|
"loss": 0.5632, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.9267024509918977, |
|
"learning_rate": 4.456133679436778e-06, |
|
"loss": 0.5574, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.795213135692931, |
|
"learning_rate": 4.453956171443008e-06, |
|
"loss": 0.5737, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.9428252328171443, |
|
"learning_rate": 4.451774847330505e-06, |
|
"loss": 0.5685, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.7903749800219122, |
|
"learning_rate": 4.449589711359439e-06, |
|
"loss": 0.5214, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 2.111615491479605, |
|
"learning_rate": 4.447400767797429e-06, |
|
"loss": 0.5329, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.936578332165912, |
|
"learning_rate": 4.445208020919531e-06, |
|
"loss": 0.543, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 2.0005145681262473, |
|
"learning_rate": 4.4430114750082246e-06, |
|
"loss": 0.5593, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.9720912009242426, |
|
"learning_rate": 4.4408111343534125e-06, |
|
"loss": 0.5812, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 2.0486055586452787, |
|
"learning_rate": 4.4386070032524085e-06, |
|
"loss": 0.5563, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.8043262288689983, |
|
"learning_rate": 4.436399086009928e-06, |
|
"loss": 0.4905, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.9608580808640215, |
|
"learning_rate": 4.43418738693808e-06, |
|
"loss": 0.5548, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 2.008548225584814, |
|
"learning_rate": 4.431971910356363e-06, |
|
"loss": 0.5955, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.8974274240345173, |
|
"learning_rate": 4.429752660591648e-06, |
|
"loss": 0.5742, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.8257689605722616, |
|
"learning_rate": 4.427529641978181e-06, |
|
"loss": 0.6177, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 2.0327301577551764, |
|
"learning_rate": 4.425302858857563e-06, |
|
"loss": 0.5872, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.9539661576324254, |
|
"learning_rate": 4.42307231557875e-06, |
|
"loss": 0.5728, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.9346302819034207, |
|
"learning_rate": 4.420838016498043e-06, |
|
"loss": 0.6019, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 2.1255667417446054, |
|
"learning_rate": 4.418599965979074e-06, |
|
"loss": 0.5981, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.8293805714793054, |
|
"learning_rate": 4.416358168392806e-06, |
|
"loss": 0.5497, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.929762647152706, |
|
"learning_rate": 4.414112628117518e-06, |
|
"loss": 0.5655, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.9808758258773635, |
|
"learning_rate": 4.411863349538798e-06, |
|
"loss": 0.5465, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 2.0413084054198647, |
|
"learning_rate": 4.409610337049537e-06, |
|
"loss": 0.5264, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.9506473664088613, |
|
"learning_rate": 4.4073535950499155e-06, |
|
"loss": 0.5284, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.7875399190820846, |
|
"learning_rate": 4.405093127947402e-06, |
|
"loss": 0.5406, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.9594159192262046, |
|
"learning_rate": 4.402828940156735e-06, |
|
"loss": 0.573, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.025943836966642, |
|
"learning_rate": 4.400561036099924e-06, |
|
"loss": 0.5227, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.9439140060564322, |
|
"learning_rate": 4.398289420206235e-06, |
|
"loss": 0.5802, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.891060025336787, |
|
"learning_rate": 4.396014096912182e-06, |
|
"loss": 0.55, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.9575594944193413, |
|
"learning_rate": 4.393735070661521e-06, |
|
"loss": 0.5213, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.024463679893138, |
|
"learning_rate": 4.391452345905239e-06, |
|
"loss": 0.5354, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.825359223217947, |
|
"learning_rate": 4.389165927101549e-06, |
|
"loss": 0.5506, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.0284690208197484, |
|
"learning_rate": 4.386875818715875e-06, |
|
"loss": 0.5763, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.9021830177238082, |
|
"learning_rate": 4.3845820252208476e-06, |
|
"loss": 0.5596, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.0000504821060203, |
|
"learning_rate": 4.3822845510962966e-06, |
|
"loss": 0.5701, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.7341340075311633, |
|
"learning_rate": 4.379983400829237e-06, |
|
"loss": 0.5315, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.9297447671947465, |
|
"learning_rate": 4.377678578913868e-06, |
|
"loss": 0.5798, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.9233069620366818, |
|
"learning_rate": 4.375370089851554e-06, |
|
"loss": 0.5391, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.976671700063146, |
|
"learning_rate": 4.3730579381508254e-06, |
|
"loss": 0.5674, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.914097057045113, |
|
"learning_rate": 4.3707421283273645e-06, |
|
"loss": 0.5367, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.8477362806445459, |
|
"learning_rate": 4.368422664903997e-06, |
|
"loss": 0.5349, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.9704477099484594, |
|
"learning_rate": 4.366099552410686e-06, |
|
"loss": 0.501, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.9297086500071385, |
|
"learning_rate": 4.363772795384522e-06, |
|
"loss": 0.5352, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.9090996748848685, |
|
"learning_rate": 4.36144239836971e-06, |
|
"loss": 0.5457, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.905870882711107, |
|
"learning_rate": 4.3591083659175655e-06, |
|
"loss": 0.5685, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.968618442539214, |
|
"learning_rate": 4.356770702586506e-06, |
|
"loss": 0.5476, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.9431218136805426, |
|
"learning_rate": 4.354429412942038e-06, |
|
"loss": 0.5719, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 2.0756451350956215, |
|
"learning_rate": 4.3520845015567495e-06, |
|
"loss": 0.5502, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.8350117686217275, |
|
"learning_rate": 4.349735973010306e-06, |
|
"loss": 0.5417, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 2.03495920394236, |
|
"learning_rate": 4.3473838318894324e-06, |
|
"loss": 0.545, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.7864245375307775, |
|
"learning_rate": 4.3450280827879125e-06, |
|
"loss": 0.5242, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.9018530036883652, |
|
"learning_rate": 4.342668730306575e-06, |
|
"loss": 0.554, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.8575071370513128, |
|
"learning_rate": 4.340305779053286e-06, |
|
"loss": 0.5287, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.8480049595126469, |
|
"learning_rate": 4.33793923364294e-06, |
|
"loss": 0.5554, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 2.103039565778625, |
|
"learning_rate": 4.335569098697454e-06, |
|
"loss": 0.5526, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.8712145108160219, |
|
"learning_rate": 4.33319537884575e-06, |
|
"loss": 0.5472, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.9271972466285336, |
|
"learning_rate": 4.330818078723756e-06, |
|
"loss": 0.5827, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.954438973741856, |
|
"learning_rate": 4.328437202974389e-06, |
|
"loss": 0.5433, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 2.0467264178153726, |
|
"learning_rate": 4.326052756247553e-06, |
|
"loss": 0.5981, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.9418055408636266, |
|
"learning_rate": 4.323664743200123e-06, |
|
"loss": 0.5832, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 2.444044603553196, |
|
"learning_rate": 4.32127316849594e-06, |
|
"loss": 0.5638, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.8791947879326414, |
|
"learning_rate": 4.318878036805802e-06, |
|
"loss": 0.5864, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.872356245946924, |
|
"learning_rate": 4.3164793528074525e-06, |
|
"loss": 0.5337, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 2.025493213646544, |
|
"learning_rate": 4.3140771211855725e-06, |
|
"loss": 0.5401, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.9845857759145742, |
|
"learning_rate": 4.3116713466317745e-06, |
|
"loss": 0.5712, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.9091874317608197, |
|
"learning_rate": 4.309262033844587e-06, |
|
"loss": 0.5337, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.926646558220673, |
|
"learning_rate": 4.30684918752945e-06, |
|
"loss": 0.5787, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 2.0450560123448165, |
|
"learning_rate": 4.304432812398704e-06, |
|
"loss": 0.5704, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.915800332391142, |
|
"learning_rate": 4.302012913171584e-06, |
|
"loss": 0.5194, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.9050588229807015, |
|
"learning_rate": 4.299589494574204e-06, |
|
"loss": 0.5104, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.9241714112001687, |
|
"learning_rate": 4.297162561339554e-06, |
|
"loss": 0.5388, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.8520273210081386, |
|
"learning_rate": 4.294732118207486e-06, |
|
"loss": 0.5363, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 2.0240180827444205, |
|
"learning_rate": 4.292298169924709e-06, |
|
"loss": 0.5632, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.8385436745856445, |
|
"learning_rate": 4.289860721244776e-06, |
|
"loss": 0.542, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.9260618068482396, |
|
"learning_rate": 4.287419776928078e-06, |
|
"loss": 0.5555, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 3.155290692386073, |
|
"learning_rate": 4.284975341741833e-06, |
|
"loss": 0.5336, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 2.461077264148098, |
|
"learning_rate": 4.282527420460073e-06, |
|
"loss": 0.5794, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.8539810703173831, |
|
"learning_rate": 4.280076017863643e-06, |
|
"loss": 0.5298, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.981150552962984, |
|
"learning_rate": 4.277621138740185e-06, |
|
"loss": 0.5862, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.8768796036679432, |
|
"learning_rate": 4.275162787884132e-06, |
|
"loss": 0.5255, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 2.022795676637582, |
|
"learning_rate": 4.272700970096696e-06, |
|
"loss": 0.5984, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.835618231704385, |
|
"learning_rate": 4.27023569018586e-06, |
|
"loss": 0.5297, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.853495005213679, |
|
"learning_rate": 4.267766952966369e-06, |
|
"loss": 0.5188, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.8841750183665413, |
|
"learning_rate": 4.265294763259721e-06, |
|
"loss": 0.5678, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.8013177249236558, |
|
"learning_rate": 4.262819125894156e-06, |
|
"loss": 0.5286, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.8320928495052518, |
|
"learning_rate": 4.2603400457046476e-06, |
|
"loss": 0.5341, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.8323864124122828, |
|
"learning_rate": 4.257857527532891e-06, |
|
"loss": 0.5283, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.9487038959665601, |
|
"learning_rate": 4.255371576227301e-06, |
|
"loss": 0.5418, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.7875154296015772, |
|
"learning_rate": 4.252882196642993e-06, |
|
"loss": 0.5065, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 2.089827238376911, |
|
"learning_rate": 4.250389393641778e-06, |
|
"loss": 0.5919, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.9078348658003164, |
|
"learning_rate": 4.247893172092157e-06, |
|
"loss": 0.5212, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.9952457072102052, |
|
"learning_rate": 4.245393536869303e-06, |
|
"loss": 0.5284, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 2.0728561008210384, |
|
"learning_rate": 4.242890492855056e-06, |
|
"loss": 0.5214, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.97825451090628, |
|
"learning_rate": 4.240384044937919e-06, |
|
"loss": 0.5586, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.85380003580073, |
|
"learning_rate": 4.237874198013037e-06, |
|
"loss": 0.6078, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.8198051628607304, |
|
"learning_rate": 4.235360956982196e-06, |
|
"loss": 0.5677, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 2.1343351043013183, |
|
"learning_rate": 4.23284432675381e-06, |
|
"loss": 0.5706, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 2.0294462862804896, |
|
"learning_rate": 4.230324312242911e-06, |
|
"loss": 0.5399, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.9618881336969853, |
|
"learning_rate": 4.227800918371145e-06, |
|
"loss": 0.5292, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.9665398714083597, |
|
"learning_rate": 4.225274150066752e-06, |
|
"loss": 0.5414, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 2.0976099857689268, |
|
"learning_rate": 4.222744012264567e-06, |
|
"loss": 0.5204, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.968032018982793, |
|
"learning_rate": 4.220210509906002e-06, |
|
"loss": 0.5622, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 2.0055542027073523, |
|
"learning_rate": 4.217673647939044e-06, |
|
"loss": 0.5723, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 2.031612125247833, |
|
"learning_rate": 4.215133431318239e-06, |
|
"loss": 0.5727, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 2.04253552367063, |
|
"learning_rate": 4.212589865004684e-06, |
|
"loss": 0.5676, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.9143447724555291, |
|
"learning_rate": 4.2100429539660205e-06, |
|
"loss": 0.5452, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 2.1284999811605334, |
|
"learning_rate": 4.20749270317642e-06, |
|
"loss": 0.5679, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.9726237378545723, |
|
"learning_rate": 4.204939117616578e-06, |
|
"loss": 0.5514, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 2.0537722291479583, |
|
"learning_rate": 4.202382202273702e-06, |
|
"loss": 0.5979, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.9695944675405062, |
|
"learning_rate": 4.1998219621415035e-06, |
|
"loss": 0.5519, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 2.1175148159531196, |
|
"learning_rate": 4.197258402220187e-06, |
|
"loss": 0.5437, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.9698920488340708, |
|
"learning_rate": 4.19469152751644e-06, |
|
"loss": 0.5765, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.879379971551763, |
|
"learning_rate": 4.192121343043424e-06, |
|
"loss": 0.5219, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.9668215341266202, |
|
"learning_rate": 4.189547853820767e-06, |
|
"loss": 0.4967, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 2.0264415648360723, |
|
"learning_rate": 4.186971064874547e-06, |
|
"loss": 0.5591, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.9996711001240413, |
|
"learning_rate": 4.18439098123729e-06, |
|
"loss": 0.5909, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.9209919754307736, |
|
"learning_rate": 4.181807607947954e-06, |
|
"loss": 0.5516, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.8120062816345244, |
|
"learning_rate": 4.1792209500519245e-06, |
|
"loss": 0.5112, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.9265993932694714, |
|
"learning_rate": 4.176631012601e-06, |
|
"loss": 0.5716, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.7951063568824173, |
|
"learning_rate": 4.1740378006533835e-06, |
|
"loss": 0.5546, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.9478736935670538, |
|
"learning_rate": 4.1714413192736756e-06, |
|
"loss": 0.5137, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.9166713700159672, |
|
"learning_rate": 4.168841573532859e-06, |
|
"loss": 0.5285, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.903061790874867, |
|
"learning_rate": 4.166238568508294e-06, |
|
"loss": 0.5643, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.8709574261812854, |
|
"learning_rate": 4.1636323092837065e-06, |
|
"loss": 0.5531, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.891374469060374, |
|
"learning_rate": 4.161022800949177e-06, |
|
"loss": 0.5386, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.8621023435008923, |
|
"learning_rate": 4.1584100486011315e-06, |
|
"loss": 0.5472, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.8927480615848256, |
|
"learning_rate": 4.155794057342333e-06, |
|
"loss": 0.567, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.9157957155248084, |
|
"learning_rate": 4.153174832281867e-06, |
|
"loss": 0.5295, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.7900976303440275, |
|
"learning_rate": 4.150552378535138e-06, |
|
"loss": 0.5374, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.9233860209522704, |
|
"learning_rate": 4.1479267012238555e-06, |
|
"loss": 0.5673, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.904244620695313, |
|
"learning_rate": 4.145297805476023e-06, |
|
"loss": 0.5674, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.8633100020518014, |
|
"learning_rate": 4.142665696425932e-06, |
|
"loss": 0.5717, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 2.0449274851229764, |
|
"learning_rate": 4.140030379214147e-06, |
|
"loss": 0.5382, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.8437126524936716, |
|
"learning_rate": 4.137391858987502e-06, |
|
"loss": 0.5635, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.9476300616110815, |
|
"learning_rate": 4.134750140899082e-06, |
|
"loss": 0.5354, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.8187836169409277, |
|
"learning_rate": 4.132105230108221e-06, |
|
"loss": 0.5678, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.8325255303792565, |
|
"learning_rate": 4.1294571317804854e-06, |
|
"loss": 0.5497, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.947073088948294, |
|
"learning_rate": 4.12680585108767e-06, |
|
"loss": 0.6005, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.9094602677105208, |
|
"learning_rate": 4.1241513932077835e-06, |
|
"loss": 0.5442, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.9308069577521967, |
|
"learning_rate": 4.121493763325039e-06, |
|
"loss": 0.4952, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.955225453108231, |
|
"learning_rate": 4.118832966629847e-06, |
|
"loss": 0.5161, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.8884686835300686, |
|
"learning_rate": 4.116169008318798e-06, |
|
"loss": 0.5834, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.851971220446282, |
|
"learning_rate": 4.113501893594662e-06, |
|
"loss": 0.5762, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.982231343732386, |
|
"learning_rate": 4.110831627666372e-06, |
|
"loss": 0.5043, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.8783480932058496, |
|
"learning_rate": 4.108158215749014e-06, |
|
"loss": 0.5202, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.7472053862830499, |
|
"learning_rate": 4.105481663063821e-06, |
|
"loss": 0.5064, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 4.71435326799849, |
|
"learning_rate": 4.102801974838158e-06, |
|
"loss": 0.5808, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.9383972995582568, |
|
"learning_rate": 4.100119156305514e-06, |
|
"loss": 0.5268, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.7165619283230378, |
|
"learning_rate": 4.097433212705492e-06, |
|
"loss": 0.5376, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.8524888535442023, |
|
"learning_rate": 4.094744149283796e-06, |
|
"loss": 0.5388, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.958121956311822, |
|
"learning_rate": 4.092051971292228e-06, |
|
"loss": 0.5273, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.8752806971174674, |
|
"learning_rate": 4.089356683988668e-06, |
|
"loss": 0.5283, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 2.4399117721583465, |
|
"learning_rate": 4.086658292637072e-06, |
|
"loss": 0.5643, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.897865148445396, |
|
"learning_rate": 4.083956802507456e-06, |
|
"loss": 0.5432, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 2.0947253224544826, |
|
"learning_rate": 4.0812522188758874e-06, |
|
"loss": 0.6738, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.8801252766945993, |
|
"learning_rate": 4.078544547024479e-06, |
|
"loss": 0.5516, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.884681207915535, |
|
"learning_rate": 4.075833792241371e-06, |
|
"loss": 0.5521, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.911314829964074, |
|
"learning_rate": 4.073119959820728e-06, |
|
"loss": 0.5279, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.860637117587055, |
|
"learning_rate": 4.070403055062721e-06, |
|
"loss": 0.5543, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 2.0453601596603157, |
|
"learning_rate": 4.0676830832735245e-06, |
|
"loss": 0.5757, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.8114060321351384, |
|
"learning_rate": 4.064960049765304e-06, |
|
"loss": 0.5049, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.959305167631277, |
|
"learning_rate": 4.062233959856202e-06, |
|
"loss": 0.5378, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.8509512649844786, |
|
"learning_rate": 4.059504818870332e-06, |
|
"loss": 0.5695, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 2.0120311393374677, |
|
"learning_rate": 4.056772632137762e-06, |
|
"loss": 0.5548, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 2.185006431209757, |
|
"learning_rate": 4.054037404994516e-06, |
|
"loss": 0.5796, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.8639659087725635, |
|
"learning_rate": 4.05129914278255e-06, |
|
"loss": 0.503, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 2.0128366658538726, |
|
"learning_rate": 4.048557850849749e-06, |
|
"loss": 0.5543, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 2.0493127075126467, |
|
"learning_rate": 4.045813534549917e-06, |
|
"loss": 0.5971, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.8943877873256292, |
|
"learning_rate": 4.043066199242762e-06, |
|
"loss": 0.5512, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.8607643797927613, |
|
"learning_rate": 4.04031585029389e-06, |
|
"loss": 0.5755, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.933467010931308, |
|
"learning_rate": 4.037562493074792e-06, |
|
"loss": 0.546, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.870898209604796, |
|
"learning_rate": 4.034806132962834e-06, |
|
"loss": 0.5101, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.7765005525064146, |
|
"learning_rate": 4.032046775341247e-06, |
|
"loss": 0.535, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.808388020113739, |
|
"learning_rate": 4.029284425599116e-06, |
|
"loss": 0.5532, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.9444426383785842, |
|
"learning_rate": 4.026519089131371e-06, |
|
"loss": 0.5804, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.8810929458792174, |
|
"learning_rate": 4.023750771338774e-06, |
|
"loss": 0.5023, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.7587173598023012, |
|
"learning_rate": 4.020979477627907e-06, |
|
"loss": 0.588, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.8616544736960938, |
|
"learning_rate": 4.018205213411169e-06, |
|
"loss": 0.5604, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.8517363531329913, |
|
"learning_rate": 4.015427984106759e-06, |
|
"loss": 0.5503, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.7164279131663547, |
|
"learning_rate": 4.012647795138664e-06, |
|
"loss": 0.5353, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.8490922932257532, |
|
"learning_rate": 4.009864651936653e-06, |
|
"loss": 0.5527, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.9222471762582807, |
|
"learning_rate": 4.007078559936268e-06, |
|
"loss": 0.5449, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.7126406752680576, |
|
"learning_rate": 4.0042895245788035e-06, |
|
"loss": 0.5102, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.7999692875631594, |
|
"learning_rate": 4.001497551311308e-06, |
|
"loss": 0.514, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.8482521644616647, |
|
"learning_rate": 3.998702645586565e-06, |
|
"loss": 0.546, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.8124842120343776, |
|
"learning_rate": 3.995904812863086e-06, |
|
"loss": 0.5432, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.9053654350943952, |
|
"learning_rate": 3.993104058605099e-06, |
|
"loss": 0.6222, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.851530834120678, |
|
"learning_rate": 3.9903003882825396e-06, |
|
"loss": 0.5069, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.824612938648448, |
|
"learning_rate": 3.987493807371033e-06, |
|
"loss": 0.5279, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.8322983038942529, |
|
"learning_rate": 3.984684321351895e-06, |
|
"loss": 0.504, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 2.1601679247075105, |
|
"learning_rate": 3.981871935712112e-06, |
|
"loss": 0.5448, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.9324323412240167, |
|
"learning_rate": 3.979056655944335e-06, |
|
"loss": 0.5696, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.8887222870071794, |
|
"learning_rate": 3.9762384875468645e-06, |
|
"loss": 0.5147, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.9025483031058836, |
|
"learning_rate": 3.973417436023646e-06, |
|
"loss": 0.5322, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.944754689874286, |
|
"learning_rate": 3.970593506884254e-06, |
|
"loss": 0.564, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.8782062559948918, |
|
"learning_rate": 3.9677667056438824e-06, |
|
"loss": 0.5179, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.7615090001622373, |
|
"learning_rate": 3.964937037823337e-06, |
|
"loss": 0.52, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.877979446527034, |
|
"learning_rate": 3.962104508949018e-06, |
|
"loss": 0.5611, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.8668900126580097, |
|
"learning_rate": 3.9592691245529174e-06, |
|
"loss": 0.5398, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 2.0467424748632395, |
|
"learning_rate": 3.9564308901726016e-06, |
|
"loss": 0.5429, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.7523480652481473, |
|
"learning_rate": 3.9535898113512046e-06, |
|
"loss": 0.5456, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.9384307177445268, |
|
"learning_rate": 3.950745893637414e-06, |
|
"loss": 0.5298, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 2.0200307543606266, |
|
"learning_rate": 3.947899142585464e-06, |
|
"loss": 0.5813, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.8825594318661294, |
|
"learning_rate": 3.945049563755119e-06, |
|
"loss": 0.5843, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.801304483173922, |
|
"learning_rate": 3.94219716271167e-06, |
|
"loss": 0.5332, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.789336412692842, |
|
"learning_rate": 3.939341945025918e-06, |
|
"loss": 0.5712, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.6764596672056864, |
|
"learning_rate": 3.936483916274163e-06, |
|
"loss": 0.5471, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.8160991340297739, |
|
"learning_rate": 3.933623082038199e-06, |
|
"loss": 0.5172, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.9958719154660882, |
|
"learning_rate": 3.930759447905298e-06, |
|
"loss": 0.5243, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.7844190098902166, |
|
"learning_rate": 3.927893019468196e-06, |
|
"loss": 0.5679, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.8231700761644845, |
|
"learning_rate": 3.925023802325094e-06, |
|
"loss": 0.5415, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.8577751348591511, |
|
"learning_rate": 3.922151802079633e-06, |
|
"loss": 0.5451, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.872268020286279, |
|
"learning_rate": 3.919277024340891e-06, |
|
"loss": 0.5805, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.956916033214976, |
|
"learning_rate": 3.916399474723373e-06, |
|
"loss": 0.5142, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.8690696320721123, |
|
"learning_rate": 3.913519158846994e-06, |
|
"loss": 0.5377, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.8932224298053513, |
|
"learning_rate": 3.910636082337076e-06, |
|
"loss": 0.5174, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.7671002724508906, |
|
"learning_rate": 3.907750250824327e-06, |
|
"loss": 0.5227, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.8537234882936333, |
|
"learning_rate": 3.904861669944839e-06, |
|
"loss": 0.5672, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.8993796687475375, |
|
"learning_rate": 3.901970345340075e-06, |
|
"loss": 0.5131, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.8118617206389966, |
|
"learning_rate": 3.899076282656853e-06, |
|
"loss": 0.5243, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.8195324114535576, |
|
"learning_rate": 3.89617948754734e-06, |
|
"loss": 0.5255, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.777076552111516, |
|
"learning_rate": 3.89327996566904e-06, |
|
"loss": 0.5482, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.7960584295638569, |
|
"learning_rate": 3.890377722684782e-06, |
|
"loss": 0.5232, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 2.0180517293259777, |
|
"learning_rate": 3.887472764262709e-06, |
|
"loss": 0.4988, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.7698597985590767, |
|
"learning_rate": 3.884565096076269e-06, |
|
"loss": 0.4934, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.9593013419554524, |
|
"learning_rate": 3.8816547238042e-06, |
|
"loss": 0.554, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.803176799671639, |
|
"learning_rate": 3.878741653130521e-06, |
|
"loss": 0.5058, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.8739139669777212, |
|
"learning_rate": 3.875825889744525e-06, |
|
"loss": 0.5291, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.7425957572489872, |
|
"learning_rate": 3.872907439340758e-06, |
|
"loss": 0.5132, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.7880023308134785, |
|
"learning_rate": 3.86998630761902e-06, |
|
"loss": 0.5388, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 2.035324802689225, |
|
"learning_rate": 3.867062500284342e-06, |
|
"loss": 0.5225, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.7720228048563502, |
|
"learning_rate": 3.864136023046984e-06, |
|
"loss": 0.5535, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.893636721431615, |
|
"learning_rate": 3.861206881622419e-06, |
|
"loss": 0.5445, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.9975882991420841, |
|
"learning_rate": 3.8582750817313245e-06, |
|
"loss": 0.498, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.8894358056153195, |
|
"learning_rate": 3.855340629099568e-06, |
|
"loss": 0.5262, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.8226831631189866, |
|
"learning_rate": 3.852403529458199e-06, |
|
"loss": 0.5289, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.9219589460322386, |
|
"learning_rate": 3.84946378854344e-06, |
|
"loss": 0.5828, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.9524000874112546, |
|
"learning_rate": 3.846521412096665e-06, |
|
"loss": 0.5755, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.7855988589662195, |
|
"learning_rate": 3.8435764058643994e-06, |
|
"loss": 0.508, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.7556968697529176, |
|
"learning_rate": 3.840628775598306e-06, |
|
"loss": 0.5038, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.8615629845007688, |
|
"learning_rate": 3.837678527055168e-06, |
|
"loss": 0.5658, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 3.355106616980178, |
|
"learning_rate": 3.834725665996889e-06, |
|
"loss": 0.6255, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 2.057901705133853, |
|
"learning_rate": 3.8317701981904655e-06, |
|
"loss": 0.5009, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.8144866213511652, |
|
"learning_rate": 3.828812129407994e-06, |
|
"loss": 0.5378, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.895740992214761, |
|
"learning_rate": 3.825851465426643e-06, |
|
"loss": 0.5414, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.7690202691648218, |
|
"learning_rate": 3.822888212028658e-06, |
|
"loss": 0.5782, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.9910212850942313, |
|
"learning_rate": 3.819922375001334e-06, |
|
"loss": 0.538, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 2.022977401775343, |
|
"learning_rate": 3.816953960137017e-06, |
|
"loss": 0.5265, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 2.18942238369997, |
|
"learning_rate": 3.8139829732330833e-06, |
|
"loss": 0.5419, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 2.0143145051916487, |
|
"learning_rate": 3.8110094200919356e-06, |
|
"loss": 0.5396, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.8684895296380082, |
|
"learning_rate": 3.8080333065209885e-06, |
|
"loss": 0.5285, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.899758991227905, |
|
"learning_rate": 3.8050546383326546e-06, |
|
"loss": 0.5392, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.7830347822365242, |
|
"learning_rate": 3.8020734213443392e-06, |
|
"loss": 0.5395, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.9688219937316351, |
|
"learning_rate": 3.799089661378423e-06, |
|
"loss": 0.5832, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.8380061964557934, |
|
"learning_rate": 3.7961033642622536e-06, |
|
"loss": 0.5182, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.9752769027783192, |
|
"learning_rate": 3.793114535828134e-06, |
|
"loss": 0.5189, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.9908258845677271, |
|
"learning_rate": 3.7901231819133104e-06, |
|
"loss": 0.5863, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.8419144313470388, |
|
"learning_rate": 3.787129308359963e-06, |
|
"loss": 0.5596, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.8578409208981632, |
|
"learning_rate": 3.7841329210151905e-06, |
|
"loss": 0.5757, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.8125362585272666, |
|
"learning_rate": 3.7811340257310036e-06, |
|
"loss": 0.5625, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.8266843142853604, |
|
"learning_rate": 3.778132628364309e-06, |
|
"loss": 0.5121, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.9286747700189457, |
|
"learning_rate": 3.7751287347769006e-06, |
|
"loss": 0.5856, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.8358169963837994, |
|
"learning_rate": 3.772122350835447e-06, |
|
"loss": 0.5363, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.8751145280860322, |
|
"learning_rate": 3.769113482411483e-06, |
|
"loss": 0.5435, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.7372022137266947, |
|
"learning_rate": 3.766102135381393e-06, |
|
"loss": 0.5114, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.848532567966691, |
|
"learning_rate": 3.763088315626402e-06, |
|
"loss": 0.4887, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.8724024281108291, |
|
"learning_rate": 3.7600720290325666e-06, |
|
"loss": 0.5681, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.7564274203136065, |
|
"learning_rate": 3.757053281490759e-06, |
|
"loss": 0.5365, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.7090468035537372, |
|
"learning_rate": 3.75403207889666e-06, |
|
"loss": 0.4976, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.8628034310476902, |
|
"learning_rate": 3.7510084271507417e-06, |
|
"loss": 0.5908, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.8673457440060792, |
|
"learning_rate": 3.7479823321582624e-06, |
|
"loss": 0.5641, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.8378062191959523, |
|
"learning_rate": 3.744953799829252e-06, |
|
"loss": 0.5175, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.779154712157358, |
|
"learning_rate": 3.7419228360784987e-06, |
|
"loss": 0.5539, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 2.1820639181555315, |
|
"learning_rate": 3.73888944682554e-06, |
|
"loss": 0.5247, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.927216958283792, |
|
"learning_rate": 3.735853637994652e-06, |
|
"loss": 0.5851, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.7670365768745326, |
|
"learning_rate": 3.732815415514834e-06, |
|
"loss": 0.5829, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.825202964363253, |
|
"learning_rate": 3.729774785319801e-06, |
|
"loss": 0.5257, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.8200852022234557, |
|
"learning_rate": 3.72673175334797e-06, |
|
"loss": 0.55, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.9436493930137209, |
|
"learning_rate": 3.723686325542448e-06, |
|
"loss": 0.5583, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.7581670709714554, |
|
"learning_rate": 3.7206385078510204e-06, |
|
"loss": 0.5267, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.9439324051591973, |
|
"learning_rate": 3.717588306226143e-06, |
|
"loss": 0.5686, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.8154349894294908, |
|
"learning_rate": 3.7145357266249248e-06, |
|
"loss": 0.5668, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.829602382975092, |
|
"learning_rate": 3.7114807750091198e-06, |
|
"loss": 0.5096, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.7902487805325054, |
|
"learning_rate": 3.7084234573451145e-06, |
|
"loss": 0.5387, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.7734778927084154, |
|
"learning_rate": 3.7053637796039173e-06, |
|
"loss": 0.5227, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.8359664701196194, |
|
"learning_rate": 3.7023017477611444e-06, |
|
"loss": 0.5183, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 2.049839823780983, |
|
"learning_rate": 3.699237367797011e-06, |
|
"loss": 0.5158, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.839740383172249, |
|
"learning_rate": 3.6961706456963166e-06, |
|
"loss": 0.509, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.7742492301936488, |
|
"learning_rate": 3.693101587448436e-06, |
|
"loss": 0.547, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.7626686489679533, |
|
"learning_rate": 3.6900301990473074e-06, |
|
"loss": 0.5501, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.81358040457354, |
|
"learning_rate": 3.686956486491419e-06, |
|
"loss": 0.5258, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.8446309626844912, |
|
"learning_rate": 3.6838804557837972e-06, |
|
"loss": 0.5438, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.8020540676799555, |
|
"learning_rate": 3.680802112931996e-06, |
|
"loss": 0.5333, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.8177001575706107, |
|
"learning_rate": 3.677721463948087e-06, |
|
"loss": 0.5194, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.7662648614084315, |
|
"learning_rate": 3.6746385148486437e-06, |
|
"loss": 0.5229, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.7914748738808024, |
|
"learning_rate": 3.6715532716547325e-06, |
|
"loss": 0.5443, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.6582914688424026, |
|
"learning_rate": 3.6684657403919005e-06, |
|
"loss": 0.4672, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.8779379042503213, |
|
"learning_rate": 3.6653759270901634e-06, |
|
"loss": 0.5361, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.843796012903189, |
|
"learning_rate": 3.6622838377839927e-06, |
|
"loss": 0.5903, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.7389903959091482, |
|
"learning_rate": 3.6591894785123065e-06, |
|
"loss": 0.5232, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 2.1531271375101912, |
|
"learning_rate": 3.6560928553184556e-06, |
|
"loss": 0.5811, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.8744519871212226, |
|
"learning_rate": 3.6529939742502114e-06, |
|
"loss": 0.5094, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 2.1796693544184405, |
|
"learning_rate": 3.649892841359756e-06, |
|
"loss": 0.5324, |
|
"step": 792 |
|
}, |
|
{
"epoch": 0.7,
"grad_norm": 1.7983464824305884,
"learning_rate": 3.6467894627036697e-06,
"loss": 0.5406,
"step": 793
},
{
"epoch": 0.7,
"grad_norm": 1.8385213368207254,
"learning_rate": 3.6436838443429177e-06,
"loss": 0.5116,
"step": 794
},
{
"epoch": 0.7,
"grad_norm": 1.8303911353695022,
"learning_rate": 3.64057599234284e-06,
"loss": 0.5032,
"step": 795
},
{
"epoch": 0.71,
"grad_norm": 1.8212476470235475,
"learning_rate": 3.6374659127731394e-06,
"loss": 0.4977,
"step": 796
},
{
"epoch": 0.71,
"grad_norm": 1.788273127421183,
"learning_rate": 3.6343536117078674e-06,
"loss": 0.5132,
"step": 797
},
{
"epoch": 0.71,
"grad_norm": 1.792471501776643,
"learning_rate": 3.631239095225417e-06,
"loss": 0.6034,
"step": 798
},
{
"epoch": 0.71,
"grad_norm": 1.7180614128401976,
"learning_rate": 3.6281223694085055e-06,
"loss": 0.5125,
"step": 799
},
{
"epoch": 0.71,
"grad_norm": 1.968143388774121,
"learning_rate": 3.625003440344166e-06,
"loss": 0.5192,
"step": 800
},
{
"epoch": 0.71,
"grad_norm": 1.78698643398069,
"learning_rate": 3.6218823141237346e-06,
"loss": 0.5389,
"step": 801
},
{
"epoch": 0.71,
"grad_norm": 1.7360516235744345,
"learning_rate": 3.6187589968428388e-06,
"loss": 0.55,
"step": 802
},
{
"epoch": 0.71,
"grad_norm": 1.8926768947040113,
"learning_rate": 3.6156334946013844e-06,
"loss": 0.5402,
"step": 803
},
{
"epoch": 0.71,
"grad_norm": 1.7341073776764506,
"learning_rate": 3.612505813503545e-06,
"loss": 0.5156,
"step": 804
},
{
"epoch": 0.71,
"grad_norm": 1.9037532755321576,
"learning_rate": 3.6093759596577493e-06,
"loss": 0.5035,
"step": 805
},
{
"epoch": 0.71,
"grad_norm": 1.768026916515408,
"learning_rate": 3.60624393917667e-06,
"loss": 0.5317,
"step": 806
},
{
"epoch": 0.71,
"grad_norm": 1.8195441338851683,
"learning_rate": 3.6031097581772123e-06,
"loss": 0.5173,
"step": 807
},
{
"epoch": 0.72,
"grad_norm": 1.7451524294172138,
"learning_rate": 3.599973422780497e-06,
"loss": 0.5447,
"step": 808
},
{
"epoch": 0.72,
"grad_norm": 1.7994216931464604,
"learning_rate": 3.5968349391118573e-06,
"loss": 0.5468,
"step": 809
},
{
"epoch": 0.72,
"grad_norm": 1.7827289419599717,
"learning_rate": 3.5936943133008183e-06,
"loss": 0.5036,
"step": 810
},
{
"epoch": 0.72,
"grad_norm": 1.9123669337349365,
"learning_rate": 3.590551551481091e-06,
"loss": 0.4994,
"step": 811
},
{
"epoch": 0.72,
"grad_norm": 1.909151918011393,
"learning_rate": 3.5874066597905573e-06,
"loss": 0.5437,
"step": 812
},
{
"epoch": 0.72,
"grad_norm": 1.8314316190947115,
"learning_rate": 3.5842596443712586e-06,
"loss": 0.5327,
"step": 813
},
{
"epoch": 0.72,
"grad_norm": 2.025080353968657,
"learning_rate": 3.581110511369384e-06,
"loss": 0.5207,
"step": 814
},
{
"epoch": 0.72,
"grad_norm": 1.719830501688002,
"learning_rate": 3.5779592669352588e-06,
"loss": 0.5043,
"step": 815
},
{
"epoch": 0.72,
"grad_norm": 1.8856042934205883,
"learning_rate": 3.574805917223332e-06,
"loss": 0.534,
"step": 816
},
{
"epoch": 0.72,
"grad_norm": 1.8669902777268896,
"learning_rate": 3.5716504683921626e-06,
"loss": 0.5487,
"step": 817
},
{
"epoch": 0.72,
"grad_norm": 1.8420217203623648,
"learning_rate": 3.568492926604412e-06,
"loss": 0.4655,
"step": 818
},
{
"epoch": 0.73,
"grad_norm": 1.8587298766263622,
"learning_rate": 3.5653332980268267e-06,
"loss": 0.5308,
"step": 819
},
{
"epoch": 0.73,
"grad_norm": 1.8329162913986954,
"learning_rate": 3.562171588830231e-06,
"loss": 0.5061,
"step": 820
},
{
"epoch": 0.73,
"grad_norm": 1.7226245016695787,
"learning_rate": 3.5590078051895105e-06,
"loss": 0.5022,
"step": 821
},
{
"epoch": 0.73,
"grad_norm": 1.7947516408265423,
"learning_rate": 3.555841953283603e-06,
"loss": 0.5059,
"step": 822
},
{
"epoch": 0.73,
"grad_norm": 1.7754650010913384,
"learning_rate": 3.552674039295486e-06,
"loss": 0.5183,
"step": 823
},
{
"epoch": 0.73,
"grad_norm": 2.0058342412884267,
"learning_rate": 3.5495040694121644e-06,
"loss": 0.5717,
"step": 824
},
{
"epoch": 0.73,
"grad_norm": 1.8536876200790606,
"learning_rate": 3.546332049824659e-06,
"loss": 0.5445,
"step": 825
},
{
"epoch": 0.73,
"grad_norm": 1.9446394955278312,
"learning_rate": 3.543157986727991e-06,
"loss": 0.5778,
"step": 826
},
{
"epoch": 0.73,
"grad_norm": 1.7769561446293407,
"learning_rate": 3.5399818863211747e-06,
"loss": 0.5209,
"step": 827
},
{
"epoch": 0.73,
"grad_norm": 1.7847626696288204,
"learning_rate": 3.5368037548072042e-06,
"loss": 0.5684,
"step": 828
},
{
"epoch": 0.73,
"grad_norm": 1.856855628494933,
"learning_rate": 3.5336235983930383e-06,
"loss": 0.5277,
"step": 829
},
{
"epoch": 0.74,
"grad_norm": 1.799135122090622,
"learning_rate": 3.530441423289591e-06,
"loss": 0.53,
"step": 830
},
{
"epoch": 0.74,
"grad_norm": 1.7372348199564838,
"learning_rate": 3.5272572357117208e-06,
"loss": 0.5082,
"step": 831
},
{
"epoch": 0.74,
"grad_norm": 1.7713730143331359,
"learning_rate": 3.5240710418782137e-06,
"loss": 0.5127,
"step": 832
},
{
"epoch": 0.74,
"grad_norm": 1.808116845193293,
"learning_rate": 3.520882848011775e-06,
"loss": 0.5339,
"step": 833
},
{
"epoch": 0.74,
"grad_norm": 1.8168585745209507,
"learning_rate": 3.5176926603390176e-06,
"loss": 0.5773,
"step": 834
},
{
"epoch": 0.74,
"grad_norm": 1.8433472787266432,
"learning_rate": 3.514500485090446e-06,
"loss": 0.5446,
"step": 835
},
{
"epoch": 0.74,
"grad_norm": 1.7473743951502463,
"learning_rate": 3.511306328500449e-06,
"loss": 0.5182,
"step": 836
},
{
"epoch": 0.74,
"grad_norm": 1.9068925551475813,
"learning_rate": 3.5081101968072818e-06,
"loss": 0.5428,
"step": 837
},
{
"epoch": 0.74,
"grad_norm": 1.8621077674572017,
"learning_rate": 3.5049120962530608e-06,
"loss": 0.5783,
"step": 838
},
{
"epoch": 0.74,
"grad_norm": 1.8188442080835585,
"learning_rate": 3.501712033083744e-06,
"loss": 0.559,
"step": 839
},
{
"epoch": 0.74,
"grad_norm": 1.9008658249988244,
"learning_rate": 3.4985100135491245e-06,
"loss": 0.5322,
"step": 840
},
{
"epoch": 0.74,
"grad_norm": 1.8107617898563186,
"learning_rate": 3.495306043902817e-06,
"loss": 0.592,
"step": 841
},
{
"epoch": 0.75,
"grad_norm": 1.8972175021059394,
"learning_rate": 3.4921001304022422e-06,
"loss": 0.527,
"step": 842
},
{
"epoch": 0.75,
"grad_norm": 1.773730752308571,
"learning_rate": 3.4888922793086192e-06,
"loss": 0.5422,
"step": 843
},
{
"epoch": 0.75,
"grad_norm": 1.8207201600566427,
"learning_rate": 3.4856824968869506e-06,
"loss": 0.5463,
"step": 844
},
{
"epoch": 0.75,
"grad_norm": 1.7825701352278942,
"learning_rate": 3.4824707894060108e-06,
"loss": 0.5376,
"step": 845
},
{
"epoch": 0.75,
"grad_norm": 1.8186780308546509,
"learning_rate": 3.4792571631383345e-06,
"loss": 0.5448,
"step": 846
},
{
"epoch": 0.75,
"grad_norm": 1.7196535770637023,
"learning_rate": 3.4760416243602034e-06,
"loss": 0.5719,
"step": 847
},
{
"epoch": 0.75,
"grad_norm": 1.7996950762262636,
"learning_rate": 3.4728241793516345e-06,
"loss": 0.575,
"step": 848
},
{
"epoch": 0.75,
"grad_norm": 1.8460755337411012,
"learning_rate": 3.4696048343963667e-06,
"loss": 0.5303,
"step": 849
},
{
"epoch": 0.75,
"grad_norm": 1.8518850346827596,
"learning_rate": 3.4663835957818515e-06,
"loss": 0.5294,
"step": 850
},
{
"epoch": 0.75,
"grad_norm": 1.761477307422264,
"learning_rate": 3.463160469799237e-06,
"loss": 0.5303,
"step": 851
},
{
"epoch": 0.75,
"grad_norm": 1.8476905525063971,
"learning_rate": 3.459935462743359e-06,
"loss": 0.5365,
"step": 852
},
{
"epoch": 0.76,
"grad_norm": 1.7748738324934357,
"learning_rate": 3.4567085809127247e-06,
"loss": 0.5581,
"step": 853
},
{
"epoch": 0.76,
"grad_norm": 1.69994493873254,
"learning_rate": 3.4534798306095054e-06,
"loss": 0.5142,
"step": 854
},
{
"epoch": 0.76,
"grad_norm": 1.7867273775159276,
"learning_rate": 3.45024921813952e-06,
"loss": 0.5397,
"step": 855
},
{
"epoch": 0.76,
"grad_norm": 1.8894059211718275,
"learning_rate": 3.4470167498122253e-06,
"loss": 0.5327,
"step": 856
},
{
"epoch": 0.76,
"grad_norm": 1.8759154191563252,
"learning_rate": 3.4437824319407003e-06,
"loss": 0.5091,
"step": 857
},
{
"epoch": 0.76,
"grad_norm": 1.7992806971923871,
"learning_rate": 3.4405462708416393e-06,
"loss": 0.5206,
"step": 858
},
{
"epoch": 0.76,
"grad_norm": 1.8238604800708562,
"learning_rate": 3.437308272835335e-06,
"loss": 0.5452,
"step": 859
},
{
"epoch": 0.76,
"grad_norm": 1.8504559231955047,
"learning_rate": 3.4340684442456673e-06,
"loss": 0.4953,
"step": 860
},
{
"epoch": 0.76,
"grad_norm": 1.754272242495459,
"learning_rate": 3.4308267914000915e-06,
"loss": 0.5897,
"step": 861
},
{
"epoch": 0.76,
"grad_norm": 1.8733571713304673,
"learning_rate": 3.427583320629626e-06,
"loss": 0.4897,
"step": 862
},
{
"epoch": 0.76,
"grad_norm": 1.8284259921968489,
"learning_rate": 3.4243380382688395e-06,
"loss": 0.5285,
"step": 863
},
{
"epoch": 0.77,
"grad_norm": 2.4115721951019933,
"learning_rate": 3.4210909506558383e-06,
"loss": 0.5327,
"step": 864
},
{
"epoch": 0.77,
"grad_norm": 1.827035801006768,
"learning_rate": 3.4178420641322564e-06,
"loss": 0.5959,
"step": 865
},
{
"epoch": 0.77,
"grad_norm": 1.7275971455556467,
"learning_rate": 3.414591385043237e-06,
"loss": 0.5378,
"step": 866
},
{
"epoch": 0.77,
"grad_norm": 1.8399392023051784,
"learning_rate": 3.411338919737429e-06,
"loss": 0.4737,
"step": 867
},
{
"epoch": 0.77,
"grad_norm": 1.768301025681768,
"learning_rate": 3.408084674566967e-06,
"loss": 0.5237,
"step": 868
},
{
"epoch": 0.77,
"grad_norm": 1.7940606795442973,
"learning_rate": 3.404828655887462e-06,
"loss": 0.5199,
"step": 869
},
{
"epoch": 0.77,
"grad_norm": 1.736302967715387,
"learning_rate": 3.4015708700579893e-06,
"loss": 0.5103,
"step": 870
},
{
"epoch": 0.77,
"grad_norm": 1.864705554020529,
"learning_rate": 3.398311323441075e-06,
"loss": 0.5456,
"step": 871
},
{
"epoch": 0.77,
"grad_norm": 1.7371337216784375,
"learning_rate": 3.3950500224026838e-06,
"loss": 0.54,
"step": 872
},
{
"epoch": 0.77,
"grad_norm": 1.7936602187941955,
"learning_rate": 3.3917869733122082e-06,
"loss": 0.5079,
"step": 873
},
{
"epoch": 0.77,
"grad_norm": 1.78627252413609,
"learning_rate": 3.3885221825424535e-06,
"loss": 0.5272,
"step": 874
},
{
"epoch": 0.78,
"grad_norm": 2.0255442379828588,
"learning_rate": 3.385255656469627e-06,
"loss": 0.5451,
"step": 875
},
{
"epoch": 0.78,
"grad_norm": 1.8151205951225127,
"learning_rate": 3.3819874014733245e-06,
"loss": 0.545,
"step": 876
},
{
"epoch": 0.78,
"grad_norm": 1.7644602173142565,
"learning_rate": 3.3787174239365183e-06,
"loss": 0.5021,
"step": 877
},
{
"epoch": 0.78,
"grad_norm": 1.88690726704404,
"learning_rate": 3.3754457302455464e-06,
"loss": 0.5518,
"step": 878
},
{
"epoch": 0.78,
"grad_norm": 1.9466161438131033,
"learning_rate": 3.372172326790097e-06,
"loss": 0.5499,
"step": 879
},
{
"epoch": 0.78,
"grad_norm": 1.7759200801637758,
"learning_rate": 3.3688972199631974e-06,
"loss": 0.5165,
"step": 880
},
{
"epoch": 0.78,
"grad_norm": 1.7404813059594972,
"learning_rate": 3.365620416161204e-06,
"loss": 0.4914,
"step": 881
},
{
"epoch": 0.78,
"grad_norm": 1.7186493344503415,
"learning_rate": 3.3623419217837836e-06,
"loss": 0.4742,
"step": 882
},
{
"epoch": 0.78,
"grad_norm": 1.688196680775216,
"learning_rate": 3.3590617432339077e-06,
"loss": 0.4973,
"step": 883
},
{
"epoch": 0.78,
"grad_norm": 1.9998510596311416,
"learning_rate": 3.355779886917836e-06,
"loss": 0.4844,
"step": 884
},
{
"epoch": 0.78,
"grad_norm": 1.9138346820930676,
"learning_rate": 3.3524963592451048e-06,
"loss": 0.5767,
"step": 885
},
{
"epoch": 0.78,
"grad_norm": 1.8240977441306703,
"learning_rate": 3.349211166628515e-06,
"loss": 0.5535,
"step": 886
},
{
"epoch": 0.79,
"grad_norm": 1.866188876988342,
"learning_rate": 3.3459243154841194e-06,
"loss": 0.5293,
"step": 887
},
{
"epoch": 0.79,
"grad_norm": 1.8428560106324356,
"learning_rate": 3.342635812231208e-06,
"loss": 0.5545,
"step": 888
},
{
"epoch": 0.79,
"grad_norm": 1.946339663223573,
"learning_rate": 3.3393456632922997e-06,
"loss": 0.5662,
"step": 889
},
{
"epoch": 0.79,
"grad_norm": 1.7835322668971936,
"learning_rate": 3.3360538750931277e-06,
"loss": 0.5343,
"step": 890
},
{
"epoch": 0.79,
"grad_norm": 1.8985737358987655,
"learning_rate": 3.3327604540626245e-06,
"loss": 0.4882,
"step": 891
},
{
"epoch": 0.79,
"grad_norm": 1.7452799601454962,
"learning_rate": 3.3294654066329125e-06,
"loss": 0.4847,
"step": 892
},
{
"epoch": 0.79,
"grad_norm": 1.8001237054125527,
"learning_rate": 3.3261687392392917e-06,
"loss": 0.5294,
"step": 893
},
{
"epoch": 0.79,
"grad_norm": 1.878202857326882,
"learning_rate": 3.3228704583202244e-06,
"loss": 0.5506,
"step": 894
},
{
"epoch": 0.79,
"grad_norm": 1.9555722164046163,
"learning_rate": 3.319570570317324e-06,
"loss": 0.5675,
"step": 895
},
{
"epoch": 0.79,
"grad_norm": 1.842178231242227,
"learning_rate": 3.316269081675345e-06,
"loss": 0.507,
"step": 896
},
{
"epoch": 0.79,
"grad_norm": 1.7925971037996111,
"learning_rate": 3.3129659988421646e-06,
"loss": 0.544,
"step": 897
},
{
"epoch": 0.8,
"grad_norm": 1.8448861762114805,
"learning_rate": 3.309661328268776e-06,
"loss": 0.5547,
"step": 898
},
{
"epoch": 0.8,
"grad_norm": 1.8798388041152536,
"learning_rate": 3.3063550764092722e-06,
"loss": 0.5535,
"step": 899
},
{
"epoch": 0.8,
"grad_norm": 2.111205651077239,
"learning_rate": 3.3030472497208354e-06,
"loss": 0.5372,
"step": 900
},
{
"epoch": 0.8,
"grad_norm": 1.9023950174091275,
"learning_rate": 3.2997378546637217e-06,
"loss": 0.5183,
"step": 901
},
{
"epoch": 0.8,
"grad_norm": 1.828168427249714,
"learning_rate": 3.296426897701251e-06,
"loss": 0.5139,
"step": 902
},
{
"epoch": 0.8,
"grad_norm": 1.752269482139502,
"learning_rate": 3.293114385299795e-06,
"loss": 0.4977,
"step": 903
},
{
"epoch": 0.8,
"grad_norm": 1.8319951115110833,
"learning_rate": 3.2898003239287626e-06,
"loss": 0.4762,
"step": 904
},
{
"epoch": 0.8,
"grad_norm": 1.9203452380089554,
"learning_rate": 3.2864847200605864e-06,
"loss": 0.5328,
"step": 905
},
{
"epoch": 0.8,
"grad_norm": 1.9603318007718882,
"learning_rate": 3.2831675801707126e-06,
"loss": 0.5114,
"step": 906
},
{
"epoch": 0.8,
"grad_norm": 1.772386222577394,
"learning_rate": 3.2798489107375875e-06,
"loss": 0.5365,
"step": 907
},
{
"epoch": 0.8,
"grad_norm": 1.7664388279000272,
"learning_rate": 3.2765287182426445e-06,
"loss": 0.5218,
"step": 908
},
{
"epoch": 0.81,
"grad_norm": 1.705238499414661,
"learning_rate": 3.2732070091702928e-06,
"loss": 0.515,
"step": 909
},
{
"epoch": 0.81,
"grad_norm": 1.8346490363510246,
"learning_rate": 3.2698837900078995e-06,
"loss": 0.5032,
"step": 910
},
{
"epoch": 0.81,
"grad_norm": 2.1169074366870504,
"learning_rate": 3.2665590672457853e-06,
"loss": 0.5463,
"step": 911
},
{
"epoch": 0.81,
"grad_norm": 1.9794978557420737,
"learning_rate": 3.263232847377205e-06,
"loss": 0.5556,
"step": 912
},
{
"epoch": 0.81,
"grad_norm": 1.8775372141713855,
"learning_rate": 3.2599051368983393e-06,
"loss": 0.5479,
"step": 913
},
{
"epoch": 0.81,
"grad_norm": 1.9608965084656977,
"learning_rate": 3.256575942308278e-06,
"loss": 0.4934,
"step": 914
},
{
"epoch": 0.81,
"grad_norm": 1.9035969324400404,
"learning_rate": 3.2532452701090107e-06,
"loss": 0.494,
"step": 915
},
{
"epoch": 0.81,
"grad_norm": 1.8348725792159002,
"learning_rate": 3.2499131268054114e-06,
"loss": 0.5101,
"step": 916
},
{
"epoch": 0.81,
"grad_norm": 1.837442323872043,
"learning_rate": 3.2465795189052283e-06,
"loss": 0.5028,
"step": 917
},
{
"epoch": 0.81,
"grad_norm": 2.0588580347681114,
"learning_rate": 3.2432444529190714e-06,
"loss": 0.5572,
"step": 918
},
{
"epoch": 0.81,
"grad_norm": 1.800197863385395,
"learning_rate": 3.2399079353603958e-06,
"loss": 0.5456,
"step": 919
},
{
"epoch": 0.81,
"grad_norm": 1.8642409261562531,
"learning_rate": 3.236569972745492e-06,
"loss": 0.4677,
"step": 920
},
{
"epoch": 0.82,
"grad_norm": 1.8605177191737032,
"learning_rate": 3.2332305715934735e-06,
"loss": 0.5086,
"step": 921
},
{
"epoch": 0.82,
"grad_norm": 1.8779408638935786,
"learning_rate": 3.229889738426264e-06,
"loss": 0.4576,
"step": 922
},
{
"epoch": 0.82,
"grad_norm": 1.8069917958596904,
"learning_rate": 3.226547479768582e-06,
"loss": 0.4847,
"step": 923
},
{
"epoch": 0.82,
"grad_norm": 1.949377976689351,
"learning_rate": 3.2232038021479317e-06,
"loss": 0.5095,
"step": 924
},
{
"epoch": 0.82,
"grad_norm": 1.9043326063097796,
"learning_rate": 3.2198587120945878e-06,
"loss": 0.5382,
"step": 925
},
{
"epoch": 0.82,
"grad_norm": 1.8420984644699558,
"learning_rate": 3.2165122161415844e-06,
"loss": 0.5354,
"step": 926
},
{
"epoch": 0.82,
"grad_norm": 1.9159042477860826,
"learning_rate": 3.2131643208246994e-06,
"loss": 0.5676,
"step": 927
},
{
"epoch": 0.82,
"grad_norm": 1.8091292349745058,
"learning_rate": 3.209815032682445e-06,
"loss": 0.5152,
"step": 928
},
{
"epoch": 0.82,
"grad_norm": 1.9172852365194688,
"learning_rate": 3.206464358256054e-06,
"loss": 0.4965,
"step": 929
},
{
"epoch": 0.82,
"grad_norm": 1.8611473653995623,
"learning_rate": 3.2031123040894658e-06,
"loss": 0.5222,
"step": 930
},
{
"epoch": 0.82,
"grad_norm": 2.0718827285873,
"learning_rate": 3.1997588767293146e-06,
"loss": 0.5512,
"step": 931
},
{
"epoch": 0.83,
"grad_norm": 1.8367854431958046,
"learning_rate": 3.196404082724918e-06,
"loss": 0.522,
"step": 932
},
{
"epoch": 0.83,
"grad_norm": 1.9326854247843166,
"learning_rate": 3.19304792862826e-06,
"loss": 0.5262,
"step": 933
},
{
"epoch": 0.83,
"grad_norm": 1.8127395054303974,
"learning_rate": 3.1896904209939827e-06,
"loss": 0.4792,
"step": 934
},
{
"epoch": 0.83,
"grad_norm": 1.7562676297882738,
"learning_rate": 3.1863315663793715e-06,
"loss": 0.5132,
"step": 935
},
{
"epoch": 0.83,
"grad_norm": 2.1115973982625826,
"learning_rate": 3.182971371344342e-06,
"loss": 0.5431,
"step": 936
},
{
"epoch": 0.83,
"grad_norm": 1.9125267865316575,
"learning_rate": 3.179609842451428e-06,
"loss": 0.5049,
"step": 937
},
{
"epoch": 0.83,
"grad_norm": 1.8084301603852846,
"learning_rate": 3.1762469862657673e-06,
"loss": 0.5057,
"step": 938
},
{
"epoch": 0.83,
"grad_norm": 1.979887599791109,
"learning_rate": 3.172882809355092e-06,
"loss": 0.5076,
"step": 939
},
{
"epoch": 0.83,
"grad_norm": 1.8023843851685244,
"learning_rate": 3.1695173182897126e-06,
"loss": 0.507,
"step": 940
},
{
"epoch": 0.83,
"grad_norm": 1.894018453771296,
"learning_rate": 3.166150519642506e-06,
"loss": 0.4892,
"step": 941
},
{
"epoch": 0.83,
"grad_norm": 2.085200027059979,
"learning_rate": 3.162782419988901e-06,
"loss": 0.5109,
"step": 942
},
{
"epoch": 0.84,
"grad_norm": 1.9145317338940404,
"learning_rate": 3.1594130259068723e-06,
"loss": 0.5597,
"step": 943
},
{
"epoch": 0.84,
"grad_norm": 2.6898725390450196,
"learning_rate": 3.1560423439769173e-06,
"loss": 0.5364,
"step": 944
},
{
"epoch": 0.84,
"grad_norm": 1.8953702977370355,
"learning_rate": 3.152670380782052e-06,
"loss": 0.5402,
"step": 945
},
{
"epoch": 0.84,
"grad_norm": 1.8989394358006901,
"learning_rate": 3.1492971429077924e-06,
"loss": 0.499,
"step": 946
},
{
"epoch": 0.84,
"grad_norm": 1.8295299154755171,
"learning_rate": 3.1459226369421465e-06,
"loss": 0.5133,
"step": 947
},
{
"epoch": 0.84,
"grad_norm": 1.9849867895935545,
"learning_rate": 3.1425468694755968e-06,
"loss": 0.5173,
"step": 948
},
{
"epoch": 0.84,
"grad_norm": 1.7806451045050948,
"learning_rate": 3.13916984710109e-06,
"loss": 0.5314,
"step": 949
},
{
"epoch": 0.84,
"grad_norm": 1.8227836319972825,
"learning_rate": 3.1357915764140247e-06,
"loss": 0.5413,
"step": 950
},
{
"epoch": 0.84,
"grad_norm": 1.873012898370893,
"learning_rate": 3.1324120640122362e-06,
"loss": 0.5582,
"step": 951
},
{
"epoch": 0.84,
"grad_norm": 1.7312834865810094,
"learning_rate": 3.129031316495986e-06,
"loss": 0.4969,
"step": 952
},
{
"epoch": 0.84,
"grad_norm": 1.850102780247153,
"learning_rate": 3.1256493404679468e-06,
"loss": 0.4981,
"step": 953
},
{
"epoch": 0.84,
"grad_norm": 1.85121227661343,
"learning_rate": 3.122266142533191e-06,
"loss": 0.4926,
"step": 954
},
{
"epoch": 0.85,
"grad_norm": 1.911516866472808,
"learning_rate": 3.118881729299178e-06,
"loss": 0.5141,
"step": 955
},
{
"epoch": 0.85,
"grad_norm": 1.9562838385609387,
"learning_rate": 3.1154961073757388e-06,
"loss": 0.5119,
"step": 956
},
{
"epoch": 0.85,
"grad_norm": 1.9792813407411627,
"learning_rate": 3.1121092833750684e-06,
"loss": 0.5379,
"step": 957
},
{
"epoch": 0.85,
"grad_norm": 2.02442320634539,
"learning_rate": 3.1087212639117057e-06,
"loss": 0.5516,
"step": 958
},
{
"epoch": 0.85,
"grad_norm": 1.9139240600717167,
"learning_rate": 3.1053320556025272e-06,
"loss": 0.5035,
"step": 959
},
{
"epoch": 0.85,
"grad_norm": 1.6820068229198286,
"learning_rate": 3.10194166506673e-06,
"loss": 0.5082,
"step": 960
},
{
"epoch": 0.85,
"grad_norm": 1.837945615423465,
"learning_rate": 3.098550098925819e-06,
"loss": 0.5301,
"step": 961
},
{
"epoch": 0.85,
"grad_norm": 1.8297516724958631,
"learning_rate": 3.095157363803598e-06,
"loss": 0.531,
"step": 962
},
{
"epoch": 0.85,
"grad_norm": 1.8057255627930757,
"learning_rate": 3.091763466326152e-06,
"loss": 0.4962,
"step": 963
},
{
"epoch": 0.85,
"grad_norm": 1.8568993199742134,
"learning_rate": 3.0883684131218356e-06,
"loss": 0.5555,
"step": 964
},
{
"epoch": 0.85,
"grad_norm": 1.7537389006494144,
"learning_rate": 3.084972210821261e-06,
"loss": 0.4783,
"step": 965
},
{
"epoch": 0.86,
"grad_norm": 1.936835841446932,
"learning_rate": 3.0815748660572856e-06,
"loss": 0.5696,
"step": 966
},
{
"epoch": 0.86,
"grad_norm": 1.818312553754802,
"learning_rate": 3.078176385464997e-06,
"loss": 0.5125,
"step": 967
},
{
"epoch": 0.86,
"grad_norm": 1.9098144545445246,
"learning_rate": 3.074776775681702e-06,
"loss": 0.5472,
"step": 968
},
{
"epoch": 0.86,
"grad_norm": 1.8530900425697827,
"learning_rate": 3.071376043346912e-06,
"loss": 0.5387,
"step": 969
},
{
"epoch": 0.86,
"grad_norm": 1.734080732564932,
"learning_rate": 3.0679741951023302e-06,
"loss": 0.5082,
"step": 970
},
{
"epoch": 0.86,
"grad_norm": 1.7157271380716255,
"learning_rate": 3.06457123759184e-06,
"loss": 0.5057,
"step": 971
},
{
"epoch": 0.86,
"grad_norm": 1.8615941154610314,
"learning_rate": 3.061167177461492e-06,
"loss": 0.5326,
"step": 972
},
{
"epoch": 0.86,
"grad_norm": 1.8820053895933144,
"learning_rate": 3.0577620213594888e-06,
"loss": 0.5446,
"step": 973
},
{
"epoch": 0.86,
"grad_norm": 1.8157963098312144,
"learning_rate": 3.0543557759361735e-06,
"loss": 0.5627,
"step": 974
},
{
"epoch": 0.86,
"grad_norm": 1.7642611841801312,
"learning_rate": 3.0509484478440187e-06,
"loss": 0.5062,
"step": 975
},
{
"epoch": 0.86,
"grad_norm": 1.6839843509551078,
"learning_rate": 3.047540043737609e-06,
"loss": 0.526,
"step": 976
},
{
"epoch": 0.87,
"grad_norm": 1.9004464286881788,
"learning_rate": 3.0441305702736314e-06,
"loss": 0.5617,
"step": 977
},
{
"epoch": 0.87,
"grad_norm": 1.9767954561122347,
"learning_rate": 3.0407200341108618e-06,
"loss": 0.5077,
"step": 978
},
{
"epoch": 0.87,
"grad_norm": 1.825193444039661,
"learning_rate": 3.0373084419101506e-06,
"loss": 0.5097,
"step": 979
},
{
"epoch": 0.87,
"grad_norm": 1.6810496770660706,
"learning_rate": 3.0338958003344115e-06,
"loss": 0.4993,
"step": 980
},
{
"epoch": 0.87,
"grad_norm": 1.7411591022211208,
"learning_rate": 3.0304821160486086e-06,
"loss": 0.4789,
"step": 981
},
{
"epoch": 0.87,
"grad_norm": 1.7580191857406102,
"learning_rate": 3.0270673957197393e-06,
"loss": 0.5225,
"step": 982
},
{
"epoch": 0.87,
"grad_norm": 1.7440391739784626,
"learning_rate": 3.023651646016828e-06,
"loss": 0.5281,
"step": 983
},
{
"epoch": 0.87,
"grad_norm": 1.8458326991098015,
"learning_rate": 3.0202348736109074e-06,
"loss": 0.5419,
"step": 984
},
{
"epoch": 0.87,
"grad_norm": 1.7105130101397825,
"learning_rate": 3.0168170851750077e-06,
"loss": 0.5113,
"step": 985
},
{
"epoch": 0.87,
"grad_norm": 1.74741112552671,
"learning_rate": 3.013398287384144e-06,
"loss": 0.5389,
"step": 986
},
{
"epoch": 0.87,
"grad_norm": 1.7962043049830843,
"learning_rate": 3.009978486915302e-06,
"loss": 0.5212,
"step": 987
},
{
"epoch": 0.88,
"grad_norm": 1.698744627764944,
"learning_rate": 3.006557690447427e-06,
"loss": 0.508,
"step": 988
},
{
"epoch": 0.88,
"grad_norm": 1.852219826000981,
"learning_rate": 3.0031359046614073e-06,
"loss": 0.5491,
"step": 989
},
{
"epoch": 0.88,
"grad_norm": 1.8471065567470235,
"learning_rate": 2.9997131362400666e-06,
"loss": 0.4937,
"step": 990
},
{
"epoch": 0.88,
"grad_norm": 1.7925416653935446,
"learning_rate": 2.996289391868144e-06,
"loss": 0.4691,
"step": 991
},
{
"epoch": 0.88,
"grad_norm": 1.8399091219230026,
"learning_rate": 2.9928646782322875e-06,
"loss": 0.5317,
"step": 992
},
{
"epoch": 0.88,
"grad_norm": 1.7232111222956334,
"learning_rate": 2.989439002021036e-06,
"loss": 0.5152,
"step": 993
},
{
"epoch": 0.88,
"grad_norm": 1.8514924128683583,
"learning_rate": 2.986012369924811e-06,
"loss": 0.573,
"step": 994
},
{
"epoch": 0.88,
"grad_norm": 1.9226274135737127,
"learning_rate": 2.982584788635897e-06,
"loss": 0.5168,
"step": 995
},
{
"epoch": 0.88,
"grad_norm": 1.7726209925124323,
"learning_rate": 2.979156264848437e-06,
"loss": 0.5157,
"step": 996
},
{
"epoch": 0.88,
"grad_norm": 1.9564580777074403,
"learning_rate": 2.9757268052584097e-06,
"loss": 0.5693,
"step": 997
},
{
"epoch": 0.88,
"grad_norm": 1.959067937570625,
"learning_rate": 2.9722964165636263e-06,
"loss": 0.5151,
"step": 998
},
{
"epoch": 0.88,
"grad_norm": 1.9170985685573452,
"learning_rate": 2.9688651054637086e-06,
"loss": 0.5944,
"step": 999
},
{
"epoch": 0.89,
"grad_norm": 1.8773917767941883,
"learning_rate": 2.9654328786600823e-06,
"loss": 0.5128,
"step": 1000
},
{
"epoch": 0.89,
"grad_norm": 1.8184350092212354,
"learning_rate": 2.96199974285596e-06,
"loss": 0.5052,
"step": 1001
},
{
"epoch": 0.89,
"grad_norm": 1.9785501833054495,
"learning_rate": 2.9585657047563314e-06,
"loss": 0.5794,
"step": 1002
},
{
"epoch": 0.89,
"grad_norm": 1.8252999737390432,
"learning_rate": 2.9551307710679467e-06,
"loss": 0.5657,
"step": 1003
},
{
"epoch": 0.89,
"grad_norm": 1.8050415950517775,
"learning_rate": 2.9516949484993055e-06,
"loss": 0.5054,
"step": 1004
},
{
"epoch": 0.89,
"grad_norm": 1.7751399822789855,
"learning_rate": 2.9482582437606445e-06,
"loss": 0.5025,
"step": 1005
},
{
"epoch": 0.89,
"grad_norm": 1.7388276457967873,
"learning_rate": 2.9448206635639213e-06,
"loss": 0.48,
"step": 1006
},
{
"epoch": 0.89,
"grad_norm": 1.9401107131003557,
"learning_rate": 2.941382214622806e-06,
"loss": 0.5503,
"step": 1007
},
{
"epoch": 0.89,
"grad_norm": 1.8055033222058048,
"learning_rate": 2.937942903652663e-06,
"loss": 0.5589,
"step": 1008
},
{
"epoch": 0.89,
"grad_norm": 1.8833337691151302,
"learning_rate": 2.93450273737054e-06,
"loss": 0.5395,
"step": 1009
},
{
"epoch": 0.89,
"grad_norm": 1.875491652961695,
"learning_rate": 2.9310617224951594e-06,
"loss": 0.5316,
"step": 1010
},
{
"epoch": 0.9,
"grad_norm": 1.801842376116382,
"learning_rate": 2.9276198657468947e-06,
"loss": 0.5369,
"step": 1011
},
{
"epoch": 0.9,
"grad_norm": 1.7434378005034878,
"learning_rate": 2.9241771738477686e-06,
"loss": 0.5345,
"step": 1012
},
{
"epoch": 0.9,
"grad_norm": 1.840106192598806,
"learning_rate": 2.920733653521432e-06,
"loss": 0.5391,
"step": 1013
},
{
"epoch": 0.9,
"grad_norm": 1.847462115860291,
"learning_rate": 2.917289311493155e-06,
"loss": 0.5176,
"step": 1014
},
{
"epoch": 0.9,
"grad_norm": 1.7647442625122556,
"learning_rate": 2.9138441544898123e-06,
"loss": 0.502,
"step": 1015
},
{
"epoch": 0.9,
"grad_norm": 1.7981764340288842,
"learning_rate": 2.9103981892398698e-06,
"loss": 0.5422,
"step": 1016
},
{
"epoch": 0.9,
"grad_norm": 1.8491619302175528,
"learning_rate": 2.9069514224733725e-06,
"loss": 0.4993,
"step": 1017
},
{
"epoch": 0.9,
"grad_norm": 1.8345458812848932,
"learning_rate": 2.903503860921931e-06,
"loss": 0.5322,
"step": 1018
},
{
"epoch": 0.9,
"grad_norm": 1.9341637425072102,
"learning_rate": 2.900055511318707e-06,
"loss": 0.5338,
"step": 1019
},
{
"epoch": 0.9,
"grad_norm": 1.8629389822642988,
"learning_rate": 2.896606380398402e-06,
"loss": 0.538,
"step": 1020
},
{
"epoch": 0.9,
"grad_norm": 1.8190212821977385,
"learning_rate": 2.8931564748972446e-06,
"loss": 0.5417,
"step": 1021
},
{
"epoch": 0.91,
"grad_norm": 1.808827329636345,
"learning_rate": 2.8897058015529734e-06,
"loss": 0.5142,
"step": 1022
},
{
"epoch": 0.91,
"grad_norm": 1.8593710361108637,
"learning_rate": 2.8862543671048288e-06,
"loss": 0.5148,
"step": 1023
},
{
"epoch": 0.91,
"grad_norm": 1.9421214945128942,
"learning_rate": 2.882802178293538e-06,
"loss": 0.5375,
"step": 1024
},
{
"epoch": 0.91,
"grad_norm": 1.8337412539689857,
"learning_rate": 2.879349241861299e-06,
"loss": 0.5179,
"step": 1025
},
{
"epoch": 0.91,
"grad_norm": 1.8368160375080673,
"learning_rate": 2.8758955645517724e-06,
"loss": 0.5404,
"step": 1026
},
{
"epoch": 0.91,
"grad_norm": 1.8549078592919745,
"learning_rate": 2.8724411531100642e-06,
"loss": 0.5668,
"step": 1027
},
{
"epoch": 0.91,
"grad_norm": 1.7783870646526379,
"learning_rate": 2.8689860142827153e-06,
"loss": 0.5556,
"step": 1028
},
{
"epoch": 0.91,
"grad_norm": 1.8409533014441846,
"learning_rate": 2.865530154817687e-06,
"loss": 0.4876,
"step": 1029
},
{
"epoch": 0.91,
"grad_norm": 1.786131185447664,
"learning_rate": 2.8620735814643467e-06,
"loss": 0.5503,
"step": 1030
},
{
"epoch": 0.91,
"grad_norm": 1.8105893579918746,
"learning_rate": 2.858616300973458e-06,
"loss": 0.4895,
"step": 1031
},
{
"epoch": 0.91,
"grad_norm": 1.8569751090297868,
"learning_rate": 2.8551583200971638e-06,
"loss": 0.5826,
"step": 1032
},
{
"epoch": 0.91,
"grad_norm": 1.9048981456653071,
"learning_rate": 2.8516996455889763e-06,
"loss": 0.5319,
"step": 1033
},
{
"epoch": 0.92,
"grad_norm": 1.7483385624776147,
"learning_rate": 2.8482402842037615e-06,
"loss": 0.4664,
"step": 1034
},
{
"epoch": 0.92,
"grad_norm": 1.9686504696650498,
"learning_rate": 2.844780242697727e-06,
"loss": 0.5459,
"step": 1035
},
{
"epoch": 0.92,
"grad_norm": 1.7882376390021062,
"learning_rate": 2.8413195278284084e-06,
"loss": 0.5272,
"step": 1036
},
{
"epoch": 0.92,
"grad_norm": 1.9426980086335028,
"learning_rate": 2.8378581463546578e-06,
"loss": 0.4785,
"step": 1037
},
{
"epoch": 0.92,
"grad_norm": 1.7027235807049006,
"learning_rate": 2.8343961050366275e-06,
"loss": 0.5295,
"step": 1038
},
{
"epoch": 0.92,
"grad_norm": 1.7896557139712275,
"learning_rate": 2.8309334106357606e-06,
"loss": 0.4917,
"step": 1039
},
{
"epoch": 0.92,
"grad_norm": 1.9559071265127668,
"learning_rate": 2.827470069914772e-06,
"loss": 0.4813,
"step": 1040
},
{
"epoch": 0.92,
"grad_norm": 1.8624058139739532,
"learning_rate": 2.8240060896376425e-06,
"loss": 0.5173,
"step": 1041
},
{
"epoch": 0.92,
"grad_norm": 1.8517426107017696,
"learning_rate": 2.8205414765696005e-06,
"loss": 0.5022,
"step": 1042
},
{
"epoch": 0.92,
"grad_norm": 1.8669678553165407,
"learning_rate": 2.817076237477111e-06,
"loss": 0.5153,
"step": 1043
},
{
"epoch": 0.92,
"grad_norm": 1.765641529260409,
"learning_rate": 2.8136103791278597e-06,
"loss": 0.5459,
"step": 1044
},
{
"epoch": 0.93,
"grad_norm": 1.7951199125059072,
"learning_rate": 2.8101439082907432e-06,
"loss": 0.5556,
"step": 1045
},
{
"epoch": 0.93,
"grad_norm": 1.809753719361155,
"learning_rate": 2.806676831735855e-06,
"loss": 0.5082,
"step": 1046
},
{
"epoch": 0.93,
"grad_norm": 2.022714382944429,
"learning_rate": 2.8032091562344704e-06,
"loss": 0.5079,
"step": 1047
},
{
"epoch": 0.93,
"grad_norm": 1.7851810141612734,
"learning_rate": 2.7997408885590355e-06,
"loss": 0.5044,
"step": 1048
},
{
"epoch": 0.93,
"grad_norm": 1.6942393347489977,
"learning_rate": 2.7962720354831507e-06,
"loss": 0.4845,
"step": 1049
},
{
"epoch": 0.93,
"grad_norm": 1.8154999007296455,
"learning_rate": 2.792802603781562e-06,
"loss": 0.5039,
"step": 1050
},
{
"epoch": 0.93,
"grad_norm": 1.909875578351421,
"learning_rate": 2.7893326002301446e-06,
"loss": 0.5081,
"step": 1051
},
{
"epoch": 0.93,
"grad_norm": 1.8216639768552991,
"learning_rate": 2.785862031605891e-06,
"loss": 0.5022,
"step": 1052
},
{
"epoch": 0.93,
"grad_norm": 1.968304394575798,
"learning_rate": 2.7823909046868957e-06,
"loss": 0.5217,
"step": 1053
},
{
"epoch": 0.93,
"grad_norm": 2.269471892811331,
"learning_rate": 2.778919226252346e-06,
"loss": 0.5526,
"step": 1054
},
{
"epoch": 0.93,
"grad_norm": 1.8562637541083824,
"learning_rate": 2.775447003082505e-06,
"loss": 0.5686,
"step": 1055
},
{
"epoch": 0.94,
"grad_norm": 2.2263991007598114,
"learning_rate": 2.7719742419586998e-06,
"loss": 0.5402,
"step": 1056
},
{
"epoch": 0.94,
"grad_norm": 1.6883177707586092,
"learning_rate": 2.7685009496633075e-06,
"loss": 0.5033,
"step": 1057
},
{
"epoch": 0.94,
"grad_norm": 1.8528118029727803,
"learning_rate": 2.765027132979743e-06,
"loss": 0.5544,
"step": 1058
},
{
"epoch": 0.94,
"grad_norm": 1.9477833558101318,
"learning_rate": 2.761552798692446e-06,
"loss": 0.5255,
"step": 1059
},
{
"epoch": 0.94,
"grad_norm": 1.8332232845916867,
"learning_rate": 2.7580779535868675e-06,
"loss": 0.5296,
"step": 1060
},
{
"epoch": 0.94,
"grad_norm": 1.7825492914819279,
"learning_rate": 2.754602604449454e-06,
"loss": 0.5071,
"step": 1061
},
{
"epoch": 0.94,
"grad_norm": 1.8629044416803393,
"learning_rate": 2.7511267580676382e-06,
"loss": 0.5242,
"step": 1062
},
{
"epoch": 0.94,
"grad_norm": 1.7488315230717792,
"learning_rate": 2.7476504212298233e-06,
"loss": 0.5252,
"step": 1063
},
{
"epoch": 0.94,
"grad_norm": 1.8087959812729764,
"learning_rate": 2.7441736007253705e-06,
"loss": 0.4935,
"step": 1064
},
{
"epoch": 0.94,
"grad_norm": 1.805794397151574,
"learning_rate": 2.740696303344585e-06,
"loss": 0.5819,
"step": 1065
},
{
"epoch": 0.94,
"grad_norm": 1.7460467388665153,
"learning_rate": 2.737218535878705e-06,
"loss": 0.5411,
"step": 1066
},
{
"epoch": 0.95,
"grad_norm": 1.9047016227341778,
"learning_rate": 2.7337403051198846e-06,
"loss": 0.4755,
"step": 1067
},
{
"epoch": 0.95,
"grad_norm": 1.839222519978661,
"learning_rate": 2.730261617861185e-06,
"loss": 0.4855,
"step": 1068
},
{
"epoch": 0.95,
"grad_norm": 1.862929087519683,
"learning_rate": 2.726782480896557e-06,
"loss": 0.5431,
"step": 1069
},
{
"epoch": 0.95,
"grad_norm": 1.7864340196228758,
"learning_rate": 2.723302901020831e-06,
"loss": 0.5108,
"step": 1070
},
{
"epoch": 0.95,
"grad_norm": 1.827547647278096,
"learning_rate": 2.719822885029701e-06,
"loss": 0.5029,
"step": 1071
},
{
"epoch": 0.95,
"grad_norm": 1.6624898067287452,
"learning_rate": 2.716342439719714e-06,
"loss": 0.4861,
"step": 1072
},
{
"epoch": 0.95,
"grad_norm": 1.8852871442731454,
"learning_rate": 2.7128615718882554e-06,
"loss": 0.5053,
"step": 1073
},
{
"epoch": 0.95,
"grad_norm": 1.9534449028119654,
"learning_rate": 2.7093802883335357e-06,
"loss": 0.5654,
"step": 1074
},
{
"epoch": 0.95,
"grad_norm": 1.7249582061537097,
"learning_rate": 2.7058985958545765e-06,
"loss": 0.5002,
"step": 1075
},
{
"epoch": 0.95,
"grad_norm": 1.7562844672053906,
"learning_rate": 2.702416501251199e-06,
"loss": 0.5436,
"step": 1076
},
{
"epoch": 0.95,
"grad_norm": 1.986901333170154,
"learning_rate": 2.6989340113240087e-06,
"loss": 0.527,
"step": 1077
},
{
"epoch": 0.95,
"grad_norm": 1.7825768030688796,
"learning_rate": 2.695451132874385e-06,
"loss": 0.525,
"step": 1078
},
{
"epoch": 0.96,
"grad_norm": 1.6606555374476397,
"learning_rate": 2.691967872704464e-06,
"loss": 0.476,
"step": 1079
},
{
"epoch": 0.96,
"grad_norm": 1.7825083810087277,
"learning_rate": 2.688484237617129e-06,
"loss": 0.477,
"step": 1080
},
{
"epoch": 0.96,
"grad_norm": 1.9686430333958531,
"learning_rate": 2.6850002344159943e-06,
"loss": 0.5434,
"step": 1081
},
{
"epoch": 0.96,
"grad_norm": 1.717636244450827,
"learning_rate": 2.6815158699053935e-06,
"loss": 0.5794,
"step": 1082
},
{
"epoch": 0.96,
"grad_norm": 1.704099898400831,
"learning_rate": 2.6780311508903673e-06,
"loss": 0.5107,
"step": 1083
},
{
"epoch": 0.96,
"grad_norm": 1.7521786436297433,
"learning_rate": 2.6745460841766456e-06,
"loss": 0.543,
"step": 1084
},
{
"epoch": 0.96,
"grad_norm": 1.8288817857074091,
"learning_rate": 2.67106067657064e-06,
"loss": 0.4888,
"step": 1085
},
{
"epoch": 0.96,
"grad_norm": 1.7606668312978737,
"learning_rate": 2.6675749348794273e-06,
"loss": 0.5438,
"step": 1086
},
{
"epoch": 0.96,
"grad_norm": 1.8974007350921405,
"learning_rate": 2.6640888659107355e-06,
"loss": 0.5103,
"step": 1087
},
{
"epoch": 0.96,
"grad_norm": 1.8366492024047152,
"learning_rate": 2.660602476472935e-06,
"loss": 0.5211,
"step": 1088
},
{
"epoch": 0.96,
"grad_norm": 1.813780419169426,
"learning_rate": 2.657115773375018e-06,
"loss": 0.4786,
"step": 1089
},
{
"epoch": 0.97,
"grad_norm": 1.7751633927736221,
"learning_rate": 2.6536287634265918e-06,
"loss": 0.5456,
"step": 1090
},
{
"epoch": 0.97,
"grad_norm": 1.8112706588151493,
"learning_rate": 2.6501414534378616e-06,
"loss": 0.536,
"step": 1091
},
{
"epoch": 0.97,
"grad_norm": 1.7362256266600447,
"learning_rate": 2.646653850219621e-06,
"loss": 0.5266,
"step": 1092
},
{
"epoch": 0.97,
"grad_norm": 1.7746963075577837,
"learning_rate": 2.643165960583233e-06,
"loss": 0.4845,
"step": 1093
},
{
"epoch": 0.97,
"grad_norm": 1.7798827281249407,
"learning_rate": 2.6396777913406228e-06,
"loss": 0.457,
"step": 1094
},
{
"epoch": 0.97,
"grad_norm": 1.801038100374856,
"learning_rate": 2.6361893493042594e-06,
"loss": 0.5093,
"step": 1095
},
{
"epoch": 0.97,
"grad_norm": 1.70079798226535,
"learning_rate": 2.632700641287147e-06,
"loss": 0.5093,
"step": 1096
},
{
"epoch": 0.97,
"grad_norm": 1.7688921140375633,
"learning_rate": 2.6292116741028073e-06,
"loss": 0.4999,
"step": 1097
},
{
"epoch": 0.97,
"grad_norm": 1.7535762670261703,
"learning_rate": 2.6257224545652688e-06,
"loss": 0.5292,
"step": 1098
},
{
"epoch": 0.97,
"grad_norm": 1.6511600588345345,
"learning_rate": 2.622232989489052e-06,
"loss": 0.5098,
"step": 1099
},
{
"epoch": 0.97,
"grad_norm": 1.8226418118893164,
"learning_rate": 2.6187432856891585e-06,
"loss": 0.4995,
"step": 1100
},
{
"epoch": 0.98,
"grad_norm": 1.7453251697809469,
"learning_rate": 2.6152533499810567e-06,
"loss": 0.5324,
"step": 1101
},
{
"epoch": 0.98,
"grad_norm": 3.854764123671763,
"learning_rate": 2.611763189180665e-06,
"loss": 0.55,
"step": 1102
},
{
"epoch": 0.98,
"grad_norm": 1.8822045822720739,
"learning_rate": 2.608272810104343e-06,
"loss": 0.4948,
"step": 1103
},
{
"epoch": 0.98,
"grad_norm": 1.7655712435117557,
"learning_rate": 2.6047822195688775e-06,
"loss": 0.5361,
"step": 1104
},
{
"epoch": 0.98,
"grad_norm": 1.7601014248725226,
"learning_rate": 2.6012914243914667e-06,
"loss": 0.4455,
"step": 1105
},
{
"epoch": 0.98,
"grad_norm": 1.7832576894792285,
"learning_rate": 2.5978004313897104e-06,
"loss": 0.5356,
"step": 1106
},
{
"epoch": 0.98,
"grad_norm": 1.7379093473470857,
"learning_rate": 2.5943092473815922e-06,
"loss": 0.4881,
"step": 1107
},
{
"epoch": 0.98,
"grad_norm": 1.9126265402481486,
"learning_rate": 2.590817879185471e-06,
"loss": 0.4768,
"step": 1108
},
{
"epoch": 0.98,
"grad_norm": 1.9554559921329315,
"learning_rate": 2.5873263336200636e-06,
"loss": 0.572,
"step": 1109
},
{
"epoch": 0.98,
"grad_norm": 1.7969024529150637,
"learning_rate": 2.5838346175044355e-06,
"loss": 0.4894,
"step": 1110
},
{
"epoch": 0.98,
"grad_norm": 1.7234439211809627,
"learning_rate": 2.5803427376579824e-06,
"loss": 0.4926,
"step": 1111
},
{
"epoch": 0.98,
"grad_norm": 1.9809564443971945,
"learning_rate": 2.5768507009004224e-06,
"loss": 0.5677,
"step": 1112
},
{
"epoch": 0.99,
"grad_norm": 1.7861662896762107,
"learning_rate": 2.573358514051779e-06,
"loss": 0.5283,
"step": 1113
},
{
"epoch": 0.99,
"grad_norm": 1.7627618988801406,
"learning_rate": 2.569866183932368e-06,
"loss": 0.5366,
"step": 1114
},
{
"epoch": 0.99,
"grad_norm": 1.870448428328825,
"learning_rate": 2.5663737173627863e-06,
"loss": 0.4864,
"step": 1115
},
{
"epoch": 0.99,
"grad_norm": 1.7537459242093265,
"learning_rate": 2.5628811211638967e-06,
"loss": 0.5091,
"step": 1116
},
{
"epoch": 0.99,
"grad_norm": 1.7948869258749416,
"learning_rate": 2.5593884021568143e-06,
"loss": 0.4851,
"step": 1117
},
{
"epoch": 0.99,
"grad_norm": 1.7948580243638423,
"learning_rate": 2.5558955671628964e-06,
"loss": 0.5038,
"step": 1118
},
{
"epoch": 0.99,
"grad_norm": 1.7071321162179267,
"learning_rate": 2.552402623003726e-06,
"loss": 0.5172,
"step": 1119
},
{
"epoch": 0.99,
"grad_norm": 1.9189581718497948,
"learning_rate": 2.548909576501096e-06,
"loss": 0.5421,
"step": 1120
},
{
"epoch": 0.99,
"grad_norm": 1.80649128798113,
"learning_rate": 2.5454164344770044e-06,
"loss": 0.5418,
"step": 1121
},
{
"epoch": 0.99,
"grad_norm": 1.7691828394369862,
"learning_rate": 2.5419232037536316e-06,
"loss": 0.5103,
"step": 1122
},
{
"epoch": 0.99,
"grad_norm": 1.7960539293917688,
"learning_rate": 2.5384298911533344e-06,
"loss": 0.5318,
"step": 1123
},
{
"epoch": 1.0,
"grad_norm": 1.8973140103512256,
"learning_rate": 2.5349365034986267e-06,
"loss": 0.5705,
"step": 1124
},
{
"epoch": 1.0,
"grad_norm": 1.821467182784847,
"learning_rate": 2.531443047612171e-06,
"loss": 0.5195,
"step": 1125
},
{
"epoch": 1.0,
"grad_norm": 1.951851802158412,
"learning_rate": 2.527949530316762e-06,
"loss": 0.5033,
"step": 1126
},
{
"epoch": 1.0,
"grad_norm": 1.866099609048635,
"learning_rate": 2.5244559584353146e-06,
"loss": 0.5482,
"step": 1127
},
{
"epoch": 1.0,
"grad_norm": 1.764005332766906,
"learning_rate": 2.520962338790851e-06,
"loss": 0.4973,
"step": 1128
},
{
"epoch": 1.0,
"grad_norm": 1.863408871226844,
"learning_rate": 2.517468678206485e-06,
"loss": 0.5249,
"step": 1129
},
{
"epoch": 1.0,
"grad_norm": 1.7459173200826197,
"learning_rate": 2.5139749835054123e-06,
"loss": 0.4845,
"step": 1130
},
{
"epoch": 1.0,
"grad_norm": 1.9910802418986058,
"learning_rate": 2.5104812615108943e-06,
"loss": 0.5702,
"step": 1131
}
],
"logging_steps": 1,
"max_steps": 2258,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 377,
"total_flos": 532701102735360.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}