{ "best_metric": 1.3179453611373901, "best_model_checkpoint": "yolo-tiny-fashion/checkpoint-28520", "epoch": 10.0, "eval_steps": 500, "global_step": 28520, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.004908835904628331, "grad_norm": null, "learning_rate": 1.753155680224404e-07, "loss": 7.2827, "step": 14 }, { "epoch": 0.009817671809256662, "grad_norm": 42.19830322265625, "learning_rate": 4.2075736325385697e-07, "loss": 7.2488, "step": 28 }, { "epoch": 0.014726507713884993, "grad_norm": null, "learning_rate": 6.486676016830295e-07, "loss": 7.315, "step": 42 }, { "epoch": 0.019635343618513323, "grad_norm": 128.13803100585938, "learning_rate": 8.94109396914446e-07, "loss": 7.1454, "step": 56 }, { "epoch": 0.024544179523141654, "grad_norm": 31.699440002441406, "learning_rate": 1.1220196353436186e-06, "loss": 7.24, "step": 70 }, { "epoch": 0.029453015427769985, "grad_norm": 43.054813385009766, "learning_rate": 1.367461430575035e-06, "loss": 7.1142, "step": 84 }, { "epoch": 0.034361851332398316, "grad_norm": 97.52753448486328, "learning_rate": 1.6129032258064516e-06, "loss": 7.0304, "step": 98 }, { "epoch": 0.03927068723702665, "grad_norm": 33.70622253417969, "learning_rate": 1.858345021037868e-06, "loss": 7.0026, "step": 112 }, { "epoch": 0.04417952314165498, "grad_norm": 41.05027389526367, "learning_rate": 2.1037868162692846e-06, "loss": 6.9645, "step": 126 }, { "epoch": 0.04908835904628331, "grad_norm": 92.50530242919922, "learning_rate": 2.3492286115007015e-06, "loss": 6.835, "step": 140 }, { "epoch": 0.05399719495091164, "grad_norm": 124.34849548339844, "learning_rate": 2.594670406732118e-06, "loss": 6.7781, "step": 154 }, { "epoch": 0.05890603085553997, "grad_norm": 48.94189453125, "learning_rate": 2.8401122019635345e-06, "loss": 6.6723, "step": 168 }, { "epoch": 0.0638148667601683, "grad_norm": 60.10065460205078, "learning_rate": 3.085553997194951e-06, "loss": 6.5306, "step": 182 }, { 
"epoch": 0.06872370266479663, "grad_norm": 96.10130310058594, "learning_rate": 3.3309957924263675e-06, "loss": 6.3872, "step": 196 }, { "epoch": 0.07363253856942496, "grad_norm": 44.42685317993164, "learning_rate": 3.576437587657784e-06, "loss": 6.3447, "step": 210 }, { "epoch": 0.0785413744740533, "grad_norm": 71.67019653320312, "learning_rate": 3.8218793828892005e-06, "loss": 6.3001, "step": 224 }, { "epoch": 0.08345021037868162, "grad_norm": 33.569602966308594, "learning_rate": 4.067321178120617e-06, "loss": 6.1325, "step": 238 }, { "epoch": 0.08835904628330996, "grad_norm": 34.78213882446289, "learning_rate": 4.3127629733520335e-06, "loss": 6.0745, "step": 252 }, { "epoch": 0.09326788218793829, "grad_norm": 85.39997863769531, "learning_rate": 4.55820476858345e-06, "loss": 5.8789, "step": 266 }, { "epoch": 0.09817671809256662, "grad_norm": 71.47327423095703, "learning_rate": 4.8036465638148665e-06, "loss": 5.7403, "step": 280 }, { "epoch": 0.10308555399719495, "grad_norm": 48.89495086669922, "learning_rate": 5.049088359046283e-06, "loss": 5.7809, "step": 294 }, { "epoch": 0.10799438990182328, "grad_norm": 51.65408706665039, "learning_rate": 5.2945301542777e-06, "loss": 5.6049, "step": 308 }, { "epoch": 0.11290322580645161, "grad_norm": 47.655052185058594, "learning_rate": 5.539971949509117e-06, "loss": 5.5456, "step": 322 }, { "epoch": 0.11781206171107994, "grad_norm": 51.074913024902344, "learning_rate": 5.785413744740533e-06, "loss": 5.2569, "step": 336 }, { "epoch": 0.12272089761570827, "grad_norm": 93.9029541015625, "learning_rate": 6.03085553997195e-06, "loss": 5.1324, "step": 350 }, { "epoch": 0.1276297335203366, "grad_norm": 31.065109252929688, "learning_rate": 6.276297335203367e-06, "loss": 5.0047, "step": 364 }, { "epoch": 0.13253856942496495, "grad_norm": 50.65034484863281, "learning_rate": 6.521739130434783e-06, "loss": 4.8052, "step": 378 }, { "epoch": 0.13744740532959326, "grad_norm": 56.795963287353516, "learning_rate": 6.7671809256662e-06, "loss": 
4.6019, "step": 392 }, { "epoch": 0.1423562412342216, "grad_norm": 29.37743377685547, "learning_rate": 7.012622720897616e-06, "loss": 4.5886, "step": 406 }, { "epoch": 0.14726507713884993, "grad_norm": 44.446353912353516, "learning_rate": 7.258064516129033e-06, "loss": 4.4182, "step": 420 }, { "epoch": 0.15217391304347827, "grad_norm": 39.80805969238281, "learning_rate": 7.503506311360449e-06, "loss": 4.2886, "step": 434 }, { "epoch": 0.1570827489481066, "grad_norm": 35.339202880859375, "learning_rate": 7.748948106591865e-06, "loss": 4.3017, "step": 448 }, { "epoch": 0.16199158485273493, "grad_norm": 71.77003479003906, "learning_rate": 7.994389901823283e-06, "loss": 4.2472, "step": 462 }, { "epoch": 0.16690042075736325, "grad_norm": 41.62904357910156, "learning_rate": 8.2398316970547e-06, "loss": 4.1653, "step": 476 }, { "epoch": 0.1718092566619916, "grad_norm": 43.17316818237305, "learning_rate": 8.485273492286116e-06, "loss": 4.1206, "step": 490 }, { "epoch": 0.1767180925666199, "grad_norm": 73.35100555419922, "learning_rate": 8.730715287517533e-06, "loss": 4.23, "step": 504 }, { "epoch": 0.18162692847124826, "grad_norm": 38.54780960083008, "learning_rate": 8.976157082748949e-06, "loss": 3.955, "step": 518 }, { "epoch": 0.18653576437587657, "grad_norm": 33.19742202758789, "learning_rate": 9.221598877980366e-06, "loss": 4.2163, "step": 532 }, { "epoch": 0.19144460028050492, "grad_norm": 25.57295036315918, "learning_rate": 9.467040673211782e-06, "loss": 4.021, "step": 546 }, { "epoch": 0.19635343618513323, "grad_norm": 152.22877502441406, "learning_rate": 9.712482468443199e-06, "loss": 3.8931, "step": 560 }, { "epoch": 0.20126227208976158, "grad_norm": 16.732576370239258, "learning_rate": 9.957924263674615e-06, "loss": 3.7064, "step": 574 }, { "epoch": 0.2061711079943899, "grad_norm": 19.926677703857422, "learning_rate": 1.0203366058906032e-05, "loss": 3.7644, "step": 588 }, { "epoch": 0.21107994389901824, "grad_norm": 21.11427879333496, "learning_rate": 
1.0448807854137448e-05, "loss": 3.79, "step": 602 }, { "epoch": 0.21598877980364656, "grad_norm": 25.395204544067383, "learning_rate": 1.0694249649368865e-05, "loss": 3.6279, "step": 616 }, { "epoch": 0.2208976157082749, "grad_norm": 40.874576568603516, "learning_rate": 1.093969144460028e-05, "loss": 3.6052, "step": 630 }, { "epoch": 0.22580645161290322, "grad_norm": 34.260658264160156, "learning_rate": 1.1185133239831698e-05, "loss": 3.6119, "step": 644 }, { "epoch": 0.23071528751753156, "grad_norm": 26.136035919189453, "learning_rate": 1.1430575035063114e-05, "loss": 3.5188, "step": 658 }, { "epoch": 0.23562412342215988, "grad_norm": 22.59177589416504, "learning_rate": 1.1676016830294531e-05, "loss": 3.5699, "step": 672 }, { "epoch": 0.24053295932678823, "grad_norm": 37.566715240478516, "learning_rate": 1.1921458625525947e-05, "loss": 3.4652, "step": 686 }, { "epoch": 0.24544179523141654, "grad_norm": 22.238405227661133, "learning_rate": 1.2166900420757364e-05, "loss": 3.4793, "step": 700 }, { "epoch": 0.2503506311360449, "grad_norm": 24.722185134887695, "learning_rate": 1.241234221598878e-05, "loss": 3.5241, "step": 714 }, { "epoch": 0.2552594670406732, "grad_norm": 24.38312339782715, "learning_rate": 1.2657784011220197e-05, "loss": 3.4958, "step": 728 }, { "epoch": 0.2601683029453015, "grad_norm": 24.15485191345215, "learning_rate": 1.2903225806451613e-05, "loss": 3.4331, "step": 742 }, { "epoch": 0.2650771388499299, "grad_norm": 20.52837562561035, "learning_rate": 1.3148667601683028e-05, "loss": 3.3565, "step": 756 }, { "epoch": 0.2699859747545582, "grad_norm": 19.052499771118164, "learning_rate": 1.3394109396914447e-05, "loss": 3.4168, "step": 770 }, { "epoch": 0.27489481065918653, "grad_norm": 20.974838256835938, "learning_rate": 1.3639551192145863e-05, "loss": 3.3857, "step": 784 }, { "epoch": 0.27980364656381485, "grad_norm": 17.413082122802734, "learning_rate": 1.3884992987377279e-05, "loss": 3.2355, "step": 798 }, { "epoch": 0.2847124824684432, 
"grad_norm": 19.82984161376953, "learning_rate": 1.4130434782608694e-05, "loss": 3.2425, "step": 812 }, { "epoch": 0.28962131837307153, "grad_norm": 22.61432456970215, "learning_rate": 1.4375876577840113e-05, "loss": 3.1791, "step": 826 }, { "epoch": 0.29453015427769985, "grad_norm": 18.661943435668945, "learning_rate": 1.4621318373071529e-05, "loss": 3.2602, "step": 840 }, { "epoch": 0.29943899018232817, "grad_norm": 20.135164260864258, "learning_rate": 1.4866760168302945e-05, "loss": 3.1938, "step": 854 }, { "epoch": 0.30434782608695654, "grad_norm": 15.22934627532959, "learning_rate": 1.5112201963534362e-05, "loss": 3.2286, "step": 868 }, { "epoch": 0.30925666199158486, "grad_norm": 13.456915855407715, "learning_rate": 1.535764375876578e-05, "loss": 3.1472, "step": 882 }, { "epoch": 0.3141654978962132, "grad_norm": 14.68199348449707, "learning_rate": 1.5603085553997195e-05, "loss": 3.1961, "step": 896 }, { "epoch": 0.3190743338008415, "grad_norm": 18.367368698120117, "learning_rate": 1.584852734922861e-05, "loss": 3.1288, "step": 910 }, { "epoch": 0.32398316970546986, "grad_norm": 13.704262733459473, "learning_rate": 1.6093969144460026e-05, "loss": 3.0742, "step": 924 }, { "epoch": 0.3288920056100982, "grad_norm": 17.843402862548828, "learning_rate": 1.6339410939691445e-05, "loss": 2.9869, "step": 938 }, { "epoch": 0.3338008415147265, "grad_norm": 18.23180389404297, "learning_rate": 1.658485273492286e-05, "loss": 2.9591, "step": 952 }, { "epoch": 0.3387096774193548, "grad_norm": 20.095623016357422, "learning_rate": 1.6830294530154277e-05, "loss": 2.961, "step": 966 }, { "epoch": 0.3436185133239832, "grad_norm": 15.657941818237305, "learning_rate": 1.7075736325385692e-05, "loss": 2.9915, "step": 980 }, { "epoch": 0.3485273492286115, "grad_norm": 13.479241371154785, "learning_rate": 1.732117812061711e-05, "loss": 2.9985, "step": 994 }, { "epoch": 0.3534361851332398, "grad_norm": 13.291443824768066, "learning_rate": 1.7566619915848527e-05, "loss": 2.9517, "step": 
1008 }, { "epoch": 0.35834502103786814, "grad_norm": 11.864936828613281, "learning_rate": 1.7812061711079943e-05, "loss": 2.9474, "step": 1022 }, { "epoch": 0.3632538569424965, "grad_norm": 13.936219215393066, "learning_rate": 1.805750350631136e-05, "loss": 2.8872, "step": 1036 }, { "epoch": 0.36816269284712483, "grad_norm": 15.497425079345703, "learning_rate": 1.8302945301542777e-05, "loss": 2.8639, "step": 1050 }, { "epoch": 0.37307152875175315, "grad_norm": 15.485796928405762, "learning_rate": 1.8548387096774193e-05, "loss": 2.8804, "step": 1064 }, { "epoch": 0.37798036465638146, "grad_norm": 13.917716979980469, "learning_rate": 1.8793828892005612e-05, "loss": 2.7771, "step": 1078 }, { "epoch": 0.38288920056100983, "grad_norm": 12.483355522155762, "learning_rate": 1.9039270687237028e-05, "loss": 2.8439, "step": 1092 }, { "epoch": 0.38779803646563815, "grad_norm": 14.524687767028809, "learning_rate": 1.9284712482468443e-05, "loss": 2.8548, "step": 1106 }, { "epoch": 0.39270687237026647, "grad_norm": 12.81187629699707, "learning_rate": 1.9530154277699863e-05, "loss": 2.8047, "step": 1120 }, { "epoch": 0.3976157082748948, "grad_norm": 14.675981521606445, "learning_rate": 1.9775596072931278e-05, "loss": 2.6724, "step": 1134 }, { "epoch": 0.40252454417952316, "grad_norm": 12.65477180480957, "learning_rate": 2.0021037868162694e-05, "loss": 2.9343, "step": 1148 }, { "epoch": 0.4074333800841515, "grad_norm": 11.129637718200684, "learning_rate": 2.0266479663394113e-05, "loss": 2.8642, "step": 1162 }, { "epoch": 0.4123422159887798, "grad_norm": 11.501176834106445, "learning_rate": 2.051192145862553e-05, "loss": 2.7049, "step": 1176 }, { "epoch": 0.4172510518934081, "grad_norm": 12.377784729003906, "learning_rate": 2.0757363253856944e-05, "loss": 2.6807, "step": 1190 }, { "epoch": 0.4221598877980365, "grad_norm": 13.045960426330566, "learning_rate": 2.100280504908836e-05, "loss": 2.7744, "step": 1204 }, { "epoch": 0.4270687237026648, "grad_norm": 11.821955680847168, 
"learning_rate": 2.124824684431978e-05, "loss": 2.7234, "step": 1218 }, { "epoch": 0.4319775596072931, "grad_norm": 11.671823501586914, "learning_rate": 2.1493688639551195e-05, "loss": 2.6539, "step": 1232 }, { "epoch": 0.43688639551192143, "grad_norm": 11.532149314880371, "learning_rate": 2.173913043478261e-05, "loss": 2.7056, "step": 1246 }, { "epoch": 0.4417952314165498, "grad_norm": 13.150310516357422, "learning_rate": 2.1984572230014026e-05, "loss": 2.6854, "step": 1260 }, { "epoch": 0.4467040673211781, "grad_norm": 10.445241928100586, "learning_rate": 2.2230014025245445e-05, "loss": 2.6738, "step": 1274 }, { "epoch": 0.45161290322580644, "grad_norm": 9.731876373291016, "learning_rate": 2.247545582047686e-05, "loss": 2.6863, "step": 1288 }, { "epoch": 0.45652173913043476, "grad_norm": 11.845926284790039, "learning_rate": 2.2720897615708276e-05, "loss": 2.5377, "step": 1302 }, { "epoch": 0.46143057503506313, "grad_norm": 10.02022647857666, "learning_rate": 2.2966339410939692e-05, "loss": 2.5309, "step": 1316 }, { "epoch": 0.46633941093969145, "grad_norm": 12.004232406616211, "learning_rate": 2.321178120617111e-05, "loss": 2.5826, "step": 1330 }, { "epoch": 0.47124824684431976, "grad_norm": 10.918401718139648, "learning_rate": 2.3457223001402527e-05, "loss": 2.5702, "step": 1344 }, { "epoch": 0.4761570827489481, "grad_norm": 10.725889205932617, "learning_rate": 2.3702664796633942e-05, "loss": 2.5217, "step": 1358 }, { "epoch": 0.48106591865357645, "grad_norm": 12.304047584533691, "learning_rate": 2.3948106591865358e-05, "loss": 2.572, "step": 1372 }, { "epoch": 0.48597475455820477, "grad_norm": 15.934927940368652, "learning_rate": 2.4193548387096777e-05, "loss": 2.4723, "step": 1386 }, { "epoch": 0.4908835904628331, "grad_norm": 11.389201164245605, "learning_rate": 2.4438990182328193e-05, "loss": 2.6098, "step": 1400 }, { "epoch": 0.4957924263674614, "grad_norm": 11.188345909118652, "learning_rate": 2.4684431977559608e-05, "loss": 2.5557, "step": 1414 }, { 
"epoch": 0.5007012622720898, "grad_norm": 13.196151733398438, "learning_rate": 2.4929873772791024e-05, "loss": 2.4671, "step": 1428 }, { "epoch": 0.5056100981767181, "grad_norm": 11.173541069030762, "learning_rate": 2.517531556802244e-05, "loss": 2.5022, "step": 1442 }, { "epoch": 0.5105189340813464, "grad_norm": 13.601677894592285, "learning_rate": 2.5420757363253862e-05, "loss": 2.4453, "step": 1456 }, { "epoch": 0.5154277699859747, "grad_norm": 10.56280517578125, "learning_rate": 2.5666199158485278e-05, "loss": 2.4315, "step": 1470 }, { "epoch": 0.520336605890603, "grad_norm": 12.714173316955566, "learning_rate": 2.5911640953716693e-05, "loss": 2.4841, "step": 1484 }, { "epoch": 0.5252454417952315, "grad_norm": 10.69168472290039, "learning_rate": 2.615708274894811e-05, "loss": 2.5158, "step": 1498 }, { "epoch": 0.5301542776998598, "grad_norm": 13.300841331481934, "learning_rate": 2.6402524544179525e-05, "loss": 2.4859, "step": 1512 }, { "epoch": 0.5350631136044881, "grad_norm": 14.17661190032959, "learning_rate": 2.664796633941094e-05, "loss": 2.4128, "step": 1526 }, { "epoch": 0.5399719495091164, "grad_norm": 13.375521659851074, "learning_rate": 2.6893408134642356e-05, "loss": 2.477, "step": 1540 }, { "epoch": 0.5448807854137447, "grad_norm": 10.8220796585083, "learning_rate": 2.713884992987377e-05, "loss": 2.4237, "step": 1554 }, { "epoch": 0.5497896213183731, "grad_norm": 11.611028671264648, "learning_rate": 2.7384291725105194e-05, "loss": 2.4681, "step": 1568 }, { "epoch": 0.5546984572230014, "grad_norm": 12.796971321105957, "learning_rate": 2.762973352033661e-05, "loss": 2.3964, "step": 1582 }, { "epoch": 0.5596072931276297, "grad_norm": 13.078907012939453, "learning_rate": 2.7875175315568025e-05, "loss": 2.4901, "step": 1596 }, { "epoch": 0.5645161290322581, "grad_norm": 12.2595796585083, "learning_rate": 2.812061711079944e-05, "loss": 2.4515, "step": 1610 }, { "epoch": 0.5694249649368864, "grad_norm": 10.939292907714844, "learning_rate": 
2.8366058906030857e-05, "loss": 2.4501, "step": 1624 }, { "epoch": 0.5743338008415148, "grad_norm": 13.540836334228516, "learning_rate": 2.8611500701262272e-05, "loss": 2.4875, "step": 1638 }, { "epoch": 0.5792426367461431, "grad_norm": 11.8589448928833, "learning_rate": 2.8856942496493688e-05, "loss": 2.2876, "step": 1652 }, { "epoch": 0.5841514726507714, "grad_norm": 11.35145378112793, "learning_rate": 2.9102384291725104e-05, "loss": 2.3779, "step": 1666 }, { "epoch": 0.5890603085553997, "grad_norm": 9.149605751037598, "learning_rate": 2.9347826086956526e-05, "loss": 2.3149, "step": 1680 }, { "epoch": 0.593969144460028, "grad_norm": 8.443830490112305, "learning_rate": 2.959326788218794e-05, "loss": 2.3358, "step": 1694 }, { "epoch": 0.5988779803646563, "grad_norm": 13.923408508300781, "learning_rate": 2.9838709677419357e-05, "loss": 2.3236, "step": 1708 }, { "epoch": 0.6037868162692848, "grad_norm": 8.976480484008789, "learning_rate": 3.0084151472650773e-05, "loss": 2.4643, "step": 1722 }, { "epoch": 0.6086956521739131, "grad_norm": 13.93576717376709, "learning_rate": 3.032959326788219e-05, "loss": 2.3638, "step": 1736 }, { "epoch": 0.6136044880785414, "grad_norm": 10.757050514221191, "learning_rate": 3.0575035063113604e-05, "loss": 2.3821, "step": 1750 }, { "epoch": 0.6185133239831697, "grad_norm": 12.22114086151123, "learning_rate": 3.082047685834502e-05, "loss": 2.4982, "step": 1764 }, { "epoch": 0.623422159887798, "grad_norm": 10.15032958984375, "learning_rate": 3.1065918653576436e-05, "loss": 2.2804, "step": 1778 }, { "epoch": 0.6283309957924264, "grad_norm": 11.646088600158691, "learning_rate": 3.131136044880786e-05, "loss": 2.3047, "step": 1792 }, { "epoch": 0.6332398316970547, "grad_norm": 11.344216346740723, "learning_rate": 3.1556802244039274e-05, "loss": 2.3375, "step": 1806 }, { "epoch": 0.638148667601683, "grad_norm": 11.708880424499512, "learning_rate": 3.180224403927069e-05, "loss": 2.3442, "step": 1820 }, { "epoch": 0.6430575035063114, 
"grad_norm": 8.759408950805664, "learning_rate": 3.2047685834502105e-05, "loss": 2.3329, "step": 1834 }, { "epoch": 0.6479663394109397, "grad_norm": 10.318377494812012, "learning_rate": 3.229312762973352e-05, "loss": 2.3036, "step": 1848 }, { "epoch": 0.652875175315568, "grad_norm": 8.172351837158203, "learning_rate": 3.2538569424964936e-05, "loss": 2.3354, "step": 1862 }, { "epoch": 0.6577840112201964, "grad_norm": 9.213096618652344, "learning_rate": 3.278401122019635e-05, "loss": 2.3717, "step": 1876 }, { "epoch": 0.6626928471248247, "grad_norm": 13.916830062866211, "learning_rate": 3.302945301542777e-05, "loss": 2.3272, "step": 1890 }, { "epoch": 0.667601683029453, "grad_norm": 14.394588470458984, "learning_rate": 3.327489481065919e-05, "loss": 2.3076, "step": 1904 }, { "epoch": 0.6725105189340813, "grad_norm": 12.898151397705078, "learning_rate": 3.3520336605890606e-05, "loss": 2.3834, "step": 1918 }, { "epoch": 0.6774193548387096, "grad_norm": 11.074892044067383, "learning_rate": 3.376577840112202e-05, "loss": 2.3654, "step": 1932 }, { "epoch": 0.6823281907433381, "grad_norm": 8.492025375366211, "learning_rate": 3.401122019635344e-05, "loss": 2.2728, "step": 1946 }, { "epoch": 0.6872370266479664, "grad_norm": 10.871256828308105, "learning_rate": 3.425666199158485e-05, "loss": 2.3263, "step": 1960 }, { "epoch": 0.6921458625525947, "grad_norm": 10.596525192260742, "learning_rate": 3.450210378681627e-05, "loss": 2.3814, "step": 1974 }, { "epoch": 0.697054698457223, "grad_norm": 10.780174255371094, "learning_rate": 3.4747545582047684e-05, "loss": 2.3811, "step": 1988 }, { "epoch": 0.7019635343618513, "grad_norm": 10.591049194335938, "learning_rate": 3.49929873772791e-05, "loss": 2.2705, "step": 2002 }, { "epoch": 0.7068723702664796, "grad_norm": 10.986788749694824, "learning_rate": 3.523842917251052e-05, "loss": 2.2615, "step": 2016 }, { "epoch": 0.711781206171108, "grad_norm": 10.810262680053711, "learning_rate": 3.548387096774194e-05, "loss": 2.361, "step": 2030 
}, { "epoch": 0.7166900420757363, "grad_norm": 12.08793830871582, "learning_rate": 3.572931276297335e-05, "loss": 2.2864, "step": 2044 }, { "epoch": 0.7215988779803647, "grad_norm": 10.931816101074219, "learning_rate": 3.597475455820477e-05, "loss": 2.2363, "step": 2058 }, { "epoch": 0.726507713884993, "grad_norm": 8.752777099609375, "learning_rate": 3.6220196353436185e-05, "loss": 2.2574, "step": 2072 }, { "epoch": 0.7314165497896213, "grad_norm": 10.33248233795166, "learning_rate": 3.64656381486676e-05, "loss": 2.3547, "step": 2086 }, { "epoch": 0.7363253856942497, "grad_norm": 10.40267276763916, "learning_rate": 3.6711079943899016e-05, "loss": 2.1709, "step": 2100 }, { "epoch": 0.741234221598878, "grad_norm": 10.064871788024902, "learning_rate": 3.695652173913043e-05, "loss": 2.1136, "step": 2114 }, { "epoch": 0.7461430575035063, "grad_norm": 10.189778327941895, "learning_rate": 3.7201963534361854e-05, "loss": 2.3451, "step": 2128 }, { "epoch": 0.7510518934081346, "grad_norm": 10.47258186340332, "learning_rate": 3.744740532959327e-05, "loss": 2.2748, "step": 2142 }, { "epoch": 0.7559607293127629, "grad_norm": 11.113805770874023, "learning_rate": 3.7692847124824685e-05, "loss": 2.2262, "step": 2156 }, { "epoch": 0.7608695652173914, "grad_norm": 12.038606643676758, "learning_rate": 3.79382889200561e-05, "loss": 2.2957, "step": 2170 }, { "epoch": 0.7657784011220197, "grad_norm": 12.08336067199707, "learning_rate": 3.8183730715287517e-05, "loss": 2.1958, "step": 2184 }, { "epoch": 0.770687237026648, "grad_norm": 10.742866516113281, "learning_rate": 3.842917251051893e-05, "loss": 2.2351, "step": 2198 }, { "epoch": 0.7755960729312763, "grad_norm": 15.493946075439453, "learning_rate": 3.867461430575035e-05, "loss": 2.1916, "step": 2212 }, { "epoch": 0.7805049088359046, "grad_norm": 10.196372032165527, "learning_rate": 3.8920056100981764e-05, "loss": 2.2457, "step": 2226 }, { "epoch": 0.7854137447405329, "grad_norm": 13.315951347351074, "learning_rate": 
3.9165497896213186e-05, "loss": 2.2254, "step": 2240 }, { "epoch": 0.7903225806451613, "grad_norm": 9.214174270629883, "learning_rate": 3.94109396914446e-05, "loss": 2.2714, "step": 2254 }, { "epoch": 0.7952314165497896, "grad_norm": 12.721196174621582, "learning_rate": 3.965638148667602e-05, "loss": 2.0954, "step": 2268 }, { "epoch": 0.800140252454418, "grad_norm": 12.535224914550781, "learning_rate": 3.990182328190743e-05, "loss": 2.1711, "step": 2282 }, { "epoch": 0.8050490883590463, "grad_norm": 13.189672470092773, "learning_rate": 4.014726507713885e-05, "loss": 2.2156, "step": 2296 }, { "epoch": 0.8099579242636746, "grad_norm": 11.445016860961914, "learning_rate": 4.0392706872370264e-05, "loss": 2.1816, "step": 2310 }, { "epoch": 0.814866760168303, "grad_norm": 10.535476684570312, "learning_rate": 4.063814866760168e-05, "loss": 2.201, "step": 2324 }, { "epoch": 0.8197755960729313, "grad_norm": 10.352631568908691, "learning_rate": 4.08835904628331e-05, "loss": 2.2462, "step": 2338 }, { "epoch": 0.8246844319775596, "grad_norm": 9.68475341796875, "learning_rate": 4.112903225806452e-05, "loss": 2.1847, "step": 2352 }, { "epoch": 0.8295932678821879, "grad_norm": 8.54753589630127, "learning_rate": 4.1374474053295934e-05, "loss": 2.2556, "step": 2366 }, { "epoch": 0.8345021037868162, "grad_norm": 11.65552043914795, "learning_rate": 4.161991584852735e-05, "loss": 2.2431, "step": 2380 }, { "epoch": 0.8394109396914446, "grad_norm": 9.139135360717773, "learning_rate": 4.1865357643758765e-05, "loss": 2.189, "step": 2394 }, { "epoch": 0.844319775596073, "grad_norm": 9.686403274536133, "learning_rate": 4.211079943899018e-05, "loss": 2.1846, "step": 2408 }, { "epoch": 0.8492286115007013, "grad_norm": 9.016684532165527, "learning_rate": 4.23562412342216e-05, "loss": 2.1495, "step": 2422 }, { "epoch": 0.8541374474053296, "grad_norm": 12.100763320922852, "learning_rate": 4.260168302945302e-05, "loss": 2.094, "step": 2436 }, { "epoch": 0.8590462833099579, "grad_norm": 
9.994915962219238, "learning_rate": 4.2847124824684434e-05, "loss": 2.1511, "step": 2450 }, { "epoch": 0.8639551192145862, "grad_norm": 12.8555908203125, "learning_rate": 4.309256661991585e-05, "loss": 2.1861, "step": 2464 }, { "epoch": 0.8688639551192145, "grad_norm": 11.036151885986328, "learning_rate": 4.3338008415147266e-05, "loss": 2.1653, "step": 2478 }, { "epoch": 0.8737727910238429, "grad_norm": 11.066033363342285, "learning_rate": 4.358345021037868e-05, "loss": 2.1817, "step": 2492 }, { "epoch": 0.8786816269284713, "grad_norm": 10.98554801940918, "learning_rate": 4.3828892005610104e-05, "loss": 2.2626, "step": 2506 }, { "epoch": 0.8835904628330996, "grad_norm": 10.402631759643555, "learning_rate": 4.407433380084152e-05, "loss": 2.1989, "step": 2520 }, { "epoch": 0.8884992987377279, "grad_norm": 11.987300872802734, "learning_rate": 4.4319775596072935e-05, "loss": 2.1732, "step": 2534 }, { "epoch": 0.8934081346423562, "grad_norm": 10.886210441589355, "learning_rate": 4.456521739130435e-05, "loss": 2.1646, "step": 2548 }, { "epoch": 0.8983169705469846, "grad_norm": 7.655377388000488, "learning_rate": 4.4810659186535766e-05, "loss": 2.1208, "step": 2562 }, { "epoch": 0.9032258064516129, "grad_norm": 9.558204650878906, "learning_rate": 4.505610098176718e-05, "loss": 2.1745, "step": 2576 }, { "epoch": 0.9081346423562412, "grad_norm": 10.461518287658691, "learning_rate": 4.5301542776998604e-05, "loss": 2.0639, "step": 2590 }, { "epoch": 0.9130434782608695, "grad_norm": 14.199075698852539, "learning_rate": 4.554698457223002e-05, "loss": 2.1226, "step": 2604 }, { "epoch": 0.9179523141654979, "grad_norm": 11.382636070251465, "learning_rate": 4.5792426367461436e-05, "loss": 2.0797, "step": 2618 }, { "epoch": 0.9228611500701263, "grad_norm": 12.709613800048828, "learning_rate": 4.603786816269285e-05, "loss": 2.1045, "step": 2632 }, { "epoch": 0.9277699859747546, "grad_norm": 11.907670974731445, "learning_rate": 4.628330995792427e-05, "loss": 2.1145, "step": 2646 }, { 
"epoch": 0.9326788218793829, "grad_norm": 9.398067474365234, "learning_rate": 4.652875175315568e-05, "loss": 2.1424, "step": 2660 }, { "epoch": 0.9375876577840112, "grad_norm": 10.583477973937988, "learning_rate": 4.67741935483871e-05, "loss": 2.1589, "step": 2674 }, { "epoch": 0.9424964936886395, "grad_norm": 9.549612998962402, "learning_rate": 4.701963534361852e-05, "loss": 2.0745, "step": 2688 }, { "epoch": 0.9474053295932678, "grad_norm": 12.489312171936035, "learning_rate": 4.7265077138849936e-05, "loss": 2.0374, "step": 2702 }, { "epoch": 0.9523141654978962, "grad_norm": 11.122519493103027, "learning_rate": 4.751051893408135e-05, "loss": 2.1924, "step": 2716 }, { "epoch": 0.9572230014025246, "grad_norm": 8.99759292602539, "learning_rate": 4.775596072931277e-05, "loss": 2.0905, "step": 2730 }, { "epoch": 0.9621318373071529, "grad_norm": 10.245967864990234, "learning_rate": 4.8001402524544183e-05, "loss": 2.1523, "step": 2744 }, { "epoch": 0.9670406732117812, "grad_norm": 10.33755874633789, "learning_rate": 4.82468443197756e-05, "loss": 2.0583, "step": 2758 }, { "epoch": 0.9719495091164095, "grad_norm": 9.640695571899414, "learning_rate": 4.8492286115007015e-05, "loss": 2.1748, "step": 2772 }, { "epoch": 0.9768583450210379, "grad_norm": 11.129230499267578, "learning_rate": 4.873772791023843e-05, "loss": 2.1645, "step": 2786 }, { "epoch": 0.9817671809256662, "grad_norm": 10.209572792053223, "learning_rate": 4.898316970546985e-05, "loss": 2.0401, "step": 2800 }, { "epoch": 0.9866760168302945, "grad_norm": 13.204483032226562, "learning_rate": 4.922861150070127e-05, "loss": 2.1324, "step": 2814 }, { "epoch": 0.9915848527349228, "grad_norm": 9.750168800354004, "learning_rate": 4.9474053295932684e-05, "loss": 2.1203, "step": 2828 }, { "epoch": 0.9964936886395512, "grad_norm": 11.356013298034668, "learning_rate": 4.97194950911641e-05, "loss": 2.1313, "step": 2842 }, { "epoch": 1.0, "eval_loss": 2.054868698120117, "eval_map": 0.0312, "eval_map_50": 0.0567, 
"eval_map_75": 0.0308, "eval_map_applique": 0.0, "eval_map_bag, wallet": 0.0063, "eval_map_bead": 0.0035, "eval_map_belt": 0.041, "eval_map_bow": 0.0, "eval_map_buckle": 0.0, "eval_map_cape": 0.0, "eval_map_cardigan": 0.0, "eval_map_coat": 0.002, "eval_map_collar": 0.0578, "eval_map_dress": 0.1979, "eval_map_epaulette": 0.0, "eval_map_flower": 0.0, "eval_map_fringe": 0.0, "eval_map_glasses": 0.055, "eval_map_glove": 0.0, "eval_map_hat": 0.0206, "eval_map_headband, head covering, hair accessory": 0.0061, "eval_map_hood": 0.0, "eval_map_jacket": 0.0889, "eval_map_jumpsuit": 0.0, "eval_map_lapel": 0.0563, "eval_map_large": 0.0316, "eval_map_leg warmer": 0.0, "eval_map_medium": 0.0114, "eval_map_neckline": 0.1296, "eval_map_pants": 0.1568, "eval_map_pocket": 0.0454, "eval_map_ribbon": 0.0, "eval_map_rivet": 0.0054, "eval_map_ruffle": 0.0, "eval_map_scarf": 0.0, "eval_map_sequin": 0.0, "eval_map_shirt, blouse": 0.0158, "eval_map_shoe": 0.2266, "eval_map_shorts": 0.0, "eval_map_skirt": 0.1006, "eval_map_sleeve": 0.1239, "eval_map_small": 0.0, "eval_map_sock": 0.0, "eval_map_sweater": 0.0, "eval_map_tassel": 0.0, "eval_map_tie": 0.0, "eval_map_tights, stockings": 0.019, "eval_map_top, t-shirt, sweatshirt": 0.0749, "eval_map_umbrella": 0.0, "eval_map_vest": 0.0, "eval_map_watch": 0.0025, "eval_map_zipper": 0.0009, "eval_mar_1": 0.0691, "eval_mar_10": 0.1556, "eval_mar_100": 0.1613, "eval_mar_100_applique": 0.0, "eval_mar_100_bag, wallet": 0.131, "eval_mar_100_bead": 0.0738, "eval_mar_100_belt": 0.3518, "eval_mar_100_bow": 0.0, "eval_mar_100_buckle": 0.0, "eval_mar_100_cape": 0.0, "eval_mar_100_cardigan": 0.0, "eval_mar_100_coat": 0.0039, "eval_mar_100_collar": 0.3032, "eval_mar_100_dress": 0.7356, "eval_mar_100_epaulette": 0.0, "eval_mar_100_flower": 0.0, "eval_mar_100_fringe": 0.0, "eval_mar_100_glasses": 0.4124, "eval_mar_100_glove": 0.0, "eval_mar_100_hat": 0.0877, "eval_mar_100_headband, head covering, hair accessory": 0.0826, "eval_mar_100_hood": 0.0, 
"eval_mar_100_jacket": 0.4692, "eval_mar_100_jumpsuit": 0.0, "eval_mar_100_lapel": 0.2889, "eval_mar_100_leg warmer": 0.0, "eval_mar_100_neckline": 0.6609, "eval_mar_100_pants": 0.699, "eval_mar_100_pocket": 0.5074, "eval_mar_100_ribbon": 0.0, "eval_mar_100_rivet": 0.0164, "eval_mar_100_ruffle": 0.0, "eval_mar_100_scarf": 0.0, "eval_mar_100_sequin": 0.0, "eval_mar_100_shirt, blouse": 0.0327, "eval_mar_100_shoe": 0.6827, "eval_mar_100_shorts": 0.0, "eval_mar_100_skirt": 0.4642, "eval_mar_100_sleeve": 0.615, "eval_mar_100_sock": 0.0, "eval_mar_100_sweater": 0.0, "eval_mar_100_tassel": 0.0, "eval_mar_100_tie": 0.0, "eval_mar_100_tights, stockings": 0.2057, "eval_mar_100_top, t-shirt, sweatshirt": 0.5832, "eval_mar_100_umbrella": 0.0, "eval_mar_100_vest": 0.0, "eval_mar_100_watch": 0.0108, "eval_mar_100_zipper": 0.0031, "eval_mar_large": 0.1627, "eval_mar_medium": 0.0377, "eval_mar_small": 0.0, "eval_runtime": 80.1615, "eval_samples_per_second": 14.446, "eval_steps_per_second": 0.462, "step": 2852 }, { "epoch": 1.0014025245441796, "grad_norm": 11.393332481384277, "learning_rate": 4.9964936886395515e-05, "loss": 2.1629, "step": 2856 }, { "epoch": 1.0063113604488079, "grad_norm": 12.991202354431152, "learning_rate": 4.9976624590930344e-05, "loss": 2.1225, "step": 2870 }, { "epoch": 1.0112201963534362, "grad_norm": 10.489317893981934, "learning_rate": 4.994935328034908e-05, "loss": 2.1079, "step": 2884 }, { "epoch": 1.0161290322580645, "grad_norm": 8.909357070922852, "learning_rate": 4.992208196976781e-05, "loss": 2.0351, "step": 2898 }, { "epoch": 1.0210378681626928, "grad_norm": 12.74708080291748, "learning_rate": 4.9894810659186535e-05, "loss": 2.1075, "step": 2912 }, { "epoch": 1.0259467040673211, "grad_norm": 13.665748596191406, "learning_rate": 4.986753934860527e-05, "loss": 2.0124, "step": 2926 }, { "epoch": 1.0308555399719495, "grad_norm": 14.663222312927246, "learning_rate": 4.9840268038024004e-05, "loss": 2.0201, "step": 2940 }, { "epoch": 1.0357643758765778, 
"grad_norm": 10.230842590332031, "learning_rate": 4.981299672744273e-05, "loss": 2.0467, "step": 2954 }, { "epoch": 1.040673211781206, "grad_norm": 10.297760963439941, "learning_rate": 4.978572541686146e-05, "loss": 2.1416, "step": 2968 }, { "epoch": 1.0455820476858344, "grad_norm": 10.538700103759766, "learning_rate": 4.9758454106280194e-05, "loss": 2.1438, "step": 2982 }, { "epoch": 1.050490883590463, "grad_norm": 13.004799842834473, "learning_rate": 4.973118279569893e-05, "loss": 2.1437, "step": 2996 }, { "epoch": 1.0553997194950913, "grad_norm": 12.405245780944824, "learning_rate": 4.970391148511766e-05, "loss": 2.0785, "step": 3010 }, { "epoch": 1.0603085553997196, "grad_norm": 14.152544975280762, "learning_rate": 4.9676640174536384e-05, "loss": 2.0874, "step": 3024 }, { "epoch": 1.065217391304348, "grad_norm": 9.52756118774414, "learning_rate": 4.964936886395512e-05, "loss": 2.0479, "step": 3038 }, { "epoch": 1.0701262272089762, "grad_norm": 9.712385177612305, "learning_rate": 4.9622097553373854e-05, "loss": 2.0998, "step": 3052 }, { "epoch": 1.0750350631136045, "grad_norm": 10.643545150756836, "learning_rate": 4.959482624279259e-05, "loss": 2.0593, "step": 3066 }, { "epoch": 1.0799438990182328, "grad_norm": 10.168733596801758, "learning_rate": 4.9567554932211316e-05, "loss": 2.1063, "step": 3080 }, { "epoch": 1.0848527349228612, "grad_norm": 11.256218910217285, "learning_rate": 4.9540283621630044e-05, "loss": 1.9453, "step": 3094 }, { "epoch": 1.0897615708274895, "grad_norm": 13.091086387634277, "learning_rate": 4.951301231104878e-05, "loss": 2.0579, "step": 3108 }, { "epoch": 1.0946704067321178, "grad_norm": 13.763653755187988, "learning_rate": 4.948574100046751e-05, "loss": 2.0324, "step": 3122 }, { "epoch": 1.0995792426367461, "grad_norm": 10.417903900146484, "learning_rate": 4.945846968988624e-05, "loss": 2.0769, "step": 3136 }, { "epoch": 1.1044880785413744, "grad_norm": 8.329463958740234, "learning_rate": 4.943119837930497e-05, "loss": 2.0699, "step": 
3150 }, { "epoch": 1.1093969144460027, "grad_norm": 9.213794708251953, "learning_rate": 4.94039270687237e-05, "loss": 2.0476, "step": 3164 }, { "epoch": 1.114305750350631, "grad_norm": 10.918821334838867, "learning_rate": 4.937665575814244e-05, "loss": 1.967, "step": 3178 }, { "epoch": 1.1192145862552594, "grad_norm": 12.201175689697266, "learning_rate": 4.934938444756117e-05, "loss": 2.0709, "step": 3192 }, { "epoch": 1.1241234221598877, "grad_norm": 12.51259994506836, "learning_rate": 4.93221131369799e-05, "loss": 2.1189, "step": 3206 }, { "epoch": 1.129032258064516, "grad_norm": 24.294113159179688, "learning_rate": 4.929484182639863e-05, "loss": 2.1726, "step": 3220 }, { "epoch": 1.1339410939691446, "grad_norm": 11.156673431396484, "learning_rate": 4.926757051581736e-05, "loss": 2.1287, "step": 3234 }, { "epoch": 1.1388499298737729, "grad_norm": 9.864721298217773, "learning_rate": 4.92402992052361e-05, "loss": 2.0568, "step": 3248 }, { "epoch": 1.1437587657784012, "grad_norm": 12.125511169433594, "learning_rate": 4.9213027894654825e-05, "loss": 2.0721, "step": 3262 }, { "epoch": 1.1486676016830295, "grad_norm": 12.784907341003418, "learning_rate": 4.918575658407355e-05, "loss": 2.0203, "step": 3276 }, { "epoch": 1.1535764375876578, "grad_norm": 10.564140319824219, "learning_rate": 4.915848527349229e-05, "loss": 2.037, "step": 3290 }, { "epoch": 1.1584852734922861, "grad_norm": 10.880088806152344, "learning_rate": 4.913121396291102e-05, "loss": 1.9642, "step": 3304 }, { "epoch": 1.1633941093969145, "grad_norm": 12.263250350952148, "learning_rate": 4.910394265232976e-05, "loss": 2.1419, "step": 3318 }, { "epoch": 1.1683029453015428, "grad_norm": 11.363569259643555, "learning_rate": 4.907667134174848e-05, "loss": 2.0139, "step": 3332 }, { "epoch": 1.173211781206171, "grad_norm": 10.072726249694824, "learning_rate": 4.904940003116721e-05, "loss": 2.0506, "step": 3346 }, { "epoch": 1.1781206171107994, "grad_norm": 10.887332916259766, "learning_rate": 
4.902212872058595e-05, "loss": 2.0439, "step": 3360 }, { "epoch": 1.1830294530154277, "grad_norm": 11.091092109680176, "learning_rate": 4.899485741000468e-05, "loss": 1.9922, "step": 3374 }, { "epoch": 1.187938288920056, "grad_norm": 10.256185531616211, "learning_rate": 4.896758609942341e-05, "loss": 2.05, "step": 3388 }, { "epoch": 1.1928471248246844, "grad_norm": 11.831741333007812, "learning_rate": 4.894031478884214e-05, "loss": 1.9551, "step": 3402 }, { "epoch": 1.1977559607293127, "grad_norm": 9.267203330993652, "learning_rate": 4.891304347826087e-05, "loss": 2.0633, "step": 3416 }, { "epoch": 1.202664796633941, "grad_norm": 9.528267860412598, "learning_rate": 4.888577216767961e-05, "loss": 2.0717, "step": 3430 }, { "epoch": 1.2075736325385695, "grad_norm": 8.31550121307373, "learning_rate": 4.8858500857098334e-05, "loss": 2.0016, "step": 3444 }, { "epoch": 1.2124824684431978, "grad_norm": 9.605377197265625, "learning_rate": 4.883122954651706e-05, "loss": 2.048, "step": 3458 }, { "epoch": 1.2173913043478262, "grad_norm": 10.196882247924805, "learning_rate": 4.88039582359358e-05, "loss": 1.8473, "step": 3472 }, { "epoch": 1.2223001402524545, "grad_norm": 10.502631187438965, "learning_rate": 4.877668692535453e-05, "loss": 2.0185, "step": 3486 }, { "epoch": 1.2272089761570828, "grad_norm": 9.617410659790039, "learning_rate": 4.874941561477326e-05, "loss": 2.0298, "step": 3500 }, { "epoch": 1.2321178120617111, "grad_norm": 13.304073333740234, "learning_rate": 4.8722144304191994e-05, "loss": 2.074, "step": 3514 }, { "epoch": 1.2370266479663394, "grad_norm": 10.661674499511719, "learning_rate": 4.869487299361072e-05, "loss": 2.0598, "step": 3528 }, { "epoch": 1.2419354838709677, "grad_norm": 10.506295204162598, "learning_rate": 4.8667601683029456e-05, "loss": 2.0267, "step": 3542 }, { "epoch": 1.246844319775596, "grad_norm": 12.688090324401855, "learning_rate": 4.864033037244819e-05, "loss": 2.0169, "step": 3556 }, { "epoch": 1.2517531556802244, "grad_norm": 
13.907393455505371, "learning_rate": 4.861305906186692e-05, "loss": 1.9079, "step": 3570 }, { "epoch": 1.2566619915848527, "grad_norm": 9.069457054138184, "learning_rate": 4.858578775128565e-05, "loss": 1.9821, "step": 3584 }, { "epoch": 1.261570827489481, "grad_norm": 11.51883602142334, "learning_rate": 4.855851644070438e-05, "loss": 2.0138, "step": 3598 }, { "epoch": 1.2664796633941093, "grad_norm": 10.67378044128418, "learning_rate": 4.8531245130123116e-05, "loss": 1.9707, "step": 3612 }, { "epoch": 1.2713884992987377, "grad_norm": 11.026805877685547, "learning_rate": 4.8503973819541844e-05, "loss": 1.9556, "step": 3626 }, { "epoch": 1.276297335203366, "grad_norm": 11.554420471191406, "learning_rate": 4.847670250896058e-05, "loss": 2.0349, "step": 3640 }, { "epoch": 1.2812061711079945, "grad_norm": 10.631027221679688, "learning_rate": 4.8449431198379306e-05, "loss": 2.0353, "step": 3654 }, { "epoch": 1.2861150070126226, "grad_norm": 12.887740135192871, "learning_rate": 4.842215988779804e-05, "loss": 1.9662, "step": 3668 }, { "epoch": 1.2910238429172511, "grad_norm": 11.3402681350708, "learning_rate": 4.839488857721677e-05, "loss": 1.9418, "step": 3682 }, { "epoch": 1.2959326788218795, "grad_norm": 13.566056251525879, "learning_rate": 4.83676172666355e-05, "loss": 1.9195, "step": 3696 }, { "epoch": 1.3008415147265078, "grad_norm": 10.74652099609375, "learning_rate": 4.834034595605423e-05, "loss": 1.9756, "step": 3710 }, { "epoch": 1.305750350631136, "grad_norm": 12.462689399719238, "learning_rate": 4.8313074645472966e-05, "loss": 2.0019, "step": 3724 }, { "epoch": 1.3106591865357644, "grad_norm": 7.358362674713135, "learning_rate": 4.8285803334891693e-05, "loss": 2.0025, "step": 3738 }, { "epoch": 1.3155680224403927, "grad_norm": 13.027688026428223, "learning_rate": 4.825853202431043e-05, "loss": 1.9149, "step": 3752 }, { "epoch": 1.320476858345021, "grad_norm": 11.582486152648926, "learning_rate": 4.823126071372916e-05, "loss": 1.9548, "step": 3766 }, { "epoch": 
1.3253856942496494, "grad_norm": 11.150921821594238, "learning_rate": 4.820398940314789e-05, "loss": 1.9218, "step": 3780 }, { "epoch": 1.3302945301542777, "grad_norm": 14.548352241516113, "learning_rate": 4.817671809256662e-05, "loss": 1.9598, "step": 3794 }, { "epoch": 1.335203366058906, "grad_norm": 13.304227828979492, "learning_rate": 4.814944678198535e-05, "loss": 1.9891, "step": 3808 }, { "epoch": 1.3401122019635343, "grad_norm": 11.231230735778809, "learning_rate": 4.812217547140409e-05, "loss": 2.0876, "step": 3822 }, { "epoch": 1.3450210378681626, "grad_norm": 13.334442138671875, "learning_rate": 4.8094904160822815e-05, "loss": 2.0125, "step": 3836 }, { "epoch": 1.349929873772791, "grad_norm": 12.552372932434082, "learning_rate": 4.806763285024155e-05, "loss": 1.9091, "step": 3850 }, { "epoch": 1.3548387096774195, "grad_norm": 9.97275447845459, "learning_rate": 4.804036153966028e-05, "loss": 1.9221, "step": 3864 }, { "epoch": 1.3597475455820476, "grad_norm": 10.065577507019043, "learning_rate": 4.801309022907901e-05, "loss": 1.9058, "step": 3878 }, { "epoch": 1.3646563814866761, "grad_norm": 9.246734619140625, "learning_rate": 4.798581891849774e-05, "loss": 1.9682, "step": 3892 }, { "epoch": 1.3695652173913042, "grad_norm": 12.037038803100586, "learning_rate": 4.7958547607916475e-05, "loss": 1.8924, "step": 3906 }, { "epoch": 1.3744740532959328, "grad_norm": 8.455939292907715, "learning_rate": 4.79312762973352e-05, "loss": 2.0011, "step": 3920 }, { "epoch": 1.379382889200561, "grad_norm": 11.324376106262207, "learning_rate": 4.790400498675394e-05, "loss": 1.9293, "step": 3934 }, { "epoch": 1.3842917251051894, "grad_norm": 11.965813636779785, "learning_rate": 4.787673367617267e-05, "loss": 2.129, "step": 3948 }, { "epoch": 1.3892005610098177, "grad_norm": 8.53834342956543, "learning_rate": 4.78494623655914e-05, "loss": 1.9535, "step": 3962 }, { "epoch": 1.394109396914446, "grad_norm": 10.098605155944824, "learning_rate": 4.782219105501013e-05, "loss": 
1.9093, "step": 3976 }, { "epoch": 1.3990182328190743, "grad_norm": 8.86446475982666, "learning_rate": 4.779491974442886e-05, "loss": 1.9554, "step": 3990 }, { "epoch": 1.4039270687237027, "grad_norm": 9.64447021484375, "learning_rate": 4.77676484338476e-05, "loss": 1.9228, "step": 4004 }, { "epoch": 1.408835904628331, "grad_norm": 9.118378639221191, "learning_rate": 4.7740377123266325e-05, "loss": 1.9127, "step": 4018 }, { "epoch": 1.4137447405329593, "grad_norm": 12.44186019897461, "learning_rate": 4.771310581268505e-05, "loss": 1.9145, "step": 4032 }, { "epoch": 1.4186535764375876, "grad_norm": 9.889150619506836, "learning_rate": 4.768583450210379e-05, "loss": 1.9957, "step": 4046 }, { "epoch": 1.423562412342216, "grad_norm": 10.56630802154541, "learning_rate": 4.765856319152252e-05, "loss": 1.9622, "step": 4060 }, { "epoch": 1.4284712482468442, "grad_norm": 9.96074104309082, "learning_rate": 4.7631291880941256e-05, "loss": 1.874, "step": 4074 }, { "epoch": 1.4333800841514726, "grad_norm": 13.176163673400879, "learning_rate": 4.7604020570359984e-05, "loss": 2.0051, "step": 4088 }, { "epoch": 1.438288920056101, "grad_norm": 10.800230979919434, "learning_rate": 4.757674925977871e-05, "loss": 1.9494, "step": 4102 }, { "epoch": 1.4431977559607292, "grad_norm": 8.526347160339355, "learning_rate": 4.7549477949197446e-05, "loss": 1.8852, "step": 4116 }, { "epoch": 1.4481065918653577, "grad_norm": 12.091789245605469, "learning_rate": 4.752220663861618e-05, "loss": 1.9441, "step": 4130 }, { "epoch": 1.453015427769986, "grad_norm": 9.557324409484863, "learning_rate": 4.749493532803491e-05, "loss": 1.8484, "step": 4144 }, { "epoch": 1.4579242636746144, "grad_norm": 11.472286224365234, "learning_rate": 4.746766401745364e-05, "loss": 1.9335, "step": 4158 }, { "epoch": 1.4628330995792427, "grad_norm": 17.37548065185547, "learning_rate": 4.744039270687237e-05, "loss": 1.9243, "step": 4172 }, { "epoch": 1.467741935483871, "grad_norm": 8.8356294631958, "learning_rate": 
4.7413121396291106e-05, "loss": 1.9103, "step": 4186 }, { "epoch": 1.4726507713884993, "grad_norm": 10.95909595489502, "learning_rate": 4.738585008570984e-05, "loss": 1.9502, "step": 4200 }, { "epoch": 1.4775596072931276, "grad_norm": 13.479392051696777, "learning_rate": 4.735857877512856e-05, "loss": 1.8849, "step": 4214 }, { "epoch": 1.482468443197756, "grad_norm": 12.191668510437012, "learning_rate": 4.7331307464547296e-05, "loss": 1.9721, "step": 4228 }, { "epoch": 1.4873772791023843, "grad_norm": 10.614190101623535, "learning_rate": 4.730403615396603e-05, "loss": 1.928, "step": 4242 }, { "epoch": 1.4922861150070126, "grad_norm": 11.305283546447754, "learning_rate": 4.7276764843384765e-05, "loss": 1.9712, "step": 4256 }, { "epoch": 1.497194950911641, "grad_norm": 12.925456047058105, "learning_rate": 4.724949353280349e-05, "loss": 1.9874, "step": 4270 }, { "epoch": 1.5021037868162694, "grad_norm": 13.113685607910156, "learning_rate": 4.722222222222222e-05, "loss": 1.9041, "step": 4284 }, { "epoch": 1.5070126227208975, "grad_norm": 14.207528114318848, "learning_rate": 4.7194950911640956e-05, "loss": 1.9565, "step": 4298 }, { "epoch": 1.511921458625526, "grad_norm": 11.907343864440918, "learning_rate": 4.716767960105969e-05, "loss": 1.8086, "step": 4312 }, { "epoch": 1.5168302945301542, "grad_norm": 11.142657279968262, "learning_rate": 4.7140408290478425e-05, "loss": 1.822, "step": 4326 }, { "epoch": 1.5217391304347827, "grad_norm": 11.145451545715332, "learning_rate": 4.7113136979897146e-05, "loss": 1.8479, "step": 4340 }, { "epoch": 1.5266479663394108, "grad_norm": 10.881003379821777, "learning_rate": 4.708586566931588e-05, "loss": 1.9232, "step": 4354 }, { "epoch": 1.5315568022440393, "grad_norm": 9.57334041595459, "learning_rate": 4.7058594358734615e-05, "loss": 1.959, "step": 4368 }, { "epoch": 1.5364656381486677, "grad_norm": 10.239831924438477, "learning_rate": 4.703132304815335e-05, "loss": 1.8201, "step": 4382 }, { "epoch": 1.541374474053296, "grad_norm": 
11.039583206176758, "learning_rate": 4.700405173757208e-05, "loss": 1.8637, "step": 4396 }, { "epoch": 1.5462833099579243, "grad_norm": 11.059945106506348, "learning_rate": 4.6976780426990805e-05, "loss": 1.899, "step": 4410 }, { "epoch": 1.5511921458625526, "grad_norm": 11.268326759338379, "learning_rate": 4.694950911640954e-05, "loss": 1.9152, "step": 4424 }, { "epoch": 1.556100981767181, "grad_norm": 13.233866691589355, "learning_rate": 4.6922237805828275e-05, "loss": 1.9129, "step": 4438 }, { "epoch": 1.5610098176718092, "grad_norm": 9.713801383972168, "learning_rate": 4.6894966495247e-05, "loss": 1.8764, "step": 4452 }, { "epoch": 1.5659186535764376, "grad_norm": 10.359221458435059, "learning_rate": 4.686769518466573e-05, "loss": 1.8169, "step": 4466 }, { "epoch": 1.5708274894810659, "grad_norm": 12.062459945678711, "learning_rate": 4.6840423874084465e-05, "loss": 1.9491, "step": 4480 }, { "epoch": 1.5757363253856944, "grad_norm": 12.685450553894043, "learning_rate": 4.68131525635032e-05, "loss": 1.9291, "step": 4494 }, { "epoch": 1.5806451612903225, "grad_norm": 13.979676246643066, "learning_rate": 4.678588125292193e-05, "loss": 1.7741, "step": 4508 }, { "epoch": 1.585553997194951, "grad_norm": 12.639430046081543, "learning_rate": 4.675860994234066e-05, "loss": 1.8654, "step": 4522 }, { "epoch": 1.5904628330995791, "grad_norm": 9.166572570800781, "learning_rate": 4.673133863175939e-05, "loss": 1.8266, "step": 4536 }, { "epoch": 1.5953716690042077, "grad_norm": 9.623431205749512, "learning_rate": 4.6704067321178124e-05, "loss": 1.8693, "step": 4550 }, { "epoch": 1.6002805049088358, "grad_norm": 12.572652816772461, "learning_rate": 4.667679601059685e-05, "loss": 1.9, "step": 4564 }, { "epoch": 1.6051893408134643, "grad_norm": 9.248883247375488, "learning_rate": 4.664952470001559e-05, "loss": 2.0691, "step": 4578 }, { "epoch": 1.6100981767180924, "grad_norm": 9.85283088684082, "learning_rate": 4.6622253389434315e-05, "loss": 1.9833, "step": 4592 }, { "epoch": 
1.615007012622721, "grad_norm": 13.418924331665039, "learning_rate": 4.659498207885305e-05, "loss": 2.0258, "step": 4606 }, { "epoch": 1.6199158485273493, "grad_norm": 11.970269203186035, "learning_rate": 4.6567710768271784e-05, "loss": 1.8496, "step": 4620 }, { "epoch": 1.6248246844319776, "grad_norm": 11.734840393066406, "learning_rate": 4.654043945769051e-05, "loss": 1.9931, "step": 4634 }, { "epoch": 1.629733520336606, "grad_norm": 11.321859359741211, "learning_rate": 4.651316814710924e-05, "loss": 1.8347, "step": 4648 }, { "epoch": 1.6346423562412342, "grad_norm": 10.94494915008545, "learning_rate": 4.6485896836527974e-05, "loss": 1.9001, "step": 4662 }, { "epoch": 1.6395511921458625, "grad_norm": 13.282726287841797, "learning_rate": 4.645862552594671e-05, "loss": 1.7935, "step": 4676 }, { "epoch": 1.6444600280504909, "grad_norm": 10.5330228805542, "learning_rate": 4.6431354215365437e-05, "loss": 1.9515, "step": 4690 }, { "epoch": 1.6493688639551192, "grad_norm": 10.041396141052246, "learning_rate": 4.640408290478417e-05, "loss": 1.7845, "step": 4704 }, { "epoch": 1.6542776998597475, "grad_norm": 14.296436309814453, "learning_rate": 4.63768115942029e-05, "loss": 1.9501, "step": 4718 }, { "epoch": 1.659186535764376, "grad_norm": 13.445257186889648, "learning_rate": 4.6349540283621634e-05, "loss": 1.8096, "step": 4732 }, { "epoch": 1.6640953716690041, "grad_norm": 10.243102073669434, "learning_rate": 4.632226897304036e-05, "loss": 1.8965, "step": 4746 }, { "epoch": 1.6690042075736327, "grad_norm": 9.259690284729004, "learning_rate": 4.6294997662459096e-05, "loss": 1.8838, "step": 4760 }, { "epoch": 1.6739130434782608, "grad_norm": 9.463726997375488, "learning_rate": 4.6267726351877824e-05, "loss": 1.8387, "step": 4774 }, { "epoch": 1.6788218793828893, "grad_norm": 9.736865043640137, "learning_rate": 4.624045504129656e-05, "loss": 1.9159, "step": 4788 }, { "epoch": 1.6837307152875174, "grad_norm": 10.477815628051758, "learning_rate": 4.6213183730715286e-05, 
"loss": 1.8738, "step": 4802 }, { "epoch": 1.688639551192146, "grad_norm": 9.217684745788574, "learning_rate": 4.618591242013402e-05, "loss": 1.8484, "step": 4816 }, { "epoch": 1.6935483870967742, "grad_norm": 10.218056678771973, "learning_rate": 4.6158641109552756e-05, "loss": 1.9266, "step": 4830 }, { "epoch": 1.6984572230014026, "grad_norm": 10.764497756958008, "learning_rate": 4.613136979897148e-05, "loss": 1.7937, "step": 4844 }, { "epoch": 1.7033660589060309, "grad_norm": 11.148290634155273, "learning_rate": 4.610409848839022e-05, "loss": 1.8768, "step": 4858 }, { "epoch": 1.7082748948106592, "grad_norm": 9.272621154785156, "learning_rate": 4.6076827177808946e-05, "loss": 1.8719, "step": 4872 }, { "epoch": 1.7131837307152875, "grad_norm": 10.965556144714355, "learning_rate": 4.604955586722768e-05, "loss": 1.8836, "step": 4886 }, { "epoch": 1.7180925666199158, "grad_norm": 10.035648345947266, "learning_rate": 4.602228455664641e-05, "loss": 1.8335, "step": 4900 }, { "epoch": 1.7230014025245441, "grad_norm": 9.720402717590332, "learning_rate": 4.599501324606514e-05, "loss": 1.9356, "step": 4914 }, { "epoch": 1.7279102384291725, "grad_norm": 11.15916919708252, "learning_rate": 4.596774193548387e-05, "loss": 1.8801, "step": 4928 }, { "epoch": 1.732819074333801, "grad_norm": 9.771932601928711, "learning_rate": 4.5940470624902605e-05, "loss": 1.9195, "step": 4942 }, { "epoch": 1.737727910238429, "grad_norm": 11.494741439819336, "learning_rate": 4.591319931432134e-05, "loss": 1.8748, "step": 4956 }, { "epoch": 1.7426367461430576, "grad_norm": 8.913902282714844, "learning_rate": 4.588592800374007e-05, "loss": 1.8317, "step": 4970 }, { "epoch": 1.7475455820476857, "grad_norm": 11.460936546325684, "learning_rate": 4.5858656693158796e-05, "loss": 1.9173, "step": 4984 }, { "epoch": 1.7524544179523143, "grad_norm": 12.785904884338379, "learning_rate": 4.583138538257753e-05, "loss": 1.8037, "step": 4998 }, { "epoch": 1.7573632538569424, "grad_norm": 12.0155029296875, 
"learning_rate": 4.5804114071996265e-05, "loss": 1.8738, "step": 5012 }, { "epoch": 1.762272089761571, "grad_norm": 10.310662269592285, "learning_rate": 4.577684276141499e-05, "loss": 1.9172, "step": 5026 }, { "epoch": 1.767180925666199, "grad_norm": 13.062027931213379, "learning_rate": 4.574957145083372e-05, "loss": 1.8425, "step": 5040 }, { "epoch": 1.7720897615708275, "grad_norm": 15.804108619689941, "learning_rate": 4.5722300140252455e-05, "loss": 1.867, "step": 5054 }, { "epoch": 1.7769985974754559, "grad_norm": 10.58868408203125, "learning_rate": 4.569502882967119e-05, "loss": 1.8926, "step": 5068 }, { "epoch": 1.7819074333800842, "grad_norm": 10.328556060791016, "learning_rate": 4.5667757519089924e-05, "loss": 1.9685, "step": 5082 }, { "epoch": 1.7868162692847125, "grad_norm": 9.850960731506348, "learning_rate": 4.564048620850865e-05, "loss": 1.8319, "step": 5096 }, { "epoch": 1.7917251051893408, "grad_norm": 10.264177322387695, "learning_rate": 4.561321489792738e-05, "loss": 1.8972, "step": 5110 }, { "epoch": 1.7966339410939691, "grad_norm": 10.365046501159668, "learning_rate": 4.5585943587346114e-05, "loss": 1.9764, "step": 5124 }, { "epoch": 1.8015427769985974, "grad_norm": 13.297977447509766, "learning_rate": 4.555867227676485e-05, "loss": 1.8671, "step": 5138 }, { "epoch": 1.8064516129032258, "grad_norm": 12.31112289428711, "learning_rate": 4.553140096618358e-05, "loss": 1.8602, "step": 5152 }, { "epoch": 1.811360448807854, "grad_norm": 11.093729019165039, "learning_rate": 4.5504129655602305e-05, "loss": 1.8399, "step": 5166 }, { "epoch": 1.8162692847124826, "grad_norm": 10.216219902038574, "learning_rate": 4.547685834502104e-05, "loss": 1.8032, "step": 5180 }, { "epoch": 1.8211781206171107, "grad_norm": 9.43352222442627, "learning_rate": 4.5449587034439774e-05, "loss": 1.8277, "step": 5194 }, { "epoch": 1.8260869565217392, "grad_norm": 10.963068962097168, "learning_rate": 4.54223157238585e-05, "loss": 1.8448, "step": 5208 }, { "epoch": 
1.8309957924263673, "grad_norm": 16.256513595581055, "learning_rate": 4.539504441327723e-05, "loss": 1.9804, "step": 5222 }, { "epoch": 1.8359046283309959, "grad_norm": 9.337403297424316, "learning_rate": 4.5367773102695964e-05, "loss": 1.6757, "step": 5236 }, { "epoch": 1.840813464235624, "grad_norm": 12.72520923614502, "learning_rate": 4.53405017921147e-05, "loss": 1.8268, "step": 5250 }, { "epoch": 1.8457223001402525, "grad_norm": 10.21760082244873, "learning_rate": 4.5313230481533433e-05, "loss": 1.8736, "step": 5264 }, { "epoch": 1.8506311360448808, "grad_norm": 10.809754371643066, "learning_rate": 4.528595917095216e-05, "loss": 1.8584, "step": 5278 }, { "epoch": 1.8555399719495091, "grad_norm": 12.770214080810547, "learning_rate": 4.525868786037089e-05, "loss": 1.9142, "step": 5292 }, { "epoch": 1.8604488078541375, "grad_norm": 11.563720703125, "learning_rate": 4.5231416549789624e-05, "loss": 1.8055, "step": 5306 }, { "epoch": 1.8653576437587658, "grad_norm": 10.505379676818848, "learning_rate": 4.520414523920836e-05, "loss": 1.8969, "step": 5320 }, { "epoch": 1.870266479663394, "grad_norm": 10.692577362060547, "learning_rate": 4.5176873928627086e-05, "loss": 1.7717, "step": 5334 }, { "epoch": 1.8751753155680224, "grad_norm": 9.505712509155273, "learning_rate": 4.5149602618045814e-05, "loss": 1.8337, "step": 5348 }, { "epoch": 1.8800841514726507, "grad_norm": 11.840970993041992, "learning_rate": 4.512233130746455e-05, "loss": 1.8374, "step": 5362 }, { "epoch": 1.884992987377279, "grad_norm": 11.399272918701172, "learning_rate": 4.509505999688328e-05, "loss": 1.8235, "step": 5376 }, { "epoch": 1.8899018232819076, "grad_norm": 10.03107738494873, "learning_rate": 4.506778868630202e-05, "loss": 1.7811, "step": 5390 }, { "epoch": 1.8948106591865357, "grad_norm": 8.736416816711426, "learning_rate": 4.504051737572074e-05, "loss": 1.7995, "step": 5404 }, { "epoch": 1.8997194950911642, "grad_norm": 9.962418556213379, "learning_rate": 4.5013246065139473e-05, "loss": 
1.8031, "step": 5418 }, { "epoch": 1.9046283309957923, "grad_norm": 9.44190788269043, "learning_rate": 4.498597475455821e-05, "loss": 1.8253, "step": 5432 }, { "epoch": 1.9095371669004209, "grad_norm": 9.592060089111328, "learning_rate": 4.495870344397694e-05, "loss": 1.7658, "step": 5446 }, { "epoch": 1.914446002805049, "grad_norm": 11.648765563964844, "learning_rate": 4.493143213339567e-05, "loss": 1.784, "step": 5460 }, { "epoch": 1.9193548387096775, "grad_norm": 11.510946273803711, "learning_rate": 4.49041608228144e-05, "loss": 1.908, "step": 5474 }, { "epoch": 1.9242636746143056, "grad_norm": 11.496999740600586, "learning_rate": 4.487688951223313e-05, "loss": 1.7439, "step": 5488 }, { "epoch": 1.9291725105189341, "grad_norm": 12.739660263061523, "learning_rate": 4.484961820165187e-05, "loss": 1.844, "step": 5502 }, { "epoch": 1.9340813464235624, "grad_norm": 12.82052230834961, "learning_rate": 4.4822346891070595e-05, "loss": 1.7986, "step": 5516 }, { "epoch": 1.9389901823281908, "grad_norm": 10.354856491088867, "learning_rate": 4.479507558048932e-05, "loss": 1.7901, "step": 5530 }, { "epoch": 1.943899018232819, "grad_norm": 10.20328140258789, "learning_rate": 4.476780426990806e-05, "loss": 1.7836, "step": 5544 }, { "epoch": 1.9488078541374474, "grad_norm": 14.908529281616211, "learning_rate": 4.474053295932679e-05, "loss": 1.7762, "step": 5558 }, { "epoch": 1.9537166900420757, "grad_norm": 11.893385887145996, "learning_rate": 4.471326164874552e-05, "loss": 1.9061, "step": 5572 }, { "epoch": 1.958625525946704, "grad_norm": 11.026830673217773, "learning_rate": 4.4685990338164255e-05, "loss": 1.8877, "step": 5586 }, { "epoch": 1.9635343618513323, "grad_norm": 11.234782218933105, "learning_rate": 4.465871902758298e-05, "loss": 1.7913, "step": 5600 }, { "epoch": 1.9684431977559607, "grad_norm": 9.364754676818848, "learning_rate": 4.463144771700172e-05, "loss": 1.7472, "step": 5614 }, { "epoch": 1.9733520336605892, "grad_norm": 10.984906196594238, "learning_rate": 
4.460417640642045e-05, "loss": 1.8709, "step": 5628 }, { "epoch": 1.9782608695652173, "grad_norm": 9.148139953613281, "learning_rate": 4.457690509583918e-05, "loss": 1.8786, "step": 5642 }, { "epoch": 1.9831697054698458, "grad_norm": 10.805230140686035, "learning_rate": 4.454963378525791e-05, "loss": 1.787, "step": 5656 }, { "epoch": 1.988078541374474, "grad_norm": 12.728995323181152, "learning_rate": 4.452236247467664e-05, "loss": 1.7712, "step": 5670 }, { "epoch": 1.9929873772791025, "grad_norm": 10.249566078186035, "learning_rate": 4.449509116409538e-05, "loss": 1.7664, "step": 5684 }, { "epoch": 1.9978962131837306, "grad_norm": 11.51685619354248, "learning_rate": 4.4467819853514105e-05, "loss": 1.7907, "step": 5698 }, { "epoch": 2.0, "eval_loss": 1.8477481603622437, "eval_map": 0.062, "eval_map_50": 0.1041, "eval_map_75": 0.0655, "eval_map_applique": 0.0, "eval_map_bag, wallet": 0.0237, "eval_map_bead": 0.0058, "eval_map_belt": 0.0408, "eval_map_bow": 0.0, "eval_map_buckle": 0.0231, "eval_map_cape": 0.0, "eval_map_cardigan": 0.0, "eval_map_coat": 0.033, "eval_map_collar": 0.1274, "eval_map_dress": 0.3642, "eval_map_epaulette": 0.0021, "eval_map_flower": 0.0, "eval_map_fringe": 0.0, "eval_map_glasses": 0.1233, "eval_map_glove": 0.0, "eval_map_hat": 0.0827, "eval_map_headband, head covering, hair accessory": 0.0286, "eval_map_hood": 0.0157, "eval_map_jacket": 0.1699, "eval_map_jumpsuit": 0.0, "eval_map_lapel": 0.0814, "eval_map_large": 0.0625, "eval_map_leg warmer": 0.0, "eval_map_medium": 0.0337, "eval_map_neckline": 0.1851, "eval_map_pants": 0.3191, "eval_map_pocket": 0.07, "eval_map_ribbon": 0.0, "eval_map_rivet": 0.0, "eval_map_ruffle": 0.0035, "eval_map_scarf": 0.0, "eval_map_sequin": 0.0, "eval_map_shirt, blouse": 0.037, "eval_map_shoe": 0.3258, "eval_map_shorts": 0.0935, "eval_map_skirt": 0.1448, "eval_map_sleeve": 0.2254, "eval_map_small": 0.0, "eval_map_sock": 0.0175, "eval_map_sweater": 0.0, "eval_map_tassel": 0.0, "eval_map_tie": 0.1084, 
"eval_map_tights, stockings": 0.1111, "eval_map_top, t-shirt, sweatshirt": 0.0637, "eval_map_umbrella": 0.0, "eval_map_vest": 0.0, "eval_map_watch": 0.0071, "eval_map_zipper": 0.0181, "eval_mar_1": 0.1198, "eval_mar_10": 0.2523, "eval_mar_100": 0.2579, "eval_mar_100_applique": 0.0, "eval_mar_100_bag, wallet": 0.2559, "eval_mar_100_bead": 0.1804, "eval_mar_100_belt": 0.4, "eval_mar_100_bow": 0.0, "eval_mar_100_buckle": 0.0493, "eval_mar_100_cape": 0.0, "eval_mar_100_cardigan": 0.0, "eval_mar_100_coat": 0.1777, "eval_mar_100_collar": 0.5636, "eval_mar_100_dress": 0.7287, "eval_mar_100_epaulette": 0.15, "eval_mar_100_flower": 0.0, "eval_mar_100_fringe": 0.0, "eval_mar_100_glasses": 0.5868, "eval_mar_100_glove": 0.0, "eval_mar_100_hat": 0.5507, "eval_mar_100_headband, head covering, hair accessory": 0.2807, "eval_mar_100_hood": 0.0688, "eval_mar_100_jacket": 0.6747, "eval_mar_100_jumpsuit": 0.0, "eval_mar_100_lapel": 0.5267, "eval_mar_100_leg warmer": 0.0, "eval_mar_100_neckline": 0.6897, "eval_mar_100_pants": 0.7143, "eval_mar_100_pocket": 0.5991, "eval_mar_100_ribbon": 0.0, "eval_mar_100_rivet": 0.0, "eval_mar_100_ruffle": 0.0816, "eval_mar_100_scarf": 0.0, "eval_mar_100_sequin": 0.0, "eval_mar_100_shirt, blouse": 0.2772, "eval_mar_100_shoe": 0.687, "eval_mar_100_shorts": 0.3472, "eval_mar_100_skirt": 0.5802, "eval_mar_100_sleeve": 0.6425, "eval_mar_100_sock": 0.1765, "eval_mar_100_sweater": 0.0, "eval_mar_100_tassel": 0.0, "eval_mar_100_tie": 0.5333, "eval_mar_100_tights, stockings": 0.5115, "eval_mar_100_top, t-shirt, sweatshirt": 0.54, "eval_mar_100_umbrella": 0.0, "eval_mar_100_vest": 0.0, "eval_mar_100_watch": 0.1193, "eval_mar_100_zipper": 0.1701, "eval_mar_large": 0.2596, "eval_mar_medium": 0.0899, "eval_mar_small": 0.0, "eval_runtime": 78.4699, "eval_samples_per_second": 14.757, "eval_steps_per_second": 0.472, "step": 5704 }, { "epoch": 2.002805049088359, "grad_norm": 15.515993118286133, "learning_rate": 4.444054854293284e-05, "loss": 1.9001, "step": 5712 }, 
{ "epoch": 2.007713884992987, "grad_norm": 14.966017723083496, "learning_rate": 4.441327723235157e-05, "loss": 1.8096, "step": 5726 }, { "epoch": 2.0126227208976157, "grad_norm": 11.279698371887207, "learning_rate": 4.43860059217703e-05, "loss": 1.7421, "step": 5740 }, { "epoch": 2.017531556802244, "grad_norm": 9.804573059082031, "learning_rate": 4.435873461118903e-05, "loss": 1.8245, "step": 5754 }, { "epoch": 2.0224403927068724, "grad_norm": 13.036866188049316, "learning_rate": 4.4331463300607764e-05, "loss": 1.8343, "step": 5768 }, { "epoch": 2.027349228611501, "grad_norm": 14.370100975036621, "learning_rate": 4.430419199002649e-05, "loss": 1.7831, "step": 5782 }, { "epoch": 2.032258064516129, "grad_norm": 8.274703979492188, "learning_rate": 4.4276920679445226e-05, "loss": 1.7654, "step": 5796 }, { "epoch": 2.0371669004207575, "grad_norm": 13.228246688842773, "learning_rate": 4.4249649368863954e-05, "loss": 1.7743, "step": 5810 }, { "epoch": 2.0420757363253856, "grad_norm": 9.601645469665527, "learning_rate": 4.422237805828269e-05, "loss": 1.8265, "step": 5824 }, { "epoch": 2.046984572230014, "grad_norm": 9.885420799255371, "learning_rate": 4.4195106747701424e-05, "loss": 1.793, "step": 5838 }, { "epoch": 2.0518934081346423, "grad_norm": 10.006317138671875, "learning_rate": 4.416783543712015e-05, "loss": 1.7813, "step": 5852 }, { "epoch": 2.056802244039271, "grad_norm": 12.095258712768555, "learning_rate": 4.4140564126538886e-05, "loss": 1.7731, "step": 5866 }, { "epoch": 2.061711079943899, "grad_norm": 11.208070755004883, "learning_rate": 4.4113292815957614e-05, "loss": 1.717, "step": 5880 }, { "epoch": 2.0666199158485274, "grad_norm": 12.589214324951172, "learning_rate": 4.408602150537635e-05, "loss": 1.8187, "step": 5894 }, { "epoch": 2.0715287517531555, "grad_norm": 13.213720321655273, "learning_rate": 4.4058750194795076e-05, "loss": 1.7561, "step": 5908 }, { "epoch": 2.076437587657784, "grad_norm": 11.721803665161133, "learning_rate": 4.403147888421381e-05, 
"loss": 1.7875, "step": 5922 }, { "epoch": 2.081346423562412, "grad_norm": 12.867485046386719, "learning_rate": 4.400420757363254e-05, "loss": 1.798, "step": 5936 }, { "epoch": 2.0862552594670407, "grad_norm": 8.166487693786621, "learning_rate": 4.397693626305127e-05, "loss": 1.8406, "step": 5950 }, { "epoch": 2.091164095371669, "grad_norm": 11.435522079467773, "learning_rate": 4.394966495247e-05, "loss": 1.8066, "step": 5964 }, { "epoch": 2.0960729312762973, "grad_norm": 12.214832305908203, "learning_rate": 4.3922393641888736e-05, "loss": 1.8131, "step": 5978 }, { "epoch": 2.100981767180926, "grad_norm": 10.94597053527832, "learning_rate": 4.3895122331307464e-05, "loss": 1.7436, "step": 5992 }, { "epoch": 2.105890603085554, "grad_norm": 9.09123420715332, "learning_rate": 4.38678510207262e-05, "loss": 1.8674, "step": 6006 }, { "epoch": 2.1107994389901825, "grad_norm": 12.619202613830566, "learning_rate": 4.384057971014493e-05, "loss": 1.7795, "step": 6020 }, { "epoch": 2.1157082748948106, "grad_norm": 11.83459186553955, "learning_rate": 4.381330839956366e-05, "loss": 1.768, "step": 6034 }, { "epoch": 2.120617110799439, "grad_norm": 14.364986419677734, "learning_rate": 4.378603708898239e-05, "loss": 1.8283, "step": 6048 }, { "epoch": 2.1255259467040672, "grad_norm": 12.984795570373535, "learning_rate": 4.375876577840112e-05, "loss": 1.7697, "step": 6062 }, { "epoch": 2.130434782608696, "grad_norm": 11.105409622192383, "learning_rate": 4.373149446781986e-05, "loss": 1.892, "step": 6076 }, { "epoch": 2.135343618513324, "grad_norm": 10.542952537536621, "learning_rate": 4.3704223157238585e-05, "loss": 1.7932, "step": 6090 }, { "epoch": 2.1402524544179524, "grad_norm": 10.694328308105469, "learning_rate": 4.367695184665732e-05, "loss": 1.74, "step": 6104 }, { "epoch": 2.1451612903225805, "grad_norm": 14.823897361755371, "learning_rate": 4.364968053607605e-05, "loss": 1.7728, "step": 6118 }, { "epoch": 2.150070126227209, "grad_norm": 15.00540542602539, "learning_rate": 
4.362240922549478e-05, "loss": 1.7875, "step": 6132 }, { "epoch": 2.154978962131837, "grad_norm": 9.258666038513184, "learning_rate": 4.359513791491352e-05, "loss": 1.7685, "step": 6146 }, { "epoch": 2.1598877980364657, "grad_norm": 11.277965545654297, "learning_rate": 4.3567866604332245e-05, "loss": 1.7849, "step": 6160 }, { "epoch": 2.164796633941094, "grad_norm": 10.425918579101562, "learning_rate": 4.354059529375097e-05, "loss": 1.7913, "step": 6174 }, { "epoch": 2.1697054698457223, "grad_norm": 12.694295883178711, "learning_rate": 4.351332398316971e-05, "loss": 1.7626, "step": 6188 }, { "epoch": 2.1746143057503504, "grad_norm": 13.709222793579102, "learning_rate": 4.348605267258844e-05, "loss": 1.8914, "step": 6202 }, { "epoch": 2.179523141654979, "grad_norm": 12.746072769165039, "learning_rate": 4.345878136200717e-05, "loss": 1.6727, "step": 6216 }, { "epoch": 2.1844319775596075, "grad_norm": 12.651052474975586, "learning_rate": 4.34315100514259e-05, "loss": 1.8569, "step": 6230 }, { "epoch": 2.1893408134642356, "grad_norm": 9.538745880126953, "learning_rate": 4.340423874084463e-05, "loss": 1.783, "step": 6244 }, { "epoch": 2.194249649368864, "grad_norm": 11.545536041259766, "learning_rate": 4.337696743026337e-05, "loss": 1.8094, "step": 6258 }, { "epoch": 2.1991584852734922, "grad_norm": 15.02635669708252, "learning_rate": 4.33496961196821e-05, "loss": 1.8601, "step": 6272 }, { "epoch": 2.2040673211781208, "grad_norm": 9.555161476135254, "learning_rate": 4.332242480910082e-05, "loss": 1.7567, "step": 6286 }, { "epoch": 2.208976157082749, "grad_norm": 8.94969654083252, "learning_rate": 4.329515349851956e-05, "loss": 1.8171, "step": 6300 }, { "epoch": 2.2138849929873774, "grad_norm": 8.987335205078125, "learning_rate": 4.326788218793829e-05, "loss": 1.7506, "step": 6314 }, { "epoch": 2.2187938288920055, "grad_norm": 11.341231346130371, "learning_rate": 4.3240610877357026e-05, "loss": 1.751, "step": 6328 }, { "epoch": 2.223702664796634, "grad_norm": 
10.149150848388672, "learning_rate": 4.3213339566775754e-05, "loss": 1.7734, "step": 6342 }, { "epoch": 2.228611500701262, "grad_norm": 8.128470420837402, "learning_rate": 4.318606825619448e-05, "loss": 1.6719, "step": 6356 }, { "epoch": 2.2335203366058907, "grad_norm": 9.661511421203613, "learning_rate": 4.3158796945613217e-05, "loss": 1.7986, "step": 6370 }, { "epoch": 2.2384291725105188, "grad_norm": 10.676513671875, "learning_rate": 4.313152563503195e-05, "loss": 1.8378, "step": 6384 }, { "epoch": 2.2433380084151473, "grad_norm": 11.733198165893555, "learning_rate": 4.3104254324450686e-05, "loss": 1.7386, "step": 6398 }, { "epoch": 2.2482468443197754, "grad_norm": 11.243391036987305, "learning_rate": 4.307698301386941e-05, "loss": 1.7463, "step": 6412 }, { "epoch": 2.253155680224404, "grad_norm": 14.038941383361816, "learning_rate": 4.304971170328814e-05, "loss": 1.7062, "step": 6426 }, { "epoch": 2.258064516129032, "grad_norm": 8.653011322021484, "learning_rate": 4.3022440392706876e-05, "loss": 1.7163, "step": 6440 }, { "epoch": 2.2629733520336606, "grad_norm": 10.56112289428711, "learning_rate": 4.299516908212561e-05, "loss": 1.6944, "step": 6454 }, { "epoch": 2.267882187938289, "grad_norm": 13.755622863769531, "learning_rate": 4.296789777154434e-05, "loss": 1.7691, "step": 6468 }, { "epoch": 2.272791023842917, "grad_norm": 10.97589111328125, "learning_rate": 4.2940626460963066e-05, "loss": 1.7878, "step": 6482 }, { "epoch": 2.2776998597475457, "grad_norm": 10.423060417175293, "learning_rate": 4.29133551503818e-05, "loss": 1.6882, "step": 6496 }, { "epoch": 2.282608695652174, "grad_norm": 12.56727409362793, "learning_rate": 4.2886083839800536e-05, "loss": 1.7335, "step": 6510 }, { "epoch": 2.2875175315568024, "grad_norm": 8.888439178466797, "learning_rate": 4.285881252921926e-05, "loss": 1.8622, "step": 6524 }, { "epoch": 2.2924263674614305, "grad_norm": 8.066498756408691, "learning_rate": 4.283154121863799e-05, "loss": 1.8131, "step": 6538 }, { "epoch": 
2.297335203366059, "grad_norm": 9.842330932617188, "learning_rate": 4.2804269908056726e-05, "loss": 1.7566, "step": 6552 }, { "epoch": 2.302244039270687, "grad_norm": 12.25030517578125, "learning_rate": 4.277699859747546e-05, "loss": 1.7828, "step": 6566 }, { "epoch": 2.3071528751753156, "grad_norm": 12.484109878540039, "learning_rate": 4.274972728689419e-05, "loss": 1.7855, "step": 6580 }, { "epoch": 2.3120617110799437, "grad_norm": 10.326115608215332, "learning_rate": 4.272245597631292e-05, "loss": 1.8685, "step": 6594 }, { "epoch": 2.3169705469845723, "grad_norm": 12.960858345031738, "learning_rate": 4.269518466573165e-05, "loss": 1.7891, "step": 6608 }, { "epoch": 2.3218793828892004, "grad_norm": 14.57947063446045, "learning_rate": 4.2667913355150385e-05, "loss": 1.8456, "step": 6622 }, { "epoch": 2.326788218793829, "grad_norm": 8.798315048217773, "learning_rate": 4.264064204456912e-05, "loss": 1.8112, "step": 6636 }, { "epoch": 2.3316970546984574, "grad_norm": 9.462950706481934, "learning_rate": 4.261337073398785e-05, "loss": 1.7658, "step": 6650 }, { "epoch": 2.3366058906030855, "grad_norm": 8.18662166595459, "learning_rate": 4.2586099423406576e-05, "loss": 1.7632, "step": 6664 }, { "epoch": 2.3415147265077136, "grad_norm": 9.434016227722168, "learning_rate": 4.255882811282531e-05, "loss": 1.7674, "step": 6678 }, { "epoch": 2.346423562412342, "grad_norm": 8.215347290039062, "learning_rate": 4.2531556802244045e-05, "loss": 1.798, "step": 6692 }, { "epoch": 2.3513323983169707, "grad_norm": 10.631091117858887, "learning_rate": 4.250428549166277e-05, "loss": 1.6382, "step": 6706 }, { "epoch": 2.356241234221599, "grad_norm": 8.78810977935791, "learning_rate": 4.24770141810815e-05, "loss": 1.738, "step": 6720 }, { "epoch": 2.3611500701262274, "grad_norm": 13.026304244995117, "learning_rate": 4.2449742870500235e-05, "loss": 1.7538, "step": 6734 }, { "epoch": 2.3660589060308554, "grad_norm": 9.983083724975586, "learning_rate": 4.242247155991897e-05, "loss": 1.7183, 
"step": 6748 }, { "epoch": 2.370967741935484, "grad_norm": 12.937304496765137, "learning_rate": 4.23952002493377e-05, "loss": 1.8304, "step": 6762 }, { "epoch": 2.375876577840112, "grad_norm": 10.261804580688477, "learning_rate": 4.236792893875643e-05, "loss": 1.6787, "step": 6776 }, { "epoch": 2.3807854137447406, "grad_norm": 8.588639259338379, "learning_rate": 4.234065762817516e-05, "loss": 1.734, "step": 6790 }, { "epoch": 2.3856942496493687, "grad_norm": 9.98062515258789, "learning_rate": 4.2313386317593894e-05, "loss": 1.6642, "step": 6804 }, { "epoch": 2.3906030855539973, "grad_norm": 11.090353012084961, "learning_rate": 4.228611500701262e-05, "loss": 1.8679, "step": 6818 }, { "epoch": 2.3955119214586253, "grad_norm": 11.746686935424805, "learning_rate": 4.225884369643136e-05, "loss": 1.7671, "step": 6832 }, { "epoch": 2.400420757363254, "grad_norm": 10.1532564163208, "learning_rate": 4.2231572385850085e-05, "loss": 1.7685, "step": 6846 }, { "epoch": 2.405329593267882, "grad_norm": 12.835618019104004, "learning_rate": 4.220430107526882e-05, "loss": 1.6472, "step": 6860 }, { "epoch": 2.4102384291725105, "grad_norm": 15.137368202209473, "learning_rate": 4.2177029764687554e-05, "loss": 1.7152, "step": 6874 }, { "epoch": 2.415147265077139, "grad_norm": 9.470553398132324, "learning_rate": 4.214975845410628e-05, "loss": 1.739, "step": 6888 }, { "epoch": 2.420056100981767, "grad_norm": 10.057332992553711, "learning_rate": 4.2122487143525016e-05, "loss": 1.7944, "step": 6902 }, { "epoch": 2.4249649368863957, "grad_norm": 8.357356071472168, "learning_rate": 4.2095215832943744e-05, "loss": 1.7565, "step": 6916 }, { "epoch": 2.429873772791024, "grad_norm": 10.46395206451416, "learning_rate": 4.206794452236248e-05, "loss": 1.6592, "step": 6930 }, { "epoch": 2.4347826086956523, "grad_norm": 12.0501070022583, "learning_rate": 4.204067321178121e-05, "loss": 1.6994, "step": 6944 }, { "epoch": 2.4396914446002804, "grad_norm": 10.903725624084473, "learning_rate": 
4.201340190119994e-05, "loss": 1.7046, "step": 6958 }, { "epoch": 2.444600280504909, "grad_norm": 10.537360191345215, "learning_rate": 4.198613059061867e-05, "loss": 1.6934, "step": 6972 }, { "epoch": 2.449509116409537, "grad_norm": 13.187154769897461, "learning_rate": 4.1958859280037404e-05, "loss": 1.8721, "step": 6986 }, { "epoch": 2.4544179523141656, "grad_norm": 8.556586265563965, "learning_rate": 4.193158796945613e-05, "loss": 1.7617, "step": 7000 }, { "epoch": 2.4593267882187937, "grad_norm": 9.357501029968262, "learning_rate": 4.1904316658874866e-05, "loss": 1.7198, "step": 7014 }, { "epoch": 2.4642356241234222, "grad_norm": 15.635339736938477, "learning_rate": 4.18770453482936e-05, "loss": 1.7504, "step": 7028 }, { "epoch": 2.4691444600280503, "grad_norm": 12.526928901672363, "learning_rate": 4.184977403771233e-05, "loss": 1.777, "step": 7042 }, { "epoch": 2.474053295932679, "grad_norm": 11.7166748046875, "learning_rate": 4.1822502727131056e-05, "loss": 1.7545, "step": 7056 }, { "epoch": 2.478962131837307, "grad_norm": 9.757047653198242, "learning_rate": 4.179523141654979e-05, "loss": 1.8048, "step": 7070 }, { "epoch": 2.4838709677419355, "grad_norm": 22.93931007385254, "learning_rate": 4.1767960105968526e-05, "loss": 1.7421, "step": 7084 }, { "epoch": 2.4887798036465636, "grad_norm": 9.293508529663086, "learning_rate": 4.1740688795387253e-05, "loss": 1.7118, "step": 7098 }, { "epoch": 2.493688639551192, "grad_norm": 9.60457706451416, "learning_rate": 4.171341748480599e-05, "loss": 1.7908, "step": 7112 }, { "epoch": 2.4985974754558207, "grad_norm": 11.589381217956543, "learning_rate": 4.1686146174224716e-05, "loss": 1.6851, "step": 7126 }, { "epoch": 2.5035063113604488, "grad_norm": 8.966891288757324, "learning_rate": 4.165887486364345e-05, "loss": 1.8401, "step": 7140 }, { "epoch": 2.5084151472650773, "grad_norm": 9.93249797821045, "learning_rate": 4.1631603553062185e-05, "loss": 1.7288, "step": 7154 }, { "epoch": 2.5133239831697054, "grad_norm": 
10.140751838684082, "learning_rate": 4.160433224248091e-05, "loss": 1.7088, "step": 7168 }, { "epoch": 2.518232819074334, "grad_norm": 9.510957717895508, "learning_rate": 4.157706093189964e-05, "loss": 1.7797, "step": 7182 }, { "epoch": 2.523141654978962, "grad_norm": 8.722766876220703, "learning_rate": 4.1549789621318375e-05, "loss": 1.6607, "step": 7196 }, { "epoch": 2.5280504908835906, "grad_norm": 10.606809616088867, "learning_rate": 4.152251831073711e-05, "loss": 1.5949, "step": 7210 }, { "epoch": 2.5329593267882187, "grad_norm": 10.865644454956055, "learning_rate": 4.149524700015584e-05, "loss": 1.6778, "step": 7224 }, { "epoch": 2.537868162692847, "grad_norm": 17.56764793395996, "learning_rate": 4.1467975689574566e-05, "loss": 1.8133, "step": 7238 }, { "epoch": 2.5427769985974753, "grad_norm": 10.09190559387207, "learning_rate": 4.14407043789933e-05, "loss": 1.7122, "step": 7252 }, { "epoch": 2.547685834502104, "grad_norm": 11.04134464263916, "learning_rate": 4.1413433068412035e-05, "loss": 1.711, "step": 7266 }, { "epoch": 2.552594670406732, "grad_norm": 13.115461349487305, "learning_rate": 4.138616175783076e-05, "loss": 1.6347, "step": 7280 }, { "epoch": 2.5575035063113605, "grad_norm": 12.689261436462402, "learning_rate": 4.135889044724949e-05, "loss": 1.798, "step": 7294 }, { "epoch": 2.562412342215989, "grad_norm": 11.928689956665039, "learning_rate": 4.1331619136668225e-05, "loss": 1.7243, "step": 7308 }, { "epoch": 2.567321178120617, "grad_norm": 11.615339279174805, "learning_rate": 4.130434782608696e-05, "loss": 1.778, "step": 7322 }, { "epoch": 2.572230014025245, "grad_norm": 8.334004402160645, "learning_rate": 4.1277076515505694e-05, "loss": 1.6859, "step": 7336 }, { "epoch": 2.5771388499298737, "grad_norm": 8.651143074035645, "learning_rate": 4.124980520492442e-05, "loss": 1.7429, "step": 7350 }, { "epoch": 2.5820476858345023, "grad_norm": 8.387301445007324, "learning_rate": 4.122253389434315e-05, "loss": 1.7024, "step": 7364 }, { "epoch": 
2.5869565217391304, "grad_norm": 10.35479736328125, "learning_rate": 4.1195262583761885e-05, "loss": 1.735, "step": 7378 }, { "epoch": 2.591865357643759, "grad_norm": 9.068761825561523, "learning_rate": 4.116799127318062e-05, "loss": 1.8053, "step": 7392 }, { "epoch": 2.596774193548387, "grad_norm": 9.8923921585083, "learning_rate": 4.114071996259935e-05, "loss": 1.6467, "step": 7406 }, { "epoch": 2.6016830294530155, "grad_norm": 12.428220748901367, "learning_rate": 4.1113448652018075e-05, "loss": 1.657, "step": 7420 }, { "epoch": 2.6065918653576436, "grad_norm": 8.930908203125, "learning_rate": 4.108617734143681e-05, "loss": 1.7012, "step": 7434 }, { "epoch": 2.611500701262272, "grad_norm": 9.074140548706055, "learning_rate": 4.1058906030855544e-05, "loss": 1.7329, "step": 7448 }, { "epoch": 2.6164095371669003, "grad_norm": 10.447490692138672, "learning_rate": 4.103163472027428e-05, "loss": 1.7106, "step": 7462 }, { "epoch": 2.621318373071529, "grad_norm": 11.242508888244629, "learning_rate": 4.1004363409693e-05, "loss": 1.6228, "step": 7476 }, { "epoch": 2.6262272089761574, "grad_norm": 9.8106689453125, "learning_rate": 4.0977092099111734e-05, "loss": 1.7399, "step": 7490 }, { "epoch": 2.6311360448807855, "grad_norm": 9.90949535369873, "learning_rate": 4.094982078853047e-05, "loss": 1.6294, "step": 7504 }, { "epoch": 2.6360448807854135, "grad_norm": 8.398763656616211, "learning_rate": 4.0922549477949204e-05, "loss": 1.781, "step": 7518 }, { "epoch": 2.640953716690042, "grad_norm": 10.50091552734375, "learning_rate": 4.089527816736793e-05, "loss": 1.6618, "step": 7532 }, { "epoch": 2.6458625525946706, "grad_norm": 8.735673904418945, "learning_rate": 4.086800685678666e-05, "loss": 1.661, "step": 7546 }, { "epoch": 2.6507713884992987, "grad_norm": 8.890250205993652, "learning_rate": 4.0840735546205394e-05, "loss": 1.7206, "step": 7560 }, { "epoch": 2.655680224403927, "grad_norm": 13.577272415161133, "learning_rate": 4.081346423562413e-05, "loss": 1.7485, "step": 
7574 }, { "epoch": 2.6605890603085554, "grad_norm": 11.667314529418945, "learning_rate": 4.0786192925042856e-05, "loss": 1.7351, "step": 7588 }, { "epoch": 2.665497896213184, "grad_norm": 9.145251274108887, "learning_rate": 4.0758921614461584e-05, "loss": 1.6505, "step": 7602 }, { "epoch": 2.670406732117812, "grad_norm": 10.979633331298828, "learning_rate": 4.073165030388032e-05, "loss": 1.6577, "step": 7616 }, { "epoch": 2.6753155680224405, "grad_norm": 9.944910049438477, "learning_rate": 4.070437899329905e-05, "loss": 1.7226, "step": 7630 }, { "epoch": 2.6802244039270686, "grad_norm": 11.810866355895996, "learning_rate": 4.067710768271779e-05, "loss": 1.6856, "step": 7644 }, { "epoch": 2.685133239831697, "grad_norm": 9.7964448928833, "learning_rate": 4.0649836372136516e-05, "loss": 1.7153, "step": 7658 }, { "epoch": 2.6900420757363253, "grad_norm": 9.244463920593262, "learning_rate": 4.0622565061555244e-05, "loss": 1.7191, "step": 7672 }, { "epoch": 2.694950911640954, "grad_norm": 10.865559577941895, "learning_rate": 4.059529375097398e-05, "loss": 1.7482, "step": 7686 }, { "epoch": 2.699859747545582, "grad_norm": 12.302393913269043, "learning_rate": 4.056802244039271e-05, "loss": 1.6683, "step": 7700 }, { "epoch": 2.7047685834502104, "grad_norm": 11.497150421142578, "learning_rate": 4.054075112981144e-05, "loss": 1.7658, "step": 7714 }, { "epoch": 2.709677419354839, "grad_norm": 9.789331436157227, "learning_rate": 4.051347981923017e-05, "loss": 1.6385, "step": 7728 }, { "epoch": 2.714586255259467, "grad_norm": 10.493391990661621, "learning_rate": 4.04862085086489e-05, "loss": 1.6184, "step": 7742 }, { "epoch": 2.719495091164095, "grad_norm": 10.430872917175293, "learning_rate": 4.045893719806764e-05, "loss": 1.6985, "step": 7756 }, { "epoch": 2.7244039270687237, "grad_norm": 9.537189483642578, "learning_rate": 4.0431665887486365e-05, "loss": 1.6931, "step": 7770 }, { "epoch": 2.7293127629733522, "grad_norm": 12.373956680297852, "learning_rate": 
4.04043945769051e-05, "loss": 1.7604, "step": 7784 }, { "epoch": 2.7342215988779803, "grad_norm": 16.21540069580078, "learning_rate": 4.037712326632383e-05, "loss": 1.7919, "step": 7798 }, { "epoch": 2.7391304347826084, "grad_norm": 10.272254943847656, "learning_rate": 4.034985195574256e-05, "loss": 1.6539, "step": 7812 }, { "epoch": 2.744039270687237, "grad_norm": 10.454662322998047, "learning_rate": 4.032258064516129e-05, "loss": 1.7325, "step": 7826 }, { "epoch": 2.7489481065918655, "grad_norm": 10.469573020935059, "learning_rate": 4.0295309334580025e-05, "loss": 1.6232, "step": 7840 }, { "epoch": 2.7538569424964936, "grad_norm": 14.109940528869629, "learning_rate": 4.026803802399875e-05, "loss": 1.7338, "step": 7854 }, { "epoch": 2.758765778401122, "grad_norm": 8.417902946472168, "learning_rate": 4.024076671341749e-05, "loss": 1.5991, "step": 7868 }, { "epoch": 2.7636746143057502, "grad_norm": 13.251653671264648, "learning_rate": 4.021349540283622e-05, "loss": 1.6268, "step": 7882 }, { "epoch": 2.7685834502103788, "grad_norm": 12.4844388961792, "learning_rate": 4.018622409225495e-05, "loss": 1.6361, "step": 7896 }, { "epoch": 2.773492286115007, "grad_norm": 9.43792724609375, "learning_rate": 4.0158952781673684e-05, "loss": 1.7181, "step": 7910 }, { "epoch": 2.7784011220196354, "grad_norm": 10.29134750366211, "learning_rate": 4.013168147109241e-05, "loss": 1.7368, "step": 7924 }, { "epoch": 2.7833099579242635, "grad_norm": 12.619813919067383, "learning_rate": 4.010441016051115e-05, "loss": 1.8055, "step": 7938 }, { "epoch": 2.788218793828892, "grad_norm": 10.6949462890625, "learning_rate": 4.0077138849929875e-05, "loss": 1.7494, "step": 7952 }, { "epoch": 2.7931276297335206, "grad_norm": 9.839770317077637, "learning_rate": 4.004986753934861e-05, "loss": 1.7928, "step": 7966 }, { "epoch": 2.7980364656381487, "grad_norm": 10.024321556091309, "learning_rate": 4.002259622876734e-05, "loss": 1.6696, "step": 7980 }, { "epoch": 2.8029453015427768, "grad_norm": 
10.886838912963867, "learning_rate": 3.999532491818607e-05, "loss": 1.6434, "step": 7994 }, { "epoch": 2.8078541374474053, "grad_norm": 10.482260704040527, "learning_rate": 3.99680536076048e-05, "loss": 1.6244, "step": 8008 }, { "epoch": 2.812762973352034, "grad_norm": 8.974091529846191, "learning_rate": 3.9940782297023534e-05, "loss": 1.6397, "step": 8022 }, { "epoch": 2.817671809256662, "grad_norm": 12.7367525100708, "learning_rate": 3.991351098644226e-05, "loss": 1.679, "step": 8036 }, { "epoch": 2.8225806451612905, "grad_norm": 9.750895500183105, "learning_rate": 3.9886239675860997e-05, "loss": 1.7336, "step": 8050 }, { "epoch": 2.8274894810659186, "grad_norm": 10.983283996582031, "learning_rate": 3.9858968365279724e-05, "loss": 1.6762, "step": 8064 }, { "epoch": 2.832398316970547, "grad_norm": 9.592301368713379, "learning_rate": 3.983169705469846e-05, "loss": 1.6798, "step": 8078 }, { "epoch": 2.837307152875175, "grad_norm": 9.510612487792969, "learning_rate": 3.9804425744117194e-05, "loss": 1.6819, "step": 8092 }, { "epoch": 2.8422159887798037, "grad_norm": 9.170543670654297, "learning_rate": 3.977715443353592e-05, "loss": 1.691, "step": 8106 }, { "epoch": 2.847124824684432, "grad_norm": 9.918283462524414, "learning_rate": 3.974988312295465e-05, "loss": 1.6384, "step": 8120 }, { "epoch": 2.8520336605890604, "grad_norm": 9.282137870788574, "learning_rate": 3.9722611812373384e-05, "loss": 1.7032, "step": 8134 }, { "epoch": 2.8569424964936885, "grad_norm": 11.447927474975586, "learning_rate": 3.969534050179212e-05, "loss": 1.6394, "step": 8148 }, { "epoch": 2.861851332398317, "grad_norm": 8.801981925964355, "learning_rate": 3.9668069191210846e-05, "loss": 1.899, "step": 8162 }, { "epoch": 2.866760168302945, "grad_norm": 8.942591667175293, "learning_rate": 3.964079788062958e-05, "loss": 1.6419, "step": 8176 }, { "epoch": 2.8716690042075736, "grad_norm": 8.878727912902832, "learning_rate": 3.961352657004831e-05, "loss": 1.667, "step": 8190 }, { "epoch": 
2.876577840112202, "grad_norm": 8.497553825378418, "learning_rate": 3.958625525946704e-05, "loss": 1.6403, "step": 8204 }, { "epoch": 2.8814866760168303, "grad_norm": 12.34414291381836, "learning_rate": 3.955898394888578e-05, "loss": 1.7323, "step": 8218 }, { "epoch": 2.8863955119214584, "grad_norm": 11.00311279296875, "learning_rate": 3.9531712638304506e-05, "loss": 1.7104, "step": 8232 }, { "epoch": 2.891304347826087, "grad_norm": 11.268806457519531, "learning_rate": 3.9504441327723234e-05, "loss": 1.6479, "step": 8246 }, { "epoch": 2.8962131837307155, "grad_norm": 10.241960525512695, "learning_rate": 3.947717001714197e-05, "loss": 1.6541, "step": 8260 }, { "epoch": 2.9011220196353436, "grad_norm": 10.397504806518555, "learning_rate": 3.94498987065607e-05, "loss": 1.6445, "step": 8274 }, { "epoch": 2.906030855539972, "grad_norm": 9.17982292175293, "learning_rate": 3.942262739597943e-05, "loss": 1.6894, "step": 8288 }, { "epoch": 2.9109396914446, "grad_norm": 9.627437591552734, "learning_rate": 3.939535608539816e-05, "loss": 1.6245, "step": 8302 }, { "epoch": 2.9158485273492287, "grad_norm": 12.00189208984375, "learning_rate": 3.936808477481689e-05, "loss": 1.7071, "step": 8316 }, { "epoch": 2.920757363253857, "grad_norm": 9.693345069885254, "learning_rate": 3.934081346423563e-05, "loss": 1.7189, "step": 8330 }, { "epoch": 2.9256661991584854, "grad_norm": 8.787467956542969, "learning_rate": 3.931354215365436e-05, "loss": 1.7679, "step": 8344 }, { "epoch": 2.9305750350631135, "grad_norm": 10.095147132873535, "learning_rate": 3.928627084307308e-05, "loss": 1.704, "step": 8358 }, { "epoch": 2.935483870967742, "grad_norm": 10.231818199157715, "learning_rate": 3.925899953249182e-05, "loss": 1.7023, "step": 8372 }, { "epoch": 2.9403927068723705, "grad_norm": 12.534753799438477, "learning_rate": 3.923172822191055e-05, "loss": 1.544, "step": 8386 }, { "epoch": 2.9453015427769986, "grad_norm": 11.50756549835205, "learning_rate": 3.920445691132929e-05, "loss": 1.6946, 
"step": 8400 }, { "epoch": 2.9502103786816267, "grad_norm": 11.215807914733887, "learning_rate": 3.9177185600748015e-05, "loss": 1.8326, "step": 8414 }, { "epoch": 2.9551192145862553, "grad_norm": 14.409260749816895, "learning_rate": 3.914991429016674e-05, "loss": 1.6875, "step": 8428 }, { "epoch": 2.960028050490884, "grad_norm": 12.639996528625488, "learning_rate": 3.912264297958548e-05, "loss": 1.7613, "step": 8442 }, { "epoch": 2.964936886395512, "grad_norm": 8.737318992614746, "learning_rate": 3.909537166900421e-05, "loss": 1.6456, "step": 8456 }, { "epoch": 2.96984572230014, "grad_norm": 10.508599281311035, "learning_rate": 3.906810035842295e-05, "loss": 1.5451, "step": 8470 }, { "epoch": 2.9747545582047685, "grad_norm": 9.872836112976074, "learning_rate": 3.904277699859748e-05, "loss": 1.7796, "step": 8484 }, { "epoch": 2.979663394109397, "grad_norm": 10.325733184814453, "learning_rate": 3.9015505688016206e-05, "loss": 1.6729, "step": 8498 }, { "epoch": 2.984572230014025, "grad_norm": 8.751960754394531, "learning_rate": 3.898823437743494e-05, "loss": 1.7108, "step": 8512 }, { "epoch": 2.9894810659186537, "grad_norm": 7.909047603607178, "learning_rate": 3.8960963066853675e-05, "loss": 1.7038, "step": 8526 }, { "epoch": 2.994389901823282, "grad_norm": 10.97799015045166, "learning_rate": 3.89336917562724e-05, "loss": 1.7719, "step": 8540 }, { "epoch": 2.9992987377279103, "grad_norm": 12.486078262329102, "learning_rate": 3.890642044569113e-05, "loss": 1.8569, "step": 8554 }, { "epoch": 3.0, "eval_loss": 1.5925724506378174, "eval_map": 0.0875, "eval_map_50": 0.1334, "eval_map_75": 0.0986, "eval_map_applique": 0.0014, "eval_map_bag, wallet": 0.0644, "eval_map_bead": 0.0101, "eval_map_belt": 0.0922, "eval_map_bow": 0.0, "eval_map_buckle": 0.0912, "eval_map_cape": 0.0, "eval_map_cardigan": 0.0, "eval_map_coat": 0.1326, "eval_map_collar": 0.1214, "eval_map_dress": 0.4072, "eval_map_epaulette": 0.0072, "eval_map_flower": 0.0, "eval_map_fringe": 0.0, "eval_map_glasses": 
0.1738, "eval_map_glove": 0.0197, "eval_map_hat": 0.1617, "eval_map_headband, head covering, hair accessory": 0.0445, "eval_map_hood": 0.0463, "eval_map_jacket": 0.2299, "eval_map_jumpsuit": 0.0, "eval_map_lapel": 0.0715, "eval_map_large": 0.0882, "eval_map_leg warmer": 0.0, "eval_map_medium": 0.072, "eval_map_neckline": 0.2878, "eval_map_pants": 0.3362, "eval_map_pocket": 0.0693, "eval_map_ribbon": 0.0, "eval_map_rivet": 0.0085, "eval_map_ruffle": 0.0178, "eval_map_scarf": 0.0, "eval_map_sequin": 0.0, "eval_map_shirt, blouse": 0.0193, "eval_map_shoe": 0.3701, "eval_map_shorts": 0.1873, "eval_map_skirt": 0.2238, "eval_map_sleeve": 0.2818, "eval_map_small": 0.0, "eval_map_sock": 0.0405, "eval_map_sweater": 0.0149, "eval_map_tassel": 0.0, "eval_map_tie": 0.0539, "eval_map_tights, stockings": 0.134, "eval_map_top, t-shirt, sweatshirt": 0.1208, "eval_map_umbrella": 0.1248, "eval_map_vest": 0.0, "eval_map_watch": 0.0291, "eval_map_zipper": 0.0316, "eval_mar_1": 0.1479, "eval_mar_10": 0.3112, "eval_mar_100": 0.3168, "eval_mar_100_applique": 0.0639, "eval_mar_100_bag, wallet": 0.4263, "eval_mar_100_bead": 0.2402, "eval_mar_100_belt": 0.5915, "eval_mar_100_bow": 0.0, "eval_mar_100_buckle": 0.3284, "eval_mar_100_cape": 0.0, "eval_mar_100_cardigan": 0.0, "eval_mar_100_coat": 0.4515, "eval_mar_100_collar": 0.4972, "eval_mar_100_dress": 0.8024, "eval_mar_100_epaulette": 0.1857, "eval_mar_100_flower": 0.0, "eval_mar_100_fringe": 0.0, "eval_mar_100_glasses": 0.6085, "eval_mar_100_glove": 0.0484, "eval_mar_100_hat": 0.5178, "eval_mar_100_headband, head covering, hair accessory": 0.411, "eval_mar_100_hood": 0.1219, "eval_mar_100_jacket": 0.733, "eval_mar_100_jumpsuit": 0.0, "eval_mar_100_lapel": 0.4585, "eval_mar_100_leg warmer": 0.0, "eval_mar_100_neckline": 0.7494, "eval_mar_100_pants": 0.7739, "eval_mar_100_pocket": 0.6161, "eval_mar_100_ribbon": 0.0, "eval_mar_100_rivet": 0.105, "eval_mar_100_ruffle": 0.1434, "eval_mar_100_scarf": 0.0, "eval_mar_100_sequin": 0.0, 
"eval_mar_100_shirt, blouse": 0.1337, "eval_mar_100_shoe": 0.7602, "eval_mar_100_shorts": 0.5802, "eval_mar_100_skirt": 0.6877, "eval_mar_100_sleeve": 0.6913, "eval_mar_100_sock": 0.4294, "eval_mar_100_sweater": 0.0143, "eval_mar_100_tassel": 0.0, "eval_mar_100_tie": 0.2667, "eval_mar_100_tights, stockings": 0.6549, "eval_mar_100_top, t-shirt, sweatshirt": 0.6709, "eval_mar_100_umbrella": 0.12, "eval_mar_100_vest": 0.0, "eval_mar_100_watch": 0.4434, "eval_mar_100_zipper": 0.2474, "eval_mar_large": 0.3194, "eval_mar_medium": 0.1474, "eval_mar_small": 0.0, "eval_runtime": 79.3948, "eval_samples_per_second": 14.585, "eval_steps_per_second": 0.466, "step": 8556 }, { "epoch": 3.0042075736325384, "grad_norm": 11.367321014404297, "learning_rate": 3.8879149135109865e-05, "loss": 1.6779, "step": 8568 }, { "epoch": 3.009116409537167, "grad_norm": 10.493619918823242, "learning_rate": 3.88518778245286e-05, "loss": 1.6881, "step": 8582 }, { "epoch": 3.014025245441795, "grad_norm": 10.880057334899902, "learning_rate": 3.8824606513947335e-05, "loss": 1.7067, "step": 8596 }, { "epoch": 3.0189340813464236, "grad_norm": 11.518646240234375, "learning_rate": 3.8797335203366056e-05, "loss": 1.5436, "step": 8610 }, { "epoch": 3.0238429172510517, "grad_norm": 8.62612247467041, "learning_rate": 3.877006389278479e-05, "loss": 1.616, "step": 8624 }, { "epoch": 3.0287517531556802, "grad_norm": 13.416956901550293, "learning_rate": 3.8742792582203525e-05, "loss": 1.697, "step": 8638 }, { "epoch": 3.0336605890603083, "grad_norm": 12.201942443847656, "learning_rate": 3.871552127162226e-05, "loss": 1.6799, "step": 8652 }, { "epoch": 3.038569424964937, "grad_norm": 8.00610065460205, "learning_rate": 3.868824996104099e-05, "loss": 1.6159, "step": 8666 }, { "epoch": 3.0434782608695654, "grad_norm": 9.506668090820312, "learning_rate": 3.8660978650459715e-05, "loss": 1.7577, "step": 8680 }, { "epoch": 3.0483870967741935, "grad_norm": 11.961243629455566, "learning_rate": 3.863370733987845e-05, "loss": 
1.6275, "step": 8694 }, { "epoch": 3.053295932678822, "grad_norm": 10.81982421875, "learning_rate": 3.8606436029297184e-05, "loss": 1.6251, "step": 8708 }, { "epoch": 3.05820476858345, "grad_norm": 10.379215240478516, "learning_rate": 3.857916471871591e-05, "loss": 1.6721, "step": 8722 }, { "epoch": 3.0631136044880787, "grad_norm": 9.837891578674316, "learning_rate": 3.855189340813464e-05, "loss": 1.7039, "step": 8736 }, { "epoch": 3.0680224403927068, "grad_norm": 13.380369186401367, "learning_rate": 3.8524622097553375e-05, "loss": 1.6558, "step": 8750 }, { "epoch": 3.0729312762973353, "grad_norm": 11.380098342895508, "learning_rate": 3.849735078697211e-05, "loss": 1.7553, "step": 8764 }, { "epoch": 3.0778401122019634, "grad_norm": 11.506340980529785, "learning_rate": 3.8470079476390844e-05, "loss": 1.626, "step": 8778 }, { "epoch": 3.082748948106592, "grad_norm": 9.561569213867188, "learning_rate": 3.844280816580957e-05, "loss": 1.6987, "step": 8792 }, { "epoch": 3.08765778401122, "grad_norm": 9.783227920532227, "learning_rate": 3.84155368552283e-05, "loss": 1.6628, "step": 8806 }, { "epoch": 3.0925666199158486, "grad_norm": 10.126801490783691, "learning_rate": 3.8388265544647034e-05, "loss": 1.5723, "step": 8820 }, { "epoch": 3.0974754558204767, "grad_norm": 12.15204906463623, "learning_rate": 3.836099423406577e-05, "loss": 1.7645, "step": 8834 }, { "epoch": 3.102384291725105, "grad_norm": 10.13781452178955, "learning_rate": 3.8333722923484497e-05, "loss": 1.6186, "step": 8848 }, { "epoch": 3.1072931276297333, "grad_norm": 9.259557723999023, "learning_rate": 3.8306451612903224e-05, "loss": 1.6963, "step": 8862 }, { "epoch": 3.112201963534362, "grad_norm": 8.444145202636719, "learning_rate": 3.827918030232196e-05, "loss": 1.7032, "step": 8876 }, { "epoch": 3.1171107994389904, "grad_norm": 16.62631607055664, "learning_rate": 3.8251908991740694e-05, "loss": 1.6282, "step": 8890 }, { "epoch": 3.1220196353436185, "grad_norm": 11.481710433959961, "learning_rate": 
3.822463768115942e-05, "loss": 1.642, "step": 8904 }, { "epoch": 3.126928471248247, "grad_norm": 8.826152801513672, "learning_rate": 3.819736637057815e-05, "loss": 1.638, "step": 8918 }, { "epoch": 3.131837307152875, "grad_norm": 10.16854190826416, "learning_rate": 3.8170095059996884e-05, "loss": 1.6667, "step": 8932 }, { "epoch": 3.1367461430575037, "grad_norm": 12.8665189743042, "learning_rate": 3.814282374941562e-05, "loss": 1.6684, "step": 8946 }, { "epoch": 3.1416549789621318, "grad_norm": 10.484704971313477, "learning_rate": 3.8115552438834346e-05, "loss": 1.6529, "step": 8960 }, { "epoch": 3.1465638148667603, "grad_norm": 11.737460136413574, "learning_rate": 3.808828112825308e-05, "loss": 1.7089, "step": 8974 }, { "epoch": 3.1514726507713884, "grad_norm": 10.074626922607422, "learning_rate": 3.806100981767181e-05, "loss": 1.5559, "step": 8988 }, { "epoch": 3.156381486676017, "grad_norm": 9.759592056274414, "learning_rate": 3.803373850709054e-05, "loss": 1.5909, "step": 9002 }, { "epoch": 3.161290322580645, "grad_norm": 9.62687873840332, "learning_rate": 3.800646719650928e-05, "loss": 1.6652, "step": 9016 }, { "epoch": 3.1661991584852736, "grad_norm": 16.35776710510254, "learning_rate": 3.7979195885928006e-05, "loss": 1.5978, "step": 9030 }, { "epoch": 3.1711079943899017, "grad_norm": 10.073527336120605, "learning_rate": 3.7951924575346734e-05, "loss": 1.6363, "step": 9044 }, { "epoch": 3.17601683029453, "grad_norm": 8.469326972961426, "learning_rate": 3.792465326476547e-05, "loss": 1.5641, "step": 9058 }, { "epoch": 3.1809256661991583, "grad_norm": 15.14966106414795, "learning_rate": 3.78973819541842e-05, "loss": 1.6536, "step": 9072 }, { "epoch": 3.185834502103787, "grad_norm": 9.0816011428833, "learning_rate": 3.787011064360293e-05, "loss": 1.7285, "step": 9086 }, { "epoch": 3.1907433380084154, "grad_norm": 12.279108047485352, "learning_rate": 3.7842839333021665e-05, "loss": 1.6365, "step": 9100 }, { "epoch": 3.1956521739130435, "grad_norm": 
9.20957088470459, "learning_rate": 3.781556802244039e-05, "loss": 1.6811, "step": 9114 }, { "epoch": 3.200561009817672, "grad_norm": 9.166754722595215, "learning_rate": 3.778829671185913e-05, "loss": 1.6235, "step": 9128 }, { "epoch": 3.2054698457223, "grad_norm": 12.226280212402344, "learning_rate": 3.7761025401277856e-05, "loss": 1.6878, "step": 9142 }, { "epoch": 3.2103786816269286, "grad_norm": 11.001879692077637, "learning_rate": 3.773375409069659e-05, "loss": 1.6418, "step": 9156 }, { "epoch": 3.2152875175315567, "grad_norm": 12.653868675231934, "learning_rate": 3.770648278011532e-05, "loss": 1.6822, "step": 9170 }, { "epoch": 3.2201963534361853, "grad_norm": 11.276497840881348, "learning_rate": 3.767921146953405e-05, "loss": 1.6046, "step": 9184 }, { "epoch": 3.2251051893408134, "grad_norm": 14.662013053894043, "learning_rate": 3.765194015895278e-05, "loss": 1.6231, "step": 9198 }, { "epoch": 3.230014025245442, "grad_norm": 9.71202278137207, "learning_rate": 3.7624668848371515e-05, "loss": 1.6455, "step": 9212 }, { "epoch": 3.23492286115007, "grad_norm": 16.2723388671875, "learning_rate": 3.759739753779025e-05, "loss": 1.671, "step": 9226 }, { "epoch": 3.2398316970546985, "grad_norm": 9.491569519042969, "learning_rate": 3.757012622720898e-05, "loss": 1.6282, "step": 9240 }, { "epoch": 3.2447405329593266, "grad_norm": 10.986793518066406, "learning_rate": 3.754285491662771e-05, "loss": 1.6429, "step": 9254 }, { "epoch": 3.249649368863955, "grad_norm": 13.151708602905273, "learning_rate": 3.751558360604644e-05, "loss": 1.656, "step": 9268 }, { "epoch": 3.2545582047685833, "grad_norm": 10.949094772338867, "learning_rate": 3.7488312295465174e-05, "loss": 1.6618, "step": 9282 }, { "epoch": 3.259467040673212, "grad_norm": 8.961837768554688, "learning_rate": 3.74610409848839e-05, "loss": 1.5818, "step": 9296 }, { "epoch": 3.26437587657784, "grad_norm": 10.108515739440918, "learning_rate": 3.743376967430264e-05, "loss": 1.7165, "step": 9310 }, { "epoch": 
3.2692847124824684, "grad_norm": 9.9966402053833, "learning_rate": 3.7406498363721365e-05, "loss": 1.6736, "step": 9324 }, { "epoch": 3.274193548387097, "grad_norm": 13.809403419494629, "learning_rate": 3.73792270531401e-05, "loss": 1.7412, "step": 9338 }, { "epoch": 3.279102384291725, "grad_norm": 10.990723609924316, "learning_rate": 3.7351955742558834e-05, "loss": 1.6214, "step": 9352 }, { "epoch": 3.2840112201963536, "grad_norm": 14.38874626159668, "learning_rate": 3.732468443197756e-05, "loss": 1.7069, "step": 9366 }, { "epoch": 3.2889200561009817, "grad_norm": 9.18278980255127, "learning_rate": 3.729741312139629e-05, "loss": 1.6566, "step": 9380 }, { "epoch": 3.2938288920056102, "grad_norm": 15.143953323364258, "learning_rate": 3.7270141810815024e-05, "loss": 1.6204, "step": 9394 }, { "epoch": 3.2987377279102383, "grad_norm": 13.364217758178711, "learning_rate": 3.724287050023376e-05, "loss": 1.6892, "step": 9408 }, { "epoch": 3.303646563814867, "grad_norm": 12.723637580871582, "learning_rate": 3.721559918965249e-05, "loss": 1.6477, "step": 9422 }, { "epoch": 3.308555399719495, "grad_norm": 13.52658748626709, "learning_rate": 3.7188327879071215e-05, "loss": 1.7016, "step": 9436 }, { "epoch": 3.3134642356241235, "grad_norm": 10.67376708984375, "learning_rate": 3.716105656848995e-05, "loss": 1.7572, "step": 9450 }, { "epoch": 3.3183730715287516, "grad_norm": 10.454672813415527, "learning_rate": 3.7133785257908684e-05, "loss": 1.6264, "step": 9464 }, { "epoch": 3.32328190743338, "grad_norm": 17.63751792907715, "learning_rate": 3.710651394732741e-05, "loss": 1.734, "step": 9478 }, { "epoch": 3.3281907433380082, "grad_norm": 11.990534782409668, "learning_rate": 3.707924263674614e-05, "loss": 1.6428, "step": 9492 }, { "epoch": 3.333099579242637, "grad_norm": 8.512350082397461, "learning_rate": 3.7051971326164874e-05, "loss": 1.6868, "step": 9506 }, { "epoch": 3.3380084151472653, "grad_norm": 12.821173667907715, "learning_rate": 3.702470001558361e-05, "loss": 1.6398, 
"step": 9520 }, { "epoch": 3.3429172510518934, "grad_norm": 8.805124282836914, "learning_rate": 3.699742870500234e-05, "loss": 1.6628, "step": 9534 }, { "epoch": 3.3478260869565215, "grad_norm": 10.618570327758789, "learning_rate": 3.697015739442107e-05, "loss": 1.6155, "step": 9548 }, { "epoch": 3.35273492286115, "grad_norm": 10.196632385253906, "learning_rate": 3.69428860838398e-05, "loss": 1.6771, "step": 9562 }, { "epoch": 3.3576437587657786, "grad_norm": 10.084875106811523, "learning_rate": 3.6915614773258533e-05, "loss": 1.6743, "step": 9576 }, { "epoch": 3.3625525946704067, "grad_norm": 10.951545715332031, "learning_rate": 3.688834346267727e-05, "loss": 1.6499, "step": 9590 }, { "epoch": 3.367461430575035, "grad_norm": 10.110026359558105, "learning_rate": 3.6861072152095996e-05, "loss": 1.578, "step": 9604 }, { "epoch": 3.3723702664796633, "grad_norm": 11.486991882324219, "learning_rate": 3.6833800841514724e-05, "loss": 1.6853, "step": 9618 }, { "epoch": 3.377279102384292, "grad_norm": 10.194928169250488, "learning_rate": 3.680652953093346e-05, "loss": 1.6282, "step": 9632 }, { "epoch": 3.38218793828892, "grad_norm": 11.62670612335205, "learning_rate": 3.677925822035219e-05, "loss": 1.6656, "step": 9646 }, { "epoch": 3.3870967741935485, "grad_norm": 10.153413772583008, "learning_rate": 3.675198690977093e-05, "loss": 1.609, "step": 9660 }, { "epoch": 3.3920056100981766, "grad_norm": 10.1113862991333, "learning_rate": 3.672471559918965e-05, "loss": 1.6446, "step": 9674 }, { "epoch": 3.396914446002805, "grad_norm": 10.905536651611328, "learning_rate": 3.669744428860838e-05, "loss": 1.6172, "step": 9688 }, { "epoch": 3.401823281907433, "grad_norm": 10.753458976745605, "learning_rate": 3.667017297802712e-05, "loss": 1.6312, "step": 9702 }, { "epoch": 3.4067321178120618, "grad_norm": 16.529098510742188, "learning_rate": 3.664290166744585e-05, "loss": 1.6848, "step": 9716 }, { "epoch": 3.41164095371669, "grad_norm": 10.726247787475586, "learning_rate": 
3.661563035686458e-05, "loss": 1.6212, "step": 9730 }, { "epoch": 3.4165497896213184, "grad_norm": 10.488245964050293, "learning_rate": 3.658835904628331e-05, "loss": 1.7289, "step": 9744 }, { "epoch": 3.421458625525947, "grad_norm": 10.276187896728516, "learning_rate": 3.656108773570204e-05, "loss": 1.655, "step": 9758 }, { "epoch": 3.426367461430575, "grad_norm": 10.538097381591797, "learning_rate": 3.653381642512078e-05, "loss": 1.6885, "step": 9772 }, { "epoch": 3.431276297335203, "grad_norm": 12.225607872009277, "learning_rate": 3.650654511453951e-05, "loss": 1.6674, "step": 9786 }, { "epoch": 3.4361851332398317, "grad_norm": 11.717621803283691, "learning_rate": 3.647927380395823e-05, "loss": 1.6485, "step": 9800 }, { "epoch": 3.44109396914446, "grad_norm": 10.804078102111816, "learning_rate": 3.645200249337697e-05, "loss": 1.7198, "step": 9814 }, { "epoch": 3.4460028050490883, "grad_norm": 6.7144856452941895, "learning_rate": 3.64247311827957e-05, "loss": 1.6644, "step": 9828 }, { "epoch": 3.450911640953717, "grad_norm": 9.771450996398926, "learning_rate": 3.639745987221444e-05, "loss": 1.6614, "step": 9842 }, { "epoch": 3.455820476858345, "grad_norm": 7.97376823425293, "learning_rate": 3.6370188561633165e-05, "loss": 1.5454, "step": 9856 }, { "epoch": 3.4607293127629735, "grad_norm": 10.677958488464355, "learning_rate": 3.634291725105189e-05, "loss": 1.6896, "step": 9870 }, { "epoch": 3.4656381486676016, "grad_norm": 13.293195724487305, "learning_rate": 3.631564594047063e-05, "loss": 1.6181, "step": 9884 }, { "epoch": 3.47054698457223, "grad_norm": 9.33857250213623, "learning_rate": 3.628837462988936e-05, "loss": 1.5823, "step": 9898 }, { "epoch": 3.475455820476858, "grad_norm": 11.867480278015137, "learning_rate": 3.626110331930809e-05, "loss": 1.6888, "step": 9912 }, { "epoch": 3.4803646563814867, "grad_norm": 7.777559757232666, "learning_rate": 3.623383200872682e-05, "loss": 1.6636, "step": 9926 }, { "epoch": 3.485273492286115, "grad_norm": 
13.903548240661621, "learning_rate": 3.620656069814555e-05, "loss": 1.5291, "step": 9940 }, { "epoch": 3.4901823281907434, "grad_norm": 10.636628150939941, "learning_rate": 3.6179289387564286e-05, "loss": 1.6514, "step": 9954 }, { "epoch": 3.4950911640953715, "grad_norm": 9.113883018493652, "learning_rate": 3.6152018076983014e-05, "loss": 1.6735, "step": 9968 }, { "epoch": 3.5, "grad_norm": 12.960830688476562, "learning_rate": 3.612474676640175e-05, "loss": 1.5965, "step": 9982 }, { "epoch": 3.5049088359046285, "grad_norm": 9.549434661865234, "learning_rate": 3.609747545582048e-05, "loss": 1.6522, "step": 9996 }, { "epoch": 3.5098176718092566, "grad_norm": 10.647913932800293, "learning_rate": 3.607020414523921e-05, "loss": 1.5417, "step": 10010 }, { "epoch": 3.5147265077138847, "grad_norm": 11.607914924621582, "learning_rate": 3.6042932834657946e-05, "loss": 1.6105, "step": 10024 }, { "epoch": 3.5196353436185133, "grad_norm": 9.875362396240234, "learning_rate": 3.6015661524076674e-05, "loss": 1.5793, "step": 10038 }, { "epoch": 3.524544179523142, "grad_norm": 11.207001686096191, "learning_rate": 3.59883902134954e-05, "loss": 1.6997, "step": 10052 }, { "epoch": 3.52945301542777, "grad_norm": 12.96037483215332, "learning_rate": 3.5961118902914136e-05, "loss": 1.6713, "step": 10066 }, { "epoch": 3.5343618513323984, "grad_norm": 10.965265274047852, "learning_rate": 3.593384759233287e-05, "loss": 1.5143, "step": 10080 }, { "epoch": 3.5392706872370265, "grad_norm": 14.32422161102295, "learning_rate": 3.59065762817516e-05, "loss": 1.6562, "step": 10094 }, { "epoch": 3.544179523141655, "grad_norm": 9.368932723999023, "learning_rate": 3.587930497117033e-05, "loss": 1.6146, "step": 10108 }, { "epoch": 3.549088359046283, "grad_norm": 11.610596656799316, "learning_rate": 3.585203366058906e-05, "loss": 1.5722, "step": 10122 }, { "epoch": 3.5539971949509117, "grad_norm": 10.019697189331055, "learning_rate": 3.5824762350007796e-05, "loss": 1.7386, "step": 10136 }, { "epoch": 
3.55890603085554, "grad_norm": 10.275087356567383, "learning_rate": 3.5797491039426524e-05, "loss": 1.6052, "step": 10150 }, { "epoch": 3.5638148667601683, "grad_norm": 10.79914379119873, "learning_rate": 3.577021972884526e-05, "loss": 1.6322, "step": 10164 }, { "epoch": 3.568723702664797, "grad_norm": 9.784767150878906, "learning_rate": 3.5742948418263986e-05, "loss": 1.5927, "step": 10178 }, { "epoch": 3.573632538569425, "grad_norm": 11.598213195800781, "learning_rate": 3.571567710768272e-05, "loss": 1.6066, "step": 10192 }, { "epoch": 3.578541374474053, "grad_norm": 10.225600242614746, "learning_rate": 3.568840579710145e-05, "loss": 1.6216, "step": 10206 }, { "epoch": 3.5834502103786816, "grad_norm": 11.653697967529297, "learning_rate": 3.566113448652018e-05, "loss": 1.6265, "step": 10220 }, { "epoch": 3.58835904628331, "grad_norm": 9.210939407348633, "learning_rate": 3.563386317593891e-05, "loss": 1.6402, "step": 10234 }, { "epoch": 3.5932678821879382, "grad_norm": 12.62119197845459, "learning_rate": 3.5606591865357645e-05, "loss": 1.6239, "step": 10248 }, { "epoch": 3.598176718092567, "grad_norm": 9.469446182250977, "learning_rate": 3.557932055477638e-05, "loss": 1.5262, "step": 10262 }, { "epoch": 3.603085553997195, "grad_norm": 11.418021202087402, "learning_rate": 3.555204924419511e-05, "loss": 1.6152, "step": 10276 }, { "epoch": 3.6079943899018234, "grad_norm": 14.434630393981934, "learning_rate": 3.552477793361384e-05, "loss": 1.668, "step": 10290 }, { "epoch": 3.6129032258064515, "grad_norm": 9.804949760437012, "learning_rate": 3.549750662303257e-05, "loss": 1.5137, "step": 10304 }, { "epoch": 3.61781206171108, "grad_norm": 9.849825859069824, "learning_rate": 3.5470235312451305e-05, "loss": 1.628, "step": 10318 }, { "epoch": 3.622720897615708, "grad_norm": 9.4788818359375, "learning_rate": 3.544296400187003e-05, "loss": 1.529, "step": 10332 }, { "epoch": 3.6276297335203367, "grad_norm": 11.775032043457031, "learning_rate": 3.541569269128877e-05, "loss": 
1.6187, "step": 10346 }, { "epoch": 3.632538569424965, "grad_norm": 11.226166725158691, "learning_rate": 3.5388421380707495e-05, "loss": 1.6199, "step": 10360 }, { "epoch": 3.6374474053295933, "grad_norm": 12.122457504272461, "learning_rate": 3.536115007012623e-05, "loss": 1.5231, "step": 10374 }, { "epoch": 3.6423562412342214, "grad_norm": 15.18635082244873, "learning_rate": 3.533387875954496e-05, "loss": 1.5551, "step": 10388 }, { "epoch": 3.64726507713885, "grad_norm": 10.09758186340332, "learning_rate": 3.530660744896369e-05, "loss": 1.6187, "step": 10402 }, { "epoch": 3.6521739130434785, "grad_norm": 11.849078178405762, "learning_rate": 3.527933613838243e-05, "loss": 1.6934, "step": 10416 }, { "epoch": 3.6570827489481066, "grad_norm": 11.756773948669434, "learning_rate": 3.5252064827801155e-05, "loss": 1.6421, "step": 10430 }, { "epoch": 3.6619915848527347, "grad_norm": 11.294310569763184, "learning_rate": 3.522479351721988e-05, "loss": 1.6379, "step": 10444 }, { "epoch": 3.666900420757363, "grad_norm": 10.484573364257812, "learning_rate": 3.519752220663862e-05, "loss": 1.577, "step": 10458 }, { "epoch": 3.6718092566619918, "grad_norm": 9.353947639465332, "learning_rate": 3.517025089605735e-05, "loss": 1.5436, "step": 10472 }, { "epoch": 3.67671809256662, "grad_norm": 8.169730186462402, "learning_rate": 3.514297958547608e-05, "loss": 1.653, "step": 10486 }, { "epoch": 3.6816269284712484, "grad_norm": 11.992076873779297, "learning_rate": 3.511570827489481e-05, "loss": 1.5671, "step": 10500 }, { "epoch": 3.6865357643758765, "grad_norm": 11.97618579864502, "learning_rate": 3.508843696431354e-05, "loss": 1.6109, "step": 10514 }, { "epoch": 3.691444600280505, "grad_norm": 8.204534530639648, "learning_rate": 3.5061165653732277e-05, "loss": 1.5241, "step": 10528 }, { "epoch": 3.696353436185133, "grad_norm": 9.087929725646973, "learning_rate": 3.503389434315101e-05, "loss": 1.6827, "step": 10542 }, { "epoch": 3.7012622720897617, "grad_norm": 11.816279411315918, 
"learning_rate": 3.500662303256974e-05, "loss": 1.6285, "step": 10556 }, { "epoch": 3.7061711079943898, "grad_norm": 11.24445629119873, "learning_rate": 3.497935172198847e-05, "loss": 1.6281, "step": 10570 }, { "epoch": 3.7110799438990183, "grad_norm": 11.288105010986328, "learning_rate": 3.49520804114072e-05, "loss": 1.5607, "step": 10584 }, { "epoch": 3.715988779803647, "grad_norm": 21.707548141479492, "learning_rate": 3.4924809100825936e-05, "loss": 1.5478, "step": 10598 }, { "epoch": 3.720897615708275, "grad_norm": 10.162115097045898, "learning_rate": 3.4897537790244664e-05, "loss": 1.6116, "step": 10612 }, { "epoch": 3.725806451612903, "grad_norm": 12.764215469360352, "learning_rate": 3.487026647966339e-05, "loss": 1.5426, "step": 10626 }, { "epoch": 3.7307152875175316, "grad_norm": 10.847417831420898, "learning_rate": 3.4842995169082126e-05, "loss": 1.6094, "step": 10640 }, { "epoch": 3.73562412342216, "grad_norm": 9.67869758605957, "learning_rate": 3.481572385850086e-05, "loss": 1.5822, "step": 10654 }, { "epoch": 3.740532959326788, "grad_norm": 12.551370620727539, "learning_rate": 3.4788452547919596e-05, "loss": 1.6487, "step": 10668 }, { "epoch": 3.7454417952314163, "grad_norm": 11.341785430908203, "learning_rate": 3.4761181237338317e-05, "loss": 1.5283, "step": 10682 }, { "epoch": 3.750350631136045, "grad_norm": 10.435938835144043, "learning_rate": 3.473390992675705e-05, "loss": 1.6446, "step": 10696 }, { "epoch": 3.7552594670406734, "grad_norm": 7.6258769035339355, "learning_rate": 3.4706638616175786e-05, "loss": 1.6037, "step": 10710 }, { "epoch": 3.7601683029453015, "grad_norm": 10.772723197937012, "learning_rate": 3.4681315256350324e-05, "loss": 1.5314, "step": 10724 }, { "epoch": 3.76507713884993, "grad_norm": 11.095947265625, "learning_rate": 3.465404394576905e-05, "loss": 1.6286, "step": 10738 }, { "epoch": 3.769985974754558, "grad_norm": 10.717830657958984, "learning_rate": 3.462677263518778e-05, "loss": 1.6376, "step": 10752 }, { "epoch": 
3.7748948106591866, "grad_norm": 13.202998161315918, "learning_rate": 3.4599501324606514e-05, "loss": 1.6604, "step": 10766 }, { "epoch": 3.7798036465638147, "grad_norm": 8.416659355163574, "learning_rate": 3.457223001402525e-05, "loss": 1.6224, "step": 10780 }, { "epoch": 3.7847124824684433, "grad_norm": 9.9965181350708, "learning_rate": 3.4544958703443984e-05, "loss": 1.6527, "step": 10794 }, { "epoch": 3.7896213183730714, "grad_norm": 11.899386405944824, "learning_rate": 3.4517687392862705e-05, "loss": 1.5785, "step": 10808 }, { "epoch": 3.7945301542777, "grad_norm": 9.11591911315918, "learning_rate": 3.449041608228144e-05, "loss": 1.6836, "step": 10822 }, { "epoch": 3.7994389901823284, "grad_norm": 12.630671501159668, "learning_rate": 3.4463144771700174e-05, "loss": 1.6111, "step": 10836 }, { "epoch": 3.8043478260869565, "grad_norm": 8.317117691040039, "learning_rate": 3.443587346111891e-05, "loss": 1.5958, "step": 10850 }, { "epoch": 3.8092566619915846, "grad_norm": 8.361594200134277, "learning_rate": 3.4408602150537636e-05, "loss": 1.5765, "step": 10864 }, { "epoch": 3.814165497896213, "grad_norm": 10.488751411437988, "learning_rate": 3.4381330839956364e-05, "loss": 1.6111, "step": 10878 }, { "epoch": 3.8190743338008417, "grad_norm": 12.822625160217285, "learning_rate": 3.43540595293751e-05, "loss": 1.6268, "step": 10892 }, { "epoch": 3.82398316970547, "grad_norm": 12.413044929504395, "learning_rate": 3.432678821879383e-05, "loss": 1.6634, "step": 10906 }, { "epoch": 3.828892005610098, "grad_norm": 11.380762100219727, "learning_rate": 3.429951690821256e-05, "loss": 1.5552, "step": 10920 }, { "epoch": 3.8338008415147264, "grad_norm": 11.49669075012207, "learning_rate": 3.427224559763129e-05, "loss": 1.648, "step": 10934 }, { "epoch": 3.838709677419355, "grad_norm": 10.676758766174316, "learning_rate": 3.4244974287050024e-05, "loss": 1.6064, "step": 10948 }, { "epoch": 3.843618513323983, "grad_norm": 9.832487106323242, "learning_rate": 3.421770297646876e-05, 
"loss": 1.6396, "step": 10962 }, { "epoch": 3.8485273492286116, "grad_norm": 14.6599760055542, "learning_rate": 3.419043166588749e-05, "loss": 1.6097, "step": 10976 }, { "epoch": 3.8534361851332397, "grad_norm": 12.616104125976562, "learning_rate": 3.416316035530622e-05, "loss": 1.5887, "step": 10990 }, { "epoch": 3.8583450210378682, "grad_norm": 9.049616813659668, "learning_rate": 3.413588904472495e-05, "loss": 1.6461, "step": 11004 }, { "epoch": 3.8632538569424963, "grad_norm": 13.467806816101074, "learning_rate": 3.410861773414368e-05, "loss": 1.5514, "step": 11018 }, { "epoch": 3.868162692847125, "grad_norm": 9.450819969177246, "learning_rate": 3.408134642356242e-05, "loss": 1.5552, "step": 11032 }, { "epoch": 3.873071528751753, "grad_norm": 8.592188835144043, "learning_rate": 3.4054075112981145e-05, "loss": 1.5748, "step": 11046 }, { "epoch": 3.8779803646563815, "grad_norm": 19.697107315063477, "learning_rate": 3.402680380239987e-05, "loss": 1.7525, "step": 11060 }, { "epoch": 3.88288920056101, "grad_norm": 9.762519836425781, "learning_rate": 3.399953249181861e-05, "loss": 1.6178, "step": 11074 }, { "epoch": 3.887798036465638, "grad_norm": 11.345512390136719, "learning_rate": 3.397226118123734e-05, "loss": 1.5569, "step": 11088 }, { "epoch": 3.8927068723702662, "grad_norm": 9.798188209533691, "learning_rate": 3.394498987065607e-05, "loss": 1.5932, "step": 11102 }, { "epoch": 3.897615708274895, "grad_norm": 14.541221618652344, "learning_rate": 3.39177185600748e-05, "loss": 1.6176, "step": 11116 }, { "epoch": 3.9025245441795233, "grad_norm": 10.047798156738281, "learning_rate": 3.389044724949353e-05, "loss": 1.5472, "step": 11130 }, { "epoch": 3.9074333800841514, "grad_norm": 7.906284332275391, "learning_rate": 3.386317593891227e-05, "loss": 1.5109, "step": 11144 }, { "epoch": 3.91234221598878, "grad_norm": 11.65975284576416, "learning_rate": 3.3835904628331e-05, "loss": 1.5641, "step": 11158 }, { "epoch": 3.917251051893408, "grad_norm": 10.643242835998535, 
"learning_rate": 3.380863331774973e-05, "loss": 1.6417, "step": 11172 }, { "epoch": 3.9221598877980366, "grad_norm": 11.930948257446289, "learning_rate": 3.378136200716846e-05, "loss": 1.5819, "step": 11186 }, { "epoch": 3.9270687237026647, "grad_norm": 9.571308135986328, "learning_rate": 3.375409069658719e-05, "loss": 1.5726, "step": 11200 }, { "epoch": 3.9319775596072932, "grad_norm": 8.767221450805664, "learning_rate": 3.372681938600593e-05, "loss": 1.5627, "step": 11214 }, { "epoch": 3.9368863955119213, "grad_norm": 11.255206108093262, "learning_rate": 3.3699548075424655e-05, "loss": 1.5763, "step": 11228 }, { "epoch": 3.94179523141655, "grad_norm": 13.33690357208252, "learning_rate": 3.367227676484338e-05, "loss": 1.799, "step": 11242 }, { "epoch": 3.946704067321178, "grad_norm": 9.469399452209473, "learning_rate": 3.364500545426212e-05, "loss": 1.5736, "step": 11256 }, { "epoch": 3.9516129032258065, "grad_norm": 11.10346794128418, "learning_rate": 3.361773414368085e-05, "loss": 1.5963, "step": 11270 }, { "epoch": 3.9565217391304346, "grad_norm": 8.936464309692383, "learning_rate": 3.359046283309958e-05, "loss": 1.6026, "step": 11284 }, { "epoch": 3.961430575035063, "grad_norm": 11.139102935791016, "learning_rate": 3.3563191522518314e-05, "loss": 1.6834, "step": 11298 }, { "epoch": 3.9663394109396917, "grad_norm": 9.805230140686035, "learning_rate": 3.353592021193704e-05, "loss": 1.5601, "step": 11312 }, { "epoch": 3.9712482468443198, "grad_norm": 9.808457374572754, "learning_rate": 3.350864890135578e-05, "loss": 1.6442, "step": 11326 }, { "epoch": 3.976157082748948, "grad_norm": 11.996650695800781, "learning_rate": 3.3481377590774504e-05, "loss": 1.6668, "step": 11340 }, { "epoch": 3.9810659186535764, "grad_norm": 12.452975273132324, "learning_rate": 3.345410628019324e-05, "loss": 1.5418, "step": 11354 }, { "epoch": 3.985974754558205, "grad_norm": 10.335541725158691, "learning_rate": 3.342683496961197e-05, "loss": 1.5864, "step": 11368 }, { "epoch": 
3.990883590462833, "grad_norm": 11.311763763427734, "learning_rate": 3.33995636590307e-05, "loss": 1.6637, "step": 11382 }, { "epoch": 3.9957924263674616, "grad_norm": 11.239322662353516, "learning_rate": 3.3372292348449436e-05, "loss": 1.5714, "step": 11396 }, { "epoch": 4.0, "eval_loss": 1.534949779510498, "eval_map": 0.098, "eval_map_50": 0.1464, "eval_map_75": 0.109, "eval_map_applique": 0.0008, "eval_map_bag, wallet": 0.082, "eval_map_bead": 0.0196, "eval_map_belt": 0.1047, "eval_map_bow": 0.0, "eval_map_buckle": 0.1233, "eval_map_cape": 0.0, "eval_map_cardigan": 0.0, "eval_map_coat": 0.0935, "eval_map_collar": 0.1775, "eval_map_dress": 0.4482, "eval_map_epaulette": 0.0054, "eval_map_flower": 0.0, "eval_map_fringe": 0.0, "eval_map_glasses": 0.1932, "eval_map_glove": 0.0135, "eval_map_hat": 0.1602, "eval_map_headband, head covering, hair accessory": 0.072, "eval_map_hood": 0.0398, "eval_map_jacket": 0.2535, "eval_map_jumpsuit": 0.0008, "eval_map_lapel": 0.0989, "eval_map_large": 0.0987, "eval_map_leg warmer": 0.0, "eval_map_medium": 0.1146, "eval_map_neckline": 0.2521, "eval_map_pants": 0.3815, "eval_map_pocket": 0.0846, "eval_map_ribbon": 0.0, "eval_map_rivet": 0.0067, "eval_map_ruffle": 0.021, "eval_map_scarf": 0.0, "eval_map_sequin": 0.0, "eval_map_shirt, blouse": 0.0517, "eval_map_shoe": 0.3701, "eval_map_shorts": 0.2401, "eval_map_skirt": 0.2899, "eval_map_sleeve": 0.2896, "eval_map_small": 0.0, "eval_map_sock": 0.0305, "eval_map_sweater": 0.0012, "eval_map_tassel": 0.0, "eval_map_tie": 0.0687, "eval_map_tights, stockings": 0.1653, "eval_map_top, t-shirt, sweatshirt": 0.1785, "eval_map_umbrella": 0.1246, "eval_map_vest": 0.0, "eval_map_watch": 0.0399, "eval_map_zipper": 0.0233, "eval_mar_1": 0.1648, "eval_mar_10": 0.3271, "eval_mar_100": 0.3334, "eval_mar_100_applique": 0.0115, "eval_mar_100_bag, wallet": 0.4958, "eval_mar_100_bead": 0.2523, "eval_mar_100_belt": 0.5732, "eval_mar_100_bow": 0.0, "eval_mar_100_buckle": 0.3672, "eval_mar_100_cape": 0.0, 
"eval_mar_100_cardigan": 0.0, "eval_mar_100_coat": 0.3272, "eval_mar_100_collar": 0.5747, "eval_mar_100_dress": 0.7959, "eval_mar_100_epaulette": 0.1571, "eval_mar_100_flower": 0.0, "eval_mar_100_fringe": 0.0, "eval_mar_100_glasses": 0.6248, "eval_mar_100_glove": 0.0839, "eval_mar_100_hat": 0.5589, "eval_mar_100_headband, head covering, hair accessory": 0.4028, "eval_mar_100_hood": 0.1031, "eval_mar_100_jacket": 0.7407, "eval_mar_100_jumpsuit": 0.0476, "eval_mar_100_lapel": 0.5393, "eval_mar_100_leg warmer": 0.0, "eval_mar_100_neckline": 0.7431, "eval_mar_100_pants": 0.7618, "eval_mar_100_pocket": 0.643, "eval_mar_100_ribbon": 0.0, "eval_mar_100_rivet": 0.07, "eval_mar_100_ruffle": 0.1684, "eval_mar_100_scarf": 0.0, "eval_mar_100_sequin": 0.0, "eval_mar_100_shirt, blouse": 0.2545, "eval_mar_100_shoe": 0.7786, "eval_mar_100_shorts": 0.5821, "eval_mar_100_skirt": 0.679, "eval_mar_100_sleeve": 0.7001, "eval_mar_100_sock": 0.5976, "eval_mar_100_sweater": 0.0095, "eval_mar_100_tassel": 0.0, "eval_mar_100_tie": 0.4333, "eval_mar_100_tights, stockings": 0.6434, "eval_mar_100_top, t-shirt, sweatshirt": 0.7036, "eval_mar_100_umbrella": 0.32, "eval_mar_100_vest": 0.0, "eval_mar_100_watch": 0.3988, "eval_mar_100_zipper": 0.1918, "eval_mar_large": 0.3363, "eval_mar_medium": 0.1848, "eval_mar_small": 0.0, "eval_runtime": 78.0431, "eval_samples_per_second": 14.838, "eval_steps_per_second": 0.474, "step": 11408 }, { "epoch": 4.00070126227209, "grad_norm": 9.66673755645752, "learning_rate": 3.3345021037868164e-05, "loss": 1.5513, "step": 11410 }, { "epoch": 4.005610098176718, "grad_norm": 11.523599624633789, "learning_rate": 3.33177497272869e-05, "loss": 1.5361, "step": 11424 }, { "epoch": 4.010518934081347, "grad_norm": 8.45030403137207, "learning_rate": 3.3290478416705626e-05, "loss": 1.5344, "step": 11438 }, { "epoch": 4.015427769985974, "grad_norm": 11.079434394836426, "learning_rate": 3.326320710612436e-05, "loss": 1.659, "step": 11452 }, { "epoch": 4.020336605890603, 
"grad_norm": 19.345903396606445, "learning_rate": 3.323593579554309e-05, "loss": 1.5821, "step": 11466 }, { "epoch": 4.0252454417952315, "grad_norm": 9.751747131347656, "learning_rate": 3.3208664484961823e-05, "loss": 1.5549, "step": 11480 }, { "epoch": 4.03015427769986, "grad_norm": 11.230302810668945, "learning_rate": 3.318139317438055e-05, "loss": 1.6495, "step": 11494 }, { "epoch": 4.035063113604488, "grad_norm": 12.477935791015625, "learning_rate": 3.3154121863799286e-05, "loss": 1.522, "step": 11508 }, { "epoch": 4.039971949509116, "grad_norm": 12.162940979003906, "learning_rate": 3.3126850553218014e-05, "loss": 1.4947, "step": 11522 }, { "epoch": 4.044880785413745, "grad_norm": 8.901485443115234, "learning_rate": 3.309957924263675e-05, "loss": 1.5949, "step": 11536 }, { "epoch": 4.049789621318373, "grad_norm": 10.766268730163574, "learning_rate": 3.307230793205548e-05, "loss": 1.628, "step": 11550 }, { "epoch": 4.054698457223002, "grad_norm": 8.613593101501465, "learning_rate": 3.304503662147421e-05, "loss": 1.5274, "step": 11564 }, { "epoch": 4.0596072931276295, "grad_norm": 14.793852806091309, "learning_rate": 3.301776531089294e-05, "loss": 1.5749, "step": 11578 }, { "epoch": 4.064516129032258, "grad_norm": 8.682899475097656, "learning_rate": 3.299049400031167e-05, "loss": 1.5156, "step": 11592 }, { "epoch": 4.0694249649368865, "grad_norm": 11.472668647766113, "learning_rate": 3.296322268973041e-05, "loss": 1.5752, "step": 11606 }, { "epoch": 4.074333800841515, "grad_norm": 10.45604419708252, "learning_rate": 3.2935951379149136e-05, "loss": 1.5281, "step": 11620 }, { "epoch": 4.079242636746143, "grad_norm": 10.968986511230469, "learning_rate": 3.290868006856787e-05, "loss": 1.4978, "step": 11634 }, { "epoch": 4.084151472650771, "grad_norm": 11.206563949584961, "learning_rate": 3.28814087579866e-05, "loss": 1.4893, "step": 11648 }, { "epoch": 4.0890603085554, "grad_norm": 10.0808687210083, "learning_rate": 3.285413744740533e-05, "loss": 1.5835, "step": 
11662 }, { "epoch": 4.093969144460028, "grad_norm": 13.311516761779785, "learning_rate": 3.282686613682406e-05, "loss": 1.6009, "step": 11676 }, { "epoch": 4.098877980364656, "grad_norm": 9.528651237487793, "learning_rate": 3.2799594826242795e-05, "loss": 1.6841, "step": 11690 }, { "epoch": 4.1037868162692845, "grad_norm": 15.137151718139648, "learning_rate": 3.277232351566152e-05, "loss": 1.5736, "step": 11704 }, { "epoch": 4.108695652173913, "grad_norm": 7.964916229248047, "learning_rate": 3.274505220508026e-05, "loss": 1.6114, "step": 11718 }, { "epoch": 4.113604488078542, "grad_norm": 9.579951286315918, "learning_rate": 3.271778089449899e-05, "loss": 1.5567, "step": 11732 }, { "epoch": 4.118513323983169, "grad_norm": 13.57766342163086, "learning_rate": 3.269050958391772e-05, "loss": 1.5638, "step": 11746 }, { "epoch": 4.123422159887798, "grad_norm": 9.2661714553833, "learning_rate": 3.266323827333645e-05, "loss": 1.5609, "step": 11760 }, { "epoch": 4.128330995792426, "grad_norm": 10.55330753326416, "learning_rate": 3.263596696275518e-05, "loss": 1.5509, "step": 11774 }, { "epoch": 4.133239831697055, "grad_norm": 10.576773643493652, "learning_rate": 3.260869565217392e-05, "loss": 1.5516, "step": 11788 }, { "epoch": 4.138148667601683, "grad_norm": 11.580509185791016, "learning_rate": 3.2581424341592645e-05, "loss": 1.5027, "step": 11802 }, { "epoch": 4.143057503506311, "grad_norm": 10.741377830505371, "learning_rate": 3.255415303101137e-05, "loss": 1.5575, "step": 11816 }, { "epoch": 4.14796633941094, "grad_norm": 14.081369400024414, "learning_rate": 3.252688172043011e-05, "loss": 1.5773, "step": 11830 }, { "epoch": 4.152875175315568, "grad_norm": 11.597344398498535, "learning_rate": 3.249961040984884e-05, "loss": 1.6257, "step": 11844 }, { "epoch": 4.157784011220197, "grad_norm": 10.63237190246582, "learning_rate": 3.2472339099267576e-05, "loss": 1.5557, "step": 11858 }, { "epoch": 4.162692847124824, "grad_norm": 13.090503692626953, "learning_rate": 
3.24450677886863e-05, "loss": 1.5732, "step": 11872 }, { "epoch": 4.167601683029453, "grad_norm": 9.729809761047363, "learning_rate": 3.241779647810503e-05, "loss": 1.5704, "step": 11886 }, { "epoch": 4.172510518934081, "grad_norm": 9.571310997009277, "learning_rate": 3.239052516752377e-05, "loss": 1.5718, "step": 11900 }, { "epoch": 4.17741935483871, "grad_norm": 12.394028663635254, "learning_rate": 3.23632538569425e-05, "loss": 1.4916, "step": 11914 }, { "epoch": 4.182328190743338, "grad_norm": 8.15948486328125, "learning_rate": 3.233598254636123e-05, "loss": 1.5675, "step": 11928 }, { "epoch": 4.187237026647966, "grad_norm": 9.521645545959473, "learning_rate": 3.230871123577996e-05, "loss": 1.5847, "step": 11942 }, { "epoch": 4.192145862552595, "grad_norm": 12.008919715881348, "learning_rate": 3.228143992519869e-05, "loss": 1.5235, "step": 11956 }, { "epoch": 4.197054698457223, "grad_norm": 9.02338981628418, "learning_rate": 3.2254168614617426e-05, "loss": 1.567, "step": 11970 }, { "epoch": 4.201963534361852, "grad_norm": 11.452201843261719, "learning_rate": 3.222689730403616e-05, "loss": 1.5276, "step": 11984 }, { "epoch": 4.206872370266479, "grad_norm": 8.785606384277344, "learning_rate": 3.219962599345488e-05, "loss": 1.617, "step": 11998 }, { "epoch": 4.211781206171108, "grad_norm": 9.739134788513184, "learning_rate": 3.2172354682873616e-05, "loss": 1.5641, "step": 12012 }, { "epoch": 4.2166900420757365, "grad_norm": 9.828926086425781, "learning_rate": 3.214508337229235e-05, "loss": 1.5228, "step": 12026 }, { "epoch": 4.221598877980365, "grad_norm": 10.775086402893066, "learning_rate": 3.2117812061711086e-05, "loss": 1.6412, "step": 12040 }, { "epoch": 4.226507713884993, "grad_norm": 10.516294479370117, "learning_rate": 3.2090540751129813e-05, "loss": 1.5475, "step": 12054 }, { "epoch": 4.231416549789621, "grad_norm": 12.287437438964844, "learning_rate": 3.206326944054854e-05, "loss": 1.6479, "step": 12068 }, { "epoch": 4.23632538569425, "grad_norm": 
14.104717254638672, "learning_rate": 3.2035998129967276e-05, "loss": 1.496, "step": 12082 }, { "epoch": 4.241234221598878, "grad_norm": 8.50561809539795, "learning_rate": 3.200872681938601e-05, "loss": 1.5109, "step": 12096 }, { "epoch": 4.246143057503506, "grad_norm": 9.909367561340332, "learning_rate": 3.198145550880474e-05, "loss": 1.5176, "step": 12110 }, { "epoch": 4.2510518934081345, "grad_norm": 8.9078950881958, "learning_rate": 3.1954184198223466e-05, "loss": 1.5411, "step": 12124 }, { "epoch": 4.255960729312763, "grad_norm": 9.790164947509766, "learning_rate": 3.19269128876422e-05, "loss": 1.464, "step": 12138 }, { "epoch": 4.260869565217392, "grad_norm": 11.287303924560547, "learning_rate": 3.1899641577060935e-05, "loss": 1.6458, "step": 12152 }, { "epoch": 4.265778401122019, "grad_norm": 12.238380432128906, "learning_rate": 3.187237026647967e-05, "loss": 1.5749, "step": 12166 }, { "epoch": 4.270687237026648, "grad_norm": 11.297727584838867, "learning_rate": 3.18450989558984e-05, "loss": 1.561, "step": 12180 }, { "epoch": 4.275596072931276, "grad_norm": 10.53003978729248, "learning_rate": 3.1817827645317126e-05, "loss": 1.5815, "step": 12194 }, { "epoch": 4.280504908835905, "grad_norm": 10.161109924316406, "learning_rate": 3.179055633473586e-05, "loss": 1.5122, "step": 12208 }, { "epoch": 4.2854137447405325, "grad_norm": 9.58373737335205, "learning_rate": 3.1763285024154595e-05, "loss": 1.5711, "step": 12222 }, { "epoch": 4.290322580645161, "grad_norm": 11.285284996032715, "learning_rate": 3.173601371357332e-05, "loss": 1.543, "step": 12236 }, { "epoch": 4.29523141654979, "grad_norm": 13.326284408569336, "learning_rate": 3.170874240299205e-05, "loss": 1.5236, "step": 12250 }, { "epoch": 4.300140252454418, "grad_norm": 10.333477020263672, "learning_rate": 3.1681471092410785e-05, "loss": 1.5287, "step": 12264 }, { "epoch": 4.305049088359047, "grad_norm": 9.891762733459473, "learning_rate": 3.165419978182952e-05, "loss": 1.566, "step": 12278 }, { "epoch": 
4.309957924263674, "grad_norm": 9.980710983276367, "learning_rate": 3.162692847124825e-05, "loss": 1.6196, "step": 12292 }, { "epoch": 4.314866760168303, "grad_norm": 9.005849838256836, "learning_rate": 3.159965716066698e-05, "loss": 1.5799, "step": 12306 }, { "epoch": 4.319775596072931, "grad_norm": 10.111163139343262, "learning_rate": 3.157238585008571e-05, "loss": 1.627, "step": 12320 }, { "epoch": 4.32468443197756, "grad_norm": 8.10124683380127, "learning_rate": 3.1545114539504445e-05, "loss": 1.5878, "step": 12334 }, { "epoch": 4.329593267882188, "grad_norm": 11.300957679748535, "learning_rate": 3.151784322892317e-05, "loss": 1.5336, "step": 12348 }, { "epoch": 4.334502103786816, "grad_norm": 10.956774711608887, "learning_rate": 3.149057191834191e-05, "loss": 1.5929, "step": 12362 }, { "epoch": 4.339410939691445, "grad_norm": 11.961463928222656, "learning_rate": 3.1463300607760635e-05, "loss": 1.5406, "step": 12376 }, { "epoch": 4.344319775596073, "grad_norm": 9.761469841003418, "learning_rate": 3.143602929717937e-05, "loss": 1.5845, "step": 12390 }, { "epoch": 4.349228611500701, "grad_norm": 7.312012195587158, "learning_rate": 3.1408757986598104e-05, "loss": 1.5472, "step": 12404 }, { "epoch": 4.354137447405329, "grad_norm": 9.521291732788086, "learning_rate": 3.138148667601683e-05, "loss": 1.5054, "step": 12418 }, { "epoch": 4.359046283309958, "grad_norm": 9.15357780456543, "learning_rate": 3.135421536543556e-05, "loss": 1.5493, "step": 12432 }, { "epoch": 4.3639551192145865, "grad_norm": 9.72464370727539, "learning_rate": 3.1326944054854294e-05, "loss": 1.5134, "step": 12446 }, { "epoch": 4.368863955119215, "grad_norm": 10.519390106201172, "learning_rate": 3.129967274427303e-05, "loss": 1.5775, "step": 12460 }, { "epoch": 4.373772791023843, "grad_norm": 8.410240173339844, "learning_rate": 3.127240143369176e-05, "loss": 1.5898, "step": 12474 }, { "epoch": 4.378681626928471, "grad_norm": 10.563374519348145, "learning_rate": 3.124513012311049e-05, "loss": 
1.4616, "step": 12488 }, { "epoch": 4.3835904628331, "grad_norm": 11.19625186920166, "learning_rate": 3.121785881252922e-05, "loss": 1.5172, "step": 12502 }, { "epoch": 4.388499298737728, "grad_norm": 8.7938814163208, "learning_rate": 3.1190587501947954e-05, "loss": 1.5172, "step": 12516 }, { "epoch": 4.393408134642356, "grad_norm": 9.717074394226074, "learning_rate": 3.116331619136668e-05, "loss": 1.5354, "step": 12530 }, { "epoch": 4.3983169705469845, "grad_norm": 8.544587135314941, "learning_rate": 3.1136044880785416e-05, "loss": 1.5456, "step": 12544 }, { "epoch": 4.403225806451613, "grad_norm": 11.177563667297363, "learning_rate": 3.1108773570204144e-05, "loss": 1.5658, "step": 12558 }, { "epoch": 4.4081346423562415, "grad_norm": 9.176631927490234, "learning_rate": 3.108150225962288e-05, "loss": 1.6022, "step": 12572 }, { "epoch": 4.413043478260869, "grad_norm": 12.47849178314209, "learning_rate": 3.1054230949041607e-05, "loss": 1.6179, "step": 12586 }, { "epoch": 4.417952314165498, "grad_norm": 16.832164764404297, "learning_rate": 3.102695963846034e-05, "loss": 1.5703, "step": 12600 }, { "epoch": 4.422861150070126, "grad_norm": 11.572169303894043, "learning_rate": 3.0999688327879076e-05, "loss": 1.5747, "step": 12614 }, { "epoch": 4.427769985974755, "grad_norm": 10.980843544006348, "learning_rate": 3.0972417017297804e-05, "loss": 1.6733, "step": 12628 }, { "epoch": 4.432678821879383, "grad_norm": 11.77977180480957, "learning_rate": 3.094514570671654e-05, "loss": 1.5871, "step": 12642 }, { "epoch": 4.437587657784011, "grad_norm": 9.084946632385254, "learning_rate": 3.0917874396135266e-05, "loss": 1.5163, "step": 12656 }, { "epoch": 4.4424964936886395, "grad_norm": 10.340909957885742, "learning_rate": 3.0890603085554e-05, "loss": 1.5415, "step": 12670 }, { "epoch": 4.447405329593268, "grad_norm": 14.121663093566895, "learning_rate": 3.086333177497273e-05, "loss": 1.4926, "step": 12684 }, { "epoch": 4.452314165497897, "grad_norm": 14.766218185424805, 
"learning_rate": 3.083606046439146e-05, "loss": 1.4897, "step": 12698 }, { "epoch": 4.457223001402524, "grad_norm": 9.75975513458252, "learning_rate": 3.080878915381019e-05, "loss": 1.5554, "step": 12712 }, { "epoch": 4.462131837307153, "grad_norm": 7.265247344970703, "learning_rate": 3.0781517843228925e-05, "loss": 1.5224, "step": 12726 }, { "epoch": 4.467040673211781, "grad_norm": 9.95474910736084, "learning_rate": 3.075424653264766e-05, "loss": 1.5556, "step": 12740 }, { "epoch": 4.47194950911641, "grad_norm": 9.839644432067871, "learning_rate": 3.072697522206639e-05, "loss": 1.583, "step": 12754 }, { "epoch": 4.4768583450210375, "grad_norm": 13.647014617919922, "learning_rate": 3.0699703911485116e-05, "loss": 1.5627, "step": 12768 }, { "epoch": 4.481767180925666, "grad_norm": 11.288187980651855, "learning_rate": 3.067243260090385e-05, "loss": 1.5237, "step": 12782 }, { "epoch": 4.486676016830295, "grad_norm": 11.84894847869873, "learning_rate": 3.0645161290322585e-05, "loss": 1.5449, "step": 12796 }, { "epoch": 4.491584852734923, "grad_norm": 8.708813667297363, "learning_rate": 3.061788997974131e-05, "loss": 1.5504, "step": 12810 }, { "epoch": 4.496493688639551, "grad_norm": 12.55114459991455, "learning_rate": 3.059061866916004e-05, "loss": 1.5999, "step": 12824 }, { "epoch": 4.501402524544179, "grad_norm": 9.775962829589844, "learning_rate": 3.0563347358578775e-05, "loss": 1.465, "step": 12838 }, { "epoch": 4.506311360448808, "grad_norm": 9.088383674621582, "learning_rate": 3.053607604799751e-05, "loss": 1.4994, "step": 12852 }, { "epoch": 4.511220196353436, "grad_norm": 11.072678565979004, "learning_rate": 3.050880473741624e-05, "loss": 1.549, "step": 12866 }, { "epoch": 4.516129032258064, "grad_norm": 7.814243793487549, "learning_rate": 3.048153342683497e-05, "loss": 1.5301, "step": 12880 }, { "epoch": 4.521037868162693, "grad_norm": 11.055352210998535, "learning_rate": 3.04542621162537e-05, "loss": 1.4589, "step": 12894 }, { "epoch": 4.525946704067321, 
"grad_norm": 11.707395553588867, "learning_rate": 3.0426990805672435e-05, "loss": 1.5174, "step": 12908 }, { "epoch": 4.53085553997195, "grad_norm": 8.760632514953613, "learning_rate": 3.0399719495091166e-05, "loss": 1.6149, "step": 12922 }, { "epoch": 4.535764375876578, "grad_norm": 13.473443984985352, "learning_rate": 3.03724481845099e-05, "loss": 1.5536, "step": 12936 }, { "epoch": 4.540673211781206, "grad_norm": 12.472698211669922, "learning_rate": 3.034517687392863e-05, "loss": 1.5321, "step": 12950 }, { "epoch": 4.545582047685834, "grad_norm": 12.632447242736816, "learning_rate": 3.031790556334736e-05, "loss": 1.509, "step": 12964 }, { "epoch": 4.550490883590463, "grad_norm": 11.99154281616211, "learning_rate": 3.029063425276609e-05, "loss": 1.5516, "step": 12978 }, { "epoch": 4.5553997194950915, "grad_norm": 8.433908462524414, "learning_rate": 3.0263362942184825e-05, "loss": 1.5338, "step": 12992 }, { "epoch": 4.560308555399719, "grad_norm": 10.799421310424805, "learning_rate": 3.0236091631603553e-05, "loss": 1.6058, "step": 13006 }, { "epoch": 4.565217391304348, "grad_norm": 8.67467212677002, "learning_rate": 3.0208820321022284e-05, "loss": 1.5545, "step": 13020 }, { "epoch": 4.570126227208976, "grad_norm": 12.024298667907715, "learning_rate": 3.018154901044102e-05, "loss": 1.6409, "step": 13034 }, { "epoch": 4.575035063113605, "grad_norm": 10.116240501403809, "learning_rate": 3.015427769985975e-05, "loss": 1.5844, "step": 13048 }, { "epoch": 4.579943899018232, "grad_norm": 9.839113235473633, "learning_rate": 3.0127006389278478e-05, "loss": 1.5943, "step": 13062 }, { "epoch": 4.584852734922861, "grad_norm": 9.690649032592773, "learning_rate": 3.009973507869721e-05, "loss": 1.5196, "step": 13076 }, { "epoch": 4.5897615708274895, "grad_norm": 10.301356315612793, "learning_rate": 3.0072463768115944e-05, "loss": 1.5433, "step": 13090 }, { "epoch": 4.594670406732118, "grad_norm": 8.421154975891113, "learning_rate": 3.0045192457534675e-05, "loss": 1.5324, "step": 
13104 }, { "epoch": 4.599579242636747, "grad_norm": 9.830830574035645, "learning_rate": 3.0017921146953403e-05, "loss": 1.5279, "step": 13118 }, { "epoch": 4.604488078541374, "grad_norm": 8.924742698669434, "learning_rate": 2.9990649836372138e-05, "loss": 1.5157, "step": 13132 }, { "epoch": 4.609396914446003, "grad_norm": 8.364477157592773, "learning_rate": 2.996337852579087e-05, "loss": 1.6128, "step": 13146 }, { "epoch": 4.614305750350631, "grad_norm": 9.776467323303223, "learning_rate": 2.9936107215209603e-05, "loss": 1.5764, "step": 13160 }, { "epoch": 4.61921458625526, "grad_norm": 9.461152076721191, "learning_rate": 2.9908835904628335e-05, "loss": 1.5747, "step": 13174 }, { "epoch": 4.6241234221598875, "grad_norm": 9.374897003173828, "learning_rate": 2.9881564594047062e-05, "loss": 1.5876, "step": 13188 }, { "epoch": 4.629032258064516, "grad_norm": 13.405077934265137, "learning_rate": 2.9854293283465794e-05, "loss": 1.499, "step": 13202 }, { "epoch": 4.6339410939691446, "grad_norm": 12.45617961883545, "learning_rate": 2.9827021972884528e-05, "loss": 1.5529, "step": 13216 }, { "epoch": 4.638849929873773, "grad_norm": 8.182879447937012, "learning_rate": 2.979975066230326e-05, "loss": 1.6036, "step": 13230 }, { "epoch": 4.643758765778401, "grad_norm": 9.265517234802246, "learning_rate": 2.9772479351721987e-05, "loss": 1.4718, "step": 13244 }, { "epoch": 4.648667601683029, "grad_norm": 12.694684028625488, "learning_rate": 2.9745208041140722e-05, "loss": 1.5238, "step": 13258 }, { "epoch": 4.653576437587658, "grad_norm": 7.661476135253906, "learning_rate": 2.9717936730559453e-05, "loss": 1.5171, "step": 13272 }, { "epoch": 4.658485273492286, "grad_norm": 13.192995071411133, "learning_rate": 2.9690665419978188e-05, "loss": 1.5152, "step": 13286 }, { "epoch": 4.663394109396915, "grad_norm": 11.829466819763184, "learning_rate": 2.9663394109396912e-05, "loss": 1.6312, "step": 13300 }, { "epoch": 4.6683029453015426, "grad_norm": 11.915106773376465, "learning_rate": 
2.9636122798815647e-05, "loss": 1.4959, "step": 13314 }, { "epoch": 4.673211781206171, "grad_norm": 8.157515525817871, "learning_rate": 2.9608851488234378e-05, "loss": 1.5386, "step": 13328 }, { "epoch": 4.6781206171108, "grad_norm": 11.170829772949219, "learning_rate": 2.9581580177653113e-05, "loss": 1.5808, "step": 13342 }, { "epoch": 4.683029453015427, "grad_norm": 11.044084548950195, "learning_rate": 2.955430886707184e-05, "loss": 1.584, "step": 13356 }, { "epoch": 4.687938288920056, "grad_norm": 11.19999885559082, "learning_rate": 2.952703755649057e-05, "loss": 1.501, "step": 13370 }, { "epoch": 4.692847124824684, "grad_norm": 12.792724609375, "learning_rate": 2.9499766245909306e-05, "loss": 1.5371, "step": 13384 }, { "epoch": 4.697755960729313, "grad_norm": 9.709602355957031, "learning_rate": 2.9472494935328037e-05, "loss": 1.4738, "step": 13398 }, { "epoch": 4.702664796633941, "grad_norm": 10.702052116394043, "learning_rate": 2.9445223624746772e-05, "loss": 1.6133, "step": 13412 }, { "epoch": 4.707573632538569, "grad_norm": 10.1063871383667, "learning_rate": 2.9417952314165497e-05, "loss": 1.5978, "step": 13426 }, { "epoch": 4.712482468443198, "grad_norm": 8.928024291992188, "learning_rate": 2.939068100358423e-05, "loss": 1.5125, "step": 13440 }, { "epoch": 4.717391304347826, "grad_norm": 12.163514137268066, "learning_rate": 2.9363409693002962e-05, "loss": 1.5057, "step": 13454 }, { "epoch": 4.722300140252455, "grad_norm": 9.348257064819336, "learning_rate": 2.9336138382421697e-05, "loss": 1.6379, "step": 13468 }, { "epoch": 4.727208976157083, "grad_norm": 9.17818832397461, "learning_rate": 2.9308867071840425e-05, "loss": 1.5089, "step": 13482 }, { "epoch": 4.732117812061711, "grad_norm": 14.371675491333008, "learning_rate": 2.9281595761259156e-05, "loss": 1.4613, "step": 13496 }, { "epoch": 4.737026647966339, "grad_norm": 10.593215942382812, "learning_rate": 2.925432445067789e-05, "loss": 1.5341, "step": 13510 }, { "epoch": 4.741935483870968, "grad_norm": 
7.972141265869141, "learning_rate": 2.9227053140096622e-05, "loss": 1.589, "step": 13524 }, { "epoch": 4.746844319775596, "grad_norm": 14.644682884216309, "learning_rate": 2.919978182951535e-05, "loss": 1.5096, "step": 13538 }, { "epoch": 4.751753155680224, "grad_norm": 10.660957336425781, "learning_rate": 2.917251051893408e-05, "loss": 1.6092, "step": 13552 }, { "epoch": 4.756661991584853, "grad_norm": 7.987213134765625, "learning_rate": 2.9145239208352815e-05, "loss": 1.5244, "step": 13566 }, { "epoch": 4.761570827489481, "grad_norm": 7.576722621917725, "learning_rate": 2.9117967897771547e-05, "loss": 1.576, "step": 13580 }, { "epoch": 4.76647966339411, "grad_norm": 10.570621490478516, "learning_rate": 2.9090696587190275e-05, "loss": 1.5475, "step": 13594 }, { "epoch": 4.771388499298737, "grad_norm": 10.171460151672363, "learning_rate": 2.906342527660901e-05, "loss": 1.4744, "step": 13608 }, { "epoch": 4.776297335203366, "grad_norm": 10.46237564086914, "learning_rate": 2.903615396602774e-05, "loss": 1.5122, "step": 13622 }, { "epoch": 4.7812061711079945, "grad_norm": 12.364640235900879, "learning_rate": 2.900888265544647e-05, "loss": 1.4554, "step": 13636 }, { "epoch": 4.786115007012623, "grad_norm": 12.390414237976074, "learning_rate": 2.89816113448652e-05, "loss": 1.5424, "step": 13650 }, { "epoch": 4.791023842917251, "grad_norm": 11.041661262512207, "learning_rate": 2.8954340034283934e-05, "loss": 1.526, "step": 13664 }, { "epoch": 4.795932678821879, "grad_norm": 9.604235649108887, "learning_rate": 2.8927068723702665e-05, "loss": 1.5869, "step": 13678 }, { "epoch": 4.800841514726508, "grad_norm": 9.546652793884277, "learning_rate": 2.88997974131214e-05, "loss": 1.5978, "step": 13692 }, { "epoch": 4.805750350631136, "grad_norm": 11.46382999420166, "learning_rate": 2.887252610254013e-05, "loss": 1.5546, "step": 13706 }, { "epoch": 4.810659186535764, "grad_norm": 8.589116096496582, "learning_rate": 2.884525479195886e-05, "loss": 1.4978, "step": 13720 }, { 
"epoch": 4.8155680224403925, "grad_norm": 12.266592979431152, "learning_rate": 2.881798348137759e-05, "loss": 1.4515, "step": 13734 }, { "epoch": 4.820476858345021, "grad_norm": 10.601984977722168, "learning_rate": 2.8790712170796325e-05, "loss": 1.5704, "step": 13748 }, { "epoch": 4.82538569424965, "grad_norm": 11.45940113067627, "learning_rate": 2.8763440860215056e-05, "loss": 1.4529, "step": 13762 }, { "epoch": 4.830294530154278, "grad_norm": 11.621453285217285, "learning_rate": 2.8736169549633784e-05, "loss": 1.6529, "step": 13776 }, { "epoch": 4.835203366058906, "grad_norm": 10.980024337768555, "learning_rate": 2.870889823905252e-05, "loss": 1.5039, "step": 13790 }, { "epoch": 4.840112201963534, "grad_norm": 13.213388442993164, "learning_rate": 2.868162692847125e-05, "loss": 1.5619, "step": 13804 }, { "epoch": 4.845021037868163, "grad_norm": 10.524654388427734, "learning_rate": 2.8654355617889984e-05, "loss": 1.6128, "step": 13818 }, { "epoch": 4.849929873772791, "grad_norm": 9.079891204833984, "learning_rate": 2.862903225806452e-05, "loss": 1.5569, "step": 13832 }, { "epoch": 4.854838709677419, "grad_norm": 13.493910789489746, "learning_rate": 2.8601760947483247e-05, "loss": 1.4837, "step": 13846 }, { "epoch": 4.859747545582048, "grad_norm": 10.3644380569458, "learning_rate": 2.857448963690198e-05, "loss": 1.5082, "step": 13860 }, { "epoch": 4.864656381486676, "grad_norm": 9.131660461425781, "learning_rate": 2.8547218326320713e-05, "loss": 1.4984, "step": 13874 }, { "epoch": 4.869565217391305, "grad_norm": 12.155920028686523, "learning_rate": 2.8519947015739444e-05, "loss": 1.5794, "step": 13888 }, { "epoch": 4.874474053295932, "grad_norm": 11.692342758178711, "learning_rate": 2.8492675705158172e-05, "loss": 1.4665, "step": 13902 }, { "epoch": 4.879382889200561, "grad_norm": 12.053450584411621, "learning_rate": 2.8465404394576906e-05, "loss": 1.5086, "step": 13916 }, { "epoch": 4.884291725105189, "grad_norm": 8.57055950164795, "learning_rate": 
2.8438133083995638e-05, "loss": 1.536, "step": 13930 }, { "epoch": 4.889200561009818, "grad_norm": 10.6359224319458, "learning_rate": 2.8410861773414372e-05, "loss": 1.5335, "step": 13944 }, { "epoch": 4.8941093969144465, "grad_norm": 13.88723087310791, "learning_rate": 2.83835904628331e-05, "loss": 1.7277, "step": 13958 }, { "epoch": 4.899018232819074, "grad_norm": 8.180968284606934, "learning_rate": 2.835631915225183e-05, "loss": 1.5887, "step": 13972 }, { "epoch": 4.903927068723703, "grad_norm": 12.371262550354004, "learning_rate": 2.8329047841670562e-05, "loss": 1.6233, "step": 13986 }, { "epoch": 4.908835904628331, "grad_norm": 8.73080825805664, "learning_rate": 2.8301776531089297e-05, "loss": 1.5177, "step": 14000 }, { "epoch": 4.913744740532959, "grad_norm": 13.652143478393555, "learning_rate": 2.8274505220508028e-05, "loss": 1.5457, "step": 14014 }, { "epoch": 4.918653576437587, "grad_norm": 11.52315902709961, "learning_rate": 2.8247233909926756e-05, "loss": 1.5875, "step": 14028 }, { "epoch": 4.923562412342216, "grad_norm": 10.014960289001465, "learning_rate": 2.821996259934549e-05, "loss": 1.5788, "step": 14042 }, { "epoch": 4.9284712482468445, "grad_norm": 14.16897201538086, "learning_rate": 2.8192691288764222e-05, "loss": 1.4673, "step": 14056 }, { "epoch": 4.933380084151473, "grad_norm": 11.262075424194336, "learning_rate": 2.8165419978182957e-05, "loss": 1.4806, "step": 14070 }, { "epoch": 4.938288920056101, "grad_norm": 8.350088119506836, "learning_rate": 2.813814866760168e-05, "loss": 1.5829, "step": 14084 }, { "epoch": 4.943197755960729, "grad_norm": 12.026558876037598, "learning_rate": 2.8110877357020416e-05, "loss": 1.5139, "step": 14098 }, { "epoch": 4.948106591865358, "grad_norm": 11.063884735107422, "learning_rate": 2.8083606046439147e-05, "loss": 1.4916, "step": 14112 }, { "epoch": 4.953015427769986, "grad_norm": 9.393431663513184, "learning_rate": 2.805633473585788e-05, "loss": 1.489, "step": 14126 }, { "epoch": 4.957924263674614, 
"grad_norm": 11.962431907653809, "learning_rate": 2.802906342527661e-05, "loss": 1.5775, "step": 14140 }, { "epoch": 4.9628330995792425, "grad_norm": 10.3517484664917, "learning_rate": 2.800179211469534e-05, "loss": 1.6995, "step": 14154 }, { "epoch": 4.967741935483871, "grad_norm": 8.509556770324707, "learning_rate": 2.7974520804114075e-05, "loss": 1.4432, "step": 14168 }, { "epoch": 4.9726507713884995, "grad_norm": 9.714330673217773, "learning_rate": 2.7947249493532806e-05, "loss": 1.5509, "step": 14182 }, { "epoch": 4.977559607293127, "grad_norm": 12.255983352661133, "learning_rate": 2.7919978182951534e-05, "loss": 1.555, "step": 14196 }, { "epoch": 4.982468443197756, "grad_norm": 14.505913734436035, "learning_rate": 2.7892706872370265e-05, "loss": 1.4361, "step": 14210 }, { "epoch": 4.987377279102384, "grad_norm": 8.551470756530762, "learning_rate": 2.7865435561789e-05, "loss": 1.4862, "step": 14224 }, { "epoch": 4.992286115007013, "grad_norm": 15.504724502563477, "learning_rate": 2.783816425120773e-05, "loss": 1.5222, "step": 14238 }, { "epoch": 4.997194950911641, "grad_norm": 10.080548286437988, "learning_rate": 2.781089294062646e-05, "loss": 1.5458, "step": 14252 }, { "epoch": 5.0, "eval_loss": 1.4677339792251587, "eval_map": 0.1058, "eval_map_50": 0.1566, "eval_map_75": 0.119, "eval_map_applique": 0.0013, "eval_map_bag, wallet": 0.0842, "eval_map_bead": 0.0234, "eval_map_belt": 0.0996, "eval_map_bow": 0.0, "eval_map_buckle": 0.1289, "eval_map_cape": 0.0, "eval_map_cardigan": 0.0, "eval_map_coat": 0.1341, "eval_map_collar": 0.1875, "eval_map_dress": 0.4263, "eval_map_epaulette": 0.0098, "eval_map_flower": 0.0, "eval_map_fringe": 0.0, "eval_map_glasses": 0.1645, "eval_map_glove": 0.0509, "eval_map_hat": 0.1734, "eval_map_headband, head covering, hair accessory": 0.0831, "eval_map_hood": 0.0617, "eval_map_jacket": 0.261, "eval_map_jumpsuit": 0.0183, "eval_map_lapel": 0.1203, "eval_map_large": 0.1064, "eval_map_leg warmer": 0.0, "eval_map_medium": 0.0912, 
"eval_map_neckline": 0.2681, "eval_map_pants": 0.327, "eval_map_pocket": 0.0966, "eval_map_ribbon": 0.0, "eval_map_rivet": 0.022, "eval_map_ruffle": 0.0383, "eval_map_scarf": 0.0136, "eval_map_sequin": 0.0, "eval_map_shirt, blouse": 0.0458, "eval_map_shoe": 0.4103, "eval_map_shorts": 0.2252, "eval_map_skirt": 0.2521, "eval_map_sleeve": 0.3647, "eval_map_small": 0.0, "eval_map_sock": 0.0259, "eval_map_sweater": 0.0, "eval_map_tassel": 0.0, "eval_map_tie": 0.1869, "eval_map_tights, stockings": 0.17, "eval_map_top, t-shirt, sweatshirt": 0.1613, "eval_map_umbrella": 0.1364, "eval_map_vest": 0.0, "eval_map_watch": 0.06, "eval_map_zipper": 0.0326, "eval_mar_1": 0.1623, "eval_mar_10": 0.3593, "eval_mar_100": 0.367, "eval_mar_100_applique": 0.0361, "eval_mar_100_bag, wallet": 0.5315, "eval_mar_100_bead": 0.3308, "eval_mar_100_belt": 0.5884, "eval_mar_100_bow": 0.0, "eval_mar_100_buckle": 0.4119, "eval_mar_100_cape": 0.0, "eval_mar_100_cardigan": 0.0, "eval_mar_100_coat": 0.5194, "eval_mar_100_collar": 0.6336, "eval_mar_100_dress": 0.8276, "eval_mar_100_epaulette": 0.3214, "eval_mar_100_flower": 0.0, "eval_mar_100_fringe": 0.0, "eval_mar_100_glasses": 0.586, "eval_mar_100_glove": 0.129, "eval_mar_100_hat": 0.5329, "eval_mar_100_headband, head covering, hair accessory": 0.4431, "eval_mar_100_hood": 0.1875, "eval_mar_100_jacket": 0.6747, "eval_mar_100_jumpsuit": 0.119, "eval_mar_100_lapel": 0.5711, "eval_mar_100_leg warmer": 0.0, "eval_mar_100_neckline": 0.7575, "eval_mar_100_pants": 0.7758, "eval_mar_100_pocket": 0.6427, "eval_mar_100_ribbon": 0.0, "eval_mar_100_rivet": 0.1807, "eval_mar_100_ruffle": 0.2276, "eval_mar_100_scarf": 0.0812, "eval_mar_100_sequin": 0.0, "eval_mar_100_shirt, blouse": 0.3257, "eval_mar_100_shoe": 0.7645, "eval_mar_100_shorts": 0.5915, "eval_mar_100_skirt": 0.7617, "eval_mar_100_sleeve": 0.7376, "eval_mar_100_sock": 0.4471, "eval_mar_100_sweater": 0.0, "eval_mar_100_tassel": 0.0, "eval_mar_100_tie": 0.7333, "eval_mar_100_tights, stockings": 0.6672, 
"eval_mar_100_top, t-shirt, sweatshirt": 0.6941, "eval_mar_100_umbrella": 0.3, "eval_mar_100_vest": 0.0, "eval_mar_100_watch": 0.4373, "eval_mar_100_zipper": 0.3129, "eval_mar_large": 0.3699, "eval_mar_medium": 0.1712, "eval_mar_small": 0.0, "eval_runtime": 83.6403, "eval_samples_per_second": 13.845, "eval_steps_per_second": 0.442, "step": 14260 }, { "epoch": 5.002103786816269, "grad_norm": 10.828971862792969, "learning_rate": 2.7783621630045194e-05, "loss": 1.4744, "step": 14266 }, { "epoch": 5.0070126227208975, "grad_norm": 9.532293319702148, "learning_rate": 2.7756350319463925e-05, "loss": 1.5108, "step": 14280 }, { "epoch": 5.011921458625526, "grad_norm": 8.659916877746582, "learning_rate": 2.772907900888266e-05, "loss": 1.4402, "step": 14294 }, { "epoch": 5.016830294530155, "grad_norm": 9.1572904586792, "learning_rate": 2.770180769830139e-05, "loss": 1.5536, "step": 14308 }, { "epoch": 5.021739130434782, "grad_norm": 8.648621559143066, "learning_rate": 2.767453638772012e-05, "loss": 1.3834, "step": 14322 }, { "epoch": 5.026647966339411, "grad_norm": 10.386967658996582, "learning_rate": 2.764726507713885e-05, "loss": 1.4763, "step": 14336 }, { "epoch": 5.031556802244039, "grad_norm": 8.2344388961792, "learning_rate": 2.7619993766557584e-05, "loss": 1.5814, "step": 14350 }, { "epoch": 5.036465638148668, "grad_norm": 11.98327350616455, "learning_rate": 2.7592722455976316e-05, "loss": 1.5535, "step": 14364 }, { "epoch": 5.0413744740532955, "grad_norm": 10.29323673248291, "learning_rate": 2.7565451145395043e-05, "loss": 1.5063, "step": 14378 }, { "epoch": 5.046283309957924, "grad_norm": 9.296043395996094, "learning_rate": 2.7538179834813778e-05, "loss": 1.5084, "step": 14392 }, { "epoch": 5.051192145862553, "grad_norm": 7.4277777671813965, "learning_rate": 2.751090852423251e-05, "loss": 1.5107, "step": 14406 }, { "epoch": 5.056100981767181, "grad_norm": 14.324305534362793, "learning_rate": 2.7483637213651244e-05, "loss": 1.522, "step": 14420 }, { "epoch": 
5.06100981767181, "grad_norm": 12.01891040802002, "learning_rate": 2.7456365903069968e-05, "loss": 1.4606, "step": 14434 }, { "epoch": 5.065918653576437, "grad_norm": 10.561046600341797, "learning_rate": 2.7429094592488703e-05, "loss": 1.4975, "step": 14448 }, { "epoch": 5.070827489481066, "grad_norm": 9.934788703918457, "learning_rate": 2.7401823281907434e-05, "loss": 1.5625, "step": 14462 }, { "epoch": 5.075736325385694, "grad_norm": 13.608302116394043, "learning_rate": 2.737455197132617e-05, "loss": 1.4304, "step": 14476 }, { "epoch": 5.080645161290323, "grad_norm": 12.651925086975098, "learning_rate": 2.7347280660744896e-05, "loss": 1.4775, "step": 14490 }, { "epoch": 5.085553997194951, "grad_norm": 11.472325325012207, "learning_rate": 2.7320009350163628e-05, "loss": 1.5164, "step": 14504 }, { "epoch": 5.090462833099579, "grad_norm": 10.306644439697266, "learning_rate": 2.7292738039582362e-05, "loss": 1.5059, "step": 14518 }, { "epoch": 5.095371669004208, "grad_norm": 10.602091789245605, "learning_rate": 2.7265466729001094e-05, "loss": 1.4864, "step": 14532 }, { "epoch": 5.100280504908836, "grad_norm": 9.467174530029297, "learning_rate": 2.7238195418419825e-05, "loss": 1.504, "step": 14546 }, { "epoch": 5.105189340813464, "grad_norm": 10.817330360412598, "learning_rate": 2.7210924107838553e-05, "loss": 1.4793, "step": 14560 }, { "epoch": 5.110098176718092, "grad_norm": 9.00490951538086, "learning_rate": 2.7183652797257287e-05, "loss": 1.4905, "step": 14574 }, { "epoch": 5.115007012622721, "grad_norm": 9.131601333618164, "learning_rate": 2.715638148667602e-05, "loss": 1.5179, "step": 14588 }, { "epoch": 5.1199158485273495, "grad_norm": 8.824382781982422, "learning_rate": 2.7129110176094753e-05, "loss": 1.5396, "step": 14602 }, { "epoch": 5.124824684431977, "grad_norm": 8.914623260498047, "learning_rate": 2.710183886551348e-05, "loss": 1.5252, "step": 14616 }, { "epoch": 5.129733520336606, "grad_norm": 9.91822338104248, "learning_rate": 2.7074567554932212e-05, 
"loss": 1.6081, "step": 14630 }, { "epoch": 5.134642356241234, "grad_norm": 9.212238311767578, "learning_rate": 2.7047296244350943e-05, "loss": 1.4528, "step": 14644 }, { "epoch": 5.139551192145863, "grad_norm": 10.962977409362793, "learning_rate": 2.7020024933769678e-05, "loss": 1.4926, "step": 14658 }, { "epoch": 5.144460028050491, "grad_norm": 12.485968589782715, "learning_rate": 2.6992753623188406e-05, "loss": 1.4373, "step": 14672 }, { "epoch": 5.149368863955119, "grad_norm": 9.201684951782227, "learning_rate": 2.6965482312607137e-05, "loss": 1.4185, "step": 14686 }, { "epoch": 5.1542776998597475, "grad_norm": 10.72849178314209, "learning_rate": 2.693821100202587e-05, "loss": 1.5352, "step": 14700 }, { "epoch": 5.159186535764376, "grad_norm": 11.774998664855957, "learning_rate": 2.6910939691444603e-05, "loss": 1.5494, "step": 14714 }, { "epoch": 5.164095371669005, "grad_norm": 9.737135887145996, "learning_rate": 2.688366838086333e-05, "loss": 1.5658, "step": 14728 }, { "epoch": 5.169004207573632, "grad_norm": 9.868380546569824, "learning_rate": 2.6856397070282062e-05, "loss": 1.4824, "step": 14742 }, { "epoch": 5.173913043478261, "grad_norm": 9.13532543182373, "learning_rate": 2.6829125759700796e-05, "loss": 1.5844, "step": 14756 }, { "epoch": 5.178821879382889, "grad_norm": 9.312422752380371, "learning_rate": 2.6801854449119528e-05, "loss": 1.4649, "step": 14770 }, { "epoch": 5.183730715287518, "grad_norm": 8.033080101013184, "learning_rate": 2.6774583138538262e-05, "loss": 1.5409, "step": 14784 }, { "epoch": 5.1886395511921455, "grad_norm": 9.6113862991333, "learning_rate": 2.674731182795699e-05, "loss": 1.5186, "step": 14798 }, { "epoch": 5.193548387096774, "grad_norm": 10.496145248413086, "learning_rate": 2.672004051737572e-05, "loss": 1.5037, "step": 14812 }, { "epoch": 5.198457223001403, "grad_norm": 10.66936206817627, "learning_rate": 2.6692769206794456e-05, "loss": 1.5237, "step": 14826 }, { "epoch": 5.203366058906031, "grad_norm": 11.149521827697754, 
"learning_rate": 2.6665497896213187e-05, "loss": 1.4858, "step": 14840 }, { "epoch": 5.208274894810659, "grad_norm": 9.701436042785645, "learning_rate": 2.6638226585631915e-05, "loss": 1.578, "step": 14854 }, { "epoch": 5.213183730715287, "grad_norm": 8.996268272399902, "learning_rate": 2.6610955275050646e-05, "loss": 1.5768, "step": 14868 }, { "epoch": 5.218092566619916, "grad_norm": 16.672197341918945, "learning_rate": 2.658368396446938e-05, "loss": 1.5509, "step": 14882 }, { "epoch": 5.223001402524544, "grad_norm": 7.819777488708496, "learning_rate": 2.6556412653888112e-05, "loss": 1.5148, "step": 14896 }, { "epoch": 5.227910238429173, "grad_norm": 9.684638023376465, "learning_rate": 2.652914134330684e-05, "loss": 1.48, "step": 14910 }, { "epoch": 5.232819074333801, "grad_norm": 11.993080139160156, "learning_rate": 2.6501870032725574e-05, "loss": 1.4851, "step": 14924 }, { "epoch": 5.237727910238429, "grad_norm": 13.42110824584961, "learning_rate": 2.6474598722144306e-05, "loss": 1.4354, "step": 14938 }, { "epoch": 5.242636746143058, "grad_norm": 9.174415588378906, "learning_rate": 2.644732741156304e-05, "loss": 1.4862, "step": 14952 }, { "epoch": 5.247545582047686, "grad_norm": 10.783615112304688, "learning_rate": 2.6420056100981765e-05, "loss": 1.4823, "step": 14966 }, { "epoch": 5.252454417952314, "grad_norm": 11.66201400756836, "learning_rate": 2.63927847904005e-05, "loss": 1.4038, "step": 14980 }, { "epoch": 5.257363253856942, "grad_norm": 11.767220497131348, "learning_rate": 2.636551347981923e-05, "loss": 1.4773, "step": 14994 }, { "epoch": 5.262272089761571, "grad_norm": 16.271791458129883, "learning_rate": 2.6338242169237965e-05, "loss": 1.5842, "step": 15008 }, { "epoch": 5.267180925666199, "grad_norm": 9.433297157287598, "learning_rate": 2.6310970858656693e-05, "loss": 1.4596, "step": 15022 }, { "epoch": 5.272089761570827, "grad_norm": 10.042821884155273, "learning_rate": 2.6283699548075424e-05, "loss": 1.4302, "step": 15036 }, { "epoch": 
5.276998597475456, "grad_norm": 15.53907299041748, "learning_rate": 2.625642823749416e-05, "loss": 1.4717, "step": 15050 }, { "epoch": 5.281907433380084, "grad_norm": 12.140774726867676, "learning_rate": 2.622915692691289e-05, "loss": 1.4309, "step": 15064 }, { "epoch": 5.286816269284713, "grad_norm": 10.222538948059082, "learning_rate": 2.6201885616331625e-05, "loss": 1.5349, "step": 15078 }, { "epoch": 5.291725105189341, "grad_norm": 10.042827606201172, "learning_rate": 2.617461430575035e-05, "loss": 1.4703, "step": 15092 }, { "epoch": 5.296633941093969, "grad_norm": 10.486383438110352, "learning_rate": 2.6147342995169084e-05, "loss": 1.4538, "step": 15106 }, { "epoch": 5.301542776998597, "grad_norm": 12.241567611694336, "learning_rate": 2.6120071684587815e-05, "loss": 1.4357, "step": 15120 }, { "epoch": 5.306451612903226, "grad_norm": 10.87087345123291, "learning_rate": 2.609280037400655e-05, "loss": 1.4023, "step": 15134 }, { "epoch": 5.3113604488078545, "grad_norm": 9.050533294677734, "learning_rate": 2.6065529063425277e-05, "loss": 1.527, "step": 15148 }, { "epoch": 5.316269284712482, "grad_norm": 7.801620960235596, "learning_rate": 2.603825775284401e-05, "loss": 1.4587, "step": 15162 }, { "epoch": 5.321178120617111, "grad_norm": 9.803585052490234, "learning_rate": 2.6010986442262743e-05, "loss": 1.5177, "step": 15176 }, { "epoch": 5.326086956521739, "grad_norm": 11.899490356445312, "learning_rate": 2.5983715131681474e-05, "loss": 1.4434, "step": 15190 }, { "epoch": 5.330995792426368, "grad_norm": 13.519216537475586, "learning_rate": 2.5956443821100202e-05, "loss": 1.4169, "step": 15204 }, { "epoch": 5.335904628330995, "grad_norm": 9.961104393005371, "learning_rate": 2.5929172510518933e-05, "loss": 1.4271, "step": 15218 }, { "epoch": 5.340813464235624, "grad_norm": 7.657628536224365, "learning_rate": 2.5901901199937668e-05, "loss": 1.5091, "step": 15232 }, { "epoch": 5.3457223001402525, "grad_norm": 10.430190086364746, "learning_rate": 2.58746298893564e-05, 
"loss": 1.5617, "step": 15246 }, { "epoch": 5.350631136044881, "grad_norm": 10.046984672546387, "learning_rate": 2.5847358578775127e-05, "loss": 1.4256, "step": 15260 }, { "epoch": 5.355539971949509, "grad_norm": 12.484925270080566, "learning_rate": 2.582008726819386e-05, "loss": 1.5074, "step": 15274 }, { "epoch": 5.360448807854137, "grad_norm": 10.967076301574707, "learning_rate": 2.5792815957612593e-05, "loss": 1.544, "step": 15288 }, { "epoch": 5.365357643758766, "grad_norm": 8.79193115234375, "learning_rate": 2.5765544647031324e-05, "loss": 1.5107, "step": 15302 }, { "epoch": 5.370266479663394, "grad_norm": 11.14491081237793, "learning_rate": 2.573827333645006e-05, "loss": 1.4043, "step": 15316 }, { "epoch": 5.375175315568022, "grad_norm": 9.866151809692383, "learning_rate": 2.5711002025868786e-05, "loss": 1.5032, "step": 15330 }, { "epoch": 5.3800841514726505, "grad_norm": 16.183712005615234, "learning_rate": 2.5683730715287518e-05, "loss": 1.4872, "step": 15344 }, { "epoch": 5.384992987377279, "grad_norm": 15.064723014831543, "learning_rate": 2.5656459404706252e-05, "loss": 1.4751, "step": 15358 }, { "epoch": 5.389901823281908, "grad_norm": 7.717487335205078, "learning_rate": 2.5629188094124984e-05, "loss": 1.4615, "step": 15372 }, { "epoch": 5.394810659186536, "grad_norm": 8.270702362060547, "learning_rate": 2.560191678354371e-05, "loss": 1.5353, "step": 15386 }, { "epoch": 5.399719495091164, "grad_norm": 10.126509666442871, "learning_rate": 2.5574645472962443e-05, "loss": 1.4475, "step": 15400 }, { "epoch": 5.404628330995792, "grad_norm": 13.742813110351562, "learning_rate": 2.5547374162381177e-05, "loss": 1.4583, "step": 15414 }, { "epoch": 5.409537166900421, "grad_norm": 8.14853286743164, "learning_rate": 2.552010285179991e-05, "loss": 1.4227, "step": 15428 }, { "epoch": 5.414446002805049, "grad_norm": 9.957688331604004, "learning_rate": 2.5492831541218636e-05, "loss": 1.4622, "step": 15442 }, { "epoch": 5.419354838709677, "grad_norm": 
10.143245697021484, "learning_rate": 2.546556023063737e-05, "loss": 1.5529, "step": 15456 }, { "epoch": 5.424263674614306, "grad_norm": 8.753482818603516, "learning_rate": 2.5438288920056102e-05, "loss": 1.5052, "step": 15470 }, { "epoch": 5.429172510518934, "grad_norm": 9.410078048706055, "learning_rate": 2.5411017609474837e-05, "loss": 1.4393, "step": 15484 }, { "epoch": 5.434081346423563, "grad_norm": 8.715230941772461, "learning_rate": 2.538374629889356e-05, "loss": 1.4792, "step": 15498 }, { "epoch": 5.43899018232819, "grad_norm": 10.485312461853027, "learning_rate": 2.5356474988312296e-05, "loss": 1.5115, "step": 15512 }, { "epoch": 5.443899018232819, "grad_norm": 13.775911331176758, "learning_rate": 2.5329203677731027e-05, "loss": 1.4603, "step": 15526 }, { "epoch": 5.448807854137447, "grad_norm": 8.108677864074707, "learning_rate": 2.530193236714976e-05, "loss": 1.4354, "step": 15540 }, { "epoch": 5.453716690042076, "grad_norm": 8.821089744567871, "learning_rate": 2.5274661056568493e-05, "loss": 1.4819, "step": 15554 }, { "epoch": 5.4586255259467045, "grad_norm": 10.816542625427246, "learning_rate": 2.524738974598722e-05, "loss": 1.451, "step": 15568 }, { "epoch": 5.463534361851332, "grad_norm": 9.48865795135498, "learning_rate": 2.5220118435405955e-05, "loss": 1.3926, "step": 15582 }, { "epoch": 5.468443197755961, "grad_norm": 11.544801712036133, "learning_rate": 2.5192847124824686e-05, "loss": 1.5436, "step": 15596 }, { "epoch": 5.473352033660589, "grad_norm": 10.305142402648926, "learning_rate": 2.516557581424342e-05, "loss": 1.5235, "step": 15610 }, { "epoch": 5.478260869565218, "grad_norm": 10.590760231018066, "learning_rate": 2.5138304503662145e-05, "loss": 1.4868, "step": 15624 }, { "epoch": 5.483169705469845, "grad_norm": 7.474534511566162, "learning_rate": 2.511103319308088e-05, "loss": 1.4991, "step": 15638 }, { "epoch": 5.488078541374474, "grad_norm": 10.573728561401367, "learning_rate": 2.508376188249961e-05, "loss": 1.4544, "step": 15652 }, { 
"epoch": 5.4929873772791025, "grad_norm": 8.621743202209473, "learning_rate": 2.5056490571918346e-05, "loss": 1.4626, "step": 15666 }, { "epoch": 5.497896213183731, "grad_norm": 11.079089164733887, "learning_rate": 2.5029219261337074e-05, "loss": 1.5114, "step": 15680 }, { "epoch": 5.502805049088359, "grad_norm": 11.437620162963867, "learning_rate": 2.5001947950755805e-05, "loss": 1.4471, "step": 15694 }, { "epoch": 5.507713884992987, "grad_norm": 8.199406623840332, "learning_rate": 2.497467664017454e-05, "loss": 1.5011, "step": 15708 }, { "epoch": 5.512622720897616, "grad_norm": 8.941916465759277, "learning_rate": 2.4947405329593267e-05, "loss": 1.4612, "step": 15722 }, { "epoch": 5.517531556802244, "grad_norm": 10.170859336853027, "learning_rate": 2.4920134019012002e-05, "loss": 1.4407, "step": 15736 }, { "epoch": 5.522440392706873, "grad_norm": 7.681944847106934, "learning_rate": 2.489286270843073e-05, "loss": 1.5101, "step": 15750 }, { "epoch": 5.5273492286115005, "grad_norm": 9.580062866210938, "learning_rate": 2.4865591397849464e-05, "loss": 1.5021, "step": 15764 }, { "epoch": 5.532258064516129, "grad_norm": 9.330092430114746, "learning_rate": 2.4838320087268192e-05, "loss": 1.6393, "step": 15778 }, { "epoch": 5.5371669004207575, "grad_norm": 11.519564628601074, "learning_rate": 2.4811048776686927e-05, "loss": 1.5028, "step": 15792 }, { "epoch": 5.542075736325386, "grad_norm": 7.591921329498291, "learning_rate": 2.4783777466105658e-05, "loss": 1.4116, "step": 15806 }, { "epoch": 5.546984572230014, "grad_norm": 9.51305866241455, "learning_rate": 2.475650615552439e-05, "loss": 1.4389, "step": 15820 }, { "epoch": 5.551893408134642, "grad_norm": 10.093965530395508, "learning_rate": 2.472923484494312e-05, "loss": 1.4938, "step": 15834 }, { "epoch": 5.556802244039271, "grad_norm": 9.198793411254883, "learning_rate": 2.470196353436185e-05, "loss": 1.5299, "step": 15848 }, { "epoch": 5.561711079943899, "grad_norm": 10.317605972290039, "learning_rate": 
2.4674692223780586e-05, "loss": 1.6031, "step": 15862 }, { "epoch": 5.566619915848527, "grad_norm": 8.21995735168457, "learning_rate": 2.4647420913199314e-05, "loss": 1.528, "step": 15876 }, { "epoch": 5.5715287517531555, "grad_norm": 10.989445686340332, "learning_rate": 2.462014960261805e-05, "loss": 1.4995, "step": 15890 }, { "epoch": 5.576437587657784, "grad_norm": 9.458423614501953, "learning_rate": 2.4592878292036777e-05, "loss": 1.4271, "step": 15904 }, { "epoch": 5.581346423562413, "grad_norm": 9.122955322265625, "learning_rate": 2.456560698145551e-05, "loss": 1.4583, "step": 15918 }, { "epoch": 5.586255259467041, "grad_norm": 7.938518524169922, "learning_rate": 2.453833567087424e-05, "loss": 1.5624, "step": 15932 }, { "epoch": 5.591164095371669, "grad_norm": 12.863533020019531, "learning_rate": 2.4511064360292974e-05, "loss": 1.4486, "step": 15946 }, { "epoch": 5.596072931276297, "grad_norm": 9.980164527893066, "learning_rate": 2.4483793049711705e-05, "loss": 1.5473, "step": 15960 }, { "epoch": 5.600981767180926, "grad_norm": 9.826157569885254, "learning_rate": 2.4456521739130436e-05, "loss": 1.4489, "step": 15974 }, { "epoch": 5.6058906030855535, "grad_norm": 11.19827651977539, "learning_rate": 2.4429250428549167e-05, "loss": 1.5885, "step": 15988 }, { "epoch": 5.610799438990182, "grad_norm": 9.529542922973633, "learning_rate": 2.44019791179679e-05, "loss": 1.5176, "step": 16002 }, { "epoch": 5.615708274894811, "grad_norm": 11.656436920166016, "learning_rate": 2.437470780738663e-05, "loss": 1.5657, "step": 16016 }, { "epoch": 5.620617110799439, "grad_norm": 12.954453468322754, "learning_rate": 2.434743649680536e-05, "loss": 1.4307, "step": 16030 }, { "epoch": 5.625525946704068, "grad_norm": 12.220403671264648, "learning_rate": 2.4320165186224096e-05, "loss": 1.5013, "step": 16044 }, { "epoch": 5.630434782608695, "grad_norm": 10.568757057189941, "learning_rate": 2.4292893875642823e-05, "loss": 1.4618, "step": 16058 }, { "epoch": 5.635343618513324, 
"grad_norm": 8.148253440856934, "learning_rate": 2.4265622565061558e-05, "loss": 1.4693, "step": 16072 }, { "epoch": 5.640252454417952, "grad_norm": 13.97885799407959, "learning_rate": 2.423835125448029e-05, "loss": 1.5174, "step": 16086 }, { "epoch": 5.645161290322581, "grad_norm": 11.083066940307617, "learning_rate": 2.421107994389902e-05, "loss": 1.3656, "step": 16100 }, { "epoch": 5.650070126227209, "grad_norm": 11.526761054992676, "learning_rate": 2.418380863331775e-05, "loss": 1.523, "step": 16114 }, { "epoch": 5.654978962131837, "grad_norm": 10.015779495239258, "learning_rate": 2.4156537322736483e-05, "loss": 1.4344, "step": 16128 }, { "epoch": 5.659887798036466, "grad_norm": 7.6912031173706055, "learning_rate": 2.4129266012155214e-05, "loss": 1.5921, "step": 16142 }, { "epoch": 5.664796633941094, "grad_norm": 11.58763313293457, "learning_rate": 2.4101994701573945e-05, "loss": 1.4747, "step": 16156 }, { "epoch": 5.669705469845722, "grad_norm": 10.542034149169922, "learning_rate": 2.4074723390992676e-05, "loss": 1.5187, "step": 16170 }, { "epoch": 5.67461430575035, "grad_norm": 8.030844688415527, "learning_rate": 2.4047452080411408e-05, "loss": 1.5607, "step": 16184 }, { "epoch": 5.679523141654979, "grad_norm": 10.379592895507812, "learning_rate": 2.402018076983014e-05, "loss": 1.4669, "step": 16198 }, { "epoch": 5.6844319775596075, "grad_norm": 8.623067855834961, "learning_rate": 2.399290945924887e-05, "loss": 1.5181, "step": 16212 }, { "epoch": 5.689340813464236, "grad_norm": 8.488362312316895, "learning_rate": 2.39656381486676e-05, "loss": 1.4436, "step": 16226 }, { "epoch": 5.694249649368864, "grad_norm": 9.368444442749023, "learning_rate": 2.3938366838086336e-05, "loss": 1.427, "step": 16240 }, { "epoch": 5.699158485273492, "grad_norm": 10.04302978515625, "learning_rate": 2.3911095527505064e-05, "loss": 1.4921, "step": 16254 }, { "epoch": 5.704067321178121, "grad_norm": 12.247575759887695, "learning_rate": 2.38838242169238e-05, "loss": 1.4681, "step": 
16268 }, { "epoch": 5.708976157082749, "grad_norm": 8.732044219970703, "learning_rate": 2.3856552906342526e-05, "loss": 1.5172, "step": 16282 }, { "epoch": 5.713884992987377, "grad_norm": 12.912535667419434, "learning_rate": 2.382928159576126e-05, "loss": 1.443, "step": 16296 }, { "epoch": 5.7187938288920055, "grad_norm": 10.172974586486816, "learning_rate": 2.3802010285179992e-05, "loss": 1.5209, "step": 16310 }, { "epoch": 5.723702664796634, "grad_norm": 11.503385543823242, "learning_rate": 2.3774738974598723e-05, "loss": 1.5319, "step": 16324 }, { "epoch": 5.728611500701263, "grad_norm": 9.782374382019043, "learning_rate": 2.3747467664017454e-05, "loss": 1.447, "step": 16338 }, { "epoch": 5.73352033660589, "grad_norm": 11.036943435668945, "learning_rate": 2.3720196353436186e-05, "loss": 1.4979, "step": 16352 }, { "epoch": 5.738429172510519, "grad_norm": 12.21195125579834, "learning_rate": 2.369292504285492e-05, "loss": 1.4681, "step": 16366 }, { "epoch": 5.743338008415147, "grad_norm": 10.80864429473877, "learning_rate": 2.3665653732273648e-05, "loss": 1.4596, "step": 16380 }, { "epoch": 5.748246844319776, "grad_norm": 11.83200454711914, "learning_rate": 2.3638382421692383e-05, "loss": 1.5069, "step": 16394 }, { "epoch": 5.753155680224404, "grad_norm": 10.44923210144043, "learning_rate": 2.361111111111111e-05, "loss": 1.5329, "step": 16408 }, { "epoch": 5.758064516129032, "grad_norm": 10.31904125213623, "learning_rate": 2.3583839800529845e-05, "loss": 1.4456, "step": 16422 }, { "epoch": 5.762973352033661, "grad_norm": 8.462638854980469, "learning_rate": 2.3556568489948573e-05, "loss": 1.4872, "step": 16436 }, { "epoch": 5.767882187938289, "grad_norm": 9.697592735290527, "learning_rate": 2.3529297179367308e-05, "loss": 1.4025, "step": 16450 }, { "epoch": 5.772791023842917, "grad_norm": 8.221132278442383, "learning_rate": 2.350202586878604e-05, "loss": 1.5065, "step": 16464 }, { "epoch": 5.777699859747545, "grad_norm": 8.630955696105957, "learning_rate": 
2.347475455820477e-05, "loss": 1.4926, "step": 16478 }, { "epoch": 5.782608695652174, "grad_norm": 9.214800834655762, "learning_rate": 2.34474832476235e-05, "loss": 1.5374, "step": 16492 }, { "epoch": 5.787517531556802, "grad_norm": 9.669870376586914, "learning_rate": 2.3420211937042232e-05, "loss": 1.5387, "step": 16506 }, { "epoch": 5.792426367461431, "grad_norm": 9.121651649475098, "learning_rate": 2.3392940626460964e-05, "loss": 1.5354, "step": 16520 }, { "epoch": 5.797335203366059, "grad_norm": 9.68411636352539, "learning_rate": 2.3365669315879695e-05, "loss": 1.4694, "step": 16534 }, { "epoch": 5.802244039270687, "grad_norm": 13.968744277954102, "learning_rate": 2.3338398005298426e-05, "loss": 1.4728, "step": 16548 }, { "epoch": 5.807152875175316, "grad_norm": 10.822831153869629, "learning_rate": 2.3311126694717157e-05, "loss": 1.4569, "step": 16562 }, { "epoch": 5.812061711079944, "grad_norm": 8.317570686340332, "learning_rate": 2.3283855384135892e-05, "loss": 1.4527, "step": 16576 }, { "epoch": 5.816970546984573, "grad_norm": 11.460013389587402, "learning_rate": 2.325658407355462e-05, "loss": 1.5329, "step": 16590 }, { "epoch": 5.8218793828892, "grad_norm": 10.29436206817627, "learning_rate": 2.3229312762973354e-05, "loss": 1.4773, "step": 16604 }, { "epoch": 5.826788218793829, "grad_norm": 10.599599838256836, "learning_rate": 2.3202041452392086e-05, "loss": 1.4036, "step": 16618 }, { "epoch": 5.8316970546984574, "grad_norm": 11.653804779052734, "learning_rate": 2.3174770141810817e-05, "loss": 1.4704, "step": 16632 }, { "epoch": 5.836605890603085, "grad_norm": 10.494343757629395, "learning_rate": 2.3147498831229548e-05, "loss": 1.4493, "step": 16646 }, { "epoch": 5.841514726507714, "grad_norm": 8.908622741699219, "learning_rate": 2.312022752064828e-05, "loss": 1.5986, "step": 16660 }, { "epoch": 5.846423562412342, "grad_norm": 7.868162631988525, "learning_rate": 2.309295621006701e-05, "loss": 1.4784, "step": 16674 }, { "epoch": 5.851332398316971, 
"grad_norm": 7.682257175445557, "learning_rate": 2.306568489948574e-05, "loss": 1.4519, "step": 16688 }, { "epoch": 5.856241234221599, "grad_norm": 10.003222465515137, "learning_rate": 2.3038413588904473e-05, "loss": 1.4233, "step": 16702 }, { "epoch": 5.861150070126227, "grad_norm": 9.260310173034668, "learning_rate": 2.3011142278323204e-05, "loss": 1.4721, "step": 16716 }, { "epoch": 5.8660589060308554, "grad_norm": 10.833046913146973, "learning_rate": 2.2983870967741935e-05, "loss": 1.4897, "step": 16730 }, { "epoch": 5.870967741935484, "grad_norm": 9.674302101135254, "learning_rate": 2.295659965716067e-05, "loss": 1.53, "step": 16744 }, { "epoch": 5.8758765778401125, "grad_norm": 10.764018058776855, "learning_rate": 2.2929328346579398e-05, "loss": 1.5448, "step": 16758 }, { "epoch": 5.88078541374474, "grad_norm": 10.686898231506348, "learning_rate": 2.2902057035998132e-05, "loss": 1.4422, "step": 16772 }, { "epoch": 5.885694249649369, "grad_norm": 9.511950492858887, "learning_rate": 2.287478572541686e-05, "loss": 1.4426, "step": 16786 }, { "epoch": 5.890603085553997, "grad_norm": 11.369965553283691, "learning_rate": 2.2847514414835595e-05, "loss": 1.4351, "step": 16800 }, { "epoch": 5.895511921458626, "grad_norm": 7.908684730529785, "learning_rate": 2.2820243104254326e-05, "loss": 1.4913, "step": 16814 }, { "epoch": 5.900420757363253, "grad_norm": 14.781864166259766, "learning_rate": 2.2792971793673057e-05, "loss": 1.5549, "step": 16828 }, { "epoch": 5.905329593267882, "grad_norm": 10.581713676452637, "learning_rate": 2.276570048309179e-05, "loss": 1.4791, "step": 16842 }, { "epoch": 5.9102384291725105, "grad_norm": 8.692490577697754, "learning_rate": 2.2740377123266323e-05, "loss": 1.5094, "step": 16856 }, { "epoch": 5.915147265077139, "grad_norm": 10.264924049377441, "learning_rate": 2.2713105812685058e-05, "loss": 1.4278, "step": 16870 }, { "epoch": 5.920056100981768, "grad_norm": 11.44729995727539, "learning_rate": 2.2685834502103786e-05, "loss": 1.4886, 
"step": 16884 }, { "epoch": 5.924964936886395, "grad_norm": 10.96728515625, "learning_rate": 2.265856319152252e-05, "loss": 1.4673, "step": 16898 }, { "epoch": 5.929873772791024, "grad_norm": 11.541457176208496, "learning_rate": 2.263129188094125e-05, "loss": 1.5301, "step": 16912 }, { "epoch": 5.934782608695652, "grad_norm": 11.090242385864258, "learning_rate": 2.2604020570359983e-05, "loss": 1.4318, "step": 16926 }, { "epoch": 5.939691444600281, "grad_norm": 10.806449890136719, "learning_rate": 2.2576749259778714e-05, "loss": 1.5122, "step": 16940 }, { "epoch": 5.9446002805049085, "grad_norm": 11.456941604614258, "learning_rate": 2.2549477949197445e-05, "loss": 1.4435, "step": 16954 }, { "epoch": 5.949509116409537, "grad_norm": 9.093932151794434, "learning_rate": 2.2522206638616177e-05, "loss": 1.4695, "step": 16968 }, { "epoch": 5.954417952314166, "grad_norm": 10.029184341430664, "learning_rate": 2.2494935328034908e-05, "loss": 1.479, "step": 16982 }, { "epoch": 5.959326788218794, "grad_norm": 8.10474967956543, "learning_rate": 2.2467664017453642e-05, "loss": 1.4673, "step": 16996 }, { "epoch": 5.964235624123422, "grad_norm": 12.506030082702637, "learning_rate": 2.244039270687237e-05, "loss": 1.4734, "step": 17010 }, { "epoch": 5.96914446002805, "grad_norm": 10.493617057800293, "learning_rate": 2.2413121396291105e-05, "loss": 1.543, "step": 17024 }, { "epoch": 5.974053295932679, "grad_norm": 11.451977729797363, "learning_rate": 2.2385850085709833e-05, "loss": 1.4732, "step": 17038 }, { "epoch": 5.978962131837307, "grad_norm": 8.62108325958252, "learning_rate": 2.2358578775128567e-05, "loss": 1.4923, "step": 17052 }, { "epoch": 5.983870967741936, "grad_norm": 9.49013900756836, "learning_rate": 2.2331307464547295e-05, "loss": 1.5061, "step": 17066 }, { "epoch": 5.988779803646564, "grad_norm": 13.788419723510742, "learning_rate": 2.230403615396603e-05, "loss": 1.4015, "step": 17080 }, { "epoch": 5.993688639551192, "grad_norm": 11.859352111816406, "learning_rate": 
2.227676484338476e-05, "loss": 1.4898, "step": 17094 }, { "epoch": 5.998597475455821, "grad_norm": 10.014341354370117, "learning_rate": 2.2249493532803492e-05, "loss": 1.5053, "step": 17108 }, { "epoch": 6.0, "eval_loss": 1.45059072971344, "eval_map": 0.1102, "eval_map_50": 0.1615, "eval_map_75": 0.1254, "eval_map_applique": 0.0009, "eval_map_bag, wallet": 0.1031, "eval_map_bead": 0.0224, "eval_map_belt": 0.1243, "eval_map_bow": 0.0, "eval_map_buckle": 0.1368, "eval_map_cape": 0.0, "eval_map_cardigan": 0.0, "eval_map_coat": 0.1438, "eval_map_collar": 0.1794, "eval_map_dress": 0.4295, "eval_map_epaulette": 0.0091, "eval_map_flower": 0.0, "eval_map_fringe": 0.0, "eval_map_glasses": 0.1776, "eval_map_glove": 0.0478, "eval_map_hat": 0.242, "eval_map_headband, head covering, hair accessory": 0.0808, "eval_map_hood": 0.0461, "eval_map_jacket": 0.2564, "eval_map_jumpsuit": 0.01, "eval_map_lapel": 0.1113, "eval_map_large": 0.1109, "eval_map_leg warmer": 0.0, "eval_map_medium": 0.0574, "eval_map_neckline": 0.3274, "eval_map_pants": 0.3928, "eval_map_pocket": 0.0863, "eval_map_ribbon": 0.0, "eval_map_rivet": 0.0236, "eval_map_ruffle": 0.0356, "eval_map_scarf": 0.012, "eval_map_sequin": 0.0, "eval_map_shirt, blouse": 0.0542, "eval_map_shoe": 0.3829, "eval_map_shorts": 0.2424, "eval_map_skirt": 0.2999, "eval_map_sleeve": 0.3115, "eval_map_small": 0.0, "eval_map_sock": 0.0448, "eval_map_sweater": 0.0, "eval_map_tassel": 0.0, "eval_map_tie": 0.0687, "eval_map_tights, stockings": 0.1913, "eval_map_top, t-shirt, sweatshirt": 0.1656, "eval_map_umbrella": 0.2314, "eval_map_vest": 0.0, "eval_map_watch": 0.039, "eval_map_zipper": 0.0403, "eval_mar_1": 0.1679, "eval_mar_10": 0.36, "eval_mar_100": 0.3681, "eval_mar_100_applique": 0.0262, "eval_mar_100_bag, wallet": 0.5338, "eval_mar_100_bead": 0.2748, "eval_mar_100_belt": 0.5909, "eval_mar_100_bow": 0.0, "eval_mar_100_buckle": 0.4149, "eval_mar_100_cape": 0.0, "eval_mar_100_cardigan": 0.0, "eval_mar_100_coat": 0.5078, 
"eval_mar_100_collar": 0.5802, "eval_mar_100_dress": 0.8238, "eval_mar_100_epaulette": 0.2214, "eval_mar_100_flower": 0.0, "eval_mar_100_fringe": 0.0, "eval_mar_100_glasses": 0.6419, "eval_mar_100_glove": 0.1935, "eval_mar_100_hat": 0.6068, "eval_mar_100_headband, head covering, hair accessory": 0.4239, "eval_mar_100_hood": 0.1, "eval_mar_100_jacket": 0.7275, "eval_mar_100_jumpsuit": 0.1238, "eval_mar_100_lapel": 0.5496, "eval_mar_100_leg warmer": 0.0, "eval_mar_100_neckline": 0.7677, "eval_mar_100_pants": 0.8067, "eval_mar_100_pocket": 0.6855, "eval_mar_100_ribbon": 0.0, "eval_mar_100_rivet": 0.1686, "eval_mar_100_ruffle": 0.2539, "eval_mar_100_scarf": 0.0396, "eval_mar_100_sequin": 0.0, "eval_mar_100_shirt, blouse": 0.2832, "eval_mar_100_shoe": 0.7708, "eval_mar_100_shorts": 0.6349, "eval_mar_100_skirt": 0.7494, "eval_mar_100_sleeve": 0.7153, "eval_mar_100_sock": 0.6541, "eval_mar_100_sweater": 0.0, "eval_mar_100_tassel": 0.0, "eval_mar_100_tie": 0.6, "eval_mar_100_tights, stockings": 0.7238, "eval_mar_100_top, t-shirt, sweatshirt": 0.7171, "eval_mar_100_umbrella": 0.32, "eval_mar_100_vest": 0.0, "eval_mar_100_watch": 0.3988, "eval_mar_100_zipper": 0.3026, "eval_mar_large": 0.3712, "eval_mar_medium": 0.138, "eval_mar_small": 0.0, "eval_runtime": 81.259, "eval_samples_per_second": 14.251, "eval_steps_per_second": 0.455, "step": 17112 }, { "epoch": 6.003506311360449, "grad_norm": 9.068500518798828, "learning_rate": 2.2222222222222223e-05, "loss": 1.5001, "step": 17122 }, { "epoch": 6.008415147265077, "grad_norm": 10.508646965026855, "learning_rate": 2.2194950911640955e-05, "loss": 1.436, "step": 17136 }, { "epoch": 6.013323983169705, "grad_norm": 12.8457612991333, "learning_rate": 2.2167679601059686e-05, "loss": 1.3901, "step": 17150 }, { "epoch": 6.018232819074334, "grad_norm": 8.916848182678223, "learning_rate": 2.2140408290478417e-05, "loss": 1.4724, "step": 17164 }, { "epoch": 6.0231416549789625, "grad_norm": 8.48674201965332, "learning_rate": 
2.211313697989715e-05, "loss": 1.4419, "step": 17178 }, { "epoch": 6.02805049088359, "grad_norm": 8.519328117370605, "learning_rate": 2.208586566931588e-05, "loss": 1.5074, "step": 17192 }, { "epoch": 6.032959326788219, "grad_norm": 8.271635055541992, "learning_rate": 2.2058594358734614e-05, "loss": 1.4611, "step": 17206 }, { "epoch": 6.037868162692847, "grad_norm": 9.49409294128418, "learning_rate": 2.2031323048153342e-05, "loss": 1.4496, "step": 17220 }, { "epoch": 6.042776998597476, "grad_norm": 11.67951774597168, "learning_rate": 2.2004051737572076e-05, "loss": 1.454, "step": 17234 }, { "epoch": 6.047685834502103, "grad_norm": 9.643449783325195, "learning_rate": 2.1976780426990808e-05, "loss": 1.4218, "step": 17248 }, { "epoch": 6.052594670406732, "grad_norm": 10.617080688476562, "learning_rate": 2.194950911640954e-05, "loss": 1.471, "step": 17262 }, { "epoch": 6.0575035063113605, "grad_norm": 10.32384967803955, "learning_rate": 2.192223780582827e-05, "loss": 1.4281, "step": 17276 }, { "epoch": 6.062412342215989, "grad_norm": 9.527899742126465, "learning_rate": 2.1894966495247e-05, "loss": 1.4518, "step": 17290 }, { "epoch": 6.067321178120617, "grad_norm": 9.227794647216797, "learning_rate": 2.1867695184665733e-05, "loss": 1.5274, "step": 17304 }, { "epoch": 6.072230014025245, "grad_norm": 9.16999626159668, "learning_rate": 2.1840423874084464e-05, "loss": 1.4008, "step": 17318 }, { "epoch": 6.077138849929874, "grad_norm": 9.679044723510742, "learning_rate": 2.1813152563503195e-05, "loss": 1.4457, "step": 17332 }, { "epoch": 6.082047685834502, "grad_norm": 9.025115966796875, "learning_rate": 2.1785881252921926e-05, "loss": 1.5402, "step": 17346 }, { "epoch": 6.086956521739131, "grad_norm": 12.776460647583008, "learning_rate": 2.1758609942340657e-05, "loss": 1.5973, "step": 17360 }, { "epoch": 6.0918653576437585, "grad_norm": 11.206905364990234, "learning_rate": 2.1731338631759392e-05, "loss": 1.4833, "step": 17374 }, { "epoch": 6.096774193548387, "grad_norm": 
11.683110237121582, "learning_rate": 2.170406732117812e-05, "loss": 1.4735, "step": 17388 }, { "epoch": 6.1016830294530155, "grad_norm": 8.331926345825195, "learning_rate": 2.1676796010596854e-05, "loss": 1.3819, "step": 17402 }, { "epoch": 6.106591865357644, "grad_norm": 9.959644317626953, "learning_rate": 2.1649524700015586e-05, "loss": 1.5054, "step": 17416 }, { "epoch": 6.111500701262272, "grad_norm": 8.732429504394531, "learning_rate": 2.1622253389434317e-05, "loss": 1.4482, "step": 17430 }, { "epoch": 6.1164095371669, "grad_norm": 9.3687162399292, "learning_rate": 2.1594982078853048e-05, "loss": 1.5057, "step": 17444 }, { "epoch": 6.121318373071529, "grad_norm": 9.055269241333008, "learning_rate": 2.156771076827178e-05, "loss": 1.5053, "step": 17458 }, { "epoch": 6.126227208976157, "grad_norm": 9.992002487182617, "learning_rate": 2.154043945769051e-05, "loss": 1.4731, "step": 17472 }, { "epoch": 6.131136044880785, "grad_norm": 8.384088516235352, "learning_rate": 2.1513168147109242e-05, "loss": 1.5886, "step": 17486 }, { "epoch": 6.1360448807854135, "grad_norm": 8.024591445922852, "learning_rate": 2.1485896836527973e-05, "loss": 1.439, "step": 17500 }, { "epoch": 6.140953716690042, "grad_norm": 10.275501251220703, "learning_rate": 2.1458625525946704e-05, "loss": 1.3289, "step": 17514 }, { "epoch": 6.145862552594671, "grad_norm": 11.699153900146484, "learning_rate": 2.143135421536544e-05, "loss": 1.4755, "step": 17528 }, { "epoch": 6.150771388499299, "grad_norm": 9.4712553024292, "learning_rate": 2.1404082904784167e-05, "loss": 1.4178, "step": 17542 }, { "epoch": 6.155680224403927, "grad_norm": 9.175274848937988, "learning_rate": 2.13768115942029e-05, "loss": 1.4685, "step": 17556 }, { "epoch": 6.160589060308555, "grad_norm": 12.330000877380371, "learning_rate": 2.134954028362163e-05, "loss": 1.4728, "step": 17570 }, { "epoch": 6.165497896213184, "grad_norm": 9.581226348876953, "learning_rate": 2.1322268973040364e-05, "loss": 1.4983, "step": 17584 }, { "epoch": 
6.170406732117812, "grad_norm": 7.944814682006836, "learning_rate": 2.129499766245909e-05, "loss": 1.3899, "step": 17598 }, { "epoch": 6.17531556802244, "grad_norm": 11.616495132446289, "learning_rate": 2.1267726351877826e-05, "loss": 1.4415, "step": 17612 }, { "epoch": 6.180224403927069, "grad_norm": 11.340723991394043, "learning_rate": 2.1240455041296557e-05, "loss": 1.4155, "step": 17626 }, { "epoch": 6.185133239831697, "grad_norm": 9.289383888244629, "learning_rate": 2.121318373071529e-05, "loss": 1.4765, "step": 17640 }, { "epoch": 6.190042075736326, "grad_norm": 9.487038612365723, "learning_rate": 2.118591242013402e-05, "loss": 1.4051, "step": 17654 }, { "epoch": 6.194950911640953, "grad_norm": 10.028060913085938, "learning_rate": 2.115864110955275e-05, "loss": 1.5638, "step": 17668 }, { "epoch": 6.199859747545582, "grad_norm": 7.275920867919922, "learning_rate": 2.1131369798971486e-05, "loss": 1.4383, "step": 17682 }, { "epoch": 6.20476858345021, "grad_norm": 9.868939399719238, "learning_rate": 2.1104098488390213e-05, "loss": 1.4458, "step": 17696 }, { "epoch": 6.209677419354839, "grad_norm": 11.343465805053711, "learning_rate": 2.1076827177808948e-05, "loss": 1.4501, "step": 17710 }, { "epoch": 6.214586255259467, "grad_norm": 9.167390823364258, "learning_rate": 2.1049555867227676e-05, "loss": 1.5023, "step": 17724 }, { "epoch": 6.219495091164095, "grad_norm": 12.570178031921387, "learning_rate": 2.102228455664641e-05, "loss": 1.4735, "step": 17738 }, { "epoch": 6.224403927068724, "grad_norm": 10.759772300720215, "learning_rate": 2.099501324606514e-05, "loss": 1.5136, "step": 17752 }, { "epoch": 6.229312762973352, "grad_norm": 7.906188488006592, "learning_rate": 2.0967741935483873e-05, "loss": 1.4663, "step": 17766 }, { "epoch": 6.234221598877981, "grad_norm": 7.923150062561035, "learning_rate": 2.0940470624902604e-05, "loss": 1.4439, "step": 17780 }, { "epoch": 6.239130434782608, "grad_norm": 10.664134979248047, "learning_rate": 2.0913199314321335e-05, 
"loss": 1.4426, "step": 17794 }, { "epoch": 6.244039270687237, "grad_norm": 8.980731010437012, "learning_rate": 2.0885928003740066e-05, "loss": 1.4697, "step": 17808 }, { "epoch": 6.2489481065918655, "grad_norm": 8.363482475280762, "learning_rate": 2.0858656693158798e-05, "loss": 1.4151, "step": 17822 }, { "epoch": 6.253856942496494, "grad_norm": 8.338066101074219, "learning_rate": 2.083138538257753e-05, "loss": 1.5217, "step": 17836 }, { "epoch": 6.258765778401122, "grad_norm": 9.81488037109375, "learning_rate": 2.080411407199626e-05, "loss": 1.416, "step": 17850 }, { "epoch": 6.26367461430575, "grad_norm": 10.538592338562012, "learning_rate": 2.077684276141499e-05, "loss": 1.433, "step": 17864 }, { "epoch": 6.268583450210379, "grad_norm": 8.380131721496582, "learning_rate": 2.0749571450833723e-05, "loss": 1.4898, "step": 17878 }, { "epoch": 6.273492286115007, "grad_norm": 14.992820739746094, "learning_rate": 2.0722300140252454e-05, "loss": 1.4679, "step": 17892 }, { "epoch": 6.278401122019635, "grad_norm": 9.680002212524414, "learning_rate": 2.069502882967119e-05, "loss": 1.4649, "step": 17906 }, { "epoch": 6.2833099579242635, "grad_norm": 8.482555389404297, "learning_rate": 2.066775751908992e-05, "loss": 1.3762, "step": 17920 }, { "epoch": 6.288218793828892, "grad_norm": 14.837858200073242, "learning_rate": 2.064048620850865e-05, "loss": 1.4415, "step": 17934 }, { "epoch": 6.293127629733521, "grad_norm": 9.53703784942627, "learning_rate": 2.0613214897927382e-05, "loss": 1.4568, "step": 17948 }, { "epoch": 6.298036465638148, "grad_norm": 10.602538108825684, "learning_rate": 2.0585943587346113e-05, "loss": 1.4379, "step": 17962 }, { "epoch": 6.302945301542777, "grad_norm": 12.194435119628906, "learning_rate": 2.0558672276764844e-05, "loss": 1.4131, "step": 17976 }, { "epoch": 6.307854137447405, "grad_norm": 8.640816688537598, "learning_rate": 2.0531400966183576e-05, "loss": 1.4249, "step": 17990 }, { "epoch": 6.312762973352034, "grad_norm": 10.857731819152832, 
"learning_rate": 2.0504129655602307e-05, "loss": 1.4308, "step": 18004 }, { "epoch": 6.317671809256662, "grad_norm": 10.382655143737793, "learning_rate": 2.0476858345021038e-05, "loss": 1.4096, "step": 18018 }, { "epoch": 6.32258064516129, "grad_norm": 8.584638595581055, "learning_rate": 2.0449587034439773e-05, "loss": 1.3975, "step": 18032 }, { "epoch": 6.327489481065919, "grad_norm": 9.514897346496582, "learning_rate": 2.04223157238585e-05, "loss": 1.4138, "step": 18046 }, { "epoch": 6.332398316970547, "grad_norm": 10.798758506774902, "learning_rate": 2.0395044413277235e-05, "loss": 1.476, "step": 18060 }, { "epoch": 6.337307152875176, "grad_norm": 8.662817001342773, "learning_rate": 2.0367773102695963e-05, "loss": 1.4536, "step": 18074 }, { "epoch": 6.342215988779803, "grad_norm": 9.438653945922852, "learning_rate": 2.0340501792114698e-05, "loss": 1.5231, "step": 18088 }, { "epoch": 6.347124824684432, "grad_norm": 9.605496406555176, "learning_rate": 2.0313230481533425e-05, "loss": 1.4426, "step": 18102 }, { "epoch": 6.35203366058906, "grad_norm": 8.355499267578125, "learning_rate": 2.028595917095216e-05, "loss": 1.4016, "step": 18116 }, { "epoch": 6.356942496493689, "grad_norm": 9.466206550598145, "learning_rate": 2.025868786037089e-05, "loss": 1.5048, "step": 18130 }, { "epoch": 6.361851332398317, "grad_norm": 11.348767280578613, "learning_rate": 2.0231416549789622e-05, "loss": 1.4504, "step": 18144 }, { "epoch": 6.366760168302945, "grad_norm": 7.334563732147217, "learning_rate": 2.0204145239208354e-05, "loss": 1.4412, "step": 18158 }, { "epoch": 6.371669004207574, "grad_norm": 12.698456764221191, "learning_rate": 2.0176873928627085e-05, "loss": 1.4512, "step": 18172 }, { "epoch": 6.376577840112202, "grad_norm": 10.626336097717285, "learning_rate": 2.014960261804582e-05, "loss": 1.3714, "step": 18186 }, { "epoch": 6.381486676016831, "grad_norm": 10.079010963439941, "learning_rate": 2.0122331307464547e-05, "loss": 1.4234, "step": 18200 }, { "epoch": 
6.386395511921458, "grad_norm": 9.41364574432373, "learning_rate": 2.0095059996883282e-05, "loss": 1.3886, "step": 18214 }, { "epoch": 6.391304347826087, "grad_norm": 15.254612922668457, "learning_rate": 2.006778868630201e-05, "loss": 1.475, "step": 18228 }, { "epoch": 6.3962131837307155, "grad_norm": 10.010102272033691, "learning_rate": 2.0040517375720744e-05, "loss": 1.4267, "step": 18242 }, { "epoch": 6.401122019635344, "grad_norm": 10.914276123046875, "learning_rate": 2.0013246065139472e-05, "loss": 1.4428, "step": 18256 }, { "epoch": 6.406030855539972, "grad_norm": 10.0631103515625, "learning_rate": 1.9985974754558207e-05, "loss": 1.4794, "step": 18270 }, { "epoch": 6.4109396914446, "grad_norm": 10.975937843322754, "learning_rate": 1.9958703443976938e-05, "loss": 1.4126, "step": 18284 }, { "epoch": 6.415848527349229, "grad_norm": 9.281163215637207, "learning_rate": 1.993143213339567e-05, "loss": 1.4962, "step": 18298 }, { "epoch": 6.420757363253857, "grad_norm": 7.56453800201416, "learning_rate": 1.99041608228144e-05, "loss": 1.4056, "step": 18312 }, { "epoch": 6.425666199158485, "grad_norm": 9.836796760559082, "learning_rate": 1.9876889512233132e-05, "loss": 1.4697, "step": 18326 }, { "epoch": 6.4305750350631135, "grad_norm": 7.9421491622924805, "learning_rate": 1.9849618201651863e-05, "loss": 1.4207, "step": 18340 }, { "epoch": 6.435483870967742, "grad_norm": 12.528155326843262, "learning_rate": 1.9822346891070594e-05, "loss": 1.4492, "step": 18354 }, { "epoch": 6.4403927068723705, "grad_norm": 9.686692237854004, "learning_rate": 1.9795075580489325e-05, "loss": 1.3886, "step": 18368 }, { "epoch": 6.445301542776998, "grad_norm": 9.666247367858887, "learning_rate": 1.9767804269908057e-05, "loss": 1.478, "step": 18382 }, { "epoch": 6.450210378681627, "grad_norm": 13.610347747802734, "learning_rate": 1.9740532959326788e-05, "loss": 1.4304, "step": 18396 }, { "epoch": 6.455119214586255, "grad_norm": 9.56430721282959, "learning_rate": 1.9713261648745522e-05, 
"loss": 1.4493, "step": 18410 }, { "epoch": 6.460028050490884, "grad_norm": 11.889124870300293, "learning_rate": 1.9685990338164254e-05, "loss": 1.4663, "step": 18424 }, { "epoch": 6.4649368863955115, "grad_norm": 17.291988372802734, "learning_rate": 1.9658719027582985e-05, "loss": 1.4206, "step": 18438 }, { "epoch": 6.46984572230014, "grad_norm": 10.754548072814941, "learning_rate": 1.9631447717001716e-05, "loss": 1.5272, "step": 18452 }, { "epoch": 6.4747545582047685, "grad_norm": 8.094221115112305, "learning_rate": 1.9604176406420447e-05, "loss": 1.4459, "step": 18466 }, { "epoch": 6.479663394109397, "grad_norm": 10.806952476501465, "learning_rate": 1.957690509583918e-05, "loss": 1.3938, "step": 18480 }, { "epoch": 6.484572230014026, "grad_norm": 10.832298278808594, "learning_rate": 1.954963378525791e-05, "loss": 1.5341, "step": 18494 }, { "epoch": 6.489481065918653, "grad_norm": 8.12965202331543, "learning_rate": 1.952236247467664e-05, "loss": 1.3957, "step": 18508 }, { "epoch": 6.494389901823282, "grad_norm": 11.571578025817871, "learning_rate": 1.9495091164095372e-05, "loss": 1.3961, "step": 18522 }, { "epoch": 6.49929873772791, "grad_norm": 12.90964412689209, "learning_rate": 1.9467819853514103e-05, "loss": 1.4552, "step": 18536 }, { "epoch": 6.504207573632539, "grad_norm": 11.175451278686523, "learning_rate": 1.9440548542932835e-05, "loss": 1.463, "step": 18550 }, { "epoch": 6.5091164095371665, "grad_norm": 9.53769588470459, "learning_rate": 1.941327723235157e-05, "loss": 1.3511, "step": 18564 }, { "epoch": 6.514025245441795, "grad_norm": 11.803421974182129, "learning_rate": 1.9386005921770297e-05, "loss": 1.4831, "step": 18578 }, { "epoch": 6.518934081346424, "grad_norm": 9.731769561767578, "learning_rate": 1.935873461118903e-05, "loss": 1.3691, "step": 18592 }, { "epoch": 6.523842917251052, "grad_norm": 9.23890495300293, "learning_rate": 1.933146330060776e-05, "loss": 1.3568, "step": 18606 }, { "epoch": 6.52875175315568, "grad_norm": 12.81054401397705, 
"learning_rate": 1.9304191990026494e-05, "loss": 1.4638, "step": 18620 }, { "epoch": 6.533660589060308, "grad_norm": 10.914559364318848, "learning_rate": 1.9276920679445222e-05, "loss": 1.4843, "step": 18634 }, { "epoch": 6.538569424964937, "grad_norm": 8.538981437683105, "learning_rate": 1.9249649368863956e-05, "loss": 1.4484, "step": 18648 }, { "epoch": 6.543478260869565, "grad_norm": 11.463729858398438, "learning_rate": 1.9222378058282688e-05, "loss": 1.5654, "step": 18662 }, { "epoch": 6.548387096774194, "grad_norm": 8.986562728881836, "learning_rate": 1.919510674770142e-05, "loss": 1.4082, "step": 18676 }, { "epoch": 6.553295932678822, "grad_norm": 8.854567527770996, "learning_rate": 1.916783543712015e-05, "loss": 1.3996, "step": 18690 }, { "epoch": 6.55820476858345, "grad_norm": 9.794734954833984, "learning_rate": 1.914056412653888e-05, "loss": 1.4698, "step": 18704 }, { "epoch": 6.563113604488079, "grad_norm": 10.789362907409668, "learning_rate": 1.9113292815957616e-05, "loss": 1.4389, "step": 18718 }, { "epoch": 6.568022440392707, "grad_norm": 10.824317932128906, "learning_rate": 1.9086021505376344e-05, "loss": 1.4296, "step": 18732 }, { "epoch": 6.572931276297335, "grad_norm": 9.935757637023926, "learning_rate": 1.905875019479508e-05, "loss": 1.4742, "step": 18746 }, { "epoch": 6.577840112201963, "grad_norm": 10.474770545959473, "learning_rate": 1.9031478884213806e-05, "loss": 1.476, "step": 18760 }, { "epoch": 6.582748948106592, "grad_norm": 11.693185806274414, "learning_rate": 1.900420757363254e-05, "loss": 1.415, "step": 18774 }, { "epoch": 6.5876577840112205, "grad_norm": 11.27559757232666, "learning_rate": 1.8976936263051272e-05, "loss": 1.3482, "step": 18788 }, { "epoch": 6.592566619915848, "grad_norm": 12.614275932312012, "learning_rate": 1.8949664952470003e-05, "loss": 1.4345, "step": 18802 }, { "epoch": 6.597475455820477, "grad_norm": 8.720357894897461, "learning_rate": 1.8922393641888734e-05, "loss": 1.4078, "step": 18816 }, { "epoch": 
6.602384291725105, "grad_norm": 9.657590866088867, "learning_rate": 1.8895122331307466e-05, "loss": 1.436, "step": 18830 }, { "epoch": 6.607293127629734, "grad_norm": 9.984074592590332, "learning_rate": 1.8867851020726197e-05, "loss": 1.4343, "step": 18844 }, { "epoch": 6.612201963534362, "grad_norm": 10.282735824584961, "learning_rate": 1.8840579710144928e-05, "loss": 1.3668, "step": 18858 }, { "epoch": 6.61711079943899, "grad_norm": 7.510200023651123, "learning_rate": 1.881330839956366e-05, "loss": 1.3952, "step": 18872 }, { "epoch": 6.6220196353436185, "grad_norm": 9.796003341674805, "learning_rate": 1.878603708898239e-05, "loss": 1.4222, "step": 18886 }, { "epoch": 6.626928471248247, "grad_norm": 9.634902000427246, "learning_rate": 1.8758765778401122e-05, "loss": 1.49, "step": 18900 }, { "epoch": 6.631837307152876, "grad_norm": 9.81937313079834, "learning_rate": 1.8731494467819853e-05, "loss": 1.4783, "step": 18914 }, { "epoch": 6.636746143057503, "grad_norm": 10.803921699523926, "learning_rate": 1.8704223157238584e-05, "loss": 1.4002, "step": 18928 }, { "epoch": 6.641654978962132, "grad_norm": 7.29136323928833, "learning_rate": 1.867695184665732e-05, "loss": 1.464, "step": 18942 }, { "epoch": 6.64656381486676, "grad_norm": 10.382338523864746, "learning_rate": 1.864968053607605e-05, "loss": 1.499, "step": 18956 }, { "epoch": 6.651472650771389, "grad_norm": 7.957187175750732, "learning_rate": 1.862240922549478e-05, "loss": 1.4574, "step": 18970 }, { "epoch": 6.6563814866760165, "grad_norm": 15.209294319152832, "learning_rate": 1.8595137914913512e-05, "loss": 1.5408, "step": 18984 }, { "epoch": 6.661290322580645, "grad_norm": 8.682625770568848, "learning_rate": 1.8567866604332244e-05, "loss": 1.2956, "step": 18998 }, { "epoch": 6.666199158485274, "grad_norm": 9.993380546569824, "learning_rate": 1.8540595293750975e-05, "loss": 1.3775, "step": 19012 }, { "epoch": 6.671107994389902, "grad_norm": 8.9593505859375, "learning_rate": 1.8513323983169706e-05, "loss": 
1.4391, "step": 19026 }, { "epoch": 6.676016830294531, "grad_norm": 10.684712409973145, "learning_rate": 1.8486052672588437e-05, "loss": 1.4368, "step": 19040 }, { "epoch": 6.680925666199158, "grad_norm": 13.64364242553711, "learning_rate": 1.845878136200717e-05, "loss": 1.4583, "step": 19054 }, { "epoch": 6.685834502103787, "grad_norm": 8.923884391784668, "learning_rate": 1.84315100514259e-05, "loss": 1.433, "step": 19068 }, { "epoch": 6.690743338008415, "grad_norm": 8.212026596069336, "learning_rate": 1.840423874084463e-05, "loss": 1.4046, "step": 19082 }, { "epoch": 6.695652173913043, "grad_norm": 13.0266695022583, "learning_rate": 1.8376967430263366e-05, "loss": 1.4203, "step": 19096 }, { "epoch": 6.7005610098176716, "grad_norm": 10.736886024475098, "learning_rate": 1.8349696119682093e-05, "loss": 1.3898, "step": 19110 }, { "epoch": 6.7054698457223, "grad_norm": 11.774788856506348, "learning_rate": 1.8322424809100828e-05, "loss": 1.4637, "step": 19124 }, { "epoch": 6.710378681626929, "grad_norm": 11.053827285766602, "learning_rate": 1.8295153498519556e-05, "loss": 1.4584, "step": 19138 }, { "epoch": 6.715287517531557, "grad_norm": 11.064839363098145, "learning_rate": 1.826788218793829e-05, "loss": 1.3717, "step": 19152 }, { "epoch": 6.720196353436185, "grad_norm": 10.942109107971191, "learning_rate": 1.824061087735702e-05, "loss": 1.4437, "step": 19166 }, { "epoch": 6.725105189340813, "grad_norm": 8.267911911010742, "learning_rate": 1.8213339566775753e-05, "loss": 1.4315, "step": 19180 }, { "epoch": 6.730014025245442, "grad_norm": 12.76940631866455, "learning_rate": 1.8186068256194484e-05, "loss": 1.5424, "step": 19194 }, { "epoch": 6.73492286115007, "grad_norm": 8.371118545532227, "learning_rate": 1.8158796945613215e-05, "loss": 1.5302, "step": 19208 }, { "epoch": 6.739831697054698, "grad_norm": 12.240155220031738, "learning_rate": 1.813152563503195e-05, "loss": 1.4349, "step": 19222 }, { "epoch": 6.744740532959327, "grad_norm": 11.11669921875, 
"learning_rate": 1.8104254324450678e-05, "loss": 1.3519, "step": 19236 }, { "epoch": 6.749649368863955, "grad_norm": 9.404054641723633, "learning_rate": 1.8076983013869412e-05, "loss": 1.396, "step": 19250 }, { "epoch": 6.754558204768584, "grad_norm": 10.504002571105957, "learning_rate": 1.804971170328814e-05, "loss": 1.4712, "step": 19264 }, { "epoch": 6.759467040673211, "grad_norm": 13.720281600952148, "learning_rate": 1.8022440392706875e-05, "loss": 1.5206, "step": 19278 }, { "epoch": 6.76437587657784, "grad_norm": 10.428255081176758, "learning_rate": 1.7995169082125603e-05, "loss": 1.4836, "step": 19292 }, { "epoch": 6.769284712482468, "grad_norm": 10.951818466186523, "learning_rate": 1.7967897771544337e-05, "loss": 1.4403, "step": 19306 }, { "epoch": 6.774193548387097, "grad_norm": 12.359960556030273, "learning_rate": 1.794062646096307e-05, "loss": 1.3285, "step": 19320 }, { "epoch": 6.7791023842917255, "grad_norm": 11.789525985717773, "learning_rate": 1.79133551503818e-05, "loss": 1.4658, "step": 19334 }, { "epoch": 6.784011220196353, "grad_norm": 9.45041561126709, "learning_rate": 1.788608383980053e-05, "loss": 1.5046, "step": 19348 }, { "epoch": 6.788920056100982, "grad_norm": 12.272042274475098, "learning_rate": 1.7858812529219262e-05, "loss": 1.4218, "step": 19362 }, { "epoch": 6.79382889200561, "grad_norm": 9.896013259887695, "learning_rate": 1.7831541218637993e-05, "loss": 1.462, "step": 19376 }, { "epoch": 6.798737727910239, "grad_norm": 9.01681900024414, "learning_rate": 1.7804269908056725e-05, "loss": 1.4169, "step": 19390 }, { "epoch": 6.803646563814866, "grad_norm": 9.40587329864502, "learning_rate": 1.7776998597475456e-05, "loss": 1.5414, "step": 19404 }, { "epoch": 6.808555399719495, "grad_norm": 7.286059856414795, "learning_rate": 1.7749727286894187e-05, "loss": 1.3894, "step": 19418 }, { "epoch": 6.8134642356241235, "grad_norm": 9.725820541381836, "learning_rate": 1.7722455976312918e-05, "loss": 1.3704, "step": 19432 }, { "epoch": 
6.818373071528752, "grad_norm": 8.661629676818848, "learning_rate": 1.769518466573165e-05, "loss": 1.4075, "step": 19446 }, { "epoch": 6.82328190743338, "grad_norm": 12.563484191894531, "learning_rate": 1.7667913355150384e-05, "loss": 1.4196, "step": 19460 }, { "epoch": 6.828190743338008, "grad_norm": 8.7814359664917, "learning_rate": 1.764258999532492e-05, "loss": 1.4363, "step": 19474 }, { "epoch": 6.833099579242637, "grad_norm": 11.422985076904297, "learning_rate": 1.761531868474365e-05, "loss": 1.4, "step": 19488 }, { "epoch": 6.838008415147265, "grad_norm": 6.646035671234131, "learning_rate": 1.758804737416238e-05, "loss": 1.5214, "step": 19502 }, { "epoch": 6.842917251051894, "grad_norm": 9.983500480651855, "learning_rate": 1.7560776063581113e-05, "loss": 1.3638, "step": 19516 }, { "epoch": 6.8478260869565215, "grad_norm": 16.840742111206055, "learning_rate": 1.7533504752999844e-05, "loss": 1.6551, "step": 19530 }, { "epoch": 6.85273492286115, "grad_norm": 8.563652038574219, "learning_rate": 1.7506233442418575e-05, "loss": 1.4328, "step": 19544 }, { "epoch": 6.857643758765779, "grad_norm": 9.56379508972168, "learning_rate": 1.747896213183731e-05, "loss": 1.3617, "step": 19558 }, { "epoch": 6.862552594670406, "grad_norm": 8.87074089050293, "learning_rate": 1.745169082125604e-05, "loss": 1.4214, "step": 19572 }, { "epoch": 6.867461430575035, "grad_norm": 9.573884010314941, "learning_rate": 1.7424419510674772e-05, "loss": 1.3549, "step": 19586 }, { "epoch": 6.872370266479663, "grad_norm": 8.975354194641113, "learning_rate": 1.7397148200093503e-05, "loss": 1.4086, "step": 19600 }, { "epoch": 6.877279102384292, "grad_norm": 10.155501365661621, "learning_rate": 1.7369876889512235e-05, "loss": 1.507, "step": 19614 }, { "epoch": 6.88218793828892, "grad_norm": 10.191372871398926, "learning_rate": 1.7342605578930966e-05, "loss": 1.407, "step": 19628 }, { "epoch": 6.887096774193548, "grad_norm": 11.062378883361816, "learning_rate": 1.7315334268349697e-05, "loss": 1.406, 
"step": 19642 }, { "epoch": 6.892005610098177, "grad_norm": 11.822031021118164, "learning_rate": 1.7288062957768428e-05, "loss": 1.3959, "step": 19656 }, { "epoch": 6.896914446002805, "grad_norm": 11.39206600189209, "learning_rate": 1.726079164718716e-05, "loss": 1.3626, "step": 19670 }, { "epoch": 6.901823281907434, "grad_norm": 11.639822959899902, "learning_rate": 1.723352033660589e-05, "loss": 1.3824, "step": 19684 }, { "epoch": 6.906732117812062, "grad_norm": 11.516700744628906, "learning_rate": 1.7206249026024622e-05, "loss": 1.4324, "step": 19698 }, { "epoch": 6.91164095371669, "grad_norm": 8.176033973693848, "learning_rate": 1.7178977715443353e-05, "loss": 1.3654, "step": 19712 }, { "epoch": 6.916549789621318, "grad_norm": 9.23556137084961, "learning_rate": 1.7151706404862088e-05, "loss": 1.4739, "step": 19726 }, { "epoch": 6.921458625525947, "grad_norm": 6.97739315032959, "learning_rate": 1.7124435094280815e-05, "loss": 1.4153, "step": 19740 }, { "epoch": 6.926367461430575, "grad_norm": 11.657757759094238, "learning_rate": 1.709716378369955e-05, "loss": 1.4568, "step": 19754 }, { "epoch": 6.931276297335203, "grad_norm": 10.570274353027344, "learning_rate": 1.7069892473118278e-05, "loss": 1.3866, "step": 19768 }, { "epoch": 6.936185133239832, "grad_norm": 11.258580207824707, "learning_rate": 1.7042621162537013e-05, "loss": 1.4293, "step": 19782 }, { "epoch": 6.94109396914446, "grad_norm": 11.264379501342773, "learning_rate": 1.7015349851955744e-05, "loss": 1.4317, "step": 19796 }, { "epoch": 6.946002805049089, "grad_norm": 10.778213500976562, "learning_rate": 1.6988078541374475e-05, "loss": 1.4701, "step": 19810 }, { "epoch": 6.950911640953716, "grad_norm": 11.039793968200684, "learning_rate": 1.6960807230793206e-05, "loss": 1.5175, "step": 19824 }, { "epoch": 6.955820476858345, "grad_norm": 8.773394584655762, "learning_rate": 1.6933535920211937e-05, "loss": 1.5299, "step": 19838 }, { "epoch": 6.9607293127629735, "grad_norm": 7.976569175720215, 
"learning_rate": 1.6906264609630672e-05, "loss": 1.396, "step": 19852 }, { "epoch": 6.965638148667602, "grad_norm": 8.811712265014648, "learning_rate": 1.68789932990494e-05, "loss": 1.4224, "step": 19866 }, { "epoch": 6.97054698457223, "grad_norm": 11.523651123046875, "learning_rate": 1.6851721988468134e-05, "loss": 1.3975, "step": 19880 }, { "epoch": 6.975455820476858, "grad_norm": 12.636638641357422, "learning_rate": 1.6824450677886862e-05, "loss": 1.3566, "step": 19894 }, { "epoch": 6.980364656381487, "grad_norm": 8.698878288269043, "learning_rate": 1.6797179367305597e-05, "loss": 1.3981, "step": 19908 }, { "epoch": 6.985273492286115, "grad_norm": 12.37654972076416, "learning_rate": 1.6769908056724325e-05, "loss": 1.4283, "step": 19922 }, { "epoch": 6.990182328190743, "grad_norm": 7.628023624420166, "learning_rate": 1.674263674614306e-05, "loss": 1.3938, "step": 19936 }, { "epoch": 6.9950911640953715, "grad_norm": 9.854772567749023, "learning_rate": 1.671536543556179e-05, "loss": 1.4323, "step": 19950 }, { "epoch": 7.0, "grad_norm": 15.187037467956543, "learning_rate": 1.6688094124980522e-05, "loss": 1.4952, "step": 19964 }, { "epoch": 7.0, "eval_loss": 1.3883416652679443, "eval_map": 0.1246, "eval_map_50": 0.1804, "eval_map_75": 0.1378, "eval_map_applique": 0.0012, "eval_map_bag, wallet": 0.1117, "eval_map_bead": 0.0204, "eval_map_belt": 0.1361, "eval_map_bow": 0.0, "eval_map_buckle": 0.124, "eval_map_cape": 0.0, "eval_map_cardigan": 0.0, "eval_map_coat": 0.1551, "eval_map_collar": 0.224, "eval_map_dress": 0.463, "eval_map_epaulette": 0.0276, "eval_map_flower": 0.0, "eval_map_fringe": 0.0, "eval_map_glasses": 0.2086, "eval_map_glove": 0.0581, "eval_map_hat": 0.1942, "eval_map_headband, head covering, hair accessory": 0.0872, "eval_map_hood": 0.0532, "eval_map_jacket": 0.275, "eval_map_jumpsuit": 0.0043, "eval_map_lapel": 0.1739, "eval_map_large": 0.1253, "eval_map_leg warmer": 0.0, "eval_map_medium": 0.09, "eval_map_neckline": 0.3055, "eval_map_pants": 0.4452, 
"eval_map_pocket": 0.1035, "eval_map_ribbon": 0.0, "eval_map_rivet": 0.0083, "eval_map_ruffle": 0.0465, "eval_map_scarf": 0.019, "eval_map_sequin": 0.0, "eval_map_shirt, blouse": 0.0602, "eval_map_shoe": 0.4237, "eval_map_shorts": 0.2402, "eval_map_skirt": 0.331, "eval_map_sleeve": 0.3225, "eval_map_small": 0.0, "eval_map_sock": 0.0439, "eval_map_sweater": 0.0, "eval_map_tassel": 0.0, "eval_map_tie": 0.3436, "eval_map_tights, stockings": 0.1659, "eval_map_top, t-shirt, sweatshirt": 0.1938, "eval_map_umbrella": 0.2468, "eval_map_vest": 0.0, "eval_map_watch": 0.0767, "eval_map_zipper": 0.0381, "eval_mar_1": 0.1944, "eval_mar_10": 0.3898, "eval_mar_100": 0.3962, "eval_mar_100_applique": 0.0197, "eval_mar_100_bag, wallet": 0.5343, "eval_mar_100_bead": 0.329, "eval_mar_100_belt": 0.6707, "eval_mar_100_bow": 0.0, "eval_mar_100_buckle": 0.4507, "eval_mar_100_cape": 0.0, "eval_mar_100_cardigan": 0.0, "eval_mar_100_coat": 0.5049, "eval_mar_100_collar": 0.6152, "eval_mar_100_dress": 0.825, "eval_mar_100_epaulette": 0.4429, "eval_mar_100_flower": 0.0, "eval_mar_100_fringe": 0.0, "eval_mar_100_glasses": 0.6829, "eval_mar_100_glove": 0.2161, "eval_mar_100_hat": 0.6041, "eval_mar_100_headband, head covering, hair accessory": 0.5055, "eval_mar_100_hood": 0.1813, "eval_mar_100_jacket": 0.7297, "eval_mar_100_jumpsuit": 0.1143, "eval_mar_100_lapel": 0.583, "eval_mar_100_leg warmer": 0.0, "eval_mar_100_neckline": 0.7656, "eval_mar_100_pants": 0.7803, "eval_mar_100_pocket": 0.6852, "eval_mar_100_ribbon": 0.0, "eval_mar_100_rivet": 0.0721, "eval_mar_100_ruffle": 0.3092, "eval_mar_100_scarf": 0.0958, "eval_mar_100_sequin": 0.0, "eval_mar_100_shirt, blouse": 0.3881, "eval_mar_100_shoe": 0.7759, "eval_mar_100_shorts": 0.5962, "eval_mar_100_skirt": 0.7352, "eval_mar_100_sleeve": 0.7525, "eval_mar_100_sock": 0.6518, "eval_mar_100_sweater": 0.0, "eval_mar_100_tassel": 0.0, "eval_mar_100_tie": 0.8333, "eval_mar_100_tights, stockings": 0.7352, "eval_mar_100_top, t-shirt, sweatshirt": 0.7269, 
"eval_mar_100_umbrella": 0.52, "eval_mar_100_vest": 0.0, "eval_mar_100_watch": 0.5193, "eval_mar_100_zipper": 0.2727, "eval_mar_large": 0.3991, "eval_mar_medium": 0.1994, "eval_mar_small": 0.0, "eval_runtime": 83.507, "eval_samples_per_second": 13.867, "eval_steps_per_second": 0.443, "step": 19964 }, { "epoch": 7.0049088359046285, "grad_norm": 14.052332878112793, "learning_rate": 1.6660822814399253e-05, "loss": 1.3707, "step": 19978 }, { "epoch": 7.009817671809257, "grad_norm": 8.777816772460938, "learning_rate": 1.6633551503817984e-05, "loss": 1.4566, "step": 19992 }, { "epoch": 7.014726507713885, "grad_norm": 11.48232364654541, "learning_rate": 1.6606280193236715e-05, "loss": 1.4376, "step": 20006 }, { "epoch": 7.019635343618513, "grad_norm": 9.184467315673828, "learning_rate": 1.6579008882655447e-05, "loss": 1.4592, "step": 20020 }, { "epoch": 7.024544179523142, "grad_norm": 12.339717864990234, "learning_rate": 1.6551737572074178e-05, "loss": 1.3972, "step": 20034 }, { "epoch": 7.02945301542777, "grad_norm": 10.02008056640625, "learning_rate": 1.652446626149291e-05, "loss": 1.4308, "step": 20048 }, { "epoch": 7.034361851332398, "grad_norm": 10.710232734680176, "learning_rate": 1.6497194950911644e-05, "loss": 1.4699, "step": 20062 }, { "epoch": 7.0392706872370265, "grad_norm": 10.526137351989746, "learning_rate": 1.646992364033037e-05, "loss": 1.3724, "step": 20076 }, { "epoch": 7.044179523141655, "grad_norm": 11.503752708435059, "learning_rate": 1.6442652329749106e-05, "loss": 1.4082, "step": 20090 }, { "epoch": 7.049088359046284, "grad_norm": 9.594290733337402, "learning_rate": 1.6415381019167837e-05, "loss": 1.4189, "step": 20104 }, { "epoch": 7.053997194950911, "grad_norm": 8.869670867919922, "learning_rate": 1.638810970858657e-05, "loss": 1.4987, "step": 20118 }, { "epoch": 7.05890603085554, "grad_norm": 9.73611068725586, "learning_rate": 1.63608383980053e-05, "loss": 1.485, "step": 20132 }, { "epoch": 7.063814866760168, "grad_norm": 7.555271625518799, 
"learning_rate": 1.633356708742403e-05, "loss": 1.3924, "step": 20146 }, { "epoch": 7.068723702664797, "grad_norm": 8.857402801513672, "learning_rate": 1.6306295776842762e-05, "loss": 1.4764, "step": 20160 }, { "epoch": 7.0736325385694245, "grad_norm": 10.975354194641113, "learning_rate": 1.6279024466261493e-05, "loss": 1.3692, "step": 20174 }, { "epoch": 7.078541374474053, "grad_norm": 9.163580894470215, "learning_rate": 1.6251753155680225e-05, "loss": 1.3982, "step": 20188 }, { "epoch": 7.083450210378682, "grad_norm": 9.780982971191406, "learning_rate": 1.6224481845098956e-05, "loss": 1.4112, "step": 20202 }, { "epoch": 7.08835904628331, "grad_norm": 8.409701347351074, "learning_rate": 1.6197210534517687e-05, "loss": 1.3339, "step": 20216 }, { "epoch": 7.093267882187939, "grad_norm": 9.813055992126465, "learning_rate": 1.616993922393642e-05, "loss": 1.2821, "step": 20230 }, { "epoch": 7.098176718092566, "grad_norm": 9.561906814575195, "learning_rate": 1.614266791335515e-05, "loss": 1.3341, "step": 20244 }, { "epoch": 7.103085553997195, "grad_norm": 10.607975006103516, "learning_rate": 1.6115396602773884e-05, "loss": 1.4144, "step": 20258 }, { "epoch": 7.107994389901823, "grad_norm": 8.607682228088379, "learning_rate": 1.6088125292192612e-05, "loss": 1.4532, "step": 20272 }, { "epoch": 7.112903225806452, "grad_norm": 11.696894645690918, "learning_rate": 1.6060853981611347e-05, "loss": 1.4521, "step": 20286 }, { "epoch": 7.11781206171108, "grad_norm": 10.232346534729004, "learning_rate": 1.6033582671030078e-05, "loss": 1.3873, "step": 20300 }, { "epoch": 7.122720897615708, "grad_norm": 9.588444709777832, "learning_rate": 1.600631136044881e-05, "loss": 1.4505, "step": 20314 }, { "epoch": 7.127629733520337, "grad_norm": 9.698781967163086, "learning_rate": 1.597904004986754e-05, "loss": 1.4514, "step": 20328 }, { "epoch": 7.132538569424965, "grad_norm": 8.451394081115723, "learning_rate": 1.595176873928627e-05, "loss": 1.4684, "step": 20342 }, { "epoch": 
7.137447405329593, "grad_norm": 11.872838020324707, "learning_rate": 1.5924497428705003e-05, "loss": 1.4582, "step": 20356 }, { "epoch": 7.142356241234221, "grad_norm": 9.581931114196777, "learning_rate": 1.5897226118123734e-05, "loss": 1.4625, "step": 20370 }, { "epoch": 7.14726507713885, "grad_norm": 9.831469535827637, "learning_rate": 1.586995480754247e-05, "loss": 1.438, "step": 20384 }, { "epoch": 7.1521739130434785, "grad_norm": 9.434301376342773, "learning_rate": 1.5842683496961196e-05, "loss": 1.424, "step": 20398 }, { "epoch": 7.157082748948106, "grad_norm": 11.16064167022705, "learning_rate": 1.581541218637993e-05, "loss": 1.3983, "step": 20412 }, { "epoch": 7.161991584852735, "grad_norm": 8.29039192199707, "learning_rate": 1.578814087579866e-05, "loss": 1.5061, "step": 20426 }, { "epoch": 7.166900420757363, "grad_norm": 10.802579879760742, "learning_rate": 1.5760869565217393e-05, "loss": 1.4302, "step": 20440 }, { "epoch": 7.171809256661992, "grad_norm": 10.296086311340332, "learning_rate": 1.573359825463612e-05, "loss": 1.4741, "step": 20454 }, { "epoch": 7.17671809256662, "grad_norm": 8.605633735656738, "learning_rate": 1.5706326944054856e-05, "loss": 1.4172, "step": 20468 }, { "epoch": 7.181626928471248, "grad_norm": 10.08398151397705, "learning_rate": 1.5679055633473587e-05, "loss": 1.3722, "step": 20482 }, { "epoch": 7.1865357643758765, "grad_norm": 10.622535705566406, "learning_rate": 1.5651784322892318e-05, "loss": 1.4581, "step": 20496 }, { "epoch": 7.191444600280505, "grad_norm": 7.515201091766357, "learning_rate": 1.562451301231105e-05, "loss": 1.3352, "step": 20510 }, { "epoch": 7.196353436185134, "grad_norm": 9.010266304016113, "learning_rate": 1.559724170172978e-05, "loss": 1.3515, "step": 20524 }, { "epoch": 7.201262272089761, "grad_norm": 7.469473361968994, "learning_rate": 1.5569970391148512e-05, "loss": 1.4145, "step": 20538 }, { "epoch": 7.20617110799439, "grad_norm": 9.59260368347168, "learning_rate": 1.5542699080567243e-05, "loss": 
1.4104, "step": 20552 }, { "epoch": 7.211079943899018, "grad_norm": 9.790210723876953, "learning_rate": 1.5515427769985978e-05, "loss": 1.4568, "step": 20566 }, { "epoch": 7.215988779803647, "grad_norm": 9.659904479980469, "learning_rate": 1.5488156459404705e-05, "loss": 1.3488, "step": 20580 }, { "epoch": 7.2208976157082745, "grad_norm": 10.070599555969238, "learning_rate": 1.546088514882344e-05, "loss": 1.4872, "step": 20594 }, { "epoch": 7.225806451612903, "grad_norm": 12.182007789611816, "learning_rate": 1.543361383824217e-05, "loss": 1.4166, "step": 20608 }, { "epoch": 7.230715287517532, "grad_norm": 11.02771282196045, "learning_rate": 1.5406342527660903e-05, "loss": 1.5217, "step": 20622 }, { "epoch": 7.23562412342216, "grad_norm": 10.208148956298828, "learning_rate": 1.5379071217079634e-05, "loss": 1.4196, "step": 20636 }, { "epoch": 7.240532959326789, "grad_norm": 7.172757148742676, "learning_rate": 1.5351799906498365e-05, "loss": 1.4542, "step": 20650 }, { "epoch": 7.245441795231416, "grad_norm": 9.347253799438477, "learning_rate": 1.5324528595917096e-05, "loss": 1.4258, "step": 20664 }, { "epoch": 7.250350631136045, "grad_norm": 9.983814239501953, "learning_rate": 1.5297257285335827e-05, "loss": 1.4073, "step": 20678 }, { "epoch": 7.255259467040673, "grad_norm": 15.27082347869873, "learning_rate": 1.526998597475456e-05, "loss": 1.335, "step": 20692 }, { "epoch": 7.260168302945302, "grad_norm": 9.402547836303711, "learning_rate": 1.5242714664173292e-05, "loss": 1.4537, "step": 20706 }, { "epoch": 7.26507713884993, "grad_norm": 6.118456840515137, "learning_rate": 1.5215443353592021e-05, "loss": 1.4602, "step": 20720 }, { "epoch": 7.269985974754558, "grad_norm": 10.030014038085938, "learning_rate": 1.5188172043010754e-05, "loss": 1.4299, "step": 20734 }, { "epoch": 7.274894810659187, "grad_norm": 14.366145133972168, "learning_rate": 1.5160900732429483e-05, "loss": 1.4341, "step": 20748 }, { "epoch": 7.279803646563815, "grad_norm": 8.98214054107666, 
"learning_rate": 1.5133629421848216e-05, "loss": 1.4835, "step": 20762 }, { "epoch": 7.284712482468443, "grad_norm": 9.328564643859863, "learning_rate": 1.5106358111266946e-05, "loss": 1.4388, "step": 20776 }, { "epoch": 7.289621318373071, "grad_norm": 8.07311725616455, "learning_rate": 1.5079086800685679e-05, "loss": 1.3834, "step": 20790 }, { "epoch": 7.2945301542777, "grad_norm": 9.006145477294922, "learning_rate": 1.505181549010441e-05, "loss": 1.3894, "step": 20804 }, { "epoch": 7.2994389901823284, "grad_norm": 13.649395942687988, "learning_rate": 1.5024544179523143e-05, "loss": 1.5363, "step": 20818 }, { "epoch": 7.304347826086957, "grad_norm": 9.589092254638672, "learning_rate": 1.4997272868941876e-05, "loss": 1.4398, "step": 20832 }, { "epoch": 7.309256661991585, "grad_norm": 11.915826797485352, "learning_rate": 1.4970001558360605e-05, "loss": 1.3923, "step": 20846 }, { "epoch": 7.314165497896213, "grad_norm": 8.9043607711792, "learning_rate": 1.4942730247779338e-05, "loss": 1.4509, "step": 20860 }, { "epoch": 7.319074333800842, "grad_norm": 13.134561538696289, "learning_rate": 1.4915458937198068e-05, "loss": 1.3687, "step": 20874 }, { "epoch": 7.32398316970547, "grad_norm": 19.296321868896484, "learning_rate": 1.48881876266168e-05, "loss": 1.3872, "step": 20888 }, { "epoch": 7.328892005610098, "grad_norm": 9.079136848449707, "learning_rate": 1.486091631603553e-05, "loss": 1.4035, "step": 20902 }, { "epoch": 7.333800841514726, "grad_norm": 10.589427947998047, "learning_rate": 1.4833645005454263e-05, "loss": 1.405, "step": 20916 }, { "epoch": 7.338709677419355, "grad_norm": 10.514184951782227, "learning_rate": 1.4806373694872994e-05, "loss": 1.3212, "step": 20930 }, { "epoch": 7.3436185133239835, "grad_norm": 12.042459487915039, "learning_rate": 1.4779102384291726e-05, "loss": 1.4242, "step": 20944 }, { "epoch": 7.348527349228611, "grad_norm": 11.868163108825684, "learning_rate": 1.4751831073710457e-05, "loss": 1.3862, "step": 20958 }, { "epoch": 
7.35343618513324, "grad_norm": 10.931702613830566, "learning_rate": 1.472455976312919e-05, "loss": 1.408, "step": 20972 }, { "epoch": 7.358345021037868, "grad_norm": 11.41661262512207, "learning_rate": 1.469728845254792e-05, "loss": 1.4722, "step": 20986 }, { "epoch": 7.363253856942497, "grad_norm": 11.791145324707031, "learning_rate": 1.4670017141966652e-05, "loss": 1.5229, "step": 21000 }, { "epoch": 7.368162692847124, "grad_norm": 11.432427406311035, "learning_rate": 1.4642745831385382e-05, "loss": 1.4442, "step": 21014 }, { "epoch": 7.373071528751753, "grad_norm": 12.32584285736084, "learning_rate": 1.4615474520804115e-05, "loss": 1.3575, "step": 21028 }, { "epoch": 7.3779803646563815, "grad_norm": 7.989016056060791, "learning_rate": 1.4588203210222844e-05, "loss": 1.4482, "step": 21042 }, { "epoch": 7.38288920056101, "grad_norm": 10.293359756469727, "learning_rate": 1.4560931899641577e-05, "loss": 1.3744, "step": 21056 }, { "epoch": 7.387798036465638, "grad_norm": 9.311391830444336, "learning_rate": 1.453366058906031e-05, "loss": 1.4189, "step": 21070 }, { "epoch": 7.392706872370266, "grad_norm": 8.816062927246094, "learning_rate": 1.4506389278479041e-05, "loss": 1.4234, "step": 21084 }, { "epoch": 7.397615708274895, "grad_norm": 9.444703102111816, "learning_rate": 1.4479117967897774e-05, "loss": 1.4531, "step": 21098 }, { "epoch": 7.402524544179523, "grad_norm": 10.565774917602539, "learning_rate": 1.4451846657316504e-05, "loss": 1.4286, "step": 21112 }, { "epoch": 7.407433380084152, "grad_norm": 9.062630653381348, "learning_rate": 1.4424575346735237e-05, "loss": 1.3771, "step": 21126 }, { "epoch": 7.4123422159887795, "grad_norm": 10.755143165588379, "learning_rate": 1.4397304036153966e-05, "loss": 1.4122, "step": 21140 }, { "epoch": 7.417251051893408, "grad_norm": 8.505136489868164, "learning_rate": 1.4370032725572699e-05, "loss": 1.3791, "step": 21154 }, { "epoch": 7.422159887798037, "grad_norm": 10.296607971191406, "learning_rate": 1.4342761414991428e-05, 
"loss": 1.3273, "step": 21168 }, { "epoch": 7.427068723702665, "grad_norm": 9.681766510009766, "learning_rate": 1.4315490104410161e-05, "loss": 1.3496, "step": 21182 }, { "epoch": 7.431977559607293, "grad_norm": 10.839095115661621, "learning_rate": 1.4288218793828893e-05, "loss": 1.3928, "step": 21196 }, { "epoch": 7.436886395511921, "grad_norm": 9.698502540588379, "learning_rate": 1.4260947483247626e-05, "loss": 1.4352, "step": 21210 }, { "epoch": 7.44179523141655, "grad_norm": 9.575050354003906, "learning_rate": 1.4233676172666355e-05, "loss": 1.4108, "step": 21224 }, { "epoch": 7.446704067321178, "grad_norm": 8.651358604431152, "learning_rate": 1.4206404862085088e-05, "loss": 1.3924, "step": 21238 }, { "epoch": 7.451612903225806, "grad_norm": 10.242620468139648, "learning_rate": 1.4179133551503817e-05, "loss": 1.3155, "step": 21252 }, { "epoch": 7.456521739130435, "grad_norm": 8.698535919189453, "learning_rate": 1.415186224092255e-05, "loss": 1.3917, "step": 21266 }, { "epoch": 7.461430575035063, "grad_norm": 8.049873352050781, "learning_rate": 1.412459093034128e-05, "loss": 1.4075, "step": 21280 }, { "epoch": 7.466339410939692, "grad_norm": 10.40372085571289, "learning_rate": 1.4097319619760013e-05, "loss": 1.3569, "step": 21294 }, { "epoch": 7.47124824684432, "grad_norm": 10.181981086730957, "learning_rate": 1.4070048309178744e-05, "loss": 1.4047, "step": 21308 }, { "epoch": 7.476157082748948, "grad_norm": 11.190930366516113, "learning_rate": 1.4042776998597475e-05, "loss": 1.4252, "step": 21322 }, { "epoch": 7.481065918653576, "grad_norm": 14.994572639465332, "learning_rate": 1.4015505688016208e-05, "loss": 1.3817, "step": 21336 }, { "epoch": 7.485974754558205, "grad_norm": 7.397494316101074, "learning_rate": 1.398823437743494e-05, "loss": 1.3876, "step": 21350 }, { "epoch": 7.4908835904628335, "grad_norm": 12.284586906433105, "learning_rate": 1.3960963066853672e-05, "loss": 1.5015, "step": 21364 }, { "epoch": 7.495792426367461, "grad_norm": 
10.379143714904785, "learning_rate": 1.3933691756272402e-05, "loss": 1.378, "step": 21378 }, { "epoch": 7.50070126227209, "grad_norm": 9.108928680419922, "learning_rate": 1.3906420445691135e-05, "loss": 1.349, "step": 21392 }, { "epoch": 7.505610098176718, "grad_norm": 14.98458194732666, "learning_rate": 1.3879149135109864e-05, "loss": 1.3951, "step": 21406 }, { "epoch": 7.510518934081347, "grad_norm": 11.841029167175293, "learning_rate": 1.3851877824528597e-05, "loss": 1.4104, "step": 21420 }, { "epoch": 7.515427769985974, "grad_norm": 10.123833656311035, "learning_rate": 1.3824606513947327e-05, "loss": 1.4226, "step": 21434 }, { "epoch": 7.520336605890603, "grad_norm": 10.13443660736084, "learning_rate": 1.379733520336606e-05, "loss": 1.431, "step": 21448 }, { "epoch": 7.5252454417952315, "grad_norm": 9.026961326599121, "learning_rate": 1.377006389278479e-05, "loss": 1.3121, "step": 21462 }, { "epoch": 7.53015427769986, "grad_norm": 9.739246368408203, "learning_rate": 1.3742792582203524e-05, "loss": 1.3776, "step": 21476 }, { "epoch": 7.5350631136044885, "grad_norm": 10.509860038757324, "learning_rate": 1.3715521271622253e-05, "loss": 1.4007, "step": 21490 }, { "epoch": 7.539971949509116, "grad_norm": 10.27804946899414, "learning_rate": 1.3688249961040986e-05, "loss": 1.4141, "step": 21504 }, { "epoch": 7.544880785413745, "grad_norm": 8.621501922607422, "learning_rate": 1.3660978650459716e-05, "loss": 1.4678, "step": 21518 }, { "epoch": 7.549789621318373, "grad_norm": 9.16821002960205, "learning_rate": 1.3633707339878449e-05, "loss": 1.3974, "step": 21532 }, { "epoch": 7.554698457223001, "grad_norm": 11.520796775817871, "learning_rate": 1.3606436029297178e-05, "loss": 1.3897, "step": 21546 }, { "epoch": 7.5596072931276295, "grad_norm": 9.53386402130127, "learning_rate": 1.3579164718715911e-05, "loss": 1.3845, "step": 21560 }, { "epoch": 7.564516129032258, "grad_norm": 10.057029724121094, "learning_rate": 1.3551893408134644e-05, "loss": 1.3937, "step": 21574 }, { 
"epoch": 7.5694249649368865, "grad_norm": 12.053243637084961, "learning_rate": 1.3524622097553375e-05, "loss": 1.3417, "step": 21588 }, { "epoch": 7.574333800841515, "grad_norm": 9.579270362854004, "learning_rate": 1.3497350786972106e-05, "loss": 1.3535, "step": 21602 }, { "epoch": 7.579242636746143, "grad_norm": 9.573877334594727, "learning_rate": 1.3470079476390838e-05, "loss": 1.437, "step": 21616 }, { "epoch": 7.584151472650771, "grad_norm": 11.228386878967285, "learning_rate": 1.344280816580957e-05, "loss": 1.4432, "step": 21630 }, { "epoch": 7.5890603085554, "grad_norm": 11.409266471862793, "learning_rate": 1.34155368552283e-05, "loss": 1.3108, "step": 21644 }, { "epoch": 7.593969144460028, "grad_norm": 9.730371475219727, "learning_rate": 1.3388265544647033e-05, "loss": 1.4215, "step": 21658 }, { "epoch": 7.598877980364656, "grad_norm": 9.397181510925293, "learning_rate": 1.3360994234065762e-05, "loss": 1.4843, "step": 21672 }, { "epoch": 7.6037868162692845, "grad_norm": 8.901537895202637, "learning_rate": 1.3333722923484495e-05, "loss": 1.4043, "step": 21686 }, { "epoch": 7.608695652173913, "grad_norm": 10.520024299621582, "learning_rate": 1.3306451612903225e-05, "loss": 1.3647, "step": 21700 }, { "epoch": 7.613604488078542, "grad_norm": 9.834330558776855, "learning_rate": 1.3279180302321958e-05, "loss": 1.4539, "step": 21714 }, { "epoch": 7.618513323983169, "grad_norm": 8.599248886108398, "learning_rate": 1.3251908991740689e-05, "loss": 1.451, "step": 21728 }, { "epoch": 7.623422159887798, "grad_norm": 8.243707656860352, "learning_rate": 1.3224637681159422e-05, "loss": 1.4202, "step": 21742 }, { "epoch": 7.628330995792426, "grad_norm": 9.864405632019043, "learning_rate": 1.3197366370578151e-05, "loss": 1.4549, "step": 21756 }, { "epoch": 7.633239831697055, "grad_norm": 9.83797836303711, "learning_rate": 1.3170095059996884e-05, "loss": 1.3807, "step": 21770 }, { "epoch": 7.638148667601683, "grad_norm": 10.732083320617676, "learning_rate": 
1.3142823749415614e-05, "loss": 1.4174, "step": 21784 }, { "epoch": 7.643057503506311, "grad_norm": 11.50915813446045, "learning_rate": 1.3115552438834347e-05, "loss": 1.3711, "step": 21798 }, { "epoch": 7.64796633941094, "grad_norm": 8.630019187927246, "learning_rate": 1.3088281128253076e-05, "loss": 1.3502, "step": 21812 }, { "epoch": 7.652875175315568, "grad_norm": 10.402270317077637, "learning_rate": 1.306100981767181e-05, "loss": 1.4079, "step": 21826 }, { "epoch": 7.657784011220197, "grad_norm": 9.245973587036133, "learning_rate": 1.3033738507090542e-05, "loss": 1.3976, "step": 21840 }, { "epoch": 7.662692847124824, "grad_norm": 10.196998596191406, "learning_rate": 1.3006467196509273e-05, "loss": 1.414, "step": 21854 }, { "epoch": 7.667601683029453, "grad_norm": 9.814986228942871, "learning_rate": 1.2979195885928006e-05, "loss": 1.5527, "step": 21868 }, { "epoch": 7.672510518934081, "grad_norm": 7.9122467041015625, "learning_rate": 1.2951924575346736e-05, "loss": 1.4564, "step": 21882 }, { "epoch": 7.67741935483871, "grad_norm": 18.043025970458984, "learning_rate": 1.2924653264765469e-05, "loss": 1.4936, "step": 21896 }, { "epoch": 7.682328190743338, "grad_norm": 12.859254837036133, "learning_rate": 1.2897381954184198e-05, "loss": 1.3629, "step": 21910 }, { "epoch": 7.687237026647966, "grad_norm": 9.312397956848145, "learning_rate": 1.2870110643602931e-05, "loss": 1.3778, "step": 21924 }, { "epoch": 7.692145862552595, "grad_norm": 16.000150680541992, "learning_rate": 1.284283933302166e-05, "loss": 1.3538, "step": 21938 }, { "epoch": 7.697054698457223, "grad_norm": 19.46660614013672, "learning_rate": 1.2815568022440394e-05, "loss": 1.3957, "step": 21952 }, { "epoch": 7.701963534361852, "grad_norm": 11.668210983276367, "learning_rate": 1.2788296711859125e-05, "loss": 1.4068, "step": 21966 }, { "epoch": 7.706872370266479, "grad_norm": 10.402234077453613, "learning_rate": 1.2762973352033661e-05, "loss": 1.3029, "step": 21980 }, { "epoch": 7.711781206171108, 
"grad_norm": 9.28171443939209, "learning_rate": 1.2735702041452394e-05, "loss": 1.4146, "step": 21994 }, { "epoch": 7.7166900420757365, "grad_norm": 14.036051750183105, "learning_rate": 1.2708430730871124e-05, "loss": 1.3579, "step": 22008 }, { "epoch": 7.721598877980365, "grad_norm": 7.550570964813232, "learning_rate": 1.2681159420289857e-05, "loss": 1.4087, "step": 22022 }, { "epoch": 7.726507713884993, "grad_norm": 9.57552433013916, "learning_rate": 1.2653888109708586e-05, "loss": 1.3743, "step": 22036 }, { "epoch": 7.731416549789621, "grad_norm": 9.657792091369629, "learning_rate": 1.262661679912732e-05, "loss": 1.4694, "step": 22050 }, { "epoch": 7.73632538569425, "grad_norm": 10.498087882995605, "learning_rate": 1.2599345488546049e-05, "loss": 1.2815, "step": 22064 }, { "epoch": 7.741234221598878, "grad_norm": 13.66938304901123, "learning_rate": 1.2572074177964782e-05, "loss": 1.4335, "step": 22078 }, { "epoch": 7.746143057503506, "grad_norm": 10.687335968017578, "learning_rate": 1.2544802867383513e-05, "loss": 1.4336, "step": 22092 }, { "epoch": 7.7510518934081345, "grad_norm": 8.956398010253906, "learning_rate": 1.2517531556802246e-05, "loss": 1.4272, "step": 22106 }, { "epoch": 7.755960729312763, "grad_norm": 8.839801788330078, "learning_rate": 1.2490260246220977e-05, "loss": 1.4096, "step": 22120 }, { "epoch": 7.760869565217392, "grad_norm": 13.55469036102295, "learning_rate": 1.2462988935639708e-05, "loss": 1.4033, "step": 22134 }, { "epoch": 7.76577840112202, "grad_norm": 8.753087997436523, "learning_rate": 1.243571762505844e-05, "loss": 1.3652, "step": 22148 }, { "epoch": 7.770687237026648, "grad_norm": 9.47844123840332, "learning_rate": 1.240844631447717e-05, "loss": 1.4185, "step": 22162 }, { "epoch": 7.775596072931276, "grad_norm": 10.422697067260742, "learning_rate": 1.2381175003895902e-05, "loss": 1.4021, "step": 22176 }, { "epoch": 7.780504908835905, "grad_norm": 9.713798522949219, "learning_rate": 1.2353903693314633e-05, "loss": 1.3793, "step": 
22190 }, { "epoch": 7.7854137447405325, "grad_norm": 11.503867149353027, "learning_rate": 1.2326632382733364e-05, "loss": 1.4373, "step": 22204 }, { "epoch": 7.790322580645161, "grad_norm": 17.052602767944336, "learning_rate": 1.2299361072152096e-05, "loss": 1.4273, "step": 22218 }, { "epoch": 7.79523141654979, "grad_norm": 12.047451972961426, "learning_rate": 1.2272089761570828e-05, "loss": 1.4038, "step": 22232 }, { "epoch": 7.800140252454418, "grad_norm": 9.216214179992676, "learning_rate": 1.224481845098956e-05, "loss": 1.3084, "step": 22246 }, { "epoch": 7.805049088359047, "grad_norm": 9.928129196166992, "learning_rate": 1.2217547140408291e-05, "loss": 1.4378, "step": 22260 }, { "epoch": 7.809957924263674, "grad_norm": 11.35969352722168, "learning_rate": 1.2190275829827022e-05, "loss": 1.5172, "step": 22274 }, { "epoch": 7.814866760168303, "grad_norm": 12.51523494720459, "learning_rate": 1.2163004519245753e-05, "loss": 1.4589, "step": 22288 }, { "epoch": 7.819775596072931, "grad_norm": 13.861340522766113, "learning_rate": 1.2135733208664486e-05, "loss": 1.4809, "step": 22302 }, { "epoch": 7.82468443197756, "grad_norm": 7.796072959899902, "learning_rate": 1.2108461898083217e-05, "loss": 1.361, "step": 22316 }, { "epoch": 7.829593267882188, "grad_norm": 9.534405708312988, "learning_rate": 1.2081190587501949e-05, "loss": 1.3348, "step": 22330 }, { "epoch": 7.834502103786816, "grad_norm": 9.701468467712402, "learning_rate": 1.205391927692068e-05, "loss": 1.3764, "step": 22344 }, { "epoch": 7.839410939691445, "grad_norm": 10.149394989013672, "learning_rate": 1.2026647966339411e-05, "loss": 1.38, "step": 22358 }, { "epoch": 7.844319775596073, "grad_norm": 9.409993171691895, "learning_rate": 1.1999376655758144e-05, "loss": 1.3283, "step": 22372 }, { "epoch": 7.849228611500701, "grad_norm": 9.273966789245605, "learning_rate": 1.1972105345176875e-05, "loss": 1.3124, "step": 22386 }, { "epoch": 7.854137447405329, "grad_norm": 12.398307800292969, "learning_rate": 
1.1944834034595606e-05, "loss": 1.3449, "step": 22400 }, { "epoch": 7.859046283309958, "grad_norm": 11.279448509216309, "learning_rate": 1.1917562724014338e-05, "loss": 1.293, "step": 22414 }, { "epoch": 7.8639551192145865, "grad_norm": 11.620269775390625, "learning_rate": 1.1890291413433069e-05, "loss": 1.3363, "step": 22428 }, { "epoch": 7.868863955119215, "grad_norm": 10.660218238830566, "learning_rate": 1.18630201028518e-05, "loss": 1.3739, "step": 22442 }, { "epoch": 7.873772791023843, "grad_norm": 9.57472038269043, "learning_rate": 1.1835748792270531e-05, "loss": 1.4063, "step": 22456 }, { "epoch": 7.878681626928471, "grad_norm": 13.500846862792969, "learning_rate": 1.1808477481689263e-05, "loss": 1.3762, "step": 22470 }, { "epoch": 7.8835904628331, "grad_norm": 8.393508911132812, "learning_rate": 1.1781206171107995e-05, "loss": 1.335, "step": 22484 }, { "epoch": 7.888499298737728, "grad_norm": 11.197653770446777, "learning_rate": 1.1753934860526727e-05, "loss": 1.38, "step": 22498 }, { "epoch": 7.893408134642356, "grad_norm": 9.55688762664795, "learning_rate": 1.1726663549945458e-05, "loss": 1.4109, "step": 22512 }, { "epoch": 7.8983169705469845, "grad_norm": 9.848345756530762, "learning_rate": 1.1699392239364189e-05, "loss": 1.4215, "step": 22526 }, { "epoch": 7.903225806451613, "grad_norm": 11.763021469116211, "learning_rate": 1.167212092878292e-05, "loss": 1.438, "step": 22540 }, { "epoch": 7.9081346423562415, "grad_norm": 9.62762451171875, "learning_rate": 1.1644849618201653e-05, "loss": 1.3285, "step": 22554 }, { "epoch": 7.913043478260869, "grad_norm": 12.479962348937988, "learning_rate": 1.1617578307620384e-05, "loss": 1.438, "step": 22568 }, { "epoch": 7.917952314165498, "grad_norm": 9.252252578735352, "learning_rate": 1.1590306997039116e-05, "loss": 1.4146, "step": 22582 }, { "epoch": 7.922861150070126, "grad_norm": 12.372159957885742, "learning_rate": 1.1563035686457847e-05, "loss": 1.4355, "step": 22596 }, { "epoch": 7.927769985974755, 
"grad_norm": 12.063983917236328, "learning_rate": 1.1535764375876578e-05, "loss": 1.3967, "step": 22610 }, { "epoch": 7.932678821879383, "grad_norm": 10.885601043701172, "learning_rate": 1.1508493065295311e-05, "loss": 1.5953, "step": 22624 }, { "epoch": 7.937587657784011, "grad_norm": 13.346940040588379, "learning_rate": 1.1481221754714042e-05, "loss": 1.3406, "step": 22638 }, { "epoch": 7.9424964936886395, "grad_norm": 9.05918025970459, "learning_rate": 1.1453950444132773e-05, "loss": 1.3586, "step": 22652 }, { "epoch": 7.947405329593268, "grad_norm": 10.274587631225586, "learning_rate": 1.1426679133551505e-05, "loss": 1.3298, "step": 22666 }, { "epoch": 7.952314165497896, "grad_norm": 10.546265602111816, "learning_rate": 1.1399407822970236e-05, "loss": 1.3427, "step": 22680 }, { "epoch": 7.957223001402524, "grad_norm": 8.034473419189453, "learning_rate": 1.1372136512388967e-05, "loss": 1.3968, "step": 22694 }, { "epoch": 7.962131837307153, "grad_norm": 9.773804664611816, "learning_rate": 1.1344865201807698e-05, "loss": 1.3748, "step": 22708 }, { "epoch": 7.967040673211781, "grad_norm": 11.21592903137207, "learning_rate": 1.131759389122643e-05, "loss": 1.3989, "step": 22722 }, { "epoch": 7.97194950911641, "grad_norm": 10.833891868591309, "learning_rate": 1.129032258064516e-05, "loss": 1.364, "step": 22736 }, { "epoch": 7.9768583450210375, "grad_norm": 10.027050018310547, "learning_rate": 1.1263051270063894e-05, "loss": 1.463, "step": 22750 }, { "epoch": 7.981767180925666, "grad_norm": 9.534196853637695, "learning_rate": 1.1235779959482625e-05, "loss": 1.389, "step": 22764 }, { "epoch": 7.986676016830295, "grad_norm": 11.599031448364258, "learning_rate": 1.1208508648901356e-05, "loss": 1.4311, "step": 22778 }, { "epoch": 7.991584852734923, "grad_norm": 11.466763496398926, "learning_rate": 1.1181237338320087e-05, "loss": 1.3746, "step": 22792 }, { "epoch": 7.996493688639552, "grad_norm": 9.784607887268066, "learning_rate": 1.115396602773882e-05, "loss": 1.356, 
"step": 22806 }, { "epoch": 8.0, "eval_loss": 1.349021077156067, "eval_map": 0.1273, "eval_map_50": 0.1797, "eval_map_75": 0.1465, "eval_map_applique": 0.0001, "eval_map_bag, wallet": 0.1138, "eval_map_bead": 0.0318, "eval_map_belt": 0.1442, "eval_map_bow": 0.0, "eval_map_buckle": 0.1281, "eval_map_cape": 0.0, "eval_map_cardigan": 0.0, "eval_map_coat": 0.1687, "eval_map_collar": 0.2044, "eval_map_dress": 0.4521, "eval_map_epaulette": 0.021, "eval_map_flower": 0.0, "eval_map_fringe": 0.0, "eval_map_glasses": 0.2349, "eval_map_glove": 0.0782, "eval_map_hat": 0.1917, "eval_map_headband, head covering, hair accessory": 0.0911, "eval_map_hood": 0.0651, "eval_map_jacket": 0.2975, "eval_map_jumpsuit": 0.0229, "eval_map_lapel": 0.1397, "eval_map_large": 0.128, "eval_map_leg warmer": 0.0, "eval_map_medium": 0.1232, "eval_map_neckline": 0.3332, "eval_map_pants": 0.4275, "eval_map_pocket": 0.1119, "eval_map_ribbon": 0.0, "eval_map_rivet": 0.0348, "eval_map_ruffle": 0.0438, "eval_map_scarf": 0.0281, "eval_map_sequin": 0.0, "eval_map_shirt, blouse": 0.0627, "eval_map_shoe": 0.4586, "eval_map_shorts": 0.2468, "eval_map_skirt": 0.3591, "eval_map_sleeve": 0.3589, "eval_map_small": 0.0, "eval_map_sock": 0.055, "eval_map_sweater": 0.0, "eval_map_tassel": 0.0, "eval_map_tie": 0.1835, "eval_map_tights, stockings": 0.2164, "eval_map_top, t-shirt, sweatshirt": 0.1938, "eval_map_umbrella": 0.2287, "eval_map_vest": 0.0, "eval_map_watch": 0.0866, "eval_map_zipper": 0.0407, "eval_mar_1": 0.1979, "eval_mar_10": 0.3926, "eval_mar_100": 0.3995, "eval_mar_100_applique": 0.0098, "eval_mar_100_bag, wallet": 0.5493, "eval_mar_100_bead": 0.3271, "eval_mar_100_belt": 0.6311, "eval_mar_100_bow": 0.0, "eval_mar_100_buckle": 0.4328, "eval_mar_100_cape": 0.0, "eval_mar_100_cardigan": 0.0, "eval_mar_100_coat": 0.5835, "eval_mar_100_collar": 0.6276, "eval_mar_100_dress": 0.8437, "eval_mar_100_epaulette": 0.4286, "eval_mar_100_flower": 0.0, "eval_mar_100_fringe": 0.0, "eval_mar_100_glasses": 0.6682, 
"eval_mar_100_glove": 0.3032, "eval_mar_100_hat": 0.6178, "eval_mar_100_headband, head covering, hair accessory": 0.5239, "eval_mar_100_hood": 0.1906, "eval_mar_100_jacket": 0.722, "eval_mar_100_jumpsuit": 0.1524, "eval_mar_100_lapel": 0.5622, "eval_mar_100_leg warmer": 0.0, "eval_mar_100_neckline": 0.7721, "eval_mar_100_pants": 0.8073, "eval_mar_100_pocket": 0.6848, "eval_mar_100_ribbon": 0.0, "eval_mar_100_rivet": 0.18, "eval_mar_100_ruffle": 0.3461, "eval_mar_100_scarf": 0.1167, "eval_mar_100_sequin": 0.0, "eval_mar_100_shirt, blouse": 0.3733, "eval_mar_100_shoe": 0.7988, "eval_mar_100_shorts": 0.5962, "eval_mar_100_skirt": 0.7778, "eval_mar_100_sleeve": 0.7668, "eval_mar_100_sock": 0.6341, "eval_mar_100_sweater": 0.0, "eval_mar_100_tassel": 0.0, "eval_mar_100_tie": 0.7, "eval_mar_100_tights, stockings": 0.7754, "eval_mar_100_top, t-shirt, sweatshirt": 0.7204, "eval_mar_100_umbrella": 0.32, "eval_mar_100_vest": 0.0, "eval_mar_100_watch": 0.4699, "eval_mar_100_zipper": 0.3639, "eval_mar_large": 0.4026, "eval_mar_medium": 0.2467, "eval_mar_small": 0.0, "eval_runtime": 84.4479, "eval_samples_per_second": 13.713, "eval_steps_per_second": 0.438, "step": 22816 }, { "epoch": 8.00140252454418, "grad_norm": 9.270607948303223, "learning_rate": 1.1126694717157551e-05, "loss": 1.3758, "step": 22820 }, { "epoch": 8.006311360448807, "grad_norm": 8.719541549682617, "learning_rate": 1.1099423406576283e-05, "loss": 1.3609, "step": 22834 }, { "epoch": 8.011220196353436, "grad_norm": 8.484326362609863, "learning_rate": 1.1072152095995014e-05, "loss": 1.3595, "step": 22848 }, { "epoch": 8.016129032258064, "grad_norm": 8.215718269348145, "learning_rate": 1.1044880785413745e-05, "loss": 1.3705, "step": 22862 }, { "epoch": 8.021037868162693, "grad_norm": 12.481683731079102, "learning_rate": 1.1017609474832476e-05, "loss": 1.3807, "step": 22876 }, { "epoch": 8.025946704067321, "grad_norm": 8.499955177307129, "learning_rate": 1.099033816425121e-05, "loss": 1.4081, "step": 22890 }, { 
"epoch": 8.030855539971949, "grad_norm": 8.154129028320312, "learning_rate": 1.096306685366994e-05, "loss": 1.3256, "step": 22904 }, { "epoch": 8.035764375876578, "grad_norm": 10.932087898254395, "learning_rate": 1.0935795543088672e-05, "loss": 1.455, "step": 22918 }, { "epoch": 8.040673211781206, "grad_norm": 7.840991973876953, "learning_rate": 1.0908524232507403e-05, "loss": 1.3057, "step": 22932 }, { "epoch": 8.045582047685835, "grad_norm": 9.397950172424316, "learning_rate": 1.0881252921926134e-05, "loss": 1.4476, "step": 22946 }, { "epoch": 8.050490883590463, "grad_norm": 9.163840293884277, "learning_rate": 1.0853981611344865e-05, "loss": 1.3882, "step": 22960 }, { "epoch": 8.05539971949509, "grad_norm": 8.668262481689453, "learning_rate": 1.0826710300763597e-05, "loss": 1.3344, "step": 22974 }, { "epoch": 8.06030855539972, "grad_norm": 7.99281120300293, "learning_rate": 1.0799438990182328e-05, "loss": 1.4393, "step": 22988 }, { "epoch": 8.065217391304348, "grad_norm": 7.629146099090576, "learning_rate": 1.077216767960106e-05, "loss": 1.3217, "step": 23002 }, { "epoch": 8.070126227208975, "grad_norm": 10.247729301452637, "learning_rate": 1.0744896369019792e-05, "loss": 1.3449, "step": 23016 }, { "epoch": 8.075035063113605, "grad_norm": 10.801100730895996, "learning_rate": 1.0717625058438523e-05, "loss": 1.4616, "step": 23030 }, { "epoch": 8.079943899018232, "grad_norm": 8.188803672790527, "learning_rate": 1.0690353747857254e-05, "loss": 1.3918, "step": 23044 }, { "epoch": 8.084852734922862, "grad_norm": 10.7251615524292, "learning_rate": 1.0663082437275986e-05, "loss": 1.3733, "step": 23058 }, { "epoch": 8.08976157082749, "grad_norm": 9.752542495727539, "learning_rate": 1.0635811126694718e-05, "loss": 1.3546, "step": 23072 }, { "epoch": 8.094670406732117, "grad_norm": 7.812772274017334, "learning_rate": 1.060853981611345e-05, "loss": 1.3314, "step": 23086 }, { "epoch": 8.099579242636747, "grad_norm": 8.898970603942871, "learning_rate": 1.0581268505532181e-05, 
"loss": 1.4008, "step": 23100 }, { "epoch": 8.104488078541374, "grad_norm": 11.2909574508667, "learning_rate": 1.0553997194950912e-05, "loss": 1.3566, "step": 23114 }, { "epoch": 8.109396914446004, "grad_norm": 10.15160846710205, "learning_rate": 1.0526725884369643e-05, "loss": 1.4529, "step": 23128 }, { "epoch": 8.114305750350631, "grad_norm": 9.653207778930664, "learning_rate": 1.0499454573788376e-05, "loss": 1.4151, "step": 23142 }, { "epoch": 8.119214586255259, "grad_norm": 8.644963264465332, "learning_rate": 1.0472183263207107e-05, "loss": 1.3465, "step": 23156 }, { "epoch": 8.124123422159888, "grad_norm": 10.67163372039795, "learning_rate": 1.0444911952625839e-05, "loss": 1.3641, "step": 23170 }, { "epoch": 8.129032258064516, "grad_norm": 11.005233764648438, "learning_rate": 1.041764064204457e-05, "loss": 1.3869, "step": 23184 }, { "epoch": 8.133941093969144, "grad_norm": 9.511191368103027, "learning_rate": 1.0390369331463301e-05, "loss": 1.3778, "step": 23198 }, { "epoch": 8.138849929873773, "grad_norm": 9.03869342803955, "learning_rate": 1.0363098020882032e-05, "loss": 1.3755, "step": 23212 }, { "epoch": 8.1437587657784, "grad_norm": 11.134561538696289, "learning_rate": 1.0335826710300764e-05, "loss": 1.4567, "step": 23226 }, { "epoch": 8.14866760168303, "grad_norm": 10.021492958068848, "learning_rate": 1.0308555399719495e-05, "loss": 1.479, "step": 23240 }, { "epoch": 8.153576437587658, "grad_norm": 10.748839378356934, "learning_rate": 1.0281284089138226e-05, "loss": 1.3435, "step": 23254 }, { "epoch": 8.158485273492285, "grad_norm": 8.370108604431152, "learning_rate": 1.0254012778556959e-05, "loss": 1.4283, "step": 23268 }, { "epoch": 8.163394109396915, "grad_norm": 8.724272727966309, "learning_rate": 1.022674146797569e-05, "loss": 1.4479, "step": 23282 }, { "epoch": 8.168302945301543, "grad_norm": 10.252472877502441, "learning_rate": 1.0199470157394421e-05, "loss": 1.4213, "step": 23296 }, { "epoch": 8.173211781206172, "grad_norm": 11.816312789916992, 
"learning_rate": 1.0172198846813153e-05, "loss": 1.3934, "step": 23310 }, { "epoch": 8.1781206171108, "grad_norm": 10.511289596557617, "learning_rate": 1.0144927536231885e-05, "loss": 1.4442, "step": 23324 }, { "epoch": 8.183029453015427, "grad_norm": 14.556593894958496, "learning_rate": 1.0117656225650617e-05, "loss": 1.3327, "step": 23338 }, { "epoch": 8.187938288920057, "grad_norm": 8.738597869873047, "learning_rate": 1.0090384915069348e-05, "loss": 1.4099, "step": 23352 }, { "epoch": 8.192847124824684, "grad_norm": 9.480131149291992, "learning_rate": 1.0063113604488079e-05, "loss": 1.4098, "step": 23366 }, { "epoch": 8.197755960729312, "grad_norm": 8.602457046508789, "learning_rate": 1.003584229390681e-05, "loss": 1.3113, "step": 23380 }, { "epoch": 8.202664796633941, "grad_norm": 8.225278854370117, "learning_rate": 1.0008570983325542e-05, "loss": 1.4483, "step": 23394 }, { "epoch": 8.207573632538569, "grad_norm": 8.3049955368042, "learning_rate": 9.981299672744274e-06, "loss": 1.51, "step": 23408 }, { "epoch": 8.212482468443199, "grad_norm": 8.701492309570312, "learning_rate": 9.954028362163006e-06, "loss": 1.4064, "step": 23422 }, { "epoch": 8.217391304347826, "grad_norm": 7.885621547698975, "learning_rate": 9.926757051581737e-06, "loss": 1.3096, "step": 23436 }, { "epoch": 8.222300140252454, "grad_norm": 9.69874095916748, "learning_rate": 9.899485741000468e-06, "loss": 1.3519, "step": 23450 }, { "epoch": 8.227208976157083, "grad_norm": 11.22630500793457, "learning_rate": 9.8722144304192e-06, "loss": 1.3841, "step": 23464 }, { "epoch": 8.232117812061711, "grad_norm": 9.076868057250977, "learning_rate": 9.84494311983793e-06, "loss": 1.3696, "step": 23478 }, { "epoch": 8.237026647966339, "grad_norm": 13.788867950439453, "learning_rate": 9.817671809256662e-06, "loss": 1.3392, "step": 23492 }, { "epoch": 8.241935483870968, "grad_norm": 9.284319877624512, "learning_rate": 9.790400498675393e-06, "loss": 1.4121, "step": 23506 }, { "epoch": 8.246844319775596, 
"grad_norm": 9.021834373474121, "learning_rate": 9.763129188094126e-06, "loss": 1.3209, "step": 23520 }, { "epoch": 8.251753155680225, "grad_norm": 7.8818511962890625, "learning_rate": 9.735857877512857e-06, "loss": 1.3334, "step": 23534 }, { "epoch": 8.256661991584853, "grad_norm": 12.007120132446289, "learning_rate": 9.708586566931588e-06, "loss": 1.3498, "step": 23548 }, { "epoch": 8.26157082748948, "grad_norm": 9.958595275878906, "learning_rate": 9.68131525635032e-06, "loss": 1.3142, "step": 23562 }, { "epoch": 8.26647966339411, "grad_norm": 9.64443588256836, "learning_rate": 9.654043945769052e-06, "loss": 1.3742, "step": 23576 }, { "epoch": 8.271388499298737, "grad_norm": 10.041512489318848, "learning_rate": 9.626772635187784e-06, "loss": 1.3849, "step": 23590 }, { "epoch": 8.276297335203367, "grad_norm": 9.904881477355957, "learning_rate": 9.599501324606515e-06, "loss": 1.3415, "step": 23604 }, { "epoch": 8.281206171107995, "grad_norm": 9.454484939575195, "learning_rate": 9.572230014025246e-06, "loss": 1.4016, "step": 23618 }, { "epoch": 8.286115007012622, "grad_norm": 11.111409187316895, "learning_rate": 9.544958703443977e-06, "loss": 1.3294, "step": 23632 }, { "epoch": 8.291023842917252, "grad_norm": 12.80233097076416, "learning_rate": 9.517687392862709e-06, "loss": 1.3447, "step": 23646 }, { "epoch": 8.29593267882188, "grad_norm": 7.715102672576904, "learning_rate": 9.490416082281441e-06, "loss": 1.3991, "step": 23660 }, { "epoch": 8.300841514726507, "grad_norm": 8.486387252807617, "learning_rate": 9.463144771700173e-06, "loss": 1.4587, "step": 23674 }, { "epoch": 8.305750350631136, "grad_norm": 12.285039901733398, "learning_rate": 9.435873461118904e-06, "loss": 1.3862, "step": 23688 }, { "epoch": 8.310659186535764, "grad_norm": 10.54503345489502, "learning_rate": 9.408602150537635e-06, "loss": 1.3088, "step": 23702 }, { "epoch": 8.315568022440393, "grad_norm": 9.932971954345703, "learning_rate": 9.381330839956366e-06, "loss": 1.3568, "step": 23716 }, { 
"epoch": 8.320476858345021, "grad_norm": 9.089605331420898, "learning_rate": 9.354059529375098e-06, "loss": 1.47, "step": 23730 }, { "epoch": 8.325385694249649, "grad_norm": 10.21333122253418, "learning_rate": 9.326788218793829e-06, "loss": 1.4225, "step": 23744 }, { "epoch": 8.330294530154278, "grad_norm": 7.756230354309082, "learning_rate": 9.29951690821256e-06, "loss": 1.5258, "step": 23758 }, { "epoch": 8.335203366058906, "grad_norm": 9.800232887268066, "learning_rate": 9.272245597631291e-06, "loss": 1.3793, "step": 23772 }, { "epoch": 8.340112201963535, "grad_norm": 12.844193458557129, "learning_rate": 9.244974287050024e-06, "loss": 1.3067, "step": 23786 }, { "epoch": 8.345021037868163, "grad_norm": 9.783154487609863, "learning_rate": 9.217702976468755e-06, "loss": 1.4463, "step": 23800 }, { "epoch": 8.34992987377279, "grad_norm": 9.239618301391602, "learning_rate": 9.190431665887487e-06, "loss": 1.3153, "step": 23814 }, { "epoch": 8.35483870967742, "grad_norm": 11.713922500610352, "learning_rate": 9.16316035530622e-06, "loss": 1.3548, "step": 23828 }, { "epoch": 8.359747545582048, "grad_norm": 10.482182502746582, "learning_rate": 9.13588904472495e-06, "loss": 1.3256, "step": 23842 }, { "epoch": 8.364656381486675, "grad_norm": 10.166626930236816, "learning_rate": 9.108617734143682e-06, "loss": 1.3197, "step": 23856 }, { "epoch": 8.369565217391305, "grad_norm": 11.65921688079834, "learning_rate": 9.081346423562413e-06, "loss": 1.3987, "step": 23870 }, { "epoch": 8.374474053295932, "grad_norm": 7.959221363067627, "learning_rate": 9.054075112981144e-06, "loss": 1.3427, "step": 23884 }, { "epoch": 8.379382889200562, "grad_norm": 9.080570220947266, "learning_rate": 9.026803802399876e-06, "loss": 1.3897, "step": 23898 }, { "epoch": 8.38429172510519, "grad_norm": 10.626421928405762, "learning_rate": 8.999532491818607e-06, "loss": 1.4466, "step": 23912 }, { "epoch": 8.389200561009817, "grad_norm": 10.404516220092773, "learning_rate": 8.97226118123734e-06, "loss": 
1.4959, "step": 23926 }, { "epoch": 8.394109396914446, "grad_norm": 11.152312278747559, "learning_rate": 8.944989870656071e-06, "loss": 1.4327, "step": 23940 }, { "epoch": 8.399018232819074, "grad_norm": 8.836381912231445, "learning_rate": 8.917718560074802e-06, "loss": 1.3946, "step": 23954 }, { "epoch": 8.403927068723704, "grad_norm": 10.076040267944336, "learning_rate": 8.890447249493533e-06, "loss": 1.3104, "step": 23968 }, { "epoch": 8.408835904628331, "grad_norm": 9.547856330871582, "learning_rate": 8.863175938912265e-06, "loss": 1.341, "step": 23982 }, { "epoch": 8.413744740532959, "grad_norm": 11.793825149536133, "learning_rate": 8.835904628330996e-06, "loss": 1.2602, "step": 23996 }, { "epoch": 8.418653576437588, "grad_norm": 9.797273635864258, "learning_rate": 8.808633317749727e-06, "loss": 1.3988, "step": 24010 }, { "epoch": 8.423562412342216, "grad_norm": 10.352211952209473, "learning_rate": 8.781362007168458e-06, "loss": 1.3351, "step": 24024 }, { "epoch": 8.428471248246844, "grad_norm": 8.39946174621582, "learning_rate": 8.75409069658719e-06, "loss": 1.3809, "step": 24038 }, { "epoch": 8.433380084151473, "grad_norm": 8.047261238098145, "learning_rate": 8.726819386005922e-06, "loss": 1.3515, "step": 24052 }, { "epoch": 8.4382889200561, "grad_norm": 11.394075393676758, "learning_rate": 8.699548075424654e-06, "loss": 1.4897, "step": 24066 }, { "epoch": 8.44319775596073, "grad_norm": 9.669784545898438, "learning_rate": 8.672276764843386e-06, "loss": 1.4428, "step": 24080 }, { "epoch": 8.448106591865358, "grad_norm": 12.837532997131348, "learning_rate": 8.645005454262118e-06, "loss": 1.3978, "step": 24094 }, { "epoch": 8.453015427769985, "grad_norm": 9.32642650604248, "learning_rate": 8.617734143680849e-06, "loss": 1.3106, "step": 24108 }, { "epoch": 8.457924263674615, "grad_norm": 11.50643253326416, "learning_rate": 8.59046283309958e-06, "loss": 1.375, "step": 24122 }, { "epoch": 8.462833099579242, "grad_norm": 8.420050621032715, "learning_rate": 
8.563191522518311e-06, "loss": 1.3965, "step": 24136 }, { "epoch": 8.46774193548387, "grad_norm": 10.560477256774902, "learning_rate": 8.535920211937043e-06, "loss": 1.3653, "step": 24150 }, { "epoch": 8.4726507713885, "grad_norm": 9.773297309875488, "learning_rate": 8.508648901355774e-06, "loss": 1.3968, "step": 24164 }, { "epoch": 8.477559607293127, "grad_norm": 9.642863273620605, "learning_rate": 8.481377590774505e-06, "loss": 1.3857, "step": 24178 }, { "epoch": 8.482468443197757, "grad_norm": 9.012931823730469, "learning_rate": 8.454106280193238e-06, "loss": 1.3399, "step": 24192 }, { "epoch": 8.487377279102384, "grad_norm": 12.6365385055542, "learning_rate": 8.426834969611969e-06, "loss": 1.3567, "step": 24206 }, { "epoch": 8.492286115007012, "grad_norm": 8.108349800109863, "learning_rate": 8.3995636590307e-06, "loss": 1.3084, "step": 24220 }, { "epoch": 8.497194950911641, "grad_norm": 8.651247024536133, "learning_rate": 8.372292348449432e-06, "loss": 1.3479, "step": 24234 }, { "epoch": 8.502103786816269, "grad_norm": 8.60372257232666, "learning_rate": 8.345021037868163e-06, "loss": 1.4439, "step": 24248 }, { "epoch": 8.507012622720898, "grad_norm": 13.123096466064453, "learning_rate": 8.317749727286894e-06, "loss": 1.3708, "step": 24262 }, { "epoch": 8.511921458625526, "grad_norm": 10.118566513061523, "learning_rate": 8.290478416705625e-06, "loss": 1.3077, "step": 24276 }, { "epoch": 8.516830294530154, "grad_norm": 8.466140747070312, "learning_rate": 8.263207106124356e-06, "loss": 1.3603, "step": 24290 }, { "epoch": 8.521739130434783, "grad_norm": 8.500524520874023, "learning_rate": 8.23593579554309e-06, "loss": 1.4404, "step": 24304 }, { "epoch": 8.52664796633941, "grad_norm": 8.18726921081543, "learning_rate": 8.20866448496182e-06, "loss": 1.3822, "step": 24318 }, { "epoch": 8.531556802244038, "grad_norm": 8.55293083190918, "learning_rate": 8.181393174380552e-06, "loss": 1.4377, "step": 24332 }, { "epoch": 8.536465638148668, "grad_norm": 9.06181526184082, 
"learning_rate": 8.154121863799285e-06, "loss": 1.3668, "step": 24346 }, { "epoch": 8.541374474053296, "grad_norm": 7.8349432945251465, "learning_rate": 8.126850553218016e-06, "loss": 1.3804, "step": 24360 }, { "epoch": 8.546283309957925, "grad_norm": 14.103631973266602, "learning_rate": 8.099579242636747e-06, "loss": 1.395, "step": 24374 }, { "epoch": 8.551192145862553, "grad_norm": 10.257575988769531, "learning_rate": 8.072307932055478e-06, "loss": 1.3871, "step": 24388 }, { "epoch": 8.55610098176718, "grad_norm": 10.686551094055176, "learning_rate": 8.04503662147421e-06, "loss": 1.4384, "step": 24402 }, { "epoch": 8.56100981767181, "grad_norm": 10.211069107055664, "learning_rate": 8.01776531089294e-06, "loss": 1.4006, "step": 24416 }, { "epoch": 8.565918653576437, "grad_norm": 9.04093074798584, "learning_rate": 7.990494000311672e-06, "loss": 1.3671, "step": 24430 }, { "epoch": 8.570827489481065, "grad_norm": 14.364224433898926, "learning_rate": 7.963222689730405e-06, "loss": 1.4505, "step": 24444 }, { "epoch": 8.575736325385694, "grad_norm": 11.827798843383789, "learning_rate": 7.935951379149136e-06, "loss": 1.3594, "step": 24458 }, { "epoch": 8.580645161290322, "grad_norm": 11.665510177612305, "learning_rate": 7.908680068567867e-06, "loss": 1.3831, "step": 24472 }, { "epoch": 8.585553997194951, "grad_norm": 10.422080993652344, "learning_rate": 7.881408757986599e-06, "loss": 1.3718, "step": 24486 }, { "epoch": 8.59046283309958, "grad_norm": 11.450899124145508, "learning_rate": 7.85413744740533e-06, "loss": 1.4713, "step": 24500 }, { "epoch": 8.595371669004207, "grad_norm": 10.755717277526855, "learning_rate": 7.826866136824061e-06, "loss": 1.3778, "step": 24514 }, { "epoch": 8.600280504908836, "grad_norm": 9.115351676940918, "learning_rate": 7.799594826242792e-06, "loss": 1.3101, "step": 24528 }, { "epoch": 8.605189340813464, "grad_norm": 10.295341491699219, "learning_rate": 7.772323515661523e-06, "loss": 1.4654, "step": 24542 }, { "epoch": 8.610098176718093, 
"grad_norm": 8.904923439025879, "learning_rate": 7.745052205080255e-06, "loss": 1.3541, "step": 24556 }, { "epoch": 8.615007012622721, "grad_norm": 10.637202262878418, "learning_rate": 7.717780894498988e-06, "loss": 1.4713, "step": 24570 }, { "epoch": 8.619915848527349, "grad_norm": 9.29478645324707, "learning_rate": 7.690509583917719e-06, "loss": 1.4272, "step": 24584 }, { "epoch": 8.624824684431978, "grad_norm": 9.365023612976074, "learning_rate": 7.663238273336452e-06, "loss": 1.384, "step": 24598 }, { "epoch": 8.629733520336606, "grad_norm": 9.811233520507812, "learning_rate": 7.635966962755183e-06, "loss": 1.3872, "step": 24612 }, { "epoch": 8.634642356241233, "grad_norm": 9.084137916564941, "learning_rate": 7.608695652173914e-06, "loss": 1.4238, "step": 24626 }, { "epoch": 8.639551192145863, "grad_norm": 8.346358299255371, "learning_rate": 7.581424341592645e-06, "loss": 1.3908, "step": 24640 }, { "epoch": 8.64446002805049, "grad_norm": 9.164006233215332, "learning_rate": 7.5541530310113765e-06, "loss": 1.4918, "step": 24654 }, { "epoch": 8.64936886395512, "grad_norm": 8.44897747039795, "learning_rate": 7.526881720430108e-06, "loss": 1.3293, "step": 24668 }, { "epoch": 8.654277699859747, "grad_norm": 9.303556442260742, "learning_rate": 7.49961040984884e-06, "loss": 1.3529, "step": 24682 }, { "epoch": 8.659186535764375, "grad_norm": 8.709724426269531, "learning_rate": 7.472339099267571e-06, "loss": 1.3906, "step": 24696 }, { "epoch": 8.664095371669005, "grad_norm": 9.47709846496582, "learning_rate": 7.447015739442108e-06, "loss": 1.4326, "step": 24710 }, { "epoch": 8.669004207573632, "grad_norm": 10.879790306091309, "learning_rate": 7.419744428860839e-06, "loss": 1.4036, "step": 24724 }, { "epoch": 8.673913043478262, "grad_norm": 10.815740585327148, "learning_rate": 7.392473118279571e-06, "loss": 1.3684, "step": 24738 }, { "epoch": 8.67882187938289, "grad_norm": 9.378632545471191, "learning_rate": 7.365201807698302e-06, "loss": 1.34, "step": 24752 }, { "epoch": 
8.683730715287517, "grad_norm": 9.881305694580078, "learning_rate": 7.337930497117033e-06, "loss": 1.3818, "step": 24766 }, { "epoch": 8.688639551192146, "grad_norm": 8.160423278808594, "learning_rate": 7.3106591865357646e-06, "loss": 1.3262, "step": 24780 }, { "epoch": 8.693548387096774, "grad_norm": 9.152375221252441, "learning_rate": 7.283387875954497e-06, "loss": 1.3621, "step": 24794 }, { "epoch": 8.698457223001402, "grad_norm": 9.97005558013916, "learning_rate": 7.256116565373228e-06, "loss": 1.4091, "step": 24808 }, { "epoch": 8.703366058906031, "grad_norm": 15.009248733520508, "learning_rate": 7.228845254791959e-06, "loss": 1.2979, "step": 24822 }, { "epoch": 8.708274894810659, "grad_norm": 8.677882194519043, "learning_rate": 7.20157394421069e-06, "loss": 1.3208, "step": 24836 }, { "epoch": 8.713183730715288, "grad_norm": 8.239389419555664, "learning_rate": 7.174302633629422e-06, "loss": 1.2911, "step": 24850 }, { "epoch": 8.718092566619916, "grad_norm": 9.813348770141602, "learning_rate": 7.1470313230481536e-06, "loss": 1.411, "step": 24864 }, { "epoch": 8.723001402524543, "grad_norm": 8.227449417114258, "learning_rate": 7.119760012466885e-06, "loss": 1.4109, "step": 24878 }, { "epoch": 8.727910238429173, "grad_norm": 10.676633834838867, "learning_rate": 7.092488701885616e-06, "loss": 1.3275, "step": 24892 }, { "epoch": 8.7328190743338, "grad_norm": 8.095037460327148, "learning_rate": 7.065217391304347e-06, "loss": 1.3061, "step": 24906 }, { "epoch": 8.73772791023843, "grad_norm": 9.870763778686523, "learning_rate": 7.037946080723079e-06, "loss": 1.369, "step": 24920 }, { "epoch": 8.742636746143058, "grad_norm": 10.917984962463379, "learning_rate": 7.010674770141812e-06, "loss": 1.3115, "step": 24934 }, { "epoch": 8.747545582047685, "grad_norm": 10.270341873168945, "learning_rate": 6.983403459560543e-06, "loss": 1.4206, "step": 24948 }, { "epoch": 8.752454417952315, "grad_norm": 8.013930320739746, "learning_rate": 6.956132148979275e-06, "loss": 1.3655, 
"step": 24962 }, { "epoch": 8.757363253856942, "grad_norm": 11.731679916381836, "learning_rate": 6.928860838398006e-06, "loss": 1.3103, "step": 24976 }, { "epoch": 8.76227208976157, "grad_norm": 11.211536407470703, "learning_rate": 6.901589527816738e-06, "loss": 1.3306, "step": 24990 }, { "epoch": 8.7671809256662, "grad_norm": 11.36117935180664, "learning_rate": 6.874318217235469e-06, "loss": 1.2952, "step": 25004 }, { "epoch": 8.772089761570827, "grad_norm": 11.293071746826172, "learning_rate": 6.8470469066542e-06, "loss": 1.3712, "step": 25018 }, { "epoch": 8.776998597475457, "grad_norm": 7.839993476867676, "learning_rate": 6.8197755960729316e-06, "loss": 1.4488, "step": 25032 }, { "epoch": 8.781907433380084, "grad_norm": 9.638472557067871, "learning_rate": 6.792504285491663e-06, "loss": 1.3006, "step": 25046 }, { "epoch": 8.786816269284712, "grad_norm": 9.798298835754395, "learning_rate": 6.765232974910395e-06, "loss": 1.3279, "step": 25060 }, { "epoch": 8.791725105189341, "grad_norm": 11.955029487609863, "learning_rate": 6.737961664329126e-06, "loss": 1.3791, "step": 25074 }, { "epoch": 8.796633941093969, "grad_norm": 9.229399681091309, "learning_rate": 6.710690353747857e-06, "loss": 1.3869, "step": 25088 }, { "epoch": 8.801542776998598, "grad_norm": 11.299568176269531, "learning_rate": 6.6834190431665885e-06, "loss": 1.4122, "step": 25102 }, { "epoch": 8.806451612903226, "grad_norm": 11.114765167236328, "learning_rate": 6.6561477325853206e-06, "loss": 1.4098, "step": 25116 }, { "epoch": 8.811360448807854, "grad_norm": 11.320123672485352, "learning_rate": 6.628876422004052e-06, "loss": 1.3193, "step": 25130 }, { "epoch": 8.816269284712483, "grad_norm": 7.558450698852539, "learning_rate": 6.601605111422783e-06, "loss": 1.396, "step": 25144 }, { "epoch": 8.82117812061711, "grad_norm": 10.638873100280762, "learning_rate": 6.574333800841514e-06, "loss": 1.3232, "step": 25158 }, { "epoch": 8.826086956521738, "grad_norm": 11.091486930847168, "learning_rate": 
6.547062490260246e-06, "loss": 1.3902, "step": 25172 }, { "epoch": 8.830995792426368, "grad_norm": 8.447153091430664, "learning_rate": 6.5197911796789775e-06, "loss": 1.3064, "step": 25186 }, { "epoch": 8.835904628330995, "grad_norm": 10.349783897399902, "learning_rate": 6.49251986909771e-06, "loss": 1.3894, "step": 25200 }, { "epoch": 8.840813464235625, "grad_norm": 9.248343467712402, "learning_rate": 6.465248558516442e-06, "loss": 1.35, "step": 25214 }, { "epoch": 8.845722300140253, "grad_norm": 8.932231903076172, "learning_rate": 6.437977247935173e-06, "loss": 1.4149, "step": 25228 }, { "epoch": 8.85063113604488, "grad_norm": 8.059513092041016, "learning_rate": 6.410705937353904e-06, "loss": 1.4274, "step": 25242 }, { "epoch": 8.85553997194951, "grad_norm": 8.31438159942627, "learning_rate": 6.383434626772636e-06, "loss": 1.3482, "step": 25256 }, { "epoch": 8.860448807854137, "grad_norm": 8.70984172821045, "learning_rate": 6.356163316191367e-06, "loss": 1.3503, "step": 25270 }, { "epoch": 8.865357643758767, "grad_norm": 8.449482917785645, "learning_rate": 6.3288920056100986e-06, "loss": 1.2877, "step": 25284 }, { "epoch": 8.870266479663394, "grad_norm": 8.236336708068848, "learning_rate": 6.30162069502883e-06, "loss": 1.3383, "step": 25298 }, { "epoch": 8.875175315568022, "grad_norm": 13.84942626953125, "learning_rate": 6.274349384447562e-06, "loss": 1.3557, "step": 25312 }, { "epoch": 8.880084151472651, "grad_norm": 8.822325706481934, "learning_rate": 6.247078073866293e-06, "loss": 1.3407, "step": 25326 }, { "epoch": 8.884992987377279, "grad_norm": 10.631125450134277, "learning_rate": 6.219806763285024e-06, "loss": 1.355, "step": 25340 }, { "epoch": 8.889901823281907, "grad_norm": 9.551398277282715, "learning_rate": 6.1925354527037555e-06, "loss": 1.3063, "step": 25354 }, { "epoch": 8.894810659186536, "grad_norm": 9.061261177062988, "learning_rate": 6.1652641421224876e-06, "loss": 1.4617, "step": 25368 }, { "epoch": 8.899719495091164, "grad_norm": 
8.96053695678711, "learning_rate": 6.13799283154122e-06, "loss": 1.288, "step": 25382 }, { "epoch": 8.904628330995793, "grad_norm": 12.210264205932617, "learning_rate": 6.110721520959951e-06, "loss": 1.2842, "step": 25396 }, { "epoch": 8.90953716690042, "grad_norm": 9.340170860290527, "learning_rate": 6.083450210378682e-06, "loss": 1.3206, "step": 25410 }, { "epoch": 8.914446002805049, "grad_norm": 7.038578987121582, "learning_rate": 6.056178899797413e-06, "loss": 1.3342, "step": 25424 }, { "epoch": 8.919354838709678, "grad_norm": 14.4190092086792, "learning_rate": 6.028907589216145e-06, "loss": 1.3572, "step": 25438 }, { "epoch": 8.924263674614306, "grad_norm": 11.785904884338379, "learning_rate": 6.0016362786348766e-06, "loss": 1.3909, "step": 25452 }, { "epoch": 8.929172510518935, "grad_norm": 9.646684646606445, "learning_rate": 5.974364968053608e-06, "loss": 1.3585, "step": 25466 }, { "epoch": 8.934081346423563, "grad_norm": 11.281700134277344, "learning_rate": 5.947093657472339e-06, "loss": 1.3509, "step": 25480 }, { "epoch": 8.93899018232819, "grad_norm": 11.222411155700684, "learning_rate": 5.91982234689107e-06, "loss": 1.3281, "step": 25494 }, { "epoch": 8.94389901823282, "grad_norm": 11.21274185180664, "learning_rate": 5.892551036309803e-06, "loss": 1.3455, "step": 25508 }, { "epoch": 8.948807854137447, "grad_norm": 8.487445831298828, "learning_rate": 5.865279725728534e-06, "loss": 1.3593, "step": 25522 }, { "epoch": 8.953716690042075, "grad_norm": 8.834291458129883, "learning_rate": 5.8380084151472655e-06, "loss": 1.3886, "step": 25536 }, { "epoch": 8.958625525946704, "grad_norm": 8.546503067016602, "learning_rate": 5.810737104565997e-06, "loss": 1.3509, "step": 25550 }, { "epoch": 8.963534361851332, "grad_norm": 8.960319519042969, "learning_rate": 5.783465793984728e-06, "loss": 1.4393, "step": 25564 }, { "epoch": 8.968443197755962, "grad_norm": 10.056375503540039, "learning_rate": 5.75619448340346e-06, "loss": 1.368, "step": 25578 }, { "epoch": 
8.97335203366059, "grad_norm": 8.225775718688965, "learning_rate": 5.728923172822191e-06, "loss": 1.2879, "step": 25592 }, { "epoch": 8.978260869565217, "grad_norm": 8.935049057006836, "learning_rate": 5.7016518622409225e-06, "loss": 1.3386, "step": 25606 }, { "epoch": 8.983169705469846, "grad_norm": 8.652969360351562, "learning_rate": 5.674380551659654e-06, "loss": 1.3785, "step": 25620 }, { "epoch": 8.988078541374474, "grad_norm": 9.471237182617188, "learning_rate": 5.647109241078386e-06, "loss": 1.2777, "step": 25634 }, { "epoch": 8.992987377279102, "grad_norm": 9.36213207244873, "learning_rate": 5.619837930497118e-06, "loss": 1.3858, "step": 25648 }, { "epoch": 8.997896213183731, "grad_norm": 9.776371955871582, "learning_rate": 5.592566619915849e-06, "loss": 1.3707, "step": 25662 }, { "epoch": 9.0, "eval_loss": 1.3260630369186401, "eval_map": 0.1358, "eval_map_50": 0.189, "eval_map_75": 0.1555, "eval_map_applique": 0.0008, "eval_map_bag, wallet": 0.1249, "eval_map_bead": 0.0249, "eval_map_belt": 0.1484, "eval_map_bow": 0.0, "eval_map_buckle": 0.1515, "eval_map_cape": 0.0, "eval_map_cardigan": 0.0, "eval_map_coat": 0.1695, "eval_map_collar": 0.2373, "eval_map_dress": 0.4391, "eval_map_epaulette": 0.036, "eval_map_flower": 0.0, "eval_map_fringe": 0.0, "eval_map_glasses": 0.2495, "eval_map_glove": 0.0556, "eval_map_hat": 0.2254, "eval_map_headband, head covering, hair accessory": 0.1087, "eval_map_hood": 0.0546, "eval_map_jacket": 0.2819, "eval_map_jumpsuit": 0.0112, "eval_map_lapel": 0.1605, "eval_map_large": 0.1363, "eval_map_leg warmer": 0.0, "eval_map_medium": 0.1155, "eval_map_neckline": 0.3644, "eval_map_pants": 0.4573, "eval_map_pocket": 0.1175, "eval_map_ribbon": 0.0, "eval_map_rivet": 0.0316, "eval_map_ruffle": 0.0545, "eval_map_scarf": 0.0193, "eval_map_sequin": 0.0, "eval_map_shirt, blouse": 0.0893, "eval_map_shoe": 0.4771, "eval_map_shorts": 0.2396, "eval_map_skirt": 0.299, "eval_map_sleeve": 0.3793, "eval_map_small": 0.0, "eval_map_sock": 0.0809, 
"eval_map_sweater": 0.0, "eval_map_tassel": 0.0, "eval_map_tie": 0.3708, "eval_map_tights, stockings": 0.2305, "eval_map_top, t-shirt, sweatshirt": 0.2131, "eval_map_umbrella": 0.1987, "eval_map_vest": 0.0, "eval_map_watch": 0.1019, "eval_map_zipper": 0.0412, "eval_mar_1": 0.2088, "eval_mar_10": 0.4005, "eval_mar_100": 0.4081, "eval_mar_100_applique": 0.0475, "eval_mar_100_bag, wallet": 0.5667, "eval_mar_100_bead": 0.3318, "eval_mar_100_belt": 0.6348, "eval_mar_100_bow": 0.0, "eval_mar_100_buckle": 0.4672, "eval_mar_100_cape": 0.0, "eval_mar_100_cardigan": 0.0, "eval_mar_100_coat": 0.5505, "eval_mar_100_collar": 0.6594, "eval_mar_100_dress": 0.84, "eval_mar_100_epaulette": 0.5357, "eval_mar_100_flower": 0.0, "eval_mar_100_fringe": 0.0, "eval_mar_100_glasses": 0.7023, "eval_mar_100_glove": 0.2032, "eval_mar_100_hat": 0.5863, "eval_mar_100_headband, head covering, hair accessory": 0.5367, "eval_mar_100_hood": 0.1875, "eval_mar_100_jacket": 0.7247, "eval_mar_100_jumpsuit": 0.1286, "eval_mar_100_lapel": 0.5881, "eval_mar_100_leg warmer": 0.0, "eval_mar_100_neckline": 0.7828, "eval_mar_100_pants": 0.8111, "eval_mar_100_pocket": 0.7106, "eval_mar_100_ribbon": 0.0, "eval_mar_100_rivet": 0.2021, "eval_mar_100_ruffle": 0.3526, "eval_mar_100_scarf": 0.0854, "eval_mar_100_sequin": 0.0, "eval_mar_100_shirt, blouse": 0.4386, "eval_mar_100_shoe": 0.8023, "eval_mar_100_shorts": 0.5934, "eval_mar_100_skirt": 0.7914, "eval_mar_100_sleeve": 0.7764, "eval_mar_100_sock": 0.6341, "eval_mar_100_sweater": 0.0, "eval_mar_100_tassel": 0.0, "eval_mar_100_tie": 0.8, "eval_mar_100_tights, stockings": 0.7648, "eval_mar_100_top, t-shirt, sweatshirt": 0.7598, "eval_mar_100_umbrella": 0.3, "eval_mar_100_vest": 0.0, "eval_mar_100_watch": 0.5253, "eval_mar_100_zipper": 0.351, "eval_mar_large": 0.4109, "eval_mar_medium": 0.2688, "eval_mar_small": 0.0, "eval_runtime": 84.3863, "eval_samples_per_second": 13.723, "eval_steps_per_second": 0.438, "step": 25668 }, { "epoch": 9.002805049088359, 
"grad_norm": 10.259478569030762, "learning_rate": 5.56529530933458e-06, "loss": 1.3861, "step": 25676 }, { "epoch": 9.007713884992988, "grad_norm": 9.960562705993652, "learning_rate": 5.5380239987533115e-06, "loss": 1.3193, "step": 25690 }, { "epoch": 9.012622720897616, "grad_norm": 8.19436264038086, "learning_rate": 5.5107526881720435e-06, "loss": 1.3444, "step": 25704 }, { "epoch": 9.017531556802243, "grad_norm": 9.15941047668457, "learning_rate": 5.483481377590775e-06, "loss": 1.3723, "step": 25718 }, { "epoch": 9.022440392706873, "grad_norm": 10.215705871582031, "learning_rate": 5.456210067009506e-06, "loss": 1.2795, "step": 25732 }, { "epoch": 9.0273492286115, "grad_norm": 10.481411933898926, "learning_rate": 5.428938756428237e-06, "loss": 1.3212, "step": 25746 }, { "epoch": 9.03225806451613, "grad_norm": 8.175745010375977, "learning_rate": 5.401667445846969e-06, "loss": 1.2496, "step": 25760 }, { "epoch": 9.037166900420758, "grad_norm": 11.088653564453125, "learning_rate": 5.374396135265701e-06, "loss": 1.3569, "step": 25774 }, { "epoch": 9.042075736325385, "grad_norm": 8.359198570251465, "learning_rate": 5.3471248246844325e-06, "loss": 1.2911, "step": 25788 }, { "epoch": 9.046984572230015, "grad_norm": 7.8419575691223145, "learning_rate": 5.319853514103164e-06, "loss": 1.3693, "step": 25802 }, { "epoch": 9.051893408134642, "grad_norm": 11.811532020568848, "learning_rate": 5.292582203521895e-06, "loss": 1.411, "step": 25816 }, { "epoch": 9.05680224403927, "grad_norm": 12.888726234436035, "learning_rate": 5.265310892940627e-06, "loss": 1.4331, "step": 25830 }, { "epoch": 9.0617110799439, "grad_norm": 10.09017562866211, "learning_rate": 5.238039582359358e-06, "loss": 1.4003, "step": 25844 }, { "epoch": 9.066619915848527, "grad_norm": 10.120200157165527, "learning_rate": 5.2107682717780895e-06, "loss": 1.3175, "step": 25858 }, { "epoch": 9.071528751753156, "grad_norm": 8.898833274841309, "learning_rate": 5.183496961196821e-06, "loss": 1.3833, "step": 25872 }, { 
"epoch": 9.076437587657784, "grad_norm": 10.347800254821777, "learning_rate": 5.156225650615553e-06, "loss": 1.3028, "step": 25886 }, { "epoch": 9.081346423562412, "grad_norm": 8.257257461547852, "learning_rate": 5.128954340034285e-06, "loss": 1.3484, "step": 25900 }, { "epoch": 9.086255259467041, "grad_norm": 7.830197334289551, "learning_rate": 5.101683029453016e-06, "loss": 1.3742, "step": 25914 }, { "epoch": 9.091164095371669, "grad_norm": 8.511148452758789, "learning_rate": 5.074411718871747e-06, "loss": 1.3954, "step": 25928 }, { "epoch": 9.096072931276296, "grad_norm": 10.692118644714355, "learning_rate": 5.0471404082904785e-06, "loss": 1.3535, "step": 25942 }, { "epoch": 9.100981767180926, "grad_norm": 9.800304412841797, "learning_rate": 5.0198690977092105e-06, "loss": 1.4233, "step": 25956 }, { "epoch": 9.105890603085554, "grad_norm": 9.139087677001953, "learning_rate": 4.994545737883746e-06, "loss": 1.3793, "step": 25970 }, { "epoch": 9.110799438990183, "grad_norm": 13.930682182312012, "learning_rate": 4.967274427302478e-06, "loss": 1.371, "step": 25984 }, { "epoch": 9.11570827489481, "grad_norm": 11.31553840637207, "learning_rate": 4.94000311672121e-06, "loss": 1.4149, "step": 25998 }, { "epoch": 9.120617110799438, "grad_norm": 8.146488189697266, "learning_rate": 4.912731806139941e-06, "loss": 1.3456, "step": 26012 }, { "epoch": 9.125525946704068, "grad_norm": 9.068326950073242, "learning_rate": 4.885460495558673e-06, "loss": 1.4401, "step": 26026 }, { "epoch": 9.130434782608695, "grad_norm": 9.7858304977417, "learning_rate": 4.858189184977404e-06, "loss": 1.3656, "step": 26040 }, { "epoch": 9.135343618513325, "grad_norm": 9.310206413269043, "learning_rate": 4.830917874396135e-06, "loss": 1.3316, "step": 26054 }, { "epoch": 9.140252454417952, "grad_norm": 9.794814109802246, "learning_rate": 4.8036465638148665e-06, "loss": 1.3461, "step": 26068 }, { "epoch": 9.14516129032258, "grad_norm": 14.549107551574707, "learning_rate": 4.776375253233599e-06, "loss": 
1.3527, "step": 26082 }, { "epoch": 9.15007012622721, "grad_norm": 9.873281478881836, "learning_rate": 4.74910394265233e-06, "loss": 1.3148, "step": 26096 }, { "epoch": 9.154978962131837, "grad_norm": 8.348831176757812, "learning_rate": 4.721832632071062e-06, "loss": 1.3377, "step": 26110 }, { "epoch": 9.159887798036465, "grad_norm": 9.957566261291504, "learning_rate": 4.694561321489793e-06, "loss": 1.326, "step": 26124 }, { "epoch": 9.164796633941094, "grad_norm": 8.998187065124512, "learning_rate": 4.667290010908524e-06, "loss": 1.481, "step": 26138 }, { "epoch": 9.169705469845722, "grad_norm": 7.6998186111450195, "learning_rate": 4.640018700327256e-06, "loss": 1.298, "step": 26152 }, { "epoch": 9.174614305750351, "grad_norm": 8.56472396850586, "learning_rate": 4.612747389745988e-06, "loss": 1.3768, "step": 26166 }, { "epoch": 9.179523141654979, "grad_norm": 10.202958106994629, "learning_rate": 4.585476079164719e-06, "loss": 1.343, "step": 26180 }, { "epoch": 9.184431977559607, "grad_norm": 8.30465030670166, "learning_rate": 4.55820476858345e-06, "loss": 1.3678, "step": 26194 }, { "epoch": 9.189340813464236, "grad_norm": 11.15686321258545, "learning_rate": 4.530933458002182e-06, "loss": 1.3312, "step": 26208 }, { "epoch": 9.194249649368864, "grad_norm": 7.807880401611328, "learning_rate": 4.503662147420913e-06, "loss": 1.2716, "step": 26222 }, { "epoch": 9.199158485273493, "grad_norm": 11.202817916870117, "learning_rate": 4.476390836839645e-06, "loss": 1.4774, "step": 26236 }, { "epoch": 9.20406732117812, "grad_norm": 8.673836708068848, "learning_rate": 4.449119526258377e-06, "loss": 1.3591, "step": 26250 }, { "epoch": 9.208976157082748, "grad_norm": 11.538403511047363, "learning_rate": 4.421848215677108e-06, "loss": 1.3573, "step": 26264 }, { "epoch": 9.213884992987378, "grad_norm": 9.310532569885254, "learning_rate": 4.39457690509584e-06, "loss": 1.3252, "step": 26278 }, { "epoch": 9.218793828892005, "grad_norm": 7.862910270690918, "learning_rate": 
4.367305594514571e-06, "loss": 1.4184, "step": 26292 }, { "epoch": 9.223702664796633, "grad_norm": 10.853288650512695, "learning_rate": 4.340034283933302e-06, "loss": 1.3185, "step": 26306 }, { "epoch": 9.228611500701263, "grad_norm": 10.052688598632812, "learning_rate": 4.3127629733520335e-06, "loss": 1.4037, "step": 26320 }, { "epoch": 9.23352033660589, "grad_norm": 8.545676231384277, "learning_rate": 4.285491662770765e-06, "loss": 1.3587, "step": 26334 }, { "epoch": 9.23842917251052, "grad_norm": 8.90212345123291, "learning_rate": 4.258220352189497e-06, "loss": 1.298, "step": 26348 }, { "epoch": 9.243338008415147, "grad_norm": 11.650616645812988, "learning_rate": 4.230949041608229e-06, "loss": 1.2873, "step": 26362 }, { "epoch": 9.248246844319775, "grad_norm": 12.029667854309082, "learning_rate": 4.20367773102696e-06, "loss": 1.4023, "step": 26376 }, { "epoch": 9.253155680224404, "grad_norm": 9.846620559692383, "learning_rate": 4.176406420445691e-06, "loss": 1.3571, "step": 26390 }, { "epoch": 9.258064516129032, "grad_norm": 10.662572860717773, "learning_rate": 4.1491351098644225e-06, "loss": 1.3622, "step": 26404 }, { "epoch": 9.262973352033661, "grad_norm": 10.016236305236816, "learning_rate": 4.121863799283155e-06, "loss": 1.3763, "step": 26418 }, { "epoch": 9.267882187938289, "grad_norm": 10.253090858459473, "learning_rate": 4.094592488701886e-06, "loss": 1.4029, "step": 26432 }, { "epoch": 9.272791023842917, "grad_norm": 10.527180671691895, "learning_rate": 4.067321178120617e-06, "loss": 1.3093, "step": 26446 }, { "epoch": 9.277699859747546, "grad_norm": 9.686782836914062, "learning_rate": 4.040049867539348e-06, "loss": 1.4018, "step": 26460 }, { "epoch": 9.282608695652174, "grad_norm": 10.974291801452637, "learning_rate": 4.01277855695808e-06, "loss": 1.2754, "step": 26474 }, { "epoch": 9.287517531556801, "grad_norm": 10.735367774963379, "learning_rate": 3.9855072463768115e-06, "loss": 1.3705, "step": 26488 }, { "epoch": 9.292426367461431, "grad_norm": 
8.33791446685791, "learning_rate": 3.958235935795544e-06, "loss": 1.2366, "step": 26502 }, { "epoch": 9.297335203366059, "grad_norm": 11.580737113952637, "learning_rate": 3.930964625214275e-06, "loss": 1.3817, "step": 26516 }, { "epoch": 9.302244039270688, "grad_norm": 8.892800331115723, "learning_rate": 3.903693314633006e-06, "loss": 1.3118, "step": 26530 }, { "epoch": 9.307152875175316, "grad_norm": 8.501441955566406, "learning_rate": 3.876422004051738e-06, "loss": 1.3247, "step": 26544 }, { "epoch": 9.312061711079943, "grad_norm": 10.974807739257812, "learning_rate": 3.849150693470469e-06, "loss": 1.3562, "step": 26558 }, { "epoch": 9.316970546984573, "grad_norm": 10.667025566101074, "learning_rate": 3.8218793828892005e-06, "loss": 1.4051, "step": 26572 }, { "epoch": 9.3218793828892, "grad_norm": 8.310371398925781, "learning_rate": 3.794608072307932e-06, "loss": 1.3715, "step": 26586 }, { "epoch": 9.32678821879383, "grad_norm": 11.424276351928711, "learning_rate": 3.7673367617266634e-06, "loss": 1.3311, "step": 26600 }, { "epoch": 9.331697054698457, "grad_norm": 13.911672592163086, "learning_rate": 3.740065451145395e-06, "loss": 1.5097, "step": 26614 }, { "epoch": 9.336605890603085, "grad_norm": 11.588387489318848, "learning_rate": 3.712794140564127e-06, "loss": 1.4067, "step": 26628 }, { "epoch": 9.341514726507715, "grad_norm": 9.034385681152344, "learning_rate": 3.6855228299828583e-06, "loss": 1.396, "step": 26642 }, { "epoch": 9.346423562412342, "grad_norm": 11.057976722717285, "learning_rate": 3.65825151940159e-06, "loss": 1.3958, "step": 26656 }, { "epoch": 9.35133239831697, "grad_norm": 13.324390411376953, "learning_rate": 3.630980208820321e-06, "loss": 1.3932, "step": 26670 }, { "epoch": 9.3562412342216, "grad_norm": 12.691455841064453, "learning_rate": 3.603708898239053e-06, "loss": 1.2514, "step": 26684 }, { "epoch": 9.361150070126227, "grad_norm": 12.058928489685059, "learning_rate": 3.576437587657784e-06, "loss": 1.3983, "step": 26698 }, { "epoch": 
9.366058906030856, "grad_norm": 9.322803497314453, "learning_rate": 3.5491662770765157e-06, "loss": 1.3071, "step": 26712 }, { "epoch": 9.370967741935484, "grad_norm": 9.060636520385742, "learning_rate": 3.521894966495247e-06, "loss": 1.3538, "step": 26726 }, { "epoch": 9.375876577840112, "grad_norm": 7.651117324829102, "learning_rate": 3.4946236559139785e-06, "loss": 1.3004, "step": 26740 }, { "epoch": 9.380785413744741, "grad_norm": 8.591988563537598, "learning_rate": 3.4673523453327106e-06, "loss": 1.3118, "step": 26754 }, { "epoch": 9.385694249649369, "grad_norm": 14.677626609802246, "learning_rate": 3.440081034751442e-06, "loss": 1.2945, "step": 26768 }, { "epoch": 9.390603085553996, "grad_norm": 10.315505981445312, "learning_rate": 3.4128097241701734e-06, "loss": 1.2921, "step": 26782 }, { "epoch": 9.395511921458626, "grad_norm": 11.082088470458984, "learning_rate": 3.3855384135889047e-06, "loss": 1.38, "step": 26796 }, { "epoch": 9.400420757363253, "grad_norm": 9.208745956420898, "learning_rate": 3.3582671030076363e-06, "loss": 1.2814, "step": 26810 }, { "epoch": 9.405329593267883, "grad_norm": 7.205362319946289, "learning_rate": 3.3309957924263675e-06, "loss": 1.2941, "step": 26824 }, { "epoch": 9.41023842917251, "grad_norm": 13.11959171295166, "learning_rate": 3.3037244818450987e-06, "loss": 1.3331, "step": 26838 }, { "epoch": 9.415147265077138, "grad_norm": 9.20059871673584, "learning_rate": 3.2764531712638304e-06, "loss": 1.3898, "step": 26852 }, { "epoch": 9.420056100981768, "grad_norm": 10.801356315612793, "learning_rate": 3.2491818606825616e-06, "loss": 1.3863, "step": 26866 }, { "epoch": 9.424964936886395, "grad_norm": 10.054000854492188, "learning_rate": 3.221910550101294e-06, "loss": 1.4651, "step": 26880 }, { "epoch": 9.429873772791025, "grad_norm": 9.03390121459961, "learning_rate": 3.1946392395200253e-06, "loss": 1.4162, "step": 26894 }, { "epoch": 9.434782608695652, "grad_norm": 9.508460998535156, "learning_rate": 3.1673679289387565e-06, 
"loss": 1.3406, "step": 26908 }, { "epoch": 9.43969144460028, "grad_norm": 11.818727493286133, "learning_rate": 3.140096618357488e-06, "loss": 1.3734, "step": 26922 }, { "epoch": 9.44460028050491, "grad_norm": 11.640634536743164, "learning_rate": 3.1128253077762194e-06, "loss": 1.2866, "step": 26936 }, { "epoch": 9.449509116409537, "grad_norm": 13.635339736938477, "learning_rate": 3.085553997194951e-06, "loss": 1.3628, "step": 26950 }, { "epoch": 9.454417952314165, "grad_norm": 10.332751274108887, "learning_rate": 3.0582826866136822e-06, "loss": 1.3879, "step": 26964 }, { "epoch": 9.459326788218794, "grad_norm": 9.06944751739502, "learning_rate": 3.0310113760324143e-06, "loss": 1.3762, "step": 26978 }, { "epoch": 9.464235624123422, "grad_norm": 11.053771018981934, "learning_rate": 3.0037400654511455e-06, "loss": 1.3141, "step": 26992 }, { "epoch": 9.469144460028051, "grad_norm": 7.256747245788574, "learning_rate": 2.976468754869877e-06, "loss": 1.4031, "step": 27006 }, { "epoch": 9.474053295932679, "grad_norm": 9.862464904785156, "learning_rate": 2.9491974442886084e-06, "loss": 1.3713, "step": 27020 }, { "epoch": 9.478962131837307, "grad_norm": 7.59253454208374, "learning_rate": 2.92192613370734e-06, "loss": 1.3119, "step": 27034 }, { "epoch": 9.483870967741936, "grad_norm": 10.79275131225586, "learning_rate": 2.8946548231260717e-06, "loss": 1.3544, "step": 27048 }, { "epoch": 9.488779803646564, "grad_norm": 11.223390579223633, "learning_rate": 2.867383512544803e-06, "loss": 1.3475, "step": 27062 }, { "epoch": 9.493688639551191, "grad_norm": 10.209470748901367, "learning_rate": 2.8401122019635345e-06, "loss": 1.3032, "step": 27076 }, { "epoch": 9.49859747545582, "grad_norm": 10.004685401916504, "learning_rate": 2.8128408913822657e-06, "loss": 1.2741, "step": 27090 }, { "epoch": 9.503506311360448, "grad_norm": 10.535722732543945, "learning_rate": 2.785569580800998e-06, "loss": 1.3756, "step": 27104 }, { "epoch": 9.508415147265078, "grad_norm": 10.4974365234375, 
"learning_rate": 2.758298270219729e-06, "loss": 1.2602, "step": 27118 }, { "epoch": 9.513323983169705, "grad_norm": 10.065242767333984, "learning_rate": 2.7310269596384602e-06, "loss": 1.2999, "step": 27132 }, { "epoch": 9.518232819074333, "grad_norm": 8.170348167419434, "learning_rate": 2.703755649057192e-06, "loss": 1.3865, "step": 27146 }, { "epoch": 9.523141654978962, "grad_norm": 12.240524291992188, "learning_rate": 2.6764843384759235e-06, "loss": 1.3537, "step": 27160 }, { "epoch": 9.52805049088359, "grad_norm": 10.04339599609375, "learning_rate": 2.649213027894655e-06, "loss": 1.3491, "step": 27174 }, { "epoch": 9.53295932678822, "grad_norm": 12.512887954711914, "learning_rate": 2.6219417173133864e-06, "loss": 1.4213, "step": 27188 }, { "epoch": 9.537868162692847, "grad_norm": 11.928011894226074, "learning_rate": 2.594670406732118e-06, "loss": 1.4685, "step": 27202 }, { "epoch": 9.542776998597475, "grad_norm": 7.220157146453857, "learning_rate": 2.5673990961508492e-06, "loss": 1.4133, "step": 27216 }, { "epoch": 9.547685834502104, "grad_norm": 10.976841926574707, "learning_rate": 2.540127785569581e-06, "loss": 1.3595, "step": 27230 }, { "epoch": 9.552594670406732, "grad_norm": 9.795488357543945, "learning_rate": 2.5128564749883125e-06, "loss": 1.3394, "step": 27244 }, { "epoch": 9.55750350631136, "grad_norm": 11.860605239868164, "learning_rate": 2.4855851644070437e-06, "loss": 1.3801, "step": 27258 }, { "epoch": 9.562412342215989, "grad_norm": 12.453754425048828, "learning_rate": 2.4583138538257754e-06, "loss": 1.4328, "step": 27272 }, { "epoch": 9.567321178120617, "grad_norm": 11.297079086303711, "learning_rate": 2.431042543244507e-06, "loss": 1.4182, "step": 27286 }, { "epoch": 9.572230014025246, "grad_norm": 8.1820707321167, "learning_rate": 2.4037712326632387e-06, "loss": 1.2739, "step": 27300 }, { "epoch": 9.577138849929874, "grad_norm": 9.244611740112305, "learning_rate": 2.37649992208197e-06, "loss": 1.3895, "step": 27314 }, { "epoch": 
9.582047685834501, "grad_norm": 9.740119934082031, "learning_rate": 2.3492286115007015e-06, "loss": 1.3535, "step": 27328 }, { "epoch": 9.58695652173913, "grad_norm": 8.039715766906738, "learning_rate": 2.3219573009194327e-06, "loss": 1.404, "step": 27342 }, { "epoch": 9.591865357643758, "grad_norm": 13.259716987609863, "learning_rate": 2.2946859903381644e-06, "loss": 1.3313, "step": 27356 }, { "epoch": 9.596774193548388, "grad_norm": 7.262179374694824, "learning_rate": 2.267414679756896e-06, "loss": 1.284, "step": 27370 }, { "epoch": 9.601683029453016, "grad_norm": 7.339652061462402, "learning_rate": 2.2401433691756272e-06, "loss": 1.3414, "step": 27384 }, { "epoch": 9.606591865357643, "grad_norm": 10.79587459564209, "learning_rate": 2.212872058594359e-06, "loss": 1.347, "step": 27398 }, { "epoch": 9.611500701262273, "grad_norm": 8.039835929870605, "learning_rate": 2.1856007480130905e-06, "loss": 1.3327, "step": 27412 }, { "epoch": 9.6164095371669, "grad_norm": 9.625452995300293, "learning_rate": 2.1583294374318217e-06, "loss": 1.3336, "step": 27426 }, { "epoch": 9.621318373071528, "grad_norm": 8.077139854431152, "learning_rate": 2.1310581268505534e-06, "loss": 1.4126, "step": 27440 }, { "epoch": 9.626227208976157, "grad_norm": 11.333827018737793, "learning_rate": 2.1037868162692846e-06, "loss": 1.3699, "step": 27454 }, { "epoch": 9.631136044880785, "grad_norm": 9.918453216552734, "learning_rate": 2.0765155056880162e-06, "loss": 1.3683, "step": 27468 }, { "epoch": 9.636044880785414, "grad_norm": 10.507020950317383, "learning_rate": 2.049244195106748e-06, "loss": 1.291, "step": 27482 }, { "epoch": 9.640953716690042, "grad_norm": 12.763260841369629, "learning_rate": 2.0219728845254795e-06, "loss": 1.3627, "step": 27496 }, { "epoch": 9.64586255259467, "grad_norm": 9.558838844299316, "learning_rate": 1.9947015739442107e-06, "loss": 1.3074, "step": 27510 }, { "epoch": 9.6507713884993, "grad_norm": 8.112711906433105, "learning_rate": 1.9674302633629424e-06, "loss": 
1.3833, "step": 27524 }, { "epoch": 9.655680224403927, "grad_norm": 8.497075080871582, "learning_rate": 1.9401589527816736e-06, "loss": 1.3593, "step": 27538 }, { "epoch": 9.660589060308556, "grad_norm": 11.119254112243652, "learning_rate": 1.9128876422004052e-06, "loss": 1.3622, "step": 27552 }, { "epoch": 9.665497896213184, "grad_norm": 10.378993034362793, "learning_rate": 1.8856163316191369e-06, "loss": 1.3434, "step": 27566 }, { "epoch": 9.670406732117812, "grad_norm": 9.672821044921875, "learning_rate": 1.858345021037868e-06, "loss": 1.3597, "step": 27580 }, { "epoch": 9.675315568022441, "grad_norm": 9.768972396850586, "learning_rate": 1.8310737104565995e-06, "loss": 1.2926, "step": 27594 }, { "epoch": 9.680224403927069, "grad_norm": 8.230093955993652, "learning_rate": 1.8038023998753314e-06, "loss": 1.3576, "step": 27608 }, { "epoch": 9.685133239831696, "grad_norm": 9.457991600036621, "learning_rate": 1.7765310892940628e-06, "loss": 1.4374, "step": 27622 }, { "epoch": 9.690042075736326, "grad_norm": 8.174189567565918, "learning_rate": 1.7492597787127942e-06, "loss": 1.4266, "step": 27636 }, { "epoch": 9.694950911640953, "grad_norm": 7.444729328155518, "learning_rate": 1.7219884681315257e-06, "loss": 1.4135, "step": 27650 }, { "epoch": 9.699859747545583, "grad_norm": 14.1568603515625, "learning_rate": 1.694717157550257e-06, "loss": 1.3406, "step": 27664 }, { "epoch": 9.70476858345021, "grad_norm": 6.845445156097412, "learning_rate": 1.6674458469689887e-06, "loss": 1.3099, "step": 27678 }, { "epoch": 9.709677419354838, "grad_norm": 12.063641548156738, "learning_rate": 1.6401745363877202e-06, "loss": 1.4106, "step": 27692 }, { "epoch": 9.714586255259468, "grad_norm": 10.44062614440918, "learning_rate": 1.6129032258064516e-06, "loss": 1.2979, "step": 27706 }, { "epoch": 9.719495091164095, "grad_norm": 8.306778907775879, "learning_rate": 1.585631915225183e-06, "loss": 1.3548, "step": 27720 }, { "epoch": 9.724403927068725, "grad_norm": 8.316340446472168, 
"learning_rate": 1.5583606046439147e-06, "loss": 1.3289, "step": 27734 }, { "epoch": 9.729312762973352, "grad_norm": 11.205327033996582, "learning_rate": 1.5310892940626463e-06, "loss": 1.3343, "step": 27748 }, { "epoch": 9.73422159887798, "grad_norm": 8.332571029663086, "learning_rate": 1.5038179834813777e-06, "loss": 1.3896, "step": 27762 }, { "epoch": 9.73913043478261, "grad_norm": 8.552364349365234, "learning_rate": 1.4765466729001092e-06, "loss": 1.3611, "step": 27776 }, { "epoch": 9.744039270687237, "grad_norm": 10.986411094665527, "learning_rate": 1.4492753623188406e-06, "loss": 1.3521, "step": 27790 }, { "epoch": 9.748948106591865, "grad_norm": 9.23542308807373, "learning_rate": 1.422004051737572e-06, "loss": 1.3293, "step": 27804 }, { "epoch": 9.753856942496494, "grad_norm": 8.762764930725098, "learning_rate": 1.3947327411563037e-06, "loss": 1.4671, "step": 27818 }, { "epoch": 9.758765778401122, "grad_norm": 9.477548599243164, "learning_rate": 1.367461430575035e-06, "loss": 1.3371, "step": 27832 }, { "epoch": 9.763674614305751, "grad_norm": 7.731424808502197, "learning_rate": 1.3401901199937667e-06, "loss": 1.3241, "step": 27846 }, { "epoch": 9.768583450210379, "grad_norm": 8.31930923461914, "learning_rate": 1.3129188094124982e-06, "loss": 1.4619, "step": 27860 }, { "epoch": 9.773492286115006, "grad_norm": 9.466615676879883, "learning_rate": 1.2856474988312296e-06, "loss": 1.4089, "step": 27874 }, { "epoch": 9.778401122019636, "grad_norm": 6.525511264801025, "learning_rate": 1.258376188249961e-06, "loss": 1.3554, "step": 27888 }, { "epoch": 9.783309957924264, "grad_norm": 8.53394889831543, "learning_rate": 1.2311048776686925e-06, "loss": 1.355, "step": 27902 }, { "epoch": 9.788218793828893, "grad_norm": 6.389432907104492, "learning_rate": 1.203833567087424e-06, "loss": 1.331, "step": 27916 }, { "epoch": 9.79312762973352, "grad_norm": 13.623087882995605, "learning_rate": 1.1765622565061555e-06, "loss": 1.4314, "step": 27930 }, { "epoch": 9.798036465638148, 
"grad_norm": 10.968790054321289, "learning_rate": 1.1492909459248872e-06, "loss": 1.3466, "step": 27944 }, { "epoch": 9.802945301542778, "grad_norm": 9.671365737915039, "learning_rate": 1.1220196353436186e-06, "loss": 1.3905, "step": 27958 }, { "epoch": 9.807854137447405, "grad_norm": 9.744417190551758, "learning_rate": 1.09474832476235e-06, "loss": 1.3932, "step": 27972 }, { "epoch": 9.812762973352033, "grad_norm": 11.195330619812012, "learning_rate": 1.0674770141810815e-06, "loss": 1.3842, "step": 27986 }, { "epoch": 9.817671809256662, "grad_norm": 11.541499137878418, "learning_rate": 1.0402057035998129e-06, "loss": 1.4194, "step": 28000 }, { "epoch": 9.82258064516129, "grad_norm": 8.26220989227295, "learning_rate": 1.0129343930185445e-06, "loss": 1.3797, "step": 28014 }, { "epoch": 9.82748948106592, "grad_norm": 12.407514572143555, "learning_rate": 9.85663082437276e-07, "loss": 1.3079, "step": 28028 }, { "epoch": 9.832398316970547, "grad_norm": 12.555651664733887, "learning_rate": 9.583917718560076e-07, "loss": 1.2648, "step": 28042 }, { "epoch": 9.837307152875175, "grad_norm": 9.400788307189941, "learning_rate": 9.31120461274739e-07, "loss": 1.3493, "step": 28056 }, { "epoch": 9.842215988779804, "grad_norm": 8.336677551269531, "learning_rate": 9.038491506934706e-07, "loss": 1.3455, "step": 28070 }, { "epoch": 9.847124824684432, "grad_norm": 7.823832988739014, "learning_rate": 8.76577840112202e-07, "loss": 1.3602, "step": 28084 }, { "epoch": 9.85203366058906, "grad_norm": 10.257705688476562, "learning_rate": 8.493065295309335e-07, "loss": 1.404, "step": 28098 }, { "epoch": 9.856942496493689, "grad_norm": 7.930234909057617, "learning_rate": 8.22035218949665e-07, "loss": 1.3264, "step": 28112 }, { "epoch": 9.861851332398317, "grad_norm": 9.146740913391113, "learning_rate": 7.947639083683964e-07, "loss": 1.3698, "step": 28126 }, { "epoch": 9.866760168302946, "grad_norm": 24.07539939880371, "learning_rate": 7.67492597787128e-07, "loss": 1.4616, "step": 28140 }, { 
"epoch": 9.871669004207574, "grad_norm": 10.381732940673828, "learning_rate": 7.402212872058594e-07, "loss": 1.346, "step": 28154 }, { "epoch": 9.876577840112201, "grad_norm": 9.001080513000488, "learning_rate": 7.12949976624591e-07, "loss": 1.3106, "step": 28168 }, { "epoch": 9.88148667601683, "grad_norm": 12.959094047546387, "learning_rate": 6.856786660433224e-07, "loss": 1.3494, "step": 28182 }, { "epoch": 9.886395511921458, "grad_norm": 11.125319480895996, "learning_rate": 6.58407355462054e-07, "loss": 1.3331, "step": 28196 }, { "epoch": 9.891304347826086, "grad_norm": 10.199729919433594, "learning_rate": 6.311360448807854e-07, "loss": 1.3408, "step": 28210 }, { "epoch": 9.896213183730715, "grad_norm": 7.545327663421631, "learning_rate": 6.038647342995169e-07, "loss": 1.3205, "step": 28224 }, { "epoch": 9.901122019635343, "grad_norm": 8.735750198364258, "learning_rate": 5.765934237182484e-07, "loss": 1.379, "step": 28238 }, { "epoch": 9.906030855539973, "grad_norm": 10.330979347229004, "learning_rate": 5.4932211313698e-07, "loss": 1.3285, "step": 28252 }, { "epoch": 9.9109396914446, "grad_norm": 9.538982391357422, "learning_rate": 5.220508025557114e-07, "loss": 1.3298, "step": 28266 }, { "epoch": 9.915848527349228, "grad_norm": 8.921728134155273, "learning_rate": 4.94779491974443e-07, "loss": 1.3713, "step": 28280 }, { "epoch": 9.920757363253857, "grad_norm": 9.986502647399902, "learning_rate": 4.675081813931744e-07, "loss": 1.3586, "step": 28294 }, { "epoch": 9.925666199158485, "grad_norm": 8.497357368469238, "learning_rate": 4.4023687081190586e-07, "loss": 1.3428, "step": 28308 }, { "epoch": 9.930575035063114, "grad_norm": 9.78085994720459, "learning_rate": 4.1296556023063734e-07, "loss": 1.3797, "step": 28322 }, { "epoch": 9.935483870967742, "grad_norm": 7.941118240356445, "learning_rate": 3.856942496493689e-07, "loss": 1.4241, "step": 28336 }, { "epoch": 9.94039270687237, "grad_norm": 14.489609718322754, "learning_rate": 3.5842293906810036e-07, "loss": 
1.3237, "step": 28350 }, { "epoch": 9.945301542776999, "grad_norm": 10.404537200927734, "learning_rate": 3.311516284868319e-07, "loss": 1.3315, "step": 28364 }, { "epoch": 9.950210378681627, "grad_norm": 7.971680164337158, "learning_rate": 3.038803179055634e-07, "loss": 1.3533, "step": 28378 }, { "epoch": 9.955119214586254, "grad_norm": 12.491605758666992, "learning_rate": 2.7660900732429486e-07, "loss": 1.2717, "step": 28392 }, { "epoch": 9.960028050490884, "grad_norm": 10.607575416564941, "learning_rate": 2.4933769674302634e-07, "loss": 1.3319, "step": 28406 }, { "epoch": 9.964936886395511, "grad_norm": 10.18859577178955, "learning_rate": 2.2206638616175785e-07, "loss": 1.3406, "step": 28420 }, { "epoch": 9.96984572230014, "grad_norm": 8.52375602722168, "learning_rate": 1.9479507558048933e-07, "loss": 1.3615, "step": 28434 }, { "epoch": 9.974754558204769, "grad_norm": 12.264177322387695, "learning_rate": 1.6752376499922084e-07, "loss": 1.3333, "step": 28448 }, { "epoch": 9.979663394109396, "grad_norm": 10.572373390197754, "learning_rate": 1.4025245441795232e-07, "loss": 1.316, "step": 28462 }, { "epoch": 9.984572230014026, "grad_norm": 8.828007698059082, "learning_rate": 1.1298114383668382e-07, "loss": 1.296, "step": 28476 }, { "epoch": 9.989481065918653, "grad_norm": 12.068406105041504, "learning_rate": 8.57098332554153e-08, "loss": 1.3528, "step": 28490 }, { "epoch": 9.994389901823283, "grad_norm": 8.637850761413574, "learning_rate": 5.84385226741468e-08, "loss": 1.2954, "step": 28504 }, { "epoch": 9.99929873772791, "grad_norm": 7.776968955993652, "learning_rate": 3.116721209287829e-08, "loss": 1.3251, "step": 28518 }, { "epoch": 10.0, "eval_loss": 1.3179453611373901, "eval_map": 0.1361, "eval_map_50": 0.1892, "eval_map_75": 0.1548, "eval_map_applique": 0.0003, "eval_map_bag, wallet": 0.1255, "eval_map_bead": 0.0238, "eval_map_belt": 0.146, "eval_map_bow": 0.0, "eval_map_buckle": 0.1536, "eval_map_cape": 0.0, "eval_map_cardigan": 0.0, "eval_map_coat": 0.1801, 
"eval_map_collar": 0.2273, "eval_map_dress": 0.4442, "eval_map_epaulette": 0.0387, "eval_map_flower": 0.0, "eval_map_fringe": 0.0, "eval_map_glasses": 0.2364, "eval_map_glove": 0.0646, "eval_map_hat": 0.2112, "eval_map_headband, head covering, hair accessory": 0.1072, "eval_map_hood": 0.0706, "eval_map_jacket": 0.2999, "eval_map_jumpsuit": 0.0189, "eval_map_lapel": 0.1601, "eval_map_large": 0.1367, "eval_map_leg warmer": 0.0, "eval_map_medium": 0.102, "eval_map_neckline": 0.3227, "eval_map_pants": 0.4488, "eval_map_pocket": 0.1201, "eval_map_ribbon": 0.0, "eval_map_rivet": 0.0334, "eval_map_ruffle": 0.0539, "eval_map_scarf": 0.0253, "eval_map_sequin": 0.0, "eval_map_shirt, blouse": 0.0963, "eval_map_shoe": 0.4638, "eval_map_shorts": 0.2561, "eval_map_skirt": 0.304, "eval_map_sleeve": 0.3655, "eval_map_small": 0.0, "eval_map_sock": 0.0827, "eval_map_sweater": 0.0033, "eval_map_tassel": 0.0, "eval_map_tie": 0.3468, "eval_map_tights, stockings": 0.227, "eval_map_top, t-shirt, sweatshirt": 0.2145, "eval_map_umbrella": 0.2418, "eval_map_vest": 0.0, "eval_map_watch": 0.1045, "eval_map_zipper": 0.0412, "eval_mar_1": 0.2076, "eval_mar_10": 0.4071, "eval_mar_100": 0.4151, "eval_mar_100_applique": 0.0311, "eval_mar_100_bag, wallet": 0.5798, "eval_mar_100_bead": 0.3327, "eval_mar_100_belt": 0.6433, "eval_mar_100_bow": 0.0, "eval_mar_100_buckle": 0.497, "eval_mar_100_cape": 0.0, "eval_mar_100_cardigan": 0.0, "eval_mar_100_coat": 0.6029, "eval_mar_100_collar": 0.6475, "eval_mar_100_dress": 0.8372, "eval_mar_100_epaulette": 0.5286, "eval_mar_100_flower": 0.0, "eval_mar_100_fringe": 0.0, "eval_mar_100_glasses": 0.7395, "eval_mar_100_glove": 0.229, "eval_mar_100_hat": 0.6137, "eval_mar_100_headband, head covering, hair accessory": 0.5193, "eval_mar_100_hood": 0.2031, "eval_mar_100_jacket": 0.733, "eval_mar_100_jumpsuit": 0.1429, "eval_mar_100_lapel": 0.577, "eval_mar_100_leg warmer": 0.0, "eval_mar_100_neckline": 0.7834, "eval_mar_100_pants": 0.8213, "eval_mar_100_pocket": 0.7135, 
"eval_mar_100_ribbon": 0.0, "eval_mar_100_rivet": 0.21, "eval_mar_100_ruffle": 0.3474, "eval_mar_100_scarf": 0.0938, "eval_mar_100_sequin": 0.0, "eval_mar_100_shirt, blouse": 0.4337, "eval_mar_100_shoe": 0.807, "eval_mar_100_shorts": 0.6708, "eval_mar_100_skirt": 0.784, "eval_mar_100_sleeve": 0.7741, "eval_mar_100_sock": 0.6518, "eval_mar_100_sweater": 0.0381, "eval_mar_100_tassel": 0.0, "eval_mar_100_tie": 0.6667, "eval_mar_100_tights, stockings": 0.7787, "eval_mar_100_top, t-shirt, sweatshirt": 0.7543, "eval_mar_100_umbrella": 0.42, "eval_mar_100_vest": 0.0, "eval_mar_100_watch": 0.5313, "eval_mar_100_zipper": 0.3582, "eval_mar_large": 0.4179, "eval_mar_medium": 0.2304, "eval_mar_small": 0.0, "eval_runtime": 81.3376, "eval_samples_per_second": 14.237, "eval_steps_per_second": 0.455, "step": 28520 } ], "logging_steps": 14, "max_steps": 28520, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 1.9157430966890398e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }