Mugs / pretrained models /vit_base_400ep /fine_tuning_log.txt
zhoupans's picture
Upload 6 files
79e4603 verified
{"train_lr": 2.997721731484072e-05, "train_min_lr": 1.2632993704056536e-08, "train_loss": 5.520860667041928, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.7876333700571891, "test_loss": 1.2911619528043317, "test_acc1": 74.72200232666016, "test_acc5": 93.24800252929687, "epoch": 0, "n_parameters": 86567656}
{"train_lr": 8.997961549222588e-05, "train_min_lr": 3.791919390209611e-08, "train_loss": 3.9236352715513214, "train_loss_scale": 72241.52198241407, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.9932003387774544, "test_acc1": 78.1120025, "test_acc5": 94.77000260742187, "epoch": 1, "n_parameters": 86567656}
{"train_lr": 0.0001499820136696111, "train_min_lr": 6.320539410013567e-08, "train_loss": 3.725630641507683, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 3.2544760774556014, "test_loss": 0.9443287001601581, "test_acc1": 79.17000241699219, "test_acc5": 95.18600247070313, "epoch": 2, "n_parameters": 86567656}
{"train_lr": 0.00020998441184699623, "train_min_lr": 8.849159429817523e-08, "train_loss": 3.6402467927224724, "train_loss_scale": 41673.77138289368, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8953651087608374, "test_acc1": 79.84800232910156, "test_acc5": 95.39600252929688, "epoch": 3, "n_parameters": 86567656}
{"train_lr": 0.0002699868100243815, "train_min_lr": 1.1377779449621479e-07, "train_loss": 3.5708488427477776, "train_loss_scale": 37011.338129496406, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 3.429488264590049, "test_loss": 0.8919022682754473, "test_acc1": 80.18200263183594, "test_acc5": 95.58400259765625, "epoch": 4, "n_parameters": 86567656}
{"train_lr": 0.0003299892082017667, "train_min_lr": 1.3906399469425437e-07, "train_loss": 3.5118252102205223, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 3.479309021235465, "test_loss": 0.8736933603939883, "test_acc1": 80.3000026171875, "test_acc5": 95.69600282226563, "epoch": 5, "n_parameters": 86567656}
{"train_lr": 0.00038999160637915177, "train_min_lr": 1.6435019489229392e-07, "train_loss": 3.4766570559079697, "train_loss_scale": 71770.03996802558, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8609478334357455, "test_acc1": 80.7040026953125, "test_acc5": 95.80000268554687, "epoch": 6, "n_parameters": 86567656}
{"train_lr": 0.0004499940045565371, "train_min_lr": 1.896363950903335e-07, "train_loss": 3.422856244025566, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 3.5213533583686027, "test_loss": 0.8425315629933529, "test_acc1": 80.59800272460937, "test_acc5": 95.8560024609375, "epoch": 7, "n_parameters": 86567656}
{"train_lr": 0.0005099964027339221, "train_min_lr": 2.1492259528837304e-07, "train_loss": 3.402624214784228, "train_loss_scale": 71298.5579536371, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8391822452061021, "test_acc1": 80.79000236328125, "test_acc5": 95.82000240234375, "epoch": 8, "n_parameters": 86567656}
{"train_lr": 0.0005699988009113075, "train_min_lr": 2.4020879548641256e-07, "train_loss": 3.388823616895363, "train_loss_scale": 66950.44604316547, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8245565242694255, "test_acc1": 80.78800240234375, "test_acc5": 95.90200271484375, "epoch": 9, "n_parameters": 86567656}
{"train_lr": 0.0006300011990886926, "train_min_lr": 2.6549499568445216e-07, "train_loss": 3.357212643114497, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 3.6284468315011686, "test_loss": 0.8330145920716027, "test_acc1": 81.01000258300782, "test_acc5": 95.97000260742188, "epoch": 10, "n_parameters": 86567656}
{"train_lr": 0.000690003597266078, "train_min_lr": 2.9078119588249177e-07, "train_loss": 3.3222429960084665, "train_loss_scale": 67841.02318145483, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.818669908233986, "test_acc1": 81.01600230957031, "test_acc5": 95.91600233398438, "epoch": 11, "n_parameters": 86567656}
{"train_lr": 0.0007500059954434631, "train_min_lr": 3.1606739608053116e-07, "train_loss": 3.3176285028219414, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 3.7103671267164127, "test_loss": 0.8214341971266772, "test_acc1": 81.164002421875, "test_acc5": 96.03000223632813, "epoch": 12, "n_parameters": 86567656}
{"train_lr": 0.0008100083936208481, "train_min_lr": 3.413535962785711e-07, "train_loss": 3.281946166384992, "train_loss_scale": 62418.98001598721, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8092124291420897, "test_acc1": 81.20400237304688, "test_acc5": 96.07400286132813, "epoch": 13, "n_parameters": 86567656}
{"train_lr": 0.0008700107917982333, "train_min_lr": 3.666397964766104e-07, "train_loss": 3.276398069590783, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 3.7573651366001317, "test_loss": 0.8288006947529271, "test_acc1": 81.20200268554687, "test_acc5": 96.07200264648438, "epoch": 14, "n_parameters": 86567656}
{"train_lr": 0.0009300131899756183, "train_min_lr": 3.9192599667464996e-07, "train_loss": 3.258756810228983, "train_loss_scale": 51915.408473221425, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 3.787756677821195, "test_loss": 0.7999338651525563, "test_acc1": 81.35400252929688, "test_acc5": 96.10000237304688, "epoch": 15, "n_parameters": 86567656}
{"train_lr": 0.0009900155881530036, "train_min_lr": 4.172121968726895e-07, "train_loss": 3.2439505576753884, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 3.823040919909946, "test_loss": 0.8009963551129418, "test_acc1": 81.35400240722656, "test_acc5": 96.06000271484375, "epoch": 16, "n_parameters": 86567656}
{"train_lr": 0.001050017986330389, "train_min_lr": 4.4249839707072927e-07, "train_loss": 3.240008914618374, "train_loss_scale": 66898.05915267786, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8127847778043528, "test_acc1": 81.38600256835937, "test_acc5": 96.114002578125, "epoch": 17, "n_parameters": 86567656}
{"train_lr": 0.001110020384507774, "train_min_lr": 4.677845972687686e-07, "train_loss": 3.225492820155611, "train_loss_scale": 67212.38049560352, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8199993256065581, "test_acc1": 81.41000250488281, "test_acc5": 96.12400251953125, "epoch": 18, "n_parameters": 86567656}
{"train_lr": 0.0011700227826851595, "train_min_lr": 4.930707974668082e-07, "train_loss": 3.215334701464235, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 3.9085217025354324, "test_loss": 0.796583924371164, "test_acc1": 81.67000245117187, "test_acc5": 96.1540025390625, "epoch": 19, "n_parameters": 86567656}
{"train_lr": 0.001199846112502229, "train_min_lr": 5.056389399283351e-07, "train_loss": 3.2008181128332276, "train_loss_scale": 65850.32134292566, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7928336741709617, "test_acc1": 81.46600270507813, "test_acc5": 96.27200250976563, "epoch": 20, "n_parameters": 86567656}
{"train_lr": 0.0011989223336968786, "train_min_lr": 5.052496412249431e-07, "train_loss": 3.1840649868944566, "train_loss_scale": 71350.9448441247, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7827162116309235, "test_acc1": 81.6800024951172, "test_acc5": 96.23800234375, "epoch": 21, "n_parameters": 86567656}
{"train_lr": 0.0011970758310274807, "train_min_lr": 5.04471488391341e-07, "train_loss": 3.1690280729298776, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.004816926259408, "test_loss": 0.7787450542851883, "test_acc1": 81.74000225097656, "test_acc5": 96.20800274414063, "epoch": 22, "n_parameters": 86567656}
{"train_lr": 0.0011943094516673142, "train_min_lr": 5.033056812827984e-07, "train_loss": 3.159724979794664, "train_loss_scale": 66059.8689048761, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7740526974087018, "test_acc1": 81.90200269042968, "test_acc5": 96.262002578125, "epoch": 23, "n_parameters": 86567656}
{"train_lr": 0.0011906274611728472, "train_min_lr": 5.017540174893758e-07, "train_loss": 3.1535610288119527, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.036875244524839, "test_loss": 0.7908680991651454, "test_acc1": 81.86000249023438, "test_acc5": 96.24200283203125, "epoch": 24, "n_parameters": 86567656}
{"train_lr": 0.0011860355369065422, "train_min_lr": 4.998188895641762e-07, "train_loss": 3.1362370051544826, "train_loss_scale": 66898.05915267786, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.792016631798726, "test_acc1": 82.12200241210938, "test_acc5": 96.23200262695312, "epoch": 25, "n_parameters": 86567656}
{"train_lr": 0.0011805407592827904, "train_min_lr": 4.975032813341991e-07, "train_loss": 3.1249033724137254, "train_loss_scale": 66112.25579536372, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7777336133051649, "test_acc1": 82.27600250488281, "test_acc5": 96.32800279296875, "epoch": 26, "n_parameters": 86567656}
{"train_lr": 0.001174151600850419, "train_min_lr": 4.948107632994973e-07, "train_loss": 3.1114193041452305, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.097763954020804, "test_loss": 0.7679208606709922, "test_acc1": 82.16800248046874, "test_acc5": 96.3540027734375, "epoch": 27, "n_parameters": 86567656}
{"train_lr": 0.0011668779132286333, "train_min_lr": 4.91745487127723e-07, "train_loss": 3.1032731526975725, "train_loss_scale": 67998.18385291766, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7699071806052635, "test_acc1": 82.24800230957031, "test_acc5": 96.35200282226562, "epoch": 28, "n_parameters": 86567656}
{"train_lr": 0.0011587309119165377, "train_min_lr": 4.883121792525556e-07, "train_loss": 3.092605069077177, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.139214401717761, "test_loss": 0.7845067852872541, "test_acc1": 82.2200025390625, "test_acc5": 96.33800263671876, "epoch": 29, "n_parameters": 86567656}
{"train_lr": 0.0011497231589996488, "train_min_lr": 4.845161335858885e-07, "train_loss": 3.074865544728524, "train_loss_scale": 66636.1247002398, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.75524768427414, "test_acc1": 82.42600250976562, "test_acc5": 96.42000235351563, "epoch": 30, "n_parameters": 86567656}
{"train_lr": 0.0011398685437800748, "train_min_lr": 4.803632033549976e-07, "train_loss": 3.07888973739555, "train_loss_scale": 65797.93445243804, "train_weight_decay": 0.049999999999998865, "train_grad_norm": NaN, "test_loss": 0.7587476886551956, "test_acc1": 82.42600234375, "test_acc5": 96.44600251953125, "epoch": 31, "n_parameters": 86567656}
{"train_lr": 0.0011291822613602026, "train_min_lr": 4.758597920772873e-07, "train_loss": 3.0678702676705987, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.152900679982443, "test_loss": 0.7668100874656918, "test_acc1": 82.6540025, "test_acc5": 96.48000250976563, "epoch": 32, "n_parameters": 86567656}
{"train_lr": 0.0011176807892129682, "train_min_lr": 4.710128436865345e-07, "train_loss": 3.0540791688134057, "train_loss_scale": 65850.32134292566, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7593972311851165, "test_acc1": 82.71800252441406, "test_acc5": 96.4380025390625, "epoch": 33, "n_parameters": 86567656}
{"train_lr": 0.0011053818617747795, "train_min_lr": 4.6582983182584453e-07, "train_loss": 3.0519031952682445, "train_loss_scale": 66478.96402877697, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7531241007805785, "test_acc1": 82.71800251464843, "test_acc5": 96.46800264648438, "epoch": 34, "n_parameters": 86567656}
{"train_lr": 0.0010923044431002948, "train_min_lr": 4.6031874832383414e-07, "train_loss": 3.046807482350263, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.198336031606538, "test_loss": 0.7503690476961062, "test_acc1": 82.66600259765625, "test_acc5": 96.47400267578125, "epoch": 35, "n_parameters": 86567656}
{"train_lr": 0.0010784686976212541, "train_min_lr": 4.5448809087181056e-07, "train_loss": 3.037025463488176, "train_loss_scale": 40547.453237410075, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.763132609798077, "test_acc1": 82.70000251953125, "test_acc5": 96.54200280273437, "epoch": 36, "n_parameters": 86567656}
{"train_lr": 0.0010638959590543423, "train_min_lr": 4.483468499209532e-07, "train_loss": 3.0310163204547025, "train_loss_scale": 38137.656274980014, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.224732938716166, "test_loss": 0.7393661386322701, "test_acc1": 82.78000245605469, "test_acc5": 96.5560026953125, "epoch": 37, "n_parameters": 86567656}
{"train_lr": 0.001048608697506178, "train_min_lr": 4.4190449481967945e-07, "train_loss": 3.0250999666661094, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.243450759030837, "test_loss": 0.7476486272510441, "test_acc1": 82.60600237792968, "test_acc5": 96.58800276367188, "epoch": 38, "n_parameters": 86567656}
{"train_lr": 0.0010326304848260217, "train_min_lr": 4.351709592126058e-07, "train_loss": 3.0134136104088225, "train_loss_scale": 65588.38689048761, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7421724304728124, "test_acc1": 82.65200239746093, "test_acc5": 96.57200255859375, "epoch": 39, "n_parameters": 86567656}
{"train_lr": 0.0010159859582597086, "train_min_lr": 4.28156625723582e-07, "train_loss": 3.007446444684939, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.264243228830022, "test_loss": 0.7419182843746354, "test_acc1": 82.96400233886719, "test_acc5": 96.57400279296876, "epoch": 40, "n_parameters": 86567656}
{"train_lr": 0.0009987007824607825, "train_min_lr": 4.208723099464383e-07, "train_loss": 3.010810269547595, "train_loss_scale": 66112.25579536372, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7403421497322134, "test_acc1": 83.05000249023438, "test_acc5": 96.59200270507813, "epoch": 41, "n_parameters": 86567656}
{"train_lr": 0.0009808016099174693, "train_min_lr": 4.1332924376812644e-07, "train_loss": 2.9900352902216114, "train_loss_scale": 65693.16067146283, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7503324944625869, "test_acc1": 82.78400237792968, "test_acc5": 96.630002734375, "epoch": 42, "n_parameters": 86567656}
{"train_lr": 0.000962316039856474, "train_min_lr": 4.0553905804996027e-07, "train_loss": 2.9880909409692626, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.288231012823103, "test_loss": 0.7383982079581739, "test_acc1": 82.93600242675781, "test_acc5": 96.60000286132812, "epoch": 43, "n_parameters": 86567656}
{"train_lr": 0.0009432725756869719, "train_min_lr": 3.975137646936729e-07, "train_loss": 2.9796482501127164, "train_loss_scale": 66007.48201438849, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7380693295677959, "test_acc1": 82.94600254882812, "test_acc5": 96.67400279296875, "epoch": 44, "n_parameters": 86567656}
{"train_lr": 0.0009237005810504102, "train_min_lr": 3.8926573811993493e-07, "train_loss": 2.9724457585196986, "train_loss_scale": 65955.09512390089, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7309757927254242, "test_acc1": 83.23800255859375, "test_acc5": 96.64800267578126, "epoch": 45, "n_parameters": 86567656}
{"train_lr": 0.0009036302345439133, "train_min_lr": 3.8080769618789577e-07, "train_loss": 2.975195857868206, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.320639576366861, "test_loss": 0.7446174885349712, "test_acc1": 83.0080026123047, "test_acc5": 96.6480025390625, "epoch": 46, "n_parameters": 86567656}
{"train_lr": 0.000883092483187069, "train_min_lr": 3.7215268058516126e-07, "train_loss": 2.9650250939752083, "train_loss_scale": 65850.32134292566, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.738021041127457, "test_acc1": 83.17000241699219, "test_acc5": 96.672002734375, "epoch": 47, "n_parameters": 86567656}
{"train_lr": 0.0008621189947038636, "train_min_lr": 3.6331403671846427e-07, "train_loss": 2.95888396148249, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.324197247541018, "test_loss": 0.7405206413789728, "test_acc1": 83.188002578125, "test_acc5": 96.70000259765625, "epoch": 48, "n_parameters": 86567656}
{"train_lr": 0.0008407421086933434, "train_min_lr": 3.543053931360085e-07, "train_loss": 2.9525925789734155, "train_loss_scale": 38242.430055955236, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.727422605015309, "test_acc1": 83.35200257324219, "test_acc5": 96.65000270507812, "epoch": 49, "n_parameters": 86567656}
{"train_lr": 0.0008189947867643157, "train_min_lr": 3.451406405132407e-07, "train_loss": 2.9467900004937686, "train_loss_scale": 40652.22701838529, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.330086668046544, "test_loss": 0.7281826938934253, "test_acc1": 83.16400224121094, "test_acc5": 96.6680025390625, "epoch": 50, "n_parameters": 86567656}
{"train_lr": 0.0007969105617109163, "train_min_lr": 3.358339102344285e-07, "train_loss": 2.934308975696754, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.321183702642683, "test_loss": 0.7255150868746513, "test_acc1": 83.20800248046875, "test_acc5": 96.66600247070312, "epoch": 51, "n_parameters": 86567656}
{"train_lr": 0.0007745234858074802, "train_min_lr": 3.263995526030967e-07, "train_loss": 2.929205379659514, "train_loss_scale": 51967.79536370903, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7246349909524809, "test_acc1": 83.34400239746094, "test_acc5": 96.68200251953125, "epoch": 52, "n_parameters": 86567656}
{"train_lr": 0.0007518680783024018, "train_min_lr": 3.1685211471489785e-07, "train_loss": 2.917676522803726, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.364273701735633, "test_loss": 0.7191879193216448, "test_acc1": 83.37400263671876, "test_acc5": 96.622002734375, "epoch": 53, "n_parameters": 86567656}
{"train_lr": 0.0007289792721919573, "train_min_lr": 3.0720631802704263e-07, "train_loss": 2.9227888960179857, "train_loss_scale": 59904.409272581936, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.351603170855345, "test_loss": 0.7313475701315649, "test_acc1": 83.26200262207031, "test_acc5": 96.68000265625, "epoch": 54, "n_parameters": 86567656}
{"train_lr": 0.000705892360356159, "train_min_lr": 2.9747703565888335e-07, "train_loss": 2.921744898545275, "train_loss_scale": 66426.57713828937, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7358082024758803, "test_acc1": 83.16600234375, "test_acc5": 96.70800276367187, "epoch": 55, "n_parameters": 86567656}
{"train_lr": 0.000682642941139679, "train_min_lr": 2.87679269458639e-07, "train_loss": 2.920350502161004, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.370010533016458, "test_loss": 0.7242730563056881, "test_acc1": 83.42800243164062, "test_acc5": 96.67800274414063, "epoch": 56, "n_parameters": 86567656}
{"train_lr": 0.0006592668634618035, "train_min_lr": 2.7782812687163277e-07, "train_loss": 2.8959154472362507, "train_loss_scale": 65588.38689048761, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7200340082818978, "test_acc1": 83.37400270019532, "test_acc5": 96.66400271484375, "epoch": 57, "n_parameters": 86567656}
{"train_lr": 0.0006358001715399762, "train_min_lr": 2.6793879764571024e-07, "train_loss": 2.904490867357174, "train_loss_scale": 65640.77378097521, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.726642900271434, "test_acc1": 83.43000255371093, "test_acc5": 96.66000247070312, "epoch": 58, "n_parameters": 86567656}
{"train_lr": 0.0006122790493122424, "train_min_lr": 2.5802653040974467e-07, "train_loss": 2.9036317672684704, "train_loss_scale": 54220.43165467626, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7142911152022094, "test_acc1": 83.45400243164063, "test_acc5": 96.75200271484375, "epoch": 59, "n_parameters": 86567656}
{"train_lr": 0.0005887397646442304, "train_min_lr": 2.4810660916135856e-07, "train_loss": 2.887482542130682, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.401691564648367, "test_loss": 0.7133666251240105, "test_acc1": 83.52400232910156, "test_acc5": 96.7200027734375, "epoch": 60, "n_parameters": 86567656}
{"train_lr": 0.0005652186134067388, "train_min_lr": 2.3819432970010637e-07, "train_loss": 2.8858746321295663, "train_loss_scale": 57232.67785771383, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.398287223873855, "test_loss": 0.7202667361589227, "test_acc1": 83.52200258300782, "test_acc5": 96.71800279296875, "epoch": 61, "n_parameters": 86567656}
{"train_lr": 0.000541751863510129, "train_min_lr": 2.2830497604246107e-07, "train_loss": 2.8804565207373134, "train_loss_scale": 48064.97202238209, "train_weight_decay": 0.049999999999998865, "train_grad_norm": NaN, "test_loss": 0.7144578388139206, "test_acc1": 83.5660024609375, "test_acc5": 96.71600268554687, "epoch": 62, "n_parameters": 86567656}
{"train_lr": 0.0005183756989818348, "train_min_lr": 2.184537968549676e-07, "train_loss": 2.875593495049732, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.390706345522337, "test_loss": 0.7177405433864886, "test_acc1": 83.68600259765626, "test_acc5": 96.7460026171875, "epoch": 63, "n_parameters": 86567656}
{"train_lr": 0.0004951261641732231, "train_min_lr": 2.0865598194190603e-07, "train_loss": 2.8712423799945106, "train_loss_scale": 63388.13749000799, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.398436711751205, "test_loss": 0.7134516480667837, "test_acc1": 83.60600252929687, "test_acc5": 96.72400254882812, "epoch": 64, "n_parameters": 86567656}
{"train_lr": 0.00047203910818180867, "train_min_lr": 1.9892663882371233e-07, "train_loss": 2.8682457483667645, "train_loss_scale": 65797.93445243804, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.720002885865069, "test_acc1": 83.61800224121093, "test_acc5": 96.71600276367188, "epoch": 65, "n_parameters": 86567656}
{"train_lr": 0.000449150129574547, "train_min_lr": 1.8928076944227806e-07, "train_loss": 2.861393963892778, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.408258606394608, "test_loss": 0.7167302289570885, "test_acc1": 83.79400278320313, "test_acc5": 96.74600272460937, "epoch": 66, "n_parameters": 86567656}
{"train_lr": 0.00042649452149742334, "train_min_lr": 1.7973324702904246e-07, "train_loss": 2.8593696803569224, "train_loss_scale": 34968.24940047962, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7123029780113834, "test_acc1": 83.78200267089844, "test_acc5": 96.75000284179687, "epoch": 67, "n_parameters": 86567656}
{"train_lr": 0.00040410721725598286, "train_min_lr": 1.7029879317154918e-07, "train_loss": 2.8529019075022233, "train_loss_scale": 35151.60351718625, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7152690977086509, "test_acc1": 83.8200025, "test_acc5": 96.78600288085937, "epoch": 68, "n_parameters": 86567656}
{"train_lr": 0.00038202273645070005, "train_min_lr": 1.6099195511382242e-07, "train_loss": 2.8580098963088747, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.424673608357577, "test_loss": 0.7205284839046413, "test_acc1": 83.73000251464843, "test_acc5": 96.69800247070313, "epoch": 69, "n_parameters": 86567656}
{"train_lr": 0.00036027513175026284, "train_min_lr": 1.5182708332557547e-07, "train_loss": 2.8424704135726873, "train_loss_scale": 33082.32134292566, "train_weight_decay": 0.049999999999998865, "train_grad_norm": NaN, "test_loss": 0.711243065095496, "test_acc1": 83.81400243164063, "test_acc5": 96.71600263671876, "epoch": 70, "n_parameters": 86567656}
{"train_lr": 0.0003388979363848201, "train_min_lr": 1.4281830937482196e-07, "train_loss": 2.8361254449275664, "train_loss_scale": 34549.15427657874, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.438496785007602, "test_loss": 0.7114368569234322, "test_acc1": 83.80000257324218, "test_acc5": 96.7940028125, "epoch": 71, "n_parameters": 86567656}
{"train_lr": 0.00031792411244017677, "train_min_lr": 1.3397952413802485e-07, "train_loss": 2.8401458026241246, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.429834205946095, "test_loss": 0.718437923154155, "test_acc1": 83.7720026171875, "test_acc5": 96.73800274414063, "epoch": 72, "n_parameters": 86567656}
{"train_lr": 0.0002973860000326411, "train_min_lr": 1.2532435638137122e-07, "train_loss": 2.8237763777387133, "train_loss_scale": 65745.54756195044, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7083346051845514, "test_acc1": 83.87200264648438, "test_acc5": 96.80400306640625, "epoch": 73, "n_parameters": 86567656}
{"train_lr": 0.000277315267442915, "train_min_lr": 1.1686615174620377e-07, "train_loss": 2.8223798083101244, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.445520975416322, "test_loss": 0.7110328133078827, "test_acc1": 83.94800254882813, "test_acc5": 96.82200271484375, "epoch": 74, "n_parameters": 86567656}
{"train_lr": 0.00025774286228589475, "train_min_lr": 1.0861795217100597e-07, "train_loss": 2.8225074359100404, "train_loss_scale": 66374.19024780176, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7038041298987765, "test_acc1": 84.03000243652343, "test_acc5": 96.81000271484375, "epoch": 75, "n_parameters": 86567656}
{"train_lr": 0.0002386989637916971, "train_min_lr": 1.0059247578168191e-07, "train_loss": 2.8161609079912124, "train_loss_scale": 33710.96402877698, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.706734044570119, "test_acc1": 83.93000270507812, "test_acc5": 96.79600279296875, "epoch": 76, "n_parameters": 86567656}
{"train_lr": 0.0002202129362714707, "train_min_lr": 9.280209728112546e-08, "train_loss": 2.8213605545312284, "train_loss_scale": 44974.14548361311, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.449057151945375, "test_loss": 0.7088592466951786, "test_acc1": 83.97800252441407, "test_acc5": 96.8740028125, "epoch": 77, "n_parameters": 86567656}
{"train_lr": 0.0002023132838397564, "train_min_lr": 8.525882886832677e-08, "train_loss": 2.805161129918506, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.454313451628224, "test_loss": 0.7003062844733169, "test_acc1": 84.07800253417969, "test_acc5": 96.86400283203125, "epoch": 78, "n_parameters": 86567656}
{"train_lr": 0.00018502760646321278, "train_min_lr": 7.797430171643127e-08, "train_loss": 2.8085132227550975, "train_loss_scale": 32925.16067146283, "train_weight_decay": 0.049999999999998865, "train_grad_norm": NaN, "test_loss": 0.70055901650268, "test_acc1": 84.04200233886719, "test_acc5": 96.83800252929687, "epoch": 79, "n_parameters": 86567656}
{"train_lr": 0.00016838255740346322, "train_min_lr": 7.095974803831439e-08, "train_loss": 2.7946216055362534, "train_loss_scale": 45759.94884092726, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.446751663724867, "test_loss": 0.7034925127052256, "test_acc1": 84.12800258300781, "test_acc5": 96.83400280273437, "epoch": 80, "n_parameters": 86567656}
{"train_lr": 0.0001524038021197004, "train_min_lr": 6.422598376732251e-08, "train_loss": 2.7903329889765747, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.421228220899232, "test_loss": 0.701983569225589, "test_acc1": 84.02200268066406, "test_acc5": 96.82600288085938, "epoch": 81, "n_parameters": 86567656}
{"train_lr": 0.00013711597869440866, "train_min_lr": 5.7783391879888294e-08, "train_loss": 2.7974041374943717, "train_loss_scale": 65588.38689048761, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.700939668378154, "test_acc1": 84.05400269042968, "test_acc5": 96.80600260742187, "epoch": 82, "n_parameters": 86567656}
{"train_lr": 0.00012254265984322626, "train_min_lr": 5.164190638573445e-08, "train_loss": 2.7941539998201255, "train_loss_scale": 65797.93445243804, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7049672881648001, "test_acc1": 84.16600239746094, "test_acc5": 96.80600266601563, "epoch": 83, "n_parameters": 86567656}
{"train_lr": 0.00010870631656752672, "train_min_lr": 4.58109970103487e-08, "train_loss": 2.79092302806467, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.4221697806549685, "test_loss": 0.7020374086053892, "test_acc1": 84.06000257324219, "test_acc5": 96.8140028515625, "epoch": 84, "n_parameters": 86567656}
{"train_lr": 9.56282835057626e-05, "train_min_lr": 4.0299654593355386e-08, "train_loss": 2.7862211030354795, "train_loss_scale": 38504.364508393286, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.700157942500151, "test_acc1": 84.12400225097656, "test_acc5": 96.84600271484375, "epoch": 85, "n_parameters": 86567656}
{"train_lr": 8.332872603700069e-05, "train_min_lr": 3.511637722529139e-08, "train_loss": 2.7828092837838723, "train_loss_scale": 40180.7450039968, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.446651417002689, "test_loss": 0.7034655883051883, "test_acc1": 84.11000245117188, "test_acc5": 96.844002734375, "epoch": 86, "n_parameters": 86567656}
{"train_lr": 7.182660918736792e-05, "train_min_lr": 3.0269157144166766e-08, "train_loss": 2.7829707633319805, "train_loss_scale": 55818.23181454836, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7032621891676695, "test_acc1": 84.16200255371093, "test_acc5": 96.86000287109376, "epoch": 87, "n_parameters": 86567656}
{"train_lr": 6.11396683873568e-05, "train_min_lr": 2.5765468412013197e-08, "train_loss": 2.7807730604990497, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.459189962521255, "test_loss": 0.7004189751043174, "test_acc1": 84.22600258300781, "test_acc5": 96.858002890625, "epoch": 88, "n_parameters": 86567656}
{"train_lr": 5.128438212507544e-05, "train_min_lr": 2.1612255390421663e-08, "train_loss": 2.7785856380141514, "train_loss_scale": 49296.06394884093, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7015960227369805, "test_acc1": 84.18400243652344, "test_acc5": 96.86000275390624, "epoch": 89, "n_parameters": 86567656}
{"train_lr": 4.227594653761509e-05, "train_min_lr": 1.781592203284064e-08, "train_loss": 2.784802940907143, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.460121157643893, "test_loss": 0.7015866099417895, "test_acc1": 84.22600259765625, "test_acc5": 96.8420029296875, "epoch": 90, "n_parameters": 86567656}
{"train_lr": 3.4128251979707407e-05, "train_min_lr": 1.4382322010143828e-08, "train_loss": 2.784660592734766, "train_loss_scale": 39735.456434852116, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.6971373246889918, "test_acc1": 84.25400245605469, "test_acc5": 96.87600274414062, "epoch": 91, "n_parameters": 86567656}
{"train_lr": 2.685386160580364e-05, "train_min_lr": 1.1316749684693876e-08, "train_loss": 2.777699251719516, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.439904323298868, "test_loss": 0.7022319208273943, "test_acc1": 84.2380027294922, "test_acc5": 96.8740028515625, "epoch": 92, "n_parameters": 86567656}
{"train_lr": 2.0463991998602305e-05, "train_min_lr": 8.623931946819504e-09, "train_loss": 2.7784814881525643, "train_loss_scale": 58437.576338928855, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.446779675144467, "test_loss": 0.7001935119715686, "test_acc1": 84.28200243164062, "test_acc5": 96.8660028125, "epoch": 93, "n_parameters": 86567656}
{"train_lr": 1.4968495873890584e-05, "train_min_lr": 6.3080209262932594e-09, "train_loss": 2.7700077018482414, "train_loss_scale": 33160.90167865707, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.698743403528842, "test_acc1": 84.25200243164062, "test_acc5": 96.886002734375, "epoch": 94, "n_parameters": 86567656}
{"train_lr": 1.0375846888371264e-05, "train_min_lr": 4.3725875900481435e-09, "train_loss": 2.7759395932241215, "train_loss_scale": 45524.20783373302, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.436390553923438, "test_loss": 0.6990965720109099, "test_acc1": 84.22400258789062, "test_acc5": 96.85000291015625, "epoch": 95, "n_parameters": 86567656}
{"train_lr": 6.693126573898256e-06, "train_min_lr": 2.8206162360056763e-09, "train_loss": 2.774880387383304, "train_loss_scale": 62445.17346123102, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.6991106388783547, "test_acc1": 84.2340025341797, "test_acc5": 96.85600279296875, "epoch": 96, "n_parameters": 86567656}
{"train_lr": 3.926013418267524e-06, "train_min_lr": 1.6544998915046446e-09, "train_loss": 2.7747137634326324, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.450026090768316, "test_loss": 0.6995714533488869, "test_acc1": 84.23400247558594, "test_acc5": 96.85800271484375, "epoch": 97, "n_parameters": 86567656}
{"train_lr": 2.078774109400714e-06, "train_min_lr": 8.760366234264862e-10, "train_loss": 2.7724640819904427, "train_loss_scale": 49007.93605115907, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 4.414179214756552, "test_loss": 0.6990149175298626, "test_acc1": 84.24200247070313, "test_acc5": 96.850002734375, "epoch": 98, "n_parameters": 86567656}
{"train_lr": 1.1542569564215213e-06, "train_min_lr": 4.864267657064229e-10, "train_loss": 2.777094287528313, "train_loss_scale": 61083.11430855316, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.698827356956471, "test_acc1": 84.244002421875, "test_acc5": 96.85600276367188, "epoch": 99, "n_parameters": 86567656}