Mamba-ND / k400 /log.txt
Shufan Li
add k400 training
d0825b5
{"train_lr": 1.0223911293623287e-05, "train_min_lr": 2.4371587462957037e-10, "train_loss": 5.8170912236585135, "train_loss_scale": 220363.6824010914, "train_weight_decay": 0.05000000000000669, "train_grad_norm": 6.741515036071632, "val_loss": 4.445647297262846, "val_acc1": 13.208439180772912, "val_acc5": 34.14147939970271, "epoch": 0, "n_parameters": 39992848}
{"train_lr": 3.0174455646811635e-05, "train_min_lr": 7.192935891394719e-10, "train_loss": 4.6733906379053405, "train_loss_scale": 2807050.826739427, "train_weight_decay": 0.05000000000000669, "train_grad_norm": 7.748025844269527, "val_loss": 2.6085919576042547, "val_acc1": 40.806047192148355, "val_acc5": 69.67359623560677, "epoch": 1, "n_parameters": 39992848}
{"train_lr": 5.012499999999998e-05, "train_min_lr": 1.194871303649374e-09, "train_loss": 4.112347854017235, "train_loss_scale": 33493777.672032744, "train_weight_decay": 0.05000000000000669, "train_grad_norm": 9.50233030345235, "val_loss": 1.9699647131256328, "val_acc1": 52.40344470333993, "val_acc5": 79.86985954649802, "epoch": 2, "n_parameters": 39992848}
{"train_lr": 7.007554435318836e-05, "train_min_lr": 1.6704490181592762e-09, "train_loss": 3.8728296238029474, "train_loss_scale": 449940671.3888131, "train_weight_decay": 0.05000000000000669, "train_grad_norm": 10.076074038163377, "val_loss": 1.6927277547464488, "val_acc1": 58.35432656525965, "val_acc5": 84.36713047772271, "epoch": 3, "n_parameters": 39992848}
{"train_lr": 9.00260887063767e-05, "train_min_lr": 2.1460267326691787e-09, "train_loss": 3.739147011272985, "train_loss_scale": 5726867204.540246, "train_weight_decay": 0.05000000000000669, "train_grad_norm": 10.486215209700726, "val_loss": 1.5609473092880297, "val_acc1": 61.880774988035114, "val_acc5": 85.95718071502762, "epoch": 4, "n_parameters": 39992848}
{"train_lr": 9.995950404207107e-05, "train_min_lr": 2.382817813603873e-09, "train_loss": 3.6329521221341246, "train_loss_scale": 68431192437.27804, "train_weight_decay": 0.05000000000000669, "train_grad_norm": 10.961555453960111, "val_loss": 1.4068040530870596, "val_acc1": 64.62531736455577, "val_acc5": 88.25042142315536, "epoch": 5, "n_parameters": 39992848}
{"train_lr": 9.971673192248489e-05, "train_min_lr": 2.3770306527256757e-09, "train_loss": 3.530236593992697, "train_loss_scale": 918665965260.6603, "train_weight_decay": 0.05000000000000669, "train_grad_norm": 11.424072421751763, "val_loss": 1.327795745119903, "val_acc1": 67.312135243596, "val_acc5": 89.12678571552114, "epoch": 6, "n_parameters": 39992848}
{"train_lr": 9.923235386991673e-05, "train_min_lr": 2.3654841303290367e-09, "train_loss": 3.4545294186895736, "train_loss_scale": 11683623559000.36, "train_weight_decay": 0.05000000000000669, "train_grad_norm": 12.182526731100772, "val_loss": 1.3070635755149933, "val_acc1": 68.06255514015179, "val_acc5": 89.39966562352794, "epoch": 7, "n_parameters": 39992848}
{"train_lr": 9.850872972554721e-05, "train_min_lr": 2.3482344999103887e-09, "train_loss": 3.389071476831524, "train_loss_scale": 139811078558173.22, "train_weight_decay": 0.05000000000000669, "train_grad_norm": 12.721785512692607, "val_loss": 1.2297115281566753, "val_acc1": 69.92548539596481, "val_acc5": 90.40722218808959, "epoch": 8, "n_parameters": 39992848}
{"train_lr": 9.7549384913471e-05, "train_min_lr": 2.325365799935235e-09, "train_loss": 3.3352645051206933, "train_loss_scale": 1875667835938880.2, "train_weight_decay": 0.05000000000000669, "train_grad_norm": 13.262168244092559, "val_loss": 1.2073919311029748, "val_acc1": 70.60243752501174, "val_acc5": 90.87426662685289, "epoch": 9, "n_parameters": 39992848}
{"train_lr": 9.635899326517863e-05, "train_min_lr": 2.296989444411083e-09, "train_loss": 3.290210308069503, "train_loss_scale": 2.3835900074193504e+16, "train_weight_decay": 0.05000000000000669, "train_grad_norm": 13.93395519308634, "val_loss": 1.1688495613963013, "val_acc1": 71.51553575698314, "val_acc5": 91.11041269614654, "epoch": 10, "n_parameters": 39992848}
{"train_lr": 9.49433542491233e-05, "train_min_lr": 2.263243680089656e-09, "train_loss": 3.2512276168226544, "train_loss_scale": 2.8564495360983248e+17, "train_weight_decay": 0.05000000000000669, "train_grad_norm": 14.217566891549229, "val_loss": 1.172261585479119, "val_acc1": 71.735938801273, "val_acc5": 91.42002643565988, "epoch": 11, "n_parameters": 39992848}
{"train_lr": 9.330936471629166e-05, "train_min_lr": 2.224292912942643e-09, "train_loss": 3.2112138473402103, "train_loss_scale": 3.829571123249005e+18, "train_weight_decay": 0.05000000000000669, "train_grad_norm": 14.621982845603037, "val_loss": 1.1030519641865448, "val_acc1": 73.38371379789838, "val_acc5": 92.13371231694066, "epoch": 12, "n_parameters": 39992848}
{"train_lr": 9.146498529943911e-05, "train_min_lr": 2.1803269071922398e-09, "train_loss": 3.17370755252051, "train_loss_scale": 4.862717767588715e+19, "train_weight_decay": 0.05000000000000669, "train_grad_norm": 15.01116598371269, "val_loss": 1.0960214767834964, "val_acc1": 73.17905381464537, "val_acc5": 92.01826312740144, "epoch": 13, "n_parameters": 39992848}
{"train_lr": 8.941920162968766e-05, "train_min_lr": 2.13155986079904e-09, "train_loss": 3.1509699742392336, "train_loss_scale": 5.835915639450137e+20, "train_weight_decay": 0.05000000000000669, "train_grad_norm": 15.317556290802168, "val_loss": 1.0728981197279144, "val_acc1": 73.64085064727053, "val_acc5": 92.23341843102982, "epoch": 14, "n_parameters": 39992848}
{"train_lr": 8.718198055943694e-05, "train_min_lr": 2.0782293619109876e-09, "train_loss": 3.1085077658864844, "train_loss_scale": 7.818802213878136e+21, "train_weight_decay": 0.05000000000000669, "train_grad_norm": 15.925172185442458, "val_loss": 1.0655722598749346, "val_acc1": 74.41750886398239, "val_acc5": 92.448573744267, "epoch": 15, "n_parameters": 39992848}
{"train_lr": 8.476422160485482e-05, "train_min_lr": 2.0205952313579484e-09, "train_loss": 3.085390474300674, "train_loss_scale": 9.920190873564365e+22, "train_weight_decay": 0.05000000000000669, "train_grad_norm": 16.079373867872793, "val_loss": 1.0350527216701462, "val_acc1": 74.59068262066589, "val_acc5": 92.83165512517357, "epoch": 16, "n_parameters": 39992848}
{"train_lr": 8.217770384453662e-05, "train_min_lr": 1.958938256830578e-09, "train_loss": 3.059762961827066, "train_loss_scale": 1.1923092744132412e+24, "train_weight_decay": 0.05000000000000669, "train_grad_norm": 16.28747444907445, "val_loss": 1.0119282300173487, "val_acc1": 75.30961630626649, "val_acc5": 92.91037048801067, "epoch": 17, "n_parameters": 39992848}
{"train_lr": 7.943502853301379e-05, "train_min_lr": 1.8935588249110622e-09, "train_loss": 3.0296028912524817, "train_loss_scale": 1.5963428387517048e+25, "train_weight_decay": 0.05000000000000669, "train_grad_norm": 16.751232886997446, "val_loss": 0.997636639900149, "val_acc1": 75.84488076226837, "val_acc5": 93.26196573243033, "epoch": 18, "n_parameters": 39992848}
{"train_lr": 7.654955770871187e-05, "train_min_lr": 1.8247754576197843e-09, "train_loss": 3.004172313609764, "train_loss_scale": 2.0237385234651223e+26, "train_weight_decay": 0.05000000000000669, "train_grad_norm": 16.65895098520984, "val_loss": 0.9740263239119287, "val_acc1": 76.14924681757199, "val_acc5": 93.49811178971298, "epoch": 19, "n_parameters": 39992848}
{"train_lr": 7.353534909544071e-05, "train_min_lr": 1.7529232606081043e-09, "train_loss": 2.980225207808073, "train_loss_scale": 2.4359383569758093e+27, "train_weight_decay": 0.05000000000000669, "train_grad_norm": 16.99603130592981, "val_loss": 0.9760335738575954, "val_acc1": 76.2751914415912, "val_acc5": 93.52435024559348, "epoch": 20, "n_parameters": 39992848}
{"train_lr": 7.040708761457642e-05, "train_min_lr": 1.6783522905573017e-09, "train_loss": 2.9564644565605045, "train_loss_scale": 3.259176927439191e+28, "train_weight_decay": 0.05000000000000669, "train_grad_norm": 18.363919782833708, "val_loss": 0.9528852675581627, "val_acc1": 76.44836520908461, "val_acc5": 93.48236872627393, "epoch": 21, "n_parameters": 39992848}
{"train_lr": 6.718001384160243e-05, "train_min_lr": 1.601425849737623e-09, "train_loss": 2.9284960108607154, "train_loss_scale": 4.1284033659376896e+29, "train_weight_decay": 0.05000000000000669, "train_grad_norm": 16.983604083770295, "val_loss": 0.9445340681788266, "val_acc1": 77.072840384332, "val_acc5": 93.76049631488713, "epoch": 22, "n_parameters": 39992848}
{"train_lr": 6.386984975555609e-05, "train_min_lr": 1.5225187160361227e-09, "train_loss": 2.902773851217222, "train_loss_scale": 4.97669595126436e+30, "train_weight_decay": 0.05000000000000669, "train_grad_norm": 16.85462112921159, "val_loss": 0.9307590378706416, "val_acc1": 77.48740799180807, "val_acc5": 93.90743164091326, "epoch": 23, "n_parameters": 39992848}
{"train_lr": 6.049272214312802e-05, "train_min_lr": 1.4420153170764875e-09, "train_loss": 2.8849937728947945, "train_loss_scale": 6.654041540843295e+31, "train_weight_decay": 0.05000000000000669, "train_grad_norm": 17.59567844916496, "val_loss": 0.9212161080538758, "val_acc1": 77.63434336227493, "val_acc5": 93.93891777980238, "epoch": 24, "n_parameters": 39992848}
{"train_lr": 5.706508403057876e-05, "train_min_lr": 1.3603078573262506e-09, "train_loss": 2.8564175290084797, "train_loss_scale": 8.42176560295692e+32, "train_weight_decay": 0.05000000000000669, "train_grad_norm": 17.420279217156676, "val_loss": 0.9161053535315004, "val_acc1": 77.80751714057827, "val_acc5": 94.18031153450684, "epoch": 25, "n_parameters": 39992848}
{"train_lr": 5.360363452624676e-05, "train_min_lr": 1.27779440731613e-09, "train_loss": 2.83136665174918, "train_loss_scale": 1.0167480621961753e+34, "train_weight_decay": 0.05000000000000669, "train_grad_norm": 17.742768015087382, "val_loss": 0.9016925615354697, "val_acc1": 78.27980934282394, "val_acc5": 94.32199916791555, "epoch": 26, "n_parameters": 39992848}
{"train_lr": 5.012523746417433e-05, "train_min_lr": 1.1948769642803507e-09, "train_loss": 2.8059325080420847, "train_loss_scale": 1.2141049281929541e+35, "train_weight_decay": 0.05000000000000669, "train_grad_norm": Infinity, "val_loss": 0.9065596212536772, "val_acc1": 78.18535086490046, "val_acc5": 94.29051303383085, "epoch": 27, "n_parameters": 39992848}
{"train_lr": 4.6646839245200224e-05, "train_min_lr": 1.1119594936665407e-09, "train_loss": 2.787129109673009, "train_loss_scale": 1.9514535838527184e+35, "train_weight_decay": 0.05000000000000669, "train_grad_norm": Infinity, "val_loss": 0.893245403403862, "val_acc1": 78.62090925846051, "val_acc5": 94.56864060322644, "epoch": 28, "n_parameters": 39992848}
{"train_lr": 4.318538627579993e-05, "train_min_lr": 1.0294459610566982e-09, "train_loss": 2.760546349262435, "train_loss_scale": 1.9630140592594426e+35, "train_weight_decay": 0.05000000000000669, "train_grad_norm": Infinity, "val_loss": 0.8669220389814705, "val_acc1": 79.16666908768623, "val_acc5": 94.74706207534828, "epoch": 29, "n_parameters": 39992848}
{"train_lr": 3.975774240689686e-05, "train_min_lr": 9.477383640874768e-10, "train_loss": 2.7390297125155896, "train_loss_scale": 1.8072876552512166e+35, "train_weight_decay": 0.05000000000000669, "train_grad_norm": Infinity, "val_loss": 0.8632838064053985, "val_acc1": 79.49202590385072, "val_acc5": 94.83627281621361, "epoch": 30, "n_parameters": 39992848}
{"train_lr": 3.638060677487374e-05, "train_min_lr": 8.672347739580804e-10, "train_loss": 2.717780929243784, "train_loss_scale": 1.878917267575233e+35, "train_weight_decay": 0.05000000000000669, "train_grad_norm": Infinity, "val_loss": 0.8554754473376898, "val_acc1": 79.46053971931977, "val_acc5": 94.79429129449186, "epoch": 31, "n_parameters": 39992848}
{"train_lr": 3.307043244506221e-05, "train_min_lr": 7.883273960674355e-10, "train_loss": 2.6939170242524813, "train_loss_scale": 2.2134910264051335e+35, "train_weight_decay": 0.05000000000000669, "train_grad_norm": NaN, "val_loss": 0.8402039022642359, "val_acc1": 79.62846581941888, "val_acc5": 94.84676820144846, "epoch": 32, "n_parameters": 39992848}
{"train_lr": 2.984334625405923e-05, "train_min_lr": 7.114006592288705e-10, "train_loss": 2.6730119074767233, "train_loss_scale": 1.0914448839877848e+35, "train_weight_decay": 0.05000000000000669, "train_grad_norm": Infinity, "val_loss": 0.8375941329677906, "val_acc1": 79.95382265720319, "val_acc5": 94.97796045202452, "epoch": 33, "n_parameters": 39992848}
{"train_lr": 2.6715070241401648e-05, "train_min_lr": 6.368293427716309e-10, "train_loss": 2.6565713019754438, "train_loss_scale": 1.1756550136661776e+35, "train_weight_decay": 0.05000000000000669, "train_grad_norm": Infinity, "val_loss": 0.8407533223018219, "val_acc1": 79.67569505417677, "val_acc5": 94.95696969116365, "epoch": 34, "n_parameters": 39992848}
{"train_lr": 2.3700845053369857e-05, "train_min_lr": 5.64976750653605e-10, "train_loss": 2.6369161930631484, "train_loss_scale": 1.3240144480524716e+35, "train_weight_decay": 0.05000000000000669, "train_grad_norm": Infinity, "val_loss": 0.8263127480865801, "val_acc1": 80.45760099593578, "val_acc5": 95.1091527122094, "epoch": 35, "n_parameters": 39992848}
{"train_lr": 2.0815355692091567e-05, "train_min_lr": 4.961929414809949e-10, "train_loss": 2.6190819935860445, "train_loss_scale": 1.9726477887650463e+35, "train_weight_decay": 0.05000000000000669, "train_grad_norm": NaN, "val_loss": 0.8255390998630571, "val_acc1": 80.40512408297369, "val_acc5": 95.18262037282027, "epoch": 36, "n_parameters": 39992848}
{"train_lr": 1.8072659971686194e-05, "train_min_lr": 4.308130230579649e-10, "train_loss": 2.5987123648749124, "train_loss_scale": 1.8310886340297665e+35, "train_weight_decay": 0.05000000000000669, "train_grad_norm": Infinity, "val_loss": 0.8226508005482311, "val_acc1": 80.49958245159696, "val_acc5": 95.26658341866596, "epoch": 37, "n_parameters": 39992848}
{"train_lr": 1.54861200300087e-05, "train_min_lr": 3.6915551977510535e-10, "train_loss": 2.5861242946903102, "train_loss_scale": 1.8580630766454562e+35, "train_weight_decay": 0.05000000000000669, "train_grad_norm": Infinity, "val_loss": 0.8046001431764448, "val_acc1": 80.83543468782824, "val_acc5": 95.35579414992249, "epoch": 38, "n_parameters": 39992848}
{"train_lr": 1.3068337229656993e-05, "train_min_lr": 3.1152082079062125e-10, "train_loss": 2.5688301714724195, "train_loss_scale": 1.8279151701926263e+35, "train_weight_decay": 0.05000000000000669, "train_grad_norm": Infinity, "val_loss": 0.80954064953467, "val_acc1": 80.88791160919804, "val_acc5": 95.35054646451167, "epoch": 39, "n_parameters": 39992848}
{"train_lr": 1.0831090765399777e-05, "train_min_lr": 2.5818971656454734e-10, "train_loss": 2.554028320402964, "train_loss_scale": 1.8204348625765107e+35, "train_weight_decay": 0.05000000000000669, "train_grad_norm": NaN, "val_loss": 0.8078244944673116, "val_acc1": 80.87216853134578, "val_acc5": 95.32430801103337, "epoch": 40, "n_parameters": 39992848}
{"train_lr": 8.785280277123507e-06, "train_min_lr": 2.0942203087584495e-10, "train_loss": 2.5448538385250314, "train_loss_scale": 2.0112960447816436e+35, "train_weight_decay": 0.05000000000000669, "train_grad_norm": NaN, "val_loss": 0.8044941746453832, "val_acc1": 80.94563618715223, "val_acc5": 95.40827105687907, "epoch": 41, "n_parameters": 39992848}
{"train_lr": 6.940872747883076e-06, "train_min_lr": 1.6545535498708208e-10, "train_loss": 2.5336505306724555, "train_loss_scale": 1.879823971528702e+35, "train_weight_decay": 0.05000000000000669, "train_grad_norm": NaN, "val_loss": 0.8016023224721702, "val_acc1": 81.18178229848444, "val_acc5": 95.31906032081815, "epoch": 42, "n_parameters": 39992848}
{"train_lr": 5.306853945761619e-06, "train_min_lr": 1.265038901236701e-10, "train_loss": 2.522356897904595, "train_loss_scale": 1.7200173997298673e+35, "train_weight_decay": 0.05000000000000669, "train_grad_norm": Infinity, "val_loss": 0.7990030080833066, "val_acc1": 81.33921299653329, "val_acc5": 95.35579415232469, "epoch": 43, "n_parameters": 39992848}
{"train_lr": 3.891184646112095e-06, "train_min_lr": 9.275740390704045e-11, "train_loss": 2.512602561960279, "train_loss_scale": 2.9145998584247013e+35, "train_weight_decay": 0.05000000000000669, "train_grad_norm": Infinity, "val_loss": 0.7950183249123263, "val_acc1": 81.37069914863451, "val_acc5": 95.4712433370595, "epoch": 44, "n_parameters": 39992848}
{"train_lr": 2.700761847459884e-06, "train_min_lr": 6.438030582585259e-11, "train_loss": 2.5072691910151814, "train_loss_scale": 1.775779692868184e+35, "train_weight_decay": 0.05000000000000669, "train_grad_norm": NaN, "val_loss": 0.7980627838266493, "val_acc1": 81.35495607678776, "val_acc5": 95.40827105687907, "epoch": 45, "n_parameters": 39992848}
{"train_lr": 1.7413851700190804e-06, "train_min_lr": 4.151084624950393e-11, "train_loss": 2.5043958159577797, "train_loss_scale": 1.9852283061194226e+35, "train_weight_decay": 0.05000000000000669, "train_grad_norm": Infinity, "val_loss": 0.795208231256266, "val_acc1": 81.34446069635732, "val_acc5": 95.40302336666386, "epoch": 46, "n_parameters": 39992848}
{"train_lr": 1.0177286005259916e-06, "train_min_lr": 2.4260442886219458e-11, "train_loss": 2.499114779115618, "train_loss_scale": 1.672868794149502e+35, "train_weight_decay": 0.05000000000000669, "train_grad_norm": Infinity, "val_loss": 0.7962640616186526, "val_acc1": 81.391689916702, "val_acc5": 95.46599564924648, "epoch": 47, "n_parameters": 39992848}
{"train_lr": 5.333177210454811e-07, "train_min_lr": 1.2713137967180628e-11, "train_loss": 2.496748880375212, "train_loss_scale": 2.4451538865163518e+35, "train_weight_decay": 0.05000000000000669, "train_grad_norm": NaN, "val_loss": 0.7954816204373741, "val_acc1": 81.35495609240208, "val_acc5": 95.43450950795517, "epoch": 48, "n_parameters": 39992848}
{"train_lr": 2.905125326895701e-07, "train_min_lr": 6.925188801222288e-12, "train_loss": 2.4971960905832615, "train_loss_scale": 1.992708613735538e+35, "train_weight_decay": 0.05000000000000669, "train_grad_norm": Infinity, "val_loss": 0.7948179201044667, "val_acc1": 81.38644223249229, "val_acc5": 95.38203260340077, "epoch": 49, "n_parameters": 39992848}
{"Final top-1": 81.87591853873609, "Final Top-5": 95.62250682343061}