{ "best_metric": 0.96282727, "best_model_checkpoint": "/data1/wjx/model/swift/output/v1_prompt/output/internvl2-26b/v0-20240806-203157/checkpoint-7656", "epoch": 6.0, "eval_steps": 1, "global_step": 7656, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "acc": 0.51014894, "epoch": 0.0, "learning_rate": 1.3054830287206266e-07, "loss": 1.93593538, "memory(GiB)": 59.04, "step": 1, "train_speed(iter/s)": 0.032837 }, { "acc": 0.53850144, "epoch": 0.0, "learning_rate": 6.527415143603134e-07, "loss": 1.74545956, "memory(GiB)": 64.54, "step": 5, "train_speed(iter/s)": 0.035227 }, { "acc": 0.5365521, "epoch": 0.01, "learning_rate": 1.3054830287206267e-06, "loss": 1.77564487, "memory(GiB)": 75.45, "step": 10, "train_speed(iter/s)": 0.034818 }, { "acc": 0.53638253, "epoch": 0.01, "learning_rate": 1.9582245430809403e-06, "loss": 1.75824242, "memory(GiB)": 75.45, "step": 15, "train_speed(iter/s)": 0.035152 }, { "acc": 0.5359807, "epoch": 0.02, "learning_rate": 2.6109660574412534e-06, "loss": 1.74080467, "memory(GiB)": 78.2, "step": 20, "train_speed(iter/s)": 0.034968 }, { "acc": 0.52935181, "epoch": 0.02, "learning_rate": 3.263707571801567e-06, "loss": 1.78961926, "memory(GiB)": 71.11, "step": 25, "train_speed(iter/s)": 0.035135 }, { "acc": 0.52923927, "epoch": 0.02, "learning_rate": 3.9164490861618806e-06, "loss": 1.77055035, "memory(GiB)": 71.11, "step": 30, "train_speed(iter/s)": 0.035279 }, { "acc": 0.51313214, "epoch": 0.03, "learning_rate": 4.569190600522193e-06, "loss": 1.85138378, "memory(GiB)": 71.11, "step": 35, "train_speed(iter/s)": 0.035382 }, { "acc": 0.53307762, "epoch": 0.03, "learning_rate": 5.221932114882507e-06, "loss": 1.75060291, "memory(GiB)": 71.11, "step": 40, "train_speed(iter/s)": 0.035438 }, { "acc": 0.54460502, "epoch": 0.04, "learning_rate": 5.87467362924282e-06, "loss": 1.69541492, "memory(GiB)": 71.11, "step": 45, "train_speed(iter/s)": 0.035472 }, { "acc": 0.53159156, "epoch": 0.04, "learning_rate": 6.527415143603134e-06, "loss": 1.75854664, "memory(GiB)": 71.11, "step": 50, "train_speed(iter/s)": 0.035355 }, { "acc": 0.54251604, "epoch": 0.04, "learning_rate": 7.180156657963447e-06, "loss": 1.72001286, "memory(GiB)": 71.11, "step": 55, "train_speed(iter/s)": 0.035265 }, { "acc": 0.54704895, "epoch": 0.05, "learning_rate": 7.832898172323761e-06, "loss": 1.69055214, "memory(GiB)": 71.11, "step": 60, "train_speed(iter/s)": 0.035313 }, { "acc": 0.54154701, "epoch": 0.05, "learning_rate": 8.485639686684073e-06, "loss": 1.73966904, "memory(GiB)": 71.11, "step": 65, "train_speed(iter/s)": 0.035332 }, { "acc": 0.54317932, "epoch": 0.05, "learning_rate": 9.138381201044387e-06, "loss": 1.72243404, "memory(GiB)": 71.11, "step": 70, "train_speed(iter/s)": 0.035356 }, { "acc": 0.54935493, "epoch": 0.06, "learning_rate": 9.7911227154047e-06, "loss": 1.67216759, "memory(GiB)": 71.11, "step": 75, "train_speed(iter/s)": 0.03538 }, { "acc": 0.5512825, "epoch": 0.06, "learning_rate": 1.0443864229765014e-05, "loss": 1.63593922, "memory(GiB)": 71.11, "step": 80, "train_speed(iter/s)": 0.035391 }, { "acc": 0.55266647, "epoch": 0.07, "learning_rate": 1.1096605744125327e-05, "loss": 1.64257793, "memory(GiB)": 71.11, "step": 85, "train_speed(iter/s)": 0.035413 }, { "acc": 0.5626039, "epoch": 0.07, "learning_rate": 1.174934725848564e-05, "loss": 1.62862835, "memory(GiB)": 71.11, "step": 90, "train_speed(iter/s)": 0.035346 }, { "acc": 0.55852866, "epoch": 0.07, "learning_rate": 1.2402088772845953e-05, "loss": 1.66630707, "memory(GiB)": 71.11, "step": 95, "train_speed(iter/s)": 0.035372 }, { "acc": 0.56864738, "epoch": 0.08, "learning_rate": 1.3054830287206268e-05, "loss": 1.5820467, "memory(GiB)": 71.11, "step": 100, "train_speed(iter/s)": 0.035404 }, { "acc": 0.57975974, "epoch": 0.08, "learning_rate": 1.370757180156658e-05, "loss": 1.50294018, "memory(GiB)": 71.11, "step": 105, "train_speed(iter/s)": 0.035429 }, { "acc": 0.56583891, "epoch": 0.09, "learning_rate": 1.4360313315926893e-05, "loss": 1.5526598, "memory(GiB)": 71.11, "step": 110, "train_speed(iter/s)": 0.035444 }, { "acc": 0.57876883, "epoch": 0.09, "learning_rate": 1.5013054830287207e-05, "loss": 1.52297754, "memory(GiB)": 71.11, "step": 115, "train_speed(iter/s)": 0.03546 }, { "acc": 0.56726651, "epoch": 0.09, "learning_rate": 1.5665796344647522e-05, "loss": 1.5246254, "memory(GiB)": 71.11, "step": 120, "train_speed(iter/s)": 0.035468 }, { "acc": 0.57492142, "epoch": 0.1, "learning_rate": 1.6318537859007836e-05, "loss": 1.51940918, "memory(GiB)": 71.11, "step": 125, "train_speed(iter/s)": 0.035473 }, { "acc": 0.57668262, "epoch": 0.1, "learning_rate": 1.6971279373368146e-05, "loss": 1.49961472, "memory(GiB)": 71.11, "step": 130, "train_speed(iter/s)": 0.03548 }, { "acc": 0.58218517, "epoch": 0.11, "learning_rate": 1.762402088772846e-05, "loss": 1.46871567, "memory(GiB)": 71.11, "step": 135, "train_speed(iter/s)": 0.035434 }, { "acc": 0.5942565, "epoch": 0.11, "learning_rate": 1.8276762402088773e-05, "loss": 1.41751032, "memory(GiB)": 71.11, "step": 140, "train_speed(iter/s)": 0.035445 }, { "acc": 0.58242044, "epoch": 0.11, "learning_rate": 1.8929503916449087e-05, "loss": 1.454846, "memory(GiB)": 71.11, "step": 145, "train_speed(iter/s)": 0.035456 }, { "acc": 0.59025207, "epoch": 0.12, "learning_rate": 1.95822454308094e-05, "loss": 1.44744148, "memory(GiB)": 71.11, "step": 150, "train_speed(iter/s)": 0.035462 }, { "acc": 0.60451708, "epoch": 0.12, "learning_rate": 2.0234986945169714e-05, "loss": 1.38559418, "memory(GiB)": 71.11, "step": 155, "train_speed(iter/s)": 0.035424 }, { "acc": 0.5968575, "epoch": 0.13, "learning_rate": 2.0887728459530027e-05, "loss": 1.38553591, "memory(GiB)": 71.11, "step": 160, "train_speed(iter/s)": 0.035433 }, { "acc": 0.60566149, "epoch": 0.13, "learning_rate": 2.154046997389034e-05, "loss": 1.3881608, "memory(GiB)": 71.11, "step": 165, "train_speed(iter/s)": 0.035438 }, { "acc": 0.59593248, "epoch": 0.13, "learning_rate": 2.2193211488250655e-05, "loss": 1.41851988, "memory(GiB)": 71.11, "step": 170, "train_speed(iter/s)": 0.035445 }, { "acc": 0.60208702, "epoch": 0.14, "learning_rate": 2.2845953002610968e-05, "loss": 1.37305822, "memory(GiB)": 71.11, "step": 175, "train_speed(iter/s)": 0.035454 }, { "acc": 0.60938406, "epoch": 0.14, "learning_rate": 2.349869451697128e-05, "loss": 1.37933292, "memory(GiB)": 71.11, "step": 180, "train_speed(iter/s)": 0.035385 }, { "acc": 0.61727815, "epoch": 0.14, "learning_rate": 2.4151436031331595e-05, "loss": 1.3533494, "memory(GiB)": 71.11, "step": 185, "train_speed(iter/s)": 0.035394 }, { "acc": 0.60119219, "epoch": 0.15, "learning_rate": 2.4804177545691905e-05, "loss": 1.36805573, "memory(GiB)": 71.11, "step": 190, "train_speed(iter/s)": 0.035403 }, { "acc": 0.59402022, "epoch": 0.15, "learning_rate": 2.545691906005222e-05, "loss": 1.43324385, "memory(GiB)": 71.11, "step": 195, "train_speed(iter/s)": 0.03533 }, { "acc": 0.59447851, "epoch": 0.16, "learning_rate": 2.6109660574412536e-05, "loss": 1.40030098, "memory(GiB)": 71.11, "step": 200, "train_speed(iter/s)": 0.035306 }, { "acc": 0.60737519, "epoch": 0.16, "learning_rate": 2.6762402088772846e-05, "loss": 1.3658123, "memory(GiB)": 71.11, "step": 205, "train_speed(iter/s)": 0.035312 }, { "acc": 0.60835319, "epoch": 0.16, "learning_rate": 2.741514360313316e-05, "loss": 1.36259966, "memory(GiB)": 71.11, "step": 210, "train_speed(iter/s)": 0.03532 }, { "acc": 0.60979681, "epoch": 0.17, "learning_rate": 2.8067885117493477e-05, "loss": 1.33051977, "memory(GiB)": 71.11, "step": 215, "train_speed(iter/s)": 0.035329 }, { "acc": 0.62091312, "epoch": 0.17, "learning_rate": 2.8720626631853787e-05, "loss": 1.3052928, "memory(GiB)": 71.11, "step": 220, "train_speed(iter/s)": 0.035339 }, { "acc": 0.62655573, "epoch": 0.18, "learning_rate": 2.9373368146214104e-05, "loss": 1.29806547, "memory(GiB)": 71.11, "step": 225, "train_speed(iter/s)": 0.035353 }, { "acc": 0.60786376, "epoch": 0.18, "learning_rate": 3.0026109660574414e-05, "loss": 1.3772687, "memory(GiB)": 71.11, "step": 230, "train_speed(iter/s)": 0.03536 }, { "acc": 0.60273538, "epoch": 0.18, "learning_rate": 3.067885117493473e-05, "loss": 1.36951752, "memory(GiB)": 71.11, "step": 235, "train_speed(iter/s)": 0.035368 }, { "acc": 0.61111588, "epoch": 0.19, "learning_rate": 3.1331592689295045e-05, "loss": 1.35591669, "memory(GiB)": 71.11, "step": 240, "train_speed(iter/s)": 0.035378 }, { "acc": 0.613943, "epoch": 0.19, "learning_rate": 3.1984334203655355e-05, "loss": 1.36014338, "memory(GiB)": 71.11, "step": 245, "train_speed(iter/s)": 0.035382 }, { "acc": 0.61394243, "epoch": 0.2, "learning_rate": 3.263707571801567e-05, "loss": 1.36864185, "memory(GiB)": 71.11, "step": 250, "train_speed(iter/s)": 0.03539 }, { "acc": 0.61200385, "epoch": 0.2, "learning_rate": 3.328981723237598e-05, "loss": 1.3423625, "memory(GiB)": 71.11, "step": 255, "train_speed(iter/s)": 0.035401 }, { "acc": 0.61476417, "epoch": 0.2, "learning_rate": 3.394255874673629e-05, "loss": 1.36127625, "memory(GiB)": 71.11, "step": 260, "train_speed(iter/s)": 0.035413 }, { "acc": 0.61477809, "epoch": 0.21, "learning_rate": 3.459530026109661e-05, "loss": 1.35783281, "memory(GiB)": 71.11, "step": 265, "train_speed(iter/s)": 0.035422 }, { "acc": 0.62444715, "epoch": 0.21, "learning_rate": 3.524804177545692e-05, "loss": 1.31687889, "memory(GiB)": 71.11, "step": 270, "train_speed(iter/s)": 0.035432 }, { "acc": 0.61132975, "epoch": 0.22, "learning_rate": 3.5900783289817236e-05, "loss": 1.3453701, "memory(GiB)": 71.11, "step": 275, "train_speed(iter/s)": 0.035387 }, { "acc": 0.61926041, "epoch": 0.22, "learning_rate": 3.6553524804177546e-05, "loss": 1.30053482, "memory(GiB)": 71.11, "step": 280, "train_speed(iter/s)": 0.035397 }, { "acc": 0.61626806, "epoch": 0.22, "learning_rate": 3.720626631853786e-05, "loss": 1.33418636, "memory(GiB)": 71.11, "step": 285, "train_speed(iter/s)": 0.035406 }, { "acc": 0.61756167, "epoch": 0.23, "learning_rate": 3.7859007832898173e-05, "loss": 1.32053947, "memory(GiB)": 79.4, "step": 290, "train_speed(iter/s)": 0.035387 }, { "acc": 0.61689606, "epoch": 0.23, "learning_rate": 3.8511749347258484e-05, "loss": 1.30887852, "memory(GiB)": 82.21, "step": 295, "train_speed(iter/s)": 0.035371 }, { "acc": 0.62750869, "epoch": 0.24, "learning_rate": 3.91644908616188e-05, "loss": 1.34463081, "memory(GiB)": 76.69, "step": 300, "train_speed(iter/s)": 0.035378 }, { "acc": 0.60788245, "epoch": 0.24, "learning_rate": 3.981723237597911e-05, "loss": 1.38475618, "memory(GiB)": 76.69, "step": 305, "train_speed(iter/s)": 0.035362 }, { "acc": 0.62858248, "epoch": 0.24, "learning_rate": 4.046997389033943e-05, "loss": 1.28304482, "memory(GiB)": 76.69, "step": 310, "train_speed(iter/s)": 0.03537 }, { "acc": 0.63606496, "epoch": 0.25, "learning_rate": 4.112271540469974e-05, "loss": 1.28804827, "memory(GiB)": 76.69, "step": 315, "train_speed(iter/s)": 0.035378 }, { "acc": 0.62939467, "epoch": 0.25, "learning_rate": 4.1775456919060055e-05, "loss": 1.29029045, "memory(GiB)": 76.69, "step": 320, "train_speed(iter/s)": 0.035385 }, { "acc": 0.618082, "epoch": 0.25, "learning_rate": 4.242819843342037e-05, "loss": 1.29355927, "memory(GiB)": 76.69, "step": 325, "train_speed(iter/s)": 0.035395 }, { "acc": 0.61671734, "epoch": 0.26, "learning_rate": 4.308093994778068e-05, "loss": 1.32377872, "memory(GiB)": 76.69, "step": 330, "train_speed(iter/s)": 0.035381 }, { "acc": 0.62930007, "epoch": 0.26, "learning_rate": 4.3733681462141e-05, "loss": 1.26064873, "memory(GiB)": 76.69, "step": 335, "train_speed(iter/s)": 0.035391 }, { "acc": 0.62707295, "epoch": 0.27, "learning_rate": 4.438642297650131e-05, "loss": 1.25469856, "memory(GiB)": 76.69, "step": 340, "train_speed(iter/s)": 0.035377 }, { "acc": 0.63075371, "epoch": 0.27, "learning_rate": 4.503916449086162e-05, "loss": 1.25198593, "memory(GiB)": 76.69, "step": 345, "train_speed(iter/s)": 0.035383 }, { "acc": 0.63224497, "epoch": 0.27, "learning_rate": 4.5691906005221936e-05, "loss": 1.27103462, "memory(GiB)": 76.69, "step": 350, "train_speed(iter/s)": 0.03539 }, { "acc": 0.62364006, "epoch": 0.28, "learning_rate": 4.6344647519582246e-05, "loss": 1.30759945, "memory(GiB)": 76.69, "step": 355, "train_speed(iter/s)": 0.035395 }, { "acc": 0.63504333, "epoch": 0.28, "learning_rate": 4.699738903394256e-05, "loss": 1.27207603, "memory(GiB)": 76.69, "step": 360, "train_speed(iter/s)": 0.0354 }, { "acc": 0.61879263, "epoch": 0.29, "learning_rate": 4.7650130548302874e-05, "loss": 1.32293415, "memory(GiB)": 76.69, "step": 365, "train_speed(iter/s)": 0.035405 }, { "acc": 0.62347574, "epoch": 0.29, "learning_rate": 4.830287206266319e-05, "loss": 1.30256233, "memory(GiB)": 76.69, "step": 370, "train_speed(iter/s)": 0.035409 }, { "acc": 0.64439292, "epoch": 0.29, "learning_rate": 4.89556135770235e-05, "loss": 1.2355484, "memory(GiB)": 85.02, "step": 375, "train_speed(iter/s)": 0.035394 }, { "acc": 0.62272491, "epoch": 0.3, "learning_rate": 4.960835509138381e-05, "loss": 1.30343094, "memory(GiB)": 79.49, "step": 380, "train_speed(iter/s)": 0.035398 }, { "acc": 0.62577806, "epoch": 0.3, "learning_rate": 5.026109660574413e-05, "loss": 1.26718674, "memory(GiB)": 79.49, "step": 385, "train_speed(iter/s)": 0.035387 }, { "acc": 0.62948937, "epoch": 0.31, "learning_rate": 5.091383812010444e-05, "loss": 1.2764926, "memory(GiB)": 79.49, "step": 390, "train_speed(iter/s)": 0.035394 }, { "acc": 0.62468576, "epoch": 0.31, "learning_rate": 5.156657963446475e-05, "loss": 1.27845716, "memory(GiB)": 79.49, "step": 395, "train_speed(iter/s)": 0.035401 }, { "acc": 0.63630462, "epoch": 0.31, "learning_rate": 5.221932114882507e-05, "loss": 1.23259668, "memory(GiB)": 79.49, "step": 400, "train_speed(iter/s)": 0.035391 }, { "acc": 0.62778835, "epoch": 0.32, "learning_rate": 5.287206266318538e-05, "loss": 1.2565299, "memory(GiB)": 79.49, "step": 405, "train_speed(iter/s)": 0.035396 }, { "acc": 0.62839155, "epoch": 0.32, "learning_rate": 5.352480417754569e-05, "loss": 1.29196539, "memory(GiB)": 79.49, "step": 410, "train_speed(iter/s)": 0.035403 }, { "acc": 0.62438302, "epoch": 0.33, "learning_rate": 5.417754569190601e-05, "loss": 1.31461248, "memory(GiB)": 79.49, "step": 415, "train_speed(iter/s)": 0.035408 }, { "acc": 0.63546472, "epoch": 0.33, "learning_rate": 5.483028720626632e-05, "loss": 1.25184908, "memory(GiB)": 79.49, "step": 420, "train_speed(iter/s)": 0.035379 }, { "acc": 0.6268621, "epoch": 0.33, "learning_rate": 5.5483028720626636e-05, "loss": 1.28653288, "memory(GiB)": 79.49, "step": 425, "train_speed(iter/s)": 0.035384 }, { "acc": 0.62927337, "epoch": 0.34, "learning_rate": 5.613577023498695e-05, "loss": 1.3057992, "memory(GiB)": 79.49, "step": 430, "train_speed(iter/s)": 0.035372 }, { "acc": 0.63009191, "epoch": 0.34, "learning_rate": 5.6788511749347264e-05, "loss": 1.27884521, "memory(GiB)": 79.49, "step": 435, "train_speed(iter/s)": 0.035361 }, { "acc": 0.62101679, "epoch": 0.34, "learning_rate": 5.7441253263707574e-05, "loss": 1.32610731, "memory(GiB)": 79.49, "step": 440, "train_speed(iter/s)": 0.035365 }, { "acc": 0.62638683, "epoch": 0.35, "learning_rate": 5.8093994778067884e-05, "loss": 1.26304455, "memory(GiB)": 79.49, "step": 445, "train_speed(iter/s)": 0.03537 }, { "acc": 0.62493896, "epoch": 0.35, "learning_rate": 5.874673629242821e-05, "loss": 1.29808645, "memory(GiB)": 79.49, "step": 450, "train_speed(iter/s)": 0.035357 }, { "acc": 0.63067155, "epoch": 0.36, "learning_rate": 5.939947780678852e-05, "loss": 1.25492554, "memory(GiB)": 79.49, "step": 455, "train_speed(iter/s)": 0.035345 }, { "acc": 0.62847533, "epoch": 0.36, "learning_rate": 6.005221932114883e-05, "loss": 1.29804668, "memory(GiB)": 79.49, "step": 460, "train_speed(iter/s)": 0.035335 }, { "acc": 0.61040778, "epoch": 0.36, "learning_rate": 6.070496083550914e-05, "loss": 1.3139143, "memory(GiB)": 79.49, "step": 465, "train_speed(iter/s)": 0.035312 }, { "acc": 0.62770967, "epoch": 0.37, "learning_rate": 6.135770234986946e-05, "loss": 1.26855469, "memory(GiB)": 79.49, "step": 470, "train_speed(iter/s)": 0.035319 }, { "acc": 0.62000332, "epoch": 0.37, "learning_rate": 6.201044386422978e-05, "loss": 1.30712109, "memory(GiB)": 79.49, "step": 475, "train_speed(iter/s)": 0.035309 }, { "acc": 0.62734308, "epoch": 0.38, "learning_rate": 6.266318537859009e-05, "loss": 1.24606352, "memory(GiB)": 79.49, "step": 480, "train_speed(iter/s)": 0.035316 }, { "acc": 0.63649292, "epoch": 0.38, "learning_rate": 6.33159268929504e-05, "loss": 1.22983913, "memory(GiB)": 79.49, "step": 485, "train_speed(iter/s)": 0.035319 }, { "acc": 0.64232011, "epoch": 0.38, "learning_rate": 6.396866840731071e-05, "loss": 1.23846798, "memory(GiB)": 79.49, "step": 490, "train_speed(iter/s)": 0.035309 }, { "acc": 0.63173985, "epoch": 0.39, "learning_rate": 6.462140992167102e-05, "loss": 1.26958008, "memory(GiB)": 79.49, "step": 495, "train_speed(iter/s)": 0.035315 }, { "acc": 0.6342205, "epoch": 0.39, "learning_rate": 6.527415143603134e-05, "loss": 1.23609667, "memory(GiB)": 79.49, "step": 500, "train_speed(iter/s)": 0.035321 }, { "acc": 0.64169135, "epoch": 0.4, "learning_rate": 6.592689295039165e-05, "loss": 1.2388341, "memory(GiB)": 79.49, "step": 505, "train_speed(iter/s)": 0.035324 }, { "acc": 0.6370995, "epoch": 0.4, "learning_rate": 6.657963446475196e-05, "loss": 1.24629793, "memory(GiB)": 79.49, "step": 510, "train_speed(iter/s)": 0.03533 }, { "acc": 0.64395504, "epoch": 0.4, "learning_rate": 6.723237597911227e-05, "loss": 1.25374393, "memory(GiB)": 79.49, "step": 515, "train_speed(iter/s)": 0.035319 }, { "acc": 0.62348013, "epoch": 0.41, "learning_rate": 6.788511749347258e-05, "loss": 1.30115499, "memory(GiB)": 79.49, "step": 520, "train_speed(iter/s)": 0.035324 }, { "acc": 0.6296721, "epoch": 0.41, "learning_rate": 6.853785900783291e-05, "loss": 1.24414005, "memory(GiB)": 79.49, "step": 525, "train_speed(iter/s)": 0.035316 }, { "acc": 0.64563322, "epoch": 0.42, "learning_rate": 6.919060052219322e-05, "loss": 1.20363417, "memory(GiB)": 87.82, "step": 530, "train_speed(iter/s)": 0.035306 }, { "acc": 0.63340597, "epoch": 0.42, "learning_rate": 6.984334203655353e-05, "loss": 1.26357803, "memory(GiB)": 87.82, "step": 535, "train_speed(iter/s)": 0.035298 }, { "acc": 0.63216286, "epoch": 0.42, "learning_rate": 7.049608355091384e-05, "loss": 1.22720327, "memory(GiB)": 87.82, "step": 540, "train_speed(iter/s)": 0.035302 }, { "acc": 0.63558154, "epoch": 0.43, "learning_rate": 7.114882506527415e-05, "loss": 1.23628178, "memory(GiB)": 87.82, "step": 545, "train_speed(iter/s)": 0.035307 }, { "acc": 0.63481274, "epoch": 0.43, "learning_rate": 7.180156657963447e-05, "loss": 1.27170897, "memory(GiB)": 87.82, "step": 550, "train_speed(iter/s)": 0.035311 }, { "acc": 0.62329593, "epoch": 0.43, "learning_rate": 7.245430809399478e-05, "loss": 1.29271698, "memory(GiB)": 87.82, "step": 555, "train_speed(iter/s)": 0.035315 }, { "acc": 0.62661791, "epoch": 0.44, "learning_rate": 7.310704960835509e-05, "loss": 1.25905991, "memory(GiB)": 87.82, "step": 560, "train_speed(iter/s)": 0.035318 }, { "acc": 0.63888206, "epoch": 0.44, "learning_rate": 7.37597911227154e-05, "loss": 1.23162041, "memory(GiB)": 87.82, "step": 565, "train_speed(iter/s)": 0.035322 }, { "acc": 0.6273633, "epoch": 0.45, "learning_rate": 7.441253263707573e-05, "loss": 1.27724876, "memory(GiB)": 87.82, "step": 570, "train_speed(iter/s)": 0.035314 }, { "acc": 0.65672364, "epoch": 0.45, "learning_rate": 7.506527415143604e-05, "loss": 1.18381844, "memory(GiB)": 87.82, "step": 575, "train_speed(iter/s)": 0.035319 }, { "acc": 0.63787546, "epoch": 0.45, "learning_rate": 7.571801566579635e-05, "loss": 1.24759865, "memory(GiB)": 87.82, "step": 580, "train_speed(iter/s)": 0.035322 }, { "acc": 0.62682576, "epoch": 0.46, "learning_rate": 7.637075718015666e-05, "loss": 1.26602411, "memory(GiB)": 87.82, "step": 585, "train_speed(iter/s)": 0.035314 }, { "acc": 0.64525466, "epoch": 0.46, "learning_rate": 7.702349869451697e-05, "loss": 1.20333176, "memory(GiB)": 87.82, "step": 590, "train_speed(iter/s)": 0.035318 }, { "acc": 0.63623095, "epoch": 0.47, "learning_rate": 7.767624020887729e-05, "loss": 1.23436947, "memory(GiB)": 87.82, "step": 595, "train_speed(iter/s)": 0.035323 }, { "acc": 0.63377829, "epoch": 0.47, "learning_rate": 7.83289817232376e-05, "loss": 1.25122204, "memory(GiB)": 87.82, "step": 600, "train_speed(iter/s)": 0.035326 }, { "acc": 0.64111838, "epoch": 0.47, "learning_rate": 7.898172323759791e-05, "loss": 1.23531017, "memory(GiB)": 87.82, "step": 605, "train_speed(iter/s)": 0.03533 }, { "acc": 0.631183, "epoch": 0.48, "learning_rate": 7.963446475195822e-05, "loss": 1.25364704, "memory(GiB)": 87.82, "step": 610, "train_speed(iter/s)": 0.035335 }, { "acc": 0.63098602, "epoch": 0.48, "learning_rate": 8.028720626631853e-05, "loss": 1.25691967, "memory(GiB)": 87.82, "step": 615, "train_speed(iter/s)": 0.035328 }, { "acc": 0.63813372, "epoch": 0.49, "learning_rate": 8.093994778067886e-05, "loss": 1.22148628, "memory(GiB)": 87.82, "step": 620, "train_speed(iter/s)": 0.035321 }, { "acc": 0.63645906, "epoch": 0.49, "learning_rate": 8.159268929503917e-05, "loss": 1.2717802, "memory(GiB)": 87.82, "step": 625, "train_speed(iter/s)": 0.035324 }, { "acc": 0.63675809, "epoch": 0.49, "learning_rate": 8.224543080939948e-05, "loss": 1.22741051, "memory(GiB)": 87.82, "step": 630, "train_speed(iter/s)": 0.035328 }, { "acc": 0.63550248, "epoch": 0.5, "learning_rate": 8.28981723237598e-05, "loss": 1.25706348, "memory(GiB)": 87.82, "step": 635, "train_speed(iter/s)": 0.035332 }, { "acc": 0.63328991, "epoch": 0.5, "learning_rate": 8.355091383812011e-05, "loss": 1.24116373, "memory(GiB)": 87.82, "step": 640, "train_speed(iter/s)": 0.035337 }, { "acc": 0.64723411, "epoch": 0.51, "learning_rate": 8.420365535248042e-05, "loss": 1.21720247, "memory(GiB)": 87.82, "step": 645, "train_speed(iter/s)": 0.03534 }, { "acc": 0.63475337, "epoch": 0.51, "learning_rate": 8.485639686684074e-05, "loss": 1.25098181, "memory(GiB)": 87.82, "step": 650, "train_speed(iter/s)": 0.035343 }, { "acc": 0.65048108, "epoch": 0.51, "learning_rate": 8.550913838120105e-05, "loss": 1.19720526, "memory(GiB)": 87.82, "step": 655, "train_speed(iter/s)": 0.035348 }, { "acc": 0.63787851, "epoch": 0.52, "learning_rate": 8.616187989556136e-05, "loss": 1.24119263, "memory(GiB)": 87.82, "step": 660, "train_speed(iter/s)": 0.035342 }, { "acc": 0.63413329, "epoch": 0.52, "learning_rate": 8.681462140992167e-05, "loss": 1.27156887, "memory(GiB)": 87.82, "step": 665, "train_speed(iter/s)": 0.035346 }, { "acc": 0.62740493, "epoch": 0.53, "learning_rate": 8.7467362924282e-05, "loss": 1.27027969, "memory(GiB)": 87.82, "step": 670, "train_speed(iter/s)": 0.035337 }, { "acc": 0.64307756, "epoch": 0.53, "learning_rate": 8.812010443864231e-05, "loss": 1.2422493, "memory(GiB)": 87.82, "step": 675, "train_speed(iter/s)": 0.035341 }, { "acc": 0.63943014, "epoch": 0.53, "learning_rate": 8.877284595300262e-05, "loss": 1.2298399, "memory(GiB)": 87.82, "step": 680, "train_speed(iter/s)": 0.035333 }, { "acc": 0.6344151, "epoch": 0.54, "learning_rate": 8.942558746736293e-05, "loss": 1.24074373, "memory(GiB)": 87.82, "step": 685, "train_speed(iter/s)": 0.035317 }, { "acc": 0.62509422, "epoch": 0.54, "learning_rate": 9.007832898172324e-05, "loss": 1.29264908, "memory(GiB)": 90.61, "step": 690, "train_speed(iter/s)": 0.035299 }, { "acc": 0.64127073, "epoch": 0.54, "learning_rate": 9.073107049608356e-05, "loss": 1.21613617, "memory(GiB)": 85.12, "step": 695, "train_speed(iter/s)": 0.035303 }, { "acc": 0.63301926, "epoch": 0.55, "learning_rate": 9.138381201044387e-05, "loss": 1.25347605, "memory(GiB)": 85.12, "step": 700, "train_speed(iter/s)": 0.035307 }, { "acc": 0.63968863, "epoch": 0.55, "learning_rate": 9.203655352480418e-05, "loss": 1.20407486, "memory(GiB)": 85.12, "step": 705, "train_speed(iter/s)": 0.035311 }, { "acc": 0.64357448, "epoch": 0.56, "learning_rate": 9.268929503916449e-05, "loss": 1.19348354, "memory(GiB)": 85.12, "step": 710, "train_speed(iter/s)": 0.035315 }, { "acc": 0.6440393, "epoch": 0.56, "learning_rate": 9.33420365535248e-05, "loss": 1.21024275, "memory(GiB)": 85.12, "step": 715, "train_speed(iter/s)": 0.035319 }, { "acc": 0.64507108, "epoch": 0.56, "learning_rate": 9.399477806788513e-05, "loss": 1.23527927, "memory(GiB)": 85.12, "step": 720, "train_speed(iter/s)": 0.035313 }, { "acc": 0.64306412, "epoch": 0.57, "learning_rate": 9.464751958224544e-05, "loss": 1.24440823, "memory(GiB)": 85.12, "step": 725, "train_speed(iter/s)": 0.035318 }, { "acc": 0.64716368, "epoch": 0.57, "learning_rate": 9.530026109660575e-05, "loss": 1.23672924, "memory(GiB)": 85.12, "step": 730, "train_speed(iter/s)": 0.035321 }, { "acc": 0.6508863, "epoch": 0.58, "learning_rate": 9.595300261096606e-05, "loss": 1.16483746, "memory(GiB)": 85.12, "step": 735, "train_speed(iter/s)": 0.035324 }, { "acc": 0.64414196, "epoch": 0.58, "learning_rate": 9.660574412532638e-05, "loss": 1.24241323, "memory(GiB)": 85.12, "step": 740, "train_speed(iter/s)": 0.035326 }, { "acc": 0.64069014, "epoch": 0.58, "learning_rate": 9.725848563968669e-05, "loss": 1.21243725, "memory(GiB)": 85.12, "step": 745, "train_speed(iter/s)": 0.035329 }, { "acc": 0.6319931, "epoch": 0.59, "learning_rate": 9.7911227154047e-05, "loss": 1.23099251, "memory(GiB)": 85.12, "step": 750, "train_speed(iter/s)": 0.035332 }, { "acc": 0.63715777, "epoch": 0.59, "learning_rate": 9.856396866840731e-05, "loss": 1.23596754, "memory(GiB)": 85.12, "step": 755, "train_speed(iter/s)": 0.035336 }, { "acc": 0.63574848, "epoch": 0.6, "learning_rate": 9.921671018276762e-05, "loss": 1.23194542, "memory(GiB)": 85.12, "step": 760, "train_speed(iter/s)": 0.035339 }, { "acc": 0.63391104, "epoch": 0.6, "learning_rate": 9.986945169712795e-05, "loss": 1.25005703, "memory(GiB)": 85.12, "step": 765, "train_speed(iter/s)": 0.035343 }, { "acc": 0.64336019, "epoch": 0.6, "learning_rate": 9.999998134167974e-05, "loss": 1.19814224, "memory(GiB)": 85.12, "step": 770, "train_speed(iter/s)": 0.035346 }, { "acc": 0.64235554, "epoch": 0.61, "learning_rate": 9.999990554227756e-05, "loss": 1.20386868, "memory(GiB)": 85.12, "step": 775, "train_speed(iter/s)": 0.035348 }, { "acc": 0.63095374, "epoch": 0.61, "learning_rate": 9.999977143573674e-05, "loss": 1.24922619, "memory(GiB)": 85.12, "step": 780, "train_speed(iter/s)": 0.03535 }, { "acc": 0.63008108, "epoch": 0.62, "learning_rate": 9.99995790222137e-05, "loss": 1.26964073, "memory(GiB)": 85.12, "step": 785, "train_speed(iter/s)": 0.035353 }, { "acc": 0.63752484, "epoch": 0.62, "learning_rate": 9.999932830193279e-05, "loss": 1.22619057, "memory(GiB)": 85.12, "step": 790, "train_speed(iter/s)": 0.035348 }, { "acc": 0.63318844, "epoch": 0.62, "learning_rate": 9.999901927518642e-05, "loss": 1.26003723, "memory(GiB)": 85.12, "step": 795, "train_speed(iter/s)": 0.035351 }, { "acc": 0.63203177, "epoch": 0.63, "learning_rate": 9.999865194233496e-05, "loss": 1.24707184, "memory(GiB)": 85.12, "step": 800, "train_speed(iter/s)": 0.035344 }, { "acc": 0.62407198, "epoch": 0.63, "learning_rate": 9.999822630380674e-05, "loss": 1.28778019, "memory(GiB)": 85.12, "step": 805, "train_speed(iter/s)": 0.035345 }, { "acc": 0.62447062, "epoch": 0.63, "learning_rate": 9.999774236009813e-05, "loss": 1.30319834, "memory(GiB)": 85.12, "step": 810, "train_speed(iter/s)": 0.035348 }, { "acc": 0.63752298, "epoch": 0.64, "learning_rate": 9.999720011177348e-05, "loss": 1.24396782, "memory(GiB)": 85.12, "step": 815, "train_speed(iter/s)": 0.035342 }, { "acc": 0.6344676, "epoch": 0.64, "learning_rate": 9.999659955946514e-05, "loss": 1.24418392, "memory(GiB)": 85.12, "step": 820, "train_speed(iter/s)": 0.035327 }, { "acc": 0.63493099, "epoch": 0.65, "learning_rate": 9.999594070387343e-05, "loss": 1.25098581, "memory(GiB)": 85.12, "step": 825, "train_speed(iter/s)": 0.035329 }, { "acc": 0.64141641, "epoch": 0.65, "learning_rate": 9.999522354576669e-05, "loss": 1.21629629, "memory(GiB)": 85.12, "step": 830, "train_speed(iter/s)": 0.035323 }, { "acc": 0.65251746, "epoch": 0.65, "learning_rate": 9.99944480859812e-05, "loss": 1.16716757, "memory(GiB)": 85.12, "step": 835, "train_speed(iter/s)": 0.035309 }, { "acc": 0.63880744, "epoch": 0.66, "learning_rate": 9.999361432542128e-05, "loss": 1.21747561, "memory(GiB)": 85.12, "step": 840, "train_speed(iter/s)": 0.035304 }, { "acc": 0.6420105, "epoch": 0.66, "learning_rate": 9.99927222650592e-05, "loss": 1.2190115, "memory(GiB)": 85.12, "step": 845, "train_speed(iter/s)": 0.035308 }, { "acc": 0.63001771, "epoch": 0.67, "learning_rate": 9.999177190593525e-05, "loss": 1.25841103, "memory(GiB)": 85.12, "step": 850, "train_speed(iter/s)": 0.035276 }, { "acc": 0.62701368, "epoch": 0.67, "learning_rate": 9.999076324915768e-05, "loss": 1.28360729, "memory(GiB)": 85.12, "step": 855, "train_speed(iter/s)": 0.035271 }, { "acc": 0.6483603, "epoch": 0.67, "learning_rate": 9.998969629590274e-05, "loss": 1.19540863, "memory(GiB)": 85.12, "step": 860, "train_speed(iter/s)": 0.035266 }, { "acc": 0.64731088, "epoch": 0.68, "learning_rate": 9.998857104741461e-05, "loss": 1.19496069, "memory(GiB)": 85.12, "step": 865, "train_speed(iter/s)": 0.035269 }, { "acc": 0.64258432, "epoch": 0.68, "learning_rate": 9.998738750500553e-05, "loss": 1.2038908, "memory(GiB)": 85.12, "step": 870, "train_speed(iter/s)": 0.035272 }, { "acc": 0.63443327, "epoch": 0.69, "learning_rate": 9.998614567005569e-05, "loss": 1.24369755, "memory(GiB)": 85.12, "step": 875, "train_speed(iter/s)": 0.035267 }, { "acc": 0.63675046, "epoch": 0.69, "learning_rate": 9.998484554401323e-05, "loss": 1.25242014, "memory(GiB)": 85.12, "step": 880, "train_speed(iter/s)": 0.035271 }, { "acc": 0.63459558, "epoch": 0.69, "learning_rate": 9.99834871283943e-05, "loss": 1.20982609, "memory(GiB)": 85.12, "step": 885, "train_speed(iter/s)": 0.035274 }, { "acc": 0.62059612, "epoch": 0.7, "learning_rate": 9.998207042478298e-05, "loss": 1.29975319, "memory(GiB)": 85.12, "step": 890, "train_speed(iter/s)": 0.035277 }, { "acc": 0.64450974, "epoch": 0.7, "learning_rate": 9.998059543483138e-05, "loss": 1.21123343, "memory(GiB)": 85.12, "step": 895, "train_speed(iter/s)": 0.035281 }, { "acc": 0.64039574, "epoch": 0.71, "learning_rate": 9.997906216025954e-05, "loss": 1.22988548, "memory(GiB)": 85.12, "step": 900, "train_speed(iter/s)": 0.035276 }, { "acc": 0.64195466, "epoch": 0.71, "learning_rate": 9.997747060285548e-05, "loss": 1.23618727, "memory(GiB)": 85.12, "step": 905, "train_speed(iter/s)": 0.035279 }, { "acc": 0.64903636, "epoch": 0.71, "learning_rate": 9.99758207644752e-05, "loss": 1.1861618, "memory(GiB)": 85.12, "step": 910, "train_speed(iter/s)": 0.035276 }, { "acc": 0.64557695, "epoch": 0.72, "learning_rate": 9.997411264704264e-05, "loss": 1.16902866, "memory(GiB)": 85.12, "step": 915, "train_speed(iter/s)": 0.035278 }, { "acc": 0.6429708, "epoch": 0.72, "learning_rate": 9.99723462525497e-05, "loss": 1.20082893, "memory(GiB)": 85.12, "step": 920, "train_speed(iter/s)": 0.035281 }, { "acc": 0.64234171, "epoch": 0.72, "learning_rate": 9.99705215830563e-05, "loss": 1.2100441, "memory(GiB)": 85.12, "step": 925, "train_speed(iter/s)": 0.035283 }, { "acc": 0.64274926, "epoch": 0.73, "learning_rate": 9.996863864069019e-05, "loss": 1.15481606, "memory(GiB)": 85.12, "step": 930, "train_speed(iter/s)": 0.035285 }, { "acc": 0.63187203, "epoch": 0.73, "learning_rate": 9.996669742764722e-05, "loss": 1.26747904, "memory(GiB)": 85.12, "step": 935, "train_speed(iter/s)": 0.035288 }, { "acc": 0.65685897, "epoch": 0.74, "learning_rate": 9.996469794619111e-05, "loss": 1.1622716, "memory(GiB)": 85.12, "step": 940, "train_speed(iter/s)": 0.035283 }, { "acc": 0.64430394, "epoch": 0.74, "learning_rate": 9.996264019865353e-05, "loss": 1.21984491, "memory(GiB)": 85.12, "step": 945, "train_speed(iter/s)": 0.035287 }, { "acc": 0.64375916, "epoch": 0.74, "learning_rate": 9.996052418743414e-05, "loss": 1.23850603, "memory(GiB)": 85.12, "step": 950, "train_speed(iter/s)": 0.035289 }, { "acc": 0.63599143, "epoch": 0.75, "learning_rate": 9.995834991500048e-05, "loss": 1.2474575, "memory(GiB)": 85.12, "step": 955, "train_speed(iter/s)": 0.035292 }, { "acc": 0.63893137, "epoch": 0.75, "learning_rate": 9.99561173838881e-05, "loss": 1.23318357, "memory(GiB)": 85.12, "step": 960, "train_speed(iter/s)": 0.035294 }, { "acc": 0.64323502, "epoch": 0.76, "learning_rate": 9.995382659670043e-05, "loss": 1.20357561, "memory(GiB)": 85.12, "step": 965, "train_speed(iter/s)": 0.035295 }, { "acc": 0.64130268, "epoch": 0.76, "learning_rate": 9.995147755610885e-05, "loss": 1.21935377, "memory(GiB)": 85.12, "step": 970, "train_speed(iter/s)": 0.035299 }, { "acc": 0.64334393, "epoch": 0.76, "learning_rate": 9.994907026485273e-05, "loss": 1.18746109, "memory(GiB)": 85.12, "step": 975, "train_speed(iter/s)": 0.035301 }, { "acc": 0.64836397, "epoch": 0.77, "learning_rate": 9.994660472573929e-05, "loss": 1.18723745, "memory(GiB)": 85.12, "step": 980, "train_speed(iter/s)": 0.035305 }, { "acc": 0.64067945, "epoch": 0.77, "learning_rate": 9.994408094164369e-05, "loss": 1.21092262, "memory(GiB)": 85.12, "step": 985, "train_speed(iter/s)": 0.035308 }, { "acc": 0.65696807, "epoch": 0.78, "learning_rate": 9.994149891550906e-05, "loss": 1.15461454, "memory(GiB)": 85.12, "step": 990, "train_speed(iter/s)": 0.035311 }, { "acc": 0.64268813, "epoch": 0.78, "learning_rate": 9.99388586503464e-05, "loss": 1.2276782, "memory(GiB)": 85.12, "step": 995, "train_speed(iter/s)": 0.0353 }, { "acc": 0.64682531, "epoch": 0.78, "learning_rate": 9.993616014923464e-05, "loss": 1.19210787, "memory(GiB)": 85.12, "step": 1000, "train_speed(iter/s)": 0.035295 }, { "acc": 0.63967905, "epoch": 0.79, "learning_rate": 9.993340341532063e-05, "loss": 1.21900482, "memory(GiB)": 85.12, "step": 1005, "train_speed(iter/s)": 0.035298 }, { "acc": 0.64601598, "epoch": 0.79, "learning_rate": 9.993058845181913e-05, "loss": 1.18572149, "memory(GiB)": 85.12, "step": 1010, "train_speed(iter/s)": 0.0353 }, { "acc": 0.6581532, "epoch": 0.8, "learning_rate": 9.992771526201278e-05, "loss": 1.17555571, "memory(GiB)": 85.12, "step": 1015, "train_speed(iter/s)": 0.035303 }, { "acc": 0.63905029, "epoch": 0.8, "learning_rate": 9.992478384925215e-05, "loss": 1.22506828, "memory(GiB)": 85.12, "step": 1020, "train_speed(iter/s)": 0.035306 }, { "acc": 0.63737507, "epoch": 0.8, "learning_rate": 9.992179421695566e-05, "loss": 1.24342728, "memory(GiB)": 85.12, "step": 1025, "train_speed(iter/s)": 0.035309 }, { "acc": 0.63883562, "epoch": 0.81, "learning_rate": 9.99187463686097e-05, "loss": 1.22715645, "memory(GiB)": 85.12, "step": 1030, "train_speed(iter/s)": 0.035304 }, { "acc": 0.64026632, "epoch": 0.81, "learning_rate": 9.991564030776847e-05, "loss": 1.2529954, "memory(GiB)": 85.12, "step": 1035, "train_speed(iter/s)": 0.035307 }, { "acc": 0.65661197, "epoch": 0.82, "learning_rate": 9.99124760380541e-05, "loss": 1.17418413, "memory(GiB)": 85.12, "step": 1040, "train_speed(iter/s)": 0.035295 }, { "acc": 0.64462228, "epoch": 0.82, "learning_rate": 9.990925356315659e-05, "loss": 1.21107912, "memory(GiB)": 85.12, "step": 1045, "train_speed(iter/s)": 0.035297 }, { "acc": 0.65130072, "epoch": 0.82, "learning_rate": 9.99059728868338e-05, "loss": 1.18552179, "memory(GiB)": 85.12, "step": 1050, "train_speed(iter/s)": 0.0353 }, { "acc": 0.64942465, "epoch": 0.83, "learning_rate": 9.990263401291149e-05, "loss": 1.19978065, "memory(GiB)": 85.12, "step": 1055, "train_speed(iter/s)": 0.035302 }, { "acc": 0.64416938, "epoch": 0.83, "learning_rate": 9.989923694528327e-05, "loss": 1.21087933, "memory(GiB)": 85.12, "step": 1060, "train_speed(iter/s)": 0.035298 }, { "acc": 0.64448967, "epoch": 0.83, "learning_rate": 9.989578168791059e-05, "loss": 1.23659315, "memory(GiB)": 85.12, "step": 1065, "train_speed(iter/s)": 0.0353 }, { "acc": 0.6494998, "epoch": 0.84, "learning_rate": 9.989226824482281e-05, "loss": 1.1761158, "memory(GiB)": 85.12, "step": 1070, "train_speed(iter/s)": 0.035302 }, { "acc": 0.64944773, "epoch": 0.84, "learning_rate": 9.98886966201171e-05, "loss": 1.17971296, "memory(GiB)": 85.12, "step": 1075, "train_speed(iter/s)": 0.035305 }, { "acc": 0.64949713, "epoch": 0.85, "learning_rate": 9.98850668179585e-05, "loss": 1.19467411, "memory(GiB)": 85.12, "step": 1080, "train_speed(iter/s)": 0.035301 }, { "acc": 0.6409584, "epoch": 0.85, "learning_rate": 9.988137884257987e-05, "loss": 1.22220039, "memory(GiB)": 85.12, "step": 1085, "train_speed(iter/s)": 0.035289 }, { "acc": 0.65775137, "epoch": 0.85, "learning_rate": 9.987763269828194e-05, "loss": 1.15307426, "memory(GiB)": 85.12, "step": 1090, "train_speed(iter/s)": 0.035292 }, { "acc": 0.65808535, "epoch": 0.86, "learning_rate": 9.987382838943325e-05, "loss": 1.16586542, "memory(GiB)": 85.12, "step": 1095, "train_speed(iter/s)": 0.035294 }, { "acc": 0.63382239, "epoch": 0.86, "learning_rate": 9.986996592047017e-05, "loss": 1.22561712, "memory(GiB)": 85.12, "step": 1100, "train_speed(iter/s)": 0.035297 }, { "acc": 0.64164915, "epoch": 0.87, "learning_rate": 9.986604529589691e-05, "loss": 1.2104146, "memory(GiB)": 85.12, "step": 1105, "train_speed(iter/s)": 0.035299 }, { "acc": 0.65060873, "epoch": 0.87, "learning_rate": 9.98620665202855e-05, "loss": 1.18811502, "memory(GiB)": 85.12, "step": 1110, "train_speed(iter/s)": 0.035301 }, { "acc": 0.64695697, "epoch": 0.87, "learning_rate": 9.985802959827573e-05, "loss": 1.20489264, "memory(GiB)": 85.12, "step": 1115, "train_speed(iter/s)": 0.035303 }, { "acc": 0.63937025, "epoch": 0.88, "learning_rate": 9.985393453457526e-05, "loss": 1.20603237, "memory(GiB)": 85.12, "step": 1120, "train_speed(iter/s)": 0.035306 }, { "acc": 0.65824099, "epoch": 0.88, "learning_rate": 9.984978133395954e-05, "loss": 1.16597528, "memory(GiB)": 85.12, "step": 1125, "train_speed(iter/s)": 0.035309 }, { "acc": 0.63783302, "epoch": 0.89, "learning_rate": 9.984557000127177e-05, "loss": 1.2381628, "memory(GiB)": 85.12, "step": 1130, "train_speed(iter/s)": 0.035311 }, { "acc": 0.64608231, "epoch": 0.89, "learning_rate": 9.984130054142302e-05, "loss": 1.20785971, "memory(GiB)": 85.12, "step": 1135, "train_speed(iter/s)": 0.035313 }, { "acc": 0.64283195, "epoch": 0.89, "learning_rate": 9.983697295939205e-05, "loss": 1.19586048, "memory(GiB)": 85.12, "step": 1140, "train_speed(iter/s)": 0.035315 }, { "acc": 0.64510169, "epoch": 0.9, "learning_rate": 9.983258726022549e-05, "loss": 1.17720518, "memory(GiB)": 85.12, "step": 1145, "train_speed(iter/s)": 0.035316 }, { "acc": 0.64794512, "epoch": 0.9, "learning_rate": 9.982814344903766e-05, "loss": 1.20341921, "memory(GiB)": 85.12, "step": 1150, "train_speed(iter/s)": 0.035319 }, { "acc": 0.64761252, "epoch": 0.91, "learning_rate": 9.982364153101072e-05, "loss": 1.20330772, "memory(GiB)": 85.12, "step": 1155, "train_speed(iter/s)": 0.035321 }, { "acc": 0.64469285, "epoch": 0.91, "learning_rate": 9.981908151139456e-05, "loss": 1.22739487, "memory(GiB)": 85.12, "step": 1160, "train_speed(iter/s)": 0.035323 }, { "acc": 0.65169878, "epoch": 0.91, "learning_rate": 9.98144633955068e-05, "loss": 1.17159405, "memory(GiB)": 85.12, "step": 1165, "train_speed(iter/s)": 0.035325 }, { "acc": 0.64189563, "epoch": 0.92, "learning_rate": 9.980978718873286e-05, "loss": 1.21570683, "memory(GiB)": 85.12, "step": 1170, "train_speed(iter/s)": 0.035327 }, { "acc": 0.65856137, "epoch": 0.92, "learning_rate": 9.980505289652585e-05, "loss": 1.14105463, "memory(GiB)": 85.12, "step": 1175, "train_speed(iter/s)": 0.035328 }, { "acc": 0.63436284, "epoch": 0.92, "learning_rate": 9.980026052440665e-05, "loss": 1.2412138, "memory(GiB)": 85.12, "step": 1180, "train_speed(iter/s)": 0.035325 }, { "acc": 0.65267048, "epoch": 0.93, "learning_rate": 9.979541007796388e-05, "loss": 1.17890749, "memory(GiB)": 85.12, "step": 1185, "train_speed(iter/s)": 0.035327 }, { "acc": 0.64592175, "epoch": 0.93, "learning_rate": 9.979050156285384e-05, "loss": 1.19027033, "memory(GiB)": 85.12, "step": 1190, "train_speed(iter/s)": 0.03533 }, { "acc": 0.66446619, "epoch": 0.94, "learning_rate": 9.978553498480057e-05, "loss": 1.15592375, "memory(GiB)": 85.12, "step": 1195, "train_speed(iter/s)": 0.035325 }, { "acc": 0.64879594, "epoch": 0.94, "learning_rate": 9.978051034959583e-05, "loss": 1.2092351, "memory(GiB)": 85.12, "step": 1200, "train_speed(iter/s)": 0.035327 }, { "acc": 0.64852567, "epoch": 0.94, "learning_rate": 9.977542766309907e-05, "loss": 1.19442778, "memory(GiB)": 85.12, "step": 1205, "train_speed(iter/s)": 0.035323 }, { "acc": 0.640028, "epoch": 0.95, "learning_rate": 9.977028693123744e-05, "loss": 1.21321182, "memory(GiB)": 85.12, "step": 1210, "train_speed(iter/s)": 0.035325 }, { "acc": 0.64608712, "epoch": 0.95, "learning_rate": 9.976508816000578e-05, "loss": 1.21685104, "memory(GiB)": 85.12, "step": 1215, "train_speed(iter/s)": 0.035317 }, { "acc": 0.65058255, "epoch": 0.96, "learning_rate": 9.975983135546661e-05, "loss": 1.20579329, "memory(GiB)": 85.12, "step": 1220, "train_speed(iter/s)": 0.035308 }, { "acc": 0.64077854, "epoch": 0.96, "learning_rate": 9.975451652375012e-05, "loss": 1.22381687, "memory(GiB)": 85.12, "step": 1225, "train_speed(iter/s)": 0.035304 }, { "acc": 0.64167862, "epoch": 0.96, "learning_rate": 9.974914367105419e-05, "loss": 1.20327978, "memory(GiB)": 85.12, "step": 1230, "train_speed(iter/s)": 0.035306 }, { "acc": 0.64583015, "epoch": 0.97, "learning_rate": 9.974371280364431e-05, "loss": 1.19592552, "memory(GiB)": 85.12, "step": 1235, "train_speed(iter/s)": 0.035308 }, { "acc": 0.6488265, "epoch": 0.97, "learning_rate": 9.973822392785373e-05, "loss": 1.17611341, "memory(GiB)": 85.12, "step": 1240, "train_speed(iter/s)": 0.035304 }, { "acc": 0.65858684, "epoch": 0.98, "learning_rate": 9.973267705008318e-05, "loss": 1.11910753, "memory(GiB)": 85.12, "step": 1245, "train_speed(iter/s)": 0.035306 }, { "acc": 0.63276486, "epoch": 0.98, "learning_rate": 9.97270721768012e-05, "loss": 1.24935932, "memory(GiB)": 85.12, "step": 1250, "train_speed(iter/s)": 0.035298 }, { "acc": 0.63920984, "epoch": 0.98, "learning_rate": 9.972140931454385e-05, "loss": 1.24413643, "memory(GiB)": 85.12, "step": 1255, "train_speed(iter/s)": 0.035299 }, { "acc": 0.64515247, "epoch": 0.99, "learning_rate": 9.971568846991486e-05, "loss": 1.18529148, "memory(GiB)": 85.12, "step": 1260, "train_speed(iter/s)": 0.035301 }, { "acc": 0.64360232, "epoch": 0.99, "learning_rate": 9.970990964958556e-05, "loss": 1.21833725, "memory(GiB)": 85.12, "step": 1265, "train_speed(iter/s)": 0.035303 }, { "acc": 0.64962535, "epoch": 1.0, "learning_rate": 9.970407286029487e-05, "loss": 1.16980963, "memory(GiB)": 85.12, "step": 1270, "train_speed(iter/s)": 0.035306 }, { "acc": 0.64517279, "epoch": 1.0, "learning_rate": 9.969817810884937e-05, "loss": 1.19798498, "memory(GiB)": 85.12, "step": 1275, "train_speed(iter/s)": 0.035307 }, { "epoch": 1.0, "eval_acc": 0.66190833959429, "eval_loss": 1.1170213222503662, "eval_runtime": 85.7668, "eval_samples_per_second": 1.084, "eval_steps_per_second": 1.084, "step": 1276 }, { "acc": 0.66531973, "epoch": 1.0, "learning_rate": 9.969222540212319e-05, "loss": 1.12897282, "memory(GiB)": 85.12, "step": 1280, "train_speed(iter/s)": 0.035228 }, { "acc": 0.64908504, "epoch": 1.01, "learning_rate": 9.968621474705802e-05, "loss": 1.19679098, "memory(GiB)": 85.12, "step": 1285, "train_speed(iter/s)": 0.035225 }, { "acc": 0.65595608, "epoch": 1.01, "learning_rate": 9.96801461506632e-05, "loss": 1.12893848, "memory(GiB)": 85.12, "step": 1290, "train_speed(iter/s)": 0.035228 }, { "acc": 0.66325932, "epoch": 1.01, "learning_rate": 9.967401962001553e-05, "loss": 1.12414293, "memory(GiB)": 85.12, "step": 1295, "train_speed(iter/s)": 0.035225 }, { "acc": 0.63181615, "epoch": 1.02, "learning_rate": 9.966783516225948e-05, "loss": 1.23086386, "memory(GiB)": 85.12, "step": 1300, "train_speed(iter/s)": 0.035227 }, { "acc": 0.64660926, "epoch": 1.02, "learning_rate": 9.966159278460703e-05, "loss": 1.1694212, "memory(GiB)": 85.12, "step": 1305, "train_speed(iter/s)": 0.035223 }, { "acc": 0.65136437, "epoch": 1.03, "learning_rate": 9.965529249433768e-05, "loss": 1.14605751, "memory(GiB)": 85.12, "step": 1310, "train_speed(iter/s)": 0.035226 }, { "acc": 0.64723616, "epoch": 1.03, "learning_rate": 9.964893429879846e-05, "loss": 1.17278271, "memory(GiB)": 85.12, "step": 1315, "train_speed(iter/s)": 0.035228 }, { "acc": 0.63427768, "epoch": 1.03, "learning_rate": 9.9642518205404e-05, "loss": 1.19455042, "memory(GiB)": 85.12, "step": 1320, "train_speed(iter/s)": 0.03523 }, { "acc": 0.65323257, "epoch": 1.04, "learning_rate": 9.963604422163636e-05, "loss": 1.15521383, "memory(GiB)": 85.12, "step": 1325, "train_speed(iter/s)": 0.035233 }, { "acc": 0.65213785, "epoch": 1.04, "learning_rate": 9.962951235504511e-05, "loss": 1.17218103, "memory(GiB)": 85.12, "step": 1330, "train_speed(iter/s)": 0.035225 }, { "acc": 0.63362112, "epoch": 1.05, "learning_rate": 9.962292261324744e-05, "loss": 1.21349621, "memory(GiB)": 85.12, "step": 1335, "train_speed(iter/s)": 0.035222 }, { "acc": 0.64905601, "epoch": 1.05, "learning_rate": 9.961627500392788e-05, "loss": 1.19248028, "memory(GiB)": 85.12, "step": 1340, "train_speed(iter/s)": 0.035224 }, { "acc": 0.63903928, "epoch": 1.05, "learning_rate": 9.960956953483854e-05, "loss": 1.21704388, "memory(GiB)": 85.12, "step": 1345, "train_speed(iter/s)": 0.035226 }, { "acc": 0.64893613, "epoch": 1.06, "learning_rate": 9.960280621379891e-05, "loss": 1.18590031, "memory(GiB)": 85.12, "step": 1350, "train_speed(iter/s)": 0.035228 }, { "acc": 0.66188636, "epoch": 1.06, "learning_rate": 9.959598504869608e-05, "loss": 1.1234787, "memory(GiB)": 85.12, "step": 1355, "train_speed(iter/s)": 0.035225 }, { "acc": 0.65642624, "epoch": 1.07, "learning_rate": 9.958910604748449e-05, "loss": 1.17125835, "memory(GiB)": 85.12, "step": 1360, "train_speed(iter/s)": 0.035227 }, { "acc": 0.64671488, "epoch": 1.07, "learning_rate": 9.958216921818602e-05, "loss": 1.17184534, "memory(GiB)": 85.12, "step": 1365, "train_speed(iter/s)": 0.035224 }, { "acc": 0.65104051, "epoch": 1.07, "learning_rate": 9.957517456889005e-05, "loss": 1.15897675, "memory(GiB)": 85.12, "step": 1370, "train_speed(iter/s)": 0.035227 }, { "acc": 0.6516345, "epoch": 1.08, "learning_rate": 9.956812210775336e-05, "loss": 1.16180744, "memory(GiB)": 85.12, "step": 1375, "train_speed(iter/s)": 0.035229 }, { "acc": 0.64146729, "epoch": 1.08, "learning_rate": 9.956101184300012e-05, "loss": 1.1801156, "memory(GiB)": 85.12, "step": 1380, "train_speed(iter/s)": 0.035231 }, { "acc": 0.64465218, "epoch": 1.09, "learning_rate": 9.955384378292195e-05, "loss": 1.16287785, "memory(GiB)": 85.12, "step": 1385, "train_speed(iter/s)": 0.035233 }, { "acc": 0.65234551, "epoch": 1.09, "learning_rate": 9.954661793587783e-05, "loss": 1.16832972, "memory(GiB)": 85.12, "step": 1390, "train_speed(iter/s)": 0.03523 }, { "acc": 0.64926839, "epoch": 1.09, "learning_rate": 9.953933431029417e-05, "loss": 1.16717663, "memory(GiB)": 85.12, "step": 1395, "train_speed(iter/s)": 0.035232 }, { "acc": 0.6558826, "epoch": 1.1, "learning_rate": 9.953199291466469e-05, "loss": 1.14773283, "memory(GiB)": 85.12, "step": 1400, "train_speed(iter/s)": 0.035234 }, { "acc": 0.64793453, "epoch": 1.1, "learning_rate": 9.952459375755056e-05, "loss": 1.19053068, "memory(GiB)": 85.12, "step": 1405, "train_speed(iter/s)": 0.035237 }, { "acc": 0.64704976, "epoch": 1.11, "learning_rate": 9.951713684758027e-05, "loss": 1.18572483, "memory(GiB)": 85.12, "step": 1410, "train_speed(iter/s)": 0.035238 }, { "acc": 0.64501376, "epoch": 1.11, "learning_rate": 9.950962219344963e-05, "loss": 1.17802401, "memory(GiB)": 85.12, "step": 1415, "train_speed(iter/s)": 0.03524 }, { "acc": 0.64344835, "epoch": 1.11, "learning_rate": 9.950204980392185e-05, "loss": 1.21547565, "memory(GiB)": 85.12, "step": 1420, "train_speed(iter/s)": 0.035242 }, { "acc": 0.65633788, "epoch": 1.12, "learning_rate": 9.94944196878274e-05, "loss": 1.15950899, "memory(GiB)": 85.12, "step": 1425, "train_speed(iter/s)": 0.035244 }, { "acc": 0.64611464, "epoch": 1.12, "learning_rate": 9.948673185406412e-05, "loss": 1.21565819, "memory(GiB)": 85.12, "step": 1430, "train_speed(iter/s)": 0.035246 }, { "acc": 0.64696469, "epoch": 1.12, "learning_rate": 9.947898631159716e-05, "loss": 1.18466921, "memory(GiB)": 85.12, "step": 1435, "train_speed(iter/s)": 0.035249 }, { "acc": 0.65379381, "epoch": 1.13, "learning_rate": 9.947118306945888e-05, "loss": 1.16033335, "memory(GiB)": 85.12, "step": 1440, "train_speed(iter/s)": 0.035245 }, { "acc": 0.64872618, "epoch": 1.13, "learning_rate": 9.946332213674907e-05, "loss": 1.17809114, "memory(GiB)": 85.12, "step": 1445, "train_speed(iter/s)": 0.035247 }, { "acc": 0.64592113, "epoch": 1.14, "learning_rate": 9.945540352263467e-05, "loss": 1.19367371, "memory(GiB)": 85.12, "step": 1450, "train_speed(iter/s)": 0.035249 }, { "acc": 0.65893774, "epoch": 1.14, "learning_rate": 9.944742723634995e-05, "loss": 1.15910034, "memory(GiB)": 85.12, "step": 1455, "train_speed(iter/s)": 0.035247 }, { "acc": 0.63832693, "epoch": 1.14, "learning_rate": 9.943939328719638e-05, "loss": 1.21491098, "memory(GiB)": 85.12, "step": 1460, "train_speed(iter/s)": 0.035244 }, { "acc": 0.65693431, "epoch": 1.15, "learning_rate": 9.943130168454276e-05, "loss": 1.17500277, "memory(GiB)": 85.12, "step": 1465, "train_speed(iter/s)": 0.035245 }, { "acc": 0.66517062, "epoch": 1.15, "learning_rate": 9.942315243782504e-05, "loss": 1.13439531, "memory(GiB)": 85.12, "step": 1470, "train_speed(iter/s)": 0.035248 }, { "acc": 0.65051446, "epoch": 1.16, "learning_rate": 9.941494555654645e-05, "loss": 1.19706593, "memory(GiB)": 85.12, "step": 1475, "train_speed(iter/s)": 0.035245 }, { "acc": 0.64909263, "epoch": 1.16, "learning_rate": 9.940668105027739e-05, "loss": 1.19326334, "memory(GiB)": 85.12, "step": 1480, "train_speed(iter/s)": 0.035247 }, { "acc": 0.65501137, "epoch": 1.16, "learning_rate": 9.939835892865546e-05, "loss": 1.17640152, "memory(GiB)": 85.12, "step": 1485, "train_speed(iter/s)": 0.035249 }, { "acc": 0.64871545, "epoch": 1.17, "learning_rate": 9.938997920138547e-05, "loss": 1.18505135, "memory(GiB)": 85.12, "step": 1490, "train_speed(iter/s)": 0.035245 }, { "acc": 0.65266371, "epoch": 1.17, "learning_rate": 9.938154187823939e-05, "loss": 1.15669746, "memory(GiB)": 85.12, "step": 1495, "train_speed(iter/s)": 0.035247 }, { "acc": 0.66334338, "epoch": 1.18, "learning_rate": 9.937304696905636e-05, "loss": 1.12421255, "memory(GiB)": 85.12, "step": 1500, "train_speed(iter/s)": 0.035244 }, { "acc": 0.65040469, "epoch": 1.18, "learning_rate": 9.93644944837427e-05, "loss": 1.15235605, "memory(GiB)": 85.12, "step": 1505, "train_speed(iter/s)": 0.035246 }, { "acc": 0.64324026, "epoch": 1.18, "learning_rate": 9.935588443227184e-05, "loss": 1.18840065, "memory(GiB)": 85.12, "step": 1510, "train_speed(iter/s)": 0.035248 }, { "acc": 0.64157495, "epoch": 1.19, "learning_rate": 9.934721682468433e-05, "loss": 1.19425764, "memory(GiB)": 85.12, "step": 1515, "train_speed(iter/s)": 0.03525 }, { "acc": 0.65195494, "epoch": 1.19, "learning_rate": 9.933849167108787e-05, "loss": 1.17684612, "memory(GiB)": 85.12, "step": 1520, "train_speed(iter/s)": 0.035248 }, { "acc": 0.64985819, "epoch": 1.2, "learning_rate": 9.932970898165723e-05, "loss": 1.18239994, "memory(GiB)": 85.12, "step": 1525, "train_speed(iter/s)": 0.035249 }, { "acc": 0.65231233, "epoch": 1.2, "learning_rate": 9.932086876663435e-05, "loss": 1.16985979, "memory(GiB)": 85.12, "step": 1530, "train_speed(iter/s)": 0.035247 }, { "acc": 0.65894971, "epoch": 1.2, "learning_rate": 9.931197103632817e-05, "loss": 1.11519146, "memory(GiB)": 85.12, "step": 1535, "train_speed(iter/s)": 0.035248 }, { "acc": 0.64390192, "epoch": 1.21, "learning_rate": 9.930301580111472e-05, "loss": 1.23124371, "memory(GiB)": 85.12, "step": 1540, "train_speed(iter/s)": 0.035251 }, { "acc": 0.66011586, "epoch": 1.21, "learning_rate": 9.929400307143712e-05, "loss": 1.13707018, "memory(GiB)": 85.12, "step": 1545, "train_speed(iter/s)": 0.035248 }, { "acc": 0.65588398, "epoch": 1.21, "learning_rate": 9.928493285780552e-05, "loss": 1.15754347, "memory(GiB)": 85.12, "step": 1550, "train_speed(iter/s)": 0.035246 }, { "acc": 0.64823947, "epoch": 1.22, "learning_rate": 9.927580517079712e-05, "loss": 1.2073925, "memory(GiB)": 85.12, "step": 1555, "train_speed(iter/s)": 0.035247 }, { "acc": 0.65572858, "epoch": 1.22, "learning_rate": 9.926662002105608e-05, "loss": 1.16998863, "memory(GiB)": 85.12, "step": 1560, "train_speed(iter/s)": 0.03525 }, { "acc": 0.66158614, "epoch": 1.23, "learning_rate": 9.925737741929367e-05, "loss": 1.1435194, "memory(GiB)": 85.12, "step": 1565, "train_speed(iter/s)": 0.035251 }, { "acc": 0.64395499, "epoch": 1.23, "learning_rate": 9.924807737628807e-05, "loss": 1.21585579, "memory(GiB)": 85.12, "step": 1570, "train_speed(iter/s)": 0.035254 }, { "acc": 0.64430709, "epoch": 1.23, "learning_rate": 9.923871990288448e-05, "loss": 1.17890778, "memory(GiB)": 85.12, "step": 1575, "train_speed(iter/s)": 0.035256 }, { "acc": 0.64855452, "epoch": 1.24, "learning_rate": 9.922930500999508e-05, "loss": 1.1541831, "memory(GiB)": 85.12, "step": 1580, "train_speed(iter/s)": 0.035257 }, { "acc": 0.65893106, "epoch": 1.24, "learning_rate": 9.9219832708599e-05, "loss": 1.13851299, "memory(GiB)": 85.12, "step": 1585, "train_speed(iter/s)": 0.03525 }, { "acc": 0.65967436, "epoch": 1.25, "learning_rate": 9.921030300974232e-05, "loss": 1.12484913, "memory(GiB)": 85.12, "step": 1590, "train_speed(iter/s)": 0.035251 }, { "acc": 0.64779596, "epoch": 1.25, "learning_rate": 9.920071592453804e-05, "loss": 1.19642706, "memory(GiB)": 85.12, "step": 1595, "train_speed(iter/s)": 0.035253 }, { "acc": 0.64805603, "epoch": 1.25, "learning_rate": 9.919107146416608e-05, "loss": 1.18528366, "memory(GiB)": 85.12, "step": 1600, "train_speed(iter/s)": 0.03525 }, { "acc": 0.6561811, "epoch": 1.26, "learning_rate": 9.918136963987333e-05, "loss": 1.16669703, "memory(GiB)": 85.12, "step": 1605, "train_speed(iter/s)": 0.035252 }, { "acc": 0.66756039, "epoch": 1.26, "learning_rate": 9.917161046297346e-05, "loss": 1.11620274, "memory(GiB)": 85.12, "step": 1610, "train_speed(iter/s)": 0.035254 }, { "acc": 0.65261698, "epoch": 1.27, "learning_rate": 9.916179394484713e-05, "loss": 1.15845966, "memory(GiB)": 85.12, "step": 1615, "train_speed(iter/s)": 0.035252 }, { "acc": 0.66402683, "epoch": 1.27, "learning_rate": 9.915192009694179e-05, "loss": 1.11538677, "memory(GiB)": 85.12, "step": 1620, "train_speed(iter/s)": 0.035253 }, { "acc": 0.65500402, "epoch": 1.27, "learning_rate": 9.91419889307718e-05, "loss": 1.17441206, "memory(GiB)": 85.12, "step": 1625, "train_speed(iter/s)": 0.035255 }, { "acc": 0.65412102, "epoch": 1.28, "learning_rate": 9.913200045791834e-05, "loss": 1.16534052, "memory(GiB)": 85.12, "step": 1630, "train_speed(iter/s)": 0.035257 }, { "acc": 0.64114447, "epoch": 1.28, "learning_rate": 9.912195469002941e-05, "loss": 1.18363466, "memory(GiB)": 85.12, "step": 1635, "train_speed(iter/s)": 0.035257 }, { "acc": 0.65664382, "epoch": 1.29, "learning_rate": 9.911185163881984e-05, "loss": 1.17111397, "memory(GiB)": 85.12, "step": 1640, "train_speed(iter/s)": 0.035259 }, { "acc": 0.65938292, "epoch": 1.29, "learning_rate": 9.910169131607123e-05, "loss": 1.12585945, "memory(GiB)": 85.12, "step": 1645, "train_speed(iter/s)": 0.03526 }, { "acc": 0.64360905, "epoch": 1.29, "learning_rate": 9.909147373363202e-05, "loss": 1.1944355, "memory(GiB)": 85.12, "step": 1650, "train_speed(iter/s)": 0.035262 }, { "acc": 0.6680274, "epoch": 1.3, "learning_rate": 9.908119890341737e-05, "loss": 1.10261869, "memory(GiB)": 85.12, "step": 1655, "train_speed(iter/s)": 0.035264 }, { "acc": 0.6460361, "epoch": 1.3, "learning_rate": 9.907086683740924e-05, "loss": 1.17447681, "memory(GiB)": 85.12, "step": 1660, "train_speed(iter/s)": 0.035266 }, { "acc": 0.65601449, "epoch": 1.3, "learning_rate": 9.906047754765629e-05, "loss": 1.14073763, "memory(GiB)": 85.12, "step": 1665, "train_speed(iter/s)": 0.035268 }, { "acc": 0.65036592, "epoch": 1.31, "learning_rate": 9.905003104627397e-05, "loss": 1.17392483, "memory(GiB)": 85.12, "step": 1670, "train_speed(iter/s)": 0.03527 }, { "acc": 0.65944376, "epoch": 1.31, "learning_rate": 9.90395273454444e-05, "loss": 1.13640366, "memory(GiB)": 85.12, "step": 1675, "train_speed(iter/s)": 0.035263 }, { "acc": 0.65229316, "epoch": 1.32, "learning_rate": 9.902896645741639e-05, "loss": 1.17808762, "memory(GiB)": 85.12, "step": 1680, "train_speed(iter/s)": 0.035261 }, { "acc": 0.6533186, "epoch": 1.32, "learning_rate": 9.901834839450553e-05, "loss": 1.1593545, "memory(GiB)": 85.12, "step": 1685, "train_speed(iter/s)": 0.035258 }, { "acc": 0.64995356, "epoch": 1.32, "learning_rate": 9.900767316909396e-05, "loss": 1.18070507, "memory(GiB)": 85.12, "step": 1690, "train_speed(iter/s)": 0.03526 }, { "acc": 0.65651155, "epoch": 1.33, "learning_rate": 9.899694079363058e-05, "loss": 1.12338991, "memory(GiB)": 85.12, "step": 1695, "train_speed(iter/s)": 0.035262 }, { "acc": 0.65021605, "epoch": 1.33, "learning_rate": 9.898615128063086e-05, "loss": 1.19300032, "memory(GiB)": 85.12, "step": 1700, "train_speed(iter/s)": 0.035263 }, { "acc": 0.63957796, "epoch": 1.34, "learning_rate": 9.897530464267699e-05, "loss": 1.21851835, "memory(GiB)": 85.12, "step": 1705, "train_speed(iter/s)": 0.035261 }, { "acc": 0.66472945, "epoch": 1.34, "learning_rate": 9.896440089241767e-05, "loss": 1.13589916, "memory(GiB)": 85.12, "step": 1710, "train_speed(iter/s)": 0.035258 }, { "acc": 0.65348268, "epoch": 1.34, "learning_rate": 9.895344004256827e-05, "loss": 1.15424995, "memory(GiB)": 85.12, "step": 1715, "train_speed(iter/s)": 0.03526 }, { "acc": 0.6559926, "epoch": 1.35, "learning_rate": 9.894242210591073e-05, "loss": 1.15576687, "memory(GiB)": 85.12, "step": 1720, "train_speed(iter/s)": 0.035257 }, { "acc": 0.65292115, "epoch": 1.35, "learning_rate": 9.893134709529359e-05, "loss": 1.18022537, "memory(GiB)": 85.12, "step": 1725, "train_speed(iter/s)": 0.035255 }, { "acc": 0.66411386, "epoch": 1.36, "learning_rate": 9.892021502363187e-05, "loss": 1.11516771, "memory(GiB)": 85.12, "step": 1730, "train_speed(iter/s)": 0.035257 }, { "acc": 0.63985095, "epoch": 1.36, "learning_rate": 9.89090259039072e-05, "loss": 1.22506847, "memory(GiB)": 85.12, "step": 1735, "train_speed(iter/s)": 0.03525 }, { "acc": 0.64176164, "epoch": 1.36, "learning_rate": 9.889777974916774e-05, "loss": 1.20334921, "memory(GiB)": 85.12, "step": 1740, "train_speed(iter/s)": 0.035251 }, { "acc": 0.63934112, "epoch": 1.37, "learning_rate": 9.888647657252809e-05, "loss": 1.23192434, "memory(GiB)": 85.12, "step": 1745, "train_speed(iter/s)": 0.035253 }, { "acc": 0.65798678, "epoch": 1.37, "learning_rate": 9.887511638716942e-05, "loss": 1.15708418, "memory(GiB)": 85.12, "step": 1750, "train_speed(iter/s)": 0.03525 }, { "acc": 0.6396348, "epoch": 1.38, "learning_rate": 9.886369920633937e-05, "loss": 1.22064037, "memory(GiB)": 85.12, "step": 1755, "train_speed(iter/s)": 0.035243 }, { "acc": 0.64947276, "epoch": 1.38, "learning_rate": 9.885222504335199e-05, "loss": 1.18840179, "memory(GiB)": 85.12, "step": 1760, "train_speed(iter/s)": 0.035246 }, { "acc": 0.65698085, "epoch": 1.38, "learning_rate": 9.884069391158784e-05, "loss": 1.16323624, "memory(GiB)": 85.12, "step": 1765, "train_speed(iter/s)": 0.035247 }, { "acc": 0.65098138, "epoch": 1.39, "learning_rate": 9.88291058244939e-05, "loss": 1.16516037, "memory(GiB)": 85.12, "step": 1770, "train_speed(iter/s)": 0.035249 }, { "acc": 0.65393229, "epoch": 1.39, "learning_rate": 9.881746079558353e-05, "loss": 1.16837893, "memory(GiB)": 85.12, "step": 1775, "train_speed(iter/s)": 0.03525 }, { "acc": 0.64121981, "epoch": 1.39, "learning_rate": 9.880575883843655e-05, "loss": 1.21210432, "memory(GiB)": 85.12, "step": 1780, "train_speed(iter/s)": 0.035248 }, { "acc": 0.66884899, "epoch": 1.4, "learning_rate": 9.879399996669911e-05, "loss": 1.12667084, "memory(GiB)": 85.12, "step": 1785, "train_speed(iter/s)": 0.035249 }, { "acc": 0.65376649, "epoch": 1.4, "learning_rate": 9.878218419408379e-05, "loss": 1.1607131, "memory(GiB)": 85.12, "step": 1790, "train_speed(iter/s)": 0.035247 }, { "acc": 0.64392834, "epoch": 1.41, "learning_rate": 9.877031153436949e-05, "loss": 1.20786915, "memory(GiB)": 85.12, "step": 1795, "train_speed(iter/s)": 0.035248 }, { "acc": 0.65759964, "epoch": 1.41, "learning_rate": 9.875838200140142e-05, "loss": 1.15941496, "memory(GiB)": 85.12, "step": 1800, "train_speed(iter/s)": 0.03525 }, { "acc": 0.65850377, "epoch": 1.41, "learning_rate": 9.874639560909117e-05, "loss": 1.15575294, "memory(GiB)": 85.12, "step": 1805, "train_speed(iter/s)": 0.035251 }, { "acc": 0.65355692, "epoch": 1.42, "learning_rate": 9.873435237141664e-05, "loss": 1.14666672, "memory(GiB)": 85.12, "step": 1810, "train_speed(iter/s)": 0.035249 }, { "acc": 0.64999933, "epoch": 1.42, "learning_rate": 9.872225230242194e-05, "loss": 1.16884727, "memory(GiB)": 85.12, "step": 1815, "train_speed(iter/s)": 0.035246 }, { "acc": 0.65628324, "epoch": 1.43, "learning_rate": 9.871009541621752e-05, "loss": 1.12916546, "memory(GiB)": 85.12, "step": 1820, "train_speed(iter/s)": 0.035243 }, { "acc": 0.65538239, "epoch": 1.43, "learning_rate": 9.869788172698006e-05, "loss": 1.1587122, "memory(GiB)": 85.12, "step": 1825, "train_speed(iter/s)": 0.035241 }, { "acc": 0.65519543, "epoch": 1.43, "learning_rate": 9.868561124895254e-05, "loss": 1.1943471, "memory(GiB)": 85.12, "step": 1830, "train_speed(iter/s)": 0.035235 }, { "acc": 0.65793271, "epoch": 1.44, "learning_rate": 9.867328399644407e-05, "loss": 1.15140657, "memory(GiB)": 85.12, "step": 1835, "train_speed(iter/s)": 0.035236 }, { "acc": 0.64654632, "epoch": 1.44, "learning_rate": 9.866089998383004e-05, "loss": 1.19984751, "memory(GiB)": 85.12, "step": 1840, "train_speed(iter/s)": 0.035238 }, { "acc": 0.6539813, "epoch": 1.45, "learning_rate": 9.864845922555198e-05, "loss": 1.16101418, "memory(GiB)": 85.12, "step": 1845, "train_speed(iter/s)": 0.035239 }, { "acc": 0.65071869, "epoch": 1.45, "learning_rate": 9.863596173611764e-05, "loss": 1.1871336, "memory(GiB)": 85.12, "step": 1850, "train_speed(iter/s)": 0.035241 }, { "acc": 0.64802847, "epoch": 1.45, "learning_rate": 9.862340753010089e-05, "loss": 1.17077522, "memory(GiB)": 85.12, "step": 1855, "train_speed(iter/s)": 0.035238 }, { "acc": 0.6609251, "epoch": 1.46, "learning_rate": 9.861079662214177e-05, "loss": 1.12675228, "memory(GiB)": 85.12, "step": 1860, "train_speed(iter/s)": 0.035236 }, { "acc": 0.652352, "epoch": 1.46, "learning_rate": 9.85981290269464e-05, "loss": 1.17576647, "memory(GiB)": 85.12, "step": 1865, "train_speed(iter/s)": 0.035233 }, { "acc": 0.65460477, "epoch": 1.47, "learning_rate": 9.858540475928706e-05, "loss": 1.17708349, "memory(GiB)": 85.12, "step": 1870, "train_speed(iter/s)": 0.035235 }, { "acc": 0.6584599, "epoch": 1.47, "learning_rate": 9.857262383400207e-05, "loss": 1.13749962, "memory(GiB)": 85.12, "step": 1875, "train_speed(iter/s)": 0.035233 }, { "acc": 0.6569325, "epoch": 1.47, "learning_rate": 9.855978626599585e-05, "loss": 1.14678946, "memory(GiB)": 85.12, "step": 1880, "train_speed(iter/s)": 0.035235 }, { "acc": 0.64635262, "epoch": 1.48, "learning_rate": 9.854689207023887e-05, "loss": 1.17655993, "memory(GiB)": 85.12, "step": 1885, "train_speed(iter/s)": 0.035236 }, { "acc": 0.65549922, "epoch": 1.48, "learning_rate": 9.853394126176763e-05, "loss": 1.14425611, "memory(GiB)": 85.12, "step": 1890, "train_speed(iter/s)": 0.035237 }, { "acc": 0.66941037, "epoch": 1.49, "learning_rate": 9.852093385568466e-05, "loss": 1.08940992, "memory(GiB)": 85.12, "step": 1895, "train_speed(iter/s)": 0.035238 }, { "acc": 0.6526649, "epoch": 1.49, "learning_rate": 9.850786986715846e-05, "loss": 1.15163832, "memory(GiB)": 85.12, "step": 1900, "train_speed(iter/s)": 0.03524 }, { "acc": 0.65831594, "epoch": 1.49, "learning_rate": 9.849474931142353e-05, "loss": 1.12980242, "memory(GiB)": 85.12, "step": 1905, "train_speed(iter/s)": 0.035242 }, { "acc": 0.66181054, "epoch": 1.5, "learning_rate": 9.848157220378038e-05, "loss": 1.14682779, "memory(GiB)": 85.12, "step": 1910, "train_speed(iter/s)": 0.035243 }, { "acc": 0.64066448, "epoch": 1.5, "learning_rate": 9.846833855959539e-05, "loss": 1.22032328, "memory(GiB)": 85.12, "step": 1915, "train_speed(iter/s)": 0.035245 }, { "acc": 0.6627368, "epoch": 1.5, "learning_rate": 9.845504839430091e-05, "loss": 1.11947041, "memory(GiB)": 85.12, "step": 1920, "train_speed(iter/s)": 0.035246 }, { "acc": 0.65865917, "epoch": 1.51, "learning_rate": 9.844170172339521e-05, "loss": 1.16217585, "memory(GiB)": 85.12, "step": 1925, "train_speed(iter/s)": 0.035248 }, { "acc": 0.65347538, "epoch": 1.51, "learning_rate": 9.842829856244247e-05, "loss": 1.16731787, "memory(GiB)": 85.12, "step": 1930, "train_speed(iter/s)": 0.035245 }, { "acc": 0.64839239, "epoch": 1.52, "learning_rate": 9.841483892707268e-05, "loss": 1.18971329, "memory(GiB)": 85.12, "step": 1935, "train_speed(iter/s)": 0.035243 }, { "acc": 0.66025143, "epoch": 1.52, "learning_rate": 9.840132283298172e-05, "loss": 1.12929058, "memory(GiB)": 85.12, "step": 1940, "train_speed(iter/s)": 0.035244 }, { "acc": 0.65671482, "epoch": 1.52, "learning_rate": 9.838775029593135e-05, "loss": 1.12444181, "memory(GiB)": 85.12, "step": 1945, "train_speed(iter/s)": 0.035239 }, { "acc": 0.65074253, "epoch": 1.53, "learning_rate": 9.837412133174911e-05, "loss": 1.14755917, "memory(GiB)": 85.12, "step": 1950, "train_speed(iter/s)": 0.035241 }, { "acc": 0.64375038, "epoch": 1.53, "learning_rate": 9.836043595632832e-05, "loss": 1.17292156, "memory(GiB)": 85.12, "step": 1955, "train_speed(iter/s)": 0.035239 }, { "acc": 0.65182729, "epoch": 1.54, "learning_rate": 9.834669418562811e-05, "loss": 1.17811108, "memory(GiB)": 85.12, "step": 1960, "train_speed(iter/s)": 0.035237 }, { "acc": 0.65077796, "epoch": 1.54, "learning_rate": 9.833289603567341e-05, "loss": 1.1634614, "memory(GiB)": 85.12, "step": 1965, "train_speed(iter/s)": 0.035238 }, { "acc": 0.64500337, "epoch": 1.54, "learning_rate": 9.831904152255486e-05, "loss": 1.18109503, "memory(GiB)": 85.12, "step": 1970, "train_speed(iter/s)": 0.035239 }, { "acc": 0.65929651, "epoch": 1.55, "learning_rate": 9.830513066242882e-05, "loss": 1.14037209, "memory(GiB)": 85.12, "step": 1975, "train_speed(iter/s)": 0.035236 }, { "acc": 0.66190724, "epoch": 1.55, "learning_rate": 9.829116347151737e-05, "loss": 1.16235342, "memory(GiB)": 85.12, "step": 1980, "train_speed(iter/s)": 0.035237 }, { "acc": 0.65755038, "epoch": 1.56, "learning_rate": 9.827713996610826e-05, "loss": 1.16937008, "memory(GiB)": 85.12, "step": 1985, "train_speed(iter/s)": 0.035239 }, { "acc": 0.64079676, "epoch": 1.56, "learning_rate": 9.826306016255498e-05, "loss": 1.19097614, "memory(GiB)": 85.12, "step": 1990, "train_speed(iter/s)": 0.035241 }, { "acc": 0.66023755, "epoch": 1.56, "learning_rate": 9.824892407727656e-05, "loss": 1.12927694, "memory(GiB)": 85.12, "step": 1995, "train_speed(iter/s)": 0.035242 }, { "acc": 0.65292602, "epoch": 1.57, "learning_rate": 9.823473172675777e-05, "loss": 1.16442251, "memory(GiB)": 85.12, "step": 2000, "train_speed(iter/s)": 0.035243 }, { "acc": 0.65421362, "epoch": 1.57, "learning_rate": 9.822048312754893e-05, "loss": 1.16522408, "memory(GiB)": 85.12, "step": 2005, "train_speed(iter/s)": 0.035245 }, { "acc": 0.65284295, "epoch": 1.58, "learning_rate": 9.820617829626598e-05, "loss": 1.17013979, "memory(GiB)": 85.12, "step": 2010, "train_speed(iter/s)": 0.035246 }, { "acc": 0.64222498, "epoch": 1.58, "learning_rate": 9.819181724959044e-05, "loss": 1.23573723, "memory(GiB)": 85.12, "step": 2015, "train_speed(iter/s)": 0.035247 }, { "acc": 0.66108804, "epoch": 1.58, "learning_rate": 9.817740000426932e-05, "loss": 1.13777189, "memory(GiB)": 85.12, "step": 2020, "train_speed(iter/s)": 0.035249 }, { "acc": 0.66587753, "epoch": 1.59, "learning_rate": 9.816292657711527e-05, "loss": 1.11172771, "memory(GiB)": 85.12, "step": 2025, "train_speed(iter/s)": 0.035251 }, { "acc": 0.65847149, "epoch": 1.59, "learning_rate": 9.814839698500641e-05, "loss": 1.14090157, "memory(GiB)": 85.12, "step": 2030, "train_speed(iter/s)": 0.035252 }, { "acc": 0.65127797, "epoch": 1.59, "learning_rate": 9.813381124488631e-05, "loss": 1.16807508, "memory(GiB)": 85.12, "step": 2035, "train_speed(iter/s)": 0.035254 }, { "acc": 0.66447649, "epoch": 1.6, "learning_rate": 9.811916937376409e-05, "loss": 1.16490545, "memory(GiB)": 85.12, "step": 2040, "train_speed(iter/s)": 0.035255 }, { "acc": 0.65319815, "epoch": 1.6, "learning_rate": 9.810447138871426e-05, "loss": 1.15913305, "memory(GiB)": 85.12, "step": 2045, "train_speed(iter/s)": 0.035252 }, { "acc": 0.65084143, "epoch": 1.61, "learning_rate": 9.808971730687684e-05, "loss": 1.15471087, "memory(GiB)": 85.12, "step": 2050, "train_speed(iter/s)": 0.035253 }, { "acc": 0.65464187, "epoch": 1.61, "learning_rate": 9.80749071454572e-05, "loss": 1.14399872, "memory(GiB)": 85.12, "step": 2055, "train_speed(iter/s)": 0.035255 }, { "acc": 0.65305209, "epoch": 1.61, "learning_rate": 9.806004092172616e-05, "loss": 1.15933371, "memory(GiB)": 85.12, "step": 2060, "train_speed(iter/s)": 0.035253 }, { "acc": 0.64206853, "epoch": 1.62, "learning_rate": 9.804511865301989e-05, "loss": 1.18681612, "memory(GiB)": 85.12, "step": 2065, "train_speed(iter/s)": 0.035254 }, { "acc": 0.65472612, "epoch": 1.62, "learning_rate": 9.803014035673987e-05, "loss": 1.17128534, "memory(GiB)": 85.12, "step": 2070, "train_speed(iter/s)": 0.035256 }, { "acc": 0.66416421, "epoch": 1.63, "learning_rate": 9.801510605035303e-05, "loss": 1.12266273, "memory(GiB)": 85.12, "step": 2075, "train_speed(iter/s)": 0.035257 }, { "acc": 0.65653076, "epoch": 1.63, "learning_rate": 9.800001575139152e-05, "loss": 1.11308479, "memory(GiB)": 85.12, "step": 2080, "train_speed(iter/s)": 0.035259 }, { "acc": 0.65959449, "epoch": 1.63, "learning_rate": 9.798486947745282e-05, "loss": 1.12792482, "memory(GiB)": 85.12, "step": 2085, "train_speed(iter/s)": 0.035257 }, { "acc": 0.65187888, "epoch": 1.64, "learning_rate": 9.796966724619967e-05, "loss": 1.16060781, "memory(GiB)": 85.12, "step": 2090, "train_speed(iter/s)": 0.035255 }, { "acc": 0.65552635, "epoch": 1.64, "learning_rate": 9.79544090753601e-05, "loss": 1.13989534, "memory(GiB)": 85.12, "step": 2095, "train_speed(iter/s)": 0.035256 }, { "acc": 0.65974436, "epoch": 1.65, "learning_rate": 9.793909498272733e-05, "loss": 1.1274127, "memory(GiB)": 85.12, "step": 2100, "train_speed(iter/s)": 0.035257 }, { "acc": 0.66153345, "epoch": 1.65, "learning_rate": 9.792372498615981e-05, "loss": 1.13705215, "memory(GiB)": 85.12, "step": 2105, "train_speed(iter/s)": 0.035259 }, { "acc": 0.64935384, "epoch": 1.65, "learning_rate": 9.790829910358122e-05, "loss": 1.17313042, "memory(GiB)": 85.12, "step": 2110, "train_speed(iter/s)": 0.03526 }, { "acc": 0.66233768, "epoch": 1.66, "learning_rate": 9.789281735298032e-05, "loss": 1.09848804, "memory(GiB)": 85.12, "step": 2115, "train_speed(iter/s)": 0.035262 }, { "acc": 0.67054353, "epoch": 1.66, "learning_rate": 9.787727975241111e-05, "loss": 1.11139088, "memory(GiB)": 85.12, "step": 2120, "train_speed(iter/s)": 0.035263 }, { "acc": 0.66447287, "epoch": 1.67, "learning_rate": 9.786168631999269e-05, "loss": 1.13588085, "memory(GiB)": 85.12, "step": 2125, "train_speed(iter/s)": 0.035265 }, { "acc": 0.65222578, "epoch": 1.67, "learning_rate": 9.784603707390922e-05, "loss": 1.17907152, "memory(GiB)": 85.12, "step": 2130, "train_speed(iter/s)": 0.035265 }, { "acc": 0.65970678, "epoch": 1.67, "learning_rate": 9.783033203241006e-05, "loss": 1.14419537, "memory(GiB)": 85.12, "step": 2135, "train_speed(iter/s)": 0.035262 }, { "acc": 0.65925775, "epoch": 1.68, "learning_rate": 9.78145712138095e-05, "loss": 1.1633584, "memory(GiB)": 85.12, "step": 2140, "train_speed(iter/s)": 0.035263 }, { "acc": 0.66307669, "epoch": 1.68, "learning_rate": 9.779875463648698e-05, "loss": 1.12448187, "memory(GiB)": 85.12, "step": 2145, "train_speed(iter/s)": 0.035264 }, { "acc": 0.65571504, "epoch": 1.68, "learning_rate": 9.77828823188869e-05, "loss": 1.20079041, "memory(GiB)": 85.12, "step": 2150, "train_speed(iter/s)": 0.035266 }, { "acc": 0.66194067, "epoch": 1.69, "learning_rate": 9.77669542795187e-05, "loss": 1.10752869, "memory(GiB)": 85.12, "step": 2155, "train_speed(iter/s)": 0.035267 }, { "acc": 0.6523849, "epoch": 1.69, "learning_rate": 9.775097053695677e-05, "loss": 1.15065937, "memory(GiB)": 85.12, "step": 2160, "train_speed(iter/s)": 0.035265 }, { "acc": 0.64300294, "epoch": 1.7, "learning_rate": 9.773493110984047e-05, "loss": 1.17376556, "memory(GiB)": 85.12, "step": 2165, "train_speed(iter/s)": 0.035267 }, { "acc": 0.65471711, "epoch": 1.7, "learning_rate": 9.77188360168741e-05, "loss": 1.16260157, "memory(GiB)": 85.12, "step": 2170, "train_speed(iter/s)": 0.035268 }, { "acc": 0.66860032, "epoch": 1.7, "learning_rate": 9.770268527682687e-05, "loss": 1.09885559, "memory(GiB)": 85.12, "step": 2175, "train_speed(iter/s)": 0.035266 }, { "acc": 0.6501049, "epoch": 1.71, "learning_rate": 9.76864789085329e-05, "loss": 1.17552853, "memory(GiB)": 85.12, "step": 2180, "train_speed(iter/s)": 0.035264 }, { "acc": 0.64576588, "epoch": 1.71, "learning_rate": 9.767021693089116e-05, "loss": 1.19620943, "memory(GiB)": 85.12, "step": 2185, "train_speed(iter/s)": 0.035266 }, { "acc": 0.66087532, "epoch": 1.72, "learning_rate": 9.765389936286545e-05, "loss": 1.1152669, "memory(GiB)": 85.12, "step": 2190, "train_speed(iter/s)": 0.035267 }, { "acc": 0.66009383, "epoch": 1.72, "learning_rate": 9.763752622348445e-05, "loss": 1.13289509, "memory(GiB)": 85.12, "step": 2195, "train_speed(iter/s)": 0.035269 }, { "acc": 0.66180921, "epoch": 1.72, "learning_rate": 9.762109753184159e-05, "loss": 1.1201334, "memory(GiB)": 85.12, "step": 2200, "train_speed(iter/s)": 0.03527 }, { "acc": 0.65290222, "epoch": 1.73, "learning_rate": 9.760461330709513e-05, "loss": 1.1867281, "memory(GiB)": 85.12, "step": 2205, "train_speed(iter/s)": 0.035272 }, { "acc": 0.65245934, "epoch": 1.73, "learning_rate": 9.758807356846804e-05, "loss": 1.1851923, "memory(GiB)": 85.12, "step": 2210, "train_speed(iter/s)": 0.035274 }, { "acc": 0.6554101, "epoch": 1.74, "learning_rate": 9.757147833524808e-05, "loss": 1.12435026, "memory(GiB)": 85.12, "step": 2215, "train_speed(iter/s)": 0.035275 }, { "acc": 0.66130071, "epoch": 1.74, "learning_rate": 9.755482762678768e-05, "loss": 1.11828518, "memory(GiB)": 85.12, "step": 2220, "train_speed(iter/s)": 0.035276 }, { "acc": 0.65428829, "epoch": 1.74, "learning_rate": 9.753812146250398e-05, "loss": 1.17555447, "memory(GiB)": 85.12, "step": 2225, "train_speed(iter/s)": 0.035275 }, { "acc": 0.65815506, "epoch": 1.75, "learning_rate": 9.75213598618788e-05, "loss": 1.14015465, "memory(GiB)": 85.12, "step": 2230, "train_speed(iter/s)": 0.035276 }, { "acc": 0.6582922, "epoch": 1.75, "learning_rate": 9.750454284445859e-05, "loss": 1.13366365, "memory(GiB)": 85.12, "step": 2235, "train_speed(iter/s)": 0.035274 }, { "acc": 0.64991212, "epoch": 1.76, "learning_rate": 9.748767042985442e-05, "loss": 1.17165375, "memory(GiB)": 85.12, "step": 2240, "train_speed(iter/s)": 0.035276 }, { "acc": 0.65585694, "epoch": 1.76, "learning_rate": 9.7470742637742e-05, "loss": 1.15650015, "memory(GiB)": 85.12, "step": 2245, "train_speed(iter/s)": 0.035277 }, { "acc": 0.65717545, "epoch": 1.76, "learning_rate": 9.745375948786158e-05, "loss": 1.15424328, "memory(GiB)": 85.12, "step": 2250, "train_speed(iter/s)": 0.035278 }, { "acc": 0.65874028, "epoch": 1.77, "learning_rate": 9.743672100001793e-05, "loss": 1.14350729, "memory(GiB)": 85.12, "step": 2255, "train_speed(iter/s)": 0.035279 }, { "acc": 0.64995842, "epoch": 1.77, "learning_rate": 9.741962719408047e-05, "loss": 1.16558609, "memory(GiB)": 85.12, "step": 2260, "train_speed(iter/s)": 0.035281 }, { "acc": 0.6412828, "epoch": 1.78, "learning_rate": 9.7402478089983e-05, "loss": 1.24160509, "memory(GiB)": 85.12, "step": 2265, "train_speed(iter/s)": 0.035279 }, { "acc": 0.66066208, "epoch": 1.78, "learning_rate": 9.738527370772387e-05, "loss": 1.13935509, "memory(GiB)": 85.12, "step": 2270, "train_speed(iter/s)": 0.03528 }, { "acc": 0.65170732, "epoch": 1.78, "learning_rate": 9.73680140673659e-05, "loss": 1.15450306, "memory(GiB)": 85.12, "step": 2275, "train_speed(iter/s)": 0.035282 }, { "acc": 0.65960178, "epoch": 1.79, "learning_rate": 9.735069918903635e-05, "loss": 1.13573933, "memory(GiB)": 85.12, "step": 2280, "train_speed(iter/s)": 0.035283 }, { "acc": 0.66337166, "epoch": 1.79, "learning_rate": 9.733332909292684e-05, "loss": 1.15319395, "memory(GiB)": 85.12, "step": 2285, "train_speed(iter/s)": 0.035284 }, { "acc": 0.66128883, "epoch": 1.79, "learning_rate": 9.731590379929345e-05, "loss": 1.158424, "memory(GiB)": 85.12, "step": 2290, "train_speed(iter/s)": 0.035285 }, { "acc": 0.65605984, "epoch": 1.8, "learning_rate": 9.729842332845657e-05, "loss": 1.15069437, "memory(GiB)": 85.12, "step": 2295, "train_speed(iter/s)": 0.035282 }, { "acc": 0.63679175, "epoch": 1.8, "learning_rate": 9.7280887700801e-05, "loss": 1.2136096, "memory(GiB)": 85.12, "step": 2300, "train_speed(iter/s)": 0.035283 }, { "acc": 0.64580112, "epoch": 1.81, "learning_rate": 9.726329693677578e-05, "loss": 1.19345636, "memory(GiB)": 85.12, "step": 2305, "train_speed(iter/s)": 0.035284 }, { "acc": 0.65686107, "epoch": 1.81, "learning_rate": 9.724565105689432e-05, "loss": 1.13980618, "memory(GiB)": 85.12, "step": 2310, "train_speed(iter/s)": 0.03528 }, { "acc": 0.66553755, "epoch": 1.81, "learning_rate": 9.722795008173427e-05, "loss": 1.1280262, "memory(GiB)": 85.12, "step": 2315, "train_speed(iter/s)": 0.035281 }, { "acc": 0.65331993, "epoch": 1.82, "learning_rate": 9.721019403193753e-05, "loss": 1.16992741, "memory(GiB)": 85.12, "step": 2320, "train_speed(iter/s)": 0.035281 }, { "acc": 0.65903072, "epoch": 1.82, "learning_rate": 9.719238292821022e-05, "loss": 1.15253115, "memory(GiB)": 85.12, "step": 2325, "train_speed(iter/s)": 0.035282 }, { "acc": 0.65842004, "epoch": 1.83, "learning_rate": 9.71745167913227e-05, "loss": 1.15765343, "memory(GiB)": 85.12, "step": 2330, "train_speed(iter/s)": 0.035282 }, { "acc": 0.64939175, "epoch": 1.83, "learning_rate": 9.715659564210944e-05, "loss": 1.1643466, "memory(GiB)": 85.12, "step": 2335, "train_speed(iter/s)": 0.035284 }, { "acc": 0.66922626, "epoch": 1.83, "learning_rate": 9.713861950146912e-05, "loss": 1.1116375, "memory(GiB)": 85.12, "step": 2340, "train_speed(iter/s)": 0.035279 }, { "acc": 0.64719138, "epoch": 1.84, "learning_rate": 9.712058839036451e-05, "loss": 1.20366507, "memory(GiB)": 85.12, "step": 2345, "train_speed(iter/s)": 0.035276 }, { "acc": 0.65068803, "epoch": 1.84, "learning_rate": 9.71025023298225e-05, "loss": 1.14096384, "memory(GiB)": 85.12, "step": 2350, "train_speed(iter/s)": 0.035271 }, { "acc": 0.6618053, "epoch": 1.85, "learning_rate": 9.708436134093408e-05, "loss": 1.13345575, "memory(GiB)": 85.12, "step": 2355, "train_speed(iter/s)": 0.035273 }, { "acc": 0.65803671, "epoch": 1.85, "learning_rate": 9.706616544485428e-05, "loss": 1.14154787, "memory(GiB)": 85.12, "step": 2360, "train_speed(iter/s)": 0.035274 }, { "acc": 0.65964813, "epoch": 1.85, "learning_rate": 9.70479146628021e-05, "loss": 1.11802235, "memory(GiB)": 85.12, "step": 2365, "train_speed(iter/s)": 0.035275 }, { "acc": 0.6604876, "epoch": 1.86, "learning_rate": 9.702960901606064e-05, "loss": 1.1343956, "memory(GiB)": 85.12, "step": 2370, "train_speed(iter/s)": 0.035273 }, { "acc": 0.66209579, "epoch": 1.86, "learning_rate": 9.701124852597692e-05, "loss": 1.11040306, "memory(GiB)": 85.12, "step": 2375, "train_speed(iter/s)": 0.035271 }, { "acc": 0.66260943, "epoch": 1.87, "learning_rate": 9.699283321396195e-05, "loss": 1.12860765, "memory(GiB)": 85.12, "step": 2380, "train_speed(iter/s)": 0.035273 }, { "acc": 0.66986876, "epoch": 1.87, "learning_rate": 9.697436310149066e-05, "loss": 1.09946795, "memory(GiB)": 85.12, "step": 2385, "train_speed(iter/s)": 0.035274 }, { "acc": 0.67240357, "epoch": 1.87, "learning_rate": 9.695583821010184e-05, "loss": 1.12718344, "memory(GiB)": 85.12, "step": 2390, "train_speed(iter/s)": 0.035275 }, { "acc": 0.65421391, "epoch": 1.88, "learning_rate": 9.693725856139824e-05, "loss": 1.14639235, "memory(GiB)": 85.12, "step": 2395, "train_speed(iter/s)": 0.035276 }, { "acc": 0.64448304, "epoch": 1.88, "learning_rate": 9.69186241770464e-05, "loss": 1.21207218, "memory(GiB)": 85.12, "step": 2400, "train_speed(iter/s)": 0.035275 }, { "acc": 0.66120443, "epoch": 1.88, "learning_rate": 9.689993507877673e-05, "loss": 1.12814407, "memory(GiB)": 85.12, "step": 2405, "train_speed(iter/s)": 0.035276 }, { "acc": 0.65368838, "epoch": 1.89, "learning_rate": 9.68811912883834e-05, "loss": 1.15181837, "memory(GiB)": 85.12, "step": 2410, "train_speed(iter/s)": 0.035278 }, { "acc": 0.65113912, "epoch": 1.89, "learning_rate": 9.686239282772442e-05, "loss": 1.16465178, "memory(GiB)": 85.12, "step": 2415, "train_speed(iter/s)": 0.035276 }, { "acc": 0.65074034, "epoch": 1.9, "learning_rate": 9.68435397187215e-05, "loss": 1.16231499, "memory(GiB)": 85.12, "step": 2420, "train_speed(iter/s)": 0.035277 }, { "acc": 0.65878544, "epoch": 1.9, "learning_rate": 9.68246319833601e-05, "loss": 1.14520617, "memory(GiB)": 85.12, "step": 2425, "train_speed(iter/s)": 0.035278 }, { "acc": 0.64476914, "epoch": 1.9, "learning_rate": 9.68056696436894e-05, "loss": 1.20143547, "memory(GiB)": 85.12, "step": 2430, "train_speed(iter/s)": 0.035278 }, { "acc": 0.64212346, "epoch": 1.91, "learning_rate": 9.678665272182221e-05, "loss": 1.22368813, "memory(GiB)": 85.12, "step": 2435, "train_speed(iter/s)": 0.035278 }, { "acc": 0.65965805, "epoch": 1.91, "learning_rate": 9.676758123993504e-05, "loss": 1.12663536, "memory(GiB)": 85.12, "step": 2440, "train_speed(iter/s)": 0.035279 }, { "acc": 0.66423464, "epoch": 1.92, "learning_rate": 9.674845522026799e-05, "loss": 1.12610149, "memory(GiB)": 85.12, "step": 2445, "train_speed(iter/s)": 0.03528 }, { "acc": 0.66499667, "epoch": 1.92, "learning_rate": 9.672927468512476e-05, "loss": 1.1370595, "memory(GiB)": 85.12, "step": 2450, "train_speed(iter/s)": 0.035281 }, { "acc": 0.65996475, "epoch": 1.92, "learning_rate": 9.671003965687266e-05, "loss": 1.14243317, "memory(GiB)": 85.12, "step": 2455, "train_speed(iter/s)": 0.035282 }, { "acc": 0.66339087, "epoch": 1.93, "learning_rate": 9.669075015794252e-05, "loss": 1.11444197, "memory(GiB)": 85.12, "step": 2460, "train_speed(iter/s)": 0.035283 }, { "acc": 0.65870614, "epoch": 1.93, "learning_rate": 9.667140621082867e-05, "loss": 1.1119628, "memory(GiB)": 85.12, "step": 2465, "train_speed(iter/s)": 0.035281 }, { "acc": 0.66413293, "epoch": 1.94, "learning_rate": 9.665200783808897e-05, "loss": 1.12356319, "memory(GiB)": 85.12, "step": 2470, "train_speed(iter/s)": 0.035279 }, { "acc": 0.65245867, "epoch": 1.94, "learning_rate": 9.663255506234474e-05, "loss": 1.15396376, "memory(GiB)": 85.12, "step": 2475, "train_speed(iter/s)": 0.035281 }, { "acc": 0.65678835, "epoch": 1.94, "learning_rate": 9.661304790628073e-05, "loss": 1.16068563, "memory(GiB)": 85.12, "step": 2480, "train_speed(iter/s)": 0.035279 }, { "acc": 0.65423017, "epoch": 1.95, "learning_rate": 9.659348639264512e-05, "loss": 1.16609097, "memory(GiB)": 85.12, "step": 2485, "train_speed(iter/s)": 0.03528 }, { "acc": 0.67275624, "epoch": 1.95, "learning_rate": 9.657387054424945e-05, "loss": 1.09347582, "memory(GiB)": 85.12, "step": 2490, "train_speed(iter/s)": 0.035282 }, { "acc": 0.67117119, "epoch": 1.96, "learning_rate": 9.655420038396868e-05, "loss": 1.11645508, "memory(GiB)": 85.12, "step": 2495, "train_speed(iter/s)": 0.035283 }, { "acc": 0.66794109, "epoch": 1.96, "learning_rate": 9.653447593474102e-05, "loss": 1.10899277, "memory(GiB)": 85.12, "step": 2500, "train_speed(iter/s)": 0.035284 }, { "acc": 0.64761634, "epoch": 1.96, "learning_rate": 9.651469721956807e-05, "loss": 1.18057451, "memory(GiB)": 85.12, "step": 2505, "train_speed(iter/s)": 0.035282 }, { "acc": 0.65636969, "epoch": 1.97, "learning_rate": 9.649486426151468e-05, "loss": 1.15329361, "memory(GiB)": 85.12, "step": 2510, "train_speed(iter/s)": 0.035283 }, { "acc": 0.66316671, "epoch": 1.97, "learning_rate": 9.647497708370894e-05, "loss": 1.13145294, "memory(GiB)": 85.12, "step": 2515, "train_speed(iter/s)": 0.035284 }, { "acc": 0.66438212, "epoch": 1.97, "learning_rate": 9.64550357093422e-05, "loss": 1.10747566, "memory(GiB)": 85.12, "step": 2520, "train_speed(iter/s)": 0.035285 }, { "acc": 0.66867604, "epoch": 1.98, "learning_rate": 9.643504016166897e-05, "loss": 1.10191135, "memory(GiB)": 85.12, "step": 2525, "train_speed(iter/s)": 0.035284 }, { "acc": 0.65627751, "epoch": 1.98, "learning_rate": 9.6414990464007e-05, "loss": 1.14236097, "memory(GiB)": 85.12, "step": 2530, "train_speed(iter/s)": 0.035276 }, { "acc": 0.6637876, "epoch": 1.99, "learning_rate": 9.639488663973708e-05, "loss": 1.13195019, "memory(GiB)": 85.12, "step": 2535, "train_speed(iter/s)": 0.035278 }, { "acc": 0.66524153, "epoch": 1.99, "learning_rate": 9.637472871230322e-05, "loss": 1.11181889, "memory(GiB)": 85.12, "step": 2540, "train_speed(iter/s)": 0.035279 }, { "acc": 0.66480532, "epoch": 1.99, "learning_rate": 9.635451670521249e-05, "loss": 1.11331224, "memory(GiB)": 85.12, "step": 2545, "train_speed(iter/s)": 0.035277 }, { "acc": 0.66729088, "epoch": 2.0, "learning_rate": 9.633425064203503e-05, "loss": 1.09206867, "memory(GiB)": 85.12, "step": 2550, "train_speed(iter/s)": 0.035276 }, { "epoch": 2.0, "eval_acc": 0.6775607312797396, "eval_loss": 1.057321310043335, "eval_runtime": 85.2273, "eval_samples_per_second": 1.091, "eval_steps_per_second": 1.091, "step": 2552 }, { "acc": 0.67183886, "epoch": 2.0, "learning_rate": 9.631393054640398e-05, "loss": 1.07645693, "memory(GiB)": 85.12, "step": 2555, "train_speed(iter/s)": 0.035237 }, { "acc": 0.66320515, "epoch": 2.01, "learning_rate": 9.629355644201553e-05, "loss": 1.10909252, "memory(GiB)": 85.12, "step": 2560, "train_speed(iter/s)": 0.035233 }, { "acc": 0.66873536, "epoch": 2.01, "learning_rate": 9.627312835262885e-05, "loss": 1.09901686, "memory(GiB)": 85.12, "step": 2565, "train_speed(iter/s)": 0.035231 }, { "acc": 0.66333971, "epoch": 2.01, "learning_rate": 9.625264630206602e-05, "loss": 1.11735725, "memory(GiB)": 85.12, "step": 2570, "train_speed(iter/s)": 0.035232 }, { "acc": 0.65929585, "epoch": 2.02, "learning_rate": 9.623211031421212e-05, "loss": 1.12093697, "memory(GiB)": 85.12, "step": 2575, "train_speed(iter/s)": 0.035233 }, { "acc": 0.66388683, "epoch": 2.02, "learning_rate": 9.621152041301507e-05, "loss": 1.11663198, "memory(GiB)": 85.12, "step": 2580, "train_speed(iter/s)": 0.035231 }, { "acc": 0.65557284, "epoch": 2.03, "learning_rate": 9.619087662248569e-05, "loss": 1.13853168, "memory(GiB)": 85.12, "step": 2585, "train_speed(iter/s)": 0.035232 }, { "acc": 0.6557622, "epoch": 2.03, "learning_rate": 9.61701789666976e-05, "loss": 1.14238157, "memory(GiB)": 85.12, "step": 2590, "train_speed(iter/s)": 0.035234 }, { "acc": 0.67138915, "epoch": 2.03, "learning_rate": 9.614942746978733e-05, "loss": 1.0764698, "memory(GiB)": 85.12, "step": 2595, "train_speed(iter/s)": 0.035232 }, { "acc": 0.65261388, "epoch": 2.04, "learning_rate": 9.612862215595406e-05, "loss": 1.1417222, "memory(GiB)": 85.12, "step": 2600, "train_speed(iter/s)": 0.035233 }, { "acc": 0.66247792, "epoch": 2.04, "learning_rate": 9.610776304945986e-05, "loss": 1.13462439, "memory(GiB)": 85.12, "step": 2605, "train_speed(iter/s)": 0.035235 }, { "acc": 0.67750583, "epoch": 2.05, "learning_rate": 9.608685017462944e-05, "loss": 1.0703146, "memory(GiB)": 85.12, "step": 2610, "train_speed(iter/s)": 0.035236 }, { "acc": 0.66992674, "epoch": 2.05, "learning_rate": 9.606588355585025e-05, "loss": 1.10587053, "memory(GiB)": 85.12, "step": 2615, "train_speed(iter/s)": 0.035237 }, { "acc": 0.66086774, "epoch": 2.05, "learning_rate": 9.604486321757242e-05, "loss": 1.13685856, "memory(GiB)": 85.12, "step": 2620, "train_speed(iter/s)": 0.035238 }, { "acc": 0.67727141, "epoch": 2.06, "learning_rate": 9.60237891843087e-05, "loss": 1.06712856, "memory(GiB)": 85.12, "step": 2625, "train_speed(iter/s)": 0.035239 }, { "acc": 0.66415348, "epoch": 2.06, "learning_rate": 9.600266148063448e-05, "loss": 1.16380205, "memory(GiB)": 85.12, "step": 2630, "train_speed(iter/s)": 0.03524 }, { "acc": 0.65921197, "epoch": 2.07, "learning_rate": 9.598148013118771e-05, "loss": 1.11800652, "memory(GiB)": 85.12, "step": 2635, "train_speed(iter/s)": 0.035242 }, { "acc": 0.6564095, "epoch": 2.07, "learning_rate": 9.596024516066893e-05, "loss": 1.14403868, "memory(GiB)": 85.12, "step": 2640, "train_speed(iter/s)": 0.03524 }, { "acc": 0.66070809, "epoch": 2.07, "learning_rate": 9.593895659384117e-05, "loss": 1.1139576, "memory(GiB)": 85.12, "step": 2645, "train_speed(iter/s)": 0.035241 }, { "acc": 0.6555994, "epoch": 2.08, "learning_rate": 9.591761445553e-05, "loss": 1.12918062, "memory(GiB)": 85.12, "step": 2650, "train_speed(iter/s)": 0.035239 }, { "acc": 0.66035252, "epoch": 2.08, "learning_rate": 9.589621877062346e-05, "loss": 1.11460495, "memory(GiB)": 85.12, "step": 2655, "train_speed(iter/s)": 0.03524 }, { "acc": 0.66998568, "epoch": 2.08, "learning_rate": 9.5874769564072e-05, "loss": 1.07866659, "memory(GiB)": 85.12, "step": 2660, "train_speed(iter/s)": 0.035239 }, { "acc": 0.65154204, "epoch": 2.09, "learning_rate": 9.585326686088851e-05, "loss": 1.14091015, "memory(GiB)": 85.12, "step": 2665, "train_speed(iter/s)": 0.035238 }, { "acc": 0.6603961, "epoch": 2.09, "learning_rate": 9.583171068614827e-05, "loss": 1.12223263, "memory(GiB)": 85.12, "step": 2670, "train_speed(iter/s)": 0.035237 }, { "acc": 0.66768351, "epoch": 2.1, "learning_rate": 9.58101010649889e-05, "loss": 1.11046982, "memory(GiB)": 85.12, "step": 2675, "train_speed(iter/s)": 0.035238 }, { "acc": 0.66048141, "epoch": 2.1, "learning_rate": 9.578843802261036e-05, "loss": 1.1147171, "memory(GiB)": 85.12, "step": 2680, "train_speed(iter/s)": 0.035239 }, { "acc": 0.66307225, "epoch": 2.1, "learning_rate": 9.576672158427485e-05, "loss": 1.14018593, "memory(GiB)": 85.12, "step": 2685, "train_speed(iter/s)": 0.03524 }, { "acc": 0.66853991, "epoch": 2.11, "learning_rate": 9.574495177530693e-05, "loss": 1.12644806, "memory(GiB)": 85.12, "step": 2690, "train_speed(iter/s)": 0.035242 }, { "acc": 0.67141595, "epoch": 2.11, "learning_rate": 9.572312862109335e-05, "loss": 1.10125408, "memory(GiB)": 85.12, "step": 2695, "train_speed(iter/s)": 0.035243 }, { "acc": 0.66562076, "epoch": 2.12, "learning_rate": 9.570125214708309e-05, "loss": 1.11102467, "memory(GiB)": 85.12, "step": 2700, "train_speed(iter/s)": 0.035241 }, { "acc": 0.6675756, "epoch": 2.12, "learning_rate": 9.567932237878726e-05, "loss": 1.10656528, "memory(GiB)": 85.12, "step": 2705, "train_speed(iter/s)": 0.035242 }, { "acc": 0.67014441, "epoch": 2.12, "learning_rate": 9.565733934177915e-05, "loss": 1.07535477, "memory(GiB)": 85.12, "step": 2710, "train_speed(iter/s)": 0.035241 }, { "acc": 0.67194867, "epoch": 2.13, "learning_rate": 9.563530306169415e-05, "loss": 1.06938372, "memory(GiB)": 85.12, "step": 2715, "train_speed(iter/s)": 0.035242 }, { "acc": 0.65955586, "epoch": 2.13, "learning_rate": 9.56132135642298e-05, "loss": 1.14082947, "memory(GiB)": 85.12, "step": 2720, "train_speed(iter/s)": 0.035244 }, { "acc": 0.65607781, "epoch": 2.14, "learning_rate": 9.559107087514562e-05, "loss": 1.12005463, "memory(GiB)": 85.12, "step": 2725, "train_speed(iter/s)": 0.035245 }, { "acc": 0.66828775, "epoch": 2.14, "learning_rate": 9.556887502026324e-05, "loss": 1.10912933, "memory(GiB)": 85.12, "step": 2730, "train_speed(iter/s)": 0.035243 }, { "acc": 0.65105276, "epoch": 2.14, "learning_rate": 9.554662602546622e-05, "loss": 1.1439889, "memory(GiB)": 85.12, "step": 2735, "train_speed(iter/s)": 0.035242 }, { "acc": 0.66353316, "epoch": 2.15, "learning_rate": 9.552432391670009e-05, "loss": 1.11523571, "memory(GiB)": 85.12, "step": 2740, "train_speed(iter/s)": 0.035243 }, { "acc": 0.65910487, "epoch": 2.15, "learning_rate": 9.550196871997237e-05, "loss": 1.10913839, "memory(GiB)": 85.12, "step": 2745, "train_speed(iter/s)": 0.035244 }, { "acc": 0.66159306, "epoch": 2.16, "learning_rate": 9.547956046135247e-05, "loss": 1.15435734, "memory(GiB)": 85.12, "step": 2750, "train_speed(iter/s)": 0.035245 }, { "acc": 0.65181875, "epoch": 2.16, "learning_rate": 9.545709916697164e-05, "loss": 1.16566019, "memory(GiB)": 85.12, "step": 2755, "train_speed(iter/s)": 0.035244 }, { "acc": 0.66318617, "epoch": 2.16, "learning_rate": 9.543458486302301e-05, "loss": 1.10774937, "memory(GiB)": 85.12, "step": 2760, "train_speed(iter/s)": 0.035245 }, { "acc": 0.6740345, "epoch": 2.17, "learning_rate": 9.541201757576154e-05, "loss": 1.1062582, "memory(GiB)": 85.12, "step": 2765, "train_speed(iter/s)": 0.035246 }, { "acc": 0.64558797, "epoch": 2.17, "learning_rate": 9.538939733150394e-05, "loss": 1.18151665, "memory(GiB)": 85.12, "step": 2770, "train_speed(iter/s)": 0.035247 }, { "acc": 0.64788785, "epoch": 2.17, "learning_rate": 9.53667241566287e-05, "loss": 1.15782328, "memory(GiB)": 85.12, "step": 2775, "train_speed(iter/s)": 0.035243 }, { "acc": 0.65736341, "epoch": 2.18, "learning_rate": 9.534399807757606e-05, "loss": 1.14570007, "memory(GiB)": 85.12, "step": 2780, "train_speed(iter/s)": 0.035244 }, { "acc": 0.66911092, "epoch": 2.18, "learning_rate": 9.532121912084787e-05, "loss": 1.09435015, "memory(GiB)": 85.12, "step": 2785, "train_speed(iter/s)": 0.035246 }, { "acc": 0.6581212, "epoch": 2.19, "learning_rate": 9.529838731300774e-05, "loss": 1.15303545, "memory(GiB)": 85.12, "step": 2790, "train_speed(iter/s)": 0.035244 }, { "acc": 0.67710958, "epoch": 2.19, "learning_rate": 9.527550268068081e-05, "loss": 1.0725668, "memory(GiB)": 85.12, "step": 2795, "train_speed(iter/s)": 0.035243 }, { "acc": 0.65953379, "epoch": 2.19, "learning_rate": 9.525256525055395e-05, "loss": 1.10951128, "memory(GiB)": 85.12, "step": 2800, "train_speed(iter/s)": 0.035244 }, { "acc": 0.65992632, "epoch": 2.2, "learning_rate": 9.522957504937549e-05, "loss": 1.12102213, "memory(GiB)": 85.12, "step": 2805, "train_speed(iter/s)": 0.035245 }, { "acc": 0.66131835, "epoch": 2.2, "learning_rate": 9.520653210395534e-05, "loss": 1.08475084, "memory(GiB)": 85.12, "step": 2810, "train_speed(iter/s)": 0.035243 }, { "acc": 0.66965961, "epoch": 2.21, "learning_rate": 9.518343644116493e-05, "loss": 1.08618603, "memory(GiB)": 85.12, "step": 2815, "train_speed(iter/s)": 0.035244 }, { "acc": 0.65925741, "epoch": 2.21, "learning_rate": 9.516028808793714e-05, "loss": 1.1315218, "memory(GiB)": 85.12, "step": 2820, "train_speed(iter/s)": 0.035245 }, { "acc": 0.66214266, "epoch": 2.21, "learning_rate": 9.51370870712663e-05, "loss": 1.1035429, "memory(GiB)": 85.12, "step": 2825, "train_speed(iter/s)": 0.035247 }, { "acc": 0.66194062, "epoch": 2.22, "learning_rate": 9.511383341820815e-05, "loss": 1.1275753, "memory(GiB)": 85.12, "step": 2830, "train_speed(iter/s)": 0.035248 }, { "acc": 0.66372776, "epoch": 2.22, "learning_rate": 9.509052715587985e-05, "loss": 1.10870571, "memory(GiB)": 85.12, "step": 2835, "train_speed(iter/s)": 0.035244 }, { "acc": 0.66669927, "epoch": 2.23, "learning_rate": 9.506716831145988e-05, "loss": 1.14299908, "memory(GiB)": 85.12, "step": 2840, "train_speed(iter/s)": 0.035242 }, { "acc": 0.66881795, "epoch": 2.23, "learning_rate": 9.504375691218802e-05, "loss": 1.07950726, "memory(GiB)": 85.12, "step": 2845, "train_speed(iter/s)": 0.035243 }, { "acc": 0.67085314, "epoch": 2.23, "learning_rate": 9.502029298536535e-05, "loss": 1.07588711, "memory(GiB)": 85.12, "step": 2850, "train_speed(iter/s)": 0.035243 }, { "acc": 0.65394874, "epoch": 2.24, "learning_rate": 9.499677655835421e-05, "loss": 1.15423851, "memory(GiB)": 85.12, "step": 2855, "train_speed(iter/s)": 0.035242 }, { "acc": 0.64597754, "epoch": 2.24, "learning_rate": 9.49732076585782e-05, "loss": 1.15842009, "memory(GiB)": 85.12, "step": 2860, "train_speed(iter/s)": 0.035243 }, { "acc": 0.67262759, "epoch": 2.25, "learning_rate": 9.494958631352204e-05, "loss": 1.11735392, "memory(GiB)": 85.12, "step": 2865, "train_speed(iter/s)": 0.035244 }, { "acc": 0.66508193, "epoch": 2.25, "learning_rate": 9.492591255073164e-05, "loss": 1.09670143, "memory(GiB)": 85.12, "step": 2870, "train_speed(iter/s)": 0.035245 }, { "acc": 0.67350874, "epoch": 2.25, "learning_rate": 9.490218639781407e-05, "loss": 1.0795311, "memory(GiB)": 85.12, "step": 2875, "train_speed(iter/s)": 0.035243 }, { "acc": 0.66628647, "epoch": 2.26, "learning_rate": 9.487840788243744e-05, "loss": 1.12455454, "memory(GiB)": 85.12, "step": 2880, "train_speed(iter/s)": 0.035243 }, { "acc": 0.6715724, "epoch": 2.26, "learning_rate": 9.485457703233094e-05, "loss": 1.11749763, "memory(GiB)": 85.12, "step": 2885, "train_speed(iter/s)": 0.035244 }, { "acc": 0.6443604, "epoch": 2.26, "learning_rate": 9.483069387528482e-05, "loss": 1.18035517, "memory(GiB)": 85.12, "step": 2890, "train_speed(iter/s)": 0.035244 }, { "acc": 0.6723536, "epoch": 2.27, "learning_rate": 9.480675843915028e-05, "loss": 1.08084173, "memory(GiB)": 85.12, "step": 2895, "train_speed(iter/s)": 0.035243 }, { "acc": 0.66256552, "epoch": 2.27, "learning_rate": 9.478277075183955e-05, "loss": 1.08778219, "memory(GiB)": 85.12, "step": 2900, "train_speed(iter/s)": 0.035241 }, { "acc": 0.65755138, "epoch": 2.28, "learning_rate": 9.47587308413257e-05, "loss": 1.14270163, "memory(GiB)": 85.12, "step": 2905, "train_speed(iter/s)": 0.03524 }, { "acc": 0.64939547, "epoch": 2.28, "learning_rate": 9.473463873564275e-05, "loss": 1.1697154, "memory(GiB)": 85.12, "step": 2910, "train_speed(iter/s)": 0.035239 }, { "acc": 0.67200222, "epoch": 2.28, "learning_rate": 9.471049446288564e-05, "loss": 1.12756548, "memory(GiB)": 85.12, "step": 2915, "train_speed(iter/s)": 0.03524 }, { "acc": 0.66369948, "epoch": 2.29, "learning_rate": 9.468629805121005e-05, "loss": 1.134799, "memory(GiB)": 85.12, "step": 2920, "train_speed(iter/s)": 0.03524 }, { "acc": 0.65741105, "epoch": 2.29, "learning_rate": 9.466204952883252e-05, "loss": 1.12181864, "memory(GiB)": 85.12, "step": 2925, "train_speed(iter/s)": 0.035237 }, { "acc": 0.67293525, "epoch": 2.3, "learning_rate": 9.463774892403033e-05, "loss": 1.08120308, "memory(GiB)": 85.12, "step": 2930, "train_speed(iter/s)": 0.035235 }, { "acc": 0.65945473, "epoch": 2.3, "learning_rate": 9.461339626514153e-05, "loss": 1.12144451, "memory(GiB)": 85.12, "step": 2935, "train_speed(iter/s)": 0.035236 }, { "acc": 0.65807548, "epoch": 2.3, "learning_rate": 9.458899158056482e-05, "loss": 1.1355731, "memory(GiB)": 85.12, "step": 2940, "train_speed(iter/s)": 0.035237 }, { "acc": 0.66247325, "epoch": 2.31, "learning_rate": 9.456453489875963e-05, "loss": 1.11652193, "memory(GiB)": 85.12, "step": 2945, "train_speed(iter/s)": 0.035238 }, { "acc": 0.67103438, "epoch": 2.31, "learning_rate": 9.454002624824598e-05, "loss": 1.10103321, "memory(GiB)": 85.12, "step": 2950, "train_speed(iter/s)": 0.035239 }, { "acc": 0.6699152, "epoch": 2.32, "learning_rate": 9.451546565760452e-05, "loss": 1.07716627, "memory(GiB)": 85.12, "step": 2955, "train_speed(iter/s)": 0.03524 }, { "acc": 0.66332569, "epoch": 2.32, "learning_rate": 9.449085315547645e-05, "loss": 1.1355279, "memory(GiB)": 85.12, "step": 2960, "train_speed(iter/s)": 0.035242 }, { "acc": 0.6688993, "epoch": 2.32, "learning_rate": 9.446618877056353e-05, "loss": 1.08996553, "memory(GiB)": 85.12, "step": 2965, "train_speed(iter/s)": 0.035243 }, { "acc": 0.64563627, "epoch": 2.33, "learning_rate": 9.444147253162799e-05, "loss": 1.18632555, "memory(GiB)": 85.12, "step": 2970, "train_speed(iter/s)": 0.035244 }, { "acc": 0.66807208, "epoch": 2.33, "learning_rate": 9.441670446749253e-05, "loss": 1.09000006, "memory(GiB)": 85.12, "step": 2975, "train_speed(iter/s)": 0.035245 }, { "acc": 0.67623868, "epoch": 2.34, "learning_rate": 9.439188460704035e-05, "loss": 1.06963615, "memory(GiB)": 85.12, "step": 2980, "train_speed(iter/s)": 0.035246 }, { "acc": 0.67707462, "epoch": 2.34, "learning_rate": 9.436701297921499e-05, "loss": 1.06638432, "memory(GiB)": 85.12, "step": 2985, "train_speed(iter/s)": 0.035247 }, { "acc": 0.64844503, "epoch": 2.34, "learning_rate": 9.434208961302037e-05, "loss": 1.15902214, "memory(GiB)": 85.12, "step": 2990, "train_speed(iter/s)": 0.035248 }, { "acc": 0.66300759, "epoch": 2.35, "learning_rate": 9.431711453752074e-05, "loss": 1.11802444, "memory(GiB)": 85.12, "step": 2995, "train_speed(iter/s)": 0.035247 }, { "acc": 0.66209593, "epoch": 2.35, "learning_rate": 9.429208778184066e-05, "loss": 1.12048893, "memory(GiB)": 85.12, "step": 3000, "train_speed(iter/s)": 0.035248 }, { "acc": 0.65673513, "epoch": 2.36, "learning_rate": 9.426700937516498e-05, "loss": 1.12821989, "memory(GiB)": 85.12, "step": 3005, "train_speed(iter/s)": 0.035248 }, { "acc": 0.67200632, "epoch": 2.36, "learning_rate": 9.424187934673872e-05, "loss": 1.08947983, "memory(GiB)": 85.12, "step": 3010, "train_speed(iter/s)": 0.035247 }, { "acc": 0.66114106, "epoch": 2.36, "learning_rate": 9.421669772586716e-05, "loss": 1.1081459, "memory(GiB)": 85.12, "step": 3015, "train_speed(iter/s)": 0.035248 }, { "acc": 0.6648818, "epoch": 2.37, "learning_rate": 9.419146454191572e-05, "loss": 1.09442472, "memory(GiB)": 85.12, "step": 3020, "train_speed(iter/s)": 0.03525 }, { "acc": 0.65600429, "epoch": 2.37, "learning_rate": 9.416617982430994e-05, "loss": 1.11577091, "memory(GiB)": 85.12, "step": 3025, "train_speed(iter/s)": 0.035251 }, { "acc": 0.66453032, "epoch": 2.37, "learning_rate": 9.414084360253547e-05, "loss": 1.11425781, "memory(GiB)": 85.12, "step": 3030, "train_speed(iter/s)": 0.035252 }, { "acc": 0.65060616, "epoch": 2.38, "learning_rate": 9.411545590613803e-05, "loss": 1.13694382, "memory(GiB)": 85.12, "step": 3035, "train_speed(iter/s)": 0.03525 }, { "acc": 0.65530295, "epoch": 2.38, "learning_rate": 9.409001676472335e-05, "loss": 1.16810818, "memory(GiB)": 85.12, "step": 3040, "train_speed(iter/s)": 0.035251 }, { "acc": 0.66546168, "epoch": 2.39, "learning_rate": 9.406452620795714e-05, "loss": 1.11694679, "memory(GiB)": 85.12, "step": 3045, "train_speed(iter/s)": 0.035249 }, { "acc": 0.673035, "epoch": 2.39, "learning_rate": 9.40389842655651e-05, "loss": 1.067171, "memory(GiB)": 85.12, "step": 3050, "train_speed(iter/s)": 0.035249 }, { "acc": 0.6619174, "epoch": 2.39, "learning_rate": 9.401339096733283e-05, "loss": 1.14351206, "memory(GiB)": 85.12, "step": 3055, "train_speed(iter/s)": 0.03525 }, { "acc": 0.66858106, "epoch": 2.4, "learning_rate": 9.398774634310583e-05, "loss": 1.10405941, "memory(GiB)": 85.12, "step": 3060, "train_speed(iter/s)": 0.035251 }, { "acc": 0.66761208, "epoch": 2.4, "learning_rate": 9.396205042278946e-05, "loss": 1.09991446, "memory(GiB)": 85.12, "step": 3065, "train_speed(iter/s)": 0.035252 }, { "acc": 0.66788816, "epoch": 2.41, "learning_rate": 9.393630323634888e-05, "loss": 1.10259113, "memory(GiB)": 85.12, "step": 3070, "train_speed(iter/s)": 0.035253 }, { "acc": 0.66071186, "epoch": 2.41, "learning_rate": 9.391050481380903e-05, "loss": 1.13652515, "memory(GiB)": 85.12, "step": 3075, "train_speed(iter/s)": 0.035253 }, { "acc": 0.66306629, "epoch": 2.41, "learning_rate": 9.388465518525464e-05, "loss": 1.1402585, "memory(GiB)": 85.12, "step": 3080, "train_speed(iter/s)": 0.035254 }, { "acc": 0.66560416, "epoch": 2.42, "learning_rate": 9.385875438083008e-05, "loss": 1.12412586, "memory(GiB)": 85.12, "step": 3085, "train_speed(iter/s)": 0.035253 }, { "acc": 0.65981436, "epoch": 2.42, "learning_rate": 9.383280243073948e-05, "loss": 1.12645855, "memory(GiB)": 85.12, "step": 3090, "train_speed(iter/s)": 0.035254 }, { "acc": 0.66078367, "epoch": 2.43, "learning_rate": 9.380679936524656e-05, "loss": 1.13504505, "memory(GiB)": 85.12, "step": 3095, "train_speed(iter/s)": 0.035253 }, { "acc": 0.66190977, "epoch": 2.43, "learning_rate": 9.378074521467469e-05, "loss": 1.1244626, "memory(GiB)": 85.12, "step": 3100, "train_speed(iter/s)": 0.035252 }, { "acc": 0.65778189, "epoch": 2.43, "learning_rate": 9.375464000940676e-05, "loss": 1.16129856, "memory(GiB)": 85.12, "step": 3105, "train_speed(iter/s)": 0.035253 }, { "acc": 0.66200366, "epoch": 2.44, "learning_rate": 9.37284837798852e-05, "loss": 1.12543259, "memory(GiB)": 85.12, "step": 3110, "train_speed(iter/s)": 0.035254 }, { "acc": 0.66628103, "epoch": 2.44, "learning_rate": 9.370227655661203e-05, "loss": 1.11405783, "memory(GiB)": 85.12, "step": 3115, "train_speed(iter/s)": 0.035256 }, { "acc": 0.66346598, "epoch": 2.45, "learning_rate": 9.367601837014864e-05, "loss": 1.11039734, "memory(GiB)": 85.12, "step": 3120, "train_speed(iter/s)": 0.035256 }, { "acc": 0.66284456, "epoch": 2.45, "learning_rate": 9.364970925111587e-05, "loss": 1.14555759, "memory(GiB)": 85.12, "step": 3125, "train_speed(iter/s)": 0.035257 }, { "acc": 0.65998135, "epoch": 2.45, "learning_rate": 9.362334923019397e-05, "loss": 1.13087349, "memory(GiB)": 85.12, "step": 3130, "train_speed(iter/s)": 0.035256 }, { "acc": 0.66171045, "epoch": 2.46, "learning_rate": 9.359693833812255e-05, "loss": 1.13765631, "memory(GiB)": 85.12, "step": 3135, "train_speed(iter/s)": 0.035257 }, { "acc": 0.66532288, "epoch": 2.46, "learning_rate": 9.357047660570056e-05, "loss": 1.11209698, "memory(GiB)": 85.12, "step": 3140, "train_speed(iter/s)": 0.035258 }, { "acc": 0.67781734, "epoch": 2.46, "learning_rate": 9.354396406378618e-05, "loss": 1.05338039, "memory(GiB)": 85.12, "step": 3145, "train_speed(iter/s)": 0.035259 }, { "acc": 0.66491776, "epoch": 2.47, "learning_rate": 9.35174007432969e-05, "loss": 1.1117816, "memory(GiB)": 85.12, "step": 3150, "train_speed(iter/s)": 0.035259 }, { "acc": 0.65880842, "epoch": 2.47, "learning_rate": 9.34907866752094e-05, "loss": 1.11286173, "memory(GiB)": 85.12, "step": 3155, "train_speed(iter/s)": 0.03526 }, { "acc": 0.66765223, "epoch": 2.48, "learning_rate": 9.346412189055955e-05, "loss": 1.10158033, "memory(GiB)": 85.12, "step": 3160, "train_speed(iter/s)": 0.035261 }, { "acc": 0.65851316, "epoch": 2.48, "learning_rate": 9.343740642044232e-05, "loss": 1.1240366, "memory(GiB)": 85.12, "step": 3165, "train_speed(iter/s)": 0.035262 }, { "acc": 0.66850886, "epoch": 2.48, "learning_rate": 9.341064029601188e-05, "loss": 1.09285021, "memory(GiB)": 85.12, "step": 3170, "train_speed(iter/s)": 0.035264 }, { "acc": 0.67183051, "epoch": 2.49, "learning_rate": 9.338382354848135e-05, "loss": 1.08079424, "memory(GiB)": 85.12, "step": 3175, "train_speed(iter/s)": 0.035265 }, { "acc": 0.67254267, "epoch": 2.49, "learning_rate": 9.335695620912298e-05, "loss": 1.10049, "memory(GiB)": 85.12, "step": 3180, "train_speed(iter/s)": 0.035264 }, { "acc": 0.67040033, "epoch": 2.5, "learning_rate": 9.333003830926799e-05, "loss": 1.09397783, "memory(GiB)": 85.12, "step": 3185, "train_speed(iter/s)": 0.035262 }, { "acc": 0.67311201, "epoch": 2.5, "learning_rate": 9.330306988030651e-05, "loss": 1.07896299, "memory(GiB)": 85.12, "step": 3190, "train_speed(iter/s)": 0.03526 }, { "acc": 0.67690806, "epoch": 2.5, "learning_rate": 9.327605095368769e-05, "loss": 1.10143909, "memory(GiB)": 85.12, "step": 3195, "train_speed(iter/s)": 0.035259 }, { "acc": 0.66498342, "epoch": 2.51, "learning_rate": 9.324898156091948e-05, "loss": 1.11493244, "memory(GiB)": 85.12, "step": 3200, "train_speed(iter/s)": 0.03526 }, { "acc": 0.67564311, "epoch": 2.51, "learning_rate": 9.322186173356873e-05, "loss": 1.08120804, "memory(GiB)": 85.12, "step": 3205, "train_speed(iter/s)": 0.035261 }, { "acc": 0.66903639, "epoch": 2.52, "learning_rate": 9.31946915032611e-05, "loss": 1.08918238, "memory(GiB)": 85.12, "step": 3210, "train_speed(iter/s)": 0.035262 }, { "acc": 0.67794003, "epoch": 2.52, "learning_rate": 9.316747090168101e-05, "loss": 1.07291193, "memory(GiB)": 85.12, "step": 3215, "train_speed(iter/s)": 0.035263 }, { "acc": 0.67113781, "epoch": 2.52, "learning_rate": 9.314019996057161e-05, "loss": 1.08330698, "memory(GiB)": 85.12, "step": 3220, "train_speed(iter/s)": 0.035264 }, { "acc": 0.66346536, "epoch": 2.53, "learning_rate": 9.31128787117348e-05, "loss": 1.13062449, "memory(GiB)": 85.12, "step": 3225, "train_speed(iter/s)": 0.035263 }, { "acc": 0.66941795, "epoch": 2.53, "learning_rate": 9.308550718703111e-05, "loss": 1.11006641, "memory(GiB)": 85.12, "step": 3230, "train_speed(iter/s)": 0.035263 }, { "acc": 0.65443735, "epoch": 2.54, "learning_rate": 9.305808541837969e-05, "loss": 1.12953062, "memory(GiB)": 85.12, "step": 3235, "train_speed(iter/s)": 0.035264 }, { "acc": 0.66207342, "epoch": 2.54, "learning_rate": 9.30306134377583e-05, "loss": 1.10449133, "memory(GiB)": 85.12, "step": 3240, "train_speed(iter/s)": 0.035265 }, { "acc": 0.67379289, "epoch": 2.54, "learning_rate": 9.300309127720326e-05, "loss": 1.07863102, "memory(GiB)": 85.12, "step": 3245, "train_speed(iter/s)": 0.035266 }, { "acc": 0.66185699, "epoch": 2.55, "learning_rate": 9.297551896880938e-05, "loss": 1.10122509, "memory(GiB)": 85.12, "step": 3250, "train_speed(iter/s)": 0.035265 }, { "acc": 0.66234708, "epoch": 2.55, "learning_rate": 9.294789654473002e-05, "loss": 1.14223385, "memory(GiB)": 85.12, "step": 3255, "train_speed(iter/s)": 0.035263 }, { "acc": 0.65402446, "epoch": 2.55, "learning_rate": 9.292022403717688e-05, "loss": 1.13783741, "memory(GiB)": 85.12, "step": 3260, "train_speed(iter/s)": 0.035262 }, { "acc": 0.65600824, "epoch": 2.56, "learning_rate": 9.289250147842014e-05, "loss": 1.15849085, "memory(GiB)": 85.12, "step": 3265, "train_speed(iter/s)": 0.035263 }, { "acc": 0.66205072, "epoch": 2.56, "learning_rate": 9.286472890078832e-05, "loss": 1.1514534, "memory(GiB)": 85.12, "step": 3270, "train_speed(iter/s)": 0.035263 }, { "acc": 0.6654716, "epoch": 2.57, "learning_rate": 9.283690633666826e-05, "loss": 1.11500664, "memory(GiB)": 85.12, "step": 3275, "train_speed(iter/s)": 0.035262 }, { "acc": 0.65892515, "epoch": 2.57, "learning_rate": 9.280903381850511e-05, "loss": 1.13781528, "memory(GiB)": 85.12, "step": 3280, "train_speed(iter/s)": 0.035263 }, { "acc": 0.66229644, "epoch": 2.57, "learning_rate": 9.278111137880228e-05, "loss": 1.11094999, "memory(GiB)": 85.12, "step": 3285, "train_speed(iter/s)": 0.035264 }, { "acc": 0.67452283, "epoch": 2.58, "learning_rate": 9.275313905012135e-05, "loss": 1.0776885, "memory(GiB)": 85.12, "step": 3290, "train_speed(iter/s)": 0.035262 }, { "acc": 0.66590748, "epoch": 2.58, "learning_rate": 9.272511686508215e-05, "loss": 1.08773432, "memory(GiB)": 85.12, "step": 3295, "train_speed(iter/s)": 0.035263 }, { "acc": 0.67609005, "epoch": 2.59, "learning_rate": 9.269704485636259e-05, "loss": 1.06893425, "memory(GiB)": 85.12, "step": 3300, "train_speed(iter/s)": 0.035264 }, { "acc": 0.66203904, "epoch": 2.59, "learning_rate": 9.26689230566987e-05, "loss": 1.13459358, "memory(GiB)": 85.12, "step": 3305, "train_speed(iter/s)": 0.035265 }, { "acc": 0.64897304, "epoch": 2.59, "learning_rate": 9.264075149888459e-05, "loss": 1.15744686, "memory(GiB)": 85.12, "step": 3310, "train_speed(iter/s)": 0.035266 }, { "acc": 0.66094275, "epoch": 2.6, "learning_rate": 9.261253021577236e-05, "loss": 1.15568581, "memory(GiB)": 85.12, "step": 3315, "train_speed(iter/s)": 0.035265 }, { "acc": 0.66136718, "epoch": 2.6, "learning_rate": 9.258425924027212e-05, "loss": 1.10611639, "memory(GiB)": 85.12, "step": 3320, "train_speed(iter/s)": 0.035263 }, { "acc": 0.66478972, "epoch": 2.61, "learning_rate": 9.255593860535194e-05, "loss": 1.10232067, "memory(GiB)": 85.12, "step": 3325, "train_speed(iter/s)": 0.035264 }, { "acc": 0.65793505, "epoch": 2.61, "learning_rate": 9.252756834403778e-05, "loss": 1.14366322, "memory(GiB)": 85.12, "step": 3330, "train_speed(iter/s)": 0.035261 }, { "acc": 0.66237917, "epoch": 2.61, "learning_rate": 9.249914848941348e-05, "loss": 1.12491646, "memory(GiB)": 85.12, "step": 3335, "train_speed(iter/s)": 0.035262 }, { "acc": 0.66611323, "epoch": 2.62, "learning_rate": 9.24706790746207e-05, "loss": 1.09040012, "memory(GiB)": 85.12, "step": 3340, "train_speed(iter/s)": 0.035263 }, { "acc": 0.66260924, "epoch": 2.62, "learning_rate": 9.244216013285894e-05, "loss": 1.10959892, "memory(GiB)": 85.12, "step": 3345, "train_speed(iter/s)": 0.035264 }, { "acc": 0.65978012, "epoch": 2.63, "learning_rate": 9.241359169738537e-05, "loss": 1.11563673, "memory(GiB)": 85.12, "step": 3350, "train_speed(iter/s)": 0.035265 }, { "acc": 0.66533618, "epoch": 2.63, "learning_rate": 9.238497380151495e-05, "loss": 1.10536203, "memory(GiB)": 85.12, "step": 3355, "train_speed(iter/s)": 0.035264 }, { "acc": 0.65439553, "epoch": 2.63, "learning_rate": 9.235630647862031e-05, "loss": 1.1373312, "memory(GiB)": 85.12, "step": 3360, "train_speed(iter/s)": 0.035262 }, { "acc": 0.66412206, "epoch": 2.64, "learning_rate": 9.232758976213167e-05, "loss": 1.09817734, "memory(GiB)": 85.12, "step": 3365, "train_speed(iter/s)": 0.035263 }, { "acc": 0.66531549, "epoch": 2.64, "learning_rate": 9.229882368553692e-05, "loss": 1.10946426, "memory(GiB)": 85.12, "step": 3370, "train_speed(iter/s)": 0.035264 }, { "acc": 0.66333899, "epoch": 2.64, "learning_rate": 9.227000828238146e-05, "loss": 1.0850071, "memory(GiB)": 85.12, "step": 3375, "train_speed(iter/s)": 0.035265 }, { "acc": 0.66884055, "epoch": 2.65, "learning_rate": 9.224114358626823e-05, "loss": 1.09241247, "memory(GiB)": 85.12, "step": 3380, "train_speed(iter/s)": 0.035266 }, { "acc": 0.6862587, "epoch": 2.65, "learning_rate": 9.221222963085765e-05, "loss": 1.06248035, "memory(GiB)": 85.12, "step": 3385, "train_speed(iter/s)": 0.035267 }, { "acc": 0.66075082, "epoch": 2.66, "learning_rate": 9.218326644986758e-05, "loss": 1.1324152, "memory(GiB)": 85.12, "step": 3390, "train_speed(iter/s)": 0.035263 }, { "acc": 0.67370081, "epoch": 2.66, "learning_rate": 9.215425407707329e-05, "loss": 1.08881779, "memory(GiB)": 85.12, "step": 3395, "train_speed(iter/s)": 0.035264 }, { "acc": 0.65856786, "epoch": 2.66, "learning_rate": 9.212519254630742e-05, "loss": 1.12874718, "memory(GiB)": 85.12, "step": 3400, "train_speed(iter/s)": 0.035265 }, { "acc": 0.65734329, "epoch": 2.67, "learning_rate": 9.20960818914599e-05, "loss": 1.15164032, "memory(GiB)": 85.12, "step": 3405, "train_speed(iter/s)": 0.035264 }, { "acc": 0.67054696, "epoch": 2.67, "learning_rate": 9.206692214647803e-05, "loss": 1.10470772, "memory(GiB)": 85.12, "step": 3410, "train_speed(iter/s)": 0.035265 }, { "acc": 0.67578359, "epoch": 2.68, "learning_rate": 9.203771334536626e-05, "loss": 1.08748617, "memory(GiB)": 85.12, "step": 3415, "train_speed(iter/s)": 0.035266 }, { "acc": 0.66613045, "epoch": 2.68, "learning_rate": 9.200845552218626e-05, "loss": 1.12501793, "memory(GiB)": 85.12, "step": 3420, "train_speed(iter/s)": 0.035266 }, { "acc": 0.66724539, "epoch": 2.68, "learning_rate": 9.197914871105696e-05, "loss": 1.11535177, "memory(GiB)": 85.12, "step": 3425, "train_speed(iter/s)": 0.035265 }, { "acc": 0.66614714, "epoch": 2.69, "learning_rate": 9.194979294615432e-05, "loss": 1.11667767, "memory(GiB)": 85.12, "step": 3430, "train_speed(iter/s)": 0.035264 }, { "acc": 0.67240911, "epoch": 2.69, "learning_rate": 9.192038826171138e-05, "loss": 1.10492306, "memory(GiB)": 85.12, "step": 3435, "train_speed(iter/s)": 0.035265 }, { "acc": 0.67085233, "epoch": 2.7, "learning_rate": 9.189093469201833e-05, "loss": 1.11214399, "memory(GiB)": 85.12, "step": 3440, "train_speed(iter/s)": 0.035266 }, { "acc": 0.66316462, "epoch": 2.7, "learning_rate": 9.186143227142225e-05, "loss": 1.11524057, "memory(GiB)": 85.12, "step": 3445, "train_speed(iter/s)": 0.035267 }, { "acc": 0.67732773, "epoch": 2.7, "learning_rate": 9.183188103432729e-05, "loss": 1.06655407, "memory(GiB)": 85.12, "step": 3450, "train_speed(iter/s)": 0.035268 }, { "acc": 0.65240765, "epoch": 2.71, "learning_rate": 9.180228101519443e-05, "loss": 1.15858974, "memory(GiB)": 85.12, "step": 3455, "train_speed(iter/s)": 0.035266 }, { "acc": 0.66337948, "epoch": 2.71, "learning_rate": 9.17726322485416e-05, "loss": 1.12186775, "memory(GiB)": 85.12, "step": 3460, "train_speed(iter/s)": 0.035265 }, { "acc": 0.67122927, "epoch": 2.72, "learning_rate": 9.174293476894356e-05, "loss": 1.08822432, "memory(GiB)": 85.12, "step": 3465, "train_speed(iter/s)": 0.035266 }, { "acc": 0.65668936, "epoch": 2.72, "learning_rate": 9.171318861103188e-05, "loss": 1.14420862, "memory(GiB)": 85.12, "step": 3470, "train_speed(iter/s)": 0.035267 }, { "acc": 0.65638585, "epoch": 2.72, "learning_rate": 9.16833938094949e-05, "loss": 1.13455896, "memory(GiB)": 85.12, "step": 3475, "train_speed(iter/s)": 0.035268 }, { "acc": 0.68326602, "epoch": 2.73, "learning_rate": 9.165355039907766e-05, "loss": 1.05959072, "memory(GiB)": 85.12, "step": 3480, "train_speed(iter/s)": 0.035269 }, { "acc": 0.64972272, "epoch": 2.73, "learning_rate": 9.162365841458192e-05, "loss": 1.13724003, "memory(GiB)": 85.12, "step": 3485, "train_speed(iter/s)": 0.035268 }, { "acc": 0.66329112, "epoch": 2.74, "learning_rate": 9.159371789086606e-05, "loss": 1.10761976, "memory(GiB)": 85.12, "step": 3490, "train_speed(iter/s)": 0.035269 }, { "acc": 0.65560665, "epoch": 2.74, "learning_rate": 9.156372886284507e-05, "loss": 1.14408922, "memory(GiB)": 85.12, "step": 3495, "train_speed(iter/s)": 0.03527 }, { "acc": 0.66591907, "epoch": 2.74, "learning_rate": 9.15336913654905e-05, "loss": 1.1079567, "memory(GiB)": 85.12, "step": 3500, "train_speed(iter/s)": 0.035271 }, { "acc": 0.65924816, "epoch": 2.75, "learning_rate": 9.150360543383042e-05, "loss": 1.12748995, "memory(GiB)": 85.12, "step": 3505, "train_speed(iter/s)": 0.035271 }, { "acc": 0.66098423, "epoch": 2.75, "learning_rate": 9.147347110294941e-05, "loss": 1.13659554, "memory(GiB)": 85.12, "step": 3510, "train_speed(iter/s)": 0.035272 }, { "acc": 0.66335301, "epoch": 2.75, "learning_rate": 9.144328840798848e-05, "loss": 1.09939146, "memory(GiB)": 85.12, "step": 3515, "train_speed(iter/s)": 0.035271 }, { "acc": 0.65486746, "epoch": 2.76, "learning_rate": 9.141305738414499e-05, "loss": 1.14847898, "memory(GiB)": 85.12, "step": 3520, "train_speed(iter/s)": 0.035271 }, { "acc": 0.65917635, "epoch": 2.76, "learning_rate": 9.138277806667271e-05, "loss": 1.14824829, "memory(GiB)": 85.12, "step": 3525, "train_speed(iter/s)": 0.03527 }, { "acc": 0.67288303, "epoch": 2.77, "learning_rate": 9.135245049088173e-05, "loss": 1.05631142, "memory(GiB)": 85.12, "step": 3530, "train_speed(iter/s)": 0.035268 }, { "acc": 0.67359824, "epoch": 2.77, "learning_rate": 9.132207469213836e-05, "loss": 1.06311998, "memory(GiB)": 85.12, "step": 3535, "train_speed(iter/s)": 0.035269 }, { "acc": 0.6569242, "epoch": 2.77, "learning_rate": 9.129165070586523e-05, "loss": 1.12648764, "memory(GiB)": 85.12, "step": 3540, "train_speed(iter/s)": 0.03527 }, { "acc": 0.67112989, "epoch": 2.78, "learning_rate": 9.12611785675411e-05, "loss": 1.10996456, "memory(GiB)": 85.12, "step": 3545, "train_speed(iter/s)": 0.035271 }, { "acc": 0.66401305, "epoch": 2.78, "learning_rate": 9.123065831270091e-05, "loss": 1.11186533, "memory(GiB)": 85.12, "step": 3550, "train_speed(iter/s)": 0.035267 }, { "acc": 0.6686008, "epoch": 2.79, "learning_rate": 9.120008997693569e-05, "loss": 1.08463211, "memory(GiB)": 85.12, "step": 3555, "train_speed(iter/s)": 0.035268 }, { "acc": 0.66068683, "epoch": 2.79, "learning_rate": 9.116947359589255e-05, "loss": 1.12286921, "memory(GiB)": 85.12, "step": 3560, "train_speed(iter/s)": 0.035269 }, { "acc": 0.67136216, "epoch": 2.79, "learning_rate": 9.113880920527463e-05, "loss": 1.10150156, "memory(GiB)": 85.12, "step": 3565, "train_speed(iter/s)": 0.03527 }, { "acc": 0.6669219, "epoch": 2.8, "learning_rate": 9.110809684084107e-05, "loss": 1.11419725, "memory(GiB)": 85.12, "step": 3570, "train_speed(iter/s)": 0.03527 }, { "acc": 0.65082202, "epoch": 2.8, "learning_rate": 9.107733653840692e-05, "loss": 1.17740231, "memory(GiB)": 85.12, "step": 3575, "train_speed(iter/s)": 0.035269 }, { "acc": 0.66411786, "epoch": 2.81, "learning_rate": 9.104652833384317e-05, "loss": 1.10019693, "memory(GiB)": 85.12, "step": 3580, "train_speed(iter/s)": 0.035268 }, { "acc": 0.67525086, "epoch": 2.81, "learning_rate": 9.101567226307664e-05, "loss": 1.06568289, "memory(GiB)": 85.12, "step": 3585, "train_speed(iter/s)": 0.035269 }, { "acc": 0.66854186, "epoch": 2.81, "learning_rate": 9.098476836208997e-05, "loss": 1.10631943, "memory(GiB)": 85.12, "step": 3590, "train_speed(iter/s)": 0.03527 }, { "acc": 0.66509161, "epoch": 2.82, "learning_rate": 9.095381666692164e-05, "loss": 1.10477247, "memory(GiB)": 85.12, "step": 3595, "train_speed(iter/s)": 0.035271 }, { "acc": 0.66377773, "epoch": 2.82, "learning_rate": 9.092281721366575e-05, "loss": 1.09822521, "memory(GiB)": 85.12, "step": 3600, "train_speed(iter/s)": 0.035271 }, { "acc": 0.66034365, "epoch": 2.83, "learning_rate": 9.089177003847218e-05, "loss": 1.15647116, "memory(GiB)": 85.12, "step": 3605, "train_speed(iter/s)": 0.035272 }, { "acc": 0.67322073, "epoch": 2.83, "learning_rate": 9.086067517754646e-05, "loss": 1.09445295, "memory(GiB)": 85.12, "step": 3610, "train_speed(iter/s)": 0.035272 }, { "acc": 0.6580318, "epoch": 2.83, "learning_rate": 9.082953266714968e-05, "loss": 1.14048214, "memory(GiB)": 85.12, "step": 3615, "train_speed(iter/s)": 0.035273 }, { "acc": 0.66556892, "epoch": 2.84, "learning_rate": 9.079834254359854e-05, "loss": 1.10402184, "memory(GiB)": 85.12, "step": 3620, "train_speed(iter/s)": 0.035274 }, { "acc": 0.67437925, "epoch": 2.84, "learning_rate": 9.076710484326522e-05, "loss": 1.09448032, "memory(GiB)": 85.12, "step": 3625, "train_speed(iter/s)": 0.035275 }, { "acc": 0.66442852, "epoch": 2.84, "learning_rate": 9.073581960257742e-05, "loss": 1.14604816, "memory(GiB)": 85.12, "step": 3630, "train_speed(iter/s)": 0.035277 }, { "acc": 0.66115713, "epoch": 2.85, "learning_rate": 9.070448685801829e-05, "loss": 1.07533693, "memory(GiB)": 85.12, "step": 3635, "train_speed(iter/s)": 0.035277 }, { "acc": 0.67220783, "epoch": 2.85, "learning_rate": 9.067310664612631e-05, "loss": 1.11974783, "memory(GiB)": 85.12, "step": 3640, "train_speed(iter/s)": 0.035278 }, { "acc": 0.67338705, "epoch": 2.86, "learning_rate": 9.06416790034954e-05, "loss": 1.11144781, "memory(GiB)": 85.12, "step": 3645, "train_speed(iter/s)": 0.035277 }, { "acc": 0.66914973, "epoch": 2.86, "learning_rate": 9.06102039667747e-05, "loss": 1.08288326, "memory(GiB)": 85.12, "step": 3650, "train_speed(iter/s)": 0.035277 }, { "acc": 0.66724291, "epoch": 2.86, "learning_rate": 9.057868157266873e-05, "loss": 1.09231892, "memory(GiB)": 85.12, "step": 3655, "train_speed(iter/s)": 0.035278 }, { "acc": 0.66573162, "epoch": 2.87, "learning_rate": 9.054711185793712e-05, "loss": 1.13242044, "memory(GiB)": 85.12, "step": 3660, "train_speed(iter/s)": 0.035279 }, { "acc": 0.66399899, "epoch": 2.87, "learning_rate": 9.051549485939472e-05, "loss": 1.08897943, "memory(GiB)": 85.12, "step": 3665, "train_speed(iter/s)": 0.03528 }, { "acc": 0.65977526, "epoch": 2.88, "learning_rate": 9.048383061391159e-05, "loss": 1.12547369, "memory(GiB)": 85.12, "step": 3670, "train_speed(iter/s)": 0.035281 }, { "acc": 0.65447931, "epoch": 2.88, "learning_rate": 9.045211915841279e-05, "loss": 1.16677542, "memory(GiB)": 85.12, "step": 3675, "train_speed(iter/s)": 0.035281 }, { "acc": 0.67030063, "epoch": 2.88, "learning_rate": 9.04203605298785e-05, "loss": 1.12552681, "memory(GiB)": 85.12, "step": 3680, "train_speed(iter/s)": 0.035282 }, { "acc": 0.6612977, "epoch": 2.89, "learning_rate": 9.038855476534385e-05, "loss": 1.1319479, "memory(GiB)": 85.12, "step": 3685, "train_speed(iter/s)": 0.035281 }, { "acc": 0.6624711, "epoch": 2.89, "learning_rate": 9.035670190189902e-05, "loss": 1.09792414, "memory(GiB)": 85.12, "step": 3690, "train_speed(iter/s)": 0.03528 }, { "acc": 0.67158294, "epoch": 2.9, "learning_rate": 9.032480197668902e-05, "loss": 1.10716963, "memory(GiB)": 85.12, "step": 3695, "train_speed(iter/s)": 0.035281 }, { "acc": 0.66656246, "epoch": 2.9, "learning_rate": 9.029285502691382e-05, "loss": 1.1186574, "memory(GiB)": 85.12, "step": 3700, "train_speed(iter/s)": 0.035282 }, { "acc": 0.66599183, "epoch": 2.9, "learning_rate": 9.026086108982819e-05, "loss": 1.09721441, "memory(GiB)": 85.12, "step": 3705, "train_speed(iter/s)": 0.035283 }, { "acc": 0.67279172, "epoch": 2.91, "learning_rate": 9.022882020274167e-05, "loss": 1.09024601, "memory(GiB)": 85.12, "step": 3710, "train_speed(iter/s)": 0.035283 }, { "acc": 0.65694847, "epoch": 2.91, "learning_rate": 9.019673240301862e-05, "loss": 1.12552786, "memory(GiB)": 85.12, "step": 3715, "train_speed(iter/s)": 0.035282 }, { "acc": 0.67931943, "epoch": 2.92, "learning_rate": 9.016459772807804e-05, "loss": 1.08509073, "memory(GiB)": 85.12, "step": 3720, "train_speed(iter/s)": 0.035283 }, { "acc": 0.67091622, "epoch": 2.92, "learning_rate": 9.013241621539364e-05, "loss": 1.08755741, "memory(GiB)": 85.12, "step": 3725, "train_speed(iter/s)": 0.035283 }, { "acc": 0.66849947, "epoch": 2.92, "learning_rate": 9.01001879024937e-05, "loss": 1.11163702, "memory(GiB)": 85.12, "step": 3730, "train_speed(iter/s)": 0.035284 }, { "acc": 0.66142335, "epoch": 2.93, "learning_rate": 9.006791282696113e-05, "loss": 1.11982279, "memory(GiB)": 85.12, "step": 3735, "train_speed(iter/s)": 0.035283 }, { "acc": 0.66708808, "epoch": 2.93, "learning_rate": 9.003559102643335e-05, "loss": 1.12939367, "memory(GiB)": 85.12, "step": 3740, "train_speed(iter/s)": 0.035284 }, { "acc": 0.65388312, "epoch": 2.93, "learning_rate": 9.000322253860225e-05, "loss": 1.13458776, "memory(GiB)": 85.12, "step": 3745, "train_speed(iter/s)": 0.035282 }, { "acc": 0.66235805, "epoch": 2.94, "learning_rate": 8.997080740121417e-05, "loss": 1.12270813, "memory(GiB)": 85.12, "step": 3750, "train_speed(iter/s)": 0.035283 }, { "acc": 0.67565556, "epoch": 2.94, "learning_rate": 8.993834565206989e-05, "loss": 1.08064222, "memory(GiB)": 85.12, "step": 3755, "train_speed(iter/s)": 0.035284 }, { "acc": 0.67491579, "epoch": 2.95, "learning_rate": 8.99058373290245e-05, "loss": 1.07194624, "memory(GiB)": 85.12, "step": 3760, "train_speed(iter/s)": 0.035281 }, { "acc": 0.66467142, "epoch": 2.95, "learning_rate": 8.987328246998742e-05, "loss": 1.1253994, "memory(GiB)": 85.12, "step": 3765, "train_speed(iter/s)": 0.035281 }, { "acc": 0.65193849, "epoch": 2.95, "learning_rate": 8.984068111292232e-05, "loss": 1.16464453, "memory(GiB)": 85.12, "step": 3770, "train_speed(iter/s)": 0.035281 }, { "acc": 0.66975975, "epoch": 2.96, "learning_rate": 8.980803329584712e-05, "loss": 1.07693071, "memory(GiB)": 85.12, "step": 3775, "train_speed(iter/s)": 0.035278 }, { "acc": 0.65947042, "epoch": 2.96, "learning_rate": 8.977533905683393e-05, "loss": 1.12480698, "memory(GiB)": 85.12, "step": 3780, "train_speed(iter/s)": 0.035278 }, { "acc": 0.65800185, "epoch": 2.97, "learning_rate": 8.974259843400894e-05, "loss": 1.1366251, "memory(GiB)": 85.12, "step": 3785, "train_speed(iter/s)": 0.035275 }, { "acc": 0.67165961, "epoch": 2.97, "learning_rate": 8.970981146555247e-05, "loss": 1.10998592, "memory(GiB)": 85.12, "step": 3790, "train_speed(iter/s)": 0.035276 }, { "acc": 0.67054834, "epoch": 2.97, "learning_rate": 8.967697818969889e-05, "loss": 1.08357964, "memory(GiB)": 85.12, "step": 3795, "train_speed(iter/s)": 0.035273 }, { "acc": 0.65625038, "epoch": 2.98, "learning_rate": 8.964409864473656e-05, "loss": 1.13358316, "memory(GiB)": 85.12, "step": 3800, "train_speed(iter/s)": 0.035272 }, { "acc": 0.66279435, "epoch": 2.98, "learning_rate": 8.961117286900777e-05, "loss": 1.10367622, "memory(GiB)": 85.12, "step": 3805, "train_speed(iter/s)": 0.035272 }, { "acc": 0.65385418, "epoch": 2.99, "learning_rate": 8.957820090090877e-05, "loss": 1.14131985, "memory(GiB)": 85.12, "step": 3810, "train_speed(iter/s)": 0.035273 }, { "acc": 0.6783905, "epoch": 2.99, "learning_rate": 8.954518277888966e-05, "loss": 1.06005888, "memory(GiB)": 85.12, "step": 3815, "train_speed(iter/s)": 0.035274 }, { "acc": 0.66349859, "epoch": 2.99, "learning_rate": 8.951211854145434e-05, "loss": 1.11471567, "memory(GiB)": 85.12, "step": 3820, "train_speed(iter/s)": 0.035273 }, { "acc": 0.65545945, "epoch": 3.0, "learning_rate": 8.947900822716053e-05, "loss": 1.18849239, "memory(GiB)": 85.12, "step": 3825, "train_speed(iter/s)": 0.035274 }, { "epoch": 3.0, "eval_acc": 0.6859504132231405, "eval_loss": 1.0273702144622803, "eval_runtime": 85.0825, "eval_samples_per_second": 1.093, "eval_steps_per_second": 1.093, "step": 3828 }, { "acc": 0.66745429, "epoch": 3.0, "learning_rate": 8.944585187461963e-05, "loss": 1.09518471, "memory(GiB)": 85.12, "step": 3830, "train_speed(iter/s)": 0.035248 }, { "acc": 0.6809526, "epoch": 3.01, "learning_rate": 8.941264952249674e-05, "loss": 1.05991888, "memory(GiB)": 85.12, "step": 3835, "train_speed(iter/s)": 0.035249 }, { "acc": 0.67283616, "epoch": 3.01, "learning_rate": 8.937940120951068e-05, "loss": 1.10407562, "memory(GiB)": 85.12, "step": 3840, "train_speed(iter/s)": 0.03525 }, { "acc": 0.66963844, "epoch": 3.01, "learning_rate": 8.934610697443375e-05, "loss": 1.08318148, "memory(GiB)": 85.12, "step": 3845, "train_speed(iter/s)": 0.03525 }, { "acc": 0.67286458, "epoch": 3.02, "learning_rate": 8.931276685609187e-05, "loss": 1.09335747, "memory(GiB)": 85.12, "step": 3850, "train_speed(iter/s)": 0.035249 }, { "acc": 0.68292265, "epoch": 3.02, "learning_rate": 8.927938089336444e-05, "loss": 1.03646545, "memory(GiB)": 85.12, "step": 3855, "train_speed(iter/s)": 0.035248 }, { "acc": 0.67601447, "epoch": 3.03, "learning_rate": 8.924594912518435e-05, "loss": 1.03905144, "memory(GiB)": 85.12, "step": 3860, "train_speed(iter/s)": 0.035249 }, { "acc": 0.68262358, "epoch": 3.03, "learning_rate": 8.921247159053787e-05, "loss": 1.04506426, "memory(GiB)": 85.12, "step": 3865, "train_speed(iter/s)": 0.035248 }, { "acc": 0.67279882, "epoch": 3.03, "learning_rate": 8.917894832846467e-05, "loss": 1.07862301, "memory(GiB)": 85.12, "step": 3870, "train_speed(iter/s)": 0.035249 }, { "acc": 0.65542626, "epoch": 3.04, "learning_rate": 8.914537937805776e-05, "loss": 1.15798473, "memory(GiB)": 85.12, "step": 3875, "train_speed(iter/s)": 0.03525 }, { "acc": 0.68080411, "epoch": 3.04, "learning_rate": 8.911176477846334e-05, "loss": 1.03082008, "memory(GiB)": 85.12, "step": 3880, "train_speed(iter/s)": 0.03525 }, { "acc": 0.66806016, "epoch": 3.04, "learning_rate": 8.907810456888097e-05, "loss": 1.07673035, "memory(GiB)": 85.12, "step": 3885, "train_speed(iter/s)": 0.035251 }, { "acc": 0.67421083, "epoch": 3.05, "learning_rate": 8.90443987885633e-05, "loss": 1.11088667, "memory(GiB)": 85.12, "step": 3890, "train_speed(iter/s)": 0.035252 }, { "acc": 0.65671282, "epoch": 3.05, "learning_rate": 8.901064747681616e-05, "loss": 1.14146271, "memory(GiB)": 85.12, "step": 3895, "train_speed(iter/s)": 0.035251 }, { "acc": 0.67231884, "epoch": 3.06, "learning_rate": 8.897685067299846e-05, "loss": 1.08252373, "memory(GiB)": 85.12, "step": 3900, "train_speed(iter/s)": 0.03525 }, { "acc": 0.677108, "epoch": 3.06, "learning_rate": 8.89430084165222e-05, "loss": 1.05940466, "memory(GiB)": 85.12, "step": 3905, "train_speed(iter/s)": 0.035248 }, { "acc": 0.68133845, "epoch": 3.06, "learning_rate": 8.890912074685236e-05, "loss": 1.02218704, "memory(GiB)": 85.12, "step": 3910, "train_speed(iter/s)": 0.035249 }, { "acc": 0.67558608, "epoch": 3.07, "learning_rate": 8.887518770350685e-05, "loss": 1.06314049, "memory(GiB)": 85.12, "step": 3915, "train_speed(iter/s)": 0.03525 }, { "acc": 0.66001134, "epoch": 3.07, "learning_rate": 8.884120932605653e-05, "loss": 1.12467155, "memory(GiB)": 85.12, "step": 3920, "train_speed(iter/s)": 0.035251 }, { "acc": 0.67021894, "epoch": 3.08, "learning_rate": 8.880718565412511e-05, "loss": 1.07008371, "memory(GiB)": 85.12, "step": 3925, "train_speed(iter/s)": 0.035252 }, { "acc": 0.6668273, "epoch": 3.08, "learning_rate": 8.877311672738913e-05, "loss": 1.09233027, "memory(GiB)": 85.12, "step": 3930, "train_speed(iter/s)": 0.035252 }, { "acc": 0.66963663, "epoch": 3.08, "learning_rate": 8.87390025855779e-05, "loss": 1.07303791, "memory(GiB)": 85.12, "step": 3935, "train_speed(iter/s)": 0.035253 }, { "acc": 0.67969995, "epoch": 3.09, "learning_rate": 8.870484326847345e-05, "loss": 1.06793242, "memory(GiB)": 85.12, "step": 3940, "train_speed(iter/s)": 0.035252 }, { "acc": 0.66840816, "epoch": 3.09, "learning_rate": 8.867063881591049e-05, "loss": 1.08742456, "memory(GiB)": 85.12, "step": 3945, "train_speed(iter/s)": 0.035252 }, { "acc": 0.66912541, "epoch": 3.1, "learning_rate": 8.863638926777639e-05, "loss": 1.09435921, "memory(GiB)": 85.12, "step": 3950, "train_speed(iter/s)": 0.035253 }, { "acc": 0.66795225, "epoch": 3.1, "learning_rate": 8.860209466401106e-05, "loss": 1.1207943, "memory(GiB)": 85.12, "step": 3955, "train_speed(iter/s)": 0.035252 }, { "acc": 0.66590056, "epoch": 3.1, "learning_rate": 8.856775504460702e-05, "loss": 1.07821617, "memory(GiB)": 85.12, "step": 3960, "train_speed(iter/s)": 0.035252 }, { "acc": 0.67165837, "epoch": 3.11, "learning_rate": 8.853337044960918e-05, "loss": 1.08614855, "memory(GiB)": 85.12, "step": 3965, "train_speed(iter/s)": 0.035253 }, { "acc": 0.66955404, "epoch": 3.11, "learning_rate": 8.849894091911503e-05, "loss": 1.09462404, "memory(GiB)": 85.12, "step": 3970, "train_speed(iter/s)": 0.035254 }, { "acc": 0.68378253, "epoch": 3.12, "learning_rate": 8.846446649327437e-05, "loss": 1.03468351, "memory(GiB)": 85.12, "step": 3975, "train_speed(iter/s)": 0.035254 }, { "acc": 0.66864328, "epoch": 3.12, "learning_rate": 8.842994721228936e-05, "loss": 1.07628345, "memory(GiB)": 85.12, "step": 3980, "train_speed(iter/s)": 0.035253 }, { "acc": 0.65653524, "epoch": 3.12, "learning_rate": 8.839538311641451e-05, "loss": 1.13923292, "memory(GiB)": 85.12, "step": 3985, "train_speed(iter/s)": 0.035254 }, { "acc": 0.67333913, "epoch": 3.13, "learning_rate": 8.836077424595657e-05, "loss": 1.07015495, "memory(GiB)": 85.12, "step": 3990, "train_speed(iter/s)": 0.035255 }, { "acc": 0.67381926, "epoch": 3.13, "learning_rate": 8.832612064127448e-05, "loss": 1.06587915, "memory(GiB)": 85.12, "step": 3995, "train_speed(iter/s)": 0.035254 }, { "acc": 0.66329141, "epoch": 3.13, "learning_rate": 8.829142234277936e-05, "loss": 1.10319233, "memory(GiB)": 85.12, "step": 4000, "train_speed(iter/s)": 0.035253 }, { "acc": 0.68063631, "epoch": 3.14, "learning_rate": 8.825667939093449e-05, "loss": 1.07182493, "memory(GiB)": 85.12, "step": 4005, "train_speed(iter/s)": 0.035253 }, { "acc": 0.66322513, "epoch": 3.14, "learning_rate": 8.822189182625516e-05, "loss": 1.12689009, "memory(GiB)": 85.12, "step": 4010, "train_speed(iter/s)": 0.035254 }, { "acc": 0.67340002, "epoch": 3.15, "learning_rate": 8.818705968930874e-05, "loss": 1.07936611, "memory(GiB)": 85.12, "step": 4015, "train_speed(iter/s)": 0.035254 }, { "acc": 0.66903429, "epoch": 3.15, "learning_rate": 8.815218302071452e-05, "loss": 1.10216112, "memory(GiB)": 85.12, "step": 4020, "train_speed(iter/s)": 0.035253 }, { "acc": 0.66168714, "epoch": 3.15, "learning_rate": 8.811726186114377e-05, "loss": 1.117346, "memory(GiB)": 85.12, "step": 4025, "train_speed(iter/s)": 0.035254 }, { "acc": 0.66286302, "epoch": 3.16, "learning_rate": 8.808229625131963e-05, "loss": 1.10122309, "memory(GiB)": 85.12, "step": 4030, "train_speed(iter/s)": 0.035255 }, { "acc": 0.67067194, "epoch": 3.16, "learning_rate": 8.804728623201706e-05, "loss": 1.09093103, "memory(GiB)": 85.12, "step": 4035, "train_speed(iter/s)": 0.035254 }, { "acc": 0.66602073, "epoch": 3.17, "learning_rate": 8.801223184406283e-05, "loss": 1.0800581, "memory(GiB)": 85.12, "step": 4040, "train_speed(iter/s)": 0.035255 }, { "acc": 0.67198768, "epoch": 3.17, "learning_rate": 8.797713312833541e-05, "loss": 1.05843801, "memory(GiB)": 85.12, "step": 4045, "train_speed(iter/s)": 0.035254 }, { "acc": 0.67939711, "epoch": 3.17, "learning_rate": 8.794199012576502e-05, "loss": 1.04836597, "memory(GiB)": 85.12, "step": 4050, "train_speed(iter/s)": 0.035255 }, { "acc": 0.65837584, "epoch": 3.18, "learning_rate": 8.790680287733349e-05, "loss": 1.11400471, "memory(GiB)": 85.12, "step": 4055, "train_speed(iter/s)": 0.035255 }, { "acc": 0.68302851, "epoch": 3.18, "learning_rate": 8.787157142407422e-05, "loss": 1.04046106, "memory(GiB)": 85.12, "step": 4060, "train_speed(iter/s)": 0.035256 }, { "acc": 0.66324239, "epoch": 3.19, "learning_rate": 8.783629580707225e-05, "loss": 1.13401537, "memory(GiB)": 85.12, "step": 4065, "train_speed(iter/s)": 0.035253 }, { "acc": 0.66952362, "epoch": 3.19, "learning_rate": 8.780097606746404e-05, "loss": 1.10449305, "memory(GiB)": 85.12, "step": 4070, "train_speed(iter/s)": 0.035252 }, { "acc": 0.6671958, "epoch": 3.19, "learning_rate": 8.77656122464375e-05, "loss": 1.09806108, "memory(GiB)": 85.12, "step": 4075, "train_speed(iter/s)": 0.03525 }, { "acc": 0.66547656, "epoch": 3.2, "learning_rate": 8.773020438523202e-05, "loss": 1.10695295, "memory(GiB)": 85.12, "step": 4080, "train_speed(iter/s)": 0.035251 }, { "acc": 0.66510615, "epoch": 3.2, "learning_rate": 8.769475252513826e-05, "loss": 1.13047438, "memory(GiB)": 85.12, "step": 4085, "train_speed(iter/s)": 0.035251 }, { "acc": 0.66734986, "epoch": 3.21, "learning_rate": 8.765925670749824e-05, "loss": 1.09393444, "memory(GiB)": 85.12, "step": 4090, "train_speed(iter/s)": 0.035252 }, { "acc": 0.67996044, "epoch": 3.21, "learning_rate": 8.762371697370523e-05, "loss": 1.06481838, "memory(GiB)": 85.12, "step": 4095, "train_speed(iter/s)": 0.035252 }, { "acc": 0.6714335, "epoch": 3.21, "learning_rate": 8.75881333652037e-05, "loss": 1.10486279, "memory(GiB)": 85.12, "step": 4100, "train_speed(iter/s)": 0.035253 }, { "acc": 0.67522779, "epoch": 3.22, "learning_rate": 8.755250592348933e-05, "loss": 1.07262745, "memory(GiB)": 85.12, "step": 4105, "train_speed(iter/s)": 0.035252 }, { "acc": 0.67835989, "epoch": 3.22, "learning_rate": 8.751683469010887e-05, "loss": 1.05402975, "memory(GiB)": 85.12, "step": 4110, "train_speed(iter/s)": 0.035253 }, { "acc": 0.6806859, "epoch": 3.22, "learning_rate": 8.748111970666011e-05, "loss": 1.01472445, "memory(GiB)": 85.12, "step": 4115, "train_speed(iter/s)": 0.035252 }, { "acc": 0.67549682, "epoch": 3.23, "learning_rate": 8.744536101479195e-05, "loss": 1.10141077, "memory(GiB)": 85.12, "step": 4120, "train_speed(iter/s)": 0.035253 }, { "acc": 0.67967439, "epoch": 3.23, "learning_rate": 8.74095586562042e-05, "loss": 1.07214756, "memory(GiB)": 85.12, "step": 4125, "train_speed(iter/s)": 0.035253 }, { "acc": 0.66929421, "epoch": 3.24, "learning_rate": 8.737371267264757e-05, "loss": 1.09711647, "memory(GiB)": 85.12, "step": 4130, "train_speed(iter/s)": 0.035254 }, { "acc": 0.66597781, "epoch": 3.24, "learning_rate": 8.733782310592369e-05, "loss": 1.09240694, "memory(GiB)": 85.12, "step": 4135, "train_speed(iter/s)": 0.035253 }, { "acc": 0.68246903, "epoch": 3.24, "learning_rate": 8.7301889997885e-05, "loss": 1.04920025, "memory(GiB)": 85.12, "step": 4140, "train_speed(iter/s)": 0.035252 }, { "acc": 0.66437941, "epoch": 3.25, "learning_rate": 8.726591339043472e-05, "loss": 1.12496767, "memory(GiB)": 85.12, "step": 4145, "train_speed(iter/s)": 0.035253 }, { "acc": 0.66526365, "epoch": 3.25, "learning_rate": 8.722989332552681e-05, "loss": 1.12980747, "memory(GiB)": 85.12, "step": 4150, "train_speed(iter/s)": 0.035253 }, { "acc": 0.65992541, "epoch": 3.26, "learning_rate": 8.719382984516581e-05, "loss": 1.1206811, "memory(GiB)": 85.12, "step": 4155, "train_speed(iter/s)": 0.035252 }, { "acc": 0.66868792, "epoch": 3.26, "learning_rate": 8.715772299140705e-05, "loss": 1.09060631, "memory(GiB)": 85.12, "step": 4160, "train_speed(iter/s)": 0.035252 }, { "acc": 0.66484022, "epoch": 3.26, "learning_rate": 8.712157280635634e-05, "loss": 1.09277716, "memory(GiB)": 85.12, "step": 4165, "train_speed(iter/s)": 0.035253 }, { "acc": 0.67286205, "epoch": 3.27, "learning_rate": 8.708537933216999e-05, "loss": 1.093993, "memory(GiB)": 85.12, "step": 4170, "train_speed(iter/s)": 0.035252 }, { "acc": 0.66262193, "epoch": 3.27, "learning_rate": 8.704914261105488e-05, "loss": 1.10075159, "memory(GiB)": 85.12, "step": 4175, "train_speed(iter/s)": 0.035253 }, { "acc": 0.6764492, "epoch": 3.28, "learning_rate": 8.701286268526827e-05, "loss": 1.05436106, "memory(GiB)": 85.12, "step": 4180, "train_speed(iter/s)": 0.035252 }, { "acc": 0.66421719, "epoch": 3.28, "learning_rate": 8.69765395971178e-05, "loss": 1.12668571, "memory(GiB)": 85.12, "step": 4185, "train_speed(iter/s)": 0.035253 }, { "acc": 0.66706834, "epoch": 3.28, "learning_rate": 8.694017338896149e-05, "loss": 1.0926302, "memory(GiB)": 85.12, "step": 4190, "train_speed(iter/s)": 0.035253 }, { "acc": 0.67663679, "epoch": 3.29, "learning_rate": 8.690376410320758e-05, "loss": 1.04948025, "memory(GiB)": 85.12, "step": 4195, "train_speed(iter/s)": 0.035254 }, { "acc": 0.66628132, "epoch": 3.29, "learning_rate": 8.686731178231459e-05, "loss": 1.08100224, "memory(GiB)": 85.12, "step": 4200, "train_speed(iter/s)": 0.035252 }, { "acc": 0.67406206, "epoch": 3.3, "learning_rate": 8.68308164687912e-05, "loss": 1.07763453, "memory(GiB)": 85.12, "step": 4205, "train_speed(iter/s)": 0.035252 }, { "acc": 0.66126747, "epoch": 3.3, "learning_rate": 8.679427820519625e-05, "loss": 1.11055937, "memory(GiB)": 85.12, "step": 4210, "train_speed(iter/s)": 0.035253 }, { "acc": 0.65678339, "epoch": 3.3, "learning_rate": 8.675769703413863e-05, "loss": 1.12440147, "memory(GiB)": 85.12, "step": 4215, "train_speed(iter/s)": 0.035252 }, { "acc": 0.66874876, "epoch": 3.31, "learning_rate": 8.672107299827732e-05, "loss": 1.10668583, "memory(GiB)": 85.12, "step": 4220, "train_speed(iter/s)": 0.035253 }, { "acc": 0.6592135, "epoch": 3.31, "learning_rate": 8.668440614032124e-05, "loss": 1.11936607, "memory(GiB)": 85.12, "step": 4225, "train_speed(iter/s)": 0.035252 }, { "acc": 0.67665238, "epoch": 3.32, "learning_rate": 8.664769650302926e-05, "loss": 1.07830534, "memory(GiB)": 85.12, "step": 4230, "train_speed(iter/s)": 0.035249 }, { "acc": 0.66774406, "epoch": 3.32, "learning_rate": 8.661094412921014e-05, "loss": 1.10304251, "memory(GiB)": 85.12, "step": 4235, "train_speed(iter/s)": 0.035249 }, { "acc": 0.68347654, "epoch": 3.32, "learning_rate": 8.657414906172247e-05, "loss": 1.0130826, "memory(GiB)": 85.12, "step": 4240, "train_speed(iter/s)": 0.035248 }, { "acc": 0.65933442, "epoch": 3.33, "learning_rate": 8.653731134347464e-05, "loss": 1.09179993, "memory(GiB)": 85.12, "step": 4245, "train_speed(iter/s)": 0.035249 }, { "acc": 0.66771193, "epoch": 3.33, "learning_rate": 8.650043101742478e-05, "loss": 1.09305897, "memory(GiB)": 85.12, "step": 4250, "train_speed(iter/s)": 0.035249 }, { "acc": 0.65708385, "epoch": 3.33, "learning_rate": 8.646350812658069e-05, "loss": 1.12786036, "memory(GiB)": 85.12, "step": 4255, "train_speed(iter/s)": 0.03525 }, { "acc": 0.67746892, "epoch": 3.34, "learning_rate": 8.642654271399979e-05, "loss": 1.06408577, "memory(GiB)": 85.12, "step": 4260, "train_speed(iter/s)": 0.035249 }, { "acc": 0.66043816, "epoch": 3.34, "learning_rate": 8.638953482278915e-05, "loss": 1.12865877, "memory(GiB)": 85.12, "step": 4265, "train_speed(iter/s)": 0.035246 }, { "acc": 0.67736588, "epoch": 3.35, "learning_rate": 8.63524844961053e-05, "loss": 1.05247889, "memory(GiB)": 85.12, "step": 4270, "train_speed(iter/s)": 0.035247 }, { "acc": 0.67974887, "epoch": 3.35, "learning_rate": 8.631539177715433e-05, "loss": 1.06267481, "memory(GiB)": 85.12, "step": 4275, "train_speed(iter/s)": 0.035246 }, { "acc": 0.67399712, "epoch": 3.35, "learning_rate": 8.62782567091917e-05, "loss": 1.07866135, "memory(GiB)": 85.12, "step": 4280, "train_speed(iter/s)": 0.035247 }, { "acc": 0.68216677, "epoch": 3.36, "learning_rate": 8.624107933552229e-05, "loss": 1.04396935, "memory(GiB)": 85.12, "step": 4285, "train_speed(iter/s)": 0.035248 }, { "acc": 0.65968661, "epoch": 3.36, "learning_rate": 8.620385969950031e-05, "loss": 1.09188328, "memory(GiB)": 85.12, "step": 4290, "train_speed(iter/s)": 0.035248 }, { "acc": 0.67161303, "epoch": 3.37, "learning_rate": 8.616659784452925e-05, "loss": 1.08730984, "memory(GiB)": 85.12, "step": 4295, "train_speed(iter/s)": 0.035249 }, { "acc": 0.67344618, "epoch": 3.37, "learning_rate": 8.612929381406183e-05, "loss": 1.06549606, "memory(GiB)": 85.12, "step": 4300, "train_speed(iter/s)": 0.03525 }, { "acc": 0.67476172, "epoch": 3.37, "learning_rate": 8.609194765159996e-05, "loss": 1.08071413, "memory(GiB)": 85.12, "step": 4305, "train_speed(iter/s)": 0.035249 }, { "acc": 0.67202482, "epoch": 3.38, "learning_rate": 8.605455940069468e-05, "loss": 1.09838877, "memory(GiB)": 85.12, "step": 4310, "train_speed(iter/s)": 0.035249 }, { "acc": 0.67025285, "epoch": 3.38, "learning_rate": 8.601712910494612e-05, "loss": 1.07920656, "memory(GiB)": 85.12, "step": 4315, "train_speed(iter/s)": 0.03525 }, { "acc": 0.67839556, "epoch": 3.39, "learning_rate": 8.597965680800342e-05, "loss": 1.06685658, "memory(GiB)": 85.12, "step": 4320, "train_speed(iter/s)": 0.03525 }, { "acc": 0.67582765, "epoch": 3.39, "learning_rate": 8.594214255356472e-05, "loss": 1.07480011, "memory(GiB)": 85.12, "step": 4325, "train_speed(iter/s)": 0.035251 }, { "acc": 0.6812016, "epoch": 3.39, "learning_rate": 8.590458638537706e-05, "loss": 1.0496768, "memory(GiB)": 85.12, "step": 4330, "train_speed(iter/s)": 0.035248 }, { "acc": 0.68081627, "epoch": 3.4, "learning_rate": 8.58669883472364e-05, "loss": 1.03980932, "memory(GiB)": 85.12, "step": 4335, "train_speed(iter/s)": 0.035249 }, { "acc": 0.66322517, "epoch": 3.4, "learning_rate": 8.58293484829875e-05, "loss": 1.10416336, "memory(GiB)": 85.12, "step": 4340, "train_speed(iter/s)": 0.035249 }, { "acc": 0.67386465, "epoch": 3.41, "learning_rate": 8.579166683652388e-05, "loss": 1.06861038, "memory(GiB)": 85.12, "step": 4345, "train_speed(iter/s)": 0.03525 }, { "acc": 0.67121987, "epoch": 3.41, "learning_rate": 8.575394345178782e-05, "loss": 1.09512386, "memory(GiB)": 85.12, "step": 4350, "train_speed(iter/s)": 0.03525 }, { "acc": 0.66761322, "epoch": 3.41, "learning_rate": 8.571617837277027e-05, "loss": 1.085077, "memory(GiB)": 85.12, "step": 4355, "train_speed(iter/s)": 0.035251 }, { "acc": 0.68047543, "epoch": 3.42, "learning_rate": 8.567837164351075e-05, "loss": 1.06838808, "memory(GiB)": 85.12, "step": 4360, "train_speed(iter/s)": 0.035252 }, { "acc": 0.68031921, "epoch": 3.42, "learning_rate": 8.56405233080974e-05, "loss": 1.06294222, "memory(GiB)": 85.12, "step": 4365, "train_speed(iter/s)": 0.035252 }, { "acc": 0.66957574, "epoch": 3.42, "learning_rate": 8.560263341066689e-05, "loss": 1.09513245, "memory(GiB)": 85.12, "step": 4370, "train_speed(iter/s)": 0.035253 }, { "acc": 0.66336145, "epoch": 3.43, "learning_rate": 8.55647019954043e-05, "loss": 1.09110317, "memory(GiB)": 85.12, "step": 4375, "train_speed(iter/s)": 0.035254 }, { "acc": 0.67718058, "epoch": 3.43, "learning_rate": 8.552672910654317e-05, "loss": 1.07627859, "memory(GiB)": 85.12, "step": 4380, "train_speed(iter/s)": 0.035254 }, { "acc": 0.66453261, "epoch": 3.44, "learning_rate": 8.548871478836542e-05, "loss": 1.11319456, "memory(GiB)": 85.12, "step": 4385, "train_speed(iter/s)": 0.035252 }, { "acc": 0.68078775, "epoch": 3.44, "learning_rate": 8.545065908520123e-05, "loss": 1.04445333, "memory(GiB)": 85.12, "step": 4390, "train_speed(iter/s)": 0.035251 }, { "acc": 0.67710371, "epoch": 3.44, "learning_rate": 8.541256204142905e-05, "loss": 1.04607086, "memory(GiB)": 85.12, "step": 4395, "train_speed(iter/s)": 0.03525 }, { "acc": 0.67265029, "epoch": 3.45, "learning_rate": 8.53744237014756e-05, "loss": 1.07834921, "memory(GiB)": 85.12, "step": 4400, "train_speed(iter/s)": 0.035251 }, { "acc": 0.67727437, "epoch": 3.45, "learning_rate": 8.533624410981567e-05, "loss": 1.06278925, "memory(GiB)": 85.12, "step": 4405, "train_speed(iter/s)": 0.035249 }, { "acc": 0.66954894, "epoch": 3.46, "learning_rate": 8.529802331097223e-05, "loss": 1.08836241, "memory(GiB)": 85.12, "step": 4410, "train_speed(iter/s)": 0.035248 }, { "acc": 0.67296772, "epoch": 3.46, "learning_rate": 8.525976134951626e-05, "loss": 1.06974878, "memory(GiB)": 85.12, "step": 4415, "train_speed(iter/s)": 0.035249 }, { "acc": 0.66709204, "epoch": 3.46, "learning_rate": 8.522145827006675e-05, "loss": 1.06892281, "memory(GiB)": 85.12, "step": 4420, "train_speed(iter/s)": 0.03525 }, { "acc": 0.68249197, "epoch": 3.47, "learning_rate": 8.518311411729068e-05, "loss": 1.05143995, "memory(GiB)": 85.12, "step": 4425, "train_speed(iter/s)": 0.035251 }, { "acc": 0.65883646, "epoch": 3.47, "learning_rate": 8.514472893590285e-05, "loss": 1.10914154, "memory(GiB)": 85.12, "step": 4430, "train_speed(iter/s)": 0.035251 }, { "acc": 0.67034783, "epoch": 3.48, "learning_rate": 8.510630277066594e-05, "loss": 1.06353884, "memory(GiB)": 85.12, "step": 4435, "train_speed(iter/s)": 0.035251 }, { "acc": 0.67994227, "epoch": 3.48, "learning_rate": 8.506783566639045e-05, "loss": 1.07216578, "memory(GiB)": 85.12, "step": 4440, "train_speed(iter/s)": 0.035249 }, { "acc": 0.67223606, "epoch": 3.48, "learning_rate": 8.502932766793462e-05, "loss": 1.06527033, "memory(GiB)": 85.12, "step": 4445, "train_speed(iter/s)": 0.03525 }, { "acc": 0.67538528, "epoch": 3.49, "learning_rate": 8.49907788202043e-05, "loss": 1.04301472, "memory(GiB)": 85.12, "step": 4450, "train_speed(iter/s)": 0.035251 }, { "acc": 0.68265018, "epoch": 3.49, "learning_rate": 8.49521891681531e-05, "loss": 1.05498791, "memory(GiB)": 85.12, "step": 4455, "train_speed(iter/s)": 0.035251 }, { "acc": 0.67961888, "epoch": 3.5, "learning_rate": 8.491355875678211e-05, "loss": 1.06846848, "memory(GiB)": 85.12, "step": 4460, "train_speed(iter/s)": 0.035252 }, { "acc": 0.66131968, "epoch": 3.5, "learning_rate": 8.487488763114e-05, "loss": 1.09861307, "memory(GiB)": 85.12, "step": 4465, "train_speed(iter/s)": 0.035251 }, { "acc": 0.66888275, "epoch": 3.5, "learning_rate": 8.483617583632292e-05, "loss": 1.08226824, "memory(GiB)": 85.12, "step": 4470, "train_speed(iter/s)": 0.035251 }, { "acc": 0.67894645, "epoch": 3.51, "learning_rate": 8.47974234174744e-05, "loss": 1.07799463, "memory(GiB)": 85.12, "step": 4475, "train_speed(iter/s)": 0.035252 }, { "acc": 0.66169238, "epoch": 3.51, "learning_rate": 8.475863041978543e-05, "loss": 1.10147877, "memory(GiB)": 85.12, "step": 4480, "train_speed(iter/s)": 0.035251 }, { "acc": 0.66542506, "epoch": 3.51, "learning_rate": 8.471979688849424e-05, "loss": 1.09339066, "memory(GiB)": 85.12, "step": 4485, "train_speed(iter/s)": 0.035251 }, { "acc": 0.66197405, "epoch": 3.52, "learning_rate": 8.468092286888634e-05, "loss": 1.11503725, "memory(GiB)": 85.12, "step": 4490, "train_speed(iter/s)": 0.035252 }, { "acc": 0.66685591, "epoch": 3.52, "learning_rate": 8.464200840629451e-05, "loss": 1.07581072, "memory(GiB)": 85.12, "step": 4495, "train_speed(iter/s)": 0.035253 }, { "acc": 0.67047796, "epoch": 3.53, "learning_rate": 8.460305354609863e-05, "loss": 1.07585983, "memory(GiB)": 85.12, "step": 4500, "train_speed(iter/s)": 0.035253 }, { "acc": 0.66793823, "epoch": 3.53, "learning_rate": 8.456405833372572e-05, "loss": 1.10174761, "memory(GiB)": 85.12, "step": 4505, "train_speed(iter/s)": 0.035254 }, { "acc": 0.67698441, "epoch": 3.53, "learning_rate": 8.452502281464986e-05, "loss": 1.0740612, "memory(GiB)": 85.12, "step": 4510, "train_speed(iter/s)": 0.035255 }, { "acc": 0.68095536, "epoch": 3.54, "learning_rate": 8.448594703439213e-05, "loss": 1.05643806, "memory(GiB)": 85.12, "step": 4515, "train_speed(iter/s)": 0.035255 }, { "acc": 0.67278633, "epoch": 3.54, "learning_rate": 8.444683103852051e-05, "loss": 1.06161213, "memory(GiB)": 85.12, "step": 4520, "train_speed(iter/s)": 0.035254 }, { "acc": 0.65890441, "epoch": 3.55, "learning_rate": 8.440767487264997e-05, "loss": 1.12471752, "memory(GiB)": 85.12, "step": 4525, "train_speed(iter/s)": 0.035255 }, { "acc": 0.65354443, "epoch": 3.55, "learning_rate": 8.436847858244226e-05, "loss": 1.13845463, "memory(GiB)": 85.12, "step": 4530, "train_speed(iter/s)": 0.035256 }, { "acc": 0.67117491, "epoch": 3.55, "learning_rate": 8.432924221360594e-05, "loss": 1.06492109, "memory(GiB)": 85.12, "step": 4535, "train_speed(iter/s)": 0.035255 }, { "acc": 0.67677555, "epoch": 3.56, "learning_rate": 8.42899658118963e-05, "loss": 1.06711979, "memory(GiB)": 85.12, "step": 4540, "train_speed(iter/s)": 0.035255 }, { "acc": 0.67237253, "epoch": 3.56, "learning_rate": 8.425064942311532e-05, "loss": 1.064182, "memory(GiB)": 85.12, "step": 4545, "train_speed(iter/s)": 0.035256 }, { "acc": 0.67695851, "epoch": 3.57, "learning_rate": 8.42112930931116e-05, "loss": 1.04225302, "memory(GiB)": 85.12, "step": 4550, "train_speed(iter/s)": 0.035257 }, { "acc": 0.67655463, "epoch": 3.57, "learning_rate": 8.417189686778034e-05, "loss": 1.09111786, "memory(GiB)": 85.12, "step": 4555, "train_speed(iter/s)": 0.035257 }, { "acc": 0.67512894, "epoch": 3.57, "learning_rate": 8.413246079306325e-05, "loss": 1.06245384, "memory(GiB)": 85.12, "step": 4560, "train_speed(iter/s)": 0.035258 }, { "acc": 0.6641572, "epoch": 3.58, "learning_rate": 8.40929849149485e-05, "loss": 1.089466, "memory(GiB)": 85.12, "step": 4565, "train_speed(iter/s)": 0.035259 }, { "acc": 0.67081413, "epoch": 3.58, "learning_rate": 8.405346927947071e-05, "loss": 1.06199369, "memory(GiB)": 85.12, "step": 4570, "train_speed(iter/s)": 0.035259 }, { "acc": 0.67029018, "epoch": 3.59, "learning_rate": 8.401391393271081e-05, "loss": 1.10045147, "memory(GiB)": 85.12, "step": 4575, "train_speed(iter/s)": 0.03526 }, { "acc": 0.66921582, "epoch": 3.59, "learning_rate": 8.397431892079608e-05, "loss": 1.09112959, "memory(GiB)": 85.12, "step": 4580, "train_speed(iter/s)": 0.035257 }, { "acc": 0.66224871, "epoch": 3.59, "learning_rate": 8.393468428990005e-05, "loss": 1.09612188, "memory(GiB)": 85.12, "step": 4585, "train_speed(iter/s)": 0.035258 }, { "acc": 0.68403311, "epoch": 3.6, "learning_rate": 8.389501008624244e-05, "loss": 1.06564083, "memory(GiB)": 85.12, "step": 4590, "train_speed(iter/s)": 0.035259 }, { "acc": 0.67375054, "epoch": 3.6, "learning_rate": 8.385529635608913e-05, "loss": 1.08890104, "memory(GiB)": 85.12, "step": 4595, "train_speed(iter/s)": 0.035259 }, { "acc": 0.66678824, "epoch": 3.61, "learning_rate": 8.381554314575206e-05, "loss": 1.07385893, "memory(GiB)": 85.12, "step": 4600, "train_speed(iter/s)": 0.03526 }, { "acc": 0.67612591, "epoch": 3.61, "learning_rate": 8.377575050158929e-05, "loss": 1.07160501, "memory(GiB)": 85.12, "step": 4605, "train_speed(iter/s)": 0.035257 }, { "acc": 0.66233315, "epoch": 3.61, "learning_rate": 8.373591847000476e-05, "loss": 1.12124157, "memory(GiB)": 85.12, "step": 4610, "train_speed(iter/s)": 0.035258 }, { "acc": 0.66368275, "epoch": 3.62, "learning_rate": 8.369604709744842e-05, "loss": 1.12185488, "memory(GiB)": 85.12, "step": 4615, "train_speed(iter/s)": 0.035259 }, { "acc": 0.67620149, "epoch": 3.62, "learning_rate": 8.365613643041606e-05, "loss": 1.0558217, "memory(GiB)": 85.12, "step": 4620, "train_speed(iter/s)": 0.035259 }, { "acc": 0.67760687, "epoch": 3.62, "learning_rate": 8.361618651544932e-05, "loss": 1.06278458, "memory(GiB)": 85.12, "step": 4625, "train_speed(iter/s)": 0.035259 }, { "acc": 0.67772574, "epoch": 3.63, "learning_rate": 8.357619739913557e-05, "loss": 1.05741844, "memory(GiB)": 85.12, "step": 4630, "train_speed(iter/s)": 0.035258 }, { "acc": 0.68779845, "epoch": 3.63, "learning_rate": 8.353616912810793e-05, "loss": 1.03406744, "memory(GiB)": 85.12, "step": 4635, "train_speed(iter/s)": 0.035258 }, { "acc": 0.67806859, "epoch": 3.64, "learning_rate": 8.349610174904517e-05, "loss": 1.07612123, "memory(GiB)": 85.12, "step": 4640, "train_speed(iter/s)": 0.035257 }, { "acc": 0.65736609, "epoch": 3.64, "learning_rate": 8.345599530867166e-05, "loss": 1.11663847, "memory(GiB)": 85.12, "step": 4645, "train_speed(iter/s)": 0.035257 }, { "acc": 0.66991482, "epoch": 3.64, "learning_rate": 8.341584985375733e-05, "loss": 1.08915453, "memory(GiB)": 85.12, "step": 4650, "train_speed(iter/s)": 0.035256 }, { "acc": 0.6729054, "epoch": 3.65, "learning_rate": 8.337566543111756e-05, "loss": 1.06626625, "memory(GiB)": 85.12, "step": 4655, "train_speed(iter/s)": 0.035255 }, { "acc": 0.6768261, "epoch": 3.65, "learning_rate": 8.333544208761326e-05, "loss": 1.04373264, "memory(GiB)": 85.12, "step": 4660, "train_speed(iter/s)": 0.035255 }, { "acc": 0.66950965, "epoch": 3.66, "learning_rate": 8.329517987015067e-05, "loss": 1.11504803, "memory(GiB)": 85.12, "step": 4665, "train_speed(iter/s)": 0.035255 }, { "acc": 0.67875061, "epoch": 3.66, "learning_rate": 8.325487882568138e-05, "loss": 1.05688515, "memory(GiB)": 85.12, "step": 4670, "train_speed(iter/s)": 0.035252 }, { "acc": 0.66837807, "epoch": 3.66, "learning_rate": 8.321453900120223e-05, "loss": 1.07563877, "memory(GiB)": 85.12, "step": 4675, "train_speed(iter/s)": 0.035252 }, { "acc": 0.68004751, "epoch": 3.67, "learning_rate": 8.31741604437553e-05, "loss": 1.06593189, "memory(GiB)": 85.12, "step": 4680, "train_speed(iter/s)": 0.035253 }, { "acc": 0.68714409, "epoch": 3.67, "learning_rate": 8.313374320042785e-05, "loss": 1.03218222, "memory(GiB)": 85.12, "step": 4685, "train_speed(iter/s)": 0.035254 }, { "acc": 0.67838907, "epoch": 3.68, "learning_rate": 8.309328731835228e-05, "loss": 1.05175476, "memory(GiB)": 85.12, "step": 4690, "train_speed(iter/s)": 0.035254 }, { "acc": 0.66823692, "epoch": 3.68, "learning_rate": 8.305279284470595e-05, "loss": 1.08211584, "memory(GiB)": 85.12, "step": 4695, "train_speed(iter/s)": 0.035255 }, { "acc": 0.65161371, "epoch": 3.68, "learning_rate": 8.301225982671133e-05, "loss": 1.13491449, "memory(GiB)": 85.12, "step": 4700, "train_speed(iter/s)": 0.035254 }, { "acc": 0.67203436, "epoch": 3.69, "learning_rate": 8.29716883116358e-05, "loss": 1.08132124, "memory(GiB)": 85.12, "step": 4705, "train_speed(iter/s)": 0.035255 }, { "acc": 0.66406636, "epoch": 3.69, "learning_rate": 8.293107834679159e-05, "loss": 1.11655197, "memory(GiB)": 85.12, "step": 4710, "train_speed(iter/s)": 0.035254 }, { "acc": 0.67401237, "epoch": 3.7, "learning_rate": 8.289042997953585e-05, "loss": 1.09698009, "memory(GiB)": 85.12, "step": 4715, "train_speed(iter/s)": 0.035255 }, { "acc": 0.66169372, "epoch": 3.7, "learning_rate": 8.284974325727043e-05, "loss": 1.13821402, "memory(GiB)": 85.12, "step": 4720, "train_speed(iter/s)": 0.035255 }, { "acc": 0.66252351, "epoch": 3.7, "learning_rate": 8.280901822744198e-05, "loss": 1.12066956, "memory(GiB)": 85.12, "step": 4725, "train_speed(iter/s)": 0.035255 }, { "acc": 0.66670227, "epoch": 3.71, "learning_rate": 8.276825493754176e-05, "loss": 1.07495327, "memory(GiB)": 85.12, "step": 4730, "train_speed(iter/s)": 0.035256 }, { "acc": 0.67759528, "epoch": 3.71, "learning_rate": 8.27274534351057e-05, "loss": 1.06112547, "memory(GiB)": 85.12, "step": 4735, "train_speed(iter/s)": 0.035255 }, { "acc": 0.66779099, "epoch": 3.71, "learning_rate": 8.268661376771425e-05, "loss": 1.07864561, "memory(GiB)": 85.12, "step": 4740, "train_speed(iter/s)": 0.035254 }, { "acc": 0.69414735, "epoch": 3.72, "learning_rate": 8.264573598299238e-05, "loss": 1.01207485, "memory(GiB)": 85.12, "step": 4745, "train_speed(iter/s)": 0.035254 }, { "acc": 0.67531772, "epoch": 3.72, "learning_rate": 8.260482012860957e-05, "loss": 1.05929461, "memory(GiB)": 85.12, "step": 4750, "train_speed(iter/s)": 0.035253 }, { "acc": 0.67257948, "epoch": 3.73, "learning_rate": 8.256386625227955e-05, "loss": 1.09518213, "memory(GiB)": 85.12, "step": 4755, "train_speed(iter/s)": 0.035252 }, { "acc": 0.67445426, "epoch": 3.73, "learning_rate": 8.252287440176053e-05, "loss": 1.09971008, "memory(GiB)": 85.12, "step": 4760, "train_speed(iter/s)": 0.035253 }, { "acc": 0.66840658, "epoch": 3.73, "learning_rate": 8.248184462485493e-05, "loss": 1.10356541, "memory(GiB)": 85.12, "step": 4765, "train_speed(iter/s)": 0.035254 }, { "acc": 0.66937456, "epoch": 3.74, "learning_rate": 8.244077696940944e-05, "loss": 1.09562979, "memory(GiB)": 85.12, "step": 4770, "train_speed(iter/s)": 0.035254 }, { "acc": 0.66603575, "epoch": 3.74, "learning_rate": 8.239967148331488e-05, "loss": 1.08866987, "memory(GiB)": 85.12, "step": 4775, "train_speed(iter/s)": 0.035255 }, { "acc": 0.66549649, "epoch": 3.75, "learning_rate": 8.235852821450622e-05, "loss": 1.11980772, "memory(GiB)": 85.12, "step": 4780, "train_speed(iter/s)": 0.035256 }, { "acc": 0.66984949, "epoch": 3.75, "learning_rate": 8.231734721096246e-05, "loss": 1.10109053, "memory(GiB)": 85.12, "step": 4785, "train_speed(iter/s)": 0.035257 }, { "acc": 0.65589933, "epoch": 3.75, "learning_rate": 8.227612852070665e-05, "loss": 1.13999405, "memory(GiB)": 85.12, "step": 4790, "train_speed(iter/s)": 0.035254 }, { "acc": 0.65890169, "epoch": 3.76, "learning_rate": 8.223487219180573e-05, "loss": 1.12389488, "memory(GiB)": 85.12, "step": 4795, "train_speed(iter/s)": 0.035253 }, { "acc": 0.68056979, "epoch": 3.76, "learning_rate": 8.219357827237056e-05, "loss": 1.06554289, "memory(GiB)": 85.12, "step": 4800, "train_speed(iter/s)": 0.035254 }, { "acc": 0.67042527, "epoch": 3.77, "learning_rate": 8.215224681055585e-05, "loss": 1.11027889, "memory(GiB)": 85.12, "step": 4805, "train_speed(iter/s)": 0.035255 }, { "acc": 0.67441998, "epoch": 3.77, "learning_rate": 8.211087785456005e-05, "loss": 1.06829157, "memory(GiB)": 85.12, "step": 4810, "train_speed(iter/s)": 0.035255 }, { "acc": 0.67253809, "epoch": 3.77, "learning_rate": 8.206947145262541e-05, "loss": 1.08531771, "memory(GiB)": 85.12, "step": 4815, "train_speed(iter/s)": 0.035256 }, { "acc": 0.66685085, "epoch": 3.78, "learning_rate": 8.202802765303776e-05, "loss": 1.10216599, "memory(GiB)": 85.12, "step": 4820, "train_speed(iter/s)": 0.035256 }, { "acc": 0.66690621, "epoch": 3.78, "learning_rate": 8.198654650412659e-05, "loss": 1.10417929, "memory(GiB)": 85.12, "step": 4825, "train_speed(iter/s)": 0.035257 }, { "acc": 0.67201991, "epoch": 3.79, "learning_rate": 8.194502805426494e-05, "loss": 1.0637537, "memory(GiB)": 85.12, "step": 4830, "train_speed(iter/s)": 0.035256 }, { "acc": 0.66858087, "epoch": 3.79, "learning_rate": 8.190347235186932e-05, "loss": 1.07713585, "memory(GiB)": 85.12, "step": 4835, "train_speed(iter/s)": 0.035257 }, { "acc": 0.69056597, "epoch": 3.79, "learning_rate": 8.186187944539973e-05, "loss": 1.01855545, "memory(GiB)": 85.12, "step": 4840, "train_speed(iter/s)": 0.035258 }, { "acc": 0.67178264, "epoch": 3.8, "learning_rate": 8.18202493833595e-05, "loss": 1.08217745, "memory(GiB)": 85.12, "step": 4845, "train_speed(iter/s)": 0.035257 }, { "acc": 0.67628303, "epoch": 3.8, "learning_rate": 8.177858221429536e-05, "loss": 1.05854607, "memory(GiB)": 85.12, "step": 4850, "train_speed(iter/s)": 0.035258 }, { "acc": 0.67847629, "epoch": 3.8, "learning_rate": 8.173687798679723e-05, "loss": 1.06340704, "memory(GiB)": 85.12, "step": 4855, "train_speed(iter/s)": 0.035257 }, { "acc": 0.65820971, "epoch": 3.81, "learning_rate": 8.169513674949829e-05, "loss": 1.13936968, "memory(GiB)": 85.12, "step": 4860, "train_speed(iter/s)": 0.035256 }, { "acc": 0.68040419, "epoch": 3.81, "learning_rate": 8.16533585510749e-05, "loss": 1.04977999, "memory(GiB)": 85.12, "step": 4865, "train_speed(iter/s)": 0.035255 }, { "acc": 0.66686277, "epoch": 3.82, "learning_rate": 8.161154344024646e-05, "loss": 1.10527754, "memory(GiB)": 85.12, "step": 4870, "train_speed(iter/s)": 0.035256 }, { "acc": 0.6596796, "epoch": 3.82, "learning_rate": 8.156969146577548e-05, "loss": 1.11985264, "memory(GiB)": 85.12, "step": 4875, "train_speed(iter/s)": 0.035255 }, { "acc": 0.66763144, "epoch": 3.82, "learning_rate": 8.152780267646743e-05, "loss": 1.08364487, "memory(GiB)": 85.12, "step": 4880, "train_speed(iter/s)": 0.035255 }, { "acc": 0.67398977, "epoch": 3.83, "learning_rate": 8.148587712117068e-05, "loss": 1.07214155, "memory(GiB)": 85.12, "step": 4885, "train_speed(iter/s)": 0.035256 }, { "acc": 0.68034143, "epoch": 3.83, "learning_rate": 8.144391484877655e-05, "loss": 1.0558445, "memory(GiB)": 85.12, "step": 4890, "train_speed(iter/s)": 0.035255 }, { "acc": 0.67083588, "epoch": 3.84, "learning_rate": 8.14019159082191e-05, "loss": 1.0750145, "memory(GiB)": 85.12, "step": 4895, "train_speed(iter/s)": 0.035256 }, { "acc": 0.66514192, "epoch": 3.84, "learning_rate": 8.135988034847521e-05, "loss": 1.10588713, "memory(GiB)": 85.12, "step": 4900, "train_speed(iter/s)": 0.035256 }, { "acc": 0.67397079, "epoch": 3.84, "learning_rate": 8.13178082185644e-05, "loss": 1.05777845, "memory(GiB)": 85.12, "step": 4905, "train_speed(iter/s)": 0.035255 }, { "acc": 0.66549215, "epoch": 3.85, "learning_rate": 8.12756995675489e-05, "loss": 1.09327412, "memory(GiB)": 85.12, "step": 4910, "train_speed(iter/s)": 0.035256 }, { "acc": 0.6661788, "epoch": 3.85, "learning_rate": 8.12335544445335e-05, "loss": 1.09228296, "memory(GiB)": 85.12, "step": 4915, "train_speed(iter/s)": 0.035256 }, { "acc": 0.68010106, "epoch": 3.86, "learning_rate": 8.119137289866551e-05, "loss": 1.06913862, "memory(GiB)": 85.12, "step": 4920, "train_speed(iter/s)": 0.035257 }, { "acc": 0.66819158, "epoch": 3.86, "learning_rate": 8.114915497913473e-05, "loss": 1.09536047, "memory(GiB)": 85.12, "step": 4925, "train_speed(iter/s)": 0.035258 }, { "acc": 0.67470613, "epoch": 3.86, "learning_rate": 8.11069007351734e-05, "loss": 1.06130228, "memory(GiB)": 85.12, "step": 4930, "train_speed(iter/s)": 0.035258 }, { "acc": 0.67194877, "epoch": 3.87, "learning_rate": 8.106461021605607e-05, "loss": 1.06740732, "memory(GiB)": 85.12, "step": 4935, "train_speed(iter/s)": 0.035259 }, { "acc": 0.67518115, "epoch": 3.87, "learning_rate": 8.102228347109962e-05, "loss": 1.09367285, "memory(GiB)": 85.12, "step": 4940, "train_speed(iter/s)": 0.035258 }, { "acc": 0.68004875, "epoch": 3.88, "learning_rate": 8.097992054966317e-05, "loss": 1.05609674, "memory(GiB)": 85.12, "step": 4945, "train_speed(iter/s)": 0.035257 }, { "acc": 0.65617743, "epoch": 3.88, "learning_rate": 8.093752150114804e-05, "loss": 1.11393881, "memory(GiB)": 85.12, "step": 4950, "train_speed(iter/s)": 0.035258 }, { "acc": 0.66117711, "epoch": 3.88, "learning_rate": 8.089508637499765e-05, "loss": 1.12456036, "memory(GiB)": 85.12, "step": 4955, "train_speed(iter/s)": 0.035258 }, { "acc": 0.66858802, "epoch": 3.89, "learning_rate": 8.085261522069752e-05, "loss": 1.1040514, "memory(GiB)": 85.12, "step": 4960, "train_speed(iter/s)": 0.035258 }, { "acc": 0.68399754, "epoch": 3.89, "learning_rate": 8.081010808777517e-05, "loss": 1.01230736, "memory(GiB)": 85.12, "step": 4965, "train_speed(iter/s)": 0.035259 }, { "acc": 0.67364149, "epoch": 3.89, "learning_rate": 8.07675650258001e-05, "loss": 1.05729342, "memory(GiB)": 85.12, "step": 4970, "train_speed(iter/s)": 0.035259 }, { "acc": 0.66905775, "epoch": 3.9, "learning_rate": 8.072498608438363e-05, "loss": 1.10162392, "memory(GiB)": 85.12, "step": 4975, "train_speed(iter/s)": 0.035259 }, { "acc": 0.67838883, "epoch": 3.9, "learning_rate": 8.068237131317904e-05, "loss": 1.03004761, "memory(GiB)": 85.12, "step": 4980, "train_speed(iter/s)": 0.035259 }, { "acc": 0.6654243, "epoch": 3.91, "learning_rate": 8.063972076188132e-05, "loss": 1.09254379, "memory(GiB)": 85.12, "step": 4985, "train_speed(iter/s)": 0.03526 }, { "acc": 0.6707293, "epoch": 3.91, "learning_rate": 8.059703448022715e-05, "loss": 1.06519508, "memory(GiB)": 85.12, "step": 4990, "train_speed(iter/s)": 0.035259 }, { "acc": 0.67212982, "epoch": 3.91, "learning_rate": 8.055431251799499e-05, "loss": 1.09433947, "memory(GiB)": 85.12, "step": 4995, "train_speed(iter/s)": 0.03526 }, { "acc": 0.65150967, "epoch": 3.92, "learning_rate": 8.051155492500478e-05, "loss": 1.13897228, "memory(GiB)": 85.12, "step": 5000, "train_speed(iter/s)": 0.035259 }, { "acc": 0.67477775, "epoch": 3.92, "learning_rate": 8.046876175111813e-05, "loss": 1.07944336, "memory(GiB)": 85.12, "step": 5005, "train_speed(iter/s)": 0.03526 }, { "acc": 0.68616705, "epoch": 3.93, "learning_rate": 8.042593304623803e-05, "loss": 1.05852995, "memory(GiB)": 85.12, "step": 5010, "train_speed(iter/s)": 0.035261 }, { "acc": 0.68202815, "epoch": 3.93, "learning_rate": 8.0383068860309e-05, "loss": 1.05745516, "memory(GiB)": 85.12, "step": 5015, "train_speed(iter/s)": 0.035261 }, { "acc": 0.65037746, "epoch": 3.93, "learning_rate": 8.034016924331686e-05, "loss": 1.1531786, "memory(GiB)": 85.12, "step": 5020, "train_speed(iter/s)": 0.035262 }, { "acc": 0.67385054, "epoch": 3.94, "learning_rate": 8.029723424528884e-05, "loss": 1.07016153, "memory(GiB)": 85.12, "step": 5025, "train_speed(iter/s)": 0.035263 }, { "acc": 0.67603893, "epoch": 3.94, "learning_rate": 8.025426391629329e-05, "loss": 1.06786518, "memory(GiB)": 85.12, "step": 5030, "train_speed(iter/s)": 0.035262 }, { "acc": 0.66948352, "epoch": 3.95, "learning_rate": 8.021125830643991e-05, "loss": 1.09177036, "memory(GiB)": 85.12, "step": 5035, "train_speed(iter/s)": 0.035263 }, { "acc": 0.67932153, "epoch": 3.95, "learning_rate": 8.016821746587947e-05, "loss": 1.08133307, "memory(GiB)": 85.12, "step": 5040, "train_speed(iter/s)": 0.035263 }, { "acc": 0.66458163, "epoch": 3.95, "learning_rate": 8.01251414448038e-05, "loss": 1.0914546, "memory(GiB)": 85.12, "step": 5045, "train_speed(iter/s)": 0.035264 }, { "acc": 0.65853357, "epoch": 3.96, "learning_rate": 8.00820302934458e-05, "loss": 1.12704811, "memory(GiB)": 85.12, "step": 5050, "train_speed(iter/s)": 0.035263 }, { "acc": 0.66499863, "epoch": 3.96, "learning_rate": 8.003888406207932e-05, "loss": 1.1266118, "memory(GiB)": 85.12, "step": 5055, "train_speed(iter/s)": 0.035263 }, { "acc": 0.66503258, "epoch": 3.97, "learning_rate": 7.999570280101912e-05, "loss": 1.10257483, "memory(GiB)": 85.12, "step": 5060, "train_speed(iter/s)": 0.035264 }, { "acc": 0.67188749, "epoch": 3.97, "learning_rate": 7.995248656062081e-05, "loss": 1.0711792, "memory(GiB)": 85.12, "step": 5065, "train_speed(iter/s)": 0.035262 }, { "acc": 0.66948404, "epoch": 3.97, "learning_rate": 7.99092353912808e-05, "loss": 1.09926367, "memory(GiB)": 85.12, "step": 5070, "train_speed(iter/s)": 0.035262 }, { "acc": 0.67658277, "epoch": 3.98, "learning_rate": 7.986594934343621e-05, "loss": 1.06956873, "memory(GiB)": 85.12, "step": 5075, "train_speed(iter/s)": 0.035263 }, { "acc": 0.67322578, "epoch": 3.98, "learning_rate": 7.982262846756488e-05, "loss": 1.07685366, "memory(GiB)": 85.12, "step": 5080, "train_speed(iter/s)": 0.035264 }, { "acc": 0.66474819, "epoch": 3.99, "learning_rate": 7.977927281418518e-05, "loss": 1.12432756, "memory(GiB)": 85.12, "step": 5085, "train_speed(iter/s)": 0.035264 }, { "acc": 0.67171674, "epoch": 3.99, "learning_rate": 7.973588243385612e-05, "loss": 1.09030991, "memory(GiB)": 85.12, "step": 5090, "train_speed(iter/s)": 0.035264 }, { "acc": 0.68104601, "epoch": 3.99, "learning_rate": 7.969245737717718e-05, "loss": 1.03982849, "memory(GiB)": 85.12, "step": 5095, "train_speed(iter/s)": 0.035264 }, { "acc": 0.67410316, "epoch": 4.0, "learning_rate": 7.964899769478827e-05, "loss": 1.07849312, "memory(GiB)": 85.12, "step": 5100, "train_speed(iter/s)": 0.035265 }, { "epoch": 4.0, "eval_acc": 0.6908339594290007, "eval_loss": 0.9954748749732971, "eval_runtime": 84.9758, "eval_samples_per_second": 1.094, "eval_steps_per_second": 1.094, "step": 5104 }, { "acc": 0.68671699, "epoch": 4.0, "learning_rate": 7.960550343736965e-05, "loss": 1.04726982, "memory(GiB)": 85.12, "step": 5105, "train_speed(iter/s)": 0.035245 }, { "acc": 0.68219004, "epoch": 4.0, "learning_rate": 7.956197465564197e-05, "loss": 1.02617922, "memory(GiB)": 85.12, "step": 5110, "train_speed(iter/s)": 0.035244 }, { "acc": 0.68375196, "epoch": 4.01, "learning_rate": 7.951841140036606e-05, "loss": 1.01291771, "memory(GiB)": 85.12, "step": 5115, "train_speed(iter/s)": 0.035245 }, { "acc": 0.66873488, "epoch": 4.01, "learning_rate": 7.947481372234303e-05, "loss": 1.0916275, "memory(GiB)": 85.12, "step": 5120, "train_speed(iter/s)": 0.035242 }, { "acc": 0.67837057, "epoch": 4.02, "learning_rate": 7.943118167241406e-05, "loss": 1.07693701, "memory(GiB)": 85.12, "step": 5125, "train_speed(iter/s)": 0.035243 }, { "acc": 0.66617122, "epoch": 4.02, "learning_rate": 7.938751530146047e-05, "loss": 1.08172598, "memory(GiB)": 85.12, "step": 5130, "train_speed(iter/s)": 0.035242 }, { "acc": 0.68194261, "epoch": 4.02, "learning_rate": 7.934381466040356e-05, "loss": 1.04084024, "memory(GiB)": 85.12, "step": 5135, "train_speed(iter/s)": 0.035243 }, { "acc": 0.68469057, "epoch": 4.03, "learning_rate": 7.930007980020464e-05, "loss": 1.02090225, "memory(GiB)": 85.12, "step": 5140, "train_speed(iter/s)": 0.035243 }, { "acc": 0.68128481, "epoch": 4.03, "learning_rate": 7.925631077186487e-05, "loss": 1.04276752, "memory(GiB)": 85.12, "step": 5145, "train_speed(iter/s)": 0.035244 }, { "acc": 0.67515278, "epoch": 4.04, "learning_rate": 7.92125076264253e-05, "loss": 1.05011406, "memory(GiB)": 85.12, "step": 5150, "train_speed(iter/s)": 0.035245 }, { "acc": 0.68157773, "epoch": 4.04, "learning_rate": 7.916867041496674e-05, "loss": 1.08598862, "memory(GiB)": 85.12, "step": 5155, "train_speed(iter/s)": 0.035245 }, { "acc": 0.67785926, "epoch": 4.04, "learning_rate": 7.912479918860974e-05, "loss": 1.05256701, "memory(GiB)": 85.12, "step": 5160, "train_speed(iter/s)": 0.035246 }, { "acc": 0.68705544, "epoch": 4.05, "learning_rate": 7.908089399851448e-05, "loss": 1.04519091, "memory(GiB)": 85.12, "step": 5165, "train_speed(iter/s)": 0.035245 }, { "acc": 0.67090273, "epoch": 4.05, "learning_rate": 7.903695489588083e-05, "loss": 1.08642006, "memory(GiB)": 85.12, "step": 5170, "train_speed(iter/s)": 0.035246 }, { "acc": 0.67382889, "epoch": 4.06, "learning_rate": 7.899298193194811e-05, "loss": 1.07724438, "memory(GiB)": 85.12, "step": 5175, "train_speed(iter/s)": 0.035245 }, { "acc": 0.69452157, "epoch": 4.06, "learning_rate": 7.894897515799518e-05, "loss": 0.98748884, "memory(GiB)": 85.12, "step": 5180, "train_speed(iter/s)": 0.035246 }, { "acc": 0.66965647, "epoch": 4.06, "learning_rate": 7.890493462534034e-05, "loss": 1.06002617, "memory(GiB)": 85.12, "step": 5185, "train_speed(iter/s)": 0.035243 }, { "acc": 0.6691617, "epoch": 4.07, "learning_rate": 7.886086038534122e-05, "loss": 1.05882254, "memory(GiB)": 85.12, "step": 5190, "train_speed(iter/s)": 0.035244 }, { "acc": 0.67491331, "epoch": 4.07, "learning_rate": 7.881675248939476e-05, "loss": 1.06821861, "memory(GiB)": 85.12, "step": 5195, "train_speed(iter/s)": 0.035245 }, { "acc": 0.68093872, "epoch": 4.08, "learning_rate": 7.877261098893719e-05, "loss": 1.04844933, "memory(GiB)": 85.12, "step": 5200, "train_speed(iter/s)": 0.035245 }, { "acc": 0.67266617, "epoch": 4.08, "learning_rate": 7.872843593544388e-05, "loss": 1.07000122, "memory(GiB)": 85.12, "step": 5205, "train_speed(iter/s)": 0.035246 }, { "acc": 0.67317371, "epoch": 4.08, "learning_rate": 7.868422738042935e-05, "loss": 1.09329424, "memory(GiB)": 85.12, "step": 5210, "train_speed(iter/s)": 0.035245 }, { "acc": 0.66858578, "epoch": 4.09, "learning_rate": 7.863998537544719e-05, "loss": 1.08054285, "memory(GiB)": 85.12, "step": 5215, "train_speed(iter/s)": 0.035245 }, { "acc": 0.66879354, "epoch": 4.09, "learning_rate": 7.859570997208998e-05, "loss": 1.08357344, "memory(GiB)": 85.12, "step": 5220, "train_speed(iter/s)": 0.035245 }, { "acc": 0.66112986, "epoch": 4.09, "learning_rate": 7.855140122198927e-05, "loss": 1.1116806, "memory(GiB)": 85.12, "step": 5225, "train_speed(iter/s)": 0.035246 }, { "acc": 0.67013917, "epoch": 4.1, "learning_rate": 7.850705917681549e-05, "loss": 1.0776103, "memory(GiB)": 85.12, "step": 5230, "train_speed(iter/s)": 0.035247 }, { "acc": 0.67974381, "epoch": 4.1, "learning_rate": 7.846268388827789e-05, "loss": 1.06719589, "memory(GiB)": 85.12, "step": 5235, "train_speed(iter/s)": 0.035246 }, { "acc": 0.6868772, "epoch": 4.11, "learning_rate": 7.841827540812447e-05, "loss": 1.02366819, "memory(GiB)": 85.12, "step": 5240, "train_speed(iter/s)": 0.035245 }, { "acc": 0.67168031, "epoch": 4.11, "learning_rate": 7.837383378814197e-05, "loss": 1.08055744, "memory(GiB)": 85.12, "step": 5245, "train_speed(iter/s)": 0.035243 }, { "acc": 0.67559457, "epoch": 4.11, "learning_rate": 7.832935908015578e-05, "loss": 1.04622164, "memory(GiB)": 85.12, "step": 5250, "train_speed(iter/s)": 0.035244 }, { "acc": 0.66986046, "epoch": 4.12, "learning_rate": 7.828485133602981e-05, "loss": 1.088204, "memory(GiB)": 85.12, "step": 5255, "train_speed(iter/s)": 0.035244 }, { "acc": 0.68791199, "epoch": 4.12, "learning_rate": 7.824031060766662e-05, "loss": 1.02517748, "memory(GiB)": 85.12, "step": 5260, "train_speed(iter/s)": 0.035245 }, { "acc": 0.67395439, "epoch": 4.13, "learning_rate": 7.819573694700707e-05, "loss": 1.04129019, "memory(GiB)": 85.12, "step": 5265, "train_speed(iter/s)": 0.035245 }, { "acc": 0.67076187, "epoch": 4.13, "learning_rate": 7.815113040603057e-05, "loss": 1.09466763, "memory(GiB)": 85.12, "step": 5270, "train_speed(iter/s)": 0.035246 }, { "acc": 0.68308926, "epoch": 4.13, "learning_rate": 7.810649103675478e-05, "loss": 1.01522207, "memory(GiB)": 85.12, "step": 5275, "train_speed(iter/s)": 0.035247 }, { "acc": 0.66764603, "epoch": 4.14, "learning_rate": 7.80618188912357e-05, "loss": 1.10394659, "memory(GiB)": 85.12, "step": 5280, "train_speed(iter/s)": 0.035247 }, { "acc": 0.6767786, "epoch": 4.14, "learning_rate": 7.801711402156752e-05, "loss": 1.06015568, "memory(GiB)": 85.12, "step": 5285, "train_speed(iter/s)": 0.035248 }, { "acc": 0.69466491, "epoch": 4.15, "learning_rate": 7.797237647988259e-05, "loss": 1.00177612, "memory(GiB)": 85.12, "step": 5290, "train_speed(iter/s)": 0.035248 }, { "acc": 0.6722373, "epoch": 4.15, "learning_rate": 7.792760631835138e-05, "loss": 1.09138288, "memory(GiB)": 85.12, "step": 5295, "train_speed(iter/s)": 0.035248 }, { "acc": 0.68212829, "epoch": 4.15, "learning_rate": 7.788280358918239e-05, "loss": 1.0298542, "memory(GiB)": 85.12, "step": 5300, "train_speed(iter/s)": 0.035248 }, { "acc": 0.68358073, "epoch": 4.16, "learning_rate": 7.783796834462208e-05, "loss": 1.04165964, "memory(GiB)": 85.12, "step": 5305, "train_speed(iter/s)": 0.035246 }, { "acc": 0.67844944, "epoch": 4.16, "learning_rate": 7.779310063695486e-05, "loss": 1.06625547, "memory(GiB)": 85.12, "step": 5310, "train_speed(iter/s)": 0.035247 }, { "acc": 0.67359776, "epoch": 4.17, "learning_rate": 7.7748200518503e-05, "loss": 1.08562546, "memory(GiB)": 85.12, "step": 5315, "train_speed(iter/s)": 0.035247 }, { "acc": 0.68680096, "epoch": 4.17, "learning_rate": 7.77032680416265e-05, "loss": 1.01254492, "memory(GiB)": 85.12, "step": 5320, "train_speed(iter/s)": 0.035247 }, { "acc": 0.67717962, "epoch": 4.17, "learning_rate": 7.765830325872318e-05, "loss": 1.03918819, "memory(GiB)": 85.12, "step": 5325, "train_speed(iter/s)": 0.035247 }, { "acc": 0.68738456, "epoch": 4.18, "learning_rate": 7.761330622222849e-05, "loss": 1.01561775, "memory(GiB)": 85.12, "step": 5330, "train_speed(iter/s)": 0.035247 }, { "acc": 0.66343398, "epoch": 4.18, "learning_rate": 7.75682769846155e-05, "loss": 1.08996143, "memory(GiB)": 85.12, "step": 5335, "train_speed(iter/s)": 0.035246 }, { "acc": 0.68137712, "epoch": 4.18, "learning_rate": 7.75232155983948e-05, "loss": 1.06495123, "memory(GiB)": 85.12, "step": 5340, "train_speed(iter/s)": 0.035246 }, { "acc": 0.67577925, "epoch": 4.19, "learning_rate": 7.747812211611454e-05, "loss": 1.0828968, "memory(GiB)": 85.12, "step": 5345, "train_speed(iter/s)": 0.035246 }, { "acc": 0.6752552, "epoch": 4.19, "learning_rate": 7.743299659036023e-05, "loss": 1.05694551, "memory(GiB)": 85.12, "step": 5350, "train_speed(iter/s)": 0.035246 }, { "acc": 0.67123895, "epoch": 4.2, "learning_rate": 7.73878390737548e-05, "loss": 1.07847023, "memory(GiB)": 85.12, "step": 5355, "train_speed(iter/s)": 0.035247 }, { "acc": 0.68291116, "epoch": 4.2, "learning_rate": 7.734264961895843e-05, "loss": 1.05738621, "memory(GiB)": 85.12, "step": 5360, "train_speed(iter/s)": 0.035247 }, { "acc": 0.68122358, "epoch": 4.2, "learning_rate": 7.72974282786686e-05, "loss": 1.03916407, "memory(GiB)": 85.12, "step": 5365, "train_speed(iter/s)": 0.035247 }, { "acc": 0.67760181, "epoch": 4.21, "learning_rate": 7.725217510561993e-05, "loss": 1.05562716, "memory(GiB)": 85.12, "step": 5370, "train_speed(iter/s)": 0.035247 }, { "acc": 0.6754374, "epoch": 4.21, "learning_rate": 7.72068901525842e-05, "loss": 1.04640961, "memory(GiB)": 85.12, "step": 5375, "train_speed(iter/s)": 0.035248 }, { "acc": 0.6791121, "epoch": 4.22, "learning_rate": 7.716157347237022e-05, "loss": 1.04016552, "memory(GiB)": 85.12, "step": 5380, "train_speed(iter/s)": 0.035248 }, { "acc": 0.67782917, "epoch": 4.22, "learning_rate": 7.71162251178238e-05, "loss": 1.05207224, "memory(GiB)": 85.12, "step": 5385, "train_speed(iter/s)": 0.035248 }, { "acc": 0.66641583, "epoch": 4.22, "learning_rate": 7.707084514182772e-05, "loss": 1.09165554, "memory(GiB)": 85.12, "step": 5390, "train_speed(iter/s)": 0.035249 }, { "acc": 0.66752553, "epoch": 4.23, "learning_rate": 7.702543359730158e-05, "loss": 1.08811426, "memory(GiB)": 85.12, "step": 5395, "train_speed(iter/s)": 0.035248 }, { "acc": 0.67671351, "epoch": 4.23, "learning_rate": 7.697999053720185e-05, "loss": 1.03720827, "memory(GiB)": 85.12, "step": 5400, "train_speed(iter/s)": 0.035247 }, { "acc": 0.67424908, "epoch": 4.24, "learning_rate": 7.693451601452173e-05, "loss": 1.05379906, "memory(GiB)": 85.12, "step": 5405, "train_speed(iter/s)": 0.035248 }, { "acc": 0.67244482, "epoch": 4.24, "learning_rate": 7.688901008229107e-05, "loss": 1.09903154, "memory(GiB)": 85.12, "step": 5410, "train_speed(iter/s)": 0.035247 }, { "acc": 0.67872109, "epoch": 4.24, "learning_rate": 7.684347279357642e-05, "loss": 1.06607389, "memory(GiB)": 85.12, "step": 5415, "train_speed(iter/s)": 0.035247 }, { "acc": 0.67639685, "epoch": 4.25, "learning_rate": 7.679790420148084e-05, "loss": 1.06388321, "memory(GiB)": 85.12, "step": 5420, "train_speed(iter/s)": 0.035247 }, { "acc": 0.66824012, "epoch": 4.25, "learning_rate": 7.675230435914394e-05, "loss": 1.08031435, "memory(GiB)": 85.12, "step": 5425, "train_speed(iter/s)": 0.035244 }, { "acc": 0.67619176, "epoch": 4.26, "learning_rate": 7.670667331974171e-05, "loss": 1.05007677, "memory(GiB)": 85.12, "step": 5430, "train_speed(iter/s)": 0.035245 }, { "acc": 0.66911144, "epoch": 4.26, "learning_rate": 7.666101113648658e-05, "loss": 1.06066961, "memory(GiB)": 85.12, "step": 5435, "train_speed(iter/s)": 0.035246 }, { "acc": 0.69307575, "epoch": 4.26, "learning_rate": 7.661531786262728e-05, "loss": 1.00854187, "memory(GiB)": 85.12, "step": 5440, "train_speed(iter/s)": 0.035246 }, { "acc": 0.67891207, "epoch": 4.27, "learning_rate": 7.656959355144879e-05, "loss": 1.05966101, "memory(GiB)": 85.12, "step": 5445, "train_speed(iter/s)": 0.035247 }, { "acc": 0.67878551, "epoch": 4.27, "learning_rate": 7.652383825627226e-05, "loss": 1.0579504, "memory(GiB)": 85.12, "step": 5450, "train_speed(iter/s)": 0.035247 }, { "acc": 0.66832333, "epoch": 4.28, "learning_rate": 7.647805203045504e-05, "loss": 1.08938274, "memory(GiB)": 85.12, "step": 5455, "train_speed(iter/s)": 0.035248 }, { "acc": 0.67352867, "epoch": 4.28, "learning_rate": 7.643223492739048e-05, "loss": 1.07296839, "memory(GiB)": 85.12, "step": 5460, "train_speed(iter/s)": 0.035249 }, { "acc": 0.68797994, "epoch": 4.28, "learning_rate": 7.638638700050796e-05, "loss": 1.03214712, "memory(GiB)": 85.12, "step": 5465, "train_speed(iter/s)": 0.035249 }, { "acc": 0.67085338, "epoch": 4.29, "learning_rate": 7.634050830327282e-05, "loss": 1.07635412, "memory(GiB)": 85.12, "step": 5470, "train_speed(iter/s)": 0.035247 }, { "acc": 0.67759714, "epoch": 4.29, "learning_rate": 7.629459888918627e-05, "loss": 1.06018639, "memory(GiB)": 85.12, "step": 5475, "train_speed(iter/s)": 0.035248 }, { "acc": 0.67132416, "epoch": 4.29, "learning_rate": 7.624865881178535e-05, "loss": 1.08437328, "memory(GiB)": 85.12, "step": 5480, "train_speed(iter/s)": 0.035247 }, { "acc": 0.6840992, "epoch": 4.3, "learning_rate": 7.620268812464284e-05, "loss": 1.03912249, "memory(GiB)": 85.12, "step": 5485, "train_speed(iter/s)": 0.035248 }, { "acc": 0.68261967, "epoch": 4.3, "learning_rate": 7.615668688136724e-05, "loss": 1.07205544, "memory(GiB)": 85.12, "step": 5490, "train_speed(iter/s)": 0.035247 }, { "acc": 0.68234777, "epoch": 4.31, "learning_rate": 7.611065513560264e-05, "loss": 1.03525667, "memory(GiB)": 85.12, "step": 5495, "train_speed(iter/s)": 0.035246 }, { "acc": 0.6711832, "epoch": 4.31, "learning_rate": 7.606459294102876e-05, "loss": 1.08924789, "memory(GiB)": 85.12, "step": 5500, "train_speed(iter/s)": 0.035244 }, { "acc": 0.68011131, "epoch": 4.31, "learning_rate": 7.601850035136078e-05, "loss": 1.03413877, "memory(GiB)": 85.12, "step": 5505, "train_speed(iter/s)": 0.035244 }, { "acc": 0.67553554, "epoch": 4.32, "learning_rate": 7.597237742034938e-05, "loss": 1.05302973, "memory(GiB)": 85.12, "step": 5510, "train_speed(iter/s)": 0.035243 }, { "acc": 0.68478012, "epoch": 4.32, "learning_rate": 7.59262242017805e-05, "loss": 1.01622705, "memory(GiB)": 85.12, "step": 5515, "train_speed(iter/s)": 0.035244 }, { "acc": 0.67683172, "epoch": 4.33, "learning_rate": 7.588004074947556e-05, "loss": 1.07921562, "memory(GiB)": 85.12, "step": 5520, "train_speed(iter/s)": 0.035244 }, { "acc": 0.67525272, "epoch": 4.33, "learning_rate": 7.583382711729114e-05, "loss": 1.06174011, "memory(GiB)": 85.12, "step": 5525, "train_speed(iter/s)": 0.035245 }, { "acc": 0.69840279, "epoch": 4.33, "learning_rate": 7.578758335911901e-05, "loss": 0.98411026, "memory(GiB)": 85.12, "step": 5530, "train_speed(iter/s)": 0.035245 }, { "acc": 0.68036718, "epoch": 4.34, "learning_rate": 7.574130952888614e-05, "loss": 1.07546577, "memory(GiB)": 85.12, "step": 5535, "train_speed(iter/s)": 0.035245 }, { "acc": 0.67380986, "epoch": 4.34, "learning_rate": 7.569500568055448e-05, "loss": 1.06016655, "memory(GiB)": 85.12, "step": 5540, "train_speed(iter/s)": 0.035245 }, { "acc": 0.67196646, "epoch": 4.35, "learning_rate": 7.564867186812105e-05, "loss": 1.07078009, "memory(GiB)": 85.12, "step": 5545, "train_speed(iter/s)": 0.035245 }, { "acc": 0.68534956, "epoch": 4.35, "learning_rate": 7.560230814561781e-05, "loss": 1.0424099, "memory(GiB)": 85.12, "step": 5550, "train_speed(iter/s)": 0.035245 }, { "acc": 0.67195516, "epoch": 4.35, "learning_rate": 7.555591456711157e-05, "loss": 1.06724882, "memory(GiB)": 85.12, "step": 5555, "train_speed(iter/s)": 0.035246 }, { "acc": 0.68624582, "epoch": 4.36, "learning_rate": 7.550949118670395e-05, "loss": 1.03637371, "memory(GiB)": 85.12, "step": 5560, "train_speed(iter/s)": 0.035246 }, { "acc": 0.67780466, "epoch": 4.36, "learning_rate": 7.546303805853136e-05, "loss": 1.0577466, "memory(GiB)": 85.12, "step": 5565, "train_speed(iter/s)": 0.035246 }, { "acc": 0.66886325, "epoch": 4.37, "learning_rate": 7.541655523676489e-05, "loss": 1.08753948, "memory(GiB)": 85.12, "step": 5570, "train_speed(iter/s)": 0.035246 }, { "acc": 0.68775806, "epoch": 4.37, "learning_rate": 7.537004277561022e-05, "loss": 1.02740803, "memory(GiB)": 85.12, "step": 5575, "train_speed(iter/s)": 0.035247 }, { "acc": 0.67876277, "epoch": 4.37, "learning_rate": 7.532350072930764e-05, "loss": 1.02870722, "memory(GiB)": 85.12, "step": 5580, "train_speed(iter/s)": 0.035247 }, { "acc": 0.67490358, "epoch": 4.38, "learning_rate": 7.527692915213193e-05, "loss": 1.07739782, "memory(GiB)": 85.12, "step": 5585, "train_speed(iter/s)": 0.035248 }, { "acc": 0.67852893, "epoch": 4.38, "learning_rate": 7.52303280983923e-05, "loss": 1.04966307, "memory(GiB)": 85.12, "step": 5590, "train_speed(iter/s)": 0.035249 }, { "acc": 0.68157201, "epoch": 4.38, "learning_rate": 7.518369762243232e-05, "loss": 1.0376194, "memory(GiB)": 85.12, "step": 5595, "train_speed(iter/s)": 0.035248 }, { "acc": 0.67769365, "epoch": 4.39, "learning_rate": 7.51370377786299e-05, "loss": 1.05899029, "memory(GiB)": 85.12, "step": 5600, "train_speed(iter/s)": 0.035247 }, { "acc": 0.68378305, "epoch": 4.39, "learning_rate": 7.509034862139717e-05, "loss": 1.06501207, "memory(GiB)": 85.12, "step": 5605, "train_speed(iter/s)": 0.035247 }, { "acc": 0.68093605, "epoch": 4.4, "learning_rate": 7.504363020518046e-05, "loss": 1.05337543, "memory(GiB)": 85.12, "step": 5610, "train_speed(iter/s)": 0.035247 }, { "acc": 0.6819272, "epoch": 4.4, "learning_rate": 7.499688258446024e-05, "loss": 1.03589249, "memory(GiB)": 85.12, "step": 5615, "train_speed(iter/s)": 0.035247 }, { "acc": 0.66478381, "epoch": 4.4, "learning_rate": 7.495010581375097e-05, "loss": 1.11290102, "memory(GiB)": 85.12, "step": 5620, "train_speed(iter/s)": 0.035246 }, { "acc": 0.69163618, "epoch": 4.41, "learning_rate": 7.490329994760118e-05, "loss": 0.99750299, "memory(GiB)": 85.12, "step": 5625, "train_speed(iter/s)": 0.035245 }, { "acc": 0.68824978, "epoch": 4.41, "learning_rate": 7.485646504059328e-05, "loss": 1.0366888, "memory(GiB)": 85.12, "step": 5630, "train_speed(iter/s)": 0.035246 }, { "acc": 0.6706286, "epoch": 4.42, "learning_rate": 7.480960114734357e-05, "loss": 1.09517231, "memory(GiB)": 85.12, "step": 5635, "train_speed(iter/s)": 0.035246 }, { "acc": 0.66850762, "epoch": 4.42, "learning_rate": 7.476270832250213e-05, "loss": 1.09369583, "memory(GiB)": 85.12, "step": 5640, "train_speed(iter/s)": 0.035247 }, { "acc": 0.68807869, "epoch": 4.42, "learning_rate": 7.471578662075281e-05, "loss": 1.04609451, "memory(GiB)": 85.12, "step": 5645, "train_speed(iter/s)": 0.035247 }, { "acc": 0.67306752, "epoch": 4.43, "learning_rate": 7.46688360968131e-05, "loss": 1.05134382, "memory(GiB)": 85.12, "step": 5650, "train_speed(iter/s)": 0.035248 }, { "acc": 0.68500929, "epoch": 4.43, "learning_rate": 7.462185680543413e-05, "loss": 1.03070574, "memory(GiB)": 85.12, "step": 5655, "train_speed(iter/s)": 0.035248 }, { "acc": 0.67167764, "epoch": 4.44, "learning_rate": 7.457484880140057e-05, "loss": 1.08116226, "memory(GiB)": 85.12, "step": 5660, "train_speed(iter/s)": 0.035246 }, { "acc": 0.67725725, "epoch": 4.44, "learning_rate": 7.452781213953054e-05, "loss": 1.05323343, "memory(GiB)": 85.12, "step": 5665, "train_speed(iter/s)": 0.035247 }, { "acc": 0.68552766, "epoch": 4.44, "learning_rate": 7.448074687467564e-05, "loss": 1.03019152, "memory(GiB)": 85.12, "step": 5670, "train_speed(iter/s)": 0.035245 }, { "acc": 0.67342439, "epoch": 4.45, "learning_rate": 7.443365306172076e-05, "loss": 1.07240591, "memory(GiB)": 85.12, "step": 5675, "train_speed(iter/s)": 0.035246 }, { "acc": 0.676577, "epoch": 4.45, "learning_rate": 7.438653075558412e-05, "loss": 1.0539855, "memory(GiB)": 85.12, "step": 5680, "train_speed(iter/s)": 0.035246 }, { "acc": 0.67203903, "epoch": 4.46, "learning_rate": 7.433938001121719e-05, "loss": 1.06724186, "memory(GiB)": 85.12, "step": 5685, "train_speed(iter/s)": 0.035247 }, { "acc": 0.67773714, "epoch": 4.46, "learning_rate": 7.429220088360456e-05, "loss": 1.08628159, "memory(GiB)": 85.12, "step": 5690, "train_speed(iter/s)": 0.035247 }, { "acc": 0.6735105, "epoch": 4.46, "learning_rate": 7.424499342776392e-05, "loss": 1.08572884, "memory(GiB)": 85.12, "step": 5695, "train_speed(iter/s)": 0.035246 }, { "acc": 0.68572245, "epoch": 4.47, "learning_rate": 7.419775769874601e-05, "loss": 1.02478113, "memory(GiB)": 85.12, "step": 5700, "train_speed(iter/s)": 0.035246 }, { "acc": 0.67862587, "epoch": 4.47, "learning_rate": 7.415049375163455e-05, "loss": 1.04869251, "memory(GiB)": 85.12, "step": 5705, "train_speed(iter/s)": 0.035245 }, { "acc": 0.67932053, "epoch": 4.47, "learning_rate": 7.410320164154614e-05, "loss": 1.04899778, "memory(GiB)": 85.12, "step": 5710, "train_speed(iter/s)": 0.035246 }, { "acc": 0.6694593, "epoch": 4.48, "learning_rate": 7.405588142363026e-05, "loss": 1.09497614, "memory(GiB)": 85.12, "step": 5715, "train_speed(iter/s)": 0.035245 }, { "acc": 0.69002652, "epoch": 4.48, "learning_rate": 7.40085331530691e-05, "loss": 1.02802773, "memory(GiB)": 85.12, "step": 5720, "train_speed(iter/s)": 0.035246 }, { "acc": 0.67512789, "epoch": 4.49, "learning_rate": 7.396115688507766e-05, "loss": 1.0342535, "memory(GiB)": 85.12, "step": 5725, "train_speed(iter/s)": 0.035245 }, { "acc": 0.68088655, "epoch": 4.49, "learning_rate": 7.39137526749035e-05, "loss": 1.05951233, "memory(GiB)": 85.12, "step": 5730, "train_speed(iter/s)": 0.035246 }, { "acc": 0.6625989, "epoch": 4.49, "learning_rate": 7.386632057782683e-05, "loss": 1.0969574, "memory(GiB)": 85.12, "step": 5735, "train_speed(iter/s)": 0.035246 }, { "acc": 0.67329836, "epoch": 4.5, "learning_rate": 7.381886064916031e-05, "loss": 1.09573812, "memory(GiB)": 85.12, "step": 5740, "train_speed(iter/s)": 0.035247 }, { "acc": 0.66656322, "epoch": 4.5, "learning_rate": 7.377137294424914e-05, "loss": 1.07542658, "memory(GiB)": 85.12, "step": 5745, "train_speed(iter/s)": 0.035248 }, { "acc": 0.68572235, "epoch": 4.51, "learning_rate": 7.372385751847084e-05, "loss": 1.01502714, "memory(GiB)": 85.12, "step": 5750, "train_speed(iter/s)": 0.035248 }, { "acc": 0.67641573, "epoch": 4.51, "learning_rate": 7.367631442723531e-05, "loss": 1.08796015, "memory(GiB)": 85.12, "step": 5755, "train_speed(iter/s)": 0.035249 }, { "acc": 0.66859589, "epoch": 4.51, "learning_rate": 7.362874372598465e-05, "loss": 1.09382992, "memory(GiB)": 85.12, "step": 5760, "train_speed(iter/s)": 0.035249 }, { "acc": 0.67875175, "epoch": 4.52, "learning_rate": 7.358114547019325e-05, "loss": 1.03851318, "memory(GiB)": 85.12, "step": 5765, "train_speed(iter/s)": 0.03525 }, { "acc": 0.66146779, "epoch": 4.52, "learning_rate": 7.353351971536753e-05, "loss": 1.13547573, "memory(GiB)": 85.12, "step": 5770, "train_speed(iter/s)": 0.035249 }, { "acc": 0.66437259, "epoch": 4.53, "learning_rate": 7.348586651704603e-05, "loss": 1.09721699, "memory(GiB)": 85.12, "step": 5775, "train_speed(iter/s)": 0.035249 }, { "acc": 0.68509789, "epoch": 4.53, "learning_rate": 7.343818593079929e-05, "loss": 1.04163866, "memory(GiB)": 85.12, "step": 5780, "train_speed(iter/s)": 0.035246 }, { "acc": 0.6822547, "epoch": 4.53, "learning_rate": 7.339047801222982e-05, "loss": 1.0457943, "memory(GiB)": 85.12, "step": 5785, "train_speed(iter/s)": 0.035247 }, { "acc": 0.67156363, "epoch": 4.54, "learning_rate": 7.334274281697193e-05, "loss": 1.05314388, "memory(GiB)": 85.12, "step": 5790, "train_speed(iter/s)": 0.035248 }, { "acc": 0.68432736, "epoch": 4.54, "learning_rate": 7.329498040069179e-05, "loss": 1.03443298, "memory(GiB)": 85.12, "step": 5795, "train_speed(iter/s)": 0.035248 }, { "acc": 0.69290547, "epoch": 4.55, "learning_rate": 7.324719081908731e-05, "loss": 0.98860283, "memory(GiB)": 85.12, "step": 5800, "train_speed(iter/s)": 0.035249 }, { "acc": 0.67324476, "epoch": 4.55, "learning_rate": 7.319937412788804e-05, "loss": 1.0707695, "memory(GiB)": 85.12, "step": 5805, "train_speed(iter/s)": 0.035249 }, { "acc": 0.67157531, "epoch": 4.55, "learning_rate": 7.315153038285522e-05, "loss": 1.08696251, "memory(GiB)": 85.12, "step": 5810, "train_speed(iter/s)": 0.03525 }, { "acc": 0.69197111, "epoch": 4.56, "learning_rate": 7.310365963978157e-05, "loss": 1.00116425, "memory(GiB)": 85.12, "step": 5815, "train_speed(iter/s)": 0.03525 }, { "acc": 0.67103248, "epoch": 4.56, "learning_rate": 7.305576195449131e-05, "loss": 1.07649136, "memory(GiB)": 85.12, "step": 5820, "train_speed(iter/s)": 0.035249 }, { "acc": 0.68711181, "epoch": 4.57, "learning_rate": 7.30078373828401e-05, "loss": 1.0179204, "memory(GiB)": 85.12, "step": 5825, "train_speed(iter/s)": 0.035249 }, { "acc": 0.6692019, "epoch": 4.57, "learning_rate": 7.29598859807149e-05, "loss": 1.09381266, "memory(GiB)": 85.12, "step": 5830, "train_speed(iter/s)": 0.035249 }, { "acc": 0.67294335, "epoch": 4.57, "learning_rate": 7.291190780403406e-05, "loss": 1.07299709, "memory(GiB)": 85.12, "step": 5835, "train_speed(iter/s)": 0.03525 }, { "acc": 0.67547169, "epoch": 4.58, "learning_rate": 7.286390290874703e-05, "loss": 1.057125, "memory(GiB)": 85.12, "step": 5840, "train_speed(iter/s)": 0.035251 }, { "acc": 0.67364817, "epoch": 4.58, "learning_rate": 7.281587135083452e-05, "loss": 1.06245632, "memory(GiB)": 85.12, "step": 5845, "train_speed(iter/s)": 0.035251 }, { "acc": 0.67187681, "epoch": 4.58, "learning_rate": 7.276781318630826e-05, "loss": 1.05665264, "memory(GiB)": 85.12, "step": 5850, "train_speed(iter/s)": 0.03525 }, { "acc": 0.66608486, "epoch": 4.59, "learning_rate": 7.271972847121108e-05, "loss": 1.06528816, "memory(GiB)": 85.12, "step": 5855, "train_speed(iter/s)": 0.035251 }, { "acc": 0.66735435, "epoch": 4.59, "learning_rate": 7.267161726161668e-05, "loss": 1.08798895, "memory(GiB)": 85.12, "step": 5860, "train_speed(iter/s)": 0.035251 }, { "acc": 0.68197312, "epoch": 4.6, "learning_rate": 7.262347961362972e-05, "loss": 1.05005894, "memory(GiB)": 85.12, "step": 5865, "train_speed(iter/s)": 0.035249 }, { "acc": 0.69323926, "epoch": 4.6, "learning_rate": 7.257531558338569e-05, "loss": 1.01712046, "memory(GiB)": 85.12, "step": 5870, "train_speed(iter/s)": 0.03525 }, { "acc": 0.67442312, "epoch": 4.6, "learning_rate": 7.252712522705082e-05, "loss": 1.0723731, "memory(GiB)": 85.12, "step": 5875, "train_speed(iter/s)": 0.035249 }, { "acc": 0.67650232, "epoch": 4.61, "learning_rate": 7.247890860082206e-05, "loss": 1.06735249, "memory(GiB)": 85.12, "step": 5880, "train_speed(iter/s)": 0.035249 }, { "acc": 0.6892065, "epoch": 4.61, "learning_rate": 7.243066576092696e-05, "loss": 1.02709103, "memory(GiB)": 85.12, "step": 5885, "train_speed(iter/s)": 0.035249 }, { "acc": 0.67309418, "epoch": 4.62, "learning_rate": 7.238239676362372e-05, "loss": 1.07167187, "memory(GiB)": 85.12, "step": 5890, "train_speed(iter/s)": 0.035248 }, { "acc": 0.67252998, "epoch": 4.62, "learning_rate": 7.233410166520093e-05, "loss": 1.0607296, "memory(GiB)": 85.12, "step": 5895, "train_speed(iter/s)": 0.035249 }, { "acc": 0.68240318, "epoch": 4.62, "learning_rate": 7.228578052197771e-05, "loss": 1.04523249, "memory(GiB)": 85.12, "step": 5900, "train_speed(iter/s)": 0.035249 }, { "acc": 0.68594875, "epoch": 4.63, "learning_rate": 7.223743339030352e-05, "loss": 1.04490318, "memory(GiB)": 85.12, "step": 5905, "train_speed(iter/s)": 0.03525 }, { "acc": 0.68417301, "epoch": 4.63, "learning_rate": 7.21890603265581e-05, "loss": 1.02140636, "memory(GiB)": 85.12, "step": 5910, "train_speed(iter/s)": 0.03525 }, { "acc": 0.67098885, "epoch": 4.64, "learning_rate": 7.214066138715148e-05, "loss": 1.07261381, "memory(GiB)": 85.12, "step": 5915, "train_speed(iter/s)": 0.03525 }, { "acc": 0.68442526, "epoch": 4.64, "learning_rate": 7.209223662852382e-05, "loss": 1.02172146, "memory(GiB)": 85.12, "step": 5920, "train_speed(iter/s)": 0.035249 }, { "acc": 0.68503671, "epoch": 4.64, "learning_rate": 7.204378610714544e-05, "loss": 1.03891659, "memory(GiB)": 85.12, "step": 5925, "train_speed(iter/s)": 0.035247 }, { "acc": 0.66351156, "epoch": 4.65, "learning_rate": 7.199530987951662e-05, "loss": 1.09004173, "memory(GiB)": 85.12, "step": 5930, "train_speed(iter/s)": 0.035246 }, { "acc": 0.6784656, "epoch": 4.65, "learning_rate": 7.194680800216773e-05, "loss": 1.04306164, "memory(GiB)": 85.12, "step": 5935, "train_speed(iter/s)": 0.035247 }, { "acc": 0.66837859, "epoch": 4.66, "learning_rate": 7.189828053165895e-05, "loss": 1.09347191, "memory(GiB)": 85.12, "step": 5940, "train_speed(iter/s)": 0.035247 }, { "acc": 0.68225694, "epoch": 4.66, "learning_rate": 7.184972752458034e-05, "loss": 1.01862364, "memory(GiB)": 85.12, "step": 5945, "train_speed(iter/s)": 0.035247 }, { "acc": 0.67672114, "epoch": 4.66, "learning_rate": 7.180114903755178e-05, "loss": 1.05343723, "memory(GiB)": 85.12, "step": 5950, "train_speed(iter/s)": 0.035248 }, { "acc": 0.67013016, "epoch": 4.67, "learning_rate": 7.175254512722281e-05, "loss": 1.09360905, "memory(GiB)": 85.12, "step": 5955, "train_speed(iter/s)": 0.035249 }, { "acc": 0.6726356, "epoch": 4.67, "learning_rate": 7.170391585027263e-05, "loss": 1.06504726, "memory(GiB)": 85.12, "step": 5960, "train_speed(iter/s)": 0.035249 }, { "acc": 0.67619967, "epoch": 4.67, "learning_rate": 7.165526126341004e-05, "loss": 1.07366638, "memory(GiB)": 85.12, "step": 5965, "train_speed(iter/s)": 0.03525 }, { "acc": 0.68048444, "epoch": 4.68, "learning_rate": 7.160658142337333e-05, "loss": 1.0334444, "memory(GiB)": 85.12, "step": 5970, "train_speed(iter/s)": 0.03525 }, { "acc": 0.67715273, "epoch": 4.68, "learning_rate": 7.155787638693026e-05, "loss": 1.05196743, "memory(GiB)": 85.12, "step": 5975, "train_speed(iter/s)": 0.035251 }, { "acc": 0.67561464, "epoch": 4.69, "learning_rate": 7.150914621087797e-05, "loss": 1.04411011, "memory(GiB)": 85.12, "step": 5980, "train_speed(iter/s)": 0.035252 }, { "acc": 0.67420325, "epoch": 4.69, "learning_rate": 7.146039095204288e-05, "loss": 1.05389662, "memory(GiB)": 85.12, "step": 5985, "train_speed(iter/s)": 0.035252 }, { "acc": 0.67851129, "epoch": 4.69, "learning_rate": 7.141161066728072e-05, "loss": 1.06385975, "memory(GiB)": 85.12, "step": 5990, "train_speed(iter/s)": 0.035251 }, { "acc": 0.67380395, "epoch": 4.7, "learning_rate": 7.136280541347638e-05, "loss": 1.0779253, "memory(GiB)": 85.12, "step": 5995, "train_speed(iter/s)": 0.035252 }, { "acc": 0.66957092, "epoch": 4.7, "learning_rate": 7.131397524754381e-05, "loss": 1.09954481, "memory(GiB)": 85.12, "step": 6000, "train_speed(iter/s)": 0.035253 }, { "acc": 0.67146378, "epoch": 4.71, "learning_rate": 7.126512022642613e-05, "loss": 1.06633472, "memory(GiB)": 85.12, "step": 6005, "train_speed(iter/s)": 0.035252 }, { "acc": 0.68832026, "epoch": 4.71, "learning_rate": 7.121624040709533e-05, "loss": 1.00683203, "memory(GiB)": 85.12, "step": 6010, "train_speed(iter/s)": 0.035253 }, { "acc": 0.66239319, "epoch": 4.71, "learning_rate": 7.116733584655237e-05, "loss": 1.10340586, "memory(GiB)": 85.12, "step": 6015, "train_speed(iter/s)": 0.035253 }, { "acc": 0.68091083, "epoch": 4.72, "learning_rate": 7.11184066018271e-05, "loss": 1.04083786, "memory(GiB)": 85.12, "step": 6020, "train_speed(iter/s)": 0.035254 }, { "acc": 0.68418207, "epoch": 4.72, "learning_rate": 7.106945272997807e-05, "loss": 1.03925686, "memory(GiB)": 85.12, "step": 6025, "train_speed(iter/s)": 0.035254 }, { "acc": 0.68394904, "epoch": 4.73, "learning_rate": 7.102047428809259e-05, "loss": 1.03902893, "memory(GiB)": 85.12, "step": 6030, "train_speed(iter/s)": 0.035255 }, { "acc": 0.6748229, "epoch": 4.73, "learning_rate": 7.097147133328666e-05, "loss": 1.05747595, "memory(GiB)": 85.12, "step": 6035, "train_speed(iter/s)": 0.035254 }, { "acc": 0.65920019, "epoch": 4.73, "learning_rate": 7.092244392270477e-05, "loss": 1.11649303, "memory(GiB)": 85.12, "step": 6040, "train_speed(iter/s)": 0.035255 }, { "acc": 0.67655163, "epoch": 4.74, "learning_rate": 7.087339211352005e-05, "loss": 1.05948544, "memory(GiB)": 85.12, "step": 6045, "train_speed(iter/s)": 0.035255 }, { "acc": 0.68559122, "epoch": 4.74, "learning_rate": 7.0824315962934e-05, "loss": 1.01261806, "memory(GiB)": 85.12, "step": 6050, "train_speed(iter/s)": 0.035255 }, { "acc": 0.67522321, "epoch": 4.75, "learning_rate": 7.077521552817651e-05, "loss": 1.06603298, "memory(GiB)": 85.12, "step": 6055, "train_speed(iter/s)": 0.035254 }, { "acc": 0.68662744, "epoch": 4.75, "learning_rate": 7.072609086650582e-05, "loss": 1.03524733, "memory(GiB)": 85.12, "step": 6060, "train_speed(iter/s)": 0.035254 }, { "acc": 0.67083483, "epoch": 4.75, "learning_rate": 7.067694203520841e-05, "loss": 1.07945566, "memory(GiB)": 85.12, "step": 6065, "train_speed(iter/s)": 0.035255 }, { "acc": 0.68091211, "epoch": 4.76, "learning_rate": 7.062776909159893e-05, "loss": 1.07089872, "memory(GiB)": 85.12, "step": 6070, "train_speed(iter/s)": 0.035255 }, { "acc": 0.68336306, "epoch": 4.76, "learning_rate": 7.057857209302017e-05, "loss": 1.03715401, "memory(GiB)": 85.12, "step": 6075, "train_speed(iter/s)": 0.035256 }, { "acc": 0.66016645, "epoch": 4.76, "learning_rate": 7.0529351096843e-05, "loss": 1.12012835, "memory(GiB)": 85.12, "step": 6080, "train_speed(iter/s)": 0.035255 }, { "acc": 0.66435328, "epoch": 4.77, "learning_rate": 7.048010616046614e-05, "loss": 1.10283918, "memory(GiB)": 85.12, "step": 6085, "train_speed(iter/s)": 0.035256 }, { "acc": 0.68103104, "epoch": 4.77, "learning_rate": 7.043083734131643e-05, "loss": 1.06891689, "memory(GiB)": 85.12, "step": 6090, "train_speed(iter/s)": 0.035256 }, { "acc": 0.66451955, "epoch": 4.78, "learning_rate": 7.038154469684838e-05, "loss": 1.10513678, "memory(GiB)": 85.12, "step": 6095, "train_speed(iter/s)": 0.035256 }, { "acc": 0.6662466, "epoch": 4.78, "learning_rate": 7.033222828454442e-05, "loss": 1.07707005, "memory(GiB)": 85.12, "step": 6100, "train_speed(iter/s)": 0.035257 }, { "acc": 0.6689785, "epoch": 4.78, "learning_rate": 7.028288816191457e-05, "loss": 1.07475443, "memory(GiB)": 85.12, "step": 6105, "train_speed(iter/s)": 0.035257 }, { "acc": 0.6744, "epoch": 4.79, "learning_rate": 7.023352438649662e-05, "loss": 1.07835417, "memory(GiB)": 85.12, "step": 6110, "train_speed(iter/s)": 0.035258 }, { "acc": 0.6828012, "epoch": 4.79, "learning_rate": 7.018413701585587e-05, "loss": 1.0427166, "memory(GiB)": 85.12, "step": 6115, "train_speed(iter/s)": 0.035259 }, { "acc": 0.67784457, "epoch": 4.8, "learning_rate": 7.013472610758515e-05, "loss": 1.03452606, "memory(GiB)": 85.12, "step": 6120, "train_speed(iter/s)": 0.035258 }, { "acc": 0.68209443, "epoch": 4.8, "learning_rate": 7.008529171930476e-05, "loss": 1.05084362, "memory(GiB)": 85.12, "step": 6125, "train_speed(iter/s)": 0.035258 }, { "acc": 0.6858211, "epoch": 4.8, "learning_rate": 7.003583390866234e-05, "loss": 1.00880671, "memory(GiB)": 85.12, "step": 6130, "train_speed(iter/s)": 0.035259 }, { "acc": 0.67488031, "epoch": 4.81, "learning_rate": 6.998635273333289e-05, "loss": 1.06708899, "memory(GiB)": 85.12, "step": 6135, "train_speed(iter/s)": 0.035259 }, { "acc": 0.68384433, "epoch": 4.81, "learning_rate": 6.99368482510186e-05, "loss": 1.01454229, "memory(GiB)": 85.12, "step": 6140, "train_speed(iter/s)": 0.035259 }, { "acc": 0.68791485, "epoch": 4.82, "learning_rate": 6.98873205194489e-05, "loss": 1.03954144, "memory(GiB)": 85.12, "step": 6145, "train_speed(iter/s)": 0.03526 }, { "acc": 0.67316933, "epoch": 4.82, "learning_rate": 6.983776959638032e-05, "loss": 1.05018234, "memory(GiB)": 85.12, "step": 6150, "train_speed(iter/s)": 0.03526 }, { "acc": 0.6828548, "epoch": 4.82, "learning_rate": 6.97881955395964e-05, "loss": 1.02742395, "memory(GiB)": 85.12, "step": 6155, "train_speed(iter/s)": 0.035261 }, { "acc": 0.67206783, "epoch": 4.83, "learning_rate": 6.973859840690766e-05, "loss": 1.08918076, "memory(GiB)": 85.12, "step": 6160, "train_speed(iter/s)": 0.035261 }, { "acc": 0.67323599, "epoch": 4.83, "learning_rate": 6.968897825615158e-05, "loss": 1.07985773, "memory(GiB)": 85.12, "step": 6165, "train_speed(iter/s)": 0.035261 }, { "acc": 0.67930789, "epoch": 4.84, "learning_rate": 6.963933514519243e-05, "loss": 1.04254446, "memory(GiB)": 85.12, "step": 6170, "train_speed(iter/s)": 0.035261 }, { "acc": 0.69380798, "epoch": 4.84, "learning_rate": 6.958966913192127e-05, "loss": 1.00592508, "memory(GiB)": 85.12, "step": 6175, "train_speed(iter/s)": 0.035261 }, { "acc": 0.67768106, "epoch": 4.84, "learning_rate": 6.95399802742559e-05, "loss": 1.04340172, "memory(GiB)": 85.12, "step": 6180, "train_speed(iter/s)": 0.03526 }, { "acc": 0.66789145, "epoch": 4.85, "learning_rate": 6.94902686301407e-05, "loss": 1.09427404, "memory(GiB)": 85.12, "step": 6185, "train_speed(iter/s)": 0.03526 }, { "acc": 0.65930495, "epoch": 4.85, "learning_rate": 6.944053425754668e-05, "loss": 1.10919495, "memory(GiB)": 85.12, "step": 6190, "train_speed(iter/s)": 0.035261 }, { "acc": 0.68507056, "epoch": 4.86, "learning_rate": 6.939077721447129e-05, "loss": 1.0415493, "memory(GiB)": 85.12, "step": 6195, "train_speed(iter/s)": 0.035261 }, { "acc": 0.67345014, "epoch": 4.86, "learning_rate": 6.93409975589385e-05, "loss": 1.07625484, "memory(GiB)": 85.12, "step": 6200, "train_speed(iter/s)": 0.035261 }, { "acc": 0.66527452, "epoch": 4.86, "learning_rate": 6.92911953489986e-05, "loss": 1.08373318, "memory(GiB)": 85.12, "step": 6205, "train_speed(iter/s)": 0.035261 }, { "acc": 0.68124013, "epoch": 4.87, "learning_rate": 6.924137064272815e-05, "loss": 1.05780458, "memory(GiB)": 85.12, "step": 6210, "train_speed(iter/s)": 0.035261 }, { "acc": 0.67599878, "epoch": 4.87, "learning_rate": 6.919152349822999e-05, "loss": 1.06544428, "memory(GiB)": 85.12, "step": 6215, "train_speed(iter/s)": 0.035262 }, { "acc": 0.67822771, "epoch": 4.87, "learning_rate": 6.914165397363318e-05, "loss": 1.08813448, "memory(GiB)": 85.12, "step": 6220, "train_speed(iter/s)": 0.035263 }, { "acc": 0.67499804, "epoch": 4.88, "learning_rate": 6.909176212709272e-05, "loss": 1.04310112, "memory(GiB)": 85.12, "step": 6225, "train_speed(iter/s)": 0.035262 }, { "acc": 0.68530326, "epoch": 4.88, "learning_rate": 6.90418480167898e-05, "loss": 1.02356024, "memory(GiB)": 85.12, "step": 6230, "train_speed(iter/s)": 0.035263 }, { "acc": 0.67177353, "epoch": 4.89, "learning_rate": 6.899191170093148e-05, "loss": 1.09755363, "memory(GiB)": 85.12, "step": 6235, "train_speed(iter/s)": 0.035263 }, { "acc": 0.68792162, "epoch": 4.89, "learning_rate": 6.894195323775078e-05, "loss": 1.01962185, "memory(GiB)": 85.12, "step": 6240, "train_speed(iter/s)": 0.035263 }, { "acc": 0.68935556, "epoch": 4.89, "learning_rate": 6.889197268550648e-05, "loss": 1.00933504, "memory(GiB)": 85.12, "step": 6245, "train_speed(iter/s)": 0.035262 }, { "acc": 0.67911396, "epoch": 4.9, "learning_rate": 6.884197010248314e-05, "loss": 1.0349247, "memory(GiB)": 85.12, "step": 6250, "train_speed(iter/s)": 0.035262 }, { "acc": 0.67767315, "epoch": 4.9, "learning_rate": 6.879194554699106e-05, "loss": 1.04797144, "memory(GiB)": 85.12, "step": 6255, "train_speed(iter/s)": 0.035263 }, { "acc": 0.67949104, "epoch": 4.91, "learning_rate": 6.874189907736608e-05, "loss": 1.0555562, "memory(GiB)": 85.12, "step": 6260, "train_speed(iter/s)": 0.035263 }, { "acc": 0.66246786, "epoch": 4.91, "learning_rate": 6.869183075196968e-05, "loss": 1.08616772, "memory(GiB)": 85.12, "step": 6265, "train_speed(iter/s)": 0.035264 }, { "acc": 0.67599144, "epoch": 4.91, "learning_rate": 6.864174062918875e-05, "loss": 1.05308266, "memory(GiB)": 85.12, "step": 6270, "train_speed(iter/s)": 0.035265 }, { "acc": 0.68216491, "epoch": 4.92, "learning_rate": 6.859162876743565e-05, "loss": 1.05794802, "memory(GiB)": 85.12, "step": 6275, "train_speed(iter/s)": 0.035265 }, { "acc": 0.68431473, "epoch": 4.92, "learning_rate": 6.85414952251481e-05, "loss": 1.03531227, "memory(GiB)": 85.12, "step": 6280, "train_speed(iter/s)": 0.035266 }, { "acc": 0.66319323, "epoch": 4.93, "learning_rate": 6.849134006078904e-05, "loss": 1.10098467, "memory(GiB)": 85.12, "step": 6285, "train_speed(iter/s)": 0.035266 }, { "acc": 0.66633544, "epoch": 4.93, "learning_rate": 6.84411633328467e-05, "loss": 1.0953721, "memory(GiB)": 85.12, "step": 6290, "train_speed(iter/s)": 0.035264 }, { "acc": 0.69023266, "epoch": 4.93, "learning_rate": 6.839096509983436e-05, "loss": 1.01781502, "memory(GiB)": 85.12, "step": 6295, "train_speed(iter/s)": 0.035265 }, { "acc": 0.68030787, "epoch": 4.94, "learning_rate": 6.83407454202905e-05, "loss": 1.07228956, "memory(GiB)": 85.12, "step": 6300, "train_speed(iter/s)": 0.035265 }, { "acc": 0.66776667, "epoch": 4.94, "learning_rate": 6.82905043527785e-05, "loss": 1.0892725, "memory(GiB)": 85.12, "step": 6305, "train_speed(iter/s)": 0.035266 }, { "acc": 0.68062611, "epoch": 4.95, "learning_rate": 6.824024195588677e-05, "loss": 1.04044657, "memory(GiB)": 85.12, "step": 6310, "train_speed(iter/s)": 0.035266 }, { "acc": 0.6697052, "epoch": 4.95, "learning_rate": 6.818995828822852e-05, "loss": 1.07682076, "memory(GiB)": 85.12, "step": 6315, "train_speed(iter/s)": 0.035267 }, { "acc": 0.67304358, "epoch": 4.95, "learning_rate": 6.813965340844183e-05, "loss": 1.09571772, "memory(GiB)": 85.12, "step": 6320, "train_speed(iter/s)": 0.035267 }, { "acc": 0.68612056, "epoch": 4.96, "learning_rate": 6.808932737518944e-05, "loss": 1.02444048, "memory(GiB)": 85.12, "step": 6325, "train_speed(iter/s)": 0.035268 }, { "acc": 0.67951617, "epoch": 4.96, "learning_rate": 6.803898024715884e-05, "loss": 1.07699089, "memory(GiB)": 85.12, "step": 6330, "train_speed(iter/s)": 0.035268 }, { "acc": 0.66642966, "epoch": 4.96, "learning_rate": 6.798861208306204e-05, "loss": 1.09788532, "memory(GiB)": 85.12, "step": 6335, "train_speed(iter/s)": 0.035268 }, { "acc": 0.68518648, "epoch": 4.97, "learning_rate": 6.793822294163565e-05, "loss": 1.03690128, "memory(GiB)": 85.12, "step": 6340, "train_speed(iter/s)": 0.035268 }, { "acc": 0.68445034, "epoch": 4.97, "learning_rate": 6.788781288164072e-05, "loss": 1.04183044, "memory(GiB)": 85.12, "step": 6345, "train_speed(iter/s)": 0.035268 }, { "acc": 0.67292199, "epoch": 4.98, "learning_rate": 6.783738196186267e-05, "loss": 1.06181889, "memory(GiB)": 85.12, "step": 6350, "train_speed(iter/s)": 0.035269 }, { "acc": 0.66322756, "epoch": 4.98, "learning_rate": 6.778693024111128e-05, "loss": 1.11033144, "memory(GiB)": 85.12, "step": 6355, "train_speed(iter/s)": 0.03527 }, { "acc": 0.6683476, "epoch": 4.98, "learning_rate": 6.773645777822054e-05, "loss": 1.09244795, "memory(GiB)": 85.12, "step": 6360, "train_speed(iter/s)": 0.03527 }, { "acc": 0.67698259, "epoch": 4.99, "learning_rate": 6.76859646320487e-05, "loss": 1.05961075, "memory(GiB)": 85.12, "step": 6365, "train_speed(iter/s)": 0.035271 }, { "acc": 0.69028482, "epoch": 4.99, "learning_rate": 6.763545086147806e-05, "loss": 1.02525082, "memory(GiB)": 85.12, "step": 6370, "train_speed(iter/s)": 0.03527 }, { "acc": 0.67674527, "epoch": 5.0, "learning_rate": 6.758491652541499e-05, "loss": 1.06389141, "memory(GiB)": 85.12, "step": 6375, "train_speed(iter/s)": 0.03527 }, { "acc": 0.66953359, "epoch": 5.0, "learning_rate": 6.75343616827899e-05, "loss": 1.06004562, "memory(GiB)": 85.12, "step": 6380, "train_speed(iter/s)": 0.035272 }, { "epoch": 5.0, "eval_acc": 0.6952166291009266, "eval_loss": 0.9776778817176819, "eval_runtime": 84.814, "eval_samples_per_second": 1.097, "eval_steps_per_second": 1.097, "step": 6380 }, { "acc": 0.66123757, "epoch": 5.0, "learning_rate": 6.748378639255701e-05, "loss": 1.09309893, "memory(GiB)": 85.12, "step": 6385, "train_speed(iter/s)": 0.035255 }, { "acc": 0.6866303, "epoch": 5.01, "learning_rate": 6.74331907136945e-05, "loss": 1.01490898, "memory(GiB)": 85.12, "step": 6390, "train_speed(iter/s)": 0.035255 }, { "acc": 0.68321543, "epoch": 5.01, "learning_rate": 6.73825747052042e-05, "loss": 1.04613428, "memory(GiB)": 85.12, "step": 6395, "train_speed(iter/s)": 0.035255 }, { "acc": 0.69075665, "epoch": 5.02, "learning_rate": 6.733193842611176e-05, "loss": 1.02780151, "memory(GiB)": 85.12, "step": 6400, "train_speed(iter/s)": 0.035256 }, { "acc": 0.69020853, "epoch": 5.02, "learning_rate": 6.72812819354664e-05, "loss": 1.0208807, "memory(GiB)": 85.12, "step": 6405, "train_speed(iter/s)": 0.035256 }, { "acc": 0.67573576, "epoch": 5.02, "learning_rate": 6.723060529234095e-05, "loss": 1.03973875, "memory(GiB)": 85.12, "step": 6410, "train_speed(iter/s)": 0.035257 }, { "acc": 0.67599573, "epoch": 5.03, "learning_rate": 6.717990855583171e-05, "loss": 1.048526, "memory(GiB)": 85.12, "step": 6415, "train_speed(iter/s)": 0.035257 }, { "acc": 0.68013263, "epoch": 5.03, "learning_rate": 6.712919178505846e-05, "loss": 1.04929171, "memory(GiB)": 85.12, "step": 6420, "train_speed(iter/s)": 0.035257 }, { "acc": 0.67604647, "epoch": 5.04, "learning_rate": 6.707845503916424e-05, "loss": 1.02617359, "memory(GiB)": 85.12, "step": 6425, "train_speed(iter/s)": 0.035257 }, { "acc": 0.67967114, "epoch": 5.04, "learning_rate": 6.70276983773155e-05, "loss": 1.05347996, "memory(GiB)": 85.12, "step": 6430, "train_speed(iter/s)": 0.035258 }, { "acc": 0.69580359, "epoch": 5.04, "learning_rate": 6.697692185870185e-05, "loss": 1.0092021, "memory(GiB)": 85.12, "step": 6435, "train_speed(iter/s)": 0.035259 }, { "acc": 0.68660007, "epoch": 5.05, "learning_rate": 6.692612554253607e-05, "loss": 1.00932379, "memory(GiB)": 85.12, "step": 6440, "train_speed(iter/s)": 0.035259 }, { "acc": 0.67771325, "epoch": 5.05, "learning_rate": 6.687530948805404e-05, "loss": 1.06307144, "memory(GiB)": 85.12, "step": 6445, "train_speed(iter/s)": 0.035259 }, { "acc": 0.68935843, "epoch": 5.05, "learning_rate": 6.682447375451463e-05, "loss": 0.98286209, "memory(GiB)": 85.12, "step": 6450, "train_speed(iter/s)": 0.03526 }, { "acc": 0.68584781, "epoch": 5.06, "learning_rate": 6.67736184011997e-05, "loss": 1.02812233, "memory(GiB)": 85.12, "step": 6455, "train_speed(iter/s)": 0.035259 }, { "acc": 0.68117771, "epoch": 5.06, "learning_rate": 6.672274348741396e-05, "loss": 1.04238253, "memory(GiB)": 85.12, "step": 6460, "train_speed(iter/s)": 0.035259 }, { "acc": 0.67923732, "epoch": 5.07, "learning_rate": 6.667184907248493e-05, "loss": 1.03050461, "memory(GiB)": 85.12, "step": 6465, "train_speed(iter/s)": 0.035258 }, { "acc": 0.687995, "epoch": 5.07, "learning_rate": 6.662093521576285e-05, "loss": 1.00678883, "memory(GiB)": 85.12, "step": 6470, "train_speed(iter/s)": 0.035259 }, { "acc": 0.67301135, "epoch": 5.07, "learning_rate": 6.657000197662068e-05, "loss": 1.06638184, "memory(GiB)": 85.12, "step": 6475, "train_speed(iter/s)": 0.035257 }, { "acc": 0.6931222, "epoch": 5.08, "learning_rate": 6.651904941445398e-05, "loss": 1.01922817, "memory(GiB)": 85.12, "step": 6480, "train_speed(iter/s)": 0.035258 }, { "acc": 0.68098392, "epoch": 5.08, "learning_rate": 6.64680775886808e-05, "loss": 1.04567719, "memory(GiB)": 85.12, "step": 6485, "train_speed(iter/s)": 0.035258 }, { "acc": 0.68202472, "epoch": 5.09, "learning_rate": 6.641708655874169e-05, "loss": 1.04754171, "memory(GiB)": 85.12, "step": 6490, "train_speed(iter/s)": 0.035259 }, { "acc": 0.67911615, "epoch": 5.09, "learning_rate": 6.636607638409956e-05, "loss": 1.06853676, "memory(GiB)": 85.12, "step": 6495, "train_speed(iter/s)": 0.035259 }, { "acc": 0.68118539, "epoch": 5.09, "learning_rate": 6.63150471242397e-05, "loss": 1.04338474, "memory(GiB)": 85.12, "step": 6500, "train_speed(iter/s)": 0.035259 }, { "acc": 0.67814703, "epoch": 5.1, "learning_rate": 6.62639988386696e-05, "loss": 1.02098122, "memory(GiB)": 85.12, "step": 6505, "train_speed(iter/s)": 0.035259 }, { "acc": 0.68116179, "epoch": 5.1, "learning_rate": 6.6212931586919e-05, "loss": 1.04279861, "memory(GiB)": 85.12, "step": 6510, "train_speed(iter/s)": 0.035259 }, { "acc": 0.68639112, "epoch": 5.11, "learning_rate": 6.616184542853965e-05, "loss": 1.00777941, "memory(GiB)": 85.12, "step": 6515, "train_speed(iter/s)": 0.035259 }, { "acc": 0.67839465, "epoch": 5.11, "learning_rate": 6.611074042310549e-05, "loss": 1.03657656, "memory(GiB)": 85.12, "step": 6520, "train_speed(iter/s)": 0.035259 }, { "acc": 0.68946548, "epoch": 5.11, "learning_rate": 6.605961663021233e-05, "loss": 1.01957436, "memory(GiB)": 85.12, "step": 6525, "train_speed(iter/s)": 0.03526 }, { "acc": 0.69059048, "epoch": 5.12, "learning_rate": 6.600847410947794e-05, "loss": 1.01516457, "memory(GiB)": 85.12, "step": 6530, "train_speed(iter/s)": 0.03526 }, { "acc": 0.66432734, "epoch": 5.12, "learning_rate": 6.595731292054187e-05, "loss": 1.08365002, "memory(GiB)": 85.12, "step": 6535, "train_speed(iter/s)": 0.035261 }, { "acc": 0.67369056, "epoch": 5.13, "learning_rate": 6.590613312306555e-05, "loss": 1.0672245, "memory(GiB)": 85.12, "step": 6540, "train_speed(iter/s)": 0.035261 }, { "acc": 0.68721709, "epoch": 5.13, "learning_rate": 6.5854934776732e-05, "loss": 1.00315237, "memory(GiB)": 85.12, "step": 6545, "train_speed(iter/s)": 0.03526 }, { "acc": 0.67401967, "epoch": 5.13, "learning_rate": 6.580371794124592e-05, "loss": 1.04892883, "memory(GiB)": 85.12, "step": 6550, "train_speed(iter/s)": 0.035261 }, { "acc": 0.69203424, "epoch": 5.14, "learning_rate": 6.575248267633357e-05, "loss": 1.01215448, "memory(GiB)": 85.12, "step": 6555, "train_speed(iter/s)": 0.035261 }, { "acc": 0.68317304, "epoch": 5.14, "learning_rate": 6.57012290417427e-05, "loss": 1.0137826, "memory(GiB)": 85.12, "step": 6560, "train_speed(iter/s)": 0.035262 }, { "acc": 0.67414193, "epoch": 5.14, "learning_rate": 6.564995709724246e-05, "loss": 1.05723057, "memory(GiB)": 85.12, "step": 6565, "train_speed(iter/s)": 0.035263 }, { "acc": 0.68139715, "epoch": 5.15, "learning_rate": 6.55986669026234e-05, "loss": 1.02423239, "memory(GiB)": 85.12, "step": 6570, "train_speed(iter/s)": 0.035263 }, { "acc": 0.67808838, "epoch": 5.15, "learning_rate": 6.554735851769729e-05, "loss": 1.04498472, "memory(GiB)": 85.12, "step": 6575, "train_speed(iter/s)": 0.035263 }, { "acc": 0.67794299, "epoch": 5.16, "learning_rate": 6.549603200229717e-05, "loss": 1.06934061, "memory(GiB)": 85.12, "step": 6580, "train_speed(iter/s)": 0.035264 }, { "acc": 0.67949061, "epoch": 5.16, "learning_rate": 6.54446874162772e-05, "loss": 1.03536024, "memory(GiB)": 85.12, "step": 6585, "train_speed(iter/s)": 0.035264 }, { "acc": 0.68172569, "epoch": 5.16, "learning_rate": 6.539332481951261e-05, "loss": 1.03228369, "memory(GiB)": 85.12, "step": 6590, "train_speed(iter/s)": 0.035265 }, { "acc": 0.67113338, "epoch": 5.17, "learning_rate": 6.534194427189961e-05, "loss": 1.07480278, "memory(GiB)": 85.12, "step": 6595, "train_speed(iter/s)": 0.035265 }, { "acc": 0.68143373, "epoch": 5.17, "learning_rate": 6.529054583335538e-05, "loss": 1.04739819, "memory(GiB)": 85.12, "step": 6600, "train_speed(iter/s)": 0.035266 }, { "acc": 0.68343229, "epoch": 5.18, "learning_rate": 6.523912956381797e-05, "loss": 1.04386349, "memory(GiB)": 85.12, "step": 6605, "train_speed(iter/s)": 0.035265 }, { "acc": 0.68208308, "epoch": 5.18, "learning_rate": 6.518769552324619e-05, "loss": 1.03945503, "memory(GiB)": 85.12, "step": 6610, "train_speed(iter/s)": 0.035264 }, { "acc": 0.68102374, "epoch": 5.18, "learning_rate": 6.513624377161957e-05, "loss": 1.06787729, "memory(GiB)": 85.12, "step": 6615, "train_speed(iter/s)": 0.035265 }, { "acc": 0.68312049, "epoch": 5.19, "learning_rate": 6.508477436893835e-05, "loss": 1.01988068, "memory(GiB)": 85.12, "step": 6620, "train_speed(iter/s)": 0.035265 }, { "acc": 0.68279829, "epoch": 5.19, "learning_rate": 6.503328737522327e-05, "loss": 1.03501797, "memory(GiB)": 85.12, "step": 6625, "train_speed(iter/s)": 0.035266 }, { "acc": 0.68076296, "epoch": 5.2, "learning_rate": 6.498178285051567e-05, "loss": 1.02725744, "memory(GiB)": 85.12, "step": 6630, "train_speed(iter/s)": 0.035267 }, { "acc": 0.70253773, "epoch": 5.2, "learning_rate": 6.493026085487725e-05, "loss": 0.96801195, "memory(GiB)": 85.12, "step": 6635, "train_speed(iter/s)": 0.035267 }, { "acc": 0.69069552, "epoch": 5.2, "learning_rate": 6.487872144839018e-05, "loss": 1.02977905, "memory(GiB)": 85.12, "step": 6640, "train_speed(iter/s)": 0.035266 }, { "acc": 0.68076792, "epoch": 5.21, "learning_rate": 6.482716469115685e-05, "loss": 1.05776606, "memory(GiB)": 85.12, "step": 6645, "train_speed(iter/s)": 0.035267 }, { "acc": 0.68598046, "epoch": 5.21, "learning_rate": 6.47755906432999e-05, "loss": 1.00461483, "memory(GiB)": 85.12, "step": 6650, "train_speed(iter/s)": 0.035267 }, { "acc": 0.67754989, "epoch": 5.22, "learning_rate": 6.472399936496219e-05, "loss": 1.06693382, "memory(GiB)": 85.12, "step": 6655, "train_speed(iter/s)": 0.035267 }, { "acc": 0.703161, "epoch": 5.22, "learning_rate": 6.467239091630657e-05, "loss": 0.96703682, "memory(GiB)": 85.12, "step": 6660, "train_speed(iter/s)": 0.035267 }, { "acc": 0.6778832, "epoch": 5.22, "learning_rate": 6.462076535751603e-05, "loss": 1.05017014, "memory(GiB)": 85.12, "step": 6665, "train_speed(iter/s)": 0.035268 }, { "acc": 0.67984619, "epoch": 5.23, "learning_rate": 6.456912274879339e-05, "loss": 1.02873812, "memory(GiB)": 85.12, "step": 6670, "train_speed(iter/s)": 0.035267 }, { "acc": 0.67118473, "epoch": 5.23, "learning_rate": 6.451746315036149e-05, "loss": 1.05280457, "memory(GiB)": 85.12, "step": 6675, "train_speed(iter/s)": 0.035265 }, { "acc": 0.67446933, "epoch": 5.24, "learning_rate": 6.446578662246287e-05, "loss": 1.04249983, "memory(GiB)": 85.12, "step": 6680, "train_speed(iter/s)": 0.035265 }, { "acc": 0.67916312, "epoch": 5.24, "learning_rate": 6.44140932253599e-05, "loss": 1.04621458, "memory(GiB)": 85.12, "step": 6685, "train_speed(iter/s)": 0.035264 }, { "acc": 0.68936515, "epoch": 5.24, "learning_rate": 6.43623830193345e-05, "loss": 0.99246674, "memory(GiB)": 85.12, "step": 6690, "train_speed(iter/s)": 0.035265 }, { "acc": 0.67838211, "epoch": 5.25, "learning_rate": 6.431065606468832e-05, "loss": 1.06191158, "memory(GiB)": 85.12, "step": 6695, "train_speed(iter/s)": 0.035265 }, { "acc": 0.67258978, "epoch": 5.25, "learning_rate": 6.425891242174247e-05, "loss": 1.08182993, "memory(GiB)": 85.12, "step": 6700, "train_speed(iter/s)": 0.035264 }, { "acc": 0.67531633, "epoch": 5.25, "learning_rate": 6.420715215083755e-05, "loss": 1.0620513, "memory(GiB)": 85.12, "step": 6705, "train_speed(iter/s)": 0.035265 }, { "acc": 0.68797979, "epoch": 5.26, "learning_rate": 6.415537531233354e-05, "loss": 1.03437891, "memory(GiB)": 85.12, "step": 6710, "train_speed(iter/s)": 0.035265 }, { "acc": 0.68858671, "epoch": 5.26, "learning_rate": 6.410358196660972e-05, "loss": 1.0127037, "memory(GiB)": 85.12, "step": 6715, "train_speed(iter/s)": 0.035266 }, { "acc": 0.68950157, "epoch": 5.27, "learning_rate": 6.405177217406467e-05, "loss": 1.00921545, "memory(GiB)": 85.12, "step": 6720, "train_speed(iter/s)": 0.035265 }, { "acc": 0.69257259, "epoch": 5.27, "learning_rate": 6.399994599511607e-05, "loss": 1.01077251, "memory(GiB)": 85.12, "step": 6725, "train_speed(iter/s)": 0.035265 }, { "acc": 0.68077116, "epoch": 5.27, "learning_rate": 6.394810349020083e-05, "loss": 1.0404789, "memory(GiB)": 85.12, "step": 6730, "train_speed(iter/s)": 0.035264 }, { "acc": 0.68140783, "epoch": 5.28, "learning_rate": 6.389624471977476e-05, "loss": 1.0449604, "memory(GiB)": 85.12, "step": 6735, "train_speed(iter/s)": 0.035264 }, { "acc": 0.69492321, "epoch": 5.28, "learning_rate": 6.384436974431274e-05, "loss": 1.00443935, "memory(GiB)": 85.12, "step": 6740, "train_speed(iter/s)": 0.035265 }, { "acc": 0.67667093, "epoch": 5.29, "learning_rate": 6.379247862430851e-05, "loss": 1.05137119, "memory(GiB)": 85.12, "step": 6745, "train_speed(iter/s)": 0.035265 }, { "acc": 0.68313727, "epoch": 5.29, "learning_rate": 6.374057142027463e-05, "loss": 1.03881302, "memory(GiB)": 85.12, "step": 6750, "train_speed(iter/s)": 0.035265 }, { "acc": 0.68495326, "epoch": 5.29, "learning_rate": 6.368864819274243e-05, "loss": 1.03522501, "memory(GiB)": 85.12, "step": 6755, "train_speed(iter/s)": 0.035264 }, { "acc": 0.68464856, "epoch": 5.3, "learning_rate": 6.363670900226191e-05, "loss": 1.03497791, "memory(GiB)": 85.12, "step": 6760, "train_speed(iter/s)": 0.035264 }, { "acc": 0.68600035, "epoch": 5.3, "learning_rate": 6.358475390940172e-05, "loss": 1.02388697, "memory(GiB)": 85.12, "step": 6765, "train_speed(iter/s)": 0.035263 }, { "acc": 0.69201202, "epoch": 5.31, "learning_rate": 6.3532782974749e-05, "loss": 1.02907257, "memory(GiB)": 85.12, "step": 6770, "train_speed(iter/s)": 0.035262 }, { "acc": 0.67293754, "epoch": 5.31, "learning_rate": 6.348079625890943e-05, "loss": 1.0643261, "memory(GiB)": 85.12, "step": 6775, "train_speed(iter/s)": 0.035263 }, { "acc": 0.67401209, "epoch": 5.31, "learning_rate": 6.342879382250701e-05, "loss": 1.0822813, "memory(GiB)": 85.12, "step": 6780, "train_speed(iter/s)": 0.035262 }, { "acc": 0.68836966, "epoch": 5.32, "learning_rate": 6.337677572618417e-05, "loss": 1.03913021, "memory(GiB)": 85.12, "step": 6785, "train_speed(iter/s)": 0.035263 }, { "acc": 0.68064456, "epoch": 5.32, "learning_rate": 6.332474203060155e-05, "loss": 1.02623987, "memory(GiB)": 85.12, "step": 6790, "train_speed(iter/s)": 0.035262 }, { "acc": 0.67679968, "epoch": 5.33, "learning_rate": 6.327269279643792e-05, "loss": 1.06826124, "memory(GiB)": 85.12, "step": 6795, "train_speed(iter/s)": 0.035263 }, { "acc": 0.68307304, "epoch": 5.33, "learning_rate": 6.322062808439029e-05, "loss": 1.02487335, "memory(GiB)": 85.12, "step": 6800, "train_speed(iter/s)": 0.035262 }, { "acc": 0.67592635, "epoch": 5.33, "learning_rate": 6.316854795517364e-05, "loss": 1.07259035, "memory(GiB)": 85.12, "step": 6805, "train_speed(iter/s)": 0.035262 }, { "acc": 0.68332787, "epoch": 5.34, "learning_rate": 6.311645246952097e-05, "loss": 1.00476055, "memory(GiB)": 85.12, "step": 6810, "train_speed(iter/s)": 0.035263 }, { "acc": 0.67505136, "epoch": 5.34, "learning_rate": 6.306434168818315e-05, "loss": 1.07261286, "memory(GiB)": 85.12, "step": 6815, "train_speed(iter/s)": 0.035262 }, { "acc": 0.68755107, "epoch": 5.34, "learning_rate": 6.301221567192892e-05, "loss": 1.01972666, "memory(GiB)": 85.12, "step": 6820, "train_speed(iter/s)": 0.035263 }, { "acc": 0.688377, "epoch": 5.35, "learning_rate": 6.296007448154475e-05, "loss": 1.04110975, "memory(GiB)": 85.12, "step": 6825, "train_speed(iter/s)": 0.035263 }, { "acc": 0.67587614, "epoch": 5.35, "learning_rate": 6.290791817783486e-05, "loss": 1.05248299, "memory(GiB)": 85.12, "step": 6830, "train_speed(iter/s)": 0.035264 }, { "acc": 0.69134288, "epoch": 5.36, "learning_rate": 6.285574682162103e-05, "loss": 1.00044346, "memory(GiB)": 85.12, "step": 6835, "train_speed(iter/s)": 0.035264 }, { "acc": 0.67581382, "epoch": 5.36, "learning_rate": 6.280356047374264e-05, "loss": 1.05379066, "memory(GiB)": 85.12, "step": 6840, "train_speed(iter/s)": 0.035264 }, { "acc": 0.68169432, "epoch": 5.36, "learning_rate": 6.275135919505655e-05, "loss": 1.02964487, "memory(GiB)": 85.12, "step": 6845, "train_speed(iter/s)": 0.035263 }, { "acc": 0.68080144, "epoch": 5.37, "learning_rate": 6.269914304643698e-05, "loss": 1.02860794, "memory(GiB)": 85.12, "step": 6850, "train_speed(iter/s)": 0.035263 }, { "acc": 0.68284755, "epoch": 5.37, "learning_rate": 6.264691208877558e-05, "loss": 1.02458563, "memory(GiB)": 85.12, "step": 6855, "train_speed(iter/s)": 0.035264 }, { "acc": 0.67072563, "epoch": 5.38, "learning_rate": 6.259466638298118e-05, "loss": 1.07109718, "memory(GiB)": 85.12, "step": 6860, "train_speed(iter/s)": 0.035264 }, { "acc": 0.69171076, "epoch": 5.38, "learning_rate": 6.254240598997985e-05, "loss": 1.0028264, "memory(GiB)": 85.12, "step": 6865, "train_speed(iter/s)": 0.035264 }, { "acc": 0.67135153, "epoch": 5.38, "learning_rate": 6.24901309707148e-05, "loss": 1.08797283, "memory(GiB)": 85.12, "step": 6870, "train_speed(iter/s)": 0.035265 }, { "acc": 0.6757844, "epoch": 5.39, "learning_rate": 6.243784138614627e-05, "loss": 1.04371614, "memory(GiB)": 85.12, "step": 6875, "train_speed(iter/s)": 0.035265 }, { "acc": 0.67799335, "epoch": 5.39, "learning_rate": 6.238553729725151e-05, "loss": 1.02313776, "memory(GiB)": 85.12, "step": 6880, "train_speed(iter/s)": 0.035266 }, { "acc": 0.67846012, "epoch": 5.4, "learning_rate": 6.233321876502468e-05, "loss": 1.04524956, "memory(GiB)": 85.12, "step": 6885, "train_speed(iter/s)": 0.035266 }, { "acc": 0.68262143, "epoch": 5.4, "learning_rate": 6.228088585047673e-05, "loss": 1.02941513, "memory(GiB)": 85.12, "step": 6890, "train_speed(iter/s)": 0.035267 }, { "acc": 0.67653875, "epoch": 5.4, "learning_rate": 6.222853861463546e-05, "loss": 1.07044706, "memory(GiB)": 85.12, "step": 6895, "train_speed(iter/s)": 0.035266 }, { "acc": 0.68358874, "epoch": 5.41, "learning_rate": 6.217617711854534e-05, "loss": 1.0135643, "memory(GiB)": 85.12, "step": 6900, "train_speed(iter/s)": 0.035266 }, { "acc": 0.68347077, "epoch": 5.41, "learning_rate": 6.212380142326743e-05, "loss": 1.06620531, "memory(GiB)": 85.12, "step": 6905, "train_speed(iter/s)": 0.035267 }, { "acc": 0.68109732, "epoch": 5.42, "learning_rate": 6.207141158987943e-05, "loss": 1.0621707, "memory(GiB)": 85.12, "step": 6910, "train_speed(iter/s)": 0.035266 }, { "acc": 0.66551132, "epoch": 5.42, "learning_rate": 6.201900767947544e-05, "loss": 1.09948444, "memory(GiB)": 85.12, "step": 6915, "train_speed(iter/s)": 0.035267 }, { "acc": 0.6836462, "epoch": 5.42, "learning_rate": 6.196658975316604e-05, "loss": 1.04037647, "memory(GiB)": 85.12, "step": 6920, "train_speed(iter/s)": 0.035267 }, { "acc": 0.68927107, "epoch": 5.43, "learning_rate": 6.191415787207813e-05, "loss": 1.01255808, "memory(GiB)": 85.12, "step": 6925, "train_speed(iter/s)": 0.035268 }, { "acc": 0.67622333, "epoch": 5.43, "learning_rate": 6.186171209735489e-05, "loss": 1.06019592, "memory(GiB)": 85.12, "step": 6930, "train_speed(iter/s)": 0.035267 }, { "acc": 0.68418527, "epoch": 5.43, "learning_rate": 6.180925249015566e-05, "loss": 1.03336248, "memory(GiB)": 85.12, "step": 6935, "train_speed(iter/s)": 0.035267 }, { "acc": 0.68626051, "epoch": 5.44, "learning_rate": 6.175677911165599e-05, "loss": 1.03925867, "memory(GiB)": 85.12, "step": 6940, "train_speed(iter/s)": 0.035268 }, { "acc": 0.68999286, "epoch": 5.44, "learning_rate": 6.170429202304744e-05, "loss": 1.00964413, "memory(GiB)": 85.12, "step": 6945, "train_speed(iter/s)": 0.035268 }, { "acc": 0.68267813, "epoch": 5.45, "learning_rate": 6.165179128553754e-05, "loss": 1.02514906, "memory(GiB)": 85.12, "step": 6950, "train_speed(iter/s)": 0.035269 }, { "acc": 0.67859344, "epoch": 5.45, "learning_rate": 6.15992769603498e-05, "loss": 1.08015528, "memory(GiB)": 85.12, "step": 6955, "train_speed(iter/s)": 0.035269 }, { "acc": 0.67712188, "epoch": 5.45, "learning_rate": 6.15467491087235e-05, "loss": 1.05515985, "memory(GiB)": 85.12, "step": 6960, "train_speed(iter/s)": 0.035269 }, { "acc": 0.68022337, "epoch": 5.46, "learning_rate": 6.149420779191373e-05, "loss": 1.05591021, "memory(GiB)": 85.12, "step": 6965, "train_speed(iter/s)": 0.03527 }, { "acc": 0.6749332, "epoch": 5.46, "learning_rate": 6.144165307119129e-05, "loss": 1.07103643, "memory(GiB)": 85.12, "step": 6970, "train_speed(iter/s)": 0.03527 }, { "acc": 0.67788014, "epoch": 5.47, "learning_rate": 6.138908500784265e-05, "loss": 1.05876656, "memory(GiB)": 85.12, "step": 6975, "train_speed(iter/s)": 0.035271 }, { "acc": 0.67351999, "epoch": 5.47, "learning_rate": 6.133650366316972e-05, "loss": 1.08339548, "memory(GiB)": 85.12, "step": 6980, "train_speed(iter/s)": 0.035271 }, { "acc": 0.67984676, "epoch": 5.47, "learning_rate": 6.128390909849004e-05, "loss": 1.04141293, "memory(GiB)": 85.12, "step": 6985, "train_speed(iter/s)": 0.035272 }, { "acc": 0.68225818, "epoch": 5.48, "learning_rate": 6.123130137513642e-05, "loss": 1.01653795, "memory(GiB)": 85.12, "step": 6990, "train_speed(iter/s)": 0.035272 }, { "acc": 0.67328138, "epoch": 5.48, "learning_rate": 6.117868055445715e-05, "loss": 1.04721413, "memory(GiB)": 85.12, "step": 6995, "train_speed(iter/s)": 0.035271 }, { "acc": 0.67902741, "epoch": 5.49, "learning_rate": 6.112604669781572e-05, "loss": 1.03500223, "memory(GiB)": 85.12, "step": 7000, "train_speed(iter/s)": 0.035271 }, { "acc": 0.68537979, "epoch": 5.49, "learning_rate": 6.107339986659084e-05, "loss": 1.05380325, "memory(GiB)": 85.12, "step": 7005, "train_speed(iter/s)": 0.035271 }, { "acc": 0.6760211, "epoch": 5.49, "learning_rate": 6.1020740122176343e-05, "loss": 1.06433525, "memory(GiB)": 85.12, "step": 7010, "train_speed(iter/s)": 0.035271 }, { "acc": 0.67585163, "epoch": 5.5, "learning_rate": 6.096806752598112e-05, "loss": 1.08780317, "memory(GiB)": 85.12, "step": 7015, "train_speed(iter/s)": 0.035272 }, { "acc": 0.67884717, "epoch": 5.5, "learning_rate": 6.091538213942908e-05, "loss": 1.03859797, "memory(GiB)": 85.12, "step": 7020, "train_speed(iter/s)": 0.035272 }, { "acc": 0.6934917, "epoch": 5.51, "learning_rate": 6.086268402395898e-05, "loss": 1.00850601, "memory(GiB)": 85.12, "step": 7025, "train_speed(iter/s)": 0.035272 }, { "acc": 0.70368981, "epoch": 5.51, "learning_rate": 6.080997324102449e-05, "loss": 0.97637157, "memory(GiB)": 85.12, "step": 7030, "train_speed(iter/s)": 0.035272 }, { "acc": 0.69735985, "epoch": 5.51, "learning_rate": 6.0757249852094026e-05, "loss": 0.98540497, "memory(GiB)": 85.12, "step": 7035, "train_speed(iter/s)": 0.035272 }, { "acc": 0.67899513, "epoch": 5.52, "learning_rate": 6.07045139186507e-05, "loss": 1.01754208, "memory(GiB)": 85.12, "step": 7040, "train_speed(iter/s)": 0.035273 }, { "acc": 0.68062968, "epoch": 5.52, "learning_rate": 6.065176550219226e-05, "loss": 1.04695129, "memory(GiB)": 85.12, "step": 7045, "train_speed(iter/s)": 0.035272 }, { "acc": 0.67595897, "epoch": 5.53, "learning_rate": 6.0599004664230984e-05, "loss": 1.05315809, "memory(GiB)": 85.12, "step": 7050, "train_speed(iter/s)": 0.035272 }, { "acc": 0.67158957, "epoch": 5.53, "learning_rate": 6.054623146629368e-05, "loss": 1.07019091, "memory(GiB)": 85.12, "step": 7055, "train_speed(iter/s)": 0.035272 }, { "acc": 0.68817263, "epoch": 5.53, "learning_rate": 6.049344596992153e-05, "loss": 1.00896044, "memory(GiB)": 85.12, "step": 7060, "train_speed(iter/s)": 0.03527 }, { "acc": 0.67556105, "epoch": 5.54, "learning_rate": 6.04406482366701e-05, "loss": 1.09273539, "memory(GiB)": 85.12, "step": 7065, "train_speed(iter/s)": 0.03527 }, { "acc": 0.69138508, "epoch": 5.54, "learning_rate": 6.038783832810918e-05, "loss": 0.99918337, "memory(GiB)": 85.12, "step": 7070, "train_speed(iter/s)": 0.03527 }, { "acc": 0.67930202, "epoch": 5.54, "learning_rate": 6.03350163058228e-05, "loss": 1.04020901, "memory(GiB)": 85.12, "step": 7075, "train_speed(iter/s)": 0.035269 }, { "acc": 0.67458458, "epoch": 5.55, "learning_rate": 6.028218223140908e-05, "loss": 1.06615458, "memory(GiB)": 85.12, "step": 7080, "train_speed(iter/s)": 0.03527 }, { "acc": 0.67609491, "epoch": 5.55, "learning_rate": 6.022933616648021e-05, "loss": 1.06678152, "memory(GiB)": 85.12, "step": 7085, "train_speed(iter/s)": 0.03527 }, { "acc": 0.67319765, "epoch": 5.56, "learning_rate": 6.017647817266236e-05, "loss": 1.06721945, "memory(GiB)": 85.12, "step": 7090, "train_speed(iter/s)": 0.035271 }, { "acc": 0.67800503, "epoch": 5.56, "learning_rate": 6.012360831159565e-05, "loss": 1.04036112, "memory(GiB)": 85.12, "step": 7095, "train_speed(iter/s)": 0.035271 }, { "acc": 0.6719317, "epoch": 5.56, "learning_rate": 6.007072664493395e-05, "loss": 1.07102108, "memory(GiB)": 85.12, "step": 7100, "train_speed(iter/s)": 0.035271 }, { "acc": 0.67592273, "epoch": 5.57, "learning_rate": 6.0017833234344963e-05, "loss": 1.0262372, "memory(GiB)": 85.12, "step": 7105, "train_speed(iter/s)": 0.035271 }, { "acc": 0.68355751, "epoch": 5.57, "learning_rate": 5.996492814151011e-05, "loss": 1.02106323, "memory(GiB)": 85.12, "step": 7110, "train_speed(iter/s)": 0.035271 }, { "acc": 0.68380432, "epoch": 5.58, "learning_rate": 5.991201142812436e-05, "loss": 1.03930197, "memory(GiB)": 85.12, "step": 7115, "train_speed(iter/s)": 0.035272 }, { "acc": 0.67916827, "epoch": 5.58, "learning_rate": 5.98590831558963e-05, "loss": 1.03912115, "memory(GiB)": 85.12, "step": 7120, "train_speed(iter/s)": 0.035271 }, { "acc": 0.68918715, "epoch": 5.58, "learning_rate": 5.980614338654794e-05, "loss": 1.01146679, "memory(GiB)": 85.12, "step": 7125, "train_speed(iter/s)": 0.03527 }, { "acc": 0.69327388, "epoch": 5.59, "learning_rate": 5.975319218181474e-05, "loss": 1.01032009, "memory(GiB)": 85.12, "step": 7130, "train_speed(iter/s)": 0.03527 }, { "acc": 0.66974945, "epoch": 5.59, "learning_rate": 5.970022960344549e-05, "loss": 1.06768141, "memory(GiB)": 85.12, "step": 7135, "train_speed(iter/s)": 0.03527 }, { "acc": 0.67780514, "epoch": 5.6, "learning_rate": 5.9647255713202234e-05, "loss": 1.064604, "memory(GiB)": 85.12, "step": 7140, "train_speed(iter/s)": 0.035269 }, { "acc": 0.67822738, "epoch": 5.6, "learning_rate": 5.959427057286019e-05, "loss": 1.04480143, "memory(GiB)": 85.12, "step": 7145, "train_speed(iter/s)": 0.035269 }, { "acc": 0.68090706, "epoch": 5.6, "learning_rate": 5.954127424420773e-05, "loss": 1.03140087, "memory(GiB)": 85.12, "step": 7150, "train_speed(iter/s)": 0.03527 }, { "acc": 0.68836522, "epoch": 5.61, "learning_rate": 5.9488266789046255e-05, "loss": 1.02142658, "memory(GiB)": 85.12, "step": 7155, "train_speed(iter/s)": 0.03527 }, { "acc": 0.67817101, "epoch": 5.61, "learning_rate": 5.943524826919013e-05, "loss": 1.04541025, "memory(GiB)": 85.12, "step": 7160, "train_speed(iter/s)": 0.03527 }, { "acc": 0.67876797, "epoch": 5.62, "learning_rate": 5.9382218746466634e-05, "loss": 1.07195463, "memory(GiB)": 85.12, "step": 7165, "train_speed(iter/s)": 0.035271 }, { "acc": 0.67407222, "epoch": 5.62, "learning_rate": 5.93291782827159e-05, "loss": 1.05578661, "memory(GiB)": 85.12, "step": 7170, "train_speed(iter/s)": 0.035271 }, { "acc": 0.67716932, "epoch": 5.62, "learning_rate": 5.927612693979079e-05, "loss": 1.0597784, "memory(GiB)": 85.12, "step": 7175, "train_speed(iter/s)": 0.035272 }, { "acc": 0.68023987, "epoch": 5.63, "learning_rate": 5.9223064779556846e-05, "loss": 1.0568635, "memory(GiB)": 85.12, "step": 7180, "train_speed(iter/s)": 0.035272 }, { "acc": 0.67762904, "epoch": 5.63, "learning_rate": 5.916999186389227e-05, "loss": 1.05888271, "memory(GiB)": 85.12, "step": 7185, "train_speed(iter/s)": 0.035273 }, { "acc": 0.68073959, "epoch": 5.63, "learning_rate": 5.911690825468774e-05, "loss": 1.01983681, "memory(GiB)": 85.12, "step": 7190, "train_speed(iter/s)": 0.035271 }, { "acc": 0.68627586, "epoch": 5.64, "learning_rate": 5.9063814013846475e-05, "loss": 1.00290499, "memory(GiB)": 85.12, "step": 7195, "train_speed(iter/s)": 0.03527 }, { "acc": 0.68013487, "epoch": 5.64, "learning_rate": 5.901070920328402e-05, "loss": 1.06031981, "memory(GiB)": 85.12, "step": 7200, "train_speed(iter/s)": 0.03527 }, { "acc": 0.68101711, "epoch": 5.65, "learning_rate": 5.89575938849283e-05, "loss": 1.05255985, "memory(GiB)": 85.12, "step": 7205, "train_speed(iter/s)": 0.035271 }, { "acc": 0.68656335, "epoch": 5.65, "learning_rate": 5.8904468120719506e-05, "loss": 1.04291906, "memory(GiB)": 85.12, "step": 7210, "train_speed(iter/s)": 0.035271 }, { "acc": 0.6808826, "epoch": 5.65, "learning_rate": 5.885133197260993e-05, "loss": 1.05651436, "memory(GiB)": 85.12, "step": 7215, "train_speed(iter/s)": 0.035271 }, { "acc": 0.68361244, "epoch": 5.66, "learning_rate": 5.879818550256405e-05, "loss": 1.05124416, "memory(GiB)": 85.12, "step": 7220, "train_speed(iter/s)": 0.035271 }, { "acc": 0.68785329, "epoch": 5.66, "learning_rate": 5.874502877255835e-05, "loss": 1.00955372, "memory(GiB)": 85.12, "step": 7225, "train_speed(iter/s)": 0.035271 }, { "acc": 0.67910919, "epoch": 5.67, "learning_rate": 5.8691861844581295e-05, "loss": 1.06434069, "memory(GiB)": 85.12, "step": 7230, "train_speed(iter/s)": 0.035272 }, { "acc": 0.68168969, "epoch": 5.67, "learning_rate": 5.8638684780633216e-05, "loss": 1.03852062, "memory(GiB)": 85.12, "step": 7235, "train_speed(iter/s)": 0.035272 }, { "acc": 0.69188733, "epoch": 5.67, "learning_rate": 5.858549764272629e-05, "loss": 1.01275368, "memory(GiB)": 85.12, "step": 7240, "train_speed(iter/s)": 0.035272 }, { "acc": 0.68451958, "epoch": 5.68, "learning_rate": 5.853230049288443e-05, "loss": 1.02619667, "memory(GiB)": 85.12, "step": 7245, "train_speed(iter/s)": 0.035273 }, { "acc": 0.67939177, "epoch": 5.68, "learning_rate": 5.847909339314322e-05, "loss": 1.04042921, "memory(GiB)": 85.12, "step": 7250, "train_speed(iter/s)": 0.035273 }, { "acc": 0.66912894, "epoch": 5.69, "learning_rate": 5.842587640554986e-05, "loss": 1.08952456, "memory(GiB)": 85.12, "step": 7255, "train_speed(iter/s)": 0.035274 }, { "acc": 0.68664956, "epoch": 5.69, "learning_rate": 5.8372649592163056e-05, "loss": 1.02034445, "memory(GiB)": 85.12, "step": 7260, "train_speed(iter/s)": 0.035273 }, { "acc": 0.68716116, "epoch": 5.69, "learning_rate": 5.8319413015052993e-05, "loss": 1.017033, "memory(GiB)": 85.12, "step": 7265, "train_speed(iter/s)": 0.035274 }, { "acc": 0.66364636, "epoch": 5.7, "learning_rate": 5.826616673630125e-05, "loss": 1.09431334, "memory(GiB)": 85.12, "step": 7270, "train_speed(iter/s)": 0.035274 }, { "acc": 0.67621994, "epoch": 5.7, "learning_rate": 5.821291081800071e-05, "loss": 1.07470884, "memory(GiB)": 85.12, "step": 7275, "train_speed(iter/s)": 0.035274 }, { "acc": 0.6812994, "epoch": 5.71, "learning_rate": 5.8159645322255475e-05, "loss": 1.07116871, "memory(GiB)": 85.12, "step": 7280, "train_speed(iter/s)": 0.035275 }, { "acc": 0.684375, "epoch": 5.71, "learning_rate": 5.810637031118086e-05, "loss": 1.02568693, "memory(GiB)": 85.12, "step": 7285, "train_speed(iter/s)": 0.035275 }, { "acc": 0.68658953, "epoch": 5.71, "learning_rate": 5.805308584690321e-05, "loss": 1.04678984, "memory(GiB)": 85.12, "step": 7290, "train_speed(iter/s)": 0.035275 }, { "acc": 0.69045858, "epoch": 5.72, "learning_rate": 5.799979199155998e-05, "loss": 1.01404943, "memory(GiB)": 85.12, "step": 7295, "train_speed(iter/s)": 0.035275 }, { "acc": 0.68868189, "epoch": 5.72, "learning_rate": 5.794648880729952e-05, "loss": 1.01892052, "memory(GiB)": 85.12, "step": 7300, "train_speed(iter/s)": 0.035275 }, { "acc": 0.68861918, "epoch": 5.72, "learning_rate": 5.7893176356281056e-05, "loss": 1.0252737, "memory(GiB)": 85.12, "step": 7305, "train_speed(iter/s)": 0.035276 }, { "acc": 0.68878713, "epoch": 5.73, "learning_rate": 5.7839854700674655e-05, "loss": 1.01528711, "memory(GiB)": 85.12, "step": 7310, "train_speed(iter/s)": 0.035276 }, { "acc": 0.67168713, "epoch": 5.73, "learning_rate": 5.778652390266107e-05, "loss": 1.0582777, "memory(GiB)": 85.12, "step": 7315, "train_speed(iter/s)": 0.035277 }, { "acc": 0.68114805, "epoch": 5.74, "learning_rate": 5.773318402443177e-05, "loss": 1.05835133, "memory(GiB)": 85.12, "step": 7320, "train_speed(iter/s)": 0.035277 }, { "acc": 0.67773662, "epoch": 5.74, "learning_rate": 5.767983512818877e-05, "loss": 1.04364738, "memory(GiB)": 85.12, "step": 7325, "train_speed(iter/s)": 0.035278 }, { "acc": 0.68714108, "epoch": 5.74, "learning_rate": 5.762647727614462e-05, "loss": 1.0286314, "memory(GiB)": 85.12, "step": 7330, "train_speed(iter/s)": 0.035278 }, { "acc": 0.69304905, "epoch": 5.75, "learning_rate": 5.757311053052232e-05, "loss": 1.0080122, "memory(GiB)": 85.12, "step": 7335, "train_speed(iter/s)": 0.035277 }, { "acc": 0.68668628, "epoch": 5.75, "learning_rate": 5.7519734953555225e-05, "loss": 1.04978542, "memory(GiB)": 85.12, "step": 7340, "train_speed(iter/s)": 0.035277 }, { "acc": 0.68079734, "epoch": 5.76, "learning_rate": 5.7466350607486994e-05, "loss": 1.03145504, "memory(GiB)": 85.12, "step": 7345, "train_speed(iter/s)": 0.035276 }, { "acc": 0.6759232, "epoch": 5.76, "learning_rate": 5.7412957554571535e-05, "loss": 1.04577227, "memory(GiB)": 85.12, "step": 7350, "train_speed(iter/s)": 0.035276 }, { "acc": 0.67788, "epoch": 5.76, "learning_rate": 5.7359555857072865e-05, "loss": 1.01738691, "memory(GiB)": 85.12, "step": 7355, "train_speed(iter/s)": 0.035277 }, { "acc": 0.67940884, "epoch": 5.77, "learning_rate": 5.730614557726509e-05, "loss": 1.0438838, "memory(GiB)": 85.12, "step": 7360, "train_speed(iter/s)": 0.035277 }, { "acc": 0.67440863, "epoch": 5.77, "learning_rate": 5.725272677743238e-05, "loss": 1.04039993, "memory(GiB)": 85.12, "step": 7365, "train_speed(iter/s)": 0.035278 }, { "acc": 0.66977754, "epoch": 5.78, "learning_rate": 5.719929951986875e-05, "loss": 1.0763092, "memory(GiB)": 85.12, "step": 7370, "train_speed(iter/s)": 0.035278 }, { "acc": 0.68106508, "epoch": 5.78, "learning_rate": 5.71458638668782e-05, "loss": 1.05014811, "memory(GiB)": 85.12, "step": 7375, "train_speed(iter/s)": 0.035279 }, { "acc": 0.68356209, "epoch": 5.78, "learning_rate": 5.7092419880774384e-05, "loss": 1.04596844, "memory(GiB)": 85.12, "step": 7380, "train_speed(iter/s)": 0.035279 }, { "acc": 0.67028294, "epoch": 5.79, "learning_rate": 5.7038967623880766e-05, "loss": 1.07842445, "memory(GiB)": 85.12, "step": 7385, "train_speed(iter/s)": 0.03528 }, { "acc": 0.69012942, "epoch": 5.79, "learning_rate": 5.698550715853041e-05, "loss": 1.01788101, "memory(GiB)": 85.12, "step": 7390, "train_speed(iter/s)": 0.035277 }, { "acc": 0.68352227, "epoch": 5.8, "learning_rate": 5.6932038547065994e-05, "loss": 1.02470961, "memory(GiB)": 85.12, "step": 7395, "train_speed(iter/s)": 0.035276 }, { "acc": 0.68133144, "epoch": 5.8, "learning_rate": 5.687856185183964e-05, "loss": 1.00813093, "memory(GiB)": 85.12, "step": 7400, "train_speed(iter/s)": 0.035276 }, { "acc": 0.67967892, "epoch": 5.8, "learning_rate": 5.682507713521297e-05, "loss": 1.04251871, "memory(GiB)": 85.12, "step": 7405, "train_speed(iter/s)": 0.035276 }, { "acc": 0.67965193, "epoch": 5.81, "learning_rate": 5.677158445955688e-05, "loss": 1.04245062, "memory(GiB)": 85.12, "step": 7410, "train_speed(iter/s)": 0.035276 }, { "acc": 0.68244319, "epoch": 5.81, "learning_rate": 5.6718083887251585e-05, "loss": 1.04257526, "memory(GiB)": 85.12, "step": 7415, "train_speed(iter/s)": 0.035275 }, { "acc": 0.67110071, "epoch": 5.82, "learning_rate": 5.666457548068653e-05, "loss": 1.04886589, "memory(GiB)": 85.12, "step": 7420, "train_speed(iter/s)": 0.035275 }, { "acc": 0.68169503, "epoch": 5.82, "learning_rate": 5.661105930226027e-05, "loss": 1.03461304, "memory(GiB)": 85.12, "step": 7425, "train_speed(iter/s)": 0.035273 }, { "acc": 0.68312588, "epoch": 5.82, "learning_rate": 5.65575354143804e-05, "loss": 1.03809738, "memory(GiB)": 85.12, "step": 7430, "train_speed(iter/s)": 0.035274 }, { "acc": 0.67332888, "epoch": 5.83, "learning_rate": 5.650400387946358e-05, "loss": 1.04974604, "memory(GiB)": 85.12, "step": 7435, "train_speed(iter/s)": 0.035274 }, { "acc": 0.67180338, "epoch": 5.83, "learning_rate": 5.6450464759935306e-05, "loss": 1.07695293, "memory(GiB)": 85.12, "step": 7440, "train_speed(iter/s)": 0.035274 }, { "acc": 0.67879071, "epoch": 5.83, "learning_rate": 5.6396918118229954e-05, "loss": 1.07525129, "memory(GiB)": 85.12, "step": 7445, "train_speed(iter/s)": 0.035274 }, { "acc": 0.69320011, "epoch": 5.84, "learning_rate": 5.63433640167907e-05, "loss": 0.99239464, "memory(GiB)": 85.12, "step": 7450, "train_speed(iter/s)": 0.035274 }, { "acc": 0.68558059, "epoch": 5.84, "learning_rate": 5.628980251806937e-05, "loss": 1.02958326, "memory(GiB)": 85.12, "step": 7455, "train_speed(iter/s)": 0.035275 }, { "acc": 0.68014541, "epoch": 5.85, "learning_rate": 5.6236233684526416e-05, "loss": 1.01991968, "memory(GiB)": 85.12, "step": 7460, "train_speed(iter/s)": 0.035274 }, { "acc": 0.68883257, "epoch": 5.85, "learning_rate": 5.6182657578630896e-05, "loss": 1.00945797, "memory(GiB)": 85.12, "step": 7465, "train_speed(iter/s)": 0.035275 }, { "acc": 0.68561568, "epoch": 5.85, "learning_rate": 5.6129074262860304e-05, "loss": 1.03655548, "memory(GiB)": 85.12, "step": 7470, "train_speed(iter/s)": 0.035275 }, { "acc": 0.68775282, "epoch": 5.86, "learning_rate": 5.607548379970056e-05, "loss": 1.00257397, "memory(GiB)": 85.12, "step": 7475, "train_speed(iter/s)": 0.035274 }, { "acc": 0.68747077, "epoch": 5.86, "learning_rate": 5.602188625164591e-05, "loss": 1.02046089, "memory(GiB)": 85.12, "step": 7480, "train_speed(iter/s)": 0.035275 }, { "acc": 0.67813239, "epoch": 5.87, "learning_rate": 5.5968281681198864e-05, "loss": 1.02797394, "memory(GiB)": 85.12, "step": 7485, "train_speed(iter/s)": 0.035274 }, { "acc": 0.68199043, "epoch": 5.87, "learning_rate": 5.591467015087012e-05, "loss": 1.03519802, "memory(GiB)": 85.12, "step": 7490, "train_speed(iter/s)": 0.035274 }, { "acc": 0.67812767, "epoch": 5.87, "learning_rate": 5.5861051723178494e-05, "loss": 1.052808, "memory(GiB)": 85.12, "step": 7495, "train_speed(iter/s)": 0.035275 }, { "acc": 0.68303499, "epoch": 5.88, "learning_rate": 5.580742646065085e-05, "loss": 1.02687483, "memory(GiB)": 85.12, "step": 7500, "train_speed(iter/s)": 0.035275 }, { "acc": 0.67364564, "epoch": 5.88, "learning_rate": 5.575379442582203e-05, "loss": 1.05254545, "memory(GiB)": 85.12, "step": 7505, "train_speed(iter/s)": 0.035276 }, { "acc": 0.67409277, "epoch": 5.89, "learning_rate": 5.570015568123475e-05, "loss": 1.04328775, "memory(GiB)": 85.12, "step": 7510, "train_speed(iter/s)": 0.035275 }, { "acc": 0.684273, "epoch": 5.89, "learning_rate": 5.564651028943956e-05, "loss": 1.04421234, "memory(GiB)": 85.12, "step": 7515, "train_speed(iter/s)": 0.035276 }, { "acc": 0.68276038, "epoch": 5.89, "learning_rate": 5.559285831299477e-05, "loss": 1.05293627, "memory(GiB)": 85.12, "step": 7520, "train_speed(iter/s)": 0.035276 }, { "acc": 0.68299809, "epoch": 5.9, "learning_rate": 5.553919981446635e-05, "loss": 1.0334218, "memory(GiB)": 85.12, "step": 7525, "train_speed(iter/s)": 0.035276 }, { "acc": 0.68169241, "epoch": 5.9, "learning_rate": 5.548553485642789e-05, "loss": 1.03117504, "memory(GiB)": 85.12, "step": 7530, "train_speed(iter/s)": 0.035276 }, { "acc": 0.69697251, "epoch": 5.91, "learning_rate": 5.543186350146053e-05, "loss": 0.98402576, "memory(GiB)": 85.12, "step": 7535, "train_speed(iter/s)": 0.035276 }, { "acc": 0.68771691, "epoch": 5.91, "learning_rate": 5.537818581215285e-05, "loss": 1.01845856, "memory(GiB)": 85.12, "step": 7540, "train_speed(iter/s)": 0.035277 }, { "acc": 0.67708225, "epoch": 5.91, "learning_rate": 5.53245018511008e-05, "loss": 1.03625803, "memory(GiB)": 85.12, "step": 7545, "train_speed(iter/s)": 0.035277 }, { "acc": 0.68522434, "epoch": 5.92, "learning_rate": 5.527081168090767e-05, "loss": 1.0206852, "memory(GiB)": 85.12, "step": 7550, "train_speed(iter/s)": 0.035277 }, { "acc": 0.69737639, "epoch": 5.92, "learning_rate": 5.521711536418398e-05, "loss": 0.96795731, "memory(GiB)": 85.12, "step": 7555, "train_speed(iter/s)": 0.035277 }, { "acc": 0.68795233, "epoch": 5.92, "learning_rate": 5.5163412963547425e-05, "loss": 1.00420456, "memory(GiB)": 85.12, "step": 7560, "train_speed(iter/s)": 0.035276 }, { "acc": 0.67716355, "epoch": 5.93, "learning_rate": 5.5109704541622787e-05, "loss": 1.05512428, "memory(GiB)": 85.12, "step": 7565, "train_speed(iter/s)": 0.035276 }, { "acc": 0.69076524, "epoch": 5.93, "learning_rate": 5.505599016104187e-05, "loss": 0.99612122, "memory(GiB)": 85.12, "step": 7570, "train_speed(iter/s)": 0.035276 }, { "acc": 0.66488171, "epoch": 5.94, "learning_rate": 5.5002269884443433e-05, "loss": 1.08279037, "memory(GiB)": 85.12, "step": 7575, "train_speed(iter/s)": 0.035277 }, { "acc": 0.68213077, "epoch": 5.94, "learning_rate": 5.4948543774473105e-05, "loss": 1.0349185, "memory(GiB)": 85.12, "step": 7580, "train_speed(iter/s)": 0.035276 }, { "acc": 0.67493806, "epoch": 5.94, "learning_rate": 5.4894811893783316e-05, "loss": 1.06746645, "memory(GiB)": 85.12, "step": 7585, "train_speed(iter/s)": 0.035277 }, { "acc": 0.67195811, "epoch": 5.95, "learning_rate": 5.484107430503322e-05, "loss": 1.07974176, "memory(GiB)": 85.12, "step": 7590, "train_speed(iter/s)": 0.035277 }, { "acc": 0.68496284, "epoch": 5.95, "learning_rate": 5.4787331070888656e-05, "loss": 1.03015051, "memory(GiB)": 85.12, "step": 7595, "train_speed(iter/s)": 0.035277 }, { "acc": 0.67029195, "epoch": 5.96, "learning_rate": 5.473358225402202e-05, "loss": 1.07754288, "memory(GiB)": 85.12, "step": 7600, "train_speed(iter/s)": 0.035275 }, { "acc": 0.67144132, "epoch": 5.96, "learning_rate": 5.467982791711224e-05, "loss": 1.04674873, "memory(GiB)": 85.12, "step": 7605, "train_speed(iter/s)": 0.035276 }, { "acc": 0.69102864, "epoch": 5.96, "learning_rate": 5.4626068122844634e-05, "loss": 1.03087606, "memory(GiB)": 85.12, "step": 7610, "train_speed(iter/s)": 0.035274 }, { "acc": 0.68264012, "epoch": 5.97, "learning_rate": 5.4572302933910926e-05, "loss": 1.03752575, "memory(GiB)": 85.12, "step": 7615, "train_speed(iter/s)": 0.035275 }, { "acc": 0.68130126, "epoch": 5.97, "learning_rate": 5.451853241300913e-05, "loss": 1.01153135, "memory(GiB)": 85.12, "step": 7620, "train_speed(iter/s)": 0.035275 }, { "acc": 0.66839213, "epoch": 5.98, "learning_rate": 5.446475662284346e-05, "loss": 1.09095182, "memory(GiB)": 85.12, "step": 7625, "train_speed(iter/s)": 0.035275 }, { "acc": 0.67351193, "epoch": 5.98, "learning_rate": 5.4410975626124284e-05, "loss": 1.07571201, "memory(GiB)": 85.12, "step": 7630, "train_speed(iter/s)": 0.035275 }, { "acc": 0.68798418, "epoch": 5.98, "learning_rate": 5.435718948556804e-05, "loss": 1.00712776, "memory(GiB)": 85.12, "step": 7635, "train_speed(iter/s)": 0.035275 }, { "acc": 0.70076885, "epoch": 5.99, "learning_rate": 5.430339826389719e-05, "loss": 0.9588829, "memory(GiB)": 85.12, "step": 7640, "train_speed(iter/s)": 0.035274 }, { "acc": 0.67537951, "epoch": 5.99, "learning_rate": 5.424960202384006e-05, "loss": 1.05870562, "memory(GiB)": 85.12, "step": 7645, "train_speed(iter/s)": 0.035275 }, { "acc": 0.69180627, "epoch": 6.0, "learning_rate": 5.419580082813089e-05, "loss": 1.00422449, "memory(GiB)": 85.12, "step": 7650, "train_speed(iter/s)": 0.035275 }, { "acc": 0.66602154, "epoch": 6.0, "learning_rate": 5.414199473950967e-05, "loss": 1.09426003, "memory(GiB)": 85.12, "step": 7655, "train_speed(iter/s)": 0.035276 }, { "epoch": 6.0, "eval_acc": 0.6978462309040822, "eval_loss": 0.9628272652626038, "eval_runtime": 84.9357, "eval_samples_per_second": 1.095, "eval_steps_per_second": 1.095, "step": 7656 } ], "logging_steps": 5, "max_steps": 15312, "num_input_tokens_seen": 0, "num_train_epochs": 12, "save_steps": 1, "total_flos": 3.3314641424613103e+22, "train_batch_size": 4, "trial_name": null, "trial_params": null }