{ "best_metric": 0.3305796980857849, "best_model_checkpoint": "autotrain-m1b56-8ger6/checkpoint-3462", "epoch": 2.0, "eval_steps": 500, "global_step": 3462, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.014442518775274409, "grad_norm": 52.66768264770508, "learning_rate": 2.0192307692307692e-06, "loss": 8.6614, "step": 25 }, { "epoch": 0.028885037550548817, "grad_norm": 70.3343276977539, "learning_rate": 4.423076923076924e-06, "loss": 7.3266, "step": 50 }, { "epoch": 0.043327556325823226, "grad_norm": 64.09215545654297, "learning_rate": 6.730769230769231e-06, "loss": 1.6514, "step": 75 }, { "epoch": 0.057770075101097634, "grad_norm": 29.524045944213867, "learning_rate": 9.134615384615384e-06, "loss": 1.0548, "step": 100 }, { "epoch": 0.07221259387637204, "grad_norm": 14.635738372802734, "learning_rate": 1.153846153846154e-05, "loss": 0.9082, "step": 125 }, { "epoch": 0.08665511265164645, "grad_norm": 19.21646499633789, "learning_rate": 1.3942307692307693e-05, "loss": 0.5165, "step": 150 }, { "epoch": 0.10109763142692085, "grad_norm": 34.39830017089844, "learning_rate": 1.6346153846153847e-05, "loss": 0.5166, "step": 175 }, { "epoch": 0.11554015020219527, "grad_norm": 12.46200180053711, "learning_rate": 1.8750000000000002e-05, "loss": 0.6898, "step": 200 }, { "epoch": 0.12998266897746968, "grad_norm": 44.481101989746094, "learning_rate": 2.1153846153846154e-05, "loss": 0.5929, "step": 225 }, { "epoch": 0.14442518775274407, "grad_norm": 14.731172561645508, "learning_rate": 2.355769230769231e-05, "loss": 0.5474, "step": 250 }, { "epoch": 0.1588677065280185, "grad_norm": 27.139829635620117, "learning_rate": 2.586538461538462e-05, "loss": 0.7523, "step": 275 }, { "epoch": 0.1733102253032929, "grad_norm": 13.601739883422852, "learning_rate": 2.826923076923077e-05, "loss": 0.6244, "step": 300 }, { "epoch": 0.1877527440785673, "grad_norm": 18.18332862854004, "learning_rate": 3.0673076923076926e-05, "loss": 0.618, "step": 325 }, { "epoch": 0.2021952628538417, "grad_norm": 6.123499870300293, "learning_rate": 3.307692307692308e-05, "loss": 0.7482, "step": 350 }, { "epoch": 0.21663778162911612, "grad_norm": 10.999884605407715, "learning_rate": 3.548076923076924e-05, "loss": 0.5383, "step": 375 }, { "epoch": 0.23108030040439054, "grad_norm": 22.577754974365234, "learning_rate": 3.788461538461538e-05, "loss": 0.5854, "step": 400 }, { "epoch": 0.24552281917966493, "grad_norm": 31.469331741333008, "learning_rate": 4.028846153846154e-05, "loss": 0.5403, "step": 425 }, { "epoch": 0.25996533795493937, "grad_norm": 8.64493465423584, "learning_rate": 4.269230769230769e-05, "loss": 0.6795, "step": 450 }, { "epoch": 0.27440785673021373, "grad_norm": 7.3773393630981445, "learning_rate": 4.509615384615385e-05, "loss": 0.5801, "step": 475 }, { "epoch": 0.28885037550548814, "grad_norm": 11.987143516540527, "learning_rate": 4.75e-05, "loss": 0.508, "step": 500 }, { "epoch": 0.30329289428076256, "grad_norm": 7.039488792419434, "learning_rate": 4.9903846153846154e-05, "loss": 0.5415, "step": 525 }, { "epoch": 0.317735413056037, "grad_norm": 13.371904373168945, "learning_rate": 4.9743205649475715e-05, "loss": 0.5646, "step": 550 }, { "epoch": 0.3321779318313114, "grad_norm": 32.50100326538086, "learning_rate": 4.947571153434625e-05, "loss": 0.7043, "step": 575 }, { "epoch": 0.3466204506065858, "grad_norm": 25.765533447265625, "learning_rate": 4.920821741921678e-05, "loss": 0.5854, "step": 600 }, { "epoch": 0.3610629693818602, "grad_norm": 33.29277420043945, "learning_rate": 4.894072330408731e-05, "loss": 0.7418, "step": 625 }, { "epoch": 0.3755054881571346, "grad_norm": 23.995582580566406, "learning_rate": 4.8673229188957844e-05, "loss": 0.7084, "step": 650 }, { "epoch": 0.389948006932409, "grad_norm": 24.641578674316406, "learning_rate": 4.840573507382838e-05, "loss": 0.6657, "step": 675 }, { "epoch": 0.4043905257076834, "grad_norm": 16.46844482421875, "learning_rate": 4.813824095869891e-05, "loss": 0.4334, "step": 700 }, { "epoch": 0.41883304448295783, "grad_norm": 15.955053329467773, "learning_rate": 4.7870746843569445e-05, "loss": 0.4989, "step": 725 }, { "epoch": 0.43327556325823224, "grad_norm": 20.922630310058594, "learning_rate": 4.760325272843998e-05, "loss": 0.5395, "step": 750 }, { "epoch": 0.44771808203350666, "grad_norm": 33.461490631103516, "learning_rate": 4.733575861331051e-05, "loss": 0.7912, "step": 775 }, { "epoch": 0.4621606008087811, "grad_norm": 28.123592376708984, "learning_rate": 4.7068264498181045e-05, "loss": 0.7634, "step": 800 }, { "epoch": 0.47660311958405543, "grad_norm": 6.648211479187012, "learning_rate": 4.6800770383051574e-05, "loss": 0.5912, "step": 825 }, { "epoch": 0.49104563835932985, "grad_norm": 6.500886917114258, "learning_rate": 4.653327626792211e-05, "loss": 0.595, "step": 850 }, { "epoch": 0.5054881571346043, "grad_norm": 17.182079315185547, "learning_rate": 4.626578215279264e-05, "loss": 0.6416, "step": 875 }, { "epoch": 0.5199306759098787, "grad_norm": 20.7849063873291, "learning_rate": 4.5998288037663175e-05, "loss": 0.4613, "step": 900 }, { "epoch": 0.5343731946851531, "grad_norm": 6.231338977813721, "learning_rate": 4.5730793922533704e-05, "loss": 0.4601, "step": 925 }, { "epoch": 0.5488157134604275, "grad_norm": 37.750099182128906, "learning_rate": 4.546329980740424e-05, "loss": 0.6588, "step": 950 }, { "epoch": 0.5632582322357019, "grad_norm": 13.106170654296875, "learning_rate": 4.5195805692274775e-05, "loss": 0.6352, "step": 975 }, { "epoch": 0.5777007510109763, "grad_norm": 12.079797744750977, "learning_rate": 4.4928311577145304e-05, "loss": 0.6447, "step": 1000 }, { "epoch": 0.5921432697862508, "grad_norm": 14.380491256713867, "learning_rate": 4.466081746201583e-05, "loss": 0.4655, "step": 1025 }, { "epoch": 0.6065857885615251, "grad_norm": 18.562135696411133, "learning_rate": 4.4393323346886376e-05, "loss": 0.4547, "step": 1050 }, { "epoch": 0.6210283073367996, "grad_norm": 22.869455337524414, "learning_rate": 4.4125829231756905e-05, "loss": 0.5574, "step": 1075 }, { "epoch": 0.635470826112074, "grad_norm": 15.650333404541016, "learning_rate": 4.3858335116627433e-05, "loss": 0.5149, "step": 1100 }, { "epoch": 0.6499133448873483, "grad_norm": 19.41951560974121, "learning_rate": 4.359084100149797e-05, "loss": 0.5395, "step": 1125 }, { "epoch": 0.6643558636626228, "grad_norm": 14.825141906738281, "learning_rate": 4.3323346886368505e-05, "loss": 0.3763, "step": 1150 }, { "epoch": 0.6787983824378971, "grad_norm": 10.986475944519043, "learning_rate": 4.3055852771239034e-05, "loss": 0.383, "step": 1175 }, { "epoch": 0.6932409012131716, "grad_norm": 6.805363655090332, "learning_rate": 4.278835865610957e-05, "loss": 0.4967, "step": 1200 }, { "epoch": 0.707683419988446, "grad_norm": 9.07087230682373, "learning_rate": 4.25208645409801e-05, "loss": 0.4451, "step": 1225 }, { "epoch": 0.7221259387637204, "grad_norm": 15.876053810119629, "learning_rate": 4.2253370425850634e-05, "loss": 0.4178, "step": 1250 }, { "epoch": 0.7365684575389948, "grad_norm": 10.015765190124512, "learning_rate": 4.198587631072117e-05, "loss": 0.5196, "step": 1275 }, { "epoch": 0.7510109763142692, "grad_norm": 14.343164443969727, "learning_rate": 4.17183821955917e-05, "loss": 0.3987, "step": 1300 }, { "epoch": 0.7654534950895436, "grad_norm": 7.555254936218262, "learning_rate": 4.145088808046223e-05, "loss": 0.4268, "step": 1325 }, { "epoch": 0.779896013864818, "grad_norm": 19.02402114868164, "learning_rate": 4.1183393965332764e-05, "loss": 0.5197, "step": 1350 }, { "epoch": 0.7943385326400925, "grad_norm": 17.702478408813477, "learning_rate": 4.09158998502033e-05, "loss": 0.3667, "step": 1375 }, { "epoch": 0.8087810514153668, "grad_norm": 14.175180435180664, "learning_rate": 4.064840573507383e-05, "loss": 0.4353, "step": 1400 }, { "epoch": 0.8232235701906413, "grad_norm": 11.903855323791504, "learning_rate": 4.0380911619944364e-05, "loss": 0.5338, "step": 1425 }, { "epoch": 0.8376660889659157, "grad_norm": 13.651313781738281, "learning_rate": 4.01134175048149e-05, "loss": 0.4009, "step": 1450 }, { "epoch": 0.85210860774119, "grad_norm": 5.367980480194092, "learning_rate": 3.984592338968543e-05, "loss": 0.5129, "step": 1475 }, { "epoch": 0.8665511265164645, "grad_norm": 18.119848251342773, "learning_rate": 3.957842927455596e-05, "loss": 0.4251, "step": 1500 }, { "epoch": 0.8809936452917388, "grad_norm": 8.821969032287598, "learning_rate": 3.9310935159426494e-05, "loss": 0.3965, "step": 1525 }, { "epoch": 0.8954361640670133, "grad_norm": 5.4203782081604, "learning_rate": 3.904344104429703e-05, "loss": 0.427, "step": 1550 }, { "epoch": 0.9098786828422877, "grad_norm": 10.101963996887207, "learning_rate": 3.877594692916756e-05, "loss": 0.3891, "step": 1575 }, { "epoch": 0.9243212016175621, "grad_norm": 5.893563747406006, "learning_rate": 3.8508452814038094e-05, "loss": 0.4165, "step": 1600 }, { "epoch": 0.9387637203928365, "grad_norm": 6.127294540405273, "learning_rate": 3.8240958698908623e-05, "loss": 0.5194, "step": 1625 }, { "epoch": 0.9532062391681109, "grad_norm": 13.313063621520996, "learning_rate": 3.797346458377916e-05, "loss": 0.4108, "step": 1650 }, { "epoch": 0.9676487579433853, "grad_norm": 14.13697624206543, "learning_rate": 3.7705970468649695e-05, "loss": 0.4612, "step": 1675 }, { "epoch": 0.9820912767186597, "grad_norm": 8.540029525756836, "learning_rate": 3.7438476353520224e-05, "loss": 0.4081, "step": 1700 }, { "epoch": 0.9965337954939342, "grad_norm": 15.01349925994873, "learning_rate": 3.717098223839075e-05, "loss": 0.4559, "step": 1725 }, { "epoch": 1.0, "eval_explained_variance": 0.6565504670143127, "eval_loss": 0.5362390279769897, "eval_mae": 0.5734534859657288, "eval_mse": 0.5362390279769897, "eval_r2": 0.5139721769346014, "eval_rmse": 0.7322834134101868, "eval_runtime": 21.5346, "eval_samples_per_second": 160.765, "eval_steps_per_second": 10.077, "step": 1731 }, { "epoch": 1.0109763142692085, "grad_norm": 21.412073135375977, "learning_rate": 3.6903488123261295e-05, "loss": 0.4508, "step": 1750 }, { "epoch": 1.025418833044483, "grad_norm": 8.56926155090332, "learning_rate": 3.6635994008131824e-05, "loss": 0.4829, "step": 1775 }, { "epoch": 1.0398613518197575, "grad_norm": 9.545137405395508, "learning_rate": 3.6368499893002353e-05, "loss": 0.4499, "step": 1800 }, { "epoch": 1.0543038705950318, "grad_norm": 27.51245880126953, "learning_rate": 3.610100577787289e-05, "loss": 0.4109, "step": 1825 }, { "epoch": 1.0687463893703062, "grad_norm": 12.80823802947998, "learning_rate": 3.583351166274342e-05, "loss": 0.3662, "step": 1850 }, { "epoch": 1.0831889081455806, "grad_norm": 7.883637428283691, "learning_rate": 3.5566017547613954e-05, "loss": 0.4184, "step": 1875 }, { "epoch": 1.097631426920855, "grad_norm": 7.406681537628174, "learning_rate": 3.529852343248449e-05, "loss": 0.3439, "step": 1900 }, { "epoch": 1.1120739456961295, "grad_norm": 19.036779403686523, "learning_rate": 3.503102931735502e-05, "loss": 0.4009, "step": 1925 }, { "epoch": 1.1265164644714039, "grad_norm": 8.52979850769043, "learning_rate": 3.476353520222555e-05, "loss": 0.4397, "step": 1950 }, { "epoch": 1.1409589832466782, "grad_norm": 16.250051498413086, "learning_rate": 3.449604108709609e-05, "loss": 0.4651, "step": 1975 }, { "epoch": 1.1554015020219526, "grad_norm": 5.9189300537109375, "learning_rate": 3.422854697196662e-05, "loss": 0.4419, "step": 2000 }, { "epoch": 1.169844020797227, "grad_norm": 11.332290649414062, "learning_rate": 3.396105285683715e-05, "loss": 0.3755, "step": 2025 }, { "epoch": 1.1842865395725015, "grad_norm": 9.792673110961914, "learning_rate": 3.3693558741707684e-05, "loss": 0.358, "step": 2050 }, { "epoch": 1.1987290583477759, "grad_norm": 14.335423469543457, "learning_rate": 3.342606462657822e-05, "loss": 0.3512, "step": 2075 }, { "epoch": 1.2131715771230502, "grad_norm": 9.749696731567383, "learning_rate": 3.315857051144875e-05, "loss": 0.4072, "step": 2100 }, { "epoch": 1.2276140958983246, "grad_norm": 9.317971229553223, "learning_rate": 3.2891076396319284e-05, "loss": 0.4359, "step": 2125 }, { "epoch": 1.242056614673599, "grad_norm": 14.866842269897461, "learning_rate": 3.262358228118981e-05, "loss": 0.4314, "step": 2150 }, { "epoch": 1.2564991334488735, "grad_norm": 6.312429428100586, "learning_rate": 3.235608816606035e-05, "loss": 0.3819, "step": 2175 }, { "epoch": 1.270941652224148, "grad_norm": 5.175512313842773, "learning_rate": 3.208859405093088e-05, "loss": 0.4469, "step": 2200 }, { "epoch": 1.2853841709994223, "grad_norm": 16.6768856048584, "learning_rate": 3.1821099935801414e-05, "loss": 0.4156, "step": 2225 }, { "epoch": 1.2998266897746968, "grad_norm": 5.419372081756592, "learning_rate": 3.155360582067194e-05, "loss": 0.3966, "step": 2250 }, { "epoch": 1.314269208549971, "grad_norm": 9.6641263961792, "learning_rate": 3.128611170554248e-05, "loss": 0.3622, "step": 2275 }, { "epoch": 1.3287117273252456, "grad_norm": 11.433446884155273, "learning_rate": 3.1018617590413014e-05, "loss": 0.5056, "step": 2300 }, { "epoch": 1.34315424610052, "grad_norm": 8.54787540435791, "learning_rate": 3.075112347528354e-05, "loss": 0.3228, "step": 2325 }, { "epoch": 1.3575967648757943, "grad_norm": 24.707653045654297, "learning_rate": 3.0483629360154076e-05, "loss": 0.3755, "step": 2350 }, { "epoch": 1.3720392836510689, "grad_norm": 13.02287483215332, "learning_rate": 3.021613524502461e-05, "loss": 0.313, "step": 2375 }, { "epoch": 1.3864818024263432, "grad_norm": 7.0347771644592285, "learning_rate": 2.9948641129895144e-05, "loss": 0.3618, "step": 2400 }, { "epoch": 1.4009243212016176, "grad_norm": 3.799116611480713, "learning_rate": 2.9681147014765676e-05, "loss": 0.3338, "step": 2425 }, { "epoch": 1.415366839976892, "grad_norm": 10.154156684875488, "learning_rate": 2.941365289963621e-05, "loss": 0.3184, "step": 2450 }, { "epoch": 1.4298093587521663, "grad_norm": 22.4088191986084, "learning_rate": 2.9146158784506744e-05, "loss": 0.3218, "step": 2475 }, { "epoch": 1.4442518775274409, "grad_norm": 5.571261405944824, "learning_rate": 2.8878664669377277e-05, "loss": 0.4167, "step": 2500 }, { "epoch": 1.4586943963027152, "grad_norm": 10.851147651672363, "learning_rate": 2.8611170554247806e-05, "loss": 0.3881, "step": 2525 }, { "epoch": 1.4731369150779896, "grad_norm": 30.00836181640625, "learning_rate": 2.8343676439118338e-05, "loss": 0.3706, "step": 2550 }, { "epoch": 1.487579433853264, "grad_norm": 12.909472465515137, "learning_rate": 2.8076182323988874e-05, "loss": 0.4325, "step": 2575 }, { "epoch": 1.5020219526285383, "grad_norm": 10.231127738952637, "learning_rate": 2.7808688208859406e-05, "loss": 0.3357, "step": 2600 }, { "epoch": 1.516464471403813, "grad_norm": 7.157652378082275, "learning_rate": 2.754119409372994e-05, "loss": 0.3927, "step": 2625 }, { "epoch": 1.5309069901790873, "grad_norm": 13.10181999206543, "learning_rate": 2.727369997860047e-05, "loss": 0.3189, "step": 2650 }, { "epoch": 1.5453495089543616, "grad_norm": 12.194095611572266, "learning_rate": 2.7006205863471007e-05, "loss": 0.4226, "step": 2675 }, { "epoch": 1.5597920277296362, "grad_norm": 18.289899826049805, "learning_rate": 2.673871174834154e-05, "loss": 0.2865, "step": 2700 }, { "epoch": 1.5742345465049103, "grad_norm": 4.3070068359375, "learning_rate": 2.647121763321207e-05, "loss": 0.3107, "step": 2725 }, { "epoch": 1.588677065280185, "grad_norm": 26.21879768371582, "learning_rate": 2.62037235180826e-05, "loss": 0.3243, "step": 2750 }, { "epoch": 1.6031195840554593, "grad_norm": 8.495038986206055, "learning_rate": 2.593622940295314e-05, "loss": 0.3285, "step": 2775 }, { "epoch": 1.6175621028307337, "grad_norm": 17.74176788330078, "learning_rate": 2.566873528782367e-05, "loss": 0.3819, "step": 2800 }, { "epoch": 1.6320046216060082, "grad_norm": 8.67226505279541, "learning_rate": 2.54012411726942e-05, "loss": 0.4388, "step": 2825 }, { "epoch": 1.6464471403812824, "grad_norm": 9.305310249328613, "learning_rate": 2.5133747057564733e-05, "loss": 0.3801, "step": 2850 }, { "epoch": 1.660889659156557, "grad_norm": 16.156944274902344, "learning_rate": 2.4866252942435266e-05, "loss": 0.337, "step": 2875 }, { "epoch": 1.6753321779318313, "grad_norm": 18.950183868408203, "learning_rate": 2.45987588273058e-05, "loss": 0.3913, "step": 2900 }, { "epoch": 1.6897746967071057, "grad_norm": 4.8534321784973145, "learning_rate": 2.4331264712176334e-05, "loss": 0.3677, "step": 2925 }, { "epoch": 1.7042172154823803, "grad_norm": 7.860241413116455, "learning_rate": 2.4063770597046866e-05, "loss": 0.3495, "step": 2950 }, { "epoch": 1.7186597342576544, "grad_norm": 10.027009010314941, "learning_rate": 2.37962764819174e-05, "loss": 0.3787, "step": 2975 }, { "epoch": 1.733102253032929, "grad_norm": 13.39957046508789, "learning_rate": 2.352878236678793e-05, "loss": 0.3064, "step": 3000 }, { "epoch": 1.7475447718082033, "grad_norm": 8.104743957519531, "learning_rate": 2.3261288251658463e-05, "loss": 0.3703, "step": 3025 }, { "epoch": 1.7619872905834777, "grad_norm": 7.085102558135986, "learning_rate": 2.2993794136529e-05, "loss": 0.3488, "step": 3050 }, { "epoch": 1.7764298093587523, "grad_norm": 8.273953437805176, "learning_rate": 2.272630002139953e-05, "loss": 0.3574, "step": 3075 }, { "epoch": 1.7908723281340264, "grad_norm": 5.399058818817139, "learning_rate": 2.2458805906270064e-05, "loss": 0.3699, "step": 3100 }, { "epoch": 1.805314846909301, "grad_norm": 25.818262100219727, "learning_rate": 2.2191311791140596e-05, "loss": 0.2714, "step": 3125 }, { "epoch": 1.8197573656845754, "grad_norm": 8.441669464111328, "learning_rate": 2.192381767601113e-05, "loss": 0.3225, "step": 3150 }, { "epoch": 1.8341998844598497, "grad_norm": 3.318145751953125, "learning_rate": 2.165632356088166e-05, "loss": 0.3359, "step": 3175 }, { "epoch": 1.8486424032351243, "grad_norm": 3.700218439102173, "learning_rate": 2.1388829445752197e-05, "loss": 0.3138, "step": 3200 }, { "epoch": 1.8630849220103987, "grad_norm": 4.9609246253967285, "learning_rate": 2.1121335330622726e-05, "loss": 0.3429, "step": 3225 }, { "epoch": 1.877527440785673, "grad_norm": 11.287262916564941, "learning_rate": 2.085384121549326e-05, "loss": 0.3926, "step": 3250 }, { "epoch": 1.8919699595609474, "grad_norm": 13.642833709716797, "learning_rate": 2.0586347100363794e-05, "loss": 0.3484, "step": 3275 }, { "epoch": 1.9064124783362217, "grad_norm": 5.669510364532471, "learning_rate": 2.0318852985234326e-05, "loss": 0.3307, "step": 3300 }, { "epoch": 1.9208549971114963, "grad_norm": 11.987211227416992, "learning_rate": 2.005135887010486e-05, "loss": 0.2979, "step": 3325 }, { "epoch": 1.9352975158867707, "grad_norm": 8.090258598327637, "learning_rate": 1.978386475497539e-05, "loss": 0.2786, "step": 3350 }, { "epoch": 1.949740034662045, "grad_norm": 5.356060028076172, "learning_rate": 1.9516370639845923e-05, "loss": 0.281, "step": 3375 }, { "epoch": 1.9641825534373196, "grad_norm": 9.354238510131836, "learning_rate": 1.924887652471646e-05, "loss": 0.2761, "step": 3400 }, { "epoch": 1.9786250722125938, "grad_norm": 12.076613426208496, "learning_rate": 1.8981382409586988e-05, "loss": 0.3394, "step": 3425 }, { "epoch": 1.9930675909878683, "grad_norm": 6.9947428703308105, "learning_rate": 1.8713888294457524e-05, "loss": 0.3652, "step": 3450 }, { "epoch": 2.0, "eval_explained_variance": 0.700911283493042, "eval_loss": 0.3305796980857849, "eval_mae": 0.44259902834892273, "eval_mse": 0.3305796980857849, "eval_r2": 0.7003744220771353, "eval_rmse": 0.5749605894088745, "eval_runtime": 21.4982, "eval_samples_per_second": 161.037, "eval_steps_per_second": 10.094, "step": 3462 } ], "logging_steps": 25, "max_steps": 5193, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 7285479708948480.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }