{ "best_metric": 0.34081029891967773, "best_model_checkpoint": "autotrain-99xmg-r4pqr/checkpoint-3462", "epoch": 2.0, "eval_steps": 500, "global_step": 3462, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.014442518775274409, "grad_norm": 109.87998962402344, "learning_rate": 2.0192307692307692e-06, "loss": 6.024, "step": 25 }, { "epoch": 0.028885037550548817, "grad_norm": 42.21332550048828, "learning_rate": 4.423076923076924e-06, "loss": 4.159, "step": 50 }, { "epoch": 0.043327556325823226, "grad_norm": 73.7143325805664, "learning_rate": 6.826923076923076e-06, "loss": 1.5598, "step": 75 }, { "epoch": 0.057770075101097634, "grad_norm": 22.827505111694336, "learning_rate": 9.230769230769232e-06, "loss": 1.0843, "step": 100 }, { "epoch": 0.07221259387637204, "grad_norm": 10.978221893310547, "learning_rate": 1.1634615384615386e-05, "loss": 0.8028, "step": 125 }, { "epoch": 0.08665511265164645, "grad_norm": 18.381181716918945, "learning_rate": 1.403846153846154e-05, "loss": 0.5757, "step": 150 }, { "epoch": 0.10109763142692085, "grad_norm": 26.028202056884766, "learning_rate": 1.6442307692307695e-05, "loss": 0.5391, "step": 175 }, { "epoch": 0.11554015020219527, "grad_norm": 11.943533897399902, "learning_rate": 1.8846153846153846e-05, "loss": 0.7264, "step": 200 }, { "epoch": 0.12998266897746968, "grad_norm": 27.598697662353516, "learning_rate": 2.125e-05, "loss": 0.5976, "step": 225 }, { "epoch": 0.14442518775274407, "grad_norm": 2.4630095958709717, "learning_rate": 2.3653846153846153e-05, "loss": 0.436, "step": 250 }, { "epoch": 0.1588677065280185, "grad_norm": 24.022457122802734, "learning_rate": 2.6057692307692312e-05, "loss": 0.7163, "step": 275 }, { "epoch": 0.1733102253032929, "grad_norm": 9.978269577026367, "learning_rate": 2.846153846153846e-05, "loss": 0.6614, "step": 300 }, { "epoch": 0.1877527440785673, "grad_norm": 23.359638214111328, "learning_rate": 3.0865384615384616e-05, "loss": 0.6557, "step": 325 }, { "epoch": 0.2021952628538417, "grad_norm": 22.752307891845703, "learning_rate": 3.326923076923077e-05, "loss": 0.6954, "step": 350 }, { "epoch": 0.21663778162911612, "grad_norm": 4.690310001373291, "learning_rate": 3.5673076923076926e-05, "loss": 0.7825, "step": 375 }, { "epoch": 0.23108030040439054, "grad_norm": 20.00984764099121, "learning_rate": 3.807692307692308e-05, "loss": 0.836, "step": 400 }, { "epoch": 0.24552281917966493, "grad_norm": 25.91275978088379, "learning_rate": 4.0480769230769236e-05, "loss": 0.569, "step": 425 }, { "epoch": 0.25996533795493937, "grad_norm": 15.184056282043457, "learning_rate": 4.288461538461538e-05, "loss": 0.6616, "step": 450 }, { "epoch": 0.27440785673021373, "grad_norm": 4.631957530975342, "learning_rate": 4.528846153846154e-05, "loss": 0.5425, "step": 475 }, { "epoch": 0.28885037550548814, "grad_norm": 6.141411781311035, "learning_rate": 4.76923076923077e-05, "loss": 0.5024, "step": 500 }, { "epoch": 0.30329289428076256, "grad_norm": 4.359855651855469, "learning_rate": 4.9989300235394826e-05, "loss": 0.603, "step": 525 }, { "epoch": 0.317735413056037, "grad_norm": 10.599994659423828, "learning_rate": 4.9721806120265355e-05, "loss": 0.5994, "step": 550 }, { "epoch": 0.3321779318313114, "grad_norm": 44.90914535522461, "learning_rate": 4.945431200513589e-05, "loss": 0.8399, "step": 575 }, { "epoch": 0.3466204506065858, "grad_norm": 5.233628273010254, "learning_rate": 4.918681789000642e-05, "loss": 0.5791, "step": 600 }, { "epoch": 0.3610629693818602, "grad_norm": 21.001466751098633, "learning_rate": 4.8919323774876955e-05, "loss": 0.6869, "step": 625 }, { "epoch": 0.3755054881571346, "grad_norm": 17.496326446533203, "learning_rate": 4.865182965974749e-05, "loss": 0.5309, "step": 650 }, { "epoch": 0.389948006932409, "grad_norm": 16.32991600036621, "learning_rate": 4.838433554461802e-05, "loss": 0.7183, "step": 675 }, { "epoch": 0.4043905257076834, "grad_norm": 7.430429935455322, "learning_rate": 4.811684142948855e-05, "loss": 0.3932, "step": 700 }, { "epoch": 0.41883304448295783, "grad_norm": 7.165083885192871, "learning_rate": 4.784934731435909e-05, "loss": 0.5113, "step": 725 }, { "epoch": 0.43327556325823224, "grad_norm": 10.602783203125, "learning_rate": 4.758185319922962e-05, "loss": 0.5259, "step": 750 }, { "epoch": 0.44771808203350666, "grad_norm": 22.993024826049805, "learning_rate": 4.731435908410015e-05, "loss": 0.678, "step": 775 }, { "epoch": 0.4621606008087811, "grad_norm": 17.77105712890625, "learning_rate": 4.7046864968970685e-05, "loss": 0.6262, "step": 800 }, { "epoch": 0.47660311958405543, "grad_norm": 6.010850429534912, "learning_rate": 4.677937085384122e-05, "loss": 0.5537, "step": 825 }, { "epoch": 0.49104563835932985, "grad_norm": 10.288942337036133, "learning_rate": 4.651187673871175e-05, "loss": 0.5963, "step": 850 }, { "epoch": 0.5054881571346043, "grad_norm": 20.071813583374023, "learning_rate": 4.6244382623582286e-05, "loss": 0.5735, "step": 875 }, { "epoch": 0.5199306759098787, "grad_norm": 5.898088455200195, "learning_rate": 4.5976888508452815e-05, "loss": 0.4843, "step": 900 }, { "epoch": 0.5343731946851531, "grad_norm": 3.5588929653167725, "learning_rate": 4.570939439332335e-05, "loss": 0.4247, "step": 925 }, { "epoch": 0.5488157134604275, "grad_norm": 37.71608352661133, "learning_rate": 4.544190027819388e-05, "loss": 0.6862, "step": 950 }, { "epoch": 0.5632582322357019, "grad_norm": 11.169629096984863, "learning_rate": 4.5174406163064415e-05, "loss": 0.5932, "step": 975 }, { "epoch": 0.5777007510109763, "grad_norm": 6.53275203704834, "learning_rate": 4.4906912047934944e-05, "loss": 0.5161, "step": 1000 }, { "epoch": 0.5921432697862508, "grad_norm": 21.197736740112305, "learning_rate": 4.463941793280548e-05, "loss": 0.4312, "step": 1025 }, { "epoch": 0.6065857885615251, "grad_norm": 16.153993606567383, "learning_rate": 4.4371923817676016e-05, "loss": 0.438, "step": 1050 }, { "epoch": 0.6210283073367996, "grad_norm": 21.46822166442871, "learning_rate": 4.4104429702546545e-05, "loss": 0.4343, "step": 1075 }, { "epoch": 0.635470826112074, "grad_norm": 18.194324493408203, "learning_rate": 4.3836935587417074e-05, "loss": 0.4935, "step": 1100 }, { "epoch": 0.6499133448873483, "grad_norm": 11.8253173828125, "learning_rate": 4.3569441472287616e-05, "loss": 0.509, "step": 1125 }, { "epoch": 0.6643558636626228, "grad_norm": 22.477100372314453, "learning_rate": 4.3301947357158145e-05, "loss": 0.3698, "step": 1150 }, { "epoch": 0.6787983824378971, "grad_norm": 10.39338493347168, "learning_rate": 4.3034453242028674e-05, "loss": 0.4084, "step": 1175 }, { "epoch": 0.6932409012131716, "grad_norm": 4.328056335449219, "learning_rate": 4.276695912689921e-05, "loss": 0.4137, "step": 1200 }, { "epoch": 0.707683419988446, "grad_norm": 7.7573628425598145, "learning_rate": 4.2499465011769746e-05, "loss": 0.4974, "step": 1225 }, { "epoch": 0.7221259387637204, "grad_norm": 27.289525985717773, "learning_rate": 4.2231970896640275e-05, "loss": 0.4999, "step": 1250 }, { "epoch": 0.7365684575389948, "grad_norm": 9.000728607177734, "learning_rate": 4.196447678151081e-05, "loss": 0.4854, "step": 1275 }, { "epoch": 0.7510109763142692, "grad_norm": 9.249234199523926, "learning_rate": 4.169698266638134e-05, "loss": 0.43, "step": 1300 }, { "epoch": 0.7654534950895436, "grad_norm": 10.179132461547852, "learning_rate": 4.1429488551251875e-05, "loss": 0.3916, "step": 1325 }, { "epoch": 0.779896013864818, "grad_norm": 8.030287742614746, "learning_rate": 4.116199443612241e-05, "loss": 0.5957, "step": 1350 }, { "epoch": 0.7943385326400925, "grad_norm": 10.439188003540039, "learning_rate": 4.089450032099294e-05, "loss": 0.3744, "step": 1375 }, { "epoch": 0.8087810514153668, "grad_norm": 14.759039878845215, "learning_rate": 4.062700620586347e-05, "loss": 0.406, "step": 1400 }, { "epoch": 0.8232235701906413, "grad_norm": 5.672801494598389, "learning_rate": 4.035951209073401e-05, "loss": 0.5122, "step": 1425 }, { "epoch": 0.8376660889659157, "grad_norm": 5.283393383026123, "learning_rate": 4.009201797560454e-05, "loss": 0.3474, "step": 1450 }, { "epoch": 0.85210860774119, "grad_norm": 4.710180282592773, "learning_rate": 3.982452386047507e-05, "loss": 0.4959, "step": 1475 }, { "epoch": 0.8665511265164645, "grad_norm": 13.148910522460938, "learning_rate": 3.9557029745345605e-05, "loss": 0.4234, "step": 1500 }, { "epoch": 0.8809936452917388, "grad_norm": 2.738633155822754, "learning_rate": 3.9289535630216134e-05, "loss": 0.4548, "step": 1525 }, { "epoch": 0.8954361640670133, "grad_norm": 5.08731746673584, "learning_rate": 3.902204151508667e-05, "loss": 0.486, "step": 1550 }, { "epoch": 0.9098786828422877, "grad_norm": 16.38525390625, "learning_rate": 3.8754547399957206e-05, "loss": 0.4189, "step": 1575 }, { "epoch": 0.9243212016175621, "grad_norm": 8.546934127807617, "learning_rate": 3.8487053284827735e-05, "loss": 0.4164, "step": 1600 }, { "epoch": 0.9387637203928365, "grad_norm": 16.895854949951172, "learning_rate": 3.8219559169698264e-05, "loss": 0.5782, "step": 1625 }, { "epoch": 0.9532062391681109, "grad_norm": 18.25130271911621, "learning_rate": 3.79520650545688e-05, "loss": 0.3899, "step": 1650 }, { "epoch": 0.9676487579433853, "grad_norm": 8.933642387390137, "learning_rate": 3.7684570939439335e-05, "loss": 0.4484, "step": 1675 }, { "epoch": 0.9820912767186597, "grad_norm": 2.895547389984131, "learning_rate": 3.7417076824309864e-05, "loss": 0.3807, "step": 1700 }, { "epoch": 0.9965337954939342, "grad_norm": 6.855337619781494, "learning_rate": 3.71495827091804e-05, "loss": 0.5451, "step": 1725 }, { "epoch": 1.0, "eval_explained_variance": 0.642084002494812, "eval_loss": 0.4517485201358795, "eval_mae": 0.5261725783348083, "eval_mse": 0.4517485201358795, "eval_r2": 0.590551334956607, "eval_rmse": 0.6721224188804626, "eval_runtime": 22.0997, "eval_samples_per_second": 156.654, "eval_steps_per_second": 9.819, "step": 1731 }, { "epoch": 1.0109763142692085, "grad_norm": 20.85631561279297, "learning_rate": 3.6882088594050936e-05, "loss": 0.4845, "step": 1750 }, { "epoch": 1.025418833044483, "grad_norm": 14.387726783752441, "learning_rate": 3.6614594478921465e-05, "loss": 0.3949, "step": 1775 }, { "epoch": 1.0398613518197575, "grad_norm": 12.336610794067383, "learning_rate": 3.6347100363791994e-05, "loss": 0.3903, "step": 1800 }, { "epoch": 1.0543038705950318, "grad_norm": 12.857579231262207, "learning_rate": 3.607960624866253e-05, "loss": 0.3625, "step": 1825 }, { "epoch": 1.0687463893703062, "grad_norm": 3.4398610591888428, "learning_rate": 3.5812112133533065e-05, "loss": 0.3276, "step": 1850 }, { "epoch": 1.0831889081455806, "grad_norm": 18.700855255126953, "learning_rate": 3.5544618018403594e-05, "loss": 0.4301, "step": 1875 }, { "epoch": 1.097631426920855, "grad_norm": 3.7766644954681396, "learning_rate": 3.527712390327413e-05, "loss": 0.3571, "step": 1900 }, { "epoch": 1.1120739456961295, "grad_norm": 13.447446823120117, "learning_rate": 3.500962978814466e-05, "loss": 0.3691, "step": 1925 }, { "epoch": 1.1265164644714039, "grad_norm": 6.039691925048828, "learning_rate": 3.4742135673015195e-05, "loss": 0.3966, "step": 1950 }, { "epoch": 1.1409589832466782, "grad_norm": 9.641484260559082, "learning_rate": 3.447464155788573e-05, "loss": 0.4171, "step": 1975 }, { "epoch": 1.1554015020219526, "grad_norm": 8.735544204711914, "learning_rate": 3.420714744275626e-05, "loss": 0.4154, "step": 2000 }, { "epoch": 1.169844020797227, "grad_norm": 10.355854034423828, "learning_rate": 3.393965332762679e-05, "loss": 0.3968, "step": 2025 }, { "epoch": 1.1842865395725015, "grad_norm": 10.740571022033691, "learning_rate": 3.367215921249733e-05, "loss": 0.343, "step": 2050 }, { "epoch": 1.1987290583477759, "grad_norm": 3.9777560234069824, "learning_rate": 3.340466509736786e-05, "loss": 0.3848, "step": 2075 }, { "epoch": 1.2131715771230502, "grad_norm": 8.958410263061523, "learning_rate": 3.313717098223839e-05, "loss": 0.4381, "step": 2100 }, { "epoch": 1.2276140958983246, "grad_norm": 7.341928005218506, "learning_rate": 3.2869676867108925e-05, "loss": 0.4134, "step": 2125 }, { "epoch": 1.242056614673599, "grad_norm": 8.02366828918457, "learning_rate": 3.260218275197946e-05, "loss": 0.4799, "step": 2150 }, { "epoch": 1.2564991334488735, "grad_norm": 5.73162841796875, "learning_rate": 3.233468863684999e-05, "loss": 0.3479, "step": 2175 }, { "epoch": 1.270941652224148, "grad_norm": 10.025665283203125, "learning_rate": 3.2067194521720525e-05, "loss": 0.4987, "step": 2200 }, { "epoch": 1.2853841709994223, "grad_norm": 15.222429275512695, "learning_rate": 3.1799700406591054e-05, "loss": 0.4249, "step": 2225 }, { "epoch": 1.2998266897746968, "grad_norm": 6.472146987915039, "learning_rate": 3.153220629146159e-05, "loss": 0.4069, "step": 2250 }, { "epoch": 1.314269208549971, "grad_norm": 2.59798264503479, "learning_rate": 3.1264712176332126e-05, "loss": 0.4369, "step": 2275 }, { "epoch": 1.3287117273252456, "grad_norm": 6.89563512802124, "learning_rate": 3.0997218061202655e-05, "loss": 0.4497, "step": 2300 }, { "epoch": 1.34315424610052, "grad_norm": 7.411588668823242, "learning_rate": 3.0729723946073184e-05, "loss": 0.3562, "step": 2325 }, { "epoch": 1.3575967648757943, "grad_norm": 21.604717254638672, "learning_rate": 3.0462229830943723e-05, "loss": 0.3741, "step": 2350 }, { "epoch": 1.3720392836510689, "grad_norm": 6.267892360687256, "learning_rate": 3.0194735715814255e-05, "loss": 0.3237, "step": 2375 }, { "epoch": 1.3864818024263432, "grad_norm": 17.0023250579834, "learning_rate": 2.9927241600684784e-05, "loss": 0.3928, "step": 2400 }, { "epoch": 1.4009243212016176, "grad_norm": 3.766000509262085, "learning_rate": 2.9659747485555317e-05, "loss": 0.3341, "step": 2425 }, { "epoch": 1.415366839976892, "grad_norm": 16.134517669677734, "learning_rate": 2.9392253370425852e-05, "loss": 0.3264, "step": 2450 }, { "epoch": 1.4298093587521663, "grad_norm": 15.79883861541748, "learning_rate": 2.9124759255296385e-05, "loss": 0.3201, "step": 2475 }, { "epoch": 1.4442518775274409, "grad_norm": 10.560098648071289, "learning_rate": 2.8857265140166917e-05, "loss": 0.3848, "step": 2500 }, { "epoch": 1.4586943963027152, "grad_norm": 24.152523040771484, "learning_rate": 2.858977102503745e-05, "loss": 0.4073, "step": 2525 }, { "epoch": 1.4731369150779896, "grad_norm": 15.787476539611816, "learning_rate": 2.8322276909907985e-05, "loss": 0.4058, "step": 2550 }, { "epoch": 1.487579433853264, "grad_norm": 5.4662604331970215, "learning_rate": 2.8054782794778518e-05, "loss": 0.4019, "step": 2575 }, { "epoch": 1.5020219526285383, "grad_norm": 5.6592698097229, "learning_rate": 2.778728867964905e-05, "loss": 0.3481, "step": 2600 }, { "epoch": 1.516464471403813, "grad_norm": 2.2615883350372314, "learning_rate": 2.751979456451958e-05, "loss": 0.3628, "step": 2625 }, { "epoch": 1.5309069901790873, "grad_norm": 12.71334457397461, "learning_rate": 2.7252300449390118e-05, "loss": 0.3562, "step": 2650 }, { "epoch": 1.5453495089543616, "grad_norm": 6.8426408767700195, "learning_rate": 2.6984806334260647e-05, "loss": 0.4108, "step": 2675 }, { "epoch": 1.5597920277296362, "grad_norm": 28.521926879882812, "learning_rate": 2.671731221913118e-05, "loss": 0.3277, "step": 2700 }, { "epoch": 1.5742345465049103, "grad_norm": 5.261911869049072, "learning_rate": 2.6449818104001712e-05, "loss": 0.3287, "step": 2725 }, { "epoch": 1.588677065280185, "grad_norm": 31.970890045166016, "learning_rate": 2.6182323988872248e-05, "loss": 0.3222, "step": 2750 }, { "epoch": 1.6031195840554593, "grad_norm": 11.813735961914062, "learning_rate": 2.591482987374278e-05, "loss": 0.3125, "step": 2775 }, { "epoch": 1.6175621028307337, "grad_norm": 6.268784523010254, "learning_rate": 2.5647335758613312e-05, "loss": 0.3741, "step": 2800 }, { "epoch": 1.6320046216060082, "grad_norm": 7.095576286315918, "learning_rate": 2.537984164348384e-05, "loss": 0.3723, "step": 2825 }, { "epoch": 1.6464471403812824, "grad_norm": 10.156100273132324, "learning_rate": 2.511234752835438e-05, "loss": 0.3499, "step": 2850 }, { "epoch": 1.660889659156557, "grad_norm": 11.29510498046875, "learning_rate": 2.484485341322491e-05, "loss": 0.29, "step": 2875 }, { "epoch": 1.6753321779318313, "grad_norm": 15.906551361083984, "learning_rate": 2.4577359298095442e-05, "loss": 0.37, "step": 2900 }, { "epoch": 1.6897746967071057, "grad_norm": 6.896731853485107, "learning_rate": 2.4309865182965978e-05, "loss": 0.3577, "step": 2925 }, { "epoch": 1.7042172154823803, "grad_norm": 3.6533708572387695, "learning_rate": 2.4042371067836507e-05, "loss": 0.3122, "step": 2950 }, { "epoch": 1.7186597342576544, "grad_norm": 11.97821044921875, "learning_rate": 2.3774876952707042e-05, "loss": 0.3725, "step": 2975 }, { "epoch": 1.733102253032929, "grad_norm": 12.72447395324707, "learning_rate": 2.3507382837577575e-05, "loss": 0.3379, "step": 3000 }, { "epoch": 1.7475447718082033, "grad_norm": 8.284575462341309, "learning_rate": 2.3239888722448107e-05, "loss": 0.3303, "step": 3025 }, { "epoch": 1.7619872905834777, "grad_norm": 7.413320064544678, "learning_rate": 2.297239460731864e-05, "loss": 0.3731, "step": 3050 }, { "epoch": 1.7764298093587523, "grad_norm": 5.650282382965088, "learning_rate": 2.2704900492189175e-05, "loss": 0.3892, "step": 3075 }, { "epoch": 1.7908723281340264, "grad_norm": 4.727357387542725, "learning_rate": 2.2437406377059704e-05, "loss": 0.3721, "step": 3100 }, { "epoch": 1.805314846909301, "grad_norm": 11.350446701049805, "learning_rate": 2.216991226193024e-05, "loss": 0.3263, "step": 3125 }, { "epoch": 1.8197573656845754, "grad_norm": 7.404331684112549, "learning_rate": 2.1902418146800772e-05, "loss": 0.3016, "step": 3150 }, { "epoch": 1.8341998844598497, "grad_norm": 4.940402984619141, "learning_rate": 2.1634924031671305e-05, "loss": 0.3623, "step": 3175 }, { "epoch": 1.8486424032351243, "grad_norm": 5.797356128692627, "learning_rate": 2.1367429916541837e-05, "loss": 0.3448, "step": 3200 }, { "epoch": 1.8630849220103987, "grad_norm": 2.8347394466400146, "learning_rate": 2.109993580141237e-05, "loss": 0.3467, "step": 3225 }, { "epoch": 1.877527440785673, "grad_norm": 6.575459003448486, "learning_rate": 2.0832441686282902e-05, "loss": 0.3782, "step": 3250 }, { "epoch": 1.8919699595609474, "grad_norm": 11.3712797164917, "learning_rate": 2.0564947571153438e-05, "loss": 0.3759, "step": 3275 }, { "epoch": 1.9064124783362217, "grad_norm": 11.227645874023438, "learning_rate": 2.0297453456023967e-05, "loss": 0.3864, "step": 3300 }, { "epoch": 1.9208549971114963, "grad_norm": 24.690820693969727, "learning_rate": 2.0029959340894502e-05, "loss": 0.3361, "step": 3325 }, { "epoch": 1.9352975158867707, "grad_norm": 6.431740760803223, "learning_rate": 1.9762465225765035e-05, "loss": 0.3138, "step": 3350 }, { "epoch": 1.949740034662045, "grad_norm": 6.420506954193115, "learning_rate": 1.9494971110635567e-05, "loss": 0.282, "step": 3375 }, { "epoch": 1.9641825534373196, "grad_norm": 15.711064338684082, "learning_rate": 1.92274769955061e-05, "loss": 0.3228, "step": 3400 }, { "epoch": 1.9786250722125938, "grad_norm": 10.628539085388184, "learning_rate": 1.8959982880376635e-05, "loss": 0.3787, "step": 3425 }, { "epoch": 1.9930675909878683, "grad_norm": 6.874371528625488, "learning_rate": 1.8692488765247164e-05, "loss": 0.3496, "step": 3450 }, { "epoch": 2.0, "eval_explained_variance": 0.6929686665534973, "eval_loss": 0.34081029891967773, "eval_mae": 0.4530330300331116, "eval_mse": 0.3408103287220001, "eval_r2": 0.6911017288906998, "eval_rmse": 0.5837896466255188, "eval_runtime": 22.0954, "eval_samples_per_second": 156.684, "eval_steps_per_second": 9.821, "step": 3462 } ], "logging_steps": 25, "max_steps": 5193, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 7285479708948480.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }