diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,9540 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 4319, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010351966873706004, + "grad_norm": 15.605315613103867, + "learning_rate": 4.71976401179941e-07, + "loss": 0.6774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7991642951965332, + "step": 5, + "valid_targets_mean": 2069.4, + "valid_targets_min": 544 + }, + { + "epoch": 0.020703933747412008, + "grad_norm": 8.863140569141265, + "learning_rate": 1.0619469026548673e-06, + "loss": 0.7208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6838527917861938, + "step": 10, + "valid_targets_mean": 3883.4, + "valid_targets_min": 313 + }, + { + "epoch": 0.031055900621118012, + "grad_norm": 12.629168153366845, + "learning_rate": 1.6519174041297937e-06, + "loss": 0.685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7600278854370117, + "step": 15, + "valid_targets_mean": 2096.7, + "valid_targets_min": 618 + }, + { + "epoch": 0.041407867494824016, + "grad_norm": 6.799624906702634, + "learning_rate": 2.24188790560472e-06, + "loss": 0.6675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6246192455291748, + "step": 20, + "valid_targets_mean": 3201.5, + "valid_targets_min": 693 + }, + { + "epoch": 0.051759834368530024, + "grad_norm": 4.637130666016448, + "learning_rate": 2.831858407079646e-06, + "loss": 0.6722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7706674337387085, + "step": 25, + "valid_targets_mean": 3519.5, + "valid_targets_min": 636 + }, + { + "epoch": 0.062111801242236024, + "grad_norm": 3.1183737090108545, + "learning_rate": 3.4218289085545726e-06, + "loss": 0.6021, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6737767457962036, + "step": 30, + "valid_targets_mean": 3171.9, + "valid_targets_min": 820 + }, + { + "epoch": 0.07246376811594203, + "grad_norm": 2.298602022892457, + "learning_rate": 4.011799410029498e-06, + "loss": 0.5708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5966386795043945, + "step": 35, + "valid_targets_mean": 2413.9, + "valid_targets_min": 854 + }, + { + "epoch": 0.08281573498964803, + "grad_norm": 1.9166901302469286, + "learning_rate": 4.6017699115044254e-06, + "loss": 0.4853, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5276696681976318, + "step": 40, + "valid_targets_mean": 1972.9, + "valid_targets_min": 893 + }, + { + "epoch": 0.09316770186335403, + "grad_norm": 2.287553544660079, + "learning_rate": 5.191740412979352e-06, + "loss": 0.4365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.441994309425354, + "step": 45, + "valid_targets_mean": 3530.9, + "valid_targets_min": 928 + }, + { + "epoch": 0.10351966873706005, + "grad_norm": 1.2570057018709198, + "learning_rate": 5.781710914454279e-06, + "loss": 0.4807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5429658889770508, + "step": 50, + "valid_targets_mean": 2901.6, + "valid_targets_min": 756 + }, + { + "epoch": 0.11387163561076605, + "grad_norm": 0.930016752086229, + "learning_rate": 6.371681415929204e-06, + "loss": 0.5788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44122475385665894, + "step": 55, + "valid_targets_mean": 3173.3, + "valid_targets_min": 515 + }, + { + "epoch": 0.12422360248447205, + "grad_norm": 0.8587397931841486, + "learning_rate": 6.961651917404131e-06, + "loss": 0.4867, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46632158756256104, + "step": 60, + "valid_targets_mean": 2709.9, + "valid_targets_min": 781 + }, + { + "epoch": 0.13457556935817805, + "grad_norm": 0.9248959615543452, + "learning_rate": 7.551622418879056e-06, + "loss": 0.5078, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.49240103363990784, + "step": 65, + "valid_targets_mean": 2006.4, + "valid_targets_min": 645 + }, + { + "epoch": 0.14492753623188406, + "grad_norm": 0.7279615677685586, + "learning_rate": 8.141592920353984e-06, + "loss": 0.425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39918971061706543, + "step": 70, + "valid_targets_mean": 3112.9, + "valid_targets_min": 431 + }, + { + "epoch": 0.15527950310559005, + "grad_norm": 0.7659577874600716, + "learning_rate": 8.73156342182891e-06, + "loss": 0.4238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40704938769340515, + "step": 75, + "valid_targets_mean": 2240.1, + "valid_targets_min": 604 + }, + { + "epoch": 0.16563146997929606, + "grad_norm": 0.7216997725030858, + "learning_rate": 9.321533923303837e-06, + "loss": 0.4238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43496978282928467, + "step": 80, + "valid_targets_mean": 3710.6, + "valid_targets_min": 849 + }, + { + "epoch": 0.17598343685300208, + "grad_norm": 0.8774588952929467, + "learning_rate": 9.911504424778762e-06, + "loss": 0.4171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43825411796569824, + "step": 85, + "valid_targets_mean": 1872.3, + "valid_targets_min": 779 + }, + { + "epoch": 0.18633540372670807, + "grad_norm": 0.8451380882184308, + "learning_rate": 1.0501474926253687e-05, + "loss": 0.4484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44401633739471436, + "step": 90, + "valid_targets_mean": 2137.0, + "valid_targets_min": 597 + }, + { + "epoch": 0.19668737060041408, + "grad_norm": 0.6084483563484301, + "learning_rate": 1.1091445427728616e-05, + "loss": 0.4483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3756181001663208, + "step": 95, + "valid_targets_mean": 3593.1, + "valid_targets_min": 642 + }, + { + "epoch": 0.2070393374741201, + "grad_norm": 0.6213761233419673, + "learning_rate": 1.1681415929203541e-05, + "loss": 0.4075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4502703547477722, + "step": 100, + "valid_targets_mean": 3648.6, + "valid_targets_min": 1054 + }, + { + "epoch": 0.21739130434782608, + "grad_norm": 0.9269246228823874, + "learning_rate": 1.2271386430678467e-05, + "loss": 0.4266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4538400173187256, + "step": 105, + "valid_targets_mean": 1649.7, + "valid_targets_min": 1060 + }, + { + "epoch": 0.2277432712215321, + "grad_norm": 0.6575358320968785, + "learning_rate": 1.2861356932153392e-05, + "loss": 0.4556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4259205162525177, + "step": 110, + "valid_targets_mean": 2929.3, + "valid_targets_min": 920 + }, + { + "epoch": 0.23809523809523808, + "grad_norm": 0.7520773860181491, + "learning_rate": 1.345132743362832e-05, + "loss": 0.4091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.51981520652771, + "step": 115, + "valid_targets_mean": 2942.2, + "valid_targets_min": 870 + }, + { + "epoch": 0.2484472049689441, + "grad_norm": 0.6260406424910655, + "learning_rate": 1.4041297935103246e-05, + "loss": 0.4403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.463638037443161, + "step": 120, + "valid_targets_mean": 3665.9, + "valid_targets_min": 863 + }, + { + "epoch": 0.2587991718426501, + "grad_norm": 0.7666598846619594, + "learning_rate": 1.4631268436578171e-05, + "loss": 0.4347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.49128270149230957, + "step": 125, + "valid_targets_mean": 2527.6, + "valid_targets_min": 374 + }, + { + "epoch": 0.2691511387163561, + "grad_norm": 0.863409285542385, + "learning_rate": 1.5221238938053098e-05, + "loss": 0.3948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5175631046295166, + "step": 130, + "valid_targets_mean": 2604.3, + "valid_targets_min": 604 + }, + { + "epoch": 0.2795031055900621, + "grad_norm": 0.6514406268066054, + "learning_rate": 1.5811209439528025e-05, + "loss": 0.3959, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3603256940841675, + "step": 135, + "valid_targets_mean": 2682.6, + "valid_targets_min": 1039 + }, + { + "epoch": 0.2898550724637681, + "grad_norm": 0.7385382622379217, + "learning_rate": 1.6401179941002953e-05, + "loss": 0.3854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3818318247795105, + "step": 140, + "valid_targets_mean": 2239.2, + "valid_targets_min": 525 + }, + { + "epoch": 0.3002070393374741, + "grad_norm": 0.7388257073327376, + "learning_rate": 1.6991150442477876e-05, + "loss": 0.429, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5566807985305786, + "step": 145, + "valid_targets_mean": 3091.8, + "valid_targets_min": 386 + }, + { + "epoch": 0.3105590062111801, + "grad_norm": 0.842626090176198, + "learning_rate": 1.7581120943952803e-05, + "loss": 0.4001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3940976560115814, + "step": 150, + "valid_targets_mean": 1999.4, + "valid_targets_min": 711 + }, + { + "epoch": 0.32091097308488614, + "grad_norm": 0.6925465806852213, + "learning_rate": 1.817109144542773e-05, + "loss": 0.4275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44604888558387756, + "step": 155, + "valid_targets_mean": 2645.6, + "valid_targets_min": 915 + }, + { + "epoch": 0.33126293995859213, + "grad_norm": 0.6871016903538135, + "learning_rate": 1.8761061946902657e-05, + "loss": 0.3953, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4031655192375183, + "step": 160, + "valid_targets_mean": 2989.4, + "valid_targets_min": 577 + }, + { + "epoch": 0.3416149068322981, + "grad_norm": 0.7772347914126068, + "learning_rate": 1.935103244837758e-05, + "loss": 0.3942, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4824008345603943, + "step": 165, + "valid_targets_mean": 2767.8, + "valid_targets_min": 553 + }, + { + "epoch": 0.35196687370600416, + "grad_norm": 0.7369249527882769, + "learning_rate": 1.9941002949852508e-05, + "loss": 0.3649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3546352982521057, + "step": 170, + "valid_targets_mean": 2191.2, + "valid_targets_min": 709 + }, + { + "epoch": 0.36231884057971014, + "grad_norm": 0.7534283311389234, + "learning_rate": 2.0530973451327435e-05, + "loss": 0.3829, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3965192437171936, + "step": 175, + "valid_targets_mean": 2274.2, + "valid_targets_min": 757 + }, + { + "epoch": 0.37267080745341613, + "grad_norm": 0.5744145884839316, + "learning_rate": 2.1120943952802362e-05, + "loss": 0.3689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27616333961486816, + "step": 180, + "valid_targets_mean": 3288.3, + "valid_targets_min": 754 + }, + { + "epoch": 0.3830227743271222, + "grad_norm": 0.7399447267515546, + "learning_rate": 2.171091445427729e-05, + "loss": 0.4155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5079173445701599, + "step": 185, + "valid_targets_mean": 3585.9, + "valid_targets_min": 305 + }, + { + "epoch": 0.39337474120082816, + "grad_norm": 0.787595577330117, + "learning_rate": 2.2300884955752213e-05, + "loss": 0.3672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40638184547424316, + "step": 190, + "valid_targets_mean": 2532.8, + "valid_targets_min": 501 + }, + { + "epoch": 0.40372670807453415, + "grad_norm": 0.8366148088688402, + "learning_rate": 2.289085545722714e-05, + "loss": 0.3903, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.344083696603775, + "step": 195, + "valid_targets_mean": 1931.6, + "valid_targets_min": 984 + }, + { + "epoch": 0.4140786749482402, + "grad_norm": 0.7049038068177579, + "learning_rate": 2.3480825958702063e-05, + "loss": 0.366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3272162675857544, + "step": 200, + "valid_targets_mean": 2669.5, + "valid_targets_min": 709 + }, + { + "epoch": 0.4244306418219462, + "grad_norm": 0.722982409943531, + "learning_rate": 2.4070796460176994e-05, + "loss": 0.4846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4433075487613678, + "step": 205, + "valid_targets_mean": 2856.4, + "valid_targets_min": 788 + }, + { + "epoch": 0.43478260869565216, + "grad_norm": 0.6216316854835306, + "learning_rate": 2.466076696165192e-05, + "loss": 0.3702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3501080870628357, + "step": 210, + "valid_targets_mean": 2924.4, + "valid_targets_min": 715 + }, + { + "epoch": 0.4451345755693582, + "grad_norm": 0.8036432318505198, + "learning_rate": 2.5250737463126848e-05, + "loss": 0.3722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3556331992149353, + "step": 215, + "valid_targets_mean": 2358.1, + "valid_targets_min": 835 + }, + { + "epoch": 0.4554865424430642, + "grad_norm": 0.9026688718475531, + "learning_rate": 2.584070796460177e-05, + "loss": 0.4376, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.362284779548645, + "step": 220, + "valid_targets_mean": 1682.8, + "valid_targets_min": 778 + }, + { + "epoch": 0.4658385093167702, + "grad_norm": 0.8474657010085218, + "learning_rate": 2.64306784660767e-05, + "loss": 0.3687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41895484924316406, + "step": 225, + "valid_targets_mean": 1957.7, + "valid_targets_min": 816 + }, + { + "epoch": 0.47619047619047616, + "grad_norm": 0.5989684265924352, + "learning_rate": 2.7020648967551622e-05, + "loss": 0.3956, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.270280659198761, + "step": 230, + "valid_targets_mean": 3520.2, + "valid_targets_min": 819 + }, + { + "epoch": 0.4865424430641822, + "grad_norm": 0.7407035731133066, + "learning_rate": 2.761061946902655e-05, + "loss": 0.3462, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33700358867645264, + "step": 235, + "valid_targets_mean": 3075.6, + "valid_targets_min": 883 + }, + { + "epoch": 0.4968944099378882, + "grad_norm": 0.6514761903429114, + "learning_rate": 2.8200589970501476e-05, + "loss": 0.3657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40764114260673523, + "step": 240, + "valid_targets_mean": 3008.0, + "valid_targets_min": 524 + }, + { + "epoch": 0.5072463768115942, + "grad_norm": 0.9486281355398519, + "learning_rate": 2.8790560471976407e-05, + "loss": 0.2909, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3389992117881775, + "step": 245, + "valid_targets_mean": 2530.0, + "valid_targets_min": 292 + }, + { + "epoch": 0.5175983436853002, + "grad_norm": 0.8255724854520955, + "learning_rate": 2.938053097345133e-05, + "loss": 0.387, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5169772505760193, + "step": 250, + "valid_targets_mean": 2672.6, + "valid_targets_min": 633 + }, + { + "epoch": 0.5279503105590062, + "grad_norm": 0.744452870979198, + "learning_rate": 2.9970501474926257e-05, + "loss": 0.4064, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38283827900886536, + "step": 255, + "valid_targets_mean": 2401.8, + "valid_targets_min": 1422 + }, + { + "epoch": 0.5383022774327122, + "grad_norm": 0.5635474379382327, + "learning_rate": 3.0560471976401184e-05, + "loss": 0.3819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4241616725921631, + "step": 260, + "valid_targets_mean": 5410.8, + "valid_targets_min": 754 + }, + { + "epoch": 0.5486542443064182, + "grad_norm": 0.676856787568286, + "learning_rate": 3.115044247787611e-05, + "loss": 0.3813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.534885823726654, + "step": 265, + "valid_targets_mean": 4032.4, + "valid_targets_min": 601 + }, + { + "epoch": 0.5590062111801242, + "grad_norm": 0.685650100513104, + "learning_rate": 3.174041297935103e-05, + "loss": 0.3515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34689125418663025, + "step": 270, + "valid_targets_mean": 2478.0, + "valid_targets_min": 216 + }, + { + "epoch": 0.5693581780538303, + "grad_norm": 0.92365940319728, + "learning_rate": 3.233038348082596e-05, + "loss": 0.3328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3527258038520813, + "step": 275, + "valid_targets_mean": 1704.9, + "valid_targets_min": 211 + }, + { + "epoch": 0.5797101449275363, + "grad_norm": 0.7943251607272555, + "learning_rate": 3.2920353982300886e-05, + "loss": 0.3735, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31049463152885437, + "step": 280, + "valid_targets_mean": 1748.0, + "valid_targets_min": 910 + }, + { + "epoch": 0.5900621118012422, + "grad_norm": 0.7707949131715036, + "learning_rate": 3.3510324483775816e-05, + "loss": 0.3549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3978985548019409, + "step": 285, + "valid_targets_mean": 2196.7, + "valid_targets_min": 345 + }, + { + "epoch": 0.6004140786749482, + "grad_norm": 0.9067101706634203, + "learning_rate": 3.410029498525074e-05, + "loss": 0.3773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3796255588531494, + "step": 290, + "valid_targets_mean": 1666.6, + "valid_targets_min": 912 + }, + { + "epoch": 0.6107660455486542, + "grad_norm": 0.49575725135513243, + "learning_rate": 3.469026548672567e-05, + "loss": 0.3283, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3226116895675659, + "step": 295, + "valid_targets_mean": 4260.1, + "valid_targets_min": 783 + }, + { + "epoch": 0.6211180124223602, + "grad_norm": 0.7176031931898035, + "learning_rate": 3.5280235988200594e-05, + "loss": 0.3223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32411545515060425, + "step": 300, + "valid_targets_mean": 2275.6, + "valid_targets_min": 743 + }, + { + "epoch": 0.6314699792960663, + "grad_norm": 0.7663570928663194, + "learning_rate": 3.587020648967552e-05, + "loss": 0.3817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3520033359527588, + "step": 305, + "valid_targets_mean": 2414.7, + "valid_targets_min": 652 + }, + { + "epoch": 0.6418219461697723, + "grad_norm": 0.46213987326297384, + "learning_rate": 3.646017699115044e-05, + "loss": 0.323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29329046607017517, + "step": 310, + "valid_targets_mean": 4565.9, + "valid_targets_min": 647 + }, + { + "epoch": 0.6521739130434783, + "grad_norm": 0.6297790214422954, + "learning_rate": 3.705014749262537e-05, + "loss": 0.3365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27002575993537903, + "step": 315, + "valid_targets_mean": 2892.7, + "valid_targets_min": 784 + }, + { + "epoch": 0.6625258799171843, + "grad_norm": 0.6803152264658195, + "learning_rate": 3.7640117994100295e-05, + "loss": 0.3263, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35963690280914307, + "step": 320, + "valid_targets_mean": 2670.9, + "valid_targets_min": 673 + }, + { + "epoch": 0.6728778467908902, + "grad_norm": 0.660475255702629, + "learning_rate": 3.8230088495575226e-05, + "loss": 0.3664, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3141520619392395, + "step": 325, + "valid_targets_mean": 2466.3, + "valid_targets_min": 785 + }, + { + "epoch": 0.6832298136645962, + "grad_norm": 0.5714885591458042, + "learning_rate": 3.882005899705015e-05, + "loss": 0.3123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2787840962409973, + "step": 330, + "valid_targets_mean": 2959.7, + "valid_targets_min": 754 + }, + { + "epoch": 0.6935817805383023, + "grad_norm": 0.7840125701270887, + "learning_rate": 3.941002949852508e-05, + "loss": 0.3609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3164142966270447, + "step": 335, + "valid_targets_mean": 3174.2, + "valid_targets_min": 641 + }, + { + "epoch": 0.7039337474120083, + "grad_norm": 0.6419066522152682, + "learning_rate": 4e-05, + "loss": 0.3102, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30930954217910767, + "step": 340, + "valid_targets_mean": 3136.9, + "valid_targets_min": 440 + }, + { + "epoch": 0.7142857142857143, + "grad_norm": 0.5059032670699602, + "learning_rate": 3.999973336302744e-05, + "loss": 0.3525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25785183906555176, + "step": 345, + "valid_targets_mean": 3533.2, + "valid_targets_min": 328 + }, + { + "epoch": 0.7246376811594203, + "grad_norm": 0.791626821129135, + "learning_rate": 3.999893345921928e-05, + "loss": 0.3358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3422420918941498, + "step": 350, + "valid_targets_mean": 2178.9, + "valid_targets_min": 640 + }, + { + "epoch": 0.7349896480331263, + "grad_norm": 0.5850437706333231, + "learning_rate": 3.999760030990392e-05, + "loss": 0.3754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3285501003265381, + "step": 355, + "valid_targets_mean": 3447.8, + "valid_targets_min": 1128 + }, + { + "epoch": 0.7453416149068323, + "grad_norm": 1.4793972289409643, + "learning_rate": 3.999573395062805e-05, + "loss": 0.3541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4052942991256714, + "step": 360, + "valid_targets_mean": 2194.2, + "valid_targets_min": 217 + }, + { + "epoch": 0.7556935817805382, + "grad_norm": 0.8161182629951351, + "learning_rate": 3.9993334431155696e-05, + "loss": 0.3496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3312402367591858, + "step": 365, + "valid_targets_mean": 1771.2, + "valid_targets_min": 664 + }, + { + "epoch": 0.7660455486542443, + "grad_norm": 0.7778042883063869, + "learning_rate": 3.9990401815466935e-05, + "loss": 0.3156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39662399888038635, + "step": 370, + "valid_targets_mean": 2807.4, + "valid_targets_min": 786 + }, + { + "epoch": 0.7763975155279503, + "grad_norm": 0.9868164908098449, + "learning_rate": 3.9986936181756133e-05, + "loss": 0.3484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3773801326751709, + "step": 375, + "valid_targets_mean": 2612.4, + "valid_targets_min": 1034 + }, + { + "epoch": 0.7867494824016563, + "grad_norm": 0.7554054875779739, + "learning_rate": 3.9982937622429904e-05, + "loss": 0.3218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.378534197807312, + "step": 380, + "valid_targets_mean": 2050.9, + "valid_targets_min": 1109 + }, + { + "epoch": 0.7971014492753623, + "grad_norm": 0.6391636850831167, + "learning_rate": 3.997840624410462e-05, + "loss": 0.3325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30764123797416687, + "step": 385, + "valid_targets_mean": 3234.5, + "valid_targets_min": 958 + }, + { + "epoch": 0.8074534161490683, + "grad_norm": 0.659669602172271, + "learning_rate": 3.997334216760358e-05, + "loss": 0.3556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2978909909725189, + "step": 390, + "valid_targets_mean": 4481.5, + "valid_targets_min": 510 + }, + { + "epoch": 0.8178053830227743, + "grad_norm": 0.6718264239579972, + "learning_rate": 3.996774552795379e-05, + "loss": 0.3685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3460586667060852, + "step": 395, + "valid_targets_mean": 2349.4, + "valid_targets_min": 887 + }, + { + "epoch": 0.8281573498964804, + "grad_norm": 0.7476248873647089, + "learning_rate": 3.996161647438236e-05, + "loss": 0.3653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3052156865596771, + "step": 400, + "valid_targets_mean": 2157.8, + "valid_targets_min": 818 + }, + { + "epoch": 0.8385093167701864, + "grad_norm": 0.8223378901356958, + "learning_rate": 3.9954955170312504e-05, + "loss": 0.416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4880888760089874, + "step": 405, + "valid_targets_mean": 3147.2, + "valid_targets_min": 821 + }, + { + "epoch": 0.8488612836438924, + "grad_norm": 0.6823448035407557, + "learning_rate": 3.994776179335923e-05, + "loss": 0.3239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3424914479255676, + "step": 410, + "valid_targets_mean": 2431.7, + "valid_targets_min": 869 + }, + { + "epoch": 0.8592132505175983, + "grad_norm": 0.7312590665354671, + "learning_rate": 3.9940036535324564e-05, + "loss": 0.3355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3406717777252197, + "step": 415, + "valid_targets_mean": 2271.6, + "valid_targets_min": 400 + }, + { + "epoch": 0.8695652173913043, + "grad_norm": 0.6805623092937332, + "learning_rate": 3.9931779602192435e-05, + "loss": 0.3095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35298052430152893, + "step": 420, + "valid_targets_mean": 2380.3, + "valid_targets_min": 331 + }, + { + "epoch": 0.8799171842650103, + "grad_norm": 0.47734066638902245, + "learning_rate": 3.9922991214123226e-05, + "loss": 0.2681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27308475971221924, + "step": 425, + "valid_targets_mean": 3988.6, + "valid_targets_min": 570 + }, + { + "epoch": 0.8902691511387164, + "grad_norm": 0.5978371647266324, + "learning_rate": 3.991367160544783e-05, + "loss": 0.3574, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22311019897460938, + "step": 430, + "valid_targets_mean": 2887.8, + "valid_targets_min": 663 + }, + { + "epoch": 0.9006211180124224, + "grad_norm": 0.5853594710092753, + "learning_rate": 3.99038210246615e-05, + "loss": 0.3862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33275023102760315, + "step": 435, + "valid_targets_mean": 3190.8, + "valid_targets_min": 750 + }, + { + "epoch": 0.9109730848861284, + "grad_norm": 0.6824932837618515, + "learning_rate": 3.9893439734417125e-05, + "loss": 0.3382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2892998158931732, + "step": 440, + "valid_targets_mean": 2007.6, + "valid_targets_min": 937 + }, + { + "epoch": 0.9213250517598344, + "grad_norm": 0.7388214988568738, + "learning_rate": 3.9882528011518286e-05, + "loss": 0.3598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4132966697216034, + "step": 445, + "valid_targets_mean": 2261.5, + "valid_targets_min": 747 + }, + { + "epoch": 0.9316770186335404, + "grad_norm": 0.6447680427593593, + "learning_rate": 3.987108614691186e-05, + "loss": 0.3628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3517792522907257, + "step": 450, + "valid_targets_mean": 2310.1, + "valid_targets_min": 918 + }, + { + "epoch": 0.9420289855072463, + "grad_norm": 0.679191901913392, + "learning_rate": 3.985911444568026e-05, + "loss": 0.3478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2909126877784729, + "step": 455, + "valid_targets_mean": 2311.9, + "valid_targets_min": 742 + }, + { + "epoch": 0.9523809523809523, + "grad_norm": 0.793829538406422, + "learning_rate": 3.9846613227033305e-05, + "loss": 0.3663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34115058183670044, + "step": 460, + "valid_targets_mean": 2087.2, + "valid_targets_min": 708 + }, + { + "epoch": 0.9627329192546584, + "grad_norm": 0.5926367081080217, + "learning_rate": 3.98335828242997e-05, + "loss": 0.3408, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2802141606807709, + "step": 465, + "valid_targets_mean": 2543.6, + "valid_targets_min": 821 + }, + { + "epoch": 0.9730848861283644, + "grad_norm": 0.5823847098766494, + "learning_rate": 3.982002358491817e-05, + "loss": 0.3349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.293674111366272, + "step": 470, + "valid_targets_mean": 2787.9, + "valid_targets_min": 404 + }, + { + "epoch": 0.9834368530020704, + "grad_norm": 0.7514989296192166, + "learning_rate": 3.980593587042816e-05, + "loss": 0.359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3089297413825989, + "step": 475, + "valid_targets_mean": 2357.3, + "valid_targets_min": 617 + }, + { + "epoch": 0.9937888198757764, + "grad_norm": 0.7508286456561156, + "learning_rate": 3.979132005646022e-05, + "loss": 0.3045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2705974578857422, + "step": 480, + "valid_targets_mean": 2060.9, + "valid_targets_min": 495 + }, + { + "epoch": 1.0041407867494825, + "grad_norm": 0.578303130786325, + "learning_rate": 3.9776176532726005e-05, + "loss": 0.3067, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23351214826107025, + "step": 485, + "valid_targets_mean": 2921.1, + "valid_targets_min": 862 + }, + { + "epoch": 1.0144927536231885, + "grad_norm": 0.8565681891585248, + "learning_rate": 3.976050570300783e-05, + "loss": 0.3846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4955739974975586, + "step": 490, + "valid_targets_mean": 2399.9, + "valid_targets_min": 397 + }, + { + "epoch": 1.0248447204968945, + "grad_norm": 0.8525085460239903, + "learning_rate": 3.974430798514796e-05, + "loss": 0.2587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34586983919143677, + "step": 495, + "valid_targets_mean": 3405.7, + "valid_targets_min": 692 + }, + { + "epoch": 1.0351966873706004, + "grad_norm": 0.6107145043153749, + "learning_rate": 3.972758381103744e-05, + "loss": 0.3328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25020384788513184, + "step": 500, + "valid_targets_mean": 2676.7, + "valid_targets_min": 693 + }, + { + "epoch": 1.0455486542443064, + "grad_norm": 0.3712861445621319, + "learning_rate": 3.9710333626604585e-05, + "loss": 0.2865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2056269347667694, + "step": 505, + "valid_targets_mean": 5489.6, + "valid_targets_min": 677 + }, + { + "epoch": 1.0559006211180124, + "grad_norm": 0.5445195023826013, + "learning_rate": 3.969255789180309e-05, + "loss": 0.3549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3427426815032959, + "step": 510, + "valid_targets_mean": 4128.8, + "valid_targets_min": 909 + }, + { + "epoch": 1.0662525879917184, + "grad_norm": 1.5522590817967603, + "learning_rate": 3.9674257080599775e-05, + "loss": 0.3005, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3245624303817749, + "step": 515, + "valid_targets_mean": 2571.8, + "valid_targets_min": 988 + }, + { + "epoch": 1.0766045548654244, + "grad_norm": 0.7670802874384615, + "learning_rate": 3.9655431680961924e-05, + "loss": 0.3012, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3730795085430145, + "step": 520, + "valid_targets_mean": 2089.7, + "valid_targets_min": 509 + }, + { + "epoch": 1.0869565217391304, + "grad_norm": 0.8808573279945764, + "learning_rate": 3.9636082194844285e-05, + "loss": 0.3202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31017860770225525, + "step": 525, + "valid_targets_mean": 2748.6, + "valid_targets_min": 746 + }, + { + "epoch": 1.0973084886128364, + "grad_norm": 0.5678915273276016, + "learning_rate": 3.9616209138175705e-05, + "loss": 0.3206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3231387436389923, + "step": 530, + "valid_targets_mean": 3620.4, + "valid_targets_min": 1076 + }, + { + "epoch": 1.1076604554865424, + "grad_norm": 0.6715392961412922, + "learning_rate": 3.959581304084536e-05, + "loss": 0.3538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42259538173675537, + "step": 535, + "valid_targets_mean": 4127.1, + "valid_targets_min": 406 + }, + { + "epoch": 1.1180124223602483, + "grad_norm": 0.6122700992261885, + "learning_rate": 3.9574894446688594e-05, + "loss": 0.3044, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.323467493057251, + "step": 540, + "valid_targets_mean": 3492.6, + "valid_targets_min": 684 + }, + { + "epoch": 1.1283643892339545, + "grad_norm": 0.8429230339257078, + "learning_rate": 3.955345391347249e-05, + "loss": 0.3377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.45265573263168335, + "step": 545, + "valid_targets_mean": 2018.4, + "valid_targets_min": 873 + }, + { + "epoch": 1.1387163561076605, + "grad_norm": 0.5177578813671116, + "learning_rate": 3.9531492012880915e-05, + "loss": 0.272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24492105841636658, + "step": 550, + "valid_targets_mean": 3538.9, + "valid_targets_min": 631 + }, + { + "epoch": 1.1490683229813665, + "grad_norm": 0.9358791063800329, + "learning_rate": 3.9509009330499356e-05, + "loss": 0.3029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3208366632461548, + "step": 555, + "valid_targets_mean": 2221.3, + "valid_targets_min": 735 + }, + { + "epoch": 1.1594202898550725, + "grad_norm": 1.492654485940082, + "learning_rate": 3.948600646579923e-05, + "loss": 0.3741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30150753259658813, + "step": 560, + "valid_targets_mean": 1951.8, + "valid_targets_min": 508 + }, + { + "epoch": 1.1697722567287785, + "grad_norm": 0.5458431310144255, + "learning_rate": 3.946248403212197e-05, + "loss": 0.325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34849831461906433, + "step": 565, + "valid_targets_mean": 4525.2, + "valid_targets_min": 886 + }, + { + "epoch": 1.1801242236024845, + "grad_norm": 1.3216443391427874, + "learning_rate": 3.943844265666263e-05, + "loss": 0.3464, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2811647057533264, + "step": 570, + "valid_targets_mean": 2566.6, + "valid_targets_min": 931 + }, + { + "epoch": 1.1904761904761905, + "grad_norm": 0.5379526619584627, + "learning_rate": 3.9413882980453155e-05, + "loss": 0.3001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2871326506137848, + "step": 575, + "valid_targets_mean": 3341.6, + "valid_targets_min": 752 + }, + { + "epoch": 1.2008281573498965, + "grad_norm": 0.5970973416160769, + "learning_rate": 3.9388805658345325e-05, + "loss": 0.3592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32768434286117554, + "step": 580, + "valid_targets_mean": 4324.7, + "valid_targets_min": 686 + }, + { + "epoch": 1.2111801242236024, + "grad_norm": 0.7650985133734655, + "learning_rate": 3.9363211358993264e-05, + "loss": 0.3126, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35999956727027893, + "step": 585, + "valid_targets_mean": 2116.5, + "valid_targets_min": 709 + }, + { + "epoch": 1.2215320910973084, + "grad_norm": 0.6626315464935324, + "learning_rate": 3.9337100764835616e-05, + "loss": 0.2653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17115123569965363, + "step": 590, + "valid_targets_mean": 4721.6, + "valid_targets_min": 590 + }, + { + "epoch": 1.2318840579710144, + "grad_norm": 0.608747167208178, + "learning_rate": 3.931047457207736e-05, + "loss": 0.3522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3361900746822357, + "step": 595, + "valid_targets_mean": 3093.6, + "valid_targets_min": 1291 + }, + { + "epoch": 1.2422360248447206, + "grad_norm": 0.7141578114058345, + "learning_rate": 3.928333349067125e-05, + "loss": 0.2756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3286536931991577, + "step": 600, + "valid_targets_mean": 2283.4, + "valid_targets_min": 402 + }, + { + "epoch": 1.2525879917184266, + "grad_norm": 0.5678041106872164, + "learning_rate": 3.925567824429885e-05, + "loss": 0.2724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29660943150520325, + "step": 605, + "valid_targets_mean": 3684.8, + "valid_targets_min": 1120 + }, + { + "epoch": 1.2629399585921326, + "grad_norm": 0.722727884026502, + "learning_rate": 3.922750957035128e-05, + "loss": 0.3036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2989663779735565, + "step": 610, + "valid_targets_mean": 2112.3, + "valid_targets_min": 371 + }, + { + "epoch": 1.2732919254658386, + "grad_norm": 0.5666973822209642, + "learning_rate": 3.919882821990953e-05, + "loss": 0.3165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28699982166290283, + "step": 615, + "valid_targets_mean": 3570.9, + "valid_targets_min": 780 + }, + { + "epoch": 1.2836438923395446, + "grad_norm": 0.6949218726412446, + "learning_rate": 3.9169634957724465e-05, + "loss": 0.2894, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2630559206008911, + "step": 620, + "valid_targets_mean": 1976.1, + "valid_targets_min": 216 + }, + { + "epoch": 1.2939958592132506, + "grad_norm": 0.7553316125934354, + "learning_rate": 3.913993056219636e-05, + "loss": 0.3238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3031398057937622, + "step": 625, + "valid_targets_mean": 2566.6, + "valid_targets_min": 1097 + }, + { + "epoch": 1.3043478260869565, + "grad_norm": 0.6012551894727312, + "learning_rate": 3.9109715825354254e-05, + "loss": 0.2841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30872005224227905, + "step": 630, + "valid_targets_mean": 2901.4, + "valid_targets_min": 884 + }, + { + "epoch": 1.3146997929606625, + "grad_norm": 0.6499249693828386, + "learning_rate": 3.907899155283472e-05, + "loss": 0.3054, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2922044098377228, + "step": 635, + "valid_targets_mean": 2478.1, + "valid_targets_min": 644 + }, + { + "epoch": 1.3250517598343685, + "grad_norm": 0.6607109178219058, + "learning_rate": 3.904775856386047e-05, + "loss": 0.3008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32309627532958984, + "step": 640, + "valid_targets_mean": 2581.8, + "valid_targets_min": 822 + }, + { + "epoch": 1.3354037267080745, + "grad_norm": 0.99359277719902, + "learning_rate": 3.9016017691218465e-05, + "loss": 0.3148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3399587869644165, + "step": 645, + "valid_targets_mean": 2634.0, + "valid_targets_min": 780 + }, + { + "epoch": 1.3457556935817805, + "grad_norm": 0.6998735259668638, + "learning_rate": 3.8983769781237725e-05, + "loss": 0.3, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35572707653045654, + "step": 650, + "valid_targets_mean": 2802.4, + "valid_targets_min": 288 + }, + { + "epoch": 1.3561076604554865, + "grad_norm": 0.6543925730470436, + "learning_rate": 3.8951015693766755e-05, + "loss": 0.2875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3457895815372467, + "step": 655, + "valid_targets_mean": 2723.4, + "valid_targets_min": 1094 + }, + { + "epoch": 1.3664596273291925, + "grad_norm": 0.867046377934047, + "learning_rate": 3.8917756302150627e-05, + "loss": 0.283, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34926313161849976, + "step": 660, + "valid_targets_mean": 2280.2, + "valid_targets_min": 697 + }, + { + "epoch": 1.3768115942028984, + "grad_norm": 0.8720982266233283, + "learning_rate": 3.8883992493207696e-05, + "loss": 0.3581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3935870826244354, + "step": 665, + "valid_targets_mean": 2969.8, + "valid_targets_min": 848 + }, + { + "epoch": 1.3871635610766044, + "grad_norm": 0.7607813486400433, + "learning_rate": 3.8849725167205934e-05, + "loss": 0.376, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35601186752319336, + "step": 670, + "valid_targets_mean": 2010.1, + "valid_targets_min": 625 + }, + { + "epoch": 1.3975155279503104, + "grad_norm": 0.6244741241809822, + "learning_rate": 3.8814955237838954e-05, + "loss": 0.3097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33208197355270386, + "step": 675, + "valid_targets_mean": 2881.0, + "valid_targets_min": 633 + }, + { + "epoch": 1.4078674948240166, + "grad_norm": 0.701432966146093, + "learning_rate": 3.8779683632201625e-05, + "loss": 0.3234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2493334263563156, + "step": 680, + "valid_targets_mean": 2480.6, + "valid_targets_min": 726 + }, + { + "epoch": 1.4182194616977226, + "grad_norm": 0.6386263618060875, + "learning_rate": 3.8743911290765354e-05, + "loss": 0.3563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33178985118865967, + "step": 685, + "valid_targets_mean": 2496.4, + "valid_targets_min": 555 + }, + { + "epoch": 1.4285714285714286, + "grad_norm": 0.7163290212539811, + "learning_rate": 3.870763916735303e-05, + "loss": 0.3221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42771202325820923, + "step": 690, + "valid_targets_mean": 2670.1, + "valid_targets_min": 305 + }, + { + "epoch": 1.4389233954451346, + "grad_norm": 0.6173973599046334, + "learning_rate": 3.867086822911358e-05, + "loss": 0.3036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3355981111526489, + "step": 695, + "valid_targets_mean": 4034.1, + "valid_targets_min": 960 + }, + { + "epoch": 1.4492753623188406, + "grad_norm": 0.593581318712642, + "learning_rate": 3.863359945649615e-05, + "loss": 0.2928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3260822296142578, + "step": 700, + "valid_targets_mean": 3011.0, + "valid_targets_min": 566 + }, + { + "epoch": 1.4596273291925466, + "grad_norm": 0.7037825237005169, + "learning_rate": 3.859583384322402e-05, + "loss": 0.3314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2873789072036743, + "step": 705, + "valid_targets_mean": 2114.8, + "valid_targets_min": 281 + }, + { + "epoch": 1.4699792960662525, + "grad_norm": 0.4809742053953339, + "learning_rate": 3.855757239626807e-05, + "loss": 0.2784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3173772394657135, + "step": 710, + "valid_targets_mean": 5494.3, + "valid_targets_min": 313 + }, + { + "epoch": 1.4803312629399585, + "grad_norm": 0.7992784007276763, + "learning_rate": 3.851881613581993e-05, + "loss": 0.3428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37110865116119385, + "step": 715, + "valid_targets_mean": 1696.1, + "valid_targets_min": 613 + }, + { + "epoch": 1.4906832298136645, + "grad_norm": 0.7460466376190822, + "learning_rate": 3.847956609526481e-05, + "loss": 0.2999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36755889654159546, + "step": 720, + "valid_targets_mean": 2678.8, + "valid_targets_min": 551 + }, + { + "epoch": 1.5010351966873707, + "grad_norm": 0.8015485930507722, + "learning_rate": 3.843982332115389e-05, + "loss": 0.3648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5440657734870911, + "step": 725, + "valid_targets_mean": 3186.0, + "valid_targets_min": 435 + }, + { + "epoch": 1.5113871635610767, + "grad_norm": 0.6245802617185957, + "learning_rate": 3.839958887317649e-05, + "loss": 0.2775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27157890796661377, + "step": 730, + "valid_targets_mean": 2370.6, + "valid_targets_min": 787 + }, + { + "epoch": 1.5217391304347827, + "grad_norm": 0.8406585786723727, + "learning_rate": 3.8358863824131726e-05, + "loss": 0.3012, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3849491477012634, + "step": 735, + "valid_targets_mean": 2885.7, + "valid_targets_min": 728 + }, + { + "epoch": 1.5320910973084887, + "grad_norm": 0.6705418668893592, + "learning_rate": 3.831764925989999e-05, + "loss": 0.3017, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2597661018371582, + "step": 740, + "valid_targets_mean": 2369.4, + "valid_targets_min": 777 + }, + { + "epoch": 1.5424430641821947, + "grad_norm": 0.7570661925654296, + "learning_rate": 3.8275946279413946e-05, + "loss": 0.3126, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4379417598247528, + "step": 745, + "valid_targets_mean": 3104.5, + "valid_targets_min": 709 + }, + { + "epoch": 1.5527950310559007, + "grad_norm": 0.7568141088774238, + "learning_rate": 3.823375599462924e-05, + "loss": 0.315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3063448667526245, + "step": 750, + "valid_targets_mean": 1793.1, + "valid_targets_min": 583 + }, + { + "epoch": 1.5631469979296067, + "grad_norm": 0.5008197907800821, + "learning_rate": 3.819107953049485e-05, + "loss": 0.2838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23246562480926514, + "step": 755, + "valid_targets_mean": 3298.4, + "valid_targets_min": 741 + }, + { + "epoch": 1.5734989648033126, + "grad_norm": 0.6451644332169639, + "learning_rate": 3.814791802492309e-05, + "loss": 0.3332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4341411888599396, + "step": 760, + "valid_targets_mean": 3766.1, + "valid_targets_min": 550 + }, + { + "epoch": 1.5838509316770186, + "grad_norm": 0.6316711474608361, + "learning_rate": 3.810427262875928e-05, + "loss": 0.3632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40523141622543335, + "step": 765, + "valid_targets_mean": 2900.8, + "valid_targets_min": 549 + }, + { + "epoch": 1.5942028985507246, + "grad_norm": 0.6559079761601386, + "learning_rate": 3.8060144505751066e-05, + "loss": 0.3555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3753317594528198, + "step": 770, + "valid_targets_mean": 2754.6, + "valid_targets_min": 923 + }, + { + "epoch": 1.6045548654244306, + "grad_norm": 0.8409357657293262, + "learning_rate": 3.8015534832517346e-05, + "loss": 0.2647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31297141313552856, + "step": 775, + "valid_targets_mean": 1537.7, + "valid_targets_min": 400 + }, + { + "epoch": 1.6149068322981366, + "grad_norm": 0.5230610037042105, + "learning_rate": 3.797044479851693e-05, + "loss": 0.3394, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34146177768707275, + "step": 780, + "valid_targets_mean": 4368.2, + "valid_targets_min": 564 + }, + { + "epoch": 1.6252587991718426, + "grad_norm": 0.8017960578186035, + "learning_rate": 3.7924875606016856e-05, + "loss": 0.3011, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31278154253959656, + "step": 785, + "valid_targets_mean": 2435.0, + "valid_targets_min": 309 + }, + { + "epoch": 1.6356107660455486, + "grad_norm": 0.7307900776630412, + "learning_rate": 3.7878828470060274e-05, + "loss": 0.2563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24949385225772858, + "step": 790, + "valid_targets_mean": 1969.8, + "valid_targets_min": 689 + }, + { + "epoch": 1.6459627329192545, + "grad_norm": 0.6089402692068703, + "learning_rate": 3.783230461843406e-05, + "loss": 0.3037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2793858051300049, + "step": 795, + "valid_targets_mean": 2419.0, + "valid_targets_min": 303 + }, + { + "epoch": 1.6563146997929605, + "grad_norm": 0.604875615097612, + "learning_rate": 3.7785305291636126e-05, + "loss": 0.3101, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35383036732673645, + "step": 800, + "valid_targets_mean": 2854.2, + "valid_targets_min": 666 + }, + { + "epoch": 1.6666666666666665, + "grad_norm": 0.7840139460025201, + "learning_rate": 3.773783174284228e-05, + "loss": 0.293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.270771324634552, + "step": 805, + "valid_targets_mean": 1783.1, + "valid_targets_min": 597 + }, + { + "epoch": 1.6770186335403725, + "grad_norm": 0.5439330951892583, + "learning_rate": 3.768988523787287e-05, + "loss": 0.3221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2746136784553528, + "step": 810, + "valid_targets_mean": 3407.2, + "valid_targets_min": 459 + }, + { + "epoch": 1.6873706004140787, + "grad_norm": 0.707062238751807, + "learning_rate": 3.764146705515898e-05, + "loss": 0.2841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30619674921035767, + "step": 815, + "valid_targets_mean": 2142.9, + "valid_targets_min": 923 + }, + { + "epoch": 1.6977225672877847, + "grad_norm": 0.6851395668736608, + "learning_rate": 3.759257848570838e-05, + "loss": 0.3534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5384789705276489, + "step": 820, + "valid_targets_mean": 3692.9, + "valid_targets_min": 642 + }, + { + "epoch": 1.7080745341614907, + "grad_norm": 0.6969892105656047, + "learning_rate": 3.754322083307107e-05, + "loss": 0.3032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34344714879989624, + "step": 825, + "valid_targets_mean": 2168.1, + "valid_targets_min": 880 + }, + { + "epoch": 1.7184265010351967, + "grad_norm": 0.6350233312646498, + "learning_rate": 3.749339541330457e-05, + "loss": 0.3013, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21109239757061005, + "step": 830, + "valid_targets_mean": 2116.4, + "valid_targets_min": 518 + }, + { + "epoch": 1.7287784679089027, + "grad_norm": 0.6242925285847643, + "learning_rate": 3.7443103554938794e-05, + "loss": 0.3074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35286158323287964, + "step": 835, + "valid_targets_mean": 2796.6, + "valid_targets_min": 347 + }, + { + "epoch": 1.7391304347826086, + "grad_norm": 0.7068835221058171, + "learning_rate": 3.739234659894062e-05, + "loss": 0.2794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27447009086608887, + "step": 840, + "valid_targets_mean": 1944.8, + "valid_targets_min": 488 + }, + { + "epoch": 1.7494824016563149, + "grad_norm": 0.7775094105121269, + "learning_rate": 3.7341125898678154e-05, + "loss": 0.3409, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3335605561733246, + "step": 845, + "valid_targets_mean": 2268.7, + "valid_targets_min": 655 + }, + { + "epoch": 1.7598343685300208, + "grad_norm": 0.7026899191258092, + "learning_rate": 3.7289442819884644e-05, + "loss": 0.3313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3098892867565155, + "step": 850, + "valid_targets_mean": 2165.6, + "valid_targets_min": 886 + }, + { + "epoch": 1.7701863354037268, + "grad_norm": 0.7785694411643936, + "learning_rate": 3.723729874062206e-05, + "loss": 0.3383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5063181519508362, + "step": 855, + "valid_targets_mean": 3067.9, + "valid_targets_min": 845 + }, + { + "epoch": 1.7805383022774328, + "grad_norm": 0.6864346250242391, + "learning_rate": 3.718469505124434e-05, + "loss": 0.3438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30108267068862915, + "step": 860, + "valid_targets_mean": 2758.5, + "valid_targets_min": 747 + }, + { + "epoch": 1.7908902691511388, + "grad_norm": 0.6688063851094667, + "learning_rate": 3.7131633154360336e-05, + "loss": 0.2891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3029250204563141, + "step": 865, + "valid_targets_mean": 2407.8, + "valid_targets_min": 945 + }, + { + "epoch": 1.8012422360248448, + "grad_norm": 0.6814618177356577, + "learning_rate": 3.707811446479639e-05, + "loss": 0.2774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3125041127204895, + "step": 870, + "valid_targets_mean": 2367.2, + "valid_targets_min": 1157 + }, + { + "epoch": 1.8115942028985508, + "grad_norm": 0.6124859430419072, + "learning_rate": 3.702414040955866e-05, + "loss": 0.3282, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33444851636886597, + "step": 875, + "valid_targets_mean": 2857.4, + "valid_targets_min": 789 + }, + { + "epoch": 1.8219461697722568, + "grad_norm": 0.5332243076283965, + "learning_rate": 3.696971242779499e-05, + "loss": 0.2784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30144771933555603, + "step": 880, + "valid_targets_mean": 3346.8, + "valid_targets_min": 734 + }, + { + "epoch": 1.8322981366459627, + "grad_norm": 0.6418575705848031, + "learning_rate": 3.691483197075664e-05, + "loss": 0.314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2444503903388977, + "step": 885, + "valid_targets_mean": 2092.2, + "valid_targets_min": 707 + }, + { + "epoch": 1.8426501035196687, + "grad_norm": 0.6840565309784318, + "learning_rate": 3.685950050175946e-05, + "loss": 0.2981, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3534913659095764, + "step": 890, + "valid_targets_mean": 2626.1, + "valid_targets_min": 883 + }, + { + "epoch": 1.8530020703933747, + "grad_norm": 0.6411961833023883, + "learning_rate": 3.680371949614503e-05, + "loss": 0.3, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3057364523410797, + "step": 895, + "valid_targets_mean": 2653.8, + "valid_targets_min": 788 + }, + { + "epoch": 1.8633540372670807, + "grad_norm": 0.734143938781199, + "learning_rate": 3.6747490441241166e-05, + "loss": 0.3165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2625513970851898, + "step": 900, + "valid_targets_mean": 1966.4, + "valid_targets_min": 342 + }, + { + "epoch": 1.8737060041407867, + "grad_norm": 0.6002740004630833, + "learning_rate": 3.669081483632238e-05, + "loss": 0.2464, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24936066567897797, + "step": 905, + "valid_targets_mean": 2350.6, + "valid_targets_min": 581 + }, + { + "epoch": 1.8840579710144927, + "grad_norm": 0.6634082462286021, + "learning_rate": 3.6633694192569835e-05, + "loss": 0.2822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2440337836742401, + "step": 910, + "valid_targets_mean": 2182.1, + "valid_targets_min": 768 + }, + { + "epoch": 1.8944099378881987, + "grad_norm": 0.6598417402185687, + "learning_rate": 3.657613003303109e-05, + "loss": 0.3533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3100677728652954, + "step": 915, + "valid_targets_mean": 2527.1, + "valid_targets_min": 869 + }, + { + "epoch": 1.9047619047619047, + "grad_norm": 0.7136251407724076, + "learning_rate": 3.651812389257947e-05, + "loss": 0.3177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40685153007507324, + "step": 920, + "valid_targets_mean": 2883.4, + "valid_targets_min": 438 + }, + { + "epoch": 1.9151138716356106, + "grad_norm": 0.8221505552013975, + "learning_rate": 3.645967731787313e-05, + "loss": 0.2988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.348793625831604, + "step": 925, + "valid_targets_mean": 1815.2, + "valid_targets_min": 471 + }, + { + "epoch": 1.9254658385093166, + "grad_norm": 0.6714962697810101, + "learning_rate": 3.640079186731385e-05, + "loss": 0.2772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3066626787185669, + "step": 930, + "valid_targets_mean": 2163.9, + "valid_targets_min": 739 + }, + { + "epoch": 1.9358178053830226, + "grad_norm": 0.8603452197126946, + "learning_rate": 3.634146911100547e-05, + "loss": 0.3325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3235263228416443, + "step": 935, + "valid_targets_mean": 2000.6, + "valid_targets_min": 778 + }, + { + "epoch": 1.9461697722567288, + "grad_norm": 0.8181375628740634, + "learning_rate": 3.6281710630711984e-05, + "loss": 0.3459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33535006642341614, + "step": 940, + "valid_targets_mean": 2283.9, + "valid_targets_min": 787 + }, + { + "epoch": 1.9565217391304348, + "grad_norm": 0.7661975352385241, + "learning_rate": 3.6221518019815436e-05, + "loss": 0.3142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32310929894447327, + "step": 945, + "valid_targets_mean": 2044.9, + "valid_targets_min": 870 + }, + { + "epoch": 1.9668737060041408, + "grad_norm": 0.7546886437201982, + "learning_rate": 3.616089288327336e-05, + "loss": 0.2886, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3442019522190094, + "step": 950, + "valid_targets_mean": 2058.4, + "valid_targets_min": 908 + }, + { + "epoch": 1.9772256728778468, + "grad_norm": 0.6284194122874336, + "learning_rate": 3.609983683757606e-05, + "loss": 0.3189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41605764627456665, + "step": 955, + "valid_targets_mean": 4267.4, + "valid_targets_min": 939 + }, + { + "epoch": 1.9875776397515528, + "grad_norm": 0.565257067101154, + "learning_rate": 3.603835151070345e-05, + "loss": 0.2715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27033889293670654, + "step": 960, + "valid_targets_mean": 3668.6, + "valid_targets_min": 912 + }, + { + "epoch": 1.9979296066252588, + "grad_norm": 0.6360153374329626, + "learning_rate": 3.5976438542081666e-05, + "loss": 0.2872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2408621609210968, + "step": 965, + "valid_targets_mean": 2069.4, + "valid_targets_min": 774 + }, + { + "epoch": 2.008281573498965, + "grad_norm": 0.6357166576139243, + "learning_rate": 3.591409958253937e-05, + "loss": 0.2626, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2636573612689972, + "step": 970, + "valid_targets_mean": 2536.4, + "valid_targets_min": 622 + }, + { + "epoch": 2.018633540372671, + "grad_norm": 0.6716098344095058, + "learning_rate": 3.5851336294263696e-05, + "loss": 0.2427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25086358189582825, + "step": 975, + "valid_targets_mean": 2756.8, + "valid_targets_min": 1130 + }, + { + "epoch": 2.028985507246377, + "grad_norm": 0.4956351374220249, + "learning_rate": 3.578815035075597e-05, + "loss": 0.2524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2566456198692322, + "step": 980, + "valid_targets_mean": 5390.4, + "valid_targets_min": 815 + }, + { + "epoch": 2.039337474120083, + "grad_norm": 0.7064220242643727, + "learning_rate": 3.572454343678705e-05, + "loss": 0.2702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2861258387565613, + "step": 985, + "valid_targets_mean": 2059.2, + "valid_targets_min": 501 + }, + { + "epoch": 2.049689440993789, + "grad_norm": 0.8729780469833862, + "learning_rate": 3.566051724835245e-05, + "loss": 0.2594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32387927174568176, + "step": 990, + "valid_targets_mean": 1951.7, + "valid_targets_min": 656 + }, + { + "epoch": 2.060041407867495, + "grad_norm": 0.6569866235371754, + "learning_rate": 3.559607349262705e-05, + "loss": 0.2351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23175671696662903, + "step": 995, + "valid_targets_mean": 2289.1, + "valid_targets_min": 895 + }, + { + "epoch": 2.070393374741201, + "grad_norm": 0.9247889042046914, + "learning_rate": 3.5531213887919667e-05, + "loss": 0.2548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2623339295387268, + "step": 1000, + "valid_targets_mean": 2524.4, + "valid_targets_min": 232 + }, + { + "epoch": 2.080745341614907, + "grad_norm": 0.6363817576343097, + "learning_rate": 3.546594016362716e-05, + "loss": 0.2895, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28919100761413574, + "step": 1005, + "valid_targets_mean": 2776.9, + "valid_targets_min": 617 + }, + { + "epoch": 2.091097308488613, + "grad_norm": 0.7881930796673811, + "learning_rate": 3.540025406018834e-05, + "loss": 0.2697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2384035289287567, + "step": 1010, + "valid_targets_mean": 1624.8, + "valid_targets_min": 487 + }, + { + "epoch": 2.101449275362319, + "grad_norm": 0.4465477565347781, + "learning_rate": 3.533415732903759e-05, + "loss": 0.2818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2667323648929596, + "step": 1015, + "valid_targets_mean": 5425.1, + "valid_targets_min": 783 + }, + { + "epoch": 2.111801242236025, + "grad_norm": 0.7631555157748504, + "learning_rate": 3.5267651732558146e-05, + "loss": 0.2935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3835008442401886, + "step": 1020, + "valid_targets_mean": 2624.8, + "valid_targets_min": 510 + }, + { + "epoch": 2.122153209109731, + "grad_norm": 0.55664148104698, + "learning_rate": 3.520073904403509e-05, + "loss": 0.2668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25191134214401245, + "step": 1025, + "valid_targets_mean": 3272.8, + "valid_targets_min": 972 + }, + { + "epoch": 2.132505175983437, + "grad_norm": 0.8149036515302649, + "learning_rate": 3.513342104760809e-05, + "loss": 0.2167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22442390024662018, + "step": 1030, + "valid_targets_mean": 1833.1, + "valid_targets_min": 818 + }, + { + "epoch": 2.142857142857143, + "grad_norm": 0.5802425971474474, + "learning_rate": 3.506569953822383e-05, + "loss": 0.2449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2896159291267395, + "step": 1035, + "valid_targets_mean": 3547.2, + "valid_targets_min": 342 + }, + { + "epoch": 2.153209109730849, + "grad_norm": 0.6221670768855667, + "learning_rate": 3.4997576321588126e-05, + "loss": 0.2757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22672376036643982, + "step": 1040, + "valid_targets_mean": 2443.9, + "valid_targets_min": 647 + }, + { + "epoch": 2.1635610766045548, + "grad_norm": 0.8278897691479519, + "learning_rate": 3.492905321411781e-05, + "loss": 0.3221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2788503170013428, + "step": 1045, + "valid_targets_mean": 2161.9, + "valid_targets_min": 821 + }, + { + "epoch": 2.1739130434782608, + "grad_norm": 0.6727746895274888, + "learning_rate": 3.486013204289227e-05, + "loss": 0.2404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21209818124771118, + "step": 1050, + "valid_targets_mean": 2909.8, + "valid_targets_min": 1008 + }, + { + "epoch": 2.1842650103519667, + "grad_norm": 0.6334119835007679, + "learning_rate": 3.479081464560475e-05, + "loss": 0.2621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29021257162094116, + "step": 1055, + "valid_targets_mean": 3242.4, + "valid_targets_min": 542 + }, + { + "epoch": 2.1946169772256727, + "grad_norm": 1.1830366048428838, + "learning_rate": 3.4721102870513345e-05, + "loss": 0.253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36102619767189026, + "step": 1060, + "valid_targets_mean": 2006.9, + "valid_targets_min": 839 + }, + { + "epoch": 2.2049689440993787, + "grad_norm": 0.8106585345203733, + "learning_rate": 3.465099857639173e-05, + "loss": 0.3153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27605193853378296, + "step": 1065, + "valid_targets_mean": 1677.1, + "valid_targets_min": 705 + }, + { + "epoch": 2.2153209109730847, + "grad_norm": 0.5880377494754921, + "learning_rate": 3.458050363247957e-05, + "loss": 0.2832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24764445424079895, + "step": 1070, + "valid_targets_mean": 3182.0, + "valid_targets_min": 700 + }, + { + "epoch": 2.2256728778467907, + "grad_norm": 0.5697074711554126, + "learning_rate": 3.450961991843271e-05, + "loss": 0.2566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24443545937538147, + "step": 1075, + "valid_targets_mean": 4197.8, + "valid_targets_min": 1715 + }, + { + "epoch": 2.2360248447204967, + "grad_norm": 0.7599961285605185, + "learning_rate": 3.4438349324273044e-05, + "loss": 0.2708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3468702733516693, + "step": 1080, + "valid_targets_mean": 2894.7, + "valid_targets_min": 904 + }, + { + "epoch": 2.246376811594203, + "grad_norm": 0.7320534504878216, + "learning_rate": 3.436669375033812e-05, + "loss": 0.2941, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2806239724159241, + "step": 1085, + "valid_targets_mean": 2801.9, + "valid_targets_min": 804 + }, + { + "epoch": 2.256728778467909, + "grad_norm": 0.6651232453856196, + "learning_rate": 3.429465510723046e-05, + "loss": 0.2399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25253286957740784, + "step": 1090, + "valid_targets_mean": 2427.6, + "valid_targets_min": 404 + }, + { + "epoch": 2.267080745341615, + "grad_norm": 0.7318417075725385, + "learning_rate": 3.4222235315766634e-05, + "loss": 0.2887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2881121039390564, + "step": 1095, + "valid_targets_mean": 2698.2, + "valid_targets_min": 553 + }, + { + "epoch": 2.277432712215321, + "grad_norm": 0.5060721290785742, + "learning_rate": 3.414943630692605e-05, + "loss": 0.2505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1891990602016449, + "step": 1100, + "valid_targets_mean": 3982.4, + "valid_targets_min": 1146 + }, + { + "epoch": 2.287784679089027, + "grad_norm": 0.6388973076559393, + "learning_rate": 3.407626002179943e-05, + "loss": 0.2605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2833097577095032, + "step": 1105, + "valid_targets_mean": 2923.7, + "valid_targets_min": 837 + }, + { + "epoch": 2.298136645962733, + "grad_norm": 0.7050108379240185, + "learning_rate": 3.40027084115371e-05, + "loss": 0.2694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25936251878738403, + "step": 1110, + "valid_targets_mean": 2194.5, + "valid_targets_min": 761 + }, + { + "epoch": 2.308488612836439, + "grad_norm": 1.1896188271617125, + "learning_rate": 3.3928783437296906e-05, + "loss": 0.2785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23933464288711548, + "step": 1115, + "valid_targets_mean": 2121.6, + "valid_targets_min": 435 + }, + { + "epoch": 2.318840579710145, + "grad_norm": 0.5651209935539805, + "learning_rate": 3.385448707019199e-05, + "loss": 0.2806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25165855884552, + "step": 1120, + "valid_targets_mean": 4061.7, + "valid_targets_min": 695 + }, + { + "epoch": 2.329192546583851, + "grad_norm": 0.7262731183176127, + "learning_rate": 3.37798212912382e-05, + "loss": 0.2524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2577860951423645, + "step": 1125, + "valid_targets_mean": 2202.4, + "valid_targets_min": 815 + }, + { + "epoch": 2.339544513457557, + "grad_norm": 0.5618393584824415, + "learning_rate": 3.370478809130126e-05, + "loss": 0.2452, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1696050465106964, + "step": 1130, + "valid_targets_mean": 2500.9, + "valid_targets_min": 347 + }, + { + "epoch": 2.349896480331263, + "grad_norm": 0.7126252849015065, + "learning_rate": 3.3629389471043686e-05, + "loss": 0.2928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3189730942249298, + "step": 1135, + "valid_targets_mean": 3294.7, + "valid_targets_min": 1051 + }, + { + "epoch": 2.360248447204969, + "grad_norm": 0.9604299856824035, + "learning_rate": 3.355362744087147e-05, + "loss": 0.3133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40159639716148376, + "step": 1140, + "valid_targets_mean": 3716.9, + "valid_targets_min": 314 + }, + { + "epoch": 2.370600414078675, + "grad_norm": 0.6100748889704821, + "learning_rate": 3.347750402088046e-05, + "loss": 0.2548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25193271040916443, + "step": 1145, + "valid_targets_mean": 3066.1, + "valid_targets_min": 954 + }, + { + "epoch": 2.380952380952381, + "grad_norm": 0.6513131119801923, + "learning_rate": 3.3401021240802446e-05, + "loss": 0.259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23295412957668304, + "step": 1150, + "valid_targets_mean": 2498.0, + "valid_targets_min": 428 + }, + { + "epoch": 2.391304347826087, + "grad_norm": 0.6813554157755068, + "learning_rate": 3.332418113995116e-05, + "loss": 0.2621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19543245434761047, + "step": 1155, + "valid_targets_mean": 2158.0, + "valid_targets_min": 831 + }, + { + "epoch": 2.401656314699793, + "grad_norm": 0.7313840895870419, + "learning_rate": 3.3246985767167763e-05, + "loss": 0.2842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22841569781303406, + "step": 1160, + "valid_targets_mean": 2154.1, + "valid_targets_min": 400 + }, + { + "epoch": 2.412008281573499, + "grad_norm": 0.5935846326263404, + "learning_rate": 3.316943718076633e-05, + "loss": 0.2533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.333047479391098, + "step": 1165, + "valid_targets_mean": 4248.6, + "valid_targets_min": 943 + }, + { + "epoch": 2.422360248447205, + "grad_norm": 0.791044272962795, + "learning_rate": 3.3091537448478854e-05, + "loss": 0.2778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26495736837387085, + "step": 1170, + "valid_targets_mean": 2369.9, + "valid_targets_min": 975 + }, + { + "epoch": 2.432712215320911, + "grad_norm": 0.6676783405628622, + "learning_rate": 3.301328864740024e-05, + "loss": 0.2738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33358559012413025, + "step": 1175, + "valid_targets_mean": 2824.4, + "valid_targets_min": 438 + }, + { + "epoch": 2.443064182194617, + "grad_norm": 0.6210999987816006, + "learning_rate": 3.2934692863932826e-05, + "loss": 0.2883, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37713438272476196, + "step": 1180, + "valid_targets_mean": 4190.8, + "valid_targets_min": 972 + }, + { + "epoch": 2.453416149068323, + "grad_norm": 0.9102484172009931, + "learning_rate": 3.285575219373079e-05, + "loss": 0.2817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27761310338974, + "step": 1185, + "valid_targets_mean": 2286.0, + "valid_targets_min": 683 + }, + { + "epoch": 2.463768115942029, + "grad_norm": 0.8264325779605302, + "learning_rate": 3.2776468741644254e-05, + "loss": 0.2705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35481444001197815, + "step": 1190, + "valid_targets_mean": 1918.8, + "valid_targets_min": 488 + }, + { + "epoch": 2.474120082815735, + "grad_norm": 0.7271253546023544, + "learning_rate": 3.26968446216632e-05, + "loss": 0.2342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26062947511672974, + "step": 1195, + "valid_targets_mean": 2231.8, + "valid_targets_min": 709 + }, + { + "epoch": 2.4844720496894412, + "grad_norm": 0.8973929287276415, + "learning_rate": 3.2616881956861025e-05, + "loss": 0.2677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27928027510643005, + "step": 1200, + "valid_targets_mean": 1896.2, + "valid_targets_min": 348 + }, + { + "epoch": 2.494824016563147, + "grad_norm": 0.4082069685047567, + "learning_rate": 3.2536582879338046e-05, + "loss": 0.2536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18940213322639465, + "step": 1205, + "valid_targets_mean": 4323.4, + "valid_targets_min": 968 + }, + { + "epoch": 2.505175983436853, + "grad_norm": 0.7017656676375894, + "learning_rate": 3.245594953016455e-05, + "loss": 0.2614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2417268604040146, + "step": 1210, + "valid_targets_mean": 2325.2, + "valid_targets_min": 812 + }, + { + "epoch": 2.5155279503105588, + "grad_norm": 0.6814415938184818, + "learning_rate": 3.237498405932374e-05, + "loss": 0.2775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33832401037216187, + "step": 1215, + "valid_targets_mean": 2867.0, + "valid_targets_min": 887 + }, + { + "epoch": 2.525879917184265, + "grad_norm": 0.7801644137826693, + "learning_rate": 3.2293688625654414e-05, + "loss": 0.2863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27744320034980774, + "step": 1220, + "valid_targets_mean": 1826.2, + "valid_targets_min": 551 + }, + { + "epoch": 2.536231884057971, + "grad_norm": 0.7921338973324987, + "learning_rate": 3.221206539679342e-05, + "loss": 0.2726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2806174159049988, + "step": 1225, + "valid_targets_mean": 4052.1, + "valid_targets_min": 953 + }, + { + "epoch": 2.546583850931677, + "grad_norm": 0.7638655535817076, + "learning_rate": 3.213011654911781e-05, + "loss": 0.2941, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22334516048431396, + "step": 1230, + "valid_targets_mean": 1654.2, + "valid_targets_min": 677 + }, + { + "epoch": 2.556935817805383, + "grad_norm": 0.9533183958110001, + "learning_rate": 3.204784426768685e-05, + "loss": 0.2639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22882500290870667, + "step": 1235, + "valid_targets_mean": 3256.3, + "valid_targets_min": 917 + }, + { + "epoch": 2.567287784679089, + "grad_norm": 0.3343640416944216, + "learning_rate": 3.1965250746183755e-05, + "loss": 0.2482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13794702291488647, + "step": 1240, + "valid_targets_mean": 5393.2, + "valid_targets_min": 363 + }, + { + "epoch": 2.577639751552795, + "grad_norm": 0.6369559557128881, + "learning_rate": 3.1882338186857164e-05, + "loss": 0.2423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2859078645706177, + "step": 1245, + "valid_targets_mean": 2821.6, + "valid_targets_min": 740 + }, + { + "epoch": 2.587991718426501, + "grad_norm": 0.6063783751731852, + "learning_rate": 3.1799108800462466e-05, + "loss": 0.3386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31201863288879395, + "step": 1250, + "valid_targets_mean": 3289.1, + "valid_targets_min": 789 + }, + { + "epoch": 2.598343685300207, + "grad_norm": 0.5142821780449842, + "learning_rate": 3.1715564806202815e-05, + "loss": 0.3602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2918277978897095, + "step": 1255, + "valid_targets_mean": 3862.7, + "valid_targets_min": 802 + }, + { + "epoch": 2.608695652173913, + "grad_norm": 0.5889004684696514, + "learning_rate": 3.1631708431669985e-05, + "loss": 0.2493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2696582078933716, + "step": 1260, + "valid_targets_mean": 3824.6, + "valid_targets_min": 899 + }, + { + "epoch": 2.619047619047619, + "grad_norm": 0.6639591263396689, + "learning_rate": 3.1547541912784965e-05, + "loss": 0.2819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25455427169799805, + "step": 1265, + "valid_targets_mean": 2578.6, + "valid_targets_min": 337 + }, + { + "epoch": 2.629399585921325, + "grad_norm": 0.6938668640706942, + "learning_rate": 3.146306749373833e-05, + "loss": 0.27, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.285896360874176, + "step": 1270, + "valid_targets_mean": 2242.8, + "valid_targets_min": 658 + }, + { + "epoch": 2.639751552795031, + "grad_norm": 0.6635900887759165, + "learning_rate": 3.137828742693041e-05, + "loss": 0.3081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3423786163330078, + "step": 1275, + "valid_targets_mean": 3281.0, + "valid_targets_min": 980 + }, + { + "epoch": 2.650103519668737, + "grad_norm": 0.6801128006550414, + "learning_rate": 3.129320397291125e-05, + "loss": 0.2364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2767699360847473, + "step": 1280, + "valid_targets_mean": 2735.9, + "valid_targets_min": 464 + }, + { + "epoch": 2.660455486542443, + "grad_norm": 0.7025184682700185, + "learning_rate": 3.12078194003203e-05, + "loss": 0.2553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27691537141799927, + "step": 1285, + "valid_targets_mean": 2134.4, + "valid_targets_min": 626 + }, + { + "epoch": 2.670807453416149, + "grad_norm": 0.5924464858785424, + "learning_rate": 3.112213598582596e-05, + "loss": 0.2732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15232308208942413, + "step": 1290, + "valid_targets_mean": 4905.1, + "valid_targets_min": 777 + }, + { + "epoch": 2.681159420289855, + "grad_norm": 0.7877428337019158, + "learning_rate": 3.1036156014064846e-05, + "loss": 0.2453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24164274334907532, + "step": 1295, + "valid_targets_mean": 2376.1, + "valid_targets_min": 1018 + }, + { + "epoch": 2.691511387163561, + "grad_norm": 0.7142467961580515, + "learning_rate": 3.094988177758091e-05, + "loss": 0.2558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37197333574295044, + "step": 1300, + "valid_targets_mean": 3346.9, + "valid_targets_min": 628 + }, + { + "epoch": 2.701863354037267, + "grad_norm": 0.5870935195942067, + "learning_rate": 3.086331557676426e-05, + "loss": 0.2386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2159366011619568, + "step": 1305, + "valid_targets_mean": 2964.8, + "valid_targets_min": 274 + }, + { + "epoch": 2.712215320910973, + "grad_norm": 0.6872956928546283, + "learning_rate": 3.0776459719789876e-05, + "loss": 0.2727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31350186467170715, + "step": 1310, + "valid_targets_mean": 2581.4, + "valid_targets_min": 696 + }, + { + "epoch": 2.722567287784679, + "grad_norm": 0.7486101772028452, + "learning_rate": 3.0689316522556026e-05, + "loss": 0.3293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2855913043022156, + "step": 1315, + "valid_targets_mean": 2561.9, + "valid_targets_min": 750 + }, + { + "epoch": 2.732919254658385, + "grad_norm": 0.4579078783293101, + "learning_rate": 3.060188830862254e-05, + "loss": 0.2429, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27316346764564514, + "step": 1320, + "valid_targets_mean": 6253.2, + "valid_targets_min": 780 + }, + { + "epoch": 2.7432712215320914, + "grad_norm": 0.5358219876585554, + "learning_rate": 3.0514177409148854e-05, + "loss": 0.2497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19187697768211365, + "step": 1325, + "valid_targets_mean": 2720.8, + "valid_targets_min": 386 + }, + { + "epoch": 2.753623188405797, + "grad_norm": 0.5552497910549137, + "learning_rate": 3.042618616283184e-05, + "loss": 0.2521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27454033493995667, + "step": 1330, + "valid_targets_mean": 3399.2, + "valid_targets_min": 397 + }, + { + "epoch": 2.7639751552795033, + "grad_norm": 0.6823834665507209, + "learning_rate": 3.0337916915843437e-05, + "loss": 0.2599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2507593631744385, + "step": 1335, + "valid_targets_mean": 2280.2, + "valid_targets_min": 857 + }, + { + "epoch": 2.774327122153209, + "grad_norm": 0.7550596937365505, + "learning_rate": 3.024937202176813e-05, + "loss": 0.3365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2813539505004883, + "step": 1340, + "valid_targets_mean": 1925.6, + "valid_targets_min": 800 + }, + { + "epoch": 2.7846790890269153, + "grad_norm": 0.620351083586836, + "learning_rate": 3.016055384154016e-05, + "loss": 0.2645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27360090613365173, + "step": 1345, + "valid_targets_mean": 2753.7, + "valid_targets_min": 1050 + }, + { + "epoch": 2.795031055900621, + "grad_norm": 0.5870647895388508, + "learning_rate": 3.007146474338061e-05, + "loss": 0.2453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25962918996810913, + "step": 1350, + "valid_targets_mean": 3009.2, + "valid_targets_min": 843 + }, + { + "epoch": 2.8053830227743273, + "grad_norm": 0.7132808075099697, + "learning_rate": 2.9982107102734225e-05, + "loss": 0.2373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28429892659187317, + "step": 1355, + "valid_targets_mean": 2095.7, + "valid_targets_min": 1051 + }, + { + "epoch": 2.8157349896480333, + "grad_norm": 0.5718661527861477, + "learning_rate": 2.9892483302206067e-05, + "loss": 0.2013, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20819316804409027, + "step": 1360, + "valid_targets_mean": 2768.9, + "valid_targets_min": 258 + }, + { + "epoch": 2.8260869565217392, + "grad_norm": 0.3634008953249174, + "learning_rate": 2.9802595731498027e-05, + "loss": 0.2315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1665099710226059, + "step": 1365, + "valid_targets_mean": 6357.5, + "valid_targets_min": 977 + }, + { + "epoch": 2.8364389233954452, + "grad_norm": 0.7314349692520135, + "learning_rate": 2.9712446787345076e-05, + "loss": 0.2482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30443474650382996, + "step": 1370, + "valid_targets_mean": 2490.6, + "valid_targets_min": 540 + }, + { + "epoch": 2.846790890269151, + "grad_norm": 0.5362978528913575, + "learning_rate": 2.962203887345137e-05, + "loss": 0.2646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2595052719116211, + "step": 1375, + "valid_targets_mean": 3539.8, + "valid_targets_min": 720 + }, + { + "epoch": 2.857142857142857, + "grad_norm": 0.6062346483149705, + "learning_rate": 2.9531374400426158e-05, + "loss": 0.3262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24060280621051788, + "step": 1380, + "valid_targets_mean": 2836.4, + "valid_targets_min": 652 + }, + { + "epoch": 2.867494824016563, + "grad_norm": 0.7082210941894594, + "learning_rate": 2.9440455785719496e-05, + "loss": 0.2887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2264600545167923, + "step": 1385, + "valid_targets_mean": 1664.6, + "valid_targets_min": 495 + }, + { + "epoch": 2.877846790890269, + "grad_norm": 0.6123887449000341, + "learning_rate": 2.934928545355781e-05, + "loss": 0.2984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3257123827934265, + "step": 1390, + "valid_targets_mean": 3369.2, + "valid_targets_min": 829 + }, + { + "epoch": 2.888198757763975, + "grad_norm": 0.6864043917128869, + "learning_rate": 2.925786583487922e-05, + "loss": 0.2905, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31987008452415466, + "step": 1395, + "valid_targets_mean": 2711.7, + "valid_targets_min": 810 + }, + { + "epoch": 2.898550724637681, + "grad_norm": 0.6080342575365325, + "learning_rate": 2.916619936726877e-05, + "loss": 0.2845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2373320758342743, + "step": 1400, + "valid_targets_mean": 3641.7, + "valid_targets_min": 881 + }, + { + "epoch": 2.908902691511387, + "grad_norm": 0.7181192036864602, + "learning_rate": 2.9074288494893407e-05, + "loss": 0.3389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31169378757476807, + "step": 1405, + "valid_targets_mean": 2594.9, + "valid_targets_min": 787 + }, + { + "epoch": 2.919254658385093, + "grad_norm": 0.7116785040649848, + "learning_rate": 2.8982135668436796e-05, + "loss": 0.2504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2377220094203949, + "step": 1410, + "valid_targets_mean": 2565.6, + "valid_targets_min": 812 + }, + { + "epoch": 2.929606625258799, + "grad_norm": 0.527003497336332, + "learning_rate": 2.8889743345034012e-05, + "loss": 0.2567, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24901200830936432, + "step": 1415, + "valid_targets_mean": 3355.5, + "valid_targets_min": 906 + }, + { + "epoch": 2.939958592132505, + "grad_norm": 0.7215643198974966, + "learning_rate": 2.8797113988205992e-05, + "loss": 0.2702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26589393615722656, + "step": 1420, + "valid_targets_mean": 2117.3, + "valid_targets_min": 518 + }, + { + "epoch": 2.950310559006211, + "grad_norm": 0.7304693761897525, + "learning_rate": 2.8704250067793853e-05, + "loss": 0.2543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3305160701274872, + "step": 1425, + "valid_targets_mean": 2289.8, + "valid_targets_min": 795 + }, + { + "epoch": 2.960662525879917, + "grad_norm": 0.5524041278998529, + "learning_rate": 2.8611154059893072e-05, + "loss": 0.2346, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2436435967683792, + "step": 1430, + "valid_targets_mean": 3544.5, + "valid_targets_min": 642 + }, + { + "epoch": 2.971014492753623, + "grad_norm": 0.6949146976027709, + "learning_rate": 2.851782844678741e-05, + "loss": 0.2591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24652817845344543, + "step": 1435, + "valid_targets_mean": 2396.6, + "valid_targets_min": 693 + }, + { + "epoch": 2.981366459627329, + "grad_norm": 0.855845523061833, + "learning_rate": 2.8424275716882764e-05, + "loss": 0.3187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35203707218170166, + "step": 1440, + "valid_targets_mean": 2468.2, + "valid_targets_min": 960 + }, + { + "epoch": 2.991718426501035, + "grad_norm": 0.6069280690104846, + "learning_rate": 2.8330498364640803e-05, + "loss": 0.2729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2389097809791565, + "step": 1445, + "valid_targets_mean": 2823.1, + "valid_targets_min": 850 + }, + { + "epoch": 3.002070393374741, + "grad_norm": 0.7079484446595876, + "learning_rate": 2.823649889051245e-05, + "loss": 0.2605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33429914712905884, + "step": 1450, + "valid_targets_mean": 2347.8, + "valid_targets_min": 650 + }, + { + "epoch": 3.012422360248447, + "grad_norm": 0.5927362264974598, + "learning_rate": 2.8142279800871226e-05, + "loss": 0.2097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17803658545017242, + "step": 1455, + "valid_targets_mean": 2970.0, + "valid_targets_min": 683 + }, + { + "epoch": 3.022774327122153, + "grad_norm": 0.6173739269582847, + "learning_rate": 2.8047843607946416e-05, + "loss": 0.2336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21716290712356567, + "step": 1460, + "valid_targets_mean": 3280.1, + "valid_targets_min": 625 + }, + { + "epoch": 3.0331262939958594, + "grad_norm": 0.8835749274852875, + "learning_rate": 2.7953192829756087e-05, + "loss": 0.2605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23659950494766235, + "step": 1465, + "valid_targets_mean": 1768.8, + "valid_targets_min": 692 + }, + { + "epoch": 3.0434782608695654, + "grad_norm": 0.6416344650777536, + "learning_rate": 2.7858329990039922e-05, + "loss": 0.2249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26247477531433105, + "step": 1470, + "valid_targets_mean": 4569.3, + "valid_targets_min": 658 + }, + { + "epoch": 3.0538302277432714, + "grad_norm": 0.7529415797399801, + "learning_rate": 2.7763257618191965e-05, + "loss": 0.2372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26717787981033325, + "step": 1475, + "valid_targets_mean": 2397.6, + "valid_targets_min": 277 + }, + { + "epoch": 3.0641821946169774, + "grad_norm": 0.7317704953810572, + "learning_rate": 2.7667978249193142e-05, + "loss": 0.26, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21715983748435974, + "step": 1480, + "valid_targets_mean": 2296.4, + "valid_targets_min": 769 + }, + { + "epoch": 3.0745341614906834, + "grad_norm": 0.5247604954590532, + "learning_rate": 2.757249442354373e-05, + "loss": 0.2678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27207493782043457, + "step": 1485, + "valid_targets_mean": 5021.0, + "valid_targets_min": 496 + }, + { + "epoch": 3.0848861283643894, + "grad_norm": 1.0520512297726068, + "learning_rate": 2.747680868719553e-05, + "loss": 0.2187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2122560292482376, + "step": 1490, + "valid_targets_mean": 2820.4, + "valid_targets_min": 725 + }, + { + "epoch": 3.0952380952380953, + "grad_norm": 0.6767778323794827, + "learning_rate": 2.738092359148405e-05, + "loss": 0.2312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2775751054286957, + "step": 1495, + "valid_targets_mean": 2690.2, + "valid_targets_min": 830 + }, + { + "epoch": 3.1055900621118013, + "grad_norm": 0.6576226728100677, + "learning_rate": 2.7284841693060462e-05, + "loss": 0.2208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20576655864715576, + "step": 1500, + "valid_targets_mean": 2834.8, + "valid_targets_min": 768 + }, + { + "epoch": 2.439222042139384, + "grad_norm": 0.8205999048363152, + "learning_rate": 3.295112400968073e-05, + "loss": 0.2474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2548583745956421, + "step": 1505, + "valid_targets_mean": 1901.8, + "valid_targets_min": 292 + }, + { + "epoch": 2.447325769854133, + "grad_norm": 0.5315794072702741, + "learning_rate": 3.2889429817593494e-05, + "loss": 0.2603, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1704677790403366, + "step": 1510, + "valid_targets_mean": 3963.4, + "valid_targets_min": 594 + }, + { + "epoch": 2.4554294975688817, + "grad_norm": 0.7664196020253414, + "learning_rate": 3.2827525129535135e-05, + "loss": 0.2847, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1991099715232849, + "step": 1515, + "valid_targets_mean": 2012.4, + "valid_targets_min": 624 + }, + { + "epoch": 2.4635332252836304, + "grad_norm": 0.8521592624429181, + "learning_rate": 3.276541095646482e-05, + "loss": 0.2445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29062721133232117, + "step": 1520, + "valid_targets_mean": 1823.2, + "valid_targets_min": 501 + }, + { + "epoch": 2.471636952998379, + "grad_norm": 0.5852765718678786, + "learning_rate": 3.2703088312762825e-05, + "loss": 0.2458, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24727652966976166, + "step": 1525, + "valid_targets_mean": 3405.0, + "valid_targets_min": 363 + }, + { + "epoch": 2.479740680713128, + "grad_norm": 0.813053837691479, + "learning_rate": 3.2640558216213914e-05, + "loss": 0.2491, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30369141697883606, + "step": 1530, + "valid_targets_mean": 2343.3, + "valid_targets_min": 696 + }, + { + "epoch": 2.487844408427877, + "grad_norm": 0.8194320234055189, + "learning_rate": 3.2577821687990764e-05, + "loss": 0.2135, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23056931793689728, + "step": 1535, + "valid_targets_mean": 2043.1, + "valid_targets_min": 216 + }, + { + "epoch": 2.4959481361426255, + "grad_norm": 0.7683424849834839, + "learning_rate": 3.2514879752637236e-05, + "loss": 0.2137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2309729903936386, + "step": 1540, + "valid_targets_mean": 2539.1, + "valid_targets_min": 331 + }, + { + "epoch": 2.5040518638573745, + "grad_norm": 0.776312961878328, + "learning_rate": 3.2451733438051705e-05, + "loss": 0.2291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18839803338050842, + "step": 1545, + "valid_targets_mean": 1955.8, + "valid_targets_min": 514 + }, + { + "epoch": 2.512155591572123, + "grad_norm": 0.655759060281187, + "learning_rate": 3.238838377547023e-05, + "loss": 0.2003, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.189316064119339, + "step": 1550, + "valid_targets_mean": 2929.6, + "valid_targets_min": 757 + }, + { + "epoch": 2.520259319286872, + "grad_norm": 1.6044275923948417, + "learning_rate": 3.2324831799449724e-05, + "loss": 0.2272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17633099853992462, + "step": 1555, + "valid_targets_mean": 2779.9, + "valid_targets_min": 820 + }, + { + "epoch": 2.528363047001621, + "grad_norm": 0.6561320237172028, + "learning_rate": 3.226107854785106e-05, + "loss": 0.2557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1860257238149643, + "step": 1560, + "valid_targets_mean": 3073.5, + "valid_targets_min": 800 + }, + { + "epoch": 2.5364667747163696, + "grad_norm": 0.6383285185047638, + "learning_rate": 3.2197125061822135e-05, + "loss": 0.2507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2523868680000305, + "step": 1565, + "valid_targets_mean": 3199.5, + "valid_targets_min": 727 + }, + { + "epoch": 2.5445705024311183, + "grad_norm": 0.7962930999328149, + "learning_rate": 3.213297238578082e-05, + "loss": 0.2197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20864513516426086, + "step": 1570, + "valid_targets_mean": 2732.2, + "valid_targets_min": 1016 + }, + { + "epoch": 2.5526742301458674, + "grad_norm": 0.7676276530225514, + "learning_rate": 3.206862156739799e-05, + "loss": 0.2563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3047863245010376, + "step": 1575, + "valid_targets_mean": 2614.4, + "valid_targets_min": 514 + }, + { + "epoch": 2.560777957860616, + "grad_norm": 0.8226887644393849, + "learning_rate": 3.2004073657580314e-05, + "loss": 0.1946, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2075314223766327, + "step": 1580, + "valid_targets_mean": 2038.8, + "valid_targets_min": 337 + }, + { + "epoch": 2.5688816855753647, + "grad_norm": 0.7831791976947534, + "learning_rate": 3.193932971045316e-05, + "loss": 0.2325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19461211562156677, + "step": 1585, + "valid_targets_mean": 2053.1, + "valid_targets_min": 658 + }, + { + "epoch": 2.5769854132901133, + "grad_norm": 0.5644239851430982, + "learning_rate": 3.187439078334338e-05, + "loss": 0.2177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18497009575366974, + "step": 1590, + "valid_targets_mean": 4375.7, + "valid_targets_min": 704 + }, + { + "epoch": 2.585089141004862, + "grad_norm": 0.7466943240187409, + "learning_rate": 3.180925793676199e-05, + "loss": 0.2696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34886205196380615, + "step": 1595, + "valid_targets_mean": 3252.1, + "valid_targets_min": 704 + }, + { + "epoch": 2.593192868719611, + "grad_norm": 0.654235875346469, + "learning_rate": 3.1743932234386905e-05, + "loss": 0.2239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23556801676750183, + "step": 1600, + "valid_targets_mean": 2707.2, + "valid_targets_min": 440 + }, + { + "epoch": 2.6012965964343597, + "grad_norm": 0.8478531512416733, + "learning_rate": 3.167841474304555e-05, + "loss": 0.2373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2540985345840454, + "step": 1605, + "valid_targets_mean": 2202.8, + "valid_targets_min": 807 + }, + { + "epoch": 2.6094003241491084, + "grad_norm": 0.7568048524801189, + "learning_rate": 3.161270653269743e-05, + "loss": 0.2419, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2507561445236206, + "step": 1610, + "valid_targets_mean": 2478.7, + "valid_targets_min": 884 + }, + { + "epoch": 2.6175040518638575, + "grad_norm": 0.6768880292640026, + "learning_rate": 3.154680867641666e-05, + "loss": 0.2685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2119147777557373, + "step": 1615, + "valid_targets_mean": 2845.9, + "valid_targets_min": 933 + }, + { + "epoch": 2.625607779578606, + "grad_norm": 0.6937745038803551, + "learning_rate": 3.1480722250374454e-05, + "loss": 0.2131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15716394782066345, + "step": 1620, + "valid_targets_mean": 3715.1, + "valid_targets_min": 509 + }, + { + "epoch": 2.633711507293355, + "grad_norm": 0.687030718167616, + "learning_rate": 3.1414448333821526e-05, + "loss": 0.2485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26078009605407715, + "step": 1625, + "valid_targets_mean": 2993.9, + "valid_targets_min": 835 + }, + { + "epoch": 2.641815235008104, + "grad_norm": 0.794998960315971, + "learning_rate": 3.1347988009070496e-05, + "loss": 0.2525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22042764723300934, + "step": 1630, + "valid_targets_mean": 2182.0, + "valid_targets_min": 939 + }, + { + "epoch": 2.6499189627228525, + "grad_norm": 0.5288765889583341, + "learning_rate": 3.1281342361478184e-05, + "loss": 0.2031, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18932923674583435, + "step": 1635, + "valid_targets_mean": 4350.9, + "valid_targets_min": 916 + }, + { + "epoch": 2.658022690437601, + "grad_norm": 0.8061510946147543, + "learning_rate": 3.121451247942789e-05, + "loss": 0.215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23893296718597412, + "step": 1640, + "valid_targets_mean": 2239.7, + "valid_targets_min": 1029 + }, + { + "epoch": 2.6661264181523503, + "grad_norm": 0.8897502162805498, + "learning_rate": 3.1147499454311654e-05, + "loss": 0.2132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24748632311820984, + "step": 1645, + "valid_targets_mean": 3042.6, + "valid_targets_min": 883 + }, + { + "epoch": 2.674230145867099, + "grad_norm": 0.7411104018017028, + "learning_rate": 3.1080304380512386e-05, + "loss": 0.2258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3214663565158844, + "step": 1650, + "valid_targets_mean": 3191.7, + "valid_targets_min": 954 + }, + { + "epoch": 2.6823338735818476, + "grad_norm": 0.6001100044618921, + "learning_rate": 3.101292835538602e-05, + "loss": 0.2736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21718555688858032, + "step": 1655, + "valid_targets_mean": 2929.2, + "valid_targets_min": 510 + }, + { + "epoch": 2.6904376012965967, + "grad_norm": 0.5090566874249545, + "learning_rate": 3.0945372479243605e-05, + "loss": 0.1893, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1351870894432068, + "step": 1660, + "valid_targets_mean": 3636.1, + "valid_targets_min": 511 + }, + { + "epoch": 2.6985413290113454, + "grad_norm": 0.8691139316583438, + "learning_rate": 3.087763785533328e-05, + "loss": 0.2353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24571505188941956, + "step": 1665, + "valid_targets_mean": 2121.1, + "valid_targets_min": 1000 + }, + { + "epoch": 2.706645056726094, + "grad_norm": 0.7213977715140599, + "learning_rate": 3.0809725589822325e-05, + "loss": 0.2162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22522029280662537, + "step": 1670, + "valid_targets_mean": 2168.9, + "valid_targets_min": 752 + }, + { + "epoch": 2.7147487844408427, + "grad_norm": 0.6699076338131222, + "learning_rate": 3.074163679177907e-05, + "loss": 0.2315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24370843172073364, + "step": 1675, + "valid_targets_mean": 2977.4, + "valid_targets_min": 953 + }, + { + "epoch": 2.7228525121555913, + "grad_norm": 0.6699594167700161, + "learning_rate": 3.067337257315477e-05, + "loss": 0.2335, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19450387358665466, + "step": 1680, + "valid_targets_mean": 2758.7, + "valid_targets_min": 924 + }, + { + "epoch": 2.7309562398703404, + "grad_norm": 0.7672860021178419, + "learning_rate": 3.0604934048765444e-05, + "loss": 0.2039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22159236669540405, + "step": 1685, + "valid_targets_mean": 2533.1, + "valid_targets_min": 511 + }, + { + "epoch": 2.739059967585089, + "grad_norm": 0.7039751582203525, + "learning_rate": 3.05363223362737e-05, + "loss": 0.1949, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2049209177494049, + "step": 1690, + "valid_targets_mean": 2859.5, + "valid_targets_min": 950 + }, + { + "epoch": 2.7471636952998377, + "grad_norm": 2.679447162042586, + "learning_rate": 3.0467538556170463e-05, + "loss": 0.1958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1371978223323822, + "step": 1695, + "valid_targets_mean": 2292.4, + "valid_targets_min": 525 + }, + { + "epoch": 2.755267423014587, + "grad_norm": 0.6779419086781114, + "learning_rate": 3.0398583831756655e-05, + "loss": 0.2261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20101284980773926, + "step": 1700, + "valid_targets_mean": 2922.6, + "valid_targets_min": 734 + }, + { + "epoch": 2.7633711507293355, + "grad_norm": 0.6929703972487979, + "learning_rate": 3.03294592891249e-05, + "loss": 0.2141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2838400602340698, + "step": 1705, + "valid_targets_mean": 3094.0, + "valid_targets_min": 697 + }, + { + "epoch": 2.771474878444084, + "grad_norm": 0.4798656780585094, + "learning_rate": 3.0260166057141086e-05, + "loss": 0.209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18296658992767334, + "step": 1710, + "valid_targets_mean": 5082.8, + "valid_targets_min": 471 + }, + { + "epoch": 2.7795786061588332, + "grad_norm": 0.7731022915468665, + "learning_rate": 3.0190705267425956e-05, + "loss": 0.1932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25233393907546997, + "step": 1715, + "valid_targets_mean": 2334.8, + "valid_targets_min": 602 + }, + { + "epoch": 2.787682333873582, + "grad_norm": 0.833833609952511, + "learning_rate": 3.0121078054336633e-05, + "loss": 0.2451, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27136874198913574, + "step": 1720, + "valid_targets_mean": 3887.7, + "valid_targets_min": 629 + }, + { + "epoch": 2.7957860615883305, + "grad_norm": 0.572927781409477, + "learning_rate": 3.005128555494806e-05, + "loss": 0.1768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16209685802459717, + "step": 1725, + "valid_targets_mean": 3344.1, + "valid_targets_min": 929 + }, + { + "epoch": 2.8038897893030796, + "grad_norm": 0.8175035205190527, + "learning_rate": 2.998132890903448e-05, + "loss": 0.2499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2571600377559662, + "step": 1730, + "valid_targets_mean": 2496.4, + "valid_targets_min": 841 + }, + { + "epoch": 2.8119935170178283, + "grad_norm": 0.820561644375052, + "learning_rate": 2.9911209259050763e-05, + "loss": 0.2355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21497473120689392, + "step": 1735, + "valid_targets_mean": 2354.7, + "valid_targets_min": 1149 + }, + { + "epoch": 2.820097244732577, + "grad_norm": 0.6492150323752979, + "learning_rate": 2.984092775011382e-05, + "loss": 0.2228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16714109480381012, + "step": 1740, + "valid_targets_mean": 2458.3, + "valid_targets_min": 787 + }, + { + "epoch": 2.828200972447326, + "grad_norm": 0.7319549857733452, + "learning_rate": 2.9770485529983834e-05, + "loss": 0.2395, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22950349748134613, + "step": 1745, + "valid_targets_mean": 3142.0, + "valid_targets_min": 1024 + }, + { + "epoch": 2.8363047001620747, + "grad_norm": 0.6560080208991743, + "learning_rate": 2.9699883749045564e-05, + "loss": 0.2336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1986587643623352, + "step": 1750, + "valid_targets_mean": 3457.0, + "valid_targets_min": 477 + }, + { + "epoch": 2.8444084278768234, + "grad_norm": 0.6382253092955735, + "learning_rate": 2.962912356028953e-05, + "loss": 0.216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23129406571388245, + "step": 1755, + "valid_targets_mean": 2934.9, + "valid_targets_min": 859 + }, + { + "epoch": 2.852512155591572, + "grad_norm": 0.5301256913380961, + "learning_rate": 2.95582061192932e-05, + "loss": 0.2389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18352752923965454, + "step": 1760, + "valid_targets_mean": 5318.9, + "valid_targets_min": 288 + }, + { + "epoch": 2.8606158833063207, + "grad_norm": 0.5551356639022342, + "learning_rate": 2.9487132584202115e-05, + "loss": 0.2323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15121974050998688, + "step": 1765, + "valid_targets_mean": 3240.4, + "valid_targets_min": 972 + }, + { + "epoch": 2.8687196110210698, + "grad_norm": 0.6494961371690191, + "learning_rate": 2.9415904115710964e-05, + "loss": 0.2369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27294760942459106, + "step": 1770, + "valid_targets_mean": 3565.1, + "valid_targets_min": 882 + }, + { + "epoch": 2.8768233387358184, + "grad_norm": 0.755878858613056, + "learning_rate": 2.9344521877044633e-05, + "loss": 0.2449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2581571936607361, + "step": 1775, + "valid_targets_mean": 2870.9, + "valid_targets_min": 852 + }, + { + "epoch": 2.884927066450567, + "grad_norm": 0.6896180082571636, + "learning_rate": 2.927298703393924e-05, + "loss": 0.2498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3024943768978119, + "step": 1780, + "valid_targets_mean": 3049.6, + "valid_targets_min": 466 + }, + { + "epoch": 2.893030794165316, + "grad_norm": 0.5548812442234702, + "learning_rate": 2.9201300754623046e-05, + "loss": 0.2251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23311251401901245, + "step": 1785, + "valid_targets_mean": 4044.0, + "valid_targets_min": 686 + }, + { + "epoch": 2.901134521880065, + "grad_norm": 1.438754224110583, + "learning_rate": 2.9129464209797404e-05, + "loss": 0.2276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24073530733585358, + "step": 1790, + "valid_targets_mean": 1909.4, + "valid_targets_min": 617 + }, + { + "epoch": 2.9092382495948135, + "grad_norm": 0.7830491458219581, + "learning_rate": 2.9057478572617644e-05, + "loss": 0.215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24332499504089355, + "step": 1795, + "valid_targets_mean": 2556.9, + "valid_targets_min": 1002 + }, + { + "epoch": 2.9173419773095626, + "grad_norm": 0.6145444178155638, + "learning_rate": 2.898534501867391e-05, + "loss": 0.2337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25278767943382263, + "step": 1800, + "valid_targets_mean": 3891.1, + "valid_targets_min": 971 + }, + { + "epoch": 2.9254457050243112, + "grad_norm": 0.3981787416840784, + "learning_rate": 2.8913064725971947e-05, + "loss": 0.2051, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14965100586414337, + "step": 1805, + "valid_targets_mean": 5392.8, + "valid_targets_min": 908 + }, + { + "epoch": 2.93354943273906, + "grad_norm": 0.5818947273050361, + "learning_rate": 2.8840638874913894e-05, + "loss": 0.2228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24818670749664307, + "step": 1810, + "valid_targets_mean": 3600.8, + "valid_targets_min": 509 + }, + { + "epoch": 2.941653160453809, + "grad_norm": 0.7145037253167454, + "learning_rate": 2.8768068648278976e-05, + "loss": 0.2105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1713862121105194, + "step": 1815, + "valid_targets_mean": 2208.1, + "valid_targets_min": 815 + }, + { + "epoch": 2.9497568881685576, + "grad_norm": 0.6506752973751263, + "learning_rate": 2.8695355231204206e-05, + "loss": 0.2021, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1586257517337799, + "step": 1820, + "valid_targets_mean": 2533.8, + "valid_targets_min": 918 + }, + { + "epoch": 2.9578606158833063, + "grad_norm": 0.9308540302999432, + "learning_rate": 2.862249981116502e-05, + "loss": 0.2175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18500950932502747, + "step": 1825, + "valid_targets_mean": 1953.1, + "valid_targets_min": 386 + }, + { + "epoch": 2.965964343598055, + "grad_norm": 0.868527787807027, + "learning_rate": 2.854950357795589e-05, + "loss": 0.2713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30140724778175354, + "step": 1830, + "valid_targets_mean": 2394.5, + "valid_targets_min": 544 + }, + { + "epoch": 2.974068071312804, + "grad_norm": 0.7675779936155871, + "learning_rate": 2.847636772367091e-05, + "loss": 0.2659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38385745882987976, + "step": 1835, + "valid_targets_mean": 3416.3, + "valid_targets_min": 652 + }, + { + "epoch": 2.9821717990275527, + "grad_norm": 0.5095446691779011, + "learning_rate": 2.8403093442684287e-05, + "loss": 0.1997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17920516431331635, + "step": 1840, + "valid_targets_mean": 4411.1, + "valid_targets_min": 968 + }, + { + "epoch": 2.9902755267423013, + "grad_norm": 0.7425898253446929, + "learning_rate": 2.8329681931630877e-05, + "loss": 0.2287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21636709570884705, + "step": 1845, + "valid_targets_mean": 2047.3, + "valid_targets_min": 332 + }, + { + "epoch": 2.99837925445705, + "grad_norm": 0.7698249801134349, + "learning_rate": 2.825613438938663e-05, + "loss": 0.2574, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3017503619194031, + "step": 1850, + "valid_targets_mean": 2716.4, + "valid_targets_min": 815 + }, + { + "epoch": 3.006482982171799, + "grad_norm": 0.7062953708878924, + "learning_rate": 2.8182452017048983e-05, + "loss": 0.2273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16801732778549194, + "step": 1855, + "valid_targets_mean": 2252.6, + "valid_targets_min": 612 + }, + { + "epoch": 3.0145867098865478, + "grad_norm": 0.7009970781236867, + "learning_rate": 2.81086360179173e-05, + "loss": 0.2254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3520157039165497, + "step": 1860, + "valid_targets_mean": 3688.4, + "valid_targets_min": 821 + }, + { + "epoch": 3.0226904376012964, + "grad_norm": 1.0084991924004758, + "learning_rate": 2.8034687597473164e-05, + "loss": 0.2193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21333259344100952, + "step": 1865, + "valid_targets_mean": 2459.2, + "valid_targets_min": 286 + }, + { + "epoch": 3.0307941653160455, + "grad_norm": 0.5137156161040282, + "learning_rate": 2.796060796336074e-05, + "loss": 0.2002, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18863826990127563, + "step": 1870, + "valid_targets_mean": 5101.9, + "valid_targets_min": 689 + }, + { + "epoch": 3.038897893030794, + "grad_norm": 1.8100288546636265, + "learning_rate": 2.7886398325367018e-05, + "loss": 0.2146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2229488492012024, + "step": 1875, + "valid_targets_mean": 2298.8, + "valid_targets_min": 353 + }, + { + "epoch": 3.047001620745543, + "grad_norm": 0.720734898463899, + "learning_rate": 2.7812059895402064e-05, + "loss": 0.198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20065172016620636, + "step": 1880, + "valid_targets_mean": 3090.3, + "valid_targets_min": 681 + }, + { + "epoch": 3.055105348460292, + "grad_norm": 0.8640799066873229, + "learning_rate": 2.773759388747925e-05, + "loss": 0.3024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4552222788333893, + "step": 1885, + "valid_targets_mean": 3287.7, + "valid_targets_min": 804 + }, + { + "epoch": 3.0632090761750406, + "grad_norm": 0.6353362563989103, + "learning_rate": 2.7663001517695386e-05, + "loss": 0.231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24317996203899384, + "step": 1890, + "valid_targets_mean": 3646.6, + "valid_targets_min": 630 + }, + { + "epoch": 3.0713128038897892, + "grad_norm": 0.7914447945449794, + "learning_rate": 2.7588284004210907e-05, + "loss": 0.203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20814718306064606, + "step": 1895, + "valid_targets_mean": 2367.0, + "valid_targets_min": 849 + }, + { + "epoch": 3.079416531604538, + "grad_norm": 0.6958540521779719, + "learning_rate": 2.7513442567229936e-05, + "loss": 0.1968, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20249110460281372, + "step": 1900, + "valid_targets_mean": 2836.4, + "valid_targets_min": 1033 + }, + { + "epoch": 3.087520259319287, + "grad_norm": 0.596529064303546, + "learning_rate": 2.7438478428980407e-05, + "loss": 0.2174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14787033200263977, + "step": 1905, + "valid_targets_mean": 3139.7, + "valid_targets_min": 475 + }, + { + "epoch": 3.0956239870340356, + "grad_norm": 0.7078002600984272, + "learning_rate": 2.7363392813694047e-05, + "loss": 0.2662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3391125798225403, + "step": 1910, + "valid_targets_mean": 3532.0, + "valid_targets_min": 345 + }, + { + "epoch": 3.1037277147487843, + "grad_norm": 0.7778209985657967, + "learning_rate": 2.7288186947586426e-05, + "loss": 0.2797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18052786588668823, + "step": 1915, + "valid_targets_mean": 2382.4, + "valid_targets_min": 815 + }, + { + "epoch": 3.1118314424635334, + "grad_norm": 0.6013057667276556, + "learning_rate": 2.7212862058836925e-05, + "loss": 0.2246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20441167056560516, + "step": 1920, + "valid_targets_mean": 4624.4, + "valid_targets_min": 1011 + }, + { + "epoch": 3.119935170178282, + "grad_norm": 0.8433721924819828, + "learning_rate": 2.713741937756865e-05, + "loss": 0.2132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21256756782531738, + "step": 1925, + "valid_targets_mean": 2051.4, + "valid_targets_min": 1096 + }, + { + "epoch": 3.1280388978930307, + "grad_norm": 0.8267912865776698, + "learning_rate": 2.7061860135828384e-05, + "loss": 0.2097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23010283708572388, + "step": 1930, + "valid_targets_mean": 2566.1, + "valid_targets_min": 1053 + }, + { + "epoch": 3.1361426256077793, + "grad_norm": 0.9281087995213807, + "learning_rate": 2.6986185567566442e-05, + "loss": 0.1956, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26588061451911926, + "step": 1935, + "valid_targets_mean": 2648.5, + "valid_targets_min": 1102 + }, + { + "epoch": 3.1442463533225284, + "grad_norm": 0.7402140669755561, + "learning_rate": 2.6910396908616527e-05, + "loss": 0.2389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24440577626228333, + "step": 1940, + "valid_targets_mean": 2880.9, + "valid_targets_min": 1200 + }, + { + "epoch": 3.152350081037277, + "grad_norm": 0.9250550622058952, + "learning_rate": 2.6834495396675526e-05, + "loss": 0.214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18351495265960693, + "step": 1945, + "valid_targets_mean": 1815.4, + "valid_targets_min": 792 + }, + { + "epoch": 3.1604538087520258, + "grad_norm": 0.7075464086399091, + "learning_rate": 2.6758482271283347e-05, + "loss": 0.183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14974619448184967, + "step": 1950, + "valid_targets_mean": 2622.6, + "valid_targets_min": 918 + }, + { + "epoch": 3.168557536466775, + "grad_norm": 0.9235868375401112, + "learning_rate": 2.668235877380263e-05, + "loss": 0.2305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24251984059810638, + "step": 1955, + "valid_targets_mean": 1804.3, + "valid_targets_min": 958 + }, + { + "epoch": 3.1766612641815235, + "grad_norm": 0.6596779615208653, + "learning_rate": 2.660612614739849e-05, + "loss": 0.1977, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14453783631324768, + "step": 1960, + "valid_targets_mean": 2709.9, + "valid_targets_min": 976 + }, + { + "epoch": 3.184764991896272, + "grad_norm": 0.766766066884446, + "learning_rate": 2.652978563701822e-05, + "loss": 0.2015, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1874874234199524, + "step": 1965, + "valid_targets_mean": 2113.1, + "valid_targets_min": 551 + }, + { + "epoch": 3.1928687196110213, + "grad_norm": 0.8124849594835287, + "learning_rate": 2.645333848937095e-05, + "loss": 0.1831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23300279676914215, + "step": 1970, + "valid_targets_mean": 2446.9, + "valid_targets_min": 901 + }, + { + "epoch": 3.20097244732577, + "grad_norm": 0.46106063693318805, + "learning_rate": 2.6376785952907292e-05, + "loss": 0.1697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10534625500440598, + "step": 1975, + "valid_targets_mean": 4224.2, + "valid_targets_min": 910 + }, + { + "epoch": 3.2090761750405186, + "grad_norm": 0.7587794893394673, + "learning_rate": 2.630012927779896e-05, + "loss": 0.2402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2638416290283203, + "step": 1980, + "valid_targets_mean": 2923.1, + "valid_targets_min": 460 + }, + { + "epoch": 3.217179902755267, + "grad_norm": 0.8879684966937325, + "learning_rate": 2.6223369715918338e-05, + "loss": 0.2079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22857242822647095, + "step": 1985, + "valid_targets_mean": 1645.6, + "valid_targets_min": 501 + }, + { + "epoch": 3.2252836304700163, + "grad_norm": 0.7748370854682728, + "learning_rate": 2.614650852081805e-05, + "loss": 0.2333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24988800287246704, + "step": 1990, + "valid_targets_mean": 2849.4, + "valid_targets_min": 782 + }, + { + "epoch": 3.233387358184765, + "grad_norm": 0.7495261062213084, + "learning_rate": 2.606954694771047e-05, + "loss": 0.1831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23291251063346863, + "step": 1995, + "valid_targets_mean": 2843.9, + "valid_targets_min": 981 + }, + { + "epoch": 3.2414910858995136, + "grad_norm": 0.7547991819930768, + "learning_rate": 2.5992486253447258e-05, + "loss": 0.1635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21426227688789368, + "step": 2000, + "valid_targets_mean": 2390.5, + "valid_targets_min": 525 + }, + { + "epoch": 3.2495948136142627, + "grad_norm": 0.8127669361485503, + "learning_rate": 2.5915327696498787e-05, + "loss": 0.1866, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12424997985363007, + "step": 2005, + "valid_targets_mean": 1779.2, + "valid_targets_min": 518 + }, + { + "epoch": 3.2576985413290114, + "grad_norm": 0.7721290973743158, + "learning_rate": 2.583807253693362e-05, + "loss": 0.2001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18391962349414825, + "step": 2010, + "valid_targets_mean": 2541.4, + "valid_targets_min": 1182 + }, + { + "epoch": 3.26580226904376, + "grad_norm": 0.7334128634083178, + "learning_rate": 2.576072203639794e-05, + "loss": 0.2041, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19778266549110413, + "step": 2015, + "valid_targets_mean": 2840.1, + "valid_targets_min": 980 + }, + { + "epoch": 3.2739059967585087, + "grad_norm": 0.9287603147287846, + "learning_rate": 2.5683277458094926e-05, + "loss": 0.1992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2278834730386734, + "step": 2020, + "valid_targets_mean": 1949.1, + "valid_targets_min": 815 + }, + { + "epoch": 3.282009724473258, + "grad_norm": 0.8397730144943303, + "learning_rate": 2.560574006676413e-05, + "loss": 0.2076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2002110779285431, + "step": 2025, + "valid_targets_mean": 3523.7, + "valid_targets_min": 310 + }, + { + "epoch": 3.2901134521880064, + "grad_norm": 0.4390442212313819, + "learning_rate": 2.5528111128660826e-05, + "loss": 0.1701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13749784231185913, + "step": 2030, + "valid_targets_mean": 5424.6, + "valid_targets_min": 232 + }, + { + "epoch": 3.298217179902755, + "grad_norm": 0.6699615633322983, + "learning_rate": 2.545039191153533e-05, + "loss": 0.1734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2043614387512207, + "step": 2035, + "valid_targets_mean": 3574.8, + "valid_targets_min": 1054 + }, + { + "epoch": 3.306320907617504, + "grad_norm": 0.8633099775212048, + "learning_rate": 2.53725836846123e-05, + "loss": 0.2352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1649923026561737, + "step": 2040, + "valid_targets_mean": 1737.0, + "valid_targets_min": 456 + }, + { + "epoch": 3.314424635332253, + "grad_norm": 0.6256214069648829, + "learning_rate": 2.5294687718569994e-05, + "loss": 0.2001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14858299493789673, + "step": 2045, + "valid_targets_mean": 3804.9, + "valid_targets_min": 769 + }, + { + "epoch": 3.3225283630470015, + "grad_norm": 0.9944856313019108, + "learning_rate": 2.5216705285519525e-05, + "loss": 0.2182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17022621631622314, + "step": 2050, + "valid_targets_mean": 2688.2, + "valid_targets_min": 269 + }, + { + "epoch": 3.3306320907617506, + "grad_norm": 0.6389414039882365, + "learning_rate": 2.5138637658984116e-05, + "loss": 0.1769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12931106984615326, + "step": 2055, + "valid_targets_mean": 2573.4, + "valid_targets_min": 597 + }, + { + "epoch": 3.3387358184764993, + "grad_norm": 0.6456462614939432, + "learning_rate": 2.5060486113878244e-05, + "loss": 0.213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22942790389060974, + "step": 2060, + "valid_targets_mean": 3076.4, + "valid_targets_min": 640 + }, + { + "epoch": 3.346839546191248, + "grad_norm": 0.4552379058782566, + "learning_rate": 2.4982251926486873e-05, + "loss": 0.1578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0790659636259079, + "step": 2065, + "valid_targets_mean": 3547.6, + "valid_targets_min": 747 + }, + { + "epoch": 3.354943273905997, + "grad_norm": 0.5502283454291953, + "learning_rate": 2.490393637444458e-05, + "loss": 0.1957, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16592127084732056, + "step": 2070, + "valid_targets_mean": 4692.4, + "valid_targets_min": 988 + }, + { + "epoch": 3.3630470016207457, + "grad_norm": 0.8617017867222999, + "learning_rate": 2.482554073671471e-05, + "loss": 0.2125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24501700699329376, + "step": 2075, + "valid_targets_mean": 2338.4, + "valid_targets_min": 937 + }, + { + "epoch": 3.3711507293354943, + "grad_norm": 0.7899472946015639, + "learning_rate": 2.4747066293568452e-05, + "loss": 0.2152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15188781917095184, + "step": 2080, + "valid_targets_mean": 2104.1, + "valid_targets_min": 556 + }, + { + "epoch": 3.379254457050243, + "grad_norm": 0.8193544565230731, + "learning_rate": 2.4668514326564e-05, + "loss": 0.2124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17579089105129242, + "step": 2085, + "valid_targets_mean": 2063.2, + "valid_targets_min": 553 + }, + { + "epoch": 3.387358184764992, + "grad_norm": 0.889128650784621, + "learning_rate": 2.4589886118525556e-05, + "loss": 0.1833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18815834820270538, + "step": 2090, + "valid_targets_mean": 1816.8, + "valid_targets_min": 471 + }, + { + "epoch": 3.3954619124797407, + "grad_norm": 0.7550229654302467, + "learning_rate": 2.4511182953522405e-05, + "loss": 0.1906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26341044902801514, + "step": 2095, + "valid_targets_mean": 3581.5, + "valid_targets_min": 656 + }, + { + "epoch": 3.4035656401944894, + "grad_norm": 0.8942138699800349, + "learning_rate": 2.4432406116847954e-05, + "loss": 0.1948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2436755895614624, + "step": 2100, + "valid_targets_mean": 1985.3, + "valid_targets_min": 916 + }, + { + "epoch": 3.411669367909238, + "grad_norm": 0.5028709815573968, + "learning_rate": 2.435355689499874e-05, + "loss": 0.1666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11536405980587006, + "step": 2105, + "valid_targets_mean": 3964.9, + "valid_targets_min": 233 + }, + { + "epoch": 3.419773095623987, + "grad_norm": 0.714825678842735, + "learning_rate": 2.4274636575653398e-05, + "loss": 0.1707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15641966462135315, + "step": 2110, + "valid_targets_mean": 2424.9, + "valid_targets_min": 865 + }, + { + "epoch": 3.427876823338736, + "grad_norm": 0.8122455585254386, + "learning_rate": 2.4195646447651663e-05, + "loss": 0.2298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2142556607723236, + "step": 2115, + "valid_targets_mean": 4355.4, + "valid_targets_min": 1109 + }, + { + "epoch": 3.4359805510534844, + "grad_norm": 0.783700231697767, + "learning_rate": 2.411658780097331e-05, + "loss": 0.2259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3505910634994507, + "step": 2120, + "valid_targets_mean": 4021.6, + "valid_targets_min": 1086 + }, + { + "epoch": 3.4440842787682335, + "grad_norm": 0.7313909744848299, + "learning_rate": 2.4037461926717075e-05, + "loss": 0.1956, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17269133031368256, + "step": 2125, + "valid_targets_mean": 2451.2, + "valid_targets_min": 514 + }, + { + "epoch": 3.452188006482982, + "grad_norm": 0.8088615988871327, + "learning_rate": 2.395827011707959e-05, + "loss": 0.2503, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20847368240356445, + "step": 2130, + "valid_targets_mean": 2553.2, + "valid_targets_min": 658 + }, + { + "epoch": 3.460291734197731, + "grad_norm": 0.5900039519353123, + "learning_rate": 2.3879013665334258e-05, + "loss": 0.2124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20145244896411896, + "step": 2135, + "valid_targets_mean": 4923.4, + "valid_targets_min": 677 + }, + { + "epoch": 3.46839546191248, + "grad_norm": 0.6378206622182675, + "learning_rate": 2.3799693865810163e-05, + "loss": 0.2032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16754154860973358, + "step": 2140, + "valid_targets_mean": 3790.3, + "valid_targets_min": 750 + }, + { + "epoch": 3.4764991896272286, + "grad_norm": 0.7389226745729313, + "learning_rate": 2.37203120138709e-05, + "loss": 0.2046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2016831338405609, + "step": 2145, + "valid_targets_mean": 3017.4, + "valid_targets_min": 913 + }, + { + "epoch": 3.4846029173419772, + "grad_norm": 0.542849963051636, + "learning_rate": 2.3640869405893446e-05, + "loss": 0.2485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19606372714042664, + "step": 2150, + "valid_targets_mean": 3942.9, + "valid_targets_min": 464 + }, + { + "epoch": 3.492706645056726, + "grad_norm": 0.8814880321553831, + "learning_rate": 2.3561367339246976e-05, + "loss": 0.2054, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2154223620891571, + "step": 2155, + "valid_targets_mean": 1835.9, + "valid_targets_min": 778 + }, + { + "epoch": 3.500810372771475, + "grad_norm": 0.7067348415715224, + "learning_rate": 2.3481807112271678e-05, + "loss": 0.1828, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.166023850440979, + "step": 2160, + "valid_targets_mean": 3050.1, + "valid_targets_min": 890 + }, + { + "epoch": 3.5089141004862237, + "grad_norm": 0.620735692816103, + "learning_rate": 2.3402190024257543e-05, + "loss": 0.1741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13485875725746155, + "step": 2165, + "valid_targets_mean": 3070.6, + "valid_targets_min": 680 + }, + { + "epoch": 3.5170178282009723, + "grad_norm": 0.7429662318750067, + "learning_rate": 2.3322517375423165e-05, + "loss": 0.1878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16115394234657288, + "step": 2170, + "valid_targets_mean": 2298.1, + "valid_targets_min": 712 + }, + { + "epoch": 3.525121555915721, + "grad_norm": 0.8477525841685281, + "learning_rate": 2.3242790466894494e-05, + "loss": 0.2381, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19504885375499725, + "step": 2175, + "valid_targets_mean": 2246.0, + "valid_targets_min": 398 + }, + { + "epoch": 3.53322528363047, + "grad_norm": 0.7627025545672735, + "learning_rate": 2.316301060068359e-05, + "loss": 0.2083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21119363605976105, + "step": 2180, + "valid_targets_mean": 2926.6, + "valid_targets_min": 692 + }, + { + "epoch": 3.5413290113452187, + "grad_norm": 1.100127953832155, + "learning_rate": 2.3083179079667347e-05, + "loss": 0.2094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20743992924690247, + "step": 2185, + "valid_targets_mean": 4087.3, + "valid_targets_min": 924 + }, + { + "epoch": 3.5494327390599674, + "grad_norm": 0.5805399953892108, + "learning_rate": 2.300329720756625e-05, + "loss": 0.1837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15286213159561157, + "step": 2190, + "valid_targets_mean": 3119.1, + "valid_targets_min": 342 + }, + { + "epoch": 3.5575364667747165, + "grad_norm": 0.7519460987432748, + "learning_rate": 2.2923366288923045e-05, + "loss": 0.2176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18701784312725067, + "step": 2195, + "valid_targets_mean": 2679.4, + "valid_targets_min": 704 + }, + { + "epoch": 3.565640194489465, + "grad_norm": 0.5738422561843207, + "learning_rate": 2.2843387629081453e-05, + "loss": 0.1719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14277049899101257, + "step": 2200, + "valid_targets_mean": 4261.6, + "valid_targets_min": 835 + }, + { + "epoch": 3.5737439222042138, + "grad_norm": 0.6995930320107968, + "learning_rate": 2.2763362534164854e-05, + "loss": 0.1787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1426575481891632, + "step": 2205, + "valid_targets_mean": 2615.9, + "valid_targets_min": 550 + }, + { + "epoch": 3.581847649918963, + "grad_norm": 0.778790384658622, + "learning_rate": 2.268329231105498e-05, + "loss": 0.1957, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22932958602905273, + "step": 2210, + "valid_targets_mean": 2972.4, + "valid_targets_min": 704 + }, + { + "epoch": 3.5899513776337115, + "grad_norm": 0.8621285854700218, + "learning_rate": 2.2603178267370504e-05, + "loss": 0.1967, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20100966095924377, + "step": 2215, + "valid_targets_mean": 2199.8, + "valid_targets_min": 509 + }, + { + "epoch": 3.59805510534846, + "grad_norm": 0.6366888964927337, + "learning_rate": 2.2523021711445746e-05, + "loss": 0.1847, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14098083972930908, + "step": 2220, + "valid_targets_mean": 3288.6, + "valid_targets_min": 814 + }, + { + "epoch": 3.6061588330632093, + "grad_norm": 1.031030067314881, + "learning_rate": 2.2442823952309308e-05, + "loss": 0.205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21637287735939026, + "step": 2225, + "valid_targets_mean": 1423.6, + "valid_targets_min": 521 + }, + { + "epoch": 3.614262560777958, + "grad_norm": 0.8638929404500169, + "learning_rate": 2.2362586299662642e-05, + "loss": 0.1955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2800928056240082, + "step": 2230, + "valid_targets_mean": 2027.9, + "valid_targets_min": 386 + }, + { + "epoch": 3.6223662884927066, + "grad_norm": 0.7529589056185002, + "learning_rate": 2.228231006385873e-05, + "loss": 0.1949, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3154662251472473, + "step": 2235, + "valid_targets_mean": 4337.9, + "valid_targets_min": 566 + }, + { + "epoch": 3.6304700162074557, + "grad_norm": 0.6840352862675642, + "learning_rate": 2.2201996555880633e-05, + "loss": 0.1879, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13636550307273865, + "step": 2240, + "valid_targets_mean": 2383.8, + "valid_targets_min": 477 + }, + { + "epoch": 3.6385737439222043, + "grad_norm": 0.7568859162264532, + "learning_rate": 2.2121647087320105e-05, + "loss": 0.2209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17221365869045258, + "step": 2245, + "valid_targets_mean": 2137.6, + "valid_targets_min": 671 + }, + { + "epoch": 3.646677471636953, + "grad_norm": 0.8960270729679528, + "learning_rate": 2.204126297035617e-05, + "loss": 0.2129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23943698406219482, + "step": 2250, + "valid_targets_mean": 1959.3, + "valid_targets_min": 562 + }, + { + "epoch": 3.6547811993517016, + "grad_norm": 0.7464106224245589, + "learning_rate": 2.196084551773368e-05, + "loss": 0.2071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18965956568717957, + "step": 2255, + "valid_targets_mean": 2451.1, + "valid_targets_min": 973 + }, + { + "epoch": 3.6628849270664503, + "grad_norm": 0.9722153370680764, + "learning_rate": 2.1880396042741906e-05, + "loss": 0.2534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22785764932632446, + "step": 2260, + "valid_targets_mean": 1551.8, + "valid_targets_min": 757 + }, + { + "epoch": 3.6709886547811994, + "grad_norm": 0.4697676488300176, + "learning_rate": 2.179991585919307e-05, + "loss": 0.1834, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0863395407795906, + "step": 2265, + "valid_targets_mean": 3504.8, + "valid_targets_min": 522 + }, + { + "epoch": 3.679092382495948, + "grad_norm": 0.7371456685043023, + "learning_rate": 2.1719406281400873e-05, + "loss": 0.2142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2391626387834549, + "step": 2270, + "valid_targets_mean": 2812.4, + "valid_targets_min": 636 + }, + { + "epoch": 3.6871961102106967, + "grad_norm": 0.7116149527659587, + "learning_rate": 2.163886862415908e-05, + "loss": 0.2048, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16868996620178223, + "step": 2275, + "valid_targets_mean": 3199.4, + "valid_targets_min": 823 + }, + { + "epoch": 3.695299837925446, + "grad_norm": 0.6944396008804573, + "learning_rate": 2.155830420272e-05, + "loss": 0.2347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36398154497146606, + "step": 2280, + "valid_targets_mean": 4588.9, + "valid_targets_min": 857 + }, + { + "epoch": 3.7034035656401945, + "grad_norm": 0.49891846247542965, + "learning_rate": 2.1477714332773022e-05, + "loss": 0.1834, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12793442606925964, + "step": 2285, + "valid_targets_mean": 4197.6, + "valid_targets_min": 750 + }, + { + "epoch": 3.711507293354943, + "grad_norm": 0.7737664275138527, + "learning_rate": 2.139710033042314e-05, + "loss": 0.2154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21408765017986298, + "step": 2290, + "valid_targets_mean": 2392.2, + "valid_targets_min": 436 + }, + { + "epoch": 3.719611021069692, + "grad_norm": 0.6249478567474501, + "learning_rate": 2.1316463512169453e-05, + "loss": 0.2269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17073558270931244, + "step": 2295, + "valid_targets_mean": 3448.7, + "valid_targets_min": 816 + }, + { + "epoch": 3.727714748784441, + "grad_norm": 0.6319183901564622, + "learning_rate": 2.1235805194883665e-05, + "loss": 0.1862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12544557452201843, + "step": 2300, + "valid_targets_mean": 3194.1, + "valid_targets_min": 828 + }, + { + "epoch": 3.7358184764991895, + "grad_norm": 0.8809027809587772, + "learning_rate": 2.115512669578857e-05, + "loss": 0.1817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21802709996700287, + "step": 2305, + "valid_targets_mean": 2093.7, + "valid_targets_min": 540 + }, + { + "epoch": 3.7439222042139386, + "grad_norm": 0.8125759794942795, + "learning_rate": 2.107442933243656e-05, + "loss": 0.1738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1779268980026245, + "step": 2310, + "valid_targets_mean": 2595.6, + "valid_targets_min": 1016 + }, + { + "epoch": 3.7520259319286873, + "grad_norm": 0.7211723580416524, + "learning_rate": 2.099371442268809e-05, + "loss": 0.2015, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1611754596233368, + "step": 2315, + "valid_targets_mean": 2815.6, + "valid_targets_min": 715 + }, + { + "epoch": 3.760129659643436, + "grad_norm": 0.7848416454395023, + "learning_rate": 2.0912983284690157e-05, + "loss": 0.1933, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2059643268585205, + "step": 2320, + "valid_targets_mean": 2567.2, + "valid_targets_min": 232 + }, + { + "epoch": 3.768233387358185, + "grad_norm": 0.5904887355051087, + "learning_rate": 2.0832237236854794e-05, + "loss": 0.1779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15662437677383423, + "step": 2325, + "valid_targets_mean": 3666.7, + "valid_targets_min": 1253 + }, + { + "epoch": 3.7763371150729337, + "grad_norm": 0.7236461690914054, + "learning_rate": 2.0751477597837528e-05, + "loss": 0.1704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12969183921813965, + "step": 2330, + "valid_targets_mean": 2251.1, + "valid_targets_min": 590 + }, + { + "epoch": 3.7844408427876823, + "grad_norm": 0.9505422297729229, + "learning_rate": 2.0670705686515822e-05, + "loss": 0.1897, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23993048071861267, + "step": 2335, + "valid_targets_mean": 1928.8, + "valid_targets_min": 505 + }, + { + "epoch": 3.792544570502431, + "grad_norm": 0.6637526217519724, + "learning_rate": 2.0589922821967566e-05, + "loss": 0.1716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1568320393562317, + "step": 2340, + "valid_targets_mean": 2895.9, + "valid_targets_min": 662 + }, + { + "epoch": 3.8006482982171796, + "grad_norm": 1.0128117172897237, + "learning_rate": 2.0509130323449545e-05, + "loss": 0.1732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15534460544586182, + "step": 2345, + "valid_targets_mean": 1361.9, + "valid_targets_min": 488 + }, + { + "epoch": 3.8087520259319287, + "grad_norm": 0.6671945777380213, + "learning_rate": 2.0428329510375838e-05, + "loss": 0.1997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30660581588745117, + "step": 2350, + "valid_targets_mean": 4413.9, + "valid_targets_min": 531 + }, + { + "epoch": 3.8168557536466774, + "grad_norm": 0.5544375126811655, + "learning_rate": 2.0347521702296333e-05, + "loss": 0.1945, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17179805040359497, + "step": 2355, + "valid_targets_mean": 4360.7, + "valid_targets_min": 700 + }, + { + "epoch": 3.824959481361426, + "grad_norm": 0.8902971352705107, + "learning_rate": 2.026670821887516e-05, + "loss": 0.1696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16486603021621704, + "step": 2360, + "valid_targets_mean": 1924.4, + "valid_targets_min": 514 + }, + { + "epoch": 3.833063209076175, + "grad_norm": 0.691767954969929, + "learning_rate": 2.0185890379869115e-05, + "loss": 0.2224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29504096508026123, + "step": 2365, + "valid_targets_mean": 3782.2, + "valid_targets_min": 1077 + }, + { + "epoch": 3.841166936790924, + "grad_norm": 0.7902862954174232, + "learning_rate": 2.0105069505106126e-05, + "loss": 0.1761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2295757234096527, + "step": 2370, + "valid_targets_mean": 2529.1, + "valid_targets_min": 650 + }, + { + "epoch": 3.8492706645056725, + "grad_norm": 0.6983178682113452, + "learning_rate": 2.00242469144637e-05, + "loss": 0.1819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1746961772441864, + "step": 2375, + "valid_targets_mean": 3053.4, + "valid_targets_min": 860 + }, + { + "epoch": 3.8573743922204216, + "grad_norm": 0.7564438056864309, + "learning_rate": 1.994342392784738e-05, + "loss": 0.1784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17830297350883484, + "step": 2380, + "valid_targets_mean": 2944.7, + "valid_targets_min": 1044 + }, + { + "epoch": 3.86547811993517, + "grad_norm": 0.755822155072044, + "learning_rate": 1.9862601865169154e-05, + "loss": 0.207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19303062558174133, + "step": 2385, + "valid_targets_mean": 2711.5, + "valid_targets_min": 1487 + }, + { + "epoch": 3.873581847649919, + "grad_norm": 0.7160490637060567, + "learning_rate": 1.9781782046325938e-05, + "loss": 0.173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18017929792404175, + "step": 2390, + "valid_targets_mean": 2449.2, + "valid_targets_min": 510 + }, + { + "epoch": 3.881685575364668, + "grad_norm": 0.6508016267439924, + "learning_rate": 1.9700965791177986e-05, + "loss": 0.2162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22605565190315247, + "step": 2395, + "valid_targets_mean": 3326.8, + "valid_targets_min": 525 + }, + { + "epoch": 3.8897893030794166, + "grad_norm": 0.6852974910747845, + "learning_rate": 1.9620154419527372e-05, + "loss": 0.2692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24458330869674683, + "step": 2400, + "valid_targets_mean": 3742.1, + "valid_targets_min": 531 + }, + { + "epoch": 3.8978930307941653, + "grad_norm": 0.8183442535515982, + "learning_rate": 1.953934925109641e-05, + "loss": 0.2326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3048601746559143, + "step": 2405, + "valid_targets_mean": 2523.3, + "valid_targets_min": 540 + }, + { + "epoch": 3.9059967585089144, + "grad_norm": 0.7630671943332308, + "learning_rate": 1.945855160550611e-05, + "loss": 0.1885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16103631258010864, + "step": 2410, + "valid_targets_mean": 2636.6, + "valid_targets_min": 604 + }, + { + "epoch": 3.914100486223663, + "grad_norm": 0.8070470089137307, + "learning_rate": 1.937776280225463e-05, + "loss": 0.2098, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20676636695861816, + "step": 2415, + "valid_targets_mean": 2609.7, + "valid_targets_min": 840 + }, + { + "epoch": 3.9222042139384117, + "grad_norm": 0.9463383844932369, + "learning_rate": 1.929698416069571e-05, + "loss": 0.2172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.197508305311203, + "step": 2420, + "valid_targets_mean": 1525.7, + "valid_targets_min": 759 + }, + { + "epoch": 3.9303079416531603, + "grad_norm": 0.6974073097875326, + "learning_rate": 1.9216217000017182e-05, + "loss": 0.2076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21730129420757294, + "step": 2425, + "valid_targets_mean": 2918.9, + "valid_targets_min": 693 + }, + { + "epoch": 3.938411669367909, + "grad_norm": 0.6196655766852457, + "learning_rate": 1.9135462639219325e-05, + "loss": 0.2229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22163273394107819, + "step": 2430, + "valid_targets_mean": 4302.2, + "valid_targets_min": 914 + }, + { + "epoch": 3.946515397082658, + "grad_norm": 1.0083387028610686, + "learning_rate": 1.905472239709343e-05, + "loss": 0.1875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27771952748298645, + "step": 2435, + "valid_targets_mean": 1752.2, + "valid_targets_min": 792 + }, + { + "epoch": 3.9546191247974067, + "grad_norm": 0.6436193457753687, + "learning_rate": 1.89739975922002e-05, + "loss": 0.2059, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24347497522830963, + "step": 2440, + "valid_targets_mean": 3727.1, + "valid_targets_min": 731 + }, + { + "epoch": 3.9627228525121554, + "grad_norm": 0.7463707250780504, + "learning_rate": 1.889328954284823e-05, + "loss": 0.1899, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21482862532138824, + "step": 2445, + "valid_targets_mean": 2699.9, + "valid_targets_min": 575 + }, + { + "epoch": 3.9708265802269045, + "grad_norm": 0.8578748435821815, + "learning_rate": 1.8812599567072496e-05, + "loss": 0.2208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22225850820541382, + "step": 2450, + "valid_targets_mean": 2473.5, + "valid_targets_min": 781 + }, + { + "epoch": 3.978930307941653, + "grad_norm": 0.6000598430860175, + "learning_rate": 1.873192898261281e-05, + "loss": 0.1742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17323483526706696, + "step": 2455, + "valid_targets_mean": 3969.9, + "valid_targets_min": 501 + }, + { + "epoch": 3.987034035656402, + "grad_norm": 0.6411349311068479, + "learning_rate": 1.8651279106892317e-05, + "loss": 0.2063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22670185565948486, + "step": 2460, + "valid_targets_mean": 3840.9, + "valid_targets_min": 506 + }, + { + "epoch": 3.995137763371151, + "grad_norm": 0.8265211359345982, + "learning_rate": 1.8570651256995933e-05, + "loss": 0.1763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16508886218070984, + "step": 2465, + "valid_targets_mean": 1857.5, + "valid_targets_min": 407 + }, + { + "epoch": 4.003241491085899, + "grad_norm": 0.7438519471306131, + "learning_rate": 1.849004674964891e-05, + "loss": 0.1646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13882270455360413, + "step": 2470, + "valid_targets_mean": 2407.8, + "valid_targets_min": 699 + }, + { + "epoch": 4.011345218800648, + "grad_norm": 0.666660079984095, + "learning_rate": 1.840946690119528e-05, + "loss": 0.1747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18586783111095428, + "step": 2475, + "valid_targets_mean": 3428.2, + "valid_targets_min": 631 + }, + { + "epoch": 4.019448946515397, + "grad_norm": 0.6224308701315978, + "learning_rate": 1.8328913027576373e-05, + "loss": 0.1512, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1357424110174179, + "step": 2480, + "valid_targets_mean": 2990.6, + "valid_targets_min": 297 + }, + { + "epoch": 4.0275526742301455, + "grad_norm": 0.9706356061146716, + "learning_rate": 1.824838644430934e-05, + "loss": 0.156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17957650125026703, + "step": 2485, + "valid_targets_mean": 2300.4, + "valid_targets_min": 819 + }, + { + "epoch": 4.035656401944895, + "grad_norm": 0.5507038011859006, + "learning_rate": 1.8167888466465652e-05, + "loss": 0.1543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1574670970439911, + "step": 2490, + "valid_targets_mean": 6137.8, + "valid_targets_min": 269 + }, + { + "epoch": 4.043760129659644, + "grad_norm": 0.9201955501879038, + "learning_rate": 1.8087420408649596e-05, + "loss": 0.1852, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1650194525718689, + "step": 2495, + "valid_targets_mean": 1996.8, + "valid_targets_min": 971 + }, + { + "epoch": 4.051863857374392, + "grad_norm": 0.7731697108102025, + "learning_rate": 1.8006983584976877e-05, + "loss": 0.1829, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18308573961257935, + "step": 2500, + "valid_targets_mean": 2703.9, + "valid_targets_min": 740 + }, + { + "epoch": 4.059967585089141, + "grad_norm": 0.7203113706129127, + "learning_rate": 1.7926579309053098e-05, + "loss": 0.1584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1433395892381668, + "step": 2505, + "valid_targets_mean": 2686.1, + "valid_targets_min": 601 + }, + { + "epoch": 4.06807131280389, + "grad_norm": 0.857648318619981, + "learning_rate": 1.7846208893952346e-05, + "loss": 0.1733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17913417518138885, + "step": 2510, + "valid_targets_mean": 2421.8, + "valid_targets_min": 216 + }, + { + "epoch": 4.076175040518638, + "grad_norm": 0.9249873120711297, + "learning_rate": 1.7765873652195713e-05, + "loss": 0.1537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19423171877861023, + "step": 2515, + "valid_targets_mean": 2475.4, + "valid_targets_min": 946 + }, + { + "epoch": 4.084278768233387, + "grad_norm": 2.311467435227256, + "learning_rate": 1.7685574895729886e-05, + "loss": 0.2207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4830706715583801, + "step": 2520, + "valid_targets_mean": 3542.9, + "valid_targets_min": 305 + }, + { + "epoch": 4.0923824959481365, + "grad_norm": 0.654022585178441, + "learning_rate": 1.7605313935905722e-05, + "loss": 0.1511, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1987391710281372, + "step": 2525, + "valid_targets_mean": 4264.9, + "valid_targets_min": 549 + }, + { + "epoch": 4.100486223662885, + "grad_norm": 0.7899542972251363, + "learning_rate": 1.7525092083456795e-05, + "loss": 0.1646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15167073905467987, + "step": 2530, + "valid_targets_mean": 2471.2, + "valid_targets_min": 544 + }, + { + "epoch": 4.108589951377634, + "grad_norm": 0.7446749286045431, + "learning_rate": 1.744491064847805e-05, + "loss": 0.1731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14512816071510315, + "step": 2535, + "valid_targets_mean": 3130.6, + "valid_targets_min": 828 + }, + { + "epoch": 4.116693679092383, + "grad_norm": 0.6706352002553055, + "learning_rate": 1.7364770940404375e-05, + "loss": 0.1749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18727947771549225, + "step": 2540, + "valid_targets_mean": 3859.0, + "valid_targets_min": 972 + }, + { + "epoch": 4.124797406807131, + "grad_norm": 0.7374471536131514, + "learning_rate": 1.7284674267989213e-05, + "loss": 0.1898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3415883183479309, + "step": 2545, + "valid_targets_mean": 3796.5, + "valid_targets_min": 731 + }, + { + "epoch": 4.13290113452188, + "grad_norm": 0.6623719219174707, + "learning_rate": 1.72046219392832e-05, + "loss": 0.1622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09289093315601349, + "step": 2550, + "valid_targets_mean": 2261.0, + "valid_targets_min": 372 + }, + { + "epoch": 4.1410048622366284, + "grad_norm": 0.6819916449930195, + "learning_rate": 1.712461526161279e-05, + "loss": 0.2122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19168910384178162, + "step": 2555, + "valid_targets_mean": 3595.6, + "valid_targets_min": 431 + }, + { + "epoch": 4.1491085899513775, + "grad_norm": 1.006431838006035, + "learning_rate": 1.7044655541558934e-05, + "loss": 0.1734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19053317606449127, + "step": 2560, + "valid_targets_mean": 2357.0, + "valid_targets_min": 939 + }, + { + "epoch": 4.157212317666127, + "grad_norm": 0.7533939089362455, + "learning_rate": 1.69647440849357e-05, + "loss": 0.1684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2373582273721695, + "step": 2565, + "valid_targets_mean": 2927.2, + "valid_targets_min": 947 + }, + { + "epoch": 4.165316045380875, + "grad_norm": 0.890040140849173, + "learning_rate": 1.6884882196768985e-05, + "loss": 0.1805, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.188620924949646, + "step": 2570, + "valid_targets_mean": 2630.1, + "valid_targets_min": 734 + }, + { + "epoch": 4.173419773095624, + "grad_norm": 0.9465451734827055, + "learning_rate": 1.680507118127518e-05, + "loss": 0.1876, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19093164801597595, + "step": 2575, + "valid_targets_mean": 1947.6, + "valid_targets_min": 594 + }, + { + "epoch": 4.181523500810373, + "grad_norm": 0.7658272802842436, + "learning_rate": 1.6725312341839895e-05, + "loss": 0.1391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14707794785499573, + "step": 2580, + "valid_targets_mean": 2918.4, + "valid_targets_min": 965 + }, + { + "epoch": 4.189627228525121, + "grad_norm": 0.6443361980317487, + "learning_rate": 1.664560698099664e-05, + "loss": 0.1722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15699368715286255, + "step": 2585, + "valid_targets_mean": 3664.1, + "valid_targets_min": 1065 + }, + { + "epoch": 4.19773095623987, + "grad_norm": 0.9445201634571403, + "learning_rate": 1.6565956400405586e-05, + "loss": 0.161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19105768203735352, + "step": 2590, + "valid_targets_mean": 1996.1, + "valid_targets_min": 776 + }, + { + "epoch": 4.2058346839546195, + "grad_norm": 1.5563352274482452, + "learning_rate": 1.6486361900832284e-05, + "loss": 0.142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1398153007030487, + "step": 2595, + "valid_targets_mean": 1860.5, + "valid_targets_min": 741 + }, + { + "epoch": 4.213938411669368, + "grad_norm": 0.6946128489566771, + "learning_rate": 1.6406824782126428e-05, + "loss": 0.2203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22305166721343994, + "step": 2600, + "valid_targets_mean": 3348.3, + "valid_targets_min": 959 + }, + { + "epoch": 4.222042139384117, + "grad_norm": 0.6233066610164918, + "learning_rate": 1.632734634320064e-05, + "loss": 0.2009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22936005890369415, + "step": 2605, + "valid_targets_mean": 3911.8, + "valid_targets_min": 925 + }, + { + "epoch": 4.230145867098866, + "grad_norm": 0.6881169895401946, + "learning_rate": 1.6247927882009256e-05, + "loss": 0.158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13565057516098022, + "step": 2610, + "valid_targets_mean": 3660.1, + "valid_targets_min": 1077 + }, + { + "epoch": 4.238249594813614, + "grad_norm": 0.9276168493947143, + "learning_rate": 1.6168570695527096e-05, + "loss": 0.1619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2158156931400299, + "step": 2615, + "valid_targets_mean": 2655.6, + "valid_targets_min": 909 + }, + { + "epoch": 4.246353322528363, + "grad_norm": 1.0419792453131715, + "learning_rate": 1.6089276079728334e-05, + "loss": 0.1589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12811069190502167, + "step": 2620, + "valid_targets_mean": 1496.8, + "valid_targets_min": 303 + }, + { + "epoch": 4.254457050243111, + "grad_norm": 0.5184090018750442, + "learning_rate": 1.6010045329565294e-05, + "loss": 0.1487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08075259625911713, + "step": 2625, + "valid_targets_mean": 3308.2, + "valid_targets_min": 705 + }, + { + "epoch": 4.2625607779578605, + "grad_norm": 1.374064211424771, + "learning_rate": 1.5930879738947328e-05, + "loss": 0.1494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14756430685520172, + "step": 2630, + "valid_targets_mean": 2306.1, + "valid_targets_min": 875 + }, + { + "epoch": 4.27066450567261, + "grad_norm": 0.7709350033003551, + "learning_rate": 1.585178060071966e-05, + "loss": 0.1548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16429486870765686, + "step": 2635, + "valid_targets_mean": 2699.4, + "valid_targets_min": 471 + }, + { + "epoch": 4.278768233387358, + "grad_norm": 0.7949963216390307, + "learning_rate": 1.5772749206642296e-05, + "loss": 0.1461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11400672793388367, + "step": 2640, + "valid_targets_mean": 2194.9, + "valid_targets_min": 564 + }, + { + "epoch": 4.286871961102107, + "grad_norm": 1.0554910650197593, + "learning_rate": 1.5693786847368918e-05, + "loss": 0.1554, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1564255654811859, + "step": 2645, + "valid_targets_mean": 1670.0, + "valid_targets_min": 742 + }, + { + "epoch": 4.294975688816856, + "grad_norm": 0.8555240936332876, + "learning_rate": 1.5614894812425806e-05, + "loss": 0.1256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1624247133731842, + "step": 2650, + "valid_targets_mean": 2291.0, + "valid_targets_min": 685 + }, + { + "epoch": 4.303079416531604, + "grad_norm": 0.7761516523216345, + "learning_rate": 1.5536074390190786e-05, + "loss": 0.1422, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12907105684280396, + "step": 2655, + "valid_targets_mean": 2245.6, + "valid_targets_min": 518 + }, + { + "epoch": 4.311183144246353, + "grad_norm": 1.4090587432444437, + "learning_rate": 1.5457326867872177e-05, + "loss": 0.1745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17307105660438538, + "step": 2660, + "valid_targets_mean": 2902.2, + "valid_targets_min": 757 + }, + { + "epoch": 4.319286871961102, + "grad_norm": 0.7599287679035327, + "learning_rate": 1.5378653531487784e-05, + "loss": 0.1545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1532491147518158, + "step": 2665, + "valid_targets_mean": 2631.6, + "valid_targets_min": 539 + }, + { + "epoch": 4.327390599675851, + "grad_norm": 0.8485351714572734, + "learning_rate": 1.5300055665843875e-05, + "loss": 0.1487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14278289675712585, + "step": 2670, + "valid_targets_mean": 2180.6, + "valid_targets_min": 355 + }, + { + "epoch": 4.3354943273906, + "grad_norm": 0.7356937932327555, + "learning_rate": 1.5221534554514225e-05, + "loss": 0.1767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13728389143943787, + "step": 2675, + "valid_targets_mean": 2713.6, + "valid_targets_min": 908 + }, + { + "epoch": 4.343598055105349, + "grad_norm": 0.8514962373379015, + "learning_rate": 1.5143091479819146e-05, + "loss": 0.1627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12912364304065704, + "step": 2680, + "valid_targets_mean": 2137.4, + "valid_targets_min": 636 + }, + { + "epoch": 4.351701782820097, + "grad_norm": 0.8511576532571051, + "learning_rate": 1.5064727722804531e-05, + "loss": 0.1618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20828333497047424, + "step": 2685, + "valid_targets_mean": 2588.8, + "valid_targets_min": 495 + }, + { + "epoch": 4.359805510534846, + "grad_norm": 1.0492092151976131, + "learning_rate": 1.4986444563220948e-05, + "loss": 0.2177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23015139997005463, + "step": 2690, + "valid_targets_mean": 1931.1, + "valid_targets_min": 426 + }, + { + "epoch": 4.367909238249595, + "grad_norm": 0.9100981933490145, + "learning_rate": 1.4908243279502741e-05, + "loss": 0.1704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1979110985994339, + "step": 2695, + "valid_targets_mean": 2374.7, + "valid_targets_min": 1030 + }, + { + "epoch": 4.376012965964343, + "grad_norm": 0.7595733722776951, + "learning_rate": 1.4830125148747138e-05, + "loss": 0.1659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1817144751548767, + "step": 2700, + "valid_targets_mean": 2891.0, + "valid_targets_min": 782 + }, + { + "epoch": 4.3841166936790925, + "grad_norm": 0.7794754495340108, + "learning_rate": 1.475209144669341e-05, + "loss": 0.1891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2341780811548233, + "step": 2705, + "valid_targets_mean": 3009.4, + "valid_targets_min": 915 + }, + { + "epoch": 4.392220421393841, + "grad_norm": 0.9584422683500831, + "learning_rate": 1.4674143447702036e-05, + "loss": 0.1578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13136430084705353, + "step": 2710, + "valid_targets_mean": 1982.9, + "valid_targets_min": 671 + }, + { + "epoch": 4.40032414910859, + "grad_norm": 0.5758324638844335, + "learning_rate": 1.4596282424733877e-05, + "loss": 0.162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06660260260105133, + "step": 2715, + "valid_targets_mean": 3760.6, + "valid_targets_min": 805 + }, + { + "epoch": 4.408427876823339, + "grad_norm": 0.6558036998170294, + "learning_rate": 1.4518509649329406e-05, + "loss": 0.157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.157700315117836, + "step": 2720, + "valid_targets_mean": 3546.7, + "valid_targets_min": 932 + }, + { + "epoch": 4.416531604538087, + "grad_norm": 0.8633304546821023, + "learning_rate": 1.4440826391587926e-05, + "loss": 0.1659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12797395884990692, + "step": 2725, + "valid_targets_mean": 3217.9, + "valid_targets_min": 735 + }, + { + "epoch": 4.424635332252836, + "grad_norm": 0.6582694588059188, + "learning_rate": 1.4363233920146855e-05, + "loss": 0.1792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16166099905967712, + "step": 2730, + "valid_targets_mean": 4224.6, + "valid_targets_min": 801 + }, + { + "epoch": 4.432739059967585, + "grad_norm": 0.7022399878776832, + "learning_rate": 1.4285733502160955e-05, + "loss": 0.1478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1490454524755478, + "step": 2735, + "valid_targets_mean": 3313.8, + "valid_targets_min": 977 + }, + { + "epoch": 4.4408427876823335, + "grad_norm": 0.6688054411278289, + "learning_rate": 1.4208326403281702e-05, + "loss": 0.172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1919623762369156, + "step": 2740, + "valid_targets_mean": 3607.4, + "valid_targets_min": 693 + }, + { + "epoch": 4.448946515397083, + "grad_norm": 0.9020567053374119, + "learning_rate": 1.4131013887636576e-05, + "loss": 0.1863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16687409579753876, + "step": 2745, + "valid_targets_mean": 2358.4, + "valid_targets_min": 801 + }, + { + "epoch": 4.457050243111832, + "grad_norm": 0.7645681159215328, + "learning_rate": 1.4053797217808432e-05, + "loss": 0.1755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2162127047777176, + "step": 2750, + "valid_targets_mean": 3857.8, + "valid_targets_min": 853 + }, + { + "epoch": 4.46515397082658, + "grad_norm": 0.867803300126707, + "learning_rate": 1.3976677654814866e-05, + "loss": 0.1396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11421076953411102, + "step": 2755, + "valid_targets_mean": 2012.0, + "valid_targets_min": 510 + }, + { + "epoch": 4.473257698541329, + "grad_norm": 0.8742220073053529, + "learning_rate": 1.3899656458087647e-05, + "loss": 0.1435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15495306253433228, + "step": 2760, + "valid_targets_mean": 2445.5, + "valid_targets_min": 782 + }, + { + "epoch": 4.481361426256078, + "grad_norm": 0.7571940426598677, + "learning_rate": 1.3822734885452136e-05, + "loss": 0.2069, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1919158697128296, + "step": 2765, + "valid_targets_mean": 3757.5, + "valid_targets_min": 824 + }, + { + "epoch": 4.489465153970826, + "grad_norm": 0.8691721790493876, + "learning_rate": 1.3745914193106715e-05, + "loss": 0.1971, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14633077383041382, + "step": 2770, + "valid_targets_mean": 1987.1, + "valid_targets_min": 406 + }, + { + "epoch": 4.4975688816855754, + "grad_norm": 1.000044264127931, + "learning_rate": 1.366919563560233e-05, + "loss": 0.1779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3116513788700104, + "step": 2775, + "valid_targets_mean": 4190.8, + "valid_targets_min": 605 + }, + { + "epoch": 4.5056726094003245, + "grad_norm": 0.8331745332190477, + "learning_rate": 1.3592580465821956e-05, + "loss": 0.1926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18027907609939575, + "step": 2780, + "valid_targets_mean": 2925.8, + "valid_targets_min": 809 + }, + { + "epoch": 4.513776337115073, + "grad_norm": 0.8471157769543586, + "learning_rate": 1.3516069934960174e-05, + "loss": 0.1475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18600761890411377, + "step": 2785, + "valid_targets_mean": 2895.5, + "valid_targets_min": 859 + }, + { + "epoch": 4.521880064829822, + "grad_norm": 0.8127559212585381, + "learning_rate": 1.3439665292502695e-05, + "loss": 0.171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16321048140525818, + "step": 2790, + "valid_targets_mean": 2676.2, + "valid_targets_min": 899 + }, + { + "epoch": 4.52998379254457, + "grad_norm": 0.8681867438751043, + "learning_rate": 1.3363367786205985e-05, + "loss": 0.2175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16139662265777588, + "step": 2795, + "valid_targets_mean": 2765.1, + "valid_targets_min": 1075 + }, + { + "epoch": 4.538087520259319, + "grad_norm": 0.6147735291418077, + "learning_rate": 1.3287178662076893e-05, + "loss": 0.1871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2413632571697235, + "step": 2800, + "valid_targets_mean": 4606.2, + "valid_targets_min": 679 + }, + { + "epoch": 4.546191247974068, + "grad_norm": 0.7563381558755065, + "learning_rate": 1.3211099164352261e-05, + "loss": 0.1899, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21764537692070007, + "step": 2805, + "valid_targets_mean": 2891.3, + "valid_targets_min": 521 + }, + { + "epoch": 4.5542949756888165, + "grad_norm": 0.7369725771049391, + "learning_rate": 1.3135130535478655e-05, + "loss": 0.1627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15378154814243317, + "step": 2810, + "valid_targets_mean": 3029.8, + "valid_targets_min": 686 + }, + { + "epoch": 4.562398703403566, + "grad_norm": 0.8372190818629114, + "learning_rate": 1.3059274016092057e-05, + "loss": 0.1866, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17605245113372803, + "step": 2815, + "valid_targets_mean": 3103.2, + "valid_targets_min": 736 + }, + { + "epoch": 4.570502431118315, + "grad_norm": 0.831632617042843, + "learning_rate": 1.2983530844997585e-05, + "loss": 0.1639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1470744013786316, + "step": 2820, + "valid_targets_mean": 2123.9, + "valid_targets_min": 524 + }, + { + "epoch": 4.578606158833063, + "grad_norm": 0.8005009026500092, + "learning_rate": 1.2907902259149287e-05, + "loss": 0.1649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17905943095684052, + "step": 2825, + "valid_targets_mean": 2288.1, + "valid_targets_min": 669 + }, + { + "epoch": 4.586709886547812, + "grad_norm": 0.7146959989523917, + "learning_rate": 1.2832389493629928e-05, + "loss": 0.1502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18556524813175201, + "step": 2830, + "valid_targets_mean": 3489.2, + "valid_targets_min": 866 + }, + { + "epoch": 4.594813614262561, + "grad_norm": 0.982213080774264, + "learning_rate": 1.275699378163083e-05, + "loss": 0.1722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20789432525634766, + "step": 2835, + "valid_targets_mean": 2519.7, + "valid_targets_min": 328 + }, + { + "epoch": 4.602917341977309, + "grad_norm": 0.5100944274599919, + "learning_rate": 1.2681716354431704e-05, + "loss": 0.1586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09983259439468384, + "step": 2840, + "valid_targets_mean": 5123.9, + "valid_targets_min": 505 + }, + { + "epoch": 4.611021069692058, + "grad_norm": 0.9532072125518306, + "learning_rate": 1.2606558441380587e-05, + "loss": 0.1636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2288963943719864, + "step": 2845, + "valid_targets_mean": 1986.6, + "valid_targets_min": 459 + }, + { + "epoch": 4.6191247974068075, + "grad_norm": 0.7215937040297506, + "learning_rate": 1.2531521269873736e-05, + "loss": 0.1727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18275988101959229, + "step": 2850, + "valid_targets_mean": 3461.2, + "valid_targets_min": 865 + }, + { + "epoch": 4.627228525121556, + "grad_norm": 0.7854523287609847, + "learning_rate": 1.245660606533559e-05, + "loss": 0.1813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2999517619609833, + "step": 2855, + "valid_targets_mean": 3333.9, + "valid_targets_min": 710 + }, + { + "epoch": 4.635332252836305, + "grad_norm": 0.9116092627493867, + "learning_rate": 1.2381814051198751e-05, + "loss": 0.1384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13685894012451172, + "step": 2860, + "valid_targets_mean": 1892.0, + "valid_targets_min": 704 + }, + { + "epoch": 4.643435980551054, + "grad_norm": 2.35524174458915, + "learning_rate": 1.2307146448884021e-05, + "loss": 0.1625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1372871696949005, + "step": 2865, + "valid_targets_mean": 2473.6, + "valid_targets_min": 555 + }, + { + "epoch": 4.651539708265802, + "grad_norm": 0.7956965758481088, + "learning_rate": 1.2232604477780445e-05, + "loss": 0.1706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20719727873802185, + "step": 2870, + "valid_targets_mean": 4660.5, + "valid_targets_min": 1343 + }, + { + "epoch": 4.659643435980551, + "grad_norm": 0.7578731157658531, + "learning_rate": 1.2158189355225382e-05, + "loss": 0.1335, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16715043783187866, + "step": 2875, + "valid_targets_mean": 3336.8, + "valid_targets_min": 1080 + }, + { + "epoch": 4.667747163695299, + "grad_norm": 0.5455735164703488, + "learning_rate": 1.2083902296484659e-05, + "loss": 0.164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1390770971775055, + "step": 2880, + "valid_targets_mean": 5098.4, + "valid_targets_min": 741 + }, + { + "epoch": 4.6758508914100485, + "grad_norm": 0.8754352764073323, + "learning_rate": 1.2009744514732698e-05, + "loss": 0.1251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1272560954093933, + "step": 2885, + "valid_targets_mean": 1902.8, + "valid_targets_min": 856 + }, + { + "epoch": 4.683954619124798, + "grad_norm": 0.7125376899744773, + "learning_rate": 1.1935717221032707e-05, + "loss": 0.173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12357346713542938, + "step": 2890, + "valid_targets_mean": 3547.1, + "valid_targets_min": 686 + }, + { + "epoch": 4.692058346839546, + "grad_norm": 0.9406847214639302, + "learning_rate": 1.1861821624316916e-05, + "loss": 0.1687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1456756889820099, + "step": 2895, + "valid_targets_mean": 2065.8, + "valid_targets_min": 974 + }, + { + "epoch": 4.700162074554295, + "grad_norm": 0.6883619788971869, + "learning_rate": 1.1788058931366822e-05, + "loss": 0.1541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2756902575492859, + "step": 2900, + "valid_targets_mean": 4024.9, + "valid_targets_min": 880 + }, + { + "epoch": 4.708265802269044, + "grad_norm": 0.7882633472589065, + "learning_rate": 1.1714430346793479e-05, + "loss": 0.1834, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13730135560035706, + "step": 2905, + "valid_targets_mean": 2645.8, + "valid_targets_min": 928 + }, + { + "epoch": 4.716369529983792, + "grad_norm": 0.8885552000120186, + "learning_rate": 1.1640937073017837e-05, + "loss": 0.1522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11991922557353973, + "step": 2910, + "valid_targets_mean": 2493.4, + "valid_targets_min": 918 + }, + { + "epoch": 4.724473257698541, + "grad_norm": 0.9089134585480407, + "learning_rate": 1.1567580310251097e-05, + "loss": 0.1675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16488614678382874, + "step": 2915, + "valid_targets_mean": 2730.6, + "valid_targets_min": 571 + }, + { + "epoch": 4.73257698541329, + "grad_norm": 0.9805367472588664, + "learning_rate": 1.1494361256475105e-05, + "loss": 0.1403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1491793394088745, + "step": 2920, + "valid_targets_mean": 1666.0, + "valid_targets_min": 363 + }, + { + "epoch": 4.740680713128039, + "grad_norm": 0.9415315025271603, + "learning_rate": 1.1421281107422804e-05, + "loss": 0.1648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2649831473827362, + "step": 2925, + "valid_targets_mean": 2495.4, + "valid_targets_min": 334 + }, + { + "epoch": 4.748784440842788, + "grad_norm": 0.7579993458608958, + "learning_rate": 1.1348341056558709e-05, + "loss": 0.1862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21222791075706482, + "step": 2930, + "valid_targets_mean": 3278.5, + "valid_targets_min": 807 + }, + { + "epoch": 4.756888168557537, + "grad_norm": 0.8752970421825866, + "learning_rate": 1.1275542295059384e-05, + "loss": 0.1518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11920589953660965, + "step": 2935, + "valid_targets_mean": 2097.3, + "valid_targets_min": 701 + }, + { + "epoch": 4.764991896272285, + "grad_norm": 0.9284513357192202, + "learning_rate": 1.1202886011794023e-05, + "loss": 0.1714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2457813024520874, + "step": 2940, + "valid_targets_mean": 2385.9, + "valid_targets_min": 782 + }, + { + "epoch": 4.773095623987034, + "grad_norm": 0.9398075096458122, + "learning_rate": 1.1130373393305004e-05, + "loss": 0.1541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12277652323246002, + "step": 2945, + "valid_targets_mean": 2361.6, + "valid_targets_min": 741 + }, + { + "epoch": 4.781199351701783, + "grad_norm": 1.1737385550719055, + "learning_rate": 1.1058005623788564e-05, + "loss": 0.184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.140442356467247, + "step": 2950, + "valid_targets_mean": 1775.1, + "valid_targets_min": 1008 + }, + { + "epoch": 4.789303079416531, + "grad_norm": 0.8063596837128346, + "learning_rate": 1.0985783885075407e-05, + "loss": 0.189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18340390920639038, + "step": 2955, + "valid_targets_mean": 2692.0, + "valid_targets_min": 697 + }, + { + "epoch": 4.7974068071312805, + "grad_norm": 0.8609461611073953, + "learning_rate": 1.0913709356611411e-05, + "loss": 0.1328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14733055233955383, + "step": 2960, + "valid_targets_mean": 2302.0, + "valid_targets_min": 860 + }, + { + "epoch": 4.805510534846029, + "grad_norm": 0.5940642190543134, + "learning_rate": 1.0841783215438406e-05, + "loss": 0.1554, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19221073389053345, + "step": 2965, + "valid_targets_mean": 5023.8, + "valid_targets_min": 807 + }, + { + "epoch": 4.813614262560778, + "grad_norm": 1.0256200972517306, + "learning_rate": 1.07700066361749e-05, + "loss": 0.1923, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23758450150489807, + "step": 2970, + "valid_targets_mean": 2783.9, + "valid_targets_min": 633 + }, + { + "epoch": 4.821717990275527, + "grad_norm": 0.8057695967861453, + "learning_rate": 1.0698380790996921e-05, + "loss": 0.1476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15349268913269043, + "step": 2975, + "valid_targets_mean": 3128.6, + "valid_targets_min": 1141 + }, + { + "epoch": 4.829821717990275, + "grad_norm": 0.7274055955361053, + "learning_rate": 1.0626906849618903e-05, + "loss": 0.1469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16329367458820343, + "step": 2980, + "valid_targets_mean": 3118.8, + "valid_targets_min": 761 + }, + { + "epoch": 4.837925445705024, + "grad_norm": 0.8346294549363144, + "learning_rate": 1.0555585979274513e-05, + "loss": 0.2442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2559146285057068, + "step": 2985, + "valid_targets_mean": 3082.2, + "valid_targets_min": 522 + }, + { + "epoch": 4.846029173419773, + "grad_norm": 1.3240258810633323, + "learning_rate": 1.0484419344697667e-05, + "loss": 0.181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13262823224067688, + "step": 2990, + "valid_targets_mean": 1733.1, + "valid_targets_min": 564 + }, + { + "epoch": 4.854132901134522, + "grad_norm": 0.995006362928102, + "learning_rate": 1.0413408108103445e-05, + "loss": 0.1576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20635709166526794, + "step": 2995, + "valid_targets_mean": 3296.1, + "valid_targets_min": 562 + }, + { + "epoch": 4.862236628849271, + "grad_norm": 0.8460865973678914, + "learning_rate": 1.0342553429169163e-05, + "loss": 0.1494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11528654396533966, + "step": 3000, + "valid_targets_mean": 1933.8, + "valid_targets_min": 607 + }, + { + "epoch": 4.87034035656402, + "grad_norm": 1.147006877758278, + "learning_rate": 1.0271856465015388e-05, + "loss": 0.1749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18788880109786987, + "step": 3005, + "valid_targets_mean": 1968.5, + "valid_targets_min": 626 + }, + { + "epoch": 4.878444084278768, + "grad_norm": 0.8479478722559892, + "learning_rate": 1.0201318370187065e-05, + "loss": 0.1495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13283419609069824, + "step": 3010, + "valid_targets_mean": 2158.6, + "valid_targets_min": 496 + }, + { + "epoch": 4.886547811993517, + "grad_norm": 0.7961067473066475, + "learning_rate": 1.0130940296634683e-05, + "loss": 0.148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18867862224578857, + "step": 3015, + "valid_targets_mean": 2611.9, + "valid_targets_min": 514 + }, + { + "epoch": 4.894651539708266, + "grad_norm": 1.0151300176784335, + "learning_rate": 1.0060723393695411e-05, + "loss": 0.1832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23992988467216492, + "step": 3020, + "valid_targets_mean": 2529.1, + "valid_targets_min": 525 + }, + { + "epoch": 4.902755267423014, + "grad_norm": 0.9733399168762327, + "learning_rate": 9.990668808074378e-06, + "loss": 0.1738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14787709712982178, + "step": 3025, + "valid_targets_mean": 1709.7, + "valid_targets_min": 781 + }, + { + "epoch": 4.9108589951377635, + "grad_norm": 0.72365986298008, + "learning_rate": 9.920777683825906e-06, + "loss": 0.1701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19551479816436768, + "step": 3030, + "valid_targets_mean": 2936.8, + "valid_targets_min": 371 + }, + { + "epoch": 4.918962722852513, + "grad_norm": 0.8027516059720535, + "learning_rate": 9.851051162334871e-06, + "loss": 0.1566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1954280436038971, + "step": 3035, + "valid_targets_mean": 2570.2, + "valid_targets_min": 761 + }, + { + "epoch": 4.927066450567261, + "grad_norm": 0.762963457517037, + "learning_rate": 9.781490382298018e-06, + "loss": 0.1348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15942156314849854, + "step": 3040, + "valid_targets_mean": 3299.4, + "valid_targets_min": 943 + }, + { + "epoch": 4.93517017828201, + "grad_norm": 0.8720989205317229, + "learning_rate": 9.712096479705382e-06, + "loss": 0.1447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11061131954193115, + "step": 3045, + "valid_targets_mean": 2380.6, + "valid_targets_min": 733 + }, + { + "epoch": 4.943273905996758, + "grad_norm": 0.6183829564964957, + "learning_rate": 9.642870587821761e-06, + "loss": 0.1486, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13789555430412292, + "step": 3050, + "valid_targets_mean": 3705.7, + "valid_targets_min": 722 + }, + { + "epoch": 4.951377633711507, + "grad_norm": 0.9202251472551713, + "learning_rate": 9.573813837168166e-06, + "loss": 0.1666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16328653693199158, + "step": 3055, + "valid_targets_mean": 2631.2, + "valid_targets_min": 461 + }, + { + "epoch": 4.959481361426256, + "grad_norm": 0.6337283642064705, + "learning_rate": 9.504927355503399e-06, + "loss": 0.1594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0957586020231247, + "step": 3060, + "valid_targets_mean": 3474.9, + "valid_targets_min": 715 + }, + { + "epoch": 4.9675850891410045, + "grad_norm": 0.9004406261162295, + "learning_rate": 9.436212267805591e-06, + "loss": 0.1495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12372364848852158, + "step": 3065, + "valid_targets_mean": 1903.6, + "valid_targets_min": 656 + }, + { + "epoch": 4.975688816855754, + "grad_norm": 0.5981555247573468, + "learning_rate": 9.367669696253885e-06, + "loss": 0.2001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10008936375379562, + "step": 3070, + "valid_targets_mean": 3947.9, + "valid_targets_min": 910 + }, + { + "epoch": 4.983792544570503, + "grad_norm": 0.7429447660340368, + "learning_rate": 9.299300760210059e-06, + "loss": 0.1805, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2495230734348297, + "step": 3075, + "valid_targets_mean": 3267.3, + "valid_targets_min": 613 + }, + { + "epoch": 4.991896272285251, + "grad_norm": 0.8545859108818904, + "learning_rate": 9.231106576200268e-06, + "loss": 0.1939, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1375666856765747, + "step": 3080, + "valid_targets_mean": 2512.9, + "valid_targets_min": 1033 + }, + { + "epoch": 5.0, + "grad_norm": 1.0000828363963175, + "learning_rate": 9.163088257896825e-06, + "loss": 0.153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12231343239545822, + "step": 3085, + "valid_targets_mean": 1496.2, + "valid_targets_min": 692 + }, + { + "epoch": 5.008103727714749, + "grad_norm": 1.283136207413487, + "learning_rate": 9.095246916099978e-06, + "loss": 0.165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16958777606487274, + "step": 3090, + "valid_targets_mean": 2647.4, + "valid_targets_min": 582 + }, + { + "epoch": 5.016207455429497, + "grad_norm": 0.943279239764171, + "learning_rate": 9.027583658719812e-06, + "loss": 0.1412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12485413998365402, + "step": 3095, + "valid_targets_mean": 1937.0, + "valid_targets_min": 484 + }, + { + "epoch": 5.024311183144246, + "grad_norm": 0.8436158526907404, + "learning_rate": 8.960099590758104e-06, + "loss": 0.1453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1455596685409546, + "step": 3100, + "valid_targets_mean": 2281.4, + "valid_targets_min": 820 + }, + { + "epoch": 5.0324149108589955, + "grad_norm": 0.9944315406392032, + "learning_rate": 8.892795814290342e-06, + "loss": 0.1553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19890674948692322, + "step": 3105, + "valid_targets_mean": 2202.4, + "valid_targets_min": 754 + }, + { + "epoch": 5.040518638573744, + "grad_norm": 0.6387733217996346, + "learning_rate": 8.825673428447668e-06, + "loss": 0.1279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13640737533569336, + "step": 3110, + "valid_targets_mean": 4190.6, + "valid_targets_min": 698 + }, + { + "epoch": 5.048622366288493, + "grad_norm": 0.7019234673750581, + "learning_rate": 8.758733529398945e-06, + "loss": 0.1425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09768907725811005, + "step": 3115, + "valid_targets_mean": 2849.2, + "valid_targets_min": 862 + }, + { + "epoch": 5.056726094003242, + "grad_norm": 1.384661286889182, + "learning_rate": 8.691977210332892e-06, + "loss": 0.1274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13747167587280273, + "step": 3120, + "valid_targets_mean": 2221.4, + "valid_targets_min": 670 + }, + { + "epoch": 5.06482982171799, + "grad_norm": 0.930073668995111, + "learning_rate": 8.625405561440172e-06, + "loss": 0.1412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11428385972976685, + "step": 3125, + "valid_targets_mean": 1706.4, + "valid_targets_min": 628 + }, + { + "epoch": 5.072933549432739, + "grad_norm": 0.7331543719322425, + "learning_rate": 8.559019669895648e-06, + "loss": 0.1846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22601112723350525, + "step": 3130, + "valid_targets_mean": 3574.3, + "valid_targets_min": 398 + }, + { + "epoch": 5.081037277147487, + "grad_norm": 0.9113971174023665, + "learning_rate": 8.492820619840563e-06, + "loss": 0.1639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.287741094827652, + "step": 3135, + "valid_targets_mean": 2638.6, + "valid_targets_min": 341 + }, + { + "epoch": 5.0891410048622365, + "grad_norm": 0.8833049318176625, + "learning_rate": 8.426809492364907e-06, + "loss": 0.1215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11894373595714569, + "step": 3140, + "valid_targets_mean": 2332.4, + "valid_targets_min": 822 + }, + { + "epoch": 5.097244732576986, + "grad_norm": 0.5946354001150366, + "learning_rate": 8.360987365489698e-06, + "loss": 0.1223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1098465770483017, + "step": 3145, + "valid_targets_mean": 3904.6, + "valid_targets_min": 1072 + }, + { + "epoch": 5.105348460291734, + "grad_norm": 0.8044122587163143, + "learning_rate": 8.295355314149413e-06, + "loss": 0.1779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13747110962867737, + "step": 3150, + "valid_targets_mean": 2310.0, + "valid_targets_min": 337 + }, + { + "epoch": 5.113452188006483, + "grad_norm": 0.5980128769879898, + "learning_rate": 8.229914410174435e-06, + "loss": 0.1099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08813309669494629, + "step": 3155, + "valid_targets_mean": 3792.1, + "valid_targets_min": 993 + }, + { + "epoch": 5.121555915721232, + "grad_norm": 0.622312258802971, + "learning_rate": 8.16466572227352e-06, + "loss": 0.1068, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0873100534081459, + "step": 3160, + "valid_targets_mean": 3185.2, + "valid_targets_min": 554 + }, + { + "epoch": 5.12965964343598, + "grad_norm": 0.8954975866967348, + "learning_rate": 8.099610316016373e-06, + "loss": 0.1456, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13367177546024323, + "step": 3165, + "valid_targets_mean": 2045.3, + "valid_targets_min": 501 + }, + { + "epoch": 5.137763371150729, + "grad_norm": 0.8258936353394385, + "learning_rate": 8.03474925381625e-06, + "loss": 0.1564, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16128379106521606, + "step": 3170, + "valid_targets_mean": 3819.5, + "valid_targets_min": 883 + }, + { + "epoch": 5.145867098865478, + "grad_norm": 0.9892899982440995, + "learning_rate": 7.97008359491257e-06, + "loss": 0.1427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1371055245399475, + "step": 3175, + "valid_targets_mean": 1996.4, + "valid_targets_min": 902 + }, + { + "epoch": 5.153970826580227, + "grad_norm": 0.9178033916752635, + "learning_rate": 7.905614395353649e-06, + "loss": 0.1424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13662882149219513, + "step": 3180, + "valid_targets_mean": 2094.7, + "valid_targets_min": 269 + }, + { + "epoch": 5.162074554294976, + "grad_norm": 0.9445719514300331, + "learning_rate": 7.841342707979442e-06, + "loss": 0.1703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30393123626708984, + "step": 3185, + "valid_targets_mean": 3291.1, + "valid_targets_min": 862 + }, + { + "epoch": 5.170178282009725, + "grad_norm": 0.7306623592816158, + "learning_rate": 7.77726958240437e-06, + "loss": 0.1444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.100005142390728, + "step": 3190, + "valid_targets_mean": 2373.6, + "valid_targets_min": 1047 + }, + { + "epoch": 5.178282009724473, + "grad_norm": 0.5681715595432818, + "learning_rate": 7.713396065000133e-06, + "loss": 0.1664, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16320055723190308, + "step": 3195, + "valid_targets_mean": 5024.4, + "valid_targets_min": 936 + }, + { + "epoch": 5.186385737439222, + "grad_norm": 0.9141392131056238, + "learning_rate": 7.649723198878676e-06, + "loss": 0.13, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17028486728668213, + "step": 3200, + "valid_targets_mean": 2462.1, + "valid_targets_min": 855 + }, + { + "epoch": 5.194489465153971, + "grad_norm": 1.0211463763575084, + "learning_rate": 7.586252023875125e-06, + "loss": 0.1577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13707610964775085, + "step": 3205, + "valid_targets_mean": 1633.6, + "valid_targets_min": 501 + }, + { + "epoch": 5.2025931928687195, + "grad_norm": 0.9857389473887078, + "learning_rate": 7.522983576530791e-06, + "loss": 0.1549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1323859989643097, + "step": 3210, + "valid_targets_mean": 1957.2, + "valid_targets_min": 550 + }, + { + "epoch": 5.210696920583469, + "grad_norm": 1.0143771026388138, + "learning_rate": 7.459918890076272e-06, + "loss": 0.1464, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16915765404701233, + "step": 3215, + "valid_targets_mean": 3357.4, + "valid_targets_min": 689 + }, + { + "epoch": 5.218800648298217, + "grad_norm": 0.8291361309836706, + "learning_rate": 7.397058994414563e-06, + "loss": 0.1304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18082496523857117, + "step": 3220, + "valid_targets_mean": 2756.2, + "valid_targets_min": 1082 + }, + { + "epoch": 5.226904376012966, + "grad_norm": 0.6737718264710545, + "learning_rate": 7.3344049161042495e-06, + "loss": 0.1324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14597007632255554, + "step": 3225, + "valid_targets_mean": 4198.1, + "valid_targets_min": 1181 + }, + { + "epoch": 5.235008103727715, + "grad_norm": 0.6961828666596263, + "learning_rate": 7.271957678342738e-06, + "loss": 0.1605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07819020748138428, + "step": 3230, + "valid_targets_mean": 2952.6, + "valid_targets_min": 601 + }, + { + "epoch": 5.243111831442463, + "grad_norm": 1.2860828286107657, + "learning_rate": 7.209718300949519e-06, + "loss": 0.152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11555998027324677, + "step": 3235, + "valid_targets_mean": 2056.3, + "valid_targets_min": 974 + }, + { + "epoch": 5.251215559157212, + "grad_norm": 0.6896094917138008, + "learning_rate": 7.14768780034957e-06, + "loss": 0.1518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18422646820545197, + "step": 3240, + "valid_targets_mean": 3863.6, + "valid_targets_min": 954 + }, + { + "epoch": 5.259319286871961, + "grad_norm": 0.8253153069121804, + "learning_rate": 7.085867189556697e-06, + "loss": 0.2144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20815926790237427, + "step": 3245, + "valid_targets_mean": 3034.3, + "valid_targets_min": 786 + }, + { + "epoch": 5.26742301458671, + "grad_norm": 0.7086894492399584, + "learning_rate": 7.024257478157015e-06, + "loss": 0.1555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10221417248249054, + "step": 3250, + "valid_targets_mean": 3288.2, + "valid_targets_min": 1042 + }, + { + "epoch": 5.275526742301459, + "grad_norm": 0.5798277120600573, + "learning_rate": 6.96285967229249e-06, + "loss": 0.1591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14061783254146576, + "step": 3255, + "valid_targets_mean": 5321.4, + "valid_targets_min": 632 + }, + { + "epoch": 5.283630470016208, + "grad_norm": 0.6297473245719093, + "learning_rate": 6.901674774644449e-06, + "loss": 0.1113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09192270785570145, + "step": 3260, + "valid_targets_mean": 3162.9, + "valid_targets_min": 722 + }, + { + "epoch": 5.291734197730956, + "grad_norm": 0.655326604774191, + "learning_rate": 6.840703784417262e-06, + "loss": 0.1477, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1667039543390274, + "step": 3265, + "valid_targets_mean": 3882.1, + "valid_targets_min": 910 + }, + { + "epoch": 5.299837925445705, + "grad_norm": 0.9464717792115727, + "learning_rate": 6.779947697321974e-06, + "loss": 0.1328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11113858968019485, + "step": 3270, + "valid_targets_mean": 2199.8, + "valid_targets_min": 342 + }, + { + "epoch": 5.307941653160454, + "grad_norm": 0.8508764069285351, + "learning_rate": 6.719407505560094e-06, + "loss": 0.1571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14729882776737213, + "step": 3275, + "valid_targets_mean": 2665.4, + "valid_targets_min": 1089 + }, + { + "epoch": 5.316045380875202, + "grad_norm": 1.03542912623809, + "learning_rate": 6.659084197807348e-06, + "loss": 0.1997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4161680340766907, + "step": 3280, + "valid_targets_mean": 4139.7, + "valid_targets_min": 656 + }, + { + "epoch": 5.3241491085899515, + "grad_norm": 0.8555285929048103, + "learning_rate": 6.598978759197554e-06, + "loss": 0.119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19164308905601501, + "step": 3285, + "valid_targets_mean": 2472.9, + "valid_targets_min": 488 + }, + { + "epoch": 5.332252836304701, + "grad_norm": 0.7884153355344533, + "learning_rate": 6.539092171306541e-06, + "loss": 0.1166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13263387978076935, + "step": 3290, + "valid_targets_mean": 2924.2, + "valid_targets_min": 873 + }, + { + "epoch": 5.340356564019449, + "grad_norm": 0.7139375465777716, + "learning_rate": 6.479425412136093e-06, + "loss": 0.1412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08301765471696854, + "step": 3295, + "valid_targets_mean": 3015.2, + "valid_targets_min": 456 + }, + { + "epoch": 5.348460291734198, + "grad_norm": 0.8136742499601274, + "learning_rate": 6.419979456098016e-06, + "loss": 0.1349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0914689153432846, + "step": 3300, + "valid_targets_mean": 2395.7, + "valid_targets_min": 471 + }, + { + "epoch": 5.356564019448946, + "grad_norm": 0.8426989185718268, + "learning_rate": 6.360755273998174e-06, + "loss": 0.1761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18558555841445923, + "step": 3305, + "valid_targets_mean": 3175.8, + "valid_targets_min": 426 + }, + { + "epoch": 5.364667747163695, + "grad_norm": 0.7043825571960722, + "learning_rate": 6.301753833020691e-06, + "loss": 0.1404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.185553640127182, + "step": 3310, + "valid_targets_mean": 3304.7, + "valid_targets_min": 863 + }, + { + "epoch": 5.372771474878444, + "grad_norm": 0.9549981037856647, + "learning_rate": 6.242976096712112e-06, + "loss": 0.1597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22695674002170563, + "step": 3315, + "valid_targets_mean": 2425.8, + "valid_targets_min": 334 + }, + { + "epoch": 5.3808752025931925, + "grad_norm": 0.8195898613602235, + "learning_rate": 6.18442302496568e-06, + "loss": 0.1695, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13929906487464905, + "step": 3320, + "valid_targets_mean": 2468.8, + "valid_targets_min": 524 + }, + { + "epoch": 5.388978930307942, + "grad_norm": 1.029682002666224, + "learning_rate": 6.1260955740056835e-06, + "loss": 0.1347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1550445258617401, + "step": 3325, + "valid_targets_mean": 2025.9, + "valid_targets_min": 1103 + }, + { + "epoch": 5.397082658022691, + "grad_norm": 1.1169106309979446, + "learning_rate": 6.067994696371797e-06, + "loss": 0.1246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15255971252918243, + "step": 3330, + "valid_targets_mean": 2080.6, + "valid_targets_min": 1215 + }, + { + "epoch": 5.405186385737439, + "grad_norm": 0.8850686193118384, + "learning_rate": 6.010121340903574e-06, + "loss": 0.1224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1398320496082306, + "step": 3335, + "valid_targets_mean": 2204.1, + "valid_targets_min": 882 + }, + { + "epoch": 5.413290113452188, + "grad_norm": 0.6189940261103392, + "learning_rate": 5.952476452724898e-06, + "loss": 0.1416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10511147975921631, + "step": 3340, + "valid_targets_mean": 3397.1, + "valid_targets_min": 536 + }, + { + "epoch": 5.421393841166937, + "grad_norm": 0.7869848129875665, + "learning_rate": 5.895060973228606e-06, + "loss": 0.1406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17136400938034058, + "step": 3345, + "valid_targets_mean": 3590.9, + "valid_targets_min": 216 + }, + { + "epoch": 5.429497568881685, + "grad_norm": 0.9786557729553311, + "learning_rate": 5.837875840061064e-06, + "loss": 0.1352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12982967495918274, + "step": 3350, + "valid_targets_mean": 1677.5, + "valid_targets_min": 514 + }, + { + "epoch": 5.437601296596434, + "grad_norm": 0.8317312217862712, + "learning_rate": 5.780921987106878e-06, + "loss": 0.136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12355664372444153, + "step": 3355, + "valid_targets_mean": 2306.0, + "valid_targets_min": 950 + }, + { + "epoch": 5.4457050243111835, + "grad_norm": 0.7338499031126493, + "learning_rate": 5.724200344473651e-06, + "loss": 0.1584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19103437662124634, + "step": 3360, + "valid_targets_mean": 3675.8, + "valid_targets_min": 949 + }, + { + "epoch": 5.453808752025932, + "grad_norm": 0.6656448002153311, + "learning_rate": 5.66771183847677e-06, + "loss": 0.1301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09611663967370987, + "step": 3365, + "valid_targets_mean": 2633.0, + "valid_targets_min": 612 + }, + { + "epoch": 5.461912479740681, + "grad_norm": 1.0874356250956578, + "learning_rate": 5.611457391624309e-06, + "loss": 0.1322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15315888822078705, + "step": 3370, + "valid_targets_mean": 3031.9, + "valid_targets_min": 472 + }, + { + "epoch": 5.47001620745543, + "grad_norm": 1.0445039076091298, + "learning_rate": 5.555437922601918e-06, + "loss": 0.1829, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16164973378181458, + "step": 3375, + "valid_targets_mean": 1917.6, + "valid_targets_min": 514 + }, + { + "epoch": 5.478119935170178, + "grad_norm": 0.8104164724449195, + "learning_rate": 5.499654346257879e-06, + "loss": 0.1359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12006053328514099, + "step": 3380, + "valid_targets_mean": 2426.0, + "valid_targets_min": 901 + }, + { + "epoch": 5.486223662884927, + "grad_norm": 0.7493612429318233, + "learning_rate": 5.444107573588116e-06, + "loss": 0.1917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1710258424282074, + "step": 3385, + "valid_targets_mean": 3943.8, + "valid_targets_min": 594 + }, + { + "epoch": 5.4943273905996755, + "grad_norm": 0.893693814456575, + "learning_rate": 5.388798511721329e-06, + "loss": 0.1456, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19235970079898834, + "step": 3390, + "valid_targets_mean": 2304.8, + "valid_targets_min": 487 + }, + { + "epoch": 5.5024311183144246, + "grad_norm": 0.9281024443148513, + "learning_rate": 5.333728063904213e-06, + "loss": 0.139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13582965731620789, + "step": 3395, + "valid_targets_mean": 3602.9, + "valid_targets_min": 754 + }, + { + "epoch": 5.510534846029174, + "grad_norm": 0.7276815688419767, + "learning_rate": 5.278897129486656e-06, + "loss": 0.1885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13215868175029755, + "step": 3400, + "valid_targets_mean": 3562.9, + "valid_targets_min": 945 + }, + { + "epoch": 5.518638573743922, + "grad_norm": 0.8662128457006715, + "learning_rate": 5.224306603907095e-06, + "loss": 0.1703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12716642022132874, + "step": 3405, + "valid_targets_mean": 2820.1, + "valid_targets_min": 929 + }, + { + "epoch": 5.526742301458671, + "grad_norm": 0.7807235589640765, + "learning_rate": 5.169957378677859e-06, + "loss": 0.1258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1254601776599884, + "step": 3410, + "valid_targets_mean": 2691.8, + "valid_targets_min": 978 + }, + { + "epoch": 5.53484602917342, + "grad_norm": 0.7560906227588559, + "learning_rate": 5.11585034137064e-06, + "loss": 0.1318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23728054761886597, + "step": 3415, + "valid_targets_mean": 3586.0, + "valid_targets_min": 656 + }, + { + "epoch": 5.542949756888168, + "grad_norm": 1.102731743141789, + "learning_rate": 5.061986375601977e-06, + "loss": 0.1495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13842478394508362, + "step": 3420, + "valid_targets_mean": 1943.7, + "valid_targets_min": 645 + }, + { + "epoch": 5.551053484602917, + "grad_norm": 0.7825513713620168, + "learning_rate": 5.0083663610188215e-06, + "loss": 0.1298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11432783305644989, + "step": 3425, + "valid_targets_mean": 2848.9, + "valid_targets_min": 740 + }, + { + "epoch": 5.5591572123176665, + "grad_norm": 0.9134357132489729, + "learning_rate": 4.954991173284207e-06, + "loss": 0.1461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16197806596755981, + "step": 3430, + "valid_targets_mean": 2609.7, + "valid_targets_min": 677 + }, + { + "epoch": 5.567260940032415, + "grad_norm": 0.9586959035897938, + "learning_rate": 4.901861684062899e-06, + "loss": 0.152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11932115256786346, + "step": 3435, + "valid_targets_mean": 2300.0, + "valid_targets_min": 652 + }, + { + "epoch": 5.575364667747164, + "grad_norm": 0.9077572994362951, + "learning_rate": 4.848978761007206e-06, + "loss": 0.1211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15258963406085968, + "step": 3440, + "valid_targets_mean": 2265.6, + "valid_targets_min": 932 + }, + { + "epoch": 5.583468395461912, + "grad_norm": 1.1023316942136527, + "learning_rate": 4.796343267742782e-06, + "loss": 0.1797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14780212938785553, + "step": 3445, + "valid_targets_mean": 1948.9, + "valid_targets_min": 841 + }, + { + "epoch": 5.591572123176661, + "grad_norm": 0.7251051495316837, + "learning_rate": 4.743956063854529e-06, + "loss": 0.1311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09377004206180573, + "step": 3450, + "valid_targets_mean": 2942.9, + "valid_targets_min": 811 + }, + { + "epoch": 5.59967585089141, + "grad_norm": 0.8488916792785556, + "learning_rate": 4.691818004872557e-06, + "loss": 0.1671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12640199065208435, + "step": 3455, + "valid_targets_mean": 2410.6, + "valid_targets_min": 972 + }, + { + "epoch": 5.607779578606159, + "grad_norm": 0.8737493446227516, + "learning_rate": 4.639929942258217e-06, + "loss": 0.1383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11130397021770477, + "step": 3460, + "valid_targets_mean": 1994.3, + "valid_targets_min": 742 + }, + { + "epoch": 5.6158833063209075, + "grad_norm": 0.8723045293450462, + "learning_rate": 4.588292723390204e-06, + "loss": 0.1462, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18794122338294983, + "step": 3465, + "valid_targets_mean": 2340.5, + "valid_targets_min": 626 + }, + { + "epoch": 5.623987034035657, + "grad_norm": 0.9131558045501164, + "learning_rate": 4.536907191550694e-06, + "loss": 0.1385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18654656410217285, + "step": 3470, + "valid_targets_mean": 3264.6, + "valid_targets_min": 940 + }, + { + "epoch": 5.632090761750405, + "grad_norm": 0.7832447854160407, + "learning_rate": 4.4857741859116024e-06, + "loss": 0.1428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10856875032186508, + "step": 3475, + "valid_targets_mean": 4026.0, + "valid_targets_min": 828 + }, + { + "epoch": 5.640194489465154, + "grad_norm": 0.9603685340587842, + "learning_rate": 4.434894541520862e-06, + "loss": 0.1285, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12167471647262573, + "step": 3480, + "valid_targets_mean": 2858.6, + "valid_targets_min": 681 + }, + { + "epoch": 5.648298217179903, + "grad_norm": 0.8143816562047289, + "learning_rate": 4.3842690892887795e-06, + "loss": 0.1283, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11910448223352432, + "step": 3485, + "valid_targets_mean": 2518.2, + "valid_targets_min": 883 + }, + { + "epoch": 5.656401944894651, + "grad_norm": 1.0269959031972293, + "learning_rate": 4.333898655974484e-06, + "loss": 0.1366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13059552013874054, + "step": 3490, + "valid_targets_mean": 2272.3, + "valid_targets_min": 565 + }, + { + "epoch": 5.6645056726094, + "grad_norm": 0.8493579524196188, + "learning_rate": 4.283784064172405e-06, + "loss": 0.147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23569592833518982, + "step": 3495, + "valid_targets_mean": 2847.4, + "valid_targets_min": 549 + }, + { + "epoch": 5.672609400324149, + "grad_norm": 1.0045947500475891, + "learning_rate": 4.233926132298867e-06, + "loss": 0.1636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12646427750587463, + "step": 3500, + "valid_targets_mean": 1906.7, + "valid_targets_min": 724 + }, + { + "epoch": 5.680713128038898, + "grad_norm": 1.4216952442247126, + "learning_rate": 4.1843256745787e-06, + "loss": 0.1444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18572121858596802, + "step": 3505, + "valid_targets_mean": 1459.9, + "valid_targets_min": 544 + }, + { + "epoch": 5.688816855753647, + "grad_norm": 0.9002937899719397, + "learning_rate": 4.134983501031942e-06, + "loss": 0.1033, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11701390147209167, + "step": 3510, + "valid_targets_mean": 2314.2, + "valid_targets_min": 776 + }, + { + "epoch": 5.696920583468396, + "grad_norm": 0.8596504160989296, + "learning_rate": 4.085900417460633e-06, + "loss": 0.1749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15798306465148926, + "step": 3515, + "valid_targets_mean": 2459.4, + "valid_targets_min": 1225 + }, + { + "epoch": 5.705024311183144, + "grad_norm": 1.0572710496515947, + "learning_rate": 4.037077225435628e-06, + "loss": 0.1485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14602217078208923, + "step": 3520, + "valid_targets_mean": 1989.2, + "valid_targets_min": 645 + }, + { + "epoch": 5.713128038897893, + "grad_norm": 0.5224530736412875, + "learning_rate": 3.988514722283523e-06, + "loss": 0.1553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0725245401263237, + "step": 3525, + "valid_targets_mean": 3406.3, + "valid_targets_min": 553 + }, + { + "epoch": 5.721231766612641, + "grad_norm": 0.9644662300746688, + "learning_rate": 3.940213701073636e-06, + "loss": 0.1119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10431959480047226, + "step": 3530, + "valid_targets_mean": 1862.7, + "valid_targets_min": 1094 + }, + { + "epoch": 5.72933549432739, + "grad_norm": 0.6423940525393956, + "learning_rate": 3.892174950605039e-06, + "loss": 0.1221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0762447863817215, + "step": 3535, + "valid_targets_mean": 2733.1, + "valid_targets_min": 737 + }, + { + "epoch": 5.7374392220421395, + "grad_norm": 1.0504974617543583, + "learning_rate": 3.844399255393705e-06, + "loss": 0.1272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12259531021118164, + "step": 3540, + "valid_targets_mean": 2066.3, + "valid_targets_min": 509 + }, + { + "epoch": 5.745542949756889, + "grad_norm": 1.0177720813084683, + "learning_rate": 3.7968873956596563e-06, + "loss": 0.1244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18689052760601044, + "step": 3545, + "valid_targets_mean": 2081.0, + "valid_targets_min": 812 + }, + { + "epoch": 5.753646677471637, + "grad_norm": 1.081526403816516, + "learning_rate": 3.749640147314264e-06, + "loss": 0.1412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15227672457695007, + "step": 3550, + "valid_targets_mean": 1617.8, + "valid_targets_min": 450 + }, + { + "epoch": 5.761750405186386, + "grad_norm": 1.281386145306865, + "learning_rate": 3.7026582819475443e-06, + "loss": 0.1336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1037423312664032, + "step": 3555, + "valid_targets_mean": 1829.5, + "valid_targets_min": 694 + }, + { + "epoch": 5.769854132901134, + "grad_norm": 1.166110176855929, + "learning_rate": 3.6559425668155733e-06, + "loss": 0.1227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11849828064441681, + "step": 3560, + "valid_targets_mean": 2195.3, + "valid_targets_min": 759 + }, + { + "epoch": 5.777957860615883, + "grad_norm": 0.5899092092985859, + "learning_rate": 3.6094937648279647e-06, + "loss": 0.162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10452926158905029, + "step": 3565, + "valid_targets_mean": 3788.1, + "valid_targets_min": 778 + }, + { + "epoch": 5.786061588330632, + "grad_norm": 0.6076238022877796, + "learning_rate": 3.563312634535383e-06, + "loss": 0.1428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09219780564308167, + "step": 3570, + "valid_targets_mean": 4160.1, + "valid_targets_min": 893 + }, + { + "epoch": 5.7941653160453805, + "grad_norm": 0.9424647522354361, + "learning_rate": 3.517399930117196e-06, + "loss": 0.1605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13615809381008148, + "step": 3575, + "valid_targets_mean": 2198.9, + "valid_targets_min": 866 + }, + { + "epoch": 5.80226904376013, + "grad_norm": 0.5696560681892507, + "learning_rate": 3.4717564013691087e-06, + "loss": 0.1521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11359340697526932, + "step": 3580, + "valid_targets_mean": 5013.9, + "valid_targets_min": 968 + }, + { + "epoch": 5.810372771474879, + "grad_norm": 0.8141064051451715, + "learning_rate": 3.4263827936909744e-06, + "loss": 0.1713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12475259602069855, + "step": 3585, + "valid_targets_mean": 2504.2, + "valid_targets_min": 806 + }, + { + "epoch": 5.818476499189627, + "grad_norm": 0.5674431699647517, + "learning_rate": 3.38127984807457e-06, + "loss": 0.112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09661044925451279, + "step": 3590, + "valid_targets_mean": 4857.5, + "valid_targets_min": 834 + }, + { + "epoch": 5.826580226904376, + "grad_norm": 0.7103962514018813, + "learning_rate": 3.3364483010915237e-06, + "loss": 0.1581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12006492912769318, + "step": 3595, + "valid_targets_mean": 3394.4, + "valid_targets_min": 644 + }, + { + "epoch": 5.834683954619125, + "grad_norm": 0.9765118903364278, + "learning_rate": 3.2918888848812913e-06, + "loss": 0.1362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18907558917999268, + "step": 3600, + "valid_targets_mean": 2537.3, + "valid_targets_min": 232 + }, + { + "epoch": 5.842787682333873, + "grad_norm": 0.6452405236633486, + "learning_rate": 3.2476023271391698e-06, + "loss": 0.1265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10429327189922333, + "step": 3605, + "valid_targets_mean": 3199.5, + "valid_targets_min": 912 + }, + { + "epoch": 5.8508914100486225, + "grad_norm": 0.9704787354907897, + "learning_rate": 3.2035893511044524e-06, + "loss": 0.1271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12537583708763123, + "step": 3610, + "valid_targets_mean": 2012.1, + "valid_targets_min": 505 + }, + { + "epoch": 5.858995137763371, + "grad_norm": 1.0125575644527998, + "learning_rate": 3.159850675548577e-06, + "loss": 0.1254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13451366126537323, + "step": 3615, + "valid_targets_mean": 2137.6, + "valid_targets_min": 844 + }, + { + "epoch": 5.86709886547812, + "grad_norm": 0.5958492150298547, + "learning_rate": 3.116387014763429e-06, + "loss": 0.1595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18298457562923431, + "step": 3620, + "valid_targets_mean": 4842.6, + "valid_targets_min": 1062 + }, + { + "epoch": 5.875202593192869, + "grad_norm": 0.9194613793350569, + "learning_rate": 3.073199078549638e-06, + "loss": 0.1568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14356893301010132, + "step": 3625, + "valid_targets_mean": 2403.4, + "valid_targets_min": 769 + }, + { + "epoch": 5.883306320907618, + "grad_norm": 0.7587379022022571, + "learning_rate": 3.0302875722050064e-06, + "loss": 0.1568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19144773483276367, + "step": 3630, + "valid_targets_mean": 3720.6, + "valid_targets_min": 742 + }, + { + "epoch": 5.891410048622366, + "grad_norm": 0.757032598883858, + "learning_rate": 2.987653196513003e-06, + "loss": 0.1683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10905779898166656, + "step": 3635, + "valid_targets_mean": 2983.9, + "valid_targets_min": 404 + }, + { + "epoch": 5.899513776337115, + "grad_norm": 0.84410146266729, + "learning_rate": 2.9452966477312815e-06, + "loss": 0.1393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12055391073226929, + "step": 3640, + "valid_targets_mean": 2604.8, + "valid_targets_min": 953 + }, + { + "epoch": 5.9076175040518635, + "grad_norm": 0.6912606615140152, + "learning_rate": 2.9032186175803545e-06, + "loss": 0.147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.231064572930336, + "step": 3645, + "valid_targets_mean": 4160.1, + "valid_targets_min": 589 + }, + { + "epoch": 5.915721231766613, + "grad_norm": 0.9150252191276331, + "learning_rate": 2.8614197932322585e-06, + "loss": 0.1531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14955416321754456, + "step": 3650, + "valid_targets_mean": 2538.5, + "valid_targets_min": 1130 + }, + { + "epoch": 5.923824959481362, + "grad_norm": 1.0537159718577105, + "learning_rate": 2.819900857299358e-06, + "loss": 0.1196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12648676335811615, + "step": 3655, + "valid_targets_mean": 1749.5, + "valid_targets_min": 551 + }, + { + "epoch": 5.93192868719611, + "grad_norm": 0.9562074384683694, + "learning_rate": 2.778662487823187e-06, + "loss": 0.1226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11901277303695679, + "step": 3660, + "valid_targets_mean": 3005.4, + "valid_targets_min": 885 + }, + { + "epoch": 5.940032414910859, + "grad_norm": 0.9786475511741156, + "learning_rate": 2.7377053582633652e-06, + "loss": 0.1559, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1406671404838562, + "step": 3665, + "valid_targets_mean": 2011.9, + "valid_targets_min": 746 + }, + { + "epoch": 5.948136142625608, + "grad_norm": 0.780480994666007, + "learning_rate": 2.6970301374866337e-06, + "loss": 0.1531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15554064512252808, + "step": 3670, + "valid_targets_mean": 2561.8, + "valid_targets_min": 697 + }, + { + "epoch": 5.956239870340356, + "grad_norm": 0.6847569982772912, + "learning_rate": 2.656637489755889e-06, + "loss": 0.107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08473924547433853, + "step": 3675, + "valid_targets_mean": 2566.4, + "valid_targets_min": 617 + }, + { + "epoch": 5.964343598055105, + "grad_norm": 1.1663803129546406, + "learning_rate": 2.616528074719371e-06, + "loss": 0.148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16240835189819336, + "step": 3680, + "valid_targets_mean": 1863.8, + "valid_targets_min": 735 + }, + { + "epoch": 5.9724473257698545, + "grad_norm": 1.0505470362211855, + "learning_rate": 2.576702547399863e-06, + "loss": 0.1137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11963564902544022, + "step": 3685, + "valid_targets_mean": 2972.6, + "valid_targets_min": 924 + }, + { + "epoch": 5.980551053484603, + "grad_norm": 0.6153132292802707, + "learning_rate": 2.53716155818402e-06, + "loss": 0.1199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10222367942333221, + "step": 3690, + "valid_targets_mean": 3769.6, + "valid_targets_min": 496 + }, + { + "epoch": 5.988654781199352, + "grad_norm": 0.7706022269958611, + "learning_rate": 2.49790575281172e-06, + "loss": 0.1483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10004860907793045, + "step": 3695, + "valid_targets_mean": 4118.3, + "valid_targets_min": 461 + }, + { + "epoch": 5.9967585089141, + "grad_norm": 1.1497778298396315, + "learning_rate": 2.4589357723655405e-06, + "loss": 0.1159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12909851968288422, + "step": 3700, + "valid_targets_mean": 1985.4, + "valid_targets_min": 912 + }, + { + "epoch": 6.004862236628849, + "grad_norm": 0.8065263620717076, + "learning_rate": 2.4202522532602846e-06, + "loss": 0.1504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12583133578300476, + "step": 3705, + "valid_targets_mean": 2561.8, + "valid_targets_min": 706 + }, + { + "epoch": 6.012965964343598, + "grad_norm": 0.47440335088604885, + "learning_rate": 2.381855827232571e-06, + "loss": 0.1023, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08017963171005249, + "step": 3710, + "valid_targets_mean": 4353.3, + "valid_targets_min": 232 + }, + { + "epoch": 6.021069692058346, + "grad_norm": 0.8208816345120449, + "learning_rate": 2.343747121330544e-06, + "loss": 0.1576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12535922229290009, + "step": 3715, + "valid_targets_mean": 2856.1, + "valid_targets_min": 941 + }, + { + "epoch": 6.0291734197730955, + "grad_norm": 0.7429830314090826, + "learning_rate": 2.3059267579036183e-06, + "loss": 0.1217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10140064358711243, + "step": 3720, + "valid_targets_mean": 3342.2, + "valid_targets_min": 1157 + }, + { + "epoch": 6.037277147487845, + "grad_norm": 0.5938762525392249, + "learning_rate": 2.268395354592312e-06, + "loss": 0.1442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11517813801765442, + "step": 3725, + "valid_targets_mean": 5581.0, + "valid_targets_min": 232 + }, + { + "epoch": 6.045380875202593, + "grad_norm": 0.8583542985755236, + "learning_rate": 2.2311535243181637e-06, + "loss": 0.1634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2661886513233185, + "step": 3730, + "valid_targets_mean": 3863.1, + "valid_targets_min": 443 + }, + { + "epoch": 6.053484602917342, + "grad_norm": 0.8438682223939366, + "learning_rate": 2.1942018752737227e-06, + "loss": 0.121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08933848142623901, + "step": 3735, + "valid_targets_mean": 2379.1, + "valid_targets_min": 929 + }, + { + "epoch": 6.061588330632091, + "grad_norm": 0.4750345988672662, + "learning_rate": 2.1575410109126293e-06, + "loss": 0.1062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07867272198200226, + "step": 3740, + "valid_targets_mean": 5703.2, + "valid_targets_min": 767 + }, + { + "epoch": 6.069692058346839, + "grad_norm": 0.7820914107075465, + "learning_rate": 2.121171529939734e-06, + "loss": 0.1316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15075111389160156, + "step": 3745, + "valid_targets_mean": 3570.4, + "valid_targets_min": 763 + }, + { + "epoch": 6.077795786061588, + "grad_norm": 0.7594471532290356, + "learning_rate": 2.085094026301349e-06, + "loss": 0.1173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20603245496749878, + "step": 3750, + "valid_targets_mean": 3489.7, + "valid_targets_min": 968 + }, + { + "epoch": 6.085899513776337, + "grad_norm": 0.6047064426773207, + "learning_rate": 2.0493090891755262e-06, + "loss": 0.1361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07911926507949829, + "step": 3755, + "valid_targets_mean": 3143.4, + "valid_targets_min": 973 + }, + { + "epoch": 6.094003241491086, + "grad_norm": 0.5079708101672858, + "learning_rate": 2.013817302962444e-06, + "loss": 0.1252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17122353613376617, + "step": 3760, + "valid_targets_mean": 6064.8, + "valid_targets_min": 1526 + }, + { + "epoch": 6.102106969205835, + "grad_norm": 0.8625230922660629, + "learning_rate": 1.9786192472748643e-06, + "loss": 0.1475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10119368880987167, + "step": 3765, + "valid_targets_mean": 2206.5, + "valid_targets_min": 924 + }, + { + "epoch": 6.110210696920584, + "grad_norm": 1.0935738204994028, + "learning_rate": 1.9437154969286577e-06, + "loss": 0.1485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12805625796318054, + "step": 3770, + "valid_targets_mean": 1756.6, + "valid_targets_min": 534 + }, + { + "epoch": 6.118314424635332, + "grad_norm": 0.7094021347408048, + "learning_rate": 1.9091066219334365e-06, + "loss": 0.1246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09319410473108292, + "step": 3775, + "valid_targets_mean": 3571.6, + "valid_targets_min": 600 + }, + { + "epoch": 6.126418152350081, + "grad_norm": 0.8810616245799291, + "learning_rate": 1.8747931874832325e-06, + "loss": 0.13, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06927265226840973, + "step": 3780, + "valid_targets_mean": 3894.1, + "valid_targets_min": 954 + }, + { + "epoch": 6.13452188006483, + "grad_norm": 0.7966465543664667, + "learning_rate": 1.8407757539472548e-06, + "loss": 0.1291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1974271982908249, + "step": 3785, + "valid_targets_mean": 3178.2, + "valid_targets_min": 450 + }, + { + "epoch": 6.1426256077795784, + "grad_norm": 0.5378695627769883, + "learning_rate": 1.8070548768607744e-06, + "loss": 0.1343, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10642120242118835, + "step": 3790, + "valid_targets_mean": 4619.8, + "valid_targets_min": 850 + }, + { + "epoch": 6.1507293354943275, + "grad_norm": 1.0147575200870909, + "learning_rate": 1.773631106915996e-06, + "loss": 0.1432, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09471525251865387, + "step": 3795, + "valid_targets_mean": 2203.6, + "valid_targets_min": 1137 + }, + { + "epoch": 6.158833063209076, + "grad_norm": 0.7484970018269975, + "learning_rate": 1.740504989953129e-06, + "loss": 0.1485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11913225054740906, + "step": 3800, + "valid_targets_mean": 2594.4, + "valid_targets_min": 907 + }, + { + "epoch": 6.166936790923825, + "grad_norm": 0.8241935721612473, + "learning_rate": 1.707677066951432e-06, + "loss": 0.1437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21923650801181793, + "step": 3805, + "valid_targets_mean": 3371.1, + "valid_targets_min": 644 + }, + { + "epoch": 6.175040518638574, + "grad_norm": 0.7791725998755106, + "learning_rate": 1.6751478740203776e-06, + "loss": 0.1578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14890174567699432, + "step": 3810, + "valid_targets_mean": 3476.3, + "valid_targets_min": 800 + }, + { + "epoch": 6.183144246353322, + "grad_norm": 0.9316060672966924, + "learning_rate": 1.6429179423909248e-06, + "loss": 0.1066, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09502556920051575, + "step": 3815, + "valid_targets_mean": 1851.6, + "valid_targets_min": 319 + }, + { + "epoch": 6.191247974068071, + "grad_norm": 0.6681121617992075, + "learning_rate": 1.6109877984068089e-06, + "loss": 0.1265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10410622507333755, + "step": 3820, + "valid_targets_mean": 3242.0, + "valid_targets_min": 645 + }, + { + "epoch": 6.19935170178282, + "grad_norm": 0.7966050962281183, + "learning_rate": 1.5793579635159883e-06, + "loss": 0.1652, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12778472900390625, + "step": 3825, + "valid_targets_mean": 2881.1, + "valid_targets_min": 1181 + }, + { + "epoch": 6.207455429497569, + "grad_norm": 1.0811104704970367, + "learning_rate": 1.5480289542620686e-06, + "loss": 0.1234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14305716753005981, + "step": 3830, + "valid_targets_mean": 1729.1, + "valid_targets_min": 505 + }, + { + "epoch": 6.215559157212318, + "grad_norm": 0.8218570136420974, + "learning_rate": 1.517001282275936e-06, + "loss": 0.1119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19680121541023254, + "step": 3835, + "valid_targets_mean": 3000.9, + "valid_targets_min": 776 + }, + { + "epoch": 6.223662884927067, + "grad_norm": 1.3444875208456322, + "learning_rate": 1.486275454267354e-06, + "loss": 0.1256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1285068839788437, + "step": 3840, + "valid_targets_mean": 2764.1, + "valid_targets_min": 604 + }, + { + "epoch": 6.231766612641815, + "grad_norm": 0.8944598377852021, + "learning_rate": 1.4558519720166975e-06, + "loss": 0.1422, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16025900840759277, + "step": 3845, + "valid_targets_mean": 2910.8, + "valid_targets_min": 906 + }, + { + "epoch": 6.239870340356564, + "grad_norm": 0.651296867676762, + "learning_rate": 1.4257313323667798e-06, + "loss": 0.1298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08879795670509338, + "step": 3850, + "valid_targets_mean": 3112.5, + "valid_targets_min": 400 + }, + { + "epoch": 6.247974068071313, + "grad_norm": 0.5999610443428787, + "learning_rate": 1.3959140272146998e-06, + "loss": 0.1602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11679062247276306, + "step": 3855, + "valid_targets_mean": 3881.8, + "valid_targets_min": 853 + }, + { + "epoch": 6.256077795786061, + "grad_norm": 0.9636049482804864, + "learning_rate": 1.366400543503854e-06, + "loss": 0.1349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18295209109783173, + "step": 3860, + "valid_targets_mean": 2168.9, + "valid_targets_min": 607 + }, + { + "epoch": 6.2641815235008105, + "grad_norm": 0.78397490864793, + "learning_rate": 1.3371913632159506e-06, + "loss": 0.1524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.089485764503479, + "step": 3865, + "valid_targets_mean": 2398.6, + "valid_targets_min": 871 + }, + { + "epoch": 6.272285251215559, + "grad_norm": 0.7820564283288564, + "learning_rate": 1.3082869633631413e-06, + "loss": 0.1407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1150301992893219, + "step": 3870, + "valid_targets_mean": 2335.1, + "valid_targets_min": 927 + }, + { + "epoch": 6.280388978930308, + "grad_norm": 1.027079175401604, + "learning_rate": 1.2796878159802595e-06, + "loss": 0.1642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15304911136627197, + "step": 3875, + "valid_targets_mean": 2105.1, + "valid_targets_min": 954 + }, + { + "epoch": 6.288492706645057, + "grad_norm": 0.6199836306967389, + "learning_rate": 1.2513943881170754e-06, + "loss": 0.137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13365209102630615, + "step": 3880, + "valid_targets_mean": 4391.9, + "valid_targets_min": 1005 + }, + { + "epoch": 6.296596434359805, + "grad_norm": 0.7727236218240091, + "learning_rate": 1.2234071418306903e-06, + "loss": 0.1351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10507519543170929, + "step": 3885, + "valid_targets_mean": 2839.7, + "valid_targets_min": 671 + }, + { + "epoch": 6.304700162074554, + "grad_norm": 0.8259960625219598, + "learning_rate": 1.1957265341779855e-06, + "loss": 0.1128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06842942535877228, + "step": 3890, + "valid_targets_mean": 2111.8, + "valid_targets_min": 509 + }, + { + "epoch": 6.312803889789303, + "grad_norm": 1.3006108319686243, + "learning_rate": 1.1683530172081592e-06, + "loss": 0.1423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16923511028289795, + "step": 3895, + "valid_targets_mean": 2721.0, + "valid_targets_min": 318 + }, + { + "epoch": 6.3209076175040515, + "grad_norm": 0.9212042910130562, + "learning_rate": 1.1412870379553387e-06, + "loss": 0.1455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11085739731788635, + "step": 3900, + "valid_targets_mean": 2037.5, + "valid_targets_min": 928 + }, + { + "epoch": 6.329011345218801, + "grad_norm": 0.7484624073581349, + "learning_rate": 1.1145290384312846e-06, + "loss": 0.1312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10903692245483398, + "step": 3905, + "valid_targets_mean": 2927.1, + "valid_targets_min": 662 + }, + { + "epoch": 6.33711507293355, + "grad_norm": 0.7780860184446243, + "learning_rate": 1.0880794556181762e-06, + "loss": 0.1465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13929596543312073, + "step": 3910, + "valid_targets_mean": 3093.2, + "valid_targets_min": 844 + }, + { + "epoch": 6.345218800648298, + "grad_norm": 0.857143392794329, + "learning_rate": 1.0619387214614662e-06, + "loss": 0.1038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08489274978637695, + "step": 3915, + "valid_targets_mean": 2249.3, + "valid_targets_min": 754 + }, + { + "epoch": 6.353322528363047, + "grad_norm": 0.7892887000224863, + "learning_rate": 1.0361072628628354e-06, + "loss": 0.1487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1285485327243805, + "step": 3920, + "valid_targets_mean": 3397.4, + "valid_targets_min": 759 + }, + { + "epoch": 6.361426256077796, + "grad_norm": 0.8963361991084041, + "learning_rate": 1.0105855016732113e-06, + "loss": 0.1328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15654143691062927, + "step": 3925, + "valid_targets_mean": 2594.8, + "valid_targets_min": 347 + }, + { + "epoch": 6.369529983792544, + "grad_norm": 0.9164724545517589, + "learning_rate": 9.853738546858893e-07, + "loss": 0.1476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09162580966949463, + "step": 3930, + "valid_targets_mean": 2038.8, + "valid_targets_min": 704 + }, + { + "epoch": 6.377633711507293, + "grad_norm": 0.6269508433513853, + "learning_rate": 9.604727336297203e-07, + "loss": 0.1511, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10261643677949905, + "step": 3935, + "valid_targets_mean": 3776.9, + "valid_targets_min": 518 + }, + { + "epoch": 6.3857374392220425, + "grad_norm": 0.7267537967221318, + "learning_rate": 9.358825451623832e-07, + "loss": 0.1693, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16209650039672852, + "step": 3940, + "valid_targets_mean": 3149.4, + "valid_targets_min": 484 + }, + { + "epoch": 6.393841166936791, + "grad_norm": 0.8819007736794746, + "learning_rate": 9.116036908637582e-07, + "loss": 0.1257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13063153624534607, + "step": 3945, + "valid_targets_mean": 2710.8, + "valid_targets_min": 918 + }, + { + "epoch": 6.40194489465154, + "grad_norm": 0.6745885226985678, + "learning_rate": 8.876365672293441e-07, + "loss": 0.091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08468009531497955, + "step": 3950, + "valid_targets_mean": 3238.0, + "valid_targets_min": 967 + }, + { + "epoch": 6.410048622366288, + "grad_norm": 0.8819209149898708, + "learning_rate": 8.639815656638162e-07, + "loss": 0.1247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15380942821502686, + "step": 3955, + "valid_targets_mean": 2853.2, + "valid_targets_min": 873 + }, + { + "epoch": 6.418152350081037, + "grad_norm": 0.8845074945045972, + "learning_rate": 8.406390724745961e-07, + "loss": 0.1411, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1663370132446289, + "step": 3960, + "valid_targets_mean": 2657.0, + "valid_targets_min": 277 + }, + { + "epoch": 6.426256077795786, + "grad_norm": 0.7576749630747052, + "learning_rate": 8.176094688655789e-07, + "loss": 0.1185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1374589502811432, + "step": 3965, + "valid_targets_mean": 3111.9, + "valid_targets_min": 501 + }, + { + "epoch": 6.434359805510534, + "grad_norm": 0.8629454796074068, + "learning_rate": 7.948931309308872e-07, + "loss": 0.1229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13856954872608185, + "step": 3970, + "valid_targets_mean": 3491.9, + "valid_targets_min": 314 + }, + { + "epoch": 6.4424635332252835, + "grad_norm": 1.0231777837569396, + "learning_rate": 7.724904296487246e-07, + "loss": 0.1405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11940930783748627, + "step": 3975, + "valid_targets_mean": 1845.0, + "valid_targets_min": 870 + }, + { + "epoch": 6.450567260940033, + "grad_norm": 0.8238449493780197, + "learning_rate": 7.504017308753386e-07, + "loss": 0.1506, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18350258469581604, + "step": 3980, + "valid_targets_mean": 4485.2, + "valid_targets_min": 217 + }, + { + "epoch": 6.458670988654781, + "grad_norm": 0.8464204623046769, + "learning_rate": 7.286273953390278e-07, + "loss": 0.2092, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.258614182472229, + "step": 3985, + "valid_targets_mean": 3313.9, + "valid_targets_min": 810 + }, + { + "epoch": 6.46677471636953, + "grad_norm": 0.8918238849564315, + "learning_rate": 7.071677786342568e-07, + "loss": 0.1237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1031871885061264, + "step": 3990, + "valid_targets_mean": 2293.3, + "valid_targets_min": 864 + }, + { + "epoch": 6.474878444084279, + "grad_norm": 0.8528535194867968, + "learning_rate": 6.860232312158554e-07, + "loss": 0.1423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15150435268878937, + "step": 3995, + "valid_targets_mean": 2968.9, + "valid_targets_min": 747 + }, + { + "epoch": 6.482982171799027, + "grad_norm": 0.7636360851041402, + "learning_rate": 6.651940983932737e-07, + "loss": 0.1478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19292369484901428, + "step": 4000, + "valid_targets_mean": 3176.4, + "valid_targets_min": 1041 + }, + { + "epoch": 6.491085899513776, + "grad_norm": 1.0126383046774314, + "learning_rate": 6.44680720324975e-07, + "loss": 0.156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12578508257865906, + "step": 4005, + "valid_targets_mean": 1690.8, + "valid_targets_min": 828 + }, + { + "epoch": 6.4991896272285254, + "grad_norm": 0.7355254192324758, + "learning_rate": 6.244834320128501e-07, + "loss": 0.1175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10512825846672058, + "step": 4010, + "valid_targets_mean": 2727.1, + "valid_targets_min": 835 + }, + { + "epoch": 6.507293354943274, + "grad_norm": 0.8817717410341444, + "learning_rate": 6.0460256329677e-07, + "loss": 0.1362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0961059033870697, + "step": 4015, + "valid_targets_mean": 1961.8, + "valid_targets_min": 851 + }, + { + "epoch": 6.515397082658023, + "grad_norm": 0.9530199455424444, + "learning_rate": 5.850384388491814e-07, + "loss": 0.1481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11027082055807114, + "step": 4020, + "valid_targets_mean": 2610.9, + "valid_targets_min": 726 + }, + { + "epoch": 6.523500810372772, + "grad_norm": 1.1973587173189302, + "learning_rate": 5.657913781698221e-07, + "loss": 0.1467, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15702632069587708, + "step": 4025, + "valid_targets_mean": 2178.3, + "valid_targets_min": 735 + }, + { + "epoch": 6.53160453808752, + "grad_norm": 0.9362106839228681, + "learning_rate": 5.468616955804873e-07, + "loss": 0.1169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1587480902671814, + "step": 4030, + "valid_targets_mean": 2364.8, + "valid_targets_min": 402 + }, + { + "epoch": 6.539708265802269, + "grad_norm": 0.8971025729416715, + "learning_rate": 5.282497002198983e-07, + "loss": 0.1034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10483712702989578, + "step": 4035, + "valid_targets_mean": 2060.6, + "valid_targets_min": 906 + }, + { + "epoch": 6.547811993517017, + "grad_norm": 2.337830249158839, + "learning_rate": 5.099556960386686e-07, + "loss": 0.119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.114591583609581, + "step": 4040, + "valid_targets_mean": 1819.8, + "valid_targets_min": 589 + }, + { + "epoch": 6.5559157212317665, + "grad_norm": 0.9163559690136783, + "learning_rate": 4.919799817943238e-07, + "loss": 0.1064, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1364617496728897, + "step": 4045, + "valid_targets_mean": 2475.9, + "valid_targets_min": 328 + }, + { + "epoch": 6.564019448946516, + "grad_norm": 1.0530486479633936, + "learning_rate": 4.7432285104642703e-07, + "loss": 0.1509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1412011682987213, + "step": 4050, + "valid_targets_mean": 1812.3, + "valid_targets_min": 495 + }, + { + "epoch": 6.572123176661265, + "grad_norm": 1.0490371458755887, + "learning_rate": 4.569845921517968e-07, + "loss": 0.1427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1328582465648651, + "step": 4055, + "valid_targets_mean": 2563.8, + "valid_targets_min": 968 + }, + { + "epoch": 6.580226904376013, + "grad_norm": 0.8827517020820953, + "learning_rate": 4.399654882597726e-07, + "loss": 0.1839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09797355532646179, + "step": 4060, + "valid_targets_mean": 1908.7, + "valid_targets_min": 734 + }, + { + "epoch": 6.588330632090762, + "grad_norm": 0.5796761962828608, + "learning_rate": 4.232658173076232e-07, + "loss": 0.1306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08027474582195282, + "step": 4065, + "valid_targets_mean": 4144.1, + "valid_targets_min": 872 + }, + { + "epoch": 6.59643435980551, + "grad_norm": 1.0909407783873655, + "learning_rate": 4.068858520159724e-07, + "loss": 0.1757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11777451634407043, + "step": 4070, + "valid_targets_mean": 2117.9, + "valid_targets_min": 292 + }, + { + "epoch": 6.604538087520259, + "grad_norm": 0.6833254490640153, + "learning_rate": 3.9082585988437617e-07, + "loss": 0.1341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1639232188463211, + "step": 4075, + "valid_targets_mean": 3476.4, + "valid_targets_min": 822 + }, + { + "epoch": 6.612641815235008, + "grad_norm": 0.9742355762056493, + "learning_rate": 3.7508610318693684e-07, + "loss": 0.1605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1798877716064453, + "step": 4080, + "valid_targets_mean": 2434.2, + "valid_targets_min": 782 + }, + { + "epoch": 6.620745542949757, + "grad_norm": 0.7781187724054021, + "learning_rate": 3.596668389680247e-07, + "loss": 0.1188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09959834814071655, + "step": 4085, + "valid_targets_mean": 2723.8, + "valid_targets_min": 529 + }, + { + "epoch": 6.628849270664506, + "grad_norm": 0.8466343867882937, + "learning_rate": 3.445683190380833e-07, + "loss": 0.1141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08893650770187378, + "step": 4090, + "valid_targets_mean": 1926.0, + "valid_targets_min": 568 + }, + { + "epoch": 6.636952998379255, + "grad_norm": 0.7908915562547193, + "learning_rate": 3.297907899695019e-07, + "loss": 0.1474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09306427836418152, + "step": 4095, + "valid_targets_mean": 2579.3, + "valid_targets_min": 939 + }, + { + "epoch": 6.645056726094003, + "grad_norm": 0.6540049138105144, + "learning_rate": 3.1533449309262056e-07, + "loss": 0.1347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12161879241466522, + "step": 4100, + "valid_targets_mean": 3311.7, + "valid_targets_min": 817 + }, + { + "epoch": 6.653160453808752, + "grad_norm": 0.8462221047782975, + "learning_rate": 3.0119966449174474e-07, + "loss": 0.1398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0935630202293396, + "step": 4105, + "valid_targets_mean": 1882.8, + "valid_targets_min": 840 + }, + { + "epoch": 6.661264181523501, + "grad_norm": 0.81490063291204, + "learning_rate": 2.8738653500133494e-07, + "loss": 0.1271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10476881265640259, + "step": 4110, + "valid_targets_mean": 2528.9, + "valid_targets_min": 984 + }, + { + "epoch": 6.669367909238249, + "grad_norm": 0.6366928478749754, + "learning_rate": 2.738953302022096e-07, + "loss": 0.1103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08438228070735931, + "step": 4115, + "valid_targets_mean": 3672.2, + "valid_targets_min": 930 + }, + { + "epoch": 6.6774716369529985, + "grad_norm": 0.5680139767437395, + "learning_rate": 2.6072627041785925e-07, + "loss": 0.1178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05923807621002197, + "step": 4120, + "valid_targets_mean": 3334.2, + "valid_targets_min": 913 + }, + { + "epoch": 6.685575364667747, + "grad_norm": 3.566418268679015, + "learning_rate": 2.478795707108672e-07, + "loss": 0.1124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0984240397810936, + "step": 4125, + "valid_targets_mean": 3397.5, + "valid_targets_min": 518 + }, + { + "epoch": 6.693679092382496, + "grad_norm": 0.8102395858284243, + "learning_rate": 2.3535544087938345e-07, + "loss": 0.15, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1713753342628479, + "step": 4130, + "valid_targets_mean": 2797.9, + "valid_targets_min": 731 + }, + { + "epoch": 6.701782820097245, + "grad_norm": 0.9439721686588282, + "learning_rate": 2.2315408545370288e-07, + "loss": 0.1177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15056179463863373, + "step": 4135, + "valid_targets_mean": 2218.5, + "valid_targets_min": 624 + }, + { + "epoch": 6.709886547811994, + "grad_norm": 0.9719771412362007, + "learning_rate": 2.1127570369292361e-07, + "loss": 0.1266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1161581426858902, + "step": 4140, + "valid_targets_mean": 2372.7, + "valid_targets_min": 560 + }, + { + "epoch": 6.717990275526742, + "grad_norm": 0.7885505138738974, + "learning_rate": 1.9972048958168954e-07, + "loss": 0.1297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08716851472854614, + "step": 4145, + "valid_targets_mean": 2679.0, + "valid_targets_min": 727 + }, + { + "epoch": 6.726094003241491, + "grad_norm": 0.7564261208584763, + "learning_rate": 1.8848863182703513e-07, + "loss": 0.1293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14512531459331512, + "step": 4150, + "valid_targets_mean": 3178.8, + "valid_targets_min": 836 + }, + { + "epoch": 6.7341977309562395, + "grad_norm": 0.8230943188429811, + "learning_rate": 1.7758031385528784e-07, + "loss": 0.1683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19820144772529602, + "step": 4155, + "valid_targets_mean": 2705.9, + "valid_targets_min": 904 + }, + { + "epoch": 6.742301458670989, + "grad_norm": 0.7304895753350193, + "learning_rate": 1.6699571380908385e-07, + "loss": 0.1375, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1148165911436081, + "step": 4160, + "valid_targets_mean": 2799.2, + "valid_targets_min": 740 + }, + { + "epoch": 6.750405186385738, + "grad_norm": 0.8783884446179141, + "learning_rate": 1.5673500454445046e-07, + "loss": 0.1137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1160724088549614, + "step": 4165, + "valid_targets_mean": 1911.4, + "valid_targets_min": 757 + }, + { + "epoch": 6.758508914100486, + "grad_norm": 0.8452843386698703, + "learning_rate": 1.4679835362799266e-07, + "loss": 0.1405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17844310402870178, + "step": 4170, + "valid_targets_mean": 2903.6, + "valid_targets_min": 717 + }, + { + "epoch": 6.766612641815235, + "grad_norm": 0.9641957278215031, + "learning_rate": 1.3718592333414881e-07, + "loss": 0.1378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1178508773446083, + "step": 4175, + "valid_targets_mean": 1679.4, + "valid_targets_min": 456 + }, + { + "epoch": 6.774716369529984, + "grad_norm": 0.6732633682506574, + "learning_rate": 1.2789787064253934e-07, + "loss": 0.1513, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12225890159606934, + "step": 4180, + "valid_targets_mean": 3461.0, + "valid_targets_min": 695 + }, + { + "epoch": 6.782820097244732, + "grad_norm": 0.9548675664452392, + "learning_rate": 1.1893434723541542e-07, + "loss": 0.1272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12099964916706085, + "step": 4185, + "valid_targets_mean": 1765.4, + "valid_targets_min": 348 + }, + { + "epoch": 6.790923824959481, + "grad_norm": 0.7229822657096528, + "learning_rate": 1.1029549949516549e-07, + "loss": 0.1312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16705356538295746, + "step": 4190, + "valid_targets_mean": 4125.2, + "valid_targets_min": 562 + }, + { + "epoch": 6.7990275526742305, + "grad_norm": 1.0053755294682665, + "learning_rate": 1.0198146850193935e-07, + "loss": 0.1376, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10085690766572952, + "step": 4195, + "valid_targets_mean": 1957.5, + "valid_targets_min": 464 + }, + { + "epoch": 6.807131280388979, + "grad_norm": 1.335653057078746, + "learning_rate": 9.399239003132999e-08, + "loss": 0.138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09673996269702911, + "step": 4200, + "valid_targets_mean": 1523.4, + "valid_targets_min": 705 + }, + { + "epoch": 6.815235008103728, + "grad_norm": 0.8800015171398834, + "learning_rate": 8.632839455216869e-08, + "loss": 0.1388, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11644886434078217, + "step": 4205, + "valid_targets_mean": 2234.9, + "valid_targets_min": 650 + }, + { + "epoch": 6.823338735818476, + "grad_norm": 0.782216107013055, + "learning_rate": 7.89896072243912e-08, + "loss": 0.1221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10838295519351959, + "step": 4210, + "valid_targets_mean": 2606.8, + "valid_targets_min": 1066 + }, + { + "epoch": 6.831442463533225, + "grad_norm": 0.7331468581917359, + "learning_rate": 7.197614789698604e-08, + "loss": 0.1571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27159422636032104, + "step": 4215, + "valid_targets_mean": 4613.2, + "valid_targets_min": 1164 + }, + { + "epoch": 6.839546191247974, + "grad_norm": 0.7847565716440916, + "learning_rate": 6.528813110604714e-08, + "loss": 0.1195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10585910081863403, + "step": 4220, + "valid_targets_mean": 2765.0, + "valid_targets_min": 632 + }, + { + "epoch": 6.847649918962723, + "grad_norm": 0.8253246012683277, + "learning_rate": 5.8925666072899845e-08, + "loss": 0.1249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14212317764759064, + "step": 4225, + "valid_targets_mean": 2604.4, + "valid_targets_min": 583 + }, + { + "epoch": 6.855753646677472, + "grad_norm": 0.798983689706256, + "learning_rate": 5.288885670231336e-08, + "loss": 0.1315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10902281105518341, + "step": 4230, + "valid_targets_mean": 2613.6, + "valid_targets_min": 461 + }, + { + "epoch": 6.863857374392221, + "grad_norm": 0.8467915925619014, + "learning_rate": 4.717780158080887e-08, + "loss": 0.1119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1464867889881134, + "step": 4235, + "valid_targets_mean": 2088.5, + "valid_targets_min": 522 + }, + { + "epoch": 6.871961102106969, + "grad_norm": 1.0029490169977413, + "learning_rate": 4.1792593975049644e-08, + "loss": 0.1264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12026522308588028, + "step": 4240, + "valid_targets_mean": 1802.7, + "valid_targets_min": 819 + }, + { + "epoch": 6.880064829821718, + "grad_norm": 0.9011238391462046, + "learning_rate": 3.6733321830315636e-08, + "loss": 0.1337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11811887472867966, + "step": 4245, + "valid_targets_mean": 2323.1, + "valid_targets_min": 949 + }, + { + "epoch": 6.888168557536467, + "grad_norm": 0.7165823854167281, + "learning_rate": 3.200006776906461e-08, + "loss": 0.1129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09492816030979156, + "step": 4250, + "valid_targets_mean": 2960.4, + "valid_targets_min": 814 + }, + { + "epoch": 6.896272285251215, + "grad_norm": 0.9450518017052805, + "learning_rate": 2.7592909089593224e-08, + "loss": 0.1455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14789974689483643, + "step": 4255, + "valid_targets_mean": 2218.0, + "valid_targets_min": 761 + }, + { + "epoch": 6.904376012965964, + "grad_norm": 0.7311474778057845, + "learning_rate": 2.351191776475581e-08, + "loss": 0.1319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1463954895734787, + "step": 4260, + "valid_targets_mean": 3855.9, + "valid_targets_min": 1393 + }, + { + "epoch": 6.9124797406807135, + "grad_norm": 0.6301982317715115, + "learning_rate": 1.9757160440814217e-08, + "loss": 0.1343, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10000479966402054, + "step": 4265, + "valid_targets_mean": 3855.6, + "valid_targets_min": 809 + }, + { + "epoch": 6.920583468395462, + "grad_norm": 0.9798344906644698, + "learning_rate": 1.6328698436327562e-08, + "loss": 0.1597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16356664896011353, + "step": 4270, + "valid_targets_mean": 2240.9, + "valid_targets_min": 622 + }, + { + "epoch": 6.928687196110211, + "grad_norm": 0.8892538533790851, + "learning_rate": 1.3226587741159702e-08, + "loss": 0.1134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13213740289211273, + "step": 4275, + "valid_targets_mean": 2755.2, + "valid_targets_min": 736 + }, + { + "epoch": 6.93679092382496, + "grad_norm": 0.8637275559260358, + "learning_rate": 1.0450879015566629e-08, + "loss": 0.1239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10389816761016846, + "step": 4280, + "valid_targets_mean": 2268.2, + "valid_targets_min": 302 + }, + { + "epoch": 6.944894651539708, + "grad_norm": 0.7857182787493622, + "learning_rate": 8.001617589368238e-09, + "loss": 0.1201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09846210479736328, + "step": 4285, + "valid_targets_mean": 2556.7, + "valid_targets_min": 883 + }, + { + "epoch": 6.952998379254457, + "grad_norm": 0.5942319504605537, + "learning_rate": 5.878843461200046e-09, + "loss": 0.122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08767960220575333, + "step": 4290, + "valid_targets_mean": 4519.6, + "valid_targets_min": 783 + }, + { + "epoch": 6.961102106969205, + "grad_norm": 0.8250059679447841, + "learning_rate": 4.082591297873695e-09, + "loss": 0.155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17609858512878418, + "step": 4295, + "valid_targets_mean": 3104.8, + "valid_targets_min": 398 + }, + { + "epoch": 6.9692058346839545, + "grad_norm": 0.7499567331158492, + "learning_rate": 2.6128904338018624e-09, + "loss": 0.1151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08152391016483307, + "step": 4300, + "valid_targets_mean": 2886.6, + "valid_targets_min": 342 + }, + { + "epoch": 6.977309562398704, + "grad_norm": 0.9718691223278781, + "learning_rate": 1.4697648705186417e-09, + "loss": 0.1302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15906545519828796, + "step": 4305, + "valid_targets_mean": 2148.2, + "valid_targets_min": 565 + }, + { + "epoch": 6.985413290113452, + "grad_norm": 0.7283394841772546, + "learning_rate": 6.532332762931859e-10, + "loss": 0.0901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12135720252990723, + "step": 4310, + "valid_targets_mean": 3223.2, + "valid_targets_min": 741 + }, + { + "epoch": 6.993517017828201, + "grad_norm": 0.7328657780903466, + "learning_rate": 1.6330898581884414e-10, + "loss": 0.1321, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09660885483026505, + "step": 4315, + "valid_targets_mean": 2950.4, + "valid_targets_min": 780 + }, + { + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20906896889209747, + "step": 4319, + "total_flos": 702151473364992.0, + "train_loss": 0.11156423039722288, + "train_runtime": 12711.7053, + "train_samples_per_second": 5.432, + "train_steps_per_second": 0.34, + "valid_targets_mean": 3750.0, + "valid_targets_min": 750 + } + ], + "logging_steps": 5, + "max_steps": 4319, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 1500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 702151473364992.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}