diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,10222 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 4627, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.007564296520423601, + "grad_norm": 19.058784422912282, + "learning_rate": 3.455723542116631e-07, + "loss": 0.7153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7505241632461548, + "step": 5, + "valid_targets_mean": 3790.7, + "valid_targets_min": 239 + }, + { + "epoch": 0.015128593040847202, + "grad_norm": 17.486795894826894, + "learning_rate": 7.77537796976242e-07, + "loss": 0.6813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6587737798690796, + "step": 10, + "valid_targets_mean": 5055.8, + "valid_targets_min": 943 + }, + { + "epoch": 0.0226928895612708, + "grad_norm": 17.853758617180958, + "learning_rate": 1.209503239740821e-06, + "loss": 0.71, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7153316736221313, + "step": 15, + "valid_targets_mean": 5290.3, + "valid_targets_min": 803 + }, + { + "epoch": 0.030257186081694403, + "grad_norm": 13.456561875333087, + "learning_rate": 1.6414686825053995e-06, + "loss": 0.6532, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6719971895217896, + "step": 20, + "valid_targets_mean": 4477.1, + "valid_targets_min": 461 + }, + { + "epoch": 0.037821482602118005, + "grad_norm": 8.589481200641513, + "learning_rate": 2.0734341252699786e-06, + "loss": 0.5971, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6080985069274902, + "step": 25, + "valid_targets_mean": 4065.7, + "valid_targets_min": 797 + }, + { + "epoch": 0.0453857791225416, + "grad_norm": 4.7772774074698585, + "learning_rate": 2.505399568034557e-06, + "loss": 0.5155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4876701831817627, + "step": 30, + "valid_targets_mean": 5669.8, + "valid_targets_min": 727 + }, + { + "epoch": 0.0529500756429652, + "grad_norm": 2.9036893563370665, + "learning_rate": 2.9373650107991366e-06, + "loss": 0.4905, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44498470425605774, + "step": 35, + "valid_targets_mean": 4442.9, + "valid_targets_min": 795 + }, + { + "epoch": 0.060514372163388806, + "grad_norm": 1.5763541463717798, + "learning_rate": 3.369330453563715e-06, + "loss": 0.4976, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.492475301027298, + "step": 40, + "valid_targets_mean": 5954.6, + "valid_targets_min": 583 + }, + { + "epoch": 0.0680786686838124, + "grad_norm": 1.4394911122910266, + "learning_rate": 3.801295896328294e-06, + "loss": 0.4597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5117167234420776, + "step": 45, + "valid_targets_mean": 4686.0, + "valid_targets_min": 610 + }, + { + "epoch": 0.07564296520423601, + "grad_norm": 1.1660007728840187, + "learning_rate": 4.233261339092873e-06, + "loss": 0.456, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4414580464363098, + "step": 50, + "valid_targets_mean": 4432.8, + "valid_targets_min": 710 + }, + { + "epoch": 0.0832072617246596, + "grad_norm": 1.0994270688976955, + "learning_rate": 4.665226781857452e-06, + "loss": 0.4252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5082100629806519, + "step": 55, + "valid_targets_mean": 4157.1, + "valid_targets_min": 620 + }, + { + "epoch": 0.0907715582450832, + "grad_norm": 0.8284788849012524, + "learning_rate": 5.09719222462203e-06, + "loss": 0.4232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44552695751190186, + "step": 60, + "valid_targets_mean": 4501.6, + "valid_targets_min": 705 + }, + { + "epoch": 0.09833585476550681, + "grad_norm": 0.6310211132039384, + "learning_rate": 5.52915766738661e-06, + "loss": 0.3835, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3460736572742462, + "step": 65, + "valid_targets_mean": 4682.3, + "valid_targets_min": 342 + }, + { + "epoch": 0.1059001512859304, + "grad_norm": 0.6177628509419143, + "learning_rate": 5.961123110151188e-06, + "loss": 0.3921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38838571310043335, + "step": 70, + "valid_targets_mean": 5989.1, + "valid_targets_min": 1412 + }, + { + "epoch": 0.11346444780635401, + "grad_norm": 0.6780059407854138, + "learning_rate": 6.393088552915767e-06, + "loss": 0.3815, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3895452618598938, + "step": 75, + "valid_targets_mean": 4225.5, + "valid_targets_min": 391 + }, + { + "epoch": 0.12102874432677761, + "grad_norm": 0.5643766880346435, + "learning_rate": 6.825053995680346e-06, + "loss": 0.3558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3587391972541809, + "step": 80, + "valid_targets_mean": 5413.6, + "valid_targets_min": 559 + }, + { + "epoch": 0.12859304084720122, + "grad_norm": 0.9090969514772852, + "learning_rate": 7.257019438444926e-06, + "loss": 0.3704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3452695310115814, + "step": 85, + "valid_targets_mean": 4190.5, + "valid_targets_min": 561 + }, + { + "epoch": 0.1361573373676248, + "grad_norm": 0.5075113193939108, + "learning_rate": 7.688984881209504e-06, + "loss": 0.3446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3218182325363159, + "step": 90, + "valid_targets_mean": 5132.1, + "valid_targets_min": 821 + }, + { + "epoch": 0.1437216338880484, + "grad_norm": 0.4582977864357091, + "learning_rate": 8.120950323974082e-06, + "loss": 0.3514, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3215479254722595, + "step": 95, + "valid_targets_mean": 6527.9, + "valid_targets_min": 629 + }, + { + "epoch": 0.15128593040847202, + "grad_norm": 0.48765027679444184, + "learning_rate": 8.552915766738662e-06, + "loss": 0.3573, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33919212222099304, + "step": 100, + "valid_targets_mean": 5894.4, + "valid_targets_min": 1048 + }, + { + "epoch": 0.1588502269288956, + "grad_norm": 0.6167067050085139, + "learning_rate": 8.98488120950324e-06, + "loss": 0.347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3706921935081482, + "step": 105, + "valid_targets_mean": 4482.9, + "valid_targets_min": 571 + }, + { + "epoch": 0.1664145234493192, + "grad_norm": 0.5778398632077648, + "learning_rate": 9.41684665226782e-06, + "loss": 0.3286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3193400502204895, + "step": 110, + "valid_targets_mean": 5415.0, + "valid_targets_min": 1665 + }, + { + "epoch": 0.17397881996974282, + "grad_norm": 0.5647079800698889, + "learning_rate": 9.848812095032398e-06, + "loss": 0.3556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3541482090950012, + "step": 115, + "valid_targets_mean": 5123.8, + "valid_targets_min": 1269 + }, + { + "epoch": 0.1815431164901664, + "grad_norm": 0.5820680453553746, + "learning_rate": 1.0280777537796978e-05, + "loss": 0.3176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30285853147506714, + "step": 120, + "valid_targets_mean": 4204.3, + "valid_targets_min": 556 + }, + { + "epoch": 0.18910741301059, + "grad_norm": 0.47973201670838, + "learning_rate": 1.0712742980561557e-05, + "loss": 0.3178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3058018088340759, + "step": 125, + "valid_targets_mean": 5260.8, + "valid_targets_min": 868 + }, + { + "epoch": 0.19667170953101362, + "grad_norm": 0.5518953360121684, + "learning_rate": 1.1144708423326134e-05, + "loss": 0.3187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3173016905784607, + "step": 130, + "valid_targets_mean": 5620.1, + "valid_targets_min": 2169 + }, + { + "epoch": 0.2042360060514372, + "grad_norm": 0.5717143166242276, + "learning_rate": 1.1576673866090712e-05, + "loss": 0.3236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35504987835884094, + "step": 135, + "valid_targets_mean": 5265.2, + "valid_targets_min": 785 + }, + { + "epoch": 0.2118003025718608, + "grad_norm": 0.47865096065818147, + "learning_rate": 1.2008639308855293e-05, + "loss": 0.2921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2680158019065857, + "step": 140, + "valid_targets_mean": 5838.5, + "valid_targets_min": 2352 + }, + { + "epoch": 0.21936459909228442, + "grad_norm": 0.5085856003812484, + "learning_rate": 1.2440604751619871e-05, + "loss": 0.2999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3085848391056061, + "step": 145, + "valid_targets_mean": 4785.6, + "valid_targets_min": 653 + }, + { + "epoch": 0.22692889561270801, + "grad_norm": 0.5716698549308541, + "learning_rate": 1.287257019438445e-05, + "loss": 0.2984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28182756900787354, + "step": 150, + "valid_targets_mean": 4273.5, + "valid_targets_min": 825 + }, + { + "epoch": 0.2344931921331316, + "grad_norm": 0.5245334119291116, + "learning_rate": 1.330453563714903e-05, + "loss": 0.2842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.264009952545166, + "step": 155, + "valid_targets_mean": 4866.6, + "valid_targets_min": 467 + }, + { + "epoch": 0.24205748865355523, + "grad_norm": 0.4935476283105332, + "learning_rate": 1.3736501079913609e-05, + "loss": 0.2967, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28478801250457764, + "step": 160, + "valid_targets_mean": 5374.4, + "valid_targets_min": 2048 + }, + { + "epoch": 0.24962178517397882, + "grad_norm": 0.49385239011355125, + "learning_rate": 1.4168466522678186e-05, + "loss": 0.2936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3246859908103943, + "step": 165, + "valid_targets_mean": 6078.2, + "valid_targets_min": 1784 + }, + { + "epoch": 0.25718608169440244, + "grad_norm": 0.5031354977672869, + "learning_rate": 1.4600431965442764e-05, + "loss": 0.2931, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2915901839733124, + "step": 170, + "valid_targets_mean": 5410.8, + "valid_targets_min": 1914 + }, + { + "epoch": 0.264750378214826, + "grad_norm": 0.5743132892962913, + "learning_rate": 1.5032397408207345e-05, + "loss": 0.286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3496958315372467, + "step": 175, + "valid_targets_mean": 4596.1, + "valid_targets_min": 747 + }, + { + "epoch": 0.2723146747352496, + "grad_norm": 0.6417430609782164, + "learning_rate": 1.5464362850971925e-05, + "loss": 0.2873, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27849793434143066, + "step": 180, + "valid_targets_mean": 4129.8, + "valid_targets_min": 711 + }, + { + "epoch": 0.27987897125567324, + "grad_norm": 0.5525787543900736, + "learning_rate": 1.5896328293736503e-05, + "loss": 0.2809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27930599451065063, + "step": 185, + "valid_targets_mean": 5218.1, + "valid_targets_min": 2197 + }, + { + "epoch": 0.2874432677760968, + "grad_norm": 0.5887797757799174, + "learning_rate": 1.6328293736501082e-05, + "loss": 0.2996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3260801434516907, + "step": 190, + "valid_targets_mean": 4696.6, + "valid_targets_min": 1128 + }, + { + "epoch": 0.2950075642965204, + "grad_norm": 0.5611439547047103, + "learning_rate": 1.676025917926566e-05, + "loss": 0.2916, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26734042167663574, + "step": 195, + "valid_targets_mean": 5061.5, + "valid_targets_min": 594 + }, + { + "epoch": 0.30257186081694404, + "grad_norm": 0.8142320877951509, + "learning_rate": 1.719222462203024e-05, + "loss": 0.2739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2744123935699463, + "step": 200, + "valid_targets_mean": 4981.4, + "valid_targets_min": 2432 + }, + { + "epoch": 0.3101361573373676, + "grad_norm": 0.5453776678521185, + "learning_rate": 1.7624190064794818e-05, + "loss": 0.2814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2693478465080261, + "step": 205, + "valid_targets_mean": 4538.8, + "valid_targets_min": 2627 + }, + { + "epoch": 0.3177004538577912, + "grad_norm": 0.572823640761258, + "learning_rate": 1.8056155507559396e-05, + "loss": 0.2758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24936965107917786, + "step": 210, + "valid_targets_mean": 4717.1, + "valid_targets_min": 1796 + }, + { + "epoch": 0.32526475037821484, + "grad_norm": 0.48857192086223755, + "learning_rate": 1.8488120950323975e-05, + "loss": 0.2703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27971357107162476, + "step": 215, + "valid_targets_mean": 5259.7, + "valid_targets_min": 2080 + }, + { + "epoch": 0.3328290468986384, + "grad_norm": 0.5832326533648584, + "learning_rate": 1.8920086393088553e-05, + "loss": 0.2682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2944541871547699, + "step": 220, + "valid_targets_mean": 5435.7, + "valid_targets_min": 622 + }, + { + "epoch": 0.340393343419062, + "grad_norm": 0.5304901414198607, + "learning_rate": 1.9352051835853135e-05, + "loss": 0.276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2581842541694641, + "step": 225, + "valid_targets_mean": 5475.9, + "valid_targets_min": 1850 + }, + { + "epoch": 0.34795763993948564, + "grad_norm": 0.5584973891534915, + "learning_rate": 1.9784017278617714e-05, + "loss": 0.2647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2856540381908417, + "step": 230, + "valid_targets_mean": 5040.7, + "valid_targets_min": 837 + }, + { + "epoch": 0.3555219364599092, + "grad_norm": 0.7615559789848529, + "learning_rate": 2.021598272138229e-05, + "loss": 0.2762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27540600299835205, + "step": 235, + "valid_targets_mean": 4862.9, + "valid_targets_min": 921 + }, + { + "epoch": 0.3630862329803328, + "grad_norm": 0.5494333839539911, + "learning_rate": 2.064794816414687e-05, + "loss": 0.2627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25492146611213684, + "step": 240, + "valid_targets_mean": 4764.6, + "valid_targets_min": 1899 + }, + { + "epoch": 0.37065052950075644, + "grad_norm": 0.5675208041808972, + "learning_rate": 2.107991360691145e-05, + "loss": 0.2738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30120372772216797, + "step": 245, + "valid_targets_mean": 4816.8, + "valid_targets_min": 496 + }, + { + "epoch": 0.37821482602118, + "grad_norm": 0.5516462624650181, + "learning_rate": 2.1511879049676025e-05, + "loss": 0.2668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2463807463645935, + "step": 250, + "valid_targets_mean": 4821.3, + "valid_targets_min": 892 + }, + { + "epoch": 0.3857791225416036, + "grad_norm": 0.5808471847669502, + "learning_rate": 2.1943844492440607e-05, + "loss": 0.2605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25385603308677673, + "step": 255, + "valid_targets_mean": 4108.7, + "valid_targets_min": 697 + }, + { + "epoch": 0.39334341906202724, + "grad_norm": 1.3051521187963644, + "learning_rate": 2.2375809935205186e-05, + "loss": 0.2557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2327377051115036, + "step": 260, + "valid_targets_mean": 3904.6, + "valid_targets_min": 526 + }, + { + "epoch": 0.4009077155824508, + "grad_norm": 0.540688473321198, + "learning_rate": 2.2807775377969764e-05, + "loss": 0.2775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2616848945617676, + "step": 265, + "valid_targets_mean": 5200.8, + "valid_targets_min": 723 + }, + { + "epoch": 0.4084720121028744, + "grad_norm": 0.5572137635803951, + "learning_rate": 2.3239740820734343e-05, + "loss": 0.2678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2894093096256256, + "step": 270, + "valid_targets_mean": 5997.7, + "valid_targets_min": 2741 + }, + { + "epoch": 0.41603630862329805, + "grad_norm": 0.5219797727194595, + "learning_rate": 2.3671706263498925e-05, + "loss": 0.2585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23676098883152008, + "step": 275, + "valid_targets_mean": 5215.8, + "valid_targets_min": 372 + }, + { + "epoch": 0.4236006051437216, + "grad_norm": 0.7269531089041338, + "learning_rate": 2.41036717062635e-05, + "loss": 0.2869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29276716709136963, + "step": 280, + "valid_targets_mean": 5125.2, + "valid_targets_min": 941 + }, + { + "epoch": 0.43116490166414523, + "grad_norm": 0.6854032248788718, + "learning_rate": 2.453563714902808e-05, + "loss": 0.2533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2404939830303192, + "step": 285, + "valid_targets_mean": 4140.1, + "valid_targets_min": 913 + }, + { + "epoch": 0.43872919818456885, + "grad_norm": 0.5427787800863983, + "learning_rate": 2.496760259179266e-05, + "loss": 0.2733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29159748554229736, + "step": 290, + "valid_targets_mean": 5585.8, + "valid_targets_min": 738 + }, + { + "epoch": 0.4462934947049924, + "grad_norm": 0.6325587088135726, + "learning_rate": 2.5399568034557236e-05, + "loss": 0.2709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24903929233551025, + "step": 295, + "valid_targets_mean": 4124.8, + "valid_targets_min": 760 + }, + { + "epoch": 0.45385779122541603, + "grad_norm": 0.6302528172341179, + "learning_rate": 2.5831533477321818e-05, + "loss": 0.2546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2755891680717468, + "step": 300, + "valid_targets_mean": 5041.9, + "valid_targets_min": 765 + }, + { + "epoch": 0.46142208774583965, + "grad_norm": 0.6101680849058813, + "learning_rate": 2.6263498920086393e-05, + "loss": 0.251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23466572165489197, + "step": 305, + "valid_targets_mean": 4039.0, + "valid_targets_min": 1489 + }, + { + "epoch": 0.4689863842662632, + "grad_norm": 0.5098554824207316, + "learning_rate": 2.6695464362850975e-05, + "loss": 0.2503, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24894092977046967, + "step": 310, + "valid_targets_mean": 5165.2, + "valid_targets_min": 543 + }, + { + "epoch": 0.47655068078668683, + "grad_norm": 0.5153701765469025, + "learning_rate": 2.7127429805615553e-05, + "loss": 0.2987, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24448314309120178, + "step": 315, + "valid_targets_mean": 4878.4, + "valid_targets_min": 1165 + }, + { + "epoch": 0.48411497730711045, + "grad_norm": 0.5150020480544099, + "learning_rate": 2.755939524838013e-05, + "loss": 0.2643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24626128375530243, + "step": 320, + "valid_targets_mean": 5491.0, + "valid_targets_min": 2393 + }, + { + "epoch": 0.491679273827534, + "grad_norm": 0.44779275422444326, + "learning_rate": 2.799136069114471e-05, + "loss": 0.2569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25347915291786194, + "step": 325, + "valid_targets_mean": 6012.2, + "valid_targets_min": 1728 + }, + { + "epoch": 0.49924357034795763, + "grad_norm": 0.5689898293216941, + "learning_rate": 2.842332613390929e-05, + "loss": 0.2643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2442246973514557, + "step": 330, + "valid_targets_mean": 5822.6, + "valid_targets_min": 1755 + }, + { + "epoch": 0.5068078668683812, + "grad_norm": 0.4828012134297725, + "learning_rate": 2.885529157667387e-05, + "loss": 0.2415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22166290879249573, + "step": 335, + "valid_targets_mean": 5591.9, + "valid_targets_min": 2595 + }, + { + "epoch": 0.5143721633888049, + "grad_norm": 0.5206534739708626, + "learning_rate": 2.9287257019438446e-05, + "loss": 0.254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24545502662658691, + "step": 340, + "valid_targets_mean": 4507.7, + "valid_targets_min": 1277 + }, + { + "epoch": 0.5219364599092284, + "grad_norm": 0.5375743058233416, + "learning_rate": 2.9719222462203028e-05, + "loss": 0.2387, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24855747818946838, + "step": 345, + "valid_targets_mean": 4502.4, + "valid_targets_min": 706 + }, + { + "epoch": 0.529500756429652, + "grad_norm": 0.47062515075329686, + "learning_rate": 3.0151187904967603e-05, + "loss": 0.239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23291119933128357, + "step": 350, + "valid_targets_mean": 5619.4, + "valid_targets_min": 729 + }, + { + "epoch": 0.5370650529500757, + "grad_norm": 0.5065710560218952, + "learning_rate": 3.058315334773218e-05, + "loss": 0.2257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23068980872631073, + "step": 355, + "valid_targets_mean": 5067.8, + "valid_targets_min": 800 + }, + { + "epoch": 0.5446293494704992, + "grad_norm": 0.6071927129549767, + "learning_rate": 3.101511879049676e-05, + "loss": 0.2532, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2714073061943054, + "step": 360, + "valid_targets_mean": 4807.8, + "valid_targets_min": 321 + }, + { + "epoch": 0.5521936459909228, + "grad_norm": 0.5932902639247645, + "learning_rate": 3.144708423326134e-05, + "loss": 0.267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2581622004508972, + "step": 365, + "valid_targets_mean": 4726.7, + "valid_targets_min": 263 + }, + { + "epoch": 0.5597579425113465, + "grad_norm": 0.5342014246005196, + "learning_rate": 3.1879049676025925e-05, + "loss": 0.2527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28950175642967224, + "step": 370, + "valid_targets_mean": 5759.0, + "valid_targets_min": 736 + }, + { + "epoch": 0.56732223903177, + "grad_norm": 0.6598267814370526, + "learning_rate": 3.23110151187905e-05, + "loss": 0.2328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23706093430519104, + "step": 375, + "valid_targets_mean": 3705.8, + "valid_targets_min": 511 + }, + { + "epoch": 0.5748865355521936, + "grad_norm": 0.5781352859246836, + "learning_rate": 3.274298056155508e-05, + "loss": 0.2694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2128780633211136, + "step": 380, + "valid_targets_mean": 4299.5, + "valid_targets_min": 565 + }, + { + "epoch": 0.5824508320726173, + "grad_norm": 0.5201196925356668, + "learning_rate": 3.317494600431966e-05, + "loss": 0.2476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2847621440887451, + "step": 385, + "valid_targets_mean": 5042.4, + "valid_targets_min": 825 + }, + { + "epoch": 0.5900151285930408, + "grad_norm": 0.571209286797819, + "learning_rate": 3.360691144708423e-05, + "loss": 0.2476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.261477530002594, + "step": 390, + "valid_targets_mean": 4204.5, + "valid_targets_min": 831 + }, + { + "epoch": 0.5975794251134644, + "grad_norm": 0.5703991894869983, + "learning_rate": 3.4038876889848814e-05, + "loss": 0.2376, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22247353196144104, + "step": 395, + "valid_targets_mean": 4025.1, + "valid_targets_min": 811 + }, + { + "epoch": 0.6051437216338881, + "grad_norm": 0.5694115354716299, + "learning_rate": 3.447084233261339e-05, + "loss": 0.2407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2540602684020996, + "step": 400, + "valid_targets_mean": 4897.0, + "valid_targets_min": 1926 + }, + { + "epoch": 0.6127080181543116, + "grad_norm": 0.543873277094909, + "learning_rate": 3.490280777537797e-05, + "loss": 0.2417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2412918210029602, + "step": 405, + "valid_targets_mean": 5347.6, + "valid_targets_min": 2336 + }, + { + "epoch": 0.6202723146747352, + "grad_norm": 0.5539147349076818, + "learning_rate": 3.533477321814255e-05, + "loss": 0.2529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27449312806129456, + "step": 410, + "valid_targets_mean": 4702.8, + "valid_targets_min": 641 + }, + { + "epoch": 0.6278366111951589, + "grad_norm": 0.5147878532498119, + "learning_rate": 3.5766738660907135e-05, + "loss": 0.2482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21562246978282928, + "step": 415, + "valid_targets_mean": 4832.5, + "valid_targets_min": 2092 + }, + { + "epoch": 0.6354009077155824, + "grad_norm": 0.5550659610309338, + "learning_rate": 3.619870410367171e-05, + "loss": 0.2372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23504087328910828, + "step": 420, + "valid_targets_mean": 4465.6, + "valid_targets_min": 643 + }, + { + "epoch": 0.642965204236006, + "grad_norm": 0.5881025450085932, + "learning_rate": 3.6630669546436286e-05, + "loss": 0.2508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2751597464084625, + "step": 425, + "valid_targets_mean": 5010.4, + "valid_targets_min": 575 + }, + { + "epoch": 0.6505295007564297, + "grad_norm": 0.5850477950512473, + "learning_rate": 3.706263498920087e-05, + "loss": 0.2391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2643757462501526, + "step": 430, + "valid_targets_mean": 4947.0, + "valid_targets_min": 678 + }, + { + "epoch": 0.6580937972768532, + "grad_norm": 0.8028527221952423, + "learning_rate": 3.749460043196544e-05, + "loss": 0.2512, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23027735948562622, + "step": 435, + "valid_targets_mean": 4532.6, + "valid_targets_min": 632 + }, + { + "epoch": 0.6656580937972768, + "grad_norm": 0.5991434210232877, + "learning_rate": 3.7926565874730025e-05, + "loss": 0.233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21481779217720032, + "step": 440, + "valid_targets_mean": 4295.4, + "valid_targets_min": 1930 + }, + { + "epoch": 0.6732223903177005, + "grad_norm": 0.601475737977575, + "learning_rate": 3.83585313174946e-05, + "loss": 0.2458, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2731437683105469, + "step": 445, + "valid_targets_mean": 4081.8, + "valid_targets_min": 789 + }, + { + "epoch": 0.680786686838124, + "grad_norm": 0.544970754754265, + "learning_rate": 3.879049676025918e-05, + "loss": 0.248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2417784333229065, + "step": 450, + "valid_targets_mean": 4546.8, + "valid_targets_min": 687 + }, + { + "epoch": 0.6883509833585476, + "grad_norm": 0.5772939399178783, + "learning_rate": 3.9222462203023764e-05, + "loss": 0.2556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23565149307250977, + "step": 455, + "valid_targets_mean": 4297.6, + "valid_targets_min": 662 + }, + { + "epoch": 0.6959152798789713, + "grad_norm": 0.557273760966465, + "learning_rate": 3.965442764578834e-05, + "loss": 0.2411, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23695287108421326, + "step": 460, + "valid_targets_mean": 5605.1, + "valid_targets_min": 478 + }, + { + "epoch": 0.7034795763993948, + "grad_norm": 0.552751474708773, + "learning_rate": 3.9999994307824485e-05, + "loss": 0.238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23529154062271118, + "step": 465, + "valid_targets_mean": 4646.6, + "valid_targets_min": 843 + }, + { + "epoch": 0.7110438729198184, + "grad_norm": 0.5778857269434653, + "learning_rate": 3.9999795082021543e-05, + "loss": 0.2698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3176231384277344, + "step": 470, + "valid_targets_mean": 5449.4, + "valid_targets_min": 538 + }, + { + "epoch": 0.7186081694402421, + "grad_norm": 0.5157110157373845, + "learning_rate": 3.999931125068276e-05, + "loss": 0.2514, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2607157230377197, + "step": 475, + "valid_targets_mean": 5027.3, + "valid_targets_min": 951 + }, + { + "epoch": 0.7261724659606656, + "grad_norm": 0.4933102641109027, + "learning_rate": 3.9998542820693246e-05, + "loss": 0.239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20662082731723785, + "step": 480, + "valid_targets_mean": 5754.7, + "valid_targets_min": 635 + }, + { + "epoch": 0.7337367624810892, + "grad_norm": 0.5426556147022381, + "learning_rate": 3.9997489802988096e-05, + "loss": 0.2476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23219190537929535, + "step": 485, + "valid_targets_mean": 4775.2, + "valid_targets_min": 729 + }, + { + "epoch": 0.7413010590015129, + "grad_norm": 0.4802976906182571, + "learning_rate": 3.9996152212552195e-05, + "loss": 0.2301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22934921085834503, + "step": 490, + "valid_targets_mean": 5057.2, + "valid_targets_min": 639 + }, + { + "epoch": 0.7488653555219364, + "grad_norm": 0.6122232574312276, + "learning_rate": 3.999453006842002e-05, + "loss": 0.2389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.244462788105011, + "step": 495, + "valid_targets_mean": 4329.6, + "valid_targets_min": 574 + }, + { + "epoch": 0.75642965204236, + "grad_norm": 0.5032216034650763, + "learning_rate": 3.999262339367536e-05, + "loss": 0.2514, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25049036741256714, + "step": 500, + "valid_targets_mean": 4904.4, + "valid_targets_min": 1007 + }, + { + "epoch": 0.7639939485627837, + "grad_norm": 0.6238287961984283, + "learning_rate": 3.9990432215451006e-05, + "loss": 0.2482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26027706265449524, + "step": 505, + "valid_targets_mean": 3938.6, + "valid_targets_min": 639 + }, + { + "epoch": 0.7715582450832073, + "grad_norm": 0.5535062102674121, + "learning_rate": 3.998795656492836e-05, + "loss": 0.2352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23881468176841736, + "step": 510, + "valid_targets_mean": 4196.7, + "valid_targets_min": 744 + }, + { + "epoch": 0.7791225416036308, + "grad_norm": 0.4897831464999995, + "learning_rate": 3.998519647733696e-05, + "loss": 0.2544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2844647169113159, + "step": 515, + "valid_targets_mean": 6344.6, + "valid_targets_min": 601 + }, + { + "epoch": 0.7866868381240545, + "grad_norm": 0.5746692133321736, + "learning_rate": 3.998215199195403e-05, + "loss": 0.2469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23050951957702637, + "step": 520, + "valid_targets_mean": 4142.7, + "valid_targets_min": 547 + }, + { + "epoch": 0.794251134644478, + "grad_norm": 0.49159881041487485, + "learning_rate": 3.997882315210388e-05, + "loss": 0.2512, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2426537573337555, + "step": 525, + "valid_targets_mean": 5857.8, + "valid_targets_min": 1111 + }, + { + "epoch": 0.8018154311649016, + "grad_norm": 0.4965814246543715, + "learning_rate": 3.997521000515731e-05, + "loss": 0.2327, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22411620616912842, + "step": 530, + "valid_targets_mean": 5881.9, + "valid_targets_min": 1661 + }, + { + "epoch": 0.8093797276853253, + "grad_norm": 0.5557055978474669, + "learning_rate": 3.997131260253092e-05, + "loss": 0.2392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25896748900413513, + "step": 535, + "valid_targets_mean": 4466.9, + "valid_targets_min": 592 + }, + { + "epoch": 0.8169440242057489, + "grad_norm": 0.4931361228707487, + "learning_rate": 3.9967130999686405e-05, + "loss": 0.2356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.249818816781044, + "step": 540, + "valid_targets_mean": 4829.9, + "valid_targets_min": 932 + }, + { + "epoch": 0.8245083207261724, + "grad_norm": 0.5733474556869196, + "learning_rate": 3.996266525612973e-05, + "loss": 0.2456, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2596126198768616, + "step": 545, + "valid_targets_mean": 3922.8, + "valid_targets_min": 709 + }, + { + "epoch": 0.8320726172465961, + "grad_norm": 0.5330244819228089, + "learning_rate": 3.9957915435410334e-05, + "loss": 0.2248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2414190173149109, + "step": 550, + "valid_targets_mean": 5284.2, + "valid_targets_min": 1105 + }, + { + "epoch": 0.8396369137670197, + "grad_norm": 0.505181937458707, + "learning_rate": 3.995288160512015e-05, + "loss": 0.2424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21243470907211304, + "step": 555, + "valid_targets_mean": 4745.8, + "valid_targets_min": 684 + }, + { + "epoch": 0.8472012102874432, + "grad_norm": 0.5379741211579597, + "learning_rate": 3.9947563836892725e-05, + "loss": 0.2237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22850680351257324, + "step": 560, + "valid_targets_mean": 4773.2, + "valid_targets_min": 678 + }, + { + "epoch": 0.8547655068078669, + "grad_norm": 0.47372136138382365, + "learning_rate": 3.994196220640214e-05, + "loss": 0.2218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.245782271027565, + "step": 565, + "valid_targets_mean": 6413.9, + "valid_targets_min": 2439 + }, + { + "epoch": 0.8623298033282905, + "grad_norm": 0.5036552343967776, + "learning_rate": 3.993607679336197e-05, + "loss": 0.2394, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24518650770187378, + "step": 570, + "valid_targets_mean": 4881.8, + "valid_targets_min": 80 + }, + { + "epoch": 0.869894099848714, + "grad_norm": 0.4729695754905281, + "learning_rate": 3.992990768152412e-05, + "loss": 0.2385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2571276128292084, + "step": 575, + "valid_targets_mean": 5548.9, + "valid_targets_min": 899 + }, + { + "epoch": 0.8774583963691377, + "grad_norm": 0.4880547361382435, + "learning_rate": 3.9923454958677676e-05, + "loss": 0.2416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2370157688856125, + "step": 580, + "valid_targets_mean": 5140.8, + "valid_targets_min": 920 + }, + { + "epoch": 0.8850226928895613, + "grad_norm": 0.46748870246205226, + "learning_rate": 3.991671871664759e-05, + "loss": 0.2362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22495149075984955, + "step": 585, + "valid_targets_mean": 4875.2, + "valid_targets_min": 710 + }, + { + "epoch": 0.8925869894099848, + "grad_norm": 0.499170334869546, + "learning_rate": 3.9909699051293455e-05, + "loss": 0.2323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2324574738740921, + "step": 590, + "valid_targets_mean": 4126.8, + "valid_targets_min": 672 + }, + { + "epoch": 0.9001512859304085, + "grad_norm": 0.5367539699554082, + "learning_rate": 3.990239606250805e-05, + "loss": 0.2258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22277456521987915, + "step": 595, + "valid_targets_mean": 4875.0, + "valid_targets_min": 723 + }, + { + "epoch": 0.9077155824508321, + "grad_norm": 0.46291726470280686, + "learning_rate": 3.989480985421602e-05, + "loss": 0.2489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21236249804496765, + "step": 600, + "valid_targets_mean": 5088.1, + "valid_targets_min": 1250 + }, + { + "epoch": 0.9152798789712556, + "grad_norm": 0.4944239868363532, + "learning_rate": 3.988694053437229e-05, + "loss": 0.2423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2805103063583374, + "step": 605, + "valid_targets_mean": 5184.2, + "valid_targets_min": 686 + }, + { + "epoch": 0.9228441754916793, + "grad_norm": 0.9200503353718761, + "learning_rate": 3.987878821496062e-05, + "loss": 0.2349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23494309186935425, + "step": 610, + "valid_targets_mean": 4795.9, + "valid_targets_min": 512 + }, + { + "epoch": 0.9304084720121029, + "grad_norm": 0.45541430053417153, + "learning_rate": 3.9870353011991955e-05, + "loss": 0.2367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.258321613073349, + "step": 615, + "valid_targets_mean": 5087.6, + "valid_targets_min": 708 + }, + { + "epoch": 0.9379727685325264, + "grad_norm": 0.8707406129846743, + "learning_rate": 3.986163504550281e-05, + "loss": 0.2422, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2552441656589508, + "step": 620, + "valid_targets_mean": 3817.4, + "valid_targets_min": 710 + }, + { + "epoch": 0.9455370650529501, + "grad_norm": 0.453725523450877, + "learning_rate": 3.985263443955351e-05, + "loss": 0.2325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1943071186542511, + "step": 625, + "valid_targets_mean": 4514.4, + "valid_targets_min": 797 + }, + { + "epoch": 0.9531013615733737, + "grad_norm": 0.4650173033154279, + "learning_rate": 3.9843351322226496e-05, + "loss": 0.2311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25859200954437256, + "step": 630, + "valid_targets_mean": 6504.8, + "valid_targets_min": 776 + }, + { + "epoch": 0.9606656580937972, + "grad_norm": 0.5183996451091001, + "learning_rate": 3.983378582562446e-05, + "loss": 0.2404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2556428909301758, + "step": 635, + "valid_targets_mean": 4473.8, + "valid_targets_min": 517 + }, + { + "epoch": 0.9682299546142209, + "grad_norm": 0.45985350131749686, + "learning_rate": 3.982393808586843e-05, + "loss": 0.2301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21027632057666779, + "step": 640, + "valid_targets_mean": 5296.2, + "valid_targets_min": 1148 + }, + { + "epoch": 0.9757942511346445, + "grad_norm": 0.552817017780005, + "learning_rate": 3.981380824309594e-05, + "loss": 0.2324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.233289435505867, + "step": 645, + "valid_targets_mean": 3743.4, + "valid_targets_min": 474 + }, + { + "epoch": 0.983358547655068, + "grad_norm": 0.43042887165024196, + "learning_rate": 3.9803396441458917e-05, + "loss": 0.2329, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21018920838832855, + "step": 650, + "valid_targets_mean": 5031.2, + "valid_targets_min": 1803 + }, + { + "epoch": 0.9909228441754917, + "grad_norm": 0.48810810379501984, + "learning_rate": 3.979270282912169e-05, + "loss": 0.2345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22822780907154083, + "step": 655, + "valid_targets_mean": 3996.4, + "valid_targets_min": 711 + }, + { + "epoch": 0.9984871406959153, + "grad_norm": 0.4961721738119889, + "learning_rate": 3.9781727558258896e-05, + "loss": 0.2344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22998680174350739, + "step": 660, + "valid_targets_mean": 4072.9, + "valid_targets_min": 606 + }, + { + "epoch": 1.006051437216339, + "grad_norm": 0.42967061225570735, + "learning_rate": 3.977047078505327e-05, + "loss": 0.2189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19665825366973877, + "step": 665, + "valid_targets_mean": 4729.6, + "valid_targets_min": 873 + }, + { + "epoch": 1.0136157337367624, + "grad_norm": 0.4669397363717281, + "learning_rate": 3.975893266969346e-05, + "loss": 0.2187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.242275670170784, + "step": 670, + "valid_targets_mean": 4723.4, + "valid_targets_min": 351 + }, + { + "epoch": 1.021180030257186, + "grad_norm": 0.5341853659851667, + "learning_rate": 3.9747113376371704e-05, + "loss": 0.2467, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24473215639591217, + "step": 675, + "valid_targets_mean": 4814.8, + "valid_targets_min": 628 + }, + { + "epoch": 1.0287443267776097, + "grad_norm": 0.44043846408793436, + "learning_rate": 3.9735013073281564e-05, + "loss": 0.2249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24138009548187256, + "step": 680, + "valid_targets_mean": 6298.2, + "valid_targets_min": 603 + }, + { + "epoch": 1.0363086232980332, + "grad_norm": 0.5003817149788394, + "learning_rate": 3.972263193261545e-05, + "loss": 0.207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.201238214969635, + "step": 685, + "valid_targets_mean": 4762.5, + "valid_targets_min": 1345 + }, + { + "epoch": 1.0438729198184569, + "grad_norm": 0.4523758197633264, + "learning_rate": 3.970997013056224e-05, + "loss": 0.2339, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24418559670448303, + "step": 690, + "valid_targets_mean": 6598.0, + "valid_targets_min": 643 + }, + { + "epoch": 1.0514372163388805, + "grad_norm": 0.4562342522704356, + "learning_rate": 3.969702784730471e-05, + "loss": 0.2258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20137807726860046, + "step": 695, + "valid_targets_mean": 5147.5, + "valid_targets_min": 754 + }, + { + "epoch": 1.059001512859304, + "grad_norm": 0.5011541733249977, + "learning_rate": 3.9683805267017035e-05, + "loss": 0.2311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20954594016075134, + "step": 700, + "valid_targets_mean": 4591.2, + "valid_targets_min": 632 + }, + { + "epoch": 1.0665658093797277, + "grad_norm": 0.5176851021286845, + "learning_rate": 3.9670302577862124e-05, + "loss": 0.2202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2231058031320572, + "step": 705, + "valid_targets_mean": 4647.5, + "valid_targets_min": 680 + }, + { + "epoch": 1.0741301059001513, + "grad_norm": 0.48011232672163284, + "learning_rate": 3.965651997198893e-05, + "loss": 0.227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22521618008613586, + "step": 710, + "valid_targets_mean": 5078.5, + "valid_targets_min": 641 + }, + { + "epoch": 1.0816944024205748, + "grad_norm": 0.4767421483930465, + "learning_rate": 3.964245764552978e-05, + "loss": 0.2091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19617989659309387, + "step": 715, + "valid_targets_mean": 4258.9, + "valid_targets_min": 617 + }, + { + "epoch": 1.0892586989409985, + "grad_norm": 0.4104761710291779, + "learning_rate": 3.9628115798597505e-05, + "loss": 0.2216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21169023215770721, + "step": 720, + "valid_targets_mean": 6263.8, + "valid_targets_min": 1551 + }, + { + "epoch": 1.0968229954614221, + "grad_norm": 0.578932648769492, + "learning_rate": 3.961349463528266e-05, + "loss": 0.2212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22551500797271729, + "step": 725, + "valid_targets_mean": 5194.9, + "valid_targets_min": 662 + }, + { + "epoch": 1.1043872919818456, + "grad_norm": 0.4523935856836432, + "learning_rate": 3.959859436365057e-05, + "loss": 0.226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21178296208381653, + "step": 730, + "valid_targets_mean": 5042.5, + "valid_targets_min": 574 + }, + { + "epoch": 1.1119515885022693, + "grad_norm": 0.549130917647338, + "learning_rate": 3.95834151957384e-05, + "loss": 0.2245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2177426964044571, + "step": 735, + "valid_targets_mean": 4108.4, + "valid_targets_min": 791 + }, + { + "epoch": 1.119515885022693, + "grad_norm": 0.480466921821431, + "learning_rate": 3.956795734755213e-05, + "loss": 0.2372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25843045115470886, + "step": 740, + "valid_targets_mean": 5000.5, + "valid_targets_min": 1302 + }, + { + "epoch": 1.1270801815431164, + "grad_norm": 0.4572027005951852, + "learning_rate": 3.955222103906346e-05, + "loss": 0.2107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19742241501808167, + "step": 745, + "valid_targets_mean": 4523.0, + "valid_targets_min": 538 + }, + { + "epoch": 1.13464447806354, + "grad_norm": 0.5232066650642148, + "learning_rate": 3.953620649420672e-05, + "loss": 0.2395, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2422420084476471, + "step": 750, + "valid_targets_mean": 5202.1, + "valid_targets_min": 1226 + }, + { + "epoch": 1.1422087745839637, + "grad_norm": 0.8229962292212416, + "learning_rate": 3.951991394087565e-05, + "loss": 0.2279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25070637464523315, + "step": 755, + "valid_targets_mean": 4194.7, + "valid_targets_min": 660 + }, + { + "epoch": 1.1497730711043872, + "grad_norm": 0.4391216524045533, + "learning_rate": 3.950334361092016e-05, + "loss": 0.2112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19606730341911316, + "step": 760, + "valid_targets_mean": 4927.8, + "valid_targets_min": 813 + }, + { + "epoch": 1.1573373676248109, + "grad_norm": 0.4725978644426478, + "learning_rate": 3.948649574014306e-05, + "loss": 0.2255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2184830904006958, + "step": 765, + "valid_targets_mean": 4678.1, + "valid_targets_min": 727 + }, + { + "epoch": 1.1649016641452345, + "grad_norm": 0.47754919190950834, + "learning_rate": 3.946937056829666e-05, + "loss": 0.2123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21192710101604462, + "step": 770, + "valid_targets_mean": 4708.8, + "valid_targets_min": 746 + }, + { + "epoch": 1.172465960665658, + "grad_norm": 0.38443453641184966, + "learning_rate": 3.9451968339079405e-05, + "loss": 0.212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19585570693016052, + "step": 775, + "valid_targets_mean": 6528.1, + "valid_targets_min": 587 + }, + { + "epoch": 1.1800302571860817, + "grad_norm": 0.8213353673339491, + "learning_rate": 3.9434289300132355e-05, + "loss": 0.2099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24312201142311096, + "step": 780, + "valid_targets_mean": 4561.7, + "valid_targets_min": 578 + }, + { + "epoch": 1.1875945537065054, + "grad_norm": 0.5298968932603556, + "learning_rate": 3.941633370303572e-05, + "loss": 0.218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21988344192504883, + "step": 785, + "valid_targets_mean": 4631.4, + "valid_targets_min": 943 + }, + { + "epoch": 1.1951588502269288, + "grad_norm": 0.5091383820044767, + "learning_rate": 3.939810180330523e-05, + "loss": 0.2263, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20156526565551758, + "step": 790, + "valid_targets_mean": 4222.6, + "valid_targets_min": 653 + }, + { + "epoch": 1.2027231467473525, + "grad_norm": 0.4445758252408178, + "learning_rate": 3.9379593860388515e-05, + "loss": 0.2123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23469144105911255, + "step": 795, + "valid_targets_mean": 5451.9, + "valid_targets_min": 2175 + }, + { + "epoch": 1.2102874432677762, + "grad_norm": 0.463702459884302, + "learning_rate": 3.936081013766143e-05, + "loss": 0.2098, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20840583741664886, + "step": 800, + "valid_targets_mean": 4844.2, + "valid_targets_min": 914 + }, + { + "epoch": 1.2178517397881996, + "grad_norm": 0.4949026215273616, + "learning_rate": 3.9341750902424294e-05, + "loss": 0.215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22504504024982452, + "step": 805, + "valid_targets_mean": 4049.9, + "valid_targets_min": 729 + }, + { + "epoch": 1.2254160363086233, + "grad_norm": 0.45408544850056803, + "learning_rate": 3.932241642589807e-05, + "loss": 0.2197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18734216690063477, + "step": 810, + "valid_targets_mean": 5329.8, + "valid_targets_min": 674 + }, + { + "epoch": 1.232980332829047, + "grad_norm": 0.5377546774189591, + "learning_rate": 3.930280698322053e-05, + "loss": 0.2448, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2399473488330841, + "step": 815, + "valid_targets_mean": 4738.1, + "valid_targets_min": 699 + }, + { + "epoch": 1.2405446293494704, + "grad_norm": 0.5558393542261922, + "learning_rate": 3.928292285344234e-05, + "loss": 0.2242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24337920546531677, + "step": 820, + "valid_targets_mean": 4106.7, + "valid_targets_min": 482 + }, + { + "epoch": 1.248108925869894, + "grad_norm": 0.4955456861000246, + "learning_rate": 3.926276431952306e-05, + "loss": 0.2253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22561465203762054, + "step": 825, + "valid_targets_mean": 5400.7, + "valid_targets_min": 1936 + }, + { + "epoch": 1.2556732223903178, + "grad_norm": 0.5418907911165872, + "learning_rate": 3.924233166832714e-05, + "loss": 0.2219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22385621070861816, + "step": 830, + "valid_targets_mean": 4220.5, + "valid_targets_min": 695 + }, + { + "epoch": 1.2632375189107412, + "grad_norm": 0.8088318043473532, + "learning_rate": 3.922162519061986e-05, + "loss": 0.2113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1984093189239502, + "step": 835, + "valid_targets_mean": 6244.4, + "valid_targets_min": 2041 + }, + { + "epoch": 1.2708018154311649, + "grad_norm": 0.4634253765330138, + "learning_rate": 3.920064518106313e-05, + "loss": 0.21, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20426660776138306, + "step": 840, + "valid_targets_mean": 4512.1, + "valid_targets_min": 515 + }, + { + "epoch": 1.2783661119515886, + "grad_norm": 0.5244110379217045, + "learning_rate": 3.917939193821136e-05, + "loss": 0.209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22082944214344025, + "step": 845, + "valid_targets_mean": 4429.2, + "valid_targets_min": 717 + }, + { + "epoch": 1.2859304084720122, + "grad_norm": 0.43483407663968293, + "learning_rate": 3.915786576450719e-05, + "loss": 0.2124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18737539649009705, + "step": 850, + "valid_targets_mean": 5059.6, + "valid_targets_min": 2458 + }, + { + "epoch": 1.2934947049924357, + "grad_norm": 0.4500103073766876, + "learning_rate": 3.913606696627715e-05, + "loss": 0.2105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1978236436843872, + "step": 855, + "valid_targets_mean": 4524.5, + "valid_targets_min": 823 + }, + { + "epoch": 1.3010590015128594, + "grad_norm": 0.43028683215121355, + "learning_rate": 3.911399585372735e-05, + "loss": 0.2097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20554819703102112, + "step": 860, + "valid_targets_mean": 5189.1, + "valid_targets_min": 610 + }, + { + "epoch": 1.3086232980332828, + "grad_norm": 0.47627559033509903, + "learning_rate": 3.909165274093906e-05, + "loss": 0.2088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21493449807167053, + "step": 865, + "valid_targets_mean": 4325.8, + "valid_targets_min": 781 + }, + { + "epoch": 1.3161875945537065, + "grad_norm": 0.44911088564844376, + "learning_rate": 3.906903794586422e-05, + "loss": 0.2181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21005885303020477, + "step": 870, + "valid_targets_mean": 6040.7, + "valid_targets_min": 981 + }, + { + "epoch": 1.3237518910741302, + "grad_norm": 0.47753214474506056, + "learning_rate": 3.9046151790320905e-05, + "loss": 0.2212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.218666210770607, + "step": 875, + "valid_targets_mean": 4738.8, + "valid_targets_min": 725 + }, + { + "epoch": 1.3313161875945538, + "grad_norm": 0.43454590648723285, + "learning_rate": 3.902299459998879e-05, + "loss": 0.21, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18541982769966125, + "step": 880, + "valid_targets_mean": 5201.4, + "valid_targets_min": 2448 + }, + { + "epoch": 1.3388804841149773, + "grad_norm": 0.5156990006182292, + "learning_rate": 3.8999566704404476e-05, + "loss": 0.2249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2260858565568924, + "step": 885, + "valid_targets_mean": 4169.7, + "valid_targets_min": 662 + }, + { + "epoch": 1.346444780635401, + "grad_norm": 0.4731314327931749, + "learning_rate": 3.8975868436956826e-05, + "loss": 0.2134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21620869636535645, + "step": 890, + "valid_targets_mean": 4723.3, + "valid_targets_min": 696 + }, + { + "epoch": 1.3540090771558244, + "grad_norm": 0.5539722000570312, + "learning_rate": 3.895190013488219e-05, + "loss": 0.218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23198354244232178, + "step": 895, + "valid_targets_mean": 4731.4, + "valid_targets_min": 819 + }, + { + "epoch": 1.361573373676248, + "grad_norm": 0.5363884689933821, + "learning_rate": 3.892766213925965e-05, + "loss": 0.2092, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21815639734268188, + "step": 900, + "valid_targets_mean": 3884.6, + "valid_targets_min": 512 + }, + { + "epoch": 1.3691376701966718, + "grad_norm": 0.46987778063844016, + "learning_rate": 3.890315479500611e-05, + "loss": 0.2161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2229079008102417, + "step": 905, + "valid_targets_mean": 5161.0, + "valid_targets_min": 888 + }, + { + "epoch": 1.3767019667170954, + "grad_norm": 0.44993849954248105, + "learning_rate": 3.887837845087144e-05, + "loss": 0.2354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28664612770080566, + "step": 910, + "valid_targets_mean": 6543.5, + "valid_targets_min": 714 + }, + { + "epoch": 1.384266263237519, + "grad_norm": 0.43051713745825515, + "learning_rate": 3.885333345943349e-05, + "loss": 0.2269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19189190864562988, + "step": 915, + "valid_targets_mean": 4741.1, + "valid_targets_min": 594 + }, + { + "epoch": 1.3918305597579426, + "grad_norm": 0.4805693823287516, + "learning_rate": 3.882802017709307e-05, + "loss": 0.2101, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2096354365348816, + "step": 920, + "valid_targets_mean": 5091.5, + "valid_targets_min": 639 + }, + { + "epoch": 1.399394856278366, + "grad_norm": 0.44999557297889736, + "learning_rate": 3.880243896406889e-05, + "loss": 0.2274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22826901078224182, + "step": 925, + "valid_targets_mean": 6125.1, + "valid_targets_min": 1021 + }, + { + "epoch": 1.4069591527987897, + "grad_norm": 0.6717221286999777, + "learning_rate": 3.877659018439242e-05, + "loss": 0.2263, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20602205395698547, + "step": 930, + "valid_targets_mean": 4478.8, + "valid_targets_min": 873 + }, + { + "epoch": 1.4145234493192134, + "grad_norm": 0.5169880935134787, + "learning_rate": 3.8750474205902715e-05, + "loss": 0.2193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23473864793777466, + "step": 935, + "valid_targets_mean": 4604.6, + "valid_targets_min": 584 + }, + { + "epoch": 1.422087745839637, + "grad_norm": 0.5143284382260722, + "learning_rate": 3.872409140024119e-05, + "loss": 0.2306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2324678599834442, + "step": 940, + "valid_targets_mean": 4128.6, + "valid_targets_min": 553 + }, + { + "epoch": 1.4296520423600605, + "grad_norm": 0.4971476793353403, + "learning_rate": 3.8697442142846314e-05, + "loss": 0.2213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23143915832042694, + "step": 945, + "valid_targets_mean": 4315.7, + "valid_targets_min": 576 + }, + { + "epoch": 1.4372163388804842, + "grad_norm": 0.4666851291643823, + "learning_rate": 3.867052681294828e-05, + "loss": 0.216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21455539762973785, + "step": 950, + "valid_targets_mean": 4984.6, + "valid_targets_min": 1899 + }, + { + "epoch": 1.4447806354009076, + "grad_norm": 0.46105558167163246, + "learning_rate": 3.8643345793563606e-05, + "loss": 0.2192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19814419746398926, + "step": 955, + "valid_targets_mean": 5288.9, + "valid_targets_min": 474 + }, + { + "epoch": 1.4523449319213313, + "grad_norm": 0.48073898579476537, + "learning_rate": 3.86158994714897e-05, + "loss": 0.2254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22171683609485626, + "step": 960, + "valid_targets_mean": 4704.9, + "valid_targets_min": 692 + }, + { + "epoch": 1.459909228441755, + "grad_norm": 0.6948950979765578, + "learning_rate": 3.858818823729931e-05, + "loss": 0.2232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23459002375602722, + "step": 965, + "valid_targets_mean": 4975.9, + "valid_targets_min": 644 + }, + { + "epoch": 1.4674735249621786, + "grad_norm": 0.46636381876975636, + "learning_rate": 3.856021248533501e-05, + "loss": 0.2138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2228977084159851, + "step": 970, + "valid_targets_mean": 5289.2, + "valid_targets_min": 761 + }, + { + "epoch": 1.475037821482602, + "grad_norm": 0.4797474742441079, + "learning_rate": 3.853197261370357e-05, + "loss": 0.2239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23371240496635437, + "step": 975, + "valid_targets_mean": 4752.7, + "valid_targets_min": 602 + }, + { + "epoch": 1.4826021180030258, + "grad_norm": 0.4546404058434165, + "learning_rate": 3.850346902427031e-05, + "loss": 0.226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20179343223571777, + "step": 980, + "valid_targets_mean": 4409.6, + "valid_targets_min": 1001 + }, + { + "epoch": 1.4901664145234492, + "grad_norm": 0.4436841609266024, + "learning_rate": 3.847470212265334e-05, + "loss": 0.1998, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16983595490455627, + "step": 985, + "valid_targets_mean": 4392.0, + "valid_targets_min": 478 + }, + { + "epoch": 1.497730711043873, + "grad_norm": 0.5042058235443482, + "learning_rate": 3.844567231821784e-05, + "loss": 0.2122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23303237557411194, + "step": 990, + "valid_targets_mean": 4484.5, + "valid_targets_min": 461 + }, + { + "epoch": 1.5052950075642966, + "grad_norm": 0.5004283160780041, + "learning_rate": 3.8416380024070175e-05, + "loss": 0.2096, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2369617521762848, + "step": 995, + "valid_targets_mean": 5191.1, + "valid_targets_min": 565 + }, + { + "epoch": 1.5128593040847202, + "grad_norm": 0.4692148393457612, + "learning_rate": 3.838682565705209e-05, + "loss": 0.207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20685143768787384, + "step": 1000, + "valid_targets_mean": 4244.8, + "valid_targets_min": 1197 + }, + { + "epoch": 1.5204236006051437, + "grad_norm": 0.47144086137537927, + "learning_rate": 3.83570096377347e-05, + "loss": 0.2133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22664126753807068, + "step": 1005, + "valid_targets_mean": 4241.9, + "valid_targets_min": 534 + }, + { + "epoch": 1.5279878971255674, + "grad_norm": 0.47989912387325484, + "learning_rate": 3.8326932390412584e-05, + "loss": 0.2129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2084951400756836, + "step": 1010, + "valid_targets_mean": 5305.8, + "valid_targets_min": 1825 + }, + { + "epoch": 1.5355521936459908, + "grad_norm": 0.43046270561239386, + "learning_rate": 3.829659434309765e-05, + "loss": 0.2098, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23686754703521729, + "step": 1015, + "valid_targets_mean": 5570.4, + "valid_targets_min": 1879 + }, + { + "epoch": 1.5431164901664145, + "grad_norm": 0.44060406935360946, + "learning_rate": 3.8265995927513155e-05, + "loss": 0.2165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.209512859582901, + "step": 1020, + "valid_targets_mean": 4430.0, + "valid_targets_min": 797 + }, + { + "epoch": 1.5506807866868382, + "grad_norm": 0.4466657762243492, + "learning_rate": 3.823513757908748e-05, + "loss": 0.2197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21596594154834747, + "step": 1025, + "valid_targets_mean": 5183.5, + "valid_targets_min": 2442 + }, + { + "epoch": 1.5582450832072618, + "grad_norm": 0.4485453853157434, + "learning_rate": 3.820401973694796e-05, + "loss": 0.2169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19774681329727173, + "step": 1030, + "valid_targets_mean": 4629.1, + "valid_targets_min": 1966 + }, + { + "epoch": 1.5658093797276853, + "grad_norm": 0.4594903967129758, + "learning_rate": 3.817264284391464e-05, + "loss": 0.2163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2078075408935547, + "step": 1035, + "valid_targets_mean": 4402.6, + "valid_targets_min": 702 + }, + { + "epoch": 1.573373676248109, + "grad_norm": 0.474824847270428, + "learning_rate": 3.8141007346493964e-05, + "loss": 0.2141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20694419741630554, + "step": 1040, + "valid_targets_mean": 3961.7, + "valid_targets_min": 615 + }, + { + "epoch": 1.5809379727685324, + "grad_norm": 0.49033992895453127, + "learning_rate": 3.8109113694872436e-05, + "loss": 0.2287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20570620894432068, + "step": 1045, + "valid_targets_mean": 5230.1, + "valid_targets_min": 917 + }, + { + "epoch": 1.588502269288956, + "grad_norm": 0.4305573822598727, + "learning_rate": 3.80769623429102e-05, + "loss": 0.2032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1743043065071106, + "step": 1050, + "valid_targets_mean": 4900.6, + "valid_targets_min": 2108 + }, + { + "epoch": 1.5960665658093798, + "grad_norm": 0.4002556203162745, + "learning_rate": 3.804455374813456e-05, + "loss": 0.2135, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20624500513076782, + "step": 1055, + "valid_targets_mean": 5544.1, + "valid_targets_min": 831 + }, + { + "epoch": 1.6036308623298035, + "grad_norm": 0.46098253527478694, + "learning_rate": 3.8011888371733536e-05, + "loss": 0.2168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21792328357696533, + "step": 1060, + "valid_targets_mean": 4735.6, + "valid_targets_min": 709 + }, + { + "epoch": 1.611195158850227, + "grad_norm": 0.4323494824069755, + "learning_rate": 3.797896667854924e-05, + "loss": 0.2126, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21409177780151367, + "step": 1065, + "valid_targets_mean": 5365.8, + "valid_targets_min": 1040 + }, + { + "epoch": 1.6187594553706506, + "grad_norm": 0.48091408016876297, + "learning_rate": 3.7945789137071264e-05, + "loss": 0.2302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22255632281303406, + "step": 1070, + "valid_targets_mean": 5012.9, + "valid_targets_min": 167 + }, + { + "epoch": 1.626323751891074, + "grad_norm": 0.5061657168291416, + "learning_rate": 3.791235621943005e-05, + "loss": 0.2022, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23962201178073883, + "step": 1075, + "valid_targets_mean": 4036.9, + "valid_targets_min": 541 + }, + { + "epoch": 1.6338880484114977, + "grad_norm": 0.5930611198562358, + "learning_rate": 3.7878668401390157e-05, + "loss": 0.2194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27482351660728455, + "step": 1080, + "valid_targets_mean": 2996.6, + "valid_targets_min": 535 + }, + { + "epoch": 1.6414523449319214, + "grad_norm": 0.49753058500461794, + "learning_rate": 3.784472616234345e-05, + "loss": 0.2269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21738803386688232, + "step": 1085, + "valid_targets_mean": 3911.5, + "valid_targets_min": 492 + }, + { + "epoch": 1.649016641452345, + "grad_norm": 0.40647299631956285, + "learning_rate": 3.7810529985302354e-05, + "loss": 0.2235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23166729509830475, + "step": 1090, + "valid_targets_mean": 5627.1, + "valid_targets_min": 701 + }, + { + "epoch": 1.6565809379727685, + "grad_norm": 0.4505033940645788, + "learning_rate": 3.77760803568929e-05, + "loss": 0.2035, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.185359388589859, + "step": 1095, + "valid_targets_mean": 4292.2, + "valid_targets_min": 631 + }, + { + "epoch": 1.6641452344931922, + "grad_norm": 0.5731571126335391, + "learning_rate": 3.774137776734788e-05, + "loss": 0.2074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23054252564907074, + "step": 1100, + "valid_targets_mean": 4484.4, + "valid_targets_min": 821 + }, + { + "epoch": 1.6717095310136156, + "grad_norm": 0.4784802801846398, + "learning_rate": 3.770642271049979e-05, + "loss": 0.2067, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2124379724264145, + "step": 1105, + "valid_targets_mean": 4393.8, + "valid_targets_min": 775 + }, + { + "epoch": 1.6792738275340393, + "grad_norm": 0.5134935440685385, + "learning_rate": 3.767121568377387e-05, + "loss": 0.2341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21781983971595764, + "step": 1110, + "valid_targets_mean": 4165.0, + "valid_targets_min": 432 + }, + { + "epoch": 1.686838124054463, + "grad_norm": 0.47819089354766536, + "learning_rate": 3.763575718818099e-05, + "loss": 0.2194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2285154014825821, + "step": 1115, + "valid_targets_mean": 4118.8, + "valid_targets_min": 841 + }, + { + "epoch": 1.6944024205748867, + "grad_norm": 0.4468206203889348, + "learning_rate": 3.760004772831052e-05, + "loss": 0.2187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25908660888671875, + "step": 1120, + "valid_targets_mean": 4985.1, + "valid_targets_min": 627 + }, + { + "epoch": 1.70196671709531, + "grad_norm": 0.5621098040132918, + "learning_rate": 3.7564087812323176e-05, + "loss": 0.2321, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21479949355125427, + "step": 1125, + "valid_targets_mean": 3941.1, + "valid_targets_min": 640 + }, + { + "epoch": 1.7095310136157338, + "grad_norm": 0.47379514306217524, + "learning_rate": 3.7527877951943745e-05, + "loss": 0.2142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2008206844329834, + "step": 1130, + "valid_targets_mean": 4877.2, + "valid_targets_min": 635 + }, + { + "epoch": 1.7170953101361572, + "grad_norm": 0.4138650914953261, + "learning_rate": 3.749141866245385e-05, + "loss": 0.2235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23996658623218536, + "step": 1135, + "valid_targets_mean": 5840.4, + "valid_targets_min": 3031 + }, + { + "epoch": 1.724659606656581, + "grad_norm": 0.45004503636425675, + "learning_rate": 3.745471046268459e-05, + "loss": 0.2063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2210957556962967, + "step": 1140, + "valid_targets_mean": 5032.8, + "valid_targets_min": 778 + }, + { + "epoch": 1.7322239031770046, + "grad_norm": 0.45471390290097, + "learning_rate": 3.7417753875009156e-05, + "loss": 0.1975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19280803203582764, + "step": 1145, + "valid_targets_mean": 4949.8, + "valid_targets_min": 665 + }, + { + "epoch": 1.7397881996974283, + "grad_norm": 0.419924001810217, + "learning_rate": 3.738054942533541e-05, + "loss": 0.213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2240639328956604, + "step": 1150, + "valid_targets_mean": 6157.3, + "valid_targets_min": 1996 + }, + { + "epoch": 1.7473524962178517, + "grad_norm": 0.4040114013155955, + "learning_rate": 3.734309764309839e-05, + "loss": 0.2287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19116798043251038, + "step": 1155, + "valid_targets_mean": 5477.1, + "valid_targets_min": 533 + }, + { + "epoch": 1.7549167927382754, + "grad_norm": 0.44527208272245994, + "learning_rate": 3.7305399061252795e-05, + "loss": 0.2136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21052294969558716, + "step": 1160, + "valid_targets_mean": 5120.7, + "valid_targets_min": 2604 + }, + { + "epoch": 1.7624810892586988, + "grad_norm": 0.47454028755125877, + "learning_rate": 3.726745421626537e-05, + "loss": 0.208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18771414458751678, + "step": 1165, + "valid_targets_mean": 3559.8, + "valid_targets_min": 745 + }, + { + "epoch": 1.7700453857791225, + "grad_norm": 0.43637966175679893, + "learning_rate": 3.7229263648107285e-05, + "loss": 0.2329, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21924227476119995, + "step": 1170, + "valid_targets_mean": 5584.8, + "valid_targets_min": 730 + }, + { + "epoch": 1.7776096822995462, + "grad_norm": 0.48512408703189136, + "learning_rate": 3.7190827900246474e-05, + "loss": 0.235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2824190557003021, + "step": 1175, + "valid_targets_mean": 4822.8, + "valid_targets_min": 324 + }, + { + "epoch": 1.7851739788199699, + "grad_norm": 0.4085379036791164, + "learning_rate": 3.715214751963987e-05, + "loss": 0.2121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20510166883468628, + "step": 1180, + "valid_targets_mean": 5139.8, + "valid_targets_min": 654 + }, + { + "epoch": 1.7927382753403933, + "grad_norm": 0.4729236016827413, + "learning_rate": 3.711322305672563e-05, + "loss": 0.2095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20955510437488556, + "step": 1185, + "valid_targets_mean": 5596.6, + "valid_targets_min": 518 + }, + { + "epoch": 1.800302571860817, + "grad_norm": 0.48668657427380835, + "learning_rate": 3.707405506541532e-05, + "loss": 0.2362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.318617582321167, + "step": 1190, + "valid_targets_mean": 6068.5, + "valid_targets_min": 1636 + }, + { + "epoch": 1.8078668683812404, + "grad_norm": 0.4638826690530358, + "learning_rate": 3.703464410308601e-05, + "loss": 0.1973, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20980286598205566, + "step": 1195, + "valid_targets_mean": 5101.1, + "valid_targets_min": 582 + }, + { + "epoch": 1.8154311649016641, + "grad_norm": 0.45348458053897023, + "learning_rate": 3.699499073057234e-05, + "loss": 0.224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22972343862056732, + "step": 1200, + "valid_targets_mean": 5133.1, + "valid_targets_min": 580 + }, + { + "epoch": 1.8229954614220878, + "grad_norm": 0.46690448724128925, + "learning_rate": 3.6955095512158554e-05, + "loss": 0.2027, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20475982129573822, + "step": 1205, + "valid_targets_mean": 4050.1, + "valid_targets_min": 263 + }, + { + "epoch": 1.8305597579425115, + "grad_norm": 0.38598946854284577, + "learning_rate": 3.691495901557048e-05, + "loss": 0.2035, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.191079780459404, + "step": 1210, + "valid_targets_mean": 5022.6, + "valid_targets_min": 831 + }, + { + "epoch": 1.838124054462935, + "grad_norm": 0.4758287501019245, + "learning_rate": 3.6874581811967425e-05, + "loss": 0.2154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2127455770969391, + "step": 1215, + "valid_targets_mean": 4499.1, + "valid_targets_min": 1004 + }, + { + "epoch": 1.8456883509833586, + "grad_norm": 0.5274867081099741, + "learning_rate": 3.683396447593406e-05, + "loss": 0.2164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23229683935642242, + "step": 1220, + "valid_targets_mean": 3733.7, + "valid_targets_min": 620 + }, + { + "epoch": 1.853252647503782, + "grad_norm": 0.5325193798816835, + "learning_rate": 3.6793107585472234e-05, + "loss": 0.2171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18608064949512482, + "step": 1225, + "valid_targets_mean": 4011.7, + "valid_targets_min": 651 + }, + { + "epoch": 1.8608169440242057, + "grad_norm": 0.4796697965319729, + "learning_rate": 3.675201172199277e-05, + "loss": 0.202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21180656552314758, + "step": 1230, + "valid_targets_mean": 4636.1, + "valid_targets_min": 664 + }, + { + "epoch": 1.8683812405446294, + "grad_norm": 0.46507622330055887, + "learning_rate": 3.6710677470307174e-05, + "loss": 0.203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19449734687805176, + "step": 1235, + "valid_targets_mean": 5142.5, + "valid_targets_min": 684 + }, + { + "epoch": 1.875945537065053, + "grad_norm": 0.4498886408384691, + "learning_rate": 3.6669105418619307e-05, + "loss": 0.214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24638114869594574, + "step": 1240, + "valid_targets_mean": 4571.3, + "valid_targets_min": 947 + }, + { + "epoch": 1.8835098335854765, + "grad_norm": 0.38613380520350143, + "learning_rate": 3.6627296158517035e-05, + "loss": 0.2091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20049728453159332, + "step": 1245, + "valid_targets_mean": 5270.2, + "valid_targets_min": 506 + }, + { + "epoch": 1.8910741301059002, + "grad_norm": 0.42668472851640654, + "learning_rate": 3.658525028496382e-05, + "loss": 0.2067, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20964312553405762, + "step": 1250, + "valid_targets_mean": 5350.7, + "valid_targets_min": 939 + }, + { + "epoch": 1.8986384266263236, + "grad_norm": 0.45077998259356017, + "learning_rate": 3.654296839629017e-05, + "loss": 0.2269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21868251264095306, + "step": 1255, + "valid_targets_mean": 4306.2, + "valid_targets_min": 648 + }, + { + "epoch": 1.9062027231467473, + "grad_norm": 0.4172755513143054, + "learning_rate": 3.650045109418526e-05, + "loss": 0.2105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20290303230285645, + "step": 1260, + "valid_targets_mean": 4538.8, + "valid_targets_min": 729 + }, + { + "epoch": 1.913767019667171, + "grad_norm": 0.39469743941144136, + "learning_rate": 3.645769898368826e-05, + "loss": 0.2291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2144225537776947, + "step": 1265, + "valid_targets_mean": 5632.7, + "valid_targets_min": 1531 + }, + { + "epoch": 1.9213313161875947, + "grad_norm": 0.43115820343190236, + "learning_rate": 3.641471267317976e-05, + "loss": 0.2065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1905321180820465, + "step": 1270, + "valid_targets_mean": 5414.7, + "valid_targets_min": 3137 + }, + { + "epoch": 1.9288956127080181, + "grad_norm": 0.43617197993062395, + "learning_rate": 3.637149277437313e-05, + "loss": 0.2128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18612077832221985, + "step": 1275, + "valid_targets_mean": 4280.9, + "valid_targets_min": 913 + }, + { + "epoch": 1.9364599092284418, + "grad_norm": 0.4134925897382423, + "learning_rate": 3.6328039902305806e-05, + "loss": 0.2044, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2115355283021927, + "step": 1280, + "valid_targets_mean": 5742.0, + "valid_targets_min": 2905 + }, + { + "epoch": 1.9440242057488653, + "grad_norm": 0.48642626046401954, + "learning_rate": 3.628435467533051e-05, + "loss": 0.1973, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20169591903686523, + "step": 1285, + "valid_targets_mean": 4847.1, + "valid_targets_min": 697 + }, + { + "epoch": 1.951588502269289, + "grad_norm": 0.45981955132140206, + "learning_rate": 3.624043771510647e-05, + "loss": 0.2077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2202417254447937, + "step": 1290, + "valid_targets_mean": 4461.9, + "valid_targets_min": 768 + }, + { + "epoch": 1.9591527987897126, + "grad_norm": 0.5548992370748099, + "learning_rate": 3.619628964659061e-05, + "loss": 0.2069, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20979902148246765, + "step": 1295, + "valid_targets_mean": 4232.5, + "valid_targets_min": 605 + }, + { + "epoch": 1.9667170953101363, + "grad_norm": 0.45509897761384305, + "learning_rate": 3.61519110980286e-05, + "loss": 0.221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23807252943515778, + "step": 1300, + "valid_targets_mean": 5210.3, + "valid_targets_min": 653 + }, + { + "epoch": 1.9742813918305597, + "grad_norm": 0.4954616432107395, + "learning_rate": 3.6107302700945925e-05, + "loss": 0.2177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19750761985778809, + "step": 1305, + "valid_targets_mean": 4131.4, + "valid_targets_min": 1256 + }, + { + "epoch": 1.9818456883509834, + "grad_norm": 0.4950490512679412, + "learning_rate": 3.6062465090138936e-05, + "loss": 0.2047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21456892788410187, + "step": 1310, + "valid_targets_mean": 4869.4, + "valid_targets_min": 276 + }, + { + "epoch": 1.9894099848714069, + "grad_norm": 0.4002985029551449, + "learning_rate": 3.6017398903665787e-05, + "loss": 0.2075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21733352541923523, + "step": 1315, + "valid_targets_mean": 5251.1, + "valid_targets_min": 509 + }, + { + "epoch": 1.9969742813918305, + "grad_norm": 0.45582333361758404, + "learning_rate": 3.597210478283735e-05, + "loss": 0.2115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18914233148097992, + "step": 1320, + "valid_targets_mean": 4262.2, + "valid_targets_min": 627 + }, + { + "epoch": 2.004538577912254, + "grad_norm": 0.40702543737998387, + "learning_rate": 3.5926583372208106e-05, + "loss": 0.1837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1540646106004715, + "step": 1325, + "valid_targets_mean": 4975.2, + "valid_targets_min": 700 + }, + { + "epoch": 2.012102874432678, + "grad_norm": 0.4496790435497547, + "learning_rate": 3.588083531956698e-05, + "loss": 0.1934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2218005359172821, + "step": 1330, + "valid_targets_mean": 5816.9, + "valid_targets_min": 904 + }, + { + "epoch": 2.0196671709531016, + "grad_norm": 0.46863438000971996, + "learning_rate": 3.583486127592807e-05, + "loss": 0.1992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18488076329231262, + "step": 1335, + "valid_targets_mean": 5440.6, + "valid_targets_min": 3085 + }, + { + "epoch": 2.027231467473525, + "grad_norm": 0.5151916546138103, + "learning_rate": 3.5788661895521455e-05, + "loss": 0.2004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2258104532957077, + "step": 1340, + "valid_targets_mean": 4155.2, + "valid_targets_min": 630 + }, + { + "epoch": 2.0347957639939485, + "grad_norm": 0.49594114456507576, + "learning_rate": 3.574223783578385e-05, + "loss": 0.1979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22207194566726685, + "step": 1345, + "valid_targets_mean": 4207.0, + "valid_targets_min": 315 + }, + { + "epoch": 2.042360060514372, + "grad_norm": 0.4943620883953129, + "learning_rate": 3.569558975734923e-05, + "loss": 0.1961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2514224350452423, + "step": 1350, + "valid_targets_mean": 4149.8, + "valid_targets_min": 344 + }, + { + "epoch": 2.049924357034796, + "grad_norm": 0.5253116434763031, + "learning_rate": 3.564871832403948e-05, + "loss": 0.2041, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2232326716184616, + "step": 1355, + "valid_targets_mean": 4119.2, + "valid_targets_min": 276 + }, + { + "epoch": 2.0574886535552195, + "grad_norm": 0.5737491130225292, + "learning_rate": 3.560162420285489e-05, + "loss": 0.1936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18539798259735107, + "step": 1360, + "valid_targets_mean": 3559.8, + "valid_targets_min": 636 + }, + { + "epoch": 2.065052950075643, + "grad_norm": 0.4385320487150631, + "learning_rate": 3.555430806396471e-05, + "loss": 0.1928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17747828364372253, + "step": 1365, + "valid_targets_mean": 5143.0, + "valid_targets_min": 786 + }, + { + "epoch": 2.0726172465960664, + "grad_norm": 0.4516448141785158, + "learning_rate": 3.55067705806976e-05, + "loss": 0.1861, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17075631022453308, + "step": 1370, + "valid_targets_mean": 3945.9, + "valid_targets_min": 708 + }, + { + "epoch": 2.08018154311649, + "grad_norm": 0.4568200680232734, + "learning_rate": 3.545901242953203e-05, + "loss": 0.1793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1752496063709259, + "step": 1375, + "valid_targets_mean": 4810.1, + "valid_targets_min": 589 + }, + { + "epoch": 2.0877458396369137, + "grad_norm": 0.42407318340899935, + "learning_rate": 3.541103429008666e-05, + "loss": 0.191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18580807745456696, + "step": 1380, + "valid_targets_mean": 4368.6, + "valid_targets_min": 1246 + }, + { + "epoch": 2.0953101361573374, + "grad_norm": 0.4653908136883707, + "learning_rate": 3.5362836845110716e-05, + "loss": 0.1974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19469863176345825, + "step": 1385, + "valid_targets_mean": 4756.0, + "valid_targets_min": 733 + }, + { + "epoch": 2.102874432677761, + "grad_norm": 0.5635103968025736, + "learning_rate": 3.5314420780474186e-05, + "loss": 0.2122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2790437936782837, + "step": 1390, + "valid_targets_mean": 4166.2, + "valid_targets_min": 710 + }, + { + "epoch": 2.1104387291981848, + "grad_norm": 0.43205481737773477, + "learning_rate": 3.5265786785158145e-05, + "loss": 0.1817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1872987449169159, + "step": 1395, + "valid_targets_mean": 5274.1, + "valid_targets_min": 864 + }, + { + "epoch": 2.118003025718608, + "grad_norm": 0.4774922261941819, + "learning_rate": 3.5216935551244896e-05, + "loss": 0.1834, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21239280700683594, + "step": 1400, + "valid_targets_mean": 4354.2, + "valid_targets_min": 728 + }, + { + "epoch": 2.1255673222390317, + "grad_norm": 0.4294646533171388, + "learning_rate": 3.516786777390813e-05, + "loss": 0.205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19841662049293518, + "step": 1405, + "valid_targets_mean": 4863.3, + "valid_targets_min": 831 + }, + { + "epoch": 2.1331316187594553, + "grad_norm": 0.4456489658314288, + "learning_rate": 3.511858415140307e-05, + "loss": 0.1938, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17594018578529358, + "step": 1410, + "valid_targets_mean": 4792.0, + "valid_targets_min": 313 + }, + { + "epoch": 2.140695915279879, + "grad_norm": 0.44465763211771253, + "learning_rate": 3.506908538505648e-05, + "loss": 0.1999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18716633319854736, + "step": 1415, + "valid_targets_mean": 5343.8, + "valid_targets_min": 251 + }, + { + "epoch": 2.1482602118003027, + "grad_norm": 0.39752123335367506, + "learning_rate": 3.501937217925673e-05, + "loss": 0.1929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18770697712898254, + "step": 1420, + "valid_targets_mean": 6363.2, + "valid_targets_min": 2667 + }, + { + "epoch": 2.1558245083207264, + "grad_norm": 0.39155216335682075, + "learning_rate": 3.496944524144375e-05, + "loss": 0.1886, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19712628424167633, + "step": 1425, + "valid_targets_mean": 5730.4, + "valid_targets_min": 1000 + }, + { + "epoch": 2.1633888048411496, + "grad_norm": 0.43195947367825904, + "learning_rate": 3.4919305282098946e-05, + "loss": 0.2072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24122843146324158, + "step": 1430, + "valid_targets_mean": 6412.1, + "valid_targets_min": 889 + }, + { + "epoch": 2.1709531013615733, + "grad_norm": 0.4398943111486007, + "learning_rate": 3.486895301473515e-05, + "loss": 0.2081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19659355282783508, + "step": 1435, + "valid_targets_mean": 5464.9, + "valid_targets_min": 1276 + }, + { + "epoch": 2.178517397881997, + "grad_norm": 0.48362329298743195, + "learning_rate": 3.4818389155886394e-05, + "loss": 0.2056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2112506628036499, + "step": 1440, + "valid_targets_mean": 4218.0, + "valid_targets_min": 1231 + }, + { + "epoch": 2.1860816944024206, + "grad_norm": 0.42445784479643406, + "learning_rate": 3.476761442509776e-05, + "loss": 0.1938, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20175385475158691, + "step": 1445, + "valid_targets_mean": 5042.2, + "valid_targets_min": 650 + }, + { + "epoch": 2.1936459909228443, + "grad_norm": 0.46863858194637786, + "learning_rate": 3.4716629544915124e-05, + "loss": 0.1804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18960747122764587, + "step": 1450, + "valid_targets_mean": 4570.9, + "valid_targets_min": 289 + }, + { + "epoch": 2.201210287443268, + "grad_norm": 0.4081654878828203, + "learning_rate": 3.4665435240874883e-05, + "loss": 0.203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21481245756149292, + "step": 1455, + "valid_targets_mean": 5856.9, + "valid_targets_min": 697 + }, + { + "epoch": 2.208774583963691, + "grad_norm": 0.4111346302978917, + "learning_rate": 3.46140322414936e-05, + "loss": 0.1924, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19531169533729553, + "step": 1460, + "valid_targets_mean": 5301.0, + "valid_targets_min": 2624 + }, + { + "epoch": 2.216338880484115, + "grad_norm": 0.4377217717133345, + "learning_rate": 3.456242127825769e-05, + "loss": 0.189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20428526401519775, + "step": 1465, + "valid_targets_mean": 5484.6, + "valid_targets_min": 825 + }, + { + "epoch": 2.2239031770045385, + "grad_norm": 0.4506208136550606, + "learning_rate": 3.4510603085612984e-05, + "loss": 0.2065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23383532464504242, + "step": 1470, + "valid_targets_mean": 5352.6, + "valid_targets_min": 510 + }, + { + "epoch": 2.231467473524962, + "grad_norm": 0.43942764521770533, + "learning_rate": 3.445857840095425e-05, + "loss": 0.204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22128233313560486, + "step": 1475, + "valid_targets_mean": 5222.1, + "valid_targets_min": 580 + }, + { + "epoch": 2.239031770045386, + "grad_norm": 0.4503478854825486, + "learning_rate": 3.4406347964614725e-05, + "loss": 0.2024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21977970004081726, + "step": 1480, + "valid_targets_mean": 4908.6, + "valid_targets_min": 899 + }, + { + "epoch": 2.2465960665658096, + "grad_norm": 0.4413787827481323, + "learning_rate": 3.4353912519855605e-05, + "loss": 0.193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19306686520576477, + "step": 1485, + "valid_targets_mean": 4952.8, + "valid_targets_min": 458 + }, + { + "epoch": 2.254160363086233, + "grad_norm": 0.5587470714781764, + "learning_rate": 3.4301272812855425e-05, + "loss": 0.2124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2252560257911682, + "step": 1490, + "valid_targets_mean": 4599.5, + "valid_targets_min": 671 + }, + { + "epoch": 2.2617246596066565, + "grad_norm": 0.4687262608875152, + "learning_rate": 3.4248429592699455e-05, + "loss": 0.2004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25899726152420044, + "step": 1495, + "valid_targets_mean": 4973.8, + "valid_targets_min": 1007 + }, + { + "epoch": 2.26928895612708, + "grad_norm": 0.49079428216545606, + "learning_rate": 3.419538361136906e-05, + "loss": 0.1885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.182472363114357, + "step": 1500, + "valid_targets_mean": 5454.8, + "valid_targets_min": 308 + }, + { + "epoch": 2.276853252647504, + "grad_norm": 0.49359503726236653, + "learning_rate": 3.4142135623730954e-05, + "loss": 0.1906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19357086718082428, + "step": 1505, + "valid_targets_mean": 3701.8, + "valid_targets_min": 606 + }, + { + "epoch": 2.2844175491679275, + "grad_norm": 0.4153158668013505, + "learning_rate": 3.408868638752652e-05, + "loss": 0.1948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17256566882133484, + "step": 1510, + "valid_targets_mean": 4801.5, + "valid_targets_min": 547 + }, + { + "epoch": 2.291981845688351, + "grad_norm": 0.4244264255761697, + "learning_rate": 3.4035036663360975e-05, + "loss": 0.1946, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1777007281780243, + "step": 1515, + "valid_targets_mean": 4332.2, + "valid_targets_min": 633 + }, + { + "epoch": 2.2995461422087744, + "grad_norm": 0.45909023723152254, + "learning_rate": 3.398118721469255e-05, + "loss": 0.2138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20535892248153687, + "step": 1520, + "valid_targets_mean": 4871.4, + "valid_targets_min": 718 + }, + { + "epoch": 2.307110438729198, + "grad_norm": 0.4473665556214724, + "learning_rate": 3.392713880782168e-05, + "loss": 0.2032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23217563331127167, + "step": 1525, + "valid_targets_mean": 5512.5, + "valid_targets_min": 1040 + }, + { + "epoch": 2.3146747352496218, + "grad_norm": 0.42374613530545113, + "learning_rate": 3.3872892211880024e-05, + "loss": 0.1884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1817803978919983, + "step": 1530, + "valid_targets_mean": 5129.2, + "valid_targets_min": 518 + }, + { + "epoch": 2.3222390317700454, + "grad_norm": 0.4276274337247246, + "learning_rate": 3.381844819881956e-05, + "loss": 0.1839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18477100133895874, + "step": 1535, + "valid_targets_mean": 4986.2, + "valid_targets_min": 3046 + }, + { + "epoch": 2.329803328290469, + "grad_norm": 0.47276929234987713, + "learning_rate": 3.376380754340161e-05, + "loss": 0.2004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18416282534599304, + "step": 1540, + "valid_targets_mean": 5228.8, + "valid_targets_min": 768 + }, + { + "epoch": 2.3373676248108928, + "grad_norm": 0.4196095119449812, + "learning_rate": 3.370897102318579e-05, + "loss": 0.2079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20898360013961792, + "step": 1545, + "valid_targets_mean": 4759.8, + "valid_targets_min": 492 + }, + { + "epoch": 2.344931921331316, + "grad_norm": 0.4748812000021928, + "learning_rate": 3.365393941851895e-05, + "loss": 0.1929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19465558230876923, + "step": 1550, + "valid_targets_mean": 5065.6, + "valid_targets_min": 593 + }, + { + "epoch": 2.3524962178517397, + "grad_norm": 0.45592369911420205, + "learning_rate": 3.3598713512524095e-05, + "loss": 0.195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20027290284633636, + "step": 1555, + "valid_targets_mean": 4513.0, + "valid_targets_min": 813 + }, + { + "epoch": 2.3600605143721634, + "grad_norm": 0.47210399853307966, + "learning_rate": 3.3543294091089196e-05, + "loss": 0.1866, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1730816662311554, + "step": 1560, + "valid_targets_mean": 4316.6, + "valid_targets_min": 1518 + }, + { + "epoch": 2.367624810892587, + "grad_norm": 0.41410239848604946, + "learning_rate": 3.348768194285604e-05, + "loss": 0.2187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18337824940681458, + "step": 1565, + "valid_targets_mean": 5466.7, + "valid_targets_min": 757 + }, + { + "epoch": 2.3751891074130107, + "grad_norm": 0.45973957172447916, + "learning_rate": 3.343187785920899e-05, + "loss": 0.1881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1856488138437271, + "step": 1570, + "valid_targets_mean": 4216.2, + "valid_targets_min": 307 + }, + { + "epoch": 2.3827534039334344, + "grad_norm": 0.4378622827189698, + "learning_rate": 3.337588263426376e-05, + "loss": 0.2074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19100350141525269, + "step": 1575, + "valid_targets_mean": 5184.4, + "valid_targets_min": 619 + }, + { + "epoch": 2.3903177004538576, + "grad_norm": 0.4386524001539198, + "learning_rate": 3.331969706485604e-05, + "loss": 0.201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17415636777877808, + "step": 1580, + "valid_targets_mean": 4543.0, + "valid_targets_min": 370 + }, + { + "epoch": 2.3978819969742813, + "grad_norm": 0.43023639119991364, + "learning_rate": 3.3263321950530244e-05, + "loss": 0.2013, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20897901058197021, + "step": 1585, + "valid_targets_mean": 4961.1, + "valid_targets_min": 662 + }, + { + "epoch": 2.405446293494705, + "grad_norm": 0.42713339383685595, + "learning_rate": 3.320675809352807e-05, + "loss": 0.2124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19284377992153168, + "step": 1590, + "valid_targets_mean": 5614.0, + "valid_targets_min": 688 + }, + { + "epoch": 2.4130105900151286, + "grad_norm": 0.5523601616423389, + "learning_rate": 3.31500062987771e-05, + "loss": 0.1983, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19253116846084595, + "step": 1595, + "valid_targets_mean": 4295.9, + "valid_targets_min": 557 + }, + { + "epoch": 2.4205748865355523, + "grad_norm": 0.5607373427621611, + "learning_rate": 3.309306737387936e-05, + "loss": 0.2018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1754857301712036, + "step": 1600, + "valid_targets_mean": 4523.5, + "valid_targets_min": 780 + }, + { + "epoch": 2.428139183055976, + "grad_norm": 0.4720039398009578, + "learning_rate": 3.303594212909981e-05, + "loss": 0.2005, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2118072211742401, + "step": 1605, + "valid_targets_mean": 4806.7, + "valid_targets_min": 1473 + }, + { + "epoch": 2.435703479576399, + "grad_norm": 0.42277502230438585, + "learning_rate": 3.297863137735483e-05, + "loss": 0.1915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18252259492874146, + "step": 1610, + "valid_targets_mean": 4898.9, + "valid_targets_min": 1972 + }, + { + "epoch": 2.443267776096823, + "grad_norm": 0.399237439365295, + "learning_rate": 3.292113593420064e-05, + "loss": 0.2006, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21672359108924866, + "step": 1615, + "valid_targets_mean": 5876.3, + "valid_targets_min": 583 + }, + { + "epoch": 2.4508320726172466, + "grad_norm": 0.4486669185061525, + "learning_rate": 3.2863456617821686e-05, + "loss": 0.2077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19526150822639465, + "step": 1620, + "valid_targets_mean": 4571.8, + "valid_targets_min": 563 + }, + { + "epoch": 2.4583963691376702, + "grad_norm": 0.44285518845846517, + "learning_rate": 3.280559424901902e-05, + "loss": 0.2139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1940869390964508, + "step": 1625, + "valid_targets_mean": 4653.3, + "valid_targets_min": 932 + }, + { + "epoch": 2.465960665658094, + "grad_norm": 0.43307975933325155, + "learning_rate": 3.274754965119859e-05, + "loss": 0.2007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19831156730651855, + "step": 1630, + "valid_targets_mean": 6298.4, + "valid_targets_min": 831 + }, + { + "epoch": 2.4735249621785176, + "grad_norm": 0.44815134988666483, + "learning_rate": 3.268932365035957e-05, + "loss": 0.1905, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17851749062538147, + "step": 1635, + "valid_targets_mean": 4155.0, + "valid_targets_min": 873 + }, + { + "epoch": 2.481089258698941, + "grad_norm": 0.4758137431057834, + "learning_rate": 3.2630917075082545e-05, + "loss": 0.1941, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21051645278930664, + "step": 1640, + "valid_targets_mean": 4256.6, + "valid_targets_min": 543 + }, + { + "epoch": 2.4886535552193645, + "grad_norm": 0.4856266870931895, + "learning_rate": 3.257233075651776e-05, + "loss": 0.1887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2091045379638672, + "step": 1645, + "valid_targets_mean": 3996.8, + "valid_targets_min": 772 + }, + { + "epoch": 2.496217851739788, + "grad_norm": 0.4596232547209677, + "learning_rate": 3.251356552837331e-05, + "loss": 0.1943, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20302638411521912, + "step": 1650, + "valid_targets_mean": 4664.9, + "valid_targets_min": 559 + }, + { + "epoch": 2.503782148260212, + "grad_norm": 0.445645358686214, + "learning_rate": 3.24546222269032e-05, + "loss": 0.2, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20104148983955383, + "step": 1655, + "valid_targets_mean": 4935.3, + "valid_targets_min": 648 + }, + { + "epoch": 2.5113464447806355, + "grad_norm": 0.4163593861795244, + "learning_rate": 3.239550169089554e-05, + "loss": 0.2021, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20811429619789124, + "step": 1660, + "valid_targets_mean": 4935.7, + "valid_targets_min": 846 + }, + { + "epoch": 2.5189107413010587, + "grad_norm": 0.4506628867755876, + "learning_rate": 3.233620476166052e-05, + "loss": 0.1903, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19512595236301422, + "step": 1665, + "valid_targets_mean": 4339.9, + "valid_targets_min": 841 + }, + { + "epoch": 2.5264750378214824, + "grad_norm": 0.4613561611964294, + "learning_rate": 3.227673228301852e-05, + "loss": 0.2023, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2082904875278473, + "step": 1670, + "valid_targets_mean": 5157.9, + "valid_targets_min": 239 + }, + { + "epoch": 2.534039334341906, + "grad_norm": 0.4300736454991396, + "learning_rate": 3.221708510128803e-05, + "loss": 0.2126, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22433291375637054, + "step": 1675, + "valid_targets_mean": 5423.8, + "valid_targets_min": 874 + }, + { + "epoch": 2.5416036308623298, + "grad_norm": 0.4490922816670387, + "learning_rate": 3.215726406527366e-05, + "loss": 0.19, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2263888120651245, + "step": 1680, + "valid_targets_mean": 5079.9, + "valid_targets_min": 716 + }, + { + "epoch": 2.5491679273827534, + "grad_norm": 0.4252724518754956, + "learning_rate": 3.209727002625403e-05, + "loss": 0.1827, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18171165883541107, + "step": 1685, + "valid_targets_mean": 5157.6, + "valid_targets_min": 736 + }, + { + "epoch": 2.556732223903177, + "grad_norm": 0.5227593631370397, + "learning_rate": 3.203710383796968e-05, + "loss": 0.2036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22884613275527954, + "step": 1690, + "valid_targets_mean": 3696.8, + "valid_targets_min": 583 + }, + { + "epoch": 2.564296520423601, + "grad_norm": 0.40394341246737125, + "learning_rate": 3.197676635661088e-05, + "loss": 0.1891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16832637786865234, + "step": 1695, + "valid_targets_mean": 4719.4, + "valid_targets_min": 883 + }, + { + "epoch": 2.5718608169440245, + "grad_norm": 0.4100683829903881, + "learning_rate": 3.191625844080549e-05, + "loss": 0.1954, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17842620611190796, + "step": 1700, + "valid_targets_mean": 4411.6, + "valid_targets_min": 765 + }, + { + "epoch": 2.5794251134644477, + "grad_norm": 0.4485180434538262, + "learning_rate": 3.185558095160673e-05, + "loss": 0.2009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21995443105697632, + "step": 1705, + "valid_targets_mean": 5099.1, + "valid_targets_min": 679 + }, + { + "epoch": 2.5869894099848714, + "grad_norm": 0.38074545321551795, + "learning_rate": 3.1794734752480904e-05, + "loss": 0.1882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16947045922279358, + "step": 1710, + "valid_targets_mean": 5466.0, + "valid_targets_min": 1958 + }, + { + "epoch": 2.594553706505295, + "grad_norm": 0.3895212805166888, + "learning_rate": 3.173372070929516e-05, + "loss": 0.1887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17781910300254822, + "step": 1715, + "valid_targets_mean": 5277.6, + "valid_targets_min": 361 + }, + { + "epoch": 2.6021180030257187, + "grad_norm": 0.41926890792799176, + "learning_rate": 3.1672539690305085e-05, + "loss": 0.1818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18371908366680145, + "step": 1720, + "valid_targets_mean": 4538.6, + "valid_targets_min": 653 + }, + { + "epoch": 2.609682299546142, + "grad_norm": 0.41803066739386174, + "learning_rate": 3.161119256614245e-05, + "loss": 0.202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19282883405685425, + "step": 1725, + "valid_targets_mean": 4689.3, + "valid_targets_min": 332 + }, + { + "epoch": 2.6172465960665656, + "grad_norm": 0.4163373440445869, + "learning_rate": 3.1549680209802755e-05, + "loss": 0.1998, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21184735000133514, + "step": 1730, + "valid_targets_mean": 5217.1, + "valid_targets_min": 625 + }, + { + "epoch": 2.6248108925869893, + "grad_norm": 0.43278351740246923, + "learning_rate": 3.148800349663284e-05, + "loss": 0.1944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17267610132694244, + "step": 1735, + "valid_targets_mean": 5302.0, + "valid_targets_min": 961 + }, + { + "epoch": 2.632375189107413, + "grad_norm": 0.4301909115075832, + "learning_rate": 3.142616330431838e-05, + "loss": 0.1926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21546387672424316, + "step": 1740, + "valid_targets_mean": 5153.0, + "valid_targets_min": 677 + }, + { + "epoch": 2.6399394856278366, + "grad_norm": 0.43182873571569885, + "learning_rate": 3.136416051287145e-05, + "loss": 0.1922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2066439390182495, + "step": 1745, + "valid_targets_mean": 5306.9, + "valid_targets_min": 786 + }, + { + "epoch": 2.6475037821482603, + "grad_norm": 0.4085125515579365, + "learning_rate": 3.130199600461797e-05, + "loss": 0.1982, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1787445843219757, + "step": 1750, + "valid_targets_mean": 5295.1, + "valid_targets_min": 619 + }, + { + "epoch": 2.655068078668684, + "grad_norm": 0.42252400798877315, + "learning_rate": 3.1239670664185175e-05, + "loss": 0.2034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2020232379436493, + "step": 1755, + "valid_targets_mean": 5475.5, + "valid_targets_min": 514 + }, + { + "epoch": 2.6626323751891077, + "grad_norm": 0.896652188415317, + "learning_rate": 3.1177185378488984e-05, + "loss": 0.1853, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18937832117080688, + "step": 1760, + "valid_targets_mean": 3316.6, + "valid_targets_min": 635 + }, + { + "epoch": 2.670196671709531, + "grad_norm": 0.4448738975213965, + "learning_rate": 3.111454103672143e-05, + "loss": 0.1974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19633612036705017, + "step": 1765, + "valid_targets_mean": 4133.8, + "valid_targets_min": 712 + }, + { + "epoch": 2.6777609682299546, + "grad_norm": 0.4466168476220182, + "learning_rate": 3.105173853033796e-05, + "loss": 0.1937, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20809951424598694, + "step": 1770, + "valid_targets_mean": 4109.6, + "valid_targets_min": 667 + }, + { + "epoch": 2.6853252647503782, + "grad_norm": 0.4951441062549152, + "learning_rate": 3.098877875304478e-05, + "loss": 0.2108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18588414788246155, + "step": 1775, + "valid_targets_mean": 5792.4, + "valid_targets_min": 716 + }, + { + "epoch": 2.692889561270802, + "grad_norm": 0.3783523235737496, + "learning_rate": 3.092566260078614e-05, + "loss": 0.1937, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17155107855796814, + "step": 1780, + "valid_targets_mean": 5417.6, + "valid_targets_min": 687 + }, + { + "epoch": 2.700453857791225, + "grad_norm": 0.41078514757696655, + "learning_rate": 3.086239097173155e-05, + "loss": 0.1919, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2063501477241516, + "step": 1785, + "valid_targets_mean": 5055.7, + "valid_targets_min": 892 + }, + { + "epoch": 2.708018154311649, + "grad_norm": 0.43887043899986533, + "learning_rate": 3.079896476626303e-05, + "loss": 0.2029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21934330463409424, + "step": 1790, + "valid_targets_mean": 5341.4, + "valid_targets_min": 891 + }, + { + "epoch": 2.7155824508320725, + "grad_norm": 0.4255605078324983, + "learning_rate": 3.073538488696229e-05, + "loss": 0.2069, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18903899192810059, + "step": 1795, + "valid_targets_mean": 5615.9, + "valid_targets_min": 592 + }, + { + "epoch": 2.723146747352496, + "grad_norm": 0.589823722023372, + "learning_rate": 3.0671652238597873e-05, + "loss": 0.1924, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1876818835735321, + "step": 1800, + "valid_targets_mean": 5229.2, + "valid_targets_min": 1796 + }, + { + "epoch": 2.73071104387292, + "grad_norm": 0.48691578647433853, + "learning_rate": 3.060776772811231e-05, + "loss": 0.2004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2318037450313568, + "step": 1805, + "valid_targets_mean": 4098.2, + "valid_targets_min": 830 + }, + { + "epoch": 2.7382753403933435, + "grad_norm": 0.660918859398447, + "learning_rate": 3.0543732264609174e-05, + "loss": 0.1963, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2070668637752533, + "step": 1810, + "valid_targets_mean": 5188.2, + "valid_targets_min": 1829 + }, + { + "epoch": 2.745839636913767, + "grad_norm": 0.42428761873536697, + "learning_rate": 3.0479546759340176e-05, + "loss": 0.1968, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17107611894607544, + "step": 1815, + "valid_targets_mean": 4367.8, + "valid_targets_min": 324 + }, + { + "epoch": 2.753403933434191, + "grad_norm": 0.4370704783114705, + "learning_rate": 3.0415212125692184e-05, + "loss": 0.1904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19081702828407288, + "step": 1820, + "valid_targets_mean": 4557.9, + "valid_targets_min": 723 + }, + { + "epoch": 2.760968229954614, + "grad_norm": 0.46349591557486075, + "learning_rate": 3.0350729279174212e-05, + "loss": 0.2077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19641894102096558, + "step": 1825, + "valid_targets_mean": 5278.9, + "valid_targets_min": 2337 + }, + { + "epoch": 2.768532526475038, + "grad_norm": 0.524642612915043, + "learning_rate": 3.0286099137404426e-05, + "loss": 0.1984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19328051805496216, + "step": 1830, + "valid_targets_mean": 5464.6, + "valid_targets_min": 663 + }, + { + "epoch": 2.7760968229954615, + "grad_norm": 0.43508313940321147, + "learning_rate": 3.0221322620097047e-05, + "loss": 0.1989, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19558432698249817, + "step": 1835, + "valid_targets_mean": 4772.1, + "valid_targets_min": 2237 + }, + { + "epoch": 2.783661119515885, + "grad_norm": 0.4041659272772562, + "learning_rate": 3.01564006490493e-05, + "loss": 0.1844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20566602051258087, + "step": 1840, + "valid_targets_mean": 6452.6, + "valid_targets_min": 2725 + }, + { + "epoch": 2.7912254160363084, + "grad_norm": 0.4030427238187041, + "learning_rate": 3.0091334148128265e-05, + "loss": 0.1965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17462749779224396, + "step": 1845, + "valid_targets_mean": 5316.8, + "valid_targets_min": 600 + }, + { + "epoch": 2.798789712556732, + "grad_norm": 0.4961824149557135, + "learning_rate": 3.002612404325774e-05, + "loss": 0.2239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2178715467453003, + "step": 1850, + "valid_targets_mean": 3882.9, + "valid_targets_min": 540 + }, + { + "epoch": 2.8063540090771557, + "grad_norm": 0.42175695435983496, + "learning_rate": 2.9960771262405085e-05, + "loss": 0.1943, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1930299699306488, + "step": 1855, + "valid_targets_mean": 5572.1, + "valid_targets_min": 2244 + }, + { + "epoch": 2.8139183055975794, + "grad_norm": 0.40908551740974003, + "learning_rate": 2.9895276735567988e-05, + "loss": 0.1889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17899967730045319, + "step": 1860, + "valid_targets_mean": 5095.8, + "valid_targets_min": 1007 + }, + { + "epoch": 2.821482602118003, + "grad_norm": 0.4248876340373272, + "learning_rate": 2.982964139476124e-05, + "loss": 0.1831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1757224202156067, + "step": 1865, + "valid_targets_mean": 4697.9, + "valid_targets_min": 1035 + }, + { + "epoch": 2.8290468986384267, + "grad_norm": 0.42593735085657447, + "learning_rate": 2.9763866174003473e-05, + "loss": 0.2023, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23707634210586548, + "step": 1870, + "valid_targets_mean": 6115.4, + "valid_targets_min": 1386 + }, + { + "epoch": 2.8366111951588504, + "grad_norm": 0.45026114969687053, + "learning_rate": 2.9697952009303886e-05, + "loss": 0.1969, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.187404602766037, + "step": 1875, + "valid_targets_mean": 4572.1, + "valid_targets_min": 823 + }, + { + "epoch": 2.844175491679274, + "grad_norm": 0.42649362408455643, + "learning_rate": 2.9631899838648887e-05, + "loss": 0.187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18948125839233398, + "step": 1880, + "valid_targets_mean": 4665.5, + "valid_targets_min": 911 + }, + { + "epoch": 2.8517397881996973, + "grad_norm": 0.3720008592357275, + "learning_rate": 2.9565710601988783e-05, + "loss": 0.1893, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16390863060951233, + "step": 1885, + "valid_targets_mean": 5894.8, + "valid_targets_min": 638 + }, + { + "epoch": 2.859304084720121, + "grad_norm": 0.43144241147993634, + "learning_rate": 2.9499385241224395e-05, + "loss": 0.1978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20528359711170197, + "step": 1890, + "valid_targets_mean": 5719.8, + "valid_targets_min": 1110 + }, + { + "epoch": 2.8668683812405447, + "grad_norm": 0.4390908805212251, + "learning_rate": 2.943292470019361e-05, + "loss": 0.1943, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17955082654953003, + "step": 1895, + "valid_targets_mean": 4001.1, + "valid_targets_min": 688 + }, + { + "epoch": 2.8744326777609683, + "grad_norm": 0.38596306723643325, + "learning_rate": 2.936632992465803e-05, + "loss": 0.1877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18508818745613098, + "step": 1900, + "valid_targets_mean": 5744.8, + "valid_targets_min": 4281 + }, + { + "epoch": 2.8819969742813916, + "grad_norm": 0.4795728053309178, + "learning_rate": 2.9299601862289453e-05, + "loss": 0.1955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18243417143821716, + "step": 1905, + "valid_targets_mean": 3936.1, + "valid_targets_min": 856 + }, + { + "epoch": 2.8895612708018152, + "grad_norm": 0.4594467335481041, + "learning_rate": 2.92327414626564e-05, + "loss": 0.188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18039527535438538, + "step": 1910, + "valid_targets_mean": 4143.9, + "valid_targets_min": 634 + }, + { + "epoch": 2.897125567322239, + "grad_norm": 0.39267385150227535, + "learning_rate": 2.9165749677210615e-05, + "loss": 0.1915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1811700463294983, + "step": 1915, + "valid_targets_mean": 5184.6, + "valid_targets_min": 968 + }, + { + "epoch": 2.9046898638426626, + "grad_norm": 0.4868122959844636, + "learning_rate": 2.9098627459273516e-05, + "loss": 0.2008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20172417163848877, + "step": 1920, + "valid_targets_mean": 4043.6, + "valid_targets_min": 553 + }, + { + "epoch": 2.9122541603630863, + "grad_norm": 0.46754401466860396, + "learning_rate": 2.9031375764022627e-05, + "loss": 0.2122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18514400720596313, + "step": 1925, + "valid_targets_mean": 4837.5, + "valid_targets_min": 333 + }, + { + "epoch": 2.91981845688351, + "grad_norm": 0.4056142272807577, + "learning_rate": 2.8963995548477996e-05, + "loss": 0.1964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17639204859733582, + "step": 1930, + "valid_targets_mean": 4832.6, + "valid_targets_min": 635 + }, + { + "epoch": 2.9273827534039336, + "grad_norm": 0.621399576217883, + "learning_rate": 2.8896487771488564e-05, + "loss": 0.1886, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21237429976463318, + "step": 1935, + "valid_targets_mean": 3703.5, + "valid_targets_min": 711 + }, + { + "epoch": 2.9349470499243573, + "grad_norm": 0.42986488062021244, + "learning_rate": 2.882885339371852e-05, + "loss": 0.2017, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20180043578147888, + "step": 1940, + "valid_targets_mean": 5049.2, + "valid_targets_min": 775 + }, + { + "epoch": 2.9425113464447805, + "grad_norm": 0.4259342195024064, + "learning_rate": 2.8761093377633657e-05, + "loss": 0.1906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1935410350561142, + "step": 1945, + "valid_targets_mean": 5046.2, + "valid_targets_min": 969 + }, + { + "epoch": 2.950075642965204, + "grad_norm": 0.4108347413723792, + "learning_rate": 2.8693208687487617e-05, + "loss": 0.1989, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1827128529548645, + "step": 1950, + "valid_targets_mean": 5555.4, + "valid_targets_min": 1354 + }, + { + "epoch": 2.957639939485628, + "grad_norm": 0.4045575068570252, + "learning_rate": 2.8625200289308242e-05, + "loss": 0.1936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19365639984607697, + "step": 1955, + "valid_targets_mean": 5364.6, + "valid_targets_min": 1894 + }, + { + "epoch": 2.9652042360060515, + "grad_norm": 0.4645036427391149, + "learning_rate": 2.855706915088378e-05, + "loss": 0.1976, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20481233298778534, + "step": 1960, + "valid_targets_mean": 4193.6, + "valid_targets_min": 842 + }, + { + "epoch": 2.9727685325264748, + "grad_norm": 0.45940755177813786, + "learning_rate": 2.8488816241749123e-05, + "loss": 0.1945, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1904478818178177, + "step": 1965, + "valid_targets_mean": 3872.9, + "valid_targets_min": 793 + }, + { + "epoch": 2.9803328290468984, + "grad_norm": 0.4316864521638559, + "learning_rate": 2.8420442533171995e-05, + "loss": 0.2007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18184807896614075, + "step": 1970, + "valid_targets_mean": 4514.9, + "valid_targets_min": 680 + }, + { + "epoch": 2.987897125567322, + "grad_norm": 0.41611560629943883, + "learning_rate": 2.8351948998139187e-05, + "loss": 0.1939, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19364595413208008, + "step": 1975, + "valid_targets_mean": 4439.6, + "valid_targets_min": 834 + }, + { + "epoch": 2.995461422087746, + "grad_norm": 0.37967027522286967, + "learning_rate": 2.8283336611342634e-05, + "loss": 0.1888, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18696758151054382, + "step": 1980, + "valid_targets_mean": 5346.8, + "valid_targets_min": 697 + }, + { + "epoch": 3.0030257186081695, + "grad_norm": 0.42663777104066586, + "learning_rate": 2.8214606349165587e-05, + "loss": 0.184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1831974983215332, + "step": 1985, + "valid_targets_mean": 5566.9, + "valid_targets_min": 2660 + }, + { + "epoch": 3.010590015128593, + "grad_norm": 0.4650435000504031, + "learning_rate": 2.8145759189668748e-05, + "loss": 0.1838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17917796969413757, + "step": 1990, + "valid_targets_mean": 4684.1, + "valid_targets_min": 908 + }, + { + "epoch": 3.018154311649017, + "grad_norm": 0.4426288047149243, + "learning_rate": 2.8076796112576273e-05, + "loss": 0.1816, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2020592838525772, + "step": 1995, + "valid_targets_mean": 4922.2, + "valid_targets_min": 606 + }, + { + "epoch": 3.02571860816944, + "grad_norm": 0.43542206084007723, + "learning_rate": 2.8007718099261886e-05, + "loss": 0.1755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18459182977676392, + "step": 2000, + "valid_targets_mean": 5220.1, + "valid_targets_min": 432 + }, + { + "epoch": 3.0332829046898637, + "grad_norm": 0.5115612104025097, + "learning_rate": 2.7938526132734923e-05, + "loss": 0.1739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17348910868167877, + "step": 2005, + "valid_targets_mean": 4200.2, + "valid_targets_min": 633 + }, + { + "epoch": 3.0408472012102874, + "grad_norm": 0.4885826302307685, + "learning_rate": 2.7869221197626307e-05, + "loss": 0.1852, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1716952621936798, + "step": 2010, + "valid_targets_mean": 4950.1, + "valid_targets_min": 2131 + }, + { + "epoch": 3.048411497730711, + "grad_norm": 0.41076102496913264, + "learning_rate": 2.7799804280174547e-05, + "loss": 0.1834, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20202207565307617, + "step": 2015, + "valid_targets_mean": 5878.2, + "valid_targets_min": 803 + }, + { + "epoch": 3.0559757942511347, + "grad_norm": 0.5180887749992029, + "learning_rate": 2.773027636821171e-05, + "loss": 0.1677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1683463305234909, + "step": 2020, + "valid_targets_mean": 4372.1, + "valid_targets_min": 770 + }, + { + "epoch": 3.0635400907715584, + "grad_norm": 0.4322804601397516, + "learning_rate": 2.7660638451149377e-05, + "loss": 0.1813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1704043745994568, + "step": 2025, + "valid_targets_mean": 5918.9, + "valid_targets_min": 746 + }, + { + "epoch": 3.0711043872919817, + "grad_norm": 0.47686737853887257, + "learning_rate": 2.7590891519964523e-05, + "loss": 0.18, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16545140743255615, + "step": 2030, + "valid_targets_mean": 4430.4, + "valid_targets_min": 793 + }, + { + "epoch": 3.0786686838124053, + "grad_norm": 0.49402131162912494, + "learning_rate": 2.7521036567185467e-05, + "loss": 0.1916, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1749613881111145, + "step": 2035, + "valid_targets_mean": 4374.2, + "valid_targets_min": 865 + }, + { + "epoch": 3.086232980332829, + "grad_norm": 0.4881940238049801, + "learning_rate": 2.74510745868777e-05, + "loss": 0.1835, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18618690967559814, + "step": 2040, + "valid_targets_mean": 4110.2, + "valid_targets_min": 980 + }, + { + "epoch": 3.0937972768532527, + "grad_norm": 0.46079816423016295, + "learning_rate": 2.7381006574629764e-05, + "loss": 0.1704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17987105250358582, + "step": 2045, + "valid_targets_mean": 4255.6, + "valid_targets_min": 925 + }, + { + "epoch": 3.1013615733736764, + "grad_norm": 0.43563452348577103, + "learning_rate": 2.7310833527539092e-05, + "loss": 0.1863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1708250641822815, + "step": 2050, + "valid_targets_mean": 4816.2, + "valid_targets_min": 674 + }, + { + "epoch": 3.1089258698941, + "grad_norm": 0.45727170609137696, + "learning_rate": 2.7240556444197794e-05, + "loss": 0.192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2242300808429718, + "step": 2055, + "valid_targets_mean": 5413.7, + "valid_targets_min": 868 + }, + { + "epoch": 3.1164901664145233, + "grad_norm": 0.4239934474026591, + "learning_rate": 2.7170176324678466e-05, + "loss": 0.1829, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18958163261413574, + "step": 2060, + "valid_targets_mean": 4843.6, + "valid_targets_min": 828 + }, + { + "epoch": 3.124054462934947, + "grad_norm": 0.41161803317312445, + "learning_rate": 2.7099694170519954e-05, + "loss": 0.1847, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17357945442199707, + "step": 2065, + "valid_targets_mean": 5419.1, + "valid_targets_min": 2847 + }, + { + "epoch": 3.1316187594553706, + "grad_norm": 0.46439839012149947, + "learning_rate": 2.702911098471309e-05, + "loss": 0.1904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18104906380176544, + "step": 2070, + "valid_targets_mean": 4799.9, + "valid_targets_min": 405 + }, + { + "epoch": 3.1391830559757943, + "grad_norm": 0.4750168385041568, + "learning_rate": 2.6958427771686442e-05, + "loss": 0.188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22222860157489777, + "step": 2075, + "valid_targets_mean": 4794.3, + "valid_targets_min": 785 + }, + { + "epoch": 3.146747352496218, + "grad_norm": 0.45925547907351283, + "learning_rate": 2.6887645537292e-05, + "loss": 0.1902, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18778520822525024, + "step": 2080, + "valid_targets_mean": 4274.8, + "valid_targets_min": 992 + }, + { + "epoch": 3.1543116490166416, + "grad_norm": 0.46409549568295294, + "learning_rate": 2.681676528879087e-05, + "loss": 0.1884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16386860609054565, + "step": 2085, + "valid_targets_mean": 4708.4, + "valid_targets_min": 894 + }, + { + "epoch": 3.161875945537065, + "grad_norm": 0.40226973478172007, + "learning_rate": 2.674578803483894e-05, + "loss": 0.1783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16758553683757782, + "step": 2090, + "valid_targets_mean": 4964.8, + "valid_targets_min": 2351 + }, + { + "epoch": 3.1694402420574885, + "grad_norm": 0.4606500236707901, + "learning_rate": 2.6674714785472543e-05, + "loss": 0.1844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17645390331745148, + "step": 2095, + "valid_targets_mean": 4287.9, + "valid_targets_min": 730 + }, + { + "epoch": 3.177004538577912, + "grad_norm": 0.4595539891435698, + "learning_rate": 2.660354655209403e-05, + "loss": 0.1767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1777542382478714, + "step": 2100, + "valid_targets_mean": 4656.0, + "valid_targets_min": 1155 + }, + { + "epoch": 3.184568835098336, + "grad_norm": 0.5052944223699203, + "learning_rate": 2.653228434745746e-05, + "loss": 0.1802, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19221368432044983, + "step": 2105, + "valid_targets_mean": 4579.3, + "valid_targets_min": 849 + }, + { + "epoch": 3.1921331316187596, + "grad_norm": 0.4365900032242633, + "learning_rate": 2.6460929185654106e-05, + "loss": 0.1757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16588276624679565, + "step": 2110, + "valid_targets_mean": 4732.4, + "valid_targets_min": 1479 + }, + { + "epoch": 3.1996974281391832, + "grad_norm": 0.38714216198205725, + "learning_rate": 2.6389482082098078e-05, + "loss": 0.1869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2058732509613037, + "step": 2115, + "valid_targets_mean": 6171.4, + "valid_targets_min": 1037 + }, + { + "epoch": 3.2072617246596065, + "grad_norm": 0.4664429065916817, + "learning_rate": 2.6317944053511853e-05, + "loss": 0.1819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17922240495681763, + "step": 2120, + "valid_targets_mean": 4987.2, + "valid_targets_min": 922 + }, + { + "epoch": 3.21482602118003, + "grad_norm": 0.45793374742151755, + "learning_rate": 2.6246316117911804e-05, + "loss": 0.1806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18207323551177979, + "step": 2125, + "valid_targets_mean": 4121.9, + "valid_targets_min": 1004 + }, + { + "epoch": 3.222390317700454, + "grad_norm": 0.41559127939266016, + "learning_rate": 2.6174599294593738e-05, + "loss": 0.1664, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15605135262012482, + "step": 2130, + "valid_targets_mean": 5834.4, + "valid_targets_min": 3253 + }, + { + "epoch": 3.2299546142208775, + "grad_norm": 0.4164323582327294, + "learning_rate": 2.6102794604118345e-05, + "loss": 0.1693, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14102093875408173, + "step": 2135, + "valid_targets_mean": 4710.9, + "valid_targets_min": 372 + }, + { + "epoch": 3.237518910741301, + "grad_norm": 0.5017731025945408, + "learning_rate": 2.6030903068296724e-05, + "loss": 0.1804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2099505364894867, + "step": 2140, + "valid_targets_mean": 3947.9, + "valid_targets_min": 470 + }, + { + "epoch": 3.245083207261725, + "grad_norm": 0.47115821805759717, + "learning_rate": 2.5958925710175803e-05, + "loss": 0.1734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1678295135498047, + "step": 2145, + "valid_targets_mean": 4823.2, + "valid_targets_min": 970 + }, + { + "epoch": 3.252647503782148, + "grad_norm": 0.45159081763090103, + "learning_rate": 2.5886863554023807e-05, + "loss": 0.1891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15815192461013794, + "step": 2150, + "valid_targets_mean": 5319.1, + "valid_targets_min": 671 + }, + { + "epoch": 3.2602118003025717, + "grad_norm": 0.42568279295852773, + "learning_rate": 2.581471762531568e-05, + "loss": 0.1792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1752796769142151, + "step": 2155, + "valid_targets_mean": 5423.6, + "valid_targets_min": 1749 + }, + { + "epoch": 3.2677760968229954, + "grad_norm": 0.42884597917657213, + "learning_rate": 2.574248895071846e-05, + "loss": 0.1865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1902637481689453, + "step": 2160, + "valid_targets_mean": 5324.8, + "valid_targets_min": 2412 + }, + { + "epoch": 3.275340393343419, + "grad_norm": 0.40297700891719374, + "learning_rate": 2.5670178558076724e-05, + "loss": 0.1734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2010069340467453, + "step": 2165, + "valid_targets_mean": 5642.1, + "valid_targets_min": 582 + }, + { + "epoch": 3.2829046898638428, + "grad_norm": 0.39785109407004443, + "learning_rate": 2.5597787476397918e-05, + "loss": 0.1806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19962245225906372, + "step": 2170, + "valid_targets_mean": 6636.6, + "valid_targets_min": 637 + }, + { + "epoch": 3.2904689863842664, + "grad_norm": 0.4203008064142974, + "learning_rate": 2.5525316735837713e-05, + "loss": 0.1782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17891038954257965, + "step": 2175, + "valid_targets_mean": 4486.1, + "valid_targets_min": 664 + }, + { + "epoch": 3.29803328290469, + "grad_norm": 0.4649648211079789, + "learning_rate": 2.545276736768538e-05, + "loss": 0.1749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17492157220840454, + "step": 2180, + "valid_targets_mean": 4677.6, + "valid_targets_min": 635 + }, + { + "epoch": 3.3055975794251133, + "grad_norm": 0.483126836509343, + "learning_rate": 2.5380140404349094e-05, + "loss": 0.1677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2014307975769043, + "step": 2185, + "valid_targets_mean": 4459.1, + "valid_targets_min": 543 + }, + { + "epoch": 3.313161875945537, + "grad_norm": 0.4936867068007607, + "learning_rate": 2.5307436879341226e-05, + "loss": 0.1728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1675454080104828, + "step": 2190, + "valid_targets_mean": 3605.6, + "valid_targets_min": 473 + }, + { + "epoch": 3.3207261724659607, + "grad_norm": 0.41968907431035, + "learning_rate": 2.523465782726366e-05, + "loss": 0.1859, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16863587498664856, + "step": 2195, + "valid_targets_mean": 4795.8, + "valid_targets_min": 919 + }, + { + "epoch": 3.3282904689863844, + "grad_norm": 0.4582209550444048, + "learning_rate": 2.5161804283793078e-05, + "loss": 0.1738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18313094973564148, + "step": 2200, + "valid_targets_mean": 4105.4, + "valid_targets_min": 486 + }, + { + "epoch": 3.335854765506808, + "grad_norm": 0.5132053187633295, + "learning_rate": 2.508887728566617e-05, + "loss": 0.1758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1868969351053238, + "step": 2205, + "valid_targets_mean": 4217.6, + "valid_targets_min": 648 + }, + { + "epoch": 3.3434190620272313, + "grad_norm": 0.4521820461040364, + "learning_rate": 2.5015877870664956e-05, + "loss": 0.1794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1981300413608551, + "step": 2210, + "valid_targets_mean": 4518.8, + "valid_targets_min": 753 + }, + { + "epoch": 3.350983358547655, + "grad_norm": 0.46305852598208186, + "learning_rate": 2.494280707760195e-05, + "loss": 0.1996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23940429091453552, + "step": 2215, + "valid_targets_mean": 4959.1, + "valid_targets_min": 364 + }, + { + "epoch": 3.3585476550680786, + "grad_norm": 0.4555340985592675, + "learning_rate": 2.4869665946305416e-05, + "loss": 0.1745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17548419535160065, + "step": 2220, + "valid_targets_mean": 4283.8, + "valid_targets_min": 881 + }, + { + "epoch": 3.3661119515885023, + "grad_norm": 0.42662145474665314, + "learning_rate": 2.479645551760457e-05, + "loss": 0.176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17197197675704956, + "step": 2225, + "valid_targets_mean": 5630.0, + "valid_targets_min": 1287 + }, + { + "epoch": 3.373676248108926, + "grad_norm": 0.47357132814368386, + "learning_rate": 2.4723176833314746e-05, + "loss": 0.1909, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22617679834365845, + "step": 2230, + "valid_targets_mean": 4526.4, + "valid_targets_min": 344 + }, + { + "epoch": 3.3812405446293496, + "grad_norm": 0.48402693776368155, + "learning_rate": 2.4649830936222587e-05, + "loss": 0.1958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20617449283599854, + "step": 2235, + "valid_targets_mean": 4520.4, + "valid_targets_min": 625 + }, + { + "epoch": 3.3888048411497733, + "grad_norm": 0.4224957696383896, + "learning_rate": 2.457641887007121e-05, + "loss": 0.1848, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1821061074733734, + "step": 2240, + "valid_targets_mean": 5160.4, + "valid_targets_min": 502 + }, + { + "epoch": 3.3963691376701965, + "grad_norm": 0.7483855310876051, + "learning_rate": 2.4502941679545332e-05, + "loss": 0.1781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19686508178710938, + "step": 2245, + "valid_targets_mean": 4059.4, + "valid_targets_min": 565 + }, + { + "epoch": 3.40393343419062, + "grad_norm": 0.36065371648657907, + "learning_rate": 2.442940041025643e-05, + "loss": 0.163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16048836708068848, + "step": 2250, + "valid_targets_mean": 6123.6, + "valid_targets_min": 2298 + }, + { + "epoch": 3.411497730711044, + "grad_norm": 0.4624913171269963, + "learning_rate": 2.4355796108727847e-05, + "loss": 0.1922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20563645660877228, + "step": 2255, + "valid_targets_mean": 5261.8, + "valid_targets_min": 923 + }, + { + "epoch": 3.4190620272314676, + "grad_norm": 0.4389355151757325, + "learning_rate": 2.4282129822379896e-05, + "loss": 0.1807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18385478854179382, + "step": 2260, + "valid_targets_mean": 4616.0, + "valid_targets_min": 831 + }, + { + "epoch": 3.4266263237518912, + "grad_norm": 0.430635795414073, + "learning_rate": 2.4208402599514957e-05, + "loss": 0.178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17968051135540009, + "step": 2265, + "valid_targets_mean": 4419.5, + "valid_targets_min": 796 + }, + { + "epoch": 3.4341906202723145, + "grad_norm": 0.43408866333909196, + "learning_rate": 2.4134615489302577e-05, + "loss": 0.1823, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17835617065429688, + "step": 2270, + "valid_targets_mean": 4954.6, + "valid_targets_min": 662 + }, + { + "epoch": 3.441754916792738, + "grad_norm": 0.38546381741261365, + "learning_rate": 2.4060769541764516e-05, + "loss": 0.1854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1849050521850586, + "step": 2275, + "valid_targets_mean": 5661.0, + "valid_targets_min": 924 + }, + { + "epoch": 3.449319213313162, + "grad_norm": 0.36932351801842705, + "learning_rate": 2.39868658077598e-05, + "loss": 0.1768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1613771617412567, + "step": 2280, + "valid_targets_mean": 5770.8, + "valid_targets_min": 3163 + }, + { + "epoch": 3.4568835098335855, + "grad_norm": 0.41601773727093444, + "learning_rate": 2.3912905338969815e-05, + "loss": 0.1842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19377577304840088, + "step": 2285, + "valid_targets_mean": 5156.2, + "valid_targets_min": 1963 + }, + { + "epoch": 3.464447806354009, + "grad_norm": 0.42662998952413006, + "learning_rate": 2.383888918788328e-05, + "loss": 0.1882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17014576494693756, + "step": 2290, + "valid_targets_mean": 4985.0, + "valid_targets_min": 1421 + }, + { + "epoch": 3.472012102874433, + "grad_norm": 0.4400712878742386, + "learning_rate": 2.37648184077813e-05, + "loss": 0.1802, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17249350249767303, + "step": 2295, + "valid_targets_mean": 4962.9, + "valid_targets_min": 534 + }, + { + "epoch": 3.4795763993948565, + "grad_norm": 0.41166735172705915, + "learning_rate": 2.3690694052722384e-05, + "loss": 0.1753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17984776198863983, + "step": 2300, + "valid_targets_mean": 4898.3, + "valid_targets_min": 882 + }, + { + "epoch": 3.4871406959152798, + "grad_norm": 0.8846258046228039, + "learning_rate": 2.361651717752742e-05, + "loss": 0.1852, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19693221151828766, + "step": 2305, + "valid_targets_mean": 4449.8, + "valid_targets_min": 579 + }, + { + "epoch": 3.4947049924357034, + "grad_norm": 0.46403787630805754, + "learning_rate": 2.35422888377647e-05, + "loss": 0.1832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18936261534690857, + "step": 2310, + "valid_targets_mean": 4979.9, + "valid_targets_min": 678 + }, + { + "epoch": 3.502269288956127, + "grad_norm": 0.46099375157582895, + "learning_rate": 2.3468010089734854e-05, + "loss": 0.1871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23048534989356995, + "step": 2315, + "valid_targets_mean": 4629.5, + "valid_targets_min": 837 + }, + { + "epoch": 3.5098335854765508, + "grad_norm": 0.6381971347550925, + "learning_rate": 2.3393681990455877e-05, + "loss": 0.1897, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16190201044082642, + "step": 2320, + "valid_targets_mean": 3028.9, + "valid_targets_min": 289 + }, + { + "epoch": 3.517397881996974, + "grad_norm": 0.42880307161777065, + "learning_rate": 2.331930559764801e-05, + "loss": 0.1755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16737881302833557, + "step": 2325, + "valid_targets_mean": 4284.9, + "valid_targets_min": 642 + }, + { + "epoch": 3.5249621785173977, + "grad_norm": 0.4561996764949505, + "learning_rate": 2.3244881969718768e-05, + "loss": 0.1916, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20272359251976013, + "step": 2330, + "valid_targets_mean": 4521.2, + "valid_targets_min": 543 + }, + { + "epoch": 3.5325264750378214, + "grad_norm": 0.4728536704189786, + "learning_rate": 2.317041216574782e-05, + "loss": 0.181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19389662146568298, + "step": 2335, + "valid_targets_mean": 4810.5, + "valid_targets_min": 651 + }, + { + "epoch": 3.540090771558245, + "grad_norm": 0.40806576729419003, + "learning_rate": 2.309589724547195e-05, + "loss": 0.1757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1756885051727295, + "step": 2340, + "valid_targets_mean": 5160.0, + "valid_targets_min": 860 + }, + { + "epoch": 3.5476550680786687, + "grad_norm": 0.4620549711410424, + "learning_rate": 2.3021338269269968e-05, + "loss": 0.1901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1837664544582367, + "step": 2345, + "valid_targets_mean": 3869.1, + "valid_targets_min": 458 + }, + { + "epoch": 3.5552193645990924, + "grad_norm": 0.4565329021286906, + "learning_rate": 2.2946736298147605e-05, + "loss": 0.1847, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20962950587272644, + "step": 2350, + "valid_targets_mean": 4646.9, + "valid_targets_min": 694 + }, + { + "epoch": 3.562783661119516, + "grad_norm": 0.467940605445892, + "learning_rate": 2.287209239372244e-05, + "loss": 0.1794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17753881216049194, + "step": 2355, + "valid_targets_mean": 4308.3, + "valid_targets_min": 695 + }, + { + "epoch": 3.5703479576399397, + "grad_norm": 0.48279623085929807, + "learning_rate": 2.2797407618208784e-05, + "loss": 0.1796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19052711129188538, + "step": 2360, + "valid_targets_mean": 3844.9, + "valid_targets_min": 658 + }, + { + "epoch": 3.577912254160363, + "grad_norm": 0.4593538362268239, + "learning_rate": 2.2722683034402543e-05, + "loss": 0.1806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1733306348323822, + "step": 2365, + "valid_targets_mean": 4635.1, + "valid_targets_min": 614 + }, + { + "epoch": 3.5854765506807866, + "grad_norm": 0.4544985432412134, + "learning_rate": 2.264791970566613e-05, + "loss": 0.1756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18841543793678284, + "step": 2370, + "valid_targets_mean": 4705.8, + "valid_targets_min": 879 + }, + { + "epoch": 3.5930408472012103, + "grad_norm": 0.45753745132477175, + "learning_rate": 2.2573118695913303e-05, + "loss": 0.1773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19560250639915466, + "step": 2375, + "valid_targets_mean": 4873.1, + "valid_targets_min": 660 + }, + { + "epoch": 3.600605143721634, + "grad_norm": 0.443248808335626, + "learning_rate": 2.2498281069594045e-05, + "loss": 0.1818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20620426535606384, + "step": 2380, + "valid_targets_mean": 5684.8, + "valid_targets_min": 502 + }, + { + "epoch": 3.608169440242057, + "grad_norm": 0.4440169248740688, + "learning_rate": 2.2423407891679405e-05, + "loss": 0.1894, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17821472883224487, + "step": 2385, + "valid_targets_mean": 4816.6, + "valid_targets_min": 881 + }, + { + "epoch": 3.615733736762481, + "grad_norm": 0.3915151274706588, + "learning_rate": 2.2348500227646347e-05, + "loss": 0.179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17493531107902527, + "step": 2390, + "valid_targets_mean": 5246.4, + "valid_targets_min": 789 + }, + { + "epoch": 3.6232980332829046, + "grad_norm": 0.43570753821263747, + "learning_rate": 2.2273559143462574e-05, + "loss": 0.1752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1876915693283081, + "step": 2395, + "valid_targets_mean": 4848.5, + "valid_targets_min": 721 + }, + { + "epoch": 3.6308623298033282, + "grad_norm": 0.43800373462337433, + "learning_rate": 2.21985857055714e-05, + "loss": 0.1878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16755543649196625, + "step": 2400, + "valid_targets_mean": 4680.5, + "valid_targets_min": 540 + }, + { + "epoch": 3.638426626323752, + "grad_norm": 0.38868595481400137, + "learning_rate": 2.212358098087652e-05, + "loss": 0.2099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1818462312221527, + "step": 2405, + "valid_targets_mean": 5988.6, + "valid_targets_min": 2237 + }, + { + "epoch": 3.6459909228441756, + "grad_norm": 0.38746154157768226, + "learning_rate": 2.2048546036726867e-05, + "loss": 0.1812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18854598701000214, + "step": 2410, + "valid_targets_mean": 5940.6, + "valid_targets_min": 650 + }, + { + "epoch": 3.6535552193645993, + "grad_norm": 0.4176036178181955, + "learning_rate": 2.1973481940901403e-05, + "loss": 0.1784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15259526669979095, + "step": 2415, + "valid_targets_mean": 4415.6, + "valid_targets_min": 470 + }, + { + "epoch": 3.661119515885023, + "grad_norm": 0.41410661612753646, + "learning_rate": 2.1898389761593933e-05, + "loss": 0.1782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17064417898654938, + "step": 2420, + "valid_targets_mean": 5270.3, + "valid_targets_min": 2559 + }, + { + "epoch": 3.668683812405446, + "grad_norm": 0.48834300834228367, + "learning_rate": 2.1823270567397908e-05, + "loss": 0.1798, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18752998113632202, + "step": 2425, + "valid_targets_mean": 4708.6, + "valid_targets_min": 681 + }, + { + "epoch": 3.67624810892587, + "grad_norm": 0.4696483622125557, + "learning_rate": 2.1748125427291203e-05, + "loss": 0.1689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16924205422401428, + "step": 2430, + "valid_targets_mean": 4393.2, + "valid_targets_min": 345 + }, + { + "epoch": 3.6838124054462935, + "grad_norm": 0.4987791009849064, + "learning_rate": 2.1672955410620916e-05, + "loss": 0.1652, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16017551720142365, + "step": 2435, + "valid_targets_mean": 4371.1, + "valid_targets_min": 535 + }, + { + "epoch": 3.691376701966717, + "grad_norm": 0.4325171641316711, + "learning_rate": 2.1597761587088146e-05, + "loss": 0.1784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17629767954349518, + "step": 2440, + "valid_targets_mean": 4894.4, + "valid_targets_min": 883 + }, + { + "epoch": 3.6989409984871404, + "grad_norm": 0.4736685289222699, + "learning_rate": 2.1522545026732793e-05, + "loss": 0.1714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14739350974559784, + "step": 2445, + "valid_targets_mean": 4710.0, + "valid_targets_min": 848 + }, + { + "epoch": 3.706505295007564, + "grad_norm": 0.4512152628668887, + "learning_rate": 2.1447306799918285e-05, + "loss": 0.1829, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16980385780334473, + "step": 2450, + "valid_targets_mean": 5042.8, + "valid_targets_min": 789 + }, + { + "epoch": 3.7140695915279878, + "grad_norm": 0.4576435044941357, + "learning_rate": 2.137204797731638e-05, + "loss": 0.1767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1822136491537094, + "step": 2455, + "valid_targets_mean": 5460.8, + "valid_targets_min": 697 + }, + { + "epoch": 3.7216338880484114, + "grad_norm": 0.4631213031026051, + "learning_rate": 2.1296769629891946e-05, + "loss": 0.1814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19493688642978668, + "step": 2460, + "valid_targets_mean": 4408.8, + "valid_targets_min": 779 + }, + { + "epoch": 3.729198184568835, + "grad_norm": 0.37665036712818883, + "learning_rate": 2.1221472828887672e-05, + "loss": 0.1734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16041424870491028, + "step": 2465, + "valid_targets_mean": 5879.9, + "valid_targets_min": 1259 + }, + { + "epoch": 3.736762481089259, + "grad_norm": 0.4179212731663629, + "learning_rate": 2.1146158645808845e-05, + "loss": 0.179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18884560465812683, + "step": 2470, + "valid_targets_mean": 5352.8, + "valid_targets_min": 2763 + }, + { + "epoch": 3.7443267776096825, + "grad_norm": 0.41500424559921534, + "learning_rate": 2.107082815240813e-05, + "loss": 0.1759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15109623968601227, + "step": 2475, + "valid_targets_mean": 5304.8, + "valid_targets_min": 1296 + }, + { + "epoch": 3.751891074130106, + "grad_norm": 0.48903761793465, + "learning_rate": 2.099548242067028e-05, + "loss": 0.1848, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1728041172027588, + "step": 2480, + "valid_targets_mean": 3551.8, + "valid_targets_min": 732 + }, + { + "epoch": 3.7594553706505294, + "grad_norm": 0.3925523813669197, + "learning_rate": 2.0920122522796894e-05, + "loss": 0.181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1809263825416565, + "step": 2485, + "valid_targets_mean": 5726.9, + "valid_targets_min": 1075 + }, + { + "epoch": 3.767019667170953, + "grad_norm": 0.3811307614768285, + "learning_rate": 2.0844749531191164e-05, + "loss": 0.184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17140492796897888, + "step": 2490, + "valid_targets_mean": 5197.9, + "valid_targets_min": 706 + }, + { + "epoch": 3.7745839636913767, + "grad_norm": 0.4723320494664942, + "learning_rate": 2.076936451844263e-05, + "loss": 0.1881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20417089760303497, + "step": 2495, + "valid_targets_mean": 5198.6, + "valid_targets_min": 808 + }, + { + "epoch": 3.7821482602118004, + "grad_norm": 0.4735562331191886, + "learning_rate": 2.0693968557311858e-05, + "loss": 0.1901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1843431442975998, + "step": 2500, + "valid_targets_mean": 4259.9, + "valid_targets_min": 586 + }, + { + "epoch": 3.789712556732224, + "grad_norm": 0.46605787164924223, + "learning_rate": 2.061856272071525e-05, + "loss": 0.1889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1783793866634369, + "step": 2505, + "valid_targets_mean": 4357.8, + "valid_targets_min": 709 + }, + { + "epoch": 3.7972768532526473, + "grad_norm": 0.4258832263694262, + "learning_rate": 2.0543148081709726e-05, + "loss": 0.1914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18057414889335632, + "step": 2510, + "valid_targets_mean": 5267.4, + "valid_targets_min": 797 + }, + { + "epoch": 3.804841149773071, + "grad_norm": 0.4403025638542816, + "learning_rate": 2.0467725713477463e-05, + "loss": 0.1992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22269657254219055, + "step": 2515, + "valid_targets_mean": 5570.6, + "valid_targets_min": 956 + }, + { + "epoch": 3.8124054462934946, + "grad_norm": 0.42145758561605795, + "learning_rate": 2.0392296689310646e-05, + "loss": 0.174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1556507647037506, + "step": 2520, + "valid_targets_mean": 4532.9, + "valid_targets_min": 844 + }, + { + "epoch": 3.8199697428139183, + "grad_norm": 0.45716031065040713, + "learning_rate": 2.0316862082596153e-05, + "loss": 0.1699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17538976669311523, + "step": 2525, + "valid_targets_mean": 5158.4, + "valid_targets_min": 922 + }, + { + "epoch": 3.827534039334342, + "grad_norm": 0.47592674209274544, + "learning_rate": 2.024142296680032e-05, + "loss": 0.1738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19242042303085327, + "step": 2530, + "valid_targets_mean": 4368.2, + "valid_targets_min": 559 + }, + { + "epoch": 3.8350983358547657, + "grad_norm": 0.4216297325551423, + "learning_rate": 2.0165980415453643e-05, + "loss": 0.1794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18463507294654846, + "step": 2535, + "valid_targets_mean": 4899.9, + "valid_targets_min": 951 + }, + { + "epoch": 3.8426626323751893, + "grad_norm": 0.408114244972223, + "learning_rate": 2.0090535502135516e-05, + "loss": 0.1833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18673035502433777, + "step": 2540, + "valid_targets_mean": 4995.6, + "valid_targets_min": 499 + }, + { + "epoch": 3.8502269288956126, + "grad_norm": 0.4559095603030895, + "learning_rate": 2.0015089300458928e-05, + "loss": 0.1754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18540602922439575, + "step": 2545, + "valid_targets_mean": 4267.0, + "valid_targets_min": 482 + }, + { + "epoch": 3.8577912254160363, + "grad_norm": 0.43246068560103573, + "learning_rate": 1.9939642884055215e-05, + "loss": 0.1781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1851813793182373, + "step": 2550, + "valid_targets_mean": 5570.6, + "valid_targets_min": 1990 + }, + { + "epoch": 3.86535552193646, + "grad_norm": 0.40837587717862334, + "learning_rate": 1.9864197326558784e-05, + "loss": 0.1681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17242997884750366, + "step": 2555, + "valid_targets_mean": 4821.5, + "valid_targets_min": 1103 + }, + { + "epoch": 3.8729198184568836, + "grad_norm": 0.526014027378549, + "learning_rate": 1.9788753701591767e-05, + "loss": 0.1834, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17967309057712555, + "step": 2560, + "valid_targets_mean": 4044.2, + "valid_targets_min": 538 + }, + { + "epoch": 3.8804841149773073, + "grad_norm": 0.43157336608044927, + "learning_rate": 1.9713313082748867e-05, + "loss": 0.19, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18985623121261597, + "step": 2565, + "valid_targets_mean": 4637.7, + "valid_targets_min": 456 + }, + { + "epoch": 3.8880484114977305, + "grad_norm": 0.4159194231850038, + "learning_rate": 1.963787654358194e-05, + "loss": 0.1727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18229806423187256, + "step": 2570, + "valid_targets_mean": 5123.1, + "valid_targets_min": 702 + }, + { + "epoch": 3.895612708018154, + "grad_norm": 0.45085006380846965, + "learning_rate": 1.9562445157584826e-05, + "loss": 0.1699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19042447209358215, + "step": 2575, + "valid_targets_mean": 4716.1, + "valid_targets_min": 717 + }, + { + "epoch": 3.903177004538578, + "grad_norm": 0.43137412386375706, + "learning_rate": 1.9487019998178042e-05, + "loss": 0.1764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18286067247390747, + "step": 2580, + "valid_targets_mean": 4466.0, + "valid_targets_min": 940 + }, + { + "epoch": 3.9107413010590015, + "grad_norm": 0.4965792101957091, + "learning_rate": 1.9411602138693457e-05, + "loss": 0.1764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18717160820960999, + "step": 2585, + "valid_targets_mean": 6286.9, + "valid_targets_min": 1458 + }, + { + "epoch": 3.918305597579425, + "grad_norm": 0.6490298129257558, + "learning_rate": 1.9336192652359088e-05, + "loss": 0.1779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16575998067855835, + "step": 2590, + "valid_targets_mean": 5423.4, + "valid_targets_min": 802 + }, + { + "epoch": 3.925869894099849, + "grad_norm": 0.4383331497273429, + "learning_rate": 1.9260792612283816e-05, + "loss": 0.1802, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16938605904579163, + "step": 2595, + "valid_targets_mean": 4913.7, + "valid_targets_min": 1531 + }, + { + "epoch": 3.9334341906202726, + "grad_norm": 0.49828779485449154, + "learning_rate": 1.9185403091442044e-05, + "loss": 0.1799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18119296431541443, + "step": 2600, + "valid_targets_mean": 3814.1, + "valid_targets_min": 468 + }, + { + "epoch": 3.940998487140696, + "grad_norm": 0.49147145967933253, + "learning_rate": 1.9110025162658522e-05, + "loss": 0.1825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17999988794326782, + "step": 2605, + "valid_targets_mean": 4321.2, + "valid_targets_min": 537 + }, + { + "epoch": 3.9485627836611195, + "grad_norm": 0.4221075762512994, + "learning_rate": 1.903465989859305e-05, + "loss": 0.1841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1725933998823166, + "step": 2610, + "valid_targets_mean": 4869.0, + "valid_targets_min": 929 + }, + { + "epoch": 3.956127080181543, + "grad_norm": 0.4345597802764291, + "learning_rate": 1.8959308371725157e-05, + "loss": 0.1777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1820940226316452, + "step": 2615, + "valid_targets_mean": 4674.4, + "valid_targets_min": 611 + }, + { + "epoch": 3.963691376701967, + "grad_norm": 0.4692245197589479, + "learning_rate": 1.8883971654338927e-05, + "loss": 0.1945, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18120905756950378, + "step": 2620, + "valid_targets_mean": 4830.3, + "valid_targets_min": 714 + }, + { + "epoch": 3.9712556732223905, + "grad_norm": 0.3967835982808758, + "learning_rate": 1.8808650818507695e-05, + "loss": 0.186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17867086827754974, + "step": 2625, + "valid_targets_mean": 5038.8, + "valid_targets_min": 2633 + }, + { + "epoch": 3.9788199697428137, + "grad_norm": 0.44838758067100143, + "learning_rate": 1.8733346936078768e-05, + "loss": 0.18, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16222552955150604, + "step": 2630, + "valid_targets_mean": 5533.2, + "valid_targets_min": 824 + }, + { + "epoch": 3.9863842662632374, + "grad_norm": 0.48431745535271226, + "learning_rate": 1.8658061078658224e-05, + "loss": 0.1855, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1916062831878662, + "step": 2635, + "valid_targets_mean": 4110.4, + "valid_targets_min": 627 + }, + { + "epoch": 3.993948562783661, + "grad_norm": 0.47680301163534866, + "learning_rate": 1.8582794317595628e-05, + "loss": 0.1743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1811482459306717, + "step": 2640, + "valid_targets_mean": 4020.2, + "valid_targets_min": 663 + }, + { + "epoch": 4.001512859304085, + "grad_norm": 0.46463818534745904, + "learning_rate": 1.8507547723968795e-05, + "loss": 0.1803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16638009250164032, + "step": 2645, + "valid_targets_mean": 3962.8, + "valid_targets_min": 522 + }, + { + "epoch": 4.009077155824508, + "grad_norm": 0.41554851090079853, + "learning_rate": 1.8432322368568562e-05, + "loss": 0.1643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16338765621185303, + "step": 2650, + "valid_targets_mean": 4404.9, + "valid_targets_min": 843 + }, + { + "epoch": 4.016641452344932, + "grad_norm": 0.5770464302701627, + "learning_rate": 1.835711932188351e-05, + "loss": 0.1812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18008169531822205, + "step": 2655, + "valid_targets_mean": 3572.7, + "valid_targets_min": 642 + }, + { + "epoch": 4.024205748865356, + "grad_norm": 0.4525771024595579, + "learning_rate": 1.8281939654084783e-05, + "loss": 0.1692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17555324733257294, + "step": 2660, + "valid_targets_mean": 5826.1, + "valid_targets_min": 321 + }, + { + "epoch": 4.031770045385779, + "grad_norm": 0.3758422902364508, + "learning_rate": 1.820678443501083e-05, + "loss": 0.15, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13354334235191345, + "step": 2665, + "valid_targets_mean": 5234.1, + "valid_targets_min": 941 + }, + { + "epoch": 4.039334341906203, + "grad_norm": 0.48385102695876686, + "learning_rate": 1.8131654734152165e-05, + "loss": 0.1667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16652946174144745, + "step": 2670, + "valid_targets_mean": 4157.3, + "valid_targets_min": 726 + }, + { + "epoch": 4.046898638426626, + "grad_norm": 0.41464631491138004, + "learning_rate": 1.805655162063619e-05, + "loss": 0.1594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17274890840053558, + "step": 2675, + "valid_targets_mean": 5395.8, + "valid_targets_min": 924 + }, + { + "epoch": 4.05446293494705, + "grad_norm": 0.3576877331596476, + "learning_rate": 1.798147616321195e-05, + "loss": 0.1568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14228424429893494, + "step": 2680, + "valid_targets_mean": 6599.6, + "valid_targets_min": 3450 + }, + { + "epoch": 4.062027231467473, + "grad_norm": 0.42216558468027454, + "learning_rate": 1.7906429430234927e-05, + "loss": 0.1742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1685303896665573, + "step": 2685, + "valid_targets_mean": 5240.1, + "valid_targets_min": 849 + }, + { + "epoch": 4.069591527987897, + "grad_norm": 0.43009627202726336, + "learning_rate": 1.783141248965184e-05, + "loss": 0.1638, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13526831567287445, + "step": 2690, + "valid_targets_mean": 5489.6, + "valid_targets_min": 1941 + }, + { + "epoch": 4.077155824508321, + "grad_norm": 0.44577441502625953, + "learning_rate": 1.775642640898547e-05, + "loss": 0.1797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14539185166358948, + "step": 2695, + "valid_targets_mean": 4562.2, + "valid_targets_min": 514 + }, + { + "epoch": 4.084720121028744, + "grad_norm": 0.4644816906536246, + "learning_rate": 1.7681472255319417e-05, + "loss": 0.1692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16671213507652283, + "step": 2700, + "valid_targets_mean": 4358.8, + "valid_targets_min": 768 + }, + { + "epoch": 4.092284417549168, + "grad_norm": 0.4876809684009793, + "learning_rate": 1.7606551095282978e-05, + "loss": 0.1753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1734774261713028, + "step": 2705, + "valid_targets_mean": 3740.9, + "valid_targets_min": 423 + }, + { + "epoch": 4.099848714069592, + "grad_norm": 0.38721781374833864, + "learning_rate": 1.753166399503591e-05, + "loss": 0.1618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15614727139472961, + "step": 2710, + "valid_targets_mean": 5812.4, + "valid_targets_min": 1874 + }, + { + "epoch": 4.107413010590015, + "grad_norm": 0.4989057932943494, + "learning_rate": 1.74568120202533e-05, + "loss": 0.1814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21545648574829102, + "step": 2715, + "valid_targets_mean": 4326.6, + "valid_targets_min": 913 + }, + { + "epoch": 4.114977307110439, + "grad_norm": 0.5034602177391847, + "learning_rate": 1.7381996236110386e-05, + "loss": 0.1605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17472974956035614, + "step": 2720, + "valid_targets_mean": 3635.9, + "valid_targets_min": 586 + }, + { + "epoch": 4.122541603630863, + "grad_norm": 0.9829745879005233, + "learning_rate": 1.730721770726739e-05, + "loss": 0.1674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1908668577671051, + "step": 2725, + "valid_targets_mean": 4817.6, + "valid_targets_min": 944 + }, + { + "epoch": 4.130105900151286, + "grad_norm": 0.526475392969517, + "learning_rate": 1.7232477497854377e-05, + "loss": 0.1736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16699153184890747, + "step": 2730, + "valid_targets_mean": 3689.2, + "valid_targets_min": 956 + }, + { + "epoch": 4.13767019667171, + "grad_norm": 0.5000977100526197, + "learning_rate": 1.7157776671456114e-05, + "loss": 0.1566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1865454614162445, + "step": 2735, + "valid_targets_mean": 3762.7, + "valid_targets_min": 391 + }, + { + "epoch": 4.145234493192133, + "grad_norm": 0.4234081246667148, + "learning_rate": 1.7083116291096926e-05, + "loss": 0.1682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1912594586610794, + "step": 2740, + "valid_targets_mean": 5724.4, + "valid_targets_min": 1161 + }, + { + "epoch": 4.1527987897125564, + "grad_norm": 0.41678834278096616, + "learning_rate": 1.7008497419225578e-05, + "loss": 0.1703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16795726120471954, + "step": 2745, + "valid_targets_mean": 5573.5, + "valid_targets_min": 496 + }, + { + "epoch": 4.16036308623298, + "grad_norm": 0.4621994786376751, + "learning_rate": 1.6933921117700156e-05, + "loss": 0.1691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18064820766448975, + "step": 2750, + "valid_targets_mean": 4568.9, + "valid_targets_min": 773 + }, + { + "epoch": 4.167927382753404, + "grad_norm": 0.4826289668834577, + "learning_rate": 1.6859388447772936e-05, + "loss": 0.1819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17181620001792908, + "step": 2755, + "valid_targets_mean": 4864.2, + "valid_targets_min": 932 + }, + { + "epoch": 4.1754916792738275, + "grad_norm": 0.6397951450093655, + "learning_rate": 1.6784900470075312e-05, + "loss": 0.1728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1756332814693451, + "step": 2760, + "valid_targets_mean": 3957.1, + "valid_targets_min": 767 + }, + { + "epoch": 4.183055975794251, + "grad_norm": 0.44994217785156254, + "learning_rate": 1.6710458244602695e-05, + "loss": 0.1703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15611310303211212, + "step": 2765, + "valid_targets_mean": 4696.2, + "valid_targets_min": 736 + }, + { + "epoch": 4.190620272314675, + "grad_norm": 0.4237563016526201, + "learning_rate": 1.66360628306994e-05, + "loss": 0.1834, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1666485220193863, + "step": 2770, + "valid_targets_mean": 5272.4, + "valid_targets_min": 753 + }, + { + "epoch": 4.1981845688350985, + "grad_norm": 0.5395432298525558, + "learning_rate": 1.656171528704361e-05, + "loss": 0.1824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17594173550605774, + "step": 2775, + "valid_targets_mean": 3776.5, + "valid_targets_min": 620 + }, + { + "epoch": 4.205748865355522, + "grad_norm": 0.5832213537978989, + "learning_rate": 1.648741667163229e-05, + "loss": 0.1765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17744117975234985, + "step": 2780, + "valid_targets_mean": 3303.4, + "valid_targets_min": 583 + }, + { + "epoch": 4.213313161875946, + "grad_norm": 0.5256449374178925, + "learning_rate": 1.641316804176613e-05, + "loss": 0.1647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16040672361850739, + "step": 2785, + "valid_targets_mean": 3447.4, + "valid_targets_min": 519 + }, + { + "epoch": 4.2208774583963695, + "grad_norm": 0.5028188639011574, + "learning_rate": 1.6338970454034527e-05, + "loss": 0.157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17807036638259888, + "step": 2790, + "valid_targets_mean": 4636.7, + "valid_targets_min": 787 + }, + { + "epoch": 4.228441754916792, + "grad_norm": 0.4997025752135004, + "learning_rate": 1.626482496430049e-05, + "loss": 0.1739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1678473949432373, + "step": 2795, + "valid_targets_mean": 4014.8, + "valid_targets_min": 442 + }, + { + "epoch": 4.236006051437216, + "grad_norm": 0.45565425172448304, + "learning_rate": 1.6190732627685686e-05, + "loss": 0.1848, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16583865880966187, + "step": 2800, + "valid_targets_mean": 4589.2, + "valid_targets_min": 405 + }, + { + "epoch": 4.24357034795764, + "grad_norm": 0.4025133264233328, + "learning_rate": 1.611669449855537e-05, + "loss": 0.1678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16866248846054077, + "step": 2805, + "valid_targets_mean": 5671.2, + "valid_targets_min": 711 + }, + { + "epoch": 4.251134644478063, + "grad_norm": 0.48967416716184, + "learning_rate": 1.6042711630503406e-05, + "loss": 0.1886, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16977915167808533, + "step": 2810, + "valid_targets_mean": 4551.2, + "valid_targets_min": 514 + }, + { + "epoch": 4.258698940998487, + "grad_norm": 0.44902519039938005, + "learning_rate": 1.5968785076337273e-05, + "loss": 0.1732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1602228879928589, + "step": 2815, + "valid_targets_mean": 4892.4, + "valid_targets_min": 526 + }, + { + "epoch": 4.266263237518911, + "grad_norm": 0.5206132784127442, + "learning_rate": 1.5894915888063085e-05, + "loss": 0.1695, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19216004014015198, + "step": 2820, + "valid_targets_mean": 3800.8, + "valid_targets_min": 552 + }, + { + "epoch": 4.273827534039334, + "grad_norm": 0.4447385592944839, + "learning_rate": 1.5821105116870594e-05, + "loss": 0.1714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15732452273368835, + "step": 2825, + "valid_targets_mean": 4333.0, + "valid_targets_min": 694 + }, + { + "epoch": 4.281391830559758, + "grad_norm": 0.4279569711895869, + "learning_rate": 1.5747353813118276e-05, + "loss": 0.1674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1582874357700348, + "step": 2830, + "valid_targets_mean": 4712.3, + "valid_targets_min": 614 + }, + { + "epoch": 4.288956127080182, + "grad_norm": 0.5213550677478702, + "learning_rate": 1.567366302631835e-05, + "loss": 0.161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16362854838371277, + "step": 2835, + "valid_targets_mean": 4576.0, + "valid_targets_min": 776 + }, + { + "epoch": 4.296520423600605, + "grad_norm": 0.47091022792617493, + "learning_rate": 1.560003380512185e-05, + "loss": 0.1667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.168619304895401, + "step": 2840, + "valid_targets_mean": 4807.8, + "valid_targets_min": 547 + }, + { + "epoch": 4.304084720121029, + "grad_norm": 0.47159810197254937, + "learning_rate": 1.5526467197303715e-05, + "loss": 0.1688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.182538241147995, + "step": 2845, + "valid_targets_mean": 4528.5, + "valid_targets_min": 780 + }, + { + "epoch": 4.311649016641453, + "grad_norm": 0.47889319712753947, + "learning_rate": 1.5452964249747848e-05, + "loss": 0.1761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14952681958675385, + "step": 2850, + "valid_targets_mean": 4010.2, + "valid_targets_min": 730 + }, + { + "epoch": 4.319213313161876, + "grad_norm": 0.4256664007842176, + "learning_rate": 1.537952600843227e-05, + "loss": 0.1511, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14827358722686768, + "step": 2855, + "valid_targets_mean": 4466.1, + "valid_targets_min": 716 + }, + { + "epoch": 4.326777609682299, + "grad_norm": 0.40977956425276274, + "learning_rate": 1.5306153518414197e-05, + "loss": 0.1587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15310370922088623, + "step": 2860, + "valid_targets_mean": 5170.0, + "valid_targets_min": 332 + }, + { + "epoch": 4.334341906202723, + "grad_norm": 0.462683167593508, + "learning_rate": 1.523284782381514e-05, + "loss": 0.1454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14999224245548248, + "step": 2865, + "valid_targets_mean": 5006.0, + "valid_targets_min": 239 + }, + { + "epoch": 4.3419062027231465, + "grad_norm": 0.45878776063067306, + "learning_rate": 1.5159609967806135e-05, + "loss": 0.1679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.185188889503479, + "step": 2870, + "valid_targets_mean": 4822.1, + "valid_targets_min": 1063 + }, + { + "epoch": 4.34947049924357, + "grad_norm": 0.41643746040516816, + "learning_rate": 1.5086440992592826e-05, + "loss": 0.176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15771910548210144, + "step": 2875, + "valid_targets_mean": 5071.6, + "valid_targets_min": 614 + }, + { + "epoch": 4.357034795763994, + "grad_norm": 0.4803323187058902, + "learning_rate": 1.5013341939400628e-05, + "loss": 0.164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18151001632213593, + "step": 2880, + "valid_targets_mean": 4068.4, + "valid_targets_min": 506 + }, + { + "epoch": 4.364599092284418, + "grad_norm": 0.49357279196784626, + "learning_rate": 1.4940313848459975e-05, + "loss": 0.1765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16743910312652588, + "step": 2885, + "valid_targets_mean": 4971.4, + "valid_targets_min": 899 + }, + { + "epoch": 4.372163388804841, + "grad_norm": 0.47252098051012686, + "learning_rate": 1.4867357758991474e-05, + "loss": 0.1707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18773172795772552, + "step": 2890, + "valid_targets_mean": 5009.6, + "valid_targets_min": 506 + }, + { + "epoch": 4.379727685325265, + "grad_norm": 0.4343325194878802, + "learning_rate": 1.4794474709191082e-05, + "loss": 0.1634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1580682247877121, + "step": 2895, + "valid_targets_mean": 5471.8, + "valid_targets_min": 474 + }, + { + "epoch": 4.387291981845689, + "grad_norm": 0.6607347840108052, + "learning_rate": 1.4721665736215416e-05, + "loss": 0.1519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17931993305683136, + "step": 2900, + "valid_targets_mean": 5108.2, + "valid_targets_min": 905 + }, + { + "epoch": 4.394856278366112, + "grad_norm": 0.4590615908212395, + "learning_rate": 1.4648931876166931e-05, + "loss": 0.1764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1773712933063507, + "step": 2905, + "valid_targets_mean": 4593.6, + "valid_targets_min": 717 + }, + { + "epoch": 4.402420574886536, + "grad_norm": 0.45820730146428945, + "learning_rate": 1.4576274164079183e-05, + "loss": 0.1665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1603582799434662, + "step": 2910, + "valid_targets_mean": 4533.9, + "valid_targets_min": 524 + }, + { + "epoch": 4.409984871406959, + "grad_norm": 0.5249917435247164, + "learning_rate": 1.4503693633902128e-05, + "loss": 0.1682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20168936252593994, + "step": 2915, + "valid_targets_mean": 4979.8, + "valid_targets_min": 2321 + }, + { + "epoch": 4.417549167927382, + "grad_norm": 0.4815350204188176, + "learning_rate": 1.4431191318487372e-05, + "loss": 0.1661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16469869017601013, + "step": 2920, + "valid_targets_mean": 4120.6, + "valid_targets_min": 652 + }, + { + "epoch": 4.425113464447806, + "grad_norm": 0.4797606831091243, + "learning_rate": 1.4358768249573514e-05, + "loss": 0.1736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17197684943675995, + "step": 2925, + "valid_targets_mean": 4215.9, + "valid_targets_min": 660 + }, + { + "epoch": 4.43267776096823, + "grad_norm": 0.43573686034480597, + "learning_rate": 1.4286425457771427e-05, + "loss": 0.1599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17867511510849, + "step": 2930, + "valid_targets_mean": 5221.4, + "valid_targets_min": 2249 + }, + { + "epoch": 4.440242057488653, + "grad_norm": 0.41672734352822066, + "learning_rate": 1.4214163972549604e-05, + "loss": 0.1517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13659437000751495, + "step": 2935, + "valid_targets_mean": 5257.1, + "valid_targets_min": 2090 + }, + { + "epoch": 4.447806354009077, + "grad_norm": 0.48856441504073617, + "learning_rate": 1.4141984822219521e-05, + "loss": 0.1657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15636491775512695, + "step": 2940, + "valid_targets_mean": 4395.1, + "valid_targets_min": 600 + }, + { + "epoch": 4.455370650529501, + "grad_norm": 0.4772725352943584, + "learning_rate": 1.4069889033920998e-05, + "loss": 0.1797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20085512101650238, + "step": 2945, + "valid_targets_mean": 4495.1, + "valid_targets_min": 607 + }, + { + "epoch": 4.462934947049924, + "grad_norm": 0.42051910317593916, + "learning_rate": 1.3997877633607557e-05, + "loss": 0.1684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16399016976356506, + "step": 2950, + "valid_targets_mean": 5878.0, + "valid_targets_min": 869 + }, + { + "epoch": 4.470499243570348, + "grad_norm": 0.42806628012013004, + "learning_rate": 1.3925951646031864e-05, + "loss": 0.1823, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17599345743656158, + "step": 2955, + "valid_targets_mean": 5046.3, + "valid_targets_min": 900 + }, + { + "epoch": 4.478063540090772, + "grad_norm": 0.3960989399268624, + "learning_rate": 1.3854112094731116e-05, + "loss": 0.1643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16925032436847687, + "step": 2960, + "valid_targets_mean": 6420.0, + "valid_targets_min": 1942 + }, + { + "epoch": 4.4856278366111955, + "grad_norm": 0.46152209852293663, + "learning_rate": 1.3782360002012485e-05, + "loss": 0.1684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14222872257232666, + "step": 2965, + "valid_targets_mean": 4932.2, + "valid_targets_min": 2050 + }, + { + "epoch": 4.493192133131619, + "grad_norm": 0.4611561798527351, + "learning_rate": 1.3710696388938574e-05, + "loss": 0.1666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17129528522491455, + "step": 2970, + "valid_targets_mean": 4561.2, + "valid_targets_min": 493 + }, + { + "epoch": 4.500756429652043, + "grad_norm": 0.5236948649019548, + "learning_rate": 1.3639122275312886e-05, + "loss": 0.1605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1523115336894989, + "step": 2975, + "valid_targets_mean": 4292.9, + "valid_targets_min": 732 + }, + { + "epoch": 4.508320726172466, + "grad_norm": 0.49450938589759885, + "learning_rate": 1.3567638679665296e-05, + "loss": 0.173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17702379822731018, + "step": 2980, + "valid_targets_mean": 4186.6, + "valid_targets_min": 575 + }, + { + "epoch": 4.515885022692889, + "grad_norm": 0.43334140448201086, + "learning_rate": 1.3496246619237585e-05, + "loss": 0.1582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17083197832107544, + "step": 2985, + "valid_targets_mean": 5292.4, + "valid_targets_min": 1003 + }, + { + "epoch": 4.523449319213313, + "grad_norm": 0.472497861189218, + "learning_rate": 1.3424947109968944e-05, + "loss": 0.1694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16844433546066284, + "step": 2990, + "valid_targets_mean": 4980.1, + "valid_targets_min": 738 + }, + { + "epoch": 4.531013615733737, + "grad_norm": 0.49098116229371497, + "learning_rate": 1.3353741166481515e-05, + "loss": 0.168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15978705883026123, + "step": 2995, + "valid_targets_mean": 4352.1, + "valid_targets_min": 1871 + }, + { + "epoch": 4.53857791225416, + "grad_norm": 0.4383059631684414, + "learning_rate": 1.3282629802065974e-05, + "loss": 0.1619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16577211022377014, + "step": 3000, + "valid_targets_mean": 5097.1, + "valid_targets_min": 970 + }, + { + "epoch": 4.546142208774584, + "grad_norm": 0.45601726097153666, + "learning_rate": 1.3211614028667077e-05, + "loss": 0.1843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2073879837989807, + "step": 3005, + "valid_targets_mean": 4979.4, + "valid_targets_min": 723 + }, + { + "epoch": 4.553706505295008, + "grad_norm": 0.4059819676532039, + "learning_rate": 1.3140694856869297e-05, + "loss": 0.1584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14776208996772766, + "step": 3010, + "valid_targets_mean": 5175.5, + "valid_targets_min": 1354 + }, + { + "epoch": 4.561270801815431, + "grad_norm": 0.50274521759109, + "learning_rate": 1.306987329588242e-05, + "loss": 0.1684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16442856192588806, + "step": 3015, + "valid_targets_mean": 4776.6, + "valid_targets_min": 913 + }, + { + "epoch": 4.568835098335855, + "grad_norm": 0.4264689299807262, + "learning_rate": 1.2999150353527182e-05, + "loss": 0.1607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16325069963932037, + "step": 3020, + "valid_targets_mean": 4675.2, + "valid_targets_min": 167 + }, + { + "epoch": 4.576399394856279, + "grad_norm": 0.4440378341428616, + "learning_rate": 1.2928527036220944e-05, + "loss": 0.1645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1781526505947113, + "step": 3025, + "valid_targets_mean": 4669.3, + "valid_targets_min": 881 + }, + { + "epoch": 4.583963691376702, + "grad_norm": 0.44802586160013663, + "learning_rate": 1.285800434896336e-05, + "loss": 0.1666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1558399349451065, + "step": 3030, + "valid_targets_mean": 5058.0, + "valid_targets_min": 579 + }, + { + "epoch": 4.591527987897125, + "grad_norm": 0.4382924500174268, + "learning_rate": 1.2787583295322063e-05, + "loss": 0.1561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12798433005809784, + "step": 3035, + "valid_targets_mean": 4102.2, + "valid_targets_min": 372 + }, + { + "epoch": 4.599092284417549, + "grad_norm": 0.495690878784159, + "learning_rate": 1.2717264877418409e-05, + "loss": 0.1663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16291014850139618, + "step": 3040, + "valid_targets_mean": 4889.7, + "valid_targets_min": 1085 + }, + { + "epoch": 4.6066565809379725, + "grad_norm": 0.4139498162424246, + "learning_rate": 1.2647050095913211e-05, + "loss": 0.1665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1578187495470047, + "step": 3045, + "valid_targets_mean": 5494.0, + "valid_targets_min": 1253 + }, + { + "epoch": 4.614220877458396, + "grad_norm": 0.47999890638951775, + "learning_rate": 1.2576939949992468e-05, + "loss": 0.1628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1848933845758438, + "step": 3050, + "valid_targets_mean": 4716.2, + "valid_targets_min": 760 + }, + { + "epoch": 4.62178517397882, + "grad_norm": 0.5019757162184942, + "learning_rate": 1.2506935437353192e-05, + "loss": 0.1679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19041526317596436, + "step": 3055, + "valid_targets_mean": 4226.5, + "valid_targets_min": 551 + }, + { + "epoch": 4.6293494704992435, + "grad_norm": 0.5289337309331459, + "learning_rate": 1.2437037554189186e-05, + "loss": 0.1724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17573757469654083, + "step": 3060, + "valid_targets_mean": 4741.9, + "valid_targets_min": 876 + }, + { + "epoch": 4.636913767019667, + "grad_norm": 0.40854843689777737, + "learning_rate": 1.2367247295176855e-05, + "loss": 0.1658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17039918899536133, + "step": 3065, + "valid_targets_mean": 5760.3, + "valid_targets_min": 1998 + }, + { + "epoch": 4.644478063540091, + "grad_norm": 0.4314054991151234, + "learning_rate": 1.2297565653461087e-05, + "loss": 0.1593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15315118432044983, + "step": 3070, + "valid_targets_mean": 5566.9, + "valid_targets_min": 2093 + }, + { + "epoch": 4.6520423600605145, + "grad_norm": 0.525526201288757, + "learning_rate": 1.2227993620641083e-05, + "loss": 0.1751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17355704307556152, + "step": 3075, + "valid_targets_mean": 3794.9, + "valid_targets_min": 676 + }, + { + "epoch": 4.659606656580938, + "grad_norm": 0.4676478453236289, + "learning_rate": 1.2158532186756275e-05, + "loss": 0.1554, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1458236575126648, + "step": 3080, + "valid_targets_mean": 4220.9, + "valid_targets_min": 803 + }, + { + "epoch": 4.667170953101362, + "grad_norm": 0.4387137897170397, + "learning_rate": 1.2089182340272227e-05, + "loss": 0.1739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16402631998062134, + "step": 3085, + "valid_targets_mean": 5154.2, + "valid_targets_min": 1986 + }, + { + "epoch": 4.6747352496217855, + "grad_norm": 0.5151503192275158, + "learning_rate": 1.201994506806655e-05, + "loss": 0.1644, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15883511304855347, + "step": 3090, + "valid_targets_mean": 5183.6, + "valid_targets_min": 614 + }, + { + "epoch": 4.682299546142209, + "grad_norm": 0.4919027448221628, + "learning_rate": 1.1950821355414894e-05, + "loss": 0.179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2019597738981247, + "step": 3095, + "valid_targets_mean": 4693.2, + "valid_targets_min": 795 + }, + { + "epoch": 4.689863842662632, + "grad_norm": 0.411381231559992, + "learning_rate": 1.1881812185976902e-05, + "loss": 0.1602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15423153340816498, + "step": 3100, + "valid_targets_mean": 5120.1, + "valid_targets_min": 974 + }, + { + "epoch": 4.697428139183056, + "grad_norm": 0.433674376108163, + "learning_rate": 1.1812918541782215e-05, + "loss": 0.175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17236676812171936, + "step": 3105, + "valid_targets_mean": 5125.4, + "valid_targets_min": 456 + }, + { + "epoch": 4.704992435703479, + "grad_norm": 0.5330642664335918, + "learning_rate": 1.1744141403216503e-05, + "loss": 0.1929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2719995379447937, + "step": 3110, + "valid_targets_mean": 5730.6, + "valid_targets_min": 894 + }, + { + "epoch": 4.712556732223903, + "grad_norm": 0.589306427456538, + "learning_rate": 1.1675481749007518e-05, + "loss": 0.1775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.211774080991745, + "step": 3115, + "valid_targets_mean": 3777.1, + "valid_targets_min": 258 + }, + { + "epoch": 4.720121028744327, + "grad_norm": 0.43617038752664566, + "learning_rate": 1.1606940556211147e-05, + "loss": 0.1621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1602756232023239, + "step": 3120, + "valid_targets_mean": 5495.8, + "valid_targets_min": 1985 + }, + { + "epoch": 4.72768532526475, + "grad_norm": 0.43013622893573156, + "learning_rate": 1.1538518800197538e-05, + "loss": 0.1582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16299334168434143, + "step": 3125, + "valid_targets_mean": 4878.0, + "valid_targets_min": 664 + }, + { + "epoch": 4.735249621785174, + "grad_norm": 0.4243645747448522, + "learning_rate": 1.1470217454637193e-05, + "loss": 0.1625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15592533349990845, + "step": 3130, + "valid_targets_mean": 4838.5, + "valid_targets_min": 830 + }, + { + "epoch": 4.742813918305598, + "grad_norm": 0.42684650496375437, + "learning_rate": 1.1402037491487112e-05, + "loss": 0.1725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17367637157440186, + "step": 3135, + "valid_targets_mean": 5900.9, + "valid_targets_min": 898 + }, + { + "epoch": 4.750378214826021, + "grad_norm": 0.4361124117092106, + "learning_rate": 1.1333979880976992e-05, + "loss": 0.1637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17264147102832794, + "step": 3140, + "valid_targets_mean": 4803.4, + "valid_targets_min": 918 + }, + { + "epoch": 4.757942511346445, + "grad_norm": 0.45343151422164607, + "learning_rate": 1.1266045591595391e-05, + "loss": 0.1774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18170015513896942, + "step": 3145, + "valid_targets_mean": 4986.0, + "valid_targets_min": 1012 + }, + { + "epoch": 4.765506807866869, + "grad_norm": 0.5071698615673526, + "learning_rate": 1.1198235590075951e-05, + "loss": 0.1688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17840242385864258, + "step": 3150, + "valid_targets_mean": 4062.6, + "valid_targets_min": 706 + }, + { + "epoch": 4.7730711043872915, + "grad_norm": 0.4581144263893417, + "learning_rate": 1.1130550841383662e-05, + "loss": 0.1641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15911459922790527, + "step": 3155, + "valid_targets_mean": 4572.2, + "valid_targets_min": 525 + }, + { + "epoch": 4.780635400907715, + "grad_norm": 0.42922503083348856, + "learning_rate": 1.1062992308701089e-05, + "loss": 0.1653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1482914686203003, + "step": 3160, + "valid_targets_mean": 4370.2, + "valid_targets_min": 871 + }, + { + "epoch": 4.788199697428139, + "grad_norm": 0.4259676210192377, + "learning_rate": 1.0995560953414701e-05, + "loss": 0.1719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15134750306606293, + "step": 3165, + "valid_targets_mean": 5120.9, + "valid_targets_min": 903 + }, + { + "epoch": 4.795763993948563, + "grad_norm": 0.4931505035733333, + "learning_rate": 1.0928257735101186e-05, + "loss": 0.1612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14641442894935608, + "step": 3170, + "valid_targets_mean": 3489.4, + "valid_targets_min": 498 + }, + { + "epoch": 4.803328290468986, + "grad_norm": 0.4657115402405436, + "learning_rate": 1.0861083611513781e-05, + "loss": 0.169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1858498901128769, + "step": 3175, + "valid_targets_mean": 4672.9, + "valid_targets_min": 653 + }, + { + "epoch": 4.81089258698941, + "grad_norm": 0.4963708205352106, + "learning_rate": 1.0794039538568653e-05, + "loss": 0.1756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1865379959344864, + "step": 3180, + "valid_targets_mean": 4229.4, + "valid_targets_min": 515 + }, + { + "epoch": 4.818456883509834, + "grad_norm": 0.5019745644106931, + "learning_rate": 1.0727126470331299e-05, + "loss": 0.1686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17774225771427155, + "step": 3185, + "valid_targets_mean": 4163.1, + "valid_targets_min": 509 + }, + { + "epoch": 4.826021180030257, + "grad_norm": 0.41638071512531366, + "learning_rate": 1.0660345359002941e-05, + "loss": 0.1705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1485077440738678, + "step": 3190, + "valid_targets_mean": 4660.1, + "valid_targets_min": 736 + }, + { + "epoch": 4.833585476550681, + "grad_norm": 0.44572452901778975, + "learning_rate": 1.0593697154907027e-05, + "loss": 0.1719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16914719343185425, + "step": 3195, + "valid_targets_mean": 5350.4, + "valid_targets_min": 744 + }, + { + "epoch": 4.841149773071105, + "grad_norm": 0.6533490446460197, + "learning_rate": 1.0527182806475662e-05, + "loss": 0.1708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1586613953113556, + "step": 3200, + "valid_targets_mean": 4271.1, + "valid_targets_min": 597 + }, + { + "epoch": 4.848714069591528, + "grad_norm": 0.45373202080563063, + "learning_rate": 1.0460803260236134e-05, + "loss": 0.1717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1581738293170929, + "step": 3205, + "valid_targets_mean": 5267.1, + "valid_targets_min": 649 + }, + { + "epoch": 4.856278366111952, + "grad_norm": 0.4179137546333998, + "learning_rate": 1.0394559460797446e-05, + "loss": 0.1733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17204821109771729, + "step": 3210, + "valid_targets_mean": 5295.6, + "valid_targets_min": 543 + }, + { + "epoch": 4.863842662632376, + "grad_norm": 0.4122805539335566, + "learning_rate": 1.0328452350836842e-05, + "loss": 0.1652, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18401654064655304, + "step": 3215, + "valid_targets_mean": 5601.4, + "valid_targets_min": 742 + }, + { + "epoch": 4.871406959152798, + "grad_norm": 0.40302438542126645, + "learning_rate": 1.0262482871086443e-05, + "loss": 0.1635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1483723670244217, + "step": 3220, + "valid_targets_mean": 6198.2, + "valid_targets_min": 848 + }, + { + "epoch": 4.878971255673222, + "grad_norm": 0.45468069712107095, + "learning_rate": 1.019665196031982e-05, + "loss": 0.1916, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17304138839244843, + "step": 3225, + "valid_targets_mean": 4215.2, + "valid_targets_min": 871 + }, + { + "epoch": 4.886535552193646, + "grad_norm": 0.44734310925531656, + "learning_rate": 1.013096055533866e-05, + "loss": 0.1655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16383050382137299, + "step": 3230, + "valid_targets_mean": 4358.6, + "valid_targets_min": 750 + }, + { + "epoch": 4.8940998487140694, + "grad_norm": 0.4425864585330284, + "learning_rate": 1.006540959095941e-05, + "loss": 0.17, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17266994714736938, + "step": 3235, + "valid_targets_mean": 4547.7, + "valid_targets_min": 537 + }, + { + "epoch": 4.901664145234493, + "grad_norm": 0.42155951333967423, + "learning_rate": 1.0000000000000006e-05, + "loss": 0.1682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1378743052482605, + "step": 3240, + "valid_targets_mean": 4115.5, + "valid_targets_min": 263 + }, + { + "epoch": 4.909228441754917, + "grad_norm": 0.4299630738249049, + "learning_rate": 9.93473271326655e-06, + "loss": 0.1734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1550338864326477, + "step": 3245, + "valid_targets_mean": 4854.7, + "valid_targets_min": 2796 + }, + { + "epoch": 4.9167927382753405, + "grad_norm": 0.44329248279038397, + "learning_rate": 9.869608659540129e-06, + "loss": 0.1702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17476975917816162, + "step": 3250, + "valid_targets_mean": 5090.2, + "valid_targets_min": 1022 + }, + { + "epoch": 4.924357034795764, + "grad_norm": 0.428271116473207, + "learning_rate": 9.804628765563542e-06, + "loss": 0.1621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15965984761714935, + "step": 3255, + "valid_targets_mean": 4787.6, + "valid_targets_min": 804 + }, + { + "epoch": 4.931921331316188, + "grad_norm": 0.45332178784102023, + "learning_rate": 9.739793956028143e-06, + "loss": 0.1662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14514422416687012, + "step": 3260, + "valid_targets_mean": 5900.1, + "valid_targets_min": 435 + }, + { + "epoch": 4.9394856278366115, + "grad_norm": 0.4444157485949258, + "learning_rate": 9.675105153560668e-06, + "loss": 0.1662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1440054178237915, + "step": 3265, + "valid_targets_mean": 4733.8, + "valid_targets_min": 584 + }, + { + "epoch": 4.947049924357035, + "grad_norm": 0.4348481496365988, + "learning_rate": 9.610563278710128e-06, + "loss": 0.1727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.180255725979805, + "step": 3270, + "valid_targets_mean": 5171.2, + "valid_targets_min": 762 + }, + { + "epoch": 4.954614220877458, + "grad_norm": 0.45864775112962614, + "learning_rate": 9.546169249934654e-06, + "loss": 0.1738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17231298983097076, + "step": 3275, + "valid_targets_mean": 4814.8, + "valid_targets_min": 333 + }, + { + "epoch": 4.962178517397882, + "grad_norm": 0.5156923688354568, + "learning_rate": 9.481923983588508e-06, + "loss": 0.1733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18253746628761292, + "step": 3280, + "valid_targets_mean": 3727.0, + "valid_targets_min": 447 + }, + { + "epoch": 4.969742813918305, + "grad_norm": 0.4101590443569368, + "learning_rate": 9.417828393908955e-06, + "loss": 0.1617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12951171398162842, + "step": 3285, + "valid_targets_mean": 4951.4, + "valid_targets_min": 416 + }, + { + "epoch": 4.977307110438729, + "grad_norm": 0.42241612118878036, + "learning_rate": 9.353883393003347e-06, + "loss": 0.1646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17528489232063293, + "step": 3290, + "valid_targets_mean": 5294.8, + "valid_targets_min": 578 + }, + { + "epoch": 4.984871406959153, + "grad_norm": 0.3957796359048021, + "learning_rate": 9.290089890836068e-06, + "loss": 0.1674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16144408285617828, + "step": 3295, + "valid_targets_mean": 5681.1, + "valid_targets_min": 947 + }, + { + "epoch": 4.992435703479576, + "grad_norm": 0.4371247368929281, + "learning_rate": 9.226448795215598e-06, + "loss": 0.1636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1603410542011261, + "step": 3300, + "valid_targets_mean": 4666.8, + "valid_targets_min": 718 + }, + { + "epoch": 5.0, + "grad_norm": 0.44364123498321484, + "learning_rate": 9.162961011781632e-06, + "loss": 0.1666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18622517585754395, + "step": 3305, + "valid_targets_mean": 4746.6, + "valid_targets_min": 1724 + }, + { + "epoch": 5.007564296520424, + "grad_norm": 0.48356773402835035, + "learning_rate": 9.099627443992163e-06, + "loss": 0.1504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1390305906534195, + "step": 3310, + "valid_targets_mean": 4232.3, + "valid_targets_min": 636 + }, + { + "epoch": 5.015128593040847, + "grad_norm": 0.40848481053739466, + "learning_rate": 9.036448993110603e-06, + "loss": 0.1555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1643286496400833, + "step": 3315, + "valid_targets_mean": 5712.9, + "valid_targets_min": 1201 + }, + { + "epoch": 5.022692889561271, + "grad_norm": 0.4515946515252321, + "learning_rate": 8.97342655819303e-06, + "loss": 0.152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15688204765319824, + "step": 3320, + "valid_targets_mean": 4522.5, + "valid_targets_min": 737 + }, + { + "epoch": 5.030257186081695, + "grad_norm": 0.4329236745402465, + "learning_rate": 8.910561036075325e-06, + "loss": 0.1482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.152809739112854, + "step": 3325, + "valid_targets_mean": 4936.4, + "valid_targets_min": 263 + }, + { + "epoch": 5.037821482602118, + "grad_norm": 0.4478790148739838, + "learning_rate": 8.847853321360423e-06, + "loss": 0.1733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13745734095573425, + "step": 3330, + "valid_targets_mean": 4847.0, + "valid_targets_min": 2085 + }, + { + "epoch": 5.045385779122542, + "grad_norm": 0.46929256146302734, + "learning_rate": 8.785304306405605e-06, + "loss": 0.1614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14126646518707275, + "step": 3335, + "valid_targets_mean": 4518.4, + "valid_targets_min": 919 + }, + { + "epoch": 5.052950075642965, + "grad_norm": 0.45325256164746236, + "learning_rate": 8.722914881309801e-06, + "loss": 0.1597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1484193503856659, + "step": 3340, + "valid_targets_mean": 4938.5, + "valid_targets_min": 541 + }, + { + "epoch": 5.0605143721633885, + "grad_norm": 0.488278580977796, + "learning_rate": 8.660685933900869e-06, + "loss": 0.1765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16470378637313843, + "step": 3345, + "valid_targets_mean": 4565.0, + "valid_targets_min": 850 + }, + { + "epoch": 5.068078668683812, + "grad_norm": 0.5386047975887774, + "learning_rate": 8.59861834972306e-06, + "loss": 0.1724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17436347901821136, + "step": 3350, + "valid_targets_mean": 3495.9, + "valid_targets_min": 457 + }, + { + "epoch": 5.075642965204236, + "grad_norm": 0.505045707433957, + "learning_rate": 8.536713012024305e-06, + "loss": 0.1564, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1657446324825287, + "step": 3355, + "valid_targets_mean": 4460.2, + "valid_targets_min": 639 + }, + { + "epoch": 5.0832072617246595, + "grad_norm": 0.4834428008879641, + "learning_rate": 8.474970801743724e-06, + "loss": 0.1621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14867718517780304, + "step": 3360, + "valid_targets_mean": 5020.8, + "valid_targets_min": 726 + }, + { + "epoch": 5.090771558245083, + "grad_norm": 0.601215561668829, + "learning_rate": 8.413392597499075e-06, + "loss": 0.1608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14667610824108124, + "step": 3365, + "valid_targets_mean": 3490.2, + "valid_targets_min": 442 + }, + { + "epoch": 5.098335854765507, + "grad_norm": 0.4279890944625253, + "learning_rate": 8.351979275574207e-06, + "loss": 0.1623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1501058042049408, + "step": 3370, + "valid_targets_mean": 5297.9, + "valid_targets_min": 873 + }, + { + "epoch": 5.1059001512859306, + "grad_norm": 0.44340326773424427, + "learning_rate": 8.290731709906643e-06, + "loss": 0.1603, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16302546858787537, + "step": 3375, + "valid_targets_mean": 4976.1, + "valid_targets_min": 1877 + }, + { + "epoch": 5.113464447806354, + "grad_norm": 0.5128759287926937, + "learning_rate": 8.229650772075153e-06, + "loss": 0.1642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15705181658267975, + "step": 3380, + "valid_targets_mean": 3650.5, + "valid_targets_min": 506 + }, + { + "epoch": 5.121028744326778, + "grad_norm": 0.4688934592390907, + "learning_rate": 8.168737331287269e-06, + "loss": 0.1529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15802708268165588, + "step": 3385, + "valid_targets_mean": 4826.7, + "valid_targets_min": 841 + }, + { + "epoch": 5.128593040847202, + "grad_norm": 0.5338964793918589, + "learning_rate": 8.107992254367003e-06, + "loss": 0.1653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17902442812919617, + "step": 3390, + "valid_targets_mean": 3932.9, + "valid_targets_min": 650 + }, + { + "epoch": 5.136157337367624, + "grad_norm": 0.4961972344616427, + "learning_rate": 8.047416405742479e-06, + "loss": 0.1566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15935659408569336, + "step": 3395, + "valid_targets_mean": 4673.6, + "valid_targets_min": 672 + }, + { + "epoch": 5.143721633888048, + "grad_norm": 0.41951146240242676, + "learning_rate": 7.987010647433606e-06, + "loss": 0.1609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18392476439476013, + "step": 3400, + "valid_targets_mean": 6500.2, + "valid_targets_min": 665 + }, + { + "epoch": 5.151285930408472, + "grad_norm": 0.5308721732777357, + "learning_rate": 7.926775839039851e-06, + "loss": 0.1717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1562509834766388, + "step": 3405, + "valid_targets_mean": 3936.5, + "valid_targets_min": 593 + }, + { + "epoch": 5.158850226928895, + "grad_norm": 0.4254042910385951, + "learning_rate": 7.866712837728016e-06, + "loss": 0.1583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1462964117527008, + "step": 3410, + "valid_targets_mean": 5346.2, + "valid_targets_min": 944 + }, + { + "epoch": 5.166414523449319, + "grad_norm": 0.5552776613133977, + "learning_rate": 7.80682249821997e-06, + "loss": 0.1515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11865056306123734, + "step": 3415, + "valid_targets_mean": 4060.9, + "valid_targets_min": 660 + }, + { + "epoch": 5.173978819969743, + "grad_norm": 0.5104198651404502, + "learning_rate": 7.747105672780561e-06, + "loss": 0.1733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15125545859336853, + "step": 3420, + "valid_targets_mean": 4615.4, + "valid_targets_min": 628 + }, + { + "epoch": 5.181543116490166, + "grad_norm": 0.472114678915561, + "learning_rate": 7.68756321120546e-06, + "loss": 0.1639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16353891789913177, + "step": 3425, + "valid_targets_mean": 4357.2, + "valid_targets_min": 723 + }, + { + "epoch": 5.18910741301059, + "grad_norm": 0.7788327189272692, + "learning_rate": 7.628195960809039e-06, + "loss": 0.1564, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1500215232372284, + "step": 3430, + "valid_targets_mean": 4284.7, + "valid_targets_min": 754 + }, + { + "epoch": 5.196671709531014, + "grad_norm": 0.43343966922095256, + "learning_rate": 7.569004766412369e-06, + "loss": 0.1565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14865446090698242, + "step": 3435, + "valid_targets_mean": 4969.2, + "valid_targets_min": 586 + }, + { + "epoch": 5.204236006051437, + "grad_norm": 0.47577740785963674, + "learning_rate": 7.509990470331159e-06, + "loss": 0.1625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1581379622220993, + "step": 3440, + "valid_targets_mean": 4133.3, + "valid_targets_min": 712 + }, + { + "epoch": 5.211800302571861, + "grad_norm": 0.3926350192189327, + "learning_rate": 7.451153912363784e-06, + "loss": 0.155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13046491146087646, + "step": 3445, + "valid_targets_mean": 6214.4, + "valid_targets_min": 2307 + }, + { + "epoch": 5.219364599092285, + "grad_norm": 0.4176746702190328, + "learning_rate": 7.392495929779333e-06, + "loss": 0.1597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1757950782775879, + "step": 3450, + "valid_targets_mean": 5443.2, + "valid_targets_min": 546 + }, + { + "epoch": 5.2269288956127085, + "grad_norm": 0.4515587545805908, + "learning_rate": 7.334017357305674e-06, + "loss": 0.1591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16047310829162598, + "step": 3455, + "valid_targets_mean": 4970.7, + "valid_targets_min": 550 + }, + { + "epoch": 5.234493192133131, + "grad_norm": 0.45597339531041936, + "learning_rate": 7.2757190271176115e-06, + "loss": 0.1458, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14856447279453278, + "step": 3460, + "valid_targets_mean": 4530.4, + "valid_targets_min": 710 + }, + { + "epoch": 5.242057488653555, + "grad_norm": 0.5008734692788059, + "learning_rate": 7.217601768825023e-06, + "loss": 0.1531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14690902829170227, + "step": 3465, + "valid_targets_mean": 5124.4, + "valid_targets_min": 583 + }, + { + "epoch": 5.249621785173979, + "grad_norm": 0.4989691903624996, + "learning_rate": 7.15966640946105e-06, + "loss": 0.1539, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14532163739204407, + "step": 3470, + "valid_targets_mean": 4449.6, + "valid_targets_min": 695 + }, + { + "epoch": 5.257186081694402, + "grad_norm": 0.4129795536595691, + "learning_rate": 7.101913773470346e-06, + "loss": 0.1582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14961427450180054, + "step": 3475, + "valid_targets_mean": 5730.8, + "valid_targets_min": 3290 + }, + { + "epoch": 5.264750378214826, + "grad_norm": 0.4834264222381325, + "learning_rate": 7.044344682697326e-06, + "loss": 0.1587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14742320775985718, + "step": 3480, + "valid_targets_mean": 4838.9, + "valid_targets_min": 515 + }, + { + "epoch": 5.27231467473525, + "grad_norm": 0.5026805612919963, + "learning_rate": 6.986959956374473e-06, + "loss": 0.1668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17401841282844543, + "step": 3485, + "valid_targets_mean": 4044.6, + "valid_targets_min": 781 + }, + { + "epoch": 5.279878971255673, + "grad_norm": 0.4735788451733253, + "learning_rate": 6.929760411110698e-06, + "loss": 0.1585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1525282859802246, + "step": 3490, + "valid_targets_mean": 4527.6, + "valid_targets_min": 812 + }, + { + "epoch": 5.287443267776097, + "grad_norm": 0.4391485486379499, + "learning_rate": 6.872746860879702e-06, + "loss": 0.1562, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1371186375617981, + "step": 3495, + "valid_targets_mean": 4986.9, + "valid_targets_min": 80 + }, + { + "epoch": 5.295007564296521, + "grad_norm": 0.4167604767357861, + "learning_rate": 6.815920117008399e-06, + "loss": 0.1617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15365831553936005, + "step": 3500, + "valid_targets_mean": 5418.2, + "valid_targets_min": 617 + }, + { + "epoch": 5.302571860816944, + "grad_norm": 0.42828839637040433, + "learning_rate": 6.759280988165373e-06, + "loss": 0.1527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13771694898605347, + "step": 3505, + "valid_targets_mean": 5017.8, + "valid_targets_min": 1328 + }, + { + "epoch": 5.310136157337368, + "grad_norm": 0.4506403163352829, + "learning_rate": 6.702830280349353e-06, + "loss": 0.1611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15530502796173096, + "step": 3510, + "valid_targets_mean": 4390.8, + "valid_targets_min": 1004 + }, + { + "epoch": 5.317700453857791, + "grad_norm": 0.4340685132852124, + "learning_rate": 6.6465687968777725e-06, + "loss": 0.1495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.146540105342865, + "step": 3515, + "valid_targets_mean": 4825.2, + "valid_targets_min": 642 + }, + { + "epoch": 5.3252647503782145, + "grad_norm": 0.46671624987800375, + "learning_rate": 6.590497338375317e-06, + "loss": 0.1556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1502562165260315, + "step": 3520, + "valid_targets_mean": 4549.4, + "valid_targets_min": 540 + }, + { + "epoch": 5.332829046898638, + "grad_norm": 0.5101933773328257, + "learning_rate": 6.534616702762537e-06, + "loss": 0.1594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17042289674282074, + "step": 3525, + "valid_targets_mean": 4879.7, + "valid_targets_min": 795 + }, + { + "epoch": 5.340393343419062, + "grad_norm": 0.5293012627631964, + "learning_rate": 6.478927685244494e-06, + "loss": 0.1669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18156251311302185, + "step": 3530, + "valid_targets_mean": 3960.2, + "valid_targets_min": 719 + }, + { + "epoch": 5.3479576399394855, + "grad_norm": 0.47166025682277357, + "learning_rate": 6.423431078299443e-06, + "loss": 0.1534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17513303458690643, + "step": 3535, + "valid_targets_mean": 4714.2, + "valid_targets_min": 692 + }, + { + "epoch": 5.355521936459909, + "grad_norm": 0.4337643659228156, + "learning_rate": 6.3681276716675435e-06, + "loss": 0.174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18963715434074402, + "step": 3540, + "valid_targets_mean": 5398.2, + "valid_targets_min": 740 + }, + { + "epoch": 5.363086232980333, + "grad_norm": 0.44696298609992435, + "learning_rate": 6.3130182523396484e-06, + "loss": 0.1614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17503423988819122, + "step": 3545, + "valid_targets_mean": 4828.9, + "valid_targets_min": 2715 + }, + { + "epoch": 5.3706505295007565, + "grad_norm": 0.44174975125123683, + "learning_rate": 6.258103604546087e-06, + "loss": 0.1518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1407729834318161, + "step": 3550, + "valid_targets_mean": 4479.8, + "valid_targets_min": 2428 + }, + { + "epoch": 5.37821482602118, + "grad_norm": 0.44916605681525745, + "learning_rate": 6.2033845097454985e-06, + "loss": 0.1593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14504870772361755, + "step": 3555, + "valid_targets_mean": 4913.3, + "valid_targets_min": 940 + }, + { + "epoch": 5.385779122541604, + "grad_norm": 0.4400921867214306, + "learning_rate": 6.14886174661373e-06, + "loss": 0.1608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15662983059883118, + "step": 3560, + "valid_targets_mean": 4724.1, + "valid_targets_min": 620 + }, + { + "epoch": 5.3933434190620275, + "grad_norm": 0.47087556709614853, + "learning_rate": 6.0945360910327476e-06, + "loss": 0.149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1466415971517563, + "step": 3565, + "valid_targets_mean": 4426.4, + "valid_targets_min": 1086 + }, + { + "epoch": 5.400907715582451, + "grad_norm": 0.5433230621680141, + "learning_rate": 6.040408316079575e-06, + "loss": 0.1545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13208290934562683, + "step": 3570, + "valid_targets_mean": 2785.8, + "valid_targets_min": 167 + }, + { + "epoch": 5.408472012102875, + "grad_norm": 0.46344685976956507, + "learning_rate": 5.986479192015337e-06, + "loss": 0.1784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2303861677646637, + "step": 3575, + "valid_targets_mean": 5746.5, + "valid_targets_min": 932 + }, + { + "epoch": 5.416036308623298, + "grad_norm": 0.4310949358505575, + "learning_rate": 5.932749486274239e-06, + "loss": 0.1604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14768637716770172, + "step": 3580, + "valid_targets_mean": 5434.6, + "valid_targets_min": 666 + }, + { + "epoch": 5.423600605143721, + "grad_norm": 0.4677000573971819, + "learning_rate": 5.8792199634527205e-06, + "loss": 0.154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16156046092510223, + "step": 3585, + "valid_targets_mean": 5433.3, + "valid_targets_min": 756 + }, + { + "epoch": 5.431164901664145, + "grad_norm": 0.49393458847349964, + "learning_rate": 5.82589138529851e-06, + "loss": 0.1693, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1611177623271942, + "step": 3590, + "valid_targets_mean": 4461.6, + "valid_targets_min": 595 + }, + { + "epoch": 5.438729198184569, + "grad_norm": 0.5687948770753518, + "learning_rate": 5.7727645106998e-06, + "loss": 0.1508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15704114735126495, + "step": 3595, + "valid_targets_mean": 4090.4, + "valid_targets_min": 286 + }, + { + "epoch": 5.446293494704992, + "grad_norm": 0.5444741623582646, + "learning_rate": 5.719840095674476e-06, + "loss": 0.1616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1610284447669983, + "step": 3600, + "valid_targets_mean": 3557.4, + "valid_targets_min": 560 + }, + { + "epoch": 5.453857791225416, + "grad_norm": 0.43128286547039024, + "learning_rate": 5.667118893359331e-06, + "loss": 0.1541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1486874222755432, + "step": 3605, + "valid_targets_mean": 4866.7, + "valid_targets_min": 1302 + }, + { + "epoch": 5.46142208774584, + "grad_norm": 0.4595711511111565, + "learning_rate": 5.614601653999338e-06, + "loss": 0.1538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16856805980205536, + "step": 3610, + "valid_targets_mean": 4922.6, + "valid_targets_min": 557 + }, + { + "epoch": 5.468986384266263, + "grad_norm": 0.46656320107157945, + "learning_rate": 5.5622891249370234e-06, + "loss": 0.1828, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20066699385643005, + "step": 3615, + "valid_targets_mean": 5136.1, + "valid_targets_min": 688 + }, + { + "epoch": 5.476550680786687, + "grad_norm": 0.4411187508138855, + "learning_rate": 5.5101820506017865e-06, + "loss": 0.1575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1552569568157196, + "step": 3620, + "valid_targets_mean": 5095.8, + "valid_targets_min": 635 + }, + { + "epoch": 5.484114977307111, + "grad_norm": 0.4862124542907482, + "learning_rate": 5.458281172499298e-06, + "loss": 0.1614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1585727334022522, + "step": 3625, + "valid_targets_mean": 3937.2, + "valid_targets_min": 496 + }, + { + "epoch": 5.491679273827534, + "grad_norm": 0.4292729342401527, + "learning_rate": 5.406587229200997e-06, + "loss": 0.1556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12494321167469025, + "step": 3630, + "valid_targets_mean": 4550.4, + "valid_targets_min": 1536 + }, + { + "epoch": 5.499243570347957, + "grad_norm": 0.4047902663925935, + "learning_rate": 5.355100956333546e-06, + "loss": 0.154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14619508385658264, + "step": 3635, + "valid_targets_mean": 5542.1, + "valid_targets_min": 570 + }, + { + "epoch": 5.506807866868381, + "grad_norm": 0.5096706493017572, + "learning_rate": 5.303823086568347e-06, + "loss": 0.1548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15569140017032623, + "step": 3640, + "valid_targets_mean": 3900.1, + "valid_targets_min": 856 + }, + { + "epoch": 5.5143721633888045, + "grad_norm": 0.4255353489871283, + "learning_rate": 5.252754349611182e-06, + "loss": 0.1552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14665234088897705, + "step": 3645, + "valid_targets_mean": 5351.4, + "valid_targets_min": 1002 + }, + { + "epoch": 5.521936459909228, + "grad_norm": 0.452780407445397, + "learning_rate": 5.201895472191743e-06, + "loss": 0.155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1493845283985138, + "step": 3650, + "valid_targets_mean": 5155.4, + "valid_targets_min": 1700 + }, + { + "epoch": 5.529500756429652, + "grad_norm": 0.43780341220663965, + "learning_rate": 5.151247178053349e-06, + "loss": 0.1615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15060928463935852, + "step": 3655, + "valid_targets_mean": 5135.1, + "valid_targets_min": 1275 + }, + { + "epoch": 5.537065052950076, + "grad_norm": 0.5052751593749545, + "learning_rate": 5.100810187942639e-06, + "loss": 0.1543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1450476050376892, + "step": 3660, + "valid_targets_mean": 5569.8, + "valid_targets_min": 2214 + }, + { + "epoch": 5.544629349470499, + "grad_norm": 0.47706320379830186, + "learning_rate": 5.050585219599289e-06, + "loss": 0.1549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1428034007549286, + "step": 3665, + "valid_targets_mean": 4428.3, + "valid_targets_min": 345 + }, + { + "epoch": 5.552193645990923, + "grad_norm": 0.4868390792247118, + "learning_rate": 5.0005729877458155e-06, + "loss": 0.1514, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1447855830192566, + "step": 3670, + "valid_targets_mean": 4278.9, + "valid_targets_min": 579 + }, + { + "epoch": 5.559757942511347, + "grad_norm": 0.4567698222657148, + "learning_rate": 4.950774204077433e-06, + "loss": 0.1493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.161106675863266, + "step": 3675, + "valid_targets_mean": 4954.6, + "valid_targets_min": 579 + }, + { + "epoch": 5.56732223903177, + "grad_norm": 0.42355294049312686, + "learning_rate": 4.901189577251864e-06, + "loss": 0.1585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1554052084684372, + "step": 3680, + "valid_targets_mean": 5065.8, + "valid_targets_min": 1009 + }, + { + "epoch": 5.574886535552194, + "grad_norm": 0.4156925770441878, + "learning_rate": 4.851819812879303e-06, + "loss": 0.1529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1524563580751419, + "step": 3685, + "valid_targets_mean": 5432.8, + "valid_targets_min": 1562 + }, + { + "epoch": 5.582450832072618, + "grad_norm": 0.4746990695302771, + "learning_rate": 4.80266561351237e-06, + "loss": 0.1635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16682183742523193, + "step": 3690, + "valid_targets_mean": 4135.4, + "valid_targets_min": 736 + }, + { + "epoch": 5.590015128593041, + "grad_norm": 0.5340419322270344, + "learning_rate": 4.753727678636082e-06, + "loss": 0.1483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16570830345153809, + "step": 3695, + "valid_targets_mean": 4032.8, + "valid_targets_min": 802 + }, + { + "epoch": 5.597579425113464, + "grad_norm": 0.46287738612206497, + "learning_rate": 4.7050067046579324e-06, + "loss": 0.1615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15962907671928406, + "step": 3700, + "valid_targets_mean": 4799.6, + "valid_targets_min": 855 + }, + { + "epoch": 5.605143721633888, + "grad_norm": 0.4388012169199975, + "learning_rate": 4.656503384897988e-06, + "loss": 0.1549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13840925693511963, + "step": 3705, + "valid_targets_mean": 4313.9, + "valid_targets_min": 733 + }, + { + "epoch": 5.612708018154311, + "grad_norm": 0.478903080739202, + "learning_rate": 4.6082184095789686e-06, + "loss": 0.1491, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15945684909820557, + "step": 3710, + "valid_targets_mean": 4372.9, + "valid_targets_min": 892 + }, + { + "epoch": 5.620272314674735, + "grad_norm": 0.43574711337202326, + "learning_rate": 4.56015246581649e-06, + "loss": 0.163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18066449463367462, + "step": 3715, + "valid_targets_mean": 5704.6, + "valid_targets_min": 716 + }, + { + "epoch": 5.627836611195159, + "grad_norm": 0.4685680736943704, + "learning_rate": 4.512306237609232e-06, + "loss": 0.1463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15282121300697327, + "step": 3720, + "valid_targets_mean": 5270.2, + "valid_targets_min": 569 + }, + { + "epoch": 5.635400907715582, + "grad_norm": 0.4897093192549938, + "learning_rate": 4.464680405829249e-06, + "loss": 0.1532, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14707836508750916, + "step": 3725, + "valid_targets_mean": 4000.9, + "valid_targets_min": 473 + }, + { + "epoch": 5.642965204236006, + "grad_norm": 0.4848515119309501, + "learning_rate": 4.4172756482122535e-06, + "loss": 0.1616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1577150821685791, + "step": 3730, + "valid_targets_mean": 4438.2, + "valid_targets_min": 1148 + }, + { + "epoch": 5.65052950075643, + "grad_norm": 0.47760265587876977, + "learning_rate": 4.370092639347978e-06, + "loss": 0.1687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17945009469985962, + "step": 3735, + "valid_targets_mean": 4574.9, + "valid_targets_min": 504 + }, + { + "epoch": 5.6580937972768535, + "grad_norm": 0.4826246472719842, + "learning_rate": 4.3231320506705775e-06, + "loss": 0.1645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16503837704658508, + "step": 3740, + "valid_targets_mean": 4244.0, + "valid_targets_min": 762 + }, + { + "epoch": 5.665658093797277, + "grad_norm": 0.4948486143484778, + "learning_rate": 4.2763945504490835e-06, + "loss": 0.1536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1612137407064438, + "step": 3745, + "valid_targets_mean": 3841.2, + "valid_targets_min": 898 + }, + { + "epoch": 5.673222390317701, + "grad_norm": 0.5418667804393014, + "learning_rate": 4.229880803777859e-06, + "loss": 0.1564, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1569966971874237, + "step": 3750, + "valid_targets_mean": 5043.6, + "valid_targets_min": 789 + }, + { + "epoch": 5.680786686838124, + "grad_norm": 0.44525023164036787, + "learning_rate": 4.183591472567186e-06, + "loss": 0.1546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15684780478477478, + "step": 3755, + "valid_targets_mean": 5266.2, + "valid_targets_min": 2689 + }, + { + "epoch": 5.688350983358547, + "grad_norm": 0.42073209891417795, + "learning_rate": 4.137527215533805e-06, + "loss": 0.1679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16824714839458466, + "step": 3760, + "valid_targets_mean": 6439.3, + "valid_targets_min": 607 + }, + { + "epoch": 5.695915279878971, + "grad_norm": 0.4862094128743182, + "learning_rate": 4.091688688191564e-06, + "loss": 0.1615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15628470480442047, + "step": 3765, + "valid_targets_mean": 4402.2, + "valid_targets_min": 800 + }, + { + "epoch": 5.703479576399395, + "grad_norm": 0.537061705110618, + "learning_rate": 4.046076542842077e-06, + "loss": 0.1606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1753963828086853, + "step": 3770, + "valid_targets_mean": 3481.6, + "valid_targets_min": 730 + }, + { + "epoch": 5.711043872919818, + "grad_norm": 0.49689760167531866, + "learning_rate": 4.000691428565453e-06, + "loss": 0.1611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18343189358711243, + "step": 3775, + "valid_targets_mean": 4551.9, + "valid_targets_min": 732 + }, + { + "epoch": 5.718608169440242, + "grad_norm": 0.48330032612845897, + "learning_rate": 3.9555339912110355e-06, + "loss": 0.1654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1796744465827942, + "step": 3780, + "valid_targets_mean": 4892.2, + "valid_targets_min": 760 + }, + { + "epoch": 5.726172465960666, + "grad_norm": 0.5276122434209084, + "learning_rate": 3.910604873388248e-06, + "loss": 0.1669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17993156611919403, + "step": 3785, + "valid_targets_mean": 3504.7, + "valid_targets_min": 574 + }, + { + "epoch": 5.733736762481089, + "grad_norm": 0.4887711580738052, + "learning_rate": 3.8659047144574245e-06, + "loss": 0.1525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1559087038040161, + "step": 3790, + "valid_targets_mean": 4371.9, + "valid_targets_min": 638 + }, + { + "epoch": 5.741301059001513, + "grad_norm": 1.3298249041601897, + "learning_rate": 3.821434150520715e-06, + "loss": 0.1588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17897151410579681, + "step": 3795, + "valid_targets_mean": 4587.8, + "valid_targets_min": 756 + }, + { + "epoch": 5.748865355521937, + "grad_norm": 0.43958731365820686, + "learning_rate": 3.777193814413045e-06, + "loss": 0.1632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17036233842372894, + "step": 3800, + "valid_targets_mean": 5104.9, + "valid_targets_min": 486 + }, + { + "epoch": 5.75642965204236, + "grad_norm": 0.4549903255374407, + "learning_rate": 3.7331843356930806e-06, + "loss": 0.1715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1566658914089203, + "step": 3805, + "valid_targets_mean": 4717.1, + "valid_targets_min": 818 + }, + { + "epoch": 5.763993948562784, + "grad_norm": 0.4541303493990637, + "learning_rate": 3.6894063406343094e-06, + "loss": 0.1515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15038271248340607, + "step": 3810, + "valid_targets_mean": 4360.2, + "valid_targets_min": 1600 + }, + { + "epoch": 5.771558245083208, + "grad_norm": 0.4563736572350106, + "learning_rate": 3.645860452216099e-06, + "loss": 0.1704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1912127137184143, + "step": 3815, + "valid_targets_mean": 5395.8, + "valid_targets_min": 828 + }, + { + "epoch": 5.7791225416036305, + "grad_norm": 0.4551743372988032, + "learning_rate": 3.6025472901148463e-06, + "loss": 0.1652, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14934727549552917, + "step": 3820, + "valid_targets_mean": 4297.6, + "valid_targets_min": 723 + }, + { + "epoch": 5.786686838124054, + "grad_norm": 0.5419707059621975, + "learning_rate": 3.5594674706951505e-06, + "loss": 0.1681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17843681573867798, + "step": 3825, + "valid_targets_mean": 3665.1, + "valid_targets_min": 869 + }, + { + "epoch": 5.794251134644478, + "grad_norm": 0.41228701610809887, + "learning_rate": 3.5166216070010538e-06, + "loss": 0.1501, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16986562311649323, + "step": 3830, + "valid_targets_mean": 5923.2, + "valid_targets_min": 727 + }, + { + "epoch": 5.8018154311649015, + "grad_norm": 0.5633480031927796, + "learning_rate": 3.474010308747291e-06, + "loss": 0.1598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1795251965522766, + "step": 3835, + "valid_targets_mean": 3345.4, + "valid_targets_min": 488 + }, + { + "epoch": 5.809379727685325, + "grad_norm": 0.4548488396337465, + "learning_rate": 3.431634182310648e-06, + "loss": 0.1522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15337982773780823, + "step": 3840, + "valid_targets_mean": 5548.0, + "valid_targets_min": 603 + }, + { + "epoch": 5.816944024205749, + "grad_norm": 0.5594216753768365, + "learning_rate": 3.3894938307213152e-06, + "loss": 0.163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16239002346992493, + "step": 3845, + "valid_targets_mean": 3887.2, + "valid_targets_min": 625 + }, + { + "epoch": 5.8245083207261725, + "grad_norm": 0.48899700932766244, + "learning_rate": 3.3475898536543027e-06, + "loss": 0.148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14301034808158875, + "step": 3850, + "valid_targets_mean": 3694.9, + "valid_targets_min": 783 + }, + { + "epoch": 5.832072617246596, + "grad_norm": 0.5375659757559488, + "learning_rate": 3.305922847420917e-06, + "loss": 0.1585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1484188735485077, + "step": 3855, + "valid_targets_mean": 4786.7, + "valid_targets_min": 980 + }, + { + "epoch": 5.83963691376702, + "grad_norm": 0.4496456556488915, + "learning_rate": 3.2644934049602563e-06, + "loss": 0.1681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.143708735704422, + "step": 3860, + "valid_targets_mean": 4360.6, + "valid_targets_min": 451 + }, + { + "epoch": 5.8472012102874436, + "grad_norm": 0.5024320220862686, + "learning_rate": 3.2233021158307977e-06, + "loss": 0.1688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16948488354682922, + "step": 3865, + "valid_targets_mean": 3960.7, + "valid_targets_min": 653 + }, + { + "epoch": 5.854765506807867, + "grad_norm": 0.4794661098479003, + "learning_rate": 3.1823495662019945e-06, + "loss": 0.1642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18447071313858032, + "step": 3870, + "valid_targets_mean": 4347.0, + "valid_targets_min": 869 + }, + { + "epoch": 5.86232980332829, + "grad_norm": 0.5335315955728364, + "learning_rate": 3.1416363388459327e-06, + "loss": 0.1514, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16665160655975342, + "step": 3875, + "valid_targets_mean": 3852.0, + "valid_targets_min": 442 + }, + { + "epoch": 5.869894099848714, + "grad_norm": 0.4521764880267215, + "learning_rate": 3.101163013129045e-06, + "loss": 0.1736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1629321575164795, + "step": 3880, + "valid_targets_mean": 5032.3, + "valid_targets_min": 1726 + }, + { + "epoch": 5.877458396369137, + "grad_norm": 0.44215316112526387, + "learning_rate": 3.0609301650038636e-06, + "loss": 0.1571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16144798696041107, + "step": 3885, + "valid_targets_mean": 4983.0, + "valid_targets_min": 1035 + }, + { + "epoch": 5.885022692889561, + "grad_norm": 0.45192189450308085, + "learning_rate": 3.02093836700081e-06, + "loss": 0.1569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1504105031490326, + "step": 3890, + "valid_targets_mean": 4960.3, + "valid_targets_min": 660 + }, + { + "epoch": 5.892586989409985, + "grad_norm": 0.48426768810953463, + "learning_rate": 2.9811881882200743e-06, + "loss": 0.1651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13621945679187775, + "step": 3895, + "valid_targets_mean": 5686.2, + "valid_targets_min": 975 + }, + { + "epoch": 5.900151285930408, + "grad_norm": 0.42046160161979335, + "learning_rate": 2.9416801943234998e-06, + "loss": 0.1592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1531427800655365, + "step": 3900, + "valid_targets_mean": 4956.7, + "valid_targets_min": 623 + }, + { + "epoch": 5.907715582450832, + "grad_norm": 0.39285175944912043, + "learning_rate": 2.9024149475265373e-06, + "loss": 0.1549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13850067555904388, + "step": 3905, + "valid_targets_mean": 5538.1, + "valid_targets_min": 562 + }, + { + "epoch": 5.915279878971256, + "grad_norm": 0.4675547807289267, + "learning_rate": 2.863393006590238e-06, + "loss": 0.1613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1579177975654602, + "step": 3910, + "valid_targets_mean": 4398.9, + "valid_targets_min": 695 + }, + { + "epoch": 5.922844175491679, + "grad_norm": 0.5044920538844487, + "learning_rate": 2.8246149268133204e-06, + "loss": 0.1574, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13913246989250183, + "step": 3915, + "valid_targets_mean": 4010.9, + "valid_targets_min": 992 + }, + { + "epoch": 5.930408472012103, + "grad_norm": 0.4734152534913723, + "learning_rate": 2.786081260024236e-06, + "loss": 0.1619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16348206996917725, + "step": 3920, + "valid_targets_mean": 4878.4, + "valid_targets_min": 765 + }, + { + "epoch": 5.937972768532527, + "grad_norm": 0.4212958889997933, + "learning_rate": 2.747792554573352e-06, + "loss": 0.1677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16852790117263794, + "step": 3925, + "valid_targets_mean": 5669.1, + "valid_targets_min": 907 + }, + { + "epoch": 5.94553706505295, + "grad_norm": 0.45812842620540806, + "learning_rate": 2.7097493553251307e-06, + "loss": 0.176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20022347569465637, + "step": 3930, + "valid_targets_mean": 5328.2, + "valid_targets_min": 585 + }, + { + "epoch": 5.953101361573374, + "grad_norm": 0.6055937435419297, + "learning_rate": 2.6719522036503654e-06, + "loss": 0.1597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15267616510391235, + "step": 3935, + "valid_targets_mean": 4613.9, + "valid_targets_min": 512 + }, + { + "epoch": 5.960665658093797, + "grad_norm": 0.4411679407281343, + "learning_rate": 2.634401637418511e-06, + "loss": 0.156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13407060503959656, + "step": 3940, + "valid_targets_mean": 5239.2, + "valid_targets_min": 773 + }, + { + "epoch": 5.968229954614221, + "grad_norm": 0.4846638993954495, + "learning_rate": 2.5970981909899817e-06, + "loss": 0.1684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1752455234527588, + "step": 3945, + "valid_targets_mean": 4496.4, + "valid_targets_min": 848 + }, + { + "epoch": 5.975794251134644, + "grad_norm": 0.5259996428825984, + "learning_rate": 2.5600423952085884e-06, + "loss": 0.1739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1702825427055359, + "step": 3950, + "valid_targets_mean": 4433.2, + "valid_targets_min": 694 + }, + { + "epoch": 5.983358547655068, + "grad_norm": 0.45060137388491545, + "learning_rate": 2.5232347773939704e-06, + "loss": 0.1453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15999829769134521, + "step": 3955, + "valid_targets_mean": 4916.9, + "valid_targets_min": 932 + }, + { + "epoch": 5.990922844175492, + "grad_norm": 0.49433677369908263, + "learning_rate": 2.4866758613340734e-06, + "loss": 0.1532, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17644256353378296, + "step": 3960, + "valid_targets_mean": 3945.7, + "valid_targets_min": 611 + }, + { + "epoch": 5.998487140695915, + "grad_norm": 0.4731628539536409, + "learning_rate": 2.4503661672777244e-06, + "loss": 0.1515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15683284401893616, + "step": 3965, + "valid_targets_mean": 4375.0, + "valid_targets_min": 345 + }, + { + "epoch": 6.006051437216339, + "grad_norm": 0.7191490640882733, + "learning_rate": 2.4143062119272263e-06, + "loss": 0.1776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2411782443523407, + "step": 3970, + "valid_targets_mean": 5717.2, + "valid_targets_min": 577 + }, + { + "epoch": 6.013615733736763, + "grad_norm": 0.45234451996666436, + "learning_rate": 2.3784965084309697e-06, + "loss": 0.1492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.135951966047287, + "step": 3975, + "valid_targets_mean": 4790.9, + "valid_targets_min": 524 + }, + { + "epoch": 6.021180030257186, + "grad_norm": 0.4357043407051513, + "learning_rate": 2.3429375663761734e-06, + "loss": 0.1551, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14513269066810608, + "step": 3980, + "valid_targets_mean": 4690.6, + "valid_targets_min": 307 + }, + { + "epoch": 6.02874432677761, + "grad_norm": 0.4842329006340026, + "learning_rate": 2.307629891781611e-06, + "loss": 0.1568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1446702480316162, + "step": 3985, + "valid_targets_mean": 4378.6, + "valid_targets_min": 702 + }, + { + "epoch": 6.036308623298034, + "grad_norm": 0.5364048099568542, + "learning_rate": 2.2725739870904075e-06, + "loss": 0.1527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1525079607963562, + "step": 3990, + "valid_targets_mean": 4930.4, + "valid_targets_min": 795 + }, + { + "epoch": 6.043872919818457, + "grad_norm": 0.43526201026202, + "learning_rate": 2.2377703511629023e-06, + "loss": 0.1595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1793811321258545, + "step": 3995, + "valid_targets_mean": 5215.1, + "valid_targets_min": 739 + }, + { + "epoch": 6.05143721633888, + "grad_norm": 0.5106416304534394, + "learning_rate": 2.2032194792695517e-06, + "loss": 0.1519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1592015027999878, + "step": 4000, + "valid_targets_mean": 4197.9, + "valid_targets_min": 650 + }, + { + "epoch": 6.059001512859304, + "grad_norm": 0.4414429719089798, + "learning_rate": 2.1689218630838528e-06, + "loss": 0.1635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17658647894859314, + "step": 4005, + "valid_targets_mean": 5331.7, + "valid_targets_min": 1472 + }, + { + "epoch": 6.0665658093797274, + "grad_norm": 0.54354392739444, + "learning_rate": 2.1348779906753856e-06, + "loss": 0.1684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14778614044189453, + "step": 4010, + "valid_targets_mean": 5743.7, + "valid_targets_min": 765 + }, + { + "epoch": 6.074130105900151, + "grad_norm": 0.43018503151003995, + "learning_rate": 2.101088346502833e-06, + "loss": 0.1433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13991913199424744, + "step": 4015, + "valid_targets_mean": 5941.0, + "valid_targets_min": 1067 + }, + { + "epoch": 6.081694402420575, + "grad_norm": 0.5551337425133364, + "learning_rate": 2.067553411407117e-06, + "loss": 0.157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15724679827690125, + "step": 4020, + "valid_targets_mean": 3873.4, + "valid_targets_min": 525 + }, + { + "epoch": 6.0892586989409985, + "grad_norm": 0.44853100000660745, + "learning_rate": 2.0342736626045356e-06, + "loss": 0.1542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14757779240608215, + "step": 4025, + "valid_targets_mean": 4953.8, + "valid_targets_min": 723 + }, + { + "epoch": 6.096822995461422, + "grad_norm": 0.45503441348935464, + "learning_rate": 2.0012495736799753e-06, + "loss": 0.1481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1418561339378357, + "step": 4030, + "valid_targets_mean": 5156.3, + "valid_targets_min": 932 + }, + { + "epoch": 6.104387291981846, + "grad_norm": 0.40513522356616916, + "learning_rate": 1.9684816145801776e-06, + "loss": 0.1456, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13881005346775055, + "step": 4035, + "valid_targets_mean": 5304.2, + "valid_targets_min": 2569 + }, + { + "epoch": 6.1119515885022695, + "grad_norm": 0.4269784848098862, + "learning_rate": 1.9359702516070553e-06, + "loss": 0.158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1601366251707077, + "step": 4040, + "valid_targets_mean": 5462.7, + "valid_targets_min": 573 + }, + { + "epoch": 6.119515885022693, + "grad_norm": 0.5519519701322759, + "learning_rate": 1.9037159474110333e-06, + "loss": 0.1543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1714671403169632, + "step": 4045, + "valid_targets_mean": 3356.6, + "valid_targets_min": 778 + }, + { + "epoch": 6.127080181543117, + "grad_norm": 0.4351482020097601, + "learning_rate": 1.8717191609844931e-06, + "loss": 0.1624, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16394942998886108, + "step": 4050, + "valid_targets_mean": 5265.4, + "valid_targets_min": 766 + }, + { + "epoch": 6.1346444780635405, + "grad_norm": 0.40124586897572423, + "learning_rate": 1.8399803476552303e-06, + "loss": 0.1518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12816530466079712, + "step": 4055, + "valid_targets_mean": 4822.0, + "valid_targets_min": 1189 + }, + { + "epoch": 6.142208774583963, + "grad_norm": 0.384206751077286, + "learning_rate": 1.8084999590799678e-06, + "loss": 0.1439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15090115368366241, + "step": 4060, + "valid_targets_mean": 6546.3, + "valid_targets_min": 2772 + }, + { + "epoch": 6.149773071104387, + "grad_norm": 1.2611731543758526, + "learning_rate": 1.7772784432379398e-06, + "loss": 0.1406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1444360464811325, + "step": 4065, + "valid_targets_mean": 4284.9, + "valid_targets_min": 562 + }, + { + "epoch": 6.157337367624811, + "grad_norm": 0.414208754194147, + "learning_rate": 1.7463162444245174e-06, + "loss": 0.1557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15277935564517975, + "step": 4070, + "valid_targets_mean": 5458.4, + "valid_targets_min": 1847 + }, + { + "epoch": 6.164901664145234, + "grad_norm": 0.5352430006908002, + "learning_rate": 1.7156138032448621e-06, + "loss": 0.1557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17013967037200928, + "step": 4075, + "valid_targets_mean": 4358.4, + "valid_targets_min": 627 + }, + { + "epoch": 6.172465960665658, + "grad_norm": 0.4390587640921899, + "learning_rate": 1.6851715566076942e-06, + "loss": 0.148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1464182734489441, + "step": 4080, + "valid_targets_mean": 5010.7, + "valid_targets_min": 498 + }, + { + "epoch": 6.180030257186082, + "grad_norm": 0.5455898884169155, + "learning_rate": 1.6549899377190448e-06, + "loss": 0.1575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1335110366344452, + "step": 4085, + "valid_targets_mean": 3511.6, + "valid_targets_min": 457 + }, + { + "epoch": 6.187594553706505, + "grad_norm": 0.4098562706112613, + "learning_rate": 1.6250693760761072e-06, + "loss": 0.1453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14616535604000092, + "step": 4090, + "valid_targets_mean": 5443.2, + "valid_targets_min": 894 + }, + { + "epoch": 6.195158850226929, + "grad_norm": 0.5540401439486693, + "learning_rate": 1.5954102974611218e-06, + "loss": 0.1566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18993410468101501, + "step": 4095, + "valid_targets_mean": 4211.4, + "valid_targets_min": 662 + }, + { + "epoch": 6.202723146747353, + "grad_norm": 0.4054113810918477, + "learning_rate": 1.5660131239353037e-06, + "loss": 0.1486, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15200453996658325, + "step": 4100, + "valid_targets_mean": 5708.1, + "valid_targets_min": 1104 + }, + { + "epoch": 6.210287443267776, + "grad_norm": 0.590958853321242, + "learning_rate": 1.536878273832858e-06, + "loss": 0.1447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16703468561172485, + "step": 4105, + "valid_targets_mean": 4328.2, + "valid_targets_min": 619 + }, + { + "epoch": 6.2178517397882, + "grad_norm": 0.4431701530215047, + "learning_rate": 1.5080061617550157e-06, + "loss": 0.1547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16911979019641876, + "step": 4110, + "valid_targets_mean": 5307.1, + "valid_targets_min": 801 + }, + { + "epoch": 6.225416036308624, + "grad_norm": 0.447619549856025, + "learning_rate": 1.4793971985641298e-06, + "loss": 0.1655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16727328300476074, + "step": 4115, + "valid_targets_mean": 5365.2, + "valid_targets_min": 1941 + }, + { + "epoch": 6.2329803328290465, + "grad_norm": 0.49490312967592337, + "learning_rate": 1.45105179137784e-06, + "loss": 0.1532, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16049346327781677, + "step": 4120, + "valid_targets_mean": 4772.8, + "valid_targets_min": 971 + }, + { + "epoch": 6.24054462934947, + "grad_norm": 0.5002729276708221, + "learning_rate": 1.4229703435632702e-06, + "loss": 0.1445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15890288352966309, + "step": 4125, + "valid_targets_mean": 4810.2, + "valid_targets_min": 871 + }, + { + "epoch": 6.248108925869894, + "grad_norm": 0.3972740664066531, + "learning_rate": 1.395153254731285e-06, + "loss": 0.1467, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1513948142528534, + "step": 4130, + "valid_targets_mean": 5640.1, + "valid_targets_min": 714 + }, + { + "epoch": 6.2556732223903175, + "grad_norm": 0.43980618632091506, + "learning_rate": 1.367600920730816e-06, + "loss": 0.174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1656402349472046, + "step": 4135, + "valid_targets_mean": 5542.3, + "valid_targets_min": 1749 + }, + { + "epoch": 6.263237518910741, + "grad_norm": 0.4443207443673087, + "learning_rate": 1.3403137336432193e-06, + "loss": 0.1659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1583198606967926, + "step": 4140, + "valid_targets_mean": 5384.4, + "valid_targets_min": 753 + }, + { + "epoch": 6.270801815431165, + "grad_norm": 0.4753683286569819, + "learning_rate": 1.313292081776698e-06, + "loss": 0.149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17670109868049622, + "step": 4145, + "valid_targets_mean": 4728.7, + "valid_targets_min": 488 + }, + { + "epoch": 6.278366111951589, + "grad_norm": 0.4969598551390092, + "learning_rate": 1.286536349660783e-06, + "loss": 0.1538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14761213958263397, + "step": 4150, + "valid_targets_mean": 3709.7, + "valid_targets_min": 645 + }, + { + "epoch": 6.285930408472012, + "grad_norm": 0.5059874781677619, + "learning_rate": 1.2600469180408403e-06, + "loss": 0.1421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1481596827507019, + "step": 4155, + "valid_targets_mean": 4018.0, + "valid_targets_min": 342 + }, + { + "epoch": 6.293494704992436, + "grad_norm": 0.41740987872915114, + "learning_rate": 1.2338241638726811e-06, + "loss": 0.1543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16702906787395477, + "step": 4160, + "valid_targets_mean": 6024.6, + "valid_targets_min": 581 + }, + { + "epoch": 6.30105900151286, + "grad_norm": 0.4637071225113284, + "learning_rate": 1.2078684603171787e-06, + "loss": 0.1477, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.139248326420784, + "step": 4165, + "valid_targets_mean": 4567.2, + "valid_targets_min": 1063 + }, + { + "epoch": 6.308623298033283, + "grad_norm": 0.5155678448385924, + "learning_rate": 1.1821801767349616e-06, + "loss": 0.1574, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15290087461471558, + "step": 4170, + "valid_targets_mean": 4242.0, + "valid_targets_min": 654 + }, + { + "epoch": 6.316187594553707, + "grad_norm": 0.5065606805513911, + "learning_rate": 1.1567596786811652e-06, + "loss": 0.1459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13100755214691162, + "step": 4175, + "valid_targets_mean": 3437.8, + "valid_targets_min": 858 + }, + { + "epoch": 6.32375189107413, + "grad_norm": 0.44032005356083065, + "learning_rate": 1.1316073279002172e-06, + "loss": 0.1487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14807850122451782, + "step": 4180, + "valid_targets_mean": 4680.6, + "valid_targets_min": 522 + }, + { + "epoch": 6.331316187594553, + "grad_norm": 0.4547949954293122, + "learning_rate": 1.1067234823206951e-06, + "loss": 0.1545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14795613288879395, + "step": 4185, + "valid_targets_mean": 4760.6, + "valid_targets_min": 476 + }, + { + "epoch": 6.338880484114977, + "grad_norm": 0.4554441505241382, + "learning_rate": 1.0821084960502404e-06, + "loss": 0.1641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1481642872095108, + "step": 4190, + "valid_targets_mean": 4714.2, + "valid_targets_min": 2218 + }, + { + "epoch": 6.346444780635401, + "grad_norm": 0.4378844494460036, + "learning_rate": 1.0577627193705098e-06, + "loss": 0.1618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16023436188697815, + "step": 4195, + "valid_targets_mean": 4950.8, + "valid_targets_min": 698 + }, + { + "epoch": 6.354009077155824, + "grad_norm": 0.41590217934510926, + "learning_rate": 1.0336864987321938e-06, + "loss": 0.1546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13555586338043213, + "step": 4200, + "valid_targets_mean": 5161.0, + "valid_targets_min": 2019 + }, + { + "epoch": 6.361573373676248, + "grad_norm": 0.5143664594370752, + "learning_rate": 1.0098801767500842e-06, + "loss": 0.1465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1585438847541809, + "step": 4205, + "valid_targets_mean": 4201.8, + "valid_targets_min": 2109 + }, + { + "epoch": 6.369137670196672, + "grad_norm": 0.4399131246610985, + "learning_rate": 9.863440921982104e-07, + "loss": 0.1448, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13819454610347748, + "step": 4210, + "valid_targets_mean": 4746.9, + "valid_targets_min": 747 + }, + { + "epoch": 6.376701966717095, + "grad_norm": 0.8335656415915854, + "learning_rate": 9.630785800049947e-07, + "loss": 0.1534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16780400276184082, + "step": 4215, + "valid_targets_mean": 4925.7, + "valid_targets_min": 746 + }, + { + "epoch": 6.384266263237519, + "grad_norm": 0.4067684611911977, + "learning_rate": 9.40083971248511e-07, + "loss": 0.1561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1596384346485138, + "step": 4220, + "valid_targets_mean": 6230.5, + "valid_targets_min": 587 + }, + { + "epoch": 6.391830559757943, + "grad_norm": 0.43309543072557627, + "learning_rate": 9.173605931517526e-07, + "loss": 0.152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15254181623458862, + "step": 4225, + "valid_targets_mean": 5193.5, + "valid_targets_min": 551 + }, + { + "epoch": 6.3993948562783665, + "grad_norm": 0.4574867684924863, + "learning_rate": 8.949087690780023e-07, + "loss": 0.1648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15973302721977234, + "step": 4230, + "valid_targets_mean": 4612.9, + "valid_targets_min": 506 + }, + { + "epoch": 6.406959152798789, + "grad_norm": 0.4542653244014224, + "learning_rate": 8.727288185262029e-07, + "loss": 0.1687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1596071720123291, + "step": 4235, + "valid_targets_mean": 4488.4, + "valid_targets_min": 416 + }, + { + "epoch": 6.414523449319213, + "grad_norm": 0.4572109475990556, + "learning_rate": 8.508210571264186e-07, + "loss": 0.1458, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14880454540252686, + "step": 4240, + "valid_targets_mean": 5173.1, + "valid_targets_min": 716 + }, + { + "epoch": 6.422087745839637, + "grad_norm": 0.6070546959330988, + "learning_rate": 8.291857966353545e-07, + "loss": 0.1734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16235873103141785, + "step": 4245, + "valid_targets_mean": 3878.2, + "valid_targets_min": 434 + }, + { + "epoch": 6.42965204236006, + "grad_norm": 0.5670598499888627, + "learning_rate": 8.078233449319128e-07, + "loss": 0.1622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16082099080085754, + "step": 4250, + "valid_targets_mean": 3953.2, + "valid_targets_min": 511 + }, + { + "epoch": 6.437216338880484, + "grad_norm": 0.41041322824939125, + "learning_rate": 7.867340060128037e-07, + "loss": 0.1463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13654690980911255, + "step": 4255, + "valid_targets_mean": 5684.5, + "valid_targets_min": 361 + }, + { + "epoch": 6.444780635400908, + "grad_norm": 0.4199160542149558, + "learning_rate": 7.659180799882371e-07, + "loss": 0.1583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15124070644378662, + "step": 4260, + "valid_targets_mean": 5140.1, + "valid_targets_min": 332 + }, + { + "epoch": 6.452344931921331, + "grad_norm": 0.4939206548010923, + "learning_rate": 7.453758630776398e-07, + "loss": 0.1463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16383801400661469, + "step": 4265, + "valid_targets_mean": 4354.5, + "valid_targets_min": 850 + }, + { + "epoch": 6.459909228441755, + "grad_norm": 0.4501260238297891, + "learning_rate": 7.25107647605432e-07, + "loss": 0.1635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15669146180152893, + "step": 4270, + "valid_targets_mean": 5121.8, + "valid_targets_min": 677 + }, + { + "epoch": 6.467473524962179, + "grad_norm": 0.4734703379153941, + "learning_rate": 7.051137219968885e-07, + "loss": 0.1505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14317908883094788, + "step": 4275, + "valid_targets_mean": 4474.1, + "valid_targets_min": 571 + }, + { + "epoch": 6.475037821482602, + "grad_norm": 0.4564012604214816, + "learning_rate": 6.853943707740218e-07, + "loss": 0.1516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12744159996509552, + "step": 4280, + "valid_targets_mean": 4480.9, + "valid_targets_min": 1132 + }, + { + "epoch": 6.482602118003026, + "grad_norm": 0.4721394856153527, + "learning_rate": 6.659498745515258e-07, + "loss": 0.1608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1510826200246811, + "step": 4285, + "valid_targets_mean": 4945.0, + "valid_targets_min": 706 + }, + { + "epoch": 6.49016641452345, + "grad_norm": 0.5177102676493053, + "learning_rate": 6.467805100328117e-07, + "loss": 0.1442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14990335702896118, + "step": 4290, + "valid_targets_mean": 4293.1, + "valid_targets_min": 496 + }, + { + "epoch": 6.497730711043873, + "grad_norm": 0.4743441140245587, + "learning_rate": 6.278865500060271e-07, + "loss": 0.1635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18568217754364014, + "step": 4295, + "valid_targets_mean": 4790.9, + "valid_targets_min": 1246 + }, + { + "epoch": 6.505295007564296, + "grad_norm": 0.46507859849231403, + "learning_rate": 6.092682633402103e-07, + "loss": 0.1494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14523780345916748, + "step": 4300, + "valid_targets_mean": 5654.4, + "valid_targets_min": 1877 + }, + { + "epoch": 6.51285930408472, + "grad_norm": 0.43490921069799204, + "learning_rate": 5.909259149814505e-07, + "loss": 0.1546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1442725956439972, + "step": 4305, + "valid_targets_mean": 5352.5, + "valid_targets_min": 474 + }, + { + "epoch": 6.5204236006051435, + "grad_norm": 0.4707066879537463, + "learning_rate": 5.728597659491142e-07, + "loss": 0.1635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1738138496875763, + "step": 4310, + "valid_targets_mean": 4893.9, + "valid_targets_min": 254 + }, + { + "epoch": 6.527987897125567, + "grad_norm": 0.4241793853464637, + "learning_rate": 5.550700733321379e-07, + "loss": 0.1737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1556629091501236, + "step": 4315, + "valid_targets_mean": 5057.2, + "valid_targets_min": 2822 + }, + { + "epoch": 6.535552193645991, + "grad_norm": 0.46368809571656755, + "learning_rate": 5.375570902853633e-07, + "loss": 0.1516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13116100430488586, + "step": 4320, + "valid_targets_mean": 4321.1, + "valid_targets_min": 507 + }, + { + "epoch": 6.5431164901664145, + "grad_norm": 0.5052781045290954, + "learning_rate": 5.203210660259439e-07, + "loss": 0.1603, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16794294118881226, + "step": 4325, + "valid_targets_mean": 4214.9, + "valid_targets_min": 611 + }, + { + "epoch": 6.550680786686838, + "grad_norm": 0.5073596132262154, + "learning_rate": 5.033622458297859e-07, + "loss": 0.165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1713084876537323, + "step": 4330, + "valid_targets_mean": 4483.1, + "valid_targets_min": 929 + }, + { + "epoch": 6.558245083207262, + "grad_norm": 0.4964554507482467, + "learning_rate": 4.866808710280691e-07, + "loss": 0.1437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1499142050743103, + "step": 4335, + "valid_targets_mean": 4372.1, + "valid_targets_min": 486 + }, + { + "epoch": 6.5658093797276855, + "grad_norm": 0.519199836361022, + "learning_rate": 4.702771790038041e-07, + "loss": 0.1496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1542699635028839, + "step": 4340, + "valid_targets_mean": 3608.7, + "valid_targets_min": 520 + }, + { + "epoch": 6.573373676248109, + "grad_norm": 0.5427545547266298, + "learning_rate": 4.5415140318846306e-07, + "loss": 0.1537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18496286869049072, + "step": 4345, + "valid_targets_mean": 4414.1, + "valid_targets_min": 769 + }, + { + "epoch": 6.580937972768533, + "grad_norm": 0.4891244974517963, + "learning_rate": 4.383037730586481e-07, + "loss": 0.1694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1510729044675827, + "step": 4350, + "valid_targets_mean": 4612.4, + "valid_targets_min": 345 + }, + { + "epoch": 6.588502269288956, + "grad_norm": 0.4882037237746936, + "learning_rate": 4.227345141328343e-07, + "loss": 0.1509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17036430537700653, + "step": 4355, + "valid_targets_mean": 5320.2, + "valid_targets_min": 1887 + }, + { + "epoch": 6.59606656580938, + "grad_norm": 0.4307780734929585, + "learning_rate": 4.074438479681564e-07, + "loss": 0.1556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1584295928478241, + "step": 4360, + "valid_targets_mean": 5430.9, + "valid_targets_min": 927 + }, + { + "epoch": 6.603630862329803, + "grad_norm": 0.4772782028419242, + "learning_rate": 3.924319921572561e-07, + "loss": 0.1506, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1360929310321808, + "step": 4365, + "valid_targets_mean": 3927.8, + "valid_targets_min": 688 + }, + { + "epoch": 6.611195158850227, + "grad_norm": 0.50833656176906, + "learning_rate": 3.7769916032518227e-07, + "loss": 0.1459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16856953501701355, + "step": 4370, + "valid_targets_mean": 4089.0, + "valid_targets_min": 711 + }, + { + "epoch": 6.61875945537065, + "grad_norm": 0.656485093888422, + "learning_rate": 3.63245562126362e-07, + "loss": 0.1571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15266793966293335, + "step": 4375, + "valid_targets_mean": 5172.1, + "valid_targets_min": 642 + }, + { + "epoch": 6.626323751891074, + "grad_norm": 0.4561975368678506, + "learning_rate": 3.490714032416032e-07, + "loss": 0.1679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.183062344789505, + "step": 4380, + "valid_targets_mean": 5307.6, + "valid_targets_min": 1195 + }, + { + "epoch": 6.633888048411498, + "grad_norm": 0.7110131773558642, + "learning_rate": 3.351768853751769e-07, + "loss": 0.1487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13424867391586304, + "step": 4385, + "valid_targets_mean": 4276.8, + "valid_targets_min": 509 + }, + { + "epoch": 6.641452344931921, + "grad_norm": 0.4678252102427936, + "learning_rate": 3.2156220625194633e-07, + "loss": 0.1577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14004451036453247, + "step": 4390, + "valid_targets_mean": 5015.1, + "valid_targets_min": 1259 + }, + { + "epoch": 6.649016641452345, + "grad_norm": 0.4246923427532061, + "learning_rate": 3.082275596145445e-07, + "loss": 0.1562, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17100787162780762, + "step": 4395, + "valid_targets_mean": 5502.5, + "valid_targets_min": 2260 + }, + { + "epoch": 6.656580937972769, + "grad_norm": 0.5330628132067585, + "learning_rate": 2.951731352206322e-07, + "loss": 0.1554, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1702652871608734, + "step": 4400, + "valid_targets_mean": 3936.1, + "valid_targets_min": 665 + }, + { + "epoch": 6.664145234493192, + "grad_norm": 0.4965404997875294, + "learning_rate": 2.8239911884018423e-07, + "loss": 0.1584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16452646255493164, + "step": 4405, + "valid_targets_mean": 4274.4, + "valid_targets_min": 877 + }, + { + "epoch": 6.671709531013616, + "grad_norm": 0.4915934752549612, + "learning_rate": 2.69905692252852e-07, + "loss": 0.1502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15462955832481384, + "step": 4410, + "valid_targets_mean": 4155.4, + "valid_targets_min": 632 + }, + { + "epoch": 6.67927382753404, + "grad_norm": 0.4336803794043365, + "learning_rate": 2.576930332453742e-07, + "loss": 0.1625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18209487199783325, + "step": 4415, + "valid_targets_mean": 6130.0, + "valid_targets_min": 637 + }, + { + "epoch": 6.6868381240544625, + "grad_norm": 0.4812053413355094, + "learning_rate": 2.4576131560905216e-07, + "loss": 0.1549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16345643997192383, + "step": 4420, + "valid_targets_mean": 4934.9, + "valid_targets_min": 977 + }, + { + "epoch": 6.694402420574886, + "grad_norm": 0.49964213190276036, + "learning_rate": 2.341107091372674e-07, + "loss": 0.1677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17566847801208496, + "step": 4425, + "valid_targets_mean": 4736.9, + "valid_targets_min": 923 + }, + { + "epoch": 6.70196671709531, + "grad_norm": 0.4652703083015772, + "learning_rate": 2.2274137962307264e-07, + "loss": 0.1465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14961759746074677, + "step": 4430, + "valid_targets_mean": 4612.9, + "valid_targets_min": 1135 + }, + { + "epoch": 6.709531013615734, + "grad_norm": 0.43970643901140566, + "learning_rate": 2.1165348885683557e-07, + "loss": 0.1617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1801401823759079, + "step": 4435, + "valid_targets_mean": 5360.0, + "valid_targets_min": 805 + }, + { + "epoch": 6.717095310136157, + "grad_norm": 0.5006540034502627, + "learning_rate": 2.0084719462392544e-07, + "loss": 0.1694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15871846675872803, + "step": 4440, + "valid_targets_mean": 4147.4, + "valid_targets_min": 740 + }, + { + "epoch": 6.724659606656581, + "grad_norm": 0.4655065162357877, + "learning_rate": 1.903226507024769e-07, + "loss": 0.1554, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17267106473445892, + "step": 4445, + "valid_targets_mean": 4900.9, + "valid_targets_min": 994 + }, + { + "epoch": 6.732223903177005, + "grad_norm": 0.4776542317912404, + "learning_rate": 1.800800068611941e-07, + "loss": 0.1619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14296205341815948, + "step": 4450, + "valid_targets_mean": 4429.6, + "valid_targets_min": 644 + }, + { + "epoch": 6.739788199697428, + "grad_norm": 0.4645194361477701, + "learning_rate": 1.7011940885723222e-07, + "loss": 0.1593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.158104807138443, + "step": 4455, + "valid_targets_mean": 4528.9, + "valid_targets_min": 719 + }, + { + "epoch": 6.747352496217852, + "grad_norm": 0.463140877582521, + "learning_rate": 1.60440998434106e-07, + "loss": 0.164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15552181005477905, + "step": 4460, + "valid_targets_mean": 4513.7, + "valid_targets_min": 333 + }, + { + "epoch": 6.754916792738276, + "grad_norm": 0.4310101427133752, + "learning_rate": 1.5104491331968674e-07, + "loss": 0.1552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15951894223690033, + "step": 4465, + "valid_targets_mean": 5195.2, + "valid_targets_min": 814 + }, + { + "epoch": 6.762481089258699, + "grad_norm": 0.48515138640133515, + "learning_rate": 1.4193128722423954e-07, + "loss": 0.1586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1668251007795334, + "step": 4470, + "valid_targets_mean": 4821.4, + "valid_targets_min": 1072 + }, + { + "epoch": 6.770045385779122, + "grad_norm": 0.46924780947969497, + "learning_rate": 1.3310024983851367e-07, + "loss": 0.1551, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14621658623218536, + "step": 4475, + "valid_targets_mean": 4700.8, + "valid_targets_min": 578 + }, + { + "epoch": 6.777609682299547, + "grad_norm": 0.47997006639888423, + "learning_rate": 1.2455192683189955e-07, + "loss": 0.1485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1525563895702362, + "step": 4480, + "valid_targets_mean": 4628.7, + "valid_targets_min": 749 + }, + { + "epoch": 6.785173978819969, + "grad_norm": 0.44729024533554707, + "learning_rate": 1.1628643985064802e-07, + "loss": 0.1515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1606258600950241, + "step": 4485, + "valid_targets_mean": 5041.1, + "valid_targets_min": 2257 + }, + { + "epoch": 6.792738275340393, + "grad_norm": 0.5567659415227543, + "learning_rate": 1.0830390651613399e-07, + "loss": 0.1588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1706101894378662, + "step": 4490, + "valid_targets_mean": 3684.8, + "valid_targets_min": 533 + }, + { + "epoch": 6.800302571860817, + "grad_norm": 0.44905642734474416, + "learning_rate": 1.0060444042317984e-07, + "loss": 0.1618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1561523973941803, + "step": 4495, + "valid_targets_mean": 5226.9, + "valid_targets_min": 2218 + }, + { + "epoch": 6.80786686838124, + "grad_norm": 0.44526000556057144, + "learning_rate": 9.318815113843915e-08, + "loss": 0.1581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14291979372501373, + "step": 4500, + "valid_targets_mean": 4976.4, + "valid_targets_min": 970 + }, + { + "epoch": 6.815431164901664, + "grad_norm": 0.41389807910948084, + "learning_rate": 8.605514419884442e-08, + "loss": 0.147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1524887979030609, + "step": 4505, + "valid_targets_mean": 5439.3, + "valid_targets_min": 2445 + }, + { + "epoch": 6.822995461422088, + "grad_norm": 0.573972759244845, + "learning_rate": 7.92055211100995e-08, + "loss": 0.1546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14557316899299622, + "step": 4510, + "valid_targets_mean": 4493.8, + "valid_targets_min": 1014 + }, + { + "epoch": 6.8305597579425115, + "grad_norm": 0.49554661750919793, + "learning_rate": 7.263937934523402e-08, + "loss": 0.15, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.146365225315094, + "step": 4515, + "valid_targets_mean": 4681.9, + "valid_targets_min": 662 + }, + { + "epoch": 6.838124054462935, + "grad_norm": 0.41268242005804795, + "learning_rate": 6.635681234321789e-08, + "loss": 0.1544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12661297619342804, + "step": 4520, + "valid_targets_mean": 5688.6, + "valid_targets_min": 672 + }, + { + "epoch": 6.845688350983359, + "grad_norm": 0.5245346275052402, + "learning_rate": 6.035790950764008e-08, + "loss": 0.1459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15378622710704803, + "step": 4525, + "valid_targets_mean": 3729.6, + "valid_targets_min": 435 + }, + { + "epoch": 6.8532526475037825, + "grad_norm": 0.536981712222097, + "learning_rate": 5.464275620542081e-08, + "loss": 0.1592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16862401366233826, + "step": 4530, + "valid_targets_mean": 4265.4, + "valid_targets_min": 617 + }, + { + "epoch": 6.860816944024206, + "grad_norm": 0.661040289886661, + "learning_rate": 4.921143376560355e-08, + "loss": 0.1792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19529613852500916, + "step": 4535, + "valid_targets_mean": 4173.4, + "valid_targets_min": 903 + }, + { + "epoch": 6.868381240544629, + "grad_norm": 0.4025079338035215, + "learning_rate": 4.4064019478207154e-08, + "loss": 0.1444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14101910591125488, + "step": 4540, + "valid_targets_mean": 5818.1, + "valid_targets_min": 527 + }, + { + "epoch": 6.875945537065053, + "grad_norm": 0.4700049710206088, + "learning_rate": 3.920058659310666e-08, + "loss": 0.1507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16515228152275085, + "step": 4545, + "valid_targets_mean": 4960.9, + "valid_targets_min": 620 + }, + { + "epoch": 6.883509833585476, + "grad_norm": 0.4603067526871369, + "learning_rate": 3.4621204319011946e-08, + "loss": 0.1493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14470770955085754, + "step": 4550, + "valid_targets_mean": 4982.2, + "valid_targets_min": 831 + }, + { + "epoch": 6.8910741301059, + "grad_norm": 0.4673350592645921, + "learning_rate": 3.032593782246629e-08, + "loss": 0.1578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14939948916435242, + "step": 4555, + "valid_targets_mean": 4254.9, + "valid_targets_min": 987 + }, + { + "epoch": 6.898638426626324, + "grad_norm": 0.4517397456464997, + "learning_rate": 2.6314848226927094e-08, + "loss": 0.1453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15728193521499634, + "step": 4560, + "valid_targets_mean": 4525.2, + "valid_targets_min": 711 + }, + { + "epoch": 6.906202723146747, + "grad_norm": 0.5117006909099115, + "learning_rate": 2.258799261189326e-08, + "loss": 0.1506, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17386361956596375, + "step": 4565, + "valid_targets_mean": 4593.4, + "valid_targets_min": 630 + }, + { + "epoch": 6.913767019667171, + "grad_norm": 0.41306104008904443, + "learning_rate": 1.9145424012096957e-08, + "loss": 0.1525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15774936974048615, + "step": 4570, + "valid_targets_mean": 6134.3, + "valid_targets_min": 2953 + }, + { + "epoch": 6.921331316187595, + "grad_norm": 0.40696459882097896, + "learning_rate": 1.5987191416744208e-08, + "loss": 0.1611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1495034098625183, + "step": 4575, + "valid_targets_mean": 6611.8, + "valid_targets_min": 1330 + }, + { + "epoch": 6.928895612708018, + "grad_norm": 0.47931424197263106, + "learning_rate": 1.3113339768817679e-08, + "loss": 0.1486, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16390691697597504, + "step": 4580, + "valid_targets_mean": 4471.8, + "valid_targets_min": 723 + }, + { + "epoch": 6.936459909228442, + "grad_norm": 0.4461599389375818, + "learning_rate": 1.0523909964441636e-08, + "loss": 0.1495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15007872879505157, + "step": 4585, + "valid_targets_mean": 4946.4, + "valid_targets_min": 738 + }, + { + "epoch": 6.944024205748866, + "grad_norm": 0.430644073341138, + "learning_rate": 8.218938852295744e-09, + "loss": 0.1484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11911873519420624, + "step": 4590, + "valid_targets_mean": 5387.9, + "valid_targets_min": 681 + }, + { + "epoch": 6.9515885022692885, + "grad_norm": 0.544666118636178, + "learning_rate": 6.1984592330954776e-09, + "loss": 0.1391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12848445773124695, + "step": 4595, + "valid_targets_mean": 4979.9, + "valid_targets_min": 364 + }, + { + "epoch": 6.959152798789713, + "grad_norm": 0.47313846425247624, + "learning_rate": 4.4624998591191735e-09, + "loss": 0.1507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.164044588804245, + "step": 4600, + "valid_targets_mean": 4802.0, + "valid_targets_min": 1697 + }, + { + "epoch": 6.966717095310136, + "grad_norm": 0.5499679479102836, + "learning_rate": 3.0110854337994654e-09, + "loss": 0.1599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1561359018087387, + "step": 4605, + "valid_targets_mean": 3980.2, + "valid_targets_min": 711 + }, + { + "epoch": 6.9742813918305595, + "grad_norm": 0.47453995839514496, + "learning_rate": 1.8442366113791132e-09, + "loss": 0.1616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16566257178783417, + "step": 4610, + "valid_targets_mean": 4593.8, + "valid_targets_min": 932 + }, + { + "epoch": 6.981845688350983, + "grad_norm": 0.4033635076972133, + "learning_rate": 9.619699966090245e-10, + "loss": 0.1608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15273799002170563, + "step": 4615, + "valid_targets_mean": 6477.4, + "valid_targets_min": 1671 + }, + { + "epoch": 6.989409984871407, + "grad_norm": 0.45663261359056423, + "learning_rate": 3.642981445173277e-10, + "loss": 0.1482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14979088306427002, + "step": 4620, + "valid_targets_mean": 4614.1, + "valid_targets_min": 625 + }, + { + "epoch": 6.9969742813918305, + "grad_norm": 0.42449100353688324, + "learning_rate": 5.1229560225074525e-11, + "loss": 0.1511, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13834691047668457, + "step": 4625, + "valid_targets_mean": 5519.8, + "valid_targets_min": 2736 + }, + { + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13807812333106995, + "step": 4627, + "total_flos": 1697568073121792.0, + "train_loss": 0.19562295946828587, + "train_runtime": 28413.4165, + "train_samples_per_second": 2.604, + "train_steps_per_second": 0.163, + "valid_targets_mean": 4912.6, + "valid_targets_min": 607 + } + ], + "logging_steps": 5, + "max_steps": 4627, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 1500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1697568073121792.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}