diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,9980 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 4515, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.007751937984496124, + "grad_norm": 10.737414411251518, + "learning_rate": 3.5398230088495575e-07, + "loss": 0.6122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5790383219718933, + "step": 5, + "valid_targets_mean": 5473.9, + "valid_targets_min": 2003 + }, + { + "epoch": 0.015503875968992248, + "grad_norm": 12.969006644336845, + "learning_rate": 7.964601769911505e-07, + "loss": 0.6056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6179184317588806, + "step": 10, + "valid_targets_mean": 4275.1, + "valid_targets_min": 2399 + }, + { + "epoch": 0.023255813953488372, + "grad_norm": 10.73852678993892, + "learning_rate": 1.2389380530973452e-06, + "loss": 0.6059, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6460979580879211, + "step": 15, + "valid_targets_mean": 5231.6, + "valid_targets_min": 343 + }, + { + "epoch": 0.031007751937984496, + "grad_norm": 10.022340877272923, + "learning_rate": 1.68141592920354e-06, + "loss": 0.5736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5392199158668518, + "step": 20, + "valid_targets_mean": 5241.8, + "valid_targets_min": 310 + }, + { + "epoch": 0.03875968992248062, + "grad_norm": 5.037849526968358, + "learning_rate": 2.1238938053097345e-06, + "loss": 0.4978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3818526268005371, + "step": 25, + "valid_targets_mean": 5527.9, + "valid_targets_min": 614 + }, + { + "epoch": 0.046511627906976744, + "grad_norm": 4.262678982312242, + "learning_rate": 2.5663716814159294e-06, + "loss": 0.5324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6495624780654907, + "step": 30, + "valid_targets_mean": 3715.2, + "valid_targets_min": 408 + }, + { + "epoch": 0.05426356589147287, + "grad_norm": 2.3482137938148466, + "learning_rate": 3.0088495575221242e-06, + "loss": 0.4639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5476200580596924, + "step": 35, + "valid_targets_mean": 5361.5, + "valid_targets_min": 2437 + }, + { + "epoch": 0.06201550387596899, + "grad_norm": 1.46476205992856, + "learning_rate": 3.4513274336283186e-06, + "loss": 0.4293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46131038665771484, + "step": 40, + "valid_targets_mean": 5086.5, + "valid_targets_min": 272 + }, + { + "epoch": 0.06976744186046512, + "grad_norm": 1.3283216378548661, + "learning_rate": 3.8938053097345135e-06, + "loss": 0.4646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44244861602783203, + "step": 45, + "valid_targets_mean": 3978.5, + "valid_targets_min": 1865 + }, + { + "epoch": 0.07751937984496124, + "grad_norm": 1.0028919580355764, + "learning_rate": 4.336283185840709e-06, + "loss": 0.4339, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4484736919403076, + "step": 50, + "valid_targets_mean": 5080.6, + "valid_targets_min": 2468 + }, + { + "epoch": 0.08527131782945736, + "grad_norm": 0.8265483597929661, + "learning_rate": 4.778761061946903e-06, + "loss": 0.3985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43008673191070557, + "step": 55, + "valid_targets_mean": 4308.8, + "valid_targets_min": 598 + }, + { + "epoch": 0.09302325581395349, + "grad_norm": 0.7109955989422742, + "learning_rate": 5.2212389380530985e-06, + "loss": 0.3726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34888190031051636, + "step": 60, + "valid_targets_mean": 4633.4, + "valid_targets_min": 2442 + }, + { + "epoch": 0.10077519379844961, + "grad_norm": 0.655988135659704, + "learning_rate": 5.663716814159292e-06, + "loss": 0.3799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34532463550567627, + "step": 65, + "valid_targets_mean": 3801.8, + "valid_targets_min": 299 + }, + { + "epoch": 0.10852713178294573, + "grad_norm": 0.5932763195572598, + "learning_rate": 6.1061946902654865e-06, + "loss": 0.3585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32048162817955017, + "step": 70, + "valid_targets_mean": 4474.1, + "valid_targets_min": 1430 + }, + { + "epoch": 0.11627906976744186, + "grad_norm": 0.5372600166593284, + "learning_rate": 6.548672566371682e-06, + "loss": 0.3061, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3458303213119507, + "step": 75, + "valid_targets_mean": 5349.9, + "valid_targets_min": 2632 + }, + { + "epoch": 0.12403100775193798, + "grad_norm": 0.5539936993292226, + "learning_rate": 6.991150442477876e-06, + "loss": 0.357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3393269181251526, + "step": 80, + "valid_targets_mean": 4613.9, + "valid_targets_min": 642 + }, + { + "epoch": 0.13178294573643412, + "grad_norm": 0.5623006682180124, + "learning_rate": 7.4336283185840714e-06, + "loss": 0.348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37088829278945923, + "step": 85, + "valid_targets_mean": 5265.9, + "valid_targets_min": 2035 + }, + { + "epoch": 0.13953488372093023, + "grad_norm": 0.5992837863592103, + "learning_rate": 7.876106194690266e-06, + "loss": 0.3268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35172638297080994, + "step": 90, + "valid_targets_mean": 4771.9, + "valid_targets_min": 1048 + }, + { + "epoch": 0.14728682170542637, + "grad_norm": 0.45192427418479936, + "learning_rate": 8.31858407079646e-06, + "loss": 0.3372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3198494017124176, + "step": 95, + "valid_targets_mean": 5918.0, + "valid_targets_min": 368 + }, + { + "epoch": 0.15503875968992248, + "grad_norm": 0.625926887246749, + "learning_rate": 8.761061946902656e-06, + "loss": 0.3209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40857595205307007, + "step": 100, + "valid_targets_mean": 5248.5, + "valid_targets_min": 570 + }, + { + "epoch": 0.16279069767441862, + "grad_norm": 0.6811575704902189, + "learning_rate": 9.203539823008851e-06, + "loss": 0.3315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38290804624557495, + "step": 105, + "valid_targets_mean": 4731.9, + "valid_targets_min": 422 + }, + { + "epoch": 0.17054263565891473, + "grad_norm": 0.5566478870304039, + "learning_rate": 9.646017699115045e-06, + "loss": 0.3422, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.313689649105072, + "step": 110, + "valid_targets_mean": 4493.0, + "valid_targets_min": 485 + }, + { + "epoch": 0.17829457364341086, + "grad_norm": 0.41287589462882723, + "learning_rate": 1.008849557522124e-05, + "loss": 0.2705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23904478549957275, + "step": 115, + "valid_targets_mean": 4980.3, + "valid_targets_min": 632 + }, + { + "epoch": 0.18604651162790697, + "grad_norm": 0.522088179939139, + "learning_rate": 1.0530973451327436e-05, + "loss": 0.3139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31741863489151, + "step": 120, + "valid_targets_mean": 3859.9, + "valid_targets_min": 361 + }, + { + "epoch": 0.1937984496124031, + "grad_norm": 0.5022551201886791, + "learning_rate": 1.0973451327433629e-05, + "loss": 0.2936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29989367723464966, + "step": 125, + "valid_targets_mean": 4806.8, + "valid_targets_min": 2268 + }, + { + "epoch": 0.20155038759689922, + "grad_norm": 0.43337385085835556, + "learning_rate": 1.1415929203539825e-05, + "loss": 0.316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26593732833862305, + "step": 130, + "valid_targets_mean": 5703.4, + "valid_targets_min": 861 + }, + { + "epoch": 0.20930232558139536, + "grad_norm": 0.5362034077276845, + "learning_rate": 1.1858407079646019e-05, + "loss": 0.2958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26804763078689575, + "step": 135, + "valid_targets_mean": 5329.9, + "valid_targets_min": 436 + }, + { + "epoch": 0.21705426356589147, + "grad_norm": 0.7869829849569835, + "learning_rate": 1.2300884955752212e-05, + "loss": 0.295, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2954621911048889, + "step": 140, + "valid_targets_mean": 4087.9, + "valid_targets_min": 367 + }, + { + "epoch": 0.2248062015503876, + "grad_norm": 0.4284133599416018, + "learning_rate": 1.2743362831858408e-05, + "loss": 0.2592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22574186325073242, + "step": 145, + "valid_targets_mean": 4778.1, + "valid_targets_min": 658 + }, + { + "epoch": 0.23255813953488372, + "grad_norm": 0.43346642775630995, + "learning_rate": 1.3185840707964604e-05, + "loss": 0.265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2442144900560379, + "step": 150, + "valid_targets_mean": 4813.1, + "valid_targets_min": 370 + }, + { + "epoch": 0.24031007751937986, + "grad_norm": 0.4612125175821135, + "learning_rate": 1.3628318584070797e-05, + "loss": 0.2645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25243520736694336, + "step": 155, + "valid_targets_mean": 4157.1, + "valid_targets_min": 806 + }, + { + "epoch": 0.24806201550387597, + "grad_norm": 0.4456387275424371, + "learning_rate": 1.4070796460176991e-05, + "loss": 0.3351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28738337755203247, + "step": 160, + "valid_targets_mean": 5481.4, + "valid_targets_min": 1940 + }, + { + "epoch": 0.2558139534883721, + "grad_norm": 0.5654786805272515, + "learning_rate": 1.4513274336283187e-05, + "loss": 0.2997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3355092406272888, + "step": 165, + "valid_targets_mean": 5211.9, + "valid_targets_min": 1841 + }, + { + "epoch": 0.26356589147286824, + "grad_norm": 0.4528552978301014, + "learning_rate": 1.4955752212389383e-05, + "loss": 0.2687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24449273943901062, + "step": 170, + "valid_targets_mean": 4395.9, + "valid_targets_min": 716 + }, + { + "epoch": 0.2713178294573643, + "grad_norm": 0.5070510356795248, + "learning_rate": 1.5398230088495576e-05, + "loss": 0.2736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2762889266014099, + "step": 175, + "valid_targets_mean": 5060.6, + "valid_targets_min": 332 + }, + { + "epoch": 0.27906976744186046, + "grad_norm": 0.4109586147637433, + "learning_rate": 1.5840707964601772e-05, + "loss": 0.2469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27871203422546387, + "step": 180, + "valid_targets_mean": 5762.0, + "valid_targets_min": 317 + }, + { + "epoch": 0.2868217054263566, + "grad_norm": 0.5312588834611185, + "learning_rate": 1.628318584070797e-05, + "loss": 0.2602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21110810339450836, + "step": 185, + "valid_targets_mean": 4100.6, + "valid_targets_min": 312 + }, + { + "epoch": 0.29457364341085274, + "grad_norm": 0.5125684260379816, + "learning_rate": 1.672566371681416e-05, + "loss": 0.2584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2507615089416504, + "step": 190, + "valid_targets_mean": 4268.3, + "valid_targets_min": 2439 + }, + { + "epoch": 0.3023255813953488, + "grad_norm": 0.6649775832883916, + "learning_rate": 1.7168141592920354e-05, + "loss": 0.2877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41589394211769104, + "step": 195, + "valid_targets_mean": 3739.2, + "valid_targets_min": 536 + }, + { + "epoch": 0.31007751937984496, + "grad_norm": 0.5366482871518907, + "learning_rate": 1.761061946902655e-05, + "loss": 0.2587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27435043454170227, + "step": 200, + "valid_targets_mean": 5772.8, + "valid_targets_min": 2700 + }, + { + "epoch": 0.3178294573643411, + "grad_norm": 0.5074784266785846, + "learning_rate": 1.8053097345132743e-05, + "loss": 0.281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2721036970615387, + "step": 205, + "valid_targets_mean": 4397.0, + "valid_targets_min": 661 + }, + { + "epoch": 0.32558139534883723, + "grad_norm": 0.614506165275271, + "learning_rate": 1.849557522123894e-05, + "loss": 0.2917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30919021368026733, + "step": 210, + "valid_targets_mean": 4218.9, + "valid_targets_min": 472 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.5419591558618669, + "learning_rate": 1.8938053097345135e-05, + "loss": 0.2622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23832359910011292, + "step": 215, + "valid_targets_mean": 5124.2, + "valid_targets_min": 2981 + }, + { + "epoch": 0.34108527131782945, + "grad_norm": 0.5664964725729073, + "learning_rate": 1.9380530973451328e-05, + "loss": 0.2701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2373562753200531, + "step": 220, + "valid_targets_mean": 3882.1, + "valid_targets_min": 282 + }, + { + "epoch": 0.3488372093023256, + "grad_norm": 0.4209274671883887, + "learning_rate": 1.9823008849557524e-05, + "loss": 0.2434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1936969757080078, + "step": 225, + "valid_targets_mean": 5481.6, + "valid_targets_min": 752 + }, + { + "epoch": 0.35658914728682173, + "grad_norm": 0.5074693857643466, + "learning_rate": 2.0265486725663717e-05, + "loss": 0.253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25986820459365845, + "step": 230, + "valid_targets_mean": 5012.3, + "valid_targets_min": 1855 + }, + { + "epoch": 0.3643410852713178, + "grad_norm": 0.5512784168137245, + "learning_rate": 2.0707964601769913e-05, + "loss": 0.2588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27459990978240967, + "step": 235, + "valid_targets_mean": 4073.4, + "valid_targets_min": 477 + }, + { + "epoch": 0.37209302325581395, + "grad_norm": 0.4347839613276555, + "learning_rate": 2.115044247787611e-05, + "loss": 0.2616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24624979496002197, + "step": 240, + "valid_targets_mean": 5311.7, + "valid_targets_min": 968 + }, + { + "epoch": 0.3798449612403101, + "grad_norm": 0.5190963993227697, + "learning_rate": 2.15929203539823e-05, + "loss": 0.2566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.252674400806427, + "step": 245, + "valid_targets_mean": 4257.4, + "valid_targets_min": 2687 + }, + { + "epoch": 0.3875968992248062, + "grad_norm": 0.5887863557318684, + "learning_rate": 2.2035398230088498e-05, + "loss": 0.2831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33351632952690125, + "step": 250, + "valid_targets_mean": 5280.2, + "valid_targets_min": 726 + }, + { + "epoch": 0.3953488372093023, + "grad_norm": 0.5068755318361087, + "learning_rate": 2.247787610619469e-05, + "loss": 0.2454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20935970544815063, + "step": 255, + "valid_targets_mean": 4357.3, + "valid_targets_min": 365 + }, + { + "epoch": 0.40310077519379844, + "grad_norm": 0.4660973149935113, + "learning_rate": 2.2920353982300883e-05, + "loss": 0.2583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2345641404390335, + "step": 260, + "valid_targets_mean": 4675.7, + "valid_targets_min": 472 + }, + { + "epoch": 0.4108527131782946, + "grad_norm": 0.4943641747428354, + "learning_rate": 2.3362831858407083e-05, + "loss": 0.2525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25245875120162964, + "step": 265, + "valid_targets_mean": 4586.7, + "valid_targets_min": 336 + }, + { + "epoch": 0.4186046511627907, + "grad_norm": 0.5733463209321147, + "learning_rate": 2.3805309734513275e-05, + "loss": 0.256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2648470103740692, + "step": 270, + "valid_targets_mean": 4878.5, + "valid_targets_min": 1433 + }, + { + "epoch": 0.4263565891472868, + "grad_norm": 0.5797109099250628, + "learning_rate": 2.424778761061947e-05, + "loss": 0.2686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23868052661418915, + "step": 275, + "valid_targets_mean": 4204.3, + "valid_targets_min": 1999 + }, + { + "epoch": 0.43410852713178294, + "grad_norm": 0.5102454710373611, + "learning_rate": 2.4690265486725668e-05, + "loss": 0.2299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19877958297729492, + "step": 280, + "valid_targets_mean": 4676.9, + "valid_targets_min": 720 + }, + { + "epoch": 0.4418604651162791, + "grad_norm": 0.5432212420309249, + "learning_rate": 2.513274336283186e-05, + "loss": 0.2822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30621248483657837, + "step": 285, + "valid_targets_mean": 4931.0, + "valid_targets_min": 1914 + }, + { + "epoch": 0.4496124031007752, + "grad_norm": 0.5173046537936798, + "learning_rate": 2.5575221238938056e-05, + "loss": 0.236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2177397459745407, + "step": 290, + "valid_targets_mean": 3951.5, + "valid_targets_min": 1846 + }, + { + "epoch": 0.4573643410852713, + "grad_norm": 0.47098388353840676, + "learning_rate": 2.601769911504425e-05, + "loss": 0.2311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23170709609985352, + "step": 295, + "valid_targets_mean": 5152.0, + "valid_targets_min": 285 + }, + { + "epoch": 0.46511627906976744, + "grad_norm": 0.5012563709590744, + "learning_rate": 2.6460176991150442e-05, + "loss": 0.2354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2445349246263504, + "step": 300, + "valid_targets_mean": 4086.8, + "valid_targets_min": 568 + }, + { + "epoch": 0.4728682170542636, + "grad_norm": 0.7598417766003686, + "learning_rate": 2.690265486725664e-05, + "loss": 0.2685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2794634699821472, + "step": 305, + "valid_targets_mean": 6700.9, + "valid_targets_min": 2494 + }, + { + "epoch": 0.4806201550387597, + "grad_norm": 0.48809284746866477, + "learning_rate": 2.7345132743362834e-05, + "loss": 0.251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23253998160362244, + "step": 310, + "valid_targets_mean": 4574.2, + "valid_targets_min": 600 + }, + { + "epoch": 0.4883720930232558, + "grad_norm": 0.5629581496018895, + "learning_rate": 2.7787610619469027e-05, + "loss": 0.2549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.275101900100708, + "step": 315, + "valid_targets_mean": 4482.4, + "valid_targets_min": 380 + }, + { + "epoch": 0.49612403100775193, + "grad_norm": 0.5149842618437763, + "learning_rate": 2.8230088495575226e-05, + "loss": 0.241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26416015625, + "step": 320, + "valid_targets_mean": 4816.7, + "valid_targets_min": 2959 + }, + { + "epoch": 0.5038759689922481, + "grad_norm": 0.5415600825598276, + "learning_rate": 2.867256637168142e-05, + "loss": 0.2523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2811618149280548, + "step": 325, + "valid_targets_mean": 5667.7, + "valid_targets_min": 559 + }, + { + "epoch": 0.5116279069767442, + "grad_norm": 0.5507831167513966, + "learning_rate": 2.9115044247787612e-05, + "loss": 0.2434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2697560787200928, + "step": 330, + "valid_targets_mean": 4328.9, + "valid_targets_min": 314 + }, + { + "epoch": 0.5193798449612403, + "grad_norm": 0.444364635402833, + "learning_rate": 2.9557522123893808e-05, + "loss": 0.2478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26696139574050903, + "step": 335, + "valid_targets_mean": 5249.1, + "valid_targets_min": 216 + }, + { + "epoch": 0.5271317829457365, + "grad_norm": 0.5882361391451728, + "learning_rate": 3.0000000000000004e-05, + "loss": 0.2576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2548302412033081, + "step": 340, + "valid_targets_mean": 3739.8, + "valid_targets_min": 2192 + }, + { + "epoch": 0.5348837209302325, + "grad_norm": 0.5152875122005142, + "learning_rate": 3.0442477876106197e-05, + "loss": 0.2569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23482279479503632, + "step": 345, + "valid_targets_mean": 3619.9, + "valid_targets_min": 325 + }, + { + "epoch": 0.5426356589147286, + "grad_norm": 0.4815633298934971, + "learning_rate": 3.0884955752212396e-05, + "loss": 0.2373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2350481003522873, + "step": 350, + "valid_targets_mean": 4721.2, + "valid_targets_min": 1866 + }, + { + "epoch": 0.5503875968992248, + "grad_norm": 0.4648544079642719, + "learning_rate": 3.132743362831859e-05, + "loss": 0.2243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.181733176112175, + "step": 355, + "valid_targets_mean": 5043.9, + "valid_targets_min": 1925 + }, + { + "epoch": 0.5581395348837209, + "grad_norm": 0.5037726577202668, + "learning_rate": 3.176991150442478e-05, + "loss": 0.2506, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29893919825553894, + "step": 360, + "valid_targets_mean": 4146.6, + "valid_targets_min": 419 + }, + { + "epoch": 0.5658914728682171, + "grad_norm": 0.4516342191965917, + "learning_rate": 3.2212389380530975e-05, + "loss": 0.269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.251533180475235, + "step": 365, + "valid_targets_mean": 5648.9, + "valid_targets_min": 1948 + }, + { + "epoch": 0.5736434108527132, + "grad_norm": 0.510314234160131, + "learning_rate": 3.265486725663717e-05, + "loss": 0.2389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2301664650440216, + "step": 370, + "valid_targets_mean": 4642.9, + "valid_targets_min": 357 + }, + { + "epoch": 0.5813953488372093, + "grad_norm": 0.5221420257676286, + "learning_rate": 3.309734513274337e-05, + "loss": 0.254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25842761993408203, + "step": 375, + "valid_targets_mean": 5166.7, + "valid_targets_min": 330 + }, + { + "epoch": 0.5891472868217055, + "grad_norm": 0.47841880099325107, + "learning_rate": 3.353982300884956e-05, + "loss": 0.2427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23205696046352386, + "step": 380, + "valid_targets_mean": 4452.9, + "valid_targets_min": 2554 + }, + { + "epoch": 0.5968992248062015, + "grad_norm": 0.607600902063009, + "learning_rate": 3.398230088495575e-05, + "loss": 0.2396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23086626827716827, + "step": 385, + "valid_targets_mean": 4332.0, + "valid_targets_min": 2478 + }, + { + "epoch": 0.6046511627906976, + "grad_norm": 0.5649861293006265, + "learning_rate": 3.4424778761061945e-05, + "loss": 0.2331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2606998682022095, + "step": 390, + "valid_targets_mean": 3863.3, + "valid_targets_min": 355 + }, + { + "epoch": 0.6124031007751938, + "grad_norm": 0.5467855289956822, + "learning_rate": 3.4867256637168145e-05, + "loss": 0.2519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24656961858272552, + "step": 395, + "valid_targets_mean": 4666.2, + "valid_targets_min": 1840 + }, + { + "epoch": 0.6201550387596899, + "grad_norm": 0.4033482565091616, + "learning_rate": 3.530973451327434e-05, + "loss": 0.2317, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19554291665554047, + "step": 400, + "valid_targets_mean": 5451.3, + "valid_targets_min": 285 + }, + { + "epoch": 0.627906976744186, + "grad_norm": 0.408434222869372, + "learning_rate": 3.575221238938053e-05, + "loss": 0.2523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20927652716636658, + "step": 405, + "valid_targets_mean": 5175.1, + "valid_targets_min": 719 + }, + { + "epoch": 0.6356589147286822, + "grad_norm": 0.5142450144995636, + "learning_rate": 3.619469026548673e-05, + "loss": 0.2522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28972408175468445, + "step": 410, + "valid_targets_mean": 4840.2, + "valid_targets_min": 330 + }, + { + "epoch": 0.6434108527131783, + "grad_norm": 0.5130835281170955, + "learning_rate": 3.663716814159292e-05, + "loss": 0.271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27192896604537964, + "step": 415, + "valid_targets_mean": 4130.9, + "valid_targets_min": 836 + }, + { + "epoch": 0.6511627906976745, + "grad_norm": 0.5264620060585927, + "learning_rate": 3.707964601769912e-05, + "loss": 0.2475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30271440744400024, + "step": 420, + "valid_targets_mean": 4858.8, + "valid_targets_min": 566 + }, + { + "epoch": 0.6589147286821705, + "grad_norm": 0.48973643965756275, + "learning_rate": 3.7522123893805314e-05, + "loss": 0.2348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19385236501693726, + "step": 425, + "valid_targets_mean": 4338.8, + "valid_targets_min": 808 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.47842223367968456, + "learning_rate": 3.796460176991151e-05, + "loss": 0.2404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24353685975074768, + "step": 430, + "valid_targets_mean": 4000.3, + "valid_targets_min": 2352 + }, + { + "epoch": 0.6744186046511628, + "grad_norm": 0.45816881401918463, + "learning_rate": 3.840707964601771e-05, + "loss": 0.2455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2597430646419525, + "step": 435, + "valid_targets_mean": 5271.6, + "valid_targets_min": 2162 + }, + { + "epoch": 0.6821705426356589, + "grad_norm": 0.6017326535707582, + "learning_rate": 3.88495575221239e-05, + "loss": 0.2444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24369432032108307, + "step": 440, + "valid_targets_mean": 3065.4, + "valid_targets_min": 304 + }, + { + "epoch": 0.689922480620155, + "grad_norm": 0.5404602142547137, + "learning_rate": 3.929203539823009e-05, + "loss": 0.2078, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.256170392036438, + "step": 445, + "valid_targets_mean": 3592.6, + "valid_targets_min": 639 + }, + { + "epoch": 0.6976744186046512, + "grad_norm": 0.4154279511759285, + "learning_rate": 3.9734513274336285e-05, + "loss": 0.2272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21489651501178741, + "step": 450, + "valid_targets_mean": 4713.6, + "valid_targets_min": 639 + }, + { + "epoch": 0.7054263565891473, + "grad_norm": 0.3730389962722997, + "learning_rate": 3.999997608524118e-05, + "loss": 0.2123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1950041949748993, + "step": 455, + "valid_targets_mean": 6850.4, + "valid_targets_min": 2445 + }, + { + "epoch": 0.7131782945736435, + "grad_norm": 0.5017736757425001, + "learning_rate": 3.999970704486118e-05, + "loss": 0.2408, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2439410388469696, + "step": 460, + "valid_targets_mean": 4726.7, + "valid_targets_min": 1933 + }, + { + "epoch": 0.7209302325581395, + "grad_norm": 0.5145474700945507, + "learning_rate": 3.999913907468731e-05, + "loss": 0.2448, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2903444170951843, + "step": 465, + "valid_targets_mean": 3954.4, + "valid_targets_min": 606 + }, + { + "epoch": 0.7286821705426356, + "grad_norm": 0.39985614862149943, + "learning_rate": 3.9998272183208866e-05, + "loss": 0.242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23064105212688446, + "step": 470, + "valid_targets_mean": 5725.1, + "valid_targets_min": 1885 + }, + { + "epoch": 0.7364341085271318, + "grad_norm": 0.47106899849704376, + "learning_rate": 3.999710638338303e-05, + "loss": 0.2096, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2777732014656067, + "step": 475, + "valid_targets_mean": 4932.4, + "valid_targets_min": 2437 + }, + { + "epoch": 0.7441860465116279, + "grad_norm": 0.5041908945885922, + "learning_rate": 3.999564169263465e-05, + "loss": 0.2204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22737307846546173, + "step": 480, + "valid_targets_mean": 4427.4, + "valid_targets_min": 1030 + }, + { + "epoch": 0.751937984496124, + "grad_norm": 0.49803283079798477, + "learning_rate": 3.9993878132856044e-05, + "loss": 0.2328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2756238579750061, + "step": 485, + "valid_targets_mean": 4327.5, + "valid_targets_min": 783 + }, + { + "epoch": 0.7596899224806202, + "grad_norm": 0.4206493823677622, + "learning_rate": 3.999181573040663e-05, + "loss": 0.2214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2046414017677307, + "step": 490, + "valid_targets_mean": 5396.8, + "valid_targets_min": 646 + }, + { + "epoch": 0.7674418604651163, + "grad_norm": 0.4197719415269444, + "learning_rate": 3.9989454516112524e-05, + "loss": 0.25, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2041558027267456, + "step": 495, + "valid_targets_mean": 4688.8, + "valid_targets_min": 527 + }, + { + "epoch": 0.7751937984496124, + "grad_norm": 0.49956665072862294, + "learning_rate": 3.9986794525266115e-05, + "loss": 0.2538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2720697820186615, + "step": 500, + "valid_targets_mean": 4991.1, + "valid_targets_min": 2320 + }, + { + "epoch": 0.7829457364341085, + "grad_norm": 0.5668838261772827, + "learning_rate": 3.998383579762552e-05, + "loss": 0.2275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22524034976959229, + "step": 505, + "valid_targets_mean": 4758.6, + "valid_targets_min": 623 + }, + { + "epoch": 0.7906976744186046, + "grad_norm": 0.4067566904060681, + "learning_rate": 3.998057837741396e-05, + "loss": 0.2368, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20347392559051514, + "step": 510, + "valid_targets_mean": 5299.1, + "valid_targets_min": 957 + }, + { + "epoch": 0.7984496124031008, + "grad_norm": 0.4983262141277965, + "learning_rate": 3.997702231331917e-05, + "loss": 0.2254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22835250198841095, + "step": 515, + "valid_targets_mean": 4676.3, + "valid_targets_min": 1825 + }, + { + "epoch": 0.8062015503875969, + "grad_norm": 0.4874737761112375, + "learning_rate": 3.9973167658492577e-05, + "loss": 0.2326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1982721984386444, + "step": 520, + "valid_targets_mean": 3951.7, + "valid_targets_min": 308 + }, + { + "epoch": 0.813953488372093, + "grad_norm": 0.4874095166955721, + "learning_rate": 3.99690144705486e-05, + "loss": 0.2588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27957573533058167, + "step": 525, + "valid_targets_mean": 4523.2, + "valid_targets_min": 630 + }, + { + "epoch": 0.8217054263565892, + "grad_norm": 0.46829582316727847, + "learning_rate": 3.996456281156372e-05, + "loss": 0.2304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2339038848876953, + "step": 530, + "valid_targets_mean": 4400.8, + "valid_targets_min": 996 + }, + { + "epoch": 0.8294573643410853, + "grad_norm": 0.4173667508062997, + "learning_rate": 3.995981274807561e-05, + "loss": 0.2288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19771169126033783, + "step": 535, + "valid_targets_mean": 4821.5, + "valid_targets_min": 978 + }, + { + "epoch": 0.8372093023255814, + "grad_norm": 0.4998240530264507, + "learning_rate": 3.995476435108205e-05, + "loss": 0.2204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23739448189735413, + "step": 540, + "valid_targets_mean": 3693.3, + "valid_targets_min": 527 + }, + { + "epoch": 0.8449612403100775, + "grad_norm": 0.40077148780586164, + "learning_rate": 3.994941769603999e-05, + "loss": 0.2402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25493863224983215, + "step": 545, + "valid_targets_mean": 6980.4, + "valid_targets_min": 3136 + }, + { + "epoch": 0.8527131782945736, + "grad_norm": 0.5106995708311232, + "learning_rate": 3.9943772862864303e-05, + "loss": 0.248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.295318067073822, + "step": 550, + "valid_targets_mean": 4588.8, + "valid_targets_min": 2486 + }, + { + "epoch": 0.8604651162790697, + "grad_norm": 0.4775850652610667, + "learning_rate": 3.993782993592668e-05, + "loss": 0.2244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2367231398820877, + "step": 555, + "valid_targets_mean": 4133.6, + "valid_targets_min": 513 + }, + { + "epoch": 0.8682170542635659, + "grad_norm": 0.4680021737678082, + "learning_rate": 3.9931589004054305e-05, + "loss": 0.2394, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23238858580589294, + "step": 560, + "valid_targets_mean": 4216.1, + "valid_targets_min": 516 + }, + { + "epoch": 0.875968992248062, + "grad_norm": 0.5582382675085276, + "learning_rate": 3.992505016052858e-05, + "loss": 0.2275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24689146876335144, + "step": 565, + "valid_targets_mean": 4033.6, + "valid_targets_min": 1922 + }, + { + "epoch": 0.8837209302325582, + "grad_norm": 0.48825810766144667, + "learning_rate": 3.991821350308369e-05, + "loss": 0.2269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23387084901332855, + "step": 570, + "valid_targets_mean": 4010.4, + "valid_targets_min": 946 + }, + { + "epoch": 0.8914728682170543, + "grad_norm": 0.3468810555031447, + "learning_rate": 3.991107913390516e-05, + "loss": 0.2017, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16606643795967102, + "step": 575, + "valid_targets_mean": 6431.9, + "valid_targets_min": 2510 + }, + { + "epoch": 0.8992248062015504, + "grad_norm": 0.4163415879240496, + "learning_rate": 3.990364715962833e-05, + "loss": 0.2258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1997481882572174, + "step": 580, + "valid_targets_mean": 5320.4, + "valid_targets_min": 2104 + }, + { + "epoch": 0.9069767441860465, + "grad_norm": 0.5473724440573992, + "learning_rate": 3.989591769133675e-05, + "loss": 0.2154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24773220717906952, + "step": 585, + "valid_targets_mean": 4261.8, + "valid_targets_min": 434 + }, + { + "epoch": 0.9147286821705426, + "grad_norm": 0.5436217489799219, + "learning_rate": 3.988789084456054e-05, + "loss": 0.232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26351064443588257, + "step": 590, + "valid_targets_mean": 3915.2, + "valid_targets_min": 551 + }, + { + "epoch": 0.9224806201550387, + "grad_norm": 0.41536583827338946, + "learning_rate": 3.9879566739274626e-05, + "loss": 0.2094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2559744715690613, + "step": 595, + "valid_targets_mean": 5054.2, + "valid_targets_min": 2545 + }, + { + "epoch": 0.9302325581395349, + "grad_norm": 0.35411011586501473, + "learning_rate": 3.987094549989699e-05, + "loss": 0.2118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18530899286270142, + "step": 600, + "valid_targets_mean": 5971.2, + "valid_targets_min": 1835 + }, + { + "epoch": 0.937984496124031, + "grad_norm": 0.4392551065067926, + "learning_rate": 3.98620272552868e-05, + "loss": 0.2495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21953071653842926, + "step": 605, + "valid_targets_mean": 4565.2, + "valid_targets_min": 1942 + }, + { + "epoch": 0.9457364341085271, + "grad_norm": 0.47803844935864886, + "learning_rate": 3.985281213874244e-05, + "loss": 0.2298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29870736598968506, + "step": 610, + "valid_targets_mean": 4237.6, + "valid_targets_min": 326 + }, + { + "epoch": 0.9534883720930233, + "grad_norm": 0.44870011943315186, + "learning_rate": 3.984330028799957e-05, + "loss": 0.2274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2335856556892395, + "step": 615, + "valid_targets_mean": 4557.3, + "valid_targets_min": 849 + }, + { + "epoch": 0.9612403100775194, + "grad_norm": 0.5014485102007264, + "learning_rate": 3.9833491845229065e-05, + "loss": 0.2085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.253334641456604, + "step": 620, + "valid_targets_mean": 3717.8, + "valid_targets_min": 926 + }, + { + "epoch": 0.9689922480620154, + "grad_norm": 0.4539009088514093, + "learning_rate": 3.982338695703486e-05, + "loss": 0.2471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2949668765068054, + "step": 625, + "valid_targets_mean": 4934.7, + "valid_targets_min": 685 + }, + { + "epoch": 0.9767441860465116, + "grad_norm": 0.42836269237630537, + "learning_rate": 3.9812985774451763e-05, + "loss": 0.227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23034946620464325, + "step": 630, + "valid_targets_mean": 4939.8, + "valid_targets_min": 2461 + }, + { + "epoch": 0.9844961240310077, + "grad_norm": 0.6158981927965868, + "learning_rate": 3.980228845294323e-05, + "loss": 0.2103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23234328627586365, + "step": 635, + "valid_targets_mean": 4101.9, + "valid_targets_min": 386 + }, + { + "epoch": 0.9922480620155039, + "grad_norm": 0.4762142114139161, + "learning_rate": 3.9791295152399014e-05, + "loss": 0.2229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20974400639533997, + "step": 640, + "valid_targets_mean": 3889.9, + "valid_targets_min": 266 + }, + { + "epoch": 1.0, + "grad_norm": 0.5061517747796611, + "learning_rate": 3.978000603713276e-05, + "loss": 0.2162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21345987915992737, + "step": 645, + "valid_targets_mean": 3841.9, + "valid_targets_min": 725 + }, + { + "epoch": 1.0077519379844961, + "grad_norm": 0.7869218457307112, + "learning_rate": 3.976842127587959e-05, + "loss": 0.2392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23174425959587097, + "step": 650, + "valid_targets_mean": 5303.6, + "valid_targets_min": 1966 + }, + { + "epoch": 1.0155038759689923, + "grad_norm": 0.5003906965370145, + "learning_rate": 3.975654104179356e-05, + "loss": 0.2077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22817862033843994, + "step": 655, + "valid_targets_mean": 3844.0, + "valid_targets_min": 568 + }, + { + "epoch": 1.0232558139534884, + "grad_norm": 0.4114203739700514, + "learning_rate": 3.9744365512445056e-05, + "loss": 0.2283, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25271815061569214, + "step": 660, + "valid_targets_mean": 5901.1, + "valid_targets_min": 2452 + }, + { + "epoch": 1.0310077519379846, + "grad_norm": 0.4575096509840006, + "learning_rate": 3.973189486981818e-05, + "loss": 0.2171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2162925899028778, + "step": 665, + "valid_targets_mean": 4902.4, + "valid_targets_min": 497 + }, + { + "epoch": 1.0387596899224807, + "grad_norm": 0.4021600509804852, + "learning_rate": 3.971912930030799e-05, + "loss": 0.2048, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19043004512786865, + "step": 670, + "valid_targets_mean": 4917.0, + "valid_targets_min": 646 + }, + { + "epoch": 1.0465116279069768, + "grad_norm": 0.4989156170962161, + "learning_rate": 3.970606899471774e-05, + "loss": 0.2215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17856532335281372, + "step": 675, + "valid_targets_mean": 4752.6, + "valid_targets_min": 502 + }, + { + "epoch": 1.054263565891473, + "grad_norm": 0.3847796119558988, + "learning_rate": 3.969271414825599e-05, + "loss": 0.1926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20729967951774597, + "step": 680, + "valid_targets_mean": 5683.9, + "valid_targets_min": 1844 + }, + { + "epoch": 1.062015503875969, + "grad_norm": 0.47657171710131, + "learning_rate": 3.967906496053377e-05, + "loss": 0.2187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21222332119941711, + "step": 685, + "valid_targets_mean": 4150.8, + "valid_targets_min": 962 + }, + { + "epoch": 1.069767441860465, + "grad_norm": 0.3858090579432838, + "learning_rate": 3.966512163556149e-05, + "loss": 0.2201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2170460969209671, + "step": 690, + "valid_targets_mean": 5164.2, + "valid_targets_min": 1885 + }, + { + "epoch": 1.0775193798449612, + "grad_norm": 0.4290059612627937, + "learning_rate": 3.965088438174597e-05, + "loss": 0.1915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1908944547176361, + "step": 695, + "valid_targets_mean": 4788.8, + "valid_targets_min": 676 + }, + { + "epoch": 1.0852713178294573, + "grad_norm": 0.4409936183324337, + "learning_rate": 3.963635341188729e-05, + "loss": 0.2102, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23212580382823944, + "step": 700, + "valid_targets_mean": 4988.8, + "valid_targets_min": 558 + }, + { + "epoch": 1.0930232558139534, + "grad_norm": 0.45813599715819825, + "learning_rate": 3.962152894317564e-05, + "loss": 0.2243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23683017492294312, + "step": 705, + "valid_targets_mean": 4570.3, + "valid_targets_min": 2488 + }, + { + "epoch": 1.1007751937984496, + "grad_norm": 0.40951900195560925, + "learning_rate": 3.960641119718802e-05, + "loss": 0.2143, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2278529703617096, + "step": 710, + "valid_targets_mean": 5026.8, + "valid_targets_min": 393 + }, + { + "epoch": 1.1085271317829457, + "grad_norm": 0.36962646479291916, + "learning_rate": 3.9591000399884974e-05, + "loss": 0.1855, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19535881280899048, + "step": 715, + "valid_targets_mean": 5893.4, + "valid_targets_min": 1714 + }, + { + "epoch": 1.1162790697674418, + "grad_norm": 0.4169785909813559, + "learning_rate": 3.957529678160721e-05, + "loss": 0.2211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22500549256801605, + "step": 720, + "valid_targets_mean": 5345.6, + "valid_targets_min": 512 + }, + { + "epoch": 1.124031007751938, + "grad_norm": 0.49607819631085537, + "learning_rate": 3.9559300577072115e-05, + "loss": 0.2299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25468283891677856, + "step": 725, + "valid_targets_mean": 5040.8, + "valid_targets_min": 2775 + }, + { + "epoch": 1.1317829457364341, + "grad_norm": 0.5069100805388594, + "learning_rate": 3.954301202537032e-05, + "loss": 0.1978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1782360076904297, + "step": 730, + "valid_targets_mean": 5846.9, + "valid_targets_min": 2328 + }, + { + "epoch": 1.1395348837209303, + "grad_norm": 0.5044747278952141, + "learning_rate": 3.952643136996205e-05, + "loss": 0.2022, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22031991183757782, + "step": 735, + "valid_targets_mean": 3806.4, + "valid_targets_min": 250 + }, + { + "epoch": 1.1472868217054264, + "grad_norm": 0.713115718875641, + "learning_rate": 3.950955885867353e-05, + "loss": 0.1987, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17436224222183228, + "step": 740, + "valid_targets_mean": 4936.1, + "valid_targets_min": 2127 + }, + { + "epoch": 1.1550387596899225, + "grad_norm": 0.5036250359903303, + "learning_rate": 3.9492394743693266e-05, + "loss": 0.2187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2408798635005951, + "step": 745, + "valid_targets_mean": 3656.3, + "valid_targets_min": 555 + }, + { + "epoch": 1.1627906976744187, + "grad_norm": 0.46006096792307244, + "learning_rate": 3.94749392815683e-05, + "loss": 0.1973, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19669455289840698, + "step": 750, + "valid_targets_mean": 4952.6, + "valid_targets_min": 1766 + }, + { + "epoch": 1.1705426356589148, + "grad_norm": 0.4672041249500244, + "learning_rate": 3.9457192733200315e-05, + "loss": 0.2053, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1791488230228424, + "step": 755, + "valid_targets_mean": 4441.9, + "valid_targets_min": 650 + }, + { + "epoch": 1.178294573643411, + "grad_norm": 0.42348721602696954, + "learning_rate": 3.9439155363841814e-05, + "loss": 0.2092, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17398425936698914, + "step": 760, + "valid_targets_mean": 4538.8, + "valid_targets_min": 277 + }, + { + "epoch": 1.1860465116279069, + "grad_norm": 0.5878356373078284, + "learning_rate": 3.9420827443092104e-05, + "loss": 0.2076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2349127233028412, + "step": 765, + "valid_targets_mean": 4086.5, + "valid_targets_min": 644 + }, + { + "epoch": 1.193798449612403, + "grad_norm": 0.476452037868028, + "learning_rate": 3.940220924489327e-05, + "loss": 0.2181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2374195158481598, + "step": 770, + "valid_targets_mean": 3906.2, + "valid_targets_min": 487 + }, + { + "epoch": 1.2015503875968991, + "grad_norm": 0.4353925367240353, + "learning_rate": 3.938330104752609e-05, + "loss": 0.2182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20411117374897003, + "step": 775, + "valid_targets_mean": 4184.5, + "valid_targets_min": 317 + }, + { + "epoch": 1.2093023255813953, + "grad_norm": 0.44738786787353246, + "learning_rate": 3.936410313360589e-05, + "loss": 0.2074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29115378856658936, + "step": 780, + "valid_targets_mean": 5176.8, + "valid_targets_min": 2427 + }, + { + "epoch": 1.2170542635658914, + "grad_norm": 0.5025078305803677, + "learning_rate": 3.934461579007829e-05, + "loss": 0.1905, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.194045752286911, + "step": 785, + "valid_targets_mean": 4190.2, + "valid_targets_min": 2274 + }, + { + "epoch": 1.2248062015503876, + "grad_norm": 0.399168491370644, + "learning_rate": 3.932483930821495e-05, + "loss": 0.2027, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19120316207408905, + "step": 790, + "valid_targets_mean": 5678.2, + "valid_targets_min": 2268 + }, + { + "epoch": 1.2325581395348837, + "grad_norm": 0.40122275394036194, + "learning_rate": 3.930477398360915e-05, + "loss": 0.2109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20044457912445068, + "step": 795, + "valid_targets_mean": 5078.8, + "valid_targets_min": 1999 + }, + { + "epoch": 1.2403100775193798, + "grad_norm": 0.42534010971764247, + "learning_rate": 3.928442011617148e-05, + "loss": 0.2093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21228013932704926, + "step": 800, + "valid_targets_mean": 3933.1, + "valid_targets_min": 757 + }, + { + "epoch": 1.248062015503876, + "grad_norm": 0.7069266989704566, + "learning_rate": 3.926377801012526e-05, + "loss": 0.2367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22949740290641785, + "step": 805, + "valid_targets_mean": 4512.6, + "valid_targets_min": 299 + }, + { + "epoch": 1.255813953488372, + "grad_norm": 0.42027583645711425, + "learning_rate": 3.9242847974002026e-05, + "loss": 0.2204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2134920060634613, + "step": 810, + "valid_targets_mean": 4943.6, + "valid_targets_min": 1805 + }, + { + "epoch": 1.2635658914728682, + "grad_norm": 0.39016064981111986, + "learning_rate": 3.9221630320636935e-05, + "loss": 0.2095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19790124893188477, + "step": 815, + "valid_targets_mean": 4984.6, + "valid_targets_min": 2085 + }, + { + "epoch": 1.2713178294573644, + "grad_norm": 0.48120226759216767, + "learning_rate": 3.9200125367164076e-05, + "loss": 0.2083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19904130697250366, + "step": 820, + "valid_targets_mean": 3452.6, + "valid_targets_min": 477 + }, + { + "epoch": 1.2790697674418605, + "grad_norm": 0.4551570397834542, + "learning_rate": 3.917833343501171e-05, + "loss": 0.1955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19769467413425446, + "step": 825, + "valid_targets_mean": 5811.2, + "valid_targets_min": 923 + }, + { + "epoch": 1.2868217054263567, + "grad_norm": 0.43735207764564604, + "learning_rate": 3.91562548498975e-05, + "loss": 0.1992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20643508434295654, + "step": 830, + "valid_targets_mean": 4570.1, + "valid_targets_min": 2493 + }, + { + "epoch": 1.2945736434108528, + "grad_norm": 0.4299580048955895, + "learning_rate": 3.913388994182364e-05, + "loss": 0.2196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18837128579616547, + "step": 835, + "valid_targets_mean": 4744.1, + "valid_targets_min": 881 + }, + { + "epoch": 1.302325581395349, + "grad_norm": 0.3538726377495065, + "learning_rate": 3.9111239045071876e-05, + "loss": 0.1755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13884110748767853, + "step": 840, + "valid_targets_mean": 5657.9, + "valid_targets_min": 453 + }, + { + "epoch": 1.310077519379845, + "grad_norm": 0.48494658304811955, + "learning_rate": 3.9088302498198564e-05, + "loss": 0.2032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21122536063194275, + "step": 845, + "valid_targets_mean": 4122.8, + "valid_targets_min": 264 + }, + { + "epoch": 1.3178294573643412, + "grad_norm": 0.40106643807384035, + "learning_rate": 3.90650806440296e-05, + "loss": 0.1862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19900798797607422, + "step": 850, + "valid_targets_mean": 5050.8, + "valid_targets_min": 2416 + }, + { + "epoch": 1.3255813953488373, + "grad_norm": 0.3960604184673925, + "learning_rate": 3.904157382965526e-05, + "loss": 0.2157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22484715282917023, + "step": 855, + "valid_targets_mean": 5748.9, + "valid_targets_min": 1856 + }, + { + "epoch": 1.3333333333333333, + "grad_norm": 0.3257220820239636, + "learning_rate": 3.9017782406425045e-05, + "loss": 0.2088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16785819828510284, + "step": 860, + "valid_targets_mean": 5923.4, + "valid_targets_min": 2779 + }, + { + "epoch": 1.3410852713178294, + "grad_norm": 0.4239797338208263, + "learning_rate": 3.899370672994244e-05, + "loss": 0.2188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1850738823413849, + "step": 865, + "valid_targets_mean": 4976.4, + "valid_targets_min": 695 + }, + { + "epoch": 1.3488372093023255, + "grad_norm": 0.4680128138024682, + "learning_rate": 3.896934716005956e-05, + "loss": 0.219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23862537741661072, + "step": 870, + "valid_targets_mean": 4609.4, + "valid_targets_min": 321 + }, + { + "epoch": 1.3565891472868217, + "grad_norm": 0.34295981747721627, + "learning_rate": 3.8944704060871803e-05, + "loss": 0.2328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19128797948360443, + "step": 875, + "valid_targets_mean": 6440.4, + "valid_targets_min": 2928 + }, + { + "epoch": 1.3643410852713178, + "grad_norm": 0.39579678053682266, + "learning_rate": 3.891977780071238e-05, + "loss": 0.2386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22662851214408875, + "step": 880, + "valid_targets_mean": 5590.4, + "valid_targets_min": 418 + }, + { + "epoch": 1.372093023255814, + "grad_norm": 0.3650270597088362, + "learning_rate": 3.889456875214685e-05, + "loss": 0.2028, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18867811560630798, + "step": 885, + "valid_targets_mean": 5108.4, + "valid_targets_min": 2550 + }, + { + "epoch": 1.37984496124031, + "grad_norm": 0.43546961058361106, + "learning_rate": 3.8869077291967514e-05, + "loss": 0.2091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18624623119831085, + "step": 890, + "valid_targets_mean": 3812.8, + "valid_targets_min": 310 + }, + { + "epoch": 1.3875968992248062, + "grad_norm": 0.35003941903300767, + "learning_rate": 3.88433038011878e-05, + "loss": 0.1911, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17150872945785522, + "step": 895, + "valid_targets_mean": 5599.2, + "valid_targets_min": 2853 + }, + { + "epoch": 1.3953488372093024, + "grad_norm": 0.40616386423722534, + "learning_rate": 3.881724866503656e-05, + "loss": 0.207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.221075639128685, + "step": 900, + "valid_targets_mean": 4683.5, + "valid_targets_min": 1955 + }, + { + "epoch": 1.4031007751937985, + "grad_norm": 0.45770780156293883, + "learning_rate": 3.879091227295233e-05, + "loss": 0.2342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3284824788570404, + "step": 905, + "valid_targets_mean": 5965.8, + "valid_targets_min": 2467 + }, + { + "epoch": 1.4108527131782946, + "grad_norm": 0.43076323744174305, + "learning_rate": 3.876429501857748e-05, + "loss": 0.2165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2374260425567627, + "step": 910, + "valid_targets_mean": 5230.9, + "valid_targets_min": 2401 + }, + { + "epoch": 1.4186046511627908, + "grad_norm": 0.4744851861080025, + "learning_rate": 3.873739729975237e-05, + "loss": 0.2156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22709140181541443, + "step": 915, + "valid_targets_mean": 3975.9, + "valid_targets_min": 281 + }, + { + "epoch": 1.4263565891472867, + "grad_norm": 0.4914323912239341, + "learning_rate": 3.871021951850935e-05, + "loss": 0.2059, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2681000232696533, + "step": 920, + "valid_targets_mean": 4153.2, + "valid_targets_min": 579 + }, + { + "epoch": 1.4341085271317828, + "grad_norm": 0.5702552607786674, + "learning_rate": 3.868276208106681e-05, + "loss": 0.2307, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23841242492198944, + "step": 925, + "valid_targets_mean": 4588.2, + "valid_targets_min": 330 + }, + { + "epoch": 1.441860465116279, + "grad_norm": 0.4960800862921102, + "learning_rate": 3.8655025397823054e-05, + "loss": 0.1843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19032783806324005, + "step": 930, + "valid_targets_mean": 4889.1, + "valid_targets_min": 792 + }, + { + "epoch": 1.449612403100775, + "grad_norm": 0.3982547722090709, + "learning_rate": 3.862700988335022e-05, + "loss": 0.2096, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19539685547351837, + "step": 935, + "valid_targets_mean": 5027.4, + "valid_targets_min": 2790 + }, + { + "epoch": 1.4573643410852712, + "grad_norm": 0.4822298209707658, + "learning_rate": 3.8598715956388026e-05, + "loss": 0.218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19648897647857666, + "step": 940, + "valid_targets_mean": 3873.2, + "valid_targets_min": 346 + }, + { + "epoch": 1.4651162790697674, + "grad_norm": 0.372486771897578, + "learning_rate": 3.8570144039837564e-05, + "loss": 0.2036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15964803099632263, + "step": 945, + "valid_targets_mean": 4909.3, + "valid_targets_min": 2298 + }, + { + "epoch": 1.4728682170542635, + "grad_norm": 0.39582842776828425, + "learning_rate": 3.854129456075495e-05, + "loss": 0.2138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1935088038444519, + "step": 950, + "valid_targets_mean": 5237.1, + "valid_targets_min": 2445 + }, + { + "epoch": 1.4806201550387597, + "grad_norm": 0.4768595855727291, + "learning_rate": 3.8512167950344916e-05, + "loss": 0.2198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24240124225616455, + "step": 955, + "valid_targets_mean": 4105.6, + "valid_targets_min": 600 + }, + { + "epoch": 1.4883720930232558, + "grad_norm": 0.4935988281203597, + "learning_rate": 3.848276464395445e-05, + "loss": 0.2062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24553832411766052, + "step": 960, + "valid_targets_mean": 4142.4, + "valid_targets_min": 821 + }, + { + "epoch": 1.496124031007752, + "grad_norm": 0.3508044933691806, + "learning_rate": 3.8453085081066195e-05, + "loss": 0.1977, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1662253439426422, + "step": 965, + "valid_targets_mean": 5088.2, + "valid_targets_min": 869 + }, + { + "epoch": 1.503875968992248, + "grad_norm": 0.40679599062954963, + "learning_rate": 3.842312970529193e-05, + "loss": 0.2284, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2687034010887146, + "step": 970, + "valid_targets_mean": 5584.9, + "valid_targets_min": 265 + }, + { + "epoch": 1.5116279069767442, + "grad_norm": 0.5352927860483538, + "learning_rate": 3.839289896436592e-05, + "loss": 0.2209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24231748282909393, + "step": 975, + "valid_targets_mean": 3208.9, + "valid_targets_min": 401 + }, + { + "epoch": 1.5193798449612403, + "grad_norm": 0.4748472407205153, + "learning_rate": 3.836239331013825e-05, + "loss": 0.2151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18979281187057495, + "step": 980, + "valid_targets_mean": 3701.5, + "valid_targets_min": 455 + }, + { + "epoch": 1.5271317829457365, + "grad_norm": 0.345182798324371, + "learning_rate": 3.8331613198568056e-05, + "loss": 0.1933, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.164521723985672, + "step": 985, + "valid_targets_mean": 5305.6, + "valid_targets_min": 2154 + }, + { + "epoch": 1.5348837209302326, + "grad_norm": 0.34148764388998604, + "learning_rate": 3.830055908971668e-05, + "loss": 0.1917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1817236989736557, + "step": 990, + "valid_targets_mean": 5507.2, + "valid_targets_min": 322 + }, + { + "epoch": 1.5426356589147288, + "grad_norm": 0.3914530838647767, + "learning_rate": 3.8269231447740844e-05, + "loss": 0.2134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18969038128852844, + "step": 995, + "valid_targets_mean": 5196.0, + "valid_targets_min": 827 + }, + { + "epoch": 1.550387596899225, + "grad_norm": 0.39571077331421817, + "learning_rate": 3.823763074088568e-05, + "loss": 0.2054, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.209140807390213, + "step": 1000, + "valid_targets_mean": 5359.5, + "valid_targets_min": 1861 + }, + { + "epoch": 1.558139534883721, + "grad_norm": 0.3713493032556294, + "learning_rate": 3.8205757441477755e-05, + "loss": 0.2178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1935080587863922, + "step": 1005, + "valid_targets_mean": 5409.0, + "valid_targets_min": 2022 + }, + { + "epoch": 1.5658914728682172, + "grad_norm": 0.40758343223754234, + "learning_rate": 3.8173612025917984e-05, + "loss": 0.216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19840386509895325, + "step": 1010, + "valid_targets_mean": 5117.1, + "valid_targets_min": 590 + }, + { + "epoch": 1.5736434108527133, + "grad_norm": 0.3957074242428162, + "learning_rate": 3.8141194974674534e-05, + "loss": 0.2104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20921745896339417, + "step": 1015, + "valid_targets_mean": 4456.0, + "valid_targets_min": 391 + }, + { + "epoch": 1.5813953488372094, + "grad_norm": 0.3733502693439458, + "learning_rate": 3.810850677227561e-05, + "loss": 0.2013, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16597923636436462, + "step": 1020, + "valid_targets_mean": 4493.0, + "valid_targets_min": 529 + }, + { + "epoch": 1.5891472868217056, + "grad_norm": 0.3628573096363499, + "learning_rate": 3.8075547907302255e-05, + "loss": 0.2158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20087650418281555, + "step": 1025, + "valid_targets_mean": 5676.1, + "valid_targets_min": 1946 + }, + { + "epoch": 1.5968992248062015, + "grad_norm": 0.4127367151722258, + "learning_rate": 3.804231887238103e-05, + "loss": 0.1975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2233181893825531, + "step": 1030, + "valid_targets_mean": 4449.3, + "valid_targets_min": 977 + }, + { + "epoch": 1.6046511627906976, + "grad_norm": 0.3649731232133132, + "learning_rate": 3.800882016417662e-05, + "loss": 0.2217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16599664092063904, + "step": 1035, + "valid_targets_mean": 4848.2, + "valid_targets_min": 1859 + }, + { + "epoch": 1.6124031007751938, + "grad_norm": 0.46927973730672984, + "learning_rate": 3.797505228338447e-05, + "loss": 0.2065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2043415606021881, + "step": 1040, + "valid_targets_mean": 3859.1, + "valid_targets_min": 658 + }, + { + "epoch": 1.62015503875969, + "grad_norm": 0.452023920939608, + "learning_rate": 3.794101573472325e-05, + "loss": 0.1983, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24635906517505646, + "step": 1045, + "valid_targets_mean": 4854.6, + "valid_targets_min": 712 + }, + { + "epoch": 1.627906976744186, + "grad_norm": 0.41920457500146513, + "learning_rate": 3.7906711026927344e-05, + "loss": 0.1927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21003258228302002, + "step": 1050, + "valid_targets_mean": 4362.9, + "valid_targets_min": 2643 + }, + { + "epoch": 1.6356589147286822, + "grad_norm": 0.46857169083181116, + "learning_rate": 3.787213867273921e-05, + "loss": 0.2055, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2265489399433136, + "step": 1055, + "valid_targets_mean": 4190.8, + "valid_targets_min": 1976 + }, + { + "epoch": 1.6434108527131783, + "grad_norm": 0.4767624399813519, + "learning_rate": 3.783729918890176e-05, + "loss": 0.2186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17004413902759552, + "step": 1060, + "valid_targets_mean": 5151.8, + "valid_targets_min": 734 + }, + { + "epoch": 1.6511627906976745, + "grad_norm": 0.3947749262809709, + "learning_rate": 3.780219309615061e-05, + "loss": 0.2164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20741719007492065, + "step": 1065, + "valid_targets_mean": 4775.2, + "valid_targets_min": 2041 + }, + { + "epoch": 1.6589147286821704, + "grad_norm": 0.3652762324269706, + "learning_rate": 3.7766820919206294e-05, + "loss": 0.2074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1898358166217804, + "step": 1070, + "valid_targets_mean": 5115.6, + "valid_targets_min": 395 + }, + { + "epoch": 1.6666666666666665, + "grad_norm": 0.4172044880561881, + "learning_rate": 3.7731183186766444e-05, + "loss": 0.2037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19984492659568787, + "step": 1075, + "valid_targets_mean": 4925.4, + "valid_targets_min": 2372 + }, + { + "epoch": 1.6744186046511627, + "grad_norm": 0.37343486351450866, + "learning_rate": 3.7695280431497845e-05, + "loss": 0.2067, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21672552824020386, + "step": 1080, + "valid_targets_mean": 5672.4, + "valid_targets_min": 3114 + }, + { + "epoch": 1.6821705426356588, + "grad_norm": 0.4094562646804994, + "learning_rate": 3.765911319002854e-05, + "loss": 0.2256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2076679766178131, + "step": 1085, + "valid_targets_mean": 4861.9, + "valid_targets_min": 351 + }, + { + "epoch": 1.689922480620155, + "grad_norm": 0.31444076763215517, + "learning_rate": 3.762268200293973e-05, + "loss": 0.1921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13465389609336853, + "step": 1090, + "valid_targets_mean": 4960.0, + "valid_targets_min": 566 + }, + { + "epoch": 1.697674418604651, + "grad_norm": 0.4339023669513262, + "learning_rate": 3.7585987414757774e-05, + "loss": 0.2153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1692432165145874, + "step": 1095, + "valid_targets_mean": 3976.5, + "valid_targets_min": 1131 + }, + { + "epoch": 1.7054263565891472, + "grad_norm": 0.493759660874177, + "learning_rate": 3.754902997394597e-05, + "loss": 0.2076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20814771950244904, + "step": 1100, + "valid_targets_mean": 4421.8, + "valid_targets_min": 682 + }, + { + "epoch": 1.7131782945736433, + "grad_norm": 0.39667125615949944, + "learning_rate": 3.7511810232896435e-05, + "loss": 0.2221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24969351291656494, + "step": 1105, + "valid_targets_mean": 6635.4, + "valid_targets_min": 767 + }, + { + "epoch": 1.7209302325581395, + "grad_norm": 0.40845510116998796, + "learning_rate": 3.7474328747921776e-05, + "loss": 0.192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2053869366645813, + "step": 1110, + "valid_targets_mean": 4257.3, + "valid_targets_min": 669 + }, + { + "epoch": 1.7286821705426356, + "grad_norm": 0.3654829840675916, + "learning_rate": 3.743658607924683e-05, + "loss": 0.2047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20669598877429962, + "step": 1115, + "valid_targets_mean": 5434.2, + "valid_targets_min": 599 + }, + { + "epoch": 1.7364341085271318, + "grad_norm": 0.437762903353236, + "learning_rate": 3.739858279100028e-05, + "loss": 0.1922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2025665044784546, + "step": 1120, + "valid_targets_mean": 5428.5, + "valid_targets_min": 3371 + }, + { + "epoch": 1.744186046511628, + "grad_norm": 0.5392878995782371, + "learning_rate": 3.736031945120621e-05, + "loss": 0.2211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22214415669441223, + "step": 1125, + "valid_targets_mean": 4204.6, + "valid_targets_min": 1972 + }, + { + "epoch": 1.751937984496124, + "grad_norm": 0.550987196574115, + "learning_rate": 3.732179663177559e-05, + "loss": 0.1964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17631106078624725, + "step": 1130, + "valid_targets_mean": 5127.2, + "valid_targets_min": 2529 + }, + { + "epoch": 1.7596899224806202, + "grad_norm": 0.3704297654863633, + "learning_rate": 3.728301490849778e-05, + "loss": 0.2116, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18758273124694824, + "step": 1135, + "valid_targets_mean": 5341.6, + "valid_targets_min": 2320 + }, + { + "epoch": 1.7674418604651163, + "grad_norm": 0.466284801284017, + "learning_rate": 3.7243974861031915e-05, + "loss": 0.2052, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27421271800994873, + "step": 1140, + "valid_targets_mean": 4469.6, + "valid_targets_min": 2055 + }, + { + "epoch": 1.7751937984496124, + "grad_norm": 0.4548187236481328, + "learning_rate": 3.720467707289819e-05, + "loss": 0.1848, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19912035763263702, + "step": 1145, + "valid_targets_mean": 5421.1, + "valid_targets_min": 297 + }, + { + "epoch": 1.7829457364341086, + "grad_norm": 0.4234654458795664, + "learning_rate": 3.7165122131469205e-05, + "loss": 0.1967, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20600225031375885, + "step": 1150, + "valid_targets_mean": 4677.9, + "valid_targets_min": 632 + }, + { + "epoch": 1.7906976744186047, + "grad_norm": 0.4892297020617883, + "learning_rate": 3.712531062796114e-05, + "loss": 0.2302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2325432002544403, + "step": 1155, + "valid_targets_mean": 3676.9, + "valid_targets_min": 378 + }, + { + "epoch": 1.7984496124031009, + "grad_norm": 0.4615952861064993, + "learning_rate": 3.708524315742494e-05, + "loss": 0.2095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20138488709926605, + "step": 1160, + "valid_targets_mean": 3587.5, + "valid_targets_min": 649 + }, + { + "epoch": 1.806201550387597, + "grad_norm": 0.4107586531975636, + "learning_rate": 3.704492031873742e-05, + "loss": 0.2085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18871745467185974, + "step": 1165, + "valid_targets_mean": 4104.7, + "valid_targets_min": 322 + }, + { + "epoch": 1.8139534883720931, + "grad_norm": 0.4364150036844027, + "learning_rate": 3.700434271459229e-05, + "loss": 0.1851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18455129861831665, + "step": 1170, + "valid_targets_mean": 4351.6, + "valid_targets_min": 528 + }, + { + "epoch": 1.8217054263565893, + "grad_norm": 0.40802425859518093, + "learning_rate": 3.696351095149117e-05, + "loss": 0.2017, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20364965498447418, + "step": 1175, + "valid_targets_mean": 4112.6, + "valid_targets_min": 567 + }, + { + "epoch": 1.8294573643410854, + "grad_norm": 0.4032478517472396, + "learning_rate": 3.692242563973454e-05, + "loss": 0.2209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25360697507858276, + "step": 1180, + "valid_targets_mean": 5295.9, + "valid_targets_min": 556 + }, + { + "epoch": 1.8372093023255816, + "grad_norm": 0.4614085026878937, + "learning_rate": 3.688108739341258e-05, + "loss": 0.1926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20508089661598206, + "step": 1185, + "valid_targets_mean": 3781.7, + "valid_targets_min": 270 + }, + { + "epoch": 1.8449612403100775, + "grad_norm": 0.3802819813887152, + "learning_rate": 3.683949683039602e-05, + "loss": 0.2274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20417264103889465, + "step": 1190, + "valid_targets_mean": 4810.9, + "valid_targets_min": 946 + }, + { + "epoch": 1.8527131782945736, + "grad_norm": 0.43348296046621254, + "learning_rate": 3.679765457232687e-05, + "loss": 0.2113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19152264297008514, + "step": 1195, + "valid_targets_mean": 5837.1, + "valid_targets_min": 272 + }, + { + "epoch": 1.8604651162790697, + "grad_norm": 0.40270266495111867, + "learning_rate": 3.675556124460918e-05, + "loss": 0.2061, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18859389424324036, + "step": 1200, + "valid_targets_mean": 4652.8, + "valid_targets_min": 2805 + }, + { + "epoch": 1.8682170542635659, + "grad_norm": 0.3784724852972052, + "learning_rate": 3.6713217476399654e-05, + "loss": 0.2069, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22137880325317383, + "step": 1205, + "valid_targets_mean": 5666.8, + "valid_targets_min": 1380 + }, + { + "epoch": 1.875968992248062, + "grad_norm": 0.3563867466614007, + "learning_rate": 3.6670623900598264e-05, + "loss": 0.1915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17663565278053284, + "step": 1210, + "valid_targets_mean": 4829.3, + "valid_targets_min": 2526 + }, + { + "epoch": 1.8837209302325582, + "grad_norm": 0.32541904092615365, + "learning_rate": 3.662778115383876e-05, + "loss": 0.2149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1512431651353836, + "step": 1215, + "valid_targets_mean": 5500.1, + "valid_targets_min": 300 + }, + { + "epoch": 1.8914728682170543, + "grad_norm": 0.3562253665751596, + "learning_rate": 3.6584689876479206e-05, + "loss": 0.195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16320425271987915, + "step": 1220, + "valid_targets_mean": 4741.8, + "valid_targets_min": 335 + }, + { + "epoch": 1.8992248062015504, + "grad_norm": 0.4242425218762759, + "learning_rate": 3.654135071259237e-05, + "loss": 0.2005, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18462225794792175, + "step": 1225, + "valid_targets_mean": 3879.1, + "valid_targets_min": 725 + }, + { + "epoch": 1.9069767441860463, + "grad_norm": 0.44990621216299576, + "learning_rate": 3.6497764309956104e-05, + "loss": 0.2089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2571909725666046, + "step": 1230, + "valid_targets_mean": 5112.8, + "valid_targets_min": 2435 + }, + { + "epoch": 1.9147286821705425, + "grad_norm": 0.3514156745181372, + "learning_rate": 3.645393132004367e-05, + "loss": 0.2113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2239769548177719, + "step": 1235, + "valid_targets_mean": 5990.8, + "valid_targets_min": 2526 + }, + { + "epoch": 1.9224806201550386, + "grad_norm": 0.4438288563404829, + "learning_rate": 3.640985239801399e-05, + "loss": 0.2021, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23280391097068787, + "step": 1240, + "valid_targets_mean": 3919.9, + "valid_targets_min": 339 + }, + { + "epoch": 1.9302325581395348, + "grad_norm": 0.42245513107437865, + "learning_rate": 3.636552820270189e-05, + "loss": 0.1932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20294125378131866, + "step": 1245, + "valid_targets_mean": 4123.6, + "valid_targets_min": 608 + }, + { + "epoch": 1.937984496124031, + "grad_norm": 0.36173604376638907, + "learning_rate": 3.632095939660817e-05, + "loss": 0.1921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19740913808345795, + "step": 1250, + "valid_targets_mean": 4450.7, + "valid_targets_min": 243 + }, + { + "epoch": 1.945736434108527, + "grad_norm": 0.4185963445294981, + "learning_rate": 3.627614664588981e-05, + "loss": 0.1975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20765337347984314, + "step": 1255, + "valid_targets_mean": 4475.6, + "valid_targets_min": 614 + }, + { + "epoch": 1.9534883720930232, + "grad_norm": 0.38392441124931787, + "learning_rate": 3.623109062034994e-05, + "loss": 0.2078, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19816569983959198, + "step": 1260, + "valid_targets_mean": 4867.6, + "valid_targets_min": 655 + }, + { + "epoch": 1.9612403100775193, + "grad_norm": 0.43839350861224635, + "learning_rate": 3.618579199342783e-05, + "loss": 0.1831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20989343523979187, + "step": 1265, + "valid_targets_mean": 4077.1, + "valid_targets_min": 860 + }, + { + "epoch": 1.9689922480620154, + "grad_norm": 0.4040165567082679, + "learning_rate": 3.614025144218887e-05, + "loss": 0.2062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21057480573654175, + "step": 1270, + "valid_targets_mean": 4318.6, + "valid_targets_min": 573 + }, + { + "epoch": 1.9767441860465116, + "grad_norm": 0.43337547842383023, + "learning_rate": 3.60944696473144e-05, + "loss": 0.2247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1987258493900299, + "step": 1275, + "valid_targets_mean": 4023.1, + "valid_targets_min": 526 + }, + { + "epoch": 1.9844961240310077, + "grad_norm": 0.3271715231556842, + "learning_rate": 3.604844729309158e-05, + "loss": 0.17, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17882460355758667, + "step": 1280, + "valid_targets_mean": 6442.8, + "valid_targets_min": 2484 + }, + { + "epoch": 1.9922480620155039, + "grad_norm": 0.40322376508720353, + "learning_rate": 3.6002185067403126e-05, + "loss": 0.2075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21025332808494568, + "step": 1285, + "valid_targets_mean": 4968.1, + "valid_targets_min": 1984 + }, + { + "epoch": 2.0, + "grad_norm": 0.43384474464309075, + "learning_rate": 3.5955683661717045e-05, + "loss": 0.1963, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22120501101016998, + "step": 1290, + "valid_targets_mean": 3698.2, + "valid_targets_min": 549 + }, + { + "epoch": 2.007751937984496, + "grad_norm": 0.3930458616713528, + "learning_rate": 3.590894377107629e-05, + "loss": 0.186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2099405825138092, + "step": 1295, + "valid_targets_mean": 5265.2, + "valid_targets_min": 528 + }, + { + "epoch": 2.0155038759689923, + "grad_norm": 0.3382222565954035, + "learning_rate": 3.586196609408841e-05, + "loss": 0.1884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16293954849243164, + "step": 1300, + "valid_targets_mean": 5672.2, + "valid_targets_min": 2311 + }, + { + "epoch": 2.0232558139534884, + "grad_norm": 0.4908644970549336, + "learning_rate": 3.5814751332915025e-05, + "loss": 0.2064, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22366786003112793, + "step": 1305, + "valid_targets_mean": 3243.5, + "valid_targets_min": 628 + }, + { + "epoch": 2.0310077519379846, + "grad_norm": 0.3498811091866486, + "learning_rate": 3.576730019326144e-05, + "loss": 0.1841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15233008563518524, + "step": 1310, + "valid_targets_mean": 4671.6, + "valid_targets_min": 529 + }, + { + "epoch": 2.0387596899224807, + "grad_norm": 0.413386188431943, + "learning_rate": 3.571961338436599e-05, + "loss": 0.1917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18065381050109863, + "step": 1315, + "valid_targets_mean": 5047.1, + "valid_targets_min": 845 + }, + { + "epoch": 2.046511627906977, + "grad_norm": 0.37597109516074934, + "learning_rate": 3.567169161898954e-05, + "loss": 0.1994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1616477519273758, + "step": 1320, + "valid_targets_mean": 4590.8, + "valid_targets_min": 941 + }, + { + "epoch": 2.054263565891473, + "grad_norm": 0.41487589705713607, + "learning_rate": 3.562353561340473e-05, + "loss": 0.2004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1990424543619156, + "step": 1325, + "valid_targets_mean": 5129.2, + "valid_targets_min": 2305 + }, + { + "epoch": 2.062015503875969, + "grad_norm": 0.3711795564505497, + "learning_rate": 3.557514608738536e-05, + "loss": 0.18, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15171518921852112, + "step": 1330, + "valid_targets_mean": 4377.2, + "valid_targets_min": 575 + }, + { + "epoch": 2.0697674418604652, + "grad_norm": 0.4175667094746574, + "learning_rate": 3.552652376419557e-05, + "loss": 0.1822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16562065482139587, + "step": 1335, + "valid_targets_mean": 4697.5, + "valid_targets_min": 286 + }, + { + "epoch": 2.0775193798449614, + "grad_norm": 0.4306958245785991, + "learning_rate": 3.5477669370579045e-05, + "loss": 0.2022, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1900160312652588, + "step": 1340, + "valid_targets_mean": 4857.3, + "valid_targets_min": 1846 + }, + { + "epoch": 2.0852713178294575, + "grad_norm": 0.4476150200629465, + "learning_rate": 3.542858363674819e-05, + "loss": 0.1807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18552112579345703, + "step": 1345, + "valid_targets_mean": 6009.1, + "valid_targets_min": 314 + }, + { + "epoch": 2.0930232558139537, + "grad_norm": 1.2948036152332547, + "learning_rate": 3.537926729637316e-05, + "loss": 0.1985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19947974383831024, + "step": 1350, + "valid_targets_mean": 5068.8, + "valid_targets_min": 2881 + }, + { + "epoch": 2.10077519379845, + "grad_norm": 0.41477365418280177, + "learning_rate": 3.532972108657093e-05, + "loss": 0.1777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19240254163742065, + "step": 1355, + "valid_targets_mean": 4253.2, + "valid_targets_min": 434 + }, + { + "epoch": 2.108527131782946, + "grad_norm": 0.42076615883749335, + "learning_rate": 3.527994574789425e-05, + "loss": 0.1741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1795780062675476, + "step": 1360, + "valid_targets_mean": 4628.8, + "valid_targets_min": 677 + }, + { + "epoch": 2.116279069767442, + "grad_norm": 0.3617903288384957, + "learning_rate": 3.5229942024320614e-05, + "loss": 0.1858, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1568509340286255, + "step": 1365, + "valid_targets_mean": 4954.9, + "valid_targets_min": 712 + }, + { + "epoch": 2.124031007751938, + "grad_norm": 0.5456999997368401, + "learning_rate": 3.517971066324111e-05, + "loss": 0.1914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17306216061115265, + "step": 1370, + "valid_targets_mean": 3624.9, + "valid_targets_min": 335 + }, + { + "epoch": 2.1317829457364343, + "grad_norm": 0.38334555353165817, + "learning_rate": 3.512925241544925e-05, + "loss": 0.1877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19654911756515503, + "step": 1375, + "valid_targets_mean": 5664.1, + "valid_targets_min": 1799 + }, + { + "epoch": 2.13953488372093, + "grad_norm": 0.43336348909756756, + "learning_rate": 3.5078568035129755e-05, + "loss": 0.1835, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2114173173904419, + "step": 1380, + "valid_targets_mean": 4490.0, + "valid_targets_min": 535 + }, + { + "epoch": 2.147286821705426, + "grad_norm": 0.43851230664405455, + "learning_rate": 3.502765827984731e-05, + "loss": 0.1816, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2103298455476761, + "step": 1385, + "valid_targets_mean": 4057.0, + "valid_targets_min": 2866 + }, + { + "epoch": 2.1550387596899223, + "grad_norm": 0.48397504627229887, + "learning_rate": 3.497652391053517e-05, + "loss": 0.186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2291339933872223, + "step": 1390, + "valid_targets_mean": 4031.1, + "valid_targets_min": 630 + }, + { + "epoch": 2.1627906976744184, + "grad_norm": 0.4549549638224178, + "learning_rate": 3.492516569148388e-05, + "loss": 0.2142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21782903373241425, + "step": 1395, + "valid_targets_mean": 4231.4, + "valid_targets_min": 2014 + }, + { + "epoch": 2.1705426356589146, + "grad_norm": 0.36020932761428015, + "learning_rate": 3.487358439032975e-05, + "loss": 0.1616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18451038002967834, + "step": 1400, + "valid_targets_mean": 6068.2, + "valid_targets_min": 2874 + }, + { + "epoch": 2.1782945736434107, + "grad_norm": 0.3920924834443207, + "learning_rate": 3.4821780778043474e-05, + "loss": 0.1889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1814422309398651, + "step": 1405, + "valid_targets_mean": 4700.9, + "valid_targets_min": 656 + }, + { + "epoch": 2.186046511627907, + "grad_norm": 0.3750763170625423, + "learning_rate": 3.4769755628918545e-05, + "loss": 0.1779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1634986400604248, + "step": 1410, + "valid_targets_mean": 4980.3, + "valid_targets_min": 2167 + }, + { + "epoch": 2.193798449612403, + "grad_norm": 0.42291953615168815, + "learning_rate": 3.471750972055972e-05, + "loss": 0.1733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16338089108467102, + "step": 1415, + "valid_targets_mean": 4139.0, + "valid_targets_min": 391 + }, + { + "epoch": 2.201550387596899, + "grad_norm": 0.4171884044672125, + "learning_rate": 3.466504383387135e-05, + "loss": 0.172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1964506208896637, + "step": 1420, + "valid_targets_mean": 4754.2, + "valid_targets_min": 2815 + }, + { + "epoch": 2.2093023255813953, + "grad_norm": 0.3903545245483966, + "learning_rate": 3.461235875304577e-05, + "loss": 0.1848, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20105893909931183, + "step": 1425, + "valid_targets_mean": 4361.9, + "valid_targets_min": 250 + }, + { + "epoch": 2.2170542635658914, + "grad_norm": 0.3750810698508955, + "learning_rate": 3.455945526555153e-05, + "loss": 0.1867, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17983251810073853, + "step": 1430, + "valid_targets_mean": 5131.6, + "valid_targets_min": 2493 + }, + { + "epoch": 2.2248062015503876, + "grad_norm": 0.37223290231346184, + "learning_rate": 3.450633416212162e-05, + "loss": 0.1922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16431176662445068, + "step": 1435, + "valid_targets_mean": 4377.4, + "valid_targets_min": 613 + }, + { + "epoch": 2.2325581395348837, + "grad_norm": 0.3352051342701091, + "learning_rate": 3.4452996236741744e-05, + "loss": 0.1976, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1869511604309082, + "step": 1440, + "valid_targets_mean": 5356.9, + "valid_targets_min": 2703 + }, + { + "epoch": 2.24031007751938, + "grad_norm": 0.432086006561443, + "learning_rate": 3.439944228663829e-05, + "loss": 0.2079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21285605430603027, + "step": 1445, + "valid_targets_mean": 4237.8, + "valid_targets_min": 1886 + }, + { + "epoch": 2.248062015503876, + "grad_norm": 0.3275443973114785, + "learning_rate": 3.434567311226656e-05, + "loss": 0.1757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15418857336044312, + "step": 1450, + "valid_targets_mean": 5285.2, + "valid_targets_min": 566 + }, + { + "epoch": 2.255813953488372, + "grad_norm": 0.3576878996712879, + "learning_rate": 3.4291689517298734e-05, + "loss": 0.1946, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.186420738697052, + "step": 1455, + "valid_targets_mean": 5496.6, + "valid_targets_min": 321 + }, + { + "epoch": 2.2635658914728682, + "grad_norm": 0.4962339025365947, + "learning_rate": 3.423749230861188e-05, + "loss": 0.1694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17092068493366241, + "step": 1460, + "valid_targets_mean": 3222.7, + "valid_targets_min": 402 + }, + { + "epoch": 2.2713178294573644, + "grad_norm": 0.4812137783785399, + "learning_rate": 3.418308229627588e-05, + "loss": 0.2163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27093812823295593, + "step": 1465, + "valid_targets_mean": 4862.1, + "valid_targets_min": 556 + }, + { + "epoch": 2.2790697674418605, + "grad_norm": 0.4153567019223971, + "learning_rate": 3.412846029354134e-05, + "loss": 0.1927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.237184539437294, + "step": 1470, + "valid_targets_mean": 4620.6, + "valid_targets_min": 455 + }, + { + "epoch": 2.2868217054263567, + "grad_norm": 0.3426626225579909, + "learning_rate": 3.4073627116827425e-05, + "loss": 0.1781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18062137067317963, + "step": 1475, + "valid_targets_mean": 5811.9, + "valid_targets_min": 2245 + }, + { + "epoch": 2.294573643410853, + "grad_norm": 0.38476093721663684, + "learning_rate": 3.4018583585709636e-05, + "loss": 0.1816, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18126948177814484, + "step": 1480, + "valid_targets_mean": 4559.2, + "valid_targets_min": 322 + }, + { + "epoch": 2.302325581395349, + "grad_norm": 0.4096766811442832, + "learning_rate": 3.39633305229076e-05, + "loss": 0.1877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20921531319618225, + "step": 1485, + "valid_targets_mean": 5167.4, + "valid_targets_min": 684 + }, + { + "epoch": 2.310077519379845, + "grad_norm": 0.38639646068488026, + "learning_rate": 3.390786875427275e-05, + "loss": 0.1811, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15107879042625427, + "step": 1490, + "valid_targets_mean": 4523.8, + "valid_targets_min": 1865 + }, + { + "epoch": 2.317829457364341, + "grad_norm": 0.4502008909642411, + "learning_rate": 3.385219910877599e-05, + "loss": 0.2037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1977536380290985, + "step": 1495, + "valid_targets_mean": 4173.2, + "valid_targets_min": 1948 + }, + { + "epoch": 2.3255813953488373, + "grad_norm": 0.3796879053543267, + "learning_rate": 3.3796322418495276e-05, + "loss": 0.1879, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1648637056350708, + "step": 1500, + "valid_targets_mean": 4416.7, + "valid_targets_min": 304 + }, + { + "epoch": 2.3333333333333335, + "grad_norm": 0.3789163353016251, + "learning_rate": 3.374023951860322e-05, + "loss": 0.1794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1563669592142105, + "step": 1505, + "valid_targets_mean": 5124.2, + "valid_targets_min": 2981 + }, + { + "epoch": 2.3410852713178296, + "grad_norm": 0.41364685765156334, + "learning_rate": 3.368395124735459e-05, + "loss": 0.1893, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16633863747119904, + "step": 1510, + "valid_targets_mean": 3882.1, + "valid_targets_min": 282 + }, + { + "epoch": 2.3488372093023258, + "grad_norm": 0.35667587906507076, + "learning_rate": 3.3627458446073775e-05, + "loss": 0.1673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13330259919166565, + "step": 1515, + "valid_targets_mean": 5481.6, + "valid_targets_min": 752 + }, + { + "epoch": 2.356589147286822, + "grad_norm": 0.3881816160616292, + "learning_rate": 3.357076195914221e-05, + "loss": 0.1789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18566307425498962, + "step": 1520, + "valid_targets_mean": 5012.3, + "valid_targets_min": 1855 + }, + { + "epoch": 2.3643410852713176, + "grad_norm": 0.4712604845214267, + "learning_rate": 3.351386263398578e-05, + "loss": 0.1833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19617591798305511, + "step": 1525, + "valid_targets_mean": 4073.4, + "valid_targets_min": 477 + }, + { + "epoch": 2.3720930232558137, + "grad_norm": 0.35048104567381255, + "learning_rate": 3.34567613210621e-05, + "loss": 0.1856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17707285284996033, + "step": 1530, + "valid_targets_mean": 5311.7, + "valid_targets_min": 968 + }, + { + "epoch": 2.37984496124031, + "grad_norm": 0.41501213355240485, + "learning_rate": 3.3399458873847865e-05, + "loss": 0.1839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18096289038658142, + "step": 1535, + "valid_targets_mean": 4257.4, + "valid_targets_min": 2687 + }, + { + "epoch": 2.387596899224806, + "grad_norm": 0.3935199588486319, + "learning_rate": 3.334195614882606e-05, + "loss": 0.2009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23064912855625153, + "step": 1540, + "valid_targets_mean": 5280.2, + "valid_targets_min": 726 + }, + { + "epoch": 2.395348837209302, + "grad_norm": 0.39245757715509993, + "learning_rate": 3.3284254005473164e-05, + "loss": 0.1729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14799197018146515, + "step": 1545, + "valid_targets_mean": 4357.3, + "valid_targets_min": 365 + }, + { + "epoch": 2.4031007751937983, + "grad_norm": 0.3960910530726775, + "learning_rate": 3.3226353306246296e-05, + "loss": 0.1802, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16391268372535706, + "step": 1550, + "valid_targets_mean": 4675.7, + "valid_targets_min": 472 + }, + { + "epoch": 2.4108527131782944, + "grad_norm": 0.399983594538759, + "learning_rate": 3.316825491657033e-05, + "loss": 0.1814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18162378668785095, + "step": 1555, + "valid_targets_mean": 4586.7, + "valid_targets_min": 336 + }, + { + "epoch": 2.4186046511627906, + "grad_norm": 0.4339613073850326, + "learning_rate": 3.310995970482498e-05, + "loss": 0.1812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18091610074043274, + "step": 1560, + "valid_targets_mean": 4878.5, + "valid_targets_min": 1433 + }, + { + "epoch": 2.4263565891472867, + "grad_norm": 0.40487359759812325, + "learning_rate": 3.3051468542331784e-05, + "loss": 0.1959, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17559471726417542, + "step": 1565, + "valid_targets_mean": 4204.3, + "valid_targets_min": 1999 + }, + { + "epoch": 2.434108527131783, + "grad_norm": 0.37204854898669026, + "learning_rate": 3.2992782303341104e-05, + "loss": 0.1642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1342979371547699, + "step": 1570, + "valid_targets_mean": 4676.9, + "valid_targets_min": 720 + }, + { + "epoch": 2.441860465116279, + "grad_norm": 0.43162119589856907, + "learning_rate": 3.2933901865019064e-05, + "loss": 0.203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2307354062795639, + "step": 1575, + "valid_targets_mean": 4931.0, + "valid_targets_min": 1914 + }, + { + "epoch": 2.449612403100775, + "grad_norm": 0.4576769904055123, + "learning_rate": 3.28748281074344e-05, + "loss": 0.1707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15882378816604614, + "step": 1580, + "valid_targets_mean": 3951.5, + "valid_targets_min": 1846 + }, + { + "epoch": 2.4573643410852712, + "grad_norm": 0.4015392257462172, + "learning_rate": 3.281556191354538e-05, + "loss": 0.1666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15286649763584137, + "step": 1585, + "valid_targets_mean": 5152.0, + "valid_targets_min": 285 + }, + { + "epoch": 2.4651162790697674, + "grad_norm": 0.42917005882818327, + "learning_rate": 3.2756104169186524e-05, + "loss": 0.1729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17661848664283752, + "step": 1590, + "valid_targets_mean": 4086.8, + "valid_targets_min": 568 + }, + { + "epoch": 2.4728682170542635, + "grad_norm": 0.4411320746574963, + "learning_rate": 3.269645576305541e-05, + "loss": 0.1927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.184126615524292, + "step": 1595, + "valid_targets_mean": 6700.9, + "valid_targets_min": 2494 + }, + { + "epoch": 2.4806201550387597, + "grad_norm": 0.4047963399547679, + "learning_rate": 3.2636617586699375e-05, + "loss": 0.1773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16849125921726227, + "step": 1600, + "valid_targets_mean": 4574.2, + "valid_targets_min": 600 + }, + { + "epoch": 2.488372093023256, + "grad_norm": 0.44245747159170795, + "learning_rate": 3.257659053450223e-05, + "loss": 0.188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20396322011947632, + "step": 1605, + "valid_targets_mean": 4482.4, + "valid_targets_min": 380 + }, + { + "epoch": 2.496124031007752, + "grad_norm": 0.43654194588770506, + "learning_rate": 3.251637550367082e-05, + "loss": 0.1768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19797450304031372, + "step": 1610, + "valid_targets_mean": 4816.7, + "valid_targets_min": 2959 + }, + { + "epoch": 2.503875968992248, + "grad_norm": 0.43464122667550903, + "learning_rate": 3.245597339422165e-05, + "loss": 0.1815, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1967172473669052, + "step": 1615, + "valid_targets_mean": 5667.7, + "valid_targets_min": 559 + }, + { + "epoch": 2.511627906976744, + "grad_norm": 0.4652650203493568, + "learning_rate": 3.2395385108967486e-05, + "loss": 0.1801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19761139154434204, + "step": 1620, + "valid_targets_mean": 4328.9, + "valid_targets_min": 314 + }, + { + "epoch": 2.5193798449612403, + "grad_norm": 0.40709753507837126, + "learning_rate": 3.233461155350375e-05, + "loss": 0.1843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20508559048175812, + "step": 1625, + "valid_targets_mean": 5249.1, + "valid_targets_min": 216 + }, + { + "epoch": 2.5271317829457365, + "grad_norm": 0.4491598648675841, + "learning_rate": 3.227365363619507e-05, + "loss": 0.1918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1938488781452179, + "step": 1630, + "valid_targets_mean": 3739.8, + "valid_targets_min": 2192 + }, + { + "epoch": 2.5348837209302326, + "grad_norm": 0.4742827982147315, + "learning_rate": 3.221251226816168e-05, + "loss": 0.1906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17324459552764893, + "step": 1635, + "valid_targets_mean": 3619.9, + "valid_targets_min": 325 + }, + { + "epoch": 2.5426356589147288, + "grad_norm": 0.40634153675562273, + "learning_rate": 3.21511883632658e-05, + "loss": 0.1762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17292270064353943, + "step": 1640, + "valid_targets_mean": 4721.2, + "valid_targets_min": 1866 + }, + { + "epoch": 2.550387596899225, + "grad_norm": 0.3502337789022322, + "learning_rate": 3.208968283809795e-05, + "loss": 0.1679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13709160685539246, + "step": 1645, + "valid_targets_mean": 5043.9, + "valid_targets_min": 1925 + }, + { + "epoch": 2.558139534883721, + "grad_norm": 0.46216095941040614, + "learning_rate": 3.202799661196331e-05, + "loss": 0.1881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22641834616661072, + "step": 1650, + "valid_targets_mean": 4146.6, + "valid_targets_min": 419 + }, + { + "epoch": 2.565891472868217, + "grad_norm": 0.3770036373887086, + "learning_rate": 3.196613060686791e-05, + "loss": 0.2024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19278298318386078, + "step": 1655, + "valid_targets_mean": 5648.9, + "valid_targets_min": 1948 + }, + { + "epoch": 2.5736434108527133, + "grad_norm": 0.3885989041243395, + "learning_rate": 3.1904085747504927e-05, + "loss": 0.179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17080789804458618, + "step": 1660, + "valid_targets_mean": 4642.9, + "valid_targets_min": 357 + }, + { + "epoch": 2.5813953488372094, + "grad_norm": 0.4011197982513075, + "learning_rate": 3.184186296124077e-05, + "loss": 0.1885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1865064799785614, + "step": 1665, + "valid_targets_mean": 5166.7, + "valid_targets_min": 330 + }, + { + "epoch": 2.5891472868217056, + "grad_norm": 0.3745953386421688, + "learning_rate": 3.1779463178101317e-05, + "loss": 0.1844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17259354889392853, + "step": 1670, + "valid_targets_mean": 4452.9, + "valid_targets_min": 2554 + }, + { + "epoch": 2.5968992248062017, + "grad_norm": 0.4235759003308612, + "learning_rate": 3.1716887330757935e-05, + "loss": 0.1797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17030495405197144, + "step": 1675, + "valid_targets_mean": 4332.0, + "valid_targets_min": 2478 + }, + { + "epoch": 2.604651162790698, + "grad_norm": 0.4854637458573741, + "learning_rate": 3.165413635451358e-05, + "loss": 0.1711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19602999091148376, + "step": 1680, + "valid_targets_mean": 3863.3, + "valid_targets_min": 355 + }, + { + "epoch": 2.612403100775194, + "grad_norm": 0.40699113598693853, + "learning_rate": 3.159121118728882e-05, + "loss": 0.1901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18548724055290222, + "step": 1685, + "valid_targets_mean": 4666.2, + "valid_targets_min": 1840 + }, + { + "epoch": 2.62015503875969, + "grad_norm": 0.34544920663620954, + "learning_rate": 3.152811276960778e-05, + "loss": 0.1751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14782579243183136, + "step": 1690, + "valid_targets_mean": 5451.3, + "valid_targets_min": 285 + }, + { + "epoch": 2.6279069767441863, + "grad_norm": 0.3927708940246696, + "learning_rate": 3.1464842044584134e-05, + "loss": 0.1937, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15799960494041443, + "step": 1695, + "valid_targets_mean": 5175.1, + "valid_targets_min": 719 + }, + { + "epoch": 2.6356589147286824, + "grad_norm": 0.40704479344798744, + "learning_rate": 3.140139995790697e-05, + "loss": 0.1888, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21562063694000244, + "step": 1700, + "valid_targets_mean": 4840.2, + "valid_targets_min": 330 + }, + { + "epoch": 2.6434108527131785, + "grad_norm": 0.4492192562060312, + "learning_rate": 3.1337787457826676e-05, + "loss": 0.2064, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20294302701950073, + "step": 1705, + "valid_targets_mean": 4130.9, + "valid_targets_min": 836 + }, + { + "epoch": 2.6511627906976747, + "grad_norm": 0.439737875037387, + "learning_rate": 3.127400549514075e-05, + "loss": 0.1844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22795331478118896, + "step": 1710, + "valid_targets_mean": 4858.8, + "valid_targets_min": 566 + }, + { + "epoch": 2.6589147286821704, + "grad_norm": 0.3937483563358435, + "learning_rate": 3.121005502317961e-05, + "loss": 0.1773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14470066130161285, + "step": 1715, + "valid_targets_mean": 4338.8, + "valid_targets_min": 808 + }, + { + "epoch": 2.6666666666666665, + "grad_norm": 0.43258662767772366, + "learning_rate": 3.114593699779233e-05, + "loss": 0.1793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18938937783241272, + "step": 1720, + "valid_targets_mean": 4000.3, + "valid_targets_min": 2352 + }, + { + "epoch": 2.6744186046511627, + "grad_norm": 0.38825080259416433, + "learning_rate": 3.108165237733235e-05, + "loss": 0.1896, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20304493606090546, + "step": 1725, + "valid_targets_mean": 5271.6, + "valid_targets_min": 2162 + }, + { + "epoch": 2.682170542635659, + "grad_norm": 0.5030719627542097, + "learning_rate": 3.101720212264315e-05, + "loss": 0.1836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1815469115972519, + "step": 1730, + "valid_targets_mean": 3065.4, + "valid_targets_min": 304 + }, + { + "epoch": 2.689922480620155, + "grad_norm": 0.48074649750101905, + "learning_rate": 3.0952587197043916e-05, + "loss": 0.1565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1928318440914154, + "step": 1735, + "valid_targets_mean": 3592.6, + "valid_targets_min": 639 + }, + { + "epoch": 2.697674418604651, + "grad_norm": 0.38224384351415913, + "learning_rate": 3.0887808566315123e-05, + "loss": 0.1699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16647925972938538, + "step": 1740, + "valid_targets_mean": 4713.6, + "valid_targets_min": 639 + }, + { + "epoch": 2.705426356589147, + "grad_norm": 0.33777469296551843, + "learning_rate": 3.0822867198684076e-05, + "loss": 0.1617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15009137988090515, + "step": 1745, + "valid_targets_mean": 6850.4, + "valid_targets_min": 2445 + }, + { + "epoch": 2.7131782945736433, + "grad_norm": 0.44540770518271033, + "learning_rate": 3.075776406481048e-05, + "loss": 0.185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18640130758285522, + "step": 1750, + "valid_targets_mean": 4726.7, + "valid_targets_min": 1933 + }, + { + "epoch": 2.7209302325581395, + "grad_norm": 0.44372094538107276, + "learning_rate": 3.0692500137771926e-05, + "loss": 0.183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22654184699058533, + "step": 1755, + "valid_targets_mean": 3954.4, + "valid_targets_min": 606 + }, + { + "epoch": 2.7286821705426356, + "grad_norm": 0.369529532189345, + "learning_rate": 3.062707639304928e-05, + "loss": 0.1822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17843511700630188, + "step": 1760, + "valid_targets_mean": 5725.1, + "valid_targets_min": 1885 + }, + { + "epoch": 2.7364341085271318, + "grad_norm": 0.40621309446299825, + "learning_rate": 3.0561493808512216e-05, + "loss": 0.1589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21624553203582764, + "step": 1765, + "valid_targets_mean": 4932.4, + "valid_targets_min": 2437 + }, + { + "epoch": 2.744186046511628, + "grad_norm": 0.4324605534368406, + "learning_rate": 3.0495753364404513e-05, + "loss": 0.1693, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1789764165878296, + "step": 1770, + "valid_targets_mean": 4427.4, + "valid_targets_min": 1030 + }, + { + "epoch": 2.751937984496124, + "grad_norm": 0.44772637547939986, + "learning_rate": 3.042985604332943e-05, + "loss": 0.1807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21675977110862732, + "step": 1775, + "valid_targets_mean": 4327.5, + "valid_targets_min": 783 + }, + { + "epoch": 2.75968992248062, + "grad_norm": 0.3320961105254609, + "learning_rate": 3.036380283023502e-05, + "loss": 0.1697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15626108646392822, + "step": 1780, + "valid_targets_mean": 5396.8, + "valid_targets_min": 646 + }, + { + "epoch": 2.7674418604651163, + "grad_norm": 0.37789640533310415, + "learning_rate": 3.0297594712399428e-05, + "loss": 0.1933, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15670335292816162, + "step": 1785, + "valid_targets_mean": 4688.8, + "valid_targets_min": 527 + }, + { + "epoch": 2.7751937984496124, + "grad_norm": 0.40427238608967736, + "learning_rate": 3.0231232679416088e-05, + "loss": 0.1932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2058807611465454, + "step": 1790, + "valid_targets_mean": 4991.1, + "valid_targets_min": 2320 + }, + { + "epoch": 2.7829457364341086, + "grad_norm": 0.3958864673220442, + "learning_rate": 3.0164717723178976e-05, + "loss": 0.1758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1790355145931244, + "step": 1795, + "valid_targets_mean": 4758.6, + "valid_targets_min": 623 + }, + { + "epoch": 2.7906976744186047, + "grad_norm": 0.3482330544899987, + "learning_rate": 3.0098050837867755e-05, + "loss": 0.183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15739408135414124, + "step": 1800, + "valid_targets_mean": 5299.1, + "valid_targets_min": 957 + }, + { + "epoch": 2.798449612403101, + "grad_norm": 0.41439611401362086, + "learning_rate": 3.003123301993295e-05, + "loss": 0.171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16220103204250336, + "step": 1805, + "valid_targets_mean": 4676.3, + "valid_targets_min": 1825 + }, + { + "epoch": 2.806201550387597, + "grad_norm": 0.41973002932333475, + "learning_rate": 2.9964265268081e-05, + "loss": 0.1814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15398794412612915, + "step": 1810, + "valid_targets_mean": 3951.7, + "valid_targets_min": 308 + }, + { + "epoch": 2.813953488372093, + "grad_norm": 0.41316632273979337, + "learning_rate": 2.9897148583259386e-05, + "loss": 0.1996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21641714870929718, + "step": 1815, + "valid_targets_mean": 4523.2, + "valid_targets_min": 630 + }, + { + "epoch": 2.8217054263565893, + "grad_norm": 0.44522140399389565, + "learning_rate": 2.982988396864165e-05, + "loss": 0.1832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18898218870162964, + "step": 1820, + "valid_targets_mean": 4400.8, + "valid_targets_min": 996 + }, + { + "epoch": 2.8294573643410854, + "grad_norm": 0.3738531998626034, + "learning_rate": 2.9762472429612375e-05, + "loss": 0.1813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15361550450325012, + "step": 1825, + "valid_targets_mean": 4821.5, + "valid_targets_min": 978 + }, + { + "epoch": 2.8372093023255816, + "grad_norm": 0.43083740204233706, + "learning_rate": 2.9694914973752194e-05, + "loss": 0.1722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1844104826450348, + "step": 1830, + "valid_targets_mean": 3693.3, + "valid_targets_min": 527 + }, + { + "epoch": 2.8449612403100772, + "grad_norm": 0.3659912670375286, + "learning_rate": 2.962721261082272e-05, + "loss": 0.1889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20017662644386292, + "step": 1835, + "valid_targets_mean": 6980.4, + "valid_targets_min": 3136 + }, + { + "epoch": 2.8527131782945734, + "grad_norm": 0.4567000170536253, + "learning_rate": 2.9559366352751445e-05, + "loss": 0.1919, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23465114831924438, + "step": 1840, + "valid_targets_mean": 4588.8, + "valid_targets_min": 2486 + }, + { + "epoch": 2.8604651162790695, + "grad_norm": 0.40929029223114577, + "learning_rate": 2.9491377213616618e-05, + "loss": 0.1747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18239933252334595, + "step": 1845, + "valid_targets_mean": 4133.6, + "valid_targets_min": 513 + }, + { + "epoch": 2.8682170542635657, + "grad_norm": 0.40525101121640406, + "learning_rate": 2.94232462096321e-05, + "loss": 0.1865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1767432689666748, + "step": 1850, + "valid_targets_mean": 4216.1, + "valid_targets_min": 516 + }, + { + "epoch": 2.875968992248062, + "grad_norm": 0.4366325773783958, + "learning_rate": 2.9354974359132135e-05, + "loss": 0.1826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19949214160442352, + "step": 1855, + "valid_targets_mean": 4033.6, + "valid_targets_min": 1922 + }, + { + "epoch": 2.883720930232558, + "grad_norm": 0.42935985240712937, + "learning_rate": 2.92865626825562e-05, + "loss": 0.1783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18675842881202698, + "step": 1860, + "valid_targets_mean": 4010.4, + "valid_targets_min": 946 + }, + { + "epoch": 2.891472868217054, + "grad_norm": 0.31457168919286277, + "learning_rate": 2.921801220243368e-05, + "loss": 0.1585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13208898901939392, + "step": 1865, + "valid_targets_mean": 6431.9, + "valid_targets_min": 2510 + }, + { + "epoch": 2.89922480620155, + "grad_norm": 0.39154537620319585, + "learning_rate": 2.9149323943368635e-05, + "loss": 0.1779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15021896362304688, + "step": 1870, + "valid_targets_mean": 5320.4, + "valid_targets_min": 2104 + }, + { + "epoch": 2.9069767441860463, + "grad_norm": 0.4215081843896084, + "learning_rate": 2.9080498932024462e-05, + "loss": 0.1708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19808581471443176, + "step": 1875, + "valid_targets_mean": 4261.8, + "valid_targets_min": 434 + }, + { + "epoch": 2.9147286821705425, + "grad_norm": 0.48180287200478444, + "learning_rate": 2.901153819710855e-05, + "loss": 0.1831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20577339828014374, + "step": 1880, + "valid_targets_mean": 3915.2, + "valid_targets_min": 551 + }, + { + "epoch": 2.9224806201550386, + "grad_norm": 0.4046284897843421, + "learning_rate": 2.8942442769356896e-05, + "loss": 0.1648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20008404552936554, + "step": 1885, + "valid_targets_mean": 5054.2, + "valid_targets_min": 2545 + }, + { + "epoch": 2.9302325581395348, + "grad_norm": 0.3202021785981192, + "learning_rate": 2.8873213681518747e-05, + "loss": 0.1676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14765912294387817, + "step": 1890, + "valid_targets_mean": 5971.2, + "valid_targets_min": 1835 + }, + { + "epoch": 2.937984496124031, + "grad_norm": 0.3975741557425795, + "learning_rate": 2.8803851968341092e-05, + "loss": 0.1965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17159457504749298, + "step": 1895, + "valid_targets_mean": 4565.2, + "valid_targets_min": 1942 + }, + { + "epoch": 2.945736434108527, + "grad_norm": 0.46251815005036806, + "learning_rate": 2.873435866655326e-05, + "loss": 0.1843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24192339181900024, + "step": 1900, + "valid_targets_mean": 4237.6, + "valid_targets_min": 326 + }, + { + "epoch": 2.953488372093023, + "grad_norm": 0.41403119618696854, + "learning_rate": 2.8664734814851377e-05, + "loss": 0.1783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18902885913848877, + "step": 1905, + "valid_targets_mean": 4557.3, + "valid_targets_min": 849 + }, + { + "epoch": 2.9612403100775193, + "grad_norm": 0.44851409111675167, + "learning_rate": 2.8594981453882874e-05, + "loss": 0.1656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20263975858688354, + "step": 1910, + "valid_targets_mean": 3717.8, + "valid_targets_min": 926 + }, + { + "epoch": 2.9689922480620154, + "grad_norm": 0.42903678457431993, + "learning_rate": 2.85250996262309e-05, + "loss": 0.1957, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23578867316246033, + "step": 1915, + "valid_targets_mean": 4934.7, + "valid_targets_min": 685 + }, + { + "epoch": 2.9767441860465116, + "grad_norm": 0.7267563478310851, + "learning_rate": 2.8455090376398783e-05, + "loss": 0.1792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18642187118530273, + "step": 1920, + "valid_targets_mean": 4939.8, + "valid_targets_min": 2461 + }, + { + "epoch": 2.9844961240310077, + "grad_norm": 0.45652911775394833, + "learning_rate": 2.8384954750794383e-05, + "loss": 0.1679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1863420158624649, + "step": 1925, + "valid_targets_mean": 4101.9, + "valid_targets_min": 386 + }, + { + "epoch": 2.992248062015504, + "grad_norm": 0.4162197137776795, + "learning_rate": 2.8314693797714453e-05, + "loss": 0.1786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16806048154830933, + "step": 1930, + "valid_targets_mean": 3889.9, + "valid_targets_min": 266 + }, + { + "epoch": 3.0, + "grad_norm": 0.4173426583399949, + "learning_rate": 2.8244308567328995e-05, + "loss": 0.1705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1593976765871048, + "step": 1935, + "valid_targets_mean": 3841.9, + "valid_targets_min": 725 + }, + { + "epoch": 3.007751937984496, + "grad_norm": 0.31143453647713937, + "learning_rate": 2.8173800111665534e-05, + "loss": 0.1603, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1310623288154602, + "step": 1940, + "valid_targets_mean": 6594.8, + "valid_targets_min": 3125 + }, + { + "epoch": 3.0155038759689923, + "grad_norm": 0.37798360065942155, + "learning_rate": 2.8103169484593408e-05, + "loss": 0.1671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14523474872112274, + "step": 1945, + "valid_targets_mean": 4545.6, + "valid_targets_min": 391 + }, + { + "epoch": 3.0232558139534884, + "grad_norm": 0.4193657818911918, + "learning_rate": 2.8032417741808026e-05, + "loss": 0.1825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19101709127426147, + "step": 1950, + "valid_targets_mean": 4638.9, + "valid_targets_min": 1098 + }, + { + "epoch": 3.0310077519379846, + "grad_norm": 0.4292392293342452, + "learning_rate": 2.7961545940815073e-05, + "loss": 0.1683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14101950824260712, + "step": 1955, + "valid_targets_mean": 4463.8, + "valid_targets_min": 734 + }, + { + "epoch": 3.0387596899224807, + "grad_norm": 0.445566159385126, + "learning_rate": 2.7890555140914712e-05, + "loss": 0.1761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15082740783691406, + "step": 1960, + "valid_targets_mean": 4373.4, + "valid_targets_min": 196 + }, + { + "epoch": 3.046511627906977, + "grad_norm": 0.3879287131571122, + "learning_rate": 2.7819446403185737e-05, + "loss": 0.1633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.155793234705925, + "step": 1965, + "valid_targets_mean": 4823.5, + "valid_targets_min": 424 + }, + { + "epoch": 3.054263565891473, + "grad_norm": 0.4580658977138339, + "learning_rate": 2.774822079046973e-05, + "loss": 0.1615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1928856372833252, + "step": 1970, + "valid_targets_mean": 4411.8, + "valid_targets_min": 994 + }, + { + "epoch": 3.062015503875969, + "grad_norm": 0.37791164078529926, + "learning_rate": 2.7676879367355182e-05, + "loss": 0.1947, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22396092116832733, + "step": 1975, + "valid_targets_mean": 6053.8, + "valid_targets_min": 341 + }, + { + "epoch": 3.0697674418604652, + "grad_norm": 0.5813711025395043, + "learning_rate": 2.7605423200161544e-05, + "loss": 0.1766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20629319548606873, + "step": 1980, + "valid_targets_mean": 2834.8, + "valid_targets_min": 594 + }, + { + "epoch": 3.0775193798449614, + "grad_norm": 0.5229150181707595, + "learning_rate": 2.753385335692334e-05, + "loss": 0.1801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19532498717308044, + "step": 1985, + "valid_targets_mean": 5435.9, + "valid_targets_min": 2333 + }, + { + "epoch": 3.0852713178294575, + "grad_norm": 0.5039292916210872, + "learning_rate": 2.7462170907374152e-05, + "loss": 0.2046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2393975555896759, + "step": 1990, + "valid_targets_mean": 3852.1, + "valid_targets_min": 621 + }, + { + "epoch": 3.0930232558139537, + "grad_norm": 0.4334986389248226, + "learning_rate": 2.7390376922930676e-05, + "loss": 0.1536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2017873227596283, + "step": 1995, + "valid_targets_mean": 4789.2, + "valid_targets_min": 578 + }, + { + "epoch": 3.10077519379845, + "grad_norm": 0.48940426768725165, + "learning_rate": 2.7318472476676678e-05, + "loss": 0.1677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20767296850681305, + "step": 2000, + "valid_targets_mean": 4572.1, + "valid_targets_min": 564 + }, + { + "epoch": 3.108527131782946, + "grad_norm": 0.45021483225829007, + "learning_rate": 2.7246458643346954e-05, + "loss": 0.1722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19874969124794006, + "step": 2005, + "valid_targets_mean": 4382.3, + "valid_targets_min": 400 + }, + { + "epoch": 3.116279069767442, + "grad_norm": 0.39974386281581625, + "learning_rate": 2.7174336499311306e-05, + "loss": 0.1615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15649522840976715, + "step": 2010, + "valid_targets_mean": 4884.1, + "valid_targets_min": 1197 + }, + { + "epoch": 3.124031007751938, + "grad_norm": 0.4810534052770901, + "learning_rate": 2.7102107122558388e-05, + "loss": 0.1697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.196878120303154, + "step": 2015, + "valid_targets_mean": 4465.6, + "valid_targets_min": 422 + }, + { + "epoch": 3.1317829457364343, + "grad_norm": 0.4405420076380904, + "learning_rate": 2.7029771592679667e-05, + "loss": 0.1832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2112666368484497, + "step": 2020, + "valid_targets_mean": 4233.7, + "valid_targets_min": 2211 + }, + { + "epoch": 3.13953488372093, + "grad_norm": 0.435544733463545, + "learning_rate": 2.695733099085322e-05, + "loss": 0.1738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1643523871898651, + "step": 2025, + "valid_targets_mean": 4607.8, + "valid_targets_min": 313 + }, + { + "epoch": 3.147286821705426, + "grad_norm": 0.39286659071704916, + "learning_rate": 2.6884786399827617e-05, + "loss": 0.1654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1477556824684143, + "step": 2030, + "valid_targets_mean": 4724.6, + "valid_targets_min": 1978 + }, + { + "epoch": 3.1550387596899223, + "grad_norm": 0.3724002460323432, + "learning_rate": 2.6812138903905725e-05, + "loss": 0.1646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16990327835083008, + "step": 2035, + "valid_targets_mean": 4912.9, + "valid_targets_min": 2789 + }, + { + "epoch": 3.1627906976744184, + "grad_norm": 0.3944253467485725, + "learning_rate": 2.6739389588928496e-05, + "loss": 0.1538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15294812619686127, + "step": 2040, + "valid_targets_mean": 4411.3, + "valid_targets_min": 470 + }, + { + "epoch": 3.1705426356589146, + "grad_norm": 0.4077204261905621, + "learning_rate": 2.6666539542258742e-05, + "loss": 0.1918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2099585086107254, + "step": 2045, + "valid_targets_mean": 5898.4, + "valid_targets_min": 2703 + }, + { + "epoch": 3.1782945736434107, + "grad_norm": 0.3700464997656355, + "learning_rate": 2.6593589852764886e-05, + "loss": 0.1866, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17447850108146667, + "step": 2050, + "valid_targets_mean": 5101.3, + "valid_targets_min": 242 + }, + { + "epoch": 3.186046511627907, + "grad_norm": 0.4095360723042356, + "learning_rate": 2.6520541610804688e-05, + "loss": 0.1655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18860718607902527, + "step": 2055, + "valid_targets_mean": 4771.2, + "valid_targets_min": 2471 + }, + { + "epoch": 3.193798449612403, + "grad_norm": 0.42170127535958324, + "learning_rate": 2.6447395908208933e-05, + "loss": 0.1719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19056911766529083, + "step": 2060, + "valid_targets_mean": 4609.7, + "valid_targets_min": 487 + }, + { + "epoch": 3.201550387596899, + "grad_norm": 0.3731793258953492, + "learning_rate": 2.6374153838265117e-05, + "loss": 0.1551, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14581088721752167, + "step": 2065, + "valid_targets_mean": 4326.2, + "valid_targets_min": 282 + }, + { + "epoch": 3.2093023255813953, + "grad_norm": 0.39762533461899896, + "learning_rate": 2.6300816495701124e-05, + "loss": 0.1697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15880948305130005, + "step": 2070, + "valid_targets_mean": 4789.0, + "valid_targets_min": 1925 + }, + { + "epoch": 3.2170542635658914, + "grad_norm": 0.44748063624921613, + "learning_rate": 2.6227384976668848e-05, + "loss": 0.1763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.170176699757576, + "step": 2075, + "valid_targets_mean": 3995.2, + "valid_targets_min": 1668 + }, + { + "epoch": 3.2248062015503876, + "grad_norm": 0.36135621157101455, + "learning_rate": 2.6153860378727805e-05, + "loss": 0.1568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1492878496646881, + "step": 2080, + "valid_targets_mean": 4992.2, + "valid_targets_min": 330 + }, + { + "epoch": 3.2325581395348837, + "grad_norm": 0.4154124841987222, + "learning_rate": 2.6080243800828742e-05, + "loss": 0.1655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17003397643566132, + "step": 2085, + "valid_targets_mean": 4658.8, + "valid_targets_min": 1209 + }, + { + "epoch": 3.24031007751938, + "grad_norm": 0.37368780763861736, + "learning_rate": 2.600653634329719e-05, + "loss": 0.1793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1633392572402954, + "step": 2090, + "valid_targets_mean": 4566.2, + "valid_targets_min": 2411 + }, + { + "epoch": 3.248062015503876, + "grad_norm": 0.3979676356096421, + "learning_rate": 2.593273910781705e-05, + "loss": 0.1634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17785364389419556, + "step": 2095, + "valid_targets_mean": 4691.6, + "valid_targets_min": 432 + }, + { + "epoch": 3.255813953488372, + "grad_norm": 0.4225515784722321, + "learning_rate": 2.585885319741409e-05, + "loss": 0.1782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18224605917930603, + "step": 2100, + "valid_targets_mean": 4253.3, + "valid_targets_min": 756 + }, + { + "epoch": 3.2635658914728682, + "grad_norm": 0.38016881244736056, + "learning_rate": 2.5784879716439495e-05, + "loss": 0.1527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12859958410263062, + "step": 2105, + "valid_targets_mean": 3910.2, + "valid_targets_min": 549 + }, + { + "epoch": 3.2713178294573644, + "grad_norm": 0.5201402499872219, + "learning_rate": 2.5710819770553322e-05, + "loss": 0.1521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15190503001213074, + "step": 2110, + "valid_targets_mean": 4282.5, + "valid_targets_min": 460 + }, + { + "epoch": 3.2790697674418605, + "grad_norm": 0.36744579209677214, + "learning_rate": 2.5636674466708002e-05, + "loss": 0.1595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14565041661262512, + "step": 2115, + "valid_targets_mean": 4384.2, + "valid_targets_min": 328 + }, + { + "epoch": 3.2868217054263567, + "grad_norm": 0.41793705033807765, + "learning_rate": 2.5562444913131802e-05, + "loss": 0.1838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16959373652935028, + "step": 2120, + "valid_targets_mean": 4006.3, + "valid_targets_min": 412 + }, + { + "epoch": 3.294573643410853, + "grad_norm": 0.4168288113393459, + "learning_rate": 2.548813221931221e-05, + "loss": 0.1637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15278933942317963, + "step": 2125, + "valid_targets_mean": 4835.8, + "valid_targets_min": 1716 + }, + { + "epoch": 3.302325581395349, + "grad_norm": 0.43979678843894293, + "learning_rate": 2.5413737495979426e-05, + "loss": 0.1674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16088305413722992, + "step": 2130, + "valid_targets_mean": 4291.2, + "valid_targets_min": 2104 + }, + { + "epoch": 3.310077519379845, + "grad_norm": 0.3934052531036587, + "learning_rate": 2.5339261855089694e-05, + "loss": 0.1657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15298032760620117, + "step": 2135, + "valid_targets_mean": 4588.1, + "valid_targets_min": 329 + }, + { + "epoch": 3.317829457364341, + "grad_norm": 0.36169781906861537, + "learning_rate": 2.5264706409808725e-05, + "loss": 0.1795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16061750054359436, + "step": 2140, + "valid_targets_mean": 5402.7, + "valid_targets_min": 709 + }, + { + "epoch": 3.3255813953488373, + "grad_norm": 0.4010813470774762, + "learning_rate": 2.5190072274495028e-05, + "loss": 0.1711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1538451462984085, + "step": 2145, + "valid_targets_mean": 4433.6, + "valid_targets_min": 339 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 0.3582230333794788, + "learning_rate": 2.511536056468328e-05, + "loss": 0.1496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13346490263938904, + "step": 2150, + "valid_targets_mean": 4958.1, + "valid_targets_min": 676 + }, + { + "epoch": 3.3410852713178296, + "grad_norm": 0.4020066100217078, + "learning_rate": 2.5040572397067645e-05, + "loss": 0.1679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19228683412075043, + "step": 2155, + "valid_targets_mean": 5232.5, + "valid_targets_min": 760 + }, + { + "epoch": 3.3488372093023258, + "grad_norm": 0.3855313842797188, + "learning_rate": 2.4965708889485062e-05, + "loss": 0.1718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17724131047725677, + "step": 2160, + "valid_targets_mean": 4920.0, + "valid_targets_min": 1946 + }, + { + "epoch": 3.356589147286822, + "grad_norm": 0.4532744687578762, + "learning_rate": 2.489077116089858e-05, + "loss": 0.1914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22503584623336792, + "step": 2165, + "valid_targets_mean": 4651.2, + "valid_targets_min": 272 + }, + { + "epoch": 3.3643410852713176, + "grad_norm": 0.4366754770288092, + "learning_rate": 2.4815760331380578e-05, + "loss": 0.1911, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2072749137878418, + "step": 2170, + "valid_targets_mean": 4974.6, + "valid_targets_min": 450 + }, + { + "epoch": 3.3720930232558137, + "grad_norm": 0.5215057560813444, + "learning_rate": 2.4740677522096078e-05, + "loss": 0.1689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2247978001832962, + "step": 2175, + "valid_targets_mean": 3434.2, + "valid_targets_min": 352 + }, + { + "epoch": 3.37984496124031, + "grad_norm": 0.43695153967165795, + "learning_rate": 2.4665523855285954e-05, + "loss": 0.163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1621866375207901, + "step": 2180, + "valid_targets_mean": 3792.3, + "valid_targets_min": 364 + }, + { + "epoch": 3.387596899224806, + "grad_norm": 0.40940303711183107, + "learning_rate": 2.4590300454250164e-05, + "loss": 0.1444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14481836557388306, + "step": 2185, + "valid_targets_mean": 4906.2, + "valid_targets_min": 2115 + }, + { + "epoch": 3.395348837209302, + "grad_norm": 0.41242022872473294, + "learning_rate": 2.4515008443330963e-05, + "loss": 0.1706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16337113082408905, + "step": 2190, + "valid_targets_mean": 4234.1, + "valid_targets_min": 339 + }, + { + "epoch": 3.4031007751937983, + "grad_norm": 0.34565747520342643, + "learning_rate": 2.4439648947896103e-05, + "loss": 0.1771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14244988560676575, + "step": 2195, + "valid_targets_mean": 5911.4, + "valid_targets_min": 216 + }, + { + "epoch": 3.4108527131782944, + "grad_norm": 0.47813710313391616, + "learning_rate": 2.4364223094322004e-05, + "loss": 0.1674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16893121600151062, + "step": 2200, + "valid_targets_mean": 4232.1, + "valid_targets_min": 284 + }, + { + "epoch": 3.4186046511627906, + "grad_norm": 0.41110932893026, + "learning_rate": 2.4288732009976928e-05, + "loss": 0.1566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14816223084926605, + "step": 2205, + "valid_targets_mean": 4381.5, + "valid_targets_min": 2163 + }, + { + "epoch": 3.4263565891472867, + "grad_norm": 0.4329916569520111, + "learning_rate": 2.4213176823204103e-05, + "loss": 0.1709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15532240271568298, + "step": 2210, + "valid_targets_mean": 3792.0, + "valid_targets_min": 575 + }, + { + "epoch": 3.434108527131783, + "grad_norm": 0.47293777686859195, + "learning_rate": 2.4137558663304907e-05, + "loss": 0.1868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17490163445472717, + "step": 2215, + "valid_targets_mean": 4627.6, + "valid_targets_min": 304 + }, + { + "epoch": 3.441860465116279, + "grad_norm": 0.42026578348250343, + "learning_rate": 2.4061878660521927e-05, + "loss": 0.1764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14874975383281708, + "step": 2220, + "valid_targets_mean": 3723.0, + "valid_targets_min": 2346 + }, + { + "epoch": 3.449612403100775, + "grad_norm": 0.5448600259555658, + "learning_rate": 2.3986137946022132e-05, + "loss": 0.1755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23847255110740662, + "step": 2225, + "valid_targets_mean": 3648.1, + "valid_targets_min": 556 + }, + { + "epoch": 3.4573643410852712, + "grad_norm": 0.3526959052609803, + "learning_rate": 2.3910337651879902e-05, + "loss": 0.1745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15136197209358215, + "step": 2230, + "valid_targets_mean": 5305.6, + "valid_targets_min": 632 + }, + { + "epoch": 3.4651162790697674, + "grad_norm": 0.4609105179523114, + "learning_rate": 2.3834478911060144e-05, + "loss": 0.1824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1996048092842102, + "step": 2235, + "valid_targets_mean": 4194.4, + "valid_targets_min": 628 + }, + { + "epoch": 3.4728682170542635, + "grad_norm": 0.3632917288631249, + "learning_rate": 2.3758562857401365e-05, + "loss": 0.1818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14042022824287415, + "step": 2240, + "valid_targets_mean": 4892.4, + "valid_targets_min": 431 + }, + { + "epoch": 3.4806201550387597, + "grad_norm": 0.3596297602071797, + "learning_rate": 2.3682590625598682e-05, + "loss": 0.1664, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1568746566772461, + "step": 2245, + "valid_targets_mean": 5468.7, + "valid_targets_min": 818 + }, + { + "epoch": 3.488372093023256, + "grad_norm": 0.36656096196128835, + "learning_rate": 2.360656335118692e-05, + "loss": 0.1764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15595552325248718, + "step": 2250, + "valid_targets_mean": 4770.1, + "valid_targets_min": 330 + }, + { + "epoch": 3.496124031007752, + "grad_norm": 0.4048396000613103, + "learning_rate": 2.3530482170523604e-05, + "loss": 0.1691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2218673974275589, + "step": 2255, + "valid_targets_mean": 4977.7, + "valid_targets_min": 346 + }, + { + "epoch": 3.503875968992248, + "grad_norm": 0.4120002351387735, + "learning_rate": 2.3454348220771966e-05, + "loss": 0.1876, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1626529097557068, + "step": 2260, + "valid_targets_mean": 4942.2, + "valid_targets_min": 1938 + }, + { + "epoch": 3.511627906976744, + "grad_norm": 0.46155281185784397, + "learning_rate": 2.337816263988397e-05, + "loss": 0.1677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2315581887960434, + "step": 2265, + "valid_targets_mean": 5495.6, + "valid_targets_min": 1092 + }, + { + "epoch": 3.5193798449612403, + "grad_norm": 0.4144849480419605, + "learning_rate": 2.3301926566583292e-05, + "loss": 0.1816, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.166578471660614, + "step": 2270, + "valid_targets_mean": 4457.1, + "valid_targets_min": 2280 + }, + { + "epoch": 3.5271317829457365, + "grad_norm": 0.4109601135912686, + "learning_rate": 2.3225641140348306e-05, + "loss": 0.1701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17378325760364532, + "step": 2275, + "valid_targets_mean": 4608.7, + "valid_targets_min": 551 + }, + { + "epoch": 3.5348837209302326, + "grad_norm": 0.4582644976154119, + "learning_rate": 2.3149307501395056e-05, + "loss": 0.1446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15773174166679382, + "step": 2280, + "valid_targets_mean": 4992.6, + "valid_targets_min": 740 + }, + { + "epoch": 3.5426356589147288, + "grad_norm": 0.46360151480816175, + "learning_rate": 2.3072926790660203e-05, + "loss": 0.1964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20285546779632568, + "step": 2285, + "valid_targets_mean": 4078.0, + "valid_targets_min": 532 + }, + { + "epoch": 3.550387596899225, + "grad_norm": 0.4408826537960868, + "learning_rate": 2.2996500149783973e-05, + "loss": 0.1868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13840195536613464, + "step": 2290, + "valid_targets_mean": 4301.3, + "valid_targets_min": 606 + }, + { + "epoch": 3.558139534883721, + "grad_norm": 0.41302173935323416, + "learning_rate": 2.292002872109309e-05, + "loss": 0.1806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19090792536735535, + "step": 2295, + "valid_targets_mean": 4607.9, + "valid_targets_min": 1945 + }, + { + "epoch": 3.565891472868217, + "grad_norm": 0.48709929421908504, + "learning_rate": 2.284351364758373e-05, + "loss": 0.1747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16162961721420288, + "step": 2300, + "valid_targets_mean": 2969.8, + "valid_targets_min": 411 + }, + { + "epoch": 3.5736434108527133, + "grad_norm": 0.34907911319518736, + "learning_rate": 2.2766956072904397e-05, + "loss": 0.1637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16097891330718994, + "step": 2305, + "valid_targets_mean": 5617.7, + "valid_targets_min": 343 + }, + { + "epoch": 3.5813953488372094, + "grad_norm": 0.41643473859763513, + "learning_rate": 2.2690357141338853e-05, + "loss": 0.1923, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16287288069725037, + "step": 2310, + "valid_targets_mean": 4270.3, + "valid_targets_min": 1388 + }, + { + "epoch": 3.5891472868217056, + "grad_norm": 0.36847179752276477, + "learning_rate": 2.2613717997789016e-05, + "loss": 0.1762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1488853245973587, + "step": 2315, + "valid_targets_mean": 4785.6, + "valid_targets_min": 661 + }, + { + "epoch": 3.5968992248062017, + "grad_norm": 0.41943679729107547, + "learning_rate": 2.2537039787757817e-05, + "loss": 0.1956, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19536623358726501, + "step": 2320, + "valid_targets_mean": 4583.6, + "valid_targets_min": 2379 + }, + { + "epoch": 3.604651162790698, + "grad_norm": 0.3773107032307372, + "learning_rate": 2.2460323657332138e-05, + "loss": 0.1756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13412323594093323, + "step": 2325, + "valid_targets_mean": 4938.5, + "valid_targets_min": 766 + }, + { + "epoch": 3.612403100775194, + "grad_norm": 0.41099143457360815, + "learning_rate": 2.2383570753165615e-05, + "loss": 0.1752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.146214097738266, + "step": 2330, + "valid_targets_mean": 4063.8, + "valid_targets_min": 520 + }, + { + "epoch": 3.62015503875969, + "grad_norm": 0.3494048218491353, + "learning_rate": 2.2306782222461547e-05, + "loss": 0.16, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1244577169418335, + "step": 2335, + "valid_targets_mean": 4674.1, + "valid_targets_min": 516 + }, + { + "epoch": 3.6279069767441863, + "grad_norm": 0.4137755237450408, + "learning_rate": 2.2229959212955735e-05, + "loss": 0.1557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1691616028547287, + "step": 2340, + "valid_targets_mean": 4078.2, + "valid_targets_min": 957 + }, + { + "epoch": 3.6356589147286824, + "grad_norm": 0.41202438782783035, + "learning_rate": 2.2153102872899313e-05, + "loss": 0.181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19243177771568298, + "step": 2345, + "valid_targets_mean": 5443.7, + "valid_targets_min": 684 + }, + { + "epoch": 3.6434108527131785, + "grad_norm": 0.4168252301471801, + "learning_rate": 2.20762143510416e-05, + "loss": 0.1601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1762881577014923, + "step": 2350, + "valid_targets_mean": 4094.8, + "valid_targets_min": 436 + }, + { + "epoch": 3.6511627906976747, + "grad_norm": 0.3658382509380567, + "learning_rate": 2.1999294796612926e-05, + "loss": 0.1635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13929161429405212, + "step": 2355, + "valid_targets_mean": 5087.1, + "valid_targets_min": 2622 + }, + { + "epoch": 3.6589147286821704, + "grad_norm": 0.806554291125336, + "learning_rate": 2.1922345359307468e-05, + "loss": 0.1862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24180075526237488, + "step": 2360, + "valid_targets_mean": 3961.2, + "valid_targets_min": 567 + }, + { + "epoch": 3.6666666666666665, + "grad_norm": 0.4010351294747989, + "learning_rate": 2.184536718926604e-05, + "loss": 0.1856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15781967341899872, + "step": 2365, + "valid_targets_mean": 4302.3, + "valid_targets_min": 481 + }, + { + "epoch": 3.6744186046511627, + "grad_norm": 0.4910349746865378, + "learning_rate": 2.1768361437058924e-05, + "loss": 0.1822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15803927183151245, + "step": 2370, + "valid_targets_mean": 5285.8, + "valid_targets_min": 1922 + }, + { + "epoch": 3.682170542635659, + "grad_norm": 0.5033930845464233, + "learning_rate": 2.1691329253668657e-05, + "loss": 0.1773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16929282248020172, + "step": 2375, + "valid_targets_mean": 3518.6, + "valid_targets_min": 661 + }, + { + "epoch": 3.689922480620155, + "grad_norm": 0.4812536807942641, + "learning_rate": 2.1614271790472835e-05, + "loss": 0.1666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16329653561115265, + "step": 2380, + "valid_targets_mean": 3963.1, + "valid_targets_min": 513 + }, + { + "epoch": 3.697674418604651, + "grad_norm": 0.4106493741878364, + "learning_rate": 2.1537190199226917e-05, + "loss": 0.1822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15833979845046997, + "step": 2385, + "valid_targets_mean": 4434.0, + "valid_targets_min": 678 + }, + { + "epoch": 3.705426356589147, + "grad_norm": 0.42703025254446647, + "learning_rate": 2.1460085632046983e-05, + "loss": 0.1842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19955435395240784, + "step": 2390, + "valid_targets_mean": 4425.7, + "valid_targets_min": 268 + }, + { + "epoch": 3.7131782945736433, + "grad_norm": 0.3400275265721782, + "learning_rate": 2.138295924139253e-05, + "loss": 0.1584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13872955739498138, + "step": 2395, + "valid_targets_mean": 5467.9, + "valid_targets_min": 319 + }, + { + "epoch": 3.7209302325581395, + "grad_norm": 0.3682059763421827, + "learning_rate": 2.1305812180049258e-05, + "loss": 0.1697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14761081337928772, + "step": 2400, + "valid_targets_mean": 5667.8, + "valid_targets_min": 2894 + }, + { + "epoch": 3.7286821705426356, + "grad_norm": 0.3974454201846236, + "learning_rate": 2.12286456011118e-05, + "loss": 0.1629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18874874711036682, + "step": 2405, + "valid_targets_mean": 5224.0, + "valid_targets_min": 3081 + }, + { + "epoch": 3.7364341085271318, + "grad_norm": 0.39427590403656537, + "learning_rate": 2.1151460657966543e-05, + "loss": 0.1731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14733751118183136, + "step": 2410, + "valid_targets_mean": 4060.3, + "valid_targets_min": 2348 + }, + { + "epoch": 3.744186046511628, + "grad_norm": 0.4347000971418473, + "learning_rate": 2.1074258504274324e-05, + "loss": 0.1691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16277927160263062, + "step": 2415, + "valid_targets_mean": 4161.8, + "valid_targets_min": 325 + }, + { + "epoch": 3.751937984496124, + "grad_norm": 0.46642613155184653, + "learning_rate": 2.099704029395325e-05, + "loss": 0.1635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17865529656410217, + "step": 2420, + "valid_targets_mean": 3506.7, + "valid_targets_min": 322 + }, + { + "epoch": 3.75968992248062, + "grad_norm": 0.36926786871608897, + "learning_rate": 2.0919807181161413e-05, + "loss": 0.182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1872124969959259, + "step": 2425, + "valid_targets_mean": 5992.2, + "valid_targets_min": 2637 + }, + { + "epoch": 3.7674418604651163, + "grad_norm": 0.40797959752050406, + "learning_rate": 2.0842560320279647e-05, + "loss": 0.1795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13483797013759613, + "step": 2430, + "valid_targets_mean": 3735.1, + "valid_targets_min": 396 + }, + { + "epoch": 3.7751937984496124, + "grad_norm": 0.3569093694297914, + "learning_rate": 2.0765300865894273e-05, + "loss": 0.1709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16950270533561707, + "step": 2435, + "valid_targets_mean": 5004.4, + "valid_targets_min": 341 + }, + { + "epoch": 3.7829457364341086, + "grad_norm": 0.489744210232547, + "learning_rate": 2.068802997277984e-05, + "loss": 0.1676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14903788268566132, + "step": 2440, + "valid_targets_mean": 4375.1, + "valid_targets_min": 613 + }, + { + "epoch": 3.7906976744186047, + "grad_norm": 0.4631630261801319, + "learning_rate": 2.061074879588187e-05, + "loss": 0.1761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17495691776275635, + "step": 2445, + "valid_targets_mean": 4443.9, + "valid_targets_min": 2333 + }, + { + "epoch": 3.798449612403101, + "grad_norm": 0.381627105046557, + "learning_rate": 2.0533458490299608e-05, + "loss": 0.172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19416113197803497, + "step": 2450, + "valid_targets_mean": 5910.8, + "valid_targets_min": 813 + }, + { + "epoch": 3.806201550387597, + "grad_norm": 0.3672369381288124, + "learning_rate": 2.0456160211268726e-05, + "loss": 0.1523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15462546050548553, + "step": 2455, + "valid_targets_mean": 4590.2, + "valid_targets_min": 425 + }, + { + "epoch": 3.813953488372093, + "grad_norm": 0.36355092211128553, + "learning_rate": 2.037885511414408e-05, + "loss": 0.1685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1489698588848114, + "step": 2460, + "valid_targets_mean": 4814.1, + "valid_targets_min": 250 + }, + { + "epoch": 3.8217054263565893, + "grad_norm": 0.5834913333456886, + "learning_rate": 2.030154435438243e-05, + "loss": 0.1806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17469245195388794, + "step": 2465, + "valid_targets_mean": 4683.1, + "valid_targets_min": 227 + }, + { + "epoch": 3.8294573643410854, + "grad_norm": 0.3750569957368585, + "learning_rate": 2.0224229087525176e-05, + "loss": 0.1638, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16605322062969208, + "step": 2470, + "valid_targets_mean": 5050.2, + "valid_targets_min": 2206 + }, + { + "epoch": 3.8372093023255816, + "grad_norm": 0.4159111267532109, + "learning_rate": 2.0146910469181083e-05, + "loss": 0.1798, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16885146498680115, + "step": 2475, + "valid_targets_mean": 4601.3, + "valid_targets_min": 761 + }, + { + "epoch": 3.8449612403100772, + "grad_norm": 0.3729264179998901, + "learning_rate": 2.006958965500901e-05, + "loss": 0.1699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15141122043132782, + "step": 2480, + "valid_targets_mean": 4849.9, + "valid_targets_min": 2352 + }, + { + "epoch": 3.8527131782945734, + "grad_norm": 0.5766276206946744, + "learning_rate": 1.9992267800700643e-05, + "loss": 0.1783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19176653027534485, + "step": 2485, + "valid_targets_mean": 3846.1, + "valid_targets_min": 731 + }, + { + "epoch": 3.8604651162790695, + "grad_norm": 0.3706800726667539, + "learning_rate": 1.991494606196321e-05, + "loss": 0.1515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1340998411178589, + "step": 2490, + "valid_targets_mean": 4474.6, + "valid_targets_min": 269 + }, + { + "epoch": 3.8682170542635657, + "grad_norm": 0.44584862226366007, + "learning_rate": 1.98376255945022e-05, + "loss": 0.1778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16283494234085083, + "step": 2495, + "valid_targets_mean": 4311.2, + "valid_targets_min": 1115 + }, + { + "epoch": 3.875968992248062, + "grad_norm": 0.3592938976849822, + "learning_rate": 1.9760307554004122e-05, + "loss": 0.1524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18098889291286469, + "step": 2500, + "valid_targets_mean": 5765.5, + "valid_targets_min": 528 + }, + { + "epoch": 3.883720930232558, + "grad_norm": 0.4223273599027322, + "learning_rate": 1.9682993096119197e-05, + "loss": 0.1693, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16485443711280823, + "step": 2505, + "valid_targets_mean": 4137.4, + "valid_targets_min": 901 + }, + { + "epoch": 3.891472868217054, + "grad_norm": 0.4448395910223574, + "learning_rate": 1.9605683376444086e-05, + "loss": 0.1817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1999254673719406, + "step": 2510, + "valid_targets_mean": 3733.8, + "valid_targets_min": 266 + }, + { + "epoch": 3.89922480620155, + "grad_norm": 0.44711782753625473, + "learning_rate": 1.9528379550504663e-05, + "loss": 0.1975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2123761773109436, + "step": 2515, + "valid_targets_mean": 3842.1, + "valid_targets_min": 489 + }, + { + "epoch": 3.9069767441860463, + "grad_norm": 0.3585629672419737, + "learning_rate": 1.945108277373869e-05, + "loss": 0.1668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16551396250724792, + "step": 2520, + "valid_targets_mean": 5320.1, + "valid_targets_min": 513 + }, + { + "epoch": 3.9147286821705425, + "grad_norm": 0.38593055920198166, + "learning_rate": 1.9373794201478554e-05, + "loss": 0.1681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17138083279132843, + "step": 2525, + "valid_targets_mean": 4754.8, + "valid_targets_min": 325 + }, + { + "epoch": 3.9224806201550386, + "grad_norm": 0.34993836610858403, + "learning_rate": 1.9296514988934018e-05, + "loss": 0.1532, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13611240684986115, + "step": 2530, + "valid_targets_mean": 4849.8, + "valid_targets_min": 299 + }, + { + "epoch": 3.9302325581395348, + "grad_norm": 0.38998762581393137, + "learning_rate": 1.921924629117498e-05, + "loss": 0.169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16259539127349854, + "step": 2535, + "valid_targets_mean": 4528.8, + "valid_targets_min": 1955 + }, + { + "epoch": 3.937984496124031, + "grad_norm": 0.4406751870739783, + "learning_rate": 1.9141989263114136e-05, + "loss": 0.185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1572207659482956, + "step": 2540, + "valid_targets_mean": 3975.7, + "valid_targets_min": 709 + }, + { + "epoch": 3.945736434108527, + "grad_norm": 0.45031010235314506, + "learning_rate": 1.9064745059489774e-05, + "loss": 0.1841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1765103042125702, + "step": 2545, + "valid_targets_mean": 3726.0, + "valid_targets_min": 282 + }, + { + "epoch": 3.953488372093023, + "grad_norm": 0.6078467791119643, + "learning_rate": 1.8987514834848503e-05, + "loss": 0.1611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17429549992084503, + "step": 2550, + "valid_targets_mean": 3989.9, + "valid_targets_min": 685 + }, + { + "epoch": 3.9612403100775193, + "grad_norm": 0.3631710161577126, + "learning_rate": 1.8910299743527993e-05, + "loss": 0.1697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1885673701763153, + "step": 2555, + "valid_targets_mean": 5834.6, + "valid_targets_min": 585 + }, + { + "epoch": 3.9689922480620154, + "grad_norm": 0.49167632435325864, + "learning_rate": 1.8833100939639694e-05, + "loss": 0.1624, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20458854734897614, + "step": 2560, + "valid_targets_mean": 3560.4, + "valid_targets_min": 419 + }, + { + "epoch": 3.9767441860465116, + "grad_norm": 0.3634420862575136, + "learning_rate": 1.875591957705166e-05, + "loss": 0.1785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13289107382297516, + "step": 2565, + "valid_targets_mean": 5413.6, + "valid_targets_min": 1805 + }, + { + "epoch": 3.9844961240310077, + "grad_norm": 0.4586334241085507, + "learning_rate": 1.867875680937122e-05, + "loss": 0.1691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2052847146987915, + "step": 2570, + "valid_targets_mean": 4061.1, + "valid_targets_min": 615 + }, + { + "epoch": 3.992248062015504, + "grad_norm": 0.43210061293605817, + "learning_rate": 1.860161378992778e-05, + "loss": 0.1779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2044166624546051, + "step": 2575, + "valid_targets_mean": 4605.4, + "valid_targets_min": 605 + }, + { + "epoch": 4.0, + "grad_norm": 0.37276933625985337, + "learning_rate": 1.8524491671755563e-05, + "loss": 0.1763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1616727113723755, + "step": 2580, + "valid_targets_mean": 5067.4, + "valid_targets_min": 527 + }, + { + "epoch": 4.007751937984496, + "grad_norm": 0.36563074448629657, + "learning_rate": 1.8447391607576416e-05, + "loss": 0.1441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13933786749839783, + "step": 2585, + "valid_targets_mean": 4913.7, + "valid_targets_min": 740 + }, + { + "epoch": 4.015503875968992, + "grad_norm": 0.4772926288066321, + "learning_rate": 1.8370314749782524e-05, + "loss": 0.1581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18599829077720642, + "step": 2590, + "valid_targets_mean": 4072.6, + "valid_targets_min": 666 + }, + { + "epoch": 4.023255813953488, + "grad_norm": 0.4964995939447069, + "learning_rate": 1.8293262250419215e-05, + "loss": 0.165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16809843480587006, + "step": 2595, + "valid_targets_mean": 3508.6, + "valid_targets_min": 2368 + }, + { + "epoch": 4.0310077519379846, + "grad_norm": 0.5174597685405774, + "learning_rate": 1.8216235261167765e-05, + "loss": 0.1731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2505348324775696, + "step": 2600, + "valid_targets_mean": 4262.7, + "valid_targets_min": 970 + }, + { + "epoch": 4.038759689922481, + "grad_norm": 0.40606990941323745, + "learning_rate": 1.8139234933328126e-05, + "loss": 0.1677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.138991117477417, + "step": 2605, + "valid_targets_mean": 5074.9, + "valid_targets_min": 680 + }, + { + "epoch": 4.046511627906977, + "grad_norm": 0.4634993018273127, + "learning_rate": 1.8062262417801767e-05, + "loss": 0.1539, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1454424411058426, + "step": 2610, + "valid_targets_mean": 4132.0, + "valid_targets_min": 315 + }, + { + "epoch": 4.054263565891473, + "grad_norm": 0.45594388080755227, + "learning_rate": 1.7985318865074445e-05, + "loss": 0.1558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13495397567749023, + "step": 2615, + "valid_targets_mean": 4592.6, + "valid_targets_min": 908 + }, + { + "epoch": 4.062015503875969, + "grad_norm": 0.3632193067972436, + "learning_rate": 1.790840542519903e-05, + "loss": 0.1581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14542677998542786, + "step": 2620, + "valid_targets_mean": 6443.2, + "valid_targets_min": 674 + }, + { + "epoch": 4.069767441860465, + "grad_norm": 0.4084216853540113, + "learning_rate": 1.7831523247778296e-05, + "loss": 0.1361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15561866760253906, + "step": 2625, + "valid_targets_mean": 4696.6, + "valid_targets_min": 669 + }, + { + "epoch": 4.077519379844961, + "grad_norm": 0.4622453535570728, + "learning_rate": 1.775467348194774e-05, + "loss": 0.1508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1541646271944046, + "step": 2630, + "valid_targets_mean": 4509.7, + "valid_targets_min": 585 + }, + { + "epoch": 4.0852713178294575, + "grad_norm": 0.36481781559899207, + "learning_rate": 1.767785727635843e-05, + "loss": 0.1445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1567380130290985, + "step": 2635, + "valid_targets_mean": 6221.8, + "valid_targets_min": 761 + }, + { + "epoch": 4.093023255813954, + "grad_norm": 0.3792078104165168, + "learning_rate": 1.7601075779159806e-05, + "loss": 0.1463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12530960142612457, + "step": 2640, + "valid_targets_mean": 4478.4, + "valid_targets_min": 361 + }, + { + "epoch": 4.10077519379845, + "grad_norm": 0.4219019340853787, + "learning_rate": 1.7524330137982524e-05, + "loss": 0.1563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18058227002620697, + "step": 2645, + "valid_targets_mean": 4726.9, + "valid_targets_min": 2654 + }, + { + "epoch": 4.108527131782946, + "grad_norm": 0.42766284883408445, + "learning_rate": 1.7447621499921342e-05, + "loss": 0.1555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16935157775878906, + "step": 2650, + "valid_targets_mean": 4662.6, + "valid_targets_min": 2874 + }, + { + "epoch": 4.116279069767442, + "grad_norm": 0.4480590715700904, + "learning_rate": 1.7370951011517906e-05, + "loss": 0.159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19573025405406952, + "step": 2655, + "valid_targets_mean": 4988.3, + "valid_targets_min": 887 + }, + { + "epoch": 4.124031007751938, + "grad_norm": 0.4647682503763762, + "learning_rate": 1.7294319818743677e-05, + "loss": 0.1599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1806863695383072, + "step": 2660, + "valid_targets_mean": 4018.7, + "valid_targets_min": 684 + }, + { + "epoch": 4.131782945736434, + "grad_norm": 0.33468042082332256, + "learning_rate": 1.7217729066982754e-05, + "loss": 0.1676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13625994324684143, + "step": 2665, + "valid_targets_mean": 5762.8, + "valid_targets_min": 3241 + }, + { + "epoch": 4.1395348837209305, + "grad_norm": 0.35879066729195264, + "learning_rate": 1.7141179901014798e-05, + "loss": 0.151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1351138949394226, + "step": 2670, + "valid_targets_mean": 5373.6, + "valid_targets_min": 2492 + }, + { + "epoch": 4.147286821705427, + "grad_norm": 0.38289139539979355, + "learning_rate": 1.7064673464997885e-05, + "loss": 0.1507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14373061060905457, + "step": 2675, + "valid_targets_mean": 4701.4, + "valid_targets_min": 1523 + }, + { + "epoch": 4.155038759689923, + "grad_norm": 0.3514618484402962, + "learning_rate": 1.698821090245141e-05, + "loss": 0.1576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12618312239646912, + "step": 2680, + "valid_targets_mean": 5099.0, + "valid_targets_min": 1436 + }, + { + "epoch": 4.162790697674419, + "grad_norm": 0.4029673695317988, + "learning_rate": 1.691179335623904e-05, + "loss": 0.151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15906205773353577, + "step": 2685, + "valid_targets_mean": 5508.1, + "valid_targets_min": 432 + }, + { + "epoch": 4.170542635658915, + "grad_norm": 0.4944425640026309, + "learning_rate": 1.6835421968551556e-05, + "loss": 0.1478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14945298433303833, + "step": 2690, + "valid_targets_mean": 3783.1, + "valid_targets_min": 770 + }, + { + "epoch": 4.178294573643411, + "grad_norm": 0.4380361387817116, + "learning_rate": 1.675909788088984e-05, + "loss": 0.1737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18563194572925568, + "step": 2695, + "valid_targets_mean": 4740.7, + "valid_targets_min": 2335 + }, + { + "epoch": 4.186046511627907, + "grad_norm": 0.4771028211600623, + "learning_rate": 1.6682822234047796e-05, + "loss": 0.1555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1575438231229782, + "step": 2700, + "valid_targets_mean": 3456.6, + "valid_targets_min": 352 + }, + { + "epoch": 4.1937984496124034, + "grad_norm": 0.41423182041175527, + "learning_rate": 1.660659616809529e-05, + "loss": 0.1599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13387396931648254, + "step": 2705, + "valid_targets_mean": 4664.9, + "valid_targets_min": 1948 + }, + { + "epoch": 4.2015503875969, + "grad_norm": 0.530813584525465, + "learning_rate": 1.653042082236112e-05, + "loss": 0.1774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3004525601863861, + "step": 2710, + "valid_targets_mean": 4756.2, + "valid_targets_min": 332 + }, + { + "epoch": 4.209302325581396, + "grad_norm": 0.44640047447531017, + "learning_rate": 1.6454297335415975e-05, + "loss": 0.167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1683686077594757, + "step": 2715, + "valid_targets_mean": 3983.3, + "valid_targets_min": 558 + }, + { + "epoch": 4.217054263565892, + "grad_norm": 0.4231137236979658, + "learning_rate": 1.6378226845055454e-05, + "loss": 0.1657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17019568383693695, + "step": 2720, + "valid_targets_mean": 4835.2, + "valid_targets_min": 600 + }, + { + "epoch": 4.224806201550388, + "grad_norm": 0.386736159227142, + "learning_rate": 1.6302210488282993e-05, + "loss": 0.1633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18334552645683289, + "step": 2725, + "valid_targets_mean": 5246.4, + "valid_targets_min": 479 + }, + { + "epoch": 4.232558139534884, + "grad_norm": 0.4229332730901514, + "learning_rate": 1.6226249401292932e-05, + "loss": 0.1447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1335538774728775, + "step": 2730, + "valid_targets_mean": 4478.2, + "valid_targets_min": 265 + }, + { + "epoch": 4.24031007751938, + "grad_norm": 0.5239959011141749, + "learning_rate": 1.6150344719453513e-05, + "loss": 0.1599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13435792922973633, + "step": 2735, + "valid_targets_mean": 4364.5, + "valid_targets_min": 389 + }, + { + "epoch": 4.248062015503876, + "grad_norm": 0.3585379143026963, + "learning_rate": 1.6074497577289893e-05, + "loss": 0.1428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12283202260732651, + "step": 2740, + "valid_targets_mean": 5174.9, + "valid_targets_min": 2143 + }, + { + "epoch": 4.2558139534883725, + "grad_norm": 0.41779854478657813, + "learning_rate": 1.599870910846721e-05, + "loss": 0.1634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13781797885894775, + "step": 2745, + "valid_targets_mean": 4340.5, + "valid_targets_min": 2348 + }, + { + "epoch": 4.263565891472869, + "grad_norm": 0.47639510138543717, + "learning_rate": 1.5922980445773618e-05, + "loss": 0.1688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20977307856082916, + "step": 2750, + "valid_targets_mean": 4089.1, + "valid_targets_min": 570 + }, + { + "epoch": 4.271317829457364, + "grad_norm": 0.4321321807850601, + "learning_rate": 1.584731272110338e-05, + "loss": 0.1478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1279362142086029, + "step": 2755, + "valid_targets_mean": 4232.4, + "valid_targets_min": 661 + }, + { + "epoch": 4.27906976744186, + "grad_norm": 0.4163782737967656, + "learning_rate": 1.5771707065439925e-05, + "loss": 0.1586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1572471261024475, + "step": 2760, + "valid_targets_mean": 4172.6, + "valid_targets_min": 362 + }, + { + "epoch": 4.286821705426356, + "grad_norm": 0.44476929142723237, + "learning_rate": 1.5696164608838957e-05, + "loss": 0.1492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12877783179283142, + "step": 2765, + "valid_targets_mean": 3667.2, + "valid_targets_min": 324 + }, + { + "epoch": 4.294573643410852, + "grad_norm": 0.44033521779323154, + "learning_rate": 1.5620686480411568e-05, + "loss": 0.1793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1912871152162552, + "step": 2770, + "valid_targets_mean": 4556.9, + "valid_targets_min": 571 + }, + { + "epoch": 4.3023255813953485, + "grad_norm": 0.43835417771534674, + "learning_rate": 1.5545273808307358e-05, + "loss": 0.1762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18065080046653748, + "step": 2775, + "valid_targets_mean": 4829.0, + "valid_targets_min": 701 + }, + { + "epoch": 4.310077519379845, + "grad_norm": 0.4359801265972791, + "learning_rate": 1.546992771969755e-05, + "loss": 0.1548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1816236674785614, + "step": 2780, + "valid_targets_mean": 4372.9, + "valid_targets_min": 313 + }, + { + "epoch": 4.317829457364341, + "grad_norm": 0.4410756401612138, + "learning_rate": 1.5394649340758184e-05, + "loss": 0.1483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15419849753379822, + "step": 2785, + "valid_targets_mean": 5035.1, + "valid_targets_min": 2134 + }, + { + "epoch": 4.325581395348837, + "grad_norm": 0.4559093303470707, + "learning_rate": 1.5319439796653258e-05, + "loss": 0.1586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17408901453018188, + "step": 2790, + "valid_targets_mean": 4336.3, + "valid_targets_min": 1990 + }, + { + "epoch": 4.333333333333333, + "grad_norm": 0.3733614937743712, + "learning_rate": 1.5244300211517916e-05, + "loss": 0.1613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1421089917421341, + "step": 2795, + "valid_targets_mean": 4486.0, + "valid_targets_min": 266 + }, + { + "epoch": 4.341085271317829, + "grad_norm": 0.4384718248610877, + "learning_rate": 1.5169231708441625e-05, + "loss": 0.1641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15485528111457825, + "step": 2800, + "valid_targets_mean": 4390.1, + "valid_targets_min": 458 + }, + { + "epoch": 4.348837209302325, + "grad_norm": 0.35486111522759517, + "learning_rate": 1.5094235409451452e-05, + "loss": 0.1567, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14936892688274384, + "step": 2805, + "valid_targets_mean": 5619.9, + "valid_targets_min": 2606 + }, + { + "epoch": 4.3565891472868215, + "grad_norm": 0.4641264262776539, + "learning_rate": 1.5019312435495215e-05, + "loss": 0.1681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16389770805835724, + "step": 2810, + "valid_targets_mean": 4543.3, + "valid_targets_min": 531 + }, + { + "epoch": 4.364341085271318, + "grad_norm": 0.4095733450935873, + "learning_rate": 1.4944463906424773e-05, + "loss": 0.1602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15087318420410156, + "step": 2815, + "valid_targets_mean": 5138.6, + "valid_targets_min": 2324 + }, + { + "epoch": 4.372093023255814, + "grad_norm": 0.43224144450256036, + "learning_rate": 1.486969094097929e-05, + "loss": 0.1864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17015056312084198, + "step": 2820, + "valid_targets_mean": 4383.4, + "valid_targets_min": 196 + }, + { + "epoch": 4.37984496124031, + "grad_norm": 0.42039391089480094, + "learning_rate": 1.4794994656768485e-05, + "loss": 0.1835, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16320443153381348, + "step": 2825, + "valid_targets_mean": 4572.7, + "valid_targets_min": 1677 + }, + { + "epoch": 4.387596899224806, + "grad_norm": 0.4360528678353131, + "learning_rate": 1.4720376170255958e-05, + "loss": 0.1631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17029808461666107, + "step": 2830, + "valid_targets_mean": 4660.2, + "valid_targets_min": 2294 + }, + { + "epoch": 4.395348837209302, + "grad_norm": 0.47146218786067823, + "learning_rate": 1.464583659674248e-05, + "loss": 0.1666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19265028834342957, + "step": 2835, + "valid_targets_mean": 4134.2, + "valid_targets_min": 328 + }, + { + "epoch": 4.403100775193798, + "grad_norm": 0.44499692816713254, + "learning_rate": 1.4571377050349347e-05, + "loss": 0.1577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16304971277713776, + "step": 2840, + "valid_targets_mean": 4391.5, + "valid_targets_min": 370 + }, + { + "epoch": 4.410852713178294, + "grad_norm": 0.46432590434836263, + "learning_rate": 1.44969986440017e-05, + "loss": 0.1707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1540422886610031, + "step": 2845, + "valid_targets_mean": 4667.3, + "valid_targets_min": 335 + }, + { + "epoch": 4.4186046511627906, + "grad_norm": 0.4999077483071989, + "learning_rate": 1.4422702489411894e-05, + "loss": 0.162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18639114499092102, + "step": 2850, + "valid_targets_mean": 3962.9, + "valid_targets_min": 546 + }, + { + "epoch": 4.426356589147287, + "grad_norm": 0.405932784042207, + "learning_rate": 1.4348489697062925e-05, + "loss": 0.1603, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1444004476070404, + "step": 2855, + "valid_targets_mean": 4650.3, + "valid_targets_min": 1526 + }, + { + "epoch": 4.434108527131783, + "grad_norm": 0.3934528365825335, + "learning_rate": 1.4274361376191767e-05, + "loss": 0.1516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1553172767162323, + "step": 2860, + "valid_targets_mean": 5694.0, + "valid_targets_min": 2890 + }, + { + "epoch": 4.441860465116279, + "grad_norm": 0.40913794771831696, + "learning_rate": 1.420031863477283e-05, + "loss": 0.1581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1494569182395935, + "step": 2865, + "valid_targets_mean": 4331.6, + "valid_targets_min": 319 + }, + { + "epoch": 4.449612403100775, + "grad_norm": 0.47285700783628626, + "learning_rate": 1.4126362579501401e-05, + "loss": 0.1612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15232403576374054, + "step": 2870, + "valid_targets_mean": 3542.6, + "valid_targets_min": 370 + }, + { + "epoch": 4.457364341085271, + "grad_norm": 0.4650848027329248, + "learning_rate": 1.4052494315777091e-05, + "loss": 0.1726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1610431671142578, + "step": 2875, + "valid_targets_mean": 3635.2, + "valid_targets_min": 1079 + }, + { + "epoch": 4.465116279069767, + "grad_norm": 0.4653775809202961, + "learning_rate": 1.3978714947687308e-05, + "loss": 0.158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20595189929008484, + "step": 2880, + "valid_targets_mean": 4235.9, + "valid_targets_min": 453 + }, + { + "epoch": 4.4728682170542635, + "grad_norm": 0.3856889005381559, + "learning_rate": 1.390502557799077e-05, + "loss": 0.1645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14087346196174622, + "step": 2885, + "valid_targets_mean": 4527.4, + "valid_targets_min": 2189 + }, + { + "epoch": 4.48062015503876, + "grad_norm": 0.3945151120253326, + "learning_rate": 1.3831427308101021e-05, + "loss": 0.1623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1581602394580841, + "step": 2890, + "valid_targets_mean": 4957.9, + "valid_targets_min": 1819 + }, + { + "epoch": 4.488372093023256, + "grad_norm": 0.40342280135663167, + "learning_rate": 1.375792123806996e-05, + "loss": 0.1669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14058300852775574, + "step": 2895, + "valid_targets_mean": 4725.2, + "valid_targets_min": 643 + }, + { + "epoch": 4.496124031007752, + "grad_norm": 0.4036139924740885, + "learning_rate": 1.3684508466571385e-05, + "loss": 0.1639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18375971913337708, + "step": 2900, + "valid_targets_mean": 5136.7, + "valid_targets_min": 536 + }, + { + "epoch": 4.503875968992248, + "grad_norm": 0.5271710172062103, + "learning_rate": 1.3611190090884611e-05, + "loss": 0.1606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17250756919384003, + "step": 2905, + "valid_targets_mean": 3673.4, + "valid_targets_min": 249 + }, + { + "epoch": 4.511627906976744, + "grad_norm": 0.40659265968531066, + "learning_rate": 1.3537967206878036e-05, + "loss": 0.1541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15408380329608917, + "step": 2910, + "valid_targets_mean": 4791.4, + "valid_targets_min": 869 + }, + { + "epoch": 4.51937984496124, + "grad_norm": 0.3722111335904797, + "learning_rate": 1.3464840908992768e-05, + "loss": 0.1553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12856316566467285, + "step": 2915, + "valid_targets_mean": 4795.7, + "valid_targets_min": 2420 + }, + { + "epoch": 4.5271317829457365, + "grad_norm": 0.4556137521413307, + "learning_rate": 1.3391812290226272e-05, + "loss": 0.187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2201414704322815, + "step": 2920, + "valid_targets_mean": 4536.7, + "valid_targets_min": 694 + }, + { + "epoch": 4.534883720930233, + "grad_norm": 0.4743449244975186, + "learning_rate": 1.3318882442116048e-05, + "loss": 0.1578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21773910522460938, + "step": 2925, + "valid_targets_mean": 4512.2, + "valid_targets_min": 581 + }, + { + "epoch": 4.542635658914729, + "grad_norm": 0.47202078789113744, + "learning_rate": 1.3246052454723275e-05, + "loss": 0.1484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17931053042411804, + "step": 2930, + "valid_targets_mean": 4000.9, + "valid_targets_min": 683 + }, + { + "epoch": 4.550387596899225, + "grad_norm": 0.363075525223068, + "learning_rate": 1.3173323416616546e-05, + "loss": 0.1634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1436081826686859, + "step": 2935, + "valid_targets_mean": 5052.1, + "valid_targets_min": 766 + }, + { + "epoch": 4.558139534883721, + "grad_norm": 0.4252120402864096, + "learning_rate": 1.310069641485562e-05, + "loss": 0.163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1449616551399231, + "step": 2940, + "valid_targets_mean": 5067.6, + "valid_targets_min": 968 + }, + { + "epoch": 4.565891472868217, + "grad_norm": 0.41309349051810224, + "learning_rate": 1.3028172534975125e-05, + "loss": 0.1589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15337392687797546, + "step": 2945, + "valid_targets_mean": 4410.0, + "valid_targets_min": 625 + }, + { + "epoch": 4.573643410852713, + "grad_norm": 0.5160731043484095, + "learning_rate": 1.2955752860968356e-05, + "loss": 0.1697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1839904487133026, + "step": 2950, + "valid_targets_mean": 4195.2, + "valid_targets_min": 1004 + }, + { + "epoch": 4.5813953488372094, + "grad_norm": 0.39603413769839324, + "learning_rate": 1.2883438475271088e-05, + "loss": 0.1449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1449984461069107, + "step": 2955, + "valid_targets_mean": 4908.1, + "valid_targets_min": 1489 + }, + { + "epoch": 4.589147286821706, + "grad_norm": 0.4659985680055713, + "learning_rate": 1.2811230458745372e-05, + "loss": 0.1748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21567106246948242, + "step": 2960, + "valid_targets_mean": 4454.5, + "valid_targets_min": 665 + }, + { + "epoch": 4.596899224806202, + "grad_norm": 0.4523280898732182, + "learning_rate": 1.2739129890663386e-05, + "loss": 0.1546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15529918670654297, + "step": 2965, + "valid_targets_mean": 4419.5, + "valid_targets_min": 962 + }, + { + "epoch": 4.604651162790698, + "grad_norm": 0.3913593277889339, + "learning_rate": 1.266713784869131e-05, + "loss": 0.1801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15807433426380157, + "step": 2970, + "valid_targets_mean": 4846.1, + "valid_targets_min": 734 + }, + { + "epoch": 4.612403100775194, + "grad_norm": 0.4182683219376584, + "learning_rate": 1.2595255408873232e-05, + "loss": 0.1507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13988031446933746, + "step": 2975, + "valid_targets_mean": 4320.1, + "valid_targets_min": 523 + }, + { + "epoch": 4.62015503875969, + "grad_norm": 0.35362147101841246, + "learning_rate": 1.2523483645615031e-05, + "loss": 0.1435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11566026508808136, + "step": 2980, + "valid_targets_mean": 5738.1, + "valid_targets_min": 1805 + }, + { + "epoch": 4.627906976744186, + "grad_norm": 0.4359085932620926, + "learning_rate": 1.2451823631668333e-05, + "loss": 0.1734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2180645763874054, + "step": 2985, + "valid_targets_mean": 5415.4, + "valid_targets_min": 1779 + }, + { + "epoch": 4.635658914728682, + "grad_norm": 0.4178432880182616, + "learning_rate": 1.2380276438114495e-05, + "loss": 0.1635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15357699990272522, + "step": 2990, + "valid_targets_mean": 3995.6, + "valid_targets_min": 921 + }, + { + "epoch": 4.6434108527131785, + "grad_norm": 0.3928742323402058, + "learning_rate": 1.2308843134348569e-05, + "loss": 0.1587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14658121764659882, + "step": 2995, + "valid_targets_mean": 4797.7, + "valid_targets_min": 808 + }, + { + "epoch": 4.651162790697675, + "grad_norm": 0.40724671685704517, + "learning_rate": 1.2237524788063335e-05, + "loss": 0.1494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13327175378799438, + "step": 3000, + "valid_targets_mean": 4289.8, + "valid_targets_min": 654 + }, + { + "epoch": 4.658914728682171, + "grad_norm": 0.38017870244977436, + "learning_rate": 1.2166322465233325e-05, + "loss": 0.1552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15827676653862, + "step": 3005, + "valid_targets_mean": 5934.4, + "valid_targets_min": 2154 + }, + { + "epoch": 4.666666666666667, + "grad_norm": 0.4241213390458187, + "learning_rate": 1.2095237230098925e-05, + "loss": 0.1513, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15015850961208344, + "step": 3010, + "valid_targets_mean": 3942.2, + "valid_targets_min": 2726 + }, + { + "epoch": 4.674418604651163, + "grad_norm": 0.42983747059070593, + "learning_rate": 1.2024270145150428e-05, + "loss": 0.1529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1613459587097168, + "step": 3015, + "valid_targets_mean": 4662.0, + "valid_targets_min": 2112 + }, + { + "epoch": 4.682170542635659, + "grad_norm": 0.4541648568256799, + "learning_rate": 1.1953422271112171e-05, + "loss": 0.1533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1784931868314743, + "step": 3020, + "valid_targets_mean": 4353.8, + "valid_targets_min": 682 + }, + { + "epoch": 4.689922480620155, + "grad_norm": 0.37376376389283156, + "learning_rate": 1.1882694666926691e-05, + "loss": 0.1458, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10884034633636475, + "step": 3025, + "valid_targets_mean": 4409.1, + "valid_targets_min": 533 + }, + { + "epoch": 4.6976744186046515, + "grad_norm": 0.4214595584685805, + "learning_rate": 1.1812088389738875e-05, + "loss": 0.1558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21922525763511658, + "step": 3030, + "valid_targets_mean": 6067.9, + "valid_targets_min": 797 + }, + { + "epoch": 4.705426356589148, + "grad_norm": 0.35642720118665594, + "learning_rate": 1.1741604494880177e-05, + "loss": 0.151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14536528289318085, + "step": 3035, + "valid_targets_mean": 5993.4, + "valid_targets_min": 752 + }, + { + "epoch": 4.713178294573644, + "grad_norm": 0.4788498710665514, + "learning_rate": 1.167124403585283e-05, + "loss": 0.1607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16465994715690613, + "step": 3040, + "valid_targets_mean": 3857.6, + "valid_targets_min": 302 + }, + { + "epoch": 4.720930232558139, + "grad_norm": 0.46807401286424594, + "learning_rate": 1.1601008064314127e-05, + "loss": 0.1511, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17980757355690002, + "step": 3045, + "valid_targets_mean": 4050.8, + "valid_targets_min": 411 + }, + { + "epoch": 4.728682170542635, + "grad_norm": 0.3493024998293867, + "learning_rate": 1.1530897630060664e-05, + "loss": 0.1453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14402084052562714, + "step": 3050, + "valid_targets_mean": 5970.5, + "valid_targets_min": 3312 + }, + { + "epoch": 4.736434108527131, + "grad_norm": 1.1433311286570027, + "learning_rate": 1.146091378101267e-05, + "loss": 0.1612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18359369039535522, + "step": 3055, + "valid_targets_mean": 5394.9, + "valid_targets_min": 2047 + }, + { + "epoch": 4.7441860465116275, + "grad_norm": 0.3656089625075003, + "learning_rate": 1.1391057563198352e-05, + "loss": 0.1568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15548282861709595, + "step": 3060, + "valid_targets_mean": 5395.7, + "valid_targets_min": 1999 + }, + { + "epoch": 4.751937984496124, + "grad_norm": 0.3682957712457608, + "learning_rate": 1.1321330020738238e-05, + "loss": 0.1513, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12282656878232956, + "step": 3065, + "valid_targets_mean": 4505.5, + "valid_targets_min": 433 + }, + { + "epoch": 4.75968992248062, + "grad_norm": 0.41853152057960064, + "learning_rate": 1.1251732195829581e-05, + "loss": 0.1611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13153740763664246, + "step": 3070, + "valid_targets_mean": 4470.2, + "valid_targets_min": 2352 + }, + { + "epoch": 4.767441860465116, + "grad_norm": 0.38423534152358596, + "learning_rate": 1.118226512873081e-05, + "loss": 0.1537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17503979802131653, + "step": 3075, + "valid_targets_mean": 4954.4, + "valid_targets_min": 307 + }, + { + "epoch": 4.775193798449612, + "grad_norm": 0.37157618329698217, + "learning_rate": 1.1112929857745924e-05, + "loss": 0.1463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13206711411476135, + "step": 3080, + "valid_targets_mean": 5568.5, + "valid_targets_min": 2292 + }, + { + "epoch": 4.782945736434108, + "grad_norm": 0.509246178657668, + "learning_rate": 1.1043727419209023e-05, + "loss": 0.1653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1961718052625656, + "step": 3085, + "valid_targets_mean": 3839.4, + "valid_targets_min": 266 + }, + { + "epoch": 4.790697674418604, + "grad_norm": 0.4608884016393757, + "learning_rate": 1.0974658847468775e-05, + "loss": 0.1639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18118709325790405, + "step": 3090, + "valid_targets_mean": 4658.3, + "valid_targets_min": 609 + }, + { + "epoch": 4.7984496124031, + "grad_norm": 0.3407586601849243, + "learning_rate": 1.0905725174873021e-05, + "loss": 0.1508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13145892322063446, + "step": 3095, + "valid_targets_mean": 5481.1, + "valid_targets_min": 2463 + }, + { + "epoch": 4.8062015503875966, + "grad_norm": 0.44372721977998414, + "learning_rate": 1.0836927431753268e-05, + "loss": 0.1535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1747463047504425, + "step": 3100, + "valid_targets_mean": 4381.4, + "valid_targets_min": 2634 + }, + { + "epoch": 4.813953488372093, + "grad_norm": 0.4313480237295467, + "learning_rate": 1.0768266646409331e-05, + "loss": 0.1725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1479579210281372, + "step": 3105, + "valid_targets_mean": 3838.2, + "valid_targets_min": 558 + }, + { + "epoch": 4.821705426356589, + "grad_norm": 0.40712005319072236, + "learning_rate": 1.0699743845093977e-05, + "loss": 0.1807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16829770803451538, + "step": 3110, + "valid_targets_mean": 5049.5, + "valid_targets_min": 635 + }, + { + "epoch": 4.829457364341085, + "grad_norm": 0.4038620874534929, + "learning_rate": 1.0631360051997545e-05, + "loss": 0.1504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13558568060398102, + "step": 3115, + "valid_targets_mean": 4396.6, + "valid_targets_min": 477 + }, + { + "epoch": 4.837209302325581, + "grad_norm": 0.46074509090856464, + "learning_rate": 1.0563116289232664e-05, + "loss": 0.1389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13045081496238708, + "step": 3120, + "valid_targets_mean": 3530.9, + "valid_targets_min": 341 + }, + { + "epoch": 4.844961240310077, + "grad_norm": 0.43813490790924364, + "learning_rate": 1.0495013576818974e-05, + "loss": 0.1585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15511000156402588, + "step": 3125, + "valid_targets_mean": 4225.4, + "valid_targets_min": 670 + }, + { + "epoch": 4.852713178294573, + "grad_norm": 0.4380047569329961, + "learning_rate": 1.0427052932667872e-05, + "loss": 0.1729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19142797589302063, + "step": 3130, + "valid_targets_mean": 4649.4, + "valid_targets_min": 1866 + }, + { + "epoch": 4.8604651162790695, + "grad_norm": 0.3622958483592568, + "learning_rate": 1.0359235372567303e-05, + "loss": 0.1564, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16687515377998352, + "step": 3135, + "valid_targets_mean": 5099.6, + "valid_targets_min": 2400 + }, + { + "epoch": 4.868217054263566, + "grad_norm": 0.4538324825468118, + "learning_rate": 1.0291561910166575e-05, + "loss": 0.1508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1640026718378067, + "step": 3140, + "valid_targets_mean": 4020.4, + "valid_targets_min": 637 + }, + { + "epoch": 4.875968992248062, + "grad_norm": 0.3815270313643839, + "learning_rate": 1.0224033556961221e-05, + "loss": 0.1517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15679693222045898, + "step": 3145, + "valid_targets_mean": 5416.8, + "valid_targets_min": 2731 + }, + { + "epoch": 4.883720930232558, + "grad_norm": 0.37173879163162826, + "learning_rate": 1.0156651322277859e-05, + "loss": 0.1545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15583065152168274, + "step": 3150, + "valid_targets_mean": 6000.7, + "valid_targets_min": 2168 + }, + { + "epoch": 4.891472868217054, + "grad_norm": 0.44542528707551793, + "learning_rate": 1.0089416213259109e-05, + "loss": 0.1655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2258930504322052, + "step": 3155, + "valid_targets_mean": 5374.3, + "valid_targets_min": 1335 + }, + { + "epoch": 4.89922480620155, + "grad_norm": 0.46872132575511044, + "learning_rate": 1.0022329234848568e-05, + "loss": 0.1745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20413146913051605, + "step": 3160, + "valid_targets_mean": 4518.1, + "valid_targets_min": 1923 + }, + { + "epoch": 4.906976744186046, + "grad_norm": 0.4064894961029224, + "learning_rate": 9.95539138977575e-06, + "loss": 0.1493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12033580243587494, + "step": 3165, + "valid_targets_mean": 4383.8, + "valid_targets_min": 606 + }, + { + "epoch": 4.9147286821705425, + "grad_norm": 0.3163430136720968, + "learning_rate": 9.88860367854112e-06, + "loss": 0.1449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14553579688072205, + "step": 3170, + "valid_targets_mean": 7166.2, + "valid_targets_min": 3325 + }, + { + "epoch": 4.922480620155039, + "grad_norm": 0.38091013274436497, + "learning_rate": 9.821967099401137e-06, + "loss": 0.1657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1597091257572174, + "step": 3175, + "valid_targets_mean": 5441.1, + "valid_targets_min": 1887 + }, + { + "epoch": 4.930232558139535, + "grad_norm": 0.34441694300189507, + "learning_rate": 9.755482648353335e-06, + "loss": 0.1646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13708937168121338, + "step": 3180, + "valid_targets_mean": 5538.1, + "valid_targets_min": 2859 + }, + { + "epoch": 4.937984496124031, + "grad_norm": 0.37415077897828763, + "learning_rate": 9.689151319121433e-06, + "loss": 0.1629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14037328958511353, + "step": 3185, + "valid_targets_mean": 5194.1, + "valid_targets_min": 2266 + }, + { + "epoch": 4.945736434108527, + "grad_norm": 0.44777782502419616, + "learning_rate": 9.62297410314047e-06, + "loss": 0.1678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18096795678138733, + "step": 3190, + "valid_targets_mean": 4345.2, + "valid_targets_min": 750 + }, + { + "epoch": 4.953488372093023, + "grad_norm": 0.38931181856536856, + "learning_rate": 9.556951989542032e-06, + "loss": 0.1541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11744767427444458, + "step": 3195, + "valid_targets_mean": 4002.5, + "valid_targets_min": 462 + }, + { + "epoch": 4.961240310077519, + "grad_norm": 0.36948872010681133, + "learning_rate": 9.491085965139402e-06, + "loss": 0.1516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12257242947816849, + "step": 3200, + "valid_targets_mean": 5256.6, + "valid_targets_min": 611 + }, + { + "epoch": 4.9689922480620154, + "grad_norm": 0.41347264041909937, + "learning_rate": 9.425377014412848e-06, + "loss": 0.1556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15283584594726562, + "step": 3205, + "valid_targets_mean": 4368.1, + "valid_targets_min": 891 + }, + { + "epoch": 4.976744186046512, + "grad_norm": 0.42750383797473995, + "learning_rate": 9.35982611949493e-06, + "loss": 0.1725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18603989481925964, + "step": 3210, + "valid_targets_mean": 4814.5, + "valid_targets_min": 483 + }, + { + "epoch": 4.984496124031008, + "grad_norm": 0.43059647799726375, + "learning_rate": 9.294434260155765e-06, + "loss": 0.161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22478413581848145, + "step": 3215, + "valid_targets_mean": 5452.1, + "valid_targets_min": 1531 + }, + { + "epoch": 4.992248062015504, + "grad_norm": 0.39768354831373554, + "learning_rate": 9.229202413788419e-06, + "loss": 0.1745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15155833959579468, + "step": 3220, + "valid_targets_mean": 4607.6, + "valid_targets_min": 2135 + }, + { + "epoch": 5.0, + "grad_norm": 0.38185972751001357, + "learning_rate": 9.164131555394288e-06, + "loss": 0.1561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13077522814273834, + "step": 3225, + "valid_targets_mean": 4259.6, + "valid_targets_min": 380 + }, + { + "epoch": 5.007751937984496, + "grad_norm": 0.41906475146843863, + "learning_rate": 9.099222657568554e-06, + "loss": 0.1557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15627771615982056, + "step": 3230, + "valid_targets_mean": 5040.4, + "valid_targets_min": 1940 + }, + { + "epoch": 5.015503875968992, + "grad_norm": 0.4052151818524859, + "learning_rate": 9.034476690485579e-06, + "loss": 0.1328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13936452567577362, + "step": 3235, + "valid_targets_mean": 5023.8, + "valid_targets_min": 705 + }, + { + "epoch": 5.023255813953488, + "grad_norm": 0.37188613632576223, + "learning_rate": 8.969894621884467e-06, + "loss": 0.1426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12986522912979126, + "step": 3240, + "valid_targets_mean": 5463.1, + "valid_targets_min": 412 + }, + { + "epoch": 5.0310077519379846, + "grad_norm": 0.3919445371842698, + "learning_rate": 8.905477417054595e-06, + "loss": 0.1387, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12915056943893433, + "step": 3245, + "valid_targets_mean": 5352.3, + "valid_targets_min": 2354 + }, + { + "epoch": 5.038759689922481, + "grad_norm": 0.40240515277621475, + "learning_rate": 8.841226038821147e-06, + "loss": 0.1414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1427655816078186, + "step": 3250, + "valid_targets_mean": 4806.9, + "valid_targets_min": 927 + }, + { + "epoch": 5.046511627906977, + "grad_norm": 0.4482311614474207, + "learning_rate": 8.77714144753075e-06, + "loss": 0.1335, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1503133922815323, + "step": 3255, + "valid_targets_mean": 4343.8, + "valid_targets_min": 188 + }, + { + "epoch": 5.054263565891473, + "grad_norm": 0.41632602975114186, + "learning_rate": 8.713224601037116e-06, + "loss": 0.1424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11666247993707657, + "step": 3260, + "valid_targets_mean": 3756.2, + "valid_targets_min": 412 + }, + { + "epoch": 5.062015503875969, + "grad_norm": 0.4421325070582108, + "learning_rate": 8.649476454686734e-06, + "loss": 0.1549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14591653645038605, + "step": 3265, + "valid_targets_mean": 4298.9, + "valid_targets_min": 2252 + }, + { + "epoch": 5.069767441860465, + "grad_norm": 0.3706889070551245, + "learning_rate": 8.58589796130457e-06, + "loss": 0.1448, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1363983452320099, + "step": 3270, + "valid_targets_mean": 5321.2, + "valid_targets_min": 435 + }, + { + "epoch": 5.077519379844961, + "grad_norm": 0.42292771021367553, + "learning_rate": 8.522490071179834e-06, + "loss": 0.1654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1678886115550995, + "step": 3275, + "valid_targets_mean": 4751.9, + "valid_targets_min": 610 + }, + { + "epoch": 5.0852713178294575, + "grad_norm": 0.5828499776845351, + "learning_rate": 8.459253732051793e-06, + "loss": 0.1661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17978990077972412, + "step": 3280, + "valid_targets_mean": 4170.8, + "valid_targets_min": 267 + }, + { + "epoch": 5.093023255813954, + "grad_norm": 0.4615372722971231, + "learning_rate": 8.396189889095581e-06, + "loss": 0.1516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18241898715496063, + "step": 3285, + "valid_targets_mean": 4325.0, + "valid_targets_min": 1860 + }, + { + "epoch": 5.10077519379845, + "grad_norm": 0.3817650649549942, + "learning_rate": 8.333299484908081e-06, + "loss": 0.1368, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1241084486246109, + "step": 3290, + "valid_targets_mean": 4798.9, + "valid_targets_min": 670 + }, + { + "epoch": 5.108527131782946, + "grad_norm": 0.4194147503687341, + "learning_rate": 8.27058345949384e-06, + "loss": 0.1484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14202088117599487, + "step": 3295, + "valid_targets_mean": 4392.2, + "valid_targets_min": 1724 + }, + { + "epoch": 5.116279069767442, + "grad_norm": 0.38881934176704125, + "learning_rate": 8.208042750251017e-06, + "loss": 0.1428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12283352017402649, + "step": 3300, + "valid_targets_mean": 5001.8, + "valid_targets_min": 860 + }, + { + "epoch": 5.124031007751938, + "grad_norm": 0.41598556767284195, + "learning_rate": 8.14567829195737e-06, + "loss": 0.1498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12442460656166077, + "step": 3305, + "valid_targets_mean": 4178.7, + "valid_targets_min": 1866 + }, + { + "epoch": 5.131782945736434, + "grad_norm": 0.5128407487191675, + "learning_rate": 8.083491016756282e-06, + "loss": 0.1501, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17859122157096863, + "step": 3310, + "valid_targets_mean": 3686.2, + "valid_targets_min": 761 + }, + { + "epoch": 5.1395348837209305, + "grad_norm": 0.3758183019008081, + "learning_rate": 8.021481854142843e-06, + "loss": 0.1487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14447617530822754, + "step": 3315, + "valid_targets_mean": 5073.4, + "valid_targets_min": 1295 + }, + { + "epoch": 5.147286821705427, + "grad_norm": 0.47801726631549907, + "learning_rate": 7.959651730949938e-06, + "loss": 0.14, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1533583104610443, + "step": 3320, + "valid_targets_mean": 3810.4, + "valid_targets_min": 688 + }, + { + "epoch": 5.155038759689923, + "grad_norm": 0.3860929841594393, + "learning_rate": 7.898001571334393e-06, + "loss": 0.148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14451035857200623, + "step": 3325, + "valid_targets_mean": 5514.4, + "valid_targets_min": 587 + }, + { + "epoch": 5.162790697674419, + "grad_norm": 0.3927336266773621, + "learning_rate": 7.836532296763195e-06, + "loss": 0.15, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14219003915786743, + "step": 3330, + "valid_targets_mean": 5720.4, + "valid_targets_min": 854 + }, + { + "epoch": 5.170542635658915, + "grad_norm": 0.4701871029698711, + "learning_rate": 7.775244825999674e-06, + "loss": 0.1506, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13866694271564484, + "step": 3335, + "valid_targets_mean": 4240.4, + "valid_targets_min": 327 + }, + { + "epoch": 5.178294573643411, + "grad_norm": 0.5611394213445632, + "learning_rate": 7.7141400750898e-06, + "loss": 0.1537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1594468057155609, + "step": 3340, + "valid_targets_mean": 3598.9, + "valid_targets_min": 332 + }, + { + "epoch": 5.186046511627907, + "grad_norm": 0.46697871764537624, + "learning_rate": 7.653218957348478e-06, + "loss": 0.1625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17361919581890106, + "step": 3345, + "valid_targets_mean": 4588.7, + "valid_targets_min": 750 + }, + { + "epoch": 5.1937984496124034, + "grad_norm": 0.40704733172155627, + "learning_rate": 7.592482383345905e-06, + "loss": 0.1715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16594815254211426, + "step": 3350, + "valid_targets_mean": 5162.1, + "valid_targets_min": 2503 + }, + { + "epoch": 5.2015503875969, + "grad_norm": 0.43347382037931187, + "learning_rate": 7.531931260893956e-06, + "loss": 0.1481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12546521425247192, + "step": 3355, + "valid_targets_mean": 4259.8, + "valid_targets_min": 2493 + }, + { + "epoch": 5.209302325581396, + "grad_norm": 0.451832191981657, + "learning_rate": 7.47156649503261e-06, + "loss": 0.1444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14870810508728027, + "step": 3360, + "valid_targets_mean": 4736.9, + "valid_targets_min": 2527 + }, + { + "epoch": 5.217054263565892, + "grad_norm": 0.5092134368319543, + "learning_rate": 7.411388988016444e-06, + "loss": 0.1464, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18897545337677002, + "step": 3365, + "valid_targets_mean": 4291.3, + "valid_targets_min": 1979 + }, + { + "epoch": 5.224806201550388, + "grad_norm": 0.3992712621608886, + "learning_rate": 7.351399639301116e-06, + "loss": 0.1433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16864998638629913, + "step": 3370, + "valid_targets_mean": 5468.9, + "valid_targets_min": 2411 + }, + { + "epoch": 5.232558139534884, + "grad_norm": 0.43818725229100414, + "learning_rate": 7.291599345529936e-06, + "loss": 0.1517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14088185131549835, + "step": 3375, + "valid_targets_mean": 4397.0, + "valid_targets_min": 227 + }, + { + "epoch": 5.24031007751938, + "grad_norm": 0.42669853677221803, + "learning_rate": 7.231989000520485e-06, + "loss": 0.1637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16974776983261108, + "step": 3380, + "valid_targets_mean": 5304.2, + "valid_targets_min": 818 + }, + { + "epoch": 5.248062015503876, + "grad_norm": 0.42231966887434225, + "learning_rate": 7.172569495251214e-06, + "loss": 0.1525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15044859051704407, + "step": 3385, + "valid_targets_mean": 4793.6, + "valid_targets_min": 893 + }, + { + "epoch": 5.2558139534883725, + "grad_norm": 0.3774758090043144, + "learning_rate": 7.113341717848154e-06, + "loss": 0.1478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14274142682552338, + "step": 3390, + "valid_targets_mean": 5430.0, + "valid_targets_min": 567 + }, + { + "epoch": 5.263565891472869, + "grad_norm": 0.4221322079184396, + "learning_rate": 7.05430655357163e-06, + "loss": 0.1356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1326403170824051, + "step": 3395, + "valid_targets_mean": 4851.8, + "valid_targets_min": 1855 + }, + { + "epoch": 5.271317829457364, + "grad_norm": 0.41865340457663663, + "learning_rate": 6.995464884803056e-06, + "loss": 0.1605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15016970038414001, + "step": 3400, + "valid_targets_mean": 4839.2, + "valid_targets_min": 685 + }, + { + "epoch": 5.27906976744186, + "grad_norm": 0.37620910591252854, + "learning_rate": 6.936817591031704e-06, + "loss": 0.1553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11961726099252701, + "step": 3405, + "valid_targets_mean": 5236.1, + "valid_targets_min": 263 + }, + { + "epoch": 5.286821705426356, + "grad_norm": 0.41396399132639294, + "learning_rate": 6.878365548841583e-06, + "loss": 0.1667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15676863491535187, + "step": 3410, + "valid_targets_mean": 5105.6, + "valid_targets_min": 434 + }, + { + "epoch": 5.294573643410852, + "grad_norm": 0.37430857448740984, + "learning_rate": 6.820109631898337e-06, + "loss": 0.1537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13516280055046082, + "step": 3415, + "valid_targets_mean": 5407.8, + "valid_targets_min": 361 + }, + { + "epoch": 5.3023255813953485, + "grad_norm": 0.39609033417068207, + "learning_rate": 6.7620507109361876e-06, + "loss": 0.1416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11794918775558472, + "step": 3420, + "valid_targets_mean": 4567.0, + "valid_targets_min": 770 + }, + { + "epoch": 5.310077519379845, + "grad_norm": 0.6118599264333754, + "learning_rate": 6.704189653744897e-06, + "loss": 0.1615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1839725375175476, + "step": 3425, + "valid_targets_mean": 4516.2, + "valid_targets_min": 484 + }, + { + "epoch": 5.317829457364341, + "grad_norm": 0.4381562079932628, + "learning_rate": 6.646527325156842e-06, + "loss": 0.1412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16818967461585999, + "step": 3430, + "valid_targets_mean": 4886.8, + "valid_targets_min": 420 + }, + { + "epoch": 5.325581395348837, + "grad_norm": 0.4226204361999147, + "learning_rate": 6.5890645870340445e-06, + "loss": 0.1548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1639600247144699, + "step": 3435, + "valid_targets_mean": 5103.1, + "valid_targets_min": 696 + }, + { + "epoch": 5.333333333333333, + "grad_norm": 0.3710849263141366, + "learning_rate": 6.531802298255303e-06, + "loss": 0.1499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12949195504188538, + "step": 3440, + "valid_targets_mean": 4989.1, + "valid_targets_min": 2837 + }, + { + "epoch": 5.341085271317829, + "grad_norm": 0.4435129156035455, + "learning_rate": 6.474741314703359e-06, + "loss": 0.1478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15639318525791168, + "step": 3445, + "valid_targets_mean": 4721.8, + "valid_targets_min": 617 + }, + { + "epoch": 5.348837209302325, + "grad_norm": 0.3625818524554749, + "learning_rate": 6.417882489252123e-06, + "loss": 0.1402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12721361219882965, + "step": 3450, + "valid_targets_mean": 5639.6, + "valid_targets_min": 3016 + }, + { + "epoch": 5.3565891472868215, + "grad_norm": 0.4323434478569545, + "learning_rate": 6.361226671753881e-06, + "loss": 0.1493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1502252072095871, + "step": 3455, + "valid_targets_mean": 5072.4, + "valid_targets_min": 346 + }, + { + "epoch": 5.364341085271318, + "grad_norm": 0.37196863493668914, + "learning_rate": 6.304774709026633e-06, + "loss": 0.1474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12962721288204193, + "step": 3460, + "valid_targets_mean": 5211.2, + "valid_targets_min": 2412 + }, + { + "epoch": 5.372093023255814, + "grad_norm": 0.4392929005011265, + "learning_rate": 6.248527444841417e-06, + "loss": 0.1469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15446701645851135, + "step": 3465, + "valid_targets_mean": 4372.7, + "valid_targets_min": 2320 + }, + { + "epoch": 5.37984496124031, + "grad_norm": 0.45498046007325976, + "learning_rate": 6.1924857199097e-06, + "loss": 0.1653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1651144027709961, + "step": 3470, + "valid_targets_mean": 4883.4, + "valid_targets_min": 936 + }, + { + "epoch": 5.387596899224806, + "grad_norm": 0.42317989191246896, + "learning_rate": 6.136650371870816e-06, + "loss": 0.1391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12391775101423264, + "step": 3475, + "valid_targets_mean": 6439.6, + "valid_targets_min": 2368 + }, + { + "epoch": 5.395348837209302, + "grad_norm": 0.4440482712927205, + "learning_rate": 6.081022235279439e-06, + "loss": 0.1509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13077440857887268, + "step": 3480, + "valid_targets_mean": 4186.4, + "valid_targets_min": 2293 + }, + { + "epoch": 5.403100775193798, + "grad_norm": 0.45472821818832726, + "learning_rate": 6.025602141593128e-06, + "loss": 0.1387, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13112467527389526, + "step": 3485, + "valid_targets_mean": 4406.1, + "valid_targets_min": 454 + }, + { + "epoch": 5.410852713178294, + "grad_norm": 0.3962937912503978, + "learning_rate": 5.970390919159872e-06, + "loss": 0.1493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16544798016548157, + "step": 3490, + "valid_targets_mean": 5308.0, + "valid_targets_min": 797 + }, + { + "epoch": 5.4186046511627906, + "grad_norm": 0.3643941673945403, + "learning_rate": 5.915389393205717e-06, + "loss": 0.1608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11564834415912628, + "step": 3495, + "valid_targets_mean": 4933.6, + "valid_targets_min": 573 + }, + { + "epoch": 5.426356589147287, + "grad_norm": 0.42998784565296494, + "learning_rate": 5.86059838582246e-06, + "loss": 0.1413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15233772993087769, + "step": 3500, + "valid_targets_mean": 4981.4, + "valid_targets_min": 269 + }, + { + "epoch": 5.434108527131783, + "grad_norm": 0.41822740876158127, + "learning_rate": 5.806018715955317e-06, + "loss": 0.1449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1299438178539276, + "step": 3505, + "valid_targets_mean": 4537.4, + "valid_targets_min": 1080 + }, + { + "epoch": 5.441860465116279, + "grad_norm": 0.4528399940397294, + "learning_rate": 5.751651199390704e-06, + "loss": 0.1558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16579484939575195, + "step": 3510, + "valid_targets_mean": 4755.4, + "valid_targets_min": 2776 + }, + { + "epoch": 5.449612403100775, + "grad_norm": 0.4684592080080512, + "learning_rate": 5.697496648744063e-06, + "loss": 0.1353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.143550843000412, + "step": 3515, + "valid_targets_mean": 3519.6, + "valid_targets_min": 530 + }, + { + "epoch": 5.457364341085271, + "grad_norm": 0.45674225562117354, + "learning_rate": 5.643555873447677e-06, + "loss": 0.1408, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14884954690933228, + "step": 3520, + "valid_targets_mean": 4183.1, + "valid_targets_min": 764 + }, + { + "epoch": 5.465116279069767, + "grad_norm": 0.4035257697649145, + "learning_rate": 5.589829679738597e-06, + "loss": 0.1605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1421164870262146, + "step": 3525, + "valid_targets_mean": 5205.2, + "valid_targets_min": 2073 + }, + { + "epoch": 5.4728682170542635, + "grad_norm": 0.5117576530097852, + "learning_rate": 5.536318870646587e-06, + "loss": 0.145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16778147220611572, + "step": 3530, + "valid_targets_mean": 5321.6, + "valid_targets_min": 639 + }, + { + "epoch": 5.48062015503876, + "grad_norm": 0.4104772923087062, + "learning_rate": 5.483024245982116e-06, + "loss": 0.1562, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13928261399269104, + "step": 3535, + "valid_targets_mean": 4891.7, + "valid_targets_min": 410 + }, + { + "epoch": 5.488372093023256, + "grad_norm": 0.4268402728473486, + "learning_rate": 5.429946602324414e-06, + "loss": 0.1467, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1458597481250763, + "step": 3540, + "valid_targets_mean": 4448.0, + "valid_targets_min": 277 + }, + { + "epoch": 5.496124031007752, + "grad_norm": 0.4715412702854216, + "learning_rate": 5.377086733009551e-06, + "loss": 0.1421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14967858791351318, + "step": 3545, + "valid_targets_mean": 4361.0, + "valid_targets_min": 704 + }, + { + "epoch": 5.503875968992248, + "grad_norm": 0.5279261179055396, + "learning_rate": 5.3244454281185984e-06, + "loss": 0.1689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20234504342079163, + "step": 3550, + "valid_targets_mean": 3873.4, + "valid_targets_min": 615 + }, + { + "epoch": 5.511627906976744, + "grad_norm": 0.507981142775861, + "learning_rate": 5.272023474465799e-06, + "loss": 0.1552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1777733415365219, + "step": 3555, + "valid_targets_mean": 3797.2, + "valid_targets_min": 472 + }, + { + "epoch": 5.51937984496124, + "grad_norm": 0.3833421328022469, + "learning_rate": 5.219821655586821e-06, + "loss": 0.1471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16572965681552887, + "step": 3560, + "valid_targets_mean": 5526.5, + "valid_targets_min": 551 + }, + { + "epoch": 5.5271317829457365, + "grad_norm": 0.43394338581146247, + "learning_rate": 5.167840751727034e-06, + "loss": 0.1497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15046624839305878, + "step": 3565, + "valid_targets_mean": 4108.6, + "valid_targets_min": 536 + }, + { + "epoch": 5.534883720930233, + "grad_norm": 0.40622205843630427, + "learning_rate": 5.116081539829874e-06, + "loss": 0.1362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16226355731487274, + "step": 3570, + "valid_targets_mean": 6044.9, + "valid_targets_min": 1779 + }, + { + "epoch": 5.542635658914729, + "grad_norm": 0.4029911479453827, + "learning_rate": 5.064544793525192e-06, + "loss": 0.1668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14648669958114624, + "step": 3575, + "valid_targets_mean": 5268.5, + "valid_targets_min": 249 + }, + { + "epoch": 5.550387596899225, + "grad_norm": 0.5364248446568755, + "learning_rate": 5.013231283117723e-06, + "loss": 0.1486, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14924436807632446, + "step": 3580, + "valid_targets_mean": 4603.7, + "valid_targets_min": 2372 + }, + { + "epoch": 5.558139534883721, + "grad_norm": 0.40849395379526177, + "learning_rate": 4.962141775575553e-06, + "loss": 0.1457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1340319961309433, + "step": 3585, + "valid_targets_mean": 4452.3, + "valid_targets_min": 2157 + }, + { + "epoch": 5.565891472868217, + "grad_norm": 0.3507387441886511, + "learning_rate": 4.9112770345186664e-06, + "loss": 0.1308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10928727686405182, + "step": 3590, + "valid_targets_mean": 5409.7, + "valid_targets_min": 2109 + }, + { + "epoch": 5.573643410852713, + "grad_norm": 0.4457364584152074, + "learning_rate": 4.8606378202075215e-06, + "loss": 0.1421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13773344457149506, + "step": 3595, + "valid_targets_mean": 4546.3, + "valid_targets_min": 327 + }, + { + "epoch": 5.5813953488372094, + "grad_norm": 0.4973298360537683, + "learning_rate": 4.81022488953171e-06, + "loss": 0.1382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1543937474489212, + "step": 3600, + "valid_targets_mean": 4128.4, + "valid_targets_min": 349 + }, + { + "epoch": 5.589147286821706, + "grad_norm": 0.36859713218728013, + "learning_rate": 4.760038995998612e-06, + "loss": 0.1597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14406907558441162, + "step": 3605, + "valid_targets_mean": 5601.7, + "valid_targets_min": 2213 + }, + { + "epoch": 5.596899224806202, + "grad_norm": 0.3775608828670771, + "learning_rate": 4.710080889722157e-06, + "loss": 0.1379, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11484645307064056, + "step": 3610, + "valid_targets_mean": 4418.9, + "valid_targets_min": 781 + }, + { + "epoch": 5.604651162790698, + "grad_norm": 0.5585581576269192, + "learning_rate": 4.660351317411598e-06, + "loss": 0.1546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16293855011463165, + "step": 3615, + "valid_targets_mean": 3673.5, + "valid_targets_min": 853 + }, + { + "epoch": 5.612403100775194, + "grad_norm": 0.40317305487140387, + "learning_rate": 4.610851022360372e-06, + "loss": 0.1472, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16022725403308868, + "step": 3620, + "valid_targets_mean": 5046.8, + "valid_targets_min": 719 + }, + { + "epoch": 5.62015503875969, + "grad_norm": 0.46629312553258945, + "learning_rate": 4.561580744434962e-06, + "loss": 0.1363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16484293341636658, + "step": 3625, + "valid_targets_mean": 4345.7, + "valid_targets_min": 719 + }, + { + "epoch": 5.627906976744186, + "grad_norm": 0.4517994866451772, + "learning_rate": 4.512541220063849e-06, + "loss": 0.1583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1768956333398819, + "step": 3630, + "valid_targets_mean": 5144.0, + "valid_targets_min": 1570 + }, + { + "epoch": 5.635658914728682, + "grad_norm": 0.4266612314681182, + "learning_rate": 4.463733182226526e-06, + "loss": 0.1441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12670007348060608, + "step": 3635, + "valid_targets_mean": 4100.2, + "valid_targets_min": 2116 + }, + { + "epoch": 5.6434108527131785, + "grad_norm": 0.4680346659273191, + "learning_rate": 4.415157360442517e-06, + "loss": 0.16, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17845088243484497, + "step": 3640, + "valid_targets_mean": 4356.6, + "valid_targets_min": 322 + }, + { + "epoch": 5.651162790697675, + "grad_norm": 0.3890028520850903, + "learning_rate": 4.366814480760461e-06, + "loss": 0.1435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12099163234233856, + "step": 3645, + "valid_targets_mean": 5003.2, + "valid_targets_min": 2596 + }, + { + "epoch": 5.658914728682171, + "grad_norm": 0.5488303595169921, + "learning_rate": 4.318705265747298e-06, + "loss": 0.1572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1828697919845581, + "step": 3650, + "valid_targets_mean": 3999.2, + "valid_targets_min": 528 + }, + { + "epoch": 5.666666666666667, + "grad_norm": 0.42060572333330015, + "learning_rate": 4.2708304344774554e-06, + "loss": 0.1384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13645608723163605, + "step": 3655, + "valid_targets_mean": 4687.2, + "valid_targets_min": 2741 + }, + { + "epoch": 5.674418604651163, + "grad_norm": 0.4387274771557827, + "learning_rate": 4.223190702522084e-06, + "loss": 0.1453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14829561114311218, + "step": 3660, + "valid_targets_mean": 4422.6, + "valid_targets_min": 757 + }, + { + "epoch": 5.682170542635659, + "grad_norm": 0.38684240070794124, + "learning_rate": 4.1757867819383695e-06, + "loss": 0.1485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15705367922782898, + "step": 3665, + "valid_targets_mean": 5591.4, + "valid_targets_min": 2183 + }, + { + "epoch": 5.689922480620155, + "grad_norm": 0.3924859326932451, + "learning_rate": 4.128619381258914e-06, + "loss": 0.1559, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1718716323375702, + "step": 3670, + "valid_targets_mean": 5566.4, + "valid_targets_min": 344 + }, + { + "epoch": 5.6976744186046515, + "grad_norm": 0.4255325828911711, + "learning_rate": 4.081689205481103e-06, + "loss": 0.1409, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15730968117713928, + "step": 3675, + "valid_targets_mean": 4940.1, + "valid_targets_min": 2527 + }, + { + "epoch": 5.705426356589148, + "grad_norm": 0.4523673615926739, + "learning_rate": 4.034996956056596e-06, + "loss": 0.1613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17630687355995178, + "step": 3680, + "valid_targets_mean": 4730.1, + "valid_targets_min": 836 + }, + { + "epoch": 5.713178294573644, + "grad_norm": 0.3996998930591703, + "learning_rate": 3.988543330880852e-06, + "loss": 0.1287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12216873466968536, + "step": 3685, + "valid_targets_mean": 4865.8, + "valid_targets_min": 2765 + }, + { + "epoch": 5.720930232558139, + "grad_norm": 0.47040209067719235, + "learning_rate": 3.942329024282665e-06, + "loss": 0.1516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16137559711933136, + "step": 3690, + "valid_targets_mean": 4531.0, + "valid_targets_min": 730 + }, + { + "epoch": 5.728682170542635, + "grad_norm": 0.3630897503273467, + "learning_rate": 3.896354727013811e-06, + "loss": 0.1532, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12590524554252625, + "step": 3695, + "valid_targets_mean": 5248.6, + "valid_targets_min": 2452 + }, + { + "epoch": 5.736434108527131, + "grad_norm": 0.4715032977447836, + "learning_rate": 3.850621126238716e-06, + "loss": 0.1437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15132960677146912, + "step": 3700, + "valid_targets_mean": 3941.0, + "valid_targets_min": 325 + }, + { + "epoch": 5.7441860465116275, + "grad_norm": 0.4654118997053404, + "learning_rate": 3.8051289055241825e-06, + "loss": 0.162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1751362681388855, + "step": 3705, + "valid_targets_mean": 4353.2, + "valid_targets_min": 2659 + }, + { + "epoch": 5.751937984496124, + "grad_norm": 0.5227568375922278, + "learning_rate": 3.759878744829182e-06, + "loss": 0.16, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18965643644332886, + "step": 3710, + "valid_targets_mean": 4459.4, + "valid_targets_min": 733 + }, + { + "epoch": 5.75968992248062, + "grad_norm": 0.4609905003497403, + "learning_rate": 3.7148713204946752e-06, + "loss": 0.147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1515987515449524, + "step": 3715, + "valid_targets_mean": 4371.8, + "valid_targets_min": 621 + }, + { + "epoch": 5.767441860465116, + "grad_norm": 0.3918991200592125, + "learning_rate": 3.67010730523353e-06, + "loss": 0.1618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12248925864696503, + "step": 3720, + "valid_targets_mean": 4360.6, + "valid_targets_min": 370 + }, + { + "epoch": 5.775193798449612, + "grad_norm": 0.467773056692704, + "learning_rate": 3.6255873681204332e-06, + "loss": 0.1566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17771819233894348, + "step": 3725, + "valid_targets_mean": 4304.9, + "valid_targets_min": 288 + }, + { + "epoch": 5.782945736434108, + "grad_norm": 0.377912933539592, + "learning_rate": 3.5813121745819144e-06, + "loss": 0.1529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14115823805332184, + "step": 3730, + "valid_targets_mean": 4955.0, + "valid_targets_min": 285 + }, + { + "epoch": 5.790697674418604, + "grad_norm": 0.3996511259289835, + "learning_rate": 3.537282386386387e-06, + "loss": 0.1561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13390158116817474, + "step": 3735, + "valid_targets_mean": 4518.8, + "valid_targets_min": 339 + }, + { + "epoch": 5.7984496124031, + "grad_norm": 0.3701488362865712, + "learning_rate": 3.4934986616342737e-06, + "loss": 0.1768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14555737376213074, + "step": 3740, + "valid_targets_mean": 5657.8, + "valid_targets_min": 2347 + }, + { + "epoch": 5.8062015503875966, + "grad_norm": 0.45199830351728787, + "learning_rate": 3.449961654748146e-06, + "loss": 0.1487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.150042325258255, + "step": 3745, + "valid_targets_mean": 4127.3, + "valid_targets_min": 2290 + }, + { + "epoch": 5.813953488372093, + "grad_norm": 0.4369336909116495, + "learning_rate": 3.406672016462964e-06, + "loss": 0.1463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13401828706264496, + "step": 3750, + "valid_targets_mean": 4274.8, + "valid_targets_min": 1856 + }, + { + "epoch": 5.821705426356589, + "grad_norm": 0.4834110812261461, + "learning_rate": 3.363630393816335e-06, + "loss": 0.1402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15827834606170654, + "step": 3755, + "valid_targets_mean": 3873.9, + "valid_targets_min": 2432 + }, + { + "epoch": 5.829457364341085, + "grad_norm": 0.4597445261763014, + "learning_rate": 3.3208374301388504e-06, + "loss": 0.1627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17994539439678192, + "step": 3760, + "valid_targets_mean": 4758.0, + "valid_targets_min": 1984 + }, + { + "epoch": 5.837209302325581, + "grad_norm": 0.42360510165536924, + "learning_rate": 3.278293765044469e-06, + "loss": 0.1546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13492318987846375, + "step": 3765, + "valid_targets_mean": 4426.1, + "valid_targets_min": 233 + }, + { + "epoch": 5.844961240310077, + "grad_norm": 0.48922365457669503, + "learning_rate": 3.236000034420963e-06, + "loss": 0.1498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14569556713104248, + "step": 3770, + "valid_targets_mean": 3687.8, + "valid_targets_min": 242 + }, + { + "epoch": 5.852713178294573, + "grad_norm": 0.4033773146053126, + "learning_rate": 3.193956870420396e-06, + "loss": 0.1738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18744111061096191, + "step": 3775, + "valid_targets_mean": 5528.1, + "valid_targets_min": 611 + }, + { + "epoch": 5.8604651162790695, + "grad_norm": 0.34857475686830064, + "learning_rate": 3.1521649014496925e-06, + "loss": 0.155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13359975814819336, + "step": 3780, + "valid_targets_mean": 5863.0, + "valid_targets_min": 2553 + }, + { + "epoch": 5.868217054263566, + "grad_norm": 0.4313376511492457, + "learning_rate": 3.110624752161229e-06, + "loss": 0.1689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13781484961509705, + "step": 3785, + "valid_targets_mean": 5246.6, + "valid_targets_min": 578 + }, + { + "epoch": 5.875968992248062, + "grad_norm": 0.35972262523748805, + "learning_rate": 3.069337043443523e-06, + "loss": 0.1552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1296289563179016, + "step": 3790, + "valid_targets_mean": 5488.9, + "valid_targets_min": 1966 + }, + { + "epoch": 5.883720930232558, + "grad_norm": 0.46395239595319404, + "learning_rate": 3.0283023924119236e-06, + "loss": 0.1425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1438305675983429, + "step": 3795, + "valid_targets_mean": 5045.4, + "valid_targets_min": 548 + }, + { + "epoch": 5.891472868217054, + "grad_norm": 0.4216115298877531, + "learning_rate": 2.9875214123993967e-06, + "loss": 0.1489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13032379746437073, + "step": 3800, + "valid_targets_mean": 5036.0, + "valid_targets_min": 513 + }, + { + "epoch": 5.89922480620155, + "grad_norm": 0.369645204601132, + "learning_rate": 2.94699471294738e-06, + "loss": 0.1474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10359348356723785, + "step": 3805, + "valid_targets_mean": 4782.0, + "valid_targets_min": 395 + }, + { + "epoch": 5.906976744186046, + "grad_norm": 0.4329223252021753, + "learning_rate": 2.9067228997966347e-06, + "loss": 0.1488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14920011162757874, + "step": 3810, + "valid_targets_mean": 4213.8, + "valid_targets_min": 265 + }, + { + "epoch": 5.9147286821705425, + "grad_norm": 0.38061473387366296, + "learning_rate": 2.8667065748782177e-06, + "loss": 0.1552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11807483434677124, + "step": 3815, + "valid_targets_mean": 4682.1, + "valid_targets_min": 2866 + }, + { + "epoch": 5.922480620155039, + "grad_norm": 0.4630422264492311, + "learning_rate": 2.826946336304481e-06, + "loss": 0.1606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1536155343055725, + "step": 3820, + "valid_targets_mean": 4259.2, + "valid_targets_min": 826 + }, + { + "epoch": 5.930232558139535, + "grad_norm": 0.4336604749432643, + "learning_rate": 2.7874427783601234e-06, + "loss": 0.1443, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13880223035812378, + "step": 3825, + "valid_targets_mean": 4519.3, + "valid_targets_min": 609 + }, + { + "epoch": 5.937984496124031, + "grad_norm": 0.5243528481538801, + "learning_rate": 2.7481964914933157e-06, + "loss": 0.1604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17028310894966125, + "step": 3830, + "valid_targets_mean": 3623.1, + "valid_targets_min": 591 + }, + { + "epoch": 5.945736434108527, + "grad_norm": 0.4789980300582059, + "learning_rate": 2.7092080623068674e-06, + "loss": 0.1745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1756812334060669, + "step": 3835, + "valid_targets_mean": 4744.0, + "valid_targets_min": 772 + }, + { + "epoch": 5.953488372093023, + "grad_norm": 0.45611253393283935, + "learning_rate": 2.6704780735494784e-06, + "loss": 0.1441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1426219344139099, + "step": 3840, + "valid_targets_mean": 4248.6, + "valid_targets_min": 525 + }, + { + "epoch": 5.961240310077519, + "grad_norm": 0.3857782088985867, + "learning_rate": 2.6320071041070018e-06, + "loss": 0.1479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11612827330827713, + "step": 3845, + "valid_targets_mean": 4773.6, + "valid_targets_min": 2511 + }, + { + "epoch": 5.9689922480620154, + "grad_norm": 0.4166115486481006, + "learning_rate": 2.593795728993804e-06, + "loss": 0.1544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18450096249580383, + "step": 3850, + "valid_targets_mean": 5148.2, + "valid_targets_min": 2061 + }, + { + "epoch": 5.976744186046512, + "grad_norm": 0.4335144952819728, + "learning_rate": 2.555844519344186e-06, + "loss": 0.1444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1622152328491211, + "step": 3855, + "valid_targets_mean": 5090.6, + "valid_targets_min": 790 + }, + { + "epoch": 5.984496124031008, + "grad_norm": 0.3565273610492274, + "learning_rate": 2.518154042403813e-06, + "loss": 0.1408, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12373974919319153, + "step": 3860, + "valid_targets_mean": 5277.1, + "valid_targets_min": 2691 + }, + { + "epoch": 5.992248062015504, + "grad_norm": 0.36972590062662825, + "learning_rate": 2.480724861521264e-06, + "loss": 0.1588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10760015994310379, + "step": 3865, + "valid_targets_mean": 4705.8, + "valid_targets_min": 2256 + }, + { + "epoch": 6.0, + "grad_norm": 0.3774316986540542, + "learning_rate": 2.443557536139598e-06, + "loss": 0.1449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13823938369750977, + "step": 3870, + "valid_targets_mean": 5121.6, + "valid_targets_min": 2951 + }, + { + "epoch": 6.007751937984496, + "grad_norm": 0.4491491812525188, + "learning_rate": 2.406652621787999e-06, + "loss": 0.1461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1544957160949707, + "step": 3875, + "valid_targets_mean": 3907.7, + "valid_targets_min": 472 + }, + { + "epoch": 6.015503875968992, + "grad_norm": 0.39058530830234023, + "learning_rate": 2.3700106700734705e-06, + "loss": 0.1501, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11147475242614746, + "step": 3880, + "valid_targets_mean": 4658.1, + "valid_targets_min": 412 + }, + { + "epoch": 6.023255813953488, + "grad_norm": 0.4764404360508427, + "learning_rate": 2.3336322286725823e-06, + "loss": 0.1365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1638670563697815, + "step": 3885, + "valid_targets_mean": 3920.1, + "valid_targets_min": 792 + }, + { + "epoch": 6.0310077519379846, + "grad_norm": 0.41031515357008064, + "learning_rate": 2.2975178413233047e-06, + "loss": 0.1426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11806363612413406, + "step": 3890, + "valid_targets_mean": 4190.1, + "valid_targets_min": 2164 + }, + { + "epoch": 6.038759689922481, + "grad_norm": 0.4080067588790369, + "learning_rate": 2.261668047816863e-06, + "loss": 0.1442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16606493294239044, + "step": 3895, + "valid_targets_mean": 5487.4, + "valid_targets_min": 272 + }, + { + "epoch": 6.046511627906977, + "grad_norm": 0.4182771476362144, + "learning_rate": 2.226083383989668e-06, + "loss": 0.1454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12919700145721436, + "step": 3900, + "valid_targets_mean": 4410.3, + "valid_targets_min": 453 + }, + { + "epoch": 6.054263565891473, + "grad_norm": 0.465578156889018, + "learning_rate": 2.1907643817153313e-06, + "loss": 0.1429, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1524408608675003, + "step": 3905, + "valid_targets_mean": 4006.7, + "valid_targets_min": 402 + }, + { + "epoch": 6.062015503875969, + "grad_norm": 0.49003831755013766, + "learning_rate": 2.1557115688966835e-06, + "loss": 0.1466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15983235836029053, + "step": 3910, + "valid_targets_mean": 3748.0, + "valid_targets_min": 1984 + }, + { + "epoch": 6.069767441860465, + "grad_norm": 0.4793985294195704, + "learning_rate": 2.120925469457904e-06, + "loss": 0.1492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1331721544265747, + "step": 3915, + "valid_targets_mean": 3765.6, + "valid_targets_min": 1989 + }, + { + "epoch": 6.077519379844961, + "grad_norm": 0.4857730761757885, + "learning_rate": 2.086406603336686e-06, + "loss": 0.162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.223957359790802, + "step": 3920, + "valid_targets_mean": 4345.2, + "valid_targets_min": 487 + }, + { + "epoch": 6.0852713178294575, + "grad_norm": 0.38478031129260776, + "learning_rate": 2.052155486476466e-06, + "loss": 0.1401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.141106978058815, + "step": 3925, + "valid_targets_mean": 5665.4, + "valid_targets_min": 1766 + }, + { + "epoch": 6.093023255813954, + "grad_norm": 0.572101341083877, + "learning_rate": 2.0181726308187044e-06, + "loss": 0.1537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16148856282234192, + "step": 3930, + "valid_targets_mean": 3327.9, + "valid_targets_min": 393 + }, + { + "epoch": 6.10077519379845, + "grad_norm": 0.5702565613060158, + "learning_rate": 1.984458544295247e-06, + "loss": 0.1491, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15981945395469666, + "step": 3935, + "valid_targets_mean": 3374.9, + "valid_targets_min": 665 + }, + { + "epoch": 6.108527131782946, + "grad_norm": 0.41153314259954854, + "learning_rate": 1.9510137308207187e-06, + "loss": 0.144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14380812644958496, + "step": 3940, + "valid_targets_mean": 4357.6, + "valid_targets_min": 343 + }, + { + "epoch": 6.116279069767442, + "grad_norm": 0.42473098729111763, + "learning_rate": 1.9178386902850033e-06, + "loss": 0.1416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12821288406848907, + "step": 3945, + "valid_targets_mean": 3923.3, + "valid_targets_min": 266 + }, + { + "epoch": 6.124031007751938, + "grad_norm": 0.4233818856902045, + "learning_rate": 1.884933918545766e-06, + "loss": 0.1382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14326541125774384, + "step": 3950, + "valid_targets_mean": 4490.9, + "valid_targets_min": 513 + }, + { + "epoch": 6.131782945736434, + "grad_norm": 0.4075534532593769, + "learning_rate": 1.8522999074210357e-06, + "loss": 0.1425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13676217198371887, + "step": 3955, + "valid_targets_mean": 5278.7, + "valid_targets_min": 773 + }, + { + "epoch": 6.1395348837209305, + "grad_norm": 0.4831447381224601, + "learning_rate": 1.819937144681876e-06, + "loss": 0.1481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14638128876686096, + "step": 3960, + "valid_targets_mean": 4841.4, + "valid_targets_min": 1843 + }, + { + "epoch": 6.147286821705427, + "grad_norm": 0.5692373377592979, + "learning_rate": 1.7878461140450709e-06, + "loss": 0.146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17473667860031128, + "step": 3965, + "valid_targets_mean": 3916.5, + "valid_targets_min": 1954 + }, + { + "epoch": 6.155038759689923, + "grad_norm": 0.4411787512540076, + "learning_rate": 1.7560272951659007e-06, + "loss": 0.1499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16196417808532715, + "step": 3970, + "valid_targets_mean": 4841.2, + "valid_targets_min": 681 + }, + { + "epoch": 6.162790697674419, + "grad_norm": 0.4144797822314153, + "learning_rate": 1.7244811636309865e-06, + "loss": 0.1459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12838700413703918, + "step": 3975, + "valid_targets_mean": 5786.5, + "valid_targets_min": 536 + }, + { + "epoch": 6.170542635658915, + "grad_norm": 0.4579031301080518, + "learning_rate": 1.693208190951159e-06, + "loss": 0.148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1469820737838745, + "step": 3980, + "valid_targets_mean": 4262.1, + "valid_targets_min": 2558 + }, + { + "epoch": 6.178294573643411, + "grad_norm": 0.35185050016875424, + "learning_rate": 1.6622088445544337e-06, + "loss": 0.1488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.129560187458992, + "step": 3985, + "valid_targets_mean": 5511.1, + "valid_targets_min": 710 + }, + { + "epoch": 6.186046511627907, + "grad_norm": 0.41851723149449205, + "learning_rate": 1.6314835877790035e-06, + "loss": 0.141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15272796154022217, + "step": 3990, + "valid_targets_mean": 4655.8, + "valid_targets_min": 332 + }, + { + "epoch": 6.1937984496124034, + "grad_norm": 0.40076139878422207, + "learning_rate": 1.6010328798663332e-06, + "loss": 0.1481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12933820486068726, + "step": 3995, + "valid_targets_mean": 5362.1, + "valid_targets_min": 3053 + }, + { + "epoch": 6.2015503875969, + "grad_norm": 0.45341665170317824, + "learning_rate": 1.570857175954279e-06, + "loss": 0.1637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12798666954040527, + "step": 4000, + "valid_targets_mean": 4509.7, + "valid_targets_min": 489 + }, + { + "epoch": 6.209302325581396, + "grad_norm": 0.3607620367511857, + "learning_rate": 1.540956927070294e-06, + "loss": 0.128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11347877979278564, + "step": 4005, + "valid_targets_mean": 5619.4, + "valid_targets_min": 2424 + }, + { + "epoch": 6.217054263565892, + "grad_norm": 0.5026070546003747, + "learning_rate": 1.5113325801246914e-06, + "loss": 0.1531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18720689415931702, + "step": 4010, + "valid_targets_mean": 4427.4, + "valid_targets_min": 1430 + }, + { + "epoch": 6.224806201550388, + "grad_norm": 0.43575139062965373, + "learning_rate": 1.4819845779039477e-06, + "loss": 0.1657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1679738312959671, + "step": 4015, + "valid_targets_mean": 5928.5, + "valid_targets_min": 808 + }, + { + "epoch": 6.232558139534884, + "grad_norm": 0.42143605193627004, + "learning_rate": 1.4529133590641009e-06, + "loss": 0.1343, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13898807764053345, + "step": 4020, + "valid_targets_mean": 4824.2, + "valid_targets_min": 1828 + }, + { + "epoch": 6.24031007751938, + "grad_norm": 0.4220444192180161, + "learning_rate": 1.4241193581241964e-06, + "loss": 0.1369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12464825809001923, + "step": 4025, + "valid_targets_mean": 4312.4, + "valid_targets_min": 280 + }, + { + "epoch": 6.248062015503876, + "grad_norm": 0.4246528594263321, + "learning_rate": 1.395603005459769e-06, + "loss": 0.1514, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11180758476257324, + "step": 4030, + "valid_targets_mean": 5064.3, + "valid_targets_min": 268 + }, + { + "epoch": 6.2558139534883725, + "grad_norm": 0.4601989305033969, + "learning_rate": 1.367364727296434e-06, + "loss": 0.1391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17486141622066498, + "step": 4035, + "valid_targets_mean": 4553.9, + "valid_targets_min": 455 + }, + { + "epoch": 6.263565891472869, + "grad_norm": 0.41743085880863806, + "learning_rate": 1.3394049457035063e-06, + "loss": 0.1271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11269478499889374, + "step": 4040, + "valid_targets_mean": 4022.4, + "valid_targets_min": 396 + }, + { + "epoch": 6.271317829457364, + "grad_norm": 0.3957905593683059, + "learning_rate": 1.3117240785876994e-06, + "loss": 0.1397, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1279354691505432, + "step": 4045, + "valid_targets_mean": 4760.3, + "valid_targets_min": 573 + }, + { + "epoch": 6.27906976744186, + "grad_norm": 0.46939606376712706, + "learning_rate": 1.2843225396868707e-06, + "loss": 0.1278, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14345014095306396, + "step": 4050, + "valid_targets_mean": 3840.0, + "valid_targets_min": 977 + }, + { + "epoch": 6.286821705426356, + "grad_norm": 0.45118172267262807, + "learning_rate": 1.2572007385638264e-06, + "loss": 0.1409, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13466718792915344, + "step": 4055, + "valid_targets_mean": 5589.4, + "valid_targets_min": 2910 + }, + { + "epoch": 6.294573643410852, + "grad_norm": 0.47093501864092696, + "learning_rate": 1.2303590806002341e-06, + "loss": 0.138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17266318202018738, + "step": 4060, + "valid_targets_mean": 4088.8, + "valid_targets_min": 1934 + }, + { + "epoch": 6.3023255813953485, + "grad_norm": 0.43593257987581496, + "learning_rate": 1.2037979669905341e-06, + "loss": 0.1462, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1565391570329666, + "step": 4065, + "valid_targets_mean": 5561.3, + "valid_targets_min": 2171 + }, + { + "epoch": 6.310077519379845, + "grad_norm": 0.38442258109857474, + "learning_rate": 1.1775177947359472e-06, + "loss": 0.1469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.120355024933815, + "step": 4070, + "valid_targets_mean": 5508.9, + "valid_targets_min": 600 + }, + { + "epoch": 6.317829457364341, + "grad_norm": 0.5116245670615639, + "learning_rate": 1.1515189566385598e-06, + "loss": 0.1536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15075933933258057, + "step": 4075, + "valid_targets_mean": 3669.8, + "valid_targets_min": 289 + }, + { + "epoch": 6.325581395348837, + "grad_norm": 0.519726609280829, + "learning_rate": 1.125801841295422e-06, + "loss": 0.1538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1663362681865692, + "step": 4080, + "valid_targets_mean": 3507.8, + "valid_targets_min": 755 + }, + { + "epoch": 6.333333333333333, + "grad_norm": 0.5066281048339958, + "learning_rate": 1.1003668330927674e-06, + "loss": 0.1561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17226609587669373, + "step": 4085, + "valid_targets_mean": 3913.7, + "valid_targets_min": 891 + }, + { + "epoch": 6.341085271317829, + "grad_norm": 0.4168253714412732, + "learning_rate": 1.0752143122002502e-06, + "loss": 0.1549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13902395963668823, + "step": 4090, + "valid_targets_mean": 4731.2, + "valid_targets_min": 310 + }, + { + "epoch": 6.348837209302325, + "grad_norm": 0.3943285404063543, + "learning_rate": 1.0503446545652718e-06, + "loss": 0.1437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13005225360393524, + "step": 4095, + "valid_targets_mean": 4982.7, + "valid_targets_min": 575 + }, + { + "epoch": 6.3565891472868215, + "grad_norm": 0.4120867129337237, + "learning_rate": 1.0257582319073612e-06, + "loss": 0.1276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11511364579200745, + "step": 4100, + "valid_targets_mean": 4254.6, + "valid_targets_min": 869 + }, + { + "epoch": 6.364341085271318, + "grad_norm": 0.44941410097724094, + "learning_rate": 1.001455411712613e-06, + "loss": 0.1475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14647594094276428, + "step": 4105, + "valid_targets_mean": 4231.6, + "valid_targets_min": 709 + }, + { + "epoch": 6.372093023255814, + "grad_norm": 0.38228475170706705, + "learning_rate": 9.774365572281973e-07, + "loss": 0.1393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12529915571212769, + "step": 4110, + "valid_targets_mean": 5377.4, + "valid_targets_min": 2890 + }, + { + "epoch": 6.37984496124031, + "grad_norm": 0.40870796264529896, + "learning_rate": 9.537020274569376e-07, + "loss": 0.1378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1220163106918335, + "step": 4115, + "valid_targets_mean": 4960.4, + "valid_targets_min": 617 + }, + { + "epoch": 6.387596899224806, + "grad_norm": 0.4785371009759146, + "learning_rate": 9.302521771519291e-07, + "loss": 0.1724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15229693055152893, + "step": 4120, + "valid_targets_mean": 4179.5, + "valid_targets_min": 794 + }, + { + "epoch": 6.395348837209302, + "grad_norm": 0.5482438722247103, + "learning_rate": 9.070873568112537e-07, + "loss": 0.1715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20323488116264343, + "step": 4125, + "valid_targets_mean": 3866.2, + "valid_targets_min": 528 + }, + { + "epoch": 6.403100775193798, + "grad_norm": 0.34393102511247997, + "learning_rate": 8.842079126727365e-07, + "loss": 0.1438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17043393850326538, + "step": 4130, + "valid_targets_mean": 8063.3, + "valid_targets_min": 1395 + }, + { + "epoch": 6.410852713178294, + "grad_norm": 0.48778735386271366, + "learning_rate": 8.616141867087569e-07, + "loss": 0.1415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1254102885723114, + "step": 4135, + "valid_targets_mean": 4153.1, + "valid_targets_min": 497 + }, + { + "epoch": 6.4186046511627906, + "grad_norm": 0.42788835357838334, + "learning_rate": 8.393065166211544e-07, + "loss": 0.1385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1304156631231308, + "step": 4140, + "valid_targets_mean": 4457.7, + "valid_targets_min": 1134 + }, + { + "epoch": 6.426356589147287, + "grad_norm": 0.39619956994694927, + "learning_rate": 8.172852358361782e-07, + "loss": 0.1455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1729072630405426, + "step": 4145, + "valid_targets_mean": 5565.4, + "valid_targets_min": 629 + }, + { + "epoch": 6.434108527131783, + "grad_norm": 0.4776261582174204, + "learning_rate": 7.955506734994922e-07, + "loss": 0.139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17891857028007507, + "step": 4150, + "valid_targets_mean": 4085.9, + "valid_targets_min": 482 + }, + { + "epoch": 6.441860465116279, + "grad_norm": 0.4278671968152442, + "learning_rate": 7.741031544712663e-07, + "loss": 0.1358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12771019339561462, + "step": 4155, + "valid_targets_mean": 4428.9, + "valid_targets_min": 479 + }, + { + "epoch": 6.449612403100775, + "grad_norm": 0.37776538158237155, + "learning_rate": 7.529429993213222e-07, + "loss": 0.1423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12023401260375977, + "step": 4160, + "valid_targets_mean": 5098.1, + "valid_targets_min": 994 + }, + { + "epoch": 6.457364341085271, + "grad_norm": 0.43694272212869895, + "learning_rate": 7.320705243243287e-07, + "loss": 0.1405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14688435196876526, + "step": 4165, + "valid_targets_mean": 5080.8, + "valid_targets_min": 675 + }, + { + "epoch": 6.465116279069767, + "grad_norm": 0.4225966998298987, + "learning_rate": 7.114860414550917e-07, + "loss": 0.1385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14561529457569122, + "step": 4170, + "valid_targets_mean": 5137.9, + "valid_targets_min": 2089 + }, + { + "epoch": 6.4728682170542635, + "grad_norm": 0.4688876624797445, + "learning_rate": 6.911898583838738e-07, + "loss": 0.151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15033042430877686, + "step": 4175, + "valid_targets_mean": 4029.8, + "valid_targets_min": 661 + }, + { + "epoch": 6.48062015503876, + "grad_norm": 0.4340667422133109, + "learning_rate": 6.711822784718158e-07, + "loss": 0.1515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15977023541927338, + "step": 4180, + "valid_targets_mean": 4863.2, + "valid_targets_min": 2046 + }, + { + "epoch": 6.488372093023256, + "grad_norm": 0.4569382782087973, + "learning_rate": 6.5146360076638e-07, + "loss": 0.1623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1681194007396698, + "step": 4185, + "valid_targets_mean": 4351.9, + "valid_targets_min": 315 + }, + { + "epoch": 6.496124031007752, + "grad_norm": 0.4554589036178452, + "learning_rate": 6.320341199968982e-07, + "loss": 0.1476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15873485803604126, + "step": 4190, + "valid_targets_mean": 4564.2, + "valid_targets_min": 658 + }, + { + "epoch": 6.503875968992248, + "grad_norm": 0.49218409403445584, + "learning_rate": 6.128941265701627e-07, + "loss": 0.1387, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1481098085641861, + "step": 4195, + "valid_targets_mean": 3601.8, + "valid_targets_min": 352 + }, + { + "epoch": 6.511627906976744, + "grad_norm": 0.4096283466727092, + "learning_rate": 5.94043906566073e-07, + "loss": 0.1333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13861069083213806, + "step": 4200, + "valid_targets_mean": 4857.5, + "valid_targets_min": 326 + }, + { + "epoch": 6.51937984496124, + "grad_norm": 0.46885200416042144, + "learning_rate": 5.754837417333781e-07, + "loss": 0.1389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14652873575687408, + "step": 4205, + "valid_targets_mean": 3894.9, + "valid_targets_min": 906 + }, + { + "epoch": 6.5271317829457365, + "grad_norm": 0.3885968728101716, + "learning_rate": 5.572139094854478e-07, + "loss": 0.1482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12030985951423645, + "step": 4210, + "valid_targets_mean": 4954.5, + "valid_targets_min": 2183 + }, + { + "epoch": 6.534883720930233, + "grad_norm": 0.37139089742268966, + "learning_rate": 5.392346828961504e-07, + "loss": 0.1401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11080891638994217, + "step": 4215, + "valid_targets_mean": 5050.3, + "valid_targets_min": 298 + }, + { + "epoch": 6.542635658914729, + "grad_norm": 0.3853585068404549, + "learning_rate": 5.215463306957436e-07, + "loss": 0.1479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13822655379772186, + "step": 4220, + "valid_targets_mean": 5455.0, + "valid_targets_min": 2416 + }, + { + "epoch": 6.550387596899225, + "grad_norm": 0.4726856345826331, + "learning_rate": 5.0414911726687e-07, + "loss": 0.1675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22890380024909973, + "step": 4225, + "valid_targets_mean": 5294.4, + "valid_targets_min": 781 + }, + { + "epoch": 6.558139534883721, + "grad_norm": 0.4031349839399349, + "learning_rate": 4.870433026406152e-07, + "loss": 0.1508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13684213161468506, + "step": 4230, + "valid_targets_mean": 4680.8, + "valid_targets_min": 2718 + }, + { + "epoch": 6.565891472868217, + "grad_norm": 0.48960339790783236, + "learning_rate": 4.7022914249260197e-07, + "loss": 0.1597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1440485715866089, + "step": 4235, + "valid_targets_mean": 4090.7, + "valid_targets_min": 2062 + }, + { + "epoch": 6.573643410852713, + "grad_norm": 0.48029936611780594, + "learning_rate": 4.537068881391848e-07, + "loss": 0.138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15450087189674377, + "step": 4240, + "valid_targets_mean": 3792.6, + "valid_targets_min": 2354 + }, + { + "epoch": 6.5813953488372094, + "grad_norm": 0.411026213138413, + "learning_rate": 4.37476786533686e-07, + "loss": 0.1413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11528407782316208, + "step": 4245, + "valid_targets_mean": 5034.7, + "valid_targets_min": 682 + }, + { + "epoch": 6.589147286821706, + "grad_norm": 0.3613114304130289, + "learning_rate": 4.2153908026270504e-07, + "loss": 0.1404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11450152099132538, + "step": 4250, + "valid_targets_mean": 5106.2, + "valid_targets_min": 3567 + }, + { + "epoch": 6.596899224806202, + "grad_norm": 0.49968357376260497, + "learning_rate": 4.058940075424933e-07, + "loss": 0.1505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1560279130935669, + "step": 4255, + "valid_targets_mean": 4061.5, + "valid_targets_min": 2069 + }, + { + "epoch": 6.604651162790698, + "grad_norm": 0.46911068601775274, + "learning_rate": 3.9054180221539394e-07, + "loss": 0.1538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13993695378303528, + "step": 4260, + "valid_targets_mean": 3780.2, + "valid_targets_min": 790 + }, + { + "epoch": 6.612403100775194, + "grad_norm": 0.3917124798281757, + "learning_rate": 3.7548269374634736e-07, + "loss": 0.1344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11809076368808746, + "step": 4265, + "valid_targets_mean": 4906.1, + "valid_targets_min": 253 + }, + { + "epoch": 6.62015503875969, + "grad_norm": 0.43882837837397076, + "learning_rate": 3.607169072194583e-07, + "loss": 0.1303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13036519289016724, + "step": 4270, + "valid_targets_mean": 4900.6, + "valid_targets_min": 449 + }, + { + "epoch": 6.627906976744186, + "grad_norm": 0.3783203483240392, + "learning_rate": 3.4624466333464057e-07, + "loss": 0.1386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13775168359279633, + "step": 4275, + "valid_targets_mean": 5567.1, + "valid_targets_min": 523 + }, + { + "epoch": 6.635658914728682, + "grad_norm": 0.3505902467720475, + "learning_rate": 3.320661784043e-07, + "loss": 0.1366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11130744218826294, + "step": 4280, + "valid_targets_mean": 6773.1, + "valid_targets_min": 3288 + }, + { + "epoch": 6.6434108527131785, + "grad_norm": 0.4364368560735114, + "learning_rate": 3.1818166435012563e-07, + "loss": 0.1503, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15424305200576782, + "step": 4285, + "valid_targets_mean": 4766.6, + "valid_targets_min": 605 + }, + { + "epoch": 6.651162790697675, + "grad_norm": 0.49428323333345386, + "learning_rate": 3.0459132869989914e-07, + "loss": 0.1635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17941808700561523, + "step": 4290, + "valid_targets_mean": 4228.8, + "valid_targets_min": 575 + }, + { + "epoch": 6.658914728682171, + "grad_norm": 0.4532498891303768, + "learning_rate": 2.912953745844083e-07, + "loss": 0.1603, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16425731778144836, + "step": 4295, + "valid_targets_mean": 4930.4, + "valid_targets_min": 512 + }, + { + "epoch": 6.666666666666667, + "grad_norm": 0.4484218818013772, + "learning_rate": 2.7829400073440704e-07, + "loss": 0.147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14921866357326508, + "step": 4300, + "valid_targets_mean": 4864.6, + "valid_targets_min": 2160 + }, + { + "epoch": 6.674418604651163, + "grad_norm": 0.5128051450701466, + "learning_rate": 2.655874014776427e-07, + "loss": 0.1484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19552041590213776, + "step": 4305, + "valid_targets_mean": 4560.8, + "valid_targets_min": 324 + }, + { + "epoch": 6.682170542635659, + "grad_norm": 0.39420076435511847, + "learning_rate": 2.531757667359491e-07, + "loss": 0.1343, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11401358246803284, + "step": 4310, + "valid_targets_mean": 5222.8, + "valid_targets_min": 2452 + }, + { + "epoch": 6.689922480620155, + "grad_norm": 0.39424587927802124, + "learning_rate": 2.4105928202241557e-07, + "loss": 0.1313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10992264747619629, + "step": 4315, + "valid_targets_mean": 4744.0, + "valid_targets_min": 946 + }, + { + "epoch": 6.6976744186046515, + "grad_norm": 0.4909208750282455, + "learning_rate": 2.2923812843861136e-07, + "loss": 0.1569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1754213571548462, + "step": 4320, + "valid_targets_mean": 4488.9, + "valid_targets_min": 267 + }, + { + "epoch": 6.705426356589148, + "grad_norm": 0.46076430916005, + "learning_rate": 2.1771248267186795e-07, + "loss": 0.1439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15366077423095703, + "step": 4325, + "valid_targets_mean": 3990.3, + "valid_targets_min": 412 + }, + { + "epoch": 6.713178294573644, + "grad_norm": 0.44837990403963107, + "learning_rate": 2.064825169926632e-07, + "loss": 0.1436, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14431065320968628, + "step": 4330, + "valid_targets_mean": 4223.2, + "valid_targets_min": 1821 + }, + { + "epoch": 6.720930232558139, + "grad_norm": 0.3689938810371774, + "learning_rate": 1.9554839925201686e-07, + "loss": 0.1333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16173899173736572, + "step": 4335, + "valid_targets_mean": 6193.9, + "valid_targets_min": 3050 + }, + { + "epoch": 6.728682170542635, + "grad_norm": 0.3834490175252527, + "learning_rate": 1.849102928790014e-07, + "loss": 0.1465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15902329981327057, + "step": 4340, + "valid_targets_mean": 5950.6, + "valid_targets_min": 532 + }, + { + "epoch": 6.736434108527131, + "grad_norm": 0.3806645179110598, + "learning_rate": 1.74568356878293e-07, + "loss": 0.1431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15495096147060394, + "step": 4345, + "valid_targets_mean": 5389.5, + "valid_targets_min": 2093 + }, + { + "epoch": 6.7441860465116275, + "grad_norm": 0.5001568087478473, + "learning_rate": 1.6452274582779537e-07, + "loss": 0.1494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14459748566150665, + "step": 4350, + "valid_targets_mean": 4592.8, + "valid_targets_min": 555 + }, + { + "epoch": 6.751937984496124, + "grad_norm": 0.5854439307900624, + "learning_rate": 1.5477360987633305e-07, + "loss": 0.166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19409656524658203, + "step": 4355, + "valid_targets_mean": 3547.4, + "valid_targets_min": 529 + }, + { + "epoch": 6.75968992248062, + "grad_norm": 0.4665900083114806, + "learning_rate": 1.453210947413952e-07, + "loss": 0.1467, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1850074976682663, + "step": 4360, + "valid_targets_mean": 4564.8, + "valid_targets_min": 540 + }, + { + "epoch": 6.767441860465116, + "grad_norm": 0.48350023695445227, + "learning_rate": 1.3616534170697747e-07, + "loss": 0.1539, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15944364666938782, + "step": 4365, + "valid_targets_mean": 3584.8, + "valid_targets_min": 263 + }, + { + "epoch": 6.775193798449612, + "grad_norm": 0.45038375032977, + "learning_rate": 1.2730648762144804e-07, + "loss": 0.1552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18844370543956757, + "step": 4370, + "valid_targets_mean": 5290.8, + "valid_targets_min": 806 + }, + { + "epoch": 6.782945736434108, + "grad_norm": 0.4134945839078117, + "learning_rate": 1.1874466489551817e-07, + "loss": 0.1408, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15977919101715088, + "step": 4375, + "valid_targets_mean": 4978.0, + "valid_targets_min": 325 + }, + { + "epoch": 6.790697674418604, + "grad_norm": 0.40450500367697456, + "learning_rate": 1.1048000150025939e-07, + "loss": 0.1453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13722053170204163, + "step": 4380, + "valid_targets_mean": 4658.6, + "valid_targets_min": 3135 + }, + { + "epoch": 6.7984496124031, + "grad_norm": 0.41715346551607424, + "learning_rate": 1.0251262096518499e-07, + "loss": 0.1402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10508380830287933, + "step": 4385, + "valid_targets_mean": 4255.1, + "valid_targets_min": 250 + }, + { + "epoch": 6.8062015503875966, + "grad_norm": 0.5216272062569919, + "learning_rate": 9.484264237641372e-08, + "loss": 0.1549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21273067593574524, + "step": 4390, + "valid_targets_mean": 4022.2, + "valid_targets_min": 608 + }, + { + "epoch": 6.813953488372093, + "grad_norm": 0.39630599414920137, + "learning_rate": 8.747018037488009e-08, + "loss": 0.1337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12072727829217911, + "step": 4395, + "valid_targets_mean": 4854.1, + "valid_targets_min": 233 + }, + { + "epoch": 6.821705426356589, + "grad_norm": 0.4277418685702825, + "learning_rate": 8.039534515462244e-08, + "loss": 0.129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11388542503118515, + "step": 4400, + "valid_targets_mean": 4361.8, + "valid_targets_min": 821 + }, + { + "epoch": 6.829457364341085, + "grad_norm": 0.38765433024613494, + "learning_rate": 7.361824246114424e-08, + "loss": 0.1309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13917176425457, + "step": 4405, + "valid_targets_mean": 5343.9, + "valid_targets_min": 1942 + }, + { + "epoch": 6.837209302325581, + "grad_norm": 0.34398297746177137, + "learning_rate": 6.713897358982202e-08, + "loss": 0.1493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.113133504986763, + "step": 4410, + "valid_targets_mean": 5645.0, + "valid_targets_min": 678 + }, + { + "epoch": 6.844961240310077, + "grad_norm": 0.48279011534912997, + "learning_rate": 6.095763538440214e-08, + "loss": 0.1492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12714777886867523, + "step": 4415, + "valid_targets_mean": 3936.2, + "valid_targets_min": 216 + }, + { + "epoch": 6.852713178294573, + "grad_norm": 0.4180463055838533, + "learning_rate": 5.507432023554637e-08, + "loss": 0.1469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1416483223438263, + "step": 4420, + "valid_targets_mean": 4545.9, + "valid_targets_min": 1037 + }, + { + "epoch": 6.8604651162790695, + "grad_norm": 0.4294181180018212, + "learning_rate": 4.948911607945084e-08, + "loss": 0.1531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19967518746852875, + "step": 4425, + "valid_targets_mean": 5607.9, + "valid_targets_min": 615 + }, + { + "epoch": 6.868217054263566, + "grad_norm": 0.3477503979746384, + "learning_rate": 4.420210639653366e-08, + "loss": 0.1297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1086292415857315, + "step": 4430, + "valid_targets_mean": 5510.9, + "valid_targets_min": 2659 + }, + { + "epoch": 6.875968992248062, + "grad_norm": 0.41768003376691165, + "learning_rate": 3.921337021018934e-08, + "loss": 0.1403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1428183615207672, + "step": 4435, + "valid_targets_mean": 5068.1, + "valid_targets_min": 459 + }, + { + "epoch": 6.883720930232558, + "grad_norm": 0.490921604098694, + "learning_rate": 3.452298208560301e-08, + "loss": 0.1374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17244720458984375, + "step": 4440, + "valid_targets_mean": 4010.2, + "valid_targets_min": 669 + }, + { + "epoch": 6.891472868217054, + "grad_norm": 0.36536468948119627, + "learning_rate": 3.0131012128642445e-08, + "loss": 0.1521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11829351633787155, + "step": 4445, + "valid_targets_mean": 5238.6, + "valid_targets_min": 573 + }, + { + "epoch": 6.89922480620155, + "grad_norm": 0.43543485475431953, + "learning_rate": 2.6037525984798918e-08, + "loss": 0.1592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14736925065517426, + "step": 4450, + "valid_targets_mean": 4508.6, + "valid_targets_min": 2594 + }, + { + "epoch": 6.906976744186046, + "grad_norm": 0.457191809542186, + "learning_rate": 2.2242584838219062e-08, + "loss": 0.1419, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1291424185037613, + "step": 4455, + "valid_targets_mean": 4140.1, + "valid_targets_min": 363 + }, + { + "epoch": 6.9147286821705425, + "grad_norm": 0.3441924236982618, + "learning_rate": 1.8746245410781184e-08, + "loss": 0.1402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09382060170173645, + "step": 4460, + "valid_targets_mean": 5934.2, + "valid_targets_min": 2203 + }, + { + "epoch": 6.922480620155039, + "grad_norm": 0.46978533968904, + "learning_rate": 1.5548559961253707e-08, + "loss": 0.1517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16701874136924744, + "step": 4465, + "valid_targets_mean": 4124.2, + "valid_targets_min": 435 + }, + { + "epoch": 6.930232558139535, + "grad_norm": 0.45884185400938543, + "learning_rate": 1.2649576284509135e-08, + "loss": 0.1654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15522664785385132, + "step": 4470, + "valid_targets_mean": 4735.6, + "valid_targets_min": 748 + }, + { + "epoch": 6.937984496124031, + "grad_norm": 0.37304600716915987, + "learning_rate": 1.004933771080907e-08, + "loss": 0.1585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15063238143920898, + "step": 4475, + "valid_targets_mean": 5812.3, + "valid_targets_min": 1844 + }, + { + "epoch": 6.945736434108527, + "grad_norm": 0.4417268532948254, + "learning_rate": 7.747883105166942e-09, + "loss": 0.1483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12845158576965332, + "step": 4480, + "valid_targets_mean": 3979.6, + "valid_targets_min": 329 + }, + { + "epoch": 6.953488372093023, + "grad_norm": 0.4206373369632711, + "learning_rate": 5.745246866748489e-09, + "loss": 0.1433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1285676211118698, + "step": 4485, + "valid_targets_mean": 4420.4, + "valid_targets_min": 864 + }, + { + "epoch": 6.961240310077519, + "grad_norm": 0.409189864441095, + "learning_rate": 4.041458928378816e-09, + "loss": 0.1433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13274972140789032, + "step": 4490, + "valid_targets_mean": 5082.7, + "valid_targets_min": 2020 + }, + { + "epoch": 6.9689922480620154, + "grad_norm": 0.7633572465865676, + "learning_rate": 2.6365447560761093e-09, + "loss": 0.1471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1332360953092575, + "step": 4495, + "valid_targets_mean": 3844.5, + "valid_targets_min": 300 + }, + { + "epoch": 6.976744186046512, + "grad_norm": 0.4810580142176617, + "learning_rate": 1.5305253486852523e-09, + "loss": 0.1431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16817598044872284, + "step": 4500, + "valid_targets_mean": 4177.8, + "valid_targets_min": 529 + }, + { + "epoch": 6.984496124031008, + "grad_norm": 0.4276342091229402, + "learning_rate": 7.234172375558679e-10, + "loss": 0.1495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13658341765403748, + "step": 4505, + "valid_targets_mean": 4540.3, + "valid_targets_min": 1720 + }, + { + "epoch": 6.992248062015504, + "grad_norm": 0.41945236764032184, + "learning_rate": 2.1523248629806703e-10, + "loss": 0.1521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14896142482757568, + "step": 4510, + "valid_targets_mean": 4939.6, + "valid_targets_min": 694 + }, + { + "epoch": 7.0, + "grad_norm": 0.4017760557242996, + "learning_rate": 5.978690600372261e-12, + "loss": 0.1386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12985655665397644, + "step": 4515, + "valid_targets_mean": 5961.6, + "valid_targets_min": 2002 + }, + { + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12985655665397644, + "step": 4515, + "total_flos": 1219521762033664.0, + "train_loss": 0.10701951415567831, + "train_runtime": 13204.5072, + "train_samples_per_second": 5.464, + "train_steps_per_second": 0.342, + "valid_targets_mean": 5961.6, + "valid_targets_min": 2002 + } + ], + "logging_steps": 5, + "max_steps": 4515, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 1500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1219521762033664.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}