diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,10299 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 4662, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0075075075075075074, + "grad_norm": 11.018023139973177, + "learning_rate": 3.426124197002142e-07, + "loss": 0.6962, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.666359007358551, + "step": 5, + "valid_targets_mean": 3406.2, + "valid_targets_min": 590 + }, + { + "epoch": 0.015015015015015015, + "grad_norm": 14.300498234778246, + "learning_rate": 7.708779443254819e-07, + "loss": 0.6812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7260710597038269, + "step": 10, + "valid_targets_mean": 2929.3, + "valid_targets_min": 540 + }, + { + "epoch": 0.02252252252252252, + "grad_norm": 10.308334513580103, + "learning_rate": 1.1991434689507496e-06, + "loss": 0.694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7622815370559692, + "step": 15, + "valid_targets_mean": 4942.4, + "valid_targets_min": 474 + }, + { + "epoch": 0.03003003003003003, + "grad_norm": 9.480467405179247, + "learning_rate": 1.6274089935760173e-06, + "loss": 0.63, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5915040969848633, + "step": 20, + "valid_targets_mean": 4988.7, + "valid_targets_min": 506 + }, + { + "epoch": 0.03753753753753754, + "grad_norm": 6.98322327787682, + "learning_rate": 2.055674518201285e-06, + "loss": 0.634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6035254001617432, + "step": 25, + "valid_targets_mean": 4144.8, + "valid_targets_min": 406 + }, + { + "epoch": 0.04504504504504504, + "grad_norm": 4.519545202299929, + "learning_rate": 2.4839400428265524e-06, + "loss": 0.6108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6693063974380493, + "step": 30, + "valid_targets_mean": 2645.9, + "valid_targets_min": 811 + }, + { + "epoch": 0.052552552552552555, + "grad_norm": 2.254075439509915, + "learning_rate": 2.9122055674518203e-06, + "loss": 0.583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6287423968315125, + "step": 35, + "valid_targets_mean": 5337.9, + "valid_targets_min": 424 + }, + { + "epoch": 0.06006006006006006, + "grad_norm": 1.5869006029617074, + "learning_rate": 3.3404710920770882e-06, + "loss": 0.4981, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5136668682098389, + "step": 40, + "valid_targets_mean": 4675.1, + "valid_targets_min": 683 + }, + { + "epoch": 0.06756756756756757, + "grad_norm": 1.2871585570908246, + "learning_rate": 3.7687366167023558e-06, + "loss": 0.498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4761298894882202, + "step": 45, + "valid_targets_mean": 4580.2, + "valid_targets_min": 695 + }, + { + "epoch": 0.07507507507507508, + "grad_norm": 1.1073292407135484, + "learning_rate": 4.197002141327624e-06, + "loss": 0.4618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4800295829772949, + "step": 50, + "valid_targets_mean": 3829.7, + "valid_targets_min": 408 + }, + { + "epoch": 0.08258258258258258, + "grad_norm": 0.8919504665868666, + "learning_rate": 4.625267665952891e-06, + "loss": 0.5237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.525561511516571, + "step": 55, + "valid_targets_mean": 5515.5, + "valid_targets_min": 571 + }, + { + "epoch": 0.09009009009009009, + "grad_norm": 0.865780829661676, + "learning_rate": 5.05353319057816e-06, + "loss": 0.5163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5415364503860474, + "step": 60, + "valid_targets_mean": 3433.2, + "valid_targets_min": 511 + }, + { + "epoch": 0.09759759759759759, + "grad_norm": 0.5872913252913201, + "learning_rate": 5.481798715203427e-06, + "loss": 0.4707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3888802230358124, + "step": 65, + "valid_targets_mean": 5232.6, + "valid_targets_min": 664 + }, + { + "epoch": 0.10510510510510511, + "grad_norm": 0.5994653626627001, + "learning_rate": 5.910064239828695e-06, + "loss": 0.4821, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43450668454170227, + "step": 70, + "valid_targets_mean": 5187.1, + "valid_targets_min": 460 + }, + { + "epoch": 0.11261261261261261, + "grad_norm": 0.6662883071321841, + "learning_rate": 6.338329764453962e-06, + "loss": 0.4331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3964645564556122, + "step": 75, + "valid_targets_mean": 3182.0, + "valid_targets_min": 880 + }, + { + "epoch": 0.12012012012012012, + "grad_norm": 0.6301518751447736, + "learning_rate": 6.76659528907923e-06, + "loss": 0.4374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37995022535324097, + "step": 80, + "valid_targets_mean": 3907.1, + "valid_targets_min": 773 + }, + { + "epoch": 0.12762762762762764, + "grad_norm": 0.6479909629866574, + "learning_rate": 7.194860813704497e-06, + "loss": 0.4749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43532025814056396, + "step": 85, + "valid_targets_mean": 3381.8, + "valid_targets_min": 727 + }, + { + "epoch": 0.13513513513513514, + "grad_norm": 0.601954910384077, + "learning_rate": 7.623126338329765e-06, + "loss": 0.449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37232670187950134, + "step": 90, + "valid_targets_mean": 3859.4, + "valid_targets_min": 681 + }, + { + "epoch": 0.14264264264264265, + "grad_norm": 0.6010699211352588, + "learning_rate": 8.051391862955033e-06, + "loss": 0.4036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3912065625190735, + "step": 95, + "valid_targets_mean": 4532.2, + "valid_targets_min": 525 + }, + { + "epoch": 0.15015015015015015, + "grad_norm": 0.6776934870921543, + "learning_rate": 8.4796573875803e-06, + "loss": 0.4009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3918367922306061, + "step": 100, + "valid_targets_mean": 3923.7, + "valid_targets_min": 631 + }, + { + "epoch": 0.15765765765765766, + "grad_norm": 0.5815397148408697, + "learning_rate": 8.907922912205568e-06, + "loss": 0.422, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40705981850624084, + "step": 105, + "valid_targets_mean": 4907.4, + "valid_targets_min": 624 + }, + { + "epoch": 0.16516516516516516, + "grad_norm": 0.6207231229898003, + "learning_rate": 9.336188436830836e-06, + "loss": 0.4197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35407790541648865, + "step": 110, + "valid_targets_mean": 4749.1, + "valid_targets_min": 357 + }, + { + "epoch": 0.17267267267267267, + "grad_norm": 0.7161182478604338, + "learning_rate": 9.764453961456105e-06, + "loss": 0.4271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3748543858528137, + "step": 115, + "valid_targets_mean": 2599.0, + "valid_targets_min": 495 + }, + { + "epoch": 0.18018018018018017, + "grad_norm": 0.6433782885571062, + "learning_rate": 1.019271948608137e-05, + "loss": 0.4129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44952958822250366, + "step": 120, + "valid_targets_mean": 3456.8, + "valid_targets_min": 521 + }, + { + "epoch": 0.18768768768768768, + "grad_norm": 0.608760987576289, + "learning_rate": 1.062098501070664e-05, + "loss": 0.396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4421440064907074, + "step": 125, + "valid_targets_mean": 4304.8, + "valid_targets_min": 676 + }, + { + "epoch": 0.19519519519519518, + "grad_norm": 0.6056247540086898, + "learning_rate": 1.1049250535331907e-05, + "loss": 0.356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3524633049964905, + "step": 130, + "valid_targets_mean": 4378.4, + "valid_targets_min": 699 + }, + { + "epoch": 0.20270270270270271, + "grad_norm": 0.7470542876873747, + "learning_rate": 1.1477516059957175e-05, + "loss": 0.3928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3509212136268616, + "step": 135, + "valid_targets_mean": 3348.1, + "valid_targets_min": 580 + }, + { + "epoch": 0.21021021021021022, + "grad_norm": 0.5297517332166896, + "learning_rate": 1.1905781584582443e-05, + "loss": 0.3971, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3493044376373291, + "step": 140, + "valid_targets_mean": 4820.6, + "valid_targets_min": 588 + }, + { + "epoch": 0.21771771771771772, + "grad_norm": 0.8236972811131019, + "learning_rate": 1.233404710920771e-05, + "loss": 0.4229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4382990002632141, + "step": 145, + "valid_targets_mean": 4910.1, + "valid_targets_min": 704 + }, + { + "epoch": 0.22522522522522523, + "grad_norm": 0.5641188867988419, + "learning_rate": 1.2762312633832978e-05, + "loss": 0.4053, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4189315438270569, + "step": 150, + "valid_targets_mean": 4847.2, + "valid_targets_min": 479 + }, + { + "epoch": 0.23273273273273273, + "grad_norm": 0.759436996305121, + "learning_rate": 1.3190578158458245e-05, + "loss": 0.3995, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4512358009815216, + "step": 155, + "valid_targets_mean": 3609.1, + "valid_targets_min": 660 + }, + { + "epoch": 0.24024024024024024, + "grad_norm": 0.5776616654260357, + "learning_rate": 1.3618843683083513e-05, + "loss": 0.3982, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.366862416267395, + "step": 160, + "valid_targets_mean": 4489.5, + "valid_targets_min": 709 + }, + { + "epoch": 0.24774774774774774, + "grad_norm": 0.5621030834623711, + "learning_rate": 1.404710920770878e-05, + "loss": 0.3562, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3245847225189209, + "step": 165, + "valid_targets_mean": 3611.0, + "valid_targets_min": 857 + }, + { + "epoch": 0.2552552552552553, + "grad_norm": 0.6383475705856398, + "learning_rate": 1.4475374732334048e-05, + "loss": 0.3633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3641152083873749, + "step": 170, + "valid_targets_mean": 3776.7, + "valid_targets_min": 273 + }, + { + "epoch": 0.2627627627627628, + "grad_norm": 0.6387883870614447, + "learning_rate": 1.4903640256959315e-05, + "loss": 0.3892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4122806787490845, + "step": 175, + "valid_targets_mean": 4106.8, + "valid_targets_min": 367 + }, + { + "epoch": 0.2702702702702703, + "grad_norm": 0.5859730062305724, + "learning_rate": 1.5331905781584583e-05, + "loss": 0.4145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4545392692089081, + "step": 180, + "valid_targets_mean": 5412.8, + "valid_targets_min": 485 + }, + { + "epoch": 0.2777777777777778, + "grad_norm": 0.6291566957212851, + "learning_rate": 1.576017130620985e-05, + "loss": 0.3586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34801965951919556, + "step": 185, + "valid_targets_mean": 3683.8, + "valid_targets_min": 388 + }, + { + "epoch": 0.2852852852852853, + "grad_norm": 0.7714952807971188, + "learning_rate": 1.618843683083512e-05, + "loss": 0.4031, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33357149362564087, + "step": 190, + "valid_targets_mean": 3992.8, + "valid_targets_min": 645 + }, + { + "epoch": 0.2927927927927928, + "grad_norm": 0.6022688124054402, + "learning_rate": 1.6616702355460387e-05, + "loss": 0.3632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32963526248931885, + "step": 195, + "valid_targets_mean": 4479.6, + "valid_targets_min": 376 + }, + { + "epoch": 0.3003003003003003, + "grad_norm": 0.5461125797479052, + "learning_rate": 1.7044967880085653e-05, + "loss": 0.3609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3471255898475647, + "step": 200, + "valid_targets_mean": 5124.2, + "valid_targets_min": 2440 + }, + { + "epoch": 0.3078078078078078, + "grad_norm": 0.6479231652954496, + "learning_rate": 1.7473233404710924e-05, + "loss": 0.3709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36097437143325806, + "step": 205, + "valid_targets_mean": 3874.2, + "valid_targets_min": 536 + }, + { + "epoch": 0.3153153153153153, + "grad_norm": 0.6307321400936947, + "learning_rate": 1.790149892933619e-05, + "loss": 0.356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32168492674827576, + "step": 210, + "valid_targets_mean": 4215.9, + "valid_targets_min": 750 + }, + { + "epoch": 0.3228228228228228, + "grad_norm": 0.7342733098843338, + "learning_rate": 1.8329764453961457e-05, + "loss": 0.3716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3192724585533142, + "step": 215, + "valid_targets_mean": 3300.5, + "valid_targets_min": 465 + }, + { + "epoch": 0.3303303303303303, + "grad_norm": 0.6269217295173055, + "learning_rate": 1.8758029978586724e-05, + "loss": 0.382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.393771767616272, + "step": 220, + "valid_targets_mean": 3780.5, + "valid_targets_min": 520 + }, + { + "epoch": 0.33783783783783783, + "grad_norm": 0.6699586606597604, + "learning_rate": 1.9186295503211994e-05, + "loss": 0.3367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3441130518913269, + "step": 225, + "valid_targets_mean": 3570.4, + "valid_targets_min": 367 + }, + { + "epoch": 0.34534534534534533, + "grad_norm": 0.6212030440388314, + "learning_rate": 1.961456102783726e-05, + "loss": 0.3249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28860020637512207, + "step": 230, + "valid_targets_mean": 4043.8, + "valid_targets_min": 797 + }, + { + "epoch": 0.35285285285285284, + "grad_norm": 0.5933247879185318, + "learning_rate": 2.004282655246253e-05, + "loss": 0.3663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2963605523109436, + "step": 235, + "valid_targets_mean": 4519.4, + "valid_targets_min": 679 + }, + { + "epoch": 0.36036036036036034, + "grad_norm": 0.5915857140141431, + "learning_rate": 2.0471092077087794e-05, + "loss": 0.3707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3590105175971985, + "step": 240, + "valid_targets_mean": 4801.9, + "valid_targets_min": 786 + }, + { + "epoch": 0.36786786786786785, + "grad_norm": 0.6926624493340766, + "learning_rate": 2.089935760171306e-05, + "loss": 0.3699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3375079035758972, + "step": 245, + "valid_targets_mean": 3064.8, + "valid_targets_min": 636 + }, + { + "epoch": 0.37537537537537535, + "grad_norm": 0.5625060900455121, + "learning_rate": 2.1327623126338334e-05, + "loss": 0.3717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2878052592277527, + "step": 250, + "valid_targets_mean": 4984.9, + "valid_targets_min": 389 + }, + { + "epoch": 0.38288288288288286, + "grad_norm": 0.4895346484151352, + "learning_rate": 2.17558886509636e-05, + "loss": 0.3319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3224838972091675, + "step": 255, + "valid_targets_mean": 5919.2, + "valid_targets_min": 631 + }, + { + "epoch": 0.39039039039039036, + "grad_norm": 0.6186907667129198, + "learning_rate": 2.2184154175588867e-05, + "loss": 0.3133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3299029767513275, + "step": 260, + "valid_targets_mean": 4151.9, + "valid_targets_min": 694 + }, + { + "epoch": 0.3978978978978979, + "grad_norm": 0.637540257330486, + "learning_rate": 2.2612419700214137e-05, + "loss": 0.3034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31127655506134033, + "step": 265, + "valid_targets_mean": 4565.4, + "valid_targets_min": 667 + }, + { + "epoch": 0.40540540540540543, + "grad_norm": 0.6482997719326944, + "learning_rate": 2.3040685224839404e-05, + "loss": 0.371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3371477723121643, + "step": 270, + "valid_targets_mean": 4799.7, + "valid_targets_min": 565 + }, + { + "epoch": 0.41291291291291293, + "grad_norm": 0.630240697831865, + "learning_rate": 2.346895074946467e-05, + "loss": 0.4042, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3549198508262634, + "step": 275, + "valid_targets_mean": 3973.2, + "valid_targets_min": 571 + }, + { + "epoch": 0.42042042042042044, + "grad_norm": 0.5378005948550142, + "learning_rate": 2.3897216274089937e-05, + "loss": 0.3568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3140275776386261, + "step": 280, + "valid_targets_mean": 4524.4, + "valid_targets_min": 466 + }, + { + "epoch": 0.42792792792792794, + "grad_norm": 0.5496707584360776, + "learning_rate": 2.4325481798715207e-05, + "loss": 0.3439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27984702587127686, + "step": 285, + "valid_targets_mean": 5251.4, + "valid_targets_min": 871 + }, + { + "epoch": 0.43543543543543545, + "grad_norm": 0.6054529261217243, + "learning_rate": 2.4753747323340474e-05, + "loss": 0.3399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2717384994029999, + "step": 290, + "valid_targets_mean": 5978.3, + "valid_targets_min": 665 + }, + { + "epoch": 0.44294294294294295, + "grad_norm": 0.60946853866193, + "learning_rate": 2.518201284796574e-05, + "loss": 0.3324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31978553533554077, + "step": 295, + "valid_targets_mean": 3610.3, + "valid_targets_min": 729 + }, + { + "epoch": 0.45045045045045046, + "grad_norm": 0.7112237553385803, + "learning_rate": 2.5610278372591007e-05, + "loss": 0.365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36318349838256836, + "step": 300, + "valid_targets_mean": 2997.6, + "valid_targets_min": 398 + }, + { + "epoch": 0.45795795795795796, + "grad_norm": 0.5754561363093669, + "learning_rate": 2.6038543897216277e-05, + "loss": 0.3475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3775876760482788, + "step": 305, + "valid_targets_mean": 5485.2, + "valid_targets_min": 291 + }, + { + "epoch": 0.46546546546546547, + "grad_norm": 0.5225836627231101, + "learning_rate": 2.6466809421841544e-05, + "loss": 0.3327, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2630726099014282, + "step": 310, + "valid_targets_mean": 5477.4, + "valid_targets_min": 1953 + }, + { + "epoch": 0.47297297297297297, + "grad_norm": 0.6631066150226813, + "learning_rate": 2.689507494646681e-05, + "loss": 0.3341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4078267812728882, + "step": 315, + "valid_targets_mean": 4227.4, + "valid_targets_min": 691 + }, + { + "epoch": 0.4804804804804805, + "grad_norm": 0.5625056894327084, + "learning_rate": 2.732334047109208e-05, + "loss": 0.3307, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34400713443756104, + "step": 320, + "valid_targets_mean": 6082.6, + "valid_targets_min": 756 + }, + { + "epoch": 0.487987987987988, + "grad_norm": 0.5720940827291631, + "learning_rate": 2.7751605995717347e-05, + "loss": 0.3544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.364154189825058, + "step": 325, + "valid_targets_mean": 5058.0, + "valid_targets_min": 1324 + }, + { + "epoch": 0.4954954954954955, + "grad_norm": 0.6142764560167397, + "learning_rate": 2.8179871520342614e-05, + "loss": 0.3567, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3340011239051819, + "step": 330, + "valid_targets_mean": 3809.8, + "valid_targets_min": 606 + }, + { + "epoch": 0.503003003003003, + "grad_norm": 0.6862489554890606, + "learning_rate": 2.860813704496788e-05, + "loss": 0.3377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.347086638212204, + "step": 335, + "valid_targets_mean": 3381.4, + "valid_targets_min": 529 + }, + { + "epoch": 0.5105105105105106, + "grad_norm": 0.5884162448896875, + "learning_rate": 2.903640256959315e-05, + "loss": 0.3156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3551441431045532, + "step": 340, + "valid_targets_mean": 4280.0, + "valid_targets_min": 656 + }, + { + "epoch": 0.5180180180180181, + "grad_norm": 0.5985519809481195, + "learning_rate": 2.9464668094218417e-05, + "loss": 0.3718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3538270592689514, + "step": 345, + "valid_targets_mean": 4933.6, + "valid_targets_min": 679 + }, + { + "epoch": 0.5255255255255256, + "grad_norm": 0.6608489868770677, + "learning_rate": 2.9892933618843684e-05, + "loss": 0.3821, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3977230191230774, + "step": 350, + "valid_targets_mean": 4619.8, + "valid_targets_min": 617 + }, + { + "epoch": 0.5330330330330331, + "grad_norm": 0.6666435654761541, + "learning_rate": 3.032119914346895e-05, + "loss": 0.3205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3276807963848114, + "step": 355, + "valid_targets_mean": 3228.0, + "valid_targets_min": 396 + }, + { + "epoch": 0.5405405405405406, + "grad_norm": 0.5700169845584041, + "learning_rate": 3.0749464668094224e-05, + "loss": 0.3373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.252112478017807, + "step": 360, + "valid_targets_mean": 4043.2, + "valid_targets_min": 470 + }, + { + "epoch": 0.5480480480480481, + "grad_norm": 0.5969317196396643, + "learning_rate": 3.117773019271949e-05, + "loss": 0.3344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.257426917552948, + "step": 365, + "valid_targets_mean": 3501.2, + "valid_targets_min": 637 + }, + { + "epoch": 0.5555555555555556, + "grad_norm": 0.5338582932799799, + "learning_rate": 3.160599571734476e-05, + "loss": 0.3469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4118390679359436, + "step": 370, + "valid_targets_mean": 6022.6, + "valid_targets_min": 668 + }, + { + "epoch": 0.5630630630630631, + "grad_norm": 0.7156868770693144, + "learning_rate": 3.2034261241970024e-05, + "loss": 0.342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33580997586250305, + "step": 375, + "valid_targets_mean": 3146.6, + "valid_targets_min": 580 + }, + { + "epoch": 0.5705705705705706, + "grad_norm": 0.6105442252909425, + "learning_rate": 3.246252676659529e-05, + "loss": 0.3246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29976391792297363, + "step": 380, + "valid_targets_mean": 4423.6, + "valid_targets_min": 579 + }, + { + "epoch": 0.5780780780780781, + "grad_norm": 0.48535869781119606, + "learning_rate": 3.289079229122056e-05, + "loss": 0.3211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2958953380584717, + "step": 385, + "valid_targets_mean": 5096.5, + "valid_targets_min": 1111 + }, + { + "epoch": 0.5855855855855856, + "grad_norm": 0.5293285996606892, + "learning_rate": 3.3319057815845824e-05, + "loss": 0.3012, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29280033707618713, + "step": 390, + "valid_targets_mean": 4133.6, + "valid_targets_min": 664 + }, + { + "epoch": 0.5930930930930931, + "grad_norm": 0.6364074446274751, + "learning_rate": 3.37473233404711e-05, + "loss": 0.3546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3336215019226074, + "step": 395, + "valid_targets_mean": 3474.1, + "valid_targets_min": 277 + }, + { + "epoch": 0.6006006006006006, + "grad_norm": 0.7064277061832516, + "learning_rate": 3.4175588865096364e-05, + "loss": 0.3238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38183677196502686, + "step": 400, + "valid_targets_mean": 4343.8, + "valid_targets_min": 831 + }, + { + "epoch": 0.6081081081081081, + "grad_norm": 0.5234255969114723, + "learning_rate": 3.460385438972163e-05, + "loss": 0.3259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3263946771621704, + "step": 405, + "valid_targets_mean": 5912.4, + "valid_targets_min": 525 + }, + { + "epoch": 0.6156156156156156, + "grad_norm": 0.6859056535544392, + "learning_rate": 3.50321199143469e-05, + "loss": 0.3613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4518062472343445, + "step": 410, + "valid_targets_mean": 4294.0, + "valid_targets_min": 672 + }, + { + "epoch": 0.6231231231231231, + "grad_norm": 0.593812126409495, + "learning_rate": 3.5460385438972164e-05, + "loss": 0.3236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.284976601600647, + "step": 415, + "valid_targets_mean": 4542.8, + "valid_targets_min": 732 + }, + { + "epoch": 0.6306306306306306, + "grad_norm": 0.5542761631226861, + "learning_rate": 3.588865096359743e-05, + "loss": 0.3575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3369911313056946, + "step": 420, + "valid_targets_mean": 4173.8, + "valid_targets_min": 623 + }, + { + "epoch": 0.6381381381381381, + "grad_norm": 0.694482077890855, + "learning_rate": 3.63169164882227e-05, + "loss": 0.331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35457199811935425, + "step": 425, + "valid_targets_mean": 3347.3, + "valid_targets_min": 323 + }, + { + "epoch": 0.6456456456456456, + "grad_norm": 0.5087776401105766, + "learning_rate": 3.674518201284797e-05, + "loss": 0.3077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2870054244995117, + "step": 430, + "valid_targets_mean": 5496.8, + "valid_targets_min": 995 + }, + { + "epoch": 0.6531531531531531, + "grad_norm": 0.5710934865190025, + "learning_rate": 3.717344753747324e-05, + "loss": 0.3449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3406960964202881, + "step": 435, + "valid_targets_mean": 4272.1, + "valid_targets_min": 578 + }, + { + "epoch": 0.6606606606606606, + "grad_norm": 0.6532417856194169, + "learning_rate": 3.7601713062098504e-05, + "loss": 0.3358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3105970621109009, + "step": 440, + "valid_targets_mean": 4052.7, + "valid_targets_min": 469 + }, + { + "epoch": 0.6681681681681682, + "grad_norm": 0.5810179731496691, + "learning_rate": 3.802997858672377e-05, + "loss": 0.3534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2967357933521271, + "step": 445, + "valid_targets_mean": 5399.5, + "valid_targets_min": 481 + }, + { + "epoch": 0.6756756756756757, + "grad_norm": 0.4933051723343659, + "learning_rate": 3.8458244111349045e-05, + "loss": 0.307, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2656002640724182, + "step": 450, + "valid_targets_mean": 5050.2, + "valid_targets_min": 536 + }, + { + "epoch": 0.6831831831831832, + "grad_norm": 0.5117823141215931, + "learning_rate": 3.8886509635974305e-05, + "loss": 0.3345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3394744396209717, + "step": 455, + "valid_targets_mean": 4780.2, + "valid_targets_min": 698 + }, + { + "epoch": 0.6906906906906907, + "grad_norm": 0.5513150421712305, + "learning_rate": 3.931477516059957e-05, + "loss": 0.3344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38678789138793945, + "step": 460, + "valid_targets_mean": 4995.6, + "valid_targets_min": 787 + }, + { + "epoch": 0.6981981981981982, + "grad_norm": 0.5223578821175074, + "learning_rate": 3.974304068522484e-05, + "loss": 0.3301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3217771053314209, + "step": 465, + "valid_targets_mean": 5331.4, + "valid_targets_min": 1007 + }, + { + "epoch": 0.7057057057057057, + "grad_norm": 0.5938737922502441, + "learning_rate": 3.999997756656772e-05, + "loss": 0.3306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31824183464050293, + "step": 470, + "valid_targets_mean": 4223.0, + "valid_targets_min": 452 + }, + { + "epoch": 0.7132132132132132, + "grad_norm": 0.49626310595338613, + "learning_rate": 3.9999725191032436e-05, + "loss": 0.2875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26925772428512573, + "step": 475, + "valid_targets_mean": 5763.9, + "valid_targets_min": 761 + }, + { + "epoch": 0.7207207207207207, + "grad_norm": 0.6276161448251935, + "learning_rate": 3.999919240172184e-05, + "loss": 0.3228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.325122594833374, + "step": 480, + "valid_targets_mean": 4395.4, + "valid_targets_min": 662 + }, + { + "epoch": 0.7282282282282282, + "grad_norm": 0.598897987227093, + "learning_rate": 3.9998379206106104e-05, + "loss": 0.2941, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2764396369457245, + "step": 485, + "valid_targets_mean": 5031.5, + "valid_targets_min": 836 + }, + { + "epoch": 0.7357357357357357, + "grad_norm": 0.5920738798268582, + "learning_rate": 3.9997285615586944e-05, + "loss": 0.3449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33547401428222656, + "step": 490, + "valid_targets_mean": 4218.7, + "valid_targets_min": 416 + }, + { + "epoch": 0.7432432432432432, + "grad_norm": 0.5388292737396405, + "learning_rate": 3.999591164549747e-05, + "loss": 0.3216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30026373267173767, + "step": 495, + "valid_targets_mean": 5046.2, + "valid_targets_min": 885 + }, + { + "epoch": 0.7507507507507507, + "grad_norm": 0.7138413019632255, + "learning_rate": 3.999425731510195e-05, + "loss": 0.3161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3095625340938568, + "step": 500, + "valid_targets_mean": 3115.5, + "valid_targets_min": 696 + }, + { + "epoch": 0.7582582582582582, + "grad_norm": 0.5065067605354636, + "learning_rate": 3.9992322647595543e-05, + "loss": 0.3132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2668853998184204, + "step": 505, + "valid_targets_mean": 4890.2, + "valid_targets_min": 583 + }, + { + "epoch": 0.7657657657657657, + "grad_norm": 0.48772270569522996, + "learning_rate": 3.999010767010401e-05, + "loss": 0.2792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21127644181251526, + "step": 510, + "valid_targets_mean": 4030.5, + "valid_targets_min": 620 + }, + { + "epoch": 0.7732732732732732, + "grad_norm": 0.688910852981923, + "learning_rate": 3.998761241368328e-05, + "loss": 0.3356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31102752685546875, + "step": 515, + "valid_targets_mean": 2963.6, + "valid_targets_min": 662 + }, + { + "epoch": 0.7807807807807807, + "grad_norm": 0.590399156229966, + "learning_rate": 3.9984836913319044e-05, + "loss": 0.3022, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.295174777507782, + "step": 520, + "valid_targets_mean": 3820.8, + "valid_targets_min": 547 + }, + { + "epoch": 0.7882882882882883, + "grad_norm": 0.7498553089205983, + "learning_rate": 3.998178120792627e-05, + "loss": 0.3289, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29107627272605896, + "step": 525, + "valid_targets_mean": 4623.9, + "valid_targets_min": 463 + }, + { + "epoch": 0.7957957957957958, + "grad_norm": 0.541430521248251, + "learning_rate": 3.9978445340348646e-05, + "loss": 0.3114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35057809948921204, + "step": 530, + "valid_targets_mean": 4895.9, + "valid_targets_min": 810 + }, + { + "epoch": 0.8033033033033034, + "grad_norm": 0.6145616694631061, + "learning_rate": 3.997482935735796e-05, + "loss": 0.2989, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2825467586517334, + "step": 535, + "valid_targets_mean": 4079.4, + "valid_targets_min": 593 + }, + { + "epoch": 0.8108108108108109, + "grad_norm": 0.6457216047110463, + "learning_rate": 3.997093330965349e-05, + "loss": 0.3289, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37978658080101013, + "step": 540, + "valid_targets_mean": 3493.6, + "valid_targets_min": 513 + }, + { + "epoch": 0.8183183183183184, + "grad_norm": 0.5512024613609153, + "learning_rate": 3.9966757251861256e-05, + "loss": 0.3083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30473941564559937, + "step": 545, + "valid_targets_mean": 4363.6, + "valid_targets_min": 798 + }, + { + "epoch": 0.8258258258258259, + "grad_norm": 0.5404758251655593, + "learning_rate": 3.996230124253328e-05, + "loss": 0.2928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2958792746067047, + "step": 550, + "valid_targets_mean": 4708.9, + "valid_targets_min": 480 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.5409326702314928, + "learning_rate": 3.995756534414672e-05, + "loss": 0.3015, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3055042028427124, + "step": 555, + "valid_targets_mean": 4889.0, + "valid_targets_min": 634 + }, + { + "epoch": 0.8408408408408409, + "grad_norm": 0.877523092977663, + "learning_rate": 3.9952549623103065e-05, + "loss": 0.3031, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27620965242385864, + "step": 560, + "valid_targets_mean": 3901.4, + "valid_targets_min": 782 + }, + { + "epoch": 0.8483483483483484, + "grad_norm": 0.5593566066914221, + "learning_rate": 3.994725414972713e-05, + "loss": 0.331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33034461736679077, + "step": 565, + "valid_targets_mean": 4464.5, + "valid_targets_min": 847 + }, + { + "epoch": 0.8558558558558559, + "grad_norm": 0.6652170124791761, + "learning_rate": 3.994167899826614e-05, + "loss": 0.2967, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.258162260055542, + "step": 570, + "valid_targets_mean": 3842.9, + "valid_targets_min": 410 + }, + { + "epoch": 0.8633633633633634, + "grad_norm": 0.5319260103081287, + "learning_rate": 3.993582424688861e-05, + "loss": 0.2819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2436560094356537, + "step": 575, + "valid_targets_mean": 4503.2, + "valid_targets_min": 810 + }, + { + "epoch": 0.8708708708708709, + "grad_norm": 0.5435805707257723, + "learning_rate": 3.9929689977683315e-05, + "loss": 0.3088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2837572991847992, + "step": 580, + "valid_targets_mean": 4475.9, + "valid_targets_min": 697 + }, + { + "epoch": 0.8783783783783784, + "grad_norm": 0.6279074659022925, + "learning_rate": 3.992327627665814e-05, + "loss": 0.3233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3020919859409332, + "step": 585, + "valid_targets_mean": 3179.3, + "valid_targets_min": 602 + }, + { + "epoch": 0.8858858858858859, + "grad_norm": 0.6751035596060991, + "learning_rate": 3.99165832337388e-05, + "loss": 0.3098, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2893165349960327, + "step": 590, + "valid_targets_mean": 3005.4, + "valid_targets_min": 504 + }, + { + "epoch": 0.8933933933933934, + "grad_norm": 0.9253018906213728, + "learning_rate": 3.990961094276767e-05, + "loss": 0.3072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3459416627883911, + "step": 595, + "valid_targets_mean": 2077.6, + "valid_targets_min": 485 + }, + { + "epoch": 0.9009009009009009, + "grad_norm": 0.6687225112938137, + "learning_rate": 3.990235950150242e-05, + "loss": 0.3079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36818212270736694, + "step": 600, + "valid_targets_mean": 3371.7, + "valid_targets_min": 715 + }, + { + "epoch": 0.9084084084084084, + "grad_norm": 0.5328749806757945, + "learning_rate": 3.9894829011614627e-05, + "loss": 0.3135, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25350844860076904, + "step": 605, + "valid_targets_mean": 3857.8, + "valid_targets_min": 521 + }, + { + "epoch": 0.9159159159159159, + "grad_norm": 0.675741595187441, + "learning_rate": 3.988701957868842e-05, + "loss": 0.3172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3360707759857178, + "step": 610, + "valid_targets_mean": 2917.3, + "valid_targets_min": 605 + }, + { + "epoch": 0.9234234234234234, + "grad_norm": 0.6571910621752247, + "learning_rate": 3.987893131221892e-05, + "loss": 0.2958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2482086569070816, + "step": 615, + "valid_targets_mean": 3313.1, + "valid_targets_min": 510 + }, + { + "epoch": 0.9309309309309309, + "grad_norm": 0.6979768745974733, + "learning_rate": 3.987056432561075e-05, + "loss": 0.3034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3537478744983673, + "step": 620, + "valid_targets_mean": 3899.6, + "valid_targets_min": 471 + }, + { + "epoch": 0.9384384384384384, + "grad_norm": 0.46982739010374835, + "learning_rate": 3.986191873617645e-05, + "loss": 0.318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27537375688552856, + "step": 625, + "valid_targets_mean": 5885.1, + "valid_targets_min": 593 + }, + { + "epoch": 0.9459459459459459, + "grad_norm": 0.5400448261626185, + "learning_rate": 3.985299466513479e-05, + "loss": 0.3106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3220127820968628, + "step": 630, + "valid_targets_mean": 4269.6, + "valid_targets_min": 558 + }, + { + "epoch": 0.9534534534534534, + "grad_norm": 0.5242379048874888, + "learning_rate": 3.984379223760912e-05, + "loss": 0.3102, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36460551619529724, + "step": 635, + "valid_targets_mean": 4742.2, + "valid_targets_min": 670 + }, + { + "epoch": 0.960960960960961, + "grad_norm": 0.5056910710384006, + "learning_rate": 3.983431158262558e-05, + "loss": 0.2783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2684648334980011, + "step": 640, + "valid_targets_mean": 4608.9, + "valid_targets_min": 805 + }, + { + "epoch": 0.9684684684684685, + "grad_norm": 0.5297540154671703, + "learning_rate": 3.982455283311131e-05, + "loss": 0.325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25975459814071655, + "step": 645, + "valid_targets_mean": 4382.8, + "valid_targets_min": 772 + }, + { + "epoch": 0.975975975975976, + "grad_norm": 0.555629136176801, + "learning_rate": 3.981451612589259e-05, + "loss": 0.28, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26162397861480713, + "step": 650, + "valid_targets_mean": 4827.0, + "valid_targets_min": 611 + }, + { + "epoch": 0.9834834834834835, + "grad_norm": 0.7355128171152777, + "learning_rate": 3.980420160169289e-05, + "loss": 0.2788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28353631496429443, + "step": 655, + "valid_targets_mean": 4060.4, + "valid_targets_min": 766 + }, + { + "epoch": 0.990990990990991, + "grad_norm": 0.6098329219635357, + "learning_rate": 3.979360940513092e-05, + "loss": 0.3267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2917211651802063, + "step": 660, + "valid_targets_mean": 3110.2, + "valid_targets_min": 536 + }, + { + "epoch": 0.9984984984984985, + "grad_norm": 0.6602685528415261, + "learning_rate": 3.978273968471865e-05, + "loss": 0.3291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37517815828323364, + "step": 665, + "valid_targets_mean": 3097.9, + "valid_targets_min": 623 + }, + { + "epoch": 1.006006006006006, + "grad_norm": 0.5990905965495634, + "learning_rate": 3.97715925928591e-05, + "loss": 0.3701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31573376059532166, + "step": 670, + "valid_targets_mean": 5335.8, + "valid_targets_min": 657 + }, + { + "epoch": 1.0135135135135136, + "grad_norm": 0.5051572310687712, + "learning_rate": 3.9760168285844345e-05, + "loss": 0.2747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2896278500556946, + "step": 675, + "valid_targets_mean": 4987.1, + "valid_targets_min": 724 + }, + { + "epoch": 1.021021021021021, + "grad_norm": 0.5358689615840903, + "learning_rate": 3.974846692385323e-05, + "loss": 0.2765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29317814111709595, + "step": 680, + "valid_targets_mean": 5053.4, + "valid_targets_min": 723 + }, + { + "epoch": 1.0285285285285286, + "grad_norm": 0.7789287894144826, + "learning_rate": 3.973648867094917e-05, + "loss": 0.3181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3238382935523987, + "step": 685, + "valid_targets_mean": 3482.4, + "valid_targets_min": 492 + }, + { + "epoch": 1.0360360360360361, + "grad_norm": 0.5563018194673951, + "learning_rate": 3.972423369507783e-05, + "loss": 0.3166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3259349465370178, + "step": 690, + "valid_targets_mean": 5252.0, + "valid_targets_min": 850 + }, + { + "epoch": 1.0435435435435436, + "grad_norm": 0.7250068591194968, + "learning_rate": 3.971170216806477e-05, + "loss": 0.3198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3443298935890198, + "step": 695, + "valid_targets_mean": 2799.4, + "valid_targets_min": 588 + }, + { + "epoch": 1.0510510510510511, + "grad_norm": 0.7089445149146085, + "learning_rate": 3.969889426561304e-05, + "loss": 0.3328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31399601697921753, + "step": 700, + "valid_targets_mean": 3693.1, + "valid_targets_min": 606 + }, + { + "epoch": 1.0585585585585586, + "grad_norm": 0.4834579345868247, + "learning_rate": 3.968581016730073e-05, + "loss": 0.2893, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28393715620040894, + "step": 705, + "valid_targets_mean": 5210.3, + "valid_targets_min": 1146 + }, + { + "epoch": 1.0660660660660661, + "grad_norm": 0.580488043726015, + "learning_rate": 3.9672450056578426e-05, + "loss": 0.3114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3525095582008362, + "step": 710, + "valid_targets_mean": 4987.9, + "valid_targets_min": 872 + }, + { + "epoch": 1.0735735735735736, + "grad_norm": 0.5830757190666622, + "learning_rate": 3.965881412076665e-05, + "loss": 0.2927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2518344223499298, + "step": 715, + "valid_targets_mean": 3772.8, + "valid_targets_min": 301 + }, + { + "epoch": 1.0810810810810811, + "grad_norm": 0.603911299798853, + "learning_rate": 3.964490255105325e-05, + "loss": 0.2897, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2885856330394745, + "step": 720, + "valid_targets_mean": 3888.7, + "valid_targets_min": 610 + }, + { + "epoch": 1.0885885885885886, + "grad_norm": 0.721271255619491, + "learning_rate": 3.96307155424907e-05, + "loss": 0.3009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33753496408462524, + "step": 725, + "valid_targets_mean": 3255.9, + "valid_targets_min": 323 + }, + { + "epoch": 1.0960960960960962, + "grad_norm": 0.5688287102135644, + "learning_rate": 3.961625329399335e-05, + "loss": 0.288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24445214867591858, + "step": 730, + "valid_targets_mean": 4295.9, + "valid_targets_min": 322 + }, + { + "epoch": 1.1036036036036037, + "grad_norm": 0.5822780762193985, + "learning_rate": 3.960151600833469e-05, + "loss": 0.303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3242020606994629, + "step": 735, + "valid_targets_mean": 4509.4, + "valid_targets_min": 749 + }, + { + "epoch": 1.1111111111111112, + "grad_norm": 0.6988383396255673, + "learning_rate": 3.958650389214444e-05, + "loss": 0.2908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2766273021697998, + "step": 740, + "valid_targets_mean": 3003.1, + "valid_targets_min": 528 + }, + { + "epoch": 1.1186186186186187, + "grad_norm": 0.5530037303674098, + "learning_rate": 3.95712171559057e-05, + "loss": 0.2862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2562218904495239, + "step": 745, + "valid_targets_mean": 4301.1, + "valid_targets_min": 761 + }, + { + "epoch": 1.1261261261261262, + "grad_norm": 0.5917466496490228, + "learning_rate": 3.955565601395199e-05, + "loss": 0.2995, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27801835536956787, + "step": 750, + "valid_targets_mean": 4016.3, + "valid_targets_min": 611 + }, + { + "epoch": 1.1336336336336337, + "grad_norm": 0.5314431651828969, + "learning_rate": 3.953982068446424e-05, + "loss": 0.274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2895042896270752, + "step": 755, + "valid_targets_mean": 4829.9, + "valid_targets_min": 689 + }, + { + "epoch": 1.1411411411411412, + "grad_norm": 0.5403700692721292, + "learning_rate": 3.952371138946773e-05, + "loss": 0.311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2900671660900116, + "step": 760, + "valid_targets_mean": 5102.2, + "valid_targets_min": 811 + }, + { + "epoch": 1.1486486486486487, + "grad_norm": 0.4466270912829498, + "learning_rate": 3.950732835482896e-05, + "loss": 0.2634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23144987225532532, + "step": 765, + "valid_targets_mean": 4800.4, + "valid_targets_min": 566 + }, + { + "epoch": 1.1561561561561562, + "grad_norm": 0.5126441976083996, + "learning_rate": 3.9490671810252525e-05, + "loss": 0.2738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2398991733789444, + "step": 770, + "valid_targets_mean": 3322.2, + "valid_targets_min": 260 + }, + { + "epoch": 1.1636636636636637, + "grad_norm": 0.6835376056729422, + "learning_rate": 3.947374198927787e-05, + "loss": 0.2818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2788790464401245, + "step": 775, + "valid_targets_mean": 2824.4, + "valid_targets_min": 481 + }, + { + "epoch": 1.1711711711711712, + "grad_norm": 0.7480255450833108, + "learning_rate": 3.945653912927599e-05, + "loss": 0.3116, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41352379322052, + "step": 780, + "valid_targets_mean": 3108.0, + "valid_targets_min": 525 + }, + { + "epoch": 1.1786786786786787, + "grad_norm": 0.7012989879636262, + "learning_rate": 3.943906347144615e-05, + "loss": 0.2908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2697921395301819, + "step": 785, + "valid_targets_mean": 2765.4, + "valid_targets_min": 559 + }, + { + "epoch": 1.1861861861861862, + "grad_norm": 0.6294134200242447, + "learning_rate": 3.942131526081249e-05, + "loss": 0.2965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28815174102783203, + "step": 790, + "valid_targets_mean": 5498.2, + "valid_targets_min": 760 + }, + { + "epoch": 1.1936936936936937, + "grad_norm": 0.5405122434808368, + "learning_rate": 3.940329474622055e-05, + "loss": 0.3025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26721876859664917, + "step": 795, + "valid_targets_mean": 4779.1, + "valid_targets_min": 657 + }, + { + "epoch": 1.2012012012012012, + "grad_norm": 0.6027467491848216, + "learning_rate": 3.938500218033384e-05, + "loss": 0.2749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28523164987564087, + "step": 800, + "valid_targets_mean": 3985.0, + "valid_targets_min": 673 + }, + { + "epoch": 1.2087087087087087, + "grad_norm": 0.49337094306984625, + "learning_rate": 3.936643781963024e-05, + "loss": 0.3508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3622555136680603, + "step": 805, + "valid_targets_mean": 6322.6, + "valid_targets_min": 390 + }, + { + "epoch": 1.2162162162162162, + "grad_norm": 0.5183504295911457, + "learning_rate": 3.9347601924398475e-05, + "loss": 0.2833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2589194178581238, + "step": 810, + "valid_targets_mean": 4678.0, + "valid_targets_min": 899 + }, + { + "epoch": 1.2237237237237237, + "grad_norm": 0.5342990678808706, + "learning_rate": 3.932849475873438e-05, + "loss": 0.3152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2209431231021881, + "step": 815, + "valid_targets_mean": 4677.4, + "valid_targets_min": 815 + }, + { + "epoch": 1.2312312312312312, + "grad_norm": 0.4783352437662895, + "learning_rate": 3.9309116590537266e-05, + "loss": 0.2653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2841101884841919, + "step": 820, + "valid_targets_mean": 4398.6, + "valid_targets_min": 874 + }, + { + "epoch": 1.2387387387387387, + "grad_norm": 0.5066260881718098, + "learning_rate": 3.9289467691506125e-05, + "loss": 0.3159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2934754490852356, + "step": 825, + "valid_targets_mean": 4760.6, + "valid_targets_min": 670 + }, + { + "epoch": 1.2462462462462462, + "grad_norm": 0.6920092254384417, + "learning_rate": 3.926954833713585e-05, + "loss": 0.3049, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3148398697376251, + "step": 830, + "valid_targets_mean": 2882.6, + "valid_targets_min": 635 + }, + { + "epoch": 1.2537537537537538, + "grad_norm": 0.49074537646024496, + "learning_rate": 3.924935880671334e-05, + "loss": 0.3005, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2520464062690735, + "step": 835, + "valid_targets_mean": 5137.5, + "valid_targets_min": 1027 + }, + { + "epoch": 1.2612612612612613, + "grad_norm": 0.6626501857332863, + "learning_rate": 3.922889938331361e-05, + "loss": 0.3003, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38677817583084106, + "step": 840, + "valid_targets_mean": 3420.8, + "valid_targets_min": 627 + }, + { + "epoch": 1.2687687687687688, + "grad_norm": 0.5332552957049799, + "learning_rate": 3.920817035379581e-05, + "loss": 0.2804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30235785245895386, + "step": 845, + "valid_targets_mean": 4300.7, + "valid_targets_min": 645 + }, + { + "epoch": 1.2762762762762763, + "grad_norm": 0.5542167310126439, + "learning_rate": 3.9187172008799195e-05, + "loss": 0.3007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3331342339515686, + "step": 850, + "valid_targets_mean": 4431.8, + "valid_targets_min": 382 + }, + { + "epoch": 1.2837837837837838, + "grad_norm": 0.47602297520673265, + "learning_rate": 3.9165904642739085e-05, + "loss": 0.2825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24128374457359314, + "step": 855, + "valid_targets_mean": 5224.0, + "valid_targets_min": 565 + }, + { + "epoch": 1.2912912912912913, + "grad_norm": 0.4907394738004159, + "learning_rate": 3.914436855380269e-05, + "loss": 0.2907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2902330458164215, + "step": 860, + "valid_targets_mean": 4872.4, + "valid_targets_min": 469 + }, + { + "epoch": 1.2987987987987988, + "grad_norm": 0.5780103792011559, + "learning_rate": 3.9122564043944956e-05, + "loss": 0.3022, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.324124276638031, + "step": 865, + "valid_targets_mean": 4095.2, + "valid_targets_min": 836 + }, + { + "epoch": 1.3063063063063063, + "grad_norm": 1.4238637814086352, + "learning_rate": 3.910049141888433e-05, + "loss": 0.3061, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3050238788127899, + "step": 870, + "valid_targets_mean": 5166.2, + "valid_targets_min": 716 + }, + { + "epoch": 1.3138138138138138, + "grad_norm": 0.49302688153022367, + "learning_rate": 3.907815098809849e-05, + "loss": 0.268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2744116187095642, + "step": 875, + "valid_targets_mean": 5715.9, + "valid_targets_min": 491 + }, + { + "epoch": 1.3213213213213213, + "grad_norm": 0.6362296442287568, + "learning_rate": 3.9055543064819945e-05, + "loss": 0.3095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28804486989974976, + "step": 880, + "valid_targets_mean": 3622.8, + "valid_targets_min": 642 + }, + { + "epoch": 1.3288288288288288, + "grad_norm": 0.627623374325793, + "learning_rate": 3.9032667966031704e-05, + "loss": 0.2754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3453376293182373, + "step": 885, + "valid_targets_mean": 3533.0, + "valid_targets_min": 600 + }, + { + "epoch": 1.3363363363363363, + "grad_norm": 0.6493174026676456, + "learning_rate": 3.9009526012462834e-05, + "loss": 0.292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3329022228717804, + "step": 890, + "valid_targets_mean": 4443.5, + "valid_targets_min": 699 + }, + { + "epoch": 1.3438438438438438, + "grad_norm": 0.5328791757099103, + "learning_rate": 3.89861175285839e-05, + "loss": 0.2754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2938082218170166, + "step": 895, + "valid_targets_mean": 4643.6, + "valid_targets_min": 674 + }, + { + "epoch": 1.3513513513513513, + "grad_norm": 0.5404971152718483, + "learning_rate": 3.89624428426025e-05, + "loss": 0.3149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3295516073703766, + "step": 900, + "valid_targets_mean": 5400.6, + "valid_targets_min": 431 + }, + { + "epoch": 1.3588588588588588, + "grad_norm": 0.5148482135727713, + "learning_rate": 3.8938502286458586e-05, + "loss": 0.2965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2672591209411621, + "step": 905, + "valid_targets_mean": 4434.6, + "valid_targets_min": 414 + }, + { + "epoch": 1.3663663663663663, + "grad_norm": 0.46940700753823117, + "learning_rate": 3.891429619581986e-05, + "loss": 0.2612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25686946511268616, + "step": 910, + "valid_targets_mean": 5001.9, + "valid_targets_min": 750 + }, + { + "epoch": 1.3738738738738738, + "grad_norm": 0.5588622944159107, + "learning_rate": 3.888982491007705e-05, + "loss": 0.3063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37934252619743347, + "step": 915, + "valid_targets_mean": 4426.3, + "valid_targets_min": 480 + }, + { + "epoch": 1.3813813813813813, + "grad_norm": 0.5032151155279457, + "learning_rate": 3.886508877233914e-05, + "loss": 0.3002, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24294015765190125, + "step": 920, + "valid_targets_mean": 4203.2, + "valid_targets_min": 537 + }, + { + "epoch": 1.3888888888888888, + "grad_norm": 0.6375386060346272, + "learning_rate": 3.8840088129428594e-05, + "loss": 0.2822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2843095660209656, + "step": 925, + "valid_targets_mean": 2762.1, + "valid_targets_min": 628 + }, + { + "epoch": 1.3963963963963963, + "grad_norm": 0.42954687639507877, + "learning_rate": 3.881482333187647e-05, + "loss": 0.2934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24300193786621094, + "step": 930, + "valid_targets_mean": 6139.7, + "valid_targets_min": 1420 + }, + { + "epoch": 1.4039039039039038, + "grad_norm": 0.5288152668995841, + "learning_rate": 3.8789294733917486e-05, + "loss": 0.2809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27250343561172485, + "step": 935, + "valid_targets_mean": 5227.9, + "valid_targets_min": 601 + }, + { + "epoch": 1.4114114114114114, + "grad_norm": 0.4018203487416013, + "learning_rate": 3.876350269348509e-05, + "loss": 0.2843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2769063115119934, + "step": 940, + "valid_targets_mean": 7001.6, + "valid_targets_min": 791 + }, + { + "epoch": 1.4189189189189189, + "grad_norm": 0.4733738366398111, + "learning_rate": 3.873744757220642e-05, + "loss": 0.3156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2413673847913742, + "step": 945, + "valid_targets_mean": 5955.6, + "valid_targets_min": 723 + }, + { + "epoch": 1.4264264264264264, + "grad_norm": 0.40178048986196624, + "learning_rate": 3.871112973539725e-05, + "loss": 0.2818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22361788153648376, + "step": 950, + "valid_targets_mean": 5932.0, + "valid_targets_min": 684 + }, + { + "epoch": 1.4339339339339339, + "grad_norm": 0.5858661329948348, + "learning_rate": 3.868454955205685e-05, + "loss": 0.2967, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26762083172798157, + "step": 955, + "valid_targets_mean": 4612.1, + "valid_targets_min": 638 + }, + { + "epoch": 1.4414414414414414, + "grad_norm": 0.59370232296713, + "learning_rate": 3.86577073948628e-05, + "loss": 0.3087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.245170459151268, + "step": 960, + "valid_targets_mean": 4544.8, + "valid_targets_min": 700 + }, + { + "epoch": 1.4489489489489489, + "grad_norm": 0.5776834809155071, + "learning_rate": 3.8630603640165815e-05, + "loss": 0.2795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3211957812309265, + "step": 965, + "valid_targets_mean": 3560.0, + "valid_targets_min": 462 + }, + { + "epoch": 1.4564564564564564, + "grad_norm": 0.5619424459014478, + "learning_rate": 3.860323866798443e-05, + "loss": 0.2582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21985271573066711, + "step": 970, + "valid_targets_mean": 4147.0, + "valid_targets_min": 964 + }, + { + "epoch": 1.4639639639639639, + "grad_norm": 0.5783533174246083, + "learning_rate": 3.857561286199968e-05, + "loss": 0.2969, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29089707136154175, + "step": 975, + "valid_targets_mean": 3580.6, + "valid_targets_min": 489 + }, + { + "epoch": 1.4714714714714714, + "grad_norm": 0.4955776875207588, + "learning_rate": 3.85477266095497e-05, + "loss": 0.2825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2684110999107361, + "step": 980, + "valid_targets_mean": 4121.5, + "valid_targets_min": 565 + }, + { + "epoch": 1.478978978978979, + "grad_norm": 0.5707278559095864, + "learning_rate": 3.851958030162433e-05, + "loss": 0.274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3071454167366028, + "step": 985, + "valid_targets_mean": 4583.5, + "valid_targets_min": 711 + }, + { + "epoch": 1.4864864864864864, + "grad_norm": 0.5349960494922307, + "learning_rate": 3.8491174332859625e-05, + "loss": 0.2644, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2876233160495758, + "step": 990, + "valid_targets_mean": 3913.2, + "valid_targets_min": 652 + }, + { + "epoch": 1.493993993993994, + "grad_norm": 0.7242433186973595, + "learning_rate": 3.84625091015323e-05, + "loss": 0.2832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2717621624469757, + "step": 995, + "valid_targets_mean": 3196.1, + "valid_targets_min": 613 + }, + { + "epoch": 1.5015015015015014, + "grad_norm": 0.559821205756259, + "learning_rate": 3.843358500955416e-05, + "loss": 0.277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27973076701164246, + "step": 1000, + "valid_targets_mean": 3837.9, + "valid_targets_min": 875 + }, + { + "epoch": 1.509009009009009, + "grad_norm": 0.5377924424199211, + "learning_rate": 3.840440246246648e-05, + "loss": 0.2818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2526509165763855, + "step": 1005, + "valid_targets_mean": 3364.9, + "valid_targets_min": 732 + }, + { + "epoch": 1.5165165165165164, + "grad_norm": 0.6037241854694765, + "learning_rate": 3.837496186943428e-05, + "loss": 0.2838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3358380198478699, + "step": 1010, + "valid_targets_mean": 4278.9, + "valid_targets_min": 638 + }, + { + "epoch": 1.524024024024024, + "grad_norm": 0.4711675437570829, + "learning_rate": 3.8345263643240634e-05, + "loss": 0.2535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26968130469322205, + "step": 1015, + "valid_targets_mean": 5544.2, + "valid_targets_min": 648 + }, + { + "epoch": 1.5315315315315314, + "grad_norm": 0.5958428128963504, + "learning_rate": 3.831530820028084e-05, + "loss": 0.3192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32176512479782104, + "step": 1020, + "valid_targets_mean": 3855.1, + "valid_targets_min": 674 + }, + { + "epoch": 1.539039039039039, + "grad_norm": 0.4865056298850766, + "learning_rate": 3.828509596055662e-05, + "loss": 0.2712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2525503635406494, + "step": 1025, + "valid_targets_mean": 6991.5, + "valid_targets_min": 567 + }, + { + "epoch": 1.5465465465465464, + "grad_norm": 0.5566299314673824, + "learning_rate": 3.82546273476702e-05, + "loss": 0.3577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2959470748901367, + "step": 1030, + "valid_targets_mean": 4056.7, + "valid_targets_min": 528 + }, + { + "epoch": 1.554054054054054, + "grad_norm": 0.6266122143638768, + "learning_rate": 3.822390278881839e-05, + "loss": 0.2924, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31994956731796265, + "step": 1035, + "valid_targets_mean": 3313.5, + "valid_targets_min": 870 + }, + { + "epoch": 1.5615615615615615, + "grad_norm": 0.5379754675133966, + "learning_rate": 3.819292271478659e-05, + "loss": 0.2853, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26793667674064636, + "step": 1040, + "valid_targets_mean": 3280.4, + "valid_targets_min": 540 + }, + { + "epoch": 1.569069069069069, + "grad_norm": 0.42701565978810035, + "learning_rate": 3.816168755994274e-05, + "loss": 0.2752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21840263903141022, + "step": 1045, + "valid_targets_mean": 4529.6, + "valid_targets_min": 904 + }, + { + "epoch": 1.5765765765765765, + "grad_norm": 0.5000336725002723, + "learning_rate": 3.813019776223124e-05, + "loss": 0.286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2662447690963745, + "step": 1050, + "valid_targets_mean": 4867.2, + "valid_targets_min": 988 + }, + { + "epoch": 1.584084084084084, + "grad_norm": 0.5648658240320987, + "learning_rate": 3.809845376316681e-05, + "loss": 0.3156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29331260919570923, + "step": 1055, + "valid_targets_mean": 4341.2, + "valid_targets_min": 328 + }, + { + "epoch": 1.5915915915915915, + "grad_norm": 0.5284383529439995, + "learning_rate": 3.8066456007828304e-05, + "loss": 0.2835, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23197105526924133, + "step": 1060, + "valid_targets_mean": 3968.8, + "valid_targets_min": 389 + }, + { + "epoch": 1.599099099099099, + "grad_norm": 0.5216262376393843, + "learning_rate": 3.803420494485244e-05, + "loss": 0.273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2597469687461853, + "step": 1065, + "valid_targets_mean": 4115.9, + "valid_targets_min": 579 + }, + { + "epoch": 1.6066066066066065, + "grad_norm": 0.4939695307516829, + "learning_rate": 3.800170102642757e-05, + "loss": 0.2835, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23144802451133728, + "step": 1070, + "valid_targets_mean": 3866.6, + "valid_targets_min": 486 + }, + { + "epoch": 1.614114114114114, + "grad_norm": 0.5829421644769698, + "learning_rate": 3.796894470828726e-05, + "loss": 0.267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.222642183303833, + "step": 1075, + "valid_targets_mean": 4570.8, + "valid_targets_min": 844 + }, + { + "epoch": 1.6216216216216215, + "grad_norm": 0.5335734411442947, + "learning_rate": 3.793593644970397e-05, + "loss": 0.2881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2972537577152252, + "step": 1080, + "valid_targets_mean": 4192.4, + "valid_targets_min": 648 + }, + { + "epoch": 1.629129129129129, + "grad_norm": 0.7509523435082093, + "learning_rate": 3.7902676713482584e-05, + "loss": 0.2991, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3354663848876953, + "step": 1085, + "valid_targets_mean": 3534.2, + "valid_targets_min": 875 + }, + { + "epoch": 1.6366366366366365, + "grad_norm": 0.45680598867761874, + "learning_rate": 3.786916596595391e-05, + "loss": 0.2835, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2275020182132721, + "step": 1090, + "valid_targets_mean": 4664.6, + "valid_targets_min": 768 + }, + { + "epoch": 1.644144144144144, + "grad_norm": 0.614422278972368, + "learning_rate": 3.783540467696817e-05, + "loss": 0.2845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28407660126686096, + "step": 1095, + "valid_targets_mean": 3863.8, + "valid_targets_min": 409 + }, + { + "epoch": 1.6516516516516515, + "grad_norm": 0.5074836388259119, + "learning_rate": 3.78013933198884e-05, + "loss": 0.2611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23754659295082092, + "step": 1100, + "valid_targets_mean": 3914.6, + "valid_targets_min": 1027 + }, + { + "epoch": 1.659159159159159, + "grad_norm": 0.45212979425084815, + "learning_rate": 3.776713237158379e-05, + "loss": 0.2728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2909110188484192, + "step": 1105, + "valid_targets_mean": 5534.1, + "valid_targets_min": 655 + }, + { + "epoch": 1.6666666666666665, + "grad_norm": 0.6316240705989837, + "learning_rate": 3.773262231242305e-05, + "loss": 0.3053, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2717238664627075, + "step": 1110, + "valid_targets_mean": 3312.1, + "valid_targets_min": 367 + }, + { + "epoch": 1.674174174174174, + "grad_norm": 0.5332254751800973, + "learning_rate": 3.7697863626267615e-05, + "loss": 0.3138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2853066623210907, + "step": 1115, + "valid_targets_mean": 3955.4, + "valid_targets_min": 731 + }, + { + "epoch": 1.6816816816816815, + "grad_norm": 0.6436721167931566, + "learning_rate": 3.7662856800464907e-05, + "loss": 0.2876, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33219900727272034, + "step": 1120, + "valid_targets_mean": 4150.2, + "valid_targets_min": 402 + }, + { + "epoch": 1.689189189189189, + "grad_norm": 0.41517090061996553, + "learning_rate": 3.762760232584148e-05, + "loss": 0.2544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2333643138408661, + "step": 1125, + "valid_targets_mean": 5115.1, + "valid_targets_min": 841 + }, + { + "epoch": 1.6966966966966965, + "grad_norm": 0.5176992190887993, + "learning_rate": 3.7592100696696134e-05, + "loss": 0.2732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2570965886116028, + "step": 1130, + "valid_targets_mean": 3668.6, + "valid_targets_min": 656 + }, + { + "epoch": 1.704204204204204, + "grad_norm": 0.511279913702578, + "learning_rate": 3.755635241079301e-05, + "loss": 0.2719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22817838191986084, + "step": 1135, + "valid_targets_mean": 3872.6, + "valid_targets_min": 475 + }, + { + "epoch": 1.7117117117117115, + "grad_norm": 0.6164706177166612, + "learning_rate": 3.7520357969354584e-05, + "loss": 0.2907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3176250457763672, + "step": 1140, + "valid_targets_mean": 3526.4, + "valid_targets_min": 693 + }, + { + "epoch": 1.7192192192192193, + "grad_norm": 0.4109077325218178, + "learning_rate": 3.748411787705466e-05, + "loss": 0.3218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2506415843963623, + "step": 1145, + "valid_targets_mean": 5783.5, + "valid_targets_min": 524 + }, + { + "epoch": 1.7267267267267268, + "grad_norm": 0.47340788265525957, + "learning_rate": 3.744763264201125e-05, + "loss": 0.259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28487300872802734, + "step": 1150, + "valid_targets_mean": 5147.9, + "valid_targets_min": 311 + }, + { + "epoch": 1.7342342342342343, + "grad_norm": 0.6361764376110007, + "learning_rate": 3.741090277577953e-05, + "loss": 0.2747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33571434020996094, + "step": 1155, + "valid_targets_mean": 3613.9, + "valid_targets_min": 633 + }, + { + "epoch": 1.7417417417417418, + "grad_norm": 0.7263029802880552, + "learning_rate": 3.73739287933446e-05, + "loss": 0.3013, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30988189578056335, + "step": 1160, + "valid_targets_mean": 2780.0, + "valid_targets_min": 388 + }, + { + "epoch": 1.7492492492492493, + "grad_norm": 0.5312249305901402, + "learning_rate": 3.733671121311426e-05, + "loss": 0.2691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25757136940956116, + "step": 1165, + "valid_targets_mean": 4026.9, + "valid_targets_min": 605 + }, + { + "epoch": 1.7567567567567568, + "grad_norm": 0.5476708426420898, + "learning_rate": 3.729925055691182e-05, + "loss": 0.2875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30963361263275146, + "step": 1170, + "valid_targets_mean": 4677.7, + "valid_targets_min": 434 + }, + { + "epoch": 1.7642642642642643, + "grad_norm": 0.613322007834183, + "learning_rate": 3.7261547349968675e-05, + "loss": 0.2902, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34018972516059875, + "step": 1175, + "valid_targets_mean": 3271.2, + "valid_targets_min": 407 + }, + { + "epoch": 1.7717717717717718, + "grad_norm": 0.6162258596946204, + "learning_rate": 3.722360212091703e-05, + "loss": 0.3093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32241833209991455, + "step": 1180, + "valid_targets_mean": 3718.6, + "valid_targets_min": 466 + }, + { + "epoch": 1.7792792792792793, + "grad_norm": 0.664703409023323, + "learning_rate": 3.7185415401782434e-05, + "loss": 0.2791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33108675479888916, + "step": 1185, + "valid_targets_mean": 5692.8, + "valid_targets_min": 702 + }, + { + "epoch": 1.7867867867867868, + "grad_norm": 0.5138638922358946, + "learning_rate": 3.7146987727976354e-05, + "loss": 0.2819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30338913202285767, + "step": 1190, + "valid_targets_mean": 5333.4, + "valid_targets_min": 752 + }, + { + "epoch": 1.7942942942942943, + "grad_norm": 0.4986654846084407, + "learning_rate": 3.7108319638288654e-05, + "loss": 0.2899, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2863334119319916, + "step": 1195, + "valid_targets_mean": 4185.2, + "valid_targets_min": 683 + }, + { + "epoch": 1.8018018018018018, + "grad_norm": 0.514824462976426, + "learning_rate": 3.706941167488002e-05, + "loss": 0.305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3966107666492462, + "step": 1200, + "valid_targets_mean": 4997.3, + "valid_targets_min": 583 + }, + { + "epoch": 1.8093093093093093, + "grad_norm": 0.5002026479937768, + "learning_rate": 3.70302643832744e-05, + "loss": 0.3046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3028540015220642, + "step": 1205, + "valid_targets_mean": 4384.4, + "valid_targets_min": 646 + }, + { + "epoch": 1.8168168168168168, + "grad_norm": 0.5733662063497278, + "learning_rate": 3.699087831235133e-05, + "loss": 0.271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28234004974365234, + "step": 1210, + "valid_targets_mean": 3882.9, + "valid_targets_min": 781 + }, + { + "epoch": 1.8243243243243243, + "grad_norm": 0.49934816295444895, + "learning_rate": 3.6951254014338236e-05, + "loss": 0.2656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27083009481430054, + "step": 1215, + "valid_targets_mean": 4725.4, + "valid_targets_min": 497 + }, + { + "epoch": 1.8318318318318318, + "grad_norm": 0.732103811366184, + "learning_rate": 3.69113920448027e-05, + "loss": 0.2718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3054293394088745, + "step": 1220, + "valid_targets_mean": 2746.9, + "valid_targets_min": 478 + }, + { + "epoch": 1.8393393393393394, + "grad_norm": 0.6337417424539031, + "learning_rate": 3.687129296264468e-05, + "loss": 0.2591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26185324788093567, + "step": 1225, + "valid_targets_mean": 4788.4, + "valid_targets_min": 695 + }, + { + "epoch": 1.8468468468468469, + "grad_norm": 0.5621683788362308, + "learning_rate": 3.683095733008864e-05, + "loss": 0.2693, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2606724202632904, + "step": 1230, + "valid_targets_mean": 3237.6, + "valid_targets_min": 534 + }, + { + "epoch": 1.8543543543543544, + "grad_norm": 0.5288214426455534, + "learning_rate": 3.6790385712675705e-05, + "loss": 0.2837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3149155378341675, + "step": 1235, + "valid_targets_mean": 3973.2, + "valid_targets_min": 572 + }, + { + "epoch": 1.8618618618618619, + "grad_norm": 0.4813442560130786, + "learning_rate": 3.6749578679255715e-05, + "loss": 0.2996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2811282277107239, + "step": 1240, + "valid_targets_mean": 5313.8, + "valid_targets_min": 420 + }, + { + "epoch": 1.8693693693693694, + "grad_norm": 0.6170835431330234, + "learning_rate": 3.6708536801979246e-05, + "loss": 0.2905, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32795318961143494, + "step": 1245, + "valid_targets_mean": 3692.1, + "valid_targets_min": 724 + }, + { + "epoch": 1.8768768768768769, + "grad_norm": 0.5375710532679772, + "learning_rate": 3.666726065628959e-05, + "loss": 0.2688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23410853743553162, + "step": 1250, + "valid_targets_mean": 3626.6, + "valid_targets_min": 663 + }, + { + "epoch": 1.8843843843843844, + "grad_norm": 0.5400535513286914, + "learning_rate": 3.6625750820914694e-05, + "loss": 0.2569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3105897903442383, + "step": 1255, + "valid_targets_mean": 3530.0, + "valid_targets_min": 400 + }, + { + "epoch": 1.8918918918918919, + "grad_norm": 0.7192704328062035, + "learning_rate": 3.658400787785903e-05, + "loss": 0.2711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31259477138519287, + "step": 1260, + "valid_targets_mean": 2492.8, + "valid_targets_min": 367 + }, + { + "epoch": 1.8993993993993994, + "grad_norm": 0.5689420833987145, + "learning_rate": 3.654203241239546e-05, + "loss": 0.2752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2669737935066223, + "step": 1265, + "valid_targets_mean": 5125.1, + "valid_targets_min": 766 + }, + { + "epoch": 1.906906906906907, + "grad_norm": 0.6991606704570577, + "learning_rate": 3.649982501305701e-05, + "loss": 0.2681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3605828285217285, + "step": 1270, + "valid_targets_mean": 2681.3, + "valid_targets_min": 603 + }, + { + "epoch": 1.9144144144144144, + "grad_norm": 0.5136360235806671, + "learning_rate": 3.6457386271628615e-05, + "loss": 0.2815, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2559846043586731, + "step": 1275, + "valid_targets_mean": 4230.4, + "valid_targets_min": 467 + }, + { + "epoch": 1.921921921921922, + "grad_norm": 0.5755425636039267, + "learning_rate": 3.641471678313883e-05, + "loss": 0.2629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32143378257751465, + "step": 1280, + "valid_targets_mean": 4531.5, + "valid_targets_min": 723 + }, + { + "epoch": 1.9294294294294294, + "grad_norm": 0.5172980766220905, + "learning_rate": 3.6371817145851514e-05, + "loss": 0.2902, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3124349117279053, + "step": 1285, + "valid_targets_mean": 5154.6, + "valid_targets_min": 690 + }, + { + "epoch": 1.936936936936937, + "grad_norm": 0.5579160547269432, + "learning_rate": 3.6328687961257374e-05, + "loss": 0.2863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23261310160160065, + "step": 1290, + "valid_targets_mean": 3315.2, + "valid_targets_min": 432 + }, + { + "epoch": 1.9444444444444444, + "grad_norm": 0.471485760499144, + "learning_rate": 3.62853298340656e-05, + "loss": 0.2875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23489312827587128, + "step": 1295, + "valid_targets_mean": 4736.7, + "valid_targets_min": 689 + }, + { + "epoch": 1.951951951951952, + "grad_norm": 0.45892772148849675, + "learning_rate": 3.624174337219536e-05, + "loss": 0.2919, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23488348722457886, + "step": 1300, + "valid_targets_mean": 4925.0, + "valid_targets_min": 602 + }, + { + "epoch": 1.9594594594594594, + "grad_norm": 0.6569352257148999, + "learning_rate": 3.6197929186767255e-05, + "loss": 0.2827, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26401716470718384, + "step": 1305, + "valid_targets_mean": 3648.6, + "valid_targets_min": 358 + }, + { + "epoch": 1.966966966966967, + "grad_norm": 0.47233655136122743, + "learning_rate": 3.6153887892094786e-05, + "loss": 0.292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2689140737056732, + "step": 1310, + "valid_targets_mean": 4251.4, + "valid_targets_min": 521 + }, + { + "epoch": 1.9744744744744744, + "grad_norm": 0.4937344312534394, + "learning_rate": 3.6109620105675734e-05, + "loss": 0.2502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2564142346382141, + "step": 1315, + "valid_targets_mean": 4050.1, + "valid_targets_min": 846 + }, + { + "epoch": 1.981981981981982, + "grad_norm": 0.5040985465764987, + "learning_rate": 3.606512644818347e-05, + "loss": 0.2959, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27598607540130615, + "step": 1320, + "valid_targets_mean": 5606.7, + "valid_targets_min": 561 + }, + { + "epoch": 1.9894894894894894, + "grad_norm": 0.503656514625708, + "learning_rate": 3.6020407543458295e-05, + "loss": 0.2656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24646782875061035, + "step": 1325, + "valid_targets_mean": 4084.1, + "valid_targets_min": 542 + }, + { + "epoch": 1.996996996996997, + "grad_norm": 0.48663621599509704, + "learning_rate": 3.597546401849866e-05, + "loss": 0.2877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3501949608325958, + "step": 1330, + "valid_targets_mean": 6161.8, + "valid_targets_min": 829 + }, + { + "epoch": 2.0045045045045047, + "grad_norm": 0.4886145150612323, + "learning_rate": 3.593029650345242e-05, + "loss": 0.248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2267410159111023, + "step": 1335, + "valid_targets_mean": 4201.8, + "valid_targets_min": 585 + }, + { + "epoch": 2.012012012012012, + "grad_norm": 0.918223183885553, + "learning_rate": 3.588490563160793e-05, + "loss": 0.263, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20230059325695038, + "step": 1340, + "valid_targets_mean": 3759.8, + "valid_targets_min": 668 + }, + { + "epoch": 2.0195195195195197, + "grad_norm": 0.41635246230968925, + "learning_rate": 3.583929203938523e-05, + "loss": 0.2647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20971567928791046, + "step": 1345, + "valid_targets_mean": 6513.2, + "valid_targets_min": 506 + }, + { + "epoch": 2.027027027027027, + "grad_norm": 0.5324224076323132, + "learning_rate": 3.5793456366327074e-05, + "loss": 0.2579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2577601969242096, + "step": 1350, + "valid_targets_mean": 4199.0, + "valid_targets_min": 611 + }, + { + "epoch": 2.0345345345345347, + "grad_norm": 0.666399334701293, + "learning_rate": 3.5747399255090016e-05, + "loss": 0.2532, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25944650173187256, + "step": 1355, + "valid_targets_mean": 3050.4, + "valid_targets_min": 513 + }, + { + "epoch": 2.042042042042042, + "grad_norm": 0.5560230204936001, + "learning_rate": 3.5701121351435346e-05, + "loss": 0.2565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30907580256462097, + "step": 1360, + "valid_targets_mean": 4342.1, + "valid_targets_min": 483 + }, + { + "epoch": 2.0495495495495497, + "grad_norm": 0.5093455715924559, + "learning_rate": 3.565462330422006e-05, + "loss": 0.2366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23613181710243225, + "step": 1365, + "valid_targets_mean": 4160.1, + "valid_targets_min": 624 + }, + { + "epoch": 2.057057057057057, + "grad_norm": 0.5143252526017971, + "learning_rate": 3.560790576538777e-05, + "loss": 0.2441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28200429677963257, + "step": 1370, + "valid_targets_mean": 5243.5, + "valid_targets_min": 529 + }, + { + "epoch": 2.0645645645645647, + "grad_norm": 0.6420496487933931, + "learning_rate": 3.556096938995953e-05, + "loss": 0.2644, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3083629906177521, + "step": 1375, + "valid_targets_mean": 3444.8, + "valid_targets_min": 474 + }, + { + "epoch": 2.0720720720720722, + "grad_norm": 0.6041025909633767, + "learning_rate": 3.551381483602472e-05, + "loss": 0.3043, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34277862310409546, + "step": 1380, + "valid_targets_mean": 5168.2, + "valid_targets_min": 656 + }, + { + "epoch": 2.0795795795795797, + "grad_norm": 0.5065151180208464, + "learning_rate": 3.546644276473174e-05, + "loss": 0.2703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22405189275741577, + "step": 1385, + "valid_targets_mean": 4451.1, + "valid_targets_min": 653 + }, + { + "epoch": 2.0870870870870872, + "grad_norm": 0.4653600091882295, + "learning_rate": 3.541885384027877e-05, + "loss": 0.2867, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23494037985801697, + "step": 1390, + "valid_targets_mean": 4773.1, + "valid_targets_min": 515 + }, + { + "epoch": 2.0945945945945947, + "grad_norm": 0.4737552542155004, + "learning_rate": 3.53710487299045e-05, + "loss": 0.2301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20185862481594086, + "step": 1395, + "valid_targets_mean": 4372.3, + "valid_targets_min": 656 + }, + { + "epoch": 2.1021021021021022, + "grad_norm": 0.618244462622168, + "learning_rate": 3.5323028103878695e-05, + "loss": 0.2647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28137439489364624, + "step": 1400, + "valid_targets_mean": 3066.6, + "valid_targets_min": 558 + }, + { + "epoch": 2.1096096096096097, + "grad_norm": 0.6276854706539091, + "learning_rate": 3.527479263549287e-05, + "loss": 0.2567, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24222517013549805, + "step": 1405, + "valid_targets_mean": 3623.4, + "valid_targets_min": 562 + }, + { + "epoch": 2.1171171171171173, + "grad_norm": 0.5119888962756797, + "learning_rate": 3.52263430010508e-05, + "loss": 0.2456, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24120667576789856, + "step": 1410, + "valid_targets_mean": 4184.2, + "valid_targets_min": 571 + }, + { + "epoch": 2.1246246246246248, + "grad_norm": 0.40193152670779986, + "learning_rate": 3.517767987985906e-05, + "loss": 0.2543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21013514697551727, + "step": 1415, + "valid_targets_mean": 7112.9, + "valid_targets_min": 958 + }, + { + "epoch": 2.1321321321321323, + "grad_norm": 0.6042469303834141, + "learning_rate": 3.5128803954217516e-05, + "loss": 0.245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24615558981895447, + "step": 1420, + "valid_targets_mean": 3546.8, + "valid_targets_min": 647 + }, + { + "epoch": 2.1396396396396398, + "grad_norm": 0.517598650535331, + "learning_rate": 3.507971590940972e-05, + "loss": 0.2361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2518162727355957, + "step": 1425, + "valid_targets_mean": 4095.2, + "valid_targets_min": 700 + }, + { + "epoch": 2.1471471471471473, + "grad_norm": 0.5314103976572915, + "learning_rate": 3.503041643369332e-05, + "loss": 0.2657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29391518235206604, + "step": 1430, + "valid_targets_mean": 4662.9, + "valid_targets_min": 673 + }, + { + "epoch": 2.1546546546546548, + "grad_norm": 0.7994874645886633, + "learning_rate": 3.4980906218290425e-05, + "loss": 0.2778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24668291211128235, + "step": 1435, + "valid_targets_mean": 4393.5, + "valid_targets_min": 639 + }, + { + "epoch": 2.1621621621621623, + "grad_norm": 0.5508650913511237, + "learning_rate": 3.493118595737789e-05, + "loss": 0.2655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26784124970436096, + "step": 1440, + "valid_targets_mean": 3928.2, + "valid_targets_min": 482 + }, + { + "epoch": 2.16966966966967, + "grad_norm": 0.48256368285307105, + "learning_rate": 3.4881256348077606e-05, + "loss": 0.2493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2078143209218979, + "step": 1445, + "valid_targets_mean": 4895.9, + "valid_targets_min": 605 + }, + { + "epoch": 2.1771771771771773, + "grad_norm": 0.5518092824723564, + "learning_rate": 3.4831118090446695e-05, + "loss": 0.2614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27934592962265015, + "step": 1450, + "valid_targets_mean": 4175.9, + "valid_targets_min": 557 + }, + { + "epoch": 2.184684684684685, + "grad_norm": 0.6510442042007786, + "learning_rate": 3.478077188746774e-05, + "loss": 0.2825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25130707025527954, + "step": 1455, + "valid_targets_mean": 2940.2, + "valid_targets_min": 746 + }, + { + "epoch": 2.1921921921921923, + "grad_norm": 0.6653821572090174, + "learning_rate": 3.473021844503885e-05, + "loss": 0.2915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29144030809402466, + "step": 1460, + "valid_targets_mean": 3176.5, + "valid_targets_min": 466 + }, + { + "epoch": 2.1996996996997, + "grad_norm": 0.5835363387672817, + "learning_rate": 3.4679458471963876e-05, + "loss": 0.2609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2668296694755554, + "step": 1465, + "valid_targets_mean": 3661.9, + "valid_targets_min": 657 + }, + { + "epoch": 2.2072072072072073, + "grad_norm": 0.473510630048058, + "learning_rate": 3.462849267994237e-05, + "loss": 0.2391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2349366545677185, + "step": 1470, + "valid_targets_mean": 4610.1, + "valid_targets_min": 583 + }, + { + "epoch": 2.214714714714715, + "grad_norm": 0.5034796149297331, + "learning_rate": 3.457732178355966e-05, + "loss": 0.2786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22843249142169952, + "step": 1475, + "valid_targets_mean": 4560.3, + "valid_targets_min": 870 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.6628701647407635, + "learning_rate": 3.4525946500276815e-05, + "loss": 0.2524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33600878715515137, + "step": 1480, + "valid_targets_mean": 4715.0, + "valid_targets_min": 409 + }, + { + "epoch": 2.22972972972973, + "grad_norm": 0.4438069409661387, + "learning_rate": 3.447436755042059e-05, + "loss": 0.2579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23582454025745392, + "step": 1485, + "valid_targets_mean": 6454.2, + "valid_targets_min": 720 + }, + { + "epoch": 2.2372372372372373, + "grad_norm": 0.5589297614395898, + "learning_rate": 3.442258565717333e-05, + "loss": 0.2906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3400152325630188, + "step": 1490, + "valid_targets_mean": 5422.4, + "valid_targets_min": 928 + }, + { + "epoch": 2.244744744744745, + "grad_norm": 0.5331121771902878, + "learning_rate": 3.437060154656281e-05, + "loss": 0.2911, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31767523288726807, + "step": 1495, + "valid_targets_mean": 4458.2, + "valid_targets_min": 291 + }, + { + "epoch": 2.2522522522522523, + "grad_norm": 0.40712934057436334, + "learning_rate": 3.4318415947452085e-05, + "loss": 0.252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28288692235946655, + "step": 1500, + "valid_targets_mean": 6532.0, + "valid_targets_min": 648 + }, + { + "epoch": 2.25975975975976, + "grad_norm": 0.45673386040732317, + "learning_rate": 3.4266029591529246e-05, + "loss": 0.2864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2521343529224396, + "step": 1505, + "valid_targets_mean": 5607.8, + "valid_targets_min": 725 + }, + { + "epoch": 2.2672672672672673, + "grad_norm": 0.6013211253935284, + "learning_rate": 3.421344321329719e-05, + "loss": 0.2529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28024786710739136, + "step": 1510, + "valid_targets_mean": 3546.8, + "valid_targets_min": 515 + }, + { + "epoch": 2.274774774774775, + "grad_norm": 0.9210774515682121, + "learning_rate": 3.4160657550063286e-05, + "loss": 0.262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2434246838092804, + "step": 1515, + "valid_targets_mean": 3957.6, + "valid_targets_min": 444 + }, + { + "epoch": 2.2822822822822824, + "grad_norm": 0.5034622925230656, + "learning_rate": 3.410767334192905e-05, + "loss": 0.2519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2538553476333618, + "step": 1520, + "valid_targets_mean": 4692.2, + "valid_targets_min": 605 + }, + { + "epoch": 2.28978978978979, + "grad_norm": 0.6140827528489169, + "learning_rate": 3.40544913317798e-05, + "loss": 0.2476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2580946087837219, + "step": 1525, + "valid_targets_mean": 3429.3, + "valid_targets_min": 549 + }, + { + "epoch": 2.2972972972972974, + "grad_norm": 0.4522744249612033, + "learning_rate": 3.400111226527417e-05, + "loss": 0.2326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22649413347244263, + "step": 1530, + "valid_targets_mean": 5068.2, + "valid_targets_min": 731 + }, + { + "epoch": 2.304804804804805, + "grad_norm": 0.49640935192095975, + "learning_rate": 3.3947536890833744e-05, + "loss": 0.2507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2515903115272522, + "step": 1535, + "valid_targets_mean": 4226.1, + "valid_targets_min": 726 + }, + { + "epoch": 2.3123123123123124, + "grad_norm": 1.016722378855143, + "learning_rate": 3.389376595963248e-05, + "loss": 0.2882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40263426303863525, + "step": 1540, + "valid_targets_mean": 3808.3, + "valid_targets_min": 756 + }, + { + "epoch": 2.31981981981982, + "grad_norm": 0.520047683970159, + "learning_rate": 3.3839800225586233e-05, + "loss": 0.2725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24709469079971313, + "step": 1545, + "valid_targets_mean": 4321.6, + "valid_targets_min": 663 + }, + { + "epoch": 2.3273273273273274, + "grad_norm": 0.4955591390887456, + "learning_rate": 3.378564044534216e-05, + "loss": 0.2218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24357634782791138, + "step": 1550, + "valid_targets_mean": 4470.6, + "valid_targets_min": 670 + }, + { + "epoch": 2.334834834834835, + "grad_norm": 0.5304638752162832, + "learning_rate": 3.373128737826812e-05, + "loss": 0.2625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27869170904159546, + "step": 1555, + "valid_targets_mean": 3793.8, + "valid_targets_min": 252 + }, + { + "epoch": 2.3423423423423424, + "grad_norm": 0.6052044405674071, + "learning_rate": 3.367674178644201e-05, + "loss": 0.2578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2955855429172516, + "step": 1560, + "valid_targets_mean": 3434.8, + "valid_targets_min": 662 + }, + { + "epoch": 2.34984984984985, + "grad_norm": 0.45641925043102494, + "learning_rate": 3.362200443464112e-05, + "loss": 0.2572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25648972392082214, + "step": 1565, + "valid_targets_mean": 5664.2, + "valid_targets_min": 487 + }, + { + "epoch": 2.3573573573573574, + "grad_norm": 0.4104470454624804, + "learning_rate": 3.356707609033136e-05, + "loss": 0.2382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1871146857738495, + "step": 1570, + "valid_targets_mean": 5469.4, + "valid_targets_min": 650 + }, + { + "epoch": 2.364864864864865, + "grad_norm": 0.5305663319530391, + "learning_rate": 3.351195752365654e-05, + "loss": 0.2546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20778313279151917, + "step": 1575, + "valid_targets_mean": 3653.4, + "valid_targets_min": 548 + }, + { + "epoch": 2.3723723723723724, + "grad_norm": 0.952177731638365, + "learning_rate": 3.3456649507427537e-05, + "loss": 0.2785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30603039264678955, + "step": 1580, + "valid_targets_mean": 3391.8, + "valid_targets_min": 541 + }, + { + "epoch": 2.37987987987988, + "grad_norm": 0.625558644174225, + "learning_rate": 3.340115281711149e-05, + "loss": 0.2578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2915250360965729, + "step": 1585, + "valid_targets_mean": 3354.9, + "valid_targets_min": 376 + }, + { + "epoch": 2.3873873873873874, + "grad_norm": 0.4383965531441897, + "learning_rate": 3.3345468230820924e-05, + "loss": 0.2545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20519056916236877, + "step": 1590, + "valid_targets_mean": 5249.9, + "valid_targets_min": 576 + }, + { + "epoch": 2.394894894894895, + "grad_norm": 0.7263449171228276, + "learning_rate": 3.32895965293028e-05, + "loss": 0.2629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24948550760746002, + "step": 1595, + "valid_targets_mean": 3813.8, + "valid_targets_min": 700 + }, + { + "epoch": 2.4024024024024024, + "grad_norm": 0.7704936140888641, + "learning_rate": 3.323353849592764e-05, + "loss": 0.27, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3838420808315277, + "step": 1600, + "valid_targets_mean": 4764.8, + "valid_targets_min": 930 + }, + { + "epoch": 2.40990990990991, + "grad_norm": 0.5262208670623167, + "learning_rate": 3.317729491667848e-05, + "loss": 0.2854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26990848779678345, + "step": 1605, + "valid_targets_mean": 4314.4, + "valid_targets_min": 728 + }, + { + "epoch": 2.4174174174174174, + "grad_norm": 0.4905094743952616, + "learning_rate": 3.3120866580139856e-05, + "loss": 0.2678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24697014689445496, + "step": 1610, + "valid_targets_mean": 5779.1, + "valid_targets_min": 863 + }, + { + "epoch": 2.424924924924925, + "grad_norm": 0.5795672522700869, + "learning_rate": 3.306425427748681e-05, + "loss": 0.2753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3122454881668091, + "step": 1615, + "valid_targets_mean": 4678.4, + "valid_targets_min": 597 + }, + { + "epoch": 2.4324324324324325, + "grad_norm": 0.5127623781647497, + "learning_rate": 3.3007458802473705e-05, + "loss": 0.2418, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2780628502368927, + "step": 1620, + "valid_targets_mean": 5270.1, + "valid_targets_min": 408 + }, + { + "epoch": 2.43993993993994, + "grad_norm": 0.6000042814068866, + "learning_rate": 3.2950480951423166e-05, + "loss": 0.2492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26539573073387146, + "step": 1625, + "valid_targets_mean": 2969.9, + "valid_targets_min": 292 + }, + { + "epoch": 2.4474474474474475, + "grad_norm": 0.5601820602459276, + "learning_rate": 3.289332152321489e-05, + "loss": 0.2744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34554967284202576, + "step": 1630, + "valid_targets_mean": 4328.4, + "valid_targets_min": 570 + }, + { + "epoch": 2.454954954954955, + "grad_norm": 0.5017335839364364, + "learning_rate": 3.2835981319274436e-05, + "loss": 0.3016, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31424376368522644, + "step": 1635, + "valid_targets_mean": 4981.0, + "valid_targets_min": 513 + }, + { + "epoch": 2.4624624624624625, + "grad_norm": 0.5989444157971933, + "learning_rate": 3.277846114356201e-05, + "loss": 0.2812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29167452454566956, + "step": 1640, + "valid_targets_mean": 4280.9, + "valid_targets_min": 583 + }, + { + "epoch": 2.46996996996997, + "grad_norm": 0.4818252524686284, + "learning_rate": 3.272076180256117e-05, + "loss": 0.2503, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2451312243938446, + "step": 1645, + "valid_targets_mean": 5266.5, + "valid_targets_min": 456 + }, + { + "epoch": 2.4774774774774775, + "grad_norm": 0.5474083922971454, + "learning_rate": 3.2662884105267545e-05, + "loss": 0.2802, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28529974818229675, + "step": 1650, + "valid_targets_mean": 4656.7, + "valid_targets_min": 323 + }, + { + "epoch": 2.484984984984985, + "grad_norm": 0.3927761927981878, + "learning_rate": 3.260482886317745e-05, + "loss": 0.2586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2354969084262848, + "step": 1655, + "valid_targets_mean": 6819.5, + "valid_targets_min": 769 + }, + { + "epoch": 2.4924924924924925, + "grad_norm": 0.5095139881585496, + "learning_rate": 3.254659689027656e-05, + "loss": 0.2822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25443506240844727, + "step": 1660, + "valid_targets_mean": 4691.8, + "valid_targets_min": 754 + }, + { + "epoch": 2.5, + "grad_norm": 0.48122470806496775, + "learning_rate": 3.2488189003028456e-05, + "loss": 0.2512, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25341033935546875, + "step": 1665, + "valid_targets_mean": 5473.4, + "valid_targets_min": 667 + }, + { + "epoch": 2.5075075075075075, + "grad_norm": 0.535101793490416, + "learning_rate": 3.24296060203632e-05, + "loss": 0.2725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29341810941696167, + "step": 1670, + "valid_targets_mean": 4545.0, + "valid_targets_min": 727 + }, + { + "epoch": 2.515015015015015, + "grad_norm": 0.5407510878775967, + "learning_rate": 3.2370848763665854e-05, + "loss": 0.2631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.298563152551651, + "step": 1675, + "valid_targets_mean": 4343.1, + "valid_targets_min": 487 + }, + { + "epoch": 2.5225225225225225, + "grad_norm": 0.7328443730397846, + "learning_rate": 3.231191805676494e-05, + "loss": 0.2687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24576136469841003, + "step": 1680, + "valid_targets_mean": 4246.7, + "valid_targets_min": 381 + }, + { + "epoch": 2.53003003003003, + "grad_norm": 0.4953608748768144, + "learning_rate": 3.22528147259209e-05, + "loss": 0.2894, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3240085542201996, + "step": 1685, + "valid_targets_mean": 5762.2, + "valid_targets_min": 734 + }, + { + "epoch": 2.5375375375375375, + "grad_norm": 0.6439990940182198, + "learning_rate": 3.2193539599814546e-05, + "loss": 0.2563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2948322892189026, + "step": 1690, + "valid_targets_mean": 3211.7, + "valid_targets_min": 785 + }, + { + "epoch": 2.545045045045045, + "grad_norm": 0.5649997969615153, + "learning_rate": 3.213409350953539e-05, + "loss": 0.2763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2883799374103546, + "step": 1695, + "valid_targets_mean": 3863.2, + "valid_targets_min": 655 + }, + { + "epoch": 2.5525525525525525, + "grad_norm": 0.6654178682497041, + "learning_rate": 3.207447728857e-05, + "loss": 0.2697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2643584907054901, + "step": 1700, + "valid_targets_mean": 3119.7, + "valid_targets_min": 557 + }, + { + "epoch": 2.56006006006006, + "grad_norm": 0.6865303086852059, + "learning_rate": 3.201469177279036e-05, + "loss": 0.2489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27196943759918213, + "step": 1705, + "valid_targets_mean": 2167.2, + "valid_targets_min": 570 + }, + { + "epoch": 2.5675675675675675, + "grad_norm": 0.5565838052443746, + "learning_rate": 3.195473780044208e-05, + "loss": 0.2622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2557700276374817, + "step": 1710, + "valid_targets_mean": 3659.3, + "valid_targets_min": 446 + }, + { + "epoch": 2.575075075075075, + "grad_norm": 0.5197442676457078, + "learning_rate": 3.189461621213271e-05, + "loss": 0.2617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2678113579750061, + "step": 1715, + "valid_targets_mean": 4663.4, + "valid_targets_min": 609 + }, + { + "epoch": 2.5825825825825826, + "grad_norm": 0.5166944613990696, + "learning_rate": 3.18343278508199e-05, + "loss": 0.2584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24757936596870422, + "step": 1720, + "valid_targets_mean": 4511.7, + "valid_targets_min": 460 + }, + { + "epoch": 2.59009009009009, + "grad_norm": 0.4761985812024692, + "learning_rate": 3.177387356179962e-05, + "loss": 0.2292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20293045043945312, + "step": 1725, + "valid_targets_mean": 4317.4, + "valid_targets_min": 477 + }, + { + "epoch": 2.5975975975975976, + "grad_norm": 0.5529954148422914, + "learning_rate": 3.1713254192694284e-05, + "loss": 0.2407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24401406943798065, + "step": 1730, + "valid_targets_mean": 3834.1, + "valid_targets_min": 610 + }, + { + "epoch": 2.605105105105105, + "grad_norm": 0.4955364019562657, + "learning_rate": 3.1652470593440885e-05, + "loss": 0.2746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22095078229904175, + "step": 1735, + "valid_targets_mean": 5501.8, + "valid_targets_min": 681 + }, + { + "epoch": 2.6126126126126126, + "grad_norm": 0.45641687982187235, + "learning_rate": 3.1591523616279055e-05, + "loss": 0.2507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20338258147239685, + "step": 1740, + "valid_targets_mean": 4772.7, + "valid_targets_min": 700 + }, + { + "epoch": 2.62012012012012, + "grad_norm": 0.6226280659286444, + "learning_rate": 3.153041411573914e-05, + "loss": 0.2552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3405519723892212, + "step": 1745, + "valid_targets_mean": 4217.1, + "valid_targets_min": 518 + }, + { + "epoch": 2.6276276276276276, + "grad_norm": 0.6357283334230661, + "learning_rate": 3.1469142948630194e-05, + "loss": 0.2552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27663713693618774, + "step": 1750, + "valid_targets_mean": 3277.6, + "valid_targets_min": 416 + }, + { + "epoch": 2.635135135135135, + "grad_norm": 0.5459413685496709, + "learning_rate": 3.140771097402798e-05, + "loss": 0.2528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.257800430059433, + "step": 1755, + "valid_targets_mean": 4285.2, + "valid_targets_min": 485 + }, + { + "epoch": 2.6426426426426426, + "grad_norm": 0.5121436251283435, + "learning_rate": 3.134611905326295e-05, + "loss": 0.2374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2595219910144806, + "step": 1760, + "valid_targets_mean": 4998.1, + "valid_targets_min": 900 + }, + { + "epoch": 2.65015015015015, + "grad_norm": 0.5290785372037086, + "learning_rate": 3.12843680499081e-05, + "loss": 0.2823, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.380996972322464, + "step": 1765, + "valid_targets_mean": 5265.2, + "valid_targets_min": 668 + }, + { + "epoch": 2.6576576576576576, + "grad_norm": 0.7048181374115327, + "learning_rate": 3.122245882976693e-05, + "loss": 0.2989, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2908703684806824, + "step": 1770, + "valid_targets_mean": 2736.2, + "valid_targets_min": 909 + }, + { + "epoch": 2.665165165165165, + "grad_norm": 0.47236135012538977, + "learning_rate": 3.116039226086128e-05, + "loss": 0.2746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30878859758377075, + "step": 1775, + "valid_targets_mean": 5567.8, + "valid_targets_min": 635 + }, + { + "epoch": 2.6726726726726726, + "grad_norm": 0.5445102500932314, + "learning_rate": 3.1098169213419155e-05, + "loss": 0.2366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2582714259624481, + "step": 1780, + "valid_targets_mean": 3773.7, + "valid_targets_min": 376 + }, + { + "epoch": 2.68018018018018, + "grad_norm": 0.4265129001988632, + "learning_rate": 3.103579055986251e-05, + "loss": 0.2629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2143097072839737, + "step": 1785, + "valid_targets_mean": 5390.5, + "valid_targets_min": 704 + }, + { + "epoch": 2.6876876876876876, + "grad_norm": 0.5312743639290497, + "learning_rate": 3.0973257174795074e-05, + "loss": 0.2788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3563080430030823, + "step": 1790, + "valid_targets_mean": 4896.1, + "valid_targets_min": 666 + }, + { + "epoch": 2.695195195195195, + "grad_norm": 0.46913349560934214, + "learning_rate": 3.0910569934990004e-05, + "loss": 0.2995, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22366014122962952, + "step": 1795, + "valid_targets_mean": 5595.9, + "valid_targets_min": 764 + }, + { + "epoch": 2.7027027027027026, + "grad_norm": 0.4542845059111592, + "learning_rate": 3.084772971937766e-05, + "loss": 0.2346, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.238237202167511, + "step": 1800, + "valid_targets_mean": 4653.6, + "valid_targets_min": 745 + }, + { + "epoch": 2.71021021021021, + "grad_norm": 0.5378230783334604, + "learning_rate": 3.078473740903325e-05, + "loss": 0.2585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30502867698669434, + "step": 1805, + "valid_targets_mean": 4299.5, + "valid_targets_min": 755 + }, + { + "epoch": 2.7177177177177176, + "grad_norm": 0.5591431243642122, + "learning_rate": 3.07215938871645e-05, + "loss": 0.2414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.232755646109581, + "step": 1810, + "valid_targets_mean": 3461.2, + "valid_targets_min": 744 + }, + { + "epoch": 2.725225225225225, + "grad_norm": 0.6091187770028943, + "learning_rate": 3.0658300039099235e-05, + "loss": 0.2612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2591820955276489, + "step": 1815, + "valid_targets_mean": 3384.4, + "valid_targets_min": 685 + }, + { + "epoch": 2.7327327327327327, + "grad_norm": 0.6447111768897391, + "learning_rate": 3.0594856752272987e-05, + "loss": 0.2546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25599902868270874, + "step": 1820, + "valid_targets_mean": 3581.4, + "valid_targets_min": 657 + }, + { + "epoch": 2.74024024024024, + "grad_norm": 0.44894238539094306, + "learning_rate": 3.053126491621657e-05, + "loss": 0.2745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2371259331703186, + "step": 1825, + "valid_targets_mean": 4889.4, + "valid_targets_min": 511 + }, + { + "epoch": 2.7477477477477477, + "grad_norm": 0.5214275290965238, + "learning_rate": 3.046752542254357e-05, + "loss": 0.2397, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22412139177322388, + "step": 1830, + "valid_targets_mean": 3863.1, + "valid_targets_min": 670 + }, + { + "epoch": 2.755255255255255, + "grad_norm": 0.5573497856924043, + "learning_rate": 3.0403639164937865e-05, + "loss": 0.2636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24575021862983704, + "step": 1835, + "valid_targets_mean": 3198.3, + "valid_targets_min": 461 + }, + { + "epoch": 2.7627627627627627, + "grad_norm": 0.47880699639373114, + "learning_rate": 3.0339607039141107e-05, + "loss": 0.2303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25173407793045044, + "step": 1840, + "valid_targets_mean": 4396.7, + "valid_targets_min": 277 + }, + { + "epoch": 2.77027027027027, + "grad_norm": 0.5293345660103224, + "learning_rate": 3.027542994294013e-05, + "loss": 0.2464, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24035535752773285, + "step": 1845, + "valid_targets_mean": 3506.6, + "valid_targets_min": 562 + }, + { + "epoch": 2.7777777777777777, + "grad_norm": 0.5030154656400301, + "learning_rate": 3.0211108776154394e-05, + "loss": 0.27, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24871063232421875, + "step": 1850, + "valid_targets_mean": 4511.1, + "valid_targets_min": 654 + }, + { + "epoch": 2.785285285285285, + "grad_norm": 0.4172382663816932, + "learning_rate": 3.0146644440623355e-05, + "loss": 0.2676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22848474979400635, + "step": 1855, + "valid_targets_mean": 5617.8, + "valid_targets_min": 761 + }, + { + "epoch": 2.7927927927927927, + "grad_norm": 0.6045475468870822, + "learning_rate": 3.0082037840193812e-05, + "loss": 0.2434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26820242404937744, + "step": 1860, + "valid_targets_mean": 3179.9, + "valid_targets_min": 612 + }, + { + "epoch": 2.8003003003003, + "grad_norm": 0.5418178665003754, + "learning_rate": 3.001728988070724e-05, + "loss": 0.3025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3145066499710083, + "step": 1865, + "valid_targets_mean": 4837.1, + "valid_targets_min": 709 + }, + { + "epoch": 2.8078078078078077, + "grad_norm": 0.5134558903404736, + "learning_rate": 2.995240146998712e-05, + "loss": 0.2523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21890783309936523, + "step": 1870, + "valid_targets_mean": 4246.1, + "valid_targets_min": 536 + }, + { + "epoch": 2.815315315315315, + "grad_norm": 0.583571397927643, + "learning_rate": 2.9887373517826153e-05, + "loss": 0.2692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27330344915390015, + "step": 1875, + "valid_targets_mean": 3632.4, + "valid_targets_min": 630 + }, + { + "epoch": 2.8228228228228227, + "grad_norm": 0.47961940508462947, + "learning_rate": 2.9822206935973548e-05, + "loss": 0.2635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2063194215297699, + "step": 1880, + "valid_targets_mean": 4621.7, + "valid_targets_min": 492 + }, + { + "epoch": 2.83033033033033, + "grad_norm": 0.5920736352732671, + "learning_rate": 2.9756902638122213e-05, + "loss": 0.2319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23864233493804932, + "step": 1885, + "valid_targets_mean": 4123.9, + "valid_targets_min": 629 + }, + { + "epoch": 2.8378378378378377, + "grad_norm": 0.5565272788823533, + "learning_rate": 2.969146153989598e-05, + "loss": 0.2509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22772765159606934, + "step": 1890, + "valid_targets_mean": 3277.6, + "valid_targets_min": 713 + }, + { + "epoch": 2.8453453453453452, + "grad_norm": 0.6239789579469722, + "learning_rate": 2.9625884558836716e-05, + "loss": 0.2457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26187804341316223, + "step": 1895, + "valid_targets_mean": 2902.2, + "valid_targets_min": 503 + }, + { + "epoch": 2.8528528528528527, + "grad_norm": 0.5680461426104465, + "learning_rate": 2.95601726143915e-05, + "loss": 0.2528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3167528510093689, + "step": 1900, + "valid_targets_mean": 4054.1, + "valid_targets_min": 697 + }, + { + "epoch": 2.8603603603603602, + "grad_norm": 0.45456838507650504, + "learning_rate": 2.949432662789971e-05, + "loss": 0.2483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29696983098983765, + "step": 1905, + "valid_targets_mean": 6072.6, + "valid_targets_min": 598 + }, + { + "epoch": 2.8678678678678677, + "grad_norm": 0.496183629571747, + "learning_rate": 2.942834752258012e-05, + "loss": 0.2392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21913406252861023, + "step": 1910, + "valid_targets_mean": 4109.5, + "valid_targets_min": 879 + }, + { + "epoch": 2.8753753753753752, + "grad_norm": 0.5334630488557025, + "learning_rate": 2.936223622351794e-05, + "loss": 0.2325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31040453910827637, + "step": 1915, + "valid_targets_mean": 4864.2, + "valid_targets_min": 328 + }, + { + "epoch": 2.8828828828828827, + "grad_norm": 0.5005858977972606, + "learning_rate": 2.929599365765185e-05, + "loss": 0.2493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31291884183883667, + "step": 1920, + "valid_targets_mean": 5317.4, + "valid_targets_min": 547 + }, + { + "epoch": 2.8903903903903903, + "grad_norm": 0.4974443542961955, + "learning_rate": 2.9229620753761013e-05, + "loss": 0.2473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.301303505897522, + "step": 1925, + "valid_targets_mean": 4499.8, + "valid_targets_min": 625 + }, + { + "epoch": 2.8978978978978978, + "grad_norm": 0.5691190427521686, + "learning_rate": 2.9163118442452046e-05, + "loss": 0.2449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27089810371398926, + "step": 1930, + "valid_targets_mean": 4097.7, + "valid_targets_min": 667 + }, + { + "epoch": 2.9054054054054053, + "grad_norm": 0.4198119186033015, + "learning_rate": 2.9096487656145968e-05, + "loss": 0.2596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24759484827518463, + "step": 1935, + "valid_targets_mean": 6335.4, + "valid_targets_min": 564 + }, + { + "epoch": 2.9129129129129128, + "grad_norm": 0.5531720093912547, + "learning_rate": 2.9029729329065134e-05, + "loss": 0.2598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23836839199066162, + "step": 1940, + "valid_targets_mean": 3271.4, + "valid_targets_min": 510 + }, + { + "epoch": 2.9204204204204203, + "grad_norm": 0.4289864597777698, + "learning_rate": 2.896284439722013e-05, + "loss": 0.2767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2916072607040405, + "step": 1945, + "valid_targets_mean": 6052.6, + "valid_targets_min": 466 + }, + { + "epoch": 2.9279279279279278, + "grad_norm": 0.47685929908340674, + "learning_rate": 2.8895833798396657e-05, + "loss": 0.2414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22440025210380554, + "step": 1950, + "valid_targets_mean": 4654.8, + "valid_targets_min": 550 + }, + { + "epoch": 2.9354354354354353, + "grad_norm": 0.4511746235008368, + "learning_rate": 2.882869847214237e-05, + "loss": 0.2683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2768084406852722, + "step": 1955, + "valid_targets_mean": 6411.6, + "valid_targets_min": 648 + }, + { + "epoch": 2.942942942942943, + "grad_norm": 0.3897834130570682, + "learning_rate": 2.876143935975373e-05, + "loss": 0.2327, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23787030577659607, + "step": 1960, + "valid_targets_mean": 6063.3, + "valid_targets_min": 668 + }, + { + "epoch": 2.9504504504504503, + "grad_norm": 0.5645744222530638, + "learning_rate": 2.8694057404262757e-05, + "loss": 0.2618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26880699396133423, + "step": 1965, + "valid_targets_mean": 3741.8, + "valid_targets_min": 651 + }, + { + "epoch": 2.957957957957958, + "grad_norm": 0.48669690646418146, + "learning_rate": 2.862655355042387e-05, + "loss": 0.261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33517420291900635, + "step": 1970, + "valid_targets_mean": 5469.8, + "valid_targets_min": 723 + }, + { + "epoch": 2.9654654654654653, + "grad_norm": 0.4988811225771945, + "learning_rate": 2.85589287447006e-05, + "loss": 0.2573, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2532571256160736, + "step": 1975, + "valid_targets_mean": 4500.7, + "valid_targets_min": 824 + }, + { + "epoch": 2.972972972972973, + "grad_norm": 0.5067859361295777, + "learning_rate": 2.849118393525233e-05, + "loss": 0.2958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22295460104942322, + "step": 1980, + "valid_targets_mean": 4019.2, + "valid_targets_min": 439 + }, + { + "epoch": 2.9804804804804803, + "grad_norm": 0.5614437078904748, + "learning_rate": 2.8423320071920986e-05, + "loss": 0.2496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25288334488868713, + "step": 1985, + "valid_targets_mean": 3232.5, + "valid_targets_min": 790 + }, + { + "epoch": 2.987987987987988, + "grad_norm": 0.4074367928471507, + "learning_rate": 2.835533810621777e-05, + "loss": 0.2508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22124746441841125, + "step": 1990, + "valid_targets_mean": 6511.8, + "valid_targets_min": 610 + }, + { + "epoch": 2.9954954954954953, + "grad_norm": 0.4746388334736856, + "learning_rate": 2.8287238991309746e-05, + "loss": 0.2591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22991371154785156, + "step": 1995, + "valid_targets_mean": 4459.8, + "valid_targets_min": 742 + }, + { + "epoch": 3.003003003003003, + "grad_norm": 0.5433525185386173, + "learning_rate": 2.8219023682006533e-05, + "loss": 0.261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2538215219974518, + "step": 2000, + "valid_targets_mean": 3868.6, + "valid_targets_min": 593 + }, + { + "epoch": 3.0105105105105103, + "grad_norm": 0.6522092709294501, + "learning_rate": 2.8150693134746895e-05, + "loss": 0.2505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25473958253860474, + "step": 2005, + "valid_targets_mean": 4023.8, + "valid_targets_min": 939 + }, + { + "epoch": 3.018018018018018, + "grad_norm": 0.4893307325525604, + "learning_rate": 2.8082248307585332e-05, + "loss": 0.2284, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20237843692302704, + "step": 2010, + "valid_targets_mean": 4555.2, + "valid_targets_min": 590 + }, + { + "epoch": 3.0255255255255253, + "grad_norm": 0.4667369885254396, + "learning_rate": 2.801369016017865e-05, + "loss": 0.2271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18705838918685913, + "step": 2015, + "valid_targets_mean": 4288.2, + "valid_targets_min": 690 + }, + { + "epoch": 3.033033033033033, + "grad_norm": 0.49033218061993816, + "learning_rate": 2.7945019653772504e-05, + "loss": 0.2433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2408279925584793, + "step": 2020, + "valid_targets_mean": 5262.1, + "valid_targets_min": 515 + }, + { + "epoch": 3.0405405405405403, + "grad_norm": 0.5391107334148109, + "learning_rate": 2.7876237751187917e-05, + "loss": 0.2436, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21394947171211243, + "step": 2025, + "valid_targets_mean": 3897.1, + "valid_targets_min": 1733 + }, + { + "epoch": 3.048048048048048, + "grad_norm": 0.6193116901994394, + "learning_rate": 2.7807345416807793e-05, + "loss": 0.2442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23730601370334625, + "step": 2030, + "valid_targets_mean": 3262.4, + "valid_targets_min": 439 + }, + { + "epoch": 3.0555555555555554, + "grad_norm": 0.5319064047511843, + "learning_rate": 2.773834361656339e-05, + "loss": 0.2363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2560746669769287, + "step": 2035, + "valid_targets_mean": 4626.2, + "valid_targets_min": 550 + }, + { + "epoch": 3.063063063063063, + "grad_norm": 0.7633521135620636, + "learning_rate": 2.766923331792075e-05, + "loss": 0.2507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30914023518562317, + "step": 2040, + "valid_targets_mean": 3140.8, + "valid_targets_min": 595 + }, + { + "epoch": 3.0705705705705704, + "grad_norm": 1.0068010745411222, + "learning_rate": 2.7600015489867188e-05, + "loss": 0.2413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2271232008934021, + "step": 2045, + "valid_targets_mean": 4060.6, + "valid_targets_min": 667 + }, + { + "epoch": 3.078078078078078, + "grad_norm": 0.5451000708994922, + "learning_rate": 2.753069110289766e-05, + "loss": 0.2459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22742116451263428, + "step": 2050, + "valid_targets_mean": 3681.6, + "valid_targets_min": 668 + }, + { + "epoch": 3.0855855855855854, + "grad_norm": 0.5547733055241225, + "learning_rate": 2.7461261129001174e-05, + "loss": 0.2353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24244071543216705, + "step": 2055, + "valid_targets_mean": 3975.5, + "valid_targets_min": 495 + }, + { + "epoch": 3.093093093093093, + "grad_norm": 0.42158312641460743, + "learning_rate": 2.7391726541647163e-05, + "loss": 0.2154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1724860966205597, + "step": 2060, + "valid_targets_mean": 5298.3, + "valid_targets_min": 423 + }, + { + "epoch": 3.1006006006006004, + "grad_norm": 0.5499035304566007, + "learning_rate": 2.7322088315771834e-05, + "loss": 0.2427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24446254968643188, + "step": 2065, + "valid_targets_mean": 3607.4, + "valid_targets_min": 478 + }, + { + "epoch": 3.108108108108108, + "grad_norm": 0.5590055107325951, + "learning_rate": 2.725234742776448e-05, + "loss": 0.2278, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23849673569202423, + "step": 2070, + "valid_targets_mean": 5030.8, + "valid_targets_min": 659 + }, + { + "epoch": 3.1156156156156154, + "grad_norm": 0.478630517596832, + "learning_rate": 2.7182504855453834e-05, + "loss": 0.2499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20381265878677368, + "step": 2075, + "valid_targets_mean": 4702.2, + "valid_targets_min": 865 + }, + { + "epoch": 3.123123123123123, + "grad_norm": 0.5451665017950299, + "learning_rate": 2.7112561578094327e-05, + "loss": 0.224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2420223355293274, + "step": 2080, + "valid_targets_mean": 5865.1, + "valid_targets_min": 611 + }, + { + "epoch": 3.1306306306306304, + "grad_norm": 0.5244203768707708, + "learning_rate": 2.704251857635234e-05, + "loss": 0.2323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20124025642871857, + "step": 2085, + "valid_targets_mean": 3856.1, + "valid_targets_min": 622 + }, + { + "epoch": 3.138138138138138, + "grad_norm": 0.553596247360053, + "learning_rate": 2.69723768322925e-05, + "loss": 0.2429, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2544230818748474, + "step": 2090, + "valid_targets_mean": 3771.1, + "valid_targets_min": 479 + }, + { + "epoch": 3.1456456456456454, + "grad_norm": 0.5826774718951724, + "learning_rate": 2.6902137329363892e-05, + "loss": 0.2476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27420639991760254, + "step": 2095, + "valid_targets_mean": 3996.2, + "valid_targets_min": 703 + }, + { + "epoch": 3.153153153153153, + "grad_norm": 0.6143348318690984, + "learning_rate": 2.683180105238625e-05, + "loss": 0.2307, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2823088765144348, + "step": 2100, + "valid_targets_mean": 3603.2, + "valid_targets_min": 664 + }, + { + "epoch": 3.1606606606606604, + "grad_norm": 0.5590576178147459, + "learning_rate": 2.676136898753617e-05, + "loss": 0.2448, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25955310463905334, + "step": 2105, + "valid_targets_mean": 3985.4, + "valid_targets_min": 773 + }, + { + "epoch": 3.1681681681681684, + "grad_norm": 0.596190398717275, + "learning_rate": 2.6690842122333286e-05, + "loss": 0.2255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24386368691921234, + "step": 2110, + "valid_targets_mean": 3912.7, + "valid_targets_min": 728 + }, + { + "epoch": 3.175675675675676, + "grad_norm": 0.6071315295064825, + "learning_rate": 2.6620221445626416e-05, + "loss": 0.2096, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23695698380470276, + "step": 2115, + "valid_targets_mean": 3008.4, + "valid_targets_min": 656 + }, + { + "epoch": 3.1831831831831834, + "grad_norm": 0.6533097039267628, + "learning_rate": 2.6549507947579685e-05, + "loss": 0.2575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2619466185569763, + "step": 2120, + "valid_targets_mean": 5412.9, + "valid_targets_min": 491 + }, + { + "epoch": 3.190690690690691, + "grad_norm": 0.49336937759151656, + "learning_rate": 2.6478702619658672e-05, + "loss": 0.2538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23457828164100647, + "step": 2125, + "valid_targets_mean": 4639.4, + "valid_targets_min": 456 + }, + { + "epoch": 3.1981981981981984, + "grad_norm": 0.48643384497808734, + "learning_rate": 2.6407806454616472e-05, + "loss": 0.2368, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18629509210586548, + "step": 2130, + "valid_targets_mean": 3978.7, + "valid_targets_min": 815 + }, + { + "epoch": 3.205705705705706, + "grad_norm": 0.7137473060732604, + "learning_rate": 2.633682044647982e-05, + "loss": 0.2492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34111836552619934, + "step": 2135, + "valid_targets_mean": 2986.7, + "valid_targets_min": 211 + }, + { + "epoch": 3.2132132132132134, + "grad_norm": 0.4960650096175783, + "learning_rate": 2.626574559053512e-05, + "loss": 0.254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2693920433521271, + "step": 2140, + "valid_targets_mean": 6290.2, + "valid_targets_min": 728 + }, + { + "epoch": 3.220720720720721, + "grad_norm": 0.543711754415262, + "learning_rate": 2.619458288331449e-05, + "loss": 0.2648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34854060411453247, + "step": 2145, + "valid_targets_mean": 4955.6, + "valid_targets_min": 273 + }, + { + "epoch": 3.2282282282282284, + "grad_norm": 0.4118913840003615, + "learning_rate": 2.6123333322581806e-05, + "loss": 0.2272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22846825420856476, + "step": 2150, + "valid_targets_mean": 6945.4, + "valid_targets_min": 620 + }, + { + "epoch": 3.235735735735736, + "grad_norm": 0.6226838119580127, + "learning_rate": 2.6051997907318724e-05, + "loss": 0.2492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2525981664657593, + "step": 2155, + "valid_targets_mean": 3692.9, + "valid_targets_min": 649 + }, + { + "epoch": 3.2432432432432434, + "grad_norm": 0.4814778903119386, + "learning_rate": 2.5980577637710632e-05, + "loss": 0.2397, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21874025464057922, + "step": 2160, + "valid_targets_mean": 5269.8, + "valid_targets_min": 814 + }, + { + "epoch": 3.250750750750751, + "grad_norm": 0.48643926272296106, + "learning_rate": 2.5909073515132667e-05, + "loss": 0.2508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23266538977622986, + "step": 2165, + "valid_targets_mean": 4732.6, + "valid_targets_min": 898 + }, + { + "epoch": 3.2582582582582584, + "grad_norm": 0.5108891449221944, + "learning_rate": 2.5837486542135648e-05, + "loss": 0.2329, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2183685451745987, + "step": 2170, + "valid_targets_mean": 4184.6, + "valid_targets_min": 659 + }, + { + "epoch": 3.265765765765766, + "grad_norm": 0.5625875087588166, + "learning_rate": 2.576581772243204e-05, + "loss": 0.2766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21907004714012146, + "step": 2175, + "valid_targets_mean": 5702.3, + "valid_targets_min": 447 + }, + { + "epoch": 3.2732732732732734, + "grad_norm": 0.44524330986567173, + "learning_rate": 2.5694068060881856e-05, + "loss": 0.2347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2379521131515503, + "step": 2180, + "valid_targets_mean": 5613.1, + "valid_targets_min": 444 + }, + { + "epoch": 3.280780780780781, + "grad_norm": 0.41557044227218803, + "learning_rate": 2.5622238563478603e-05, + "loss": 0.2161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1911526620388031, + "step": 2185, + "valid_targets_mean": 6414.1, + "valid_targets_min": 926 + }, + { + "epoch": 3.2882882882882885, + "grad_norm": 0.6461866697440014, + "learning_rate": 2.555033023733514e-05, + "loss": 0.2227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2639209032058716, + "step": 2190, + "valid_targets_mean": 3629.4, + "valid_targets_min": 580 + }, + { + "epoch": 3.295795795795796, + "grad_norm": 0.5159050633898964, + "learning_rate": 2.547834409066958e-05, + "loss": 0.2363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27777135372161865, + "step": 2195, + "valid_targets_mean": 5053.3, + "valid_targets_min": 906 + }, + { + "epoch": 3.3033033033033035, + "grad_norm": 0.477902570867691, + "learning_rate": 2.540628113279116e-05, + "loss": 0.2412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3054026961326599, + "step": 2200, + "valid_targets_mean": 6047.9, + "valid_targets_min": 806 + }, + { + "epoch": 3.310810810810811, + "grad_norm": 0.45215264311766057, + "learning_rate": 2.5334142374086053e-05, + "loss": 0.2229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22195221483707428, + "step": 2205, + "valid_targets_mean": 5648.2, + "valid_targets_min": 742 + }, + { + "epoch": 3.3183183183183185, + "grad_norm": 0.8158948988984059, + "learning_rate": 2.5261928826003246e-05, + "loss": 0.2694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3665313720703125, + "step": 2210, + "valid_targets_mean": 4771.2, + "valid_targets_min": 663 + }, + { + "epoch": 3.325825825825826, + "grad_norm": 0.6202523003371706, + "learning_rate": 2.518964150104034e-05, + "loss": 0.242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22199711203575134, + "step": 2215, + "valid_targets_mean": 2897.6, + "valid_targets_min": 698 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 0.6530333048449412, + "learning_rate": 2.511728141272934e-05, + "loss": 0.2304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24529844522476196, + "step": 2220, + "valid_targets_mean": 3370.7, + "valid_targets_min": 503 + }, + { + "epoch": 3.340840840840841, + "grad_norm": 0.5905198582269967, + "learning_rate": 2.5044849575622458e-05, + "loss": 0.2272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2153581976890564, + "step": 2225, + "valid_targets_mean": 4749.2, + "valid_targets_min": 668 + }, + { + "epoch": 3.3483483483483485, + "grad_norm": 0.4868966063934165, + "learning_rate": 2.4972347005277903e-05, + "loss": 0.2831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2968922257423401, + "step": 2230, + "valid_targets_mean": 5965.1, + "valid_targets_min": 498 + }, + { + "epoch": 3.355855855855856, + "grad_norm": 0.8961039788278082, + "learning_rate": 2.489977471824561e-05, + "loss": 0.2348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23392842710018158, + "step": 2235, + "valid_targets_mean": 3846.9, + "valid_targets_min": 368 + }, + { + "epoch": 3.3633633633633635, + "grad_norm": 0.6752840687920286, + "learning_rate": 2.4827133732053014e-05, + "loss": 0.2576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25479578971862793, + "step": 2240, + "valid_targets_mean": 3281.4, + "valid_targets_min": 291 + }, + { + "epoch": 3.370870870870871, + "grad_norm": 0.5573523830693498, + "learning_rate": 2.475442506519077e-05, + "loss": 0.2353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20670834183692932, + "step": 2245, + "valid_targets_mean": 3441.7, + "valid_targets_min": 637 + }, + { + "epoch": 3.3783783783783785, + "grad_norm": 0.47716647459061995, + "learning_rate": 2.468164973709847e-05, + "loss": 0.2249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21283169090747833, + "step": 2250, + "valid_targets_mean": 4464.2, + "valid_targets_min": 910 + }, + { + "epoch": 3.385885885885886, + "grad_norm": 0.5064998483360107, + "learning_rate": 2.4608808768150357e-05, + "loss": 0.2216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19695068895816803, + "step": 2255, + "valid_targets_mean": 3949.1, + "valid_targets_min": 738 + }, + { + "epoch": 3.3933933933933935, + "grad_norm": 0.5175599454597674, + "learning_rate": 2.4535903179641026e-05, + "loss": 0.2265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2480076104402542, + "step": 2260, + "valid_targets_mean": 5609.1, + "valid_targets_min": 1410 + }, + { + "epoch": 3.400900900900901, + "grad_norm": 0.42240678217071037, + "learning_rate": 2.4462933993771088e-05, + "loss": 0.2399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26719754934310913, + "step": 2265, + "valid_targets_mean": 6515.5, + "valid_targets_min": 726 + }, + { + "epoch": 3.4084084084084085, + "grad_norm": 0.506514863267876, + "learning_rate": 2.438990223363284e-05, + "loss": 0.2589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25513604283332825, + "step": 2270, + "valid_targets_mean": 4471.0, + "valid_targets_min": 732 + }, + { + "epoch": 3.415915915915916, + "grad_norm": 0.5367632072440157, + "learning_rate": 2.4316808923195926e-05, + "loss": 0.2387, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22310830652713776, + "step": 2275, + "valid_targets_mean": 4361.2, + "valid_targets_min": 700 + }, + { + "epoch": 3.4234234234234235, + "grad_norm": 0.44852402801221397, + "learning_rate": 2.4243655087293e-05, + "loss": 0.2582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26332229375839233, + "step": 2280, + "valid_targets_mean": 6653.6, + "valid_targets_min": 583 + }, + { + "epoch": 3.430930930930931, + "grad_norm": 0.48402583907149443, + "learning_rate": 2.4170441751605308e-05, + "loss": 0.2328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24115243554115295, + "step": 2285, + "valid_targets_mean": 4474.8, + "valid_targets_min": 723 + }, + { + "epoch": 3.4384384384384385, + "grad_norm": 0.58593699094807, + "learning_rate": 2.4097169942648356e-05, + "loss": 0.2145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16966642439365387, + "step": 2290, + "valid_targets_mean": 5481.7, + "valid_targets_min": 1177 + }, + { + "epoch": 3.445945945945946, + "grad_norm": 0.4710415176014163, + "learning_rate": 2.4023840687757476e-05, + "loss": 0.2432, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20201632380485535, + "step": 2295, + "valid_targets_mean": 5250.4, + "valid_targets_min": 717 + }, + { + "epoch": 3.4534534534534536, + "grad_norm": 0.5084005135734221, + "learning_rate": 2.395045501507347e-05, + "loss": 0.2254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20893435180187225, + "step": 2300, + "valid_targets_mean": 4357.3, + "valid_targets_min": 489 + }, + { + "epoch": 3.460960960960961, + "grad_norm": 0.619899875868498, + "learning_rate": 2.387701395352815e-05, + "loss": 0.227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21758192777633667, + "step": 2305, + "valid_targets_mean": 2881.0, + "valid_targets_min": 578 + }, + { + "epoch": 3.4684684684684686, + "grad_norm": 0.48471567383203296, + "learning_rate": 2.380351853282992e-05, + "loss": 0.2445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24553707242012024, + "step": 2310, + "valid_targets_mean": 5598.6, + "valid_targets_min": 552 + }, + { + "epoch": 3.475975975975976, + "grad_norm": 0.6056696323887422, + "learning_rate": 2.372996978344937e-05, + "loss": 0.2255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26694750785827637, + "step": 2315, + "valid_targets_mean": 3924.4, + "valid_targets_min": 523 + }, + { + "epoch": 3.4834834834834836, + "grad_norm": 0.601885827026815, + "learning_rate": 2.3656368736604786e-05, + "loss": 0.2568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28267890214920044, + "step": 2320, + "valid_targets_mean": 3601.5, + "valid_targets_min": 724 + }, + { + "epoch": 3.490990990990991, + "grad_norm": 0.6254412393703039, + "learning_rate": 2.3582716424247728e-05, + "loss": 0.2711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2540096044540405, + "step": 2325, + "valid_targets_mean": 2951.5, + "valid_targets_min": 537 + }, + { + "epoch": 3.4984984984984986, + "grad_norm": 0.5798449489473179, + "learning_rate": 2.3509013879048526e-05, + "loss": 0.2484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30426591634750366, + "step": 2330, + "valid_targets_mean": 4265.3, + "valid_targets_min": 712 + }, + { + "epoch": 3.506006006006006, + "grad_norm": 0.4923122631477329, + "learning_rate": 2.3435262134381823e-05, + "loss": 0.2068, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1547277569770813, + "step": 2335, + "valid_targets_mean": 4490.8, + "valid_targets_min": 620 + }, + { + "epoch": 3.5135135135135136, + "grad_norm": 0.49204973031276056, + "learning_rate": 2.3361462224312094e-05, + "loss": 0.2224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1860155165195465, + "step": 2340, + "valid_targets_mean": 4172.9, + "valid_targets_min": 968 + }, + { + "epoch": 3.521021021021021, + "grad_norm": 0.5430832529381092, + "learning_rate": 2.3287615183579126e-05, + "loss": 0.2485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23752620816230774, + "step": 2345, + "valid_targets_mean": 3755.6, + "valid_targets_min": 762 + }, + { + "epoch": 3.5285285285285286, + "grad_norm": 0.47304505139451253, + "learning_rate": 2.3213722047583517e-05, + "loss": 0.2238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21495875716209412, + "step": 2350, + "valid_targets_mean": 5194.9, + "valid_targets_min": 781 + }, + { + "epoch": 3.536036036036036, + "grad_norm": 0.5593072985053018, + "learning_rate": 2.3139783852372157e-05, + "loss": 0.2415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32621026039123535, + "step": 2355, + "valid_targets_mean": 5330.6, + "valid_targets_min": 483 + }, + { + "epoch": 3.5435435435435436, + "grad_norm": 0.5133771865366825, + "learning_rate": 2.306580163462373e-05, + "loss": 0.2152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24318471550941467, + "step": 2360, + "valid_targets_mean": 5257.0, + "valid_targets_min": 850 + }, + { + "epoch": 3.551051051051051, + "grad_norm": 0.7096881143231963, + "learning_rate": 2.299177643163413e-05, + "loss": 0.2412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24741387367248535, + "step": 2365, + "valid_targets_mean": 2511.2, + "valid_targets_min": 508 + }, + { + "epoch": 3.5585585585585586, + "grad_norm": 0.5184545440577579, + "learning_rate": 2.2917709281301946e-05, + "loss": 0.2377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28968262672424316, + "step": 2370, + "valid_targets_mean": 4808.0, + "valid_targets_min": 628 + }, + { + "epoch": 3.566066066066066, + "grad_norm": 0.5497412004652409, + "learning_rate": 2.2843601222113915e-05, + "loss": 0.2846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24640163779258728, + "step": 2375, + "valid_targets_mean": 4700.1, + "valid_targets_min": 418 + }, + { + "epoch": 3.5735735735735736, + "grad_norm": 0.543092075993824, + "learning_rate": 2.2769453293130345e-05, + "loss": 0.2377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2757692039012909, + "step": 2380, + "valid_targets_mean": 3818.9, + "valid_targets_min": 423 + }, + { + "epoch": 3.581081081081081, + "grad_norm": 0.4756311989207704, + "learning_rate": 2.2695266533970556e-05, + "loss": 0.242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2674590051174164, + "step": 2385, + "valid_targets_mean": 5847.6, + "valid_targets_min": 794 + }, + { + "epoch": 3.5885885885885886, + "grad_norm": 0.7040802634858209, + "learning_rate": 2.2621041984798304e-05, + "loss": 0.2401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27846670150756836, + "step": 2390, + "valid_targets_mean": 2509.4, + "valid_targets_min": 686 + }, + { + "epoch": 3.596096096096096, + "grad_norm": 0.6323033311574447, + "learning_rate": 2.2546780686307183e-05, + "loss": 0.2495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23559826612472534, + "step": 2395, + "valid_targets_mean": 4216.6, + "valid_targets_min": 608 + }, + { + "epoch": 3.6036036036036037, + "grad_norm": 0.5439419831820919, + "learning_rate": 2.2472483679706067e-05, + "loss": 0.2578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.271353542804718, + "step": 2400, + "valid_targets_mean": 4691.6, + "valid_targets_min": 755 + }, + { + "epoch": 3.611111111111111, + "grad_norm": 0.657338134532918, + "learning_rate": 2.2398152006704463e-05, + "loss": 0.2602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26753100752830505, + "step": 2405, + "valid_targets_mean": 3173.4, + "valid_targets_min": 404 + }, + { + "epoch": 3.6186186186186187, + "grad_norm": 0.45053072622125706, + "learning_rate": 2.232378670949795e-05, + "loss": 0.2137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18780098855495453, + "step": 2410, + "valid_targets_mean": 5203.4, + "valid_targets_min": 1334 + }, + { + "epoch": 3.626126126126126, + "grad_norm": 0.4023749713359982, + "learning_rate": 2.2249388830753534e-05, + "loss": 0.2321, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17731866240501404, + "step": 2415, + "valid_targets_mean": 5612.2, + "valid_targets_min": 480 + }, + { + "epoch": 3.6336336336336337, + "grad_norm": 0.5062285511706474, + "learning_rate": 2.217495941359506e-05, + "loss": 0.2514, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23712016642093658, + "step": 2420, + "valid_targets_mean": 4567.2, + "valid_targets_min": 567 + }, + { + "epoch": 3.641141141141141, + "grad_norm": 0.5407394227692897, + "learning_rate": 2.2100499501588558e-05, + "loss": 0.2402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25140225887298584, + "step": 2425, + "valid_targets_mean": 4480.6, + "valid_targets_min": 801 + }, + { + "epoch": 3.6486486486486487, + "grad_norm": 0.5432137809783814, + "learning_rate": 2.2026010138727628e-05, + "loss": 0.2364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23100072145462036, + "step": 2430, + "valid_targets_mean": 4535.2, + "valid_targets_min": 696 + }, + { + "epoch": 3.656156156156156, + "grad_norm": 0.5702741738198654, + "learning_rate": 2.1951492369418786e-05, + "loss": 0.2448, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24533988535404205, + "step": 2435, + "valid_targets_mean": 4315.2, + "valid_targets_min": 571 + }, + { + "epoch": 3.6636636636636637, + "grad_norm": 0.6650337977127149, + "learning_rate": 2.1876947238466838e-05, + "loss": 0.256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2869791090488434, + "step": 2440, + "valid_targets_mean": 3385.6, + "valid_targets_min": 557 + }, + { + "epoch": 3.671171171171171, + "grad_norm": 0.473867504379004, + "learning_rate": 2.1802375791060232e-05, + "loss": 0.2315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18947833776474, + "step": 2445, + "valid_targets_mean": 5216.6, + "valid_targets_min": 742 + }, + { + "epoch": 3.6786786786786787, + "grad_norm": 0.7199290550127364, + "learning_rate": 2.1727779072756388e-05, + "loss": 0.2197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30827030539512634, + "step": 2450, + "valid_targets_mean": 4398.8, + "valid_targets_min": 696 + }, + { + "epoch": 3.686186186186186, + "grad_norm": 0.6169439986122458, + "learning_rate": 2.1653158129467048e-05, + "loss": 0.281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2743998169898987, + "step": 2455, + "valid_targets_mean": 3159.7, + "valid_targets_min": 683 + }, + { + "epoch": 3.6936936936936937, + "grad_norm": 0.630068337186312, + "learning_rate": 2.1578514007443602e-05, + "loss": 0.2446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2756376266479492, + "step": 2460, + "valid_targets_mean": 3321.1, + "valid_targets_min": 663 + }, + { + "epoch": 3.701201201201201, + "grad_norm": 8.40963413100615, + "learning_rate": 2.1503847753262447e-05, + "loss": 0.2376, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21705785393714905, + "step": 2465, + "valid_targets_mean": 5065.1, + "valid_targets_min": 1345 + }, + { + "epoch": 3.7087087087087087, + "grad_norm": 0.8840964089151675, + "learning_rate": 2.1429160413810268e-05, + "loss": 0.2268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2861403822898865, + "step": 2470, + "valid_targets_mean": 5522.8, + "valid_targets_min": 991 + }, + { + "epoch": 3.7162162162162162, + "grad_norm": 0.47904088821045393, + "learning_rate": 2.1354453036269397e-05, + "loss": 0.2283, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.182481050491333, + "step": 2475, + "valid_targets_mean": 3823.9, + "valid_targets_min": 657 + }, + { + "epoch": 3.7237237237237237, + "grad_norm": 0.6350177279756045, + "learning_rate": 2.1279726668103112e-05, + "loss": 0.2235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.289498507976532, + "step": 2480, + "valid_targets_mean": 3174.9, + "valid_targets_min": 597 + }, + { + "epoch": 3.7312312312312312, + "grad_norm": 0.5475085951271096, + "learning_rate": 2.1204982357040974e-05, + "loss": 0.2321, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2232646644115448, + "step": 2485, + "valid_targets_mean": 3746.9, + "valid_targets_min": 684 + }, + { + "epoch": 3.7387387387387387, + "grad_norm": 0.5721638552098921, + "learning_rate": 2.1130221151064095e-05, + "loss": 0.2408, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2669849693775177, + "step": 2490, + "valid_targets_mean": 4861.1, + "valid_targets_min": 469 + }, + { + "epoch": 3.7462462462462462, + "grad_norm": 0.46167196232037994, + "learning_rate": 2.105544409839048e-05, + "loss": 0.2249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1929030418395996, + "step": 2495, + "valid_targets_mean": 4768.8, + "valid_targets_min": 888 + }, + { + "epoch": 3.7537537537537538, + "grad_norm": 0.5709387953688929, + "learning_rate": 2.098065224746031e-05, + "loss": 0.2465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26624026894569397, + "step": 2500, + "valid_targets_mean": 3260.7, + "valid_targets_min": 788 + }, + { + "epoch": 3.7612612612612613, + "grad_norm": 0.5636970256534266, + "learning_rate": 2.0905846646921266e-05, + "loss": 0.2595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29653650522232056, + "step": 2505, + "valid_targets_mean": 3569.1, + "valid_targets_min": 856 + }, + { + "epoch": 3.7687687687687688, + "grad_norm": 0.4293486036135388, + "learning_rate": 2.0831028345613807e-05, + "loss": 0.2521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22826270759105682, + "step": 2510, + "valid_targets_mean": 5393.5, + "valid_targets_min": 404 + }, + { + "epoch": 3.7762762762762763, + "grad_norm": 0.5914611642027671, + "learning_rate": 2.0756198392556458e-05, + "loss": 0.2313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22963778674602509, + "step": 2515, + "valid_targets_mean": 2932.8, + "valid_targets_min": 513 + }, + { + "epoch": 3.7837837837837838, + "grad_norm": 0.5163134230928269, + "learning_rate": 2.0681357836931114e-05, + "loss": 0.224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.207438126206398, + "step": 2520, + "valid_targets_mean": 4660.4, + "valid_targets_min": 750 + }, + { + "epoch": 3.7912912912912913, + "grad_norm": 0.7190308106564102, + "learning_rate": 2.0606507728068338e-05, + "loss": 0.2276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2699480652809143, + "step": 2525, + "valid_targets_mean": 2463.4, + "valid_targets_min": 475 + }, + { + "epoch": 3.798798798798799, + "grad_norm": 0.6346404271383566, + "learning_rate": 2.0531649115432626e-05, + "loss": 0.2426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26241040229797363, + "step": 2530, + "valid_targets_mean": 3470.4, + "valid_targets_min": 613 + }, + { + "epoch": 3.8063063063063063, + "grad_norm": 0.5405574803276272, + "learning_rate": 2.0456783048607708e-05, + "loss": 0.234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2565130591392517, + "step": 2535, + "valid_targets_mean": 4024.1, + "valid_targets_min": 669 + }, + { + "epoch": 3.813813813813814, + "grad_norm": 0.604890803793606, + "learning_rate": 2.038191057728183e-05, + "loss": 0.2225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2380007952451706, + "step": 2540, + "valid_targets_mean": 3456.8, + "valid_targets_min": 662 + }, + { + "epoch": 3.8213213213213213, + "grad_norm": 0.6793469587756615, + "learning_rate": 2.0307032751233038e-05, + "loss": 0.2593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2839970290660858, + "step": 2545, + "valid_targets_mean": 4329.7, + "valid_targets_min": 681 + }, + { + "epoch": 3.828828828828829, + "grad_norm": 0.5213037499939942, + "learning_rate": 2.023215062031445e-05, + "loss": 0.247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2534673511981964, + "step": 2550, + "valid_targets_mean": 4234.5, + "valid_targets_min": 437 + }, + { + "epoch": 3.8363363363363363, + "grad_norm": 0.48716095297652656, + "learning_rate": 2.0157265234439545e-05, + "loss": 0.2399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17926955223083496, + "step": 2555, + "valid_targets_mean": 4508.6, + "valid_targets_min": 404 + }, + { + "epoch": 3.843843843843844, + "grad_norm": 0.45658767125521904, + "learning_rate": 2.0082377643567427e-05, + "loss": 0.2135, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16734261810779572, + "step": 2560, + "valid_targets_mean": 4532.5, + "valid_targets_min": 484 + }, + { + "epoch": 3.8513513513513513, + "grad_norm": 0.5887721035383384, + "learning_rate": 2.0007488897688145e-05, + "loss": 0.2469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2615935802459717, + "step": 2565, + "valid_targets_mean": 3602.4, + "valid_targets_min": 596 + }, + { + "epoch": 3.858858858858859, + "grad_norm": 0.5556930711337941, + "learning_rate": 1.9932600046807914e-05, + "loss": 0.2355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1911911964416504, + "step": 2570, + "valid_targets_mean": 3493.8, + "valid_targets_min": 801 + }, + { + "epoch": 3.8663663663663663, + "grad_norm": 0.4918999956248638, + "learning_rate": 1.985771214093444e-05, + "loss": 0.2162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19995930790901184, + "step": 2575, + "valid_targets_mean": 4204.2, + "valid_targets_min": 634 + }, + { + "epoch": 3.873873873873874, + "grad_norm": 0.52841578647817, + "learning_rate": 1.9782826230062155e-05, + "loss": 0.2705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28531813621520996, + "step": 2580, + "valid_targets_mean": 4070.0, + "valid_targets_min": 731 + }, + { + "epoch": 3.8813813813813813, + "grad_norm": 0.5040983553316225, + "learning_rate": 1.9707943364157552e-05, + "loss": 0.2039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18574920296669006, + "step": 2585, + "valid_targets_mean": 3998.8, + "valid_targets_min": 416 + }, + { + "epoch": 3.888888888888889, + "grad_norm": 0.49554889736984387, + "learning_rate": 1.9633064593144408e-05, + "loss": 0.217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20392203330993652, + "step": 2590, + "valid_targets_mean": 4092.8, + "valid_targets_min": 638 + }, + { + "epoch": 3.8963963963963963, + "grad_norm": 0.4727254866083527, + "learning_rate": 1.9558190966889093e-05, + "loss": 0.2598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21000804007053375, + "step": 2595, + "valid_targets_mean": 5088.9, + "valid_targets_min": 615 + }, + { + "epoch": 3.903903903903904, + "grad_norm": 0.43824564462204646, + "learning_rate": 1.9483323535185838e-05, + "loss": 0.2129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1834757924079895, + "step": 2600, + "valid_targets_mean": 4795.2, + "valid_targets_min": 852 + }, + { + "epoch": 3.9114114114114114, + "grad_norm": 0.4414256886221553, + "learning_rate": 1.940846334774203e-05, + "loss": 0.2198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1729404479265213, + "step": 2605, + "valid_targets_mean": 4699.6, + "valid_targets_min": 635 + }, + { + "epoch": 3.918918918918919, + "grad_norm": 0.5060621091961622, + "learning_rate": 1.933361145416348e-05, + "loss": 0.2215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2322886735200882, + "step": 2610, + "valid_targets_mean": 6213.0, + "valid_targets_min": 639 + }, + { + "epoch": 3.9264264264264264, + "grad_norm": 0.593672866808795, + "learning_rate": 1.9258768903939706e-05, + "loss": 0.2432, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24251478910446167, + "step": 2615, + "valid_targets_mean": 3359.5, + "valid_targets_min": 591 + }, + { + "epoch": 3.933933933933934, + "grad_norm": 0.4404905443315122, + "learning_rate": 1.9183936746429234e-05, + "loss": 0.2666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2132512629032135, + "step": 2620, + "valid_targets_mean": 5353.2, + "valid_targets_min": 322 + }, + { + "epoch": 3.9414414414414414, + "grad_norm": 0.5116438369388818, + "learning_rate": 1.9109116030844874e-05, + "loss": 0.2318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27721619606018066, + "step": 2625, + "valid_targets_mean": 4330.2, + "valid_targets_min": 757 + }, + { + "epoch": 3.948948948948949, + "grad_norm": 0.5905044794050749, + "learning_rate": 1.9034307806239004e-05, + "loss": 0.2603, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33162492513656616, + "step": 2630, + "valid_targets_mean": 4979.0, + "valid_targets_min": 434 + }, + { + "epoch": 3.9564564564564564, + "grad_norm": 0.5353633419703424, + "learning_rate": 1.8959513121488868e-05, + "loss": 0.2544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23549598455429077, + "step": 2635, + "valid_targets_mean": 4418.6, + "valid_targets_min": 858 + }, + { + "epoch": 3.963963963963964, + "grad_norm": 0.47963900371966894, + "learning_rate": 1.8884733025281876e-05, + "loss": 0.244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1948413848876953, + "step": 2640, + "valid_targets_mean": 4141.8, + "valid_targets_min": 726 + }, + { + "epoch": 3.9714714714714714, + "grad_norm": 0.44606918683200836, + "learning_rate": 1.880996856610088e-05, + "loss": 0.2417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21382328867912292, + "step": 2645, + "valid_targets_mean": 5472.8, + "valid_targets_min": 745 + }, + { + "epoch": 3.978978978978979, + "grad_norm": 0.5149587332986721, + "learning_rate": 1.8735220792209513e-05, + "loss": 0.228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19594892859458923, + "step": 2650, + "valid_targets_mean": 4153.9, + "valid_targets_min": 598 + }, + { + "epoch": 3.9864864864864864, + "grad_norm": 0.5874287195534135, + "learning_rate": 1.8660490751637435e-05, + "loss": 0.2352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22586959600448608, + "step": 2655, + "valid_targets_mean": 3378.9, + "valid_targets_min": 506 + }, + { + "epoch": 3.993993993993994, + "grad_norm": 0.46189741004980345, + "learning_rate": 1.858577949216569e-05, + "loss": 0.2134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2193407416343689, + "step": 2660, + "valid_targets_mean": 4348.1, + "valid_targets_min": 447 + }, + { + "epoch": 4.001501501501502, + "grad_norm": 0.5233378328639746, + "learning_rate": 1.8511088061311982e-05, + "loss": 0.2246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2248275727033615, + "step": 2665, + "valid_targets_mean": 3952.7, + "valid_targets_min": 671 + }, + { + "epoch": 4.009009009009009, + "grad_norm": 0.6071971400069938, + "learning_rate": 1.8436417506316013e-05, + "loss": 0.2354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23917962610721588, + "step": 2670, + "valid_targets_mean": 3420.5, + "valid_targets_min": 429 + }, + { + "epoch": 4.016516516516517, + "grad_norm": 0.5914162200350297, + "learning_rate": 1.8361768874124778e-05, + "loss": 0.2518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2445971816778183, + "step": 2675, + "valid_targets_mean": 4843.9, + "valid_targets_min": 471 + }, + { + "epoch": 4.024024024024024, + "grad_norm": 0.547907620188072, + "learning_rate": 1.8287143211377893e-05, + "loss": 0.2157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1973641812801361, + "step": 2680, + "valid_targets_mean": 4041.2, + "valid_targets_min": 418 + }, + { + "epoch": 4.031531531531532, + "grad_norm": 0.5133198173072647, + "learning_rate": 1.8212541564392924e-05, + "loss": 0.2281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17949888110160828, + "step": 2685, + "valid_targets_mean": 3785.6, + "valid_targets_min": 600 + }, + { + "epoch": 4.039039039039039, + "grad_norm": 0.47846466241843566, + "learning_rate": 1.813796497915073e-05, + "loss": 0.2478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2564462423324585, + "step": 2690, + "valid_targets_mean": 5844.4, + "valid_targets_min": 506 + }, + { + "epoch": 4.046546546546547, + "grad_norm": 0.569435583148903, + "learning_rate": 1.806341450128076e-05, + "loss": 0.2314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22870904207229614, + "step": 2695, + "valid_targets_mean": 4163.9, + "valid_targets_min": 699 + }, + { + "epoch": 4.054054054054054, + "grad_norm": 0.5070243213845931, + "learning_rate": 1.798889117604643e-05, + "loss": 0.2164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23002973198890686, + "step": 2700, + "valid_targets_mean": 4445.3, + "valid_targets_min": 705 + }, + { + "epoch": 4.061561561561562, + "grad_norm": 0.4867044644660382, + "learning_rate": 1.7914396048330428e-05, + "loss": 0.1903, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19053609669208527, + "step": 2705, + "valid_targets_mean": 4508.4, + "valid_targets_min": 569 + }, + { + "epoch": 4.069069069069069, + "grad_norm": 0.5805381784749756, + "learning_rate": 1.7839930162620128e-05, + "loss": 0.2136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1698979139328003, + "step": 2710, + "valid_targets_mean": 3140.1, + "valid_targets_min": 798 + }, + { + "epoch": 4.076576576576577, + "grad_norm": 0.602504277734938, + "learning_rate": 1.7765494562992878e-05, + "loss": 0.2122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18390074372291565, + "step": 2715, + "valid_targets_mean": 3297.9, + "valid_targets_min": 534 + }, + { + "epoch": 4.084084084084084, + "grad_norm": 0.6470125645401709, + "learning_rate": 1.7691090293101386e-05, + "loss": 0.2572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28710392117500305, + "step": 2720, + "valid_targets_mean": 3918.4, + "valid_targets_min": 437 + }, + { + "epoch": 4.091591591591592, + "grad_norm": 0.5066063002958375, + "learning_rate": 1.76167183961591e-05, + "loss": 0.2266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2291889488697052, + "step": 2725, + "valid_targets_mean": 5055.3, + "valid_targets_min": 388 + }, + { + "epoch": 4.099099099099099, + "grad_norm": 0.6455258191217119, + "learning_rate": 1.7542379914925575e-05, + "loss": 0.2079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23442783951759338, + "step": 2730, + "valid_targets_mean": 2962.8, + "valid_targets_min": 666 + }, + { + "epoch": 4.106606606606607, + "grad_norm": 0.7386040382747444, + "learning_rate": 1.7468075891691832e-05, + "loss": 0.2052, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21252450346946716, + "step": 2735, + "valid_targets_mean": 3100.8, + "valid_targets_min": 518 + }, + { + "epoch": 4.114114114114114, + "grad_norm": 0.48445294014230816, + "learning_rate": 1.7393807368265777e-05, + "loss": 0.208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22077061235904694, + "step": 2740, + "valid_targets_mean": 5577.0, + "valid_targets_min": 614 + }, + { + "epoch": 4.121621621621622, + "grad_norm": 0.48865023064949326, + "learning_rate": 1.731957538595756e-05, + "loss": 0.22, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19657564163208008, + "step": 2745, + "valid_targets_mean": 4633.6, + "valid_targets_min": 542 + }, + { + "epoch": 4.129129129129129, + "grad_norm": 0.6690952004993063, + "learning_rate": 1.7245380985565014e-05, + "loss": 0.2026, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18894386291503906, + "step": 2750, + "valid_targets_mean": 3604.8, + "valid_targets_min": 437 + }, + { + "epoch": 4.136636636636637, + "grad_norm": 0.7195870104883615, + "learning_rate": 1.7171225207359023e-05, + "loss": 0.2258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27104708552360535, + "step": 2755, + "valid_targets_mean": 3011.1, + "valid_targets_min": 948 + }, + { + "epoch": 4.1441441441441444, + "grad_norm": 0.5379416037342353, + "learning_rate": 1.7097109091068965e-05, + "loss": 0.2531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3290520906448364, + "step": 2760, + "valid_targets_mean": 6327.2, + "valid_targets_min": 875 + }, + { + "epoch": 4.151651651651652, + "grad_norm": 0.4819288545219707, + "learning_rate": 1.7023033675868107e-05, + "loss": 0.2337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19341519474983215, + "step": 2765, + "valid_targets_mean": 5650.7, + "valid_targets_min": 1052 + }, + { + "epoch": 4.1591591591591595, + "grad_norm": 0.4339406422217775, + "learning_rate": 1.694900000035907e-05, + "loss": 0.1945, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18829095363616943, + "step": 2770, + "valid_targets_mean": 5408.4, + "valid_targets_min": 808 + }, + { + "epoch": 4.166666666666667, + "grad_norm": 0.5494279815313058, + "learning_rate": 1.687500910255924e-05, + "loss": 0.2345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29033520817756653, + "step": 2775, + "valid_targets_mean": 4524.4, + "valid_targets_min": 660 + }, + { + "epoch": 4.1741741741741745, + "grad_norm": 0.4489742319836925, + "learning_rate": 1.6801062019886216e-05, + "loss": 0.2443, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2034509927034378, + "step": 2780, + "valid_targets_mean": 4920.3, + "valid_targets_min": 427 + }, + { + "epoch": 4.181681681681682, + "grad_norm": 0.5280057369516138, + "learning_rate": 1.6727159789143276e-05, + "loss": 0.2251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25000298023223877, + "step": 2785, + "valid_targets_mean": 6445.3, + "valid_targets_min": 460 + }, + { + "epoch": 4.1891891891891895, + "grad_norm": 0.570852623942594, + "learning_rate": 1.665330344650484e-05, + "loss": 0.2091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23752200603485107, + "step": 2790, + "valid_targets_mean": 4025.2, + "valid_targets_min": 639 + }, + { + "epoch": 4.196696696696697, + "grad_norm": 0.5022860617467421, + "learning_rate": 1.6579494027501926e-05, + "loss": 0.2702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25862377882003784, + "step": 2795, + "valid_targets_mean": 5591.9, + "valid_targets_min": 1079 + }, + { + "epoch": 4.2042042042042045, + "grad_norm": 0.5808638456409436, + "learning_rate": 1.6505732567007644e-05, + "loss": 0.2144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2629520297050476, + "step": 2800, + "valid_targets_mean": 4279.3, + "valid_targets_min": 681 + }, + { + "epoch": 4.211711711711712, + "grad_norm": 0.5011677067782193, + "learning_rate": 1.6432020099222686e-05, + "loss": 0.2239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17377246916294098, + "step": 2805, + "valid_targets_mean": 4540.3, + "valid_targets_min": 506 + }, + { + "epoch": 4.2192192192192195, + "grad_norm": 0.6312221103561748, + "learning_rate": 1.635835765766082e-05, + "loss": 0.1895, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1711842268705368, + "step": 2810, + "valid_targets_mean": 4152.6, + "valid_targets_min": 576 + }, + { + "epoch": 4.226726726726727, + "grad_norm": 0.5455241347866726, + "learning_rate": 1.6284746275134413e-05, + "loss": 0.2123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2519665062427521, + "step": 2815, + "valid_targets_mean": 4392.9, + "valid_targets_min": 605 + }, + { + "epoch": 4.2342342342342345, + "grad_norm": 0.544839041978784, + "learning_rate": 1.621118698373992e-05, + "loss": 0.2643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22317945957183838, + "step": 2820, + "valid_targets_mean": 4256.4, + "valid_targets_min": 508 + }, + { + "epoch": 4.241741741741742, + "grad_norm": 0.5207115855205976, + "learning_rate": 1.6137680814843447e-05, + "loss": 0.2287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26101672649383545, + "step": 2825, + "valid_targets_mean": 4533.8, + "valid_targets_min": 702 + }, + { + "epoch": 4.2492492492492495, + "grad_norm": 0.7554547871541064, + "learning_rate": 1.6064228799066272e-05, + "loss": 0.2369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2488705813884735, + "step": 2830, + "valid_targets_mean": 3717.3, + "valid_targets_min": 599 + }, + { + "epoch": 4.256756756756757, + "grad_norm": 0.5076882374389426, + "learning_rate": 1.5990831966270396e-05, + "loss": 0.2121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20610791444778442, + "step": 2835, + "valid_targets_mean": 5082.6, + "valid_targets_min": 660 + }, + { + "epoch": 4.2642642642642645, + "grad_norm": 0.563842648058139, + "learning_rate": 1.5917491345544113e-05, + "loss": 0.2136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18700063228607178, + "step": 2840, + "valid_targets_mean": 4268.7, + "valid_targets_min": 596 + }, + { + "epoch": 4.271771771771772, + "grad_norm": 0.5630065305626606, + "learning_rate": 1.584420796518756e-05, + "loss": 0.244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2228190302848816, + "step": 2845, + "valid_targets_mean": 3847.0, + "valid_targets_min": 865 + }, + { + "epoch": 4.2792792792792795, + "grad_norm": 0.5990679311909958, + "learning_rate": 1.5770982852698337e-05, + "loss": 0.2485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2809998095035553, + "step": 2850, + "valid_targets_mean": 3885.7, + "valid_targets_min": 634 + }, + { + "epoch": 4.286786786786787, + "grad_norm": 0.5512552937482088, + "learning_rate": 1.5697817034757052e-05, + "loss": 0.2027, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1838597059249878, + "step": 2855, + "valid_targets_mean": 3917.1, + "valid_targets_min": 550 + }, + { + "epoch": 4.2942942942942945, + "grad_norm": 0.560081521292376, + "learning_rate": 1.5624711537212967e-05, + "loss": 0.2265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20176030695438385, + "step": 2860, + "valid_targets_mean": 3504.7, + "valid_targets_min": 694 + }, + { + "epoch": 4.301801801801802, + "grad_norm": 0.7258528593213625, + "learning_rate": 1.5551667385069593e-05, + "loss": 0.2005, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22372370958328247, + "step": 2865, + "valid_targets_mean": 2295.4, + "valid_targets_min": 423 + }, + { + "epoch": 4.3093093093093096, + "grad_norm": 0.7471444365279868, + "learning_rate": 1.5478685602470324e-05, + "loss": 0.2511, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24615955352783203, + "step": 2870, + "valid_targets_mean": 3811.6, + "valid_targets_min": 748 + }, + { + "epoch": 4.316816816816817, + "grad_norm": 0.6829203066694415, + "learning_rate": 1.5405767212684078e-05, + "loss": 0.2372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21476216614246368, + "step": 2875, + "valid_targets_mean": 3288.6, + "valid_targets_min": 535 + }, + { + "epoch": 4.324324324324325, + "grad_norm": 0.5733535380374483, + "learning_rate": 1.5332913238090962e-05, + "loss": 0.2276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2381066530942917, + "step": 2880, + "valid_targets_mean": 3251.5, + "valid_targets_min": 697 + }, + { + "epoch": 4.331831831831832, + "grad_norm": 0.6499639228286483, + "learning_rate": 1.52601247001679e-05, + "loss": 0.2267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23366223275661469, + "step": 2885, + "valid_targets_mean": 2987.1, + "valid_targets_min": 539 + }, + { + "epoch": 4.33933933933934, + "grad_norm": 0.6285744664120035, + "learning_rate": 1.5187402619474361e-05, + "loss": 0.2083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.195681631565094, + "step": 2890, + "valid_targets_mean": 4077.1, + "valid_targets_min": 678 + }, + { + "epoch": 4.346846846846847, + "grad_norm": 0.6210882117570284, + "learning_rate": 1.5114748015638013e-05, + "loss": 0.2127, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21708352863788605, + "step": 2895, + "valid_targets_mean": 4107.8, + "valid_targets_min": 760 + }, + { + "epoch": 4.354354354354355, + "grad_norm": 0.5855096649034774, + "learning_rate": 1.5042161907340438e-05, + "loss": 0.2292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25563323497772217, + "step": 2900, + "valid_targets_mean": 3565.5, + "valid_targets_min": 562 + }, + { + "epoch": 4.361861861861862, + "grad_norm": 0.5300025815920703, + "learning_rate": 1.4969645312302851e-05, + "loss": 0.2197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22752434015274048, + "step": 2905, + "valid_targets_mean": 4113.8, + "valid_targets_min": 381 + }, + { + "epoch": 4.36936936936937, + "grad_norm": 0.5804189010231626, + "learning_rate": 1.4897199247271842e-05, + "loss": 0.2025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1874658167362213, + "step": 2910, + "valid_targets_mean": 3798.9, + "valid_targets_min": 452 + }, + { + "epoch": 4.376876876876877, + "grad_norm": 0.48760141515061334, + "learning_rate": 1.4824824728005092e-05, + "loss": 0.2434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.257068395614624, + "step": 2915, + "valid_targets_mean": 5025.7, + "valid_targets_min": 584 + }, + { + "epoch": 4.384384384384385, + "grad_norm": 0.658477050968013, + "learning_rate": 1.4752522769257152e-05, + "loss": 0.2147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22656235098838806, + "step": 2920, + "valid_targets_mean": 3549.4, + "valid_targets_min": 624 + }, + { + "epoch": 4.391891891891892, + "grad_norm": 0.7492347794498558, + "learning_rate": 1.468029438476521e-05, + "loss": 0.2305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22716817259788513, + "step": 2925, + "valid_targets_mean": 2520.4, + "valid_targets_min": 536 + }, + { + "epoch": 4.3993993993994, + "grad_norm": 0.5532580940259121, + "learning_rate": 1.4608140587234887e-05, + "loss": 0.2064, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20708701014518738, + "step": 2930, + "valid_targets_mean": 3850.1, + "valid_targets_min": 761 + }, + { + "epoch": 4.406906906906907, + "grad_norm": 0.4546505507730152, + "learning_rate": 1.453606238832602e-05, + "loss": 0.2218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21117320656776428, + "step": 2935, + "valid_targets_mean": 6005.3, + "valid_targets_min": 813 + }, + { + "epoch": 4.414414414414415, + "grad_norm": 0.7252302746614107, + "learning_rate": 1.4464060798638484e-05, + "loss": 0.2291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18549787998199463, + "step": 2940, + "valid_targets_mean": 3605.3, + "valid_targets_min": 847 + }, + { + "epoch": 4.421921921921922, + "grad_norm": 0.43525814011469965, + "learning_rate": 1.4392136827698032e-05, + "loss": 0.1929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16544996201992035, + "step": 2945, + "valid_targets_mean": 5133.1, + "valid_targets_min": 591 + }, + { + "epoch": 4.42942942942943, + "grad_norm": 0.6290052391762472, + "learning_rate": 1.4320291483942135e-05, + "loss": 0.2245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21728135645389557, + "step": 2950, + "valid_targets_mean": 3071.6, + "valid_targets_min": 601 + }, + { + "epoch": 4.436936936936937, + "grad_norm": 0.568290272425125, + "learning_rate": 1.424852577470584e-05, + "loss": 0.2298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18296922743320465, + "step": 2955, + "valid_targets_mean": 5362.2, + "valid_targets_min": 803 + }, + { + "epoch": 4.444444444444445, + "grad_norm": 0.603259783928807, + "learning_rate": 1.417684070620764e-05, + "loss": 0.2261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23192979395389557, + "step": 2960, + "valid_targets_mean": 3260.2, + "valid_targets_min": 713 + }, + { + "epoch": 4.451951951951952, + "grad_norm": 0.5712898946994219, + "learning_rate": 1.4105237283535376e-05, + "loss": 0.207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2266012728214264, + "step": 2965, + "valid_targets_mean": 4215.6, + "valid_targets_min": 691 + }, + { + "epoch": 4.45945945945946, + "grad_norm": 0.6445579585402789, + "learning_rate": 1.403371651063216e-05, + "loss": 0.2363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24278521537780762, + "step": 2970, + "valid_targets_mean": 3313.9, + "valid_targets_min": 374 + }, + { + "epoch": 4.466966966966967, + "grad_norm": 0.5733027694106058, + "learning_rate": 1.3962279390282261e-05, + "loss": 0.243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2066403329372406, + "step": 2975, + "valid_targets_mean": 4162.3, + "valid_targets_min": 764 + }, + { + "epoch": 4.474474474474475, + "grad_norm": 0.613924525173538, + "learning_rate": 1.3890926924097071e-05, + "loss": 0.1938, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20435193181037903, + "step": 2980, + "valid_targets_mean": 3513.4, + "valid_targets_min": 652 + }, + { + "epoch": 4.481981981981982, + "grad_norm": 0.5403888711125328, + "learning_rate": 1.3819660112501054e-05, + "loss": 0.2242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20203976333141327, + "step": 2985, + "valid_targets_mean": 4108.9, + "valid_targets_min": 655 + }, + { + "epoch": 4.48948948948949, + "grad_norm": 0.5594472556122174, + "learning_rate": 1.3748479954717735e-05, + "loss": 0.2462, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20039498805999756, + "step": 2990, + "valid_targets_mean": 3769.4, + "valid_targets_min": 504 + }, + { + "epoch": 4.496996996996997, + "grad_norm": 0.5426786497323537, + "learning_rate": 1.3677387448755657e-05, + "loss": 0.2068, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21668657660484314, + "step": 2995, + "valid_targets_mean": 3945.2, + "valid_targets_min": 667 + }, + { + "epoch": 4.504504504504505, + "grad_norm": 0.4470248026304529, + "learning_rate": 1.360638359139442e-05, + "loss": 0.2203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2280035763978958, + "step": 3000, + "valid_targets_mean": 5678.4, + "valid_targets_min": 1551 + }, + { + "epoch": 4.512012012012012, + "grad_norm": 0.609794314414809, + "learning_rate": 1.3535469378170683e-05, + "loss": 0.239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2744963467121124, + "step": 3005, + "valid_targets_mean": 3750.4, + "valid_targets_min": 674 + }, + { + "epoch": 4.51951951951952, + "grad_norm": 0.4627448347366898, + "learning_rate": 1.3464645803364228e-05, + "loss": 0.2439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21215581893920898, + "step": 3010, + "valid_targets_mean": 5590.1, + "valid_targets_min": 528 + }, + { + "epoch": 4.527027027027027, + "grad_norm": 0.6035357299819745, + "learning_rate": 1.3393913859983996e-05, + "loss": 0.2138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2198573499917984, + "step": 3015, + "valid_targets_mean": 6302.9, + "valid_targets_min": 789 + }, + { + "epoch": 4.534534534534535, + "grad_norm": 0.5643676209847303, + "learning_rate": 1.3323274539754177e-05, + "loss": 0.2198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25696855783462524, + "step": 3020, + "valid_targets_mean": 4303.8, + "valid_targets_min": 634 + }, + { + "epoch": 4.542042042042042, + "grad_norm": 0.570048922978165, + "learning_rate": 1.3252728833100296e-05, + "loss": 0.2203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19746547937393188, + "step": 3025, + "valid_targets_mean": 3560.8, + "valid_targets_min": 598 + }, + { + "epoch": 4.54954954954955, + "grad_norm": 0.4464417263684137, + "learning_rate": 1.3182277729135358e-05, + "loss": 0.2287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19879616796970367, + "step": 3030, + "valid_targets_mean": 7131.6, + "valid_targets_min": 460 + }, + { + "epoch": 4.557057057057057, + "grad_norm": 0.5089601591681008, + "learning_rate": 1.3111922215645922e-05, + "loss": 0.2126, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23360319435596466, + "step": 3035, + "valid_targets_mean": 4634.0, + "valid_targets_min": 654 + }, + { + "epoch": 4.564564564564565, + "grad_norm": 0.6823720809640251, + "learning_rate": 1.3041663279078311e-05, + "loss": 0.2104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2097683995962143, + "step": 3040, + "valid_targets_mean": 2433.6, + "valid_targets_min": 562 + }, + { + "epoch": 4.572072072072072, + "grad_norm": 0.6700606322768459, + "learning_rate": 1.297150190452473e-05, + "loss": 0.2051, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2061670869588852, + "step": 3045, + "valid_targets_mean": 2720.6, + "valid_targets_min": 529 + }, + { + "epoch": 4.57957957957958, + "grad_norm": 0.6228866174244457, + "learning_rate": 1.2901439075709506e-05, + "loss": 0.2165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18749915063381195, + "step": 3050, + "valid_targets_mean": 3271.4, + "valid_targets_min": 416 + }, + { + "epoch": 4.587087087087087, + "grad_norm": 0.600075319444778, + "learning_rate": 1.2831475774975247e-05, + "loss": 0.2088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24314680695533752, + "step": 3055, + "valid_targets_mean": 4222.2, + "valid_targets_min": 520 + }, + { + "epoch": 4.594594594594595, + "grad_norm": 0.4219894648635082, + "learning_rate": 1.2761612983269094e-05, + "loss": 0.2195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18051698803901672, + "step": 3060, + "valid_targets_mean": 6413.9, + "valid_targets_min": 703 + }, + { + "epoch": 4.602102102102102, + "grad_norm": 0.5430062852225149, + "learning_rate": 1.2691851680128966e-05, + "loss": 0.2119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18320244550704956, + "step": 3065, + "valid_targets_mean": 3880.8, + "valid_targets_min": 727 + }, + { + "epoch": 4.60960960960961, + "grad_norm": 0.4910324433811435, + "learning_rate": 1.2622192843669826e-05, + "loss": 0.2111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1959684193134308, + "step": 3070, + "valid_targets_mean": 5296.5, + "valid_targets_min": 512 + }, + { + "epoch": 4.617117117117117, + "grad_norm": 0.5134831494706088, + "learning_rate": 1.255263745056996e-05, + "loss": 0.2051, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21355494856834412, + "step": 3075, + "valid_targets_mean": 5030.8, + "valid_targets_min": 469 + }, + { + "epoch": 4.624624624624625, + "grad_norm": 0.5527873198315599, + "learning_rate": 1.2483186476057281e-05, + "loss": 0.2181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1989469826221466, + "step": 3080, + "valid_targets_mean": 3602.9, + "valid_targets_min": 630 + }, + { + "epoch": 4.632132132132132, + "grad_norm": 0.7003658441849595, + "learning_rate": 1.2413840893895668e-05, + "loss": 0.1931, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20069898664951324, + "step": 3085, + "valid_targets_mean": 3747.1, + "valid_targets_min": 474 + }, + { + "epoch": 4.63963963963964, + "grad_norm": 0.5774616260959406, + "learning_rate": 1.2344601676371312e-05, + "loss": 0.2107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2032512128353119, + "step": 3090, + "valid_targets_mean": 5143.7, + "valid_targets_min": 1085 + }, + { + "epoch": 4.647147147147147, + "grad_norm": 0.609625942880746, + "learning_rate": 1.2275469794279068e-05, + "loss": 0.2376, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24463210999965668, + "step": 3095, + "valid_targets_mean": 3302.1, + "valid_targets_min": 211 + }, + { + "epoch": 4.654654654654655, + "grad_norm": 0.5539889957775487, + "learning_rate": 1.220644621690885e-05, + "loss": 0.2184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19467709958553314, + "step": 3100, + "valid_targets_mean": 3723.1, + "valid_targets_min": 637 + }, + { + "epoch": 4.662162162162162, + "grad_norm": 0.5324494392088087, + "learning_rate": 1.2137531912032058e-05, + "loss": 0.2179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2482069581747055, + "step": 3105, + "valid_targets_mean": 4753.6, + "valid_targets_min": 634 + }, + { + "epoch": 4.66966966966967, + "grad_norm": 0.5048180559075982, + "learning_rate": 1.2068727845887995e-05, + "loss": 0.2118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24020060896873474, + "step": 3110, + "valid_targets_mean": 5051.0, + "valid_targets_min": 462 + }, + { + "epoch": 4.677177177177177, + "grad_norm": 0.4832724873118443, + "learning_rate": 1.2000034983170305e-05, + "loss": 0.2357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22151488065719604, + "step": 3115, + "valid_targets_mean": 5193.5, + "valid_targets_min": 674 + }, + { + "epoch": 4.684684684684685, + "grad_norm": 0.5679523589750985, + "learning_rate": 1.193145428701347e-05, + "loss": 0.1978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2017378807067871, + "step": 3120, + "valid_targets_mean": 4752.9, + "valid_targets_min": 764 + }, + { + "epoch": 4.692192192192192, + "grad_norm": 0.8966754020165514, + "learning_rate": 1.18629867189793e-05, + "loss": 0.2302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24184086918830872, + "step": 3125, + "valid_targets_mean": 2982.5, + "valid_targets_min": 515 + }, + { + "epoch": 4.6996996996997, + "grad_norm": 0.49411819508382815, + "learning_rate": 1.1794633239043449e-05, + "loss": 0.2028, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2202903926372528, + "step": 3130, + "valid_targets_mean": 4615.9, + "valid_targets_min": 738 + }, + { + "epoch": 4.707207207207207, + "grad_norm": 0.4928998645668594, + "learning_rate": 1.1726394805581957e-05, + "loss": 0.2398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21853674948215485, + "step": 3135, + "valid_targets_mean": 5042.8, + "valid_targets_min": 656 + }, + { + "epoch": 4.714714714714715, + "grad_norm": 0.5803190883509034, + "learning_rate": 1.1658272375357797e-05, + "loss": 0.2426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24375799298286438, + "step": 3140, + "valid_targets_mean": 4044.2, + "valid_targets_min": 525 + }, + { + "epoch": 4.722222222222222, + "grad_norm": 0.5795814998007034, + "learning_rate": 1.1590266903507499e-05, + "loss": 0.2371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2230663299560547, + "step": 3145, + "valid_targets_mean": 4489.6, + "valid_targets_min": 930 + }, + { + "epoch": 4.72972972972973, + "grad_norm": 0.6742257430638947, + "learning_rate": 1.1522379343527708e-05, + "loss": 0.2535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23786276578903198, + "step": 3150, + "valid_targets_mean": 4348.9, + "valid_targets_min": 534 + }, + { + "epoch": 4.737237237237237, + "grad_norm": 0.49627665219198364, + "learning_rate": 1.1454610647261859e-05, + "loss": 0.2174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2170691192150116, + "step": 3155, + "valid_targets_mean": 4579.3, + "valid_targets_min": 701 + }, + { + "epoch": 4.744744744744745, + "grad_norm": 0.4965773011218609, + "learning_rate": 1.1386961764886806e-05, + "loss": 0.2299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21680960059165955, + "step": 3160, + "valid_targets_mean": 4751.6, + "valid_targets_min": 970 + }, + { + "epoch": 4.752252252252252, + "grad_norm": 0.46564876478736006, + "learning_rate": 1.1319433644899496e-05, + "loss": 0.2346, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20024517178535461, + "step": 3165, + "valid_targets_mean": 5608.7, + "valid_targets_min": 565 + }, + { + "epoch": 4.75975975975976, + "grad_norm": 0.8219903727111237, + "learning_rate": 1.1252027234103709e-05, + "loss": 0.2175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21334412693977356, + "step": 3170, + "valid_targets_mean": 3558.3, + "valid_targets_min": 684 + }, + { + "epoch": 4.767267267267267, + "grad_norm": 0.5331331638771257, + "learning_rate": 1.118474347759673e-05, + "loss": 0.2032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23813585937023163, + "step": 3175, + "valid_targets_mean": 4562.6, + "valid_targets_min": 700 + }, + { + "epoch": 4.774774774774775, + "grad_norm": 0.7204879070632291, + "learning_rate": 1.1117583318756128e-05, + "loss": 0.2218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21081534028053284, + "step": 3180, + "valid_targets_mean": 4559.9, + "valid_targets_min": 347 + }, + { + "epoch": 4.782282282282282, + "grad_norm": 0.6342862982793008, + "learning_rate": 1.1050547699226522e-05, + "loss": 0.2234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21464157104492188, + "step": 3185, + "valid_targets_mean": 4187.8, + "valid_targets_min": 611 + }, + { + "epoch": 4.78978978978979, + "grad_norm": 0.5094911353991051, + "learning_rate": 1.09836375589064e-05, + "loss": 0.2126, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21020804345607758, + "step": 3190, + "valid_targets_mean": 5157.7, + "valid_targets_min": 1150 + }, + { + "epoch": 4.797297297297297, + "grad_norm": 0.47526721126268623, + "learning_rate": 1.0916853835934891e-05, + "loss": 0.2157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22524023056030273, + "step": 3195, + "valid_targets_mean": 5497.2, + "valid_targets_min": 714 + }, + { + "epoch": 4.804804804804805, + "grad_norm": 0.7451217907444951, + "learning_rate": 1.0850197466678662e-05, + "loss": 0.232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20764270424842834, + "step": 3200, + "valid_targets_mean": 4149.2, + "valid_targets_min": 628 + }, + { + "epoch": 4.812312312312312, + "grad_norm": 0.4957698184650846, + "learning_rate": 1.0783669385718762e-05, + "loss": 0.2371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21627801656723022, + "step": 3205, + "valid_targets_mean": 5803.0, + "valid_targets_min": 790 + }, + { + "epoch": 4.81981981981982, + "grad_norm": 0.5309967929446427, + "learning_rate": 1.0717270525837523e-05, + "loss": 0.2449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2549176514148712, + "step": 3210, + "valid_targets_mean": 5175.2, + "valid_targets_min": 558 + }, + { + "epoch": 4.827327327327327, + "grad_norm": 0.6675846311975975, + "learning_rate": 1.0651001818005487e-05, + "loss": 0.2157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2339574694633484, + "step": 3215, + "valid_targets_mean": 3242.3, + "valid_targets_min": 499 + }, + { + "epoch": 4.834834834834835, + "grad_norm": 0.5348472304451167, + "learning_rate": 1.0584864191368345e-05, + "loss": 0.2287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2503393888473511, + "step": 3220, + "valid_targets_mean": 5664.9, + "valid_targets_min": 487 + }, + { + "epoch": 4.842342342342342, + "grad_norm": 0.5338144959827715, + "learning_rate": 1.0518858573233911e-05, + "loss": 0.2171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21501460671424866, + "step": 3225, + "valid_targets_mean": 4275.4, + "valid_targets_min": 631 + }, + { + "epoch": 4.84984984984985, + "grad_norm": 0.583867201837148, + "learning_rate": 1.0452985889059151e-05, + "loss": 0.2321, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2584874629974365, + "step": 3230, + "valid_targets_mean": 3758.3, + "valid_targets_min": 367 + }, + { + "epoch": 4.857357357357357, + "grad_norm": 0.4704381431905369, + "learning_rate": 1.0387247062437144e-05, + "loss": 0.2159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2556239068508148, + "step": 3235, + "valid_targets_mean": 5887.3, + "valid_targets_min": 489 + }, + { + "epoch": 4.864864864864865, + "grad_norm": 0.6249528070324213, + "learning_rate": 1.0321643015084187e-05, + "loss": 0.2111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23891428112983704, + "step": 3240, + "valid_targets_mean": 3309.9, + "valid_targets_min": 301 + }, + { + "epoch": 4.872372372372372, + "grad_norm": 0.44764920429718186, + "learning_rate": 1.0256174666826841e-05, + "loss": 0.213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19511452317237854, + "step": 3245, + "valid_targets_mean": 5169.8, + "valid_targets_min": 980 + }, + { + "epoch": 4.87987987987988, + "grad_norm": 0.5340956174536189, + "learning_rate": 1.0190842935589065e-05, + "loss": 0.206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18626420199871063, + "step": 3250, + "valid_targets_mean": 3967.4, + "valid_targets_min": 798 + }, + { + "epoch": 4.887387387387387, + "grad_norm": 0.56288269038886, + "learning_rate": 1.0125648737379307e-05, + "loss": 0.2185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20922933518886566, + "step": 3255, + "valid_targets_mean": 4145.1, + "valid_targets_min": 511 + }, + { + "epoch": 4.894894894894895, + "grad_norm": 0.4659624202841574, + "learning_rate": 1.0060592986277693e-05, + "loss": 0.2216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18871676921844482, + "step": 3260, + "valid_targets_mean": 5421.9, + "valid_targets_min": 2150 + }, + { + "epoch": 4.902402402402402, + "grad_norm": 0.5691277577002785, + "learning_rate": 9.995676594423186e-06, + "loss": 0.2241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22213050723075867, + "step": 3265, + "valid_targets_mean": 4555.8, + "valid_targets_min": 839 + }, + { + "epoch": 4.90990990990991, + "grad_norm": 0.6445683801786387, + "learning_rate": 9.930900472000834e-06, + "loss": 0.1918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20591846108436584, + "step": 3270, + "valid_targets_mean": 3039.4, + "valid_targets_min": 574 + }, + { + "epoch": 4.9174174174174174, + "grad_norm": 0.48632405588473493, + "learning_rate": 9.866265527228961e-06, + "loss": 0.2047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.224864199757576, + "step": 3275, + "valid_targets_mean": 5041.8, + "valid_targets_min": 562 + }, + { + "epoch": 4.924924924924925, + "grad_norm": 0.4551299572592259, + "learning_rate": 9.801772666346462e-06, + "loss": 0.2383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17636257410049438, + "step": 3280, + "valid_targets_mean": 5264.3, + "valid_targets_min": 260 + }, + { + "epoch": 4.9324324324324325, + "grad_norm": 0.5303503051433811, + "learning_rate": 9.737422793600092e-06, + "loss": 0.2529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2605491876602173, + "step": 3285, + "valid_targets_mean": 4923.2, + "valid_targets_min": 551 + }, + { + "epoch": 4.93993993993994, + "grad_norm": 0.5619328215648045, + "learning_rate": 9.67321681123179e-06, + "loss": 0.2219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21425651013851166, + "step": 3290, + "valid_targets_mean": 4545.6, + "valid_targets_min": 515 + }, + { + "epoch": 4.9474474474474475, + "grad_norm": 0.4969814195952852, + "learning_rate": 9.609155619466016e-06, + "loss": 0.202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19319353997707367, + "step": 3295, + "valid_targets_mean": 4383.9, + "valid_targets_min": 711 + }, + { + "epoch": 4.954954954954955, + "grad_norm": 0.6244151395343142, + "learning_rate": 9.545240116497143e-06, + "loss": 0.2181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18449707329273224, + "step": 3300, + "valid_targets_mean": 3211.4, + "valid_targets_min": 484 + }, + { + "epoch": 4.9624624624624625, + "grad_norm": 0.5893223235253725, + "learning_rate": 9.481471198476855e-06, + "loss": 0.2294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22556568682193756, + "step": 3305, + "valid_targets_mean": 5031.6, + "valid_targets_min": 684 + }, + { + "epoch": 4.96996996996997, + "grad_norm": 0.4915902916242456, + "learning_rate": 9.417849759501603e-06, + "loss": 0.2226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2357444167137146, + "step": 3310, + "valid_targets_mean": 5379.2, + "valid_targets_min": 860 + }, + { + "epoch": 4.9774774774774775, + "grad_norm": 0.5245483991646102, + "learning_rate": 9.354376691600034e-06, + "loss": 0.2091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18305274844169617, + "step": 3315, + "valid_targets_mean": 4195.8, + "valid_targets_min": 388 + }, + { + "epoch": 4.984984984984985, + "grad_norm": 0.49053726456598334, + "learning_rate": 9.2910528847205e-06, + "loss": 0.2419, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23515072464942932, + "step": 3320, + "valid_targets_mean": 4802.1, + "valid_targets_min": 666 + }, + { + "epoch": 4.9924924924924925, + "grad_norm": 0.5458070078892359, + "learning_rate": 9.227879226718595e-06, + "loss": 0.2204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2729540467262268, + "step": 3325, + "valid_targets_mean": 4229.2, + "valid_targets_min": 716 + }, + { + "epoch": 5.0, + "grad_norm": 0.4617612878592164, + "learning_rate": 9.164856603344681e-06, + "loss": 0.2209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19838783144950867, + "step": 3330, + "valid_targets_mean": 5383.6, + "valid_targets_min": 1005 + }, + { + "epoch": 5.0075075075075075, + "grad_norm": 0.5006372512661854, + "learning_rate": 9.10198589823149e-06, + "loss": 0.2043, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21871143579483032, + "step": 3335, + "valid_targets_mean": 4899.0, + "valid_targets_min": 478 + }, + { + "epoch": 5.015015015015015, + "grad_norm": 0.493071836759497, + "learning_rate": 9.039267992881724e-06, + "loss": 0.1867, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15860621631145477, + "step": 3340, + "valid_targets_mean": 4637.3, + "valid_targets_min": 539 + }, + { + "epoch": 5.0225225225225225, + "grad_norm": 0.5942000008682329, + "learning_rate": 8.97670376665569e-06, + "loss": 0.212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21339187026023865, + "step": 3345, + "valid_targets_mean": 4111.1, + "valid_targets_min": 482 + }, + { + "epoch": 5.03003003003003, + "grad_norm": 0.5098653931776418, + "learning_rate": 8.914294096758995e-06, + "loss": 0.2524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21369382739067078, + "step": 3350, + "valid_targets_mean": 5529.7, + "valid_targets_min": 571 + }, + { + "epoch": 5.0375375375375375, + "grad_norm": 0.48801798277011926, + "learning_rate": 8.852039858230217e-06, + "loss": 0.2076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16710881888866425, + "step": 3355, + "valid_targets_mean": 4623.7, + "valid_targets_min": 694 + }, + { + "epoch": 5.045045045045045, + "grad_norm": 0.5406142033120225, + "learning_rate": 8.78994192392865e-06, + "loss": 0.1969, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18688131868839264, + "step": 3360, + "valid_targets_mean": 3961.8, + "valid_targets_min": 529 + }, + { + "epoch": 5.0525525525525525, + "grad_norm": 0.49064863606657333, + "learning_rate": 8.728001164522068e-06, + "loss": 0.2054, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19211602210998535, + "step": 3365, + "valid_targets_mean": 5132.5, + "valid_targets_min": 557 + }, + { + "epoch": 5.06006006006006, + "grad_norm": 0.5428647141950651, + "learning_rate": 8.666218448474517e-06, + "loss": 0.2235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19043126702308655, + "step": 3370, + "valid_targets_mean": 4518.9, + "valid_targets_min": 623 + }, + { + "epoch": 5.0675675675675675, + "grad_norm": 0.5285735991494391, + "learning_rate": 8.604594642034126e-06, + "loss": 0.2093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22566662728786469, + "step": 3375, + "valid_targets_mean": 4554.3, + "valid_targets_min": 628 + }, + { + "epoch": 5.075075075075075, + "grad_norm": 0.6397497283612681, + "learning_rate": 8.543130609220982e-06, + "loss": 0.1913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17037567496299744, + "step": 3380, + "valid_targets_mean": 3197.0, + "valid_targets_min": 631 + }, + { + "epoch": 5.0825825825825826, + "grad_norm": 0.5624772140252701, + "learning_rate": 8.481827211814991e-06, + "loss": 0.2315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2726784348487854, + "step": 3385, + "valid_targets_mean": 4160.6, + "valid_targets_min": 727 + }, + { + "epoch": 5.09009009009009, + "grad_norm": 0.5877366129284431, + "learning_rate": 8.420685309343835e-06, + "loss": 0.2131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20283541083335876, + "step": 3390, + "valid_targets_mean": 3707.2, + "valid_targets_min": 663 + }, + { + "epoch": 5.097597597597598, + "grad_norm": 0.6489090565754443, + "learning_rate": 8.35970575907087e-06, + "loss": 0.2123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2192349135875702, + "step": 3395, + "valid_targets_mean": 5407.1, + "valid_targets_min": 672 + }, + { + "epoch": 5.105105105105105, + "grad_norm": 0.5565202403018633, + "learning_rate": 8.298889415983142e-06, + "loss": 0.2057, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1590254306793213, + "step": 3400, + "valid_targets_mean": 3583.3, + "valid_targets_min": 735 + }, + { + "epoch": 5.112612612612613, + "grad_norm": 0.41960211523789304, + "learning_rate": 8.23823713277938e-06, + "loss": 0.2056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14446228742599487, + "step": 3405, + "valid_targets_mean": 5082.1, + "valid_targets_min": 276 + }, + { + "epoch": 5.12012012012012, + "grad_norm": 0.6517049110770983, + "learning_rate": 8.17774975985806e-06, + "loss": 0.207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23023463785648346, + "step": 3410, + "valid_targets_mean": 4596.8, + "valid_targets_min": 786 + }, + { + "epoch": 5.127627627627628, + "grad_norm": 0.8256063611735115, + "learning_rate": 8.117428145305464e-06, + "loss": 0.1705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19371360540390015, + "step": 3415, + "valid_targets_mean": 3733.1, + "valid_targets_min": 946 + }, + { + "epoch": 5.135135135135135, + "grad_norm": 0.47817908036094603, + "learning_rate": 8.057273134883794e-06, + "loss": 0.2065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20559421181678772, + "step": 3420, + "valid_targets_mean": 5914.1, + "valid_targets_min": 746 + }, + { + "epoch": 5.142642642642643, + "grad_norm": 0.6404035798093046, + "learning_rate": 7.997285572019316e-06, + "loss": 0.2294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19509994983673096, + "step": 3425, + "valid_targets_mean": 3609.6, + "valid_targets_min": 719 + }, + { + "epoch": 5.15015015015015, + "grad_norm": 0.7814615076086997, + "learning_rate": 7.937466297790542e-06, + "loss": 0.2033, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17219263315200806, + "step": 3430, + "valid_targets_mean": 4124.9, + "valid_targets_min": 593 + }, + { + "epoch": 5.157657657657658, + "grad_norm": 0.5711178140418124, + "learning_rate": 7.877816150916422e-06, + "loss": 0.2186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20192334055900574, + "step": 3435, + "valid_targets_mean": 4497.1, + "valid_targets_min": 673 + }, + { + "epoch": 5.165165165165165, + "grad_norm": 0.6605000045012411, + "learning_rate": 7.81833596774459e-06, + "loss": 0.1922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21957352757453918, + "step": 3440, + "valid_targets_mean": 3223.1, + "valid_targets_min": 617 + }, + { + "epoch": 5.172672672672673, + "grad_norm": 1.2228952471867771, + "learning_rate": 7.759026582239639e-06, + "loss": 0.1919, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2631041407585144, + "step": 3445, + "valid_targets_mean": 4348.8, + "valid_targets_min": 574 + }, + { + "epoch": 5.18018018018018, + "grad_norm": 0.858620752436695, + "learning_rate": 7.699888825971433e-06, + "loss": 0.2026, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.162127286195755, + "step": 3450, + "valid_targets_mean": 6938.4, + "valid_targets_min": 683 + }, + { + "epoch": 5.187687687687688, + "grad_norm": 0.6070759956430632, + "learning_rate": 7.640923528103441e-06, + "loss": 0.2168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2934483289718628, + "step": 3455, + "valid_targets_mean": 4662.9, + "valid_targets_min": 739 + }, + { + "epoch": 5.195195195195195, + "grad_norm": 0.5186921298999412, + "learning_rate": 7.582131515381108e-06, + "loss": 0.203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2093011438846588, + "step": 3460, + "valid_targets_mean": 4782.7, + "valid_targets_min": 492 + }, + { + "epoch": 5.202702702702703, + "grad_norm": 0.5661778349613429, + "learning_rate": 7.5235136121202675e-06, + "loss": 0.2171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24795249104499817, + "step": 3465, + "valid_targets_mean": 4599.2, + "valid_targets_min": 324 + }, + { + "epoch": 5.21021021021021, + "grad_norm": 0.5360911975296877, + "learning_rate": 7.465070640195606e-06, + "loss": 0.2068, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26138007640838623, + "step": 3470, + "valid_targets_mean": 4960.6, + "valid_targets_min": 376 + }, + { + "epoch": 5.217717717717718, + "grad_norm": 0.5642393094183789, + "learning_rate": 7.406803419029094e-06, + "loss": 0.2323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24130575358867645, + "step": 3475, + "valid_targets_mean": 4966.7, + "valid_targets_min": 821 + }, + { + "epoch": 5.225225225225225, + "grad_norm": 0.4550076790955841, + "learning_rate": 7.3487127655785295e-06, + "loss": 0.2106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1952485740184784, + "step": 3480, + "valid_targets_mean": 5512.1, + "valid_targets_min": 622 + }, + { + "epoch": 5.232732732732733, + "grad_norm": 0.6885372734490723, + "learning_rate": 7.290799494326077e-06, + "loss": 0.2016, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16893789172172546, + "step": 3485, + "valid_targets_mean": 4656.0, + "valid_targets_min": 477 + }, + { + "epoch": 5.24024024024024, + "grad_norm": 0.5045869253443142, + "learning_rate": 7.233064417266846e-06, + "loss": 0.231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16010183095932007, + "step": 3490, + "valid_targets_mean": 3909.6, + "valid_targets_min": 609 + }, + { + "epoch": 5.247747747747748, + "grad_norm": 0.6679277663347103, + "learning_rate": 7.1755083438975056e-06, + "loss": 0.22, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2579156756401062, + "step": 3495, + "valid_targets_mean": 3035.2, + "valid_targets_min": 410 + }, + { + "epoch": 5.255255255255255, + "grad_norm": 0.48166787337419537, + "learning_rate": 7.118132081204936e-06, + "loss": 0.2034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17768923938274384, + "step": 3500, + "valid_targets_mean": 4611.5, + "valid_targets_min": 540 + }, + { + "epoch": 5.262762762762763, + "grad_norm": 0.6437356623831512, + "learning_rate": 7.060936433654901e-06, + "loss": 0.2106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22873665392398834, + "step": 3505, + "valid_targets_mean": 4224.2, + "valid_targets_min": 1214 + }, + { + "epoch": 5.27027027027027, + "grad_norm": 0.5041184297812231, + "learning_rate": 7.003922203180813e-06, + "loss": 0.1829, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17131251096725464, + "step": 3510, + "valid_targets_mean": 4996.4, + "valid_targets_min": 762 + }, + { + "epoch": 5.277777777777778, + "grad_norm": 0.42636959404581054, + "learning_rate": 6.947090189172425e-06, + "loss": 0.1905, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17408744990825653, + "step": 3515, + "valid_targets_mean": 6548.1, + "valid_targets_min": 681 + }, + { + "epoch": 5.285285285285285, + "grad_norm": 0.7004634661839949, + "learning_rate": 6.89044118846467e-06, + "loss": 0.1871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21478641033172607, + "step": 3520, + "valid_targets_mean": 3402.0, + "valid_targets_min": 534 + }, + { + "epoch": 5.292792792792793, + "grad_norm": 0.6584020869113968, + "learning_rate": 6.833975995326465e-06, + "loss": 0.2226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17992106080055237, + "step": 3525, + "valid_targets_mean": 3046.6, + "valid_targets_min": 592 + }, + { + "epoch": 5.3003003003003, + "grad_norm": 0.505387458907387, + "learning_rate": 6.777695401449591e-06, + "loss": 0.1905, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20670431852340698, + "step": 3530, + "valid_targets_mean": 5285.2, + "valid_targets_min": 893 + }, + { + "epoch": 5.307807807807808, + "grad_norm": 0.5767720272580488, + "learning_rate": 6.721600195937578e-06, + "loss": 0.1898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18273615837097168, + "step": 3535, + "valid_targets_mean": 3702.8, + "valid_targets_min": 877 + }, + { + "epoch": 5.315315315315315, + "grad_norm": 0.5468384364940457, + "learning_rate": 6.665691165294654e-06, + "loss": 0.2308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2389727234840393, + "step": 3540, + "valid_targets_mean": 4842.5, + "valid_targets_min": 1056 + }, + { + "epoch": 5.322822822822823, + "grad_norm": 0.5417666029457835, + "learning_rate": 6.609969093414699e-06, + "loss": 0.2186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2088838666677475, + "step": 3545, + "valid_targets_mean": 4254.4, + "valid_targets_min": 367 + }, + { + "epoch": 5.33033033033033, + "grad_norm": 0.5649058281181645, + "learning_rate": 6.55443476157029e-06, + "loss": 0.1991, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1812952756881714, + "step": 3550, + "valid_targets_mean": 4252.7, + "valid_targets_min": 673 + }, + { + "epoch": 5.337837837837838, + "grad_norm": 0.5452388205148444, + "learning_rate": 6.4990889484017e-06, + "loss": 0.2067, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1796906590461731, + "step": 3555, + "valid_targets_mean": 3792.6, + "valid_targets_min": 600 + }, + { + "epoch": 5.345345345345345, + "grad_norm": 0.5732225965401323, + "learning_rate": 6.443932429906013e-06, + "loss": 0.2323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2930184006690979, + "step": 3560, + "valid_targets_mean": 4310.6, + "valid_targets_min": 541 + }, + { + "epoch": 5.352852852852853, + "grad_norm": 0.6028750589087655, + "learning_rate": 6.3889659794262334e-06, + "loss": 0.2129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21735335886478424, + "step": 3565, + "valid_targets_mean": 3685.1, + "valid_targets_min": 583 + }, + { + "epoch": 5.36036036036036, + "grad_norm": 0.602478347893381, + "learning_rate": 6.334190367640449e-06, + "loss": 0.1793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18190817534923553, + "step": 3570, + "valid_targets_mean": 4439.8, + "valid_targets_min": 656 + }, + { + "epoch": 5.367867867867868, + "grad_norm": 0.5557346424426345, + "learning_rate": 6.279606362551016e-06, + "loss": 0.2101, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1880781054496765, + "step": 3575, + "valid_targets_mean": 4198.0, + "valid_targets_min": 456 + }, + { + "epoch": 5.375375375375375, + "grad_norm": 0.4806991447402713, + "learning_rate": 6.225214729473794e-06, + "loss": 0.2637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22571183741092682, + "step": 3580, + "valid_targets_mean": 5347.3, + "valid_targets_min": 652 + }, + { + "epoch": 5.382882882882883, + "grad_norm": 0.6189915178471305, + "learning_rate": 6.171016231027418e-06, + "loss": 0.2266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22540658712387085, + "step": 3585, + "valid_targets_mean": 3396.4, + "valid_targets_min": 619 + }, + { + "epoch": 5.39039039039039, + "grad_norm": 0.4781145702083429, + "learning_rate": 6.117011627122617e-06, + "loss": 0.2178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22122199833393097, + "step": 3590, + "valid_targets_mean": 5498.2, + "valid_targets_min": 407 + }, + { + "epoch": 5.397897897897898, + "grad_norm": 0.501236643073855, + "learning_rate": 6.063201674951535e-06, + "loss": 0.2113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19432801008224487, + "step": 3595, + "valid_targets_mean": 4399.0, + "valid_targets_min": 748 + }, + { + "epoch": 5.405405405405405, + "grad_norm": 0.5083333895430631, + "learning_rate": 6.009587128977128e-06, + "loss": 0.1969, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21600385010242462, + "step": 3600, + "valid_targets_mean": 4527.8, + "valid_targets_min": 441 + }, + { + "epoch": 5.412912912912913, + "grad_norm": 0.43801311222355105, + "learning_rate": 5.956168740922592e-06, + "loss": 0.2232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17686530947685242, + "step": 3605, + "valid_targets_mean": 5968.8, + "valid_targets_min": 650 + }, + { + "epoch": 5.42042042042042, + "grad_norm": 0.5142170347253675, + "learning_rate": 5.902947259760805e-06, + "loss": 0.2163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16562317311763763, + "step": 3610, + "valid_targets_mean": 4170.2, + "valid_targets_min": 562 + }, + { + "epoch": 5.427927927927928, + "grad_norm": 0.57494822664475, + "learning_rate": 5.84992343170385e-06, + "loss": 0.2344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19728896021842957, + "step": 3615, + "valid_targets_mean": 3665.9, + "valid_targets_min": 602 + }, + { + "epoch": 5.435435435435435, + "grad_norm": 0.6682604390565138, + "learning_rate": 5.797098000192527e-06, + "loss": 0.194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18809694051742554, + "step": 3620, + "valid_targets_mean": 5038.7, + "valid_targets_min": 709 + }, + { + "epoch": 5.442942942942943, + "grad_norm": 0.5420242165199347, + "learning_rate": 5.744471705885946e-06, + "loss": 0.2203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19964119791984558, + "step": 3625, + "valid_targets_mean": 4113.4, + "valid_targets_min": 400 + }, + { + "epoch": 5.45045045045045, + "grad_norm": 0.4897694331671357, + "learning_rate": 5.692045286651145e-06, + "loss": 0.2233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21164345741271973, + "step": 3630, + "valid_targets_mean": 5133.8, + "valid_targets_min": 842 + }, + { + "epoch": 5.457957957957958, + "grad_norm": 0.5130796784439051, + "learning_rate": 5.6398194775527305e-06, + "loss": 0.2175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1640782356262207, + "step": 3635, + "valid_targets_mean": 4620.8, + "valid_targets_min": 957 + }, + { + "epoch": 5.465465465465465, + "grad_norm": 0.7096256538606244, + "learning_rate": 5.587795010842578e-06, + "loss": 0.2188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.229142963886261, + "step": 3640, + "valid_targets_mean": 3584.5, + "valid_targets_min": 291 + }, + { + "epoch": 5.472972972972973, + "grad_norm": 0.45320281126185963, + "learning_rate": 5.535972615949565e-06, + "loss": 0.1955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15912821888923645, + "step": 3645, + "valid_targets_mean": 6046.6, + "valid_targets_min": 565 + }, + { + "epoch": 5.48048048048048, + "grad_norm": 0.6580766138686164, + "learning_rate": 5.484353019469344e-06, + "loss": 0.2385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24230948090553284, + "step": 3650, + "valid_targets_mean": 3922.4, + "valid_targets_min": 618 + }, + { + "epoch": 5.487987987987988, + "grad_norm": 0.560301246319454, + "learning_rate": 5.432936945154159e-06, + "loss": 0.2035, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18417420983314514, + "step": 3655, + "valid_targets_mean": 4271.6, + "valid_targets_min": 590 + }, + { + "epoch": 5.495495495495495, + "grad_norm": 0.6206056063840636, + "learning_rate": 5.381725113902689e-06, + "loss": 0.1936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17472167313098907, + "step": 3660, + "valid_targets_mean": 3848.2, + "valid_targets_min": 581 + }, + { + "epoch": 5.503003003003003, + "grad_norm": 0.4966662168616601, + "learning_rate": 5.330718243749942e-06, + "loss": 0.1978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.191782146692276, + "step": 3665, + "valid_targets_mean": 4883.6, + "valid_targets_min": 648 + }, + { + "epoch": 5.51051051051051, + "grad_norm": 0.5471645413638431, + "learning_rate": 5.279917049857209e-06, + "loss": 0.1937, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17728394269943237, + "step": 3670, + "valid_targets_mean": 3883.9, + "valid_targets_min": 531 + }, + { + "epoch": 5.518018018018018, + "grad_norm": 0.49020284035840306, + "learning_rate": 5.229322244501996e-06, + "loss": 0.1986, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2151007056236267, + "step": 3675, + "valid_targets_mean": 5625.6, + "valid_targets_min": 710 + }, + { + "epoch": 5.525525525525525, + "grad_norm": 0.502623769306998, + "learning_rate": 5.1789345370680726e-06, + "loss": 0.2007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18778328597545624, + "step": 3680, + "valid_targets_mean": 5465.0, + "valid_targets_min": 604 + }, + { + "epoch": 5.533033033033033, + "grad_norm": 0.6082538279267172, + "learning_rate": 5.128754634035509e-06, + "loss": 0.2006, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2307537943124771, + "step": 3685, + "valid_targets_mean": 3897.9, + "valid_targets_min": 475 + }, + { + "epoch": 5.54054054054054, + "grad_norm": 0.4677087975060976, + "learning_rate": 5.078783238970775e-06, + "loss": 0.1746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1452668309211731, + "step": 3690, + "valid_targets_mean": 4697.0, + "valid_targets_min": 820 + }, + { + "epoch": 5.548048048048048, + "grad_norm": 0.5258809236597033, + "learning_rate": 5.029021052516874e-06, + "loss": 0.2364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1686006635427475, + "step": 3695, + "valid_targets_mean": 5070.6, + "valid_targets_min": 874 + }, + { + "epoch": 5.555555555555555, + "grad_norm": 0.5418543854859791, + "learning_rate": 4.979468772383525e-06, + "loss": 0.2528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23895879089832306, + "step": 3700, + "valid_targets_mean": 4703.9, + "valid_targets_min": 799 + }, + { + "epoch": 5.563063063063063, + "grad_norm": 0.5580957987554833, + "learning_rate": 4.93012709333736e-06, + "loss": 0.2182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18779677152633667, + "step": 3705, + "valid_targets_mean": 3780.4, + "valid_targets_min": 700 + }, + { + "epoch": 5.57057057057057, + "grad_norm": 0.5796418427811311, + "learning_rate": 4.880996707192227e-06, + "loss": 0.2076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18160055577754974, + "step": 3710, + "valid_targets_mean": 3617.5, + "valid_targets_min": 691 + }, + { + "epoch": 5.578078078078078, + "grad_norm": 0.625613489620117, + "learning_rate": 4.832078302799437e-06, + "loss": 0.231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2596314549446106, + "step": 3715, + "valid_targets_mean": 4067.5, + "valid_targets_min": 928 + }, + { + "epoch": 5.585585585585585, + "grad_norm": 0.4752383217997911, + "learning_rate": 4.7833725660381356e-06, + "loss": 0.21, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15992337465286255, + "step": 3720, + "valid_targets_mean": 4770.9, + "valid_targets_min": 761 + }, + { + "epoch": 5.593093093093093, + "grad_norm": 0.5779722936684374, + "learning_rate": 4.734880179805685e-06, + "loss": 0.2091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18457244336605072, + "step": 3725, + "valid_targets_mean": 3342.9, + "valid_targets_min": 691 + }, + { + "epoch": 5.6006006006006, + "grad_norm": 0.550075630002128, + "learning_rate": 4.686601824008079e-06, + "loss": 0.2177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18044841289520264, + "step": 3730, + "valid_targets_mean": 4945.1, + "valid_targets_min": 906 + }, + { + "epoch": 5.608108108108108, + "grad_norm": 0.631353153629254, + "learning_rate": 4.638538175550422e-06, + "loss": 0.195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21107108891010284, + "step": 3735, + "valid_targets_mean": 3289.5, + "valid_targets_min": 552 + }, + { + "epoch": 5.615615615615615, + "grad_norm": 0.7558092942754301, + "learning_rate": 4.590689908327428e-06, + "loss": 0.2214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20774057507514954, + "step": 3740, + "valid_targets_mean": 2326.2, + "valid_targets_min": 684 + }, + { + "epoch": 5.623123123123123, + "grad_norm": 0.6050928606517935, + "learning_rate": 4.543057693213973e-06, + "loss": 0.2071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.230237677693367, + "step": 3745, + "valid_targets_mean": 3875.2, + "valid_targets_min": 504 + }, + { + "epoch": 5.63063063063063, + "grad_norm": 0.5844617202863044, + "learning_rate": 4.495642198055707e-06, + "loss": 0.2215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2029937505722046, + "step": 3750, + "valid_targets_mean": 4027.4, + "valid_targets_min": 536 + }, + { + "epoch": 5.638138138138138, + "grad_norm": 0.7169568097777915, + "learning_rate": 4.4484440876596625e-06, + "loss": 0.2364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22157275676727295, + "step": 3755, + "valid_targets_mean": 2619.1, + "valid_targets_min": 657 + }, + { + "epoch": 5.645645645645645, + "grad_norm": 0.5563696214524937, + "learning_rate": 4.401464023784945e-06, + "loss": 0.2007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1895512342453003, + "step": 3760, + "valid_targets_mean": 4033.1, + "valid_targets_min": 639 + }, + { + "epoch": 5.653153153153153, + "grad_norm": 0.7184497569373026, + "learning_rate": 4.354702665133457e-06, + "loss": 0.2035, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17378777265548706, + "step": 3765, + "valid_targets_mean": 4711.1, + "valid_targets_min": 460 + }, + { + "epoch": 5.66066066066066, + "grad_norm": 0.672562372546706, + "learning_rate": 4.308160667340671e-06, + "loss": 0.22, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23928751051425934, + "step": 3770, + "valid_targets_mean": 2964.8, + "valid_targets_min": 402 + }, + { + "epoch": 5.668168168168168, + "grad_norm": 0.4887664405970491, + "learning_rate": 4.261838682966413e-06, + "loss": 0.2001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19159993529319763, + "step": 3775, + "valid_targets_mean": 5129.8, + "valid_targets_min": 752 + }, + { + "epoch": 5.675675675675675, + "grad_norm": 0.5457059516129726, + "learning_rate": 4.215737361485737e-06, + "loss": 0.2193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21986320614814758, + "step": 3780, + "valid_targets_mean": 5641.1, + "valid_targets_min": 915 + }, + { + "epoch": 5.683183183183183, + "grad_norm": 0.5115604631418055, + "learning_rate": 4.1698573492798e-06, + "loss": 0.198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1333058923482895, + "step": 3785, + "valid_targets_mean": 3278.0, + "valid_targets_min": 647 + }, + { + "epoch": 5.6906906906906904, + "grad_norm": 0.7200821505353998, + "learning_rate": 4.124199289626818e-06, + "loss": 0.2007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22922024130821228, + "step": 3790, + "valid_targets_mean": 2683.2, + "valid_targets_min": 625 + }, + { + "epoch": 5.698198198198198, + "grad_norm": 0.42145237729701907, + "learning_rate": 4.078763822693021e-06, + "loss": 0.206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18455691635608673, + "step": 3795, + "valid_targets_mean": 6690.8, + "valid_targets_min": 920 + }, + { + "epoch": 5.7057057057057055, + "grad_norm": 0.5325975126541347, + "learning_rate": 4.033551585523716e-06, + "loss": 0.2049, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19546617567539215, + "step": 3800, + "valid_targets_mean": 5228.1, + "valid_targets_min": 594 + }, + { + "epoch": 5.713213213213213, + "grad_norm": 0.6601846614023362, + "learning_rate": 3.988563212034315e-06, + "loss": 0.2511, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15158836543560028, + "step": 3805, + "valid_targets_mean": 3667.6, + "valid_targets_min": 358 + }, + { + "epoch": 5.7207207207207205, + "grad_norm": 0.6027115950409477, + "learning_rate": 3.943799333001466e-06, + "loss": 0.2164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19812247157096863, + "step": 3810, + "valid_targets_mean": 3187.2, + "valid_targets_min": 542 + }, + { + "epoch": 5.728228228228228, + "grad_norm": 0.7999468435844861, + "learning_rate": 3.899260576054209e-06, + "loss": 0.2164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24161022901535034, + "step": 3815, + "valid_targets_mean": 2608.4, + "valid_targets_min": 416 + }, + { + "epoch": 5.7357357357357355, + "grad_norm": 0.6197457192371475, + "learning_rate": 3.854947565665172e-06, + "loss": 0.2337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20346400141716003, + "step": 3820, + "valid_targets_mean": 3887.9, + "valid_targets_min": 641 + }, + { + "epoch": 5.743243243243243, + "grad_norm": 0.5811299922570492, + "learning_rate": 3.810860923141824e-06, + "loss": 0.2173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2590106129646301, + "step": 3825, + "valid_targets_mean": 4175.2, + "valid_targets_min": 750 + }, + { + "epoch": 5.7507507507507505, + "grad_norm": 0.6108735444142156, + "learning_rate": 3.767001266617747e-06, + "loss": 0.2198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21409021317958832, + "step": 3830, + "valid_targets_mean": 3856.4, + "valid_targets_min": 504 + }, + { + "epoch": 5.758258258258258, + "grad_norm": 0.5427297235952976, + "learning_rate": 3.7233692110439814e-06, + "loss": 0.2104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20367692410945892, + "step": 3835, + "valid_targets_mean": 4545.9, + "valid_targets_min": 666 + }, + { + "epoch": 5.7657657657657655, + "grad_norm": 0.4682965132065941, + "learning_rate": 3.679965368180416e-06, + "loss": 0.1959, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1938939094543457, + "step": 3840, + "valid_targets_mean": 5888.4, + "valid_targets_min": 915 + }, + { + "epoch": 5.773273273273273, + "grad_norm": 0.5399446918848115, + "learning_rate": 3.6367903465871755e-06, + "loss": 0.2268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24548377096652985, + "step": 3845, + "valid_targets_mean": 5109.5, + "valid_targets_min": 602 + }, + { + "epoch": 5.7807807807807805, + "grad_norm": 0.4864834575204545, + "learning_rate": 3.59384475161612e-06, + "loss": 0.2163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21027377247810364, + "step": 3850, + "valid_targets_mean": 4894.0, + "valid_targets_min": 636 + }, + { + "epoch": 5.788288288288288, + "grad_norm": 0.5438912695013488, + "learning_rate": 3.5511291854023466e-06, + "loss": 0.2059, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23461908102035522, + "step": 3855, + "valid_targets_mean": 4590.5, + "valid_targets_min": 666 + }, + { + "epoch": 5.7957957957957955, + "grad_norm": 0.5232434234880898, + "learning_rate": 3.508644246855739e-06, + "loss": 0.2097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20156699419021606, + "step": 3860, + "valid_targets_mean": 4557.0, + "valid_targets_min": 694 + }, + { + "epoch": 5.803303303303303, + "grad_norm": 0.5868070563906084, + "learning_rate": 3.466390531652588e-06, + "loss": 0.2079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19073697924613953, + "step": 3865, + "valid_targets_mean": 4407.1, + "valid_targets_min": 259 + }, + { + "epoch": 5.8108108108108105, + "grad_norm": 0.5169034481605863, + "learning_rate": 3.4243686322272195e-06, + "loss": 0.2133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2452038675546646, + "step": 3870, + "valid_targets_mean": 5199.6, + "valid_targets_min": 564 + }, + { + "epoch": 5.818318318318318, + "grad_norm": 0.5384261747880087, + "learning_rate": 3.3825791377636998e-06, + "loss": 0.1954, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2101140320301056, + "step": 3875, + "valid_targets_mean": 4844.0, + "valid_targets_min": 427 + }, + { + "epoch": 5.8258258258258255, + "grad_norm": 0.7881322308404421, + "learning_rate": 3.341022634187585e-06, + "loss": 0.2041, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23327091336250305, + "step": 3880, + "valid_targets_mean": 3447.8, + "valid_targets_min": 485 + }, + { + "epoch": 5.833333333333333, + "grad_norm": 0.5514296552758701, + "learning_rate": 3.2996997041576795e-06, + "loss": 0.2105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20250976085662842, + "step": 3885, + "valid_targets_mean": 4440.7, + "valid_targets_min": 434 + }, + { + "epoch": 5.8408408408408405, + "grad_norm": 0.651802460092001, + "learning_rate": 3.2586109270578836e-06, + "loss": 0.193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2257906049489975, + "step": 3890, + "valid_targets_mean": 4516.6, + "valid_targets_min": 623 + }, + { + "epoch": 5.848348348348348, + "grad_norm": 0.5643346645467794, + "learning_rate": 3.217756878989069e-06, + "loss": 0.2237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22649335861206055, + "step": 3895, + "valid_targets_mean": 4361.2, + "valid_targets_min": 764 + }, + { + "epoch": 5.8558558558558556, + "grad_norm": 0.6943626771374626, + "learning_rate": 3.1771381327609997e-06, + "loss": 0.2263, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23687607049942017, + "step": 3900, + "valid_targets_mean": 3878.1, + "valid_targets_min": 391 + }, + { + "epoch": 5.863363363363363, + "grad_norm": 0.5703396214687839, + "learning_rate": 3.1367552578842986e-06, + "loss": 0.1972, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22399486601352692, + "step": 3905, + "valid_targets_mean": 4644.7, + "valid_targets_min": 439 + }, + { + "epoch": 5.870870870870871, + "grad_norm": 3.5482019538966396, + "learning_rate": 3.096608820562468e-06, + "loss": 0.1959, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14243656396865845, + "step": 3910, + "valid_targets_mean": 3601.3, + "valid_targets_min": 723 + }, + { + "epoch": 5.878378378378378, + "grad_norm": 0.6997352643905546, + "learning_rate": 3.0566993836839387e-06, + "loss": 0.2354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22578692436218262, + "step": 3915, + "valid_targets_mean": 2871.8, + "valid_targets_min": 481 + }, + { + "epoch": 5.885885885885886, + "grad_norm": 0.61982038677274, + "learning_rate": 3.017027506814203e-06, + "loss": 0.2138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.212412029504776, + "step": 3920, + "valid_targets_mean": 3693.6, + "valid_targets_min": 641 + }, + { + "epoch": 5.893393393393393, + "grad_norm": 0.502898749801898, + "learning_rate": 2.9775937461879368e-06, + "loss": 0.2034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18976537883281708, + "step": 3925, + "valid_targets_mean": 5294.2, + "valid_targets_min": 649 + }, + { + "epoch": 5.900900900900901, + "grad_norm": 0.705380106461315, + "learning_rate": 2.9383986547012222e-06, + "loss": 0.2052, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21276956796646118, + "step": 3930, + "valid_targets_mean": 2696.2, + "valid_targets_min": 569 + }, + { + "epoch": 5.908408408408408, + "grad_norm": 0.6824495687596949, + "learning_rate": 2.8994427819037873e-06, + "loss": 0.2071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27314475178718567, + "step": 3935, + "valid_targets_mean": 5253.1, + "valid_targets_min": 879 + }, + { + "epoch": 5.915915915915916, + "grad_norm": 0.6133652704497186, + "learning_rate": 2.8607266739913077e-06, + "loss": 0.2078, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24844960868358612, + "step": 3940, + "valid_targets_mean": 4655.8, + "valid_targets_min": 819 + }, + { + "epoch": 5.923423423423423, + "grad_norm": 0.5714988842182993, + "learning_rate": 2.822250873797743e-06, + "loss": 0.226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19975441694259644, + "step": 3945, + "valid_targets_mean": 4395.9, + "valid_targets_min": 726 + }, + { + "epoch": 5.930930930930931, + "grad_norm": 0.8230004411475231, + "learning_rate": 2.784015920787719e-06, + "loss": 0.2254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3142227828502655, + "step": 3950, + "valid_targets_mean": 4321.6, + "valid_targets_min": 853 + }, + { + "epoch": 5.938438438438438, + "grad_norm": 0.572231017896013, + "learning_rate": 2.746022351048978e-06, + "loss": 0.2267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2292689085006714, + "step": 3955, + "valid_targets_mean": 4084.4, + "valid_targets_min": 615 + }, + { + "epoch": 5.945945945945946, + "grad_norm": 0.8188722407870543, + "learning_rate": 2.708270697284865e-06, + "loss": 0.2123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24927443265914917, + "step": 3960, + "valid_targets_mean": 3767.1, + "valid_targets_min": 686 + }, + { + "epoch": 5.953453453453453, + "grad_norm": 0.5185251151107919, + "learning_rate": 2.6707614888068365e-06, + "loss": 0.209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19668208062648773, + "step": 3965, + "valid_targets_mean": 4310.9, + "valid_targets_min": 423 + }, + { + "epoch": 5.960960960960961, + "grad_norm": 0.5542673950873109, + "learning_rate": 2.633495251527054e-06, + "loss": 0.1931, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24870452284812927, + "step": 3970, + "valid_targets_mean": 5467.7, + "valid_targets_min": 973 + }, + { + "epoch": 5.968468468468468, + "grad_norm": 0.6048295638583769, + "learning_rate": 2.5964725079510134e-06, + "loss": 0.2045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17934688925743103, + "step": 3975, + "valid_targets_mean": 3691.8, + "valid_targets_min": 715 + }, + { + "epoch": 5.975975975975976, + "grad_norm": 0.5714679755869654, + "learning_rate": 2.5596937771702113e-06, + "loss": 0.1995, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18001726269721985, + "step": 3980, + "valid_targets_mean": 3830.1, + "valid_targets_min": 698 + }, + { + "epoch": 5.983483483483483, + "grad_norm": 0.5678385470019257, + "learning_rate": 2.523159574854872e-06, + "loss": 0.2096, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19507171213626862, + "step": 3985, + "valid_targets_mean": 3516.0, + "valid_targets_min": 311 + }, + { + "epoch": 5.990990990990991, + "grad_norm": 0.685934053595724, + "learning_rate": 2.4868704132467115e-06, + "loss": 0.2047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2302461564540863, + "step": 3990, + "valid_targets_mean": 2869.4, + "valid_targets_min": 584 + }, + { + "epoch": 5.998498498498498, + "grad_norm": 0.6065925509118129, + "learning_rate": 2.45082680115176e-06, + "loss": 0.2199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19993174076080322, + "step": 3995, + "valid_targets_mean": 3674.2, + "valid_targets_min": 414 + }, + { + "epoch": 6.006006006006006, + "grad_norm": 0.4490328583511514, + "learning_rate": 2.4150292439332355e-06, + "loss": 0.1979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19256381690502167, + "step": 4000, + "valid_targets_mean": 5463.9, + "valid_targets_min": 623 + }, + { + "epoch": 6.013513513513513, + "grad_norm": 0.5503944239111109, + "learning_rate": 2.3794782435044384e-06, + "loss": 0.1788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17218606173992157, + "step": 4005, + "valid_targets_mean": 3465.8, + "valid_targets_min": 930 + }, + { + "epoch": 6.021021021021021, + "grad_norm": 0.5533090724442471, + "learning_rate": 2.344174298321733e-06, + "loss": 0.2125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.213973268866539, + "step": 4010, + "valid_targets_mean": 4543.8, + "valid_targets_min": 570 + }, + { + "epoch": 6.028528528528528, + "grad_norm": 0.6792685607327363, + "learning_rate": 2.3091179033775445e-06, + "loss": 0.2038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2688198983669281, + "step": 4015, + "valid_targets_mean": 3235.7, + "valid_targets_min": 605 + }, + { + "epoch": 6.036036036036036, + "grad_norm": 0.46026148213907403, + "learning_rate": 2.27430955019343e-06, + "loss": 0.2157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1564239263534546, + "step": 4020, + "valid_targets_mean": 5029.9, + "valid_targets_min": 1069 + }, + { + "epoch": 6.043543543543543, + "grad_norm": 0.5322076051668858, + "learning_rate": 2.239749726813183e-06, + "loss": 0.2239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19674474000930786, + "step": 4025, + "valid_targets_mean": 4798.2, + "valid_targets_min": 478 + }, + { + "epoch": 6.051051051051051, + "grad_norm": 0.5891185848467975, + "learning_rate": 2.20543891779599e-06, + "loss": 0.199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18149876594543457, + "step": 4030, + "valid_targets_mean": 4520.9, + "valid_targets_min": 648 + }, + { + "epoch": 6.058558558558558, + "grad_norm": 0.6755239064217278, + "learning_rate": 2.1713776042096302e-06, + "loss": 0.2046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2210536003112793, + "step": 4035, + "valid_targets_mean": 3545.1, + "valid_targets_min": 665 + }, + { + "epoch": 6.066066066066066, + "grad_norm": 0.6088360110015828, + "learning_rate": 2.1375662636237493e-06, + "loss": 0.1992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22115203738212585, + "step": 4040, + "valid_targets_mean": 5270.9, + "valid_targets_min": 656 + }, + { + "epoch": 6.073573573573573, + "grad_norm": 0.5822383908375098, + "learning_rate": 2.1040053701031414e-06, + "loss": 0.2067, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21398739516735077, + "step": 4045, + "valid_targets_mean": 4107.5, + "valid_targets_min": 483 + }, + { + "epoch": 6.081081081081081, + "grad_norm": 0.6863357601545187, + "learning_rate": 2.0706953942011165e-06, + "loss": 0.2227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21303421258926392, + "step": 4050, + "valid_targets_mean": 3397.6, + "valid_targets_min": 691 + }, + { + "epoch": 6.088588588588588, + "grad_norm": 0.6835718993938039, + "learning_rate": 2.0376368029528935e-06, + "loss": 0.2104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2734207510948181, + "step": 4055, + "valid_targets_mean": 3650.6, + "valid_targets_min": 652 + }, + { + "epoch": 6.096096096096096, + "grad_norm": 0.45120552286100196, + "learning_rate": 2.0048300598690627e-06, + "loss": 0.2164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25283950567245483, + "step": 4060, + "valid_targets_mean": 7365.3, + "valid_targets_min": 993 + }, + { + "epoch": 6.103603603603603, + "grad_norm": 0.63959559159808, + "learning_rate": 1.9722756249290785e-06, + "loss": 0.2116, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2537308633327484, + "step": 4065, + "valid_targets_mean": 4346.6, + "valid_targets_min": 668 + }, + { + "epoch": 6.111111111111111, + "grad_norm": 0.6496824147481889, + "learning_rate": 1.939973954574812e-06, + "loss": 0.175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15889085829257965, + "step": 4070, + "valid_targets_mean": 3991.0, + "valid_targets_min": 738 + }, + { + "epoch": 6.118618618618618, + "grad_norm": 0.5705409403401243, + "learning_rate": 1.9079255017041487e-06, + "loss": 0.2026, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17797492444515228, + "step": 4075, + "valid_targets_mean": 4241.4, + "valid_targets_min": 618 + }, + { + "epoch": 6.126126126126126, + "grad_norm": 0.4815875979801429, + "learning_rate": 1.8761307156646547e-06, + "loss": 0.2204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15079014003276825, + "step": 4080, + "valid_targets_mean": 4996.1, + "valid_targets_min": 513 + }, + { + "epoch": 6.133633633633633, + "grad_norm": 0.5146233292308801, + "learning_rate": 1.8445900422472495e-06, + "loss": 0.2046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18511003255844116, + "step": 4085, + "valid_targets_mean": 4634.7, + "valid_targets_min": 685 + }, + { + "epoch": 6.141141141141141, + "grad_norm": 0.5624700118014101, + "learning_rate": 1.8133039236799788e-06, + "loss": 0.2067, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2083277553319931, + "step": 4090, + "valid_targets_mean": 4171.3, + "valid_targets_min": 511 + }, + { + "epoch": 6.148648648648648, + "grad_norm": 0.7177782923244695, + "learning_rate": 1.7822727986217957e-06, + "loss": 0.2237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18404030799865723, + "step": 4095, + "valid_targets_mean": 2409.4, + "valid_targets_min": 593 + }, + { + "epoch": 6.156156156156156, + "grad_norm": 0.5228873362141804, + "learning_rate": 1.7514971021564275e-06, + "loss": 0.2147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1940818727016449, + "step": 4100, + "valid_targets_mean": 4779.9, + "valid_targets_min": 511 + }, + { + "epoch": 6.163663663663663, + "grad_norm": 0.4487406897661156, + "learning_rate": 1.7209772657862657e-06, + "loss": 0.1958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1845383495092392, + "step": 4105, + "valid_targets_mean": 6135.4, + "valid_targets_min": 515 + }, + { + "epoch": 6.171171171171171, + "grad_norm": 0.5106337969240039, + "learning_rate": 1.6907137174263156e-06, + "loss": 0.2004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17622476816177368, + "step": 4110, + "valid_targets_mean": 4512.0, + "valid_targets_min": 463 + }, + { + "epoch": 6.178678678678678, + "grad_norm": 0.6441468445611757, + "learning_rate": 1.6607068813981952e-06, + "loss": 0.1916, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2345384955406189, + "step": 4115, + "valid_targets_mean": 3844.5, + "valid_targets_min": 471 + }, + { + "epoch": 6.186186186186186, + "grad_norm": 0.5581311436354888, + "learning_rate": 1.6309571784242062e-06, + "loss": 0.2017, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17409540712833405, + "step": 4120, + "valid_targets_mean": 4888.1, + "valid_targets_min": 645 + }, + { + "epoch": 6.193693693693693, + "grad_norm": 0.5762764403446647, + "learning_rate": 1.601465025621398e-06, + "loss": 0.2212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22839732468128204, + "step": 4125, + "valid_targets_mean": 4442.8, + "valid_targets_min": 669 + }, + { + "epoch": 6.201201201201201, + "grad_norm": 0.488635665892412, + "learning_rate": 1.5722308364957517e-06, + "loss": 0.1868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19067010283470154, + "step": 4130, + "valid_targets_mean": 5109.4, + "valid_targets_min": 606 + }, + { + "epoch": 6.208708708708708, + "grad_norm": 0.5702695176534012, + "learning_rate": 1.5432550209363606e-06, + "loss": 0.2119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19797605276107788, + "step": 4135, + "valid_targets_mean": 4942.1, + "valid_targets_min": 694 + }, + { + "epoch": 6.216216216216216, + "grad_norm": 0.5547765610808129, + "learning_rate": 1.5145379852097054e-06, + "loss": 0.1995, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21563218533992767, + "step": 4140, + "valid_targets_mean": 4537.9, + "valid_targets_min": 498 + }, + { + "epoch": 6.223723723723724, + "grad_norm": 0.6133297188420032, + "learning_rate": 1.4860801319539354e-06, + "loss": 0.2217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2022094428539276, + "step": 4145, + "valid_targets_mean": 3766.9, + "valid_targets_min": 602 + }, + { + "epoch": 6.231231231231231, + "grad_norm": 0.5500242111051548, + "learning_rate": 1.457881860173236e-06, + "loss": 0.1928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14841407537460327, + "step": 4150, + "valid_targets_mean": 3554.3, + "valid_targets_min": 628 + }, + { + "epoch": 6.238738738738739, + "grad_norm": 0.6493235742255749, + "learning_rate": 1.429943565232228e-06, + "loss": 0.2165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29499688744544983, + "step": 4155, + "valid_targets_mean": 4105.4, + "valid_targets_min": 391 + }, + { + "epoch": 6.246246246246246, + "grad_norm": 0.46401350568259536, + "learning_rate": 1.4022656388504418e-06, + "loss": 0.2026, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1871238350868225, + "step": 4160, + "valid_targets_mean": 5385.7, + "valid_targets_min": 467 + }, + { + "epoch": 6.253753753753754, + "grad_norm": 0.5751734802750025, + "learning_rate": 1.3748484690967967e-06, + "loss": 0.1754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17546819150447845, + "step": 4165, + "valid_targets_mean": 4171.8, + "valid_targets_min": 564 + }, + { + "epoch": 6.261261261261261, + "grad_norm": 0.824863745151526, + "learning_rate": 1.3476924403841762e-06, + "loss": 0.2352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21639591455459595, + "step": 4170, + "valid_targets_mean": 2713.2, + "valid_targets_min": 473 + }, + { + "epoch": 6.268768768768769, + "grad_norm": 0.570544358689332, + "learning_rate": 1.3207979334640419e-06, + "loss": 0.2095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20927639305591583, + "step": 4175, + "valid_targets_mean": 4503.6, + "valid_targets_min": 774 + }, + { + "epoch": 6.276276276276276, + "grad_norm": 0.5313264329238784, + "learning_rate": 1.294165325421084e-06, + "loss": 0.2398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29111388325691223, + "step": 4180, + "valid_targets_mean": 5414.2, + "valid_targets_min": 731 + }, + { + "epoch": 6.283783783783784, + "grad_norm": 0.5675843811457961, + "learning_rate": 1.2677949896679408e-06, + "loss": 0.2117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20922914147377014, + "step": 4185, + "valid_targets_mean": 4288.4, + "valid_targets_min": 434 + }, + { + "epoch": 6.291291291291291, + "grad_norm": 0.5380343131647501, + "learning_rate": 1.241687295939966e-06, + "loss": 0.1725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14380943775177002, + "step": 4190, + "valid_targets_mean": 4689.5, + "valid_targets_min": 367 + }, + { + "epoch": 6.298798798798799, + "grad_norm": 0.5899919262888533, + "learning_rate": 1.2158426102900345e-06, + "loss": 0.189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19863969087600708, + "step": 4195, + "valid_targets_mean": 4058.4, + "valid_targets_min": 727 + }, + { + "epoch": 6.306306306306306, + "grad_norm": 0.5264653142973713, + "learning_rate": 1.1902612950834213e-06, + "loss": 0.2007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.197919100522995, + "step": 4200, + "valid_targets_mean": 4504.0, + "valid_targets_min": 727 + }, + { + "epoch": 6.313813813813814, + "grad_norm": 0.5816807471532681, + "learning_rate": 1.164943708992714e-06, + "loss": 0.1954, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19351375102996826, + "step": 4205, + "valid_targets_mean": 3772.9, + "valid_targets_min": 725 + }, + { + "epoch": 6.321321321321321, + "grad_norm": 0.6196400182011077, + "learning_rate": 1.1398902069927842e-06, + "loss": 0.232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3165440261363983, + "step": 4210, + "valid_targets_mean": 4516.8, + "valid_targets_min": 374 + }, + { + "epoch": 6.328828828828829, + "grad_norm": 0.5403053431375547, + "learning_rate": 1.115101140355812e-06, + "loss": 0.2077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20008732378482819, + "step": 4215, + "valid_targets_mean": 4993.6, + "valid_targets_min": 667 + }, + { + "epoch": 6.336336336336337, + "grad_norm": 0.5311224743152807, + "learning_rate": 1.0905768566463593e-06, + "loss": 0.2158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2345670610666275, + "step": 4220, + "valid_targets_mean": 5031.6, + "valid_targets_min": 559 + }, + { + "epoch": 6.343843843843844, + "grad_norm": 0.7193489501948159, + "learning_rate": 1.0663176997165037e-06, + "loss": 0.2083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18373778462409973, + "step": 4225, + "valid_targets_mean": 2563.9, + "valid_targets_min": 614 + }, + { + "epoch": 6.351351351351352, + "grad_norm": 0.6194973179199644, + "learning_rate": 1.0423240097010056e-06, + "loss": 0.1904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17724689841270447, + "step": 4230, + "valid_targets_mean": 2932.5, + "valid_targets_min": 550 + }, + { + "epoch": 6.358858858858859, + "grad_norm": 0.5938101431895507, + "learning_rate": 1.018596123012545e-06, + "loss": 0.2014, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15485897660255432, + "step": 4235, + "valid_targets_mean": 3859.1, + "valid_targets_min": 565 + }, + { + "epoch": 6.366366366366367, + "grad_norm": 0.5832105338347596, + "learning_rate": 9.951343723370145e-07, + "loss": 0.2217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27296674251556396, + "step": 4240, + "valid_targets_mean": 4697.3, + "valid_targets_min": 651 + }, + { + "epoch": 6.373873873873874, + "grad_norm": 0.6257938479877299, + "learning_rate": 9.719390866288325e-07, + "loss": 0.2018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21452078223228455, + "step": 4245, + "valid_targets_mean": 3980.8, + "valid_targets_min": 644 + }, + { + "epoch": 6.381381381381382, + "grad_norm": 0.4748467821348681, + "learning_rate": 9.490105911063519e-07, + "loss": 0.1984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19311422109603882, + "step": 4250, + "valid_targets_mean": 4970.2, + "valid_targets_min": 958 + }, + { + "epoch": 6.388888888888889, + "grad_norm": 0.49848343014176094, + "learning_rate": 9.263492072472879e-07, + "loss": 0.2041, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20664718747138977, + "step": 4255, + "valid_targets_mean": 5091.6, + "valid_targets_min": 530 + }, + { + "epoch": 6.396396396396397, + "grad_norm": 0.4839998287321489, + "learning_rate": 9.039552527842188e-07, + "loss": 0.1931, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15590183436870575, + "step": 4260, + "valid_targets_mean": 4334.7, + "valid_targets_min": 755 + }, + { + "epoch": 6.403903903903904, + "grad_norm": 0.47675581127591066, + "learning_rate": 8.818290417001218e-07, + "loss": 0.1877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18418073654174805, + "step": 4265, + "valid_targets_mean": 5461.1, + "valid_targets_min": 875 + }, + { + "epoch": 6.411411411411412, + "grad_norm": 0.5148421630943572, + "learning_rate": 8.599708842239795e-07, + "loss": 0.2056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1673521250486374, + "step": 4270, + "valid_targets_mean": 5613.2, + "valid_targets_min": 791 + }, + { + "epoch": 6.418918918918919, + "grad_norm": 0.6034041442260967, + "learning_rate": 8.383810868264253e-07, + "loss": 0.2101, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2042214274406433, + "step": 4275, + "valid_targets_mean": 4745.4, + "valid_targets_min": 521 + }, + { + "epoch": 6.426426426426427, + "grad_norm": 0.44620270158602615, + "learning_rate": 8.17059952215451e-07, + "loss": 0.1892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14886009693145752, + "step": 4280, + "valid_targets_mean": 5529.8, + "valid_targets_min": 1189 + }, + { + "epoch": 6.433933933933934, + "grad_norm": 0.5070235125913082, + "learning_rate": 7.960077793321552e-07, + "loss": 0.1769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19193494319915771, + "step": 4285, + "valid_targets_mean": 5251.6, + "valid_targets_min": 425 + }, + { + "epoch": 6.441441441441442, + "grad_norm": 0.5615395471791965, + "learning_rate": 7.752248633465576e-07, + "loss": 0.215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22860676050186157, + "step": 4290, + "valid_targets_mean": 3936.7, + "valid_targets_min": 583 + }, + { + "epoch": 6.448948948948949, + "grad_norm": 0.5442260146056317, + "learning_rate": 7.547114956534574e-07, + "loss": 0.1909, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1491059809923172, + "step": 4295, + "valid_targets_mean": 3889.1, + "valid_targets_min": 485 + }, + { + "epoch": 6.456456456456457, + "grad_norm": 0.5835357955664173, + "learning_rate": 7.344679638683527e-07, + "loss": 0.2332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2454603910446167, + "step": 4300, + "valid_targets_mean": 5267.1, + "valid_targets_min": 578 + }, + { + "epoch": 6.463963963963964, + "grad_norm": 0.6370221393360167, + "learning_rate": 7.144945518234014e-07, + "loss": 0.2188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20245075225830078, + "step": 4305, + "valid_targets_mean": 3993.6, + "valid_targets_min": 292 + }, + { + "epoch": 6.471471471471472, + "grad_norm": 0.487081794320264, + "learning_rate": 6.947915395634463e-07, + "loss": 0.2125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22089102864265442, + "step": 4310, + "valid_targets_mean": 4927.1, + "valid_targets_min": 801 + }, + { + "epoch": 6.478978978978979, + "grad_norm": 0.6970402670081776, + "learning_rate": 6.753592033420809e-07, + "loss": 0.1902, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19842252135276794, + "step": 4315, + "valid_targets_mean": 2896.4, + "valid_targets_min": 475 + }, + { + "epoch": 6.486486486486487, + "grad_norm": 0.5079593962075888, + "learning_rate": 6.561978156177939e-07, + "loss": 0.1899, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1689176708459854, + "step": 4320, + "valid_targets_mean": 4254.1, + "valid_targets_min": 861 + }, + { + "epoch": 6.493993993993994, + "grad_norm": 0.6574113141398381, + "learning_rate": 6.37307645050127e-07, + "loss": 0.2036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2085978239774704, + "step": 4325, + "valid_targets_mean": 3372.8, + "valid_targets_min": 478 + }, + { + "epoch": 6.501501501501502, + "grad_norm": 0.508520487800978, + "learning_rate": 6.186889564959209e-07, + "loss": 0.2103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23924149572849274, + "step": 4330, + "valid_targets_mean": 5251.2, + "valid_targets_min": 565 + }, + { + "epoch": 6.509009009009009, + "grad_norm": 0.6324952400411343, + "learning_rate": 6.003420110056035e-07, + "loss": 0.204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2557426691055298, + "step": 4335, + "valid_targets_mean": 4012.0, + "valid_targets_min": 503 + }, + { + "epoch": 6.516516516516517, + "grad_norm": 0.553904609232918, + "learning_rate": 5.822670658195173e-07, + "loss": 0.1907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21928340196609497, + "step": 4340, + "valid_targets_mean": 4482.4, + "valid_targets_min": 462 + }, + { + "epoch": 6.524024024024024, + "grad_norm": 0.616465266517478, + "learning_rate": 5.644643743643308e-07, + "loss": 0.1911, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21219468116760254, + "step": 4345, + "valid_targets_mean": 3722.6, + "valid_targets_min": 504 + }, + { + "epoch": 6.531531531531532, + "grad_norm": 0.7300734830816725, + "learning_rate": 5.469341862494637e-07, + "loss": 0.2046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2306327223777771, + "step": 4350, + "valid_targets_mean": 2665.6, + "valid_targets_min": 527 + }, + { + "epoch": 6.539039039039039, + "grad_norm": 0.5642202986656522, + "learning_rate": 5.296767472636077e-07, + "loss": 0.1916, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1920192539691925, + "step": 4355, + "valid_targets_mean": 3940.9, + "valid_targets_min": 461 + }, + { + "epoch": 6.546546546546547, + "grad_norm": 0.5150972885704285, + "learning_rate": 5.12692299371269e-07, + "loss": 0.1942, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14734071493148804, + "step": 4360, + "valid_targets_mean": 4389.3, + "valid_targets_min": 590 + }, + { + "epoch": 6.554054054054054, + "grad_norm": 0.6913329068038451, + "learning_rate": 4.959810807093779e-07, + "loss": 0.2347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2543680667877197, + "step": 4365, + "valid_targets_mean": 4525.6, + "valid_targets_min": 554 + }, + { + "epoch": 6.561561561561562, + "grad_norm": 0.5451201542998624, + "learning_rate": 4.795433255839488e-07, + "loss": 0.2049, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21131403744220734, + "step": 4370, + "valid_targets_mean": 4360.6, + "valid_targets_min": 654 + }, + { + "epoch": 6.569069069069069, + "grad_norm": 0.47040078136948726, + "learning_rate": 4.633792644667967e-07, + "loss": 0.1887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19125640392303467, + "step": 4375, + "valid_targets_mean": 5975.6, + "valid_targets_min": 1693 + }, + { + "epoch": 6.576576576576577, + "grad_norm": 0.5920967555082476, + "learning_rate": 4.474891239923063e-07, + "loss": 0.209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21285581588745117, + "step": 4380, + "valid_targets_mean": 4406.5, + "valid_targets_min": 643 + }, + { + "epoch": 6.584084084084084, + "grad_norm": 0.7205742446023933, + "learning_rate": 4.318731269542542e-07, + "loss": 0.1867, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21254310011863708, + "step": 4385, + "valid_targets_mean": 3167.4, + "valid_targets_min": 489 + }, + { + "epoch": 6.591591591591592, + "grad_norm": 0.5312844013389322, + "learning_rate": 4.1653149230268087e-07, + "loss": 0.2147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18900719285011292, + "step": 4390, + "valid_targets_mean": 4654.4, + "valid_targets_min": 479 + }, + { + "epoch": 6.599099099099099, + "grad_norm": 0.5723780339209558, + "learning_rate": 4.014644351408259e-07, + "loss": 0.1911, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22007179260253906, + "step": 4395, + "valid_targets_mean": 4301.8, + "valid_targets_min": 879 + }, + { + "epoch": 6.606606606606607, + "grad_norm": 0.5052598547107464, + "learning_rate": 3.8667216672211515e-07, + "loss": 0.2064, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17064377665519714, + "step": 4400, + "valid_targets_mean": 4867.7, + "valid_targets_min": 651 + }, + { + "epoch": 6.614114114114114, + "grad_norm": 0.7316891946951816, + "learning_rate": 3.7215489444718313e-07, + "loss": 0.2098, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22059103846549988, + "step": 4405, + "valid_targets_mean": 2672.8, + "valid_targets_min": 724 + }, + { + "epoch": 6.621621621621622, + "grad_norm": 0.6761848493284879, + "learning_rate": 3.5791282186099064e-07, + "loss": 0.2279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23260068893432617, + "step": 4410, + "valid_targets_mean": 3445.1, + "valid_targets_min": 613 + }, + { + "epoch": 6.629129129129129, + "grad_norm": 0.6708523109273121, + "learning_rate": 3.439461486499451e-07, + "loss": 0.2085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23754608631134033, + "step": 4415, + "valid_targets_mean": 3454.1, + "valid_targets_min": 495 + }, + { + "epoch": 6.636636636636637, + "grad_norm": 0.5130944622824821, + "learning_rate": 3.302550706391161e-07, + "loss": 0.2022, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1540905237197876, + "step": 4420, + "valid_targets_mean": 4449.4, + "valid_targets_min": 676 + }, + { + "epoch": 6.6441441441441444, + "grad_norm": 0.5253445851442728, + "learning_rate": 3.168397797894818e-07, + "loss": 0.2039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19345726072788239, + "step": 4425, + "valid_targets_mean": 5712.4, + "valid_targets_min": 814 + }, + { + "epoch": 6.651651651651652, + "grad_norm": 0.4999642956754218, + "learning_rate": 3.03700464195249e-07, + "loss": 0.1771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1533123254776001, + "step": 4430, + "valid_targets_mean": 4014.4, + "valid_targets_min": 448 + }, + { + "epoch": 6.6591591591591595, + "grad_norm": 0.6600694282993609, + "learning_rate": 2.9083730808120214e-07, + "loss": 0.252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26119160652160645, + "step": 4435, + "valid_targets_mean": 3377.4, + "valid_targets_min": 445 + }, + { + "epoch": 6.666666666666667, + "grad_norm": 0.6967687532254948, + "learning_rate": 2.78250491800125e-07, + "loss": 0.2081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1988351047039032, + "step": 4440, + "valid_targets_mean": 2430.9, + "valid_targets_min": 396 + }, + { + "epoch": 6.6741741741741745, + "grad_norm": 0.6534875940602458, + "learning_rate": 2.6594019183027844e-07, + "loss": 0.2099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2011052370071411, + "step": 4445, + "valid_targets_mean": 3745.1, + "valid_targets_min": 709 + }, + { + "epoch": 6.681681681681682, + "grad_norm": 0.6527183652558144, + "learning_rate": 2.5390658077292017e-07, + "loss": 0.2069, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19569085538387299, + "step": 4450, + "valid_targets_mean": 3687.9, + "valid_targets_min": 539 + }, + { + "epoch": 6.6891891891891895, + "grad_norm": 0.8645812862940178, + "learning_rate": 2.4214982734988013e-07, + "loss": 0.2033, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22479039430618286, + "step": 4455, + "valid_targets_mean": 2105.8, + "valid_targets_min": 704 + }, + { + "epoch": 6.696696696696697, + "grad_norm": 0.5255004317144049, + "learning_rate": 2.3067009640120652e-07, + "loss": 0.2365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26815474033355713, + "step": 4460, + "valid_targets_mean": 5218.8, + "valid_targets_min": 565 + }, + { + "epoch": 6.7042042042042045, + "grad_norm": 0.5916763601809271, + "learning_rate": 2.194675488828457e-07, + "loss": 0.1791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1834341287612915, + "step": 4465, + "valid_targets_mean": 3968.9, + "valid_targets_min": 704 + }, + { + "epoch": 6.711711711711712, + "grad_norm": 0.614175690495789, + "learning_rate": 2.0854234186438837e-07, + "loss": 0.1942, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24795103073120117, + "step": 4470, + "valid_targets_mean": 3751.3, + "valid_targets_min": 613 + }, + { + "epoch": 6.7192192192192195, + "grad_norm": 0.5171279571165366, + "learning_rate": 1.9789462852686237e-07, + "loss": 0.2013, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19156940281391144, + "step": 4475, + "valid_targets_mean": 4793.4, + "valid_targets_min": 679 + }, + { + "epoch": 6.726726726726727, + "grad_norm": 0.5178172693023037, + "learning_rate": 1.875245581605989e-07, + "loss": 0.187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16094420850276947, + "step": 4480, + "valid_targets_mean": 4665.6, + "valid_targets_min": 471 + }, + { + "epoch": 6.7342342342342345, + "grad_norm": 0.48834241533960204, + "learning_rate": 1.7743227616312308e-07, + "loss": 0.213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16197466850280762, + "step": 4485, + "valid_targets_mean": 4962.2, + "valid_targets_min": 1052 + }, + { + "epoch": 6.741741741741742, + "grad_norm": 0.5234925189927083, + "learning_rate": 1.676179240371245e-07, + "loss": 0.1807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16373220086097717, + "step": 4490, + "valid_targets_mean": 3922.8, + "valid_targets_min": 464 + }, + { + "epoch": 6.7492492492492495, + "grad_norm": 0.6352684311920057, + "learning_rate": 1.5808163938846986e-07, + "loss": 0.1976, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19435149431228638, + "step": 4495, + "valid_targets_mean": 3218.4, + "valid_targets_min": 478 + }, + { + "epoch": 6.756756756756757, + "grad_norm": 0.6229594370415693, + "learning_rate": 1.4882355592427344e-07, + "loss": 0.191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16698959469795227, + "step": 4500, + "valid_targets_mean": 3428.0, + "valid_targets_min": 665 + }, + { + "epoch": 6.7642642642642645, + "grad_norm": 0.6138075392270004, + "learning_rate": 1.3984380345102966e-07, + "loss": 0.2124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21965721249580383, + "step": 4505, + "valid_targets_mean": 3452.5, + "valid_targets_min": 408 + }, + { + "epoch": 6.771771771771772, + "grad_norm": 0.5674625446735158, + "learning_rate": 1.3114250787278128e-07, + "loss": 0.2276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17410314083099365, + "step": 4510, + "valid_targets_mean": 5005.2, + "valid_targets_min": 560 + }, + { + "epoch": 6.7792792792792795, + "grad_norm": 0.5558176857114882, + "learning_rate": 1.2271979118936074e-07, + "loss": 0.2114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17912578582763672, + "step": 4515, + "valid_targets_mean": 3863.8, + "valid_targets_min": 853 + }, + { + "epoch": 6.786786786786787, + "grad_norm": 0.635474987483276, + "learning_rate": 1.1457577149468268e-07, + "loss": 0.1884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17979639768600464, + "step": 4520, + "valid_targets_mean": 3391.7, + "valid_targets_min": 429 + }, + { + "epoch": 6.7942942942942945, + "grad_norm": 0.794907141047451, + "learning_rate": 1.0671056297507642e-07, + "loss": 0.2055, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19865292310714722, + "step": 4525, + "valid_targets_mean": 2033.5, + "valid_targets_min": 584 + }, + { + "epoch": 6.801801801801802, + "grad_norm": 0.49215782912003203, + "learning_rate": 9.91242759077049e-08, + "loss": 0.2204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15488871932029724, + "step": 4530, + "valid_targets_mean": 4630.8, + "valid_targets_min": 639 + }, + { + "epoch": 6.8093093093093096, + "grad_norm": 0.6173142764754961, + "learning_rate": 9.181701665899268e-08, + "loss": 0.1977, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18007370829582214, + "step": 4535, + "valid_targets_mean": 4323.4, + "valid_targets_min": 696 + }, + { + "epoch": 6.816816816816817, + "grad_norm": 0.7575394360176801, + "learning_rate": 8.47888876831604e-08, + "loss": 0.2296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24876675009727478, + "step": 4540, + "valid_targets_mean": 3193.3, + "valid_targets_min": 277 + }, + { + "epoch": 6.824324324324325, + "grad_norm": 0.5819569698944355, + "learning_rate": 7.803998752076824e-08, + "loss": 0.1881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17346233129501343, + "step": 4545, + "valid_targets_mean": 5187.9, + "valid_targets_min": 643 + }, + { + "epoch": 6.831831831831832, + "grad_norm": 0.4396136217756035, + "learning_rate": 7.157041079734584e-08, + "loss": 0.2325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18130743503570557, + "step": 4550, + "valid_targets_mean": 5762.4, + "valid_targets_min": 1228 + }, + { + "epoch": 6.83933933933934, + "grad_norm": 0.4687226820184863, + "learning_rate": 6.538024822206224e-08, + "loss": 0.2008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1852032095193863, + "step": 4555, + "valid_targets_mean": 5690.4, + "valid_targets_min": 702 + }, + { + "epoch": 6.846846846846847, + "grad_norm": 0.6527972240202853, + "learning_rate": 5.946958658645141e-08, + "loss": 0.2255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21088294684886932, + "step": 4560, + "valid_targets_mean": 3305.9, + "valid_targets_min": 371 + }, + { + "epoch": 6.854354354354355, + "grad_norm": 0.5747007613275369, + "learning_rate": 5.383850876319763e-08, + "loss": 0.1834, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16404512524604797, + "step": 4565, + "valid_targets_mean": 3822.6, + "valid_targets_min": 609 + }, + { + "epoch": 6.861861861861862, + "grad_norm": 0.5712891848122477, + "learning_rate": 4.848709370498084e-08, + "loss": 0.2022, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24901172518730164, + "step": 4570, + "valid_targets_mean": 5079.1, + "valid_targets_min": 574 + }, + { + "epoch": 6.86936936936937, + "grad_norm": 0.4949929365077814, + "learning_rate": 4.34154164433509e-08, + "loss": 0.1955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17748858034610748, + "step": 4575, + "valid_targets_mean": 4491.8, + "valid_targets_min": 634 + }, + { + "epoch": 6.876876876876877, + "grad_norm": 0.5203752172969807, + "learning_rate": 3.86235480876973e-08, + "loss": 0.2374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2086370289325714, + "step": 4580, + "valid_targets_mean": 5460.8, + "valid_targets_min": 622 + }, + { + "epoch": 6.884384384384385, + "grad_norm": 0.5124535100635832, + "learning_rate": 3.411155582423664e-08, + "loss": 0.1985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16645345091819763, + "step": 4585, + "valid_targets_mean": 4649.4, + "valid_targets_min": 549 + }, + { + "epoch": 6.891891891891892, + "grad_norm": 0.6965621530697311, + "learning_rate": 2.9879502915075574e-08, + "loss": 0.2214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21695250272750854, + "step": 4590, + "valid_targets_mean": 3357.6, + "valid_targets_min": 704 + }, + { + "epoch": 6.8993993993994, + "grad_norm": 0.5009549067493665, + "learning_rate": 2.592744869732933e-08, + "loss": 0.1793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2433704286813736, + "step": 4595, + "valid_targets_mean": 5021.1, + "valid_targets_min": 535 + }, + { + "epoch": 6.906906906906907, + "grad_norm": 0.6325006211055703, + "learning_rate": 2.2255448582280127e-08, + "loss": 0.1923, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1930675059556961, + "step": 4600, + "valid_targets_mean": 3954.0, + "valid_targets_min": 562 + }, + { + "epoch": 6.914414414414415, + "grad_norm": 0.5272145532637147, + "learning_rate": 1.8863554054606715e-08, + "loss": 0.1888, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1625824272632599, + "step": 4605, + "valid_targets_mean": 4323.2, + "valid_targets_min": 1190 + }, + { + "epoch": 6.921921921921922, + "grad_norm": 0.5329661463696542, + "learning_rate": 1.5751812671658264e-08, + "loss": 0.2071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19332969188690186, + "step": 4610, + "valid_targets_mean": 4747.9, + "valid_targets_min": 906 + }, + { + "epoch": 6.92942942942943, + "grad_norm": 0.5417369482444132, + "learning_rate": 1.2920268062794894e-08, + "loss": 0.2086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19969046115875244, + "step": 4615, + "valid_targets_mean": 4192.8, + "valid_targets_min": 728 + }, + { + "epoch": 6.936936936936937, + "grad_norm": 0.6878033594654033, + "learning_rate": 1.0368959928763744e-08, + "loss": 0.2053, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21517297625541687, + "step": 4620, + "valid_targets_mean": 3462.9, + "valid_targets_min": 709 + }, + { + "epoch": 6.944444444444445, + "grad_norm": 0.5692311923901623, + "learning_rate": 8.097924041157168e-09, + "loss": 0.1876, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17874173820018768, + "step": 4625, + "valid_targets_mean": 3834.8, + "valid_targets_min": 605 + }, + { + "epoch": 6.951951951951952, + "grad_norm": 0.8120108501447949, + "learning_rate": 6.107192241897597e-09, + "loss": 0.1783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19116508960723877, + "step": 4630, + "valid_targets_mean": 3737.9, + "valid_targets_min": 756 + }, + { + "epoch": 6.95945945945946, + "grad_norm": 0.6343758559646698, + "learning_rate": 4.396792442800113e-09, + "loss": 0.2174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2372244894504547, + "step": 4635, + "valid_targets_mean": 5020.9, + "valid_targets_min": 821 + }, + { + "epoch": 6.966966966966967, + "grad_norm": 0.4747536212588191, + "learning_rate": 2.9667486251794274e-09, + "loss": 0.1961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15623793005943298, + "step": 4640, + "valid_targets_mean": 5340.8, + "valid_targets_min": 611 + }, + { + "epoch": 6.974474474474475, + "grad_norm": 0.4663889436387785, + "learning_rate": 1.8170808395079342e-09, + "loss": 0.2291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.204878568649292, + "step": 4645, + "valid_targets_mean": 5754.1, + "valid_targets_min": 2157 + }, + { + "epoch": 6.981981981981982, + "grad_norm": 0.5618969419816342, + "learning_rate": 9.47805205140373e-10, + "loss": 0.196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2027861475944519, + "step": 4650, + "valid_targets_mean": 4324.8, + "valid_targets_min": 736 + }, + { + "epoch": 6.98948948948949, + "grad_norm": 0.440752938378733, + "learning_rate": 3.589339100917855e-10, + "loss": 0.1985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21129390597343445, + "step": 4655, + "valid_targets_mean": 6267.5, + "valid_targets_min": 686 + }, + { + "epoch": 6.996996996996997, + "grad_norm": 0.5759092607170879, + "learning_rate": 5.0475210853218045e-11, + "loss": 0.2195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21579566597938538, + "step": 4660, + "valid_targets_mean": 4775.2, + "valid_targets_min": 824 + }, + { + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1870056539773941, + "step": 4662, + "total_flos": 1420700030337024.0, + "train_loss": 0.25631797409579254, + "train_runtime": 31285.8836, + "train_samples_per_second": 2.384, + "train_steps_per_second": 0.149, + "valid_targets_mean": 4345.4, + "valid_targets_min": 624 + } + ], + "logging_steps": 5, + "max_steps": 4662, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 1500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1420700030337024.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}