diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,9606 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 4347, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.008051529790660225, + "grad_norm": 18.887031329342683, + "learning_rate": 3.6781609195402303e-07, + "loss": 0.7617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7170521020889282, + "step": 5, + "valid_targets_mean": 4721.3, + "valid_targets_min": 995 + }, + { + "epoch": 0.01610305958132045, + "grad_norm": 17.610592482733104, + "learning_rate": 8.275862068965518e-07, + "loss": 0.7308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7500532865524292, + "step": 10, + "valid_targets_mean": 4560.7, + "valid_targets_min": 1282 + }, + { + "epoch": 0.024154589371980676, + "grad_norm": 17.389455158716025, + "learning_rate": 1.2873563218390806e-06, + "loss": 0.719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7645566463470459, + "step": 15, + "valid_targets_mean": 4224.9, + "valid_targets_min": 1424 + }, + { + "epoch": 0.0322061191626409, + "grad_norm": 12.900467657800844, + "learning_rate": 1.7471264367816093e-06, + "loss": 0.6498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6394810676574707, + "step": 20, + "valid_targets_mean": 3705.6, + "valid_targets_min": 613 + }, + { + "epoch": 0.040257648953301126, + "grad_norm": 8.27577785552847, + "learning_rate": 2.206896551724138e-06, + "loss": 0.6021, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6195220351219177, + "step": 25, + "valid_targets_mean": 4815.3, + "valid_targets_min": 1591 + }, + { + "epoch": 0.04830917874396135, + "grad_norm": 4.570129109709082, + "learning_rate": 2.666666666666667e-06, + "loss": 0.5337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4826958477497101, + "step": 30, + "valid_targets_mean": 4646.1, + "valid_targets_min": 1763 + }, + { + "epoch": 0.05636070853462158, + "grad_norm": 2.9815097303289773, + "learning_rate": 3.1264367816091956e-06, + "loss": 0.5145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5005267858505249, + "step": 35, + "valid_targets_mean": 4164.2, + "valid_targets_min": 1256 + }, + { + "epoch": 0.0644122383252818, + "grad_norm": 1.6024853481776866, + "learning_rate": 3.5862068965517243e-06, + "loss": 0.4877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43937644362449646, + "step": 40, + "valid_targets_mean": 4963.1, + "valid_targets_min": 1405 + }, + { + "epoch": 0.07246376811594203, + "grad_norm": 1.1550666348042289, + "learning_rate": 4.0459770114942535e-06, + "loss": 0.4836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.45346975326538086, + "step": 45, + "valid_targets_mean": 5371.4, + "valid_targets_min": 1739 + }, + { + "epoch": 0.08051529790660225, + "grad_norm": 1.1056770926806292, + "learning_rate": 4.505747126436782e-06, + "loss": 0.4879, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4831688106060028, + "step": 50, + "valid_targets_mean": 5321.5, + "valid_targets_min": 1966 + }, + { + "epoch": 0.08856682769726248, + "grad_norm": 0.9520259825035011, + "learning_rate": 4.965517241379311e-06, + "loss": 0.4173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43055927753448486, + "step": 55, + "valid_targets_mean": 4930.1, + "valid_targets_min": 1613 + }, + { + "epoch": 0.0966183574879227, + "grad_norm": 0.7379387322001421, + "learning_rate": 5.42528735632184e-06, + "loss": 0.4301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41147735714912415, + "step": 60, + "valid_targets_mean": 4576.1, + "valid_targets_min": 693 + }, + { + "epoch": 0.10466988727858294, + "grad_norm": 0.6751399532440802, + "learning_rate": 5.8850574712643685e-06, + "loss": 0.3953, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40098029375076294, + "step": 65, + "valid_targets_mean": 4767.1, + "valid_targets_min": 1984 + }, + { + "epoch": 0.11272141706924316, + "grad_norm": 0.6451168287397513, + "learning_rate": 6.344827586206898e-06, + "loss": 0.3955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41312819719314575, + "step": 70, + "valid_targets_mean": 4322.2, + "valid_targets_min": 523 + }, + { + "epoch": 0.12077294685990338, + "grad_norm": 0.6097646247381094, + "learning_rate": 6.804597701149426e-06, + "loss": 0.4048, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3773424029350281, + "step": 75, + "valid_targets_mean": 4922.2, + "valid_targets_min": 1736 + }, + { + "epoch": 0.1288244766505636, + "grad_norm": 0.6955263869864199, + "learning_rate": 7.264367816091955e-06, + "loss": 0.386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3395404815673828, + "step": 80, + "valid_targets_mean": 4326.2, + "valid_targets_min": 822 + }, + { + "epoch": 0.13687600644122383, + "grad_norm": 0.6047921080138658, + "learning_rate": 7.724137931034483e-06, + "loss": 0.373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33859458565711975, + "step": 85, + "valid_targets_mean": 4358.8, + "valid_targets_min": 2465 + }, + { + "epoch": 0.14492753623188406, + "grad_norm": 0.5884903953581806, + "learning_rate": 8.183908045977013e-06, + "loss": 0.3308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.323479026556015, + "step": 90, + "valid_targets_mean": 4712.4, + "valid_targets_min": 2373 + }, + { + "epoch": 0.1529790660225443, + "grad_norm": 0.5738600516460006, + "learning_rate": 8.643678160919541e-06, + "loss": 0.3431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33414769172668457, + "step": 95, + "valid_targets_mean": 4288.1, + "valid_targets_min": 1850 + }, + { + "epoch": 0.1610305958132045, + "grad_norm": 0.6365501852080249, + "learning_rate": 9.10344827586207e-06, + "loss": 0.3526, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41237330436706543, + "step": 100, + "valid_targets_mean": 4589.5, + "valid_targets_min": 617 + }, + { + "epoch": 0.16908212560386474, + "grad_norm": 0.5810936825227695, + "learning_rate": 9.563218390804598e-06, + "loss": 0.3694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3179246783256531, + "step": 105, + "valid_targets_mean": 4583.3, + "valid_targets_min": 892 + }, + { + "epoch": 0.17713365539452497, + "grad_norm": 0.6085982104059682, + "learning_rate": 1.0022988505747126e-05, + "loss": 0.3688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3603907823562622, + "step": 110, + "valid_targets_mean": 3972.6, + "valid_targets_min": 529 + }, + { + "epoch": 0.18518518518518517, + "grad_norm": 0.6219836430762767, + "learning_rate": 1.0482758620689658e-05, + "loss": 0.3434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36107924580574036, + "step": 115, + "valid_targets_mean": 3626.8, + "valid_targets_min": 784 + }, + { + "epoch": 0.1932367149758454, + "grad_norm": 0.5706613275227181, + "learning_rate": 1.0942528735632186e-05, + "loss": 0.3638, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35068589448928833, + "step": 120, + "valid_targets_mean": 4590.1, + "valid_targets_min": 708 + }, + { + "epoch": 0.20128824476650564, + "grad_norm": 0.5239098489712369, + "learning_rate": 1.1402298850574713e-05, + "loss": 0.3214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31050339341163635, + "step": 125, + "valid_targets_mean": 4365.3, + "valid_targets_min": 919 + }, + { + "epoch": 0.20933977455716588, + "grad_norm": 0.5408209063854124, + "learning_rate": 1.1862068965517241e-05, + "loss": 0.3312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.343374639749527, + "step": 130, + "valid_targets_mean": 4735.1, + "valid_targets_min": 502 + }, + { + "epoch": 0.21739130434782608, + "grad_norm": 0.5429590967253727, + "learning_rate": 1.2321839080459773e-05, + "loss": 0.3132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2973455786705017, + "step": 135, + "valid_targets_mean": 4331.6, + "valid_targets_min": 1580 + }, + { + "epoch": 0.22544283413848631, + "grad_norm": 0.5564263430607277, + "learning_rate": 1.2781609195402301e-05, + "loss": 0.3223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33352577686309814, + "step": 140, + "valid_targets_mean": 4605.9, + "valid_targets_min": 973 + }, + { + "epoch": 0.23349436392914655, + "grad_norm": 0.59405705309966, + "learning_rate": 1.324137931034483e-05, + "loss": 0.3197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31183719635009766, + "step": 145, + "valid_targets_mean": 3893.1, + "valid_targets_min": 1253 + }, + { + "epoch": 0.24154589371980675, + "grad_norm": 0.5158909121745847, + "learning_rate": 1.3701149425287356e-05, + "loss": 0.3199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2867949604988098, + "step": 150, + "valid_targets_mean": 4840.7, + "valid_targets_min": 1486 + }, + { + "epoch": 0.249597423510467, + "grad_norm": 0.6568765520342051, + "learning_rate": 1.4160919540229888e-05, + "loss": 0.3106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3462482690811157, + "step": 155, + "valid_targets_mean": 3947.3, + "valid_targets_min": 1429 + }, + { + "epoch": 0.2576489533011272, + "grad_norm": 0.6071351268604575, + "learning_rate": 1.4620689655172416e-05, + "loss": 0.3274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33247584104537964, + "step": 160, + "valid_targets_mean": 4600.6, + "valid_targets_min": 1925 + }, + { + "epoch": 0.26570048309178745, + "grad_norm": 0.538560909580689, + "learning_rate": 1.5080459770114944e-05, + "loss": 0.3158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31049293279647827, + "step": 165, + "valid_targets_mean": 5258.5, + "valid_targets_min": 1983 + }, + { + "epoch": 0.27375201288244766, + "grad_norm": 0.568539391929678, + "learning_rate": 1.5540229885057473e-05, + "loss": 0.3157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30851513147354126, + "step": 170, + "valid_targets_mean": 4449.2, + "valid_targets_min": 453 + }, + { + "epoch": 0.28180354267310787, + "grad_norm": 0.6078239781315768, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.3106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.302283376455307, + "step": 175, + "valid_targets_mean": 4618.0, + "valid_targets_min": 2377 + }, + { + "epoch": 0.2898550724637681, + "grad_norm": 0.5885746435904814, + "learning_rate": 1.645977011494253e-05, + "loss": 0.3296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3404327630996704, + "step": 180, + "valid_targets_mean": 4848.8, + "valid_targets_min": 1075 + }, + { + "epoch": 0.29790660225442833, + "grad_norm": 0.6287600513879442, + "learning_rate": 1.691954022988506e-05, + "loss": 0.3086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35874509811401367, + "step": 185, + "valid_targets_mean": 4003.5, + "valid_targets_min": 321 + }, + { + "epoch": 0.3059581320450886, + "grad_norm": 0.5118887113449945, + "learning_rate": 1.7379310344827586e-05, + "loss": 0.3036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3192656636238098, + "step": 190, + "valid_targets_mean": 5059.1, + "valid_targets_min": 2133 + }, + { + "epoch": 0.3140096618357488, + "grad_norm": 0.5676499502591437, + "learning_rate": 1.7839080459770116e-05, + "loss": 0.2746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2645234167575836, + "step": 195, + "valid_targets_mean": 4453.4, + "valid_targets_min": 1599 + }, + { + "epoch": 0.322061191626409, + "grad_norm": 0.5756721398531897, + "learning_rate": 1.8298850574712646e-05, + "loss": 0.3204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3237614631652832, + "step": 200, + "valid_targets_mean": 4916.9, + "valid_targets_min": 2747 + }, + { + "epoch": 0.33011272141706927, + "grad_norm": 0.661205961638061, + "learning_rate": 1.8758620689655173e-05, + "loss": 0.2988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30946576595306396, + "step": 205, + "valid_targets_mean": 4202.1, + "valid_targets_min": 1793 + }, + { + "epoch": 0.33816425120772947, + "grad_norm": 0.5585042067771965, + "learning_rate": 1.9218390804597703e-05, + "loss": 0.3035, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30198991298675537, + "step": 210, + "valid_targets_mean": 4014.8, + "valid_targets_min": 1521 + }, + { + "epoch": 0.3462157809983897, + "grad_norm": 0.5364392722283728, + "learning_rate": 1.9678160919540233e-05, + "loss": 0.3032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3075558841228485, + "step": 215, + "valid_targets_mean": 4535.4, + "valid_targets_min": 736 + }, + { + "epoch": 0.35426731078904994, + "grad_norm": 0.5478504018061776, + "learning_rate": 2.013793103448276e-05, + "loss": 0.293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30238276720046997, + "step": 220, + "valid_targets_mean": 4278.9, + "valid_targets_min": 506 + }, + { + "epoch": 0.36231884057971014, + "grad_norm": 0.5543366789399233, + "learning_rate": 2.059770114942529e-05, + "loss": 0.3067, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31759655475616455, + "step": 225, + "valid_targets_mean": 5445.6, + "valid_targets_min": 1006 + }, + { + "epoch": 0.37037037037037035, + "grad_norm": 0.6286539962949731, + "learning_rate": 2.1057471264367816e-05, + "loss": 0.2964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30352896451950073, + "step": 230, + "valid_targets_mean": 3805.3, + "valid_targets_min": 676 + }, + { + "epoch": 0.3784219001610306, + "grad_norm": 0.5644783614075699, + "learning_rate": 2.1517241379310346e-05, + "loss": 0.2967, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3186013102531433, + "step": 235, + "valid_targets_mean": 4621.7, + "valid_targets_min": 1571 + }, + { + "epoch": 0.3864734299516908, + "grad_norm": 0.5931980260785429, + "learning_rate": 2.1977011494252873e-05, + "loss": 0.3034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28426745533943176, + "step": 240, + "valid_targets_mean": 4771.1, + "valid_targets_min": 2048 + }, + { + "epoch": 0.394524959742351, + "grad_norm": 0.5236909740791719, + "learning_rate": 2.2436781609195406e-05, + "loss": 0.2915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2973197102546692, + "step": 245, + "valid_targets_mean": 4744.9, + "valid_targets_min": 1209 + }, + { + "epoch": 0.4025764895330113, + "grad_norm": 0.5306243392897267, + "learning_rate": 2.2896551724137933e-05, + "loss": 0.2952, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3088257312774658, + "step": 250, + "valid_targets_mean": 5178.1, + "valid_targets_min": 1966 + }, + { + "epoch": 0.4106280193236715, + "grad_norm": 0.48677773497495813, + "learning_rate": 2.3356321839080463e-05, + "loss": 0.285, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26008594036102295, + "step": 255, + "valid_targets_mean": 5040.2, + "valid_targets_min": 1177 + }, + { + "epoch": 0.41867954911433175, + "grad_norm": 1.2774083864267622, + "learning_rate": 2.381609195402299e-05, + "loss": 0.2967, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2775506377220154, + "step": 260, + "valid_targets_mean": 4319.1, + "valid_targets_min": 570 + }, + { + "epoch": 0.42673107890499196, + "grad_norm": 0.6457594097564714, + "learning_rate": 2.427586206896552e-05, + "loss": 0.2886, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2859913408756256, + "step": 265, + "valid_targets_mean": 3785.8, + "valid_targets_min": 284 + }, + { + "epoch": 0.43478260869565216, + "grad_norm": 0.5721794133457904, + "learning_rate": 2.4735632183908046e-05, + "loss": 0.2675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2737387716770172, + "step": 270, + "valid_targets_mean": 4904.9, + "valid_targets_min": 1197 + }, + { + "epoch": 0.4428341384863124, + "grad_norm": 0.6155727873490212, + "learning_rate": 2.5195402298850576e-05, + "loss": 0.2987, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.317902147769928, + "step": 275, + "valid_targets_mean": 3661.2, + "valid_targets_min": 818 + }, + { + "epoch": 0.45088566827697263, + "grad_norm": 0.6349757508130273, + "learning_rate": 2.5655172413793103e-05, + "loss": 0.282, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2630305290222168, + "step": 280, + "valid_targets_mean": 3326.7, + "valid_targets_min": 941 + }, + { + "epoch": 0.45893719806763283, + "grad_norm": 0.6148296415866233, + "learning_rate": 2.6114942528735636e-05, + "loss": 0.2793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27396440505981445, + "step": 285, + "valid_targets_mean": 4434.6, + "valid_targets_min": 1038 + }, + { + "epoch": 0.4669887278582931, + "grad_norm": 0.5645172489838126, + "learning_rate": 2.6574712643678166e-05, + "loss": 0.2686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25225305557250977, + "step": 290, + "valid_targets_mean": 4993.3, + "valid_targets_min": 1962 + }, + { + "epoch": 0.4750402576489533, + "grad_norm": 0.5739342126572219, + "learning_rate": 2.7034482758620693e-05, + "loss": 0.2957, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.340742826461792, + "step": 295, + "valid_targets_mean": 4796.8, + "valid_targets_min": 710 + }, + { + "epoch": 0.4830917874396135, + "grad_norm": 0.5587075968581772, + "learning_rate": 2.749425287356322e-05, + "loss": 0.2832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27218180894851685, + "step": 300, + "valid_targets_mean": 4788.8, + "valid_targets_min": 882 + }, + { + "epoch": 0.49114331723027377, + "grad_norm": 0.7790422573070022, + "learning_rate": 2.795402298850575e-05, + "loss": 0.2762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25361332297325134, + "step": 305, + "valid_targets_mean": 3807.0, + "valid_targets_min": 344 + }, + { + "epoch": 0.499194847020934, + "grad_norm": 0.5359108312034309, + "learning_rate": 2.8413793103448276e-05, + "loss": 0.2899, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24744702875614166, + "step": 310, + "valid_targets_mean": 4844.2, + "valid_targets_min": 1948 + }, + { + "epoch": 0.5072463768115942, + "grad_norm": 0.7694244503734501, + "learning_rate": 2.8873563218390806e-05, + "loss": 0.2574, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25962382555007935, + "step": 315, + "valid_targets_mean": 3957.3, + "valid_targets_min": 751 + }, + { + "epoch": 0.5152979066022544, + "grad_norm": 0.6998112569580612, + "learning_rate": 2.9333333333333333e-05, + "loss": 0.2823, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.283919095993042, + "step": 320, + "valid_targets_mean": 4178.9, + "valid_targets_min": 1504 + }, + { + "epoch": 0.5233494363929146, + "grad_norm": 0.5572107376686236, + "learning_rate": 2.9793103448275866e-05, + "loss": 0.299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3307899236679077, + "step": 325, + "valid_targets_mean": 4934.9, + "valid_targets_min": 860 + }, + { + "epoch": 0.5314009661835749, + "grad_norm": 0.5601650228344617, + "learning_rate": 3.0252873563218396e-05, + "loss": 0.2824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26864737272262573, + "step": 330, + "valid_targets_mean": 3976.2, + "valid_targets_min": 1313 + }, + { + "epoch": 0.5394524959742351, + "grad_norm": 0.6400241549564539, + "learning_rate": 3.071264367816092e-05, + "loss": 0.2817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2869405746459961, + "step": 335, + "valid_targets_mean": 4011.7, + "valid_targets_min": 952 + }, + { + "epoch": 0.5475040257648953, + "grad_norm": 0.5021644926973708, + "learning_rate": 3.117241379310345e-05, + "loss": 0.2568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2651926279067993, + "step": 340, + "valid_targets_mean": 5310.1, + "valid_targets_min": 692 + }, + { + "epoch": 0.5555555555555556, + "grad_norm": 0.819724946601287, + "learning_rate": 3.1632183908045976e-05, + "loss": 0.2768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.297665536403656, + "step": 345, + "valid_targets_mean": 3748.9, + "valid_targets_min": 249 + }, + { + "epoch": 0.5636070853462157, + "grad_norm": 0.49635945683598137, + "learning_rate": 3.2091954022988506e-05, + "loss": 0.2736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27254384756088257, + "step": 350, + "valid_targets_mean": 4190.4, + "valid_targets_min": 380 + }, + { + "epoch": 0.571658615136876, + "grad_norm": 1.1643886506532481, + "learning_rate": 3.2551724137931036e-05, + "loss": 0.2698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2615281343460083, + "step": 355, + "valid_targets_mean": 3684.1, + "valid_targets_min": 304 + }, + { + "epoch": 0.5797101449275363, + "grad_norm": 0.5045131044975655, + "learning_rate": 3.3011494252873566e-05, + "loss": 0.2772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28446757793426514, + "step": 360, + "valid_targets_mean": 5213.9, + "valid_targets_min": 2518 + }, + { + "epoch": 0.5877616747181964, + "grad_norm": 0.5901966768401951, + "learning_rate": 3.3471264367816096e-05, + "loss": 0.2751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2504952549934387, + "step": 365, + "valid_targets_mean": 4182.6, + "valid_targets_min": 691 + }, + { + "epoch": 0.5958132045088567, + "grad_norm": 0.620399649393727, + "learning_rate": 3.3931034482758626e-05, + "loss": 0.2721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2381853461265564, + "step": 370, + "valid_targets_mean": 4586.0, + "valid_targets_min": 1672 + }, + { + "epoch": 0.6038647342995169, + "grad_norm": 0.5417315383509183, + "learning_rate": 3.4390804597701156e-05, + "loss": 0.2635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2672516405582428, + "step": 375, + "valid_targets_mean": 4311.4, + "valid_targets_min": 1082 + }, + { + "epoch": 0.6119162640901772, + "grad_norm": 0.6433514192784039, + "learning_rate": 3.485057471264368e-05, + "loss": 0.284, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32015344500541687, + "step": 380, + "valid_targets_mean": 3902.9, + "valid_targets_min": 1623 + }, + { + "epoch": 0.6199677938808373, + "grad_norm": 0.4775998328542989, + "learning_rate": 3.531034482758621e-05, + "loss": 0.2664, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24788491427898407, + "step": 385, + "valid_targets_mean": 4189.4, + "valid_targets_min": 713 + }, + { + "epoch": 0.6280193236714976, + "grad_norm": 0.5122265709609899, + "learning_rate": 3.577011494252874e-05, + "loss": 0.2738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27622339129447937, + "step": 390, + "valid_targets_mean": 4446.6, + "valid_targets_min": 1670 + }, + { + "epoch": 0.6360708534621579, + "grad_norm": 0.659466553378636, + "learning_rate": 3.622988505747126e-05, + "loss": 0.2831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3116128742694855, + "step": 395, + "valid_targets_mean": 4667.6, + "valid_targets_min": 1070 + }, + { + "epoch": 0.644122383252818, + "grad_norm": 0.5849425616180388, + "learning_rate": 3.668965517241379e-05, + "loss": 0.2548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27755099534988403, + "step": 400, + "valid_targets_mean": 4714.6, + "valid_targets_min": 995 + }, + { + "epoch": 0.6521739130434783, + "grad_norm": 0.5327108961779472, + "learning_rate": 3.714942528735633e-05, + "loss": 0.2745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27385851740837097, + "step": 405, + "valid_targets_mean": 5260.8, + "valid_targets_min": 2141 + }, + { + "epoch": 0.6602254428341385, + "grad_norm": 0.5998643197455865, + "learning_rate": 3.760919540229885e-05, + "loss": 0.2872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3069554567337036, + "step": 410, + "valid_targets_mean": 4323.8, + "valid_targets_min": 831 + }, + { + "epoch": 0.6682769726247987, + "grad_norm": 0.5690712799581026, + "learning_rate": 3.806896551724138e-05, + "loss": 0.2731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29719623923301697, + "step": 415, + "valid_targets_mean": 4270.9, + "valid_targets_min": 1089 + }, + { + "epoch": 0.6763285024154589, + "grad_norm": 0.5754513117973928, + "learning_rate": 3.852873563218391e-05, + "loss": 0.2721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29911819100379944, + "step": 420, + "valid_targets_mean": 4512.4, + "valid_targets_min": 1988 + }, + { + "epoch": 0.6843800322061192, + "grad_norm": 0.6061703211904361, + "learning_rate": 3.898850574712644e-05, + "loss": 0.2673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23556432127952576, + "step": 425, + "valid_targets_mean": 3921.6, + "valid_targets_min": 1217 + }, + { + "epoch": 0.6924315619967794, + "grad_norm": 0.5391890622955845, + "learning_rate": 3.9448275862068966e-05, + "loss": 0.2459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23832057416439056, + "step": 430, + "valid_targets_mean": 4345.2, + "valid_targets_min": 2090 + }, + { + "epoch": 0.7004830917874396, + "grad_norm": 0.6101927641358206, + "learning_rate": 3.9908045977011496e-05, + "loss": 0.2721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2649371325969696, + "step": 435, + "valid_targets_mean": 4433.6, + "valid_targets_min": 1317 + }, + { + "epoch": 0.7085346215780999, + "grad_norm": 0.5646390271799305, + "learning_rate": 3.9999896813789735e-05, + "loss": 0.2601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27699440717697144, + "step": 440, + "valid_targets_mean": 4894.1, + "valid_targets_min": 731 + }, + { + "epoch": 0.71658615136876, + "grad_norm": 0.5718807548846909, + "learning_rate": 3.999947762163533e-05, + "loss": 0.2623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29374945163726807, + "step": 445, + "valid_targets_mean": 3907.8, + "valid_targets_min": 1022 + }, + { + "epoch": 0.7246376811594203, + "grad_norm": 0.5614682322405541, + "learning_rate": 3.999873598115203e-05, + "loss": 0.251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.257183313369751, + "step": 450, + "valid_targets_mean": 4987.9, + "valid_targets_min": 1860 + }, + { + "epoch": 0.7326892109500805, + "grad_norm": 0.5300074790618707, + "learning_rate": 3.999767190429718e-05, + "loss": 0.2748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23682445287704468, + "step": 455, + "valid_targets_mean": 5225.4, + "valid_targets_min": 2450 + }, + { + "epoch": 0.7407407407407407, + "grad_norm": 0.6542049865844981, + "learning_rate": 3.99962854082267e-05, + "loss": 0.2822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29080742597579956, + "step": 460, + "valid_targets_mean": 4597.2, + "valid_targets_min": 2222 + }, + { + "epoch": 0.748792270531401, + "grad_norm": 0.502946880278254, + "learning_rate": 3.9994576515294864e-05, + "loss": 0.2668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28522011637687683, + "step": 465, + "valid_targets_mean": 4706.1, + "valid_targets_min": 1773 + }, + { + "epoch": 0.7568438003220612, + "grad_norm": 0.5050040823394027, + "learning_rate": 3.999254525305386e-05, + "loss": 0.2704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2506832480430603, + "step": 470, + "valid_targets_mean": 5194.9, + "valid_targets_min": 715 + }, + { + "epoch": 0.7648953301127214, + "grad_norm": 0.8322414020355918, + "learning_rate": 3.999019165425341e-05, + "loss": 0.2682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29580700397491455, + "step": 475, + "valid_targets_mean": 3748.2, + "valid_targets_min": 1077 + }, + { + "epoch": 0.7729468599033816, + "grad_norm": 0.5680386432970268, + "learning_rate": 3.99875157568402e-05, + "loss": 0.2655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2671373188495636, + "step": 480, + "valid_targets_mean": 4587.8, + "valid_targets_min": 775 + }, + { + "epoch": 0.7809983896940419, + "grad_norm": 0.5112203887556883, + "learning_rate": 3.998451760395729e-05, + "loss": 0.2623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2868801951408386, + "step": 485, + "valid_targets_mean": 4774.4, + "valid_targets_min": 1792 + }, + { + "epoch": 0.789049919484702, + "grad_norm": 0.5606005265672219, + "learning_rate": 3.99811972439434e-05, + "loss": 0.2643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2751631736755371, + "step": 490, + "valid_targets_mean": 4579.5, + "valid_targets_min": 1396 + }, + { + "epoch": 0.7971014492753623, + "grad_norm": 0.5467905444044749, + "learning_rate": 3.997755473033218e-05, + "loss": 0.2521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2562072277069092, + "step": 495, + "valid_targets_mean": 3827.3, + "valid_targets_min": 1348 + }, + { + "epoch": 0.8051529790660226, + "grad_norm": 0.5387721194984708, + "learning_rate": 3.997359012185127e-05, + "loss": 0.2612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2520545721054077, + "step": 500, + "valid_targets_mean": 4061.6, + "valid_targets_min": 547 + }, + { + "epoch": 0.8132045088566827, + "grad_norm": 0.5976518636156888, + "learning_rate": 3.996930348242141e-05, + "loss": 0.2388, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23626355826854706, + "step": 505, + "valid_targets_mean": 4540.9, + "valid_targets_min": 1484 + }, + { + "epoch": 0.821256038647343, + "grad_norm": 0.6166919816090995, + "learning_rate": 3.996469488115539e-05, + "loss": 0.2583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2615845203399658, + "step": 510, + "valid_targets_mean": 4508.8, + "valid_targets_min": 1246 + }, + { + "epoch": 0.8293075684380032, + "grad_norm": 0.530813397454018, + "learning_rate": 3.995976439235694e-05, + "loss": 0.2722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2612318694591522, + "step": 515, + "valid_targets_mean": 4225.1, + "valid_targets_min": 1044 + }, + { + "epoch": 0.8373590982286635, + "grad_norm": 0.6517729260520128, + "learning_rate": 3.995451209551953e-05, + "loss": 0.2386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23576730489730835, + "step": 520, + "valid_targets_mean": 5023.0, + "valid_targets_min": 1701 + }, + { + "epoch": 0.8454106280193237, + "grad_norm": 0.4740752251218264, + "learning_rate": 3.994893807532509e-05, + "loss": 0.2922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28204214572906494, + "step": 525, + "valid_targets_mean": 4959.8, + "valid_targets_min": 1081 + }, + { + "epoch": 0.8534621578099839, + "grad_norm": 0.5257899294040256, + "learning_rate": 3.994304242164265e-05, + "loss": 0.2593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2655045986175537, + "step": 530, + "valid_targets_mean": 4981.6, + "valid_targets_min": 2051 + }, + { + "epoch": 0.8615136876006442, + "grad_norm": 0.536523092714892, + "learning_rate": 3.9936825229526855e-05, + "loss": 0.2861, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29214853048324585, + "step": 535, + "valid_targets_mean": 4368.4, + "valid_targets_min": 2191 + }, + { + "epoch": 0.8695652173913043, + "grad_norm": 0.5028299546325571, + "learning_rate": 3.9930286599216506e-05, + "loss": 0.2587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27183425426483154, + "step": 540, + "valid_targets_mean": 5098.8, + "valid_targets_min": 2186 + }, + { + "epoch": 0.8776167471819646, + "grad_norm": 0.5635419055915679, + "learning_rate": 3.9923426636132866e-05, + "loss": 0.2442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2873607277870178, + "step": 545, + "valid_targets_mean": 4578.5, + "valid_targets_min": 1671 + }, + { + "epoch": 0.8856682769726248, + "grad_norm": 0.5724413640787459, + "learning_rate": 3.991624545087801e-05, + "loss": 0.2765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27895426750183105, + "step": 550, + "valid_targets_mean": 3958.7, + "valid_targets_min": 1181 + }, + { + "epoch": 0.893719806763285, + "grad_norm": 0.5024007626126078, + "learning_rate": 3.9908743159233016e-05, + "loss": 0.2569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20082515478134155, + "step": 555, + "valid_targets_mean": 4550.9, + "valid_targets_min": 1029 + }, + { + "epoch": 0.9017713365539453, + "grad_norm": 0.5485922659002295, + "learning_rate": 3.990091988215612e-05, + "loss": 0.2468, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2621651887893677, + "step": 560, + "valid_targets_mean": 4163.2, + "valid_targets_min": 1507 + }, + { + "epoch": 0.9098228663446055, + "grad_norm": 0.5184933123196136, + "learning_rate": 3.989277574578074e-05, + "loss": 0.2692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20918220281600952, + "step": 565, + "valid_targets_mean": 3785.8, + "valid_targets_min": 990 + }, + { + "epoch": 0.9178743961352657, + "grad_norm": 0.5186343425685319, + "learning_rate": 3.9884310881413473e-05, + "loss": 0.2637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2877514362335205, + "step": 570, + "valid_targets_mean": 5365.6, + "valid_targets_min": 956 + }, + { + "epoch": 0.9259259259259259, + "grad_norm": 0.5097833267576443, + "learning_rate": 3.987552542553194e-05, + "loss": 0.2501, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23448419570922852, + "step": 575, + "valid_targets_mean": 4300.2, + "valid_targets_min": 1615 + }, + { + "epoch": 0.9339774557165862, + "grad_norm": 0.5183101791069704, + "learning_rate": 3.9866419519782636e-05, + "loss": 0.2565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2744702696800232, + "step": 580, + "valid_targets_mean": 4674.6, + "valid_targets_min": 267 + }, + { + "epoch": 0.9420289855072463, + "grad_norm": 0.5621277031078485, + "learning_rate": 3.985699331097858e-05, + "loss": 0.2678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2877187728881836, + "step": 585, + "valid_targets_mean": 4813.2, + "valid_targets_min": 720 + }, + { + "epoch": 0.9500805152979066, + "grad_norm": 0.6940945000218365, + "learning_rate": 3.984724695109702e-05, + "loss": 0.2541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29620370268821716, + "step": 590, + "valid_targets_mean": 5132.4, + "valid_targets_min": 1631 + }, + { + "epoch": 0.9581320450885669, + "grad_norm": 0.6065162564844915, + "learning_rate": 3.983718059727693e-05, + "loss": 0.2557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27759110927581787, + "step": 595, + "valid_targets_mean": 3982.5, + "valid_targets_min": 1549 + }, + { + "epoch": 0.966183574879227, + "grad_norm": 0.5059275833472612, + "learning_rate": 3.9826794411816495e-05, + "loss": 0.2645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27101898193359375, + "step": 600, + "valid_targets_mean": 4788.5, + "valid_targets_min": 1713 + }, + { + "epoch": 0.9742351046698873, + "grad_norm": 0.4992312775195265, + "learning_rate": 3.981608856217049e-05, + "loss": 0.2626, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23076659440994263, + "step": 605, + "valid_targets_mean": 4342.8, + "valid_targets_min": 1263 + }, + { + "epoch": 0.9822866344605475, + "grad_norm": 0.5353175524067539, + "learning_rate": 3.980506322094761e-05, + "loss": 0.2441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2311723828315735, + "step": 610, + "valid_targets_mean": 4810.8, + "valid_targets_min": 570 + }, + { + "epoch": 0.9903381642512077, + "grad_norm": 0.6330855933495575, + "learning_rate": 3.979371856590762e-05, + "loss": 0.2715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28571778535842896, + "step": 615, + "valid_targets_mean": 4376.3, + "valid_targets_min": 2240 + }, + { + "epoch": 0.998389694041868, + "grad_norm": 0.5397840170469355, + "learning_rate": 3.978205477995856e-05, + "loss": 0.2662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24998024106025696, + "step": 620, + "valid_targets_mean": 4034.1, + "valid_targets_min": 1638 + }, + { + "epoch": 1.0064412238325282, + "grad_norm": 0.5507361899409122, + "learning_rate": 3.9770072051153754e-05, + "loss": 0.2544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.252638041973114, + "step": 625, + "valid_targets_mean": 4238.3, + "valid_targets_min": 1021 + }, + { + "epoch": 1.0144927536231885, + "grad_norm": 0.5602531648929935, + "learning_rate": 3.9757770572688786e-05, + "loss": 0.2414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23810608685016632, + "step": 630, + "valid_targets_mean": 3836.5, + "valid_targets_min": 1569 + }, + { + "epoch": 1.0225442834138487, + "grad_norm": 0.5389828337838378, + "learning_rate": 3.9745150542898405e-05, + "loss": 0.2349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20239052176475525, + "step": 635, + "valid_targets_mean": 3506.9, + "valid_targets_min": 1175 + }, + { + "epoch": 1.0305958132045088, + "grad_norm": 0.5471627213957879, + "learning_rate": 3.97322121652533e-05, + "loss": 0.2466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23700180649757385, + "step": 640, + "valid_targets_mean": 4167.8, + "valid_targets_min": 380 + }, + { + "epoch": 1.038647342995169, + "grad_norm": 0.5304941534687796, + "learning_rate": 3.971895564835683e-05, + "loss": 0.2265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21442700922489166, + "step": 645, + "valid_targets_mean": 4103.1, + "valid_targets_min": 1034 + }, + { + "epoch": 1.0466988727858293, + "grad_norm": 0.5123253191465812, + "learning_rate": 3.970538120594166e-05, + "loss": 0.2191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22118300199508667, + "step": 650, + "valid_targets_mean": 4709.2, + "valid_targets_min": 1225 + }, + { + "epoch": 1.0547504025764896, + "grad_norm": 0.5144164225120834, + "learning_rate": 3.9691489056866324e-05, + "loss": 0.2423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2365712821483612, + "step": 655, + "valid_targets_mean": 4630.8, + "valid_targets_min": 862 + }, + { + "epoch": 1.0628019323671498, + "grad_norm": 0.6461651582671288, + "learning_rate": 3.9677279425111684e-05, + "loss": 0.2439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24799947440624237, + "step": 660, + "valid_targets_mean": 4166.1, + "valid_targets_min": 1309 + }, + { + "epoch": 1.07085346215781, + "grad_norm": 0.5024189972964114, + "learning_rate": 3.9662752539777314e-05, + "loss": 0.2445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24382641911506653, + "step": 665, + "valid_targets_mean": 5176.7, + "valid_targets_min": 521 + }, + { + "epoch": 1.0789049919484701, + "grad_norm": 0.9391803347175582, + "learning_rate": 3.9647908635077845e-05, + "loss": 0.2615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.277189165353775, + "step": 670, + "valid_targets_mean": 4315.8, + "valid_targets_min": 703 + }, + { + "epoch": 1.0869565217391304, + "grad_norm": 0.4882832004268316, + "learning_rate": 3.963274795033913e-05, + "loss": 0.245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24163475632667542, + "step": 675, + "valid_targets_mean": 5134.3, + "valid_targets_min": 710 + }, + { + "epoch": 1.0950080515297906, + "grad_norm": 0.6167872996926629, + "learning_rate": 3.9617270729994436e-05, + "loss": 0.2482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24507923424243927, + "step": 680, + "valid_targets_mean": 4063.7, + "valid_targets_min": 1058 + }, + { + "epoch": 1.103059581320451, + "grad_norm": 0.6232571737295083, + "learning_rate": 3.960147722358046e-05, + "loss": 0.2504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23517994582653046, + "step": 685, + "valid_targets_mean": 3350.7, + "valid_targets_min": 736 + }, + { + "epoch": 1.1111111111111112, + "grad_norm": 0.5205417579091651, + "learning_rate": 3.958536768573335e-05, + "loss": 0.245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25678330659866333, + "step": 690, + "valid_targets_mean": 4774.0, + "valid_targets_min": 1681 + }, + { + "epoch": 1.1191626409017714, + "grad_norm": 0.5540696760333587, + "learning_rate": 3.956894237618456e-05, + "loss": 0.2455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2578514814376831, + "step": 695, + "valid_targets_mean": 3721.6, + "valid_targets_min": 543 + }, + { + "epoch": 1.1272141706924317, + "grad_norm": 0.5703134217215095, + "learning_rate": 3.955220155975669e-05, + "loss": 0.2465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27087417244911194, + "step": 700, + "valid_targets_mean": 4647.0, + "valid_targets_min": 1837 + }, + { + "epoch": 1.1352657004830917, + "grad_norm": 0.5281006763450222, + "learning_rate": 3.9535145506359206e-05, + "loss": 0.2431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2202616184949875, + "step": 705, + "valid_targets_mean": 4643.4, + "valid_targets_min": 548 + }, + { + "epoch": 1.143317230273752, + "grad_norm": 0.4639603789841704, + "learning_rate": 3.951777449098408e-05, + "loss": 0.2377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2633700966835022, + "step": 710, + "valid_targets_mean": 5459.8, + "valid_targets_min": 2264 + }, + { + "epoch": 1.1513687600644122, + "grad_norm": 0.469137374700247, + "learning_rate": 3.9500088793701387e-05, + "loss": 0.2415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19833050668239594, + "step": 715, + "valid_targets_mean": 4964.5, + "valid_targets_min": 2160 + }, + { + "epoch": 1.1594202898550725, + "grad_norm": 0.4905202669992282, + "learning_rate": 3.948208869965473e-05, + "loss": 0.2388, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24042034149169922, + "step": 720, + "valid_targets_mean": 4569.5, + "valid_targets_min": 776 + }, + { + "epoch": 1.1674718196457328, + "grad_norm": 0.5271378070562325, + "learning_rate": 3.946377449905672e-05, + "loss": 0.2332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24370059370994568, + "step": 725, + "valid_targets_mean": 4409.8, + "valid_targets_min": 1705 + }, + { + "epoch": 1.1755233494363928, + "grad_norm": 0.48816490928722517, + "learning_rate": 3.9445146487184226e-05, + "loss": 0.2268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23395568132400513, + "step": 730, + "valid_targets_mean": 4742.6, + "valid_targets_min": 1740 + }, + { + "epoch": 1.183574879227053, + "grad_norm": 0.508703086089454, + "learning_rate": 3.942620496437366e-05, + "loss": 0.2398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2787438929080963, + "step": 735, + "valid_targets_mean": 5090.3, + "valid_targets_min": 720 + }, + { + "epoch": 1.1916264090177133, + "grad_norm": 0.5274435230388868, + "learning_rate": 3.940695023601612e-05, + "loss": 0.2434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23083099722862244, + "step": 740, + "valid_targets_mean": 4942.7, + "valid_targets_min": 1747 + }, + { + "epoch": 1.1996779388083736, + "grad_norm": 0.48187263788077117, + "learning_rate": 3.938738261255247e-05, + "loss": 0.2322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21453389525413513, + "step": 745, + "valid_targets_mean": 4564.6, + "valid_targets_min": 1401 + }, + { + "epoch": 1.2077294685990339, + "grad_norm": 0.7495259641509139, + "learning_rate": 3.9367502409468315e-05, + "loss": 0.2489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21770915389060974, + "step": 750, + "valid_targets_mean": 4150.0, + "valid_targets_min": 848 + }, + { + "epoch": 1.2157809983896941, + "grad_norm": 0.513119058591006, + "learning_rate": 3.934730994728893e-05, + "loss": 0.2449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2321898341178894, + "step": 755, + "valid_targets_mean": 3742.6, + "valid_targets_min": 1247 + }, + { + "epoch": 1.2238325281803544, + "grad_norm": 0.5088681345844467, + "learning_rate": 3.932680555157413e-05, + "loss": 0.2362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2543366551399231, + "step": 760, + "valid_targets_mean": 4571.6, + "valid_targets_min": 745 + }, + { + "epoch": 1.2318840579710144, + "grad_norm": 0.5212074947311489, + "learning_rate": 3.9305989552912936e-05, + "loss": 0.2311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24161793291568756, + "step": 765, + "valid_targets_mean": 4267.8, + "valid_targets_min": 2059 + }, + { + "epoch": 1.2399355877616747, + "grad_norm": 0.6379814909420424, + "learning_rate": 3.928486228691831e-05, + "loss": 0.2377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25767624378204346, + "step": 770, + "valid_targets_mean": 4049.4, + "valid_targets_min": 1596 + }, + { + "epoch": 1.247987117552335, + "grad_norm": 0.5355001588932113, + "learning_rate": 3.926342409422175e-05, + "loss": 0.2504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23288603127002716, + "step": 775, + "valid_targets_mean": 3844.3, + "valid_targets_min": 823 + }, + { + "epoch": 1.2560386473429952, + "grad_norm": 0.5212100454034752, + "learning_rate": 3.924167532046773e-05, + "loss": 0.2554, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24769359827041626, + "step": 780, + "valid_targets_mean": 4338.0, + "valid_targets_min": 1507 + }, + { + "epoch": 1.2640901771336555, + "grad_norm": 0.5991969877035959, + "learning_rate": 3.9219616316308215e-05, + "loss": 0.2391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23935574293136597, + "step": 785, + "valid_targets_mean": 4815.1, + "valid_targets_min": 1755 + }, + { + "epoch": 1.2721417069243155, + "grad_norm": 0.5852623692737184, + "learning_rate": 3.919724743739694e-05, + "loss": 0.2462, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2316393107175827, + "step": 790, + "valid_targets_mean": 4748.1, + "valid_targets_min": 1010 + }, + { + "epoch": 1.2801932367149758, + "grad_norm": 0.5400353515222852, + "learning_rate": 3.91745690443837e-05, + "loss": 0.2372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24360281229019165, + "step": 795, + "valid_targets_mean": 4905.1, + "valid_targets_min": 1697 + }, + { + "epoch": 1.288244766505636, + "grad_norm": 0.5161454582163929, + "learning_rate": 3.915158150290855e-05, + "loss": 0.2297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2185833752155304, + "step": 800, + "valid_targets_mean": 4708.6, + "valid_targets_min": 1210 + }, + { + "epoch": 1.2962962962962963, + "grad_norm": 0.5421912889681342, + "learning_rate": 3.912828518359588e-05, + "loss": 0.2361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24869637191295624, + "step": 805, + "valid_targets_mean": 5677.7, + "valid_targets_min": 2666 + }, + { + "epoch": 1.3043478260869565, + "grad_norm": 0.5587294675034205, + "learning_rate": 3.910468046204846e-05, + "loss": 0.2255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.240371972322464, + "step": 810, + "valid_targets_mean": 3585.4, + "valid_targets_min": 677 + }, + { + "epoch": 1.3123993558776168, + "grad_norm": 0.5803843717029648, + "learning_rate": 3.908076771884139e-05, + "loss": 0.2386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23448866605758667, + "step": 815, + "valid_targets_mean": 4076.1, + "valid_targets_min": 1353 + }, + { + "epoch": 1.320450885668277, + "grad_norm": 0.6615696992092541, + "learning_rate": 3.905654733951595e-05, + "loss": 0.2366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24138154089450836, + "step": 820, + "valid_targets_mean": 3298.6, + "valid_targets_min": 713 + }, + { + "epoch": 1.3285024154589373, + "grad_norm": 0.48424332287717764, + "learning_rate": 3.9032019714573366e-05, + "loss": 0.2274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2244749665260315, + "step": 825, + "valid_targets_mean": 4370.7, + "valid_targets_min": 1299 + }, + { + "epoch": 1.3365539452495974, + "grad_norm": 0.5116233310783836, + "learning_rate": 3.9007185239468554e-05, + "loss": 0.2523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2363509237766266, + "step": 830, + "valid_targets_mean": 4251.2, + "valid_targets_min": 1057 + }, + { + "epoch": 1.3446054750402576, + "grad_norm": 0.5726251093172268, + "learning_rate": 3.8982044314603725e-05, + "loss": 0.2382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25756919384002686, + "step": 835, + "valid_targets_mean": 4103.4, + "valid_targets_min": 818 + }, + { + "epoch": 1.3526570048309179, + "grad_norm": 0.5399551753973628, + "learning_rate": 3.8956597345321927e-05, + "loss": 0.2403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23214933276176453, + "step": 840, + "valid_targets_mean": 3571.1, + "valid_targets_min": 997 + }, + { + "epoch": 1.3607085346215781, + "grad_norm": 0.5153551957359142, + "learning_rate": 3.893084474190051e-05, + "loss": 0.2456, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24375241994857788, + "step": 845, + "valid_targets_mean": 4158.2, + "valid_targets_min": 952 + }, + { + "epoch": 1.3687600644122382, + "grad_norm": 0.48984930240931984, + "learning_rate": 3.890478691954452e-05, + "loss": 0.2416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2558751702308655, + "step": 850, + "valid_targets_mean": 4738.9, + "valid_targets_min": 656 + }, + { + "epoch": 1.3768115942028984, + "grad_norm": 0.5179307198966028, + "learning_rate": 3.8878424298379996e-05, + "loss": 0.2445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22707146406173706, + "step": 855, + "valid_targets_mean": 4074.3, + "valid_targets_min": 1101 + }, + { + "epoch": 1.3848631239935587, + "grad_norm": 0.6293401724856043, + "learning_rate": 3.885175730344718e-05, + "loss": 0.2487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24598611891269684, + "step": 860, + "valid_targets_mean": 4331.4, + "valid_targets_min": 1429 + }, + { + "epoch": 1.392914653784219, + "grad_norm": 0.5429776165767362, + "learning_rate": 3.882478636469372e-05, + "loss": 0.2381, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25051477551460266, + "step": 865, + "valid_targets_mean": 4375.6, + "valid_targets_min": 1616 + }, + { + "epoch": 1.4009661835748792, + "grad_norm": 0.5668228228210831, + "learning_rate": 3.879751191696766e-05, + "loss": 0.2554, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2403651624917984, + "step": 870, + "valid_targets_mean": 3847.9, + "valid_targets_min": 656 + }, + { + "epoch": 1.4090177133655395, + "grad_norm": 0.4916125568229993, + "learning_rate": 3.8769934400010506e-05, + "loss": 0.2376, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23794618248939514, + "step": 875, + "valid_targets_mean": 4820.1, + "valid_targets_min": 284 + }, + { + "epoch": 1.4170692431561998, + "grad_norm": 0.48401044866312015, + "learning_rate": 3.8742054258450085e-05, + "loss": 0.2435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25705933570861816, + "step": 880, + "valid_targets_mean": 4531.7, + "valid_targets_min": 1442 + }, + { + "epoch": 1.42512077294686, + "grad_norm": 0.5305875988771065, + "learning_rate": 3.871387194179338e-05, + "loss": 0.2428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20840579271316528, + "step": 885, + "valid_targets_mean": 4164.4, + "valid_targets_min": 2047 + }, + { + "epoch": 1.43317230273752, + "grad_norm": 0.5592288973797526, + "learning_rate": 3.868538790441931e-05, + "loss": 0.2344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24872620403766632, + "step": 890, + "valid_targets_mean": 4424.3, + "valid_targets_min": 1035 + }, + { + "epoch": 1.4412238325281803, + "grad_norm": 0.46555566538841864, + "learning_rate": 3.865660260557138e-05, + "loss": 0.2324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21978382766246796, + "step": 895, + "valid_targets_mean": 4317.1, + "valid_targets_min": 2278 + }, + { + "epoch": 1.4492753623188406, + "grad_norm": 0.5185263723338586, + "learning_rate": 3.8627516509350286e-05, + "loss": 0.2379, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2476550042629242, + "step": 900, + "valid_targets_mean": 3795.6, + "valid_targets_min": 1701 + }, + { + "epoch": 1.4573268921095008, + "grad_norm": 0.5514262995566678, + "learning_rate": 3.859813008470644e-05, + "loss": 0.2488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24102932214736938, + "step": 905, + "valid_targets_mean": 3926.9, + "valid_targets_min": 1209 + }, + { + "epoch": 1.465378421900161, + "grad_norm": 0.4713138062680591, + "learning_rate": 3.856844380543239e-05, + "loss": 0.2316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23857206106185913, + "step": 910, + "valid_targets_mean": 4829.1, + "valid_targets_min": 1822 + }, + { + "epoch": 1.4734299516908211, + "grad_norm": 0.46745151947518565, + "learning_rate": 3.8538458150155186e-05, + "loss": 0.2337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21306023001670837, + "step": 915, + "valid_targets_mean": 5670.3, + "valid_targets_min": 1223 + }, + { + "epoch": 1.4814814814814814, + "grad_norm": 0.5399378076969245, + "learning_rate": 3.850817360232869e-05, + "loss": 0.2228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23806512355804443, + "step": 920, + "valid_targets_mean": 5083.9, + "valid_targets_min": 2183 + }, + { + "epoch": 1.4895330112721417, + "grad_norm": 0.6208023209932018, + "learning_rate": 3.8477590650225735e-05, + "loss": 0.245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26910722255706787, + "step": 925, + "valid_targets_mean": 4777.4, + "valid_targets_min": 1044 + }, + { + "epoch": 1.497584541062802, + "grad_norm": 0.5213298121096795, + "learning_rate": 3.8446709786930305e-05, + "loss": 0.2341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24538570642471313, + "step": 930, + "valid_targets_mean": 4161.3, + "valid_targets_min": 1399 + }, + { + "epoch": 1.5056360708534622, + "grad_norm": 0.5236314811464815, + "learning_rate": 3.841553151032953e-05, + "loss": 0.2439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23516438901424408, + "step": 935, + "valid_targets_mean": 5095.7, + "valid_targets_min": 1027 + }, + { + "epoch": 1.5136876006441224, + "grad_norm": 0.44460197327326756, + "learning_rate": 3.8384056323105695e-05, + "loss": 0.2441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23193703591823578, + "step": 940, + "valid_targets_mean": 5266.9, + "valid_targets_min": 1863 + }, + { + "epoch": 1.5217391304347827, + "grad_norm": 0.6021732933768308, + "learning_rate": 3.835228473272814e-05, + "loss": 0.2202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2238101363182068, + "step": 945, + "valid_targets_mean": 4458.2, + "valid_targets_min": 751 + }, + { + "epoch": 1.529790660225443, + "grad_norm": 0.5548844712586349, + "learning_rate": 3.832021725144506e-05, + "loss": 0.2345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2418593019247055, + "step": 950, + "valid_targets_mean": 4469.2, + "valid_targets_min": 1717 + }, + { + "epoch": 1.537842190016103, + "grad_norm": 0.46334027352601115, + "learning_rate": 3.828785439627523e-05, + "loss": 0.2517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24094292521476746, + "step": 955, + "valid_targets_mean": 5478.1, + "valid_targets_min": 1745 + }, + { + "epoch": 1.5458937198067633, + "grad_norm": 0.5074800565232367, + "learning_rate": 3.825519668899972e-05, + "loss": 0.2418, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25097841024398804, + "step": 960, + "valid_targets_mean": 5015.4, + "valid_targets_min": 1369 + }, + { + "epoch": 1.5539452495974235, + "grad_norm": 0.5212941157766611, + "learning_rate": 3.8222244656153444e-05, + "loss": 0.2467, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2439125031232834, + "step": 965, + "valid_targets_mean": 4481.0, + "valid_targets_min": 973 + }, + { + "epoch": 1.5619967793880838, + "grad_norm": 0.5111240156665687, + "learning_rate": 3.818899882901666e-05, + "loss": 0.2359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22901548445224762, + "step": 970, + "valid_targets_mean": 4092.8, + "valid_targets_min": 1591 + }, + { + "epoch": 1.5700483091787438, + "grad_norm": 0.512770693439504, + "learning_rate": 3.815545974360644e-05, + "loss": 0.2231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23196808993816376, + "step": 975, + "valid_targets_mean": 4075.3, + "valid_targets_min": 1956 + }, + { + "epoch": 1.578099838969404, + "grad_norm": 0.5042161332697872, + "learning_rate": 3.812162794066802e-05, + "loss": 0.236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25058242678642273, + "step": 980, + "valid_targets_mean": 4614.4, + "valid_targets_min": 339 + }, + { + "epoch": 1.5861513687600644, + "grad_norm": 0.4846950795943882, + "learning_rate": 3.8087503965666057e-05, + "loss": 0.2359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2251492440700531, + "step": 985, + "valid_targets_mean": 4310.9, + "valid_targets_min": 1978 + }, + { + "epoch": 1.5942028985507246, + "grad_norm": 0.49445825909902524, + "learning_rate": 3.805308836877586e-05, + "loss": 0.2419, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2309640347957611, + "step": 990, + "valid_targets_mean": 4083.0, + "valid_targets_min": 688 + }, + { + "epoch": 1.6022544283413849, + "grad_norm": 0.522939331982219, + "learning_rate": 3.80183817048745e-05, + "loss": 0.2301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23157933354377747, + "step": 995, + "valid_targets_mean": 3599.1, + "valid_targets_min": 358 + }, + { + "epoch": 1.6103059581320451, + "grad_norm": 0.5144792659864994, + "learning_rate": 3.7983384533531894e-05, + "loss": 0.2367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2594456374645233, + "step": 1000, + "valid_targets_mean": 4572.1, + "valid_targets_min": 1116 + }, + { + "epoch": 1.6183574879227054, + "grad_norm": 0.514940069429718, + "learning_rate": 3.7948097419001736e-05, + "loss": 0.2325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23250789940357208, + "step": 1005, + "valid_targets_mean": 4625.0, + "valid_targets_min": 350 + }, + { + "epoch": 1.6264090177133657, + "grad_norm": 0.52221869816674, + "learning_rate": 3.7912520930212445e-05, + "loss": 0.2434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21590971946716309, + "step": 1010, + "valid_targets_mean": 4035.4, + "valid_targets_min": 1483 + }, + { + "epoch": 1.634460547504026, + "grad_norm": 0.42946813411877144, + "learning_rate": 3.7876655640757974e-05, + "loss": 0.2439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23158404231071472, + "step": 1015, + "valid_targets_mean": 5336.1, + "valid_targets_min": 979 + }, + { + "epoch": 1.642512077294686, + "grad_norm": 0.4983259472306404, + "learning_rate": 3.784050212888857e-05, + "loss": 0.2476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2527810335159302, + "step": 1020, + "valid_targets_mean": 5022.8, + "valid_targets_min": 2939 + }, + { + "epoch": 1.6505636070853462, + "grad_norm": 0.46712880691077496, + "learning_rate": 3.780406097750141e-05, + "loss": 0.2465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22650480270385742, + "step": 1025, + "valid_targets_mean": 4311.5, + "valid_targets_min": 1275 + }, + { + "epoch": 1.6586151368760065, + "grad_norm": 0.5417140597671484, + "learning_rate": 3.776733277413127e-05, + "loss": 0.2176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.195145845413208, + "step": 1030, + "valid_targets_mean": 4207.8, + "valid_targets_min": 1043 + }, + { + "epoch": 1.6666666666666665, + "grad_norm": 0.4883088038596097, + "learning_rate": 3.7730318110941004e-05, + "loss": 0.224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21643765270709991, + "step": 1035, + "valid_targets_mean": 4391.3, + "valid_targets_min": 2660 + }, + { + "epoch": 1.6747181964573268, + "grad_norm": 0.5289568127223118, + "learning_rate": 3.7693017584712013e-05, + "loss": 0.2456, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23360106348991394, + "step": 1040, + "valid_targets_mean": 4826.2, + "valid_targets_min": 2011 + }, + { + "epoch": 1.682769726247987, + "grad_norm": 0.5576792989418248, + "learning_rate": 3.765543179683462e-05, + "loss": 0.2296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24486708641052246, + "step": 1045, + "valid_targets_mean": 3934.4, + "valid_targets_min": 1683 + }, + { + "epoch": 1.6908212560386473, + "grad_norm": 0.5017617827412163, + "learning_rate": 3.7617561353298395e-05, + "loss": 0.2521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24896810948848724, + "step": 1050, + "valid_targets_mean": 4011.8, + "valid_targets_min": 321 + }, + { + "epoch": 1.6988727858293076, + "grad_norm": 0.4860709875293544, + "learning_rate": 3.7579406864682327e-05, + "loss": 0.2264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24139925837516785, + "step": 1055, + "valid_targets_mean": 4299.9, + "valid_targets_min": 667 + }, + { + "epoch": 1.7069243156199678, + "grad_norm": 0.5345894675911452, + "learning_rate": 3.7540968946145036e-05, + "loss": 0.241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2598978877067566, + "step": 1060, + "valid_targets_mean": 4928.6, + "valid_targets_min": 1283 + }, + { + "epoch": 1.714975845410628, + "grad_norm": 0.5465814570656329, + "learning_rate": 3.750224821741486e-05, + "loss": 0.2358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2219647765159607, + "step": 1065, + "valid_targets_mean": 5155.9, + "valid_targets_min": 1303 + }, + { + "epoch": 1.7230273752012883, + "grad_norm": 0.5300553470904004, + "learning_rate": 3.7463245302779795e-05, + "loss": 0.2298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28267112374305725, + "step": 1070, + "valid_targets_mean": 3852.3, + "valid_targets_min": 599 + }, + { + "epoch": 1.7310789049919486, + "grad_norm": 0.4624218505703108, + "learning_rate": 3.742396083107751e-05, + "loss": 0.2128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21661648154258728, + "step": 1075, + "valid_targets_mean": 4283.7, + "valid_targets_min": 1147 + }, + { + "epoch": 1.7391304347826086, + "grad_norm": 0.4993979746767997, + "learning_rate": 3.7384395435685166e-05, + "loss": 0.2346, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21909837424755096, + "step": 1080, + "valid_targets_mean": 4359.1, + "valid_targets_min": 2250 + }, + { + "epoch": 1.747181964573269, + "grad_norm": 0.5407067040296573, + "learning_rate": 3.7344549754509196e-05, + "loss": 0.2353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22699233889579773, + "step": 1085, + "valid_targets_mean": 3677.0, + "valid_targets_min": 892 + }, + { + "epoch": 1.7552334943639292, + "grad_norm": 0.4642737834973646, + "learning_rate": 3.7304424429975046e-05, + "loss": 0.2318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22292137145996094, + "step": 1090, + "valid_targets_mean": 5690.0, + "valid_targets_min": 782 + }, + { + "epoch": 1.7632850241545892, + "grad_norm": 0.5671519558623886, + "learning_rate": 3.726402010901681e-05, + "loss": 0.2353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21334418654441833, + "step": 1095, + "valid_targets_mean": 4616.9, + "valid_targets_min": 1898 + }, + { + "epoch": 1.7713365539452495, + "grad_norm": 0.5083634785423456, + "learning_rate": 3.722333744306678e-05, + "loss": 0.2249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21738044917583466, + "step": 1100, + "valid_targets_mean": 5448.9, + "valid_targets_min": 1726 + }, + { + "epoch": 1.7793880837359097, + "grad_norm": 0.8896163020318464, + "learning_rate": 3.7182377088044984e-05, + "loss": 0.2286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22921855747699738, + "step": 1105, + "valid_targets_mean": 4703.8, + "valid_targets_min": 1508 + }, + { + "epoch": 1.78743961352657, + "grad_norm": 0.5450639925225488, + "learning_rate": 3.7141139704348576e-05, + "loss": 0.228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21833300590515137, + "step": 1110, + "valid_targets_mean": 3445.8, + "valid_targets_min": 1099 + }, + { + "epoch": 1.7954911433172303, + "grad_norm": 0.6520638172384545, + "learning_rate": 3.7099625956841175e-05, + "loss": 0.223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25272321701049805, + "step": 1115, + "valid_targets_mean": 3248.3, + "valid_targets_min": 617 + }, + { + "epoch": 1.8035426731078905, + "grad_norm": 0.4493969588039246, + "learning_rate": 3.70578365148422e-05, + "loss": 0.2367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23505555093288422, + "step": 1120, + "valid_targets_mean": 5024.8, + "valid_targets_min": 1833 + }, + { + "epoch": 1.8115942028985508, + "grad_norm": 0.4827090106067237, + "learning_rate": 3.701577205211604e-05, + "loss": 0.2545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2535856366157532, + "step": 1125, + "valid_targets_mean": 4428.5, + "valid_targets_min": 990 + }, + { + "epoch": 1.819645732689211, + "grad_norm": 0.6263613356926185, + "learning_rate": 3.697343324686119e-05, + "loss": 0.2272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23104625940322876, + "step": 1130, + "valid_targets_mean": 4673.7, + "valid_targets_min": 1449 + }, + { + "epoch": 1.8276972624798713, + "grad_norm": 0.5298323694554691, + "learning_rate": 3.693082078169933e-05, + "loss": 0.2427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24033012986183167, + "step": 1135, + "valid_targets_mean": 4732.1, + "valid_targets_min": 1605 + }, + { + "epoch": 1.8357487922705316, + "grad_norm": 0.4783853550563579, + "learning_rate": 3.68879353436643e-05, + "loss": 0.2189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2028733789920807, + "step": 1140, + "valid_targets_mean": 4867.9, + "valid_targets_min": 2154 + }, + { + "epoch": 1.8438003220611916, + "grad_norm": 0.4595078299986765, + "learning_rate": 3.684477762419108e-05, + "loss": 0.2194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.215766042470932, + "step": 1145, + "valid_targets_mean": 4522.2, + "valid_targets_min": 2208 + }, + { + "epoch": 1.8518518518518519, + "grad_norm": 0.5149239335560051, + "learning_rate": 3.6801348319104546e-05, + "loss": 0.2319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2230435311794281, + "step": 1150, + "valid_targets_mean": 3680.6, + "valid_targets_min": 434 + }, + { + "epoch": 1.8599033816425121, + "grad_norm": 0.42919344493593076, + "learning_rate": 3.675764812860833e-05, + "loss": 0.2518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20287317037582397, + "step": 1155, + "valid_targets_mean": 5642.9, + "valid_targets_min": 2036 + }, + { + "epoch": 1.8679549114331722, + "grad_norm": 0.4884349396198345, + "learning_rate": 3.671367775727353e-05, + "loss": 0.2463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24478939175605774, + "step": 1160, + "valid_targets_mean": 4762.1, + "valid_targets_min": 775 + }, + { + "epoch": 1.8760064412238324, + "grad_norm": 0.49953971630838795, + "learning_rate": 3.666943791402726e-05, + "loss": 0.2527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2292686104774475, + "step": 1165, + "valid_targets_mean": 4543.8, + "valid_targets_min": 1026 + }, + { + "epoch": 1.8840579710144927, + "grad_norm": 0.4644474561197982, + "learning_rate": 3.662492931214137e-05, + "loss": 0.2492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2597373127937317, + "step": 1170, + "valid_targets_mean": 5243.1, + "valid_targets_min": 1246 + }, + { + "epoch": 1.892109500805153, + "grad_norm": 0.5180686308684069, + "learning_rate": 3.6580152669220784e-05, + "loss": 0.2365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2606375813484192, + "step": 1175, + "valid_targets_mean": 4800.7, + "valid_targets_min": 2456 + }, + { + "epoch": 1.9001610305958132, + "grad_norm": 0.42092491518373576, + "learning_rate": 3.6535108707192053e-05, + "loss": 0.2175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22271546721458435, + "step": 1180, + "valid_targets_mean": 5251.3, + "valid_targets_min": 1616 + }, + { + "epoch": 1.9082125603864735, + "grad_norm": 0.5826394657065203, + "learning_rate": 3.648979815229167e-05, + "loss": 0.2372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20627357065677643, + "step": 1185, + "valid_targets_mean": 3834.6, + "valid_targets_min": 693 + }, + { + "epoch": 1.9162640901771337, + "grad_norm": 0.7082935353213954, + "learning_rate": 3.644422173505433e-05, + "loss": 0.2517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24815203249454498, + "step": 1190, + "valid_targets_mean": 4642.2, + "valid_targets_min": 338 + }, + { + "epoch": 1.924315619967794, + "grad_norm": 0.5088420525828131, + "learning_rate": 3.639838019030123e-05, + "loss": 0.2303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23133976757526398, + "step": 1195, + "valid_targets_mean": 4064.8, + "valid_targets_min": 1849 + }, + { + "epoch": 1.9323671497584543, + "grad_norm": 0.4807696052913685, + "learning_rate": 3.635227425712812e-05, + "loss": 0.2441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24485018849372864, + "step": 1200, + "valid_targets_mean": 5358.3, + "valid_targets_min": 1956 + }, + { + "epoch": 1.9404186795491143, + "grad_norm": 0.5374569252336697, + "learning_rate": 3.6305904678893504e-05, + "loss": 0.2417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23684567213058472, + "step": 1205, + "valid_targets_mean": 4465.9, + "valid_targets_min": 2036 + }, + { + "epoch": 1.9484702093397746, + "grad_norm": 0.4315082896253737, + "learning_rate": 3.6259272203206535e-05, + "loss": 0.2397, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20846745371818542, + "step": 1210, + "valid_targets_mean": 5167.9, + "valid_targets_min": 1434 + }, + { + "epoch": 1.9565217391304348, + "grad_norm": 0.5262891588589963, + "learning_rate": 3.621237758191505e-05, + "loss": 0.2385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24596351385116577, + "step": 1215, + "valid_targets_mean": 3761.8, + "valid_targets_min": 1066 + }, + { + "epoch": 1.9645732689210949, + "grad_norm": 0.49628184032222405, + "learning_rate": 3.616522157109342e-05, + "loss": 0.2198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21257467567920685, + "step": 1220, + "valid_targets_mean": 4065.5, + "valid_targets_min": 1897 + }, + { + "epoch": 1.9726247987117551, + "grad_norm": 0.4787291955056688, + "learning_rate": 3.6117804931030324e-05, + "loss": 0.2457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2495032548904419, + "step": 1225, + "valid_targets_mean": 4767.3, + "valid_targets_min": 930 + }, + { + "epoch": 1.9806763285024154, + "grad_norm": 0.4952065016883059, + "learning_rate": 3.607012842621657e-05, + "loss": 0.2273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23980122804641724, + "step": 1230, + "valid_targets_mean": 4069.4, + "valid_targets_min": 1260 + }, + { + "epoch": 1.9887278582930756, + "grad_norm": 0.5230866628171786, + "learning_rate": 3.602219282533269e-05, + "loss": 0.2255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2368500828742981, + "step": 1235, + "valid_targets_mean": 4682.7, + "valid_targets_min": 2343 + }, + { + "epoch": 1.996779388083736, + "grad_norm": 0.5246193826991147, + "learning_rate": 3.597399890123659e-05, + "loss": 0.2329, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21333156526088715, + "step": 1240, + "valid_targets_mean": 4692.2, + "valid_targets_min": 2047 + }, + { + "epoch": 2.004830917874396, + "grad_norm": 0.505863313320365, + "learning_rate": 3.5925547430951094e-05, + "loss": 0.2099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20909734070301056, + "step": 1245, + "valid_targets_mean": 4637.2, + "valid_targets_min": 989 + }, + { + "epoch": 2.0128824476650564, + "grad_norm": 0.43133643927265986, + "learning_rate": 3.587683919565136e-05, + "loss": 0.2156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19329163432121277, + "step": 1250, + "valid_targets_mean": 5283.4, + "valid_targets_min": 1835 + }, + { + "epoch": 2.0209339774557167, + "grad_norm": 0.4837745442669591, + "learning_rate": 3.582787498065237e-05, + "loss": 0.2031, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20409853756427765, + "step": 1255, + "valid_targets_mean": 4767.1, + "valid_targets_min": 1074 + }, + { + "epoch": 2.028985507246377, + "grad_norm": 0.6452848255637806, + "learning_rate": 3.577865557539621e-05, + "loss": 0.1953, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1907821148633957, + "step": 1260, + "valid_targets_mean": 4763.1, + "valid_targets_min": 392 + }, + { + "epoch": 2.037037037037037, + "grad_norm": 0.5867010578619666, + "learning_rate": 3.572918177343935e-05, + "loss": 0.2132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1922091245651245, + "step": 1265, + "valid_targets_mean": 4118.4, + "valid_targets_min": 631 + }, + { + "epoch": 2.0450885668276975, + "grad_norm": 0.48474397713094575, + "learning_rate": 3.567945437243987e-05, + "loss": 0.222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19150424003601074, + "step": 1270, + "valid_targets_mean": 4600.4, + "valid_targets_min": 1066 + }, + { + "epoch": 2.0531400966183573, + "grad_norm": 0.5113649508317663, + "learning_rate": 3.5629474174144564e-05, + "loss": 0.2242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.253940224647522, + "step": 1275, + "valid_targets_mean": 5131.0, + "valid_targets_min": 2438 + }, + { + "epoch": 2.0611916264090175, + "grad_norm": 0.491663340986429, + "learning_rate": 3.5579241984376065e-05, + "loss": 0.217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22499999403953552, + "step": 1280, + "valid_targets_mean": 3951.9, + "valid_targets_min": 1010 + }, + { + "epoch": 2.069243156199678, + "grad_norm": 0.520462248826658, + "learning_rate": 3.5528758613019804e-05, + "loss": 0.2142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19735172390937805, + "step": 1285, + "valid_targets_mean": 3831.2, + "valid_targets_min": 1088 + }, + { + "epoch": 2.077294685990338, + "grad_norm": 0.5206723700369398, + "learning_rate": 3.547802487401097e-05, + "loss": 0.2294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23212730884552002, + "step": 1290, + "valid_targets_mean": 5061.6, + "valid_targets_min": 2180 + }, + { + "epoch": 2.0853462157809983, + "grad_norm": 0.577226384156968, + "learning_rate": 3.54270415853214e-05, + "loss": 0.22, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20443934202194214, + "step": 1295, + "valid_targets_mean": 3395.8, + "valid_targets_min": 627 + }, + { + "epoch": 2.0933977455716586, + "grad_norm": 0.4423746810496331, + "learning_rate": 3.537580956894638e-05, + "loss": 0.225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21375350654125214, + "step": 1300, + "valid_targets_mean": 5160.8, + "valid_targets_min": 1507 + }, + { + "epoch": 2.101449275362319, + "grad_norm": 0.5548629803187833, + "learning_rate": 3.532432965089138e-05, + "loss": 0.2134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22949251532554626, + "step": 1305, + "valid_targets_mean": 5229.6, + "valid_targets_min": 2557 + }, + { + "epoch": 2.109500805152979, + "grad_norm": 0.4906711255865245, + "learning_rate": 3.527260266115876e-05, + "loss": 0.2215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2087794542312622, + "step": 1310, + "valid_targets_mean": 5409.4, + "valid_targets_min": 2349 + }, + { + "epoch": 2.1175523349436394, + "grad_norm": 0.6560805934109221, + "learning_rate": 3.522062943373438e-05, + "loss": 0.195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17596739530563354, + "step": 1315, + "valid_targets_mean": 4226.9, + "valid_targets_min": 1025 + }, + { + "epoch": 2.1256038647342996, + "grad_norm": 0.7107665752653891, + "learning_rate": 3.516841080657413e-05, + "loss": 0.22, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23091405630111694, + "step": 1320, + "valid_targets_mean": 4053.8, + "valid_targets_min": 689 + }, + { + "epoch": 2.13365539452496, + "grad_norm": 0.5185713231757747, + "learning_rate": 3.511594762159046e-05, + "loss": 0.2269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20578667521476746, + "step": 1325, + "valid_targets_mean": 4018.0, + "valid_targets_min": 978 + }, + { + "epoch": 2.14170692431562, + "grad_norm": 0.463159581350649, + "learning_rate": 3.506324072463878e-05, + "loss": 0.2049, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1929136961698532, + "step": 1330, + "valid_targets_mean": 4945.8, + "valid_targets_min": 2099 + }, + { + "epoch": 2.14975845410628, + "grad_norm": 0.5447005808188629, + "learning_rate": 3.5010290965503826e-05, + "loss": 0.2194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23045824468135834, + "step": 1335, + "valid_targets_mean": 4698.5, + "valid_targets_min": 1716 + }, + { + "epoch": 2.1578099838969402, + "grad_norm": 0.4595107321034477, + "learning_rate": 3.495709919788597e-05, + "loss": 0.2076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18875578045845032, + "step": 1340, + "valid_targets_mean": 4457.4, + "valid_targets_min": 755 + }, + { + "epoch": 2.1658615136876005, + "grad_norm": 0.6481051628858272, + "learning_rate": 3.490366627938742e-05, + "loss": 0.216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22552573680877686, + "step": 1345, + "valid_targets_mean": 4907.9, + "valid_targets_min": 1381 + }, + { + "epoch": 2.1739130434782608, + "grad_norm": 0.8255339190022734, + "learning_rate": 3.484999307149846e-05, + "loss": 0.2202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20174431800842285, + "step": 1350, + "valid_targets_mean": 3897.0, + "valid_targets_min": 979 + }, + { + "epoch": 2.181964573268921, + "grad_norm": 0.47550594396325907, + "learning_rate": 3.47960804395835e-05, + "loss": 0.2158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21997377276420593, + "step": 1355, + "valid_targets_mean": 4916.5, + "valid_targets_min": 990 + }, + { + "epoch": 2.1900161030595813, + "grad_norm": 0.5142914581894688, + "learning_rate": 3.474192925286714e-05, + "loss": 0.2166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2234359234571457, + "step": 1360, + "valid_targets_mean": 4057.0, + "valid_targets_min": 554 + }, + { + "epoch": 2.1980676328502415, + "grad_norm": 0.5135241141747307, + "learning_rate": 3.468754038442017e-05, + "loss": 0.2079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21173028647899628, + "step": 1365, + "valid_targets_mean": 4122.8, + "valid_targets_min": 963 + }, + { + "epoch": 2.206119162640902, + "grad_norm": 0.5281981416757465, + "learning_rate": 3.463291471114548e-05, + "loss": 0.2135, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2580035328865051, + "step": 1370, + "valid_targets_mean": 4534.7, + "valid_targets_min": 1253 + }, + { + "epoch": 2.214170692431562, + "grad_norm": 0.546080448981762, + "learning_rate": 3.4578053113763936e-05, + "loss": 0.2185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2357328236103058, + "step": 1375, + "valid_targets_mean": 4083.2, + "valid_targets_min": 1119 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.43510725221723323, + "learning_rate": 3.452295647680014e-05, + "loss": 0.2108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.210770383477211, + "step": 1380, + "valid_targets_mean": 5021.1, + "valid_targets_min": 1913 + }, + { + "epoch": 2.2302737520128826, + "grad_norm": 0.45576706352160945, + "learning_rate": 3.4467625688568245e-05, + "loss": 0.21, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20456649363040924, + "step": 1385, + "valid_targets_mean": 4813.6, + "valid_targets_min": 1879 + }, + { + "epoch": 2.238325281803543, + "grad_norm": 0.4605011820162796, + "learning_rate": 3.4412061641157546e-05, + "loss": 0.2026, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1862925887107849, + "step": 1390, + "valid_targets_mean": 5375.2, + "valid_targets_min": 1405 + }, + { + "epoch": 2.246376811594203, + "grad_norm": 0.5067717085593049, + "learning_rate": 3.435626523041815e-05, + "loss": 0.2132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20806747674942017, + "step": 1395, + "valid_targets_mean": 4125.7, + "valid_targets_min": 2104 + }, + { + "epoch": 2.2544283413848634, + "grad_norm": 0.503211076543084, + "learning_rate": 3.430023735594653e-05, + "loss": 0.2163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23167727887630463, + "step": 1400, + "valid_targets_mean": 4897.6, + "valid_targets_min": 344 + }, + { + "epoch": 2.262479871175523, + "grad_norm": 0.5252844399823857, + "learning_rate": 3.4243978921071005e-05, + "loss": 0.2117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18407735228538513, + "step": 1405, + "valid_targets_mean": 4546.8, + "valid_targets_min": 1908 + }, + { + "epoch": 2.2705314009661834, + "grad_norm": 0.5703046217142366, + "learning_rate": 3.418749083283719e-05, + "loss": 0.223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23428402841091156, + "step": 1410, + "valid_targets_mean": 4894.1, + "valid_targets_min": 1720 + }, + { + "epoch": 2.2785829307568437, + "grad_norm": 0.45360806117795466, + "learning_rate": 3.413077400199334e-05, + "loss": 0.1978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16744893789291382, + "step": 1415, + "valid_targets_mean": 4128.2, + "valid_targets_min": 1177 + }, + { + "epoch": 2.286634460547504, + "grad_norm": 0.48066382062291113, + "learning_rate": 3.407382934297571e-05, + "loss": 0.2096, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21223318576812744, + "step": 1420, + "valid_targets_mean": 4697.8, + "valid_targets_min": 1982 + }, + { + "epoch": 2.2946859903381642, + "grad_norm": 0.4623146781727566, + "learning_rate": 3.4016657773893785e-05, + "loss": 0.1946, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.186685249209404, + "step": 1425, + "valid_targets_mean": 5018.8, + "valid_targets_min": 1843 + }, + { + "epoch": 2.3027375201288245, + "grad_norm": 0.5051350867881649, + "learning_rate": 3.3959260216515495e-05, + "loss": 0.2309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1981533169746399, + "step": 1430, + "valid_targets_mean": 4461.0, + "valid_targets_min": 1264 + }, + { + "epoch": 2.3107890499194848, + "grad_norm": 0.547928503478585, + "learning_rate": 3.3901637596252325e-05, + "loss": 0.2059, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20269712805747986, + "step": 1435, + "valid_targets_mean": 4606.9, + "valid_targets_min": 216 + }, + { + "epoch": 2.318840579710145, + "grad_norm": 0.5050671980985264, + "learning_rate": 3.384379084214443e-05, + "loss": 0.2157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21335461735725403, + "step": 1440, + "valid_targets_mean": 3892.7, + "valid_targets_min": 1209 + }, + { + "epoch": 2.3268921095008053, + "grad_norm": 0.48909304735454484, + "learning_rate": 3.378572088684562e-05, + "loss": 0.2037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19523759186267853, + "step": 1445, + "valid_targets_mean": 4354.8, + "valid_targets_min": 1308 + }, + { + "epoch": 2.3349436392914655, + "grad_norm": 0.6388146830801954, + "learning_rate": 3.372742866660836e-05, + "loss": 0.2178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.198106050491333, + "step": 1450, + "valid_targets_mean": 4365.9, + "valid_targets_min": 713 + }, + { + "epoch": 2.342995169082126, + "grad_norm": 0.5410396103360308, + "learning_rate": 3.3668915121268636e-05, + "loss": 0.212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23412463068962097, + "step": 1455, + "valid_targets_mean": 4374.0, + "valid_targets_min": 547 + }, + { + "epoch": 2.3510466988727856, + "grad_norm": 0.5291219901130383, + "learning_rate": 3.361018119423085e-05, + "loss": 0.2084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2332005500793457, + "step": 1460, + "valid_targets_mean": 3958.9, + "valid_targets_min": 1381 + }, + { + "epoch": 2.359098228663446, + "grad_norm": 0.5100995870174685, + "learning_rate": 3.3551227832452555e-05, + "loss": 0.2113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2195730209350586, + "step": 1465, + "valid_targets_mean": 4228.3, + "valid_targets_min": 2352 + }, + { + "epoch": 2.367149758454106, + "grad_norm": 0.5322099118621038, + "learning_rate": 3.3492055986429235e-05, + "loss": 0.2227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22615352272987366, + "step": 1470, + "valid_targets_mean": 4539.8, + "valid_targets_min": 296 + }, + { + "epoch": 2.3752012882447664, + "grad_norm": 0.4656836442072764, + "learning_rate": 3.3432666610178936e-05, + "loss": 0.2039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17955255508422852, + "step": 1475, + "valid_targets_mean": 4149.8, + "valid_targets_min": 1631 + }, + { + "epoch": 2.3832528180354267, + "grad_norm": 0.5493172940293227, + "learning_rate": 3.3373060661226944e-05, + "loss": 0.2078, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23940381407737732, + "step": 1480, + "valid_targets_mean": 4181.2, + "valid_targets_min": 2011 + }, + { + "epoch": 2.391304347826087, + "grad_norm": 0.49597904545260957, + "learning_rate": 3.331323910059027e-05, + "loss": 0.222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24300578236579895, + "step": 1485, + "valid_targets_mean": 3967.4, + "valid_targets_min": 677 + }, + { + "epoch": 2.399355877616747, + "grad_norm": 0.5003561031279338, + "learning_rate": 3.3253202892762244e-05, + "loss": 0.2088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1889512538909912, + "step": 1490, + "valid_targets_mean": 4254.9, + "valid_targets_min": 1570 + }, + { + "epoch": 2.4074074074074074, + "grad_norm": 0.4683723297524737, + "learning_rate": 3.319295300569686e-05, + "loss": 0.2176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22197048366069794, + "step": 1495, + "valid_targets_mean": 5005.4, + "valid_targets_min": 857 + }, + { + "epoch": 2.4154589371980677, + "grad_norm": 0.5071114785034765, + "learning_rate": 3.3132490410793294e-05, + "loss": 0.2086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20913785696029663, + "step": 1500, + "valid_targets_mean": 4472.4, + "valid_targets_min": 1073 + }, + { + "epoch": 2.423510466988728, + "grad_norm": 0.4572009276147089, + "learning_rate": 3.3071816082880115e-05, + "loss": 0.2038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19436806440353394, + "step": 1505, + "valid_targets_mean": 4905.2, + "valid_targets_min": 1404 + }, + { + "epoch": 2.4315619967793882, + "grad_norm": 0.5338897691126591, + "learning_rate": 3.3010931000199674e-05, + "loss": 0.2173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22107122838497162, + "step": 1510, + "valid_targets_mean": 4076.6, + "valid_targets_min": 1904 + }, + { + "epoch": 2.4396135265700485, + "grad_norm": 0.49724905678619213, + "learning_rate": 3.2949836144392256e-05, + "loss": 0.2266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22516636550426483, + "step": 1515, + "valid_targets_mean": 5009.3, + "valid_targets_min": 1353 + }, + { + "epoch": 2.4476650563607087, + "grad_norm": 0.5108239733431804, + "learning_rate": 3.28885325004803e-05, + "loss": 0.2163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23558905720710754, + "step": 1520, + "valid_targets_mean": 4774.4, + "valid_targets_min": 1731 + }, + { + "epoch": 2.455716586151369, + "grad_norm": 0.4908585219445986, + "learning_rate": 3.282702105685251e-05, + "loss": 0.2224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22913925349712372, + "step": 1525, + "valid_targets_mean": 5611.8, + "valid_targets_min": 2104 + }, + { + "epoch": 2.463768115942029, + "grad_norm": 0.4523927665088012, + "learning_rate": 3.2765302805247885e-05, + "loss": 0.2063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20795124769210815, + "step": 1530, + "valid_targets_mean": 4889.2, + "valid_targets_min": 2184 + }, + { + "epoch": 2.471819645732689, + "grad_norm": 0.5485354909981668, + "learning_rate": 3.270337874073977e-05, + "loss": 0.217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2317976951599121, + "step": 1535, + "valid_targets_mean": 5319.1, + "valid_targets_min": 1932 + }, + { + "epoch": 2.4798711755233493, + "grad_norm": 0.5061836838324374, + "learning_rate": 3.264124986171981e-05, + "loss": 0.2273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25826138257980347, + "step": 1540, + "valid_targets_mean": 4537.9, + "valid_targets_min": 453 + }, + { + "epoch": 2.4879227053140096, + "grad_norm": 0.47633311927326305, + "learning_rate": 3.2578917169881816e-05, + "loss": 0.2185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22371219098567963, + "step": 1545, + "valid_targets_mean": 4426.5, + "valid_targets_min": 1435 + }, + { + "epoch": 2.49597423510467, + "grad_norm": 0.528382297311333, + "learning_rate": 3.2516381670205665e-05, + "loss": 0.2309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22219571471214294, + "step": 1550, + "valid_targets_mean": 4377.4, + "valid_targets_min": 812 + }, + { + "epoch": 2.50402576489533, + "grad_norm": 0.496647756887993, + "learning_rate": 3.245364437094105e-05, + "loss": 0.2134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22214139997959137, + "step": 1555, + "valid_targets_mean": 4800.2, + "valid_targets_min": 1516 + }, + { + "epoch": 2.5120772946859904, + "grad_norm": 0.5176776733548973, + "learning_rate": 3.239070628359126e-05, + "loss": 0.2243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2536792755126953, + "step": 1560, + "valid_targets_mean": 4532.4, + "valid_targets_min": 420 + }, + { + "epoch": 2.5201288244766507, + "grad_norm": 0.4521203535869814, + "learning_rate": 3.232756842289685e-05, + "loss": 0.209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1837652325630188, + "step": 1565, + "valid_targets_mean": 4955.4, + "valid_targets_min": 1611 + }, + { + "epoch": 2.528180354267311, + "grad_norm": 0.48332269443651105, + "learning_rate": 3.2264231806819286e-05, + "loss": 0.1944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18321290612220764, + "step": 1570, + "valid_targets_mean": 4541.4, + "valid_targets_min": 1371 + }, + { + "epoch": 2.536231884057971, + "grad_norm": 0.5203474071856915, + "learning_rate": 3.220069745652456e-05, + "loss": 0.2189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2053850144147873, + "step": 1575, + "valid_targets_mean": 4024.7, + "valid_targets_min": 380 + }, + { + "epoch": 2.544283413848631, + "grad_norm": 0.46470085704874836, + "learning_rate": 3.213696639636666e-05, + "loss": 0.2233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19794431328773499, + "step": 1580, + "valid_targets_mean": 4241.2, + "valid_targets_min": 756 + }, + { + "epoch": 2.5523349436392913, + "grad_norm": 0.4921456777251355, + "learning_rate": 3.207303965387114e-05, + "loss": 0.2195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2276471108198166, + "step": 1585, + "valid_targets_mean": 4376.2, + "valid_targets_min": 327 + }, + { + "epoch": 2.5603864734299515, + "grad_norm": 0.5461925808616269, + "learning_rate": 3.200891825971846e-05, + "loss": 0.2148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2170354425907135, + "step": 1590, + "valid_targets_mean": 4376.3, + "valid_targets_min": 1624 + }, + { + "epoch": 2.5684380032206118, + "grad_norm": 0.45331868338223147, + "learning_rate": 3.194460324772746e-05, + "loss": 0.1971, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17769229412078857, + "step": 1595, + "valid_targets_mean": 4559.6, + "valid_targets_min": 1717 + }, + { + "epoch": 2.576489533011272, + "grad_norm": 0.5481556666487605, + "learning_rate": 3.188009565483861e-05, + "loss": 0.2192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21930727362632751, + "step": 1600, + "valid_targets_mean": 3786.7, + "valid_targets_min": 627 + }, + { + "epoch": 2.5845410628019323, + "grad_norm": 0.5306078402232887, + "learning_rate": 3.1815396521097376e-05, + "loss": 0.2091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22777387499809265, + "step": 1605, + "valid_targets_mean": 4098.1, + "valid_targets_min": 581 + }, + { + "epoch": 2.5925925925925926, + "grad_norm": 0.6396015312802733, + "learning_rate": 3.1750506889637366e-05, + "loss": 0.219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2606090009212494, + "step": 1610, + "valid_targets_mean": 4573.8, + "valid_targets_min": 513 + }, + { + "epoch": 2.600644122383253, + "grad_norm": 0.5139629491463821, + "learning_rate": 3.1685427806663574e-05, + "loss": 0.213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22717058658599854, + "step": 1615, + "valid_targets_mean": 4616.6, + "valid_targets_min": 2109 + }, + { + "epoch": 2.608695652173913, + "grad_norm": 0.4896772812229126, + "learning_rate": 3.1620160321435475e-05, + "loss": 0.2204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2393844723701477, + "step": 1620, + "valid_targets_mean": 4748.1, + "valid_targets_min": 1671 + }, + { + "epoch": 2.6167471819645733, + "grad_norm": 0.5123496934824213, + "learning_rate": 3.155470548625014e-05, + "loss": 0.2257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2570544481277466, + "step": 1625, + "valid_targets_mean": 4746.9, + "valid_targets_min": 1642 + }, + { + "epoch": 2.6247987117552336, + "grad_norm": 0.5011886008293388, + "learning_rate": 3.1489064356425235e-05, + "loss": 0.2223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21146038174629211, + "step": 1630, + "valid_targets_mean": 4106.5, + "valid_targets_min": 1263 + }, + { + "epoch": 2.632850241545894, + "grad_norm": 0.4918629952935143, + "learning_rate": 3.142323799028204e-05, + "loss": 0.2206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.205857515335083, + "step": 1635, + "valid_targets_mean": 4128.4, + "valid_targets_min": 713 + }, + { + "epoch": 2.640901771336554, + "grad_norm": 0.4975884336977351, + "learning_rate": 3.135722744912836e-05, + "loss": 0.2067, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19787558913230896, + "step": 1640, + "valid_targets_mean": 5678.5, + "valid_targets_min": 886 + }, + { + "epoch": 2.6489533011272144, + "grad_norm": 0.48481732390442694, + "learning_rate": 3.129103379724143e-05, + "loss": 0.1994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1850062608718872, + "step": 1645, + "valid_targets_mean": 3477.6, + "valid_targets_min": 752 + }, + { + "epoch": 2.6570048309178746, + "grad_norm": 0.4665528515319807, + "learning_rate": 3.122465810185075e-05, + "loss": 0.2172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21181383728981018, + "step": 1650, + "valid_targets_mean": 4864.5, + "valid_targets_min": 1598 + }, + { + "epoch": 2.6650563607085345, + "grad_norm": 0.5077396920276867, + "learning_rate": 3.1158101433120863e-05, + "loss": 0.2186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19636741280555725, + "step": 1655, + "valid_targets_mean": 3697.4, + "valid_targets_min": 339 + }, + { + "epoch": 2.6731078904991947, + "grad_norm": 0.5055566499587948, + "learning_rate": 3.1091364864134136e-05, + "loss": 0.197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20249465107917786, + "step": 1660, + "valid_targets_mean": 4576.9, + "valid_targets_min": 543 + }, + { + "epoch": 2.681159420289855, + "grad_norm": 0.49702037708190555, + "learning_rate": 3.102444947087342e-05, + "loss": 0.2007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17359226942062378, + "step": 1665, + "valid_targets_mean": 3802.2, + "valid_targets_min": 1850 + }, + { + "epoch": 2.6892109500805152, + "grad_norm": 0.543548482081646, + "learning_rate": 3.0957356332204745e-05, + "loss": 0.2364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21617215871810913, + "step": 1670, + "valid_targets_mean": 5324.2, + "valid_targets_min": 1401 + }, + { + "epoch": 2.6972624798711755, + "grad_norm": 0.473211339207569, + "learning_rate": 3.089008652985989e-05, + "loss": 0.2, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20709389448165894, + "step": 1675, + "valid_targets_mean": 4730.4, + "valid_targets_min": 1283 + }, + { + "epoch": 2.7053140096618358, + "grad_norm": 0.48495909325226566, + "learning_rate": 3.082264114841892e-05, + "loss": 0.2152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19697055220603943, + "step": 1680, + "valid_targets_mean": 4703.1, + "valid_targets_min": 1393 + }, + { + "epoch": 2.713365539452496, + "grad_norm": 0.525234042627054, + "learning_rate": 3.07550212752928e-05, + "loss": 0.2152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21602725982666016, + "step": 1685, + "valid_targets_mean": 4527.6, + "valid_targets_min": 728 + }, + { + "epoch": 2.7214170692431563, + "grad_norm": 0.49507933380428965, + "learning_rate": 3.068722800070574e-05, + "loss": 0.2489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23652076721191406, + "step": 1690, + "valid_targets_mean": 3893.8, + "valid_targets_min": 1490 + }, + { + "epoch": 2.7294685990338166, + "grad_norm": 0.478229441737468, + "learning_rate": 3.0619262417677695e-05, + "loss": 0.2065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20760485529899597, + "step": 1695, + "valid_targets_mean": 4375.8, + "valid_targets_min": 1930 + }, + { + "epoch": 2.7375201288244764, + "grad_norm": 0.4760382784479069, + "learning_rate": 3.055112562200673e-05, + "loss": 0.2077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20306912064552307, + "step": 1700, + "valid_targets_mean": 4093.1, + "valid_targets_min": 892 + }, + { + "epoch": 2.7455716586151366, + "grad_norm": 0.567194545288659, + "learning_rate": 3.0482818712251318e-05, + "loss": 0.2031, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22697149217128754, + "step": 1705, + "valid_targets_mean": 5224.9, + "valid_targets_min": 679 + }, + { + "epoch": 2.753623188405797, + "grad_norm": 0.7966562099200152, + "learning_rate": 3.0414342789712675e-05, + "loss": 0.2246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2644144296646118, + "step": 1710, + "valid_targets_mean": 3641.8, + "valid_targets_min": 267 + }, + { + "epoch": 2.761674718196457, + "grad_norm": 0.5194781986137333, + "learning_rate": 3.034569895841699e-05, + "loss": 0.2012, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20732516050338745, + "step": 1715, + "valid_targets_mean": 4665.7, + "valid_targets_min": 1796 + }, + { + "epoch": 2.7697262479871174, + "grad_norm": 0.5139542297559575, + "learning_rate": 3.0276888325097583e-05, + "loss": 0.2108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2747165858745575, + "step": 1720, + "valid_targets_mean": 5593.7, + "valid_targets_min": 863 + }, + { + "epoch": 2.7777777777777777, + "grad_norm": 0.5017349754200195, + "learning_rate": 3.020791199917713e-05, + "loss": 0.2097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19357708096504211, + "step": 1725, + "valid_targets_mean": 4411.8, + "valid_targets_min": 775 + }, + { + "epoch": 2.785829307568438, + "grad_norm": 0.47883623225450245, + "learning_rate": 3.0138771092749722e-05, + "loss": 0.2158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21340464055538177, + "step": 1730, + "valid_targets_mean": 4259.2, + "valid_targets_min": 702 + }, + { + "epoch": 2.793880837359098, + "grad_norm": 0.5042451347947459, + "learning_rate": 3.006946672056297e-05, + "loss": 0.2163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20300012826919556, + "step": 1735, + "valid_targets_mean": 4185.3, + "valid_targets_min": 1004 + }, + { + "epoch": 2.8019323671497585, + "grad_norm": 0.5599312568052899, + "learning_rate": 3.0000000000000004e-05, + "loss": 0.2111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22356563806533813, + "step": 1740, + "valid_targets_mean": 4172.1, + "valid_targets_min": 1019 + }, + { + "epoch": 2.8099838969404187, + "grad_norm": 0.5188094581424917, + "learning_rate": 2.993037205106147e-05, + "loss": 0.2181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21803626418113708, + "step": 1745, + "valid_targets_mean": 4445.8, + "valid_targets_min": 1772 + }, + { + "epoch": 2.818035426731079, + "grad_norm": 0.48903765537814037, + "learning_rate": 2.9860583996347495e-05, + "loss": 0.2093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21702814102172852, + "step": 1750, + "valid_targets_mean": 4806.7, + "valid_targets_min": 1671 + }, + { + "epoch": 2.8260869565217392, + "grad_norm": 0.48803716906398215, + "learning_rate": 2.9790636961039524e-05, + "loss": 0.2193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22425541281700134, + "step": 1755, + "valid_targets_mean": 4643.0, + "valid_targets_min": 1506 + }, + { + "epoch": 2.8341384863123995, + "grad_norm": 0.48749480729575717, + "learning_rate": 2.9720532072882268e-05, + "loss": 0.2248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19417119026184082, + "step": 1760, + "valid_targets_mean": 4002.8, + "valid_targets_min": 1522 + }, + { + "epoch": 2.8421900161030598, + "grad_norm": 0.45848716202240636, + "learning_rate": 2.965027046216544e-05, + "loss": 0.2081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1857365071773529, + "step": 1765, + "valid_targets_mean": 4783.7, + "valid_targets_min": 1919 + }, + { + "epoch": 2.85024154589372, + "grad_norm": 0.6718800164601025, + "learning_rate": 2.9579853261705573e-05, + "loss": 0.2024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23533181846141815, + "step": 1770, + "valid_targets_mean": 4673.9, + "valid_targets_min": 1311 + }, + { + "epoch": 2.8582930756843803, + "grad_norm": 0.4660110391421861, + "learning_rate": 2.950928160682775e-05, + "loss": 0.2089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19263693690299988, + "step": 1775, + "valid_targets_mean": 4255.1, + "valid_targets_min": 1076 + }, + { + "epoch": 2.86634460547504, + "grad_norm": 0.5145254890484248, + "learning_rate": 2.943855663534731e-05, + "loss": 0.2183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23376551270484924, + "step": 1780, + "valid_targets_mean": 4397.2, + "valid_targets_min": 1787 + }, + { + "epoch": 2.8743961352657004, + "grad_norm": 0.4906091551370421, + "learning_rate": 2.9367679487551473e-05, + "loss": 0.2056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20546765625476837, + "step": 1785, + "valid_targets_mean": 3990.4, + "valid_targets_min": 1477 + }, + { + "epoch": 2.8824476650563606, + "grad_norm": 0.5093898731159036, + "learning_rate": 2.929665130618098e-05, + "loss": 0.2037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22851824760437012, + "step": 1790, + "valid_targets_mean": 5522.1, + "valid_targets_min": 1762 + }, + { + "epoch": 2.890499194847021, + "grad_norm": 0.4518802511006292, + "learning_rate": 2.9225473236411655e-05, + "loss": 0.2217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18847951292991638, + "step": 1795, + "valid_targets_mean": 4818.7, + "valid_targets_min": 1111 + }, + { + "epoch": 2.898550724637681, + "grad_norm": 0.6937426790422542, + "learning_rate": 2.915414642583596e-05, + "loss": 0.2148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20088736712932587, + "step": 1800, + "valid_targets_mean": 3600.9, + "valid_targets_min": 848 + }, + { + "epoch": 2.9066022544283414, + "grad_norm": 0.46521668715049397, + "learning_rate": 2.9082672024444485e-05, + "loss": 0.2229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22432249784469604, + "step": 1805, + "valid_targets_mean": 4322.0, + "valid_targets_min": 1358 + }, + { + "epoch": 2.9146537842190017, + "grad_norm": 0.436860595041999, + "learning_rate": 2.901105118460737e-05, + "loss": 0.2072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1966436803340912, + "step": 1810, + "valid_targets_mean": 4940.1, + "valid_targets_min": 1944 + }, + { + "epoch": 2.922705314009662, + "grad_norm": 0.5361044008606226, + "learning_rate": 2.8939285061055807e-05, + "loss": 0.2146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2078220695257187, + "step": 1815, + "valid_targets_mean": 3302.5, + "valid_targets_min": 609 + }, + { + "epoch": 2.930756843800322, + "grad_norm": 0.49990872670719105, + "learning_rate": 2.8867374810863325e-05, + "loss": 0.2136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2396693229675293, + "step": 1820, + "valid_targets_mean": 4933.6, + "valid_targets_min": 1197 + }, + { + "epoch": 2.938808373590982, + "grad_norm": 0.4676050983632418, + "learning_rate": 2.8795321593427227e-05, + "loss": 0.2105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21761463582515717, + "step": 1825, + "valid_targets_mean": 4473.9, + "valid_targets_min": 1740 + }, + { + "epoch": 2.9468599033816423, + "grad_norm": 0.4579800892905144, + "learning_rate": 2.8723126570449813e-05, + "loss": 0.2137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22557538747787476, + "step": 1830, + "valid_targets_mean": 4990.2, + "valid_targets_min": 1104 + }, + { + "epoch": 2.9549114331723025, + "grad_norm": 0.521039627014324, + "learning_rate": 2.8650790905919724e-05, + "loss": 0.2127, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21462702751159668, + "step": 1835, + "valid_targets_mean": 4125.9, + "valid_targets_min": 1187 + }, + { + "epoch": 2.962962962962963, + "grad_norm": 0.5446614093095541, + "learning_rate": 2.8578315766093133e-05, + "loss": 0.2275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22720134258270264, + "step": 1840, + "valid_targets_mean": 4199.9, + "valid_targets_min": 379 + }, + { + "epoch": 2.971014492753623, + "grad_norm": 0.5085501802829419, + "learning_rate": 2.850570231947493e-05, + "loss": 0.2086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20165984332561493, + "step": 1845, + "valid_targets_mean": 4610.2, + "valid_targets_min": 745 + }, + { + "epoch": 2.9790660225442833, + "grad_norm": 0.4995531673717949, + "learning_rate": 2.8432951736799933e-05, + "loss": 0.2149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21701423823833466, + "step": 1850, + "valid_targets_mean": 4243.4, + "valid_targets_min": 1952 + }, + { + "epoch": 2.9871175523349436, + "grad_norm": 0.5654118721016965, + "learning_rate": 2.8360065191013967e-05, + "loss": 0.2189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22924692928791046, + "step": 1855, + "valid_targets_mean": 3948.6, + "valid_targets_min": 1818 + }, + { + "epoch": 2.995169082125604, + "grad_norm": 0.5368747366497263, + "learning_rate": 2.8287043857254957e-05, + "loss": 0.2049, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18569841980934143, + "step": 1860, + "valid_targets_mean": 4243.3, + "valid_targets_min": 944 + }, + { + "epoch": 3.003220611916264, + "grad_norm": 0.5308612324214572, + "learning_rate": 2.8213888912834026e-05, + "loss": 0.2026, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20392943918704987, + "step": 1865, + "valid_targets_mean": 5240.9, + "valid_targets_min": 3176 + }, + { + "epoch": 3.0112721417069244, + "grad_norm": 0.5850366317547473, + "learning_rate": 2.814060153721644e-05, + "loss": 0.1835, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22741422057151794, + "step": 1870, + "valid_targets_mean": 4518.1, + "valid_targets_min": 1313 + }, + { + "epoch": 3.0193236714975846, + "grad_norm": 0.5456534322958014, + "learning_rate": 2.8067182912002663e-05, + "loss": 0.1929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19902676343917847, + "step": 1875, + "valid_targets_mean": 4697.9, + "valid_targets_min": 1401 + }, + { + "epoch": 3.027375201288245, + "grad_norm": 0.5866691020588805, + "learning_rate": 2.7993634220909254e-05, + "loss": 0.1873, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17653107643127441, + "step": 1880, + "valid_targets_mean": 5312.7, + "valid_targets_min": 1668 + }, + { + "epoch": 3.035426731078905, + "grad_norm": 0.4839014869325985, + "learning_rate": 2.7919956649749826e-05, + "loss": 0.2012, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.207495778799057, + "step": 1885, + "valid_targets_mean": 4792.9, + "valid_targets_min": 1475 + }, + { + "epoch": 3.0434782608695654, + "grad_norm": 0.5792725572749162, + "learning_rate": 2.784615138641588e-05, + "loss": 0.2028, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2462862730026245, + "step": 1890, + "valid_targets_mean": 3582.7, + "valid_targets_min": 1264 + }, + { + "epoch": 3.0515297906602252, + "grad_norm": 0.5471292037490801, + "learning_rate": 2.7772219620857685e-05, + "loss": 0.2036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2000941038131714, + "step": 1895, + "valid_targets_mean": 4260.8, + "valid_targets_min": 1662 + }, + { + "epoch": 3.0595813204508855, + "grad_norm": 0.5418789392536902, + "learning_rate": 2.769816254506509e-05, + "loss": 0.1972, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17109820246696472, + "step": 1900, + "valid_targets_mean": 4285.0, + "valid_targets_min": 688 + }, + { + "epoch": 3.0676328502415457, + "grad_norm": 0.5696873068816691, + "learning_rate": 2.76239813530483e-05, + "loss": 0.1917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17864085733890533, + "step": 1905, + "valid_targets_mean": 3839.0, + "valid_targets_min": 356 + }, + { + "epoch": 3.075684380032206, + "grad_norm": 0.5257872930167539, + "learning_rate": 2.7549677240818628e-05, + "loss": 0.1918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20962268114089966, + "step": 1910, + "valid_targets_mean": 4587.1, + "valid_targets_min": 854 + }, + { + "epoch": 3.0837359098228663, + "grad_norm": 0.5648446285032245, + "learning_rate": 2.7475251406369197e-05, + "loss": 0.1969, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20219004154205322, + "step": 1915, + "valid_targets_mean": 4792.9, + "valid_targets_min": 1897 + }, + { + "epoch": 3.0917874396135265, + "grad_norm": 0.454963234903237, + "learning_rate": 2.740070504965565e-05, + "loss": 0.1732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1661757528781891, + "step": 1920, + "valid_targets_mean": 5107.2, + "valid_targets_min": 1842 + }, + { + "epoch": 3.099838969404187, + "grad_norm": 0.5802646506856157, + "learning_rate": 2.7326039372576782e-05, + "loss": 0.2025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20974227786064148, + "step": 1925, + "valid_targets_mean": 4211.8, + "valid_targets_min": 1850 + }, + { + "epoch": 3.107890499194847, + "grad_norm": 0.5115859872263251, + "learning_rate": 2.7251255578955186e-05, + "loss": 0.2016, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.191048726439476, + "step": 1930, + "valid_targets_mean": 4847.3, + "valid_targets_min": 2149 + }, + { + "epoch": 3.1159420289855073, + "grad_norm": 0.4838092911137751, + "learning_rate": 2.7176354874517805e-05, + "loss": 0.1915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18548092246055603, + "step": 1935, + "valid_targets_mean": 4928.9, + "valid_targets_min": 1746 + }, + { + "epoch": 3.1239935587761676, + "grad_norm": 0.6716396355587732, + "learning_rate": 2.7101338466876542e-05, + "loss": 0.1884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17967697978019714, + "step": 1940, + "valid_targets_mean": 3838.4, + "valid_targets_min": 933 + }, + { + "epoch": 3.132045088566828, + "grad_norm": 0.5175526656495621, + "learning_rate": 2.702620756550874e-05, + "loss": 0.2077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18851426243782043, + "step": 1945, + "valid_targets_mean": 4249.6, + "valid_targets_min": 1164 + }, + { + "epoch": 3.140096618357488, + "grad_norm": 0.5292721987453661, + "learning_rate": 2.6950963381737728e-05, + "loss": 0.1948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20516784489154816, + "step": 1950, + "valid_targets_mean": 4134.1, + "valid_targets_min": 1487 + }, + { + "epoch": 3.148148148148148, + "grad_norm": 0.5429459342739271, + "learning_rate": 2.687560712871325e-05, + "loss": 0.1959, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19477809965610504, + "step": 1955, + "valid_targets_mean": 4617.6, + "valid_targets_min": 321 + }, + { + "epoch": 3.156199677938808, + "grad_norm": 0.5204990053162142, + "learning_rate": 2.6800140021391933e-05, + "loss": 0.2088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2121405303478241, + "step": 1960, + "valid_targets_mean": 5504.8, + "valid_targets_min": 2133 + }, + { + "epoch": 3.1642512077294684, + "grad_norm": 0.5107603352588185, + "learning_rate": 2.6724563276517697e-05, + "loss": 0.1903, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.217079758644104, + "step": 1965, + "valid_targets_mean": 4874.7, + "valid_targets_min": 578 + }, + { + "epoch": 3.1723027375201287, + "grad_norm": 0.5709422965320798, + "learning_rate": 2.6648878112602115e-05, + "loss": 0.1865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20290029048919678, + "step": 1970, + "valid_targets_mean": 4168.8, + "valid_targets_min": 537 + }, + { + "epoch": 3.180354267310789, + "grad_norm": 0.5236384072204857, + "learning_rate": 2.6573085749904784e-05, + "loss": 0.2089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18646635115146637, + "step": 1975, + "valid_targets_mean": 5792.3, + "valid_targets_min": 3148 + }, + { + "epoch": 3.1884057971014492, + "grad_norm": 0.4931013592583483, + "learning_rate": 2.6497187410413676e-05, + "loss": 0.191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16621598601341248, + "step": 1980, + "valid_targets_mean": 4182.5, + "valid_targets_min": 626 + }, + { + "epoch": 3.1964573268921095, + "grad_norm": 0.513477571797092, + "learning_rate": 2.642118431782537e-05, + "loss": 0.2045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19758376479148865, + "step": 1985, + "valid_targets_mean": 4814.4, + "valid_targets_min": 841 + }, + { + "epoch": 3.2045088566827697, + "grad_norm": 0.5208664241436397, + "learning_rate": 2.6345077697525394e-05, + "loss": 0.2018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2036036252975464, + "step": 1990, + "valid_targets_mean": 4699.8, + "valid_targets_min": 1314 + }, + { + "epoch": 3.21256038647343, + "grad_norm": 0.48590086014586165, + "learning_rate": 2.6268868776568416e-05, + "loss": 0.1918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1840904951095581, + "step": 1995, + "valid_targets_mean": 4029.8, + "valid_targets_min": 2119 + }, + { + "epoch": 3.2206119162640903, + "grad_norm": 0.48975245964069325, + "learning_rate": 2.619255878365849e-05, + "loss": 0.2021, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19650408625602722, + "step": 2000, + "valid_targets_mean": 4923.2, + "valid_targets_min": 2035 + }, + { + "epoch": 3.2286634460547505, + "grad_norm": 0.4730736998762011, + "learning_rate": 2.6116148949129237e-05, + "loss": 0.2022, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19848835468292236, + "step": 2005, + "valid_targets_mean": 5154.9, + "valid_targets_min": 862 + }, + { + "epoch": 3.236714975845411, + "grad_norm": 0.49293775625702296, + "learning_rate": 2.603964050492401e-05, + "loss": 0.1845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1910780966281891, + "step": 2010, + "valid_targets_mean": 5177.5, + "valid_targets_min": 1035 + }, + { + "epoch": 3.244766505636071, + "grad_norm": 0.4352279629169526, + "learning_rate": 2.5963034684576024e-05, + "loss": 0.1833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1654304563999176, + "step": 2015, + "valid_targets_mean": 4889.2, + "valid_targets_min": 755 + }, + { + "epoch": 3.2528180354267313, + "grad_norm": 0.48928557887722884, + "learning_rate": 2.5886332723188484e-05, + "loss": 0.1874, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17599686980247498, + "step": 2020, + "valid_targets_mean": 3929.6, + "valid_targets_min": 627 + }, + { + "epoch": 3.260869565217391, + "grad_norm": 0.4602375034247986, + "learning_rate": 2.5809535857414637e-05, + "loss": 0.1916, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19927427172660828, + "step": 2025, + "valid_targets_mean": 4907.2, + "valid_targets_min": 700 + }, + { + "epoch": 3.2689210950080514, + "grad_norm": 0.45533522839686313, + "learning_rate": 2.573264532543788e-05, + "loss": 0.1902, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1822260171175003, + "step": 2030, + "valid_targets_mean": 4557.0, + "valid_targets_min": 2401 + }, + { + "epoch": 3.2769726247987117, + "grad_norm": 0.5432317512688624, + "learning_rate": 2.5655662366951778e-05, + "loss": 0.1966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1943272203207016, + "step": 2035, + "valid_targets_mean": 3990.8, + "valid_targets_min": 1026 + }, + { + "epoch": 3.285024154589372, + "grad_norm": 0.5340485828335133, + "learning_rate": 2.557858822314007e-05, + "loss": 0.2004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2411888986825943, + "step": 2040, + "valid_targets_mean": 4090.6, + "valid_targets_min": 1032 + }, + { + "epoch": 3.293075684380032, + "grad_norm": 0.5155182145466647, + "learning_rate": 2.5501424136656635e-05, + "loss": 0.1928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2276889681816101, + "step": 2045, + "valid_targets_mean": 4593.6, + "valid_targets_min": 574 + }, + { + "epoch": 3.3011272141706924, + "grad_norm": 0.5465406153378882, + "learning_rate": 2.5424171351605518e-05, + "loss": 0.1874, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17450806498527527, + "step": 2050, + "valid_targets_mean": 4325.6, + "valid_targets_min": 1283 + }, + { + "epoch": 3.3091787439613527, + "grad_norm": 0.5652814304177497, + "learning_rate": 2.5346831113520827e-05, + "loss": 0.1961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21284984052181244, + "step": 2055, + "valid_targets_mean": 4811.6, + "valid_targets_min": 1066 + }, + { + "epoch": 3.317230273752013, + "grad_norm": 0.5717869882020237, + "learning_rate": 2.526940466934664e-05, + "loss": 0.1937, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19881507754325867, + "step": 2060, + "valid_targets_mean": 3001.5, + "valid_targets_min": 350 + }, + { + "epoch": 3.325281803542673, + "grad_norm": 0.5193991609655757, + "learning_rate": 2.5191893267416964e-05, + "loss": 0.1913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19282767176628113, + "step": 2065, + "valid_targets_mean": 4379.1, + "valid_targets_min": 943 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 0.5087654352874541, + "learning_rate": 2.5114298157435526e-05, + "loss": 0.2077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22454917430877686, + "step": 2070, + "valid_targets_mean": 5994.5, + "valid_targets_min": 1253 + }, + { + "epoch": 3.3413848631239937, + "grad_norm": 0.4773374743223842, + "learning_rate": 2.503662059045568e-05, + "loss": 0.1961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19372783601284027, + "step": 2075, + "valid_targets_mean": 4415.6, + "valid_targets_min": 1046 + }, + { + "epoch": 3.3494363929146536, + "grad_norm": 0.5211750691881805, + "learning_rate": 2.4958861818860217e-05, + "loss": 0.1936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20317870378494263, + "step": 2080, + "valid_targets_mean": 4938.8, + "valid_targets_min": 1053 + }, + { + "epoch": 3.357487922705314, + "grad_norm": 0.580769513908195, + "learning_rate": 2.488102309634119e-05, + "loss": 0.1918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17012670636177063, + "step": 2085, + "valid_targets_mean": 3300.9, + "valid_targets_min": 570 + }, + { + "epoch": 3.365539452495974, + "grad_norm": 0.4677262023270228, + "learning_rate": 2.480310567787967e-05, + "loss": 0.1943, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21143823862075806, + "step": 2090, + "valid_targets_mean": 4960.4, + "valid_targets_min": 1486 + }, + { + "epoch": 3.3735909822866343, + "grad_norm": 0.498695274074395, + "learning_rate": 2.4725110819725542e-05, + "loss": 0.1877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19077152013778687, + "step": 2095, + "valid_targets_mean": 4409.8, + "valid_targets_min": 1603 + }, + { + "epoch": 3.3816425120772946, + "grad_norm": 0.6606944270313985, + "learning_rate": 2.464703977937723e-05, + "loss": 0.2122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2209235429763794, + "step": 2100, + "valid_targets_mean": 5160.9, + "valid_targets_min": 2260 + }, + { + "epoch": 3.389694041867955, + "grad_norm": 0.48392843927081547, + "learning_rate": 2.456889381556144e-05, + "loss": 0.1863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1707090586423874, + "step": 2105, + "valid_targets_mean": 4587.8, + "valid_targets_min": 1717 + }, + { + "epoch": 3.397745571658615, + "grad_norm": 0.4875147573168212, + "learning_rate": 2.449067418821285e-05, + "loss": 0.1944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18568046391010284, + "step": 2110, + "valid_targets_mean": 4881.9, + "valid_targets_min": 2069 + }, + { + "epoch": 3.4057971014492754, + "grad_norm": 0.5240738697697526, + "learning_rate": 2.4412382158453807e-05, + "loss": 0.2027, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17656230926513672, + "step": 2115, + "valid_targets_mean": 4386.1, + "valid_targets_min": 1483 + }, + { + "epoch": 3.4138486312399356, + "grad_norm": 0.5359424822294189, + "learning_rate": 2.4334018988573983e-05, + "loss": 0.2071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22453176975250244, + "step": 2120, + "valid_targets_mean": 4363.1, + "valid_targets_min": 2099 + }, + { + "epoch": 3.421900161030596, + "grad_norm": 0.7044397785288949, + "learning_rate": 2.425558594201004e-05, + "loss": 0.1963, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17415092885494232, + "step": 2125, + "valid_targets_mean": 4853.5, + "valid_targets_min": 1658 + }, + { + "epoch": 3.429951690821256, + "grad_norm": 0.7313542515268883, + "learning_rate": 2.417708428332525e-05, + "loss": 0.2088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18305009603500366, + "step": 2130, + "valid_targets_mean": 4156.8, + "valid_targets_min": 317 + }, + { + "epoch": 3.4380032206119164, + "grad_norm": 0.4738591164631795, + "learning_rate": 2.4098515278189097e-05, + "loss": 0.192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1723197102546692, + "step": 2135, + "valid_targets_mean": 4533.4, + "valid_targets_min": 1353 + }, + { + "epoch": 3.4460547504025767, + "grad_norm": 0.5276265701938105, + "learning_rate": 2.4019880193356902e-05, + "loss": 0.182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17899435758590698, + "step": 2140, + "valid_targets_mean": 4434.9, + "valid_targets_min": 675 + }, + { + "epoch": 3.454106280193237, + "grad_norm": 0.5632489582281142, + "learning_rate": 2.3941180296649348e-05, + "loss": 0.2035, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22366738319396973, + "step": 2145, + "valid_targets_mean": 3995.8, + "valid_targets_min": 1400 + }, + { + "epoch": 3.4621578099838968, + "grad_norm": 0.5526994009194625, + "learning_rate": 2.3862416856932087e-05, + "loss": 0.19, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19457975029945374, + "step": 2150, + "valid_targets_mean": 3929.9, + "valid_targets_min": 1593 + }, + { + "epoch": 3.470209339774557, + "grad_norm": 0.5417281294909433, + "learning_rate": 2.378359114409527e-05, + "loss": 0.2146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20209567248821259, + "step": 2155, + "valid_targets_mean": 4491.2, + "valid_targets_min": 1771 + }, + { + "epoch": 3.4782608695652173, + "grad_norm": 0.5283860783665116, + "learning_rate": 2.370470442903306e-05, + "loss": 0.2024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2074143886566162, + "step": 2160, + "valid_targets_mean": 3453.6, + "valid_targets_min": 656 + }, + { + "epoch": 3.4863123993558776, + "grad_norm": 0.5596014550678241, + "learning_rate": 2.362575798362315e-05, + "loss": 0.2018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20553532242774963, + "step": 2165, + "valid_targets_mean": 3728.8, + "valid_targets_min": 1291 + }, + { + "epoch": 3.494363929146538, + "grad_norm": 0.47388886907429295, + "learning_rate": 2.3546753080706242e-05, + "loss": 0.1962, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2011997401714325, + "step": 2170, + "valid_targets_mean": 4747.0, + "valid_targets_min": 1499 + }, + { + "epoch": 3.502415458937198, + "grad_norm": 0.5433473628742698, + "learning_rate": 2.346769099406557e-05, + "loss": 0.2048, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22939231991767883, + "step": 2175, + "valid_targets_mean": 3726.4, + "valid_targets_min": 824 + }, + { + "epoch": 3.5104669887278583, + "grad_norm": 0.4457288216134543, + "learning_rate": 2.33885729984063e-05, + "loss": 0.2015, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1754472553730011, + "step": 2180, + "valid_targets_mean": 5101.0, + "valid_targets_min": 1832 + }, + { + "epoch": 3.5185185185185186, + "grad_norm": 0.511189888500102, + "learning_rate": 2.3309400369335033e-05, + "loss": 0.1909, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18584825098514557, + "step": 2185, + "valid_targets_mean": 4445.9, + "valid_targets_min": 1920 + }, + { + "epoch": 3.526570048309179, + "grad_norm": 0.5632502133321703, + "learning_rate": 2.3230174383339196e-05, + "loss": 0.2072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2198297381401062, + "step": 2190, + "valid_targets_mean": 4171.1, + "valid_targets_min": 758 + }, + { + "epoch": 3.534621578099839, + "grad_norm": 0.4874975024015211, + "learning_rate": 2.3150896317766505e-05, + "loss": 0.1896, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1995978057384491, + "step": 2195, + "valid_targets_mean": 4628.1, + "valid_targets_min": 1282 + }, + { + "epoch": 3.542673107890499, + "grad_norm": 0.47843928271386893, + "learning_rate": 2.3071567450804325e-05, + "loss": 0.2053, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1690368950366974, + "step": 2200, + "valid_targets_mean": 4314.7, + "valid_targets_min": 1529 + }, + { + "epoch": 3.550724637681159, + "grad_norm": 0.5030690747235215, + "learning_rate": 2.299218906145909e-05, + "loss": 0.1883, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17591848969459534, + "step": 2205, + "valid_targets_mean": 4831.5, + "valid_targets_min": 1692 + }, + { + "epoch": 3.5587761674718195, + "grad_norm": 0.4445594459012944, + "learning_rate": 2.2912762429535684e-05, + "loss": 0.1974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2045142650604248, + "step": 2210, + "valid_targets_mean": 5075.3, + "valid_targets_min": 1787 + }, + { + "epoch": 3.5668276972624797, + "grad_norm": 0.5066371584429301, + "learning_rate": 2.2833288835616784e-05, + "loss": 0.1948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20660072565078735, + "step": 2215, + "valid_targets_mean": 4255.6, + "valid_targets_min": 978 + }, + { + "epoch": 3.57487922705314, + "grad_norm": 0.5314312269324657, + "learning_rate": 2.2753769561042235e-05, + "loss": 0.2013, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1856955885887146, + "step": 2220, + "valid_targets_mean": 4382.4, + "valid_targets_min": 306 + }, + { + "epoch": 3.5829307568438002, + "grad_norm": 0.594664193968932, + "learning_rate": 2.2674205887888386e-05, + "loss": 0.2093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22779573500156403, + "step": 2225, + "valid_targets_mean": 4177.1, + "valid_targets_min": 581 + }, + { + "epoch": 3.5909822866344605, + "grad_norm": 0.4922219770303433, + "learning_rate": 2.259459909894742e-05, + "loss": 0.2071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22328010201454163, + "step": 2230, + "valid_targets_mean": 5152.0, + "valid_targets_min": 851 + }, + { + "epoch": 3.5990338164251208, + "grad_norm": 0.48869544172575563, + "learning_rate": 2.2514950477706657e-05, + "loss": 0.1956, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20914164185523987, + "step": 2235, + "valid_targets_mean": 5138.6, + "valid_targets_min": 1484 + }, + { + "epoch": 3.607085346215781, + "grad_norm": 0.6963749789958893, + "learning_rate": 2.2435261308327875e-05, + "loss": 0.1945, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1746867597103119, + "step": 2240, + "valid_targets_mean": 4765.4, + "valid_targets_min": 1839 + }, + { + "epoch": 3.6151368760064413, + "grad_norm": 0.5367886666270889, + "learning_rate": 2.2355532875626612e-05, + "loss": 0.1794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18968860805034637, + "step": 2245, + "valid_targets_mean": 3609.9, + "valid_targets_min": 1136 + }, + { + "epoch": 3.6231884057971016, + "grad_norm": 0.5074205448750082, + "learning_rate": 2.2275766465051444e-05, + "loss": 0.1989, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20984122157096863, + "step": 2250, + "valid_targets_mean": 4557.3, + "valid_targets_min": 818 + }, + { + "epoch": 3.631239935587762, + "grad_norm": 0.5328336917497768, + "learning_rate": 2.2195963362663236e-05, + "loss": 0.196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1873650997877121, + "step": 2255, + "valid_targets_mean": 4251.6, + "valid_targets_min": 1417 + }, + { + "epoch": 3.639291465378422, + "grad_norm": 0.5279031470689559, + "learning_rate": 2.211612485511446e-05, + "loss": 0.1953, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19223317503929138, + "step": 2260, + "valid_targets_mean": 4547.8, + "valid_targets_min": 989 + }, + { + "epoch": 3.6473429951690823, + "grad_norm": 0.4807142652014487, + "learning_rate": 2.2036252229628392e-05, + "loss": 0.1855, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17448017001152039, + "step": 2265, + "valid_targets_mean": 4428.8, + "valid_targets_min": 1791 + }, + { + "epoch": 3.6553945249597426, + "grad_norm": 0.46162214621632697, + "learning_rate": 2.19563467739784e-05, + "loss": 0.1923, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15724977850914001, + "step": 2270, + "valid_targets_mean": 4100.6, + "valid_targets_min": 1856 + }, + { + "epoch": 3.6634460547504024, + "grad_norm": 0.437259128811763, + "learning_rate": 2.1876409776467165e-05, + "loss": 0.2014, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19419334828853607, + "step": 2275, + "valid_targets_mean": 5604.3, + "valid_targets_min": 1799 + }, + { + "epoch": 3.6714975845410627, + "grad_norm": 0.509046081849507, + "learning_rate": 2.1796442525905923e-05, + "loss": 0.19, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1819322556257248, + "step": 2280, + "valid_targets_mean": 3572.2, + "valid_targets_min": 1228 + }, + { + "epoch": 3.679549114331723, + "grad_norm": 0.512456182600672, + "learning_rate": 2.171644631159366e-05, + "loss": 0.2032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21677514910697937, + "step": 2285, + "valid_targets_mean": 4920.6, + "valid_targets_min": 720 + }, + { + "epoch": 3.687600644122383, + "grad_norm": 3.5749072057598865, + "learning_rate": 2.163642242329633e-05, + "loss": 0.2007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1948278695344925, + "step": 2290, + "valid_targets_mean": 4014.2, + "valid_targets_min": 1316 + }, + { + "epoch": 3.6956521739130435, + "grad_norm": 0.48865940439627575, + "learning_rate": 2.1556372151226097e-05, + "loss": 0.1904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21440473198890686, + "step": 2295, + "valid_targets_mean": 4626.4, + "valid_targets_min": 1967 + }, + { + "epoch": 3.7037037037037037, + "grad_norm": 0.5877525411956264, + "learning_rate": 2.1476296786020502e-05, + "loss": 0.1881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.173828125, + "step": 2300, + "valid_targets_mean": 4059.1, + "valid_targets_min": 345 + }, + { + "epoch": 3.711755233494364, + "grad_norm": 0.532071939990665, + "learning_rate": 2.139619761872163e-05, + "loss": 0.2034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20449981093406677, + "step": 2305, + "valid_targets_mean": 3978.1, + "valid_targets_min": 987 + }, + { + "epoch": 3.7198067632850242, + "grad_norm": 0.5500257667017805, + "learning_rate": 2.1316075940755363e-05, + "loss": 0.1994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19600236415863037, + "step": 2310, + "valid_targets_mean": 4358.1, + "valid_targets_min": 2076 + }, + { + "epoch": 3.7278582930756845, + "grad_norm": 0.4910824420542707, + "learning_rate": 2.1235933043910488e-05, + "loss": 0.2025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1749979555606842, + "step": 2315, + "valid_targets_mean": 3997.7, + "valid_targets_min": 1403 + }, + { + "epoch": 3.7359098228663448, + "grad_norm": 0.49001315022408615, + "learning_rate": 2.1155770220317918e-05, + "loss": 0.2012, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1976926326751709, + "step": 2320, + "valid_targets_mean": 4852.8, + "valid_targets_min": 506 + }, + { + "epoch": 3.7439613526570046, + "grad_norm": 0.6702681948241392, + "learning_rate": 2.107558876242983e-05, + "loss": 0.1964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19468314945697784, + "step": 2325, + "valid_targets_mean": 3499.4, + "valid_targets_min": 1520 + }, + { + "epoch": 3.752012882447665, + "grad_norm": 0.4609480240941858, + "learning_rate": 2.0995389962998845e-05, + "loss": 0.1928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19903123378753662, + "step": 2330, + "valid_targets_mean": 4891.3, + "valid_targets_min": 1678 + }, + { + "epoch": 3.760064412238325, + "grad_norm": 0.48913542025927365, + "learning_rate": 2.091517511505719e-05, + "loss": 0.2002, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21223650872707367, + "step": 2335, + "valid_targets_mean": 4559.2, + "valid_targets_min": 2650 + }, + { + "epoch": 3.7681159420289854, + "grad_norm": 0.46883840985224706, + "learning_rate": 2.0834945511895816e-05, + "loss": 0.1885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1941649466753006, + "step": 2340, + "valid_targets_mean": 5603.5, + "valid_targets_min": 2498 + }, + { + "epoch": 3.7761674718196456, + "grad_norm": 0.4681238690714185, + "learning_rate": 2.0754702447043585e-05, + "loss": 0.1954, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1943756639957428, + "step": 2345, + "valid_targets_mean": 4566.8, + "valid_targets_min": 677 + }, + { + "epoch": 3.784219001610306, + "grad_norm": 0.4482811469832385, + "learning_rate": 2.0674447214246394e-05, + "loss": 0.2, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1913878321647644, + "step": 2350, + "valid_targets_mean": 5163.6, + "valid_targets_min": 2337 + }, + { + "epoch": 3.792270531400966, + "grad_norm": 0.47556405809315727, + "learning_rate": 2.059418110744633e-05, + "loss": 0.1922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18677163124084473, + "step": 2355, + "valid_targets_mean": 4713.3, + "valid_targets_min": 933 + }, + { + "epoch": 3.8003220611916264, + "grad_norm": 0.4690759790686193, + "learning_rate": 2.0513905420760798e-05, + "loss": 0.1988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19385367631912231, + "step": 2360, + "valid_targets_mean": 4902.2, + "valid_targets_min": 627 + }, + { + "epoch": 3.8083735909822867, + "grad_norm": 0.5121675478037654, + "learning_rate": 2.043362144846164e-05, + "loss": 0.1957, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19691093266010284, + "step": 2365, + "valid_targets_mean": 3866.6, + "valid_targets_min": 689 + }, + { + "epoch": 3.816425120772947, + "grad_norm": 0.52059333383564, + "learning_rate": 2.035333048495431e-05, + "loss": 0.1864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16973920166492462, + "step": 2370, + "valid_targets_mean": 3510.0, + "valid_targets_min": 909 + }, + { + "epoch": 3.824476650563607, + "grad_norm": 0.5076146647866968, + "learning_rate": 2.0273033824756964e-05, + "loss": 0.2007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17860174179077148, + "step": 2375, + "valid_targets_mean": 3703.1, + "valid_targets_min": 506 + }, + { + "epoch": 3.8325281803542675, + "grad_norm": 0.6011850519404554, + "learning_rate": 2.0192732762479616e-05, + "loss": 0.1908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20434610545635223, + "step": 2380, + "valid_targets_mean": 4579.6, + "valid_targets_min": 631 + }, + { + "epoch": 3.8405797101449277, + "grad_norm": 0.5345131849791486, + "learning_rate": 2.011242859280325e-05, + "loss": 0.197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21205343306064606, + "step": 2385, + "valid_targets_mean": 4124.2, + "valid_targets_min": 823 + }, + { + "epoch": 3.848631239935588, + "grad_norm": 0.5114321926687082, + "learning_rate": 2.0032122610458947e-05, + "loss": 0.1921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17156703770160675, + "step": 2390, + "valid_targets_mean": 4274.9, + "valid_targets_min": 1740 + }, + { + "epoch": 3.8566827697262482, + "grad_norm": 0.6336174608958026, + "learning_rate": 1.9951816110207004e-05, + "loss": 0.1885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1964995563030243, + "step": 2395, + "valid_targets_mean": 4251.1, + "valid_targets_min": 1610 + }, + { + "epoch": 3.864734299516908, + "grad_norm": 0.49492043165747124, + "learning_rate": 1.9871510386816103e-05, + "loss": 0.1869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17603623867034912, + "step": 2400, + "valid_targets_mean": 4051.9, + "valid_targets_min": 1925 + }, + { + "epoch": 3.8727858293075683, + "grad_norm": 0.5681035881235681, + "learning_rate": 1.979120673504235e-05, + "loss": 0.2058, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20705150067806244, + "step": 2405, + "valid_targets_mean": 5603.1, + "valid_targets_min": 3701 + }, + { + "epoch": 3.8808373590982286, + "grad_norm": 1.2929630707396456, + "learning_rate": 1.9710906449608498e-05, + "loss": 0.1998, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19122429192066193, + "step": 2410, + "valid_targets_mean": 4886.8, + "valid_targets_min": 1569 + }, + { + "epoch": 3.888888888888889, + "grad_norm": 0.47492876907794285, + "learning_rate": 1.9630610825182992e-05, + "loss": 0.1871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18603213131427765, + "step": 2415, + "valid_targets_mean": 4944.1, + "valid_targets_min": 1929 + }, + { + "epoch": 3.896940418679549, + "grad_norm": 0.5030538323026981, + "learning_rate": 1.955032115635915e-05, + "loss": 0.2138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22285720705986023, + "step": 2420, + "valid_targets_mean": 4422.1, + "valid_targets_min": 1352 + }, + { + "epoch": 3.9049919484702094, + "grad_norm": 0.5031818050359901, + "learning_rate": 1.9470038737634257e-05, + "loss": 0.1991, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19892553985118866, + "step": 2425, + "valid_targets_mean": 4099.8, + "valid_targets_min": 1262 + }, + { + "epoch": 3.9130434782608696, + "grad_norm": 0.5757670497420159, + "learning_rate": 1.9389764863388706e-05, + "loss": 0.1797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19620567560195923, + "step": 2430, + "valid_targets_mean": 3868.5, + "valid_targets_min": 545 + }, + { + "epoch": 3.92109500805153, + "grad_norm": 0.48359405966706115, + "learning_rate": 1.9309500827865136e-05, + "loss": 0.191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1803748607635498, + "step": 2435, + "valid_targets_mean": 4338.0, + "valid_targets_min": 1348 + }, + { + "epoch": 3.92914653784219, + "grad_norm": 0.4640923476833218, + "learning_rate": 1.9229247925147553e-05, + "loss": 0.1968, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1907849907875061, + "step": 2440, + "valid_targets_mean": 4583.9, + "valid_targets_min": 1771 + }, + { + "epoch": 3.9371980676328504, + "grad_norm": 0.5117773057032822, + "learning_rate": 1.9149007449140462e-05, + "loss": 0.1877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17059685289859772, + "step": 2445, + "valid_targets_mean": 4338.1, + "valid_targets_min": 1784 + }, + { + "epoch": 3.9452495974235102, + "grad_norm": 0.5915049699700269, + "learning_rate": 1.906878069354804e-05, + "loss": 0.1955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21201738715171814, + "step": 2450, + "valid_targets_mean": 3018.6, + "valid_targets_min": 1027 + }, + { + "epoch": 3.9533011272141705, + "grad_norm": 0.47160593652811594, + "learning_rate": 1.898856895185322e-05, + "loss": 0.1862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16955235600471497, + "step": 2455, + "valid_targets_mean": 4181.1, + "valid_targets_min": 1075 + }, + { + "epoch": 3.9613526570048307, + "grad_norm": 0.49702142957897827, + "learning_rate": 1.8908373517296888e-05, + "loss": 0.1943, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19719739258289337, + "step": 2460, + "valid_targets_mean": 4295.4, + "valid_targets_min": 249 + }, + { + "epoch": 3.969404186795491, + "grad_norm": 0.5129921266484054, + "learning_rate": 1.882819568285701e-05, + "loss": 0.191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19295062124729156, + "step": 2465, + "valid_targets_mean": 3921.4, + "valid_targets_min": 966 + }, + { + "epoch": 3.9774557165861513, + "grad_norm": 0.45382648008126336, + "learning_rate": 1.874803674122778e-05, + "loss": 0.1929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1945837438106537, + "step": 2470, + "valid_targets_mean": 5054.1, + "valid_targets_min": 1828 + }, + { + "epoch": 3.9855072463768115, + "grad_norm": 0.47820396478837407, + "learning_rate": 1.8667897984798804e-05, + "loss": 0.2066, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21562841534614563, + "step": 2475, + "valid_targets_mean": 4692.4, + "valid_targets_min": 300 + }, + { + "epoch": 3.993558776167472, + "grad_norm": 0.539195337968872, + "learning_rate": 1.858778070563422e-05, + "loss": 0.1889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1887316107749939, + "step": 2480, + "valid_targets_mean": 4306.7, + "valid_targets_min": 590 + }, + { + "epoch": 4.001610305958132, + "grad_norm": 0.5374780928948082, + "learning_rate": 1.8507686195451918e-05, + "loss": 0.194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25074928998947144, + "step": 2485, + "valid_targets_mean": 4598.8, + "valid_targets_min": 374 + }, + { + "epoch": 4.009661835748792, + "grad_norm": 0.4944011317477905, + "learning_rate": 1.8427615745602667e-05, + "loss": 0.1624, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15997666120529175, + "step": 2490, + "valid_targets_mean": 4936.9, + "valid_targets_min": 1591 + }, + { + "epoch": 4.017713365539453, + "grad_norm": 0.4765635224384645, + "learning_rate": 1.834757064704933e-05, + "loss": 0.1769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1592136025428772, + "step": 2495, + "valid_targets_mean": 4652.1, + "valid_targets_min": 1149 + }, + { + "epoch": 4.025764895330113, + "grad_norm": 0.4956167235108182, + "learning_rate": 1.826755219034603e-05, + "loss": 0.1939, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19604167342185974, + "step": 2500, + "valid_targets_mean": 4386.4, + "valid_targets_min": 380 + }, + { + "epoch": 4.033816425120773, + "grad_norm": 0.45476926402320006, + "learning_rate": 1.818756166561733e-05, + "loss": 0.1865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1637008637189865, + "step": 2505, + "valid_targets_mean": 4699.8, + "valid_targets_min": 1197 + }, + { + "epoch": 4.041867954911433, + "grad_norm": 0.43733602489225165, + "learning_rate": 1.8107600362537473e-05, + "loss": 0.1778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16251038014888763, + "step": 2510, + "valid_targets_mean": 4847.2, + "valid_targets_min": 1813 + }, + { + "epoch": 4.049919484702094, + "grad_norm": 0.49193894405362015, + "learning_rate": 1.8027669570309572e-05, + "loss": 0.1885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16793620586395264, + "step": 2515, + "valid_targets_mean": 4591.9, + "valid_targets_min": 506 + }, + { + "epoch": 4.057971014492754, + "grad_norm": 0.5075017593056104, + "learning_rate": 1.7947770577644787e-05, + "loss": 0.1803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1729605346918106, + "step": 2520, + "valid_targets_mean": 4395.8, + "valid_targets_min": 656 + }, + { + "epoch": 4.066022544283414, + "grad_norm": 0.499304090315969, + "learning_rate": 1.786790467274161e-05, + "loss": 0.1664, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17558912932872772, + "step": 2525, + "valid_targets_mean": 4637.6, + "valid_targets_min": 2011 + }, + { + "epoch": 4.074074074074074, + "grad_norm": 0.5559504833423238, + "learning_rate": 1.778807314326505e-05, + "loss": 0.1784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18689918518066406, + "step": 2530, + "valid_targets_mean": 3930.3, + "valid_targets_min": 548 + }, + { + "epoch": 4.082125603864735, + "grad_norm": 0.47641769475056645, + "learning_rate": 1.7708277276325886e-05, + "loss": 0.1714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1637909710407257, + "step": 2535, + "valid_targets_mean": 5154.6, + "valid_targets_min": 1826 + }, + { + "epoch": 4.090177133655395, + "grad_norm": 0.5182362997358064, + "learning_rate": 1.762851835845992e-05, + "loss": 0.1803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18916486203670502, + "step": 2540, + "valid_targets_mean": 4544.1, + "valid_targets_min": 570 + }, + { + "epoch": 4.098228663446055, + "grad_norm": 0.4862343355871757, + "learning_rate": 1.754879767560723e-05, + "loss": 0.1981, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18593139946460724, + "step": 2545, + "valid_targets_mean": 4532.4, + "valid_targets_min": 1521 + }, + { + "epoch": 4.106280193236715, + "grad_norm": 0.527960071470203, + "learning_rate": 1.746911651309144e-05, + "loss": 0.1788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19690777361392975, + "step": 2550, + "valid_targets_mean": 3843.6, + "valid_targets_min": 881 + }, + { + "epoch": 4.114331723027375, + "grad_norm": 0.5103425212083436, + "learning_rate": 1.7389476155598974e-05, + "loss": 0.187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21908551454544067, + "step": 2555, + "valid_targets_mean": 5082.4, + "valid_targets_min": 784 + }, + { + "epoch": 4.122383252818035, + "grad_norm": 0.7756087805548777, + "learning_rate": 1.7309877887158388e-05, + "loss": 0.1832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1794763058423996, + "step": 2560, + "valid_targets_mean": 2992.2, + "valid_targets_min": 467 + }, + { + "epoch": 4.130434782608695, + "grad_norm": 0.4567684656881922, + "learning_rate": 1.723032299111964e-05, + "loss": 0.1688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15088678896427155, + "step": 2565, + "valid_targets_mean": 4922.8, + "valid_targets_min": 1327 + }, + { + "epoch": 4.138486312399356, + "grad_norm": 0.5425918961875108, + "learning_rate": 1.7150812750133382e-05, + "loss": 0.1775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1673865020275116, + "step": 2570, + "valid_targets_mean": 3965.9, + "valid_targets_min": 1407 + }, + { + "epoch": 4.146537842190016, + "grad_norm": 0.6838407873867918, + "learning_rate": 1.707134844613032e-05, + "loss": 0.1818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15312550961971283, + "step": 2575, + "valid_targets_mean": 4199.9, + "valid_targets_min": 1891 + }, + { + "epoch": 4.154589371980676, + "grad_norm": 0.5385560587074719, + "learning_rate": 1.699193136030052e-05, + "loss": 0.1984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17279349267482758, + "step": 2580, + "valid_targets_mean": 3938.8, + "valid_targets_min": 880 + }, + { + "epoch": 4.162640901771336, + "grad_norm": 0.5502968325727028, + "learning_rate": 1.6912562773072765e-05, + "loss": 0.1787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18601390719413757, + "step": 2585, + "valid_targets_mean": 4653.2, + "valid_targets_min": 2040 + }, + { + "epoch": 4.170692431561997, + "grad_norm": 0.5862578517681709, + "learning_rate": 1.6833243964093877e-05, + "loss": 0.1952, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2094813883304596, + "step": 2590, + "valid_targets_mean": 4215.5, + "valid_targets_min": 1508 + }, + { + "epoch": 4.178743961352657, + "grad_norm": 0.525233464465349, + "learning_rate": 1.6753976212208137e-05, + "loss": 0.1795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17922204732894897, + "step": 2595, + "valid_targets_mean": 3851.4, + "valid_targets_min": 667 + }, + { + "epoch": 4.186795491143317, + "grad_norm": 0.6610861468502892, + "learning_rate": 1.667476079543664e-05, + "loss": 0.1907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17614664137363434, + "step": 2600, + "valid_targets_mean": 3052.1, + "valid_targets_min": 691 + }, + { + "epoch": 4.194847020933977, + "grad_norm": 0.5630100784699484, + "learning_rate": 1.659559899095667e-05, + "loss": 0.1895, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20966169238090515, + "step": 2605, + "valid_targets_mean": 5309.3, + "valid_targets_min": 1187 + }, + { + "epoch": 4.202898550724638, + "grad_norm": 0.6807488645292253, + "learning_rate": 1.651649207508114e-05, + "loss": 0.1771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19908779859542847, + "step": 2610, + "valid_targets_mean": 4216.5, + "valid_targets_min": 1027 + }, + { + "epoch": 4.210950080515298, + "grad_norm": 0.49692416499926767, + "learning_rate": 1.643744132323801e-05, + "loss": 0.1844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19740644097328186, + "step": 2615, + "valid_targets_mean": 4830.8, + "valid_targets_min": 1913 + }, + { + "epoch": 4.219001610305958, + "grad_norm": 0.6036326467813666, + "learning_rate": 1.6358448009949714e-05, + "loss": 0.1855, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22196456789970398, + "step": 2620, + "valid_targets_mean": 5333.3, + "valid_targets_min": 2355 + }, + { + "epoch": 4.2270531400966185, + "grad_norm": 0.4829391874239536, + "learning_rate": 1.6279513408812603e-05, + "loss": 0.17, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1743869185447693, + "step": 2625, + "valid_targets_mean": 5413.8, + "valid_targets_min": 1251 + }, + { + "epoch": 4.235104669887279, + "grad_norm": 0.5048080599206834, + "learning_rate": 1.620063879247643e-05, + "loss": 0.1731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16595719754695892, + "step": 2630, + "valid_targets_mean": 3924.2, + "valid_targets_min": 443 + }, + { + "epoch": 4.243156199677939, + "grad_norm": 0.46752510731782826, + "learning_rate": 1.6121825432623827e-05, + "loss": 0.1814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1762651652097702, + "step": 2635, + "valid_targets_mean": 5055.4, + "valid_targets_min": 1403 + }, + { + "epoch": 4.251207729468599, + "grad_norm": 0.4797644865040805, + "learning_rate": 1.6043074599949785e-05, + "loss": 0.1808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17031241953372955, + "step": 2640, + "valid_targets_mean": 4893.6, + "valid_targets_min": 966 + }, + { + "epoch": 4.2592592592592595, + "grad_norm": 1.0165849437996184, + "learning_rate": 1.5964387564141192e-05, + "loss": 0.1712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19583508372306824, + "step": 2645, + "valid_targets_mean": 4988.9, + "valid_targets_min": 1611 + }, + { + "epoch": 4.26731078904992, + "grad_norm": 0.6039054133342181, + "learning_rate": 1.588576559385635e-05, + "loss": 0.1827, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17843933403491974, + "step": 2650, + "valid_targets_mean": 4234.7, + "valid_targets_min": 1211 + }, + { + "epoch": 4.27536231884058, + "grad_norm": 0.512856167089867, + "learning_rate": 1.5807209956704505e-05, + "loss": 0.1945, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1996535062789917, + "step": 2655, + "valid_targets_mean": 4500.6, + "valid_targets_min": 1506 + }, + { + "epoch": 4.28341384863124, + "grad_norm": 0.4991515466828228, + "learning_rate": 1.5728721919225428e-05, + "loss": 0.1629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14569713175296783, + "step": 2660, + "valid_targets_mean": 4720.1, + "valid_targets_min": 1211 + }, + { + "epoch": 4.291465378421901, + "grad_norm": 0.9601831661650259, + "learning_rate": 1.5650302746869004e-05, + "loss": 0.1857, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1880815625190735, + "step": 2665, + "valid_targets_mean": 4171.6, + "valid_targets_min": 833 + }, + { + "epoch": 4.29951690821256, + "grad_norm": 0.4412736536027149, + "learning_rate": 1.5571953703974813e-05, + "loss": 0.1752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16794951260089874, + "step": 2670, + "valid_targets_mean": 5724.5, + "valid_targets_min": 2565 + }, + { + "epoch": 4.30756843800322, + "grad_norm": 0.49660094639156666, + "learning_rate": 1.5493676053751747e-05, + "loss": 0.1701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17535799741744995, + "step": 2675, + "valid_targets_mean": 4800.3, + "valid_targets_min": 1829 + }, + { + "epoch": 4.3156199677938805, + "grad_norm": 0.5456531223844872, + "learning_rate": 1.5415471058257638e-05, + "loss": 0.1783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17335663735866547, + "step": 2680, + "valid_targets_mean": 4570.4, + "valid_targets_min": 1658 + }, + { + "epoch": 4.323671497584541, + "grad_norm": 0.45678206025420953, + "learning_rate": 1.533733997837893e-05, + "loss": 0.187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.169780433177948, + "step": 2685, + "valid_targets_mean": 4216.1, + "valid_targets_min": 1449 + }, + { + "epoch": 4.331723027375201, + "grad_norm": 0.5415606665772222, + "learning_rate": 1.5259284073810333e-05, + "loss": 0.1787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16021227836608887, + "step": 2690, + "valid_targets_mean": 3275.1, + "valid_targets_min": 984 + }, + { + "epoch": 4.339774557165861, + "grad_norm": 0.5136691048212793, + "learning_rate": 1.5181304603034513e-05, + "loss": 0.1914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1827298104763031, + "step": 2695, + "valid_targets_mean": 4889.6, + "valid_targets_min": 1238 + }, + { + "epoch": 4.3478260869565215, + "grad_norm": 1.4914173173538663, + "learning_rate": 1.5103402823301819e-05, + "loss": 0.1838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18776141107082367, + "step": 2700, + "valid_targets_mean": 4532.6, + "valid_targets_min": 850 + }, + { + "epoch": 4.355877616747182, + "grad_norm": 0.5083803805655037, + "learning_rate": 1.5025579990609973e-05, + "loss": 0.1821, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16755394637584686, + "step": 2705, + "valid_targets_mean": 4128.1, + "valid_targets_min": 379 + }, + { + "epoch": 4.363929146537842, + "grad_norm": 0.500974762279468, + "learning_rate": 1.4947837359683882e-05, + "loss": 0.1829, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17978541553020477, + "step": 2710, + "valid_targets_mean": 5342.4, + "valid_targets_min": 2152 + }, + { + "epoch": 4.371980676328502, + "grad_norm": 0.5603545589277986, + "learning_rate": 1.487017618395534e-05, + "loss": 0.1781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.198044091463089, + "step": 2715, + "valid_targets_mean": 4740.4, + "valid_targets_min": 1746 + }, + { + "epoch": 4.3800322061191626, + "grad_norm": 0.4771112752411543, + "learning_rate": 1.479259771554288e-05, + "loss": 0.1931, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1699572205543518, + "step": 2720, + "valid_targets_mean": 4872.8, + "valid_targets_min": 448 + }, + { + "epoch": 4.388083735909823, + "grad_norm": 0.521661536652827, + "learning_rate": 1.4715103205231545e-05, + "loss": 0.1898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18613924086093903, + "step": 2725, + "valid_targets_mean": 4061.7, + "valid_targets_min": 995 + }, + { + "epoch": 4.396135265700483, + "grad_norm": 0.537250383283014, + "learning_rate": 1.463769390245273e-05, + "loss": 0.1794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.178946852684021, + "step": 2730, + "valid_targets_mean": 4061.8, + "valid_targets_min": 900 + }, + { + "epoch": 4.404186795491143, + "grad_norm": 0.5244437127559632, + "learning_rate": 1.4560371055264052e-05, + "loss": 0.1787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18043842911720276, + "step": 2735, + "valid_targets_mean": 4802.7, + "valid_targets_min": 1843 + }, + { + "epoch": 4.412238325281804, + "grad_norm": 0.5146870682156327, + "learning_rate": 1.448313591032922e-05, + "loss": 0.1887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17479149997234344, + "step": 2740, + "valid_targets_mean": 5214.2, + "valid_targets_min": 1866 + }, + { + "epoch": 4.420289855072464, + "grad_norm": 0.4691547439815975, + "learning_rate": 1.4405989712897923e-05, + "loss": 0.1853, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18224862217903137, + "step": 2745, + "valid_targets_mean": 5050.6, + "valid_targets_min": 1427 + }, + { + "epoch": 4.428341384863124, + "grad_norm": 0.5424155500541844, + "learning_rate": 1.4328933706785782e-05, + "loss": 0.1695, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1883089393377304, + "step": 2750, + "valid_targets_mean": 4057.4, + "valid_targets_min": 854 + }, + { + "epoch": 4.436392914653784, + "grad_norm": 0.5100283152717094, + "learning_rate": 1.4251969134354247e-05, + "loss": 0.1703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16344183683395386, + "step": 2755, + "valid_targets_mean": 4199.4, + "valid_targets_min": 1557 + }, + { + "epoch": 4.444444444444445, + "grad_norm": 0.5071596893081406, + "learning_rate": 1.4175097236490627e-05, + "loss": 0.1772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1783141791820526, + "step": 2760, + "valid_targets_mean": 4905.3, + "valid_targets_min": 383 + }, + { + "epoch": 4.452495974235105, + "grad_norm": 0.4808405938396642, + "learning_rate": 1.409831925258805e-05, + "loss": 0.1864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19857333600521088, + "step": 2765, + "valid_targets_mean": 4961.1, + "valid_targets_min": 847 + }, + { + "epoch": 4.460547504025765, + "grad_norm": 0.48057741594526054, + "learning_rate": 1.4021636420525466e-05, + "loss": 0.1824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1644965559244156, + "step": 2770, + "valid_targets_mean": 4509.1, + "valid_targets_min": 1227 + }, + { + "epoch": 4.468599033816425, + "grad_norm": 0.5128321881887902, + "learning_rate": 1.3945049976647726e-05, + "loss": 0.1766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1749866008758545, + "step": 2775, + "valid_targets_mean": 4825.0, + "valid_targets_min": 1201 + }, + { + "epoch": 4.476650563607086, + "grad_norm": 0.6327024505387485, + "learning_rate": 1.3868561155745628e-05, + "loss": 0.1917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19712504744529724, + "step": 2780, + "valid_targets_mean": 4430.3, + "valid_targets_min": 703 + }, + { + "epoch": 4.484702093397746, + "grad_norm": 0.4913167577071679, + "learning_rate": 1.3792171191036001e-05, + "loss": 0.1769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15714874863624573, + "step": 2785, + "valid_targets_mean": 4104.7, + "valid_targets_min": 1663 + }, + { + "epoch": 4.492753623188406, + "grad_norm": 0.5394186960786538, + "learning_rate": 1.3715881314141835e-05, + "loss": 0.1902, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19951429963111877, + "step": 2790, + "valid_targets_mean": 4620.1, + "valid_targets_min": 1843 + }, + { + "epoch": 4.500805152979066, + "grad_norm": 0.5953737763754138, + "learning_rate": 1.3639692755072429e-05, + "loss": 0.1882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2141798585653305, + "step": 2795, + "valid_targets_mean": 4660.8, + "valid_targets_min": 1401 + }, + { + "epoch": 4.508856682769727, + "grad_norm": 0.5453366269316137, + "learning_rate": 1.3563606742203548e-05, + "loss": 0.1969, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20897048711776733, + "step": 2800, + "valid_targets_mean": 4447.4, + "valid_targets_min": 1113 + }, + { + "epoch": 4.516908212560386, + "grad_norm": 0.531617180983723, + "learning_rate": 1.3487624502257598e-05, + "loss": 0.18, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20468959212303162, + "step": 2805, + "valid_targets_mean": 4891.9, + "valid_targets_min": 2624 + }, + { + "epoch": 4.524959742351046, + "grad_norm": 1.0175978905323302, + "learning_rate": 1.3411747260283905e-05, + "loss": 0.1882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1867702454328537, + "step": 2810, + "valid_targets_mean": 4380.1, + "valid_targets_min": 800 + }, + { + "epoch": 4.533011272141707, + "grad_norm": 0.5172120222306495, + "learning_rate": 1.333597623963892e-05, + "loss": 0.178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17019790410995483, + "step": 2815, + "valid_targets_mean": 4795.6, + "valid_targets_min": 626 + }, + { + "epoch": 4.541062801932367, + "grad_norm": 0.6059009807629652, + "learning_rate": 1.3260312661966487e-05, + "loss": 0.18, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17550814151763916, + "step": 2820, + "valid_targets_mean": 5061.1, + "valid_targets_min": 1263 + }, + { + "epoch": 4.549114331723027, + "grad_norm": 0.613180864431363, + "learning_rate": 1.3184757747178187e-05, + "loss": 0.1758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19658298790454865, + "step": 2825, + "valid_targets_mean": 5041.3, + "valid_targets_min": 1795 + }, + { + "epoch": 4.557165861513687, + "grad_norm": 0.5633900947396653, + "learning_rate": 1.3109312713433642e-05, + "loss": 0.1734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18244323134422302, + "step": 2830, + "valid_targets_mean": 4282.2, + "valid_targets_min": 543 + }, + { + "epoch": 4.565217391304348, + "grad_norm": 0.9588780998775635, + "learning_rate": 1.3033978777120861e-05, + "loss": 0.1718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1690833568572998, + "step": 2835, + "valid_targets_mean": 3991.0, + "valid_targets_min": 1088 + }, + { + "epoch": 4.573268921095008, + "grad_norm": 0.5205077973662069, + "learning_rate": 1.2958757152836671e-05, + "loss": 0.1842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22376713156700134, + "step": 2840, + "valid_targets_mean": 4458.4, + "valid_targets_min": 2143 + }, + { + "epoch": 4.581320450885668, + "grad_norm": 0.5253874104264332, + "learning_rate": 1.2883649053367106e-05, + "loss": 0.1808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1675848662853241, + "step": 2845, + "valid_targets_mean": 4003.0, + "valid_targets_min": 845 + }, + { + "epoch": 4.5893719806763285, + "grad_norm": 0.5452379881212879, + "learning_rate": 1.2808655689667846e-05, + "loss": 0.1818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1796257197856903, + "step": 2850, + "valid_targets_mean": 4152.3, + "valid_targets_min": 455 + }, + { + "epoch": 4.597423510466989, + "grad_norm": 0.5239905621350787, + "learning_rate": 1.2733778270844712e-05, + "loss": 0.1889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18232783675193787, + "step": 2855, + "valid_targets_mean": 4419.6, + "valid_targets_min": 529 + }, + { + "epoch": 4.605475040257649, + "grad_norm": 0.5607032815508621, + "learning_rate": 1.265901800413416e-05, + "loss": 0.1853, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17835384607315063, + "step": 2860, + "valid_targets_mean": 3395.8, + "valid_targets_min": 627 + }, + { + "epoch": 4.613526570048309, + "grad_norm": 0.9496967272226038, + "learning_rate": 1.2584376094883832e-05, + "loss": 0.1845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17716357111930847, + "step": 2865, + "valid_targets_mean": 4483.6, + "valid_targets_min": 710 + }, + { + "epoch": 4.6215780998389695, + "grad_norm": 0.7842117859067653, + "learning_rate": 1.250985374653311e-05, + "loss": 0.1784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17786236107349396, + "step": 2870, + "valid_targets_mean": 4693.0, + "valid_targets_min": 1134 + }, + { + "epoch": 4.62962962962963, + "grad_norm": 0.5033684151280889, + "learning_rate": 1.2435452160593698e-05, + "loss": 0.1874, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17148058116436005, + "step": 2875, + "valid_targets_mean": 4303.3, + "valid_targets_min": 1429 + }, + { + "epoch": 4.63768115942029, + "grad_norm": 0.5032006756891246, + "learning_rate": 1.2361172536630288e-05, + "loss": 0.182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16900071501731873, + "step": 2880, + "valid_targets_mean": 4471.4, + "valid_targets_min": 1507 + }, + { + "epoch": 4.64573268921095, + "grad_norm": 0.5449401674057853, + "learning_rate": 1.2287016072241195e-05, + "loss": 0.2019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19873306155204773, + "step": 2885, + "valid_targets_mean": 4050.9, + "valid_targets_min": 321 + }, + { + "epoch": 4.6537842190016105, + "grad_norm": 0.5002283792241559, + "learning_rate": 1.221298396303904e-05, + "loss": 0.1746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17124618589878082, + "step": 2890, + "valid_targets_mean": 5187.2, + "valid_targets_min": 1399 + }, + { + "epoch": 4.661835748792271, + "grad_norm": 0.4706451280493202, + "learning_rate": 1.2139077402631495e-05, + "loss": 0.1721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14620235562324524, + "step": 2895, + "valid_targets_mean": 4477.4, + "valid_targets_min": 1454 + }, + { + "epoch": 4.669887278582931, + "grad_norm": 0.5330785987823722, + "learning_rate": 1.2065297582602037e-05, + "loss": 0.1724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17816966772079468, + "step": 2900, + "valid_targets_mean": 4355.4, + "valid_targets_min": 2065 + }, + { + "epoch": 4.677938808373591, + "grad_norm": 0.4942305241126504, + "learning_rate": 1.199164569249071e-05, + "loss": 0.1832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18558406829833984, + "step": 2905, + "valid_targets_mean": 4951.6, + "valid_targets_min": 1299 + }, + { + "epoch": 4.685990338164252, + "grad_norm": 0.537628904978011, + "learning_rate": 1.191812291977497e-05, + "loss": 0.1826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19403138756752014, + "step": 2910, + "valid_targets_mean": 4024.0, + "valid_targets_min": 1450 + }, + { + "epoch": 4.694041867954912, + "grad_norm": 0.5439040484202802, + "learning_rate": 1.1844730449850546e-05, + "loss": 0.1782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19234639406204224, + "step": 2915, + "valid_targets_mean": 5036.8, + "valid_targets_min": 1058 + }, + { + "epoch": 4.702093397745571, + "grad_norm": 0.583242909808935, + "learning_rate": 1.1771469466012309e-05, + "loss": 0.1768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20131342113018036, + "step": 2920, + "valid_targets_mean": 4106.1, + "valid_targets_min": 983 + }, + { + "epoch": 4.710144927536232, + "grad_norm": 0.5038233636197879, + "learning_rate": 1.1698341149435196e-05, + "loss": 0.1849, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1684148907661438, + "step": 2925, + "valid_targets_mean": 4802.6, + "valid_targets_min": 1952 + }, + { + "epoch": 4.718196457326892, + "grad_norm": 0.4600414999991662, + "learning_rate": 1.1625346679155179e-05, + "loss": 0.1878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15985998511314392, + "step": 2930, + "valid_targets_mean": 4799.9, + "valid_targets_min": 1859 + }, + { + "epoch": 4.726247987117552, + "grad_norm": 0.4936755991303249, + "learning_rate": 1.1552487232050242e-05, + "loss": 0.1827, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17861491441726685, + "step": 2935, + "valid_targets_mean": 4306.8, + "valid_targets_min": 1605 + }, + { + "epoch": 4.734299516908212, + "grad_norm": 0.44442070244437953, + "learning_rate": 1.1479763982821414e-05, + "loss": 0.1711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17784011363983154, + "step": 2940, + "valid_targets_mean": 5608.9, + "valid_targets_min": 1956 + }, + { + "epoch": 4.7423510466988725, + "grad_norm": 0.5637401886713175, + "learning_rate": 1.1407178103973834e-05, + "loss": 0.1983, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18229396641254425, + "step": 2945, + "valid_targets_mean": 3870.8, + "valid_targets_min": 864 + }, + { + "epoch": 4.750402576489533, + "grad_norm": 0.45308334583470944, + "learning_rate": 1.1334730765797843e-05, + "loss": 0.1841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19959984719753265, + "step": 2950, + "valid_targets_mean": 5085.4, + "valid_targets_min": 1791 + }, + { + "epoch": 4.758454106280193, + "grad_norm": 0.5571182515606663, + "learning_rate": 1.1262423136350087e-05, + "loss": 0.1956, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22160165011882782, + "step": 2955, + "valid_targets_mean": 3792.0, + "valid_targets_min": 1099 + }, + { + "epoch": 4.766505636070853, + "grad_norm": 0.5901554994508056, + "learning_rate": 1.1190256381434738e-05, + "loss": 0.186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17873704433441162, + "step": 2960, + "valid_targets_mean": 3899.3, + "valid_targets_min": 778 + }, + { + "epoch": 4.774557165861514, + "grad_norm": 0.5158336477050018, + "learning_rate": 1.1118231664584674e-05, + "loss": 0.1689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1886855959892273, + "step": 2965, + "valid_targets_mean": 4861.4, + "valid_targets_min": 1898 + }, + { + "epoch": 4.782608695652174, + "grad_norm": 0.4887213717876692, + "learning_rate": 1.1046350147042681e-05, + "loss": 0.187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20001167058944702, + "step": 2970, + "valid_targets_mean": 4675.2, + "valid_targets_min": 952 + }, + { + "epoch": 4.790660225442834, + "grad_norm": 0.48768071610571156, + "learning_rate": 1.0974612987742807e-05, + "loss": 0.1864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18149898946285248, + "step": 2975, + "valid_targets_mean": 4411.8, + "valid_targets_min": 1129 + }, + { + "epoch": 4.798711755233494, + "grad_norm": 0.4873268036344298, + "learning_rate": 1.0903021343291613e-05, + "loss": 0.1926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20546609163284302, + "step": 2980, + "valid_targets_mean": 4418.5, + "valid_targets_min": 608 + }, + { + "epoch": 4.806763285024155, + "grad_norm": 0.5054671612514106, + "learning_rate": 1.0831576367949555e-05, + "loss": 0.1886, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18941310048103333, + "step": 2985, + "valid_targets_mean": 4868.4, + "valid_targets_min": 2311 + }, + { + "epoch": 4.814814814814815, + "grad_norm": 0.5242076423511247, + "learning_rate": 1.0760279213612362e-05, + "loss": 0.1738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16793029010295868, + "step": 2990, + "valid_targets_mean": 5404.2, + "valid_targets_min": 2117 + }, + { + "epoch": 4.822866344605475, + "grad_norm": 0.6828436818331252, + "learning_rate": 1.068913102979248e-05, + "loss": 0.1858, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19285649061203003, + "step": 2995, + "valid_targets_mean": 3826.8, + "valid_targets_min": 1070 + }, + { + "epoch": 4.830917874396135, + "grad_norm": 0.47219942742537285, + "learning_rate": 1.0618132963600507e-05, + "loss": 0.1784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18546539545059204, + "step": 3000, + "valid_targets_mean": 5087.4, + "valid_targets_min": 1959 + }, + { + "epoch": 4.838969404186796, + "grad_norm": 0.4457820811603146, + "learning_rate": 1.0547286159726743e-05, + "loss": 0.1801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19360345602035522, + "step": 3005, + "valid_targets_mean": 5867.6, + "valid_targets_min": 1035 + }, + { + "epoch": 4.847020933977456, + "grad_norm": 0.5847539672595908, + "learning_rate": 1.047659176042268e-05, + "loss": 0.1645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16391941905021667, + "step": 3010, + "valid_targets_mean": 3891.6, + "valid_targets_min": 578 + }, + { + "epoch": 4.855072463768116, + "grad_norm": 0.5080943188478495, + "learning_rate": 1.0406050905482647e-05, + "loss": 0.1756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1736719310283661, + "step": 3015, + "valid_targets_mean": 4204.6, + "valid_targets_min": 631 + }, + { + "epoch": 4.8631239935587764, + "grad_norm": 0.4950021070139557, + "learning_rate": 1.033566473222539e-05, + "loss": 0.1688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16896328330039978, + "step": 3020, + "valid_targets_mean": 5143.3, + "valid_targets_min": 2767 + }, + { + "epoch": 4.871175523349437, + "grad_norm": 0.5149173904980839, + "learning_rate": 1.0265434375475744e-05, + "loss": 0.1751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1894487589597702, + "step": 3025, + "valid_targets_mean": 4708.5, + "valid_targets_min": 656 + }, + { + "epoch": 4.879227053140097, + "grad_norm": 0.5128722203180325, + "learning_rate": 1.0195360967546342e-05, + "loss": 0.1723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18005244433879852, + "step": 3030, + "valid_targets_mean": 5025.9, + "valid_targets_min": 1716 + }, + { + "epoch": 4.887278582930757, + "grad_norm": 0.5010958581493115, + "learning_rate": 1.0125445638219369e-05, + "loss": 0.1871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19406850636005402, + "step": 3035, + "valid_targets_mean": 4813.1, + "valid_targets_min": 908 + }, + { + "epoch": 4.8953301127214175, + "grad_norm": 0.5210294803713594, + "learning_rate": 1.00556895147283e-05, + "loss": 0.1727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16892248392105103, + "step": 3040, + "valid_targets_mean": 3893.8, + "valid_targets_min": 1273 + }, + { + "epoch": 4.903381642512077, + "grad_norm": 0.6293372498483756, + "learning_rate": 9.986093721739793e-06, + "loss": 0.1872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1870511919260025, + "step": 3045, + "valid_targets_mean": 3776.4, + "valid_targets_min": 296 + }, + { + "epoch": 4.911433172302738, + "grad_norm": 0.5480638562689124, + "learning_rate": 9.916659381335524e-06, + "loss": 0.1888, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15634512901306152, + "step": 3050, + "valid_targets_mean": 4189.9, + "valid_targets_min": 1195 + }, + { + "epoch": 4.919484702093397, + "grad_norm": 0.5734404649785564, + "learning_rate": 9.847387612994065e-06, + "loss": 0.1648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1715972125530243, + "step": 3055, + "valid_targets_mean": 4624.1, + "valid_targets_min": 999 + }, + { + "epoch": 4.927536231884058, + "grad_norm": 0.4755904568874477, + "learning_rate": 9.778279533572894e-06, + "loss": 0.1792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1521787941455841, + "step": 3060, + "valid_targets_mean": 4754.1, + "valid_targets_min": 1047 + }, + { + "epoch": 4.935587761674718, + "grad_norm": 0.7122666442809198, + "learning_rate": 9.70933625729035e-06, + "loss": 0.1822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17114746570587158, + "step": 3065, + "valid_targets_mean": 5142.9, + "valid_targets_min": 1175 + }, + { + "epoch": 4.943639291465378, + "grad_norm": 0.7359607180808936, + "learning_rate": 9.640558895707681e-06, + "loss": 0.1732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18806086480617523, + "step": 3070, + "valid_targets_mean": 4064.7, + "valid_targets_min": 1615 + }, + { + "epoch": 4.951690821256038, + "grad_norm": 0.5379368590841539, + "learning_rate": 9.571948557711104e-06, + "loss": 0.1825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17751240730285645, + "step": 3075, + "valid_targets_mean": 3381.2, + "valid_targets_min": 249 + }, + { + "epoch": 4.959742351046699, + "grad_norm": 0.4777499224064591, + "learning_rate": 9.503506349493959e-06, + "loss": 0.1772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1592390239238739, + "step": 3080, + "valid_targets_mean": 4629.0, + "valid_targets_min": 1254 + }, + { + "epoch": 4.967793880837359, + "grad_norm": 0.5139616446857665, + "learning_rate": 9.435233374538848e-06, + "loss": 0.1778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18165704607963562, + "step": 3085, + "valid_targets_mean": 4665.1, + "valid_targets_min": 1736 + }, + { + "epoch": 4.975845410628019, + "grad_norm": 0.5162555365137094, + "learning_rate": 9.367130733599863e-06, + "loss": 0.1706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19067494571208954, + "step": 3090, + "valid_targets_mean": 4688.7, + "valid_targets_min": 1872 + }, + { + "epoch": 4.9838969404186795, + "grad_norm": 0.569575182274062, + "learning_rate": 9.299199524684815e-06, + "loss": 0.1671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16250163316726685, + "step": 3095, + "valid_targets_mean": 4311.8, + "valid_targets_min": 1994 + }, + { + "epoch": 4.99194847020934, + "grad_norm": 0.5520222968798817, + "learning_rate": 9.23144084303756e-06, + "loss": 0.1803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1921304166316986, + "step": 3100, + "valid_targets_mean": 4493.3, + "valid_targets_min": 2050 + }, + { + "epoch": 5.0, + "grad_norm": 0.7884221741023202, + "learning_rate": 9.163855781120302e-06, + "loss": 0.1794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20062533020973206, + "step": 3105, + "valid_targets_mean": 3796.8, + "valid_targets_min": 374 + }, + { + "epoch": 5.00805152979066, + "grad_norm": 0.5195634108838502, + "learning_rate": 9.096445428596026e-06, + "loss": 0.1668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1869000345468521, + "step": 3110, + "valid_targets_mean": 4366.8, + "valid_targets_min": 1253 + }, + { + "epoch": 5.0161030595813205, + "grad_norm": 0.5278497870764118, + "learning_rate": 9.029210872310884e-06, + "loss": 0.1648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1439373940229416, + "step": 3115, + "valid_targets_mean": 4365.9, + "valid_targets_min": 554 + }, + { + "epoch": 5.024154589371981, + "grad_norm": 0.5284272892101102, + "learning_rate": 8.962153196276713e-06, + "loss": 0.1692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14253228902816772, + "step": 3120, + "valid_targets_mean": 4662.7, + "valid_targets_min": 312 + }, + { + "epoch": 5.032206119162641, + "grad_norm": 0.5474998021835664, + "learning_rate": 8.895273481653527e-06, + "loss": 0.1734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17854666709899902, + "step": 3125, + "valid_targets_mean": 4428.5, + "valid_targets_min": 627 + }, + { + "epoch": 5.040257648953301, + "grad_norm": 1.2329548853780503, + "learning_rate": 8.828572806732103e-06, + "loss": 0.1709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1708088219165802, + "step": 3130, + "valid_targets_mean": 4686.6, + "valid_targets_min": 1916 + }, + { + "epoch": 5.048309178743962, + "grad_norm": 0.5097310950347612, + "learning_rate": 8.76205224691659e-06, + "loss": 0.1707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17174428701400757, + "step": 3135, + "valid_targets_mean": 4785.3, + "valid_targets_min": 1860 + }, + { + "epoch": 5.056360708534622, + "grad_norm": 0.46008600902359126, + "learning_rate": 8.695712874707169e-06, + "loss": 0.167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16250503063201904, + "step": 3140, + "valid_targets_mean": 4901.8, + "valid_targets_min": 402 + }, + { + "epoch": 5.064412238325282, + "grad_norm": 0.47656692190311706, + "learning_rate": 8.629555759682756e-06, + "loss": 0.1676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15433499217033386, + "step": 3145, + "valid_targets_mean": 5206.9, + "valid_targets_min": 892 + }, + { + "epoch": 5.072463768115942, + "grad_norm": 0.46684035634871635, + "learning_rate": 8.563581968483774e-06, + "loss": 0.1882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15580695867538452, + "step": 3150, + "valid_targets_mean": 4968.5, + "valid_targets_min": 1514 + }, + { + "epoch": 5.080515297906603, + "grad_norm": 0.4817709325187039, + "learning_rate": 8.497792564794935e-06, + "loss": 0.1742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1596037745475769, + "step": 3155, + "valid_targets_mean": 4873.9, + "valid_targets_min": 656 + }, + { + "epoch": 5.088566827697263, + "grad_norm": 0.49561234768208556, + "learning_rate": 8.432188609328112e-06, + "loss": 0.1795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18620729446411133, + "step": 3160, + "valid_targets_mean": 4277.6, + "valid_targets_min": 429 + }, + { + "epoch": 5.096618357487923, + "grad_norm": 0.5342689640020111, + "learning_rate": 8.366771159805222e-06, + "loss": 0.1614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1623041033744812, + "step": 3165, + "valid_targets_mean": 3849.9, + "valid_targets_min": 1692 + }, + { + "epoch": 5.1046698872785825, + "grad_norm": 0.5558864483320473, + "learning_rate": 8.301541270941178e-06, + "loss": 0.1744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19302302598953247, + "step": 3170, + "valid_targets_mean": 3954.7, + "valid_targets_min": 894 + }, + { + "epoch": 5.112721417069243, + "grad_norm": 0.5483085784932584, + "learning_rate": 8.236499994426886e-06, + "loss": 0.1635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17752034962177277, + "step": 3175, + "valid_targets_mean": 4114.2, + "valid_targets_min": 321 + }, + { + "epoch": 5.120772946859903, + "grad_norm": 0.5129349338303597, + "learning_rate": 8.171648378912272e-06, + "loss": 0.1732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1786811649799347, + "step": 3180, + "valid_targets_mean": 5010.1, + "valid_targets_min": 1693 + }, + { + "epoch": 5.128824476650563, + "grad_norm": 0.47618052790196236, + "learning_rate": 8.1069874699894e-06, + "loss": 0.1617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14539986848831177, + "step": 3185, + "valid_targets_mean": 4915.1, + "valid_targets_min": 1295 + }, + { + "epoch": 5.1368760064412236, + "grad_norm": 0.5408793562526828, + "learning_rate": 8.042518310175607e-06, + "loss": 0.1729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1692083477973938, + "step": 3190, + "valid_targets_mean": 4689.8, + "valid_targets_min": 693 + }, + { + "epoch": 5.144927536231884, + "grad_norm": 0.427889087391779, + "learning_rate": 7.978241938896679e-06, + "loss": 0.1541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13784319162368774, + "step": 3195, + "valid_targets_mean": 4693.8, + "valid_targets_min": 2255 + }, + { + "epoch": 5.152979066022544, + "grad_norm": 0.5199711838065452, + "learning_rate": 7.914159392470118e-06, + "loss": 0.1698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16833321750164032, + "step": 3200, + "valid_targets_mean": 4362.7, + "valid_targets_min": 848 + }, + { + "epoch": 5.161030595813204, + "grad_norm": 0.47753654786071936, + "learning_rate": 7.850271704088396e-06, + "loss": 0.1586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14919407665729523, + "step": 3205, + "valid_targets_mean": 4483.1, + "valid_targets_min": 2048 + }, + { + "epoch": 5.169082125603865, + "grad_norm": 0.47729884629127567, + "learning_rate": 7.786579903802342e-06, + "loss": 0.1818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15295487642288208, + "step": 3210, + "valid_targets_mean": 4766.6, + "valid_targets_min": 1817 + }, + { + "epoch": 5.177133655394525, + "grad_norm": 0.5186129305793417, + "learning_rate": 7.723085018504512e-06, + "loss": 0.1688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1621103286743164, + "step": 3215, + "valid_targets_mean": 4525.6, + "valid_targets_min": 529 + }, + { + "epoch": 5.185185185185185, + "grad_norm": 0.562831677149766, + "learning_rate": 7.659788071912612e-06, + "loss": 0.1709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16404971480369568, + "step": 3220, + "valid_targets_mean": 3796.4, + "valid_targets_min": 1477 + }, + { + "epoch": 5.193236714975845, + "grad_norm": 0.48044538637214657, + "learning_rate": 7.59669008455304e-06, + "loss": 0.1606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16649338603019714, + "step": 3225, + "valid_targets_mean": 5432.8, + "valid_targets_min": 1858 + }, + { + "epoch": 5.201288244766506, + "grad_norm": 0.5790721887585502, + "learning_rate": 7.533792073744395e-06, + "loss": 0.1751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17744669318199158, + "step": 3230, + "valid_targets_mean": 4592.4, + "valid_targets_min": 935 + }, + { + "epoch": 5.209339774557166, + "grad_norm": 0.5717495383522813, + "learning_rate": 7.471095053581086e-06, + "loss": 0.1758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1721886694431305, + "step": 3235, + "valid_targets_mean": 4073.8, + "valid_targets_min": 823 + }, + { + "epoch": 5.217391304347826, + "grad_norm": 0.5319904040215578, + "learning_rate": 7.4086000349169864e-06, + "loss": 0.1753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15195900201797485, + "step": 3240, + "valid_targets_mean": 4558.5, + "valid_targets_min": 296 + }, + { + "epoch": 5.225442834138486, + "grad_norm": 0.6199276315504786, + "learning_rate": 7.346308025349138e-06, + "loss": 0.1709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16166193783283234, + "step": 3245, + "valid_targets_mean": 3986.2, + "valid_targets_min": 848 + }, + { + "epoch": 5.233494363929147, + "grad_norm": 0.6244366963899998, + "learning_rate": 7.2842200292014805e-06, + "loss": 0.188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17683975398540497, + "step": 3250, + "valid_targets_mean": 3601.2, + "valid_targets_min": 760 + }, + { + "epoch": 5.241545893719807, + "grad_norm": 0.5241028588187276, + "learning_rate": 7.2223370475086896e-06, + "loss": 0.173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1595272421836853, + "step": 3255, + "valid_targets_mean": 4269.6, + "valid_targets_min": 1279 + }, + { + "epoch": 5.249597423510467, + "grad_norm": 0.4473386775088612, + "learning_rate": 7.160660078000028e-06, + "loss": 0.1621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16722847521305084, + "step": 3260, + "valid_targets_mean": 5286.9, + "valid_targets_min": 2305 + }, + { + "epoch": 5.2576489533011275, + "grad_norm": 0.5009816722230198, + "learning_rate": 7.099190115083259e-06, + "loss": 0.1615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17297816276550293, + "step": 3265, + "valid_targets_mean": 4930.7, + "valid_targets_min": 2434 + }, + { + "epoch": 5.265700483091788, + "grad_norm": 0.5600085277272723, + "learning_rate": 7.037928149828608e-06, + "loss": 0.1734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18866902589797974, + "step": 3270, + "valid_targets_mean": 4635.0, + "valid_targets_min": 1444 + }, + { + "epoch": 5.273752012882448, + "grad_norm": 0.5153595033812705, + "learning_rate": 6.97687516995279e-06, + "loss": 0.1608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15133386850357056, + "step": 3275, + "valid_targets_mean": 4087.8, + "valid_targets_min": 1111 + }, + { + "epoch": 5.281803542673108, + "grad_norm": 0.5588332326443939, + "learning_rate": 6.916032159803088e-06, + "loss": 0.1838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20162728428840637, + "step": 3280, + "valid_targets_mean": 4709.9, + "valid_targets_min": 2151 + }, + { + "epoch": 5.2898550724637685, + "grad_norm": 0.5640390114972573, + "learning_rate": 6.855400100341458e-06, + "loss": 0.1642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17432144284248352, + "step": 3285, + "valid_targets_mean": 3788.3, + "valid_targets_min": 543 + }, + { + "epoch": 5.297906602254429, + "grad_norm": 0.5376565233222369, + "learning_rate": 6.794979969128755e-06, + "loss": 0.1761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16141164302825928, + "step": 3290, + "valid_targets_mean": 4118.3, + "valid_targets_min": 1706 + }, + { + "epoch": 5.305958132045088, + "grad_norm": 0.5378942356008535, + "learning_rate": 6.7347727403089325e-06, + "loss": 0.173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17127177119255066, + "step": 3295, + "valid_targets_mean": 4600.5, + "valid_targets_min": 1136 + }, + { + "epoch": 5.314009661835748, + "grad_norm": 0.48776680886655227, + "learning_rate": 6.674779384593373e-06, + "loss": 0.1591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13992200791835785, + "step": 3300, + "valid_targets_mean": 4386.2, + "valid_targets_min": 1725 + }, + { + "epoch": 5.322061191626409, + "grad_norm": 0.5329626208836868, + "learning_rate": 6.61500086924519e-06, + "loss": 0.1657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17515888810157776, + "step": 3305, + "valid_targets_mean": 4181.7, + "valid_targets_min": 290 + }, + { + "epoch": 5.330112721417069, + "grad_norm": 0.46016033498034964, + "learning_rate": 6.555438158063683e-06, + "loss": 0.1707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17031973600387573, + "step": 3310, + "valid_targets_mean": 5609.8, + "valid_targets_min": 1475 + }, + { + "epoch": 5.338164251207729, + "grad_norm": 0.7890593958793812, + "learning_rate": 6.4960922113687695e-06, + "loss": 0.1678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17659761011600494, + "step": 3315, + "valid_targets_mean": 3941.4, + "valid_targets_min": 1450 + }, + { + "epoch": 5.3462157809983895, + "grad_norm": 0.5703421349872555, + "learning_rate": 6.4369639859855115e-06, + "loss": 0.1637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17057812213897705, + "step": 3320, + "valid_targets_mean": 4658.4, + "valid_targets_min": 1863 + }, + { + "epoch": 5.35426731078905, + "grad_norm": 0.564126182226765, + "learning_rate": 6.378054435228671e-06, + "loss": 0.166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18041761219501495, + "step": 3325, + "valid_targets_mean": 4159.0, + "valid_targets_min": 759 + }, + { + "epoch": 5.36231884057971, + "grad_norm": 0.4973048643598519, + "learning_rate": 6.319364508887371e-06, + "loss": 0.1719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1592758297920227, + "step": 3330, + "valid_targets_mean": 4736.2, + "valid_targets_min": 1812 + }, + { + "epoch": 5.37037037037037, + "grad_norm": 0.569769342446689, + "learning_rate": 6.260895153209763e-06, + "loss": 0.1644, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18152937293052673, + "step": 3335, + "valid_targets_mean": 4268.9, + "valid_targets_min": 1431 + }, + { + "epoch": 5.3784219001610305, + "grad_norm": 0.5016879405573162, + "learning_rate": 6.202647310887764e-06, + "loss": 0.1832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17833814024925232, + "step": 3340, + "valid_targets_mean": 4741.9, + "valid_targets_min": 1679 + }, + { + "epoch": 5.386473429951691, + "grad_norm": 0.5247407647892519, + "learning_rate": 6.14462192104188e-06, + "loss": 0.1691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20044955611228943, + "step": 3345, + "valid_targets_mean": 4656.0, + "valid_targets_min": 2099 + }, + { + "epoch": 5.394524959742351, + "grad_norm": 0.47014870620352595, + "learning_rate": 6.086819919206051e-06, + "loss": 0.168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17010408639907837, + "step": 3350, + "valid_targets_mean": 5653.4, + "valid_targets_min": 2097 + }, + { + "epoch": 5.402576489533011, + "grad_norm": 0.6060057637724512, + "learning_rate": 6.029242237312554e-06, + "loss": 0.1742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18121179938316345, + "step": 3355, + "valid_targets_mean": 5222.9, + "valid_targets_min": 532 + }, + { + "epoch": 5.4106280193236715, + "grad_norm": 0.5270241265183322, + "learning_rate": 5.971889803676996e-06, + "loss": 0.1648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16754843294620514, + "step": 3360, + "valid_targets_mean": 4372.1, + "valid_targets_min": 1892 + }, + { + "epoch": 5.418679549114332, + "grad_norm": 0.5131324243265083, + "learning_rate": 5.914763542983355e-06, + "loss": 0.1986, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18260961771011353, + "step": 3365, + "valid_targets_mean": 5181.3, + "valid_targets_min": 1175 + }, + { + "epoch": 5.426731078904992, + "grad_norm": 0.5192149826300284, + "learning_rate": 5.857864376269051e-06, + "loss": 0.1837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18823757767677307, + "step": 3370, + "valid_targets_mean": 4809.2, + "valid_targets_min": 1171 + }, + { + "epoch": 5.434782608695652, + "grad_norm": 0.5980250860155677, + "learning_rate": 5.801193220910108e-06, + "loss": 0.1539, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16703063249588013, + "step": 3375, + "valid_targets_mean": 4509.7, + "valid_targets_min": 1367 + }, + { + "epoch": 5.442834138486313, + "grad_norm": 0.551837937511895, + "learning_rate": 5.744750990606356e-06, + "loss": 0.1723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15874727070331573, + "step": 3380, + "valid_targets_mean": 3712.6, + "valid_targets_min": 311 + }, + { + "epoch": 5.450885668276973, + "grad_norm": 0.49161398228706626, + "learning_rate": 5.688538595366706e-06, + "loss": 0.1569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.155176043510437, + "step": 3385, + "valid_targets_mean": 4476.1, + "valid_targets_min": 1936 + }, + { + "epoch": 5.458937198067633, + "grad_norm": 0.5355381631745951, + "learning_rate": 5.632556941494482e-06, + "loss": 0.1687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.153697669506073, + "step": 3390, + "valid_targets_mean": 3974.2, + "valid_targets_min": 1569 + }, + { + "epoch": 5.466988727858293, + "grad_norm": 0.5115948764108772, + "learning_rate": 5.5768069315727895e-06, + "loss": 0.1772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1722053587436676, + "step": 3395, + "valid_targets_mean": 4293.4, + "valid_targets_min": 2260 + }, + { + "epoch": 5.475040257648954, + "grad_norm": 0.4799188086454644, + "learning_rate": 5.521289464449975e-06, + "loss": 0.1784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17728543281555176, + "step": 3400, + "valid_targets_mean": 5211.7, + "valid_targets_min": 363 + }, + { + "epoch": 5.483091787439614, + "grad_norm": 0.5570316610757228, + "learning_rate": 5.46600543522515e-06, + "loss": 0.1694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16196802258491516, + "step": 3405, + "valid_targets_mean": 4058.4, + "valid_targets_min": 653 + }, + { + "epoch": 5.491143317230274, + "grad_norm": 0.6736055907952796, + "learning_rate": 5.410955735233736e-06, + "loss": 0.1683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.171076238155365, + "step": 3410, + "valid_targets_mean": 5974.8, + "valid_targets_min": 2681 + }, + { + "epoch": 5.499194847020934, + "grad_norm": 0.5508461582818147, + "learning_rate": 5.3561412520331025e-06, + "loss": 0.1673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14637523889541626, + "step": 3415, + "valid_targets_mean": 4094.3, + "valid_targets_min": 904 + }, + { + "epoch": 5.507246376811594, + "grad_norm": 0.5142188355081284, + "learning_rate": 5.30156286938826e-06, + "loss": 0.1655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1733294427394867, + "step": 3420, + "valid_targets_mean": 4461.4, + "valid_targets_min": 1105 + }, + { + "epoch": 5.515297906602254, + "grad_norm": 0.5905481814384393, + "learning_rate": 5.24722146725761e-06, + "loss": 0.1817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17892040312290192, + "step": 3425, + "valid_targets_mean": 4002.8, + "valid_targets_min": 1672 + }, + { + "epoch": 5.523349436392914, + "grad_norm": 0.45334104473741593, + "learning_rate": 5.193117921778743e-06, + "loss": 0.1616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13965705037117004, + "step": 3430, + "valid_targets_mean": 4802.2, + "valid_targets_min": 1812 + }, + { + "epoch": 5.531400966183575, + "grad_norm": 0.742354374585886, + "learning_rate": 5.139253105254336e-06, + "loss": 0.1795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18780140578746796, + "step": 3435, + "valid_targets_mean": 4082.1, + "valid_targets_min": 1116 + }, + { + "epoch": 5.539452495974235, + "grad_norm": 0.44385771676975855, + "learning_rate": 5.085627886138078e-06, + "loss": 0.159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14621910452842712, + "step": 3440, + "valid_targets_mean": 5131.4, + "valid_targets_min": 1610 + }, + { + "epoch": 5.547504025764895, + "grad_norm": 0.47820099076305106, + "learning_rate": 5.032243129020671e-06, + "loss": 0.1734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16836556792259216, + "step": 3445, + "valid_targets_mean": 5182.5, + "valid_targets_min": 1770 + }, + { + "epoch": 5.555555555555555, + "grad_norm": 0.5335135512220336, + "learning_rate": 4.9790996946158695e-06, + "loss": 0.1688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18833035230636597, + "step": 3450, + "valid_targets_mean": 4468.0, + "valid_targets_min": 1464 + }, + { + "epoch": 5.563607085346216, + "grad_norm": 0.5452849565299809, + "learning_rate": 4.926198439746641e-06, + "loss": 0.1769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16549217700958252, + "step": 3455, + "valid_targets_mean": 3943.1, + "valid_targets_min": 1663 + }, + { + "epoch": 5.571658615136876, + "grad_norm": 0.47569229716225025, + "learning_rate": 4.873540217331325e-06, + "loss": 0.1736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14578670263290405, + "step": 3460, + "valid_targets_mean": 4942.0, + "valid_targets_min": 1587 + }, + { + "epoch": 5.579710144927536, + "grad_norm": 0.6257220241925262, + "learning_rate": 4.82112587636989e-06, + "loss": 0.1653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14457204937934875, + "step": 3465, + "valid_targets_mean": 4796.1, + "valid_targets_min": 1025 + }, + { + "epoch": 5.587761674718196, + "grad_norm": 0.5126249751180715, + "learning_rate": 4.768956261930233e-06, + "loss": 0.1723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21914073824882507, + "step": 3470, + "valid_targets_mean": 5159.1, + "valid_targets_min": 1672 + }, + { + "epoch": 5.595813204508857, + "grad_norm": 0.5030438743653675, + "learning_rate": 4.717032215134576e-06, + "loss": 0.1787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1816645860671997, + "step": 3475, + "valid_targets_mean": 5501.6, + "valid_targets_min": 1066 + }, + { + "epoch": 5.603864734299517, + "grad_norm": 0.7789904581155889, + "learning_rate": 4.66535457314589e-06, + "loss": 0.1658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1629607379436493, + "step": 3480, + "valid_targets_mean": 5301.9, + "valid_targets_min": 1650 + }, + { + "epoch": 5.611916264090177, + "grad_norm": 0.5094527470833449, + "learning_rate": 4.613924169154406e-06, + "loss": 0.1747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16425767540931702, + "step": 3485, + "valid_targets_mean": 4554.6, + "valid_targets_min": 1654 + }, + { + "epoch": 5.6199677938808374, + "grad_norm": 0.48660247544704416, + "learning_rate": 4.5627418323641705e-06, + "loss": 0.1644, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17449656128883362, + "step": 3490, + "valid_targets_mean": 5440.4, + "valid_targets_min": 3305 + }, + { + "epoch": 5.628019323671498, + "grad_norm": 0.6588365172160204, + "learning_rate": 4.51180838797969e-06, + "loss": 0.1768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16700059175491333, + "step": 3495, + "valid_targets_mean": 3524.9, + "valid_targets_min": 431 + }, + { + "epoch": 5.636070853462158, + "grad_norm": 0.5649424861690088, + "learning_rate": 4.461124657192612e-06, + "loss": 0.1923, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16424767673015594, + "step": 3500, + "valid_targets_mean": 3976.2, + "valid_targets_min": 327 + }, + { + "epoch": 5.644122383252818, + "grad_norm": 0.5402856059826509, + "learning_rate": 4.410691457168488e-06, + "loss": 0.179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17043063044548035, + "step": 3505, + "valid_targets_mean": 4236.4, + "valid_targets_min": 1605 + }, + { + "epoch": 5.6521739130434785, + "grad_norm": 0.6565234736534431, + "learning_rate": 4.3605096010336115e-06, + "loss": 0.1631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19410403072834015, + "step": 3510, + "valid_targets_mean": 3966.3, + "valid_targets_min": 374 + }, + { + "epoch": 5.660225442834139, + "grad_norm": 0.46809524453228724, + "learning_rate": 4.310579897861902e-06, + "loss": 0.167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14890244603157043, + "step": 3515, + "valid_targets_mean": 5078.9, + "valid_targets_min": 1866 + }, + { + "epoch": 5.668276972624799, + "grad_norm": 0.5402583356363803, + "learning_rate": 4.26090315266185e-06, + "loss": 0.1718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18138787150382996, + "step": 3520, + "valid_targets_mean": 4479.4, + "valid_targets_min": 1667 + }, + { + "epoch": 5.676328502415459, + "grad_norm": 0.5567198301706011, + "learning_rate": 4.2114801663635504e-06, + "loss": 0.1689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19387924671173096, + "step": 3525, + "valid_targets_mean": 4855.1, + "valid_targets_min": 1475 + }, + { + "epoch": 5.6843800322061195, + "grad_norm": 0.5076182712626346, + "learning_rate": 4.1623117358057865e-06, + "loss": 0.169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16965937614440918, + "step": 3530, + "valid_targets_mean": 4802.6, + "valid_targets_min": 2133 + }, + { + "epoch": 5.692431561996779, + "grad_norm": 0.5638233787105313, + "learning_rate": 4.113398653723168e-06, + "loss": 0.1703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1688123494386673, + "step": 3535, + "valid_targets_mean": 4551.8, + "valid_targets_min": 590 + }, + { + "epoch": 5.70048309178744, + "grad_norm": 0.6026682576927842, + "learning_rate": 4.0647417087333776e-06, + "loss": 0.1655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17929375171661377, + "step": 3540, + "valid_targets_mean": 4203.6, + "valid_targets_min": 1303 + }, + { + "epoch": 5.708534621578099, + "grad_norm": 0.4840327343136473, + "learning_rate": 4.0163416853244385e-06, + "loss": 0.1767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15817004442214966, + "step": 3545, + "valid_targets_mean": 4813.0, + "valid_targets_min": 321 + }, + { + "epoch": 5.71658615136876, + "grad_norm": 0.5122311264991213, + "learning_rate": 3.968199363842056e-06, + "loss": 0.1701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15911847352981567, + "step": 3550, + "valid_targets_mean": 4163.8, + "valid_targets_min": 2141 + }, + { + "epoch": 5.72463768115942, + "grad_norm": 0.5526613981940242, + "learning_rate": 3.920315520477065e-06, + "loss": 0.1723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16690890491008759, + "step": 3555, + "valid_targets_mean": 3781.4, + "valid_targets_min": 434 + }, + { + "epoch": 5.73268921095008, + "grad_norm": 0.5154673260140975, + "learning_rate": 3.872690927252891e-06, + "loss": 0.1775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18027852475643158, + "step": 3560, + "valid_targets_mean": 4373.6, + "valid_targets_min": 1594 + }, + { + "epoch": 5.7407407407407405, + "grad_norm": 0.48703994856912225, + "learning_rate": 3.825326352013119e-06, + "loss": 0.169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16559578478336334, + "step": 3565, + "valid_targets_mean": 4382.0, + "valid_targets_min": 1603 + }, + { + "epoch": 5.748792270531401, + "grad_norm": 0.5472851978165418, + "learning_rate": 3.7782225584091016e-06, + "loss": 0.1681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15613017976284027, + "step": 3570, + "valid_targets_mean": 4052.4, + "valid_targets_min": 339 + }, + { + "epoch": 5.756843800322061, + "grad_norm": 0.5873806542864478, + "learning_rate": 3.731380305887644e-06, + "loss": 0.1716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17352142930030823, + "step": 3575, + "valid_targets_mean": 4468.3, + "valid_targets_min": 860 + }, + { + "epoch": 5.764895330112721, + "grad_norm": 0.562715584188104, + "learning_rate": 3.684800349678781e-06, + "loss": 0.1685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19309145212173462, + "step": 3580, + "valid_targets_mean": 3819.3, + "valid_targets_min": 831 + }, + { + "epoch": 5.7729468599033815, + "grad_norm": 0.584825018902196, + "learning_rate": 3.638483440783576e-06, + "loss": 0.1728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22266249358654022, + "step": 3585, + "valid_targets_mean": 4084.6, + "valid_targets_min": 1924 + }, + { + "epoch": 5.780998389694042, + "grad_norm": 0.5288099238647191, + "learning_rate": 3.5924303259620307e-06, + "loss": 0.1605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15193787217140198, + "step": 3590, + "valid_targets_mean": 4209.9, + "valid_targets_min": 963 + }, + { + "epoch": 5.789049919484702, + "grad_norm": 0.5275609216950717, + "learning_rate": 3.546641747721036e-06, + "loss": 0.1892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16384585201740265, + "step": 3595, + "valid_targets_mean": 4627.3, + "valid_targets_min": 1626 + }, + { + "epoch": 5.797101449275362, + "grad_norm": 0.725525221737914, + "learning_rate": 3.501118444302394e-06, + "loss": 0.1684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18506191670894623, + "step": 3600, + "valid_targets_mean": 3873.7, + "valid_targets_min": 756 + }, + { + "epoch": 5.805152979066023, + "grad_norm": 0.5082224637456563, + "learning_rate": 3.4558611496709384e-06, + "loss": 0.1633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.165011465549469, + "step": 3605, + "valid_targets_mean": 4355.2, + "valid_targets_min": 776 + }, + { + "epoch": 5.813204508856683, + "grad_norm": 0.8025807922419831, + "learning_rate": 3.4108705935026685e-06, + "loss": 0.1633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15931957960128784, + "step": 3610, + "valid_targets_mean": 3695.9, + "valid_targets_min": 1119 + }, + { + "epoch": 5.821256038647343, + "grad_norm": 0.5168844656413334, + "learning_rate": 3.3661475011730206e-06, + "loss": 0.1639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15300890803337097, + "step": 3615, + "valid_targets_mean": 4747.3, + "valid_targets_min": 1185 + }, + { + "epoch": 5.829307568438003, + "grad_norm": 0.5249631418738182, + "learning_rate": 3.321692593745147e-06, + "loss": 0.1617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15493711829185486, + "step": 3620, + "valid_targets_mean": 3726.0, + "valid_targets_min": 1610 + }, + { + "epoch": 5.837359098228664, + "grad_norm": 0.5331107342319671, + "learning_rate": 3.2775065879582948e-06, + "loss": 0.1697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17267771065235138, + "step": 3625, + "valid_targets_mean": 5503.0, + "valid_targets_min": 1978 + }, + { + "epoch": 5.845410628019324, + "grad_norm": 0.5222892591386691, + "learning_rate": 3.233590196216263e-06, + "loss": 0.1659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1816999316215515, + "step": 3630, + "valid_targets_mean": 4842.2, + "valid_targets_min": 545 + }, + { + "epoch": 5.853462157809984, + "grad_norm": 0.5132154525081916, + "learning_rate": 3.1899441265759036e-06, + "loss": 0.1808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19336271286010742, + "step": 3635, + "valid_targets_mean": 5282.2, + "valid_targets_min": 1948 + }, + { + "epoch": 5.861513687600644, + "grad_norm": 0.5087976961623909, + "learning_rate": 3.1465690827356955e-06, + "loss": 0.173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17752087116241455, + "step": 3640, + "valid_targets_mean": 4912.0, + "valid_targets_min": 1166 + }, + { + "epoch": 5.869565217391305, + "grad_norm": 0.5723530802904422, + "learning_rate": 3.103465764024438e-06, + "loss": 0.1659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1567440927028656, + "step": 3645, + "valid_targets_mean": 3882.6, + "valid_targets_min": 627 + }, + { + "epoch": 5.877616747181965, + "grad_norm": 0.6212444706400745, + "learning_rate": 3.0606348653899288e-06, + "loss": 0.1777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16555482149124146, + "step": 3650, + "valid_targets_mean": 4280.1, + "valid_targets_min": 1429 + }, + { + "epoch": 5.885668276972625, + "grad_norm": 0.6037643784134114, + "learning_rate": 3.0180770773877866e-06, + "loss": 0.1767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1539427787065506, + "step": 3655, + "valid_targets_mean": 3446.1, + "valid_targets_min": 864 + }, + { + "epoch": 5.8937198067632846, + "grad_norm": 0.5417204859207484, + "learning_rate": 2.9757930861703223e-06, + "loss": 0.1546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1333686113357544, + "step": 3660, + "valid_targets_mean": 4272.0, + "valid_targets_min": 993 + }, + { + "epoch": 5.901771336553946, + "grad_norm": 0.545060770024728, + "learning_rate": 2.9337835734754504e-06, + "loss": 0.1685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16815856099128723, + "step": 3665, + "valid_targets_mean": 4364.4, + "valid_targets_min": 1968 + }, + { + "epoch": 5.909822866344605, + "grad_norm": 0.4807913485701676, + "learning_rate": 2.892049216615724e-06, + "loss": 0.1705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16856727004051208, + "step": 3670, + "valid_targets_mean": 4645.9, + "valid_targets_min": 1236 + }, + { + "epoch": 5.917874396135265, + "grad_norm": 0.5872896027676431, + "learning_rate": 2.850590688467405e-06, + "loss": 0.1761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17276933789253235, + "step": 3675, + "valid_targets_mean": 4079.1, + "valid_targets_min": 758 + }, + { + "epoch": 5.925925925925926, + "grad_norm": 0.5081772741434722, + "learning_rate": 2.8094086574595934e-06, + "loss": 0.1846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17815062403678894, + "step": 3680, + "valid_targets_mean": 4622.2, + "valid_targets_min": 506 + }, + { + "epoch": 5.933977455716586, + "grad_norm": 0.5314566948595292, + "learning_rate": 2.768503787563497e-06, + "loss": 0.1659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15440833568572998, + "step": 3685, + "valid_targets_mean": 4177.4, + "valid_targets_min": 1764 + }, + { + "epoch": 5.942028985507246, + "grad_norm": 0.5944657537451915, + "learning_rate": 2.7278767382816828e-06, + "loss": 0.172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15870711207389832, + "step": 3690, + "valid_targets_mean": 4034.2, + "valid_targets_min": 346 + }, + { + "epoch": 5.950080515297906, + "grad_norm": 0.5150609346752764, + "learning_rate": 2.687528164637474e-06, + "loss": 0.1682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16893523931503296, + "step": 3695, + "valid_targets_mean": 4341.7, + "valid_targets_min": 544 + }, + { + "epoch": 5.958132045088567, + "grad_norm": 0.622800014898735, + "learning_rate": 2.647458717164357e-06, + "loss": 0.1686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1420368254184723, + "step": 3700, + "valid_targets_mean": 4181.0, + "valid_targets_min": 930 + }, + { + "epoch": 5.966183574879227, + "grad_norm": 0.5118850984097806, + "learning_rate": 2.607669041895535e-06, + "loss": 0.1689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17651256918907166, + "step": 3705, + "valid_targets_mean": 5766.2, + "valid_targets_min": 2526 + }, + { + "epoch": 5.974235104669887, + "grad_norm": 0.5922490816235029, + "learning_rate": 2.568159780353476e-06, + "loss": 0.173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15372015535831451, + "step": 3710, + "valid_targets_mean": 3693.7, + "valid_targets_min": 614 + }, + { + "epoch": 5.982286634460547, + "grad_norm": 0.5223810749820793, + "learning_rate": 2.5289315695395834e-06, + "loss": 0.1709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17238521575927734, + "step": 3715, + "valid_targets_mean": 3895.1, + "valid_targets_min": 703 + }, + { + "epoch": 5.990338164251208, + "grad_norm": 0.6140059891446761, + "learning_rate": 2.489985041923928e-06, + "loss": 0.1763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18047456443309784, + "step": 3720, + "valid_targets_mean": 4203.8, + "valid_targets_min": 989 + }, + { + "epoch": 5.998389694041868, + "grad_norm": 0.5110659722384324, + "learning_rate": 2.4513208254350486e-06, + "loss": 0.1749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1803944855928421, + "step": 3725, + "valid_targets_mean": 4611.9, + "valid_targets_min": 1053 + }, + { + "epoch": 6.006441223832528, + "grad_norm": 0.46649632563361154, + "learning_rate": 2.412939543449828e-06, + "loss": 0.1608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1679632067680359, + "step": 3730, + "valid_targets_mean": 4868.8, + "valid_targets_min": 1373 + }, + { + "epoch": 6.0144927536231885, + "grad_norm": 0.5752464912619374, + "learning_rate": 2.3748418147834394e-06, + "loss": 0.1728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14780664443969727, + "step": 3735, + "valid_targets_mean": 3625.4, + "valid_targets_min": 705 + }, + { + "epoch": 6.022544283413849, + "grad_norm": 0.4873295942554316, + "learning_rate": 2.337028253679381e-06, + "loss": 0.1575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14143216609954834, + "step": 3740, + "valid_targets_mean": 4609.2, + "valid_targets_min": 1716 + }, + { + "epoch": 6.030595813204509, + "grad_norm": 0.4572827913243831, + "learning_rate": 2.299499469799542e-06, + "loss": 0.1715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1580767184495926, + "step": 3745, + "valid_targets_mean": 5816.4, + "valid_targets_min": 1119 + }, + { + "epoch": 6.038647342995169, + "grad_norm": 0.5186439466567816, + "learning_rate": 2.262256068214421e-06, + "loss": 0.1651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17476163804531097, + "step": 3750, + "valid_targets_mean": 4437.0, + "valid_targets_min": 757 + }, + { + "epoch": 6.0466988727858295, + "grad_norm": 0.5118711095108422, + "learning_rate": 2.2252986493933237e-06, + "loss": 0.1684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16541600227355957, + "step": 3755, + "valid_targets_mean": 4525.7, + "valid_targets_min": 2162 + }, + { + "epoch": 6.05475040257649, + "grad_norm": 0.5109434582294012, + "learning_rate": 2.18862780919471e-06, + "loss": 0.1589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15595577657222748, + "step": 3760, + "valid_targets_mean": 4511.2, + "valid_targets_min": 1010 + }, + { + "epoch": 6.06280193236715, + "grad_norm": 0.5324485507591655, + "learning_rate": 2.152244138856585e-06, + "loss": 0.1559, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1731036901473999, + "step": 3765, + "valid_targets_mean": 4594.2, + "valid_targets_min": 1788 + }, + { + "epoch": 6.07085346215781, + "grad_norm": 0.4936303320259845, + "learning_rate": 2.1161482249869513e-06, + "loss": 0.1499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14089539647102356, + "step": 3770, + "valid_targets_mean": 5116.8, + "valid_targets_min": 422 + }, + { + "epoch": 6.078904991948471, + "grad_norm": 0.5698965758566077, + "learning_rate": 2.080340649554369e-06, + "loss": 0.1662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16755038499832153, + "step": 3775, + "valid_targets_mean": 4225.4, + "valid_targets_min": 536 + }, + { + "epoch": 6.086956521739131, + "grad_norm": 0.5229210197667631, + "learning_rate": 2.044821989878558e-06, + "loss": 0.1722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16623234748840332, + "step": 3780, + "valid_targets_mean": 4262.9, + "valid_targets_min": 1577 + }, + { + "epoch": 6.095008051529791, + "grad_norm": 0.5459701049906057, + "learning_rate": 2.0095928186210956e-06, + "loss": 0.1625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.147607684135437, + "step": 3785, + "valid_targets_mean": 4724.4, + "valid_targets_min": 1787 + }, + { + "epoch": 6.1030595813204505, + "grad_norm": 0.5581543823448861, + "learning_rate": 1.974653703776188e-06, + "loss": 0.1705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18220457434654236, + "step": 3790, + "valid_targets_mean": 5031.3, + "valid_targets_min": 392 + }, + { + "epoch": 6.111111111111111, + "grad_norm": 0.5197842393741545, + "learning_rate": 1.9400052086615153e-06, + "loss": 0.175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1598493456840515, + "step": 3795, + "valid_targets_mean": 4375.8, + "valid_targets_min": 1759 + }, + { + "epoch": 6.119162640901771, + "grad_norm": 0.5494186611639191, + "learning_rate": 1.9056478919091236e-06, + "loss": 0.1735, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19067052006721497, + "step": 3800, + "valid_targets_mean": 4341.0, + "valid_targets_min": 692 + }, + { + "epoch": 6.127214170692431, + "grad_norm": 0.5103976820611801, + "learning_rate": 1.8715823074564587e-06, + "loss": 0.16, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15958425402641296, + "step": 3805, + "valid_targets_mean": 4729.6, + "valid_targets_min": 362 + }, + { + "epoch": 6.1352657004830915, + "grad_norm": 0.5752435508668234, + "learning_rate": 1.837809004537401e-06, + "loss": 0.1579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1595839112997055, + "step": 3810, + "valid_targets_mean": 4986.8, + "valid_targets_min": 897 + }, + { + "epoch": 6.143317230273752, + "grad_norm": 0.6232969103900382, + "learning_rate": 1.8043285276734334e-06, + "loss": 0.1663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17083576321601868, + "step": 3815, + "valid_targets_mean": 3214.8, + "valid_targets_min": 547 + }, + { + "epoch": 6.151368760064412, + "grad_norm": 0.5085611865605083, + "learning_rate": 1.7711414166648365e-06, + "loss": 0.1522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15489043295383453, + "step": 3820, + "valid_targets_mean": 4428.2, + "valid_targets_min": 1416 + }, + { + "epoch": 6.159420289855072, + "grad_norm": 0.5686690339243041, + "learning_rate": 1.7382482065820138e-06, + "loss": 0.1578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16790664196014404, + "step": 3825, + "valid_targets_mean": 4888.5, + "valid_targets_min": 1755 + }, + { + "epoch": 6.1674718196457325, + "grad_norm": 0.6929429650574811, + "learning_rate": 1.7056494277568503e-06, + "loss": 0.167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1905716359615326, + "step": 3830, + "valid_targets_mean": 3701.2, + "valid_targets_min": 1170 + }, + { + "epoch": 6.175523349436393, + "grad_norm": 0.6367228356359814, + "learning_rate": 1.6733456057741592e-06, + "loss": 0.1636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1782214492559433, + "step": 3835, + "valid_targets_mean": 3788.4, + "valid_targets_min": 1742 + }, + { + "epoch": 6.183574879227053, + "grad_norm": 0.532748190271915, + "learning_rate": 1.641337261463216e-06, + "loss": 0.1691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18782779574394226, + "step": 3840, + "valid_targets_mean": 4424.2, + "valid_targets_min": 745 + }, + { + "epoch": 6.191626409017713, + "grad_norm": 0.5944352055775861, + "learning_rate": 1.6096249108893602e-06, + "loss": 0.1591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14986079931259155, + "step": 3845, + "valid_targets_mean": 4633.4, + "valid_targets_min": 817 + }, + { + "epoch": 6.199677938808374, + "grad_norm": 0.4718113061336701, + "learning_rate": 1.5782090653456616e-06, + "loss": 0.1596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15445095300674438, + "step": 3850, + "valid_targets_mean": 5366.3, + "valid_targets_min": 2465 + }, + { + "epoch": 6.207729468599034, + "grad_norm": 0.5299032276018538, + "learning_rate": 1.547090231344699e-06, + "loss": 0.1761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1652597188949585, + "step": 3855, + "valid_targets_mean": 4490.8, + "valid_targets_min": 2050 + }, + { + "epoch": 6.215780998389694, + "grad_norm": 0.4937926792080132, + "learning_rate": 1.5162689106103746e-06, + "loss": 0.1571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16262544691562653, + "step": 3860, + "valid_targets_mean": 5297.2, + "valid_targets_min": 758 + }, + { + "epoch": 6.223832528180354, + "grad_norm": 0.5983118290721653, + "learning_rate": 1.4857456000698366e-06, + "loss": 0.176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21267688274383545, + "step": 3865, + "valid_targets_mean": 5259.9, + "valid_targets_min": 1772 + }, + { + "epoch": 6.231884057971015, + "grad_norm": 0.5186630493468379, + "learning_rate": 1.4555207918454662e-06, + "loss": 0.1758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20158061385154724, + "step": 3870, + "valid_targets_mean": 5251.4, + "valid_targets_min": 570 + }, + { + "epoch": 6.239935587761675, + "grad_norm": 0.5732567692410732, + "learning_rate": 1.4255949732469309e-06, + "loss": 0.1649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15088540315628052, + "step": 3875, + "valid_targets_mean": 3437.3, + "valid_targets_min": 1053 + }, + { + "epoch": 6.247987117552335, + "grad_norm": 0.5122360577374575, + "learning_rate": 1.3959686267633488e-06, + "loss": 0.1817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1651184856891632, + "step": 3880, + "valid_targets_mean": 5088.6, + "valid_targets_min": 1995 + }, + { + "epoch": 6.256038647342995, + "grad_norm": 0.48110861215986517, + "learning_rate": 1.3666422300554905e-06, + "loss": 0.1705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16647136211395264, + "step": 3885, + "valid_targets_mean": 5231.4, + "valid_targets_min": 2355 + }, + { + "epoch": 6.264090177133656, + "grad_norm": 0.582851491108072, + "learning_rate": 1.3376162559480822e-06, + "loss": 0.1769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18534335494041443, + "step": 3890, + "valid_targets_mean": 4512.3, + "valid_targets_min": 2039 + }, + { + "epoch": 6.272141706924316, + "grad_norm": 0.5404662794082068, + "learning_rate": 1.308891172422193e-06, + "loss": 0.1737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1919654756784439, + "step": 3895, + "valid_targets_mean": 4829.6, + "valid_targets_min": 972 + }, + { + "epoch": 6.280193236714976, + "grad_norm": 0.628437639331324, + "learning_rate": 1.2804674426076757e-06, + "loss": 0.174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21055009961128235, + "step": 3900, + "valid_targets_mean": 4256.2, + "valid_targets_min": 846 + }, + { + "epoch": 6.2882447665056365, + "grad_norm": 0.4797286979992782, + "learning_rate": 1.2523455247757088e-06, + "loss": 0.1575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14173908531665802, + "step": 3905, + "valid_targets_mean": 5012.4, + "valid_targets_min": 1956 + }, + { + "epoch": 6.296296296296296, + "grad_norm": 0.5914383870555415, + "learning_rate": 1.224525872331408e-06, + "loss": 0.1672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17256557941436768, + "step": 3910, + "valid_targets_mean": 3722.4, + "valid_targets_min": 1185 + }, + { + "epoch": 6.304347826086957, + "grad_norm": 0.5475160234396864, + "learning_rate": 1.1970089338065071e-06, + "loss": 0.1651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16874639689922333, + "step": 3915, + "valid_targets_mean": 4964.1, + "valid_targets_min": 1411 + }, + { + "epoch": 6.312399355877616, + "grad_norm": 0.6352630163683538, + "learning_rate": 1.1697951528521422e-06, + "loss": 0.1722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16274841129779816, + "step": 3920, + "valid_targets_mean": 3514.2, + "valid_targets_min": 1903 + }, + { + "epoch": 6.320450885668277, + "grad_norm": 0.5450852465931986, + "learning_rate": 1.1428849682316766e-06, + "loss": 0.1545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16449615359306335, + "step": 3925, + "valid_targets_mean": 4061.4, + "valid_targets_min": 1262 + }, + { + "epoch": 6.328502415458937, + "grad_norm": 0.48987328370522754, + "learning_rate": 1.116278813813647e-06, + "loss": 0.1556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13572901487350464, + "step": 3930, + "valid_targets_mean": 4505.6, + "valid_targets_min": 1616 + }, + { + "epoch": 6.336553945249597, + "grad_norm": 0.503141267860805, + "learning_rate": 1.08997711856476e-06, + "loss": 0.1612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16356249153614044, + "step": 3935, + "valid_targets_mean": 4598.4, + "valid_targets_min": 1028 + }, + { + "epoch": 6.344605475040257, + "grad_norm": 0.7893130983095663, + "learning_rate": 1.0639803065429755e-06, + "loss": 0.1831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20338299870491028, + "step": 3940, + "valid_targets_mean": 5540.6, + "valid_targets_min": 2279 + }, + { + "epoch": 6.352657004830918, + "grad_norm": 0.5464519825128046, + "learning_rate": 1.0382887968906718e-06, + "loss": 0.1699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1746448576450348, + "step": 3945, + "valid_targets_mean": 3995.6, + "valid_targets_min": 1677 + }, + { + "epoch": 6.360708534621578, + "grad_norm": 0.5620117079550653, + "learning_rate": 1.012903003827883e-06, + "loss": 0.1704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1740240752696991, + "step": 3950, + "valid_targets_mean": 3910.2, + "valid_targets_min": 1147 + }, + { + "epoch": 6.368760064412238, + "grad_norm": 0.5014202584177402, + "learning_rate": 9.87823336645628e-07, + "loss": 0.166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15709742903709412, + "step": 3955, + "valid_targets_mean": 4344.8, + "valid_targets_min": 2201 + }, + { + "epoch": 6.3768115942028984, + "grad_norm": 0.5362938011516779, + "learning_rate": 9.630501996993091e-07, + "loss": 0.163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17129208147525787, + "step": 3960, + "valid_targets_mean": 4717.5, + "valid_targets_min": 1989 + }, + { + "epoch": 6.384863123993559, + "grad_norm": 0.44355010700619574, + "learning_rate": 9.385839924021844e-07, + "loss": 0.1773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14925384521484375, + "step": 3965, + "valid_targets_mean": 5392.1, + "valid_targets_min": 2034 + }, + { + "epoch": 6.392914653784219, + "grad_norm": 0.5637410567366077, + "learning_rate": 9.144251092189416e-07, + "loss": 0.1721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2033105343580246, + "step": 3970, + "valid_targets_mean": 4397.6, + "valid_targets_min": 529 + }, + { + "epoch": 6.400966183574879, + "grad_norm": 0.5246003448080508, + "learning_rate": 8.905739396593316e-07, + "loss": 0.1636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16925036907196045, + "step": 3975, + "valid_targets_mean": 4739.1, + "valid_targets_min": 720 + }, + { + "epoch": 6.4090177133655395, + "grad_norm": 0.5406121384840578, + "learning_rate": 8.670308682718853e-07, + "loss": 0.1587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1594831645488739, + "step": 3980, + "valid_targets_mean": 4337.4, + "valid_targets_min": 800 + }, + { + "epoch": 6.4170692431562, + "grad_norm": 0.4785613416301052, + "learning_rate": 8.437962746377204e-07, + "loss": 0.1668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1622602641582489, + "step": 3985, + "valid_targets_mean": 5365.6, + "valid_targets_min": 1316 + }, + { + "epoch": 6.42512077294686, + "grad_norm": 0.4972685992182721, + "learning_rate": 8.208705333644129e-07, + "loss": 0.1641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1673496663570404, + "step": 3990, + "valid_targets_mean": 4327.2, + "valid_targets_min": 873 + }, + { + "epoch": 6.43317230273752, + "grad_norm": 0.51768549782515, + "learning_rate": 7.982540140799688e-07, + "loss": 0.1571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.161638081073761, + "step": 3995, + "valid_targets_mean": 4245.7, + "valid_targets_min": 1725 + }, + { + "epoch": 6.4412238325281805, + "grad_norm": 0.6255107806087558, + "learning_rate": 7.759470814268489e-07, + "loss": 0.1531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16319170594215393, + "step": 4000, + "valid_targets_mean": 3573.4, + "valid_targets_min": 216 + }, + { + "epoch": 6.449275362318841, + "grad_norm": 0.5117136147623905, + "learning_rate": 7.539500950561063e-07, + "loss": 0.1649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18081682920455933, + "step": 4005, + "valid_targets_mean": 5030.6, + "valid_targets_min": 941 + }, + { + "epoch": 6.457326892109501, + "grad_norm": 0.5410913172933577, + "learning_rate": 7.322634096215831e-07, + "loss": 0.1568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15669681131839752, + "step": 4010, + "valid_targets_mean": 4348.8, + "valid_targets_min": 713 + }, + { + "epoch": 6.465378421900161, + "grad_norm": 0.4986868222815654, + "learning_rate": 7.108873747741807e-07, + "loss": 0.1648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14274120330810547, + "step": 4015, + "valid_targets_mean": 4543.1, + "valid_targets_min": 1717 + }, + { + "epoch": 6.473429951690822, + "grad_norm": 0.469433549650835, + "learning_rate": 6.898223351562405e-07, + "loss": 0.1686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15934091806411743, + "step": 4020, + "valid_targets_mean": 5432.7, + "valid_targets_min": 2526 + }, + { + "epoch": 6.481481481481482, + "grad_norm": 0.622115989263229, + "learning_rate": 6.690686303959748e-07, + "loss": 0.1746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18453718721866608, + "step": 4025, + "valid_targets_mean": 3467.2, + "valid_targets_min": 545 + }, + { + "epoch": 6.489533011272142, + "grad_norm": 0.5270621108254152, + "learning_rate": 6.48626595101991e-07, + "loss": 0.1752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19507281482219696, + "step": 4030, + "valid_targets_mean": 5011.7, + "valid_targets_min": 2115 + }, + { + "epoch": 6.4975845410628015, + "grad_norm": 0.5614424641002262, + "learning_rate": 6.284965588579028e-07, + "loss": 0.1683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1735997200012207, + "step": 4035, + "valid_targets_mean": 4858.7, + "valid_targets_min": 1516 + }, + { + "epoch": 6.505636070853463, + "grad_norm": 0.6277678468034912, + "learning_rate": 6.08678846217019e-07, + "loss": 0.1553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.173121377825737, + "step": 4040, + "valid_targets_mean": 4697.3, + "valid_targets_min": 434 + }, + { + "epoch": 6.513687600644122, + "grad_norm": 0.557886874518275, + "learning_rate": 5.891737766970984e-07, + "loss": 0.1615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16518916189670563, + "step": 4045, + "valid_targets_mean": 4401.9, + "valid_targets_min": 596 + }, + { + "epoch": 6.521739130434782, + "grad_norm": 0.5026467498773549, + "learning_rate": 5.699816647752077e-07, + "loss": 0.1601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16781866550445557, + "step": 4050, + "valid_targets_mean": 4928.8, + "valid_targets_min": 2215 + }, + { + "epoch": 6.5297906602254425, + "grad_norm": 0.5451182471548547, + "learning_rate": 5.511028198826496e-07, + "loss": 0.1679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17092815041542053, + "step": 4055, + "valid_targets_mean": 4106.2, + "valid_targets_min": 451 + }, + { + "epoch": 6.537842190016103, + "grad_norm": 0.5743885198199623, + "learning_rate": 5.32537546399976e-07, + "loss": 0.1672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16788442432880402, + "step": 4060, + "valid_targets_mean": 4289.1, + "valid_targets_min": 1684 + }, + { + "epoch": 6.545893719806763, + "grad_norm": 0.6037224090967725, + "learning_rate": 5.142861436520763e-07, + "loss": 0.1685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20297744870185852, + "step": 4065, + "valid_targets_mean": 3997.9, + "valid_targets_min": 383 + }, + { + "epoch": 6.553945249597423, + "grad_norm": 0.8388825418667704, + "learning_rate": 4.963489059033477e-07, + "loss": 0.1642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16553989052772522, + "step": 4070, + "valid_targets_mean": 4784.5, + "valid_targets_min": 2033 + }, + { + "epoch": 6.561996779388084, + "grad_norm": 0.5970930999537574, + "learning_rate": 4.787261223529616e-07, + "loss": 0.165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17344771325588226, + "step": 4075, + "valid_targets_mean": 3786.6, + "valid_targets_min": 898 + }, + { + "epoch": 6.570048309178744, + "grad_norm": 0.471204909767901, + "learning_rate": 4.6141807713019793e-07, + "loss": 0.155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13417160511016846, + "step": 4080, + "valid_targets_mean": 5032.4, + "valid_targets_min": 1912 + }, + { + "epoch": 6.578099838969404, + "grad_norm": 0.4884625900862524, + "learning_rate": 4.444250492898539e-07, + "loss": 0.1746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16039326786994934, + "step": 4085, + "valid_targets_mean": 5670.8, + "valid_targets_min": 1963 + }, + { + "epoch": 6.586151368760064, + "grad_norm": 1.3562926760409872, + "learning_rate": 4.277473128077625e-07, + "loss": 0.1798, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15605992078781128, + "step": 4090, + "valid_targets_mean": 5004.9, + "valid_targets_min": 390 + }, + { + "epoch": 6.594202898550725, + "grad_norm": 0.5293266001333903, + "learning_rate": 4.113851365763544e-07, + "loss": 0.1626, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1735745668411255, + "step": 4095, + "valid_targets_mean": 5240.4, + "valid_targets_min": 1701 + }, + { + "epoch": 6.602254428341385, + "grad_norm": 0.5578624317503144, + "learning_rate": 3.953387844003431e-07, + "loss": 0.1607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1669292151927948, + "step": 4100, + "valid_targets_mean": 4350.2, + "valid_targets_min": 2309 + }, + { + "epoch": 6.610305958132045, + "grad_norm": 0.604399390427962, + "learning_rate": 3.7960851499245554e-07, + "loss": 0.1566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14910921454429626, + "step": 4105, + "valid_targets_mean": 3900.4, + "valid_targets_min": 823 + }, + { + "epoch": 6.618357487922705, + "grad_norm": 0.5270613962718504, + "learning_rate": 3.6419458196926825e-07, + "loss": 0.1713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1642162799835205, + "step": 4110, + "valid_targets_mean": 4613.5, + "valid_targets_min": 1191 + }, + { + "epoch": 6.626409017713366, + "grad_norm": 0.4768249623670376, + "learning_rate": 3.4909723384712436e-07, + "loss": 0.1647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15536585450172424, + "step": 4115, + "valid_targets_mean": 4920.4, + "valid_targets_min": 1743 + }, + { + "epoch": 6.634460547504026, + "grad_norm": 0.7880877824521434, + "learning_rate": 3.3431671403811207e-07, + "loss": 0.1651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14975430071353912, + "step": 4120, + "valid_targets_mean": 4408.2, + "valid_targets_min": 431 + }, + { + "epoch": 6.642512077294686, + "grad_norm": 0.5055031311515263, + "learning_rate": 3.198532608461524e-07, + "loss": 0.1627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18191225826740265, + "step": 4125, + "valid_targets_mean": 5325.4, + "valid_targets_min": 881 + }, + { + "epoch": 6.650563607085346, + "grad_norm": 0.7377711817281254, + "learning_rate": 3.0570710746314903e-07, + "loss": 0.1785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1757345199584961, + "step": 4130, + "valid_targets_mean": 4204.0, + "valid_targets_min": 851 + }, + { + "epoch": 6.658615136876007, + "grad_norm": 0.49184719888721345, + "learning_rate": 2.9187848196524205e-07, + "loss": 0.1601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1542222797870636, + "step": 4135, + "valid_targets_mean": 4731.2, + "valid_targets_min": 429 + }, + { + "epoch": 6.666666666666667, + "grad_norm": 0.6105305368391408, + "learning_rate": 2.7836760730910464e-07, + "loss": 0.1666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17159178853034973, + "step": 4140, + "valid_targets_mean": 3496.8, + "valid_targets_min": 397 + }, + { + "epoch": 6.674718196457327, + "grad_norm": 0.5519746733795567, + "learning_rate": 2.6517470132838117e-07, + "loss": 0.1536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1730884313583374, + "step": 4145, + "valid_targets_mean": 4672.2, + "valid_targets_min": 1995 + }, + { + "epoch": 6.6827697262479875, + "grad_norm": 0.5635266522753974, + "learning_rate": 2.522999767301482e-07, + "loss": 0.1652, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15275679528713226, + "step": 4150, + "valid_targets_mean": 3927.0, + "valid_targets_min": 300 + }, + { + "epoch": 6.690821256038648, + "grad_norm": 0.5061168238375812, + "learning_rate": 2.3974364109149886e-07, + "loss": 0.1656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14958354830741882, + "step": 4155, + "valid_targets_mean": 3901.1, + "valid_targets_min": 1810 + }, + { + "epoch": 6.698872785829307, + "grad_norm": 0.5040398527635441, + "learning_rate": 2.2750589685619495e-07, + "loss": 0.1654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17061063647270203, + "step": 4160, + "valid_targets_mean": 4585.4, + "valid_targets_min": 1591 + }, + { + "epoch": 6.706924315619968, + "grad_norm": 0.5035025471661269, + "learning_rate": 2.1558694133139823e-07, + "loss": 0.1589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16706478595733643, + "step": 4165, + "valid_targets_mean": 4401.9, + "valid_targets_min": 310 + }, + { + "epoch": 6.714975845410628, + "grad_norm": 0.5623618030459923, + "learning_rate": 2.039869666844929e-07, + "loss": 0.1697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1561906784772873, + "step": 4170, + "valid_targets_mean": 3450.5, + "valid_targets_min": 1034 + }, + { + "epoch": 6.723027375201288, + "grad_norm": 0.5216795359225037, + "learning_rate": 1.9270615993998375e-07, + "loss": 0.1618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1636110544204712, + "step": 4175, + "valid_targets_mean": 4135.8, + "valid_targets_min": 1656 + }, + { + "epoch": 6.731078904991948, + "grad_norm": 0.5368469010480501, + "learning_rate": 1.817447029764874e-07, + "loss": 0.1627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16674436628818512, + "step": 4180, + "valid_targets_mean": 4527.6, + "valid_targets_min": 1381 + }, + { + "epoch": 6.739130434782608, + "grad_norm": 0.6438973279011997, + "learning_rate": 1.7110277252379238e-07, + "loss": 0.1554, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15082503855228424, + "step": 4185, + "valid_targets_mean": 4736.3, + "valid_targets_min": 1379 + }, + { + "epoch": 6.747181964573269, + "grad_norm": 0.4998287056476511, + "learning_rate": 1.607805401600149e-07, + "loss": 0.167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17229287326335907, + "step": 4190, + "valid_targets_mean": 4969.6, + "valid_targets_min": 1283 + }, + { + "epoch": 6.755233494363929, + "grad_norm": 0.599169633423643, + "learning_rate": 1.5077817230883419e-07, + "loss": 0.1608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15904340147972107, + "step": 4195, + "valid_targets_mean": 4552.9, + "valid_targets_min": 1866 + }, + { + "epoch": 6.763285024154589, + "grad_norm": 0.570759522637566, + "learning_rate": 1.4109583023679706e-07, + "loss": 0.1662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15024811029434204, + "step": 4200, + "valid_targets_mean": 5746.9, + "valid_targets_min": 1892 + }, + { + "epoch": 6.7713365539452495, + "grad_norm": 0.5403647002807387, + "learning_rate": 1.3173367005073545e-07, + "loss": 0.1619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17785856127738953, + "step": 4205, + "valid_targets_mean": 4047.6, + "valid_targets_min": 2002 + }, + { + "epoch": 6.77938808373591, + "grad_norm": 0.5226238218670288, + "learning_rate": 1.2269184269523282e-07, + "loss": 0.1589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16204693913459778, + "step": 4210, + "valid_targets_mean": 4730.7, + "valid_targets_min": 1508 + }, + { + "epoch": 6.78743961352657, + "grad_norm": 0.5555947999177602, + "learning_rate": 1.1397049395020842e-07, + "loss": 0.154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16874821484088898, + "step": 4215, + "valid_targets_mean": 4100.5, + "valid_targets_min": 1171 + }, + { + "epoch": 6.79549114331723, + "grad_norm": 0.5473832146836152, + "learning_rate": 1.0556976442854805e-07, + "loss": 0.1628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18583595752716064, + "step": 4220, + "valid_targets_mean": 4135.4, + "valid_targets_min": 344 + }, + { + "epoch": 6.8035426731078905, + "grad_norm": 0.5690884856327217, + "learning_rate": 9.748978957385025e-08, + "loss": 0.1629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1753610521554947, + "step": 4225, + "valid_targets_mean": 4328.1, + "valid_targets_min": 669 + }, + { + "epoch": 6.811594202898551, + "grad_norm": 0.5625787536362429, + "learning_rate": 8.9730699658237e-08, + "loss": 0.1606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15724310278892517, + "step": 4230, + "valid_targets_mean": 4711.9, + "valid_targets_min": 2299 + }, + { + "epoch": 6.819645732689211, + "grad_norm": 0.5162132722455169, + "learning_rate": 8.229261978025316e-08, + "loss": 0.1611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17424245178699493, + "step": 4235, + "valid_targets_mean": 4760.0, + "valid_targets_min": 2276 + }, + { + "epoch": 6.827697262479871, + "grad_norm": 0.6156039773080834, + "learning_rate": 7.517566986285474e-08, + "loss": 0.182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1574200689792633, + "step": 4240, + "valid_targets_mean": 3681.5, + "valid_targets_min": 751 + }, + { + "epoch": 6.835748792270532, + "grad_norm": 0.5832771610140742, + "learning_rate": 6.837996465146823e-08, + "loss": 0.1677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17290878295898438, + "step": 4245, + "valid_targets_mean": 3748.6, + "valid_targets_min": 284 + }, + { + "epoch": 6.843800322061192, + "grad_norm": 0.5576720036336581, + "learning_rate": 6.190561371214321e-08, + "loss": 0.1672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16148659586906433, + "step": 4250, + "valid_targets_mean": 4166.0, + "valid_targets_min": 930 + }, + { + "epoch": 6.851851851851852, + "grad_norm": 0.5878319666889452, + "learning_rate": 5.575272142978927e-08, + "loss": 0.1799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.172722727060318, + "step": 4255, + "valid_targets_mean": 4018.2, + "valid_targets_min": 754 + }, + { + "epoch": 6.859903381642512, + "grad_norm": 0.479443149906381, + "learning_rate": 4.992138700649074e-08, + "loss": 0.1607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13948974013328552, + "step": 4260, + "valid_targets_mean": 4858.1, + "valid_targets_min": 1813 + }, + { + "epoch": 6.867954911433173, + "grad_norm": 0.5541385500661399, + "learning_rate": 4.4411704459903506e-08, + "loss": 0.167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1660829335451126, + "step": 4265, + "valid_targets_mean": 4232.4, + "valid_targets_min": 1054 + }, + { + "epoch": 6.876006441223833, + "grad_norm": 0.6179225703483298, + "learning_rate": 3.92237626217451e-08, + "loss": 0.1713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16906727850437164, + "step": 4270, + "valid_targets_mean": 4460.8, + "valid_targets_min": 1530 + }, + { + "epoch": 6.884057971014493, + "grad_norm": 0.5783966854225258, + "learning_rate": 3.435764513635809e-08, + "loss": 0.1671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17848993837833405, + "step": 4275, + "valid_targets_mean": 4309.5, + "valid_targets_min": 1717 + }, + { + "epoch": 6.892109500805153, + "grad_norm": 0.5351867682144865, + "learning_rate": 2.9813430459364465e-08, + "loss": 0.1707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17882059514522552, + "step": 4280, + "valid_targets_mean": 4416.9, + "valid_targets_min": 1611 + }, + { + "epoch": 6.900161030595813, + "grad_norm": 0.573670983161385, + "learning_rate": 2.5591191856397802e-08, + "loss": 0.1714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15430384874343872, + "step": 4285, + "valid_targets_mean": 4570.9, + "valid_targets_min": 1407 + }, + { + "epoch": 6.908212560386474, + "grad_norm": 0.5344897585917823, + "learning_rate": 2.1690997401928593e-08, + "loss": 0.1538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16887977719306946, + "step": 4290, + "valid_targets_mean": 4439.5, + "valid_targets_min": 2172 + }, + { + "epoch": 6.916264090177133, + "grad_norm": 0.551697601245166, + "learning_rate": 1.811290997815851e-08, + "loss": 0.1762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17137417197227478, + "step": 4295, + "valid_targets_mean": 4672.2, + "valid_targets_min": 590 + }, + { + "epoch": 6.9243156199677935, + "grad_norm": 0.5535638309128108, + "learning_rate": 1.485698727400564e-08, + "loss": 0.1673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14466409385204315, + "step": 4300, + "valid_targets_mean": 3469.3, + "valid_targets_min": 1197 + }, + { + "epoch": 6.932367149758454, + "grad_norm": 0.5119337022576111, + "learning_rate": 1.1923281784185226e-08, + "loss": 0.1673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16587556898593903, + "step": 4305, + "valid_targets_mean": 4396.5, + "valid_targets_min": 1299 + }, + { + "epoch": 6.940418679549114, + "grad_norm": 0.5923102041383612, + "learning_rate": 9.311840808357009e-09, + "loss": 0.1712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1823520064353943, + "step": 4310, + "valid_targets_mean": 4216.1, + "valid_targets_min": 1303 + }, + { + "epoch": 6.948470209339774, + "grad_norm": 0.5103445914494756, + "learning_rate": 7.022706450354744e-09, + "loss": 0.1677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15162253379821777, + "step": 4315, + "valid_targets_mean": 5503.6, + "valid_targets_min": 2231 + }, + { + "epoch": 6.956521739130435, + "grad_norm": 0.533406608295802, + "learning_rate": 5.055915617522278e-09, + "loss": 0.158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14400282502174377, + "step": 4320, + "valid_targets_mean": 4314.7, + "valid_targets_min": 1448 + }, + { + "epoch": 6.964573268921095, + "grad_norm": 0.6879701190779768, + "learning_rate": 3.411500020109593e-09, + "loss": 0.161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1823330670595169, + "step": 4325, + "valid_targets_mean": 3070.6, + "valid_targets_min": 467 + }, + { + "epoch": 6.972624798711755, + "grad_norm": 0.6418600229930465, + "learning_rate": 2.08948617075988e-09, + "loss": 0.1642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15779449045658112, + "step": 4330, + "valid_targets_mean": 3922.4, + "valid_targets_min": 1291 + }, + { + "epoch": 6.980676328502415, + "grad_norm": 0.5848882869027459, + "learning_rate": 1.0898953840898786e-09, + "loss": 0.1616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18286889791488647, + "step": 4335, + "valid_targets_mean": 4633.8, + "valid_targets_min": 695 + }, + { + "epoch": 6.988727858293076, + "grad_norm": 0.45093430680878505, + "learning_rate": 4.127437763390418e-10, + "loss": 0.1679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15890920162200928, + "step": 4340, + "valid_targets_mean": 5343.4, + "valid_targets_min": 2478 + }, + { + "epoch": 6.996779388083736, + "grad_norm": 0.5378585489037748, + "learning_rate": 5.804226511196831e-11, + "loss": 0.1718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15117883682250977, + "step": 4345, + "valid_targets_mean": 3975.8, + "valid_targets_min": 2081 + }, + { + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17696434259414673, + "step": 4347, + "total_flos": 1518375161495552.0, + "train_loss": 0.21155054198480733, + "train_runtime": 25607.106, + "train_samples_per_second": 2.714, + "train_steps_per_second": 0.17, + "valid_targets_mean": 4512.6, + "valid_targets_min": 535 + } + ], + "logging_steps": 5, + "max_steps": 4347, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 1500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1518375161495552.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}