diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,9727 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 4403, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00794912559618442, + "grad_norm": 17.84746630996863, + "learning_rate": 3.6281179138322e-07, + "loss": 0.6173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6814424991607666, + "step": 5, + "valid_targets_mean": 4120.6, + "valid_targets_min": 293 + }, + { + "epoch": 0.01589825119236884, + "grad_norm": 17.156517918811076, + "learning_rate": 8.163265306122449e-07, + "loss": 0.6758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6257226467132568, + "step": 10, + "valid_targets_mean": 3674.1, + "valid_targets_min": 434 + }, + { + "epoch": 0.02384737678855326, + "grad_norm": 18.01419791342929, + "learning_rate": 1.26984126984127e-06, + "loss": 0.7098, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7265408635139465, + "step": 15, + "valid_targets_mean": 3347.9, + "valid_targets_min": 619 + }, + { + "epoch": 0.03179650238473768, + "grad_norm": 11.167185739612897, + "learning_rate": 1.723356009070295e-06, + "loss": 0.6202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5243589282035828, + "step": 20, + "valid_targets_mean": 4327.7, + "valid_targets_min": 295 + }, + { + "epoch": 0.0397456279809221, + "grad_norm": 8.22954319900581, + "learning_rate": 2.17687074829932e-06, + "loss": 0.5494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5256298780441284, + "step": 25, + "valid_targets_mean": 3764.8, + "valid_targets_min": 1259 + }, + { + "epoch": 0.04769475357710652, + "grad_norm": 5.286623836409655, + "learning_rate": 2.6303854875283447e-06, + "loss": 0.5207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5739851593971252, + "step": 30, + "valid_targets_mean": 3315.1, + "valid_targets_min": 936 + }, + { + "epoch": 0.05564387917329094, + "grad_norm": 3.091625342947559, + "learning_rate": 3.08390022675737e-06, + "loss": 0.4249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46355006098747253, + "step": 35, + "valid_targets_mean": 3561.8, + "valid_targets_min": 557 + }, + { + "epoch": 0.06359300476947535, + "grad_norm": 1.6120343190128226, + "learning_rate": 3.537414965986395e-06, + "loss": 0.4443, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.429388165473938, + "step": 40, + "valid_targets_mean": 4614.9, + "valid_targets_min": 834 + }, + { + "epoch": 0.07154213036565978, + "grad_norm": 1.1722091804408992, + "learning_rate": 3.99092970521542e-06, + "loss": 0.3702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38033077120780945, + "step": 45, + "valid_targets_mean": 4552.9, + "valid_targets_min": 606 + }, + { + "epoch": 0.0794912559618442, + "grad_norm": 1.2237202548885548, + "learning_rate": 4.444444444444444e-06, + "loss": 0.4028, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39322787523269653, + "step": 50, + "valid_targets_mean": 2935.4, + "valid_targets_min": 654 + }, + { + "epoch": 0.08744038155802862, + "grad_norm": 0.9959193308010645, + "learning_rate": 4.897959183673469e-06, + "loss": 0.3929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3938031792640686, + "step": 55, + "valid_targets_mean": 3294.7, + "valid_targets_min": 357 + }, + { + "epoch": 0.09538950715421304, + "grad_norm": 0.7829705793426464, + "learning_rate": 5.3514739229024945e-06, + "loss": 0.3598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37601467967033386, + "step": 60, + "valid_targets_mean": 4138.0, + "valid_targets_min": 266 + }, + { + "epoch": 0.10333863275039745, + "grad_norm": 0.7891875286042732, + "learning_rate": 5.80498866213152e-06, + "loss": 0.3652, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31892889738082886, + "step": 65, + "valid_targets_mean": 3174.8, + "valid_targets_min": 359 + }, + { + "epoch": 0.11128775834658187, + "grad_norm": 0.5962960338010875, + "learning_rate": 6.258503401360545e-06, + "loss": 0.336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2778470516204834, + "step": 70, + "valid_targets_mean": 4142.1, + "valid_targets_min": 917 + }, + { + "epoch": 0.1192368839427663, + "grad_norm": 0.6600711718977473, + "learning_rate": 6.71201814058957e-06, + "loss": 0.3393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32065966725349426, + "step": 75, + "valid_targets_mean": 3403.6, + "valid_targets_min": 836 + }, + { + "epoch": 0.1271860095389507, + "grad_norm": 0.5724896809709736, + "learning_rate": 7.165532879818595e-06, + "loss": 0.3093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3127829134464264, + "step": 80, + "valid_targets_mean": 3913.0, + "valid_targets_min": 241 + }, + { + "epoch": 0.13513513513513514, + "grad_norm": 0.6797026306142546, + "learning_rate": 7.61904761904762e-06, + "loss": 0.2941, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32160434126853943, + "step": 85, + "valid_targets_mean": 3563.1, + "valid_targets_min": 262 + }, + { + "epoch": 0.14308426073131955, + "grad_norm": 0.7278883756953868, + "learning_rate": 8.072562358276645e-06, + "loss": 0.3035, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3462420701980591, + "step": 90, + "valid_targets_mean": 2732.4, + "valid_targets_min": 755 + }, + { + "epoch": 0.151033386327504, + "grad_norm": 0.5708083904356873, + "learning_rate": 8.52607709750567e-06, + "loss": 0.3008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3244127333164215, + "step": 95, + "valid_targets_mean": 4137.4, + "valid_targets_min": 281 + }, + { + "epoch": 0.1589825119236884, + "grad_norm": 0.6276207848939579, + "learning_rate": 8.979591836734695e-06, + "loss": 0.3111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26650288701057434, + "step": 100, + "valid_targets_mean": 3918.8, + "valid_targets_min": 679 + }, + { + "epoch": 0.1669316375198728, + "grad_norm": 0.5159242585947521, + "learning_rate": 9.43310657596372e-06, + "loss": 0.2645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2642788887023926, + "step": 105, + "valid_targets_mean": 4179.4, + "valid_targets_min": 1342 + }, + { + "epoch": 0.17488076311605724, + "grad_norm": 0.507714398672033, + "learning_rate": 9.886621315192746e-06, + "loss": 0.2774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23547717928886414, + "step": 110, + "valid_targets_mean": 3923.2, + "valid_targets_min": 469 + }, + { + "epoch": 0.18282988871224165, + "grad_norm": 0.5564144664645884, + "learning_rate": 1.034013605442177e-05, + "loss": 0.2829, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28326016664505005, + "step": 115, + "valid_targets_mean": 3575.0, + "valid_targets_min": 269 + }, + { + "epoch": 0.1907790143084261, + "grad_norm": 0.5695684791526006, + "learning_rate": 1.0793650793650794e-05, + "loss": 0.2816, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26167941093444824, + "step": 120, + "valid_targets_mean": 3303.6, + "valid_targets_min": 641 + }, + { + "epoch": 0.1987281399046105, + "grad_norm": 0.5237201400320065, + "learning_rate": 1.124716553287982e-05, + "loss": 0.2548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28204721212387085, + "step": 125, + "valid_targets_mean": 4681.8, + "valid_targets_min": 569 + }, + { + "epoch": 0.2066772655007949, + "grad_norm": 0.49001217913037826, + "learning_rate": 1.1700680272108845e-05, + "loss": 0.2632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25121694803237915, + "step": 130, + "valid_targets_mean": 4638.6, + "valid_targets_min": 1821 + }, + { + "epoch": 0.21462639109697934, + "grad_norm": 0.5448380739456933, + "learning_rate": 1.215419501133787e-05, + "loss": 0.2657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24607300758361816, + "step": 135, + "valid_targets_mean": 3504.9, + "valid_targets_min": 673 + }, + { + "epoch": 0.22257551669316375, + "grad_norm": 0.5687526210815153, + "learning_rate": 1.2607709750566895e-05, + "loss": 0.2802, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2677372694015503, + "step": 140, + "valid_targets_mean": 3854.4, + "valid_targets_min": 1329 + }, + { + "epoch": 0.23052464228934816, + "grad_norm": 0.6758185299669368, + "learning_rate": 1.3061224489795918e-05, + "loss": 0.2907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31545472145080566, + "step": 145, + "valid_targets_mean": 3157.5, + "valid_targets_min": 748 + }, + { + "epoch": 0.2384737678855326, + "grad_norm": 0.5061882696731755, + "learning_rate": 1.3514739229024945e-05, + "loss": 0.2698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2856583595275879, + "step": 150, + "valid_targets_mean": 4388.9, + "valid_targets_min": 478 + }, + { + "epoch": 0.246422893481717, + "grad_norm": 0.5014286356533292, + "learning_rate": 1.3968253968253968e-05, + "loss": 0.2456, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2581082582473755, + "step": 155, + "valid_targets_mean": 4534.4, + "valid_targets_min": 891 + }, + { + "epoch": 0.2543720190779014, + "grad_norm": 0.45068416242985176, + "learning_rate": 1.4421768707482994e-05, + "loss": 0.2487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21287065744400024, + "step": 160, + "valid_targets_mean": 4851.8, + "valid_targets_min": 1754 + }, + { + "epoch": 0.26232114467408585, + "grad_norm": 0.47658632064664735, + "learning_rate": 1.4875283446712018e-05, + "loss": 0.251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22702300548553467, + "step": 165, + "valid_targets_mean": 4325.5, + "valid_targets_min": 343 + }, + { + "epoch": 0.2702702702702703, + "grad_norm": 0.5277022645179128, + "learning_rate": 1.5328798185941044e-05, + "loss": 0.2518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2435617595911026, + "step": 170, + "valid_targets_mean": 3762.1, + "valid_targets_min": 581 + }, + { + "epoch": 0.27821939586645467, + "grad_norm": 0.5760228675809232, + "learning_rate": 1.578231292517007e-05, + "loss": 0.2567, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2474684715270996, + "step": 175, + "valid_targets_mean": 4084.1, + "valid_targets_min": 740 + }, + { + "epoch": 0.2861685214626391, + "grad_norm": 0.6439319078197573, + "learning_rate": 1.6235827664399097e-05, + "loss": 0.2544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3096400797367096, + "step": 180, + "valid_targets_mean": 2994.4, + "valid_targets_min": 273 + }, + { + "epoch": 0.29411764705882354, + "grad_norm": 0.5928535221147084, + "learning_rate": 1.668934240362812e-05, + "loss": 0.2349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24331536889076233, + "step": 185, + "valid_targets_mean": 3329.2, + "valid_targets_min": 714 + }, + { + "epoch": 0.302066772655008, + "grad_norm": 1.033627870156792, + "learning_rate": 1.7142857142857142e-05, + "loss": 0.2524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2561456561088562, + "step": 190, + "valid_targets_mean": 3033.4, + "valid_targets_min": 683 + }, + { + "epoch": 0.31001589825119236, + "grad_norm": 0.669117146581437, + "learning_rate": 1.759637188208617e-05, + "loss": 0.2329, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26669400930404663, + "step": 195, + "valid_targets_mean": 3196.8, + "valid_targets_min": 279 + }, + { + "epoch": 0.3179650238473768, + "grad_norm": 0.5158072838218852, + "learning_rate": 1.8049886621315194e-05, + "loss": 0.2268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20886510610580444, + "step": 200, + "valid_targets_mean": 3514.2, + "valid_targets_min": 230 + }, + { + "epoch": 0.32591414944356123, + "grad_norm": 0.4417940976227962, + "learning_rate": 1.8503401360544218e-05, + "loss": 0.253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2474132776260376, + "step": 205, + "valid_targets_mean": 5143.9, + "valid_targets_min": 657 + }, + { + "epoch": 0.3338632750397456, + "grad_norm": 0.5669027262046396, + "learning_rate": 1.8956916099773243e-05, + "loss": 0.2495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2375866174697876, + "step": 210, + "valid_targets_mean": 4060.5, + "valid_targets_min": 787 + }, + { + "epoch": 0.34181240063593005, + "grad_norm": 0.5058043503905316, + "learning_rate": 1.941043083900227e-05, + "loss": 0.2531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21849530935287476, + "step": 215, + "valid_targets_mean": 4479.1, + "valid_targets_min": 871 + }, + { + "epoch": 0.3497615262321145, + "grad_norm": 0.7472969855667505, + "learning_rate": 1.9863945578231295e-05, + "loss": 0.2491, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22537925839424133, + "step": 220, + "valid_targets_mean": 3716.9, + "valid_targets_min": 1008 + }, + { + "epoch": 0.35771065182829886, + "grad_norm": 0.4763106253774264, + "learning_rate": 2.031746031746032e-05, + "loss": 0.2318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22549283504486084, + "step": 225, + "valid_targets_mean": 4696.7, + "valid_targets_min": 1345 + }, + { + "epoch": 0.3656597774244833, + "grad_norm": 0.5870776260460628, + "learning_rate": 2.0770975056689343e-05, + "loss": 0.24, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2403266578912735, + "step": 230, + "valid_targets_mean": 3434.6, + "valid_targets_min": 659 + }, + { + "epoch": 0.37360890302066774, + "grad_norm": 0.5224207873951022, + "learning_rate": 2.122448979591837e-05, + "loss": 0.2284, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20927098393440247, + "step": 235, + "valid_targets_mean": 3761.1, + "valid_targets_min": 233 + }, + { + "epoch": 0.3815580286168522, + "grad_norm": 0.5356608057317997, + "learning_rate": 2.1678004535147395e-05, + "loss": 0.2234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22495192289352417, + "step": 240, + "valid_targets_mean": 3884.0, + "valid_targets_min": 976 + }, + { + "epoch": 0.38950715421303655, + "grad_norm": 0.5368634114039467, + "learning_rate": 2.213151927437642e-05, + "loss": 0.2288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.202118381857872, + "step": 245, + "valid_targets_mean": 4109.8, + "valid_targets_min": 366 + }, + { + "epoch": 0.397456279809221, + "grad_norm": 0.5223166530343325, + "learning_rate": 2.2585034013605444e-05, + "loss": 0.2268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2209833413362503, + "step": 250, + "valid_targets_mean": 4501.5, + "valid_targets_min": 589 + }, + { + "epoch": 0.40540540540540543, + "grad_norm": 0.6182508332886121, + "learning_rate": 2.3038548752834472e-05, + "loss": 0.2349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25408124923706055, + "step": 255, + "valid_targets_mean": 3629.1, + "valid_targets_min": 601 + }, + { + "epoch": 0.4133545310015898, + "grad_norm": 0.5912718878361719, + "learning_rate": 2.3492063492063496e-05, + "loss": 0.24, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24555028975009918, + "step": 260, + "valid_targets_mean": 3606.7, + "valid_targets_min": 327 + }, + { + "epoch": 0.42130365659777425, + "grad_norm": 0.5750322104699228, + "learning_rate": 2.394557823129252e-05, + "loss": 0.2432, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21299511194229126, + "step": 265, + "valid_targets_mean": 3439.4, + "valid_targets_min": 314 + }, + { + "epoch": 0.4292527821939587, + "grad_norm": 0.706039350107576, + "learning_rate": 2.439909297052154e-05, + "loss": 0.2283, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22689072787761688, + "step": 270, + "valid_targets_mean": 2713.6, + "valid_targets_min": 614 + }, + { + "epoch": 0.43720190779014306, + "grad_norm": 0.4645382324491832, + "learning_rate": 2.4852607709750566e-05, + "loss": 0.2247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.186517596244812, + "step": 275, + "valid_targets_mean": 4288.1, + "valid_targets_min": 1067 + }, + { + "epoch": 0.4451510333863275, + "grad_norm": 0.5461367695964425, + "learning_rate": 2.5306122448979597e-05, + "loss": 0.2365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21831746399402618, + "step": 280, + "valid_targets_mean": 3579.9, + "valid_targets_min": 598 + }, + { + "epoch": 0.45310015898251194, + "grad_norm": 0.4657927124996843, + "learning_rate": 2.5759637188208618e-05, + "loss": 0.2041, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1893748790025711, + "step": 285, + "valid_targets_mean": 4740.9, + "valid_targets_min": 1740 + }, + { + "epoch": 0.4610492845786963, + "grad_norm": 0.6348763363997716, + "learning_rate": 2.6213151927437642e-05, + "loss": 0.2334, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19953399896621704, + "step": 290, + "valid_targets_mean": 2926.6, + "valid_targets_min": 542 + }, + { + "epoch": 0.46899841017488075, + "grad_norm": 0.625040649676204, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.2139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24280428886413574, + "step": 295, + "valid_targets_mean": 3343.4, + "valid_targets_min": 699 + }, + { + "epoch": 0.4769475357710652, + "grad_norm": 0.6020091340791197, + "learning_rate": 2.7120181405895694e-05, + "loss": 0.2236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2328951060771942, + "step": 300, + "valid_targets_mean": 3270.6, + "valid_targets_min": 349 + }, + { + "epoch": 0.4848966613672496, + "grad_norm": 0.5774743472055955, + "learning_rate": 2.757369614512472e-05, + "loss": 0.2074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18854255974292755, + "step": 305, + "valid_targets_mean": 3489.1, + "valid_targets_min": 670 + }, + { + "epoch": 0.492845786963434, + "grad_norm": 0.5002918848960409, + "learning_rate": 2.8027210884353743e-05, + "loss": 0.1993, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20658744871616364, + "step": 310, + "valid_targets_mean": 4087.5, + "valid_targets_min": 646 + }, + { + "epoch": 0.5007949125596184, + "grad_norm": 0.5258071361084128, + "learning_rate": 2.8480725623582767e-05, + "loss": 0.2287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26666340231895447, + "step": 315, + "valid_targets_mean": 4920.9, + "valid_targets_min": 711 + }, + { + "epoch": 0.5087440381558028, + "grad_norm": 0.5359657194543509, + "learning_rate": 2.893424036281179e-05, + "loss": 0.2231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2450794130563736, + "step": 320, + "valid_targets_mean": 4277.5, + "valid_targets_min": 811 + }, + { + "epoch": 0.5166931637519873, + "grad_norm": 0.47155499713480653, + "learning_rate": 2.938775510204082e-05, + "loss": 0.2315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22731101512908936, + "step": 325, + "valid_targets_mean": 4231.8, + "valid_targets_min": 649 + }, + { + "epoch": 0.5246422893481717, + "grad_norm": 0.5626140591094698, + "learning_rate": 2.9841269841269844e-05, + "loss": 0.2199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24118876457214355, + "step": 330, + "valid_targets_mean": 3969.8, + "valid_targets_min": 614 + }, + { + "epoch": 0.5325914149443561, + "grad_norm": 0.4432872807332565, + "learning_rate": 3.0294784580498868e-05, + "loss": 0.2225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19566431641578674, + "step": 335, + "valid_targets_mean": 4741.9, + "valid_targets_min": 601 + }, + { + "epoch": 0.5405405405405406, + "grad_norm": 0.938740381890964, + "learning_rate": 3.074829931972789e-05, + "loss": 0.2466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24197694659233093, + "step": 340, + "valid_targets_mean": 3894.6, + "valid_targets_min": 533 + }, + { + "epoch": 0.548489666136725, + "grad_norm": 0.5053259602931793, + "learning_rate": 3.1201814058956924e-05, + "loss": 0.2136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2000350058078766, + "step": 345, + "valid_targets_mean": 4115.1, + "valid_targets_min": 639 + }, + { + "epoch": 0.5564387917329093, + "grad_norm": 0.5110974452473003, + "learning_rate": 3.1655328798185945e-05, + "loss": 0.2078, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20815539360046387, + "step": 350, + "valid_targets_mean": 4070.4, + "valid_targets_min": 1042 + }, + { + "epoch": 0.5643879173290938, + "grad_norm": 0.5374530089049312, + "learning_rate": 3.2108843537414965e-05, + "loss": 0.2073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19174043834209442, + "step": 355, + "valid_targets_mean": 3816.1, + "valid_targets_min": 481 + }, + { + "epoch": 0.5723370429252782, + "grad_norm": 0.5278680977737629, + "learning_rate": 3.256235827664399e-05, + "loss": 0.216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19281309843063354, + "step": 360, + "valid_targets_mean": 4304.8, + "valid_targets_min": 1126 + }, + { + "epoch": 0.5802861685214626, + "grad_norm": 0.5556493209390555, + "learning_rate": 3.3015873015873014e-05, + "loss": 0.2157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20964595675468445, + "step": 365, + "valid_targets_mean": 3496.3, + "valid_targets_min": 249 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 0.5099107198036154, + "learning_rate": 3.346938775510204e-05, + "loss": 0.2132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1972021907567978, + "step": 370, + "valid_targets_mean": 4024.8, + "valid_targets_min": 644 + }, + { + "epoch": 0.5961844197138315, + "grad_norm": 0.5259960559393442, + "learning_rate": 3.392290249433107e-05, + "loss": 0.2207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2222093790769577, + "step": 375, + "valid_targets_mean": 3776.1, + "valid_targets_min": 872 + }, + { + "epoch": 0.604133545310016, + "grad_norm": 0.5644984822926026, + "learning_rate": 3.437641723356009e-05, + "loss": 0.2339, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1983184814453125, + "step": 380, + "valid_targets_mean": 3157.5, + "valid_targets_min": 273 + }, + { + "epoch": 0.6120826709062003, + "grad_norm": 0.526845185443891, + "learning_rate": 3.482993197278912e-05, + "loss": 0.2421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21050560474395752, + "step": 385, + "valid_targets_mean": 4162.3, + "valid_targets_min": 257 + }, + { + "epoch": 0.6200317965023847, + "grad_norm": 0.5111567928477996, + "learning_rate": 3.5283446712018146e-05, + "loss": 0.2188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2091737687587738, + "step": 390, + "valid_targets_mean": 4373.3, + "valid_targets_min": 1257 + }, + { + "epoch": 0.6279809220985691, + "grad_norm": 0.4792741938563976, + "learning_rate": 3.573696145124717e-05, + "loss": 0.2098, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21705269813537598, + "step": 395, + "valid_targets_mean": 4804.3, + "valid_targets_min": 1353 + }, + { + "epoch": 0.6359300476947536, + "grad_norm": 0.5918892454757362, + "learning_rate": 3.6190476190476195e-05, + "loss": 0.2108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2312270998954773, + "step": 400, + "valid_targets_mean": 3560.3, + "valid_targets_min": 1152 + }, + { + "epoch": 0.643879173290938, + "grad_norm": 0.5068901074600425, + "learning_rate": 3.6643990929705216e-05, + "loss": 0.2062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1908341348171234, + "step": 405, + "valid_targets_mean": 3449.1, + "valid_targets_min": 261 + }, + { + "epoch": 0.6518282988871225, + "grad_norm": 0.5793934181213, + "learning_rate": 3.7097505668934243e-05, + "loss": 0.2184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1862396001815796, + "step": 410, + "valid_targets_mean": 2984.0, + "valid_targets_min": 636 + }, + { + "epoch": 0.6597774244833068, + "grad_norm": 0.5690118134642445, + "learning_rate": 3.755102040816327e-05, + "loss": 0.2236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23144859075546265, + "step": 415, + "valid_targets_mean": 4501.0, + "valid_targets_min": 1604 + }, + { + "epoch": 0.6677265500794912, + "grad_norm": 0.5776584245877077, + "learning_rate": 3.800453514739229e-05, + "loss": 0.2179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21334588527679443, + "step": 420, + "valid_targets_mean": 3043.1, + "valid_targets_min": 277 + }, + { + "epoch": 0.6756756756756757, + "grad_norm": 0.4713962722172494, + "learning_rate": 3.845804988662132e-05, + "loss": 0.2073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.193765789270401, + "step": 425, + "valid_targets_mean": 4792.8, + "valid_targets_min": 1292 + }, + { + "epoch": 0.6836248012718601, + "grad_norm": 0.43246949767784015, + "learning_rate": 3.891156462585034e-05, + "loss": 0.2252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1918296366930008, + "step": 430, + "valid_targets_mean": 5232.2, + "valid_targets_min": 818 + }, + { + "epoch": 0.6915739268680445, + "grad_norm": 0.6617748262724793, + "learning_rate": 3.936507936507937e-05, + "loss": 0.2175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21801866590976715, + "step": 435, + "valid_targets_mean": 2876.1, + "valid_targets_min": 1104 + }, + { + "epoch": 0.699523052464229, + "grad_norm": 0.5970309098523798, + "learning_rate": 3.9818594104308396e-05, + "loss": 0.2228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21752095222473145, + "step": 440, + "valid_targets_mean": 3932.8, + "valid_targets_min": 813 + }, + { + "epoch": 0.7074721780604134, + "grad_norm": 0.5161262404357317, + "learning_rate": 3.999994341346418e-05, + "loss": 0.2103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.221147358417511, + "step": 445, + "valid_targets_mean": 3660.2, + "valid_targets_min": 736 + }, + { + "epoch": 0.7154213036565977, + "grad_norm": 0.5628100325252552, + "learning_rate": 3.999959760801596e-05, + "loss": 0.2175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2509196400642395, + "step": 450, + "valid_targets_mean": 3745.4, + "valid_targets_min": 668 + }, + { + "epoch": 0.7233704292527822, + "grad_norm": 0.4798788268427838, + "learning_rate": 3.999893743951281e-05, + "loss": 0.2223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.249254047870636, + "step": 455, + "valid_targets_mean": 4542.9, + "valid_targets_min": 675 + }, + { + "epoch": 0.7313195548489666, + "grad_norm": 0.5152240231525378, + "learning_rate": 3.9997962918331554e-05, + "loss": 0.1942, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19418257474899292, + "step": 460, + "valid_targets_mean": 3978.0, + "valid_targets_min": 678 + }, + { + "epoch": 0.739268680445151, + "grad_norm": 0.5512712084043713, + "learning_rate": 3.999667405979019e-05, + "loss": 0.204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20612534880638123, + "step": 465, + "valid_targets_mean": 2988.6, + "valid_targets_min": 618 + }, + { + "epoch": 0.7472178060413355, + "grad_norm": 0.7851942416001637, + "learning_rate": 3.9995070884147604e-05, + "loss": 0.2296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22632327675819397, + "step": 470, + "valid_targets_mean": 3359.4, + "valid_targets_min": 968 + }, + { + "epoch": 0.7551669316375199, + "grad_norm": 0.5537518840736152, + "learning_rate": 3.999315341660325e-05, + "loss": 0.2036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21473893523216248, + "step": 475, + "valid_targets_mean": 3244.5, + "valid_targets_min": 581 + }, + { + "epoch": 0.7631160572337043, + "grad_norm": 0.5088365488324662, + "learning_rate": 3.9990921687296785e-05, + "loss": 0.219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21233266592025757, + "step": 480, + "valid_targets_mean": 4417.6, + "valid_targets_min": 673 + }, + { + "epoch": 0.7710651828298887, + "grad_norm": 0.5591275224242683, + "learning_rate": 3.998837573130758e-05, + "loss": 0.2236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23937103152275085, + "step": 485, + "valid_targets_mean": 3575.7, + "valid_targets_min": 534 + }, + { + "epoch": 0.7790143084260731, + "grad_norm": 0.41500354568791326, + "learning_rate": 3.9985515588654166e-05, + "loss": 0.2215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19625945389270782, + "step": 490, + "valid_targets_mean": 4809.2, + "valid_targets_min": 2182 + }, + { + "epoch": 0.7869634340222575, + "grad_norm": 0.5197016278046811, + "learning_rate": 3.99823413042936e-05, + "loss": 0.1907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16841718554496765, + "step": 495, + "valid_targets_mean": 3910.1, + "valid_targets_min": 1124 + }, + { + "epoch": 0.794912559618442, + "grad_norm": 0.42605527843070085, + "learning_rate": 3.997885292812078e-05, + "loss": 0.2098, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21589577198028564, + "step": 500, + "valid_targets_mean": 5083.4, + "valid_targets_min": 631 + }, + { + "epoch": 0.8028616852146264, + "grad_norm": 0.6563615392059224, + "learning_rate": 3.997505051496764e-05, + "loss": 0.2244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.269468754529953, + "step": 505, + "valid_targets_mean": 3249.7, + "valid_targets_min": 612 + }, + { + "epoch": 0.8108108108108109, + "grad_norm": 0.511370927432411, + "learning_rate": 3.997093412460229e-05, + "loss": 0.2204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23530468344688416, + "step": 510, + "valid_targets_mean": 4039.5, + "valid_targets_min": 671 + }, + { + "epoch": 0.8187599364069952, + "grad_norm": 0.46207600265174703, + "learning_rate": 3.9966503821728074e-05, + "loss": 0.1981, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18872712552547455, + "step": 515, + "valid_targets_mean": 3918.5, + "valid_targets_min": 911 + }, + { + "epoch": 0.8267090620031796, + "grad_norm": 0.518466058860564, + "learning_rate": 3.996175967598258e-05, + "loss": 0.2271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.258210152387619, + "step": 520, + "valid_targets_mean": 3838.6, + "valid_targets_min": 509 + }, + { + "epoch": 0.834658187599364, + "grad_norm": 0.5496770684162091, + "learning_rate": 3.995670176193651e-05, + "loss": 0.2042, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1955585479736328, + "step": 525, + "valid_targets_mean": 3015.4, + "valid_targets_min": 601 + }, + { + "epoch": 0.8426073131955485, + "grad_norm": 0.5141119419529661, + "learning_rate": 3.9951330159092554e-05, + "loss": 0.2077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1896553933620453, + "step": 530, + "valid_targets_mean": 3632.0, + "valid_targets_min": 235 + }, + { + "epoch": 0.8505564387917329, + "grad_norm": 0.6019780534491145, + "learning_rate": 3.994564495188405e-05, + "loss": 0.2028, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19973322749137878, + "step": 535, + "valid_targets_mean": 3989.1, + "valid_targets_min": 545 + }, + { + "epoch": 0.8585055643879174, + "grad_norm": 0.43954090925747025, + "learning_rate": 3.9939646229673775e-05, + "loss": 0.2075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1840040683746338, + "step": 540, + "valid_targets_mean": 4522.1, + "valid_targets_min": 1633 + }, + { + "epoch": 0.8664546899841018, + "grad_norm": 0.5977708417760155, + "learning_rate": 3.993333408675244e-05, + "loss": 0.1873, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20497910678386688, + "step": 545, + "valid_targets_mean": 2984.9, + "valid_targets_min": 656 + }, + { + "epoch": 0.8744038155802861, + "grad_norm": 0.4757849711588045, + "learning_rate": 3.9926708622337285e-05, + "loss": 0.214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24841903150081635, + "step": 550, + "valid_targets_mean": 4137.8, + "valid_targets_min": 556 + }, + { + "epoch": 0.8823529411764706, + "grad_norm": 0.4822074195433229, + "learning_rate": 3.991976994057046e-05, + "loss": 0.1943, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1875840723514557, + "step": 555, + "valid_targets_mean": 4260.8, + "valid_targets_min": 1101 + }, + { + "epoch": 0.890302066772655, + "grad_norm": 0.5842684547912087, + "learning_rate": 3.991251815051741e-05, + "loss": 0.2006, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19977149367332458, + "step": 560, + "valid_targets_mean": 2972.9, + "valid_targets_min": 534 + }, + { + "epoch": 0.8982511923688394, + "grad_norm": 0.652464883044752, + "learning_rate": 3.990495336616519e-05, + "loss": 0.2, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20372867584228516, + "step": 565, + "valid_targets_mean": 3338.4, + "valid_targets_min": 622 + }, + { + "epoch": 0.9062003179650239, + "grad_norm": 0.6216148839044923, + "learning_rate": 3.989707570642062e-05, + "loss": 0.204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19672566652297974, + "step": 570, + "valid_targets_mean": 3842.1, + "valid_targets_min": 1334 + }, + { + "epoch": 0.9141494435612083, + "grad_norm": 0.5103176135370198, + "learning_rate": 3.988888529510844e-05, + "loss": 0.2139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20896919071674347, + "step": 575, + "valid_targets_mean": 3890.2, + "valid_targets_min": 679 + }, + { + "epoch": 0.9220985691573926, + "grad_norm": 0.5595134395559079, + "learning_rate": 3.988038226096939e-05, + "loss": 0.2112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2230646163225174, + "step": 580, + "valid_targets_mean": 3206.8, + "valid_targets_min": 255 + }, + { + "epoch": 0.9300476947535771, + "grad_norm": 0.46172488073234735, + "learning_rate": 3.9871566737658144e-05, + "loss": 0.1983, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17313021421432495, + "step": 585, + "valid_targets_mean": 3876.9, + "valid_targets_min": 734 + }, + { + "epoch": 0.9379968203497615, + "grad_norm": 0.4500918322083041, + "learning_rate": 3.986243886374124e-05, + "loss": 0.2054, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1883338987827301, + "step": 590, + "valid_targets_mean": 4360.1, + "valid_targets_min": 729 + }, + { + "epoch": 0.9459459459459459, + "grad_norm": 0.46535487326329783, + "learning_rate": 3.985299878269486e-05, + "loss": 0.21, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.238117516040802, + "step": 595, + "valid_targets_mean": 4255.5, + "valid_targets_min": 1384 + }, + { + "epoch": 0.9538950715421304, + "grad_norm": 0.49612437665460657, + "learning_rate": 3.9843246642902646e-05, + "loss": 0.1927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22181783616542816, + "step": 600, + "valid_targets_mean": 3831.8, + "valid_targets_min": 305 + }, + { + "epoch": 0.9618441971383148, + "grad_norm": 0.48706304600729844, + "learning_rate": 3.98331825976533e-05, + "loss": 0.2202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1820496767759323, + "step": 605, + "valid_targets_mean": 3962.2, + "valid_targets_min": 960 + }, + { + "epoch": 0.9697933227344993, + "grad_norm": 0.4308975740551794, + "learning_rate": 3.98228068051382e-05, + "loss": 0.2069, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21952344477176666, + "step": 610, + "valid_targets_mean": 5032.3, + "valid_targets_min": 2134 + }, + { + "epoch": 0.9777424483306836, + "grad_norm": 0.5474648098644498, + "learning_rate": 3.9812119428448926e-05, + "loss": 0.1955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20172721147537231, + "step": 615, + "valid_targets_mean": 3203.3, + "valid_targets_min": 588 + }, + { + "epoch": 0.985691573926868, + "grad_norm": 0.4453809222709813, + "learning_rate": 3.9801120635574664e-05, + "loss": 0.1876, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1718732863664627, + "step": 620, + "valid_targets_mean": 3914.3, + "valid_targets_min": 560 + }, + { + "epoch": 0.9936406995230525, + "grad_norm": 0.5334825618659923, + "learning_rate": 3.978981059939961e-05, + "loss": 0.1914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20379891991615295, + "step": 625, + "valid_targets_mean": 3863.4, + "valid_targets_min": 619 + }, + { + "epoch": 1.0015898251192368, + "grad_norm": 0.5529835485096336, + "learning_rate": 3.977818949770022e-05, + "loss": 0.1867, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2112482190132141, + "step": 630, + "valid_targets_mean": 3267.1, + "valid_targets_min": 308 + }, + { + "epoch": 1.0095389507154213, + "grad_norm": 0.5122916288062801, + "learning_rate": 3.976625751314241e-05, + "loss": 0.1839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2054670751094818, + "step": 635, + "valid_targets_mean": 4767.6, + "valid_targets_min": 908 + }, + { + "epoch": 1.0174880763116056, + "grad_norm": 0.5068393688463881, + "learning_rate": 3.975401483327871e-05, + "loss": 0.186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1859699785709381, + "step": 640, + "valid_targets_mean": 3215.8, + "valid_targets_min": 691 + }, + { + "epoch": 1.0254372019077902, + "grad_norm": 0.4145850767881214, + "learning_rate": 3.974146165054532e-05, + "loss": 0.1918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2187093198299408, + "step": 645, + "valid_targets_mean": 4994.4, + "valid_targets_min": 1438 + }, + { + "epoch": 1.0333863275039745, + "grad_norm": 0.4582537603183787, + "learning_rate": 3.972859816225904e-05, + "loss": 0.193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1593848317861557, + "step": 650, + "valid_targets_mean": 3891.4, + "valid_targets_min": 297 + }, + { + "epoch": 1.041335453100159, + "grad_norm": 0.4567275257490204, + "learning_rate": 3.97154245706142e-05, + "loss": 0.1871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18935450911521912, + "step": 655, + "valid_targets_mean": 4475.3, + "valid_targets_min": 560 + }, + { + "epoch": 1.0492845786963434, + "grad_norm": 0.5140859045337463, + "learning_rate": 3.970194108267952e-05, + "loss": 0.1901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18552550673484802, + "step": 660, + "valid_targets_mean": 3475.7, + "valid_targets_min": 240 + }, + { + "epoch": 1.0572337042925277, + "grad_norm": 0.40778078731763423, + "learning_rate": 3.968814791039477e-05, + "loss": 0.189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18175040185451508, + "step": 665, + "valid_targets_mean": 4828.3, + "valid_targets_min": 781 + }, + { + "epoch": 1.0651828298887123, + "grad_norm": 0.5263082294572751, + "learning_rate": 3.967404527056751e-05, + "loss": 0.2161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25833356380462646, + "step": 670, + "valid_targets_mean": 4129.8, + "valid_targets_min": 604 + }, + { + "epoch": 1.0731319554848966, + "grad_norm": 0.4960768768163948, + "learning_rate": 3.9659633384869626e-05, + "loss": 0.189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17353063821792603, + "step": 675, + "valid_targets_mean": 4446.4, + "valid_targets_min": 1213 + }, + { + "epoch": 1.0810810810810811, + "grad_norm": 0.48100083595612203, + "learning_rate": 3.964491247983392e-05, + "loss": 0.2005, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1870320737361908, + "step": 680, + "valid_targets_mean": 3895.7, + "valid_targets_min": 605 + }, + { + "epoch": 1.0890302066772655, + "grad_norm": 0.61106090311707, + "learning_rate": 3.962988278685047e-05, + "loss": 0.1949, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19367003440856934, + "step": 685, + "valid_targets_mean": 3629.8, + "valid_targets_min": 369 + }, + { + "epoch": 1.09697933227345, + "grad_norm": 0.4365754632817891, + "learning_rate": 3.961454454216305e-05, + "loss": 0.197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19389833509922028, + "step": 690, + "valid_targets_mean": 4258.8, + "valid_targets_min": 197 + }, + { + "epoch": 1.1049284578696343, + "grad_norm": 0.5262716250941029, + "learning_rate": 3.9598897986865364e-05, + "loss": 0.1979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19017469882965088, + "step": 695, + "valid_targets_mean": 3231.6, + "valid_targets_min": 628 + }, + { + "epoch": 1.1128775834658187, + "grad_norm": 0.49861602576948466, + "learning_rate": 3.9582943366897316e-05, + "loss": 0.1909, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1815868318080902, + "step": 700, + "valid_targets_mean": 3687.8, + "valid_targets_min": 510 + }, + { + "epoch": 1.1208267090620032, + "grad_norm": 0.48175933314623326, + "learning_rate": 3.956668093304112e-05, + "loss": 0.1856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1780034601688385, + "step": 705, + "valid_targets_mean": 4006.3, + "valid_targets_min": 824 + }, + { + "epoch": 1.1287758346581875, + "grad_norm": 0.5179802897696733, + "learning_rate": 3.9550110940917313e-05, + "loss": 0.198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20583273470401764, + "step": 710, + "valid_targets_mean": 3334.9, + "valid_targets_min": 598 + }, + { + "epoch": 1.136724960254372, + "grad_norm": 0.4267427875463337, + "learning_rate": 3.953323365098082e-05, + "loss": 0.188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18678268790245056, + "step": 715, + "valid_targets_mean": 4681.6, + "valid_targets_min": 1042 + }, + { + "epoch": 1.1446740858505564, + "grad_norm": 0.4891452481904024, + "learning_rate": 3.9516049328516795e-05, + "loss": 0.2026, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19785034656524658, + "step": 720, + "valid_targets_mean": 4126.4, + "valid_targets_min": 524 + }, + { + "epoch": 1.1526232114467407, + "grad_norm": 0.5279349481342677, + "learning_rate": 3.949855824363647e-05, + "loss": 0.1818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19800621271133423, + "step": 725, + "valid_targets_mean": 3239.9, + "valid_targets_min": 846 + }, + { + "epoch": 1.1605723370429253, + "grad_norm": 0.8095806249907248, + "learning_rate": 3.948076067127294e-05, + "loss": 0.19, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17679157853126526, + "step": 730, + "valid_targets_mean": 3760.4, + "valid_targets_min": 352 + }, + { + "epoch": 1.1685214626391096, + "grad_norm": 0.7153084766269169, + "learning_rate": 3.946265689117677e-05, + "loss": 0.1968, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1971210539340973, + "step": 735, + "valid_targets_mean": 2638.4, + "valid_targets_min": 787 + }, + { + "epoch": 1.1764705882352942, + "grad_norm": 0.4535582899393148, + "learning_rate": 3.944424718791169e-05, + "loss": 0.1822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16410158574581146, + "step": 740, + "valid_targets_mean": 3738.6, + "valid_targets_min": 1269 + }, + { + "epoch": 1.1844197138314785, + "grad_norm": 0.47691889948518285, + "learning_rate": 3.942553185085003e-05, + "loss": 0.1885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19451865553855896, + "step": 745, + "valid_targets_mean": 3723.2, + "valid_targets_min": 502 + }, + { + "epoch": 1.192368839427663, + "grad_norm": 0.4783725873079702, + "learning_rate": 3.940651117416824e-05, + "loss": 0.1862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17500782012939453, + "step": 750, + "valid_targets_mean": 3733.9, + "valid_targets_min": 375 + }, + { + "epoch": 1.2003179650238474, + "grad_norm": 0.45013056800116047, + "learning_rate": 3.9387185456842247e-05, + "loss": 0.1957, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1873418688774109, + "step": 755, + "valid_targets_mean": 4532.6, + "valid_targets_min": 649 + }, + { + "epoch": 1.2082670906200317, + "grad_norm": 0.4513636003405965, + "learning_rate": 3.936755500264274e-05, + "loss": 0.1972, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19119738042354584, + "step": 760, + "valid_targets_mean": 3633.2, + "valid_targets_min": 614 + }, + { + "epoch": 1.2162162162162162, + "grad_norm": 0.43039183768805145, + "learning_rate": 3.9347620120130384e-05, + "loss": 0.1853, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19387167692184448, + "step": 765, + "valid_targets_mean": 5013.5, + "valid_targets_min": 671 + }, + { + "epoch": 1.2241653418124006, + "grad_norm": 0.5545872512419363, + "learning_rate": 3.932738112265103e-05, + "loss": 0.2052, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1966329962015152, + "step": 770, + "valid_targets_mean": 3744.9, + "valid_targets_min": 295 + }, + { + "epoch": 1.232114467408585, + "grad_norm": 0.44088445810305865, + "learning_rate": 3.930683832833073e-05, + "loss": 0.189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1997533142566681, + "step": 775, + "valid_targets_mean": 4225.8, + "valid_targets_min": 1304 + }, + { + "epoch": 1.2400635930047694, + "grad_norm": 0.5628393929224067, + "learning_rate": 3.928599206007076e-05, + "loss": 0.1894, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16368547081947327, + "step": 780, + "valid_targets_mean": 4202.9, + "valid_targets_min": 245 + }, + { + "epoch": 1.248012718600954, + "grad_norm": 0.5044797716972268, + "learning_rate": 3.926484264554253e-05, + "loss": 0.1978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2036869078874588, + "step": 785, + "valid_targets_mean": 3405.1, + "valid_targets_min": 656 + }, + { + "epoch": 1.2559618441971383, + "grad_norm": 0.5129578399604017, + "learning_rate": 3.924339041718247e-05, + "loss": 0.2047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19128306210041046, + "step": 790, + "valid_targets_mean": 3456.4, + "valid_targets_min": 658 + }, + { + "epoch": 1.2639109697933226, + "grad_norm": 0.563159848952125, + "learning_rate": 3.922163571218676e-05, + "loss": 0.1901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2012992799282074, + "step": 795, + "valid_targets_mean": 3187.4, + "valid_targets_min": 987 + }, + { + "epoch": 1.2718600953895072, + "grad_norm": 0.6173273492357961, + "learning_rate": 3.919957887250606e-05, + "loss": 0.2051, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22333335876464844, + "step": 800, + "valid_targets_mean": 2847.9, + "valid_targets_min": 630 + }, + { + "epoch": 1.2798092209856915, + "grad_norm": 0.44827689109710595, + "learning_rate": 3.917722024484011e-05, + "loss": 0.1978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18572327494621277, + "step": 805, + "valid_targets_mean": 4246.1, + "valid_targets_min": 621 + }, + { + "epoch": 1.287758346581876, + "grad_norm": 0.6751673561517912, + "learning_rate": 3.915456018063232e-05, + "loss": 0.1854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20056423544883728, + "step": 810, + "valid_targets_mean": 2438.5, + "valid_targets_min": 225 + }, + { + "epoch": 1.2957074721780604, + "grad_norm": 0.7720325184700507, + "learning_rate": 3.9131599036064204e-05, + "loss": 0.1921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1767432987689972, + "step": 815, + "valid_targets_mean": 3982.4, + "valid_targets_min": 669 + }, + { + "epoch": 1.303656597774245, + "grad_norm": 0.4366411337881234, + "learning_rate": 3.9108337172049794e-05, + "loss": 0.187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21016202867031097, + "step": 820, + "valid_targets_mean": 4805.6, + "valid_targets_min": 1425 + }, + { + "epoch": 1.3116057233704292, + "grad_norm": 0.5069327068809399, + "learning_rate": 3.908477495422998e-05, + "loss": 0.1969, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19924074411392212, + "step": 825, + "valid_targets_mean": 3997.4, + "valid_targets_min": 286 + }, + { + "epoch": 1.3195548489666136, + "grad_norm": 0.445873116262987, + "learning_rate": 3.906091275296676e-05, + "loss": 0.1923, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17496737837791443, + "step": 830, + "valid_targets_mean": 4272.1, + "valid_targets_min": 1011 + }, + { + "epoch": 1.3275039745627981, + "grad_norm": 0.9206148708023212, + "learning_rate": 3.903675094333739e-05, + "loss": 0.1951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2181253880262375, + "step": 835, + "valid_targets_mean": 3913.7, + "valid_targets_min": 702 + }, + { + "epoch": 1.3354531001589824, + "grad_norm": 0.5133359649094603, + "learning_rate": 3.901228990512854e-05, + "loss": 0.1987, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2114499807357788, + "step": 840, + "valid_targets_mean": 3686.8, + "valid_targets_min": 308 + }, + { + "epoch": 1.343402225755167, + "grad_norm": 0.4696592099080785, + "learning_rate": 3.898753002283027e-05, + "loss": 0.1998, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.209947407245636, + "step": 845, + "valid_targets_mean": 3526.2, + "valid_targets_min": 619 + }, + { + "epoch": 1.3513513513513513, + "grad_norm": 0.5149647286915602, + "learning_rate": 3.896247168563004e-05, + "loss": 0.21, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22973836958408356, + "step": 850, + "valid_targets_mean": 4359.3, + "valid_targets_min": 1080 + }, + { + "epoch": 1.3593004769475359, + "grad_norm": 0.45847358766247315, + "learning_rate": 3.8937115287406524e-05, + "loss": 0.1761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20062896609306335, + "step": 855, + "valid_targets_mean": 4928.5, + "valid_targets_min": 434 + }, + { + "epoch": 1.3672496025437202, + "grad_norm": 0.5460254266383082, + "learning_rate": 3.891146122672349e-05, + "loss": 0.1994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20617523789405823, + "step": 860, + "valid_targets_mean": 3505.1, + "valid_targets_min": 904 + }, + { + "epoch": 1.3751987281399045, + "grad_norm": 0.5461178071898645, + "learning_rate": 3.8885509906823496e-05, + "loss": 0.1807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19545643031597137, + "step": 865, + "valid_targets_mean": 3159.5, + "valid_targets_min": 1402 + }, + { + "epoch": 1.383147853736089, + "grad_norm": 0.4558363745882089, + "learning_rate": 3.885926173562157e-05, + "loss": 0.1785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18399888277053833, + "step": 870, + "valid_targets_mean": 3677.1, + "valid_targets_min": 309 + }, + { + "epoch": 1.3910969793322734, + "grad_norm": 0.3899280590888697, + "learning_rate": 3.883271712569875e-05, + "loss": 0.1944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20818111300468445, + "step": 875, + "valid_targets_mean": 5354.6, + "valid_targets_min": 1826 + }, + { + "epoch": 1.399046104928458, + "grad_norm": 0.4639547086845976, + "learning_rate": 3.8805876494295694e-05, + "loss": 0.1802, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17379283905029297, + "step": 880, + "valid_targets_mean": 4342.3, + "valid_targets_min": 735 + }, + { + "epoch": 1.4069952305246423, + "grad_norm": 0.39181296096344126, + "learning_rate": 3.877874026330602e-05, + "loss": 0.1913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15908557176589966, + "step": 885, + "valid_targets_mean": 4897.9, + "valid_targets_min": 592 + }, + { + "epoch": 1.4149443561208268, + "grad_norm": 0.4132604124999311, + "learning_rate": 3.875130885926973e-05, + "loss": 0.1866, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18337678909301758, + "step": 890, + "valid_targets_mean": 4757.7, + "valid_targets_min": 330 + }, + { + "epoch": 1.4228934817170111, + "grad_norm": 0.5105514719522013, + "learning_rate": 3.872358271336651e-05, + "loss": 0.1837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19865591824054718, + "step": 895, + "valid_targets_mean": 3313.8, + "valid_targets_min": 487 + }, + { + "epoch": 1.4308426073131955, + "grad_norm": 0.5235070348886672, + "learning_rate": 3.8695562261408915e-05, + "loss": 0.2151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1919848918914795, + "step": 900, + "valid_targets_mean": 2859.2, + "valid_targets_min": 549 + }, + { + "epoch": 1.43879173290938, + "grad_norm": 0.47312190085140265, + "learning_rate": 3.8667247943835555e-05, + "loss": 0.1944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20830847322940826, + "step": 905, + "valid_targets_mean": 4533.8, + "valid_targets_min": 727 + }, + { + "epoch": 1.4467408585055643, + "grad_norm": 0.5175775434248544, + "learning_rate": 3.863864020570414e-05, + "loss": 0.1871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19121363759040833, + "step": 910, + "valid_targets_mean": 3036.8, + "valid_targets_min": 699 + }, + { + "epoch": 1.4546899841017489, + "grad_norm": 0.5781223268494494, + "learning_rate": 3.860973949668454e-05, + "loss": 0.1924, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20266948640346527, + "step": 915, + "valid_targets_mean": 2671.8, + "valid_targets_min": 254 + }, + { + "epoch": 1.4626391096979332, + "grad_norm": 0.4567922909167134, + "learning_rate": 3.8580546271051634e-05, + "loss": 0.1908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18493634462356567, + "step": 920, + "valid_targets_mean": 4128.1, + "valid_targets_min": 282 + }, + { + "epoch": 1.4705882352941178, + "grad_norm": 0.47418699448382096, + "learning_rate": 3.8551060987678236e-05, + "loss": 0.1856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18517082929611206, + "step": 925, + "valid_targets_mean": 3606.5, + "valid_targets_min": 694 + }, + { + "epoch": 1.478537360890302, + "grad_norm": 0.4220236091213908, + "learning_rate": 3.852128411002787e-05, + "loss": 0.1909, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19654536247253418, + "step": 930, + "valid_targets_mean": 5133.6, + "valid_targets_min": 670 + }, + { + "epoch": 1.4864864864864864, + "grad_norm": 0.4573542025228096, + "learning_rate": 3.849121610614745e-05, + "loss": 0.2062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21628251671791077, + "step": 935, + "valid_targets_mean": 4146.7, + "valid_targets_min": 590 + }, + { + "epoch": 1.494435612082671, + "grad_norm": 0.455624363723731, + "learning_rate": 3.8460857448659975e-05, + "loss": 0.1815, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16024024784564972, + "step": 940, + "valid_targets_mean": 3990.5, + "valid_targets_min": 534 + }, + { + "epoch": 1.5023847376788553, + "grad_norm": 0.8397075238280883, + "learning_rate": 3.8430208614757044e-05, + "loss": 0.1914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19728557765483856, + "step": 945, + "valid_targets_mean": 3193.9, + "valid_targets_min": 1069 + }, + { + "epoch": 1.5103338632750396, + "grad_norm": 0.6455640246915216, + "learning_rate": 3.8399270086191425e-05, + "loss": 0.1831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18609043955802917, + "step": 950, + "valid_targets_mean": 3671.4, + "valid_targets_min": 598 + }, + { + "epoch": 1.5182829888712241, + "grad_norm": 0.4393679011147717, + "learning_rate": 3.8368042349269405e-05, + "loss": 0.1931, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18841350078582764, + "step": 955, + "valid_targets_mean": 4189.6, + "valid_targets_min": 1160 + }, + { + "epoch": 1.5262321144674087, + "grad_norm": 0.4748654125112523, + "learning_rate": 3.83365258948432e-05, + "loss": 0.1856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18544679880142212, + "step": 960, + "valid_targets_mean": 4059.8, + "valid_targets_min": 537 + }, + { + "epoch": 1.534181240063593, + "grad_norm": 0.44195423356853586, + "learning_rate": 3.830472121830323e-05, + "loss": 0.1787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17977693676948547, + "step": 965, + "valid_targets_mean": 4036.9, + "valid_targets_min": 719 + }, + { + "epoch": 1.5421303656597773, + "grad_norm": 0.4190445460085609, + "learning_rate": 3.82726288195703e-05, + "loss": 0.1703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15075984597206116, + "step": 970, + "valid_targets_mean": 3788.1, + "valid_targets_min": 919 + }, + { + "epoch": 1.550079491255962, + "grad_norm": 0.38824323345082057, + "learning_rate": 3.824024920308781e-05, + "loss": 0.1886, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16938742995262146, + "step": 975, + "valid_targets_mean": 4616.4, + "valid_targets_min": 617 + }, + { + "epoch": 1.5580286168521462, + "grad_norm": 0.4803671121375321, + "learning_rate": 3.820758287781374e-05, + "loss": 0.191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21928074955940247, + "step": 980, + "valid_targets_mean": 4322.5, + "valid_targets_min": 693 + }, + { + "epoch": 1.5659777424483305, + "grad_norm": 0.4853782041831188, + "learning_rate": 3.8174630357212714e-05, + "loss": 0.1964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17328375577926636, + "step": 985, + "valid_targets_mean": 4317.4, + "valid_targets_min": 690 + }, + { + "epoch": 1.573926868044515, + "grad_norm": 0.47169665339515204, + "learning_rate": 3.8141392159247905e-05, + "loss": 0.1977, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21782585978507996, + "step": 990, + "valid_targets_mean": 3688.9, + "valid_targets_min": 636 + }, + { + "epoch": 1.5818759936406996, + "grad_norm": 0.4690651486120993, + "learning_rate": 3.81078688063729e-05, + "loss": 0.1885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19451123476028442, + "step": 995, + "valid_targets_mean": 3713.2, + "valid_targets_min": 281 + }, + { + "epoch": 1.589825119236884, + "grad_norm": 0.4346450046592069, + "learning_rate": 3.807406082552348e-05, + "loss": 0.1835, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16455082595348358, + "step": 1000, + "valid_targets_mean": 3809.7, + "valid_targets_min": 260 + }, + { + "epoch": 1.5977742448330683, + "grad_norm": 0.41932684002795517, + "learning_rate": 3.803996874810934e-05, + "loss": 0.1848, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17046436667442322, + "step": 1005, + "valid_targets_mean": 4024.4, + "valid_targets_min": 273 + }, + { + "epoch": 1.6057233704292528, + "grad_norm": 0.3705151557622996, + "learning_rate": 3.800559311000575e-05, + "loss": 0.17, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1598929464817047, + "step": 1010, + "valid_targets_mean": 5066.7, + "valid_targets_min": 570 + }, + { + "epoch": 1.6136724960254372, + "grad_norm": 0.43289861621038134, + "learning_rate": 3.7970934451545104e-05, + "loss": 0.1925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1886836290359497, + "step": 1015, + "valid_targets_mean": 4769.2, + "valid_targets_min": 651 + }, + { + "epoch": 1.6216216216216215, + "grad_norm": 0.45330830989766385, + "learning_rate": 3.7935993317508455e-05, + "loss": 0.1924, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18560412526130676, + "step": 1020, + "valid_targets_mean": 4132.1, + "valid_targets_min": 622 + }, + { + "epoch": 1.629570747217806, + "grad_norm": 0.515537997297837, + "learning_rate": 3.790077025711694e-05, + "loss": 0.1839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21635811030864716, + "step": 1025, + "valid_targets_mean": 3410.8, + "valid_targets_min": 233 + }, + { + "epoch": 1.6375198728139906, + "grad_norm": 0.4946566085078078, + "learning_rate": 3.786526582402313e-05, + "loss": 0.1835, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17097623646259308, + "step": 1030, + "valid_targets_mean": 3390.3, + "valid_targets_min": 649 + }, + { + "epoch": 1.645468998410175, + "grad_norm": 0.4669861404848048, + "learning_rate": 3.782948057630236e-05, + "loss": 0.1712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19812054932117462, + "step": 1035, + "valid_targets_mean": 3441.6, + "valid_targets_min": 624 + }, + { + "epoch": 1.6534181240063592, + "grad_norm": 0.4939545083568682, + "learning_rate": 3.779341507644394e-05, + "loss": 0.2019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17919695377349854, + "step": 1040, + "valid_targets_mean": 3929.4, + "valid_targets_min": 302 + }, + { + "epoch": 1.6613672496025438, + "grad_norm": 0.540991144768767, + "learning_rate": 3.775706989134231e-05, + "loss": 0.1857, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20103199779987335, + "step": 1045, + "valid_targets_mean": 3738.6, + "valid_targets_min": 709 + }, + { + "epoch": 1.669316375198728, + "grad_norm": 0.4963684293040473, + "learning_rate": 3.772044559228813e-05, + "loss": 0.1882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19030243158340454, + "step": 1050, + "valid_targets_mean": 3166.8, + "valid_targets_min": 252 + }, + { + "epoch": 1.6772655007949124, + "grad_norm": 0.501401895351916, + "learning_rate": 3.768354275495933e-05, + "loss": 0.1821, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1745380461215973, + "step": 1055, + "valid_targets_mean": 3242.2, + "valid_targets_min": 285 + }, + { + "epoch": 1.685214626391097, + "grad_norm": 0.4968401136342654, + "learning_rate": 3.764636195941198e-05, + "loss": 0.1831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17797568440437317, + "step": 1060, + "valid_targets_mean": 3188.0, + "valid_targets_min": 262 + }, + { + "epoch": 1.6931637519872815, + "grad_norm": 0.4884093983436614, + "learning_rate": 3.760890379007129e-05, + "loss": 0.1837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2129162847995758, + "step": 1065, + "valid_targets_mean": 4418.9, + "valid_targets_min": 523 + }, + { + "epoch": 1.7011128775834659, + "grad_norm": 0.44076053571984003, + "learning_rate": 3.757116883572232e-05, + "loss": 0.1764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1850593388080597, + "step": 1070, + "valid_targets_mean": 4093.6, + "valid_targets_min": 821 + }, + { + "epoch": 1.7090620031796502, + "grad_norm": 0.542297894741752, + "learning_rate": 3.753315768950079e-05, + "loss": 0.1819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21836097538471222, + "step": 1075, + "valid_targets_mean": 3133.3, + "valid_targets_min": 204 + }, + { + "epoch": 1.7170111287758347, + "grad_norm": 0.5168670041375298, + "learning_rate": 3.74948709488837e-05, + "loss": 0.1778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18428263068199158, + "step": 1080, + "valid_targets_mean": 3062.6, + "valid_targets_min": 606 + }, + { + "epoch": 1.724960254372019, + "grad_norm": 0.5653952426228848, + "learning_rate": 3.745630921568004e-05, + "loss": 0.1837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17465798556804657, + "step": 1085, + "valid_targets_mean": 2868.4, + "valid_targets_min": 273 + }, + { + "epoch": 1.7329093799682034, + "grad_norm": 0.46856535469042454, + "learning_rate": 3.741747309602117e-05, + "loss": 0.2027, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20139528810977936, + "step": 1090, + "valid_targets_mean": 4477.7, + "valid_targets_min": 212 + }, + { + "epoch": 1.740858505564388, + "grad_norm": 0.41928118910953777, + "learning_rate": 3.737836320035146e-05, + "loss": 0.1773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18881118297576904, + "step": 1095, + "valid_targets_mean": 4088.9, + "valid_targets_min": 1217 + }, + { + "epoch": 1.7488076311605725, + "grad_norm": 0.4251393076302139, + "learning_rate": 3.733898014341858e-05, + "loss": 0.1914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18259574472904205, + "step": 1100, + "valid_targets_mean": 4110.7, + "valid_targets_min": 294 + }, + { + "epoch": 1.7567567567567568, + "grad_norm": 0.3931735507262585, + "learning_rate": 3.729932454426391e-05, + "loss": 0.1948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18538832664489746, + "step": 1105, + "valid_targets_mean": 5117.2, + "valid_targets_min": 1011 + }, + { + "epoch": 1.7647058823529411, + "grad_norm": 0.4529514349775154, + "learning_rate": 3.725939702621273e-05, + "loss": 0.1903, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18618038296699524, + "step": 1110, + "valid_targets_mean": 4018.2, + "valid_targets_min": 607 + }, + { + "epoch": 1.7726550079491257, + "grad_norm": 0.4503816594471195, + "learning_rate": 3.72191982168645e-05, + "loss": 0.1998, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15615426003932953, + "step": 1115, + "valid_targets_mean": 3476.6, + "valid_targets_min": 679 + }, + { + "epoch": 1.78060413354531, + "grad_norm": 0.5088702362995737, + "learning_rate": 3.717872874808298e-05, + "loss": 0.1802, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21099695563316345, + "step": 1120, + "valid_targets_mean": 3103.6, + "valid_targets_min": 637 + }, + { + "epoch": 1.7885532591414943, + "grad_norm": 0.7999181539319212, + "learning_rate": 3.713798925598623e-05, + "loss": 0.1929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19236738979816437, + "step": 1125, + "valid_targets_mean": 3830.4, + "valid_targets_min": 612 + }, + { + "epoch": 1.7965023847376789, + "grad_norm": 0.4307921702238077, + "learning_rate": 3.709698038093671e-05, + "loss": 0.1915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16169005632400513, + "step": 1130, + "valid_targets_mean": 3835.4, + "valid_targets_min": 1344 + }, + { + "epoch": 1.8044515103338634, + "grad_norm": 0.485741913256061, + "learning_rate": 3.705570276753116e-05, + "loss": 0.1851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18286767601966858, + "step": 1135, + "valid_targets_mean": 3094.0, + "valid_targets_min": 270 + }, + { + "epoch": 1.8124006359300477, + "grad_norm": 0.5255452896790747, + "learning_rate": 3.701415706459044e-05, + "loss": 0.1662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19145852327346802, + "step": 1140, + "valid_targets_mean": 3693.2, + "valid_targets_min": 555 + }, + { + "epoch": 1.820349761526232, + "grad_norm": 0.5046926635208234, + "learning_rate": 3.697234392514942e-05, + "loss": 0.1803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2077309787273407, + "step": 1145, + "valid_targets_mean": 3055.6, + "valid_targets_min": 607 + }, + { + "epoch": 1.8282988871224166, + "grad_norm": 0.4368221113650411, + "learning_rate": 3.693026400644662e-05, + "loss": 0.1795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18555589020252228, + "step": 1150, + "valid_targets_mean": 4117.4, + "valid_targets_min": 279 + }, + { + "epoch": 1.836248012718601, + "grad_norm": 0.4836963066254356, + "learning_rate": 3.6887917969913944e-05, + "loss": 0.2034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2165643572807312, + "step": 1155, + "valid_targets_mean": 3562.9, + "valid_targets_min": 525 + }, + { + "epoch": 1.8441971383147853, + "grad_norm": 0.4084280506680083, + "learning_rate": 3.684530648116625e-05, + "loss": 0.1801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17716243863105774, + "step": 1160, + "valid_targets_mean": 4411.1, + "valid_targets_min": 245 + }, + { + "epoch": 1.8521462639109698, + "grad_norm": 0.44584904009558374, + "learning_rate": 3.68024302099909e-05, + "loss": 0.1894, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19152897596359253, + "step": 1165, + "valid_targets_mean": 3953.6, + "valid_targets_min": 450 + }, + { + "epoch": 1.8600953895071544, + "grad_norm": 0.5649727314954088, + "learning_rate": 3.6759289830337246e-05, + "loss": 0.1871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1756046712398529, + "step": 1170, + "valid_targets_mean": 4100.8, + "valid_targets_min": 1298 + }, + { + "epoch": 1.8680445151033387, + "grad_norm": 0.49736684064600023, + "learning_rate": 3.6715886020306e-05, + "loss": 0.1743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16673019528388977, + "step": 1175, + "valid_targets_mean": 3085.4, + "valid_targets_min": 551 + }, + { + "epoch": 1.875993640699523, + "grad_norm": 0.45789796811072, + "learning_rate": 3.6672219462138604e-05, + "loss": 0.1767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1682954877614975, + "step": 1180, + "valid_targets_mean": 4273.3, + "valid_targets_min": 299 + }, + { + "epoch": 1.8839427662957076, + "grad_norm": 0.4895932235353001, + "learning_rate": 3.6628290842206495e-05, + "loss": 0.1954, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19178809225559235, + "step": 1185, + "valid_targets_mean": 3406.9, + "valid_targets_min": 611 + }, + { + "epoch": 1.8918918918918919, + "grad_norm": 0.45298370929710946, + "learning_rate": 3.658410085100034e-05, + "loss": 0.1898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21535128355026245, + "step": 1190, + "valid_targets_mean": 4193.9, + "valid_targets_min": 1461 + }, + { + "epoch": 1.8998410174880762, + "grad_norm": 0.44589909041567644, + "learning_rate": 3.6539650183119126e-05, + "loss": 0.181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18020093441009521, + "step": 1195, + "valid_targets_mean": 3717.2, + "valid_targets_min": 651 + }, + { + "epoch": 1.9077901430842608, + "grad_norm": 0.3600027728462642, + "learning_rate": 3.64949395372593e-05, + "loss": 0.1823, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17888964712619781, + "step": 1200, + "valid_targets_mean": 5276.8, + "valid_targets_min": 1291 + }, + { + "epoch": 1.9157392686804453, + "grad_norm": 0.4993758462348809, + "learning_rate": 3.644996961620378e-05, + "loss": 0.1838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17506229877471924, + "step": 1205, + "valid_targets_mean": 3077.0, + "valid_targets_min": 279 + }, + { + "epoch": 1.9236883942766294, + "grad_norm": 0.5207237968380335, + "learning_rate": 3.6404741126810854e-05, + "loss": 0.1777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18324558436870575, + "step": 1210, + "valid_targets_mean": 3340.1, + "valid_targets_min": 465 + }, + { + "epoch": 1.931637519872814, + "grad_norm": 0.5043242801398989, + "learning_rate": 3.635925478000315e-05, + "loss": 0.1955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19553515315055847, + "step": 1215, + "valid_targets_mean": 3118.6, + "valid_targets_min": 625 + }, + { + "epoch": 1.9395866454689985, + "grad_norm": 0.3968462413963593, + "learning_rate": 3.631351129075638e-05, + "loss": 0.1762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19829094409942627, + "step": 1220, + "valid_targets_mean": 4999.4, + "valid_targets_min": 1898 + }, + { + "epoch": 1.9475357710651828, + "grad_norm": 0.5208428823666412, + "learning_rate": 3.6267511378088174e-05, + "loss": 0.1718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18420946598052979, + "step": 1225, + "valid_targets_mean": 4461.8, + "valid_targets_min": 735 + }, + { + "epoch": 1.9554848966613672, + "grad_norm": 0.531760446350055, + "learning_rate": 3.622125576504674e-05, + "loss": 0.1776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2018202245235443, + "step": 1230, + "valid_targets_mean": 2726.6, + "valid_targets_min": 257 + }, + { + "epoch": 1.9634340222575517, + "grad_norm": 0.5155602824722777, + "learning_rate": 3.6174745178699484e-05, + "loss": 0.1745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20093253254890442, + "step": 1235, + "valid_targets_mean": 2971.2, + "valid_targets_min": 542 + }, + { + "epoch": 1.9713831478537363, + "grad_norm": 0.48459979840868433, + "learning_rate": 3.612798035012161e-05, + "loss": 0.1876, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17393648624420166, + "step": 1240, + "valid_targets_mean": 3234.2, + "valid_targets_min": 269 + }, + { + "epoch": 1.9793322734499204, + "grad_norm": 0.4476136626874852, + "learning_rate": 3.608096201438465e-05, + "loss": 0.1969, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17231547832489014, + "step": 1245, + "valid_targets_mean": 3629.6, + "valid_targets_min": 559 + }, + { + "epoch": 1.987281399046105, + "grad_norm": 0.430623482890761, + "learning_rate": 3.603369091054484e-05, + "loss": 0.195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1765379011631012, + "step": 1250, + "valid_targets_mean": 3597.4, + "valid_targets_min": 1075 + }, + { + "epoch": 1.9952305246422894, + "grad_norm": 0.436331980026303, + "learning_rate": 3.5986167781631556e-05, + "loss": 0.1838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1572694480419159, + "step": 1255, + "valid_targets_mean": 4041.3, + "valid_targets_min": 300 + }, + { + "epoch": 2.0031796502384736, + "grad_norm": 0.43495258070360787, + "learning_rate": 3.5938393374635634e-05, + "loss": 0.1868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17657440900802612, + "step": 1260, + "valid_targets_mean": 3687.1, + "valid_targets_min": 768 + }, + { + "epoch": 2.011128775834658, + "grad_norm": 0.44734489727626625, + "learning_rate": 3.589036844049762e-05, + "loss": 0.1601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16207647323608398, + "step": 1265, + "valid_targets_mean": 3986.1, + "valid_targets_min": 524 + }, + { + "epoch": 2.0190779014308426, + "grad_norm": 0.46524088697899335, + "learning_rate": 3.584209373409593e-05, + "loss": 0.1601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16202375292778015, + "step": 1270, + "valid_targets_mean": 3613.5, + "valid_targets_min": 670 + }, + { + "epoch": 2.027027027027027, + "grad_norm": 0.45070406893947157, + "learning_rate": 3.579357001423505e-05, + "loss": 0.1664, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17059147357940674, + "step": 1275, + "valid_targets_mean": 3577.6, + "valid_targets_min": 665 + }, + { + "epoch": 2.0349761526232113, + "grad_norm": 0.4499960165350608, + "learning_rate": 3.5744798043633566e-05, + "loss": 0.1668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18004930019378662, + "step": 1280, + "valid_targets_mean": 4093.8, + "valid_targets_min": 566 + }, + { + "epoch": 2.042925278219396, + "grad_norm": 0.5580419519655596, + "learning_rate": 3.569577858891219e-05, + "loss": 0.1683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1862042248249054, + "step": 1285, + "valid_targets_mean": 2967.2, + "valid_targets_min": 673 + }, + { + "epoch": 2.0508744038155804, + "grad_norm": 0.4779943776711724, + "learning_rate": 3.56465124205817e-05, + "loss": 0.1714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22631016373634338, + "step": 1290, + "valid_targets_mean": 3858.1, + "valid_targets_min": 590 + }, + { + "epoch": 2.0588235294117645, + "grad_norm": 0.4911452381940732, + "learning_rate": 3.559700031303082e-05, + "loss": 0.1865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16386955976486206, + "step": 1295, + "valid_targets_mean": 3233.1, + "valid_targets_min": 294 + }, + { + "epoch": 2.066772655007949, + "grad_norm": 0.5288525437496359, + "learning_rate": 3.554724304451411e-05, + "loss": 0.1752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1542258858680725, + "step": 1300, + "valid_targets_mean": 3689.6, + "valid_targets_min": 710 + }, + { + "epoch": 2.0747217806041336, + "grad_norm": 0.6612223596945518, + "learning_rate": 3.549724139713962e-05, + "loss": 0.1651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17203554511070251, + "step": 1305, + "valid_targets_mean": 3336.7, + "valid_targets_min": 294 + }, + { + "epoch": 2.082670906200318, + "grad_norm": 0.4373776224124723, + "learning_rate": 3.544699615685671e-05, + "loss": 0.1637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14887259900569916, + "step": 1310, + "valid_targets_mean": 4026.8, + "valid_targets_min": 755 + }, + { + "epoch": 2.0906200317965022, + "grad_norm": 0.5037762968017296, + "learning_rate": 3.539650811344363e-05, + "loss": 0.1716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1626054346561432, + "step": 1315, + "valid_targets_mean": 3763.6, + "valid_targets_min": 258 + }, + { + "epoch": 2.098569157392687, + "grad_norm": 0.6313496812038843, + "learning_rate": 3.534577806049512e-05, + "loss": 0.1826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15829899907112122, + "step": 1320, + "valid_targets_mean": 3636.2, + "valid_targets_min": 682 + }, + { + "epoch": 2.1065182829888713, + "grad_norm": 0.4425905414584769, + "learning_rate": 3.529480679540996e-05, + "loss": 0.1656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15318667888641357, + "step": 1325, + "valid_targets_mean": 4196.6, + "valid_targets_min": 583 + }, + { + "epoch": 2.1144674085850554, + "grad_norm": 0.6185724943267303, + "learning_rate": 3.524359511937838e-05, + "loss": 0.1786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16072843968868256, + "step": 1330, + "valid_targets_mean": 2547.9, + "valid_targets_min": 263 + }, + { + "epoch": 2.12241653418124, + "grad_norm": 0.48298978637493106, + "learning_rate": 3.5192143837369523e-05, + "loss": 0.1727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19100898504257202, + "step": 1335, + "valid_targets_mean": 3434.8, + "valid_targets_min": 296 + }, + { + "epoch": 2.1303656597774245, + "grad_norm": 0.5247741664837559, + "learning_rate": 3.514045375811878e-05, + "loss": 0.18, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16440363228321075, + "step": 1340, + "valid_targets_mean": 2886.7, + "valid_targets_min": 201 + }, + { + "epoch": 2.138314785373609, + "grad_norm": 0.4645350321366234, + "learning_rate": 3.508852569411506e-05, + "loss": 0.1566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16781508922576904, + "step": 1345, + "valid_targets_mean": 3687.1, + "valid_targets_min": 1028 + }, + { + "epoch": 2.146263910969793, + "grad_norm": 0.4185673015572621, + "learning_rate": 3.503636046158803e-05, + "loss": 0.1605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15546417236328125, + "step": 1350, + "valid_targets_mean": 4516.6, + "valid_targets_min": 876 + }, + { + "epoch": 2.1542130365659777, + "grad_norm": 0.49044816100084626, + "learning_rate": 3.498395888049526e-05, + "loss": 0.1865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19985926151275635, + "step": 1355, + "valid_targets_mean": 3536.6, + "valid_targets_min": 636 + }, + { + "epoch": 2.1621621621621623, + "grad_norm": 0.4570482087101892, + "learning_rate": 3.4931321774509396e-05, + "loss": 0.1658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15912221372127533, + "step": 1360, + "valid_targets_mean": 3607.9, + "valid_targets_min": 299 + }, + { + "epoch": 2.1701112877583464, + "grad_norm": 0.4292180835767507, + "learning_rate": 3.487844997100515e-05, + "loss": 0.1691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19553008675575256, + "step": 1365, + "valid_targets_mean": 4204.5, + "valid_targets_min": 307 + }, + { + "epoch": 2.178060413354531, + "grad_norm": 0.524906614575372, + "learning_rate": 3.482534430104633e-05, + "loss": 0.1585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.174746572971344, + "step": 1370, + "valid_targets_mean": 3015.8, + "valid_targets_min": 324 + }, + { + "epoch": 2.1860095389507155, + "grad_norm": 0.460503992627472, + "learning_rate": 3.4772005599372764e-05, + "loss": 0.1667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17967626452445984, + "step": 1375, + "valid_targets_mean": 3896.6, + "valid_targets_min": 821 + }, + { + "epoch": 2.1939586645469, + "grad_norm": 0.44690523777838326, + "learning_rate": 3.4718434704387174e-05, + "loss": 0.1606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1710180938243866, + "step": 1380, + "valid_targets_mean": 4478.8, + "valid_targets_min": 1388 + }, + { + "epoch": 2.201907790143084, + "grad_norm": 0.41765151372647924, + "learning_rate": 3.4664632458142016e-05, + "loss": 0.1638, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15442225337028503, + "step": 1385, + "valid_targets_mean": 4279.1, + "valid_targets_min": 559 + }, + { + "epoch": 2.2098569157392687, + "grad_norm": 0.434025285839872, + "learning_rate": 3.461059970632622e-05, + "loss": 0.1681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14256054162979126, + "step": 1390, + "valid_targets_mean": 3645.9, + "valid_targets_min": 682 + }, + { + "epoch": 2.2178060413354532, + "grad_norm": 0.39810648920310915, + "learning_rate": 3.4556337298251943e-05, + "loss": 0.174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14162832498550415, + "step": 1395, + "valid_targets_mean": 4033.1, + "valid_targets_min": 253 + }, + { + "epoch": 2.2257551669316373, + "grad_norm": 0.5481937999092757, + "learning_rate": 3.450184608684114e-05, + "loss": 0.1687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1893032193183899, + "step": 1400, + "valid_targets_mean": 2773.9, + "valid_targets_min": 637 + }, + { + "epoch": 2.233704292527822, + "grad_norm": 0.43591132617223516, + "learning_rate": 3.444712692861224e-05, + "loss": 0.1546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12860910594463348, + "step": 1405, + "valid_targets_mean": 3698.6, + "valid_targets_min": 891 + }, + { + "epoch": 2.2416534181240064, + "grad_norm": 0.43573783141415084, + "learning_rate": 3.439218068366663e-05, + "loss": 0.1613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1642727553844452, + "step": 1410, + "valid_targets_mean": 3836.3, + "valid_targets_min": 608 + }, + { + "epoch": 2.249602543720191, + "grad_norm": 0.4326278702113856, + "learning_rate": 3.433700821567516e-05, + "loss": 0.1661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14071246981620789, + "step": 1415, + "valid_targets_mean": 4719.1, + "valid_targets_min": 244 + }, + { + "epoch": 2.257551669316375, + "grad_norm": 0.5549303616093954, + "learning_rate": 3.428161039186456e-05, + "loss": 0.1737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18680042028427124, + "step": 1420, + "valid_targets_mean": 3738.0, + "valid_targets_min": 327 + }, + { + "epoch": 2.2655007949125596, + "grad_norm": 0.8319518010193951, + "learning_rate": 3.42259880830038e-05, + "loss": 0.1682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17515277862548828, + "step": 1425, + "valid_targets_mean": 3830.8, + "valid_targets_min": 859 + }, + { + "epoch": 2.273449920508744, + "grad_norm": 0.4319372849950979, + "learning_rate": 3.417014216339043e-05, + "loss": 0.181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18945015966892242, + "step": 1430, + "valid_targets_mean": 4180.6, + "valid_targets_min": 568 + }, + { + "epoch": 2.2813990461049283, + "grad_norm": 0.4645369639888501, + "learning_rate": 3.4114073510836794e-05, + "loss": 0.1854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17300865054130554, + "step": 1435, + "valid_targets_mean": 3716.4, + "valid_targets_min": 1337 + }, + { + "epoch": 2.289348171701113, + "grad_norm": 0.4624581509651132, + "learning_rate": 3.4057783006656274e-05, + "loss": 0.156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1803177446126938, + "step": 1440, + "valid_targets_mean": 3995.3, + "valid_targets_min": 609 + }, + { + "epoch": 2.2972972972972974, + "grad_norm": 0.3990685274728258, + "learning_rate": 3.400127153564941e-05, + "loss": 0.1632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1542663425207138, + "step": 1445, + "valid_targets_mean": 4452.5, + "valid_targets_min": 1190 + }, + { + "epoch": 2.3052464228934815, + "grad_norm": 0.6187952481541217, + "learning_rate": 3.394453998609001e-05, + "loss": 0.1683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18409979343414307, + "step": 1450, + "valid_targets_mean": 2894.1, + "valid_targets_min": 281 + }, + { + "epoch": 2.313195548489666, + "grad_norm": 0.49894187881909596, + "learning_rate": 3.388758924971117e-05, + "loss": 0.1743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18296250700950623, + "step": 1455, + "valid_targets_mean": 3416.9, + "valid_targets_min": 703 + }, + { + "epoch": 2.3211446740858506, + "grad_norm": 0.5357473334854262, + "learning_rate": 3.3830420221691286e-05, + "loss": 0.1803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18557733297348022, + "step": 1460, + "valid_targets_mean": 3307.2, + "valid_targets_min": 591 + }, + { + "epoch": 2.329093799682035, + "grad_norm": 0.3962622061174984, + "learning_rate": 3.377303380063995e-05, + "loss": 0.1686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14266249537467957, + "step": 1465, + "valid_targets_mean": 4369.6, + "valid_targets_min": 1371 + }, + { + "epoch": 2.337042925278219, + "grad_norm": 0.3785010810778657, + "learning_rate": 3.371543088858384e-05, + "loss": 0.1587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16492611169815063, + "step": 1470, + "valid_targets_mean": 5120.1, + "valid_targets_min": 621 + }, + { + "epoch": 2.3449920508744038, + "grad_norm": 0.48881533304790475, + "learning_rate": 3.365761239095253e-05, + "loss": 0.167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16915087401866913, + "step": 1475, + "valid_targets_mean": 3925.3, + "valid_targets_min": 576 + }, + { + "epoch": 2.3529411764705883, + "grad_norm": 0.48720287373725557, + "learning_rate": 3.3599579216564314e-05, + "loss": 0.1604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17547118663787842, + "step": 1480, + "valid_targets_mean": 3980.4, + "valid_targets_min": 706 + }, + { + "epoch": 2.360890302066773, + "grad_norm": 0.5303350453029894, + "learning_rate": 3.354133227761181e-05, + "loss": 0.1731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.172575443983078, + "step": 1485, + "valid_targets_mean": 4342.2, + "valid_targets_min": 1065 + }, + { + "epoch": 2.368839427662957, + "grad_norm": 0.5948787016804042, + "learning_rate": 3.3482872489647745e-05, + "loss": 0.1635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16050350666046143, + "step": 1490, + "valid_targets_mean": 4073.2, + "valid_targets_min": 279 + }, + { + "epoch": 2.3767885532591415, + "grad_norm": 0.4793825142162827, + "learning_rate": 3.342420077157047e-05, + "loss": 0.1839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1763351708650589, + "step": 1495, + "valid_targets_mean": 3483.2, + "valid_targets_min": 643 + }, + { + "epoch": 2.384737678855326, + "grad_norm": 0.4530776431878804, + "learning_rate": 3.336531804560957e-05, + "loss": 0.1526, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14177027344703674, + "step": 1500, + "valid_targets_mean": 3812.7, + "valid_targets_min": 600 + }, + { + "epoch": 2.39268680445151, + "grad_norm": 0.48697483476860215, + "learning_rate": 3.330622523731136e-05, + "loss": 0.1662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15083934366703033, + "step": 1505, + "valid_targets_mean": 2873.0, + "valid_targets_min": 224 + }, + { + "epoch": 2.4006359300476947, + "grad_norm": 0.49665518829864685, + "learning_rate": 3.32469232755243e-05, + "loss": 0.1753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1856893002986908, + "step": 1510, + "valid_targets_mean": 3365.2, + "valid_targets_min": 904 + }, + { + "epoch": 2.4085850556438793, + "grad_norm": 0.45662940616841696, + "learning_rate": 3.318741309238444e-05, + "loss": 0.1697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18095698952674866, + "step": 1515, + "valid_targets_mean": 3817.8, + "valid_targets_min": 671 + }, + { + "epoch": 2.4165341812400634, + "grad_norm": 0.4040458879915397, + "learning_rate": 3.312769562330075e-05, + "loss": 0.1761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1404886543750763, + "step": 1520, + "valid_targets_mean": 4547.6, + "valid_targets_min": 1168 + }, + { + "epoch": 2.424483306836248, + "grad_norm": 0.4028349948778861, + "learning_rate": 3.306777180694042e-05, + "loss": 0.1697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15197904407978058, + "step": 1525, + "valid_targets_mean": 4536.2, + "valid_targets_min": 731 + }, + { + "epoch": 2.4324324324324325, + "grad_norm": 0.488660600681927, + "learning_rate": 3.30076425852141e-05, + "loss": 0.1594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18501275777816772, + "step": 1530, + "valid_targets_mean": 3903.4, + "valid_targets_min": 519 + }, + { + "epoch": 2.440381558028617, + "grad_norm": 0.4360649019785482, + "learning_rate": 3.294730890326109e-05, + "loss": 0.168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14416208863258362, + "step": 1535, + "valid_targets_mean": 3269.1, + "valid_targets_min": 718 + }, + { + "epoch": 2.448330683624801, + "grad_norm": 0.3987623937481391, + "learning_rate": 3.2886771709434504e-05, + "loss": 0.1559, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1603170931339264, + "step": 1540, + "valid_targets_mean": 4707.6, + "valid_targets_min": 1348 + }, + { + "epoch": 2.4562798092209857, + "grad_norm": 0.5797017322011377, + "learning_rate": 3.282603195528635e-05, + "loss": 0.1523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17639771103858948, + "step": 1545, + "valid_targets_mean": 3203.8, + "valid_targets_min": 568 + }, + { + "epoch": 2.46422893481717, + "grad_norm": 0.44881682454570193, + "learning_rate": 3.276509059555257e-05, + "loss": 0.1704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15224912762641907, + "step": 1550, + "valid_targets_mean": 4156.8, + "valid_targets_min": 551 + }, + { + "epoch": 2.4721780604133547, + "grad_norm": 0.4299523456790547, + "learning_rate": 3.270394858813802e-05, + "loss": 0.1693, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2051280438899994, + "step": 1555, + "valid_targets_mean": 4095.7, + "valid_targets_min": 621 + }, + { + "epoch": 2.480127186009539, + "grad_norm": 0.46068757123222875, + "learning_rate": 3.264260689410147e-05, + "loss": 0.1661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15283241868019104, + "step": 1560, + "valid_targets_mean": 3226.4, + "valid_targets_min": 912 + }, + { + "epoch": 2.4880763116057234, + "grad_norm": 0.47403025787042874, + "learning_rate": 3.2581066477640435e-05, + "loss": 0.1681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15874740481376648, + "step": 1565, + "valid_targets_mean": 3952.1, + "valid_targets_min": 872 + }, + { + "epoch": 2.496025437201908, + "grad_norm": 0.4626317047147037, + "learning_rate": 3.251932830607603e-05, + "loss": 0.1556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16611367464065552, + "step": 1570, + "valid_targets_mean": 3792.7, + "valid_targets_min": 450 + }, + { + "epoch": 2.503974562798092, + "grad_norm": 0.48592321395910304, + "learning_rate": 3.245739334983779e-05, + "loss": 0.1577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1791059672832489, + "step": 1575, + "valid_targets_mean": 3011.2, + "valid_targets_min": 305 + }, + { + "epoch": 2.5119236883942766, + "grad_norm": 0.48642110315097836, + "learning_rate": 3.239526258244842e-05, + "loss": 0.1622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18121963739395142, + "step": 1580, + "valid_targets_mean": 3705.4, + "valid_targets_min": 465 + }, + { + "epoch": 2.519872813990461, + "grad_norm": 0.4193245496234458, + "learning_rate": 3.233293698050845e-05, + "loss": 0.1638, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15967342257499695, + "step": 1585, + "valid_targets_mean": 4023.4, + "valid_targets_min": 548 + }, + { + "epoch": 2.5278219395866453, + "grad_norm": 0.5640866498973993, + "learning_rate": 3.227041752368091e-05, + "loss": 0.165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17153598368167877, + "step": 1590, + "valid_targets_mean": 2566.8, + "valid_targets_min": 217 + }, + { + "epoch": 2.53577106518283, + "grad_norm": 0.41665001246607075, + "learning_rate": 3.220770519467597e-05, + "loss": 0.1653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1515938937664032, + "step": 1595, + "valid_targets_mean": 4122.9, + "valid_targets_min": 237 + }, + { + "epoch": 2.5437201907790143, + "grad_norm": 0.39614139545188226, + "learning_rate": 3.214480097923542e-05, + "loss": 0.1769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18220946192741394, + "step": 1600, + "valid_targets_mean": 5175.8, + "valid_targets_min": 705 + }, + { + "epoch": 2.551669316375199, + "grad_norm": 0.43229505102472626, + "learning_rate": 3.208170586611721e-05, + "loss": 0.1601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16309629380702972, + "step": 1605, + "valid_targets_mean": 3784.4, + "valid_targets_min": 584 + }, + { + "epoch": 2.559618441971383, + "grad_norm": 0.4243434467048783, + "learning_rate": 3.201842084707993e-05, + "loss": 0.1603, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1563934087753296, + "step": 1610, + "valid_targets_mean": 4158.7, + "valid_targets_min": 578 + }, + { + "epoch": 2.5675675675675675, + "grad_norm": 0.43578064511427506, + "learning_rate": 3.195494691686718e-05, + "loss": 0.1736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18895184993743896, + "step": 1615, + "valid_targets_mean": 4406.8, + "valid_targets_min": 1521 + }, + { + "epoch": 2.575516693163752, + "grad_norm": 0.4519962457233152, + "learning_rate": 3.189128507319197e-05, + "loss": 0.1681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17107641696929932, + "step": 1620, + "valid_targets_mean": 3569.7, + "valid_targets_min": 534 + }, + { + "epoch": 2.5834658187599366, + "grad_norm": 0.5454247674899096, + "learning_rate": 3.182743631672102e-05, + "loss": 0.1639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17942318320274353, + "step": 1625, + "valid_targets_mean": 2771.4, + "valid_targets_min": 1372 + }, + { + "epoch": 2.5914149443561207, + "grad_norm": 0.4497356577426226, + "learning_rate": 3.1763401651059025e-05, + "loss": 0.1727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1738359034061432, + "step": 1630, + "valid_targets_mean": 3365.7, + "valid_targets_min": 911 + }, + { + "epoch": 2.5993640699523053, + "grad_norm": 0.7952703957802248, + "learning_rate": 3.1699182082732886e-05, + "loss": 0.1632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18283095955848694, + "step": 1635, + "valid_targets_mean": 2729.1, + "valid_targets_min": 249 + }, + { + "epoch": 2.60731319554849, + "grad_norm": 0.39447463509094877, + "learning_rate": 3.1634778621175905e-05, + "loss": 0.1552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14707916975021362, + "step": 1640, + "valid_targets_mean": 4424.3, + "valid_targets_min": 794 + }, + { + "epoch": 2.615262321144674, + "grad_norm": 0.5826754203242515, + "learning_rate": 3.157019227871189e-05, + "loss": 0.1583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18842703104019165, + "step": 1645, + "valid_targets_mean": 2496.5, + "valid_targets_min": 248 + }, + { + "epoch": 2.6232114467408585, + "grad_norm": 0.4569413845264777, + "learning_rate": 3.150542407053927e-05, + "loss": 0.1649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15572036802768707, + "step": 1650, + "valid_targets_mean": 3717.3, + "valid_targets_min": 206 + }, + { + "epoch": 2.631160572337043, + "grad_norm": 0.3769631110508782, + "learning_rate": 3.144047501471511e-05, + "loss": 0.1596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15287700295448303, + "step": 1655, + "valid_targets_mean": 4491.1, + "valid_targets_min": 623 + }, + { + "epoch": 2.639109697933227, + "grad_norm": 0.3834879165232079, + "learning_rate": 3.1375346132139135e-05, + "loss": 0.1736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14792010188102722, + "step": 1660, + "valid_targets_mean": 4377.1, + "valid_targets_min": 1297 + }, + { + "epoch": 2.6470588235294117, + "grad_norm": 0.40025749764942126, + "learning_rate": 3.131003844653766e-05, + "loss": 0.175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16569784283638, + "step": 1665, + "valid_targets_mean": 4147.7, + "valid_targets_min": 231 + }, + { + "epoch": 2.6550079491255962, + "grad_norm": 0.6151639522393043, + "learning_rate": 3.124455298444752e-05, + "loss": 0.1687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18059095740318298, + "step": 1670, + "valid_targets_mean": 3476.2, + "valid_targets_min": 205 + }, + { + "epoch": 2.6629570747217803, + "grad_norm": 0.4732700696731575, + "learning_rate": 3.1178890775199925e-05, + "loss": 0.169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1820976436138153, + "step": 1675, + "valid_targets_mean": 3768.2, + "valid_targets_min": 671 + }, + { + "epoch": 2.670906200317965, + "grad_norm": 0.44427708127372323, + "learning_rate": 3.1113052850904275e-05, + "loss": 0.1541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14810726046562195, + "step": 1680, + "valid_targets_mean": 3540.6, + "valid_targets_min": 546 + }, + { + "epoch": 2.6788553259141494, + "grad_norm": 0.47123009484821854, + "learning_rate": 3.1047040246431936e-05, + "loss": 0.1646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17077761888504028, + "step": 1685, + "valid_targets_mean": 3889.4, + "valid_targets_min": 269 + }, + { + "epoch": 2.686804451510334, + "grad_norm": 0.4318567795086639, + "learning_rate": 3.098085399939998e-05, + "loss": 0.167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1917814016342163, + "step": 1690, + "valid_targets_mean": 4599.8, + "valid_targets_min": 1239 + }, + { + "epoch": 2.6947535771065185, + "grad_norm": 0.4721685189025569, + "learning_rate": 3.091449515015489e-05, + "loss": 0.1698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17242677509784698, + "step": 1695, + "valid_targets_mean": 3185.9, + "valid_targets_min": 683 + }, + { + "epoch": 2.7027027027027026, + "grad_norm": 0.4250655218343069, + "learning_rate": 3.084796474175618e-05, + "loss": 0.167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1556561142206192, + "step": 1700, + "valid_targets_mean": 4078.5, + "valid_targets_min": 729 + }, + { + "epoch": 2.710651828298887, + "grad_norm": 0.4447639847156236, + "learning_rate": 3.078126381996001e-05, + "loss": 0.1565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15216678380966187, + "step": 1705, + "valid_targets_mean": 4152.2, + "valid_targets_min": 513 + }, + { + "epoch": 2.7186009538950717, + "grad_norm": 0.4579147243616527, + "learning_rate": 3.071439343320274e-05, + "loss": 0.1683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1592155396938324, + "step": 1710, + "valid_targets_mean": 3433.2, + "valid_targets_min": 593 + }, + { + "epoch": 2.726550079491256, + "grad_norm": 0.4407571500778382, + "learning_rate": 3.064735463258449e-05, + "loss": 0.1698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17721107602119446, + "step": 1715, + "valid_targets_mean": 4108.9, + "valid_targets_min": 240 + }, + { + "epoch": 2.7344992050874404, + "grad_norm": 0.40400452078934607, + "learning_rate": 3.0580148471852544e-05, + "loss": 0.1612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15850304067134857, + "step": 1720, + "valid_targets_mean": 4096.9, + "valid_targets_min": 727 + }, + { + "epoch": 2.742448330683625, + "grad_norm": 0.37213244183165023, + "learning_rate": 3.0512776007384882e-05, + "loss": 0.1601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14977069199085236, + "step": 1725, + "valid_targets_mean": 5645.9, + "valid_targets_min": 1122 + }, + { + "epoch": 2.750397456279809, + "grad_norm": 0.46209322559956156, + "learning_rate": 3.0445238298173492e-05, + "loss": 0.1703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1584990918636322, + "step": 1730, + "valid_targets_mean": 3129.1, + "valid_targets_min": 262 + }, + { + "epoch": 2.7583465818759936, + "grad_norm": 0.4385757383753134, + "learning_rate": 3.0377536405807753e-05, + "loss": 0.1614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1700027585029602, + "step": 1735, + "valid_targets_mean": 4126.4, + "valid_targets_min": 1038 + }, + { + "epoch": 2.766295707472178, + "grad_norm": 0.5770273602388422, + "learning_rate": 3.030967139445776e-05, + "loss": 0.163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1636781394481659, + "step": 1740, + "valid_targets_mean": 2366.4, + "valid_targets_min": 330 + }, + { + "epoch": 2.7742448330683622, + "grad_norm": 0.46564547279320734, + "learning_rate": 3.0241644330857604e-05, + "loss": 0.158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15485122799873352, + "step": 1745, + "valid_targets_mean": 3985.4, + "valid_targets_min": 614 + }, + { + "epoch": 2.7821939586645468, + "grad_norm": 0.47547498624328677, + "learning_rate": 3.0173456284288565e-05, + "loss": 0.1675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19546955823898315, + "step": 1750, + "valid_targets_mean": 4155.4, + "valid_targets_min": 492 + }, + { + "epoch": 2.7901430842607313, + "grad_norm": 0.5158856359689629, + "learning_rate": 3.010510832656233e-05, + "loss": 0.1525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.161908358335495, + "step": 1755, + "valid_targets_mean": 3625.6, + "valid_targets_min": 860 + }, + { + "epoch": 2.798092209856916, + "grad_norm": 0.4471113346837271, + "learning_rate": 3.0036601532004175e-05, + "loss": 0.1605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1580810248851776, + "step": 1760, + "valid_targets_mean": 3677.0, + "valid_targets_min": 413 + }, + { + "epoch": 2.8060413354531004, + "grad_norm": 0.4114471784729695, + "learning_rate": 2.996793697743601e-05, + "loss": 0.1736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1642129123210907, + "step": 1765, + "valid_targets_mean": 4178.1, + "valid_targets_min": 835 + }, + { + "epoch": 2.8139904610492845, + "grad_norm": 0.43668170542838514, + "learning_rate": 2.9899115742159512e-05, + "loss": 0.1751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16554221510887146, + "step": 1770, + "valid_targets_mean": 4644.9, + "valid_targets_min": 693 + }, + { + "epoch": 2.821939586645469, + "grad_norm": 0.5043488707813197, + "learning_rate": 2.9830138907939137e-05, + "loss": 0.1689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16675223410129547, + "step": 1775, + "valid_targets_mean": 3199.9, + "valid_targets_min": 304 + }, + { + "epoch": 2.8298887122416536, + "grad_norm": 0.39898904679988073, + "learning_rate": 2.976100755898511e-05, + "loss": 0.1586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16235503554344177, + "step": 1780, + "valid_targets_mean": 4958.9, + "valid_targets_min": 690 + }, + { + "epoch": 2.8378378378378377, + "grad_norm": 0.5087094254868092, + "learning_rate": 2.9691722781936398e-05, + "loss": 0.1638, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16532549262046814, + "step": 1785, + "valid_targets_mean": 2913.4, + "valid_targets_min": 538 + }, + { + "epoch": 2.8457869634340223, + "grad_norm": 0.514390244173884, + "learning_rate": 2.962228566584362e-05, + "loss": 0.1911, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18927565217018127, + "step": 1790, + "valid_targets_mean": 3447.2, + "valid_targets_min": 623 + }, + { + "epoch": 2.853736089030207, + "grad_norm": 0.4250826109776861, + "learning_rate": 2.9552697302151937e-05, + "loss": 0.1572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15817922353744507, + "step": 1795, + "valid_targets_mean": 4025.1, + "valid_targets_min": 779 + }, + { + "epoch": 2.861685214626391, + "grad_norm": 0.4104299314914552, + "learning_rate": 2.9482958784683883e-05, + "loss": 0.1668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1422051340341568, + "step": 1800, + "valid_targets_mean": 3494.4, + "valid_targets_min": 262 + }, + { + "epoch": 2.8696343402225755, + "grad_norm": 0.41440908629922096, + "learning_rate": 2.9413071209622174e-05, + "loss": 0.1661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14636817574501038, + "step": 1805, + "valid_targets_mean": 4556.1, + "valid_targets_min": 847 + }, + { + "epoch": 2.87758346581876, + "grad_norm": 0.48515765879460354, + "learning_rate": 2.934303567549251e-05, + "loss": 0.1705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1678956151008606, + "step": 1810, + "valid_targets_mean": 3278.6, + "valid_targets_min": 522 + }, + { + "epoch": 2.885532591414944, + "grad_norm": 0.514128862513967, + "learning_rate": 2.9272853283146255e-05, + "loss": 0.1739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16510531306266785, + "step": 1815, + "valid_targets_mean": 2977.9, + "valid_targets_min": 355 + }, + { + "epoch": 2.8934817170111287, + "grad_norm": 0.5493626194079975, + "learning_rate": 2.9202525135743158e-05, + "loss": 0.191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22584211826324463, + "step": 1820, + "valid_targets_mean": 2734.2, + "valid_targets_min": 686 + }, + { + "epoch": 2.901430842607313, + "grad_norm": 0.4370645426098783, + "learning_rate": 2.9132052338734033e-05, + "loss": 0.1589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16311654448509216, + "step": 1825, + "valid_targets_mean": 3748.9, + "valid_targets_min": 648 + }, + { + "epoch": 2.9093799682034978, + "grad_norm": 0.45957400888294386, + "learning_rate": 2.9061435999843354e-05, + "loss": 0.1657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15466034412384033, + "step": 1830, + "valid_targets_mean": 3323.8, + "valid_targets_min": 699 + }, + { + "epoch": 2.9173290937996823, + "grad_norm": 0.39548994984246866, + "learning_rate": 2.8990677229051855e-05, + "loss": 0.1451, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1540757715702057, + "step": 1835, + "valid_targets_mean": 4090.8, + "valid_targets_min": 889 + }, + { + "epoch": 2.9252782193958664, + "grad_norm": 0.4905605606999161, + "learning_rate": 2.8919777138579074e-05, + "loss": 0.1724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16376616060733795, + "step": 1840, + "valid_targets_mean": 2992.8, + "valid_targets_min": 253 + }, + { + "epoch": 2.933227344992051, + "grad_norm": 0.39954714024700316, + "learning_rate": 2.8848736842865893e-05, + "loss": 0.168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13295181095600128, + "step": 1845, + "valid_targets_mean": 3622.3, + "valid_targets_min": 534 + }, + { + "epoch": 2.9411764705882355, + "grad_norm": 0.4109518903334489, + "learning_rate": 2.8777557458556993e-05, + "loss": 0.1554, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15689434111118317, + "step": 1850, + "valid_targets_mean": 4599.6, + "valid_targets_min": 1357 + }, + { + "epoch": 2.9491255961844196, + "grad_norm": 0.4134036810952952, + "learning_rate": 2.870624010448332e-05, + "loss": 0.189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16236630082130432, + "step": 1855, + "valid_targets_mean": 4970.9, + "valid_targets_min": 525 + }, + { + "epoch": 2.957074721780604, + "grad_norm": 0.44310700881675275, + "learning_rate": 2.8634785901644497e-05, + "loss": 0.174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18711400032043457, + "step": 1860, + "valid_targets_mean": 4005.5, + "valid_targets_min": 368 + }, + { + "epoch": 2.9650238473767887, + "grad_norm": 0.3717450015352577, + "learning_rate": 2.856319597319119e-05, + "loss": 0.1531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14527782797813416, + "step": 1865, + "valid_targets_mean": 4636.0, + "valid_targets_min": 834 + }, + { + "epoch": 2.972972972972973, + "grad_norm": 0.4489188113660503, + "learning_rate": 2.849147144440747e-05, + "loss": 0.1744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18267220258712769, + "step": 1870, + "valid_targets_mean": 3598.4, + "valid_targets_min": 272 + }, + { + "epoch": 2.9809220985691574, + "grad_norm": 0.463504579635663, + "learning_rate": 2.8419613442693127e-05, + "loss": 0.1825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16523773968219757, + "step": 1875, + "valid_targets_mean": 3690.0, + "valid_targets_min": 741 + }, + { + "epoch": 2.988871224165342, + "grad_norm": 0.42098915439827383, + "learning_rate": 2.834762309754593e-05, + "loss": 0.1758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15425992012023926, + "step": 1880, + "valid_targets_mean": 3851.8, + "valid_targets_min": 1123 + }, + { + "epoch": 2.996820349761526, + "grad_norm": 0.44087606889969083, + "learning_rate": 2.8275501540543877e-05, + "loss": 0.177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1857764720916748, + "step": 1885, + "valid_targets_mean": 4492.8, + "valid_targets_min": 303 + }, + { + "epoch": 3.0047694753577106, + "grad_norm": 0.40019519904714795, + "learning_rate": 2.8203249905327434e-05, + "loss": 0.1439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14805257320404053, + "step": 1890, + "valid_targets_mean": 4000.0, + "valid_targets_min": 622 + }, + { + "epoch": 3.012718600953895, + "grad_norm": 0.3925087002790559, + "learning_rate": 2.81308693275817e-05, + "loss": 0.1421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15407413244247437, + "step": 1895, + "valid_targets_mean": 5172.0, + "valid_targets_min": 717 + }, + { + "epoch": 3.0206677265500796, + "grad_norm": 0.46275383155041416, + "learning_rate": 2.8058360945018518e-05, + "loss": 0.1496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13890619575977325, + "step": 1900, + "valid_targets_mean": 4028.7, + "valid_targets_min": 560 + }, + { + "epoch": 3.0286168521462637, + "grad_norm": 0.4551559001740428, + "learning_rate": 2.7985725897358665e-05, + "loss": 0.1543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15436714887619019, + "step": 1905, + "valid_targets_mean": 3797.8, + "valid_targets_min": 565 + }, + { + "epoch": 3.0365659777424483, + "grad_norm": 0.4842383814763534, + "learning_rate": 2.791296532631389e-05, + "loss": 0.1553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14294975996017456, + "step": 1910, + "valid_targets_mean": 3047.1, + "valid_targets_min": 545 + }, + { + "epoch": 3.044515103338633, + "grad_norm": 0.5397174824822419, + "learning_rate": 2.7840080375568964e-05, + "loss": 0.1452, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1518700122833252, + "step": 1915, + "valid_targets_mean": 3124.6, + "valid_targets_min": 650 + }, + { + "epoch": 3.0524642289348174, + "grad_norm": 0.5358906602930991, + "learning_rate": 2.7767072190763733e-05, + "loss": 0.1643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1446910947561264, + "step": 1920, + "valid_targets_mean": 2824.1, + "valid_targets_min": 538 + }, + { + "epoch": 3.0604133545310015, + "grad_norm": 0.40186405732354413, + "learning_rate": 2.7693941919475076e-05, + "loss": 0.1517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1320028454065323, + "step": 1925, + "valid_targets_mean": 4311.8, + "valid_targets_min": 1384 + }, + { + "epoch": 3.068362480127186, + "grad_norm": 0.5420721530535801, + "learning_rate": 2.7620690711198906e-05, + "loss": 0.1525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16763970255851746, + "step": 1930, + "valid_targets_mean": 3246.2, + "valid_targets_min": 678 + }, + { + "epoch": 3.0763116057233706, + "grad_norm": 0.46354880795162495, + "learning_rate": 2.7547319717332066e-05, + "loss": 0.1454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15304705500602722, + "step": 1935, + "valid_targets_mean": 3665.1, + "valid_targets_min": 304 + }, + { + "epoch": 3.0842607313195547, + "grad_norm": 0.4230495218281992, + "learning_rate": 2.7473830091154243e-05, + "loss": 0.1475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1243860200047493, + "step": 1940, + "valid_targets_mean": 3626.8, + "valid_targets_min": 568 + }, + { + "epoch": 3.0922098569157392, + "grad_norm": 0.4361635236789642, + "learning_rate": 2.7400222987809856e-05, + "loss": 0.1448, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12188908457756042, + "step": 1945, + "valid_targets_mean": 4010.5, + "valid_targets_min": 498 + }, + { + "epoch": 3.100158982511924, + "grad_norm": 0.4439974035967091, + "learning_rate": 2.7326499564289867e-05, + "loss": 0.1494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13789242506027222, + "step": 1950, + "valid_targets_mean": 3711.9, + "valid_targets_min": 454 + }, + { + "epoch": 3.108108108108108, + "grad_norm": 0.41877176013375694, + "learning_rate": 2.725266097941363e-05, + "loss": 0.1395, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1316269338130951, + "step": 1955, + "valid_targets_mean": 4314.9, + "valid_targets_min": 1299 + }, + { + "epoch": 3.1160572337042924, + "grad_norm": 0.527858481472521, + "learning_rate": 2.717870839381066e-05, + "loss": 0.1489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15569709241390228, + "step": 1960, + "valid_targets_mean": 3514.8, + "valid_targets_min": 949 + }, + { + "epoch": 3.124006359300477, + "grad_norm": 0.3850217928191781, + "learning_rate": 2.7104642969902357e-05, + "loss": 0.1367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1267361342906952, + "step": 1965, + "valid_targets_mean": 4886.9, + "valid_targets_min": 300 + }, + { + "epoch": 3.1319554848966615, + "grad_norm": 0.44634061646307166, + "learning_rate": 2.7030465871883812e-05, + "loss": 0.1478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15789856016635895, + "step": 1970, + "valid_targets_mean": 4405.6, + "valid_targets_min": 512 + }, + { + "epoch": 3.1399046104928456, + "grad_norm": 0.44800018132825026, + "learning_rate": 2.6956178265705434e-05, + "loss": 0.1458, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1587265133857727, + "step": 1975, + "valid_targets_mean": 4086.9, + "valid_targets_min": 1071 + }, + { + "epoch": 3.14785373608903, + "grad_norm": 0.4616270378131397, + "learning_rate": 2.688178131905465e-05, + "loss": 0.1431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1500604748725891, + "step": 1980, + "valid_targets_mean": 3678.9, + "valid_targets_min": 683 + }, + { + "epoch": 3.1558028616852147, + "grad_norm": 0.5790165944659751, + "learning_rate": 2.680727620133757e-05, + "loss": 0.1504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15115083754062653, + "step": 1985, + "valid_targets_mean": 2691.0, + "valid_targets_min": 502 + }, + { + "epoch": 3.1637519872813993, + "grad_norm": 0.4845439722921149, + "learning_rate": 2.673266408366057e-05, + "loss": 0.1364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12436607480049133, + "step": 1990, + "valid_targets_mean": 3094.4, + "valid_targets_min": 299 + }, + { + "epoch": 3.1717011128775834, + "grad_norm": 0.49364688977299304, + "learning_rate": 2.6657946138811915e-05, + "loss": 0.1452, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1511753499507904, + "step": 1995, + "valid_targets_mean": 3998.1, + "valid_targets_min": 781 + }, + { + "epoch": 3.179650238473768, + "grad_norm": 0.488321490398884, + "learning_rate": 2.6583123541243302e-05, + "loss": 0.1433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17050573229789734, + "step": 2000, + "valid_targets_mean": 3620.8, + "valid_targets_min": 248 + }, + { + "epoch": 3.1875993640699525, + "grad_norm": 0.40982555411387517, + "learning_rate": 2.6508197467051406e-05, + "loss": 0.1431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12908829748630524, + "step": 2005, + "valid_targets_mean": 4178.4, + "valid_targets_min": 1472 + }, + { + "epoch": 3.1955484896661366, + "grad_norm": 0.40055451862255126, + "learning_rate": 2.6433169093959405e-05, + "loss": 0.1441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14281156659126282, + "step": 2010, + "valid_targets_mean": 4660.8, + "valid_targets_min": 685 + }, + { + "epoch": 3.203497615262321, + "grad_norm": 0.46116477905857267, + "learning_rate": 2.6358039601298454e-05, + "loss": 0.1515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13968366384506226, + "step": 2015, + "valid_targets_mean": 3884.2, + "valid_targets_min": 344 + }, + { + "epoch": 3.2114467408585057, + "grad_norm": 0.43289954051552515, + "learning_rate": 2.6282810169989158e-05, + "loss": 0.1531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1596599519252777, + "step": 2020, + "valid_targets_mean": 4390.9, + "valid_targets_min": 254 + }, + { + "epoch": 3.21939586645469, + "grad_norm": 0.5078592423856673, + "learning_rate": 2.6207481982523e-05, + "loss": 0.1546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1573963314294815, + "step": 2025, + "valid_targets_mean": 3303.6, + "valid_targets_min": 1122 + }, + { + "epoch": 3.2273449920508743, + "grad_norm": 0.5091146694996267, + "learning_rate": 2.6132056222943757e-05, + "loss": 0.154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1862080991268158, + "step": 2030, + "valid_targets_mean": 3232.9, + "valid_targets_min": 481 + }, + { + "epoch": 3.235294117647059, + "grad_norm": 0.5238546808532447, + "learning_rate": 2.6056534076828883e-05, + "loss": 0.1576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18422989547252655, + "step": 2035, + "valid_targets_mean": 3585.9, + "valid_targets_min": 266 + }, + { + "epoch": 3.2432432432432434, + "grad_norm": 0.4363636469238868, + "learning_rate": 2.598091673127091e-05, + "loss": 0.1489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14309856295585632, + "step": 2040, + "valid_targets_mean": 3745.1, + "valid_targets_min": 921 + }, + { + "epoch": 3.2511923688394275, + "grad_norm": 0.4339103556613612, + "learning_rate": 2.5905205374858728e-05, + "loss": 0.1526, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14386232197284698, + "step": 2045, + "valid_targets_mean": 4056.8, + "valid_targets_min": 1465 + }, + { + "epoch": 3.259141494435612, + "grad_norm": 0.42741463506623106, + "learning_rate": 2.5829401197658946e-05, + "loss": 0.1618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20502996444702148, + "step": 2050, + "valid_targets_mean": 5179.3, + "valid_targets_min": 1065 + }, + { + "epoch": 3.2670906200317966, + "grad_norm": 0.43626977002875506, + "learning_rate": 2.5753505391197173e-05, + "loss": 0.1436, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.148350328207016, + "step": 2055, + "valid_targets_mean": 4743.6, + "valid_targets_min": 568 + }, + { + "epoch": 3.275039745627981, + "grad_norm": 0.46840087217117315, + "learning_rate": 2.5677519148439286e-05, + "loss": 0.1576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15138383209705353, + "step": 2060, + "valid_targets_mean": 3961.1, + "valid_targets_min": 243 + }, + { + "epoch": 3.2829888712241653, + "grad_norm": 0.507365313224075, + "learning_rate": 2.56014436637727e-05, + "loss": 0.1432, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13263681530952454, + "step": 2065, + "valid_targets_mean": 2687.8, + "valid_targets_min": 251 + }, + { + "epoch": 3.29093799682035, + "grad_norm": 0.4749587295324443, + "learning_rate": 2.5525280132987544e-05, + "loss": 0.1503, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13461729884147644, + "step": 2070, + "valid_targets_mean": 3908.1, + "valid_targets_min": 343 + }, + { + "epoch": 3.2988871224165344, + "grad_norm": 0.5571544935176302, + "learning_rate": 2.544902975325793e-05, + "loss": 0.1547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15287074446678162, + "step": 2075, + "valid_targets_mean": 2425.0, + "valid_targets_min": 205 + }, + { + "epoch": 3.3068362480127185, + "grad_norm": 0.45571560695399776, + "learning_rate": 2.5372693723123075e-05, + "loss": 0.1445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1337605118751526, + "step": 2080, + "valid_targets_mean": 3706.5, + "valid_targets_min": 448 + }, + { + "epoch": 3.314785373608903, + "grad_norm": 0.4841238396850524, + "learning_rate": 2.5296273242468514e-05, + "loss": 0.1432, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13751645386219025, + "step": 2085, + "valid_targets_mean": 3224.1, + "valid_targets_min": 281 + }, + { + "epoch": 3.3227344992050876, + "grad_norm": 0.5882557810118016, + "learning_rate": 2.5219769512507202e-05, + "loss": 0.1449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15059013664722443, + "step": 2090, + "valid_targets_mean": 4222.4, + "valid_targets_min": 1837 + }, + { + "epoch": 3.3306836248012717, + "grad_norm": 0.4355529800792949, + "learning_rate": 2.5143183735760638e-05, + "loss": 0.1401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13719777762889862, + "step": 2095, + "valid_targets_mean": 5038.6, + "valid_targets_min": 692 + }, + { + "epoch": 3.338632750397456, + "grad_norm": 0.48366910917541367, + "learning_rate": 2.5066517116039978e-05, + "loss": 0.1484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14523795247077942, + "step": 2100, + "valid_targets_mean": 3369.9, + "valid_targets_min": 635 + }, + { + "epoch": 3.3465818759936408, + "grad_norm": 0.432180761274418, + "learning_rate": 2.4989770858427113e-05, + "loss": 0.1391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14896121621131897, + "step": 2105, + "valid_targets_mean": 4274.6, + "valid_targets_min": 1463 + }, + { + "epoch": 3.3545310015898253, + "grad_norm": 0.4673043662584066, + "learning_rate": 2.4912946169255722e-05, + "loss": 0.1482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14605014026165009, + "step": 2110, + "valid_targets_mean": 3783.2, + "valid_targets_min": 1054 + }, + { + "epoch": 3.3624801271860094, + "grad_norm": 0.4743882917216998, + "learning_rate": 2.4836044256092288e-05, + "loss": 0.151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1554591804742813, + "step": 2115, + "valid_targets_mean": 3935.7, + "valid_targets_min": 612 + }, + { + "epoch": 3.370429252782194, + "grad_norm": 0.4815307642109087, + "learning_rate": 2.475906632771714e-05, + "loss": 0.1478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14014503359794617, + "step": 2120, + "valid_targets_mean": 3164.2, + "valid_targets_min": 373 + }, + { + "epoch": 3.3783783783783785, + "grad_norm": 0.40589792615723674, + "learning_rate": 2.468201359410548e-05, + "loss": 0.1402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14491534233093262, + "step": 2125, + "valid_targets_mean": 4419.9, + "valid_targets_min": 340 + }, + { + "epoch": 3.3863275039745626, + "grad_norm": 0.44260667005600757, + "learning_rate": 2.4604887266408304e-05, + "loss": 0.1451, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1457509994506836, + "step": 2130, + "valid_targets_mean": 4288.4, + "valid_targets_min": 637 + }, + { + "epoch": 3.394276629570747, + "grad_norm": 0.4581973653604292, + "learning_rate": 2.4527688556933402e-05, + "loss": 0.1517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15415199100971222, + "step": 2135, + "valid_targets_mean": 4045.8, + "valid_targets_min": 977 + }, + { + "epoch": 3.4022257551669317, + "grad_norm": 0.7140350335944461, + "learning_rate": 2.445041867912629e-05, + "loss": 0.1537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15349167585372925, + "step": 2140, + "valid_targets_mean": 2618.8, + "valid_targets_min": 673 + }, + { + "epoch": 3.4101748807631163, + "grad_norm": 0.5034625114730497, + "learning_rate": 2.4373078847551154e-05, + "loss": 0.1456, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14303407073020935, + "step": 2145, + "valid_targets_mean": 3076.3, + "valid_targets_min": 675 + }, + { + "epoch": 3.4181240063593004, + "grad_norm": 0.48632708952047105, + "learning_rate": 2.4295670277871736e-05, + "loss": 0.1495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1465069055557251, + "step": 2150, + "valid_targets_mean": 3372.1, + "valid_targets_min": 705 + }, + { + "epoch": 3.426073131955485, + "grad_norm": 0.4937147781905013, + "learning_rate": 2.4218194186832237e-05, + "loss": 0.1492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17899924516677856, + "step": 2155, + "valid_targets_mean": 3543.7, + "valid_targets_min": 204 + }, + { + "epoch": 3.4340222575516695, + "grad_norm": 0.446614778026149, + "learning_rate": 2.4140651792238193e-05, + "loss": 0.1502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16787397861480713, + "step": 2160, + "valid_targets_mean": 4232.8, + "valid_targets_min": 1131 + }, + { + "epoch": 3.4419713831478536, + "grad_norm": 0.5049089497785076, + "learning_rate": 2.4063044312937332e-05, + "loss": 0.1635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1745222806930542, + "step": 2165, + "valid_targets_mean": 3461.6, + "valid_targets_min": 582 + }, + { + "epoch": 3.449920508744038, + "grad_norm": 0.41955260576113246, + "learning_rate": 2.3985372968800407e-05, + "loss": 0.1517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1277008205652237, + "step": 2170, + "valid_targets_mean": 3886.0, + "valid_targets_min": 548 + }, + { + "epoch": 3.4578696343402227, + "grad_norm": 0.44853544938510986, + "learning_rate": 2.3907638980702043e-05, + "loss": 0.1454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13143455982208252, + "step": 2175, + "valid_targets_mean": 2990.2, + "valid_targets_min": 695 + }, + { + "epoch": 3.4658187599364068, + "grad_norm": 0.5052915184377108, + "learning_rate": 2.382984357050151e-05, + "loss": 0.1592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1574200838804245, + "step": 2180, + "valid_targets_mean": 3370.8, + "valid_targets_min": 352 + }, + { + "epoch": 3.4737678855325913, + "grad_norm": 0.44259229283391566, + "learning_rate": 2.3751987961023545e-05, + "loss": 0.1378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13950997591018677, + "step": 2185, + "valid_targets_mean": 3953.6, + "valid_targets_min": 220 + }, + { + "epoch": 3.481717011128776, + "grad_norm": 0.48788565916051185, + "learning_rate": 2.3674073376039152e-05, + "loss": 0.1493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1728115677833557, + "step": 2190, + "valid_targets_mean": 3669.0, + "valid_targets_min": 671 + }, + { + "epoch": 3.4896661367249604, + "grad_norm": 0.4492760385555854, + "learning_rate": 2.359610104024631e-05, + "loss": 0.1581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15678560733795166, + "step": 2195, + "valid_targets_mean": 3583.8, + "valid_targets_min": 637 + }, + { + "epoch": 3.4976152623211445, + "grad_norm": 0.39986255476691307, + "learning_rate": 2.3518072179250753e-05, + "loss": 0.1462, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14417177438735962, + "step": 2200, + "valid_targets_mean": 4219.2, + "valid_targets_min": 641 + }, + { + "epoch": 3.505564387917329, + "grad_norm": 0.384596360059307, + "learning_rate": 2.343998801954673e-05, + "loss": 0.1377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11725412309169769, + "step": 2205, + "valid_targets_mean": 4539.5, + "valid_targets_min": 1403 + }, + { + "epoch": 3.5135135135135136, + "grad_norm": 0.3726022277230603, + "learning_rate": 2.3361849788497666e-05, + "loss": 0.1467, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12121937423944473, + "step": 2210, + "valid_targets_mean": 4714.5, + "valid_targets_min": 1012 + }, + { + "epoch": 3.521462639109698, + "grad_norm": 0.426941312289138, + "learning_rate": 2.3283658714316935e-05, + "loss": 0.1498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13825076818466187, + "step": 2215, + "valid_targets_mean": 4406.4, + "valid_targets_min": 559 + }, + { + "epoch": 3.5294117647058822, + "grad_norm": 0.4301504339821543, + "learning_rate": 2.320541602604851e-05, + "loss": 0.1424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13971027731895447, + "step": 2220, + "valid_targets_mean": 3877.9, + "valid_targets_min": 642 + }, + { + "epoch": 3.537360890302067, + "grad_norm": 0.5113573188300341, + "learning_rate": 2.3127122953547663e-05, + "loss": 0.15, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1780821532011032, + "step": 2225, + "valid_targets_mean": 3525.3, + "valid_targets_min": 738 + }, + { + "epoch": 3.5453100158982513, + "grad_norm": 0.5063219756936636, + "learning_rate": 2.3048780727461627e-05, + "loss": 0.1625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16249513626098633, + "step": 2230, + "valid_targets_mean": 3025.3, + "valid_targets_min": 627 + }, + { + "epoch": 3.5532591414944354, + "grad_norm": 0.4792775762459798, + "learning_rate": 2.2970390579210246e-05, + "loss": 0.1414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1491563469171524, + "step": 2235, + "valid_targets_mean": 3644.2, + "valid_targets_min": 568 + }, + { + "epoch": 3.56120826709062, + "grad_norm": 0.4620771758711201, + "learning_rate": 2.2891953740966643e-05, + "loss": 0.1509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16006189584732056, + "step": 2240, + "valid_targets_mean": 4079.1, + "valid_targets_min": 235 + }, + { + "epoch": 3.5691573926868045, + "grad_norm": 0.44785087939845997, + "learning_rate": 2.281347144563782e-05, + "loss": 0.1515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1455570012331009, + "step": 2245, + "valid_targets_mean": 4200.4, + "valid_targets_min": 562 + }, + { + "epoch": 3.5771065182829886, + "grad_norm": 0.5185565156273501, + "learning_rate": 2.273494492684531e-05, + "loss": 0.1497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16593654453754425, + "step": 2250, + "valid_targets_mean": 3104.9, + "valid_targets_min": 309 + }, + { + "epoch": 3.585055643879173, + "grad_norm": 0.4172228609919065, + "learning_rate": 2.265637541890577e-05, + "loss": 0.1484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1278490126132965, + "step": 2255, + "valid_targets_mean": 3515.1, + "valid_targets_min": 294 + }, + { + "epoch": 3.5930047694753577, + "grad_norm": 0.4286795679059123, + "learning_rate": 2.2577764156811563e-05, + "loss": 0.1455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14926648139953613, + "step": 2260, + "valid_targets_mean": 4430.3, + "valid_targets_min": 675 + }, + { + "epoch": 3.6009538950715423, + "grad_norm": 0.560172122368529, + "learning_rate": 2.2499112376211373e-05, + "loss": 0.1631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18510201573371887, + "step": 2265, + "valid_targets_mean": 3477.6, + "valid_targets_min": 794 + }, + { + "epoch": 3.6089030206677264, + "grad_norm": 0.43226594819787095, + "learning_rate": 2.2420421313390776e-05, + "loss": 0.1498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14725884795188904, + "step": 2270, + "valid_targets_mean": 4508.8, + "valid_targets_min": 1163 + }, + { + "epoch": 3.616852146263911, + "grad_norm": 0.4437643653037656, + "learning_rate": 2.234169220525282e-05, + "loss": 0.1482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13907790184020996, + "step": 2275, + "valid_targets_mean": 4162.6, + "valid_targets_min": 1468 + }, + { + "epoch": 3.6248012718600955, + "grad_norm": 0.4837609246828785, + "learning_rate": 2.226292628929853e-05, + "loss": 0.1474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14810554683208466, + "step": 2280, + "valid_targets_mean": 3524.7, + "valid_targets_min": 947 + }, + { + "epoch": 3.63275039745628, + "grad_norm": 0.4869827211098654, + "learning_rate": 2.2184124803607525e-05, + "loss": 0.1466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15223252773284912, + "step": 2285, + "valid_targets_mean": 3463.3, + "valid_targets_min": 759 + }, + { + "epoch": 3.640699523052464, + "grad_norm": 0.4108383751418014, + "learning_rate": 2.210528898681851e-05, + "loss": 0.1539, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.142578586935997, + "step": 2290, + "valid_targets_mean": 4477.9, + "valid_targets_min": 1006 + }, + { + "epoch": 3.6486486486486487, + "grad_norm": 0.4566807244274852, + "learning_rate": 2.2026420078109825e-05, + "loss": 0.1373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13954466581344604, + "step": 2295, + "valid_targets_mean": 4329.2, + "valid_targets_min": 1167 + }, + { + "epoch": 3.6565977742448332, + "grad_norm": 0.4162315487171508, + "learning_rate": 2.1947519317179972e-05, + "loss": 0.1544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11526475101709366, + "step": 2300, + "valid_targets_mean": 4507.4, + "valid_targets_min": 775 + }, + { + "epoch": 3.6645468998410173, + "grad_norm": 0.4900587733241069, + "learning_rate": 2.1868587944228118e-05, + "loss": 0.1595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15211628377437592, + "step": 2305, + "valid_targets_mean": 3048.8, + "valid_targets_min": 286 + }, + { + "epoch": 3.672496025437202, + "grad_norm": 0.5187421796257302, + "learning_rate": 2.1789627199934588e-05, + "loss": 0.1605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1537800431251526, + "step": 2310, + "valid_targets_mean": 3951.4, + "valid_targets_min": 723 + }, + { + "epoch": 3.6804451510333864, + "grad_norm": 0.4523155098757611, + "learning_rate": 2.1710638325441408e-05, + "loss": 0.1445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15439185500144958, + "step": 2315, + "valid_targets_mean": 3848.8, + "valid_targets_min": 468 + }, + { + "epoch": 3.6883942766295705, + "grad_norm": 0.5113186455328838, + "learning_rate": 2.1631622562332744e-05, + "loss": 0.1497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12993109226226807, + "step": 2320, + "valid_targets_mean": 3168.6, + "valid_targets_min": 203 + }, + { + "epoch": 3.696343402225755, + "grad_norm": 0.520264446513899, + "learning_rate": 2.155258115261542e-05, + "loss": 0.1453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1672590672969818, + "step": 2325, + "valid_targets_mean": 3197.3, + "valid_targets_min": 538 + }, + { + "epoch": 3.7042925278219396, + "grad_norm": 0.397858988377642, + "learning_rate": 2.1473515338699383e-05, + "loss": 0.146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13418936729431152, + "step": 2330, + "valid_targets_mean": 4454.8, + "valid_targets_min": 278 + }, + { + "epoch": 3.7122416534181237, + "grad_norm": 0.47038464892938564, + "learning_rate": 2.1394426363378186e-05, + "loss": 0.1567, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14343999326229095, + "step": 2335, + "valid_targets_mean": 3456.6, + "valid_targets_min": 524 + }, + { + "epoch": 3.7201907790143083, + "grad_norm": 0.4590271298948369, + "learning_rate": 2.1315315469809426e-05, + "loss": 0.1465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15839475393295288, + "step": 2340, + "valid_targets_mean": 3980.2, + "valid_targets_min": 1039 + }, + { + "epoch": 3.728139904610493, + "grad_norm": 0.526091163735105, + "learning_rate": 2.1236183901495236e-05, + "loss": 0.1668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21831142902374268, + "step": 2345, + "valid_targets_mean": 3343.4, + "valid_targets_min": 578 + }, + { + "epoch": 3.7360890302066774, + "grad_norm": 0.4487683579294528, + "learning_rate": 2.1157032902262716e-05, + "loss": 0.1424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15747785568237305, + "step": 2350, + "valid_targets_mean": 4185.1, + "valid_targets_min": 834 + }, + { + "epoch": 3.744038155802862, + "grad_norm": 0.41019128305055114, + "learning_rate": 2.1077863716244388e-05, + "loss": 0.1415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12705016136169434, + "step": 2355, + "valid_targets_mean": 3949.4, + "valid_targets_min": 656 + }, + { + "epoch": 3.751987281399046, + "grad_norm": 0.446236415640611, + "learning_rate": 2.099867758785866e-05, + "loss": 0.1476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14645332098007202, + "step": 2360, + "valid_targets_mean": 3793.7, + "valid_targets_min": 551 + }, + { + "epoch": 3.7599364069952306, + "grad_norm": 0.5374551107704829, + "learning_rate": 2.091947576179023e-05, + "loss": 0.1483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1566208451986313, + "step": 2365, + "valid_targets_mean": 3043.1, + "valid_targets_min": 279 + }, + { + "epoch": 3.767885532591415, + "grad_norm": 0.48392227359646184, + "learning_rate": 2.084025948297055e-05, + "loss": 0.1393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14955100417137146, + "step": 2370, + "valid_targets_mean": 3625.3, + "valid_targets_min": 777 + }, + { + "epoch": 3.7758346581875992, + "grad_norm": 0.4196458061277742, + "learning_rate": 2.0761029996558233e-05, + "loss": 0.1379, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13161495327949524, + "step": 2375, + "valid_targets_mean": 3986.8, + "valid_targets_min": 1095 + }, + { + "epoch": 3.7837837837837838, + "grad_norm": 0.40047392629967743, + "learning_rate": 2.068178854791951e-05, + "loss": 0.1446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13223011791706085, + "step": 2380, + "valid_targets_mean": 4361.8, + "valid_targets_min": 555 + }, + { + "epoch": 3.7917329093799683, + "grad_norm": 0.3964855035870811, + "learning_rate": 2.0602536382608638e-05, + "loss": 0.1452, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12523694336414337, + "step": 2385, + "valid_targets_mean": 4463.9, + "valid_targets_min": 790 + }, + { + "epoch": 3.7996820349761524, + "grad_norm": 0.4585272620018194, + "learning_rate": 2.0523274746348315e-05, + "loss": 0.1525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16742074489593506, + "step": 2390, + "valid_targets_mean": 4025.6, + "valid_targets_min": 1296 + }, + { + "epoch": 3.807631160572337, + "grad_norm": 0.5154964949124577, + "learning_rate": 2.0444004885010114e-05, + "loss": 0.1542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14283734560012817, + "step": 2395, + "valid_targets_mean": 2625.4, + "valid_targets_min": 718 + }, + { + "epoch": 3.8155802861685215, + "grad_norm": 0.40675149816537554, + "learning_rate": 2.0364728044594897e-05, + "loss": 0.1434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13937920331954956, + "step": 2400, + "valid_targets_mean": 4620.8, + "valid_targets_min": 847 + }, + { + "epoch": 3.8235294117647056, + "grad_norm": 0.44791101289332774, + "learning_rate": 2.0285445471213218e-05, + "loss": 0.151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11926200985908508, + "step": 2405, + "valid_targets_mean": 3797.2, + "valid_targets_min": 280 + }, + { + "epoch": 3.83147853736089, + "grad_norm": 0.4690049435912802, + "learning_rate": 2.020615841106575e-05, + "loss": 0.1503, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.158674418926239, + "step": 2410, + "valid_targets_mean": 3615.0, + "valid_targets_min": 434 + }, + { + "epoch": 3.8394276629570747, + "grad_norm": 0.4778625950349475, + "learning_rate": 2.0126868110423685e-05, + "loss": 0.1582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1569017916917801, + "step": 2415, + "valid_targets_mean": 3607.1, + "valid_targets_min": 244 + }, + { + "epoch": 3.8473767885532593, + "grad_norm": 0.5094399368297139, + "learning_rate": 2.0047575815609166e-05, + "loss": 0.1508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13234496116638184, + "step": 2420, + "valid_targets_mean": 4075.4, + "valid_targets_min": 1276 + }, + { + "epoch": 3.855325914149444, + "grad_norm": 0.4474580576793741, + "learning_rate": 1.996828277297566e-05, + "loss": 0.1469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1673652082681656, + "step": 2425, + "valid_targets_mean": 3956.6, + "valid_targets_min": 687 + }, + { + "epoch": 3.863275039745628, + "grad_norm": 0.47557411799173016, + "learning_rate": 1.988899022888841e-05, + "loss": 0.1469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14662069082260132, + "step": 2430, + "valid_targets_mean": 3376.6, + "valid_targets_min": 290 + }, + { + "epoch": 3.8712241653418125, + "grad_norm": 0.47253918336301953, + "learning_rate": 1.98096994297048e-05, + "loss": 0.156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17109301686286926, + "step": 2435, + "valid_targets_mean": 4009.4, + "valid_targets_min": 621 + }, + { + "epoch": 3.879173290937997, + "grad_norm": 0.4811824636612629, + "learning_rate": 1.9730411621754798e-05, + "loss": 0.1507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16535258293151855, + "step": 2440, + "valid_targets_mean": 4401.9, + "valid_targets_min": 1153 + }, + { + "epoch": 3.887122416534181, + "grad_norm": 0.47574278128170894, + "learning_rate": 1.9651128051321376e-05, + "loss": 0.1568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16865494847297668, + "step": 2445, + "valid_targets_mean": 3826.2, + "valid_targets_min": 743 + }, + { + "epoch": 3.8950715421303657, + "grad_norm": 0.4927720545302061, + "learning_rate": 1.9571849964620858e-05, + "loss": 0.149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1311875879764557, + "step": 2450, + "valid_targets_mean": 2777.1, + "valid_targets_min": 619 + }, + { + "epoch": 3.90302066772655, + "grad_norm": 0.4520272890103647, + "learning_rate": 1.949257860778339e-05, + "loss": 0.1509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19364655017852783, + "step": 2455, + "valid_targets_mean": 4212.9, + "valid_targets_min": 304 + }, + { + "epoch": 3.9109697933227343, + "grad_norm": 0.48056289266042657, + "learning_rate": 1.9413315226833343e-05, + "loss": 0.1507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1471574306488037, + "step": 2460, + "valid_targets_mean": 3280.9, + "valid_targets_min": 988 + }, + { + "epoch": 3.918918918918919, + "grad_norm": 0.44927106422669333, + "learning_rate": 1.9334061067669725e-05, + "loss": 0.161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15913772583007812, + "step": 2465, + "valid_targets_mean": 4163.8, + "valid_targets_min": 525 + }, + { + "epoch": 3.9268680445151034, + "grad_norm": 0.4047406712647876, + "learning_rate": 1.9254817376046556e-05, + "loss": 0.1628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14912940561771393, + "step": 2470, + "valid_targets_mean": 4459.5, + "valid_targets_min": 1301 + }, + { + "epoch": 3.9348171701112875, + "grad_norm": 0.48354323358352247, + "learning_rate": 1.9175585397553368e-05, + "loss": 0.1459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14772868156433105, + "step": 2475, + "valid_targets_mean": 3460.9, + "valid_targets_min": 746 + }, + { + "epoch": 3.942766295707472, + "grad_norm": 0.45165188294145747, + "learning_rate": 1.909636637759554e-05, + "loss": 0.1585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14915470778942108, + "step": 2480, + "valid_targets_mean": 3941.9, + "valid_targets_min": 753 + }, + { + "epoch": 3.9507154213036566, + "grad_norm": 0.675364427291404, + "learning_rate": 1.9017161561374787e-05, + "loss": 0.1375, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14537788927555084, + "step": 2485, + "valid_targets_mean": 3596.5, + "valid_targets_min": 271 + }, + { + "epoch": 3.958664546899841, + "grad_norm": 0.4529521240142203, + "learning_rate": 1.893797219386957e-05, + "loss": 0.1561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14005938172340393, + "step": 2490, + "valid_targets_mean": 3370.2, + "valid_targets_min": 491 + }, + { + "epoch": 3.9666136724960257, + "grad_norm": 0.43898112823019947, + "learning_rate": 1.885879951981549e-05, + "loss": 0.143, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1563473492860794, + "step": 2495, + "valid_targets_mean": 4142.4, + "valid_targets_min": 519 + }, + { + "epoch": 3.97456279809221, + "grad_norm": 0.4632944157750017, + "learning_rate": 1.877964478368577e-05, + "loss": 0.1581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15442904829978943, + "step": 2500, + "valid_targets_mean": 3855.9, + "valid_targets_min": 302 + }, + { + "epoch": 3.9825119236883944, + "grad_norm": 0.4422485867941161, + "learning_rate": 1.8700509229671696e-05, + "loss": 0.1462, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1370777189731598, + "step": 2505, + "valid_targets_mean": 3780.2, + "valid_targets_min": 1165 + }, + { + "epoch": 3.990461049284579, + "grad_norm": 0.40877054927258344, + "learning_rate": 1.8621394101663003e-05, + "loss": 0.1471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1423787623643875, + "step": 2510, + "valid_targets_mean": 4734.8, + "valid_targets_min": 519 + }, + { + "epoch": 3.998410174880763, + "grad_norm": 0.49482609642383296, + "learning_rate": 1.854230064322837e-05, + "loss": 0.1383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15848851203918457, + "step": 2515, + "valid_targets_mean": 3223.8, + "valid_targets_min": 275 + }, + { + "epoch": 4.006359300476947, + "grad_norm": 0.3675406957911613, + "learning_rate": 1.8463230097595887e-05, + "loss": 0.1376, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13189461827278137, + "step": 2520, + "valid_targets_mean": 4952.8, + "valid_targets_min": 1721 + }, + { + "epoch": 4.014308426073132, + "grad_norm": 0.507020553139243, + "learning_rate": 1.8384183707633475e-05, + "loss": 0.1316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1438116431236267, + "step": 2525, + "valid_targets_mean": 3300.6, + "valid_targets_min": 677 + }, + { + "epoch": 4.022257551669316, + "grad_norm": 0.47001299296067484, + "learning_rate": 1.8305162715829348e-05, + "loss": 0.139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14781130850315094, + "step": 2530, + "valid_targets_mean": 3934.6, + "valid_targets_min": 921 + }, + { + "epoch": 4.030206677265501, + "grad_norm": 0.41847190140441093, + "learning_rate": 1.8226168364272534e-05, + "loss": 0.123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11341873556375504, + "step": 2535, + "valid_targets_mean": 4553.1, + "valid_targets_min": 1276 + }, + { + "epoch": 4.038155802861685, + "grad_norm": 0.5373424873152618, + "learning_rate": 1.8147201894633282e-05, + "loss": 0.1284, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10829570144414902, + "step": 2540, + "valid_targets_mean": 4025.9, + "valid_targets_min": 551 + }, + { + "epoch": 4.046104928457869, + "grad_norm": 0.44151630488419186, + "learning_rate": 1.8068264548143605e-05, + "loss": 0.1361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13234928250312805, + "step": 2545, + "valid_targets_mean": 4406.9, + "valid_targets_min": 290 + }, + { + "epoch": 4.054054054054054, + "grad_norm": 0.44111473492914083, + "learning_rate": 1.7989357565577746e-05, + "loss": 0.1209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12020754814147949, + "step": 2550, + "valid_targets_mean": 3875.1, + "valid_targets_min": 984 + }, + { + "epoch": 4.0620031796502385, + "grad_norm": 0.497648457748331, + "learning_rate": 1.7910482187232643e-05, + "loss": 0.1271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13153505325317383, + "step": 2555, + "valid_targets_mean": 3588.8, + "valid_targets_min": 308 + }, + { + "epoch": 4.069952305246423, + "grad_norm": 0.4466450096417644, + "learning_rate": 1.7831639652908507e-05, + "loss": 0.132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1519913375377655, + "step": 2560, + "valid_targets_mean": 4519.4, + "valid_targets_min": 373 + }, + { + "epoch": 4.077901430842608, + "grad_norm": 0.5133411375199687, + "learning_rate": 1.775283120188925e-05, + "loss": 0.12, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11747326701879501, + "step": 2565, + "valid_targets_mean": 3593.9, + "valid_targets_min": 696 + }, + { + "epoch": 4.085850556438792, + "grad_norm": 0.5340060908823832, + "learning_rate": 1.7674058072923075e-05, + "loss": 0.1424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14561407268047333, + "step": 2570, + "valid_targets_mean": 2832.8, + "valid_targets_min": 691 + }, + { + "epoch": 4.093799682034976, + "grad_norm": 0.5131875664235621, + "learning_rate": 1.7595321504202977e-05, + "loss": 0.1352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13636553287506104, + "step": 2575, + "valid_targets_mean": 3649.6, + "valid_targets_min": 251 + }, + { + "epoch": 4.101748807631161, + "grad_norm": 0.49807691314048513, + "learning_rate": 1.751662273334725e-05, + "loss": 0.1355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14528346061706543, + "step": 2580, + "valid_targets_mean": 3304.9, + "valid_targets_min": 273 + }, + { + "epoch": 4.109697933227345, + "grad_norm": 0.5137089285911086, + "learning_rate": 1.7437962997380093e-05, + "loss": 0.1315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12808027863502502, + "step": 2585, + "valid_targets_mean": 3785.6, + "valid_targets_min": 357 + }, + { + "epoch": 4.117647058823529, + "grad_norm": 0.4503928265620839, + "learning_rate": 1.7359343532712135e-05, + "loss": 0.1379, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14875495433807373, + "step": 2590, + "valid_targets_mean": 4586.7, + "valid_targets_min": 270 + }, + { + "epoch": 4.125596184419714, + "grad_norm": 0.4166285208229854, + "learning_rate": 1.7280765575120992e-05, + "loss": 0.136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11069852113723755, + "step": 2595, + "valid_targets_mean": 4363.1, + "valid_targets_min": 498 + }, + { + "epoch": 4.133545310015898, + "grad_norm": 0.5155386718641107, + "learning_rate": 1.7202230359731835e-05, + "loss": 0.1373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14892956614494324, + "step": 2600, + "valid_targets_mean": 3399.8, + "valid_targets_min": 592 + }, + { + "epoch": 4.141494435612083, + "grad_norm": 0.526180170879376, + "learning_rate": 1.7123739120998033e-05, + "loss": 0.1252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.133394256234169, + "step": 2605, + "valid_targets_mean": 3551.6, + "valid_targets_min": 519 + }, + { + "epoch": 4.149443561208267, + "grad_norm": 0.43448892503799785, + "learning_rate": 1.7045293092681686e-05, + "loss": 0.1308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12770622968673706, + "step": 2610, + "valid_targets_mean": 4312.1, + "valid_targets_min": 278 + }, + { + "epoch": 4.157392686804451, + "grad_norm": 0.47438188721000024, + "learning_rate": 1.6966893507834242e-05, + "loss": 0.1388, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11475702375173569, + "step": 2615, + "valid_targets_mean": 3365.8, + "valid_targets_min": 527 + }, + { + "epoch": 4.165341812400636, + "grad_norm": 0.4734353597894468, + "learning_rate": 1.6888541598777167e-05, + "loss": 0.1413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14915436506271362, + "step": 2620, + "valid_targets_mean": 3915.1, + "valid_targets_min": 709 + }, + { + "epoch": 4.17329093799682, + "grad_norm": 0.44954055606792176, + "learning_rate": 1.68102385970825e-05, + "loss": 0.1295, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12147843092679977, + "step": 2625, + "valid_targets_mean": 4042.4, + "valid_targets_min": 702 + }, + { + "epoch": 4.1812400635930045, + "grad_norm": 0.5461716268999647, + "learning_rate": 1.6731985733553545e-05, + "loss": 0.1351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1752236783504486, + "step": 2630, + "valid_targets_mean": 3508.4, + "valid_targets_min": 237 + }, + { + "epoch": 4.1891891891891895, + "grad_norm": 0.47376122084778394, + "learning_rate": 1.6653784238205525e-05, + "loss": 0.137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13985666632652283, + "step": 2635, + "valid_targets_mean": 3967.4, + "valid_targets_min": 633 + }, + { + "epoch": 4.197138314785374, + "grad_norm": 0.49860840899961373, + "learning_rate": 1.6575635340246203e-05, + "loss": 0.142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13958841562271118, + "step": 2640, + "valid_targets_mean": 3776.4, + "valid_targets_min": 581 + }, + { + "epoch": 4.205087440381558, + "grad_norm": 0.5671244632485692, + "learning_rate": 1.649754026805662e-05, + "loss": 0.1334, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17595896124839783, + "step": 2645, + "valid_targets_mean": 3767.5, + "valid_targets_min": 239 + }, + { + "epoch": 4.213036565977743, + "grad_norm": 0.4534522358165292, + "learning_rate": 1.6419500249171737e-05, + "loss": 0.1256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12649832665920258, + "step": 2650, + "valid_targets_mean": 3767.0, + "valid_targets_min": 300 + }, + { + "epoch": 4.220985691573927, + "grad_norm": 0.48461063763882706, + "learning_rate": 1.634151651026118e-05, + "loss": 0.1253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11336950957775116, + "step": 2655, + "valid_targets_mean": 3496.2, + "valid_targets_min": 337 + }, + { + "epoch": 4.228934817170111, + "grad_norm": 0.5517432754534229, + "learning_rate": 1.626359027710993e-05, + "loss": 0.1255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12822790443897247, + "step": 2660, + "valid_targets_mean": 2547.8, + "valid_targets_min": 368 + }, + { + "epoch": 4.236883942766296, + "grad_norm": 0.6506977836059051, + "learning_rate": 1.6185722774599064e-05, + "loss": 0.1323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13393157720565796, + "step": 2665, + "valid_targets_mean": 3280.5, + "valid_targets_min": 611 + }, + { + "epoch": 4.24483306836248, + "grad_norm": 0.5560817254019538, + "learning_rate": 1.6107915226686504e-05, + "loss": 0.1324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1394142508506775, + "step": 2670, + "valid_targets_mean": 3167.7, + "valid_targets_min": 512 + }, + { + "epoch": 4.252782193958664, + "grad_norm": 0.531467297115508, + "learning_rate": 1.603016885638779e-05, + "loss": 0.1431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14491944015026093, + "step": 2675, + "valid_targets_mean": 3221.8, + "valid_targets_min": 247 + }, + { + "epoch": 4.260731319554849, + "grad_norm": 0.4488475679526595, + "learning_rate": 1.5952484885756827e-05, + "loss": 0.1423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13313503563404083, + "step": 2680, + "valid_targets_mean": 3901.4, + "valid_targets_min": 317 + }, + { + "epoch": 4.268680445151033, + "grad_norm": 0.47568888206678156, + "learning_rate": 1.587486453586669e-05, + "loss": 0.1346, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12674440443515778, + "step": 2685, + "valid_targets_mean": 3657.6, + "valid_targets_min": 513 + }, + { + "epoch": 4.276629570747218, + "grad_norm": 0.46272650057707004, + "learning_rate": 1.579730902679045e-05, + "loss": 0.1339, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15541914105415344, + "step": 2690, + "valid_targets_mean": 4025.2, + "valid_targets_min": 502 + }, + { + "epoch": 4.284578696343402, + "grad_norm": 0.4472468025529375, + "learning_rate": 1.5719819577581982e-05, + "loss": 0.1223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12270389497280121, + "step": 2695, + "valid_targets_mean": 3892.1, + "valid_targets_min": 793 + }, + { + "epoch": 4.292527821939586, + "grad_norm": 0.5592143714752654, + "learning_rate": 1.5642397406256768e-05, + "loss": 0.1291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15055924654006958, + "step": 2700, + "valid_targets_mean": 2888.4, + "valid_targets_min": 663 + }, + { + "epoch": 4.300476947535771, + "grad_norm": 0.6380121268077782, + "learning_rate": 1.556504372977283e-05, + "loss": 0.1457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17369496822357178, + "step": 2705, + "valid_targets_mean": 3295.8, + "valid_targets_min": 450 + }, + { + "epoch": 4.3084260731319555, + "grad_norm": 0.43592849432503594, + "learning_rate": 1.548775976401152e-05, + "loss": 0.1417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12458646297454834, + "step": 2710, + "valid_targets_mean": 4257.1, + "valid_targets_min": 673 + }, + { + "epoch": 4.31637519872814, + "grad_norm": 0.5249006611498069, + "learning_rate": 1.5410546723758452e-05, + "loss": 0.1429, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1341407299041748, + "step": 2715, + "valid_targets_mean": 3361.5, + "valid_targets_min": 260 + }, + { + "epoch": 4.324324324324325, + "grad_norm": 0.4535416533335133, + "learning_rate": 1.5333405822684428e-05, + "loss": 0.1254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1214504987001419, + "step": 2720, + "valid_targets_mean": 4414.6, + "valid_targets_min": 540 + }, + { + "epoch": 4.332273449920509, + "grad_norm": 0.4731025497262397, + "learning_rate": 1.5256338273326293e-05, + "loss": 0.127, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13752460479736328, + "step": 2725, + "valid_targets_mean": 3609.0, + "valid_targets_min": 1095 + }, + { + "epoch": 4.340222575516693, + "grad_norm": 0.5291643855694953, + "learning_rate": 1.5179345287067935e-05, + "loss": 0.1349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14532646536827087, + "step": 2730, + "valid_targets_mean": 3274.8, + "valid_targets_min": 736 + }, + { + "epoch": 4.348171701112878, + "grad_norm": 0.49276963970488963, + "learning_rate": 1.5102428074121222e-05, + "loss": 0.1359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13466641306877136, + "step": 2735, + "valid_targets_mean": 4083.1, + "valid_targets_min": 243 + }, + { + "epoch": 4.356120826709062, + "grad_norm": 0.5271105291571133, + "learning_rate": 1.5025587843506986e-05, + "loss": 0.1387, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13063038885593414, + "step": 2740, + "valid_targets_mean": 3548.1, + "valid_targets_min": 617 + }, + { + "epoch": 4.364069952305247, + "grad_norm": 0.5029010534111552, + "learning_rate": 1.4948825803035996e-05, + "loss": 0.1302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.115936279296875, + "step": 2745, + "valid_targets_mean": 4106.9, + "valid_targets_min": 581 + }, + { + "epoch": 4.372019077901431, + "grad_norm": 0.4305157347551604, + "learning_rate": 1.4872143159290016e-05, + "loss": 0.1379, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1385708451271057, + "step": 2750, + "valid_targets_mean": 4007.7, + "valid_targets_min": 781 + }, + { + "epoch": 4.379968203497615, + "grad_norm": 0.5872775238224067, + "learning_rate": 1.4795541117602808e-05, + "loss": 0.131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11982855200767517, + "step": 2755, + "valid_targets_mean": 3377.2, + "valid_targets_min": 646 + }, + { + "epoch": 4.3879173290938, + "grad_norm": 0.4813077822857676, + "learning_rate": 1.4719020882041175e-05, + "loss": 0.1351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1347476989030838, + "step": 2760, + "valid_targets_mean": 3714.1, + "valid_targets_min": 598 + }, + { + "epoch": 4.395866454689984, + "grad_norm": 0.5454824004525505, + "learning_rate": 1.4642583655386084e-05, + "loss": 0.1306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12626631557941437, + "step": 2765, + "valid_targets_mean": 3165.4, + "valid_targets_min": 689 + }, + { + "epoch": 4.403815580286168, + "grad_norm": 0.4885305355016124, + "learning_rate": 1.4566230639113696e-05, + "loss": 0.1332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1025635302066803, + "step": 2770, + "valid_targets_mean": 4869.7, + "valid_targets_min": 683 + }, + { + "epoch": 4.411764705882353, + "grad_norm": 0.4770935247503296, + "learning_rate": 1.448996303337654e-05, + "loss": 0.134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.124050073325634, + "step": 2775, + "valid_targets_mean": 3172.6, + "valid_targets_min": 720 + }, + { + "epoch": 4.419713831478537, + "grad_norm": 0.46987397640146145, + "learning_rate": 1.4413782036984616e-05, + "loss": 0.1255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11378094553947449, + "step": 2780, + "valid_targets_mean": 3712.6, + "valid_targets_min": 666 + }, + { + "epoch": 4.4276629570747215, + "grad_norm": 0.4291495568703118, + "learning_rate": 1.4337688847386542e-05, + "loss": 0.1251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10549108684062958, + "step": 2785, + "valid_targets_mean": 3789.5, + "valid_targets_min": 545 + }, + { + "epoch": 4.4356120826709065, + "grad_norm": 0.48467330245194035, + "learning_rate": 1.426168466065077e-05, + "loss": 0.1378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14184287190437317, + "step": 2790, + "valid_targets_mean": 4098.1, + "valid_targets_min": 1006 + }, + { + "epoch": 4.443561208267091, + "grad_norm": 0.4320267387261653, + "learning_rate": 1.4185770671446743e-05, + "loss": 0.1397, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11710529029369354, + "step": 2795, + "valid_targets_mean": 4207.4, + "valid_targets_min": 891 + }, + { + "epoch": 4.451510333863275, + "grad_norm": 0.4899699268579415, + "learning_rate": 1.4109948073026153e-05, + "loss": 0.1378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13632246851921082, + "step": 2800, + "valid_targets_mean": 3296.7, + "valid_targets_min": 635 + }, + { + "epoch": 4.45945945945946, + "grad_norm": 0.43337393856388357, + "learning_rate": 1.4034218057204165e-05, + "loss": 0.1368, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12501519918441772, + "step": 2805, + "valid_targets_mean": 3881.5, + "valid_targets_min": 919 + }, + { + "epoch": 4.467408585055644, + "grad_norm": 0.4398737794805095, + "learning_rate": 1.3958581814340679e-05, + "loss": 0.1274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11017243564128876, + "step": 2810, + "valid_targets_mean": 3680.9, + "valid_targets_min": 219 + }, + { + "epoch": 4.475357710651828, + "grad_norm": 0.5233156447822513, + "learning_rate": 1.3883040533321637e-05, + "loss": 0.1281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1441815048456192, + "step": 2815, + "valid_targets_mean": 3133.9, + "valid_targets_min": 341 + }, + { + "epoch": 4.483306836248013, + "grad_norm": 0.4598890957258285, + "learning_rate": 1.3807595401540322e-05, + "loss": 0.1356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14368480443954468, + "step": 2820, + "valid_targets_mean": 4432.7, + "valid_targets_min": 291 + }, + { + "epoch": 4.491255961844197, + "grad_norm": 0.5085977500037545, + "learning_rate": 1.3732247604878697e-05, + "loss": 0.1352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12015148252248764, + "step": 2825, + "valid_targets_mean": 3563.8, + "valid_targets_min": 587 + }, + { + "epoch": 4.499205087440382, + "grad_norm": 0.5315330999004808, + "learning_rate": 1.3656998327688764e-05, + "loss": 0.1392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1353592574596405, + "step": 2830, + "valid_targets_mean": 3689.6, + "valid_targets_min": 204 + }, + { + "epoch": 4.507154213036566, + "grad_norm": 0.5305726504410712, + "learning_rate": 1.3581848752773961e-05, + "loss": 0.1219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11149744689464569, + "step": 2835, + "valid_targets_mean": 3077.8, + "valid_targets_min": 512 + }, + { + "epoch": 4.51510333863275, + "grad_norm": 0.5139894765702253, + "learning_rate": 1.3506800061370555e-05, + "loss": 0.1471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14306671917438507, + "step": 2840, + "valid_targets_mean": 3728.9, + "valid_targets_min": 1152 + }, + { + "epoch": 4.523052464228935, + "grad_norm": 0.4814345025583722, + "learning_rate": 1.3431853433129058e-05, + "loss": 0.1246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11117015779018402, + "step": 2845, + "valid_targets_mean": 3341.5, + "valid_targets_min": 566 + }, + { + "epoch": 4.531001589825119, + "grad_norm": 0.5079196992633277, + "learning_rate": 1.3357010046095741e-05, + "loss": 0.1378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17309923470020294, + "step": 2850, + "valid_targets_mean": 2918.5, + "valid_targets_min": 816 + }, + { + "epoch": 4.538950715421303, + "grad_norm": 0.4517165892172054, + "learning_rate": 1.3282271076694052e-05, + "loss": 0.1296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1249191164970398, + "step": 2855, + "valid_targets_mean": 4362.0, + "valid_targets_min": 951 + }, + { + "epoch": 4.546899841017488, + "grad_norm": 0.47163152664152436, + "learning_rate": 1.3207637699706162e-05, + "loss": 0.1322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13854432106018066, + "step": 2860, + "valid_targets_mean": 3941.9, + "valid_targets_min": 248 + }, + { + "epoch": 4.5548489666136724, + "grad_norm": 0.4474413787281936, + "learning_rate": 1.3133111088254507e-05, + "loss": 0.1267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11171597242355347, + "step": 2865, + "valid_targets_mean": 3772.6, + "valid_targets_min": 254 + }, + { + "epoch": 4.5627980922098565, + "grad_norm": 0.5078641249343999, + "learning_rate": 1.3058692413783307e-05, + "loss": 0.1407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13531452417373657, + "step": 2870, + "valid_targets_mean": 3739.0, + "valid_targets_min": 678 + }, + { + "epoch": 4.5707472178060415, + "grad_norm": 0.7026774408330564, + "learning_rate": 1.2984382846040187e-05, + "loss": 0.1315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1399625539779663, + "step": 2875, + "valid_targets_mean": 4271.8, + "valid_targets_min": 716 + }, + { + "epoch": 4.578696343402226, + "grad_norm": 0.46761769157477573, + "learning_rate": 1.2910183553057788e-05, + "loss": 0.1426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14058490097522736, + "step": 2880, + "valid_targets_mean": 3651.0, + "valid_targets_min": 310 + }, + { + "epoch": 4.586645468998411, + "grad_norm": 0.4383231851637327, + "learning_rate": 1.2836095701135398e-05, + "loss": 0.1322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11564694344997406, + "step": 2885, + "valid_targets_mean": 3662.7, + "valid_targets_min": 691 + }, + { + "epoch": 4.594594594594595, + "grad_norm": 0.46123656999116863, + "learning_rate": 1.2762120454820628e-05, + "loss": 0.1274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1169496402144432, + "step": 2890, + "valid_targets_mean": 3931.8, + "valid_targets_min": 250 + }, + { + "epoch": 4.602543720190779, + "grad_norm": 0.4338183551025682, + "learning_rate": 1.268825897689108e-05, + "loss": 0.1292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12584465742111206, + "step": 2895, + "valid_targets_mean": 4739.1, + "valid_targets_min": 1044 + }, + { + "epoch": 4.610492845786963, + "grad_norm": 0.4172369284320454, + "learning_rate": 1.2614512428336105e-05, + "loss": 0.1321, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12070607393980026, + "step": 2900, + "valid_targets_mean": 5442.9, + "valid_targets_min": 673 + }, + { + "epoch": 4.618441971383148, + "grad_norm": 0.47358327667927247, + "learning_rate": 1.254088196833855e-05, + "loss": 0.1318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1363537609577179, + "step": 2905, + "valid_targets_mean": 4575.1, + "valid_targets_min": 524 + }, + { + "epoch": 4.626391096979332, + "grad_norm": 0.46039621956253657, + "learning_rate": 1.2467368754256513e-05, + "loss": 0.138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1305224895477295, + "step": 2910, + "valid_targets_mean": 4386.9, + "valid_targets_min": 716 + }, + { + "epoch": 4.634340222575517, + "grad_norm": 0.48832499251309247, + "learning_rate": 1.2393973941605161e-05, + "loss": 0.1308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.125309556722641, + "step": 2915, + "valid_targets_mean": 3788.6, + "valid_targets_min": 988 + }, + { + "epoch": 4.642289348171701, + "grad_norm": 0.5232937532641884, + "learning_rate": 1.2320698684038599e-05, + "loss": 0.1371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1394532024860382, + "step": 2920, + "valid_targets_mean": 3360.9, + "valid_targets_min": 258 + }, + { + "epoch": 4.650238473767885, + "grad_norm": 0.4558597876370874, + "learning_rate": 1.2247544133331681e-05, + "loss": 0.1303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11967042088508606, + "step": 2925, + "valid_targets_mean": 4315.3, + "valid_targets_min": 281 + }, + { + "epoch": 4.65818759936407, + "grad_norm": 0.5566298887502414, + "learning_rate": 1.2174511439361943e-05, + "loss": 0.1467, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1408625692129135, + "step": 2930, + "valid_targets_mean": 2664.5, + "valid_targets_min": 744 + }, + { + "epoch": 4.666136724960254, + "grad_norm": 0.48634223320131703, + "learning_rate": 1.2101601750091528e-05, + "loss": 0.1379, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16628813743591309, + "step": 2935, + "valid_targets_mean": 4301.5, + "valid_targets_min": 1436 + }, + { + "epoch": 4.674085850556438, + "grad_norm": 0.46133307731688006, + "learning_rate": 1.2028816211549117e-05, + "loss": 0.1206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12970274686813354, + "step": 2940, + "valid_targets_mean": 4872.5, + "valid_targets_min": 1521 + }, + { + "epoch": 4.682034976152623, + "grad_norm": 0.42861468167336814, + "learning_rate": 1.195615596781194e-05, + "loss": 0.1325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1275981068611145, + "step": 2945, + "valid_targets_mean": 4291.4, + "valid_targets_min": 1324 + }, + { + "epoch": 4.6899841017488075, + "grad_norm": 0.45633412060443734, + "learning_rate": 1.18836221609878e-05, + "loss": 0.123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12598268687725067, + "step": 2950, + "valid_targets_mean": 4175.3, + "valid_targets_min": 839 + }, + { + "epoch": 4.697933227344992, + "grad_norm": 0.5343832571290266, + "learning_rate": 1.1811215931197084e-05, + "loss": 0.1332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1605256199836731, + "step": 2955, + "valid_targets_mean": 3076.2, + "valid_targets_min": 774 + }, + { + "epoch": 4.705882352941177, + "grad_norm": 0.4961038270131699, + "learning_rate": 1.1738938416554857e-05, + "loss": 0.134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13582147657871246, + "step": 2960, + "valid_targets_mean": 3582.1, + "valid_targets_min": 267 + }, + { + "epoch": 4.713831478537361, + "grad_norm": 0.5512004556887968, + "learning_rate": 1.1666790753153009e-05, + "loss": 0.1479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13612379133701324, + "step": 2965, + "valid_targets_mean": 2709.6, + "valid_targets_min": 977 + }, + { + "epoch": 4.721780604133546, + "grad_norm": 0.5381030618661898, + "learning_rate": 1.1594774075042345e-05, + "loss": 0.1307, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1454896181821823, + "step": 2970, + "valid_targets_mean": 3670.9, + "valid_targets_min": 511 + }, + { + "epoch": 4.72972972972973, + "grad_norm": 0.5509557031574198, + "learning_rate": 1.152288951421478e-05, + "loss": 0.1253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13313612341880798, + "step": 2975, + "valid_targets_mean": 2507.1, + "valid_targets_min": 344 + }, + { + "epoch": 4.737678855325914, + "grad_norm": 0.5619248692734761, + "learning_rate": 1.1451138200585567e-05, + "loss": 0.1301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13828039169311523, + "step": 2980, + "valid_targets_mean": 3192.6, + "valid_targets_min": 304 + }, + { + "epoch": 4.745627980922099, + "grad_norm": 0.6067851198602392, + "learning_rate": 1.13795212619755e-05, + "loss": 0.1338, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1492369920015335, + "step": 2985, + "valid_targets_mean": 2374.6, + "valid_targets_min": 295 + }, + { + "epoch": 4.753577106518283, + "grad_norm": 0.4802919158480157, + "learning_rate": 1.1308039824093197e-05, + "loss": 0.1292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1495775282382965, + "step": 2990, + "valid_targets_mean": 3698.0, + "valid_targets_min": 642 + }, + { + "epoch": 4.761526232114467, + "grad_norm": 0.4709231044726117, + "learning_rate": 1.1236695010517434e-05, + "loss": 0.143, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1218520998954773, + "step": 2995, + "valid_targets_mean": 3672.4, + "valid_targets_min": 252 + }, + { + "epoch": 4.769475357710652, + "grad_norm": 0.43463931612817397, + "learning_rate": 1.116548794267945e-05, + "loss": 0.1253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12758862972259521, + "step": 3000, + "valid_targets_mean": 4420.1, + "valid_targets_min": 413 + }, + { + "epoch": 4.777424483306836, + "grad_norm": 0.45148983869993, + "learning_rate": 1.109441973984534e-05, + "loss": 0.1288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11758168786764145, + "step": 3005, + "valid_targets_mean": 3833.6, + "valid_targets_min": 199 + }, + { + "epoch": 4.78537360890302, + "grad_norm": 0.4726760123977448, + "learning_rate": 1.1023491519098439e-05, + "loss": 0.1328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15904685854911804, + "step": 3010, + "valid_targets_mean": 4469.8, + "valid_targets_min": 683 + }, + { + "epoch": 4.793322734499205, + "grad_norm": 0.5308825581080399, + "learning_rate": 1.0952704395321781e-05, + "loss": 0.1256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13085004687309265, + "step": 3015, + "valid_targets_mean": 3272.6, + "valid_targets_min": 251 + }, + { + "epoch": 4.801271860095389, + "grad_norm": 0.5180355797993398, + "learning_rate": 1.0882059481180588e-05, + "loss": 0.1264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11950570344924927, + "step": 3020, + "valid_targets_mean": 3030.1, + "valid_targets_min": 349 + }, + { + "epoch": 4.809220985691574, + "grad_norm": 0.4600006136661297, + "learning_rate": 1.0811557887104747e-05, + "loss": 0.1316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13670320808887482, + "step": 3025, + "valid_targets_mean": 3798.9, + "valid_targets_min": 735 + }, + { + "epoch": 4.8171701112877585, + "grad_norm": 0.5471165258975516, + "learning_rate": 1.074120072127137e-05, + "loss": 0.139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10859496891498566, + "step": 3030, + "valid_targets_mean": 4030.0, + "valid_targets_min": 285 + }, + { + "epoch": 4.825119236883943, + "grad_norm": 0.4875896845337796, + "learning_rate": 1.0670989089587395e-05, + "loss": 0.1443, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15141701698303223, + "step": 3035, + "valid_targets_mean": 3959.2, + "valid_targets_min": 348 + }, + { + "epoch": 4.833068362480127, + "grad_norm": 0.46841773833007866, + "learning_rate": 1.0600924095672184e-05, + "loss": 0.1317, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1404210776090622, + "step": 3040, + "valid_targets_mean": 4134.9, + "valid_targets_min": 834 + }, + { + "epoch": 4.841017488076312, + "grad_norm": 0.4822180291377705, + "learning_rate": 1.0531006840840162e-05, + "loss": 0.151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15318647027015686, + "step": 3045, + "valid_targets_mean": 3816.3, + "valid_targets_min": 1540 + }, + { + "epoch": 4.848966613672496, + "grad_norm": 0.4598922245539829, + "learning_rate": 1.046123842408354e-05, + "loss": 0.1338, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.142354816198349, + "step": 3050, + "valid_targets_mean": 4431.2, + "valid_targets_min": 309 + }, + { + "epoch": 4.856915739268681, + "grad_norm": 0.4289633212386471, + "learning_rate": 1.0391619942055007e-05, + "loss": 0.1313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1268487572669983, + "step": 3055, + "valid_targets_mean": 3981.8, + "valid_targets_min": 806 + }, + { + "epoch": 4.864864864864865, + "grad_norm": 0.5512192977590487, + "learning_rate": 1.0322152489050508e-05, + "loss": 0.1344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1406165063381195, + "step": 3060, + "valid_targets_mean": 3987.3, + "valid_targets_min": 1264 + }, + { + "epoch": 4.872813990461049, + "grad_norm": 0.473626788275172, + "learning_rate": 1.0252837156992065e-05, + "loss": 0.1278, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12075041234493256, + "step": 3065, + "valid_targets_mean": 3601.8, + "valid_targets_min": 410 + }, + { + "epoch": 4.880763116057234, + "grad_norm": 0.4208027067707839, + "learning_rate": 1.018367503541057e-05, + "loss": 0.1275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11000724136829376, + "step": 3070, + "valid_targets_mean": 4098.9, + "valid_targets_min": 699 + }, + { + "epoch": 4.888712241653418, + "grad_norm": 0.4042871962162526, + "learning_rate": 1.0114667211428675e-05, + "loss": 0.1473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12441422045230865, + "step": 3075, + "valid_targets_mean": 5265.6, + "valid_targets_min": 787 + }, + { + "epoch": 4.896661367249602, + "grad_norm": 0.48474645193150095, + "learning_rate": 1.0045814769743731e-05, + "loss": 0.1372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13803794980049133, + "step": 3080, + "valid_targets_mean": 3752.9, + "valid_targets_min": 725 + }, + { + "epoch": 4.904610492845787, + "grad_norm": 0.558064394873645, + "learning_rate": 9.977118792610719e-06, + "loss": 0.1415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12287827581167221, + "step": 3085, + "valid_targets_mean": 3135.6, + "valid_targets_min": 673 + }, + { + "epoch": 4.912559618441971, + "grad_norm": 0.47354314556138427, + "learning_rate": 9.908580359825204e-06, + "loss": 0.136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11178020387887955, + "step": 3090, + "valid_targets_mean": 4488.8, + "valid_targets_min": 466 + }, + { + "epoch": 4.920508744038155, + "grad_norm": 0.4188496112443199, + "learning_rate": 9.840200548706435e-06, + "loss": 0.1311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1269189715385437, + "step": 3095, + "valid_targets_mean": 4419.6, + "valid_targets_min": 636 + }, + { + "epoch": 4.92845786963434, + "grad_norm": 0.48069575474565923, + "learning_rate": 9.771980434080348e-06, + "loss": 0.1502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14761850237846375, + "step": 3100, + "valid_targets_mean": 4263.5, + "valid_targets_min": 609 + }, + { + "epoch": 4.9364069952305245, + "grad_norm": 0.47361836995858364, + "learning_rate": 9.70392108826269e-06, + "loss": 0.1303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1389940083026886, + "step": 3105, + "valid_targets_mean": 3997.1, + "valid_targets_min": 538 + }, + { + "epoch": 4.9443561208267095, + "grad_norm": 0.502593429400252, + "learning_rate": 9.636023581042191e-06, + "loss": 0.1352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1681274026632309, + "step": 3110, + "valid_targets_mean": 3780.9, + "valid_targets_min": 856 + }, + { + "epoch": 4.952305246422894, + "grad_norm": 0.6771515271251293, + "learning_rate": 9.5682889796637e-06, + "loss": 0.1403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1500101536512375, + "step": 3115, + "valid_targets_mean": 4520.4, + "valid_targets_min": 549 + }, + { + "epoch": 4.960254372019078, + "grad_norm": 0.4872072441985026, + "learning_rate": 9.500718348811457e-06, + "loss": 0.1442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1341622769832611, + "step": 3120, + "valid_targets_mean": 3533.3, + "valid_targets_min": 264 + }, + { + "epoch": 4.968203497615263, + "grad_norm": 0.4872550048404107, + "learning_rate": 9.433312750592337e-06, + "loss": 0.1348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12632372975349426, + "step": 3125, + "valid_targets_mean": 3567.7, + "valid_targets_min": 758 + }, + { + "epoch": 4.976152623211447, + "grad_norm": 0.4963406118007583, + "learning_rate": 9.366073244519124e-06, + "loss": 0.1262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12338396161794662, + "step": 3130, + "valid_targets_mean": 3381.1, + "valid_targets_min": 551 + }, + { + "epoch": 4.984101748807631, + "grad_norm": 0.5634271164967108, + "learning_rate": 9.299000887493934e-06, + "loss": 0.13, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14706090092658997, + "step": 3135, + "valid_targets_mean": 3212.7, + "valid_targets_min": 686 + }, + { + "epoch": 4.992050874403816, + "grad_norm": 0.5169455768152297, + "learning_rate": 9.232096733791518e-06, + "loss": 0.1339, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14801155030727386, + "step": 3140, + "valid_targets_mean": 3497.0, + "valid_targets_min": 1131 + }, + { + "epoch": 5.0, + "grad_norm": 0.4146566743306906, + "learning_rate": 9.165361835042734e-06, + "loss": 0.1386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12379302084445953, + "step": 3145, + "valid_targets_mean": 4029.0, + "valid_targets_min": 281 + }, + { + "epoch": 5.007949125596184, + "grad_norm": 0.457923014155659, + "learning_rate": 9.098797240218036e-06, + "loss": 0.121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10793712735176086, + "step": 3150, + "valid_targets_mean": 4072.9, + "valid_targets_min": 746 + }, + { + "epoch": 5.015898251192369, + "grad_norm": 0.5279976098261123, + "learning_rate": 9.032403995610937e-06, + "loss": 0.1228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11182405054569244, + "step": 3155, + "valid_targets_mean": 2553.0, + "valid_targets_min": 538 + }, + { + "epoch": 5.023847376788553, + "grad_norm": 0.48776564405835493, + "learning_rate": 8.966183144821583e-06, + "loss": 0.1147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10740777850151062, + "step": 3160, + "valid_targets_mean": 3739.8, + "valid_targets_min": 564 + }, + { + "epoch": 5.031796502384737, + "grad_norm": 0.4595972840148685, + "learning_rate": 8.900135728740373e-06, + "loss": 0.1239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11679710447788239, + "step": 3165, + "valid_targets_mean": 4084.7, + "valid_targets_min": 276 + }, + { + "epoch": 5.039745627980922, + "grad_norm": 0.47396166912319276, + "learning_rate": 8.83426278553158e-06, + "loss": 0.1073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11660698056221008, + "step": 3170, + "valid_targets_mean": 3834.4, + "valid_targets_min": 644 + }, + { + "epoch": 5.047694753577106, + "grad_norm": 0.5657974369615991, + "learning_rate": 8.768565350616998e-06, + "loss": 0.1366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16728852689266205, + "step": 3175, + "valid_targets_mean": 3509.4, + "valid_targets_min": 610 + }, + { + "epoch": 5.0556438791732905, + "grad_norm": 0.4887634707217319, + "learning_rate": 8.703044456659741e-06, + "loss": 0.1238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11223262548446655, + "step": 3180, + "valid_targets_mean": 4200.7, + "valid_targets_min": 598 + }, + { + "epoch": 5.0635930047694755, + "grad_norm": 0.5536746484103304, + "learning_rate": 8.63770113354794e-06, + "loss": 0.1229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12999343872070312, + "step": 3185, + "valid_targets_mean": 3016.6, + "valid_targets_min": 486 + }, + { + "epoch": 5.07154213036566, + "grad_norm": 0.43802408737800774, + "learning_rate": 8.572536408378587e-06, + "loss": 0.1221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0922771543264389, + "step": 3190, + "valid_targets_mean": 4741.3, + "valid_targets_min": 285 + }, + { + "epoch": 5.079491255961845, + "grad_norm": 0.4652049118167889, + "learning_rate": 8.507551305441408e-06, + "loss": 0.1226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12782129645347595, + "step": 3195, + "valid_targets_mean": 4579.9, + "valid_targets_min": 568 + }, + { + "epoch": 5.087440381558029, + "grad_norm": 0.43604310018158265, + "learning_rate": 8.442746846202711e-06, + "loss": 0.119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09423212707042694, + "step": 3200, + "valid_targets_mean": 4047.1, + "valid_targets_min": 1021 + }, + { + "epoch": 5.095389507154213, + "grad_norm": 0.49985284842571, + "learning_rate": 8.378124049289394e-06, + "loss": 0.1135, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12812739610671997, + "step": 3205, + "valid_targets_mean": 3787.6, + "valid_targets_min": 576 + }, + { + "epoch": 5.103338632750398, + "grad_norm": 0.5306993606687345, + "learning_rate": 8.313683930472889e-06, + "loss": 0.1259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13846567273139954, + "step": 3210, + "valid_targets_mean": 3789.9, + "valid_targets_min": 537 + }, + { + "epoch": 5.111287758346582, + "grad_norm": 0.526017364900564, + "learning_rate": 8.249427502653198e-06, + "loss": 0.1231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13277852535247803, + "step": 3215, + "valid_targets_mean": 3243.6, + "valid_targets_min": 623 + }, + { + "epoch": 5.119236883942766, + "grad_norm": 0.4826797520729495, + "learning_rate": 8.185355775842982e-06, + "loss": 0.1424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1628974825143814, + "step": 3220, + "valid_targets_mean": 4196.1, + "valid_targets_min": 549 + }, + { + "epoch": 5.127186009538951, + "grad_norm": 0.47266391553239756, + "learning_rate": 8.12146975715171e-06, + "loss": 0.1304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.124434694647789, + "step": 3225, + "valid_targets_mean": 4489.1, + "valid_targets_min": 294 + }, + { + "epoch": 5.135135135135135, + "grad_norm": 0.5104351929242484, + "learning_rate": 8.057770450769771e-06, + "loss": 0.1269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14183968305587769, + "step": 3230, + "valid_targets_mean": 3486.3, + "valid_targets_min": 660 + }, + { + "epoch": 5.143084260731319, + "grad_norm": 0.5350401545785989, + "learning_rate": 7.994258857952748e-06, + "loss": 0.1194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14458492398262024, + "step": 3235, + "valid_targets_mean": 3730.2, + "valid_targets_min": 679 + }, + { + "epoch": 5.151033386327504, + "grad_norm": 0.47568127630523516, + "learning_rate": 7.93093597700564e-06, + "loss": 0.1122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11599075794219971, + "step": 3240, + "valid_targets_mean": 3875.6, + "valid_targets_min": 690 + }, + { + "epoch": 5.158982511923688, + "grad_norm": 0.5212929399191217, + "learning_rate": 7.867802803267182e-06, + "loss": 0.1249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11911657452583313, + "step": 3245, + "valid_targets_mean": 3369.2, + "valid_targets_min": 221 + }, + { + "epoch": 5.166931637519872, + "grad_norm": 0.44904326831297614, + "learning_rate": 7.80486032909421e-06, + "loss": 0.1283, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13542252779006958, + "step": 3250, + "valid_targets_mean": 5412.6, + "valid_targets_min": 706 + }, + { + "epoch": 5.174880763116057, + "grad_norm": 0.5816953917270461, + "learning_rate": 7.742109543846063e-06, + "loss": 0.1274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12738856673240662, + "step": 3255, + "valid_targets_mean": 2924.6, + "valid_targets_min": 672 + }, + { + "epoch": 5.1828298887122415, + "grad_norm": 0.4706987710505265, + "learning_rate": 7.679551433869001e-06, + "loss": 0.1197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11224916577339172, + "step": 3260, + "valid_targets_mean": 4329.3, + "valid_targets_min": 249 + }, + { + "epoch": 5.1907790143084265, + "grad_norm": 0.4351758065293425, + "learning_rate": 7.617186982480749e-06, + "loss": 0.1186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10177105665206909, + "step": 3265, + "valid_targets_mean": 3785.4, + "valid_targets_min": 1054 + }, + { + "epoch": 5.198728139904611, + "grad_norm": 0.5015716097475679, + "learning_rate": 7.5550171699549945e-06, + "loss": 0.1219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14602243900299072, + "step": 3270, + "valid_targets_mean": 3782.1, + "valid_targets_min": 663 + }, + { + "epoch": 5.206677265500795, + "grad_norm": 0.4602593511104768, + "learning_rate": 7.493042973506e-06, + "loss": 0.1194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11888657510280609, + "step": 3275, + "valid_targets_mean": 4810.8, + "valid_targets_min": 1011 + }, + { + "epoch": 5.21462639109698, + "grad_norm": 0.5526497578097684, + "learning_rate": 7.431265367273268e-06, + "loss": 0.1144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11730851978063583, + "step": 3280, + "valid_targets_mean": 3694.4, + "valid_targets_min": 281 + }, + { + "epoch": 5.222575516693164, + "grad_norm": 0.5252464567539773, + "learning_rate": 7.36968532230617e-06, + "loss": 0.1428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1225329339504242, + "step": 3285, + "valid_targets_mean": 3195.1, + "valid_targets_min": 258 + }, + { + "epoch": 5.230524642289348, + "grad_norm": 0.5192954833459635, + "learning_rate": 7.308303806548742e-06, + "loss": 0.129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13095176219940186, + "step": 3290, + "valid_targets_mean": 3724.8, + "valid_targets_min": 636 + }, + { + "epoch": 5.238473767885533, + "grad_norm": 0.5487669317072835, + "learning_rate": 7.247121784824445e-06, + "loss": 0.1258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13627754151821136, + "step": 3295, + "valid_targets_mean": 3452.6, + "valid_targets_min": 324 + }, + { + "epoch": 5.246422893481717, + "grad_norm": 0.528698693229382, + "learning_rate": 7.186140218820979e-06, + "loss": 0.1303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12275183945894241, + "step": 3300, + "valid_targets_mean": 4150.6, + "valid_targets_min": 768 + }, + { + "epoch": 5.254372019077901, + "grad_norm": 0.5046952538958149, + "learning_rate": 7.125360067075196e-06, + "loss": 0.1216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13393549621105194, + "step": 3305, + "valid_targets_mean": 3828.8, + "valid_targets_min": 262 + }, + { + "epoch": 5.262321144674086, + "grad_norm": 0.5275289393785, + "learning_rate": 7.0647822849580385e-06, + "loss": 0.1117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11984727531671524, + "step": 3310, + "valid_targets_mean": 2990.9, + "valid_targets_min": 465 + }, + { + "epoch": 5.27027027027027, + "grad_norm": 0.5986098211183422, + "learning_rate": 7.004407824659491e-06, + "loss": 0.1223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12287823855876923, + "step": 3315, + "valid_targets_mean": 2975.7, + "valid_targets_min": 733 + }, + { + "epoch": 5.278219395866454, + "grad_norm": 0.493508460343489, + "learning_rate": 6.944237635173627e-06, + "loss": 0.1373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1410299688577652, + "step": 3320, + "valid_targets_mean": 4679.4, + "valid_targets_min": 542 + }, + { + "epoch": 5.286168521462639, + "grad_norm": 0.5340963975918633, + "learning_rate": 6.88427266228372e-06, + "loss": 0.1258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11206860840320587, + "step": 3325, + "valid_targets_mean": 2824.8, + "valid_targets_min": 548 + }, + { + "epoch": 5.294117647058823, + "grad_norm": 0.5288120769728034, + "learning_rate": 6.824513848547323e-06, + "loss": 0.1258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13739335536956787, + "step": 3330, + "valid_targets_mean": 2994.0, + "valid_targets_min": 774 + }, + { + "epoch": 5.302066772655008, + "grad_norm": 0.4460570196931101, + "learning_rate": 6.764962133281503e-06, + "loss": 0.1161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0984058827161789, + "step": 3335, + "valid_targets_mean": 4019.4, + "valid_targets_min": 657 + }, + { + "epoch": 5.3100158982511925, + "grad_norm": 0.47811950799417524, + "learning_rate": 6.705618452548057e-06, + "loss": 0.1267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11787131428718567, + "step": 3340, + "valid_targets_mean": 4479.8, + "valid_targets_min": 557 + }, + { + "epoch": 5.317965023847377, + "grad_norm": 0.4666566027641988, + "learning_rate": 6.646483739138778e-06, + "loss": 0.1112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12056516110897064, + "step": 3345, + "valid_targets_mean": 3949.9, + "valid_targets_min": 1048 + }, + { + "epoch": 5.325914149443562, + "grad_norm": 0.4975374351704887, + "learning_rate": 6.5875589225608376e-06, + "loss": 0.1224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11528253555297852, + "step": 3350, + "valid_targets_mean": 3459.3, + "valid_targets_min": 671 + }, + { + "epoch": 5.333863275039746, + "grad_norm": 0.4880004851265583, + "learning_rate": 6.528844929022134e-06, + "loss": 0.1219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10715194791555405, + "step": 3355, + "valid_targets_mean": 3779.9, + "valid_targets_min": 555 + }, + { + "epoch": 5.34181240063593, + "grad_norm": 0.5375997285477788, + "learning_rate": 6.4703426814167434e-06, + "loss": 0.1247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12751339375972748, + "step": 3360, + "valid_targets_mean": 3645.8, + "valid_targets_min": 635 + }, + { + "epoch": 5.349761526232115, + "grad_norm": 0.5313239520073082, + "learning_rate": 6.412053099310449e-06, + "loss": 0.1167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12041355669498444, + "step": 3365, + "valid_targets_mean": 4356.8, + "valid_targets_min": 730 + }, + { + "epoch": 5.357710651828299, + "grad_norm": 0.5428782069622755, + "learning_rate": 6.353977098926225e-06, + "loss": 0.1278, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13842326402664185, + "step": 3370, + "valid_targets_mean": 3841.1, + "valid_targets_min": 279 + }, + { + "epoch": 5.365659777424483, + "grad_norm": 0.5475098794287568, + "learning_rate": 6.296115593129888e-06, + "loss": 0.1177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12307454645633698, + "step": 3375, + "valid_targets_mean": 4148.9, + "valid_targets_min": 519 + }, + { + "epoch": 5.373608903020668, + "grad_norm": 0.797877703342453, + "learning_rate": 6.238469491415728e-06, + "loss": 0.1184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13038906455039978, + "step": 3380, + "valid_targets_mean": 2991.3, + "valid_targets_min": 369 + }, + { + "epoch": 5.381558028616852, + "grad_norm": 0.48617214325748453, + "learning_rate": 6.181039699892206e-06, + "loss": 0.1255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11232590675354004, + "step": 3385, + "valid_targets_mean": 3804.4, + "valid_targets_min": 591 + }, + { + "epoch": 5.389507154213036, + "grad_norm": 0.5086690584311961, + "learning_rate": 6.123827121267709e-06, + "loss": 0.1151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12465276569128036, + "step": 3390, + "valid_targets_mean": 4029.9, + "valid_targets_min": 821 + }, + { + "epoch": 5.397456279809221, + "grad_norm": 0.4123929559301535, + "learning_rate": 6.066832654836396e-06, + "loss": 0.1228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10203292965888977, + "step": 3395, + "valid_targets_mean": 4760.1, + "valid_targets_min": 591 + }, + { + "epoch": 5.405405405405405, + "grad_norm": 0.5479065770064241, + "learning_rate": 6.010057196464012e-06, + "loss": 0.1323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11120990663766861, + "step": 3400, + "valid_targets_mean": 3094.6, + "valid_targets_min": 221 + }, + { + "epoch": 5.413354531001589, + "grad_norm": 0.48420485798613333, + "learning_rate": 5.9535016385738335e-06, + "loss": 0.1226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10648505389690399, + "step": 3405, + "valid_targets_mean": 4042.4, + "valid_targets_min": 1384 + }, + { + "epoch": 5.421303656597774, + "grad_norm": 0.48392218431147466, + "learning_rate": 5.897166870132658e-06, + "loss": 0.116, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11931923776865005, + "step": 3410, + "valid_targets_mean": 3597.8, + "valid_targets_min": 665 + }, + { + "epoch": 5.4292527821939585, + "grad_norm": 0.5393253911493405, + "learning_rate": 5.841053776636781e-06, + "loss": 0.1157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12923894822597504, + "step": 3415, + "valid_targets_mean": 3682.3, + "valid_targets_min": 693 + }, + { + "epoch": 5.4372019077901435, + "grad_norm": 0.5792385751129733, + "learning_rate": 5.7851632400981285e-06, + "loss": 0.1149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10763402283191681, + "step": 3420, + "valid_targets_mean": 4127.3, + "valid_targets_min": 687 + }, + { + "epoch": 5.4451510333863276, + "grad_norm": 0.48182773969358605, + "learning_rate": 5.729496139030377e-06, + "loss": 0.1222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1422126591205597, + "step": 3425, + "valid_targets_mean": 4550.5, + "valid_targets_min": 1598 + }, + { + "epoch": 5.453100158982512, + "grad_norm": 0.41697301913199153, + "learning_rate": 5.67405334843512e-06, + "loss": 0.1165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10176099836826324, + "step": 3430, + "valid_targets_mean": 5134.2, + "valid_targets_min": 581 + }, + { + "epoch": 5.461049284578697, + "grad_norm": 0.5513948557355665, + "learning_rate": 5.618835739788136e-06, + "loss": 0.1175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12029505521059036, + "step": 3435, + "valid_targets_mean": 2973.1, + "valid_targets_min": 312 + }, + { + "epoch": 5.468998410174881, + "grad_norm": 0.5035419846944795, + "learning_rate": 5.563844181025706e-06, + "loss": 0.1216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11482226848602295, + "step": 3440, + "valid_targets_mean": 3700.4, + "valid_targets_min": 993 + }, + { + "epoch": 5.476947535771065, + "grad_norm": 0.5134434569519974, + "learning_rate": 5.509079536530939e-06, + "loss": 0.1313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14722689986228943, + "step": 3445, + "valid_targets_mean": 4011.1, + "valid_targets_min": 1226 + }, + { + "epoch": 5.48489666136725, + "grad_norm": 0.6045299385441325, + "learning_rate": 5.4545426671201905e-06, + "loss": 0.1411, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12199525535106659, + "step": 3450, + "valid_targets_mean": 3293.1, + "valid_targets_min": 563 + }, + { + "epoch": 5.492845786963434, + "grad_norm": 0.550935705887857, + "learning_rate": 5.400234430029561e-06, + "loss": 0.136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14424438774585724, + "step": 3455, + "valid_targets_mean": 3206.7, + "valid_targets_min": 688 + }, + { + "epoch": 5.500794912559618, + "grad_norm": 0.4951329559478596, + "learning_rate": 5.346155678901392e-06, + "loss": 0.1175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12117502838373184, + "step": 3460, + "valid_targets_mean": 3883.6, + "valid_targets_min": 679 + }, + { + "epoch": 5.508744038155803, + "grad_norm": 0.531009459855668, + "learning_rate": 5.292307263770859e-06, + "loss": 0.1244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1292455941438675, + "step": 3465, + "valid_targets_mean": 3177.2, + "valid_targets_min": 636 + }, + { + "epoch": 5.516693163751987, + "grad_norm": 0.5665502788954249, + "learning_rate": 5.238690031052603e-06, + "loss": 0.1166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11746140569448471, + "step": 3470, + "valid_targets_mean": 2960.9, + "valid_targets_min": 290 + }, + { + "epoch": 5.524642289348172, + "grad_norm": 0.47470831904665967, + "learning_rate": 5.185304823527426e-06, + "loss": 0.1122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11849209666252136, + "step": 3475, + "valid_targets_mean": 4295.4, + "valid_targets_min": 574 + }, + { + "epoch": 5.532591414944356, + "grad_norm": 0.5348564893263684, + "learning_rate": 5.132152480329072e-06, + "loss": 0.1252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13434147834777832, + "step": 3480, + "valid_targets_mean": 3653.6, + "valid_targets_min": 593 + }, + { + "epoch": 5.54054054054054, + "grad_norm": 0.45313684699490947, + "learning_rate": 5.07923383693099e-06, + "loss": 0.1123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10309700667858124, + "step": 3485, + "valid_targets_mean": 3730.9, + "valid_targets_min": 545 + }, + { + "epoch": 5.548489666136725, + "grad_norm": 0.5594176484506791, + "learning_rate": 5.0265497251332314e-06, + "loss": 0.1159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11101377010345459, + "step": 3490, + "valid_targets_mean": 3820.3, + "valid_targets_min": 269 + }, + { + "epoch": 5.556438791732909, + "grad_norm": 0.45166597844599227, + "learning_rate": 4.974100973049385e-06, + "loss": 0.1247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10583409667015076, + "step": 3495, + "valid_targets_mean": 4374.6, + "valid_targets_min": 275 + }, + { + "epoch": 5.5643879173290935, + "grad_norm": 0.5180648698026142, + "learning_rate": 4.921888405093525e-06, + "loss": 0.1212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11158320307731628, + "step": 3500, + "valid_targets_mean": 3499.5, + "valid_targets_min": 749 + }, + { + "epoch": 5.5723370429252785, + "grad_norm": 0.4509178468225708, + "learning_rate": 4.869912841967286e-06, + "loss": 0.1149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11846384406089783, + "step": 3505, + "valid_targets_mean": 4619.4, + "valid_targets_min": 982 + }, + { + "epoch": 5.580286168521463, + "grad_norm": 0.5462056280603287, + "learning_rate": 4.818175100646952e-06, + "loss": 0.1288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12996745109558105, + "step": 3510, + "valid_targets_mean": 3452.2, + "valid_targets_min": 715 + }, + { + "epoch": 5.588235294117647, + "grad_norm": 0.4452887332184351, + "learning_rate": 4.766675994370598e-06, + "loss": 0.1148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12007399648427963, + "step": 3515, + "valid_targets_mean": 3976.9, + "valid_targets_min": 528 + }, + { + "epoch": 5.596184419713832, + "grad_norm": 0.4955106517830543, + "learning_rate": 4.7154163326253265e-06, + "loss": 0.1311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1265922635793686, + "step": 3520, + "valid_targets_mean": 4157.2, + "valid_targets_min": 639 + }, + { + "epoch": 5.604133545310016, + "grad_norm": 0.4967818054308162, + "learning_rate": 4.664396921134551e-06, + "loss": 0.1307, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12537996470928192, + "step": 3525, + "valid_targets_mean": 3731.1, + "valid_targets_min": 711 + }, + { + "epoch": 5.6120826709062, + "grad_norm": 0.5922113133845275, + "learning_rate": 4.613618561845306e-06, + "loss": 0.128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18149852752685547, + "step": 3530, + "valid_targets_mean": 3075.6, + "valid_targets_min": 717 + }, + { + "epoch": 5.620031796502385, + "grad_norm": 0.4486775225512763, + "learning_rate": 4.563082052915649e-06, + "loss": 0.1233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10791641473770142, + "step": 3535, + "valid_targets_mean": 4184.4, + "valid_targets_min": 1044 + }, + { + "epoch": 5.627980922098569, + "grad_norm": 0.47196647754075016, + "learning_rate": 4.512788188702135e-06, + "loss": 0.1074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10782913118600845, + "step": 3540, + "valid_targets_mean": 4232.4, + "valid_targets_min": 286 + }, + { + "epoch": 5.635930047694753, + "grad_norm": 0.40473591866365777, + "learning_rate": 4.462737759747315e-06, + "loss": 0.1143, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09814178198575974, + "step": 3545, + "valid_targets_mean": 5018.9, + "valid_targets_min": 534 + }, + { + "epoch": 5.643879173290938, + "grad_norm": 0.4970418048544055, + "learning_rate": 4.412931552767295e-06, + "loss": 0.1194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12168466299772263, + "step": 3550, + "valid_targets_mean": 3969.1, + "valid_targets_min": 908 + }, + { + "epoch": 5.651828298887122, + "grad_norm": 0.4466747416437135, + "learning_rate": 4.363370350639405e-06, + "loss": 0.1236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11409342288970947, + "step": 3555, + "valid_targets_mean": 4624.2, + "valid_targets_min": 668 + }, + { + "epoch": 5.659777424483307, + "grad_norm": 0.548867935644448, + "learning_rate": 4.314054932389859e-06, + "loss": 0.1214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13996033370494843, + "step": 3560, + "valid_targets_mean": 4342.4, + "valid_targets_min": 668 + }, + { + "epoch": 5.667726550079491, + "grad_norm": 0.48475073547335606, + "learning_rate": 4.2649860731815255e-06, + "loss": 0.1262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12767939269542694, + "step": 3565, + "valid_targets_mean": 3903.4, + "valid_targets_min": 580 + }, + { + "epoch": 5.675675675675675, + "grad_norm": 0.5810018741448055, + "learning_rate": 4.216164544301755e-06, + "loss": 0.1234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12019410729408264, + "step": 3570, + "valid_targets_mean": 2819.9, + "valid_targets_min": 277 + }, + { + "epoch": 5.68362480127186, + "grad_norm": 0.45812190476555054, + "learning_rate": 4.167591113150225e-06, + "loss": 0.1177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0970279797911644, + "step": 3575, + "valid_targets_mean": 3835.1, + "valid_targets_min": 606 + }, + { + "epoch": 5.6915739268680445, + "grad_norm": 0.4583758611151626, + "learning_rate": 4.119266543226921e-06, + "loss": 0.1146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11823764443397522, + "step": 3580, + "valid_targets_mean": 4078.4, + "valid_targets_min": 686 + }, + { + "epoch": 5.699523052464229, + "grad_norm": 0.5300561778832891, + "learning_rate": 4.071191594120081e-06, + "loss": 0.1311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1100766509771347, + "step": 3585, + "valid_targets_mean": 3071.7, + "valid_targets_min": 259 + }, + { + "epoch": 5.707472178060414, + "grad_norm": 0.5096830083092221, + "learning_rate": 4.023367021494313e-06, + "loss": 0.1224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13244731724262238, + "step": 3590, + "valid_targets_mean": 3879.1, + "valid_targets_min": 709 + }, + { + "epoch": 5.715421303656598, + "grad_norm": 0.6172482897906679, + "learning_rate": 3.975793577078682e-06, + "loss": 0.1147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11370215564966202, + "step": 3595, + "valid_targets_mean": 3879.4, + "valid_targets_min": 1163 + }, + { + "epoch": 5.723370429252782, + "grad_norm": 0.5043378824343994, + "learning_rate": 3.928472008654891e-06, + "loss": 0.1105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1176089495420456, + "step": 3600, + "valid_targets_mean": 4378.8, + "valid_targets_min": 819 + }, + { + "epoch": 5.731319554848967, + "grad_norm": 0.5132872802234864, + "learning_rate": 3.881403060045545e-06, + "loss": 0.1217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10852010548114777, + "step": 3605, + "valid_targets_mean": 3407.3, + "valid_targets_min": 859 + }, + { + "epoch": 5.739268680445151, + "grad_norm": 0.5063474175192859, + "learning_rate": 3.834587471102464e-06, + "loss": 0.1203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11489781737327576, + "step": 3610, + "valid_targets_mean": 3358.4, + "valid_targets_min": 724 + }, + { + "epoch": 5.747217806041336, + "grad_norm": 0.44741625934657075, + "learning_rate": 3.7880259776950224e-06, + "loss": 0.1189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11125510931015015, + "step": 3615, + "valid_targets_mean": 4725.9, + "valid_targets_min": 935 + }, + { + "epoch": 5.75516693163752, + "grad_norm": 0.5622460074052544, + "learning_rate": 3.741719311698608e-06, + "loss": 0.1254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1445854902267456, + "step": 3620, + "valid_targets_mean": 3845.8, + "valid_targets_min": 326 + }, + { + "epoch": 5.763116057233704, + "grad_norm": 0.5095441924194869, + "learning_rate": 3.69566820098312e-06, + "loss": 0.118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11811588704586029, + "step": 3625, + "valid_targets_mean": 3504.1, + "valid_targets_min": 499 + }, + { + "epoch": 5.771065182829888, + "grad_norm": 0.6091555550332428, + "learning_rate": 3.6498733694015197e-06, + "loss": 0.1204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15617358684539795, + "step": 3630, + "valid_targets_mean": 3396.4, + "valid_targets_min": 219 + }, + { + "epoch": 5.779014308426073, + "grad_norm": 0.5229211609685805, + "learning_rate": 3.604335536778434e-06, + "loss": 0.1146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13374817371368408, + "step": 3635, + "valid_targets_mean": 4108.9, + "valid_targets_min": 625 + }, + { + "epoch": 5.786963434022257, + "grad_norm": 0.5225267465274916, + "learning_rate": 3.559055418898887e-06, + "loss": 0.1323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1492437720298767, + "step": 3640, + "valid_targets_mean": 3701.8, + "valid_targets_min": 262 + }, + { + "epoch": 5.794912559618442, + "grad_norm": 0.49027694616246914, + "learning_rate": 3.5140337274970014e-06, + "loss": 0.1205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12383852899074554, + "step": 3645, + "valid_targets_mean": 5302.7, + "valid_targets_min": 992 + }, + { + "epoch": 5.802861685214626, + "grad_norm": 0.49546019569767785, + "learning_rate": 3.469271170244832e-06, + "loss": 0.1157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11465057730674744, + "step": 3650, + "valid_targets_mean": 3762.5, + "valid_targets_min": 1734 + }, + { + "epoch": 5.8108108108108105, + "grad_norm": 0.6328363349278949, + "learning_rate": 3.4247684507412605e-06, + "loss": 0.1242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13153359293937683, + "step": 3655, + "valid_targets_mean": 2784.2, + "valid_targets_min": 294 + }, + { + "epoch": 5.8187599364069955, + "grad_norm": 0.502167626760723, + "learning_rate": 3.380526268500892e-06, + "loss": 0.1181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12254327535629272, + "step": 3660, + "valid_targets_mean": 3786.4, + "valid_targets_min": 193 + }, + { + "epoch": 5.82670906200318, + "grad_norm": 0.45125186924627153, + "learning_rate": 3.3365453189430984e-06, + "loss": 0.1226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10970175266265869, + "step": 3665, + "valid_targets_mean": 4506.9, + "valid_targets_min": 1367 + }, + { + "epoch": 5.834658187599364, + "grad_norm": 0.5205657434846551, + "learning_rate": 3.292826293381071e-06, + "loss": 0.1272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13056659698486328, + "step": 3670, + "valid_targets_mean": 4076.0, + "valid_targets_min": 1257 + }, + { + "epoch": 5.842607313195549, + "grad_norm": 0.5429453705327059, + "learning_rate": 3.2493698790109664e-06, + "loss": 0.1225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12193150818347931, + "step": 3675, + "valid_targets_mean": 3906.0, + "valid_targets_min": 444 + }, + { + "epoch": 5.850556438791733, + "grad_norm": 0.6227078505185998, + "learning_rate": 3.2061767589010763e-06, + "loss": 0.1258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13154500722885132, + "step": 3680, + "valid_targets_mean": 2870.4, + "valid_targets_min": 255 + }, + { + "epoch": 5.858505564387917, + "grad_norm": 0.49948300350719843, + "learning_rate": 3.1632476119811285e-06, + "loss": 0.1214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11474519222974777, + "step": 3685, + "valid_targets_mean": 4023.4, + "valid_targets_min": 737 + }, + { + "epoch": 5.866454689984102, + "grad_norm": 0.5200212537952184, + "learning_rate": 3.120583113031579e-06, + "loss": 0.1275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13559041917324066, + "step": 3690, + "valid_targets_mean": 3464.1, + "valid_targets_min": 203 + }, + { + "epoch": 5.874403815580286, + "grad_norm": 0.46446067076843983, + "learning_rate": 3.07818393267304e-06, + "loss": 0.1177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09316763281822205, + "step": 3695, + "valid_targets_mean": 3673.7, + "valid_targets_min": 231 + }, + { + "epoch": 5.882352941176471, + "grad_norm": 0.4404533791714637, + "learning_rate": 3.036050737355709e-06, + "loss": 0.1165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09938673675060272, + "step": 3700, + "valid_targets_mean": 4383.9, + "valid_targets_min": 1208 + }, + { + "epoch": 5.890302066772655, + "grad_norm": 0.5161902873588942, + "learning_rate": 2.9941841893489075e-06, + "loss": 0.1276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1233370378613472, + "step": 3705, + "valid_targets_mean": 4478.1, + "valid_targets_min": 1631 + }, + { + "epoch": 5.898251192368839, + "grad_norm": 0.49950014205727683, + "learning_rate": 2.9525849467306766e-06, + "loss": 0.1286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1349901705980301, + "step": 3710, + "valid_targets_mean": 4360.8, + "valid_targets_min": 1207 + }, + { + "epoch": 5.906200317965024, + "grad_norm": 0.6255270102689031, + "learning_rate": 2.9112536633774245e-06, + "loss": 0.1297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14242327213287354, + "step": 3715, + "valid_targets_mean": 2748.8, + "valid_targets_min": 566 + }, + { + "epoch": 5.914149443561208, + "grad_norm": 0.4795827841967414, + "learning_rate": 2.8701909889536384e-06, + "loss": 0.1223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12320660799741745, + "step": 3720, + "valid_targets_mean": 3648.8, + "valid_targets_min": 1105 + }, + { + "epoch": 5.922098569157392, + "grad_norm": 0.46609837304180957, + "learning_rate": 2.8293975689017018e-06, + "loss": 0.121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09863148629665375, + "step": 3725, + "valid_targets_mean": 3843.1, + "valid_targets_min": 1049 + }, + { + "epoch": 5.930047694753577, + "grad_norm": 0.5299211238306798, + "learning_rate": 2.788874044431722e-06, + "loss": 0.1191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11683914065361023, + "step": 3730, + "valid_targets_mean": 3052.8, + "valid_targets_min": 357 + }, + { + "epoch": 5.9379968203497615, + "grad_norm": 0.5208980690753293, + "learning_rate": 2.7486210525114533e-06, + "loss": 0.1133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12761931121349335, + "step": 3735, + "valid_targets_mean": 3831.2, + "valid_targets_min": 468 + }, + { + "epoch": 5.945945945945946, + "grad_norm": 0.6678323734362, + "learning_rate": 2.708639225856311e-06, + "loss": 0.1237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12598900496959686, + "step": 3740, + "valid_targets_mean": 3501.5, + "valid_targets_min": 668 + }, + { + "epoch": 5.953895071542131, + "grad_norm": 0.5264094063837836, + "learning_rate": 2.6689291929193962e-06, + "loss": 0.1282, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13515892624855042, + "step": 3745, + "valid_targets_mean": 3571.8, + "valid_targets_min": 744 + }, + { + "epoch": 5.961844197138315, + "grad_norm": 0.42300031769118174, + "learning_rate": 2.629491577881622e-06, + "loss": 0.1131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10418035089969635, + "step": 3750, + "valid_targets_mean": 4830.2, + "valid_targets_min": 658 + }, + { + "epoch": 5.9697933227345, + "grad_norm": 0.4877715372350325, + "learning_rate": 2.5903270006419236e-06, + "loss": 0.1276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11131057143211365, + "step": 3755, + "valid_targets_mean": 3662.4, + "valid_targets_min": 384 + }, + { + "epoch": 5.977742448330684, + "grad_norm": 0.4822345208975422, + "learning_rate": 2.551436076807501e-06, + "loss": 0.1206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10331891477108002, + "step": 3760, + "valid_targets_mean": 3850.6, + "valid_targets_min": 1253 + }, + { + "epoch": 5.985691573926868, + "grad_norm": 0.5405507727305193, + "learning_rate": 2.5128194176841226e-06, + "loss": 0.1208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11677927523851395, + "step": 3765, + "valid_targets_mean": 3175.6, + "valid_targets_min": 560 + }, + { + "epoch": 5.993640699523052, + "grad_norm": 0.6454948139161141, + "learning_rate": 2.4744776302665563e-06, + "loss": 0.1213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13788512349128723, + "step": 3770, + "valid_targets_mean": 3206.7, + "valid_targets_min": 452 + }, + { + "epoch": 6.001589825119237, + "grad_norm": 0.44867451795140395, + "learning_rate": 2.436411317228997e-06, + "loss": 0.1413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1155017837882042, + "step": 3775, + "valid_targets_mean": 3869.2, + "valid_targets_min": 717 + }, + { + "epoch": 6.009538950715421, + "grad_norm": 0.48596893402956937, + "learning_rate": 2.3986210769155994e-06, + "loss": 0.1215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11859863996505737, + "step": 3780, + "valid_targets_mean": 3937.1, + "valid_targets_min": 790 + }, + { + "epoch": 6.017488076311606, + "grad_norm": 0.522552014133015, + "learning_rate": 2.361107503331095e-06, + "loss": 0.1206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13054189085960388, + "step": 3785, + "valid_targets_mean": 3340.6, + "valid_targets_min": 649 + }, + { + "epoch": 6.02543720190779, + "grad_norm": 0.5187257820275404, + "learning_rate": 2.3238711861314165e-06, + "loss": 0.1164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0982595682144165, + "step": 3790, + "valid_targets_mean": 3887.9, + "valid_targets_min": 225 + }, + { + "epoch": 6.033386327503974, + "grad_norm": 0.492221911506423, + "learning_rate": 2.2869127106144663e-06, + "loss": 0.1223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10179409384727478, + "step": 3795, + "valid_targets_mean": 3659.4, + "valid_targets_min": 581 + }, + { + "epoch": 6.041335453100159, + "grad_norm": 0.5706240801694208, + "learning_rate": 2.2502326577109e-06, + "loss": 0.1247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14859774708747864, + "step": 3800, + "valid_targets_mean": 3022.1, + "valid_targets_min": 576 + }, + { + "epoch": 6.049284578696343, + "grad_norm": 0.4903458403444158, + "learning_rate": 2.213831603974985e-06, + "loss": 0.1208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12874770164489746, + "step": 3805, + "valid_targets_mean": 3873.9, + "valid_targets_min": 1757 + }, + { + "epoch": 6.0572337042925275, + "grad_norm": 0.6672892423704999, + "learning_rate": 2.1777101215755624e-06, + "loss": 0.124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14127162098884583, + "step": 3810, + "valid_targets_mean": 2096.2, + "valid_targets_min": 307 + }, + { + "epoch": 6.0651828298887125, + "grad_norm": 0.4545999025709332, + "learning_rate": 2.1418687782870284e-06, + "loss": 0.115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1308123916387558, + "step": 3815, + "valid_targets_mean": 4371.7, + "valid_targets_min": 911 + }, + { + "epoch": 6.073131955484897, + "grad_norm": 0.487382004726638, + "learning_rate": 2.1063081374804263e-06, + "loss": 0.1156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11653155088424683, + "step": 3820, + "valid_targets_mean": 4370.7, + "valid_targets_min": 716 + }, + { + "epoch": 6.081081081081081, + "grad_norm": 0.5230128586154896, + "learning_rate": 2.0710287581145884e-06, + "loss": 0.1107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11809296905994415, + "step": 3825, + "valid_targets_mean": 3450.1, + "valid_targets_min": 468 + }, + { + "epoch": 6.089030206677266, + "grad_norm": 0.5190144803094725, + "learning_rate": 2.036031194727346e-06, + "loss": 0.1299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13838720321655273, + "step": 3830, + "valid_targets_mean": 3676.1, + "valid_targets_min": 623 + }, + { + "epoch": 6.09697933227345, + "grad_norm": 0.46738733555474804, + "learning_rate": 2.0013159974268094e-06, + "loss": 0.1182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11170323193073273, + "step": 3835, + "valid_targets_mean": 4278.0, + "valid_targets_min": 709 + }, + { + "epoch": 6.104928457869635, + "grad_norm": 0.4624128199722627, + "learning_rate": 1.9668837118827346e-06, + "loss": 0.1101, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09756626933813095, + "step": 3840, + "valid_targets_mean": 4007.2, + "valid_targets_min": 678 + }, + { + "epoch": 6.112877583465819, + "grad_norm": 0.5119513810750553, + "learning_rate": 1.932734879317937e-06, + "loss": 0.1265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10754363238811493, + "step": 3845, + "valid_targets_mean": 3588.8, + "valid_targets_min": 295 + }, + { + "epoch": 6.120826709062003, + "grad_norm": 0.41100363613961804, + "learning_rate": 1.8988700364997758e-06, + "loss": 0.117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10548628866672516, + "step": 3850, + "valid_targets_mean": 4552.5, + "valid_targets_min": 1126 + }, + { + "epoch": 6.128775834658188, + "grad_norm": 0.5133116111241467, + "learning_rate": 1.8652897157317395e-06, + "loss": 0.1165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14871114492416382, + "step": 3855, + "valid_targets_mean": 3678.9, + "valid_targets_min": 652 + }, + { + "epoch": 6.136724960254372, + "grad_norm": 0.5261537871220848, + "learning_rate": 1.8319944448450578e-06, + "loss": 0.1073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10628058761358261, + "step": 3860, + "valid_targets_mean": 3190.5, + "valid_targets_min": 257 + }, + { + "epoch": 6.144674085850556, + "grad_norm": 0.4713576235713761, + "learning_rate": 1.7989847471904065e-06, + "loss": 0.1097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09839437156915665, + "step": 3865, + "valid_targets_mean": 4262.8, + "valid_targets_min": 614 + }, + { + "epoch": 6.152623211446741, + "grad_norm": 0.6149608619383016, + "learning_rate": 1.766261141629706e-06, + "loss": 0.1184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10006123036146164, + "step": 3870, + "valid_targets_mean": 4775.4, + "valid_targets_min": 1547 + }, + { + "epoch": 6.160572337042925, + "grad_norm": 0.5676332722536679, + "learning_rate": 1.7338241425279244e-06, + "loss": 0.116, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1152639240026474, + "step": 3875, + "valid_targets_mean": 2869.9, + "valid_targets_min": 253 + }, + { + "epoch": 6.168521462639109, + "grad_norm": 0.43406528581225035, + "learning_rate": 1.7016742597450341e-06, + "loss": 0.1035, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0894886776804924, + "step": 3880, + "valid_targets_mean": 4228.3, + "valid_targets_min": 889 + }, + { + "epoch": 6.176470588235294, + "grad_norm": 0.4391775601429534, + "learning_rate": 1.6698119986279726e-06, + "loss": 0.1302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1086900532245636, + "step": 3885, + "valid_targets_mean": 4487.6, + "valid_targets_min": 1009 + }, + { + "epoch": 6.1844197138314785, + "grad_norm": 0.43257145105765316, + "learning_rate": 1.6382378600026982e-06, + "loss": 0.1129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09367109835147858, + "step": 3890, + "valid_targets_mean": 4436.7, + "valid_targets_min": 672 + }, + { + "epoch": 6.192368839427663, + "grad_norm": 0.46585023110175705, + "learning_rate": 1.60695234016633e-06, + "loss": 0.1239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11722202599048615, + "step": 3895, + "valid_targets_mean": 4294.6, + "valid_targets_min": 580 + }, + { + "epoch": 6.200317965023848, + "grad_norm": 0.4902572872103629, + "learning_rate": 1.5759559308793448e-06, + "loss": 0.1081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10776770859956741, + "step": 3900, + "valid_targets_mean": 3943.4, + "valid_targets_min": 271 + }, + { + "epoch": 6.208267090620032, + "grad_norm": 0.5594670199155989, + "learning_rate": 1.5452491193578412e-06, + "loss": 0.1202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12497895956039429, + "step": 3905, + "valid_targets_mean": 3098.4, + "valid_targets_min": 735 + }, + { + "epoch": 6.216216216216216, + "grad_norm": 0.5219935562912217, + "learning_rate": 1.5148323882658767e-06, + "loss": 0.1158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10808251798152924, + "step": 3910, + "valid_targets_mean": 4122.3, + "valid_targets_min": 283 + }, + { + "epoch": 6.224165341812401, + "grad_norm": 0.5005990647111696, + "learning_rate": 1.484706215707905e-06, + "loss": 0.1206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10552050173282623, + "step": 3915, + "valid_targets_mean": 3699.5, + "valid_targets_min": 699 + }, + { + "epoch": 6.232114467408585, + "grad_norm": 0.6045839092717541, + "learning_rate": 1.4548710752212292e-06, + "loss": 0.1085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11157447099685669, + "step": 3920, + "valid_targets_mean": 2340.5, + "valid_targets_min": 254 + }, + { + "epoch": 6.24006359300477, + "grad_norm": 0.5250995042813899, + "learning_rate": 1.425327435768582e-06, + "loss": 0.1204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13459616899490356, + "step": 3925, + "valid_targets_mean": 3940.4, + "valid_targets_min": 699 + }, + { + "epoch": 6.248012718600954, + "grad_norm": 0.472579910076907, + "learning_rate": 1.3960757617307486e-06, + "loss": 0.1204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12376942485570908, + "step": 3930, + "valid_targets_mean": 4687.8, + "valid_targets_min": 876 + }, + { + "epoch": 6.255961844197138, + "grad_norm": 0.4618301620060851, + "learning_rate": 1.3671165128992514e-06, + "loss": 0.1095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10723580420017242, + "step": 3935, + "valid_targets_mean": 4437.8, + "valid_targets_min": 543 + }, + { + "epoch": 6.263910969793323, + "grad_norm": 0.4999776109608244, + "learning_rate": 1.3384501444691544e-06, + "loss": 0.1047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10837283730506897, + "step": 3940, + "valid_targets_mean": 3520.4, + "valid_targets_min": 295 + }, + { + "epoch": 6.271860095389507, + "grad_norm": 0.5126636237250259, + "learning_rate": 1.3100771070318796e-06, + "loss": 0.1176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11529883742332458, + "step": 3945, + "valid_targets_mean": 4288.9, + "valid_targets_min": 251 + }, + { + "epoch": 6.279809220985691, + "grad_norm": 0.47848229996426966, + "learning_rate": 1.2819978465681283e-06, + "loss": 0.1236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0940292477607727, + "step": 3950, + "valid_targets_mean": 3521.3, + "valid_targets_min": 498 + }, + { + "epoch": 6.287758346581876, + "grad_norm": 0.5732393529239456, + "learning_rate": 1.254212804440893e-06, + "loss": 0.1244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1338890790939331, + "step": 3955, + "valid_targets_mean": 3135.6, + "valid_targets_min": 312 + }, + { + "epoch": 6.29570747217806, + "grad_norm": 0.5326295342711036, + "learning_rate": 1.2267224173884929e-06, + "loss": 0.1145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13209447264671326, + "step": 3960, + "valid_targets_mean": 3774.2, + "valid_targets_min": 571 + }, + { + "epoch": 6.3036565977742445, + "grad_norm": 0.4711791669124887, + "learning_rate": 1.199527117517727e-06, + "loss": 0.113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11155182123184204, + "step": 3965, + "valid_targets_mean": 3798.5, + "valid_targets_min": 276 + }, + { + "epoch": 6.3116057233704295, + "grad_norm": 0.4818693237836275, + "learning_rate": 1.172627332297076e-06, + "loss": 0.1146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10713420063257217, + "step": 3970, + "valid_targets_mean": 3704.2, + "valid_targets_min": 651 + }, + { + "epoch": 6.319554848966614, + "grad_norm": 0.5058735177237607, + "learning_rate": 1.1460234845499763e-06, + "loss": 0.1135, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.132293701171875, + "step": 3975, + "valid_targets_mean": 4033.2, + "valid_targets_min": 868 + }, + { + "epoch": 6.327503974562799, + "grad_norm": 0.48180321982015545, + "learning_rate": 1.1197159924481804e-06, + "loss": 0.125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11559031903743744, + "step": 3980, + "valid_targets_mean": 4418.4, + "valid_targets_min": 649 + }, + { + "epoch": 6.335453100158983, + "grad_norm": 0.6108262295945303, + "learning_rate": 1.0937052695051965e-06, + "loss": 0.1185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14231693744659424, + "step": 3985, + "valid_targets_mean": 4821.4, + "valid_targets_min": 1296 + }, + { + "epoch": 6.343402225755167, + "grad_norm": 0.5735633688087327, + "learning_rate": 1.067991724569759e-06, + "loss": 0.1169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12734287977218628, + "step": 3990, + "valid_targets_mean": 3712.1, + "valid_targets_min": 578 + }, + { + "epoch": 6.351351351351352, + "grad_norm": 0.4962186042387327, + "learning_rate": 1.0425757618194265e-06, + "loss": 0.1143, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11310100555419922, + "step": 3995, + "valid_targets_mean": 4223.6, + "valid_targets_min": 307 + }, + { + "epoch": 6.359300476947536, + "grad_norm": 0.4808255029002646, + "learning_rate": 1.0174577807542273e-06, + "loss": 0.1242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10521212220191956, + "step": 4000, + "valid_targets_mean": 3883.7, + "valid_targets_min": 1113 + }, + { + "epoch": 6.36724960254372, + "grad_norm": 0.4740602995913292, + "learning_rate": 9.926381761903614e-07, + "loss": 0.1121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11520376056432724, + "step": 4005, + "valid_targets_mean": 4185.7, + "valid_targets_min": 1471 + }, + { + "epoch": 6.375198728139905, + "grad_norm": 0.4470619298701318, + "learning_rate": 9.681173382540177e-07, + "loss": 0.118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10057910531759262, + "step": 4010, + "valid_targets_mean": 4749.6, + "valid_targets_min": 771 + }, + { + "epoch": 6.383147853736089, + "grad_norm": 0.5582730349142845, + "learning_rate": 9.438956523752263e-07, + "loss": 0.1194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14447925984859467, + "step": 4015, + "valid_targets_mean": 3494.1, + "valid_targets_min": 699 + }, + { + "epoch": 6.391096979332273, + "grad_norm": 0.4602774808448654, + "learning_rate": 9.199734992818099e-07, + "loss": 0.1045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10162511467933655, + "step": 4020, + "valid_targets_mean": 4045.6, + "valid_targets_min": 615 + }, + { + "epoch": 6.399046104928458, + "grad_norm": 0.9217957425599271, + "learning_rate": 8.963512549933795e-07, + "loss": 0.1213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10582847148180008, + "step": 4025, + "valid_targets_mean": 3972.8, + "valid_targets_min": 797 + }, + { + "epoch": 6.406995230524642, + "grad_norm": 0.48625229787458, + "learning_rate": 8.730292908154614e-07, + "loss": 0.1203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13875703513622284, + "step": 4030, + "valid_targets_mean": 3860.8, + "valid_targets_min": 218 + }, + { + "epoch": 6.414944356120826, + "grad_norm": 0.4648812460273993, + "learning_rate": 8.500079733336175e-07, + "loss": 0.1114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1072784960269928, + "step": 4035, + "valid_targets_mean": 3870.5, + "valid_targets_min": 841 + }, + { + "epoch": 6.422893481717011, + "grad_norm": 0.5971157428394955, + "learning_rate": 8.272876644077188e-07, + "loss": 0.1207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16263656318187714, + "step": 4040, + "valid_targets_mean": 3596.1, + "valid_targets_min": 248 + }, + { + "epoch": 6.4308426073131955, + "grad_norm": 0.4392492224710974, + "learning_rate": 8.048687211662343e-07, + "loss": 0.1076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0819651186466217, + "step": 4045, + "valid_targets_mean": 3597.6, + "valid_targets_min": 296 + }, + { + "epoch": 6.43879173290938, + "grad_norm": 0.5349233453433273, + "learning_rate": 7.827514960006266e-07, + "loss": 0.1304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.153071790933609, + "step": 4050, + "valid_targets_mean": 4198.1, + "valid_targets_min": 203 + }, + { + "epoch": 6.4467408585055646, + "grad_norm": 0.5485637315230164, + "learning_rate": 7.609363365598165e-07, + "loss": 0.1192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09980130195617676, + "step": 4055, + "valid_targets_mean": 3078.7, + "valid_targets_min": 266 + }, + { + "epoch": 6.454689984101749, + "grad_norm": 0.568411168151065, + "learning_rate": 7.394235857447119e-07, + "loss": 0.1233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12002958357334137, + "step": 4060, + "valid_targets_mean": 3055.5, + "valid_targets_min": 277 + }, + { + "epoch": 6.462639109697934, + "grad_norm": 0.5213091406213507, + "learning_rate": 7.182135817028157e-07, + "loss": 0.109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11195459216833115, + "step": 4065, + "valid_targets_mean": 3587.9, + "valid_targets_min": 839 + }, + { + "epoch": 6.470588235294118, + "grad_norm": 0.5134486003820178, + "learning_rate": 6.973066578229248e-07, + "loss": 0.121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11719434708356857, + "step": 4070, + "valid_targets_mean": 4122.6, + "valid_targets_min": 848 + }, + { + "epoch": 6.478537360890302, + "grad_norm": 0.4962383127871119, + "learning_rate": 6.767031427298687e-07, + "loss": 0.1113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11477995663881302, + "step": 4075, + "valid_targets_mean": 3880.5, + "valid_targets_min": 553 + }, + { + "epoch": 6.486486486486487, + "grad_norm": 0.5116829216283925, + "learning_rate": 6.564033602793584e-07, + "loss": 0.112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10865809768438339, + "step": 4080, + "valid_targets_mean": 3936.4, + "valid_targets_min": 203 + }, + { + "epoch": 6.494435612082671, + "grad_norm": 0.49006467961889066, + "learning_rate": 6.364076295529042e-07, + "loss": 0.1224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11683492362499237, + "step": 4085, + "valid_targets_mean": 4282.9, + "valid_targets_min": 655 + }, + { + "epoch": 6.502384737678855, + "grad_norm": 0.6452016504816608, + "learning_rate": 6.167162648527703e-07, + "loss": 0.1167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14076688885688782, + "step": 4090, + "valid_targets_mean": 3114.0, + "valid_targets_min": 562 + }, + { + "epoch": 6.51033386327504, + "grad_norm": 0.581962945018844, + "learning_rate": 5.973295756970653e-07, + "loss": 0.1153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12058502435684204, + "step": 4095, + "valid_targets_mean": 2951.9, + "valid_targets_min": 215 + }, + { + "epoch": 6.518282988871224, + "grad_norm": 0.5276399869645112, + "learning_rate": 5.782478668148672e-07, + "loss": 0.1185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12663058936595917, + "step": 4100, + "valid_targets_mean": 3469.2, + "valid_targets_min": 883 + }, + { + "epoch": 6.526232114467408, + "grad_norm": 0.47852804567749874, + "learning_rate": 5.59471438141419e-07, + "loss": 0.1063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09485980868339539, + "step": 4105, + "valid_targets_mean": 3878.8, + "valid_targets_min": 671 + }, + { + "epoch": 6.534181240063593, + "grad_norm": 0.4656719855928257, + "learning_rate": 5.410005848134315e-07, + "loss": 0.1125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11063895374536514, + "step": 4110, + "valid_targets_mean": 4169.6, + "valid_targets_min": 555 + }, + { + "epoch": 6.542130365659777, + "grad_norm": 0.582805206984922, + "learning_rate": 5.228355971644461e-07, + "loss": 0.1196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14886021614074707, + "step": 4115, + "valid_targets_mean": 3361.9, + "valid_targets_min": 205 + }, + { + "epoch": 6.550079491255962, + "grad_norm": 0.4719462940844744, + "learning_rate": 5.049767607202549e-07, + "loss": 0.1179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10012765973806381, + "step": 4120, + "valid_targets_mean": 4136.4, + "valid_targets_min": 490 + }, + { + "epoch": 6.558028616852146, + "grad_norm": 1.3556535238795349, + "learning_rate": 4.874243561944214e-07, + "loss": 0.1142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12124701589345932, + "step": 4125, + "valid_targets_mean": 3717.2, + "valid_targets_min": 281 + }, + { + "epoch": 6.5659777424483305, + "grad_norm": 0.5426225357284815, + "learning_rate": 4.701786594838753e-07, + "loss": 0.1227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13302527368068695, + "step": 4130, + "valid_targets_mean": 3212.1, + "valid_targets_min": 519 + }, + { + "epoch": 6.573926868044515, + "grad_norm": 0.5217934884004445, + "learning_rate": 4.532399416645694e-07, + "loss": 0.1255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12307184189558029, + "step": 4135, + "valid_targets_mean": 3814.6, + "valid_targets_min": 692 + }, + { + "epoch": 6.5818759936407, + "grad_norm": 0.46337006848354445, + "learning_rate": 4.366084689872074e-07, + "loss": 0.1306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.146135151386261, + "step": 4140, + "valid_targets_mean": 4986.4, + "valid_targets_min": 1521 + }, + { + "epoch": 6.589825119236884, + "grad_norm": 0.5660661020795137, + "learning_rate": 4.202845028730829e-07, + "loss": 0.1106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14232556521892548, + "step": 4145, + "valid_targets_mean": 3666.0, + "valid_targets_min": 748 + }, + { + "epoch": 6.597774244833069, + "grad_norm": 0.5103188227448534, + "learning_rate": 4.0426829990994677e-07, + "loss": 0.109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10854796320199966, + "step": 4150, + "valid_targets_mean": 3345.2, + "valid_targets_min": 706 + }, + { + "epoch": 6.605723370429253, + "grad_norm": 0.5531875415363303, + "learning_rate": 3.885601118479909e-07, + "loss": 0.1201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13567741215229034, + "step": 4155, + "valid_targets_mean": 3677.8, + "valid_targets_min": 728 + }, + { + "epoch": 6.613672496025437, + "grad_norm": 0.5680532693391935, + "learning_rate": 3.731601855958844e-07, + "loss": 0.1155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10990526527166367, + "step": 4160, + "valid_targets_mean": 3246.2, + "valid_targets_min": 577 + }, + { + "epoch": 6.621621621621622, + "grad_norm": 0.8069806110549206, + "learning_rate": 3.5806876321688553e-07, + "loss": 0.1153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15170586109161377, + "step": 4165, + "valid_targets_mean": 3854.2, + "valid_targets_min": 522 + }, + { + "epoch": 6.629570747217806, + "grad_norm": 0.5399036381526361, + "learning_rate": 3.4328608192505164e-07, + "loss": 0.1269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1231968104839325, + "step": 4170, + "valid_targets_mean": 3397.2, + "valid_targets_min": 324 + }, + { + "epoch": 6.63751987281399, + "grad_norm": 0.43662516483746666, + "learning_rate": 3.288123740814997e-07, + "loss": 0.1229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11871956288814545, + "step": 4175, + "valid_targets_mean": 5077.1, + "valid_targets_min": 1399 + }, + { + "epoch": 6.645468998410175, + "grad_norm": 0.52274643112803, + "learning_rate": 3.1464786719075825e-07, + "loss": 0.1063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09565606713294983, + "step": 4180, + "valid_targets_mean": 3180.1, + "valid_targets_min": 220 + }, + { + "epoch": 6.653418124006359, + "grad_norm": 0.5634216558721306, + "learning_rate": 3.0079278389719246e-07, + "loss": 0.1169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10372507572174072, + "step": 4185, + "valid_targets_mean": 3673.8, + "valid_targets_min": 290 + }, + { + "epoch": 6.661367249602543, + "grad_norm": 0.6083528281521314, + "learning_rate": 2.8724734198149585e-07, + "loss": 0.1228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09832460433244705, + "step": 4190, + "valid_targets_mean": 3061.6, + "valid_targets_min": 691 + }, + { + "epoch": 6.669316375198728, + "grad_norm": 0.4419979233676959, + "learning_rate": 2.7401175435727735e-07, + "loss": 0.1118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09950825572013855, + "step": 4195, + "valid_targets_mean": 4492.2, + "valid_targets_min": 591 + }, + { + "epoch": 6.677265500794912, + "grad_norm": 0.504982275604603, + "learning_rate": 2.61086229067713e-07, + "loss": 0.1111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10245563834905624, + "step": 4200, + "valid_targets_mean": 3867.2, + "valid_targets_min": 1379 + }, + { + "epoch": 6.685214626391097, + "grad_norm": 0.5140544947040371, + "learning_rate": 2.4847096928226846e-07, + "loss": 0.1193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10791807621717453, + "step": 4205, + "valid_targets_mean": 3571.3, + "valid_targets_min": 891 + }, + { + "epoch": 6.6931637519872815, + "grad_norm": 0.5413468567786459, + "learning_rate": 2.3616617329351499e-07, + "loss": 0.1141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11113090813159943, + "step": 4210, + "valid_targets_mean": 3172.6, + "valid_targets_min": 281 + }, + { + "epoch": 6.701112877583466, + "grad_norm": 0.49989740724965176, + "learning_rate": 2.2417203451400749e-07, + "loss": 0.118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11632831394672394, + "step": 4215, + "valid_targets_mean": 3993.2, + "valid_targets_min": 293 + }, + { + "epoch": 6.709062003179651, + "grad_norm": 0.5360781888423928, + "learning_rate": 2.124887414732424e-07, + "loss": 0.1151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1259898841381073, + "step": 4220, + "valid_targets_mean": 3856.2, + "valid_targets_min": 578 + }, + { + "epoch": 6.717011128775835, + "grad_norm": 0.5026442130672338, + "learning_rate": 2.0111647781470233e-07, + "loss": 0.1178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10925573855638504, + "step": 4225, + "valid_targets_mean": 3679.4, + "valid_targets_min": 249 + }, + { + "epoch": 6.724960254372019, + "grad_norm": 0.5622738756387758, + "learning_rate": 1.9005542229295848e-07, + "loss": 0.1117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11003442853689194, + "step": 4230, + "valid_targets_mean": 2934.6, + "valid_targets_min": 533 + }, + { + "epoch": 6.732909379968204, + "grad_norm": 0.5361206317257745, + "learning_rate": 1.793057487708705e-07, + "loss": 0.1173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11059747636318207, + "step": 4235, + "valid_targets_mean": 3127.6, + "valid_targets_min": 251 + }, + { + "epoch": 6.740858505564388, + "grad_norm": 0.48942654254164564, + "learning_rate": 1.688676262168465e-07, + "loss": 0.1137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11752250790596008, + "step": 4240, + "valid_targets_mean": 3818.7, + "valid_targets_min": 739 + }, + { + "epoch": 6.748807631160572, + "grad_norm": 0.45231885595248883, + "learning_rate": 1.5874121870219415e-07, + "loss": 0.1154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11548950523138046, + "step": 4245, + "valid_targets_mean": 4585.7, + "valid_targets_min": 308 + }, + { + "epoch": 6.756756756756757, + "grad_norm": 0.47395667412902187, + "learning_rate": 1.4892668539853606e-07, + "loss": 0.1143, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1214282214641571, + "step": 4250, + "valid_targets_mean": 4161.2, + "valid_targets_min": 765 + }, + { + "epoch": 6.764705882352941, + "grad_norm": 0.43529395808289495, + "learning_rate": 1.3942418057530714e-07, + "loss": 0.1038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09696638584136963, + "step": 4255, + "valid_targets_mean": 4235.6, + "valid_targets_min": 531 + }, + { + "epoch": 6.772655007949125, + "grad_norm": 0.4448513095931578, + "learning_rate": 1.3023385359733687e-07, + "loss": 0.1119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10132372379302979, + "step": 4260, + "valid_targets_mean": 4057.3, + "valid_targets_min": 703 + }, + { + "epoch": 6.78060413354531, + "grad_norm": 0.5547672548578956, + "learning_rate": 1.213558489224953e-07, + "loss": 0.1241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12664559483528137, + "step": 4265, + "valid_targets_mean": 3385.1, + "valid_targets_min": 349 + }, + { + "epoch": 6.788553259141494, + "grad_norm": 0.4670519878770078, + "learning_rate": 1.1279030609942177e-07, + "loss": 0.1339, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12070097029209137, + "step": 4270, + "valid_targets_mean": 5304.9, + "valid_targets_min": 2131 + }, + { + "epoch": 6.796502384737678, + "grad_norm": 1.1700155137729815, + "learning_rate": 1.0453735976533985e-07, + "loss": 0.1216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12248082458972931, + "step": 4275, + "valid_targets_mean": 4182.2, + "valid_targets_min": 584 + }, + { + "epoch": 6.804451510333863, + "grad_norm": 0.5171523736974354, + "learning_rate": 9.659713964392358e-08, + "loss": 0.1097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10080324113368988, + "step": 4280, + "valid_targets_mean": 3404.8, + "valid_targets_min": 304 + }, + { + "epoch": 6.8124006359300475, + "grad_norm": 0.51183917876764, + "learning_rate": 8.896977054328349e-08, + "loss": 0.1078, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08926388621330261, + "step": 4285, + "valid_targets_mean": 3781.9, + "valid_targets_min": 334 + }, + { + "epoch": 6.8203497615262325, + "grad_norm": 0.5317954705582983, + "learning_rate": 8.165537235398146e-08, + "loss": 0.1094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10373544692993164, + "step": 4290, + "valid_targets_mean": 3098.6, + "valid_targets_min": 576 + }, + { + "epoch": 6.828298887122417, + "grad_norm": 0.46554008682031195, + "learning_rate": 7.465406004715903e-08, + "loss": 0.1124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11525758355855942, + "step": 4295, + "valid_targets_mean": 4444.8, + "valid_targets_min": 834 + }, + { + "epoch": 6.836248012718601, + "grad_norm": 0.45846708598172736, + "learning_rate": 6.796594367272535e-08, + "loss": 0.1077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09519517421722412, + "step": 4300, + "valid_targets_mean": 3990.9, + "valid_targets_min": 559 + }, + { + "epoch": 6.844197138314786, + "grad_norm": 0.542174177410134, + "learning_rate": 6.159112835763204e-08, + "loss": 0.1073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09634318947792053, + "step": 4305, + "valid_targets_mean": 3841.8, + "valid_targets_min": 289 + }, + { + "epoch": 6.85214626391097, + "grad_norm": 0.5687137465211649, + "learning_rate": 5.552971430421439e-08, + "loss": 0.1136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11241281777620316, + "step": 4310, + "valid_targets_mean": 2990.4, + "valid_targets_min": 469 + }, + { + "epoch": 6.860095389507154, + "grad_norm": 0.4978344358648119, + "learning_rate": 4.9781796788621605e-08, + "loss": 0.1135, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11623929440975189, + "step": 4315, + "valid_targets_mean": 3618.5, + "valid_targets_min": 671 + }, + { + "epoch": 6.868044515103339, + "grad_norm": 0.5297908608971749, + "learning_rate": 4.434746615932018e-08, + "loss": 0.123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11995728313922882, + "step": 4320, + "valid_targets_mean": 3083.2, + "valid_targets_min": 737 + }, + { + "epoch": 6.875993640699523, + "grad_norm": 0.61054223355744, + "learning_rate": 3.922680783566168e-08, + "loss": 0.1114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11064666509628296, + "step": 4325, + "valid_targets_mean": 3176.7, + "valid_targets_min": 558 + }, + { + "epoch": 6.883942766295707, + "grad_norm": 0.522298631432037, + "learning_rate": 3.441990230656167e-08, + "loss": 0.1108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11717766523361206, + "step": 4330, + "valid_targets_mean": 3661.9, + "valid_targets_min": 565 + }, + { + "epoch": 6.891891891891892, + "grad_norm": 0.49498352702719756, + "learning_rate": 2.992682512921175e-08, + "loss": 0.1336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12047946453094482, + "step": 4335, + "valid_targets_mean": 3766.9, + "valid_targets_min": 250 + }, + { + "epoch": 6.899841017488076, + "grad_norm": 0.5212865153023899, + "learning_rate": 2.574764692790499e-08, + "loss": 0.1193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12060309201478958, + "step": 4340, + "valid_targets_mean": 4045.0, + "valid_targets_min": 1740 + }, + { + "epoch": 6.907790143084261, + "grad_norm": 0.53810487124348, + "learning_rate": 2.188243339292795e-08, + "loss": 0.1131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11383530497550964, + "step": 4345, + "valid_targets_mean": 4394.4, + "valid_targets_min": 607 + }, + { + "epoch": 6.915739268680445, + "grad_norm": 0.44962307621678405, + "learning_rate": 1.8331245279517017e-08, + "loss": 0.109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09446686506271362, + "step": 4350, + "valid_targets_mean": 4012.4, + "valid_targets_min": 444 + }, + { + "epoch": 6.923688394276629, + "grad_norm": 0.5374076189204164, + "learning_rate": 1.509413840691476e-08, + "loss": 0.1113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11552655696868896, + "step": 4355, + "valid_targets_mean": 3191.7, + "valid_targets_min": 280 + }, + { + "epoch": 6.9316375198728135, + "grad_norm": 0.53179013058553, + "learning_rate": 1.2171163657481722e-08, + "loss": 0.1229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1528531163930893, + "step": 4360, + "valid_targets_mean": 3466.8, + "valid_targets_min": 564 + }, + { + "epoch": 6.9395866454689985, + "grad_norm": 0.5741198365523206, + "learning_rate": 9.562366975910397e-09, + "loss": 0.1247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1264297068119049, + "step": 4365, + "valid_targets_mean": 3148.3, + "valid_targets_min": 594 + }, + { + "epoch": 6.947535771065183, + "grad_norm": 0.5093734832614514, + "learning_rate": 7.2677893684880425e-09, + "loss": 0.1153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12815840542316437, + "step": 4370, + "valid_targets_mean": 3770.9, + "valid_targets_min": 295 + }, + { + "epoch": 6.955484896661368, + "grad_norm": 0.5049584504832715, + "learning_rate": 5.2874669024616246e-09, + "loss": 0.1168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1203048899769783, + "step": 4375, + "valid_targets_mean": 4105.9, + "valid_targets_min": 649 + }, + { + "epoch": 6.963434022257552, + "grad_norm": 0.5411424955946904, + "learning_rate": 3.621430705467166e-09, + "loss": 0.118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1432824283838272, + "step": 4380, + "valid_targets_mean": 3633.6, + "valid_targets_min": 834 + }, + { + "epoch": 6.971383147853736, + "grad_norm": 0.508251429943314, + "learning_rate": 2.2697069650456927e-09, + "loss": 0.1183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11797773092985153, + "step": 4385, + "valid_targets_mean": 3701.4, + "valid_targets_min": 610 + }, + { + "epoch": 6.979332273449921, + "grad_norm": 0.48126946316204494, + "learning_rate": 1.2323169282257852e-09, + "loss": 0.1223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12007807195186615, + "step": 4390, + "valid_targets_mean": 4153.2, + "valid_targets_min": 614 + }, + { + "epoch": 6.987281399046105, + "grad_norm": 0.5042572172371226, + "learning_rate": 5.092769011860732e-10, + "loss": 0.1192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13380548357963562, + "step": 4395, + "valid_targets_mean": 3818.6, + "valid_targets_min": 1226 + }, + { + "epoch": 6.995230524642289, + "grad_norm": 0.5243147413835153, + "learning_rate": 1.0059824901098581e-10, + "loss": 0.106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10705508291721344, + "step": 4400, + "valid_targets_mean": 3303.4, + "valid_targets_min": 802 + }, + { + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10637553036212921, + "step": 4403, + "total_flos": 1578618973913088.0, + "train_loss": 0.16220748680723057, + "train_runtime": 24708.9634, + "train_samples_per_second": 2.847, + "train_steps_per_second": 0.178, + "valid_targets_mean": 4107.2, + "valid_targets_min": 717 + } + ], + "logging_steps": 5, + "max_steps": 4403, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 1500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1578618973913088.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}