{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9863878477017163, "eval_steps": 250, "global_step": 3750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00026303675938712433, "grad_norm": 174.60673820638053, "learning_rate": 5e-06, "loss": 6.7232, "num_input_tokens_seen": 172192, "step": 1 }, { "epoch": 0.00026303675938712433, "loss": 6.8635101318359375, "loss_ce": 5.2287445068359375, "loss_iou": 0.81640625, "loss_num": 0.328125, "loss_xval": 1.6328125, "num_input_tokens_seen": 172192, "step": 1 }, { "epoch": 0.0005260735187742487, "grad_norm": 113.61145210636417, "learning_rate": 5e-06, "loss": 5.2198, "num_input_tokens_seen": 344520, "step": 2 }, { "epoch": 0.0005260735187742487, "loss": 5.116145133972168, "loss_ce": 3.611750364303589, "loss_iou": 0.0, "loss_num": 0.30078125, "loss_xval": 1.5078125, "num_input_tokens_seen": 344520, "step": 2 }, { "epoch": 0.000789110278161373, "grad_norm": 181.6487958512906, "learning_rate": 5e-06, "loss": 3.6895, "num_input_tokens_seen": 516868, "step": 3 }, { "epoch": 0.000789110278161373, "loss": 3.7434756755828857, "loss_ce": 2.4915225505828857, "loss_iou": 0.0, "loss_num": 0.25, "loss_xval": 1.25, "num_input_tokens_seen": 516868, "step": 3 }, { "epoch": 0.0010521470375484973, "grad_norm": 136.74708656913646, "learning_rate": 5e-06, "loss": 3.5593, "num_input_tokens_seen": 687240, "step": 4 }, { "epoch": 0.0010521470375484973, "loss": 3.5931553840637207, "loss_ce": 1.3441319465637207, "loss_iou": 0.80859375, "loss_num": 0.44921875, "loss_xval": 2.25, "num_input_tokens_seen": 687240, "step": 4 }, { "epoch": 0.0013151837969356218, "grad_norm": 79.92211667172693, "learning_rate": 5e-06, "loss": 2.8187, "num_input_tokens_seen": 859388, "step": 5 }, { "epoch": 0.0013151837969356218, "loss": 2.8585305213928223, "loss_ce": 1.6251322031021118, "loss_iou": 0.2216796875, "loss_num": 0.24609375, "loss_xval": 1.234375, "num_input_tokens_seen": 
859388, "step": 5 }, { "epoch": 0.001578220556322746, "grad_norm": 51.97557929429645, "learning_rate": 5e-06, "loss": 2.1961, "num_input_tokens_seen": 1029916, "step": 6 }, { "epoch": 0.001578220556322746, "loss": 2.261239528656006, "loss_ce": 1.1806731224060059, "loss_iou": 0.0, "loss_num": 0.2158203125, "loss_xval": 1.078125, "num_input_tokens_seen": 1029916, "step": 6 }, { "epoch": 0.0018412573157098704, "grad_norm": 60.93429618456285, "learning_rate": 5e-06, "loss": 2.066, "num_input_tokens_seen": 1202056, "step": 7 }, { "epoch": 0.0018412573157098704, "loss": 1.9779821634292603, "loss_ce": 1.0946813821792603, "loss_iou": 0.0966796875, "loss_num": 0.1767578125, "loss_xval": 0.8828125, "num_input_tokens_seen": 1202056, "step": 7 }, { "epoch": 0.0021042940750969946, "grad_norm": 42.670837197481326, "learning_rate": 5e-06, "loss": 2.0543, "num_input_tokens_seen": 1373904, "step": 8 }, { "epoch": 0.0021042940750969946, "loss": 2.022350311279297, "loss_ce": 0.9168814420700073, "loss_iou": 0.0, "loss_num": 0.220703125, "loss_xval": 1.109375, "num_input_tokens_seen": 1373904, "step": 8 }, { "epoch": 0.0023673308344841193, "grad_norm": 48.53251052050697, "learning_rate": 5e-06, "loss": 2.4818, "num_input_tokens_seen": 1546124, "step": 9 }, { "epoch": 0.0023673308344841193, "loss": 2.4849071502685547, "loss_ce": 0.8374460935592651, "loss_iou": 0.01202392578125, "loss_num": 0.330078125, "loss_xval": 1.6484375, "num_input_tokens_seen": 1546124, "step": 9 }, { "epoch": 0.0026303675938712436, "grad_norm": 30.87810784426679, "learning_rate": 5e-06, "loss": 1.8016, "num_input_tokens_seen": 1715836, "step": 10 }, { "epoch": 0.0026303675938712436, "loss": 1.7729861736297607, "loss_ce": 0.8613650798797607, "loss_iou": 0.09326171875, "loss_num": 0.1826171875, "loss_xval": 0.91015625, "num_input_tokens_seen": 1715836, "step": 10 }, { "epoch": 0.002893404353258368, "grad_norm": 25.204935627992594, "learning_rate": 5e-06, "loss": 1.6242, "num_input_tokens_seen": 1887800, "step": 11 
}, { "epoch": 0.002893404353258368, "loss": 1.8032605648040771, "loss_ce": 0.7612683176994324, "loss_iou": 0.1513671875, "loss_num": 0.2080078125, "loss_xval": 1.0390625, "num_input_tokens_seen": 1887800, "step": 11 }, { "epoch": 0.003156441112645492, "grad_norm": 27.719293877780373, "learning_rate": 5e-06, "loss": 1.5953, "num_input_tokens_seen": 2058072, "step": 12 }, { "epoch": 0.003156441112645492, "loss": 1.530227541923523, "loss_ce": 0.660110354423523, "loss_iou": 0.017822265625, "loss_num": 0.173828125, "loss_xval": 0.87109375, "num_input_tokens_seen": 2058072, "step": 12 }, { "epoch": 0.0034194778720326164, "grad_norm": 77.78462332066468, "learning_rate": 5e-06, "loss": 1.9432, "num_input_tokens_seen": 2230244, "step": 13 }, { "epoch": 0.0034194778720326164, "loss": 2.0266342163085938, "loss_ce": 0.7092512845993042, "loss_iou": 0.193359375, "loss_num": 0.263671875, "loss_xval": 1.3203125, "num_input_tokens_seen": 2230244, "step": 13 }, { "epoch": 0.0036825146314197407, "grad_norm": 51.375242508900634, "learning_rate": 5e-06, "loss": 1.9389, "num_input_tokens_seen": 2402352, "step": 14 }, { "epoch": 0.0036825146314197407, "loss": 1.8962193727493286, "loss_ce": 0.9186803102493286, "loss_iou": 0.037841796875, "loss_num": 0.1953125, "loss_xval": 0.9765625, "num_input_tokens_seen": 2402352, "step": 14 }, { "epoch": 0.0039455513908068654, "grad_norm": 29.266495242442826, "learning_rate": 5e-06, "loss": 1.6229, "num_input_tokens_seen": 2574284, "step": 15 }, { "epoch": 0.0039455513908068654, "loss": 1.7341835498809814, "loss_ce": 0.7971718311309814, "loss_iou": 0.052734375, "loss_num": 0.1875, "loss_xval": 0.9375, "num_input_tokens_seen": 2574284, "step": 15 }, { "epoch": 0.004208588150193989, "grad_norm": 22.43237476170095, "learning_rate": 5e-06, "loss": 1.4957, "num_input_tokens_seen": 2746500, "step": 16 }, { "epoch": 0.004208588150193989, "loss": 1.5246176719665527, "loss_ce": 0.6452231407165527, "loss_iou": 0.2041015625, "loss_num": 0.17578125, "loss_xval": 
0.87890625, "num_input_tokens_seen": 2746500, "step": 16 }, { "epoch": 0.004471624909581114, "grad_norm": 20.976636929732265, "learning_rate": 5e-06, "loss": 1.3572, "num_input_tokens_seen": 2918736, "step": 17 }, { "epoch": 0.004471624909581114, "loss": 1.3845367431640625, "loss_ce": 0.6897125244140625, "loss_iou": 0.0140380859375, "loss_num": 0.138671875, "loss_xval": 0.6953125, "num_input_tokens_seen": 2918736, "step": 17 }, { "epoch": 0.004734661668968239, "grad_norm": 23.059953533674786, "learning_rate": 5e-06, "loss": 1.3283, "num_input_tokens_seen": 3091200, "step": 18 }, { "epoch": 0.004734661668968239, "loss": 1.3037350177764893, "loss_ce": 0.5959713459014893, "loss_iou": 0.052490234375, "loss_num": 0.1416015625, "loss_xval": 0.70703125, "num_input_tokens_seen": 3091200, "step": 18 }, { "epoch": 0.0049976984283553625, "grad_norm": 28.375886109478266, "learning_rate": 5e-06, "loss": 1.4794, "num_input_tokens_seen": 3263136, "step": 19 }, { "epoch": 0.0049976984283553625, "loss": 1.406355381011963, "loss_ce": 0.6358475685119629, "loss_iou": 0.03173828125, "loss_num": 0.154296875, "loss_xval": 0.76953125, "num_input_tokens_seen": 3263136, "step": 19 }, { "epoch": 0.005260735187742487, "grad_norm": 25.383372657888618, "learning_rate": 5e-06, "loss": 1.4853, "num_input_tokens_seen": 3435304, "step": 20 }, { "epoch": 0.005260735187742487, "loss": 1.4931797981262207, "loss_ce": 0.6591953635215759, "loss_iou": 0.1748046875, "loss_num": 0.1669921875, "loss_xval": 0.8359375, "num_input_tokens_seen": 3435304, "step": 20 }, { "epoch": 0.005523771947129611, "grad_norm": 20.893788220271592, "learning_rate": 5e-06, "loss": 1.2467, "num_input_tokens_seen": 3604172, "step": 21 }, { "epoch": 0.005523771947129611, "loss": 1.2556164264678955, "loss_ce": 0.5534679889678955, "loss_iou": null, "loss_num": 0.140625, "loss_xval": 0.703125, "num_input_tokens_seen": 3604172, "step": 21 }, { "epoch": 0.005786808706516736, "grad_norm": 16.968829307639602, "learning_rate": 5e-06, 
"loss": 1.331, "num_input_tokens_seen": 3776444, "step": 22 }, { "epoch": 0.005786808706516736, "loss": 1.4187769889831543, "loss_ce": 0.5535425543785095, "loss_iou": 0.076171875, "loss_num": 0.1728515625, "loss_xval": 0.8671875, "num_input_tokens_seen": 3776444, "step": 22 }, { "epoch": 0.00604984546590386, "grad_norm": 17.03009551099335, "learning_rate": 5e-06, "loss": 1.2354, "num_input_tokens_seen": 3948760, "step": 23 }, { "epoch": 0.00604984546590386, "loss": 1.2475543022155762, "loss_ce": 0.5319781303405762, "loss_iou": 0.07275390625, "loss_num": 0.142578125, "loss_xval": 0.71484375, "num_input_tokens_seen": 3948760, "step": 23 }, { "epoch": 0.006312882225290984, "grad_norm": 17.413119153715428, "learning_rate": 5e-06, "loss": 1.2253, "num_input_tokens_seen": 4120896, "step": 24 }, { "epoch": 0.006312882225290984, "loss": 1.2492257356643677, "loss_ce": 0.6032296419143677, "loss_iou": 0.0400390625, "loss_num": 0.12890625, "loss_xval": 0.64453125, "num_input_tokens_seen": 4120896, "step": 24 }, { "epoch": 0.006575918984678109, "grad_norm": 15.646317268104498, "learning_rate": 5e-06, "loss": 1.1599, "num_input_tokens_seen": 4293068, "step": 25 }, { "epoch": 0.006575918984678109, "loss": 1.221449613571167, "loss_ce": 0.590346097946167, "loss_iou": 0.26171875, "loss_num": 0.1259765625, "loss_xval": 0.6328125, "num_input_tokens_seen": 4293068, "step": 25 }, { "epoch": 0.006838955744065233, "grad_norm": 16.80260761287207, "learning_rate": 5e-06, "loss": 1.1163, "num_input_tokens_seen": 4465316, "step": 26 }, { "epoch": 0.006838955744065233, "loss": 1.0530353784561157, "loss_ce": 0.5227619409561157, "loss_iou": 0.08447265625, "loss_num": 0.10595703125, "loss_xval": 0.53125, "num_input_tokens_seen": 4465316, "step": 26 }, { "epoch": 0.007101992503452358, "grad_norm": 15.596544427195678, "learning_rate": 5e-06, "loss": 1.0849, "num_input_tokens_seen": 4636968, "step": 27 }, { "epoch": 0.007101992503452358, "loss": 1.1465504169464111, "loss_ce": 0.5161793231964111, 
"loss_iou": 0.173828125, "loss_num": 0.1259765625, "loss_xval": 0.62890625, "num_input_tokens_seen": 4636968, "step": 27 }, { "epoch": 0.0073650292628394814, "grad_norm": 17.97152237441673, "learning_rate": 5e-06, "loss": 1.1255, "num_input_tokens_seen": 4808916, "step": 28 }, { "epoch": 0.0073650292628394814, "loss": 1.0834856033325195, "loss_ce": 0.5703020095825195, "loss_iou": 0.1171875, "loss_num": 0.1025390625, "loss_xval": 0.51171875, "num_input_tokens_seen": 4808916, "step": 28 }, { "epoch": 0.007628066022226606, "grad_norm": 26.650637151013097, "learning_rate": 5e-06, "loss": 1.0819, "num_input_tokens_seen": 4979072, "step": 29 }, { "epoch": 0.007628066022226606, "loss": 1.083469271659851, "loss_ce": 0.49557873606681824, "loss_iou": 0.2470703125, "loss_num": 0.11767578125, "loss_xval": 0.5859375, "num_input_tokens_seen": 4979072, "step": 29 }, { "epoch": 0.007891102781613731, "grad_norm": 47.98868961106415, "learning_rate": 5e-06, "loss": 1.5465, "num_input_tokens_seen": 5150996, "step": 30 }, { "epoch": 0.007891102781613731, "loss": 1.4472854137420654, "loss_ce": 0.4790237545967102, "loss_iou": 0.2001953125, "loss_num": 0.193359375, "loss_xval": 0.96875, "num_input_tokens_seen": 5150996, "step": 30 }, { "epoch": 0.008154139541000855, "grad_norm": 36.03247925799523, "learning_rate": 5e-06, "loss": 1.479, "num_input_tokens_seen": 5321588, "step": 31 }, { "epoch": 0.008154139541000855, "loss": 1.3790192604064941, "loss_ce": 0.5357575416564941, "loss_iou": 0.0703125, "loss_num": 0.1689453125, "loss_xval": 0.84375, "num_input_tokens_seen": 5321588, "step": 31 }, { "epoch": 0.008417176300387979, "grad_norm": 18.14805166699206, "learning_rate": 5e-06, "loss": 1.1322, "num_input_tokens_seen": 5493708, "step": 32 }, { "epoch": 0.008417176300387979, "loss": 1.069289207458496, "loss_ce": 0.5192403793334961, "loss_iou": 0.0654296875, "loss_num": 0.10986328125, "loss_xval": 0.55078125, "num_input_tokens_seen": 5493708, "step": 32 }, { "epoch": 0.008680213059775104, 
"grad_norm": 17.23835135853453, "learning_rate": 5e-06, "loss": 1.0735, "num_input_tokens_seen": 5665524, "step": 33 }, { "epoch": 0.008680213059775104, "loss": 1.063108205795288, "loss_ce": 0.4869362711906433, "loss_iou": 0.0556640625, "loss_num": 0.115234375, "loss_xval": 0.578125, "num_input_tokens_seen": 5665524, "step": 33 }, { "epoch": 0.008943249819162228, "grad_norm": 15.02530944865542, "learning_rate": 5e-06, "loss": 1.0261, "num_input_tokens_seen": 5837680, "step": 34 }, { "epoch": 0.008943249819162228, "loss": 1.00763738155365, "loss_ce": 0.4573444128036499, "loss_iou": 0.16796875, "loss_num": 0.1103515625, "loss_xval": 0.55078125, "num_input_tokens_seen": 5837680, "step": 34 }, { "epoch": 0.009206286578549352, "grad_norm": 16.61205610634839, "learning_rate": 5e-06, "loss": 1.0035, "num_input_tokens_seen": 6009848, "step": 35 }, { "epoch": 0.009206286578549352, "loss": 0.9831359386444092, "loss_ce": 0.46995237469673157, "loss_iou": 0.123046875, "loss_num": 0.1025390625, "loss_xval": 0.51171875, "num_input_tokens_seen": 6009848, "step": 35 }, { "epoch": 0.009469323337936477, "grad_norm": 15.512257057890888, "learning_rate": 5e-06, "loss": 1.0223, "num_input_tokens_seen": 6182164, "step": 36 }, { "epoch": 0.009469323337936477, "loss": 1.0216686725616455, "loss_ce": 0.5036022663116455, "loss_iou": 0.2021484375, "loss_num": 0.103515625, "loss_xval": 0.51953125, "num_input_tokens_seen": 6182164, "step": 36 }, { "epoch": 0.009732360097323601, "grad_norm": 19.305295502162483, "learning_rate": 5e-06, "loss": 1.0144, "num_input_tokens_seen": 6352672, "step": 37 }, { "epoch": 0.009732360097323601, "loss": 1.0505774021148682, "loss_ce": 0.4739171862602234, "loss_iou": 0.12451171875, "loss_num": 0.115234375, "loss_xval": 0.578125, "num_input_tokens_seen": 6352672, "step": 37 }, { "epoch": 0.009995396856710725, "grad_norm": 20.69114777569909, "learning_rate": 5e-06, "loss": 1.0293, "num_input_tokens_seen": 6525264, "step": 38 }, { "epoch": 0.009995396856710725, 
"loss": 1.069566249847412, "loss_ce": 0.4638533592224121, "loss_iou": 0.02099609375, "loss_num": 0.12109375, "loss_xval": 0.60546875, "num_input_tokens_seen": 6525264, "step": 38 }, { "epoch": 0.010258433616097849, "grad_norm": 32.793827182045035, "learning_rate": 5e-06, "loss": 1.0754, "num_input_tokens_seen": 6697524, "step": 39 }, { "epoch": 0.010258433616097849, "loss": 1.0720198154449463, "loss_ce": 0.5224591493606567, "loss_iou": 0.099609375, "loss_num": 0.10986328125, "loss_xval": 0.55078125, "num_input_tokens_seen": 6697524, "step": 39 }, { "epoch": 0.010521470375484974, "grad_norm": 22.436128254245773, "learning_rate": 5e-06, "loss": 1.125, "num_input_tokens_seen": 6869752, "step": 40 }, { "epoch": 0.010521470375484974, "loss": 1.1199672222137451, "loss_ce": 0.5101040601730347, "loss_iou": null, "loss_num": 0.1220703125, "loss_xval": 0.609375, "num_input_tokens_seen": 6869752, "step": 40 }, { "epoch": 0.010784507134872098, "grad_norm": 17.66019658365854, "learning_rate": 5e-06, "loss": 0.9975, "num_input_tokens_seen": 7041884, "step": 41 }, { "epoch": 0.010784507134872098, "loss": 0.9673187732696533, "loss_ce": 0.37991636991500854, "loss_iou": 0.166015625, "loss_num": 0.11767578125, "loss_xval": 0.5859375, "num_input_tokens_seen": 7041884, "step": 41 }, { "epoch": 0.011047543894259222, "grad_norm": 17.977306189337394, "learning_rate": 5e-06, "loss": 0.9918, "num_input_tokens_seen": 7214400, "step": 42 }, { "epoch": 0.011047543894259222, "loss": 0.9905650615692139, "loss_ce": 0.42928582429885864, "loss_iou": 0.236328125, "loss_num": 0.1123046875, "loss_xval": 0.5625, "num_input_tokens_seen": 7214400, "step": 42 }, { "epoch": 0.011310580653646348, "grad_norm": 16.758234031981292, "learning_rate": 5e-06, "loss": 1.0186, "num_input_tokens_seen": 7386432, "step": 43 }, { "epoch": 0.011310580653646348, "loss": 1.0381113290786743, "loss_ce": 0.47536715865135193, "loss_iou": 0.1748046875, "loss_num": 0.1123046875, "loss_xval": 0.5625, "num_input_tokens_seen": 
7386432, "step": 43 }, { "epoch": 0.011573617413033472, "grad_norm": 19.20192611310373, "learning_rate": 5e-06, "loss": 0.9474, "num_input_tokens_seen": 7558604, "step": 44 }, { "epoch": 0.011573617413033472, "loss": 0.8546841144561768, "loss_ce": 0.45356106758117676, "loss_iou": 0.0634765625, "loss_num": 0.080078125, "loss_xval": 0.400390625, "num_input_tokens_seen": 7558604, "step": 44 }, { "epoch": 0.011836654172420595, "grad_norm": 24.07755280606732, "learning_rate": 5e-06, "loss": 1.0256, "num_input_tokens_seen": 7730896, "step": 45 }, { "epoch": 0.011836654172420595, "loss": 0.960330605506897, "loss_ce": 0.48523297905921936, "loss_iou": 0.259765625, "loss_num": 0.09521484375, "loss_xval": 0.474609375, "num_input_tokens_seen": 7730896, "step": 45 }, { "epoch": 0.01209969093180772, "grad_norm": 21.918622706923347, "learning_rate": 5e-06, "loss": 0.9858, "num_input_tokens_seen": 7903036, "step": 46 }, { "epoch": 0.01209969093180772, "loss": 1.0328285694122314, "loss_ce": 0.41710585355758667, "loss_iou": 0.119140625, "loss_num": 0.123046875, "loss_xval": 0.6171875, "num_input_tokens_seen": 7903036, "step": 46 }, { "epoch": 0.012362727691194845, "grad_norm": 17.78591800932899, "learning_rate": 5e-06, "loss": 0.9217, "num_input_tokens_seen": 8075568, "step": 47 }, { "epoch": 0.012362727691194845, "loss": 0.9050750136375427, "loss_ce": 0.4551238417625427, "loss_iou": 0.21875, "loss_num": 0.08984375, "loss_xval": 0.44921875, "num_input_tokens_seen": 8075568, "step": 47 }, { "epoch": 0.012625764450581969, "grad_norm": 18.144694112865135, "learning_rate": 5e-06, "loss": 0.8763, "num_input_tokens_seen": 8247888, "step": 48 }, { "epoch": 0.012625764450581969, "loss": 0.8872632384300232, "loss_ce": 0.3972730040550232, "loss_iou": 0.333984375, "loss_num": 0.09765625, "loss_xval": 0.490234375, "num_input_tokens_seen": 8247888, "step": 48 }, { "epoch": 0.012888801209969093, "grad_norm": 18.397929867872243, "learning_rate": 5e-06, "loss": 0.8479, "num_input_tokens_seen": 
8419884, "step": 49 }, { "epoch": 0.012888801209969093, "loss": 0.8515866994857788, "loss_ce": 0.3969968557357788, "loss_iou": 0.109375, "loss_num": 0.0908203125, "loss_xval": 0.455078125, "num_input_tokens_seen": 8419884, "step": 49 }, { "epoch": 0.013151837969356218, "grad_norm": 16.926260406149144, "learning_rate": 5e-06, "loss": 0.8125, "num_input_tokens_seen": 8592264, "step": 50 }, { "epoch": 0.013151837969356218, "loss": 0.8142160177230835, "loss_ce": 0.3877023756504059, "loss_iou": 0.1123046875, "loss_num": 0.08544921875, "loss_xval": 0.42578125, "num_input_tokens_seen": 8592264, "step": 50 }, { "epoch": 0.013414874728743342, "grad_norm": 24.314274074580883, "learning_rate": 5e-06, "loss": 0.8943, "num_input_tokens_seen": 8764528, "step": 51 }, { "epoch": 0.013414874728743342, "loss": 0.8517386317253113, "loss_ce": 0.43084025382995605, "loss_iou": 0.1923828125, "loss_num": 0.083984375, "loss_xval": 0.421875, "num_input_tokens_seen": 8764528, "step": 51 }, { "epoch": 0.013677911488130466, "grad_norm": 26.15955176323275, "learning_rate": 5e-06, "loss": 0.9129, "num_input_tokens_seen": 8936892, "step": 52 }, { "epoch": 0.013677911488130466, "loss": 0.9079768657684326, "loss_ce": 0.3640315532684326, "loss_iou": 0.0654296875, "loss_num": 0.10888671875, "loss_xval": 0.54296875, "num_input_tokens_seen": 8936892, "step": 52 }, { "epoch": 0.013940948247517591, "grad_norm": 29.358592778676385, "learning_rate": 5e-06, "loss": 0.9409, "num_input_tokens_seen": 9108972, "step": 53 }, { "epoch": 0.013940948247517591, "loss": 0.8754456043243408, "loss_ce": 0.4201233685016632, "loss_iou": 0.1279296875, "loss_num": 0.0908203125, "loss_xval": 0.455078125, "num_input_tokens_seen": 9108972, "step": 53 }, { "epoch": 0.014203985006904715, "grad_norm": 20.238375239505068, "learning_rate": 5e-06, "loss": 0.8928, "num_input_tokens_seen": 9281248, "step": 54 }, { "epoch": 0.014203985006904715, "loss": 0.8624146580696106, "loss_ce": 0.4217408299446106, "loss_iou": 0.154296875, 
"loss_num": 0.087890625, "loss_xval": 0.44140625, "num_input_tokens_seen": 9281248, "step": 54 }, { "epoch": 0.014467021766291839, "grad_norm": 21.380412065088443, "learning_rate": 5e-06, "loss": 0.8996, "num_input_tokens_seen": 9453248, "step": 55 }, { "epoch": 0.014467021766291839, "loss": 0.8669720888137817, "loss_ce": 0.3911420404911041, "loss_iou": 0.095703125, "loss_num": 0.09521484375, "loss_xval": 0.4765625, "num_input_tokens_seen": 9453248, "step": 55 }, { "epoch": 0.014730058525678963, "grad_norm": 21.398016209346718, "learning_rate": 5e-06, "loss": 0.8695, "num_input_tokens_seen": 9625308, "step": 56 }, { "epoch": 0.014730058525678963, "loss": 0.7916536331176758, "loss_ce": 0.3697786033153534, "loss_iou": 0.265625, "loss_num": 0.08447265625, "loss_xval": 0.421875, "num_input_tokens_seen": 9625308, "step": 56 }, { "epoch": 0.014993095285066088, "grad_norm": 22.596856129715338, "learning_rate": 5e-06, "loss": 0.8385, "num_input_tokens_seen": 9797472, "step": 57 }, { "epoch": 0.014993095285066088, "loss": 0.8159988522529602, "loss_ce": 0.3955886960029602, "loss_iou": 0.173828125, "loss_num": 0.083984375, "loss_xval": 0.419921875, "num_input_tokens_seen": 9797472, "step": 57 }, { "epoch": 0.015256132044453212, "grad_norm": 19.06235083584469, "learning_rate": 5e-06, "loss": 0.791, "num_input_tokens_seen": 9969708, "step": 58 }, { "epoch": 0.015256132044453212, "loss": 0.8669772148132324, "loss_ce": 0.3650240898132324, "loss_iou": 0.1318359375, "loss_num": 0.1005859375, "loss_xval": 0.5, "num_input_tokens_seen": 9969708, "step": 58 }, { "epoch": 0.015519168803840336, "grad_norm": 19.2496987382769, "learning_rate": 5e-06, "loss": 0.8251, "num_input_tokens_seen": 10141772, "step": 59 }, { "epoch": 0.015519168803840336, "loss": 0.8268835544586182, "loss_ce": 0.39744019508361816, "loss_iou": 0.3046875, "loss_num": 0.0859375, "loss_xval": 0.4296875, "num_input_tokens_seen": 10141772, "step": 59 }, { "epoch": 0.015782205563227462, "grad_norm": 18.91781603668572, 
"learning_rate": 5e-06, "loss": 0.8407, "num_input_tokens_seen": 10312188, "step": 60 }, { "epoch": 0.015782205563227462, "loss": 0.7802078723907471, "loss_ce": 0.35906529426574707, "loss_iou": 0.318359375, "loss_num": 0.083984375, "loss_xval": 0.421875, "num_input_tokens_seen": 10312188, "step": 60 }, { "epoch": 0.016045242322614586, "grad_norm": 17.697618089112353, "learning_rate": 5e-06, "loss": 0.7755, "num_input_tokens_seen": 10484428, "step": 61 }, { "epoch": 0.016045242322614586, "loss": 0.8741220235824585, "loss_ce": 0.3609383702278137, "loss_iou": 0.1376953125, "loss_num": 0.1025390625, "loss_xval": 0.51171875, "num_input_tokens_seen": 10484428, "step": 61 }, { "epoch": 0.01630827908200171, "grad_norm": 19.734011329204773, "learning_rate": 5e-06, "loss": 0.769, "num_input_tokens_seen": 10656840, "step": 62 }, { "epoch": 0.01630827908200171, "loss": 0.7350368499755859, "loss_ce": 0.36150169372558594, "loss_iou": 0.099609375, "loss_num": 0.07470703125, "loss_xval": 0.373046875, "num_input_tokens_seen": 10656840, "step": 62 }, { "epoch": 0.016571315841388833, "grad_norm": 24.730627091015997, "learning_rate": 5e-06, "loss": 0.7931, "num_input_tokens_seen": 10828884, "step": 63 }, { "epoch": 0.016571315841388833, "loss": 0.7593971490859985, "loss_ce": 0.36462172865867615, "loss_iou": null, "loss_num": 0.0791015625, "loss_xval": 0.39453125, "num_input_tokens_seen": 10828884, "step": 63 }, { "epoch": 0.016834352600775957, "grad_norm": 18.667625860875532, "learning_rate": 5e-06, "loss": 0.8089, "num_input_tokens_seen": 11001164, "step": 64 }, { "epoch": 0.016834352600775957, "loss": 0.8844671845436096, "loss_ce": 0.3607855439186096, "loss_iou": 0.376953125, "loss_num": 0.10498046875, "loss_xval": 0.5234375, "num_input_tokens_seen": 11001164, "step": 64 }, { "epoch": 0.017097389360163084, "grad_norm": 23.265809096889907, "learning_rate": 5e-06, "loss": 0.7793, "num_input_tokens_seen": 11173188, "step": 65 }, { "epoch": 0.017097389360163084, "loss": 
0.7827451229095459, "loss_ce": 0.3672178089618683, "loss_iou": 0.255859375, "loss_num": 0.0830078125, "loss_xval": 0.416015625, "num_input_tokens_seen": 11173188, "step": 65 }, { "epoch": 0.017360426119550208, "grad_norm": 24.005431476496838, "learning_rate": 5e-06, "loss": 0.8061, "num_input_tokens_seen": 11345216, "step": 66 }, { "epoch": 0.017360426119550208, "loss": 0.7301403284072876, "loss_ce": 0.3616100549697876, "loss_iou": 0.28515625, "loss_num": 0.07373046875, "loss_xval": 0.369140625, "num_input_tokens_seen": 11345216, "step": 66 }, { "epoch": 0.017623462878937332, "grad_norm": 30.491458847817103, "learning_rate": 5e-06, "loss": 0.8474, "num_input_tokens_seen": 11517388, "step": 67 }, { "epoch": 0.017623462878937332, "loss": 0.8404921889305115, "loss_ce": 0.3583144545555115, "loss_iou": 0.07861328125, "loss_num": 0.0966796875, "loss_xval": 0.482421875, "num_input_tokens_seen": 11517388, "step": 67 }, { "epoch": 0.017886499638324456, "grad_norm": 17.38452589704735, "learning_rate": 5e-06, "loss": 0.8004, "num_input_tokens_seen": 11689284, "step": 68 }, { "epoch": 0.017886499638324456, "loss": 0.8011830449104309, "loss_ce": 0.3470814824104309, "loss_iou": 0.095703125, "loss_num": 0.0908203125, "loss_xval": 0.453125, "num_input_tokens_seen": 11689284, "step": 68 }, { "epoch": 0.01814953639771158, "grad_norm": 22.0592753440465, "learning_rate": 5e-06, "loss": 0.7808, "num_input_tokens_seen": 11861484, "step": 69 }, { "epoch": 0.01814953639771158, "loss": 0.8063881397247314, "loss_ce": 0.38475728034973145, "loss_iou": 0.1826171875, "loss_num": 0.08447265625, "loss_xval": 0.421875, "num_input_tokens_seen": 11861484, "step": 69 }, { "epoch": 0.018412573157098704, "grad_norm": 18.31950796902093, "learning_rate": 5e-06, "loss": 0.766, "num_input_tokens_seen": 12033752, "step": 70 }, { "epoch": 0.018412573157098704, "loss": 0.7670217156410217, "loss_ce": 0.31560570001602173, "loss_iou": 0.275390625, "loss_num": 0.09033203125, "loss_xval": 0.451171875, 
"num_input_tokens_seen": 12033752, "step": 70 }, { "epoch": 0.018675609916485827, "grad_norm": 18.14951438740657, "learning_rate": 5e-06, "loss": 0.6971, "num_input_tokens_seen": 12205604, "step": 71 }, { "epoch": 0.018675609916485827, "loss": 0.6512900590896606, "loss_ce": 0.31205666065216064, "loss_iou": 0.169921875, "loss_num": 0.06787109375, "loss_xval": 0.33984375, "num_input_tokens_seen": 12205604, "step": 71 }, { "epoch": 0.018938646675872955, "grad_norm": 18.910816353150064, "learning_rate": 5e-06, "loss": 0.7748, "num_input_tokens_seen": 12377968, "step": 72 }, { "epoch": 0.018938646675872955, "loss": 0.8064651489257812, "loss_ce": 0.3355178236961365, "loss_iou": 0.171875, "loss_num": 0.09423828125, "loss_xval": 0.470703125, "num_input_tokens_seen": 12377968, "step": 72 }, { "epoch": 0.01920168343526008, "grad_norm": 18.381185745083314, "learning_rate": 5e-06, "loss": 0.7552, "num_input_tokens_seen": 12549896, "step": 73 }, { "epoch": 0.01920168343526008, "loss": 0.817658007144928, "loss_ce": 0.28372251987457275, "loss_iou": 0.236328125, "loss_num": 0.10693359375, "loss_xval": 0.53515625, "num_input_tokens_seen": 12549896, "step": 73 }, { "epoch": 0.019464720194647202, "grad_norm": 16.482511811798446, "learning_rate": 5e-06, "loss": 0.6763, "num_input_tokens_seen": 12721856, "step": 74 }, { "epoch": 0.019464720194647202, "loss": 0.5998687744140625, "loss_ce": 0.2707671821117401, "loss_iou": 0.333984375, "loss_num": 0.06591796875, "loss_xval": 0.328125, "num_input_tokens_seen": 12721856, "step": 74 }, { "epoch": 0.019727756954034326, "grad_norm": 15.444850813034535, "learning_rate": 5e-06, "loss": 0.6806, "num_input_tokens_seen": 12894040, "step": 75 }, { "epoch": 0.019727756954034326, "loss": 0.7105993032455444, "loss_ce": 0.27493035793304443, "loss_iou": 0.26953125, "loss_num": 0.08740234375, "loss_xval": 0.435546875, "num_input_tokens_seen": 12894040, "step": 75 }, { "epoch": 0.01999079371342145, "grad_norm": 20.590812596799903, "learning_rate": 5e-06, 
"loss": 0.7622, "num_input_tokens_seen": 13064296, "step": 76 }, { "epoch": 0.01999079371342145, "loss": 0.6805918216705322, "loss_ce": 0.3353770077228546, "loss_iou": 0.26171875, "loss_num": 0.06884765625, "loss_xval": 0.345703125, "num_input_tokens_seen": 13064296, "step": 76 }, { "epoch": 0.020253830472808574, "grad_norm": 20.47406440355872, "learning_rate": 5e-06, "loss": 0.74, "num_input_tokens_seen": 13233888, "step": 77 }, { "epoch": 0.020253830472808574, "loss": 0.7564910650253296, "loss_ce": 0.3099578022956848, "loss_iou": 0.2109375, "loss_num": 0.08935546875, "loss_xval": 0.447265625, "num_input_tokens_seen": 13233888, "step": 77 }, { "epoch": 0.020516867232195698, "grad_norm": 19.122832804118445, "learning_rate": 5e-06, "loss": 0.741, "num_input_tokens_seen": 13405900, "step": 78 }, { "epoch": 0.020516867232195698, "loss": 0.6968704462051392, "loss_ce": 0.26059114933013916, "loss_iou": 0.224609375, "loss_num": 0.08740234375, "loss_xval": 0.435546875, "num_input_tokens_seen": 13405900, "step": 78 }, { "epoch": 0.020779903991582825, "grad_norm": 16.68220779277857, "learning_rate": 5e-06, "loss": 0.6822, "num_input_tokens_seen": 13578336, "step": 79 }, { "epoch": 0.020779903991582825, "loss": 0.6721813082695007, "loss_ce": 0.26910513639450073, "loss_iou": 0.125, "loss_num": 0.08056640625, "loss_xval": 0.40234375, "num_input_tokens_seen": 13578336, "step": 79 }, { "epoch": 0.02104294075096995, "grad_norm": 15.592038335799979, "learning_rate": 5e-06, "loss": 0.6458, "num_input_tokens_seen": 13750256, "step": 80 }, { "epoch": 0.02104294075096995, "loss": 0.604525625705719, "loss_ce": 0.253695547580719, "loss_iou": 0.169921875, "loss_num": 0.0703125, "loss_xval": 0.3515625, "num_input_tokens_seen": 13750256, "step": 80 }, { "epoch": 0.021305977510357073, "grad_norm": 19.123452214665015, "learning_rate": 5e-06, "loss": 0.7034, "num_input_tokens_seen": 13922220, "step": 81 }, { "epoch": 0.021305977510357073, "loss": 0.7461546659469604, "loss_ce": 
0.28350815176963806, "loss_iou": 0.27734375, "loss_num": 0.0927734375, "loss_xval": 0.462890625, "num_input_tokens_seen": 13922220, "step": 81 }, { "epoch": 0.021569014269744197, "grad_norm": 14.596994780035258, "learning_rate": 5e-06, "loss": 0.6357, "num_input_tokens_seen": 14094312, "step": 82 }, { "epoch": 0.021569014269744197, "loss": 0.602331280708313, "loss_ce": 0.2593136727809906, "loss_iou": 0.328125, "loss_num": 0.068359375, "loss_xval": 0.34375, "num_input_tokens_seen": 14094312, "step": 82 }, { "epoch": 0.02183205102913132, "grad_norm": 16.4283520636599, "learning_rate": 5e-06, "loss": 0.5782, "num_input_tokens_seen": 14264984, "step": 83 }, { "epoch": 0.02183205102913132, "loss": 0.5699411630630493, "loss_ce": 0.2635447084903717, "loss_iou": 0.306640625, "loss_num": 0.061279296875, "loss_xval": 0.306640625, "num_input_tokens_seen": 14264984, "step": 83 }, { "epoch": 0.022095087788518444, "grad_norm": 22.311971257546926, "learning_rate": 5e-06, "loss": 0.6398, "num_input_tokens_seen": 14437168, "step": 84 }, { "epoch": 0.022095087788518444, "loss": 0.6384698748588562, "loss_ce": 0.240032359957695, "loss_iou": 0.21875, "loss_num": 0.07958984375, "loss_xval": 0.3984375, "num_input_tokens_seen": 14437168, "step": 84 }, { "epoch": 0.022358124547905568, "grad_norm": 26.609904157941, "learning_rate": 5e-06, "loss": 0.8114, "num_input_tokens_seen": 14609504, "step": 85 }, { "epoch": 0.022358124547905568, "loss": 0.7600446939468384, "loss_ce": 0.23172441124916077, "loss_iou": 0.076171875, "loss_num": 0.10546875, "loss_xval": 0.52734375, "num_input_tokens_seen": 14609504, "step": 85 }, { "epoch": 0.022621161307292696, "grad_norm": 28.311475657648764, "learning_rate": 5e-06, "loss": 0.8067, "num_input_tokens_seen": 14781812, "step": 86 }, { "epoch": 0.022621161307292696, "loss": 0.839857816696167, "loss_ce": 0.2500140368938446, "loss_iou": 0.38671875, "loss_num": 0.1181640625, "loss_xval": 0.58984375, "num_input_tokens_seen": 14781812, "step": 86 }, { "epoch": 
0.02288419806667982, "grad_norm": 24.92836303632298, "learning_rate": 5e-06, "loss": 0.731, "num_input_tokens_seen": 14954408, "step": 87 }, { "epoch": 0.02288419806667982, "loss": 0.7506657838821411, "loss_ce": 0.2797185182571411, "loss_iou": 0.12890625, "loss_num": 0.09423828125, "loss_xval": 0.470703125, "num_input_tokens_seen": 14954408, "step": 87 }, { "epoch": 0.023147234826066943, "grad_norm": 26.5172487427058, "learning_rate": 5e-06, "loss": 0.8202, "num_input_tokens_seen": 15123552, "step": 88 }, { "epoch": 0.023147234826066943, "loss": 0.808368980884552, "loss_ce": 0.243183434009552, "loss_iou": 0.234375, "loss_num": 0.11328125, "loss_xval": 0.56640625, "num_input_tokens_seen": 15123552, "step": 88 }, { "epoch": 0.023410271585454067, "grad_norm": 17.71060921250277, "learning_rate": 5e-06, "loss": 0.6546, "num_input_tokens_seen": 15295844, "step": 89 }, { "epoch": 0.023410271585454067, "loss": 0.6125390529632568, "loss_ce": 0.24852542579174042, "loss_iou": 0.2021484375, "loss_num": 0.07275390625, "loss_xval": 0.36328125, "num_input_tokens_seen": 15295844, "step": 89 }, { "epoch": 0.02367330834484119, "grad_norm": 15.735800845783544, "learning_rate": 5e-06, "loss": 0.5838, "num_input_tokens_seen": 15467856, "step": 90 }, { "epoch": 0.02367330834484119, "loss": 0.5133854150772095, "loss_ce": 0.22456704080104828, "loss_iou": 0.2412109375, "loss_num": 0.057861328125, "loss_xval": 0.2890625, "num_input_tokens_seen": 15467856, "step": 90 }, { "epoch": 0.023936345104228315, "grad_norm": 16.198479692260427, "learning_rate": 5e-06, "loss": 0.6229, "num_input_tokens_seen": 15640280, "step": 91 }, { "epoch": 0.023936345104228315, "loss": 0.5497957468032837, "loss_ce": 0.23717370629310608, "loss_iou": 0.30859375, "loss_num": 0.0625, "loss_xval": 0.3125, "num_input_tokens_seen": 15640280, "step": 91 }, { "epoch": 0.02419938186361544, "grad_norm": 15.820711258384152, "learning_rate": 5e-06, "loss": 0.5852, "num_input_tokens_seen": 15812532, "step": 92 }, { "epoch": 
0.02419938186361544, "loss": 0.5448044538497925, "loss_ce": 0.22046364843845367, "loss_iou": 0.1826171875, "loss_num": 0.06494140625, "loss_xval": 0.32421875, "num_input_tokens_seen": 15812532, "step": 92 }, { "epoch": 0.024462418623002566, "grad_norm": 19.666745517026683, "learning_rate": 5e-06, "loss": 0.5956, "num_input_tokens_seen": 15984492, "step": 93 }, { "epoch": 0.024462418623002566, "loss": 0.6414846181869507, "loss_ce": 0.22668972611427307, "loss_iou": 0.232421875, "loss_num": 0.0830078125, "loss_xval": 0.4140625, "num_input_tokens_seen": 15984492, "step": 93 }, { "epoch": 0.02472545538238969, "grad_norm": 18.823515656198328, "learning_rate": 5e-06, "loss": 0.5443, "num_input_tokens_seen": 16156556, "step": 94 }, { "epoch": 0.02472545538238969, "loss": 0.507426381111145, "loss_ce": 0.2339888960123062, "loss_iou": 0.396484375, "loss_num": 0.0546875, "loss_xval": 0.2734375, "num_input_tokens_seen": 16156556, "step": 94 }, { "epoch": 0.024988492141776814, "grad_norm": 14.625660314629584, "learning_rate": 5e-06, "loss": 0.5402, "num_input_tokens_seen": 16326928, "step": 95 }, { "epoch": 0.024988492141776814, "loss": 0.5663172006607056, "loss_ce": 0.20572152733802795, "loss_iou": 0.2421875, "loss_num": 0.072265625, "loss_xval": 0.361328125, "num_input_tokens_seen": 16326928, "step": 95 }, { "epoch": 0.025251528901163937, "grad_norm": 15.783911320035779, "learning_rate": 5e-06, "loss": 0.5712, "num_input_tokens_seen": 16499268, "step": 96 }, { "epoch": 0.025251528901163937, "loss": 0.5024785399436951, "loss_ce": 0.19632619619369507, "loss_iou": 0.251953125, "loss_num": 0.061279296875, "loss_xval": 0.306640625, "num_input_tokens_seen": 16499268, "step": 96 }, { "epoch": 0.02551456566055106, "grad_norm": 16.72236617723868, "learning_rate": 5e-06, "loss": 0.6175, "num_input_tokens_seen": 16668388, "step": 97 }, { "epoch": 0.02551456566055106, "loss": 0.6046057939529419, "loss_ce": 0.19554820656776428, "loss_iou": 0.287109375, "loss_num": 0.08203125, "loss_xval": 
0.408203125, "num_input_tokens_seen": 16668388, "step": 97 }, { "epoch": 0.025777602419938185, "grad_norm": 19.132328111043208, "learning_rate": 5e-06, "loss": 0.6948, "num_input_tokens_seen": 16840748, "step": 98 }, { "epoch": 0.025777602419938185, "loss": 0.635749340057373, "loss_ce": 0.22290757298469543, "loss_iou": 0.1416015625, "loss_num": 0.08251953125, "loss_xval": 0.412109375, "num_input_tokens_seen": 16840748, "step": 98 }, { "epoch": 0.026040639179325312, "grad_norm": 17.546404729768323, "learning_rate": 5e-06, "loss": 0.6342, "num_input_tokens_seen": 17012828, "step": 99 }, { "epoch": 0.026040639179325312, "loss": 0.6548875570297241, "loss_ce": 0.23105943202972412, "loss_iou": 0.283203125, "loss_num": 0.0849609375, "loss_xval": 0.423828125, "num_input_tokens_seen": 17012828, "step": 99 }, { "epoch": 0.026303675938712436, "grad_norm": 18.526246284946534, "learning_rate": 5e-06, "loss": 0.6155, "num_input_tokens_seen": 17185216, "step": 100 }, { "epoch": 0.026303675938712436, "loss": 0.6633030772209167, "loss_ce": 0.20383042097091675, "loss_iou": 0.263671875, "loss_num": 0.091796875, "loss_xval": 0.458984375, "num_input_tokens_seen": 17185216, "step": 100 }, { "epoch": 0.02656671269809956, "grad_norm": 19.37711860101834, "learning_rate": 5e-06, "loss": 0.6252, "num_input_tokens_seen": 17355688, "step": 101 }, { "epoch": 0.02656671269809956, "loss": 0.6485173106193542, "loss_ce": 0.20686691999435425, "loss_iou": 0.146484375, "loss_num": 0.08837890625, "loss_xval": 0.44140625, "num_input_tokens_seen": 17355688, "step": 101 }, { "epoch": 0.026829749457486684, "grad_norm": 24.272978763244147, "learning_rate": 5e-06, "loss": 0.6144, "num_input_tokens_seen": 17527784, "step": 102 }, { "epoch": 0.026829749457486684, "loss": 0.6552723050117493, "loss_ce": 0.20190313458442688, "loss_iou": 0.1513671875, "loss_num": 0.0908203125, "loss_xval": 0.453125, "num_input_tokens_seen": 17527784, "step": 102 }, { "epoch": 0.027092786216873808, "grad_norm": 28.829409367834085, 
"learning_rate": 5e-06, "loss": 0.6606, "num_input_tokens_seen": 17700208, "step": 103 }, { "epoch": 0.027092786216873808, "loss": 0.7094471454620361, "loss_ce": 0.19260142743587494, "loss_iou": 0.3046875, "loss_num": 0.10302734375, "loss_xval": 0.515625, "num_input_tokens_seen": 17700208, "step": 103 }, { "epoch": 0.02735582297626093, "grad_norm": 23.160720035944347, "learning_rate": 5e-06, "loss": 0.7452, "num_input_tokens_seen": 17872308, "step": 104 }, { "epoch": 0.02735582297626093, "loss": 0.7341784238815308, "loss_ce": 0.19169792532920837, "loss_iou": 0.1513671875, "loss_num": 0.1083984375, "loss_xval": 0.54296875, "num_input_tokens_seen": 17872308, "step": 104 }, { "epoch": 0.027618859735648055, "grad_norm": 17.418429269221175, "learning_rate": 5e-06, "loss": 0.5803, "num_input_tokens_seen": 18044728, "step": 105 }, { "epoch": 0.027618859735648055, "loss": 0.5607779026031494, "loss_ce": 0.20787259936332703, "loss_iou": 0.2353515625, "loss_num": 0.07080078125, "loss_xval": 0.353515625, "num_input_tokens_seen": 18044728, "step": 105 }, { "epoch": 0.027881896495035183, "grad_norm": 17.11728575546532, "learning_rate": 5e-06, "loss": 0.5934, "num_input_tokens_seen": 18215172, "step": 106 }, { "epoch": 0.027881896495035183, "loss": 0.609626293182373, "loss_ce": 0.17749738693237305, "loss_iou": 0.09228515625, "loss_num": 0.08642578125, "loss_xval": 0.431640625, "num_input_tokens_seen": 18215172, "step": 106 }, { "epoch": 0.028144933254422307, "grad_norm": 17.86366659461009, "learning_rate": 5e-06, "loss": 0.5335, "num_input_tokens_seen": 18387084, "step": 107 }, { "epoch": 0.028144933254422307, "loss": 0.48998841643333435, "loss_ce": 0.19213685393333435, "loss_iou": 0.3046875, "loss_num": 0.0595703125, "loss_xval": 0.296875, "num_input_tokens_seen": 18387084, "step": 107 }, { "epoch": 0.02840797001380943, "grad_norm": 20.662496273440333, "learning_rate": 5e-06, "loss": 0.5971, "num_input_tokens_seen": 18559272, "step": 108 }, { "epoch": 0.02840797001380943, 
"loss": 0.6421129703521729, "loss_ce": 0.18044306337833405, "loss_iou": 0.14453125, "loss_num": 0.09228515625, "loss_xval": 0.4609375, "num_input_tokens_seen": 18559272, "step": 108 }, { "epoch": 0.028671006773196554, "grad_norm": 21.435817028210696, "learning_rate": 5e-06, "loss": 0.5635, "num_input_tokens_seen": 18731380, "step": 109 }, { "epoch": 0.028671006773196554, "loss": 0.5063576698303223, "loss_ce": 0.18104028701782227, "loss_iou": 0.1884765625, "loss_num": 0.06494140625, "loss_xval": 0.326171875, "num_input_tokens_seen": 18731380, "step": 109 }, { "epoch": 0.028934043532583678, "grad_norm": 18.8758769532993, "learning_rate": 5e-06, "loss": 0.6115, "num_input_tokens_seen": 18901956, "step": 110 }, { "epoch": 0.028934043532583678, "loss": 0.6692255139350891, "loss_ce": 0.17020206153392792, "loss_iou": 0.20703125, "loss_num": 0.099609375, "loss_xval": 0.5, "num_input_tokens_seen": 18901956, "step": 110 }, { "epoch": 0.029197080291970802, "grad_norm": 17.651564073843637, "learning_rate": 5e-06, "loss": 0.5336, "num_input_tokens_seen": 19072248, "step": 111 }, { "epoch": 0.029197080291970802, "loss": 0.4951699376106262, "loss_ce": 0.17558985948562622, "loss_iou": 0.6640625, "loss_num": 0.06396484375, "loss_xval": 0.3203125, "num_input_tokens_seen": 19072248, "step": 111 }, { "epoch": 0.029460117051357926, "grad_norm": 16.158969266118735, "learning_rate": 5e-06, "loss": 0.4952, "num_input_tokens_seen": 19244772, "step": 112 }, { "epoch": 0.029460117051357926, "loss": 0.4770987629890442, "loss_ce": 0.1765616536140442, "loss_iou": 0.1904296875, "loss_num": 0.06005859375, "loss_xval": 0.30078125, "num_input_tokens_seen": 19244772, "step": 112 }, { "epoch": 0.029723153810745053, "grad_norm": 14.450830992407502, "learning_rate": 5e-06, "loss": 0.5026, "num_input_tokens_seen": 19416868, "step": 113 }, { "epoch": 0.029723153810745053, "loss": 0.4577527642250061, "loss_ce": 0.1758924424648285, "loss_iou": 0.400390625, "loss_num": 0.056396484375, "loss_xval": 0.28125, 
"num_input_tokens_seen": 19416868, "step": 113 }, { "epoch": 0.029986190570132177, "grad_norm": 14.560235178346835, "learning_rate": 5e-06, "loss": 0.4865, "num_input_tokens_seen": 19589016, "step": 114 }, { "epoch": 0.029986190570132177, "loss": 0.5253910422325134, "loss_ce": 0.15832561254501343, "loss_iou": 0.173828125, "loss_num": 0.0732421875, "loss_xval": 0.3671875, "num_input_tokens_seen": 19589016, "step": 114 }, { "epoch": 0.0302492273295193, "grad_norm": 18.605711889816632, "learning_rate": 5e-06, "loss": 0.49, "num_input_tokens_seen": 19760904, "step": 115 }, { "epoch": 0.0302492273295193, "loss": 0.4801591634750366, "loss_ce": 0.17290815711021423, "loss_iou": 0.416015625, "loss_num": 0.061279296875, "loss_xval": 0.306640625, "num_input_tokens_seen": 19760904, "step": 115 }, { "epoch": 0.030512264088906425, "grad_norm": 20.839979972241576, "learning_rate": 5e-06, "loss": 0.5137, "num_input_tokens_seen": 19932724, "step": 116 }, { "epoch": 0.030512264088906425, "loss": 0.4843531847000122, "loss_ce": 0.14585217833518982, "loss_iou": NaN, "loss_num": 0.06787109375, "loss_xval": 0.337890625, "num_input_tokens_seen": 19932724, "step": 116 }, { "epoch": 0.03077530084829355, "grad_norm": 20.407619855634906, "learning_rate": 5e-06, "loss": 0.5188, "num_input_tokens_seen": 20105196, "step": 117 }, { "epoch": 0.03077530084829355, "loss": 0.521455705165863, "loss_ce": 0.17148011922836304, "loss_iou": 0.275390625, "loss_num": 0.06982421875, "loss_xval": 0.349609375, "num_input_tokens_seen": 20105196, "step": 117 }, { "epoch": 0.031038337607680672, "grad_norm": 17.136634668569023, "learning_rate": 5e-06, "loss": 0.529, "num_input_tokens_seen": 20277468, "step": 118 }, { "epoch": 0.031038337607680672, "loss": 0.5619306564331055, "loss_ce": 0.15323926508426666, "loss_iou": 0.224609375, "loss_num": 0.08203125, "loss_xval": 0.408203125, "num_input_tokens_seen": 20277468, "step": 118 }, { "epoch": 0.0313013743670678, "grad_norm": 15.540166390852267, "learning_rate": 5e-06, 
"loss": 0.5196, "num_input_tokens_seen": 20448108, "step": 119 }, { "epoch": 0.0313013743670678, "loss": 0.5409432649612427, "loss_ce": 0.15117278695106506, "loss_iou": 0.40234375, "loss_num": 0.078125, "loss_xval": 0.390625, "num_input_tokens_seen": 20448108, "step": 119 }, { "epoch": 0.031564411126454923, "grad_norm": 12.306363086115368, "learning_rate": 5e-06, "loss": 0.4996, "num_input_tokens_seen": 20620368, "step": 120 }, { "epoch": 0.031564411126454923, "loss": 0.49895310401916504, "loss_ce": 0.13860151171684265, "loss_iou": 0.32421875, "loss_num": 0.072265625, "loss_xval": 0.359375, "num_input_tokens_seen": 20620368, "step": 120 }, { "epoch": 0.03182744788584205, "grad_norm": 13.090987000723873, "learning_rate": 5e-06, "loss": 0.4537, "num_input_tokens_seen": 20792584, "step": 121 }, { "epoch": 0.03182744788584205, "loss": 0.47374969720840454, "loss_ce": 0.14452606439590454, "loss_iou": 0.3203125, "loss_num": 0.06591796875, "loss_xval": 0.330078125, "num_input_tokens_seen": 20792584, "step": 121 }, { "epoch": 0.03209048464522917, "grad_norm": 12.574059354034341, "learning_rate": 5e-06, "loss": 0.4245, "num_input_tokens_seen": 20964948, "step": 122 }, { "epoch": 0.03209048464522917, "loss": 0.4678102135658264, "loss_ce": 0.12357192486524582, "loss_iou": 0.10400390625, "loss_num": 0.06884765625, "loss_xval": 0.34375, "num_input_tokens_seen": 20964948, "step": 122 }, { "epoch": 0.032353521404616295, "grad_norm": 34.33651980128175, "learning_rate": 5e-06, "loss": 0.5624, "num_input_tokens_seen": 21134744, "step": 123 }, { "epoch": 0.032353521404616295, "loss": 0.4556346535682678, "loss_ce": 0.13812974095344543, "loss_iou": 0.6640625, "loss_num": 0.0634765625, "loss_xval": 0.318359375, "num_input_tokens_seen": 21134744, "step": 123 }, { "epoch": 0.03261655816400342, "grad_norm": 22.118966434798295, "learning_rate": 5e-06, "loss": 0.6017, "num_input_tokens_seen": 21305320, "step": 124 }, { "epoch": 0.03261655816400342, "loss": 0.5379438996315002, "loss_ce": 
0.14316853880882263, "loss_iou": 0.345703125, "loss_num": 0.0791015625, "loss_xval": 0.39453125, "num_input_tokens_seen": 21305320, "step": 124 }, { "epoch": 0.03287959492339054, "grad_norm": 22.565129824621987, "learning_rate": 5e-06, "loss": 0.6452, "num_input_tokens_seen": 21477544, "step": 125 }, { "epoch": 0.03287959492339054, "loss": 0.5944963693618774, "loss_ce": 0.13990655541419983, "loss_iou": 0.216796875, "loss_num": 0.0908203125, "loss_xval": 0.455078125, "num_input_tokens_seen": 21477544, "step": 125 }, { "epoch": 0.033142631682777667, "grad_norm": 13.89281994742959, "learning_rate": 5e-06, "loss": 0.5755, "num_input_tokens_seen": 21649600, "step": 126 }, { "epoch": 0.033142631682777667, "loss": 0.5376471877098083, "loss_ce": 0.12211985141038895, "loss_iou": 0.10888671875, "loss_num": 0.0830078125, "loss_xval": 0.416015625, "num_input_tokens_seen": 21649600, "step": 126 }, { "epoch": 0.03340566844216479, "grad_norm": 15.070169412653112, "learning_rate": 5e-06, "loss": 0.4704, "num_input_tokens_seen": 21821564, "step": 127 }, { "epoch": 0.03340566844216479, "loss": 0.4526183605194092, "loss_ce": 0.12595820426940918, "loss_iou": 0.21484375, "loss_num": 0.0654296875, "loss_xval": 0.326171875, "num_input_tokens_seen": 21821564, "step": 127 }, { "epoch": 0.033668705201551914, "grad_norm": 14.62625838728594, "learning_rate": 5e-06, "loss": 0.4885, "num_input_tokens_seen": 21993864, "step": 128 }, { "epoch": 0.033668705201551914, "loss": 0.5165751576423645, "loss_ce": 0.12289837747812271, "loss_iou": 0.28515625, "loss_num": 0.07861328125, "loss_xval": 0.39453125, "num_input_tokens_seen": 21993864, "step": 128 }, { "epoch": 0.03393174196093904, "grad_norm": 20.05433315099477, "learning_rate": 5e-06, "loss": 0.4754, "num_input_tokens_seen": 22162596, "step": 129 }, { "epoch": 0.03393174196093904, "loss": 0.4400935173034668, "loss_ce": 0.11538645625114441, "loss_iou": 0.59375, "loss_num": 0.06494140625, "loss_xval": 0.32421875, "num_input_tokens_seen": 22162596, 
"step": 129 }, { "epoch": 0.03419477872032617, "grad_norm": 17.28797678972647, "learning_rate": 5e-06, "loss": 0.5272, "num_input_tokens_seen": 22334624, "step": 130 }, { "epoch": 0.03419477872032617, "loss": 0.5726144313812256, "loss_ce": 0.1124093234539032, "loss_iou": 0.17578125, "loss_num": 0.091796875, "loss_xval": 0.4609375, "num_input_tokens_seen": 22334624, "step": 130 }, { "epoch": 0.03445781547971329, "grad_norm": 19.611364973231527, "learning_rate": 5e-06, "loss": 0.5629, "num_input_tokens_seen": 22506552, "step": 131 }, { "epoch": 0.03445781547971329, "loss": 0.5982115268707275, "loss_ce": 0.10993030667304993, "loss_iou": 0.08642578125, "loss_num": 0.09765625, "loss_xval": 0.48828125, "num_input_tokens_seen": 22506552, "step": 131 }, { "epoch": 0.034720852239100417, "grad_norm": 16.543739789854843, "learning_rate": 5e-06, "loss": 0.5381, "num_input_tokens_seen": 22678808, "step": 132 }, { "epoch": 0.034720852239100417, "loss": 0.45885854959487915, "loss_ce": 0.11523060500621796, "loss_iou": 0.337890625, "loss_num": 0.06884765625, "loss_xval": 0.34375, "num_input_tokens_seen": 22678808, "step": 132 }, { "epoch": 0.03498388899848754, "grad_norm": 17.674510611725847, "learning_rate": 5e-06, "loss": 0.4525, "num_input_tokens_seen": 22850736, "step": 133 }, { "epoch": 0.03498388899848754, "loss": 0.45272764563560486, "loss_ce": 0.12191709131002426, "loss_iou": 0.2216796875, "loss_num": 0.06640625, "loss_xval": 0.330078125, "num_input_tokens_seen": 22850736, "step": 133 }, { "epoch": 0.035246925757874664, "grad_norm": 15.972341632067714, "learning_rate": 5e-06, "loss": 0.4719, "num_input_tokens_seen": 23020040, "step": 134 }, { "epoch": 0.035246925757874664, "loss": 0.5267431139945984, "loss_ce": 0.11499997228384018, "loss_iou": 0.232421875, "loss_num": 0.08251953125, "loss_xval": 0.412109375, "num_input_tokens_seen": 23020040, "step": 134 }, { "epoch": 0.03550996251726179, "grad_norm": 18.16452757572112, "learning_rate": 5e-06, "loss": 0.4878, 
"num_input_tokens_seen": 23192048, "step": 135 }, { "epoch": 0.03550996251726179, "loss": 0.42464134097099304, "loss_ce": 0.10701439529657364, "loss_iou": 0.3671875, "loss_num": 0.0634765625, "loss_xval": 0.318359375, "num_input_tokens_seen": 23192048, "step": 135 }, { "epoch": 0.03577299927664891, "grad_norm": 16.237572483673375, "learning_rate": 5e-06, "loss": 0.4644, "num_input_tokens_seen": 23360448, "step": 136 }, { "epoch": 0.03577299927664891, "loss": 0.4710727334022522, "loss_ce": 0.0985141396522522, "loss_iou": 0.263671875, "loss_num": 0.07470703125, "loss_xval": 0.373046875, "num_input_tokens_seen": 23360448, "step": 136 }, { "epoch": 0.036036036036036036, "grad_norm": 18.804237772436053, "learning_rate": 5e-06, "loss": 0.4291, "num_input_tokens_seen": 23530300, "step": 137 }, { "epoch": 0.036036036036036036, "loss": 0.47930601239204407, "loss_ce": 0.10406187921762466, "loss_iou": 0.29296875, "loss_num": 0.0751953125, "loss_xval": 0.375, "num_input_tokens_seen": 23530300, "step": 137 }, { "epoch": 0.03629907279542316, "grad_norm": 17.355021474463314, "learning_rate": 5e-06, "loss": 0.5374, "num_input_tokens_seen": 23702304, "step": 138 }, { "epoch": 0.03629907279542316, "loss": 0.6615355014801025, "loss_ce": 0.10416243970394135, "loss_iou": 0.267578125, "loss_num": 0.111328125, "loss_xval": 0.55859375, "num_input_tokens_seen": 23702304, "step": 138 }, { "epoch": 0.03656210955481028, "grad_norm": 14.642948575679453, "learning_rate": 5e-06, "loss": 0.4417, "num_input_tokens_seen": 23872132, "step": 139 }, { "epoch": 0.03656210955481028, "loss": 0.44469529390335083, "loss_ce": 0.10735400021076202, "loss_iou": 0.376953125, "loss_num": 0.0673828125, "loss_xval": 0.337890625, "num_input_tokens_seen": 23872132, "step": 139 }, { "epoch": 0.03682514631419741, "grad_norm": 14.422554847110444, "learning_rate": 5e-06, "loss": 0.4228, "num_input_tokens_seen": 24044200, "step": 140 }, { "epoch": 0.03682514631419741, "loss": 0.4664979577064514, "loss_ce": 
0.10651260614395142, "loss_iou": 0.275390625, "loss_num": 0.07177734375, "loss_xval": 0.359375, "num_input_tokens_seen": 24044200, "step": 140 }, { "epoch": 0.03708818307358453, "grad_norm": 27.027123187239088, "learning_rate": 5e-06, "loss": 0.4106, "num_input_tokens_seen": 24216464, "step": 141 }, { "epoch": 0.03708818307358453, "loss": 0.4140710234642029, "loss_ce": 0.09876340627670288, "loss_iou": 0.2275390625, "loss_num": 0.06298828125, "loss_xval": 0.314453125, "num_input_tokens_seen": 24216464, "step": 141 }, { "epoch": 0.037351219832971655, "grad_norm": 20.30988259642017, "learning_rate": 5e-06, "loss": 0.5663, "num_input_tokens_seen": 24386876, "step": 142 }, { "epoch": 0.037351219832971655, "loss": 0.500027596950531, "loss_ce": 0.091580331325531, "loss_iou": 0.25390625, "loss_num": 0.08154296875, "loss_xval": 0.408203125, "num_input_tokens_seen": 24386876, "step": 142 }, { "epoch": 0.03761425659235878, "grad_norm": 17.151019381080523, "learning_rate": 5e-06, "loss": 0.4943, "num_input_tokens_seen": 24559004, "step": 143 }, { "epoch": 0.03761425659235878, "loss": 0.5077698826789856, "loss_ce": 0.08748182654380798, "loss_iou": 0.419921875, "loss_num": 0.083984375, "loss_xval": 0.419921875, "num_input_tokens_seen": 24559004, "step": 143 }, { "epoch": 0.03787729335174591, "grad_norm": 13.449145519268138, "learning_rate": 5e-06, "loss": 0.4628, "num_input_tokens_seen": 24729228, "step": 144 }, { "epoch": 0.03787729335174591, "loss": 0.477780282497406, "loss_ce": 0.0906953439116478, "loss_iou": 0.2353515625, "loss_num": 0.07763671875, "loss_xval": 0.38671875, "num_input_tokens_seen": 24729228, "step": 144 }, { "epoch": 0.03814033011113303, "grad_norm": 15.658548235445116, "learning_rate": 5e-06, "loss": 0.4482, "num_input_tokens_seen": 24901324, "step": 145 }, { "epoch": 0.03814033011113303, "loss": 0.460429847240448, "loss_ce": 0.086406409740448, "loss_iou": 0.267578125, "loss_num": 0.07470703125, "loss_xval": 0.375, "num_input_tokens_seen": 24901324, "step": 
145 }, { "epoch": 0.03840336687052016, "grad_norm": 14.557303161182968, "learning_rate": 5e-06, "loss": 0.5115, "num_input_tokens_seen": 25073508, "step": 146 }, { "epoch": 0.03840336687052016, "loss": 0.6442508697509766, "loss_ce": 0.07833293080329895, "loss_iou": 0.11376953125, "loss_num": 0.11328125, "loss_xval": 0.56640625, "num_input_tokens_seen": 25073508, "step": 146 }, { "epoch": 0.03866640362990728, "grad_norm": 17.777843374612736, "learning_rate": 5e-06, "loss": 0.4213, "num_input_tokens_seen": 25245752, "step": 147 }, { "epoch": 0.03866640362990728, "loss": 0.40006011724472046, "loss_ce": 0.08243316411972046, "loss_iou": 0.419921875, "loss_num": 0.0634765625, "loss_xval": 0.318359375, "num_input_tokens_seen": 25245752, "step": 147 }, { "epoch": 0.038929440389294405, "grad_norm": 13.44125897811313, "learning_rate": 5e-06, "loss": 0.4791, "num_input_tokens_seen": 25418156, "step": 148 }, { "epoch": 0.038929440389294405, "loss": 0.5981014966964722, "loss_ce": 0.08491791784763336, "loss_iou": 0.18359375, "loss_num": 0.1025390625, "loss_xval": 0.51171875, "num_input_tokens_seen": 25418156, "step": 148 }, { "epoch": 0.03919247714868153, "grad_norm": 11.76514004625091, "learning_rate": 5e-06, "loss": 0.4001, "num_input_tokens_seen": 25589904, "step": 149 }, { "epoch": 0.03919247714868153, "loss": 0.35078275203704834, "loss_ce": 0.08393705636262894, "loss_iou": 0.267578125, "loss_num": 0.053466796875, "loss_xval": 0.267578125, "num_input_tokens_seen": 25589904, "step": 149 }, { "epoch": 0.03945551390806865, "grad_norm": 12.03724933893627, "learning_rate": 5e-06, "loss": 0.4147, "num_input_tokens_seen": 25762000, "step": 150 }, { "epoch": 0.03945551390806865, "loss": 0.4596315622329712, "loss_ce": 0.0690065547823906, "loss_iou": 0.376953125, "loss_num": 0.078125, "loss_xval": 0.390625, "num_input_tokens_seen": 25762000, "step": 150 }, { "epoch": 0.039718550667455776, "grad_norm": 13.05163620933256, "learning_rate": 5e-06, "loss": 0.398, "num_input_tokens_seen": 
25934528, "step": 151 }, { "epoch": 0.039718550667455776, "loss": 0.38974958658218384, "loss_ce": 0.07322127372026443, "loss_iou": 0.1591796875, "loss_num": 0.0634765625, "loss_xval": 0.31640625, "num_input_tokens_seen": 25934528, "step": 151 }, { "epoch": 0.0399815874268429, "grad_norm": 23.58601457584165, "learning_rate": 5e-06, "loss": 0.3897, "num_input_tokens_seen": 26104148, "step": 152 }, { "epoch": 0.0399815874268429, "loss": 0.3437004089355469, "loss_ce": 0.07172775268554688, "loss_iou": 0.326171875, "loss_num": 0.054443359375, "loss_xval": 0.271484375, "num_input_tokens_seen": 26104148, "step": 152 }, { "epoch": 0.040244624186230024, "grad_norm": 18.61300216202182, "learning_rate": 5e-06, "loss": 0.4682, "num_input_tokens_seen": 26276256, "step": 153 }, { "epoch": 0.040244624186230024, "loss": 0.43069130182266235, "loss_ce": 0.06960733234882355, "loss_iou": 0.419921875, "loss_num": 0.072265625, "loss_xval": 0.361328125, "num_input_tokens_seen": 26276256, "step": 153 }, { "epoch": 0.04050766094561715, "grad_norm": 14.116759080889954, "learning_rate": 5e-06, "loss": 0.4744, "num_input_tokens_seen": 26448288, "step": 154 }, { "epoch": 0.04050766094561715, "loss": 0.5761303901672363, "loss_ce": 0.0805249810218811, "loss_iou": 0.333984375, "loss_num": 0.09912109375, "loss_xval": 0.49609375, "num_input_tokens_seen": 26448288, "step": 154 }, { "epoch": 0.04077069770500427, "grad_norm": 10.146662886533779, "learning_rate": 5e-06, "loss": 0.3974, "num_input_tokens_seen": 26620604, "step": 155 }, { "epoch": 0.04077069770500427, "loss": 0.3559998869895935, "loss_ce": 0.07047741115093231, "loss_iou": 0.310546875, "loss_num": 0.05712890625, "loss_xval": 0.28515625, "num_input_tokens_seen": 26620604, "step": 155 }, { "epoch": 0.041033734464391396, "grad_norm": 11.74557828439257, "learning_rate": 5e-06, "loss": 0.3616, "num_input_tokens_seen": 26789104, "step": 156 }, { "epoch": 0.041033734464391396, "loss": 0.3762925863265991, "loss_ce": 0.06867540627717972, 
"loss_iou": 0.2138671875, "loss_num": 0.0615234375, "loss_xval": 0.30859375, "num_input_tokens_seen": 26789104, "step": 156 }, { "epoch": 0.041296771223778526, "grad_norm": 11.967339752616043, "learning_rate": 5e-06, "loss": 0.3655, "num_input_tokens_seen": 26961408, "step": 157 }, { "epoch": 0.041296771223778526, "loss": 0.36129921674728394, "loss_ce": 0.060029659420251846, "loss_iou": 0.369140625, "loss_num": 0.060302734375, "loss_xval": 0.30078125, "num_input_tokens_seen": 26961408, "step": 157 }, { "epoch": 0.04155980798316565, "grad_norm": 16.462145017062344, "learning_rate": 5e-06, "loss": 0.3599, "num_input_tokens_seen": 27133600, "step": 158 }, { "epoch": 0.04155980798316565, "loss": 0.4060816764831543, "loss_ce": 0.06208755075931549, "loss_iou": 0.291015625, "loss_num": 0.06884765625, "loss_xval": 0.34375, "num_input_tokens_seen": 27133600, "step": 158 }, { "epoch": 0.041822844742552774, "grad_norm": 18.2471111237373, "learning_rate": 5e-06, "loss": 0.4173, "num_input_tokens_seen": 27305676, "step": 159 }, { "epoch": 0.041822844742552774, "loss": 0.3555372357368469, "loss_ce": 0.07025889307260513, "loss_iou": 0.271484375, "loss_num": 0.05712890625, "loss_xval": 0.28515625, "num_input_tokens_seen": 27305676, "step": 159 }, { "epoch": 0.0420858815019399, "grad_norm": 22.735392677918405, "learning_rate": 5e-06, "loss": 0.5962, "num_input_tokens_seen": 27477824, "step": 160 }, { "epoch": 0.0420858815019399, "loss": 0.6341134309768677, "loss_ce": 0.06184776872396469, "loss_iou": 0.333984375, "loss_num": 0.1142578125, "loss_xval": 0.5703125, "num_input_tokens_seen": 27477824, "step": 160 }, { "epoch": 0.04234891826132702, "grad_norm": 13.104760953447814, "learning_rate": 5e-06, "loss": 0.4091, "num_input_tokens_seen": 27650024, "step": 161 }, { "epoch": 0.04234891826132702, "loss": 0.4603307843208313, "loss_ce": 0.059085663408041, "loss_iou": 0.275390625, "loss_num": 0.080078125, "loss_xval": 0.400390625, "num_input_tokens_seen": 27650024, "step": 161 }, { 
"epoch": 0.042611955020714146, "grad_norm": 15.303759630511669, "learning_rate": 5e-06, "loss": 0.4433, "num_input_tokens_seen": 27822408, "step": 162 }, { "epoch": 0.042611955020714146, "loss": 0.43349525332450867, "loss_ce": 0.06215735524892807, "loss_iou": 0.23828125, "loss_num": 0.07421875, "loss_xval": 0.37109375, "num_input_tokens_seen": 27822408, "step": 162 }, { "epoch": 0.04287499178010127, "grad_norm": 14.84522191071012, "learning_rate": 5e-06, "loss": 0.3769, "num_input_tokens_seen": 27992440, "step": 163 }, { "epoch": 0.04287499178010127, "loss": 0.34604543447494507, "loss_ce": 0.05673878639936447, "loss_iou": 0.24609375, "loss_num": 0.057861328125, "loss_xval": 0.2890625, "num_input_tokens_seen": 27992440, "step": 163 }, { "epoch": 0.04313802853948839, "grad_norm": 17.265704948492278, "learning_rate": 5e-06, "loss": 0.4298, "num_input_tokens_seen": 28164464, "step": 164 }, { "epoch": 0.04313802853948839, "loss": 0.48720821738243103, "loss_ce": 0.05849727243185043, "loss_iou": 0.2890625, "loss_num": 0.0859375, "loss_xval": 0.4296875, "num_input_tokens_seen": 28164464, "step": 164 }, { "epoch": 0.04340106529887552, "grad_norm": 13.53959312060916, "learning_rate": 5e-06, "loss": 0.3991, "num_input_tokens_seen": 28334436, "step": 165 }, { "epoch": 0.04340106529887552, "loss": 0.3757469058036804, "loss_ce": 0.05201642960309982, "loss_iou": 0.1220703125, "loss_num": 0.064453125, "loss_xval": 0.32421875, "num_input_tokens_seen": 28334436, "step": 165 }, { "epoch": 0.04366410205826264, "grad_norm": 12.759789778648042, "learning_rate": 5e-06, "loss": 0.3563, "num_input_tokens_seen": 28506772, "step": 166 }, { "epoch": 0.04366410205826264, "loss": 0.3605605363845825, "loss_ce": 0.05233298987150192, "loss_iou": 0.341796875, "loss_num": 0.0615234375, "loss_xval": 0.30859375, "num_input_tokens_seen": 28506772, "step": 166 }, { "epoch": 0.043927138817649765, "grad_norm": 14.640887630603752, "learning_rate": 5e-06, "loss": 0.3881, "num_input_tokens_seen": 28676980, 
"step": 167 }, { "epoch": 0.043927138817649765, "loss": 0.3610857427120209, "loss_ce": 0.056642383337020874, "loss_iou": 0.240234375, "loss_num": 0.06103515625, "loss_xval": 0.3046875, "num_input_tokens_seen": 28676980, "step": 167 }, { "epoch": 0.04419017557703689, "grad_norm": 12.095247845748437, "learning_rate": 5e-06, "loss": 0.3742, "num_input_tokens_seen": 28849236, "step": 168 }, { "epoch": 0.04419017557703689, "loss": 0.40532949566841125, "loss_ce": 0.05388907343149185, "loss_iou": 0.3203125, "loss_num": 0.0703125, "loss_xval": 0.3515625, "num_input_tokens_seen": 28849236, "step": 168 }, { "epoch": 0.04445321233642401, "grad_norm": 10.33587361024043, "learning_rate": 5e-06, "loss": 0.3837, "num_input_tokens_seen": 29021180, "step": 169 }, { "epoch": 0.04445321233642401, "loss": 0.3248461186885834, "loss_ce": 0.046464771032333374, "loss_iou": 0.1787109375, "loss_num": 0.0556640625, "loss_xval": 0.279296875, "num_input_tokens_seen": 29021180, "step": 169 }, { "epoch": 0.044716249095811136, "grad_norm": 10.708625450816182, "learning_rate": 5e-06, "loss": 0.3951, "num_input_tokens_seen": 29191532, "step": 170 }, { "epoch": 0.044716249095811136, "loss": 0.4214698076248169, "loss_ce": 0.052573323249816895, "loss_iou": 0.33203125, "loss_num": 0.07373046875, "loss_xval": 0.369140625, "num_input_tokens_seen": 29191532, "step": 170 }, { "epoch": 0.04497928585519827, "grad_norm": 14.817164885231227, "learning_rate": 5e-06, "loss": 0.3936, "num_input_tokens_seen": 29363536, "step": 171 }, { "epoch": 0.04497928585519827, "loss": 0.40210169553756714, "loss_ce": 0.04602260887622833, "loss_iou": 0.3046875, "loss_num": 0.0712890625, "loss_xval": 0.35546875, "num_input_tokens_seen": 29363536, "step": 171 }, { "epoch": 0.04524232261458539, "grad_norm": 11.284174351804506, "learning_rate": 5e-06, "loss": 0.3753, "num_input_tokens_seen": 29533848, "step": 172 }, { "epoch": 0.04524232261458539, "loss": 0.3555999994277954, "loss_ce": 0.04358828812837601, "loss_iou": 0.2890625, 
"loss_num": 0.0625, "loss_xval": 0.3125, "num_input_tokens_seen": 29533848, "step": 172 }, { "epoch": 0.045505359373972515, "grad_norm": 9.66534144364822, "learning_rate": 5e-06, "loss": 0.307, "num_input_tokens_seen": 29706424, "step": 173 }, { "epoch": 0.045505359373972515, "loss": 0.3183665871620178, "loss_ce": 0.04297598451375961, "loss_iou": 0.35546875, "loss_num": 0.05517578125, "loss_xval": 0.275390625, "num_input_tokens_seen": 29706424, "step": 173 }, { "epoch": 0.04576839613335964, "grad_norm": 15.933225853148851, "learning_rate": 5e-06, "loss": 0.3645, "num_input_tokens_seen": 29878348, "step": 174 }, { "epoch": 0.04576839613335964, "loss": 0.24233956634998322, "loss_ce": 0.051421597599983215, "loss_iou": 0.609375, "loss_num": 0.0380859375, "loss_xval": 0.19140625, "num_input_tokens_seen": 29878348, "step": 174 }, { "epoch": 0.04603143289274676, "grad_norm": 15.32149954029387, "learning_rate": 5e-06, "loss": 0.403, "num_input_tokens_seen": 30050700, "step": 175 }, { "epoch": 0.04603143289274676, "loss": 0.41682887077331543, "loss_ce": 0.047810301184654236, "loss_iou": 0.2734375, "loss_num": 0.07373046875, "loss_xval": 0.369140625, "num_input_tokens_seen": 30050700, "step": 175 }, { "epoch": 0.046294469652133886, "grad_norm": 11.301125755600708, "learning_rate": 5e-06, "loss": 0.3852, "num_input_tokens_seen": 30222872, "step": 176 }, { "epoch": 0.046294469652133886, "loss": 0.35361334681510925, "loss_ce": 0.04599615931510925, "loss_iou": 0.341796875, "loss_num": 0.0615234375, "loss_xval": 0.30859375, "num_input_tokens_seen": 30222872, "step": 176 }, { "epoch": 0.04655750641152101, "grad_norm": 15.04642729797989, "learning_rate": 5e-06, "loss": 0.3569, "num_input_tokens_seen": 30394820, "step": 177 }, { "epoch": 0.04655750641152101, "loss": 0.31377077102661133, "loss_ce": 0.04106569290161133, "loss_iou": 0.2109375, "loss_num": 0.0546875, "loss_xval": 0.2734375, "num_input_tokens_seen": 30394820, "step": 177 }, { "epoch": 0.046820543170908134, "grad_norm": 
14.180764275493368, "learning_rate": 5e-06, "loss": 0.3856, "num_input_tokens_seen": 30567088, "step": 178 }, { "epoch": 0.046820543170908134, "loss": 0.3383968770503998, "loss_ce": 0.04188808798789978, "loss_iou": 0.36328125, "loss_num": 0.059326171875, "loss_xval": 0.296875, "num_input_tokens_seen": 30567088, "step": 178 }, { "epoch": 0.04708357993029526, "grad_norm": 11.43960593758222, "learning_rate": 5e-06, "loss": 0.3723, "num_input_tokens_seen": 30739392, "step": 179 }, { "epoch": 0.04708357993029526, "loss": 0.4008994996547699, "loss_ce": 0.0434776172041893, "loss_iou": 0.2451171875, "loss_num": 0.0712890625, "loss_xval": 0.357421875, "num_input_tokens_seen": 30739392, "step": 179 }, { "epoch": 0.04734661668968238, "grad_norm": 10.118856123139876, "learning_rate": 5e-06, "loss": 0.3842, "num_input_tokens_seen": 30911592, "step": 180 }, { "epoch": 0.04734661668968238, "loss": 0.33974575996398926, "loss_ce": 0.03798792511224747, "loss_iou": 0.49609375, "loss_num": 0.060546875, "loss_xval": 0.30078125, "num_input_tokens_seen": 30911592, "step": 180 }, { "epoch": 0.047609653449069506, "grad_norm": 8.662737873806904, "learning_rate": 5e-06, "loss": 0.2718, "num_input_tokens_seen": 31083320, "step": 181 }, { "epoch": 0.047609653449069506, "loss": 0.24805772304534912, "loss_ce": 0.034434687346220016, "loss_iou": 0.330078125, "loss_num": 0.042724609375, "loss_xval": 0.2138671875, "num_input_tokens_seen": 31083320, "step": 181 }, { "epoch": 0.04787269020845663, "grad_norm": 9.949547683003779, "learning_rate": 5e-06, "loss": 0.3404, "num_input_tokens_seen": 31255136, "step": 182 }, { "epoch": 0.04787269020845663, "loss": 0.3085269331932068, "loss_ce": 0.03093905746936798, "loss_iou": 0.61328125, "loss_num": 0.055419921875, "loss_xval": 0.27734375, "num_input_tokens_seen": 31255136, "step": 182 }, { "epoch": 0.04813572696784375, "grad_norm": 11.304350377916183, "learning_rate": 5e-06, "loss": 0.3478, "num_input_tokens_seen": 31427380, "step": 183 }, { "epoch": 
0.04813572696784375, "loss": 0.2798244059085846, "loss_ce": 0.037881046533584595, "loss_iou": 0.380859375, "loss_num": 0.04833984375, "loss_xval": 0.2421875, "num_input_tokens_seen": 31427380, "step": 183 }, { "epoch": 0.04839876372723088, "grad_norm": 20.65800125552813, "learning_rate": 5e-06, "loss": 0.4261, "num_input_tokens_seen": 31599344, "step": 184 }, { "epoch": 0.04839876372723088, "loss": 0.37513959407806396, "loss_ce": 0.03566203638911247, "loss_iou": 0.59375, "loss_num": 0.06787109375, "loss_xval": 0.33984375, "num_input_tokens_seen": 31599344, "step": 184 }, { "epoch": 0.04866180048661801, "grad_norm": 21.767539584052432, "learning_rate": 5e-06, "loss": 0.4585, "num_input_tokens_seen": 31771536, "step": 185 }, { "epoch": 0.04866180048661801, "loss": 0.437938392162323, "loss_ce": 0.031200092285871506, "loss_iou": 0.275390625, "loss_num": 0.08154296875, "loss_xval": 0.40625, "num_input_tokens_seen": 31771536, "step": 185 }, { "epoch": 0.04892483724600513, "grad_norm": 19.478046485138204, "learning_rate": 5e-06, "loss": 0.5036, "num_input_tokens_seen": 31943688, "step": 186 }, { "epoch": 0.04892483724600513, "loss": 0.4687976837158203, "loss_ce": 0.03593636304140091, "loss_iou": 0.201171875, "loss_num": 0.08642578125, "loss_xval": 0.43359375, "num_input_tokens_seen": 31943688, "step": 186 }, { "epoch": 0.049187874005392256, "grad_norm": 11.185610516547799, "learning_rate": 5e-06, "loss": 0.3628, "num_input_tokens_seen": 32116040, "step": 187 }, { "epoch": 0.049187874005392256, "loss": 0.39327770471572876, "loss_ce": 0.03439096361398697, "loss_iou": 0.36328125, "loss_num": 0.07177734375, "loss_xval": 0.359375, "num_input_tokens_seen": 32116040, "step": 187 }, { "epoch": 0.04945091076477938, "grad_norm": 11.155969289477222, "learning_rate": 5e-06, "loss": 0.2974, "num_input_tokens_seen": 32288500, "step": 188 }, { "epoch": 0.04945091076477938, "loss": 0.31713372468948364, "loss_ce": 0.028681576251983643, "loss_iou": 0.31640625, "loss_num": 0.0576171875, 
"loss_xval": 0.2890625, "num_input_tokens_seen": 32288500, "step": 188 }, { "epoch": 0.0497139475241665, "grad_norm": 11.834615664464208, "learning_rate": 5e-06, "loss": 0.3369, "num_input_tokens_seen": 32460772, "step": 189 }, { "epoch": 0.0497139475241665, "loss": 0.3497753441333771, "loss_ce": 0.02921871840953827, "loss_iou": 0.484375, "loss_num": 0.06396484375, "loss_xval": 0.3203125, "num_input_tokens_seen": 32460772, "step": 189 }, { "epoch": 0.04997698428355363, "grad_norm": 10.434886006122214, "learning_rate": 5e-06, "loss": 0.3387, "num_input_tokens_seen": 32632848, "step": 190 }, { "epoch": 0.04997698428355363, "loss": 0.36610347032546997, "loss_ce": 0.029921812936663628, "loss_iou": 0.38671875, "loss_num": 0.0673828125, "loss_xval": 0.3359375, "num_input_tokens_seen": 32632848, "step": 190 }, { "epoch": 0.05024002104294075, "grad_norm": 9.800996766479896, "learning_rate": 5e-06, "loss": 0.3599, "num_input_tokens_seen": 32805200, "step": 191 }, { "epoch": 0.05024002104294075, "loss": 0.3484704792499542, "loss_ce": 0.032674580812454224, "loss_iou": 0.466796875, "loss_num": 0.06298828125, "loss_xval": 0.31640625, "num_input_tokens_seen": 32805200, "step": 191 }, { "epoch": 0.050503057802327875, "grad_norm": 9.1652653754744, "learning_rate": 5e-06, "loss": 0.3086, "num_input_tokens_seen": 32977420, "step": 192 }, { "epoch": 0.050503057802327875, "loss": 0.3598284125328064, "loss_ce": 0.027797123417258263, "loss_iou": 0.283203125, "loss_num": 0.06640625, "loss_xval": 0.33203125, "num_input_tokens_seen": 32977420, "step": 192 }, { "epoch": 0.050766094561715, "grad_norm": 11.082061385505076, "learning_rate": 5e-06, "loss": 0.3206, "num_input_tokens_seen": 33149792, "step": 193 }, { "epoch": 0.050766094561715, "loss": 0.3082242012023926, "loss_ce": 0.035763248801231384, "loss_iou": 0.41796875, "loss_num": 0.054443359375, "loss_xval": 0.2734375, "num_input_tokens_seen": 33149792, "step": 193 }, { "epoch": 0.05102913132110212, "grad_norm": 10.154212185692936, 
"learning_rate": 5e-06, "loss": 0.3308, "num_input_tokens_seen": 33321580, "step": 194 }, { "epoch": 0.05102913132110212, "loss": 0.3002238869667053, "loss_ce": 0.02568773366510868, "loss_iou": 0.36328125, "loss_num": 0.054931640625, "loss_xval": 0.275390625, "num_input_tokens_seen": 33321580, "step": 194 }, { "epoch": 0.051292168080489246, "grad_norm": 12.486303050942563, "learning_rate": 5e-06, "loss": 0.3875, "num_input_tokens_seen": 33493632, "step": 195 }, { "epoch": 0.051292168080489246, "loss": 0.35659241676330566, "loss_ce": 0.030542613938450813, "loss_iou": 0.455078125, "loss_num": 0.0654296875, "loss_xval": 0.326171875, "num_input_tokens_seen": 33493632, "step": 195 }, { "epoch": 0.05155520483987637, "grad_norm": 13.41419397842875, "learning_rate": 5e-06, "loss": 0.3335, "num_input_tokens_seen": 33665544, "step": 196 }, { "epoch": 0.05155520483987637, "loss": 0.36018121242523193, "loss_ce": 0.02436576411128044, "loss_iou": 0.37890625, "loss_num": 0.0673828125, "loss_xval": 0.3359375, "num_input_tokens_seen": 33665544, "step": 196 }, { "epoch": 0.051818241599263494, "grad_norm": 19.34380060773709, "learning_rate": 5e-06, "loss": 0.3472, "num_input_tokens_seen": 33837492, "step": 197 }, { "epoch": 0.051818241599263494, "loss": 0.37059885263442993, "loss_ce": 0.024651601910591125, "loss_iou": 0.341796875, "loss_num": 0.0693359375, "loss_xval": 0.345703125, "num_input_tokens_seen": 33837492, "step": 197 }, { "epoch": 0.052081278358650625, "grad_norm": 12.626529129648052, "learning_rate": 5e-06, "loss": 0.3657, "num_input_tokens_seen": 34009512, "step": 198 }, { "epoch": 0.052081278358650625, "loss": 0.29348307847976685, "loss_ce": 0.028102193027734756, "loss_iou": 0.376953125, "loss_num": 0.052978515625, "loss_xval": 0.265625, "num_input_tokens_seen": 34009512, "step": 198 }, { "epoch": 0.05234431511803775, "grad_norm": 10.467016024509215, "learning_rate": 5e-06, "loss": 0.3464, "num_input_tokens_seen": 34181600, "step": 199 }, { "epoch": 0.05234431511803775, 
"loss": 0.30455371737480164, "loss_ce": 0.022815439850091934, "loss_iou": 0.400390625, "loss_num": 0.056396484375, "loss_xval": 0.28125, "num_input_tokens_seen": 34181600, "step": 199 }, { "epoch": 0.05260735187742487, "grad_norm": 13.453464722997529, "learning_rate": 5e-06, "loss": 0.3248, "num_input_tokens_seen": 34352348, "step": 200 }, { "epoch": 0.05260735187742487, "loss": 0.45307034254074097, "loss_ce": 0.020819369703531265, "loss_iou": 0.3515625, "loss_num": 0.08642578125, "loss_xval": 0.431640625, "num_input_tokens_seen": 34352348, "step": 200 }, { "epoch": 0.052870388636811996, "grad_norm": 20.491451966511576, "learning_rate": 5e-06, "loss": 0.4317, "num_input_tokens_seen": 34524600, "step": 201 }, { "epoch": 0.052870388636811996, "loss": 0.41993263363838196, "loss_ce": 0.024668946862220764, "loss_iou": 0.51171875, "loss_num": 0.0791015625, "loss_xval": 0.39453125, "num_input_tokens_seen": 34524600, "step": 201 }, { "epoch": 0.05313342539619912, "grad_norm": 17.643454599965338, "learning_rate": 5e-06, "loss": 0.4565, "num_input_tokens_seen": 34696600, "step": 202 }, { "epoch": 0.05313342539619912, "loss": 0.5106714963912964, "loss_ce": 0.022634411230683327, "loss_iou": 0.0888671875, "loss_num": 0.09765625, "loss_xval": 0.48828125, "num_input_tokens_seen": 34696600, "step": 202 }, { "epoch": 0.053396462155586244, "grad_norm": 13.599251013665107, "learning_rate": 5e-06, "loss": 0.3165, "num_input_tokens_seen": 34868596, "step": 203 }, { "epoch": 0.053396462155586244, "loss": 0.3383103609085083, "loss_ce": 0.026054508984088898, "loss_iou": 0.26953125, "loss_num": 0.0625, "loss_xval": 0.3125, "num_input_tokens_seen": 34868596, "step": 203 }, { "epoch": 0.05365949891497337, "grad_norm": 15.289442638831803, "learning_rate": 5e-06, "loss": 0.3217, "num_input_tokens_seen": 35038040, "step": 204 }, { "epoch": 0.05365949891497337, "loss": 0.306024968624115, "loss_ce": 0.022089410573244095, "loss_iou": 0.39453125, "loss_num": 0.056884765625, "loss_xval": 
0.283203125, "num_input_tokens_seen": 35038040, "step": 204 }, { "epoch": 0.05392253567436049, "grad_norm": 10.407634168683858, "learning_rate": 5e-06, "loss": 0.3039, "num_input_tokens_seen": 35208292, "step": 205 }, { "epoch": 0.05392253567436049, "loss": 0.35352200269699097, "loss_ce": 0.026007331907749176, "loss_iou": 0.423828125, "loss_num": 0.0654296875, "loss_xval": 0.328125, "num_input_tokens_seen": 35208292, "step": 205 }, { "epoch": 0.054185572433747616, "grad_norm": 11.313536084612993, "learning_rate": 5e-06, "loss": 0.3141, "num_input_tokens_seen": 35377388, "step": 206 }, { "epoch": 0.054185572433747616, "loss": 0.27950865030288696, "loss_ce": 0.02438168227672577, "loss_iou": 0.431640625, "loss_num": 0.051025390625, "loss_xval": 0.255859375, "num_input_tokens_seen": 35377388, "step": 206 }, { "epoch": 0.05444860919313474, "grad_norm": 11.696274899578064, "learning_rate": 5e-06, "loss": 0.3578, "num_input_tokens_seen": 35547812, "step": 207 }, { "epoch": 0.05444860919313474, "loss": 0.3501763641834259, "loss_ce": 0.0216851644217968, "loss_iou": 0.53125, "loss_num": 0.06591796875, "loss_xval": 0.328125, "num_input_tokens_seen": 35547812, "step": 207 }, { "epoch": 0.05471164595252186, "grad_norm": 10.19535036839742, "learning_rate": 5e-06, "loss": 0.2938, "num_input_tokens_seen": 35720256, "step": 208 }, { "epoch": 0.05471164595252186, "loss": 0.28618597984313965, "loss_ce": 0.023246534168720245, "loss_iou": 0.421875, "loss_num": 0.052490234375, "loss_xval": 0.263671875, "num_input_tokens_seen": 35720256, "step": 208 }, { "epoch": 0.05497468271190899, "grad_norm": 10.108238117808526, "learning_rate": 5e-06, "loss": 0.2815, "num_input_tokens_seen": 35892100, "step": 209 }, { "epoch": 0.05497468271190899, "loss": 0.28424978256225586, "loss_ce": 0.025460712611675262, "loss_iou": 0.3125, "loss_num": 0.0517578125, "loss_xval": 0.2578125, "num_input_tokens_seen": 35892100, "step": 209 }, { "epoch": 0.05523771947129611, "grad_norm": 9.765870248571328, 
"learning_rate": 5e-06, "loss": 0.3151, "num_input_tokens_seen": 36064296, "step": 210 }, { "epoch": 0.05523771947129611, "loss": 0.30667591094970703, "loss_ce": 0.02457140013575554, "loss_iou": 0.373046875, "loss_num": 0.056396484375, "loss_xval": 0.28125, "num_input_tokens_seen": 36064296, "step": 210 }, { "epoch": 0.055500756230683235, "grad_norm": 8.667238391078952, "learning_rate": 5e-06, "loss": 0.3062, "num_input_tokens_seen": 36236356, "step": 211 }, { "epoch": 0.055500756230683235, "loss": 0.32360944151878357, "loss_ce": 0.021241270005702972, "loss_iou": 0.318359375, "loss_num": 0.060546875, "loss_xval": 0.302734375, "num_input_tokens_seen": 36236356, "step": 211 }, { "epoch": 0.055763792990070365, "grad_norm": 7.623662373992362, "learning_rate": 5e-06, "loss": 0.2441, "num_input_tokens_seen": 36406508, "step": 212 }, { "epoch": 0.055763792990070365, "loss": 0.2571391761302948, "loss_ce": 0.022825222462415695, "loss_iou": 0.50390625, "loss_num": 0.046875, "loss_xval": 0.234375, "num_input_tokens_seen": 36406508, "step": 212 }, { "epoch": 0.05602682974945749, "grad_norm": 9.522758823829298, "learning_rate": 5e-06, "loss": 0.263, "num_input_tokens_seen": 36578384, "step": 213 }, { "epoch": 0.05602682974945749, "loss": 0.20427684485912323, "loss_ce": 0.022819336503744125, "loss_iou": 0.32421875, "loss_num": 0.036376953125, "loss_xval": 0.181640625, "num_input_tokens_seen": 36578384, "step": 213 }, { "epoch": 0.05628986650884461, "grad_norm": 14.400118985818274, "learning_rate": 5e-06, "loss": 0.293, "num_input_tokens_seen": 36750552, "step": 214 }, { "epoch": 0.05628986650884461, "loss": 0.29619529843330383, "loss_ce": 0.021537089720368385, "loss_iou": 0.44140625, "loss_num": 0.054931640625, "loss_xval": 0.275390625, "num_input_tokens_seen": 36750552, "step": 214 }, { "epoch": 0.05655290326823174, "grad_norm": 16.206900517822692, "learning_rate": 5e-06, "loss": 0.3487, "num_input_tokens_seen": 36922680, "step": 215 }, { "epoch": 0.05655290326823174, "loss": 
0.37616318464279175, "loss_ce": 0.018497148528695107, "loss_iou": 0.224609375, "loss_num": 0.07177734375, "loss_xval": 0.357421875, "num_input_tokens_seen": 36922680, "step": 215 }, { "epoch": 0.05681594002761886, "grad_norm": 16.214529098872383, "learning_rate": 5e-06, "loss": 0.3418, "num_input_tokens_seen": 37095056, "step": 216 }, { "epoch": 0.05681594002761886, "loss": 0.37693583965301514, "loss_ce": 0.018171211704611778, "loss_iou": 0.208984375, "loss_num": 0.07177734375, "loss_xval": 0.359375, "num_input_tokens_seen": 37095056, "step": 216 }, { "epoch": 0.057078976787005985, "grad_norm": 11.604435704258858, "learning_rate": 5e-06, "loss": 0.3517, "num_input_tokens_seen": 37267308, "step": 217 }, { "epoch": 0.057078976787005985, "loss": 0.37936773896217346, "loss_ce": 0.022922419011592865, "loss_iou": 0.55078125, "loss_num": 0.0712890625, "loss_xval": 0.35546875, "num_input_tokens_seen": 37267308, "step": 217 }, { "epoch": 0.05734201354639311, "grad_norm": 25.63677528330297, "learning_rate": 5e-06, "loss": 0.2984, "num_input_tokens_seen": 37434780, "step": 218 }, { "epoch": 0.05734201354639311, "loss": 0.31407007575035095, "loss_ce": 0.02378687635064125, "loss_iou": 0.455078125, "loss_num": 0.05810546875, "loss_xval": 0.291015625, "num_input_tokens_seen": 37434780, "step": 218 }, { "epoch": 0.05760505030578023, "grad_norm": 11.86583190083979, "learning_rate": 5e-06, "loss": 0.3346, "num_input_tokens_seen": 37605076, "step": 219 }, { "epoch": 0.05760505030578023, "loss": 0.31814104318618774, "loss_ce": 0.02236468717455864, "loss_iou": 0.279296875, "loss_num": 0.05908203125, "loss_xval": 0.294921875, "num_input_tokens_seen": 37605076, "step": 219 }, { "epoch": 0.057868087065167356, "grad_norm": 14.940327825772103, "learning_rate": 5e-06, "loss": 0.3733, "num_input_tokens_seen": 37777704, "step": 220 }, { "epoch": 0.057868087065167356, "loss": 0.37386685609817505, "loss_ce": 0.020107077434659004, "loss_iou": 0.443359375, "loss_num": 0.07080078125, "loss_xval": 
0.353515625, "num_input_tokens_seen": 37777704, "step": 220 }, { "epoch": 0.05813112382455448, "grad_norm": 13.873681072861537, "learning_rate": 5e-06, "loss": 0.3326, "num_input_tokens_seen": 37948064, "step": 221 }, { "epoch": 0.05813112382455448, "loss": 0.3946155905723572, "loss_ce": 0.019127311185002327, "loss_iou": 0.18359375, "loss_num": 0.0751953125, "loss_xval": 0.375, "num_input_tokens_seen": 37948064, "step": 221 }, { "epoch": 0.058394160583941604, "grad_norm": 13.253297702701516, "learning_rate": 5e-06, "loss": 0.3428, "num_input_tokens_seen": 38118984, "step": 222 }, { "epoch": 0.058394160583941604, "loss": 0.3087061643600464, "loss_ce": 0.018300898373126984, "loss_iou": 0.18359375, "loss_num": 0.05810546875, "loss_xval": 0.291015625, "num_input_tokens_seen": 38118984, "step": 222 }, { "epoch": 0.05865719734332873, "grad_norm": 11.082820014401106, "learning_rate": 5e-06, "loss": 0.2927, "num_input_tokens_seen": 38291092, "step": 223 }, { "epoch": 0.05865719734332873, "loss": 0.2926178574562073, "loss_ce": 0.017837589606642723, "loss_iou": 0.337890625, "loss_num": 0.054931640625, "loss_xval": 0.275390625, "num_input_tokens_seen": 38291092, "step": 223 }, { "epoch": 0.05892023410271585, "grad_norm": 13.606311852622206, "learning_rate": 5e-06, "loss": 0.2877, "num_input_tokens_seen": 38461712, "step": 224 }, { "epoch": 0.05892023410271585, "loss": 0.2914125323295593, "loss_ce": 0.016632266342639923, "loss_iou": 0.29296875, "loss_num": 0.054931640625, "loss_xval": 0.275390625, "num_input_tokens_seen": 38461712, "step": 224 }, { "epoch": 0.059183270862102975, "grad_norm": 12.858292254216362, "learning_rate": 5e-06, "loss": 0.3576, "num_input_tokens_seen": 38633936, "step": 225 }, { "epoch": 0.059183270862102975, "loss": 0.36546608805656433, "loss_ce": 0.015124273486435413, "loss_iou": 0.2890625, "loss_num": 0.06982421875, "loss_xval": 0.349609375, "num_input_tokens_seen": 38633936, "step": 225 }, { "epoch": 0.059446307621490106, "grad_norm": 
15.356311927838101, "learning_rate": 5e-06, "loss": 0.2591, "num_input_tokens_seen": 38802732, "step": 226 }, { "epoch": 0.059446307621490106, "loss": 0.24970154464244843, "loss_ce": 0.01520447339862585, "loss_iou": 0.412109375, "loss_num": 0.046875, "loss_xval": 0.234375, "num_input_tokens_seen": 38802732, "step": 226 }, { "epoch": 0.05970934438087723, "grad_norm": 33.523690387446095, "learning_rate": 5e-06, "loss": 0.3181, "num_input_tokens_seen": 38975160, "step": 227 }, { "epoch": 0.05970934438087723, "loss": 0.3422040641307831, "loss_ce": 0.015910113230347633, "loss_iou": 0.33984375, "loss_num": 0.0654296875, "loss_xval": 0.326171875, "num_input_tokens_seen": 38975160, "step": 227 }, { "epoch": 0.059972381140264354, "grad_norm": 8.426822348321474, "learning_rate": 5e-06, "loss": 0.2888, "num_input_tokens_seen": 39147352, "step": 228 }, { "epoch": 0.059972381140264354, "loss": 0.29814714193344116, "loss_ce": 0.014455747790634632, "loss_iou": 0.3359375, "loss_num": 0.056640625, "loss_xval": 0.283203125, "num_input_tokens_seen": 39147352, "step": 228 }, { "epoch": 0.06023541789965148, "grad_norm": 9.384703696891318, "learning_rate": 5e-06, "loss": 0.2861, "num_input_tokens_seen": 39319476, "step": 229 }, { "epoch": 0.06023541789965148, "loss": 0.2668268382549286, "loss_ce": 0.020000681281089783, "loss_iou": 0.474609375, "loss_num": 0.04931640625, "loss_xval": 0.2470703125, "num_input_tokens_seen": 39319476, "step": 229 }, { "epoch": 0.0604984546590386, "grad_norm": 18.427622180632213, "learning_rate": 5e-06, "loss": 0.3917, "num_input_tokens_seen": 39491664, "step": 230 }, { "epoch": 0.0604984546590386, "loss": 0.390484094619751, "loss_ce": 0.015484098345041275, "loss_iou": 0.875, "loss_num": 0.0751953125, "loss_xval": 0.375, "num_input_tokens_seen": 39491664, "step": 230 }, { "epoch": 0.060761491418425725, "grad_norm": 10.977429362928614, "learning_rate": 5e-06, "loss": 0.3949, "num_input_tokens_seen": 39663560, "step": 231 }, { "epoch": 0.060761491418425725, 
"loss": 0.334339439868927, "loss_ce": 0.015735914930701256, "loss_iou": 0.283203125, "loss_num": 0.0634765625, "loss_xval": 0.318359375, "num_input_tokens_seen": 39663560, "step": 231 }, { "epoch": 0.06102452817781285, "grad_norm": 7.478546882380691, "learning_rate": 5e-06, "loss": 0.2993, "num_input_tokens_seen": 39835708, "step": 232 }, { "epoch": 0.06102452817781285, "loss": 0.2514447867870331, "loss_ce": 0.014872531406581402, "loss_iou": 0.45703125, "loss_num": 0.04736328125, "loss_xval": 0.236328125, "num_input_tokens_seen": 39835708, "step": 232 }, { "epoch": 0.06128756493719997, "grad_norm": 7.3196146291196635, "learning_rate": 5e-06, "loss": 0.2693, "num_input_tokens_seen": 40008008, "step": 233 }, { "epoch": 0.06128756493719997, "loss": 0.3245670795440674, "loss_ce": 0.01768231764435768, "loss_iou": 0.380859375, "loss_num": 0.0615234375, "loss_xval": 0.306640625, "num_input_tokens_seen": 40008008, "step": 233 }, { "epoch": 0.0615506016965871, "grad_norm": 10.123913601017227, "learning_rate": 5e-06, "loss": 0.2834, "num_input_tokens_seen": 40179816, "step": 234 }, { "epoch": 0.0615506016965871, "loss": 0.2691105008125305, "loss_ce": 0.01740150898694992, "loss_iou": 0.462890625, "loss_num": 0.05029296875, "loss_xval": 0.251953125, "num_input_tokens_seen": 40179816, "step": 234 }, { "epoch": 0.06181363845597422, "grad_norm": 16.840337823170596, "learning_rate": 5e-06, "loss": 0.3163, "num_input_tokens_seen": 40352180, "step": 235 }, { "epoch": 0.06181363845597422, "loss": 0.3705546259880066, "loss_ce": 0.015330012887716293, "loss_iou": 0.232421875, "loss_num": 0.0712890625, "loss_xval": 0.35546875, "num_input_tokens_seen": 40352180, "step": 235 }, { "epoch": 0.062076675215361345, "grad_norm": 15.151708802383407, "learning_rate": 5e-06, "loss": 0.3181, "num_input_tokens_seen": 40524204, "step": 236 }, { "epoch": 0.062076675215361345, "loss": 0.342043399810791, "loss_ce": 0.015383241698145866, "loss_iou": 0.1884765625, "loss_num": 0.0654296875, "loss_xval": 
0.326171875, "num_input_tokens_seen": 40524204, "step": 236 }, { "epoch": 0.06233971197474847, "grad_norm": 23.279462740978527, "learning_rate": 5e-06, "loss": 0.3112, "num_input_tokens_seen": 40694412, "step": 237 }, { "epoch": 0.06233971197474847, "loss": 0.3619577884674072, "loss_ce": 0.014667754992842674, "loss_iou": null, "loss_num": 0.0693359375, "loss_xval": 0.34765625, "num_input_tokens_seen": 40694412, "step": 237 }, { "epoch": 0.0626027487341356, "grad_norm": 7.854110724383197, "learning_rate": 5e-06, "loss": 0.3206, "num_input_tokens_seen": 40866332, "step": 238 }, { "epoch": 0.0626027487341356, "loss": 0.31817951798439026, "loss_ce": 0.013125804252922535, "loss_iou": 0.390625, "loss_num": 0.06103515625, "loss_xval": 0.3046875, "num_input_tokens_seen": 40866332, "step": 238 }, { "epoch": 0.06286578549352272, "grad_norm": 10.293296601258389, "learning_rate": 5e-06, "loss": 0.2924, "num_input_tokens_seen": 41036884, "step": 239 }, { "epoch": 0.06286578549352272, "loss": 0.20027026534080505, "loss_ce": 0.012953377328813076, "loss_iou": 0.470703125, "loss_num": 0.03759765625, "loss_xval": 0.1875, "num_input_tokens_seen": 41036884, "step": 239 }, { "epoch": 0.06312882225290985, "grad_norm": 27.954530904789568, "learning_rate": 5e-06, "loss": 0.3242, "num_input_tokens_seen": 41209080, "step": 240 }, { "epoch": 0.06312882225290985, "loss": 0.3325929641723633, "loss_ce": 0.015088059939444065, "loss_iou": 0.388671875, "loss_num": 0.0634765625, "loss_xval": 0.318359375, "num_input_tokens_seen": 41209080, "step": 240 }, { "epoch": 0.06339185901229696, "grad_norm": 10.084557723509068, "learning_rate": 5e-06, "loss": 0.4017, "num_input_tokens_seen": 41378712, "step": 241 }, { "epoch": 0.06339185901229696, "loss": 0.42962974309921265, "loss_ce": 0.01459068525582552, "loss_iou": 0.1796875, "loss_num": 0.0830078125, "loss_xval": 0.4140625, "num_input_tokens_seen": 41378712, "step": 241 }, { "epoch": 0.0636548957716841, "grad_norm": 7.845640861327211, "learning_rate": 
5e-06, "loss": 0.3082, "num_input_tokens_seen": 41550924, "step": 242 }, { "epoch": 0.0636548957716841, "loss": 0.3617081940174103, "loss_ce": 0.013441601768136024, "loss_iou": 0.306640625, "loss_num": 0.06982421875, "loss_xval": 0.34765625, "num_input_tokens_seen": 41550924, "step": 242 }, { "epoch": 0.06391793253107121, "grad_norm": 7.755635758991768, "learning_rate": 5e-06, "loss": 0.2863, "num_input_tokens_seen": 41723448, "step": 243 }, { "epoch": 0.06391793253107121, "loss": 0.31799250841140747, "loss_ce": 0.01220636535435915, "loss_iou": 0.4609375, "loss_num": 0.06103515625, "loss_xval": 0.306640625, "num_input_tokens_seen": 41723448, "step": 243 }, { "epoch": 0.06418096929045834, "grad_norm": 10.028128094591828, "learning_rate": 5e-06, "loss": 0.3455, "num_input_tokens_seen": 41895440, "step": 244 }, { "epoch": 0.06418096929045834, "loss": 0.2664147913455963, "loss_ce": 0.015926510095596313, "loss_iou": 0.38671875, "loss_num": 0.050048828125, "loss_xval": 0.25, "num_input_tokens_seen": 41895440, "step": 244 }, { "epoch": 0.06444400604984546, "grad_norm": 7.455815676512238, "learning_rate": 5e-06, "loss": 0.3158, "num_input_tokens_seen": 42067812, "step": 245 }, { "epoch": 0.06444400604984546, "loss": 0.315701961517334, "loss_ce": 0.012784458696842194, "loss_iou": 0.3515625, "loss_num": 0.060546875, "loss_xval": 0.302734375, "num_input_tokens_seen": 42067812, "step": 245 }, { "epoch": 0.06470704280923259, "grad_norm": 7.183484053450612, "learning_rate": 5e-06, "loss": 0.2873, "num_input_tokens_seen": 42238248, "step": 246 }, { "epoch": 0.06470704280923259, "loss": 0.408550500869751, "loss_ce": 0.015972375869750977, "loss_iou": 0.46484375, "loss_num": 0.07861328125, "loss_xval": 0.392578125, "num_input_tokens_seen": 42238248, "step": 246 }, { "epoch": 0.06497007956861972, "grad_norm": 11.483582566544728, "learning_rate": 5e-06, "loss": 0.2422, "num_input_tokens_seen": 42410540, "step": 247 }, { "epoch": 0.06497007956861972, "loss": 0.24055451154708862, 
"loss_ce": 0.012405097484588623, "loss_iou": 0.455078125, "loss_num": 0.045654296875, "loss_xval": 0.228515625, "num_input_tokens_seen": 42410540, "step": 247 }, { "epoch": 0.06523311632800684, "grad_norm": 9.093470574393532, "learning_rate": 5e-06, "loss": 0.2447, "num_input_tokens_seen": 42582896, "step": 248 }, { "epoch": 0.06523311632800684, "loss": 0.2380836009979248, "loss_ce": 0.009445905685424805, "loss_iou": 0.51171875, "loss_num": 0.045654296875, "loss_xval": 0.228515625, "num_input_tokens_seen": 42582896, "step": 248 }, { "epoch": 0.06549615308739397, "grad_norm": 11.56824851448475, "learning_rate": 5e-06, "loss": 0.3024, "num_input_tokens_seen": 42755248, "step": 249 }, { "epoch": 0.06549615308739397, "loss": 0.3298885226249695, "loss_ce": 0.012871915474534035, "loss_iou": 0.359375, "loss_num": 0.0634765625, "loss_xval": 0.31640625, "num_input_tokens_seen": 42755248, "step": 249 }, { "epoch": 0.06575918984678109, "grad_norm": 12.53262678976841, "learning_rate": 5e-06, "loss": 0.3279, "num_input_tokens_seen": 42927164, "step": 250 }, { "epoch": 0.06575918984678109, "eval_websight_new_CIoU": 0.6498270332813263, "eval_websight_new_GIoU": 0.6454348266124725, "eval_websight_new_IoU": 0.6616384983062744, "eval_websight_new_MAE_all": 0.05562719702720642, "eval_websight_new_MAE_h": 0.049970587715506554, "eval_websight_new_MAE_w": 0.07883360609412193, "eval_websight_new_MAE_x": 0.06017959490418434, "eval_websight_new_MAE_y": 0.03352500405162573, "eval_websight_new_NUM_probability": 0.9025295078754425, "eval_websight_new_inside_bbox": 0.9253472089767456, "eval_websight_new_loss": 0.27164769172668457, "eval_websight_new_loss_ce": 0.01081773592159152, "eval_websight_new_loss_iou": 0.693359375, "eval_websight_new_loss_num": 0.049407958984375, "eval_websight_new_loss_xval": 0.24688720703125, "eval_websight_new_runtime": 55.9304, "eval_websight_new_samples_per_second": 0.894, "eval_websight_new_steps_per_second": 0.036, "num_input_tokens_seen": 42927164, "step": 250 
}, { "epoch": 0.06575918984678109, "eval_seeclick_CIoU": 0.3509342074394226, "eval_seeclick_GIoU": 0.3347575068473816, "eval_seeclick_IoU": 0.3994097113609314, "eval_seeclick_MAE_all": 0.0986204668879509, "eval_seeclick_MAE_h": 0.0804138220846653, "eval_seeclick_MAE_w": 0.13582541793584824, "eval_seeclick_MAE_x": 0.12244484201073647, "eval_seeclick_MAE_y": 0.05579778365790844, "eval_seeclick_NUM_probability": 0.898758739233017, "eval_seeclick_inside_bbox": 0.47727273404598236, "eval_seeclick_loss": 0.4158971905708313, "eval_seeclick_loss_ce": 0.027117961086332798, "eval_seeclick_loss_iou": 0.66552734375, "eval_seeclick_loss_num": 0.0771331787109375, "eval_seeclick_loss_xval": 0.3856201171875, "eval_seeclick_runtime": 77.1195, "eval_seeclick_samples_per_second": 0.558, "eval_seeclick_steps_per_second": 0.026, "num_input_tokens_seen": 42927164, "step": 250 }, { "epoch": 0.06575918984678109, "eval_icons_CIoU": 0.533334881067276, "eval_icons_GIoU": 0.525450587272644, "eval_icons_IoU": 0.5554244518280029, "eval_icons_MAE_all": 0.06896939501166344, "eval_icons_MAE_h": 0.07008247822523117, "eval_icons_MAE_w": 0.07993372157216072, "eval_icons_MAE_x": 0.06088143028318882, "eval_icons_MAE_y": 0.06497994437813759, "eval_icons_NUM_probability": 0.9118345677852631, "eval_icons_inside_bbox": 0.8038194477558136, "eval_icons_loss": 0.238239586353302, "eval_icons_loss_ce": 0.01238260930404067, "eval_icons_loss_iou": 0.45098876953125, "eval_icons_loss_num": 0.04291534423828125, "eval_icons_loss_xval": 0.2144775390625, "eval_icons_runtime": 89.0702, "eval_icons_samples_per_second": 0.561, "eval_icons_steps_per_second": 0.022, "num_input_tokens_seen": 42927164, "step": 250 }, { "epoch": 0.06575918984678109, "eval_screenspot_CIoU": 0.40743913253148395, "eval_screenspot_GIoU": 0.38262539108594257, "eval_screenspot_IoU": 0.4510061542193095, "eval_screenspot_MAE_all": 0.10948735972245534, "eval_screenspot_MAE_h": 0.07818744828303655, "eval_screenspot_MAE_w": 0.15290210396051407, 
"eval_screenspot_MAE_x": 0.12599809964497885, "eval_screenspot_MAE_y": 0.08086179196834564, "eval_screenspot_NUM_probability": 0.9086714188257853, "eval_screenspot_inside_bbox": 0.7354166706403097, "eval_screenspot_loss": 0.9193825125694275, "eval_screenspot_loss_ce": 0.48994149764378864, "eval_screenspot_loss_iou": 0.5732421875, "eval_screenspot_loss_num": 0.08390299479166667, "eval_screenspot_loss_xval": 0.4192708333333333, "eval_screenspot_runtime": 138.8534, "eval_screenspot_samples_per_second": 0.641, "eval_screenspot_steps_per_second": 0.022, "num_input_tokens_seen": 42927164, "step": 250 }, { "epoch": 0.06575918984678109, "loss": 0.8642065525054932, "loss_ce": 0.45453858375549316, "loss_iou": 0.53125, "loss_num": 0.08203125, "loss_xval": 0.41015625, "num_input_tokens_seen": 42927164, "step": 250 }, { "epoch": 0.06602222660616822, "grad_norm": 16.162110660395133, "learning_rate": 5e-06, "loss": 0.3389, "num_input_tokens_seen": 43099272, "step": 251 }, { "epoch": 0.06602222660616822, "loss": 0.3324888348579407, "loss_ce": 0.009490801021456718, "loss_iou": 0.484375, "loss_num": 0.064453125, "loss_xval": 0.322265625, "num_input_tokens_seen": 43099272, "step": 251 }, { "epoch": 0.06628526336555533, "grad_norm": 15.788401372124632, "learning_rate": 5e-06, "loss": 0.3827, "num_input_tokens_seen": 43271440, "step": 252 }, { "epoch": 0.06628526336555533, "loss": 0.3184235990047455, "loss_ce": 0.009219500236213207, "loss_iou": 0.453125, "loss_num": 0.061767578125, "loss_xval": 0.30859375, "num_input_tokens_seen": 43271440, "step": 252 }, { "epoch": 0.06654830012494246, "grad_norm": 19.21368677855186, "learning_rate": 5e-06, "loss": 0.4031, "num_input_tokens_seen": 43443704, "step": 253 }, { "epoch": 0.06654830012494246, "loss": 0.3337656557559967, "loss_ce": 0.009546896442770958, "loss_iou": 0.1796875, "loss_num": 0.06494140625, "loss_xval": 0.32421875, "num_input_tokens_seen": 43443704, "step": 253 }, { "epoch": 0.06681133688432958, "grad_norm": 9.350851106906616, 
"learning_rate": 5e-06, "loss": 0.2501, "num_input_tokens_seen": 43615820, "step": 254 }, { "epoch": 0.06681133688432958, "loss": 0.22743698954582214, "loss_ce": 0.009236322715878487, "loss_iou": 0.408203125, "loss_num": 0.043701171875, "loss_xval": 0.2177734375, "num_input_tokens_seen": 43615820, "step": 254 }, { "epoch": 0.06707437364371671, "grad_norm": 12.730744024793406, "learning_rate": 5e-06, "loss": 0.3033, "num_input_tokens_seen": 43787664, "step": 255 }, { "epoch": 0.06707437364371671, "loss": 0.3385705351829529, "loss_ce": 0.009468959644436836, "loss_iou": 0.193359375, "loss_num": 0.06591796875, "loss_xval": 0.328125, "num_input_tokens_seen": 43787664, "step": 255 }, { "epoch": 0.06733741040310383, "grad_norm": 20.602499715875325, "learning_rate": 5e-06, "loss": 0.335, "num_input_tokens_seen": 43956324, "step": 256 }, { "epoch": 0.06733741040310383, "loss": 0.33848732709884644, "loss_ce": 0.012559601105749607, "loss_iou": 0.32421875, "loss_num": 0.0654296875, "loss_xval": 0.326171875, "num_input_tokens_seen": 43956324, "step": 256 }, { "epoch": 0.06760044716249096, "grad_norm": 11.274485245872459, "learning_rate": 5e-06, "loss": 0.241, "num_input_tokens_seen": 44128740, "step": 257 }, { "epoch": 0.06760044716249096, "loss": 0.24955862760543823, "loss_ce": 0.011765659786760807, "loss_iou": 0.330078125, "loss_num": 0.047607421875, "loss_xval": 0.23828125, "num_input_tokens_seen": 44128740, "step": 257 }, { "epoch": 0.06786348392187808, "grad_norm": 12.119376554642344, "learning_rate": 5e-06, "loss": 0.359, "num_input_tokens_seen": 44299372, "step": 258 }, { "epoch": 0.06786348392187808, "loss": 0.26774948835372925, "loss_ce": 0.01604049652814865, "loss_iou": 0.56640625, "loss_num": 0.05029296875, "loss_xval": 0.251953125, "num_input_tokens_seen": 44299372, "step": 258 }, { "epoch": 0.0681265206812652, "grad_norm": 15.925957923618926, "learning_rate": 5e-06, "loss": 0.3045, "num_input_tokens_seen": 44469736, "step": 259 }, { "epoch": 0.0681265206812652, 
"loss": 0.35633599758148193, "loss_ce": 0.01502738706767559, "loss_iou": 0.515625, "loss_num": 0.068359375, "loss_xval": 0.341796875, "num_input_tokens_seen": 44469736, "step": 259 }, { "epoch": 0.06838955744065234, "grad_norm": 13.189087784364153, "learning_rate": 5e-06, "loss": 0.328, "num_input_tokens_seen": 44642020, "step": 260 }, { "epoch": 0.06838955744065234, "loss": 0.4303101599216461, "loss_ce": 0.01173105463385582, "loss_iou": 0.44921875, "loss_num": 0.08349609375, "loss_xval": 0.41796875, "num_input_tokens_seen": 44642020, "step": 260 }, { "epoch": 0.06865259420003945, "grad_norm": 11.878594650017698, "learning_rate": 5e-06, "loss": 0.2752, "num_input_tokens_seen": 44814452, "step": 261 }, { "epoch": 0.06865259420003945, "loss": 0.28906819224357605, "loss_ce": 0.0090388935059309, "loss_iou": 0.251953125, "loss_num": 0.05615234375, "loss_xval": 0.279296875, "num_input_tokens_seen": 44814452, "step": 261 }, { "epoch": 0.06891563095942659, "grad_norm": 8.62440603357029, "learning_rate": 5e-06, "loss": 0.2926, "num_input_tokens_seen": 44986656, "step": 262 }, { "epoch": 0.06891563095942659, "loss": 0.24431782960891724, "loss_ce": 0.008111760020256042, "loss_iou": 0.486328125, "loss_num": 0.04736328125, "loss_xval": 0.236328125, "num_input_tokens_seen": 44986656, "step": 262 }, { "epoch": 0.0691786677188137, "grad_norm": 7.7035777517167645, "learning_rate": 5e-06, "loss": 0.2298, "num_input_tokens_seen": 45158540, "step": 263 }, { "epoch": 0.0691786677188137, "loss": 0.23680636286735535, "loss_ce": 0.013417699374258518, "loss_iou": 0.46484375, "loss_num": 0.044677734375, "loss_xval": 0.2236328125, "num_input_tokens_seen": 45158540, "step": 263 }, { "epoch": 0.06944170447820083, "grad_norm": 11.11690642897216, "learning_rate": 5e-06, "loss": 0.2503, "num_input_tokens_seen": 45330356, "step": 264 }, { "epoch": 0.06944170447820083, "loss": 0.25701650977134705, "loss_ce": 0.01019032672047615, "loss_iou": 0.24609375, "loss_num": 0.04931640625, "loss_xval": 
0.2470703125, "num_input_tokens_seen": 45330356, "step": 264 }, { "epoch": 0.06970474123758795, "grad_norm": 16.27774203292456, "learning_rate": 5e-06, "loss": 0.327, "num_input_tokens_seen": 45502460, "step": 265 }, { "epoch": 0.06970474123758795, "loss": 0.3339051902294159, "loss_ce": 0.011639568954706192, "loss_iou": 0.4921875, "loss_num": 0.064453125, "loss_xval": 0.322265625, "num_input_tokens_seen": 45502460, "step": 265 }, { "epoch": 0.06996777799697508, "grad_norm": 11.740826510166434, "learning_rate": 5e-06, "loss": 0.3816, "num_input_tokens_seen": 45674688, "step": 266 }, { "epoch": 0.06996777799697508, "loss": 0.30319204926490784, "loss_ce": 0.016082679852843285, "loss_iou": 0.255859375, "loss_num": 0.057373046875, "loss_xval": 0.287109375, "num_input_tokens_seen": 45674688, "step": 266 }, { "epoch": 0.0702308147563622, "grad_norm": 7.300788157576167, "learning_rate": 5e-06, "loss": 0.29, "num_input_tokens_seen": 45847124, "step": 267 }, { "epoch": 0.0702308147563622, "loss": 0.3247534930706024, "loss_ce": 0.009140691719949245, "loss_iou": 0.33203125, "loss_num": 0.06298828125, "loss_xval": 0.31640625, "num_input_tokens_seen": 45847124, "step": 267 }, { "epoch": 0.07049385151574933, "grad_norm": 7.309781555609638, "learning_rate": 5e-06, "loss": 0.2756, "num_input_tokens_seen": 46019340, "step": 268 }, { "epoch": 0.07049385151574933, "loss": 0.2533281445503235, "loss_ce": 0.008699209429323673, "loss_iou": 0.38671875, "loss_num": 0.048828125, "loss_xval": 0.244140625, "num_input_tokens_seen": 46019340, "step": 268 }, { "epoch": 0.07075688827513645, "grad_norm": 13.089258421868523, "learning_rate": 5e-06, "loss": 0.2701, "num_input_tokens_seen": 46189784, "step": 269 }, { "epoch": 0.07075688827513645, "loss": 0.2624807357788086, "loss_ce": 0.01223657839000225, "loss_iou": 0.435546875, "loss_num": 0.050048828125, "loss_xval": 0.25, "num_input_tokens_seen": 46189784, "step": 269 }, { "epoch": 0.07101992503452358, "grad_norm": 6.658182143864948, 
"learning_rate": 5e-06, "loss": 0.2118, "num_input_tokens_seen": 46361728, "step": 270 }, { "epoch": 0.07101992503452358, "loss": 0.2072766274213791, "loss_ce": 0.007569607347249985, "loss_iou": 0.4453125, "loss_num": 0.0400390625, "loss_xval": 0.19921875, "num_input_tokens_seen": 46361728, "step": 270 }, { "epoch": 0.07128296179391069, "grad_norm": 9.079661259155806, "learning_rate": 5e-06, "loss": 0.2305, "num_input_tokens_seen": 46534100, "step": 271 }, { "epoch": 0.07128296179391069, "loss": 0.21106520295143127, "loss_ce": 0.008977807126939297, "loss_iou": 0.609375, "loss_num": 0.040283203125, "loss_xval": 0.2021484375, "num_input_tokens_seen": 46534100, "step": 271 }, { "epoch": 0.07154599855329782, "grad_norm": 9.513732560495592, "learning_rate": 5e-06, "loss": 0.2646, "num_input_tokens_seen": 46704648, "step": 272 }, { "epoch": 0.07154599855329782, "loss": 0.2694496214389801, "loss_ce": 0.007120497524738312, "loss_iou": 0.361328125, "loss_num": 0.052490234375, "loss_xval": 0.26171875, "num_input_tokens_seen": 46704648, "step": 272 }, { "epoch": 0.07180903531268495, "grad_norm": 15.714221513863984, "learning_rate": 5e-06, "loss": 0.3078, "num_input_tokens_seen": 46876784, "step": 273 }, { "epoch": 0.07180903531268495, "loss": 0.39289578795433044, "loss_ce": 0.006909476593136787, "loss_iou": 0.5, "loss_num": 0.0771484375, "loss_xval": 0.38671875, "num_input_tokens_seen": 46876784, "step": 273 }, { "epoch": 0.07207207207207207, "grad_norm": 8.093127002316251, "learning_rate": 5e-06, "loss": 0.3025, "num_input_tokens_seen": 47048876, "step": 274 }, { "epoch": 0.07207207207207207, "loss": 0.25725314021110535, "loss_ce": 0.011525607667863369, "loss_iou": 0.30859375, "loss_num": 0.049072265625, "loss_xval": 0.24609375, "num_input_tokens_seen": 47048876, "step": 274 }, { "epoch": 0.0723351088314592, "grad_norm": 8.569390693529225, "learning_rate": 5e-06, "loss": 0.297, "num_input_tokens_seen": 47221068, "step": 275 }, { "epoch": 0.0723351088314592, "loss": 
0.2686833143234253, "loss_ce": 0.006720416247844696, "loss_iou": 0.404296875, "loss_num": 0.052490234375, "loss_xval": 0.26171875, "num_input_tokens_seen": 47221068, "step": 275 }, { "epoch": 0.07259814559084632, "grad_norm": 10.067643264055917, "learning_rate": 5e-06, "loss": 0.2437, "num_input_tokens_seen": 47391436, "step": 276 }, { "epoch": 0.07259814559084632, "loss": 0.2530132830142975, "loss_ce": 0.007896100170910358, "loss_iou": 0.330078125, "loss_num": 0.049072265625, "loss_xval": 0.2451171875, "num_input_tokens_seen": 47391436, "step": 276 }, { "epoch": 0.07286118235023345, "grad_norm": 13.479763232881819, "learning_rate": 5e-06, "loss": 0.3643, "num_input_tokens_seen": 47563512, "step": 277 }, { "epoch": 0.07286118235023345, "loss": 0.2713325619697571, "loss_ce": 0.013642151840031147, "loss_iou": 0.337890625, "loss_num": 0.051513671875, "loss_xval": 0.2578125, "num_input_tokens_seen": 47563512, "step": 277 }, { "epoch": 0.07312421910962057, "grad_norm": 16.755961470396134, "learning_rate": 5e-06, "loss": 0.3279, "num_input_tokens_seen": 47735372, "step": 278 }, { "epoch": 0.07312421910962057, "loss": 0.42755797505378723, "loss_ce": 0.008124373853206635, "loss_iou": 0.365234375, "loss_num": 0.083984375, "loss_xval": 0.419921875, "num_input_tokens_seen": 47735372, "step": 278 }, { "epoch": 0.0733872558690077, "grad_norm": 13.89589851066108, "learning_rate": 5e-06, "loss": 0.3883, "num_input_tokens_seen": 47907704, "step": 279 }, { "epoch": 0.0733872558690077, "loss": 0.41463613510131836, "loss_ce": 0.013879301026463509, "loss_iou": 0.1337890625, "loss_num": 0.080078125, "loss_xval": 0.400390625, "num_input_tokens_seen": 47907704, "step": 279 }, { "epoch": 0.07365029262839481, "grad_norm": 13.52575149586993, "learning_rate": 5e-06, "loss": 0.2798, "num_input_tokens_seen": 48079692, "step": 280 }, { "epoch": 0.07365029262839481, "loss": 0.2906692624092102, "loss_ce": 0.007099920883774757, "loss_iou": 0.45703125, "loss_num": 0.056640625, "loss_xval": 
0.283203125, "num_input_tokens_seen": 48079692, "step": 280 }, { "epoch": 0.07391332938778195, "grad_norm": 11.637653668969445, "learning_rate": 5e-06, "loss": 0.2883, "num_input_tokens_seen": 48251572, "step": 281 }, { "epoch": 0.07391332938778195, "loss": 0.2844490110874176, "loss_ce": 0.010157023556530476, "loss_iou": 0.490234375, "loss_num": 0.054931640625, "loss_xval": 0.2734375, "num_input_tokens_seen": 48251572, "step": 281 }, { "epoch": 0.07417636614716906, "grad_norm": 9.21180215619434, "learning_rate": 5e-06, "loss": 0.2978, "num_input_tokens_seen": 48423560, "step": 282 }, { "epoch": 0.07417636614716906, "loss": 0.32979628443717957, "loss_ce": 0.012047262862324715, "loss_iou": 0.42578125, "loss_num": 0.0634765625, "loss_xval": 0.318359375, "num_input_tokens_seen": 48423560, "step": 282 }, { "epoch": 0.07443940290655619, "grad_norm": 7.167038204924764, "learning_rate": 5e-06, "loss": 0.256, "num_input_tokens_seen": 48595680, "step": 283 }, { "epoch": 0.07443940290655619, "loss": 0.25096797943115234, "loss_ce": 0.007925992831587791, "loss_iou": 0.330078125, "loss_num": 0.048583984375, "loss_xval": 0.2431640625, "num_input_tokens_seen": 48595680, "step": 283 }, { "epoch": 0.07470243966594331, "grad_norm": 11.715910802137108, "learning_rate": 5e-06, "loss": 0.2476, "num_input_tokens_seen": 48766076, "step": 284 }, { "epoch": 0.07470243966594331, "loss": 0.19452951848506927, "loss_ce": 0.005930891260504723, "loss_iou": 0.51171875, "loss_num": 0.03759765625, "loss_xval": 0.1884765625, "num_input_tokens_seen": 48766076, "step": 284 }, { "epoch": 0.07496547642533044, "grad_norm": 11.971840696280033, "learning_rate": 5e-06, "loss": 0.3413, "num_input_tokens_seen": 48937948, "step": 285 }, { "epoch": 0.07496547642533044, "loss": 0.349312424659729, "loss_ce": 0.00910246279090643, "loss_iou": 0.5078125, "loss_num": 0.06787109375, "loss_xval": 0.33984375, "num_input_tokens_seen": 48937948, "step": 285 }, { "epoch": 0.07522851318471756, "grad_norm": 9.505447479710712, 
"learning_rate": 5e-06, "loss": 0.2998, "num_input_tokens_seen": 49110048, "step": 286 }, { "epoch": 0.07522851318471756, "loss": 0.3620803654193878, "loss_ce": 0.0063674794510006905, "loss_iou": 0.376953125, "loss_num": 0.0712890625, "loss_xval": 0.35546875, "num_input_tokens_seen": 49110048, "step": 286 }, { "epoch": 0.07549154994410469, "grad_norm": 9.524937593510838, "learning_rate": 5e-06, "loss": 0.2473, "num_input_tokens_seen": 49281924, "step": 287 }, { "epoch": 0.07549154994410469, "loss": 0.21198034286499023, "loss_ce": 0.0065360115841031075, "loss_iou": 0.4140625, "loss_num": 0.041015625, "loss_xval": 0.205078125, "num_input_tokens_seen": 49281924, "step": 287 }, { "epoch": 0.07575458670349182, "grad_norm": 9.713489351185899, "learning_rate": 5e-06, "loss": 0.2911, "num_input_tokens_seen": 49454288, "step": 288 }, { "epoch": 0.07575458670349182, "loss": 0.35456106066703796, "loss_ce": 0.01032277476042509, "loss_iou": 0.4375, "loss_num": 0.06884765625, "loss_xval": 0.34375, "num_input_tokens_seen": 49454288, "step": 288 }, { "epoch": 0.07601762346287894, "grad_norm": 8.377848047343468, "learning_rate": 5e-06, "loss": 0.2711, "num_input_tokens_seen": 49626816, "step": 289 }, { "epoch": 0.07601762346287894, "loss": 0.2953585684299469, "loss_ce": 0.005685708485543728, "loss_iou": 0.2890625, "loss_num": 0.057861328125, "loss_xval": 0.2890625, "num_input_tokens_seen": 49626816, "step": 289 }, { "epoch": 0.07628066022226607, "grad_norm": 10.429005548611427, "learning_rate": 5e-06, "loss": 0.2446, "num_input_tokens_seen": 49798788, "step": 290 }, { "epoch": 0.07628066022226607, "loss": 0.19721034169197083, "loss_ce": 0.007268924731761217, "loss_iou": 0.39453125, "loss_num": 0.037841796875, "loss_xval": 0.189453125, "num_input_tokens_seen": 49798788, "step": 290 }, { "epoch": 0.07654369698165318, "grad_norm": 13.641317737771786, "learning_rate": 5e-06, "loss": 0.3162, "num_input_tokens_seen": 49970924, "step": 291 }, { "epoch": 0.07654369698165318, "loss": 
0.3052144944667816, "loss_ce": 0.00528772734105587, "loss_iou": 0.67578125, "loss_num": 0.06005859375, "loss_xval": 0.30078125, "num_input_tokens_seen": 49970924, "step": 291 }, { "epoch": 0.07680673374104031, "grad_norm": 13.717892281601975, "learning_rate": 5e-06, "loss": 0.2925, "num_input_tokens_seen": 50143128, "step": 292 }, { "epoch": 0.07680673374104031, "loss": 0.3288165032863617, "loss_ce": 0.006795027758926153, "loss_iou": 0.34375, "loss_num": 0.064453125, "loss_xval": 0.322265625, "num_input_tokens_seen": 50143128, "step": 292 }, { "epoch": 0.07706977050042743, "grad_norm": 9.087941964788197, "learning_rate": 5e-06, "loss": 0.2844, "num_input_tokens_seen": 50315356, "step": 293 }, { "epoch": 0.07706977050042743, "loss": 0.30062055587768555, "loss_ce": 0.006186962127685547, "loss_iou": 0.4140625, "loss_num": 0.058837890625, "loss_xval": 0.294921875, "num_input_tokens_seen": 50315356, "step": 293 }, { "epoch": 0.07733280725981456, "grad_norm": 8.950504335091761, "learning_rate": 5e-06, "loss": 0.2474, "num_input_tokens_seen": 50487452, "step": 294 }, { "epoch": 0.07733280725981456, "loss": 0.20027770102024078, "loss_ce": 0.007284537889063358, "loss_iou": 0.58203125, "loss_num": 0.03857421875, "loss_xval": 0.193359375, "num_input_tokens_seen": 50487452, "step": 294 }, { "epoch": 0.07759584401920168, "grad_norm": 12.276590112874837, "learning_rate": 5e-06, "loss": 0.2962, "num_input_tokens_seen": 50659340, "step": 295 }, { "epoch": 0.07759584401920168, "loss": 0.3079564869403839, "loss_ce": 0.008151799440383911, "loss_iou": 0.5625, "loss_num": 0.059814453125, "loss_xval": 0.30078125, "num_input_tokens_seen": 50659340, "step": 295 }, { "epoch": 0.07785888077858881, "grad_norm": 10.920612112253043, "learning_rate": 5e-06, "loss": 0.2779, "num_input_tokens_seen": 50831668, "step": 296 }, { "epoch": 0.07785888077858881, "loss": 0.3526954650878906, "loss_ce": 0.01395035907626152, "loss_iou": 0.470703125, "loss_num": 0.06787109375, "loss_xval": 0.337890625, 
"num_input_tokens_seen": 50831668, "step": 296 }, { "epoch": 0.07812191753797593, "grad_norm": 14.39030370540137, "learning_rate": 5e-06, "loss": 0.317, "num_input_tokens_seen": 51004036, "step": 297 }, { "epoch": 0.07812191753797593, "loss": 0.3473885655403137, "loss_ce": 0.01047449093312025, "loss_iou": 0.3046875, "loss_num": 0.0673828125, "loss_xval": 0.3359375, "num_input_tokens_seen": 51004036, "step": 297 }, { "epoch": 0.07838495429736306, "grad_norm": 10.790411077010845, "learning_rate": 5e-06, "loss": 0.2641, "num_input_tokens_seen": 51173952, "step": 298 }, { "epoch": 0.07838495429736306, "loss": 0.24334201216697693, "loss_ce": 0.0049387002363801, "loss_iou": 0.453125, "loss_num": 0.047607421875, "loss_xval": 0.23828125, "num_input_tokens_seen": 51173952, "step": 298 }, { "epoch": 0.07864799105675017, "grad_norm": 12.163313302369062, "learning_rate": 5e-06, "loss": 0.277, "num_input_tokens_seen": 51346208, "step": 299 }, { "epoch": 0.07864799105675017, "loss": 0.33479589223861694, "loss_ce": 0.005816406104713678, "loss_iou": 0.408203125, "loss_num": 0.06591796875, "loss_xval": 0.328125, "num_input_tokens_seen": 51346208, "step": 299 }, { "epoch": 0.0789110278161373, "grad_norm": 13.775522127007955, "learning_rate": 5e-06, "loss": 0.3084, "num_input_tokens_seen": 51516552, "step": 300 }, { "epoch": 0.0789110278161373, "loss": 0.2704133987426758, "loss_ce": 0.004422190133482218, "loss_iou": 0.2294921875, "loss_num": 0.05322265625, "loss_xval": 0.265625, "num_input_tokens_seen": 51516552, "step": 300 }, { "epoch": 0.07917406457552444, "grad_norm": 11.14460650886844, "learning_rate": 5e-06, "loss": 0.2851, "num_input_tokens_seen": 51688716, "step": 301 }, { "epoch": 0.07917406457552444, "loss": 0.2599673867225647, "loss_ce": 0.008197364397346973, "loss_iou": 0.4921875, "loss_num": 0.05029296875, "loss_xval": 0.251953125, "num_input_tokens_seen": 51688716, "step": 301 }, { "epoch": 0.07943710133491155, "grad_norm": 14.089462259095386, "learning_rate": 5e-06, 
"loss": 0.3106, "num_input_tokens_seen": 51861284, "step": 302 }, { "epoch": 0.07943710133491155, "loss": 0.25006186962127686, "loss_ce": 0.004822590388357639, "loss_iou": 0.2734375, "loss_num": 0.049072265625, "loss_xval": 0.2451171875, "num_input_tokens_seen": 51861284, "step": 302 }, { "epoch": 0.07970013809429868, "grad_norm": 11.560263696218914, "learning_rate": 5e-06, "loss": 0.2256, "num_input_tokens_seen": 52033296, "step": 303 }, { "epoch": 0.07970013809429868, "loss": 0.2260829657316208, "loss_ce": 0.005379845853894949, "loss_iou": 0.58984375, "loss_num": 0.044189453125, "loss_xval": 0.220703125, "num_input_tokens_seen": 52033296, "step": 303 }, { "epoch": 0.0799631748536858, "grad_norm": 11.109765340752924, "learning_rate": 5e-06, "loss": 0.2728, "num_input_tokens_seen": 52205488, "step": 304 }, { "epoch": 0.0799631748536858, "loss": 0.26471367478370667, "loss_ce": 0.0049480353482067585, "loss_iou": 0.296875, "loss_num": 0.052001953125, "loss_xval": 0.259765625, "num_input_tokens_seen": 52205488, "step": 304 }, { "epoch": 0.08022621161307293, "grad_norm": 9.561565409079957, "learning_rate": 5e-06, "loss": 0.2229, "num_input_tokens_seen": 52377756, "step": 305 }, { "epoch": 0.08022621161307293, "loss": 0.18537692725658417, "loss_ce": 0.0050790803506970406, "loss_iou": 0.431640625, "loss_num": 0.0361328125, "loss_xval": 0.1806640625, "num_input_tokens_seen": 52377756, "step": 305 }, { "epoch": 0.08048924837246005, "grad_norm": 11.190229795777121, "learning_rate": 5e-06, "loss": 0.2198, "num_input_tokens_seen": 52549784, "step": 306 }, { "epoch": 0.08048924837246005, "loss": 0.1936880648136139, "loss_ce": 0.009972257539629936, "loss_iou": 0.490234375, "loss_num": 0.03662109375, "loss_xval": 0.18359375, "num_input_tokens_seen": 52549784, "step": 306 }, { "epoch": 0.08075228513184718, "grad_norm": 9.790391883995309, "learning_rate": 5e-06, "loss": 0.2383, "num_input_tokens_seen": 52719856, "step": 307 }, { "epoch": 0.08075228513184718, "loss": 
0.16280654072761536, "loss_ce": 0.007838279940187931, "loss_iou": 0.5859375, "loss_num": 0.031005859375, "loss_xval": 0.1552734375, "num_input_tokens_seen": 52719856, "step": 307 }, { "epoch": 0.0810153218912343, "grad_norm": 9.839075915450485, "learning_rate": 5e-06, "loss": 0.2743, "num_input_tokens_seen": 52892084, "step": 308 }, { "epoch": 0.0810153218912343, "loss": 0.15204857289791107, "loss_ce": 0.004587644245475531, "loss_iou": 0.416015625, "loss_num": 0.029541015625, "loss_xval": 0.1474609375, "num_input_tokens_seen": 52892084, "step": 308 }, { "epoch": 0.08127835865062143, "grad_norm": 12.7104246981192, "learning_rate": 5e-06, "loss": 0.2624, "num_input_tokens_seen": 53064144, "step": 309 }, { "epoch": 0.08127835865062143, "loss": 0.25361326336860657, "loss_ce": 0.009228497743606567, "loss_iou": 0.7109375, "loss_num": 0.048828125, "loss_xval": 0.244140625, "num_input_tokens_seen": 53064144, "step": 309 }, { "epoch": 0.08154139541000854, "grad_norm": 9.929561281282899, "learning_rate": 5e-06, "loss": 0.3246, "num_input_tokens_seen": 53236468, "step": 310 }, { "epoch": 0.08154139541000854, "loss": 0.39354461431503296, "loss_ce": 0.005116886459290981, "loss_iou": 0.44140625, "loss_num": 0.07763671875, "loss_xval": 0.388671875, "num_input_tokens_seen": 53236468, "step": 310 }, { "epoch": 0.08180443216939567, "grad_norm": 7.973294747772443, "learning_rate": 5e-06, "loss": 0.3206, "num_input_tokens_seen": 53408452, "step": 311 }, { "epoch": 0.08180443216939567, "loss": 0.2905076742172241, "loss_ce": 0.005351424217224121, "loss_iou": null, "loss_num": 0.05712890625, "loss_xval": 0.28515625, "num_input_tokens_seen": 53408452, "step": 311 }, { "epoch": 0.08206746892878279, "grad_norm": 7.623528366567178, "learning_rate": 5e-06, "loss": 0.2153, "num_input_tokens_seen": 53580848, "step": 312 }, { "epoch": 0.08206746892878279, "loss": 0.19026613235473633, "loss_ce": 0.005817888304591179, "loss_iou": 0.5546875, "loss_num": 0.036865234375, "loss_xval": 0.1845703125, 
"num_input_tokens_seen": 53580848, "step": 312 }, { "epoch": 0.08233050568816992, "grad_norm": 8.591099989866738, "learning_rate": 5e-06, "loss": 0.2192, "num_input_tokens_seen": 53751420, "step": 313 }, { "epoch": 0.08233050568816992, "loss": 0.16798478364944458, "loss_ce": 0.005326096434146166, "loss_iou": 0.515625, "loss_num": 0.032470703125, "loss_xval": 0.1630859375, "num_input_tokens_seen": 53751420, "step": 313 }, { "epoch": 0.08259354244755705, "grad_norm": 8.168930137137453, "learning_rate": 5e-06, "loss": 0.1859, "num_input_tokens_seen": 53923692, "step": 314 }, { "epoch": 0.08259354244755705, "loss": 0.1808127462863922, "loss_ce": 0.006557377986609936, "loss_iou": 0.474609375, "loss_num": 0.034912109375, "loss_xval": 0.173828125, "num_input_tokens_seen": 53923692, "step": 314 }, { "epoch": 0.08285657920694417, "grad_norm": 12.69374602286009, "learning_rate": 5e-06, "loss": 0.255, "num_input_tokens_seen": 54095596, "step": 315 }, { "epoch": 0.08285657920694417, "loss": 0.2128537893295288, "loss_ce": 0.00447976216673851, "loss_iou": 0.470703125, "loss_num": 0.041748046875, "loss_xval": 0.2080078125, "num_input_tokens_seen": 54095596, "step": 315 }, { "epoch": 0.0831196159663313, "grad_norm": 15.86710980709006, "learning_rate": 5e-06, "loss": 0.3231, "num_input_tokens_seen": 54267660, "step": 316 }, { "epoch": 0.0831196159663313, "loss": 0.29302194714546204, "loss_ce": 0.004081526771187782, "loss_iou": 0.2890625, "loss_num": 0.0576171875, "loss_xval": 0.2890625, "num_input_tokens_seen": 54267660, "step": 316 }, { "epoch": 0.08338265272571842, "grad_norm": 15.523568167979095, "learning_rate": 5e-06, "loss": 0.2565, "num_input_tokens_seen": 54439776, "step": 317 }, { "epoch": 0.08338265272571842, "loss": 0.45379549264907837, "loss_ce": 0.00494293263182044, "loss_iou": 0.421875, "loss_num": 0.08984375, "loss_xval": 0.44921875, "num_input_tokens_seen": 54439776, "step": 317 }, { "epoch": 0.08364568948510555, "grad_norm": 8.655289602311944, "learning_rate": 
5e-06, "loss": 0.2346, "num_input_tokens_seen": 54612224, "step": 318 }, { "epoch": 0.08364568948510555, "loss": 0.22014674544334412, "loss_ce": 0.008476818911731243, "loss_iou": 0.53515625, "loss_num": 0.04248046875, "loss_xval": 0.2119140625, "num_input_tokens_seen": 54612224, "step": 318 }, { "epoch": 0.08390872624449267, "grad_norm": 10.678383550359573, "learning_rate": 5e-06, "loss": 0.2569, "num_input_tokens_seen": 54782956, "step": 319 }, { "epoch": 0.08390872624449267, "loss": 0.2195930778980255, "loss_ce": 0.004505200777202845, "loss_iou": 0.36328125, "loss_num": 0.04296875, "loss_xval": 0.21484375, "num_input_tokens_seen": 54782956, "step": 319 }, { "epoch": 0.0841717630038798, "grad_norm": 9.930533969178247, "learning_rate": 5e-06, "loss": 0.2305, "num_input_tokens_seen": 54955104, "step": 320 }, { "epoch": 0.0841717630038798, "loss": 0.24099573493003845, "loss_ce": 0.008909543044865131, "loss_iou": 0.57421875, "loss_num": 0.04638671875, "loss_xval": 0.232421875, "num_input_tokens_seen": 54955104, "step": 320 }, { "epoch": 0.08443479976326691, "grad_norm": 12.929508923145086, "learning_rate": 5e-06, "loss": 0.2998, "num_input_tokens_seen": 55127092, "step": 321 }, { "epoch": 0.08443479976326691, "loss": 0.3190174698829651, "loss_ce": 0.008958851918578148, "loss_iou": 0.53125, "loss_num": 0.06201171875, "loss_xval": 0.310546875, "num_input_tokens_seen": 55127092, "step": 321 }, { "epoch": 0.08469783652265404, "grad_norm": 8.336572461824584, "learning_rate": 5e-06, "loss": 0.2781, "num_input_tokens_seen": 55299364, "step": 322 }, { "epoch": 0.08469783652265404, "loss": 0.21446217596530914, "loss_ce": 0.005966084077954292, "loss_iou": 0.1796875, "loss_num": 0.041748046875, "loss_xval": 0.208984375, "num_input_tokens_seen": 55299364, "step": 322 }, { "epoch": 0.08496087328204116, "grad_norm": 9.09772392048227, "learning_rate": 5e-06, "loss": 0.2929, "num_input_tokens_seen": 55468252, "step": 323 }, { "epoch": 0.08496087328204116, "loss": 0.28219783306121826, 
"loss_ce": 0.007356537505984306, "loss_iou": 0.48046875, "loss_num": 0.054931640625, "loss_xval": 0.275390625, "num_input_tokens_seen": 55468252, "step": 323 }, { "epoch": 0.08522391004142829, "grad_norm": 9.926795422915726, "learning_rate": 5e-06, "loss": 0.2412, "num_input_tokens_seen": 55640432, "step": 324 }, { "epoch": 0.08522391004142829, "loss": 0.2594638466835022, "loss_ce": 0.006412106566131115, "loss_iou": 0.59375, "loss_num": 0.050537109375, "loss_xval": 0.25390625, "num_input_tokens_seen": 55640432, "step": 324 }, { "epoch": 0.08548694680081541, "grad_norm": 13.161460539721551, "learning_rate": 5e-06, "loss": 0.3056, "num_input_tokens_seen": 55812284, "step": 325 }, { "epoch": 0.08548694680081541, "loss": 0.30750948190689087, "loss_ce": 0.003920634277164936, "loss_iou": 0.416015625, "loss_num": 0.060791015625, "loss_xval": 0.302734375, "num_input_tokens_seen": 55812284, "step": 325 }, { "epoch": 0.08574998356020254, "grad_norm": 12.20729496362805, "learning_rate": 5e-06, "loss": 0.2492, "num_input_tokens_seen": 55982812, "step": 326 }, { "epoch": 0.08574998356020254, "loss": 0.25009769201278687, "loss_ce": 0.003637729212641716, "loss_iou": 0.435546875, "loss_num": 0.04931640625, "loss_xval": 0.24609375, "num_input_tokens_seen": 55982812, "step": 326 }, { "epoch": 0.08601302031958966, "grad_norm": 8.486549179327342, "learning_rate": 5e-06, "loss": 0.2612, "num_input_tokens_seen": 56154880, "step": 327 }, { "epoch": 0.08601302031958966, "loss": 0.24035008251667023, "loss_ce": 0.006341288331896067, "loss_iou": 0.4453125, "loss_num": 0.046875, "loss_xval": 0.234375, "num_input_tokens_seen": 56154880, "step": 327 }, { "epoch": 0.08627605707897679, "grad_norm": 12.368715754730403, "learning_rate": 5e-06, "loss": 0.2435, "num_input_tokens_seen": 56327104, "step": 328 }, { "epoch": 0.08627605707897679, "loss": 0.2434852123260498, "loss_ce": 0.003983248956501484, "loss_iou": 0.65234375, "loss_num": 0.0478515625, "loss_xval": 0.2392578125, 
"num_input_tokens_seen": 56327104, "step": 328 }, { "epoch": 0.08653909383836392, "grad_norm": 16.286201391843196, "learning_rate": 5e-06, "loss": 0.3578, "num_input_tokens_seen": 56498760, "step": 329 }, { "epoch": 0.08653909383836392, "loss": 0.3906838297843933, "loss_ce": 0.00481957383453846, "loss_iou": 0.388671875, "loss_num": 0.0771484375, "loss_xval": 0.38671875, "num_input_tokens_seen": 56498760, "step": 329 }, { "epoch": 0.08680213059775103, "grad_norm": 9.709126690100668, "learning_rate": 5e-06, "loss": 0.2709, "num_input_tokens_seen": 56669204, "step": 330 }, { "epoch": 0.08680213059775103, "loss": 0.2887915074825287, "loss_ce": 0.006198735907673836, "loss_iou": 0.359375, "loss_num": 0.056396484375, "loss_xval": 0.283203125, "num_input_tokens_seen": 56669204, "step": 330 }, { "epoch": 0.08706516735713817, "grad_norm": 9.72107226431463, "learning_rate": 5e-06, "loss": 0.2546, "num_input_tokens_seen": 56841324, "step": 331 }, { "epoch": 0.08706516735713817, "loss": 0.17188376188278198, "loss_ce": 0.005013656336814165, "loss_iou": 0.423828125, "loss_num": 0.033447265625, "loss_xval": 0.1669921875, "num_input_tokens_seen": 56841324, "step": 331 }, { "epoch": 0.08732820411652528, "grad_norm": 14.410704147758572, "learning_rate": 5e-06, "loss": 0.2888, "num_input_tokens_seen": 57013308, "step": 332 }, { "epoch": 0.08732820411652528, "loss": 0.2731274664402008, "loss_ce": 0.007868663407862186, "loss_iou": 0.5859375, "loss_num": 0.05322265625, "loss_xval": 0.265625, "num_input_tokens_seen": 57013308, "step": 332 }, { "epoch": 0.08759124087591241, "grad_norm": 9.729446702170758, "learning_rate": 5e-06, "loss": 0.2747, "num_input_tokens_seen": 57185320, "step": 333 }, { "epoch": 0.08759124087591241, "loss": 0.18412570655345917, "loss_ce": 0.004499247297644615, "loss_iou": 0.57421875, "loss_num": 0.035888671875, "loss_xval": 0.1796875, "num_input_tokens_seen": 57185320, "step": 333 }, { "epoch": 0.08785427763529953, "grad_norm": 13.290037619570391, "learning_rate": 
5e-06, "loss": 0.2065, "num_input_tokens_seen": 57355852, "step": 334 }, { "epoch": 0.08785427763529953, "loss": 0.17899873852729797, "loss_ce": 0.007001678459346294, "loss_iou": 0.32421875, "loss_num": 0.034423828125, "loss_xval": 0.171875, "num_input_tokens_seen": 57355852, "step": 334 }, { "epoch": 0.08811731439468666, "grad_norm": 13.54518630333599, "learning_rate": 5e-06, "loss": 0.3901, "num_input_tokens_seen": 57526160, "step": 335 }, { "epoch": 0.08811731439468666, "loss": 0.42548078298568726, "loss_ce": 0.004582356195896864, "loss_iou": 0.72265625, "loss_num": 0.083984375, "loss_xval": 0.421875, "num_input_tokens_seen": 57526160, "step": 335 }, { "epoch": 0.08838035115407378, "grad_norm": 12.0949131727458, "learning_rate": 5e-06, "loss": 0.3092, "num_input_tokens_seen": 57698240, "step": 336 }, { "epoch": 0.08838035115407378, "loss": 0.2829555869102478, "loss_ce": 0.005245603621006012, "loss_iou": 0.4765625, "loss_num": 0.055419921875, "loss_xval": 0.27734375, "num_input_tokens_seen": 57698240, "step": 336 }, { "epoch": 0.08864338791346091, "grad_norm": 11.320262534807219, "learning_rate": 5e-06, "loss": 0.2301, "num_input_tokens_seen": 57870476, "step": 337 }, { "epoch": 0.08864338791346091, "loss": 0.18072998523712158, "loss_ce": 0.004704595077782869, "loss_iou": 0.578125, "loss_num": 0.03515625, "loss_xval": 0.17578125, "num_input_tokens_seen": 57870476, "step": 337 }, { "epoch": 0.08890642467284803, "grad_norm": 7.708963725187913, "learning_rate": 5e-06, "loss": 0.2643, "num_input_tokens_seen": 58042960, "step": 338 }, { "epoch": 0.08890642467284803, "loss": 0.2719469666481018, "loss_ce": 0.004002647008746862, "loss_iou": 0.2890625, "loss_num": 0.0537109375, "loss_xval": 0.267578125, "num_input_tokens_seen": 58042960, "step": 338 }, { "epoch": 0.08916946143223516, "grad_norm": 8.11559752489591, "learning_rate": 5e-06, "loss": 0.285, "num_input_tokens_seen": 58215276, "step": 339 }, { "epoch": 0.08916946143223516, "loss": 0.2689915895462036, "loss_ce": 
0.004709342960268259, "loss_iou": 0.578125, "loss_num": 0.052734375, "loss_xval": 0.263671875, "num_input_tokens_seen": 58215276, "step": 339 }, { "epoch": 0.08943249819162227, "grad_norm": 16.72537995408633, "learning_rate": 5e-06, "loss": 0.2206, "num_input_tokens_seen": 58387140, "step": 340 }, { "epoch": 0.08943249819162227, "loss": 0.19433817267417908, "loss_ce": 0.007021272089332342, "loss_iou": 0.63671875, "loss_num": 0.03759765625, "loss_xval": 0.1875, "num_input_tokens_seen": 58387140, "step": 340 }, { "epoch": 0.0896955349510094, "grad_norm": 10.068757947032363, "learning_rate": 5e-06, "loss": 0.2708, "num_input_tokens_seen": 58559132, "step": 341 }, { "epoch": 0.0896955349510094, "loss": 0.19732418656349182, "loss_ce": 0.004819315858185291, "loss_iou": 0.4375, "loss_num": 0.03857421875, "loss_xval": 0.1923828125, "num_input_tokens_seen": 58559132, "step": 341 }, { "epoch": 0.08995857171039653, "grad_norm": 15.667162296382404, "learning_rate": 5e-06, "loss": 0.2229, "num_input_tokens_seen": 58729752, "step": 342 }, { "epoch": 0.08995857171039653, "loss": 0.251327782869339, "loss_ce": 0.006515774410218, "loss_iou": 0.5859375, "loss_num": 0.049072265625, "loss_xval": 0.2451171875, "num_input_tokens_seen": 58729752, "step": 342 }, { "epoch": 0.09022160846978365, "grad_norm": 10.706496762114405, "learning_rate": 5e-06, "loss": 0.2862, "num_input_tokens_seen": 58901996, "step": 343 }, { "epoch": 0.09022160846978365, "loss": 0.2672620415687561, "loss_ce": 0.003956370986998081, "loss_iou": 0.240234375, "loss_num": 0.052734375, "loss_xval": 0.263671875, "num_input_tokens_seen": 58901996, "step": 343 }, { "epoch": 0.09048464522917078, "grad_norm": 8.994760858015587, "learning_rate": 5e-06, "loss": 0.2729, "num_input_tokens_seen": 59071632, "step": 344 }, { "epoch": 0.09048464522917078, "loss": 0.2578084170818329, "loss_ce": 0.0040242355316877365, "loss_iou": 0.365234375, "loss_num": 0.05078125, "loss_xval": 0.25390625, "num_input_tokens_seen": 59071632, "step": 
344 }, { "epoch": 0.0907476819885579, "grad_norm": 10.48877247334524, "learning_rate": 5e-06, "loss": 0.1963, "num_input_tokens_seen": 59240464, "step": 345 }, { "epoch": 0.0907476819885579, "loss": 0.20404496788978577, "loss_ce": 0.0031172330491244793, "loss_iou": 0.283203125, "loss_num": 0.040283203125, "loss_xval": 0.201171875, "num_input_tokens_seen": 59240464, "step": 345 }, { "epoch": 0.09101071874794503, "grad_norm": 11.358390932213611, "learning_rate": 5e-06, "loss": 0.2309, "num_input_tokens_seen": 59411172, "step": 346 }, { "epoch": 0.09101071874794503, "loss": 0.23203009366989136, "loss_ce": 0.013524244539439678, "loss_iou": 0.5859375, "loss_num": 0.043701171875, "loss_xval": 0.21875, "num_input_tokens_seen": 59411172, "step": 346 }, { "epoch": 0.09127375550733215, "grad_norm": 11.905025621505947, "learning_rate": 5e-06, "loss": 0.2869, "num_input_tokens_seen": 59581764, "step": 347 }, { "epoch": 0.09127375550733215, "loss": 0.34626448154449463, "loss_ce": 0.005199995823204517, "loss_iou": 0.416015625, "loss_num": 0.068359375, "loss_xval": 0.341796875, "num_input_tokens_seen": 59581764, "step": 347 }, { "epoch": 0.09153679226671928, "grad_norm": 12.735636545678894, "learning_rate": 5e-06, "loss": 0.3255, "num_input_tokens_seen": 59754336, "step": 348 }, { "epoch": 0.09153679226671928, "loss": 0.395874559879303, "loss_ce": 0.005859900265932083, "loss_iou": 0.3125, "loss_num": 0.078125, "loss_xval": 0.390625, "num_input_tokens_seen": 59754336, "step": 348 }, { "epoch": 0.0917998290261064, "grad_norm": 12.687289585677311, "learning_rate": 5e-06, "loss": 0.3009, "num_input_tokens_seen": 59926836, "step": 349 }, { "epoch": 0.0917998290261064, "loss": 0.25403833389282227, "loss_ce": 0.0031838202849030495, "loss_iou": 0.349609375, "loss_num": 0.05029296875, "loss_xval": 0.25, "num_input_tokens_seen": 59926836, "step": 349 }, { "epoch": 0.09206286578549353, "grad_norm": 7.2501298492412145, "learning_rate": 5e-06, "loss": 0.193, "num_input_tokens_seen": 60099096, 
"step": 350 }, { "epoch": 0.09206286578549353, "loss": 0.25642409920692444, "loss_ce": 0.005325470119714737, "loss_iou": 0.3671875, "loss_num": 0.05029296875, "loss_xval": 0.251953125, "num_input_tokens_seen": 60099096, "step": 350 }, { "epoch": 0.09232590254488064, "grad_norm": 7.233766618859882, "learning_rate": 5e-06, "loss": 0.2191, "num_input_tokens_seen": 60271456, "step": 351 }, { "epoch": 0.09232590254488064, "loss": 0.23899057507514954, "loss_ce": 0.003883154597133398, "loss_iou": 0.51953125, "loss_num": 0.047119140625, "loss_xval": 0.2353515625, "num_input_tokens_seen": 60271456, "step": 351 }, { "epoch": 0.09258893930426777, "grad_norm": 10.116609125862757, "learning_rate": 5e-06, "loss": 0.2124, "num_input_tokens_seen": 60443440, "step": 352 }, { "epoch": 0.09258893930426777, "loss": 0.2204253077507019, "loss_ce": 0.005032242741435766, "loss_iou": 0.53125, "loss_num": 0.04296875, "loss_xval": 0.2158203125, "num_input_tokens_seen": 60443440, "step": 352 }, { "epoch": 0.09285197606365489, "grad_norm": 11.612967488254396, "learning_rate": 5e-06, "loss": 0.2054, "num_input_tokens_seen": 60615884, "step": 353 }, { "epoch": 0.09285197606365489, "loss": 0.20439094305038452, "loss_ce": 0.005050132982432842, "loss_iou": 0.416015625, "loss_num": 0.039794921875, "loss_xval": 0.19921875, "num_input_tokens_seen": 60615884, "step": 353 }, { "epoch": 0.09311501282304202, "grad_norm": 14.924807522940846, "learning_rate": 5e-06, "loss": 0.2902, "num_input_tokens_seen": 60787976, "step": 354 }, { "epoch": 0.09311501282304202, "loss": 0.35237956047058105, "loss_ce": 0.006432283669710159, "loss_iou": 0.63671875, "loss_num": 0.0693359375, "loss_xval": 0.345703125, "num_input_tokens_seen": 60787976, "step": 354 }, { "epoch": 0.09337804958242915, "grad_norm": 14.749952758612022, "learning_rate": 5e-06, "loss": 0.2866, "num_input_tokens_seen": 60960464, "step": 355 }, { "epoch": 0.09337804958242915, "loss": 0.27518266439437866, "loss_ce": 0.008092833682894707, "loss_iou": 
0.51953125, "loss_num": 0.053466796875, "loss_xval": 0.267578125, "num_input_tokens_seen": 60960464, "step": 355 }, { "epoch": 0.09364108634181627, "grad_norm": 8.028682980536022, "learning_rate": 5e-06, "loss": 0.2222, "num_input_tokens_seen": 61130588, "step": 356 }, { "epoch": 0.09364108634181627, "loss": 0.2519834041595459, "loss_ce": 0.004424803890287876, "loss_iou": 0.37109375, "loss_num": 0.049560546875, "loss_xval": 0.248046875, "num_input_tokens_seen": 61130588, "step": 356 }, { "epoch": 0.0939041231012034, "grad_norm": 9.199423751114487, "learning_rate": 5e-06, "loss": 0.2881, "num_input_tokens_seen": 61302652, "step": 357 }, { "epoch": 0.0939041231012034, "loss": 0.39324474334716797, "loss_ce": 0.012019152753055096, "loss_iou": 0.5234375, "loss_num": 0.076171875, "loss_xval": 0.380859375, "num_input_tokens_seen": 61302652, "step": 357 }, { "epoch": 0.09416715986059052, "grad_norm": 8.678464430801892, "learning_rate": 5e-06, "loss": 0.2227, "num_input_tokens_seen": 61473012, "step": 358 }, { "epoch": 0.09416715986059052, "loss": 0.20840191841125488, "loss_ce": 0.00448344461619854, "loss_iou": 0.4765625, "loss_num": 0.040771484375, "loss_xval": 0.2041015625, "num_input_tokens_seen": 61473012, "step": 358 }, { "epoch": 0.09443019661997765, "grad_norm": 13.785653955051378, "learning_rate": 5e-06, "loss": 0.2401, "num_input_tokens_seen": 61645272, "step": 359 }, { "epoch": 0.09443019661997765, "loss": 0.19086723029613495, "loss_ce": 0.0067241680808365345, "loss_iou": 0.462890625, "loss_num": 0.036865234375, "loss_xval": 0.1845703125, "num_input_tokens_seen": 61645272, "step": 359 }, { "epoch": 0.09469323337936476, "grad_norm": 7.119442902133065, "learning_rate": 5e-06, "loss": 0.2102, "num_input_tokens_seen": 61817436, "step": 360 }, { "epoch": 0.09469323337936476, "loss": 0.21873927116394043, "loss_ce": 0.0036513670347630978, "loss_iou": 0.44140625, "loss_num": 0.04296875, "loss_xval": 0.21484375, "num_input_tokens_seen": 61817436, "step": 360 }, { "epoch": 
0.0949562701387519, "grad_norm": 10.914204757351408, "learning_rate": 5e-06, "loss": 0.2247, "num_input_tokens_seen": 61987764, "step": 361 }, { "epoch": 0.0949562701387519, "loss": 0.2072867900133133, "loss_ce": 0.0077018230222165585, "loss_iou": 0.6171875, "loss_num": 0.039794921875, "loss_xval": 0.19921875, "num_input_tokens_seen": 61987764, "step": 361 }, { "epoch": 0.09521930689813901, "grad_norm": 10.090324495449488, "learning_rate": 5e-06, "loss": 0.2923, "num_input_tokens_seen": 62158356, "step": 362 }, { "epoch": 0.09521930689813901, "loss": 0.2174176275730133, "loss_ce": 0.007944983430206776, "loss_iou": 0.345703125, "loss_num": 0.041748046875, "loss_xval": 0.208984375, "num_input_tokens_seen": 62158356, "step": 362 }, { "epoch": 0.09548234365752614, "grad_norm": 8.545687476622918, "learning_rate": 5e-06, "loss": 0.2599, "num_input_tokens_seen": 62330412, "step": 363 }, { "epoch": 0.09548234365752614, "loss": 0.2114790380001068, "loss_ce": 0.004081577528268099, "loss_iou": 0.546875, "loss_num": 0.04150390625, "loss_xval": 0.20703125, "num_input_tokens_seen": 62330412, "step": 363 }, { "epoch": 0.09574538041691326, "grad_norm": 9.203465723081402, "learning_rate": 5e-06, "loss": 0.1892, "num_input_tokens_seen": 62502416, "step": 364 }, { "epoch": 0.09574538041691326, "loss": 0.20543332397937775, "loss_ce": 0.004505585879087448, "loss_iou": 0.62890625, "loss_num": 0.0400390625, "loss_xval": 0.201171875, "num_input_tokens_seen": 62502416, "step": 364 }, { "epoch": 0.09600841717630039, "grad_norm": 11.930210195365788, "learning_rate": 5e-06, "loss": 0.3152, "num_input_tokens_seen": 62674708, "step": 365 }, { "epoch": 0.09600841717630039, "loss": 0.3104754686355591, "loss_ce": 0.006398319266736507, "loss_iou": 0.5859375, "loss_num": 0.060791015625, "loss_xval": 0.3046875, "num_input_tokens_seen": 62674708, "step": 365 }, { "epoch": 0.0962714539356875, "grad_norm": 9.520122083335602, "learning_rate": 5e-06, "loss": 0.2852, "num_input_tokens_seen": 62846712, 
"step": 366 }, { "epoch": 0.0962714539356875, "loss": 0.30926454067230225, "loss_ce": 0.008239164017140865, "loss_iou": 0.3984375, "loss_num": 0.06005859375, "loss_xval": 0.30078125, "num_input_tokens_seen": 62846712, "step": 366 }, { "epoch": 0.09653449069507464, "grad_norm": 7.113583060734605, "learning_rate": 5e-06, "loss": 0.2553, "num_input_tokens_seen": 63018700, "step": 367 }, { "epoch": 0.09653449069507464, "loss": 0.27072834968566895, "loss_ce": 0.006201992742717266, "loss_iou": 0.470703125, "loss_num": 0.052978515625, "loss_xval": 0.263671875, "num_input_tokens_seen": 63018700, "step": 367 }, { "epoch": 0.09679752745446175, "grad_norm": 7.613528329197277, "learning_rate": 5e-06, "loss": 0.2091, "num_input_tokens_seen": 63188284, "step": 368 }, { "epoch": 0.09679752745446175, "loss": 0.19734525680541992, "loss_ce": 0.0055117676965892315, "loss_iou": 0.53515625, "loss_num": 0.038330078125, "loss_xval": 0.19140625, "num_input_tokens_seen": 63188284, "step": 368 }, { "epoch": 0.09706056421384888, "grad_norm": 7.59501735101136, "learning_rate": 5e-06, "loss": 0.2185, "num_input_tokens_seen": 63360584, "step": 369 }, { "epoch": 0.09706056421384888, "loss": 0.23239563405513763, "loss_ce": 0.003757936879992485, "loss_iou": 0.58984375, "loss_num": 0.045654296875, "loss_xval": 0.228515625, "num_input_tokens_seen": 63360584, "step": 369 }, { "epoch": 0.09732360097323602, "grad_norm": 9.071713140748203, "learning_rate": 5e-06, "loss": 0.2417, "num_input_tokens_seen": 63530348, "step": 370 }, { "epoch": 0.09732360097323602, "loss": 0.3295820653438568, "loss_ce": 0.004997124895453453, "loss_iou": 0.36328125, "loss_num": 0.06494140625, "loss_xval": 0.32421875, "num_input_tokens_seen": 63530348, "step": 370 }, { "epoch": 0.09758663773262313, "grad_norm": 18.945858878396198, "learning_rate": 5e-06, "loss": 0.2916, "num_input_tokens_seen": 63702772, "step": 371 }, { "epoch": 0.09758663773262313, "loss": 0.38020581007003784, "loss_ce": 0.007525136228650808, "loss_iou": 
0.54296875, "loss_num": 0.07421875, "loss_xval": 0.373046875, "num_input_tokens_seen": 63702772, "step": 371 }, { "epoch": 0.09784967449201026, "grad_norm": 10.004542997696944, "learning_rate": 5e-06, "loss": 0.2707, "num_input_tokens_seen": 63874988, "step": 372 }, { "epoch": 0.09784967449201026, "loss": 0.21838681399822235, "loss_ce": 0.005313089117407799, "loss_iou": 0.39453125, "loss_num": 0.04248046875, "loss_xval": 0.212890625, "num_input_tokens_seen": 63874988, "step": 372 }, { "epoch": 0.09811271125139738, "grad_norm": 12.569402693685879, "learning_rate": 5e-06, "loss": 0.3095, "num_input_tokens_seen": 64047192, "step": 373 }, { "epoch": 0.09811271125139738, "loss": 0.23138675093650818, "loss_ce": 0.0027490435168147087, "loss_iou": 0.45703125, "loss_num": 0.045654296875, "loss_xval": 0.228515625, "num_input_tokens_seen": 64047192, "step": 373 }, { "epoch": 0.09837574801078451, "grad_norm": 7.761350865641741, "learning_rate": 5e-06, "loss": 0.2808, "num_input_tokens_seen": 64217628, "step": 374 }, { "epoch": 0.09837574801078451, "loss": 0.22036589682102203, "loss_ce": 0.005583181045949459, "loss_iou": 0.40234375, "loss_num": 0.04296875, "loss_xval": 0.21484375, "num_input_tokens_seen": 64217628, "step": 374 }, { "epoch": 0.09863878477017163, "grad_norm": 15.752581561948926, "learning_rate": 5e-06, "loss": 0.2404, "num_input_tokens_seen": 64389824, "step": 375 }, { "epoch": 0.09863878477017163, "loss": 0.31320562958717346, "loss_ce": 0.0048560285940766335, "loss_iou": 0.40234375, "loss_num": 0.0615234375, "loss_xval": 0.30859375, "num_input_tokens_seen": 64389824, "step": 375 }, { "epoch": 0.09890182152955876, "grad_norm": 8.987788273052438, "learning_rate": 5e-06, "loss": 0.2167, "num_input_tokens_seen": 64557964, "step": 376 }, { "epoch": 0.09890182152955876, "loss": 0.15088969469070435, "loss_ce": 0.0028794521931558847, "loss_iou": 0.48828125, "loss_num": 0.029541015625, "loss_xval": 0.1484375, "num_input_tokens_seen": 64557964, "step": 376 }, { "epoch": 
0.09916485828894588, "grad_norm": 14.201905900297023, "learning_rate": 5e-06, "loss": 0.282, "num_input_tokens_seen": 64730276, "step": 377 }, { "epoch": 0.09916485828894588, "loss": 0.2574768662452698, "loss_ce": 0.004425112158060074, "loss_iou": 0.46484375, "loss_num": 0.050537109375, "loss_xval": 0.25390625, "num_input_tokens_seen": 64730276, "step": 377 }, { "epoch": 0.099427895048333, "grad_norm": 10.355784479360198, "learning_rate": 5e-06, "loss": 0.2585, "num_input_tokens_seen": 64902196, "step": 378 }, { "epoch": 0.099427895048333, "loss": 0.38254526257514954, "loss_ce": 0.00461557786911726, "loss_iou": 0.345703125, "loss_num": 0.07568359375, "loss_xval": 0.37890625, "num_input_tokens_seen": 64902196, "step": 378 }, { "epoch": 0.09969093180772012, "grad_norm": 8.387859091991706, "learning_rate": 5e-06, "loss": 0.266, "num_input_tokens_seen": 65074196, "step": 379 }, { "epoch": 0.09969093180772012, "loss": 0.3536115884780884, "loss_ce": 0.007664328906685114, "loss_iou": 0.484375, "loss_num": 0.0693359375, "loss_xval": 0.345703125, "num_input_tokens_seen": 65074196, "step": 379 }, { "epoch": 0.09995396856710725, "grad_norm": 6.106776880682037, "learning_rate": 5e-06, "loss": 0.179, "num_input_tokens_seen": 65245768, "step": 380 }, { "epoch": 0.09995396856710725, "loss": 0.16343596577644348, "loss_ce": 0.003645919729024172, "loss_iou": 0.38671875, "loss_num": 0.031982421875, "loss_xval": 0.16015625, "num_input_tokens_seen": 65245768, "step": 380 }, { "epoch": 0.10021700532649437, "grad_norm": 13.780341497897325, "learning_rate": 5e-06, "loss": 0.203, "num_input_tokens_seen": 65418016, "step": 381 }, { "epoch": 0.10021700532649437, "loss": 0.21609731018543243, "loss_ce": 0.004000143148005009, "loss_iou": 0.48046875, "loss_num": 0.04248046875, "loss_xval": 0.2119140625, "num_input_tokens_seen": 65418016, "step": 381 }, { "epoch": 0.1004800420858815, "grad_norm": 9.32261949794866, "learning_rate": 5e-06, "loss": 0.2265, "num_input_tokens_seen": 65590312, "step": 
382 }, { "epoch": 0.1004800420858815, "loss": 0.22935181856155396, "loss_ce": 0.00425416324287653, "loss_iou": 0.61328125, "loss_num": 0.045166015625, "loss_xval": 0.224609375, "num_input_tokens_seen": 65590312, "step": 382 }, { "epoch": 0.10074307884526863, "grad_norm": 8.048917738163752, "learning_rate": 5e-06, "loss": 0.2741, "num_input_tokens_seen": 65762432, "step": 383 }, { "epoch": 0.10074307884526863, "loss": 0.3397749662399292, "loss_ce": 0.0026167738251388073, "loss_iou": 0.2734375, "loss_num": 0.0673828125, "loss_xval": 0.337890625, "num_input_tokens_seen": 65762432, "step": 383 }, { "epoch": 0.10100611560465575, "grad_norm": 10.637781772264608, "learning_rate": 5e-06, "loss": 0.2218, "num_input_tokens_seen": 65934468, "step": 384 }, { "epoch": 0.10100611560465575, "loss": 0.2347402572631836, "loss_ce": 0.01049709226936102, "loss_iou": 0.41015625, "loss_num": 0.044921875, "loss_xval": 0.224609375, "num_input_tokens_seen": 65934468, "step": 384 }, { "epoch": 0.10126915236404288, "grad_norm": 8.002274766525902, "learning_rate": 5e-06, "loss": 0.2433, "num_input_tokens_seen": 66106856, "step": 385 }, { "epoch": 0.10126915236404288, "loss": 0.2602759301662445, "loss_ce": 0.005881410092115402, "loss_iou": 0.51171875, "loss_num": 0.05078125, "loss_xval": 0.25390625, "num_input_tokens_seen": 66106856, "step": 385 }, { "epoch": 0.10153218912343, "grad_norm": 5.854621538534226, "learning_rate": 5e-06, "loss": 0.2458, "num_input_tokens_seen": 66278808, "step": 386 }, { "epoch": 0.10153218912343, "loss": 0.24760979413986206, "loss_ce": 0.008718185126781464, "loss_iou": 0.435546875, "loss_num": 0.0478515625, "loss_xval": 0.2392578125, "num_input_tokens_seen": 66278808, "step": 386 }, { "epoch": 0.10179522588281713, "grad_norm": 8.25201471581488, "learning_rate": 5e-06, "loss": 0.2036, "num_input_tokens_seen": 66449192, "step": 387 }, { "epoch": 0.10179522588281713, "loss": 0.18327301740646362, "loss_ce": 0.007003485690802336, "loss_iou": 0.6171875, "loss_num": 
0.035400390625, "loss_xval": 0.17578125, "num_input_tokens_seen": 66449192, "step": 387 }, { "epoch": 0.10205826264220424, "grad_norm": 7.445331676424441, "learning_rate": 5e-06, "loss": 0.2392, "num_input_tokens_seen": 66619484, "step": 388 }, { "epoch": 0.10205826264220424, "loss": 0.19971191883087158, "loss_ce": 0.0065356409177184105, "loss_iou": 0.55859375, "loss_num": 0.03857421875, "loss_xval": 0.193359375, "num_input_tokens_seen": 66619484, "step": 388 }, { "epoch": 0.10232129940159138, "grad_norm": 6.590904938513716, "learning_rate": 5e-06, "loss": 0.1929, "num_input_tokens_seen": 66791680, "step": 389 }, { "epoch": 0.10232129940159138, "loss": 0.186821848154068, "loss_ce": 0.0036553400568664074, "loss_iou": 0.4453125, "loss_num": 0.03662109375, "loss_xval": 0.18359375, "num_input_tokens_seen": 66791680, "step": 389 }, { "epoch": 0.10258433616097849, "grad_norm": 7.115969301930752, "learning_rate": 5e-06, "loss": 0.2258, "num_input_tokens_seen": 66963964, "step": 390 }, { "epoch": 0.10258433616097849, "loss": 0.21056599915027618, "loss_ce": 0.005732023622840643, "loss_iou": 0.5234375, "loss_num": 0.041015625, "loss_xval": 0.205078125, "num_input_tokens_seen": 66963964, "step": 390 }, { "epoch": 0.10284737292036562, "grad_norm": 10.39999100726405, "learning_rate": 5e-06, "loss": 0.2688, "num_input_tokens_seen": 67136264, "step": 391 }, { "epoch": 0.10284737292036562, "loss": 0.32519546151161194, "loss_ce": 0.0023194823879748583, "loss_iou": 0.330078125, "loss_num": 0.064453125, "loss_xval": 0.322265625, "num_input_tokens_seen": 67136264, "step": 391 }, { "epoch": 0.10311040967975274, "grad_norm": 10.853923665788189, "learning_rate": 5e-06, "loss": 0.2335, "num_input_tokens_seen": 67308332, "step": 392 }, { "epoch": 0.10311040967975274, "loss": 0.22598545253276825, "loss_ce": 0.004671982489526272, "loss_iou": 0.578125, "loss_num": 0.044189453125, "loss_xval": 0.2216796875, "num_input_tokens_seen": 67308332, "step": 392 }, { "epoch": 0.10337344643913987, 
"grad_norm": 12.644758786705145, "learning_rate": 5e-06, "loss": 0.2568, "num_input_tokens_seen": 67480732, "step": 393 }, { "epoch": 0.10337344643913987, "loss": 0.22954288125038147, "loss_ce": 0.0029803775250911713, "loss_iou": 0.322265625, "loss_num": 0.04541015625, "loss_xval": 0.2265625, "num_input_tokens_seen": 67480732, "step": 393 }, { "epoch": 0.10363648319852699, "grad_norm": 9.802676625201595, "learning_rate": 5e-06, "loss": 0.2089, "num_input_tokens_seen": 67652736, "step": 394 }, { "epoch": 0.10363648319852699, "loss": 0.2672483026981354, "loss_ce": 0.006017843261361122, "loss_iou": 0.240234375, "loss_num": 0.05224609375, "loss_xval": 0.26171875, "num_input_tokens_seen": 67652736, "step": 394 }, { "epoch": 0.10389951995791412, "grad_norm": 7.134550479825786, "learning_rate": 5e-06, "loss": 0.2498, "num_input_tokens_seen": 67825356, "step": 395 }, { "epoch": 0.10389951995791412, "loss": 0.2973036766052246, "loss_ce": 0.004334905184805393, "loss_iou": 0.53515625, "loss_num": 0.05859375, "loss_xval": 0.29296875, "num_input_tokens_seen": 67825356, "step": 395 }, { "epoch": 0.10416255671730125, "grad_norm": 9.967886960744174, "learning_rate": 5e-06, "loss": 0.1892, "num_input_tokens_seen": 67997632, "step": 396 }, { "epoch": 0.10416255671730125, "loss": 0.16415753960609436, "loss_ce": 0.0022923052310943604, "loss_iou": 0.62109375, "loss_num": 0.032470703125, "loss_xval": 0.162109375, "num_input_tokens_seen": 67997632, "step": 396 }, { "epoch": 0.10442559347668837, "grad_norm": 10.176506863521315, "learning_rate": 5e-06, "loss": 0.2534, "num_input_tokens_seen": 68168484, "step": 397 }, { "epoch": 0.10442559347668837, "loss": 0.2146688550710678, "loss_ce": 0.002388589084148407, "loss_iou": 0.455078125, "loss_num": 0.04248046875, "loss_xval": 0.2119140625, "num_input_tokens_seen": 68168484, "step": 397 }, { "epoch": 0.1046886302360755, "grad_norm": 12.732415116106893, "learning_rate": 5e-06, "loss": 0.2673, "num_input_tokens_seen": 68337496, "step": 398 }, { 
"epoch": 0.1046886302360755, "loss": 0.28035295009613037, "loss_ce": 0.003985744901001453, "loss_iou": 0.5390625, "loss_num": 0.05517578125, "loss_xval": 0.27734375, "num_input_tokens_seen": 68337496, "step": 398 }, { "epoch": 0.10495166699546261, "grad_norm": 8.052010611714167, "learning_rate": 5e-06, "loss": 0.278, "num_input_tokens_seen": 68509860, "step": 399 }, { "epoch": 0.10495166699546261, "loss": 0.36081743240356445, "loss_ce": 0.003395556937903166, "loss_iou": 0.486328125, "loss_num": 0.0712890625, "loss_xval": 0.357421875, "num_input_tokens_seen": 68509860, "step": 399 }, { "epoch": 0.10521470375484974, "grad_norm": 7.830511214158693, "learning_rate": 5e-06, "loss": 0.2325, "num_input_tokens_seen": 68681940, "step": 400 }, { "epoch": 0.10521470375484974, "loss": 0.2878537178039551, "loss_ce": 0.005016806535422802, "loss_iou": 0.40625, "loss_num": 0.056640625, "loss_xval": 0.283203125, "num_input_tokens_seen": 68681940, "step": 400 }, { "epoch": 0.10547774051423686, "grad_norm": 8.196278314466337, "learning_rate": 5e-06, "loss": 0.2176, "num_input_tokens_seen": 68854332, "step": 401 }, { "epoch": 0.10547774051423686, "loss": 0.27157318592071533, "loss_ce": 0.004483355674892664, "loss_iou": 0.455078125, "loss_num": 0.053466796875, "loss_xval": 0.267578125, "num_input_tokens_seen": 68854332, "step": 401 }, { "epoch": 0.10574077727362399, "grad_norm": 8.502085361700777, "learning_rate": 5e-06, "loss": 0.2004, "num_input_tokens_seen": 69024884, "step": 402 }, { "epoch": 0.10574077727362399, "loss": 0.20401804149150848, "loss_ce": 0.002235804684460163, "loss_iou": 0.34765625, "loss_num": 0.04052734375, "loss_xval": 0.2021484375, "num_input_tokens_seen": 69024884, "step": 402 }, { "epoch": 0.10600381403301111, "grad_norm": 12.566157085058673, "learning_rate": 5e-06, "loss": 0.2292, "num_input_tokens_seen": 69197236, "step": 403 }, { "epoch": 0.10600381403301111, "loss": 0.24189046025276184, "loss_ce": 0.0023274626582860947, "loss_iou": 0.4609375, "loss_num": 
0.0478515625, "loss_xval": 0.2392578125, "num_input_tokens_seen": 69197236, "step": 403 }, { "epoch": 0.10626685079239824, "grad_norm": 8.163593280105717, "learning_rate": 5e-06, "loss": 0.1782, "num_input_tokens_seen": 69369436, "step": 404 }, { "epoch": 0.10626685079239824, "loss": 0.17368575930595398, "loss_ce": 0.005472864024341106, "loss_iou": 0.35546875, "loss_num": 0.03369140625, "loss_xval": 0.16796875, "num_input_tokens_seen": 69369436, "step": 404 }, { "epoch": 0.10652988755178536, "grad_norm": 7.3996201552939596, "learning_rate": 5e-06, "loss": 0.1615, "num_input_tokens_seen": 69540036, "step": 405 }, { "epoch": 0.10652988755178536, "loss": 0.1399962604045868, "loss_ce": 0.004925462882965803, "loss_iou": 0.5234375, "loss_num": 0.0269775390625, "loss_xval": 0.134765625, "num_input_tokens_seen": 69540036, "step": 405 }, { "epoch": 0.10679292431117249, "grad_norm": 10.197562859133864, "learning_rate": 5e-06, "loss": 0.2373, "num_input_tokens_seen": 69712000, "step": 406 }, { "epoch": 0.10679292431117249, "loss": 0.19172173738479614, "loss_ce": 0.0054424479603767395, "loss_iou": 0.71875, "loss_num": 0.037353515625, "loss_xval": 0.1865234375, "num_input_tokens_seen": 69712000, "step": 406 }, { "epoch": 0.1070559610705596, "grad_norm": 13.16161294170375, "learning_rate": 5e-06, "loss": 0.2782, "num_input_tokens_seen": 69883872, "step": 407 }, { "epoch": 0.1070559610705596, "loss": 0.28615695238113403, "loss_ce": 0.002465539611876011, "loss_iou": 0.357421875, "loss_num": 0.056640625, "loss_xval": 0.283203125, "num_input_tokens_seen": 69883872, "step": 407 }, { "epoch": 0.10731899782994674, "grad_norm": 8.88729554885214, "learning_rate": 5e-06, "loss": 0.2054, "num_input_tokens_seen": 70056060, "step": 408 }, { "epoch": 0.10731899782994674, "loss": 0.19157525897026062, "loss_ce": 0.0030986934434622526, "loss_iou": 0.5078125, "loss_num": 0.03759765625, "loss_xval": 0.1884765625, "num_input_tokens_seen": 70056060, "step": 408 }, { "epoch": 0.10758203458933385, 
"grad_norm": 10.615365655634278, "learning_rate": 5e-06, "loss": 0.2465, "num_input_tokens_seen": 70228104, "step": 409 }, { "epoch": 0.10758203458933385, "loss": 0.315712034702301, "loss_ce": 0.003150993725284934, "loss_iou": 0.40234375, "loss_num": 0.0625, "loss_xval": 0.3125, "num_input_tokens_seen": 70228104, "step": 409 }, { "epoch": 0.10784507134872098, "grad_norm": 8.946094916164988, "learning_rate": 5e-06, "loss": 0.2711, "num_input_tokens_seen": 70398676, "step": 410 }, { "epoch": 0.10784507134872098, "loss": 0.37502604722976685, "loss_ce": 0.010768221691250801, "loss_iou": 0.412109375, "loss_num": 0.07275390625, "loss_xval": 0.36328125, "num_input_tokens_seen": 70398676, "step": 410 }, { "epoch": 0.10810810810810811, "grad_norm": 10.043204254830277, "learning_rate": 5e-06, "loss": 0.2838, "num_input_tokens_seen": 70570976, "step": 411 }, { "epoch": 0.10810810810810811, "loss": 0.23639510571956635, "loss_ce": 0.004095299169421196, "loss_iou": 0.443359375, "loss_num": 0.04638671875, "loss_xval": 0.232421875, "num_input_tokens_seen": 70570976, "step": 411 }, { "epoch": 0.10837114486749523, "grad_norm": 6.784469948331844, "learning_rate": 5e-06, "loss": 0.2968, "num_input_tokens_seen": 70741228, "step": 412 }, { "epoch": 0.10837114486749523, "loss": 0.3139989376068115, "loss_ce": 0.001865162281319499, "loss_iou": 0.3671875, "loss_num": 0.0625, "loss_xval": 0.3125, "num_input_tokens_seen": 70741228, "step": 412 }, { "epoch": 0.10863418162688236, "grad_norm": 6.343923096243914, "learning_rate": 5e-06, "loss": 0.2107, "num_input_tokens_seen": 70913516, "step": 413 }, { "epoch": 0.10863418162688236, "loss": 0.19345833361148834, "loss_ce": 0.0018079333240166306, "loss_iou": 0.5, "loss_num": 0.038330078125, "loss_xval": 0.19140625, "num_input_tokens_seen": 70913516, "step": 413 }, { "epoch": 0.10889721838626948, "grad_norm": 7.92695485009758, "learning_rate": 5e-06, "loss": 0.1941, "num_input_tokens_seen": 71085736, "step": 414 }, { "epoch": 0.10889721838626948, 
"loss": 0.18501858413219452, "loss_ce": 0.001791047165170312, "loss_iou": 0.65234375, "loss_num": 0.03662109375, "loss_xval": 0.18359375, "num_input_tokens_seen": 71085736, "step": 414 }, { "epoch": 0.10916025514565661, "grad_norm": 8.754602822026959, "learning_rate": 5e-06, "loss": 0.2194, "num_input_tokens_seen": 71257772, "step": 415 }, { "epoch": 0.10916025514565661, "loss": 0.2221953421831131, "loss_ce": 0.0017973824869841337, "loss_iou": 0.65234375, "loss_num": 0.0439453125, "loss_xval": 0.220703125, "num_input_tokens_seen": 71257772, "step": 415 }, { "epoch": 0.10942329190504373, "grad_norm": 7.459161826900908, "learning_rate": 5e-06, "loss": 0.2477, "num_input_tokens_seen": 71430004, "step": 416 }, { "epoch": 0.10942329190504373, "loss": 0.2557048797607422, "loss_ce": 0.0026531266048550606, "loss_iou": 0.578125, "loss_num": 0.050537109375, "loss_xval": 0.25390625, "num_input_tokens_seen": 71430004, "step": 416 }, { "epoch": 0.10968632866443086, "grad_norm": 6.337226894093766, "learning_rate": 5e-06, "loss": 0.236, "num_input_tokens_seen": 71602160, "step": 417 }, { "epoch": 0.10968632866443086, "loss": 0.1684013307094574, "loss_ce": 0.0018363934941589832, "loss_iou": 0.59375, "loss_num": 0.033447265625, "loss_xval": 0.1669921875, "num_input_tokens_seen": 71602160, "step": 417 }, { "epoch": 0.10994936542381797, "grad_norm": 10.092397777907028, "learning_rate": 5e-06, "loss": 0.1998, "num_input_tokens_seen": 71774264, "step": 418 }, { "epoch": 0.10994936542381797, "loss": 0.17219412326812744, "loss_ce": 0.0024553609546273947, "loss_iou": 0.53125, "loss_num": 0.033935546875, "loss_xval": 0.169921875, "num_input_tokens_seen": 71774264, "step": 418 }, { "epoch": 0.1102124021832051, "grad_norm": 10.933777042764003, "learning_rate": 5e-06, "loss": 0.2549, "num_input_tokens_seen": 71943760, "step": 419 }, { "epoch": 0.1102124021832051, "loss": 0.2607925236225128, "loss_ce": 0.0052993567660450935, "loss_iou": 0.43359375, "loss_num": 0.051025390625, "loss_xval": 
0.255859375, "num_input_tokens_seen": 71943760, "step": 419 }, { "epoch": 0.11047543894259222, "grad_norm": 18.099442946915016, "learning_rate": 5e-06, "loss": 0.2408, "num_input_tokens_seen": 72114360, "step": 420 }, { "epoch": 0.11047543894259222, "loss": 0.2702986001968384, "loss_ce": 0.0022321869619190693, "loss_iou": 0.3515625, "loss_num": 0.053466796875, "loss_xval": 0.267578125, "num_input_tokens_seen": 72114360, "step": 420 }, { "epoch": 0.11073847570197935, "grad_norm": 6.645880081378423, "learning_rate": 5e-06, "loss": 0.2374, "num_input_tokens_seen": 72285984, "step": 421 }, { "epoch": 0.11073847570197935, "loss": 0.17263615131378174, "loss_ce": 0.0023480583913624287, "loss_iou": 0.38671875, "loss_num": 0.0341796875, "loss_xval": 0.169921875, "num_input_tokens_seen": 72285984, "step": 421 }, { "epoch": 0.11100151246136647, "grad_norm": 10.576055281968134, "learning_rate": 5e-06, "loss": 0.1819, "num_input_tokens_seen": 72458472, "step": 422 }, { "epoch": 0.11100151246136647, "loss": 0.2114211916923523, "loss_ce": 0.005244437139481306, "loss_iou": 0.5546875, "loss_num": 0.041259765625, "loss_xval": 0.2060546875, "num_input_tokens_seen": 72458472, "step": 422 }, { "epoch": 0.1112645492207536, "grad_norm": 12.150497670240854, "learning_rate": 5e-06, "loss": 0.2879, "num_input_tokens_seen": 72630848, "step": 423 }, { "epoch": 0.1112645492207536, "loss": 0.195449560880661, "loss_ce": 0.0027005516458302736, "loss_iou": 0.27734375, "loss_num": 0.03857421875, "loss_xval": 0.1923828125, "num_input_tokens_seen": 72630848, "step": 423 }, { "epoch": 0.11152758598014073, "grad_norm": 9.206681239637351, "learning_rate": 5e-06, "loss": 0.1882, "num_input_tokens_seen": 72802948, "step": 424 }, { "epoch": 0.11152758598014073, "loss": 0.20556196570396423, "loss_ce": 0.003230433911085129, "loss_iou": 0.4453125, "loss_num": 0.04052734375, "loss_xval": 0.2021484375, "num_input_tokens_seen": 72802948, "step": 424 }, { "epoch": 0.11179062273952785, "grad_norm": 
7.914405274492032, "learning_rate": 5e-06, "loss": 0.2408, "num_input_tokens_seen": 72973304, "step": 425 }, { "epoch": 0.11179062273952785, "loss": 0.24065472185611725, "loss_ce": 0.002800729824230075, "loss_iou": 0.46875, "loss_num": 0.047607421875, "loss_xval": 0.23828125, "num_input_tokens_seen": 72973304, "step": 425 }, { "epoch": 0.11205365949891498, "grad_norm": 8.079757468011927, "learning_rate": 5e-06, "loss": 0.2298, "num_input_tokens_seen": 73145520, "step": 426 }, { "epoch": 0.11205365949891498, "loss": 0.17487749457359314, "loss_ce": 0.007641167379915714, "loss_iou": 0.515625, "loss_num": 0.033447265625, "loss_xval": 0.1669921875, "num_input_tokens_seen": 73145520, "step": 426 }, { "epoch": 0.1123166962583021, "grad_norm": 26.685292496143536, "learning_rate": 5e-06, "loss": 0.2674, "num_input_tokens_seen": 73317728, "step": 427 }, { "epoch": 0.1123166962583021, "loss": 0.24447987973690033, "loss_ce": 0.003635148983448744, "loss_iou": 0.7109375, "loss_num": 0.04833984375, "loss_xval": 0.2412109375, "num_input_tokens_seen": 73317728, "step": 427 }, { "epoch": 0.11257973301768923, "grad_norm": 10.587399579059058, "learning_rate": 5e-06, "loss": 0.2517, "num_input_tokens_seen": 73490108, "step": 428 }, { "epoch": 0.11257973301768923, "loss": 0.24800382554531097, "loss_ce": 0.002154202200472355, "loss_iou": 0.37890625, "loss_num": 0.04931640625, "loss_xval": 0.24609375, "num_input_tokens_seen": 73490108, "step": 428 }, { "epoch": 0.11284276977707634, "grad_norm": 6.6654701515468515, "learning_rate": 5e-06, "loss": 0.2127, "num_input_tokens_seen": 73662104, "step": 429 }, { "epoch": 0.11284276977707634, "loss": 0.35326629877090454, "loss_ce": 0.004389348905533552, "loss_iou": 0.474609375, "loss_num": 0.06982421875, "loss_xval": 0.349609375, "num_input_tokens_seen": 73662104, "step": 429 }, { "epoch": 0.11310580653646347, "grad_norm": 9.359106186429873, "learning_rate": 5e-06, "loss": 0.2209, "num_input_tokens_seen": 73833940, "step": 430 }, { "epoch": 
0.11310580653646347, "loss": 0.19281955063343048, "loss_ce": 0.0022678023669868708, "loss_iou": 0.52734375, "loss_num": 0.0380859375, "loss_xval": 0.1904296875, "num_input_tokens_seen": 73833940, "step": 430 }, { "epoch": 0.11336884329585059, "grad_norm": 10.912300657166487, "learning_rate": 5e-06, "loss": 0.2706, "num_input_tokens_seen": 74006248, "step": 431 }, { "epoch": 0.11336884329585059, "loss": 0.270729124546051, "loss_ce": 0.0032730703242123127, "loss_iou": 0.56640625, "loss_num": 0.053466796875, "loss_xval": 0.267578125, "num_input_tokens_seen": 74006248, "step": 431 }, { "epoch": 0.11363188005523772, "grad_norm": 7.80923377535479, "learning_rate": 5e-06, "loss": 0.2294, "num_input_tokens_seen": 74178292, "step": 432 }, { "epoch": 0.11363188005523772, "loss": 0.21906697750091553, "loss_ce": 0.002758371876552701, "loss_iou": 0.45703125, "loss_num": 0.043212890625, "loss_xval": 0.216796875, "num_input_tokens_seen": 74178292, "step": 432 }, { "epoch": 0.11389491681462484, "grad_norm": 7.423483614252486, "learning_rate": 5e-06, "loss": 0.2228, "num_input_tokens_seen": 74350588, "step": 433 }, { "epoch": 0.11389491681462484, "loss": 0.17986077070236206, "loss_ce": 0.0017601896543055773, "loss_iou": 0.48828125, "loss_num": 0.03564453125, "loss_xval": 0.177734375, "num_input_tokens_seen": 74350588, "step": 433 }, { "epoch": 0.11415795357401197, "grad_norm": 8.108512259957333, "learning_rate": 5e-06, "loss": 0.226, "num_input_tokens_seen": 74520920, "step": 434 }, { "epoch": 0.11415795357401197, "loss": 0.21928107738494873, "loss_ce": 0.00455939956009388, "loss_iou": 0.640625, "loss_num": 0.04296875, "loss_xval": 0.21484375, "num_input_tokens_seen": 74520920, "step": 434 }, { "epoch": 0.11442099033339909, "grad_norm": 19.006805430751477, "learning_rate": 5e-06, "loss": 0.2661, "num_input_tokens_seen": 74693212, "step": 435 }, { "epoch": 0.11442099033339909, "loss": 0.3151628077030182, "loss_ce": 0.0064469738863408566, "loss_iou": 0.625, "loss_num": 
0.061767578125, "loss_xval": 0.30859375, "num_input_tokens_seen": 74693212, "step": 435 }, { "epoch": 0.11468402709278622, "grad_norm": 8.863661674496596, "learning_rate": 5e-06, "loss": 0.2843, "num_input_tokens_seen": 74865396, "step": 436 }, { "epoch": 0.11468402709278622, "loss": 0.280520498752594, "loss_ce": 0.006594708655029535, "loss_iou": 0.44921875, "loss_num": 0.0546875, "loss_xval": 0.2734375, "num_input_tokens_seen": 74865396, "step": 436 }, { "epoch": 0.11494706385217335, "grad_norm": 7.497393138459489, "learning_rate": 5e-06, "loss": 0.1834, "num_input_tokens_seen": 75037856, "step": 437 }, { "epoch": 0.11494706385217335, "loss": 0.18996562063694, "loss_ce": 0.0047849551774561405, "loss_iou": 0.470703125, "loss_num": 0.037109375, "loss_xval": 0.185546875, "num_input_tokens_seen": 75037856, "step": 437 }, { "epoch": 0.11521010061156046, "grad_norm": 12.688606481249035, "learning_rate": 5e-06, "loss": 0.2195, "num_input_tokens_seen": 75210192, "step": 438 }, { "epoch": 0.11521010061156046, "loss": 0.20892465114593506, "loss_ce": 0.006348971277475357, "loss_iou": 0.470703125, "loss_num": 0.04052734375, "loss_xval": 0.2021484375, "num_input_tokens_seen": 75210192, "step": 438 }, { "epoch": 0.1154731373709476, "grad_norm": 8.208628036837384, "learning_rate": 5e-06, "loss": 0.2418, "num_input_tokens_seen": 75382312, "step": 439 }, { "epoch": 0.1154731373709476, "loss": 0.20543652772903442, "loss_ce": 0.001701178727671504, "loss_iou": 0.50390625, "loss_num": 0.040771484375, "loss_xval": 0.2041015625, "num_input_tokens_seen": 75382312, "step": 439 }, { "epoch": 0.11573617413033471, "grad_norm": 9.785336279235814, "learning_rate": 5e-06, "loss": 0.2151, "num_input_tokens_seen": 75554688, "step": 440 }, { "epoch": 0.11573617413033471, "loss": 0.26715782284736633, "loss_ce": 0.0069039189256727695, "loss_iou": 0.65234375, "loss_num": 0.052001953125, "loss_xval": 0.259765625, "num_input_tokens_seen": 75554688, "step": 440 }, { "epoch": 0.11599921088972184, 
"grad_norm": 23.031133350657914, "learning_rate": 5e-06, "loss": 0.2357, "num_input_tokens_seen": 75726964, "step": 441 }, { "epoch": 0.11599921088972184, "loss": 0.3105895519256592, "loss_ce": 0.004559269640594721, "loss_iou": 0.4609375, "loss_num": 0.061279296875, "loss_xval": 0.306640625, "num_input_tokens_seen": 75726964, "step": 441 }, { "epoch": 0.11626224764910896, "grad_norm": 8.337171777387358, "learning_rate": 5e-06, "loss": 0.239, "num_input_tokens_seen": 75899032, "step": 442 }, { "epoch": 0.11626224764910896, "loss": 0.20333924889564514, "loss_ce": 0.002655645599588752, "loss_iou": 0.55078125, "loss_num": 0.0400390625, "loss_xval": 0.201171875, "num_input_tokens_seen": 75899032, "step": 442 }, { "epoch": 0.11652528440849609, "grad_norm": 4.896294530096544, "learning_rate": 5e-06, "loss": 0.1725, "num_input_tokens_seen": 76071304, "step": 443 }, { "epoch": 0.11652528440849609, "loss": 0.20664632320404053, "loss_ce": 0.0032161371782422066, "loss_iou": 0.439453125, "loss_num": 0.040771484375, "loss_xval": 0.203125, "num_input_tokens_seen": 76071304, "step": 443 }, { "epoch": 0.11678832116788321, "grad_norm": 7.74925868300208, "learning_rate": 5e-06, "loss": 0.2369, "num_input_tokens_seen": 76241732, "step": 444 }, { "epoch": 0.11678832116788321, "loss": 0.2300896942615509, "loss_ce": 0.0016351052327081561, "loss_iou": 0.51953125, "loss_num": 0.045654296875, "loss_xval": 0.228515625, "num_input_tokens_seen": 76241732, "step": 444 }, { "epoch": 0.11705135792727034, "grad_norm": 8.593064888198587, "learning_rate": 5e-06, "loss": 0.2023, "num_input_tokens_seen": 76414004, "step": 445 }, { "epoch": 0.11705135792727034, "loss": 0.14537394046783447, "loss_ce": 0.0013919961638748646, "loss_iou": 0.404296875, "loss_num": 0.02880859375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 76414004, "step": 445 }, { "epoch": 0.11731439468665746, "grad_norm": 10.869613464978638, "learning_rate": 5e-06, "loss": 0.2657, "num_input_tokens_seen": 76586248, "step": 446 }, 
{ "epoch": 0.11731439468665746, "loss": 0.2349330186843872, "loss_ce": 0.004464263096451759, "loss_iou": 0.4140625, "loss_num": 0.046142578125, "loss_xval": 0.23046875, "num_input_tokens_seen": 76586248, "step": 446 }, { "epoch": 0.11757743144604459, "grad_norm": 12.757935439615991, "learning_rate": 5e-06, "loss": 0.2717, "num_input_tokens_seen": 76758328, "step": 447 }, { "epoch": 0.11757743144604459, "loss": 0.331451952457428, "loss_ce": 0.0016179666854441166, "loss_iou": 0.359375, "loss_num": 0.06591796875, "loss_xval": 0.330078125, "num_input_tokens_seen": 76758328, "step": 447 }, { "epoch": 0.1178404682054317, "grad_norm": 11.375965574655721, "learning_rate": 5e-06, "loss": 0.2032, "num_input_tokens_seen": 76930392, "step": 448 }, { "epoch": 0.1178404682054317, "loss": 0.21405665576457977, "loss_ce": 0.004584001377224922, "loss_iou": 0.42578125, "loss_num": 0.0419921875, "loss_xval": 0.208984375, "num_input_tokens_seen": 76930392, "step": 448 }, { "epoch": 0.11810350496481883, "grad_norm": 7.336838831766789, "learning_rate": 5e-06, "loss": 0.206, "num_input_tokens_seen": 77102556, "step": 449 }, { "epoch": 0.11810350496481883, "loss": 0.2166377753019333, "loss_ce": 0.005700268317013979, "loss_iou": 0.51171875, "loss_num": 0.042236328125, "loss_xval": 0.2109375, "num_input_tokens_seen": 77102556, "step": 449 }, { "epoch": 0.11836654172420595, "grad_norm": 11.990630578782412, "learning_rate": 5e-06, "loss": 0.2454, "num_input_tokens_seen": 77274600, "step": 450 }, { "epoch": 0.11836654172420595, "loss": 0.24853403866291046, "loss_ce": 0.00378305627964437, "loss_iou": 0.58984375, "loss_num": 0.049072265625, "loss_xval": 0.2451171875, "num_input_tokens_seen": 77274600, "step": 450 }, { "epoch": 0.11862957848359308, "grad_norm": 9.139541774912184, "learning_rate": 5e-06, "loss": 0.2682, "num_input_tokens_seen": 77446596, "step": 451 }, { "epoch": 0.11862957848359308, "loss": 0.2904722988605499, "loss_ce": 0.002020149724557996, "loss_iou": 0.259765625, "loss_num": 
0.0576171875, "loss_xval": 0.2890625, "num_input_tokens_seen": 77446596, "step": 451 }, { "epoch": 0.11889261524298021, "grad_norm": 7.052664643006658, "learning_rate": 5e-06, "loss": 0.199, "num_input_tokens_seen": 77618608, "step": 452 }, { "epoch": 0.11889261524298021, "loss": 0.23526260256767273, "loss_ce": 0.004305572714656591, "loss_iou": 0.5078125, "loss_num": 0.046142578125, "loss_xval": 0.23046875, "num_input_tokens_seen": 77618608, "step": 452 }, { "epoch": 0.11915565200236733, "grad_norm": 8.748276102929202, "learning_rate": 5e-06, "loss": 0.2101, "num_input_tokens_seen": 77790812, "step": 453 }, { "epoch": 0.11915565200236733, "loss": 0.2472519874572754, "loss_ce": 0.0025620569940656424, "loss_iou": 0.369140625, "loss_num": 0.048828125, "loss_xval": 0.2451171875, "num_input_tokens_seen": 77790812, "step": 453 }, { "epoch": 0.11941868876175446, "grad_norm": 5.877127785016851, "learning_rate": 5e-06, "loss": 0.179, "num_input_tokens_seen": 77962644, "step": 454 }, { "epoch": 0.11941868876175446, "loss": 0.2044929563999176, "loss_ce": 0.005640420597046614, "loss_iou": 0.5625, "loss_num": 0.039794921875, "loss_xval": 0.19921875, "num_input_tokens_seen": 77962644, "step": 454 }, { "epoch": 0.11968172552114158, "grad_norm": 12.849038288174842, "learning_rate": 5e-06, "loss": 0.1941, "num_input_tokens_seen": 78134616, "step": 455 }, { "epoch": 0.11968172552114158, "loss": 0.20492224395275116, "loss_ce": 0.002163449302315712, "loss_iou": 0.640625, "loss_num": 0.04052734375, "loss_xval": 0.203125, "num_input_tokens_seen": 78134616, "step": 455 }, { "epoch": 0.11994476228052871, "grad_norm": 9.804877236674342, "learning_rate": 5e-06, "loss": 0.2629, "num_input_tokens_seen": 78303656, "step": 456 }, { "epoch": 0.11994476228052871, "loss": 0.23393824696540833, "loss_ce": 0.0058498685248196125, "loss_iou": null, "loss_num": 0.045654296875, "loss_xval": 0.228515625, "num_input_tokens_seen": 78303656, "step": 456 }, { "epoch": 0.12020779903991582, "grad_norm": 
23.369495819583523, "learning_rate": 5e-06, "loss": 0.2449, "num_input_tokens_seen": 78475936, "step": 457 }, { "epoch": 0.12020779903991582, "loss": 0.2364155352115631, "loss_ce": 0.009792003780603409, "loss_iou": 0.4921875, "loss_num": 0.04541015625, "loss_xval": 0.2265625, "num_input_tokens_seen": 78475936, "step": 457 }, { "epoch": 0.12047083579930296, "grad_norm": 9.860704537193882, "learning_rate": 5e-06, "loss": 0.1929, "num_input_tokens_seen": 78648124, "step": 458 }, { "epoch": 0.12047083579930296, "loss": 0.1498676836490631, "loss_ce": 0.002711937762796879, "loss_iou": 0.66015625, "loss_num": 0.0294189453125, "loss_xval": 0.1474609375, "num_input_tokens_seen": 78648124, "step": 458 }, { "epoch": 0.12073387255869007, "grad_norm": 8.562392622535716, "learning_rate": 5e-06, "loss": 0.234, "num_input_tokens_seen": 78820104, "step": 459 }, { "epoch": 0.12073387255869007, "loss": 0.23113086819648743, "loss_ce": 0.0024931649677455425, "loss_iou": 0.484375, "loss_num": 0.045654296875, "loss_xval": 0.228515625, "num_input_tokens_seen": 78820104, "step": 459 }, { "epoch": 0.1209969093180772, "grad_norm": 6.866997211199077, "learning_rate": 5e-06, "loss": 0.2359, "num_input_tokens_seen": 78992332, "step": 460 }, { "epoch": 0.1209969093180772, "loss": 0.15971623361110687, "loss_ce": 0.0017572464421391487, "loss_iou": 0.369140625, "loss_num": 0.031494140625, "loss_xval": 0.158203125, "num_input_tokens_seen": 78992332, "step": 460 }, { "epoch": 0.12125994607746432, "grad_norm": 6.404476112237903, "learning_rate": 5e-06, "loss": 0.2015, "num_input_tokens_seen": 79164376, "step": 461 }, { "epoch": 0.12125994607746432, "loss": 0.2323172241449356, "loss_ce": 0.005937827751040459, "loss_iou": 0.59765625, "loss_num": 0.04541015625, "loss_xval": 0.2265625, "num_input_tokens_seen": 79164376, "step": 461 }, { "epoch": 0.12152298283685145, "grad_norm": 9.290291620775372, "learning_rate": 5e-06, "loss": 0.1959, "num_input_tokens_seen": 79336652, "step": 462 }, { "epoch": 
0.12152298283685145, "loss": 0.18938115239143372, "loss_ce": 0.002125292085111141, "loss_iou": 0.5390625, "loss_num": 0.037353515625, "loss_xval": 0.1875, "num_input_tokens_seen": 79336652, "step": 462 }, { "epoch": 0.12178601959623857, "grad_norm": 11.240674872659573, "learning_rate": 5e-06, "loss": 0.1971, "num_input_tokens_seen": 79508724, "step": 463 }, { "epoch": 0.12178601959623857, "loss": 0.2277367115020752, "loss_ce": 0.001418360392563045, "loss_iou": 0.306640625, "loss_num": 0.04541015625, "loss_xval": 0.2265625, "num_input_tokens_seen": 79508724, "step": 463 }, { "epoch": 0.1220490563556257, "grad_norm": 13.32960488624403, "learning_rate": 5e-06, "loss": 0.1817, "num_input_tokens_seen": 79678784, "step": 464 }, { "epoch": 0.1220490563556257, "loss": 0.21636496484279633, "loss_ce": 0.00182638771366328, "loss_iou": 0.376953125, "loss_num": 0.04296875, "loss_xval": 0.21484375, "num_input_tokens_seen": 79678784, "step": 464 }, { "epoch": 0.12231209311501283, "grad_norm": 12.994343511615464, "learning_rate": 5e-06, "loss": 0.2429, "num_input_tokens_seen": 79850852, "step": 465 }, { "epoch": 0.12231209311501283, "loss": 0.24236971139907837, "loss_ce": 0.0015249918214976788, "loss_iou": 0.419921875, "loss_num": 0.048095703125, "loss_xval": 0.2412109375, "num_input_tokens_seen": 79850852, "step": 465 }, { "epoch": 0.12257512987439995, "grad_norm": 10.746140563102669, "learning_rate": 5e-06, "loss": 0.2493, "num_input_tokens_seen": 80021192, "step": 466 }, { "epoch": 0.12257512987439995, "loss": 0.1711360514163971, "loss_ce": 0.001702459529042244, "loss_iou": 0.59765625, "loss_num": 0.033935546875, "loss_xval": 0.169921875, "num_input_tokens_seen": 80021192, "step": 466 }, { "epoch": 0.12283816663378708, "grad_norm": 20.444744144323252, "learning_rate": 5e-06, "loss": 0.2662, "num_input_tokens_seen": 80190112, "step": 467 }, { "epoch": 0.12283816663378708, "loss": 0.21875979006290436, "loss_ce": 0.0026953346095979214, "loss_iou": 0.4921875, "loss_num": 
0.043212890625, "loss_xval": 0.2158203125, "num_input_tokens_seen": 80190112, "step": 467 }, { "epoch": 0.1231012033931742, "grad_norm": 12.488827753013481, "learning_rate": 5e-06, "loss": 0.2328, "num_input_tokens_seen": 80362892, "step": 468 }, { "epoch": 0.1231012033931742, "loss": 0.17285287380218506, "loss_ce": 0.002381683327257633, "loss_iou": 0.5234375, "loss_num": 0.0341796875, "loss_xval": 0.1708984375, "num_input_tokens_seen": 80362892, "step": 468 }, { "epoch": 0.12336424015256132, "grad_norm": 9.176610530631967, "learning_rate": 5e-06, "loss": 0.2275, "num_input_tokens_seen": 80534972, "step": 469 }, { "epoch": 0.12336424015256132, "loss": 0.27365219593048096, "loss_ce": 0.0032664609607309103, "loss_iou": 0.421875, "loss_num": 0.053955078125, "loss_xval": 0.26953125, "num_input_tokens_seen": 80534972, "step": 469 }, { "epoch": 0.12362727691194844, "grad_norm": 33.167673801928274, "learning_rate": 5e-06, "loss": 0.2499, "num_input_tokens_seen": 80705332, "step": 470 }, { "epoch": 0.12362727691194844, "loss": 0.23707842826843262, "loss_ce": 0.0022151488810777664, "loss_iou": 0.337890625, "loss_num": 0.046875, "loss_xval": 0.234375, "num_input_tokens_seen": 80705332, "step": 470 }, { "epoch": 0.12389031367133557, "grad_norm": 7.668114037963123, "learning_rate": 5e-06, "loss": 0.2394, "num_input_tokens_seen": 80874564, "step": 471 }, { "epoch": 0.12389031367133557, "loss": 0.23063993453979492, "loss_ce": 0.006457816809415817, "loss_iou": 0.6171875, "loss_num": 0.044677734375, "loss_xval": 0.224609375, "num_input_tokens_seen": 80874564, "step": 471 }, { "epoch": 0.12415335043072269, "grad_norm": 10.510373606639076, "learning_rate": 5e-06, "loss": 0.2114, "num_input_tokens_seen": 81046768, "step": 472 }, { "epoch": 0.12415335043072269, "loss": 0.21059830486774445, "loss_ce": 0.002834630198776722, "loss_iou": 0.52734375, "loss_num": 0.04150390625, "loss_xval": 0.2080078125, "num_input_tokens_seen": 81046768, "step": 472 }, { "epoch": 0.12441638719010982, 
"grad_norm": 11.032749822897834, "learning_rate": 5e-06, "loss": 0.2413, "num_input_tokens_seen": 81218788, "step": 473 }, { "epoch": 0.12441638719010982, "loss": 0.2567683458328247, "loss_ce": 0.004815223626792431, "loss_iou": 0.66015625, "loss_num": 0.050537109375, "loss_xval": 0.251953125, "num_input_tokens_seen": 81218788, "step": 473 }, { "epoch": 0.12467942394949694, "grad_norm": 8.640049648990784, "learning_rate": 5e-06, "loss": 0.2498, "num_input_tokens_seen": 81390764, "step": 474 }, { "epoch": 0.12467942394949694, "loss": 0.3335922062397003, "loss_ce": 0.0035140730906277895, "loss_iou": 0.40625, "loss_num": 0.06591796875, "loss_xval": 0.330078125, "num_input_tokens_seen": 81390764, "step": 474 }, { "epoch": 0.12494246070888407, "grad_norm": 6.133084134287981, "learning_rate": 5e-06, "loss": 0.1862, "num_input_tokens_seen": 81561192, "step": 475 }, { "epoch": 0.12494246070888407, "loss": 0.19583408534526825, "loss_ce": 0.002474710112437606, "loss_iou": 0.28125, "loss_num": 0.038818359375, "loss_xval": 0.193359375, "num_input_tokens_seen": 81561192, "step": 475 }, { "epoch": 0.1252054974682712, "grad_norm": 15.123599266995042, "learning_rate": 5e-06, "loss": 0.212, "num_input_tokens_seen": 81731608, "step": 476 }, { "epoch": 0.1252054974682712, "loss": 0.2318619191646576, "loss_ce": 0.0012100562453269958, "loss_iou": 0.41796875, "loss_num": 0.046142578125, "loss_xval": 0.23046875, "num_input_tokens_seen": 81731608, "step": 476 }, { "epoch": 0.1254685342276583, "grad_norm": 10.43813000825477, "learning_rate": 5e-06, "loss": 0.1887, "num_input_tokens_seen": 81902388, "step": 477 }, { "epoch": 0.1254685342276583, "loss": 0.1947634220123291, "loss_ce": 0.0017092193011194468, "loss_iou": 0.35546875, "loss_num": 0.03857421875, "loss_xval": 0.193359375, "num_input_tokens_seen": 81902388, "step": 477 }, { "epoch": 0.12573157098704543, "grad_norm": 15.053311018918661, "learning_rate": 5e-06, "loss": 0.2559, "num_input_tokens_seen": 82074796, "step": 478 }, { 
"epoch": 0.12573157098704543, "loss": 0.23443953692913055, "loss_ce": 0.002872154116630554, "loss_iou": 0.59375, "loss_num": 0.04638671875, "loss_xval": 0.2314453125, "num_input_tokens_seen": 82074796, "step": 478 }, { "epoch": 0.12599460774643256, "grad_norm": 8.071545338749708, "learning_rate": 5e-06, "loss": 0.2732, "num_input_tokens_seen": 82246976, "step": 479 }, { "epoch": 0.12599460774643256, "loss": 0.22861449420452118, "loss_ce": 0.001929928082972765, "loss_iou": null, "loss_num": 0.04541015625, "loss_xval": 0.2265625, "num_input_tokens_seen": 82246976, "step": 479 }, { "epoch": 0.1262576445058197, "grad_norm": 13.388933170286325, "learning_rate": 5e-06, "loss": 0.217, "num_input_tokens_seen": 82419244, "step": 480 }, { "epoch": 0.1262576445058197, "loss": 0.24052694439888, "loss_ce": 0.002611914649605751, "loss_iou": 0.423828125, "loss_num": 0.047607421875, "loss_xval": 0.23828125, "num_input_tokens_seen": 82419244, "step": 480 }, { "epoch": 0.12652068126520682, "grad_norm": 10.19375568056882, "learning_rate": 5e-06, "loss": 0.2132, "num_input_tokens_seen": 82591568, "step": 481 }, { "epoch": 0.12652068126520682, "loss": 0.27330607175827026, "loss_ce": 0.007314843591302633, "loss_iou": 0.4453125, "loss_num": 0.05322265625, "loss_xval": 0.265625, "num_input_tokens_seen": 82591568, "step": 481 }, { "epoch": 0.12678371802459393, "grad_norm": 8.166078619911394, "learning_rate": 5e-06, "loss": 0.21, "num_input_tokens_seen": 82760532, "step": 482 }, { "epoch": 0.12678371802459393, "loss": 0.19459792971611023, "loss_ce": 0.003313753753900528, "loss_iou": 0.40234375, "loss_num": 0.038330078125, "loss_xval": 0.19140625, "num_input_tokens_seen": 82760532, "step": 482 }, { "epoch": 0.12704675478398106, "grad_norm": 8.980724720396978, "learning_rate": 5e-06, "loss": 0.2429, "num_input_tokens_seen": 82932700, "step": 483 }, { "epoch": 0.12704675478398106, "loss": 0.2369249314069748, "loss_ce": 0.0062120286747813225, "loss_iou": 0.455078125, "loss_num": 0.046142578125, 
"loss_xval": 0.23046875, "num_input_tokens_seen": 82932700, "step": 483 }, { "epoch": 0.1273097915433682, "grad_norm": 7.529582302992287, "learning_rate": 5e-06, "loss": 0.2337, "num_input_tokens_seen": 83104784, "step": 484 }, { "epoch": 0.1273097915433682, "loss": 0.27052199840545654, "loss_ce": 0.0020893928594887257, "loss_iou": 0.59765625, "loss_num": 0.0537109375, "loss_xval": 0.267578125, "num_input_tokens_seen": 83104784, "step": 484 }, { "epoch": 0.12757282830275532, "grad_norm": 9.051361983377177, "learning_rate": 5e-06, "loss": 0.2223, "num_input_tokens_seen": 83276660, "step": 485 }, { "epoch": 0.12757282830275532, "loss": 0.1893678605556488, "loss_ce": 0.004553401842713356, "loss_iou": 0.48046875, "loss_num": 0.037109375, "loss_xval": 0.1845703125, "num_input_tokens_seen": 83276660, "step": 485 }, { "epoch": 0.12783586506214242, "grad_norm": 7.363403269312881, "learning_rate": 5e-06, "loss": 0.2164, "num_input_tokens_seen": 83448804, "step": 486 }, { "epoch": 0.12783586506214242, "loss": 0.18258926272392273, "loss_ce": 0.004244527779519558, "loss_iou": 0.6015625, "loss_num": 0.03564453125, "loss_xval": 0.1787109375, "num_input_tokens_seen": 83448804, "step": 486 }, { "epoch": 0.12809890182152955, "grad_norm": 6.462059422866227, "learning_rate": 5e-06, "loss": 0.1922, "num_input_tokens_seen": 83621024, "step": 487 }, { "epoch": 0.12809890182152955, "loss": 0.14729665219783783, "loss_ce": 0.003986096940934658, "loss_iou": 0.578125, "loss_num": 0.0286865234375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 83621024, "step": 487 }, { "epoch": 0.12836193858091668, "grad_norm": 9.164596498872053, "learning_rate": 5e-06, "loss": 0.2078, "num_input_tokens_seen": 83793612, "step": 488 }, { "epoch": 0.12836193858091668, "loss": 0.20358332991600037, "loss_ce": 0.0016790404915809631, "loss_iou": 0.52734375, "loss_num": 0.040283203125, "loss_xval": 0.2021484375, "num_input_tokens_seen": 83793612, "step": 488 }, { "epoch": 0.12862497534030382, "grad_norm": 
13.35296525183839, "learning_rate": 5e-06, "loss": 0.3196, "num_input_tokens_seen": 83965664, "step": 489 }, { "epoch": 0.12862497534030382, "loss": 0.36003273725509644, "loss_ce": 0.007981948554515839, "loss_iou": 0.58984375, "loss_num": 0.0703125, "loss_xval": 0.3515625, "num_input_tokens_seen": 83965664, "step": 489 }, { "epoch": 0.12888801209969092, "grad_norm": 8.219901608770293, "learning_rate": 5e-06, "loss": 0.2425, "num_input_tokens_seen": 84137656, "step": 490 }, { "epoch": 0.12888801209969092, "loss": 0.21684233844280243, "loss_ce": 0.0033413656055927277, "loss_iou": 0.3359375, "loss_num": 0.042724609375, "loss_xval": 0.2138671875, "num_input_tokens_seen": 84137656, "step": 490 }, { "epoch": 0.12915104885907805, "grad_norm": 5.772697796397355, "learning_rate": 5e-06, "loss": 0.1873, "num_input_tokens_seen": 84308240, "step": 491 }, { "epoch": 0.12915104885907805, "loss": 0.1743691861629486, "loss_ce": 0.0028604045510292053, "loss_iou": 0.484375, "loss_num": 0.0341796875, "loss_xval": 0.171875, "num_input_tokens_seen": 84308240, "step": 491 }, { "epoch": 0.12941408561846518, "grad_norm": 7.154674159301149, "learning_rate": 5e-06, "loss": 0.1965, "num_input_tokens_seen": 84478680, "step": 492 }, { "epoch": 0.12941408561846518, "loss": 0.15929880738258362, "loss_ce": 0.003598117269575596, "loss_iou": 0.66015625, "loss_num": 0.0311279296875, "loss_xval": 0.1552734375, "num_input_tokens_seen": 84478680, "step": 492 }, { "epoch": 0.1296771223778523, "grad_norm": 10.931778081568359, "learning_rate": 5e-06, "loss": 0.2397, "num_input_tokens_seen": 84648900, "step": 493 }, { "epoch": 0.1296771223778523, "loss": 0.24825721979141235, "loss_ce": 0.002285533118993044, "loss_iou": 0.6640625, "loss_num": 0.04931640625, "loss_xval": 0.24609375, "num_input_tokens_seen": 84648900, "step": 493 }, { "epoch": 0.12994015913723944, "grad_norm": 10.142675625135299, "learning_rate": 5e-06, "loss": 0.2203, "num_input_tokens_seen": 84820972, "step": 494 }, { "epoch": 
0.12994015913723944, "loss": 0.23026269674301147, "loss_ce": 0.0015639647608622909, "loss_iou": 0.59765625, "loss_num": 0.045654296875, "loss_xval": 0.228515625, "num_input_tokens_seen": 84820972, "step": 494 }, { "epoch": 0.13020319589662654, "grad_norm": 10.196346960471569, "learning_rate": 5e-06, "loss": 0.197, "num_input_tokens_seen": 84993508, "step": 495 }, { "epoch": 0.13020319589662654, "loss": 0.21560978889465332, "loss_ce": 0.001620523864403367, "loss_iou": 0.65234375, "loss_num": 0.042724609375, "loss_xval": 0.2138671875, "num_input_tokens_seen": 84993508, "step": 495 }, { "epoch": 0.13046623265601368, "grad_norm": 8.523677693002021, "learning_rate": 5e-06, "loss": 0.2357, "num_input_tokens_seen": 85165596, "step": 496 }, { "epoch": 0.13046623265601368, "loss": 0.17495451867580414, "loss_ce": 0.003750909585505724, "loss_iou": 0.416015625, "loss_num": 0.034423828125, "loss_xval": 0.1708984375, "num_input_tokens_seen": 85165596, "step": 496 }, { "epoch": 0.1307292694154008, "grad_norm": 5.883749010160293, "learning_rate": 5e-06, "loss": 0.217, "num_input_tokens_seen": 85337976, "step": 497 }, { "epoch": 0.1307292694154008, "loss": 0.22726929187774658, "loss_ce": 0.009862057864665985, "loss_iou": 0.494140625, "loss_num": 0.04345703125, "loss_xval": 0.2177734375, "num_input_tokens_seen": 85337976, "step": 497 }, { "epoch": 0.13099230617478794, "grad_norm": 7.408260148240015, "learning_rate": 5e-06, "loss": 0.2231, "num_input_tokens_seen": 85509860, "step": 498 }, { "epoch": 0.13099230617478794, "loss": 0.1337101012468338, "loss_ce": 0.004010388161987066, "loss_iou": 0.515625, "loss_num": 0.02587890625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 85509860, "step": 498 }, { "epoch": 0.13125534293417504, "grad_norm": 15.7224480316305, "learning_rate": 5e-06, "loss": 0.2184, "num_input_tokens_seen": 85680488, "step": 499 }, { "epoch": 0.13125534293417504, "loss": 0.3409336507320404, "loss_ce": 0.002554745879024267, "loss_iou": 0.423828125, "loss_num": 
0.06787109375, "loss_xval": 0.337890625, "num_input_tokens_seen": 85680488, "step": 499 }, { "epoch": 0.13151837969356217, "grad_norm": 9.533912931076111, "learning_rate": 5e-06, "loss": 0.2073, "num_input_tokens_seen": 85852688, "step": 500 }, { "epoch": 0.13151837969356217, "eval_websight_new_CIoU": 0.7392345666885376, "eval_websight_new_GIoU": 0.7380270659923553, "eval_websight_new_IoU": 0.7466294467449188, "eval_websight_new_MAE_all": 0.04153955727815628, "eval_websight_new_MAE_h": 0.03558222949504852, "eval_websight_new_MAE_w": 0.06108394265174866, "eval_websight_new_MAE_x": 0.04947785474359989, "eval_websight_new_MAE_y": 0.020014189183712006, "eval_websight_new_NUM_probability": 0.9849532246589661, "eval_websight_new_inside_bbox": 1.0, "eval_websight_new_loss": 0.20474952459335327, "eval_websight_new_loss_ce": 0.0016258999821729958, "eval_websight_new_loss_iou": 0.635986328125, "eval_websight_new_loss_num": 0.037567138671875, "eval_websight_new_loss_xval": 0.187774658203125, "eval_websight_new_runtime": 54.6509, "eval_websight_new_samples_per_second": 0.915, "eval_websight_new_steps_per_second": 0.037, "num_input_tokens_seen": 85852688, "step": 500 }, { "epoch": 0.13151837969356217, "eval_seeclick_CIoU": 0.41250014305114746, "eval_seeclick_GIoU": 0.40925678610801697, "eval_seeclick_IoU": 0.4613874703645706, "eval_seeclick_MAE_all": 0.08603048324584961, "eval_seeclick_MAE_h": 0.05444946512579918, "eval_seeclick_MAE_w": 0.12106707319617271, "eval_seeclick_MAE_x": 0.12660933285951614, "eval_seeclick_MAE_y": 0.04199606738984585, "eval_seeclick_NUM_probability": 0.9906161725521088, "eval_seeclick_inside_bbox": 0.7698863744735718, "eval_seeclick_loss": 0.3508862257003784, "eval_seeclick_loss_ce": 0.013088095001876354, "eval_seeclick_loss_iou": 0.609375, "eval_seeclick_loss_num": 0.0647735595703125, "eval_seeclick_loss_xval": 0.323974609375, "eval_seeclick_runtime": 71.4374, "eval_seeclick_samples_per_second": 0.602, "eval_seeclick_steps_per_second": 0.028, 
"num_input_tokens_seen": 85852688, "step": 500 }, { "epoch": 0.13151837969356217, "eval_icons_CIoU": 0.7077827751636505, "eval_icons_GIoU": 0.7009360492229462, "eval_icons_IoU": 0.7175993025302887, "eval_icons_MAE_all": 0.041869472712278366, "eval_icons_MAE_h": 0.04292410984635353, "eval_icons_MAE_w": 0.04752085544168949, "eval_icons_MAE_x": 0.038647109642624855, "eval_icons_MAE_y": 0.03838581405580044, "eval_icons_NUM_probability": 0.9924971163272858, "eval_icons_inside_bbox": 1.0, "eval_icons_loss": 0.13132929801940918, "eval_icons_loss_ce": 0.003774530749069527, "eval_icons_loss_iou": 0.590576171875, "eval_icons_loss_num": 0.024749755859375, "eval_icons_loss_xval": 0.12384033203125, "eval_icons_runtime": 78.9038, "eval_icons_samples_per_second": 0.634, "eval_icons_steps_per_second": 0.025, "num_input_tokens_seen": 85852688, "step": 500 }, { "epoch": 0.13151837969356217, "eval_screenspot_CIoU": 0.4913978377978007, "eval_screenspot_GIoU": 0.47330527504285175, "eval_screenspot_IoU": 0.5310356616973877, "eval_screenspot_MAE_all": 0.09648379683494568, "eval_screenspot_MAE_h": 0.061782063295443855, "eval_screenspot_MAE_w": 0.14932986597220102, "eval_screenspot_MAE_x": 0.11150848865509033, "eval_screenspot_MAE_y": 0.0633147731423378, "eval_screenspot_NUM_probability": 0.9926036596298218, "eval_screenspot_inside_bbox": 0.8454166650772095, "eval_screenspot_loss": 0.8485715389251709, "eval_screenspot_loss_ce": 0.47645074129104614, "eval_screenspot_loss_iou": 0.5421142578125, "eval_screenspot_loss_num": 0.07304890950520833, "eval_screenspot_loss_xval": 0.3654378255208333, "eval_screenspot_runtime": 144.4943, "eval_screenspot_samples_per_second": 0.616, "eval_screenspot_steps_per_second": 0.021, "num_input_tokens_seen": 85852688, "step": 500 }, { "epoch": 0.13151837969356217, "loss": 0.8175798058509827, "loss_ce": 0.44428879022598267, "loss_iou": 0.5, "loss_num": 0.07470703125, "loss_xval": 0.373046875, "num_input_tokens_seen": 85852688, "step": 500 }, { "epoch": 
0.1317814164529493, "grad_norm": 9.07691940041295, "learning_rate": 5e-06, "loss": 0.1533, "num_input_tokens_seen": 86021700, "step": 501 }, { "epoch": 0.1317814164529493, "loss": 0.15382635593414307, "loss_ce": 0.003191583789885044, "loss_iou": 0.6640625, "loss_num": 0.0301513671875, "loss_xval": 0.150390625, "num_input_tokens_seen": 86021700, "step": 501 }, { "epoch": 0.13204445321233643, "grad_norm": 8.240988876623113, "learning_rate": 5e-06, "loss": 0.2297, "num_input_tokens_seen": 86193792, "step": 502 }, { "epoch": 0.13204445321233643, "loss": 0.23657214641571045, "loss_ce": 0.003112667240202427, "loss_iou": 0.625, "loss_num": 0.046630859375, "loss_xval": 0.2333984375, "num_input_tokens_seen": 86193792, "step": 502 }, { "epoch": 0.13230748997172354, "grad_norm": 8.766196459329715, "learning_rate": 5e-06, "loss": 0.2049, "num_input_tokens_seen": 86365784, "step": 503 }, { "epoch": 0.13230748997172354, "loss": 0.23612971603870392, "loss_ce": 0.004379219841212034, "loss_iou": 0.54296875, "loss_num": 0.04638671875, "loss_xval": 0.2314453125, "num_input_tokens_seen": 86365784, "step": 503 }, { "epoch": 0.13257052673111067, "grad_norm": 5.336005731654041, "learning_rate": 5e-06, "loss": 0.1306, "num_input_tokens_seen": 86538044, "step": 504 }, { "epoch": 0.13257052673111067, "loss": 0.15480023622512817, "loss_ce": 0.0017545849550515413, "loss_iou": 0.625, "loss_num": 0.0306396484375, "loss_xval": 0.1533203125, "num_input_tokens_seen": 86538044, "step": 504 }, { "epoch": 0.1328335634904978, "grad_norm": 15.391291602176388, "learning_rate": 5e-06, "loss": 0.2019, "num_input_tokens_seen": 86710012, "step": 505 }, { "epoch": 0.1328335634904978, "loss": 0.16446326673030853, "loss_ce": 0.0012552611296996474, "loss_iou": 0.6796875, "loss_num": 0.03271484375, "loss_xval": 0.1630859375, "num_input_tokens_seen": 86710012, "step": 505 }, { "epoch": 0.13309660024988493, "grad_norm": 7.414971095415403, "learning_rate": 5e-06, "loss": 0.2046, "num_input_tokens_seen": 86882600, 
"step": 506 }, { "epoch": 0.13309660024988493, "loss": 0.18114808201789856, "loss_ce": 0.0028033575508743525, "loss_iou": 0.625, "loss_num": 0.03564453125, "loss_xval": 0.1787109375, "num_input_tokens_seen": 86882600, "step": 506 }, { "epoch": 0.13335963700927206, "grad_norm": 7.149578381431456, "learning_rate": 5e-06, "loss": 0.1878, "num_input_tokens_seen": 87055160, "step": 507 }, { "epoch": 0.13335963700927206, "loss": 0.185621976852417, "loss_ce": 0.0038592712953686714, "loss_iou": 0.39453125, "loss_num": 0.036376953125, "loss_xval": 0.181640625, "num_input_tokens_seen": 87055160, "step": 507 }, { "epoch": 0.13362267376865916, "grad_norm": 7.2770390495694235, "learning_rate": 5e-06, "loss": 0.2051, "num_input_tokens_seen": 87227176, "step": 508 }, { "epoch": 0.13362267376865916, "loss": 0.12990637123584747, "loss_ce": 0.0011221927125006914, "loss_iou": 0.453125, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 87227176, "step": 508 }, { "epoch": 0.1338857105280463, "grad_norm": 13.071840767784375, "learning_rate": 5e-06, "loss": 0.2921, "num_input_tokens_seen": 87397716, "step": 509 }, { "epoch": 0.1338857105280463, "loss": 0.31984156370162964, "loss_ce": 0.0027639116160571575, "loss_iou": null, "loss_num": 0.0634765625, "loss_xval": 0.31640625, "num_input_tokens_seen": 87397716, "step": 509 }, { "epoch": 0.13414874728743342, "grad_norm": 9.630666522841075, "learning_rate": 5e-06, "loss": 0.1771, "num_input_tokens_seen": 87570180, "step": 510 }, { "epoch": 0.13414874728743342, "loss": 0.14270807802677155, "loss_ce": 0.002327217720448971, "loss_iou": 0.6328125, "loss_num": 0.028076171875, "loss_xval": 0.140625, "num_input_tokens_seen": 87570180, "step": 510 }, { "epoch": 0.13441178404682055, "grad_norm": 7.92909505971618, "learning_rate": 5e-06, "loss": 0.2147, "num_input_tokens_seen": 87742132, "step": 511 }, { "epoch": 0.13441178404682055, "loss": 0.24002233147621155, "loss_ce": 0.0020462563261389732, "loss_iou": 0.5859375, 
"loss_num": 0.047607421875, "loss_xval": 0.23828125, "num_input_tokens_seen": 87742132, "step": 511 }, { "epoch": 0.13467482080620766, "grad_norm": 11.73787088896753, "learning_rate": 5e-06, "loss": 0.2146, "num_input_tokens_seen": 87914144, "step": 512 }, { "epoch": 0.13467482080620766, "loss": 0.2217179834842682, "loss_ce": 0.00376143422909081, "loss_iou": 0.5859375, "loss_num": 0.043701171875, "loss_xval": 0.2177734375, "num_input_tokens_seen": 87914144, "step": 512 }, { "epoch": 0.1349378575655948, "grad_norm": 14.481870714144165, "learning_rate": 5e-06, "loss": 0.2105, "num_input_tokens_seen": 88086392, "step": 513 }, { "epoch": 0.1349378575655948, "loss": 0.16927534341812134, "loss_ce": 0.0027714259922504425, "loss_iou": 0.36328125, "loss_num": 0.033203125, "loss_xval": 0.166015625, "num_input_tokens_seen": 88086392, "step": 513 }, { "epoch": 0.13520089432498192, "grad_norm": 11.732408058803708, "learning_rate": 5e-06, "loss": 0.2117, "num_input_tokens_seen": 88258824, "step": 514 }, { "epoch": 0.13520089432498192, "loss": 0.21276208758354187, "loss_ce": 0.0047542620450258255, "loss_iou": 0.6171875, "loss_num": 0.04150390625, "loss_xval": 0.2080078125, "num_input_tokens_seen": 88258824, "step": 514 }, { "epoch": 0.13546393108436905, "grad_norm": 8.699627697080732, "learning_rate": 5e-06, "loss": 0.1604, "num_input_tokens_seen": 88431280, "step": 515 }, { "epoch": 0.13546393108436905, "loss": 0.13956406712532043, "loss_ce": 0.005469819065183401, "loss_iou": 0.62890625, "loss_num": 0.02685546875, "loss_xval": 0.1337890625, "num_input_tokens_seen": 88431280, "step": 515 }, { "epoch": 0.13572696784375615, "grad_norm": 9.441247877196542, "learning_rate": 5e-06, "loss": 0.2418, "num_input_tokens_seen": 88603308, "step": 516 }, { "epoch": 0.13572696784375615, "loss": 0.16743244230747223, "loss_ce": 0.00141681800596416, "loss_iou": 0.49609375, "loss_num": 0.033203125, "loss_xval": 0.166015625, "num_input_tokens_seen": 88603308, "step": 516 }, { "epoch": 
0.13599000460314328, "grad_norm": 11.156675067329255, "learning_rate": 5e-06, "loss": 0.1488, "num_input_tokens_seen": 88775492, "step": 517 }, { "epoch": 0.13599000460314328, "loss": 0.167589008808136, "loss_ce": 0.0012681989464908838, "loss_iou": 0.50390625, "loss_num": 0.033203125, "loss_xval": 0.166015625, "num_input_tokens_seen": 88775492, "step": 517 }, { "epoch": 0.1362530413625304, "grad_norm": 9.64852272360873, "learning_rate": 5e-06, "loss": 0.2377, "num_input_tokens_seen": 88946204, "step": 518 }, { "epoch": 0.1362530413625304, "loss": 0.2998642921447754, "loss_ce": 0.004698258824646473, "loss_iou": 0.5859375, "loss_num": 0.05908203125, "loss_xval": 0.294921875, "num_input_tokens_seen": 88946204, "step": 518 }, { "epoch": 0.13651607812191754, "grad_norm": 8.486100540746042, "learning_rate": 5e-06, "loss": 0.207, "num_input_tokens_seen": 89116056, "step": 519 }, { "epoch": 0.13651607812191754, "loss": 0.21377842128276825, "loss_ce": 0.004488877020776272, "loss_iou": 0.64453125, "loss_num": 0.041748046875, "loss_xval": 0.208984375, "num_input_tokens_seen": 89116056, "step": 519 }, { "epoch": 0.13677911488130468, "grad_norm": 7.241270611453955, "learning_rate": 5e-06, "loss": 0.2361, "num_input_tokens_seen": 89287012, "step": 520 }, { "epoch": 0.13677911488130468, "loss": 0.23230193555355072, "loss_ce": 0.0012228279374539852, "loss_iou": 0.447265625, "loss_num": 0.04638671875, "loss_xval": 0.2314453125, "num_input_tokens_seen": 89287012, "step": 520 }, { "epoch": 0.13704215164069178, "grad_norm": 9.086385631838745, "learning_rate": 5e-06, "loss": 0.1756, "num_input_tokens_seen": 89457780, "step": 521 }, { "epoch": 0.13704215164069178, "loss": 0.16174045205116272, "loss_ce": 0.0012180046178400517, "loss_iou": 0.478515625, "loss_num": 0.031982421875, "loss_xval": 0.16015625, "num_input_tokens_seen": 89457780, "step": 521 }, { "epoch": 0.1373051884000789, "grad_norm": 9.260504659218878, "learning_rate": 5e-06, "loss": 0.1871, "num_input_tokens_seen": 89628244, 
"step": 522 }, { "epoch": 0.1373051884000789, "loss": 0.18322458863258362, "loss_ce": 0.005673316773027182, "loss_iou": 0.5390625, "loss_num": 0.03564453125, "loss_xval": 0.177734375, "num_input_tokens_seen": 89628244, "step": 522 }, { "epoch": 0.13756822515946604, "grad_norm": 10.862554096761864, "learning_rate": 5e-06, "loss": 0.1938, "num_input_tokens_seen": 89798512, "step": 523 }, { "epoch": 0.13756822515946604, "loss": 0.18914146721363068, "loss_ce": 0.002434919821098447, "loss_iou": 0.62109375, "loss_num": 0.037353515625, "loss_xval": 0.1865234375, "num_input_tokens_seen": 89798512, "step": 523 }, { "epoch": 0.13783126191885317, "grad_norm": 8.527732112130064, "learning_rate": 5e-06, "loss": 0.2597, "num_input_tokens_seen": 89968992, "step": 524 }, { "epoch": 0.13783126191885317, "loss": 0.2936267554759979, "loss_ce": 0.0038318424485623837, "loss_iou": 0.40234375, "loss_num": 0.057861328125, "loss_xval": 0.2890625, "num_input_tokens_seen": 89968992, "step": 524 }, { "epoch": 0.13809429867824027, "grad_norm": 8.96210431629978, "learning_rate": 5e-06, "loss": 0.1589, "num_input_tokens_seen": 90140828, "step": 525 }, { "epoch": 0.13809429867824027, "loss": 0.22792883217334747, "loss_ce": 0.001427364069968462, "loss_iou": 0.396484375, "loss_num": 0.04541015625, "loss_xval": 0.2265625, "num_input_tokens_seen": 90140828, "step": 525 }, { "epoch": 0.1383573354376274, "grad_norm": 10.303553365642298, "learning_rate": 5e-06, "loss": 0.1672, "num_input_tokens_seen": 90311476, "step": 526 }, { "epoch": 0.1383573354376274, "loss": 0.09766636043787003, "loss_ce": 0.00202426896430552, "loss_iou": 0.59765625, "loss_num": 0.0191650390625, "loss_xval": 0.095703125, "num_input_tokens_seen": 90311476, "step": 526 }, { "epoch": 0.13862037219701454, "grad_norm": 7.578553389392675, "learning_rate": 5e-06, "loss": 0.1767, "num_input_tokens_seen": 90483668, "step": 527 }, { "epoch": 0.13862037219701454, "loss": 0.2255394458770752, "loss_ce": 0.0017845738912001252, "loss_iou": 
0.5234375, "loss_num": 0.044677734375, "loss_xval": 0.2236328125, "num_input_tokens_seen": 90483668, "step": 527 }, { "epoch": 0.13888340895640167, "grad_norm": 11.866590519507064, "learning_rate": 5e-06, "loss": 0.2463, "num_input_tokens_seen": 90655996, "step": 528 }, { "epoch": 0.13888340895640167, "loss": 0.21356430649757385, "loss_ce": 0.004030614625662565, "loss_iou": 0.6953125, "loss_num": 0.0419921875, "loss_xval": 0.2099609375, "num_input_tokens_seen": 90655996, "step": 528 }, { "epoch": 0.13914644571578877, "grad_norm": 9.66204006000912, "learning_rate": 5e-06, "loss": 0.2311, "num_input_tokens_seen": 90828348, "step": 529 }, { "epoch": 0.13914644571578877, "loss": 0.21380871534347534, "loss_ce": 0.0014063662383705378, "loss_iou": 0.5546875, "loss_num": 0.04248046875, "loss_xval": 0.212890625, "num_input_tokens_seen": 90828348, "step": 529 }, { "epoch": 0.1394094824751759, "grad_norm": 7.3801351915919975, "learning_rate": 5e-06, "loss": 0.2048, "num_input_tokens_seen": 91000476, "step": 530 }, { "epoch": 0.1394094824751759, "loss": 0.19109413027763367, "loss_ce": 0.0008780673379078507, "loss_iou": 0.29296875, "loss_num": 0.0380859375, "loss_xval": 0.1904296875, "num_input_tokens_seen": 91000476, "step": 530 }, { "epoch": 0.13967251923456303, "grad_norm": 6.737214273696564, "learning_rate": 5e-06, "loss": 0.2037, "num_input_tokens_seen": 91173056, "step": 531 }, { "epoch": 0.13967251923456303, "loss": 0.26445770263671875, "loss_ce": 0.0033187787048518658, "loss_iou": 0.486328125, "loss_num": 0.05224609375, "loss_xval": 0.26171875, "num_input_tokens_seen": 91173056, "step": 531 }, { "epoch": 0.13993555599395016, "grad_norm": 5.9400720051741835, "learning_rate": 5e-06, "loss": 0.1598, "num_input_tokens_seen": 91345516, "step": 532 }, { "epoch": 0.13993555599395016, "loss": 0.12527181208133698, "loss_ce": 0.001858725561760366, "loss_iou": 0.59765625, "loss_num": 0.024658203125, "loss_xval": 0.12353515625, "num_input_tokens_seen": 91345516, "step": 532 }, { 
"epoch": 0.1401985927533373, "grad_norm": 8.591575042379741, "learning_rate": 5e-06, "loss": 0.2351, "num_input_tokens_seen": 91516156, "step": 533 }, { "epoch": 0.1401985927533373, "loss": 0.22694742679595947, "loss_ce": 0.005511872004717588, "loss_iou": 0.443359375, "loss_num": 0.04443359375, "loss_xval": 0.2216796875, "num_input_tokens_seen": 91516156, "step": 533 }, { "epoch": 0.1404616295127244, "grad_norm": 26.674816384255838, "learning_rate": 5e-06, "loss": 0.2705, "num_input_tokens_seen": 91685124, "step": 534 }, { "epoch": 0.1404616295127244, "loss": 0.2157442569732666, "loss_ce": 0.005356077570468187, "loss_iou": 0.52734375, "loss_num": 0.0419921875, "loss_xval": 0.2099609375, "num_input_tokens_seen": 91685124, "step": 534 }, { "epoch": 0.14072466627211153, "grad_norm": 7.992225607802382, "learning_rate": 5e-06, "loss": 0.2194, "num_input_tokens_seen": 91857436, "step": 535 }, { "epoch": 0.14072466627211153, "loss": 0.18514756858348846, "loss_ce": 0.0036290136631578207, "loss_iou": 0.625, "loss_num": 0.036376953125, "loss_xval": 0.181640625, "num_input_tokens_seen": 91857436, "step": 535 }, { "epoch": 0.14098770303149866, "grad_norm": 7.005269973220872, "learning_rate": 5e-06, "loss": 0.2265, "num_input_tokens_seen": 92029236, "step": 536 }, { "epoch": 0.14098770303149866, "loss": 0.23437106609344482, "loss_ce": 0.0035971456672996283, "loss_iou": 0.6640625, "loss_num": 0.046142578125, "loss_xval": 0.23046875, "num_input_tokens_seen": 92029236, "step": 536 }, { "epoch": 0.1412507397908858, "grad_norm": 13.679910251313718, "learning_rate": 5e-06, "loss": 0.2237, "num_input_tokens_seen": 92201248, "step": 537 }, { "epoch": 0.1412507397908858, "loss": 0.26536300778388977, "loss_ce": 0.006451865192502737, "loss_iou": 0.625, "loss_num": 0.0517578125, "loss_xval": 0.259765625, "num_input_tokens_seen": 92201248, "step": 537 }, { "epoch": 0.1415137765502729, "grad_norm": 15.448137214976848, "learning_rate": 5e-06, "loss": 0.2463, "num_input_tokens_seen": 92373276, 
"step": 538 }, { "epoch": 0.1415137765502729, "loss": 0.2220609188079834, "loss_ce": 0.0029447050765156746, "loss_iou": 0.69140625, "loss_num": 0.043701171875, "loss_xval": 0.21875, "num_input_tokens_seen": 92373276, "step": 538 }, { "epoch": 0.14177681330966002, "grad_norm": 11.236165761153213, "learning_rate": 5e-06, "loss": 0.2009, "num_input_tokens_seen": 92545880, "step": 539 }, { "epoch": 0.14177681330966002, "loss": 0.21155700087547302, "loss_ce": 0.006112661678344011, "loss_iou": 0.3515625, "loss_num": 0.041015625, "loss_xval": 0.205078125, "num_input_tokens_seen": 92545880, "step": 539 }, { "epoch": 0.14203985006904715, "grad_norm": 7.303097321727043, "learning_rate": 5e-06, "loss": 0.2029, "num_input_tokens_seen": 92717956, "step": 540 }, { "epoch": 0.14203985006904715, "loss": 0.2111251950263977, "loss_ce": 0.0041549778543412685, "loss_iou": 0.37109375, "loss_num": 0.04150390625, "loss_xval": 0.20703125, "num_input_tokens_seen": 92717956, "step": 540 }, { "epoch": 0.14230288682843428, "grad_norm": 28.317923328050057, "learning_rate": 5e-06, "loss": 0.1946, "num_input_tokens_seen": 92890260, "step": 541 }, { "epoch": 0.14230288682843428, "loss": 0.18131288886070251, "loss_ce": 0.004371959716081619, "loss_iou": 0.498046875, "loss_num": 0.035400390625, "loss_xval": 0.1767578125, "num_input_tokens_seen": 92890260, "step": 541 }, { "epoch": 0.14256592358782139, "grad_norm": 12.97806529461824, "learning_rate": 5e-06, "loss": 0.2534, "num_input_tokens_seen": 93062192, "step": 542 }, { "epoch": 0.14256592358782139, "loss": 0.30919933319091797, "loss_ce": 0.003779401071369648, "loss_iou": 0.640625, "loss_num": 0.06103515625, "loss_xval": 0.3046875, "num_input_tokens_seen": 93062192, "step": 542 }, { "epoch": 0.14282896034720852, "grad_norm": 7.8709246328059725, "learning_rate": 5e-06, "loss": 0.1756, "num_input_tokens_seen": 93234480, "step": 543 }, { "epoch": 0.14282896034720852, "loss": 0.2343926727771759, "loss_ce": 0.002581145381554961, "loss_iou": 
0.40234375, "loss_num": 0.04638671875, "loss_xval": 0.2314453125, "num_input_tokens_seen": 93234480, "step": 543 }, { "epoch": 0.14309199710659565, "grad_norm": 8.436976597382053, "learning_rate": 5e-06, "loss": 0.1913, "num_input_tokens_seen": 93406784, "step": 544 }, { "epoch": 0.14309199710659565, "loss": 0.20959031581878662, "loss_ce": 0.0013383585028350353, "loss_iou": 0.4453125, "loss_num": 0.041748046875, "loss_xval": 0.2080078125, "num_input_tokens_seen": 93406784, "step": 544 }, { "epoch": 0.14335503386598278, "grad_norm": 12.053836172217155, "learning_rate": 5e-06, "loss": 0.2433, "num_input_tokens_seen": 93577272, "step": 545 }, { "epoch": 0.14335503386598278, "loss": 0.18772834539413452, "loss_ce": 0.002242510672658682, "loss_iou": 0.52734375, "loss_num": 0.037109375, "loss_xval": 0.185546875, "num_input_tokens_seen": 93577272, "step": 545 }, { "epoch": 0.1436180706253699, "grad_norm": 9.099796427619822, "learning_rate": 5e-06, "loss": 0.1713, "num_input_tokens_seen": 93749292, "step": 546 }, { "epoch": 0.1436180706253699, "loss": 0.2121119648218155, "loss_ce": 0.00813247635960579, "loss_iou": 0.671875, "loss_num": 0.040771484375, "loss_xval": 0.2041015625, "num_input_tokens_seen": 93749292, "step": 546 }, { "epoch": 0.143881107384757, "grad_norm": 13.08678717007557, "learning_rate": 5e-06, "loss": 0.2241, "num_input_tokens_seen": 93921812, "step": 547 }, { "epoch": 0.143881107384757, "loss": 0.22192896902561188, "loss_ce": 0.002934828167781234, "loss_iou": 0.38671875, "loss_num": 0.0439453125, "loss_xval": 0.21875, "num_input_tokens_seen": 93921812, "step": 547 }, { "epoch": 0.14414414414414414, "grad_norm": 8.230228011363112, "learning_rate": 5e-06, "loss": 0.2257, "num_input_tokens_seen": 94093976, "step": 548 }, { "epoch": 0.14414414414414414, "loss": 0.21945567429065704, "loss_ce": 0.0016822349280118942, "loss_iou": 0.45703125, "loss_num": 0.04345703125, "loss_xval": 0.2177734375, "num_input_tokens_seen": 94093976, "step": 548 }, { "epoch": 
0.14440718090353127, "grad_norm": 6.524669746029216, "learning_rate": 5e-06, "loss": 0.124, "num_input_tokens_seen": 94262972, "step": 549 }, { "epoch": 0.14440718090353127, "loss": 0.13413353264331818, "loss_ce": 0.0032741604372859, "loss_iou": 0.53515625, "loss_num": 0.026123046875, "loss_xval": 0.130859375, "num_input_tokens_seen": 94262972, "step": 549 }, { "epoch": 0.1446702176629184, "grad_norm": 7.614359717596038, "learning_rate": 5e-06, "loss": 0.1885, "num_input_tokens_seen": 94435240, "step": 550 }, { "epoch": 0.1446702176629184, "loss": 0.22648407518863678, "loss_ce": 0.0034616070333868265, "loss_iou": 0.427734375, "loss_num": 0.044677734375, "loss_xval": 0.22265625, "num_input_tokens_seen": 94435240, "step": 550 }, { "epoch": 0.1449332544223055, "grad_norm": 7.224416897933664, "learning_rate": 5e-06, "loss": 0.1771, "num_input_tokens_seen": 94607488, "step": 551 }, { "epoch": 0.1449332544223055, "loss": 0.18614572286605835, "loss_ce": 0.0017585159512236714, "loss_iou": 0.6640625, "loss_num": 0.036865234375, "loss_xval": 0.1845703125, "num_input_tokens_seen": 94607488, "step": 551 }, { "epoch": 0.14519629118169264, "grad_norm": 7.882321147436634, "learning_rate": 5e-06, "loss": 0.1957, "num_input_tokens_seen": 94776340, "step": 552 }, { "epoch": 0.14519629118169264, "loss": 0.22142915427684784, "loss_ce": 0.003350543323904276, "loss_iou": 0.53125, "loss_num": 0.043701171875, "loss_xval": 0.2177734375, "num_input_tokens_seen": 94776340, "step": 552 }, { "epoch": 0.14545932794107977, "grad_norm": 8.491750307418846, "learning_rate": 5e-06, "loss": 0.1636, "num_input_tokens_seen": 94948568, "step": 553 }, { "epoch": 0.14545932794107977, "loss": 0.17424368858337402, "loss_ce": 0.0024297323543578386, "loss_iou": 0.578125, "loss_num": 0.034423828125, "loss_xval": 0.171875, "num_input_tokens_seen": 94948568, "step": 553 }, { "epoch": 0.1457223647004669, "grad_norm": 9.839418631011416, "learning_rate": 5e-06, "loss": 0.2278, "num_input_tokens_seen": 95120868, 
"step": 554 }, { "epoch": 0.1457223647004669, "loss": 0.23207631707191467, "loss_ce": 0.001241360092535615, "loss_iou": 0.5703125, "loss_num": 0.046142578125, "loss_xval": 0.23046875, "num_input_tokens_seen": 95120868, "step": 554 }, { "epoch": 0.145985401459854, "grad_norm": 10.298790259293808, "learning_rate": 5e-06, "loss": 0.2316, "num_input_tokens_seen": 95293380, "step": 555 }, { "epoch": 0.145985401459854, "loss": 0.2527633607387543, "loss_ce": 0.0020309346728026867, "loss_iou": 0.40234375, "loss_num": 0.050048828125, "loss_xval": 0.25, "num_input_tokens_seen": 95293380, "step": 555 }, { "epoch": 0.14624843821924113, "grad_norm": 11.750914646944318, "learning_rate": 5e-06, "loss": 0.1353, "num_input_tokens_seen": 95465572, "step": 556 }, { "epoch": 0.14624843821924113, "loss": 0.14519110321998596, "loss_ce": 0.0012702068779617548, "loss_iou": 0.478515625, "loss_num": 0.02880859375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 95465572, "step": 556 }, { "epoch": 0.14651147497862826, "grad_norm": 11.674444875910362, "learning_rate": 5e-06, "loss": 0.248, "num_input_tokens_seen": 95636384, "step": 557 }, { "epoch": 0.14651147497862826, "loss": 0.2826082706451416, "loss_ce": 0.0019686208106577396, "loss_iou": 0.5078125, "loss_num": 0.05615234375, "loss_xval": 0.28125, "num_input_tokens_seen": 95636384, "step": 557 }, { "epoch": 0.1467745117380154, "grad_norm": 9.602535161901319, "learning_rate": 5e-06, "loss": 0.2109, "num_input_tokens_seen": 95808820, "step": 558 }, { "epoch": 0.1467745117380154, "loss": 0.1991540640592575, "loss_ce": 0.0036584637127816677, "loss_iou": 0.51953125, "loss_num": 0.0390625, "loss_xval": 0.1953125, "num_input_tokens_seen": 95808820, "step": 558 }, { "epoch": 0.1470375484974025, "grad_norm": 8.512482701290667, "learning_rate": 5e-06, "loss": 0.2626, "num_input_tokens_seen": 95980876, "step": 559 }, { "epoch": 0.1470375484974025, "loss": 0.3269794285297394, "loss_ce": 0.0010516871698200703, "loss_iou": 0.3203125, "loss_num": 
0.0654296875, "loss_xval": 0.326171875, "num_input_tokens_seen": 95980876, "step": 559 }, { "epoch": 0.14730058525678963, "grad_norm": 8.300386324136479, "learning_rate": 5e-06, "loss": 0.178, "num_input_tokens_seen": 96153036, "step": 560 }, { "epoch": 0.14730058525678963, "loss": 0.2191367745399475, "loss_ce": 0.006795480381697416, "loss_iou": 0.5625, "loss_num": 0.04248046875, "loss_xval": 0.2119140625, "num_input_tokens_seen": 96153036, "step": 560 }, { "epoch": 0.14756362201617676, "grad_norm": 5.775876970805374, "learning_rate": 5e-06, "loss": 0.1482, "num_input_tokens_seen": 96322532, "step": 561 }, { "epoch": 0.14756362201617676, "loss": 0.1977005898952484, "loss_ce": 0.006996248383074999, "loss_iou": 0.625, "loss_num": 0.0380859375, "loss_xval": 0.1904296875, "num_input_tokens_seen": 96322532, "step": 561 }, { "epoch": 0.1478266587755639, "grad_norm": 6.533356383107408, "learning_rate": 5e-06, "loss": 0.2025, "num_input_tokens_seen": 96494568, "step": 562 }, { "epoch": 0.1478266587755639, "loss": 0.15407304465770721, "loss_ce": 0.0012410087510943413, "loss_iou": 0.62890625, "loss_num": 0.030517578125, "loss_xval": 0.15234375, "num_input_tokens_seen": 96494568, "step": 562 }, { "epoch": 0.14808969553495102, "grad_norm": 9.772718457216582, "learning_rate": 5e-06, "loss": 0.2334, "num_input_tokens_seen": 96666892, "step": 563 }, { "epoch": 0.14808969553495102, "loss": 0.18993595242500305, "loss_ce": 0.002985279308632016, "loss_iou": 0.75, "loss_num": 0.037353515625, "loss_xval": 0.1865234375, "num_input_tokens_seen": 96666892, "step": 563 }, { "epoch": 0.14835273229433812, "grad_norm": 10.172183960721854, "learning_rate": 5e-06, "loss": 0.2474, "num_input_tokens_seen": 96837620, "step": 564 }, { "epoch": 0.14835273229433812, "loss": 0.20410403609275818, "loss_ce": 0.004641146864742041, "loss_iou": 0.5, "loss_num": 0.039794921875, "loss_xval": 0.19921875, "num_input_tokens_seen": 96837620, "step": 564 }, { "epoch": 0.14861576905372526, "grad_norm": 
9.445540103601473, "learning_rate": 5e-06, "loss": 0.2275, "num_input_tokens_seen": 97009692, "step": 565 }, { "epoch": 0.14861576905372526, "loss": 0.13070067763328552, "loss_ce": 0.0018554661655798554, "loss_iou": 0.6015625, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 97009692, "step": 565 }, { "epoch": 0.14887880581311239, "grad_norm": 7.521927615990519, "learning_rate": 5e-06, "loss": 0.2008, "num_input_tokens_seen": 97182076, "step": 566 }, { "epoch": 0.14887880581311239, "loss": 0.26979702711105347, "loss_ce": 0.009176918305456638, "loss_iou": 0.60546875, "loss_num": 0.052001953125, "loss_xval": 0.259765625, "num_input_tokens_seen": 97182076, "step": 566 }, { "epoch": 0.14914184257249952, "grad_norm": 6.945071393253576, "learning_rate": 5e-06, "loss": 0.2175, "num_input_tokens_seen": 97352348, "step": 567 }, { "epoch": 0.14914184257249952, "loss": 0.2486119419336319, "loss_ce": 0.004898556973785162, "loss_iou": 0.5078125, "loss_num": 0.048828125, "loss_xval": 0.244140625, "num_input_tokens_seen": 97352348, "step": 567 }, { "epoch": 0.14940487933188662, "grad_norm": 19.761163824718725, "learning_rate": 5e-06, "loss": 0.2266, "num_input_tokens_seen": 97524808, "step": 568 }, { "epoch": 0.14940487933188662, "loss": 0.2611408531665802, "loss_ce": 0.0016193758929148316, "loss_iou": 0.43359375, "loss_num": 0.052001953125, "loss_xval": 0.259765625, "num_input_tokens_seen": 97524808, "step": 568 }, { "epoch": 0.14966791609127375, "grad_norm": 11.05044000141412, "learning_rate": 5e-06, "loss": 0.2071, "num_input_tokens_seen": 97696860, "step": 569 }, { "epoch": 0.14966791609127375, "loss": 0.17859557271003723, "loss_ce": 0.002020859392359853, "loss_iou": 0.6796875, "loss_num": 0.035400390625, "loss_xval": 0.1767578125, "num_input_tokens_seen": 97696860, "step": 569 }, { "epoch": 0.14993095285066088, "grad_norm": 10.09349160636077, "learning_rate": 5e-06, "loss": 0.2091, "num_input_tokens_seen": 97869048, "step": 570 }, { "epoch": 
0.14993095285066088, "loss": 0.1531415581703186, "loss_ce": 0.001164011424407363, "loss_iou": 0.66796875, "loss_num": 0.0303955078125, "loss_xval": 0.15234375, "num_input_tokens_seen": 97869048, "step": 570 }, { "epoch": 0.150193989610048, "grad_norm": 6.128271963212333, "learning_rate": 5e-06, "loss": 0.1797, "num_input_tokens_seen": 98041152, "step": 571 }, { "epoch": 0.150193989610048, "loss": 0.18703126907348633, "loss_ce": 0.003956317901611328, "loss_iou": 0.376953125, "loss_num": 0.03662109375, "loss_xval": 0.1826171875, "num_input_tokens_seen": 98041152, "step": 571 }, { "epoch": 0.15045702636943512, "grad_norm": 9.404399204091265, "learning_rate": 5e-06, "loss": 0.1789, "num_input_tokens_seen": 98213508, "step": 572 }, { "epoch": 0.15045702636943512, "loss": 0.1673622578382492, "loss_ce": 0.011722613126039505, "loss_iou": 0.60546875, "loss_num": 0.0311279296875, "loss_xval": 0.1552734375, "num_input_tokens_seen": 98213508, "step": 572 }, { "epoch": 0.15072006312882225, "grad_norm": 8.281687271797518, "learning_rate": 5e-06, "loss": 0.2554, "num_input_tokens_seen": 98385768, "step": 573 }, { "epoch": 0.15072006312882225, "loss": 0.29049456119537354, "loss_ce": 0.007413491606712341, "loss_iou": 0.5703125, "loss_num": 0.056640625, "loss_xval": 0.283203125, "num_input_tokens_seen": 98385768, "step": 573 }, { "epoch": 0.15098309988820938, "grad_norm": 9.329556868568911, "learning_rate": 5e-06, "loss": 0.1929, "num_input_tokens_seen": 98555404, "step": 574 }, { "epoch": 0.15098309988820938, "loss": 0.17510266602039337, "loss_ce": 0.006096326746046543, "loss_iou": 0.6015625, "loss_num": 0.03369140625, "loss_xval": 0.1689453125, "num_input_tokens_seen": 98555404, "step": 574 }, { "epoch": 0.1512461366475965, "grad_norm": 6.666929195510284, "learning_rate": 5e-06, "loss": 0.1966, "num_input_tokens_seen": 98727452, "step": 575 }, { "epoch": 0.1512461366475965, "loss": 0.2249300628900528, "loss_ce": 0.008560429327189922, "loss_iou": 0.400390625, "loss_num": 
0.043212890625, "loss_xval": 0.216796875, "num_input_tokens_seen": 98727452, "step": 575 }, { "epoch": 0.15150917340698364, "grad_norm": 12.576347626904536, "learning_rate": 5e-06, "loss": 0.2151, "num_input_tokens_seen": 98899788, "step": 576 }, { "epoch": 0.15150917340698364, "loss": 0.21397414803504944, "loss_ce": 0.004837184213101864, "loss_iou": 0.60546875, "loss_num": 0.041748046875, "loss_xval": 0.208984375, "num_input_tokens_seen": 98899788, "step": 576 }, { "epoch": 0.15177221016637074, "grad_norm": 9.891929331498387, "learning_rate": 5e-06, "loss": 0.2031, "num_input_tokens_seen": 99072416, "step": 577 }, { "epoch": 0.15177221016637074, "loss": 0.24624097347259521, "loss_ce": 0.011255611665546894, "loss_iou": 0.546875, "loss_num": 0.046875, "loss_xval": 0.2353515625, "num_input_tokens_seen": 99072416, "step": 577 }, { "epoch": 0.15203524692575787, "grad_norm": 12.530200633920343, "learning_rate": 5e-06, "loss": 0.2216, "num_input_tokens_seen": 99244280, "step": 578 }, { "epoch": 0.15203524692575787, "loss": 0.19744133949279785, "loss_ce": 0.001030205050483346, "loss_iou": 0.671875, "loss_num": 0.039306640625, "loss_xval": 0.1962890625, "num_input_tokens_seen": 99244280, "step": 578 }, { "epoch": 0.152298283685145, "grad_norm": 10.630444163918117, "learning_rate": 5e-06, "loss": 0.2121, "num_input_tokens_seen": 99416184, "step": 579 }, { "epoch": 0.152298283685145, "loss": 0.2667901813983917, "loss_ce": 0.0020807269029319286, "loss_iou": 0.40234375, "loss_num": 0.052978515625, "loss_xval": 0.265625, "num_input_tokens_seen": 99416184, "step": 579 }, { "epoch": 0.15256132044453213, "grad_norm": 10.945427939330838, "learning_rate": 5e-06, "loss": 0.1943, "num_input_tokens_seen": 99588500, "step": 580 }, { "epoch": 0.15256132044453213, "loss": 0.19293344020843506, "loss_ce": 0.005006188526749611, "loss_iou": 0.4921875, "loss_num": 0.03759765625, "loss_xval": 0.1875, "num_input_tokens_seen": 99588500, "step": 580 }, { "epoch": 0.15282435720391924, "grad_norm": 
10.349037140854193, "learning_rate": 5e-06, "loss": 0.1856, "num_input_tokens_seen": 99760948, "step": 581 }, { "epoch": 0.15282435720391924, "loss": 0.1601850688457489, "loss_ce": 0.0018598883179947734, "loss_iou": 0.55859375, "loss_num": 0.03173828125, "loss_xval": 0.158203125, "num_input_tokens_seen": 99760948, "step": 581 }, { "epoch": 0.15308739396330637, "grad_norm": 13.119978211180065, "learning_rate": 5e-06, "loss": 0.1813, "num_input_tokens_seen": 99932992, "step": 582 }, { "epoch": 0.15308739396330637, "loss": 0.24380066990852356, "loss_ce": 0.002528695622459054, "loss_iou": 0.7109375, "loss_num": 0.04833984375, "loss_xval": 0.2412109375, "num_input_tokens_seen": 99932992, "step": 582 }, { "epoch": 0.1533504307226935, "grad_norm": 8.165624923829593, "learning_rate": 5e-06, "loss": 0.1637, "num_input_tokens_seen": 100104992, "step": 583 }, { "epoch": 0.1533504307226935, "loss": 0.18439146876335144, "loss_ce": 0.0024456623941659927, "loss_iou": 0.6640625, "loss_num": 0.036376953125, "loss_xval": 0.181640625, "num_input_tokens_seen": 100104992, "step": 583 }, { "epoch": 0.15361346748208063, "grad_norm": 9.086411500582741, "learning_rate": 5e-06, "loss": 0.1853, "num_input_tokens_seen": 100277144, "step": 584 }, { "epoch": 0.15361346748208063, "loss": 0.17580674588680267, "loss_ce": 0.0032603610306978226, "loss_iou": 0.4296875, "loss_num": 0.034423828125, "loss_xval": 0.1728515625, "num_input_tokens_seen": 100277144, "step": 584 }, { "epoch": 0.15387650424146773, "grad_norm": 11.772959826434088, "learning_rate": 5e-06, "loss": 0.2266, "num_input_tokens_seen": 100449660, "step": 585 }, { "epoch": 0.15387650424146773, "loss": 0.2301916778087616, "loss_ce": 0.0029577831737697124, "loss_iou": 0.482421875, "loss_num": 0.04541015625, "loss_xval": 0.2275390625, "num_input_tokens_seen": 100449660, "step": 585 }, { "epoch": 0.15413954100085486, "grad_norm": 8.411861716804005, "learning_rate": 5e-06, "loss": 0.2131, "num_input_tokens_seen": 100622028, "step": 586 }, { 
"epoch": 0.15413954100085486, "loss": 0.183104008436203, "loss_ce": 0.0029892674647271633, "loss_iou": 0.6015625, "loss_num": 0.0361328125, "loss_xval": 0.1796875, "num_input_tokens_seen": 100622028, "step": 586 }, { "epoch": 0.154402577760242, "grad_norm": 8.219849696412924, "learning_rate": 5e-06, "loss": 0.2095, "num_input_tokens_seen": 100792792, "step": 587 }, { "epoch": 0.154402577760242, "loss": 0.23411154747009277, "loss_ce": 0.0037648691795766354, "loss_iou": 0.458984375, "loss_num": 0.046142578125, "loss_xval": 0.23046875, "num_input_tokens_seen": 100792792, "step": 587 }, { "epoch": 0.15466561451962912, "grad_norm": 7.806947443925601, "learning_rate": 5e-06, "loss": 0.1858, "num_input_tokens_seen": 100964900, "step": 588 }, { "epoch": 0.15466561451962912, "loss": 0.15801170468330383, "loss_ce": 0.002433100016787648, "loss_iou": 0.59765625, "loss_num": 0.0311279296875, "loss_xval": 0.1552734375, "num_input_tokens_seen": 100964900, "step": 588 }, { "epoch": 0.15492865127901626, "grad_norm": 9.15652521254843, "learning_rate": 5e-06, "loss": 0.1918, "num_input_tokens_seen": 101137068, "step": 589 }, { "epoch": 0.15492865127901626, "loss": 0.14395104348659515, "loss_ce": 0.0018611999694257975, "loss_iou": 0.62109375, "loss_num": 0.0284423828125, "loss_xval": 0.142578125, "num_input_tokens_seen": 101137068, "step": 589 }, { "epoch": 0.15519168803840336, "grad_norm": 18.2857885682754, "learning_rate": 5e-06, "loss": 0.1912, "num_input_tokens_seen": 101309424, "step": 590 }, { "epoch": 0.15519168803840336, "loss": 0.19831930100917816, "loss_ce": 0.0009926356142386794, "loss_iou": 0.546875, "loss_num": 0.03955078125, "loss_xval": 0.197265625, "num_input_tokens_seen": 101309424, "step": 590 }, { "epoch": 0.1554547247977905, "grad_norm": 7.944412820463009, "learning_rate": 5e-06, "loss": 0.205, "num_input_tokens_seen": 101478352, "step": 591 }, { "epoch": 0.1554547247977905, "loss": 0.21606285870075226, "loss_ce": 0.00683434447273612, "loss_iou": 0.431640625, 
"loss_num": 0.041748046875, "loss_xval": 0.208984375, "num_input_tokens_seen": 101478352, "step": 591 }, { "epoch": 0.15571776155717762, "grad_norm": 6.618575384086146, "learning_rate": 5e-06, "loss": 0.1821, "num_input_tokens_seen": 101650324, "step": 592 }, { "epoch": 0.15571776155717762, "loss": 0.20387296378612518, "loss_ce": 0.0040438538417220116, "loss_iou": 0.4765625, "loss_num": 0.0400390625, "loss_xval": 0.2001953125, "num_input_tokens_seen": 101650324, "step": 592 }, { "epoch": 0.15598079831656475, "grad_norm": 13.644949403576716, "learning_rate": 5e-06, "loss": 0.1791, "num_input_tokens_seen": 101822564, "step": 593 }, { "epoch": 0.15598079831656475, "loss": 0.12713779509067535, "loss_ce": 0.004823335446417332, "loss_iou": 0.609375, "loss_num": 0.0244140625, "loss_xval": 0.1220703125, "num_input_tokens_seen": 101822564, "step": 593 }, { "epoch": 0.15624383507595185, "grad_norm": 11.597792275294081, "learning_rate": 5e-06, "loss": 0.2129, "num_input_tokens_seen": 101994900, "step": 594 }, { "epoch": 0.15624383507595185, "loss": 0.17677326500415802, "loss_ce": 0.0048372335731983185, "loss_iou": 0.68359375, "loss_num": 0.034423828125, "loss_xval": 0.171875, "num_input_tokens_seen": 101994900, "step": 594 }, { "epoch": 0.15650687183533898, "grad_norm": 7.4047027755949175, "learning_rate": 5e-06, "loss": 0.1867, "num_input_tokens_seen": 102165488, "step": 595 }, { "epoch": 0.15650687183533898, "loss": 0.1512867510318756, "loss_ce": 0.0011402517557144165, "loss_iou": 0.6015625, "loss_num": 0.030029296875, "loss_xval": 0.150390625, "num_input_tokens_seen": 102165488, "step": 595 }, { "epoch": 0.15676990859472612, "grad_norm": 7.866562092019479, "learning_rate": 5e-06, "loss": 0.1636, "num_input_tokens_seen": 102337576, "step": 596 }, { "epoch": 0.15676990859472612, "loss": 0.11417586356401443, "loss_ce": 0.0016270325286313891, "loss_iou": 0.55078125, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 102337576, "step": 596 }, { 
"epoch": 0.15703294535411325, "grad_norm": 14.250796858508032, "learning_rate": 5e-06, "loss": 0.2038, "num_input_tokens_seen": 102509976, "step": 597 }, { "epoch": 0.15703294535411325, "loss": 0.20375049114227295, "loss_ce": 0.003677244298160076, "loss_iou": 0.53125, "loss_num": 0.0400390625, "loss_xval": 0.2001953125, "num_input_tokens_seen": 102509976, "step": 597 }, { "epoch": 0.15729598211350035, "grad_norm": 5.485478227776666, "learning_rate": 5e-06, "loss": 0.1839, "num_input_tokens_seen": 102682292, "step": 598 }, { "epoch": 0.15729598211350035, "loss": 0.1260145604610443, "loss_ce": 0.002143711317330599, "loss_iou": 0.65625, "loss_num": 0.0247802734375, "loss_xval": 0.1240234375, "num_input_tokens_seen": 102682292, "step": 598 }, { "epoch": 0.15755901887288748, "grad_norm": 16.90449659888107, "learning_rate": 5e-06, "loss": 0.1869, "num_input_tokens_seen": 102854396, "step": 599 }, { "epoch": 0.15755901887288748, "loss": 0.18277683854103088, "loss_ce": 0.002356911078095436, "loss_iou": 0.46484375, "loss_num": 0.0361328125, "loss_xval": 0.1806640625, "num_input_tokens_seen": 102854396, "step": 599 }, { "epoch": 0.1578220556322746, "grad_norm": 8.022384772643738, "learning_rate": 5e-06, "loss": 0.2261, "num_input_tokens_seen": 103024668, "step": 600 }, { "epoch": 0.1578220556322746, "loss": 0.2725946605205536, "loss_ce": 0.0058710225857794285, "loss_iou": 0.63671875, "loss_num": 0.05322265625, "loss_xval": 0.267578125, "num_input_tokens_seen": 103024668, "step": 600 }, { "epoch": 0.15808509239166174, "grad_norm": 6.502303135299271, "learning_rate": 5e-06, "loss": 0.2163, "num_input_tokens_seen": 103195160, "step": 601 }, { "epoch": 0.15808509239166174, "loss": 0.16184811294078827, "loss_ce": 0.003950160928070545, "loss_iou": 0.6015625, "loss_num": 0.031494140625, "loss_xval": 0.158203125, "num_input_tokens_seen": 103195160, "step": 601 }, { "epoch": 0.15834812915104887, "grad_norm": 11.6613104395809, "learning_rate": 5e-06, "loss": 0.1783, 
"num_input_tokens_seen": 103365588, "step": 602 }, { "epoch": 0.15834812915104887, "loss": 0.27217578887939453, "loss_ce": 0.006367700640112162, "loss_iou": 0.62109375, "loss_num": 0.05322265625, "loss_xval": 0.265625, "num_input_tokens_seen": 103365588, "step": 602 }, { "epoch": 0.15861116591043598, "grad_norm": 6.846414395262611, "learning_rate": 5e-06, "loss": 0.2057, "num_input_tokens_seen": 103537444, "step": 603 }, { "epoch": 0.15861116591043598, "loss": 0.24833053350448608, "loss_ce": 0.004678180906921625, "loss_iou": 0.625, "loss_num": 0.048828125, "loss_xval": 0.244140625, "num_input_tokens_seen": 103537444, "step": 603 }, { "epoch": 0.1588742026698231, "grad_norm": 5.663069385724008, "learning_rate": 5e-06, "loss": 0.1744, "num_input_tokens_seen": 103709932, "step": 604 }, { "epoch": 0.1588742026698231, "loss": 0.19428852200508118, "loss_ce": 0.000929144793190062, "loss_iou": 0.51953125, "loss_num": 0.038818359375, "loss_xval": 0.193359375, "num_input_tokens_seen": 103709932, "step": 604 }, { "epoch": 0.15913723942921024, "grad_norm": 6.358230492219544, "learning_rate": 5e-06, "loss": 0.1712, "num_input_tokens_seen": 103882084, "step": 605 }, { "epoch": 0.15913723942921024, "loss": 0.19055569171905518, "loss_ce": 0.005802282597869635, "loss_iou": 0.51953125, "loss_num": 0.036865234375, "loss_xval": 0.1845703125, "num_input_tokens_seen": 103882084, "step": 605 }, { "epoch": 0.15940027618859737, "grad_norm": 8.229813023952058, "learning_rate": 5e-06, "loss": 0.194, "num_input_tokens_seen": 104054236, "step": 606 }, { "epoch": 0.15940027618859737, "loss": 0.14631760120391846, "loss_ce": 0.0008098002290353179, "loss_iou": 0.45703125, "loss_num": 0.0291748046875, "loss_xval": 0.1455078125, "num_input_tokens_seen": 104054236, "step": 606 }, { "epoch": 0.15966331294798447, "grad_norm": 8.649522876210598, "learning_rate": 5e-06, "loss": 0.1982, "num_input_tokens_seen": 104224580, "step": 607 }, { "epoch": 0.15966331294798447, "loss": 0.2813430428504944, 
"loss_ce": 0.0039382753893733025, "loss_iou": 0.51171875, "loss_num": 0.0556640625, "loss_xval": 0.27734375, "num_input_tokens_seen": 104224580, "step": 607 }, { "epoch": 0.1599263497073716, "grad_norm": 9.061064934100147, "learning_rate": 5e-06, "loss": 0.2389, "num_input_tokens_seen": 104396656, "step": 608 }, { "epoch": 0.1599263497073716, "loss": 0.25585615634918213, "loss_ce": 0.005581488832831383, "loss_iou": 0.3515625, "loss_num": 0.050048828125, "loss_xval": 0.25, "num_input_tokens_seen": 104396656, "step": 608 }, { "epoch": 0.16018938646675873, "grad_norm": 10.782439869738488, "learning_rate": 5e-06, "loss": 0.212, "num_input_tokens_seen": 104568804, "step": 609 }, { "epoch": 0.16018938646675873, "loss": 0.22171396017074585, "loss_ce": 0.0010718655539676547, "loss_iou": 0.6640625, "loss_num": 0.044189453125, "loss_xval": 0.220703125, "num_input_tokens_seen": 104568804, "step": 609 }, { "epoch": 0.16045242322614586, "grad_norm": 7.912260836349583, "learning_rate": 5e-06, "loss": 0.2335, "num_input_tokens_seen": 104740976, "step": 610 }, { "epoch": 0.16045242322614586, "loss": 0.20210719108581543, "loss_ce": 0.002094991272315383, "loss_iou": 0.60546875, "loss_num": 0.0400390625, "loss_xval": 0.2001953125, "num_input_tokens_seen": 104740976, "step": 610 }, { "epoch": 0.16071545998553297, "grad_norm": 7.072100604468161, "learning_rate": 5e-06, "loss": 0.1877, "num_input_tokens_seen": 104913120, "step": 611 }, { "epoch": 0.16071545998553297, "loss": 0.24669376015663147, "loss_ce": 0.005971122998744249, "loss_iou": 0.451171875, "loss_num": 0.048095703125, "loss_xval": 0.240234375, "num_input_tokens_seen": 104913120, "step": 611 }, { "epoch": 0.1609784967449201, "grad_norm": 6.775986925165555, "learning_rate": 5e-06, "loss": 0.2033, "num_input_tokens_seen": 105085268, "step": 612 }, { "epoch": 0.1609784967449201, "loss": 0.17514100670814514, "loss_ce": 0.005036028102040291, "loss_iou": 0.63671875, "loss_num": 0.033935546875, "loss_xval": 0.169921875, 
"num_input_tokens_seen": 105085268, "step": 612 }, { "epoch": 0.16124153350430723, "grad_norm": 13.436082189650097, "learning_rate": 5e-06, "loss": 0.1878, "num_input_tokens_seen": 105255536, "step": 613 }, { "epoch": 0.16124153350430723, "loss": 0.1662948876619339, "loss_ce": 0.004856906831264496, "loss_iou": 0.56640625, "loss_num": 0.0322265625, "loss_xval": 0.1611328125, "num_input_tokens_seen": 105255536, "step": 613 }, { "epoch": 0.16150457026369436, "grad_norm": 15.212823710443937, "learning_rate": 5e-06, "loss": 0.1571, "num_input_tokens_seen": 105426276, "step": 614 }, { "epoch": 0.16150457026369436, "loss": 0.21024103462696075, "loss_ce": 0.000707346829585731, "loss_iou": 0.349609375, "loss_num": 0.041748046875, "loss_xval": 0.2099609375, "num_input_tokens_seen": 105426276, "step": 614 }, { "epoch": 0.1617676070230815, "grad_norm": 5.811817664832679, "learning_rate": 5e-06, "loss": 0.1759, "num_input_tokens_seen": 105598528, "step": 615 }, { "epoch": 0.1617676070230815, "loss": 0.17331555485725403, "loss_ce": 0.0019593401812016964, "loss_iou": 0.55078125, "loss_num": 0.0341796875, "loss_xval": 0.1708984375, "num_input_tokens_seen": 105598528, "step": 615 }, { "epoch": 0.1620306437824686, "grad_norm": 7.929569482892307, "learning_rate": 5e-06, "loss": 0.1901, "num_input_tokens_seen": 105768684, "step": 616 }, { "epoch": 0.1620306437824686, "loss": 0.22191157937049866, "loss_ce": 0.0013305249158293009, "loss_iou": 0.470703125, "loss_num": 0.044189453125, "loss_xval": 0.220703125, "num_input_tokens_seen": 105768684, "step": 616 }, { "epoch": 0.16229368054185572, "grad_norm": 9.722668759038529, "learning_rate": 5e-06, "loss": 0.238, "num_input_tokens_seen": 105938984, "step": 617 }, { "epoch": 0.16229368054185572, "loss": 0.2918306887149811, "loss_ce": 0.004660272039473057, "loss_iou": 0.76953125, "loss_num": 0.057373046875, "loss_xval": 0.287109375, "num_input_tokens_seen": 105938984, "step": 617 }, { "epoch": 0.16255671730124285, "grad_norm": 
8.495376084254536, "learning_rate": 5e-06, "loss": 0.2003, "num_input_tokens_seen": 106111216, "step": 618 }, { "epoch": 0.16255671730124285, "loss": 0.13662417232990265, "loss_ce": 0.0008209550869651139, "loss_iou": 0.46875, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 106111216, "step": 618 }, { "epoch": 0.16281975406062998, "grad_norm": 10.294809858036002, "learning_rate": 5e-06, "loss": 0.2276, "num_input_tokens_seen": 106283444, "step": 619 }, { "epoch": 0.16281975406062998, "loss": 0.23945499956607819, "loss_ce": 0.0016620358219370246, "loss_iou": 0.55859375, "loss_num": 0.047607421875, "loss_xval": 0.23828125, "num_input_tokens_seen": 106283444, "step": 619 }, { "epoch": 0.1630827908200171, "grad_norm": 8.591329712303546, "learning_rate": 5e-06, "loss": 0.1832, "num_input_tokens_seen": 106455556, "step": 620 }, { "epoch": 0.1630827908200171, "loss": 0.24216794967651367, "loss_ce": 0.004680164158344269, "loss_iou": 0.703125, "loss_num": 0.04736328125, "loss_xval": 0.2373046875, "num_input_tokens_seen": 106455556, "step": 620 }, { "epoch": 0.16334582757940422, "grad_norm": 11.580746408707443, "learning_rate": 5e-06, "loss": 0.222, "num_input_tokens_seen": 106625564, "step": 621 }, { "epoch": 0.16334582757940422, "loss": 0.27062344551086426, "loss_ce": 0.0013363163452595472, "loss_iou": 0.625, "loss_num": 0.0537109375, "loss_xval": 0.26953125, "num_input_tokens_seen": 106625564, "step": 621 }, { "epoch": 0.16360886433879135, "grad_norm": 9.075656365353165, "learning_rate": 5e-06, "loss": 0.2241, "num_input_tokens_seen": 106797500, "step": 622 }, { "epoch": 0.16360886433879135, "loss": 0.25023964047431946, "loss_ce": 0.002986219245940447, "loss_iou": 0.66015625, "loss_num": 0.049560546875, "loss_xval": 0.2470703125, "num_input_tokens_seen": 106797500, "step": 622 }, { "epoch": 0.16387190109817848, "grad_norm": 7.173557460385501, "learning_rate": 5e-06, "loss": 0.1767, "num_input_tokens_seen": 106969624, "step": 623 }, { 
"epoch": 0.16387190109817848, "loss": 0.15543386340141296, "loss_ce": 0.004310814663767815, "loss_iou": 0.51171875, "loss_num": 0.0302734375, "loss_xval": 0.1513671875, "num_input_tokens_seen": 106969624, "step": 623 }, { "epoch": 0.16413493785756558, "grad_norm": 7.146771956956073, "learning_rate": 5e-06, "loss": 0.1912, "num_input_tokens_seen": 107139936, "step": 624 }, { "epoch": 0.16413493785756558, "loss": 0.2210107445716858, "loss_ce": 0.0020776439923793077, "loss_iou": 0.376953125, "loss_num": 0.043701171875, "loss_xval": 0.21875, "num_input_tokens_seen": 107139936, "step": 624 }, { "epoch": 0.1643979746169527, "grad_norm": 7.784232141951156, "learning_rate": 5e-06, "loss": 0.1852, "num_input_tokens_seen": 107312016, "step": 625 }, { "epoch": 0.1643979746169527, "loss": 0.16655078530311584, "loss_ce": 0.00200000312179327, "loss_iou": 0.5078125, "loss_num": 0.032958984375, "loss_xval": 0.1640625, "num_input_tokens_seen": 107312016, "step": 625 }, { "epoch": 0.16466101137633984, "grad_norm": 9.666585010057217, "learning_rate": 5e-06, "loss": 0.2351, "num_input_tokens_seen": 107484484, "step": 626 }, { "epoch": 0.16466101137633984, "loss": 0.17668788135051727, "loss_ce": 0.0009676595800556242, "loss_iou": 0.70703125, "loss_num": 0.03515625, "loss_xval": 0.17578125, "num_input_tokens_seen": 107484484, "step": 626 }, { "epoch": 0.16492404813572697, "grad_norm": 15.599284961255806, "learning_rate": 5e-06, "loss": 0.1943, "num_input_tokens_seen": 107656148, "step": 627 }, { "epoch": 0.16492404813572697, "loss": 0.2105821818113327, "loss_ce": 0.0011095235822722316, "loss_iou": 0.5, "loss_num": 0.0419921875, "loss_xval": 0.208984375, "num_input_tokens_seen": 107656148, "step": 627 }, { "epoch": 0.1651870848951141, "grad_norm": 8.248204010218137, "learning_rate": 5e-06, "loss": 0.1732, "num_input_tokens_seen": 107828348, "step": 628 }, { "epoch": 0.1651870848951141, "loss": 0.1219111904501915, "loss_ce": 0.009545465931296349, "loss_iou": 0.609375, "loss_num": 
0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 107828348, "step": 628 }, { "epoch": 0.1654501216545012, "grad_norm": 10.643869540803188, "learning_rate": 5e-06, "loss": 0.191, "num_input_tokens_seen": 108000340, "step": 629 }, { "epoch": 0.1654501216545012, "loss": 0.21501825749874115, "loss_ce": 0.0015172738349065185, "loss_iou": 0.6171875, "loss_num": 0.042724609375, "loss_xval": 0.2138671875, "num_input_tokens_seen": 108000340, "step": 629 }, { "epoch": 0.16571315841388834, "grad_norm": 6.957892002207251, "learning_rate": 5e-06, "loss": 0.1723, "num_input_tokens_seen": 108172648, "step": 630 }, { "epoch": 0.16571315841388834, "loss": 0.17599767446517944, "loss_ce": 0.0010709069902077317, "loss_iou": 0.4921875, "loss_num": 0.034912109375, "loss_xval": 0.1748046875, "num_input_tokens_seen": 108172648, "step": 630 }, { "epoch": 0.16597619517327547, "grad_norm": 8.642857572938437, "learning_rate": 5e-06, "loss": 0.2342, "num_input_tokens_seen": 108343044, "step": 631 }, { "epoch": 0.16597619517327547, "loss": 0.18351054191589355, "loss_ce": 0.00296853668987751, "loss_iou": 0.69921875, "loss_num": 0.0361328125, "loss_xval": 0.1806640625, "num_input_tokens_seen": 108343044, "step": 631 }, { "epoch": 0.1662392319326626, "grad_norm": 7.980653444631839, "learning_rate": 5e-06, "loss": 0.1625, "num_input_tokens_seen": 108515048, "step": 632 }, { "epoch": 0.1662392319326626, "loss": 0.1818259358406067, "loss_ce": 0.008302995935082436, "loss_iou": 0.5625, "loss_num": 0.03466796875, "loss_xval": 0.173828125, "num_input_tokens_seen": 108515048, "step": 632 }, { "epoch": 0.1665022686920497, "grad_norm": 7.668823525125559, "learning_rate": 5e-06, "loss": 0.1603, "num_input_tokens_seen": 108687320, "step": 633 }, { "epoch": 0.1665022686920497, "loss": 0.2121184766292572, "loss_ce": 0.0024016951210796833, "loss_iou": 0.56640625, "loss_num": 0.0419921875, "loss_xval": 0.2099609375, "num_input_tokens_seen": 108687320, "step": 633 }, { "epoch": 
0.16676530545143683, "grad_norm": 8.952178505337718, "learning_rate": 5e-06, "loss": 0.2244, "num_input_tokens_seen": 108859852, "step": 634 }, { "epoch": 0.16676530545143683, "loss": 0.24805203080177307, "loss_ce": 0.004399674944579601, "loss_iou": 0.59765625, "loss_num": 0.048828125, "loss_xval": 0.244140625, "num_input_tokens_seen": 108859852, "step": 634 }, { "epoch": 0.16702834221082397, "grad_norm": 8.269015730220213, "learning_rate": 5e-06, "loss": 0.2335, "num_input_tokens_seen": 109031824, "step": 635 }, { "epoch": 0.16702834221082397, "loss": 0.3403623104095459, "loss_ce": 0.01095556654036045, "loss_iou": 0.314453125, "loss_num": 0.06591796875, "loss_xval": 0.330078125, "num_input_tokens_seen": 109031824, "step": 635 }, { "epoch": 0.1672913789702111, "grad_norm": 11.588758914669935, "learning_rate": 5e-06, "loss": 0.1782, "num_input_tokens_seen": 109204100, "step": 636 }, { "epoch": 0.1672913789702111, "loss": 0.25584501028060913, "loss_ce": 0.007065705489367247, "loss_iou": 0.578125, "loss_num": 0.0498046875, "loss_xval": 0.2490234375, "num_input_tokens_seen": 109204100, "step": 636 }, { "epoch": 0.1675544157295982, "grad_norm": 9.273803296838299, "learning_rate": 5e-06, "loss": 0.203, "num_input_tokens_seen": 109376120, "step": 637 }, { "epoch": 0.1675544157295982, "loss": 0.24991419911384583, "loss_ce": 0.0015011176001280546, "loss_iou": 0.64453125, "loss_num": 0.0498046875, "loss_xval": 0.248046875, "num_input_tokens_seen": 109376120, "step": 637 }, { "epoch": 0.16781745248898533, "grad_norm": 5.629607113211884, "learning_rate": 5e-06, "loss": 0.1663, "num_input_tokens_seen": 109548116, "step": 638 }, { "epoch": 0.16781745248898533, "loss": 0.22593827545642853, "loss_ce": 0.005753945559263229, "loss_iou": 0.470703125, "loss_num": 0.0439453125, "loss_xval": 0.2197265625, "num_input_tokens_seen": 109548116, "step": 638 }, { "epoch": 0.16808048924837246, "grad_norm": 10.827873954115349, "learning_rate": 5e-06, "loss": 0.154, "num_input_tokens_seen": 
109720360, "step": 639 }, { "epoch": 0.16808048924837246, "loss": 0.10586154460906982, "loss_ce": 0.0011862462852150202, "loss_iou": 0.474609375, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 109720360, "step": 639 }, { "epoch": 0.1683435260077596, "grad_norm": 6.979104215801615, "learning_rate": 5e-06, "loss": 0.1811, "num_input_tokens_seen": 109892700, "step": 640 }, { "epoch": 0.1683435260077596, "loss": 0.21827656030654907, "loss_ce": 0.0010524489916861057, "loss_iou": 0.62109375, "loss_num": 0.04345703125, "loss_xval": 0.216796875, "num_input_tokens_seen": 109892700, "step": 640 }, { "epoch": 0.1686065627671467, "grad_norm": 12.485157254065346, "learning_rate": 5e-06, "loss": 0.2113, "num_input_tokens_seen": 110064992, "step": 641 }, { "epoch": 0.1686065627671467, "loss": 0.26375001668930054, "loss_ce": 0.0014819505158811808, "loss_iou": 0.56640625, "loss_num": 0.052490234375, "loss_xval": 0.26171875, "num_input_tokens_seen": 110064992, "step": 641 }, { "epoch": 0.16886959952653383, "grad_norm": 9.158584740167319, "learning_rate": 5e-06, "loss": 0.1418, "num_input_tokens_seen": 110237352, "step": 642 }, { "epoch": 0.16886959952653383, "loss": 0.12861773371696472, "loss_ce": 0.006120163947343826, "loss_iou": 0.53125, "loss_num": 0.0245361328125, "loss_xval": 0.12255859375, "num_input_tokens_seen": 110237352, "step": 642 }, { "epoch": 0.16913263628592096, "grad_norm": 12.50685540012725, "learning_rate": 5e-06, "loss": 0.226, "num_input_tokens_seen": 110409596, "step": 643 }, { "epoch": 0.16913263628592096, "loss": 0.29422521591186523, "loss_ce": 0.006322397850453854, "loss_iou": 0.6171875, "loss_num": 0.0576171875, "loss_xval": 0.287109375, "num_input_tokens_seen": 110409596, "step": 643 }, { "epoch": 0.1693956730453081, "grad_norm": 8.91219200815607, "learning_rate": 5e-06, "loss": 0.1801, "num_input_tokens_seen": 110581872, "step": 644 }, { "epoch": 0.1693956730453081, "loss": 0.19362246990203857, "loss_ce": 
0.0011175863910466433, "loss_iou": 0.6328125, "loss_num": 0.03857421875, "loss_xval": 0.1923828125, "num_input_tokens_seen": 110581872, "step": 644 }, { "epoch": 0.16965870980469522, "grad_norm": 6.648045150838095, "learning_rate": 5e-06, "loss": 0.1817, "num_input_tokens_seen": 110754112, "step": 645 }, { "epoch": 0.16965870980469522, "loss": 0.22110968828201294, "loss_ce": 0.0007727851625531912, "loss_iou": 0.44921875, "loss_num": 0.0439453125, "loss_xval": 0.220703125, "num_input_tokens_seen": 110754112, "step": 645 }, { "epoch": 0.16992174656408232, "grad_norm": 5.685997850783031, "learning_rate": 5e-06, "loss": 0.1755, "num_input_tokens_seen": 110926320, "step": 646 }, { "epoch": 0.16992174656408232, "loss": 0.16139136254787445, "loss_ce": 0.0022421882022172213, "loss_iou": 0.54296875, "loss_num": 0.03173828125, "loss_xval": 0.1591796875, "num_input_tokens_seen": 110926320, "step": 646 }, { "epoch": 0.17018478332346945, "grad_norm": 5.619576982998229, "learning_rate": 5e-06, "loss": 0.1479, "num_input_tokens_seen": 111098480, "step": 647 }, { "epoch": 0.17018478332346945, "loss": 0.07204495370388031, "loss_ce": 0.0005727877141907811, "loss_iou": 0.484375, "loss_num": 0.0142822265625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 111098480, "step": 647 }, { "epoch": 0.17044782008285658, "grad_norm": 7.873961542866977, "learning_rate": 5e-06, "loss": 0.2025, "num_input_tokens_seen": 111270660, "step": 648 }, { "epoch": 0.17044782008285658, "loss": 0.2468957006931305, "loss_ce": 0.001473332871682942, "loss_iou": 0.4453125, "loss_num": 0.049072265625, "loss_xval": 0.2451171875, "num_input_tokens_seen": 111270660, "step": 648 }, { "epoch": 0.1707108568422437, "grad_norm": 10.015757077433259, "learning_rate": 5e-06, "loss": 0.1934, "num_input_tokens_seen": 111442504, "step": 649 }, { "epoch": 0.1707108568422437, "loss": 0.14880256354808807, "loss_ce": 0.0109546585008502, "loss_iou": 0.52734375, "loss_num": 0.027587890625, "loss_xval": 0.1376953125, 
"num_input_tokens_seen": 111442504, "step": 649 }, { "epoch": 0.17097389360163082, "grad_norm": 7.899725217893395, "learning_rate": 5e-06, "loss": 0.2213, "num_input_tokens_seen": 111614612, "step": 650 }, { "epoch": 0.17097389360163082, "loss": 0.31617793440818787, "loss_ce": 0.005386924371123314, "loss_iou": 0.39453125, "loss_num": 0.062255859375, "loss_xval": 0.310546875, "num_input_tokens_seen": 111614612, "step": 650 }, { "epoch": 0.17123693036101795, "grad_norm": 11.203783050737329, "learning_rate": 5e-06, "loss": 0.2011, "num_input_tokens_seen": 111786832, "step": 651 }, { "epoch": 0.17123693036101795, "loss": 0.16059955954551697, "loss_ce": 0.003617130685597658, "loss_iou": 0.57421875, "loss_num": 0.031494140625, "loss_xval": 0.1572265625, "num_input_tokens_seen": 111786832, "step": 651 }, { "epoch": 0.17149996712040508, "grad_norm": 10.553074580421196, "learning_rate": 5e-06, "loss": 0.1373, "num_input_tokens_seen": 111959112, "step": 652 }, { "epoch": 0.17149996712040508, "loss": 0.10537019371986389, "loss_ce": 0.0024038811679929495, "loss_iou": 0.40234375, "loss_num": 0.0206298828125, "loss_xval": 0.10302734375, "num_input_tokens_seen": 111959112, "step": 652 }, { "epoch": 0.1717630038797922, "grad_norm": 7.878994667708907, "learning_rate": 5e-06, "loss": 0.1709, "num_input_tokens_seen": 112129472, "step": 653 }, { "epoch": 0.1717630038797922, "loss": 0.14291326701641083, "loss_ce": 0.0017999822739511728, "loss_iou": 0.50390625, "loss_num": 0.0281982421875, "loss_xval": 0.140625, "num_input_tokens_seen": 112129472, "step": 653 }, { "epoch": 0.1720260406391793, "grad_norm": 8.502469919546648, "learning_rate": 5e-06, "loss": 0.2067, "num_input_tokens_seen": 112301764, "step": 654 }, { "epoch": 0.1720260406391793, "loss": 0.14102406799793243, "loss_ce": 0.0035118628293275833, "loss_iou": 0.640625, "loss_num": 0.0274658203125, "loss_xval": 0.1376953125, "num_input_tokens_seen": 112301764, "step": 654 }, { "epoch": 0.17228907739856644, "grad_norm": 
7.248102860819298, "learning_rate": 5e-06, "loss": 0.1947, "num_input_tokens_seen": 112473728, "step": 655 }, { "epoch": 0.17228907739856644, "loss": 0.3125525116920471, "loss_ce": 0.002799113281071186, "loss_iou": 0.455078125, "loss_num": 0.06201171875, "loss_xval": 0.310546875, "num_input_tokens_seen": 112473728, "step": 655 }, { "epoch": 0.17255211415795357, "grad_norm": 7.514690707459716, "learning_rate": 5e-06, "loss": 0.1795, "num_input_tokens_seen": 112646020, "step": 656 }, { "epoch": 0.17255211415795357, "loss": 0.19242502748966217, "loss_ce": 0.0032160417176783085, "loss_iou": 0.451171875, "loss_num": 0.037841796875, "loss_xval": 0.189453125, "num_input_tokens_seen": 112646020, "step": 656 }, { "epoch": 0.1728151509173407, "grad_norm": 12.157018929724885, "learning_rate": 5e-06, "loss": 0.2024, "num_input_tokens_seen": 112818344, "step": 657 }, { "epoch": 0.1728151509173407, "loss": 0.17007869482040405, "loss_ce": 0.0014995899982750416, "loss_iou": 0.63671875, "loss_num": 0.03369140625, "loss_xval": 0.1689453125, "num_input_tokens_seen": 112818344, "step": 657 }, { "epoch": 0.17307818767672783, "grad_norm": 12.713612456980975, "learning_rate": 5e-06, "loss": 0.1854, "num_input_tokens_seen": 112984384, "step": 658 }, { "epoch": 0.17307818767672783, "loss": 0.2311791479587555, "loss_ce": 0.003090762998908758, "loss_iou": 0.69140625, "loss_num": 0.045654296875, "loss_xval": 0.228515625, "num_input_tokens_seen": 112984384, "step": 658 }, { "epoch": 0.17334122443611494, "grad_norm": 7.9869346345765235, "learning_rate": 5e-06, "loss": 0.1714, "num_input_tokens_seen": 113156440, "step": 659 }, { "epoch": 0.17334122443611494, "loss": 0.14270631968975067, "loss_ce": 0.007757591083645821, "loss_iou": 0.59765625, "loss_num": 0.0269775390625, "loss_xval": 0.134765625, "num_input_tokens_seen": 113156440, "step": 659 }, { "epoch": 0.17360426119550207, "grad_norm": 19.43755709327552, "learning_rate": 5e-06, "loss": 0.2275, "num_input_tokens_seen": 113328684, "step": 660 
}, { "epoch": 0.17360426119550207, "loss": 0.19741018116474152, "loss_ce": 0.023887230083346367, "loss_iou": 0.47265625, "loss_num": 0.03466796875, "loss_xval": 0.173828125, "num_input_tokens_seen": 113328684, "step": 660 }, { "epoch": 0.1738672979548892, "grad_norm": 12.233621879837852, "learning_rate": 5e-06, "loss": 0.2043, "num_input_tokens_seen": 113499120, "step": 661 }, { "epoch": 0.1738672979548892, "loss": 0.17290905117988586, "loss_ce": 0.03179576247930527, "loss_iou": 0.640625, "loss_num": 0.0281982421875, "loss_xval": 0.140625, "num_input_tokens_seen": 113499120, "step": 661 }, { "epoch": 0.17413033471427633, "grad_norm": 8.034436568495584, "learning_rate": 5e-06, "loss": 0.2288, "num_input_tokens_seen": 113670976, "step": 662 }, { "epoch": 0.17413033471427633, "loss": 0.18118935823440552, "loss_ce": 0.0038822239730507135, "loss_iou": 0.5625, "loss_num": 0.035400390625, "loss_xval": 0.177734375, "num_input_tokens_seen": 113670976, "step": 662 }, { "epoch": 0.17439337147366343, "grad_norm": 6.838416843124344, "learning_rate": 5e-06, "loss": 0.211, "num_input_tokens_seen": 113841352, "step": 663 }, { "epoch": 0.17439337147366343, "loss": 0.16460734605789185, "loss_ce": 0.0039017903618514538, "loss_iou": 0.44140625, "loss_num": 0.0322265625, "loss_xval": 0.1611328125, "num_input_tokens_seen": 113841352, "step": 663 }, { "epoch": 0.17465640823305056, "grad_norm": 8.007803887726045, "learning_rate": 5e-06, "loss": 0.2191, "num_input_tokens_seen": 114013720, "step": 664 }, { "epoch": 0.17465640823305056, "loss": 0.19987425208091736, "loss_ce": 0.004256566055119038, "loss_iou": 0.3671875, "loss_num": 0.0390625, "loss_xval": 0.1953125, "num_input_tokens_seen": 114013720, "step": 664 }, { "epoch": 0.1749194449924377, "grad_norm": 15.416941151295424, "learning_rate": 5e-06, "loss": 0.1671, "num_input_tokens_seen": 114185964, "step": 665 }, { "epoch": 0.1749194449924377, "loss": 0.12474516034126282, "loss_ce": 0.0013320783618837595, "loss_iou": 0.53125, 
"loss_num": 0.024658203125, "loss_xval": 0.12353515625, "num_input_tokens_seen": 114185964, "step": 665 }, { "epoch": 0.17518248175182483, "grad_norm": 7.3780372763976105, "learning_rate": 5e-06, "loss": 0.1978, "num_input_tokens_seen": 114354848, "step": 666 }, { "epoch": 0.17518248175182483, "loss": 0.22682063281536102, "loss_ce": 0.002394365146756172, "loss_iou": 0.48828125, "loss_num": 0.044921875, "loss_xval": 0.224609375, "num_input_tokens_seen": 114354848, "step": 666 }, { "epoch": 0.17544551851121193, "grad_norm": 6.805100830892847, "learning_rate": 5e-06, "loss": 0.1893, "num_input_tokens_seen": 114526920, "step": 667 }, { "epoch": 0.17544551851121193, "loss": 0.16719527542591095, "loss_ce": 0.003987260162830353, "loss_iou": 0.4921875, "loss_num": 0.03271484375, "loss_xval": 0.1630859375, "num_input_tokens_seen": 114526920, "step": 667 }, { "epoch": 0.17570855527059906, "grad_norm": 13.214372039286259, "learning_rate": 5e-06, "loss": 0.1388, "num_input_tokens_seen": 114698988, "step": 668 }, { "epoch": 0.17570855527059906, "loss": 0.11197628825902939, "loss_ce": 0.0018078316934406757, "loss_iou": 0.50390625, "loss_num": 0.02197265625, "loss_xval": 0.1103515625, "num_input_tokens_seen": 114698988, "step": 668 }, { "epoch": 0.1759715920299862, "grad_norm": 11.38353136981813, "learning_rate": 5e-06, "loss": 0.1566, "num_input_tokens_seen": 114871164, "step": 669 }, { "epoch": 0.1759715920299862, "loss": 0.13217589259147644, "loss_ce": 0.0022930747363716364, "loss_iou": 0.60546875, "loss_num": 0.0260009765625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 114871164, "step": 669 }, { "epoch": 0.17623462878937332, "grad_norm": 6.3416006325736705, "learning_rate": 5e-06, "loss": 0.1994, "num_input_tokens_seen": 115043272, "step": 670 }, { "epoch": 0.17623462878937332, "loss": 0.2425597459077835, "loss_ce": 0.00726923206821084, "loss_iou": 0.462890625, "loss_num": 0.047119140625, "loss_xval": 0.2353515625, "num_input_tokens_seen": 115043272, "step": 670 }, { 
"epoch": 0.17649766554876045, "grad_norm": 7.208802786478081, "learning_rate": 5e-06, "loss": 0.1621, "num_input_tokens_seen": 115215332, "step": 671 }, { "epoch": 0.17649766554876045, "loss": 0.2342541515827179, "loss_ce": 0.006531976629048586, "loss_iou": 0.474609375, "loss_num": 0.045654296875, "loss_xval": 0.2275390625, "num_input_tokens_seen": 115215332, "step": 671 }, { "epoch": 0.17676070230814755, "grad_norm": 5.677644761077757, "learning_rate": 5e-06, "loss": 0.1422, "num_input_tokens_seen": 115387420, "step": 672 }, { "epoch": 0.17676070230814755, "loss": 0.1173291727900505, "loss_ce": 0.0031934345606714487, "loss_iou": 0.5546875, "loss_num": 0.0228271484375, "loss_xval": 0.1142578125, "num_input_tokens_seen": 115387420, "step": 672 }, { "epoch": 0.17702373906753469, "grad_norm": 13.804747880548247, "learning_rate": 5e-06, "loss": 0.1348, "num_input_tokens_seen": 115559764, "step": 673 }, { "epoch": 0.17702373906753469, "loss": 0.11581701785326004, "loss_ce": 0.0009488522773608565, "loss_iou": 0.546875, "loss_num": 0.02294921875, "loss_xval": 0.11474609375, "num_input_tokens_seen": 115559764, "step": 673 }, { "epoch": 0.17728677582692182, "grad_norm": 7.7796454748869435, "learning_rate": 5e-06, "loss": 0.2202, "num_input_tokens_seen": 115731572, "step": 674 }, { "epoch": 0.17728677582692182, "loss": 0.22158196568489075, "loss_ce": 0.00228265137411654, "loss_iou": 0.53125, "loss_num": 0.0439453125, "loss_xval": 0.2197265625, "num_input_tokens_seen": 115731572, "step": 674 }, { "epoch": 0.17754981258630895, "grad_norm": 7.463691885246712, "learning_rate": 5e-06, "loss": 0.1882, "num_input_tokens_seen": 115903680, "step": 675 }, { "epoch": 0.17754981258630895, "loss": 0.22185131907463074, "loss_ce": 0.002002692548558116, "loss_iou": 0.54296875, "loss_num": 0.0439453125, "loss_xval": 0.2197265625, "num_input_tokens_seen": 115903680, "step": 675 }, { "epoch": 0.17781284934569605, "grad_norm": 8.426002860661324, "learning_rate": 5e-06, "loss": 0.1793, 
"num_input_tokens_seen": 116075520, "step": 676 }, { "epoch": 0.17781284934569605, "loss": 0.1606462597846985, "loss_ce": 0.0009782931301742792, "loss_iou": 0.5625, "loss_num": 0.031982421875, "loss_xval": 0.16015625, "num_input_tokens_seen": 116075520, "step": 676 }, { "epoch": 0.17807588610508318, "grad_norm": 8.301724485985723, "learning_rate": 5e-06, "loss": 0.2, "num_input_tokens_seen": 116243620, "step": 677 }, { "epoch": 0.17807588610508318, "loss": 0.23937323689460754, "loss_ce": 0.002251650206744671, "loss_iou": 0.73828125, "loss_num": 0.047607421875, "loss_xval": 0.2373046875, "num_input_tokens_seen": 116243620, "step": 677 }, { "epoch": 0.1783389228644703, "grad_norm": 8.16746221218087, "learning_rate": 5e-06, "loss": 0.1767, "num_input_tokens_seen": 116414372, "step": 678 }, { "epoch": 0.1783389228644703, "loss": 0.2846035361289978, "loss_ce": 0.003506140550598502, "loss_iou": 0.498046875, "loss_num": 0.05615234375, "loss_xval": 0.28125, "num_input_tokens_seen": 116414372, "step": 678 }, { "epoch": 0.17860195962385744, "grad_norm": 11.233813688882323, "learning_rate": 5e-06, "loss": 0.2052, "num_input_tokens_seen": 116586316, "step": 679 }, { "epoch": 0.17860195962385744, "loss": 0.19775235652923584, "loss_ce": 0.0026840060018002987, "loss_iou": 0.482421875, "loss_num": 0.0390625, "loss_xval": 0.1953125, "num_input_tokens_seen": 116586316, "step": 679 }, { "epoch": 0.17886499638324455, "grad_norm": 7.134240923845786, "learning_rate": 5e-06, "loss": 0.1528, "num_input_tokens_seen": 116758540, "step": 680 }, { "epoch": 0.17886499638324455, "loss": 0.17349669337272644, "loss_ce": 0.0008892616024240851, "loss_iou": 0.6953125, "loss_num": 0.03466796875, "loss_xval": 0.1728515625, "num_input_tokens_seen": 116758540, "step": 680 }, { "epoch": 0.17912803314263168, "grad_norm": 8.457982820816902, "learning_rate": 5e-06, "loss": 0.2133, "num_input_tokens_seen": 116930944, "step": 681 }, { "epoch": 0.17912803314263168, "loss": 0.2098885327577591, "loss_ce": 
0.0027962373569607735, "loss_iou": 0.6796875, "loss_num": 0.04150390625, "loss_xval": 0.20703125, "num_input_tokens_seen": 116930944, "step": 681 }, { "epoch": 0.1793910699020188, "grad_norm": 8.07197838986281, "learning_rate": 5e-06, "loss": 0.2051, "num_input_tokens_seen": 117102756, "step": 682 }, { "epoch": 0.1793910699020188, "loss": 0.18535807728767395, "loss_ce": 0.003778480924665928, "loss_iou": 0.578125, "loss_num": 0.036376953125, "loss_xval": 0.181640625, "num_input_tokens_seen": 117102756, "step": 682 }, { "epoch": 0.17965410666140594, "grad_norm": 7.799168173540665, "learning_rate": 5e-06, "loss": 0.2208, "num_input_tokens_seen": 117274704, "step": 683 }, { "epoch": 0.17965410666140594, "loss": 0.22785347700119019, "loss_ce": 0.0029999692924320698, "loss_iou": 0.3125, "loss_num": 0.044921875, "loss_xval": 0.224609375, "num_input_tokens_seen": 117274704, "step": 683 }, { "epoch": 0.17991714342079307, "grad_norm": 6.357635509161791, "learning_rate": 5e-06, "loss": 0.1508, "num_input_tokens_seen": 117446896, "step": 684 }, { "epoch": 0.17991714342079307, "loss": 0.2038782835006714, "loss_ce": 0.010396835394203663, "loss_iou": 0.50390625, "loss_num": 0.03857421875, "loss_xval": 0.193359375, "num_input_tokens_seen": 117446896, "step": 684 }, { "epoch": 0.18018018018018017, "grad_norm": 7.961998959961417, "learning_rate": 5e-06, "loss": 0.1759, "num_input_tokens_seen": 117618944, "step": 685 }, { "epoch": 0.18018018018018017, "loss": 0.18519067764282227, "loss_ce": 0.0017190101789310575, "loss_iou": 0.54296875, "loss_num": 0.03662109375, "loss_xval": 0.18359375, "num_input_tokens_seen": 117618944, "step": 685 }, { "epoch": 0.1804432169395673, "grad_norm": 9.08254127907995, "learning_rate": 5e-06, "loss": 0.1885, "num_input_tokens_seen": 117791316, "step": 686 }, { "epoch": 0.1804432169395673, "loss": 0.20686465501785278, "loss_ce": 0.0025189572479575872, "loss_iou": 0.6796875, "loss_num": 0.041015625, "loss_xval": 0.2041015625, "num_input_tokens_seen": 
117791316, "step": 686 }, { "epoch": 0.18070625369895443, "grad_norm": 10.367656958651155, "learning_rate": 5e-06, "loss": 0.1855, "num_input_tokens_seen": 117963308, "step": 687 }, { "epoch": 0.18070625369895443, "loss": 0.13359057903289795, "loss_ce": 0.0041044931858778, "loss_iou": 0.6953125, "loss_num": 0.02587890625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 117963308, "step": 687 }, { "epoch": 0.18096929045834156, "grad_norm": 7.549954240364692, "learning_rate": 5e-06, "loss": 0.1299, "num_input_tokens_seen": 118135556, "step": 688 }, { "epoch": 0.18096929045834156, "loss": 0.14778929948806763, "loss_ce": 0.0011828583665192127, "loss_iou": 0.494140625, "loss_num": 0.029296875, "loss_xval": 0.146484375, "num_input_tokens_seen": 118135556, "step": 688 }, { "epoch": 0.18123232721772867, "grad_norm": 9.741300529319401, "learning_rate": 5e-06, "loss": 0.166, "num_input_tokens_seen": 118305884, "step": 689 }, { "epoch": 0.18123232721772867, "loss": 0.1753019541501999, "loss_ce": 0.003182805608958006, "loss_iou": 0.470703125, "loss_num": 0.034423828125, "loss_xval": 0.171875, "num_input_tokens_seen": 118305884, "step": 689 }, { "epoch": 0.1814953639771158, "grad_norm": 11.31734919561229, "learning_rate": 5e-06, "loss": 0.1811, "num_input_tokens_seen": 118478000, "step": 690 }, { "epoch": 0.1814953639771158, "loss": 0.189756840467453, "loss_ce": 0.0007919906638562679, "loss_iou": 0.77734375, "loss_num": 0.037841796875, "loss_xval": 0.189453125, "num_input_tokens_seen": 118478000, "step": 690 }, { "epoch": 0.18175840073650293, "grad_norm": 9.003341647966222, "learning_rate": 5e-06, "loss": 0.1755, "num_input_tokens_seen": 118650436, "step": 691 }, { "epoch": 0.18175840073650293, "loss": 0.11313121020793915, "loss_ce": 0.0006434204406104982, "loss_iou": 0.53515625, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 118650436, "step": 691 }, { "epoch": 0.18202143749589006, "grad_norm": 15.98200660549263, "learning_rate": 5e-06, 
"loss": 0.1717, "num_input_tokens_seen": 118822416, "step": 692 }, { "epoch": 0.18202143749589006, "loss": 0.14253322780132294, "loss_ce": 0.001908229780383408, "loss_iou": 0.453125, "loss_num": 0.028076171875, "loss_xval": 0.140625, "num_input_tokens_seen": 118822416, "step": 692 }, { "epoch": 0.18228447425527716, "grad_norm": 5.776705438700134, "learning_rate": 5e-06, "loss": 0.1588, "num_input_tokens_seen": 118994492, "step": 693 }, { "epoch": 0.18228447425527716, "loss": 0.15580043196678162, "loss_ce": 0.0014730504481121898, "loss_iou": 0.40625, "loss_num": 0.0308837890625, "loss_xval": 0.154296875, "num_input_tokens_seen": 118994492, "step": 693 }, { "epoch": 0.1825475110146643, "grad_norm": 13.97214493017941, "learning_rate": 5e-06, "loss": 0.1825, "num_input_tokens_seen": 119166828, "step": 694 }, { "epoch": 0.1825475110146643, "loss": 0.14117339253425598, "loss_ce": 0.005309135653078556, "loss_iou": 0.6171875, "loss_num": 0.0272216796875, "loss_xval": 0.1357421875, "num_input_tokens_seen": 119166828, "step": 694 }, { "epoch": 0.18281054777405142, "grad_norm": 9.340996582360178, "learning_rate": 5e-06, "loss": 0.2039, "num_input_tokens_seen": 119339040, "step": 695 }, { "epoch": 0.18281054777405142, "loss": 0.21793845295906067, "loss_ce": 0.005658184178173542, "loss_iou": 0.4921875, "loss_num": 0.04248046875, "loss_xval": 0.2119140625, "num_input_tokens_seen": 119339040, "step": 695 }, { "epoch": 0.18307358453343855, "grad_norm": 6.679514738557093, "learning_rate": 5e-06, "loss": 0.1756, "num_input_tokens_seen": 119511024, "step": 696 }, { "epoch": 0.18307358453343855, "loss": 0.18582630157470703, "loss_ce": 0.0012559981551021338, "loss_iou": 0.5546875, "loss_num": 0.036865234375, "loss_xval": 0.1845703125, "num_input_tokens_seen": 119511024, "step": 696 }, { "epoch": 0.18333662129282569, "grad_norm": 5.326914153992354, "learning_rate": 5e-06, "loss": 0.143, "num_input_tokens_seen": 119683564, "step": 697 }, { "epoch": 0.18333662129282569, "loss": 
0.15467330813407898, "loss_ce": 0.0042826831340789795, "loss_iou": 0.5703125, "loss_num": 0.030029296875, "loss_xval": 0.150390625, "num_input_tokens_seen": 119683564, "step": 697 }, { "epoch": 0.1835996580522128, "grad_norm": 8.515559517433303, "learning_rate": 5e-06, "loss": 0.1103, "num_input_tokens_seen": 119856000, "step": 698 }, { "epoch": 0.1835996580522128, "loss": 0.12481513619422913, "loss_ce": 0.0006696260534226894, "loss_iou": 0.6796875, "loss_num": 0.0247802734375, "loss_xval": 0.1240234375, "num_input_tokens_seen": 119856000, "step": 698 }, { "epoch": 0.18386269481159992, "grad_norm": 9.420262409376758, "learning_rate": 5e-06, "loss": 0.2191, "num_input_tokens_seen": 120028676, "step": 699 }, { "epoch": 0.18386269481159992, "loss": 0.22470733523368835, "loss_ce": 0.001868000952526927, "loss_iou": 0.6875, "loss_num": 0.044677734375, "loss_xval": 0.22265625, "num_input_tokens_seen": 120028676, "step": 699 }, { "epoch": 0.18412573157098705, "grad_norm": 7.530169678535974, "learning_rate": 5e-06, "loss": 0.1624, "num_input_tokens_seen": 120201208, "step": 700 }, { "epoch": 0.18412573157098705, "loss": 0.10793297737836838, "loss_ce": 0.0036849307361990213, "loss_iou": 0.58203125, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 120201208, "step": 700 }, { "epoch": 0.18438876833037418, "grad_norm": 8.205415154202601, "learning_rate": 5e-06, "loss": 0.1795, "num_input_tokens_seen": 120373444, "step": 701 }, { "epoch": 0.18438876833037418, "loss": 0.1353437304496765, "loss_ce": 0.001066376455128193, "loss_iou": 0.5390625, "loss_num": 0.02685546875, "loss_xval": 0.134765625, "num_input_tokens_seen": 120373444, "step": 701 }, { "epoch": 0.18465180508976128, "grad_norm": 8.319297966322562, "learning_rate": 5e-06, "loss": 0.1785, "num_input_tokens_seen": 120545552, "step": 702 }, { "epoch": 0.18465180508976128, "loss": 0.18953613936901093, "loss_ce": 0.0019751053769141436, "loss_iou": 0.69140625, "loss_num": 0.03759765625, 
"loss_xval": 0.1875, "num_input_tokens_seen": 120545552, "step": 702 }, { "epoch": 0.18491484184914841, "grad_norm": 8.464289102867136, "learning_rate": 5e-06, "loss": 0.219, "num_input_tokens_seen": 120717584, "step": 703 }, { "epoch": 0.18491484184914841, "loss": 0.24215811491012573, "loss_ce": 0.004975516349077225, "loss_iou": 0.33203125, "loss_num": 0.047607421875, "loss_xval": 0.2373046875, "num_input_tokens_seen": 120717584, "step": 703 }, { "epoch": 0.18517787860853555, "grad_norm": 12.962080293446185, "learning_rate": 5e-06, "loss": 0.2162, "num_input_tokens_seen": 120889756, "step": 704 }, { "epoch": 0.18517787860853555, "loss": 0.17355692386627197, "loss_ce": 0.0025364109314978123, "loss_iou": 0.7109375, "loss_num": 0.0341796875, "loss_xval": 0.1708984375, "num_input_tokens_seen": 120889756, "step": 704 }, { "epoch": 0.18544091536792268, "grad_norm": 13.968822655265907, "learning_rate": 5e-06, "loss": 0.2237, "num_input_tokens_seen": 121062200, "step": 705 }, { "epoch": 0.18544091536792268, "loss": 0.2510673403739929, "loss_ce": 0.005400843918323517, "loss_iou": 0.3984375, "loss_num": 0.049072265625, "loss_xval": 0.24609375, "num_input_tokens_seen": 121062200, "step": 705 }, { "epoch": 0.18570395212730978, "grad_norm": 15.418397137980255, "learning_rate": 5e-06, "loss": 0.2126, "num_input_tokens_seen": 121234300, "step": 706 }, { "epoch": 0.18570395212730978, "loss": 0.239446759223938, "loss_ce": 0.0006772410124540329, "loss_iou": 0.55078125, "loss_num": 0.0478515625, "loss_xval": 0.23828125, "num_input_tokens_seen": 121234300, "step": 706 }, { "epoch": 0.1859669888866969, "grad_norm": 4.8632608475065355, "learning_rate": 5e-06, "loss": 0.1269, "num_input_tokens_seen": 121406408, "step": 707 }, { "epoch": 0.1859669888866969, "loss": 0.10976609587669373, "loss_ce": 0.003137679770588875, "loss_iou": 0.50390625, "loss_num": 0.0213623046875, "loss_xval": 0.1064453125, "num_input_tokens_seen": 121406408, "step": 707 }, { "epoch": 0.18623002564608404, 
"grad_norm": 9.856831060146035, "learning_rate": 5e-06, "loss": 0.1699, "num_input_tokens_seen": 121578364, "step": 708 }, { "epoch": 0.18623002564608404, "loss": 0.06338398158550262, "loss_ce": 0.0025319333653897047, "loss_iou": 0.4765625, "loss_num": 0.01214599609375, "loss_xval": 0.060791015625, "num_input_tokens_seen": 121578364, "step": 708 }, { "epoch": 0.18649306240547117, "grad_norm": 12.596787814338684, "learning_rate": 5e-06, "loss": 0.1839, "num_input_tokens_seen": 121750612, "step": 709 }, { "epoch": 0.18649306240547117, "loss": 0.2992492616176605, "loss_ce": 0.0012756290379911661, "loss_iou": 0.5234375, "loss_num": 0.0595703125, "loss_xval": 0.298828125, "num_input_tokens_seen": 121750612, "step": 709 }, { "epoch": 0.1867560991648583, "grad_norm": 7.147545819350773, "learning_rate": 5e-06, "loss": 0.1776, "num_input_tokens_seen": 121922804, "step": 710 }, { "epoch": 0.1867560991648583, "loss": 0.18950411677360535, "loss_ce": 0.0032858517952263355, "loss_iou": 0.609375, "loss_num": 0.037353515625, "loss_xval": 0.1865234375, "num_input_tokens_seen": 121922804, "step": 710 }, { "epoch": 0.1870191359242454, "grad_norm": 22.63136110983146, "learning_rate": 5e-06, "loss": 0.1331, "num_input_tokens_seen": 122094924, "step": 711 }, { "epoch": 0.1870191359242454, "loss": 0.12849926948547363, "loss_ce": 0.0014851100277155638, "loss_iou": 0.625, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 122094924, "step": 711 }, { "epoch": 0.18728217268363254, "grad_norm": 6.766417510968721, "learning_rate": 5e-06, "loss": 0.1724, "num_input_tokens_seen": 122265668, "step": 712 }, { "epoch": 0.18728217268363254, "loss": 0.15207967162132263, "loss_ce": 0.006968589033931494, "loss_iou": 0.58984375, "loss_num": 0.029052734375, "loss_xval": 0.1455078125, "num_input_tokens_seen": 122265668, "step": 712 }, { "epoch": 0.18754520944301967, "grad_norm": 7.923158875388113, "learning_rate": 5e-06, "loss": 0.1904, "num_input_tokens_seen": 122437796, "step": 
713 }, { "epoch": 0.18754520944301967, "loss": 0.20568042993545532, "loss_ce": 0.002189208287745714, "loss_iou": 0.5703125, "loss_num": 0.040771484375, "loss_xval": 0.203125, "num_input_tokens_seen": 122437796, "step": 713 }, { "epoch": 0.1878082462024068, "grad_norm": 8.667331104283747, "learning_rate": 5e-06, "loss": 0.1887, "num_input_tokens_seen": 122610336, "step": 714 }, { "epoch": 0.1878082462024068, "loss": 0.1953202784061432, "loss_ce": 0.0012895169202238321, "loss_iou": 0.287109375, "loss_num": 0.038818359375, "loss_xval": 0.1943359375, "num_input_tokens_seen": 122610336, "step": 714 }, { "epoch": 0.1880712829617939, "grad_norm": 12.96410805672465, "learning_rate": 5e-06, "loss": 0.2143, "num_input_tokens_seen": 122779392, "step": 715 }, { "epoch": 0.1880712829617939, "loss": 0.21114467084407806, "loss_ce": 0.001916159177199006, "loss_iou": 0.41015625, "loss_num": 0.041748046875, "loss_xval": 0.208984375, "num_input_tokens_seen": 122779392, "step": 715 }, { "epoch": 0.18833431972118103, "grad_norm": 6.980680552365313, "learning_rate": 5e-06, "loss": 0.1831, "num_input_tokens_seen": 122951724, "step": 716 }, { "epoch": 0.18833431972118103, "loss": 0.24306175112724304, "loss_ce": 0.006245340220630169, "loss_iou": 0.3359375, "loss_num": 0.04736328125, "loss_xval": 0.236328125, "num_input_tokens_seen": 122951724, "step": 716 }, { "epoch": 0.18859735648056816, "grad_norm": 12.855577653554155, "learning_rate": 5e-06, "loss": 0.1591, "num_input_tokens_seen": 123122088, "step": 717 }, { "epoch": 0.18859735648056816, "loss": 0.1986149549484253, "loss_ce": 0.0012882874580100179, "loss_iou": 0.51953125, "loss_num": 0.03955078125, "loss_xval": 0.197265625, "num_input_tokens_seen": 123122088, "step": 717 }, { "epoch": 0.1888603932399553, "grad_norm": 6.930918072961039, "learning_rate": 5e-06, "loss": 0.1899, "num_input_tokens_seen": 123294160, "step": 718 }, { "epoch": 0.1888603932399553, "loss": 0.19642534852027893, "loss_ce": 0.0014790646964684129, "loss_iou": 
0.40625, "loss_num": 0.0390625, "loss_xval": 0.1953125, "num_input_tokens_seen": 123294160, "step": 718 }, { "epoch": 0.1891234299993424, "grad_norm": 6.929009893187008, "learning_rate": 5e-06, "loss": 0.1532, "num_input_tokens_seen": 123466060, "step": 719 }, { "epoch": 0.1891234299993424, "loss": 0.1527690589427948, "loss_ce": 0.005094507243484259, "loss_iou": 0.40234375, "loss_num": 0.029541015625, "loss_xval": 0.1474609375, "num_input_tokens_seen": 123466060, "step": 719 }, { "epoch": 0.18938646675872953, "grad_norm": 10.25908293873023, "learning_rate": 5e-06, "loss": 0.1644, "num_input_tokens_seen": 123636388, "step": 720 }, { "epoch": 0.18938646675872953, "loss": 0.18421480059623718, "loss_ce": 0.0032455746550112963, "loss_iou": 0.60546875, "loss_num": 0.0361328125, "loss_xval": 0.1806640625, "num_input_tokens_seen": 123636388, "step": 720 }, { "epoch": 0.18964950351811666, "grad_norm": 7.318122077183922, "learning_rate": 5e-06, "loss": 0.1458, "num_input_tokens_seen": 123808316, "step": 721 }, { "epoch": 0.18964950351811666, "loss": 0.1352817267179489, "loss_ce": 0.0030185491777956486, "loss_iou": 0.34765625, "loss_num": 0.0264892578125, "loss_xval": 0.1318359375, "num_input_tokens_seen": 123808316, "step": 721 }, { "epoch": 0.1899125402775038, "grad_norm": 7.382171237495957, "learning_rate": 5e-06, "loss": 0.2283, "num_input_tokens_seen": 123980672, "step": 722 }, { "epoch": 0.1899125402775038, "loss": 0.15074753761291504, "loss_ce": 0.0036528080236166716, "loss_iou": 0.6640625, "loss_num": 0.0294189453125, "loss_xval": 0.1474609375, "num_input_tokens_seen": 123980672, "step": 722 }, { "epoch": 0.19017557703689092, "grad_norm": 5.982120923855094, "learning_rate": 5e-06, "loss": 0.1499, "num_input_tokens_seen": 124150936, "step": 723 }, { "epoch": 0.19017557703689092, "loss": 0.10008575022220612, "loss_ce": 0.0014224194455891848, "loss_iou": 0.48828125, "loss_num": 0.019775390625, "loss_xval": 0.0986328125, "num_input_tokens_seen": 124150936, "step": 723 }, 
{ "epoch": 0.19043861379627802, "grad_norm": 8.136060280729641, "learning_rate": 5e-06, "loss": 0.1378, "num_input_tokens_seen": 124323280, "step": 724 }, { "epoch": 0.19043861379627802, "loss": 0.12422403693199158, "loss_ce": 0.0017264705384150147, "loss_iou": 0.69140625, "loss_num": 0.0245361328125, "loss_xval": 0.12255859375, "num_input_tokens_seen": 124323280, "step": 724 }, { "epoch": 0.19070165055566515, "grad_norm": 8.724019912821047, "learning_rate": 5e-06, "loss": 0.2613, "num_input_tokens_seen": 124495636, "step": 725 }, { "epoch": 0.19070165055566515, "loss": 0.13688622415065765, "loss_ce": 0.0040737236849963665, "loss_iou": 0.57421875, "loss_num": 0.026611328125, "loss_xval": 0.1328125, "num_input_tokens_seen": 124495636, "step": 725 }, { "epoch": 0.19096468731505228, "grad_norm": 7.423558318920191, "learning_rate": 5e-06, "loss": 0.2051, "num_input_tokens_seen": 124667644, "step": 726 }, { "epoch": 0.19096468731505228, "loss": 0.2597463130950928, "loss_ce": 0.0010182850528508425, "loss_iou": 0.416015625, "loss_num": 0.0517578125, "loss_xval": 0.2578125, "num_input_tokens_seen": 124667644, "step": 726 }, { "epoch": 0.19122772407443941, "grad_norm": 7.827506457517143, "learning_rate": 5e-06, "loss": 0.1586, "num_input_tokens_seen": 124839252, "step": 727 }, { "epoch": 0.19122772407443941, "loss": 0.1310674101114273, "loss_ce": 0.003076688153669238, "loss_iou": 0.40234375, "loss_num": 0.025634765625, "loss_xval": 0.1279296875, "num_input_tokens_seen": 124839252, "step": 727 }, { "epoch": 0.19149076083382652, "grad_norm": 9.183421040665964, "learning_rate": 5e-06, "loss": 0.1576, "num_input_tokens_seen": 125011496, "step": 728 }, { "epoch": 0.19149076083382652, "loss": 0.11314553767442703, "loss_ce": 0.0003525639185681939, "loss_iou": 0.625, "loss_num": 0.0225830078125, "loss_xval": 0.11279296875, "num_input_tokens_seen": 125011496, "step": 728 }, { "epoch": 0.19175379759321365, "grad_norm": 10.185706416909431, "learning_rate": 5e-06, "loss": 0.2047, 
"num_input_tokens_seen": 125179336, "step": 729 }, { "epoch": 0.19175379759321365, "loss": 0.1552383303642273, "loss_ce": 0.005152884405106306, "loss_iou": 0.6015625, "loss_num": 0.030029296875, "loss_xval": 0.150390625, "num_input_tokens_seen": 125179336, "step": 729 }, { "epoch": 0.19201683435260078, "grad_norm": 8.517948232569886, "learning_rate": 5e-06, "loss": 0.1789, "num_input_tokens_seen": 125351404, "step": 730 }, { "epoch": 0.19201683435260078, "loss": 0.23154987394809723, "loss_ce": 0.0012031885562464595, "loss_iou": 0.50390625, "loss_num": 0.046142578125, "loss_xval": 0.23046875, "num_input_tokens_seen": 125351404, "step": 730 }, { "epoch": 0.1922798711119879, "grad_norm": 8.61986864690339, "learning_rate": 5e-06, "loss": 0.1265, "num_input_tokens_seen": 125523692, "step": 731 }, { "epoch": 0.1922798711119879, "loss": 0.14681357145309448, "loss_ce": 0.0021907794289290905, "loss_iou": 0.357421875, "loss_num": 0.0289306640625, "loss_xval": 0.14453125, "num_input_tokens_seen": 125523692, "step": 731 }, { "epoch": 0.192542907871375, "grad_norm": 5.859065568818718, "learning_rate": 5e-06, "loss": 0.178, "num_input_tokens_seen": 125693908, "step": 732 }, { "epoch": 0.192542907871375, "loss": 0.15787754952907562, "loss_ce": 0.0012613451108336449, "loss_iou": 0.46875, "loss_num": 0.03125, "loss_xval": 0.15625, "num_input_tokens_seen": 125693908, "step": 732 }, { "epoch": 0.19280594463076214, "grad_norm": 10.469269184120652, "learning_rate": 5e-06, "loss": 0.1438, "num_input_tokens_seen": 125865868, "step": 733 }, { "epoch": 0.19280594463076214, "loss": 0.13217297196388245, "loss_ce": 0.0014051578473299742, "loss_iou": 0.578125, "loss_num": 0.026123046875, "loss_xval": 0.130859375, "num_input_tokens_seen": 125865868, "step": 733 }, { "epoch": 0.19306898139014927, "grad_norm": 9.209135916468261, "learning_rate": 5e-06, "loss": 0.1745, "num_input_tokens_seen": 126037852, "step": 734 }, { "epoch": 0.19306898139014927, "loss": 0.17599225044250488, "loss_ce": 
0.0004551436868496239, "loss_iou": 0.65625, "loss_num": 0.03515625, "loss_xval": 0.17578125, "num_input_tokens_seen": 126037852, "step": 734 }, { "epoch": 0.1933320181495364, "grad_norm": 8.89690790064976, "learning_rate": 5e-06, "loss": 0.2197, "num_input_tokens_seen": 126209984, "step": 735 }, { "epoch": 0.1933320181495364, "loss": 0.1378391683101654, "loss_ce": 0.0020359433256089687, "loss_iou": 0.6875, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 126209984, "step": 735 }, { "epoch": 0.1935950549089235, "grad_norm": 9.250244451235272, "learning_rate": 5e-06, "loss": 0.1999, "num_input_tokens_seen": 126382140, "step": 736 }, { "epoch": 0.1935950549089235, "loss": 0.24104052782058716, "loss_ce": 0.0028813518583774567, "loss_iou": 0.671875, "loss_num": 0.047607421875, "loss_xval": 0.23828125, "num_input_tokens_seen": 126382140, "step": 736 }, { "epoch": 0.19385809166831064, "grad_norm": 6.538014726838841, "learning_rate": 5e-06, "loss": 0.1574, "num_input_tokens_seen": 126554124, "step": 737 }, { "epoch": 0.19385809166831064, "loss": 0.19325746595859528, "loss_ce": 0.0034991574939340353, "loss_iou": 0.48046875, "loss_num": 0.0380859375, "loss_xval": 0.189453125, "num_input_tokens_seen": 126554124, "step": 737 }, { "epoch": 0.19412112842769777, "grad_norm": 6.359733816292311, "learning_rate": 5e-06, "loss": 0.1727, "num_input_tokens_seen": 126726352, "step": 738 }, { "epoch": 0.19412112842769777, "loss": 0.1890466809272766, "loss_ce": 0.003866007784381509, "loss_iou": 0.7109375, "loss_num": 0.037109375, "loss_xval": 0.185546875, "num_input_tokens_seen": 126726352, "step": 738 }, { "epoch": 0.1943841651870849, "grad_norm": 6.737716233597974, "learning_rate": 5e-06, "loss": 0.168, "num_input_tokens_seen": 126898504, "step": 739 }, { "epoch": 0.1943841651870849, "loss": 0.22711391746997833, "loss_ce": 0.0020162612199783325, "loss_iou": 0.6640625, "loss_num": 0.044921875, "loss_xval": 0.224609375, "num_input_tokens_seen": 126898504, 
"step": 739 }, { "epoch": 0.19464720194647203, "grad_norm": 10.63587672601612, "learning_rate": 5e-06, "loss": 0.2406, "num_input_tokens_seen": 127070440, "step": 740 }, { "epoch": 0.19464720194647203, "loss": 0.2545185983181, "loss_ce": 0.0035420400090515614, "loss_iou": 0.53125, "loss_num": 0.05029296875, "loss_xval": 0.25, "num_input_tokens_seen": 127070440, "step": 740 }, { "epoch": 0.19491023870585913, "grad_norm": 10.573425928841477, "learning_rate": 5e-06, "loss": 0.1461, "num_input_tokens_seen": 127242884, "step": 741 }, { "epoch": 0.19491023870585913, "loss": 0.07598280906677246, "loss_ce": 0.0007874930743128061, "loss_iou": 0.5546875, "loss_num": 0.0150146484375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 127242884, "step": 741 }, { "epoch": 0.19517327546524627, "grad_norm": 5.624166188859615, "learning_rate": 5e-06, "loss": 0.1171, "num_input_tokens_seen": 127413548, "step": 742 }, { "epoch": 0.19517327546524627, "loss": 0.12612518668174744, "loss_ce": 0.0008200095035135746, "loss_iou": 0.447265625, "loss_num": 0.0250244140625, "loss_xval": 0.125, "num_input_tokens_seen": 127413548, "step": 742 }, { "epoch": 0.1954363122246334, "grad_norm": 5.416712373816477, "learning_rate": 5e-06, "loss": 0.1653, "num_input_tokens_seen": 127583504, "step": 743 }, { "epoch": 0.1954363122246334, "loss": 0.14368192851543427, "loss_ce": 0.003209515009075403, "loss_iou": 0.474609375, "loss_num": 0.028076171875, "loss_xval": 0.140625, "num_input_tokens_seen": 127583504, "step": 743 }, { "epoch": 0.19569934898402053, "grad_norm": 7.55903022235747, "learning_rate": 5e-06, "loss": 0.1741, "num_input_tokens_seen": 127753924, "step": 744 }, { "epoch": 0.19569934898402053, "loss": 0.26765167713165283, "loss_ce": 0.009717106819152832, "loss_iou": 0.451171875, "loss_num": 0.051513671875, "loss_xval": 0.2578125, "num_input_tokens_seen": 127753924, "step": 744 }, { "epoch": 0.19596238574340763, "grad_norm": 6.325854098350077, "learning_rate": 5e-06, "loss": 0.1732, 
"num_input_tokens_seen": 127925840, "step": 745 }, { "epoch": 0.19596238574340763, "loss": 0.13880833983421326, "loss_ce": 0.0011435477063059807, "loss_iou": 0.50390625, "loss_num": 0.027587890625, "loss_xval": 0.1376953125, "num_input_tokens_seen": 127925840, "step": 745 }, { "epoch": 0.19622542250279476, "grad_norm": 7.469182967125795, "learning_rate": 5e-06, "loss": 0.1661, "num_input_tokens_seen": 128098212, "step": 746 }, { "epoch": 0.19622542250279476, "loss": 0.22049343585968018, "loss_ce": 0.002109651220962405, "loss_iou": 0.61328125, "loss_num": 0.043701171875, "loss_xval": 0.21875, "num_input_tokens_seen": 128098212, "step": 746 }, { "epoch": 0.1964884592621819, "grad_norm": 8.506872378366532, "learning_rate": 5e-06, "loss": 0.1859, "num_input_tokens_seen": 128270372, "step": 747 }, { "epoch": 0.1964884592621819, "loss": 0.1590556651353836, "loss_ce": 0.0010356476996093988, "loss_iou": 0.515625, "loss_num": 0.03173828125, "loss_xval": 0.158203125, "num_input_tokens_seen": 128270372, "step": 747 }, { "epoch": 0.19675149602156902, "grad_norm": 7.2518370649313075, "learning_rate": 5e-06, "loss": 0.1966, "num_input_tokens_seen": 128439832, "step": 748 }, { "epoch": 0.19675149602156902, "loss": 0.16326385736465454, "loss_ce": 0.0021310443989932537, "loss_iou": 0.55859375, "loss_num": 0.0322265625, "loss_xval": 0.1611328125, "num_input_tokens_seen": 128439832, "step": 748 }, { "epoch": 0.19701453278095613, "grad_norm": 9.929926428237398, "learning_rate": 5e-06, "loss": 0.1635, "num_input_tokens_seen": 128612032, "step": 749 }, { "epoch": 0.19701453278095613, "loss": 0.09666653722524643, "loss_ce": 0.002489290665835142, "loss_iou": 0.71484375, "loss_num": 0.018798828125, "loss_xval": 0.09423828125, "num_input_tokens_seen": 128612032, "step": 749 }, { "epoch": 0.19727756954034326, "grad_norm": 8.129979685164507, "learning_rate": 5e-06, "loss": 0.144, "num_input_tokens_seen": 128784236, "step": 750 }, { "epoch": 0.19727756954034326, "eval_websight_new_CIoU": 
0.7427884042263031, "eval_websight_new_GIoU": 0.7464376986026764, "eval_websight_new_IoU": 0.7504112422466278, "eval_websight_new_MAE_all": 0.03649342246353626, "eval_websight_new_MAE_h": 0.03233582433313131, "eval_websight_new_MAE_w": 0.056174855679273605, "eval_websight_new_MAE_x": 0.046134982258081436, "eval_websight_new_MAE_y": 0.01132803549990058, "eval_websight_new_NUM_probability": 0.9972327351570129, "eval_websight_new_inside_bbox": 1.0, "eval_websight_new_loss": 0.19190411269664764, "eval_websight_new_loss_ce": 0.00034623414103407413, "eval_websight_new_loss_iou": 0.777099609375, "eval_websight_new_loss_num": 0.03619384765625, "eval_websight_new_loss_xval": 0.18096923828125, "eval_websight_new_runtime": 55.1355, "eval_websight_new_samples_per_second": 0.907, "eval_websight_new_steps_per_second": 0.036, "num_input_tokens_seen": 128784236, "step": 750 }, { "epoch": 0.19727756954034326, "eval_seeclick_CIoU": 0.4580978453159332, "eval_seeclick_GIoU": 0.44802993535995483, "eval_seeclick_IoU": 0.4842703342437744, "eval_seeclick_MAE_all": 0.05876399576663971, "eval_seeclick_MAE_h": 0.0604591965675354, "eval_seeclick_MAE_w": 0.06378005631268024, "eval_seeclick_MAE_x": 0.07332894578576088, "eval_seeclick_MAE_y": 0.03748778998851776, "eval_seeclick_NUM_probability": 0.9986195266246796, "eval_seeclick_inside_bbox": 0.9375, "eval_seeclick_loss": 0.3042093813419342, "eval_seeclick_loss_ce": 0.012256910093128681, "eval_seeclick_loss_iou": 0.6614990234375, "eval_seeclick_loss_num": 0.055084228515625, "eval_seeclick_loss_xval": 0.27545166015625, "eval_seeclick_runtime": 69.7623, "eval_seeclick_samples_per_second": 0.616, "eval_seeclick_steps_per_second": 0.029, "num_input_tokens_seen": 128784236, "step": 750 }, { "epoch": 0.19727756954034326, "eval_icons_CIoU": 0.7633987963199615, "eval_icons_GIoU": 0.7561427056789398, "eval_icons_IoU": 0.7705403864383698, "eval_icons_MAE_all": 0.029183855280280113, "eval_icons_MAE_h": 0.035788778215646744, "eval_icons_MAE_w": 
0.03255164809525013, "eval_icons_MAE_x": 0.02722846996039152, "eval_icons_MAE_y": 0.02116652298718691, "eval_icons_NUM_probability": 0.9982486963272095, "eval_icons_inside_bbox": 0.9565972089767456, "eval_icons_loss": 0.13746696710586548, "eval_icons_loss_ce": 0.0027548681828193367, "eval_icons_loss_iou": 0.7633056640625, "eval_icons_loss_num": 0.02550506591796875, "eval_icons_loss_xval": 0.127532958984375, "eval_icons_runtime": 87.2396, "eval_icons_samples_per_second": 0.573, "eval_icons_steps_per_second": 0.023, "num_input_tokens_seen": 128784236, "step": 750 }, { "epoch": 0.19727756954034326, "eval_screenspot_CIoU": 0.4680892725785573, "eval_screenspot_GIoU": 0.45600322882334393, "eval_screenspot_IoU": 0.507049967845281, "eval_screenspot_MAE_all": 0.09560441970825195, "eval_screenspot_MAE_h": 0.08166227489709854, "eval_screenspot_MAE_w": 0.14845576385656992, "eval_screenspot_MAE_x": 0.09815465907255809, "eval_screenspot_MAE_y": 0.054144968589146934, "eval_screenspot_NUM_probability": 0.9986165563265482, "eval_screenspot_inside_bbox": 0.8558333317438761, "eval_screenspot_loss": 0.805972158908844, "eval_screenspot_loss_ce": 0.38150885701179504, "eval_screenspot_loss_iou": 0.580078125, "eval_screenspot_loss_num": 0.0841064453125, "eval_screenspot_loss_xval": 0.4202473958333333, "eval_screenspot_runtime": 148.1708, "eval_screenspot_samples_per_second": 0.601, "eval_screenspot_steps_per_second": 0.02, "num_input_tokens_seen": 128784236, "step": 750 }, { "epoch": 0.19727756954034326, "loss": 0.7833826541900635, "loss_ce": 0.36297255754470825, "loss_iou": 0.41015625, "loss_num": 0.083984375, "loss_xval": 0.419921875, "num_input_tokens_seen": 128784236, "step": 750 }, { "epoch": 0.1975406062997304, "grad_norm": 6.107960207443286, "learning_rate": 5e-06, "loss": 0.2391, "num_input_tokens_seen": 128956536, "step": 751 }, { "epoch": 0.1975406062997304, "loss": 0.27341228723526, "loss_ce": 0.0016227375017479062, "loss_iou": 0.734375, "loss_num": 0.054443359375, "loss_xval": 
0.271484375, "num_input_tokens_seen": 128956536, "step": 751 }, { "epoch": 0.19780364305911752, "grad_norm": 12.720415598286513, "learning_rate": 5e-06, "loss": 0.1518, "num_input_tokens_seen": 129128836, "step": 752 }, { "epoch": 0.19780364305911752, "loss": 0.12072408944368362, "loss_ce": 0.002621056977659464, "loss_iou": 0.515625, "loss_num": 0.023681640625, "loss_xval": 0.1181640625, "num_input_tokens_seen": 129128836, "step": 752 }, { "epoch": 0.19806667981850465, "grad_norm": 14.921842726715303, "learning_rate": 5e-06, "loss": 0.1648, "num_input_tokens_seen": 129300844, "step": 753 }, { "epoch": 0.19806667981850465, "loss": 0.1643597036600113, "loss_ce": 0.0029217104893177748, "loss_iou": 0.546875, "loss_num": 0.0322265625, "loss_xval": 0.1611328125, "num_input_tokens_seen": 129300844, "step": 753 }, { "epoch": 0.19832971657789175, "grad_norm": 6.005245677684448, "learning_rate": 5e-06, "loss": 0.1204, "num_input_tokens_seen": 129471448, "step": 754 }, { "epoch": 0.19832971657789175, "loss": 0.09330937266349792, "loss_ce": 0.0012683530803769827, "loss_iou": 0.5625, "loss_num": 0.0184326171875, "loss_xval": 0.091796875, "num_input_tokens_seen": 129471448, "step": 754 }, { "epoch": 0.19859275333727888, "grad_norm": 10.390450693463213, "learning_rate": 5e-06, "loss": 0.1653, "num_input_tokens_seen": 129643296, "step": 755 }, { "epoch": 0.19859275333727888, "loss": 0.19149892032146454, "loss_ce": 0.000733550579752773, "loss_iou": 0.58984375, "loss_num": 0.0380859375, "loss_xval": 0.1904296875, "num_input_tokens_seen": 129643296, "step": 755 }, { "epoch": 0.198855790096666, "grad_norm": 12.828044454318112, "learning_rate": 5e-06, "loss": 0.1805, "num_input_tokens_seen": 129815492, "step": 756 }, { "epoch": 0.198855790096666, "loss": 0.17852596938610077, "loss_ce": 0.003294031834229827, "loss_iou": 0.486328125, "loss_num": 0.03515625, "loss_xval": 0.1748046875, "num_input_tokens_seen": 129815492, "step": 756 }, { "epoch": 0.19911882685605314, "grad_norm": 
8.380872387581206, "learning_rate": 5e-06, "loss": 0.2269, "num_input_tokens_seen": 129987692, "step": 757 }, { "epoch": 0.19911882685605314, "loss": 0.20438869297504425, "loss_ce": 0.00040920061292126775, "loss_iou": 0.64453125, "loss_num": 0.040771484375, "loss_xval": 0.2041015625, "num_input_tokens_seen": 129987692, "step": 757 }, { "epoch": 0.19938186361544025, "grad_norm": 11.044900252894326, "learning_rate": 5e-06, "loss": 0.1723, "num_input_tokens_seen": 130159828, "step": 758 }, { "epoch": 0.19938186361544025, "loss": 0.1886797845363617, "loss_ce": 0.0009661591611802578, "loss_iou": 0.56640625, "loss_num": 0.03759765625, "loss_xval": 0.1875, "num_input_tokens_seen": 130159828, "step": 758 }, { "epoch": 0.19964490037482738, "grad_norm": 8.638981565005851, "learning_rate": 5e-06, "loss": 0.1719, "num_input_tokens_seen": 130331756, "step": 759 }, { "epoch": 0.19964490037482738, "loss": 0.1088617593050003, "loss_ce": 0.0055597638711333275, "loss_iou": 0.5703125, "loss_num": 0.0206298828125, "loss_xval": 0.103515625, "num_input_tokens_seen": 130331756, "step": 759 }, { "epoch": 0.1999079371342145, "grad_norm": 16.96292619187744, "learning_rate": 5e-06, "loss": 0.1629, "num_input_tokens_seen": 130503960, "step": 760 }, { "epoch": 0.1999079371342145, "loss": 0.1997271627187729, "loss_ce": 0.001668088138103485, "loss_iou": 0.4609375, "loss_num": 0.03955078125, "loss_xval": 0.1982421875, "num_input_tokens_seen": 130503960, "step": 760 }, { "epoch": 0.20017097389360164, "grad_norm": 18.254879073962975, "learning_rate": 5e-06, "loss": 0.209, "num_input_tokens_seen": 130676124, "step": 761 }, { "epoch": 0.20017097389360164, "loss": 0.22872218489646912, "loss_ce": 0.002464861376211047, "loss_iou": 0.55078125, "loss_num": 0.045166015625, "loss_xval": 0.2265625, "num_input_tokens_seen": 130676124, "step": 761 }, { "epoch": 0.20043401065298874, "grad_norm": 16.873564483656185, "learning_rate": 5e-06, "loss": 0.1454, "num_input_tokens_seen": 130848448, "step": 762 }, { 
"epoch": 0.20043401065298874, "loss": 0.11374461650848389, "loss_ce": 0.001867176266387105, "loss_iou": 0.5546875, "loss_num": 0.0223388671875, "loss_xval": 0.11181640625, "num_input_tokens_seen": 130848448, "step": 762 }, { "epoch": 0.20069704741237587, "grad_norm": 8.061510131196302, "learning_rate": 5e-06, "loss": 0.1913, "num_input_tokens_seen": 131020600, "step": 763 }, { "epoch": 0.20069704741237587, "loss": 0.24045339226722717, "loss_ce": 0.003575955517590046, "loss_iou": 0.59375, "loss_num": 0.04736328125, "loss_xval": 0.2373046875, "num_input_tokens_seen": 131020600, "step": 763 }, { "epoch": 0.200960084171763, "grad_norm": 7.1675953763190865, "learning_rate": 5e-06, "loss": 0.1621, "num_input_tokens_seen": 131191064, "step": 764 }, { "epoch": 0.200960084171763, "loss": 0.1495012640953064, "loss_ce": 0.004115516785532236, "loss_iou": 0.462890625, "loss_num": 0.029052734375, "loss_xval": 0.1455078125, "num_input_tokens_seen": 131191064, "step": 764 }, { "epoch": 0.20122312093115013, "grad_norm": 5.20536952677903, "learning_rate": 5e-06, "loss": 0.1756, "num_input_tokens_seen": 131363148, "step": 765 }, { "epoch": 0.20122312093115013, "loss": 0.23652556538581848, "loss_ce": 0.009566339664161205, "loss_iou": 0.546875, "loss_num": 0.04541015625, "loss_xval": 0.2265625, "num_input_tokens_seen": 131363148, "step": 765 }, { "epoch": 0.20148615769053727, "grad_norm": 8.038569612667256, "learning_rate": 5e-06, "loss": 0.1635, "num_input_tokens_seen": 131535488, "step": 766 }, { "epoch": 0.20148615769053727, "loss": 0.24296867847442627, "loss_ce": 0.0015136117581278086, "loss_iou": 0.490234375, "loss_num": 0.04833984375, "loss_xval": 0.2412109375, "num_input_tokens_seen": 131535488, "step": 766 }, { "epoch": 0.20174919444992437, "grad_norm": 6.190176430910417, "learning_rate": 5e-06, "loss": 0.2091, "num_input_tokens_seen": 131707808, "step": 767 }, { "epoch": 0.20174919444992437, "loss": 0.24412932991981506, "loss_ce": 0.00218596076592803, "loss_iou": 0.5859375, 
"loss_num": 0.04833984375, "loss_xval": 0.2421875, "num_input_tokens_seen": 131707808, "step": 767 }, { "epoch": 0.2020122312093115, "grad_norm": 6.201791565366413, "learning_rate": 5e-06, "loss": 0.1489, "num_input_tokens_seen": 131879928, "step": 768 }, { "epoch": 0.2020122312093115, "loss": 0.09396322071552277, "loss_ce": 0.002288414863869548, "loss_iou": 0.47265625, "loss_num": 0.018310546875, "loss_xval": 0.091796875, "num_input_tokens_seen": 131879928, "step": 768 }, { "epoch": 0.20227526796869863, "grad_norm": 7.34120818873771, "learning_rate": 5e-06, "loss": 0.179, "num_input_tokens_seen": 132052340, "step": 769 }, { "epoch": 0.20227526796869863, "loss": 0.22239628434181213, "loss_ce": 0.005477334372699261, "loss_iou": 0.51171875, "loss_num": 0.043212890625, "loss_xval": 0.216796875, "num_input_tokens_seen": 132052340, "step": 769 }, { "epoch": 0.20253830472808576, "grad_norm": 6.124473321987556, "learning_rate": 5e-06, "loss": 0.1912, "num_input_tokens_seen": 132224712, "step": 770 }, { "epoch": 0.20253830472808576, "loss": 0.17558854818344116, "loss_ce": 0.0018824923317879438, "loss_iou": 0.54296875, "loss_num": 0.03466796875, "loss_xval": 0.173828125, "num_input_tokens_seen": 132224712, "step": 770 }, { "epoch": 0.20280134148747286, "grad_norm": 9.284836415516965, "learning_rate": 5e-06, "loss": 0.1512, "num_input_tokens_seen": 132396772, "step": 771 }, { "epoch": 0.20280134148747286, "loss": 0.15597118437290192, "loss_ce": 0.0003925706841982901, "loss_iou": 0.734375, "loss_num": 0.0311279296875, "loss_xval": 0.1552734375, "num_input_tokens_seen": 132396772, "step": 771 }, { "epoch": 0.20306437824686, "grad_norm": 7.764344629279706, "learning_rate": 5e-06, "loss": 0.1835, "num_input_tokens_seen": 132568908, "step": 772 }, { "epoch": 0.20306437824686, "loss": 0.18731489777565002, "loss_ce": 0.0028056304436177015, "loss_iou": 0.53125, "loss_num": 0.036865234375, "loss_xval": 0.1845703125, "num_input_tokens_seen": 132568908, "step": 772 }, { "epoch": 
0.20332741500624713, "grad_norm": 10.387981116967136, "learning_rate": 5e-06, "loss": 0.2105, "num_input_tokens_seen": 132741140, "step": 773 }, { "epoch": 0.20332741500624713, "loss": 0.2155558466911316, "loss_ce": 0.00147504813503474, "loss_iou": 0.5625, "loss_num": 0.042724609375, "loss_xval": 0.2138671875, "num_input_tokens_seen": 132741140, "step": 773 }, { "epoch": 0.20359045176563426, "grad_norm": 11.81609487087241, "learning_rate": 5e-06, "loss": 0.1972, "num_input_tokens_seen": 132913412, "step": 774 }, { "epoch": 0.20359045176563426, "loss": 0.2562327980995178, "loss_ce": 0.0022044687066227198, "loss_iou": 0.625, "loss_num": 0.05078125, "loss_xval": 0.25390625, "num_input_tokens_seen": 132913412, "step": 774 }, { "epoch": 0.20385348852502136, "grad_norm": 11.070638283175898, "learning_rate": 5e-06, "loss": 0.1561, "num_input_tokens_seen": 133085388, "step": 775 }, { "epoch": 0.20385348852502136, "loss": 0.1850009262561798, "loss_ce": 0.0011630415683612227, "loss_iou": 0.671875, "loss_num": 0.036865234375, "loss_xval": 0.18359375, "num_input_tokens_seen": 133085388, "step": 775 }, { "epoch": 0.2041165252844085, "grad_norm": 8.169381502927404, "learning_rate": 5e-06, "loss": 0.1653, "num_input_tokens_seen": 133257492, "step": 776 }, { "epoch": 0.2041165252844085, "loss": 0.17405013740062714, "loss_ce": 0.0007713312515988946, "loss_iou": 0.609375, "loss_num": 0.03466796875, "loss_xval": 0.1728515625, "num_input_tokens_seen": 133257492, "step": 776 }, { "epoch": 0.20437956204379562, "grad_norm": 7.483205340151747, "learning_rate": 5e-06, "loss": 0.1812, "num_input_tokens_seen": 133429516, "step": 777 }, { "epoch": 0.20437956204379562, "loss": 0.21947714686393738, "loss_ce": 0.0011543984292075038, "loss_iou": 0.6171875, "loss_num": 0.043701171875, "loss_xval": 0.21875, "num_input_tokens_seen": 133429516, "step": 777 }, { "epoch": 0.20464259880318275, "grad_norm": 15.835699409497824, "learning_rate": 5e-06, "loss": 0.1767, "num_input_tokens_seen": 133599692, 
"step": 778 }, { "epoch": 0.20464259880318275, "loss": 0.1631580889225006, "loss_ce": 0.0015064696781337261, "loss_iou": 0.50390625, "loss_num": 0.0322265625, "loss_xval": 0.162109375, "num_input_tokens_seen": 133599692, "step": 778 }, { "epoch": 0.20490563556256988, "grad_norm": 6.9792797252935, "learning_rate": 5e-06, "loss": 0.1661, "num_input_tokens_seen": 133771756, "step": 779 }, { "epoch": 0.20490563556256988, "loss": 0.16169646382331848, "loss_ce": 0.0007467527757398784, "loss_iou": 0.435546875, "loss_num": 0.0322265625, "loss_xval": 0.1611328125, "num_input_tokens_seen": 133771756, "step": 779 }, { "epoch": 0.20516867232195699, "grad_norm": 12.331720736489249, "learning_rate": 5e-06, "loss": 0.1981, "num_input_tokens_seen": 133944348, "step": 780 }, { "epoch": 0.20516867232195699, "loss": 0.13550271093845367, "loss_ce": 0.0025071091949939728, "loss_iou": 0.65234375, "loss_num": 0.026611328125, "loss_xval": 0.1328125, "num_input_tokens_seen": 133944348, "step": 780 }, { "epoch": 0.20543170908134412, "grad_norm": 5.7846193156700725, "learning_rate": 5e-06, "loss": 0.1172, "num_input_tokens_seen": 134116796, "step": 781 }, { "epoch": 0.20543170908134412, "loss": 0.09005297720432281, "loss_ce": 0.0008500947151333094, "loss_iou": 0.5390625, "loss_num": 0.017822265625, "loss_xval": 0.08935546875, "num_input_tokens_seen": 134116796, "step": 781 }, { "epoch": 0.20569474584073125, "grad_norm": 20.38915176858911, "learning_rate": 5e-06, "loss": 0.2145, "num_input_tokens_seen": 134285564, "step": 782 }, { "epoch": 0.20569474584073125, "loss": 0.2173474133014679, "loss_ce": 0.0005505430162884295, "loss_iou": 0.5390625, "loss_num": 0.043212890625, "loss_xval": 0.216796875, "num_input_tokens_seen": 134285564, "step": 782 }, { "epoch": 0.20595778260011838, "grad_norm": 13.943592884184664, "learning_rate": 5e-06, "loss": 0.2368, "num_input_tokens_seen": 134457600, "step": 783 }, { "epoch": 0.20595778260011838, "loss": 0.23010152578353882, "loss_ce": 0.00341693963855505, 
"loss_iou": 0.54296875, "loss_num": 0.04541015625, "loss_xval": 0.2265625, "num_input_tokens_seen": 134457600, "step": 783 }, { "epoch": 0.20622081935950548, "grad_norm": 7.47232647542354, "learning_rate": 5e-06, "loss": 0.1884, "num_input_tokens_seen": 134627836, "step": 784 }, { "epoch": 0.20622081935950548, "loss": 0.31371766328811646, "loss_ce": 0.0035369964316487312, "loss_iou": 0.494140625, "loss_num": 0.06201171875, "loss_xval": 0.310546875, "num_input_tokens_seen": 134627836, "step": 784 }, { "epoch": 0.2064838561188926, "grad_norm": 7.561434198006136, "learning_rate": 5e-06, "loss": 0.1991, "num_input_tokens_seen": 134800024, "step": 785 }, { "epoch": 0.2064838561188926, "loss": 0.25232362747192383, "loss_ce": 0.003910548985004425, "loss_iou": 0.63671875, "loss_num": 0.049560546875, "loss_xval": 0.248046875, "num_input_tokens_seen": 134800024, "step": 785 }, { "epoch": 0.20674689287827974, "grad_norm": 7.384265604656037, "learning_rate": 5e-06, "loss": 0.167, "num_input_tokens_seen": 134972276, "step": 786 }, { "epoch": 0.20674689287827974, "loss": 0.22025543451309204, "loss_ce": 0.0012307591969147325, "loss_iou": 0.48046875, "loss_num": 0.0439453125, "loss_xval": 0.21875, "num_input_tokens_seen": 134972276, "step": 786 }, { "epoch": 0.20700992963766687, "grad_norm": 6.5497695680105705, "learning_rate": 5e-06, "loss": 0.1985, "num_input_tokens_seen": 135144300, "step": 787 }, { "epoch": 0.20700992963766687, "loss": 0.24491435289382935, "loss_ce": 0.0007126981508918107, "loss_iou": 0.5546875, "loss_num": 0.048828125, "loss_xval": 0.244140625, "num_input_tokens_seen": 135144300, "step": 787 }, { "epoch": 0.20727296639705398, "grad_norm": 7.321479905780818, "learning_rate": 5e-06, "loss": 0.1764, "num_input_tokens_seen": 135316344, "step": 788 }, { "epoch": 0.20727296639705398, "loss": 0.19332991540431976, "loss_ce": 0.0017405691323801875, "loss_iou": 0.52734375, "loss_num": 0.038330078125, "loss_xval": 0.19140625, "num_input_tokens_seen": 135316344, "step": 
788 }, { "epoch": 0.2075360031564411, "grad_norm": 5.255608280002089, "learning_rate": 5e-06, "loss": 0.1867, "num_input_tokens_seen": 135488352, "step": 789 }, { "epoch": 0.2075360031564411, "loss": 0.18961402773857117, "loss_ce": 0.0032126582227647305, "loss_iou": 0.6015625, "loss_num": 0.037353515625, "loss_xval": 0.1865234375, "num_input_tokens_seen": 135488352, "step": 789 }, { "epoch": 0.20779903991582824, "grad_norm": 6.817599663694252, "learning_rate": 5e-06, "loss": 0.1583, "num_input_tokens_seen": 135660716, "step": 790 }, { "epoch": 0.20779903991582824, "loss": 0.11433705687522888, "loss_ce": 0.0007506305119022727, "loss_iou": 0.5234375, "loss_num": 0.022705078125, "loss_xval": 0.11376953125, "num_input_tokens_seen": 135660716, "step": 790 }, { "epoch": 0.20806207667521537, "grad_norm": 7.049616575343475, "learning_rate": 5e-06, "loss": 0.2041, "num_input_tokens_seen": 135833108, "step": 791 }, { "epoch": 0.20806207667521537, "loss": 0.16387248039245605, "loss_ce": 0.005669359117746353, "loss_iou": 0.64453125, "loss_num": 0.03173828125, "loss_xval": 0.158203125, "num_input_tokens_seen": 135833108, "step": 791 }, { "epoch": 0.2083251134346025, "grad_norm": 9.83283000996709, "learning_rate": 5e-06, "loss": 0.1348, "num_input_tokens_seen": 136001408, "step": 792 }, { "epoch": 0.2083251134346025, "loss": 0.13693515956401825, "loss_ce": 0.0015286724083125591, "loss_iou": 0.546875, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 136001408, "step": 792 }, { "epoch": 0.2085881501939896, "grad_norm": 18.56546458626088, "learning_rate": 5e-06, "loss": 0.2126, "num_input_tokens_seen": 136173768, "step": 793 }, { "epoch": 0.2085881501939896, "loss": 0.2671317458152771, "loss_ce": 0.004436435177922249, "loss_iou": 0.384765625, "loss_num": 0.052490234375, "loss_xval": 0.26171875, "num_input_tokens_seen": 136173768, "step": 793 }, { "epoch": 0.20885118695337673, "grad_norm": 7.460522492414201, "learning_rate": 5e-06, "loss": 0.1559, 
"num_input_tokens_seen": 136345864, "step": 794 }, { "epoch": 0.20885118695337673, "loss": 0.19496268033981323, "loss_ce": 0.003495402168482542, "loss_iou": 0.58984375, "loss_num": 0.038330078125, "loss_xval": 0.19140625, "num_input_tokens_seen": 136345864, "step": 794 }, { "epoch": 0.20911422371276386, "grad_norm": 6.238640410439663, "learning_rate": 5e-06, "loss": 0.177, "num_input_tokens_seen": 136518076, "step": 795 }, { "epoch": 0.20911422371276386, "loss": 0.1869560331106186, "loss_ce": 0.0006157027091830969, "loss_iou": 0.66796875, "loss_num": 0.037353515625, "loss_xval": 0.1865234375, "num_input_tokens_seen": 136518076, "step": 795 }, { "epoch": 0.209377260472151, "grad_norm": 4.750721959517019, "learning_rate": 5e-06, "loss": 0.1284, "num_input_tokens_seen": 136690192, "step": 796 }, { "epoch": 0.209377260472151, "loss": 0.10885806381702423, "loss_ce": 0.0011920429533347487, "loss_iou": 0.52734375, "loss_num": 0.021484375, "loss_xval": 0.107421875, "num_input_tokens_seen": 136690192, "step": 796 }, { "epoch": 0.2096402972315381, "grad_norm": 11.655608393949823, "learning_rate": 5e-06, "loss": 0.1545, "num_input_tokens_seen": 136862596, "step": 797 }, { "epoch": 0.2096402972315381, "loss": 0.13095784187316895, "loss_ce": 0.0011970889754593372, "loss_iou": 0.54296875, "loss_num": 0.0260009765625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 136862596, "step": 797 }, { "epoch": 0.20990333399092523, "grad_norm": 7.6147795291746645, "learning_rate": 5e-06, "loss": 0.1671, "num_input_tokens_seen": 137033096, "step": 798 }, { "epoch": 0.20990333399092523, "loss": 0.14115872979164124, "loss_ce": 0.002456323243677616, "loss_iou": 0.494140625, "loss_num": 0.0277099609375, "loss_xval": 0.138671875, "num_input_tokens_seen": 137033096, "step": 798 }, { "epoch": 0.21016637075031236, "grad_norm": 6.436556075292691, "learning_rate": 5e-06, "loss": 0.164, "num_input_tokens_seen": 137203708, "step": 799 }, { "epoch": 0.21016637075031236, "loss": 0.15657231211662292, 
"loss_ce": 0.0031909646932035685, "loss_iou": 0.48046875, "loss_num": 0.03076171875, "loss_xval": 0.1533203125, "num_input_tokens_seen": 137203708, "step": 799 }, { "epoch": 0.2104294075096995, "grad_norm": 5.272376224873815, "learning_rate": 5e-06, "loss": 0.1485, "num_input_tokens_seen": 137374352, "step": 800 }, { "epoch": 0.2104294075096995, "loss": 0.19644752144813538, "loss_ce": 0.003332281718030572, "loss_iou": 0.5234375, "loss_num": 0.03857421875, "loss_xval": 0.193359375, "num_input_tokens_seen": 137374352, "step": 800 }, { "epoch": 0.2106924442690866, "grad_norm": 9.274938800244652, "learning_rate": 5e-06, "loss": 0.1815, "num_input_tokens_seen": 137546372, "step": 801 }, { "epoch": 0.2106924442690866, "loss": 0.23770156502723694, "loss_ce": 0.0010682701831683517, "loss_iou": 0.5078125, "loss_num": 0.04736328125, "loss_xval": 0.236328125, "num_input_tokens_seen": 137546372, "step": 801 }, { "epoch": 0.21095548102847372, "grad_norm": 12.494816629710325, "learning_rate": 5e-06, "loss": 0.1484, "num_input_tokens_seen": 137718496, "step": 802 }, { "epoch": 0.21095548102847372, "loss": 0.13832518458366394, "loss_ce": 0.00380369508638978, "loss_iou": 0.59765625, "loss_num": 0.0269775390625, "loss_xval": 0.134765625, "num_input_tokens_seen": 137718496, "step": 802 }, { "epoch": 0.21121851778786085, "grad_norm": 11.143811760293481, "learning_rate": 5e-06, "loss": 0.1896, "num_input_tokens_seen": 137890492, "step": 803 }, { "epoch": 0.21121851778786085, "loss": 0.17278538644313812, "loss_ce": 0.0006662444211542606, "loss_iou": 0.6015625, "loss_num": 0.034423828125, "loss_xval": 0.171875, "num_input_tokens_seen": 137890492, "step": 803 }, { "epoch": 0.21148155454724799, "grad_norm": 6.694127616757882, "learning_rate": 5e-06, "loss": 0.1473, "num_input_tokens_seen": 138062576, "step": 804 }, { "epoch": 0.21148155454724799, "loss": 0.08578141778707504, "loss_ce": 0.0013697945978492498, "loss_iou": 0.6328125, "loss_num": 0.016845703125, "loss_xval": 0.08447265625, 
"num_input_tokens_seen": 138062576, "step": 804 }, { "epoch": 0.21174459130663512, "grad_norm": 6.879766720907239, "learning_rate": 5e-06, "loss": 0.2407, "num_input_tokens_seen": 138234992, "step": 805 }, { "epoch": 0.21174459130663512, "loss": 0.26608556509017944, "loss_ce": 0.0011929699685424566, "loss_iou": 0.671875, "loss_num": 0.052978515625, "loss_xval": 0.265625, "num_input_tokens_seen": 138234992, "step": 805 }, { "epoch": 0.21200762806602222, "grad_norm": 10.68020187128554, "learning_rate": 5e-06, "loss": 0.1488, "num_input_tokens_seen": 138407296, "step": 806 }, { "epoch": 0.21200762806602222, "loss": 0.16970132291316986, "loss_ce": 0.003929831553250551, "loss_iou": 0.5078125, "loss_num": 0.033203125, "loss_xval": 0.166015625, "num_input_tokens_seen": 138407296, "step": 806 }, { "epoch": 0.21227066482540935, "grad_norm": 6.8835699764215, "learning_rate": 5e-06, "loss": 0.2142, "num_input_tokens_seen": 138579820, "step": 807 }, { "epoch": 0.21227066482540935, "loss": 0.20114630460739136, "loss_ce": 0.0011341023491695523, "loss_iou": 0.703125, "loss_num": 0.0400390625, "loss_xval": 0.2001953125, "num_input_tokens_seen": 138579820, "step": 807 }, { "epoch": 0.21253370158479648, "grad_norm": 5.0783612426941795, "learning_rate": 5e-06, "loss": 0.1658, "num_input_tokens_seen": 138752112, "step": 808 }, { "epoch": 0.21253370158479648, "loss": 0.22068345546722412, "loss_ce": 0.004802103620022535, "loss_iou": 0.42578125, "loss_num": 0.043212890625, "loss_xval": 0.2158203125, "num_input_tokens_seen": 138752112, "step": 808 }, { "epoch": 0.2127967383441836, "grad_norm": 6.76267182747039, "learning_rate": 5e-06, "loss": 0.1732, "num_input_tokens_seen": 138924516, "step": 809 }, { "epoch": 0.2127967383441836, "loss": 0.16195307672023773, "loss_ce": 0.0010644117137417197, "loss_iou": 0.5234375, "loss_num": 0.0322265625, "loss_xval": 0.1611328125, "num_input_tokens_seen": 138924516, "step": 809 }, { "epoch": 0.21305977510357071, "grad_norm": 12.983359860046427, 
"learning_rate": 5e-06, "loss": 0.1645, "num_input_tokens_seen": 139096976, "step": 810 }, { "epoch": 0.21305977510357071, "loss": 0.14437143504619598, "loss_ce": 0.0023426164407283068, "loss_iou": 0.50390625, "loss_num": 0.0284423828125, "loss_xval": 0.1416015625, "num_input_tokens_seen": 139096976, "step": 810 }, { "epoch": 0.21332281186295785, "grad_norm": 5.900519173332491, "learning_rate": 5e-06, "loss": 0.1796, "num_input_tokens_seen": 139269224, "step": 811 }, { "epoch": 0.21332281186295785, "loss": 0.11451567709445953, "loss_ce": 0.0014175281394273043, "loss_iou": 0.58203125, "loss_num": 0.0225830078125, "loss_xval": 0.11328125, "num_input_tokens_seen": 139269224, "step": 811 }, { "epoch": 0.21358584862234498, "grad_norm": 11.704739365440266, "learning_rate": 5e-06, "loss": 0.1362, "num_input_tokens_seen": 139441048, "step": 812 }, { "epoch": 0.21358584862234498, "loss": 0.06064599007368088, "loss_ce": 0.0007399858441203833, "loss_iou": 0.3984375, "loss_num": 0.011962890625, "loss_xval": 0.059814453125, "num_input_tokens_seen": 139441048, "step": 812 }, { "epoch": 0.2138488853817321, "grad_norm": 5.765338635306649, "learning_rate": 5e-06, "loss": 0.1449, "num_input_tokens_seen": 139613180, "step": 813 }, { "epoch": 0.2138488853817321, "loss": 0.1868004947900772, "loss_ce": 0.001986048649996519, "loss_iou": 0.6171875, "loss_num": 0.036865234375, "loss_xval": 0.1845703125, "num_input_tokens_seen": 139613180, "step": 813 }, { "epoch": 0.2141119221411192, "grad_norm": 8.301852392840289, "learning_rate": 5e-06, "loss": 0.1731, "num_input_tokens_seen": 139785340, "step": 814 }, { "epoch": 0.2141119221411192, "loss": 0.21647384762763977, "loss_ce": 0.002240448724478483, "loss_iou": 0.5078125, "loss_num": 0.04296875, "loss_xval": 0.2138671875, "num_input_tokens_seen": 139785340, "step": 814 }, { "epoch": 0.21437495890050634, "grad_norm": 10.75059655667213, "learning_rate": 5e-06, "loss": 0.2066, "num_input_tokens_seen": 139957612, "step": 815 }, { "epoch": 
0.21437495890050634, "loss": 0.2657305598258972, "loss_ce": 0.007979076355695724, "loss_iou": 0.41796875, "loss_num": 0.051513671875, "loss_xval": 0.2578125, "num_input_tokens_seen": 139957612, "step": 815 }, { "epoch": 0.21463799565989347, "grad_norm": 8.1554830347417, "learning_rate": 5e-06, "loss": 0.1594, "num_input_tokens_seen": 140129832, "step": 816 }, { "epoch": 0.21463799565989347, "loss": 0.21090401709079742, "loss_ce": 0.0022858483716845512, "loss_iou": 0.6015625, "loss_num": 0.041748046875, "loss_xval": 0.208984375, "num_input_tokens_seen": 140129832, "step": 816 }, { "epoch": 0.2149010324192806, "grad_norm": 6.39307582100075, "learning_rate": 5e-06, "loss": 0.1609, "num_input_tokens_seen": 140302204, "step": 817 }, { "epoch": 0.2149010324192806, "loss": 0.12444409728050232, "loss_ce": 0.0020075817592442036, "loss_iou": 0.5859375, "loss_num": 0.0245361328125, "loss_xval": 0.12255859375, "num_input_tokens_seen": 140302204, "step": 817 }, { "epoch": 0.2151640691786677, "grad_norm": 11.031920247903537, "learning_rate": 5e-06, "loss": 0.1559, "num_input_tokens_seen": 140470936, "step": 818 }, { "epoch": 0.2151640691786677, "loss": 0.24400946497917175, "loss_ce": 0.00112004519905895, "loss_iou": 0.5078125, "loss_num": 0.048583984375, "loss_xval": 0.2431640625, "num_input_tokens_seen": 140470936, "step": 818 }, { "epoch": 0.21542710593805484, "grad_norm": 5.897881271346391, "learning_rate": 5e-06, "loss": 0.194, "num_input_tokens_seen": 140643120, "step": 819 }, { "epoch": 0.21542710593805484, "loss": 0.12999695539474487, "loss_ce": 0.001182260224595666, "loss_iou": 0.5390625, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 140643120, "step": 819 }, { "epoch": 0.21569014269744197, "grad_norm": 13.442111711356437, "learning_rate": 5e-06, "loss": 0.1744, "num_input_tokens_seen": 140815608, "step": 820 }, { "epoch": 0.21569014269744197, "loss": 0.1816408634185791, "loss_ce": 0.0014040416572242975, "loss_iou": 0.345703125, 
"loss_num": 0.0361328125, "loss_xval": 0.1806640625, "num_input_tokens_seen": 140815608, "step": 820 }, { "epoch": 0.2159531794568291, "grad_norm": 13.297750331398602, "learning_rate": 5e-06, "loss": 0.1926, "num_input_tokens_seen": 140987824, "step": 821 }, { "epoch": 0.2159531794568291, "loss": 0.29018890857696533, "loss_ce": 0.002347112400457263, "loss_iou": 0.478515625, "loss_num": 0.0576171875, "loss_xval": 0.287109375, "num_input_tokens_seen": 140987824, "step": 821 }, { "epoch": 0.21621621621621623, "grad_norm": 18.74775436478718, "learning_rate": 5e-06, "loss": 0.2948, "num_input_tokens_seen": 141160200, "step": 822 }, { "epoch": 0.21621621621621623, "loss": 0.2370918244123459, "loss_ce": 0.1260078400373459, "loss_iou": 0.59765625, "loss_num": 0.022216796875, "loss_xval": 0.111328125, "num_input_tokens_seen": 141160200, "step": 822 }, { "epoch": 0.21647925297560333, "grad_norm": 19.14528392541103, "learning_rate": 5e-06, "loss": 0.1977, "num_input_tokens_seen": 141329928, "step": 823 }, { "epoch": 0.21647925297560333, "loss": 0.1949683427810669, "loss_ce": 0.0917884111404419, "loss_iou": 0.474609375, "loss_num": 0.0206298828125, "loss_xval": 0.10302734375, "num_input_tokens_seen": 141329928, "step": 823 }, { "epoch": 0.21674228973499046, "grad_norm": 6.8551193847587735, "learning_rate": 5e-06, "loss": 0.1354, "num_input_tokens_seen": 141502236, "step": 824 }, { "epoch": 0.21674228973499046, "loss": 0.09739409387111664, "loss_ce": 0.007184132467955351, "loss_iou": 0.6640625, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 141502236, "step": 824 }, { "epoch": 0.2170053264943776, "grad_norm": 7.934353513499139, "learning_rate": 5e-06, "loss": 0.2174, "num_input_tokens_seen": 141674272, "step": 825 }, { "epoch": 0.2170053264943776, "loss": 0.3033770024776459, "loss_ce": 0.026491012424230576, "loss_iou": 0.45703125, "loss_num": 0.055419921875, "loss_xval": 0.27734375, "num_input_tokens_seen": 141674272, "step": 825 }, { "epoch": 
0.21726836325376472, "grad_norm": 6.296485584445855, "learning_rate": 5e-06, "loss": 0.2048, "num_input_tokens_seen": 141846556, "step": 826 }, { "epoch": 0.21726836325376472, "loss": 0.16873130202293396, "loss_ce": 0.0013118635397404432, "loss_iou": 0.59765625, "loss_num": 0.033447265625, "loss_xval": 0.1669921875, "num_input_tokens_seen": 141846556, "step": 826 }, { "epoch": 0.21753140001315183, "grad_norm": 10.428529158159186, "learning_rate": 5e-06, "loss": 0.14, "num_input_tokens_seen": 142018940, "step": 827 }, { "epoch": 0.21753140001315183, "loss": 0.15984642505645752, "loss_ce": 0.0018874472007155418, "loss_iou": 0.421875, "loss_num": 0.03173828125, "loss_xval": 0.158203125, "num_input_tokens_seen": 142018940, "step": 827 }, { "epoch": 0.21779443677253896, "grad_norm": 7.702760290749295, "learning_rate": 5e-06, "loss": 0.1934, "num_input_tokens_seen": 142189520, "step": 828 }, { "epoch": 0.21779443677253896, "loss": 0.17661917209625244, "loss_ce": 0.007002475671470165, "loss_iou": 0.5078125, "loss_num": 0.033935546875, "loss_xval": 0.169921875, "num_input_tokens_seen": 142189520, "step": 828 }, { "epoch": 0.2180574735319261, "grad_norm": 5.154197949486892, "learning_rate": 5e-06, "loss": 0.1595, "num_input_tokens_seen": 142359752, "step": 829 }, { "epoch": 0.2180574735319261, "loss": 0.11216248571872711, "loss_ce": 0.0025128289125859737, "loss_iou": 0.52734375, "loss_num": 0.02197265625, "loss_xval": 0.10986328125, "num_input_tokens_seen": 142359752, "step": 829 }, { "epoch": 0.21832051029131322, "grad_norm": 9.212654853971248, "learning_rate": 5e-06, "loss": 0.1782, "num_input_tokens_seen": 142530028, "step": 830 }, { "epoch": 0.21832051029131322, "loss": 0.17883381247520447, "loss_ce": 0.006104309111833572, "loss_iou": 0.515625, "loss_num": 0.034423828125, "loss_xval": 0.1728515625, "num_input_tokens_seen": 142530028, "step": 830 }, { "epoch": 0.21858354705070032, "grad_norm": 22.355687744069783, "learning_rate": 5e-06, "loss": 0.2118, 
"num_input_tokens_seen": 142698896, "step": 831 }, { "epoch": 0.21858354705070032, "loss": 0.2873075604438782, "loss_ce": 0.00788860023021698, "loss_iou": 0.392578125, "loss_num": 0.055908203125, "loss_xval": 0.279296875, "num_input_tokens_seen": 142698896, "step": 831 }, { "epoch": 0.21884658381008745, "grad_norm": 7.646461001716363, "learning_rate": 5e-06, "loss": 0.1589, "num_input_tokens_seen": 142869236, "step": 832 }, { "epoch": 0.21884658381008745, "loss": 0.19723272323608398, "loss_ce": 0.00063848658464849, "loss_iou": 0.53515625, "loss_num": 0.039306640625, "loss_xval": 0.1962890625, "num_input_tokens_seen": 142869236, "step": 832 }, { "epoch": 0.21910962056947458, "grad_norm": 5.709950173979812, "learning_rate": 5e-06, "loss": 0.1462, "num_input_tokens_seen": 143041432, "step": 833 }, { "epoch": 0.21910962056947458, "loss": 0.13923318684101105, "loss_ce": 0.006512241438031197, "loss_iou": 0.470703125, "loss_num": 0.026611328125, "loss_xval": 0.1328125, "num_input_tokens_seen": 143041432, "step": 833 }, { "epoch": 0.21937265732886171, "grad_norm": 7.759034787872298, "learning_rate": 5e-06, "loss": 0.1411, "num_input_tokens_seen": 143213276, "step": 834 }, { "epoch": 0.21937265732886171, "loss": 0.13741275668144226, "loss_ce": 0.00820133276283741, "loss_iou": 0.4609375, "loss_num": 0.02587890625, "loss_xval": 0.12890625, "num_input_tokens_seen": 143213276, "step": 834 }, { "epoch": 0.21963569408824885, "grad_norm": 14.10397350971688, "learning_rate": 5e-06, "loss": 0.1819, "num_input_tokens_seen": 143385916, "step": 835 }, { "epoch": 0.21963569408824885, "loss": 0.17109227180480957, "loss_ce": 0.009959458373486996, "loss_iou": 0.62109375, "loss_num": 0.0322265625, "loss_xval": 0.1611328125, "num_input_tokens_seen": 143385916, "step": 835 }, { "epoch": 0.21989873084763595, "grad_norm": 10.610026241323633, "learning_rate": 5e-06, "loss": 0.1959, "num_input_tokens_seen": 143558108, "step": 836 }, { "epoch": 0.21989873084763595, "loss": 0.1955014169216156, 
"loss_ce": 0.006353452801704407, "loss_iou": 0.58984375, "loss_num": 0.037841796875, "loss_xval": 0.189453125, "num_input_tokens_seen": 143558108, "step": 836 }, { "epoch": 0.22016176760702308, "grad_norm": 6.884069616265665, "learning_rate": 5e-06, "loss": 0.1724, "num_input_tokens_seen": 143730560, "step": 837 }, { "epoch": 0.22016176760702308, "loss": 0.1851910948753357, "loss_ce": 0.001475287601351738, "loss_iou": 0.50390625, "loss_num": 0.03662109375, "loss_xval": 0.18359375, "num_input_tokens_seen": 143730560, "step": 837 }, { "epoch": 0.2204248043664102, "grad_norm": 8.581569398296145, "learning_rate": 5e-06, "loss": 0.1592, "num_input_tokens_seen": 143901168, "step": 838 }, { "epoch": 0.2204248043664102, "loss": 0.10049735009670258, "loss_ce": 0.00043021421879529953, "loss_iou": 0.55859375, "loss_num": 0.02001953125, "loss_xval": 0.10009765625, "num_input_tokens_seen": 143901168, "step": 838 }, { "epoch": 0.22068784112579734, "grad_norm": 8.473859363920706, "learning_rate": 5e-06, "loss": 0.1656, "num_input_tokens_seen": 144073300, "step": 839 }, { "epoch": 0.22068784112579734, "loss": 0.18692679703235626, "loss_ce": 0.001440959284082055, "loss_iou": 0.37109375, "loss_num": 0.037109375, "loss_xval": 0.185546875, "num_input_tokens_seen": 144073300, "step": 839 }, { "epoch": 0.22095087788518444, "grad_norm": 4.5516671382314495, "learning_rate": 5e-06, "loss": 0.1346, "num_input_tokens_seen": 144245656, "step": 840 }, { "epoch": 0.22095087788518444, "loss": 0.1278304159641266, "loss_ce": 0.003227140521630645, "loss_iou": 0.64453125, "loss_num": 0.02490234375, "loss_xval": 0.12451171875, "num_input_tokens_seen": 144245656, "step": 840 }, { "epoch": 0.22121391464457157, "grad_norm": 8.854170908003372, "learning_rate": 5e-06, "loss": 0.143, "num_input_tokens_seen": 144414684, "step": 841 }, { "epoch": 0.22121391464457157, "loss": 0.11698315292596817, "loss_ce": 0.003732412587851286, "loss_iou": 0.58984375, "loss_num": 0.022705078125, "loss_xval": 0.11328125, 
"num_input_tokens_seen": 144414684, "step": 841 }, { "epoch": 0.2214769514039587, "grad_norm": 6.139198378486474, "learning_rate": 5e-06, "loss": 0.1303, "num_input_tokens_seen": 144586900, "step": 842 }, { "epoch": 0.2214769514039587, "loss": 0.16039735078811646, "loss_ce": 0.001461806707084179, "loss_iou": 0.66796875, "loss_num": 0.03173828125, "loss_xval": 0.1591796875, "num_input_tokens_seen": 144586900, "step": 842 }, { "epoch": 0.22173998816334584, "grad_norm": 10.644206851478819, "learning_rate": 5e-06, "loss": 0.1737, "num_input_tokens_seen": 144759108, "step": 843 }, { "epoch": 0.22173998816334584, "loss": 0.14353252947330475, "loss_ce": 0.0032432209700345993, "loss_iou": 0.4453125, "loss_num": 0.028076171875, "loss_xval": 0.140625, "num_input_tokens_seen": 144759108, "step": 843 }, { "epoch": 0.22200302492273294, "grad_norm": 6.739417345524847, "learning_rate": 5e-06, "loss": 0.1301, "num_input_tokens_seen": 144931444, "step": 844 }, { "epoch": 0.22200302492273294, "loss": 0.09944656491279602, "loss_ce": 0.0004170280881226063, "loss_iou": 0.6328125, "loss_num": 0.019775390625, "loss_xval": 0.09912109375, "num_input_tokens_seen": 144931444, "step": 844 }, { "epoch": 0.22226606168212007, "grad_norm": 8.110959144365463, "learning_rate": 5e-06, "loss": 0.177, "num_input_tokens_seen": 145102036, "step": 845 }, { "epoch": 0.22226606168212007, "loss": 0.2286926507949829, "loss_ce": 0.001336704008281231, "loss_iou": 0.56640625, "loss_num": 0.04541015625, "loss_xval": 0.2275390625, "num_input_tokens_seen": 145102036, "step": 845 }, { "epoch": 0.2225290984415072, "grad_norm": 10.034770268919976, "learning_rate": 5e-06, "loss": 0.2222, "num_input_tokens_seen": 145274324, "step": 846 }, { "epoch": 0.2225290984415072, "loss": 0.23924441635608673, "loss_ce": 0.004075955133885145, "loss_iou": 0.53515625, "loss_num": 0.047119140625, "loss_xval": 0.2353515625, "num_input_tokens_seen": 145274324, "step": 846 }, { "epoch": 0.22279213520089433, "grad_norm": 
6.615833632469255, "learning_rate": 5e-06, "loss": 0.1736, "num_input_tokens_seen": 145446460, "step": 847 }, { "epoch": 0.22279213520089433, "loss": 0.1300104260444641, "loss_ce": 0.001287288498133421, "loss_iou": 0.56640625, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 145446460, "step": 847 }, { "epoch": 0.22305517196028146, "grad_norm": 6.286344612415352, "learning_rate": 5e-06, "loss": 0.2284, "num_input_tokens_seen": 145618532, "step": 848 }, { "epoch": 0.22305517196028146, "loss": 0.18182075023651123, "loss_ce": 0.0016144568799063563, "loss_iou": null, "loss_num": 0.0361328125, "loss_xval": 0.1806640625, "num_input_tokens_seen": 145618532, "step": 848 }, { "epoch": 0.22331820871966857, "grad_norm": 15.090592675837648, "learning_rate": 5e-06, "loss": 0.1974, "num_input_tokens_seen": 145788852, "step": 849 }, { "epoch": 0.22331820871966857, "loss": 0.20251962542533875, "loss_ce": 0.0014087767340242863, "loss_iou": 0.5859375, "loss_num": 0.040283203125, "loss_xval": 0.201171875, "num_input_tokens_seen": 145788852, "step": 849 }, { "epoch": 0.2235812454790557, "grad_norm": 6.5100345965661806, "learning_rate": 5e-06, "loss": 0.1365, "num_input_tokens_seen": 145961180, "step": 850 }, { "epoch": 0.2235812454790557, "loss": 0.15530481934547424, "loss_ce": 0.002533819992095232, "loss_iou": 0.5390625, "loss_num": 0.030517578125, "loss_xval": 0.15234375, "num_input_tokens_seen": 145961180, "step": 850 }, { "epoch": 0.22384428223844283, "grad_norm": 6.940039828670214, "learning_rate": 5e-06, "loss": 0.1623, "num_input_tokens_seen": 146133828, "step": 851 }, { "epoch": 0.22384428223844283, "loss": 0.1814190298318863, "loss_ce": 0.001731535536237061, "loss_iou": 0.474609375, "loss_num": 0.035888671875, "loss_xval": 0.1796875, "num_input_tokens_seen": 146133828, "step": 851 }, { "epoch": 0.22410731899782996, "grad_norm": 8.305849772963127, "learning_rate": 5e-06, "loss": 0.177, "num_input_tokens_seen": 146306088, "step": 852 }, { "epoch": 
0.22410731899782996, "loss": 0.18216609954833984, "loss_ce": 0.002936376491561532, "loss_iou": 0.490234375, "loss_num": 0.035888671875, "loss_xval": 0.1796875, "num_input_tokens_seen": 146306088, "step": 852 }, { "epoch": 0.22437035575721706, "grad_norm": 7.934420093916033, "learning_rate": 5e-06, "loss": 0.2091, "num_input_tokens_seen": 146478248, "step": 853 }, { "epoch": 0.22437035575721706, "loss": 0.1829485148191452, "loss_ce": 0.0025896350853145123, "loss_iou": 0.671875, "loss_num": 0.0361328125, "loss_xval": 0.1806640625, "num_input_tokens_seen": 146478248, "step": 853 }, { "epoch": 0.2246333925166042, "grad_norm": 8.916802424159366, "learning_rate": 5e-06, "loss": 0.1487, "num_input_tokens_seen": 146650532, "step": 854 }, { "epoch": 0.2246333925166042, "loss": 0.13404083251953125, "loss_ce": 0.0006179830525070429, "loss_iou": 0.63671875, "loss_num": 0.0267333984375, "loss_xval": 0.1337890625, "num_input_tokens_seen": 146650532, "step": 854 }, { "epoch": 0.22489642927599132, "grad_norm": 16.03094644250741, "learning_rate": 5e-06, "loss": 0.1819, "num_input_tokens_seen": 146822816, "step": 855 }, { "epoch": 0.22489642927599132, "loss": 0.19942086935043335, "loss_ce": 0.007404262199997902, "loss_iou": 0.41015625, "loss_num": 0.038330078125, "loss_xval": 0.1923828125, "num_input_tokens_seen": 146822816, "step": 855 }, { "epoch": 0.22515946603537845, "grad_norm": 10.96099831988347, "learning_rate": 5e-06, "loss": 0.1644, "num_input_tokens_seen": 146994984, "step": 856 }, { "epoch": 0.22515946603537845, "loss": 0.1878020018339157, "loss_ce": 0.0012785641010850668, "loss_iou": 0.546875, "loss_num": 0.037353515625, "loss_xval": 0.1865234375, "num_input_tokens_seen": 146994984, "step": 856 }, { "epoch": 0.22542250279476556, "grad_norm": 5.009376224527154, "learning_rate": 5e-06, "loss": 0.1438, "num_input_tokens_seen": 147167200, "step": 857 }, { "epoch": 0.22542250279476556, "loss": 0.16484007239341736, "loss_ce": 0.005507797468453646, "loss_iou": 0.64453125, 
"loss_num": 0.031982421875, "loss_xval": 0.1591796875, "num_input_tokens_seen": 147167200, "step": 857 }, { "epoch": 0.2256855395541527, "grad_norm": 6.031288325368203, "learning_rate": 5e-06, "loss": 0.1251, "num_input_tokens_seen": 147337356, "step": 858 }, { "epoch": 0.2256855395541527, "loss": 0.14030741155147552, "loss_ce": 0.0010862206108868122, "loss_iou": 0.71484375, "loss_num": 0.02783203125, "loss_xval": 0.1396484375, "num_input_tokens_seen": 147337356, "step": 858 }, { "epoch": 0.22594857631353982, "grad_norm": 8.953664182124317, "learning_rate": 5e-06, "loss": 0.1473, "num_input_tokens_seen": 147509464, "step": 859 }, { "epoch": 0.22594857631353982, "loss": 0.19507214426994324, "loss_ce": 0.0016822540201246738, "loss_iou": 0.359375, "loss_num": 0.03857421875, "loss_xval": 0.193359375, "num_input_tokens_seen": 147509464, "step": 859 }, { "epoch": 0.22621161307292695, "grad_norm": 5.535666138038005, "learning_rate": 5e-06, "loss": 0.1436, "num_input_tokens_seen": 147681864, "step": 860 }, { "epoch": 0.22621161307292695, "loss": 0.194808691740036, "loss_ce": 0.002822606358677149, "loss_iou": 0.5, "loss_num": 0.03857421875, "loss_xval": 0.1923828125, "num_input_tokens_seen": 147681864, "step": 860 }, { "epoch": 0.22647464983231408, "grad_norm": 7.1540218345582, "learning_rate": 5e-06, "loss": 0.1663, "num_input_tokens_seen": 147854184, "step": 861 }, { "epoch": 0.22647464983231408, "loss": 0.18444868922233582, "loss_ce": 0.0037846285849809647, "loss_iou": 0.609375, "loss_num": 0.0361328125, "loss_xval": 0.1806640625, "num_input_tokens_seen": 147854184, "step": 861 }, { "epoch": 0.22673768659170118, "grad_norm": 7.954229564657017, "learning_rate": 5e-06, "loss": 0.134, "num_input_tokens_seen": 148026492, "step": 862 }, { "epoch": 0.22673768659170118, "loss": 0.1205034852027893, "loss_ce": 0.0003863019519485533, "loss_iou": 0.7421875, "loss_num": 0.0240478515625, "loss_xval": 0.1201171875, "num_input_tokens_seen": 148026492, "step": 862 }, { "epoch": 
0.2270007233510883, "grad_norm": 10.790250508426157, "learning_rate": 5e-06, "loss": 0.1621, "num_input_tokens_seen": 148198492, "step": 863 }, { "epoch": 0.2270007233510883, "loss": 0.08790126442909241, "loss_ce": 0.000956688541918993, "loss_iou": 0.52734375, "loss_num": 0.017333984375, "loss_xval": 0.0869140625, "num_input_tokens_seen": 148198492, "step": 863 }, { "epoch": 0.22726376011047544, "grad_norm": 7.413582271316403, "learning_rate": 5e-06, "loss": 0.1583, "num_input_tokens_seen": 148370744, "step": 864 }, { "epoch": 0.22726376011047544, "loss": 0.11584703624248505, "loss_ce": 0.0007652430795133114, "loss_iou": 0.625, "loss_num": 0.02294921875, "loss_xval": 0.115234375, "num_input_tokens_seen": 148370744, "step": 864 }, { "epoch": 0.22752679686986257, "grad_norm": 6.505762518470224, "learning_rate": 5e-06, "loss": 0.1692, "num_input_tokens_seen": 148541368, "step": 865 }, { "epoch": 0.22752679686986257, "loss": 0.12583567202091217, "loss_ce": 0.0008661894826218486, "loss_iou": 0.62109375, "loss_num": 0.0250244140625, "loss_xval": 0.125, "num_input_tokens_seen": 148541368, "step": 865 }, { "epoch": 0.22778983362924968, "grad_norm": 11.31233359994311, "learning_rate": 5e-06, "loss": 0.1448, "num_input_tokens_seen": 148713652, "step": 866 }, { "epoch": 0.22778983362924968, "loss": 0.09625812619924545, "loss_ce": 0.0005244807107374072, "loss_iou": 0.73828125, "loss_num": 0.0191650390625, "loss_xval": 0.095703125, "num_input_tokens_seen": 148713652, "step": 866 }, { "epoch": 0.2280528703886368, "grad_norm": 8.070061922275455, "learning_rate": 5e-06, "loss": 0.1469, "num_input_tokens_seen": 148885724, "step": 867 }, { "epoch": 0.2280528703886368, "loss": 0.12818799912929535, "loss_ce": 0.003035409841686487, "loss_iou": 0.443359375, "loss_num": 0.0250244140625, "loss_xval": 0.125, "num_input_tokens_seen": 148885724, "step": 867 }, { "epoch": 0.22831590714802394, "grad_norm": 8.83029409823491, "learning_rate": 5e-06, "loss": 0.2104, "num_input_tokens_seen": 
149057764, "step": 868 }, { "epoch": 0.22831590714802394, "loss": 0.2020527571439743, "loss_ce": 0.00414624810218811, "loss_iou": 0.51953125, "loss_num": 0.03955078125, "loss_xval": 0.1982421875, "num_input_tokens_seen": 149057764, "step": 868 }, { "epoch": 0.22857894390741107, "grad_norm": 5.578690860970403, "learning_rate": 5e-06, "loss": 0.1492, "num_input_tokens_seen": 149230132, "step": 869 }, { "epoch": 0.22857894390741107, "loss": 0.16226467490196228, "loss_ce": 0.0007046046666800976, "loss_iou": 0.66015625, "loss_num": 0.0322265625, "loss_xval": 0.1611328125, "num_input_tokens_seen": 149230132, "step": 869 }, { "epoch": 0.22884198066679817, "grad_norm": 8.388877746304392, "learning_rate": 5e-06, "loss": 0.1553, "num_input_tokens_seen": 149402312, "step": 870 }, { "epoch": 0.22884198066679817, "loss": 0.13199341297149658, "loss_ce": 0.001866456470452249, "loss_iou": 0.51953125, "loss_num": 0.0260009765625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 149402312, "step": 870 }, { "epoch": 0.2291050174261853, "grad_norm": 13.402550744349123, "learning_rate": 5e-06, "loss": 0.1888, "num_input_tokens_seen": 149572632, "step": 871 }, { "epoch": 0.2291050174261853, "loss": 0.20936883985996246, "loss_ce": 0.004016047343611717, "loss_iou": 0.470703125, "loss_num": 0.041015625, "loss_xval": 0.205078125, "num_input_tokens_seen": 149572632, "step": 871 }, { "epoch": 0.22936805418557243, "grad_norm": 5.899366870528114, "learning_rate": 5e-06, "loss": 0.1651, "num_input_tokens_seen": 149744908, "step": 872 }, { "epoch": 0.22936805418557243, "loss": 0.1176171749830246, "loss_ce": 0.001223139464855194, "loss_iou": 0.51953125, "loss_num": 0.0233154296875, "loss_xval": 0.1162109375, "num_input_tokens_seen": 149744908, "step": 872 }, { "epoch": 0.22963109094495956, "grad_norm": 8.241998846381511, "learning_rate": 5e-06, "loss": 0.1485, "num_input_tokens_seen": 149917100, "step": 873 }, { "epoch": 0.22963109094495956, "loss": 0.1720806509256363, "loss_ce": 
0.004966393578797579, "loss_iou": 0.546875, "loss_num": 0.033447265625, "loss_xval": 0.1669921875, "num_input_tokens_seen": 149917100, "step": 873 }, { "epoch": 0.2298941277043467, "grad_norm": 9.153818862978659, "learning_rate": 5e-06, "loss": 0.1555, "num_input_tokens_seen": 150087628, "step": 874 }, { "epoch": 0.2298941277043467, "loss": 0.14027956128120422, "loss_ce": 0.002584239235147834, "loss_iou": 0.5625, "loss_num": 0.027587890625, "loss_xval": 0.1376953125, "num_input_tokens_seen": 150087628, "step": 874 }, { "epoch": 0.2301571644637338, "grad_norm": 25.107851811290338, "learning_rate": 5e-06, "loss": 0.1805, "num_input_tokens_seen": 150258428, "step": 875 }, { "epoch": 0.2301571644637338, "loss": 0.17914238572120667, "loss_ce": 0.0014690514653921127, "loss_iou": 0.5625, "loss_num": 0.035400390625, "loss_xval": 0.177734375, "num_input_tokens_seen": 150258428, "step": 875 }, { "epoch": 0.23042020122312093, "grad_norm": 6.938403780407342, "learning_rate": 5e-06, "loss": 0.1835, "num_input_tokens_seen": 150430692, "step": 876 }, { "epoch": 0.23042020122312093, "loss": 0.13967271149158478, "loss_ce": 0.002038437407463789, "loss_iou": 0.392578125, "loss_num": 0.0274658203125, "loss_xval": 0.1376953125, "num_input_tokens_seen": 150430692, "step": 876 }, { "epoch": 0.23068323798250806, "grad_norm": 8.594672662971634, "learning_rate": 5e-06, "loss": 0.1592, "num_input_tokens_seen": 150603108, "step": 877 }, { "epoch": 0.23068323798250806, "loss": 0.3066813349723816, "loss_ce": 0.0005289965192787349, "loss_iou": 0.44921875, "loss_num": 0.061279296875, "loss_xval": 0.306640625, "num_input_tokens_seen": 150603108, "step": 877 }, { "epoch": 0.2309462747418952, "grad_norm": 5.613988390531258, "learning_rate": 5e-06, "loss": 0.1482, "num_input_tokens_seen": 150775124, "step": 878 }, { "epoch": 0.2309462747418952, "loss": 0.16111034154891968, "loss_ce": 0.008644518442451954, "loss_iou": 0.640625, "loss_num": 0.030517578125, "loss_xval": 0.15234375, 
"num_input_tokens_seen": 150775124, "step": 878 }, { "epoch": 0.2312093115012823, "grad_norm": 34.35737633666629, "learning_rate": 5e-06, "loss": 0.1148, "num_input_tokens_seen": 150947036, "step": 879 }, { "epoch": 0.2312093115012823, "loss": 0.15406344830989838, "loss_ce": 0.0015213302103802562, "loss_iou": 0.439453125, "loss_num": 0.030517578125, "loss_xval": 0.15234375, "num_input_tokens_seen": 150947036, "step": 879 }, { "epoch": 0.23147234826066942, "grad_norm": 6.1651043542261466, "learning_rate": 5e-06, "loss": 0.1981, "num_input_tokens_seen": 151119388, "step": 880 }, { "epoch": 0.23147234826066942, "loss": 0.29976093769073486, "loss_ce": 0.00825704075396061, "loss_iou": 0.48828125, "loss_num": 0.05810546875, "loss_xval": 0.291015625, "num_input_tokens_seen": 151119388, "step": 880 }, { "epoch": 0.23173538502005656, "grad_norm": 8.982832830215536, "learning_rate": 5e-06, "loss": 0.1522, "num_input_tokens_seen": 151290092, "step": 881 }, { "epoch": 0.23173538502005656, "loss": 0.18080484867095947, "loss_ce": 0.0034977139439433813, "loss_iou": 0.5078125, "loss_num": 0.035400390625, "loss_xval": 0.177734375, "num_input_tokens_seen": 151290092, "step": 881 }, { "epoch": 0.2319984217794437, "grad_norm": 13.943571194131867, "learning_rate": 5e-06, "loss": 0.214, "num_input_tokens_seen": 151462268, "step": 882 }, { "epoch": 0.2319984217794437, "loss": 0.16014625132083893, "loss_ce": 0.004170912317931652, "loss_iou": 0.625, "loss_num": 0.03125, "loss_xval": 0.15625, "num_input_tokens_seen": 151462268, "step": 882 }, { "epoch": 0.2322614585388308, "grad_norm": 7.043443131491897, "learning_rate": 5e-06, "loss": 0.1524, "num_input_tokens_seen": 151634180, "step": 883 }, { "epoch": 0.2322614585388308, "loss": 0.12871429324150085, "loss_ce": 0.0008456383948214352, "loss_iou": 0.44921875, "loss_num": 0.0255126953125, "loss_xval": 0.1279296875, "num_input_tokens_seen": 151634180, "step": 883 }, { "epoch": 0.23252449529821792, "grad_norm": 6.396513380644998, 
"learning_rate": 5e-06, "loss": 0.1833, "num_input_tokens_seen": 151806384, "step": 884 }, { "epoch": 0.23252449529821792, "loss": 0.18081963062286377, "loss_ce": 0.002169727347791195, "loss_iou": 0.421875, "loss_num": 0.03564453125, "loss_xval": 0.1787109375, "num_input_tokens_seen": 151806384, "step": 884 }, { "epoch": 0.23278753205760505, "grad_norm": 13.149227804701054, "learning_rate": 5e-06, "loss": 0.152, "num_input_tokens_seen": 151978708, "step": 885 }, { "epoch": 0.23278753205760505, "loss": 0.1582297682762146, "loss_ce": 0.007228789385408163, "loss_iou": 0.546875, "loss_num": 0.0301513671875, "loss_xval": 0.1513671875, "num_input_tokens_seen": 151978708, "step": 885 }, { "epoch": 0.23305056881699218, "grad_norm": 6.784444096578918, "learning_rate": 5e-06, "loss": 0.1646, "num_input_tokens_seen": 152150848, "step": 886 }, { "epoch": 0.23305056881699218, "loss": 0.19164127111434937, "loss_ce": 0.0013946772087365389, "loss_iou": null, "loss_num": 0.0380859375, "loss_xval": 0.1904296875, "num_input_tokens_seen": 152150848, "step": 886 }, { "epoch": 0.2333136055763793, "grad_norm": 6.295673229022739, "learning_rate": 5e-06, "loss": 0.1772, "num_input_tokens_seen": 152323052, "step": 887 }, { "epoch": 0.2333136055763793, "loss": 0.22069396078586578, "loss_ce": 0.0015167115489020944, "loss_iou": 0.478515625, "loss_num": 0.0439453125, "loss_xval": 0.21875, "num_input_tokens_seen": 152323052, "step": 887 }, { "epoch": 0.23357664233576642, "grad_norm": 11.781517950000717, "learning_rate": 5e-06, "loss": 0.1798, "num_input_tokens_seen": 152493800, "step": 888 }, { "epoch": 0.23357664233576642, "loss": 0.1682368516921997, "loss_ce": 0.0009394832304678857, "loss_iou": 0.54296875, "loss_num": 0.033447265625, "loss_xval": 0.1669921875, "num_input_tokens_seen": 152493800, "step": 888 }, { "epoch": 0.23383967909515355, "grad_norm": 11.368208703500017, "learning_rate": 5e-06, "loss": 0.1715, "num_input_tokens_seen": 152665776, "step": 889 }, { "epoch": 0.23383967909515355, 
"loss": 0.2839386761188507, "loss_ce": 0.003970403224229813, "loss_iou": 0.51171875, "loss_num": 0.05615234375, "loss_xval": 0.279296875, "num_input_tokens_seen": 152665776, "step": 889 }, { "epoch": 0.23410271585454068, "grad_norm": 8.093455275529028, "learning_rate": 5e-06, "loss": 0.1589, "num_input_tokens_seen": 152834764, "step": 890 }, { "epoch": 0.23410271585454068, "loss": 0.1241624653339386, "loss_ce": 0.004472525790333748, "loss_iou": 0.59765625, "loss_num": 0.02392578125, "loss_xval": 0.11962890625, "num_input_tokens_seen": 152834764, "step": 890 }, { "epoch": 0.2343657526139278, "grad_norm": 12.52857780858346, "learning_rate": 5e-06, "loss": 0.1966, "num_input_tokens_seen": 153007304, "step": 891 }, { "epoch": 0.2343657526139278, "loss": 0.21788766980171204, "loss_ce": 0.002006314927712083, "loss_iou": 0.59765625, "loss_num": 0.043212890625, "loss_xval": 0.2158203125, "num_input_tokens_seen": 153007304, "step": 891 }, { "epoch": 0.2346287893733149, "grad_norm": 7.50091412461157, "learning_rate": 5e-06, "loss": 0.1804, "num_input_tokens_seen": 153179168, "step": 892 }, { "epoch": 0.2346287893733149, "loss": 0.17867043614387512, "loss_ce": 0.0008750315755605698, "loss_iou": null, "loss_num": 0.03564453125, "loss_xval": 0.177734375, "num_input_tokens_seen": 153179168, "step": 892 }, { "epoch": 0.23489182613270204, "grad_norm": 6.297998473336444, "learning_rate": 5e-06, "loss": 0.1679, "num_input_tokens_seen": 153351220, "step": 893 }, { "epoch": 0.23489182613270204, "loss": 0.13165241479873657, "loss_ce": 0.001311830012127757, "loss_iou": 0.380859375, "loss_num": 0.026123046875, "loss_xval": 0.1298828125, "num_input_tokens_seen": 153351220, "step": 893 }, { "epoch": 0.23515486289208917, "grad_norm": 7.909655629561448, "learning_rate": 5e-06, "loss": 0.1896, "num_input_tokens_seen": 153523232, "step": 894 }, { "epoch": 0.23515486289208917, "loss": 0.13778507709503174, "loss_ce": 0.002287032548338175, "loss_iou": 0.51953125, "loss_num": 0.027099609375, 
"loss_xval": 0.1357421875, "num_input_tokens_seen": 153523232, "step": 894 }, { "epoch": 0.2354178996514763, "grad_norm": 5.883815728394397, "learning_rate": 5e-06, "loss": 0.1413, "num_input_tokens_seen": 153692208, "step": 895 }, { "epoch": 0.2354178996514763, "loss": 0.1446894109249115, "loss_ce": 0.001439890475012362, "loss_iou": 0.462890625, "loss_num": 0.0286865234375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 153692208, "step": 895 }, { "epoch": 0.2356809364108634, "grad_norm": 15.735436674991021, "learning_rate": 5e-06, "loss": 0.1563, "num_input_tokens_seen": 153862388, "step": 896 }, { "epoch": 0.2356809364108634, "loss": 0.16044028103351593, "loss_ce": 0.00043661610106937587, "loss_iou": 0.6328125, "loss_num": 0.031982421875, "loss_xval": 0.16015625, "num_input_tokens_seen": 153862388, "step": 896 }, { "epoch": 0.23594397317025054, "grad_norm": 5.835513921908954, "learning_rate": 5e-06, "loss": 0.129, "num_input_tokens_seen": 154034480, "step": 897 }, { "epoch": 0.23594397317025054, "loss": 0.14927011728286743, "loss_ce": 0.0002833124599419534, "loss_iou": 0.515625, "loss_num": 0.02978515625, "loss_xval": 0.1494140625, "num_input_tokens_seen": 154034480, "step": 897 }, { "epoch": 0.23620700992963767, "grad_norm": 4.347875981694168, "learning_rate": 5e-06, "loss": 0.1672, "num_input_tokens_seen": 154203000, "step": 898 }, { "epoch": 0.23620700992963767, "loss": 0.1722668707370758, "loss_ce": 0.0003613463486544788, "loss_iou": 0.5390625, "loss_num": 0.034423828125, "loss_xval": 0.171875, "num_input_tokens_seen": 154203000, "step": 898 }, { "epoch": 0.2364700466890248, "grad_norm": 6.555211118822418, "learning_rate": 5e-06, "loss": 0.123, "num_input_tokens_seen": 154375292, "step": 899 }, { "epoch": 0.2364700466890248, "loss": 0.12109389901161194, "loss_ce": 0.001983799273148179, "loss_iou": 0.53125, "loss_num": 0.0238037109375, "loss_xval": 0.119140625, "num_input_tokens_seen": 154375292, "step": 899 }, { "epoch": 0.2367330834484119, "grad_norm": 
4.963219405700268, "learning_rate": 5e-06, "loss": 0.1507, "num_input_tokens_seen": 154547472, "step": 900 }, { "epoch": 0.2367330834484119, "loss": 0.1948363184928894, "loss_ce": 0.001843146630562842, "loss_iou": 0.5625, "loss_num": 0.03857421875, "loss_xval": 0.193359375, "num_input_tokens_seen": 154547472, "step": 900 }, { "epoch": 0.23699612020779903, "grad_norm": 9.476812377662082, "learning_rate": 5e-06, "loss": 0.1428, "num_input_tokens_seen": 154719756, "step": 901 }, { "epoch": 0.23699612020779903, "loss": 0.1722353994846344, "loss_ce": 0.0028628362342715263, "loss_iou": 0.61328125, "loss_num": 0.033935546875, "loss_xval": 0.1689453125, "num_input_tokens_seen": 154719756, "step": 901 }, { "epoch": 0.23725915696718616, "grad_norm": 8.867901615298983, "learning_rate": 5e-06, "loss": 0.1444, "num_input_tokens_seen": 154891816, "step": 902 }, { "epoch": 0.23725915696718616, "loss": 0.13942725956439972, "loss_ce": 0.0014267791993916035, "loss_iou": 0.71484375, "loss_num": 0.027587890625, "loss_xval": 0.1376953125, "num_input_tokens_seen": 154891816, "step": 902 }, { "epoch": 0.2375221937265733, "grad_norm": 7.06026317137913, "learning_rate": 5e-06, "loss": 0.1247, "num_input_tokens_seen": 155064164, "step": 903 }, { "epoch": 0.2375221937265733, "loss": 0.13094615936279297, "loss_ce": 0.000575068814214319, "loss_iou": 0.47265625, "loss_num": 0.026123046875, "loss_xval": 0.130859375, "num_input_tokens_seen": 155064164, "step": 903 }, { "epoch": 0.23778523048596042, "grad_norm": 6.065414379643311, "learning_rate": 5e-06, "loss": 0.1882, "num_input_tokens_seen": 155236136, "step": 904 }, { "epoch": 0.23778523048596042, "loss": 0.17110927402973175, "loss_ce": 0.0006380859995260835, "loss_iou": 0.7421875, "loss_num": 0.0341796875, "loss_xval": 0.1708984375, "num_input_tokens_seen": 155236136, "step": 904 }, { "epoch": 0.23804826724534753, "grad_norm": 6.4154554676892275, "learning_rate": 5e-06, "loss": 0.1664, "num_input_tokens_seen": 155408400, "step": 905 }, { 
"epoch": 0.23804826724534753, "loss": 0.18800213932991028, "loss_ce": 0.005415464285761118, "loss_iou": 0.515625, "loss_num": 0.03662109375, "loss_xval": 0.1826171875, "num_input_tokens_seen": 155408400, "step": 905 }, { "epoch": 0.23831130400473466, "grad_norm": 6.061588736644807, "learning_rate": 5e-06, "loss": 0.1656, "num_input_tokens_seen": 155578772, "step": 906 }, { "epoch": 0.23831130400473466, "loss": 0.11921393871307373, "loss_ce": 0.0009278038050979376, "loss_iou": 0.609375, "loss_num": 0.023681640625, "loss_xval": 0.1181640625, "num_input_tokens_seen": 155578772, "step": 906 }, { "epoch": 0.2385743407641218, "grad_norm": 9.748504628242028, "learning_rate": 5e-06, "loss": 0.128, "num_input_tokens_seen": 155751088, "step": 907 }, { "epoch": 0.2385743407641218, "loss": 0.13661867380142212, "loss_ce": 0.0004492364823818207, "loss_iou": 0.62890625, "loss_num": 0.0272216796875, "loss_xval": 0.1357421875, "num_input_tokens_seen": 155751088, "step": 907 }, { "epoch": 0.23883737752350892, "grad_norm": 11.132080080363805, "learning_rate": 5e-06, "loss": 0.2072, "num_input_tokens_seen": 155923228, "step": 908 }, { "epoch": 0.23883737752350892, "loss": 0.2608073353767395, "loss_ce": 0.006229718215763569, "loss_iou": 0.447265625, "loss_num": 0.05078125, "loss_xval": 0.25390625, "num_input_tokens_seen": 155923228, "step": 908 }, { "epoch": 0.23910041428289602, "grad_norm": 6.802731098529186, "learning_rate": 5e-06, "loss": 0.1071, "num_input_tokens_seen": 156095144, "step": 909 }, { "epoch": 0.23910041428289602, "loss": 0.12534289062023163, "loss_ce": 0.0022044701036065817, "loss_iou": 0.67578125, "loss_num": 0.024658203125, "loss_xval": 0.123046875, "num_input_tokens_seen": 156095144, "step": 909 }, { "epoch": 0.23936345104228315, "grad_norm": 5.86214467113572, "learning_rate": 5e-06, "loss": 0.155, "num_input_tokens_seen": 156267348, "step": 910 }, { "epoch": 0.23936345104228315, "loss": 0.1707063615322113, "loss_ce": 0.0013948287814855576, "loss_iou": 0.5859375, 
"loss_num": 0.033935546875, "loss_xval": 0.1689453125, "num_input_tokens_seen": 156267348, "step": 910 }, { "epoch": 0.23962648780167028, "grad_norm": 5.562832964795879, "learning_rate": 5e-06, "loss": 0.156, "num_input_tokens_seen": 156439364, "step": 911 }, { "epoch": 0.23962648780167028, "loss": 0.12454073876142502, "loss_ce": 0.0003036795533262193, "loss_iou": 0.578125, "loss_num": 0.02490234375, "loss_xval": 0.1240234375, "num_input_tokens_seen": 156439364, "step": 911 }, { "epoch": 0.23988952456105742, "grad_norm": 13.223359809657884, "learning_rate": 5e-06, "loss": 0.15, "num_input_tokens_seen": 156609552, "step": 912 }, { "epoch": 0.23988952456105742, "loss": 0.15059047937393188, "loss_ce": 0.00239713117480278, "loss_iou": 0.59375, "loss_num": 0.0296630859375, "loss_xval": 0.1484375, "num_input_tokens_seen": 156609552, "step": 912 }, { "epoch": 0.24015256132044452, "grad_norm": 6.777584209416996, "learning_rate": 5e-06, "loss": 0.1436, "num_input_tokens_seen": 156781920, "step": 913 }, { "epoch": 0.24015256132044452, "loss": 0.13880465924739838, "loss_ce": 0.0020859187934547663, "loss_iou": 0.6015625, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 156781920, "step": 913 }, { "epoch": 0.24041559807983165, "grad_norm": 11.055320488873154, "learning_rate": 5e-06, "loss": 0.2024, "num_input_tokens_seen": 156954340, "step": 914 }, { "epoch": 0.24041559807983165, "loss": 0.2452932596206665, "loss_ce": 0.002312319353222847, "loss_iou": 0.4765625, "loss_num": 0.048583984375, "loss_xval": 0.2431640625, "num_input_tokens_seen": 156954340, "step": 914 }, { "epoch": 0.24067863483921878, "grad_norm": 4.458423775022664, "learning_rate": 5e-06, "loss": 0.1784, "num_input_tokens_seen": 157126652, "step": 915 }, { "epoch": 0.24067863483921878, "loss": 0.13856951892375946, "loss_ce": 0.0020949181634932756, "loss_iou": 0.416015625, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 157126652, "step": 915 }, { "epoch": 
0.2409416715986059, "grad_norm": 5.681838115677692, "learning_rate": 5e-06, "loss": 0.1239, "num_input_tokens_seen": 157298968, "step": 916 }, { "epoch": 0.2409416715986059, "loss": 0.1539350152015686, "loss_ce": 0.00638251006603241, "loss_iou": 0.412109375, "loss_num": 0.029541015625, "loss_xval": 0.1474609375, "num_input_tokens_seen": 157298968, "step": 916 }, { "epoch": 0.24120470835799304, "grad_norm": 5.180460717860643, "learning_rate": 5e-06, "loss": 0.116, "num_input_tokens_seen": 157471004, "step": 917 }, { "epoch": 0.24120470835799304, "loss": 0.10441954433917999, "loss_ce": 0.0008428902365267277, "loss_iou": 0.43359375, "loss_num": 0.020751953125, "loss_xval": 0.103515625, "num_input_tokens_seen": 157471004, "step": 917 }, { "epoch": 0.24146774511738014, "grad_norm": 10.247033875410487, "learning_rate": 5e-06, "loss": 0.161, "num_input_tokens_seen": 157643232, "step": 918 }, { "epoch": 0.24146774511738014, "loss": 0.18021947145462036, "loss_ce": 0.002607175149023533, "loss_iou": 0.59375, "loss_num": 0.03564453125, "loss_xval": 0.177734375, "num_input_tokens_seen": 157643232, "step": 918 }, { "epoch": 0.24173078187676728, "grad_norm": 6.8962184523908725, "learning_rate": 5e-06, "loss": 0.1535, "num_input_tokens_seen": 157815292, "step": 919 }, { "epoch": 0.24173078187676728, "loss": 0.1455521434545517, "loss_ce": 0.0008988262270577252, "loss_iou": 0.66015625, "loss_num": 0.0289306640625, "loss_xval": 0.14453125, "num_input_tokens_seen": 157815292, "step": 919 }, { "epoch": 0.2419938186361544, "grad_norm": 6.7883399023716775, "learning_rate": 5e-06, "loss": 0.1916, "num_input_tokens_seen": 157987716, "step": 920 }, { "epoch": 0.2419938186361544, "loss": 0.19279745221138, "loss_ce": 0.0026729374658316374, "loss_iou": 0.65234375, "loss_num": 0.0380859375, "loss_xval": 0.1904296875, "num_input_tokens_seen": 157987716, "step": 920 }, { "epoch": 0.24225685539554154, "grad_norm": 7.426866121442803, "learning_rate": 5e-06, "loss": 0.1546, "num_input_tokens_seen": 
158160224, "step": 921 }, { "epoch": 0.24225685539554154, "loss": 0.1912505030632019, "loss_ce": 0.001980474451556802, "loss_iou": 0.470703125, "loss_num": 0.037841796875, "loss_xval": 0.189453125, "num_input_tokens_seen": 158160224, "step": 921 }, { "epoch": 0.24251989215492864, "grad_norm": 7.602353481412061, "learning_rate": 5e-06, "loss": 0.136, "num_input_tokens_seen": 158332464, "step": 922 }, { "epoch": 0.24251989215492864, "loss": 0.1421346664428711, "loss_ce": 0.003951081074774265, "loss_iou": 0.494140625, "loss_num": 0.027587890625, "loss_xval": 0.138671875, "num_input_tokens_seen": 158332464, "step": 922 }, { "epoch": 0.24278292891431577, "grad_norm": 9.798997838296735, "learning_rate": 5e-06, "loss": 0.1636, "num_input_tokens_seen": 158504560, "step": 923 }, { "epoch": 0.24278292891431577, "loss": 0.15015605092048645, "loss_ce": 0.0013523304369300604, "loss_iou": 0.44140625, "loss_num": 0.02978515625, "loss_xval": 0.1484375, "num_input_tokens_seen": 158504560, "step": 923 }, { "epoch": 0.2430459656737029, "grad_norm": 9.276091083653826, "learning_rate": 5e-06, "loss": 0.1582, "num_input_tokens_seen": 158676676, "step": 924 }, { "epoch": 0.2430459656737029, "loss": 0.13057658076286316, "loss_ce": 0.0024943118914961815, "loss_iou": 0.59765625, "loss_num": 0.025634765625, "loss_xval": 0.1279296875, "num_input_tokens_seen": 158676676, "step": 924 }, { "epoch": 0.24330900243309003, "grad_norm": 7.79676627099927, "learning_rate": 5e-06, "loss": 0.1547, "num_input_tokens_seen": 158849084, "step": 925 }, { "epoch": 0.24330900243309003, "loss": 0.1701420098543167, "loss_ce": 0.004248456098139286, "loss_iou": 0.447265625, "loss_num": 0.033203125, "loss_xval": 0.166015625, "num_input_tokens_seen": 158849084, "step": 925 }, { "epoch": 0.24357203919247714, "grad_norm": 7.627935616299721, "learning_rate": 5e-06, "loss": 0.1337, "num_input_tokens_seen": 159019648, "step": 926 }, { "epoch": 0.24357203919247714, "loss": 0.09866867959499359, "loss_ce": 
0.002080547623336315, "loss_iou": 0.59765625, "loss_num": 0.0194091796875, "loss_xval": 0.0966796875, "num_input_tokens_seen": 159019648, "step": 926 }, { "epoch": 0.24383507595186427, "grad_norm": 7.674777640749283, "learning_rate": 5e-06, "loss": 0.1837, "num_input_tokens_seen": 159191744, "step": 927 }, { "epoch": 0.24383507595186427, "loss": 0.09513729810714722, "loss_ce": 0.0016314350068569183, "loss_iou": 0.451171875, "loss_num": 0.0186767578125, "loss_xval": 0.09375, "num_input_tokens_seen": 159191744, "step": 927 }, { "epoch": 0.2440981127112514, "grad_norm": 8.029174499906352, "learning_rate": 5e-06, "loss": 0.1991, "num_input_tokens_seen": 159364172, "step": 928 }, { "epoch": 0.2440981127112514, "loss": 0.2412642240524292, "loss_ce": 0.0012739873491227627, "loss_iou": 0.48046875, "loss_num": 0.048095703125, "loss_xval": 0.240234375, "num_input_tokens_seen": 159364172, "step": 928 }, { "epoch": 0.24436114947063853, "grad_norm": 3.8948353119563537, "learning_rate": 5e-06, "loss": 0.1119, "num_input_tokens_seen": 159534608, "step": 929 }, { "epoch": 0.24436114947063853, "loss": 0.11882825195789337, "loss_ce": 0.0011219491716474295, "loss_iou": 0.58203125, "loss_num": 0.0235595703125, "loss_xval": 0.11767578125, "num_input_tokens_seen": 159534608, "step": 929 }, { "epoch": 0.24462418623002566, "grad_norm": 12.725190215625348, "learning_rate": 5e-06, "loss": 0.1728, "num_input_tokens_seen": 159705228, "step": 930 }, { "epoch": 0.24462418623002566, "loss": 0.15004633367061615, "loss_ce": 0.000662794045638293, "loss_iou": 0.451171875, "loss_num": 0.0299072265625, "loss_xval": 0.1494140625, "num_input_tokens_seen": 159705228, "step": 930 }, { "epoch": 0.24488722298941276, "grad_norm": 8.38127835969171, "learning_rate": 5e-06, "loss": 0.1789, "num_input_tokens_seen": 159875324, "step": 931 }, { "epoch": 0.24488722298941276, "loss": 0.1881195604801178, "loss_ce": 0.0022675050422549248, "loss_iou": 0.40234375, "loss_num": 0.037109375, "loss_xval": 0.185546875, 
"num_input_tokens_seen": 159875324, "step": 931 }, { "epoch": 0.2451502597487999, "grad_norm": 8.09047633001587, "learning_rate": 5e-06, "loss": 0.1537, "num_input_tokens_seen": 160045932, "step": 932 }, { "epoch": 0.2451502597487999, "loss": 0.23839473724365234, "loss_ce": 0.0010290088830515742, "loss_iou": 0.494140625, "loss_num": 0.047607421875, "loss_xval": 0.2373046875, "num_input_tokens_seen": 160045932, "step": 932 }, { "epoch": 0.24541329650818702, "grad_norm": 14.720243529942747, "learning_rate": 5e-06, "loss": 0.1408, "num_input_tokens_seen": 160218252, "step": 933 }, { "epoch": 0.24541329650818702, "loss": 0.12284128367900848, "loss_ce": 0.0022663308773189783, "loss_iou": 0.4296875, "loss_num": 0.024169921875, "loss_xval": 0.12060546875, "num_input_tokens_seen": 160218252, "step": 933 }, { "epoch": 0.24567633326757415, "grad_norm": 6.031479541696206, "learning_rate": 5e-06, "loss": 0.1669, "num_input_tokens_seen": 160390560, "step": 934 }, { "epoch": 0.24567633326757415, "loss": 0.1301022320985794, "loss_ce": 0.0005245967186056077, "loss_iou": 0.70703125, "loss_num": 0.02587890625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 160390560, "step": 934 }, { "epoch": 0.24593937002696126, "grad_norm": 6.383554643597018, "learning_rate": 5e-06, "loss": 0.1569, "num_input_tokens_seen": 160562528, "step": 935 }, { "epoch": 0.24593937002696126, "loss": 0.17644909024238586, "loss_ce": 0.0030787207651883364, "loss_iou": 0.375, "loss_num": 0.03466796875, "loss_xval": 0.173828125, "num_input_tokens_seen": 160562528, "step": 935 }, { "epoch": 0.2462024067863484, "grad_norm": 20.862114065213355, "learning_rate": 5e-06, "loss": 0.1352, "num_input_tokens_seen": 160734384, "step": 936 }, { "epoch": 0.2462024067863484, "loss": 0.11419504880905151, "loss_ce": 0.0004255172680132091, "loss_iou": 0.466796875, "loss_num": 0.022705078125, "loss_xval": 0.11376953125, "num_input_tokens_seen": 160734384, "step": 936 }, { "epoch": 0.24646544354573552, "grad_norm": 
8.0386150468435, "learning_rate": 5e-06, "loss": 0.1545, "num_input_tokens_seen": 160906448, "step": 937 }, { "epoch": 0.24646544354573552, "loss": 0.14023496210575104, "loss_ce": 0.0016241249395534396, "loss_iou": 0.388671875, "loss_num": 0.0277099609375, "loss_xval": 0.138671875, "num_input_tokens_seen": 160906448, "step": 937 }, { "epoch": 0.24672848030512265, "grad_norm": 7.276863008633557, "learning_rate": 5e-06, "loss": 0.1694, "num_input_tokens_seen": 161078828, "step": 938 }, { "epoch": 0.24672848030512265, "loss": 0.21741217374801636, "loss_ce": 0.004765682853758335, "loss_iou": 0.7265625, "loss_num": 0.04248046875, "loss_xval": 0.212890625, "num_input_tokens_seen": 161078828, "step": 938 }, { "epoch": 0.24699151706450975, "grad_norm": 10.285417073408015, "learning_rate": 5e-06, "loss": 0.1595, "num_input_tokens_seen": 161249228, "step": 939 }, { "epoch": 0.24699151706450975, "loss": 0.2533302903175354, "loss_ce": 0.004825636278837919, "loss_iou": 0.3671875, "loss_num": 0.049560546875, "loss_xval": 0.248046875, "num_input_tokens_seen": 161249228, "step": 939 }, { "epoch": 0.24725455382389688, "grad_norm": 7.503087196122763, "learning_rate": 5e-06, "loss": 0.1885, "num_input_tokens_seen": 161421624, "step": 940 }, { "epoch": 0.24725455382389688, "loss": 0.1497778743505478, "loss_ce": 0.003537639044225216, "loss_iou": 0.71484375, "loss_num": 0.029296875, "loss_xval": 0.146484375, "num_input_tokens_seen": 161421624, "step": 940 }, { "epoch": 0.24751759058328401, "grad_norm": 6.078271645066026, "learning_rate": 5e-06, "loss": 0.1356, "num_input_tokens_seen": 161593500, "step": 941 }, { "epoch": 0.24751759058328401, "loss": 0.15894815325737, "loss_ce": 0.0006839816924184561, "loss_iou": 0.6015625, "loss_num": 0.03173828125, "loss_xval": 0.158203125, "num_input_tokens_seen": 161593500, "step": 941 }, { "epoch": 0.24778062734267114, "grad_norm": 16.470075984430842, "learning_rate": 5e-06, "loss": 0.1725, "num_input_tokens_seen": 161764108, "step": 942 }, { 
"epoch": 0.24778062734267114, "loss": 0.2659192383289337, "loss_ce": 0.003468066919595003, "loss_iou": 0.5546875, "loss_num": 0.052490234375, "loss_xval": 0.26171875, "num_input_tokens_seen": 161764108, "step": 942 }, { "epoch": 0.24804366410205828, "grad_norm": 18.319111638013048, "learning_rate": 5e-06, "loss": 0.1876, "num_input_tokens_seen": 161936252, "step": 943 }, { "epoch": 0.24804366410205828, "loss": 0.23192915320396423, "loss_ce": 0.004817330744117498, "loss_iou": 0.443359375, "loss_num": 0.04541015625, "loss_xval": 0.2275390625, "num_input_tokens_seen": 161936252, "step": 943 }, { "epoch": 0.24830670086144538, "grad_norm": 11.296239885199183, "learning_rate": 5e-06, "loss": 0.1547, "num_input_tokens_seen": 162108564, "step": 944 }, { "epoch": 0.24830670086144538, "loss": 0.13432571291923523, "loss_ce": 0.002459259470924735, "loss_iou": 0.7578125, "loss_num": 0.0263671875, "loss_xval": 0.1318359375, "num_input_tokens_seen": 162108564, "step": 944 }, { "epoch": 0.2485697376208325, "grad_norm": 5.511055668564898, "learning_rate": 5e-06, "loss": 0.1393, "num_input_tokens_seen": 162280792, "step": 945 }, { "epoch": 0.2485697376208325, "loss": 0.13674385845661163, "loss_ce": 0.0015509906224906445, "loss_iou": 0.61328125, "loss_num": 0.027099609375, "loss_xval": 0.134765625, "num_input_tokens_seen": 162280792, "step": 945 }, { "epoch": 0.24883277438021964, "grad_norm": 10.355301687463253, "learning_rate": 5e-06, "loss": 0.1477, "num_input_tokens_seen": 162449528, "step": 946 }, { "epoch": 0.24883277438021964, "loss": 0.2201877236366272, "loss_ce": 0.0006442689918912947, "loss_iou": 0.33984375, "loss_num": 0.0439453125, "loss_xval": 0.2197265625, "num_input_tokens_seen": 162449528, "step": 946 }, { "epoch": 0.24909581113960677, "grad_norm": 11.25377814108321, "learning_rate": 5e-06, "loss": 0.1493, "num_input_tokens_seen": 162617256, "step": 947 }, { "epoch": 0.24909581113960677, "loss": 0.188047856092453, "loss_ce": 0.0031418518628925085, "loss_iou": 
0.58203125, "loss_num": 0.036865234375, "loss_xval": 0.1845703125, "num_input_tokens_seen": 162617256, "step": 947 }, { "epoch": 0.24935884789899387, "grad_norm": 12.235451694711202, "learning_rate": 5e-06, "loss": 0.1615, "num_input_tokens_seen": 162789400, "step": 948 }, { "epoch": 0.24935884789899387, "loss": 0.15606345236301422, "loss_ce": 0.005215056240558624, "loss_iou": 0.66796875, "loss_num": 0.0301513671875, "loss_xval": 0.150390625, "num_input_tokens_seen": 162789400, "step": 948 }, { "epoch": 0.249621884658381, "grad_norm": 7.1804139994510034, "learning_rate": 5e-06, "loss": 0.1357, "num_input_tokens_seen": 162961432, "step": 949 }, { "epoch": 0.249621884658381, "loss": 0.10785458981990814, "loss_ce": 0.002507905475795269, "loss_iou": 0.56640625, "loss_num": 0.0211181640625, "loss_xval": 0.10546875, "num_input_tokens_seen": 162961432, "step": 949 }, { "epoch": 0.24988492141776814, "grad_norm": 14.388122342778688, "learning_rate": 5e-06, "loss": 0.1883, "num_input_tokens_seen": 163133524, "step": 950 }, { "epoch": 0.24988492141776814, "loss": 0.16639769077301025, "loss_ce": 0.001511220121756196, "loss_iou": 0.60546875, "loss_num": 0.032958984375, "loss_xval": 0.1650390625, "num_input_tokens_seen": 163133524, "step": 950 }, { "epoch": 0.25014795817715524, "grad_norm": 10.331941667190199, "learning_rate": 5e-06, "loss": 0.1895, "num_input_tokens_seen": 163305816, "step": 951 }, { "epoch": 0.25014795817715524, "loss": 0.1127045676112175, "loss_ce": 0.0008271271362900734, "loss_iou": 0.56640625, "loss_num": 0.0224609375, "loss_xval": 0.11181640625, "num_input_tokens_seen": 163305816, "step": 951 }, { "epoch": 0.2504109949365424, "grad_norm": 6.829599375925184, "learning_rate": 5e-06, "loss": 0.154, "num_input_tokens_seen": 163478368, "step": 952 }, { "epoch": 0.2504109949365424, "loss": 0.1521347612142563, "loss_ce": 0.0029038134962320328, "loss_iou": 0.54296875, "loss_num": 0.0299072265625, "loss_xval": 0.1494140625, "num_input_tokens_seen": 163478368, 
"step": 952 }, { "epoch": 0.2506740316959295, "grad_norm": 5.382923356179618, "learning_rate": 5e-06, "loss": 0.1478, "num_input_tokens_seen": 163650500, "step": 953 }, { "epoch": 0.2506740316959295, "loss": 0.14390447735786438, "loss_ce": 0.00224187970161438, "loss_iou": 0.62890625, "loss_num": 0.0283203125, "loss_xval": 0.1416015625, "num_input_tokens_seen": 163650500, "step": 953 }, { "epoch": 0.2509370684553166, "grad_norm": 4.693926744893699, "learning_rate": 5e-06, "loss": 0.1612, "num_input_tokens_seen": 163822252, "step": 954 }, { "epoch": 0.2509370684553166, "loss": 0.1426747441291809, "loss_ce": 0.001286811544559896, "loss_iou": 0.41015625, "loss_num": 0.0281982421875, "loss_xval": 0.1416015625, "num_input_tokens_seen": 163822252, "step": 954 }, { "epoch": 0.25120010521470376, "grad_norm": 6.416178956242753, "learning_rate": 5e-06, "loss": 0.1743, "num_input_tokens_seen": 163992628, "step": 955 }, { "epoch": 0.25120010521470376, "loss": 0.2167130708694458, "loss_ce": 0.0013200179673731327, "loss_iou": 0.41015625, "loss_num": 0.043212890625, "loss_xval": 0.2158203125, "num_input_tokens_seen": 163992628, "step": 955 }, { "epoch": 0.25146314197409086, "grad_norm": 8.38564274924814, "learning_rate": 5e-06, "loss": 0.1307, "num_input_tokens_seen": 164164972, "step": 956 }, { "epoch": 0.25146314197409086, "loss": 0.14087224006652832, "loss_ce": 0.0018951823003590107, "loss_iou": 0.53515625, "loss_num": 0.0277099609375, "loss_xval": 0.138671875, "num_input_tokens_seen": 164164972, "step": 956 }, { "epoch": 0.251726178733478, "grad_norm": 10.557767516476979, "learning_rate": 5e-06, "loss": 0.1952, "num_input_tokens_seen": 164334056, "step": 957 }, { "epoch": 0.251726178733478, "loss": 0.10351097583770752, "loss_ce": 0.003108144039288163, "loss_iou": 0.5703125, "loss_num": 0.02001953125, "loss_xval": 0.1005859375, "num_input_tokens_seen": 164334056, "step": 957 }, { "epoch": 0.2519892154928651, "grad_norm": 6.187664908064733, "learning_rate": 5e-06, "loss": 
0.1717, "num_input_tokens_seen": 164506496, "step": 958 }, { "epoch": 0.2519892154928651, "loss": 0.15677396953105927, "loss_ce": 0.005193163640797138, "loss_iou": 0.55859375, "loss_num": 0.0302734375, "loss_xval": 0.1513671875, "num_input_tokens_seen": 164506496, "step": 958 }, { "epoch": 0.25225225225225223, "grad_norm": 9.49095916832686, "learning_rate": 5e-06, "loss": 0.1296, "num_input_tokens_seen": 164678644, "step": 959 }, { "epoch": 0.25225225225225223, "loss": 0.18635977804660797, "loss_ce": 0.002003092784434557, "loss_iou": 0.5, "loss_num": 0.036865234375, "loss_xval": 0.1845703125, "num_input_tokens_seen": 164678644, "step": 959 }, { "epoch": 0.2525152890116394, "grad_norm": 6.024170228931713, "learning_rate": 5e-06, "loss": 0.1579, "num_input_tokens_seen": 164851184, "step": 960 }, { "epoch": 0.2525152890116394, "loss": 0.13998761773109436, "loss_ce": 0.003940259106457233, "loss_iou": 0.58203125, "loss_num": 0.0272216796875, "loss_xval": 0.1357421875, "num_input_tokens_seen": 164851184, "step": 960 }, { "epoch": 0.2527783257710265, "grad_norm": 6.120337923811076, "learning_rate": 5e-06, "loss": 0.0964, "num_input_tokens_seen": 165023544, "step": 961 }, { "epoch": 0.2527783257710265, "loss": 0.09865675866603851, "loss_ce": 0.003258807584643364, "loss_iou": 0.66796875, "loss_num": 0.01904296875, "loss_xval": 0.09521484375, "num_input_tokens_seen": 165023544, "step": 961 }, { "epoch": 0.25304136253041365, "grad_norm": 9.066155915123039, "learning_rate": 5e-06, "loss": 0.1506, "num_input_tokens_seen": 165193832, "step": 962 }, { "epoch": 0.25304136253041365, "loss": 0.1392088085412979, "loss_ce": 0.0009946945356205106, "loss_iou": 0.6328125, "loss_num": 0.027587890625, "loss_xval": 0.138671875, "num_input_tokens_seen": 165193832, "step": 962 }, { "epoch": 0.25330439928980075, "grad_norm": 7.472497977314338, "learning_rate": 5e-06, "loss": 0.1704, "num_input_tokens_seen": 165365892, "step": 963 }, { "epoch": 0.25330439928980075, "loss": 0.23057040572166443, 
"loss_ce": 0.002054777694866061, "loss_iou": 0.47265625, "loss_num": 0.045654296875, "loss_xval": 0.228515625, "num_input_tokens_seen": 165365892, "step": 963 }, { "epoch": 0.25356743604918786, "grad_norm": 8.732932808526876, "learning_rate": 5e-06, "loss": 0.1689, "num_input_tokens_seen": 165538272, "step": 964 }, { "epoch": 0.25356743604918786, "loss": 0.16193270683288574, "loss_ce": 0.0018985318019986153, "loss_iou": 0.640625, "loss_num": 0.031982421875, "loss_xval": 0.16015625, "num_input_tokens_seen": 165538272, "step": 964 }, { "epoch": 0.253830472808575, "grad_norm": 7.508690685861332, "learning_rate": 5e-06, "loss": 0.1348, "num_input_tokens_seen": 165710728, "step": 965 }, { "epoch": 0.253830472808575, "loss": 0.16999930143356323, "loss_ce": 0.002335726749151945, "loss_iou": 0.50390625, "loss_num": 0.033447265625, "loss_xval": 0.16796875, "num_input_tokens_seen": 165710728, "step": 965 }, { "epoch": 0.2540935095679621, "grad_norm": 8.520838856927949, "learning_rate": 5e-06, "loss": 0.2043, "num_input_tokens_seen": 165882912, "step": 966 }, { "epoch": 0.2540935095679621, "loss": 0.1505521535873413, "loss_ce": 0.0006498107686638832, "loss_iou": 0.56640625, "loss_num": 0.0299072265625, "loss_xval": 0.150390625, "num_input_tokens_seen": 165882912, "step": 966 }, { "epoch": 0.2543565463273492, "grad_norm": 7.07336887298293, "learning_rate": 5e-06, "loss": 0.1127, "num_input_tokens_seen": 166053292, "step": 967 }, { "epoch": 0.2543565463273492, "loss": 0.13186028599739075, "loss_ce": 0.002404727740213275, "loss_iou": 0.51953125, "loss_num": 0.02587890625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 166053292, "step": 967 }, { "epoch": 0.2546195830867364, "grad_norm": 5.861693184502666, "learning_rate": 5e-06, "loss": 0.1283, "num_input_tokens_seen": 166222324, "step": 968 }, { "epoch": 0.2546195830867364, "loss": 0.12424527108669281, "loss_ce": 0.0015340839745476842, "loss_iou": 0.66796875, "loss_num": 0.0245361328125, "loss_xval": 0.12255859375, 
"num_input_tokens_seen": 166222324, "step": 968 }, { "epoch": 0.2548826198461235, "grad_norm": 4.8001560190338335, "learning_rate": 5e-06, "loss": 0.1372, "num_input_tokens_seen": 166394732, "step": 969 }, { "epoch": 0.2548826198461235, "loss": 0.13953326642513275, "loss_ce": 0.0008613896206952631, "loss_iou": 0.5390625, "loss_num": 0.0277099609375, "loss_xval": 0.138671875, "num_input_tokens_seen": 166394732, "step": 969 }, { "epoch": 0.25514565660551064, "grad_norm": 4.5364064648643065, "learning_rate": 5e-06, "loss": 0.1456, "num_input_tokens_seen": 166566920, "step": 970 }, { "epoch": 0.25514565660551064, "loss": 0.15965688228607178, "loss_ce": 0.0016368532087653875, "loss_iou": 0.498046875, "loss_num": 0.031494140625, "loss_xval": 0.158203125, "num_input_tokens_seen": 166566920, "step": 970 }, { "epoch": 0.25540869336489774, "grad_norm": 6.371608621088164, "learning_rate": 5e-06, "loss": 0.1801, "num_input_tokens_seen": 166739024, "step": 971 }, { "epoch": 0.25540869336489774, "loss": 0.2314717024564743, "loss_ce": 0.003017107956111431, "loss_iou": 0.5625, "loss_num": 0.045654296875, "loss_xval": 0.228515625, "num_input_tokens_seen": 166739024, "step": 971 }, { "epoch": 0.25567173012428485, "grad_norm": 10.601164768004658, "learning_rate": 5e-06, "loss": 0.1504, "num_input_tokens_seen": 166911376, "step": 972 }, { "epoch": 0.25567173012428485, "loss": 0.16122400760650635, "loss_ce": 0.001586547470651567, "loss_iou": 0.75390625, "loss_num": 0.031982421875, "loss_xval": 0.1591796875, "num_input_tokens_seen": 166911376, "step": 972 }, { "epoch": 0.255934766883672, "grad_norm": 7.052937396329928, "learning_rate": 5e-06, "loss": 0.1246, "num_input_tokens_seen": 167083504, "step": 973 }, { "epoch": 0.255934766883672, "loss": 0.09488484263420105, "loss_ce": 0.0012263880344107747, "loss_iou": 0.5078125, "loss_num": 0.018798828125, "loss_xval": 0.09375, "num_input_tokens_seen": 167083504, "step": 973 }, { "epoch": 0.2561978036430591, "grad_norm": 11.500260657406463, 
"learning_rate": 5e-06, "loss": 0.1528, "num_input_tokens_seen": 167255708, "step": 974 }, { "epoch": 0.2561978036430591, "loss": 0.08826225996017456, "loss_ce": 0.0011956070084124804, "loss_iou": 0.6171875, "loss_num": 0.0174560546875, "loss_xval": 0.0869140625, "num_input_tokens_seen": 167255708, "step": 974 }, { "epoch": 0.25646084040244627, "grad_norm": 15.92469225824967, "learning_rate": 5e-06, "loss": 0.1805, "num_input_tokens_seen": 167428028, "step": 975 }, { "epoch": 0.25646084040244627, "loss": 0.2100502997636795, "loss_ce": 0.0013405811041593552, "loss_iou": 0.65234375, "loss_num": 0.041748046875, "loss_xval": 0.208984375, "num_input_tokens_seen": 167428028, "step": 975 }, { "epoch": 0.25672387716183337, "grad_norm": 8.58380851703317, "learning_rate": 5e-06, "loss": 0.157, "num_input_tokens_seen": 167600008, "step": 976 }, { "epoch": 0.25672387716183337, "loss": 0.16451242566108704, "loss_ce": 0.0012128613889217377, "loss_iou": 0.73046875, "loss_num": 0.03271484375, "loss_xval": 0.1630859375, "num_input_tokens_seen": 167600008, "step": 976 }, { "epoch": 0.2569869139212205, "grad_norm": 18.1690929466902, "learning_rate": 5e-06, "loss": 0.1315, "num_input_tokens_seen": 167772344, "step": 977 }, { "epoch": 0.2569869139212205, "loss": 0.1349577009677887, "loss_ce": 0.0059293946251273155, "loss_iou": 0.64453125, "loss_num": 0.02587890625, "loss_xval": 0.12890625, "num_input_tokens_seen": 167772344, "step": 977 }, { "epoch": 0.25724995068060763, "grad_norm": 19.89834413612689, "learning_rate": 5e-06, "loss": 0.1866, "num_input_tokens_seen": 167944388, "step": 978 }, { "epoch": 0.25724995068060763, "loss": 0.2309381365776062, "loss_ce": 0.0008356063044629991, "loss_iou": 0.6015625, "loss_num": 0.0458984375, "loss_xval": 0.23046875, "num_input_tokens_seen": 167944388, "step": 978 }, { "epoch": 0.25751298743999473, "grad_norm": 9.124352830191828, "learning_rate": 5e-06, "loss": 0.1645, "num_input_tokens_seen": 168116168, "step": 979 }, { "epoch": 
0.25751298743999473, "loss": 0.11332334578037262, "loss_ce": 0.0035516121424734592, "loss_iou": 0.43359375, "loss_num": 0.02197265625, "loss_xval": 0.10986328125, "num_input_tokens_seen": 168116168, "step": 979 }, { "epoch": 0.25777602419938184, "grad_norm": 13.324870310905945, "learning_rate": 5e-06, "loss": 0.1532, "num_input_tokens_seen": 168288220, "step": 980 }, { "epoch": 0.25777602419938184, "loss": 0.14565590023994446, "loss_ce": 0.0014908593147993088, "loss_iou": 0.50390625, "loss_num": 0.02880859375, "loss_xval": 0.14453125, "num_input_tokens_seen": 168288220, "step": 980 }, { "epoch": 0.258039060958769, "grad_norm": 12.888295473836926, "learning_rate": 5e-06, "loss": 0.147, "num_input_tokens_seen": 168460592, "step": 981 }, { "epoch": 0.258039060958769, "loss": 0.13789984583854675, "loss_ce": 0.0010895461309701204, "loss_iou": 0.47265625, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 168460592, "step": 981 }, { "epoch": 0.2583020977181561, "grad_norm": 7.142842896707461, "learning_rate": 5e-06, "loss": 0.1736, "num_input_tokens_seen": 168632604, "step": 982 }, { "epoch": 0.2583020977181561, "loss": 0.2077009379863739, "loss_ce": 0.0013105443213135004, "loss_iou": 0.59375, "loss_num": 0.041259765625, "loss_xval": 0.2060546875, "num_input_tokens_seen": 168632604, "step": 982 }, { "epoch": 0.25856513447754326, "grad_norm": 5.171300059065281, "learning_rate": 5e-06, "loss": 0.194, "num_input_tokens_seen": 168804468, "step": 983 }, { "epoch": 0.25856513447754326, "loss": 0.15598775446414948, "loss_ce": 0.005291945766657591, "loss_iou": 0.51953125, "loss_num": 0.0301513671875, "loss_xval": 0.150390625, "num_input_tokens_seen": 168804468, "step": 983 }, { "epoch": 0.25882817123693036, "grad_norm": 11.204489011375072, "learning_rate": 5e-06, "loss": 0.1807, "num_input_tokens_seen": 168976856, "step": 984 }, { "epoch": 0.25882817123693036, "loss": 0.19184689223766327, "loss_ce": 0.0026989425532519817, "loss_iou": 0.6328125, "loss_num": 
0.037841796875, "loss_xval": 0.189453125, "num_input_tokens_seen": 168976856, "step": 984 }, { "epoch": 0.25909120799631746, "grad_norm": 6.6174997360939205, "learning_rate": 5e-06, "loss": 0.1608, "num_input_tokens_seen": 169149104, "step": 985 }, { "epoch": 0.25909120799631746, "loss": 0.23251253366470337, "loss_ce": 0.0004568799340631813, "loss_iou": 0.578125, "loss_num": 0.04638671875, "loss_xval": 0.232421875, "num_input_tokens_seen": 169149104, "step": 985 }, { "epoch": 0.2593542447557046, "grad_norm": 7.169144370545132, "learning_rate": 5e-06, "loss": 0.1498, "num_input_tokens_seen": 169321296, "step": 986 }, { "epoch": 0.2593542447557046, "loss": 0.12904971837997437, "loss_ce": 0.0024627982638776302, "loss_iou": 0.66796875, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 169321296, "step": 986 }, { "epoch": 0.2596172815150917, "grad_norm": 17.414290331962896, "learning_rate": 5e-06, "loss": 0.1282, "num_input_tokens_seen": 169493420, "step": 987 }, { "epoch": 0.2596172815150917, "loss": 0.13824069499969482, "loss_ce": 0.0014914304483681917, "loss_iou": 0.54296875, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 169493420, "step": 987 }, { "epoch": 0.2598803182744789, "grad_norm": 5.807379438415879, "learning_rate": 5e-06, "loss": 0.1257, "num_input_tokens_seen": 169664140, "step": 988 }, { "epoch": 0.2598803182744789, "loss": 0.15334829688072205, "loss_ce": 0.001828520093113184, "loss_iou": 0.58984375, "loss_num": 0.0302734375, "loss_xval": 0.1513671875, "num_input_tokens_seen": 169664140, "step": 988 }, { "epoch": 0.260143355033866, "grad_norm": 6.246751757081568, "learning_rate": 5e-06, "loss": 0.1808, "num_input_tokens_seen": 169836220, "step": 989 }, { "epoch": 0.260143355033866, "loss": 0.25014275312423706, "loss_ce": 0.003926943056285381, "loss_iou": 0.5703125, "loss_num": 0.049072265625, "loss_xval": 0.24609375, "num_input_tokens_seen": 169836220, "step": 989 }, { "epoch": 0.2604063917932531, 
"grad_norm": 11.305993296500802, "learning_rate": 5e-06, "loss": 0.1627, "num_input_tokens_seen": 170008204, "step": 990 }, { "epoch": 0.2604063917932531, "loss": 0.19628843665122986, "loss_ce": 0.00878843106329441, "loss_iou": 0.60546875, "loss_num": 0.037353515625, "loss_xval": 0.1875, "num_input_tokens_seen": 170008204, "step": 990 }, { "epoch": 0.26066942855264025, "grad_norm": 6.544136822322436, "learning_rate": 5e-06, "loss": 0.1516, "num_input_tokens_seen": 170178608, "step": 991 }, { "epoch": 0.26066942855264025, "loss": 0.23060224950313568, "loss_ce": 0.004283890128135681, "loss_iou": 0.41796875, "loss_num": 0.045166015625, "loss_xval": 0.2265625, "num_input_tokens_seen": 170178608, "step": 991 }, { "epoch": 0.26093246531202735, "grad_norm": 6.798376445965723, "learning_rate": 5e-06, "loss": 0.1663, "num_input_tokens_seen": 170350580, "step": 992 }, { "epoch": 0.26093246531202735, "loss": 0.15799343585968018, "loss_ce": 0.0012551653198897839, "loss_iou": 0.5859375, "loss_num": 0.03125, "loss_xval": 0.15625, "num_input_tokens_seen": 170350580, "step": 992 }, { "epoch": 0.26119550207141445, "grad_norm": 6.259630629604519, "learning_rate": 5e-06, "loss": 0.1793, "num_input_tokens_seen": 170522692, "step": 993 }, { "epoch": 0.26119550207141445, "loss": 0.2015601247549057, "loss_ce": 0.000510324549395591, "loss_iou": 0.66796875, "loss_num": 0.040283203125, "loss_xval": 0.201171875, "num_input_tokens_seen": 170522692, "step": 993 }, { "epoch": 0.2614585388308016, "grad_norm": 6.902940616756998, "learning_rate": 5e-06, "loss": 0.2081, "num_input_tokens_seen": 170695152, "step": 994 }, { "epoch": 0.2614585388308016, "loss": 0.15382197499275208, "loss_ce": 0.0003795886295847595, "loss_iou": 0.60546875, "loss_num": 0.0306396484375, "loss_xval": 0.1533203125, "num_input_tokens_seen": 170695152, "step": 994 }, { "epoch": 0.2617215755901887, "grad_norm": 4.496994630933735, "learning_rate": 5e-06, "loss": 0.1308, "num_input_tokens_seen": 170867532, "step": 995 }, { 
"epoch": 0.2617215755901887, "loss": 0.15487955510616302, "loss_ce": 0.005709626711905003, "loss_iou": 0.41015625, "loss_num": 0.02978515625, "loss_xval": 0.1494140625, "num_input_tokens_seen": 170867532, "step": 995 }, { "epoch": 0.2619846123495759, "grad_norm": 5.819986109817203, "learning_rate": 5e-06, "loss": 0.1475, "num_input_tokens_seen": 171039632, "step": 996 }, { "epoch": 0.2619846123495759, "loss": 0.1341342031955719, "loss_ce": 0.0008944571018218994, "loss_iou": 0.458984375, "loss_num": 0.026611328125, "loss_xval": 0.1328125, "num_input_tokens_seen": 171039632, "step": 996 }, { "epoch": 0.262247649108963, "grad_norm": 4.6990222732792075, "learning_rate": 5e-06, "loss": 0.1625, "num_input_tokens_seen": 171211920, "step": 997 }, { "epoch": 0.262247649108963, "loss": 0.2527538239955902, "loss_ce": 0.0029674398247152567, "loss_iou": 0.48828125, "loss_num": 0.050048828125, "loss_xval": 0.25, "num_input_tokens_seen": 171211920, "step": 997 }, { "epoch": 0.2625106858683501, "grad_norm": 6.547042177560564, "learning_rate": 5e-06, "loss": 0.1684, "num_input_tokens_seen": 171384024, "step": 998 }, { "epoch": 0.2625106858683501, "loss": 0.18961063027381897, "loss_ce": 0.0035754733253270388, "loss_iou": 0.62890625, "loss_num": 0.037109375, "loss_xval": 0.185546875, "num_input_tokens_seen": 171384024, "step": 998 }, { "epoch": 0.26277372262773724, "grad_norm": 17.093722901281232, "learning_rate": 5e-06, "loss": 0.1271, "num_input_tokens_seen": 171556336, "step": 999 }, { "epoch": 0.26277372262773724, "loss": 0.18390598893165588, "loss_ce": 0.0009531003306619823, "loss_iou": 0.24609375, "loss_num": 0.03662109375, "loss_xval": 0.1826171875, "num_input_tokens_seen": 171556336, "step": 999 }, { "epoch": 0.26303675938712434, "grad_norm": 5.862402162816912, "learning_rate": 5e-06, "loss": 0.1274, "num_input_tokens_seen": 171726848, "step": 1000 }, { "epoch": 0.26303675938712434, "eval_websight_new_CIoU": 0.7987666130065918, "eval_websight_new_GIoU": 0.7927780747413635, 
"eval_websight_new_IoU": 0.8096525371074677, "eval_websight_new_MAE_all": 0.03391252178698778, "eval_websight_new_MAE_h": 0.024881365709006786, "eval_websight_new_MAE_w": 0.04275708086788654, "eval_websight_new_MAE_x": 0.046329958364367485, "eval_websight_new_MAE_y": 0.021681691519916058, "eval_websight_new_NUM_probability": 0.9994822144508362, "eval_websight_new_inside_bbox": 0.984375, "eval_websight_new_loss": 0.15745767951011658, "eval_websight_new_loss_ce": 9.287914144806564e-05, "eval_websight_new_loss_iou": 0.457275390625, "eval_websight_new_loss_num": 0.027721405029296875, "eval_websight_new_loss_xval": 0.1386260986328125, "eval_websight_new_runtime": 55.0835, "eval_websight_new_samples_per_second": 0.908, "eval_websight_new_steps_per_second": 0.036, "num_input_tokens_seen": 171726848, "step": 1000 }, { "epoch": 0.26303675938712434, "eval_seeclick_CIoU": 0.5508884787559509, "eval_seeclick_GIoU": 0.5434170663356781, "eval_seeclick_IoU": 0.5745402276515961, "eval_seeclick_MAE_all": 0.057029979303479195, "eval_seeclick_MAE_h": 0.03887217864394188, "eval_seeclick_MAE_w": 0.08262282982468605, "eval_seeclick_MAE_x": 0.07410039007663727, "eval_seeclick_MAE_y": 0.032524523325264454, "eval_seeclick_NUM_probability": 0.9997861981391907, "eval_seeclick_inside_bbox": 0.9076704680919647, "eval_seeclick_loss": 0.24120275676250458, "eval_seeclick_loss_ce": 0.00980278616771102, "eval_seeclick_loss_iou": 0.60888671875, "eval_seeclick_loss_num": 0.0429229736328125, "eval_seeclick_loss_xval": 0.214630126953125, "eval_seeclick_runtime": 69.8263, "eval_seeclick_samples_per_second": 0.616, "eval_seeclick_steps_per_second": 0.029, "num_input_tokens_seen": 171726848, "step": 1000 }, { "epoch": 0.26303675938712434, "eval_icons_CIoU": 0.8137890696525574, "eval_icons_GIoU": 0.8057061433792114, "eval_icons_IoU": 0.8213592171669006, "eval_icons_MAE_all": 0.024967025965452194, "eval_icons_MAE_h": 0.028143037110567093, "eval_icons_MAE_w": 0.026135658845305443, "eval_icons_MAE_x": 
0.022455199621617794, "eval_icons_MAE_y": 0.023134205490350723, "eval_icons_NUM_probability": 0.9995008409023285, "eval_icons_inside_bbox": 0.9565972089767456, "eval_icons_loss": 0.09549810737371445, "eval_icons_loss_ce": 0.0016497992401127703, "eval_icons_loss_iou": 0.6103515625, "eval_icons_loss_num": 0.01790618896484375, "eval_icons_loss_xval": 0.0895233154296875, "eval_icons_runtime": 88.8842, "eval_icons_samples_per_second": 0.563, "eval_icons_steps_per_second": 0.023, "num_input_tokens_seen": 171726848, "step": 1000 }, { "epoch": 0.26303675938712434, "eval_screenspot_CIoU": 0.5512810548146566, "eval_screenspot_GIoU": 0.5352131823698679, "eval_screenspot_IoU": 0.5900407036145529, "eval_screenspot_MAE_all": 0.08661519487698872, "eval_screenspot_MAE_h": 0.056614277263482414, "eval_screenspot_MAE_w": 0.13663912812868753, "eval_screenspot_MAE_x": 0.10082270950078964, "eval_screenspot_MAE_y": 0.052384667098522186, "eval_screenspot_NUM_probability": 0.9995323220888773, "eval_screenspot_inside_bbox": 0.8454166650772095, "eval_screenspot_loss": 0.7656806111335754, "eval_screenspot_loss_ce": 0.42391865452130634, "eval_screenspot_loss_iou": 0.4834391276041667, "eval_screenspot_loss_num": 0.0673370361328125, "eval_screenspot_loss_xval": 0.3365885416666667, "eval_screenspot_runtime": 148.461, "eval_screenspot_samples_per_second": 0.599, "eval_screenspot_steps_per_second": 0.02, "num_input_tokens_seen": 171726848, "step": 1000 }, { "epoch": 0.26303675938712434, "loss": 0.7618361711502075, "loss_ce": 0.4089309275150299, "loss_iou": 0.43359375, "loss_num": 0.07080078125, "loss_xval": 0.353515625, "num_input_tokens_seen": 171726848, "step": 1000 }, { "epoch": 0.2632997961465115, "grad_norm": 12.369274544442106, "learning_rate": 5e-06, "loss": 0.1395, "num_input_tokens_seen": 171897180, "step": 1001 }, { "epoch": 0.2632997961465115, "loss": 0.23002395033836365, "loss_ce": 0.0021186815574765205, "loss_iou": 0.462890625, "loss_num": 0.045654296875, "loss_xval": 0.2275390625, 
"num_input_tokens_seen": 171897180, "step": 1001 }, { "epoch": 0.2635628329058986, "grad_norm": 6.891119328212443, "learning_rate": 5e-06, "loss": 0.127, "num_input_tokens_seen": 172069384, "step": 1002 }, { "epoch": 0.2635628329058986, "loss": 0.1037362664937973, "loss_ce": 0.0008004722185432911, "loss_iou": 0.45703125, "loss_num": 0.0206298828125, "loss_xval": 0.10302734375, "num_input_tokens_seen": 172069384, "step": 1002 }, { "epoch": 0.2638258696652857, "grad_norm": 5.298705978749544, "learning_rate": 5e-06, "loss": 0.1493, "num_input_tokens_seen": 172239736, "step": 1003 }, { "epoch": 0.2638258696652857, "loss": 0.16244152188301086, "loss_ce": 0.0035059780348092318, "loss_iou": 0.408203125, "loss_num": 0.03173828125, "loss_xval": 0.1591796875, "num_input_tokens_seen": 172239736, "step": 1003 }, { "epoch": 0.26408890642467286, "grad_norm": 28.95390611927557, "learning_rate": 5e-06, "loss": 0.1674, "num_input_tokens_seen": 172409356, "step": 1004 }, { "epoch": 0.26408890642467286, "loss": 0.16002172231674194, "loss_ce": 0.001452386612072587, "loss_iou": 0.70703125, "loss_num": 0.03173828125, "loss_xval": 0.158203125, "num_input_tokens_seen": 172409356, "step": 1004 }, { "epoch": 0.26435194318405997, "grad_norm": 6.613551299237438, "learning_rate": 5e-06, "loss": 0.1297, "num_input_tokens_seen": 172579624, "step": 1005 }, { "epoch": 0.26435194318405997, "loss": 0.11175885051488876, "loss_ce": 0.0038792139384895563, "loss_iou": 0.61328125, "loss_num": 0.0216064453125, "loss_xval": 0.10791015625, "num_input_tokens_seen": 172579624, "step": 1005 }, { "epoch": 0.26461497994344707, "grad_norm": 5.235806011716001, "learning_rate": 5e-06, "loss": 0.181, "num_input_tokens_seen": 172752052, "step": 1006 }, { "epoch": 0.26461497994344707, "loss": 0.19632884860038757, "loss_ce": 0.001321525895036757, "loss_iou": 0.75390625, "loss_num": 0.0390625, "loss_xval": 0.1953125, "num_input_tokens_seen": 172752052, "step": 1006 }, { "epoch": 0.26487801670283423, "grad_norm": 
11.226831704177643, "learning_rate": 5e-06, "loss": 0.164, "num_input_tokens_seen": 172924420, "step": 1007 }, { "epoch": 0.26487801670283423, "loss": 0.16282187402248383, "loss_ce": 0.0012007836485281587, "loss_iou": 0.52734375, "loss_num": 0.0322265625, "loss_xval": 0.162109375, "num_input_tokens_seen": 172924420, "step": 1007 }, { "epoch": 0.26514105346222133, "grad_norm": 5.82233219860515, "learning_rate": 5e-06, "loss": 0.1824, "num_input_tokens_seen": 173096824, "step": 1008 }, { "epoch": 0.26514105346222133, "loss": 0.17045088112354279, "loss_ce": 0.005320262163877487, "loss_iou": 0.53515625, "loss_num": 0.032958984375, "loss_xval": 0.1650390625, "num_input_tokens_seen": 173096824, "step": 1008 }, { "epoch": 0.2654040902216085, "grad_norm": 5.52283167756611, "learning_rate": 5e-06, "loss": 0.149, "num_input_tokens_seen": 173268948, "step": 1009 }, { "epoch": 0.2654040902216085, "loss": 0.17849504947662354, "loss_ce": 0.004483824595808983, "loss_iou": 0.61328125, "loss_num": 0.03466796875, "loss_xval": 0.173828125, "num_input_tokens_seen": 173268948, "step": 1009 }, { "epoch": 0.2656671269809956, "grad_norm": 5.624587832123806, "learning_rate": 5e-06, "loss": 0.1733, "num_input_tokens_seen": 173441324, "step": 1010 }, { "epoch": 0.2656671269809956, "loss": 0.15812638401985168, "loss_ce": 0.0008387943962588906, "loss_iou": 0.62890625, "loss_num": 0.031494140625, "loss_xval": 0.1572265625, "num_input_tokens_seen": 173441324, "step": 1010 }, { "epoch": 0.2659301637403827, "grad_norm": 8.141965504781345, "learning_rate": 5e-06, "loss": 0.1542, "num_input_tokens_seen": 173611452, "step": 1011 }, { "epoch": 0.2659301637403827, "loss": 0.2730504870414734, "loss_ce": 0.004678931087255478, "loss_iou": 0.53125, "loss_num": 0.0537109375, "loss_xval": 0.267578125, "num_input_tokens_seen": 173611452, "step": 1011 }, { "epoch": 0.26619320049976986, "grad_norm": 9.595372252411392, "learning_rate": 5e-06, "loss": 0.1596, "num_input_tokens_seen": 173783736, "step": 1012 }, { 
"epoch": 0.26619320049976986, "loss": 0.23428162932395935, "loss_ce": 0.0011273245327174664, "loss_iou": 0.54296875, "loss_num": 0.046630859375, "loss_xval": 0.2333984375, "num_input_tokens_seen": 173783736, "step": 1012 }, { "epoch": 0.26645623725915696, "grad_norm": 8.106802909743756, "learning_rate": 5e-06, "loss": 0.1535, "num_input_tokens_seen": 173955656, "step": 1013 }, { "epoch": 0.26645623725915696, "loss": 0.15512457489967346, "loss_ce": 0.003147047944366932, "loss_iou": 0.59375, "loss_num": 0.0303955078125, "loss_xval": 0.15234375, "num_input_tokens_seen": 173955656, "step": 1013 }, { "epoch": 0.2667192740185441, "grad_norm": 5.365195100132044, "learning_rate": 5e-06, "loss": 0.1487, "num_input_tokens_seen": 174126100, "step": 1014 }, { "epoch": 0.2667192740185441, "loss": 0.17733250558376312, "loss_ce": 0.004175758454948664, "loss_iou": 0.6640625, "loss_num": 0.03466796875, "loss_xval": 0.1728515625, "num_input_tokens_seen": 174126100, "step": 1014 }, { "epoch": 0.2669823107779312, "grad_norm": 6.156957339464345, "learning_rate": 5e-06, "loss": 0.1486, "num_input_tokens_seen": 174298328, "step": 1015 }, { "epoch": 0.2669823107779312, "loss": 0.1237088292837143, "loss_ce": 0.001119712833315134, "loss_iou": 0.41796875, "loss_num": 0.0245361328125, "loss_xval": 0.12255859375, "num_input_tokens_seen": 174298328, "step": 1015 }, { "epoch": 0.2672453475373183, "grad_norm": 5.364063512164187, "learning_rate": 5e-06, "loss": 0.1536, "num_input_tokens_seen": 174470324, "step": 1016 }, { "epoch": 0.2672453475373183, "loss": 0.2085983008146286, "loss_ce": 0.003947417717427015, "loss_iou": 0.5546875, "loss_num": 0.041015625, "loss_xval": 0.205078125, "num_input_tokens_seen": 174470324, "step": 1016 }, { "epoch": 0.2675083842967055, "grad_norm": 26.166059849551687, "learning_rate": 5e-06, "loss": 0.1652, "num_input_tokens_seen": 174642656, "step": 1017 }, { "epoch": 0.2675083842967055, "loss": 0.0957513153553009, "loss_ce": 0.0004143980913795531, "loss_iou": 
0.5546875, "loss_num": 0.01904296875, "loss_xval": 0.09521484375, "num_input_tokens_seen": 174642656, "step": 1017 }, { "epoch": 0.2677714210560926, "grad_norm": 13.570714399931546, "learning_rate": 5e-06, "loss": 0.1222, "num_input_tokens_seen": 174813336, "step": 1018 }, { "epoch": 0.2677714210560926, "loss": 0.08781825006008148, "loss_ce": 0.0015755778877064586, "loss_iou": 0.5703125, "loss_num": 0.0172119140625, "loss_xval": 0.08642578125, "num_input_tokens_seen": 174813336, "step": 1018 }, { "epoch": 0.2680344578154797, "grad_norm": 5.193615864048545, "learning_rate": 5e-06, "loss": 0.1337, "num_input_tokens_seen": 174983476, "step": 1019 }, { "epoch": 0.2680344578154797, "loss": 0.09173595905303955, "loss_ce": 0.0038148202002048492, "loss_iou": 0.4609375, "loss_num": 0.017578125, "loss_xval": 0.087890625, "num_input_tokens_seen": 174983476, "step": 1019 }, { "epoch": 0.26829749457486685, "grad_norm": 6.860708381616737, "learning_rate": 5e-06, "loss": 0.1671, "num_input_tokens_seen": 175155688, "step": 1020 }, { "epoch": 0.26829749457486685, "loss": 0.1804433912038803, "loss_ce": 0.002403826452791691, "loss_iou": 0.48828125, "loss_num": 0.03564453125, "loss_xval": 0.177734375, "num_input_tokens_seen": 175155688, "step": 1020 }, { "epoch": 0.26856053133425395, "grad_norm": 4.544920825241194, "learning_rate": 5e-06, "loss": 0.1266, "num_input_tokens_seen": 175327636, "step": 1021 }, { "epoch": 0.26856053133425395, "loss": 0.14717841148376465, "loss_ce": 0.0010602545225992799, "loss_iou": 0.62890625, "loss_num": 0.029296875, "loss_xval": 0.146484375, "num_input_tokens_seen": 175327636, "step": 1021 }, { "epoch": 0.2688235680936411, "grad_norm": 8.925039478850847, "learning_rate": 5e-06, "loss": 0.1755, "num_input_tokens_seen": 175499748, "step": 1022 }, { "epoch": 0.2688235680936411, "loss": 0.1853310763835907, "loss_ce": 0.0006386763998307288, "loss_iou": 0.51953125, "loss_num": 0.036865234375, "loss_xval": 0.1845703125, "num_input_tokens_seen": 175499748, 
"step": 1022 }, { "epoch": 0.2690866048530282, "grad_norm": 5.74787431130144, "learning_rate": 5e-06, "loss": 0.1518, "num_input_tokens_seen": 175671808, "step": 1023 }, { "epoch": 0.2690866048530282, "loss": 0.14083652198314667, "loss_ce": 0.0009744655108079314, "loss_iou": 0.4140625, "loss_num": 0.0279541015625, "loss_xval": 0.1396484375, "num_input_tokens_seen": 175671808, "step": 1023 }, { "epoch": 0.2693496416124153, "grad_norm": 6.6733989699604725, "learning_rate": 5e-06, "loss": 0.1427, "num_input_tokens_seen": 175844188, "step": 1024 }, { "epoch": 0.2693496416124153, "loss": 0.1698358803987503, "loss_ce": 0.0007990067824721336, "loss_iou": 0.6640625, "loss_num": 0.03369140625, "loss_xval": 0.1689453125, "num_input_tokens_seen": 175844188, "step": 1024 }, { "epoch": 0.26961267837180247, "grad_norm": 8.873866851189364, "learning_rate": 5e-06, "loss": 0.1894, "num_input_tokens_seen": 176016264, "step": 1025 }, { "epoch": 0.26961267837180247, "loss": 0.28822407126426697, "loss_ce": 0.006241639144718647, "loss_iou": 0.6171875, "loss_num": 0.056396484375, "loss_xval": 0.28125, "num_input_tokens_seen": 176016264, "step": 1025 }, { "epoch": 0.2698757151311896, "grad_norm": 5.745558264753103, "learning_rate": 5e-06, "loss": 0.1313, "num_input_tokens_seen": 176188840, "step": 1026 }, { "epoch": 0.2698757151311896, "loss": 0.20782078802585602, "loss_ce": 0.003353000618517399, "loss_iou": 0.53125, "loss_num": 0.041015625, "loss_xval": 0.2041015625, "num_input_tokens_seen": 176188840, "step": 1026 }, { "epoch": 0.27013875189057673, "grad_norm": 6.250713326486415, "learning_rate": 5e-06, "loss": 0.1462, "num_input_tokens_seen": 176360864, "step": 1027 }, { "epoch": 0.27013875189057673, "loss": 0.17586824297904968, "loss_ce": 0.0016433752607554197, "loss_iou": 0.59375, "loss_num": 0.034912109375, "loss_xval": 0.173828125, "num_input_tokens_seen": 176360864, "step": 1027 }, { "epoch": 0.27040178864996384, "grad_norm": 9.18813465584604, "learning_rate": 5e-06, "loss": 
0.1391, "num_input_tokens_seen": 176533304, "step": 1028 }, { "epoch": 0.27040178864996384, "loss": 0.1184663325548172, "loss_ce": 0.0005769361741840839, "loss_iou": 0.640625, "loss_num": 0.0235595703125, "loss_xval": 0.11767578125, "num_input_tokens_seen": 176533304, "step": 1028 }, { "epoch": 0.27066482540935094, "grad_norm": 6.1602828275187, "learning_rate": 5e-06, "loss": 0.1569, "num_input_tokens_seen": 176705536, "step": 1029 }, { "epoch": 0.27066482540935094, "loss": 0.20641186833381653, "loss_ce": 0.0027985800988972187, "loss_iou": 0.54296875, "loss_num": 0.040771484375, "loss_xval": 0.203125, "num_input_tokens_seen": 176705536, "step": 1029 }, { "epoch": 0.2709278621687381, "grad_norm": 6.699314102286077, "learning_rate": 5e-06, "loss": 0.1489, "num_input_tokens_seen": 176877744, "step": 1030 }, { "epoch": 0.2709278621687381, "loss": 0.12170865386724472, "loss_ce": 0.003544584382325411, "loss_iou": 0.5546875, "loss_num": 0.0235595703125, "loss_xval": 0.1181640625, "num_input_tokens_seen": 176877744, "step": 1030 }, { "epoch": 0.2711908989281252, "grad_norm": 7.305231502769662, "learning_rate": 5e-06, "loss": 0.1635, "num_input_tokens_seen": 177050116, "step": 1031 }, { "epoch": 0.2711908989281252, "loss": 0.1749892234802246, "loss_ce": 0.001527311746031046, "loss_iou": 0.482421875, "loss_num": 0.03466796875, "loss_xval": 0.173828125, "num_input_tokens_seen": 177050116, "step": 1031 }, { "epoch": 0.2714539356875123, "grad_norm": 23.98337624564377, "learning_rate": 5e-06, "loss": 0.1546, "num_input_tokens_seen": 177222180, "step": 1032 }, { "epoch": 0.2714539356875123, "loss": 0.11504107713699341, "loss_ce": 0.0008137800614349544, "loss_iou": 0.64453125, "loss_num": 0.0228271484375, "loss_xval": 0.1142578125, "num_input_tokens_seen": 177222180, "step": 1032 }, { "epoch": 0.27171697244689946, "grad_norm": 8.870505808545992, "learning_rate": 5e-06, "loss": 0.1098, "num_input_tokens_seen": 177394132, "step": 1033 }, { "epoch": 0.27171697244689946, "loss": 
0.1380763053894043, "loss_ce": 0.001357543864287436, "loss_iou": 0.55078125, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 177394132, "step": 1033 }, { "epoch": 0.27198000920628657, "grad_norm": 6.7102870745927, "learning_rate": 5e-06, "loss": 0.152, "num_input_tokens_seen": 177564532, "step": 1034 }, { "epoch": 0.27198000920628657, "loss": 0.1144593358039856, "loss_ce": 0.0005372193409129977, "loss_iou": 0.625, "loss_num": 0.0228271484375, "loss_xval": 0.11376953125, "num_input_tokens_seen": 177564532, "step": 1034 }, { "epoch": 0.2722430459656737, "grad_norm": 7.475407895210686, "learning_rate": 5e-06, "loss": 0.1639, "num_input_tokens_seen": 177735192, "step": 1035 }, { "epoch": 0.2722430459656737, "loss": 0.1714543104171753, "loss_ce": 0.001501921215094626, "loss_iou": 0.435546875, "loss_num": 0.033935546875, "loss_xval": 0.169921875, "num_input_tokens_seen": 177735192, "step": 1035 }, { "epoch": 0.2725060827250608, "grad_norm": 7.019122274754133, "learning_rate": 5e-06, "loss": 0.183, "num_input_tokens_seen": 177905512, "step": 1036 }, { "epoch": 0.2725060827250608, "loss": 0.23724797368049622, "loss_ce": 0.0011640018783509731, "loss_iou": 0.3671875, "loss_num": 0.047119140625, "loss_xval": 0.236328125, "num_input_tokens_seen": 177905512, "step": 1036 }, { "epoch": 0.27276911948444793, "grad_norm": 6.609200156734422, "learning_rate": 5e-06, "loss": 0.1299, "num_input_tokens_seen": 178077776, "step": 1037 }, { "epoch": 0.27276911948444793, "loss": 0.1989010125398636, "loss_ce": 0.002367813140153885, "loss_iou": 0.421875, "loss_num": 0.039306640625, "loss_xval": 0.1962890625, "num_input_tokens_seen": 178077776, "step": 1037 }, { "epoch": 0.2730321562438351, "grad_norm": 7.044877026833013, "learning_rate": 5e-06, "loss": 0.157, "num_input_tokens_seen": 178250200, "step": 1038 }, { "epoch": 0.2730321562438351, "loss": 0.1487899273633957, "loss_ce": 0.0012069200165569782, "loss_iou": 0.7109375, "loss_num": 0.029541015625, "loss_xval": 
0.1474609375, "num_input_tokens_seen": 178250200, "step": 1038 }, { "epoch": 0.2732951930032222, "grad_norm": 7.367246122761307, "learning_rate": 5e-06, "loss": 0.1496, "num_input_tokens_seen": 178420744, "step": 1039 }, { "epoch": 0.2732951930032222, "loss": 0.1740129590034485, "loss_ce": 0.002565213944762945, "loss_iou": 0.53125, "loss_num": 0.0341796875, "loss_xval": 0.171875, "num_input_tokens_seen": 178420744, "step": 1039 }, { "epoch": 0.27355822976260935, "grad_norm": 6.283509699437948, "learning_rate": 5e-06, "loss": 0.1274, "num_input_tokens_seen": 178592888, "step": 1040 }, { "epoch": 0.27355822976260935, "loss": 0.09046860039234161, "loss_ce": 0.0011436456115916371, "loss_iou": 0.6171875, "loss_num": 0.017822265625, "loss_xval": 0.08935546875, "num_input_tokens_seen": 178592888, "step": 1040 }, { "epoch": 0.27382126652199645, "grad_norm": 8.68361428045609, "learning_rate": 5e-06, "loss": 0.1171, "num_input_tokens_seen": 178763324, "step": 1041 }, { "epoch": 0.27382126652199645, "loss": 0.10273198038339615, "loss_ce": 0.0014746561646461487, "loss_iou": 0.42578125, "loss_num": 0.020263671875, "loss_xval": 0.10107421875, "num_input_tokens_seen": 178763324, "step": 1041 }, { "epoch": 0.27408430328138356, "grad_norm": 5.923468447654299, "learning_rate": 5e-06, "loss": 0.1654, "num_input_tokens_seen": 178935692, "step": 1042 }, { "epoch": 0.27408430328138356, "loss": 0.11228330433368683, "loss_ce": 0.0015655276365578175, "loss_iou": 0.7265625, "loss_num": 0.0220947265625, "loss_xval": 0.11083984375, "num_input_tokens_seen": 178935692, "step": 1042 }, { "epoch": 0.2743473400407707, "grad_norm": 12.335079487643208, "learning_rate": 5e-06, "loss": 0.1296, "num_input_tokens_seen": 179105416, "step": 1043 }, { "epoch": 0.2743473400407707, "loss": 0.13459762930870056, "loss_ce": 0.0016020219773054123, "loss_iou": 0.59375, "loss_num": 0.026611328125, "loss_xval": 0.1328125, "num_input_tokens_seen": 179105416, "step": 1043 }, { "epoch": 0.2746103768001578, 
"grad_norm": 5.798275831622124, "learning_rate": 5e-06, "loss": 0.1236, "num_input_tokens_seen": 179277628, "step": 1044 }, { "epoch": 0.2746103768001578, "loss": 0.07603298872709274, "loss_ce": 0.0007766383932903409, "loss_iou": 0.59765625, "loss_num": 0.01507568359375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 179277628, "step": 1044 }, { "epoch": 0.2748734135595449, "grad_norm": 23.44861216824249, "learning_rate": 5e-06, "loss": 0.1352, "num_input_tokens_seen": 179447304, "step": 1045 }, { "epoch": 0.2748734135595449, "loss": 0.143830344080925, "loss_ce": 0.0031137943733483553, "loss_iou": 0.5390625, "loss_num": 0.0281982421875, "loss_xval": 0.140625, "num_input_tokens_seen": 179447304, "step": 1045 }, { "epoch": 0.2751364503189321, "grad_norm": 5.56459204907325, "learning_rate": 5e-06, "loss": 0.1263, "num_input_tokens_seen": 179619344, "step": 1046 }, { "epoch": 0.2751364503189321, "loss": 0.1308884471654892, "loss_ce": 0.0006089094094932079, "loss_iou": 0.66015625, "loss_num": 0.0260009765625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 179619344, "step": 1046 }, { "epoch": 0.2753994870783192, "grad_norm": 7.96724794117892, "learning_rate": 5e-06, "loss": 0.1513, "num_input_tokens_seen": 179791548, "step": 1047 }, { "epoch": 0.2753994870783192, "loss": 0.1258929818868637, "loss_ce": 0.006264072842895985, "loss_iou": 0.515625, "loss_num": 0.02392578125, "loss_xval": 0.11962890625, "num_input_tokens_seen": 179791548, "step": 1047 }, { "epoch": 0.27566252383770634, "grad_norm": 5.070360731345708, "learning_rate": 5e-06, "loss": 0.1111, "num_input_tokens_seen": 179963676, "step": 1048 }, { "epoch": 0.27566252383770634, "loss": 0.11563927680253983, "loss_ce": 0.00031335209496319294, "loss_iou": 0.466796875, "loss_num": 0.0230712890625, "loss_xval": 0.115234375, "num_input_tokens_seen": 179963676, "step": 1048 }, { "epoch": 0.27592556059709344, "grad_norm": 7.345106018369934, "learning_rate": 5e-06, "loss": 0.1455, "num_input_tokens_seen": 
180135984, "step": 1049 }, { "epoch": 0.27592556059709344, "loss": 0.1866682916879654, "loss_ce": 0.0012129689566791058, "loss_iou": 0.609375, "loss_num": 0.037109375, "loss_xval": 0.185546875, "num_input_tokens_seen": 180135984, "step": 1049 }, { "epoch": 0.27618859735648055, "grad_norm": 9.442805115405356, "learning_rate": 5e-06, "loss": 0.2447, "num_input_tokens_seen": 180308156, "step": 1050 }, { "epoch": 0.27618859735648055, "loss": 0.23040437698364258, "loss_ce": 0.001797212054952979, "loss_iou": 0.578125, "loss_num": 0.045654296875, "loss_xval": 0.228515625, "num_input_tokens_seen": 180308156, "step": 1050 }, { "epoch": 0.2764516341158677, "grad_norm": 10.313405249427115, "learning_rate": 5e-06, "loss": 0.1419, "num_input_tokens_seen": 180480268, "step": 1051 }, { "epoch": 0.2764516341158677, "loss": 0.1318507045507431, "loss_ce": 0.003188594477251172, "loss_iou": 0.443359375, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 180480268, "step": 1051 }, { "epoch": 0.2767146708752548, "grad_norm": 12.938419489278349, "learning_rate": 5e-06, "loss": 0.1389, "num_input_tokens_seen": 180652480, "step": 1052 }, { "epoch": 0.2767146708752548, "loss": 0.1456303596496582, "loss_ce": 0.0045475889928638935, "loss_iou": 0.6484375, "loss_num": 0.0281982421875, "loss_xval": 0.140625, "num_input_tokens_seen": 180652480, "step": 1052 }, { "epoch": 0.27697770763464197, "grad_norm": 6.129454843218688, "learning_rate": 5e-06, "loss": 0.155, "num_input_tokens_seen": 180824868, "step": 1053 }, { "epoch": 0.27697770763464197, "loss": 0.10654839873313904, "loss_ce": 0.0010186205618083477, "loss_iou": 0.53125, "loss_num": 0.0211181640625, "loss_xval": 0.10546875, "num_input_tokens_seen": 180824868, "step": 1053 }, { "epoch": 0.27724074439402907, "grad_norm": 6.137928561662, "learning_rate": 5e-06, "loss": 0.1994, "num_input_tokens_seen": 180996888, "step": 1054 }, { "epoch": 0.27724074439402907, "loss": 0.21584706008434296, "loss_ce": 
0.0034447195939719677, "loss_iou": 0.6171875, "loss_num": 0.04248046875, "loss_xval": 0.212890625, "num_input_tokens_seen": 180996888, "step": 1054 }, { "epoch": 0.2775037811534162, "grad_norm": 4.330377156785293, "learning_rate": 5e-06, "loss": 0.098, "num_input_tokens_seen": 181168924, "step": 1055 }, { "epoch": 0.2775037811534162, "loss": 0.08097569644451141, "loss_ce": 0.0013553331373259425, "loss_iou": 0.64453125, "loss_num": 0.015869140625, "loss_xval": 0.07958984375, "num_input_tokens_seen": 181168924, "step": 1055 }, { "epoch": 0.27776681791280333, "grad_norm": 6.97802036205834, "learning_rate": 5e-06, "loss": 0.1652, "num_input_tokens_seen": 181341092, "step": 1056 }, { "epoch": 0.27776681791280333, "loss": 0.1544983983039856, "loss_ce": 0.0006592837744392455, "loss_iou": 0.5078125, "loss_num": 0.03076171875, "loss_xval": 0.154296875, "num_input_tokens_seen": 181341092, "step": 1056 }, { "epoch": 0.27802985467219044, "grad_norm": 5.494026690913253, "learning_rate": 5e-06, "loss": 0.1527, "num_input_tokens_seen": 181513136, "step": 1057 }, { "epoch": 0.27802985467219044, "loss": 0.09461110830307007, "loss_ce": 0.004279076587408781, "loss_iou": 0.46875, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 181513136, "step": 1057 }, { "epoch": 0.27829289143157754, "grad_norm": 8.725452399730845, "learning_rate": 5e-06, "loss": 0.1293, "num_input_tokens_seen": 181684960, "step": 1058 }, { "epoch": 0.27829289143157754, "loss": 0.14191409945487976, "loss_ce": 0.004157752729952335, "loss_iou": 0.6015625, "loss_num": 0.0274658203125, "loss_xval": 0.1376953125, "num_input_tokens_seen": 181684960, "step": 1058 }, { "epoch": 0.2785559281909647, "grad_norm": 5.556456214125238, "learning_rate": 5e-06, "loss": 0.1374, "num_input_tokens_seen": 181857208, "step": 1059 }, { "epoch": 0.2785559281909647, "loss": 0.14501094818115234, "loss_ce": 0.0029821395874023438, "loss_iou": 0.5, "loss_num": 0.0284423828125, "loss_xval": 0.1416015625, 
"num_input_tokens_seen": 181857208, "step": 1059 }, { "epoch": 0.2788189649503518, "grad_norm": 5.694123648635864, "learning_rate": 5e-06, "loss": 0.1345, "num_input_tokens_seen": 182029272, "step": 1060 }, { "epoch": 0.2788189649503518, "loss": 0.1433970034122467, "loss_ce": 0.0002085179730784148, "loss_iou": 0.49609375, "loss_num": 0.0286865234375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 182029272, "step": 1060 }, { "epoch": 0.27908200170973896, "grad_norm": 11.17547001843261, "learning_rate": 5e-06, "loss": 0.1709, "num_input_tokens_seen": 182201700, "step": 1061 }, { "epoch": 0.27908200170973896, "loss": 0.2361234724521637, "loss_ce": 0.0002225826756330207, "loss_iou": 0.5390625, "loss_num": 0.047119140625, "loss_xval": 0.236328125, "num_input_tokens_seen": 182201700, "step": 1061 }, { "epoch": 0.27934503846912606, "grad_norm": 7.952701295024384, "learning_rate": 5e-06, "loss": 0.1245, "num_input_tokens_seen": 182373760, "step": 1062 }, { "epoch": 0.27934503846912606, "loss": 0.12168803811073303, "loss_ce": 0.00483623007312417, "loss_iou": 0.66015625, "loss_num": 0.0234375, "loss_xval": 0.11669921875, "num_input_tokens_seen": 182373760, "step": 1062 }, { "epoch": 0.27960807522851316, "grad_norm": 8.381017586610616, "learning_rate": 5e-06, "loss": 0.1264, "num_input_tokens_seen": 182544140, "step": 1063 }, { "epoch": 0.27960807522851316, "loss": 0.18032154440879822, "loss_ce": 0.003350101877003908, "loss_iou": 0.5078125, "loss_num": 0.035400390625, "loss_xval": 0.1767578125, "num_input_tokens_seen": 182544140, "step": 1063 }, { "epoch": 0.2798711119879003, "grad_norm": 7.079859253709746, "learning_rate": 5e-06, "loss": 0.1588, "num_input_tokens_seen": 182716400, "step": 1064 }, { "epoch": 0.2798711119879003, "loss": 0.2143479734659195, "loss_ce": 0.005302563309669495, "loss_iou": 0.56640625, "loss_num": 0.041748046875, "loss_xval": 0.208984375, "num_input_tokens_seen": 182716400, "step": 1064 }, { "epoch": 0.2801341487472874, "grad_norm": 
5.981555911348935, "learning_rate": 5e-06, "loss": 0.1373, "num_input_tokens_seen": 182888924, "step": 1065 }, { "epoch": 0.2801341487472874, "loss": 0.07972423732280731, "loss_ce": 0.0004090492147952318, "loss_iou": 0.5390625, "loss_num": 0.015869140625, "loss_xval": 0.0791015625, "num_input_tokens_seen": 182888924, "step": 1065 }, { "epoch": 0.2803971855066746, "grad_norm": 6.957965061807421, "learning_rate": 5e-06, "loss": 0.1913, "num_input_tokens_seen": 183061040, "step": 1066 }, { "epoch": 0.2803971855066746, "loss": 0.18109014630317688, "loss_ce": 0.0029285247437655926, "loss_iou": 0.6171875, "loss_num": 0.03564453125, "loss_xval": 0.177734375, "num_input_tokens_seen": 183061040, "step": 1066 }, { "epoch": 0.2806602222660617, "grad_norm": 7.354674178304409, "learning_rate": 5e-06, "loss": 0.1445, "num_input_tokens_seen": 183233464, "step": 1067 }, { "epoch": 0.2806602222660617, "loss": 0.19740189611911774, "loss_ce": 0.0007771397940814495, "loss_iou": 0.49609375, "loss_num": 0.039306640625, "loss_xval": 0.1962890625, "num_input_tokens_seen": 183233464, "step": 1067 }, { "epoch": 0.2809232590254488, "grad_norm": 6.666818027916791, "learning_rate": 5e-06, "loss": 0.1868, "num_input_tokens_seen": 183405764, "step": 1068 }, { "epoch": 0.2809232590254488, "loss": 0.15091687440872192, "loss_ce": 0.0006788407335989177, "loss_iou": 0.5546875, "loss_num": 0.030029296875, "loss_xval": 0.150390625, "num_input_tokens_seen": 183405764, "step": 1068 }, { "epoch": 0.28118629578483595, "grad_norm": 5.646804925247109, "learning_rate": 5e-06, "loss": 0.136, "num_input_tokens_seen": 183578300, "step": 1069 }, { "epoch": 0.28118629578483595, "loss": 0.15913698077201843, "loss_ce": 0.0021545523777604103, "loss_iou": 0.5859375, "loss_num": 0.031494140625, "loss_xval": 0.1572265625, "num_input_tokens_seen": 183578300, "step": 1069 }, { "epoch": 0.28144933254422305, "grad_norm": 20.5753376454494, "learning_rate": 5e-06, "loss": 0.1509, "num_input_tokens_seen": 183750176, "step": 
1070 }, { "epoch": 0.28144933254422305, "loss": 0.11271088570356369, "loss_ce": 0.0023593269288539886, "loss_iou": 0.609375, "loss_num": 0.0220947265625, "loss_xval": 0.1103515625, "num_input_tokens_seen": 183750176, "step": 1070 }, { "epoch": 0.28171236930361016, "grad_norm": 4.644203324007545, "learning_rate": 5e-06, "loss": 0.1297, "num_input_tokens_seen": 183922404, "step": 1071 }, { "epoch": 0.28171236930361016, "loss": 0.12499827146530151, "loss_ce": 0.0020124230068176985, "loss_iou": 0.546875, "loss_num": 0.0245361328125, "loss_xval": 0.123046875, "num_input_tokens_seen": 183922404, "step": 1071 }, { "epoch": 0.2819754060629973, "grad_norm": 5.233661334274966, "learning_rate": 5e-06, "loss": 0.1277, "num_input_tokens_seen": 184094188, "step": 1072 }, { "epoch": 0.2819754060629973, "loss": 0.1377188265323639, "loss_ce": 0.0002676558797247708, "loss_iou": 0.443359375, "loss_num": 0.0274658203125, "loss_xval": 0.1376953125, "num_input_tokens_seen": 184094188, "step": 1072 }, { "epoch": 0.2822384428223844, "grad_norm": 7.522178114598793, "learning_rate": 5e-06, "loss": 0.1911, "num_input_tokens_seen": 184262844, "step": 1073 }, { "epoch": 0.2822384428223844, "loss": 0.2804732322692871, "loss_ce": 0.0030379469972103834, "loss_iou": 0.5390625, "loss_num": 0.055419921875, "loss_xval": 0.27734375, "num_input_tokens_seen": 184262844, "step": 1073 }, { "epoch": 0.2825014795817716, "grad_norm": 10.418392232208523, "learning_rate": 5e-06, "loss": 0.1913, "num_input_tokens_seen": 184431224, "step": 1074 }, { "epoch": 0.2825014795817716, "loss": 0.17388112843036652, "loss_ce": 0.002219748916104436, "loss_iou": 0.5859375, "loss_num": 0.034423828125, "loss_xval": 0.171875, "num_input_tokens_seen": 184431224, "step": 1074 }, { "epoch": 0.2827645163411587, "grad_norm": 7.018369739955214, "learning_rate": 5e-06, "loss": 0.1685, "num_input_tokens_seen": 184603440, "step": 1075 }, { "epoch": 0.2827645163411587, "loss": 0.24287168681621552, "loss_ce": 0.0036749078426510096, 
"loss_iou": 0.466796875, "loss_num": 0.0478515625, "loss_xval": 0.2392578125, "num_input_tokens_seen": 184603440, "step": 1075 }, { "epoch": 0.2830275531005458, "grad_norm": 8.441416409211774, "learning_rate": 5e-06, "loss": 0.167, "num_input_tokens_seen": 184775664, "step": 1076 }, { "epoch": 0.2830275531005458, "loss": 0.253373384475708, "loss_ce": 0.0034344326704740524, "loss_iou": 0.61328125, "loss_num": 0.050048828125, "loss_xval": 0.25, "num_input_tokens_seen": 184775664, "step": 1076 }, { "epoch": 0.28329058985993294, "grad_norm": 5.273714948615874, "learning_rate": 5e-06, "loss": 0.1366, "num_input_tokens_seen": 184947684, "step": 1077 }, { "epoch": 0.28329058985993294, "loss": 0.12758958339691162, "loss_ce": 0.0030168381053954363, "loss_iou": 0.59375, "loss_num": 0.02490234375, "loss_xval": 0.12451171875, "num_input_tokens_seen": 184947684, "step": 1077 }, { "epoch": 0.28355362661932004, "grad_norm": 5.622485240246158, "learning_rate": 5e-06, "loss": 0.1707, "num_input_tokens_seen": 185119764, "step": 1078 }, { "epoch": 0.28355362661932004, "loss": 0.15722918510437012, "loss_ce": 0.0001552198955323547, "loss_iou": 0.546875, "loss_num": 0.031494140625, "loss_xval": 0.1572265625, "num_input_tokens_seen": 185119764, "step": 1078 }, { "epoch": 0.2838166633787072, "grad_norm": 6.064053191448847, "learning_rate": 5e-06, "loss": 0.1428, "num_input_tokens_seen": 185291940, "step": 1079 }, { "epoch": 0.2838166633787072, "loss": 0.12457242608070374, "loss_ce": 0.0010983101092278957, "loss_iou": 0.53125, "loss_num": 0.024658203125, "loss_xval": 0.12353515625, "num_input_tokens_seen": 185291940, "step": 1079 }, { "epoch": 0.2840797001380943, "grad_norm": 8.508529448099232, "learning_rate": 5e-06, "loss": 0.1667, "num_input_tokens_seen": 185460372, "step": 1080 }, { "epoch": 0.2840797001380943, "loss": 0.2045111060142517, "loss_ce": 0.0020574983209371567, "loss_iou": 0.6484375, "loss_num": 0.04052734375, "loss_xval": 0.2021484375, "num_input_tokens_seen": 185460372, 
"step": 1080 }, { "epoch": 0.2843427368974814, "grad_norm": 5.269932611835083, "learning_rate": 5e-06, "loss": 0.1797, "num_input_tokens_seen": 185632912, "step": 1081 }, { "epoch": 0.2843427368974814, "loss": 0.13937309384346008, "loss_ce": 0.009490270167589188, "loss_iou": 0.462890625, "loss_num": 0.0260009765625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 185632912, "step": 1081 }, { "epoch": 0.28460577365686857, "grad_norm": 8.826575998844994, "learning_rate": 5e-06, "loss": 0.1184, "num_input_tokens_seen": 185804888, "step": 1082 }, { "epoch": 0.28460577365686857, "loss": 0.099105603992939, "loss_ce": 0.0018155663274228573, "loss_iou": 0.515625, "loss_num": 0.0194091796875, "loss_xval": 0.09716796875, "num_input_tokens_seen": 185804888, "step": 1082 }, { "epoch": 0.28486881041625567, "grad_norm": 5.9573043544472535, "learning_rate": 5e-06, "loss": 0.1276, "num_input_tokens_seen": 185977324, "step": 1083 }, { "epoch": 0.28486881041625567, "loss": 0.17646890878677368, "loss_ce": 0.0035868186969310045, "loss_iou": 0.5546875, "loss_num": 0.034423828125, "loss_xval": 0.1728515625, "num_input_tokens_seen": 185977324, "step": 1083 }, { "epoch": 0.28513184717564277, "grad_norm": 5.170909045758908, "learning_rate": 5e-06, "loss": 0.1966, "num_input_tokens_seen": 186149548, "step": 1084 }, { "epoch": 0.28513184717564277, "loss": 0.19262221455574036, "loss_ce": 0.000941307342145592, "loss_iou": 0.6640625, "loss_num": 0.038330078125, "loss_xval": 0.19140625, "num_input_tokens_seen": 186149548, "step": 1084 }, { "epoch": 0.28539488393502993, "grad_norm": 16.76166177176129, "learning_rate": 5e-06, "loss": 0.1336, "num_input_tokens_seen": 186321516, "step": 1085 }, { "epoch": 0.28539488393502993, "loss": 0.13177794218063354, "loss_ce": 0.0014678854495286942, "loss_iou": 0.50390625, "loss_num": 0.026123046875, "loss_xval": 0.1298828125, "num_input_tokens_seen": 186321516, "step": 1085 }, { "epoch": 0.28565792069441703, "grad_norm": 13.385145963732795, 
"learning_rate": 5e-06, "loss": 0.1565, "num_input_tokens_seen": 186493748, "step": 1086 }, { "epoch": 0.28565792069441703, "loss": 0.24282805621623993, "loss_ce": 0.0057369922287762165, "loss_iou": 0.48828125, "loss_num": 0.04736328125, "loss_xval": 0.2373046875, "num_input_tokens_seen": 186493748, "step": 1086 }, { "epoch": 0.2859209574538042, "grad_norm": 7.0564105719656025, "learning_rate": 5e-06, "loss": 0.136, "num_input_tokens_seen": 186666020, "step": 1087 }, { "epoch": 0.2859209574538042, "loss": 0.10800403356552124, "loss_ce": 0.000704226375091821, "loss_iou": 0.388671875, "loss_num": 0.021484375, "loss_xval": 0.107421875, "num_input_tokens_seen": 186666020, "step": 1087 }, { "epoch": 0.2861839942131913, "grad_norm": 5.57534276048312, "learning_rate": 5e-06, "loss": 0.1585, "num_input_tokens_seen": 186834708, "step": 1088 }, { "epoch": 0.2861839942131913, "loss": 0.1970679759979248, "loss_ce": 0.0005652993568219244, "loss_iou": 0.474609375, "loss_num": 0.039306640625, "loss_xval": 0.1962890625, "num_input_tokens_seen": 186834708, "step": 1088 }, { "epoch": 0.2864470309725784, "grad_norm": 12.382219859522896, "learning_rate": 5e-06, "loss": 0.1475, "num_input_tokens_seen": 187006628, "step": 1089 }, { "epoch": 0.2864470309725784, "loss": 0.13300946354866028, "loss_ce": 0.0005631742533296347, "loss_iou": 0.55859375, "loss_num": 0.0264892578125, "loss_xval": 0.1328125, "num_input_tokens_seen": 187006628, "step": 1089 }, { "epoch": 0.28671006773196556, "grad_norm": 6.678243304603748, "learning_rate": 5e-06, "loss": 0.1563, "num_input_tokens_seen": 187178940, "step": 1090 }, { "epoch": 0.28671006773196556, "loss": 0.16839508712291718, "loss_ce": 0.004485180135816336, "loss_iou": 0.53125, "loss_num": 0.03271484375, "loss_xval": 0.1640625, "num_input_tokens_seen": 187178940, "step": 1090 }, { "epoch": 0.28697310449135266, "grad_norm": 17.285977120564166, "learning_rate": 5e-06, "loss": 0.1817, "num_input_tokens_seen": 187351348, "step": 1091 }, { "epoch": 
0.28697310449135266, "loss": 0.24864555895328522, "loss_ce": 0.0034063111525028944, "loss_iou": 0.4921875, "loss_num": 0.049072265625, "loss_xval": 0.2451171875, "num_input_tokens_seen": 187351348, "step": 1091 }, { "epoch": 0.2872361412507398, "grad_norm": 5.362480990027185, "learning_rate": 5e-06, "loss": 0.1333, "num_input_tokens_seen": 187523476, "step": 1092 }, { "epoch": 0.2872361412507398, "loss": 0.19447633624076843, "loss_ce": 0.005084256641566753, "loss_iou": 0.70703125, "loss_num": 0.037841796875, "loss_xval": 0.189453125, "num_input_tokens_seen": 187523476, "step": 1092 }, { "epoch": 0.2874991780101269, "grad_norm": 9.00142705860072, "learning_rate": 5e-06, "loss": 0.1254, "num_input_tokens_seen": 187695596, "step": 1093 }, { "epoch": 0.2874991780101269, "loss": 0.13948455452919006, "loss_ce": 0.0006295705679804087, "loss_iou": 0.44140625, "loss_num": 0.02783203125, "loss_xval": 0.138671875, "num_input_tokens_seen": 187695596, "step": 1093 }, { "epoch": 0.287762214769514, "grad_norm": 11.592832787644355, "learning_rate": 5e-06, "loss": 0.1673, "num_input_tokens_seen": 187867844, "step": 1094 }, { "epoch": 0.287762214769514, "loss": 0.1788289099931717, "loss_ce": 0.0009724590927362442, "loss_iou": 0.5859375, "loss_num": 0.03564453125, "loss_xval": 0.177734375, "num_input_tokens_seen": 187867844, "step": 1094 }, { "epoch": 0.2880252515289012, "grad_norm": 5.4984009631677, "learning_rate": 5e-06, "loss": 0.1593, "num_input_tokens_seen": 188039960, "step": 1095 }, { "epoch": 0.2880252515289012, "loss": 0.2135852873325348, "loss_ce": 0.0043262611143291, "loss_iou": 0.458984375, "loss_num": 0.041748046875, "loss_xval": 0.208984375, "num_input_tokens_seen": 188039960, "step": 1095 }, { "epoch": 0.2882882882882883, "grad_norm": 6.039228998020695, "learning_rate": 5e-06, "loss": 0.1504, "num_input_tokens_seen": 188212228, "step": 1096 }, { "epoch": 0.2882882882882883, "loss": 0.10800454020500183, "loss_ce": 0.0013761227019131184, "loss_iou": 0.63671875, 
"loss_num": 0.0213623046875, "loss_xval": 0.1064453125, "num_input_tokens_seen": 188212228, "step": 1096 }, { "epoch": 0.2885513250476754, "grad_norm": 6.337937600162702, "learning_rate": 5e-06, "loss": 0.1611, "num_input_tokens_seen": 188384548, "step": 1097 }, { "epoch": 0.2885513250476754, "loss": 0.2018011212348938, "loss_ce": 0.0031927230302244425, "loss_iou": 0.40234375, "loss_num": 0.039794921875, "loss_xval": 0.1982421875, "num_input_tokens_seen": 188384548, "step": 1097 }, { "epoch": 0.28881436180706255, "grad_norm": 6.282757808284281, "learning_rate": 5e-06, "loss": 0.1238, "num_input_tokens_seen": 188556612, "step": 1098 }, { "epoch": 0.28881436180706255, "loss": 0.07978774607181549, "loss_ce": 0.0007472233846783638, "loss_iou": 0.6015625, "loss_num": 0.015869140625, "loss_xval": 0.0791015625, "num_input_tokens_seen": 188556612, "step": 1098 }, { "epoch": 0.28907739856644965, "grad_norm": 5.5362247942440135, "learning_rate": 5e-06, "loss": 0.1424, "num_input_tokens_seen": 188727124, "step": 1099 }, { "epoch": 0.28907739856644965, "loss": 0.18471282720565796, "loss_ce": 0.0071005141362547874, "loss_iou": 0.455078125, "loss_num": 0.03564453125, "loss_xval": 0.177734375, "num_input_tokens_seen": 188727124, "step": 1099 }, { "epoch": 0.2893404353258368, "grad_norm": 5.455389129921704, "learning_rate": 5e-06, "loss": 0.0874, "num_input_tokens_seen": 188899384, "step": 1100 }, { "epoch": 0.2893404353258368, "loss": 0.08721458911895752, "loss_ce": 0.0006362219573929906, "loss_iou": 0.63671875, "loss_num": 0.017333984375, "loss_xval": 0.08642578125, "num_input_tokens_seen": 188899384, "step": 1100 }, { "epoch": 0.2896034720852239, "grad_norm": 6.057305949672798, "learning_rate": 5e-06, "loss": 0.1669, "num_input_tokens_seen": 189068024, "step": 1101 }, { "epoch": 0.2896034720852239, "loss": 0.1826099157333374, "loss_ce": 0.00026738218730315566, "loss_iou": 0.61328125, "loss_num": 0.036376953125, "loss_xval": 0.1826171875, "num_input_tokens_seen": 189068024, 
"step": 1101 }, { "epoch": 0.289866508844611, "grad_norm": 10.886874649593773, "learning_rate": 5e-06, "loss": 0.1545, "num_input_tokens_seen": 189240248, "step": 1102 }, { "epoch": 0.289866508844611, "loss": 0.1780916154384613, "loss_ce": 0.0009065555641427636, "loss_iou": 0.474609375, "loss_num": 0.035400390625, "loss_xval": 0.1767578125, "num_input_tokens_seen": 189240248, "step": 1102 }, { "epoch": 0.2901295456039982, "grad_norm": 8.357621038563343, "learning_rate": 5e-06, "loss": 0.1303, "num_input_tokens_seen": 189412208, "step": 1103 }, { "epoch": 0.2901295456039982, "loss": 0.13078002631664276, "loss_ce": 0.001751709496602416, "loss_iou": 0.45703125, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 189412208, "step": 1103 }, { "epoch": 0.2903925823633853, "grad_norm": 13.284219513588615, "learning_rate": 5e-06, "loss": 0.1895, "num_input_tokens_seen": 189584480, "step": 1104 }, { "epoch": 0.2903925823633853, "loss": 0.23028159141540527, "loss_ce": 0.003474962431937456, "loss_iou": 0.62890625, "loss_num": 0.04541015625, "loss_xval": 0.2265625, "num_input_tokens_seen": 189584480, "step": 1104 }, { "epoch": 0.29065561912277244, "grad_norm": 6.716057098151779, "learning_rate": 5e-06, "loss": 0.1311, "num_input_tokens_seen": 189755004, "step": 1105 }, { "epoch": 0.29065561912277244, "loss": 0.12404203414916992, "loss_ce": 0.002643106272444129, "loss_iou": 0.61328125, "loss_num": 0.0242919921875, "loss_xval": 0.12158203125, "num_input_tokens_seen": 189755004, "step": 1105 }, { "epoch": 0.29091865588215954, "grad_norm": 13.264130378049543, "learning_rate": 5e-06, "loss": 0.1369, "num_input_tokens_seen": 189927084, "step": 1106 }, { "epoch": 0.29091865588215954, "loss": 0.10699759423732758, "loss_ce": 0.0014678104780614376, "loss_iou": 0.392578125, "loss_num": 0.0211181640625, "loss_xval": 0.10546875, "num_input_tokens_seen": 189927084, "step": 1106 }, { "epoch": 0.29118169264154664, "grad_norm": 19.87883744683546, "learning_rate": 
5e-06, "loss": 0.1321, "num_input_tokens_seen": 190097456, "step": 1107 }, { "epoch": 0.29118169264154664, "loss": 0.19540926814079285, "loss_ce": 0.0016531546134501696, "loss_iou": 0.447265625, "loss_num": 0.038818359375, "loss_xval": 0.193359375, "num_input_tokens_seen": 190097456, "step": 1107 }, { "epoch": 0.2914447294009338, "grad_norm": 7.462386686254148, "learning_rate": 5e-06, "loss": 0.1294, "num_input_tokens_seen": 190269508, "step": 1108 }, { "epoch": 0.2914447294009338, "loss": 0.08214541524648666, "loss_ce": 0.004813872277736664, "loss_iou": 0.59375, "loss_num": 0.0155029296875, "loss_xval": 0.0771484375, "num_input_tokens_seen": 190269508, "step": 1108 }, { "epoch": 0.2917077661603209, "grad_norm": 10.674953183668224, "learning_rate": 5e-06, "loss": 0.1763, "num_input_tokens_seen": 190441836, "step": 1109 }, { "epoch": 0.2917077661603209, "loss": 0.1546817272901535, "loss_ce": 0.006824057083576918, "loss_iou": 0.32421875, "loss_num": 0.029541015625, "loss_xval": 0.1474609375, "num_input_tokens_seen": 190441836, "step": 1109 }, { "epoch": 0.291970802919708, "grad_norm": 6.315839345670245, "learning_rate": 5e-06, "loss": 0.1579, "num_input_tokens_seen": 190614052, "step": 1110 }, { "epoch": 0.291970802919708, "loss": 0.15810704231262207, "loss_ce": 0.0053970692679286, "loss_iou": 0.62109375, "loss_num": 0.030517578125, "loss_xval": 0.15234375, "num_input_tokens_seen": 190614052, "step": 1110 }, { "epoch": 0.29223383967909516, "grad_norm": 6.217305097644484, "learning_rate": 5e-06, "loss": 0.1272, "num_input_tokens_seen": 190786084, "step": 1111 }, { "epoch": 0.29223383967909516, "loss": 0.07959192991256714, "loss_ce": 0.0006734670605510473, "loss_iou": 0.53125, "loss_num": 0.0157470703125, "loss_xval": 0.0791015625, "num_input_tokens_seen": 190786084, "step": 1111 }, { "epoch": 0.29249687643848227, "grad_norm": 5.126517505767914, "learning_rate": 5e-06, "loss": 0.1448, "num_input_tokens_seen": 190956588, "step": 1112 }, { "epoch": 0.29249687643848227, 
"loss": 0.13071726262569427, "loss_ce": 0.003184308996424079, "loss_iou": 0.6328125, "loss_num": 0.0255126953125, "loss_xval": 0.1279296875, "num_input_tokens_seen": 190956588, "step": 1112 }, { "epoch": 0.2927599131978694, "grad_norm": 8.996821720570772, "learning_rate": 5e-06, "loss": 0.1638, "num_input_tokens_seen": 191126948, "step": 1113 }, { "epoch": 0.2927599131978694, "loss": 0.07688204199075699, "loss_ce": 0.0015036254189908504, "loss_iou": 0.490234375, "loss_num": 0.01507568359375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 191126948, "step": 1113 }, { "epoch": 0.29302294995725653, "grad_norm": 5.558729986145218, "learning_rate": 5e-06, "loss": 0.1861, "num_input_tokens_seen": 191299288, "step": 1114 }, { "epoch": 0.29302294995725653, "loss": 0.15988363325595856, "loss_ce": 0.0014058587839826941, "loss_iou": 0.5390625, "loss_num": 0.03173828125, "loss_xval": 0.158203125, "num_input_tokens_seen": 191299288, "step": 1114 }, { "epoch": 0.29328598671664363, "grad_norm": 6.8874530652883825, "learning_rate": 5e-06, "loss": 0.1307, "num_input_tokens_seen": 191471672, "step": 1115 }, { "epoch": 0.29328598671664363, "loss": 0.09309347718954086, "loss_ce": 0.0007167698349803686, "loss_iou": 0.4453125, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 191471672, "step": 1115 }, { "epoch": 0.2935490234760308, "grad_norm": 13.843828225173697, "learning_rate": 5e-06, "loss": 0.1446, "num_input_tokens_seen": 191643552, "step": 1116 }, { "epoch": 0.2935490234760308, "loss": 0.08135861903429031, "loss_ce": 0.00021237613691482693, "loss_iou": 0.49609375, "loss_num": 0.0162353515625, "loss_xval": 0.0810546875, "num_input_tokens_seen": 191643552, "step": 1116 }, { "epoch": 0.2938120602354179, "grad_norm": 5.436785087484111, "learning_rate": 5e-06, "loss": 0.1364, "num_input_tokens_seen": 191815984, "step": 1117 }, { "epoch": 0.2938120602354179, "loss": 0.10953962057828903, "loss_ce": 0.0004392758710309863, "loss_iou": 0.5546875, 
"loss_num": 0.0218505859375, "loss_xval": 0.10888671875, "num_input_tokens_seen": 191815984, "step": 1117 }, { "epoch": 0.294075096994805, "grad_norm": 11.598374668500005, "learning_rate": 5e-06, "loss": 0.1268, "num_input_tokens_seen": 191988036, "step": 1118 }, { "epoch": 0.294075096994805, "loss": 0.0961490124464035, "loss_ce": 0.0009952039690688252, "loss_iou": 0.65625, "loss_num": 0.01904296875, "loss_xval": 0.09521484375, "num_input_tokens_seen": 191988036, "step": 1118 }, { "epoch": 0.29433813375419215, "grad_norm": 7.498841309335166, "learning_rate": 5e-06, "loss": 0.1726, "num_input_tokens_seen": 192160116, "step": 1119 }, { "epoch": 0.29433813375419215, "loss": 0.25876477360725403, "loss_ce": 0.004339736420661211, "loss_iou": 0.56640625, "loss_num": 0.05078125, "loss_xval": 0.25390625, "num_input_tokens_seen": 192160116, "step": 1119 }, { "epoch": 0.29460117051357926, "grad_norm": 5.645236594614784, "learning_rate": 5e-06, "loss": 0.1284, "num_input_tokens_seen": 192332060, "step": 1120 }, { "epoch": 0.29460117051357926, "loss": 0.07815182209014893, "loss_ce": 0.0011254575802013278, "loss_iou": 0.6171875, "loss_num": 0.015380859375, "loss_xval": 0.0771484375, "num_input_tokens_seen": 192332060, "step": 1120 }, { "epoch": 0.2948642072729664, "grad_norm": 5.473412167736241, "learning_rate": 5e-06, "loss": 0.1499, "num_input_tokens_seen": 192504064, "step": 1121 }, { "epoch": 0.2948642072729664, "loss": 0.19324743747711182, "loss_ce": 0.0004373906413093209, "loss_iou": 0.51171875, "loss_num": 0.03857421875, "loss_xval": 0.1923828125, "num_input_tokens_seen": 192504064, "step": 1121 }, { "epoch": 0.2951272440323535, "grad_norm": 15.77937603528804, "learning_rate": 5e-06, "loss": 0.1454, "num_input_tokens_seen": 192676312, "step": 1122 }, { "epoch": 0.2951272440323535, "loss": 0.11738383769989014, "loss_ce": 0.0015696310438215733, "loss_iou": 0.56640625, "loss_num": 0.023193359375, "loss_xval": 0.11572265625, "num_input_tokens_seen": 192676312, "step": 1122 }, 
{ "epoch": 0.2953902807917406, "grad_norm": 6.595809745274096, "learning_rate": 5e-06, "loss": 0.1605, "num_input_tokens_seen": 192848608, "step": 1123 }, { "epoch": 0.2953902807917406, "loss": 0.1380428671836853, "loss_ce": 0.0005306481616571546, "loss_iou": 0.453125, "loss_num": 0.0274658203125, "loss_xval": 0.1376953125, "num_input_tokens_seen": 192848608, "step": 1123 }, { "epoch": 0.2956533175511278, "grad_norm": 5.143986978274753, "learning_rate": 5e-06, "loss": 0.2009, "num_input_tokens_seen": 193021044, "step": 1124 }, { "epoch": 0.2956533175511278, "loss": 0.2367524653673172, "loss_ce": 0.00335403298959136, "loss_iou": 0.796875, "loss_num": 0.046630859375, "loss_xval": 0.2333984375, "num_input_tokens_seen": 193021044, "step": 1124 }, { "epoch": 0.2959163543105149, "grad_norm": 6.676850446443053, "learning_rate": 5e-06, "loss": 0.1559, "num_input_tokens_seen": 193193124, "step": 1125 }, { "epoch": 0.2959163543105149, "loss": 0.1376284509897232, "loss_ce": 0.002099899807944894, "loss_iou": 0.5234375, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 193193124, "step": 1125 }, { "epoch": 0.29617939106990204, "grad_norm": 6.051359778802083, "learning_rate": 5e-06, "loss": 0.168, "num_input_tokens_seen": 193365612, "step": 1126 }, { "epoch": 0.29617939106990204, "loss": 0.11157628893852234, "loss_ce": 0.001163685112260282, "loss_iou": 0.640625, "loss_num": 0.0220947265625, "loss_xval": 0.1103515625, "num_input_tokens_seen": 193365612, "step": 1126 }, { "epoch": 0.29644242782928915, "grad_norm": 9.91415103149531, "learning_rate": 5e-06, "loss": 0.1052, "num_input_tokens_seen": 193537580, "step": 1127 }, { "epoch": 0.29644242782928915, "loss": 0.06942566484212875, "loss_ce": 0.004575815983116627, "loss_iou": 0.625, "loss_num": 0.012939453125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 193537580, "step": 1127 }, { "epoch": 0.29670546458867625, "grad_norm": 8.171761917591171, "learning_rate": 5e-06, "loss": 0.179, 
"num_input_tokens_seen": 193709920, "step": 1128 }, { "epoch": 0.29670546458867625, "loss": 0.15288731455802917, "loss_ce": 0.0006046106573194265, "loss_iou": 0.65234375, "loss_num": 0.0303955078125, "loss_xval": 0.15234375, "num_input_tokens_seen": 193709920, "step": 1128 }, { "epoch": 0.2969685013480634, "grad_norm": 4.591948701112417, "learning_rate": 5e-06, "loss": 0.1129, "num_input_tokens_seen": 193878608, "step": 1129 }, { "epoch": 0.2969685013480634, "loss": 0.15515002608299255, "loss_ce": 0.005400286056101322, "loss_iou": 0.44140625, "loss_num": 0.0299072265625, "loss_xval": 0.1494140625, "num_input_tokens_seen": 193878608, "step": 1129 }, { "epoch": 0.2972315381074505, "grad_norm": 9.7802730005981, "learning_rate": 5e-06, "loss": 0.1442, "num_input_tokens_seen": 194051024, "step": 1130 }, { "epoch": 0.2972315381074505, "loss": 0.13672733306884766, "loss_ce": 0.0005579069838859141, "loss_iou": 0.6328125, "loss_num": 0.0272216796875, "loss_xval": 0.1357421875, "num_input_tokens_seen": 194051024, "step": 1130 }, { "epoch": 0.2974945748668376, "grad_norm": 17.109119761118713, "learning_rate": 5e-06, "loss": 0.1283, "num_input_tokens_seen": 194223696, "step": 1131 }, { "epoch": 0.2974945748668376, "loss": 0.0957454964518547, "loss_ce": 0.0012325569987297058, "loss_iou": 0.435546875, "loss_num": 0.0189208984375, "loss_xval": 0.0947265625, "num_input_tokens_seen": 194223696, "step": 1131 }, { "epoch": 0.29775761162622477, "grad_norm": 4.844704657006506, "learning_rate": 5e-06, "loss": 0.1153, "num_input_tokens_seen": 194393332, "step": 1132 }, { "epoch": 0.29775761162622477, "loss": 0.08335787057876587, "loss_ce": 0.000533167680259794, "loss_iou": 0.70703125, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 194393332, "step": 1132 }, { "epoch": 0.2980206483856119, "grad_norm": 12.589562992323502, "learning_rate": 5e-06, "loss": 0.117, "num_input_tokens_seen": 194563660, "step": 1133 }, { "epoch": 0.2980206483856119, "loss": 
0.10209088772535324, "loss_ce": 0.003366521093994379, "loss_iou": 0.4453125, "loss_num": 0.019775390625, "loss_xval": 0.0986328125, "num_input_tokens_seen": 194563660, "step": 1133 }, { "epoch": 0.29828368514499903, "grad_norm": 5.818392431333322, "learning_rate": 5e-06, "loss": 0.1645, "num_input_tokens_seen": 194735892, "step": 1134 }, { "epoch": 0.29828368514499903, "loss": 0.09164222329854965, "loss_ce": 0.004392467439174652, "loss_iou": 0.6328125, "loss_num": 0.0174560546875, "loss_xval": 0.08740234375, "num_input_tokens_seen": 194735892, "step": 1134 }, { "epoch": 0.29854672190438614, "grad_norm": 6.569465101392607, "learning_rate": 5e-06, "loss": 0.1288, "num_input_tokens_seen": 194907844, "step": 1135 }, { "epoch": 0.29854672190438614, "loss": 0.12394984811544418, "loss_ce": 0.0028560941573232412, "loss_iou": 0.390625, "loss_num": 0.024169921875, "loss_xval": 0.12109375, "num_input_tokens_seen": 194907844, "step": 1135 }, { "epoch": 0.29880975866377324, "grad_norm": 5.132755422401755, "learning_rate": 5e-06, "loss": 0.1262, "num_input_tokens_seen": 195079704, "step": 1136 }, { "epoch": 0.29880975866377324, "loss": 0.11163240671157837, "loss_ce": 0.001341882161796093, "loss_iou": 0.4921875, "loss_num": 0.0220947265625, "loss_xval": 0.1103515625, "num_input_tokens_seen": 195079704, "step": 1136 }, { "epoch": 0.2990727954231604, "grad_norm": 12.81251876887425, "learning_rate": 5e-06, "loss": 0.1413, "num_input_tokens_seen": 195251904, "step": 1137 }, { "epoch": 0.2990727954231604, "loss": 0.18034929037094116, "loss_ce": 0.0009364524157717824, "loss_iou": 0.482421875, "loss_num": 0.035888671875, "loss_xval": 0.1796875, "num_input_tokens_seen": 195251904, "step": 1137 }, { "epoch": 0.2993358321825475, "grad_norm": 6.432718409068677, "learning_rate": 5e-06, "loss": 0.1404, "num_input_tokens_seen": 195424180, "step": 1138 }, { "epoch": 0.2993358321825475, "loss": 0.08383812010288239, "loss_ce": 0.0012575555592775345, "loss_iou": 0.59375, "loss_num": 
0.0164794921875, "loss_xval": 0.08251953125, "num_input_tokens_seen": 195424180, "step": 1138 }, { "epoch": 0.29959886894193466, "grad_norm": 11.352446611671667, "learning_rate": 5e-06, "loss": 0.1314, "num_input_tokens_seen": 195596468, "step": 1139 }, { "epoch": 0.29959886894193466, "loss": 0.1503646820783615, "loss_ce": 0.0016220146790146828, "loss_iou": 0.52734375, "loss_num": 0.02978515625, "loss_xval": 0.1484375, "num_input_tokens_seen": 195596468, "step": 1139 }, { "epoch": 0.29986190570132176, "grad_norm": 7.444433348783749, "learning_rate": 5e-06, "loss": 0.1299, "num_input_tokens_seen": 195768360, "step": 1140 }, { "epoch": 0.29986190570132176, "loss": 0.12922053039073944, "loss_ce": 0.0006804917939007282, "loss_iou": 0.62109375, "loss_num": 0.025634765625, "loss_xval": 0.12890625, "num_input_tokens_seen": 195768360, "step": 1140 }, { "epoch": 0.30012494246070887, "grad_norm": 6.813355666039827, "learning_rate": 5e-06, "loss": 0.1133, "num_input_tokens_seen": 195940504, "step": 1141 }, { "epoch": 0.30012494246070887, "loss": 0.14036604762077332, "loss_ce": 0.002609711140394211, "loss_iou": 0.515625, "loss_num": 0.027587890625, "loss_xval": 0.1376953125, "num_input_tokens_seen": 195940504, "step": 1141 }, { "epoch": 0.300387979220096, "grad_norm": 9.605056179900458, "learning_rate": 5e-06, "loss": 0.1463, "num_input_tokens_seen": 196112784, "step": 1142 }, { "epoch": 0.300387979220096, "loss": 0.14964430034160614, "loss_ce": 0.0015119716990739107, "loss_iou": 0.44140625, "loss_num": 0.0296630859375, "loss_xval": 0.1484375, "num_input_tokens_seen": 196112784, "step": 1142 }, { "epoch": 0.3006510159794831, "grad_norm": 7.338861721143607, "learning_rate": 5e-06, "loss": 0.1198, "num_input_tokens_seen": 196285072, "step": 1143 }, { "epoch": 0.3006510159794831, "loss": 0.13087643682956696, "loss_ce": 0.0021532890386879444, "loss_iou": 0.46875, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 196285072, "step": 1143 }, { "epoch": 
0.30091405273887023, "grad_norm": 11.553065995211972, "learning_rate": 5e-06, "loss": 0.1705, "num_input_tokens_seen": 196457252, "step": 1144 }, { "epoch": 0.30091405273887023, "loss": 0.20345042645931244, "loss_ce": 0.0005085308803245425, "loss_iou": 0.5390625, "loss_num": 0.04052734375, "loss_xval": 0.203125, "num_input_tokens_seen": 196457252, "step": 1144 }, { "epoch": 0.3011770894982574, "grad_norm": 7.552187403058781, "learning_rate": 5e-06, "loss": 0.1652, "num_input_tokens_seen": 196629168, "step": 1145 }, { "epoch": 0.3011770894982574, "loss": 0.17339852452278137, "loss_ce": 0.004331144969910383, "loss_iou": 0.412109375, "loss_num": 0.03369140625, "loss_xval": 0.1689453125, "num_input_tokens_seen": 196629168, "step": 1145 }, { "epoch": 0.3014401262576445, "grad_norm": 5.762343246857412, "learning_rate": 5e-06, "loss": 0.1213, "num_input_tokens_seen": 196801432, "step": 1146 }, { "epoch": 0.3014401262576445, "loss": 0.07828192412853241, "loss_ce": 0.001377625041641295, "loss_iou": 0.416015625, "loss_num": 0.015380859375, "loss_xval": 0.0771484375, "num_input_tokens_seen": 196801432, "step": 1146 }, { "epoch": 0.30170316301703165, "grad_norm": 5.174420810424456, "learning_rate": 5e-06, "loss": 0.1737, "num_input_tokens_seen": 196973908, "step": 1147 }, { "epoch": 0.30170316301703165, "loss": 0.22339066863059998, "loss_ce": 0.004701712634414434, "loss_iou": 0.53125, "loss_num": 0.043701171875, "loss_xval": 0.21875, "num_input_tokens_seen": 196973908, "step": 1147 }, { "epoch": 0.30196619977641875, "grad_norm": 10.875286083758624, "learning_rate": 5e-06, "loss": 0.1878, "num_input_tokens_seen": 197146088, "step": 1148 }, { "epoch": 0.30196619977641875, "loss": 0.1432519108057022, "loss_ce": 0.002657424658536911, "loss_iou": 0.66015625, "loss_num": 0.0281982421875, "loss_xval": 0.140625, "num_input_tokens_seen": 197146088, "step": 1148 }, { "epoch": 0.30222923653580586, "grad_norm": 5.705651681197872, "learning_rate": 5e-06, "loss": 0.1464, 
"num_input_tokens_seen": 197318460, "step": 1149 }, { "epoch": 0.30222923653580586, "loss": 0.12248589098453522, "loss_ce": 0.002002496039494872, "loss_iou": 0.5234375, "loss_num": 0.024169921875, "loss_xval": 0.12060546875, "num_input_tokens_seen": 197318460, "step": 1149 }, { "epoch": 0.302492273295193, "grad_norm": 6.2074966030457706, "learning_rate": 5e-06, "loss": 0.1398, "num_input_tokens_seen": 197490428, "step": 1150 }, { "epoch": 0.302492273295193, "loss": 0.1659487783908844, "loss_ce": 0.0009402353898622096, "loss_iou": 0.5, "loss_num": 0.032958984375, "loss_xval": 0.1650390625, "num_input_tokens_seen": 197490428, "step": 1150 }, { "epoch": 0.3027553100545801, "grad_norm": 11.278645202454504, "learning_rate": 5e-06, "loss": 0.1513, "num_input_tokens_seen": 197662972, "step": 1151 }, { "epoch": 0.3027553100545801, "loss": 0.12700702250003815, "loss_ce": 0.0012135641882196069, "loss_iou": 0.58984375, "loss_num": 0.025146484375, "loss_xval": 0.1259765625, "num_input_tokens_seen": 197662972, "step": 1151 }, { "epoch": 0.3030183468139673, "grad_norm": 5.7121477930459665, "learning_rate": 5e-06, "loss": 0.1739, "num_input_tokens_seen": 197835168, "step": 1152 }, { "epoch": 0.3030183468139673, "loss": 0.10988777130842209, "loss_ce": 0.004174881149083376, "loss_iou": 0.546875, "loss_num": 0.0211181640625, "loss_xval": 0.10546875, "num_input_tokens_seen": 197835168, "step": 1152 }, { "epoch": 0.3032813835733544, "grad_norm": 12.425343043979453, "learning_rate": 5e-06, "loss": 0.1541, "num_input_tokens_seen": 198007440, "step": 1153 }, { "epoch": 0.3032813835733544, "loss": 0.18807393312454224, "loss_ce": 0.0031679358799010515, "loss_iou": 0.478515625, "loss_num": 0.036865234375, "loss_xval": 0.1845703125, "num_input_tokens_seen": 198007440, "step": 1153 }, { "epoch": 0.3035444203327415, "grad_norm": 5.925789792578691, "learning_rate": 5e-06, "loss": 0.1132, "num_input_tokens_seen": 198179896, "step": 1154 }, { "epoch": 0.3035444203327415, "loss": 
0.07772859930992126, "loss_ce": 0.0008548187324777246, "loss_iou": 0.66015625, "loss_num": 0.015380859375, "loss_xval": 0.07666015625, "num_input_tokens_seen": 198179896, "step": 1154 }, { "epoch": 0.30380745709212864, "grad_norm": 5.901916078113717, "learning_rate": 5e-06, "loss": 0.1559, "num_input_tokens_seen": 198352320, "step": 1155 }, { "epoch": 0.30380745709212864, "loss": 0.17940585315227509, "loss_ce": 0.0011221629101783037, "loss_iou": 0.6484375, "loss_num": 0.03564453125, "loss_xval": 0.1787109375, "num_input_tokens_seen": 198352320, "step": 1155 }, { "epoch": 0.30407049385151574, "grad_norm": 10.483911413706299, "learning_rate": 5e-06, "loss": 0.1441, "num_input_tokens_seen": 198524512, "step": 1156 }, { "epoch": 0.30407049385151574, "loss": 0.12064538896083832, "loss_ce": 0.0006197594339028001, "loss_iou": 0.6328125, "loss_num": 0.0240478515625, "loss_xval": 0.1201171875, "num_input_tokens_seen": 198524512, "step": 1156 }, { "epoch": 0.30433353061090285, "grad_norm": 4.522315194840346, "learning_rate": 5e-06, "loss": 0.1278, "num_input_tokens_seen": 198696612, "step": 1157 }, { "epoch": 0.30433353061090285, "loss": 0.09706555306911469, "loss_ce": 0.0013929473934695125, "loss_iou": 0.515625, "loss_num": 0.0191650390625, "loss_xval": 0.095703125, "num_input_tokens_seen": 198696612, "step": 1157 }, { "epoch": 0.30459656737029, "grad_norm": 28.08652601411638, "learning_rate": 5e-06, "loss": 0.1261, "num_input_tokens_seen": 198868856, "step": 1158 }, { "epoch": 0.30459656737029, "loss": 0.052179381251335144, "loss_ce": 0.00020794683950953186, "loss_iou": 0.53515625, "loss_num": 0.0103759765625, "loss_xval": 0.052001953125, "num_input_tokens_seen": 198868856, "step": 1158 }, { "epoch": 0.3048596041296771, "grad_norm": 7.240097871891992, "learning_rate": 5e-06, "loss": 0.1803, "num_input_tokens_seen": 199041064, "step": 1159 }, { "epoch": 0.3048596041296771, "loss": 0.18336477875709534, "loss_ce": 0.0020293283741921186, "loss_iou": 0.5546875, "loss_num": 
0.036376953125, "loss_xval": 0.181640625, "num_input_tokens_seen": 199041064, "step": 1159 }, { "epoch": 0.30512264088906427, "grad_norm": 5.628334621930633, "learning_rate": 5e-06, "loss": 0.1391, "num_input_tokens_seen": 199213476, "step": 1160 }, { "epoch": 0.30512264088906427, "loss": 0.12429136037826538, "loss_ce": 0.0002068809699267149, "loss_iou": 0.79296875, "loss_num": 0.0247802734375, "loss_xval": 0.1240234375, "num_input_tokens_seen": 199213476, "step": 1160 }, { "epoch": 0.30538567764845137, "grad_norm": 5.043014865834959, "learning_rate": 5e-06, "loss": 0.1778, "num_input_tokens_seen": 199385580, "step": 1161 }, { "epoch": 0.30538567764845137, "loss": 0.09477389603853226, "loss_ce": 0.001115447492338717, "loss_iou": 0.44140625, "loss_num": 0.018798828125, "loss_xval": 0.09375, "num_input_tokens_seen": 199385580, "step": 1161 }, { "epoch": 0.3056487144078385, "grad_norm": 5.156573422162299, "learning_rate": 5e-06, "loss": 0.1214, "num_input_tokens_seen": 199556156, "step": 1162 }, { "epoch": 0.3056487144078385, "loss": 0.14625222980976105, "loss_ce": 0.00046976495650596917, "loss_iou": 0.64453125, "loss_num": 0.0291748046875, "loss_xval": 0.1455078125, "num_input_tokens_seen": 199556156, "step": 1162 }, { "epoch": 0.30591175116722563, "grad_norm": 4.702903740874693, "learning_rate": 5e-06, "loss": 0.1286, "num_input_tokens_seen": 199728484, "step": 1163 }, { "epoch": 0.30591175116722563, "loss": 0.156254380941391, "loss_ce": 0.001774407341144979, "loss_iou": 0.5078125, "loss_num": 0.0308837890625, "loss_xval": 0.154296875, "num_input_tokens_seen": 199728484, "step": 1163 }, { "epoch": 0.30617478792661273, "grad_norm": 5.380502082020364, "learning_rate": 5e-06, "loss": 0.1479, "num_input_tokens_seen": 199900748, "step": 1164 }, { "epoch": 0.30617478792661273, "loss": 0.16096284985542297, "loss_ce": 0.0019052416319027543, "loss_iou": 0.7421875, "loss_num": 0.03173828125, "loss_xval": 0.1591796875, "num_input_tokens_seen": 199900748, "step": 1164 }, { 
"epoch": 0.3064378246859999, "grad_norm": 5.7025282283031595, "learning_rate": 5e-06, "loss": 0.149, "num_input_tokens_seen": 200072488, "step": 1165 }, { "epoch": 0.3064378246859999, "loss": 0.17774316668510437, "loss_ce": 0.00037499924656003714, "loss_iou": 0.4921875, "loss_num": 0.035400390625, "loss_xval": 0.177734375, "num_input_tokens_seen": 200072488, "step": 1165 }, { "epoch": 0.306700861445387, "grad_norm": 5.532941140209234, "learning_rate": 5e-06, "loss": 0.1311, "num_input_tokens_seen": 200244696, "step": 1166 }, { "epoch": 0.306700861445387, "loss": 0.12291580438613892, "loss_ce": 0.0016694690566509962, "loss_iou": 0.703125, "loss_num": 0.0242919921875, "loss_xval": 0.12109375, "num_input_tokens_seen": 200244696, "step": 1166 }, { "epoch": 0.3069638982047741, "grad_norm": 5.5856010120270705, "learning_rate": 5e-06, "loss": 0.1263, "num_input_tokens_seen": 200417156, "step": 1167 }, { "epoch": 0.3069638982047741, "loss": 0.12629887461662292, "loss_ce": 0.0023059630766510963, "loss_iou": 0.5859375, "loss_num": 0.0247802734375, "loss_xval": 0.1240234375, "num_input_tokens_seen": 200417156, "step": 1167 }, { "epoch": 0.30722693496416126, "grad_norm": 4.725090769923517, "learning_rate": 5e-06, "loss": 0.1267, "num_input_tokens_seen": 200587780, "step": 1168 }, { "epoch": 0.30722693496416126, "loss": 0.1289183497428894, "loss_ce": 0.0026366179808974266, "loss_iou": 0.6953125, "loss_num": 0.0252685546875, "loss_xval": 0.1259765625, "num_input_tokens_seen": 200587780, "step": 1168 }, { "epoch": 0.30748997172354836, "grad_norm": 4.307353086128186, "learning_rate": 5e-06, "loss": 0.1389, "num_input_tokens_seen": 200759832, "step": 1169 }, { "epoch": 0.30748997172354836, "loss": 0.16026800870895386, "loss_ce": 0.0027362804394215345, "loss_iou": 0.431640625, "loss_num": 0.031494140625, "loss_xval": 0.1572265625, "num_input_tokens_seen": 200759832, "step": 1169 }, { "epoch": 0.30775300848293546, "grad_norm": 11.530918199945434, "learning_rate": 5e-06, "loss": 
0.1914, "num_input_tokens_seen": 200929932, "step": 1170 }, { "epoch": 0.30775300848293546, "loss": 0.19007167220115662, "loss_ce": 0.0034261636901646852, "loss_iou": 0.51953125, "loss_num": 0.037353515625, "loss_xval": 0.1865234375, "num_input_tokens_seen": 200929932, "step": 1170 }, { "epoch": 0.3080160452423226, "grad_norm": 7.858471531155482, "learning_rate": 5e-06, "loss": 0.1989, "num_input_tokens_seen": 201100520, "step": 1171 }, { "epoch": 0.3080160452423226, "loss": 0.204483300447464, "loss_ce": 0.0005648470250889659, "loss_iou": 0.55859375, "loss_num": 0.040771484375, "loss_xval": 0.2041015625, "num_input_tokens_seen": 201100520, "step": 1171 }, { "epoch": 0.3082790820017097, "grad_norm": 5.1657164225988, "learning_rate": 5e-06, "loss": 0.1676, "num_input_tokens_seen": 201272808, "step": 1172 }, { "epoch": 0.3082790820017097, "loss": 0.12782040238380432, "loss_ce": 0.006909756921231747, "loss_iou": 0.67578125, "loss_num": 0.0242919921875, "loss_xval": 0.12109375, "num_input_tokens_seen": 201272808, "step": 1172 }, { "epoch": 0.3085421187610969, "grad_norm": 5.7855382812859295, "learning_rate": 5e-06, "loss": 0.1718, "num_input_tokens_seen": 201445044, "step": 1173 }, { "epoch": 0.3085421187610969, "loss": 0.22742098569869995, "loss_ce": 0.0040017981082201, "loss_iou": 0.42578125, "loss_num": 0.044677734375, "loss_xval": 0.2236328125, "num_input_tokens_seen": 201445044, "step": 1173 }, { "epoch": 0.308805155520484, "grad_norm": 19.043518960426944, "learning_rate": 5e-06, "loss": 0.1271, "num_input_tokens_seen": 201617324, "step": 1174 }, { "epoch": 0.308805155520484, "loss": 0.09914430975914001, "loss_ce": 0.001243914244696498, "loss_iou": 0.6171875, "loss_num": 0.01953125, "loss_xval": 0.09765625, "num_input_tokens_seen": 201617324, "step": 1174 }, { "epoch": 0.3090681922798711, "grad_norm": 7.337870221119852, "learning_rate": 5e-06, "loss": 0.1898, "num_input_tokens_seen": 201789648, "step": 1175 }, { "epoch": 0.3090681922798711, "loss": 
0.2523178160190582, "loss_ce": 0.0009750226745381951, "loss_iou": 0.52734375, "loss_num": 0.05029296875, "loss_xval": 0.251953125, "num_input_tokens_seen": 201789648, "step": 1175 }, { "epoch": 0.30933122903925825, "grad_norm": 18.238078934317997, "learning_rate": 5e-06, "loss": 0.1987, "num_input_tokens_seen": 201961724, "step": 1176 }, { "epoch": 0.30933122903925825, "loss": 0.17623552680015564, "loss_ce": 0.003994307480752468, "loss_iou": 0.396484375, "loss_num": 0.034423828125, "loss_xval": 0.171875, "num_input_tokens_seen": 201961724, "step": 1176 }, { "epoch": 0.30959426579864535, "grad_norm": 4.51493162048169, "learning_rate": 5e-06, "loss": 0.1611, "num_input_tokens_seen": 202133700, "step": 1177 }, { "epoch": 0.30959426579864535, "loss": 0.1072007492184639, "loss_ce": 0.005058412905782461, "loss_iou": 0.52734375, "loss_num": 0.0203857421875, "loss_xval": 0.10205078125, "num_input_tokens_seen": 202133700, "step": 1177 }, { "epoch": 0.3098573025580325, "grad_norm": 5.072583382632679, "learning_rate": 5e-06, "loss": 0.1459, "num_input_tokens_seen": 202304256, "step": 1178 }, { "epoch": 0.3098573025580325, "loss": 0.14473965764045715, "loss_ce": 0.0024361968971788883, "loss_iou": 0.51953125, "loss_num": 0.0284423828125, "loss_xval": 0.142578125, "num_input_tokens_seen": 202304256, "step": 1178 }, { "epoch": 0.3101203393174196, "grad_norm": 7.387732741099245, "learning_rate": 5e-06, "loss": 0.1598, "num_input_tokens_seen": 202476488, "step": 1179 }, { "epoch": 0.3101203393174196, "loss": 0.25686854124069214, "loss_ce": 0.005464731715619564, "loss_iou": 0.408203125, "loss_num": 0.05029296875, "loss_xval": 0.251953125, "num_input_tokens_seen": 202476488, "step": 1179 }, { "epoch": 0.3103833760768067, "grad_norm": 7.564357354372493, "learning_rate": 5e-06, "loss": 0.1721, "num_input_tokens_seen": 202648844, "step": 1180 }, { "epoch": 0.3103833760768067, "loss": 0.2612742781639099, "loss_ce": 0.00047106179408729076, "loss_iou": 0.546875, "loss_num": 0.05224609375, 
"loss_xval": 0.26171875, "num_input_tokens_seen": 202648844, "step": 1180 }, { "epoch": 0.3106464128361939, "grad_norm": 12.304004570686834, "learning_rate": 5e-06, "loss": 0.1653, "num_input_tokens_seen": 202821176, "step": 1181 }, { "epoch": 0.3106464128361939, "loss": 0.2418098747730255, "loss_ce": 0.0016365369083359838, "loss_iou": 0.453125, "loss_num": 0.048095703125, "loss_xval": 0.240234375, "num_input_tokens_seen": 202821176, "step": 1181 }, { "epoch": 0.310909449595581, "grad_norm": 4.510810550512272, "learning_rate": 5e-06, "loss": 0.127, "num_input_tokens_seen": 202993428, "step": 1182 }, { "epoch": 0.310909449595581, "loss": 0.058775611221790314, "loss_ce": 0.0007006583036854863, "loss_iou": 0.390625, "loss_num": 0.0115966796875, "loss_xval": 0.05810546875, "num_input_tokens_seen": 202993428, "step": 1182 }, { "epoch": 0.3111724863549681, "grad_norm": 44.4539532395822, "learning_rate": 5e-06, "loss": 0.1567, "num_input_tokens_seen": 203164156, "step": 1183 }, { "epoch": 0.3111724863549681, "loss": 0.16502994298934937, "loss_ce": 0.0006927890353836119, "loss_iou": 0.55078125, "loss_num": 0.03271484375, "loss_xval": 0.1640625, "num_input_tokens_seen": 203164156, "step": 1183 }, { "epoch": 0.31143552311435524, "grad_norm": 7.885064131627441, "learning_rate": 5e-06, "loss": 0.1458, "num_input_tokens_seen": 203336328, "step": 1184 }, { "epoch": 0.31143552311435524, "loss": 0.07413887977600098, "loss_ce": 0.00013374855916481465, "loss_iou": 0.62109375, "loss_num": 0.01483154296875, "loss_xval": 0.07421875, "num_input_tokens_seen": 203336328, "step": 1184 }, { "epoch": 0.31169855987374234, "grad_norm": 5.056026310722222, "learning_rate": 5e-06, "loss": 0.1111, "num_input_tokens_seen": 203508700, "step": 1185 }, { "epoch": 0.31169855987374234, "loss": 0.07386209070682526, "loss_ce": 0.0002842045505531132, "loss_iou": 0.4609375, "loss_num": 0.01470947265625, "loss_xval": 0.07373046875, "num_input_tokens_seen": 203508700, "step": 1185 }, { "epoch": 
0.3119615966331295, "grad_norm": 9.611820717208333, "learning_rate": 5e-06, "loss": 0.1689, "num_input_tokens_seen": 203680940, "step": 1186 }, { "epoch": 0.3119615966331295, "loss": 0.13154730200767517, "loss_ce": 0.0037701984401792288, "loss_iou": 0.59765625, "loss_num": 0.0255126953125, "loss_xval": 0.1279296875, "num_input_tokens_seen": 203680940, "step": 1186 }, { "epoch": 0.3122246333925166, "grad_norm": 10.99160414493074, "learning_rate": 5e-06, "loss": 0.1515, "num_input_tokens_seen": 203852944, "step": 1187 }, { "epoch": 0.3122246333925166, "loss": 0.17437157034873962, "loss_ce": 0.0008486199658364058, "loss_iou": 0.53515625, "loss_num": 0.03466796875, "loss_xval": 0.173828125, "num_input_tokens_seen": 203852944, "step": 1187 }, { "epoch": 0.3124876701519037, "grad_norm": 7.566892022382808, "learning_rate": 5e-06, "loss": 0.1357, "num_input_tokens_seen": 204025224, "step": 1188 }, { "epoch": 0.3124876701519037, "loss": 0.16989970207214355, "loss_ce": 0.004982716403901577, "loss_iou": 0.4765625, "loss_num": 0.032958984375, "loss_xval": 0.1650390625, "num_input_tokens_seen": 204025224, "step": 1188 }, { "epoch": 0.31275070691129087, "grad_norm": 4.336576845925468, "learning_rate": 5e-06, "loss": 0.1629, "num_input_tokens_seen": 204197436, "step": 1189 }, { "epoch": 0.31275070691129087, "loss": 0.30051514506340027, "loss_ce": 0.0009546162909828126, "loss_iou": 0.390625, "loss_num": 0.06005859375, "loss_xval": 0.298828125, "num_input_tokens_seen": 204197436, "step": 1189 }, { "epoch": 0.31301374367067797, "grad_norm": 8.328730704134305, "learning_rate": 5e-06, "loss": 0.1442, "num_input_tokens_seen": 204369676, "step": 1190 }, { "epoch": 0.31301374367067797, "loss": 0.18648235499858856, "loss_ce": 0.0020035963971167803, "loss_iou": 0.44921875, "loss_num": 0.036865234375, "loss_xval": 0.1845703125, "num_input_tokens_seen": 204369676, "step": 1190 }, { "epoch": 0.3132767804300651, "grad_norm": 6.248602390066733, "learning_rate": 5e-06, "loss": 0.2073, 
"num_input_tokens_seen": 204541920, "step": 1191 }, { "epoch": 0.3132767804300651, "loss": 0.12927605211734772, "loss_ce": 0.0012548138620331883, "loss_iou": 0.51953125, "loss_num": 0.025634765625, "loss_xval": 0.1279296875, "num_input_tokens_seen": 204541920, "step": 1191 }, { "epoch": 0.31353981718945223, "grad_norm": 13.468629090291936, "learning_rate": 5e-06, "loss": 0.1227, "num_input_tokens_seen": 204714352, "step": 1192 }, { "epoch": 0.31353981718945223, "loss": 0.13916926085948944, "loss_ce": 0.002206363482400775, "loss_iou": 0.64453125, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 204714352, "step": 1192 }, { "epoch": 0.31380285394883933, "grad_norm": 14.97613941577105, "learning_rate": 5e-06, "loss": 0.1271, "num_input_tokens_seen": 204886516, "step": 1193 }, { "epoch": 0.31380285394883933, "loss": 0.07034310698509216, "loss_ce": 0.002716155955567956, "loss_iou": 0.53515625, "loss_num": 0.0135498046875, "loss_xval": 0.0673828125, "num_input_tokens_seen": 204886516, "step": 1193 }, { "epoch": 0.3140658907082265, "grad_norm": 7.0534203975755085, "learning_rate": 5e-06, "loss": 0.1572, "num_input_tokens_seen": 205058780, "step": 1194 }, { "epoch": 0.3140658907082265, "loss": 0.1390216201543808, "loss_ce": 0.00144837680272758, "loss_iou": 0.63671875, "loss_num": 0.0274658203125, "loss_xval": 0.1376953125, "num_input_tokens_seen": 205058780, "step": 1194 }, { "epoch": 0.3143289274676136, "grad_norm": 8.447198281833328, "learning_rate": 5e-06, "loss": 0.125, "num_input_tokens_seen": 205231072, "step": 1195 }, { "epoch": 0.3143289274676136, "loss": 0.13909873366355896, "loss_ce": 0.00030477988184429705, "loss_iou": 0.5390625, "loss_num": 0.0277099609375, "loss_xval": 0.138671875, "num_input_tokens_seen": 205231072, "step": 1195 }, { "epoch": 0.3145919642270007, "grad_norm": 11.546456728325778, "learning_rate": 5e-06, "loss": 0.1617, "num_input_tokens_seen": 205403380, "step": 1196 }, { "epoch": 0.3145919642270007, "loss": 
0.21954891085624695, "loss_ce": 0.0075127785094082355, "loss_iou": null, "loss_num": 0.04248046875, "loss_xval": 0.2119140625, "num_input_tokens_seen": 205403380, "step": 1196 }, { "epoch": 0.31485500098638786, "grad_norm": 22.283163028622614, "learning_rate": 5e-06, "loss": 0.1286, "num_input_tokens_seen": 205575536, "step": 1197 }, { "epoch": 0.31485500098638786, "loss": 0.10035932064056396, "loss_ce": 0.0016349535435438156, "loss_iou": 0.5703125, "loss_num": 0.019775390625, "loss_xval": 0.0986328125, "num_input_tokens_seen": 205575536, "step": 1197 }, { "epoch": 0.31511803774577496, "grad_norm": 13.092900629924303, "learning_rate": 5e-06, "loss": 0.1357, "num_input_tokens_seen": 205747664, "step": 1198 }, { "epoch": 0.31511803774577496, "loss": 0.1355750560760498, "loss_ce": 0.0009009751374833286, "loss_iou": 0.46484375, "loss_num": 0.0269775390625, "loss_xval": 0.134765625, "num_input_tokens_seen": 205747664, "step": 1198 }, { "epoch": 0.3153810745051621, "grad_norm": 4.560396330595116, "learning_rate": 5e-06, "loss": 0.1153, "num_input_tokens_seen": 205919612, "step": 1199 }, { "epoch": 0.3153810745051621, "loss": 0.085871621966362, "loss_ce": 0.0012463756138458848, "loss_iou": 0.478515625, "loss_num": 0.0169677734375, "loss_xval": 0.08447265625, "num_input_tokens_seen": 205919612, "step": 1199 }, { "epoch": 0.3156441112645492, "grad_norm": 7.441372297845623, "learning_rate": 5e-06, "loss": 0.1451, "num_input_tokens_seen": 206092012, "step": 1200 }, { "epoch": 0.3156441112645492, "loss": 0.13713139295578003, "loss_ce": 0.004868209362030029, "loss_iou": 0.6015625, "loss_num": 0.0264892578125, "loss_xval": 0.1318359375, "num_input_tokens_seen": 206092012, "step": 1200 }, { "epoch": 0.3159071480239363, "grad_norm": 5.3666102379931795, "learning_rate": 5e-06, "loss": 0.1093, "num_input_tokens_seen": 206262016, "step": 1201 }, { "epoch": 0.3159071480239363, "loss": 0.14059683680534363, "loss_ce": 0.001070460770279169, "loss_iou": 0.53125, "loss_num": 
0.0279541015625, "loss_xval": 0.1396484375, "num_input_tokens_seen": 206262016, "step": 1201 }, { "epoch": 0.3161701847833235, "grad_norm": 8.935322327098595, "learning_rate": 5e-06, "loss": 0.1362, "num_input_tokens_seen": 206434192, "step": 1202 }, { "epoch": 0.3161701847833235, "loss": 0.13417284190654755, "loss_ce": 0.0015129297971725464, "loss_iou": 0.671875, "loss_num": 0.0264892578125, "loss_xval": 0.1328125, "num_input_tokens_seen": 206434192, "step": 1202 }, { "epoch": 0.3164332215427106, "grad_norm": 10.825802927092472, "learning_rate": 5e-06, "loss": 0.1619, "num_input_tokens_seen": 206604572, "step": 1203 }, { "epoch": 0.3164332215427106, "loss": 0.1651817262172699, "loss_ce": 0.002462008036673069, "loss_iou": 0.38671875, "loss_num": 0.032470703125, "loss_xval": 0.1630859375, "num_input_tokens_seen": 206604572, "step": 1203 }, { "epoch": 0.31669625830209774, "grad_norm": 14.238886956988445, "learning_rate": 5e-06, "loss": 0.1368, "num_input_tokens_seen": 206776604, "step": 1204 }, { "epoch": 0.31669625830209774, "loss": 0.1644083857536316, "loss_ce": 0.0012003772426396608, "loss_iou": 0.75390625, "loss_num": 0.03271484375, "loss_xval": 0.1630859375, "num_input_tokens_seen": 206776604, "step": 1204 }, { "epoch": 0.31695929506148485, "grad_norm": 7.214768469241478, "learning_rate": 5e-06, "loss": 0.162, "num_input_tokens_seen": 206948516, "step": 1205 }, { "epoch": 0.31695929506148485, "loss": 0.15767714381217957, "loss_ce": 0.0004200635012239218, "loss_iou": 0.6015625, "loss_num": 0.031494140625, "loss_xval": 0.1572265625, "num_input_tokens_seen": 206948516, "step": 1205 }, { "epoch": 0.31722233182087195, "grad_norm": 8.494853848101869, "learning_rate": 5e-06, "loss": 0.1633, "num_input_tokens_seen": 207120552, "step": 1206 }, { "epoch": 0.31722233182087195, "loss": 0.20658773183822632, "loss_ce": 0.007216397672891617, "loss_iou": 0.3671875, "loss_num": 0.039794921875, "loss_xval": 0.19921875, "num_input_tokens_seen": 207120552, "step": 1206 }, { 
"epoch": 0.3174853685802591, "grad_norm": 8.328419666401985, "learning_rate": 5e-06, "loss": 0.1285, "num_input_tokens_seen": 207292524, "step": 1207 }, { "epoch": 0.3174853685802591, "loss": 0.1329333782196045, "loss_ce": 0.0009753549238666892, "loss_iou": 0.55078125, "loss_num": 0.0263671875, "loss_xval": 0.1318359375, "num_input_tokens_seen": 207292524, "step": 1207 }, { "epoch": 0.3177484053396462, "grad_norm": 10.047313768362223, "learning_rate": 5e-06, "loss": 0.1291, "num_input_tokens_seen": 207464680, "step": 1208 }, { "epoch": 0.3177484053396462, "loss": 0.12349092215299606, "loss_ce": 0.0011764641385525465, "loss_iou": 0.515625, "loss_num": 0.0244140625, "loss_xval": 0.1220703125, "num_input_tokens_seen": 207464680, "step": 1208 }, { "epoch": 0.3180114420990333, "grad_norm": 12.242484724411801, "learning_rate": 5e-06, "loss": 0.1693, "num_input_tokens_seen": 207634788, "step": 1209 }, { "epoch": 0.3180114420990333, "loss": 0.21163830161094666, "loss_ce": 0.0032947922591120005, "loss_iou": 0.54296875, "loss_num": 0.041748046875, "loss_xval": 0.2080078125, "num_input_tokens_seen": 207634788, "step": 1209 }, { "epoch": 0.3182744788584205, "grad_norm": 5.397067809441159, "learning_rate": 5e-06, "loss": 0.1287, "num_input_tokens_seen": 207806604, "step": 1210 }, { "epoch": 0.3182744788584205, "loss": 0.14931055903434753, "loss_ce": 0.00026271765818819404, "loss_iou": 0.5859375, "loss_num": 0.02978515625, "loss_xval": 0.1494140625, "num_input_tokens_seen": 207806604, "step": 1210 }, { "epoch": 0.3185375156178076, "grad_norm": 5.849344287815652, "learning_rate": 5e-06, "loss": 0.1585, "num_input_tokens_seen": 207979236, "step": 1211 }, { "epoch": 0.3185375156178076, "loss": 0.16523879766464233, "loss_ce": 0.000810077937785536, "loss_iou": 0.65234375, "loss_num": 0.032958984375, "loss_xval": 0.1640625, "num_input_tokens_seen": 207979236, "step": 1211 }, { "epoch": 0.31880055237719473, "grad_norm": 6.099253346488696, "learning_rate": 5e-06, "loss": 0.1421, 
"num_input_tokens_seen": 208151368, "step": 1212 }, { "epoch": 0.31880055237719473, "loss": 0.12017640471458435, "loss_ce": 0.00015077157877385616, "loss_iou": 0.6171875, "loss_num": 0.0240478515625, "loss_xval": 0.1201171875, "num_input_tokens_seen": 208151368, "step": 1212 }, { "epoch": 0.31906358913658184, "grad_norm": 6.749805208741857, "learning_rate": 5e-06, "loss": 0.1114, "num_input_tokens_seen": 208323600, "step": 1213 }, { "epoch": 0.31906358913658184, "loss": 0.1262407749891281, "loss_ce": 0.0037432105746120214, "loss_iou": 0.4765625, "loss_num": 0.0245361328125, "loss_xval": 0.12255859375, "num_input_tokens_seen": 208323600, "step": 1213 }, { "epoch": 0.31932662589596894, "grad_norm": 7.672226851345336, "learning_rate": 5e-06, "loss": 0.0956, "num_input_tokens_seen": 208495836, "step": 1214 }, { "epoch": 0.31932662589596894, "loss": 0.09719178080558777, "loss_ce": 0.0004205434233881533, "loss_iou": 0.5625, "loss_num": 0.0194091796875, "loss_xval": 0.0966796875, "num_input_tokens_seen": 208495836, "step": 1214 }, { "epoch": 0.3195896626553561, "grad_norm": 7.992027772652848, "learning_rate": 5e-06, "loss": 0.1721, "num_input_tokens_seen": 208667840, "step": 1215 }, { "epoch": 0.3195896626553561, "loss": 0.1424046754837036, "loss_ce": 0.00037586723919957876, "loss_iou": 0.46484375, "loss_num": 0.0284423828125, "loss_xval": 0.1416015625, "num_input_tokens_seen": 208667840, "step": 1215 }, { "epoch": 0.3198526994147432, "grad_norm": 6.160546703869267, "learning_rate": 5e-06, "loss": 0.119, "num_input_tokens_seen": 208840040, "step": 1216 }, { "epoch": 0.3198526994147432, "loss": 0.11277418583631516, "loss_ce": 0.0004084610554855317, "loss_iou": 0.48046875, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 208840040, "step": 1216 }, { "epoch": 0.32011573617413036, "grad_norm": 14.924491605710916, "learning_rate": 5e-06, "loss": 0.1657, "num_input_tokens_seen": 209010176, "step": 1217 }, { "epoch": 0.32011573617413036, "loss": 
0.1102285385131836, "loss_ce": 0.0007619824027642608, "loss_iou": 0.51171875, "loss_num": 0.0218505859375, "loss_xval": 0.109375, "num_input_tokens_seen": 209010176, "step": 1217 }, { "epoch": 0.32037877293351746, "grad_norm": 13.936435144745488, "learning_rate": 5e-06, "loss": 0.1294, "num_input_tokens_seen": 209182304, "step": 1218 }, { "epoch": 0.32037877293351746, "loss": 0.12400620430707932, "loss_ce": 0.0005168293137103319, "loss_iou": 0.53515625, "loss_num": 0.024658203125, "loss_xval": 0.12353515625, "num_input_tokens_seen": 209182304, "step": 1218 }, { "epoch": 0.32064180969290457, "grad_norm": 5.31537857924368, "learning_rate": 5e-06, "loss": 0.1329, "num_input_tokens_seen": 209354400, "step": 1219 }, { "epoch": 0.32064180969290457, "loss": 0.15276062488555908, "loss_ce": 0.0029498354997485876, "loss_iou": 0.50390625, "loss_num": 0.0299072265625, "loss_xval": 0.1494140625, "num_input_tokens_seen": 209354400, "step": 1219 }, { "epoch": 0.3209048464522917, "grad_norm": 8.044699509860337, "learning_rate": 5e-06, "loss": 0.1303, "num_input_tokens_seen": 209526592, "step": 1220 }, { "epoch": 0.3209048464522917, "loss": 0.13095101714134216, "loss_ce": 0.00137336365878582, "loss_iou": 0.61328125, "loss_num": 0.02587890625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 209526592, "step": 1220 }, { "epoch": 0.32116788321167883, "grad_norm": 6.178496640552883, "learning_rate": 5e-06, "loss": 0.1392, "num_input_tokens_seen": 209699004, "step": 1221 }, { "epoch": 0.32116788321167883, "loss": 0.11922457814216614, "loss_ce": 0.00017550383927300572, "loss_iou": 0.625, "loss_num": 0.0238037109375, "loss_xval": 0.119140625, "num_input_tokens_seen": 209699004, "step": 1221 }, { "epoch": 0.32143091997106593, "grad_norm": 5.682094913930693, "learning_rate": 5e-06, "loss": 0.1394, "num_input_tokens_seen": 209871116, "step": 1222 }, { "epoch": 0.32143091997106593, "loss": 0.14516758918762207, "loss_ce": 0.004481561481952667, "loss_iou": 0.328125, "loss_num": 
0.028076171875, "loss_xval": 0.140625, "num_input_tokens_seen": 209871116, "step": 1222 }, { "epoch": 0.3216939567304531, "grad_norm": 7.037588469004313, "learning_rate": 5e-06, "loss": 0.1381, "num_input_tokens_seen": 210041224, "step": 1223 }, { "epoch": 0.3216939567304531, "loss": 0.1431758999824524, "loss_ce": 0.002764530945569277, "loss_iou": 0.54296875, "loss_num": 0.028076171875, "loss_xval": 0.140625, "num_input_tokens_seen": 210041224, "step": 1223 }, { "epoch": 0.3219569934898402, "grad_norm": 23.622534135841367, "learning_rate": 5e-06, "loss": 0.1595, "num_input_tokens_seen": 210211380, "step": 1224 }, { "epoch": 0.3219569934898402, "loss": 0.1730581820011139, "loss_ce": 0.0024649298284202814, "loss_iou": 0.69921875, "loss_num": 0.0341796875, "loss_xval": 0.1708984375, "num_input_tokens_seen": 210211380, "step": 1224 }, { "epoch": 0.32222003024922735, "grad_norm": 12.04380926493331, "learning_rate": 5e-06, "loss": 0.1454, "num_input_tokens_seen": 210383088, "step": 1225 }, { "epoch": 0.32222003024922735, "loss": 0.23281848430633545, "loss_ce": 0.00115956028457731, "loss_iou": 0.4296875, "loss_num": 0.04638671875, "loss_xval": 0.2314453125, "num_input_tokens_seen": 210383088, "step": 1225 }, { "epoch": 0.32248306700861445, "grad_norm": 4.756101930631895, "learning_rate": 5e-06, "loss": 0.0833, "num_input_tokens_seen": 210555072, "step": 1226 }, { "epoch": 0.32248306700861445, "loss": 0.06910556554794312, "loss_ce": 0.003004492959007621, "loss_iou": 0.5859375, "loss_num": 0.01324462890625, "loss_xval": 0.06591796875, "num_input_tokens_seen": 210555072, "step": 1226 }, { "epoch": 0.32274610376800156, "grad_norm": 3.8368982745320284, "learning_rate": 5e-06, "loss": 0.1197, "num_input_tokens_seen": 210727236, "step": 1227 }, { "epoch": 0.32274610376800156, "loss": 0.10896629840135574, "loss_ce": 0.0005983852897770703, "loss_iou": 0.54296875, "loss_num": 0.021728515625, "loss_xval": 0.1083984375, "num_input_tokens_seen": 210727236, "step": 1227 }, { "epoch": 
0.3230091405273887, "grad_norm": 5.290425948494437, "learning_rate": 5e-06, "loss": 0.1453, "num_input_tokens_seen": 210899444, "step": 1228 }, { "epoch": 0.3230091405273887, "loss": 0.12627889215946198, "loss_ce": 8.869695011526346e-05, "loss_iou": 0.671875, "loss_num": 0.0252685546875, "loss_xval": 0.1259765625, "num_input_tokens_seen": 210899444, "step": 1228 }, { "epoch": 0.3232721772867758, "grad_norm": 5.380666634187743, "learning_rate": 5e-06, "loss": 0.1204, "num_input_tokens_seen": 211071748, "step": 1229 }, { "epoch": 0.3232721772867758, "loss": 0.16187983751296997, "loss_ce": 0.003463074564933777, "loss_iou": 0.5234375, "loss_num": 0.03173828125, "loss_xval": 0.158203125, "num_input_tokens_seen": 211071748, "step": 1229 }, { "epoch": 0.323535214046163, "grad_norm": 6.929606907390503, "learning_rate": 5e-06, "loss": 0.1638, "num_input_tokens_seen": 211243884, "step": 1230 }, { "epoch": 0.323535214046163, "loss": 0.16732358932495117, "loss_ce": 0.003932466730475426, "loss_iou": 0.76171875, "loss_num": 0.03271484375, "loss_xval": 0.1630859375, "num_input_tokens_seen": 211243884, "step": 1230 }, { "epoch": 0.3237982508055501, "grad_norm": 4.103469407229988, "learning_rate": 5e-06, "loss": 0.1433, "num_input_tokens_seen": 211416144, "step": 1231 }, { "epoch": 0.3237982508055501, "loss": 0.16871199011802673, "loss_ce": 0.0031236184295266867, "loss_iou": 0.54296875, "loss_num": 0.033203125, "loss_xval": 0.166015625, "num_input_tokens_seen": 211416144, "step": 1231 }, { "epoch": 0.3240612875649372, "grad_norm": 4.57933905462206, "learning_rate": 5e-06, "loss": 0.1294, "num_input_tokens_seen": 211588296, "step": 1232 }, { "epoch": 0.3240612875649372, "loss": 0.16614994406700134, "loss_ce": 0.0009277852368541062, "loss_iou": 0.62109375, "loss_num": 0.032958984375, "loss_xval": 0.1650390625, "num_input_tokens_seen": 211588296, "step": 1232 }, { "epoch": 0.32432432432432434, "grad_norm": 4.940862588040882, "learning_rate": 5e-06, "loss": 0.1402, 
"num_input_tokens_seen": 211758972, "step": 1233 }, { "epoch": 0.32432432432432434, "loss": 0.14131012558937073, "loss_ce": 0.0015701348893344402, "loss_iou": 0.482421875, "loss_num": 0.0279541015625, "loss_xval": 0.1396484375, "num_input_tokens_seen": 211758972, "step": 1233 }, { "epoch": 0.32458736108371145, "grad_norm": 11.781672019450719, "learning_rate": 5e-06, "loss": 0.1631, "num_input_tokens_seen": 211928944, "step": 1234 }, { "epoch": 0.32458736108371145, "loss": 0.14883792400360107, "loss_ce": 0.0010107720736414194, "loss_iou": 0.6875, "loss_num": 0.029541015625, "loss_xval": 0.1474609375, "num_input_tokens_seen": 211928944, "step": 1234 }, { "epoch": 0.32485039784309855, "grad_norm": 19.10070261855633, "learning_rate": 5e-06, "loss": 0.1379, "num_input_tokens_seen": 212100832, "step": 1235 }, { "epoch": 0.32485039784309855, "loss": 0.07846543192863464, "loss_ce": 0.0002793997118715197, "loss_iou": 0.6328125, "loss_num": 0.015625, "loss_xval": 0.078125, "num_input_tokens_seen": 212100832, "step": 1235 }, { "epoch": 0.3251134346024857, "grad_norm": 15.821804140128867, "learning_rate": 5e-06, "loss": 0.1606, "num_input_tokens_seen": 212271444, "step": 1236 }, { "epoch": 0.3251134346024857, "loss": 0.1032799631357193, "loss_ce": 0.004799742251634598, "loss_iou": 0.5546875, "loss_num": 0.0196533203125, "loss_xval": 0.0986328125, "num_input_tokens_seen": 212271444, "step": 1236 }, { "epoch": 0.3253764713618728, "grad_norm": 5.984115475857608, "learning_rate": 5e-06, "loss": 0.0958, "num_input_tokens_seen": 212443976, "step": 1237 }, { "epoch": 0.3253764713618728, "loss": 0.06768647581338882, "loss_ce": 0.0013717777328565717, "loss_iou": 0.6796875, "loss_num": 0.01324462890625, "loss_xval": 0.06640625, "num_input_tokens_seen": 212443976, "step": 1237 }, { "epoch": 0.32563950812125997, "grad_norm": 7.104938203697124, "learning_rate": 5e-06, "loss": 0.1686, "num_input_tokens_seen": 212612720, "step": 1238 }, { "epoch": 0.32563950812125997, "loss": 
0.10599420964717865, "loss_ce": 0.002387029118835926, "loss_iou": 0.68359375, "loss_num": 0.020751953125, "loss_xval": 0.103515625, "num_input_tokens_seen": 212612720, "step": 1238 }, { "epoch": 0.32590254488064707, "grad_norm": 5.6715016587403655, "learning_rate": 5e-06, "loss": 0.1341, "num_input_tokens_seen": 212784924, "step": 1239 }, { "epoch": 0.32590254488064707, "loss": 0.07988797873258591, "loss_ce": 0.00020658349967561662, "loss_iou": 0.5078125, "loss_num": 0.0159912109375, "loss_xval": 0.07958984375, "num_input_tokens_seen": 212784924, "step": 1239 }, { "epoch": 0.3261655816400342, "grad_norm": 10.066783726153659, "learning_rate": 5e-06, "loss": 0.1354, "num_input_tokens_seen": 212957204, "step": 1240 }, { "epoch": 0.3261655816400342, "loss": 0.24682722985744476, "loss_ce": 0.0004893409786745906, "loss_iou": 0.46484375, "loss_num": 0.04931640625, "loss_xval": 0.24609375, "num_input_tokens_seen": 212957204, "step": 1240 }, { "epoch": 0.32642861839942133, "grad_norm": 6.605871983356305, "learning_rate": 5e-06, "loss": 0.1476, "num_input_tokens_seen": 213129636, "step": 1241 }, { "epoch": 0.32642861839942133, "loss": 0.14497268199920654, "loss_ce": 0.008375998586416245, "loss_iou": 0.55859375, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 213129636, "step": 1241 }, { "epoch": 0.32669165515880844, "grad_norm": 6.65301903793999, "learning_rate": 5e-06, "loss": 0.1883, "num_input_tokens_seen": 213302188, "step": 1242 }, { "epoch": 0.32669165515880844, "loss": 0.25356027483940125, "loss_ce": 0.002705805469304323, "loss_iou": 0.71875, "loss_num": 0.05029296875, "loss_xval": 0.25, "num_input_tokens_seen": 213302188, "step": 1242 }, { "epoch": 0.3269546919181956, "grad_norm": 6.148620904940904, "learning_rate": 5e-06, "loss": 0.1644, "num_input_tokens_seen": 213474112, "step": 1243 }, { "epoch": 0.3269546919181956, "loss": 0.10272787511348724, "loss_ce": 0.00427817041054368, "loss_iou": 0.57421875, "loss_num": 0.0196533203125, 
"loss_xval": 0.0986328125, "num_input_tokens_seen": 213474112, "step": 1243 }, { "epoch": 0.3272177286775827, "grad_norm": 8.559691577151979, "learning_rate": 5e-06, "loss": 0.214, "num_input_tokens_seen": 213646008, "step": 1244 }, { "epoch": 0.3272177286775827, "loss": 0.2430175393819809, "loss_ce": 0.0016845206264406443, "loss_iou": 0.5625, "loss_num": 0.04833984375, "loss_xval": 0.2412109375, "num_input_tokens_seen": 213646008, "step": 1244 }, { "epoch": 0.3274807654369698, "grad_norm": 4.550203288561425, "learning_rate": 5e-06, "loss": 0.127, "num_input_tokens_seen": 213816528, "step": 1245 }, { "epoch": 0.3274807654369698, "loss": 0.10727906972169876, "loss_ce": 0.001383074326440692, "loss_iou": 0.59765625, "loss_num": 0.021240234375, "loss_xval": 0.10595703125, "num_input_tokens_seen": 213816528, "step": 1245 }, { "epoch": 0.32774380219635696, "grad_norm": 8.680127486966335, "learning_rate": 5e-06, "loss": 0.1341, "num_input_tokens_seen": 213988932, "step": 1246 }, { "epoch": 0.32774380219635696, "loss": 0.16796328127384186, "loss_ce": 0.0009405763121321797, "loss_iou": 0.470703125, "loss_num": 0.033447265625, "loss_xval": 0.1669921875, "num_input_tokens_seen": 213988932, "step": 1246 }, { "epoch": 0.32800683895574406, "grad_norm": 9.402943236610666, "learning_rate": 5e-06, "loss": 0.1439, "num_input_tokens_seen": 214161184, "step": 1247 }, { "epoch": 0.32800683895574406, "loss": 0.18202053010463715, "loss_ce": 0.008070331066846848, "loss_iou": 0.5859375, "loss_num": 0.034912109375, "loss_xval": 0.173828125, "num_input_tokens_seen": 214161184, "step": 1247 }, { "epoch": 0.32826987571513117, "grad_norm": 8.949806900643816, "learning_rate": 5e-06, "loss": 0.1733, "num_input_tokens_seen": 214333220, "step": 1248 }, { "epoch": 0.32826987571513117, "loss": 0.18508628010749817, "loss_ce": 0.001980806002393365, "loss_iou": 0.67578125, "loss_num": 0.03662109375, "loss_xval": 0.18359375, "num_input_tokens_seen": 214333220, "step": 1248 }, { "epoch": 
0.3285329124745183, "grad_norm": 9.623428359724258, "learning_rate": 5e-06, "loss": 0.1899, "num_input_tokens_seen": 214501988, "step": 1249 }, { "epoch": 0.3285329124745183, "loss": 0.18457330763339996, "loss_ce": 0.002108700107783079, "loss_iou": 0.46875, "loss_num": 0.036376953125, "loss_xval": 0.1826171875, "num_input_tokens_seen": 214501988, "step": 1249 }, { "epoch": 0.3287959492339054, "grad_norm": 5.549112096762312, "learning_rate": 5e-06, "loss": 0.1748, "num_input_tokens_seen": 214672212, "step": 1250 }, { "epoch": 0.3287959492339054, "eval_websight_new_CIoU": 0.8321745097637177, "eval_websight_new_GIoU": 0.8282029330730438, "eval_websight_new_IoU": 0.8425185084342957, "eval_websight_new_MAE_all": 0.030896120704710484, "eval_websight_new_MAE_h": 0.020350518636405468, "eval_websight_new_MAE_w": 0.041066043078899384, "eval_websight_new_MAE_x": 0.04287236928939819, "eval_websight_new_MAE_y": 0.019295550882816315, "eval_websight_new_NUM_probability": 0.9998577535152435, "eval_websight_new_inside_bbox": 0.984375, "eval_websight_new_loss": 0.13457995653152466, "eval_websight_new_loss_ce": 2.8165732146590017e-05, "eval_websight_new_loss_iou": 0.39599609375, "eval_websight_new_loss_num": 0.022901535034179688, "eval_websight_new_loss_xval": 0.1145477294921875, "eval_websight_new_runtime": 54.6461, "eval_websight_new_samples_per_second": 0.915, "eval_websight_new_steps_per_second": 0.037, "num_input_tokens_seen": 214672212, "step": 1250 }, { "epoch": 0.3287959492339054, "eval_seeclick_CIoU": 0.5724749565124512, "eval_seeclick_GIoU": 0.5666466653347015, "eval_seeclick_IoU": 0.5994701087474823, "eval_seeclick_MAE_all": 0.054681919515132904, "eval_seeclick_MAE_h": 0.03272205591201782, "eval_seeclick_MAE_w": 0.07547581382095814, "eval_seeclick_MAE_x": 0.07945681735873222, "eval_seeclick_MAE_y": 0.031073003076016903, "eval_seeclick_NUM_probability": 0.9999328255653381, "eval_seeclick_inside_bbox": 0.8465909063816071, "eval_seeclick_loss": 0.2362295389175415, 
"eval_seeclick_loss_ce": 0.008988222572952509, "eval_seeclick_loss_iou": 0.5384521484375, "eval_seeclick_loss_num": 0.04405975341796875, "eval_seeclick_loss_xval": 0.22039794921875, "eval_seeclick_runtime": 77.1136, "eval_seeclick_samples_per_second": 0.558, "eval_seeclick_steps_per_second": 0.026, "num_input_tokens_seen": 214672212, "step": 1250 }, { "epoch": 0.3287959492339054, "eval_icons_CIoU": 0.8087750673294067, "eval_icons_GIoU": 0.7993076145648956, "eval_icons_IoU": 0.8191726505756378, "eval_icons_MAE_all": 0.02884785458445549, "eval_icons_MAE_h": 0.033479243516922, "eval_icons_MAE_w": 0.024947408586740494, "eval_icons_MAE_x": 0.02466664183884859, "eval_icons_MAE_y": 0.0322981309145689, "eval_icons_NUM_probability": 0.9998048841953278, "eval_icons_inside_bbox": 0.9565972089767456, "eval_icons_loss": 0.08432088792324066, "eval_icons_loss_ce": 7.355650814133696e-05, "eval_icons_loss_iou": 0.567626953125, "eval_icons_loss_num": 0.015069961547851562, "eval_icons_loss_xval": 0.0753631591796875, "eval_icons_runtime": 78.2995, "eval_icons_samples_per_second": 0.639, "eval_icons_steps_per_second": 0.026, "num_input_tokens_seen": 214672212, "step": 1250 }, { "epoch": 0.3287959492339054, "eval_screenspot_CIoU": 0.5492339730262756, "eval_screenspot_GIoU": 0.5352775057156881, "eval_screenspot_IoU": 0.5879749854405721, "eval_screenspot_MAE_all": 0.0870671272277832, "eval_screenspot_MAE_h": 0.051297743494311966, "eval_screenspot_MAE_w": 0.14652628699938455, "eval_screenspot_MAE_x": 0.10099601248900096, "eval_screenspot_MAE_y": 0.04944847462077936, "eval_screenspot_NUM_probability": 0.999727189540863, "eval_screenspot_inside_bbox": 0.8529166579246521, "eval_screenspot_loss": 0.8003170490264893, "eval_screenspot_loss_ce": 0.4581688741842906, "eval_screenspot_loss_iou": 0.4616292317708333, "eval_screenspot_loss_num": 0.06711324055989583, "eval_screenspot_loss_xval": 0.3355305989583333, "eval_screenspot_runtime": 146.7221, "eval_screenspot_samples_per_second": 0.607, 
"eval_screenspot_steps_per_second": 0.02, "num_input_tokens_seen": 214672212, "step": 1250 }, { "epoch": 0.3287959492339054, "loss": 0.7908815741539001, "loss_ce": 0.44920679926872253, "loss_iou": 0.390625, "loss_num": 0.068359375, "loss_xval": 0.341796875, "num_input_tokens_seen": 214672212, "step": 1250 }, { "epoch": 0.3290589859932926, "grad_norm": 5.65145087220912, "learning_rate": 5e-06, "loss": 0.1612, "num_input_tokens_seen": 214840640, "step": 1251 }, { "epoch": 0.3290589859932926, "loss": 0.17782355844974518, "loss_ce": 0.0008826321572996676, "loss_iou": 0.458984375, "loss_num": 0.035400390625, "loss_xval": 0.1767578125, "num_input_tokens_seen": 214840640, "step": 1251 }, { "epoch": 0.3293220227526797, "grad_norm": 7.22202274640801, "learning_rate": 5e-06, "loss": 0.1279, "num_input_tokens_seen": 215011212, "step": 1252 }, { "epoch": 0.3293220227526797, "loss": 0.13640594482421875, "loss_ce": 0.0011520386906340718, "loss_iou": 0.5859375, "loss_num": 0.027099609375, "loss_xval": 0.134765625, "num_input_tokens_seen": 215011212, "step": 1252 }, { "epoch": 0.3295850595120668, "grad_norm": 5.1577086011143685, "learning_rate": 5e-06, "loss": 0.1307, "num_input_tokens_seen": 215183424, "step": 1253 }, { "epoch": 0.3295850595120668, "loss": 0.12353098392486572, "loss_ce": 0.0006061755702830851, "loss_iou": 0.5703125, "loss_num": 0.024658203125, "loss_xval": 0.123046875, "num_input_tokens_seen": 215183424, "step": 1253 }, { "epoch": 0.32984809627145395, "grad_norm": 7.368992653841811, "learning_rate": 5e-06, "loss": 0.1308, "num_input_tokens_seen": 215355224, "step": 1254 }, { "epoch": 0.32984809627145395, "loss": 0.15012162923812866, "loss_ce": 0.0011348105035722256, "loss_iou": 0.51171875, "loss_num": 0.02978515625, "loss_xval": 0.1494140625, "num_input_tokens_seen": 215355224, "step": 1254 }, { "epoch": 0.33011113303084105, "grad_norm": 5.485255366846693, "learning_rate": 5e-06, "loss": 0.1751, "num_input_tokens_seen": 215525468, "step": 1255 }, { "epoch": 
0.33011113303084105, "loss": 0.14393854141235352, "loss_ce": 0.0008110922062769532, "loss_iou": 0.4765625, "loss_num": 0.028564453125, "loss_xval": 0.1435546875, "num_input_tokens_seen": 215525468, "step": 1255 }, { "epoch": 0.3303741697902282, "grad_norm": 5.150585875058386, "learning_rate": 5e-06, "loss": 0.1205, "num_input_tokens_seen": 215697664, "step": 1256 }, { "epoch": 0.3303741697902282, "loss": 0.1360258162021637, "loss_ce": 0.0006498318398371339, "loss_iou": 0.6875, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 215697664, "step": 1256 }, { "epoch": 0.3306372065496153, "grad_norm": 4.902248025801288, "learning_rate": 5e-06, "loss": 0.1244, "num_input_tokens_seen": 215869772, "step": 1257 }, { "epoch": 0.3306372065496153, "loss": 0.08430365473031998, "loss_ce": 0.0006854891544207931, "loss_iou": 0.671875, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 215869772, "step": 1257 }, { "epoch": 0.3309002433090024, "grad_norm": 8.067058733106478, "learning_rate": 5e-06, "loss": 0.1836, "num_input_tokens_seen": 216041972, "step": 1258 }, { "epoch": 0.3309002433090024, "loss": 0.12436328083276749, "loss_ce": 0.0012248535640537739, "loss_iou": 0.66015625, "loss_num": 0.024658203125, "loss_xval": 0.123046875, "num_input_tokens_seen": 216041972, "step": 1258 }, { "epoch": 0.3311632800683896, "grad_norm": 10.281119628319207, "learning_rate": 5e-06, "loss": 0.1282, "num_input_tokens_seen": 216213968, "step": 1259 }, { "epoch": 0.3311632800683896, "loss": 0.1294836401939392, "loss_ce": 0.002225350122898817, "loss_iou": 0.490234375, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 216213968, "step": 1259 }, { "epoch": 0.3314263168277767, "grad_norm": 9.188895459960923, "learning_rate": 5e-06, "loss": 0.1358, "num_input_tokens_seen": 216384484, "step": 1260 }, { "epoch": 0.3314263168277767, "loss": 0.17113396525382996, "loss_ce": 0.0021886550821363926, "loss_iou": 0.5234375, 
"loss_num": 0.033935546875, "loss_xval": 0.1689453125, "num_input_tokens_seen": 216384484, "step": 1260 }, { "epoch": 0.3316893535871638, "grad_norm": 9.858408287132631, "learning_rate": 5e-06, "loss": 0.1691, "num_input_tokens_seen": 216556712, "step": 1261 }, { "epoch": 0.3316893535871638, "loss": 0.09802193194627762, "loss_ce": 0.004027791786938906, "loss_iou": 0.53125, "loss_num": 0.018798828125, "loss_xval": 0.09375, "num_input_tokens_seen": 216556712, "step": 1261 }, { "epoch": 0.33195239034655094, "grad_norm": 13.43147706837513, "learning_rate": 5e-06, "loss": 0.136, "num_input_tokens_seen": 216727532, "step": 1262 }, { "epoch": 0.33195239034655094, "loss": 0.12088888883590698, "loss_ce": 0.001778791076503694, "loss_iou": NaN, "loss_num": 0.0238037109375, "loss_xval": 0.119140625, "num_input_tokens_seen": 216727532, "step": 1262 }, { "epoch": 0.33221542710593804, "grad_norm": 22.307350229032924, "learning_rate": 5e-06, "loss": 0.1044, "num_input_tokens_seen": 216899424, "step": 1263 }, { "epoch": 0.33221542710593804, "loss": 0.13408984243869781, "loss_ce": 0.00048388654249720275, "loss_iou": 0.5546875, "loss_num": 0.0267333984375, "loss_xval": 0.1337890625, "num_input_tokens_seen": 216899424, "step": 1263 }, { "epoch": 0.3324784638653252, "grad_norm": 12.430687637818028, "learning_rate": 5e-06, "loss": 0.1269, "num_input_tokens_seen": 217071512, "step": 1264 }, { "epoch": 0.3324784638653252, "loss": 0.12219381332397461, "loss_ce": 0.0014357574982568622, "loss_iou": 0.72265625, "loss_num": 0.024169921875, "loss_xval": 0.12060546875, "num_input_tokens_seen": 217071512, "step": 1264 }, { "epoch": 0.3327415006247123, "grad_norm": 5.892819252370443, "learning_rate": 5e-06, "loss": 0.1204, "num_input_tokens_seen": 217241728, "step": 1265 }, { "epoch": 0.3327415006247123, "loss": 0.07451170682907104, "loss_ce": 0.0026122929994016886, "loss_iou": 0.5234375, "loss_num": 0.014404296875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 217241728, "step": 1265 }, { 
"epoch": 0.3330045373840994, "grad_norm": 7.045469734572638, "learning_rate": 5e-06, "loss": 0.1485, "num_input_tokens_seen": 217413824, "step": 1266 }, { "epoch": 0.3330045373840994, "loss": 0.19595244526863098, "loss_ce": 0.0024709957651793957, "loss_iou": 0.5234375, "loss_num": 0.038818359375, "loss_xval": 0.193359375, "num_input_tokens_seen": 217413824, "step": 1266 }, { "epoch": 0.33326757414348657, "grad_norm": 10.385925957202762, "learning_rate": 5e-06, "loss": 0.1572, "num_input_tokens_seen": 217585864, "step": 1267 }, { "epoch": 0.33326757414348657, "loss": 0.14605620503425598, "loss_ce": 0.0005483938148245215, "loss_iou": 0.57421875, "loss_num": 0.029052734375, "loss_xval": 0.1455078125, "num_input_tokens_seen": 217585864, "step": 1267 }, { "epoch": 0.33353061090287367, "grad_norm": 6.514979263584694, "learning_rate": 5e-06, "loss": 0.143, "num_input_tokens_seen": 217757720, "step": 1268 }, { "epoch": 0.33353061090287367, "loss": 0.1336522251367569, "loss_ce": 0.000595581834204495, "loss_iou": 0.66015625, "loss_num": 0.026611328125, "loss_xval": 0.1328125, "num_input_tokens_seen": 217757720, "step": 1268 }, { "epoch": 0.33379364766226083, "grad_norm": 7.782042489694296, "learning_rate": 5e-06, "loss": 0.1537, "num_input_tokens_seen": 217929844, "step": 1269 }, { "epoch": 0.33379364766226083, "loss": 0.14830312132835388, "loss_ce": 0.0005370010621845722, "loss_iou": 0.41796875, "loss_num": 0.029541015625, "loss_xval": 0.1474609375, "num_input_tokens_seen": 217929844, "step": 1269 }, { "epoch": 0.33405668442164793, "grad_norm": 37.57598415614969, "learning_rate": 5e-06, "loss": 0.1559, "num_input_tokens_seen": 218102180, "step": 1270 }, { "epoch": 0.33405668442164793, "loss": 0.125936821103096, "loss_ce": 0.000570611678995192, "loss_iou": 0.77734375, "loss_num": 0.025146484375, "loss_xval": 0.125, "num_input_tokens_seen": 218102180, "step": 1270 }, { "epoch": 0.33431972118103503, "grad_norm": 11.611735941773203, "learning_rate": 5e-06, "loss": 0.1685, 
"num_input_tokens_seen": 218272528, "step": 1271 }, { "epoch": 0.33431972118103503, "loss": 0.15356029570102692, "loss_ce": 0.0005146406474523246, "loss_iou": 0.51171875, "loss_num": 0.0306396484375, "loss_xval": 0.1533203125, "num_input_tokens_seen": 218272528, "step": 1271 }, { "epoch": 0.3345827579404222, "grad_norm": 8.349189669659024, "learning_rate": 5e-06, "loss": 0.1356, "num_input_tokens_seen": 218445008, "step": 1272 }, { "epoch": 0.3345827579404222, "loss": 0.14823183417320251, "loss_ce": 0.0009540055179968476, "loss_iou": 0.5390625, "loss_num": 0.0294189453125, "loss_xval": 0.1474609375, "num_input_tokens_seen": 218445008, "step": 1272 }, { "epoch": 0.3348457946998093, "grad_norm": 8.794519146282962, "learning_rate": 5e-06, "loss": 0.1529, "num_input_tokens_seen": 218617480, "step": 1273 }, { "epoch": 0.3348457946998093, "loss": 0.10918127745389938, "loss_ce": 0.0011185340117663145, "loss_iou": 0.625, "loss_num": 0.0216064453125, "loss_xval": 0.10791015625, "num_input_tokens_seen": 218617480, "step": 1273 }, { "epoch": 0.3351088314591964, "grad_norm": 5.0624867047638045, "learning_rate": 5e-06, "loss": 0.1463, "num_input_tokens_seen": 218789900, "step": 1274 }, { "epoch": 0.3351088314591964, "loss": 0.13429242372512817, "loss_ce": 0.00411969143897295, "loss_iou": 0.369140625, "loss_num": 0.0260009765625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 218789900, "step": 1274 }, { "epoch": 0.33537186821858356, "grad_norm": 5.751729189048832, "learning_rate": 5e-06, "loss": 0.1612, "num_input_tokens_seen": 218962156, "step": 1275 }, { "epoch": 0.33537186821858356, "loss": 0.11978072673082352, "loss_ce": 0.00350876129232347, "loss_iou": 0.453125, "loss_num": 0.0233154296875, "loss_xval": 0.1162109375, "num_input_tokens_seen": 218962156, "step": 1275 }, { "epoch": 0.33563490497797066, "grad_norm": 6.311955311958974, "learning_rate": 5e-06, "loss": 0.1055, "num_input_tokens_seen": 219132288, "step": 1276 }, { "epoch": 0.33563490497797066, "loss": 
0.1355704814195633, "loss_ce": 0.0025443662889301777, "loss_iou": 0.46484375, "loss_num": 0.026611328125, "loss_xval": 0.1328125, "num_input_tokens_seen": 219132288, "step": 1276 }, { "epoch": 0.3358979417373578, "grad_norm": 7.099991467306234, "learning_rate": 5e-06, "loss": 0.1531, "num_input_tokens_seen": 219304616, "step": 1277 }, { "epoch": 0.3358979417373578, "loss": 0.13075271248817444, "loss_ce": 0.0017243996262550354, "loss_iou": 0.55078125, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 219304616, "step": 1277 }, { "epoch": 0.3361609784967449, "grad_norm": 7.34332951683246, "learning_rate": 5e-06, "loss": 0.1414, "num_input_tokens_seen": 219476876, "step": 1278 }, { "epoch": 0.3361609784967449, "loss": 0.08354561030864716, "loss_ce": 0.0010565929114818573, "loss_iou": 0.62890625, "loss_num": 0.0164794921875, "loss_xval": 0.08251953125, "num_input_tokens_seen": 219476876, "step": 1278 }, { "epoch": 0.336424015256132, "grad_norm": 20.864457223612828, "learning_rate": 5e-06, "loss": 0.1795, "num_input_tokens_seen": 219649440, "step": 1279 }, { "epoch": 0.336424015256132, "loss": 0.14009949564933777, "loss_ce": 0.00029846589313820004, "loss_iou": 0.51953125, "loss_num": 0.0279541015625, "loss_xval": 0.1396484375, "num_input_tokens_seen": 219649440, "step": 1279 }, { "epoch": 0.3366870520155192, "grad_norm": 6.27874029788255, "learning_rate": 5e-06, "loss": 0.155, "num_input_tokens_seen": 219821712, "step": 1280 }, { "epoch": 0.3366870520155192, "loss": 0.14031162858009338, "loss_ce": 0.0007242212886922061, "loss_iou": 0.640625, "loss_num": 0.0279541015625, "loss_xval": 0.1396484375, "num_input_tokens_seen": 219821712, "step": 1280 }, { "epoch": 0.3369500887749063, "grad_norm": 4.5668271174612025, "learning_rate": 5e-06, "loss": 0.1628, "num_input_tokens_seen": 219993864, "step": 1281 }, { "epoch": 0.3369500887749063, "loss": 0.10837851464748383, "loss_ce": 0.0014449162408709526, "loss_iou": 0.40234375, "loss_num": 
0.0213623046875, "loss_xval": 0.10693359375, "num_input_tokens_seen": 219993864, "step": 1281 }, { "epoch": 0.3372131255342934, "grad_norm": 4.389742759586719, "learning_rate": 5e-06, "loss": 0.125, "num_input_tokens_seen": 220166248, "step": 1282 }, { "epoch": 0.3372131255342934, "loss": 0.23685070872306824, "loss_ce": 0.0005836054333485663, "loss_iou": 0.54296875, "loss_num": 0.04736328125, "loss_xval": 0.236328125, "num_input_tokens_seen": 220166248, "step": 1282 }, { "epoch": 0.33747616229368055, "grad_norm": 5.570317525274513, "learning_rate": 5e-06, "loss": 0.1458, "num_input_tokens_seen": 220338428, "step": 1283 }, { "epoch": 0.33747616229368055, "loss": 0.13890241086483002, "loss_ce": 0.0007493281736969948, "loss_iou": 0.44921875, "loss_num": 0.027587890625, "loss_xval": 0.1376953125, "num_input_tokens_seen": 220338428, "step": 1283 }, { "epoch": 0.33773919905306765, "grad_norm": 10.985772181850571, "learning_rate": 5e-06, "loss": 0.1223, "num_input_tokens_seen": 220510776, "step": 1284 }, { "epoch": 0.33773919905306765, "loss": 0.12931521236896515, "loss_ce": 0.0006531005492433906, "loss_iou": 0.6015625, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 220510776, "step": 1284 }, { "epoch": 0.3380022358124548, "grad_norm": 5.926973995328122, "learning_rate": 5e-06, "loss": 0.139, "num_input_tokens_seen": 220682848, "step": 1285 }, { "epoch": 0.3380022358124548, "loss": 0.21676884591579437, "loss_ce": 0.007204629480838776, "loss_iou": 0.390625, "loss_num": 0.0419921875, "loss_xval": 0.2099609375, "num_input_tokens_seen": 220682848, "step": 1285 }, { "epoch": 0.3382652725718419, "grad_norm": 7.996976963449769, "learning_rate": 5e-06, "loss": 0.1313, "num_input_tokens_seen": 220855140, "step": 1286 }, { "epoch": 0.3382652725718419, "loss": 0.08844804763793945, "loss_ce": 0.0003895749687217176, "loss_iou": 0.59375, "loss_num": 0.017578125, "loss_xval": 0.087890625, "num_input_tokens_seen": 220855140, "step": 1286 }, { "epoch": 
0.338528309331229, "grad_norm": 8.499510082866491, "learning_rate": 5e-06, "loss": 0.1178, "num_input_tokens_seen": 221027260, "step": 1287 }, { "epoch": 0.338528309331229, "loss": 0.12149707973003387, "loss_ce": 0.0004948831629008055, "loss_iou": 0.5703125, "loss_num": 0.024169921875, "loss_xval": 0.12109375, "num_input_tokens_seen": 221027260, "step": 1287 }, { "epoch": 0.3387913460906162, "grad_norm": 5.973131676418204, "learning_rate": 5e-06, "loss": 0.1328, "num_input_tokens_seen": 221199416, "step": 1288 }, { "epoch": 0.3387913460906162, "loss": 0.09466279298067093, "loss_ce": 0.00011933030327782035, "loss_iou": 0.671875, "loss_num": 0.0189208984375, "loss_xval": 0.0947265625, "num_input_tokens_seen": 221199416, "step": 1288 }, { "epoch": 0.3390543828500033, "grad_norm": 4.808893821005273, "learning_rate": 5e-06, "loss": 0.1551, "num_input_tokens_seen": 221371720, "step": 1289 }, { "epoch": 0.3390543828500033, "loss": 0.09180793166160583, "loss_ce": 0.0001941679511219263, "loss_iou": 0.54296875, "loss_num": 0.018310546875, "loss_xval": 0.091796875, "num_input_tokens_seen": 221371720, "step": 1289 }, { "epoch": 0.33931741960939044, "grad_norm": 7.016843255216574, "learning_rate": 5e-06, "loss": 0.1878, "num_input_tokens_seen": 221542276, "step": 1290 }, { "epoch": 0.33931741960939044, "loss": 0.19763490557670593, "loss_ce": 0.0007354922126978636, "loss_iou": 0.44921875, "loss_num": 0.039306640625, "loss_xval": 0.197265625, "num_input_tokens_seen": 221542276, "step": 1290 }, { "epoch": 0.33958045636877754, "grad_norm": 6.06110322351886, "learning_rate": 5e-06, "loss": 0.1227, "num_input_tokens_seen": 221714344, "step": 1291 }, { "epoch": 0.33958045636877754, "loss": 0.15020573139190674, "loss_ce": 0.0027447929605841637, "loss_iou": 0.4375, "loss_num": 0.029541015625, "loss_xval": 0.1474609375, "num_input_tokens_seen": 221714344, "step": 1291 }, { "epoch": 0.33984349312816464, "grad_norm": 8.158154012506673, "learning_rate": 5e-06, "loss": 0.1082, 
"num_input_tokens_seen": 221886284, "step": 1292 }, { "epoch": 0.33984349312816464, "loss": 0.06007716804742813, "loss_ce": 0.0002016789367189631, "loss_iou": 0.609375, "loss_num": 0.011962890625, "loss_xval": 0.059814453125, "num_input_tokens_seen": 221886284, "step": 1292 }, { "epoch": 0.3401065298875518, "grad_norm": 4.887380704800873, "learning_rate": 5e-06, "loss": 0.1348, "num_input_tokens_seen": 222058520, "step": 1293 }, { "epoch": 0.3401065298875518, "loss": 0.09021516144275665, "loss_ce": 0.0008291734848171473, "loss_iou": 0.45703125, "loss_num": 0.017822265625, "loss_xval": 0.08935546875, "num_input_tokens_seen": 222058520, "step": 1293 }, { "epoch": 0.3403695666469389, "grad_norm": 13.907525065592614, "learning_rate": 5e-06, "loss": 0.1479, "num_input_tokens_seen": 222231156, "step": 1294 }, { "epoch": 0.3403695666469389, "loss": 0.22907251119613647, "loss_ce": 0.0007704915478825569, "loss_iou": 0.6640625, "loss_num": 0.045654296875, "loss_xval": 0.228515625, "num_input_tokens_seen": 222231156, "step": 1294 }, { "epoch": 0.340632603406326, "grad_norm": 7.536500056913184, "learning_rate": 5e-06, "loss": 0.1822, "num_input_tokens_seen": 222403476, "step": 1295 }, { "epoch": 0.340632603406326, "loss": 0.1321713924407959, "loss_ce": 0.002853148616850376, "loss_iou": 0.5703125, "loss_num": 0.02587890625, "loss_xval": 0.12890625, "num_input_tokens_seen": 222403476, "step": 1295 }, { "epoch": 0.34089564016571317, "grad_norm": 8.538688840874034, "learning_rate": 5e-06, "loss": 0.1301, "num_input_tokens_seen": 222575540, "step": 1296 }, { "epoch": 0.34089564016571317, "loss": 0.09548459947109222, "loss_ce": 0.0024365070275962353, "loss_iou": 0.5625, "loss_num": 0.0186767578125, "loss_xval": 0.09326171875, "num_input_tokens_seen": 222575540, "step": 1296 }, { "epoch": 0.34115867692510027, "grad_norm": 4.787366743041949, "learning_rate": 5e-06, "loss": 0.1417, "num_input_tokens_seen": 222747704, "step": 1297 }, { "epoch": 0.34115867692510027, "loss": 
0.1637537181377411, "loss_ce": 0.0010034843580797315, "loss_iou": 0.45703125, "loss_num": 0.03271484375, "loss_xval": 0.1630859375, "num_input_tokens_seen": 222747704, "step": 1297 }, { "epoch": 0.3414217136844874, "grad_norm": 6.917043775641942, "learning_rate": 5e-06, "loss": 0.0965, "num_input_tokens_seen": 222919896, "step": 1298 }, { "epoch": 0.3414217136844874, "loss": 0.05339755862951279, "loss_ce": 0.001883886638097465, "loss_iou": 0.419921875, "loss_num": 0.01031494140625, "loss_xval": 0.051513671875, "num_input_tokens_seen": 222919896, "step": 1298 }, { "epoch": 0.34168475044387453, "grad_norm": 8.403015648704343, "learning_rate": 5e-06, "loss": 0.1296, "num_input_tokens_seen": 223092124, "step": 1299 }, { "epoch": 0.34168475044387453, "loss": 0.15119820833206177, "loss_ce": 0.0011737870518118143, "loss_iou": 0.62890625, "loss_num": 0.030029296875, "loss_xval": 0.150390625, "num_input_tokens_seen": 223092124, "step": 1299 }, { "epoch": 0.34194778720326163, "grad_norm": 7.291522615294142, "learning_rate": 5e-06, "loss": 0.1482, "num_input_tokens_seen": 223264436, "step": 1300 }, { "epoch": 0.34194778720326163, "loss": 0.14489704370498657, "loss_ce": 0.0020442616660147905, "loss_iou": 0.41796875, "loss_num": 0.028564453125, "loss_xval": 0.142578125, "num_input_tokens_seen": 223264436, "step": 1300 }, { "epoch": 0.3422108239626488, "grad_norm": 8.380731335959293, "learning_rate": 5e-06, "loss": 0.1469, "num_input_tokens_seen": 223436588, "step": 1301 }, { "epoch": 0.3422108239626488, "loss": 0.15774638950824738, "loss_ce": 0.0007334585534408689, "loss_iou": 0.51171875, "loss_num": 0.03125, "loss_xval": 0.1572265625, "num_input_tokens_seen": 223436588, "step": 1301 }, { "epoch": 0.3424738607220359, "grad_norm": 4.869592726119341, "learning_rate": 5e-06, "loss": 0.1296, "num_input_tokens_seen": 223607388, "step": 1302 }, { "epoch": 0.3424738607220359, "loss": 0.15213216841220856, "loss_ce": 0.0007344604237005115, "loss_iou": 0.5546875, "loss_num": 
0.0302734375, "loss_xval": 0.1513671875, "num_input_tokens_seen": 223607388, "step": 1302 }, { "epoch": 0.34273689748142305, "grad_norm": 5.639489902327201, "learning_rate": 5e-06, "loss": 0.1483, "num_input_tokens_seen": 223779372, "step": 1303 }, { "epoch": 0.34273689748142305, "loss": 0.17259347438812256, "loss_ce": 0.0032819565385580063, "loss_iou": 0.490234375, "loss_num": 0.033935546875, "loss_xval": 0.1689453125, "num_input_tokens_seen": 223779372, "step": 1303 }, { "epoch": 0.34299993424081016, "grad_norm": 9.093938521490475, "learning_rate": 5e-06, "loss": 0.2003, "num_input_tokens_seen": 223951388, "step": 1304 }, { "epoch": 0.34299993424081016, "loss": 0.22136257588863373, "loss_ce": 0.0015749745070934296, "loss_iou": 0.62109375, "loss_num": 0.0439453125, "loss_xval": 0.2197265625, "num_input_tokens_seen": 223951388, "step": 1304 }, { "epoch": 0.34326297100019726, "grad_norm": 6.510612178547868, "learning_rate": 5e-06, "loss": 0.1589, "num_input_tokens_seen": 224123456, "step": 1305 }, { "epoch": 0.34326297100019726, "loss": 0.14626947045326233, "loss_ce": 0.0030199564062058926, "loss_iou": 0.455078125, "loss_num": 0.0286865234375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 224123456, "step": 1305 }, { "epoch": 0.3435260077595844, "grad_norm": 6.4702586677675855, "learning_rate": 5e-06, "loss": 0.1824, "num_input_tokens_seen": 224295676, "step": 1306 }, { "epoch": 0.3435260077595844, "loss": 0.20608918368816376, "loss_ce": 0.0004007022944279015, "loss_iou": 0.671875, "loss_num": 0.041259765625, "loss_xval": 0.2060546875, "num_input_tokens_seen": 224295676, "step": 1306 }, { "epoch": 0.3437890445189715, "grad_norm": 24.89374059454028, "learning_rate": 5e-06, "loss": 0.1518, "num_input_tokens_seen": 224467880, "step": 1307 }, { "epoch": 0.3437890445189715, "loss": 0.12941348552703857, "loss_ce": 0.000507236341945827, "loss_iou": 0.58203125, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 224467880, "step": 1307 }, { 
"epoch": 0.3440520812783586, "grad_norm": 8.792041343116132, "learning_rate": 5e-06, "loss": 0.1322, "num_input_tokens_seen": 224640076, "step": 1308 }, { "epoch": 0.3440520812783586, "loss": 0.18377459049224854, "loss_ce": 0.0007301591685973108, "loss_iou": 0.53125, "loss_num": 0.03662109375, "loss_xval": 0.1826171875, "num_input_tokens_seen": 224640076, "step": 1308 }, { "epoch": 0.3443151180377458, "grad_norm": 7.075463244307207, "learning_rate": 5e-06, "loss": 0.1801, "num_input_tokens_seen": 224812516, "step": 1309 }, { "epoch": 0.3443151180377458, "loss": 0.10930690169334412, "loss_ce": 0.0017934793140739202, "loss_iou": 0.5078125, "loss_num": 0.021484375, "loss_xval": 0.107421875, "num_input_tokens_seen": 224812516, "step": 1309 }, { "epoch": 0.3445781547971329, "grad_norm": 5.622949079750027, "learning_rate": 5e-06, "loss": 0.1576, "num_input_tokens_seen": 224985136, "step": 1310 }, { "epoch": 0.3445781547971329, "loss": 0.13441142439842224, "loss_ce": 0.0033689369447529316, "loss_iou": 0.625, "loss_num": 0.0262451171875, "loss_xval": 0.130859375, "num_input_tokens_seen": 224985136, "step": 1310 }, { "epoch": 0.34484119155652004, "grad_norm": 6.126844030056675, "learning_rate": 5e-06, "loss": 0.1343, "num_input_tokens_seen": 225157120, "step": 1311 }, { "epoch": 0.34484119155652004, "loss": 0.1415342092514038, "loss_ce": 0.000787146098446101, "loss_iou": 0.59375, "loss_num": 0.0281982421875, "loss_xval": 0.140625, "num_input_tokens_seen": 225157120, "step": 1311 }, { "epoch": 0.34510422831590715, "grad_norm": 5.277811585596711, "learning_rate": 5e-06, "loss": 0.1527, "num_input_tokens_seen": 225329012, "step": 1312 }, { "epoch": 0.34510422831590715, "loss": 0.08680924028158188, "loss_ce": 0.006761634722352028, "loss_iou": 0.58203125, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 225329012, "step": 1312 }, { "epoch": 0.34536726507529425, "grad_norm": 4.953551061650772, "learning_rate": 5e-06, "loss": 0.0972, 
"num_input_tokens_seen": 225501536, "step": 1313 }, { "epoch": 0.34536726507529425, "loss": 0.10339295864105225, "loss_ce": 0.0035394439473748207, "loss_iou": 0.451171875, "loss_num": 0.02001953125, "loss_xval": 0.099609375, "num_input_tokens_seen": 225501536, "step": 1313 }, { "epoch": 0.3456303018346814, "grad_norm": 14.376989767483847, "learning_rate": 5e-06, "loss": 0.1896, "num_input_tokens_seen": 225673976, "step": 1314 }, { "epoch": 0.3456303018346814, "loss": 0.13013647496700287, "loss_ce": 0.0019016144797205925, "loss_iou": 0.76953125, "loss_num": 0.025634765625, "loss_xval": 0.1279296875, "num_input_tokens_seen": 225673976, "step": 1314 }, { "epoch": 0.3458933385940685, "grad_norm": 4.351015648559307, "learning_rate": 5e-06, "loss": 0.1106, "num_input_tokens_seen": 225844284, "step": 1315 }, { "epoch": 0.3458933385940685, "loss": 0.07338554412126541, "loss_ce": 0.0006926720961928368, "loss_iou": 0.478515625, "loss_num": 0.0145263671875, "loss_xval": 0.07275390625, "num_input_tokens_seen": 225844284, "step": 1315 }, { "epoch": 0.34615637535345567, "grad_norm": 8.704702003668412, "learning_rate": 5e-06, "loss": 0.1023, "num_input_tokens_seen": 226014724, "step": 1316 }, { "epoch": 0.34615637535345567, "loss": 0.13227002322673798, "loss_ce": 0.00031201643287204206, "loss_iou": 0.58984375, "loss_num": 0.0263671875, "loss_xval": 0.1318359375, "num_input_tokens_seen": 226014724, "step": 1316 }, { "epoch": 0.3464194121128428, "grad_norm": 9.402885662503637, "learning_rate": 5e-06, "loss": 0.1522, "num_input_tokens_seen": 226186864, "step": 1317 }, { "epoch": 0.3464194121128428, "loss": 0.13149945437908173, "loss_ce": 0.0032035536132752895, "loss_iou": 0.466796875, "loss_num": 0.025634765625, "loss_xval": 0.1279296875, "num_input_tokens_seen": 226186864, "step": 1317 }, { "epoch": 0.3466824488722299, "grad_norm": 5.4441598572448475, "learning_rate": 5e-06, "loss": 0.1247, "num_input_tokens_seen": 226359284, "step": 1318 }, { "epoch": 0.3466824488722299, "loss": 
0.09241662174463272, "loss_ce": 0.00010094831668538973, "loss_iou": 0.68359375, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 226359284, "step": 1318 }, { "epoch": 0.34694548563161703, "grad_norm": 5.838383036724073, "learning_rate": 5e-06, "loss": 0.1544, "num_input_tokens_seen": 226531468, "step": 1319 }, { "epoch": 0.34694548563161703, "loss": 0.10588014125823975, "loss_ce": 0.00016725034220144153, "loss_iou": 0.56640625, "loss_num": 0.0211181640625, "loss_xval": 0.10546875, "num_input_tokens_seen": 226531468, "step": 1319 }, { "epoch": 0.34720852239100414, "grad_norm": 6.350689517126913, "learning_rate": 5e-06, "loss": 0.1432, "num_input_tokens_seen": 226703648, "step": 1320 }, { "epoch": 0.34720852239100414, "loss": 0.11672288179397583, "loss_ce": 0.0005119539564475417, "loss_iou": 0.4765625, "loss_num": 0.023193359375, "loss_xval": 0.1162109375, "num_input_tokens_seen": 226703648, "step": 1320 }, { "epoch": 0.34747155915039124, "grad_norm": 13.847941425419814, "learning_rate": 5e-06, "loss": 0.1625, "num_input_tokens_seen": 226873276, "step": 1321 }, { "epoch": 0.34747155915039124, "loss": 0.1297488510608673, "loss_ce": 0.0002322565414942801, "loss_iou": 0.435546875, "loss_num": 0.02587890625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 226873276, "step": 1321 }, { "epoch": 0.3477345959097784, "grad_norm": 23.595553939708278, "learning_rate": 5e-06, "loss": 0.1521, "num_input_tokens_seen": 227045376, "step": 1322 }, { "epoch": 0.3477345959097784, "loss": 0.1232631504535675, "loss_ce": 0.00033834436908364296, "loss_iou": 0.63671875, "loss_num": 0.0245361328125, "loss_xval": 0.123046875, "num_input_tokens_seen": 227045376, "step": 1322 }, { "epoch": 0.3479976326691655, "grad_norm": 44.36951009678579, "learning_rate": 5e-06, "loss": 0.1561, "num_input_tokens_seen": 227215620, "step": 1323 }, { "epoch": 0.3479976326691655, "loss": 0.07254654914140701, "loss_ce": 0.000280924781691283, "loss_iou": 0.51953125, "loss_num": 
0.01446533203125, "loss_xval": 0.072265625, "num_input_tokens_seen": 227215620, "step": 1323 }, { "epoch": 0.34826066942855266, "grad_norm": 5.242558618670207, "learning_rate": 5e-06, "loss": 0.129, "num_input_tokens_seen": 227387716, "step": 1324 }, { "epoch": 0.34826066942855266, "loss": 0.11591322720050812, "loss_ce": 0.0015028227353468537, "loss_iou": 0.5703125, "loss_num": 0.0228271484375, "loss_xval": 0.1142578125, "num_input_tokens_seen": 227387716, "step": 1324 }, { "epoch": 0.34852370618793976, "grad_norm": 6.076654000097945, "learning_rate": 5e-06, "loss": 0.1183, "num_input_tokens_seen": 227560176, "step": 1325 }, { "epoch": 0.34852370618793976, "loss": 0.09558144956827164, "loss_ce": 0.0009464399190619588, "loss_iou": 0.56640625, "loss_num": 0.0189208984375, "loss_xval": 0.0947265625, "num_input_tokens_seen": 227560176, "step": 1325 }, { "epoch": 0.34878674294732687, "grad_norm": 8.445480824601002, "learning_rate": 5e-06, "loss": 0.15, "num_input_tokens_seen": 227732380, "step": 1326 }, { "epoch": 0.34878674294732687, "loss": 0.18825021386146545, "loss_ce": 0.0008112426148727536, "loss_iou": 0.376953125, "loss_num": 0.037353515625, "loss_xval": 0.1875, "num_input_tokens_seen": 227732380, "step": 1326 }, { "epoch": 0.349049779706714, "grad_norm": 7.063227159799308, "learning_rate": 5e-06, "loss": 0.1569, "num_input_tokens_seen": 227904472, "step": 1327 }, { "epoch": 0.349049779706714, "loss": 0.23714260756969452, "loss_ce": 0.0035610701888799667, "loss_iou": 0.466796875, "loss_num": 0.046630859375, "loss_xval": 0.2333984375, "num_input_tokens_seen": 227904472, "step": 1327 }, { "epoch": 0.34931281646610113, "grad_norm": 5.912334076761878, "learning_rate": 5e-06, "loss": 0.1626, "num_input_tokens_seen": 228076948, "step": 1328 }, { "epoch": 0.34931281646610113, "loss": 0.14180362224578857, "loss_ce": 0.0037115837913006544, "loss_iou": 0.65234375, "loss_num": 0.0277099609375, "loss_xval": 0.1376953125, "num_input_tokens_seen": 228076948, "step": 1328 }, { 
"epoch": 0.3495758532254883, "grad_norm": 6.00754661823903, "learning_rate": 5e-06, "loss": 0.1532, "num_input_tokens_seen": 228248972, "step": 1329 }, { "epoch": 0.3495758532254883, "loss": 0.14186972379684448, "loss_ce": 0.0010616088984534144, "loss_iou": 0.490234375, "loss_num": 0.0281982421875, "loss_xval": 0.140625, "num_input_tokens_seen": 228248972, "step": 1329 }, { "epoch": 0.3498388899848754, "grad_norm": 5.090693648053783, "learning_rate": 5e-06, "loss": 0.1309, "num_input_tokens_seen": 228421168, "step": 1330 }, { "epoch": 0.3498388899848754, "loss": 0.08804985880851746, "loss_ce": 0.0005559585988521576, "loss_iou": 0.466796875, "loss_num": 0.017578125, "loss_xval": 0.08740234375, "num_input_tokens_seen": 228421168, "step": 1330 }, { "epoch": 0.3501019267442625, "grad_norm": 5.2360127706674335, "learning_rate": 5e-06, "loss": 0.1219, "num_input_tokens_seen": 228593364, "step": 1331 }, { "epoch": 0.3501019267442625, "loss": 0.09556721895933151, "loss_ce": 0.0012373843928799033, "loss_iou": 0.515625, "loss_num": 0.0189208984375, "loss_xval": 0.09423828125, "num_input_tokens_seen": 228593364, "step": 1331 }, { "epoch": 0.35036496350364965, "grad_norm": 7.817751449255831, "learning_rate": 5e-06, "loss": 0.1646, "num_input_tokens_seen": 228765480, "step": 1332 }, { "epoch": 0.35036496350364965, "loss": 0.19872622191905975, "loss_ce": 0.0040851132944226265, "loss_iou": 0.427734375, "loss_num": 0.038818359375, "loss_xval": 0.1943359375, "num_input_tokens_seen": 228765480, "step": 1332 }, { "epoch": 0.35062800026303675, "grad_norm": 6.154736113026185, "learning_rate": 5e-06, "loss": 0.1352, "num_input_tokens_seen": 228938028, "step": 1333 }, { "epoch": 0.35062800026303675, "loss": 0.08339493721723557, "loss_ce": 0.0010737692937254906, "loss_iou": 0.5703125, "loss_num": 0.0164794921875, "loss_xval": 0.08251953125, "num_input_tokens_seen": 228938028, "step": 1333 }, { "epoch": 0.35089103702242386, "grad_norm": 9.10695022243035, "learning_rate": 5e-06, "loss": 
0.1561, "num_input_tokens_seen": 229110180, "step": 1334 }, { "epoch": 0.35089103702242386, "loss": 0.17051678895950317, "loss_ce": 0.0020597607363015413, "loss_iou": 0.65234375, "loss_num": 0.03369140625, "loss_xval": 0.16796875, "num_input_tokens_seen": 229110180, "step": 1334 }, { "epoch": 0.351154073781811, "grad_norm": 6.861938259215169, "learning_rate": 5e-06, "loss": 0.192, "num_input_tokens_seen": 229282592, "step": 1335 }, { "epoch": 0.351154073781811, "loss": 0.20712369680404663, "loss_ce": 0.003968170844018459, "loss_iou": 0.52734375, "loss_num": 0.04052734375, "loss_xval": 0.203125, "num_input_tokens_seen": 229282592, "step": 1335 }, { "epoch": 0.3514171105411981, "grad_norm": 6.702358306191994, "learning_rate": 5e-06, "loss": 0.1308, "num_input_tokens_seen": 229454940, "step": 1336 }, { "epoch": 0.3514171105411981, "loss": 0.1241491287946701, "loss_ce": 0.002719694282859564, "loss_iou": 0.6015625, "loss_num": 0.0242919921875, "loss_xval": 0.12158203125, "num_input_tokens_seen": 229454940, "step": 1336 }, { "epoch": 0.3516801473005853, "grad_norm": 25.54665143162104, "learning_rate": 5e-06, "loss": 0.1782, "num_input_tokens_seen": 229626740, "step": 1337 }, { "epoch": 0.3516801473005853, "loss": 0.13124999403953552, "loss_ce": 0.0006042490131221712, "loss_iou": 0.50390625, "loss_num": 0.026123046875, "loss_xval": 0.130859375, "num_input_tokens_seen": 229626740, "step": 1337 }, { "epoch": 0.3519431840599724, "grad_norm": 10.986092691383933, "learning_rate": 5e-06, "loss": 0.1479, "num_input_tokens_seen": 229795396, "step": 1338 }, { "epoch": 0.3519431840599724, "loss": 0.07223416119813919, "loss_ce": 0.00028897292213514447, "loss_iou": 0.60546875, "loss_num": 0.014404296875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 229795396, "step": 1338 }, { "epoch": 0.3522062208193595, "grad_norm": 9.146823823601064, "learning_rate": 5e-06, "loss": 0.1496, "num_input_tokens_seen": 229967592, "step": 1339 }, { "epoch": 0.3522062208193595, "loss": 
0.14658141136169434, "loss_ce": 0.00021910574287176132, "loss_iou": 0.58203125, "loss_num": 0.029296875, "loss_xval": 0.146484375, "num_input_tokens_seen": 229967592, "step": 1339 }, { "epoch": 0.35246925757874664, "grad_norm": 12.31823820919867, "learning_rate": 5e-06, "loss": 0.1059, "num_input_tokens_seen": 230139960, "step": 1340 }, { "epoch": 0.35246925757874664, "loss": 0.07799485325813293, "loss_ce": 0.0002360617509111762, "loss_iou": 0.6328125, "loss_num": 0.01556396484375, "loss_xval": 0.07763671875, "num_input_tokens_seen": 230139960, "step": 1340 }, { "epoch": 0.35273229433813375, "grad_norm": 6.277536451715721, "learning_rate": 5e-06, "loss": 0.1745, "num_input_tokens_seen": 230312536, "step": 1341 }, { "epoch": 0.35273229433813375, "loss": 0.15891912579536438, "loss_ce": 0.0026691171806305647, "loss_iou": 0.5625, "loss_num": 0.03125, "loss_xval": 0.15625, "num_input_tokens_seen": 230312536, "step": 1341 }, { "epoch": 0.3529953310975209, "grad_norm": 10.816707100229003, "learning_rate": 5e-06, "loss": 0.1205, "num_input_tokens_seen": 230482960, "step": 1342 }, { "epoch": 0.3529953310975209, "loss": 0.10626394301652908, "loss_ce": 0.0003069115919061005, "loss_iou": 0.64453125, "loss_num": 0.021240234375, "loss_xval": 0.10595703125, "num_input_tokens_seen": 230482960, "step": 1342 }, { "epoch": 0.353258367856908, "grad_norm": 7.461204110486792, "learning_rate": 5e-06, "loss": 0.1438, "num_input_tokens_seen": 230655564, "step": 1343 }, { "epoch": 0.353258367856908, "loss": 0.14832650125026703, "loss_ce": 0.005412681493908167, "loss_iou": 0.431640625, "loss_num": 0.028564453125, "loss_xval": 0.142578125, "num_input_tokens_seen": 230655564, "step": 1343 }, { "epoch": 0.3535214046162951, "grad_norm": 3.829230892926663, "learning_rate": 5e-06, "loss": 0.0983, "num_input_tokens_seen": 230827776, "step": 1344 }, { "epoch": 0.3535214046162951, "loss": 0.11698116362094879, "loss_ce": 0.0007397143635898829, "loss_iou": 0.59765625, "loss_num": 0.023193359375, 
"loss_xval": 0.1162109375, "num_input_tokens_seen": 230827776, "step": 1344 }, { "epoch": 0.35378444137568227, "grad_norm": 7.33327936841338, "learning_rate": 5e-06, "loss": 0.1438, "num_input_tokens_seen": 230999976, "step": 1345 }, { "epoch": 0.35378444137568227, "loss": 0.18063044548034668, "loss_ce": 0.005978355184197426, "loss_iou": 0.41796875, "loss_num": 0.034912109375, "loss_xval": 0.1748046875, "num_input_tokens_seen": 230999976, "step": 1345 }, { "epoch": 0.35404747813506937, "grad_norm": 4.588965012769037, "learning_rate": 5e-06, "loss": 0.1907, "num_input_tokens_seen": 231171944, "step": 1346 }, { "epoch": 0.35404747813506937, "loss": 0.19280412793159485, "loss_ce": 0.0011537342797964811, "loss_iou": 0.60546875, "loss_num": 0.038330078125, "loss_xval": 0.19140625, "num_input_tokens_seen": 231171944, "step": 1346 }, { "epoch": 0.3543105148944565, "grad_norm": 15.486355760387976, "learning_rate": 5e-06, "loss": 0.1488, "num_input_tokens_seen": 231342336, "step": 1347 }, { "epoch": 0.3543105148944565, "loss": 0.11234519630670547, "loss_ce": 0.0008644815534353256, "loss_iou": 0.49609375, "loss_num": 0.0223388671875, "loss_xval": 0.111328125, "num_input_tokens_seen": 231342336, "step": 1347 }, { "epoch": 0.35457355165384363, "grad_norm": 17.3485969876628, "learning_rate": 5e-06, "loss": 0.1319, "num_input_tokens_seen": 231514328, "step": 1348 }, { "epoch": 0.35457355165384363, "loss": 0.12869834899902344, "loss_ce": 0.0009517711587250233, "loss_iou": 0.68359375, "loss_num": 0.0255126953125, "loss_xval": 0.1279296875, "num_input_tokens_seen": 231514328, "step": 1348 }, { "epoch": 0.35483658841323074, "grad_norm": 19.41812876236682, "learning_rate": 5e-06, "loss": 0.127, "num_input_tokens_seen": 231686780, "step": 1349 }, { "epoch": 0.35483658841323074, "loss": 0.13225057721138, "loss_ce": 0.0008724014624021947, "loss_iou": 0.5625, "loss_num": 0.0262451171875, "loss_xval": 0.1318359375, "num_input_tokens_seen": 231686780, "step": 1349 }, { "epoch": 
0.3550996251726179, "grad_norm": 10.841969185593292, "learning_rate": 5e-06, "loss": 0.1386, "num_input_tokens_seen": 231858892, "step": 1350 }, { "epoch": 0.3550996251726179, "loss": 0.1675529032945633, "loss_ce": 0.00019450299441814423, "loss_iou": 0.6875, "loss_num": 0.033447265625, "loss_xval": 0.1669921875, "num_input_tokens_seen": 231858892, "step": 1350 }, { "epoch": 0.355362661932005, "grad_norm": 9.874209565962254, "learning_rate": 5e-06, "loss": 0.1595, "num_input_tokens_seen": 232029240, "step": 1351 }, { "epoch": 0.355362661932005, "loss": 0.13038085401058197, "loss_ce": 0.00037596753099933267, "loss_iou": 0.63671875, "loss_num": 0.0260009765625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 232029240, "step": 1351 }, { "epoch": 0.3556256986913921, "grad_norm": 6.572064767991026, "learning_rate": 5e-06, "loss": 0.1609, "num_input_tokens_seen": 232199320, "step": 1352 }, { "epoch": 0.3556256986913921, "loss": 0.11705964803695679, "loss_ce": 0.00029938769876025617, "loss_iou": 0.439453125, "loss_num": 0.0234375, "loss_xval": 0.11669921875, "num_input_tokens_seen": 232199320, "step": 1352 }, { "epoch": 0.35588873545077926, "grad_norm": 19.449170559601413, "learning_rate": 5e-06, "loss": 0.129, "num_input_tokens_seen": 232371336, "step": 1353 }, { "epoch": 0.35588873545077926, "loss": 0.10365074872970581, "loss_ce": 0.00291221821680665, "loss_iou": 0.470703125, "loss_num": 0.0201416015625, "loss_xval": 0.1005859375, "num_input_tokens_seen": 232371336, "step": 1353 }, { "epoch": 0.35615177221016636, "grad_norm": 6.060818585296291, "learning_rate": 5e-06, "loss": 0.1758, "num_input_tokens_seen": 232543572, "step": 1354 }, { "epoch": 0.35615177221016636, "loss": 0.22082458436489105, "loss_ce": 0.005858768709003925, "loss_iou": 0.56640625, "loss_num": 0.04296875, "loss_xval": 0.21484375, "num_input_tokens_seen": 232543572, "step": 1354 }, { "epoch": 0.3564148089695535, "grad_norm": 5.2405872305191386, "learning_rate": 5e-06, "loss": 0.1508, 
"num_input_tokens_seen": 232715652, "step": 1355 }, { "epoch": 0.3564148089695535, "loss": 0.1176377683877945, "loss_ce": 0.001854072092100978, "loss_iou": 0.55859375, "loss_num": 0.023193359375, "loss_xval": 0.11572265625, "num_input_tokens_seen": 232715652, "step": 1355 }, { "epoch": 0.3566778457289406, "grad_norm": 9.250810729782357, "learning_rate": 5e-06, "loss": 0.1583, "num_input_tokens_seen": 232888076, "step": 1356 }, { "epoch": 0.3566778457289406, "loss": 0.2215210646390915, "loss_ce": 0.0008179338765330613, "loss_iou": 0.59375, "loss_num": 0.044189453125, "loss_xval": 0.220703125, "num_input_tokens_seen": 232888076, "step": 1356 }, { "epoch": 0.3569408824883277, "grad_norm": 5.562200706936175, "learning_rate": 5e-06, "loss": 0.1815, "num_input_tokens_seen": 233060156, "step": 1357 }, { "epoch": 0.3569408824883277, "loss": 0.15043510496616364, "loss_ce": 0.0006548258243128657, "loss_iou": 0.48828125, "loss_num": 0.030029296875, "loss_xval": 0.1494140625, "num_input_tokens_seen": 233060156, "step": 1357 }, { "epoch": 0.3572039192477149, "grad_norm": 5.308030458771483, "learning_rate": 5e-06, "loss": 0.0933, "num_input_tokens_seen": 233232200, "step": 1358 }, { "epoch": 0.3572039192477149, "loss": 0.09737985581159592, "loss_ce": 0.0021955338306725025, "loss_iou": 0.482421875, "loss_num": 0.01904296875, "loss_xval": 0.09521484375, "num_input_tokens_seen": 233232200, "step": 1358 }, { "epoch": 0.357466956007102, "grad_norm": 22.688298388253326, "learning_rate": 5e-06, "loss": 0.1826, "num_input_tokens_seen": 233402524, "step": 1359 }, { "epoch": 0.357466956007102, "loss": 0.13730812072753906, "loss_ce": 0.000497827713843435, "loss_iou": 0.578125, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 233402524, "step": 1359 }, { "epoch": 0.3577299927664891, "grad_norm": 7.149196234866028, "learning_rate": 5e-06, "loss": 0.1397, "num_input_tokens_seen": 233574352, "step": 1360 }, { "epoch": 0.3577299927664891, "loss": 0.13957476615905762, 
"loss_ce": 0.002062564715743065, "loss_iou": 0.6328125, "loss_num": 0.0274658203125, "loss_xval": 0.1376953125, "num_input_tokens_seen": 233574352, "step": 1360 }, { "epoch": 0.35799302952587625, "grad_norm": 4.747458347619557, "learning_rate": 5e-06, "loss": 0.12, "num_input_tokens_seen": 233746476, "step": 1361 }, { "epoch": 0.35799302952587625, "loss": 0.09390648454427719, "loss_ce": 0.001377185108140111, "loss_iou": 0.44921875, "loss_num": 0.0185546875, "loss_xval": 0.0927734375, "num_input_tokens_seen": 233746476, "step": 1361 }, { "epoch": 0.35825606628526335, "grad_norm": 4.140560580446189, "learning_rate": 5e-06, "loss": 0.1012, "num_input_tokens_seen": 233918696, "step": 1362 }, { "epoch": 0.35825606628526335, "loss": 0.10030673444271088, "loss_ce": 0.0024216112215071917, "loss_iou": 0.5625, "loss_num": 0.01953125, "loss_xval": 0.09765625, "num_input_tokens_seen": 233918696, "step": 1362 }, { "epoch": 0.3585191030446505, "grad_norm": 5.484628612516693, "learning_rate": 5e-06, "loss": 0.1618, "num_input_tokens_seen": 234090660, "step": 1363 }, { "epoch": 0.3585191030446505, "loss": 0.1829943060874939, "loss_ce": 0.001201094826683402, "loss_iou": 0.443359375, "loss_num": 0.036376953125, "loss_xval": 0.181640625, "num_input_tokens_seen": 234090660, "step": 1363 }, { "epoch": 0.3587821398040376, "grad_norm": 7.875545161330649, "learning_rate": 5e-06, "loss": 0.0939, "num_input_tokens_seen": 234262444, "step": 1364 }, { "epoch": 0.3587821398040376, "loss": 0.10195118188858032, "loss_ce": 0.00035817097523249686, "loss_iou": 0.482421875, "loss_num": 0.0203857421875, "loss_xval": 0.1015625, "num_input_tokens_seen": 234262444, "step": 1364 }, { "epoch": 0.3590451765634247, "grad_norm": 5.112293535327179, "learning_rate": 5e-06, "loss": 0.1759, "num_input_tokens_seen": 234434852, "step": 1365 }, { "epoch": 0.3590451765634247, "loss": 0.12388080358505249, "loss_ce": 0.0015663461526855826, "loss_iou": 0.5625, "loss_num": 0.0244140625, "loss_xval": 0.1220703125, 
"num_input_tokens_seen": 234434852, "step": 1365 }, { "epoch": 0.3593082133228119, "grad_norm": 7.038268029638918, "learning_rate": 5e-06, "loss": 0.1712, "num_input_tokens_seen": 234607044, "step": 1366 }, { "epoch": 0.3593082133228119, "loss": 0.10762982070446014, "loss_ce": 0.0011539864353835583, "loss_iou": 0.6328125, "loss_num": 0.021240234375, "loss_xval": 0.1064453125, "num_input_tokens_seen": 234607044, "step": 1366 }, { "epoch": 0.359571250082199, "grad_norm": 23.600297531220633, "learning_rate": 5e-06, "loss": 0.1188, "num_input_tokens_seen": 234779280, "step": 1367 }, { "epoch": 0.359571250082199, "loss": 0.06237838417291641, "loss_ce": 0.0010838248999789357, "loss_iou": 0.470703125, "loss_num": 0.01226806640625, "loss_xval": 0.061279296875, "num_input_tokens_seen": 234779280, "step": 1367 }, { "epoch": 0.35983428684158614, "grad_norm": 3.7489621345618547, "learning_rate": 5e-06, "loss": 0.1625, "num_input_tokens_seen": 234951544, "step": 1368 }, { "epoch": 0.35983428684158614, "loss": 0.09055154025554657, "loss_ce": 0.0044309417717158794, "loss_iou": 0.609375, "loss_num": 0.0172119140625, "loss_xval": 0.0859375, "num_input_tokens_seen": 234951544, "step": 1368 }, { "epoch": 0.36009732360097324, "grad_norm": 11.577809346393625, "learning_rate": 5e-06, "loss": 0.1449, "num_input_tokens_seen": 235124044, "step": 1369 }, { "epoch": 0.36009732360097324, "loss": 0.10378709435462952, "loss_ce": 0.0021635598968714476, "loss_iou": 0.57421875, "loss_num": 0.0203857421875, "loss_xval": 0.1015625, "num_input_tokens_seen": 235124044, "step": 1369 }, { "epoch": 0.36036036036036034, "grad_norm": 12.169579645431945, "learning_rate": 5e-06, "loss": 0.1989, "num_input_tokens_seen": 235295776, "step": 1370 }, { "epoch": 0.36036036036036034, "loss": 0.26028677821159363, "loss_ce": 0.00123831897508353, "loss_iou": 0.4140625, "loss_num": 0.0517578125, "loss_xval": 0.259765625, "num_input_tokens_seen": 235295776, "step": 1370 }, { "epoch": 0.3606233971197475, "grad_norm": 
6.068163006201439, "learning_rate": 5e-06, "loss": 0.1286, "num_input_tokens_seen": 235467876, "step": 1371 }, { "epoch": 0.3606233971197475, "loss": 0.12217633426189423, "loss_ce": 0.0007163715199567378, "loss_iou": 0.55078125, "loss_num": 0.0242919921875, "loss_xval": 0.12158203125, "num_input_tokens_seen": 235467876, "step": 1371 }, { "epoch": 0.3608864338791346, "grad_norm": 5.0641588944194575, "learning_rate": 5e-06, "loss": 0.15, "num_input_tokens_seen": 235640260, "step": 1372 }, { "epoch": 0.3608864338791346, "loss": 0.13001395761966705, "loss_ce": 0.003243940882384777, "loss_iou": 0.5546875, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 235640260, "step": 1372 }, { "epoch": 0.3611494706385217, "grad_norm": 5.364003970603183, "learning_rate": 5e-06, "loss": 0.1301, "num_input_tokens_seen": 235812276, "step": 1373 }, { "epoch": 0.3611494706385217, "loss": 0.1422898769378662, "loss_ce": 0.002214185893535614, "loss_iou": 0.57421875, "loss_num": 0.0279541015625, "loss_xval": 0.1396484375, "num_input_tokens_seen": 235812276, "step": 1373 }, { "epoch": 0.36141250739790887, "grad_norm": 20.072012728606875, "learning_rate": 5e-06, "loss": 0.1562, "num_input_tokens_seen": 235984384, "step": 1374 }, { "epoch": 0.36141250739790887, "loss": 0.1751733273267746, "loss_ce": 0.0007043338264338672, "loss_iou": null, "loss_num": 0.034912109375, "loss_xval": 0.1748046875, "num_input_tokens_seen": 235984384, "step": 1374 }, { "epoch": 0.36167554415729597, "grad_norm": 9.286393703464922, "learning_rate": 5e-06, "loss": 0.1541, "num_input_tokens_seen": 236156884, "step": 1375 }, { "epoch": 0.36167554415729597, "loss": 0.21126675605773926, "loss_ce": 0.0010006362572312355, "loss_iou": 0.431640625, "loss_num": 0.0419921875, "loss_xval": 0.2099609375, "num_input_tokens_seen": 236156884, "step": 1375 }, { "epoch": 0.36193858091668313, "grad_norm": 11.903565521275382, "learning_rate": 5e-06, "loss": 0.1448, "num_input_tokens_seen": 236328968, "step": 1376
}, { "epoch": 0.36193858091668313, "loss": 0.12723658978939056, "loss_ce": 0.0002834574261214584, "loss_iou": 0.57421875, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 236328968, "step": 1376 }, { "epoch": 0.36220161767607023, "grad_norm": 16.599820050329747, "learning_rate": 5e-06, "loss": 0.115, "num_input_tokens_seen": 236501060, "step": 1377 }, { "epoch": 0.36220161767607023, "loss": 0.11763446778059006, "loss_ce": 0.0014235277194529772, "loss_iou": 0.57421875, "loss_num": 0.023193359375, "loss_xval": 0.1162109375, "num_input_tokens_seen": 236501060, "step": 1377 }, { "epoch": 0.36246465443545733, "grad_norm": 12.84169775199896, "learning_rate": 5e-06, "loss": 0.1592, "num_input_tokens_seen": 236673348, "step": 1378 }, { "epoch": 0.36246465443545733, "loss": 0.1577835977077484, "loss_ce": 0.00424966961145401, "loss_iou": 0.703125, "loss_num": 0.0306396484375, "loss_xval": 0.1533203125, "num_input_tokens_seen": 236673348, "step": 1378 }, { "epoch": 0.3627276911948445, "grad_norm": 4.530082641105361, "learning_rate": 5e-06, "loss": 0.1546, "num_input_tokens_seen": 236845952, "step": 1379 }, { "epoch": 0.3627276911948445, "loss": 0.16972720623016357, "loss_ce": 0.0046881334856152534, "loss_iou": 0.546875, "loss_num": 0.032958984375, "loss_xval": 0.1650390625, "num_input_tokens_seen": 236845952, "step": 1379 }, { "epoch": 0.3629907279542316, "grad_norm": 4.453547029103764, "learning_rate": 5e-06, "loss": 0.1384, "num_input_tokens_seen": 237016412, "step": 1380 }, { "epoch": 0.3629907279542316, "loss": 0.1337898075580597, "loss_ce": 0.0024726560804992914, "loss_iou": 0.58203125, "loss_num": 0.0262451171875, "loss_xval": 0.130859375, "num_input_tokens_seen": 237016412, "step": 1380 }, { "epoch": 0.36325376471361875, "grad_norm": 14.926786019286814, "learning_rate": 5e-06, "loss": 0.1262, "num_input_tokens_seen": 237188648, "step": 1381 }, { "epoch": 0.36325376471361875, "loss": 0.09095098078250885, "loss_ce": 0.0004968784051015973, 
"loss_iou": 0.59765625, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 237188648, "step": 1381 }, { "epoch": 0.36351680147300586, "grad_norm": 5.283936723228349, "learning_rate": 5e-06, "loss": 0.1558, "num_input_tokens_seen": 237360968, "step": 1382 }, { "epoch": 0.36351680147300586, "loss": 0.10726694762706757, "loss_ce": 0.002011828124523163, "loss_iou": 0.6640625, "loss_num": 0.02099609375, "loss_xval": 0.10546875, "num_input_tokens_seen": 237360968, "step": 1382 }, { "epoch": 0.36377983823239296, "grad_norm": 6.9353605673209175, "learning_rate": 5e-06, "loss": 0.1229, "num_input_tokens_seen": 237533240, "step": 1383 }, { "epoch": 0.36377983823239296, "loss": 0.12320241332054138, "loss_ce": 0.0009337374940514565, "loss_iou": 0.5390625, "loss_num": 0.0244140625, "loss_xval": 0.1220703125, "num_input_tokens_seen": 237533240, "step": 1383 }, { "epoch": 0.3640428749917801, "grad_norm": 7.872299034496559, "learning_rate": 5e-06, "loss": 0.1189, "num_input_tokens_seen": 237705412, "step": 1384 }, { "epoch": 0.3640428749917801, "loss": 0.10494999587535858, "loss_ce": 0.0007019541808404028, "loss_iou": 0.5703125, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 237705412, "step": 1384 }, { "epoch": 0.3643059117511672, "grad_norm": 9.299219065335537, "learning_rate": 5e-06, "loss": 0.1322, "num_input_tokens_seen": 237875984, "step": 1385 }, { "epoch": 0.3643059117511672, "loss": 0.09333762526512146, "loss_ce": 0.0001979749504243955, "loss_iou": 0.6171875, "loss_num": 0.0186767578125, "loss_xval": 0.09326171875, "num_input_tokens_seen": 237875984, "step": 1385 }, { "epoch": 0.3645689485105543, "grad_norm": 4.989443397895941, "learning_rate": 5e-06, "loss": 0.1464, "num_input_tokens_seen": 238048268, "step": 1386 }, { "epoch": 0.3645689485105543, "loss": 0.14278042316436768, "loss_ce": 0.0023690357338637114, "loss_iou": 0.55078125, "loss_num": 0.028076171875, "loss_xval": 0.140625, "num_input_tokens_seen": 
238048268, "step": 1386 }, { "epoch": 0.3648319852699415, "grad_norm": 12.398705819599233, "learning_rate": 5e-06, "loss": 0.1595, "num_input_tokens_seen": 238218704, "step": 1387 }, { "epoch": 0.3648319852699415, "loss": 0.18032394349575043, "loss_ce": 0.000819545122794807, "loss_iou": 0.5703125, "loss_num": 0.035888671875, "loss_xval": 0.1796875, "num_input_tokens_seen": 238218704, "step": 1387 }, { "epoch": 0.3650950220293286, "grad_norm": 6.66114485908042, "learning_rate": 5e-06, "loss": 0.1414, "num_input_tokens_seen": 238390452, "step": 1388 }, { "epoch": 0.3650950220293286, "loss": 0.23675096035003662, "loss_ce": 0.0027726897969841957, "loss_iou": 0.39453125, "loss_num": 0.046875, "loss_xval": 0.234375, "num_input_tokens_seen": 238390452, "step": 1388 }, { "epoch": 0.36535805878871574, "grad_norm": 9.52863817159708, "learning_rate": 5e-06, "loss": 0.1254, "num_input_tokens_seen": 238562708, "step": 1389 }, { "epoch": 0.36535805878871574, "loss": 0.10956455767154694, "loss_ce": 0.0007693836814723909, "loss_iou": 0.6171875, "loss_num": 0.021728515625, "loss_xval": 0.10888671875, "num_input_tokens_seen": 238562708, "step": 1389 }, { "epoch": 0.36562109554810285, "grad_norm": 14.33207869070322, "learning_rate": 5e-06, "loss": 0.1554, "num_input_tokens_seen": 238734768, "step": 1390 }, { "epoch": 0.36562109554810285, "loss": 0.11770792305469513, "loss_ce": 0.00018472480587661266, "loss_iou": 0.71484375, "loss_num": 0.0235595703125, "loss_xval": 0.11767578125, "num_input_tokens_seen": 238734768, "step": 1390 }, { "epoch": 0.36588413230748995, "grad_norm": 6.704869940759653, "learning_rate": 5e-06, "loss": 0.1267, "num_input_tokens_seen": 238906944, "step": 1391 }, { "epoch": 0.36588413230748995, "loss": 0.12555649876594543, "loss_ce": 0.0015330680180341005, "loss_iou": 0.6953125, "loss_num": 0.0247802734375, "loss_xval": 0.1240234375, "num_input_tokens_seen": 238906944, "step": 1391 }, { "epoch": 0.3661471690668771, "grad_norm": 7.947535402036419, "learning_rate": 
5e-06, "loss": 0.1349, "num_input_tokens_seen": 239078728, "step": 1392 }, { "epoch": 0.3661471690668771, "loss": 0.12712188065052032, "loss_ce": 0.0004739244468510151, "loss_iou": 0.40234375, "loss_num": 0.0252685546875, "loss_xval": 0.126953125, "num_input_tokens_seen": 239078728, "step": 1392 }, { "epoch": 0.3664102058262642, "grad_norm": 21.67277442987669, "learning_rate": 5e-06, "loss": 0.1626, "num_input_tokens_seen": 239251004, "step": 1393 }, { "epoch": 0.3664102058262642, "loss": 0.16826726496219635, "loss_ce": 0.0013361112214624882, "loss_iou": 0.58984375, "loss_num": 0.033447265625, "loss_xval": 0.1669921875, "num_input_tokens_seen": 239251004, "step": 1393 }, { "epoch": 0.36667324258565137, "grad_norm": 10.958283524277078, "learning_rate": 5e-06, "loss": 0.0975, "num_input_tokens_seen": 239418772, "step": 1394 }, { "epoch": 0.36667324258565137, "loss": 0.05772838741540909, "loss_ce": 0.0009962135227397084, "loss_iou": 0.6171875, "loss_num": 0.0113525390625, "loss_xval": 0.056640625, "num_input_tokens_seen": 239418772, "step": 1394 }, { "epoch": 0.3669362793450385, "grad_norm": 6.917710662760069, "learning_rate": 5e-06, "loss": 0.167, "num_input_tokens_seen": 239591128, "step": 1395 }, { "epoch": 0.3669362793450385, "loss": 0.09961553663015366, "loss_ce": 0.001104797120206058, "loss_iou": 0.54296875, "loss_num": 0.0196533203125, "loss_xval": 0.0986328125, "num_input_tokens_seen": 239591128, "step": 1395 }, { "epoch": 0.3671993161044256, "grad_norm": 15.902726176045867, "learning_rate": 5e-06, "loss": 0.1675, "num_input_tokens_seen": 239763344, "step": 1396 }, { "epoch": 0.3671993161044256, "loss": 0.32926326990127563, "loss_ce": 0.011209084652364254, "loss_iou": 0.59765625, "loss_num": 0.0634765625, "loss_xval": 0.318359375, "num_input_tokens_seen": 239763344, "step": 1396 }, { "epoch": 0.36746235286381274, "grad_norm": 13.284413053795395, "learning_rate": 5e-06, "loss": 0.1494, "num_input_tokens_seen": 239933592, "step": 1397 }, { "epoch": 
0.36746235286381274, "loss": 0.11202233284711838, "loss_ce": 0.004936150275170803, "loss_iou": 0.5625, "loss_num": 0.021484375, "loss_xval": 0.10693359375, "num_input_tokens_seen": 239933592, "step": 1397 }, { "epoch": 0.36772538962319984, "grad_norm": 4.642604596899854, "learning_rate": 5e-06, "loss": 0.1228, "num_input_tokens_seen": 240106096, "step": 1398 }, { "epoch": 0.36772538962319984, "loss": 0.12524864077568054, "loss_ce": 0.0025679690297693014, "loss_iou": 0.6640625, "loss_num": 0.0245361328125, "loss_xval": 0.12255859375, "num_input_tokens_seen": 240106096, "step": 1398 }, { "epoch": 0.36798842638258694, "grad_norm": 6.5389674494027, "learning_rate": 5e-06, "loss": 0.1251, "num_input_tokens_seen": 240278216, "step": 1399 }, { "epoch": 0.36798842638258694, "loss": 0.17912393808364868, "loss_ce": 0.002244053641334176, "loss_iou": 0.53125, "loss_num": 0.035400390625, "loss_xval": 0.1767578125, "num_input_tokens_seen": 240278216, "step": 1399 }, { "epoch": 0.3682514631419741, "grad_norm": 6.524251878336125, "learning_rate": 5e-06, "loss": 0.1441, "num_input_tokens_seen": 240450288, "step": 1400 }, { "epoch": 0.3682514631419741, "loss": 0.1508733034133911, "loss_ce": 0.00200856477022171, "loss_iou": 0.57421875, "loss_num": 0.02978515625, "loss_xval": 0.1484375, "num_input_tokens_seen": 240450288, "step": 1400 }, { "epoch": 0.3685144999013612, "grad_norm": 7.825263953291389, "learning_rate": 5e-06, "loss": 0.1443, "num_input_tokens_seen": 240619860, "step": 1401 }, { "epoch": 0.3685144999013612, "loss": 0.21069373190402985, "loss_ce": 0.002014526631683111, "loss_iou": 0.5234375, "loss_num": 0.041748046875, "loss_xval": 0.208984375, "num_input_tokens_seen": 240619860, "step": 1401 }, { "epoch": 0.36877753666074836, "grad_norm": 21.746555075327876, "learning_rate": 5e-06, "loss": 0.1126, "num_input_tokens_seen": 240792004, "step": 1402 }, { "epoch": 0.36877753666074836, "loss": 0.10193300247192383, "loss_ce": 0.003086569719016552, "loss_iou": 0.546875, 
"loss_num": 0.019775390625, "loss_xval": 0.0986328125, "num_input_tokens_seen": 240792004, "step": 1402 }, { "epoch": 0.36904057342013546, "grad_norm": 13.416067582793307, "learning_rate": 5e-06, "loss": 0.1275, "num_input_tokens_seen": 240963952, "step": 1403 }, { "epoch": 0.36904057342013546, "loss": 0.14882555603981018, "loss_ce": 0.0034703421406447887, "loss_iou": 0.482421875, "loss_num": 0.029052734375, "loss_xval": 0.1455078125, "num_input_tokens_seen": 240963952, "step": 1403 }, { "epoch": 0.36930361017952257, "grad_norm": 7.0351108134980604, "learning_rate": 5e-06, "loss": 0.1138, "num_input_tokens_seen": 241135992, "step": 1404 }, { "epoch": 0.36930361017952257, "loss": 0.16983582079410553, "loss_ce": 0.004186400678008795, "loss_iou": 0.6328125, "loss_num": 0.033203125, "loss_xval": 0.166015625, "num_input_tokens_seen": 241135992, "step": 1404 }, { "epoch": 0.3695666469389097, "grad_norm": 10.37111011550267, "learning_rate": 5e-06, "loss": 0.1295, "num_input_tokens_seen": 241306532, "step": 1405 }, { "epoch": 0.3695666469389097, "loss": 0.13351476192474365, "loss_ce": 0.0033267755061388016, "loss_iou": 0.5078125, "loss_num": 0.0260009765625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 241306532, "step": 1405 }, { "epoch": 0.36982968369829683, "grad_norm": 4.1471191240598, "learning_rate": 5e-06, "loss": 0.1442, "num_input_tokens_seen": 241478648, "step": 1406 }, { "epoch": 0.36982968369829683, "loss": 0.08705037832260132, "loss_ce": 0.0007466700626537204, "loss_iou": 0.52734375, "loss_num": 0.017333984375, "loss_xval": 0.08642578125, "num_input_tokens_seen": 241478648, "step": 1406 }, { "epoch": 0.370092720457684, "grad_norm": 4.611801393612808, "learning_rate": 5e-06, "loss": 0.162, "num_input_tokens_seen": 241650796, "step": 1407 }, { "epoch": 0.370092720457684, "loss": 0.22668591141700745, "loss_ce": 0.0017713564448058605, "loss_iou": 0.49609375, "loss_num": 0.044921875, "loss_xval": 0.224609375, "num_input_tokens_seen": 241650796, "step": 1407 
}, { "epoch": 0.3703557572170711, "grad_norm": 8.388794391473104, "learning_rate": 5e-06, "loss": 0.1379, "num_input_tokens_seen": 241822848, "step": 1408 }, { "epoch": 0.3703557572170711, "loss": 0.11565081030130386, "loss_ce": 0.006123221945017576, "loss_iou": 0.48828125, "loss_num": 0.02197265625, "loss_xval": 0.109375, "num_input_tokens_seen": 241822848, "step": 1408 }, { "epoch": 0.3706187939764582, "grad_norm": 4.295439998788119, "learning_rate": 5e-06, "loss": 0.1396, "num_input_tokens_seen": 241994956, "step": 1409 }, { "epoch": 0.3706187939764582, "loss": 0.19000545144081116, "loss_ce": 0.0015594041906297207, "loss_iou": 0.59375, "loss_num": 0.03759765625, "loss_xval": 0.1884765625, "num_input_tokens_seen": 241994956, "step": 1409 }, { "epoch": 0.37088183073584535, "grad_norm": 5.005427734613651, "learning_rate": 5e-06, "loss": 0.1313, "num_input_tokens_seen": 242167240, "step": 1410 }, { "epoch": 0.37088183073584535, "loss": 0.10752324759960175, "loss_ce": 0.00040654875920154154, "loss_iou": 0.609375, "loss_num": 0.021484375, "loss_xval": 0.10693359375, "num_input_tokens_seen": 242167240, "step": 1410 }, { "epoch": 0.37114486749523246, "grad_norm": 5.984568060178908, "learning_rate": 5e-06, "loss": 0.1437, "num_input_tokens_seen": 242339532, "step": 1411 }, { "epoch": 0.37114486749523246, "loss": 0.14311346411705017, "loss_ce": 0.0001386186049785465, "loss_iou": 0.47265625, "loss_num": 0.028564453125, "loss_xval": 0.142578125, "num_input_tokens_seen": 242339532, "step": 1411 }, { "epoch": 0.37140790425461956, "grad_norm": 43.56689122924667, "learning_rate": 5e-06, "loss": 0.1338, "num_input_tokens_seen": 242511708, "step": 1412 }, { "epoch": 0.37140790425461956, "loss": 0.127020463347435, "loss_ce": 0.0014406184200197458, "loss_iou": 0.52734375, "loss_num": 0.025146484375, "loss_xval": 0.1259765625, "num_input_tokens_seen": 242511708, "step": 1412 }, { "epoch": 0.3716709410140067, "grad_norm": 8.230300180002782, "learning_rate": 5e-06, "loss": 0.1371, 
"num_input_tokens_seen": 242684000, "step": 1413 }, { "epoch": 0.3716709410140067, "loss": 0.1775522530078888, "loss_ce": 0.000489264028146863, "loss_iou": 0.62109375, "loss_num": 0.035400390625, "loss_xval": 0.1767578125, "num_input_tokens_seen": 242684000, "step": 1413 }, { "epoch": 0.3719339777733938, "grad_norm": 7.623956423887346, "learning_rate": 5e-06, "loss": 0.1156, "num_input_tokens_seen": 242856400, "step": 1414 }, { "epoch": 0.3719339777733938, "loss": 0.172633096575737, "loss_ce": 0.0017346586100757122, "loss_iou": 0.60546875, "loss_num": 0.0341796875, "loss_xval": 0.1708984375, "num_input_tokens_seen": 242856400, "step": 1414 }, { "epoch": 0.372197014532781, "grad_norm": 5.086262256381093, "learning_rate": 5e-06, "loss": 0.1312, "num_input_tokens_seen": 243028760, "step": 1415 }, { "epoch": 0.372197014532781, "loss": 0.11333785206079483, "loss_ce": 0.0045731994323432446, "loss_iou": 0.5546875, "loss_num": 0.021728515625, "loss_xval": 0.10888671875, "num_input_tokens_seen": 243028760, "step": 1415 }, { "epoch": 0.3724600512921681, "grad_norm": 7.319953499179629, "learning_rate": 5e-06, "loss": 0.1736, "num_input_tokens_seen": 243200888, "step": 1416 }, { "epoch": 0.3724600512921681, "loss": 0.12842552363872528, "loss_ce": 0.0017775753512978554, "loss_iou": 0.69921875, "loss_num": 0.0252685546875, "loss_xval": 0.126953125, "num_input_tokens_seen": 243200888, "step": 1416 }, { "epoch": 0.3727230880515552, "grad_norm": 7.14806250434601, "learning_rate": 5e-06, "loss": 0.1676, "num_input_tokens_seen": 243373224, "step": 1417 }, { "epoch": 0.3727230880515552, "loss": 0.17957568168640137, "loss_ce": 0.004038581624627113, "loss_iou": 0.4765625, "loss_num": 0.03515625, "loss_xval": 0.17578125, "num_input_tokens_seen": 243373224, "step": 1417 }, { "epoch": 0.37298612481094234, "grad_norm": 10.549476863868529, "learning_rate": 5e-06, "loss": 0.1337, "num_input_tokens_seen": 243545548, "step": 1418 }, { "epoch": 0.37298612481094234, "loss": 0.14556309580802917, 
"loss_ce": 0.0017947773449122906, "loss_iou": 0.61328125, "loss_num": 0.02880859375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 243545548, "step": 1418 }, { "epoch": 0.37324916157032945, "grad_norm": 10.0643806609024, "learning_rate": 5e-06, "loss": 0.1452, "num_input_tokens_seen": 243715796, "step": 1419 }, { "epoch": 0.37324916157032945, "loss": 0.13023152947425842, "loss_ce": 0.0014473494375124574, "loss_iou": 0.5703125, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 243715796, "step": 1419 }, { "epoch": 0.3735121983297166, "grad_norm": 6.667777106999036, "learning_rate": 5e-06, "loss": 0.1642, "num_input_tokens_seen": 243886244, "step": 1420 }, { "epoch": 0.3735121983297166, "loss": 0.21719014644622803, "loss_ce": 0.0024379536043852568, "loss_iou": 0.40625, "loss_num": 0.04296875, "loss_xval": 0.21484375, "num_input_tokens_seen": 243886244, "step": 1420 }, { "epoch": 0.3737752350891037, "grad_norm": 6.203781395805212, "learning_rate": 5e-06, "loss": 0.1106, "num_input_tokens_seen": 244058256, "step": 1421 }, { "epoch": 0.3737752350891037, "loss": 0.14644312858581543, "loss_ce": 0.003773454576730728, "loss_iou": 0.50390625, "loss_num": 0.028564453125, "loss_xval": 0.142578125, "num_input_tokens_seen": 244058256, "step": 1421 }, { "epoch": 0.3740382718484908, "grad_norm": 11.121295779991378, "learning_rate": 5e-06, "loss": 0.162, "num_input_tokens_seen": 244230348, "step": 1422 }, { "epoch": 0.3740382718484908, "loss": 0.13860073685646057, "loss_ce": 0.004567527212202549, "loss_iou": 0.41796875, "loss_num": 0.02685546875, "loss_xval": 0.1337890625, "num_input_tokens_seen": 244230348, "step": 1422 }, { "epoch": 0.37430130860787797, "grad_norm": 8.262433746875137, "learning_rate": 5e-06, "loss": 0.1552, "num_input_tokens_seen": 244402756, "step": 1423 }, { "epoch": 0.37430130860787797, "loss": 0.17389166355133057, "loss_ce": 0.0005212996620684862, "loss_iou": 0.36328125, "loss_num": 0.03466796875, "loss_xval": 0.173828125, 
"num_input_tokens_seen": 244402756, "step": 1423 }, { "epoch": 0.3745643453672651, "grad_norm": 4.574844176213277, "learning_rate": 5e-06, "loss": 0.1272, "num_input_tokens_seen": 244575028, "step": 1424 }, { "epoch": 0.3745643453672651, "loss": 0.2532828748226166, "loss_ce": 0.002657280070707202, "loss_iou": 0.51171875, "loss_num": 0.050048828125, "loss_xval": 0.25, "num_input_tokens_seen": 244575028, "step": 1424 }, { "epoch": 0.3748273821266522, "grad_norm": 4.771481627209834, "learning_rate": 5e-06, "loss": 0.1401, "num_input_tokens_seen": 244745344, "step": 1425 }, { "epoch": 0.3748273821266522, "loss": 0.1295078694820404, "loss_ce": 0.004813054576516151, "loss_iou": 0.5390625, "loss_num": 0.02490234375, "loss_xval": 0.12451171875, "num_input_tokens_seen": 244745344, "step": 1425 }, { "epoch": 0.37509041888603933, "grad_norm": 9.163217114560078, "learning_rate": 5e-06, "loss": 0.1447, "num_input_tokens_seen": 244917724, "step": 1426 }, { "epoch": 0.37509041888603933, "loss": 0.08564335107803345, "loss_ce": 0.004375034011900425, "loss_iou": 0.52734375, "loss_num": 0.0162353515625, "loss_xval": 0.0810546875, "num_input_tokens_seen": 244917724, "step": 1426 }, { "epoch": 0.37535345564542644, "grad_norm": 4.314222759697504, "learning_rate": 5e-06, "loss": 0.1353, "num_input_tokens_seen": 245089960, "step": 1427 }, { "epoch": 0.37535345564542644, "loss": 0.1637798547744751, "loss_ce": 0.004539141897112131, "loss_iou": 0.55859375, "loss_num": 0.031982421875, "loss_xval": 0.1591796875, "num_input_tokens_seen": 245089960, "step": 1427 }, { "epoch": 0.3756164924048136, "grad_norm": 4.131046733957264, "learning_rate": 5e-06, "loss": 0.0983, "num_input_tokens_seen": 245261980, "step": 1428 }, { "epoch": 0.3756164924048136, "loss": 0.07748128473758698, "loss_ce": 0.0020113117061555386, "loss_iou": 0.4296875, "loss_num": 0.01513671875, "loss_xval": 0.07568359375, "num_input_tokens_seen": 245261980, "step": 1428 }, { "epoch": 0.3758795291642007, "grad_norm": 
5.689319334005767, "learning_rate": 5e-06, "loss": 0.1536, "num_input_tokens_seen": 245434416, "step": 1429 }, { "epoch": 0.3758795291642007, "loss": 0.11119981110095978, "loss_ce": 0.0007566966232843697, "loss_iou": 0.6015625, "loss_num": 0.0220947265625, "loss_xval": 0.1103515625, "num_input_tokens_seen": 245434416, "step": 1429 }, { "epoch": 0.3761425659235878, "grad_norm": 20.10795595759354, "learning_rate": 5e-06, "loss": 0.1404, "num_input_tokens_seen": 245606636, "step": 1430 }, { "epoch": 0.3761425659235878, "loss": 0.12385141104459763, "loss_ce": 0.005015961825847626, "loss_iou": 0.474609375, "loss_num": 0.0238037109375, "loss_xval": 0.11865234375, "num_input_tokens_seen": 245606636, "step": 1430 }, { "epoch": 0.37640560268297496, "grad_norm": 5.445619300828039, "learning_rate": 5e-06, "loss": 0.1253, "num_input_tokens_seen": 245778556, "step": 1431 }, { "epoch": 0.37640560268297496, "loss": 0.09182294458150864, "loss_ce": 0.0022538499906659126, "loss_iou": 0.609375, "loss_num": 0.0179443359375, "loss_xval": 0.08935546875, "num_input_tokens_seen": 245778556, "step": 1431 }, { "epoch": 0.37666863944236206, "grad_norm": 6.5911007721042045, "learning_rate": 5e-06, "loss": 0.1339, "num_input_tokens_seen": 245950596, "step": 1432 }, { "epoch": 0.37666863944236206, "loss": 0.11425422877073288, "loss_ce": 0.002651446033269167, "loss_iou": 0.6328125, "loss_num": 0.0223388671875, "loss_xval": 0.11181640625, "num_input_tokens_seen": 245950596, "step": 1432 }, { "epoch": 0.3769316762017492, "grad_norm": 5.727028044767281, "learning_rate": 5e-06, "loss": 0.11, "num_input_tokens_seen": 246120388, "step": 1433 }, { "epoch": 0.3769316762017492, "loss": 0.10625661909580231, "loss_ce": 0.0015508129727095366, "loss_iou": 0.58203125, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 246120388, "step": 1433 }, { "epoch": 0.3771947129611363, "grad_norm": 11.215881906654301, "learning_rate": 5e-06, "loss": 0.138, "num_input_tokens_seen": 
246292408, "step": 1434 }, { "epoch": 0.3771947129611363, "loss": 0.1351088285446167, "loss_ce": 0.0014418261125683784, "loss_iou": 0.412109375, "loss_num": 0.0267333984375, "loss_xval": 0.1337890625, "num_input_tokens_seen": 246292408, "step": 1434 }, { "epoch": 0.37745774972052343, "grad_norm": 3.9647228124096485, "learning_rate": 5e-06, "loss": 0.1515, "num_input_tokens_seen": 246464396, "step": 1435 }, { "epoch": 0.37745774972052343, "loss": 0.12623311579227448, "loss_ce": 0.00034810492070391774, "loss_iou": 0.31640625, "loss_num": 0.025146484375, "loss_xval": 0.1259765625, "num_input_tokens_seen": 246464396, "step": 1435 }, { "epoch": 0.3777207864799106, "grad_norm": 3.981981545629712, "learning_rate": 5e-06, "loss": 0.1145, "num_input_tokens_seen": 246636468, "step": 1436 }, { "epoch": 0.3777207864799106, "loss": 0.08713729679584503, "loss_ce": 0.0009556564036756754, "loss_iou": 0.416015625, "loss_num": 0.0172119140625, "loss_xval": 0.0859375, "num_input_tokens_seen": 246636468, "step": 1436 }, { "epoch": 0.3779838232392977, "grad_norm": 8.169176906981303, "learning_rate": 5e-06, "loss": 0.1047, "num_input_tokens_seen": 246808440, "step": 1437 }, { "epoch": 0.3779838232392977, "loss": 0.10194505006074905, "loss_ce": 0.005204326473176479, "loss_iou": 0.4453125, "loss_num": 0.019287109375, "loss_xval": 0.0966796875, "num_input_tokens_seen": 246808440, "step": 1437 }, { "epoch": 0.3782468599986848, "grad_norm": 11.188980116706098, "learning_rate": 5e-06, "loss": 0.1187, "num_input_tokens_seen": 246980788, "step": 1438 }, { "epoch": 0.3782468599986848, "loss": 0.19040054082870483, "loss_ce": 0.0052198669873178005, "loss_iou": 0.462890625, "loss_num": 0.036865234375, "loss_xval": 0.185546875, "num_input_tokens_seen": 246980788, "step": 1438 }, { "epoch": 0.37850989675807195, "grad_norm": 7.739931791211071, "learning_rate": 5e-06, "loss": 0.1058, "num_input_tokens_seen": 247152940, "step": 1439 }, { "epoch": 0.37850989675807195, "loss": 0.057150378823280334, 
"loss_ce": 0.00032664957689121366, "loss_iou": 0.4609375, "loss_num": 0.0113525390625, "loss_xval": 0.056884765625, "num_input_tokens_seen": 247152940, "step": 1439 }, { "epoch": 0.37877293351745905, "grad_norm": 4.659888569440988, "learning_rate": 5e-06, "loss": 0.1621, "num_input_tokens_seen": 247321728, "step": 1440 }, { "epoch": 0.37877293351745905, "loss": 0.23644393682479858, "loss_ce": 0.0026182467117905617, "loss_iou": 0.63671875, "loss_num": 0.046630859375, "loss_xval": 0.2333984375, "num_input_tokens_seen": 247321728, "step": 1440 }, { "epoch": 0.3790359702768462, "grad_norm": 4.432660256180253, "learning_rate": 5e-06, "loss": 0.1362, "num_input_tokens_seen": 247493788, "step": 1441 }, { "epoch": 0.3790359702768462, "loss": 0.13106387853622437, "loss_ce": 0.0005707137170247734, "loss_iou": 0.52734375, "loss_num": 0.026123046875, "loss_xval": 0.130859375, "num_input_tokens_seen": 247493788, "step": 1441 }, { "epoch": 0.3792990070362333, "grad_norm": 14.394067386487718, "learning_rate": 5e-06, "loss": 0.119, "num_input_tokens_seen": 247666148, "step": 1442 }, { "epoch": 0.3792990070362333, "loss": 0.06981781125068665, "loss_ce": 0.0019467112142592669, "loss_iou": 0.5234375, "loss_num": 0.0135498046875, "loss_xval": 0.06787109375, "num_input_tokens_seen": 247666148, "step": 1442 }, { "epoch": 0.3795620437956204, "grad_norm": 7.3199746686785785, "learning_rate": 5e-06, "loss": 0.1159, "num_input_tokens_seen": 247838148, "step": 1443 }, { "epoch": 0.3795620437956204, "loss": 0.12706537544727325, "loss_ce": 0.001332960557192564, "loss_iou": 0.58203125, "loss_num": 0.025146484375, "loss_xval": 0.1259765625, "num_input_tokens_seen": 247838148, "step": 1443 }, { "epoch": 0.3798250805550076, "grad_norm": 9.946074142043194, "learning_rate": 5e-06, "loss": 0.1498, "num_input_tokens_seen": 248008104, "step": 1444 }, { "epoch": 0.3798250805550076, "loss": 0.08655127882957458, "loss_ce": 0.002902593230828643, "loss_iou": 0.62890625, "loss_num": 0.0167236328125, 
"loss_xval": 0.08349609375, "num_input_tokens_seen": 248008104, "step": 1444 }, { "epoch": 0.3800881173143947, "grad_norm": 6.769456974696547, "learning_rate": 5e-06, "loss": 0.1537, "num_input_tokens_seen": 248178340, "step": 1445 }, { "epoch": 0.3800881173143947, "loss": 0.12096725404262543, "loss_ce": 0.0002702331403270364, "loss_iou": 0.59765625, "loss_num": 0.024169921875, "loss_xval": 0.12060546875, "num_input_tokens_seen": 248178340, "step": 1445 }, { "epoch": 0.38035115407378184, "grad_norm": 8.140876700736197, "learning_rate": 5e-06, "loss": 0.1292, "num_input_tokens_seen": 248350556, "step": 1446 }, { "epoch": 0.38035115407378184, "loss": 0.09277988225221634, "loss_ce": 0.004370463080704212, "loss_iou": 0.53125, "loss_num": 0.0177001953125, "loss_xval": 0.08837890625, "num_input_tokens_seen": 248350556, "step": 1446 }, { "epoch": 0.38061419083316894, "grad_norm": 5.023139574097896, "learning_rate": 5e-06, "loss": 0.1556, "num_input_tokens_seen": 248522684, "step": 1447 }, { "epoch": 0.38061419083316894, "loss": 0.19670161604881287, "loss_ce": 0.002487761899828911, "loss_iou": 0.53515625, "loss_num": 0.038818359375, "loss_xval": 0.1943359375, "num_input_tokens_seen": 248522684, "step": 1447 }, { "epoch": 0.38087722759255604, "grad_norm": 10.119019719981985, "learning_rate": 5e-06, "loss": 0.1368, "num_input_tokens_seen": 248692816, "step": 1448 }, { "epoch": 0.38087722759255604, "loss": 0.14071118831634521, "loss_ce": 0.001856213086284697, "loss_iou": 0.486328125, "loss_num": 0.02783203125, "loss_xval": 0.138671875, "num_input_tokens_seen": 248692816, "step": 1448 }, { "epoch": 0.3811402643519432, "grad_norm": 4.80565522402478, "learning_rate": 5e-06, "loss": 0.1265, "num_input_tokens_seen": 248865220, "step": 1449 }, { "epoch": 0.3811402643519432, "loss": 0.138558492064476, "loss_ce": 0.005471333395689726, "loss_iou": 0.546875, "loss_num": 0.026611328125, "loss_xval": 0.1328125, "num_input_tokens_seen": 248865220, "step": 1449 }, { "epoch": 
0.3814033011113303, "grad_norm": 5.799079918279409, "learning_rate": 5e-06, "loss": 0.1445, "num_input_tokens_seen": 249037228, "step": 1450 }, { "epoch": 0.3814033011113303, "loss": 0.1300484836101532, "loss_ce": 0.0007149941520765424, "loss_iou": 0.59765625, "loss_num": 0.02587890625, "loss_xval": 0.12890625, "num_input_tokens_seen": 249037228, "step": 1450 }, { "epoch": 0.3816663378707174, "grad_norm": 5.998961200928992, "learning_rate": 5e-06, "loss": 0.1427, "num_input_tokens_seen": 249209488, "step": 1451 }, { "epoch": 0.3816663378707174, "loss": 0.15324485301971436, "loss_ce": 0.0011757557513192296, "loss_iou": 0.62890625, "loss_num": 0.0303955078125, "loss_xval": 0.15234375, "num_input_tokens_seen": 249209488, "step": 1451 }, { "epoch": 0.38192937463010457, "grad_norm": 6.571427699631083, "learning_rate": 5e-06, "loss": 0.1472, "num_input_tokens_seen": 249381876, "step": 1452 }, { "epoch": 0.38192937463010457, "loss": 0.09512484073638916, "loss_ce": 0.001863121404312551, "loss_iou": 0.640625, "loss_num": 0.0186767578125, "loss_xval": 0.09326171875, "num_input_tokens_seen": 249381876, "step": 1452 }, { "epoch": 0.38219241138949167, "grad_norm": 5.686083518821853, "learning_rate": 5e-06, "loss": 0.1111, "num_input_tokens_seen": 249554160, "step": 1453 }, { "epoch": 0.38219241138949167, "loss": 0.12024913728237152, "loss_ce": 0.002786980476230383, "loss_iou": 0.49609375, "loss_num": 0.0235595703125, "loss_xval": 0.11767578125, "num_input_tokens_seen": 249554160, "step": 1453 }, { "epoch": 0.38245544814887883, "grad_norm": 5.286753977297326, "learning_rate": 5e-06, "loss": 0.1348, "num_input_tokens_seen": 249726068, "step": 1454 }, { "epoch": 0.38245544814887883, "loss": 0.15219104290008545, "loss_ce": 0.001312148873694241, "loss_iou": null, "loss_num": 0.0302734375, "loss_xval": 0.150390625, "num_input_tokens_seen": 249726068, "step": 1454 }, { "epoch": 0.38271848490826593, "grad_norm": 6.1144167801042295, "learning_rate": 5e-06, "loss": 0.1296, 
"num_input_tokens_seen": 249898264, "step": 1455 }, { "epoch": 0.38271848490826593, "loss": 0.20618750154972076, "loss_ce": 0.001353507163003087, "loss_iou": 0.5703125, "loss_num": 0.041015625, "loss_xval": 0.205078125, "num_input_tokens_seen": 249898264, "step": 1455 }, { "epoch": 0.38298152166765304, "grad_norm": 5.254827116017921, "learning_rate": 5e-06, "loss": 0.1403, "num_input_tokens_seen": 250070312, "step": 1456 }, { "epoch": 0.38298152166765304, "loss": 0.09227914363145828, "loss_ce": 0.00014657669817097485, "loss_iou": 0.6484375, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 250070312, "step": 1456 }, { "epoch": 0.3832445584270402, "grad_norm": 6.7921237774310095, "learning_rate": 5e-06, "loss": 0.1599, "num_input_tokens_seen": 250242620, "step": 1457 }, { "epoch": 0.3832445584270402, "loss": 0.16699600219726562, "loss_ce": 0.0013771126978099346, "loss_iou": 0.435546875, "loss_num": 0.033203125, "loss_xval": 0.166015625, "num_input_tokens_seen": 250242620, "step": 1457 }, { "epoch": 0.3835075951864273, "grad_norm": 5.478639252002706, "learning_rate": 5e-06, "loss": 0.1148, "num_input_tokens_seen": 250414736, "step": 1458 }, { "epoch": 0.3835075951864273, "loss": 0.05368629842996597, "loss_ce": 0.00015846370661165565, "loss_iou": 0.734375, "loss_num": 0.01068115234375, "loss_xval": 0.053466796875, "num_input_tokens_seen": 250414736, "step": 1458 }, { "epoch": 0.3837706319458144, "grad_norm": 5.108471243837392, "learning_rate": 5e-06, "loss": 0.1335, "num_input_tokens_seen": 250586900, "step": 1459 }, { "epoch": 0.3837706319458144, "loss": 0.12694557011127472, "loss_ce": 0.0016709101619198918, "loss_iou": 0.44921875, "loss_num": 0.0250244140625, "loss_xval": 0.125, "num_input_tokens_seen": 250586900, "step": 1459 }, { "epoch": 0.38403366870520156, "grad_norm": 5.673015930654883, "learning_rate": 5e-06, "loss": 0.1315, "num_input_tokens_seen": 250757288, "step": 1460 }, { "epoch": 0.38403366870520156, "loss": 
0.13443070650100708, "loss_ce": 0.0023811361752450466, "loss_iou": 0.671875, "loss_num": 0.0263671875, "loss_xval": 0.1318359375, "num_input_tokens_seen": 250757288, "step": 1460 }, { "epoch": 0.38429670546458866, "grad_norm": 8.46180134502284, "learning_rate": 5e-06, "loss": 0.1569, "num_input_tokens_seen": 250929524, "step": 1461 }, { "epoch": 0.38429670546458866, "loss": 0.10377843677997589, "loss_ce": 0.0022159344516694546, "loss_iou": 0.55078125, "loss_num": 0.020263671875, "loss_xval": 0.1015625, "num_input_tokens_seen": 250929524, "step": 1461 }, { "epoch": 0.3845597422239758, "grad_norm": 19.056127756550847, "learning_rate": 5e-06, "loss": 0.1583, "num_input_tokens_seen": 251100012, "step": 1462 }, { "epoch": 0.3845597422239758, "loss": 0.13582435250282288, "loss_ce": 0.0002042302949121222, "loss_iou": 0.68359375, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 251100012, "step": 1462 }, { "epoch": 0.3848227789833629, "grad_norm": 13.96527055803463, "learning_rate": 5e-06, "loss": 0.1363, "num_input_tokens_seen": 251272524, "step": 1463 }, { "epoch": 0.3848227789833629, "loss": 0.09230969846248627, "loss_ce": 0.0005128234624862671, "loss_iou": 0.474609375, "loss_num": 0.018310546875, "loss_xval": 0.091796875, "num_input_tokens_seen": 251272524, "step": 1463 }, { "epoch": 0.38508581574275, "grad_norm": 8.704363897996222, "learning_rate": 5e-06, "loss": 0.1065, "num_input_tokens_seen": 251444684, "step": 1464 }, { "epoch": 0.38508581574275, "loss": 0.06702134013175964, "loss_ce": 0.0022020034957677126, "loss_iou": 0.59375, "loss_num": 0.012939453125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 251444684, "step": 1464 }, { "epoch": 0.3853488525021372, "grad_norm": 5.865897344217803, "learning_rate": 5e-06, "loss": 0.1043, "num_input_tokens_seen": 251616872, "step": 1465 }, { "epoch": 0.3853488525021372, "loss": 0.0932367742061615, "loss_ce": 0.0029200036078691483, "loss_iou": 0.6015625, "loss_num": 0.01806640625, 
"loss_xval": 0.09033203125, "num_input_tokens_seen": 251616872, "step": 1465 }, { "epoch": 0.3856118892615243, "grad_norm": 7.51772092325757, "learning_rate": 5e-06, "loss": 0.1453, "num_input_tokens_seen": 251787464, "step": 1466 }, { "epoch": 0.3856118892615243, "loss": 0.08029404282569885, "loss_ce": 0.0014366218820214272, "loss_iou": 0.6015625, "loss_num": 0.0157470703125, "loss_xval": 0.0791015625, "num_input_tokens_seen": 251787464, "step": 1466 }, { "epoch": 0.38587492602091145, "grad_norm": 5.797632938807012, "learning_rate": 5e-06, "loss": 0.1405, "num_input_tokens_seen": 251959420, "step": 1467 }, { "epoch": 0.38587492602091145, "loss": 0.1458974927663803, "loss_ce": 0.0016409052768722177, "loss_iou": 0.373046875, "loss_num": 0.02880859375, "loss_xval": 0.14453125, "num_input_tokens_seen": 251959420, "step": 1467 }, { "epoch": 0.38613796278029855, "grad_norm": 11.05258372992866, "learning_rate": 5e-06, "loss": 0.1248, "num_input_tokens_seen": 252130168, "step": 1468 }, { "epoch": 0.38613796278029855, "loss": 0.13372868299484253, "loss_ce": 0.002472587861120701, "loss_iou": 0.6015625, "loss_num": 0.0262451171875, "loss_xval": 0.130859375, "num_input_tokens_seen": 252130168, "step": 1468 }, { "epoch": 0.38640099953968565, "grad_norm": 14.608414515106098, "learning_rate": 5e-06, "loss": 0.2148, "num_input_tokens_seen": 252302400, "step": 1469 }, { "epoch": 0.38640099953968565, "loss": 0.17314574122428894, "loss_ce": 0.0009960737079381943, "loss_iou": 0.45703125, "loss_num": 0.034423828125, "loss_xval": 0.171875, "num_input_tokens_seen": 252302400, "step": 1469 }, { "epoch": 0.3866640362990728, "grad_norm": 5.302717622816423, "learning_rate": 5e-06, "loss": 0.114, "num_input_tokens_seen": 252474472, "step": 1470 }, { "epoch": 0.3866640362990728, "loss": 0.11253952234983444, "loss_ce": 0.0038511687889695168, "loss_iou": 0.6015625, "loss_num": 0.021728515625, "loss_xval": 0.10888671875, "num_input_tokens_seen": 252474472, "step": 1470 }, { "epoch": 
0.3869270730584599, "grad_norm": 6.326570096654801, "learning_rate": 5e-06, "loss": 0.1295, "num_input_tokens_seen": 252646620, "step": 1471 }, { "epoch": 0.3869270730584599, "loss": 0.12959828972816467, "loss_ce": 0.0016685951268300414, "loss_iou": 0.4296875, "loss_num": 0.025634765625, "loss_xval": 0.1279296875, "num_input_tokens_seen": 252646620, "step": 1471 }, { "epoch": 0.387190109817847, "grad_norm": 4.47840778836987, "learning_rate": 5e-06, "loss": 0.1374, "num_input_tokens_seen": 252818676, "step": 1472 }, { "epoch": 0.387190109817847, "loss": 0.09814205765724182, "loss_ce": 0.0010656400118023157, "loss_iou": 0.6796875, "loss_num": 0.0194091796875, "loss_xval": 0.09716796875, "num_input_tokens_seen": 252818676, "step": 1472 }, { "epoch": 0.3874531465772342, "grad_norm": 8.330831236084894, "learning_rate": 5e-06, "loss": 0.1907, "num_input_tokens_seen": 252989028, "step": 1473 }, { "epoch": 0.3874531465772342, "loss": 0.13005918264389038, "loss_ce": 0.0018853526562452316, "loss_iou": 0.408203125, "loss_num": 0.025634765625, "loss_xval": 0.1279296875, "num_input_tokens_seen": 252989028, "step": 1473 }, { "epoch": 0.3877161833366213, "grad_norm": 5.836741339054395, "learning_rate": 5e-06, "loss": 0.1451, "num_input_tokens_seen": 253160776, "step": 1474 }, { "epoch": 0.3877161833366213, "loss": 0.16334399580955505, "loss_ce": 0.0019365199841558933, "loss_iou": 0.61328125, "loss_num": 0.0322265625, "loss_xval": 0.1611328125, "num_input_tokens_seen": 253160776, "step": 1474 }, { "epoch": 0.38797922009600844, "grad_norm": 6.563945205492379, "learning_rate": 5e-06, "loss": 0.0887, "num_input_tokens_seen": 253333080, "step": 1475 }, { "epoch": 0.38797922009600844, "loss": 0.11816126108169556, "loss_ce": 0.0005465176654979587, "loss_iou": 0.462890625, "loss_num": 0.0235595703125, "loss_xval": 0.11767578125, "num_input_tokens_seen": 253333080, "step": 1475 }, { "epoch": 0.38824225685539554, "grad_norm": 5.424235438541153, "learning_rate": 5e-06, "loss": 0.1446, 
"num_input_tokens_seen": 253504996, "step": 1476 }, { "epoch": 0.38824225685539554, "loss": 0.2769371271133423, "loss_ce": 0.0038658371195197105, "loss_iou": 0.65625, "loss_num": 0.0546875, "loss_xval": 0.2734375, "num_input_tokens_seen": 253504996, "step": 1476 }, { "epoch": 0.38850529361478264, "grad_norm": 8.554483113755165, "learning_rate": 5e-06, "loss": 0.1629, "num_input_tokens_seen": 253677224, "step": 1477 }, { "epoch": 0.38850529361478264, "loss": 0.2999889850616455, "loss_ce": 0.0013744828756898642, "loss_iou": 0.373046875, "loss_num": 0.0595703125, "loss_xval": 0.298828125, "num_input_tokens_seen": 253677224, "step": 1477 }, { "epoch": 0.3887683303741698, "grad_norm": 9.64593506404694, "learning_rate": 5e-06, "loss": 0.1791, "num_input_tokens_seen": 253849140, "step": 1478 }, { "epoch": 0.3887683303741698, "loss": 0.10786094516515732, "loss_ce": 0.0013545926194638014, "loss_iou": 0.6015625, "loss_num": 0.021240234375, "loss_xval": 0.1064453125, "num_input_tokens_seen": 253849140, "step": 1478 }, { "epoch": 0.3890313671335569, "grad_norm": 4.897950380873555, "learning_rate": 5e-06, "loss": 0.1308, "num_input_tokens_seen": 254021328, "step": 1479 }, { "epoch": 0.3890313671335569, "loss": 0.09722624719142914, "loss_ce": 0.0006075926939956844, "loss_iou": 0.5078125, "loss_num": 0.019287109375, "loss_xval": 0.0966796875, "num_input_tokens_seen": 254021328, "step": 1479 }, { "epoch": 0.38929440389294406, "grad_norm": 15.722256371647902, "learning_rate": 5e-06, "loss": 0.1371, "num_input_tokens_seen": 254191780, "step": 1480 }, { "epoch": 0.38929440389294406, "loss": 0.24498049914836884, "loss_ce": 0.003861122764647007, "loss_iou": 0.466796875, "loss_num": 0.04833984375, "loss_xval": 0.2412109375, "num_input_tokens_seen": 254191780, "step": 1480 }, { "epoch": 0.38955744065233117, "grad_norm": 8.008352967265257, "learning_rate": 5e-06, "loss": 0.1663, "num_input_tokens_seen": 254363968, "step": 1481 }, { "epoch": 0.38955744065233117, "loss": 
0.11913852393627167, "loss_ce": 0.004117771051824093, "loss_iou": 0.57421875, "loss_num": 0.02294921875, "loss_xval": 0.115234375, "num_input_tokens_seen": 254363968, "step": 1481 }, { "epoch": 0.38982047741171827, "grad_norm": 49.00606130732605, "learning_rate": 5e-06, "loss": 0.1557, "num_input_tokens_seen": 254534476, "step": 1482 }, { "epoch": 0.38982047741171827, "loss": 0.10790035128593445, "loss_ce": 0.001271925400942564, "loss_iou": 0.67578125, "loss_num": 0.0213623046875, "loss_xval": 0.1064453125, "num_input_tokens_seen": 254534476, "step": 1482 }, { "epoch": 0.3900835141711054, "grad_norm": 7.1385674525154705, "learning_rate": 5e-06, "loss": 0.1465, "num_input_tokens_seen": 254706676, "step": 1483 }, { "epoch": 0.3900835141711054, "loss": 0.21054460108280182, "loss_ce": 0.0007057388429529965, "loss_iou": 0.458984375, "loss_num": 0.0419921875, "loss_xval": 0.2099609375, "num_input_tokens_seen": 254706676, "step": 1483 }, { "epoch": 0.39034655093049253, "grad_norm": 11.652107001526453, "learning_rate": 5e-06, "loss": 0.1735, "num_input_tokens_seen": 254877300, "step": 1484 }, { "epoch": 0.39034655093049253, "loss": 0.1516016721725464, "loss_ce": 0.0008448172593489289, "loss_iou": 0.62890625, "loss_num": 0.0301513671875, "loss_xval": 0.150390625, "num_input_tokens_seen": 254877300, "step": 1484 }, { "epoch": 0.39060958768987963, "grad_norm": 6.761059905321281, "learning_rate": 5e-06, "loss": 0.1391, "num_input_tokens_seen": 255049560, "step": 1485 }, { "epoch": 0.39060958768987963, "loss": 0.17154067754745483, "loss_ce": 0.00045912445057183504, "loss_iou": 0.431640625, "loss_num": 0.0341796875, "loss_xval": 0.1708984375, "num_input_tokens_seen": 255049560, "step": 1485 }, { "epoch": 0.3908726244492668, "grad_norm": 4.6049295798328655, "learning_rate": 5e-06, "loss": 0.1372, "num_input_tokens_seen": 255221840, "step": 1486 }, { "epoch": 0.3908726244492668, "loss": 0.12419994175434113, "loss_ce": 0.0011530672200024128, "loss_iou": 0.52734375, "loss_num": 
0.024658203125, "loss_xval": 0.123046875, "num_input_tokens_seen": 255221840, "step": 1486 }, { "epoch": 0.3911356612086539, "grad_norm": 9.631901215468224, "learning_rate": 5e-06, "loss": 0.1838, "num_input_tokens_seen": 255393804, "step": 1487 }, { "epoch": 0.3911356612086539, "loss": 0.14977988600730896, "loss_ce": 0.006835547741502523, "loss_iou": 0.40234375, "loss_num": 0.028564453125, "loss_xval": 0.142578125, "num_input_tokens_seen": 255393804, "step": 1487 }, { "epoch": 0.39139869796804105, "grad_norm": 6.350539398563022, "learning_rate": 5e-06, "loss": 0.149, "num_input_tokens_seen": 255565956, "step": 1488 }, { "epoch": 0.39139869796804105, "loss": 0.08242587745189667, "loss_ce": 0.0012491194065660238, "loss_iou": 0.62109375, "loss_num": 0.0162353515625, "loss_xval": 0.0810546875, "num_input_tokens_seen": 255565956, "step": 1488 }, { "epoch": 0.39166173472742816, "grad_norm": 8.23209899989984, "learning_rate": 5e-06, "loss": 0.1259, "num_input_tokens_seen": 255738272, "step": 1489 }, { "epoch": 0.39166173472742816, "loss": 0.11557637155056, "loss_ce": 0.00025044637732207775, "loss_iou": 0.52734375, "loss_num": 0.0230712890625, "loss_xval": 0.115234375, "num_input_tokens_seen": 255738272, "step": 1489 }, { "epoch": 0.39192477148681526, "grad_norm": 6.0709262055147555, "learning_rate": 5e-06, "loss": 0.1715, "num_input_tokens_seen": 255910628, "step": 1490 }, { "epoch": 0.39192477148681526, "loss": 0.06516115367412567, "loss_ce": 0.0002197464054916054, "loss_iou": 0.578125, "loss_num": 0.01300048828125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 255910628, "step": 1490 }, { "epoch": 0.3921878082462024, "grad_norm": 4.913795659722062, "learning_rate": 5e-06, "loss": 0.1073, "num_input_tokens_seen": 256079800, "step": 1491 }, { "epoch": 0.3921878082462024, "loss": 0.12981277704238892, "loss_ce": 0.0006013567326590419, "loss_iou": 0.6015625, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 256079800, "step": 1491 }, { 
"epoch": 0.3924508450055895, "grad_norm": 10.053329401258154, "learning_rate": 5e-06, "loss": 0.0995, "num_input_tokens_seen": 256251908, "step": 1492 }, { "epoch": 0.3924508450055895, "loss": 0.11420565843582153, "loss_ce": 0.0003750904288608581, "loss_iou": 0.5859375, "loss_num": 0.0228271484375, "loss_xval": 0.11376953125, "num_input_tokens_seen": 256251908, "step": 1492 }, { "epoch": 0.3927138817649767, "grad_norm": 9.9668682397801, "learning_rate": 5e-06, "loss": 0.1763, "num_input_tokens_seen": 256423824, "step": 1493 }, { "epoch": 0.3927138817649767, "loss": 0.1758217215538025, "loss_ce": 0.0006203037919476628, "loss_iou": 0.40234375, "loss_num": 0.03515625, "loss_xval": 0.1748046875, "num_input_tokens_seen": 256423824, "step": 1493 }, { "epoch": 0.3929769185243638, "grad_norm": 11.611915023620941, "learning_rate": 5e-06, "loss": 0.1558, "num_input_tokens_seen": 256596028, "step": 1494 }, { "epoch": 0.3929769185243638, "loss": 0.1324601024389267, "loss_ce": 0.0007157221552915871, "loss_iou": 0.4765625, "loss_num": 0.0263671875, "loss_xval": 0.1318359375, "num_input_tokens_seen": 256596028, "step": 1494 }, { "epoch": 0.3932399552837509, "grad_norm": 8.374710474816515, "learning_rate": 5e-06, "loss": 0.1707, "num_input_tokens_seen": 256764620, "step": 1495 }, { "epoch": 0.3932399552837509, "loss": 0.14535440504550934, "loss_ce": 0.00076211744453758, "loss_iou": 0.35546875, "loss_num": 0.0289306640625, "loss_xval": 0.14453125, "num_input_tokens_seen": 256764620, "step": 1495 }, { "epoch": 0.39350299204313804, "grad_norm": 3.7888421024340295, "learning_rate": 5e-06, "loss": 0.114, "num_input_tokens_seen": 256936860, "step": 1496 }, { "epoch": 0.39350299204313804, "loss": 0.11324183642864227, "loss_ce": 0.0021883677691221237, "loss_iou": 0.75, "loss_num": 0.022216796875, "loss_xval": 0.11083984375, "num_input_tokens_seen": 256936860, "step": 1496 }, { "epoch": 0.39376602880252515, "grad_norm": 6.4208864756609865, "learning_rate": 5e-06, "loss": 0.141, 
"num_input_tokens_seen": 257108880, "step": 1497 }, { "epoch": 0.39376602880252515, "loss": 0.14706888794898987, "loss_ce": 0.0031480020843446255, "loss_iou": 0.50390625, "loss_num": 0.02880859375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 257108880, "step": 1497 }, { "epoch": 0.39402906556191225, "grad_norm": 7.914747299122871, "learning_rate": 5e-06, "loss": 0.1717, "num_input_tokens_seen": 257280748, "step": 1498 }, { "epoch": 0.39402906556191225, "loss": 0.12499146163463593, "loss_ce": 0.0012732032919302583, "loss_iou": 0.58203125, "loss_num": 0.0247802734375, "loss_xval": 0.12353515625, "num_input_tokens_seen": 257280748, "step": 1498 }, { "epoch": 0.3942921023212994, "grad_norm": 6.479265884809814, "learning_rate": 5e-06, "loss": 0.1054, "num_input_tokens_seen": 257452832, "step": 1499 }, { "epoch": 0.3942921023212994, "loss": 0.15066663920879364, "loss_ce": 0.000764301570598036, "loss_iou": 0.515625, "loss_num": 0.030029296875, "loss_xval": 0.150390625, "num_input_tokens_seen": 257452832, "step": 1499 }, { "epoch": 0.3945551390806865, "grad_norm": 30.54873849483752, "learning_rate": 5e-06, "loss": 0.1459, "num_input_tokens_seen": 257625396, "step": 1500 }, { "epoch": 0.3945551390806865, "eval_websight_new_CIoU": 0.8511096835136414, "eval_websight_new_GIoU": 0.8473265767097473, "eval_websight_new_IoU": 0.8611634373664856, "eval_websight_new_MAE_all": 0.029499702155590057, "eval_websight_new_MAE_h": 0.019038498401641846, "eval_websight_new_MAE_w": 0.04285556077957153, "eval_websight_new_MAE_x": 0.04122600890696049, "eval_websight_new_MAE_y": 0.014878739370033145, "eval_websight_new_NUM_probability": 0.9999458193778992, "eval_websight_new_inside_bbox": 0.984375, "eval_websight_new_loss": 0.13057953119277954, "eval_websight_new_loss_ce": 1.3184878298488911e-05, "eval_websight_new_loss_iou": 0.335205078125, "eval_websight_new_loss_num": 0.022043228149414062, "eval_websight_new_loss_xval": 0.1102447509765625, "eval_websight_new_runtime": 55.84, 
"eval_websight_new_samples_per_second": 0.895, "eval_websight_new_steps_per_second": 0.036, "num_input_tokens_seen": 257625396, "step": 1500 }, { "epoch": 0.3945551390806865, "eval_seeclick_CIoU": 0.6286773383617401, "eval_seeclick_GIoU": 0.627009391784668, "eval_seeclick_IoU": 0.646778017282486, "eval_seeclick_MAE_all": 0.04383368603885174, "eval_seeclick_MAE_h": 0.028551836498081684, "eval_seeclick_MAE_w": 0.05717572197318077, "eval_seeclick_MAE_x": 0.06311946921050549, "eval_seeclick_MAE_y": 0.02648772206157446, "eval_seeclick_NUM_probability": 0.9999659061431885, "eval_seeclick_inside_bbox": 0.953125, "eval_seeclick_loss": 0.205887109041214, "eval_seeclick_loss_ce": 0.008159147575497627, "eval_seeclick_loss_iou": 0.525634765625, "eval_seeclick_loss_num": 0.0388031005859375, "eval_seeclick_loss_xval": 0.1939697265625, "eval_seeclick_runtime": 77.0722, "eval_seeclick_samples_per_second": 0.558, "eval_seeclick_steps_per_second": 0.026, "num_input_tokens_seen": 257625396, "step": 1500 }, { "epoch": 0.3945551390806865, "eval_icons_CIoU": 0.8122206926345825, "eval_icons_GIoU": 0.8060621917247772, "eval_icons_IoU": 0.8242988586425781, "eval_icons_MAE_all": 0.029898496810346842, "eval_icons_MAE_h": 0.02304963255301118, "eval_icons_MAE_w": 0.03834102302789688, "eval_icons_MAE_x": 0.035982510074973106, "eval_icons_MAE_y": 0.022220822051167488, "eval_icons_NUM_probability": 0.9998922646045685, "eval_icons_inside_bbox": 0.9288194477558136, "eval_icons_loss": 0.08346885442733765, "eval_icons_loss_ce": 3.6747020203620195e-05, "eval_icons_loss_iou": 0.5107421875, "eval_icons_loss_num": 0.014827728271484375, "eval_icons_loss_xval": 0.0741424560546875, "eval_icons_runtime": 80.4282, "eval_icons_samples_per_second": 0.622, "eval_icons_steps_per_second": 0.025, "num_input_tokens_seen": 257625396, "step": 1500 }, { "epoch": 0.3945551390806865, "eval_screenspot_CIoU": 0.5216498871644338, "eval_screenspot_GIoU": 0.5137112041314443, "eval_screenspot_IoU": 0.5683091680208842, 
"eval_screenspot_MAE_all": 0.09167192876338959, "eval_screenspot_MAE_h": 0.04900683710972468, "eval_screenspot_MAE_w": 0.15865817666053772, "eval_screenspot_MAE_x": 0.1116797278324763, "eval_screenspot_MAE_y": 0.04734297779699167, "eval_screenspot_NUM_probability": 0.9997473557790121, "eval_screenspot_inside_bbox": 0.8291666706403097, "eval_screenspot_loss": 0.8493114709854126, "eval_screenspot_loss_ce": 0.5014328161875407, "eval_screenspot_loss_iou": 0.4333089192708333, "eval_screenspot_loss_num": 0.06844584147135417, "eval_screenspot_loss_xval": 0.3421223958333333, "eval_screenspot_runtime": 144.4244, "eval_screenspot_samples_per_second": 0.616, "eval_screenspot_steps_per_second": 0.021, "num_input_tokens_seen": 257625396, "step": 1500 }, { "epoch": 0.3945551390806865, "loss": 0.8482523560523987, "loss_ce": 0.4914408326148987, "loss_iou": 0.369140625, "loss_num": 0.0712890625, "loss_xval": 0.357421875, "num_input_tokens_seen": 257625396, "step": 1500 }, { "epoch": 0.39481817584007367, "grad_norm": 4.597601910727454, "learning_rate": 5e-06, "loss": 0.1265, "num_input_tokens_seen": 257797176, "step": 1501 }, { "epoch": 0.39481817584007367, "loss": 0.11587628722190857, "loss_ce": 0.0010386451613157988, "loss_iou": 0.412109375, "loss_num": 0.02294921875, "loss_xval": 0.11474609375, "num_input_tokens_seen": 257797176, "step": 1501 }, { "epoch": 0.3950812125994608, "grad_norm": 20.41167402936065, "learning_rate": 5e-06, "loss": 0.1429, "num_input_tokens_seen": 257969472, "step": 1502 }, { "epoch": 0.3950812125994608, "loss": 0.13194513320922852, "loss_ce": 0.00023126379528548568, "loss_iou": 0.5078125, "loss_num": 0.0263671875, "loss_xval": 0.1318359375, "num_input_tokens_seen": 257969472, "step": 1502 }, { "epoch": 0.3953442493588479, "grad_norm": 5.303324872084742, "learning_rate": 5e-06, "loss": 0.1594, "num_input_tokens_seen": 258141772, "step": 1503 }, { "epoch": 0.3953442493588479, "loss": 0.26048120856285095, "loss_ce": 0.0004714562091976404, "loss_iou": 
0.478515625, "loss_num": 0.052001953125, "loss_xval": 0.259765625, "num_input_tokens_seen": 258141772, "step": 1503 }, { "epoch": 0.39560728611823504, "grad_norm": 8.26410044022957, "learning_rate": 5e-06, "loss": 0.1116, "num_input_tokens_seen": 258312152, "step": 1504 }, { "epoch": 0.39560728611823504, "loss": 0.11661653220653534, "loss_ce": 0.0012600838672369719, "loss_iou": 0.58984375, "loss_num": 0.0230712890625, "loss_xval": 0.115234375, "num_input_tokens_seen": 258312152, "step": 1504 }, { "epoch": 0.39587032287762214, "grad_norm": 5.069526368720703, "learning_rate": 5e-06, "loss": 0.1434, "num_input_tokens_seen": 258484140, "step": 1505 }, { "epoch": 0.39587032287762214, "loss": 0.15614590048789978, "loss_ce": 0.002184713026508689, "loss_iou": 0.578125, "loss_num": 0.03076171875, "loss_xval": 0.154296875, "num_input_tokens_seen": 258484140, "step": 1505 }, { "epoch": 0.3961333596370093, "grad_norm": 8.21868694477306, "learning_rate": 5e-06, "loss": 0.1234, "num_input_tokens_seen": 258656432, "step": 1506 }, { "epoch": 0.3961333596370093, "loss": 0.15648871660232544, "loss_ce": 0.006220155395567417, "loss_iou": 0.65234375, "loss_num": 0.030029296875, "loss_xval": 0.150390625, "num_input_tokens_seen": 258656432, "step": 1506 }, { "epoch": 0.3963963963963964, "grad_norm": 4.840531799006556, "learning_rate": 5e-06, "loss": 0.1173, "num_input_tokens_seen": 258828268, "step": 1507 }, { "epoch": 0.3963963963963964, "loss": 0.16232186555862427, "loss_ce": 0.0027149375528097153, "loss_iou": 0.51953125, "loss_num": 0.031982421875, "loss_xval": 0.1591796875, "num_input_tokens_seen": 258828268, "step": 1507 }, { "epoch": 0.3966594331557835, "grad_norm": 13.037528909207806, "learning_rate": 5e-06, "loss": 0.1478, "num_input_tokens_seen": 259000404, "step": 1508 }, { "epoch": 0.3966594331557835, "loss": 0.14472803473472595, "loss_ce": 0.0009902361780405045, "loss_iou": 0.625, "loss_num": 0.0286865234375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 259000404, 
"step": 1508 }, { "epoch": 0.39692246991517066, "grad_norm": 5.085713777784683, "learning_rate": 5e-06, "loss": 0.1145, "num_input_tokens_seen": 259172564, "step": 1509 }, { "epoch": 0.39692246991517066, "loss": 0.07742594182491302, "loss_ce": 0.001498206052929163, "loss_iou": 0.50390625, "loss_num": 0.01519775390625, "loss_xval": 0.076171875, "num_input_tokens_seen": 259172564, "step": 1509 }, { "epoch": 0.39718550667455776, "grad_norm": 11.735468019604104, "learning_rate": 5e-06, "loss": 0.1383, "num_input_tokens_seen": 259344984, "step": 1510 }, { "epoch": 0.39718550667455776, "loss": 0.19085130095481873, "loss_ce": 0.006021593697369099, "loss_iou": 0.17578125, "loss_num": 0.036865234375, "loss_xval": 0.1845703125, "num_input_tokens_seen": 259344984, "step": 1510 }, { "epoch": 0.39744854343394487, "grad_norm": 5.2454273554423505, "learning_rate": 5e-06, "loss": 0.1176, "num_input_tokens_seen": 259517348, "step": 1511 }, { "epoch": 0.39744854343394487, "loss": 0.18470171093940735, "loss_ce": 0.0007722551235929132, "loss_iou": 0.38671875, "loss_num": 0.036865234375, "loss_xval": 0.18359375, "num_input_tokens_seen": 259517348, "step": 1511 }, { "epoch": 0.397711580193332, "grad_norm": 5.93272222700996, "learning_rate": 5e-06, "loss": 0.1389, "num_input_tokens_seen": 259689536, "step": 1512 }, { "epoch": 0.397711580193332, "loss": 0.08698225021362305, "loss_ce": 9.870098438113928e-05, "loss_iou": 0.46875, "loss_num": 0.017333984375, "loss_xval": 0.0869140625, "num_input_tokens_seen": 259689536, "step": 1512 }, { "epoch": 0.39797461695271913, "grad_norm": 6.869955142591404, "learning_rate": 5e-06, "loss": 0.1645, "num_input_tokens_seen": 259861960, "step": 1513 }, { "epoch": 0.39797461695271913, "loss": 0.17443957924842834, "loss_ce": 0.0013743957970291376, "loss_iou": 0.48828125, "loss_num": 0.03466796875, "loss_xval": 0.1728515625, "num_input_tokens_seen": 259861960, "step": 1513 }, { "epoch": 0.3982376537121063, "grad_norm": 5.07838683649017, "learning_rate": 
5e-06, "loss": 0.1222, "num_input_tokens_seen": 260034008, "step": 1514 }, { "epoch": 0.3982376537121063, "loss": 0.11854679882526398, "loss_ce": 0.0005353257874958217, "loss_iou": 0.498046875, "loss_num": 0.0235595703125, "loss_xval": 0.1181640625, "num_input_tokens_seen": 260034008, "step": 1514 }, { "epoch": 0.3985006904714934, "grad_norm": 4.287082894022155, "learning_rate": 5e-06, "loss": 0.1279, "num_input_tokens_seen": 260206128, "step": 1515 }, { "epoch": 0.3985006904714934, "loss": 0.13691899180412292, "loss_ce": 0.004594762809574604, "loss_iou": 0.57421875, "loss_num": 0.0264892578125, "loss_xval": 0.1328125, "num_input_tokens_seen": 260206128, "step": 1515 }, { "epoch": 0.3987637272308805, "grad_norm": 11.905443274772528, "learning_rate": 5e-06, "loss": 0.1483, "num_input_tokens_seen": 260377024, "step": 1516 }, { "epoch": 0.3987637272308805, "loss": 0.2244867980480194, "loss_ce": 0.004058347083628178, "loss_iou": 0.451171875, "loss_num": 0.044189453125, "loss_xval": 0.220703125, "num_input_tokens_seen": 260377024, "step": 1516 }, { "epoch": 0.39902676399026765, "grad_norm": 5.162416897168446, "learning_rate": 5e-06, "loss": 0.1643, "num_input_tokens_seen": 260549044, "step": 1517 }, { "epoch": 0.39902676399026765, "loss": 0.1554432064294815, "loss_ce": 0.006700540892779827, "loss_iou": 0.49609375, "loss_num": 0.02978515625, "loss_xval": 0.1484375, "num_input_tokens_seen": 260549044, "step": 1517 }, { "epoch": 0.39928980074965476, "grad_norm": 10.132521694727306, "learning_rate": 5e-06, "loss": 0.1059, "num_input_tokens_seen": 260721092, "step": 1518 }, { "epoch": 0.39928980074965476, "loss": 0.10422030091285706, "loss_ce": 0.0018338197842240334, "loss_iou": 0.56640625, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 260721092, "step": 1518 }, { "epoch": 0.3995528375090419, "grad_norm": 5.444540349408582, "learning_rate": 5e-06, "loss": 0.1115, "num_input_tokens_seen": 260893156, "step": 1519 }, { "epoch": 
0.3995528375090419, "loss": 0.151461660861969, "loss_ce": 0.0018339705420657992, "loss_iou": 0.45703125, "loss_num": 0.0299072265625, "loss_xval": 0.1494140625, "num_input_tokens_seen": 260893156, "step": 1519 }, { "epoch": 0.399815874268429, "grad_norm": 11.977186984155779, "learning_rate": 5e-06, "loss": 0.0934, "num_input_tokens_seen": 261065364, "step": 1520 }, { "epoch": 0.399815874268429, "loss": 0.10812121629714966, "loss_ce": 0.0015843516448512673, "loss_iou": 0.53125, "loss_num": 0.021240234375, "loss_xval": 0.1064453125, "num_input_tokens_seen": 261065364, "step": 1520 }, { "epoch": 0.4000789110278161, "grad_norm": 18.63106485848457, "learning_rate": 5e-06, "loss": 0.1179, "num_input_tokens_seen": 261237376, "step": 1521 }, { "epoch": 0.4000789110278161, "loss": 0.16819404065608978, "loss_ce": 0.0020105685107409954, "loss_iou": 0.4296875, "loss_num": 0.033203125, "loss_xval": 0.166015625, "num_input_tokens_seen": 261237376, "step": 1521 }, { "epoch": 0.4003419477872033, "grad_norm": 10.102453678806217, "learning_rate": 5e-06, "loss": 0.1354, "num_input_tokens_seen": 261409528, "step": 1522 }, { "epoch": 0.4003419477872033, "loss": 0.16048389673233032, "loss_ce": 0.0018229965353384614, "loss_iou": 0.53515625, "loss_num": 0.03173828125, "loss_xval": 0.158203125, "num_input_tokens_seen": 261409528, "step": 1522 }, { "epoch": 0.4006049845465904, "grad_norm": 4.815296030108717, "learning_rate": 5e-06, "loss": 0.2043, "num_input_tokens_seen": 261581516, "step": 1523 }, { "epoch": 0.4006049845465904, "loss": 0.16827590763568878, "loss_ce": 0.0006428433116525412, "loss_iou": 0.5390625, "loss_num": 0.033447265625, "loss_xval": 0.16796875, "num_input_tokens_seen": 261581516, "step": 1523 }, { "epoch": 0.4008680213059775, "grad_norm": 7.523079284642639, "learning_rate": 5e-06, "loss": 0.1283, "num_input_tokens_seen": 261753732, "step": 1524 }, { "epoch": 0.4008680213059775, "loss": 0.07389776408672333, "loss_ce": 0.0012354037025943398, "loss_iou": 0.58984375, 
"loss_num": 0.0145263671875, "loss_xval": 0.07275390625, "num_input_tokens_seen": 261753732, "step": 1524 }, { "epoch": 0.40113105806536464, "grad_norm": 5.8778602893728955, "learning_rate": 5e-06, "loss": 0.167, "num_input_tokens_seen": 261925788, "step": 1525 }, { "epoch": 0.40113105806536464, "loss": 0.1510400027036667, "loss_ce": 0.00025264715077355504, "loss_iou": 0.359375, "loss_num": 0.0301513671875, "loss_xval": 0.150390625, "num_input_tokens_seen": 261925788, "step": 1525 }, { "epoch": 0.40139409482475175, "grad_norm": 5.850697556727792, "learning_rate": 5e-06, "loss": 0.1457, "num_input_tokens_seen": 262097916, "step": 1526 }, { "epoch": 0.40139409482475175, "loss": 0.1881968379020691, "loss_ce": 0.002436349866911769, "loss_iou": 0.53125, "loss_num": 0.037109375, "loss_xval": 0.185546875, "num_input_tokens_seen": 262097916, "step": 1526 }, { "epoch": 0.4016571315841389, "grad_norm": 6.537837429507002, "learning_rate": 5e-06, "loss": 0.122, "num_input_tokens_seen": 262269964, "step": 1527 }, { "epoch": 0.4016571315841389, "loss": 0.09432707726955414, "loss_ce": 0.004178154282271862, "loss_iou": 0.486328125, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 262269964, "step": 1527 }, { "epoch": 0.401920168343526, "grad_norm": 16.05135902232025, "learning_rate": 5e-06, "loss": 0.148, "num_input_tokens_seen": 262442240, "step": 1528 }, { "epoch": 0.401920168343526, "loss": 0.22578378021717072, "loss_ce": 0.0013269821647554636, "loss_iou": 0.6484375, "loss_num": 0.044921875, "loss_xval": 0.224609375, "num_input_tokens_seen": 262442240, "step": 1528 }, { "epoch": 0.4021832051029131, "grad_norm": 23.071196756112307, "learning_rate": 5e-06, "loss": 0.1737, "num_input_tokens_seen": 262612560, "step": 1529 }, { "epoch": 0.4021832051029131, "loss": 0.1546820104122162, "loss_ce": 0.0018499757861718535, "loss_iou": 0.451171875, "loss_num": 0.0306396484375, "loss_xval": 0.15234375, "num_input_tokens_seen": 262612560, "step": 1529 }, { 
"epoch": 0.40244624186230027, "grad_norm": 15.456725226696141, "learning_rate": 5e-06, "loss": 0.1397, "num_input_tokens_seen": 262784424, "step": 1530 }, { "epoch": 0.40244624186230027, "loss": 0.09581418335437775, "loss_ce": 0.00661130016669631, "loss_iou": 0.55859375, "loss_num": 0.017822265625, "loss_xval": 0.08935546875, "num_input_tokens_seen": 262784424, "step": 1530 }, { "epoch": 0.40270927862168737, "grad_norm": 4.753085917564063, "learning_rate": 5e-06, "loss": 0.1339, "num_input_tokens_seen": 262956820, "step": 1531 }, { "epoch": 0.40270927862168737, "loss": 0.17357602715492249, "loss_ce": 0.000693951384164393, "loss_iou": 0.484375, "loss_num": 0.034423828125, "loss_xval": 0.1728515625, "num_input_tokens_seen": 262956820, "step": 1531 }, { "epoch": 0.40297231538107453, "grad_norm": 8.23993450342922, "learning_rate": 5e-06, "loss": 0.1338, "num_input_tokens_seen": 263129000, "step": 1532 }, { "epoch": 0.40297231538107453, "loss": 0.104616180062294, "loss_ce": 0.002107633277773857, "loss_iou": 0.65234375, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 263129000, "step": 1532 }, { "epoch": 0.40323535214046163, "grad_norm": 5.28557757850573, "learning_rate": 5e-06, "loss": 0.1362, "num_input_tokens_seen": 263301064, "step": 1533 }, { "epoch": 0.40323535214046163, "loss": 0.1304527074098587, "loss_ce": 0.004811838734894991, "loss_iou": 0.48046875, "loss_num": 0.025146484375, "loss_xval": 0.1259765625, "num_input_tokens_seen": 263301064, "step": 1533 }, { "epoch": 0.40349838889984874, "grad_norm": 6.190146085552892, "learning_rate": 5e-06, "loss": 0.1623, "num_input_tokens_seen": 263473624, "step": 1534 }, { "epoch": 0.40349838889984874, "loss": 0.18483060598373413, "loss_ce": 0.00016873711138032377, "loss_iou": 0.609375, "loss_num": 0.036865234375, "loss_xval": 0.1845703125, "num_input_tokens_seen": 263473624, "step": 1534 }, { "epoch": 0.4037614256592359, "grad_norm": 7.04287854016945, "learning_rate": 5e-06, "loss": 0.1756, 
"num_input_tokens_seen": 263645984, "step": 1535 }, { "epoch": 0.4037614256592359, "loss": 0.2618888020515442, "loss_ce": 0.003191267838701606, "loss_iou": 0.51953125, "loss_num": 0.0517578125, "loss_xval": 0.2578125, "num_input_tokens_seen": 263645984, "step": 1535 }, { "epoch": 0.404024462418623, "grad_norm": 12.524958974577965, "learning_rate": 5e-06, "loss": 0.1503, "num_input_tokens_seen": 263815796, "step": 1536 }, { "epoch": 0.404024462418623, "loss": 0.1076509952545166, "loss_ce": 0.0004732571542263031, "loss_iou": 0.48046875, "loss_num": 0.021484375, "loss_xval": 0.107421875, "num_input_tokens_seen": 263815796, "step": 1536 }, { "epoch": 0.4042874991780101, "grad_norm": 11.13747389496862, "learning_rate": 5e-06, "loss": 0.1302, "num_input_tokens_seen": 263985512, "step": 1537 }, { "epoch": 0.4042874991780101, "loss": 0.12391631305217743, "loss_ce": 0.004134822636842728, "loss_iou": 0.484375, "loss_num": 0.02392578125, "loss_xval": 0.11962890625, "num_input_tokens_seen": 263985512, "step": 1537 }, { "epoch": 0.40455053593739726, "grad_norm": 13.524733657823086, "learning_rate": 5e-06, "loss": 0.1388, "num_input_tokens_seen": 264157652, "step": 1538 }, { "epoch": 0.40455053593739726, "loss": 0.1372789740562439, "loss_ce": 0.00034661110839806497, "loss_iou": 0.498046875, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 264157652, "step": 1538 }, { "epoch": 0.40481357269678436, "grad_norm": 10.591750756623403, "learning_rate": 5e-06, "loss": 0.1444, "num_input_tokens_seen": 264328132, "step": 1539 }, { "epoch": 0.40481357269678436, "loss": 0.11795895546674728, "loss_ce": 0.0021752638276666403, "loss_iou": 0.484375, "loss_num": 0.023193359375, "loss_xval": 0.11572265625, "num_input_tokens_seen": 264328132, "step": 1539 }, { "epoch": 0.4050766094561715, "grad_norm": 13.38043942604694, "learning_rate": 5e-06, "loss": 0.159, "num_input_tokens_seen": 264500388, "step": 1540 }, { "epoch": 0.4050766094561715, "loss": 0.19694536924362183, 
"loss_ce": 0.00218218588270247, "loss_iou": 0.66015625, "loss_num": 0.0390625, "loss_xval": 0.1943359375, "num_input_tokens_seen": 264500388, "step": 1540 }, { "epoch": 0.4053396462155586, "grad_norm": 4.192366808850045, "learning_rate": 5e-06, "loss": 0.0912, "num_input_tokens_seen": 264672276, "step": 1541 }, { "epoch": 0.4053396462155586, "loss": 0.07517112791538239, "loss_ce": 0.00021995243150740862, "loss_iou": 0.55078125, "loss_num": 0.01495361328125, "loss_xval": 0.0751953125, "num_input_tokens_seen": 264672276, "step": 1541 }, { "epoch": 0.4056026829749457, "grad_norm": 5.297574673677269, "learning_rate": 5e-06, "loss": 0.1023, "num_input_tokens_seen": 264844464, "step": 1542 }, { "epoch": 0.4056026829749457, "loss": 0.12331673502922058, "loss_ce": 0.00359627278521657, "loss_iou": 0.6171875, "loss_num": 0.02392578125, "loss_xval": 0.11962890625, "num_input_tokens_seen": 264844464, "step": 1542 }, { "epoch": 0.4058657197343329, "grad_norm": 5.716341192152297, "learning_rate": 5e-06, "loss": 0.1333, "num_input_tokens_seen": 265016664, "step": 1543 }, { "epoch": 0.4058657197343329, "loss": 0.16498282551765442, "loss_ce": 0.001957924338057637, "loss_iou": 0.68359375, "loss_num": 0.03271484375, "loss_xval": 0.1630859375, "num_input_tokens_seen": 265016664, "step": 1543 }, { "epoch": 0.40612875649372, "grad_norm": 6.6104135503037895, "learning_rate": 5e-06, "loss": 0.1208, "num_input_tokens_seen": 265188924, "step": 1544 }, { "epoch": 0.40612875649372, "loss": 0.15974299609661102, "loss_ce": 0.0041338615119457245, "loss_iou": 0.5546875, "loss_num": 0.0311279296875, "loss_xval": 0.1552734375, "num_input_tokens_seen": 265188924, "step": 1544 }, { "epoch": 0.40639179325310715, "grad_norm": 5.4155340578643365, "learning_rate": 5e-06, "loss": 0.129, "num_input_tokens_seen": 265359312, "step": 1545 }, { "epoch": 0.40639179325310715, "loss": 0.21989840269088745, "loss_ce": 0.0024301379453390837, "loss_iou": 0.5546875, "loss_num": 0.04345703125, "loss_xval": 
0.2177734375, "num_input_tokens_seen": 265359312, "step": 1545 }, { "epoch": 0.40665483001249425, "grad_norm": 4.753477134622953, "learning_rate": 5e-06, "loss": 0.1179, "num_input_tokens_seen": 265531700, "step": 1546 }, { "epoch": 0.40665483001249425, "loss": 0.05390855669975281, "loss_ce": 0.0012352181365713477, "loss_iou": 0.734375, "loss_num": 0.01055908203125, "loss_xval": 0.052734375, "num_input_tokens_seen": 265531700, "step": 1546 }, { "epoch": 0.40691786677188135, "grad_norm": 6.220827805530868, "learning_rate": 5e-06, "loss": 0.1214, "num_input_tokens_seen": 265700312, "step": 1547 }, { "epoch": 0.40691786677188135, "loss": 0.12480812519788742, "loss_ce": 0.0009983108611777425, "loss_iou": 0.4375, "loss_num": 0.0247802734375, "loss_xval": 0.1240234375, "num_input_tokens_seen": 265700312, "step": 1547 }, { "epoch": 0.4071809035312685, "grad_norm": 6.131445725549716, "learning_rate": 5e-06, "loss": 0.1203, "num_input_tokens_seen": 265872224, "step": 1548 }, { "epoch": 0.4071809035312685, "loss": 0.12791498005390167, "loss_ce": 0.0017858227947726846, "loss_iou": 0.416015625, "loss_num": 0.0252685546875, "loss_xval": 0.1259765625, "num_input_tokens_seen": 265872224, "step": 1548 }, { "epoch": 0.4074439402906556, "grad_norm": 6.062694054094362, "learning_rate": 5e-06, "loss": 0.1324, "num_input_tokens_seen": 266044368, "step": 1549 }, { "epoch": 0.4074439402906556, "loss": 0.09559094905853271, "loss_ce": 0.00013196848158258945, "loss_iou": 0.625, "loss_num": 0.01904296875, "loss_xval": 0.095703125, "num_input_tokens_seen": 266044368, "step": 1549 }, { "epoch": 0.4077069770500427, "grad_norm": 6.379902327089613, "learning_rate": 5e-06, "loss": 0.1551, "num_input_tokens_seen": 266216648, "step": 1550 }, { "epoch": 0.4077069770500427, "loss": 0.26357126235961914, "loss_ce": 0.007589830085635185, "loss_iou": 0.62890625, "loss_num": 0.05126953125, "loss_xval": 0.255859375, "num_input_tokens_seen": 266216648, "step": 1550 }, { "epoch": 0.4079700138094299, 
"grad_norm": 5.16918021793661, "learning_rate": 5e-06, "loss": 0.1034, "num_input_tokens_seen": 266387044, "step": 1551 }, { "epoch": 0.4079700138094299, "loss": 0.0737496167421341, "loss_ce": 0.00215538265183568, "loss_iou": 0.53515625, "loss_num": 0.0142822265625, "loss_xval": 0.07177734375, "num_input_tokens_seen": 266387044, "step": 1551 }, { "epoch": 0.408233050568817, "grad_norm": 36.40974082123275, "learning_rate": 5e-06, "loss": 0.1306, "num_input_tokens_seen": 266557400, "step": 1552 }, { "epoch": 0.408233050568817, "loss": 0.13131964206695557, "loss_ce": 0.0013757951091974974, "loss_iou": 0.55078125, "loss_num": 0.0260009765625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 266557400, "step": 1552 }, { "epoch": 0.40849608732820414, "grad_norm": 4.785820854925791, "learning_rate": 5e-06, "loss": 0.1252, "num_input_tokens_seen": 266729564, "step": 1553 }, { "epoch": 0.40849608732820414, "loss": 0.12293117493391037, "loss_ce": 0.0009524148190394044, "loss_iou": 0.447265625, "loss_num": 0.0244140625, "loss_xval": 0.1220703125, "num_input_tokens_seen": 266729564, "step": 1553 }, { "epoch": 0.40875912408759124, "grad_norm": 19.710178190555972, "learning_rate": 5e-06, "loss": 0.1523, "num_input_tokens_seen": 266901912, "step": 1554 }, { "epoch": 0.40875912408759124, "loss": 0.10157528519630432, "loss_ce": 0.000501063244882971, "loss_iou": 0.51171875, "loss_num": 0.020263671875, "loss_xval": 0.10107421875, "num_input_tokens_seen": 266901912, "step": 1554 }, { "epoch": 0.40902216084697834, "grad_norm": 8.53788862222279, "learning_rate": 5e-06, "loss": 0.1432, "num_input_tokens_seen": 267074204, "step": 1555 }, { "epoch": 0.40902216084697834, "loss": 0.18818530440330505, "loss_ce": 0.0011735922889783978, "loss_iou": 0.59765625, "loss_num": 0.037353515625, "loss_xval": 0.1875, "num_input_tokens_seen": 267074204, "step": 1555 }, { "epoch": 0.4092851976063655, "grad_norm": 9.427604183463405, "learning_rate": 5e-06, "loss": 0.1563, "num_input_tokens_seen": 
267246448, "step": 1556 }, { "epoch": 0.4092851976063655, "loss": 0.11521363258361816, "loss_ce": 0.0002539134002290666, "loss_iou": 0.52734375, "loss_num": 0.02294921875, "loss_xval": 0.11474609375, "num_input_tokens_seen": 267246448, "step": 1556 }, { "epoch": 0.4095482343657526, "grad_norm": 6.05814112958034, "learning_rate": 5e-06, "loss": 0.1307, "num_input_tokens_seen": 267418524, "step": 1557 }, { "epoch": 0.4095482343657526, "loss": 0.17979669570922852, "loss_ce": 0.002214906271547079, "loss_iou": 0.451171875, "loss_num": 0.035400390625, "loss_xval": 0.177734375, "num_input_tokens_seen": 267418524, "step": 1557 }, { "epoch": 0.40981127112513976, "grad_norm": 5.372569051634822, "learning_rate": 5e-06, "loss": 0.1161, "num_input_tokens_seen": 267590932, "step": 1558 }, { "epoch": 0.40981127112513976, "loss": 0.08670764416456223, "loss_ce": 0.0017467074794694781, "loss_iou": 0.59375, "loss_num": 0.0169677734375, "loss_xval": 0.0849609375, "num_input_tokens_seen": 267590932, "step": 1558 }, { "epoch": 0.41007430788452687, "grad_norm": 7.2829791835725635, "learning_rate": 5e-06, "loss": 0.1428, "num_input_tokens_seen": 267763292, "step": 1559 }, { "epoch": 0.41007430788452687, "loss": 0.1519029438495636, "loss_ce": 0.002458356786519289, "loss_iou": 0.416015625, "loss_num": 0.0299072265625, "loss_xval": 0.1494140625, "num_input_tokens_seen": 267763292, "step": 1559 }, { "epoch": 0.41033734464391397, "grad_norm": 5.098616866005624, "learning_rate": 5e-06, "loss": 0.1, "num_input_tokens_seen": 267935712, "step": 1560 }, { "epoch": 0.41033734464391397, "loss": 0.11065052449703217, "loss_ce": 0.0015807072632014751, "loss_iou": 0.69140625, "loss_num": 0.0218505859375, "loss_xval": 0.10888671875, "num_input_tokens_seen": 267935712, "step": 1560 }, { "epoch": 0.41060038140330113, "grad_norm": 10.199812853245577, "learning_rate": 5e-06, "loss": 0.147, "num_input_tokens_seen": 268108048, "step": 1561 }, { "epoch": 0.41060038140330113, "loss": 0.09520716965198517, 
"loss_ce": 0.003898570779711008, "loss_iou": 0.46484375, "loss_num": 0.018310546875, "loss_xval": 0.09130859375, "num_input_tokens_seen": 268108048, "step": 1561 }, { "epoch": 0.41086341816268823, "grad_norm": 40.24459139135473, "learning_rate": 5e-06, "loss": 0.131, "num_input_tokens_seen": 268280092, "step": 1562 }, { "epoch": 0.41086341816268823, "loss": 0.07629628479480743, "loss_ce": 0.0009788942988961935, "loss_iou": 0.50390625, "loss_num": 0.01507568359375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 268280092, "step": 1562 }, { "epoch": 0.41112645492207534, "grad_norm": 10.392982307740652, "learning_rate": 5e-06, "loss": 0.1151, "num_input_tokens_seen": 268450660, "step": 1563 }, { "epoch": 0.41112645492207534, "loss": 0.122515007853508, "loss_ce": 0.0010550380684435368, "loss_iou": 0.4453125, "loss_num": 0.0242919921875, "loss_xval": 0.12158203125, "num_input_tokens_seen": 268450660, "step": 1563 }, { "epoch": 0.4113894916814625, "grad_norm": 9.96483413417414, "learning_rate": 5e-06, "loss": 0.1875, "num_input_tokens_seen": 268622864, "step": 1564 }, { "epoch": 0.4113894916814625, "loss": 0.22188733518123627, "loss_ce": 0.002923724940046668, "loss_iou": 0.5078125, "loss_num": 0.043701171875, "loss_xval": 0.21875, "num_input_tokens_seen": 268622864, "step": 1564 }, { "epoch": 0.4116525284408496, "grad_norm": 4.677477301563895, "learning_rate": 5e-06, "loss": 0.1157, "num_input_tokens_seen": 268793508, "step": 1565 }, { "epoch": 0.4116525284408496, "loss": 0.08342467993497849, "loss_ce": 0.00023376270837616175, "loss_iou": 0.53125, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 268793508, "step": 1565 }, { "epoch": 0.41191556520023676, "grad_norm": 5.461599242953511, "learning_rate": 5e-06, "loss": 0.1571, "num_input_tokens_seen": 268965604, "step": 1566 }, { "epoch": 0.41191556520023676, "loss": 0.21633076667785645, "loss_ce": 0.003531699301674962, "loss_iou": 0.71875, "loss_num": 0.04248046875, "loss_xval": 
0.212890625, "num_input_tokens_seen": 268965604, "step": 1566 }, { "epoch": 0.41217860195962386, "grad_norm": 4.406500798076577, "learning_rate": 5e-06, "loss": 0.1875, "num_input_tokens_seen": 269137692, "step": 1567 }, { "epoch": 0.41217860195962386, "loss": 0.1873759627342224, "loss_ce": 0.0010966623667627573, "loss_iou": 0.5703125, "loss_num": 0.037353515625, "loss_xval": 0.1865234375, "num_input_tokens_seen": 269137692, "step": 1567 }, { "epoch": 0.41244163871901096, "grad_norm": 5.2924295964415595, "learning_rate": 5e-06, "loss": 0.1409, "num_input_tokens_seen": 269309988, "step": 1568 }, { "epoch": 0.41244163871901096, "loss": 0.20892329514026642, "loss_ce": 0.0004729711217805743, "loss_iou": 0.57421875, "loss_num": 0.041748046875, "loss_xval": 0.2080078125, "num_input_tokens_seen": 269309988, "step": 1568 }, { "epoch": 0.4127046754783981, "grad_norm": 5.459543507128776, "learning_rate": 5e-06, "loss": 0.1387, "num_input_tokens_seen": 269482356, "step": 1569 }, { "epoch": 0.4127046754783981, "loss": 0.18314355611801147, "loss_ce": 0.005287110339850187, "loss_iou": 0.4921875, "loss_num": 0.03564453125, "loss_xval": 0.177734375, "num_input_tokens_seen": 269482356, "step": 1569 }, { "epoch": 0.4129677122377852, "grad_norm": 7.266131528232208, "learning_rate": 5e-06, "loss": 0.1709, "num_input_tokens_seen": 269654572, "step": 1570 }, { "epoch": 0.4129677122377852, "loss": 0.1481064409017563, "loss_ce": 0.002049315720796585, "loss_iou": 0.63671875, "loss_num": 0.029296875, "loss_xval": 0.146484375, "num_input_tokens_seen": 269654572, "step": 1570 }, { "epoch": 0.4132307489971724, "grad_norm": 9.868014321011369, "learning_rate": 5e-06, "loss": 0.169, "num_input_tokens_seen": 269826912, "step": 1571 }, { "epoch": 0.4132307489971724, "loss": 0.21563705801963806, "loss_ce": 0.0007627883460372686, "loss_iou": 0.390625, "loss_num": 0.04296875, "loss_xval": 0.21484375, "num_input_tokens_seen": 269826912, "step": 1571 }, { "epoch": 0.4134937857565595, "grad_norm": 
6.475065616065912, "learning_rate": 5e-06, "loss": 0.1331, "num_input_tokens_seen": 269998912, "step": 1572 }, { "epoch": 0.4134937857565595, "loss": 0.20219947397708893, "loss_ce": 0.0007529358845204115, "loss_iou": 0.609375, "loss_num": 0.040283203125, "loss_xval": 0.201171875, "num_input_tokens_seen": 269998912, "step": 1572 }, { "epoch": 0.4137568225159466, "grad_norm": 5.376660767415141, "learning_rate": 5e-06, "loss": 0.1247, "num_input_tokens_seen": 270171036, "step": 1573 }, { "epoch": 0.4137568225159466, "loss": 0.2056845724582672, "loss_ce": 0.0031394038815051317, "loss_iou": 0.4296875, "loss_num": 0.04052734375, "loss_xval": 0.2021484375, "num_input_tokens_seen": 270171036, "step": 1573 }, { "epoch": 0.41401985927533375, "grad_norm": 7.42827277488161, "learning_rate": 5e-06, "loss": 0.1727, "num_input_tokens_seen": 270342784, "step": 1574 }, { "epoch": 0.41401985927533375, "loss": 0.15383180975914001, "loss_ce": 0.00045046067680232227, "loss_iou": 0.4609375, "loss_num": 0.03076171875, "loss_xval": 0.1533203125, "num_input_tokens_seen": 270342784, "step": 1574 }, { "epoch": 0.41428289603472085, "grad_norm": 5.130198059184452, "learning_rate": 5e-06, "loss": 0.1234, "num_input_tokens_seen": 270514876, "step": 1575 }, { "epoch": 0.41428289603472085, "loss": 0.17609894275665283, "loss_ce": 0.001187438378110528, "loss_iou": 0.44140625, "loss_num": 0.034912109375, "loss_xval": 0.1748046875, "num_input_tokens_seen": 270514876, "step": 1575 }, { "epoch": 0.41454593279410795, "grad_norm": 7.421258492917825, "learning_rate": 5e-06, "loss": 0.0906, "num_input_tokens_seen": 270687292, "step": 1576 }, { "epoch": 0.41454593279410795, "loss": 0.07864829152822495, "loss_ce": 0.0015761489048600197, "loss_iou": 0.58984375, "loss_num": 0.01544189453125, "loss_xval": 0.0771484375, "num_input_tokens_seen": 270687292, "step": 1576 }, { "epoch": 0.4148089695534951, "grad_norm": 6.818009590404441, "learning_rate": 5e-06, "loss": 0.1344, "num_input_tokens_seen": 270859520, 
"step": 1577 }, { "epoch": 0.4148089695534951, "loss": 0.18715041875839233, "loss_ce": 0.0005964583833701909, "loss_iou": 0.5234375, "loss_num": 0.037353515625, "loss_xval": 0.1865234375, "num_input_tokens_seen": 270859520, "step": 1577 }, { "epoch": 0.4150720063128822, "grad_norm": 16.40610165209138, "learning_rate": 5e-06, "loss": 0.1347, "num_input_tokens_seen": 271029200, "step": 1578 }, { "epoch": 0.4150720063128822, "loss": 0.15075725317001343, "loss_ce": 0.0008091325289569795, "loss_iou": 0.61328125, "loss_num": 0.0299072265625, "loss_xval": 0.150390625, "num_input_tokens_seen": 271029200, "step": 1578 }, { "epoch": 0.41533504307226937, "grad_norm": 5.724561524768373, "learning_rate": 5e-06, "loss": 0.1535, "num_input_tokens_seen": 271201432, "step": 1579 }, { "epoch": 0.41533504307226937, "loss": 0.18409447371959686, "loss_ce": 0.0010500368662178516, "loss_iou": 0.443359375, "loss_num": 0.03662109375, "loss_xval": 0.1826171875, "num_input_tokens_seen": 271201432, "step": 1579 }, { "epoch": 0.4155980798316565, "grad_norm": 5.423586189008012, "learning_rate": 5e-06, "loss": 0.1218, "num_input_tokens_seen": 271373604, "step": 1580 }, { "epoch": 0.4155980798316565, "loss": 0.10582901537418365, "loss_ce": 0.0017030383460223675, "loss_iou": 0.58203125, "loss_num": 0.0208740234375, "loss_xval": 0.10400390625, "num_input_tokens_seen": 271373604, "step": 1580 }, { "epoch": 0.4158611165910436, "grad_norm": 4.639007188262618, "learning_rate": 5e-06, "loss": 0.1184, "num_input_tokens_seen": 271544156, "step": 1581 }, { "epoch": 0.4158611165910436, "loss": 0.08775737881660461, "loss_ce": 0.00032452167943120003, "loss_iou": 0.49609375, "loss_num": 0.0174560546875, "loss_xval": 0.08740234375, "num_input_tokens_seen": 271544156, "step": 1581 }, { "epoch": 0.41612415335043074, "grad_norm": 4.194479063501224, "learning_rate": 5e-06, "loss": 0.1263, "num_input_tokens_seen": 271716244, "step": 1582 }, { "epoch": 0.41612415335043074, "loss": 0.07552362233400345, "loss_ce": 
5.3650168410968035e-05, "loss_iou": 0.5859375, "loss_num": 0.01507568359375, "loss_xval": 0.07568359375, "num_input_tokens_seen": 271716244, "step": 1582 }, { "epoch": 0.41638719010981784, "grad_norm": 6.1414517921780325, "learning_rate": 5e-06, "loss": 0.1248, "num_input_tokens_seen": 271888460, "step": 1583 }, { "epoch": 0.41638719010981784, "loss": 0.09023694694042206, "loss_ce": 0.004726693499833345, "loss_iou": 0.62890625, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 271888460, "step": 1583 }, { "epoch": 0.416650226869205, "grad_norm": 4.588576414677694, "learning_rate": 5e-06, "loss": 0.0835, "num_input_tokens_seen": 272060548, "step": 1584 }, { "epoch": 0.416650226869205, "loss": 0.06312213093042374, "loss_ce": 0.004131653346121311, "loss_iou": 0.56640625, "loss_num": 0.01177978515625, "loss_xval": 0.05908203125, "num_input_tokens_seen": 272060548, "step": 1584 }, { "epoch": 0.4169132636285921, "grad_norm": 6.028144781636759, "learning_rate": 5e-06, "loss": 0.1561, "num_input_tokens_seen": 272232820, "step": 1585 }, { "epoch": 0.4169132636285921, "loss": 0.0890943706035614, "loss_ce": 0.0011732284910976887, "loss_iou": 0.62109375, "loss_num": 0.017578125, "loss_xval": 0.087890625, "num_input_tokens_seen": 272232820, "step": 1585 }, { "epoch": 0.4171763003879792, "grad_norm": 4.472287089666441, "learning_rate": 5e-06, "loss": 0.1729, "num_input_tokens_seen": 272405268, "step": 1586 }, { "epoch": 0.4171763003879792, "loss": 0.2705861032009125, "loss_ce": 0.00334367249161005, "loss_iou": 0.5625, "loss_num": 0.053466796875, "loss_xval": 0.267578125, "num_input_tokens_seen": 272405268, "step": 1586 }, { "epoch": 0.41743933714736636, "grad_norm": 3.852608585701711, "learning_rate": 5e-06, "loss": 0.1019, "num_input_tokens_seen": 272577520, "step": 1587 }, { "epoch": 0.41743933714736636, "loss": 0.1504625827074051, "loss_ce": 0.00028558107442222536, "loss_iou": 0.388671875, "loss_num": 0.030029296875, "loss_xval": 0.150390625, 
"num_input_tokens_seen": 272577520, "step": 1587 }, { "epoch": 0.41770237390675347, "grad_norm": 5.376381690191942, "learning_rate": 5e-06, "loss": 0.1403, "num_input_tokens_seen": 272747276, "step": 1588 }, { "epoch": 0.41770237390675347, "loss": 0.15104855597019196, "loss_ce": 0.005449185613542795, "loss_iou": 0.6640625, "loss_num": 0.0291748046875, "loss_xval": 0.1455078125, "num_input_tokens_seen": 272747276, "step": 1588 }, { "epoch": 0.41796541066614057, "grad_norm": 6.161958958679319, "learning_rate": 5e-06, "loss": 0.1274, "num_input_tokens_seen": 272919388, "step": 1589 }, { "epoch": 0.41796541066614057, "loss": 0.19656141102313995, "loss_ce": 0.0006995859439484775, "loss_iou": 0.3984375, "loss_num": 0.0390625, "loss_xval": 0.1962890625, "num_input_tokens_seen": 272919388, "step": 1589 }, { "epoch": 0.4182284474255277, "grad_norm": 9.65888325757511, "learning_rate": 5e-06, "loss": 0.1406, "num_input_tokens_seen": 273091656, "step": 1590 }, { "epoch": 0.4182284474255277, "loss": 0.21912047266960144, "loss_ce": 0.0045361267402768135, "loss_iou": 0.62109375, "loss_num": 0.04296875, "loss_xval": 0.21484375, "num_input_tokens_seen": 273091656, "step": 1590 }, { "epoch": 0.41849148418491483, "grad_norm": 8.813988618374006, "learning_rate": 5e-06, "loss": 0.1735, "num_input_tokens_seen": 273263768, "step": 1591 }, { "epoch": 0.41849148418491483, "loss": 0.23192375898361206, "loss_ce": 0.00888605136424303, "loss_iou": 0.48046875, "loss_num": 0.044677734375, "loss_xval": 0.22265625, "num_input_tokens_seen": 273263768, "step": 1591 }, { "epoch": 0.418754520944302, "grad_norm": 7.699871319865347, "learning_rate": 5e-06, "loss": 0.1248, "num_input_tokens_seen": 273435676, "step": 1592 }, { "epoch": 0.418754520944302, "loss": 0.10040029883384705, "loss_ce": 0.0014928288292139769, "loss_iou": 0.5859375, "loss_num": 0.019775390625, "loss_xval": 0.09912109375, "num_input_tokens_seen": 273435676, "step": 1592 }, { "epoch": 0.4190175577036891, "grad_norm": 
5.319980033934329, "learning_rate": 5e-06, "loss": 0.1299, "num_input_tokens_seen": 273607832, "step": 1593 }, { "epoch": 0.4190175577036891, "loss": 0.149379163980484, "loss_ce": 0.004207056015729904, "loss_iou": 0.59375, "loss_num": 0.029052734375, "loss_xval": 0.1455078125, "num_input_tokens_seen": 273607832, "step": 1593 }, { "epoch": 0.4192805944630762, "grad_norm": 9.352989040804356, "learning_rate": 5e-06, "loss": 0.1153, "num_input_tokens_seen": 273778508, "step": 1594 }, { "epoch": 0.4192805944630762, "loss": 0.12792231142520905, "loss_ce": 0.0009081543539650738, "loss_iou": 0.66015625, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 273778508, "step": 1594 }, { "epoch": 0.41954363122246335, "grad_norm": 10.015857880890572, "learning_rate": 5e-06, "loss": 0.1659, "num_input_tokens_seen": 273950492, "step": 1595 }, { "epoch": 0.41954363122246335, "loss": 0.1797780990600586, "loss_ce": 0.004515648819506168, "loss_iou": 0.392578125, "loss_num": 0.034912109375, "loss_xval": 0.1748046875, "num_input_tokens_seen": 273950492, "step": 1595 }, { "epoch": 0.41980666798185046, "grad_norm": 5.437610936264711, "learning_rate": 5e-06, "loss": 0.151, "num_input_tokens_seen": 274122788, "step": 1596 }, { "epoch": 0.41980666798185046, "loss": 0.18884079158306122, "loss_ce": 0.0007914789603091776, "loss_iou": 0.59765625, "loss_num": 0.03759765625, "loss_xval": 0.1884765625, "num_input_tokens_seen": 274122788, "step": 1596 }, { "epoch": 0.4200697047412376, "grad_norm": 7.822775629663786, "learning_rate": 5e-06, "loss": 0.1409, "num_input_tokens_seen": 274294960, "step": 1597 }, { "epoch": 0.4200697047412376, "loss": 0.10433492064476013, "loss_ce": 0.000544635346159339, "loss_iou": 0.208984375, "loss_num": 0.020751953125, "loss_xval": 0.10400390625, "num_input_tokens_seen": 274294960, "step": 1597 }, { "epoch": 0.4203327415006247, "grad_norm": 5.742663276174034, "learning_rate": 5e-06, "loss": 0.1343, "num_input_tokens_seen": 274466828, "step": 
1598 }, { "epoch": 0.4203327415006247, "loss": 0.21367287635803223, "loss_ce": 0.0007822535699233413, "loss_iou": 0.546875, "loss_num": 0.04248046875, "loss_xval": 0.212890625, "num_input_tokens_seen": 274466828, "step": 1598 }, { "epoch": 0.4205957782600118, "grad_norm": 7.632689703873666, "learning_rate": 5e-06, "loss": 0.1806, "num_input_tokens_seen": 274639136, "step": 1599 }, { "epoch": 0.4205957782600118, "loss": 0.15357787907123566, "loss_ce": 0.0009594644652679563, "loss_iou": 0.5234375, "loss_num": 0.030517578125, "loss_xval": 0.15234375, "num_input_tokens_seen": 274639136, "step": 1599 }, { "epoch": 0.420858815019399, "grad_norm": 6.551977275944205, "learning_rate": 5e-06, "loss": 0.11, "num_input_tokens_seen": 274811300, "step": 1600 }, { "epoch": 0.420858815019399, "loss": 0.10377545654773712, "loss_ce": 0.005798771977424622, "loss_iou": 0.5859375, "loss_num": 0.01953125, "loss_xval": 0.09814453125, "num_input_tokens_seen": 274811300, "step": 1600 }, { "epoch": 0.4211218517787861, "grad_norm": 3.634688713329233, "learning_rate": 5e-06, "loss": 0.1177, "num_input_tokens_seen": 274983232, "step": 1601 }, { "epoch": 0.4211218517787861, "loss": 0.0826454609632492, "loss_ce": 0.0008583518210798502, "loss_iou": 0.4453125, "loss_num": 0.016357421875, "loss_xval": 0.08203125, "num_input_tokens_seen": 274983232, "step": 1601 }, { "epoch": 0.4213848885381732, "grad_norm": 6.513487522029516, "learning_rate": 5e-06, "loss": 0.1857, "num_input_tokens_seen": 275155252, "step": 1602 }, { "epoch": 0.4213848885381732, "loss": 0.22003334760665894, "loss_ce": 0.0016495751915499568, "loss_iou": 0.54296875, "loss_num": 0.043701171875, "loss_xval": 0.21875, "num_input_tokens_seen": 275155252, "step": 1602 }, { "epoch": 0.42164792529756034, "grad_norm": 7.844073689048222, "learning_rate": 5e-06, "loss": 0.164, "num_input_tokens_seen": 275327452, "step": 1603 }, { "epoch": 0.42164792529756034, "loss": 0.21324840188026428, "loss_ce": 0.001090196194127202, "loss_iou": 0.5703125, 
"loss_num": 0.04248046875, "loss_xval": 0.2119140625, "num_input_tokens_seen": 275327452, "step": 1603 }, { "epoch": 0.42191096205694745, "grad_norm": 5.702970135636054, "learning_rate": 5e-06, "loss": 0.0932, "num_input_tokens_seen": 275499556, "step": 1604 }, { "epoch": 0.42191096205694745, "loss": 0.12330840528011322, "loss_ce": 0.00029205146711319685, "loss_iou": 0.67578125, "loss_num": 0.0245361328125, "loss_xval": 0.123046875, "num_input_tokens_seen": 275499556, "step": 1604 }, { "epoch": 0.4221739988163346, "grad_norm": 7.295315362378517, "learning_rate": 5e-06, "loss": 0.1263, "num_input_tokens_seen": 275671424, "step": 1605 }, { "epoch": 0.4221739988163346, "loss": 0.09813028573989868, "loss_ce": 0.0004130033776164055, "loss_iou": 0.48046875, "loss_num": 0.01953125, "loss_xval": 0.09765625, "num_input_tokens_seen": 275671424, "step": 1605 }, { "epoch": 0.4224370355757217, "grad_norm": 9.326948217180478, "learning_rate": 5e-06, "loss": 0.1136, "num_input_tokens_seen": 275843716, "step": 1606 }, { "epoch": 0.4224370355757217, "loss": 0.08426269888877869, "loss_ce": 0.00183471804484725, "loss_iou": 0.671875, "loss_num": 0.0164794921875, "loss_xval": 0.08251953125, "num_input_tokens_seen": 275843716, "step": 1606 }, { "epoch": 0.4227000723351088, "grad_norm": 5.932370446925655, "learning_rate": 5e-06, "loss": 0.1547, "num_input_tokens_seen": 276016052, "step": 1607 }, { "epoch": 0.4227000723351088, "loss": 0.1150435283780098, "loss_ce": 0.0007857157033868134, "loss_iou": 0.474609375, "loss_num": 0.0228271484375, "loss_xval": 0.1142578125, "num_input_tokens_seen": 276016052, "step": 1607 }, { "epoch": 0.42296310909449597, "grad_norm": 14.783104322463815, "learning_rate": 5e-06, "loss": 0.1635, "num_input_tokens_seen": 276188232, "step": 1608 }, { "epoch": 0.42296310909449597, "loss": 0.2393971085548401, "loss_ce": 0.005510389804840088, "loss_iou": 0.419921875, "loss_num": 0.046875, "loss_xval": 0.234375, "num_input_tokens_seen": 276188232, "step": 1608 }, { 
"epoch": 0.4232261458538831, "grad_norm": 37.72538153737276, "learning_rate": 5e-06, "loss": 0.1175, "num_input_tokens_seen": 276360224, "step": 1609 }, { "epoch": 0.4232261458538831, "loss": 0.10361947864294052, "loss_ce": 0.001171966316178441, "loss_iou": 0.54296875, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 276360224, "step": 1609 }, { "epoch": 0.42348918261327023, "grad_norm": 5.929115604459541, "learning_rate": 5e-06, "loss": 0.185, "num_input_tokens_seen": 276532388, "step": 1610 }, { "epoch": 0.42348918261327023, "loss": 0.2490251511335373, "loss_ce": 0.001466565066948533, "loss_iou": 0.515625, "loss_num": 0.049560546875, "loss_xval": 0.248046875, "num_input_tokens_seen": 276532388, "step": 1610 }, { "epoch": 0.42375221937265733, "grad_norm": 4.065855918819384, "learning_rate": 5e-06, "loss": 0.1123, "num_input_tokens_seen": 276704452, "step": 1611 }, { "epoch": 0.42375221937265733, "loss": 0.08056612312793732, "loss_ce": 0.0004574810154736042, "loss_iou": 0.59375, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 276704452, "step": 1611 }, { "epoch": 0.42401525613204444, "grad_norm": 5.144248302598616, "learning_rate": 5e-06, "loss": 0.111, "num_input_tokens_seen": 276876660, "step": 1612 }, { "epoch": 0.42401525613204444, "loss": 0.1256069540977478, "loss_ce": 0.003719748929142952, "loss_iou": 0.470703125, "loss_num": 0.0244140625, "loss_xval": 0.1220703125, "num_input_tokens_seen": 276876660, "step": 1612 }, { "epoch": 0.4242782928914316, "grad_norm": 6.484383508592383, "learning_rate": 5e-06, "loss": 0.1448, "num_input_tokens_seen": 277048524, "step": 1613 }, { "epoch": 0.4242782928914316, "loss": 0.08503228425979614, "loss_ce": 0.0004375518183223903, "loss_iou": 0.51953125, "loss_num": 0.0169677734375, "loss_xval": 0.08447265625, "num_input_tokens_seen": 277048524, "step": 1613 }, { "epoch": 0.4245413296508187, "grad_norm": 4.995173925299756, "learning_rate": 5e-06, "loss": 0.1327, 
"num_input_tokens_seen": 277220588, "step": 1614 }, { "epoch": 0.4245413296508187, "loss": 0.17099690437316895, "loss_ce": 0.0004646642482839525, "loss_iou": 0.609375, "loss_num": 0.0341796875, "loss_xval": 0.1708984375, "num_input_tokens_seen": 277220588, "step": 1614 }, { "epoch": 0.4248043664102058, "grad_norm": 8.497364636941706, "learning_rate": 5e-06, "loss": 0.1482, "num_input_tokens_seen": 277392824, "step": 1615 }, { "epoch": 0.4248043664102058, "loss": 0.13706764578819275, "loss_ce": 0.0016916776075959206, "loss_iou": 0.4609375, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 277392824, "step": 1615 }, { "epoch": 0.42506740316959296, "grad_norm": 6.760370062216128, "learning_rate": 5e-06, "loss": 0.1224, "num_input_tokens_seen": 277565104, "step": 1616 }, { "epoch": 0.42506740316959296, "loss": 0.10927695780992508, "loss_ce": 0.00048179191071540117, "loss_iou": 0.65234375, "loss_num": 0.021728515625, "loss_xval": 0.10888671875, "num_input_tokens_seen": 277565104, "step": 1616 }, { "epoch": 0.42533043992898006, "grad_norm": 5.035401913498838, "learning_rate": 5e-06, "loss": 0.122, "num_input_tokens_seen": 277737224, "step": 1617 }, { "epoch": 0.42533043992898006, "loss": 0.13474591076374054, "loss_ce": 0.002299622166901827, "loss_iou": 0.41796875, "loss_num": 0.0264892578125, "loss_xval": 0.1328125, "num_input_tokens_seen": 277737224, "step": 1617 }, { "epoch": 0.4255934766883672, "grad_norm": 6.777922664584483, "learning_rate": 5e-06, "loss": 0.1104, "num_input_tokens_seen": 277907488, "step": 1618 }, { "epoch": 0.4255934766883672, "loss": 0.11205422878265381, "loss_ce": 0.0009702442912384868, "loss_iou": 0.59765625, "loss_num": 0.022216796875, "loss_xval": 0.111328125, "num_input_tokens_seen": 277907488, "step": 1618 }, { "epoch": 0.4258565134477543, "grad_norm": 7.0419776595399695, "learning_rate": 5e-06, "loss": 0.1466, "num_input_tokens_seen": 278079908, "step": 1619 }, { "epoch": 0.4258565134477543, "loss": 
0.1803514063358307, "loss_ce": 0.0011216606944799423, "loss_iou": 0.51171875, "loss_num": 0.035888671875, "loss_xval": 0.1796875, "num_input_tokens_seen": 278079908, "step": 1619 }, { "epoch": 0.42611955020714143, "grad_norm": 4.869531542128126, "learning_rate": 5e-06, "loss": 0.1934, "num_input_tokens_seen": 278252060, "step": 1620 }, { "epoch": 0.42611955020714143, "loss": 0.17065666615962982, "loss_ce": 0.0023522234987467527, "loss_iou": 0.478515625, "loss_num": 0.03369140625, "loss_xval": 0.16796875, "num_input_tokens_seen": 278252060, "step": 1620 }, { "epoch": 0.4263825869665286, "grad_norm": 15.025432063122246, "learning_rate": 5e-06, "loss": 0.1486, "num_input_tokens_seen": 278424152, "step": 1621 }, { "epoch": 0.4263825869665286, "loss": 0.16370511054992676, "loss_ce": 0.0011379658244550228, "loss_iou": 0.53125, "loss_num": 0.032470703125, "loss_xval": 0.162109375, "num_input_tokens_seen": 278424152, "step": 1621 }, { "epoch": 0.4266456237259157, "grad_norm": 7.359642440972258, "learning_rate": 5e-06, "loss": 0.1398, "num_input_tokens_seen": 278596208, "step": 1622 }, { "epoch": 0.4266456237259157, "loss": 0.18550482392311096, "loss_ce": 0.0022162585519254208, "loss_iou": 0.69921875, "loss_num": 0.03662109375, "loss_xval": 0.18359375, "num_input_tokens_seen": 278596208, "step": 1622 }, { "epoch": 0.42690866048530285, "grad_norm": 3.859276017343743, "learning_rate": 5e-06, "loss": 0.0967, "num_input_tokens_seen": 278768348, "step": 1623 }, { "epoch": 0.42690866048530285, "loss": 0.08928422629833221, "loss_ce": 0.003163617569953203, "loss_iou": 0.447265625, "loss_num": 0.0172119140625, "loss_xval": 0.0859375, "num_input_tokens_seen": 278768348, "step": 1623 }, { "epoch": 0.42717169724468995, "grad_norm": 3.491408872838399, "learning_rate": 5e-06, "loss": 0.114, "num_input_tokens_seen": 278940432, "step": 1624 }, { "epoch": 0.42717169724468995, "loss": 0.10469355434179306, "loss_ce": 0.004763749893754721, "loss_iou": 0.609375, "loss_num": 0.02001953125, 
"loss_xval": 0.10009765625, "num_input_tokens_seen": 278940432, "step": 1624 }, { "epoch": 0.42743473400407705, "grad_norm": 8.925089197668054, "learning_rate": 5e-06, "loss": 0.1371, "num_input_tokens_seen": 279112480, "step": 1625 }, { "epoch": 0.42743473400407705, "loss": 0.15482491254806519, "loss_ce": 0.0005127866170369089, "loss_iou": 0.5859375, "loss_num": 0.0308837890625, "loss_xval": 0.154296875, "num_input_tokens_seen": 279112480, "step": 1625 }, { "epoch": 0.4276977707634642, "grad_norm": 5.017252116965269, "learning_rate": 5e-06, "loss": 0.1621, "num_input_tokens_seen": 279284544, "step": 1626 }, { "epoch": 0.4276977707634642, "loss": 0.11934304237365723, "loss_ce": 0.0036509071942418814, "loss_iou": 0.80859375, "loss_num": 0.023193359375, "loss_xval": 0.11572265625, "num_input_tokens_seen": 279284544, "step": 1626 }, { "epoch": 0.4279608075228513, "grad_norm": 11.28367561887442, "learning_rate": 5e-06, "loss": 0.1028, "num_input_tokens_seen": 279456684, "step": 1627 }, { "epoch": 0.4279608075228513, "loss": 0.08618146926164627, "loss_ce": 0.0002134529349859804, "loss_iou": 0.435546875, "loss_num": 0.0172119140625, "loss_xval": 0.0859375, "num_input_tokens_seen": 279456684, "step": 1627 }, { "epoch": 0.4282238442822384, "grad_norm": 14.334013868271164, "learning_rate": 5e-06, "loss": 0.1636, "num_input_tokens_seen": 279629144, "step": 1628 }, { "epoch": 0.4282238442822384, "loss": 0.21622659265995026, "loss_ce": 0.0035800987388938665, "loss_iou": 0.5546875, "loss_num": 0.04248046875, "loss_xval": 0.212890625, "num_input_tokens_seen": 279629144, "step": 1628 }, { "epoch": 0.4284868810416256, "grad_norm": 5.177758249324022, "learning_rate": 5e-06, "loss": 0.1183, "num_input_tokens_seen": 279800920, "step": 1629 }, { "epoch": 0.4284868810416256, "loss": 0.11388795077800751, "loss_ce": 0.0024377545341849327, "loss_iou": 0.46484375, "loss_num": 0.0223388671875, "loss_xval": 0.111328125, "num_input_tokens_seen": 279800920, "step": 1629 }, { "epoch": 
0.4287499178010127, "grad_norm": 5.002716354322989, "learning_rate": 5e-06, "loss": 0.16, "num_input_tokens_seen": 279973164, "step": 1630 }, { "epoch": 0.4287499178010127, "loss": 0.11221545934677124, "loss_ce": 0.0021690744906663895, "loss_iou": 0.5546875, "loss_num": 0.02197265625, "loss_xval": 0.10986328125, "num_input_tokens_seen": 279973164, "step": 1630 }, { "epoch": 0.42901295456039984, "grad_norm": 11.888987422058314, "learning_rate": 5e-06, "loss": 0.1236, "num_input_tokens_seen": 280144932, "step": 1631 }, { "epoch": 0.42901295456039984, "loss": 0.10366816818714142, "loss_ce": 0.0032348139211535454, "loss_iou": 0.53125, "loss_num": 0.02001953125, "loss_xval": 0.1005859375, "num_input_tokens_seen": 280144932, "step": 1631 }, { "epoch": 0.42927599131978694, "grad_norm": 5.9683348593169105, "learning_rate": 5e-06, "loss": 0.1279, "num_input_tokens_seen": 280316892, "step": 1632 }, { "epoch": 0.42927599131978694, "loss": 0.1221093013882637, "loss_ce": 0.0030907560139894485, "loss_iou": 0.734375, "loss_num": 0.0238037109375, "loss_xval": 0.119140625, "num_input_tokens_seen": 280316892, "step": 1632 }, { "epoch": 0.42953902807917405, "grad_norm": 4.306403695098148, "learning_rate": 5e-06, "loss": 0.1676, "num_input_tokens_seen": 280488680, "step": 1633 }, { "epoch": 0.42953902807917405, "loss": 0.16619864106178284, "loss_ce": 0.0007323222234845161, "loss_iou": 0.421875, "loss_num": 0.032958984375, "loss_xval": 0.1650390625, "num_input_tokens_seen": 280488680, "step": 1633 }, { "epoch": 0.4298020648385612, "grad_norm": 6.542037684380333, "learning_rate": 5e-06, "loss": 0.1228, "num_input_tokens_seen": 280658812, "step": 1634 }, { "epoch": 0.4298020648385612, "loss": 0.1335218995809555, "loss_ce": 0.0008009535376913846, "loss_iou": 0.419921875, "loss_num": 0.0264892578125, "loss_xval": 0.1328125, "num_input_tokens_seen": 280658812, "step": 1634 }, { "epoch": 0.4300651015979483, "grad_norm": 5.780919966084494, "learning_rate": 5e-06, "loss": 0.1225, 
"num_input_tokens_seen": 280829020, "step": 1635 }, { "epoch": 0.4300651015979483, "loss": 0.13609978556632996, "loss_ce": 0.0019139924552291632, "loss_iou": 0.375, "loss_num": 0.02685546875, "loss_xval": 0.1337890625, "num_input_tokens_seen": 280829020, "step": 1635 }, { "epoch": 0.4303281383573354, "grad_norm": 4.625756427735958, "learning_rate": 5e-06, "loss": 0.1239, "num_input_tokens_seen": 280999460, "step": 1636 }, { "epoch": 0.4303281383573354, "loss": 0.05524425953626633, "loss_ce": 0.0004957281635142863, "loss_iou": 0.6640625, "loss_num": 0.010986328125, "loss_xval": 0.0546875, "num_input_tokens_seen": 280999460, "step": 1636 }, { "epoch": 0.43059117511672257, "grad_norm": 4.597155715023034, "learning_rate": 5e-06, "loss": 0.1455, "num_input_tokens_seen": 281169544, "step": 1637 }, { "epoch": 0.43059117511672257, "loss": 0.09542928636074066, "loss_ce": 0.002106534782797098, "loss_iou": 0.60546875, "loss_num": 0.0186767578125, "loss_xval": 0.09326171875, "num_input_tokens_seen": 281169544, "step": 1637 }, { "epoch": 0.43085421187610967, "grad_norm": 6.599089127666852, "learning_rate": 5e-06, "loss": 0.1655, "num_input_tokens_seen": 281341764, "step": 1638 }, { "epoch": 0.43085421187610967, "loss": 0.15369150042533875, "loss_ce": 0.0017444868572056293, "loss_iou": 0.5390625, "loss_num": 0.0303955078125, "loss_xval": 0.15234375, "num_input_tokens_seen": 281341764, "step": 1638 }, { "epoch": 0.43111724863549683, "grad_norm": 10.985722993191203, "learning_rate": 5e-06, "loss": 0.1179, "num_input_tokens_seen": 281514480, "step": 1639 }, { "epoch": 0.43111724863549683, "loss": 0.12693974375724792, "loss_ce": 0.004442187491804361, "loss_iou": 0.427734375, "loss_num": 0.0245361328125, "loss_xval": 0.12255859375, "num_input_tokens_seen": 281514480, "step": 1639 }, { "epoch": 0.43138028539488393, "grad_norm": 5.112052305423048, "learning_rate": 5e-06, "loss": 0.1307, "num_input_tokens_seen": 281686636, "step": 1640 }, { "epoch": 0.43138028539488393, "loss": 
0.13622896373271942, "loss_ce": 0.001615930232219398, "loss_iou": 0.6875, "loss_num": 0.0269775390625, "loss_xval": 0.134765625, "num_input_tokens_seen": 281686636, "step": 1640 }, { "epoch": 0.43164332215427104, "grad_norm": 4.445172786952326, "learning_rate": 5e-06, "loss": 0.1298, "num_input_tokens_seen": 281859068, "step": 1641 }, { "epoch": 0.43164332215427104, "loss": 0.16077426075935364, "loss_ce": 0.0008926668670028448, "loss_iou": 0.478515625, "loss_num": 0.031982421875, "loss_xval": 0.16015625, "num_input_tokens_seen": 281859068, "step": 1641 }, { "epoch": 0.4319063589136582, "grad_norm": 18.643962143254182, "learning_rate": 5e-06, "loss": 0.1174, "num_input_tokens_seen": 282031296, "step": 1642 }, { "epoch": 0.4319063589136582, "loss": 0.11041103303432465, "loss_ce": 0.00216518621891737, "loss_iou": 0.5703125, "loss_num": 0.0216064453125, "loss_xval": 0.1083984375, "num_input_tokens_seen": 282031296, "step": 1642 }, { "epoch": 0.4321693956730453, "grad_norm": 14.94723205702143, "learning_rate": 5e-06, "loss": 0.0973, "num_input_tokens_seen": 282203456, "step": 1643 }, { "epoch": 0.4321693956730453, "loss": 0.10057017207145691, "loss_ce": 0.002517198445275426, "loss_iou": 0.470703125, "loss_num": 0.0196533203125, "loss_xval": 0.09814453125, "num_input_tokens_seen": 282203456, "step": 1643 }, { "epoch": 0.43243243243243246, "grad_norm": 3.591864332881924, "learning_rate": 5e-06, "loss": 0.1397, "num_input_tokens_seen": 282374020, "step": 1644 }, { "epoch": 0.43243243243243246, "loss": 0.08621848374605179, "loss_ce": 0.00044883223017677665, "loss_iou": 0.625, "loss_num": 0.0172119140625, "loss_xval": 0.0859375, "num_input_tokens_seen": 282374020, "step": 1644 }, { "epoch": 0.43269546919181956, "grad_norm": 6.406579991141497, "learning_rate": 5e-06, "loss": 0.1025, "num_input_tokens_seen": 282544200, "step": 1645 }, { "epoch": 0.43269546919181956, "loss": 0.10012087225914001, "loss_ce": 0.001396512147039175, "loss_iou": 0.48046875, "loss_num": 
0.019775390625, "loss_xval": 0.0986328125, "num_input_tokens_seen": 282544200, "step": 1645 }, { "epoch": 0.43295850595120666, "grad_norm": 6.633263160453237, "learning_rate": 5e-06, "loss": 0.1165, "num_input_tokens_seen": 282716276, "step": 1646 }, { "epoch": 0.43295850595120666, "loss": 0.09250755608081818, "loss_ce": 0.0007411979604512453, "loss_iou": 0.57421875, "loss_num": 0.018310546875, "loss_xval": 0.091796875, "num_input_tokens_seen": 282716276, "step": 1646 }, { "epoch": 0.4332215427105938, "grad_norm": 11.88641544130966, "learning_rate": 5e-06, "loss": 0.119, "num_input_tokens_seen": 282888756, "step": 1647 }, { "epoch": 0.4332215427105938, "loss": 0.11388491094112396, "loss_ce": 0.0009088441729545593, "loss_iou": 0.55078125, "loss_num": 0.0225830078125, "loss_xval": 0.11279296875, "num_input_tokens_seen": 282888756, "step": 1647 }, { "epoch": 0.4334845794699809, "grad_norm": 5.478375817282589, "learning_rate": 5e-06, "loss": 0.1232, "num_input_tokens_seen": 283060788, "step": 1648 }, { "epoch": 0.4334845794699809, "loss": 0.08237907290458679, "loss_ce": 0.003521653823554516, "loss_iou": 0.5859375, "loss_num": 0.0157470703125, "loss_xval": 0.0791015625, "num_input_tokens_seen": 283060788, "step": 1648 }, { "epoch": 0.433747616229368, "grad_norm": 7.7141358569237015, "learning_rate": 5e-06, "loss": 0.1629, "num_input_tokens_seen": 283232744, "step": 1649 }, { "epoch": 0.433747616229368, "loss": 0.16650542616844177, "loss_ce": 0.001252750400453806, "loss_iou": 0.515625, "loss_num": 0.033203125, "loss_xval": 0.1650390625, "num_input_tokens_seen": 283232744, "step": 1649 }, { "epoch": 0.4340106529887552, "grad_norm": 4.902197150094181, "learning_rate": 5e-06, "loss": 0.114, "num_input_tokens_seen": 283404860, "step": 1650 }, { "epoch": 0.4340106529887552, "loss": 0.11392060667276382, "loss_ce": 0.0009140149923041463, "loss_iou": 0.67578125, "loss_num": 0.0225830078125, "loss_xval": 0.11279296875, "num_input_tokens_seen": 283404860, "step": 1650 }, { 
"epoch": 0.4342736897481423, "grad_norm": 6.611385564549315, "learning_rate": 5e-06, "loss": 0.1182, "num_input_tokens_seen": 283577340, "step": 1651 }, { "epoch": 0.4342736897481423, "loss": 0.14234262704849243, "loss_ce": 0.0011072808410972357, "loss_iou": 0.46875, "loss_num": 0.0283203125, "loss_xval": 0.1416015625, "num_input_tokens_seen": 283577340, "step": 1651 }, { "epoch": 0.43453672650752945, "grad_norm": 5.270564238834203, "learning_rate": 5e-06, "loss": 0.1268, "num_input_tokens_seen": 283747872, "step": 1652 }, { "epoch": 0.43453672650752945, "loss": 0.08932007849216461, "loss_ce": 0.0014904842246323824, "loss_iou": 0.515625, "loss_num": 0.017578125, "loss_xval": 0.087890625, "num_input_tokens_seen": 283747872, "step": 1652 }, { "epoch": 0.43479976326691655, "grad_norm": 5.150524639297202, "learning_rate": 5e-06, "loss": 0.136, "num_input_tokens_seen": 283920212, "step": 1653 }, { "epoch": 0.43479976326691655, "loss": 0.08878134936094284, "loss_ce": 0.00037192486342974007, "loss_iou": 0.51171875, "loss_num": 0.0177001953125, "loss_xval": 0.08837890625, "num_input_tokens_seen": 283920212, "step": 1653 }, { "epoch": 0.43506280002630365, "grad_norm": 5.389390494681365, "learning_rate": 5e-06, "loss": 0.1047, "num_input_tokens_seen": 284090700, "step": 1654 }, { "epoch": 0.43506280002630365, "loss": 0.09808811545372009, "loss_ce": 0.0017746419180184603, "loss_iou": 0.59375, "loss_num": 0.019287109375, "loss_xval": 0.09619140625, "num_input_tokens_seen": 284090700, "step": 1654 }, { "epoch": 0.4353258367856908, "grad_norm": 5.2829708598355625, "learning_rate": 5e-06, "loss": 0.1384, "num_input_tokens_seen": 284262932, "step": 1655 }, { "epoch": 0.4353258367856908, "loss": 0.24406926333904266, "loss_ce": 0.0009052163222804666, "loss_iou": 0.404296875, "loss_num": 0.048583984375, "loss_xval": 0.2431640625, "num_input_tokens_seen": 284262932, "step": 1655 }, { "epoch": 0.4355888735450779, "grad_norm": 4.415055027745429, "learning_rate": 5e-06, "loss": 0.1467, 
"num_input_tokens_seen": 284433136, "step": 1656 }, { "epoch": 0.4355888735450779, "loss": 0.14210422337055206, "loss_ce": 0.0005942133138887584, "loss_iou": 0.482421875, "loss_num": 0.0283203125, "loss_xval": 0.1416015625, "num_input_tokens_seen": 284433136, "step": 1656 }, { "epoch": 0.4358519103044651, "grad_norm": 11.10469204899232, "learning_rate": 5e-06, "loss": 0.1059, "num_input_tokens_seen": 284604980, "step": 1657 }, { "epoch": 0.4358519103044651, "loss": 0.06616829335689545, "loss_ce": 0.0029969080351293087, "loss_iou": 0.5078125, "loss_num": 0.01263427734375, "loss_xval": 0.06298828125, "num_input_tokens_seen": 284604980, "step": 1657 }, { "epoch": 0.4361149470638522, "grad_norm": 37.454091592129345, "learning_rate": 5e-06, "loss": 0.1439, "num_input_tokens_seen": 284777024, "step": 1658 }, { "epoch": 0.4361149470638522, "loss": 0.09428656101226807, "loss_ce": 0.00023138479446060956, "loss_iou": 0.5234375, "loss_num": 0.018798828125, "loss_xval": 0.09423828125, "num_input_tokens_seen": 284777024, "step": 1658 }, { "epoch": 0.4363779838232393, "grad_norm": 6.943748076830087, "learning_rate": 5e-06, "loss": 0.1641, "num_input_tokens_seen": 284948692, "step": 1659 }, { "epoch": 0.4363779838232393, "loss": 0.12960584461688995, "loss_ce": 0.0008827036363072693, "loss_iou": 0.546875, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 284948692, "step": 1659 }, { "epoch": 0.43664102058262644, "grad_norm": 6.322360699878544, "learning_rate": 5e-06, "loss": 0.1203, "num_input_tokens_seen": 285120852, "step": 1660 }, { "epoch": 0.43664102058262644, "loss": 0.08696160465478897, "loss_ce": 0.0016649758908897638, "loss_iou": 0.609375, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 285120852, "step": 1660 }, { "epoch": 0.43690405734201354, "grad_norm": 4.9857397500362, "learning_rate": 5e-06, "loss": 0.1257, "num_input_tokens_seen": 285293104, "step": 1661 }, { "epoch": 0.43690405734201354, "loss": 
0.1620713770389557, "loss_ce": 0.0027085873298346996, "loss_iou": 0.359375, "loss_num": 0.031982421875, "loss_xval": 0.1591796875, "num_input_tokens_seen": 285293104, "step": 1661 }, { "epoch": 0.43716709410140064, "grad_norm": 6.167930639257179, "learning_rate": 5e-06, "loss": 0.1339, "num_input_tokens_seen": 285465116, "step": 1662 }, { "epoch": 0.43716709410140064, "loss": 0.13285255432128906, "loss_ce": 0.002481454983353615, "loss_iou": 0.60546875, "loss_num": 0.026123046875, "loss_xval": 0.130859375, "num_input_tokens_seen": 285465116, "step": 1662 }, { "epoch": 0.4374301308607878, "grad_norm": 4.796617637287243, "learning_rate": 5e-06, "loss": 0.1067, "num_input_tokens_seen": 285637712, "step": 1663 }, { "epoch": 0.4374301308607878, "loss": 0.0941682979464531, "loss_ce": 0.0005708856624551117, "loss_iou": 0.58203125, "loss_num": 0.0186767578125, "loss_xval": 0.09375, "num_input_tokens_seen": 285637712, "step": 1663 }, { "epoch": 0.4376931676201749, "grad_norm": 6.365145441420231, "learning_rate": 5e-06, "loss": 0.1524, "num_input_tokens_seen": 285808140, "step": 1664 }, { "epoch": 0.4376931676201749, "loss": 0.08691577613353729, "loss_ce": 0.0016801799647510052, "loss_iou": 0.703125, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 285808140, "step": 1664 }, { "epoch": 0.43795620437956206, "grad_norm": 9.14556589502857, "learning_rate": 5e-06, "loss": 0.1419, "num_input_tokens_seen": 285980020, "step": 1665 }, { "epoch": 0.43795620437956206, "loss": 0.11803478002548218, "loss_ce": 0.0006641793297603726, "loss_iou": 0.41796875, "loss_num": 0.0234375, "loss_xval": 0.1171875, "num_input_tokens_seen": 285980020, "step": 1665 }, { "epoch": 0.43821924113894917, "grad_norm": 8.36736614358982, "learning_rate": 5e-06, "loss": 0.1476, "num_input_tokens_seen": 286152224, "step": 1666 }, { "epoch": 0.43821924113894917, "loss": 0.17869716882705688, "loss_ce": 0.002091944683343172, "loss_iou": 0.578125, "loss_num": 0.035400390625, 
"loss_xval": 0.1767578125, "num_input_tokens_seen": 286152224, "step": 1666 }, { "epoch": 0.43848227789833627, "grad_norm": 5.977656991369799, "learning_rate": 5e-06, "loss": 0.1586, "num_input_tokens_seen": 286324392, "step": 1667 }, { "epoch": 0.43848227789833627, "loss": 0.07165973633527756, "loss_ce": 0.0008894691127352417, "loss_iou": 0.6015625, "loss_num": 0.01416015625, "loss_xval": 0.07080078125, "num_input_tokens_seen": 286324392, "step": 1667 }, { "epoch": 0.43874531465772343, "grad_norm": 4.564829435087821, "learning_rate": 5e-06, "loss": 0.1163, "num_input_tokens_seen": 286496724, "step": 1668 }, { "epoch": 0.43874531465772343, "loss": 0.10358568280935287, "loss_ce": 0.0014128325274214149, "loss_iou": 0.53125, "loss_num": 0.0205078125, "loss_xval": 0.10205078125, "num_input_tokens_seen": 286496724, "step": 1668 }, { "epoch": 0.43900835141711053, "grad_norm": 5.942642735170272, "learning_rate": 5e-06, "loss": 0.1653, "num_input_tokens_seen": 286669072, "step": 1669 }, { "epoch": 0.43900835141711053, "loss": 0.15151304006576538, "loss_ce": 0.00020689686061814427, "loss_iou": 0.46484375, "loss_num": 0.0302734375, "loss_xval": 0.1513671875, "num_input_tokens_seen": 286669072, "step": 1669 }, { "epoch": 0.4392713881764977, "grad_norm": 10.62471442505103, "learning_rate": 5e-06, "loss": 0.136, "num_input_tokens_seen": 286839656, "step": 1670 }, { "epoch": 0.4392713881764977, "loss": 0.07143907248973846, "loss_ce": 0.0015233027515932918, "loss_iou": 0.56640625, "loss_num": 0.01397705078125, "loss_xval": 0.06982421875, "num_input_tokens_seen": 286839656, "step": 1670 }, { "epoch": 0.4395344249358848, "grad_norm": 10.099626860743896, "learning_rate": 5e-06, "loss": 0.1033, "num_input_tokens_seen": 287009348, "step": 1671 }, { "epoch": 0.4395344249358848, "loss": 0.07566662132740021, "loss_ce": 0.0009290680172853172, "loss_iou": 0.5625, "loss_num": 0.01495361328125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 287009348, "step": 1671 }, { "epoch": 
0.4397974616952719, "grad_norm": 5.653901499064607, "learning_rate": 5e-06, "loss": 0.1272, "num_input_tokens_seen": 287181772, "step": 1672 }, { "epoch": 0.4397974616952719, "loss": 0.13149940967559814, "loss_ce": 0.003600239520892501, "loss_iou": 0.62890625, "loss_num": 0.025634765625, "loss_xval": 0.1279296875, "num_input_tokens_seen": 287181772, "step": 1672 }, { "epoch": 0.44006049845465905, "grad_norm": 5.524031672248575, "learning_rate": 5e-06, "loss": 0.1428, "num_input_tokens_seen": 287353976, "step": 1673 }, { "epoch": 0.44006049845465905, "loss": 0.09840566664934158, "loss_ce": 0.0025804713368415833, "loss_iou": 0.51953125, "loss_num": 0.0191650390625, "loss_xval": 0.095703125, "num_input_tokens_seen": 287353976, "step": 1673 }, { "epoch": 0.44032353521404616, "grad_norm": 9.273568707994169, "learning_rate": 5e-06, "loss": 0.1199, "num_input_tokens_seen": 287526172, "step": 1674 }, { "epoch": 0.44032353521404616, "loss": 0.06107574701309204, "loss_ce": 0.0005288777174428105, "loss_iou": 0.59765625, "loss_num": 0.0120849609375, "loss_xval": 0.060546875, "num_input_tokens_seen": 287526172, "step": 1674 }, { "epoch": 0.44058657197343326, "grad_norm": 4.970980583351768, "learning_rate": 5e-06, "loss": 0.1518, "num_input_tokens_seen": 287698712, "step": 1675 }, { "epoch": 0.44058657197343326, "loss": 0.15458138287067413, "loss_ce": 0.004831631202250719, "loss_iou": 0.44921875, "loss_num": 0.030029296875, "loss_xval": 0.1494140625, "num_input_tokens_seen": 287698712, "step": 1675 }, { "epoch": 0.4408496087328204, "grad_norm": 5.818694779717612, "learning_rate": 5e-06, "loss": 0.1327, "num_input_tokens_seen": 287871108, "step": 1676 }, { "epoch": 0.4408496087328204, "loss": 0.09261530637741089, "loss_ce": 0.0006353222415782511, "loss_iou": 0.65625, "loss_num": 0.0184326171875, "loss_xval": 0.091796875, "num_input_tokens_seen": 287871108, "step": 1676 }, { "epoch": 0.4411126454922075, "grad_norm": 13.742421763597438, "learning_rate": 5e-06, "loss": 0.0981, 
"num_input_tokens_seen": 288043424, "step": 1677 }, { "epoch": 0.4411126454922075, "loss": 0.0773499608039856, "loss_ce": 0.0007813603151589632, "loss_iou": 0.498046875, "loss_num": 0.01531982421875, "loss_xval": 0.07666015625, "num_input_tokens_seen": 288043424, "step": 1677 }, { "epoch": 0.4413756822515947, "grad_norm": 4.64225389501199, "learning_rate": 5e-06, "loss": 0.1295, "num_input_tokens_seen": 288215812, "step": 1678 }, { "epoch": 0.4413756822515947, "loss": 0.1821221113204956, "loss_ce": 0.0025872092228382826, "loss_iou": 0.61328125, "loss_num": 0.035888671875, "loss_xval": 0.1796875, "num_input_tokens_seen": 288215812, "step": 1678 }, { "epoch": 0.4416387190109818, "grad_norm": 3.729084500729301, "learning_rate": 5e-06, "loss": 0.1359, "num_input_tokens_seen": 288388096, "step": 1679 }, { "epoch": 0.4416387190109818, "loss": 0.07683113217353821, "loss_ce": 0.000735556473955512, "loss_iou": 0.390625, "loss_num": 0.0152587890625, "loss_xval": 0.076171875, "num_input_tokens_seen": 288388096, "step": 1679 }, { "epoch": 0.4419017557703689, "grad_norm": 3.7239911787320956, "learning_rate": 5e-06, "loss": 0.0862, "num_input_tokens_seen": 288560596, "step": 1680 }, { "epoch": 0.4419017557703689, "loss": 0.09967576712369919, "loss_ce": 0.002248398493975401, "loss_iou": 0.5234375, "loss_num": 0.0194091796875, "loss_xval": 0.09765625, "num_input_tokens_seen": 288560596, "step": 1680 }, { "epoch": 0.44216479252975605, "grad_norm": 7.1544540740612845, "learning_rate": 5e-06, "loss": 0.1522, "num_input_tokens_seen": 288730844, "step": 1681 }, { "epoch": 0.44216479252975605, "loss": 0.14395672082901, "loss_ce": 0.0017753278370946646, "loss_iou": 0.384765625, "loss_num": 0.0284423828125, "loss_xval": 0.142578125, "num_input_tokens_seen": 288730844, "step": 1681 }, { "epoch": 0.44242782928914315, "grad_norm": 6.963458789097864, "learning_rate": 5e-06, "loss": 0.123, "num_input_tokens_seen": 288901440, "step": 1682 }, { "epoch": 0.44242782928914315, "loss": 
0.12861037254333496, "loss_ce": 0.0015962182078510523, "loss_iou": 0.58984375, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 288901440, "step": 1682 }, { "epoch": 0.4426908660485303, "grad_norm": 6.124914951048281, "learning_rate": 5e-06, "loss": 0.2107, "num_input_tokens_seen": 289073420, "step": 1683 }, { "epoch": 0.4426908660485303, "loss": 0.19420379400253296, "loss_ce": 0.0013021675404161215, "loss_iou": 0.58984375, "loss_num": 0.03857421875, "loss_xval": 0.193359375, "num_input_tokens_seen": 289073420, "step": 1683 }, { "epoch": 0.4429539028079174, "grad_norm": 9.860837139100692, "learning_rate": 5e-06, "loss": 0.172, "num_input_tokens_seen": 289245604, "step": 1684 }, { "epoch": 0.4429539028079174, "loss": 0.18933850526809692, "loss_ce": 0.002204704098403454, "loss_iou": 0.73828125, "loss_num": 0.037353515625, "loss_xval": 0.1875, "num_input_tokens_seen": 289245604, "step": 1684 }, { "epoch": 0.4432169395673045, "grad_norm": 7.0676385774671875, "learning_rate": 5e-06, "loss": 0.1257, "num_input_tokens_seen": 289417760, "step": 1685 }, { "epoch": 0.4432169395673045, "loss": 0.16348493099212646, "loss_ce": 0.0013145222328603268, "loss_iou": 0.41015625, "loss_num": 0.032470703125, "loss_xval": 0.162109375, "num_input_tokens_seen": 289417760, "step": 1685 }, { "epoch": 0.44347997632669167, "grad_norm": 5.183221235705912, "learning_rate": 5e-06, "loss": 0.1189, "num_input_tokens_seen": 289590036, "step": 1686 }, { "epoch": 0.44347997632669167, "loss": 0.07584193348884583, "loss_ce": 0.003545790910720825, "loss_iou": 0.51171875, "loss_num": 0.01446533203125, "loss_xval": 0.072265625, "num_input_tokens_seen": 289590036, "step": 1686 }, { "epoch": 0.4437430130860788, "grad_norm": 4.680777365130206, "learning_rate": 5e-06, "loss": 0.13, "num_input_tokens_seen": 289762148, "step": 1687 }, { "epoch": 0.4437430130860788, "loss": 0.15222427248954773, "loss_ce": 0.000765529228374362, "loss_iou": 0.53515625, "loss_num": 0.0302734375, 
"loss_xval": 0.1513671875, "num_input_tokens_seen": 289762148, "step": 1687 }, { "epoch": 0.4440060498454659, "grad_norm": 8.359052775831522, "learning_rate": 5e-06, "loss": 0.1551, "num_input_tokens_seen": 289932260, "step": 1688 }, { "epoch": 0.4440060498454659, "loss": 0.15807722508907318, "loss_ce": 0.003322579897940159, "loss_iou": 0.62109375, "loss_num": 0.031005859375, "loss_xval": 0.154296875, "num_input_tokens_seen": 289932260, "step": 1688 }, { "epoch": 0.44426908660485304, "grad_norm": 12.300759876906785, "learning_rate": 5e-06, "loss": 0.1129, "num_input_tokens_seen": 290101064, "step": 1689 }, { "epoch": 0.44426908660485304, "loss": 0.1050846129655838, "loss_ce": 0.00016517633048351854, "loss_iou": 0.48046875, "loss_num": 0.02099609375, "loss_xval": 0.10498046875, "num_input_tokens_seen": 290101064, "step": 1689 }, { "epoch": 0.44453212336424014, "grad_norm": 7.817323041044055, "learning_rate": 5e-06, "loss": 0.153, "num_input_tokens_seen": 290273392, "step": 1690 }, { "epoch": 0.44453212336424014, "loss": 0.10968184471130371, "loss_ce": 0.004731892608106136, "loss_iou": 0.53125, "loss_num": 0.02099609375, "loss_xval": 0.10498046875, "num_input_tokens_seen": 290273392, "step": 1690 }, { "epoch": 0.4447951601236273, "grad_norm": 10.723057383347166, "learning_rate": 5e-06, "loss": 0.1448, "num_input_tokens_seen": 290445728, "step": 1691 }, { "epoch": 0.4447951601236273, "loss": 0.11715184152126312, "loss_ce": 0.0027719633653759956, "loss_iou": 0.5, "loss_num": 0.0228271484375, "loss_xval": 0.1142578125, "num_input_tokens_seen": 290445728, "step": 1691 }, { "epoch": 0.4450581968830144, "grad_norm": 5.5921534938980795, "learning_rate": 5e-06, "loss": 0.1311, "num_input_tokens_seen": 290618040, "step": 1692 }, { "epoch": 0.4450581968830144, "loss": 0.11884011328220367, "loss_ce": 0.0018509816145524383, "loss_iou": 0.62890625, "loss_num": 0.0234375, "loss_xval": 0.1171875, "num_input_tokens_seen": 290618040, "step": 1692 }, { "epoch": 0.4453212336424015, 
"grad_norm": 14.56261412387689, "learning_rate": 5e-06, "loss": 0.1343, "num_input_tokens_seen": 290790260, "step": 1693 }, { "epoch": 0.4453212336424015, "loss": 0.12701714038848877, "loss_ce": 0.0006743660196661949, "loss_iou": 0.33203125, "loss_num": 0.0252685546875, "loss_xval": 0.1259765625, "num_input_tokens_seen": 290790260, "step": 1693 }, { "epoch": 0.44558427040178866, "grad_norm": 18.011685132647553, "learning_rate": 5e-06, "loss": 0.1487, "num_input_tokens_seen": 290960676, "step": 1694 }, { "epoch": 0.44558427040178866, "loss": 0.17821697890758514, "loss_ce": 0.00457195146009326, "loss_iou": 0.62109375, "loss_num": 0.03466796875, "loss_xval": 0.173828125, "num_input_tokens_seen": 290960676, "step": 1694 }, { "epoch": 0.44584730716117577, "grad_norm": 5.536946199556455, "learning_rate": 5e-06, "loss": 0.1152, "num_input_tokens_seen": 291132980, "step": 1695 }, { "epoch": 0.44584730716117577, "loss": 0.10020774602890015, "loss_ce": 0.0019716662354767323, "loss_iou": 0.5390625, "loss_num": 0.0196533203125, "loss_xval": 0.09814453125, "num_input_tokens_seen": 291132980, "step": 1695 }, { "epoch": 0.4461103439205629, "grad_norm": 14.998078131446578, "learning_rate": 5e-06, "loss": 0.141, "num_input_tokens_seen": 291305020, "step": 1696 }, { "epoch": 0.4461103439205629, "loss": 0.26434189081192017, "loss_ce": 0.003111420664936304, "loss_iou": 0.37890625, "loss_num": 0.05224609375, "loss_xval": 0.26171875, "num_input_tokens_seen": 291305020, "step": 1696 }, { "epoch": 0.44637338067995, "grad_norm": 5.503328090464836, "learning_rate": 5e-06, "loss": 0.1152, "num_input_tokens_seen": 291477360, "step": 1697 }, { "epoch": 0.44637338067995, "loss": 0.08737830072641373, "loss_ce": 0.0004947560373693705, "loss_iou": 0.5390625, "loss_num": 0.017333984375, "loss_xval": 0.0869140625, "num_input_tokens_seen": 291477360, "step": 1697 }, { "epoch": 0.44663641743933713, "grad_norm": 4.950750612606285, "learning_rate": 5e-06, "loss": 0.1213, "num_input_tokens_seen": 
291647704, "step": 1698 }, { "epoch": 0.44663641743933713, "loss": 0.09089531749486923, "loss_ce": 0.00031914000282995403, "loss_iou": 0.5234375, "loss_num": 0.01806640625, "loss_xval": 0.0908203125, "num_input_tokens_seen": 291647704, "step": 1698 }, { "epoch": 0.4468994541987243, "grad_norm": 7.442760799509784, "learning_rate": 5e-06, "loss": 0.1546, "num_input_tokens_seen": 291820320, "step": 1699 }, { "epoch": 0.4468994541987243, "loss": 0.1341613531112671, "loss_ce": 0.004461641423404217, "loss_iou": 0.423828125, "loss_num": 0.0260009765625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 291820320, "step": 1699 }, { "epoch": 0.4471624909581114, "grad_norm": 12.789945030223832, "learning_rate": 5e-06, "loss": 0.1432, "num_input_tokens_seen": 291992692, "step": 1700 }, { "epoch": 0.4471624909581114, "loss": 0.12477381527423859, "loss_ce": 0.002901863306760788, "loss_iou": 0.53125, "loss_num": 0.0244140625, "loss_xval": 0.1220703125, "num_input_tokens_seen": 291992692, "step": 1700 }, { "epoch": 0.4474255277174985, "grad_norm": 4.791603869867936, "learning_rate": 5e-06, "loss": 0.1168, "num_input_tokens_seen": 292164820, "step": 1701 }, { "epoch": 0.4474255277174985, "loss": 0.1102890819311142, "loss_ce": 0.002928241156041622, "loss_iou": 0.421875, "loss_num": 0.021484375, "loss_xval": 0.107421875, "num_input_tokens_seen": 292164820, "step": 1701 }, { "epoch": 0.44768856447688565, "grad_norm": 4.5597072248390145, "learning_rate": 5e-06, "loss": 0.1258, "num_input_tokens_seen": 292336840, "step": 1702 }, { "epoch": 0.44768856447688565, "loss": 0.13155938684940338, "loss_ce": 0.0015850251074880362, "loss_iou": 0.703125, "loss_num": 0.0260009765625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 292336840, "step": 1702 }, { "epoch": 0.44795160123627276, "grad_norm": 4.229506785629678, "learning_rate": 5e-06, "loss": 0.1794, "num_input_tokens_seen": 292507240, "step": 1703 }, { "epoch": 0.44795160123627276, "loss": 0.12941154837608337, "loss_ce": 
0.0014818647177889943, "loss_iou": 0.54296875, "loss_num": 0.0255126953125, "loss_xval": 0.1279296875, "num_input_tokens_seen": 292507240, "step": 1703 }, { "epoch": 0.4482146379956599, "grad_norm": 5.641270696096721, "learning_rate": 5e-06, "loss": 0.1276, "num_input_tokens_seen": 292679176, "step": 1704 }, { "epoch": 0.4482146379956599, "loss": 0.12304902821779251, "loss_ce": 0.002321491949260235, "loss_iou": 0.451171875, "loss_num": 0.024169921875, "loss_xval": 0.12060546875, "num_input_tokens_seen": 292679176, "step": 1704 }, { "epoch": 0.448477674755047, "grad_norm": 4.862520419399453, "learning_rate": 5e-06, "loss": 0.1122, "num_input_tokens_seen": 292851500, "step": 1705 }, { "epoch": 0.448477674755047, "loss": 0.089045949280262, "loss_ce": 0.00478691840544343, "loss_iou": 0.546875, "loss_num": 0.016845703125, "loss_xval": 0.08447265625, "num_input_tokens_seen": 292851500, "step": 1705 }, { "epoch": 0.4487407115144341, "grad_norm": 5.2865399556662975, "learning_rate": 5e-06, "loss": 0.1533, "num_input_tokens_seen": 293020992, "step": 1706 }, { "epoch": 0.4487407115144341, "loss": 0.20740769803524017, "loss_ce": 0.0010478447657078505, "loss_iou": 0.66015625, "loss_num": 0.041259765625, "loss_xval": 0.2060546875, "num_input_tokens_seen": 293020992, "step": 1706 }, { "epoch": 0.4490037482738213, "grad_norm": 4.239149151423396, "learning_rate": 5e-06, "loss": 0.1157, "num_input_tokens_seen": 293192840, "step": 1707 }, { "epoch": 0.4490037482738213, "loss": 0.1247292309999466, "loss_ce": 0.002048558322712779, "loss_iou": 0.578125, "loss_num": 0.0245361328125, "loss_xval": 0.12255859375, "num_input_tokens_seen": 293192840, "step": 1707 }, { "epoch": 0.4492667850332084, "grad_norm": 14.697036954882604, "learning_rate": 5e-06, "loss": 0.1221, "num_input_tokens_seen": 293359152, "step": 1708 }, { "epoch": 0.4492667850332084, "loss": 0.17449304461479187, "loss_ce": 0.000573370314668864, "loss_iou": 0.53515625, "loss_num": 0.03466796875, "loss_xval": 0.173828125, 
"num_input_tokens_seen": 293359152, "step": 1708 }, { "epoch": 0.44952982179259554, "grad_norm": 6.428150274293997, "learning_rate": 5e-06, "loss": 0.1398, "num_input_tokens_seen": 293531168, "step": 1709 }, { "epoch": 0.44952982179259554, "loss": 0.0824984684586525, "loss_ce": 0.00025359162827953696, "loss_iou": 0.43359375, "loss_num": 0.0164794921875, "loss_xval": 0.08203125, "num_input_tokens_seen": 293531168, "step": 1709 }, { "epoch": 0.44979285855198264, "grad_norm": 12.082892344047602, "learning_rate": 5e-06, "loss": 0.1236, "num_input_tokens_seen": 293701596, "step": 1710 }, { "epoch": 0.44979285855198264, "loss": 0.075187087059021, "loss_ce": 0.00011384247045498341, "loss_iou": 0.77734375, "loss_num": 0.0150146484375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 293701596, "step": 1710 }, { "epoch": 0.45005589531136975, "grad_norm": 6.447369973784934, "learning_rate": 5e-06, "loss": 0.1528, "num_input_tokens_seen": 293874048, "step": 1711 }, { "epoch": 0.45005589531136975, "loss": 0.12107305228710175, "loss_ce": 0.0001013779838103801, "loss_iou": 0.44140625, "loss_num": 0.024169921875, "loss_xval": 0.12109375, "num_input_tokens_seen": 293874048, "step": 1711 }, { "epoch": 0.4503189320707569, "grad_norm": 4.920690909069883, "learning_rate": 5e-06, "loss": 0.1174, "num_input_tokens_seen": 294046324, "step": 1712 }, { "epoch": 0.4503189320707569, "loss": 0.09293599426746368, "loss_ce": 0.0002541054564062506, "loss_iou": 0.62890625, "loss_num": 0.0185546875, "loss_xval": 0.0927734375, "num_input_tokens_seen": 294046324, "step": 1712 }, { "epoch": 0.450581968830144, "grad_norm": 5.111263917035011, "learning_rate": 5e-06, "loss": 0.1321, "num_input_tokens_seen": 294218352, "step": 1713 }, { "epoch": 0.450581968830144, "loss": 0.08807346224784851, "loss_ce": 0.005675997585058212, "loss_iou": 0.546875, "loss_num": 0.0164794921875, "loss_xval": 0.08251953125, "num_input_tokens_seen": 294218352, "step": 1713 }, { "epoch": 0.4508450055895311, "grad_norm": 
22.02715708529402, "learning_rate": 5e-06, "loss": 0.1312, "num_input_tokens_seen": 294390700, "step": 1714 }, { "epoch": 0.4508450055895311, "loss": 0.16420426964759827, "loss_ce": 0.00023331816191785038, "loss_iou": 0.66796875, "loss_num": 0.03271484375, "loss_xval": 0.1640625, "num_input_tokens_seen": 294390700, "step": 1714 }, { "epoch": 0.45110804234891827, "grad_norm": 36.948349834543606, "learning_rate": 5e-06, "loss": 0.1583, "num_input_tokens_seen": 294562520, "step": 1715 }, { "epoch": 0.45110804234891827, "loss": 0.20613673329353333, "loss_ce": 0.0074672941118478775, "loss_iou": 0.6015625, "loss_num": 0.039794921875, "loss_xval": 0.1982421875, "num_input_tokens_seen": 294562520, "step": 1715 }, { "epoch": 0.4513710791083054, "grad_norm": 11.731491784871416, "learning_rate": 5e-06, "loss": 0.1352, "num_input_tokens_seen": 294732980, "step": 1716 }, { "epoch": 0.4513710791083054, "loss": 0.2512318789958954, "loss_ce": 0.0024831017944961786, "loss_iou": 0.5859375, "loss_num": 0.0498046875, "loss_xval": 0.2490234375, "num_input_tokens_seen": 294732980, "step": 1716 }, { "epoch": 0.45163411586769253, "grad_norm": 14.33875314671704, "learning_rate": 5e-06, "loss": 0.146, "num_input_tokens_seen": 294904972, "step": 1717 }, { "epoch": 0.45163411586769253, "loss": 0.1545875370502472, "loss_ce": 0.0034644976258277893, "loss_iou": 0.609375, "loss_num": 0.0302734375, "loss_xval": 0.1513671875, "num_input_tokens_seen": 294904972, "step": 1717 }, { "epoch": 0.45189715262707963, "grad_norm": 4.667158870565546, "learning_rate": 5e-06, "loss": 0.1225, "num_input_tokens_seen": 295077136, "step": 1718 }, { "epoch": 0.45189715262707963, "loss": 0.10374893248081207, "loss_ce": 0.0009657314512878656, "loss_iou": 0.55859375, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 295077136, "step": 1718 }, { "epoch": 0.45216018938646674, "grad_norm": 4.671478799286235, "learning_rate": 5e-06, "loss": 0.1069, "num_input_tokens_seen": 295249644, "step": 
1719 }, { "epoch": 0.45216018938646674, "loss": 0.12534737586975098, "loss_ce": 0.004924997687339783, "loss_iou": 0.57421875, "loss_num": 0.0240478515625, "loss_xval": 0.12060546875, "num_input_tokens_seen": 295249644, "step": 1719 }, { "epoch": 0.4524232261458539, "grad_norm": 9.77050555348203, "learning_rate": 5e-06, "loss": 0.1039, "num_input_tokens_seen": 295421628, "step": 1720 }, { "epoch": 0.4524232261458539, "loss": 0.10829440504312515, "loss_ce": 0.0021237479522824287, "loss_iou": 0.65625, "loss_num": 0.021240234375, "loss_xval": 0.10595703125, "num_input_tokens_seen": 295421628, "step": 1720 }, { "epoch": 0.452686262905241, "grad_norm": 6.10598748249797, "learning_rate": 5e-06, "loss": 0.108, "num_input_tokens_seen": 295593664, "step": 1721 }, { "epoch": 0.452686262905241, "loss": 0.05939865857362747, "loss_ce": 0.0012016374384984374, "loss_iou": 0.5625, "loss_num": 0.01165771484375, "loss_xval": 0.05810546875, "num_input_tokens_seen": 295593664, "step": 1721 }, { "epoch": 0.45294929966462816, "grad_norm": 10.371953857475111, "learning_rate": 5e-06, "loss": 0.1225, "num_input_tokens_seen": 295765768, "step": 1722 }, { "epoch": 0.45294929966462816, "loss": 0.10428635776042938, "loss_ce": 0.0005265921936370432, "loss_iou": 0.76171875, "loss_num": 0.020751953125, "loss_xval": 0.103515625, "num_input_tokens_seen": 295765768, "step": 1722 }, { "epoch": 0.45321233642401526, "grad_norm": 6.589623724090896, "learning_rate": 5e-06, "loss": 0.1679, "num_input_tokens_seen": 295937984, "step": 1723 }, { "epoch": 0.45321233642401526, "loss": 0.12776082754135132, "loss_ce": 0.0015095948474481702, "loss_iou": 0.392578125, "loss_num": 0.0252685546875, "loss_xval": 0.1259765625, "num_input_tokens_seen": 295937984, "step": 1723 }, { "epoch": 0.45347537318340236, "grad_norm": 5.38306851263364, "learning_rate": 5e-06, "loss": 0.1325, "num_input_tokens_seen": 296109952, "step": 1724 }, { "epoch": 0.45347537318340236, "loss": 0.10374290496110916, "loss_ce": 
0.0004714199749287218, "loss_iou": 0.60546875, "loss_num": 0.0206298828125, "loss_xval": 0.103515625, "num_input_tokens_seen": 296109952, "step": 1724 }, { "epoch": 0.4537384099427895, "grad_norm": 16.069341006935545, "learning_rate": 5e-06, "loss": 0.1861, "num_input_tokens_seen": 296282320, "step": 1725 }, { "epoch": 0.4537384099427895, "loss": 0.3360915184020996, "loss_ce": 0.003205763641744852, "loss_iou": 0.49609375, "loss_num": 0.06640625, "loss_xval": 0.33203125, "num_input_tokens_seen": 296282320, "step": 1725 }, { "epoch": 0.4540014467021766, "grad_norm": 7.355868991768278, "learning_rate": 5e-06, "loss": 0.1149, "num_input_tokens_seen": 296454412, "step": 1726 }, { "epoch": 0.4540014467021766, "loss": 0.11106541752815247, "loss_ce": 0.0014462803956121206, "loss_iou": 0.44140625, "loss_num": 0.02197265625, "loss_xval": 0.109375, "num_input_tokens_seen": 296454412, "step": 1726 }, { "epoch": 0.45426448346156373, "grad_norm": 4.637757034723655, "learning_rate": 5e-06, "loss": 0.1112, "num_input_tokens_seen": 296626392, "step": 1727 }, { "epoch": 0.45426448346156373, "loss": 0.07483752816915512, "loss_ce": 0.00046619208296760917, "loss_iou": 0.359375, "loss_num": 0.014892578125, "loss_xval": 0.07421875, "num_input_tokens_seen": 296626392, "step": 1727 }, { "epoch": 0.4545275202209509, "grad_norm": 4.094172069183009, "learning_rate": 5e-06, "loss": 0.141, "num_input_tokens_seen": 296795792, "step": 1728 }, { "epoch": 0.4545275202209509, "loss": 0.1533362716436386, "loss_ce": 0.00154183991253376, "loss_iou": 0.53125, "loss_num": 0.0303955078125, "loss_xval": 0.1513671875, "num_input_tokens_seen": 296795792, "step": 1728 }, { "epoch": 0.454790556980338, "grad_norm": 4.550319533659712, "learning_rate": 5e-06, "loss": 0.142, "num_input_tokens_seen": 296966316, "step": 1729 }, { "epoch": 0.454790556980338, "loss": 0.25365394353866577, "loss_ce": 0.002402704209089279, "loss_iou": 0.32421875, "loss_num": 0.05029296875, "loss_xval": 0.251953125, 
"num_input_tokens_seen": 296966316, "step": 1729 }, { "epoch": 0.45505359373972515, "grad_norm": 5.052226011800126, "learning_rate": 5e-06, "loss": 0.1398, "num_input_tokens_seen": 297136040, "step": 1730 }, { "epoch": 0.45505359373972515, "loss": 0.1388048231601715, "loss_ce": 0.0004991517635062337, "loss_iou": 0.640625, "loss_num": 0.027587890625, "loss_xval": 0.138671875, "num_input_tokens_seen": 297136040, "step": 1730 }, { "epoch": 0.45531663049911225, "grad_norm": 9.365703053895057, "learning_rate": 5e-06, "loss": 0.0907, "num_input_tokens_seen": 297306228, "step": 1731 }, { "epoch": 0.45531663049911225, "loss": 0.09631586819887161, "loss_ce": 0.0004296356055419892, "loss_iou": 0.4453125, "loss_num": 0.0191650390625, "loss_xval": 0.095703125, "num_input_tokens_seen": 297306228, "step": 1731 }, { "epoch": 0.45557966725849935, "grad_norm": 5.623520624492047, "learning_rate": 5e-06, "loss": 0.1586, "num_input_tokens_seen": 297478380, "step": 1732 }, { "epoch": 0.45557966725849935, "loss": 0.21242645382881165, "loss_ce": 0.0005734282894991338, "loss_iou": 0.58203125, "loss_num": 0.04248046875, "loss_xval": 0.2119140625, "num_input_tokens_seen": 297478380, "step": 1732 }, { "epoch": 0.4558427040178865, "grad_norm": 22.29793682222897, "learning_rate": 5e-06, "loss": 0.1122, "num_input_tokens_seen": 297650336, "step": 1733 }, { "epoch": 0.4558427040178865, "loss": 0.12880732119083405, "loss_ce": 0.0012133296113461256, "loss_iou": 0.56640625, "loss_num": 0.0255126953125, "loss_xval": 0.1279296875, "num_input_tokens_seen": 297650336, "step": 1733 }, { "epoch": 0.4561057407772736, "grad_norm": 19.5277076823797, "learning_rate": 5e-06, "loss": 0.1377, "num_input_tokens_seen": 297822376, "step": 1734 }, { "epoch": 0.4561057407772736, "loss": 0.10357876121997833, "loss_ce": 0.00030727204284630716, "loss_iou": 0.447265625, "loss_num": 0.0206298828125, "loss_xval": 0.103515625, "num_input_tokens_seen": 297822376, "step": 1734 }, { "epoch": 0.4563687775366608, "grad_norm": 
4.597687331185091, "learning_rate": 5e-06, "loss": 0.0992, "num_input_tokens_seen": 297994524, "step": 1735 }, { "epoch": 0.4563687775366608, "loss": 0.11487319320440292, "loss_ce": 0.0003712356265168637, "loss_iou": 0.51171875, "loss_num": 0.0228271484375, "loss_xval": 0.1142578125, "num_input_tokens_seen": 297994524, "step": 1735 }, { "epoch": 0.4566318142960479, "grad_norm": 17.34014327670223, "learning_rate": 5e-06, "loss": 0.1904, "num_input_tokens_seen": 298164216, "step": 1736 }, { "epoch": 0.4566318142960479, "loss": 0.21512141823768616, "loss_ce": 0.0003387040051165968, "loss_iou": 0.54296875, "loss_num": 0.04296875, "loss_xval": 0.21484375, "num_input_tokens_seen": 298164216, "step": 1736 }, { "epoch": 0.456894851055435, "grad_norm": 5.201392853210846, "learning_rate": 5e-06, "loss": 0.116, "num_input_tokens_seen": 298336416, "step": 1737 }, { "epoch": 0.456894851055435, "loss": 0.14383375644683838, "loss_ce": 0.0030866768211126328, "loss_iou": 0.388671875, "loss_num": 0.0281982421875, "loss_xval": 0.140625, "num_input_tokens_seen": 298336416, "step": 1737 }, { "epoch": 0.45715788781482214, "grad_norm": 4.466741788292063, "learning_rate": 5e-06, "loss": 0.1267, "num_input_tokens_seen": 298508336, "step": 1738 }, { "epoch": 0.45715788781482214, "loss": 0.08519221842288971, "loss_ce": 0.001879227813333273, "loss_iou": 0.484375, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 298508336, "step": 1738 }, { "epoch": 0.45742092457420924, "grad_norm": 5.105148151648125, "learning_rate": 5e-06, "loss": 0.1118, "num_input_tokens_seen": 298680480, "step": 1739 }, { "epoch": 0.45742092457420924, "loss": 0.08455468714237213, "loss_ce": 0.00038720344309695065, "loss_iou": 0.40234375, "loss_num": 0.016845703125, "loss_xval": 0.083984375, "num_input_tokens_seen": 298680480, "step": 1739 }, { "epoch": 0.45768396133359635, "grad_norm": 4.88765152218918, "learning_rate": 5e-06, "loss": 0.0892, "num_input_tokens_seen": 298852576, "step": 
1740 }, { "epoch": 0.45768396133359635, "loss": 0.06836723536252975, "loss_ce": 0.0006487306673079729, "loss_iou": 0.57421875, "loss_num": 0.0135498046875, "loss_xval": 0.06787109375, "num_input_tokens_seen": 298852576, "step": 1740 }, { "epoch": 0.4579469980929835, "grad_norm": 11.438189750287325, "learning_rate": 5e-06, "loss": 0.1432, "num_input_tokens_seen": 299025188, "step": 1741 }, { "epoch": 0.4579469980929835, "loss": 0.2103656679391861, "loss_ce": 0.0009235285106115043, "loss_iou": 0.458984375, "loss_num": 0.0419921875, "loss_xval": 0.208984375, "num_input_tokens_seen": 299025188, "step": 1741 }, { "epoch": 0.4582100348523706, "grad_norm": 17.844442720051195, "learning_rate": 5e-06, "loss": 0.11, "num_input_tokens_seen": 299195652, "step": 1742 }, { "epoch": 0.4582100348523706, "loss": 0.0902654230594635, "loss_ce": 0.0008184025646187365, "loss_iou": 0.6171875, "loss_num": 0.0179443359375, "loss_xval": 0.08935546875, "num_input_tokens_seen": 299195652, "step": 1742 }, { "epoch": 0.45847307161175777, "grad_norm": 5.593927162724121, "learning_rate": 5e-06, "loss": 0.1445, "num_input_tokens_seen": 299367692, "step": 1743 }, { "epoch": 0.45847307161175777, "loss": 0.0877409279346466, "loss_ce": 0.004031214863061905, "loss_iou": 0.47265625, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 299367692, "step": 1743 }, { "epoch": 0.45873610837114487, "grad_norm": 6.084084258485456, "learning_rate": 5e-06, "loss": 0.1838, "num_input_tokens_seen": 299539820, "step": 1744 }, { "epoch": 0.45873610837114487, "loss": 0.17818066477775574, "loss_ce": 0.0005988804623484612, "loss_iou": 0.67578125, "loss_num": 0.035400390625, "loss_xval": 0.177734375, "num_input_tokens_seen": 299539820, "step": 1744 }, { "epoch": 0.45899914513053197, "grad_norm": 4.3161100884144785, "learning_rate": 5e-06, "loss": 0.0857, "num_input_tokens_seen": 299710180, "step": 1745 }, { "epoch": 0.45899914513053197, "loss": 0.11408813297748566, "loss_ce": 
0.005079346243292093, "loss_iou": 0.52734375, "loss_num": 0.021728515625, "loss_xval": 0.10888671875, "num_input_tokens_seen": 299710180, "step": 1745 }, { "epoch": 0.45926218188991913, "grad_norm": 9.432280511482096, "learning_rate": 5e-06, "loss": 0.1824, "num_input_tokens_seen": 299882192, "step": 1746 }, { "epoch": 0.45926218188991913, "loss": 0.28233322501182556, "loss_ce": 0.002944799605756998, "loss_iou": 0.40234375, "loss_num": 0.055908203125, "loss_xval": 0.279296875, "num_input_tokens_seen": 299882192, "step": 1746 }, { "epoch": 0.45952521864930623, "grad_norm": 4.947993402328502, "learning_rate": 5e-06, "loss": 0.1228, "num_input_tokens_seen": 300054328, "step": 1747 }, { "epoch": 0.45952521864930623, "loss": 0.12365365773439407, "loss_ce": 0.00454354751855135, "loss_iou": 0.515625, "loss_num": 0.0238037109375, "loss_xval": 0.119140625, "num_input_tokens_seen": 300054328, "step": 1747 }, { "epoch": 0.4597882554086934, "grad_norm": 29.241280731418787, "learning_rate": 5e-06, "loss": 0.1044, "num_input_tokens_seen": 300226268, "step": 1748 }, { "epoch": 0.4597882554086934, "loss": 0.09467601031064987, "loss_ce": 0.002909653354436159, "loss_iou": 0.546875, "loss_num": 0.018310546875, "loss_xval": 0.091796875, "num_input_tokens_seen": 300226268, "step": 1748 }, { "epoch": 0.4600512921680805, "grad_norm": 40.89605718973821, "learning_rate": 5e-06, "loss": 0.1216, "num_input_tokens_seen": 300398324, "step": 1749 }, { "epoch": 0.4600512921680805, "loss": 0.18484918773174286, "loss_ce": 0.002537175314500928, "loss_iou": 0.48828125, "loss_num": 0.036376953125, "loss_xval": 0.1826171875, "num_input_tokens_seen": 300398324, "step": 1749 }, { "epoch": 0.4603143289274676, "grad_norm": 6.154438338656609, "learning_rate": 5e-06, "loss": 0.1836, "num_input_tokens_seen": 300570420, "step": 1750 }, { "epoch": 0.4603143289274676, "eval_websight_new_CIoU": 0.8543897271156311, "eval_websight_new_GIoU": 0.8555817008018494, "eval_websight_new_IoU": 0.8602511882781982, 
"eval_websight_new_MAE_all": 0.022463313303887844, "eval_websight_new_MAE_h": 0.007148948730900884, "eval_websight_new_MAE_w": 0.03532572276890278, "eval_websight_new_MAE_x": 0.03696838486939669, "eval_websight_new_MAE_y": 0.010410191491246223, "eval_websight_new_NUM_probability": 0.9999746978282928, "eval_websight_new_inside_bbox": 1.0, "eval_websight_new_loss": 0.11025163531303406, "eval_websight_new_loss_ce": 7.8859661698516e-06, "eval_websight_new_loss_iou": 0.3748779296875, "eval_websight_new_loss_num": 0.019195556640625, "eval_websight_new_loss_xval": 0.09600830078125, "eval_websight_new_runtime": 55.1926, "eval_websight_new_samples_per_second": 0.906, "eval_websight_new_steps_per_second": 0.036, "num_input_tokens_seen": 300570420, "step": 1750 }, { "epoch": 0.4603143289274676, "eval_seeclick_CIoU": 0.6177069246768951, "eval_seeclick_GIoU": 0.6163533926010132, "eval_seeclick_IoU": 0.6425465941429138, "eval_seeclick_MAE_all": 0.04845697060227394, "eval_seeclick_MAE_h": 0.026225415989756584, "eval_seeclick_MAE_w": 0.06570588797330856, "eval_seeclick_MAE_x": 0.07359151728451252, "eval_seeclick_MAE_y": 0.0283050537109375, "eval_seeclick_NUM_probability": 0.9999766051769257, "eval_seeclick_inside_bbox": 0.890625, "eval_seeclick_loss": 0.23021526634693146, "eval_seeclick_loss_ce": 0.009315645787864923, "eval_seeclick_loss_iou": 0.5054931640625, "eval_seeclick_loss_num": 0.04460906982421875, "eval_seeclick_loss_xval": 0.22314453125, "eval_seeclick_runtime": 72.9378, "eval_seeclick_samples_per_second": 0.59, "eval_seeclick_steps_per_second": 0.027, "num_input_tokens_seen": 300570420, "step": 1750 }, { "epoch": 0.4603143289274676, "eval_icons_CIoU": 0.8409464359283447, "eval_icons_GIoU": 0.8350943326950073, "eval_icons_IoU": 0.847510576248169, "eval_icons_MAE_all": 0.022730856202542782, "eval_icons_MAE_h": 0.02164691872894764, "eval_icons_MAE_w": 0.023992713540792465, "eval_icons_MAE_x": 0.025108729489147663, "eval_icons_MAE_y": 0.02017505932599306, 
"eval_icons_NUM_probability": 0.9999510943889618, "eval_icons_inside_bbox": 0.984375, "eval_icons_loss": 0.079879030585289, "eval_icons_loss_ce": 2.25404792217887e-05, "eval_icons_loss_iou": 0.5426025390625, "eval_icons_loss_num": 0.014867782592773438, "eval_icons_loss_xval": 0.0743560791015625, "eval_icons_runtime": 87.1455, "eval_icons_samples_per_second": 0.574, "eval_icons_steps_per_second": 0.023, "num_input_tokens_seen": 300570420, "step": 1750 }, { "epoch": 0.4603143289274676, "eval_screenspot_CIoU": 0.5540184179941813, "eval_screenspot_GIoU": 0.543454110622406, "eval_screenspot_IoU": 0.5961946249008179, "eval_screenspot_MAE_all": 0.08684368679920833, "eval_screenspot_MAE_h": 0.04899499130745729, "eval_screenspot_MAE_w": 0.1521986499428749, "eval_screenspot_MAE_x": 0.09981551021337509, "eval_screenspot_MAE_y": 0.046365607529878616, "eval_screenspot_NUM_probability": 0.9998787045478821, "eval_screenspot_inside_bbox": 0.8737499912579855, "eval_screenspot_loss": 0.8499577641487122, "eval_screenspot_loss_ce": 0.5143006145954132, "eval_screenspot_loss_iou": 0.4506022135416667, "eval_screenspot_loss_num": 0.06610107421875, "eval_screenspot_loss_xval": 0.3305257161458333, "eval_screenspot_runtime": 149.7903, "eval_screenspot_samples_per_second": 0.594, "eval_screenspot_steps_per_second": 0.02, "num_input_tokens_seen": 300570420, "step": 1750 }, { "epoch": 0.4603143289274676, "loss": 0.855069637298584, "loss_ce": 0.507047176361084, "loss_iou": 0.392578125, "loss_num": 0.0693359375, "loss_xval": 0.34765625, "num_input_tokens_seen": 300570420, "step": 1750 }, { "epoch": 0.46057736568685476, "grad_norm": 6.10997370548099, "learning_rate": 5e-06, "loss": 0.1083, "num_input_tokens_seen": 300742608, "step": 1751 }, { "epoch": 0.46057736568685476, "loss": 0.11830765753984451, "loss_ce": 0.0002961806021630764, "loss_iou": 0.60546875, "loss_num": 0.0235595703125, "loss_xval": 0.1181640625, "num_input_tokens_seen": 300742608, "step": 1751 }, { "epoch": 0.46084040244624186, 
"grad_norm": 4.63619140673612, "learning_rate": 5e-06, "loss": 0.1648, "num_input_tokens_seen": 300914728, "step": 1752 }, { "epoch": 0.46084040244624186, "loss": 0.14693626761436462, "loss_ce": 0.0036867514718323946, "loss_iou": 0.58984375, "loss_num": 0.0286865234375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 300914728, "step": 1752 }, { "epoch": 0.46110343920562896, "grad_norm": 11.051347194862576, "learning_rate": 5e-06, "loss": 0.1718, "num_input_tokens_seen": 301087180, "step": 1753 }, { "epoch": 0.46110343920562896, "loss": 0.2443966418504715, "loss_ce": 0.0005001651006750762, "loss_iou": 0.57421875, "loss_num": 0.048828125, "loss_xval": 0.244140625, "num_input_tokens_seen": 301087180, "step": 1753 }, { "epoch": 0.4613664759650161, "grad_norm": 9.081946267145833, "learning_rate": 5e-06, "loss": 0.0997, "num_input_tokens_seen": 301259116, "step": 1754 }, { "epoch": 0.4613664759650161, "loss": 0.09555494785308838, "loss_ce": 0.002583135850727558, "loss_iou": 0.59765625, "loss_num": 0.0185546875, "loss_xval": 0.0927734375, "num_input_tokens_seen": 301259116, "step": 1754 }, { "epoch": 0.4616295127244032, "grad_norm": 7.9541431928925395, "learning_rate": 5e-06, "loss": 0.1046, "num_input_tokens_seen": 301427764, "step": 1755 }, { "epoch": 0.4616295127244032, "loss": 0.12469062209129333, "loss_ce": 0.0007892490248195827, "loss_iou": 0.5078125, "loss_num": 0.0247802734375, "loss_xval": 0.1240234375, "num_input_tokens_seen": 301427764, "step": 1755 }, { "epoch": 0.4618925494837904, "grad_norm": 4.563316138635991, "learning_rate": 5e-06, "loss": 0.1222, "num_input_tokens_seen": 301599772, "step": 1756 }, { "epoch": 0.4618925494837904, "loss": 0.0649976134300232, "loss_ce": 0.0009717366192489862, "loss_iou": 0.5234375, "loss_num": 0.0128173828125, "loss_xval": 0.06396484375, "num_input_tokens_seen": 301599772, "step": 1756 }, { "epoch": 0.4621555862431775, "grad_norm": 13.734783204569586, "learning_rate": 5e-06, "loss": 0.1934, "num_input_tokens_seen": 
301771888, "step": 1757 }, { "epoch": 0.4621555862431775, "loss": 0.2562709152698517, "loss_ce": 0.0031581264920532703, "loss_iou": 0.26171875, "loss_num": 0.050537109375, "loss_xval": 0.25390625, "num_input_tokens_seen": 301771888, "step": 1757 }, { "epoch": 0.4624186230025646, "grad_norm": 8.489244421344079, "learning_rate": 5e-06, "loss": 0.1197, "num_input_tokens_seen": 301944272, "step": 1758 }, { "epoch": 0.4624186230025646, "loss": 0.08469030261039734, "loss_ce": 0.00088903569849208, "loss_iou": 0.5390625, "loss_num": 0.0167236328125, "loss_xval": 0.083984375, "num_input_tokens_seen": 301944272, "step": 1758 }, { "epoch": 0.46268165976195175, "grad_norm": 5.029084121420607, "learning_rate": 5e-06, "loss": 0.1041, "num_input_tokens_seen": 302116376, "step": 1759 }, { "epoch": 0.46268165976195175, "loss": 0.12341859936714172, "loss_ce": 0.0011651779059320688, "loss_iou": 0.44921875, "loss_num": 0.0244140625, "loss_xval": 0.1220703125, "num_input_tokens_seen": 302116376, "step": 1759 }, { "epoch": 0.46294469652133885, "grad_norm": 20.972527246553025, "learning_rate": 5e-06, "loss": 0.1375, "num_input_tokens_seen": 302288436, "step": 1760 }, { "epoch": 0.46294469652133885, "loss": 0.0873933807015419, "loss_ce": 0.0027071028016507626, "loss_iou": 0.6171875, "loss_num": 0.0169677734375, "loss_xval": 0.08447265625, "num_input_tokens_seen": 302288436, "step": 1760 }, { "epoch": 0.463207733280726, "grad_norm": 4.449125782082312, "learning_rate": 5e-06, "loss": 0.1406, "num_input_tokens_seen": 302460504, "step": 1761 }, { "epoch": 0.463207733280726, "loss": 0.10758376121520996, "loss_ce": 0.0050141820684075356, "loss_iou": 0.423828125, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 302460504, "step": 1761 }, { "epoch": 0.4634707700401131, "grad_norm": 6.144634607422675, "learning_rate": 5e-06, "loss": 0.1573, "num_input_tokens_seen": 302632396, "step": 1762 }, { "epoch": 0.4634707700401131, "loss": 0.19673708081245422, "loss_ce": 
0.0027063230518251657, "loss_iou": 0.484375, "loss_num": 0.038818359375, "loss_xval": 0.1943359375, "num_input_tokens_seen": 302632396, "step": 1762 }, { "epoch": 0.4637338067995002, "grad_norm": 4.45571483495035, "learning_rate": 5e-06, "loss": 0.1306, "num_input_tokens_seen": 302804644, "step": 1763 }, { "epoch": 0.4637338067995002, "loss": 0.0787121132016182, "loss_ce": 0.005683551542460918, "loss_iou": 0.35546875, "loss_num": 0.01458740234375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 302804644, "step": 1763 }, { "epoch": 0.4639968435588874, "grad_norm": 20.80296883365536, "learning_rate": 5e-06, "loss": 0.1304, "num_input_tokens_seen": 302976856, "step": 1764 }, { "epoch": 0.4639968435588874, "loss": 0.14017972350120544, "loss_ce": 0.0037661464884877205, "loss_iou": 0.51171875, "loss_num": 0.0272216796875, "loss_xval": 0.13671875, "num_input_tokens_seen": 302976856, "step": 1764 }, { "epoch": 0.4642598803182745, "grad_norm": 5.519977681881848, "learning_rate": 5e-06, "loss": 0.1241, "num_input_tokens_seen": 303146760, "step": 1765 }, { "epoch": 0.4642598803182745, "loss": 0.12335249036550522, "loss_ce": 0.002991169923916459, "loss_iou": 0.5546875, "loss_num": 0.0240478515625, "loss_xval": 0.1201171875, "num_input_tokens_seen": 303146760, "step": 1765 }, { "epoch": 0.4645229170776616, "grad_norm": 6.31203658122147, "learning_rate": 5e-06, "loss": 0.1147, "num_input_tokens_seen": 303318968, "step": 1766 }, { "epoch": 0.4645229170776616, "loss": 0.08828569203615189, "loss_ce": 0.002073533833026886, "loss_iou": 0.703125, "loss_num": 0.0172119140625, "loss_xval": 0.08642578125, "num_input_tokens_seen": 303318968, "step": 1766 }, { "epoch": 0.46478595383704874, "grad_norm": 5.046640432518341, "learning_rate": 5e-06, "loss": 0.1099, "num_input_tokens_seen": 303491036, "step": 1767 }, { "epoch": 0.46478595383704874, "loss": 0.08855307102203369, "loss_ce": 0.002066256944090128, "loss_iou": 0.640625, "loss_num": 0.017333984375, "loss_xval": 0.08642578125, 
"num_input_tokens_seen": 303491036, "step": 1767 }, { "epoch": 0.46504899059643584, "grad_norm": 8.451371161082179, "learning_rate": 5e-06, "loss": 0.1373, "num_input_tokens_seen": 303663404, "step": 1768 }, { "epoch": 0.46504899059643584, "loss": 0.16699054837226868, "loss_ce": 0.002805991331115365, "loss_iou": 0.640625, "loss_num": 0.03271484375, "loss_xval": 0.1640625, "num_input_tokens_seen": 303663404, "step": 1768 }, { "epoch": 0.465312027355823, "grad_norm": 9.603701773353785, "learning_rate": 5e-06, "loss": 0.1262, "num_input_tokens_seen": 303832160, "step": 1769 }, { "epoch": 0.465312027355823, "loss": 0.11529731005430222, "loss_ce": 0.0019550304859876633, "loss_iou": 0.4921875, "loss_num": 0.022705078125, "loss_xval": 0.11328125, "num_input_tokens_seen": 303832160, "step": 1769 }, { "epoch": 0.4655750641152101, "grad_norm": 4.893083096864921, "learning_rate": 5e-06, "loss": 0.1186, "num_input_tokens_seen": 304004460, "step": 1770 }, { "epoch": 0.4655750641152101, "loss": 0.15418250858783722, "loss_ce": 0.0016556488117203116, "loss_iou": 0.6484375, "loss_num": 0.030517578125, "loss_xval": 0.15234375, "num_input_tokens_seen": 304004460, "step": 1770 }, { "epoch": 0.4658381008745972, "grad_norm": 5.299927971595323, "learning_rate": 5e-06, "loss": 0.1103, "num_input_tokens_seen": 304176564, "step": 1771 }, { "epoch": 0.4658381008745972, "loss": 0.11469468474388123, "loss_ce": 0.004251571837812662, "loss_iou": 0.53515625, "loss_num": 0.0220947265625, "loss_xval": 0.1103515625, "num_input_tokens_seen": 304176564, "step": 1771 }, { "epoch": 0.46610113763398436, "grad_norm": 4.786837798902794, "learning_rate": 5e-06, "loss": 0.1318, "num_input_tokens_seen": 304349064, "step": 1772 }, { "epoch": 0.46610113763398436, "loss": 0.13777077198028564, "loss_ce": 0.002303245011717081, "loss_iou": 0.63671875, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 304349064, "step": 1772 }, { "epoch": 0.46636417439337147, "grad_norm": 
7.199109826840915, "learning_rate": 5e-06, "loss": 0.1835, "num_input_tokens_seen": 304521336, "step": 1773 }, { "epoch": 0.46636417439337147, "loss": 0.13359083235263824, "loss_ce": 0.0007783286855556071, "loss_iou": 0.6328125, "loss_num": 0.026611328125, "loss_xval": 0.1328125, "num_input_tokens_seen": 304521336, "step": 1773 }, { "epoch": 0.4666272111527586, "grad_norm": 5.7773703638814355, "learning_rate": 5e-06, "loss": 0.1151, "num_input_tokens_seen": 304693604, "step": 1774 }, { "epoch": 0.4666272111527586, "loss": 0.09553907811641693, "loss_ce": 0.0015449414495378733, "loss_iou": 0.7109375, "loss_num": 0.018798828125, "loss_xval": 0.09375, "num_input_tokens_seen": 304693604, "step": 1774 }, { "epoch": 0.46689024791214573, "grad_norm": 4.7750261071300475, "learning_rate": 5e-06, "loss": 0.1158, "num_input_tokens_seen": 304865896, "step": 1775 }, { "epoch": 0.46689024791214573, "loss": 0.17456401884555817, "loss_ce": 0.0003696825006045401, "loss_iou": 0.466796875, "loss_num": 0.034912109375, "loss_xval": 0.173828125, "num_input_tokens_seen": 304865896, "step": 1775 }, { "epoch": 0.46715328467153283, "grad_norm": 4.560916465958671, "learning_rate": 5e-06, "loss": 0.128, "num_input_tokens_seen": 305038112, "step": 1776 }, { "epoch": 0.46715328467153283, "loss": 0.16979777812957764, "loss_ce": 0.002805587835609913, "loss_iou": 0.6875, "loss_num": 0.033447265625, "loss_xval": 0.1669921875, "num_input_tokens_seen": 305038112, "step": 1776 }, { "epoch": 0.46741632143092, "grad_norm": 9.580731840657464, "learning_rate": 5e-06, "loss": 0.1265, "num_input_tokens_seen": 305210520, "step": 1777 }, { "epoch": 0.46741632143092, "loss": 0.1295488327741623, "loss_ce": 0.0010087917326018214, "loss_iou": 0.51953125, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 305210520, "step": 1777 }, { "epoch": 0.4676793581903071, "grad_norm": 12.328201105743592, "learning_rate": 5e-06, "loss": 0.1654, "num_input_tokens_seen": 305382852, "step": 1778 }, { 
"epoch": 0.4676793581903071, "loss": 0.10579667240381241, "loss_ce": 0.0018843174912035465, "loss_iou": 0.5625, "loss_num": 0.020751953125, "loss_xval": 0.10400390625, "num_input_tokens_seen": 305382852, "step": 1778 }, { "epoch": 0.4679423949496942, "grad_norm": 7.809054499493732, "learning_rate": 5e-06, "loss": 0.091, "num_input_tokens_seen": 305554812, "step": 1779 }, { "epoch": 0.4679423949496942, "loss": 0.12438150495290756, "loss_ce": 0.003974398132413626, "loss_iou": 0.59375, "loss_num": 0.024169921875, "loss_xval": 0.12060546875, "num_input_tokens_seen": 305554812, "step": 1779 }, { "epoch": 0.46820543170908135, "grad_norm": 8.973200505315825, "learning_rate": 5e-06, "loss": 0.1374, "num_input_tokens_seen": 305727124, "step": 1780 }, { "epoch": 0.46820543170908135, "loss": 0.08462625741958618, "loss_ce": 0.0006571417325176299, "loss_iou": 0.62890625, "loss_num": 0.016845703125, "loss_xval": 0.083984375, "num_input_tokens_seen": 305727124, "step": 1780 }, { "epoch": 0.46846846846846846, "grad_norm": 5.227611768036767, "learning_rate": 5e-06, "loss": 0.1239, "num_input_tokens_seen": 305899256, "step": 1781 }, { "epoch": 0.46846846846846846, "loss": 0.1279701292514801, "loss_ce": 0.0015358042437583208, "loss_iou": 0.376953125, "loss_num": 0.0252685546875, "loss_xval": 0.1259765625, "num_input_tokens_seen": 305899256, "step": 1781 }, { "epoch": 0.4687315052278556, "grad_norm": 3.599452107221254, "learning_rate": 5e-06, "loss": 0.109, "num_input_tokens_seen": 306069952, "step": 1782 }, { "epoch": 0.4687315052278556, "loss": 0.1316523402929306, "loss_ce": 0.002959707286208868, "loss_iou": 0.6328125, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 306069952, "step": 1782 }, { "epoch": 0.4689945419872427, "grad_norm": 6.0196869037820235, "learning_rate": 5e-06, "loss": 0.1144, "num_input_tokens_seen": 306242268, "step": 1783 }, { "epoch": 0.4689945419872427, "loss": 0.0880986899137497, "loss_ce": 0.0006353051285259426, "loss_iou": 
0.369140625, "loss_num": 0.0174560546875, "loss_xval": 0.08740234375, "num_input_tokens_seen": 306242268, "step": 1783 }, { "epoch": 0.4692575787466298, "grad_norm": 5.3230563681220735, "learning_rate": 5e-06, "loss": 0.1359, "num_input_tokens_seen": 306414220, "step": 1784 }, { "epoch": 0.4692575787466298, "loss": 0.08738180994987488, "loss_ce": 0.0023750958498567343, "loss_iou": 0.63671875, "loss_num": 0.0169677734375, "loss_xval": 0.0849609375, "num_input_tokens_seen": 306414220, "step": 1784 }, { "epoch": 0.469520615506017, "grad_norm": 4.772051394078166, "learning_rate": 5e-06, "loss": 0.1417, "num_input_tokens_seen": 306586436, "step": 1785 }, { "epoch": 0.469520615506017, "loss": 0.17895328998565674, "loss_ce": 0.000791671103797853, "loss_iou": 0.5390625, "loss_num": 0.03564453125, "loss_xval": 0.177734375, "num_input_tokens_seen": 306586436, "step": 1785 }, { "epoch": 0.4697836522654041, "grad_norm": 5.377031057320062, "learning_rate": 5e-06, "loss": 0.1954, "num_input_tokens_seen": 306758712, "step": 1786 }, { "epoch": 0.4697836522654041, "loss": 0.201407790184021, "loss_ce": 0.004172691144049168, "loss_iou": 0.72265625, "loss_num": 0.03955078125, "loss_xval": 0.197265625, "num_input_tokens_seen": 306758712, "step": 1786 }, { "epoch": 0.47004668902479124, "grad_norm": 4.6699721270287275, "learning_rate": 5e-06, "loss": 0.1115, "num_input_tokens_seen": 306931112, "step": 1787 }, { "epoch": 0.47004668902479124, "loss": 0.10016533732414246, "loss_ce": 0.00031182204838842154, "loss_iou": 0.53515625, "loss_num": 0.02001953125, "loss_xval": 0.099609375, "num_input_tokens_seen": 306931112, "step": 1787 }, { "epoch": 0.47030972578417835, "grad_norm": 8.618398116569972, "learning_rate": 5e-06, "loss": 0.1445, "num_input_tokens_seen": 307103388, "step": 1788 }, { "epoch": 0.47030972578417835, "loss": 0.27383407950401306, "loss_ce": 0.0001524553372291848, "loss_iou": 0.515625, "loss_num": 0.0546875, "loss_xval": 0.2734375, "num_input_tokens_seen": 307103388, "step": 
1788 }, { "epoch": 0.47057276254356545, "grad_norm": 11.779661781344625, "learning_rate": 5e-06, "loss": 0.134, "num_input_tokens_seen": 307275620, "step": 1789 }, { "epoch": 0.47057276254356545, "loss": 0.08500531315803528, "loss_ce": 0.00019696829258464277, "loss_iou": 0.4453125, "loss_num": 0.0169677734375, "loss_xval": 0.0849609375, "num_input_tokens_seen": 307275620, "step": 1789 }, { "epoch": 0.4708357993029526, "grad_norm": 23.708154257705488, "learning_rate": 5e-06, "loss": 0.1535, "num_input_tokens_seen": 307448180, "step": 1790 }, { "epoch": 0.4708357993029526, "loss": 0.17157645523548126, "loss_ce": 0.002493813633918762, "loss_iou": 0.453125, "loss_num": 0.033935546875, "loss_xval": 0.1689453125, "num_input_tokens_seen": 307448180, "step": 1790 }, { "epoch": 0.4710988360623397, "grad_norm": 4.7495128426496, "learning_rate": 5e-06, "loss": 0.1434, "num_input_tokens_seen": 307620540, "step": 1791 }, { "epoch": 0.4710988360623397, "loss": 0.23302927613258362, "loss_ce": 0.0009736126521602273, "loss_iou": 0.443359375, "loss_num": 0.04638671875, "loss_xval": 0.232421875, "num_input_tokens_seen": 307620540, "step": 1791 }, { "epoch": 0.4713618728217268, "grad_norm": 7.18577313542124, "learning_rate": 5e-06, "loss": 0.1114, "num_input_tokens_seen": 307792704, "step": 1792 }, { "epoch": 0.4713618728217268, "loss": 0.13438743352890015, "loss_ce": 0.0005068117170594633, "loss_iou": 0.4765625, "loss_num": 0.02685546875, "loss_xval": 0.1337890625, "num_input_tokens_seen": 307792704, "step": 1792 }, { "epoch": 0.47162490958111397, "grad_norm": 7.179208918692874, "learning_rate": 5e-06, "loss": 0.1598, "num_input_tokens_seen": 307965184, "step": 1793 }, { "epoch": 0.47162490958111397, "loss": 0.15465694665908813, "loss_ce": 0.00011593455565162003, "loss_iou": 0.42578125, "loss_num": 0.0308837890625, "loss_xval": 0.154296875, "num_input_tokens_seen": 307965184, "step": 1793 }, { "epoch": 0.4718879463405011, "grad_norm": 6.391344075166243, "learning_rate": 5e-06, 
"loss": 0.1742, "num_input_tokens_seen": 308135620, "step": 1794 }, { "epoch": 0.4718879463405011, "loss": 0.23051750659942627, "loss_ce": 0.00010977771307807416, "loss_iou": 0.58984375, "loss_num": 0.046142578125, "loss_xval": 0.23046875, "num_input_tokens_seen": 308135620, "step": 1794 }, { "epoch": 0.47215098309988823, "grad_norm": 8.145404749300642, "learning_rate": 5e-06, "loss": 0.1465, "num_input_tokens_seen": 308308152, "step": 1795 }, { "epoch": 0.47215098309988823, "loss": 0.1917172074317932, "loss_ce": 0.008871147409081459, "loss_iou": 0.640625, "loss_num": 0.03662109375, "loss_xval": 0.1826171875, "num_input_tokens_seen": 308308152, "step": 1795 }, { "epoch": 0.47241401985927534, "grad_norm": 5.398862877955347, "learning_rate": 5e-06, "loss": 0.1228, "num_input_tokens_seen": 308477996, "step": 1796 }, { "epoch": 0.47241401985927534, "loss": 0.08200335502624512, "loss_ce": 0.0003993565624114126, "loss_iou": 0.48046875, "loss_num": 0.016357421875, "loss_xval": 0.08154296875, "num_input_tokens_seen": 308477996, "step": 1796 }, { "epoch": 0.47267705661866244, "grad_norm": 9.91724255704017, "learning_rate": 5e-06, "loss": 0.1449, "num_input_tokens_seen": 308648424, "step": 1797 }, { "epoch": 0.47267705661866244, "loss": 0.12875403463840485, "loss_ce": 0.0009769393363967538, "loss_iou": 0.515625, "loss_num": 0.025634765625, "loss_xval": 0.1279296875, "num_input_tokens_seen": 308648424, "step": 1797 }, { "epoch": 0.4729400933780496, "grad_norm": 6.138930079819964, "learning_rate": 5e-06, "loss": 0.1842, "num_input_tokens_seen": 308818888, "step": 1798 }, { "epoch": 0.4729400933780496, "loss": 0.16927096247673035, "loss_ce": 0.0021567128133028746, "loss_iou": 0.73046875, "loss_num": 0.033447265625, "loss_xval": 0.1669921875, "num_input_tokens_seen": 308818888, "step": 1798 }, { "epoch": 0.4732031301374367, "grad_norm": 19.105244313122192, "learning_rate": 5e-06, "loss": 0.1766, "num_input_tokens_seen": 308991284, "step": 1799 }, { "epoch": 0.4732031301374367, 
"loss": 0.21268007159233093, "loss_ce": 9.461388253839687e-05, "loss_iou": 0.431640625, "loss_num": 0.04248046875, "loss_xval": 0.212890625, "num_input_tokens_seen": 308991284, "step": 1799 }, { "epoch": 0.4734661668968238, "grad_norm": 6.0514901455791215, "learning_rate": 5e-06, "loss": 0.1265, "num_input_tokens_seen": 309163244, "step": 1800 }, { "epoch": 0.4734661668968238, "loss": 0.11982670426368713, "loss_ce": 0.00047245476162061095, "loss_iou": 0.455078125, "loss_num": 0.0238037109375, "loss_xval": 0.119140625, "num_input_tokens_seen": 309163244, "step": 1800 }, { "epoch": 0.47372920365621096, "grad_norm": 4.529009692158789, "learning_rate": 5e-06, "loss": 0.1439, "num_input_tokens_seen": 309335260, "step": 1801 }, { "epoch": 0.47372920365621096, "loss": 0.16322672367095947, "loss_ce": 0.000812180747743696, "loss_iou": 0.4921875, "loss_num": 0.032470703125, "loss_xval": 0.162109375, "num_input_tokens_seen": 309335260, "step": 1801 }, { "epoch": 0.47399224041559807, "grad_norm": 4.778436741374609, "learning_rate": 5e-06, "loss": 0.142, "num_input_tokens_seen": 309507516, "step": 1802 }, { "epoch": 0.47399224041559807, "loss": 0.09654629230499268, "loss_ce": 0.0012398946564644575, "loss_iou": 0.412109375, "loss_num": 0.01904296875, "loss_xval": 0.09521484375, "num_input_tokens_seen": 309507516, "step": 1802 }, { "epoch": 0.4742552771749852, "grad_norm": 4.954945704123918, "learning_rate": 5e-06, "loss": 0.1113, "num_input_tokens_seen": 309677988, "step": 1803 }, { "epoch": 0.4742552771749852, "loss": 0.0787278264760971, "loss_ce": 0.00014505814760923386, "loss_iou": 0.59375, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 309677988, "step": 1803 }, { "epoch": 0.4745183139343723, "grad_norm": 12.56847461072702, "learning_rate": 5e-06, "loss": 0.1235, "num_input_tokens_seen": 309850180, "step": 1804 }, { "epoch": 0.4745183139343723, "loss": 0.13278400897979736, "loss_ce": 0.00015460627037100494, "loss_iou": 0.52734375, 
"loss_num": 0.0264892578125, "loss_xval": 0.1328125, "num_input_tokens_seen": 309850180, "step": 1804 }, { "epoch": 0.47478135069375943, "grad_norm": 15.493537781093892, "learning_rate": 5e-06, "loss": 0.0899, "num_input_tokens_seen": 310019768, "step": 1805 }, { "epoch": 0.47478135069375943, "loss": 0.05194393917918205, "loss_ce": 0.0007049225969240069, "loss_iou": 0.53515625, "loss_num": 0.01025390625, "loss_xval": 0.05126953125, "num_input_tokens_seen": 310019768, "step": 1805 }, { "epoch": 0.4750443874531466, "grad_norm": 6.1982861025667795, "learning_rate": 5e-06, "loss": 0.1392, "num_input_tokens_seen": 310191904, "step": 1806 }, { "epoch": 0.4750443874531466, "loss": 0.09039468318223953, "loss_ce": 0.00045937972026877105, "loss_iou": 0.7265625, "loss_num": 0.0179443359375, "loss_xval": 0.08984375, "num_input_tokens_seen": 310191904, "step": 1806 }, { "epoch": 0.4753074242125337, "grad_norm": 8.01906133906792, "learning_rate": 5e-06, "loss": 0.1692, "num_input_tokens_seen": 310364020, "step": 1807 }, { "epoch": 0.4753074242125337, "loss": 0.1431303471326828, "loss_ce": 0.002688447944819927, "loss_iou": 0.5703125, "loss_num": 0.028076171875, "loss_xval": 0.140625, "num_input_tokens_seen": 310364020, "step": 1807 }, { "epoch": 0.47557046097192085, "grad_norm": 8.410195178652696, "learning_rate": 5e-06, "loss": 0.1112, "num_input_tokens_seen": 310536388, "step": 1808 }, { "epoch": 0.47557046097192085, "loss": 0.1313067376613617, "loss_ce": 0.002064801286906004, "loss_iou": 0.73046875, "loss_num": 0.02587890625, "loss_xval": 0.12890625, "num_input_tokens_seen": 310536388, "step": 1808 }, { "epoch": 0.47583349773130795, "grad_norm": 5.758329237623371, "learning_rate": 5e-06, "loss": 0.122, "num_input_tokens_seen": 310706940, "step": 1809 }, { "epoch": 0.47583349773130795, "loss": 0.0813891738653183, "loss_ce": 0.0004565550771076232, "loss_iou": 0.484375, "loss_num": 0.01611328125, "loss_xval": 0.0810546875, "num_input_tokens_seen": 310706940, "step": 1809 }, { 
"epoch": 0.47609653449069506, "grad_norm": 38.441949828692486, "learning_rate": 5e-06, "loss": 0.0962, "num_input_tokens_seen": 310879292, "step": 1810 }, { "epoch": 0.47609653449069506, "loss": 0.06832106411457062, "loss_ce": 0.007591082248836756, "loss_iou": 0.75, "loss_num": 0.01214599609375, "loss_xval": 0.060791015625, "num_input_tokens_seen": 310879292, "step": 1810 }, { "epoch": 0.4763595712500822, "grad_norm": 7.22086526843683, "learning_rate": 5e-06, "loss": 0.124, "num_input_tokens_seen": 311051572, "step": 1811 }, { "epoch": 0.4763595712500822, "loss": 0.0902928039431572, "loss_ce": 0.00047957096830941737, "loss_iou": 0.53125, "loss_num": 0.0179443359375, "loss_xval": 0.08984375, "num_input_tokens_seen": 311051572, "step": 1811 }, { "epoch": 0.4766226080094693, "grad_norm": 11.063199574808042, "learning_rate": 5e-06, "loss": 0.1343, "num_input_tokens_seen": 311222148, "step": 1812 }, { "epoch": 0.4766226080094693, "loss": 0.2204355001449585, "loss_ce": 0.001777050900273025, "loss_iou": 0.447265625, "loss_num": 0.043701171875, "loss_xval": 0.21875, "num_input_tokens_seen": 311222148, "step": 1812 }, { "epoch": 0.4768856447688564, "grad_norm": 10.035743209995298, "learning_rate": 5e-06, "loss": 0.0919, "num_input_tokens_seen": 311394364, "step": 1813 }, { "epoch": 0.4768856447688564, "loss": 0.07709920406341553, "loss_ce": 0.003307701088488102, "loss_iou": 0.421875, "loss_num": 0.0147705078125, "loss_xval": 0.07373046875, "num_input_tokens_seen": 311394364, "step": 1813 }, { "epoch": 0.4771486815282436, "grad_norm": 8.932384890899103, "learning_rate": 5e-06, "loss": 0.1796, "num_input_tokens_seen": 311566412, "step": 1814 }, { "epoch": 0.4771486815282436, "loss": 0.18021875619888306, "loss_ce": 0.002743777120485902, "loss_iou": 0.4375, "loss_num": 0.035400390625, "loss_xval": 0.177734375, "num_input_tokens_seen": 311566412, "step": 1814 }, { "epoch": 0.4774117182876307, "grad_norm": 7.670958022785577, "learning_rate": 5e-06, "loss": 0.097, 
"num_input_tokens_seen": 311738816, "step": 1815 }, { "epoch": 0.4774117182876307, "loss": 0.06826284527778625, "loss_ce": 0.002039696555584669, "loss_iou": 0.546875, "loss_num": 0.01324462890625, "loss_xval": 0.06640625, "num_input_tokens_seen": 311738816, "step": 1815 }, { "epoch": 0.47767475504701784, "grad_norm": 9.178964340109715, "learning_rate": 5e-06, "loss": 0.1004, "num_input_tokens_seen": 311910812, "step": 1816 }, { "epoch": 0.47767475504701784, "loss": 0.15946456789970398, "loss_ce": 0.0008036750950850546, "loss_iou": 0.4921875, "loss_num": 0.03173828125, "loss_xval": 0.158203125, "num_input_tokens_seen": 311910812, "step": 1816 }, { "epoch": 0.47793779180640494, "grad_norm": 8.864036975405911, "learning_rate": 5e-06, "loss": 0.1104, "num_input_tokens_seen": 312082820, "step": 1817 }, { "epoch": 0.47793779180640494, "loss": 0.0714251697063446, "loss_ce": 0.002028202638030052, "loss_iou": 0.55078125, "loss_num": 0.01385498046875, "loss_xval": 0.0693359375, "num_input_tokens_seen": 312082820, "step": 1817 }, { "epoch": 0.47820082856579205, "grad_norm": 4.5785017279302185, "learning_rate": 5e-06, "loss": 0.1308, "num_input_tokens_seen": 312255312, "step": 1818 }, { "epoch": 0.47820082856579205, "loss": 0.1785389930009842, "loss_ce": 0.0035817159805446863, "loss_iou": 0.5234375, "loss_num": 0.034912109375, "loss_xval": 0.1748046875, "num_input_tokens_seen": 312255312, "step": 1818 }, { "epoch": 0.4784638653251792, "grad_norm": 9.103884045463392, "learning_rate": 5e-06, "loss": 0.1388, "num_input_tokens_seen": 312427552, "step": 1819 }, { "epoch": 0.4784638653251792, "loss": 0.09079764038324356, "loss_ce": 0.0007707877666689456, "loss_iou": 0.6484375, "loss_num": 0.01806640625, "loss_xval": 0.08984375, "num_input_tokens_seen": 312427552, "step": 1819 }, { "epoch": 0.4787269020845663, "grad_norm": 3.611814351166419, "learning_rate": 5e-06, "loss": 0.1127, "num_input_tokens_seen": 312599880, "step": 1820 }, { "epoch": 0.4787269020845663, "loss": 
0.16918551921844482, "loss_ce": 0.0004843563656322658, "loss_iou": 0.59375, "loss_num": 0.03369140625, "loss_xval": 0.1689453125, "num_input_tokens_seen": 312599880, "step": 1820 }, { "epoch": 0.47898993884395347, "grad_norm": 6.886471414344964, "learning_rate": 5e-06, "loss": 0.144, "num_input_tokens_seen": 312771960, "step": 1821 }, { "epoch": 0.47898993884395347, "loss": 0.1718224436044693, "loss_ce": 0.0013817725703120232, "loss_iou": 0.61328125, "loss_num": 0.0341796875, "loss_xval": 0.1708984375, "num_input_tokens_seen": 312771960, "step": 1821 }, { "epoch": 0.47925297560334057, "grad_norm": 12.670187653553288, "learning_rate": 5e-06, "loss": 0.1477, "num_input_tokens_seen": 312943884, "step": 1822 }, { "epoch": 0.47925297560334057, "loss": 0.17194947600364685, "loss_ce": 0.0042553916573524475, "loss_iou": 0.486328125, "loss_num": 0.033447265625, "loss_xval": 0.16796875, "num_input_tokens_seen": 312943884, "step": 1822 }, { "epoch": 0.4795160123627277, "grad_norm": 5.735564443277563, "learning_rate": 5e-06, "loss": 0.1186, "num_input_tokens_seen": 313115988, "step": 1823 }, { "epoch": 0.4795160123627277, "loss": 0.11030334234237671, "loss_ce": 0.00010437482706038281, "loss_iou": 0.392578125, "loss_num": 0.02197265625, "loss_xval": 0.1103515625, "num_input_tokens_seen": 313115988, "step": 1823 }, { "epoch": 0.47977904912211483, "grad_norm": 5.327091023011001, "learning_rate": 5e-06, "loss": 0.1553, "num_input_tokens_seen": 313288456, "step": 1824 }, { "epoch": 0.47977904912211483, "loss": 0.2672034204006195, "loss_ce": 0.0009680833900347352, "loss_iou": 0.42578125, "loss_num": 0.05322265625, "loss_xval": 0.265625, "num_input_tokens_seen": 313288456, "step": 1824 }, { "epoch": 0.48004208588150193, "grad_norm": 8.698833105847095, "learning_rate": 5e-06, "loss": 0.0994, "num_input_tokens_seen": 313460532, "step": 1825 }, { "epoch": 0.48004208588150193, "loss": 0.11867256462574005, "loss_ce": 0.0026752520352602005, "loss_iou": 0.419921875, "loss_num": 
0.023193359375, "loss_xval": 0.1162109375, "num_input_tokens_seen": 313460532, "step": 1825 }, { "epoch": 0.48030512264088904, "grad_norm": 4.784883256464839, "learning_rate": 5e-06, "loss": 0.1265, "num_input_tokens_seen": 313630464, "step": 1826 }, { "epoch": 0.48030512264088904, "loss": 0.18028897047042847, "loss_ce": 0.0021883829031139612, "loss_iou": 0.57421875, "loss_num": 0.03564453125, "loss_xval": 0.177734375, "num_input_tokens_seen": 313630464, "step": 1826 }, { "epoch": 0.4805681594002762, "grad_norm": 7.070087319121284, "learning_rate": 5e-06, "loss": 0.1388, "num_input_tokens_seen": 313802864, "step": 1827 }, { "epoch": 0.4805681594002762, "loss": 0.19596196711063385, "loss_ce": 0.0022058698814362288, "loss_iou": 0.5546875, "loss_num": 0.038818359375, "loss_xval": 0.193359375, "num_input_tokens_seen": 313802864, "step": 1827 }, { "epoch": 0.4808311961596633, "grad_norm": 6.746240646813248, "learning_rate": 5e-06, "loss": 0.1253, "num_input_tokens_seen": 313975036, "step": 1828 }, { "epoch": 0.4808311961596633, "loss": 0.12373416870832443, "loss_ce": 0.0009009129134938121, "loss_iou": 0.59765625, "loss_num": 0.0245361328125, "loss_xval": 0.123046875, "num_input_tokens_seen": 313975036, "step": 1828 }, { "epoch": 0.48109423291905046, "grad_norm": 12.282763523774303, "learning_rate": 5e-06, "loss": 0.1221, "num_input_tokens_seen": 314147252, "step": 1829 }, { "epoch": 0.48109423291905046, "loss": 0.09647711366415024, "loss_ce": 0.0027576321735978127, "loss_iou": 0.380859375, "loss_num": 0.018798828125, "loss_xval": 0.09375, "num_input_tokens_seen": 314147252, "step": 1829 }, { "epoch": 0.48135726967843756, "grad_norm": 6.5229587096602915, "learning_rate": 5e-06, "loss": 0.119, "num_input_tokens_seen": 314317636, "step": 1830 }, { "epoch": 0.48135726967843756, "loss": 0.11963652074337006, "loss_ce": 0.004478443879634142, "loss_iou": 0.55859375, "loss_num": 0.0230712890625, "loss_xval": 0.115234375, "num_input_tokens_seen": 314317636, "step": 1830 }, { 
"epoch": 0.48162030643782466, "grad_norm": 10.474914605426608, "learning_rate": 5e-06, "loss": 0.1571, "num_input_tokens_seen": 314489804, "step": 1831 }, { "epoch": 0.48162030643782466, "loss": 0.08893167972564697, "loss_ce": 0.0019260660046711564, "loss_iou": 0.458984375, "loss_num": 0.0174560546875, "loss_xval": 0.0869140625, "num_input_tokens_seen": 314489804, "step": 1831 }, { "epoch": 0.4818833431972118, "grad_norm": 4.796918355311461, "learning_rate": 5e-06, "loss": 0.1701, "num_input_tokens_seen": 314661804, "step": 1832 }, { "epoch": 0.4818833431972118, "loss": 0.18161356449127197, "loss_ce": 0.0023533080238848925, "loss_iou": 0.51953125, "loss_num": 0.035888671875, "loss_xval": 0.1796875, "num_input_tokens_seen": 314661804, "step": 1832 }, { "epoch": 0.4821463799565989, "grad_norm": 6.244958331343755, "learning_rate": 5e-06, "loss": 0.1234, "num_input_tokens_seen": 314834036, "step": 1833 }, { "epoch": 0.4821463799565989, "loss": 0.13272178173065186, "loss_ce": 0.003785028588026762, "loss_iou": 0.478515625, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 314834036, "step": 1833 }, { "epoch": 0.4824094167159861, "grad_norm": 7.896413991695191, "learning_rate": 5e-06, "loss": 0.1255, "num_input_tokens_seen": 315004484, "step": 1834 }, { "epoch": 0.4824094167159861, "loss": 0.12204363942146301, "loss_ce": 0.0003090191457886249, "loss_iou": 0.58984375, "loss_num": 0.0244140625, "loss_xval": 0.12158203125, "num_input_tokens_seen": 315004484, "step": 1834 }, { "epoch": 0.4826724534753732, "grad_norm": 7.031269313570851, "learning_rate": 5e-06, "loss": 0.1443, "num_input_tokens_seen": 315176780, "step": 1835 }, { "epoch": 0.4826724534753732, "loss": 0.12622271478176117, "loss_ce": 0.002534976229071617, "loss_iou": 0.62109375, "loss_num": 0.0247802734375, "loss_xval": 0.12353515625, "num_input_tokens_seen": 315176780, "step": 1835 }, { "epoch": 0.4829354902347603, "grad_norm": 6.072447688187852, "learning_rate": 5e-06, "loss": 
0.125, "num_input_tokens_seen": 315348656, "step": 1836 }, { "epoch": 0.4829354902347603, "loss": 0.08488506823778152, "loss_ce": 0.0010227651800960302, "loss_iou": 0.462890625, "loss_num": 0.0167236328125, "loss_xval": 0.083984375, "num_input_tokens_seen": 315348656, "step": 1836 }, { "epoch": 0.48319852699414745, "grad_norm": 14.427685690814952, "learning_rate": 5e-06, "loss": 0.143, "num_input_tokens_seen": 315520828, "step": 1837 }, { "epoch": 0.48319852699414745, "loss": 0.0883752852678299, "loss_ce": 0.00042362496606074274, "loss_iou": 0.5625, "loss_num": 0.017578125, "loss_xval": 0.087890625, "num_input_tokens_seen": 315520828, "step": 1837 }, { "epoch": 0.48346156375353455, "grad_norm": 17.485755335788554, "learning_rate": 5e-06, "loss": 0.1375, "num_input_tokens_seen": 315692964, "step": 1838 }, { "epoch": 0.48346156375353455, "loss": 0.10190844535827637, "loss_ce": 0.0014140586135908961, "loss_iou": 0.359375, "loss_num": 0.0201416015625, "loss_xval": 0.1005859375, "num_input_tokens_seen": 315692964, "step": 1838 }, { "epoch": 0.48372460051292165, "grad_norm": 9.721032690775148, "learning_rate": 5e-06, "loss": 0.1692, "num_input_tokens_seen": 315863376, "step": 1839 }, { "epoch": 0.48372460051292165, "loss": 0.18402375280857086, "loss_ce": 0.0028408921789377928, "loss_iou": 0.40234375, "loss_num": 0.036376953125, "loss_xval": 0.181640625, "num_input_tokens_seen": 315863376, "step": 1839 }, { "epoch": 0.4839876372723088, "grad_norm": 7.726124987543399, "learning_rate": 5e-06, "loss": 0.1728, "num_input_tokens_seen": 316035520, "step": 1840 }, { "epoch": 0.4839876372723088, "loss": 0.13577872514724731, "loss_ce": 0.00031119072809815407, "loss_iou": 0.498046875, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 316035520, "step": 1840 }, { "epoch": 0.4842506740316959, "grad_norm": 7.757576022533387, "learning_rate": 5e-06, "loss": 0.14, "num_input_tokens_seen": 316207616, "step": 1841 }, { "epoch": 0.4842506740316959, "loss": 
0.2208271026611328, "loss_ce": 0.0022144389804452658, "loss_iou": 0.44140625, "loss_num": 0.043701171875, "loss_xval": 0.21875, "num_input_tokens_seen": 316207616, "step": 1841 }, { "epoch": 0.4845137107910831, "grad_norm": 11.993527938590784, "learning_rate": 5e-06, "loss": 0.1257, "num_input_tokens_seen": 316379548, "step": 1842 }, { "epoch": 0.4845137107910831, "loss": 0.17184340953826904, "loss_ce": 0.002074118936434388, "loss_iou": 0.341796875, "loss_num": 0.033935546875, "loss_xval": 0.169921875, "num_input_tokens_seen": 316379548, "step": 1842 }, { "epoch": 0.4847767475504702, "grad_norm": 5.880659481834313, "learning_rate": 5e-06, "loss": 0.1596, "num_input_tokens_seen": 316551724, "step": 1843 }, { "epoch": 0.4847767475504702, "loss": 0.30887043476104736, "loss_ce": 0.0007344337645918131, "loss_iou": 0.48046875, "loss_num": 0.0615234375, "loss_xval": 0.30859375, "num_input_tokens_seen": 316551724, "step": 1843 }, { "epoch": 0.4850397843098573, "grad_norm": 9.164014647902235, "learning_rate": 5e-06, "loss": 0.1614, "num_input_tokens_seen": 316724140, "step": 1844 }, { "epoch": 0.4850397843098573, "loss": 0.1765921413898468, "loss_ce": 0.0012076160637661815, "loss_iou": 0.62109375, "loss_num": 0.03515625, "loss_xval": 0.17578125, "num_input_tokens_seen": 316724140, "step": 1844 }, { "epoch": 0.48530282106924444, "grad_norm": 25.18318601638769, "learning_rate": 5e-06, "loss": 0.1387, "num_input_tokens_seen": 316896084, "step": 1845 }, { "epoch": 0.48530282106924444, "loss": 0.14695365726947784, "loss_ce": 0.0009270399459637702, "loss_iou": 0.47265625, "loss_num": 0.0291748046875, "loss_xval": 0.146484375, "num_input_tokens_seen": 316896084, "step": 1845 }, { "epoch": 0.48556585782863154, "grad_norm": 10.73033101211756, "learning_rate": 5e-06, "loss": 0.183, "num_input_tokens_seen": 317068004, "step": 1846 }, { "epoch": 0.48556585782863154, "loss": 0.3143799304962158, "loss_ce": 0.004962218925356865, "loss_iou": 0.58203125, "loss_num": 0.061767578125, 
"loss_xval": 0.30859375, "num_input_tokens_seen": 317068004, "step": 1846 }, { "epoch": 0.4858288945880187, "grad_norm": 14.757236176384808, "learning_rate": 5e-06, "loss": 0.0971, "num_input_tokens_seen": 317240256, "step": 1847 }, { "epoch": 0.4858288945880187, "loss": 0.074017733335495, "loss_ce": 0.0028812657110393047, "loss_iou": 0.439453125, "loss_num": 0.01422119140625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 317240256, "step": 1847 }, { "epoch": 0.4860919313474058, "grad_norm": 5.470394923350598, "learning_rate": 5e-06, "loss": 0.1205, "num_input_tokens_seen": 317412376, "step": 1848 }, { "epoch": 0.4860919313474058, "loss": 0.13736534118652344, "loss_ce": 0.0006160617922432721, "loss_iou": 0.67578125, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 317412376, "step": 1848 }, { "epoch": 0.4863549681067929, "grad_norm": 11.769346294638343, "learning_rate": 5e-06, "loss": 0.104, "num_input_tokens_seen": 317584644, "step": 1849 }, { "epoch": 0.4863549681067929, "loss": 0.07671768963336945, "loss_ce": 0.002804110525175929, "loss_iou": 0.5078125, "loss_num": 0.0147705078125, "loss_xval": 0.07373046875, "num_input_tokens_seen": 317584644, "step": 1849 }, { "epoch": 0.48661800486618007, "grad_norm": 7.934603221234343, "learning_rate": 5e-06, "loss": 0.157, "num_input_tokens_seen": 317756784, "step": 1850 }, { "epoch": 0.48661800486618007, "loss": 0.1591799259185791, "loss_ce": 0.0034792337100952864, "loss_iou": 0.6328125, "loss_num": 0.0311279296875, "loss_xval": 0.1552734375, "num_input_tokens_seen": 317756784, "step": 1850 }, { "epoch": 0.48688104162556717, "grad_norm": 5.949457674338729, "learning_rate": 5e-06, "loss": 0.1523, "num_input_tokens_seen": 317929000, "step": 1851 }, { "epoch": 0.48688104162556717, "loss": 0.15429449081420898, "loss_ce": 0.0003638358903117478, "loss_iou": 0.443359375, "loss_num": 0.03076171875, "loss_xval": 0.154296875, "num_input_tokens_seen": 317929000, "step": 1851 }, { "epoch": 
0.48714407838495427, "grad_norm": 4.538326131208253, "learning_rate": 5e-06, "loss": 0.1159, "num_input_tokens_seen": 318101400, "step": 1852 }, { "epoch": 0.48714407838495427, "loss": 0.14405781030654907, "loss_ce": 0.004745060577988625, "loss_iou": 0.51171875, "loss_num": 0.02783203125, "loss_xval": 0.1396484375, "num_input_tokens_seen": 318101400, "step": 1852 }, { "epoch": 0.48740711514434143, "grad_norm": 11.796493242549497, "learning_rate": 5e-06, "loss": 0.1155, "num_input_tokens_seen": 318273392, "step": 1853 }, { "epoch": 0.48740711514434143, "loss": 0.05749022588133812, "loss_ce": 0.0008801189833320677, "loss_iou": 0.453125, "loss_num": 0.01129150390625, "loss_xval": 0.056640625, "num_input_tokens_seen": 318273392, "step": 1853 }, { "epoch": 0.48767015190372853, "grad_norm": 4.839316076441186, "learning_rate": 5e-06, "loss": 0.1219, "num_input_tokens_seen": 318444936, "step": 1854 }, { "epoch": 0.48767015190372853, "loss": 0.15674933791160583, "loss_ce": 0.0008960673003457487, "loss_iou": 0.5625, "loss_num": 0.0311279296875, "loss_xval": 0.15625, "num_input_tokens_seen": 318444936, "step": 1854 }, { "epoch": 0.4879331886631157, "grad_norm": 4.489339979618492, "learning_rate": 5e-06, "loss": 0.1002, "num_input_tokens_seen": 318617236, "step": 1855 }, { "epoch": 0.4879331886631157, "loss": 0.17843472957611084, "loss_ce": 0.001768482499755919, "loss_iou": 0.57421875, "loss_num": 0.035400390625, "loss_xval": 0.1767578125, "num_input_tokens_seen": 318617236, "step": 1855 }, { "epoch": 0.4881962254225028, "grad_norm": 5.676162725297575, "learning_rate": 5e-06, "loss": 0.178, "num_input_tokens_seen": 318787468, "step": 1856 }, { "epoch": 0.4881962254225028, "loss": 0.25485190749168396, "loss_ce": 0.0009456594125367701, "loss_iou": 0.466796875, "loss_num": 0.05078125, "loss_xval": 0.25390625, "num_input_tokens_seen": 318787468, "step": 1856 }, { "epoch": 0.4884592621818899, "grad_norm": 4.081201767827966, "learning_rate": 5e-06, "loss": 0.1132, 
"num_input_tokens_seen": 318957884, "step": 1857 }, { "epoch": 0.4884592621818899, "loss": 0.0997210294008255, "loss_ce": 0.00020320963812991977, "loss_iou": 0.34765625, "loss_num": 0.0198974609375, "loss_xval": 0.099609375, "num_input_tokens_seen": 318957884, "step": 1857 }, { "epoch": 0.48872229894127706, "grad_norm": 23.818400579938295, "learning_rate": 5e-06, "loss": 0.1233, "num_input_tokens_seen": 319129856, "step": 1858 }, { "epoch": 0.48872229894127706, "loss": 0.07231907546520233, "loss_ce": 0.00011447950237197801, "loss_iou": 0.6171875, "loss_num": 0.014404296875, "loss_xval": 0.072265625, "num_input_tokens_seen": 319129856, "step": 1858 }, { "epoch": 0.48898533570066416, "grad_norm": 6.068112059865649, "learning_rate": 5e-06, "loss": 0.1742, "num_input_tokens_seen": 319302108, "step": 1859 }, { "epoch": 0.48898533570066416, "loss": 0.1806957721710205, "loss_ce": 0.00039790940354578197, "loss_iou": 0.51171875, "loss_num": 0.0361328125, "loss_xval": 0.1806640625, "num_input_tokens_seen": 319302108, "step": 1859 }, { "epoch": 0.4892483724600513, "grad_norm": 4.764918083593623, "learning_rate": 5e-06, "loss": 0.1205, "num_input_tokens_seen": 319473076, "step": 1860 }, { "epoch": 0.4892483724600513, "loss": 0.056661054491996765, "loss_ce": 0.0034689174499362707, "loss_iou": 0.51953125, "loss_num": 0.0106201171875, "loss_xval": 0.05322265625, "num_input_tokens_seen": 319473076, "step": 1860 }, { "epoch": 0.4895114092194384, "grad_norm": 4.591820064091911, "learning_rate": 5e-06, "loss": 0.1233, "num_input_tokens_seen": 319645292, "step": 1861 }, { "epoch": 0.4895114092194384, "loss": 0.10820820182561874, "loss_ce": 8.442218677373603e-05, "loss_iou": 0.50390625, "loss_num": 0.0216064453125, "loss_xval": 0.10791015625, "num_input_tokens_seen": 319645292, "step": 1861 }, { "epoch": 0.4897744459788255, "grad_norm": 11.395874847747328, "learning_rate": 5e-06, "loss": 0.1222, "num_input_tokens_seen": 319817560, "step": 1862 }, { "epoch": 0.4897744459788255, "loss": 
0.1541745364665985, "loss_ce": 0.0011288827518001199, "loss_iou": 0.53125, "loss_num": 0.0306396484375, "loss_xval": 0.1533203125, "num_input_tokens_seen": 319817560, "step": 1862 }, { "epoch": 0.4900374827382127, "grad_norm": 8.74594499498662, "learning_rate": 5e-06, "loss": 0.12, "num_input_tokens_seen": 319990008, "step": 1863 }, { "epoch": 0.4900374827382127, "loss": 0.1368078887462616, "loss_ce": 0.0013403687626123428, "loss_iou": 0.5625, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 319990008, "step": 1863 }, { "epoch": 0.4903005194975998, "grad_norm": 4.454006905645775, "learning_rate": 5e-06, "loss": 0.0984, "num_input_tokens_seen": 320162308, "step": 1864 }, { "epoch": 0.4903005194975998, "loss": 0.09877588599920273, "loss_ce": 0.0007534276228398085, "loss_iou": 0.5546875, "loss_num": 0.01953125, "loss_xval": 0.09814453125, "num_input_tokens_seen": 320162308, "step": 1864 }, { "epoch": 0.4905635562569869, "grad_norm": 9.654102944086466, "learning_rate": 5e-06, "loss": 0.1351, "num_input_tokens_seen": 320334456, "step": 1865 }, { "epoch": 0.4905635562569869, "loss": 0.09071889519691467, "loss_ce": 0.0011192907113581896, "loss_iou": 0.6171875, "loss_num": 0.0179443359375, "loss_xval": 0.08984375, "num_input_tokens_seen": 320334456, "step": 1865 }, { "epoch": 0.49082659301637405, "grad_norm": 7.49930478522843, "learning_rate": 5e-06, "loss": 0.1421, "num_input_tokens_seen": 320506784, "step": 1866 }, { "epoch": 0.49082659301637405, "loss": 0.1436455398797989, "loss_ce": 0.0007622435805387795, "loss_iou": 0.5390625, "loss_num": 0.028564453125, "loss_xval": 0.142578125, "num_input_tokens_seen": 320506784, "step": 1866 }, { "epoch": 0.49108962977576115, "grad_norm": 4.213229463422797, "learning_rate": 5e-06, "loss": 0.1405, "num_input_tokens_seen": 320678800, "step": 1867 }, { "epoch": 0.49108962977576115, "loss": 0.19276383519172668, "loss_ce": 0.0017237972933799028, "loss_iou": 0.5703125, "loss_num": 0.0380859375, "loss_xval": 
0.19140625, "num_input_tokens_seen": 320678800, "step": 1867 }, { "epoch": 0.4913526665351483, "grad_norm": 13.139928650685244, "learning_rate": 5e-06, "loss": 0.1345, "num_input_tokens_seen": 320851152, "step": 1868 }, { "epoch": 0.4913526665351483, "loss": 0.13707411289215088, "loss_ce": 0.0001722496235743165, "loss_iou": 0.6640625, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 320851152, "step": 1868 }, { "epoch": 0.4916157032945354, "grad_norm": 10.621311625988106, "learning_rate": 5e-06, "loss": 0.1239, "num_input_tokens_seen": 321023324, "step": 1869 }, { "epoch": 0.4916157032945354, "loss": 0.17078933119773865, "loss_ce": 0.0011115875095129013, "loss_iou": 0.466796875, "loss_num": 0.033935546875, "loss_xval": 0.169921875, "num_input_tokens_seen": 321023324, "step": 1869 }, { "epoch": 0.4918787400539225, "grad_norm": 9.794878479481557, "learning_rate": 5e-06, "loss": 0.1626, "num_input_tokens_seen": 321195396, "step": 1870 }, { "epoch": 0.4918787400539225, "loss": 0.1828850954771042, "loss_ce": 0.0016412028344348073, "loss_iou": 0.65234375, "loss_num": 0.0361328125, "loss_xval": 0.181640625, "num_input_tokens_seen": 321195396, "step": 1870 }, { "epoch": 0.4921417768133097, "grad_norm": 3.6593406817873193, "learning_rate": 5e-06, "loss": 0.1097, "num_input_tokens_seen": 321367508, "step": 1871 }, { "epoch": 0.4921417768133097, "loss": 0.07714089751243591, "loss_ce": 0.0005722964997403324, "loss_iou": 0.478515625, "loss_num": 0.01531982421875, "loss_xval": 0.07666015625, "num_input_tokens_seen": 321367508, "step": 1871 }, { "epoch": 0.4924048135726968, "grad_norm": 3.9995708653462874, "learning_rate": 5e-06, "loss": 0.1303, "num_input_tokens_seen": 321538180, "step": 1872 }, { "epoch": 0.4924048135726968, "loss": 0.10363311320543289, "loss_ce": 0.004054257180541754, "loss_iou": 0.5078125, "loss_num": 0.0198974609375, "loss_xval": 0.099609375, "num_input_tokens_seen": 321538180, "step": 1872 }, { "epoch": 0.49266785033208393, 
"grad_norm": 8.331617815221565, "learning_rate": 5e-06, "loss": 0.1423, "num_input_tokens_seen": 321710632, "step": 1873 }, { "epoch": 0.49266785033208393, "loss": 0.13812920451164246, "loss_ce": 0.0021428640466183424, "loss_iou": 0.58203125, "loss_num": 0.0272216796875, "loss_xval": 0.1357421875, "num_input_tokens_seen": 321710632, "step": 1873 }, { "epoch": 0.49293088709147104, "grad_norm": 5.452515467611325, "learning_rate": 5e-06, "loss": 0.1377, "num_input_tokens_seen": 321882748, "step": 1874 }, { "epoch": 0.49293088709147104, "loss": 0.14979197084903717, "loss_ce": 0.002147932071238756, "loss_iou": NaN, "loss_num": 0.029541015625, "loss_xval": 0.1474609375, "num_input_tokens_seen": 321882748, "step": 1874 }, { "epoch": 0.49319392385085814, "grad_norm": 14.062855136219687, "learning_rate": 5e-06, "loss": 0.1653, "num_input_tokens_seen": 322054892, "step": 1875 }, { "epoch": 0.49319392385085814, "loss": 0.24241477251052856, "loss_ce": 0.0016921274363994598, "loss_iou": 0.447265625, "loss_num": 0.048095703125, "loss_xval": 0.240234375, "num_input_tokens_seen": 322054892, "step": 1875 }, { "epoch": 0.4934569606102453, "grad_norm": 3.3198848239673966, "learning_rate": 5e-06, "loss": 0.1054, "num_input_tokens_seen": 322226992, "step": 1876 }, { "epoch": 0.4934569606102453, "loss": 0.06338231265544891, "loss_ce": 0.0020724977366626263, "loss_iou": 0.62890625, "loss_num": 0.01226806640625, "loss_xval": 0.061279296875, "num_input_tokens_seen": 322226992, "step": 1876 }, { "epoch": 0.4937199973696324, "grad_norm": 5.974832705883753, "learning_rate": 5e-06, "loss": 0.1286, "num_input_tokens_seen": 322399220, "step": 1877 }, { "epoch": 0.4937199973696324, "loss": 0.20875243842601776, "loss_ce": 0.004009997006505728, "loss_iou": 0.53125, "loss_num": 0.041015625, "loss_xval": 0.205078125, "num_input_tokens_seen": 322399220, "step": 1877 }, { "epoch": 0.4939830341290195, "grad_norm": 11.103490307412729, "learning_rate": 5e-06, "loss": 0.1036, "num_input_tokens_seen": 
322569372, "step": 1878 }, { "epoch": 0.4939830341290195, "loss": 0.10719159990549088, "loss_ce": 0.006331001408398151, "loss_iou": 0.64453125, "loss_num": 0.0201416015625, "loss_xval": 0.10107421875, "num_input_tokens_seen": 322569372, "step": 1878 }, { "epoch": 0.49424607088840666, "grad_norm": 4.615361697814784, "learning_rate": 5e-06, "loss": 0.1426, "num_input_tokens_seen": 322741596, "step": 1879 }, { "epoch": 0.49424607088840666, "loss": 0.13756033778190613, "loss_ce": 0.002703155390918255, "loss_iou": 0.46484375, "loss_num": 0.0269775390625, "loss_xval": 0.134765625, "num_input_tokens_seen": 322741596, "step": 1879 }, { "epoch": 0.49450910764779377, "grad_norm": 5.99195178330634, "learning_rate": 5e-06, "loss": 0.1599, "num_input_tokens_seen": 322913828, "step": 1880 }, { "epoch": 0.49450910764779377, "loss": 0.20525991916656494, "loss_ce": 0.0015550723765045404, "loss_iou": 0.47265625, "loss_num": 0.040771484375, "loss_xval": 0.2041015625, "num_input_tokens_seen": 322913828, "step": 1880 }, { "epoch": 0.4947721444071809, "grad_norm": 8.544161694628455, "learning_rate": 5e-06, "loss": 0.1051, "num_input_tokens_seen": 323084272, "step": 1881 }, { "epoch": 0.4947721444071809, "loss": 0.13442979753017426, "loss_ce": 0.0013731509679928422, "loss_iou": 0.462890625, "loss_num": 0.026611328125, "loss_xval": 0.1328125, "num_input_tokens_seen": 323084272, "step": 1881 }, { "epoch": 0.49503518116656803, "grad_norm": 12.157441672743493, "learning_rate": 5e-06, "loss": 0.1382, "num_input_tokens_seen": 323254564, "step": 1882 }, { "epoch": 0.49503518116656803, "loss": 0.06910552829504013, "loss_ce": 0.00015105513739399612, "loss_iou": 0.52734375, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 323254564, "step": 1882 }, { "epoch": 0.49529821792595513, "grad_norm": 7.758308533551838, "learning_rate": 5e-06, "loss": 0.0884, "num_input_tokens_seen": 323426936, "step": 1883 }, { "epoch": 0.49529821792595513, "loss": 0.07692838460206985, 
"loss_ce": 0.000985392602160573, "loss_iou": 0.51953125, "loss_num": 0.01519775390625, "loss_xval": 0.076171875, "num_input_tokens_seen": 323426936, "step": 1883 }, { "epoch": 0.4955612546853423, "grad_norm": 10.56929332370978, "learning_rate": 5e-06, "loss": 0.1292, "num_input_tokens_seen": 323599244, "step": 1884 }, { "epoch": 0.4955612546853423, "loss": 0.17598523199558258, "loss_ce": 0.0006922531756572425, "loss_iou": 0.52734375, "loss_num": 0.03515625, "loss_xval": 0.17578125, "num_input_tokens_seen": 323599244, "step": 1884 }, { "epoch": 0.4958242914447294, "grad_norm": 4.870883244723278, "learning_rate": 5e-06, "loss": 0.1084, "num_input_tokens_seen": 323771396, "step": 1885 }, { "epoch": 0.4958242914447294, "loss": 0.08608455955982208, "loss_ce": 0.0016424173954874277, "loss_iou": 0.443359375, "loss_num": 0.016845703125, "loss_xval": 0.08447265625, "num_input_tokens_seen": 323771396, "step": 1885 }, { "epoch": 0.49608732820411655, "grad_norm": 9.112063415949194, "learning_rate": 5e-06, "loss": 0.1201, "num_input_tokens_seen": 323943464, "step": 1886 }, { "epoch": 0.49608732820411655, "loss": 0.05756930261850357, "loss_ce": 0.00025729500339366496, "loss_iou": 0.494140625, "loss_num": 0.011474609375, "loss_xval": 0.057373046875, "num_input_tokens_seen": 323943464, "step": 1886 }, { "epoch": 0.49635036496350365, "grad_norm": 11.77050689912044, "learning_rate": 5e-06, "loss": 0.135, "num_input_tokens_seen": 324115664, "step": 1887 }, { "epoch": 0.49635036496350365, "loss": 0.13590523600578308, "loss_ce": 0.0011243472108617425, "loss_iou": 0.39453125, "loss_num": 0.0269775390625, "loss_xval": 0.134765625, "num_input_tokens_seen": 324115664, "step": 1887 }, { "epoch": 0.49661340172289076, "grad_norm": 19.144215192825804, "learning_rate": 5e-06, "loss": 0.154, "num_input_tokens_seen": 324288076, "step": 1888 }, { "epoch": 0.49661340172289076, "loss": 0.1540539562702179, "loss_ce": 0.00036742445081472397, "loss_iou": 0.49609375, "loss_num": 0.03076171875, 
"loss_xval": 0.1533203125, "num_input_tokens_seen": 324288076, "step": 1888 }, { "epoch": 0.4968764384822779, "grad_norm": 4.245370979797126, "learning_rate": 5e-06, "loss": 0.1335, "num_input_tokens_seen": 324460724, "step": 1889 }, { "epoch": 0.4968764384822779, "loss": 0.08321575820446014, "loss_ce": 0.0006657101330347359, "loss_iou": 0.59375, "loss_num": 0.0164794921875, "loss_xval": 0.08251953125, "num_input_tokens_seen": 324460724, "step": 1889 }, { "epoch": 0.497139475241665, "grad_norm": 4.913753500635943, "learning_rate": 5e-06, "loss": 0.1567, "num_input_tokens_seen": 324633088, "step": 1890 }, { "epoch": 0.497139475241665, "loss": 0.13971787691116333, "loss_ce": 0.001625841949135065, "loss_iou": 0.51953125, "loss_num": 0.027587890625, "loss_xval": 0.1376953125, "num_input_tokens_seen": 324633088, "step": 1890 }, { "epoch": 0.4974025120010521, "grad_norm": 4.7392923084206275, "learning_rate": 5e-06, "loss": 0.114, "num_input_tokens_seen": 324805420, "step": 1891 }, { "epoch": 0.4974025120010521, "loss": 0.10387594997882843, "loss_ce": 0.001977758714929223, "loss_iou": 0.52734375, "loss_num": 0.0203857421875, "loss_xval": 0.10205078125, "num_input_tokens_seen": 324805420, "step": 1891 }, { "epoch": 0.4976655487604393, "grad_norm": 3.934998355466595, "learning_rate": 5e-06, "loss": 0.1193, "num_input_tokens_seen": 324977628, "step": 1892 }, { "epoch": 0.4976655487604393, "loss": 0.08378120511770248, "loss_ce": 0.0015668454580008984, "loss_iou": 0.4921875, "loss_num": 0.0164794921875, "loss_xval": 0.08203125, "num_input_tokens_seen": 324977628, "step": 1892 }, { "epoch": 0.4979285855198264, "grad_norm": 16.6043348209483, "learning_rate": 5e-06, "loss": 0.1289, "num_input_tokens_seen": 325148040, "step": 1893 }, { "epoch": 0.4979285855198264, "loss": 0.11267328262329102, "loss_ce": 0.0008110918570309877, "loss_iou": 0.51953125, "loss_num": 0.0223388671875, "loss_xval": 0.11181640625, "num_input_tokens_seen": 325148040, "step": 1893 }, { "epoch": 
0.49819162227921354, "grad_norm": 4.246925011279612, "learning_rate": 5e-06, "loss": 0.1561, "num_input_tokens_seen": 325317336, "step": 1894 }, { "epoch": 0.49819162227921354, "loss": 0.20916813611984253, "loss_ce": 0.000855154765304178, "loss_iou": 0.49609375, "loss_num": 0.041748046875, "loss_xval": 0.2080078125, "num_input_tokens_seen": 325317336, "step": 1894 }, { "epoch": 0.49845465903860064, "grad_norm": 22.177021376541713, "learning_rate": 5e-06, "loss": 0.1381, "num_input_tokens_seen": 325489652, "step": 1895 }, { "epoch": 0.49845465903860064, "loss": 0.15358038246631622, "loss_ce": 0.0006873153615742922, "loss_iou": 0.333984375, "loss_num": 0.030517578125, "loss_xval": 0.1533203125, "num_input_tokens_seen": 325489652, "step": 1895 }, { "epoch": 0.49871769579798775, "grad_norm": 4.369246978330159, "learning_rate": 5e-06, "loss": 0.1545, "num_input_tokens_seen": 325661796, "step": 1896 }, { "epoch": 0.49871769579798775, "loss": 0.11801205575466156, "loss_ce": 0.0016180112725123763, "loss_iou": 0.6640625, "loss_num": 0.0233154296875, "loss_xval": 0.1162109375, "num_input_tokens_seen": 325661796, "step": 1896 }, { "epoch": 0.4989807325573749, "grad_norm": 4.8736707793033975, "learning_rate": 5e-06, "loss": 0.1079, "num_input_tokens_seen": 325834028, "step": 1897 }, { "epoch": 0.4989807325573749, "loss": 0.09736193716526031, "loss_ce": 0.0003770706243813038, "loss_iou": 0.482421875, "loss_num": 0.0194091796875, "loss_xval": 0.09716796875, "num_input_tokens_seen": 325834028, "step": 1897 }, { "epoch": 0.499243769316762, "grad_norm": 13.34167032829397, "learning_rate": 5e-06, "loss": 0.1232, "num_input_tokens_seen": 326006352, "step": 1898 }, { "epoch": 0.499243769316762, "loss": 0.1529536247253418, "loss_ce": 0.0012812747154384851, "loss_iou": 0.640625, "loss_num": 0.0302734375, "loss_xval": 0.1513671875, "num_input_tokens_seen": 326006352, "step": 1898 }, { "epoch": 0.49950680607614917, "grad_norm": 8.275559071074456, "learning_rate": 5e-06, "loss": 0.1183, 
"num_input_tokens_seen": 326178328, "step": 1899 }, { "epoch": 0.49950680607614917, "loss": 0.040668413043022156, "loss_ce": 0.0004920191713608801, "loss_iou": 0.515625, "loss_num": 0.008056640625, "loss_xval": 0.040283203125, "num_input_tokens_seen": 326178328, "step": 1899 }, { "epoch": 0.49976984283553627, "grad_norm": 6.7636763663201105, "learning_rate": 5e-06, "loss": 0.146, "num_input_tokens_seen": 326347792, "step": 1900 }, { "epoch": 0.49976984283553627, "loss": 0.12708882987499237, "loss_ce": 0.0006239861249923706, "loss_iou": 0.50390625, "loss_num": 0.0252685546875, "loss_xval": 0.126953125, "num_input_tokens_seen": 326347792, "step": 1900 }, { "epoch": 0.5000328795949234, "grad_norm": 11.91106437107254, "learning_rate": 5e-06, "loss": 0.1809, "num_input_tokens_seen": 326520084, "step": 1901 }, { "epoch": 0.5000328795949234, "loss": 0.24568378925323486, "loss_ce": 0.0030690436251461506, "loss_iou": 0.41796875, "loss_num": 0.048583984375, "loss_xval": 0.2421875, "num_input_tokens_seen": 326520084, "step": 1901 }, { "epoch": 0.5002959163543105, "grad_norm": 15.380203467807723, "learning_rate": 5e-06, "loss": 0.1589, "num_input_tokens_seen": 326691988, "step": 1902 }, { "epoch": 0.5002959163543105, "loss": 0.1489952951669693, "loss_ce": 0.00034418603172525764, "loss_iou": 0.53125, "loss_num": 0.02978515625, "loss_xval": 0.1484375, "num_input_tokens_seen": 326691988, "step": 1902 }, { "epoch": 0.5005589531136977, "grad_norm": 6.172818867405067, "learning_rate": 5e-06, "loss": 0.1834, "num_input_tokens_seen": 326864500, "step": 1903 }, { "epoch": 0.5005589531136977, "loss": 0.08618461340665817, "loss_ce": 0.0021086866036057472, "loss_iou": 0.54296875, "loss_num": 0.016845703125, "loss_xval": 0.083984375, "num_input_tokens_seen": 326864500, "step": 1903 }, { "epoch": 0.5008219898730848, "grad_norm": 6.280435924544313, "learning_rate": 5e-06, "loss": 0.11, "num_input_tokens_seen": 327036512, "step": 1904 }, { "epoch": 0.5008219898730848, "loss": 
0.16495370864868164, "loss_ce": 0.0016236326191574335, "loss_iou": 0.58984375, "loss_num": 0.03271484375, "loss_xval": 0.1630859375, "num_input_tokens_seen": 327036512, "step": 1904 }, { "epoch": 0.5010850266324719, "grad_norm": 5.592537029701005, "learning_rate": 5e-06, "loss": 0.1294, "num_input_tokens_seen": 327208668, "step": 1905 }, { "epoch": 0.5010850266324719, "loss": 0.09431658685207367, "loss_ce": 0.0009938328294083476, "loss_iou": 0.40234375, "loss_num": 0.0186767578125, "loss_xval": 0.09326171875, "num_input_tokens_seen": 327208668, "step": 1905 }, { "epoch": 0.501348063391859, "grad_norm": 6.449445852698605, "learning_rate": 5e-06, "loss": 0.1186, "num_input_tokens_seen": 327378984, "step": 1906 }, { "epoch": 0.501348063391859, "loss": 0.1146991178393364, "loss_ce": 0.0006091539980843663, "loss_iou": 0.498046875, "loss_num": 0.0228271484375, "loss_xval": 0.1142578125, "num_input_tokens_seen": 327378984, "step": 1906 }, { "epoch": 0.5016111001512461, "grad_norm": 4.708352961414117, "learning_rate": 5e-06, "loss": 0.1441, "num_input_tokens_seen": 327551376, "step": 1907 }, { "epoch": 0.5016111001512461, "loss": 0.15458180010318756, "loss_ce": 0.0006816480308771133, "loss_iou": 0.4140625, "loss_num": 0.03076171875, "loss_xval": 0.154296875, "num_input_tokens_seen": 327551376, "step": 1907 }, { "epoch": 0.5018741369106332, "grad_norm": 11.511613556933392, "learning_rate": 5e-06, "loss": 0.1553, "num_input_tokens_seen": 327724012, "step": 1908 }, { "epoch": 0.5018741369106332, "loss": 0.20126180350780487, "loss_ce": 0.0008528655744157732, "loss_iou": 0.5703125, "loss_num": 0.0400390625, "loss_xval": 0.2001953125, "num_input_tokens_seen": 327724012, "step": 1908 }, { "epoch": 0.5021371736700204, "grad_norm": 5.154439626897641, "learning_rate": 5e-06, "loss": 0.1566, "num_input_tokens_seen": 327896420, "step": 1909 }, { "epoch": 0.5021371736700204, "loss": 0.13825711607933044, "loss_ce": 0.0017519897082820535, "loss_iou": 0.484375, "loss_num": 
0.0272216796875, "loss_xval": 0.13671875, "num_input_tokens_seen": 327896420, "step": 1909 }, { "epoch": 0.5024002104294075, "grad_norm": 9.76818168107923, "learning_rate": 5e-06, "loss": 0.1211, "num_input_tokens_seen": 328066120, "step": 1910 }, { "epoch": 0.5024002104294075, "loss": 0.20892378687858582, "loss_ce": 0.00096173956990242, "loss_iou": 0.609375, "loss_num": 0.04150390625, "loss_xval": 0.2080078125, "num_input_tokens_seen": 328066120, "step": 1910 }, { "epoch": 0.5026632471887946, "grad_norm": 6.89128626408132, "learning_rate": 5e-06, "loss": 0.1391, "num_input_tokens_seen": 328238388, "step": 1911 }, { "epoch": 0.5026632471887946, "loss": 0.19616608321666718, "loss_ce": 0.001982737798243761, "loss_iou": 0.4453125, "loss_num": 0.038818359375, "loss_xval": 0.1943359375, "num_input_tokens_seen": 328238388, "step": 1911 }, { "epoch": 0.5029262839481817, "grad_norm": 4.852078459217102, "learning_rate": 5e-06, "loss": 0.1189, "num_input_tokens_seen": 328410228, "step": 1912 }, { "epoch": 0.5029262839481817, "loss": 0.13402841985225677, "loss_ce": 0.0019178204238414764, "loss_iou": 0.40625, "loss_num": 0.0263671875, "loss_xval": 0.1318359375, "num_input_tokens_seen": 328410228, "step": 1912 }, { "epoch": 0.5031893207075688, "grad_norm": 8.495391184677569, "learning_rate": 5e-06, "loss": 0.1507, "num_input_tokens_seen": 328582660, "step": 1913 }, { "epoch": 0.5031893207075688, "loss": 0.14262062311172485, "loss_ce": 0.004253931809216738, "loss_iou": 0.5546875, "loss_num": 0.0277099609375, "loss_xval": 0.138671875, "num_input_tokens_seen": 328582660, "step": 1913 }, { "epoch": 0.503452357466956, "grad_norm": 7.2140273397639385, "learning_rate": 5e-06, "loss": 0.1042, "num_input_tokens_seen": 328754772, "step": 1914 }, { "epoch": 0.503452357466956, "loss": 0.09518692642450333, "loss_ce": 0.003878333605825901, "loss_iou": 0.5859375, "loss_num": 0.018310546875, "loss_xval": 0.09130859375, "num_input_tokens_seen": 328754772, "step": 1914 }, { "epoch": 
0.5037153942263431, "grad_norm": 4.268432766820409, "learning_rate": 5e-06, "loss": 0.1579, "num_input_tokens_seen": 328926900, "step": 1915 }, { "epoch": 0.5037153942263431, "loss": 0.11751651018857956, "loss_ce": 0.0052423360757529736, "loss_iou": 0.5234375, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 328926900, "step": 1915 }, { "epoch": 0.5039784309857303, "grad_norm": 3.9588210706780522, "learning_rate": 5e-06, "loss": 0.1548, "num_input_tokens_seen": 329096944, "step": 1916 }, { "epoch": 0.5039784309857303, "loss": 0.1656898707151413, "loss_ce": 0.005258964374661446, "loss_iou": 0.4453125, "loss_num": 0.031982421875, "loss_xval": 0.16015625, "num_input_tokens_seen": 329096944, "step": 1916 }, { "epoch": 0.5042414677451174, "grad_norm": 5.63809501756183, "learning_rate": 5e-06, "loss": 0.0966, "num_input_tokens_seen": 329268892, "step": 1917 }, { "epoch": 0.5042414677451174, "loss": 0.07544635236263275, "loss_ce": 0.000571468030102551, "loss_iou": 0.373046875, "loss_num": 0.01495361328125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 329268892, "step": 1917 }, { "epoch": 0.5045045045045045, "grad_norm": 7.590332140913489, "learning_rate": 5e-06, "loss": 0.1104, "num_input_tokens_seen": 329441344, "step": 1918 }, { "epoch": 0.5045045045045045, "loss": 0.15229275822639465, "loss_ce": 0.006144077517092228, "loss_iou": 0.52734375, "loss_num": 0.0291748046875, "loss_xval": 0.146484375, "num_input_tokens_seen": 329441344, "step": 1918 }, { "epoch": 0.5047675412638917, "grad_norm": 9.59054902064166, "learning_rate": 5e-06, "loss": 0.1483, "num_input_tokens_seen": 329613564, "step": 1919 }, { "epoch": 0.5047675412638917, "loss": 0.08887225389480591, "loss_ce": 0.001500429236330092, "loss_iou": 0.515625, "loss_num": 0.0174560546875, "loss_xval": 0.08740234375, "num_input_tokens_seen": 329613564, "step": 1919 }, { "epoch": 0.5050305780232788, "grad_norm": 4.210072070601249, "learning_rate": 5e-06, "loss": 0.1766, 
"num_input_tokens_seen": 329785680, "step": 1920 }, { "epoch": 0.5050305780232788, "loss": 0.2718814015388489, "loss_ce": 0.002228060271590948, "loss_iou": 0.3359375, "loss_num": 0.053955078125, "loss_xval": 0.26953125, "num_input_tokens_seen": 329785680, "step": 1920 }, { "epoch": 0.5052936147826659, "grad_norm": 8.380948003304292, "learning_rate": 5e-06, "loss": 0.1587, "num_input_tokens_seen": 329957636, "step": 1921 }, { "epoch": 0.5052936147826659, "loss": 0.1278029978275299, "loss_ce": 0.00286403251811862, "loss_iou": 0.56640625, "loss_num": 0.0250244140625, "loss_xval": 0.125, "num_input_tokens_seen": 329957636, "step": 1921 }, { "epoch": 0.505556651542053, "grad_norm": 7.7625200359697715, "learning_rate": 5e-06, "loss": 0.1491, "num_input_tokens_seen": 330129632, "step": 1922 }, { "epoch": 0.505556651542053, "loss": 0.11747082322835922, "loss_ce": 0.004799924790859222, "loss_iou": 0.5859375, "loss_num": 0.0225830078125, "loss_xval": 0.11279296875, "num_input_tokens_seen": 330129632, "step": 1922 }, { "epoch": 0.5058196883014401, "grad_norm": 6.9606844299684205, "learning_rate": 5e-06, "loss": 0.1055, "num_input_tokens_seen": 330301512, "step": 1923 }, { "epoch": 0.5058196883014401, "loss": 0.09333023428916931, "loss_ce": 0.0011671524262055755, "loss_iou": 0.470703125, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 330301512, "step": 1923 }, { "epoch": 0.5060827250608273, "grad_norm": 13.729941886848659, "learning_rate": 5e-06, "loss": 0.1111, "num_input_tokens_seen": 330473972, "step": 1924 }, { "epoch": 0.5060827250608273, "loss": 0.09531684219837189, "loss_ce": 0.000803898845333606, "loss_iou": 0.390625, "loss_num": 0.0189208984375, "loss_xval": 0.0947265625, "num_input_tokens_seen": 330473972, "step": 1924 }, { "epoch": 0.5063457618202144, "grad_norm": 6.989119579903773, "learning_rate": 5e-06, "loss": 0.1192, "num_input_tokens_seen": 330644424, "step": 1925 }, { "epoch": 0.5063457618202144, "loss": 0.17603036761283875, 
"loss_ce": 0.0006458393763750792, "loss_iou": 0.451171875, "loss_num": 0.03515625, "loss_xval": 0.17578125, "num_input_tokens_seen": 330644424, "step": 1925 }, { "epoch": 0.5066087985796015, "grad_norm": 33.383727313004705, "learning_rate": 5e-06, "loss": 0.1159, "num_input_tokens_seen": 330816512, "step": 1926 }, { "epoch": 0.5066087985796015, "loss": 0.07775846868753433, "loss_ce": 0.0003048558428417891, "loss_iou": 0.330078125, "loss_num": 0.0155029296875, "loss_xval": 0.07763671875, "num_input_tokens_seen": 330816512, "step": 1926 }, { "epoch": 0.5068718353389886, "grad_norm": 7.427483033873037, "learning_rate": 5e-06, "loss": 0.151, "num_input_tokens_seen": 330988952, "step": 1927 }, { "epoch": 0.5068718353389886, "loss": 0.16052240133285522, "loss_ce": 0.000579762679990381, "loss_iou": 0.35546875, "loss_num": 0.031982421875, "loss_xval": 0.16015625, "num_input_tokens_seen": 330988952, "step": 1927 }, { "epoch": 0.5071348720983757, "grad_norm": 8.482091734395144, "learning_rate": 5e-06, "loss": 0.1604, "num_input_tokens_seen": 331161228, "step": 1928 }, { "epoch": 0.5071348720983757, "loss": 0.14751845598220825, "loss_ce": 0.000576329359319061, "loss_iou": 0.72265625, "loss_num": 0.0294189453125, "loss_xval": 0.146484375, "num_input_tokens_seen": 331161228, "step": 1928 }, { "epoch": 0.5073979088577629, "grad_norm": 6.087066534105735, "learning_rate": 5e-06, "loss": 0.1212, "num_input_tokens_seen": 331329832, "step": 1929 }, { "epoch": 0.5073979088577629, "loss": 0.09639698266983032, "loss_ce": 0.002372324001044035, "loss_iou": 0.4765625, "loss_num": 0.018798828125, "loss_xval": 0.09423828125, "num_input_tokens_seen": 331329832, "step": 1929 }, { "epoch": 0.50766094561715, "grad_norm": 6.499679773970415, "learning_rate": 5e-06, "loss": 0.0971, "num_input_tokens_seen": 331502148, "step": 1930 }, { "epoch": 0.50766094561715, "loss": 0.11911525577306747, "loss_ce": 0.0002492967469152063, "loss_iou": 0.61328125, "loss_num": 0.0238037109375, "loss_xval": 
0.11865234375, "num_input_tokens_seen": 331502148, "step": 1930 }, { "epoch": 0.5079239823765371, "grad_norm": 16.477307771999516, "learning_rate": 5e-06, "loss": 0.1057, "num_input_tokens_seen": 331674540, "step": 1931 }, { "epoch": 0.5079239823765371, "loss": 0.14295902848243713, "loss_ce": 0.0038904245011508465, "loss_iou": 0.341796875, "loss_num": 0.02783203125, "loss_xval": 0.138671875, "num_input_tokens_seen": 331674540, "step": 1931 }, { "epoch": 0.5081870191359242, "grad_norm": 4.977000748222245, "learning_rate": 5e-06, "loss": 0.1301, "num_input_tokens_seen": 331846988, "step": 1932 }, { "epoch": 0.5081870191359242, "loss": 0.1609978973865509, "loss_ce": 0.00042966773617081344, "loss_iou": 0.66015625, "loss_num": 0.031982421875, "loss_xval": 0.16015625, "num_input_tokens_seen": 331846988, "step": 1932 }, { "epoch": 0.5084500558953113, "grad_norm": 4.68697406244425, "learning_rate": 5e-06, "loss": 0.128, "num_input_tokens_seen": 332019216, "step": 1933 }, { "epoch": 0.5084500558953113, "loss": 0.12378443777561188, "loss_ce": 0.0009511768585070968, "loss_iou": 0.5, "loss_num": 0.0245361328125, "loss_xval": 0.123046875, "num_input_tokens_seen": 332019216, "step": 1933 }, { "epoch": 0.5087130926546984, "grad_norm": 5.061790101979302, "learning_rate": 5e-06, "loss": 0.1413, "num_input_tokens_seen": 332191420, "step": 1934 }, { "epoch": 0.5087130926546984, "loss": 0.09300635755062103, "loss_ce": 0.0008737844182178378, "loss_iou": 0.51171875, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 332191420, "step": 1934 }, { "epoch": 0.5089761294140857, "grad_norm": 18.6525779104533, "learning_rate": 5e-06, "loss": 0.1468, "num_input_tokens_seen": 332363456, "step": 1935 }, { "epoch": 0.5089761294140857, "loss": 0.13239170610904694, "loss_ce": 0.0003726637805812061, "loss_iou": 0.42578125, "loss_num": 0.0263671875, "loss_xval": 0.1318359375, "num_input_tokens_seen": 332363456, "step": 1935 }, { "epoch": 0.5092391661734728, "grad_norm": 
16.50490516998537, "learning_rate": 5e-06, "loss": 0.0946, "num_input_tokens_seen": 332535912, "step": 1936 }, { "epoch": 0.5092391661734728, "loss": 0.08137423545122147, "loss_ce": 0.0015249941498041153, "loss_iou": 0.423828125, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 332535912, "step": 1936 }, { "epoch": 0.5095022029328599, "grad_norm": 7.868678191164257, "learning_rate": 5e-06, "loss": 0.1762, "num_input_tokens_seen": 332708064, "step": 1937 }, { "epoch": 0.5095022029328599, "loss": 0.2602325677871704, "loss_ce": 0.005715976003557444, "loss_iou": 0.470703125, "loss_num": 0.051025390625, "loss_xval": 0.25390625, "num_input_tokens_seen": 332708064, "step": 1937 }, { "epoch": 0.509765239692247, "grad_norm": 4.650960242488344, "learning_rate": 5e-06, "loss": 0.1441, "num_input_tokens_seen": 332880260, "step": 1938 }, { "epoch": 0.509765239692247, "loss": 0.08695125579833984, "loss_ce": 0.0018987648654729128, "loss_iou": 0.7578125, "loss_num": 0.0169677734375, "loss_xval": 0.0849609375, "num_input_tokens_seen": 332880260, "step": 1938 }, { "epoch": 0.5100282764516341, "grad_norm": 5.979900826127572, "learning_rate": 5e-06, "loss": 0.115, "num_input_tokens_seen": 333052492, "step": 1939 }, { "epoch": 0.5100282764516341, "loss": 0.11689235270023346, "loss_ce": 0.0012612489517778158, "loss_iou": 0.455078125, "loss_num": 0.0230712890625, "loss_xval": 0.11572265625, "num_input_tokens_seen": 333052492, "step": 1939 }, { "epoch": 0.5102913132110213, "grad_norm": 15.330918281866088, "learning_rate": 5e-06, "loss": 0.1149, "num_input_tokens_seen": 333222132, "step": 1940 }, { "epoch": 0.5102913132110213, "loss": 0.16124433279037476, "loss_ce": 0.0012101602042093873, "loss_iou": 0.58203125, "loss_num": 0.031982421875, "loss_xval": 0.16015625, "num_input_tokens_seen": 333222132, "step": 1940 }, { "epoch": 0.5105543499704084, "grad_norm": 5.298876176442514, "learning_rate": 5e-06, "loss": 0.1284, "num_input_tokens_seen": 333394244, "step": 
1941 }, { "epoch": 0.5105543499704084, "loss": 0.09125322848558426, "loss_ce": 0.0003718816442415118, "loss_iou": 0.6328125, "loss_num": 0.0181884765625, "loss_xval": 0.0908203125, "num_input_tokens_seen": 333394244, "step": 1941 }, { "epoch": 0.5108173867297955, "grad_norm": 25.903681591063247, "learning_rate": 5e-06, "loss": 0.0986, "num_input_tokens_seen": 333564616, "step": 1942 }, { "epoch": 0.5108173867297955, "loss": 0.0830872505903244, "loss_ce": 0.003009122796356678, "loss_iou": 0.56640625, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 333564616, "step": 1942 }, { "epoch": 0.5110804234891826, "grad_norm": 7.412064312390374, "learning_rate": 5e-06, "loss": 0.1326, "num_input_tokens_seen": 333736828, "step": 1943 }, { "epoch": 0.5110804234891826, "loss": 0.13648918271064758, "loss_ce": 0.0012352685444056988, "loss_iou": 0.49609375, "loss_num": 0.027099609375, "loss_xval": 0.134765625, "num_input_tokens_seen": 333736828, "step": 1943 }, { "epoch": 0.5113434602485697, "grad_norm": 5.823332421421152, "learning_rate": 5e-06, "loss": 0.1076, "num_input_tokens_seen": 333909000, "step": 1944 }, { "epoch": 0.5113434602485697, "loss": 0.08375194668769836, "loss_ce": 0.00024059813586063683, "loss_iou": 0.55078125, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 333909000, "step": 1944 }, { "epoch": 0.5116064970079569, "grad_norm": 5.7512491483697685, "learning_rate": 5e-06, "loss": 0.1466, "num_input_tokens_seen": 334081116, "step": 1945 }, { "epoch": 0.5116064970079569, "loss": 0.11631600558757782, "loss_ce": 0.0020276757422834635, "loss_iou": 0.390625, "loss_num": 0.0228271484375, "loss_xval": 0.1142578125, "num_input_tokens_seen": 334081116, "step": 1945 }, { "epoch": 0.511869533767344, "grad_norm": 6.061348306489617, "learning_rate": 5e-06, "loss": 0.1248, "num_input_tokens_seen": 334252872, "step": 1946 }, { "epoch": 0.511869533767344, "loss": 0.1273353099822998, "loss_ce": 0.0007484056986868382, 
"loss_iou": 0.671875, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 334252872, "step": 1946 }, { "epoch": 0.5121325705267311, "grad_norm": 4.656625920812538, "learning_rate": 5e-06, "loss": 0.1469, "num_input_tokens_seen": 334421696, "step": 1947 }, { "epoch": 0.5121325705267311, "loss": 0.07545529305934906, "loss_ce": 0.002487759804353118, "loss_iou": 0.5390625, "loss_num": 0.01458740234375, "loss_xval": 0.07275390625, "num_input_tokens_seen": 334421696, "step": 1947 }, { "epoch": 0.5123956072861182, "grad_norm": 11.2997609374861, "learning_rate": 5e-06, "loss": 0.1074, "num_input_tokens_seen": 334593968, "step": 1948 }, { "epoch": 0.5123956072861182, "loss": 0.15095758438110352, "loss_ce": 0.00047542020911350846, "loss_iou": 0.65625, "loss_num": 0.0301513671875, "loss_xval": 0.150390625, "num_input_tokens_seen": 334593968, "step": 1948 }, { "epoch": 0.5126586440455053, "grad_norm": 4.740639931376309, "learning_rate": 5e-06, "loss": 0.1006, "num_input_tokens_seen": 334766256, "step": 1949 }, { "epoch": 0.5126586440455053, "loss": 0.0767994076013565, "loss_ce": 0.00020028470316901803, "loss_iou": 0.49609375, "loss_num": 0.01531982421875, "loss_xval": 0.07666015625, "num_input_tokens_seen": 334766256, "step": 1949 }, { "epoch": 0.5129216808048925, "grad_norm": 15.829229511064487, "learning_rate": 5e-06, "loss": 0.0946, "num_input_tokens_seen": 334938720, "step": 1950 }, { "epoch": 0.5129216808048925, "loss": 0.10283501446247101, "loss_ce": 0.003225642256438732, "loss_iou": 0.57421875, "loss_num": 0.0198974609375, "loss_xval": 0.099609375, "num_input_tokens_seen": 334938720, "step": 1950 }, { "epoch": 0.5131847175642796, "grad_norm": 4.547176342272478, "learning_rate": 5e-06, "loss": 0.1084, "num_input_tokens_seen": 335108948, "step": 1951 }, { "epoch": 0.5131847175642796, "loss": 0.07713186740875244, "loss_ce": 0.00047171738697215915, "loss_iou": 0.53515625, "loss_num": 0.01531982421875, "loss_xval": 0.07666015625, 
"num_input_tokens_seen": 335108948, "step": 1951 }, { "epoch": 0.5134477543236667, "grad_norm": 13.66162052286571, "learning_rate": 5e-06, "loss": 0.1628, "num_input_tokens_seen": 335281148, "step": 1952 }, { "epoch": 0.5134477543236667, "loss": 0.2089971899986267, "loss_ce": 0.0048346007242798805, "loss_iou": 0.390625, "loss_num": 0.040771484375, "loss_xval": 0.2041015625, "num_input_tokens_seen": 335281148, "step": 1952 }, { "epoch": 0.5137107910830538, "grad_norm": 4.53549366428838, "learning_rate": 5e-06, "loss": 0.085, "num_input_tokens_seen": 335453172, "step": 1953 }, { "epoch": 0.5137107910830538, "loss": 0.0756625235080719, "loss_ce": 0.0019930913113057613, "loss_iou": 0.5078125, "loss_num": 0.0147705078125, "loss_xval": 0.07373046875, "num_input_tokens_seen": 335453172, "step": 1953 }, { "epoch": 0.513973827842441, "grad_norm": 5.655145923562407, "learning_rate": 5e-06, "loss": 0.1403, "num_input_tokens_seen": 335623636, "step": 1954 }, { "epoch": 0.513973827842441, "loss": 0.14977125823497772, "loss_ce": 0.0032868883572518826, "loss_iou": NaN, "loss_num": 0.029296875, "loss_xval": 0.146484375, "num_input_tokens_seen": 335623636, "step": 1954 }, { "epoch": 0.5142368646018282, "grad_norm": 9.198223458900488, "learning_rate": 5e-06, "loss": 0.158, "num_input_tokens_seen": 335793500, "step": 1955 }, { "epoch": 0.5142368646018282, "loss": 0.13681824505329132, "loss_ce": 0.0008624300826340914, "loss_iou": 0.61328125, "loss_num": 0.0272216796875, "loss_xval": 0.1357421875, "num_input_tokens_seen": 335793500, "step": 1955 }, { "epoch": 0.5144999013612153, "grad_norm": 14.576489713233544, "learning_rate": 5e-06, "loss": 0.1376, "num_input_tokens_seen": 335965788, "step": 1956 }, { "epoch": 0.5144999013612153, "loss": 0.14106330275535583, "loss_ce": 0.0012928071664646268, "loss_iou": 0.56640625, "loss_num": 0.0279541015625, "loss_xval": 0.1396484375, "num_input_tokens_seen": 335965788, "step": 1956 }, { "epoch": 0.5147629381206024, "grad_norm": 12.321724088847931, 
"learning_rate": 5e-06, "loss": 0.1715, "num_input_tokens_seen": 336138060, "step": 1957 }, { "epoch": 0.5147629381206024, "loss": 0.1251417100429535, "loss_ce": 0.0011182638118043542, "loss_iou": 0.53515625, "loss_num": 0.0247802734375, "loss_xval": 0.1240234375, "num_input_tokens_seen": 336138060, "step": 1957 }, { "epoch": 0.5150259748799895, "grad_norm": 8.213839245926183, "learning_rate": 5e-06, "loss": 0.1156, "num_input_tokens_seen": 336308692, "step": 1958 }, { "epoch": 0.5150259748799895, "loss": 0.06178643926978111, "loss_ce": 0.00011041228572139516, "loss_iou": 0.62890625, "loss_num": 0.0123291015625, "loss_xval": 0.061767578125, "num_input_tokens_seen": 336308692, "step": 1958 }, { "epoch": 0.5152890116393766, "grad_norm": 4.514500271556744, "learning_rate": 5e-06, "loss": 0.1589, "num_input_tokens_seen": 336481248, "step": 1959 }, { "epoch": 0.5152890116393766, "loss": 0.15207350254058838, "loss_ce": 0.0027662513311952353, "loss_iou": 0.4453125, "loss_num": 0.0299072265625, "loss_xval": 0.1494140625, "num_input_tokens_seen": 336481248, "step": 1959 }, { "epoch": 0.5155520483987637, "grad_norm": 3.270020202020867, "learning_rate": 5e-06, "loss": 0.0831, "num_input_tokens_seen": 336653556, "step": 1960 }, { "epoch": 0.5155520483987637, "loss": 0.07474862039089203, "loss_ce": 0.00031624053372070193, "loss_iou": 0.57421875, "loss_num": 0.014892578125, "loss_xval": 0.07421875, "num_input_tokens_seen": 336653556, "step": 1960 }, { "epoch": 0.5158150851581509, "grad_norm": 5.036795139632983, "learning_rate": 5e-06, "loss": 0.1524, "num_input_tokens_seen": 336825632, "step": 1961 }, { "epoch": 0.5158150851581509, "loss": 0.0949145182967186, "loss_ce": 0.004765587393194437, "loss_iou": 0.52734375, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 336825632, "step": 1961 }, { "epoch": 0.516078121917538, "grad_norm": 5.18788380960706, "learning_rate": 5e-06, "loss": 0.0908, "num_input_tokens_seen": 336997908, "step": 1962 }, { 
"epoch": 0.516078121917538, "loss": 0.1347174346446991, "loss_ce": 0.0003027569910045713, "loss_iou": 0.38671875, "loss_num": 0.02685546875, "loss_xval": 0.134765625, "num_input_tokens_seen": 336997908, "step": 1962 }, { "epoch": 0.5163411586769251, "grad_norm": 19.722687183501755, "learning_rate": 5e-06, "loss": 0.1295, "num_input_tokens_seen": 337170032, "step": 1963 }, { "epoch": 0.5163411586769251, "loss": 0.15522822737693787, "loss_ce": 0.004196731373667717, "loss_iou": 0.490234375, "loss_num": 0.0301513671875, "loss_xval": 0.1513671875, "num_input_tokens_seen": 337170032, "step": 1963 }, { "epoch": 0.5166041954363122, "grad_norm": 5.156355195655944, "learning_rate": 5e-06, "loss": 0.1089, "num_input_tokens_seen": 337342000, "step": 1964 }, { "epoch": 0.5166041954363122, "loss": 0.0796535462141037, "loss_ce": 0.0009487088536843657, "loss_iou": 0.66796875, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 337342000, "step": 1964 }, { "epoch": 0.5168672321956993, "grad_norm": 4.599075113473689, "learning_rate": 5e-06, "loss": 0.1312, "num_input_tokens_seen": 337514164, "step": 1965 }, { "epoch": 0.5168672321956993, "loss": 0.21942217648029327, "loss_ce": 0.0008857909124344587, "loss_iou": 0.5546875, "loss_num": 0.043701171875, "loss_xval": 0.21875, "num_input_tokens_seen": 337514164, "step": 1965 }, { "epoch": 0.5171302689550865, "grad_norm": 8.512554957251284, "learning_rate": 5e-06, "loss": 0.1336, "num_input_tokens_seen": 337686236, "step": 1966 }, { "epoch": 0.5171302689550865, "loss": 0.08479119837284088, "loss_ce": 0.001386661664582789, "loss_iou": 0.5234375, "loss_num": 0.0166015625, "loss_xval": 0.08349609375, "num_input_tokens_seen": 337686236, "step": 1966 }, { "epoch": 0.5173933057144736, "grad_norm": 5.381969767631673, "learning_rate": 5e-06, "loss": 0.1373, "num_input_tokens_seen": 337858240, "step": 1967 }, { "epoch": 0.5173933057144736, "loss": 0.15276584029197693, "loss_ce": 0.004938698373734951, "loss_iou": 
0.52734375, "loss_num": 0.029541015625, "loss_xval": 0.1474609375, "num_input_tokens_seen": 337858240, "step": 1967 }, { "epoch": 0.5176563424738607, "grad_norm": 12.700271834998917, "learning_rate": 5e-06, "loss": 0.1532, "num_input_tokens_seen": 338030560, "step": 1968 }, { "epoch": 0.5176563424738607, "loss": 0.15190255641937256, "loss_ce": 0.0028852252289652824, "loss_iou": 0.494140625, "loss_num": 0.02978515625, "loss_xval": 0.1494140625, "num_input_tokens_seen": 338030560, "step": 1968 }, { "epoch": 0.5179193792332478, "grad_norm": 21.983637705034123, "learning_rate": 5e-06, "loss": 0.1116, "num_input_tokens_seen": 338202560, "step": 1969 }, { "epoch": 0.5179193792332478, "loss": 0.0981290340423584, "loss_ce": 0.0023496169596910477, "loss_iou": 0.369140625, "loss_num": 0.0191650390625, "loss_xval": 0.095703125, "num_input_tokens_seen": 338202560, "step": 1969 }, { "epoch": 0.5181824159926349, "grad_norm": 10.469128023533157, "learning_rate": 5e-06, "loss": 0.1446, "num_input_tokens_seen": 338374836, "step": 1970 }, { "epoch": 0.5181824159926349, "loss": 0.16515450179576874, "loss_ce": 0.0016413143603131175, "loss_iou": 0.51953125, "loss_num": 0.03271484375, "loss_xval": 0.1630859375, "num_input_tokens_seen": 338374836, "step": 1970 }, { "epoch": 0.5184454527520221, "grad_norm": 5.186485351415196, "learning_rate": 5e-06, "loss": 0.162, "num_input_tokens_seen": 338546996, "step": 1971 }, { "epoch": 0.5184454527520221, "loss": 0.24401625990867615, "loss_ce": 0.004056546837091446, "loss_iou": 0.42578125, "loss_num": 0.0478515625, "loss_xval": 0.240234375, "num_input_tokens_seen": 338546996, "step": 1971 }, { "epoch": 0.5187084895114092, "grad_norm": 6.802546863240928, "learning_rate": 5e-06, "loss": 0.1401, "num_input_tokens_seen": 338716756, "step": 1972 }, { "epoch": 0.5187084895114092, "loss": 0.0805911123752594, "loss_ce": 0.0030764644034206867, "loss_iou": 0.43359375, "loss_num": 0.0155029296875, "loss_xval": 0.07763671875, "num_input_tokens_seen": 
338716756, "step": 1972 }, { "epoch": 0.5189715262707963, "grad_norm": 11.859766109061349, "learning_rate": 5e-06, "loss": 0.1037, "num_input_tokens_seen": 338886988, "step": 1973 }, { "epoch": 0.5189715262707963, "loss": 0.10678447037935257, "loss_ce": 0.0013767611235380173, "loss_iou": 0.50390625, "loss_num": 0.0211181640625, "loss_xval": 0.10546875, "num_input_tokens_seen": 338886988, "step": 1973 }, { "epoch": 0.5192345630301834, "grad_norm": 7.006309240426411, "learning_rate": 5e-06, "loss": 0.1237, "num_input_tokens_seen": 339059260, "step": 1974 }, { "epoch": 0.5192345630301834, "loss": 0.1808294951915741, "loss_ce": 0.00034853501711040735, "loss_iou": 0.6796875, "loss_num": 0.0361328125, "loss_xval": 0.1806640625, "num_input_tokens_seen": 339059260, "step": 1974 }, { "epoch": 0.5194975997895706, "grad_norm": 21.985452701611596, "learning_rate": 5e-06, "loss": 0.1751, "num_input_tokens_seen": 339231652, "step": 1975 }, { "epoch": 0.5194975997895706, "loss": 0.16675767302513123, "loss_ce": 0.0018406773451715708, "loss_iou": 0.37109375, "loss_num": 0.032958984375, "loss_xval": 0.1650390625, "num_input_tokens_seen": 339231652, "step": 1975 }, { "epoch": 0.5197606365489578, "grad_norm": 6.071372766680396, "learning_rate": 5e-06, "loss": 0.1222, "num_input_tokens_seen": 339403696, "step": 1976 }, { "epoch": 0.5197606365489578, "loss": 0.08192337304353714, "loss_ce": 0.0025166317354887724, "loss_iou": 0.48046875, "loss_num": 0.015869140625, "loss_xval": 0.07958984375, "num_input_tokens_seen": 339403696, "step": 1976 }, { "epoch": 0.5200236733083449, "grad_norm": 7.303457699958576, "learning_rate": 5e-06, "loss": 0.0961, "num_input_tokens_seen": 339576232, "step": 1977 }, { "epoch": 0.5200236733083449, "loss": 0.06348910182714462, "loss_ce": 0.0037356873508542776, "loss_iou": 0.455078125, "loss_num": 0.011962890625, "loss_xval": 0.059814453125, "num_input_tokens_seen": 339576232, "step": 1977 }, { "epoch": 0.520286710067732, "grad_norm": 4.058844588183022, 
"learning_rate": 5e-06, "loss": 0.0786, "num_input_tokens_seen": 339748468, "step": 1978 }, { "epoch": 0.520286710067732, "loss": 0.07935698330402374, "loss_ce": 0.00019439200696069747, "loss_iou": 0.5234375, "loss_num": 0.015869140625, "loss_xval": 0.0791015625, "num_input_tokens_seen": 339748468, "step": 1978 }, { "epoch": 0.5205497468271191, "grad_norm": 9.520705989391612, "learning_rate": 5e-06, "loss": 0.1187, "num_input_tokens_seen": 339920428, "step": 1979 }, { "epoch": 0.5205497468271191, "loss": 0.10209144651889801, "loss_ce": 0.002482067793607712, "loss_iou": 0.546875, "loss_num": 0.0198974609375, "loss_xval": 0.099609375, "num_input_tokens_seen": 339920428, "step": 1979 }, { "epoch": 0.5208127835865062, "grad_norm": 22.26578813252007, "learning_rate": 5e-06, "loss": 0.1224, "num_input_tokens_seen": 340092516, "step": 1980 }, { "epoch": 0.5208127835865062, "loss": 0.19446319341659546, "loss_ce": 0.006719036493450403, "loss_iou": 0.5703125, "loss_num": 0.03759765625, "loss_xval": 0.1875, "num_input_tokens_seen": 340092516, "step": 1980 }, { "epoch": 0.5210758203458934, "grad_norm": 9.547756324115959, "learning_rate": 5e-06, "loss": 0.1443, "num_input_tokens_seen": 340265052, "step": 1981 }, { "epoch": 0.5210758203458934, "loss": 0.14365576207637787, "loss_ce": 0.0007572055910713971, "loss_iou": 0.60546875, "loss_num": 0.028564453125, "loss_xval": 0.142578125, "num_input_tokens_seen": 340265052, "step": 1981 }, { "epoch": 0.5213388571052805, "grad_norm": 7.840208454239884, "learning_rate": 5e-06, "loss": 0.1566, "num_input_tokens_seen": 340436896, "step": 1982 }, { "epoch": 0.5213388571052805, "loss": 0.11067777872085571, "loss_ce": 0.0016995080513879657, "loss_iou": 0.55859375, "loss_num": 0.0218505859375, "loss_xval": 0.10888671875, "num_input_tokens_seen": 340436896, "step": 1982 }, { "epoch": 0.5216018938646676, "grad_norm": 24.43574592347015, "learning_rate": 5e-06, "loss": 0.1126, "num_input_tokens_seen": 340609344, "step": 1983 }, { "epoch": 
0.5216018938646676, "loss": 0.10227973759174347, "loss_ce": 0.0038605397567152977, "loss_iou": 0.36328125, "loss_num": 0.0196533203125, "loss_xval": 0.0986328125, "num_input_tokens_seen": 340609344, "step": 1983 }, { "epoch": 0.5218649306240547, "grad_norm": 6.254699637052052, "learning_rate": 5e-06, "loss": 0.1266, "num_input_tokens_seen": 340779700, "step": 1984 }, { "epoch": 0.5218649306240547, "loss": 0.1373625099658966, "loss_ce": 0.0002012598270084709, "loss_iou": 0.5234375, "loss_num": 0.0274658203125, "loss_xval": 0.13671875, "num_input_tokens_seen": 340779700, "step": 1984 }, { "epoch": 0.5221279673834418, "grad_norm": 4.970529369812158, "learning_rate": 5e-06, "loss": 0.1176, "num_input_tokens_seen": 340952012, "step": 1985 }, { "epoch": 0.5221279673834418, "loss": 0.07744970917701721, "loss_ce": 0.00027074594981968403, "loss_iou": 0.625, "loss_num": 0.01544189453125, "loss_xval": 0.0771484375, "num_input_tokens_seen": 340952012, "step": 1985 }, { "epoch": 0.5223910041428289, "grad_norm": 9.386513844488379, "learning_rate": 5e-06, "loss": 0.1525, "num_input_tokens_seen": 341124172, "step": 1986 }, { "epoch": 0.5223910041428289, "loss": 0.16602635383605957, "loss_ce": 0.0037033448461443186, "loss_iou": 0.6640625, "loss_num": 0.032470703125, "loss_xval": 0.162109375, "num_input_tokens_seen": 341124172, "step": 1986 }, { "epoch": 0.5226540409022161, "grad_norm": 75.36425054276123, "learning_rate": 5e-06, "loss": 0.1244, "num_input_tokens_seen": 341296144, "step": 1987 }, { "epoch": 0.5226540409022161, "loss": 0.08925444632768631, "loss_ce": 0.0014248627703636885, "loss_iou": 0.51953125, "loss_num": 0.017578125, "loss_xval": 0.087890625, "num_input_tokens_seen": 341296144, "step": 1987 }, { "epoch": 0.5229170776616032, "grad_norm": 6.0244434485136455, "learning_rate": 5e-06, "loss": 0.1624, "num_input_tokens_seen": 341468128, "step": 1988 }, { "epoch": 0.5229170776616032, "loss": 0.18709853291511536, "loss_ce": 0.0007887266110628843, "loss_iou": 0.65234375, 
"loss_num": 0.037353515625, "loss_xval": 0.1865234375, "num_input_tokens_seen": 341468128, "step": 1988 }, { "epoch": 0.5231801144209903, "grad_norm": 6.46446584669901, "learning_rate": 5e-06, "loss": 0.1207, "num_input_tokens_seen": 341640532, "step": 1989 }, { "epoch": 0.5231801144209903, "loss": 0.13015246391296387, "loss_ce": 0.00030016410164535046, "loss_iou": 0.484375, "loss_num": 0.0260009765625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 341640532, "step": 1989 }, { "epoch": 0.5234431511803774, "grad_norm": 12.345485884753447, "learning_rate": 5e-06, "loss": 0.1294, "num_input_tokens_seen": 341812700, "step": 1990 }, { "epoch": 0.5234431511803774, "loss": 0.10668568313121796, "loss_ce": 0.00027087965281680226, "loss_iou": 0.55078125, "loss_num": 0.021240234375, "loss_xval": 0.1064453125, "num_input_tokens_seen": 341812700, "step": 1990 }, { "epoch": 0.5237061879397645, "grad_norm": 3.3770749440530894, "learning_rate": 5e-06, "loss": 0.1089, "num_input_tokens_seen": 341984700, "step": 1991 }, { "epoch": 0.5237061879397645, "loss": 0.1165972501039505, "loss_ce": 0.0006304577691480517, "loss_iou": 0.431640625, "loss_num": 0.023193359375, "loss_xval": 0.1162109375, "num_input_tokens_seen": 341984700, "step": 1991 }, { "epoch": 0.5239692246991517, "grad_norm": 9.677414053666478, "learning_rate": 5e-06, "loss": 0.1048, "num_input_tokens_seen": 342156884, "step": 1992 }, { "epoch": 0.5239692246991517, "loss": 0.1423652172088623, "loss_ce": 0.0010383009212091565, "loss_iou": 0.58203125, "loss_num": 0.0281982421875, "loss_xval": 0.1416015625, "num_input_tokens_seen": 342156884, "step": 1992 }, { "epoch": 0.5242322614585389, "grad_norm": 3.6750881151911643, "learning_rate": 5e-06, "loss": 0.1894, "num_input_tokens_seen": 342329140, "step": 1993 }, { "epoch": 0.5242322614585389, "loss": 0.18173760175704956, "loss_ce": 0.009221725165843964, "loss_iou": 0.46484375, "loss_num": 0.034423828125, "loss_xval": 0.1728515625, "num_input_tokens_seen": 342329140, 
"step": 1993 }, { "epoch": 0.524495298217926, "grad_norm": 19.882541327036233, "learning_rate": 5e-06, "loss": 0.1454, "num_input_tokens_seen": 342501472, "step": 1994 }, { "epoch": 0.524495298217926, "loss": 0.0589841827750206, "loss_ce": 0.001641655690036714, "loss_iou": 0.357421875, "loss_num": 0.011474609375, "loss_xval": 0.057373046875, "num_input_tokens_seen": 342501472, "step": 1994 }, { "epoch": 0.5247583349773131, "grad_norm": 16.32555909861509, "learning_rate": 5e-06, "loss": 0.1339, "num_input_tokens_seen": 342673528, "step": 1995 }, { "epoch": 0.5247583349773131, "loss": 0.18063244223594666, "loss_ce": 0.0004871786804869771, "loss_iou": 0.5546875, "loss_num": 0.0361328125, "loss_xval": 0.1796875, "num_input_tokens_seen": 342673528, "step": 1995 }, { "epoch": 0.5250213717367002, "grad_norm": 9.845315436294685, "learning_rate": 5e-06, "loss": 0.155, "num_input_tokens_seen": 342845892, "step": 1996 }, { "epoch": 0.5250213717367002, "loss": 0.17416182160377502, "loss_ce": 0.0006388599867932498, "loss_iou": 0.46875, "loss_num": 0.03466796875, "loss_xval": 0.173828125, "num_input_tokens_seen": 342845892, "step": 1996 }, { "epoch": 0.5252844084960874, "grad_norm": 4.531599732042389, "learning_rate": 5e-06, "loss": 0.1005, "num_input_tokens_seen": 343018040, "step": 1997 }, { "epoch": 0.5252844084960874, "loss": 0.15178070962429047, "loss_ce": 0.0012069816002622247, "loss_iou": 0.490234375, "loss_num": 0.0301513671875, "loss_xval": 0.150390625, "num_input_tokens_seen": 343018040, "step": 1997 }, { "epoch": 0.5255474452554745, "grad_norm": 15.562861804573469, "learning_rate": 5e-06, "loss": 0.1677, "num_input_tokens_seen": 343189936, "step": 1998 }, { "epoch": 0.5255474452554745, "loss": 0.1337474286556244, "loss_ce": 0.0038340911269187927, "loss_iou": 0.37890625, "loss_num": 0.0260009765625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 343189936, "step": 1998 }, { "epoch": 0.5258104820148616, "grad_norm": 6.338186207451289, "learning_rate": 5e-06, 
"loss": 0.1225, "num_input_tokens_seen": 343361876, "step": 1999 }, { "epoch": 0.5258104820148616, "loss": 0.08655121922492981, "loss_ce": 0.0022006274666637182, "loss_iou": 0.5, "loss_num": 0.016845703125, "loss_xval": 0.08447265625, "num_input_tokens_seen": 343361876, "step": 1999 }, { "epoch": 0.5260735187742487, "grad_norm": 36.314036588641784, "learning_rate": 5e-06, "loss": 0.1506, "num_input_tokens_seen": 343532304, "step": 2000 }, { "epoch": 0.5260735187742487, "eval_websight_new_CIoU": 0.8649432361125946, "eval_websight_new_GIoU": 0.8667054772377014, "eval_websight_new_IoU": 0.8704231679439545, "eval_websight_new_MAE_all": 0.021235700696706772, "eval_websight_new_MAE_h": 0.007067237980663776, "eval_websight_new_MAE_w": 0.03388772998005152, "eval_websight_new_MAE_x": 0.03554858546704054, "eval_websight_new_MAE_y": 0.008439254947006702, "eval_websight_new_NUM_probability": 0.999984085559845, "eval_websight_new_inside_bbox": 1.0, "eval_websight_new_loss": 0.10596824437379837, "eval_websight_new_loss_ce": 6.8271494910732144e-06, "eval_websight_new_loss_iou": 0.36328125, "eval_websight_new_loss_num": 0.018640518188476562, "eval_websight_new_loss_xval": 0.09316253662109375, "eval_websight_new_runtime": 54.898, "eval_websight_new_samples_per_second": 0.911, "eval_websight_new_steps_per_second": 0.036, "num_input_tokens_seen": 343532304, "step": 2000 }, { "epoch": 0.5260735187742487, "eval_seeclick_CIoU": 0.6263805329799652, "eval_seeclick_GIoU": 0.627049595117569, "eval_seeclick_IoU": 0.6531890332698822, "eval_seeclick_MAE_all": 0.050390077754855156, "eval_seeclick_MAE_h": 0.02558732032775879, "eval_seeclick_MAE_w": 0.07135490141808987, "eval_seeclick_MAE_x": 0.07951905764639378, "eval_seeclick_MAE_y": 0.025099032558500767, "eval_seeclick_NUM_probability": 0.9999794960021973, "eval_seeclick_inside_bbox": 0.921875, "eval_seeclick_loss": 0.22126971185207367, "eval_seeclick_loss_ce": 0.008990719448775053, "eval_seeclick_loss_iou": 0.47021484375, 
"eval_seeclick_loss_num": 0.04229736328125, "eval_seeclick_loss_xval": 0.211334228515625, "eval_seeclick_runtime": 78.1311, "eval_seeclick_samples_per_second": 0.55, "eval_seeclick_steps_per_second": 0.026, "num_input_tokens_seen": 343532304, "step": 2000 }, { "epoch": 0.5260735187742487, "eval_icons_CIoU": 0.8664080798625946, "eval_icons_GIoU": 0.8631992936134338, "eval_icons_IoU": 0.8702020049095154, "eval_icons_MAE_all": 0.0180177534930408, "eval_icons_MAE_h": 0.016653602942824364, "eval_icons_MAE_w": 0.018555423244833946, "eval_icons_MAE_x": 0.01882947515696287, "eval_icons_MAE_y": 0.018032516352832317, "eval_icons_NUM_probability": 0.9999754428863525, "eval_icons_inside_bbox": 1.0, "eval_icons_loss": 0.059605177491903305, "eval_icons_loss_ce": 1.412479377904674e-05, "eval_icons_loss_iou": 0.5333251953125, "eval_icons_loss_num": 0.011335372924804688, "eval_icons_loss_xval": 0.05667877197265625, "eval_icons_runtime": 80.1161, "eval_icons_samples_per_second": 0.624, "eval_icons_steps_per_second": 0.025, "num_input_tokens_seen": 343532304, "step": 2000 }, { "epoch": 0.5260735187742487, "eval_screenspot_CIoU": 0.5386523008346558, "eval_screenspot_GIoU": 0.5300994714101156, "eval_screenspot_IoU": 0.5833619435628256, "eval_screenspot_MAE_all": 0.08941345165173213, "eval_screenspot_MAE_h": 0.04660519336660703, "eval_screenspot_MAE_w": 0.1580625375111898, "eval_screenspot_MAE_x": 0.10776859025160472, "eval_screenspot_MAE_y": 0.04521748423576355, "eval_screenspot_NUM_probability": 0.9995922644933065, "eval_screenspot_inside_bbox": 0.850000003973643, "eval_screenspot_loss": 0.8645088076591492, "eval_screenspot_loss_ce": 0.5230478445688883, "eval_screenspot_loss_iou": 0.4464925130208333, "eval_screenspot_loss_num": 0.06682078043619792, "eval_screenspot_loss_xval": 0.3342692057291667, "eval_screenspot_runtime": 148.9183, "eval_screenspot_samples_per_second": 0.598, "eval_screenspot_steps_per_second": 0.02, "num_input_tokens_seen": 343532304, "step": 2000 }, { "epoch": 
0.5260735187742487, "loss": 0.8609392046928406, "loss_ce": 0.5118181109428406, "loss_iou": 0.390625, "loss_num": 0.06982421875, "loss_xval": 0.349609375, "num_input_tokens_seen": 343532304, "step": 2000 }, { "epoch": 0.5263365555336358, "grad_norm": 8.429330373903323, "learning_rate": 5e-06, "loss": 0.1284, "num_input_tokens_seen": 343704524, "step": 2001 }, { "epoch": 0.5263365555336358, "loss": 0.21621274948120117, "loss_ce": 0.0006976150907576084, "loss_iou": 0.2578125, "loss_num": 0.04296875, "loss_xval": 0.2158203125, "num_input_tokens_seen": 343704524, "step": 2001 }, { "epoch": 0.526599592293023, "grad_norm": 16.618545138422856, "learning_rate": 5e-06, "loss": 0.1471, "num_input_tokens_seen": 343876532, "step": 2002 }, { "epoch": 0.526599592293023, "loss": 0.14034898579120636, "loss_ce": 0.0025926402304321527, "loss_iou": 0.515625, "loss_num": 0.027587890625, "loss_xval": 0.1376953125, "num_input_tokens_seen": 343876532, "step": 2002 }, { "epoch": 0.5268626290524101, "grad_norm": 7.347451448031887, "learning_rate": 5e-06, "loss": 0.0847, "num_input_tokens_seen": 344048440, "step": 2003 }, { "epoch": 0.5268626290524101, "loss": 0.11765069514513016, "loss_ce": 0.0015160476323217154, "loss_iou": 0.376953125, "loss_num": 0.023193359375, "loss_xval": 0.1162109375, "num_input_tokens_seen": 344048440, "step": 2003 }, { "epoch": 0.5271256658117972, "grad_norm": 11.90854860345847, "learning_rate": 5e-06, "loss": 0.1657, "num_input_tokens_seen": 344220524, "step": 2004 }, { "epoch": 0.5271256658117972, "loss": 0.09579437971115112, "loss_ce": 0.0003353926877025515, "loss_iou": 0.490234375, "loss_num": 0.01904296875, "loss_xval": 0.095703125, "num_input_tokens_seen": 344220524, "step": 2004 }, { "epoch": 0.5273887025711843, "grad_norm": 15.406217883818895, "learning_rate": 5e-06, "loss": 0.1745, "num_input_tokens_seen": 344391120, "step": 2005 }, { "epoch": 0.5273887025711843, "loss": 0.08335284888744354, "loss_ce": 0.005258369259536266, "loss_iou": 0.419921875, 
"loss_num": 0.015625, "loss_xval": 0.078125, "num_input_tokens_seen": 344391120, "step": 2005 }, { "epoch": 0.5276517393305714, "grad_norm": 6.625644981750321, "learning_rate": 5e-06, "loss": 0.1115, "num_input_tokens_seen": 344563368, "step": 2006 }, { "epoch": 0.5276517393305714, "loss": 0.1281585693359375, "loss_ce": 0.0008697626180946827, "loss_iou": 0.326171875, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 344563368, "step": 2006 }, { "epoch": 0.5279147760899586, "grad_norm": 5.116207808263436, "learning_rate": 5e-06, "loss": 0.1025, "num_input_tokens_seen": 344735272, "step": 2007 }, { "epoch": 0.5279147760899586, "loss": 0.10428653657436371, "loss_ce": 0.00014529118197970092, "loss_iou": 0.373046875, "loss_num": 0.0208740234375, "loss_xval": 0.10400390625, "num_input_tokens_seen": 344735272, "step": 2007 }, { "epoch": 0.5281778128493457, "grad_norm": 5.758545412441202, "learning_rate": 5e-06, "loss": 0.1129, "num_input_tokens_seen": 344907448, "step": 2008 }, { "epoch": 0.5281778128493457, "loss": 0.07528108358383179, "loss_ce": 0.0017337151803076267, "loss_iou": 0.59375, "loss_num": 0.01470947265625, "loss_xval": 0.07373046875, "num_input_tokens_seen": 344907448, "step": 2008 }, { "epoch": 0.5284408496087328, "grad_norm": 5.079743263348732, "learning_rate": 5e-06, "loss": 0.0995, "num_input_tokens_seen": 345080020, "step": 2009 }, { "epoch": 0.5284408496087328, "loss": 0.06423554569482803, "loss_ce": 0.0006674337200820446, "loss_iou": 0.5859375, "loss_num": 0.0126953125, "loss_xval": 0.0634765625, "num_input_tokens_seen": 345080020, "step": 2009 }, { "epoch": 0.5287038863681199, "grad_norm": 6.791724647885975, "learning_rate": 5e-06, "loss": 0.1289, "num_input_tokens_seen": 345251684, "step": 2010 }, { "epoch": 0.5287038863681199, "loss": 0.13502314686775208, "loss_ce": 0.0006847689510323107, "loss_iou": 0.484375, "loss_num": 0.02685546875, "loss_xval": 0.134765625, "num_input_tokens_seen": 345251684, "step": 2010 }, { 
"epoch": 0.528966923127507, "grad_norm": 7.5731259754899245, "learning_rate": 5e-06, "loss": 0.135, "num_input_tokens_seen": 345421972, "step": 2011 }, { "epoch": 0.528966923127507, "loss": 0.10425636172294617, "loss_ce": 0.0018088552169501781, "loss_iou": 0.357421875, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 345421972, "step": 2011 }, { "epoch": 0.5292299598868941, "grad_norm": 4.024834656974236, "learning_rate": 5e-06, "loss": 0.0691, "num_input_tokens_seen": 345594140, "step": 2012 }, { "epoch": 0.5292299598868941, "loss": 0.05266657471656799, "loss_ce": 0.00011530861956998706, "loss_iou": 0.53125, "loss_num": 0.010498046875, "loss_xval": 0.052490234375, "num_input_tokens_seen": 345594140, "step": 2012 }, { "epoch": 0.5294929966462814, "grad_norm": 4.913874816205936, "learning_rate": 5e-06, "loss": 0.1334, "num_input_tokens_seen": 345766220, "step": 2013 }, { "epoch": 0.5294929966462814, "loss": 0.08730873465538025, "loss_ce": 0.0039957487024366856, "loss_iou": 0.55859375, "loss_num": 0.0166015625, "loss_xval": 0.08349609375, "num_input_tokens_seen": 345766220, "step": 2013 }, { "epoch": 0.5297560334056685, "grad_norm": 5.534989831922425, "learning_rate": 5e-06, "loss": 0.1479, "num_input_tokens_seen": 345938392, "step": 2014 }, { "epoch": 0.5297560334056685, "loss": 0.11161148548126221, "loss_ce": 0.0032130456529557705, "loss_iou": 0.63671875, "loss_num": 0.021728515625, "loss_xval": 0.1083984375, "num_input_tokens_seen": 345938392, "step": 2014 }, { "epoch": 0.5300190701650556, "grad_norm": 7.209865250975092, "learning_rate": 5e-06, "loss": 0.15, "num_input_tokens_seen": 346110656, "step": 2015 }, { "epoch": 0.5300190701650556, "loss": 0.12729424238204956, "loss_ce": 0.0035759946331381798, "loss_iou": 0.625, "loss_num": 0.0247802734375, "loss_xval": 0.12353515625, "num_input_tokens_seen": 346110656, "step": 2015 }, { "epoch": 0.5302821069244427, "grad_norm": 5.146240130208517, "learning_rate": 5e-06, "loss": 0.1439, 
"num_input_tokens_seen": 346283032, "step": 2016 }, { "epoch": 0.5302821069244427, "loss": 0.12445038557052612, "loss_ce": 0.0015866123139858246, "loss_iou": 0.55078125, "loss_num": 0.0245361328125, "loss_xval": 0.123046875, "num_input_tokens_seen": 346283032, "step": 2016 }, { "epoch": 0.5305451436838298, "grad_norm": 4.097776262787885, "learning_rate": 5e-06, "loss": 0.1471, "num_input_tokens_seen": 346455396, "step": 2017 }, { "epoch": 0.5305451436838298, "loss": 0.08194537460803986, "loss_ce": 0.0027369949966669083, "loss_iou": 0.46875, "loss_num": 0.015869140625, "loss_xval": 0.0791015625, "num_input_tokens_seen": 346455396, "step": 2017 }, { "epoch": 0.530808180443217, "grad_norm": 8.807636765638556, "learning_rate": 5e-06, "loss": 0.1434, "num_input_tokens_seen": 346627308, "step": 2018 }, { "epoch": 0.530808180443217, "loss": 0.16153287887573242, "loss_ce": 0.0020022375974804163, "loss_iou": 0.37109375, "loss_num": 0.031982421875, "loss_xval": 0.1591796875, "num_input_tokens_seen": 346627308, "step": 2018 }, { "epoch": 0.5310712172026041, "grad_norm": 10.071725544370869, "learning_rate": 5e-06, "loss": 0.1096, "num_input_tokens_seen": 346799504, "step": 2019 }, { "epoch": 0.5310712172026041, "loss": 0.10446594655513763, "loss_ce": 0.0008282391354441643, "loss_iou": 0.4765625, "loss_num": 0.020751953125, "loss_xval": 0.103515625, "num_input_tokens_seen": 346799504, "step": 2019 }, { "epoch": 0.5313342539619912, "grad_norm": 5.791284152439371, "learning_rate": 5e-06, "loss": 0.1486, "num_input_tokens_seen": 346971328, "step": 2020 }, { "epoch": 0.5313342539619912, "loss": 0.21368807554244995, "loss_ce": 0.002475916873663664, "loss_iou": 0.578125, "loss_num": 0.042236328125, "loss_xval": 0.2109375, "num_input_tokens_seen": 346971328, "step": 2020 }, { "epoch": 0.5315972907213783, "grad_norm": 9.778332313218632, "learning_rate": 5e-06, "loss": 0.1438, "num_input_tokens_seen": 347139872, "step": 2021 }, { "epoch": 0.5315972907213783, "loss": 0.2882145047187805, 
"loss_ce": 0.0021427052561193705, "loss_iou": 0.5078125, "loss_num": 0.05712890625, "loss_xval": 0.28515625, "num_input_tokens_seen": 347139872, "step": 2021 }, { "epoch": 0.5318603274807654, "grad_norm": 7.217119694281503, "learning_rate": 5e-06, "loss": 0.1443, "num_input_tokens_seen": 347312176, "step": 2022 }, { "epoch": 0.5318603274807654, "loss": 0.14617526531219482, "loss_ce": 0.0012472879607230425, "loss_iou": 0.5546875, "loss_num": 0.029052734375, "loss_xval": 0.14453125, "num_input_tokens_seen": 347312176, "step": 2022 }, { "epoch": 0.5321233642401526, "grad_norm": 5.444211224879485, "learning_rate": 5e-06, "loss": 0.1114, "num_input_tokens_seen": 347483864, "step": 2023 }, { "epoch": 0.5321233642401526, "loss": 0.15533965826034546, "loss_ce": 0.0009817371610552073, "loss_iou": 0.47265625, "loss_num": 0.0308837890625, "loss_xval": 0.154296875, "num_input_tokens_seen": 347483864, "step": 2023 }, { "epoch": 0.5323864009995397, "grad_norm": 5.88671307787278, "learning_rate": 5e-06, "loss": 0.1151, "num_input_tokens_seen": 347654444, "step": 2024 }, { "epoch": 0.5323864009995397, "loss": 0.10597267001867294, "loss_ce": 0.0017551433993503451, "loss_iou": 0.6484375, "loss_num": 0.0208740234375, "loss_xval": 0.10400390625, "num_input_tokens_seen": 347654444, "step": 2024 }, { "epoch": 0.5326494377589268, "grad_norm": 4.540449525857699, "learning_rate": 5e-06, "loss": 0.1326, "num_input_tokens_seen": 347826732, "step": 2025 }, { "epoch": 0.5326494377589268, "loss": 0.10671254992485046, "loss_ce": 0.002159323776140809, "loss_iou": 0.51171875, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 347826732, "step": 2025 }, { "epoch": 0.5329124745183139, "grad_norm": 12.403111361726802, "learning_rate": 5e-06, "loss": 0.1566, "num_input_tokens_seen": 347995312, "step": 2026 }, { "epoch": 0.5329124745183139, "loss": 0.1710810512304306, "loss_ce": 0.00019787647761404514, "loss_iou": 0.396484375, "loss_num": 0.0341796875, "loss_xval": 
0.1708984375, "num_input_tokens_seen": 347995312, "step": 2026 }, { "epoch": 0.533175511277701, "grad_norm": 4.826031974791996, "learning_rate": 5e-06, "loss": 0.1595, "num_input_tokens_seen": 348167268, "step": 2027 }, { "epoch": 0.533175511277701, "loss": 0.14301443099975586, "loss_ce": 0.004235736560076475, "loss_iou": 0.48046875, "loss_num": 0.02783203125, "loss_xval": 0.138671875, "num_input_tokens_seen": 348167268, "step": 2027 }, { "epoch": 0.5334385480370882, "grad_norm": 5.858834044093049, "learning_rate": 5e-06, "loss": 0.1251, "num_input_tokens_seen": 348339324, "step": 2028 }, { "epoch": 0.5334385480370882, "loss": 0.13567912578582764, "loss_ce": 0.0013865029904991388, "loss_iou": 0.5078125, "loss_num": 0.02685546875, "loss_xval": 0.134765625, "num_input_tokens_seen": 348339324, "step": 2028 }, { "epoch": 0.5337015847964753, "grad_norm": 18.687645390034255, "learning_rate": 5e-06, "loss": 0.1488, "num_input_tokens_seen": 348511688, "step": 2029 }, { "epoch": 0.5337015847964753, "loss": 0.09527582675218582, "loss_ce": 0.0008391792071051896, "loss_iou": 0.484375, "loss_num": 0.0189208984375, "loss_xval": 0.09423828125, "num_input_tokens_seen": 348511688, "step": 2029 }, { "epoch": 0.5339646215558624, "grad_norm": 13.127056473400154, "learning_rate": 5e-06, "loss": 0.1272, "num_input_tokens_seen": 348682072, "step": 2030 }, { "epoch": 0.5339646215558624, "loss": 0.10760138183832169, "loss_ce": 0.003078682580962777, "loss_iou": 0.5078125, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 348682072, "step": 2030 }, { "epoch": 0.5342276583152495, "grad_norm": 4.612341549056293, "learning_rate": 5e-06, "loss": 0.1194, "num_input_tokens_seen": 348854384, "step": 2031 }, { "epoch": 0.5342276583152495, "loss": 0.12830308079719543, "loss_ce": 0.004981548525393009, "loss_iou": 0.462890625, "loss_num": 0.024658203125, "loss_xval": 0.12353515625, "num_input_tokens_seen": 348854384, "step": 2031 }, { "epoch": 0.5344906950746366, 
"grad_norm": 8.941916447828152, "learning_rate": 5e-06, "loss": 0.0967, "num_input_tokens_seen": 349026540, "step": 2032 }, { "epoch": 0.5344906950746366, "loss": 0.11764685809612274, "loss_ce": 0.0009781570406630635, "loss_iou": 0.515625, "loss_num": 0.0233154296875, "loss_xval": 0.11669921875, "num_input_tokens_seen": 349026540, "step": 2032 }, { "epoch": 0.5347537318340239, "grad_norm": 3.1856540562472104, "learning_rate": 5e-06, "loss": 0.1092, "num_input_tokens_seen": 349195704, "step": 2033 }, { "epoch": 0.5347537318340239, "loss": 0.15431983768939972, "loss_ce": 0.001899797236546874, "loss_iou": 0.455078125, "loss_num": 0.030517578125, "loss_xval": 0.15234375, "num_input_tokens_seen": 349195704, "step": 2033 }, { "epoch": 0.535016768593411, "grad_norm": 6.665681138831115, "learning_rate": 5e-06, "loss": 0.1192, "num_input_tokens_seen": 349367820, "step": 2034 }, { "epoch": 0.535016768593411, "loss": 0.17121072113513947, "loss_ce": 0.001441433560103178, "loss_iou": 0.455078125, "loss_num": 0.033935546875, "loss_xval": 0.169921875, "num_input_tokens_seen": 349367820, "step": 2034 }, { "epoch": 0.5352798053527981, "grad_norm": 5.921520862992091, "learning_rate": 5e-06, "loss": 0.1264, "num_input_tokens_seen": 349540064, "step": 2035 }, { "epoch": 0.5352798053527981, "loss": 0.102360799908638, "loss_ce": 0.0002947567554656416, "loss_iou": 0.39453125, "loss_num": 0.0203857421875, "loss_xval": 0.10205078125, "num_input_tokens_seen": 349540064, "step": 2035 }, { "epoch": 0.5355428421121852, "grad_norm": 5.3538979358096785, "learning_rate": 5e-06, "loss": 0.0976, "num_input_tokens_seen": 349710408, "step": 2036 }, { "epoch": 0.5355428421121852, "loss": 0.06293447315692902, "loss_ce": 0.0006481010350398719, "loss_iou": 0.392578125, "loss_num": 0.012451171875, "loss_xval": 0.062255859375, "num_input_tokens_seen": 349710408, "step": 2036 }, { "epoch": 0.5358058788715723, "grad_norm": 11.23909243130062, "learning_rate": 5e-06, "loss": 0.1226, "num_input_tokens_seen": 
349880916, "step": 2037 }, { "epoch": 0.5358058788715723, "loss": 0.1736185997724533, "loss_ce": 0.003116899635642767, "loss_iou": 0.474609375, "loss_num": 0.0341796875, "loss_xval": 0.1708984375, "num_input_tokens_seen": 349880916, "step": 2037 }, { "epoch": 0.5360689156309594, "grad_norm": 11.327258827718145, "learning_rate": 5e-06, "loss": 0.1434, "num_input_tokens_seen": 350053024, "step": 2038 }, { "epoch": 0.5360689156309594, "loss": 0.14642956852912903, "loss_ce": 0.0015015878016129136, "loss_iou": 0.53515625, "loss_num": 0.029052734375, "loss_xval": 0.14453125, "num_input_tokens_seen": 350053024, "step": 2038 }, { "epoch": 0.5363319523903466, "grad_norm": 10.572751348187758, "learning_rate": 5e-06, "loss": 0.1315, "num_input_tokens_seen": 350225056, "step": 2039 }, { "epoch": 0.5363319523903466, "loss": 0.08779959380626678, "loss_ce": 0.0008397561614401639, "loss_iou": 0.57421875, "loss_num": 0.0174560546875, "loss_xval": 0.0869140625, "num_input_tokens_seen": 350225056, "step": 2039 }, { "epoch": 0.5365949891497337, "grad_norm": 4.830573829484519, "learning_rate": 5e-06, "loss": 0.1198, "num_input_tokens_seen": 350395628, "step": 2040 }, { "epoch": 0.5365949891497337, "loss": 0.15380313992500305, "loss_ce": 0.0012152513954788446, "loss_iou": 0.53515625, "loss_num": 0.030517578125, "loss_xval": 0.15234375, "num_input_tokens_seen": 350395628, "step": 2040 }, { "epoch": 0.5368580259091208, "grad_norm": 6.867013774487234, "learning_rate": 5e-06, "loss": 0.1852, "num_input_tokens_seen": 350567756, "step": 2041 }, { "epoch": 0.5368580259091208, "loss": 0.1892320066690445, "loss_ce": 0.00420394167304039, "loss_iou": 0.5859375, "loss_num": 0.037109375, "loss_xval": 0.1845703125, "num_input_tokens_seen": 350567756, "step": 2041 }, { "epoch": 0.5371210626685079, "grad_norm": 9.243159752879412, "learning_rate": 5e-06, "loss": 0.1028, "num_input_tokens_seen": 350739840, "step": 2042 }, { "epoch": 0.5371210626685079, "loss": 0.1004796102643013, "loss_ce": 
0.00013781688176095486, "loss_iou": 0.54296875, "loss_num": 0.02001953125, "loss_xval": 0.1005859375, "num_input_tokens_seen": 350739840, "step": 2042 }, { "epoch": 0.537384099427895, "grad_norm": 11.965655673255789, "learning_rate": 5e-06, "loss": 0.1592, "num_input_tokens_seen": 350912084, "step": 2043 }, { "epoch": 0.537384099427895, "loss": 0.20747891068458557, "loss_ce": 0.0016378372674807906, "loss_iou": 0.34765625, "loss_num": 0.041015625, "loss_xval": 0.2060546875, "num_input_tokens_seen": 350912084, "step": 2043 }, { "epoch": 0.5376471361872822, "grad_norm": 3.1288535063063327, "learning_rate": 5e-06, "loss": 0.0913, "num_input_tokens_seen": 351081480, "step": 2044 }, { "epoch": 0.5376471361872822, "loss": 0.1123102456331253, "loss_ce": 0.001989200245589018, "loss_iou": 0.48828125, "loss_num": 0.0220947265625, "loss_xval": 0.1103515625, "num_input_tokens_seen": 351081480, "step": 2044 }, { "epoch": 0.5379101729466693, "grad_norm": 4.103876123324951, "learning_rate": 5e-06, "loss": 0.1013, "num_input_tokens_seen": 351253596, "step": 2045 }, { "epoch": 0.5379101729466693, "loss": 0.11436055600643158, "loss_ce": 0.0011403337121009827, "loss_iou": 0.4296875, "loss_num": 0.022705078125, "loss_xval": 0.11328125, "num_input_tokens_seen": 351253596, "step": 2045 }, { "epoch": 0.5381732097060564, "grad_norm": 5.468839567129726, "learning_rate": 5e-06, "loss": 0.0967, "num_input_tokens_seen": 351425656, "step": 2046 }, { "epoch": 0.5381732097060564, "loss": 0.0928923487663269, "loss_ce": 0.0006071930401958525, "loss_iou": 0.5625, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 351425656, "step": 2046 }, { "epoch": 0.5384362464654435, "grad_norm": 14.247562489878419, "learning_rate": 5e-06, "loss": 0.1135, "num_input_tokens_seen": 351597816, "step": 2047 }, { "epoch": 0.5384362464654435, "loss": 0.2033635675907135, "loss_ce": 0.0019475510343909264, "loss_iou": 0.392578125, "loss_num": 0.040283203125, "loss_xval": 0.201171875, 
"num_input_tokens_seen": 351597816, "step": 2047 }, { "epoch": 0.5386992832248306, "grad_norm": 4.3738903937509885, "learning_rate": 5e-06, "loss": 0.1305, "num_input_tokens_seen": 351769980, "step": 2048 }, { "epoch": 0.5386992832248306, "loss": 0.059591565281152725, "loss_ce": 8.22915681055747e-05, "loss_iou": 0.578125, "loss_num": 0.01190185546875, "loss_xval": 0.0595703125, "num_input_tokens_seen": 351769980, "step": 2048 }, { "epoch": 0.5389623199842178, "grad_norm": 4.415343795853702, "learning_rate": 5e-06, "loss": 0.1496, "num_input_tokens_seen": 351939320, "step": 2049 }, { "epoch": 0.5389623199842178, "loss": 0.24034851789474487, "loss_ce": 0.0008465623832307756, "loss_iou": 0.703125, "loss_num": 0.0478515625, "loss_xval": 0.2392578125, "num_input_tokens_seen": 351939320, "step": 2049 }, { "epoch": 0.5392253567436049, "grad_norm": 16.797461240634142, "learning_rate": 5e-06, "loss": 0.1059, "num_input_tokens_seen": 352111376, "step": 2050 }, { "epoch": 0.5392253567436049, "loss": 0.14554640650749207, "loss_ce": 0.0012287711724638939, "loss_iou": 0.45703125, "loss_num": 0.02880859375, "loss_xval": 0.14453125, "num_input_tokens_seen": 352111376, "step": 2050 }, { "epoch": 0.539488393502992, "grad_norm": 7.010827810107144, "learning_rate": 5e-06, "loss": 0.1444, "num_input_tokens_seen": 352283872, "step": 2051 }, { "epoch": 0.539488393502992, "loss": 0.1082100197672844, "loss_ce": 0.0016121190274134278, "loss_iou": 0.546875, "loss_num": 0.0213623046875, "loss_xval": 0.1064453125, "num_input_tokens_seen": 352283872, "step": 2051 }, { "epoch": 0.5397514302623792, "grad_norm": 5.086953515153334, "learning_rate": 5e-06, "loss": 0.0939, "num_input_tokens_seen": 352456124, "step": 2052 }, { "epoch": 0.5397514302623792, "loss": 0.05017915368080139, "loss_ce": 0.0004965342814102769, "loss_iou": 0.53125, "loss_num": 0.00994873046875, "loss_xval": 0.0498046875, "num_input_tokens_seen": 352456124, "step": 2052 }, { "epoch": 0.5400144670217663, "grad_norm": 
9.025520856951152, "learning_rate": 5e-06, "loss": 0.1356, "num_input_tokens_seen": 352626592, "step": 2053 }, { "epoch": 0.5400144670217663, "loss": 0.16316679120063782, "loss_ce": 0.0005996549734845757, "loss_iou": 0.3828125, "loss_num": 0.032470703125, "loss_xval": 0.162109375, "num_input_tokens_seen": 352626592, "step": 2053 }, { "epoch": 0.5402775037811535, "grad_norm": 6.628974819075429, "learning_rate": 5e-06, "loss": 0.1125, "num_input_tokens_seen": 352798532, "step": 2054 }, { "epoch": 0.5402775037811535, "loss": 0.10173699259757996, "loss_ce": 0.002814263803884387, "loss_iou": 0.61328125, "loss_num": 0.019775390625, "loss_xval": 0.09912109375, "num_input_tokens_seen": 352798532, "step": 2054 }, { "epoch": 0.5405405405405406, "grad_norm": 4.016489890807633, "learning_rate": 5e-06, "loss": 0.1037, "num_input_tokens_seen": 352970876, "step": 2055 }, { "epoch": 0.5405405405405406, "loss": 0.12183534353971481, "loss_ce": 0.00010072031000163406, "loss_iou": 0.578125, "loss_num": 0.0242919921875, "loss_xval": 0.12158203125, "num_input_tokens_seen": 352970876, "step": 2055 }, { "epoch": 0.5408035772999277, "grad_norm": 10.022531221280579, "learning_rate": 5e-06, "loss": 0.09, "num_input_tokens_seen": 353143148, "step": 2056 }, { "epoch": 0.5408035772999277, "loss": 0.11547866463661194, "loss_ce": 0.0016328342026099563, "loss_iou": 0.45703125, "loss_num": 0.022705078125, "loss_xval": 0.11376953125, "num_input_tokens_seen": 353143148, "step": 2056 }, { "epoch": 0.5410666140593148, "grad_norm": 4.0813858316372205, "learning_rate": 5e-06, "loss": 0.1298, "num_input_tokens_seen": 353315092, "step": 2057 }, { "epoch": 0.5410666140593148, "loss": 0.2096938192844391, "loss_ce": 0.005348118022084236, "loss_iou": 0.57421875, "loss_num": 0.040771484375, "loss_xval": 0.2041015625, "num_input_tokens_seen": 353315092, "step": 2057 }, { "epoch": 0.5413296508187019, "grad_norm": 6.060936634204627, "learning_rate": 5e-06, "loss": 0.1086, "num_input_tokens_seen": 353487316, 
"step": 2058 }, { "epoch": 0.5413296508187019, "loss": 0.0933343917131424, "loss_ce": 0.0019037279998883605, "loss_iou": 0.6171875, "loss_num": 0.018310546875, "loss_xval": 0.09130859375, "num_input_tokens_seen": 353487316, "step": 2058 }, { "epoch": 0.541592687578089, "grad_norm": 5.183981355840974, "learning_rate": 5e-06, "loss": 0.1516, "num_input_tokens_seen": 353659672, "step": 2059 }, { "epoch": 0.541592687578089, "loss": 0.1340794712305069, "loss_ce": 0.001663701143115759, "loss_iou": 0.6328125, "loss_num": 0.0264892578125, "loss_xval": 0.1328125, "num_input_tokens_seen": 353659672, "step": 2059 }, { "epoch": 0.5418557243374762, "grad_norm": 4.266501842679085, "learning_rate": 5e-06, "loss": 0.1041, "num_input_tokens_seen": 353829984, "step": 2060 }, { "epoch": 0.5418557243374762, "loss": 0.16655325889587402, "loss_ce": 0.0011174663668498397, "loss_iou": 0.4609375, "loss_num": 0.033203125, "loss_xval": 0.1650390625, "num_input_tokens_seen": 353829984, "step": 2060 }, { "epoch": 0.5421187610968633, "grad_norm": 5.044214417820774, "learning_rate": 5e-06, "loss": 0.1193, "num_input_tokens_seen": 354002080, "step": 2061 }, { "epoch": 0.5421187610968633, "loss": 0.11388491839170456, "loss_ce": 0.0009240994695574045, "loss_iou": 0.412109375, "loss_num": 0.0225830078125, "loss_xval": 0.11279296875, "num_input_tokens_seen": 354002080, "step": 2061 }, { "epoch": 0.5423817978562504, "grad_norm": 5.933256816179162, "learning_rate": 5e-06, "loss": 0.0991, "num_input_tokens_seen": 354174264, "step": 2062 }, { "epoch": 0.5423817978562504, "loss": 0.07782945036888123, "loss_ce": 0.00034532046993263066, "loss_iou": 0.515625, "loss_num": 0.0155029296875, "loss_xval": 0.07763671875, "num_input_tokens_seen": 354174264, "step": 2062 }, { "epoch": 0.5426448346156375, "grad_norm": 6.3010547634359915, "learning_rate": 5e-06, "loss": 0.1807, "num_input_tokens_seen": 354346228, "step": 2063 }, { "epoch": 0.5426448346156375, "loss": 0.2516539692878723, "loss_ce": 
0.0021422426216304302, "loss_iou": 0.57421875, "loss_num": 0.0498046875, "loss_xval": 0.25, "num_input_tokens_seen": 354346228, "step": 2063 }, { "epoch": 0.5429078713750246, "grad_norm": 6.370139521144386, "learning_rate": 5e-06, "loss": 0.1299, "num_input_tokens_seen": 354516236, "step": 2064 }, { "epoch": 0.5429078713750246, "loss": 0.1860380321741104, "loss_ce": 0.0007352972170338035, "loss_iou": 0.310546875, "loss_num": 0.037109375, "loss_xval": 0.185546875, "num_input_tokens_seen": 354516236, "step": 2064 }, { "epoch": 0.5431709081344118, "grad_norm": 5.277591681669997, "learning_rate": 5e-06, "loss": 0.1277, "num_input_tokens_seen": 354688188, "step": 2065 }, { "epoch": 0.5431709081344118, "loss": 0.14346206188201904, "loss_ce": 0.0007008376996964216, "loss_iou": 0.462890625, "loss_num": 0.028564453125, "loss_xval": 0.142578125, "num_input_tokens_seen": 354688188, "step": 2065 }, { "epoch": 0.5434339448937989, "grad_norm": 4.479693164703334, "learning_rate": 5e-06, "loss": 0.1236, "num_input_tokens_seen": 354860592, "step": 2066 }, { "epoch": 0.5434339448937989, "loss": 0.11085185408592224, "loss_ce": 7.304361497517675e-05, "loss_iou": 0.44921875, "loss_num": 0.022216796875, "loss_xval": 0.11083984375, "num_input_tokens_seen": 354860592, "step": 2066 }, { "epoch": 0.543696981653186, "grad_norm": 4.919682261722731, "learning_rate": 5e-06, "loss": 0.155, "num_input_tokens_seen": 355033176, "step": 2067 }, { "epoch": 0.543696981653186, "loss": 0.16546472907066345, "loss_ce": 0.001188602764159441, "loss_iou": 0.5078125, "loss_num": 0.032958984375, "loss_xval": 0.1640625, "num_input_tokens_seen": 355033176, "step": 2067 }, { "epoch": 0.5439600184125731, "grad_norm": 13.936900281461735, "learning_rate": 5e-06, "loss": 0.1039, "num_input_tokens_seen": 355205536, "step": 2068 }, { "epoch": 0.5439600184125731, "loss": 0.08491555601358414, "loss_ce": 1.5654470189474523e-05, "loss_iou": 0.396484375, "loss_num": 0.0169677734375, "loss_xval": 0.0849609375, 
"num_input_tokens_seen": 355205536, "step": 2068 }, { "epoch": 0.5442230551719602, "grad_norm": 7.496864557637642, "learning_rate": 5e-06, "loss": 0.1187, "num_input_tokens_seen": 355375248, "step": 2069 }, { "epoch": 0.5442230551719602, "loss": 0.11474957317113876, "loss_ce": 0.005954409018158913, "loss_iou": 0.671875, "loss_num": 0.021728515625, "loss_xval": 0.10888671875, "num_input_tokens_seen": 355375248, "step": 2069 }, { "epoch": 0.5444860919313474, "grad_norm": 7.5192560384682325, "learning_rate": 5e-06, "loss": 0.1416, "num_input_tokens_seen": 355545380, "step": 2070 }, { "epoch": 0.5444860919313474, "loss": 0.156136155128479, "loss_ce": 0.0023580677807331085, "loss_iou": 0.41015625, "loss_num": 0.03076171875, "loss_xval": 0.1533203125, "num_input_tokens_seen": 355545380, "step": 2070 }, { "epoch": 0.5447491286907346, "grad_norm": 5.871220250123853, "learning_rate": 5e-06, "loss": 0.1422, "num_input_tokens_seen": 355717808, "step": 2071 }, { "epoch": 0.5447491286907346, "loss": 0.1303853988647461, "loss_ce": 0.0030660659540444613, "loss_iou": 0.443359375, "loss_num": 0.0255126953125, "loss_xval": 0.126953125, "num_input_tokens_seen": 355717808, "step": 2071 }, { "epoch": 0.5450121654501217, "grad_norm": 5.18139773459563, "learning_rate": 5e-06, "loss": 0.1021, "num_input_tokens_seen": 355889920, "step": 2072 }, { "epoch": 0.5450121654501217, "loss": 0.08777904510498047, "loss_ce": 0.0009260187507607043, "loss_iou": 0.51953125, "loss_num": 0.017333984375, "loss_xval": 0.0869140625, "num_input_tokens_seen": 355889920, "step": 2072 }, { "epoch": 0.5452752022095088, "grad_norm": 5.534523394507571, "learning_rate": 5e-06, "loss": 0.1325, "num_input_tokens_seen": 356062284, "step": 2073 }, { "epoch": 0.5452752022095088, "loss": 0.07954730838537216, "loss_ce": 0.0016664512222632766, "loss_iou": 0.515625, "loss_num": 0.01556396484375, "loss_xval": 0.078125, "num_input_tokens_seen": 356062284, "step": 2073 }, { "epoch": 0.5455382389688959, "grad_norm": 
5.798318301378571, "learning_rate": 5e-06, "loss": 0.13, "num_input_tokens_seen": 356234572, "step": 2074 }, { "epoch": 0.5455382389688959, "loss": 0.11368724703788757, "loss_ce": 0.0001923761737998575, "loss_iou": 0.43359375, "loss_num": 0.022705078125, "loss_xval": 0.11328125, "num_input_tokens_seen": 356234572, "step": 2074 }, { "epoch": 0.5458012757282831, "grad_norm": 7.153408449848725, "learning_rate": 5e-06, "loss": 0.1113, "num_input_tokens_seen": 356406460, "step": 2075 }, { "epoch": 0.5458012757282831, "loss": 0.12134350836277008, "loss_ce": 0.0004023421206511557, "loss_iou": 0.55078125, "loss_num": 0.024169921875, "loss_xval": 0.12109375, "num_input_tokens_seen": 356406460, "step": 2075 }, { "epoch": 0.5460643124876702, "grad_norm": 5.616257612175736, "learning_rate": 5e-06, "loss": 0.1518, "num_input_tokens_seen": 356578828, "step": 2076 }, { "epoch": 0.5460643124876702, "loss": 0.15952152013778687, "loss_ce": 0.007452425081282854, "loss_iou": 0.44140625, "loss_num": 0.0303955078125, "loss_xval": 0.15234375, "num_input_tokens_seen": 356578828, "step": 2076 }, { "epoch": 0.5463273492470573, "grad_norm": 4.268860828505962, "learning_rate": 5e-06, "loss": 0.1236, "num_input_tokens_seen": 356750940, "step": 2077 }, { "epoch": 0.5463273492470573, "loss": 0.12928339838981628, "loss_ce": 0.0006517980364151299, "loss_iou": 0.53125, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 356750940, "step": 2077 }, { "epoch": 0.5465903860064444, "grad_norm": 5.706661955848177, "learning_rate": 5e-06, "loss": 0.1054, "num_input_tokens_seen": 356923100, "step": 2078 }, { "epoch": 0.5465903860064444, "loss": 0.07689663022756577, "loss_ce": 0.002494774293154478, "loss_iou": 0.51171875, "loss_num": 0.014892578125, "loss_xval": 0.07421875, "num_input_tokens_seen": 356923100, "step": 2078 }, { "epoch": 0.5468534227658315, "grad_norm": 6.622312056596725, "learning_rate": 5e-06, "loss": 0.1513, "num_input_tokens_seen": 357095216, "step": 2079 }, { 
"epoch": 0.5468534227658315, "loss": 0.2388056218624115, "loss_ce": 0.0006769794854335487, "loss_iou": 0.353515625, "loss_num": 0.047607421875, "loss_xval": 0.23828125, "num_input_tokens_seen": 357095216, "step": 2079 }, { "epoch": 0.5471164595252187, "grad_norm": 5.101694254730695, "learning_rate": 5e-06, "loss": 0.131, "num_input_tokens_seen": 357265716, "step": 2080 }, { "epoch": 0.5471164595252187, "loss": 0.14891289174556732, "loss_ce": 0.0022454019635915756, "loss_iou": 0.482421875, "loss_num": 0.029296875, "loss_xval": 0.146484375, "num_input_tokens_seen": 357265716, "step": 2080 }, { "epoch": 0.5473794962846058, "grad_norm": 7.015564745743221, "learning_rate": 5e-06, "loss": 0.1375, "num_input_tokens_seen": 357437784, "step": 2081 }, { "epoch": 0.5473794962846058, "loss": 0.09505030512809753, "loss_ce": 0.003283948404714465, "loss_iou": 0.462890625, "loss_num": 0.018310546875, "loss_xval": 0.091796875, "num_input_tokens_seen": 357437784, "step": 2081 }, { "epoch": 0.5476425330439929, "grad_norm": 6.361890201898857, "learning_rate": 5e-06, "loss": 0.1466, "num_input_tokens_seen": 357609848, "step": 2082 }, { "epoch": 0.5476425330439929, "loss": 0.11097072064876556, "loss_ce": 0.0005886423168703914, "loss_iou": 0.5859375, "loss_num": 0.0220947265625, "loss_xval": 0.1103515625, "num_input_tokens_seen": 357609848, "step": 2082 }, { "epoch": 0.54790556980338, "grad_norm": 11.534449289054661, "learning_rate": 5e-06, "loss": 0.1522, "num_input_tokens_seen": 357782120, "step": 2083 }, { "epoch": 0.54790556980338, "loss": 0.20264874398708344, "loss_ce": 0.0027280959766358137, "loss_iou": 0.322265625, "loss_num": 0.0400390625, "loss_xval": 0.2001953125, "num_input_tokens_seen": 357782120, "step": 2083 }, { "epoch": 0.5481686065627671, "grad_norm": 5.295463538809882, "learning_rate": 5e-06, "loss": 0.1303, "num_input_tokens_seen": 357954088, "step": 2084 }, { "epoch": 0.5481686065627671, "loss": 0.13939827680587769, "loss_ce": 0.0004822692717425525, "loss_iou": 
0.52734375, "loss_num": 0.02783203125, "loss_xval": 0.138671875, "num_input_tokens_seen": 357954088, "step": 2084 }, { "epoch": 0.5484316433221542, "grad_norm": 5.108081687062298, "learning_rate": 5e-06, "loss": 0.1277, "num_input_tokens_seen": 358123060, "step": 2085 }, { "epoch": 0.5484316433221542, "loss": 0.11673710495233536, "loss_ce": 0.002631876850500703, "loss_iou": 0.53125, "loss_num": 0.0228271484375, "loss_xval": 0.1142578125, "num_input_tokens_seen": 358123060, "step": 2085 }, { "epoch": 0.5486946800815414, "grad_norm": 8.727300119230549, "learning_rate": 5e-06, "loss": 0.1803, "num_input_tokens_seen": 358295276, "step": 2086 }, { "epoch": 0.5486946800815414, "loss": 0.21835477650165558, "loss_ce": 0.0030837799422442913, "loss_iou": 0.51171875, "loss_num": 0.04296875, "loss_xval": 0.21484375, "num_input_tokens_seen": 358295276, "step": 2086 }, { "epoch": 0.5489577168409285, "grad_norm": 11.066658379743997, "learning_rate": 5e-06, "loss": 0.1236, "num_input_tokens_seen": 358467252, "step": 2087 }, { "epoch": 0.5489577168409285, "loss": 0.10189958661794662, "loss_ce": 0.00464006420224905, "loss_iou": 0.5390625, "loss_num": 0.0194091796875, "loss_xval": 0.09716796875, "num_input_tokens_seen": 358467252, "step": 2087 }, { "epoch": 0.5492207536003156, "grad_norm": 6.149852742286746, "learning_rate": 5e-06, "loss": 0.1068, "num_input_tokens_seen": 358639392, "step": 2088 }, { "epoch": 0.5492207536003156, "loss": 0.11351503431797028, "loss_ce": 0.0006915500853210688, "loss_iou": 0.4375, "loss_num": 0.0225830078125, "loss_xval": 0.11279296875, "num_input_tokens_seen": 358639392, "step": 2088 }, { "epoch": 0.5494837903597027, "grad_norm": 4.11715937749224, "learning_rate": 5e-06, "loss": 0.0832, "num_input_tokens_seen": 358811532, "step": 2089 }, { "epoch": 0.5494837903597027, "loss": 0.06601230055093765, "loss_ce": 0.0033291929867118597, "loss_iou": 0.6640625, "loss_num": 0.01251220703125, "loss_xval": 0.0625, "num_input_tokens_seen": 358811532, "step": 2089 }, 
{ "epoch": 0.5497468271190898, "grad_norm": 5.926803480110229, "learning_rate": 5e-06, "loss": 0.1478, "num_input_tokens_seen": 358982120, "step": 2090 }, { "epoch": 0.5497468271190898, "loss": 0.165444478392601, "loss_ce": 0.0021449108608067036, "loss_iou": 0.40234375, "loss_num": 0.03271484375, "loss_xval": 0.1630859375, "num_input_tokens_seen": 358982120, "step": 2090 }, { "epoch": 0.5500098638784771, "grad_norm": 3.8279286211407215, "learning_rate": 5e-06, "loss": 0.1365, "num_input_tokens_seen": 359154336, "step": 2091 }, { "epoch": 0.5500098638784771, "loss": 0.1499071568250656, "loss_ce": 0.0012255202746018767, "loss_iou": 0.46484375, "loss_num": 0.0296630859375, "loss_xval": 0.1484375, "num_input_tokens_seen": 359154336, "step": 2091 }, { "epoch": 0.5502729006378642, "grad_norm": 11.488893840776536, "learning_rate": 5e-06, "loss": 0.1522, "num_input_tokens_seen": 359326324, "step": 2092 }, { "epoch": 0.5502729006378642, "loss": 0.169376939535141, "loss_ce": 0.0019880137406289577, "loss_iou": 0.50390625, "loss_num": 0.033447265625, "loss_xval": 0.1669921875, "num_input_tokens_seen": 359326324, "step": 2092 }, { "epoch": 0.5505359373972513, "grad_norm": 7.053437919021253, "learning_rate": 5e-06, "loss": 0.1738, "num_input_tokens_seen": 359498532, "step": 2093 }, { "epoch": 0.5505359373972513, "loss": 0.17925216257572174, "loss_ce": 0.003562463214620948, "loss_iou": 0.57421875, "loss_num": 0.03515625, "loss_xval": 0.17578125, "num_input_tokens_seen": 359498532, "step": 2093 }, { "epoch": 0.5507989741566384, "grad_norm": 4.495803041591062, "learning_rate": 5e-06, "loss": 0.1178, "num_input_tokens_seen": 359668724, "step": 2094 }, { "epoch": 0.5507989741566384, "loss": 0.16115409135818481, "loss_ce": 0.001760771730914712, "loss_iou": 0.5546875, "loss_num": 0.031982421875, "loss_xval": 0.1591796875, "num_input_tokens_seen": 359668724, "step": 2094 }, { "epoch": 0.5510620109160255, "grad_norm": 4.2291818189369135, "learning_rate": 5e-06, "loss": 0.0761, 
"num_input_tokens_seen": 359841036, "step": 2095 }, { "epoch": 0.5510620109160255, "loss": 0.04658431187272072, "loss_ce": 0.000533288111910224, "loss_iou": null, "loss_num": 0.00921630859375, "loss_xval": 0.046142578125, "num_input_tokens_seen": 359841036, "step": 2095 }, { "epoch": 0.5513250476754127, "grad_norm": 14.952283770001413, "learning_rate": 5e-06, "loss": 0.1281, "num_input_tokens_seen": 360013076, "step": 2096 }, { "epoch": 0.5513250476754127, "loss": 0.056577593088150024, "loss_ce": 0.00405684020370245, "loss_iou": 0.55078125, "loss_num": 0.010498046875, "loss_xval": 0.052490234375, "num_input_tokens_seen": 360013076, "step": 2096 }, { "epoch": 0.5515880844347998, "grad_norm": 6.919816354273125, "learning_rate": 5e-06, "loss": 0.1052, "num_input_tokens_seen": 360181792, "step": 2097 }, { "epoch": 0.5515880844347998, "loss": 0.08817453682422638, "loss_ce": 0.0007874465081840754, "loss_iou": 0.546875, "loss_num": 0.0174560546875, "loss_xval": 0.08740234375, "num_input_tokens_seen": 360181792, "step": 2097 }, { "epoch": 0.5518511211941869, "grad_norm": 4.617511415402225, "learning_rate": 5e-06, "loss": 0.1043, "num_input_tokens_seen": 360353784, "step": 2098 }, { "epoch": 0.5518511211941869, "loss": 0.10340078175067902, "loss_ce": 0.0006938728038221598, "loss_iou": 0.5234375, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 360353784, "step": 2098 }, { "epoch": 0.552114157953574, "grad_norm": 21.83477585251536, "learning_rate": 5e-06, "loss": 0.1008, "num_input_tokens_seen": 360526236, "step": 2099 }, { "epoch": 0.552114157953574, "loss": 0.09873877465724945, "loss_ce": 0.005629643332213163, "loss_iou": 0.6171875, "loss_num": 0.0186767578125, "loss_xval": 0.09326171875, "num_input_tokens_seen": 360526236, "step": 2099 }, { "epoch": 0.5523771947129611, "grad_norm": 22.010760760138638, "learning_rate": 5e-06, "loss": 0.1252, "num_input_tokens_seen": 360698124, "step": 2100 }, { "epoch": 0.5523771947129611, "loss": 
0.1067737340927124, "loss_ce": 0.0001147918519563973, "loss_iou": 0.44921875, "loss_num": 0.0213623046875, "loss_xval": 0.1064453125, "num_input_tokens_seen": 360698124, "step": 2100 }, { "epoch": 0.5526402314723483, "grad_norm": 8.309454312868692, "learning_rate": 5e-06, "loss": 0.1585, "num_input_tokens_seen": 360867340, "step": 2101 }, { "epoch": 0.5526402314723483, "loss": 0.1460711508989334, "loss_ce": 0.002302848733961582, "loss_iou": 0.6171875, "loss_num": 0.02880859375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 360867340, "step": 2101 }, { "epoch": 0.5529032682317354, "grad_norm": 15.176993944828276, "learning_rate": 5e-06, "loss": 0.1412, "num_input_tokens_seen": 361039524, "step": 2102 }, { "epoch": 0.5529032682317354, "loss": 0.18546344339847565, "loss_ce": 0.003029361367225647, "loss_iou": 0.546875, "loss_num": 0.03662109375, "loss_xval": 0.1826171875, "num_input_tokens_seen": 361039524, "step": 2102 }, { "epoch": 0.5531663049911225, "grad_norm": 7.240375850988684, "learning_rate": 5e-06, "loss": 0.0944, "num_input_tokens_seen": 361212152, "step": 2103 }, { "epoch": 0.5531663049911225, "loss": 0.1453348845243454, "loss_ce": 0.0008341491920873523, "loss_iou": 0.65625, "loss_num": 0.0289306640625, "loss_xval": 0.14453125, "num_input_tokens_seen": 361212152, "step": 2103 }, { "epoch": 0.5534293417505096, "grad_norm": 4.504467098918756, "learning_rate": 5e-06, "loss": 0.1047, "num_input_tokens_seen": 361384636, "step": 2104 }, { "epoch": 0.5534293417505096, "loss": 0.14879915118217468, "loss_ce": 0.0031692716293036938, "loss_iou": 0.54296875, "loss_num": 0.0291748046875, "loss_xval": 0.1455078125, "num_input_tokens_seen": 361384636, "step": 2104 }, { "epoch": 0.5536923785098967, "grad_norm": 14.969295535193908, "learning_rate": 5e-06, "loss": 0.1179, "num_input_tokens_seen": 361556736, "step": 2105 }, { "epoch": 0.5536923785098967, "loss": 0.14452342689037323, "loss_ce": 0.0027692681178450584, "loss_iou": 0.46875, "loss_num": 0.0283203125, 
"loss_xval": 0.1416015625, "num_input_tokens_seen": 361556736, "step": 2105 }, { "epoch": 0.5539554152692839, "grad_norm": 4.430127573397192, "learning_rate": 5e-06, "loss": 0.1235, "num_input_tokens_seen": 361726936, "step": 2106 }, { "epoch": 0.5539554152692839, "loss": 0.22009092569351196, "loss_ce": 0.0009747114963829517, "loss_iou": 0.431640625, "loss_num": 0.043701171875, "loss_xval": 0.21875, "num_input_tokens_seen": 361726936, "step": 2106 }, { "epoch": 0.554218452028671, "grad_norm": 4.147747665163274, "learning_rate": 5e-06, "loss": 0.1161, "num_input_tokens_seen": 361899184, "step": 2107 }, { "epoch": 0.554218452028671, "loss": 0.13676050305366516, "loss_ce": 0.0002858861698769033, "loss_iou": 0.703125, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 361899184, "step": 2107 }, { "epoch": 0.5544814887880581, "grad_norm": 3.7276098432513174, "learning_rate": 5e-06, "loss": 0.1043, "num_input_tokens_seen": 362071768, "step": 2108 }, { "epoch": 0.5544814887880581, "loss": 0.15069580078125, "loss_ce": 0.0011291508562862873, "loss_iou": 0.38671875, "loss_num": 0.0299072265625, "loss_xval": 0.1494140625, "num_input_tokens_seen": 362071768, "step": 2108 }, { "epoch": 0.5547445255474452, "grad_norm": 5.320538695832165, "learning_rate": 5e-06, "loss": 0.1057, "num_input_tokens_seen": 362240216, "step": 2109 }, { "epoch": 0.5547445255474452, "loss": 0.09280645847320557, "loss_ce": 0.001528381835669279, "loss_iou": 0.52734375, "loss_num": 0.018310546875, "loss_xval": 0.09130859375, "num_input_tokens_seen": 362240216, "step": 2109 }, { "epoch": 0.5550075623068323, "grad_norm": 11.977017579320059, "learning_rate": 5e-06, "loss": 0.1449, "num_input_tokens_seen": 362412692, "step": 2110 }, { "epoch": 0.5550075623068323, "loss": 0.09514741599559784, "loss_ce": 0.005608838051557541, "loss_iou": 0.498046875, "loss_num": 0.0179443359375, "loss_xval": 0.08935546875, "num_input_tokens_seen": 362412692, "step": 2110 }, { "epoch": 0.5552705990662195, 
"grad_norm": 5.133455963412773, "learning_rate": 5e-06, "loss": 0.1081, "num_input_tokens_seen": 362583480, "step": 2111 }, { "epoch": 0.5552705990662195, "loss": 0.20054732263088226, "loss_ce": 0.0018321146490052342, "loss_iou": null, "loss_num": 0.039794921875, "loss_xval": 0.1982421875, "num_input_tokens_seen": 362583480, "step": 2111 }, { "epoch": 0.5555336358256067, "grad_norm": 13.046079025621871, "learning_rate": 5e-06, "loss": 0.1219, "num_input_tokens_seen": 362755416, "step": 2112 }, { "epoch": 0.5555336358256067, "loss": 0.09682411700487137, "loss_ce": 0.005820699501782656, "loss_iou": 0.5234375, "loss_num": 0.0181884765625, "loss_xval": 0.0908203125, "num_input_tokens_seen": 362755416, "step": 2112 }, { "epoch": 0.5557966725849938, "grad_norm": 5.170449556654708, "learning_rate": 5e-06, "loss": 0.1413, "num_input_tokens_seen": 362927780, "step": 2113 }, { "epoch": 0.5557966725849938, "loss": 0.1499776542186737, "loss_ce": 0.0003499832237139344, "loss_iou": 0.5234375, "loss_num": 0.0299072265625, "loss_xval": 0.1494140625, "num_input_tokens_seen": 362927780, "step": 2113 }, { "epoch": 0.5560597093443809, "grad_norm": 4.492587389090409, "learning_rate": 5e-06, "loss": 0.1634, "num_input_tokens_seen": 363100176, "step": 2114 }, { "epoch": 0.5560597093443809, "loss": 0.12822586297988892, "loss_ce": 0.001638943562284112, "loss_iou": 0.435546875, "loss_num": 0.0252685546875, "loss_xval": 0.126953125, "num_input_tokens_seen": 363100176, "step": 2114 }, { "epoch": 0.556322746103768, "grad_norm": 6.53280654225724, "learning_rate": 5e-06, "loss": 0.0877, "num_input_tokens_seen": 363272032, "step": 2115 }, { "epoch": 0.556322746103768, "loss": 0.09251531958580017, "loss_ce": 0.0013898293254896998, "loss_iou": 0.59375, "loss_num": 0.0181884765625, "loss_xval": 0.09130859375, "num_input_tokens_seen": 363272032, "step": 2115 }, { "epoch": 0.5565857828631551, "grad_norm": 6.772073322495271, "learning_rate": 5e-06, "loss": 0.1208, "num_input_tokens_seen": 363444356, 
"step": 2116 }, { "epoch": 0.5565857828631551, "loss": 0.22190499305725098, "loss_ce": 0.00468086265027523, "loss_iou": 0.486328125, "loss_num": 0.04345703125, "loss_xval": 0.216796875, "num_input_tokens_seen": 363444356, "step": 2116 }, { "epoch": 0.5568488196225423, "grad_norm": 4.259888500977366, "learning_rate": 5e-06, "loss": 0.122, "num_input_tokens_seen": 363616480, "step": 2117 }, { "epoch": 0.5568488196225423, "loss": 0.11629009246826172, "loss_ce": 0.0017881433013826609, "loss_iou": 0.59765625, "loss_num": 0.02294921875, "loss_xval": 0.1142578125, "num_input_tokens_seen": 363616480, "step": 2117 }, { "epoch": 0.5571118563819294, "grad_norm": 5.727482587569898, "learning_rate": 5e-06, "loss": 0.12, "num_input_tokens_seen": 363788792, "step": 2118 }, { "epoch": 0.5571118563819294, "loss": 0.14177094399929047, "loss_ce": 0.0018478452693670988, "loss_iou": 0.357421875, "loss_num": 0.0279541015625, "loss_xval": 0.1396484375, "num_input_tokens_seen": 363788792, "step": 2118 }, { "epoch": 0.5573748931413165, "grad_norm": 10.912411227157135, "learning_rate": 5e-06, "loss": 0.1523, "num_input_tokens_seen": 363960764, "step": 2119 }, { "epoch": 0.5573748931413165, "loss": 0.1305292397737503, "loss_ce": 0.0011652277316898108, "loss_iou": 0.46484375, "loss_num": 0.02587890625, "loss_xval": 0.12890625, "num_input_tokens_seen": 363960764, "step": 2119 }, { "epoch": 0.5576379299007036, "grad_norm": 3.94400025085083, "learning_rate": 5e-06, "loss": 0.1587, "num_input_tokens_seen": 364133156, "step": 2120 }, { "epoch": 0.5576379299007036, "loss": 0.19977153837680817, "loss_ce": 0.008945131674408913, "loss_iou": 0.40625, "loss_num": 0.0380859375, "loss_xval": 0.1904296875, "num_input_tokens_seen": 364133156, "step": 2120 }, { "epoch": 0.5579009666600907, "grad_norm": 3.6301701938092217, "learning_rate": 5e-06, "loss": 0.1377, "num_input_tokens_seen": 364305580, "step": 2121 }, { "epoch": 0.5579009666600907, "loss": 0.1176564022898674, "loss_ce": 0.0034901422914117575, 
"loss_iou": 0.55859375, "loss_num": 0.0228271484375, "loss_xval": 0.1142578125, "num_input_tokens_seen": 364305580, "step": 2121 }, { "epoch": 0.5581640034194779, "grad_norm": 19.838531274898244, "learning_rate": 5e-06, "loss": 0.091, "num_input_tokens_seen": 364478040, "step": 2122 }, { "epoch": 0.5581640034194779, "loss": 0.06633633375167847, "loss_ce": 0.0024020099081099033, "loss_iou": 0.53125, "loss_num": 0.01275634765625, "loss_xval": 0.06396484375, "num_input_tokens_seen": 364478040, "step": 2122 }, { "epoch": 0.558427040178865, "grad_norm": 9.488127206942975, "learning_rate": 5e-06, "loss": 0.0873, "num_input_tokens_seen": 364647728, "step": 2123 }, { "epoch": 0.558427040178865, "loss": 0.07145293056964874, "loss_ce": 0.00010283520532539114, "loss_iou": 0.5, "loss_num": 0.0142822265625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 364647728, "step": 2123 }, { "epoch": 0.5586900769382521, "grad_norm": 4.8177202903523115, "learning_rate": 5e-06, "loss": 0.1471, "num_input_tokens_seen": 364816920, "step": 2124 }, { "epoch": 0.5586900769382521, "loss": 0.13919737935066223, "loss_ce": 0.002829591976478696, "loss_iou": 0.494140625, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 364816920, "step": 2124 }, { "epoch": 0.5589531136976392, "grad_norm": 3.304918735754454, "learning_rate": 5e-06, "loss": 0.0913, "num_input_tokens_seen": 364989156, "step": 2125 }, { "epoch": 0.5589531136976392, "loss": 0.062035560607910156, "loss_ce": 0.002953530289232731, "loss_iou": 0.412109375, "loss_num": 0.01177978515625, "loss_xval": 0.05908203125, "num_input_tokens_seen": 364989156, "step": 2125 }, { "epoch": 0.5592161504570263, "grad_norm": 2.856854904917954, "learning_rate": 5e-06, "loss": 0.0901, "num_input_tokens_seen": 365161052, "step": 2126 }, { "epoch": 0.5592161504570263, "loss": 0.11198246479034424, "loss_ce": 0.007825973443686962, "loss_iou": 0.33203125, "loss_num": 0.020751953125, "loss_xval": 0.10400390625, "num_input_tokens_seen": 
365161052, "step": 2126 }, { "epoch": 0.5594791872164135, "grad_norm": 3.111770745456122, "learning_rate": 5e-06, "loss": 0.196, "num_input_tokens_seen": 365333284, "step": 2127 }, { "epoch": 0.5594791872164135, "loss": 0.0993409976363182, "loss_ce": 0.00046404742170125246, "loss_iou": 0.61328125, "loss_num": 0.019775390625, "loss_xval": 0.0986328125, "num_input_tokens_seen": 365333284, "step": 2127 }, { "epoch": 0.5597422239758006, "grad_norm": 4.17183040056909, "learning_rate": 5e-06, "loss": 0.1189, "num_input_tokens_seen": 365505612, "step": 2128 }, { "epoch": 0.5597422239758006, "loss": 0.08107022941112518, "loss_ce": 0.000320716411806643, "loss_iou": 0.62890625, "loss_num": 0.01611328125, "loss_xval": 0.08056640625, "num_input_tokens_seen": 365505612, "step": 2128 }, { "epoch": 0.5600052607351877, "grad_norm": 5.3777331039695175, "learning_rate": 5e-06, "loss": 0.1018, "num_input_tokens_seen": 365677880, "step": 2129 }, { "epoch": 0.5600052607351877, "loss": 0.10484224557876587, "loss_ce": 0.0005636783316731453, "loss_iou": 0.5625, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 365677880, "step": 2129 }, { "epoch": 0.5602682974945749, "grad_norm": 13.479247744568044, "learning_rate": 5e-06, "loss": 0.1329, "num_input_tokens_seen": 365849928, "step": 2130 }, { "epoch": 0.5602682974945749, "loss": 0.1259067952632904, "loss_ce": 0.0024021633435040712, "loss_iou": 0.484375, "loss_num": 0.024658203125, "loss_xval": 0.12353515625, "num_input_tokens_seen": 365849928, "step": 2130 }, { "epoch": 0.560531334253962, "grad_norm": 20.004883443678533, "learning_rate": 5e-06, "loss": 0.0994, "num_input_tokens_seen": 366021916, "step": 2131 }, { "epoch": 0.560531334253962, "loss": 0.11340343207120895, "loss_ce": 0.0057374173775315285, "loss_iou": 0.515625, "loss_num": 0.021484375, "loss_xval": 0.107421875, "num_input_tokens_seen": 366021916, "step": 2131 }, { "epoch": 0.5607943710133492, "grad_norm": 11.042521647809682, "learning_rate": 
5e-06, "loss": 0.1328, "num_input_tokens_seen": 366194008, "step": 2132 }, { "epoch": 0.5607943710133492, "loss": 0.09386501461267471, "loss_ce": 0.00017604799359105527, "loss_iou": 0.640625, "loss_num": 0.018798828125, "loss_xval": 0.09375, "num_input_tokens_seen": 366194008, "step": 2132 }, { "epoch": 0.5610574077727363, "grad_norm": 5.842914268629563, "learning_rate": 5e-06, "loss": 0.1394, "num_input_tokens_seen": 366366212, "step": 2133 }, { "epoch": 0.5610574077727363, "loss": 0.11772558093070984, "loss_ce": 0.0009958385489881039, "loss_iou": 0.5390625, "loss_num": 0.0233154296875, "loss_xval": 0.11669921875, "num_input_tokens_seen": 366366212, "step": 2133 }, { "epoch": 0.5613204445321234, "grad_norm": 5.960122855306223, "learning_rate": 5e-06, "loss": 0.1171, "num_input_tokens_seen": 366538220, "step": 2134 }, { "epoch": 0.5613204445321234, "loss": 0.08179056644439697, "loss_ce": 0.003818158758804202, "loss_iou": 0.703125, "loss_num": 0.015625, "loss_xval": 0.078125, "num_input_tokens_seen": 366538220, "step": 2134 }, { "epoch": 0.5615834812915105, "grad_norm": 5.389091778733193, "learning_rate": 5e-06, "loss": 0.1578, "num_input_tokens_seen": 366710096, "step": 2135 }, { "epoch": 0.5615834812915105, "loss": 0.18556632101535797, "loss_ce": 0.0002635964483488351, "loss_iou": 0.408203125, "loss_num": 0.037109375, "loss_xval": 0.185546875, "num_input_tokens_seen": 366710096, "step": 2135 }, { "epoch": 0.5618465180508976, "grad_norm": 7.320192738875152, "learning_rate": 5e-06, "loss": 0.1107, "num_input_tokens_seen": 366880456, "step": 2136 }, { "epoch": 0.5618465180508976, "loss": 0.13861671090126038, "loss_ce": 0.003179695922881365, "loss_iou": 0.498046875, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 366880456, "step": 2136 }, { "epoch": 0.5621095548102847, "grad_norm": 14.114626386817225, "learning_rate": 5e-06, "loss": 0.1369, "num_input_tokens_seen": 367052752, "step": 2137 }, { "epoch": 0.5621095548102847, "loss": 
0.12559227645397186, "loss_ce": 0.001477292738854885, "loss_iou": 0.5234375, "loss_num": 0.0247802734375, "loss_xval": 0.1240234375, "num_input_tokens_seen": 367052752, "step": 2137 }, { "epoch": 0.5623725915696719, "grad_norm": 3.498748268755153, "learning_rate": 5e-06, "loss": 0.0967, "num_input_tokens_seen": 367225060, "step": 2138 }, { "epoch": 0.5623725915696719, "loss": 0.08549857884645462, "loss_ce": 0.000232468664762564, "loss_iou": 0.69921875, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 367225060, "step": 2138 }, { "epoch": 0.562635628329059, "grad_norm": 20.701513936136166, "learning_rate": 5e-06, "loss": 0.0738, "num_input_tokens_seen": 367397332, "step": 2139 }, { "epoch": 0.562635628329059, "loss": 0.08525611460208893, "loss_ce": 0.004125134088099003, "loss_iou": 0.4921875, "loss_num": 0.0162353515625, "loss_xval": 0.0810546875, "num_input_tokens_seen": 367397332, "step": 2139 }, { "epoch": 0.5628986650884461, "grad_norm": 11.983965441231234, "learning_rate": 5e-06, "loss": 0.0957, "num_input_tokens_seen": 367569560, "step": 2140 }, { "epoch": 0.5628986650884461, "loss": 0.06727585196495056, "loss_ce": 0.00010666107118595392, "loss_iou": 0.47265625, "loss_num": 0.013427734375, "loss_xval": 0.0673828125, "num_input_tokens_seen": 367569560, "step": 2140 }, { "epoch": 0.5631617018478332, "grad_norm": 6.398433590682968, "learning_rate": 5e-06, "loss": 0.1279, "num_input_tokens_seen": 367741696, "step": 2141 }, { "epoch": 0.5631617018478332, "loss": 0.16246706247329712, "loss_ce": 0.0022497763857245445, "loss_iou": 0.298828125, "loss_num": 0.031982421875, "loss_xval": 0.16015625, "num_input_tokens_seen": 367741696, "step": 2141 }, { "epoch": 0.5634247386072203, "grad_norm": 4.385698804822032, "learning_rate": 5e-06, "loss": 0.1125, "num_input_tokens_seen": 367913912, "step": 2142 }, { "epoch": 0.5634247386072203, "loss": 0.10307023674249649, "loss_ce": 0.0009889386128634214, "loss_iou": 0.671875, "loss_num": 
0.0203857421875, "loss_xval": 0.10205078125, "num_input_tokens_seen": 367913912, "step": 2142 }, { "epoch": 0.5636877753666075, "grad_norm": 7.856910300327451, "learning_rate": 5e-06, "loss": 0.1906, "num_input_tokens_seen": 368086176, "step": 2143 }, { "epoch": 0.5636877753666075, "loss": 0.19684657454490662, "loss_ce": 0.005714981816709042, "loss_iou": 0.388671875, "loss_num": 0.038330078125, "loss_xval": 0.19140625, "num_input_tokens_seen": 368086176, "step": 2143 }, { "epoch": 0.5639508121259946, "grad_norm": 6.368187003490122, "learning_rate": 5e-06, "loss": 0.1156, "num_input_tokens_seen": 368258440, "step": 2144 }, { "epoch": 0.5639508121259946, "loss": 0.1019875556230545, "loss_ce": 0.0008217811118811369, "loss_iou": 0.52734375, "loss_num": 0.020263671875, "loss_xval": 0.10107421875, "num_input_tokens_seen": 368258440, "step": 2144 }, { "epoch": 0.5642138488853817, "grad_norm": 7.190126438403998, "learning_rate": 5e-06, "loss": 0.1332, "num_input_tokens_seen": 368430620, "step": 2145 }, { "epoch": 0.5642138488853817, "loss": 0.1344844102859497, "loss_ce": 0.00020707116345874965, "loss_iou": 0.546875, "loss_num": 0.02685546875, "loss_xval": 0.134765625, "num_input_tokens_seen": 368430620, "step": 2145 }, { "epoch": 0.5644768856447688, "grad_norm": 6.755563239238797, "learning_rate": 5e-06, "loss": 0.1142, "num_input_tokens_seen": 368603024, "step": 2146 }, { "epoch": 0.5644768856447688, "loss": 0.09109672158956528, "loss_ce": 0.0006883963942527771, "loss_iou": 0.51953125, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 368603024, "step": 2146 }, { "epoch": 0.5647399224041559, "grad_norm": 4.616140839020028, "learning_rate": 5e-06, "loss": 0.1489, "num_input_tokens_seen": 368775624, "step": 2147 }, { "epoch": 0.5647399224041559, "loss": 0.19518432021141052, "loss_ce": 0.002435298403725028, "loss_iou": 0.392578125, "loss_num": 0.03857421875, "loss_xval": 0.1923828125, "num_input_tokens_seen": 368775624, "step": 2147 }, { 
"epoch": 0.5650029591635432, "grad_norm": 11.734928019848066, "learning_rate": 5e-06, "loss": 0.1437, "num_input_tokens_seen": 368945448, "step": 2148 }, { "epoch": 0.5650029591635432, "loss": 0.17952315509319305, "loss_ce": 0.009143512696027756, "loss_iou": 0.5703125, "loss_num": 0.0341796875, "loss_xval": 0.169921875, "num_input_tokens_seen": 368945448, "step": 2148 }, { "epoch": 0.5652659959229303, "grad_norm": 6.710624849462096, "learning_rate": 5e-06, "loss": 0.1518, "num_input_tokens_seen": 369115952, "step": 2149 }, { "epoch": 0.5652659959229303, "loss": 0.13233953714370728, "loss_ce": 0.0019989716820418835, "loss_iou": 0.310546875, "loss_num": 0.026123046875, "loss_xval": 0.1298828125, "num_input_tokens_seen": 369115952, "step": 2149 }, { "epoch": 0.5655290326823174, "grad_norm": 6.669120586380705, "learning_rate": 5e-06, "loss": 0.1352, "num_input_tokens_seen": 369288028, "step": 2150 }, { "epoch": 0.5655290326823174, "loss": 0.08679264783859253, "loss_ce": 0.00695866858586669, "loss_iou": 0.55078125, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 369288028, "step": 2150 }, { "epoch": 0.5657920694417045, "grad_norm": 4.502790600863043, "learning_rate": 5e-06, "loss": 0.1171, "num_input_tokens_seen": 369460112, "step": 2151 }, { "epoch": 0.5657920694417045, "loss": 0.09304537624120712, "loss_ce": 0.002102992497384548, "loss_iou": 0.392578125, "loss_num": 0.0181884765625, "loss_xval": 0.0908203125, "num_input_tokens_seen": 369460112, "step": 2151 }, { "epoch": 0.5660551062010916, "grad_norm": 5.258977022311985, "learning_rate": 5e-06, "loss": 0.0818, "num_input_tokens_seen": 369632404, "step": 2152 }, { "epoch": 0.5660551062010916, "loss": 0.08912669122219086, "loss_ce": 0.0003358002286404371, "loss_iou": 0.41796875, "loss_num": 0.017822265625, "loss_xval": 0.0888671875, "num_input_tokens_seen": 369632404, "step": 2152 }, { "epoch": 0.5663181429604788, "grad_norm": 5.721568404254781, "learning_rate": 5e-06, "loss": 0.1126, 
"num_input_tokens_seen": 369804776, "step": 2153 }, { "epoch": 0.5663181429604788, "loss": 0.10320156812667847, "loss_ce": 0.0007235408993437886, "loss_iou": 0.490234375, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 369804776, "step": 2153 }, { "epoch": 0.5665811797198659, "grad_norm": 4.65695630930855, "learning_rate": 5e-06, "loss": 0.1273, "num_input_tokens_seen": 369976952, "step": 2154 }, { "epoch": 0.5665811797198659, "loss": 0.08642168343067169, "loss_ce": 0.0003621142532210797, "loss_iou": 0.515625, "loss_num": 0.0172119140625, "loss_xval": 0.0859375, "num_input_tokens_seen": 369976952, "step": 2154 }, { "epoch": 0.566844216479253, "grad_norm": 2.957349659939758, "learning_rate": 5e-06, "loss": 0.1046, "num_input_tokens_seen": 370149132, "step": 2155 }, { "epoch": 0.566844216479253, "loss": 0.11206680536270142, "loss_ce": 0.0026002456434071064, "loss_iou": 0.61328125, "loss_num": 0.0218505859375, "loss_xval": 0.109375, "num_input_tokens_seen": 370149132, "step": 2155 }, { "epoch": 0.5671072532386401, "grad_norm": 13.220367815915187, "learning_rate": 5e-06, "loss": 0.1262, "num_input_tokens_seen": 370321528, "step": 2156 }, { "epoch": 0.5671072532386401, "loss": 0.13273131847381592, "loss_ce": 0.0014446950517594814, "loss_iou": 0.427734375, "loss_num": 0.0262451171875, "loss_xval": 0.130859375, "num_input_tokens_seen": 370321528, "step": 2156 }, { "epoch": 0.5673702899980272, "grad_norm": 6.5167958115597955, "learning_rate": 5e-06, "loss": 0.0839, "num_input_tokens_seen": 370493692, "step": 2157 }, { "epoch": 0.5673702899980272, "loss": 0.0516238659620285, "loss_ce": 0.00018648749392013997, "loss_iou": 0.578125, "loss_num": 0.01025390625, "loss_xval": 0.051513671875, "num_input_tokens_seen": 370493692, "step": 2157 }, { "epoch": 0.5676333267574144, "grad_norm": 9.055547000690966, "learning_rate": 5e-06, "loss": 0.1379, "num_input_tokens_seen": 370666128, "step": 2158 }, { "epoch": 0.5676333267574144, "loss": 
0.20648962259292603, "loss_ce": 0.0006485594203695655, "loss_iou": 0.51953125, "loss_num": 0.041259765625, "loss_xval": 0.2060546875, "num_input_tokens_seen": 370666128, "step": 2158 }, { "epoch": 0.5678963635168015, "grad_norm": 20.191615671139235, "learning_rate": 5e-06, "loss": 0.1168, "num_input_tokens_seen": 370838096, "step": 2159 }, { "epoch": 0.5678963635168015, "loss": 0.1389261931180954, "loss_ce": 0.0004679340636357665, "loss_iou": 0.359375, "loss_num": 0.0277099609375, "loss_xval": 0.138671875, "num_input_tokens_seen": 370838096, "step": 2159 }, { "epoch": 0.5681594002761886, "grad_norm": 16.724015569984758, "learning_rate": 5e-06, "loss": 0.0926, "num_input_tokens_seen": 371010276, "step": 2160 }, { "epoch": 0.5681594002761886, "loss": 0.09113931655883789, "loss_ce": 0.004194741137325764, "loss_iou": 0.53125, "loss_num": 0.017333984375, "loss_xval": 0.0869140625, "num_input_tokens_seen": 371010276, "step": 2160 }, { "epoch": 0.5684224370355757, "grad_norm": 9.65595197930286, "learning_rate": 5e-06, "loss": 0.1109, "num_input_tokens_seen": 371180756, "step": 2161 }, { "epoch": 0.5684224370355757, "loss": 0.10710924118757248, "loss_ce": 0.0031358497217297554, "loss_iou": 0.53515625, "loss_num": 0.020751953125, "loss_xval": 0.10400390625, "num_input_tokens_seen": 371180756, "step": 2161 }, { "epoch": 0.5686854737949628, "grad_norm": 4.559805216337552, "learning_rate": 5e-06, "loss": 0.136, "num_input_tokens_seen": 371351216, "step": 2162 }, { "epoch": 0.5686854737949628, "loss": 0.18528233468532562, "loss_ce": 0.0012918633874505758, "loss_iou": 0.421875, "loss_num": 0.036865234375, "loss_xval": 0.18359375, "num_input_tokens_seen": 371351216, "step": 2162 }, { "epoch": 0.5689485105543499, "grad_norm": 3.9132748752015707, "learning_rate": 5e-06, "loss": 0.105, "num_input_tokens_seen": 371521504, "step": 2163 }, { "epoch": 0.5689485105543499, "loss": 0.09377571940422058, "loss_ce": 0.00017830087745096534, "loss_iou": 0.54296875, "loss_num": 0.0186767578125, 
"loss_xval": 0.09375, "num_input_tokens_seen": 371521504, "step": 2163 }, { "epoch": 0.5692115473137371, "grad_norm": 6.221489940852924, "learning_rate": 5e-06, "loss": 0.112, "num_input_tokens_seen": 371693620, "step": 2164 }, { "epoch": 0.5692115473137371, "loss": 0.08065352588891983, "loss_ce": 0.000270225660642609, "loss_iou": 0.62109375, "loss_num": 0.01611328125, "loss_xval": 0.08056640625, "num_input_tokens_seen": 371693620, "step": 2164 }, { "epoch": 0.5694745840731242, "grad_norm": 5.565730297773139, "learning_rate": 5e-06, "loss": 0.1544, "num_input_tokens_seen": 371864020, "step": 2165 }, { "epoch": 0.5694745840731242, "loss": 0.14530430734157562, "loss_ce": 0.0009256468038074672, "loss_iou": 0.466796875, "loss_num": 0.0289306640625, "loss_xval": 0.14453125, "num_input_tokens_seen": 371864020, "step": 2165 }, { "epoch": 0.5697376208325113, "grad_norm": 7.807853954323307, "learning_rate": 5e-06, "loss": 0.1032, "num_input_tokens_seen": 372036364, "step": 2166 }, { "epoch": 0.5697376208325113, "loss": 0.1258363127708435, "loss_ce": 0.00034803448943421245, "loss_iou": 0.5390625, "loss_num": 0.025146484375, "loss_xval": 0.125, "num_input_tokens_seen": 372036364, "step": 2166 }, { "epoch": 0.5700006575918984, "grad_norm": 6.119338737471246, "learning_rate": 5e-06, "loss": 0.1171, "num_input_tokens_seen": 372208520, "step": 2167 }, { "epoch": 0.5700006575918984, "loss": 0.183029443025589, "loss_ce": 0.0006258888752199709, "loss_iou": 0.4609375, "loss_num": 0.03662109375, "loss_xval": 0.1826171875, "num_input_tokens_seen": 372208520, "step": 2167 }, { "epoch": 0.5702636943512855, "grad_norm": 4.796742469743409, "learning_rate": 5e-06, "loss": 0.1689, "num_input_tokens_seen": 372380908, "step": 2168 }, { "epoch": 0.5702636943512855, "loss": 0.24278424680233002, "loss_ce": 0.006517150904983282, "loss_iou": 0.6015625, "loss_num": 0.04736328125, "loss_xval": 0.236328125, "num_input_tokens_seen": 372380908, "step": 2168 }, { "epoch": 0.5705267311106728, "grad_norm": 
7.9441210817884444, "learning_rate": 5e-06, "loss": 0.1272, "num_input_tokens_seen": 372553152, "step": 2169 }, { "epoch": 0.5705267311106728, "loss": 0.1403554081916809, "loss_ce": 0.0024769881274551153, "loss_iou": 0.48046875, "loss_num": 0.027587890625, "loss_xval": 0.1376953125, "num_input_tokens_seen": 372553152, "step": 2169 }, { "epoch": 0.5707897678700599, "grad_norm": 8.945699343509036, "learning_rate": 5e-06, "loss": 0.1424, "num_input_tokens_seen": 372722892, "step": 2170 }, { "epoch": 0.5707897678700599, "loss": 0.17014455795288086, "loss_ce": 0.0013518218183889985, "loss_iou": 0.578125, "loss_num": 0.03369140625, "loss_xval": 0.1689453125, "num_input_tokens_seen": 372722892, "step": 2170 }, { "epoch": 0.571052804629447, "grad_norm": 7.4926547587717405, "learning_rate": 5e-06, "loss": 0.1685, "num_input_tokens_seen": 372895068, "step": 2171 }, { "epoch": 0.571052804629447, "loss": 0.2696492373943329, "loss_ce": 0.0004536675405688584, "loss_iou": 0.40234375, "loss_num": 0.053955078125, "loss_xval": 0.26953125, "num_input_tokens_seen": 372895068, "step": 2171 }, { "epoch": 0.5713158413888341, "grad_norm": 17.833109235832993, "learning_rate": 5e-06, "loss": 0.1442, "num_input_tokens_seen": 373067312, "step": 2172 }, { "epoch": 0.5713158413888341, "loss": 0.09599291533231735, "loss_ce": 0.0006254853797145188, "loss_iou": 0.515625, "loss_num": 0.01904296875, "loss_xval": 0.09521484375, "num_input_tokens_seen": 373067312, "step": 2172 }, { "epoch": 0.5715788781482212, "grad_norm": 14.290103080283112, "learning_rate": 5e-06, "loss": 0.1329, "num_input_tokens_seen": 373239588, "step": 2173 }, { "epoch": 0.5715788781482212, "loss": 0.15090827643871307, "loss_ce": 0.002836985979229212, "loss_iou": 0.451171875, "loss_num": 0.029541015625, "loss_xval": 0.1484375, "num_input_tokens_seen": 373239588, "step": 2173 }, { "epoch": 0.5718419149076084, "grad_norm": 4.534353132879591, "learning_rate": 5e-06, "loss": 0.0843, "num_input_tokens_seen": 373411840, "step": 2174 
}, { "epoch": 0.5718419149076084, "loss": 0.07687939703464508, "loss_ce": 0.0007075219764374197, "loss_iou": 0.578125, "loss_num": 0.01519775390625, "loss_xval": 0.076171875, "num_input_tokens_seen": 373411840, "step": 2174 }, { "epoch": 0.5721049516669955, "grad_norm": 12.524713753292392, "learning_rate": 5e-06, "loss": 0.1176, "num_input_tokens_seen": 373582408, "step": 2175 }, { "epoch": 0.5721049516669955, "loss": 0.17097817361354828, "loss_ce": 0.0006900950102135539, "loss_iou": 0.5859375, "loss_num": 0.033935546875, "loss_xval": 0.169921875, "num_input_tokens_seen": 373582408, "step": 2175 }, { "epoch": 0.5723679884263826, "grad_norm": 18.760082445449868, "learning_rate": 5e-06, "loss": 0.1252, "num_input_tokens_seen": 373754416, "step": 2176 }, { "epoch": 0.5723679884263826, "loss": 0.11672262102365494, "loss_ce": 0.0011525547597557306, "loss_iou": 0.40625, "loss_num": 0.0230712890625, "loss_xval": 0.11572265625, "num_input_tokens_seen": 373754416, "step": 2176 }, { "epoch": 0.5726310251857697, "grad_norm": 8.516129726413212, "learning_rate": 5e-06, "loss": 0.1384, "num_input_tokens_seen": 373922312, "step": 2177 }, { "epoch": 0.5726310251857697, "loss": 0.19126161932945251, "loss_ce": 0.0003436502593103796, "loss_iou": 0.462890625, "loss_num": 0.0380859375, "loss_xval": 0.19140625, "num_input_tokens_seen": 373922312, "step": 2177 }, { "epoch": 0.5728940619451568, "grad_norm": 6.898009634423141, "learning_rate": 5e-06, "loss": 0.1795, "num_input_tokens_seen": 374092720, "step": 2178 }, { "epoch": 0.5728940619451568, "loss": 0.15485724806785583, "loss_ce": 0.0002551883808337152, "loss_iou": 0.6328125, "loss_num": 0.0308837890625, "loss_xval": 0.154296875, "num_input_tokens_seen": 374092720, "step": 2178 }, { "epoch": 0.573157098704544, "grad_norm": 6.043872383339532, "learning_rate": 5e-06, "loss": 0.1307, "num_input_tokens_seen": 374264864, "step": 2179 }, { "epoch": 0.573157098704544, "loss": 0.12720485031604767, "loss_ce": 0.0004958686186000705, 
"loss_iou": 0.466796875, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 374264864, "step": 2179 }, { "epoch": 0.5734201354639311, "grad_norm": 5.166426865666374, "learning_rate": 5e-06, "loss": 0.1473, "num_input_tokens_seen": 374436928, "step": 2180 }, { "epoch": 0.5734201354639311, "loss": 0.10286220163106918, "loss_ce": 0.0005062465788796544, "loss_iou": 0.6328125, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 374436928, "step": 2180 }, { "epoch": 0.5736831722233182, "grad_norm": 3.8181355710391034, "learning_rate": 5e-06, "loss": 0.1051, "num_input_tokens_seen": 374608984, "step": 2181 }, { "epoch": 0.5736831722233182, "loss": 0.1574546992778778, "loss_ce": 0.00025864943745546043, "loss_iou": 0.59375, "loss_num": 0.03125, "loss_xval": 0.1572265625, "num_input_tokens_seen": 374608984, "step": 2181 }, { "epoch": 0.5739462089827053, "grad_norm": 9.672382405473702, "learning_rate": 5e-06, "loss": 0.1434, "num_input_tokens_seen": 374779732, "step": 2182 }, { "epoch": 0.5739462089827053, "loss": 0.17890840768814087, "loss_ce": 0.007033395115286112, "loss_iou": 0.64453125, "loss_num": 0.034423828125, "loss_xval": 0.171875, "num_input_tokens_seen": 374779732, "step": 2182 }, { "epoch": 0.5742092457420924, "grad_norm": 5.19568946256922, "learning_rate": 5e-06, "loss": 0.1326, "num_input_tokens_seen": 374951880, "step": 2183 }, { "epoch": 0.5742092457420924, "loss": 0.11335025727748871, "loss_ce": 0.0024798910599201918, "loss_iou": 0.5859375, "loss_num": 0.0220947265625, "loss_xval": 0.11083984375, "num_input_tokens_seen": 374951880, "step": 2183 }, { "epoch": 0.5744722825014796, "grad_norm": 4.272972418213817, "learning_rate": 5e-06, "loss": 0.1319, "num_input_tokens_seen": 375120908, "step": 2184 }, { "epoch": 0.5744722825014796, "loss": 0.16440820693969727, "loss_ce": 0.000330454291542992, "loss_iou": 0.310546875, "loss_num": 0.03271484375, "loss_xval": 0.1640625, "num_input_tokens_seen": 375120908, "step": 
2184 }, { "epoch": 0.5747353192608667, "grad_norm": 4.877678921853992, "learning_rate": 5e-06, "loss": 0.1378, "num_input_tokens_seen": 375292940, "step": 2185 }, { "epoch": 0.5747353192608667, "loss": 0.19540463387966156, "loss_ce": 0.0004278157721273601, "loss_iou": 0.443359375, "loss_num": 0.0390625, "loss_xval": 0.1953125, "num_input_tokens_seen": 375292940, "step": 2185 }, { "epoch": 0.5749983560202538, "grad_norm": 9.795881894231353, "learning_rate": 5e-06, "loss": 0.1509, "num_input_tokens_seen": 375463220, "step": 2186 }, { "epoch": 0.5749983560202538, "loss": 0.18603767454624176, "loss_ce": 0.0036951417569071054, "loss_iou": 0.48046875, "loss_num": 0.03662109375, "loss_xval": 0.1826171875, "num_input_tokens_seen": 375463220, "step": 2186 }, { "epoch": 0.575261392779641, "grad_norm": 14.3298059229615, "learning_rate": 5e-06, "loss": 0.1165, "num_input_tokens_seen": 375633332, "step": 2187 }, { "epoch": 0.575261392779641, "loss": 0.11824968457221985, "loss_ce": 0.0010011474369093776, "loss_iou": 0.546875, "loss_num": 0.0234375, "loss_xval": 0.1171875, "num_input_tokens_seen": 375633332, "step": 2187 }, { "epoch": 0.575524429539028, "grad_norm": 5.9170529044976305, "learning_rate": 5e-06, "loss": 0.1216, "num_input_tokens_seen": 375803896, "step": 2188 }, { "epoch": 0.575524429539028, "loss": 0.09120282530784607, "loss_ce": 0.0025797830894589424, "loss_iou": 0.5625, "loss_num": 0.0177001953125, "loss_xval": 0.0888671875, "num_input_tokens_seen": 375803896, "step": 2188 }, { "epoch": 0.5757874662984152, "grad_norm": 4.8523353127793944, "learning_rate": 5e-06, "loss": 0.1571, "num_input_tokens_seen": 375976212, "step": 2189 }, { "epoch": 0.5757874662984152, "loss": 0.11096520721912384, "loss_ce": 0.0007662302814424038, "loss_iou": 0.54296875, "loss_num": 0.0220947265625, "loss_xval": 0.1103515625, "num_input_tokens_seen": 375976212, "step": 2189 }, { "epoch": 0.5760505030578024, "grad_norm": 3.7437825333865065, "learning_rate": 5e-06, "loss": 0.1176, 
"num_input_tokens_seen": 376148372, "step": 2190 }, { "epoch": 0.5760505030578024, "loss": 0.1311783641576767, "loss_ce": 0.0013108099810779095, "loss_iou": 0.5546875, "loss_num": 0.0260009765625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 376148372, "step": 2190 }, { "epoch": 0.5763135398171895, "grad_norm": 3.8863610288761765, "learning_rate": 5e-06, "loss": 0.106, "num_input_tokens_seen": 376320448, "step": 2191 }, { "epoch": 0.5763135398171895, "loss": 0.08886295557022095, "loss_ce": 0.00014836144691798836, "loss_iou": 0.53515625, "loss_num": 0.0177001953125, "loss_xval": 0.0888671875, "num_input_tokens_seen": 376320448, "step": 2191 }, { "epoch": 0.5765765765765766, "grad_norm": 26.72560789877691, "learning_rate": 5e-06, "loss": 0.1242, "num_input_tokens_seen": 376492652, "step": 2192 }, { "epoch": 0.5765765765765766, "loss": 0.09363338351249695, "loss_ce": 0.002172202803194523, "loss_iou": 0.458984375, "loss_num": 0.018310546875, "loss_xval": 0.09130859375, "num_input_tokens_seen": 376492652, "step": 2192 }, { "epoch": 0.5768396133359637, "grad_norm": 3.4367691751013334, "learning_rate": 5e-06, "loss": 0.1017, "num_input_tokens_seen": 376665088, "step": 2193 }, { "epoch": 0.5768396133359637, "loss": 0.05514270067214966, "loss_ce": 0.0010502950754016638, "loss_iou": 0.4765625, "loss_num": 0.01080322265625, "loss_xval": 0.05419921875, "num_input_tokens_seen": 376665088, "step": 2193 }, { "epoch": 0.5771026500953508, "grad_norm": 9.788333451803501, "learning_rate": 5e-06, "loss": 0.1029, "num_input_tokens_seen": 376837384, "step": 2194 }, { "epoch": 0.5771026500953508, "loss": 0.09362407773733139, "loss_ce": 0.0004081379738636315, "loss_iou": 0.55078125, "loss_num": 0.0186767578125, "loss_xval": 0.09326171875, "num_input_tokens_seen": 376837384, "step": 2194 }, { "epoch": 0.577365686854738, "grad_norm": 5.1293333388841935, "learning_rate": 5e-06, "loss": 0.1096, "num_input_tokens_seen": 377006200, "step": 2195 }, { "epoch": 0.577365686854738, "loss": 
0.14207029342651367, "loss_ce": 0.0032763422932475805, "loss_iou": 0.54296875, "loss_num": 0.02783203125, "loss_xval": 0.138671875, "num_input_tokens_seen": 377006200, "step": 2195 }, { "epoch": 0.5776287236141251, "grad_norm": 9.582310699127605, "learning_rate": 5e-06, "loss": 0.1034, "num_input_tokens_seen": 377178552, "step": 2196 }, { "epoch": 0.5776287236141251, "loss": 0.14796333014965057, "loss_ce": 0.0009296314674429595, "loss_iou": 0.470703125, "loss_num": 0.0294189453125, "loss_xval": 0.1474609375, "num_input_tokens_seen": 377178552, "step": 2196 }, { "epoch": 0.5778917603735122, "grad_norm": 5.51238614508857, "learning_rate": 5e-06, "loss": 0.1782, "num_input_tokens_seen": 377350508, "step": 2197 }, { "epoch": 0.5778917603735122, "loss": 0.12102293223142624, "loss_ce": 0.011342758312821388, "loss_iou": 0.66015625, "loss_num": 0.02197265625, "loss_xval": 0.10986328125, "num_input_tokens_seen": 377350508, "step": 2197 }, { "epoch": 0.5781547971328993, "grad_norm": 12.700643820920646, "learning_rate": 5e-06, "loss": 0.141, "num_input_tokens_seen": 377522740, "step": 2198 }, { "epoch": 0.5781547971328993, "loss": 0.13567548990249634, "loss_ce": 0.004617748782038689, "loss_iou": 0.53125, "loss_num": 0.026123046875, "loss_xval": 0.130859375, "num_input_tokens_seen": 377522740, "step": 2198 }, { "epoch": 0.5784178338922864, "grad_norm": 10.703599236021498, "learning_rate": 5e-06, "loss": 0.1364, "num_input_tokens_seen": 377694584, "step": 2199 }, { "epoch": 0.5784178338922864, "loss": 0.17266914248466492, "loss_ce": 0.0024420833215117455, "loss_iou": 0.640625, "loss_num": 0.0341796875, "loss_xval": 0.169921875, "num_input_tokens_seen": 377694584, "step": 2199 }, { "epoch": 0.5786808706516736, "grad_norm": 3.7187673646063684, "learning_rate": 5e-06, "loss": 0.0953, "num_input_tokens_seen": 377866484, "step": 2200 }, { "epoch": 0.5786808706516736, "loss": 0.04466433823108673, "loss_ce": 0.0001697081606835127, "loss_iou": 0.486328125, "loss_num": 0.0089111328125, 
"loss_xval": 0.04443359375, "num_input_tokens_seen": 377866484, "step": 2200 }, { "epoch": 0.5789439074110607, "grad_norm": 6.267393061703399, "learning_rate": 5e-06, "loss": 0.1317, "num_input_tokens_seen": 378038564, "step": 2201 }, { "epoch": 0.5789439074110607, "loss": 0.18358194828033447, "loss_ce": 0.0036503085866570473, "loss_iou": 0.416015625, "loss_num": 0.0361328125, "loss_xval": 0.1796875, "num_input_tokens_seen": 378038564, "step": 2201 }, { "epoch": 0.5792069441704478, "grad_norm": 8.846753851466788, "learning_rate": 5e-06, "loss": 0.1165, "num_input_tokens_seen": 378211032, "step": 2202 }, { "epoch": 0.5792069441704478, "loss": 0.14636895060539246, "loss_ce": 0.0015783084090799093, "loss_iou": 0.3203125, "loss_num": 0.0289306640625, "loss_xval": 0.14453125, "num_input_tokens_seen": 378211032, "step": 2202 }, { "epoch": 0.5794699809298349, "grad_norm": 9.056222556282368, "learning_rate": 5e-06, "loss": 0.1862, "num_input_tokens_seen": 378383136, "step": 2203 }, { "epoch": 0.5794699809298349, "loss": 0.10566692054271698, "loss_ce": 0.003677169792354107, "loss_iou": 0.46484375, "loss_num": 0.0203857421875, "loss_xval": 0.10205078125, "num_input_tokens_seen": 378383136, "step": 2203 }, { "epoch": 0.579733017689222, "grad_norm": 4.9933922483079725, "learning_rate": 5e-06, "loss": 0.1063, "num_input_tokens_seen": 378553532, "step": 2204 }, { "epoch": 0.579733017689222, "loss": 0.10826604068279266, "loss_ce": 0.001820725854486227, "loss_iou": 0.74609375, "loss_num": 0.021240234375, "loss_xval": 0.1064453125, "num_input_tokens_seen": 378553532, "step": 2204 }, { "epoch": 0.5799960544486092, "grad_norm": 15.853504685750568, "learning_rate": 5e-06, "loss": 0.0868, "num_input_tokens_seen": 378725804, "step": 2205 }, { "epoch": 0.5799960544486092, "loss": 0.05737042799592018, "loss_ce": 0.0001499689242336899, "loss_iou": 0.451171875, "loss_num": 0.011474609375, "loss_xval": 0.05712890625, "num_input_tokens_seen": 378725804, "step": 2205 }, { "epoch": 
0.5802590912079963, "grad_norm": 10.934142417787772, "learning_rate": 5e-06, "loss": 0.112, "num_input_tokens_seen": 378897924, "step": 2206 }, { "epoch": 0.5802590912079963, "loss": 0.08355730026960373, "loss_ce": 0.004913499113172293, "loss_iou": 0.52734375, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 378897924, "step": 2206 }, { "epoch": 0.5805221279673835, "grad_norm": 12.227334484970761, "learning_rate": 5e-06, "loss": 0.1194, "num_input_tokens_seen": 379070348, "step": 2207 }, { "epoch": 0.5805221279673835, "loss": 0.07382334768772125, "loss_ce": 0.00047435000305995345, "loss_iou": 0.5078125, "loss_num": 0.0146484375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 379070348, "step": 2207 }, { "epoch": 0.5807851647267706, "grad_norm": 5.142567427993278, "learning_rate": 5e-06, "loss": 0.1598, "num_input_tokens_seen": 379242648, "step": 2208 }, { "epoch": 0.5807851647267706, "loss": 0.06755711138248444, "loss_ce": 0.0022494932636618614, "loss_iou": 0.62890625, "loss_num": 0.0130615234375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 379242648, "step": 2208 }, { "epoch": 0.5810482014861577, "grad_norm": 4.5313412775127055, "learning_rate": 5e-06, "loss": 0.1149, "num_input_tokens_seen": 379414684, "step": 2209 }, { "epoch": 0.5810482014861577, "loss": 0.050227776169776917, "loss_ce": 0.0009724035626277328, "loss_iou": 0.478515625, "loss_num": 0.00982666015625, "loss_xval": 0.04931640625, "num_input_tokens_seen": 379414684, "step": 2209 }, { "epoch": 0.5813112382455449, "grad_norm": 51.12006755064005, "learning_rate": 5e-06, "loss": 0.1344, "num_input_tokens_seen": 379582192, "step": 2210 }, { "epoch": 0.5813112382455449, "loss": 0.11350201815366745, "loss_ce": 0.004676333162933588, "loss_iou": 0.546875, "loss_num": 0.021728515625, "loss_xval": 0.10888671875, "num_input_tokens_seen": 379582192, "step": 2210 }, { "epoch": 0.581574275004932, "grad_norm": 18.708708783651804, "learning_rate": 5e-06, "loss": 0.1234, 
"num_input_tokens_seen": 379754536, "step": 2211 }, { "epoch": 0.581574275004932, "loss": 0.09766032546758652, "loss_ce": 0.0012552967527881265, "loss_iou": 0.44140625, "loss_num": 0.019287109375, "loss_xval": 0.09619140625, "num_input_tokens_seen": 379754536, "step": 2211 }, { "epoch": 0.5818373117643191, "grad_norm": 5.58601503702725, "learning_rate": 5e-06, "loss": 0.1617, "num_input_tokens_seen": 379926528, "step": 2212 }, { "epoch": 0.5818373117643191, "loss": 0.18606778979301453, "loss_ce": 0.0023214598186314106, "loss_iou": 0.50390625, "loss_num": 0.036865234375, "loss_xval": 0.18359375, "num_input_tokens_seen": 379926528, "step": 2212 }, { "epoch": 0.5821003485237062, "grad_norm": 3.8208876250172965, "learning_rate": 5e-06, "loss": 0.1334, "num_input_tokens_seen": 380096704, "step": 2213 }, { "epoch": 0.5821003485237062, "loss": 0.18421456217765808, "loss_ce": 0.001994105987250805, "loss_iou": 0.455078125, "loss_num": 0.036376953125, "loss_xval": 0.1826171875, "num_input_tokens_seen": 380096704, "step": 2213 }, { "epoch": 0.5823633852830933, "grad_norm": 4.7316491932517595, "learning_rate": 5e-06, "loss": 0.1152, "num_input_tokens_seen": 380269136, "step": 2214 }, { "epoch": 0.5823633852830933, "loss": 0.23052500188350677, "loss_ce": 0.0009107402293011546, "loss_iou": 0.65625, "loss_num": 0.0458984375, "loss_xval": 0.2294921875, "num_input_tokens_seen": 380269136, "step": 2214 }, { "epoch": 0.5826264220424804, "grad_norm": 5.131239937970846, "learning_rate": 5e-06, "loss": 0.1306, "num_input_tokens_seen": 380441472, "step": 2215 }, { "epoch": 0.5826264220424804, "loss": 0.09817831218242645, "loss_ce": 0.0009187856921926141, "loss_iou": 0.4921875, "loss_num": 0.0194091796875, "loss_xval": 0.09716796875, "num_input_tokens_seen": 380441472, "step": 2215 }, { "epoch": 0.5828894588018676, "grad_norm": 5.520590188036205, "learning_rate": 5e-06, "loss": 0.1, "num_input_tokens_seen": 380613832, "step": 2216 }, { "epoch": 0.5828894588018676, "loss": 
0.09423954784870148, "loss_ce": 0.0019696487579494715, "loss_iou": 0.46875, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 380613832, "step": 2216 }, { "epoch": 0.5831524955612547, "grad_norm": 18.191266422169875, "learning_rate": 5e-06, "loss": 0.1093, "num_input_tokens_seen": 380785988, "step": 2217 }, { "epoch": 0.5831524955612547, "loss": 0.135514497756958, "loss_ce": 0.0037701106630265713, "loss_iou": 0.54296875, "loss_num": 0.0263671875, "loss_xval": 0.1318359375, "num_input_tokens_seen": 380785988, "step": 2217 }, { "epoch": 0.5834155323206418, "grad_norm": 13.995891706301725, "learning_rate": 5e-06, "loss": 0.1004, "num_input_tokens_seen": 380958116, "step": 2218 }, { "epoch": 0.5834155323206418, "loss": 0.07710295170545578, "loss_ce": 0.00120573490858078, "loss_iou": 0.60546875, "loss_num": 0.01519775390625, "loss_xval": 0.07568359375, "num_input_tokens_seen": 380958116, "step": 2218 }, { "epoch": 0.5836785690800289, "grad_norm": 3.998007795342675, "learning_rate": 5e-06, "loss": 0.1081, "num_input_tokens_seen": 381130168, "step": 2219 }, { "epoch": 0.5836785690800289, "loss": 0.17625044286251068, "loss_ce": 0.0008964374428614974, "loss_iou": 0.494140625, "loss_num": 0.03515625, "loss_xval": 0.17578125, "num_input_tokens_seen": 381130168, "step": 2219 }, { "epoch": 0.583941605839416, "grad_norm": 8.315710539211903, "learning_rate": 5e-06, "loss": 0.1583, "num_input_tokens_seen": 381302476, "step": 2220 }, { "epoch": 0.583941605839416, "loss": 0.20635367929935455, "loss_ce": 0.002496248111128807, "loss_iou": 0.5703125, "loss_num": 0.040771484375, "loss_xval": 0.2041015625, "num_input_tokens_seen": 381302476, "step": 2220 }, { "epoch": 0.5842046425988032, "grad_norm": 9.769478710683881, "learning_rate": 5e-06, "loss": 0.1236, "num_input_tokens_seen": 381474700, "step": 2221 }, { "epoch": 0.5842046425988032, "loss": 0.1044168546795845, "loss_ce": 0.0017252071993425488, "loss_iou": 0.447265625, "loss_num": 0.0205078125, 
"loss_xval": 0.1025390625, "num_input_tokens_seen": 381474700, "step": 2221 }, { "epoch": 0.5844676793581903, "grad_norm": 5.959703884673753, "learning_rate": 5e-06, "loss": 0.1345, "num_input_tokens_seen": 381646792, "step": 2222 }, { "epoch": 0.5844676793581903, "loss": 0.20204247534275055, "loss_ce": 0.0024575116112828255, "loss_iou": 0.5625, "loss_num": 0.0400390625, "loss_xval": 0.19921875, "num_input_tokens_seen": 381646792, "step": 2222 }, { "epoch": 0.5847307161175774, "grad_norm": 4.639177291482811, "learning_rate": 5e-06, "loss": 0.1265, "num_input_tokens_seen": 381818872, "step": 2223 }, { "epoch": 0.5847307161175774, "loss": 0.1277788281440735, "loss_ce": 0.007478540297597647, "loss_iou": 0.484375, "loss_num": 0.0240478515625, "loss_xval": 0.1201171875, "num_input_tokens_seen": 381818872, "step": 2223 }, { "epoch": 0.5849937528769645, "grad_norm": 4.556750582512925, "learning_rate": 5e-06, "loss": 0.1011, "num_input_tokens_seen": 381991284, "step": 2224 }, { "epoch": 0.5849937528769645, "loss": 0.06165578216314316, "loss_ce": 0.003062034724280238, "loss_iou": 0.51953125, "loss_num": 0.01171875, "loss_xval": 0.05859375, "num_input_tokens_seen": 381991284, "step": 2224 }, { "epoch": 0.5852567896363516, "grad_norm": 5.717259207619562, "learning_rate": 5e-06, "loss": 0.1228, "num_input_tokens_seen": 382163040, "step": 2225 }, { "epoch": 0.5852567896363516, "loss": 0.08200475573539734, "loss_ce": 0.0018045613542199135, "loss_iou": 0.39453125, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 382163040, "step": 2225 }, { "epoch": 0.5855198263957389, "grad_norm": 4.765363459193876, "learning_rate": 5e-06, "loss": 0.1253, "num_input_tokens_seen": 382335172, "step": 2226 }, { "epoch": 0.5855198263957389, "loss": 0.08970290422439575, "loss_ce": 0.001568139297887683, "loss_iou": 0.7109375, "loss_num": 0.017578125, "loss_xval": 0.087890625, "num_input_tokens_seen": 382335172, "step": 2226 }, { "epoch": 0.585782863155126, "grad_norm": 
5.040130370486427, "learning_rate": 5e-06, "loss": 0.0958, "num_input_tokens_seen": 382507360, "step": 2227 }, { "epoch": 0.585782863155126, "loss": 0.06643694639205933, "loss_ce": 0.0008241523755714297, "loss_iou": 0.5390625, "loss_num": 0.01312255859375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 382507360, "step": 2227 }, { "epoch": 0.5860458999145131, "grad_norm": 27.12917591646992, "learning_rate": 5e-06, "loss": 0.1669, "num_input_tokens_seen": 382677836, "step": 2228 }, { "epoch": 0.5860458999145131, "loss": 0.11577419936656952, "loss_ce": 0.0032864054664969444, "loss_iou": 0.53515625, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 382677836, "step": 2228 }, { "epoch": 0.5863089366739002, "grad_norm": 5.353068404352155, "learning_rate": 5e-06, "loss": 0.1262, "num_input_tokens_seen": 382849968, "step": 2229 }, { "epoch": 0.5863089366739002, "loss": 0.11902253329753876, "loss_ce": 0.0011636477429419756, "loss_iou": 0.466796875, "loss_num": 0.0235595703125, "loss_xval": 0.11767578125, "num_input_tokens_seen": 382849968, "step": 2229 }, { "epoch": 0.5865719734332873, "grad_norm": 44.6262248448857, "learning_rate": 5e-06, "loss": 0.1062, "num_input_tokens_seen": 383022244, "step": 2230 }, { "epoch": 0.5865719734332873, "loss": 0.08623628318309784, "loss_ce": 0.0015957842115312815, "loss_iou": 0.62109375, "loss_num": 0.0169677734375, "loss_xval": 0.08447265625, "num_input_tokens_seen": 383022244, "step": 2230 }, { "epoch": 0.5868350101926745, "grad_norm": 11.243592990037332, "learning_rate": 5e-06, "loss": 0.162, "num_input_tokens_seen": 383190620, "step": 2231 }, { "epoch": 0.5868350101926745, "loss": 0.11198394745588303, "loss_ce": 0.0013882413040846586, "loss_iou": 0.490234375, "loss_num": 0.0220947265625, "loss_xval": 0.1103515625, "num_input_tokens_seen": 383190620, "step": 2231 }, { "epoch": 0.5870980469520616, "grad_norm": 4.97022834965809, "learning_rate": 5e-06, "loss": 0.1225, "num_input_tokens_seen": 383362980, 
"step": 2232 }, { "epoch": 0.5870980469520616, "loss": 0.17192208766937256, "loss_ce": 0.0035566147416830063, "loss_iou": 0.447265625, "loss_num": 0.03369140625, "loss_xval": 0.16796875, "num_input_tokens_seen": 383362980, "step": 2232 }, { "epoch": 0.5873610837114487, "grad_norm": 4.21141022600957, "learning_rate": 5e-06, "loss": 0.1084, "num_input_tokens_seen": 383535200, "step": 2233 }, { "epoch": 0.5873610837114487, "loss": 0.1112288236618042, "loss_ce": 0.00690448796376586, "loss_iou": 0.56640625, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 383535200, "step": 2233 }, { "epoch": 0.5876241204708358, "grad_norm": 4.036382922773338, "learning_rate": 5e-06, "loss": 0.0752, "num_input_tokens_seen": 383705348, "step": 2234 }, { "epoch": 0.5876241204708358, "loss": 0.05898036062717438, "loss_ce": 0.001027482096105814, "loss_iou": 0.59765625, "loss_num": 0.0115966796875, "loss_xval": 0.057861328125, "num_input_tokens_seen": 383705348, "step": 2234 }, { "epoch": 0.5878871572302229, "grad_norm": 7.244981585553582, "learning_rate": 5e-06, "loss": 0.1284, "num_input_tokens_seen": 383877692, "step": 2235 }, { "epoch": 0.5878871572302229, "loss": 0.10776747018098831, "loss_ce": 0.0007423229981213808, "loss_iou": 0.58984375, "loss_num": 0.0213623046875, "loss_xval": 0.10693359375, "num_input_tokens_seen": 383877692, "step": 2235 }, { "epoch": 0.58815019398961, "grad_norm": 3.396542457402831, "learning_rate": 5e-06, "loss": 0.1261, "num_input_tokens_seen": 384049540, "step": 2236 }, { "epoch": 0.58815019398961, "loss": 0.1896773874759674, "loss_ce": 0.0011398009955883026, "loss_iou": 0.72265625, "loss_num": 0.03759765625, "loss_xval": 0.1884765625, "num_input_tokens_seen": 384049540, "step": 2236 }, { "epoch": 0.5884132307489972, "grad_norm": 8.64901320262372, "learning_rate": 5e-06, "loss": 0.1399, "num_input_tokens_seen": 384221584, "step": 2237 }, { "epoch": 0.5884132307489972, "loss": 0.12090113013982773, "loss_ce": 
0.0009365270379930735, "loss_iou": 0.5078125, "loss_num": 0.02392578125, "loss_xval": 0.1201171875, "num_input_tokens_seen": 384221584, "step": 2237 }, { "epoch": 0.5886762675083843, "grad_norm": 4.107798805009285, "learning_rate": 5e-06, "loss": 0.0964, "num_input_tokens_seen": 384393708, "step": 2238 }, { "epoch": 0.5886762675083843, "loss": 0.10467529296875, "loss_ce": 0.0007019043550826609, "loss_iou": 0.51171875, "loss_num": 0.020751953125, "loss_xval": 0.10400390625, "num_input_tokens_seen": 384393708, "step": 2238 }, { "epoch": 0.5889393042677714, "grad_norm": 5.687252203871965, "learning_rate": 5e-06, "loss": 0.1541, "num_input_tokens_seen": 384566100, "step": 2239 }, { "epoch": 0.5889393042677714, "loss": 0.2502034306526184, "loss_ce": 0.004079162143170834, "loss_iou": 0.5078125, "loss_num": 0.04931640625, "loss_xval": 0.24609375, "num_input_tokens_seen": 384566100, "step": 2239 }, { "epoch": 0.5892023410271585, "grad_norm": 5.391300577620961, "learning_rate": 5e-06, "loss": 0.0922, "num_input_tokens_seen": 384738000, "step": 2240 }, { "epoch": 0.5892023410271585, "loss": 0.09789521992206573, "loss_ce": 0.0031381379812955856, "loss_iou": 0.63671875, "loss_num": 0.0189208984375, "loss_xval": 0.0947265625, "num_input_tokens_seen": 384738000, "step": 2240 }, { "epoch": 0.5894653777865456, "grad_norm": 2.700202309873461, "learning_rate": 5e-06, "loss": 0.1126, "num_input_tokens_seen": 384910408, "step": 2241 }, { "epoch": 0.5894653777865456, "loss": 0.18347935378551483, "loss_ce": 0.00031284932629205287, "loss_iou": 0.484375, "loss_num": 0.03662109375, "loss_xval": 0.18359375, "num_input_tokens_seen": 384910408, "step": 2241 }, { "epoch": 0.5897284145459328, "grad_norm": 8.687970034600534, "learning_rate": 5e-06, "loss": 0.1178, "num_input_tokens_seen": 385082740, "step": 2242 }, { "epoch": 0.5897284145459328, "loss": 0.11214028298854828, "loss_ce": 0.0005375072360038757, "loss_iou": 0.5859375, "loss_num": 0.0223388671875, "loss_xval": 0.11181640625, 
"num_input_tokens_seen": 385082740, "step": 2242 }, { "epoch": 0.5899914513053199, "grad_norm": 7.347975569465696, "learning_rate": 5e-06, "loss": 0.1198, "num_input_tokens_seen": 385254812, "step": 2243 }, { "epoch": 0.5899914513053199, "loss": 0.10931709408760071, "loss_ce": 0.000491409155074507, "loss_iou": 0.546875, "loss_num": 0.021728515625, "loss_xval": 0.10888671875, "num_input_tokens_seen": 385254812, "step": 2243 }, { "epoch": 0.590254488064707, "grad_norm": 5.972916509227761, "learning_rate": 5e-06, "loss": 0.097, "num_input_tokens_seen": 385427116, "step": 2244 }, { "epoch": 0.590254488064707, "loss": 0.06860466301441193, "loss_ce": 0.0034191168379038572, "loss_iou": 0.61328125, "loss_num": 0.01300048828125, "loss_xval": 0.0654296875, "num_input_tokens_seen": 385427116, "step": 2244 }, { "epoch": 0.5905175248240941, "grad_norm": 5.792932058485695, "learning_rate": 5e-06, "loss": 0.1615, "num_input_tokens_seen": 385599308, "step": 2245 }, { "epoch": 0.5905175248240941, "loss": 0.09140485525131226, "loss_ce": 0.0006760837859474123, "loss_iou": 0.578125, "loss_num": 0.01806640625, "loss_xval": 0.0908203125, "num_input_tokens_seen": 385599308, "step": 2245 }, { "epoch": 0.5907805615834812, "grad_norm": 6.999898072921272, "learning_rate": 5e-06, "loss": 0.1475, "num_input_tokens_seen": 385771596, "step": 2246 }, { "epoch": 0.5907805615834812, "loss": 0.18632760643959045, "loss_ce": 0.0021235125605016947, "loss_iou": 0.416015625, "loss_num": 0.036865234375, "loss_xval": 0.1845703125, "num_input_tokens_seen": 385771596, "step": 2246 }, { "epoch": 0.5910435983428685, "grad_norm": 18.38237494107746, "learning_rate": 5e-06, "loss": 0.1274, "num_input_tokens_seen": 385940788, "step": 2247 }, { "epoch": 0.5910435983428685, "loss": 0.16689589619636536, "loss_ce": 0.004466078244149685, "loss_iou": 0.40625, "loss_num": 0.032470703125, "loss_xval": 0.162109375, "num_input_tokens_seen": 385940788, "step": 2247 }, { "epoch": 0.5913066351022556, "grad_norm": 
7.020065723323422, "learning_rate": 5e-06, "loss": 0.1166, "num_input_tokens_seen": 386112940, "step": 2248 }, { "epoch": 0.5913066351022556, "loss": 0.06020001322031021, "loss_ce": 0.0016672981437295675, "loss_iou": 0.55859375, "loss_num": 0.01171875, "loss_xval": 0.05859375, "num_input_tokens_seen": 386112940, "step": 2248 }, { "epoch": 0.5915696718616427, "grad_norm": 4.523572034689062, "learning_rate": 5e-06, "loss": 0.1053, "num_input_tokens_seen": 386285088, "step": 2249 }, { "epoch": 0.5915696718616427, "loss": 0.10929292440414429, "loss_ce": 0.002771313302218914, "loss_iou": 0.5234375, "loss_num": 0.021240234375, "loss_xval": 0.1064453125, "num_input_tokens_seen": 386285088, "step": 2249 }, { "epoch": 0.5918327086210298, "grad_norm": 4.838564447785546, "learning_rate": 5e-06, "loss": 0.1268, "num_input_tokens_seen": 386457364, "step": 2250 }, { "epoch": 0.5918327086210298, "eval_websight_new_CIoU": 0.8620143532752991, "eval_websight_new_GIoU": 0.8631013035774231, "eval_websight_new_IoU": 0.8667041063308716, "eval_websight_new_MAE_all": 0.020488929003477097, "eval_websight_new_MAE_h": 0.008965343236923218, "eval_websight_new_MAE_w": 0.0330337006598711, "eval_websight_new_MAE_x": 0.03212242014706135, "eval_websight_new_MAE_y": 0.007834249641746283, "eval_websight_new_NUM_probability": 0.9999847710132599, "eval_websight_new_inside_bbox": 1.0, "eval_websight_new_loss": 0.10749460011720657, "eval_websight_new_loss_ce": 1.4298896530817728e-05, "eval_websight_new_loss_iou": 0.3743896484375, "eval_websight_new_loss_num": 0.019153594970703125, "eval_websight_new_loss_xval": 0.095703125, "eval_websight_new_runtime": 58.289, "eval_websight_new_samples_per_second": 0.858, "eval_websight_new_steps_per_second": 0.034, "num_input_tokens_seen": 386457364, "step": 2250 }, { "epoch": 0.5918327086210298, "eval_seeclick_CIoU": 0.6236494481563568, "eval_seeclick_GIoU": 0.6230664253234863, "eval_seeclick_IoU": 0.6453896760940552, "eval_seeclick_MAE_all": 0.046443790197372437, 
"eval_seeclick_MAE_h": 0.026547173038125038, "eval_seeclick_MAE_w": 0.06541823036968708, "eval_seeclick_MAE_x": 0.06924234330654144, "eval_seeclick_MAE_y": 0.024567410349845886, "eval_seeclick_NUM_probability": 0.9999750256538391, "eval_seeclick_inside_bbox": 0.953125, "eval_seeclick_loss": 0.21426968276500702, "eval_seeclick_loss_ce": 0.009121979121118784, "eval_seeclick_loss_iou": 0.506591796875, "eval_seeclick_loss_num": 0.039905548095703125, "eval_seeclick_loss_xval": 0.1995849609375, "eval_seeclick_runtime": 71.9379, "eval_seeclick_samples_per_second": 0.598, "eval_seeclick_steps_per_second": 0.028, "num_input_tokens_seen": 386457364, "step": 2250 }, { "epoch": 0.5918327086210298, "eval_icons_CIoU": 0.834777295589447, "eval_icons_GIoU": 0.8241135478019714, "eval_icons_IoU": 0.8441117405891418, "eval_icons_MAE_all": 0.02446969971060753, "eval_icons_MAE_h": 0.024498000741004944, "eval_icons_MAE_w": 0.02540498599410057, "eval_icons_MAE_x": 0.02417761366814375, "eval_icons_MAE_y": 0.02379819191992283, "eval_icons_NUM_probability": 0.9999534487724304, "eval_icons_inside_bbox": 0.9565972089767456, "eval_icons_loss": 0.0800996944308281, "eval_icons_loss_ce": 2.8939639378222637e-05, "eval_icons_loss_iou": 0.520263671875, "eval_icons_loss_num": 0.014501571655273438, "eval_icons_loss_xval": 0.072509765625, "eval_icons_runtime": 87.9614, "eval_icons_samples_per_second": 0.568, "eval_icons_steps_per_second": 0.023, "num_input_tokens_seen": 386457364, "step": 2250 }, { "epoch": 0.5918327086210298, "eval_screenspot_CIoU": 0.5634338855743408, "eval_screenspot_GIoU": 0.5513045191764832, "eval_screenspot_IoU": 0.6039714018503824, "eval_screenspot_MAE_all": 0.08249951650698979, "eval_screenspot_MAE_h": 0.04705421378215154, "eval_screenspot_MAE_w": 0.14340341091156006, "eval_screenspot_MAE_x": 0.09280380109945933, "eval_screenspot_MAE_y": 0.04673664582272371, "eval_screenspot_NUM_probability": 0.99980628490448, "eval_screenspot_inside_bbox": 0.8454166650772095, 
"eval_screenspot_loss": 0.8701639175415039, "eval_screenspot_loss_ce": 0.543925940990448, "eval_screenspot_loss_iou": 0.45556640625, "eval_screenspot_loss_num": 0.06413777669270833, "eval_screenspot_loss_xval": 0.3208414713541667, "eval_screenspot_runtime": 147.2039, "eval_screenspot_samples_per_second": 0.605, "eval_screenspot_steps_per_second": 0.02, "num_input_tokens_seen": 386457364, "step": 2250 }, { "epoch": 0.5918327086210298, "loss": 0.8634153604507446, "loss_ce": 0.5327268838882446, "loss_iou": 0.388671875, "loss_num": 0.06591796875, "loss_xval": 0.330078125, "num_input_tokens_seen": 386457364, "step": 2250 }, { "epoch": 0.5920957453804169, "grad_norm": 17.382874089820607, "learning_rate": 5e-06, "loss": 0.1321, "num_input_tokens_seen": 386629432, "step": 2251 }, { "epoch": 0.5920957453804169, "loss": 0.06886275112628937, "loss_ce": 0.0024259830825030804, "loss_iou": 0.55078125, "loss_num": 0.0133056640625, "loss_xval": 0.06640625, "num_input_tokens_seen": 386629432, "step": 2251 }, { "epoch": 0.5923587821398041, "grad_norm": 4.874671437046262, "learning_rate": 5e-06, "loss": 0.1568, "num_input_tokens_seen": 386801572, "step": 2252 }, { "epoch": 0.5923587821398041, "loss": 0.14957007765769958, "loss_ce": 0.0020786237437278032, "loss_iou": 0.51171875, "loss_num": 0.029541015625, "loss_xval": 0.1474609375, "num_input_tokens_seen": 386801572, "step": 2252 }, { "epoch": 0.5926218188991912, "grad_norm": 5.10088801695758, "learning_rate": 5e-06, "loss": 0.1076, "num_input_tokens_seen": 386973592, "step": 2253 }, { "epoch": 0.5926218188991912, "loss": 0.14472725987434387, "loss_ce": 0.00031808449421077967, "loss_iou": 0.357421875, "loss_num": 0.0289306640625, "loss_xval": 0.14453125, "num_input_tokens_seen": 386973592, "step": 2253 }, { "epoch": 0.5928848556585783, "grad_norm": 7.206762089035192, "learning_rate": 5e-06, "loss": 0.1079, "num_input_tokens_seen": 387143876, "step": 2254 }, { "epoch": 0.5928848556585783, "loss": 0.1427307277917862, "loss_ce": 
0.0004577827639877796, "loss_iou": 0.490234375, "loss_num": 0.0284423828125, "loss_xval": 0.142578125, "num_input_tokens_seen": 387143876, "step": 2254 }, { "epoch": 0.5931478924179654, "grad_norm": 28.833326639866428, "learning_rate": 5e-06, "loss": 0.1461, "num_input_tokens_seen": 387316360, "step": 2255 }, { "epoch": 0.5931478924179654, "loss": 0.10336272418498993, "loss_ce": 0.0003353758074808866, "loss_iou": 0.578125, "loss_num": 0.0206298828125, "loss_xval": 0.10302734375, "num_input_tokens_seen": 387316360, "step": 2255 }, { "epoch": 0.5934109291773525, "grad_norm": 11.869740305960864, "learning_rate": 5e-06, "loss": 0.1297, "num_input_tokens_seen": 387488944, "step": 2256 }, { "epoch": 0.5934109291773525, "loss": 0.0730450302362442, "loss_ce": 0.0006573314312845469, "loss_iou": 0.5859375, "loss_num": 0.01446533203125, "loss_xval": 0.072265625, "num_input_tokens_seen": 387488944, "step": 2256 }, { "epoch": 0.5936739659367397, "grad_norm": 8.676646075770014, "learning_rate": 5e-06, "loss": 0.1525, "num_input_tokens_seen": 387661216, "step": 2257 }, { "epoch": 0.5936739659367397, "loss": 0.09287041425704956, "loss_ce": 0.00037164040259085596, "loss_iou": 0.48046875, "loss_num": 0.0185546875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 387661216, "step": 2257 }, { "epoch": 0.5939370026961268, "grad_norm": 6.091102593612703, "learning_rate": 5e-06, "loss": 0.1202, "num_input_tokens_seen": 387833316, "step": 2258 }, { "epoch": 0.5939370026961268, "loss": 0.047980114817619324, "loss_ce": 0.002722549019381404, "loss_iou": 0.52734375, "loss_num": 0.009033203125, "loss_xval": 0.045166015625, "num_input_tokens_seen": 387833316, "step": 2258 }, { "epoch": 0.5942000394555139, "grad_norm": 13.12094289217443, "learning_rate": 5e-06, "loss": 0.1212, "num_input_tokens_seen": 388005488, "step": 2259 }, { "epoch": 0.5942000394555139, "loss": 0.1262204349040985, "loss_ce": 0.006103249732404947, "loss_iou": 0.4453125, "loss_num": 0.0240478515625, "loss_xval": 
0.1201171875, "num_input_tokens_seen": 388005488, "step": 2259 }, { "epoch": 0.594463076214901, "grad_norm": 6.977408677449423, "learning_rate": 5e-06, "loss": 0.1116, "num_input_tokens_seen": 388178100, "step": 2260 }, { "epoch": 0.594463076214901, "loss": 0.09303668141365051, "loss_ce": 0.0011177423875778913, "loss_iou": 0.57421875, "loss_num": 0.0184326171875, "loss_xval": 0.091796875, "num_input_tokens_seen": 388178100, "step": 2260 }, { "epoch": 0.5947261129742881, "grad_norm": 5.548896564721941, "learning_rate": 5e-06, "loss": 0.1348, "num_input_tokens_seen": 388349964, "step": 2261 }, { "epoch": 0.5947261129742881, "loss": 0.07538396120071411, "loss_ce": 0.0003412406367715448, "loss_iou": 0.49609375, "loss_num": 0.0150146484375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 388349964, "step": 2261 }, { "epoch": 0.5949891497336752, "grad_norm": 3.9845795265650694, "learning_rate": 5e-06, "loss": 0.1365, "num_input_tokens_seen": 388520668, "step": 2262 }, { "epoch": 0.5949891497336752, "loss": 0.18895787000656128, "loss_ce": 0.00042028201278299093, "loss_iou": 0.4765625, "loss_num": 0.037841796875, "loss_xval": 0.1884765625, "num_input_tokens_seen": 388520668, "step": 2262 }, { "epoch": 0.5952521864930624, "grad_norm": 5.603595618443302, "learning_rate": 5e-06, "loss": 0.1414, "num_input_tokens_seen": 388692900, "step": 2263 }, { "epoch": 0.5952521864930624, "loss": 0.08959123492240906, "loss_ce": 0.0002662862534634769, "loss_iou": 0.427734375, "loss_num": 0.017822265625, "loss_xval": 0.08935546875, "num_input_tokens_seen": 388692900, "step": 2263 }, { "epoch": 0.5955152232524495, "grad_norm": 5.071553765162839, "learning_rate": 5e-06, "loss": 0.142, "num_input_tokens_seen": 388865328, "step": 2264 }, { "epoch": 0.5955152232524495, "loss": 0.07327542454004288, "loss_ce": 0.0008877270738594234, "loss_iou": 0.5390625, "loss_num": 0.01446533203125, "loss_xval": 0.072265625, "num_input_tokens_seen": 388865328, "step": 2264 }, { "epoch": 0.5957782600118366, 
"grad_norm": 7.1017098248073385, "learning_rate": 5e-06, "loss": 0.1325, "num_input_tokens_seen": 389037536, "step": 2265 }, { "epoch": 0.5957782600118366, "loss": 0.15171518921852112, "loss_ce": 0.0005311004933901131, "loss_iou": 0.44921875, "loss_num": 0.0302734375, "loss_xval": 0.1513671875, "num_input_tokens_seen": 389037536, "step": 2265 }, { "epoch": 0.5960412967712237, "grad_norm": 6.446588247534734, "learning_rate": 5e-06, "loss": 0.1161, "num_input_tokens_seen": 389209660, "step": 2266 }, { "epoch": 0.5960412967712237, "loss": 0.11115900427103043, "loss_ce": 0.00028864690102636814, "loss_iou": 0.50390625, "loss_num": 0.022216796875, "loss_xval": 0.11083984375, "num_input_tokens_seen": 389209660, "step": 2266 }, { "epoch": 0.5963043335306109, "grad_norm": 9.702541385516984, "learning_rate": 5e-06, "loss": 0.1152, "num_input_tokens_seen": 389381624, "step": 2267 }, { "epoch": 0.5963043335306109, "loss": 0.13201884925365448, "loss_ce": 0.00581339979544282, "loss_iou": 0.48046875, "loss_num": 0.0252685546875, "loss_xval": 0.1259765625, "num_input_tokens_seen": 389381624, "step": 2267 }, { "epoch": 0.5965673702899981, "grad_norm": 4.325039584368215, "learning_rate": 5e-06, "loss": 0.1332, "num_input_tokens_seen": 389553348, "step": 2268 }, { "epoch": 0.5965673702899981, "loss": 0.09481080621480942, "loss_ce": 0.0006488211220130324, "loss_iou": 0.6640625, "loss_num": 0.018798828125, "loss_xval": 0.09423828125, "num_input_tokens_seen": 389553348, "step": 2268 }, { "epoch": 0.5968304070493852, "grad_norm": 4.06473977313393, "learning_rate": 5e-06, "loss": 0.1111, "num_input_tokens_seen": 389723828, "step": 2269 }, { "epoch": 0.5968304070493852, "loss": 0.130482017993927, "loss_ce": 0.00023298643645830452, "loss_iou": 0.57421875, "loss_num": 0.0260009765625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 389723828, "step": 2269 }, { "epoch": 0.5970934438087723, "grad_norm": 11.83991543858794, "learning_rate": 5e-06, "loss": 0.1278, "num_input_tokens_seen": 
389896036, "step": 2270 }, { "epoch": 0.5970934438087723, "loss": 0.1708485186100006, "loss_ce": 0.0017506256699562073, "loss_iou": 0.5625, "loss_num": 0.033935546875, "loss_xval": 0.1689453125, "num_input_tokens_seen": 389896036, "step": 2270 }, { "epoch": 0.5973564805681594, "grad_norm": 4.637610311727533, "learning_rate": 5e-06, "loss": 0.11, "num_input_tokens_seen": 390068288, "step": 2271 }, { "epoch": 0.5973564805681594, "loss": 0.12343515455722809, "loss_ce": 0.0004340623854659498, "loss_iou": null, "loss_num": 0.0245361328125, "loss_xval": 0.123046875, "num_input_tokens_seen": 390068288, "step": 2271 }, { "epoch": 0.5976195173275465, "grad_norm": 4.239687367925931, "learning_rate": 5e-06, "loss": 0.1336, "num_input_tokens_seen": 390240724, "step": 2272 }, { "epoch": 0.5976195173275465, "loss": 0.09358179569244385, "loss_ce": 0.0032497686333954334, "loss_iou": 0.65234375, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 390240724, "step": 2272 }, { "epoch": 0.5978825540869337, "grad_norm": 3.650780325093881, "learning_rate": 5e-06, "loss": 0.135, "num_input_tokens_seen": 390413092, "step": 2273 }, { "epoch": 0.5978825540869337, "loss": 0.0876360684633255, "loss_ce": 0.00032527127768844366, "loss_iou": 0.515625, "loss_num": 0.0174560546875, "loss_xval": 0.08740234375, "num_input_tokens_seen": 390413092, "step": 2273 }, { "epoch": 0.5981455908463208, "grad_norm": 4.543660924331624, "learning_rate": 5e-06, "loss": 0.0989, "num_input_tokens_seen": 390585320, "step": 2274 }, { "epoch": 0.5981455908463208, "loss": 0.1153046190738678, "loss_ce": 0.0005890398169867694, "loss_iou": 0.3671875, "loss_num": 0.02294921875, "loss_xval": 0.11474609375, "num_input_tokens_seen": 390585320, "step": 2274 }, { "epoch": 0.5984086276057079, "grad_norm": 5.356363884549334, "learning_rate": 5e-06, "loss": 0.1013, "num_input_tokens_seen": 390757376, "step": 2275 }, { "epoch": 0.5984086276057079, "loss": 0.1397184282541275, "loss_ce": 
0.0057157427072525024, "loss_iou": 0.5, "loss_num": 0.0267333984375, "loss_xval": 0.1337890625, "num_input_tokens_seen": 390757376, "step": 2275 }, { "epoch": 0.598671664365095, "grad_norm": 3.3713877207341088, "learning_rate": 5e-06, "loss": 0.0875, "num_input_tokens_seen": 390929460, "step": 2276 }, { "epoch": 0.598671664365095, "loss": 0.0523032546043396, "loss_ce": 0.000316562014631927, "loss_iou": 0.458984375, "loss_num": 0.0103759765625, "loss_xval": 0.052001953125, "num_input_tokens_seen": 390929460, "step": 2276 }, { "epoch": 0.5989347011244821, "grad_norm": 10.008368190926095, "learning_rate": 5e-06, "loss": 0.1313, "num_input_tokens_seen": 391100108, "step": 2277 }, { "epoch": 0.5989347011244821, "loss": 0.08954879641532898, "loss_ce": 0.0027873138897120953, "loss_iou": 0.55078125, "loss_num": 0.017333984375, "loss_xval": 0.0869140625, "num_input_tokens_seen": 391100108, "step": 2277 }, { "epoch": 0.5991977378838693, "grad_norm": 13.178397333981023, "learning_rate": 5e-06, "loss": 0.1218, "num_input_tokens_seen": 391272352, "step": 2278 }, { "epoch": 0.5991977378838693, "loss": 0.1727224737405777, "loss_ce": 0.00011504795111250132, "loss_iou": 0.53125, "loss_num": 0.03466796875, "loss_xval": 0.1728515625, "num_input_tokens_seen": 391272352, "step": 2278 }, { "epoch": 0.5994607746432564, "grad_norm": 5.1375780540443134, "learning_rate": 5e-06, "loss": 0.1023, "num_input_tokens_seen": 391444648, "step": 2279 }, { "epoch": 0.5994607746432564, "loss": 0.05273713544011116, "loss_ce": 0.0022915778681635857, "loss_iou": 0.478515625, "loss_num": 0.01007080078125, "loss_xval": 0.050537109375, "num_input_tokens_seen": 391444648, "step": 2279 }, { "epoch": 0.5997238114026435, "grad_norm": 25.459943093434337, "learning_rate": 5e-06, "loss": 0.1646, "num_input_tokens_seen": 391616960, "step": 2280 }, { "epoch": 0.5997238114026435, "loss": 0.1544933021068573, "loss_ce": 0.0004405686049722135, "loss_iou": 0.6796875, "loss_num": 0.03076171875, "loss_xval": 0.154296875, 
"num_input_tokens_seen": 391616960, "step": 2280 }, { "epoch": 0.5999868481620306, "grad_norm": 4.8890917043699185, "learning_rate": 5e-06, "loss": 0.1464, "num_input_tokens_seen": 391788924, "step": 2281 }, { "epoch": 0.5999868481620306, "loss": 0.06912894546985626, "loss_ce": 0.006689978763461113, "loss_iou": 0.55859375, "loss_num": 0.01251220703125, "loss_xval": 0.0625, "num_input_tokens_seen": 391788924, "step": 2281 }, { "epoch": 0.6002498849214177, "grad_norm": 6.219770384955685, "learning_rate": 5e-06, "loss": 0.1357, "num_input_tokens_seen": 391961072, "step": 2282 }, { "epoch": 0.6002498849214177, "loss": 0.07950527220964432, "loss_ce": 0.00041896995389834046, "loss_iou": 0.4140625, "loss_num": 0.015869140625, "loss_xval": 0.0791015625, "num_input_tokens_seen": 391961072, "step": 2282 }, { "epoch": 0.600512921680805, "grad_norm": 3.971504685954048, "learning_rate": 5e-06, "loss": 0.156, "num_input_tokens_seen": 392130392, "step": 2283 }, { "epoch": 0.600512921680805, "loss": 0.11902518570423126, "loss_ce": 0.0007390595856122673, "loss_iou": 0.51953125, "loss_num": 0.023681640625, "loss_xval": 0.1181640625, "num_input_tokens_seen": 392130392, "step": 2283 }, { "epoch": 0.600775958440192, "grad_norm": 3.36927604682008, "learning_rate": 5e-06, "loss": 0.0892, "num_input_tokens_seen": 392300644, "step": 2284 }, { "epoch": 0.600775958440192, "loss": 0.09242402017116547, "loss_ce": 0.0041824462823569775, "loss_iou": 0.375, "loss_num": 0.017578125, "loss_xval": 0.08837890625, "num_input_tokens_seen": 392300644, "step": 2284 }, { "epoch": 0.6010389951995792, "grad_norm": 5.15292132366606, "learning_rate": 5e-06, "loss": 0.1317, "num_input_tokens_seen": 392472968, "step": 2285 }, { "epoch": 0.6010389951995792, "loss": 0.14593744277954102, "loss_ce": 0.0034508705139160156, "loss_iou": 0.484375, "loss_num": 0.028564453125, "loss_xval": 0.142578125, "num_input_tokens_seen": 392472968, "step": 2285 }, { "epoch": 0.6013020319589663, "grad_norm": 4.237274665038102, 
"learning_rate": 5e-06, "loss": 0.1003, "num_input_tokens_seen": 392644756, "step": 2286 }, { "epoch": 0.6013020319589663, "loss": 0.07083064317703247, "loss_ce": 0.00170833186712116, "loss_iou": 0.59765625, "loss_num": 0.0137939453125, "loss_xval": 0.0693359375, "num_input_tokens_seen": 392644756, "step": 2286 }, { "epoch": 0.6015650687183534, "grad_norm": 43.77503497663098, "learning_rate": 5e-06, "loss": 0.1046, "num_input_tokens_seen": 392816816, "step": 2287 }, { "epoch": 0.6015650687183534, "loss": 0.09309446811676025, "loss_ce": 0.00016844802303239703, "loss_iou": 0.5390625, "loss_num": 0.0185546875, "loss_xval": 0.0927734375, "num_input_tokens_seen": 392816816, "step": 2287 }, { "epoch": 0.6018281054777405, "grad_norm": 8.046683810113267, "learning_rate": 5e-06, "loss": 0.1609, "num_input_tokens_seen": 392987164, "step": 2288 }, { "epoch": 0.6018281054777405, "loss": 0.12998059391975403, "loss_ce": 0.0008912362391129136, "loss_iou": 0.5078125, "loss_num": 0.02587890625, "loss_xval": 0.12890625, "num_input_tokens_seen": 392987164, "step": 2288 }, { "epoch": 0.6020911422371277, "grad_norm": 6.745453284904858, "learning_rate": 5e-06, "loss": 0.1509, "num_input_tokens_seen": 393159584, "step": 2289 }, { "epoch": 0.6020911422371277, "loss": 0.18720246851444244, "loss_ce": 0.0019302507862448692, "loss_iou": 0.474609375, "loss_num": 0.037109375, "loss_xval": 0.185546875, "num_input_tokens_seen": 393159584, "step": 2289 }, { "epoch": 0.6023541789965148, "grad_norm": 9.388482886571397, "learning_rate": 5e-06, "loss": 0.14, "num_input_tokens_seen": 393331696, "step": 2290 }, { "epoch": 0.6023541789965148, "loss": 0.153926283121109, "loss_ce": 0.005275162868201733, "loss_iou": 0.56640625, "loss_num": 0.02978515625, "loss_xval": 0.1484375, "num_input_tokens_seen": 393331696, "step": 2290 }, { "epoch": 0.6026172157559019, "grad_norm": 5.436488955385426, "learning_rate": 5e-06, "loss": 0.1239, "num_input_tokens_seen": 393504044, "step": 2291 }, { "epoch": 
0.6026172157559019, "loss": 0.11098843812942505, "loss_ce": 0.00027066541952081025, "loss_iou": 0.5234375, "loss_num": 0.0220947265625, "loss_xval": 0.11083984375, "num_input_tokens_seen": 393504044, "step": 2291 }, { "epoch": 0.602880252515289, "grad_norm": 8.104274681671972, "learning_rate": 5e-06, "loss": 0.0981, "num_input_tokens_seen": 393676272, "step": 2292 }, { "epoch": 0.602880252515289, "loss": 0.16863158345222473, "loss_ce": 0.003592532593756914, "loss_iou": 0.453125, "loss_num": 0.032958984375, "loss_xval": 0.1650390625, "num_input_tokens_seen": 393676272, "step": 2292 }, { "epoch": 0.6031432892746761, "grad_norm": 12.32428248921536, "learning_rate": 5e-06, "loss": 0.1035, "num_input_tokens_seen": 393848392, "step": 2293 }, { "epoch": 0.6031432892746761, "loss": 0.08580140769481659, "loss_ce": 0.0013287551701068878, "loss_iou": 0.53125, "loss_num": 0.016845703125, "loss_xval": 0.08447265625, "num_input_tokens_seen": 393848392, "step": 2293 }, { "epoch": 0.6034063260340633, "grad_norm": 10.386325441280649, "learning_rate": 5e-06, "loss": 0.0854, "num_input_tokens_seen": 394020680, "step": 2294 }, { "epoch": 0.6034063260340633, "loss": 0.07794995605945587, "loss_ce": 0.003273440757766366, "loss_iou": 0.5546875, "loss_num": 0.014892578125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 394020680, "step": 2294 }, { "epoch": 0.6036693627934504, "grad_norm": 5.0087549483878355, "learning_rate": 5e-06, "loss": 0.1252, "num_input_tokens_seen": 394192740, "step": 2295 }, { "epoch": 0.6036693627934504, "loss": 0.12549570202827454, "loss_ce": 0.0021436563692986965, "loss_iou": 0.51953125, "loss_num": 0.024658203125, "loss_xval": 0.12353515625, "num_input_tokens_seen": 394192740, "step": 2295 }, { "epoch": 0.6039323995528375, "grad_norm": 12.109968647066866, "learning_rate": 5e-06, "loss": 0.1424, "num_input_tokens_seen": 394362948, "step": 2296 }, { "epoch": 0.6039323995528375, "loss": 0.1552933156490326, "loss_ce": 0.0013626604340970516, "loss_iou": 0.625, 
"loss_num": 0.03076171875, "loss_xval": 0.154296875, "num_input_tokens_seen": 394362948, "step": 2296 }, { "epoch": 0.6041954363122246, "grad_norm": 3.064699260610535, "learning_rate": 5e-06, "loss": 0.1162, "num_input_tokens_seen": 394534856, "step": 2297 }, { "epoch": 0.6041954363122246, "loss": 0.13219855725765228, "loss_ce": 0.0020563420839607716, "loss_iou": 0.45703125, "loss_num": 0.0260009765625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 394534856, "step": 2297 }, { "epoch": 0.6044584730716117, "grad_norm": 9.354536255922673, "learning_rate": 5e-06, "loss": 0.1776, "num_input_tokens_seen": 394707060, "step": 2298 }, { "epoch": 0.6044584730716117, "loss": 0.16126899421215057, "loss_ce": 0.003599932650104165, "loss_iou": 0.26171875, "loss_num": 0.031494140625, "loss_xval": 0.1572265625, "num_input_tokens_seen": 394707060, "step": 2298 }, { "epoch": 0.6047215098309989, "grad_norm": 4.517992664125833, "learning_rate": 5e-06, "loss": 0.0805, "num_input_tokens_seen": 394879016, "step": 2299 }, { "epoch": 0.6047215098309989, "loss": 0.0760730504989624, "loss_ce": 0.002098444849252701, "loss_iou": 0.48046875, "loss_num": 0.01483154296875, "loss_xval": 0.07421875, "num_input_tokens_seen": 394879016, "step": 2299 }, { "epoch": 0.604984546590386, "grad_norm": 4.501760214176101, "learning_rate": 5e-06, "loss": 0.1208, "num_input_tokens_seen": 395051188, "step": 2300 }, { "epoch": 0.604984546590386, "loss": 0.15630951523780823, "loss_ce": 0.0008682362968102098, "loss_iou": 0.578125, "loss_num": 0.0311279296875, "loss_xval": 0.1552734375, "num_input_tokens_seen": 395051188, "step": 2300 }, { "epoch": 0.6052475833497731, "grad_norm": 4.195253270921506, "learning_rate": 5e-06, "loss": 0.1148, "num_input_tokens_seen": 395223544, "step": 2301 }, { "epoch": 0.6052475833497731, "loss": 0.08342162519693375, "loss_ce": 0.0002612252428662032, "loss_iou": 0.54296875, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 395223544, "step": 2301 }, { 
"epoch": 0.6055106201091602, "grad_norm": 5.118374892921478, "learning_rate": 5e-06, "loss": 0.1103, "num_input_tokens_seen": 395395376, "step": 2302 }, { "epoch": 0.6055106201091602, "loss": 0.11348429322242737, "loss_ce": 0.0011490845354273915, "loss_iou": 0.390625, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 395395376, "step": 2302 }, { "epoch": 0.6057736568685473, "grad_norm": 4.210270169640029, "learning_rate": 5e-06, "loss": 0.134, "num_input_tokens_seen": 395567140, "step": 2303 }, { "epoch": 0.6057736568685473, "loss": 0.08656048029661179, "loss_ce": 0.0008671237155795097, "loss_iou": 0.50390625, "loss_num": 0.01708984375, "loss_xval": 0.0859375, "num_input_tokens_seen": 395567140, "step": 2303 }, { "epoch": 0.6060366936279346, "grad_norm": 4.272171126326938, "learning_rate": 5e-06, "loss": 0.1251, "num_input_tokens_seen": 395739120, "step": 2304 }, { "epoch": 0.6060366936279346, "loss": 0.14756399393081665, "loss_ce": 0.002758089918643236, "loss_iou": 0.6171875, "loss_num": 0.0289306640625, "loss_xval": 0.14453125, "num_input_tokens_seen": 395739120, "step": 2304 }, { "epoch": 0.6062997303873217, "grad_norm": 8.535390245901846, "learning_rate": 5e-06, "loss": 0.1204, "num_input_tokens_seen": 395911508, "step": 2305 }, { "epoch": 0.6062997303873217, "loss": 0.1481376737356186, "loss_ce": 0.002080546924844384, "loss_iou": 0.314453125, "loss_num": 0.0291748046875, "loss_xval": 0.146484375, "num_input_tokens_seen": 395911508, "step": 2305 }, { "epoch": 0.6065627671467088, "grad_norm": 6.5128397067856785, "learning_rate": 5e-06, "loss": 0.1119, "num_input_tokens_seen": 396083668, "step": 2306 }, { "epoch": 0.6065627671467088, "loss": 0.05626985430717468, "loss_ce": 0.0008499314426444471, "loss_iou": 0.62109375, "loss_num": 0.0111083984375, "loss_xval": 0.055419921875, "num_input_tokens_seen": 396083668, "step": 2306 }, { "epoch": 0.6068258039060959, "grad_norm": 4.785548469928152, "learning_rate": 5e-06, "loss": 0.0998, 
"num_input_tokens_seen": 396255888, "step": 2307 }, { "epoch": 0.6068258039060959, "loss": 0.08620062470436096, "loss_ce": 0.002124701626598835, "loss_iou": 0.37109375, "loss_num": 0.016845703125, "loss_xval": 0.083984375, "num_input_tokens_seen": 396255888, "step": 2307 }, { "epoch": 0.607088840665483, "grad_norm": 11.035467862846048, "learning_rate": 5e-06, "loss": 0.1858, "num_input_tokens_seen": 396427920, "step": 2308 }, { "epoch": 0.607088840665483, "loss": 0.19853177666664124, "loss_ce": 0.0004726996412500739, "loss_iou": 0.46875, "loss_num": 0.03955078125, "loss_xval": 0.1982421875, "num_input_tokens_seen": 396427920, "step": 2308 }, { "epoch": 0.6073518774248702, "grad_norm": 5.6025345838645055, "learning_rate": 5e-06, "loss": 0.0874, "num_input_tokens_seen": 396599820, "step": 2309 }, { "epoch": 0.6073518774248702, "loss": 0.09216836839914322, "loss_ce": 0.0037742014974355698, "loss_iou": 0.4375, "loss_num": 0.0177001953125, "loss_xval": 0.08837890625, "num_input_tokens_seen": 396599820, "step": 2309 }, { "epoch": 0.6076149141842573, "grad_norm": 21.234701292276423, "learning_rate": 5e-06, "loss": 0.0959, "num_input_tokens_seen": 396771948, "step": 2310 }, { "epoch": 0.6076149141842573, "loss": 0.08635897189378738, "loss_ce": 0.000482511764857918, "loss_iou": 0.6171875, "loss_num": 0.0172119140625, "loss_xval": 0.0859375, "num_input_tokens_seen": 396771948, "step": 2310 }, { "epoch": 0.6078779509436444, "grad_norm": 4.7419638393344545, "learning_rate": 5e-06, "loss": 0.1136, "num_input_tokens_seen": 396943768, "step": 2311 }, { "epoch": 0.6078779509436444, "loss": 0.12838855385780334, "loss_ce": 0.002534065628424287, "loss_iou": 0.55078125, "loss_num": 0.025146484375, "loss_xval": 0.1259765625, "num_input_tokens_seen": 396943768, "step": 2311 }, { "epoch": 0.6081409877030315, "grad_norm": 5.73379136559562, "learning_rate": 5e-06, "loss": 0.1965, "num_input_tokens_seen": 397116008, "step": 2312 }, { "epoch": 0.6081409877030315, "loss": 0.17652729153633118, 
"loss_ce": 0.0020277751609683037, "loss_iou": null, "loss_num": 0.034912109375, "loss_xval": 0.1748046875, "num_input_tokens_seen": 397116008, "step": 2312 }, { "epoch": 0.6084040244624186, "grad_norm": 4.062322434152511, "learning_rate": 5e-06, "loss": 0.0845, "num_input_tokens_seen": 397288120, "step": 2313 }, { "epoch": 0.6084040244624186, "loss": 0.09052719175815582, "loss_ce": 0.0007139619556255639, "loss_iou": 0.470703125, "loss_num": 0.0179443359375, "loss_xval": 0.08984375, "num_input_tokens_seen": 397288120, "step": 2313 }, { "epoch": 0.6086670612218057, "grad_norm": 16.02293214120212, "learning_rate": 5e-06, "loss": 0.1668, "num_input_tokens_seen": 397460304, "step": 2314 }, { "epoch": 0.6086670612218057, "loss": 0.13694977760314941, "loss_ce": 0.0005667208461090922, "loss_iou": 0.69921875, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 397460304, "step": 2314 }, { "epoch": 0.6089300979811929, "grad_norm": 7.987103586052065, "learning_rate": 5e-06, "loss": 0.0996, "num_input_tokens_seen": 397632608, "step": 2315 }, { "epoch": 0.6089300979811929, "loss": 0.06060004234313965, "loss_ce": 0.002006293274462223, "loss_iou": 0.5625, "loss_num": 0.01171875, "loss_xval": 0.05859375, "num_input_tokens_seen": 397632608, "step": 2315 }, { "epoch": 0.60919313474058, "grad_norm": 7.316466982553544, "learning_rate": 5e-06, "loss": 0.1343, "num_input_tokens_seen": 397804820, "step": 2316 }, { "epoch": 0.60919313474058, "loss": 0.11626386642456055, "loss_ce": 0.004111772403120995, "loss_iou": 0.400390625, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 397804820, "step": 2316 }, { "epoch": 0.6094561714999671, "grad_norm": 5.539376425734617, "learning_rate": 5e-06, "loss": 0.1638, "num_input_tokens_seen": 397976940, "step": 2317 }, { "epoch": 0.6094561714999671, "loss": 0.16956084966659546, "loss_ce": 0.004979567602276802, "loss_iou": 0.6171875, "loss_num": 0.032958984375, "loss_xval": 0.1650390625, 
"num_input_tokens_seen": 397976940, "step": 2317 }, { "epoch": 0.6097192082593542, "grad_norm": 24.75685247660265, "learning_rate": 5e-06, "loss": 0.1182, "num_input_tokens_seen": 398147040, "step": 2318 }, { "epoch": 0.6097192082593542, "loss": 0.18709902465343475, "loss_ce": 0.0012164636282250285, "loss_iou": null, "loss_num": 0.037109375, "loss_xval": 0.185546875, "num_input_tokens_seen": 398147040, "step": 2318 }, { "epoch": 0.6099822450187413, "grad_norm": 4.315961063244564, "learning_rate": 5e-06, "loss": 0.1385, "num_input_tokens_seen": 398318996, "step": 2319 }, { "epoch": 0.6099822450187413, "loss": 0.18130794167518616, "loss_ce": 0.0009795635705813766, "loss_iou": 0.5234375, "loss_num": 0.0361328125, "loss_xval": 0.1806640625, "num_input_tokens_seen": 398318996, "step": 2319 }, { "epoch": 0.6102452817781285, "grad_norm": 4.640931709162671, "learning_rate": 5e-06, "loss": 0.0931, "num_input_tokens_seen": 398489328, "step": 2320 }, { "epoch": 0.6102452817781285, "loss": 0.09983004629611969, "loss_ce": 0.0005563638987950981, "loss_iou": 0.546875, "loss_num": 0.019775390625, "loss_xval": 0.09912109375, "num_input_tokens_seen": 398489328, "step": 2320 }, { "epoch": 0.6105083185375156, "grad_norm": 23.53542790698555, "learning_rate": 5e-06, "loss": 0.1122, "num_input_tokens_seen": 398661592, "step": 2321 }, { "epoch": 0.6105083185375156, "loss": 0.14297714829444885, "loss_ce": 0.0037559503689408302, "loss_iou": 0.59765625, "loss_num": 0.02783203125, "loss_xval": 0.1396484375, "num_input_tokens_seen": 398661592, "step": 2321 }, { "epoch": 0.6107713552969027, "grad_norm": 4.191481208630257, "learning_rate": 5e-06, "loss": 0.1143, "num_input_tokens_seen": 398833940, "step": 2322 }, { "epoch": 0.6107713552969027, "loss": 0.1795070767402649, "loss_ce": 0.003298588562756777, "loss_iou": 0.51171875, "loss_num": 0.03515625, "loss_xval": 0.17578125, "num_input_tokens_seen": 398833940, "step": 2322 }, { "epoch": 0.6110343920562898, "grad_norm": 4.031146460659383, 
"learning_rate": 5e-06, "loss": 0.1102, "num_input_tokens_seen": 399006244, "step": 2323 }, { "epoch": 0.6110343920562898, "loss": 0.0546613447368145, "loss_ce": 0.0011029954766854644, "loss_iou": 0.45703125, "loss_num": 0.01068115234375, "loss_xval": 0.053466796875, "num_input_tokens_seen": 399006244, "step": 2323 }, { "epoch": 0.611297428815677, "grad_norm": 4.121680425084659, "learning_rate": 5e-06, "loss": 0.1074, "num_input_tokens_seen": 399178456, "step": 2324 }, { "epoch": 0.611297428815677, "loss": 0.16250503063201904, "loss_ce": 0.002928605070337653, "loss_iou": 0.61328125, "loss_num": 0.031982421875, "loss_xval": 0.1591796875, "num_input_tokens_seen": 399178456, "step": 2324 }, { "epoch": 0.6115604655750642, "grad_norm": 4.598379596612616, "learning_rate": 5e-06, "loss": 0.0851, "num_input_tokens_seen": 399350964, "step": 2325 }, { "epoch": 0.6115604655750642, "loss": 0.06914816796779633, "loss_ce": 0.002833473263308406, "loss_iou": 0.57421875, "loss_num": 0.01324462890625, "loss_xval": 0.06640625, "num_input_tokens_seen": 399350964, "step": 2325 }, { "epoch": 0.6118235023344513, "grad_norm": 7.270975374787245, "learning_rate": 5e-06, "loss": 0.0929, "num_input_tokens_seen": 399523228, "step": 2326 }, { "epoch": 0.6118235023344513, "loss": 0.08900976926088333, "loss_ce": 0.0011801763903349638, "loss_iou": 0.7421875, "loss_num": 0.017578125, "loss_xval": 0.087890625, "num_input_tokens_seen": 399523228, "step": 2326 }, { "epoch": 0.6120865390938384, "grad_norm": 4.064818082785728, "learning_rate": 5e-06, "loss": 0.1503, "num_input_tokens_seen": 399691856, "step": 2327 }, { "epoch": 0.6120865390938384, "loss": 0.10691290348768234, "loss_ce": 0.00043707285658456385, "loss_iou": 0.6328125, "loss_num": 0.021240234375, "loss_xval": 0.1064453125, "num_input_tokens_seen": 399691856, "step": 2327 }, { "epoch": 0.6123495758532255, "grad_norm": 7.343987420034391, "learning_rate": 5e-06, "loss": 0.1249, "num_input_tokens_seen": 399864108, "step": 2328 }, { "epoch": 
0.6123495758532255, "loss": 0.09687530994415283, "loss_ce": 0.001996160950511694, "loss_iou": 0.58984375, "loss_num": 0.0189208984375, "loss_xval": 0.0947265625, "num_input_tokens_seen": 399864108, "step": 2328 }, { "epoch": 0.6126126126126126, "grad_norm": 5.275060955551622, "learning_rate": 5e-06, "loss": 0.0841, "num_input_tokens_seen": 400036500, "step": 2329 }, { "epoch": 0.6126126126126126, "loss": 0.08277605473995209, "loss_ce": 0.004162768833339214, "loss_iou": 0.5078125, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 400036500, "step": 2329 }, { "epoch": 0.6128756493719998, "grad_norm": 4.648207674990067, "learning_rate": 5e-06, "loss": 0.1259, "num_input_tokens_seen": 400206892, "step": 2330 }, { "epoch": 0.6128756493719998, "loss": 0.19954022765159607, "loss_ce": 0.0007487052353098989, "loss_iou": 0.447265625, "loss_num": 0.039794921875, "loss_xval": 0.19921875, "num_input_tokens_seen": 400206892, "step": 2330 }, { "epoch": 0.6131386861313869, "grad_norm": 6.976991385409509, "learning_rate": 5e-06, "loss": 0.1152, "num_input_tokens_seen": 400377220, "step": 2331 }, { "epoch": 0.6131386861313869, "loss": 0.10119281709194183, "loss_ce": 0.0006984363426454365, "loss_iou": 0.5234375, "loss_num": 0.0201416015625, "loss_xval": 0.1005859375, "num_input_tokens_seen": 400377220, "step": 2331 }, { "epoch": 0.613401722890774, "grad_norm": 12.605542023426503, "learning_rate": 5e-06, "loss": 0.1478, "num_input_tokens_seen": 400549468, "step": 2332 }, { "epoch": 0.613401722890774, "loss": 0.20965467393398285, "loss_ce": 0.009276244789361954, "loss_iou": 0.62109375, "loss_num": 0.0400390625, "loss_xval": 0.2001953125, "num_input_tokens_seen": 400549468, "step": 2332 }, { "epoch": 0.6136647596501611, "grad_norm": 4.707098406894473, "learning_rate": 5e-06, "loss": 0.1262, "num_input_tokens_seen": 400721520, "step": 2333 }, { "epoch": 0.6136647596501611, "loss": 0.11055370420217514, "loss_ce": 0.0006599072366952896, "loss_iou": 
0.4140625, "loss_num": 0.02197265625, "loss_xval": 0.10986328125, "num_input_tokens_seen": 400721520, "step": 2333 }, { "epoch": 0.6139277964095482, "grad_norm": 14.198987666898462, "learning_rate": 5e-06, "loss": 0.1166, "num_input_tokens_seen": 400893600, "step": 2334 }, { "epoch": 0.6139277964095482, "loss": 0.11723913997411728, "loss_ce": 0.0025235607754439116, "loss_iou": null, "loss_num": 0.02294921875, "loss_xval": 0.11474609375, "num_input_tokens_seen": 400893600, "step": 2334 }, { "epoch": 0.6141908331689354, "grad_norm": 4.053613863943027, "learning_rate": 5e-06, "loss": 0.1082, "num_input_tokens_seen": 401065668, "step": 2335 }, { "epoch": 0.6141908331689354, "loss": 0.07270236313343048, "loss_ce": 0.0009250181610696018, "loss_iou": 0.51953125, "loss_num": 0.01434326171875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 401065668, "step": 2335 }, { "epoch": 0.6144538699283225, "grad_norm": 5.7996514772347005, "learning_rate": 5e-06, "loss": 0.0778, "num_input_tokens_seen": 401237708, "step": 2336 }, { "epoch": 0.6144538699283225, "loss": 0.07430876046419144, "loss_ce": 0.0035384970251470804, "loss_iou": 0.58984375, "loss_num": 0.01416015625, "loss_xval": 0.07080078125, "num_input_tokens_seen": 401237708, "step": 2336 }, { "epoch": 0.6147169066877096, "grad_norm": 4.799102165669682, "learning_rate": 5e-06, "loss": 0.1089, "num_input_tokens_seen": 401410012, "step": 2337 }, { "epoch": 0.6147169066877096, "loss": 0.0582679845392704, "loss_ce": 0.0014747708337381482, "loss_iou": 0.61328125, "loss_num": 0.0113525390625, "loss_xval": 0.056884765625, "num_input_tokens_seen": 401410012, "step": 2337 }, { "epoch": 0.6149799434470967, "grad_norm": 4.382135516303559, "learning_rate": 5e-06, "loss": 0.1124, "num_input_tokens_seen": 401582352, "step": 2338 }, { "epoch": 0.6149799434470967, "loss": 0.09339425712823868, "loss_ce": 0.00275705405510962, "loss_iou": 0.63671875, "loss_num": 0.01806640625, "loss_xval": 0.0908203125, "num_input_tokens_seen": 401582352, 
"step": 2338 }, { "epoch": 0.6152429802064838, "grad_norm": 4.340567932001983, "learning_rate": 5e-06, "loss": 0.1484, "num_input_tokens_seen": 401754308, "step": 2339 }, { "epoch": 0.6152429802064838, "loss": 0.14638805389404297, "loss_ce": 0.0007886901148594916, "loss_iou": 0.53125, "loss_num": 0.0291748046875, "loss_xval": 0.1455078125, "num_input_tokens_seen": 401754308, "step": 2339 }, { "epoch": 0.6155060169658709, "grad_norm": 6.569770077552981, "learning_rate": 5e-06, "loss": 0.1542, "num_input_tokens_seen": 401926216, "step": 2340 }, { "epoch": 0.6155060169658709, "loss": 0.12163828313350677, "loss_ce": 0.004069316200911999, "loss_iou": 0.50390625, "loss_num": 0.0235595703125, "loss_xval": 0.11767578125, "num_input_tokens_seen": 401926216, "step": 2340 }, { "epoch": 0.6157690537252581, "grad_norm": 4.067934681272699, "learning_rate": 5e-06, "loss": 0.096, "num_input_tokens_seen": 402098264, "step": 2341 }, { "epoch": 0.6157690537252581, "loss": 0.0871957540512085, "loss_ce": 0.0004342720494605601, "loss_iou": 0.52734375, "loss_num": 0.017333984375, "loss_xval": 0.0869140625, "num_input_tokens_seen": 402098264, "step": 2341 }, { "epoch": 0.6160320904846452, "grad_norm": 4.130435258439036, "learning_rate": 5e-06, "loss": 0.1384, "num_input_tokens_seen": 402270372, "step": 2342 }, { "epoch": 0.6160320904846452, "loss": 0.046382177621126175, "loss_ce": 7.175222708610818e-05, "loss_iou": 0.400390625, "loss_num": 0.00927734375, "loss_xval": 0.04638671875, "num_input_tokens_seen": 402270372, "step": 2342 }, { "epoch": 0.6162951272440323, "grad_norm": 5.637480244426784, "learning_rate": 5e-06, "loss": 0.1136, "num_input_tokens_seen": 402440632, "step": 2343 }, { "epoch": 0.6162951272440323, "loss": 0.07675184309482574, "loss_ce": 0.0019837813451886177, "loss_iou": 0.361328125, "loss_num": 0.01495361328125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 402440632, "step": 2343 }, { "epoch": 0.6165581640034195, "grad_norm": 8.043736182852212, "learning_rate": 
5e-06, "loss": 0.0906, "num_input_tokens_seen": 402612920, "step": 2344 }, { "epoch": 0.6165581640034195, "loss": 0.09050323814153671, "loss_ce": 0.00018646713579073548, "loss_iou": 0.6328125, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 402612920, "step": 2344 }, { "epoch": 0.6168212007628066, "grad_norm": 6.221194498282131, "learning_rate": 5e-06, "loss": 0.1581, "num_input_tokens_seen": 402785192, "step": 2345 }, { "epoch": 0.6168212007628066, "loss": 0.14238294959068298, "loss_ce": 0.0007356047863140702, "loss_iou": 0.34765625, "loss_num": 0.0283203125, "loss_xval": 0.1416015625, "num_input_tokens_seen": 402785192, "step": 2345 }, { "epoch": 0.6170842375221938, "grad_norm": 4.044951640988235, "learning_rate": 5e-06, "loss": 0.0799, "num_input_tokens_seen": 402955576, "step": 2346 }, { "epoch": 0.6170842375221938, "loss": 0.0908626914024353, "loss_ce": 0.0020260235760360956, "loss_iou": 0.5, "loss_num": 0.017822265625, "loss_xval": 0.0888671875, "num_input_tokens_seen": 402955576, "step": 2346 }, { "epoch": 0.6173472742815809, "grad_norm": 6.79401496913443, "learning_rate": 5e-06, "loss": 0.1167, "num_input_tokens_seen": 403125596, "step": 2347 }, { "epoch": 0.6173472742815809, "loss": 0.19389519095420837, "loss_ce": 0.0025499900802969933, "loss_iou": 0.52734375, "loss_num": 0.038330078125, "loss_xval": 0.19140625, "num_input_tokens_seen": 403125596, "step": 2347 }, { "epoch": 0.617610311040968, "grad_norm": 4.6407931564353255, "learning_rate": 5e-06, "loss": 0.1101, "num_input_tokens_seen": 403295848, "step": 2348 }, { "epoch": 0.617610311040968, "loss": 0.12875889241695404, "loss_ce": 0.0007681695278733969, "loss_iou": 0.671875, "loss_num": 0.025634765625, "loss_xval": 0.1279296875, "num_input_tokens_seen": 403295848, "step": 2348 }, { "epoch": 0.6178733478003551, "grad_norm": 15.448434585562659, "learning_rate": 5e-06, "loss": 0.1008, "num_input_tokens_seen": 403468128, "step": 2349 }, { "epoch": 0.6178733478003551, "loss": 
0.12984147667884827, "loss_ce": 0.00026383629301562905, "loss_iou": 0.578125, "loss_num": 0.02587890625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 403468128, "step": 2349 }, { "epoch": 0.6181363845597422, "grad_norm": 6.470906735340733, "learning_rate": 5e-06, "loss": 0.095, "num_input_tokens_seen": 403640328, "step": 2350 }, { "epoch": 0.6181363845597422, "loss": 0.08621760457754135, "loss_ce": 0.002935132011771202, "loss_iou": 0.47265625, "loss_num": 0.0166015625, "loss_xval": 0.08349609375, "num_input_tokens_seen": 403640328, "step": 2350 }, { "epoch": 0.6183994213191294, "grad_norm": 3.904089267434168, "learning_rate": 5e-06, "loss": 0.1178, "num_input_tokens_seen": 403812552, "step": 2351 }, { "epoch": 0.6183994213191294, "loss": 0.09386193752288818, "loss_ce": 0.00026452430756762624, "loss_iou": 0.388671875, "loss_num": 0.0186767578125, "loss_xval": 0.09375, "num_input_tokens_seen": 403812552, "step": 2351 }, { "epoch": 0.6186624580785165, "grad_norm": 6.850647458248975, "learning_rate": 5e-06, "loss": 0.1608, "num_input_tokens_seen": 403984536, "step": 2352 }, { "epoch": 0.6186624580785165, "loss": 0.10526977479457855, "loss_ce": 0.001601562718860805, "loss_iou": 0.50390625, "loss_num": 0.020751953125, "loss_xval": 0.103515625, "num_input_tokens_seen": 403984536, "step": 2352 }, { "epoch": 0.6189254948379036, "grad_norm": 6.380076499827068, "learning_rate": 5e-06, "loss": 0.1397, "num_input_tokens_seen": 404156840, "step": 2353 }, { "epoch": 0.6189254948379036, "loss": 0.10814794898033142, "loss_ce": 0.00020728506206069142, "loss_iou": 0.5, "loss_num": 0.0216064453125, "loss_xval": 0.10791015625, "num_input_tokens_seen": 404156840, "step": 2353 }, { "epoch": 0.6191885315972907, "grad_norm": 16.46365513417012, "learning_rate": 5e-06, "loss": 0.1069, "num_input_tokens_seen": 404328912, "step": 2354 }, { "epoch": 0.6191885315972907, "loss": 0.17106536030769348, "loss_ce": 0.00019742565928027034, "loss_iou": 0.48828125, "loss_num": 0.0341796875, 
"loss_xval": 0.1708984375, "num_input_tokens_seen": 404328912, "step": 2354 }, { "epoch": 0.6194515683566778, "grad_norm": 12.873967018770987, "learning_rate": 5e-06, "loss": 0.123, "num_input_tokens_seen": 404501048, "step": 2355 }, { "epoch": 0.6194515683566778, "loss": 0.1692376732826233, "loss_ce": 0.0014825284015387297, "loss_iou": 0.443359375, "loss_num": 0.033447265625, "loss_xval": 0.16796875, "num_input_tokens_seen": 404501048, "step": 2355 }, { "epoch": 0.619714605116065, "grad_norm": 3.2257038395599706, "learning_rate": 5e-06, "loss": 0.1655, "num_input_tokens_seen": 404673104, "step": 2356 }, { "epoch": 0.619714605116065, "loss": 0.23891915380954742, "loss_ce": 0.007901079021394253, "loss_iou": 0.50390625, "loss_num": 0.046142578125, "loss_xval": 0.2314453125, "num_input_tokens_seen": 404673104, "step": 2356 }, { "epoch": 0.6199776418754521, "grad_norm": 7.80566516619346, "learning_rate": 5e-06, "loss": 0.1371, "num_input_tokens_seen": 404845264, "step": 2357 }, { "epoch": 0.6199776418754521, "loss": 0.17429864406585693, "loss_ce": 0.005078674294054508, "loss_iou": 0.4375, "loss_num": 0.033935546875, "loss_xval": 0.1689453125, "num_input_tokens_seen": 404845264, "step": 2357 }, { "epoch": 0.6202406786348392, "grad_norm": 4.957580646805035, "learning_rate": 5e-06, "loss": 0.0895, "num_input_tokens_seen": 405017460, "step": 2358 }, { "epoch": 0.6202406786348392, "loss": 0.05168257653713226, "loss_ce": 0.003480062121525407, "loss_iou": 0.55078125, "loss_num": 0.0096435546875, "loss_xval": 0.048095703125, "num_input_tokens_seen": 405017460, "step": 2358 }, { "epoch": 0.6205037153942263, "grad_norm": 4.916881514910221, "learning_rate": 5e-06, "loss": 0.1126, "num_input_tokens_seen": 405189524, "step": 2359 }, { "epoch": 0.6205037153942263, "loss": 0.06677095592021942, "loss_ce": 0.001280229538679123, "loss_iou": 0.51171875, "loss_num": 0.0130615234375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 405189524, "step": 2359 }, { "epoch": 
0.6207667521536134, "grad_norm": 3.691480970561855, "learning_rate": 5e-06, "loss": 0.1426, "num_input_tokens_seen": 405362028, "step": 2360 }, { "epoch": 0.6207667521536134, "loss": 0.09511809051036835, "loss_ce": 5.582500307355076e-05, "loss_iou": 0.49609375, "loss_num": 0.01904296875, "loss_xval": 0.09521484375, "num_input_tokens_seen": 405362028, "step": 2360 }, { "epoch": 0.6210297889130006, "grad_norm": 23.640084314089293, "learning_rate": 5e-06, "loss": 0.1421, "num_input_tokens_seen": 405534164, "step": 2361 }, { "epoch": 0.6210297889130006, "loss": 0.1902218908071518, "loss_ce": 0.00021944480249658227, "loss_iou": 0.4140625, "loss_num": 0.0380859375, "loss_xval": 0.1904296875, "num_input_tokens_seen": 405534164, "step": 2361 }, { "epoch": 0.6212928256723877, "grad_norm": 3.82381362472469, "learning_rate": 5e-06, "loss": 0.1262, "num_input_tokens_seen": 405706288, "step": 2362 }, { "epoch": 0.6212928256723877, "loss": 0.07140006124973297, "loss_ce": 0.0007060917560011148, "loss_iou": 0.494140625, "loss_num": 0.01416015625, "loss_xval": 0.07080078125, "num_input_tokens_seen": 405706288, "step": 2362 }, { "epoch": 0.6215558624317749, "grad_norm": 3.7402655746033293, "learning_rate": 5e-06, "loss": 0.1074, "num_input_tokens_seen": 405878688, "step": 2363 }, { "epoch": 0.6215558624317749, "loss": 0.09456443786621094, "loss_ce": 0.0030269669368863106, "loss_iou": 0.390625, "loss_num": 0.018310546875, "loss_xval": 0.09130859375, "num_input_tokens_seen": 405878688, "step": 2363 }, { "epoch": 0.621818899191162, "grad_norm": 5.548477550947741, "learning_rate": 5e-06, "loss": 0.1307, "num_input_tokens_seen": 406050532, "step": 2364 }, { "epoch": 0.621818899191162, "loss": 0.12644195556640625, "loss_ce": 0.005317692644894123, "loss_iou": 0.51171875, "loss_num": 0.024169921875, "loss_xval": 0.12109375, "num_input_tokens_seen": 406050532, "step": 2364 }, { "epoch": 0.6220819359505491, "grad_norm": 8.39488287912259, "learning_rate": 5e-06, "loss": 0.1011, 
"num_input_tokens_seen": 406220760, "step": 2365 }, { "epoch": 0.6220819359505491, "loss": 0.13592961430549622, "loss_ce": 0.0007520002545788884, "loss_iou": 0.3203125, "loss_num": 0.0269775390625, "loss_xval": 0.134765625, "num_input_tokens_seen": 406220760, "step": 2365 }, { "epoch": 0.6223449727099362, "grad_norm": 4.84850760753409, "learning_rate": 5e-06, "loss": 0.0996, "num_input_tokens_seen": 406393068, "step": 2366 }, { "epoch": 0.6223449727099362, "loss": 0.09754068404436111, "loss_ce": 0.0005253083654679358, "loss_iou": 0.4296875, "loss_num": 0.0194091796875, "loss_xval": 0.09716796875, "num_input_tokens_seen": 406393068, "step": 2366 }, { "epoch": 0.6226080094693234, "grad_norm": 4.613478147053686, "learning_rate": 5e-06, "loss": 0.1095, "num_input_tokens_seen": 406565304, "step": 2367 }, { "epoch": 0.6226080094693234, "loss": 0.1131492406129837, "loss_ce": 0.002370435046032071, "loss_iou": 0.625, "loss_num": 0.022216796875, "loss_xval": 0.11083984375, "num_input_tokens_seen": 406565304, "step": 2367 }, { "epoch": 0.6228710462287105, "grad_norm": 4.098258220864737, "learning_rate": 5e-06, "loss": 0.1377, "num_input_tokens_seen": 406737308, "step": 2368 }, { "epoch": 0.6228710462287105, "loss": 0.09665839374065399, "loss_ce": 0.0019318348495289683, "loss_iou": 0.498046875, "loss_num": 0.0189208984375, "loss_xval": 0.0947265625, "num_input_tokens_seen": 406737308, "step": 2368 }, { "epoch": 0.6231340829880976, "grad_norm": 10.800256206470614, "learning_rate": 5e-06, "loss": 0.1047, "num_input_tokens_seen": 406909296, "step": 2369 }, { "epoch": 0.6231340829880976, "loss": 0.08466358482837677, "loss_ce": 0.0036088963970541954, "loss_iou": 0.546875, "loss_num": 0.0162353515625, "loss_xval": 0.0810546875, "num_input_tokens_seen": 406909296, "step": 2369 }, { "epoch": 0.6233971197474847, "grad_norm": 4.097548305836531, "learning_rate": 5e-06, "loss": 0.133, "num_input_tokens_seen": 407078532, "step": 2370 }, { "epoch": 0.6233971197474847, "loss": 
0.14105001091957092, "loss_ce": 0.0007301777368411422, "loss_iou": 0.53515625, "loss_num": 0.028076171875, "loss_xval": 0.140625, "num_input_tokens_seen": 407078532, "step": 2370 }, { "epoch": 0.6236601565068718, "grad_norm": 4.885935118310897, "learning_rate": 5e-06, "loss": 0.142, "num_input_tokens_seen": 407250520, "step": 2371 }, { "epoch": 0.6236601565068718, "loss": 0.185832679271698, "loss_ce": 0.001598058152012527, "loss_iou": 0.365234375, "loss_num": 0.036865234375, "loss_xval": 0.1845703125, "num_input_tokens_seen": 407250520, "step": 2371 }, { "epoch": 0.623923193266259, "grad_norm": 4.243940025684864, "learning_rate": 5e-06, "loss": 0.1161, "num_input_tokens_seen": 407420132, "step": 2372 }, { "epoch": 0.623923193266259, "loss": 0.17022864520549774, "loss_ce": 0.0003678113571368158, "loss_iou": 0.60546875, "loss_num": 0.033935546875, "loss_xval": 0.169921875, "num_input_tokens_seen": 407420132, "step": 2372 }, { "epoch": 0.6241862300256461, "grad_norm": 6.84932788040415, "learning_rate": 5e-06, "loss": 0.0833, "num_input_tokens_seen": 407592472, "step": 2373 }, { "epoch": 0.6241862300256461, "loss": 0.07611523568630219, "loss_ce": 0.0008588911150582135, "loss_iou": 0.50390625, "loss_num": 0.01507568359375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 407592472, "step": 2373 }, { "epoch": 0.6244492667850332, "grad_norm": 5.11826467429585, "learning_rate": 5e-06, "loss": 0.1594, "num_input_tokens_seen": 407764812, "step": 2374 }, { "epoch": 0.6244492667850332, "loss": 0.22299307584762573, "loss_ce": 0.001649091369472444, "loss_iou": 0.5078125, "loss_num": 0.04443359375, "loss_xval": 0.2216796875, "num_input_tokens_seen": 407764812, "step": 2374 }, { "epoch": 0.6247123035444203, "grad_norm": 8.470257803757693, "learning_rate": 5e-06, "loss": 0.1313, "num_input_tokens_seen": 407937232, "step": 2375 }, { "epoch": 0.6247123035444203, "loss": 0.12985503673553467, "loss_ce": 0.0015591441188007593, "loss_iou": 0.5625, "loss_num": 0.025634765625, 
"loss_xval": 0.1279296875, "num_input_tokens_seen": 407937232, "step": 2375 }, { "epoch": 0.6249753403038074, "grad_norm": 6.1769339682539135, "learning_rate": 5e-06, "loss": 0.1287, "num_input_tokens_seen": 408109660, "step": 2376 }, { "epoch": 0.6249753403038074, "loss": 0.1395704746246338, "loss_ce": 0.0009596287272870541, "loss_iou": 0.4140625, "loss_num": 0.0277099609375, "loss_xval": 0.138671875, "num_input_tokens_seen": 408109660, "step": 2376 }, { "epoch": 0.6252383770631946, "grad_norm": 16.39235046187932, "learning_rate": 5e-06, "loss": 0.1324, "num_input_tokens_seen": 408281820, "step": 2377 }, { "epoch": 0.6252383770631946, "loss": 0.12146437168121338, "loss_ce": 0.0002790701691992581, "loss_iou": 0.51953125, "loss_num": 0.0242919921875, "loss_xval": 0.12109375, "num_input_tokens_seen": 408281820, "step": 2377 }, { "epoch": 0.6255014138225817, "grad_norm": 7.613001701595713, "learning_rate": 5e-06, "loss": 0.1088, "num_input_tokens_seen": 408453984, "step": 2378 }, { "epoch": 0.6255014138225817, "loss": 0.120358906686306, "loss_ce": 0.002347429981455207, "loss_iou": 0.51171875, "loss_num": 0.0235595703125, "loss_xval": 0.1181640625, "num_input_tokens_seen": 408453984, "step": 2378 }, { "epoch": 0.6257644505819688, "grad_norm": 4.614954338110094, "learning_rate": 5e-06, "loss": 0.1215, "num_input_tokens_seen": 408625956, "step": 2379 }, { "epoch": 0.6257644505819688, "loss": 0.12903685867786407, "loss_ce": 0.003334960900247097, "loss_iou": 0.640625, "loss_num": 0.025146484375, "loss_xval": 0.1259765625, "num_input_tokens_seen": 408625956, "step": 2379 }, { "epoch": 0.6260274873413559, "grad_norm": 4.387875886048538, "learning_rate": 5e-06, "loss": 0.1194, "num_input_tokens_seen": 408797808, "step": 2380 }, { "epoch": 0.6260274873413559, "loss": 0.1543307602405548, "loss_ce": 0.0021701250225305557, "loss_iou": 0.50390625, "loss_num": 0.0303955078125, "loss_xval": 0.15234375, "num_input_tokens_seen": 408797808, "step": 2380 }, { "epoch": 0.626290524100743, 
"grad_norm": 5.128859786695588, "learning_rate": 5e-06, "loss": 0.1393, "num_input_tokens_seen": 408969768, "step": 2381 }, { "epoch": 0.626290524100743, "loss": 0.1084834560751915, "loss_ce": 0.0040522972121834755, "loss_iou": 0.54296875, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 408969768, "step": 2381 }, { "epoch": 0.6265535608601303, "grad_norm": 9.647359334804406, "learning_rate": 5e-06, "loss": 0.1561, "num_input_tokens_seen": 409141828, "step": 2382 }, { "epoch": 0.6265535608601303, "loss": 0.253137469291687, "loss_ce": 0.00036035641096532345, "loss_iou": 0.494140625, "loss_num": 0.050537109375, "loss_xval": 0.251953125, "num_input_tokens_seen": 409141828, "step": 2382 }, { "epoch": 0.6268165976195174, "grad_norm": 7.279560183961185, "learning_rate": 5e-06, "loss": 0.0772, "num_input_tokens_seen": 409313972, "step": 2383 }, { "epoch": 0.6268165976195174, "loss": 0.07071413099765778, "loss_ce": 0.00015749200247228146, "loss_iou": 0.62109375, "loss_num": 0.01409912109375, "loss_xval": 0.0703125, "num_input_tokens_seen": 409313972, "step": 2383 }, { "epoch": 0.6270796343789045, "grad_norm": 34.71304497396249, "learning_rate": 5e-06, "loss": 0.1461, "num_input_tokens_seen": 409482936, "step": 2384 }, { "epoch": 0.6270796343789045, "loss": 0.07908271253108978, "loss_ce": 0.0020258277654647827, "loss_iou": 0.37890625, "loss_num": 0.015380859375, "loss_xval": 0.0771484375, "num_input_tokens_seen": 409482936, "step": 2384 }, { "epoch": 0.6273426711382916, "grad_norm": 3.2691371809714322, "learning_rate": 5e-06, "loss": 0.0877, "num_input_tokens_seen": 409655000, "step": 2385 }, { "epoch": 0.6273426711382916, "loss": 0.06744687259197235, "loss_ce": 0.00012509411317296326, "loss_iou": 0.5546875, "loss_num": 0.01348876953125, "loss_xval": 0.0673828125, "num_input_tokens_seen": 409655000, "step": 2385 }, { "epoch": 0.6276057078976787, "grad_norm": 3.653366346571365, "learning_rate": 5e-06, "loss": 0.1535, "num_input_tokens_seen": 
409826932, "step": 2386 }, { "epoch": 0.6276057078976787, "loss": 0.11388548463582993, "loss_ce": 0.0006347582675516605, "loss_iou": 0.5390625, "loss_num": 0.0225830078125, "loss_xval": 0.11328125, "num_input_tokens_seen": 409826932, "step": 2386 }, { "epoch": 0.6278687446570659, "grad_norm": 4.394076994308062, "learning_rate": 5e-06, "loss": 0.126, "num_input_tokens_seen": 409997112, "step": 2387 }, { "epoch": 0.6278687446570659, "loss": 0.07691079378128052, "loss_ce": 0.0022647997830063105, "loss_iou": 0.625, "loss_num": 0.014892578125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 409997112, "step": 2387 }, { "epoch": 0.628131781416453, "grad_norm": 5.885043334484188, "learning_rate": 5e-06, "loss": 0.1348, "num_input_tokens_seen": 410169412, "step": 2388 }, { "epoch": 0.628131781416453, "loss": 0.09735430032014847, "loss_ce": 0.0005830569425597787, "loss_iou": 0.6171875, "loss_num": 0.0194091796875, "loss_xval": 0.0966796875, "num_input_tokens_seen": 410169412, "step": 2388 }, { "epoch": 0.6283948181758401, "grad_norm": 9.328795985316807, "learning_rate": 5e-06, "loss": 0.0794, "num_input_tokens_seen": 410341700, "step": 2389 }, { "epoch": 0.6283948181758401, "loss": 0.08082857728004456, "loss_ce": 0.0010098508791998029, "loss_iou": 0.5390625, "loss_num": 0.0159912109375, "loss_xval": 0.07958984375, "num_input_tokens_seen": 410341700, "step": 2389 }, { "epoch": 0.6286578549352272, "grad_norm": 4.002799196322825, "learning_rate": 5e-06, "loss": 0.0811, "num_input_tokens_seen": 410513660, "step": 2390 }, { "epoch": 0.6286578549352272, "loss": 0.054968155920505524, "loss_ce": 0.000341689505148679, "loss_iou": 0.53515625, "loss_num": 0.01092529296875, "loss_xval": 0.0546875, "num_input_tokens_seen": 410513660, "step": 2390 }, { "epoch": 0.6289208916946143, "grad_norm": 6.9030334525022505, "learning_rate": 5e-06, "loss": 0.1013, "num_input_tokens_seen": 410685956, "step": 2391 }, { "epoch": 0.6289208916946143, "loss": 0.15183863043785095, "loss_ce": 
0.0031264659482985735, "loss_iou": 0.53125, "loss_num": 0.02978515625, "loss_xval": 0.1484375, "num_input_tokens_seen": 410685956, "step": 2391 }, { "epoch": 0.6291839284540014, "grad_norm": 5.941678610362804, "learning_rate": 5e-06, "loss": 0.0866, "num_input_tokens_seen": 410856348, "step": 2392 }, { "epoch": 0.6291839284540014, "loss": 0.1089685708284378, "loss_ce": 0.0001734074903652072, "loss_iou": 0.671875, "loss_num": 0.021728515625, "loss_xval": 0.10888671875, "num_input_tokens_seen": 410856348, "step": 2392 }, { "epoch": 0.6294469652133886, "grad_norm": 7.460369582099088, "learning_rate": 5e-06, "loss": 0.1348, "num_input_tokens_seen": 411028268, "step": 2393 }, { "epoch": 0.6294469652133886, "loss": 0.17290328443050385, "loss_ce": 0.0006010266370140016, "loss_iou": 0.3671875, "loss_num": 0.034423828125, "loss_xval": 0.171875, "num_input_tokens_seen": 411028268, "step": 2393 }, { "epoch": 0.6297100019727757, "grad_norm": 4.673085403235698, "learning_rate": 5e-06, "loss": 0.1337, "num_input_tokens_seen": 411200296, "step": 2394 }, { "epoch": 0.6297100019727757, "loss": 0.11547736823558807, "loss_ce": 0.0025928488466888666, "loss_iou": 0.45703125, "loss_num": 0.0225830078125, "loss_xval": 0.11279296875, "num_input_tokens_seen": 411200296, "step": 2394 }, { "epoch": 0.6299730387321628, "grad_norm": 4.914810997482069, "learning_rate": 5e-06, "loss": 0.0925, "num_input_tokens_seen": 411372592, "step": 2395 }, { "epoch": 0.6299730387321628, "loss": 0.1332027018070221, "loss_ce": 0.0009089965024031699, "loss_iou": 0.4140625, "loss_num": 0.0264892578125, "loss_xval": 0.1318359375, "num_input_tokens_seen": 411372592, "step": 2395 }, { "epoch": 0.6302360754915499, "grad_norm": 10.387773947226163, "learning_rate": 5e-06, "loss": 0.1214, "num_input_tokens_seen": 411544920, "step": 2396 }, { "epoch": 0.6302360754915499, "loss": 0.12052314728498459, "loss_ce": 0.0002533740480430424, "loss_iou": 0.5625, "loss_num": 0.0240478515625, "loss_xval": 0.1201171875, 
"num_input_tokens_seen": 411544920, "step": 2396 }, { "epoch": 0.630499112250937, "grad_norm": 8.18207434885096, "learning_rate": 5e-06, "loss": 0.0923, "num_input_tokens_seen": 411717416, "step": 2397 }, { "epoch": 0.630499112250937, "loss": 0.09294469654560089, "loss_ce": 0.0004459216434042901, "loss_iou": 0.66015625, "loss_num": 0.0185546875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 411717416, "step": 2397 }, { "epoch": 0.6307621490103242, "grad_norm": 9.784216604991045, "learning_rate": 5e-06, "loss": 0.1422, "num_input_tokens_seen": 411889656, "step": 2398 }, { "epoch": 0.6307621490103242, "loss": 0.06576241552829742, "loss_ce": 0.00011910500325029716, "loss_iou": 0.46484375, "loss_num": 0.01312255859375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 411889656, "step": 2398 }, { "epoch": 0.6310251857697113, "grad_norm": 9.394179875352377, "learning_rate": 5e-06, "loss": 0.1396, "num_input_tokens_seen": 412061964, "step": 2399 }, { "epoch": 0.6310251857697113, "loss": 0.2204100638628006, "loss_ce": 0.003613188164308667, "loss_iou": 0.37109375, "loss_num": 0.043212890625, "loss_xval": 0.216796875, "num_input_tokens_seen": 412061964, "step": 2399 }, { "epoch": 0.6312882225290984, "grad_norm": 5.421055277942169, "learning_rate": 5e-06, "loss": 0.105, "num_input_tokens_seen": 412233844, "step": 2400 }, { "epoch": 0.6312882225290984, "loss": 0.058117613196372986, "loss_ce": 0.00016473224968649447, "loss_iou": 0.390625, "loss_num": 0.0115966796875, "loss_xval": 0.057861328125, "num_input_tokens_seen": 412233844, "step": 2400 }, { "epoch": 0.6315512592884855, "grad_norm": 32.78384103492781, "learning_rate": 5e-06, "loss": 0.0854, "num_input_tokens_seen": 412404404, "step": 2401 }, { "epoch": 0.6315512592884855, "loss": 0.11002543568611145, "loss_ce": 0.0008640556479804218, "loss_iou": null, "loss_num": 0.0218505859375, "loss_xval": 0.109375, "num_input_tokens_seen": 412404404, "step": 2401 }, { "epoch": 0.6318142960478726, "grad_norm": 
5.144111967585496, "learning_rate": 5e-06, "loss": 0.1133, "num_input_tokens_seen": 412576424, "step": 2402 }, { "epoch": 0.6318142960478726, "loss": 0.10595827549695969, "loss_ce": 0.0008252161205746233, "loss_iou": 0.5390625, "loss_num": 0.02099609375, "loss_xval": 0.10498046875, "num_input_tokens_seen": 412576424, "step": 2402 }, { "epoch": 0.6320773328072599, "grad_norm": 9.535369648896554, "learning_rate": 5e-06, "loss": 0.1344, "num_input_tokens_seen": 412748540, "step": 2403 }, { "epoch": 0.6320773328072599, "loss": 0.08199536800384521, "loss_ce": 0.0007880894117988646, "loss_iou": 0.52734375, "loss_num": 0.0162353515625, "loss_xval": 0.0810546875, "num_input_tokens_seen": 412748540, "step": 2403 }, { "epoch": 0.632340369566647, "grad_norm": 3.8619766281957557, "learning_rate": 5e-06, "loss": 0.0813, "num_input_tokens_seen": 412919044, "step": 2404 }, { "epoch": 0.632340369566647, "loss": 0.07876091450452805, "loss_ce": 0.0004375489370431751, "loss_iou": 0.3671875, "loss_num": 0.015625, "loss_xval": 0.078125, "num_input_tokens_seen": 412919044, "step": 2404 }, { "epoch": 0.6326034063260341, "grad_norm": 4.690903318512814, "learning_rate": 5e-06, "loss": 0.1315, "num_input_tokens_seen": 413089276, "step": 2405 }, { "epoch": 0.6326034063260341, "loss": 0.12442415952682495, "loss_ce": 0.0037271445617079735, "loss_iou": 0.54296875, "loss_num": 0.024169921875, "loss_xval": 0.12060546875, "num_input_tokens_seen": 413089276, "step": 2405 }, { "epoch": 0.6328664430854212, "grad_norm": 5.509443001792319, "learning_rate": 5e-06, "loss": 0.1865, "num_input_tokens_seen": 413261512, "step": 2406 }, { "epoch": 0.6328664430854212, "loss": 0.2314736247062683, "loss_ce": 0.0039345622062683105, "loss_iou": 0.55078125, "loss_num": 0.04541015625, "loss_xval": 0.2275390625, "num_input_tokens_seen": 413261512, "step": 2406 }, { "epoch": 0.6331294798448083, "grad_norm": 4.789366484519657, "learning_rate": 5e-06, "loss": 0.1568, "num_input_tokens_seen": 413433928, "step": 2407 }, { 
"epoch": 0.6331294798448083, "loss": 0.10793605446815491, "loss_ce": 0.002009541727602482, "loss_iou": 0.4609375, "loss_num": 0.0211181640625, "loss_xval": 0.10595703125, "num_input_tokens_seen": 413433928, "step": 2407 }, { "epoch": 0.6333925166041955, "grad_norm": 8.59758393749405, "learning_rate": 5e-06, "loss": 0.0976, "num_input_tokens_seen": 413606128, "step": 2408 }, { "epoch": 0.6333925166041955, "loss": 0.0926143079996109, "loss_ce": 0.001000535092316568, "loss_iou": 0.40234375, "loss_num": 0.018310546875, "loss_xval": 0.091796875, "num_input_tokens_seen": 413606128, "step": 2408 }, { "epoch": 0.6336555533635826, "grad_norm": 9.998355206402028, "learning_rate": 5e-06, "loss": 0.1245, "num_input_tokens_seen": 413778176, "step": 2409 }, { "epoch": 0.6336555533635826, "loss": 0.12268656492233276, "loss_ce": 0.0002958154655061662, "loss_iou": 0.39453125, "loss_num": 0.0245361328125, "loss_xval": 0.12255859375, "num_input_tokens_seen": 413778176, "step": 2409 }, { "epoch": 0.6339185901229697, "grad_norm": 4.79036194916934, "learning_rate": 5e-06, "loss": 0.1397, "num_input_tokens_seen": 413950292, "step": 2410 }, { "epoch": 0.6339185901229697, "loss": 0.22198227047920227, "loss_ce": 0.0022557121701538563, "loss_iou": 0.482421875, "loss_num": 0.0439453125, "loss_xval": 0.2197265625, "num_input_tokens_seen": 413950292, "step": 2410 }, { "epoch": 0.6341816268823568, "grad_norm": 21.148690076997823, "learning_rate": 5e-06, "loss": 0.1036, "num_input_tokens_seen": 414119124, "step": 2411 }, { "epoch": 0.6341816268823568, "loss": 0.08586390316486359, "loss_ce": 0.000368903303751722, "loss_iou": 0.494140625, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 414119124, "step": 2411 }, { "epoch": 0.6344446636417439, "grad_norm": 7.240778669769743, "learning_rate": 5e-06, "loss": 0.1765, "num_input_tokens_seen": 414291560, "step": 2412 }, { "epoch": 0.6344446636417439, "loss": 0.24769088625907898, "loss_ce": 0.0022685134317725897, 
"loss_iou": 0.369140625, "loss_num": 0.049072265625, "loss_xval": 0.2451171875, "num_input_tokens_seen": 414291560, "step": 2412 }, { "epoch": 0.634707700401131, "grad_norm": 15.982142868420826, "learning_rate": 5e-06, "loss": 0.1408, "num_input_tokens_seen": 414463784, "step": 2413 }, { "epoch": 0.634707700401131, "loss": 0.22224080562591553, "loss_ce": 0.0012630214914679527, "loss_iou": 0.474609375, "loss_num": 0.044189453125, "loss_xval": 0.220703125, "num_input_tokens_seen": 414463784, "step": 2413 }, { "epoch": 0.6349707371605182, "grad_norm": 5.563886751421209, "learning_rate": 5e-06, "loss": 0.0981, "num_input_tokens_seen": 414635968, "step": 2414 }, { "epoch": 0.6349707371605182, "loss": 0.12378741800785065, "loss_ce": 0.0024190132971853018, "loss_iou": 0.515625, "loss_num": 0.0242919921875, "loss_xval": 0.12158203125, "num_input_tokens_seen": 414635968, "step": 2414 }, { "epoch": 0.6352337739199053, "grad_norm": 6.971489879379984, "learning_rate": 5e-06, "loss": 0.0811, "num_input_tokens_seen": 414808184, "step": 2415 }, { "epoch": 0.6352337739199053, "loss": 0.09163232147693634, "loss_ce": 0.00017114286310970783, "loss_iou": 0.59765625, "loss_num": 0.018310546875, "loss_xval": 0.09130859375, "num_input_tokens_seen": 414808184, "step": 2415 }, { "epoch": 0.6354968106792924, "grad_norm": 10.763166082287821, "learning_rate": 5e-06, "loss": 0.1798, "num_input_tokens_seen": 414978292, "step": 2416 }, { "epoch": 0.6354968106792924, "loss": 0.1381605565547943, "loss_ce": 0.00016007423982955515, "loss_iou": 0.50390625, "loss_num": 0.027587890625, "loss_xval": 0.1376953125, "num_input_tokens_seen": 414978292, "step": 2416 }, { "epoch": 0.6357598474386795, "grad_norm": 9.836473738616426, "learning_rate": 5e-06, "loss": 0.1041, "num_input_tokens_seen": 415150344, "step": 2417 }, { "epoch": 0.6357598474386795, "loss": 0.1071944609284401, "loss_ce": 0.0013594944030046463, "loss_iou": 0.47265625, "loss_num": 0.0211181640625, "loss_xval": 0.10595703125, 
"num_input_tokens_seen": 415150344, "step": 2417 }, { "epoch": 0.6360228841980666, "grad_norm": 3.9009374145565308, "learning_rate": 5e-06, "loss": 0.1033, "num_input_tokens_seen": 415322328, "step": 2418 }, { "epoch": 0.6360228841980666, "loss": 0.08769555389881134, "loss_ce": 0.00026269443333148956, "loss_iou": 0.54296875, "loss_num": 0.0174560546875, "loss_xval": 0.08740234375, "num_input_tokens_seen": 415322328, "step": 2418 }, { "epoch": 0.6362859209574538, "grad_norm": 12.748558785828688, "learning_rate": 5e-06, "loss": 0.1426, "num_input_tokens_seen": 415494296, "step": 2419 }, { "epoch": 0.6362859209574538, "loss": 0.09499529004096985, "loss_ce": 0.004266531206667423, "loss_iou": 0.458984375, "loss_num": 0.0181884765625, "loss_xval": 0.0908203125, "num_input_tokens_seen": 415494296, "step": 2419 }, { "epoch": 0.636548957716841, "grad_norm": 19.009247758406477, "learning_rate": 5e-06, "loss": 0.1063, "num_input_tokens_seen": 415666184, "step": 2420 }, { "epoch": 0.636548957716841, "loss": 0.07810772955417633, "loss_ce": 0.0017832687590271235, "loss_iou": 0.54296875, "loss_num": 0.0152587890625, "loss_xval": 0.076171875, "num_input_tokens_seen": 415666184, "step": 2420 }, { "epoch": 0.636811994476228, "grad_norm": 4.624128366794573, "learning_rate": 5e-06, "loss": 0.1594, "num_input_tokens_seen": 415838228, "step": 2421 }, { "epoch": 0.636811994476228, "loss": 0.11569841206073761, "loss_ce": 0.0010743860621005297, "loss_iou": 0.546875, "loss_num": 0.02294921875, "loss_xval": 0.11474609375, "num_input_tokens_seen": 415838228, "step": 2421 }, { "epoch": 0.6370750312356152, "grad_norm": 4.489659891906354, "learning_rate": 5e-06, "loss": 0.0868, "num_input_tokens_seen": 416010568, "step": 2422 }, { "epoch": 0.6370750312356152, "loss": 0.09839779138565063, "loss_ce": 0.0043120919726789, "loss_iou": 0.578125, "loss_num": 0.018798828125, "loss_xval": 0.09423828125, "num_input_tokens_seen": 416010568, "step": 2422 }, { "epoch": 0.6373380679950023, "grad_norm": 
5.451345385819082, "learning_rate": 5e-06, "loss": 0.1056, "num_input_tokens_seen": 416182976, "step": 2423 }, { "epoch": 0.6373380679950023, "loss": 0.11042429506778717, "loss_ce": 0.005230204667896032, "loss_iou": 0.65234375, "loss_num": 0.02099609375, "loss_xval": 0.10498046875, "num_input_tokens_seen": 416182976, "step": 2423 }, { "epoch": 0.6376011047543895, "grad_norm": 4.648386667387077, "learning_rate": 5e-06, "loss": 0.08, "num_input_tokens_seen": 416354968, "step": 2424 }, { "epoch": 0.6376011047543895, "loss": 0.08212631195783615, "loss_ce": 0.002170257270336151, "loss_iou": 0.55859375, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 416354968, "step": 2424 }, { "epoch": 0.6378641415137766, "grad_norm": 7.161469269147777, "learning_rate": 5e-06, "loss": 0.1544, "num_input_tokens_seen": 416527268, "step": 2425 }, { "epoch": 0.6378641415137766, "loss": 0.1374281942844391, "loss_ce": 0.0008009903831407428, "loss_iou": 0.73828125, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 416527268, "step": 2425 }, { "epoch": 0.6381271782731637, "grad_norm": 7.707879726473491, "learning_rate": 5e-06, "loss": 0.1335, "num_input_tokens_seen": 416699532, "step": 2426 }, { "epoch": 0.6381271782731637, "loss": 0.06318493187427521, "loss_ce": 0.00016612766194157302, "loss_iou": 0.482421875, "loss_num": 0.0125732421875, "loss_xval": 0.06298828125, "num_input_tokens_seen": 416699532, "step": 2426 }, { "epoch": 0.6383902150325508, "grad_norm": 4.53622350388229, "learning_rate": 5e-06, "loss": 0.085, "num_input_tokens_seen": 416871872, "step": 2427 }, { "epoch": 0.6383902150325508, "loss": 0.14566369354724884, "loss_ce": 0.0013460592599585652, "loss_iou": 0.640625, "loss_num": 0.0289306640625, "loss_xval": 0.14453125, "num_input_tokens_seen": 416871872, "step": 2427 }, { "epoch": 0.6386532517919379, "grad_norm": 5.815980535010829, "learning_rate": 5e-06, "loss": 0.1329, "num_input_tokens_seen": 417044020, "step": 2428 }, 
{ "epoch": 0.6386532517919379, "loss": 0.12992171943187714, "loss_ce": 0.0009391760104335845, "loss_iou": 0.5078125, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 417044020, "step": 2428 }, { "epoch": 0.6389162885513251, "grad_norm": 4.632913290565498, "learning_rate": 5e-06, "loss": 0.1236, "num_input_tokens_seen": 417215912, "step": 2429 }, { "epoch": 0.6389162885513251, "loss": 0.10175777971744537, "loss_ce": 0.0023772907443344593, "loss_iou": 0.53125, "loss_num": 0.0198974609375, "loss_xval": 0.099609375, "num_input_tokens_seen": 417215912, "step": 2429 }, { "epoch": 0.6391793253107122, "grad_norm": 3.31675640465626, "learning_rate": 5e-06, "loss": 0.1199, "num_input_tokens_seen": 417388272, "step": 2430 }, { "epoch": 0.6391793253107122, "loss": 0.13262677192687988, "loss_ce": 0.0031712136697024107, "loss_iou": 0.46875, "loss_num": 0.02587890625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 417388272, "step": 2430 }, { "epoch": 0.6394423620700993, "grad_norm": 9.57220985801255, "learning_rate": 5e-06, "loss": 0.0935, "num_input_tokens_seen": 417560304, "step": 2431 }, { "epoch": 0.6394423620700993, "loss": 0.08074182271957397, "loss_ce": 0.00218957313336432, "loss_iou": 0.5078125, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 417560304, "step": 2431 }, { "epoch": 0.6397053988294864, "grad_norm": 5.444532606716201, "learning_rate": 5e-06, "loss": 0.1614, "num_input_tokens_seen": 417732592, "step": 2432 }, { "epoch": 0.6397053988294864, "loss": 0.23035961389541626, "loss_ce": 0.0006538145244121552, "loss_iou": 0.43359375, "loss_num": 0.0458984375, "loss_xval": 0.2294921875, "num_input_tokens_seen": 417732592, "step": 2432 }, { "epoch": 0.6399684355888735, "grad_norm": 10.22830520629982, "learning_rate": 5e-06, "loss": 0.1027, "num_input_tokens_seen": 417904600, "step": 2433 }, { "epoch": 0.6399684355888735, "loss": 0.08408404886722565, "loss_ce": 0.0004964005202054977, "loss_iou": 
0.57421875, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 417904600, "step": 2433 }, { "epoch": 0.6402314723482607, "grad_norm": 4.834822071481386, "learning_rate": 5e-06, "loss": 0.1102, "num_input_tokens_seen": 418076896, "step": 2434 }, { "epoch": 0.6402314723482607, "loss": 0.07859447598457336, "loss_ce": 0.0008967254543676972, "loss_iou": 0.46484375, "loss_num": 0.0155029296875, "loss_xval": 0.07763671875, "num_input_tokens_seen": 418076896, "step": 2434 }, { "epoch": 0.6404945091076478, "grad_norm": 11.222393651192586, "learning_rate": 5e-06, "loss": 0.1709, "num_input_tokens_seen": 418249128, "step": 2435 }, { "epoch": 0.6404945091076478, "loss": 0.17654769122600555, "loss_ce": 0.0008885157876648009, "loss_iou": 0.59375, "loss_num": 0.03515625, "loss_xval": 0.17578125, "num_input_tokens_seen": 418249128, "step": 2435 }, { "epoch": 0.6407575458670349, "grad_norm": 4.617257720330074, "learning_rate": 5e-06, "loss": 0.1234, "num_input_tokens_seen": 418421340, "step": 2436 }, { "epoch": 0.6407575458670349, "loss": 0.15517690777778625, "loss_ce": 0.0020702139008790255, "loss_iou": 0.5546875, "loss_num": 0.0306396484375, "loss_xval": 0.1533203125, "num_input_tokens_seen": 418421340, "step": 2436 }, { "epoch": 0.641020582626422, "grad_norm": 4.8286585158927, "learning_rate": 5e-06, "loss": 0.1282, "num_input_tokens_seen": 418591736, "step": 2437 }, { "epoch": 0.641020582626422, "loss": 0.17714877426624298, "loss_ce": 0.003656333312392235, "loss_iou": 0.48046875, "loss_num": 0.03466796875, "loss_xval": 0.173828125, "num_input_tokens_seen": 418591736, "step": 2437 }, { "epoch": 0.6412836193858091, "grad_norm": 13.06198419822853, "learning_rate": 5e-06, "loss": 0.1017, "num_input_tokens_seen": 418763952, "step": 2438 }, { "epoch": 0.6412836193858091, "loss": 0.1256633698940277, "loss_ce": 0.0007243968429975212, "loss_iou": 0.5703125, "loss_num": 0.0250244140625, "loss_xval": 0.125, "num_input_tokens_seen": 418763952, "step": 2438 }, 
{ "epoch": 0.6415466561451962, "grad_norm": 18.947469777145443, "learning_rate": 5e-06, "loss": 0.1404, "num_input_tokens_seen": 418936088, "step": 2439 }, { "epoch": 0.6415466561451962, "loss": 0.09127810597419739, "loss_ce": 0.003967312164604664, "loss_iou": 0.65234375, "loss_num": 0.0174560546875, "loss_xval": 0.08740234375, "num_input_tokens_seen": 418936088, "step": 2439 }, { "epoch": 0.6418096929045835, "grad_norm": 10.72734062355858, "learning_rate": 5e-06, "loss": 0.129, "num_input_tokens_seen": 419108496, "step": 2440 }, { "epoch": 0.6418096929045835, "loss": 0.16916052997112274, "loss_ce": 0.0024124737828969955, "loss_iou": 0.52734375, "loss_num": 0.033447265625, "loss_xval": 0.1669921875, "num_input_tokens_seen": 419108496, "step": 2440 }, { "epoch": 0.6420727296639706, "grad_norm": 9.781068157842057, "learning_rate": 5e-06, "loss": 0.1154, "num_input_tokens_seen": 419280412, "step": 2441 }, { "epoch": 0.6420727296639706, "loss": 0.06414149701595306, "loss_ce": 0.0008175191469490528, "loss_iou": 0.5390625, "loss_num": 0.01263427734375, "loss_xval": 0.0634765625, "num_input_tokens_seen": 419280412, "step": 2441 }, { "epoch": 0.6423357664233577, "grad_norm": 4.632619855595305, "learning_rate": 5e-06, "loss": 0.1069, "num_input_tokens_seen": 419452340, "step": 2442 }, { "epoch": 0.6423357664233577, "loss": 0.11532483249902725, "loss_ce": 0.0007618411909788847, "loss_iou": 0.373046875, "loss_num": 0.02294921875, "loss_xval": 0.11474609375, "num_input_tokens_seen": 419452340, "step": 2442 }, { "epoch": 0.6425988031827448, "grad_norm": 6.28676819593018, "learning_rate": 5e-06, "loss": 0.119, "num_input_tokens_seen": 419624260, "step": 2443 }, { "epoch": 0.6425988031827448, "loss": 0.08432676643133163, "loss_ce": 0.0007086057448759675, "loss_iou": 0.51171875, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 419624260, "step": 2443 }, { "epoch": 0.6428618399421319, "grad_norm": 4.106105061012571, "learning_rate": 5e-06, "loss": 
0.1012, "num_input_tokens_seen": 419796396, "step": 2444 }, { "epoch": 0.6428618399421319, "loss": 0.08845219761133194, "loss_ce": 0.00016484873776789755, "loss_iou": 0.55078125, "loss_num": 0.0177001953125, "loss_xval": 0.08837890625, "num_input_tokens_seen": 419796396, "step": 2444 }, { "epoch": 0.6431248767015191, "grad_norm": 5.5585844707251555, "learning_rate": 5e-06, "loss": 0.0914, "num_input_tokens_seen": 419965620, "step": 2445 }, { "epoch": 0.6431248767015191, "loss": 0.08765023946762085, "loss_ce": 0.0027198141906410456, "loss_iou": 0.369140625, "loss_num": 0.0169677734375, "loss_xval": 0.0849609375, "num_input_tokens_seen": 419965620, "step": 2445 }, { "epoch": 0.6433879134609062, "grad_norm": 6.941283237766657, "learning_rate": 5e-06, "loss": 0.1388, "num_input_tokens_seen": 420137700, "step": 2446 }, { "epoch": 0.6433879134609062, "loss": 0.15698650479316711, "loss_ce": 0.0023844558745622635, "loss_iou": 0.60546875, "loss_num": 0.0308837890625, "loss_xval": 0.154296875, "num_input_tokens_seen": 420137700, "step": 2446 }, { "epoch": 0.6436509502202933, "grad_norm": 4.724784845199971, "learning_rate": 5e-06, "loss": 0.1266, "num_input_tokens_seen": 420309964, "step": 2447 }, { "epoch": 0.6436509502202933, "loss": 0.05271516367793083, "loss_ce": 0.0011099397670477629, "loss_iou": 0.515625, "loss_num": 0.01031494140625, "loss_xval": 0.051513671875, "num_input_tokens_seen": 420309964, "step": 2447 }, { "epoch": 0.6439139869796804, "grad_norm": 6.277321029907694, "learning_rate": 5e-06, "loss": 0.1226, "num_input_tokens_seen": 420482028, "step": 2448 }, { "epoch": 0.6439139869796804, "loss": 0.19532331824302673, "loss_ce": 0.0010789325460791588, "loss_iou": 0.51171875, "loss_num": 0.038818359375, "loss_xval": 0.1943359375, "num_input_tokens_seen": 420482028, "step": 2448 }, { "epoch": 0.6441770237390675, "grad_norm": 13.80293628596545, "learning_rate": 5e-06, "loss": 0.1117, "num_input_tokens_seen": 420654152, "step": 2449 }, { "epoch": 0.6441770237390675, 
"loss": 0.07553352415561676, "loss_ce": 0.00017035921337082982, "loss_iou": 0.443359375, "loss_num": 0.01507568359375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 420654152, "step": 2449 }, { "epoch": 0.6444400604984547, "grad_norm": 10.20337023035234, "learning_rate": 5e-06, "loss": 0.1029, "num_input_tokens_seen": 420823676, "step": 2450 }, { "epoch": 0.6444400604984547, "loss": 0.1215822771191597, "loss_ce": 0.0013124945107847452, "loss_iou": 0.5078125, "loss_num": 0.0240478515625, "loss_xval": 0.1201171875, "num_input_tokens_seen": 420823676, "step": 2450 }, { "epoch": 0.6447030972578418, "grad_norm": 3.8758961183069034, "learning_rate": 5e-06, "loss": 0.0947, "num_input_tokens_seen": 420991548, "step": 2451 }, { "epoch": 0.6447030972578418, "loss": 0.04900962486863136, "loss_ce": 0.0008071088814176619, "loss_iou": 0.61328125, "loss_num": 0.0096435546875, "loss_xval": 0.048095703125, "num_input_tokens_seen": 420991548, "step": 2451 }, { "epoch": 0.6449661340172289, "grad_norm": 17.020894064566225, "learning_rate": 5e-06, "loss": 0.1099, "num_input_tokens_seen": 421163940, "step": 2452 }, { "epoch": 0.6449661340172289, "loss": 0.11804142594337463, "loss_ce": 6.047027636668645e-05, "loss_iou": 0.455078125, "loss_num": 0.023681640625, "loss_xval": 0.1181640625, "num_input_tokens_seen": 421163940, "step": 2452 }, { "epoch": 0.645229170776616, "grad_norm": 9.681794920323762, "learning_rate": 5e-06, "loss": 0.0851, "num_input_tokens_seen": 421333576, "step": 2453 }, { "epoch": 0.645229170776616, "loss": 0.1210954412817955, "loss_ce": 0.0014970521442592144, "loss_iou": 0.52734375, "loss_num": 0.02392578125, "loss_xval": 0.11962890625, "num_input_tokens_seen": 421333576, "step": 2453 }, { "epoch": 0.6454922075360031, "grad_norm": 4.642104923921384, "learning_rate": 5e-06, "loss": 0.0956, "num_input_tokens_seen": 421505944, "step": 2454 }, { "epoch": 0.6454922075360031, "loss": 0.1038353219628334, "loss_ce": 0.0009910848457366228, "loss_iou": 0.53515625, 
"loss_num": 0.0206298828125, "loss_xval": 0.10302734375, "num_input_tokens_seen": 421505944, "step": 2454 }, { "epoch": 0.6457552442953903, "grad_norm": 9.523149673908406, "learning_rate": 5e-06, "loss": 0.1258, "num_input_tokens_seen": 421677944, "step": 2455 }, { "epoch": 0.6457552442953903, "loss": 0.15976807475090027, "loss_ce": 0.0006189080304466188, "loss_iou": 0.419921875, "loss_num": 0.03173828125, "loss_xval": 0.1591796875, "num_input_tokens_seen": 421677944, "step": 2455 }, { "epoch": 0.6460182810547774, "grad_norm": 5.022843580524971, "learning_rate": 5e-06, "loss": 0.1161, "num_input_tokens_seen": 421850200, "step": 2456 }, { "epoch": 0.6460182810547774, "loss": 0.11521396040916443, "loss_ce": 0.0040994551964104176, "loss_iou": 0.5390625, "loss_num": 0.022216796875, "loss_xval": 0.111328125, "num_input_tokens_seen": 421850200, "step": 2456 }, { "epoch": 0.6462813178141645, "grad_norm": 18.53576034320469, "learning_rate": 5e-06, "loss": 0.1467, "num_input_tokens_seen": 422020768, "step": 2457 }, { "epoch": 0.6462813178141645, "loss": 0.145416259765625, "loss_ce": 0.002319341991096735, "loss_iou": 0.60546875, "loss_num": 0.0286865234375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 422020768, "step": 2457 }, { "epoch": 0.6465443545735516, "grad_norm": 3.5647933372156486, "learning_rate": 5e-06, "loss": 0.0981, "num_input_tokens_seen": 422188996, "step": 2458 }, { "epoch": 0.6465443545735516, "loss": 0.09572234004735947, "loss_ce": 0.002552174963057041, "loss_iou": 0.435546875, "loss_num": 0.0186767578125, "loss_xval": 0.09326171875, "num_input_tokens_seen": 422188996, "step": 2458 }, { "epoch": 0.6468073913329387, "grad_norm": 7.264219107970771, "learning_rate": 5e-06, "loss": 0.1532, "num_input_tokens_seen": 422361088, "step": 2459 }, { "epoch": 0.6468073913329387, "loss": 0.25412267446517944, "loss_ce": 0.002886736299842596, "loss_iou": 0.56640625, "loss_num": 0.050048828125, "loss_xval": 0.251953125, "num_input_tokens_seen": 422361088, "step": 
2459 }, { "epoch": 0.647070428092326, "grad_norm": 4.6344065191429635, "learning_rate": 5e-06, "loss": 0.1305, "num_input_tokens_seen": 422533008, "step": 2460 }, { "epoch": 0.647070428092326, "loss": 0.17847508192062378, "loss_ce": 0.00795811414718628, "loss_iou": 0.482421875, "loss_num": 0.0341796875, "loss_xval": 0.1708984375, "num_input_tokens_seen": 422533008, "step": 2460 }, { "epoch": 0.6473334648517131, "grad_norm": 8.368380580785693, "learning_rate": 5e-06, "loss": 0.1282, "num_input_tokens_seen": 422705020, "step": 2461 }, { "epoch": 0.6473334648517131, "loss": 0.11911526322364807, "loss_ce": 0.0007375775021500885, "loss_iou": 0.65234375, "loss_num": 0.023681640625, "loss_xval": 0.1181640625, "num_input_tokens_seen": 422705020, "step": 2461 }, { "epoch": 0.6475965016111002, "grad_norm": 14.530786026480682, "learning_rate": 5e-06, "loss": 0.1327, "num_input_tokens_seen": 422876816, "step": 2462 }, { "epoch": 0.6475965016111002, "loss": 0.1273796260356903, "loss_ce": 0.0006401161663234234, "loss_iou": 0.5390625, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 422876816, "step": 2462 }, { "epoch": 0.6478595383704873, "grad_norm": 5.386199695848856, "learning_rate": 5e-06, "loss": 0.1396, "num_input_tokens_seen": 423048624, "step": 2463 }, { "epoch": 0.6478595383704873, "loss": 0.07592228055000305, "loss_ce": 0.00014713153359480202, "loss_iou": 0.44921875, "loss_num": 0.01513671875, "loss_xval": 0.07568359375, "num_input_tokens_seen": 423048624, "step": 2463 }, { "epoch": 0.6481225751298744, "grad_norm": 4.520647486495735, "learning_rate": 5e-06, "loss": 0.0844, "num_input_tokens_seen": 423220672, "step": 2464 }, { "epoch": 0.6481225751298744, "loss": 0.0708494782447815, "loss_ce": 0.0003843960876110941, "loss_iou": 0.6015625, "loss_num": 0.01409912109375, "loss_xval": 0.0703125, "num_input_tokens_seen": 423220672, "step": 2464 }, { "epoch": 0.6483856118892615, "grad_norm": 6.550669243795055, "learning_rate": 5e-06, "loss": 0.0995, 
"num_input_tokens_seen": 423392552, "step": 2465 }, { "epoch": 0.6483856118892615, "loss": 0.07499829679727554, "loss_ce": 0.0005048871971666813, "loss_iou": 0.6953125, "loss_num": 0.014892578125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 423392552, "step": 2465 }, { "epoch": 0.6486486486486487, "grad_norm": 19.46883155452454, "learning_rate": 5e-06, "loss": 0.1021, "num_input_tokens_seen": 423564752, "step": 2466 }, { "epoch": 0.6486486486486487, "loss": 0.20438973605632782, "loss_ce": 0.000684903294313699, "loss_iou": 0.67578125, "loss_num": 0.040771484375, "loss_xval": 0.2041015625, "num_input_tokens_seen": 423564752, "step": 2466 }, { "epoch": 0.6489116854080358, "grad_norm": 5.889523777651774, "learning_rate": 5e-06, "loss": 0.1212, "num_input_tokens_seen": 423737504, "step": 2467 }, { "epoch": 0.6489116854080358, "loss": 0.1293552964925766, "loss_ce": 0.002432688605040312, "loss_iou": 0.609375, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 423737504, "step": 2467 }, { "epoch": 0.6491747221674229, "grad_norm": 7.016832670115666, "learning_rate": 5e-06, "loss": 0.1505, "num_input_tokens_seen": 423909608, "step": 2468 }, { "epoch": 0.6491747221674229, "loss": 0.05709705874323845, "loss_ce": 0.0013414426939561963, "loss_iou": 0.49609375, "loss_num": 0.01116943359375, "loss_xval": 0.0556640625, "num_input_tokens_seen": 423909608, "step": 2468 }, { "epoch": 0.64943775892681, "grad_norm": 3.9924309473660817, "learning_rate": 5e-06, "loss": 0.1385, "num_input_tokens_seen": 424081612, "step": 2469 }, { "epoch": 0.64943775892681, "loss": 0.21573078632354736, "loss_ce": 0.0024129238445311785, "loss_iou": 0.50390625, "loss_num": 0.042724609375, "loss_xval": 0.212890625, "num_input_tokens_seen": 424081612, "step": 2469 }, { "epoch": 0.6497007956861971, "grad_norm": 8.786316595444035, "learning_rate": 5e-06, "loss": 0.0929, "num_input_tokens_seen": 424253804, "step": 2470 }, { "epoch": 0.6497007956861971, "loss": 
0.05929354950785637, "loss_ce": 0.00044039852218702435, "loss_iou": null, "loss_num": 0.01177978515625, "loss_xval": 0.058837890625, "num_input_tokens_seen": 424253804, "step": 2470 }, { "epoch": 0.6499638324455843, "grad_norm": 15.002230358263791, "learning_rate": 5e-06, "loss": 0.1137, "num_input_tokens_seen": 424425936, "step": 2471 }, { "epoch": 0.6499638324455843, "loss": 0.13272526860237122, "loss_ce": 0.004475146532058716, "loss_iou": 0.39453125, "loss_num": 0.025634765625, "loss_xval": 0.1279296875, "num_input_tokens_seen": 424425936, "step": 2471 }, { "epoch": 0.6502268692049714, "grad_norm": 5.3031138296122675, "learning_rate": 5e-06, "loss": 0.1006, "num_input_tokens_seen": 424596308, "step": 2472 }, { "epoch": 0.6502268692049714, "loss": 0.06613775342702866, "loss_ce": 0.005651914514601231, "loss_iou": 0.462890625, "loss_num": 0.0120849609375, "loss_xval": 0.060546875, "num_input_tokens_seen": 424596308, "step": 2472 }, { "epoch": 0.6504899059643585, "grad_norm": 17.448583587295317, "learning_rate": 5e-06, "loss": 0.1235, "num_input_tokens_seen": 424768552, "step": 2473 }, { "epoch": 0.6504899059643585, "loss": 0.08512883633375168, "loss_ce": 0.0031891404651105404, "loss_iou": 0.5625, "loss_num": 0.016357421875, "loss_xval": 0.08203125, "num_input_tokens_seen": 424768552, "step": 2473 }, { "epoch": 0.6507529427237456, "grad_norm": 6.87156581112055, "learning_rate": 5e-06, "loss": 0.0809, "num_input_tokens_seen": 424940764, "step": 2474 }, { "epoch": 0.6507529427237456, "loss": 0.07900265604257584, "loss_ce": 0.0035021707881242037, "loss_iou": 0.5546875, "loss_num": 0.01513671875, "loss_xval": 0.07568359375, "num_input_tokens_seen": 424940764, "step": 2474 }, { "epoch": 0.6510159794831327, "grad_norm": 7.4967444521140765, "learning_rate": 5e-06, "loss": 0.1012, "num_input_tokens_seen": 425112860, "step": 2475 }, { "epoch": 0.6510159794831327, "loss": 0.1566176563501358, "loss_ce": 0.0004592128098011017, "loss_iou": 0.55078125, "loss_num": 0.03125, 
"loss_xval": 0.15625, "num_input_tokens_seen": 425112860, "step": 2475 }, { "epoch": 0.6512790162425199, "grad_norm": 7.198491382802353, "learning_rate": 5e-06, "loss": 0.1238, "num_input_tokens_seen": 425284996, "step": 2476 }, { "epoch": 0.6512790162425199, "loss": 0.12442326545715332, "loss_ce": 0.0022308877669274807, "loss_iou": 0.5234375, "loss_num": 0.0244140625, "loss_xval": 0.1220703125, "num_input_tokens_seen": 425284996, "step": 2476 }, { "epoch": 0.651542053001907, "grad_norm": 12.660738408941835, "learning_rate": 5e-06, "loss": 0.1301, "num_input_tokens_seen": 425457444, "step": 2477 }, { "epoch": 0.651542053001907, "loss": 0.139640212059021, "loss_ce": 0.0008462676778435707, "loss_iou": 0.65625, "loss_num": 0.0277099609375, "loss_xval": 0.138671875, "num_input_tokens_seen": 425457444, "step": 2477 }, { "epoch": 0.6518050897612941, "grad_norm": 4.788908959245028, "learning_rate": 5e-06, "loss": 0.117, "num_input_tokens_seen": 425629356, "step": 2478 }, { "epoch": 0.6518050897612941, "loss": 0.11505892872810364, "loss_ce": 0.0043716710060834885, "loss_iou": 0.44921875, "loss_num": 0.0220947265625, "loss_xval": 0.11083984375, "num_input_tokens_seen": 425629356, "step": 2478 }, { "epoch": 0.6520681265206812, "grad_norm": 14.713914944582257, "learning_rate": 5e-06, "loss": 0.1519, "num_input_tokens_seen": 425801412, "step": 2479 }, { "epoch": 0.6520681265206812, "loss": 0.09329426288604736, "loss_ce": 0.0003987499512732029, "loss_iou": 0.5234375, "loss_num": 0.0185546875, "loss_xval": 0.0927734375, "num_input_tokens_seen": 425801412, "step": 2479 }, { "epoch": 0.6523311632800683, "grad_norm": 11.370966467516885, "learning_rate": 5e-06, "loss": 0.1579, "num_input_tokens_seen": 425973396, "step": 2480 }, { "epoch": 0.6523311632800683, "loss": 0.12143571674823761, "loss_ce": 0.0008607673225924373, "loss_iou": 0.49609375, "loss_num": 0.024169921875, "loss_xval": 0.12060546875, "num_input_tokens_seen": 425973396, "step": 2480 }, { "epoch": 0.6525942000394556, 
"grad_norm": 16.96417675320112, "learning_rate": 5e-06, "loss": 0.1119, "num_input_tokens_seen": 426142788, "step": 2481 }, { "epoch": 0.6525942000394556, "loss": 0.058773696422576904, "loss_ce": 0.00017994196969084442, "loss_iou": 0.51171875, "loss_num": 0.01171875, "loss_xval": 0.05859375, "num_input_tokens_seen": 426142788, "step": 2481 }, { "epoch": 0.6528572367988427, "grad_norm": 3.267968495608878, "learning_rate": 5e-06, "loss": 0.1454, "num_input_tokens_seen": 426314852, "step": 2482 }, { "epoch": 0.6528572367988427, "loss": 0.14266598224639893, "loss_ce": 0.0022698603570461273, "loss_iou": 0.37109375, "loss_num": 0.028076171875, "loss_xval": 0.140625, "num_input_tokens_seen": 426314852, "step": 2482 }, { "epoch": 0.6531202735582298, "grad_norm": 12.820952382520073, "learning_rate": 5e-06, "loss": 0.1257, "num_input_tokens_seen": 426487324, "step": 2483 }, { "epoch": 0.6531202735582298, "loss": 0.09693928062915802, "loss_ce": 0.0016328811179846525, "loss_iou": 0.4296875, "loss_num": 0.01904296875, "loss_xval": 0.09521484375, "num_input_tokens_seen": 426487324, "step": 2483 }, { "epoch": 0.6533833103176169, "grad_norm": 10.800205106839552, "learning_rate": 5e-06, "loss": 0.1315, "num_input_tokens_seen": 426659468, "step": 2484 }, { "epoch": 0.6533833103176169, "loss": 0.07587607949972153, "loss_ce": 0.00039084581658244133, "loss_iou": 0.4921875, "loss_num": 0.01507568359375, "loss_xval": 0.07568359375, "num_input_tokens_seen": 426659468, "step": 2484 }, { "epoch": 0.653646347077004, "grad_norm": 7.9171669810832395, "learning_rate": 5e-06, "loss": 0.0853, "num_input_tokens_seen": 426831496, "step": 2485 }, { "epoch": 0.653646347077004, "loss": 0.07471035420894623, "loss_ce": 0.0035586238373070955, "loss_iou": 0.6015625, "loss_num": 0.01422119140625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 426831496, "step": 2485 }, { "epoch": 0.6539093838363912, "grad_norm": 8.230428013130412, "learning_rate": 5e-06, "loss": 0.125, "num_input_tokens_seen": 
427003612, "step": 2486 }, { "epoch": 0.6539093838363912, "loss": 0.08275149762630463, "loss_ce": 0.0038025237154215574, "loss_iou": 0.5234375, "loss_num": 0.0157470703125, "loss_xval": 0.0791015625, "num_input_tokens_seen": 427003612, "step": 2486 }, { "epoch": 0.6541724205957783, "grad_norm": 3.632885838865945, "learning_rate": 5e-06, "loss": 0.1363, "num_input_tokens_seen": 427175916, "step": 2487 }, { "epoch": 0.6541724205957783, "loss": 0.1029161587357521, "loss_ce": 0.000987447565421462, "loss_iou": 0.52734375, "loss_num": 0.0203857421875, "loss_xval": 0.10205078125, "num_input_tokens_seen": 427175916, "step": 2487 }, { "epoch": 0.6544354573551654, "grad_norm": 3.5649981841807588, "learning_rate": 5e-06, "loss": 0.0923, "num_input_tokens_seen": 427348060, "step": 2488 }, { "epoch": 0.6544354573551654, "loss": 0.07192617654800415, "loss_ce": 0.003597316797822714, "loss_iou": 0.51171875, "loss_num": 0.013671875, "loss_xval": 0.068359375, "num_input_tokens_seen": 427348060, "step": 2488 }, { "epoch": 0.6546984941145525, "grad_norm": 5.397602518661847, "learning_rate": 5e-06, "loss": 0.134, "num_input_tokens_seen": 427520416, "step": 2489 }, { "epoch": 0.6546984941145525, "loss": 0.08971969783306122, "loss_ce": 0.0012797524686902761, "loss_iou": 0.470703125, "loss_num": 0.0177001953125, "loss_xval": 0.08837890625, "num_input_tokens_seen": 427520416, "step": 2489 }, { "epoch": 0.6549615308739396, "grad_norm": 2.996287220023088, "learning_rate": 5e-06, "loss": 0.0974, "num_input_tokens_seen": 427692328, "step": 2490 }, { "epoch": 0.6549615308739396, "loss": 0.11320722848176956, "loss_ce": 0.0006584011716768146, "loss_iou": 0.3828125, "loss_num": 0.0225830078125, "loss_xval": 0.1123046875, "num_input_tokens_seen": 427692328, "step": 2490 }, { "epoch": 0.6552245676333267, "grad_norm": 9.620885086024206, "learning_rate": 5e-06, "loss": 0.1271, "num_input_tokens_seen": 427864376, "step": 2491 }, { "epoch": 0.6552245676333267, "loss": 0.10166727006435394, "loss_ce": 
0.0020884163677692413, "loss_iou": 0.4609375, "loss_num": 0.0198974609375, "loss_xval": 0.099609375, "num_input_tokens_seen": 427864376, "step": 2491 }, { "epoch": 0.6554876043927139, "grad_norm": 38.296683588407596, "learning_rate": 5e-06, "loss": 0.1392, "num_input_tokens_seen": 428036792, "step": 2492 }, { "epoch": 0.6554876043927139, "loss": 0.06815087795257568, "loss_ce": 6.61654194118455e-05, "loss_iou": 0.44140625, "loss_num": 0.01361083984375, "loss_xval": 0.06787109375, "num_input_tokens_seen": 428036792, "step": 2492 }, { "epoch": 0.655750641152101, "grad_norm": 29.525923288124442, "learning_rate": 5e-06, "loss": 0.1634, "num_input_tokens_seen": 428207124, "step": 2493 }, { "epoch": 0.655750641152101, "loss": 0.23705750703811646, "loss_ce": 0.0035522649995982647, "loss_iou": 0.59375, "loss_num": 0.046630859375, "loss_xval": 0.2333984375, "num_input_tokens_seen": 428207124, "step": 2493 }, { "epoch": 0.6560136779114881, "grad_norm": 5.543812647744825, "learning_rate": 5e-06, "loss": 0.1341, "num_input_tokens_seen": 428379688, "step": 2494 }, { "epoch": 0.6560136779114881, "loss": 0.09441094100475311, "loss_ce": 0.0024462228175252676, "loss_iou": 0.515625, "loss_num": 0.0184326171875, "loss_xval": 0.091796875, "num_input_tokens_seen": 428379688, "step": 2494 }, { "epoch": 0.6562767146708752, "grad_norm": 32.70215077495664, "learning_rate": 5e-06, "loss": 0.0854, "num_input_tokens_seen": 428551908, "step": 2495 }, { "epoch": 0.6562767146708752, "loss": 0.05353452265262604, "loss_ce": 0.00015927490312606096, "loss_iou": 0.67578125, "loss_num": 0.01068115234375, "loss_xval": 0.053466796875, "num_input_tokens_seen": 428551908, "step": 2495 }, { "epoch": 0.6565397514302623, "grad_norm": 9.54910159759898, "learning_rate": 5e-06, "loss": 0.1296, "num_input_tokens_seen": 428724396, "step": 2496 }, { "epoch": 0.6565397514302623, "loss": 0.1285240650177002, "loss_ce": 0.00035023505915887654, "loss_iou": 0.609375, "loss_num": 0.025634765625, "loss_xval": 0.1279296875, 
"num_input_tokens_seen": 428724396, "step": 2496 }, { "epoch": 0.6568027881896495, "grad_norm": 22.277656875348782, "learning_rate": 5e-06, "loss": 0.1228, "num_input_tokens_seen": 428891900, "step": 2497 }, { "epoch": 0.6568027881896495, "loss": 0.08849343657493591, "loss_ce": 0.0024643833748996258, "loss_iou": 0.52734375, "loss_num": 0.0172119140625, "loss_xval": 0.0859375, "num_input_tokens_seen": 428891900, "step": 2497 }, { "epoch": 0.6570658249490366, "grad_norm": 6.04979449937967, "learning_rate": 5e-06, "loss": 0.1133, "num_input_tokens_seen": 429064016, "step": 2498 }, { "epoch": 0.6570658249490366, "loss": 0.04533851146697998, "loss_ce": 0.000935432268306613, "loss_iou": 0.482421875, "loss_num": 0.00885009765625, "loss_xval": 0.04443359375, "num_input_tokens_seen": 429064016, "step": 2498 }, { "epoch": 0.6573288617084238, "grad_norm": 13.279231347366395, "learning_rate": 5e-06, "loss": 0.1111, "num_input_tokens_seen": 429236096, "step": 2499 }, { "epoch": 0.6573288617084238, "loss": 0.09342057257890701, "loss_ce": 0.0018373207421973348, "loss_iou": 0.61328125, "loss_num": 0.018310546875, "loss_xval": 0.091796875, "num_input_tokens_seen": 429236096, "step": 2499 }, { "epoch": 0.6575918984678109, "grad_norm": 9.388446901609708, "learning_rate": 5e-06, "loss": 0.1338, "num_input_tokens_seen": 429408212, "step": 2500 }, { "epoch": 0.6575918984678109, "eval_websight_new_CIoU": 0.8709481358528137, "eval_websight_new_GIoU": 0.8742890954017639, "eval_websight_new_IoU": 0.8760707080364227, "eval_websight_new_MAE_all": 0.01990661583840847, "eval_websight_new_MAE_h": 0.01017875224351883, "eval_websight_new_MAE_w": 0.03395752049982548, "eval_websight_new_MAE_x": 0.03019585181027651, "eval_websight_new_MAE_y": 0.005294335773214698, "eval_websight_new_NUM_probability": 0.9999906122684479, "eval_websight_new_inside_bbox": 1.0, "eval_websight_new_loss": 0.09985960274934769, "eval_websight_new_loss_ce": 2.431606571917655e-05, "eval_websight_new_loss_iou": 0.4124755859375, 
"eval_websight_new_loss_num": 0.017595291137695312, "eval_websight_new_loss_xval": 0.08795166015625, "eval_websight_new_runtime": 55.684, "eval_websight_new_samples_per_second": 0.898, "eval_websight_new_steps_per_second": 0.036, "num_input_tokens_seen": 429408212, "step": 2500 }, { "epoch": 0.6575918984678109, "eval_seeclick_CIoU": 0.6180358529090881, "eval_seeclick_GIoU": 0.6200732290744781, "eval_seeclick_IoU": 0.6430684626102448, "eval_seeclick_MAE_all": 0.04816816933453083, "eval_seeclick_MAE_h": 0.03338594362139702, "eval_seeclick_MAE_w": 0.06272775307297707, "eval_seeclick_MAE_x": 0.06945410370826721, "eval_seeclick_MAE_y": 0.027104882523417473, "eval_seeclick_NUM_probability": 0.9999706149101257, "eval_seeclick_inside_bbox": 0.8764204680919647, "eval_seeclick_loss": 0.22014649212360382, "eval_seeclick_loss_ce": 0.008939406834542751, "eval_seeclick_loss_iou": 0.5093994140625, "eval_seeclick_loss_num": 0.040313720703125, "eval_seeclick_loss_xval": 0.201568603515625, "eval_seeclick_runtime": 68.7244, "eval_seeclick_samples_per_second": 0.626, "eval_seeclick_steps_per_second": 0.029, "num_input_tokens_seen": 429408212, "step": 2500 }, { "epoch": 0.6575918984678109, "eval_icons_CIoU": 0.8709467053413391, "eval_icons_GIoU": 0.8682527244091034, "eval_icons_IoU": 0.874538779258728, "eval_icons_MAE_all": 0.01756941620260477, "eval_icons_MAE_h": 0.02226724848151207, "eval_icons_MAE_w": 0.020997921004891396, "eval_icons_MAE_x": 0.0130357148591429, "eval_icons_MAE_y": 0.013976779766380787, "eval_icons_NUM_probability": 0.9999793469905853, "eval_icons_inside_bbox": 1.0, "eval_icons_loss": 0.06909541040658951, "eval_icons_loss_ce": 1.1558900041563902e-05, "eval_icons_loss_iou": 0.66064453125, "eval_icons_loss_num": 0.01297760009765625, "eval_icons_loss_xval": 0.0648956298828125, "eval_icons_runtime": 83.8008, "eval_icons_samples_per_second": 0.597, "eval_icons_steps_per_second": 0.024, "num_input_tokens_seen": 429408212, "step": 2500 }, { "epoch": 0.6575918984678109, 
"eval_screenspot_CIoU": 0.5514570474624634, "eval_screenspot_GIoU": 0.5452684958775839, "eval_screenspot_IoU": 0.5914793411890665, "eval_screenspot_MAE_all": 0.08535195142030716, "eval_screenspot_MAE_h": 0.06157554934422175, "eval_screenspot_MAE_w": 0.14490507543087006, "eval_screenspot_MAE_x": 0.08271919315059979, "eval_screenspot_MAE_y": 0.05220799893140793, "eval_screenspot_NUM_probability": 0.9994663198788961, "eval_screenspot_inside_bbox": 0.850000003973643, "eval_screenspot_loss": 0.9172521233558655, "eval_screenspot_loss_ce": 0.5530519783496857, "eval_screenspot_loss_iou": 0.529052734375, "eval_screenspot_loss_num": 0.07132466634114583, "eval_screenspot_loss_xval": 0.3565266927083333, "eval_screenspot_runtime": 139.3325, "eval_screenspot_samples_per_second": 0.639, "eval_screenspot_steps_per_second": 0.022, "num_input_tokens_seen": 429408212, "step": 2500 }, { "epoch": 0.657854935227198, "grad_norm": 4.075831219377808, "learning_rate": 5e-06, "loss": 0.1111, "num_input_tokens_seen": 429580660, "step": 2501 }, { "epoch": 0.657854935227198, "loss": 0.13817401230335236, "loss_ce": 0.0033473544754087925, "loss_iou": 0.59375, "loss_num": 0.0269775390625, "loss_xval": 0.134765625, "num_input_tokens_seen": 429580660, "step": 2501 }, { "epoch": 0.6581179719865852, "grad_norm": 7.116588034171532, "learning_rate": 5e-06, "loss": 0.1223, "num_input_tokens_seen": 429752720, "step": 2502 }, { "epoch": 0.6581179719865852, "loss": 0.1050395742058754, "loss_ce": 0.0004253170336596668, "loss_iou": 0.35546875, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 429752720, "step": 2502 }, { "epoch": 0.6583810087459723, "grad_norm": 4.475079180174651, "learning_rate": 5e-06, "loss": 0.0863, "num_input_tokens_seen": 429924908, "step": 2503 }, { "epoch": 0.6583810087459723, "loss": 0.06600432842969894, "loss_ce": 0.0007577461656183004, "loss_iou": 0.60546875, "loss_num": 0.0130615234375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 429924908, 
"step": 2503 }, { "epoch": 0.6586440455053594, "grad_norm": 4.723970217436198, "learning_rate": 5e-06, "loss": 0.1093, "num_input_tokens_seen": 430097000, "step": 2504 }, { "epoch": 0.6586440455053594, "loss": 0.07487765699625015, "loss_ce": 0.0005978714907541871, "loss_iou": 0.54296875, "loss_num": 0.01483154296875, "loss_xval": 0.07421875, "num_input_tokens_seen": 430097000, "step": 2504 }, { "epoch": 0.6589070822647465, "grad_norm": 14.464350947162439, "learning_rate": 5e-06, "loss": 0.1466, "num_input_tokens_seen": 430266684, "step": 2505 }, { "epoch": 0.6589070822647465, "loss": 0.24609506130218506, "loss_ce": 0.00015389968757517636, "loss_iou": 0.51171875, "loss_num": 0.04931640625, "loss_xval": 0.24609375, "num_input_tokens_seen": 430266684, "step": 2505 }, { "epoch": 0.6591701190241336, "grad_norm": 4.076117698308253, "learning_rate": 5e-06, "loss": 0.1265, "num_input_tokens_seen": 430436940, "step": 2506 }, { "epoch": 0.6591701190241336, "loss": 0.109318308532238, "loss_ce": 0.002522045513615012, "loss_iou": 0.328125, "loss_num": 0.0213623046875, "loss_xval": 0.10693359375, "num_input_tokens_seen": 430436940, "step": 2506 }, { "epoch": 0.6594331557835208, "grad_norm": 5.795601298430443, "learning_rate": 5e-06, "loss": 0.0979, "num_input_tokens_seen": 430608572, "step": 2507 }, { "epoch": 0.6594331557835208, "loss": 0.07380083203315735, "loss_ce": 0.00034502719063311815, "loss_iou": 0.6171875, "loss_num": 0.01470947265625, "loss_xval": 0.0732421875, "num_input_tokens_seen": 430608572, "step": 2507 }, { "epoch": 0.6596961925429079, "grad_norm": 3.8858425270409165, "learning_rate": 5e-06, "loss": 0.105, "num_input_tokens_seen": 430778744, "step": 2508 }, { "epoch": 0.6596961925429079, "loss": 0.1448889970779419, "loss_ce": 0.00018989352975040674, "loss_iou": 0.55078125, "loss_num": 0.0289306640625, "loss_xval": 0.14453125, "num_input_tokens_seen": 430778744, "step": 2508 }, { "epoch": 0.659959229302295, "grad_norm": 9.24454413883364, "learning_rate": 5e-06, 
"loss": 0.0771, "num_input_tokens_seen": 430950904, "step": 2509 }, { "epoch": 0.659959229302295, "loss": 0.09062638133764267, "loss_ce": 0.0013014276046305895, "loss_iou": 0.53515625, "loss_num": 0.017822265625, "loss_xval": 0.08935546875, "num_input_tokens_seen": 430950904, "step": 2509 }, { "epoch": 0.6602222660616821, "grad_norm": 5.68970165561044, "learning_rate": 5e-06, "loss": 0.1123, "num_input_tokens_seen": 431121056, "step": 2510 }, { "epoch": 0.6602222660616821, "loss": 0.09496060013771057, "loss_ce": 0.0006155127775855362, "loss_iou": null, "loss_num": 0.0189208984375, "loss_xval": 0.09423828125, "num_input_tokens_seen": 431121056, "step": 2510 }, { "epoch": 0.6604853028210692, "grad_norm": 3.2016449128749023, "learning_rate": 5e-06, "loss": 0.084, "num_input_tokens_seen": 431293012, "step": 2511 }, { "epoch": 0.6604853028210692, "loss": 0.03823622688651085, "loss_ce": 1.2961418178747408e-05, "loss_iou": 0.5703125, "loss_num": 0.007659912109375, "loss_xval": 0.038330078125, "num_input_tokens_seen": 431293012, "step": 2511 }, { "epoch": 0.6607483395804564, "grad_norm": 7.269277911217335, "learning_rate": 5e-06, "loss": 0.145, "num_input_tokens_seen": 431461708, "step": 2512 }, { "epoch": 0.6607483395804564, "loss": 0.27740049362182617, "loss_ce": 0.004237642977386713, "loss_iou": 0.255859375, "loss_num": 0.0546875, "loss_xval": 0.2734375, "num_input_tokens_seen": 431461708, "step": 2512 }, { "epoch": 0.6610113763398435, "grad_norm": 3.005900285482203, "learning_rate": 5e-06, "loss": 0.0914, "num_input_tokens_seen": 431633828, "step": 2513 }, { "epoch": 0.6610113763398435, "loss": 0.0913277417421341, "loss_ce": 0.0005684680072590709, "loss_iou": 0.36328125, "loss_num": 0.0181884765625, "loss_xval": 0.0908203125, "num_input_tokens_seen": 431633828, "step": 2513 }, { "epoch": 0.6612744130992306, "grad_norm": 3.8195415395135934, "learning_rate": 5e-06, "loss": 0.098, "num_input_tokens_seen": 431805968, "step": 2514 }, { "epoch": 0.6612744130992306, "loss": 
0.04755711182951927, "loss_ce": 0.0009872873779386282, "loss_iou": 0.39453125, "loss_num": 0.00927734375, "loss_xval": 0.046630859375, "num_input_tokens_seen": 431805968, "step": 2514 }, { "epoch": 0.6615374498586177, "grad_norm": 4.443605112638486, "learning_rate": 5e-06, "loss": 0.1147, "num_input_tokens_seen": 431977936, "step": 2515 }, { "epoch": 0.6615374498586177, "loss": 0.0916135311126709, "loss_ce": 0.0008542468422092497, "loss_iou": 0.458984375, "loss_num": 0.0181884765625, "loss_xval": 0.0908203125, "num_input_tokens_seen": 431977936, "step": 2515 }, { "epoch": 0.6618004866180048, "grad_norm": 5.518155601836215, "learning_rate": 5e-06, "loss": 0.1724, "num_input_tokens_seen": 432150116, "step": 2516 }, { "epoch": 0.6618004866180048, "loss": 0.14211586117744446, "loss_ce": 0.0029557030647993088, "loss_iou": 0.53125, "loss_num": 0.02783203125, "loss_xval": 0.138671875, "num_input_tokens_seen": 432150116, "step": 2516 }, { "epoch": 0.6620635233773919, "grad_norm": 5.250876023224286, "learning_rate": 5e-06, "loss": 0.1054, "num_input_tokens_seen": 432322696, "step": 2517 }, { "epoch": 0.6620635233773919, "loss": 0.07852162420749664, "loss_ce": 0.00102223118301481, "loss_iou": 0.58984375, "loss_num": 0.0155029296875, "loss_xval": 0.07763671875, "num_input_tokens_seen": 432322696, "step": 2517 }, { "epoch": 0.6623265601367792, "grad_norm": 4.979602548630015, "learning_rate": 5e-06, "loss": 0.1138, "num_input_tokens_seen": 432490748, "step": 2518 }, { "epoch": 0.6623265601367792, "loss": 0.07791407406330109, "loss_ce": 0.0011623608879745007, "loss_iou": 0.5078125, "loss_num": 0.015380859375, "loss_xval": 0.07666015625, "num_input_tokens_seen": 432490748, "step": 2518 }, { "epoch": 0.6625895968961663, "grad_norm": 7.14143158314321, "learning_rate": 5e-06, "loss": 0.1338, "num_input_tokens_seen": 432662924, "step": 2519 }, { "epoch": 0.6625895968961663, "loss": 0.168125718832016, "loss_ce": 0.000858865212649107, "loss_iou": 0.515625, "loss_num": 0.033447265625, 
"loss_xval": 0.1669921875, "num_input_tokens_seen": 432662924, "step": 2519 }, { "epoch": 0.6628526336555534, "grad_norm": 3.8877457754066387, "learning_rate": 5e-06, "loss": 0.1219, "num_input_tokens_seen": 432835068, "step": 2520 }, { "epoch": 0.6628526336555534, "loss": 0.12645836174488068, "loss_ce": 0.0006496440037153661, "loss_iou": 0.546875, "loss_num": 0.025146484375, "loss_xval": 0.1259765625, "num_input_tokens_seen": 432835068, "step": 2520 }, { "epoch": 0.6631156704149405, "grad_norm": 4.906018490784193, "learning_rate": 5e-06, "loss": 0.1167, "num_input_tokens_seen": 433007048, "step": 2521 }, { "epoch": 0.6631156704149405, "loss": 0.08021432906389236, "loss_ce": 0.000868627626914531, "loss_iou": 0.55859375, "loss_num": 0.015869140625, "loss_xval": 0.0791015625, "num_input_tokens_seen": 433007048, "step": 2521 }, { "epoch": 0.6633787071743276, "grad_norm": 4.283295443334077, "learning_rate": 5e-06, "loss": 0.158, "num_input_tokens_seen": 433177068, "step": 2522 }, { "epoch": 0.6633787071743276, "loss": 0.11892493069171906, "loss_ce": 0.00060828379355371, "loss_iou": 0.4921875, "loss_num": 0.023681640625, "loss_xval": 0.1181640625, "num_input_tokens_seen": 433177068, "step": 2522 }, { "epoch": 0.6636417439337148, "grad_norm": 4.831641088226549, "learning_rate": 5e-06, "loss": 0.0736, "num_input_tokens_seen": 433349504, "step": 2523 }, { "epoch": 0.6636417439337148, "loss": 0.06323867291212082, "loss_ce": 0.00031142536317929626, "loss_iou": 0.4921875, "loss_num": 0.0125732421875, "loss_xval": 0.06298828125, "num_input_tokens_seen": 433349504, "step": 2523 }, { "epoch": 0.6639047806931019, "grad_norm": 24.62858299579612, "learning_rate": 5e-06, "loss": 0.1129, "num_input_tokens_seen": 433521616, "step": 2524 }, { "epoch": 0.6639047806931019, "loss": 0.12700864672660828, "loss_ce": 0.003351423656567931, "loss_iou": 0.45703125, "loss_num": 0.0247802734375, "loss_xval": 0.12353515625, "num_input_tokens_seen": 433521616, "step": 2524 }, { "epoch": 
0.664167817452489, "grad_norm": 6.377416561201896, "learning_rate": 5e-06, "loss": 0.1141, "num_input_tokens_seen": 433692012, "step": 2525 }, { "epoch": 0.664167817452489, "loss": 0.06224376708269119, "loss_ce": 0.003238032106310129, "loss_iou": 0.54296875, "loss_num": 0.01177978515625, "loss_xval": 0.05908203125, "num_input_tokens_seen": 433692012, "step": 2525 }, { "epoch": 0.6644308542118761, "grad_norm": 7.501801025322401, "learning_rate": 5e-06, "loss": 0.1321, "num_input_tokens_seen": 433864040, "step": 2526 }, { "epoch": 0.6644308542118761, "loss": 0.09891114383935928, "loss_ce": 0.002109395107254386, "loss_iou": 0.40234375, "loss_num": 0.0194091796875, "loss_xval": 0.0966796875, "num_input_tokens_seen": 433864040, "step": 2526 }, { "epoch": 0.6646938909712632, "grad_norm": 4.527105351225178, "learning_rate": 5e-06, "loss": 0.1112, "num_input_tokens_seen": 434034556, "step": 2527 }, { "epoch": 0.6646938909712632, "loss": 0.14892783761024475, "loss_ce": 0.0010244110599160194, "loss_iou": 0.4765625, "loss_num": 0.029541015625, "loss_xval": 0.1474609375, "num_input_tokens_seen": 434034556, "step": 2527 }, { "epoch": 0.6649569277306504, "grad_norm": 31.749018149089892, "learning_rate": 5e-06, "loss": 0.1157, "num_input_tokens_seen": 434206672, "step": 2528 }, { "epoch": 0.6649569277306504, "loss": 0.12175662815570831, "loss_ce": 0.0015021114377304912, "loss_iou": 0.53515625, "loss_num": 0.0240478515625, "loss_xval": 0.1201171875, "num_input_tokens_seen": 434206672, "step": 2528 }, { "epoch": 0.6652199644900375, "grad_norm": 4.8576669690091885, "learning_rate": 5e-06, "loss": 0.0658, "num_input_tokens_seen": 434378728, "step": 2529 }, { "epoch": 0.6652199644900375, "loss": 0.07344581931829453, "loss_ce": 0.006276628468185663, "loss_iou": 0.5078125, "loss_num": 0.013427734375, "loss_xval": 0.0673828125, "num_input_tokens_seen": 434378728, "step": 2529 }, { "epoch": 0.6654830012494246, "grad_norm": 4.9340551800619, "learning_rate": 5e-06, "loss": 0.1017, 
"num_input_tokens_seen": 434550708, "step": 2530 }, { "epoch": 0.6654830012494246, "loss": 0.0640825480222702, "loss_ce": 0.0013689253246411681, "loss_iou": 0.55859375, "loss_num": 0.0125732421875, "loss_xval": 0.0625, "num_input_tokens_seen": 434550708, "step": 2530 }, { "epoch": 0.6657460380088117, "grad_norm": 9.63881283843686, "learning_rate": 5e-06, "loss": 0.111, "num_input_tokens_seen": 434723100, "step": 2531 }, { "epoch": 0.6657460380088117, "loss": 0.050916872918605804, "loss_ce": 0.0004713151138275862, "loss_iou": 0.64453125, "loss_num": 0.01007080078125, "loss_xval": 0.050537109375, "num_input_tokens_seen": 434723100, "step": 2531 }, { "epoch": 0.6660090747681988, "grad_norm": 5.34856475630482, "learning_rate": 5e-06, "loss": 0.1127, "num_input_tokens_seen": 434893672, "step": 2532 }, { "epoch": 0.6660090747681988, "loss": 0.11488111317157745, "loss_ce": 0.00108106411062181, "loss_iou": 0.5390625, "loss_num": 0.022705078125, "loss_xval": 0.11376953125, "num_input_tokens_seen": 434893672, "step": 2532 }, { "epoch": 0.666272111527586, "grad_norm": 6.482198264697821, "learning_rate": 5e-06, "loss": 0.1046, "num_input_tokens_seen": 435065996, "step": 2533 }, { "epoch": 0.666272111527586, "loss": 0.1057998389005661, "loss_ce": 0.00013272129581309855, "loss_iou": 0.6015625, "loss_num": 0.0211181640625, "loss_xval": 0.10546875, "num_input_tokens_seen": 435065996, "step": 2533 }, { "epoch": 0.6665351482869731, "grad_norm": 15.608471694941006, "learning_rate": 5e-06, "loss": 0.1151, "num_input_tokens_seen": 435236164, "step": 2534 }, { "epoch": 0.6665351482869731, "loss": 0.08966468274593353, "loss_ce": 0.004398562014102936, "loss_iou": 0.4375, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 435236164, "step": 2534 }, { "epoch": 0.6667981850463602, "grad_norm": 3.821066216794497, "learning_rate": 5e-06, "loss": 0.1027, "num_input_tokens_seen": 435408552, "step": 2535 }, { "epoch": 0.6667981850463602, "loss": 0.13420158624649048, 
"loss_ce": 0.0008855484193190932, "loss_iou": 0.423828125, "loss_num": 0.026611328125, "loss_xval": 0.1337890625, "num_input_tokens_seen": 435408552, "step": 2535 }, { "epoch": 0.6670612218057473, "grad_norm": 7.414329883383495, "learning_rate": 5e-06, "loss": 0.1206, "num_input_tokens_seen": 435580704, "step": 2536 }, { "epoch": 0.6670612218057473, "loss": 0.09095387160778046, "loss_ce": 0.0024223732762038708, "loss_iou": 0.53125, "loss_num": 0.0177001953125, "loss_xval": 0.08837890625, "num_input_tokens_seen": 435580704, "step": 2536 }, { "epoch": 0.6673242585651344, "grad_norm": 33.27432314606939, "learning_rate": 5e-06, "loss": 0.1523, "num_input_tokens_seen": 435752624, "step": 2537 }, { "epoch": 0.6673242585651344, "loss": 0.15344524383544922, "loss_ce": 0.0014066637959331274, "loss_iou": 0.57421875, "loss_num": 0.0303955078125, "loss_xval": 0.15234375, "num_input_tokens_seen": 435752624, "step": 2537 }, { "epoch": 0.6675872953245217, "grad_norm": 7.327533892405677, "learning_rate": 5e-06, "loss": 0.1128, "num_input_tokens_seen": 435924672, "step": 2538 }, { "epoch": 0.6675872953245217, "loss": 0.11101265996694565, "loss_ce": 0.000935757125262171, "loss_iou": 0.447265625, "loss_num": 0.02197265625, "loss_xval": 0.10986328125, "num_input_tokens_seen": 435924672, "step": 2538 }, { "epoch": 0.6678503320839088, "grad_norm": 7.348552482036463, "learning_rate": 5e-06, "loss": 0.1424, "num_input_tokens_seen": 436096756, "step": 2539 }, { "epoch": 0.6678503320839088, "loss": 0.1398707926273346, "loss_ce": 0.0033656705636531115, "loss_iou": null, "loss_num": 0.0272216796875, "loss_xval": 0.13671875, "num_input_tokens_seen": 436096756, "step": 2539 }, { "epoch": 0.6681133688432959, "grad_norm": 20.204014590374456, "learning_rate": 5e-06, "loss": 0.1482, "num_input_tokens_seen": 436267320, "step": 2540 }, { "epoch": 0.6681133688432959, "loss": 0.11994585394859314, "loss_ce": 0.0005916071822866797, "loss_iou": 0.4375, "loss_num": 0.02392578125, "loss_xval": 0.119140625, 
"num_input_tokens_seen": 436267320, "step": 2540 }, { "epoch": 0.668376405602683, "grad_norm": 8.407025005461401, "learning_rate": 5e-06, "loss": 0.1364, "num_input_tokens_seen": 436439328, "step": 2541 }, { "epoch": 0.668376405602683, "loss": 0.11520107090473175, "loss_ce": 0.003811910282820463, "loss_iou": 0.61328125, "loss_num": 0.0223388671875, "loss_xval": 0.111328125, "num_input_tokens_seen": 436439328, "step": 2541 }, { "epoch": 0.6686394423620701, "grad_norm": 5.145720476296494, "learning_rate": 5e-06, "loss": 0.1047, "num_input_tokens_seen": 436609828, "step": 2542 }, { "epoch": 0.6686394423620701, "loss": 0.06836480647325516, "loss_ce": 0.00037164578679949045, "loss_iou": 0.53125, "loss_num": 0.01361083984375, "loss_xval": 0.06787109375, "num_input_tokens_seen": 436609828, "step": 2542 }, { "epoch": 0.6689024791214572, "grad_norm": 8.206215780846446, "learning_rate": 5e-06, "loss": 0.0996, "num_input_tokens_seen": 436781764, "step": 2543 }, { "epoch": 0.6689024791214572, "loss": 0.107704758644104, "loss_ce": 0.002266527386382222, "loss_iou": 0.5234375, "loss_num": 0.0211181640625, "loss_xval": 0.10546875, "num_input_tokens_seen": 436781764, "step": 2543 }, { "epoch": 0.6691655158808444, "grad_norm": 7.4768704044685865, "learning_rate": 5e-06, "loss": 0.1623, "num_input_tokens_seen": 436953548, "step": 2544 }, { "epoch": 0.6691655158808444, "loss": 0.17240890860557556, "loss_ce": 0.0010527035919949412, "loss_iou": 0.53515625, "loss_num": 0.0341796875, "loss_xval": 0.1708984375, "num_input_tokens_seen": 436953548, "step": 2544 }, { "epoch": 0.6694285526402315, "grad_norm": 9.203598751541529, "learning_rate": 5e-06, "loss": 0.1177, "num_input_tokens_seen": 437125648, "step": 2545 }, { "epoch": 0.6694285526402315, "loss": 0.0696527361869812, "loss_ce": 0.0004388623929116875, "loss_iou": 0.546875, "loss_num": 0.0137939453125, "loss_xval": 0.0693359375, "num_input_tokens_seen": 437125648, "step": 2545 }, { "epoch": 0.6696915893996186, "grad_norm": 
4.3933164564180816, "learning_rate": 5e-06, "loss": 0.1253, "num_input_tokens_seen": 437297756, "step": 2546 }, { "epoch": 0.6696915893996186, "loss": 0.12276500463485718, "loss_ce": 0.004127927124500275, "loss_iou": 0.498046875, "loss_num": 0.0238037109375, "loss_xval": 0.11865234375, "num_input_tokens_seen": 437297756, "step": 2546 }, { "epoch": 0.6699546261590057, "grad_norm": 4.6906533688912795, "learning_rate": 5e-06, "loss": 0.1226, "num_input_tokens_seen": 437470168, "step": 2547 }, { "epoch": 0.6699546261590057, "loss": 0.16925577819347382, "loss_ce": 0.00012736135977320373, "loss_iou": 0.5078125, "loss_num": 0.033935546875, "loss_xval": 0.1689453125, "num_input_tokens_seen": 437470168, "step": 2547 }, { "epoch": 0.6702176629183928, "grad_norm": 5.700480300794898, "learning_rate": 5e-06, "loss": 0.1239, "num_input_tokens_seen": 437641860, "step": 2548 }, { "epoch": 0.6702176629183928, "loss": 0.08314710855484009, "loss_ce": 0.0011463778791949153, "loss_iou": 0.3671875, "loss_num": 0.016357421875, "loss_xval": 0.08203125, "num_input_tokens_seen": 437641860, "step": 2548 }, { "epoch": 0.67048069967778, "grad_norm": 4.582897287065671, "learning_rate": 5e-06, "loss": 0.1427, "num_input_tokens_seen": 437814364, "step": 2549 }, { "epoch": 0.67048069967778, "loss": 0.1702890694141388, "loss_ce": 0.0015268486458808184, "loss_iou": 0.51171875, "loss_num": 0.03369140625, "loss_xval": 0.1689453125, "num_input_tokens_seen": 437814364, "step": 2549 }, { "epoch": 0.6707437364371671, "grad_norm": 5.613370832938644, "learning_rate": 5e-06, "loss": 0.1421, "num_input_tokens_seen": 437986344, "step": 2550 }, { "epoch": 0.6707437364371671, "loss": 0.13855043053627014, "loss_ce": 0.0012823636643588543, "loss_iou": 0.53125, "loss_num": 0.0274658203125, "loss_xval": 0.1376953125, "num_input_tokens_seen": 437986344, "step": 2550 }, { "epoch": 0.6710067731965542, "grad_norm": 17.044687935022132, "learning_rate": 5e-06, "loss": 0.1258, "num_input_tokens_seen": 438158712, "step": 
2551 }, { "epoch": 0.6710067731965542, "loss": 0.08713387697935104, "loss_ce": 0.00023507134756073356, "loss_iou": 0.5703125, "loss_num": 0.017333984375, "loss_xval": 0.0869140625, "num_input_tokens_seen": 438158712, "step": 2551 }, { "epoch": 0.6712698099559413, "grad_norm": 6.906118924960931, "learning_rate": 5e-06, "loss": 0.1055, "num_input_tokens_seen": 438330820, "step": 2552 }, { "epoch": 0.6712698099559413, "loss": 0.13604578375816345, "loss_ce": 0.0038131249602884054, "loss_iou": 0.45703125, "loss_num": 0.0264892578125, "loss_xval": 0.1318359375, "num_input_tokens_seen": 438330820, "step": 2552 }, { "epoch": 0.6715328467153284, "grad_norm": 5.001208896831707, "learning_rate": 5e-06, "loss": 0.1252, "num_input_tokens_seen": 438503128, "step": 2553 }, { "epoch": 0.6715328467153284, "loss": 0.11509568989276886, "loss_ce": 0.00016648891323711723, "loss_iou": 0.48046875, "loss_num": 0.02294921875, "loss_xval": 0.11474609375, "num_input_tokens_seen": 438503128, "step": 2553 }, { "epoch": 0.6717958834747156, "grad_norm": 5.88413772044494, "learning_rate": 5e-06, "loss": 0.0839, "num_input_tokens_seen": 438675408, "step": 2554 }, { "epoch": 0.6717958834747156, "loss": 0.06504229456186295, "loss_ce": 0.000772272062022239, "loss_iou": 0.58203125, "loss_num": 0.0128173828125, "loss_xval": 0.064453125, "num_input_tokens_seen": 438675408, "step": 2554 }, { "epoch": 0.6720589202341027, "grad_norm": 5.286645052918952, "learning_rate": 5e-06, "loss": 0.1103, "num_input_tokens_seen": 438847472, "step": 2555 }, { "epoch": 0.6720589202341027, "loss": 0.11925958842039108, "loss_ce": 0.00024103187024593353, "loss_iou": 0.65625, "loss_num": 0.0238037109375, "loss_xval": 0.119140625, "num_input_tokens_seen": 438847472, "step": 2555 }, { "epoch": 0.6723219569934898, "grad_norm": 7.503881873733868, "learning_rate": 5e-06, "loss": 0.1275, "num_input_tokens_seen": 439019864, "step": 2556 }, { "epoch": 0.6723219569934898, "loss": 0.1399090439081192, "loss_ce": 0.0017254444537684321, 
"loss_iou": 0.51171875, "loss_num": 0.0277099609375, "loss_xval": 0.138671875, "num_input_tokens_seen": 439019864, "step": 2556 }, { "epoch": 0.672584993752877, "grad_norm": 4.642912313294466, "learning_rate": 5e-06, "loss": 0.0921, "num_input_tokens_seen": 439191776, "step": 2557 }, { "epoch": 0.672584993752877, "loss": 0.12639446556568146, "loss_ce": 0.0015470522921532393, "loss_iou": 0.62890625, "loss_num": 0.0250244140625, "loss_xval": 0.125, "num_input_tokens_seen": 439191776, "step": 2557 }, { "epoch": 0.672848030512264, "grad_norm": 5.916383494046343, "learning_rate": 5e-06, "loss": 0.1084, "num_input_tokens_seen": 439360860, "step": 2558 }, { "epoch": 0.672848030512264, "loss": 0.09076009690761566, "loss_ce": 0.0006722048274241388, "loss_iou": 0.6015625, "loss_num": 0.01806640625, "loss_xval": 0.08984375, "num_input_tokens_seen": 439360860, "step": 2558 }, { "epoch": 0.6731110672716513, "grad_norm": 20.566659262884578, "learning_rate": 5e-06, "loss": 0.1212, "num_input_tokens_seen": 439532496, "step": 2559 }, { "epoch": 0.6731110672716513, "loss": 0.10060098767280579, "loss_ce": 0.003646642668172717, "loss_iou": 0.546875, "loss_num": 0.0194091796875, "loss_xval": 0.09716796875, "num_input_tokens_seen": 439532496, "step": 2559 }, { "epoch": 0.6733741040310384, "grad_norm": 4.011859025446864, "learning_rate": 5e-06, "loss": 0.1167, "num_input_tokens_seen": 439704520, "step": 2560 }, { "epoch": 0.6733741040310384, "loss": 0.0790782943367958, "loss_ce": 0.0015483875758945942, "loss_iou": 0.404296875, "loss_num": 0.0155029296875, "loss_xval": 0.07763671875, "num_input_tokens_seen": 439704520, "step": 2560 }, { "epoch": 0.6736371407904255, "grad_norm": 7.247619498755374, "learning_rate": 5e-06, "loss": 0.1282, "num_input_tokens_seen": 439877048, "step": 2561 }, { "epoch": 0.6736371407904255, "loss": 0.09221772849559784, "loss_ce": 0.0026181198190897703, "loss_iou": 0.50390625, "loss_num": 0.0179443359375, "loss_xval": 0.08984375, "num_input_tokens_seen": 
439877048, "step": 2561 }, { "epoch": 0.6739001775498126, "grad_norm": 6.385962315237775, "learning_rate": 5e-06, "loss": 0.0615, "num_input_tokens_seen": 440049300, "step": 2562 }, { "epoch": 0.6739001775498126, "loss": 0.041686464101076126, "loss_ce": 0.0010523094097152352, "loss_iou": 0.55078125, "loss_num": 0.00811767578125, "loss_xval": 0.04052734375, "num_input_tokens_seen": 440049300, "step": 2562 }, { "epoch": 0.6741632143091997, "grad_norm": 26.7431708970354, "learning_rate": 5e-06, "loss": 0.1921, "num_input_tokens_seen": 440221368, "step": 2563 }, { "epoch": 0.6741632143091997, "loss": 0.18678849935531616, "loss_ce": 0.002889578230679035, "loss_iou": 0.47265625, "loss_num": 0.036865234375, "loss_xval": 0.18359375, "num_input_tokens_seen": 440221368, "step": 2563 }, { "epoch": 0.6744262510685868, "grad_norm": 3.4897355882240686, "learning_rate": 5e-06, "loss": 0.131, "num_input_tokens_seen": 440393588, "step": 2564 }, { "epoch": 0.6744262510685868, "loss": 0.13501334190368652, "loss_ce": 0.0007817824953235686, "loss_iou": 0.5078125, "loss_num": 0.02685546875, "loss_xval": 0.1337890625, "num_input_tokens_seen": 440393588, "step": 2564 }, { "epoch": 0.674689287827974, "grad_norm": 5.796770664321287, "learning_rate": 5e-06, "loss": 0.1058, "num_input_tokens_seen": 440565712, "step": 2565 }, { "epoch": 0.674689287827974, "loss": 0.06512662768363953, "loss_ce": 0.0005819504731334746, "loss_iou": 0.50390625, "loss_num": 0.012939453125, "loss_xval": 0.064453125, "num_input_tokens_seen": 440565712, "step": 2565 }, { "epoch": 0.6749523245873611, "grad_norm": 6.5511648198704755, "learning_rate": 5e-06, "loss": 0.0888, "num_input_tokens_seen": 440737988, "step": 2566 }, { "epoch": 0.6749523245873611, "loss": 0.11750826984643936, "loss_ce": 0.0036166671197861433, "loss_iou": 0.52734375, "loss_num": 0.022705078125, "loss_xval": 0.11376953125, "num_input_tokens_seen": 440737988, "step": 2566 }, { "epoch": 0.6752153613467482, "grad_norm": 21.358635365800044, 
"learning_rate": 5e-06, "loss": 0.1114, "num_input_tokens_seen": 440910068, "step": 2567 }, { "epoch": 0.6752153613467482, "loss": 0.10725726187229156, "loss_ce": 0.0005067750462330878, "loss_iou": 0.6875, "loss_num": 0.0213623046875, "loss_xval": 0.10693359375, "num_input_tokens_seen": 440910068, "step": 2567 }, { "epoch": 0.6754783981061353, "grad_norm": 4.519751532821699, "learning_rate": 5e-06, "loss": 0.0884, "num_input_tokens_seen": 441080800, "step": 2568 }, { "epoch": 0.6754783981061353, "loss": 0.0696103498339653, "loss_ce": 0.00015234279271680862, "loss_iou": 0.50390625, "loss_num": 0.013916015625, "loss_xval": 0.0693359375, "num_input_tokens_seen": 441080800, "step": 2568 }, { "epoch": 0.6757414348655224, "grad_norm": 4.051067943325395, "learning_rate": 5e-06, "loss": 0.1222, "num_input_tokens_seen": 441251032, "step": 2569 }, { "epoch": 0.6757414348655224, "loss": 0.09424732625484467, "loss_ce": 0.00022266953601501882, "loss_iou": 0.54296875, "loss_num": 0.018798828125, "loss_xval": 0.09423828125, "num_input_tokens_seen": 441251032, "step": 2569 }, { "epoch": 0.6760044716249096, "grad_norm": 6.209787964250504, "learning_rate": 5e-06, "loss": 0.1284, "num_input_tokens_seen": 441422784, "step": 2570 }, { "epoch": 0.6760044716249096, "loss": 0.0798967033624649, "loss_ce": 0.00027634453726932406, "loss_iou": 0.5078125, "loss_num": 0.015869140625, "loss_xval": 0.07958984375, "num_input_tokens_seen": 441422784, "step": 2570 }, { "epoch": 0.6762675083842967, "grad_norm": 7.3672437629155505, "learning_rate": 5e-06, "loss": 0.1272, "num_input_tokens_seen": 441593108, "step": 2571 }, { "epoch": 0.6762675083842967, "loss": 0.12310583889484406, "loss_ce": 0.0010050098644569516, "loss_iou": null, "loss_num": 0.0244140625, "loss_xval": 0.1220703125, "num_input_tokens_seen": 441593108, "step": 2571 }, { "epoch": 0.6765305451436838, "grad_norm": 5.196433267413594, "learning_rate": 5e-06, "loss": 0.1158, "num_input_tokens_seen": 441765456, "step": 2572 }, { "epoch": 
0.6765305451436838, "loss": 0.1754513680934906, "loss_ce": 0.0007077160989865661, "loss_iou": null, "loss_num": 0.034912109375, "loss_xval": 0.1748046875, "num_input_tokens_seen": 441765456, "step": 2572 }, { "epoch": 0.6767935819030709, "grad_norm": 4.136115815472187, "learning_rate": 5e-06, "loss": 0.1297, "num_input_tokens_seen": 441937580, "step": 2573 }, { "epoch": 0.6767935819030709, "loss": 0.13903826475143433, "loss_ce": 0.0002443119592498988, "loss_iou": 0.46484375, "loss_num": 0.0277099609375, "loss_xval": 0.138671875, "num_input_tokens_seen": 441937580, "step": 2573 }, { "epoch": 0.677056618662458, "grad_norm": 18.488728586585154, "learning_rate": 5e-06, "loss": 0.0878, "num_input_tokens_seen": 442108052, "step": 2574 }, { "epoch": 0.677056618662458, "loss": 0.12112629413604736, "loss_ce": 0.0002309026604052633, "loss_iou": 0.478515625, "loss_num": 0.024169921875, "loss_xval": 0.12109375, "num_input_tokens_seen": 442108052, "step": 2574 }, { "epoch": 0.6773196554218452, "grad_norm": 7.779321814196593, "learning_rate": 5e-06, "loss": 0.1347, "num_input_tokens_seen": 442279576, "step": 2575 }, { "epoch": 0.6773196554218452, "loss": 0.10174375027418137, "loss_ce": 0.0021038581617176533, "loss_iou": 0.6015625, "loss_num": 0.0198974609375, "loss_xval": 0.099609375, "num_input_tokens_seen": 442279576, "step": 2575 }, { "epoch": 0.6775826921812323, "grad_norm": 4.027957209739208, "learning_rate": 5e-06, "loss": 0.0789, "num_input_tokens_seen": 442447800, "step": 2576 }, { "epoch": 0.6775826921812323, "loss": 0.08731138706207275, "loss_ce": 0.0017401032382622361, "loss_iou": 0.4921875, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 442447800, "step": 2576 }, { "epoch": 0.6778457289406195, "grad_norm": 3.5976030894091644, "learning_rate": 5e-06, "loss": 0.1165, "num_input_tokens_seen": 442620136, "step": 2577 }, { "epoch": 0.6778457289406195, "loss": 0.11548551917076111, "loss_ce": 0.0009377849055454135, "loss_iou": 0.5390625, 
"loss_num": 0.02294921875, "loss_xval": 0.11474609375, "num_input_tokens_seen": 442620136, "step": 2577 }, { "epoch": 0.6781087657000066, "grad_norm": 14.27185242111941, "learning_rate": 5e-06, "loss": 0.1706, "num_input_tokens_seen": 442792328, "step": 2578 }, { "epoch": 0.6781087657000066, "loss": 0.20107831060886383, "loss_ce": 0.01119793951511383, "loss_iou": 0.447265625, "loss_num": 0.037841796875, "loss_xval": 0.189453125, "num_input_tokens_seen": 442792328, "step": 2578 }, { "epoch": 0.6783718024593937, "grad_norm": 2.6862819825925905, "learning_rate": 5e-06, "loss": 0.0991, "num_input_tokens_seen": 442964732, "step": 2579 }, { "epoch": 0.6783718024593937, "loss": 0.12557528913021088, "loss_ce": 0.001063565374352038, "loss_iou": 0.328125, "loss_num": 0.02490234375, "loss_xval": 0.12451171875, "num_input_tokens_seen": 442964732, "step": 2579 }, { "epoch": 0.6786348392187809, "grad_norm": 19.12719362396153, "learning_rate": 5e-06, "loss": 0.1384, "num_input_tokens_seen": 443136988, "step": 2580 }, { "epoch": 0.6786348392187809, "loss": 0.1754310131072998, "loss_ce": 0.001114598591811955, "loss_iou": 0.578125, "loss_num": 0.034912109375, "loss_xval": 0.173828125, "num_input_tokens_seen": 443136988, "step": 2580 }, { "epoch": 0.678897875978168, "grad_norm": 4.296169271228249, "learning_rate": 5e-06, "loss": 0.1016, "num_input_tokens_seen": 443309004, "step": 2581 }, { "epoch": 0.678897875978168, "loss": 0.056013718247413635, "loss_ce": 0.0018907939083874226, "loss_iou": 0.470703125, "loss_num": 0.01080322265625, "loss_xval": 0.05419921875, "num_input_tokens_seen": 443309004, "step": 2581 }, { "epoch": 0.6791609127375551, "grad_norm": 14.58212290872784, "learning_rate": 5e-06, "loss": 0.128, "num_input_tokens_seen": 443481132, "step": 2582 }, { "epoch": 0.6791609127375551, "loss": 0.10740009695291519, "loss_ce": 0.00046649359865114093, "loss_iou": 0.310546875, "loss_num": 0.0213623046875, "loss_xval": 0.10693359375, "num_input_tokens_seen": 443481132, "step": 
2582 }, { "epoch": 0.6794239494969422, "grad_norm": 5.824835527761782, "learning_rate": 5e-06, "loss": 0.1152, "num_input_tokens_seen": 443651636, "step": 2583 }, { "epoch": 0.6794239494969422, "loss": 0.13724330067634583, "loss_ce": 0.0017452588072046638, "loss_iou": 0.56640625, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 443651636, "step": 2583 }, { "epoch": 0.6796869862563293, "grad_norm": 6.27799845618246, "learning_rate": 5e-06, "loss": 0.1294, "num_input_tokens_seen": 443823548, "step": 2584 }, { "epoch": 0.6796869862563293, "loss": 0.1675836145877838, "loss_ce": 0.002330929273739457, "loss_iou": 0.390625, "loss_num": 0.032958984375, "loss_xval": 0.1650390625, "num_input_tokens_seen": 443823548, "step": 2584 }, { "epoch": 0.6799500230157165, "grad_norm": 5.283309975586119, "learning_rate": 5e-06, "loss": 0.1072, "num_input_tokens_seen": 443994196, "step": 2585 }, { "epoch": 0.6799500230157165, "loss": 0.07297757267951965, "loss_ce": 7.10797612555325e-05, "loss_iou": 0.68359375, "loss_num": 0.01458740234375, "loss_xval": 0.07275390625, "num_input_tokens_seen": 443994196, "step": 2585 }, { "epoch": 0.6802130597751036, "grad_norm": 4.83126163860268, "learning_rate": 5e-06, "loss": 0.1149, "num_input_tokens_seen": 444166260, "step": 2586 }, { "epoch": 0.6802130597751036, "loss": 0.15566733479499817, "loss_ce": 0.0010347592178732157, "loss_iou": 0.57421875, "loss_num": 0.0308837890625, "loss_xval": 0.154296875, "num_input_tokens_seen": 444166260, "step": 2586 }, { "epoch": 0.6804760965344907, "grad_norm": 5.935537777211587, "learning_rate": 5e-06, "loss": 0.1299, "num_input_tokens_seen": 444338748, "step": 2587 }, { "epoch": 0.6804760965344907, "loss": 0.07495879381895065, "loss_ce": 0.0001907299447339028, "loss_iou": 0.6015625, "loss_num": 0.01495361328125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 444338748, "step": 2587 }, { "epoch": 0.6807391332938778, "grad_norm": 4.981341494504371, "learning_rate": 5e-06, 
"loss": 0.1501, "num_input_tokens_seen": 444510996, "step": 2588 }, { "epoch": 0.6807391332938778, "loss": 0.2452811598777771, "loss_ce": 0.002849509473890066, "loss_iou": 0.3984375, "loss_num": 0.04833984375, "loss_xval": 0.2421875, "num_input_tokens_seen": 444510996, "step": 2588 }, { "epoch": 0.6810021700532649, "grad_norm": 34.8294057916369, "learning_rate": 5e-06, "loss": 0.1477, "num_input_tokens_seen": 444682880, "step": 2589 }, { "epoch": 0.6810021700532649, "loss": 0.20022635161876678, "loss_ce": 0.002899688435718417, "loss_iou": 0.64453125, "loss_num": 0.03955078125, "loss_xval": 0.197265625, "num_input_tokens_seen": 444682880, "step": 2589 }, { "epoch": 0.681265206812652, "grad_norm": 6.783440844262528, "learning_rate": 5e-06, "loss": 0.1355, "num_input_tokens_seen": 444855080, "step": 2590 }, { "epoch": 0.681265206812652, "loss": 0.17689135670661926, "loss_ce": 0.0014458001824095845, "loss_iou": 0.431640625, "loss_num": 0.03515625, "loss_xval": 0.17578125, "num_input_tokens_seen": 444855080, "step": 2590 }, { "epoch": 0.6815282435720392, "grad_norm": 6.398055261239124, "learning_rate": 5e-06, "loss": 0.1061, "num_input_tokens_seen": 445027648, "step": 2591 }, { "epoch": 0.6815282435720392, "loss": 0.06686560064554214, "loss_ce": 0.003968872129917145, "loss_iou": 0.6484375, "loss_num": 0.0125732421875, "loss_xval": 0.06298828125, "num_input_tokens_seen": 445027648, "step": 2591 }, { "epoch": 0.6817912803314263, "grad_norm": 9.754653714308724, "learning_rate": 5e-06, "loss": 0.158, "num_input_tokens_seen": 445198060, "step": 2592 }, { "epoch": 0.6817912803314263, "loss": 0.0956597551703453, "loss_ce": 0.001345182885415852, "loss_iou": 0.42578125, "loss_num": 0.018798828125, "loss_xval": 0.09423828125, "num_input_tokens_seen": 445198060, "step": 2592 }, { "epoch": 0.6820543170908134, "grad_norm": 11.093960051238781, "learning_rate": 5e-06, "loss": 0.1506, "num_input_tokens_seen": 445370376, "step": 2593 }, { "epoch": 0.6820543170908134, "loss": 
0.14656749367713928, "loss_ce": 0.001456425990909338, "loss_iou": 0.482421875, "loss_num": 0.0289306640625, "loss_xval": 0.1455078125, "num_input_tokens_seen": 445370376, "step": 2593 }, { "epoch": 0.6823173538502005, "grad_norm": 7.226137026161222, "learning_rate": 5e-06, "loss": 0.1077, "num_input_tokens_seen": 445542828, "step": 2594 }, { "epoch": 0.6823173538502005, "loss": 0.06742848455905914, "loss_ce": 0.005309954285621643, "loss_iou": 0.48828125, "loss_num": 0.012451171875, "loss_xval": 0.06201171875, "num_input_tokens_seen": 445542828, "step": 2594 }, { "epoch": 0.6825803906095876, "grad_norm": 4.617442760822597, "learning_rate": 5e-06, "loss": 0.137, "num_input_tokens_seen": 445715128, "step": 2595 }, { "epoch": 0.6825803906095876, "loss": 0.11120542138814926, "loss_ce": 0.0006707610446028411, "loss_iou": 0.56640625, "loss_num": 0.0220947265625, "loss_xval": 0.1103515625, "num_input_tokens_seen": 445715128, "step": 2595 }, { "epoch": 0.6828434273689749, "grad_norm": 8.070925307261565, "learning_rate": 5e-06, "loss": 0.1584, "num_input_tokens_seen": 445887032, "step": 2596 }, { "epoch": 0.6828434273689749, "loss": 0.12233078479766846, "loss_ce": 0.0008708295645192266, "loss_iou": 0.486328125, "loss_num": 0.0242919921875, "loss_xval": 0.12158203125, "num_input_tokens_seen": 445887032, "step": 2596 }, { "epoch": 0.683106464128362, "grad_norm": 4.079666869126499, "learning_rate": 5e-06, "loss": 0.1147, "num_input_tokens_seen": 446059408, "step": 2597 }, { "epoch": 0.683106464128362, "loss": 0.1118651032447815, "loss_ce": 0.00023180160496849567, "loss_iou": 0.49609375, "loss_num": 0.0223388671875, "loss_xval": 0.11181640625, "num_input_tokens_seen": 446059408, "step": 2597 }, { "epoch": 0.6833695008877491, "grad_norm": 6.132037428307681, "learning_rate": 5e-06, "loss": 0.1023, "num_input_tokens_seen": 446229696, "step": 2598 }, { "epoch": 0.6833695008877491, "loss": 0.13406622409820557, "loss_ce": 0.0008264797506853938, "loss_iou": 0.44140625, "loss_num": 
0.026611328125, "loss_xval": 0.1328125, "num_input_tokens_seen": 446229696, "step": 2598 }, { "epoch": 0.6836325376471362, "grad_norm": 3.96696464215725, "learning_rate": 5e-06, "loss": 0.0997, "num_input_tokens_seen": 446401824, "step": 2599 }, { "epoch": 0.6836325376471362, "loss": 0.07421835511922836, "loss_ce": 0.0017085927538573742, "loss_iou": 0.4453125, "loss_num": 0.0145263671875, "loss_xval": 0.072265625, "num_input_tokens_seen": 446401824, "step": 2599 }, { "epoch": 0.6838955744065233, "grad_norm": 5.0831174679987035, "learning_rate": 5e-06, "loss": 0.1133, "num_input_tokens_seen": 446573796, "step": 2600 }, { "epoch": 0.6838955744065233, "loss": 0.10569039732217789, "loss_ce": 0.0010151021415367723, "loss_iou": 0.484375, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 446573796, "step": 2600 }, { "epoch": 0.6841586111659105, "grad_norm": 15.882119320832834, "learning_rate": 5e-06, "loss": 0.1295, "num_input_tokens_seen": 446746000, "step": 2601 }, { "epoch": 0.6841586111659105, "loss": 0.1552843451499939, "loss_ce": 0.000499195302836597, "loss_iou": 0.3125, "loss_num": 0.0308837890625, "loss_xval": 0.154296875, "num_input_tokens_seen": 446746000, "step": 2601 }, { "epoch": 0.6844216479252976, "grad_norm": 3.8491365485511118, "learning_rate": 5e-06, "loss": 0.0928, "num_input_tokens_seen": 446918100, "step": 2602 }, { "epoch": 0.6844216479252976, "loss": 0.050599753856658936, "loss_ce": 0.0002762702642939985, "loss_iou": 0.54296875, "loss_num": 0.01007080078125, "loss_xval": 0.05029296875, "num_input_tokens_seen": 446918100, "step": 2602 }, { "epoch": 0.6846846846846847, "grad_norm": 4.058506484572602, "learning_rate": 5e-06, "loss": 0.1059, "num_input_tokens_seen": 447088492, "step": 2603 }, { "epoch": 0.6846846846846847, "loss": 0.043845463544130325, "loss_ce": 0.0001748104114085436, "loss_iou": 0.57421875, "loss_num": 0.00872802734375, "loss_xval": 0.043701171875, "num_input_tokens_seen": 447088492, "step": 2603 }, { 
"epoch": 0.6849477214440718, "grad_norm": 4.841823848779041, "learning_rate": 5e-06, "loss": 0.079, "num_input_tokens_seen": 447260652, "step": 2604 }, { "epoch": 0.6849477214440718, "loss": 0.08405909687280655, "loss_ce": 0.004438734147697687, "loss_iou": 0.419921875, "loss_num": 0.015869140625, "loss_xval": 0.07958984375, "num_input_tokens_seen": 447260652, "step": 2604 }, { "epoch": 0.6852107582034589, "grad_norm": 9.044057313780108, "learning_rate": 5e-06, "loss": 0.104, "num_input_tokens_seen": 447432936, "step": 2605 }, { "epoch": 0.6852107582034589, "loss": 0.07848000526428223, "loss_ce": 0.0012094933772459626, "loss_iou": 0.63671875, "loss_num": 0.01544189453125, "loss_xval": 0.0771484375, "num_input_tokens_seen": 447432936, "step": 2605 }, { "epoch": 0.6854737949628461, "grad_norm": 7.595343434642406, "learning_rate": 5e-06, "loss": 0.1109, "num_input_tokens_seen": 447605384, "step": 2606 }, { "epoch": 0.6854737949628461, "loss": 0.12424921244382858, "loss_ce": 0.0026061516255140305, "loss_iou": 0.50390625, "loss_num": 0.0242919921875, "loss_xval": 0.12158203125, "num_input_tokens_seen": 447605384, "step": 2606 }, { "epoch": 0.6857368317222332, "grad_norm": 5.0763063086969415, "learning_rate": 5e-06, "loss": 0.1387, "num_input_tokens_seen": 447777576, "step": 2607 }, { "epoch": 0.6857368317222332, "loss": 0.11916627734899521, "loss_ce": 0.002253434620797634, "loss_iou": 0.54296875, "loss_num": 0.0234375, "loss_xval": 0.11669921875, "num_input_tokens_seen": 447777576, "step": 2607 }, { "epoch": 0.6859998684816203, "grad_norm": 11.527878652419272, "learning_rate": 5e-06, "loss": 0.0904, "num_input_tokens_seen": 447949852, "step": 2608 }, { "epoch": 0.6859998684816203, "loss": 0.0797332376241684, "loss_ce": 0.0030425682198256254, "loss_iou": 0.5859375, "loss_num": 0.01531982421875, "loss_xval": 0.07666015625, "num_input_tokens_seen": 447949852, "step": 2608 }, { "epoch": 0.6862629052410074, "grad_norm": 6.796020734872217, "learning_rate": 5e-06, "loss": 
0.1006, "num_input_tokens_seen": 448122008, "step": 2609 }, { "epoch": 0.6862629052410074, "loss": 0.09912531077861786, "loss_ce": 0.0006145666702650487, "loss_iou": 0.58203125, "loss_num": 0.0196533203125, "loss_xval": 0.0986328125, "num_input_tokens_seen": 448122008, "step": 2609 }, { "epoch": 0.6865259420003945, "grad_norm": 53.2300135535103, "learning_rate": 5e-06, "loss": 0.1731, "num_input_tokens_seen": 448294084, "step": 2610 }, { "epoch": 0.6865259420003945, "loss": 0.14799347519874573, "loss_ce": 0.0005325321108102798, "loss_iou": 0.337890625, "loss_num": 0.029541015625, "loss_xval": 0.1474609375, "num_input_tokens_seen": 448294084, "step": 2610 }, { "epoch": 0.6867889787597817, "grad_norm": 8.384011107333327, "learning_rate": 5e-06, "loss": 0.1662, "num_input_tokens_seen": 448466064, "step": 2611 }, { "epoch": 0.6867889787597817, "loss": 0.16361187398433685, "loss_ce": 0.0061564212664961815, "loss_iou": 0.40625, "loss_num": 0.031494140625, "loss_xval": 0.1572265625, "num_input_tokens_seen": 448466064, "step": 2611 }, { "epoch": 0.6870520155191688, "grad_norm": 58.16654404486195, "learning_rate": 5e-06, "loss": 0.1132, "num_input_tokens_seen": 448638396, "step": 2612 }, { "epoch": 0.6870520155191688, "loss": 0.11319980025291443, "loss_ce": 0.008921236731112003, "loss_iou": 0.4609375, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 448638396, "step": 2612 }, { "epoch": 0.6873150522785559, "grad_norm": 4.791126855568047, "learning_rate": 5e-06, "loss": 0.0774, "num_input_tokens_seen": 448808820, "step": 2613 }, { "epoch": 0.6873150522785559, "loss": 0.08992376923561096, "loss_ce": 0.0010565832490101457, "loss_iou": 0.6640625, "loss_num": 0.017822265625, "loss_xval": 0.0888671875, "num_input_tokens_seen": 448808820, "step": 2613 }, { "epoch": 0.687578089037943, "grad_norm": 4.623158042374579, "learning_rate": 5e-06, "loss": 0.1092, "num_input_tokens_seen": 448980968, "step": 2614 }, { "epoch": 0.687578089037943, "loss": 
0.10407891124486923, "loss_ce": 0.0005632878746837378, "loss_iou": 0.39453125, "loss_num": 0.020751953125, "loss_xval": 0.103515625, "num_input_tokens_seen": 448980968, "step": 2614 }, { "epoch": 0.6878411257973301, "grad_norm": 16.128815011570154, "learning_rate": 5e-06, "loss": 0.0861, "num_input_tokens_seen": 449153104, "step": 2615 }, { "epoch": 0.6878411257973301, "loss": 0.05162462592124939, "loss_ce": 0.0013011416886001825, "loss_iou": 0.498046875, "loss_num": 0.01007080078125, "loss_xval": 0.05029296875, "num_input_tokens_seen": 449153104, "step": 2615 }, { "epoch": 0.6881041625567172, "grad_norm": 12.10527552944659, "learning_rate": 5e-06, "loss": 0.0853, "num_input_tokens_seen": 449325208, "step": 2616 }, { "epoch": 0.6881041625567172, "loss": 0.056678079068660736, "loss_ce": 0.0005867721047252417, "loss_iou": 0.515625, "loss_num": 0.01123046875, "loss_xval": 0.05615234375, "num_input_tokens_seen": 449325208, "step": 2616 }, { "epoch": 0.6883671993161045, "grad_norm": 4.0544613329310675, "learning_rate": 5e-06, "loss": 0.0917, "num_input_tokens_seen": 449497440, "step": 2617 }, { "epoch": 0.6883671993161045, "loss": 0.12780970335006714, "loss_ce": 0.0006887409836053848, "loss_iou": 0.5234375, "loss_num": 0.0255126953125, "loss_xval": 0.126953125, "num_input_tokens_seen": 449497440, "step": 2617 }, { "epoch": 0.6886302360754916, "grad_norm": 6.140781979511808, "learning_rate": 5e-06, "loss": 0.1287, "num_input_tokens_seen": 449669860, "step": 2618 }, { "epoch": 0.6886302360754916, "loss": 0.12178224325180054, "loss_ce": 0.0038012792356312275, "loss_iou": 0.43359375, "loss_num": 0.0235595703125, "loss_xval": 0.1181640625, "num_input_tokens_seen": 449669860, "step": 2618 }, { "epoch": 0.6888932728348787, "grad_norm": 4.582153125692399, "learning_rate": 5e-06, "loss": 0.1553, "num_input_tokens_seen": 449842236, "step": 2619 }, { "epoch": 0.6888932728348787, "loss": 0.11555735766887665, "loss_ce": 0.003313705325126648, "loss_iou": 0.5703125, "loss_num": 
0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 449842236, "step": 2619 }, { "epoch": 0.6891563095942658, "grad_norm": 10.075980623616992, "learning_rate": 5e-06, "loss": 0.1058, "num_input_tokens_seen": 450014444, "step": 2620 }, { "epoch": 0.6891563095942658, "loss": 0.20255392789840698, "loss_ce": 0.004891556687653065, "loss_iou": 0.4765625, "loss_num": 0.03955078125, "loss_xval": 0.197265625, "num_input_tokens_seen": 450014444, "step": 2620 }, { "epoch": 0.6894193463536529, "grad_norm": 16.09024773291766, "learning_rate": 5e-06, "loss": 0.1354, "num_input_tokens_seen": 450186632, "step": 2621 }, { "epoch": 0.6894193463536529, "loss": 0.15711162984371185, "loss_ce": 0.001548278727568686, "loss_iou": 0.5703125, "loss_num": 0.0311279296875, "loss_xval": 0.1552734375, "num_input_tokens_seen": 450186632, "step": 2621 }, { "epoch": 0.6896823831130401, "grad_norm": 4.077753866376941, "learning_rate": 5e-06, "loss": 0.1379, "num_input_tokens_seen": 450358652, "step": 2622 }, { "epoch": 0.6896823831130401, "loss": 0.17466840147972107, "loss_ce": 0.0030528109055012465, "loss_iou": 0.32421875, "loss_num": 0.034423828125, "loss_xval": 0.171875, "num_input_tokens_seen": 450358652, "step": 2622 }, { "epoch": 0.6899454198724272, "grad_norm": 5.782402643209976, "learning_rate": 5e-06, "loss": 0.08, "num_input_tokens_seen": 450530992, "step": 2623 }, { "epoch": 0.6899454198724272, "loss": 0.09183860570192337, "loss_ce": 0.000987776555120945, "loss_iou": 0.51171875, "loss_num": 0.0181884765625, "loss_xval": 0.0908203125, "num_input_tokens_seen": 450530992, "step": 2623 }, { "epoch": 0.6902084566318143, "grad_norm": 4.962596372966127, "learning_rate": 5e-06, "loss": 0.1336, "num_input_tokens_seen": 450702972, "step": 2624 }, { "epoch": 0.6902084566318143, "loss": 0.16587726771831512, "loss_ce": 0.004210404586046934, "loss_iou": 0.34375, "loss_num": 0.0322265625, "loss_xval": 0.162109375, "num_input_tokens_seen": 450702972, "step": 2624 }, { "epoch": 
0.6904714933912014, "grad_norm": 4.41836936316546, "learning_rate": 5e-06, "loss": 0.0755, "num_input_tokens_seen": 450871644, "step": 2625 }, { "epoch": 0.6904714933912014, "loss": 0.08390143513679504, "loss_ce": 0.00022223126143217087, "loss_iou": 0.35546875, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 450871644, "step": 2625 }, { "epoch": 0.6907345301505885, "grad_norm": 6.24899387116621, "learning_rate": 5e-06, "loss": 0.0997, "num_input_tokens_seen": 451043764, "step": 2626 }, { "epoch": 0.6907345301505885, "loss": 0.07276784628629684, "loss_ce": 0.0041185528971254826, "loss_iou": 0.515625, "loss_num": 0.01373291015625, "loss_xval": 0.06884765625, "num_input_tokens_seen": 451043764, "step": 2626 }, { "epoch": 0.6909975669099757, "grad_norm": 3.855416348391677, "learning_rate": 5e-06, "loss": 0.0774, "num_input_tokens_seen": 451215860, "step": 2627 }, { "epoch": 0.6909975669099757, "loss": 0.0781233012676239, "loss_ce": 0.00033398933010175824, "loss_iou": 0.4921875, "loss_num": 0.01556396484375, "loss_xval": 0.07763671875, "num_input_tokens_seen": 451215860, "step": 2627 }, { "epoch": 0.6912606036693628, "grad_norm": 9.945431727329952, "learning_rate": 5e-06, "loss": 0.0856, "num_input_tokens_seen": 451387900, "step": 2628 }, { "epoch": 0.6912606036693628, "loss": 0.11348669975996017, "loss_ce": 0.0029215135145932436, "loss_iou": 0.4140625, "loss_num": 0.0220947265625, "loss_xval": 0.1103515625, "num_input_tokens_seen": 451387900, "step": 2628 }, { "epoch": 0.6915236404287499, "grad_norm": 4.281056455844802, "learning_rate": 5e-06, "loss": 0.0937, "num_input_tokens_seen": 451558132, "step": 2629 }, { "epoch": 0.6915236404287499, "loss": 0.11048734933137894, "loss_ce": 0.00626982469111681, "loss_iou": 0.578125, "loss_num": 0.0208740234375, "loss_xval": 0.10400390625, "num_input_tokens_seen": 451558132, "step": 2629 }, { "epoch": 0.691786677188137, "grad_norm": 4.21783711884085, "learning_rate": 5e-06, "loss": 0.1156, 
"num_input_tokens_seen": 451730300, "step": 2630 }, { "epoch": 0.691786677188137, "loss": 0.06679116189479828, "loss_ce": 0.0006290507735684514, "loss_iou": 0.44921875, "loss_num": 0.01324462890625, "loss_xval": 0.06640625, "num_input_tokens_seen": 451730300, "step": 2630 }, { "epoch": 0.6920497139475241, "grad_norm": 3.6368581797967257, "learning_rate": 5e-06, "loss": 0.0955, "num_input_tokens_seen": 451902464, "step": 2631 }, { "epoch": 0.6920497139475241, "loss": 0.07377897202968597, "loss_ce": 0.002199993235990405, "loss_iou": 0.373046875, "loss_num": 0.01434326171875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 451902464, "step": 2631 }, { "epoch": 0.6923127507069113, "grad_norm": 4.812739476349578, "learning_rate": 5e-06, "loss": 0.136, "num_input_tokens_seen": 452074432, "step": 2632 }, { "epoch": 0.6923127507069113, "loss": 0.08376286178827286, "loss_ce": 0.0004193550848867744, "loss_iou": 0.482421875, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 452074432, "step": 2632 }, { "epoch": 0.6925757874662984, "grad_norm": 17.745450709557872, "learning_rate": 5e-06, "loss": 0.1677, "num_input_tokens_seen": 452246508, "step": 2633 }, { "epoch": 0.6925757874662984, "loss": 0.12932631373405457, "loss_ce": 0.00045057572424411774, "loss_iou": 0.48046875, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 452246508, "step": 2633 }, { "epoch": 0.6928388242256855, "grad_norm": 3.8241822612681404, "learning_rate": 5e-06, "loss": 0.09, "num_input_tokens_seen": 452418684, "step": 2634 }, { "epoch": 0.6928388242256855, "loss": 0.07180548459291458, "loss_ce": 0.000638492638245225, "loss_iou": 0.578125, "loss_num": 0.01422119140625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 452418684, "step": 2634 }, { "epoch": 0.6931018609850726, "grad_norm": 5.017758815401084, "learning_rate": 5e-06, "loss": 0.1214, "num_input_tokens_seen": 452591112, "step": 2635 }, { "epoch": 0.6931018609850726, "loss": 
0.14692719280719757, "loss_ce": 0.001221017329953611, "loss_iou": 0.52734375, "loss_num": 0.0291748046875, "loss_xval": 0.1455078125, "num_input_tokens_seen": 452591112, "step": 2635 }, { "epoch": 0.6933648977444598, "grad_norm": 4.688279396436893, "learning_rate": 5e-06, "loss": 0.1001, "num_input_tokens_seen": 452763100, "step": 2636 }, { "epoch": 0.6933648977444598, "loss": 0.11540480703115463, "loss_ce": 0.0031611607410013676, "loss_iou": 0.5390625, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 452763100, "step": 2636 }, { "epoch": 0.693627934503847, "grad_norm": 5.800484869059427, "learning_rate": 5e-06, "loss": 0.1513, "num_input_tokens_seen": 452932760, "step": 2637 }, { "epoch": 0.693627934503847, "loss": 0.20021981000900269, "loss_ce": 0.002526947297155857, "loss_iou": 0.62890625, "loss_num": 0.03955078125, "loss_xval": 0.197265625, "num_input_tokens_seen": 452932760, "step": 2637 }, { "epoch": 0.6938909712632341, "grad_norm": 6.943406352493944, "learning_rate": 5e-06, "loss": 0.1213, "num_input_tokens_seen": 453104824, "step": 2638 }, { "epoch": 0.6938909712632341, "loss": 0.11978557705879211, "loss_ce": 0.003910328261554241, "loss_iou": 0.51953125, "loss_num": 0.023193359375, "loss_xval": 0.11572265625, "num_input_tokens_seen": 453104824, "step": 2638 }, { "epoch": 0.6941540080226212, "grad_norm": 8.392991417214587, "learning_rate": 5e-06, "loss": 0.1027, "num_input_tokens_seen": 453276756, "step": 2639 }, { "epoch": 0.6941540080226212, "loss": 0.14925961196422577, "loss_ce": 0.00796323362737894, "loss_iou": 0.6484375, "loss_num": 0.0281982421875, "loss_xval": 0.1416015625, "num_input_tokens_seen": 453276756, "step": 2639 }, { "epoch": 0.6944170447820083, "grad_norm": 7.152620797713759, "learning_rate": 5e-06, "loss": 0.0825, "num_input_tokens_seen": 453447368, "step": 2640 }, { "epoch": 0.6944170447820083, "loss": 0.12115476280450821, "loss_ce": 0.002624482847750187, "loss_iou": 0.51171875, "loss_num": 0.023681640625, 
"loss_xval": 0.11865234375, "num_input_tokens_seen": 453447368, "step": 2640 }, { "epoch": 0.6946800815413954, "grad_norm": 9.172566148062016, "learning_rate": 5e-06, "loss": 0.1376, "num_input_tokens_seen": 453619160, "step": 2641 }, { "epoch": 0.6946800815413954, "loss": 0.09918803721666336, "loss_ce": 0.0022947255056351423, "loss_iou": 0.53125, "loss_num": 0.0194091796875, "loss_xval": 0.0966796875, "num_input_tokens_seen": 453619160, "step": 2641 }, { "epoch": 0.6949431183007825, "grad_norm": 4.761554471447507, "learning_rate": 5e-06, "loss": 0.1401, "num_input_tokens_seen": 453791024, "step": 2642 }, { "epoch": 0.6949431183007825, "loss": 0.12317492812871933, "loss_ce": 0.004400510806590319, "loss_iou": 0.5, "loss_num": 0.0238037109375, "loss_xval": 0.11865234375, "num_input_tokens_seen": 453791024, "step": 2642 }, { "epoch": 0.6952061550601697, "grad_norm": 6.764608063652922, "learning_rate": 5e-06, "loss": 0.0985, "num_input_tokens_seen": 453963268, "step": 2643 }, { "epoch": 0.6952061550601697, "loss": 0.17664819955825806, "loss_ce": 0.0005617668502964079, "loss_iou": 0.5625, "loss_num": 0.03515625, "loss_xval": 0.17578125, "num_input_tokens_seen": 453963268, "step": 2643 }, { "epoch": 0.6954691918195568, "grad_norm": 8.472259366872775, "learning_rate": 5e-06, "loss": 0.1241, "num_input_tokens_seen": 454133580, "step": 2644 }, { "epoch": 0.6954691918195568, "loss": 0.05588904023170471, "loss_ce": 0.0003470498777460307, "loss_iou": 0.52734375, "loss_num": 0.0111083984375, "loss_xval": 0.0556640625, "num_input_tokens_seen": 454133580, "step": 2644 }, { "epoch": 0.6957322285789439, "grad_norm": 9.45776397859236, "learning_rate": 5e-06, "loss": 0.1535, "num_input_tokens_seen": 454305316, "step": 2645 }, { "epoch": 0.6957322285789439, "loss": 0.11683906614780426, "loss_ce": 0.002215046202763915, "loss_iou": NaN, "loss_num": 0.02294921875, "loss_xval": 0.11474609375, "num_input_tokens_seen": 454305316, "step": 2645 }, { "epoch": 0.695995265338331, "grad_norm": 
12.21950005723, "learning_rate": 5e-06, "loss": 0.1414, "num_input_tokens_seen": 454477460, "step": 2646 }, { "epoch": 0.695995265338331, "loss": 0.19950228929519653, "loss_ce": 0.00556308263912797, "loss_iou": 0.455078125, "loss_num": 0.038818359375, "loss_xval": 0.1943359375, "num_input_tokens_seen": 454477460, "step": 2646 }, { "epoch": 0.6962583020977181, "grad_norm": 4.539781489505394, "learning_rate": 5e-06, "loss": 0.1468, "num_input_tokens_seen": 454649576, "step": 2647 }, { "epoch": 0.6962583020977181, "loss": 0.053970806300640106, "loss_ce": 0.0009465104667469859, "loss_iou": 0.3828125, "loss_num": 0.0106201171875, "loss_xval": 0.052978515625, "num_input_tokens_seen": 454649576, "step": 2647 }, { "epoch": 0.6965213388571053, "grad_norm": 14.32877869742435, "learning_rate": 5e-06, "loss": 0.1035, "num_input_tokens_seen": 454821436, "step": 2648 }, { "epoch": 0.6965213388571053, "loss": 0.10093516111373901, "loss_ce": 0.00645274156704545, "loss_iou": 0.380859375, "loss_num": 0.0189208984375, "loss_xval": 0.0947265625, "num_input_tokens_seen": 454821436, "step": 2648 }, { "epoch": 0.6967843756164924, "grad_norm": 7.804241962530211, "learning_rate": 5e-06, "loss": 0.1306, "num_input_tokens_seen": 454993508, "step": 2649 }, { "epoch": 0.6967843756164924, "loss": 0.1862741857767105, "loss_ce": 0.0016580985393375158, "loss_iou": 0.53515625, "loss_num": 0.036865234375, "loss_xval": 0.1845703125, "num_input_tokens_seen": 454993508, "step": 2649 }, { "epoch": 0.6970474123758795, "grad_norm": 16.275796095386262, "learning_rate": 5e-06, "loss": 0.1161, "num_input_tokens_seen": 455165748, "step": 2650 }, { "epoch": 0.6970474123758795, "loss": 0.07620816677808762, "loss_ce": 0.0016232050256803632, "loss_iou": 0.5625, "loss_num": 0.014892578125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 455165748, "step": 2650 }, { "epoch": 0.6973104491352666, "grad_norm": 4.357121015686003, "learning_rate": 5e-06, "loss": 0.1124, "num_input_tokens_seen": 455338084, "step": 
2651 }, { "epoch": 0.6973104491352666, "loss": 0.13134872913360596, "loss_ce": 0.0034037926234304905, "loss_iou": 0.59765625, "loss_num": 0.0255126953125, "loss_xval": 0.1279296875, "num_input_tokens_seen": 455338084, "step": 2651 }, { "epoch": 0.6975734858946537, "grad_norm": 5.3899957417482725, "learning_rate": 5e-06, "loss": 0.1252, "num_input_tokens_seen": 455509908, "step": 2652 }, { "epoch": 0.6975734858946537, "loss": 0.14220395684242249, "loss_ce": 0.0009075828129425645, "loss_iou": 0.458984375, "loss_num": 0.0283203125, "loss_xval": 0.1416015625, "num_input_tokens_seen": 455509908, "step": 2652 }, { "epoch": 0.697836522654041, "grad_norm": 6.823610210106841, "learning_rate": 5e-06, "loss": 0.1046, "num_input_tokens_seen": 455681908, "step": 2653 }, { "epoch": 0.697836522654041, "loss": 0.05261433497071266, "loss_ce": 0.0018330833408981562, "loss_iou": 0.46484375, "loss_num": 0.0101318359375, "loss_xval": 0.05078125, "num_input_tokens_seen": 455681908, "step": 2653 }, { "epoch": 0.698099559413428, "grad_norm": 6.597254450850545, "learning_rate": 5e-06, "loss": 0.1242, "num_input_tokens_seen": 455853952, "step": 2654 }, { "epoch": 0.698099559413428, "loss": 0.147089421749115, "loss_ce": 0.00023882777895778418, "loss_iou": 0.43359375, "loss_num": 0.0294189453125, "loss_xval": 0.146484375, "num_input_tokens_seen": 455853952, "step": 2654 }, { "epoch": 0.6983625961728152, "grad_norm": 6.378423815524031, "learning_rate": 5e-06, "loss": 0.1075, "num_input_tokens_seen": 456026208, "step": 2655 }, { "epoch": 0.6983625961728152, "loss": 0.18122422695159912, "loss_ce": 0.0033067562617361546, "loss_iou": 0.57421875, "loss_num": 0.03564453125, "loss_xval": 0.177734375, "num_input_tokens_seen": 456026208, "step": 2655 }, { "epoch": 0.6986256329322023, "grad_norm": 25.554315055519154, "learning_rate": 5e-06, "loss": 0.1181, "num_input_tokens_seen": 456198288, "step": 2656 }, { "epoch": 0.6986256329322023, "loss": 0.10486012697219849, "loss_ce": 0.0011003611143678427, 
"loss_iou": 0.52734375, "loss_num": 0.020751953125, "loss_xval": 0.103515625, "num_input_tokens_seen": 456198288, "step": 2656 }, { "epoch": 0.6988886696915894, "grad_norm": 4.576359717222489, "learning_rate": 5e-06, "loss": 0.0955, "num_input_tokens_seen": 456365496, "step": 2657 }, { "epoch": 0.6988886696915894, "loss": 0.13455136120319366, "loss_ce": 0.0007317845011129975, "loss_iou": 0.53125, "loss_num": 0.02685546875, "loss_xval": 0.1337890625, "num_input_tokens_seen": 456365496, "step": 2657 }, { "epoch": 0.6991517064509766, "grad_norm": 5.013450140740463, "learning_rate": 5e-06, "loss": 0.1199, "num_input_tokens_seen": 456537676, "step": 2658 }, { "epoch": 0.6991517064509766, "loss": 0.08290005475282669, "loss_ce": 0.003767975838854909, "loss_iou": 0.5, "loss_num": 0.015869140625, "loss_xval": 0.0791015625, "num_input_tokens_seen": 456537676, "step": 2658 }, { "epoch": 0.6994147432103637, "grad_norm": 4.084720539079834, "learning_rate": 5e-06, "loss": 0.1133, "num_input_tokens_seen": 456710364, "step": 2659 }, { "epoch": 0.6994147432103637, "loss": 0.06024003401398659, "loss_ce": 0.0006849807105027139, "loss_iou": 0.5703125, "loss_num": 0.01190185546875, "loss_xval": 0.0595703125, "num_input_tokens_seen": 456710364, "step": 2659 }, { "epoch": 0.6996777799697508, "grad_norm": 3.7990429726373636, "learning_rate": 5e-06, "loss": 0.1206, "num_input_tokens_seen": 456880760, "step": 2660 }, { "epoch": 0.6996777799697508, "loss": 0.09679127484560013, "loss_ce": 0.0005083205760456622, "loss_iou": 0.482421875, "loss_num": 0.019287109375, "loss_xval": 0.09619140625, "num_input_tokens_seen": 456880760, "step": 2660 }, { "epoch": 0.6999408167291379, "grad_norm": 5.055246469854178, "learning_rate": 5e-06, "loss": 0.1654, "num_input_tokens_seen": 457051432, "step": 2661 }, { "epoch": 0.6999408167291379, "loss": 0.21587547659873962, "loss_ce": 0.0004213774227537215, "loss_iou": 0.3671875, "loss_num": 0.04296875, "loss_xval": 0.2158203125, "num_input_tokens_seen": 
457051432, "step": 2661 }, { "epoch": 0.700203853488525, "grad_norm": 3.8688477802914774, "learning_rate": 5e-06, "loss": 0.1309, "num_input_tokens_seen": 457223592, "step": 2662 }, { "epoch": 0.700203853488525, "loss": 0.14515820145606995, "loss_ce": 0.0040449099615216255, "loss_iou": 0.4296875, "loss_num": 0.0281982421875, "loss_xval": 0.140625, "num_input_tokens_seen": 457223592, "step": 2662 }, { "epoch": 0.7004668902479122, "grad_norm": 10.967803606252325, "learning_rate": 5e-06, "loss": 0.0963, "num_input_tokens_seen": 457396048, "step": 2663 }, { "epoch": 0.7004668902479122, "loss": 0.13383157551288605, "loss_ce": 0.0020261560566723347, "loss_iou": 0.55078125, "loss_num": 0.0263671875, "loss_xval": 0.1318359375, "num_input_tokens_seen": 457396048, "step": 2663 }, { "epoch": 0.7007299270072993, "grad_norm": 3.339673678629444, "learning_rate": 5e-06, "loss": 0.1163, "num_input_tokens_seen": 457566272, "step": 2664 }, { "epoch": 0.7007299270072993, "loss": 0.20303812623023987, "loss_ce": 0.005711473990231752, "loss_iou": 0.515625, "loss_num": 0.03955078125, "loss_xval": 0.197265625, "num_input_tokens_seen": 457566272, "step": 2664 }, { "epoch": 0.7009929637666864, "grad_norm": 10.341407204229276, "learning_rate": 5e-06, "loss": 0.114, "num_input_tokens_seen": 457738384, "step": 2665 }, { "epoch": 0.7009929637666864, "loss": 0.14979971945285797, "loss_ce": 0.0018810234032571316, "loss_iou": 0.46875, "loss_num": 0.0296630859375, "loss_xval": 0.1474609375, "num_input_tokens_seen": 457738384, "step": 2665 }, { "epoch": 0.7012560005260735, "grad_norm": 3.460962528007649, "learning_rate": 5e-06, "loss": 0.1216, "num_input_tokens_seen": 457910472, "step": 2666 }, { "epoch": 0.7012560005260735, "loss": 0.12169472873210907, "loss_ce": 0.004110502544790506, "loss_iou": 0.5859375, "loss_num": 0.0235595703125, "loss_xval": 0.11767578125, "num_input_tokens_seen": 457910472, "step": 2666 }, { "epoch": 0.7015190372854606, "grad_norm": 5.718591075026739, "learning_rate": 
5e-06, "loss": 0.1401, "num_input_tokens_seen": 458082324, "step": 2667 }, { "epoch": 0.7015190372854606, "loss": 0.1776396483182907, "loss_ce": 0.0005613988032564521, "loss_iou": 0.359375, "loss_num": 0.035400390625, "loss_xval": 0.1767578125, "num_input_tokens_seen": 458082324, "step": 2667 }, { "epoch": 0.7017820740448477, "grad_norm": 5.655030135877534, "learning_rate": 5e-06, "loss": 0.1106, "num_input_tokens_seen": 458254304, "step": 2668 }, { "epoch": 0.7017820740448477, "loss": 0.12232168763875961, "loss_ce": 0.0004344757762737572, "loss_iou": 0.375, "loss_num": 0.0244140625, "loss_xval": 0.1220703125, "num_input_tokens_seen": 458254304, "step": 2668 }, { "epoch": 0.7020451108042349, "grad_norm": 5.899988129903735, "learning_rate": 5e-06, "loss": 0.0895, "num_input_tokens_seen": 458426532, "step": 2669 }, { "epoch": 0.7020451108042349, "loss": 0.09821672737598419, "loss_ce": 0.0016896221786737442, "loss_iou": 0.54296875, "loss_num": 0.019287109375, "loss_xval": 0.0966796875, "num_input_tokens_seen": 458426532, "step": 2669 }, { "epoch": 0.702308147563622, "grad_norm": 5.5784599020012715, "learning_rate": 5e-06, "loss": 0.1056, "num_input_tokens_seen": 458596896, "step": 2670 }, { "epoch": 0.702308147563622, "loss": 0.16502083837985992, "loss_ce": 0.0003174682497046888, "loss_iou": 0.3984375, "loss_num": 0.032958984375, "loss_xval": 0.1650390625, "num_input_tokens_seen": 458596896, "step": 2670 }, { "epoch": 0.7025711843230091, "grad_norm": 14.038116606042868, "learning_rate": 5e-06, "loss": 0.1154, "num_input_tokens_seen": 458769240, "step": 2671 }, { "epoch": 0.7025711843230091, "loss": 0.15062229335308075, "loss_ce": 0.0042905076406896114, "loss_iou": 0.53125, "loss_num": 0.029296875, "loss_xval": 0.146484375, "num_input_tokens_seen": 458769240, "step": 2671 }, { "epoch": 0.7028342210823962, "grad_norm": 4.120047179172289, "learning_rate": 5e-06, "loss": 0.0767, "num_input_tokens_seen": 458941376, "step": 2672 }, { "epoch": 0.7028342210823962, "loss": 
0.08248385787010193, "loss_ce": 0.0018869286868721247, "loss_iou": 0.46875, "loss_num": 0.01611328125, "loss_xval": 0.08056640625, "num_input_tokens_seen": 458941376, "step": 2672 }, { "epoch": 0.7030972578417833, "grad_norm": 4.682779609797298, "learning_rate": 5e-06, "loss": 0.1311, "num_input_tokens_seen": 459113732, "step": 2673 }, { "epoch": 0.7030972578417833, "loss": 0.08461406081914902, "loss_ce": 0.0013010749826207757, "loss_iou": 0.515625, "loss_num": 0.0166015625, "loss_xval": 0.08349609375, "num_input_tokens_seen": 459113732, "step": 2673 }, { "epoch": 0.7033602946011706, "grad_norm": 6.053537160791169, "learning_rate": 5e-06, "loss": 0.1242, "num_input_tokens_seen": 459283668, "step": 2674 }, { "epoch": 0.7033602946011706, "loss": 0.21815051138401031, "loss_ce": 0.0011094921501353383, "loss_iou": 0.419921875, "loss_num": 0.04345703125, "loss_xval": 0.216796875, "num_input_tokens_seen": 459283668, "step": 2674 }, { "epoch": 0.7036233313605577, "grad_norm": 3.9631091480761715, "learning_rate": 5e-06, "loss": 0.11, "num_input_tokens_seen": 459455956, "step": 2675 }, { "epoch": 0.7036233313605577, "loss": 0.13024334609508514, "loss_ce": 0.00025372387608513236, "loss_iou": 0.5625, "loss_num": 0.0260009765625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 459455956, "step": 2675 }, { "epoch": 0.7038863681199448, "grad_norm": 5.129114238432721, "learning_rate": 5e-06, "loss": 0.137, "num_input_tokens_seen": 459627968, "step": 2676 }, { "epoch": 0.7038863681199448, "loss": 0.08658237755298615, "loss_ce": 0.002750593703240156, "loss_iou": 0.46484375, "loss_num": 0.0167236328125, "loss_xval": 0.083984375, "num_input_tokens_seen": 459627968, "step": 2676 }, { "epoch": 0.7041494048793319, "grad_norm": 20.187762197991425, "learning_rate": 5e-06, "loss": 0.124, "num_input_tokens_seen": 459800260, "step": 2677 }, { "epoch": 0.7041494048793319, "loss": 0.08818955719470978, "loss_ce": 0.0012297153007239103, "loss_iou": 0.53125, "loss_num": 0.017333984375, 
"loss_xval": 0.0869140625, "num_input_tokens_seen": 459800260, "step": 2677 }, { "epoch": 0.704412441638719, "grad_norm": 7.902489579853604, "learning_rate": 5e-06, "loss": 0.1564, "num_input_tokens_seen": 459971088, "step": 2678 }, { "epoch": 0.704412441638719, "loss": 0.12185804545879364, "loss_ce": 0.00024549951194785535, "loss_iou": 0.3984375, "loss_num": 0.0242919921875, "loss_xval": 0.12158203125, "num_input_tokens_seen": 459971088, "step": 2678 }, { "epoch": 0.7046754783981062, "grad_norm": 27.40955645070263, "learning_rate": 5e-06, "loss": 0.0666, "num_input_tokens_seen": 460143240, "step": 2679 }, { "epoch": 0.7046754783981062, "loss": 0.06965695321559906, "loss_ce": 0.0006261939415708184, "loss_iou": 0.63671875, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 460143240, "step": 2679 }, { "epoch": 0.7049385151574933, "grad_norm": 8.476577616174554, "learning_rate": 5e-06, "loss": 0.1372, "num_input_tokens_seen": 460315544, "step": 2680 }, { "epoch": 0.7049385151574933, "loss": 0.14567114412784576, "loss_ce": 0.0017502475529909134, "loss_iou": 0.515625, "loss_num": 0.02880859375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 460315544, "step": 2680 }, { "epoch": 0.7052015519168804, "grad_norm": 11.423647115492507, "learning_rate": 5e-06, "loss": 0.1329, "num_input_tokens_seen": 460487612, "step": 2681 }, { "epoch": 0.7052015519168804, "loss": 0.09613794833421707, "loss_ce": 0.002235355554148555, "loss_iou": 0.44921875, "loss_num": 0.018798828125, "loss_xval": 0.09375, "num_input_tokens_seen": 460487612, "step": 2681 }, { "epoch": 0.7054645886762675, "grad_norm": 13.90990204859974, "learning_rate": 5e-06, "loss": 0.1238, "num_input_tokens_seen": 460655964, "step": 2682 }, { "epoch": 0.7054645886762675, "loss": 0.06838610768318176, "loss_ce": 0.00024035980459302664, "loss_iou": 0.49609375, "loss_num": 0.01361083984375, "loss_xval": 0.068359375, "num_input_tokens_seen": 460655964, "step": 2682 }, { "epoch": 
0.7057276254356546, "grad_norm": 14.957706126877364, "learning_rate": 5e-06, "loss": 0.1015, "num_input_tokens_seen": 460827816, "step": 2683 }, { "epoch": 0.7057276254356546, "loss": 0.1538136899471283, "loss_ce": 0.0006154490984044969, "loss_iou": 0.4609375, "loss_num": 0.0306396484375, "loss_xval": 0.1533203125, "num_input_tokens_seen": 460827816, "step": 2683 }, { "epoch": 0.7059906621950418, "grad_norm": 6.337066775015052, "learning_rate": 5e-06, "loss": 0.1216, "num_input_tokens_seen": 461000128, "step": 2684 }, { "epoch": 0.7059906621950418, "loss": 0.18371257185935974, "loss_ce": 0.0005155415856279433, "loss_iou": 0.57421875, "loss_num": 0.03662109375, "loss_xval": 0.18359375, "num_input_tokens_seen": 461000128, "step": 2684 }, { "epoch": 0.7062536989544289, "grad_norm": 4.77556589512149, "learning_rate": 5e-06, "loss": 0.0936, "num_input_tokens_seen": 461172480, "step": 2685 }, { "epoch": 0.7062536989544289, "loss": 0.06664656102657318, "loss_ce": 0.001003247918561101, "loss_iou": 0.55859375, "loss_num": 0.01312255859375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 461172480, "step": 2685 }, { "epoch": 0.706516735713816, "grad_norm": 4.934194738374163, "learning_rate": 5e-06, "loss": 0.14, "num_input_tokens_seen": 461344704, "step": 2686 }, { "epoch": 0.706516735713816, "loss": 0.07681739330291748, "loss_ce": 0.002476579276844859, "loss_iou": 0.56640625, "loss_num": 0.014892578125, "loss_xval": 0.07421875, "num_input_tokens_seen": 461344704, "step": 2686 }, { "epoch": 0.7067797724732031, "grad_norm": 5.14148867298625, "learning_rate": 5e-06, "loss": 0.0942, "num_input_tokens_seen": 461517244, "step": 2687 }, { "epoch": 0.7067797724732031, "loss": 0.08841484785079956, "loss_ce": 9.696922643342987e-05, "loss_iou": 0.62890625, "loss_num": 0.0177001953125, "loss_xval": 0.08837890625, "num_input_tokens_seen": 461517244, "step": 2687 }, { "epoch": 0.7070428092325902, "grad_norm": 4.290637783097341, "learning_rate": 5e-06, "loss": 0.1026, 
"num_input_tokens_seen": 461689392, "step": 2688 }, { "epoch": 0.7070428092325902, "loss": 0.12317334860563278, "loss_ce": 0.0057722218334674835, "loss_iou": 0.484375, "loss_num": 0.0235595703125, "loss_xval": 0.1171875, "num_input_tokens_seen": 461689392, "step": 2688 }, { "epoch": 0.7073058459919774, "grad_norm": 18.724820792441665, "learning_rate": 5e-06, "loss": 0.1002, "num_input_tokens_seen": 461861916, "step": 2689 }, { "epoch": 0.7073058459919774, "loss": 0.05893798917531967, "loss_ce": 0.00014587045006919652, "loss_iou": 0.59375, "loss_num": 0.01171875, "loss_xval": 0.058837890625, "num_input_tokens_seen": 461861916, "step": 2689 }, { "epoch": 0.7075688827513645, "grad_norm": 6.318558285374775, "learning_rate": 5e-06, "loss": 0.0952, "num_input_tokens_seen": 462033832, "step": 2690 }, { "epoch": 0.7075688827513645, "loss": 0.129222571849823, "loss_ce": 0.00034684882848523557, "loss_iou": 0.396484375, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 462033832, "step": 2690 }, { "epoch": 0.7078319195107516, "grad_norm": 5.104970581967641, "learning_rate": 5e-06, "loss": 0.0907, "num_input_tokens_seen": 462206008, "step": 2691 }, { "epoch": 0.7078319195107516, "loss": 0.06862036883831024, "loss_ce": 0.00010840100003406405, "loss_iou": 0.5390625, "loss_num": 0.01373291015625, "loss_xval": 0.068359375, "num_input_tokens_seen": 462206008, "step": 2691 }, { "epoch": 0.7080949562701387, "grad_norm": 7.0445063257027885, "learning_rate": 5e-06, "loss": 0.1429, "num_input_tokens_seen": 462377836, "step": 2692 }, { "epoch": 0.7080949562701387, "loss": 0.16992726922035217, "loss_ce": 0.014318134635686874, "loss_iou": 0.478515625, "loss_num": 0.0311279296875, "loss_xval": 0.1552734375, "num_input_tokens_seen": 462377836, "step": 2692 }, { "epoch": 0.7083579930295258, "grad_norm": 3.568328460902499, "learning_rate": 5e-06, "loss": 0.0725, "num_input_tokens_seen": 462550184, "step": 2693 }, { "epoch": 0.7083579930295258, "loss": 
0.058605365455150604, "loss_ce": 0.0025903512723743916, "loss_iou": 0.48046875, "loss_num": 0.01123046875, "loss_xval": 0.055908203125, "num_input_tokens_seen": 462550184, "step": 2693 }, { "epoch": 0.708621029788913, "grad_norm": 16.451151717262515, "learning_rate": 5e-06, "loss": 0.136, "num_input_tokens_seen": 462722228, "step": 2694 }, { "epoch": 0.708621029788913, "loss": 0.15793108940124512, "loss_ce": 0.0002467722224537283, "loss_iou": 0.380859375, "loss_num": 0.031494140625, "loss_xval": 0.1572265625, "num_input_tokens_seen": 462722228, "step": 2694 }, { "epoch": 0.7088840665483002, "grad_norm": 18.144741325116385, "learning_rate": 5e-06, "loss": 0.1438, "num_input_tokens_seen": 462892644, "step": 2695 }, { "epoch": 0.7088840665483002, "loss": 0.18394407629966736, "loss_ce": 0.005751936696469784, "loss_iou": 0.353515625, "loss_num": 0.03564453125, "loss_xval": 0.177734375, "num_input_tokens_seen": 462892644, "step": 2695 }, { "epoch": 0.7091471033076873, "grad_norm": 4.177749037836763, "learning_rate": 5e-06, "loss": 0.0915, "num_input_tokens_seen": 463059460, "step": 2696 }, { "epoch": 0.7091471033076873, "loss": 0.08263557404279709, "loss_ce": 0.0037171156145632267, "loss_iou": 0.330078125, "loss_num": 0.0157470703125, "loss_xval": 0.0791015625, "num_input_tokens_seen": 463059460, "step": 2696 }, { "epoch": 0.7094101400670744, "grad_norm": 24.286466900686108, "learning_rate": 5e-06, "loss": 0.1303, "num_input_tokens_seen": 463231680, "step": 2697 }, { "epoch": 0.7094101400670744, "loss": 0.1826072484254837, "loss_ce": 0.005239082965999842, "loss_iou": 0.40625, "loss_num": 0.03564453125, "loss_xval": 0.177734375, "num_input_tokens_seen": 463231680, "step": 2697 }, { "epoch": 0.7096731768264615, "grad_norm": 3.90367546541105, "learning_rate": 5e-06, "loss": 0.1316, "num_input_tokens_seen": 463403632, "step": 2698 }, { "epoch": 0.7096731768264615, "loss": 0.19678181409835815, "loss_ce": 0.0011641355231404305, "loss_iou": null, "loss_num": 0.0390625, 
"loss_xval": 0.1953125, "num_input_tokens_seen": 463403632, "step": 2698 }, { "epoch": 0.7099362135858486, "grad_norm": 4.6638342162135, "learning_rate": 5e-06, "loss": 0.125, "num_input_tokens_seen": 463575764, "step": 2699 }, { "epoch": 0.7099362135858486, "loss": 0.05382794886827469, "loss_ce": 0.0004832194827031344, "loss_iou": 0.6328125, "loss_num": 0.01068115234375, "loss_xval": 0.05322265625, "num_input_tokens_seen": 463575764, "step": 2699 }, { "epoch": 0.7101992503452358, "grad_norm": 10.964970610276819, "learning_rate": 5e-06, "loss": 0.1311, "num_input_tokens_seen": 463747960, "step": 2700 }, { "epoch": 0.7101992503452358, "loss": 0.10617360472679138, "loss_ce": 0.002444351091980934, "loss_iou": 0.494140625, "loss_num": 0.020751953125, "loss_xval": 0.103515625, "num_input_tokens_seen": 463747960, "step": 2700 }, { "epoch": 0.7104622871046229, "grad_norm": 4.313980855592432, "learning_rate": 5e-06, "loss": 0.1045, "num_input_tokens_seen": 463920292, "step": 2701 }, { "epoch": 0.7104622871046229, "loss": 0.06053323671221733, "loss_ce": 0.0002305020607309416, "loss_iou": 0.66015625, "loss_num": 0.0120849609375, "loss_xval": 0.060302734375, "num_input_tokens_seen": 463920292, "step": 2701 }, { "epoch": 0.71072532386401, "grad_norm": 13.238355947380045, "learning_rate": 5e-06, "loss": 0.1614, "num_input_tokens_seen": 464092396, "step": 2702 }, { "epoch": 0.71072532386401, "loss": 0.09292985498905182, "loss_ce": 0.002384199295192957, "loss_iou": 0.39453125, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 464092396, "step": 2702 }, { "epoch": 0.7109883606233971, "grad_norm": 3.301662541659157, "learning_rate": 5e-06, "loss": 0.085, "num_input_tokens_seen": 464264724, "step": 2703 }, { "epoch": 0.7109883606233971, "loss": 0.05558721721172333, "loss_ce": 7.5738578743767e-05, "loss_iou": 0.55078125, "loss_num": 0.0111083984375, "loss_xval": 0.055419921875, "num_input_tokens_seen": 464264724, "step": 2703 }, { "epoch": 
0.7112513973827842, "grad_norm": 3.9814210468792854, "learning_rate": 5e-06, "loss": 0.084, "num_input_tokens_seen": 464436668, "step": 2704 }, { "epoch": 0.7112513973827842, "loss": 0.0715593621134758, "loss_ce": 0.004069737158715725, "loss_iou": 0.494140625, "loss_num": 0.01348876953125, "loss_xval": 0.0673828125, "num_input_tokens_seen": 464436668, "step": 2704 }, { "epoch": 0.7115144341421714, "grad_norm": 24.903703628485385, "learning_rate": 5e-06, "loss": 0.1558, "num_input_tokens_seen": 464607064, "step": 2705 }, { "epoch": 0.7115144341421714, "loss": 0.13525709509849548, "loss_ce": 0.0011323521612212062, "loss_iou": 0.5703125, "loss_num": 0.02685546875, "loss_xval": 0.1337890625, "num_input_tokens_seen": 464607064, "step": 2705 }, { "epoch": 0.7117774709015585, "grad_norm": 8.733454229975411, "learning_rate": 5e-06, "loss": 0.099, "num_input_tokens_seen": 464779372, "step": 2706 }, { "epoch": 0.7117774709015585, "loss": 0.1117292046546936, "loss_ce": 0.002689904533326626, "loss_iou": 0.44140625, "loss_num": 0.0218505859375, "loss_xval": 0.10888671875, "num_input_tokens_seen": 464779372, "step": 2706 }, { "epoch": 0.7120405076609456, "grad_norm": 4.906686826576695, "learning_rate": 5e-06, "loss": 0.1105, "num_input_tokens_seen": 464951196, "step": 2707 }, { "epoch": 0.7120405076609456, "loss": 0.09186941385269165, "loss_ce": 0.00013357413990888745, "loss_iou": 0.6171875, "loss_num": 0.018310546875, "loss_xval": 0.091796875, "num_input_tokens_seen": 464951196, "step": 2707 }, { "epoch": 0.7123035444203327, "grad_norm": 3.5122712023777214, "learning_rate": 5e-06, "loss": 0.1182, "num_input_tokens_seen": 465119688, "step": 2708 }, { "epoch": 0.7123035444203327, "loss": 0.17270034551620483, "loss_ce": 0.004762125201523304, "loss_iou": 0.3671875, "loss_num": 0.03369140625, "loss_xval": 0.16796875, "num_input_tokens_seen": 465119688, "step": 2708 }, { "epoch": 0.7125665811797198, "grad_norm": 4.667301843217583, "learning_rate": 5e-06, "loss": 0.1511, 
"num_input_tokens_seen": 465291884, "step": 2709 }, { "epoch": 0.7125665811797198, "loss": 0.24567091464996338, "loss_ce": 0.0022016754373908043, "loss_iou": 0.5703125, "loss_num": 0.048583984375, "loss_xval": 0.2431640625, "num_input_tokens_seen": 465291884, "step": 2709 }, { "epoch": 0.712829617939107, "grad_norm": 4.677081009771049, "learning_rate": 5e-06, "loss": 0.0924, "num_input_tokens_seen": 465463732, "step": 2710 }, { "epoch": 0.712829617939107, "loss": 0.10853127390146255, "loss_ce": 0.0006211129948496819, "loss_iou": 0.546875, "loss_num": 0.021484375, "loss_xval": 0.10791015625, "num_input_tokens_seen": 465463732, "step": 2710 }, { "epoch": 0.7130926546984941, "grad_norm": 9.121056040645339, "learning_rate": 5e-06, "loss": 0.0814, "num_input_tokens_seen": 465635964, "step": 2711 }, { "epoch": 0.7130926546984941, "loss": 0.05089259892702103, "loss_ce": 0.0005080772680230439, "loss_iou": 0.4453125, "loss_num": 0.01007080078125, "loss_xval": 0.05029296875, "num_input_tokens_seen": 465635964, "step": 2711 }, { "epoch": 0.7133556914578812, "grad_norm": 5.554084070604222, "learning_rate": 5e-06, "loss": 0.1028, "num_input_tokens_seen": 465808444, "step": 2712 }, { "epoch": 0.7133556914578812, "loss": 0.10150805115699768, "loss_ce": 0.0013188383309170604, "loss_iou": 0.51171875, "loss_num": 0.02001953125, "loss_xval": 0.10009765625, "num_input_tokens_seen": 465808444, "step": 2712 }, { "epoch": 0.7136187282172683, "grad_norm": 7.721557509382751, "learning_rate": 5e-06, "loss": 0.1296, "num_input_tokens_seen": 465980716, "step": 2713 }, { "epoch": 0.7136187282172683, "loss": 0.1155381053686142, "loss_ce": 0.0032639428973197937, "loss_iou": 0.52734375, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 465980716, "step": 2713 }, { "epoch": 0.7138817649766555, "grad_norm": 4.213278179953207, "learning_rate": 5e-06, "loss": 0.1475, "num_input_tokens_seen": 466153208, "step": 2714 }, { "epoch": 0.7138817649766555, "loss": 
0.1079680472612381, "loss_ce": 0.002407738706097007, "loss_iou": 0.390625, "loss_num": 0.0211181640625, "loss_xval": 0.10546875, "num_input_tokens_seen": 466153208, "step": 2714 }, { "epoch": 0.7141448017360427, "grad_norm": 4.8611508745546725, "learning_rate": 5e-06, "loss": 0.1015, "num_input_tokens_seen": 466325716, "step": 2715 }, { "epoch": 0.7141448017360427, "loss": 0.07399383187294006, "loss_ce": 0.0005075072403997183, "loss_iou": 0.4765625, "loss_num": 0.01470947265625, "loss_xval": 0.0732421875, "num_input_tokens_seen": 466325716, "step": 2715 }, { "epoch": 0.7144078384954298, "grad_norm": 34.00071276149313, "learning_rate": 5e-06, "loss": 0.1836, "num_input_tokens_seen": 466497816, "step": 2716 }, { "epoch": 0.7144078384954298, "loss": 0.11154329776763916, "loss_ce": 0.00032198382541537285, "loss_iou": 0.435546875, "loss_num": 0.022216796875, "loss_xval": 0.111328125, "num_input_tokens_seen": 466497816, "step": 2716 }, { "epoch": 0.7146708752548169, "grad_norm": 4.289011543265615, "learning_rate": 5e-06, "loss": 0.1322, "num_input_tokens_seen": 466666960, "step": 2717 }, { "epoch": 0.7146708752548169, "loss": 0.10135161876678467, "loss_ce": 0.000498652458190918, "loss_iou": 0.4921875, "loss_num": 0.0201416015625, "loss_xval": 0.10107421875, "num_input_tokens_seen": 466666960, "step": 2717 }, { "epoch": 0.714933912014204, "grad_norm": 5.080575214282277, "learning_rate": 5e-06, "loss": 0.0789, "num_input_tokens_seen": 466839356, "step": 2718 }, { "epoch": 0.714933912014204, "loss": 0.04582955688238144, "loss_ce": 9.896683332044631e-05, "loss_iou": 0.51953125, "loss_num": 0.0091552734375, "loss_xval": 0.045654296875, "num_input_tokens_seen": 466839356, "step": 2718 }, { "epoch": 0.7151969487735911, "grad_norm": 5.237294473459814, "learning_rate": 5e-06, "loss": 0.1367, "num_input_tokens_seen": 467011304, "step": 2719 }, { "epoch": 0.7151969487735911, "loss": 0.0945780873298645, "loss_ce": 0.0005534276133403182, "loss_iou": 0.72265625, "loss_num": 
0.018798828125, "loss_xval": 0.09423828125, "num_input_tokens_seen": 467011304, "step": 2719 }, { "epoch": 0.7154599855329782, "grad_norm": 3.135766341307675, "learning_rate": 5e-06, "loss": 0.0941, "num_input_tokens_seen": 467180536, "step": 2720 }, { "epoch": 0.7154599855329782, "loss": 0.042348556220531464, "loss_ce": 0.0003716244827955961, "loss_iou": 0.3984375, "loss_num": 0.0084228515625, "loss_xval": 0.0419921875, "num_input_tokens_seen": 467180536, "step": 2720 }, { "epoch": 0.7157230222923654, "grad_norm": 3.5558668353050815, "learning_rate": 5e-06, "loss": 0.1062, "num_input_tokens_seen": 467352776, "step": 2721 }, { "epoch": 0.7157230222923654, "loss": 0.175160214304924, "loss_ce": 0.0027969309594482183, "loss_iou": 0.40234375, "loss_num": 0.034423828125, "loss_xval": 0.171875, "num_input_tokens_seen": 467352776, "step": 2721 }, { "epoch": 0.7159860590517525, "grad_norm": 27.92814548048443, "learning_rate": 5e-06, "loss": 0.1623, "num_input_tokens_seen": 467524800, "step": 2722 }, { "epoch": 0.7159860590517525, "loss": 0.11873021721839905, "loss_ce": 0.004441884811967611, "loss_iou": 0.5078125, "loss_num": 0.0228271484375, "loss_xval": 0.1142578125, "num_input_tokens_seen": 467524800, "step": 2722 }, { "epoch": 0.7162490958111396, "grad_norm": 4.250603227803627, "learning_rate": 5e-06, "loss": 0.0898, "num_input_tokens_seen": 467693664, "step": 2723 }, { "epoch": 0.7162490958111396, "loss": 0.07381434738636017, "loss_ce": 0.0007857821765355766, "loss_iou": 0.66796875, "loss_num": 0.01458740234375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 467693664, "step": 2723 }, { "epoch": 0.7165121325705267, "grad_norm": 29.613017586366666, "learning_rate": 5e-06, "loss": 0.1783, "num_input_tokens_seen": 467865648, "step": 2724 }, { "epoch": 0.7165121325705267, "loss": 0.06583578884601593, "loss_ce": 0.0012300718808546662, "loss_iou": 0.5078125, "loss_num": 0.012939453125, "loss_xval": 0.064453125, "num_input_tokens_seen": 467865648, "step": 2724 }, { 
"epoch": 0.7167751693299138, "grad_norm": 9.322275949588843, "learning_rate": 5e-06, "loss": 0.1177, "num_input_tokens_seen": 468035300, "step": 2725 }, { "epoch": 0.7167751693299138, "loss": 0.11070144176483154, "loss_ce": 0.00022780938888899982, "loss_iou": 0.609375, "loss_num": 0.0220947265625, "loss_xval": 0.1103515625, "num_input_tokens_seen": 468035300, "step": 2725 }, { "epoch": 0.717038206089301, "grad_norm": 7.146952822401602, "learning_rate": 5e-06, "loss": 0.1409, "num_input_tokens_seen": 468207524, "step": 2726 }, { "epoch": 0.717038206089301, "loss": 0.11441102623939514, "loss_ce": 0.0002447651932016015, "loss_iou": 0.6015625, "loss_num": 0.0228271484375, "loss_xval": 0.1142578125, "num_input_tokens_seen": 468207524, "step": 2726 }, { "epoch": 0.7173012428486881, "grad_norm": 11.196754628011416, "learning_rate": 5e-06, "loss": 0.1409, "num_input_tokens_seen": 468379608, "step": 2727 }, { "epoch": 0.7173012428486881, "loss": 0.16638273000717163, "loss_ce": 0.005860275588929653, "loss_iou": 0.4609375, "loss_num": 0.0322265625, "loss_xval": 0.16015625, "num_input_tokens_seen": 468379608, "step": 2727 }, { "epoch": 0.7175642796080752, "grad_norm": 10.776354846598704, "learning_rate": 5e-06, "loss": 0.1076, "num_input_tokens_seen": 468551976, "step": 2728 }, { "epoch": 0.7175642796080752, "loss": 0.11593279242515564, "loss_ce": 0.002559985499829054, "loss_iou": 0.5625, "loss_num": 0.022705078125, "loss_xval": 0.11328125, "num_input_tokens_seen": 468551976, "step": 2728 }, { "epoch": 0.7178273163674623, "grad_norm": 5.353937326922854, "learning_rate": 5e-06, "loss": 0.1075, "num_input_tokens_seen": 468724232, "step": 2729 }, { "epoch": 0.7178273163674623, "loss": 0.09276724606752396, "loss_ce": 0.00023794792650733143, "loss_iou": 0.51171875, "loss_num": 0.0185546875, "loss_xval": 0.0927734375, "num_input_tokens_seen": 468724232, "step": 2729 }, { "epoch": 0.7180903531268494, "grad_norm": 4.435607019386441, "learning_rate": 5e-06, "loss": 0.1268, 
"num_input_tokens_seen": 468896348, "step": 2730 }, { "epoch": 0.7180903531268494, "loss": 0.14431005716323853, "loss_ce": 0.001487781759351492, "loss_iou": 0.41796875, "loss_num": 0.028564453125, "loss_xval": 0.142578125, "num_input_tokens_seen": 468896348, "step": 2730 }, { "epoch": 0.7183533898862366, "grad_norm": 5.860440789547134, "learning_rate": 5e-06, "loss": 0.0948, "num_input_tokens_seen": 469068240, "step": 2731 }, { "epoch": 0.7183533898862366, "loss": 0.11086121201515198, "loss_ce": 0.003713999642059207, "loss_iou": 0.5, "loss_num": 0.021484375, "loss_xval": 0.10693359375, "num_input_tokens_seen": 469068240, "step": 2731 }, { "epoch": 0.7186164266456238, "grad_norm": 11.64009264254108, "learning_rate": 5e-06, "loss": 0.1218, "num_input_tokens_seen": 469238748, "step": 2732 }, { "epoch": 0.7186164266456238, "loss": 0.14171399176120758, "loss_ce": 0.0019129666034132242, "loss_iou": null, "loss_num": 0.0279541015625, "loss_xval": 0.1396484375, "num_input_tokens_seen": 469238748, "step": 2732 }, { "epoch": 0.7188794634050109, "grad_norm": 19.26931594616413, "learning_rate": 5e-06, "loss": 0.1369, "num_input_tokens_seen": 469409356, "step": 2733 }, { "epoch": 0.7188794634050109, "loss": 0.16873466968536377, "loss_ce": 0.0015593739226460457, "loss_iou": 0.51953125, "loss_num": 0.033447265625, "loss_xval": 0.1669921875, "num_input_tokens_seen": 469409356, "step": 2733 }, { "epoch": 0.719142500164398, "grad_norm": 39.88591044746162, "learning_rate": 5e-06, "loss": 0.1402, "num_input_tokens_seen": 469581536, "step": 2734 }, { "epoch": 0.719142500164398, "loss": 0.13763278722763062, "loss_ce": 0.0018295738846063614, "loss_iou": 0.55078125, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 469581536, "step": 2734 }, { "epoch": 0.7194055369237851, "grad_norm": 5.070448909039763, "learning_rate": 5e-06, "loss": 0.0973, "num_input_tokens_seen": 469753888, "step": 2735 }, { "epoch": 0.7194055369237851, "loss": 0.11928269267082214, 
"loss_ce": 0.0068864524364471436, "loss_iou": 0.578125, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 469753888, "step": 2735 }, { "epoch": 0.7196685736831723, "grad_norm": 4.73412496708124, "learning_rate": 5e-06, "loss": 0.0816, "num_input_tokens_seen": 469926224, "step": 2736 }, { "epoch": 0.7196685736831723, "loss": 0.10194897651672363, "loss_ce": 0.000539067608769983, "loss_iou": 0.43359375, "loss_num": 0.020263671875, "loss_xval": 0.1015625, "num_input_tokens_seen": 469926224, "step": 2736 }, { "epoch": 0.7199316104425594, "grad_norm": 14.69635519193442, "learning_rate": 5e-06, "loss": 0.1327, "num_input_tokens_seen": 470096688, "step": 2737 }, { "epoch": 0.7199316104425594, "loss": 0.14469808340072632, "loss_ce": 0.0047444626688957214, "loss_iou": 0.671875, "loss_num": 0.0279541015625, "loss_xval": 0.1396484375, "num_input_tokens_seen": 470096688, "step": 2737 }, { "epoch": 0.7201946472019465, "grad_norm": 5.497552383274807, "learning_rate": 5e-06, "loss": 0.1507, "num_input_tokens_seen": 470268692, "step": 2738 }, { "epoch": 0.7201946472019465, "loss": 0.07712777704000473, "loss_ce": 0.0003760677354875952, "loss_iou": 0.51953125, "loss_num": 0.01531982421875, "loss_xval": 0.07666015625, "num_input_tokens_seen": 470268692, "step": 2738 }, { "epoch": 0.7204576839613336, "grad_norm": 4.073372808076834, "learning_rate": 5e-06, "loss": 0.1027, "num_input_tokens_seen": 470440752, "step": 2739 }, { "epoch": 0.7204576839613336, "loss": 0.1480931043624878, "loss_ce": 0.001303559634834528, "loss_iou": 0.5546875, "loss_num": 0.029296875, "loss_xval": 0.146484375, "num_input_tokens_seen": 470440752, "step": 2739 }, { "epoch": 0.7207207207207207, "grad_norm": 5.675528284561051, "learning_rate": 5e-06, "loss": 0.0967, "num_input_tokens_seen": 470612568, "step": 2740 }, { "epoch": 0.7207207207207207, "loss": 0.07299304753541946, "loss_ce": 0.004481084644794464, "loss_iou": 0.625, "loss_num": 0.01373291015625, "loss_xval": 0.068359375, 
"num_input_tokens_seen": 470612568, "step": 2740 }, { "epoch": 0.7209837574801078, "grad_norm": 20.879698464276387, "learning_rate": 5e-06, "loss": 0.0953, "num_input_tokens_seen": 470784724, "step": 2741 }, { "epoch": 0.7209837574801078, "loss": 0.1316141039133072, "loss_ce": 0.001822838094085455, "loss_iou": 0.302734375, "loss_num": 0.02587890625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 470784724, "step": 2741 }, { "epoch": 0.721246794239495, "grad_norm": 10.194443634028673, "learning_rate": 5e-06, "loss": 0.1311, "num_input_tokens_seen": 470957128, "step": 2742 }, { "epoch": 0.721246794239495, "loss": 0.2153215855360031, "loss_ce": 0.0009050846565514803, "loss_iou": 0.3984375, "loss_num": 0.04296875, "loss_xval": 0.21484375, "num_input_tokens_seen": 470957128, "step": 2742 }, { "epoch": 0.7215098309988821, "grad_norm": 4.046503425790058, "learning_rate": 5e-06, "loss": 0.1402, "num_input_tokens_seen": 471129076, "step": 2743 }, { "epoch": 0.7215098309988821, "loss": 0.17502932250499725, "loss_ce": 0.005137969274073839, "loss_iou": null, "loss_num": 0.033935546875, "loss_xval": 0.169921875, "num_input_tokens_seen": 471129076, "step": 2743 }, { "epoch": 0.7217728677582692, "grad_norm": 16.718028178353897, "learning_rate": 5e-06, "loss": 0.1646, "num_input_tokens_seen": 471301596, "step": 2744 }, { "epoch": 0.7217728677582692, "loss": 0.1273680329322815, "loss_ce": 0.0013304388849064708, "loss_iou": 0.5078125, "loss_num": 0.0252685546875, "loss_xval": 0.1259765625, "num_input_tokens_seen": 471301596, "step": 2744 }, { "epoch": 0.7220359045176563, "grad_norm": 19.351969856870078, "learning_rate": 5e-06, "loss": 0.1174, "num_input_tokens_seen": 471473704, "step": 2745 }, { "epoch": 0.7220359045176563, "loss": 0.08187679946422577, "loss_ce": 4.3911892134929076e-05, "loss_iou": 0.439453125, "loss_num": 0.016357421875, "loss_xval": 0.08203125, "num_input_tokens_seen": 471473704, "step": 2745 }, { "epoch": 0.7222989412770434, "grad_norm": 3.5611349833279236, 
"learning_rate": 5e-06, "loss": 0.0947, "num_input_tokens_seen": 471644084, "step": 2746 }, { "epoch": 0.7222989412770434, "loss": 0.04295755550265312, "loss_ce": 0.00018716827617026865, "loss_iou": 0.48828125, "loss_num": 0.008544921875, "loss_xval": 0.042724609375, "num_input_tokens_seen": 471644084, "step": 2746 }, { "epoch": 0.7225619780364306, "grad_norm": 4.500519410155881, "learning_rate": 5e-06, "loss": 0.1031, "num_input_tokens_seen": 471814452, "step": 2747 }, { "epoch": 0.7225619780364306, "loss": 0.15782231092453003, "loss_ce": 0.0029913773760199547, "loss_iou": 0.48046875, "loss_num": 0.0308837890625, "loss_xval": 0.1552734375, "num_input_tokens_seen": 471814452, "step": 2747 }, { "epoch": 0.7228250147958177, "grad_norm": 5.3088355134433645, "learning_rate": 5e-06, "loss": 0.1564, "num_input_tokens_seen": 471980364, "step": 2748 }, { "epoch": 0.7228250147958177, "loss": 0.16449454426765442, "loss_ce": 0.0011339561315253377, "loss_iou": 0.42578125, "loss_num": 0.03271484375, "loss_xval": 0.1630859375, "num_input_tokens_seen": 471980364, "step": 2748 }, { "epoch": 0.7230880515552048, "grad_norm": 8.162974487273814, "learning_rate": 5e-06, "loss": 0.1117, "num_input_tokens_seen": 472152464, "step": 2749 }, { "epoch": 0.7230880515552048, "loss": 0.08619838953018188, "loss_ce": 0.0012984833447262645, "loss_iou": 0.5234375, "loss_num": 0.0169677734375, "loss_xval": 0.0849609375, "num_input_tokens_seen": 472152464, "step": 2749 }, { "epoch": 0.7233510883145919, "grad_norm": 6.354241375539321, "learning_rate": 5e-06, "loss": 0.0919, "num_input_tokens_seen": 472324756, "step": 2750 }, { "epoch": 0.7233510883145919, "eval_websight_new_CIoU": 0.8880318701267242, "eval_websight_new_GIoU": 0.8903599679470062, "eval_websight_new_IoU": 0.8917953372001648, "eval_websight_new_MAE_all": 0.014968848787248135, "eval_websight_new_MAE_h": 0.008249826729297638, "eval_websight_new_MAE_w": 0.021866537630558014, "eval_websight_new_MAE_x": 0.02344994805753231, 
"eval_websight_new_MAE_y": 0.006309080636128783, "eval_websight_new_NUM_probability": 0.9999924898147583, "eval_websight_new_inside_bbox": 1.0, "eval_websight_new_loss": 0.0760061964392662, "eval_websight_new_loss_ce": 5.840479570906609e-05, "eval_websight_new_loss_iou": 0.35406494140625, "eval_websight_new_loss_num": 0.013774871826171875, "eval_websight_new_loss_xval": 0.0689239501953125, "eval_websight_new_runtime": 59.6817, "eval_websight_new_samples_per_second": 0.838, "eval_websight_new_steps_per_second": 0.034, "num_input_tokens_seen": 472324756, "step": 2750 }, { "epoch": 0.7233510883145919, "eval_seeclick_CIoU": 0.6159887313842773, "eval_seeclick_GIoU": 0.6205049157142639, "eval_seeclick_IoU": 0.6426993608474731, "eval_seeclick_MAE_all": 0.048651453107595444, "eval_seeclick_MAE_h": 0.030230149626731873, "eval_seeclick_MAE_w": 0.06347078271210194, "eval_seeclick_MAE_x": 0.0767427384853363, "eval_seeclick_MAE_y": 0.024162148125469685, "eval_seeclick_NUM_probability": 0.9999720454216003, "eval_seeclick_inside_bbox": 0.8920454680919647, "eval_seeclick_loss": 0.22562426328659058, "eval_seeclick_loss_ce": 0.008924027904868126, "eval_seeclick_loss_iou": 0.5213623046875, "eval_seeclick_loss_num": 0.04332733154296875, "eval_seeclick_loss_xval": 0.2166900634765625, "eval_seeclick_runtime": 76.0092, "eval_seeclick_samples_per_second": 0.566, "eval_seeclick_steps_per_second": 0.026, "num_input_tokens_seen": 472324756, "step": 2750 }, { "epoch": 0.7233510883145919, "eval_icons_CIoU": 0.8715368807315826, "eval_icons_GIoU": 0.8687921464443207, "eval_icons_IoU": 0.8755140602588654, "eval_icons_MAE_all": 0.015967791434377432, "eval_icons_MAE_h": 0.016439005732536316, "eval_icons_MAE_w": 0.015302729327231646, "eval_icons_MAE_x": 0.015842752531170845, "eval_icons_MAE_y": 0.016286680474877357, "eval_icons_NUM_probability": 0.9999882578849792, "eval_icons_inside_bbox": 1.0, "eval_icons_loss": 0.057806555181741714, "eval_icons_loss_ce": 1.1600203379202867e-05, 
"eval_icons_loss_iou": 0.617919921875, "eval_icons_loss_num": 0.010974884033203125, "eval_icons_loss_xval": 0.0548553466796875, "eval_icons_runtime": 81.9252, "eval_icons_samples_per_second": 0.61, "eval_icons_steps_per_second": 0.024, "num_input_tokens_seen": 472324756, "step": 2750 }, { "epoch": 0.7233510883145919, "eval_screenspot_CIoU": 0.5503915150960287, "eval_screenspot_GIoU": 0.5448275804519653, "eval_screenspot_IoU": 0.5854077339172363, "eval_screenspot_MAE_all": 0.0787569632132848, "eval_screenspot_MAE_h": 0.05612564583619436, "eval_screenspot_MAE_w": 0.1299388830860456, "eval_screenspot_MAE_x": 0.07724836965401967, "eval_screenspot_MAE_y": 0.05171496793627739, "eval_screenspot_NUM_probability": 0.9999065001805624, "eval_screenspot_inside_bbox": 0.8291666706403097, "eval_screenspot_loss": 0.9146350622177124, "eval_screenspot_loss_ce": 0.567759374777476, "eval_screenspot_loss_iou": 0.5614013671875, "eval_screenspot_loss_num": 0.06831868489583333, "eval_screenspot_loss_xval": 0.3416341145833333, "eval_screenspot_runtime": 140.0636, "eval_screenspot_samples_per_second": 0.635, "eval_screenspot_steps_per_second": 0.021, "num_input_tokens_seen": 472324756, "step": 2750 }, { "epoch": 0.7233510883145919, "loss": 0.9040678143501282, "loss_ce": 0.5593412518501282, "loss_iou": 0.455078125, "loss_num": 0.06884765625, "loss_xval": 0.34375, "num_input_tokens_seen": 472324756, "step": 2750 }, { "epoch": 0.723614125073979, "grad_norm": 4.425480410673166, "learning_rate": 5e-06, "loss": 0.1233, "num_input_tokens_seen": 472496880, "step": 2751 }, { "epoch": 0.723614125073979, "loss": 0.10025835037231445, "loss_ce": 0.0005421665264293551, "loss_iou": 0.51171875, "loss_num": 0.02001953125, "loss_xval": 0.099609375, "num_input_tokens_seen": 472496880, "step": 2751 }, { "epoch": 0.7238771618333663, "grad_norm": 4.0784104980939135, "learning_rate": 5e-06, "loss": 0.0885, "num_input_tokens_seen": 472668928, "step": 2752 }, { "epoch": 0.7238771618333663, "loss": 
0.0833350419998169, "loss_ce": 0.00038827050593681633, "loss_iou": 0.34765625, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 472668928, "step": 2752 }, { "epoch": 0.7241401985927534, "grad_norm": 6.523452376364869, "learning_rate": 5e-06, "loss": 0.1273, "num_input_tokens_seen": 472841040, "step": 2753 }, { "epoch": 0.7241401985927534, "loss": 0.13216346502304077, "loss_ce": 0.00023597091785632074, "loss_iou": 0.54296875, "loss_num": 0.0263671875, "loss_xval": 0.1318359375, "num_input_tokens_seen": 472841040, "step": 2753 }, { "epoch": 0.7244032353521405, "grad_norm": 13.511689037708722, "learning_rate": 5e-06, "loss": 0.0835, "num_input_tokens_seen": 473013468, "step": 2754 }, { "epoch": 0.7244032353521405, "loss": 0.0918908640742302, "loss_ce": 0.001101069850847125, "loss_iou": 0.431640625, "loss_num": 0.0181884765625, "loss_xval": 0.0908203125, "num_input_tokens_seen": 473013468, "step": 2754 }, { "epoch": 0.7246662721115276, "grad_norm": 4.609556566608224, "learning_rate": 5e-06, "loss": 0.1375, "num_input_tokens_seen": 473180556, "step": 2755 }, { "epoch": 0.7246662721115276, "loss": 0.2066127210855484, "loss_ce": 0.0006495795678347349, "loss_iou": 0.400390625, "loss_num": 0.041259765625, "loss_xval": 0.2060546875, "num_input_tokens_seen": 473180556, "step": 2755 }, { "epoch": 0.7249293088709147, "grad_norm": 17.674016284792703, "learning_rate": 5e-06, "loss": 0.123, "num_input_tokens_seen": 473352636, "step": 2756 }, { "epoch": 0.7249293088709147, "loss": 0.13866102695465088, "loss_ce": 0.0018507244531065226, "loss_iou": 0.443359375, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 473352636, "step": 2756 }, { "epoch": 0.7251923456303019, "grad_norm": 12.378222072828299, "learning_rate": 5e-06, "loss": 0.0739, "num_input_tokens_seen": 473524848, "step": 2757 }, { "epoch": 0.7251923456303019, "loss": 0.059792935848236084, "loss_ce": 0.0014433301985263824, "loss_iou": 0.419921875, "loss_num": 
0.01165771484375, "loss_xval": 0.058349609375, "num_input_tokens_seen": 473524848, "step": 2757 }, { "epoch": 0.725455382389689, "grad_norm": 4.317210813813628, "learning_rate": 5e-06, "loss": 0.1362, "num_input_tokens_seen": 473697016, "step": 2758 }, { "epoch": 0.725455382389689, "loss": 0.1696222722530365, "loss_ce": 0.0004328044014982879, "loss_iou": 0.3984375, "loss_num": 0.03369140625, "loss_xval": 0.1689453125, "num_input_tokens_seen": 473697016, "step": 2758 }, { "epoch": 0.7257184191490761, "grad_norm": 14.320905421992641, "learning_rate": 5e-06, "loss": 0.1392, "num_input_tokens_seen": 473868992, "step": 2759 }, { "epoch": 0.7257184191490761, "loss": 0.07304464280605316, "loss_ce": 0.0016335132531821728, "loss_iou": 0.6484375, "loss_num": 0.0142822265625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 473868992, "step": 2759 }, { "epoch": 0.7259814559084632, "grad_norm": 3.9251643218505756, "learning_rate": 5e-06, "loss": 0.1297, "num_input_tokens_seen": 474041228, "step": 2760 }, { "epoch": 0.7259814559084632, "loss": 0.09449034929275513, "loss_ce": 0.0015948471846058965, "loss_iou": 0.48828125, "loss_num": 0.0185546875, "loss_xval": 0.0927734375, "num_input_tokens_seen": 474041228, "step": 2760 }, { "epoch": 0.7262444926678503, "grad_norm": 4.80079690317671, "learning_rate": 5e-06, "loss": 0.1184, "num_input_tokens_seen": 474213616, "step": 2761 }, { "epoch": 0.7262444926678503, "loss": 0.14180995523929596, "loss_ce": 0.001276510301977396, "loss_iou": 0.53125, "loss_num": 0.028076171875, "loss_xval": 0.140625, "num_input_tokens_seen": 474213616, "step": 2761 }, { "epoch": 0.7265075294272375, "grad_norm": 5.815165285313656, "learning_rate": 5e-06, "loss": 0.1224, "num_input_tokens_seen": 474385980, "step": 2762 }, { "epoch": 0.7265075294272375, "loss": 0.14968647062778473, "loss_ce": 0.0027748444117605686, "loss_iou": 0.474609375, "loss_num": 0.0294189453125, "loss_xval": 0.146484375, "num_input_tokens_seen": 474385980, "step": 2762 }, { "epoch": 
0.7267705661866246, "grad_norm": 18.689053225654995, "learning_rate": 5e-06, "loss": 0.1345, "num_input_tokens_seen": 474558212, "step": 2763 }, { "epoch": 0.7267705661866246, "loss": 0.07762917876243591, "loss_ce": 0.0015488516073673964, "loss_iou": 0.546875, "loss_num": 0.0152587890625, "loss_xval": 0.076171875, "num_input_tokens_seen": 474558212, "step": 2763 }, { "epoch": 0.7270336029460117, "grad_norm": 26.06170268014936, "learning_rate": 5e-06, "loss": 0.1927, "num_input_tokens_seen": 474730824, "step": 2764 }, { "epoch": 0.7270336029460117, "loss": 0.2366107702255249, "loss_ce": 0.0011676568537950516, "loss_iou": 0.337890625, "loss_num": 0.047119140625, "loss_xval": 0.2353515625, "num_input_tokens_seen": 474730824, "step": 2764 }, { "epoch": 0.7272966397053988, "grad_norm": 4.246687258335021, "learning_rate": 5e-06, "loss": 0.0722, "num_input_tokens_seen": 474901372, "step": 2765 }, { "epoch": 0.7272966397053988, "loss": 0.05860138311982155, "loss_ce": 0.0005874672788195312, "loss_iou": 0.5546875, "loss_num": 0.0115966796875, "loss_xval": 0.05810546875, "num_input_tokens_seen": 474901372, "step": 2765 }, { "epoch": 0.7275596764647859, "grad_norm": 10.116948882200285, "learning_rate": 5e-06, "loss": 0.1366, "num_input_tokens_seen": 475070388, "step": 2766 }, { "epoch": 0.7275596764647859, "loss": 0.14292961359024048, "loss_ce": 0.0015111502725630999, "loss_iou": 0.484375, "loss_num": 0.0283203125, "loss_xval": 0.1416015625, "num_input_tokens_seen": 475070388, "step": 2766 }, { "epoch": 0.727822713224173, "grad_norm": 5.556641114443334, "learning_rate": 5e-06, "loss": 0.1004, "num_input_tokens_seen": 475242528, "step": 2767 }, { "epoch": 0.727822713224173, "loss": 0.05033715069293976, "loss_ce": 0.00010521705553401262, "loss_iou": 0.51953125, "loss_num": 0.01007080078125, "loss_xval": 0.05029296875, "num_input_tokens_seen": 475242528, "step": 2767 }, { "epoch": 0.7280857499835602, "grad_norm": 7.020407355867949, "learning_rate": 5e-06, "loss": 0.1336, 
"num_input_tokens_seen": 475414560, "step": 2768 }, { "epoch": 0.7280857499835602, "loss": 0.14076019823551178, "loss_ce": 0.003125916002318263, "loss_iou": 0.41796875, "loss_num": 0.027587890625, "loss_xval": 0.1376953125, "num_input_tokens_seen": 475414560, "step": 2768 }, { "epoch": 0.7283487867429473, "grad_norm": 3.869087312985774, "learning_rate": 5e-06, "loss": 0.1426, "num_input_tokens_seen": 475586900, "step": 2769 }, { "epoch": 0.7283487867429473, "loss": 0.1432042121887207, "loss_ce": 0.001144890207797289, "loss_iou": null, "loss_num": 0.0284423828125, "loss_xval": 0.1416015625, "num_input_tokens_seen": 475586900, "step": 2769 }, { "epoch": 0.7286118235023344, "grad_norm": 5.203369255610556, "learning_rate": 5e-06, "loss": 0.1145, "num_input_tokens_seen": 475759012, "step": 2770 }, { "epoch": 0.7286118235023344, "loss": 0.09897112846374512, "loss_ce": 0.000979190575890243, "loss_iou": 0.478515625, "loss_num": 0.0196533203125, "loss_xval": 0.09814453125, "num_input_tokens_seen": 475759012, "step": 2770 }, { "epoch": 0.7288748602617215, "grad_norm": 5.470559222873129, "learning_rate": 5e-06, "loss": 0.1425, "num_input_tokens_seen": 475931112, "step": 2771 }, { "epoch": 0.7288748602617215, "loss": 0.057690735906362534, "loss_ce": 0.00324737885966897, "loss_iou": 0.447265625, "loss_num": 0.0108642578125, "loss_xval": 0.054443359375, "num_input_tokens_seen": 475931112, "step": 2771 }, { "epoch": 0.7291378970211086, "grad_norm": 18.974366841188825, "learning_rate": 5e-06, "loss": 0.1373, "num_input_tokens_seen": 476100764, "step": 2772 }, { "epoch": 0.7291378970211086, "loss": 0.15548212826251984, "loss_ce": 0.0019024083158001304, "loss_iou": 0.5078125, "loss_num": 0.0306396484375, "loss_xval": 0.1533203125, "num_input_tokens_seen": 476100764, "step": 2772 }, { "epoch": 0.7294009337804959, "grad_norm": 5.671161770383539, "learning_rate": 5e-06, "loss": 0.0964, "num_input_tokens_seen": 476272960, "step": 2773 }, { "epoch": 0.7294009337804959, "loss": 
0.0945616215467453, "loss_ce": 0.00023179112758953124, "loss_iou": 0.50390625, "loss_num": 0.018798828125, "loss_xval": 0.09423828125, "num_input_tokens_seen": 476272960, "step": 2773 }, { "epoch": 0.729663970539883, "grad_norm": 34.00148217519992, "learning_rate": 5e-06, "loss": 0.1343, "num_input_tokens_seen": 476445160, "step": 2774 }, { "epoch": 0.729663970539883, "loss": 0.2345729023218155, "loss_ce": 0.0021205078810453415, "loss_iou": 0.4921875, "loss_num": 0.04638671875, "loss_xval": 0.232421875, "num_input_tokens_seen": 476445160, "step": 2774 }, { "epoch": 0.7299270072992701, "grad_norm": 4.401043478623018, "learning_rate": 5e-06, "loss": 0.0804, "num_input_tokens_seen": 476617452, "step": 2775 }, { "epoch": 0.7299270072992701, "loss": 0.07482883334159851, "loss_ce": 0.00036594344419427216, "loss_iou": 0.671875, "loss_num": 0.014892578125, "loss_xval": 0.07421875, "num_input_tokens_seen": 476617452, "step": 2775 }, { "epoch": 0.7301900440586572, "grad_norm": 4.444423155502646, "learning_rate": 5e-06, "loss": 0.1158, "num_input_tokens_seen": 476789580, "step": 2776 }, { "epoch": 0.7301900440586572, "loss": 0.08125682920217514, "loss_ce": 0.0011787032708525658, "loss_iou": 0.427734375, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 476789580, "step": 2776 }, { "epoch": 0.7304530808180443, "grad_norm": 9.388146192273584, "learning_rate": 5e-06, "loss": 0.142, "num_input_tokens_seen": 476961420, "step": 2777 }, { "epoch": 0.7304530808180443, "loss": 0.09009505808353424, "loss_ce": 0.0032420377247035503, "loss_iou": 0.41796875, "loss_num": 0.017333984375, "loss_xval": 0.0869140625, "num_input_tokens_seen": 476961420, "step": 2777 }, { "epoch": 0.7307161175774315, "grad_norm": 13.509674001993902, "learning_rate": 5e-06, "loss": 0.1239, "num_input_tokens_seen": 477133352, "step": 2778 }, { "epoch": 0.7307161175774315, "loss": 0.15684369206428528, "loss_ce": 0.0034623502288013697, "loss_iou": 0.50390625, "loss_num": 
0.0306396484375, "loss_xval": 0.1533203125, "num_input_tokens_seen": 477133352, "step": 2778 }, { "epoch": 0.7309791543368186, "grad_norm": 10.371891827363712, "learning_rate": 5e-06, "loss": 0.1536, "num_input_tokens_seen": 477305700, "step": 2779 }, { "epoch": 0.7309791543368186, "loss": 0.1920810043811798, "loss_ce": 0.003238243516534567, "loss_iou": 0.490234375, "loss_num": 0.037841796875, "loss_xval": 0.1884765625, "num_input_tokens_seen": 477305700, "step": 2779 }, { "epoch": 0.7312421910962057, "grad_norm": 4.473462123830256, "learning_rate": 5e-06, "loss": 0.106, "num_input_tokens_seen": 477476192, "step": 2780 }, { "epoch": 0.7312421910962057, "loss": 0.12351857125759125, "loss_ce": 0.003828628221526742, "loss_iou": 0.3671875, "loss_num": 0.02392578125, "loss_xval": 0.11962890625, "num_input_tokens_seen": 477476192, "step": 2780 }, { "epoch": 0.7315052278555928, "grad_norm": 8.785650264431224, "learning_rate": 5e-06, "loss": 0.1026, "num_input_tokens_seen": 477648396, "step": 2781 }, { "epoch": 0.7315052278555928, "loss": 0.10561161488294601, "loss_ce": 0.00282841082662344, "loss_iou": 0.5390625, "loss_num": 0.0206298828125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 477648396, "step": 2781 }, { "epoch": 0.7317682646149799, "grad_norm": 4.9317310113146835, "learning_rate": 5e-06, "loss": 0.1214, "num_input_tokens_seen": 477820536, "step": 2782 }, { "epoch": 0.7317682646149799, "loss": 0.15500710904598236, "loss_ce": 0.0015952409012243152, "loss_iou": 0.50390625, "loss_num": 0.03076171875, "loss_xval": 0.1533203125, "num_input_tokens_seen": 477820536, "step": 2782 }, { "epoch": 0.7320313013743671, "grad_norm": 14.38075821387521, "learning_rate": 5e-06, "loss": 0.0948, "num_input_tokens_seen": 477992760, "step": 2783 }, { "epoch": 0.7320313013743671, "loss": 0.09361109137535095, "loss_ce": 0.0013869699323549867, "loss_iou": 0.55859375, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 477992760, "step": 2783 }, { 
"epoch": 0.7322943381337542, "grad_norm": 3.3741559107447294, "learning_rate": 5e-06, "loss": 0.1013, "num_input_tokens_seen": 478164884, "step": 2784 }, { "epoch": 0.7322943381337542, "loss": 0.06834319978952408, "loss_ce": 0.00392058864235878, "loss_iou": 0.5625, "loss_num": 0.01287841796875, "loss_xval": 0.064453125, "num_input_tokens_seen": 478164884, "step": 2784 }, { "epoch": 0.7325573748931413, "grad_norm": 4.560868774415877, "learning_rate": 5e-06, "loss": 0.1103, "num_input_tokens_seen": 478337276, "step": 2785 }, { "epoch": 0.7325573748931413, "loss": 0.08397236466407776, "loss_ce": 0.0004152356996200979, "loss_iou": 0.6015625, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 478337276, "step": 2785 }, { "epoch": 0.7328204116525284, "grad_norm": 17.451789372881052, "learning_rate": 5e-06, "loss": 0.1281, "num_input_tokens_seen": 478509332, "step": 2786 }, { "epoch": 0.7328204116525284, "loss": 0.1723887324333191, "loss_ce": 0.00039166733040474355, "loss_iou": 0.44140625, "loss_num": 0.034423828125, "loss_xval": 0.171875, "num_input_tokens_seen": 478509332, "step": 2786 }, { "epoch": 0.7330834484119155, "grad_norm": 6.047350371102969, "learning_rate": 5e-06, "loss": 0.1491, "num_input_tokens_seen": 478681352, "step": 2787 }, { "epoch": 0.7330834484119155, "loss": 0.04348166286945343, "loss_ce": 0.0019167213467881083, "loss_iou": 0.5703125, "loss_num": 0.00830078125, "loss_xval": 0.04150390625, "num_input_tokens_seen": 478681352, "step": 2787 }, { "epoch": 0.7333464851713027, "grad_norm": 4.3763950262189235, "learning_rate": 5e-06, "loss": 0.1166, "num_input_tokens_seen": 478853448, "step": 2788 }, { "epoch": 0.7333464851713027, "loss": 0.11647917330265045, "loss_ce": 0.0007565193227492273, "loss_iou": 0.490234375, "loss_num": 0.023193359375, "loss_xval": 0.11572265625, "num_input_tokens_seen": 478853448, "step": 2788 }, { "epoch": 0.7336095219306898, "grad_norm": 3.397549309274833, "learning_rate": 5e-06, "loss": 0.0831, 
"num_input_tokens_seen": 479025420, "step": 2789 }, { "epoch": 0.7336095219306898, "loss": 0.04994508996605873, "loss_ce": 0.0003540250181686133, "loss_iou": 0.37890625, "loss_num": 0.0098876953125, "loss_xval": 0.049560546875, "num_input_tokens_seen": 479025420, "step": 2789 }, { "epoch": 0.733872558690077, "grad_norm": 6.639247678980395, "learning_rate": 5e-06, "loss": 0.1277, "num_input_tokens_seen": 479197512, "step": 2790 }, { "epoch": 0.733872558690077, "loss": 0.0962657481431961, "loss_ce": 0.0033092054072767496, "loss_iou": 0.5234375, "loss_num": 0.0185546875, "loss_xval": 0.0927734375, "num_input_tokens_seen": 479197512, "step": 2790 }, { "epoch": 0.734135595449464, "grad_norm": 70.18841715820902, "learning_rate": 5e-06, "loss": 0.121, "num_input_tokens_seen": 479369608, "step": 2791 }, { "epoch": 0.734135595449464, "loss": 0.057409316301345825, "loss_ce": 0.000661880592815578, "loss_iou": 0.61328125, "loss_num": 0.0113525390625, "loss_xval": 0.056640625, "num_input_tokens_seen": 479369608, "step": 2791 }, { "epoch": 0.7343986322088512, "grad_norm": 8.444086759452258, "learning_rate": 5e-06, "loss": 0.1408, "num_input_tokens_seen": 479541840, "step": 2792 }, { "epoch": 0.7343986322088512, "loss": 0.18742145597934723, "loss_ce": 0.0015999219613149762, "loss_iou": 0.37890625, "loss_num": 0.037109375, "loss_xval": 0.185546875, "num_input_tokens_seen": 479541840, "step": 2792 }, { "epoch": 0.7346616689682383, "grad_norm": 5.294583182815855, "learning_rate": 5e-06, "loss": 0.1487, "num_input_tokens_seen": 479714196, "step": 2793 }, { "epoch": 0.7346616689682383, "loss": 0.1654970794916153, "loss_ce": 0.002441658638417721, "loss_iou": 0.5234375, "loss_num": 0.03271484375, "loss_xval": 0.1630859375, "num_input_tokens_seen": 479714196, "step": 2793 }, { "epoch": 0.7349247057276255, "grad_norm": 4.160027277201995, "learning_rate": 5e-06, "loss": 0.0766, "num_input_tokens_seen": 479886580, "step": 2794 }, { "epoch": 0.7349247057276255, "loss": 0.11283927410840988, 
"loss_ce": 0.0021825337316840887, "loss_iou": 0.5, "loss_num": 0.0220947265625, "loss_xval": 0.11083984375, "num_input_tokens_seen": 479886580, "step": 2794 }, { "epoch": 0.7351877424870126, "grad_norm": 8.261194775106992, "learning_rate": 5e-06, "loss": 0.1283, "num_input_tokens_seen": 480058736, "step": 2795 }, { "epoch": 0.7351877424870126, "loss": 0.12811481952667236, "loss_ce": 0.0015584270004183054, "loss_iou": 0.486328125, "loss_num": 0.0252685546875, "loss_xval": 0.126953125, "num_input_tokens_seen": 480058736, "step": 2795 }, { "epoch": 0.7354507792463997, "grad_norm": 3.186827546083555, "learning_rate": 5e-06, "loss": 0.1094, "num_input_tokens_seen": 480230860, "step": 2796 }, { "epoch": 0.7354507792463997, "loss": 0.12094112485647202, "loss_ce": 0.00286861858330667, "loss_iou": 0.296875, "loss_num": 0.023681640625, "loss_xval": 0.1181640625, "num_input_tokens_seen": 480230860, "step": 2796 }, { "epoch": 0.7357138160057868, "grad_norm": 3.6199423783668534, "learning_rate": 5e-06, "loss": 0.0792, "num_input_tokens_seen": 480403260, "step": 2797 }, { "epoch": 0.7357138160057868, "loss": 0.0642661452293396, "loss_ce": 0.0005149244680069387, "loss_iou": 0.61328125, "loss_num": 0.01275634765625, "loss_xval": 0.06396484375, "num_input_tokens_seen": 480403260, "step": 2797 }, { "epoch": 0.7359768527651739, "grad_norm": 4.853254066183438, "learning_rate": 5e-06, "loss": 0.0999, "num_input_tokens_seen": 480575740, "step": 2798 }, { "epoch": 0.7359768527651739, "loss": 0.1287141740322113, "loss_ce": 0.000998095260001719, "loss_iou": 0.5234375, "loss_num": 0.0255126953125, "loss_xval": 0.1279296875, "num_input_tokens_seen": 480575740, "step": 2798 }, { "epoch": 0.7362398895245611, "grad_norm": 6.187299771807659, "learning_rate": 5e-06, "loss": 0.1079, "num_input_tokens_seen": 480745892, "step": 2799 }, { "epoch": 0.7362398895245611, "loss": 0.11742156744003296, "loss_ce": 0.0004476910980883986, "loss_iou": 0.40625, "loss_num": 0.0233154296875, "loss_xval": 
0.1171875, "num_input_tokens_seen": 480745892, "step": 2799 }, { "epoch": 0.7365029262839482, "grad_norm": 3.2510976930909194, "learning_rate": 5e-06, "loss": 0.124, "num_input_tokens_seen": 480916480, "step": 2800 }, { "epoch": 0.7365029262839482, "loss": 0.14714287221431732, "loss_ce": 7.865649240557104e-05, "loss_iou": 0.48828125, "loss_num": 0.0294189453125, "loss_xval": 0.1474609375, "num_input_tokens_seen": 480916480, "step": 2800 }, { "epoch": 0.7367659630433353, "grad_norm": 4.500033616411093, "learning_rate": 5e-06, "loss": 0.1404, "num_input_tokens_seen": 481088428, "step": 2801 }, { "epoch": 0.7367659630433353, "loss": 0.07280252873897552, "loss_ce": 0.0012845833553001285, "loss_iou": 0.427734375, "loss_num": 0.0142822265625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 481088428, "step": 2801 }, { "epoch": 0.7370289998027224, "grad_norm": 6.733873919958713, "learning_rate": 5e-06, "loss": 0.1157, "num_input_tokens_seen": 481260648, "step": 2802 }, { "epoch": 0.7370289998027224, "loss": 0.13488659262657166, "loss_ce": 0.002318233484402299, "loss_iou": 0.51171875, "loss_num": 0.0264892578125, "loss_xval": 0.1328125, "num_input_tokens_seen": 481260648, "step": 2802 }, { "epoch": 0.7372920365621095, "grad_norm": 4.443731950557251, "learning_rate": 5e-06, "loss": 0.1034, "num_input_tokens_seen": 481432928, "step": 2803 }, { "epoch": 0.7372920365621095, "loss": 0.05556986480951309, "loss_ce": 0.0003940859460271895, "loss_iou": 0.578125, "loss_num": 0.01104736328125, "loss_xval": 0.05517578125, "num_input_tokens_seen": 481432928, "step": 2803 }, { "epoch": 0.7375550733214967, "grad_norm": 3.9159514379676663, "learning_rate": 5e-06, "loss": 0.1142, "num_input_tokens_seen": 481603740, "step": 2804 }, { "epoch": 0.7375550733214967, "loss": 0.07777837663888931, "loss_ce": 5.010394670534879e-05, "loss_iou": 0.58203125, "loss_num": 0.01556396484375, "loss_xval": 0.07763671875, "num_input_tokens_seen": 481603740, "step": 2804 }, { "epoch": 0.7378181100808838, 
"grad_norm": 3.806670191888079, "learning_rate": 5e-06, "loss": 0.1196, "num_input_tokens_seen": 481774292, "step": 2805 }, { "epoch": 0.7378181100808838, "loss": 0.046146463602781296, "loss_ce": 0.00412375945597887, "loss_iou": 0.40625, "loss_num": 0.0084228515625, "loss_xval": 0.0419921875, "num_input_tokens_seen": 481774292, "step": 2805 }, { "epoch": 0.7380811468402709, "grad_norm": 18.2473830072325, "learning_rate": 5e-06, "loss": 0.1408, "num_input_tokens_seen": 481946452, "step": 2806 }, { "epoch": 0.7380811468402709, "loss": 0.15219584107398987, "loss_ce": 0.0003708918229676783, "loss_iou": 0.55078125, "loss_num": 0.0303955078125, "loss_xval": 0.1513671875, "num_input_tokens_seen": 481946452, "step": 2806 }, { "epoch": 0.738344183599658, "grad_norm": 5.953555323593533, "learning_rate": 5e-06, "loss": 0.0944, "num_input_tokens_seen": 482118496, "step": 2807 }, { "epoch": 0.738344183599658, "loss": 0.14251753687858582, "loss_ce": 0.00039717700565233827, "loss_iou": 0.60546875, "loss_num": 0.0284423828125, "loss_xval": 0.142578125, "num_input_tokens_seen": 482118496, "step": 2807 }, { "epoch": 0.7386072203590451, "grad_norm": 7.279415163791317, "learning_rate": 5e-06, "loss": 0.1145, "num_input_tokens_seen": 482287100, "step": 2808 }, { "epoch": 0.7386072203590451, "loss": 0.11856916546821594, "loss_ce": 0.0012290815357118845, "loss_iou": 0.361328125, "loss_num": 0.0234375, "loss_xval": 0.1171875, "num_input_tokens_seen": 482287100, "step": 2808 }, { "epoch": 0.7388702571184323, "grad_norm": 60.57211297247697, "learning_rate": 5e-06, "loss": 0.1379, "num_input_tokens_seen": 482459096, "step": 2809 }, { "epoch": 0.7388702571184323, "loss": 0.14476290345191956, "loss_ce": 0.003466519294306636, "loss_iou": 0.64453125, "loss_num": 0.0281982421875, "loss_xval": 0.1416015625, "num_input_tokens_seen": 482459096, "step": 2809 }, { "epoch": 0.7391332938778195, "grad_norm": 4.216089199918953, "learning_rate": 5e-06, "loss": 0.0932, "num_input_tokens_seen": 482631768, 
"step": 2810 }, { "epoch": 0.7391332938778195, "loss": 0.1267136037349701, "loss_ce": 0.0018051671795547009, "loss_iou": 0.466796875, "loss_num": 0.0250244140625, "loss_xval": 0.125, "num_input_tokens_seen": 482631768, "step": 2810 }, { "epoch": 0.7393963306372066, "grad_norm": 5.529488884431239, "learning_rate": 5e-06, "loss": 0.1088, "num_input_tokens_seen": 482803968, "step": 2811 }, { "epoch": 0.7393963306372066, "loss": 0.12075556814670563, "loss_ce": 0.0023778879549354315, "loss_iou": 0.67578125, "loss_num": 0.023681640625, "loss_xval": 0.1181640625, "num_input_tokens_seen": 482803968, "step": 2811 }, { "epoch": 0.7396593673965937, "grad_norm": 14.970093925458164, "learning_rate": 5e-06, "loss": 0.089, "num_input_tokens_seen": 482976316, "step": 2812 }, { "epoch": 0.7396593673965937, "loss": 0.11577010154724121, "loss_ce": 0.00230574794113636, "loss_iou": 0.53125, "loss_num": 0.022705078125, "loss_xval": 0.11328125, "num_input_tokens_seen": 482976316, "step": 2812 }, { "epoch": 0.7399224041559808, "grad_norm": 16.71817355365124, "learning_rate": 5e-06, "loss": 0.1449, "num_input_tokens_seen": 483148324, "step": 2813 }, { "epoch": 0.7399224041559808, "loss": 0.12452364712953568, "loss_ce": 0.0020718637388199568, "loss_iou": 0.2734375, "loss_num": 0.0244140625, "loss_xval": 0.12255859375, "num_input_tokens_seen": 483148324, "step": 2813 }, { "epoch": 0.740185440915368, "grad_norm": 6.067200171837272, "learning_rate": 5e-06, "loss": 0.0904, "num_input_tokens_seen": 483320396, "step": 2814 }, { "epoch": 0.740185440915368, "loss": 0.08812370151281357, "loss_ce": 0.003498460166156292, "loss_iou": 0.494140625, "loss_num": 0.0169677734375, "loss_xval": 0.08447265625, "num_input_tokens_seen": 483320396, "step": 2814 }, { "epoch": 0.7404484776747551, "grad_norm": 4.557027585995325, "learning_rate": 5e-06, "loss": 0.0874, "num_input_tokens_seen": 483492288, "step": 2815 }, { "epoch": 0.7404484776747551, "loss": 0.11246176064014435, "loss_ce": 0.0007674178341403604, 
"loss_iou": 0.5078125, "loss_num": 0.0223388671875, "loss_xval": 0.11181640625, "num_input_tokens_seen": 483492288, "step": 2815 }, { "epoch": 0.7407115144341422, "grad_norm": 4.433500401049952, "learning_rate": 5e-06, "loss": 0.1057, "num_input_tokens_seen": 483664284, "step": 2816 }, { "epoch": 0.7407115144341422, "loss": 0.11581188440322876, "loss_ce": 0.0005164705216884613, "loss_iou": 0.4765625, "loss_num": 0.0230712890625, "loss_xval": 0.115234375, "num_input_tokens_seen": 483664284, "step": 2816 }, { "epoch": 0.7409745511935293, "grad_norm": 4.273944166752719, "learning_rate": 5e-06, "loss": 0.1428, "num_input_tokens_seen": 483836152, "step": 2817 }, { "epoch": 0.7409745511935293, "loss": 0.1244652196764946, "loss_ce": 0.0025780070573091507, "loss_iou": 0.578125, "loss_num": 0.0244140625, "loss_xval": 0.1220703125, "num_input_tokens_seen": 483836152, "step": 2817 }, { "epoch": 0.7412375879529164, "grad_norm": 7.341058016704505, "learning_rate": 5e-06, "loss": 0.1099, "num_input_tokens_seen": 484005324, "step": 2818 }, { "epoch": 0.7412375879529164, "loss": 0.09527404606342316, "loss_ce": 0.0004711919464170933, "loss_iou": 0.51171875, "loss_num": 0.01904296875, "loss_xval": 0.0947265625, "num_input_tokens_seen": 484005324, "step": 2818 }, { "epoch": 0.7415006247123035, "grad_norm": 5.580265155977227, "learning_rate": 5e-06, "loss": 0.1248, "num_input_tokens_seen": 484175008, "step": 2819 }, { "epoch": 0.7415006247123035, "loss": 0.061688005924224854, "loss_ce": 0.000408706720918417, "loss_iou": 0.8203125, "loss_num": 0.01226806640625, "loss_xval": 0.061279296875, "num_input_tokens_seen": 484175008, "step": 2819 }, { "epoch": 0.7417636614716907, "grad_norm": 10.569140888378845, "learning_rate": 5e-06, "loss": 0.1243, "num_input_tokens_seen": 484346776, "step": 2820 }, { "epoch": 0.7417636614716907, "loss": 0.1439221203327179, "loss_ce": 0.00351074174977839, "loss_iou": 0.435546875, "loss_num": 0.028076171875, "loss_xval": 0.140625, "num_input_tokens_seen": 
484346776, "step": 2820 }, { "epoch": 0.7420266982310778, "grad_norm": 4.104540427593442, "learning_rate": 5e-06, "loss": 0.0873, "num_input_tokens_seen": 484518992, "step": 2821 }, { "epoch": 0.7420266982310778, "loss": 0.09965825080871582, "loss_ce": 0.000567668757867068, "loss_iou": 0.60546875, "loss_num": 0.019775390625, "loss_xval": 0.09912109375, "num_input_tokens_seen": 484518992, "step": 2821 }, { "epoch": 0.7422897349904649, "grad_norm": 5.580054688233454, "learning_rate": 5e-06, "loss": 0.1559, "num_input_tokens_seen": 484691244, "step": 2822 }, { "epoch": 0.7422897349904649, "loss": 0.17825458943843842, "loss_ce": 0.002351263538002968, "loss_iou": 0.53515625, "loss_num": 0.03515625, "loss_xval": 0.17578125, "num_input_tokens_seen": 484691244, "step": 2822 }, { "epoch": 0.742552771749852, "grad_norm": 3.6849080723348067, "learning_rate": 5e-06, "loss": 0.1494, "num_input_tokens_seen": 484861364, "step": 2823 }, { "epoch": 0.742552771749852, "loss": 0.12127910554409027, "loss_ce": 0.0025046863593161106, "loss_iou": 0.54296875, "loss_num": 0.023681640625, "loss_xval": 0.11865234375, "num_input_tokens_seen": 484861364, "step": 2823 }, { "epoch": 0.7428158085092391, "grad_norm": 13.128530967150855, "learning_rate": 5e-06, "loss": 0.1134, "num_input_tokens_seen": 485032096, "step": 2824 }, { "epoch": 0.7428158085092391, "loss": 0.044097013771533966, "loss_ce": 0.0010977452620863914, "loss_iou": 0.470703125, "loss_num": 0.00860595703125, "loss_xval": 0.04296875, "num_input_tokens_seen": 485032096, "step": 2824 }, { "epoch": 0.7430788452686263, "grad_norm": 29.97671813936596, "learning_rate": 5e-06, "loss": 0.1387, "num_input_tokens_seen": 485203848, "step": 2825 }, { "epoch": 0.7430788452686263, "loss": 0.1745963990688324, "loss_ce": 0.0014701783657073975, "loss_iou": 0.52734375, "loss_num": 0.03466796875, "loss_xval": 0.1728515625, "num_input_tokens_seen": 485203848, "step": 2825 }, { "epoch": 0.7433418820280134, "grad_norm": 3.457370742890275, 
"learning_rate": 5e-06, "loss": 0.0854, "num_input_tokens_seen": 485376152, "step": 2826 }, { "epoch": 0.7433418820280134, "loss": 0.10546036064624786, "loss_ce": 0.0021430959459394217, "loss_iou": 0.44921875, "loss_num": 0.0206298828125, "loss_xval": 0.103515625, "num_input_tokens_seen": 485376152, "step": 2826 }, { "epoch": 0.7436049187874005, "grad_norm": 14.472339934978308, "learning_rate": 5e-06, "loss": 0.1093, "num_input_tokens_seen": 485546204, "step": 2827 }, { "epoch": 0.7436049187874005, "loss": 0.1524585783481598, "loss_ce": 0.0006031189695931971, "loss_iou": 0.453125, "loss_num": 0.0303955078125, "loss_xval": 0.15234375, "num_input_tokens_seen": 485546204, "step": 2827 }, { "epoch": 0.7438679555467876, "grad_norm": 15.092363857837377, "learning_rate": 5e-06, "loss": 0.1008, "num_input_tokens_seen": 485718156, "step": 2828 }, { "epoch": 0.7438679555467876, "loss": 0.11204935610294342, "loss_ce": 0.00023295017308555543, "loss_iou": 0.484375, "loss_num": 0.0223388671875, "loss_xval": 0.11181640625, "num_input_tokens_seen": 485718156, "step": 2828 }, { "epoch": 0.7441309923061747, "grad_norm": 3.949937010975672, "learning_rate": 5e-06, "loss": 0.0703, "num_input_tokens_seen": 485889984, "step": 2829 }, { "epoch": 0.7441309923061747, "loss": 0.08257782459259033, "loss_ce": 0.002652282826602459, "loss_iou": 0.546875, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 485889984, "step": 2829 }, { "epoch": 0.744394029065562, "grad_norm": 5.74645673965003, "learning_rate": 5e-06, "loss": 0.07, "num_input_tokens_seen": 486062200, "step": 2830 }, { "epoch": 0.744394029065562, "loss": 0.10757631063461304, "loss_ce": 0.0008258260786533356, "loss_iou": 0.33203125, "loss_num": 0.0213623046875, "loss_xval": 0.10693359375, "num_input_tokens_seen": 486062200, "step": 2830 }, { "epoch": 0.7446570658249491, "grad_norm": 5.133478707353028, "learning_rate": 5e-06, "loss": 0.0899, "num_input_tokens_seen": 486234156, "step": 2831 }, { "epoch": 
0.7446570658249491, "loss": 0.05313008278608322, "loss_ce": 0.0007771779783070087, "loss_iou": 0.431640625, "loss_num": 0.01043701171875, "loss_xval": 0.05224609375, "num_input_tokens_seen": 486234156, "step": 2831 }, { "epoch": 0.7449201025843362, "grad_norm": 3.5956313939936164, "learning_rate": 5e-06, "loss": 0.0765, "num_input_tokens_seen": 486406012, "step": 2832 }, { "epoch": 0.7449201025843362, "loss": 0.041897065937519073, "loss_ce": 0.0002405716950306669, "loss_iou": 0.49609375, "loss_num": 0.00836181640625, "loss_xval": 0.041748046875, "num_input_tokens_seen": 486406012, "step": 2832 }, { "epoch": 0.7451831393437233, "grad_norm": 14.659992900472892, "learning_rate": 5e-06, "loss": 0.1671, "num_input_tokens_seen": 486577696, "step": 2833 }, { "epoch": 0.7451831393437233, "loss": 0.10231424868106842, "loss_ce": 0.00011088042083429173, "loss_iou": 0.61328125, "loss_num": 0.0205078125, "loss_xval": 0.10205078125, "num_input_tokens_seen": 486577696, "step": 2833 }, { "epoch": 0.7454461761031104, "grad_norm": 4.0898533446969365, "learning_rate": 5e-06, "loss": 0.132, "num_input_tokens_seen": 486749788, "step": 2834 }, { "epoch": 0.7454461761031104, "loss": 0.16116830706596375, "loss_ce": 0.002553200349211693, "loss_iou": 0.416015625, "loss_num": 0.03173828125, "loss_xval": 0.158203125, "num_input_tokens_seen": 486749788, "step": 2834 }, { "epoch": 0.7457092128624976, "grad_norm": 7.24387104067779, "learning_rate": 5e-06, "loss": 0.1065, "num_input_tokens_seen": 486921812, "step": 2835 }, { "epoch": 0.7457092128624976, "loss": 0.15365542471408844, "loss_ce": 0.0005182233871892095, "loss_iou": 0.6796875, "loss_num": 0.0306396484375, "loss_xval": 0.1533203125, "num_input_tokens_seen": 486921812, "step": 2835 }, { "epoch": 0.7459722496218847, "grad_norm": 4.544468769186057, "learning_rate": 5e-06, "loss": 0.1593, "num_input_tokens_seen": 487092260, "step": 2836 }, { "epoch": 0.7459722496218847, "loss": 0.13337098062038422, "loss_ce": 0.0010772723471745849, 
"loss_iou": 0.44921875, "loss_num": 0.0264892578125, "loss_xval": 0.1318359375, "num_input_tokens_seen": 487092260, "step": 2836 }, { "epoch": 0.7462352863812718, "grad_norm": 25.066833727752858, "learning_rate": 5e-06, "loss": 0.1213, "num_input_tokens_seen": 487264416, "step": 2837 }, { "epoch": 0.7462352863812718, "loss": 0.053436558693647385, "loss_ce": 0.0022585804108530283, "loss_iou": 0.458984375, "loss_num": 0.01025390625, "loss_xval": 0.05126953125, "num_input_tokens_seen": 487264416, "step": 2837 }, { "epoch": 0.7464983231406589, "grad_norm": 5.520845699973876, "learning_rate": 5e-06, "loss": 0.112, "num_input_tokens_seen": 487436620, "step": 2838 }, { "epoch": 0.7464983231406589, "loss": 0.16054442524909973, "loss_ce": 0.002860102104023099, "loss_iou": 0.59375, "loss_num": 0.031494140625, "loss_xval": 0.1572265625, "num_input_tokens_seen": 487436620, "step": 2838 }, { "epoch": 0.746761359900046, "grad_norm": 3.6663304276460456, "learning_rate": 5e-06, "loss": 0.1253, "num_input_tokens_seen": 487608856, "step": 2839 }, { "epoch": 0.746761359900046, "loss": 0.07470214366912842, "loss_ce": 0.0005749509437009692, "loss_iou": 0.51953125, "loss_num": 0.01483154296875, "loss_xval": 0.07421875, "num_input_tokens_seen": 487608856, "step": 2839 }, { "epoch": 0.7470243966594332, "grad_norm": 3.816698805705744, "learning_rate": 5e-06, "loss": 0.0945, "num_input_tokens_seen": 487779452, "step": 2840 }, { "epoch": 0.7470243966594332, "loss": 0.08027718961238861, "loss_ce": 0.00013802893226966262, "loss_iou": 0.46484375, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 487779452, "step": 2840 }, { "epoch": 0.7472874334188203, "grad_norm": 5.187350725945626, "learning_rate": 5e-06, "loss": 0.1064, "num_input_tokens_seen": 487951444, "step": 2841 }, { "epoch": 0.7472874334188203, "loss": 0.13180628418922424, "loss_ce": 0.0002907742455136031, "loss_iou": 0.50390625, "loss_num": 0.0263671875, "loss_xval": 0.1318359375, 
"num_input_tokens_seen": 487951444, "step": 2841 }, { "epoch": 0.7475504701782074, "grad_norm": 17.82443490036777, "learning_rate": 5e-06, "loss": 0.117, "num_input_tokens_seen": 488123816, "step": 2842 }, { "epoch": 0.7475504701782074, "loss": 0.10174276679754257, "loss_ce": 0.00042440436664037406, "loss_iou": 0.44921875, "loss_num": 0.020263671875, "loss_xval": 0.1015625, "num_input_tokens_seen": 488123816, "step": 2842 }, { "epoch": 0.7478135069375945, "grad_norm": 5.349643556816576, "learning_rate": 5e-06, "loss": 0.0817, "num_input_tokens_seen": 488294544, "step": 2843 }, { "epoch": 0.7478135069375945, "loss": 0.06341783702373505, "loss_ce": 6.33449453744106e-05, "loss_iou": 0.55859375, "loss_num": 0.0126953125, "loss_xval": 0.0634765625, "num_input_tokens_seen": 488294544, "step": 2843 }, { "epoch": 0.7480765436969816, "grad_norm": 5.881783273596092, "learning_rate": 5e-06, "loss": 0.1005, "num_input_tokens_seen": 488466512, "step": 2844 }, { "epoch": 0.7480765436969816, "loss": 0.07953569293022156, "loss_ce": 0.0013496556784957647, "loss_iou": 0.515625, "loss_num": 0.015625, "loss_xval": 0.078125, "num_input_tokens_seen": 488466512, "step": 2844 }, { "epoch": 0.7483395804563687, "grad_norm": 4.967245481323238, "learning_rate": 5e-06, "loss": 0.1174, "num_input_tokens_seen": 488638580, "step": 2845 }, { "epoch": 0.7483395804563687, "loss": 0.13291889429092407, "loss_ce": 0.0028682297561317682, "loss_iou": 0.55859375, "loss_num": 0.026123046875, "loss_xval": 0.1298828125, "num_input_tokens_seen": 488638580, "step": 2845 }, { "epoch": 0.7486026172157559, "grad_norm": 26.635770395926077, "learning_rate": 5e-06, "loss": 0.1546, "num_input_tokens_seen": 488810652, "step": 2846 }, { "epoch": 0.7486026172157559, "loss": 0.13681207597255707, "loss_ce": 0.006761421915143728, "loss_iou": 0.3359375, "loss_num": 0.0260009765625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 488810652, "step": 2846 }, { "epoch": 0.748865653975143, "grad_norm": 3.755973604918146, 
"learning_rate": 5e-06, "loss": 0.0855, "num_input_tokens_seen": 488982904, "step": 2847 }, { "epoch": 0.748865653975143, "loss": 0.08354687690734863, "loss_ce": 0.0004475130117498338, "loss_iou": 0.447265625, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 488982904, "step": 2847 }, { "epoch": 0.7491286907345301, "grad_norm": 3.8356014362129516, "learning_rate": 5e-06, "loss": 0.0982, "num_input_tokens_seen": 489153060, "step": 2848 }, { "epoch": 0.7491286907345301, "loss": 0.1231696754693985, "loss_ce": 0.0056769950315356255, "loss_iou": 0.478515625, "loss_num": 0.0235595703125, "loss_xval": 0.11767578125, "num_input_tokens_seen": 489153060, "step": 2848 }, { "epoch": 0.7493917274939172, "grad_norm": 6.542346686389534, "learning_rate": 5e-06, "loss": 0.1367, "num_input_tokens_seen": 489324920, "step": 2849 }, { "epoch": 0.7493917274939172, "loss": 0.10556487739086151, "loss_ce": 0.00360565772280097, "loss_iou": 0.56640625, "loss_num": 0.0203857421875, "loss_xval": 0.10205078125, "num_input_tokens_seen": 489324920, "step": 2849 }, { "epoch": 0.7496547642533044, "grad_norm": 3.9950005088023786, "learning_rate": 5e-06, "loss": 0.1183, "num_input_tokens_seen": 489497376, "step": 2850 }, { "epoch": 0.7496547642533044, "loss": 0.11913494765758514, "loss_ce": 0.001535467803478241, "loss_iou": 0.474609375, "loss_num": 0.0235595703125, "loss_xval": 0.11767578125, "num_input_tokens_seen": 489497376, "step": 2850 }, { "epoch": 0.7499178010126916, "grad_norm": 3.9324452711240707, "learning_rate": 5e-06, "loss": 0.1126, "num_input_tokens_seen": 489669656, "step": 2851 }, { "epoch": 0.7499178010126916, "loss": 0.14491230249404907, "loss_ce": 0.0019679656252264977, "loss_iou": 0.61328125, "loss_num": 0.0286865234375, "loss_xval": 0.142578125, "num_input_tokens_seen": 489669656, "step": 2851 }, { "epoch": 0.7501808377720787, "grad_norm": 4.883057174767028, "learning_rate": 5e-06, "loss": 0.0772, "num_input_tokens_seen": 489841572, "step": 2852 }, { 
"epoch": 0.7501808377720787, "loss": 0.06473391503095627, "loss_ce": 0.000845368776936084, "loss_iou": 0.4140625, "loss_num": 0.01275634765625, "loss_xval": 0.06396484375, "num_input_tokens_seen": 489841572, "step": 2852 }, { "epoch": 0.7504438745314658, "grad_norm": 5.5775463930736695, "learning_rate": 5e-06, "loss": 0.1335, "num_input_tokens_seen": 490010444, "step": 2853 }, { "epoch": 0.7504438745314658, "loss": 0.2125670313835144, "loss_ce": 0.0003172852157149464, "loss_iou": 0.55078125, "loss_num": 0.04248046875, "loss_xval": 0.2119140625, "num_input_tokens_seen": 490010444, "step": 2853 }, { "epoch": 0.7507069112908529, "grad_norm": 12.138601127345398, "learning_rate": 5e-06, "loss": 0.0886, "num_input_tokens_seen": 490182752, "step": 2854 }, { "epoch": 0.7507069112908529, "loss": 0.05896022543311119, "loss_ce": 0.0001223331200890243, "loss_iou": 0.5390625, "loss_num": 0.01177978515625, "loss_xval": 0.058837890625, "num_input_tokens_seen": 490182752, "step": 2854 }, { "epoch": 0.75096994805024, "grad_norm": 7.597618581630176, "learning_rate": 5e-06, "loss": 0.1274, "num_input_tokens_seen": 490354572, "step": 2855 }, { "epoch": 0.75096994805024, "loss": 0.1745096743106842, "loss_ce": 0.0004984359256923199, "loss_iou": 0.498046875, "loss_num": 0.034912109375, "loss_xval": 0.173828125, "num_input_tokens_seen": 490354572, "step": 2855 }, { "epoch": 0.7512329848096272, "grad_norm": 4.639912180988158, "learning_rate": 5e-06, "loss": 0.108, "num_input_tokens_seen": 490524680, "step": 2856 }, { "epoch": 0.7512329848096272, "loss": 0.16067692637443542, "loss_ce": 0.006074874196201563, "loss_iou": 0.427734375, "loss_num": 0.0308837890625, "loss_xval": 0.154296875, "num_input_tokens_seen": 490524680, "step": 2856 }, { "epoch": 0.7514960215690143, "grad_norm": 14.921784803655473, "learning_rate": 5e-06, "loss": 0.1355, "num_input_tokens_seen": 490696768, "step": 2857 }, { "epoch": 0.7514960215690143, "loss": 0.1124132052063942, "loss_ce": 0.0032213088124990463, 
"loss_iou": 0.515625, "loss_num": 0.0218505859375, "loss_xval": 0.109375, "num_input_tokens_seen": 490696768, "step": 2857 }, { "epoch": 0.7517590583284014, "grad_norm": 5.608340999176847, "learning_rate": 5e-06, "loss": 0.1026, "num_input_tokens_seen": 490869040, "step": 2858 }, { "epoch": 0.7517590583284014, "loss": 0.05557282269001007, "loss_ce": 0.003662426257506013, "loss_iou": 0.478515625, "loss_num": 0.0103759765625, "loss_xval": 0.052001953125, "num_input_tokens_seen": 490869040, "step": 2858 }, { "epoch": 0.7520220950877885, "grad_norm": 4.649441181141725, "learning_rate": 5e-06, "loss": 0.102, "num_input_tokens_seen": 491041492, "step": 2859 }, { "epoch": 0.7520220950877885, "loss": 0.08483953773975372, "loss_ce": 0.00024481338914483786, "loss_iou": 0.625, "loss_num": 0.0169677734375, "loss_xval": 0.08447265625, "num_input_tokens_seen": 491041492, "step": 2859 }, { "epoch": 0.7522851318471756, "grad_norm": 5.629942269274102, "learning_rate": 5e-06, "loss": 0.1139, "num_input_tokens_seen": 491213328, "step": 2860 }, { "epoch": 0.7522851318471756, "loss": 0.06876988708972931, "loss_ce": 0.0007156922947615385, "loss_iou": 0.4921875, "loss_num": 0.01361083984375, "loss_xval": 0.06787109375, "num_input_tokens_seen": 491213328, "step": 2860 }, { "epoch": 0.7525481686065628, "grad_norm": 5.7661162016412355, "learning_rate": 5e-06, "loss": 0.0949, "num_input_tokens_seen": 491384112, "step": 2861 }, { "epoch": 0.7525481686065628, "loss": 0.12756821513175964, "loss_ce": 0.0015000998973846436, "loss_iou": 0.69921875, "loss_num": 0.0252685546875, "loss_xval": 0.1259765625, "num_input_tokens_seen": 491384112, "step": 2861 }, { "epoch": 0.7528112053659499, "grad_norm": 4.585306018678188, "learning_rate": 5e-06, "loss": 0.1186, "num_input_tokens_seen": 491556404, "step": 2862 }, { "epoch": 0.7528112053659499, "loss": 0.1506051868200302, "loss_ce": 0.00195405725389719, "loss_iou": 0.43359375, "loss_num": 0.0296630859375, "loss_xval": 0.1484375, "num_input_tokens_seen": 
491556404, "step": 2862 }, { "epoch": 0.753074242125337, "grad_norm": 5.018961759147868, "learning_rate": 5e-06, "loss": 0.1566, "num_input_tokens_seen": 491728560, "step": 2863 }, { "epoch": 0.753074242125337, "loss": 0.08526686578989029, "loss_ce": 0.0047004627995193005, "loss_iou": 0.6015625, "loss_num": 0.01611328125, "loss_xval": 0.08056640625, "num_input_tokens_seen": 491728560, "step": 2863 }, { "epoch": 0.7533372788847241, "grad_norm": 5.649735345560609, "learning_rate": 5e-06, "loss": 0.1325, "num_input_tokens_seen": 491900292, "step": 2864 }, { "epoch": 0.7533372788847241, "loss": 0.07598397135734558, "loss_ce": 0.0024213448632508516, "loss_iou": 0.3984375, "loss_num": 0.01470947265625, "loss_xval": 0.07373046875, "num_input_tokens_seen": 491900292, "step": 2864 }, { "epoch": 0.7536003156441112, "grad_norm": 5.741199989182661, "learning_rate": 5e-06, "loss": 0.1471, "num_input_tokens_seen": 492072492, "step": 2865 }, { "epoch": 0.7536003156441112, "loss": 0.17299330234527588, "loss_ce": 0.00398694584146142, "loss_iou": 0.296875, "loss_num": 0.03369140625, "loss_xval": 0.1689453125, "num_input_tokens_seen": 492072492, "step": 2865 }, { "epoch": 0.7538633524034984, "grad_norm": 6.545995942105509, "learning_rate": 5e-06, "loss": 0.0985, "num_input_tokens_seen": 492244560, "step": 2866 }, { "epoch": 0.7538633524034984, "loss": 0.11732736229896545, "loss_ce": 0.00035348787787370384, "loss_iou": 0.48828125, "loss_num": 0.0234375, "loss_xval": 0.1171875, "num_input_tokens_seen": 492244560, "step": 2866 }, { "epoch": 0.7541263891628855, "grad_norm": 7.690174312653901, "learning_rate": 5e-06, "loss": 0.1192, "num_input_tokens_seen": 492416460, "step": 2867 }, { "epoch": 0.7541263891628855, "loss": 0.11988115310668945, "loss_ce": 0.0037617662455886602, "loss_iou": 0.5546875, "loss_num": 0.023193359375, "loss_xval": 0.1162109375, "num_input_tokens_seen": 492416460, "step": 2867 }, { "epoch": 0.7543894259222726, "grad_norm": 4.911561927440868, "learning_rate": 5e-06, 
"loss": 0.1762, "num_input_tokens_seen": 492582560, "step": 2868 }, { "epoch": 0.7543894259222726, "loss": 0.17355158925056458, "loss_ce": 0.0006237310590222478, "loss_iou": 0.41796875, "loss_num": 0.03466796875, "loss_xval": 0.1728515625, "num_input_tokens_seen": 492582560, "step": 2868 }, { "epoch": 0.7546524626816598, "grad_norm": 9.965344498896014, "learning_rate": 5e-06, "loss": 0.1137, "num_input_tokens_seen": 492754468, "step": 2869 }, { "epoch": 0.7546524626816598, "loss": 0.12515220046043396, "loss_ce": 0.0038448250852525234, "loss_iou": 0.515625, "loss_num": 0.0242919921875, "loss_xval": 0.12109375, "num_input_tokens_seen": 492754468, "step": 2869 }, { "epoch": 0.7549154994410469, "grad_norm": 7.897878518361561, "learning_rate": 5e-06, "loss": 0.1325, "num_input_tokens_seen": 492926916, "step": 2870 }, { "epoch": 0.7549154994410469, "loss": 0.07539217174053192, "loss_ce": 0.002699300181120634, "loss_iou": 0.5390625, "loss_num": 0.0145263671875, "loss_xval": 0.07275390625, "num_input_tokens_seen": 492926916, "step": 2870 }, { "epoch": 0.755178536200434, "grad_norm": 8.119348564456503, "learning_rate": 5e-06, "loss": 0.1413, "num_input_tokens_seen": 493099340, "step": 2871 }, { "epoch": 0.755178536200434, "loss": 0.10948731005191803, "loss_ce": 0.002187497215345502, "loss_iou": 0.5546875, "loss_num": 0.021484375, "loss_xval": 0.107421875, "num_input_tokens_seen": 493099340, "step": 2871 }, { "epoch": 0.7554415729598212, "grad_norm": 4.655104945227204, "learning_rate": 5e-06, "loss": 0.1331, "num_input_tokens_seen": 493269864, "step": 2872 }, { "epoch": 0.7554415729598212, "loss": 0.1148996502161026, "loss_ce": 9.252215386368334e-05, "loss_iou": 0.5390625, "loss_num": 0.02294921875, "loss_xval": 0.11474609375, "num_input_tokens_seen": 493269864, "step": 2872 }, { "epoch": 0.7557046097192083, "grad_norm": 7.2487773846350985, "learning_rate": 5e-06, "loss": 0.1297, "num_input_tokens_seen": 493441844, "step": 2873 }, { "epoch": 0.7557046097192083, "loss": 
0.22252312302589417, "loss_ce": 0.00032462860690429807, "loss_iou": 0.47265625, "loss_num": 0.04443359375, "loss_xval": 0.22265625, "num_input_tokens_seen": 493441844, "step": 2873 }, { "epoch": 0.7559676464785954, "grad_norm": 8.669322747332805, "learning_rate": 5e-06, "loss": 0.0904, "num_input_tokens_seen": 493612416, "step": 2874 }, { "epoch": 0.7559676464785954, "loss": 0.1249040961265564, "loss_ce": 0.0034746630117297173, "loss_iou": 0.5859375, "loss_num": 0.0242919921875, "loss_xval": 0.12158203125, "num_input_tokens_seen": 493612416, "step": 2874 }, { "epoch": 0.7562306832379825, "grad_norm": 16.40313430717725, "learning_rate": 5e-06, "loss": 0.1119, "num_input_tokens_seen": 493782864, "step": 2875 }, { "epoch": 0.7562306832379825, "loss": 0.09373937547206879, "loss_ce": 0.0021256012842059135, "loss_iou": 0.68359375, "loss_num": 0.018310546875, "loss_xval": 0.091796875, "num_input_tokens_seen": 493782864, "step": 2875 }, { "epoch": 0.7564937199973696, "grad_norm": 6.017272387510744, "learning_rate": 5e-06, "loss": 0.1223, "num_input_tokens_seen": 493955332, "step": 2876 }, { "epoch": 0.7564937199973696, "loss": 0.1287456452846527, "loss_ce": 0.0006633760058321059, "loss_iou": 0.41015625, "loss_num": 0.025634765625, "loss_xval": 0.1279296875, "num_input_tokens_seen": 493955332, "step": 2876 }, { "epoch": 0.7567567567567568, "grad_norm": 5.38013008566038, "learning_rate": 5e-06, "loss": 0.1189, "num_input_tokens_seen": 494127604, "step": 2877 }, { "epoch": 0.7567567567567568, "loss": 0.10688350349664688, "loss_ce": 0.0010485434904694557, "loss_iou": 0.5703125, "loss_num": 0.0211181640625, "loss_xval": 0.10595703125, "num_input_tokens_seen": 494127604, "step": 2877 }, { "epoch": 0.7570197935161439, "grad_norm": 16.650882691357587, "learning_rate": 5e-06, "loss": 0.1237, "num_input_tokens_seen": 494298004, "step": 2878 }, { "epoch": 0.7570197935161439, "loss": 0.1486469805240631, "loss_ce": 0.0029255489353090525, "loss_iou": 0.484375, "loss_num": 
0.0291748046875, "loss_xval": 0.1455078125, "num_input_tokens_seen": 494298004, "step": 2878 }, { "epoch": 0.757282830275531, "grad_norm": 6.750510186398287, "learning_rate": 5e-06, "loss": 0.0923, "num_input_tokens_seen": 494470108, "step": 2879 }, { "epoch": 0.757282830275531, "loss": 0.1393243670463562, "loss_ce": 0.0016443128697574139, "loss_iou": 0.287109375, "loss_num": 0.027587890625, "loss_xval": 0.1376953125, "num_input_tokens_seen": 494470108, "step": 2879 }, { "epoch": 0.7575458670349181, "grad_norm": 18.523627538074937, "learning_rate": 5e-06, "loss": 0.1173, "num_input_tokens_seen": 494640736, "step": 2880 }, { "epoch": 0.7575458670349181, "loss": 0.11819358170032501, "loss_ce": 0.0007009088294580579, "loss_iou": 0.6171875, "loss_num": 0.0235595703125, "loss_xval": 0.11767578125, "num_input_tokens_seen": 494640736, "step": 2880 }, { "epoch": 0.7578089037943052, "grad_norm": 6.843824113206992, "learning_rate": 5e-06, "loss": 0.105, "num_input_tokens_seen": 494812784, "step": 2881 }, { "epoch": 0.7578089037943052, "loss": 0.18498176336288452, "loss_ce": 0.001769477385096252, "loss_iou": 0.390625, "loss_num": 0.03662109375, "loss_xval": 0.18359375, "num_input_tokens_seen": 494812784, "step": 2881 }, { "epoch": 0.7580719405536924, "grad_norm": 4.288128333077997, "learning_rate": 5e-06, "loss": 0.0991, "num_input_tokens_seen": 494984900, "step": 2882 }, { "epoch": 0.7580719405536924, "loss": 0.11615432053804398, "loss_ce": 0.0020796118769794703, "loss_iou": 0.51171875, "loss_num": 0.0228271484375, "loss_xval": 0.1142578125, "num_input_tokens_seen": 494984900, "step": 2882 }, { "epoch": 0.7583349773130795, "grad_norm": 7.805094359622811, "learning_rate": 5e-06, "loss": 0.1527, "num_input_tokens_seen": 495157204, "step": 2883 }, { "epoch": 0.7583349773130795, "loss": 0.15978480875492096, "loss_ce": 0.002222547074779868, "loss_iou": 0.578125, "loss_num": 0.031494140625, "loss_xval": 0.1572265625, "num_input_tokens_seen": 495157204, "step": 2883 }, { "epoch": 
0.7585980140724666, "grad_norm": 9.068753339594036, "learning_rate": 5e-06, "loss": 0.1235, "num_input_tokens_seen": 495325872, "step": 2884 }, { "epoch": 0.7585980140724666, "loss": 0.06683582067489624, "loss_ce": 0.001055177883245051, "loss_iou": 0.5625, "loss_num": 0.01312255859375, "loss_xval": 0.06591796875, "num_input_tokens_seen": 495325872, "step": 2884 }, { "epoch": 0.7588610508318537, "grad_norm": 5.303316590010937, "learning_rate": 5e-06, "loss": 0.1331, "num_input_tokens_seen": 495497944, "step": 2885 }, { "epoch": 0.7588610508318537, "loss": 0.09937077760696411, "loss_ce": 0.00031071933335624635, "loss_iou": 0.5625, "loss_num": 0.019775390625, "loss_xval": 0.09912109375, "num_input_tokens_seen": 495497944, "step": 2885 }, { "epoch": 0.7591240875912408, "grad_norm": 5.954668834746019, "learning_rate": 5e-06, "loss": 0.0923, "num_input_tokens_seen": 495670336, "step": 2886 }, { "epoch": 0.7591240875912408, "loss": 0.04735005646944046, "loss_ce": 0.0002461753028910607, "loss_iou": 0.4375, "loss_num": 0.0093994140625, "loss_xval": 0.047119140625, "num_input_tokens_seen": 495670336, "step": 2886 }, { "epoch": 0.759387124350628, "grad_norm": 9.86702776219205, "learning_rate": 5e-06, "loss": 0.1488, "num_input_tokens_seen": 495842556, "step": 2887 }, { "epoch": 0.759387124350628, "loss": 0.09923535585403442, "loss_ce": 0.0010908262338489294, "loss_iou": null, "loss_num": 0.0196533203125, "loss_xval": 0.09814453125, "num_input_tokens_seen": 495842556, "step": 2887 }, { "epoch": 0.7596501611100152, "grad_norm": 11.076727887110565, "learning_rate": 5e-06, "loss": 0.1153, "num_input_tokens_seen": 496012000, "step": 2888 }, { "epoch": 0.7596501611100152, "loss": 0.07854500412940979, "loss_ce": 0.0005115569802001119, "loss_iou": 0.6171875, "loss_num": 0.01556396484375, "loss_xval": 0.078125, "num_input_tokens_seen": 496012000, "step": 2888 }, { "epoch": 0.7599131978694023, "grad_norm": 4.344396012390115, "learning_rate": 5e-06, "loss": 0.0945, 
"num_input_tokens_seen": 496181744, "step": 2889 }, { "epoch": 0.7599131978694023, "loss": 0.07574538886547089, "loss_ce": 0.0021980288438498974, "loss_iou": 0.5625, "loss_num": 0.01470947265625, "loss_xval": 0.07373046875, "num_input_tokens_seen": 496181744, "step": 2889 }, { "epoch": 0.7601762346287894, "grad_norm": 4.8762260883901964, "learning_rate": 5e-06, "loss": 0.1322, "num_input_tokens_seen": 496353892, "step": 2890 }, { "epoch": 0.7601762346287894, "loss": 0.1910967230796814, "loss_ce": 0.0009111673571169376, "loss_iou": 0.349609375, "loss_num": 0.0380859375, "loss_xval": 0.1904296875, "num_input_tokens_seen": 496353892, "step": 2890 }, { "epoch": 0.7604392713881765, "grad_norm": 31.258153024005672, "learning_rate": 5e-06, "loss": 0.1188, "num_input_tokens_seen": 496525916, "step": 2891 }, { "epoch": 0.7604392713881765, "loss": 0.07003885507583618, "loss_ce": 0.0008249912643805146, "loss_iou": 0.546875, "loss_num": 0.01385498046875, "loss_xval": 0.0693359375, "num_input_tokens_seen": 496525916, "step": 2891 }, { "epoch": 0.7607023081475637, "grad_norm": 3.99666824293353, "learning_rate": 5e-06, "loss": 0.1166, "num_input_tokens_seen": 496698052, "step": 2892 }, { "epoch": 0.7607023081475637, "loss": 0.09317123889923096, "loss_ce": 0.0031443799380213022, "loss_iou": 0.59765625, "loss_num": 0.01806640625, "loss_xval": 0.08984375, "num_input_tokens_seen": 496698052, "step": 2892 }, { "epoch": 0.7609653449069508, "grad_norm": 3.917894736012383, "learning_rate": 5e-06, "loss": 0.1374, "num_input_tokens_seen": 496868580, "step": 2893 }, { "epoch": 0.7609653449069508, "loss": 0.1594741940498352, "loss_ce": 0.0014236548449844122, "loss_iou": 0.462890625, "loss_num": 0.03173828125, "loss_xval": 0.158203125, "num_input_tokens_seen": 496868580, "step": 2893 }, { "epoch": 0.7612283816663379, "grad_norm": 6.805778622837066, "learning_rate": 5e-06, "loss": 0.0953, "num_input_tokens_seen": 497040720, "step": 2894 }, { "epoch": 0.7612283816663379, "loss": 
0.04234718531370163, "loss_ce": 0.0003244808176532388, "loss_iou": 0.44140625, "loss_num": 0.0084228515625, "loss_xval": 0.0419921875, "num_input_tokens_seen": 497040720, "step": 2894 }, { "epoch": 0.761491418425725, "grad_norm": 8.055134529886692, "learning_rate": 5e-06, "loss": 0.1017, "num_input_tokens_seen": 497213204, "step": 2895 }, { "epoch": 0.761491418425725, "loss": 0.06772617995738983, "loss_ce": 0.0006180237978696823, "loss_iou": 0.5546875, "loss_num": 0.013427734375, "loss_xval": 0.06689453125, "num_input_tokens_seen": 497213204, "step": 2895 }, { "epoch": 0.7617544551851121, "grad_norm": 20.74511641540883, "learning_rate": 5e-06, "loss": 0.1427, "num_input_tokens_seen": 497382348, "step": 2896 }, { "epoch": 0.7617544551851121, "loss": 0.1839737594127655, "loss_ce": 0.006239374168217182, "loss_iou": 0.5, "loss_num": 0.03564453125, "loss_xval": 0.177734375, "num_input_tokens_seen": 497382348, "step": 2896 }, { "epoch": 0.7620174919444992, "grad_norm": 3.827051566364027, "learning_rate": 5e-06, "loss": 0.1266, "num_input_tokens_seen": 497554288, "step": 2897 }, { "epoch": 0.7620174919444992, "loss": 0.1408492773771286, "loss_ce": 0.0003158297040499747, "loss_iou": 0.59375, "loss_num": 0.028076171875, "loss_xval": 0.140625, "num_input_tokens_seen": 497554288, "step": 2897 }, { "epoch": 0.7622805287038864, "grad_norm": 4.251155438400414, "learning_rate": 5e-06, "loss": 0.0949, "num_input_tokens_seen": 497726624, "step": 2898 }, { "epoch": 0.7622805287038864, "loss": 0.08204406499862671, "loss_ce": 0.0024694851599633694, "loss_iou": 0.6015625, "loss_num": 0.015869140625, "loss_xval": 0.07958984375, "num_input_tokens_seen": 497726624, "step": 2898 }, { "epoch": 0.7625435654632735, "grad_norm": 25.921624546006864, "learning_rate": 5e-06, "loss": 0.1214, "num_input_tokens_seen": 497899052, "step": 2899 }, { "epoch": 0.7625435654632735, "loss": 0.125847727060318, "loss_ce": 5.42689704161603e-05, "loss_iou": 0.546875, "loss_num": 0.025146484375, "loss_xval": 
0.1259765625, "num_input_tokens_seen": 497899052, "step": 2899 }, { "epoch": 0.7628066022226606, "grad_norm": 3.225720909448473, "learning_rate": 5e-06, "loss": 0.0838, "num_input_tokens_seen": 498071164, "step": 2900 }, { "epoch": 0.7628066022226606, "loss": 0.13742178678512573, "loss_ce": 0.002015294972807169, "loss_iou": 0.5625, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 498071164, "step": 2900 }, { "epoch": 0.7630696389820477, "grad_norm": 10.436595772958983, "learning_rate": 5e-06, "loss": 0.1184, "num_input_tokens_seen": 498243380, "step": 2901 }, { "epoch": 0.7630696389820477, "loss": 0.08406674116849899, "loss_ce": 0.0019744576420634985, "loss_iou": 0.482421875, "loss_num": 0.016357421875, "loss_xval": 0.08203125, "num_input_tokens_seen": 498243380, "step": 2901 }, { "epoch": 0.7633326757414348, "grad_norm": 5.876814424855767, "learning_rate": 5e-06, "loss": 0.0899, "num_input_tokens_seen": 498415740, "step": 2902 }, { "epoch": 0.7633326757414348, "loss": 0.12014832347631454, "loss_ce": 0.0023199557326734066, "loss_iou": 0.52734375, "loss_num": 0.0235595703125, "loss_xval": 0.11767578125, "num_input_tokens_seen": 498415740, "step": 2902 }, { "epoch": 0.763595712500822, "grad_norm": 6.217199501402136, "learning_rate": 5e-06, "loss": 0.1104, "num_input_tokens_seen": 498587740, "step": 2903 }, { "epoch": 0.763595712500822, "loss": 0.0937931090593338, "loss_ce": 0.0013553638709709048, "loss_iou": 0.6171875, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 498587740, "step": 2903 }, { "epoch": 0.7638587492602091, "grad_norm": 4.965193192238494, "learning_rate": 5e-06, "loss": 0.1069, "num_input_tokens_seen": 498759856, "step": 2904 }, { "epoch": 0.7638587492602091, "loss": 0.0735275000333786, "loss_ce": 0.00013272266369313002, "loss_iou": 0.578125, "loss_num": 0.01470947265625, "loss_xval": 0.0732421875, "num_input_tokens_seen": 498759856, "step": 2904 }, { "epoch": 0.7641217860195962, 
"grad_norm": 36.02324764024362, "learning_rate": 5e-06, "loss": 0.1135, "num_input_tokens_seen": 498929524, "step": 2905 }, { "epoch": 0.7641217860195962, "loss": 0.13745470345020294, "loss_ce": 0.002353382296860218, "loss_iou": 0.40234375, "loss_num": 0.0269775390625, "loss_xval": 0.134765625, "num_input_tokens_seen": 498929524, "step": 2905 }, { "epoch": 0.7643848227789833, "grad_norm": 4.064476858285287, "learning_rate": 5e-06, "loss": 0.1233, "num_input_tokens_seen": 499099092, "step": 2906 }, { "epoch": 0.7643848227789833, "loss": 0.12520155310630798, "loss_ce": 0.0020325970835983753, "loss_iou": 0.431640625, "loss_num": 0.024658203125, "loss_xval": 0.123046875, "num_input_tokens_seen": 499099092, "step": 2906 }, { "epoch": 0.7646478595383704, "grad_norm": 9.564182625696946, "learning_rate": 5e-06, "loss": 0.1037, "num_input_tokens_seen": 499271132, "step": 2907 }, { "epoch": 0.7646478595383704, "loss": 0.10383596271276474, "loss_ce": 0.0036772743333131075, "loss_iou": 0.6015625, "loss_num": 0.02001953125, "loss_xval": 0.10009765625, "num_input_tokens_seen": 499271132, "step": 2907 }, { "epoch": 0.7649108962977577, "grad_norm": 7.38548887746826, "learning_rate": 5e-06, "loss": 0.1407, "num_input_tokens_seen": 499443276, "step": 2908 }, { "epoch": 0.7649108962977577, "loss": 0.07420553267002106, "loss_ce": 0.006151327397674322, "loss_iou": 0.32421875, "loss_num": 0.01361083984375, "loss_xval": 0.06787109375, "num_input_tokens_seen": 499443276, "step": 2908 }, { "epoch": 0.7651739330571448, "grad_norm": 3.357102180261463, "learning_rate": 5e-06, "loss": 0.1387, "num_input_tokens_seen": 499615436, "step": 2909 }, { "epoch": 0.7651739330571448, "loss": 0.13687880337238312, "loss_ce": 0.0005110005149617791, "loss_iou": 0.52734375, "loss_num": 0.0272216796875, "loss_xval": 0.13671875, "num_input_tokens_seen": 499615436, "step": 2909 }, { "epoch": 0.7654369698165319, "grad_norm": 5.532042225189138, "learning_rate": 5e-06, "loss": 0.1378, "num_input_tokens_seen": 
499787568, "step": 2910 }, { "epoch": 0.7654369698165319, "loss": 0.10780411958694458, "loss_ce": 0.0017250193050131202, "loss_iou": 0.423828125, "loss_num": 0.021240234375, "loss_xval": 0.10595703125, "num_input_tokens_seen": 499787568, "step": 2910 }, { "epoch": 0.765700006575919, "grad_norm": 9.098827733621626, "learning_rate": 5e-06, "loss": 0.1118, "num_input_tokens_seen": 499959744, "step": 2911 }, { "epoch": 0.765700006575919, "loss": 0.11246542632579803, "loss_ce": 0.0012135956203565001, "loss_iou": 0.431640625, "loss_num": 0.022216796875, "loss_xval": 0.111328125, "num_input_tokens_seen": 499959744, "step": 2911 }, { "epoch": 0.7659630433353061, "grad_norm": 4.671256681679922, "learning_rate": 5e-06, "loss": 0.0884, "num_input_tokens_seen": 500132012, "step": 2912 }, { "epoch": 0.7659630433353061, "loss": 0.07265074551105499, "loss_ce": 0.003986193798482418, "loss_iou": 0.478515625, "loss_num": 0.01373291015625, "loss_xval": 0.06884765625, "num_input_tokens_seen": 500132012, "step": 2912 }, { "epoch": 0.7662260800946933, "grad_norm": 9.318330323174848, "learning_rate": 5e-06, "loss": 0.1125, "num_input_tokens_seen": 500304120, "step": 2913 }, { "epoch": 0.7662260800946933, "loss": 0.10342703014612198, "loss_ce": 0.0003996905288659036, "loss_iou": 0.55859375, "loss_num": 0.0206298828125, "loss_xval": 0.10302734375, "num_input_tokens_seen": 500304120, "step": 2913 }, { "epoch": 0.7664891168540804, "grad_norm": 4.11205791514663, "learning_rate": 5e-06, "loss": 0.108, "num_input_tokens_seen": 500476220, "step": 2914 }, { "epoch": 0.7664891168540804, "loss": 0.16668415069580078, "loss_ce": 0.00170612963847816, "loss_iou": 0.33203125, "loss_num": 0.032958984375, "loss_xval": 0.1650390625, "num_input_tokens_seen": 500476220, "step": 2914 }, { "epoch": 0.7667521536134675, "grad_norm": 7.921166949882202, "learning_rate": 5e-06, "loss": 0.1248, "num_input_tokens_seen": 500648456, "step": 2915 }, { "epoch": 0.7667521536134675, "loss": 0.14688915014266968, "loss_ce": 
0.0016559937503188848, "loss_iou": 0.546875, "loss_num": 0.029052734375, "loss_xval": 0.1455078125, "num_input_tokens_seen": 500648456, "step": 2915 }, { "epoch": 0.7670151903728546, "grad_norm": 7.89559416185462, "learning_rate": 5e-06, "loss": 0.1163, "num_input_tokens_seen": 500817440, "step": 2916 }, { "epoch": 0.7670151903728546, "loss": 0.13112960755825043, "loss_ce": 0.0047868345864117146, "loss_iou": 0.5, "loss_num": 0.0252685546875, "loss_xval": 0.1259765625, "num_input_tokens_seen": 500817440, "step": 2916 }, { "epoch": 0.7672782271322417, "grad_norm": 7.07373988410314, "learning_rate": 5e-06, "loss": 0.1543, "num_input_tokens_seen": 500989576, "step": 2917 }, { "epoch": 0.7672782271322417, "loss": 0.17218878865242004, "loss_ce": 0.0034876265563070774, "loss_iou": 0.55859375, "loss_num": 0.03369140625, "loss_xval": 0.1689453125, "num_input_tokens_seen": 500989576, "step": 2917 }, { "epoch": 0.7675412638916288, "grad_norm": 4.658693255259618, "learning_rate": 5e-06, "loss": 0.0942, "num_input_tokens_seen": 501161844, "step": 2918 }, { "epoch": 0.7675412638916288, "loss": 0.0911635234951973, "loss_ce": 0.0017775364685803652, "loss_iou": 0.474609375, "loss_num": 0.017822265625, "loss_xval": 0.08935546875, "num_input_tokens_seen": 501161844, "step": 2918 }, { "epoch": 0.767804300651016, "grad_norm": 5.6482830730287485, "learning_rate": 5e-06, "loss": 0.1456, "num_input_tokens_seen": 501334012, "step": 2919 }, { "epoch": 0.767804300651016, "loss": 0.12131966650485992, "loss_ce": 0.00028695265064015985, "loss_iou": 0.52734375, "loss_num": 0.0242919921875, "loss_xval": 0.12109375, "num_input_tokens_seen": 501334012, "step": 2919 }, { "epoch": 0.7680673374104031, "grad_norm": 6.055907471900818, "learning_rate": 5e-06, "loss": 0.1116, "num_input_tokens_seen": 501504632, "step": 2920 }, { "epoch": 0.7680673374104031, "loss": 0.06848346441984177, "loss_ce": 0.0003071928513236344, "loss_iou": 0.44921875, "loss_num": 0.01361083984375, "loss_xval": 0.068359375, 
"num_input_tokens_seen": 501504632, "step": 2920 }, { "epoch": 0.7683303741697902, "grad_norm": 4.128716866112718, "learning_rate": 5e-06, "loss": 0.125, "num_input_tokens_seen": 501676892, "step": 2921 }, { "epoch": 0.7683303741697902, "loss": 0.12142758071422577, "loss_ce": 0.000974692520685494, "loss_iou": 0.5, "loss_num": 0.0240478515625, "loss_xval": 0.12060546875, "num_input_tokens_seen": 501676892, "step": 2921 }, { "epoch": 0.7685934109291773, "grad_norm": 6.285708685708194, "learning_rate": 5e-06, "loss": 0.1141, "num_input_tokens_seen": 501846408, "step": 2922 }, { "epoch": 0.7685934109291773, "loss": 0.13839051127433777, "loss_ce": 0.0014276191359385848, "loss_iou": 0.5703125, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 501846408, "step": 2922 }, { "epoch": 0.7688564476885644, "grad_norm": 5.199523762158551, "learning_rate": 5e-06, "loss": 0.0935, "num_input_tokens_seen": 502018884, "step": 2923 }, { "epoch": 0.7688564476885644, "loss": 0.0823674127459526, "loss_ce": 0.0034794718958437443, "loss_iou": 0.69921875, "loss_num": 0.0157470703125, "loss_xval": 0.0791015625, "num_input_tokens_seen": 502018884, "step": 2923 }, { "epoch": 0.7691194844479516, "grad_norm": 7.865534449035968, "learning_rate": 5e-06, "loss": 0.1235, "num_input_tokens_seen": 502189840, "step": 2924 }, { "epoch": 0.7691194844479516, "loss": 0.09236955642700195, "loss_ce": 0.0008473452762700617, "loss_iou": 0.5234375, "loss_num": 0.018310546875, "loss_xval": 0.09130859375, "num_input_tokens_seen": 502189840, "step": 2924 }, { "epoch": 0.7693825212073387, "grad_norm": 4.464952808674884, "learning_rate": 5e-06, "loss": 0.1007, "num_input_tokens_seen": 502361908, "step": 2925 }, { "epoch": 0.7693825212073387, "loss": 0.09519391506910324, "loss_ce": 0.002862985013052821, "loss_iou": 0.40234375, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 502361908, "step": 2925 }, { "epoch": 0.7696455579667258, "grad_norm": 
3.137863532599948, "learning_rate": 5e-06, "loss": 0.1102, "num_input_tokens_seen": 502533852, "step": 2926 }, { "epoch": 0.7696455579667258, "loss": 0.062405504286289215, "loss_ce": 0.000225941272219643, "loss_iou": 0.52734375, "loss_num": 0.012451171875, "loss_xval": 0.062255859375, "num_input_tokens_seen": 502533852, "step": 2926 }, { "epoch": 0.769908594726113, "grad_norm": 5.424523366119041, "learning_rate": 5e-06, "loss": 0.1141, "num_input_tokens_seen": 502706068, "step": 2927 }, { "epoch": 0.769908594726113, "loss": 0.15939806401729584, "loss_ce": 0.0009050165535882115, "loss_iou": 0.6484375, "loss_num": 0.03173828125, "loss_xval": 0.158203125, "num_input_tokens_seen": 502706068, "step": 2927 }, { "epoch": 0.7701716314855, "grad_norm": 21.020644002444403, "learning_rate": 5e-06, "loss": 0.1145, "num_input_tokens_seen": 502878204, "step": 2928 }, { "epoch": 0.7701716314855, "loss": 0.1498870849609375, "loss_ce": 0.00108338613063097, "loss_iou": 0.3984375, "loss_num": 0.02978515625, "loss_xval": 0.1484375, "num_input_tokens_seen": 502878204, "step": 2928 }, { "epoch": 0.7704346682448873, "grad_norm": 6.513721562381314, "learning_rate": 5e-06, "loss": 0.0967, "num_input_tokens_seen": 503050516, "step": 2929 }, { "epoch": 0.7704346682448873, "loss": 0.14237374067306519, "loss_ce": 0.0013520093634724617, "loss_iou": 0.53125, "loss_num": 0.0281982421875, "loss_xval": 0.140625, "num_input_tokens_seen": 503050516, "step": 2929 }, { "epoch": 0.7706977050042744, "grad_norm": 21.68669265107994, "learning_rate": 5e-06, "loss": 0.1036, "num_input_tokens_seen": 503222664, "step": 2930 }, { "epoch": 0.7706977050042744, "loss": 0.09872519969940186, "loss_ce": 0.0024269861169159412, "loss_iou": 0.53125, "loss_num": 0.019287109375, "loss_xval": 0.09619140625, "num_input_tokens_seen": 503222664, "step": 2930 }, { "epoch": 0.7709607417636615, "grad_norm": 23.71932442538902, "learning_rate": 5e-06, "loss": 0.1131, "num_input_tokens_seen": 503394820, "step": 2931 }, { "epoch": 
0.7709607417636615, "loss": 0.14681634306907654, "loss_ce": 0.0005455805221572518, "loss_iou": 0.359375, "loss_num": 0.029296875, "loss_xval": 0.146484375, "num_input_tokens_seen": 503394820, "step": 2931 }, { "epoch": 0.7712237785230486, "grad_norm": 14.0356485916023, "learning_rate": 5e-06, "loss": 0.1609, "num_input_tokens_seen": 503567064, "step": 2932 }, { "epoch": 0.7712237785230486, "loss": 0.1981443464756012, "loss_ce": 0.002374083735048771, "loss_iou": 0.4453125, "loss_num": 0.0390625, "loss_xval": 0.1953125, "num_input_tokens_seen": 503567064, "step": 2932 }, { "epoch": 0.7714868152824357, "grad_norm": 4.952386040850401, "learning_rate": 5e-06, "loss": 0.1147, "num_input_tokens_seen": 503739480, "step": 2933 }, { "epoch": 0.7714868152824357, "loss": 0.2314581573009491, "loss_ce": 0.002728914376348257, "loss_iou": 0.40234375, "loss_num": 0.045654296875, "loss_xval": 0.228515625, "num_input_tokens_seen": 503739480, "step": 2933 }, { "epoch": 0.7717498520418229, "grad_norm": 5.454737692407838, "learning_rate": 5e-06, "loss": 0.0938, "num_input_tokens_seen": 503911536, "step": 2934 }, { "epoch": 0.7717498520418229, "loss": 0.1048421710729599, "loss_ce": 0.004591919481754303, "loss_iou": 0.474609375, "loss_num": 0.02001953125, "loss_xval": 0.10009765625, "num_input_tokens_seen": 503911536, "step": 2934 }, { "epoch": 0.77201288880121, "grad_norm": 7.9773349981616795, "learning_rate": 5e-06, "loss": 0.108, "num_input_tokens_seen": 504082084, "step": 2935 }, { "epoch": 0.77201288880121, "loss": 0.1008715033531189, "loss_ce": 0.0018419669941067696, "loss_iou": 0.5234375, "loss_num": 0.019775390625, "loss_xval": 0.09912109375, "num_input_tokens_seen": 504082084, "step": 2935 }, { "epoch": 0.7722759255605971, "grad_norm": 5.256239273173906, "learning_rate": 5e-06, "loss": 0.1012, "num_input_tokens_seen": 504254328, "step": 2936 }, { "epoch": 0.7722759255605971, "loss": 0.10146059095859528, "loss_ce": 0.0031024364288896322, "loss_iou": 0.50390625, "loss_num": 
0.0196533203125, "loss_xval": 0.09814453125, "num_input_tokens_seen": 504254328, "step": 2936 }, { "epoch": 0.7725389623199842, "grad_norm": 7.796343823545596, "learning_rate": 5e-06, "loss": 0.1617, "num_input_tokens_seen": 504426408, "step": 2937 }, { "epoch": 0.7725389623199842, "loss": 0.09895452111959457, "loss_ce": 0.00047429182450287044, "loss_iou": 0.4375, "loss_num": 0.0196533203125, "loss_xval": 0.0986328125, "num_input_tokens_seen": 504426408, "step": 2937 }, { "epoch": 0.7728019990793713, "grad_norm": 4.506271114804364, "learning_rate": 5e-06, "loss": 0.0958, "num_input_tokens_seen": 504599160, "step": 2938 }, { "epoch": 0.7728019990793713, "loss": 0.10439816117286682, "loss_ce": 0.0011114203371107578, "loss_iou": 0.53515625, "loss_num": 0.0206298828125, "loss_xval": 0.103515625, "num_input_tokens_seen": 504599160, "step": 2938 }, { "epoch": 0.7730650358387585, "grad_norm": 7.051793538200423, "learning_rate": 5e-06, "loss": 0.0948, "num_input_tokens_seen": 504771620, "step": 2939 }, { "epoch": 0.7730650358387585, "loss": 0.04662296548485756, "loss_ce": 0.001548499334603548, "loss_iou": 0.51953125, "loss_num": 0.009033203125, "loss_xval": 0.045166015625, "num_input_tokens_seen": 504771620, "step": 2939 }, { "epoch": 0.7733280725981456, "grad_norm": 4.48321731819358, "learning_rate": 5e-06, "loss": 0.1049, "num_input_tokens_seen": 504942212, "step": 2940 }, { "epoch": 0.7733280725981456, "loss": 0.1206519603729248, "loss_ce": 0.0022132450249046087, "loss_iou": 0.49609375, "loss_num": 0.023681640625, "loss_xval": 0.11865234375, "num_input_tokens_seen": 504942212, "step": 2940 }, { "epoch": 0.7735911093575327, "grad_norm": 3.542637672706516, "learning_rate": 5e-06, "loss": 0.0699, "num_input_tokens_seen": 505114396, "step": 2941 }, { "epoch": 0.7735911093575327, "loss": 0.07642598450183868, "loss_ce": 0.002970176050439477, "loss_iou": 0.59375, "loss_num": 0.01470947265625, "loss_xval": 0.0732421875, "num_input_tokens_seen": 505114396, "step": 2941 }, { 
"epoch": 0.7738541461169198, "grad_norm": 13.31557941839502, "learning_rate": 5e-06, "loss": 0.152, "num_input_tokens_seen": 505286572, "step": 2942 }, { "epoch": 0.7738541461169198, "loss": 0.11694711446762085, "loss_ce": 0.000492042163386941, "loss_iou": 0.484375, "loss_num": 0.0233154296875, "loss_xval": 0.1162109375, "num_input_tokens_seen": 505286572, "step": 2942 }, { "epoch": 0.7741171828763069, "grad_norm": 4.217985208788173, "learning_rate": 5e-06, "loss": 0.1221, "num_input_tokens_seen": 505458580, "step": 2943 }, { "epoch": 0.7741171828763069, "loss": 0.12103226035833359, "loss_ce": 0.00256301905028522, "loss_iou": 0.6640625, "loss_num": 0.023681640625, "loss_xval": 0.11865234375, "num_input_tokens_seen": 505458580, "step": 2943 }, { "epoch": 0.774380219635694, "grad_norm": 5.305953482185517, "learning_rate": 5e-06, "loss": 0.1417, "num_input_tokens_seen": 505630628, "step": 2944 }, { "epoch": 0.774380219635694, "loss": 0.13166974484920502, "loss_ce": 0.0009019209537655115, "loss_iou": 0.302734375, "loss_num": 0.026123046875, "loss_xval": 0.130859375, "num_input_tokens_seen": 505630628, "step": 2944 }, { "epoch": 0.7746432563950812, "grad_norm": 3.7265892335604103, "learning_rate": 5e-06, "loss": 0.1021, "num_input_tokens_seen": 505802760, "step": 2945 }, { "epoch": 0.7746432563950812, "loss": 0.174719899892807, "loss_ce": 0.005622013006359339, "loss_iou": 0.515625, "loss_num": 0.03369140625, "loss_xval": 0.1689453125, "num_input_tokens_seen": 505802760, "step": 2945 }, { "epoch": 0.7749062931544684, "grad_norm": 3.906958326198367, "learning_rate": 5e-06, "loss": 0.0915, "num_input_tokens_seen": 505975008, "step": 2946 }, { "epoch": 0.7749062931544684, "loss": 0.15144148468971252, "loss_ce": 0.003522793762385845, "loss_iou": 0.609375, "loss_num": 0.029541015625, "loss_xval": 0.1474609375, "num_input_tokens_seen": 505975008, "step": 2946 }, { "epoch": 0.7751693299138555, "grad_norm": 30.273164796131727, "learning_rate": 5e-06, "loss": 0.1318, 
"num_input_tokens_seen": 506146864, "step": 2947 }, { "epoch": 0.7751693299138555, "loss": 0.12878431379795074, "loss_ce": 0.00238050683401525, "loss_iou": 0.52734375, "loss_num": 0.0252685546875, "loss_xval": 0.1259765625, "num_input_tokens_seen": 506146864, "step": 2947 }, { "epoch": 0.7754323666732426, "grad_norm": 9.326627239945015, "learning_rate": 5e-06, "loss": 0.1024, "num_input_tokens_seen": 506317520, "step": 2948 }, { "epoch": 0.7754323666732426, "loss": 0.13574595749378204, "loss_ce": 0.001163432258181274, "loss_iou": 0.384765625, "loss_num": 0.02685546875, "loss_xval": 0.134765625, "num_input_tokens_seen": 506317520, "step": 2948 }, { "epoch": 0.7756954034326297, "grad_norm": 4.291282220054208, "learning_rate": 5e-06, "loss": 0.0979, "num_input_tokens_seen": 506489644, "step": 2949 }, { "epoch": 0.7756954034326297, "loss": 0.07531201094388962, "loss_ce": 0.0006965312641113997, "loss_iou": 0.5625, "loss_num": 0.01495361328125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 506489644, "step": 2949 }, { "epoch": 0.7759584401920169, "grad_norm": 5.132046879043553, "learning_rate": 5e-06, "loss": 0.0948, "num_input_tokens_seen": 506661552, "step": 2950 }, { "epoch": 0.7759584401920169, "loss": 0.12264753133058548, "loss_ce": 0.003171210875734687, "loss_iou": 0.6015625, "loss_num": 0.02392578125, "loss_xval": 0.11962890625, "num_input_tokens_seen": 506661552, "step": 2950 }, { "epoch": 0.776221476951404, "grad_norm": 18.605800930237596, "learning_rate": 5e-06, "loss": 0.1066, "num_input_tokens_seen": 506832080, "step": 2951 }, { "epoch": 0.776221476951404, "loss": 0.08513291925191879, "loss_ce": 0.0006144904182292521, "loss_iou": 0.5390625, "loss_num": 0.0169677734375, "loss_xval": 0.08447265625, "num_input_tokens_seen": 506832080, "step": 2951 }, { "epoch": 0.7764845137107911, "grad_norm": 13.809771584503252, "learning_rate": 5e-06, "loss": 0.148, "num_input_tokens_seen": 507004396, "step": 2952 }, { "epoch": 0.7764845137107911, "loss": 
0.16220027208328247, "loss_ce": 0.007384595461189747, "loss_iou": 0.51953125, "loss_num": 0.031005859375, "loss_xval": 0.1552734375, "num_input_tokens_seen": 507004396, "step": 2952 }, { "epoch": 0.7767475504701782, "grad_norm": 6.052098852686667, "learning_rate": 5e-06, "loss": 0.1281, "num_input_tokens_seen": 507176636, "step": 2953 }, { "epoch": 0.7767475504701782, "loss": 0.10348343849182129, "loss_ce": 7.461909262929112e-05, "loss_iou": 0.640625, "loss_num": 0.020751953125, "loss_xval": 0.103515625, "num_input_tokens_seen": 507176636, "step": 2953 }, { "epoch": 0.7770105872295653, "grad_norm": 6.115673418463162, "learning_rate": 5e-06, "loss": 0.1208, "num_input_tokens_seen": 507348812, "step": 2954 }, { "epoch": 0.7770105872295653, "loss": 0.17799662053585052, "loss_ce": 0.0009031177032738924, "loss_iou": 0.55078125, "loss_num": 0.035400390625, "loss_xval": 0.1767578125, "num_input_tokens_seen": 507348812, "step": 2954 }, { "epoch": 0.7772736239889525, "grad_norm": 4.920564715708625, "learning_rate": 5e-06, "loss": 0.1242, "num_input_tokens_seen": 507521260, "step": 2955 }, { "epoch": 0.7772736239889525, "loss": 0.12127618491649628, "loss_ce": 0.002562812063843012, "loss_iou": 0.625, "loss_num": 0.023681640625, "loss_xval": 0.11865234375, "num_input_tokens_seen": 507521260, "step": 2955 }, { "epoch": 0.7775366607483396, "grad_norm": 4.1269903282175875, "learning_rate": 5e-06, "loss": 0.11, "num_input_tokens_seen": 507693580, "step": 2956 }, { "epoch": 0.7775366607483396, "loss": 0.1181151419878006, "loss_ce": 0.0023619618732482195, "loss_iou": 0.5234375, "loss_num": 0.023193359375, "loss_xval": 0.11572265625, "num_input_tokens_seen": 507693580, "step": 2956 }, { "epoch": 0.7777996975077267, "grad_norm": 6.530908453055091, "learning_rate": 5e-06, "loss": 0.1223, "num_input_tokens_seen": 507866000, "step": 2957 }, { "epoch": 0.7777996975077267, "loss": 0.09897395223379135, "loss_ce": 0.001989088486880064, "loss_iou": 0.5234375, "loss_num": 0.0194091796875, 
"loss_xval": 0.09716796875, "num_input_tokens_seen": 507866000, "step": 2957 }, { "epoch": 0.7780627342671138, "grad_norm": 13.799197287571243, "learning_rate": 5e-06, "loss": 0.0943, "num_input_tokens_seen": 508036520, "step": 2958 }, { "epoch": 0.7780627342671138, "loss": 0.12385988235473633, "loss_ce": 0.0020031901076436043, "loss_iou": 0.5078125, "loss_num": 0.0244140625, "loss_xval": 0.1220703125, "num_input_tokens_seen": 508036520, "step": 2958 }, { "epoch": 0.7783257710265009, "grad_norm": 6.771630034008941, "learning_rate": 5e-06, "loss": 0.1293, "num_input_tokens_seen": 508208512, "step": 2959 }, { "epoch": 0.7783257710265009, "loss": 0.09063053131103516, "loss_ce": 0.001976970350369811, "loss_iou": 0.70703125, "loss_num": 0.0177001953125, "loss_xval": 0.0888671875, "num_input_tokens_seen": 508208512, "step": 2959 }, { "epoch": 0.7785888077858881, "grad_norm": 17.947272506590085, "learning_rate": 5e-06, "loss": 0.1182, "num_input_tokens_seen": 508380916, "step": 2960 }, { "epoch": 0.7785888077858881, "loss": 0.18887397646903992, "loss_ce": 0.000855189049616456, "loss_iou": 0.349609375, "loss_num": 0.03759765625, "loss_xval": 0.1884765625, "num_input_tokens_seen": 508380916, "step": 2960 }, { "epoch": 0.7788518445452752, "grad_norm": 8.543500304163382, "learning_rate": 5e-06, "loss": 0.1273, "num_input_tokens_seen": 508551252, "step": 2961 }, { "epoch": 0.7788518445452752, "loss": 0.10420133173465729, "loss_ce": 0.000197422195924446, "loss_iou": 0.412109375, "loss_num": 0.020751953125, "loss_xval": 0.10400390625, "num_input_tokens_seen": 508551252, "step": 2961 }, { "epoch": 0.7791148813046623, "grad_norm": 11.527993211943581, "learning_rate": 5e-06, "loss": 0.1274, "num_input_tokens_seen": 508723096, "step": 2962 }, { "epoch": 0.7791148813046623, "loss": 0.13255223631858826, "loss_ce": 0.0016013117274269462, "loss_iou": 0.431640625, "loss_num": 0.026123046875, "loss_xval": 0.130859375, "num_input_tokens_seen": 508723096, "step": 2962 }, { "epoch": 
0.7793779180640494, "grad_norm": 3.765008399602552, "learning_rate": 5e-06, "loss": 0.1139, "num_input_tokens_seen": 508895336, "step": 2963 }, { "epoch": 0.7793779180640494, "loss": 0.15525312721729279, "loss_ce": 0.00026197341503575444, "loss_iou": 0.435546875, "loss_num": 0.031005859375, "loss_xval": 0.1552734375, "num_input_tokens_seen": 508895336, "step": 2963 }, { "epoch": 0.7796409548234365, "grad_norm": 4.0681392149062905, "learning_rate": 5e-06, "loss": 0.1164, "num_input_tokens_seen": 509065748, "step": 2964 }, { "epoch": 0.7796409548234365, "loss": 0.103541798889637, "loss_ce": 0.0006670450093224645, "loss_iou": 0.4765625, "loss_num": 0.0205078125, "loss_xval": 0.10302734375, "num_input_tokens_seen": 509065748, "step": 2964 }, { "epoch": 0.7799039915828238, "grad_norm": 5.764259729112096, "learning_rate": 5e-06, "loss": 0.1354, "num_input_tokens_seen": 509237988, "step": 2965 }, { "epoch": 0.7799039915828238, "loss": 0.09293576329946518, "loss_ce": 0.0004980218946002424, "loss_iou": 0.578125, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 509237988, "step": 2965 }, { "epoch": 0.7801670283422109, "grad_norm": 6.653297635090324, "learning_rate": 5e-06, "loss": 0.1324, "num_input_tokens_seen": 509407480, "step": 2966 }, { "epoch": 0.7801670283422109, "loss": 0.10752134025096893, "loss_ce": 0.0008471491746604443, "loss_iou": 0.546875, "loss_num": 0.0213623046875, "loss_xval": 0.1064453125, "num_input_tokens_seen": 509407480, "step": 2966 }, { "epoch": 0.780430065101598, "grad_norm": 5.8594293697060476, "learning_rate": 5e-06, "loss": 0.1152, "num_input_tokens_seen": 509579708, "step": 2967 }, { "epoch": 0.780430065101598, "loss": 0.19974008202552795, "loss_ce": 0.005846647545695305, "loss_iou": 0.3984375, "loss_num": 0.038818359375, "loss_xval": 0.1943359375, "num_input_tokens_seen": 509579708, "step": 2967 }, { "epoch": 0.7806931018609851, "grad_norm": 4.074243325830813, "learning_rate": 5e-06, "loss": 0.1193, 
"num_input_tokens_seen": 509750260, "step": 2968 }, { "epoch": 0.7806931018609851, "loss": 0.09352241456508636, "loss_ce": 0.00045905529987066984, "loss_iou": 0.5625, "loss_num": 0.0185546875, "loss_xval": 0.09326171875, "num_input_tokens_seen": 509750260, "step": 2968 }, { "epoch": 0.7809561386203722, "grad_norm": 18.473086839056062, "learning_rate": 5e-06, "loss": 0.1069, "num_input_tokens_seen": 509922416, "step": 2969 }, { "epoch": 0.7809561386203722, "loss": 0.10857398062944412, "loss_ce": 0.002586429938673973, "loss_iou": 0.50390625, "loss_num": 0.021240234375, "loss_xval": 0.10595703125, "num_input_tokens_seen": 509922416, "step": 2969 }, { "epoch": 0.7812191753797593, "grad_norm": 4.297341068075648, "learning_rate": 5e-06, "loss": 0.0869, "num_input_tokens_seen": 510094352, "step": 2970 }, { "epoch": 0.7812191753797593, "loss": 0.10554330050945282, "loss_ce": 0.002088708570227027, "loss_iou": 0.53125, "loss_num": 0.0206298828125, "loss_xval": 0.103515625, "num_input_tokens_seen": 510094352, "step": 2970 }, { "epoch": 0.7814822121391465, "grad_norm": 4.523551186515909, "learning_rate": 5e-06, "loss": 0.1321, "num_input_tokens_seen": 510266568, "step": 2971 }, { "epoch": 0.7814822121391465, "loss": 0.1601797491312027, "loss_ce": 0.000939027639105916, "loss_iou": 0.51171875, "loss_num": 0.03173828125, "loss_xval": 0.1591796875, "num_input_tokens_seen": 510266568, "step": 2971 }, { "epoch": 0.7817452488985336, "grad_norm": 11.257967210341226, "learning_rate": 5e-06, "loss": 0.125, "num_input_tokens_seen": 510438880, "step": 2972 }, { "epoch": 0.7817452488985336, "loss": 0.08428631722927094, "loss_ce": 0.0006986761000007391, "loss_iou": 0.44921875, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 510438880, "step": 2972 }, { "epoch": 0.7820082856579207, "grad_norm": 4.879983737803821, "learning_rate": 5e-06, "loss": 0.1164, "num_input_tokens_seen": 510610888, "step": 2973 }, { "epoch": 0.7820082856579207, "loss": 
0.061606645584106445, "loss_ce": 0.0010292520746588707, "loss_iou": 0.515625, "loss_num": 0.01214599609375, "loss_xval": 0.060546875, "num_input_tokens_seen": 510610888, "step": 2973 }, { "epoch": 0.7822713224173078, "grad_norm": 6.784338572567158, "learning_rate": 5e-06, "loss": 0.1436, "num_input_tokens_seen": 510780736, "step": 2974 }, { "epoch": 0.7822713224173078, "loss": 0.09628412127494812, "loss_ce": 0.0025646386202424765, "loss_iou": 0.51171875, "loss_num": 0.018798828125, "loss_xval": 0.09375, "num_input_tokens_seen": 510780736, "step": 2974 }, { "epoch": 0.7825343591766949, "grad_norm": 2.4966016721195827, "learning_rate": 5e-06, "loss": 0.1027, "num_input_tokens_seen": 510952740, "step": 2975 }, { "epoch": 0.7825343591766949, "loss": 0.11784331500530243, "loss_ce": 0.002517384709790349, "loss_iou": 0.5390625, "loss_num": 0.0230712890625, "loss_xval": 0.115234375, "num_input_tokens_seen": 510952740, "step": 2975 }, { "epoch": 0.7827973959360821, "grad_norm": 9.752032559631301, "learning_rate": 5e-06, "loss": 0.1082, "num_input_tokens_seen": 511124984, "step": 2976 }, { "epoch": 0.7827973959360821, "loss": 0.10057765245437622, "loss_ce": 0.001853286987170577, "loss_iou": 0.431640625, "loss_num": 0.019775390625, "loss_xval": 0.0986328125, "num_input_tokens_seen": 511124984, "step": 2976 }, { "epoch": 0.7830604326954692, "grad_norm": 4.047284415442861, "learning_rate": 5e-06, "loss": 0.1237, "num_input_tokens_seen": 511297176, "step": 2977 }, { "epoch": 0.7830604326954692, "loss": 0.1439676731824875, "loss_ce": 0.004929587244987488, "loss_iou": 0.47265625, "loss_num": 0.02783203125, "loss_xval": 0.138671875, "num_input_tokens_seen": 511297176, "step": 2977 }, { "epoch": 0.7833234694548563, "grad_norm": 4.111909259977495, "learning_rate": 5e-06, "loss": 0.121, "num_input_tokens_seen": 511469216, "step": 2978 }, { "epoch": 0.7833234694548563, "loss": 0.1689942479133606, "loss_ce": 0.008380233310163021, "loss_iou": 0.337890625, "loss_num": 0.0322265625, 
"loss_xval": 0.16015625, "num_input_tokens_seen": 511469216, "step": 2978 }, { "epoch": 0.7835865062142434, "grad_norm": 3.736811899980652, "learning_rate": 5e-06, "loss": 0.1142, "num_input_tokens_seen": 511641400, "step": 2979 }, { "epoch": 0.7835865062142434, "loss": 0.09928886592388153, "loss_ce": 0.0024871169589459896, "loss_iou": 0.5546875, "loss_num": 0.019287109375, "loss_xval": 0.0966796875, "num_input_tokens_seen": 511641400, "step": 2979 }, { "epoch": 0.7838495429736305, "grad_norm": 3.8885212513343204, "learning_rate": 5e-06, "loss": 0.0719, "num_input_tokens_seen": 511813644, "step": 2980 }, { "epoch": 0.7838495429736305, "loss": 0.08123628050088882, "loss_ce": 0.0019821308087557554, "loss_iou": 0.57421875, "loss_num": 0.015869140625, "loss_xval": 0.0791015625, "num_input_tokens_seen": 511813644, "step": 2980 }, { "epoch": 0.7841125797330177, "grad_norm": 3.6947801269712643, "learning_rate": 5e-06, "loss": 0.1196, "num_input_tokens_seen": 511985744, "step": 2981 }, { "epoch": 0.7841125797330177, "loss": 0.07377283275127411, "loss_ce": 0.0037197312340140343, "loss_iou": 0.71875, "loss_num": 0.0140380859375, "loss_xval": 0.06982421875, "num_input_tokens_seen": 511985744, "step": 2981 }, { "epoch": 0.7843756164924048, "grad_norm": 4.052383876786155, "learning_rate": 5e-06, "loss": 0.1062, "num_input_tokens_seen": 512157708, "step": 2982 }, { "epoch": 0.7843756164924048, "loss": 0.18616530299186707, "loss_ce": 0.00116774532943964, "loss_iou": 0.59375, "loss_num": 0.037109375, "loss_xval": 0.1845703125, "num_input_tokens_seen": 512157708, "step": 2982 }, { "epoch": 0.7846386532517919, "grad_norm": 3.4825813224277185, "learning_rate": 5e-06, "loss": 0.1122, "num_input_tokens_seen": 512330084, "step": 2983 }, { "epoch": 0.7846386532517919, "loss": 0.1472734808921814, "loss_ce": 0.0032915552146732807, "loss_iou": 0.341796875, "loss_num": 0.02880859375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 512330084, "step": 2983 }, { "epoch": 0.784901690011179, 
"grad_norm": 3.5815743222985947, "learning_rate": 5e-06, "loss": 0.0912, "num_input_tokens_seen": 512502076, "step": 2984 }, { "epoch": 0.784901690011179, "loss": 0.10481996834278107, "loss_ce": 0.0021588318049907684, "loss_iou": 0.51171875, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 512502076, "step": 2984 }, { "epoch": 0.7851647267705661, "grad_norm": 7.905852445879054, "learning_rate": 5e-06, "loss": 0.1202, "num_input_tokens_seen": 512674288, "step": 2985 }, { "epoch": 0.7851647267705661, "loss": 0.12316185235977173, "loss_ce": 0.0030751884914934635, "loss_iou": 0.5390625, "loss_num": 0.0240478515625, "loss_xval": 0.1201171875, "num_input_tokens_seen": 512674288, "step": 2985 }, { "epoch": 0.7854277635299534, "grad_norm": 5.926087069922328, "learning_rate": 5e-06, "loss": 0.0863, "num_input_tokens_seen": 512846624, "step": 2986 }, { "epoch": 0.7854277635299534, "loss": 0.06602786481380463, "loss_ce": 0.0011474882485345006, "loss_iou": 0.58203125, "loss_num": 0.01300048828125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 512846624, "step": 2986 }, { "epoch": 0.7856908002893405, "grad_norm": 5.039950797702919, "learning_rate": 5e-06, "loss": 0.1472, "num_input_tokens_seen": 513018668, "step": 2987 }, { "epoch": 0.7856908002893405, "loss": 0.10930870473384857, "loss_ce": 0.004847035743296146, "loss_iou": 0.5078125, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 513018668, "step": 2987 }, { "epoch": 0.7859538370487276, "grad_norm": 4.823970003989147, "learning_rate": 5e-06, "loss": 0.1425, "num_input_tokens_seen": 513190872, "step": 2988 }, { "epoch": 0.7859538370487276, "loss": 0.15449807047843933, "loss_ce": 0.0005368961137719452, "loss_iou": 0.486328125, "loss_num": 0.0308837890625, "loss_xval": 0.154296875, "num_input_tokens_seen": 513190872, "step": 2988 }, { "epoch": 0.7862168738081147, "grad_norm": 11.156095172455718, "learning_rate": 5e-06, "loss": 0.1369, "num_input_tokens_seen": 
513363216, "step": 2989 }, { "epoch": 0.7862168738081147, "loss": 0.07052050530910492, "loss_ce": 0.002100098878145218, "loss_iou": 0.431640625, "loss_num": 0.013671875, "loss_xval": 0.068359375, "num_input_tokens_seen": 513363216, "step": 2989 }, { "epoch": 0.7864799105675018, "grad_norm": 6.545014028470699, "learning_rate": 5e-06, "loss": 0.1922, "num_input_tokens_seen": 513532780, "step": 2990 }, { "epoch": 0.7864799105675018, "loss": 0.20626530051231384, "loss_ce": 0.004162629134953022, "loss_iou": 0.515625, "loss_num": 0.04052734375, "loss_xval": 0.2021484375, "num_input_tokens_seen": 513532780, "step": 2990 }, { "epoch": 0.786742947326889, "grad_norm": 5.675250290858523, "learning_rate": 5e-06, "loss": 0.1315, "num_input_tokens_seen": 513705312, "step": 2991 }, { "epoch": 0.786742947326889, "loss": 0.10429085791110992, "loss_ce": 0.002728358842432499, "loss_iou": 0.53515625, "loss_num": 0.020263671875, "loss_xval": 0.1015625, "num_input_tokens_seen": 513705312, "step": 2991 }, { "epoch": 0.7870059840862761, "grad_norm": 3.771208751532015, "learning_rate": 5e-06, "loss": 0.0979, "num_input_tokens_seen": 513877348, "step": 2992 }, { "epoch": 0.7870059840862761, "loss": 0.09906225651502609, "loss_ce": 0.0011008285218849778, "loss_iou": 0.51171875, "loss_num": 0.01953125, "loss_xval": 0.09814453125, "num_input_tokens_seen": 513877348, "step": 2992 }, { "epoch": 0.7872690208456632, "grad_norm": 8.702146962798412, "learning_rate": 5e-06, "loss": 0.108, "num_input_tokens_seen": 514046516, "step": 2993 }, { "epoch": 0.7872690208456632, "loss": 0.11204151809215546, "loss_ce": 0.004131356719881296, "loss_iou": 0.50390625, "loss_num": 0.0216064453125, "loss_xval": 0.10791015625, "num_input_tokens_seen": 514046516, "step": 2993 }, { "epoch": 0.7875320576050503, "grad_norm": 6.0868763702009465, "learning_rate": 5e-06, "loss": 0.1044, "num_input_tokens_seen": 514218536, "step": 2994 }, { "epoch": 0.7875320576050503, "loss": 0.15223419666290283, "loss_ce": 
0.0040408410131931305, "loss_iou": 0.5390625, "loss_num": 0.0296630859375, "loss_xval": 0.1484375, "num_input_tokens_seen": 514218536, "step": 2994 }, { "epoch": 0.7877950943644374, "grad_norm": 4.672234975964948, "learning_rate": 5e-06, "loss": 0.0826, "num_input_tokens_seen": 514391068, "step": 2995 }, { "epoch": 0.7877950943644374, "loss": 0.05085252225399017, "loss_ce": 0.0038707097992300987, "loss_iou": 0.51171875, "loss_num": 0.0093994140625, "loss_xval": 0.046875, "num_input_tokens_seen": 514391068, "step": 2995 }, { "epoch": 0.7880581311238245, "grad_norm": 6.661101757033409, "learning_rate": 5e-06, "loss": 0.1373, "num_input_tokens_seen": 514561472, "step": 2996 }, { "epoch": 0.7880581311238245, "loss": 0.20267510414123535, "loss_ce": 0.000404604768846184, "loss_iou": 0.333984375, "loss_num": 0.04052734375, "loss_xval": 0.2021484375, "num_input_tokens_seen": 514561472, "step": 2996 }, { "epoch": 0.7883211678832117, "grad_norm": 9.694053172636844, "learning_rate": 5e-06, "loss": 0.148, "num_input_tokens_seen": 514733680, "step": 2997 }, { "epoch": 0.7883211678832117, "loss": 0.13084860146045685, "loss_ce": 0.002278049010783434, "loss_iou": 0.56640625, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 514733680, "step": 2997 }, { "epoch": 0.7885842046425988, "grad_norm": 69.63701857559735, "learning_rate": 5e-06, "loss": 0.1399, "num_input_tokens_seen": 514905800, "step": 2998 }, { "epoch": 0.7885842046425988, "loss": 0.13946260511875153, "loss_ce": 0.004315503872931004, "loss_iou": 0.48046875, "loss_num": 0.0269775390625, "loss_xval": 0.134765625, "num_input_tokens_seen": 514905800, "step": 2998 }, { "epoch": 0.7888472414019859, "grad_norm": 16.568333279972496, "learning_rate": 5e-06, "loss": 0.0888, "num_input_tokens_seen": 515077960, "step": 2999 }, { "epoch": 0.7888472414019859, "loss": 0.10318569839000702, "loss_ce": 0.004888580180704594, "loss_iou": 0.46875, "loss_num": 0.0196533203125, "loss_xval": 0.09814453125, 
"num_input_tokens_seen": 515077960, "step": 2999 }, { "epoch": 0.789110278161373, "grad_norm": 5.577264845510526, "learning_rate": 5e-06, "loss": 0.0948, "num_input_tokens_seen": 515248120, "step": 3000 }, { "epoch": 0.789110278161373, "eval_websight_new_CIoU": 0.8789950311183929, "eval_websight_new_GIoU": 0.881831705570221, "eval_websight_new_IoU": 0.8831256628036499, "eval_websight_new_MAE_all": 0.016948864795267582, "eval_websight_new_MAE_h": 0.00989671004936099, "eval_websight_new_MAE_w": 0.027918956242501736, "eval_websight_new_MAE_x": 0.024959519505500793, "eval_websight_new_MAE_y": 0.005020270356908441, "eval_websight_new_NUM_probability": 0.9999921023845673, "eval_websight_new_inside_bbox": 1.0, "eval_websight_new_loss": 0.08870729804039001, "eval_websight_new_loss_ce": 6.795420176786138e-06, "eval_websight_new_loss_iou": 0.3958740234375, "eval_websight_new_loss_num": 0.016002655029296875, "eval_websight_new_loss_xval": 0.0800323486328125, "eval_websight_new_runtime": 55.9259, "eval_websight_new_samples_per_second": 0.894, "eval_websight_new_steps_per_second": 0.036, "num_input_tokens_seen": 515248120, "step": 3000 }, { "epoch": 0.789110278161373, "eval_seeclick_CIoU": 0.6246416866779327, "eval_seeclick_GIoU": 0.6264897286891937, "eval_seeclick_IoU": 0.650597870349884, "eval_seeclick_MAE_all": 0.04822420887649059, "eval_seeclick_MAE_h": 0.030418247915804386, "eval_seeclick_MAE_w": 0.0654730387032032, "eval_seeclick_MAE_x": 0.07548732310533524, "eval_seeclick_MAE_y": 0.0215182239189744, "eval_seeclick_NUM_probability": 0.9999599456787109, "eval_seeclick_inside_bbox": 0.9375, "eval_seeclick_loss": 0.2215959131717682, "eval_seeclick_loss_ce": 0.008739723358303308, "eval_seeclick_loss_iou": 0.4912109375, "eval_seeclick_loss_num": 0.043231964111328125, "eval_seeclick_loss_xval": 0.216217041015625, "eval_seeclick_runtime": 91.1896, "eval_seeclick_samples_per_second": 0.472, "eval_seeclick_steps_per_second": 0.022, "num_input_tokens_seen": 515248120, "step": 3000 
}, { "epoch": 0.789110278161373, "eval_icons_CIoU": 0.8647165596485138, "eval_icons_GIoU": 0.8629220426082611, "eval_icons_IoU": 0.8704103231430054, "eval_icons_MAE_all": 0.018641653936356306, "eval_icons_MAE_h": 0.02465621568262577, "eval_icons_MAE_w": 0.017738113179802895, "eval_icons_MAE_x": 0.013989617582410574, "eval_icons_MAE_y": 0.018182669766247272, "eval_icons_NUM_probability": 0.999986469745636, "eval_icons_inside_bbox": 0.984375, "eval_icons_loss": 0.06894619762897491, "eval_icons_loss_ce": 9.26887514651753e-06, "eval_icons_loss_iou": 0.6270751953125, "eval_icons_loss_num": 0.012828826904296875, "eval_icons_loss_xval": 0.0641326904296875, "eval_icons_runtime": 101.3174, "eval_icons_samples_per_second": 0.493, "eval_icons_steps_per_second": 0.02, "num_input_tokens_seen": 515248120, "step": 3000 }, { "epoch": 0.789110278161373, "eval_screenspot_CIoU": 0.5550089081128439, "eval_screenspot_GIoU": 0.5516955653826395, "eval_screenspot_IoU": 0.5956698457400004, "eval_screenspot_MAE_all": 0.08370348066091537, "eval_screenspot_MAE_h": 0.056737360854943596, "eval_screenspot_MAE_w": 0.1454519679148992, "eval_screenspot_MAE_x": 0.08442502965529759, "eval_screenspot_MAE_y": 0.04819955242176851, "eval_screenspot_NUM_probability": 0.9998689492543539, "eval_screenspot_inside_bbox": 0.8395833373069763, "eval_screenspot_loss": 0.9374791979789734, "eval_screenspot_loss_ce": 0.5804212689399719, "eval_screenspot_loss_iou": 0.5504557291666666, "eval_screenspot_loss_num": 0.069854736328125, "eval_screenspot_loss_xval": 0.3492228190104167, "eval_screenspot_runtime": 149.2861, "eval_screenspot_samples_per_second": 0.596, "eval_screenspot_steps_per_second": 0.02, "num_input_tokens_seen": 515248120, "step": 3000 }, { "epoch": 0.789110278161373, "loss": 0.9090390205383301, "loss_ce": 0.5727353096008301, "loss_iou": 0.4609375, "loss_num": 0.0673828125, "loss_xval": 0.3359375, "num_input_tokens_seen": 515248120, "step": 3000 }, { "epoch": 0.7893733149207601, "grad_norm": 
4.193864370890535, "learning_rate": 5e-06, "loss": 0.1374, "num_input_tokens_seen": 515420392, "step": 3001 }, { "epoch": 0.7893733149207601, "loss": 0.12226281315088272, "loss_ce": 0.0011690594255924225, "loss_iou": 0.515625, "loss_num": 0.0242919921875, "loss_xval": 0.12109375, "num_input_tokens_seen": 515420392, "step": 3001 }, { "epoch": 0.7896363516801473, "grad_norm": 4.537019888829979, "learning_rate": 5e-06, "loss": 0.1312, "num_input_tokens_seen": 515590772, "step": 3002 }, { "epoch": 0.7896363516801473, "loss": 0.14288941025733948, "loss_ce": 0.0013793996768072248, "loss_iou": 0.35546875, "loss_num": 0.0283203125, "loss_xval": 0.1416015625, "num_input_tokens_seen": 515590772, "step": 3002 }, { "epoch": 0.7898993884395344, "grad_norm": 7.837104532573747, "learning_rate": 5e-06, "loss": 0.1585, "num_input_tokens_seen": 515763064, "step": 3003 }, { "epoch": 0.7898993884395344, "loss": 0.20384354889392853, "loss_ce": 0.0005659655435010791, "loss_iou": 0.443359375, "loss_num": 0.04052734375, "loss_xval": 0.203125, "num_input_tokens_seen": 515763064, "step": 3003 }, { "epoch": 0.7901624251989215, "grad_norm": 9.871988037860998, "learning_rate": 5e-06, "loss": 0.115, "num_input_tokens_seen": 515935288, "step": 3004 }, { "epoch": 0.7901624251989215, "loss": 0.1931522786617279, "loss_ce": 0.004248465411365032, "loss_iou": 0.33203125, "loss_num": 0.037841796875, "loss_xval": 0.1884765625, "num_input_tokens_seen": 515935288, "step": 3004 }, { "epoch": 0.7904254619583087, "grad_norm": 6.705662570329979, "learning_rate": 5e-06, "loss": 0.1395, "num_input_tokens_seen": 516107836, "step": 3005 }, { "epoch": 0.7904254619583087, "loss": 0.24680155515670776, "loss_ce": 0.0018979848828166723, "loss_iou": 0.53125, "loss_num": 0.049072265625, "loss_xval": 0.2451171875, "num_input_tokens_seen": 516107836, "step": 3005 }, { "epoch": 0.7906884987176958, "grad_norm": 3.468178023705032, "learning_rate": 5e-06, "loss": 0.1047, "num_input_tokens_seen": 516280260, "step": 3006 }, { 
"epoch": 0.7906884987176958, "loss": 0.12856581807136536, "loss_ce": 0.0021620113402605057, "loss_iou": 0.478515625, "loss_num": 0.0252685546875, "loss_xval": 0.1259765625, "num_input_tokens_seen": 516280260, "step": 3006 }, { "epoch": 0.790951535477083, "grad_norm": 2.516769311233636, "learning_rate": 5e-06, "loss": 0.1021, "num_input_tokens_seen": 516452472, "step": 3007 }, { "epoch": 0.790951535477083, "loss": 0.03532446175813675, "loss_ce": 7.665850716875866e-05, "loss_iou": 0.404296875, "loss_num": 0.007049560546875, "loss_xval": 0.03515625, "num_input_tokens_seen": 516452472, "step": 3007 }, { "epoch": 0.7912145722364701, "grad_norm": 2.8390538522305993, "learning_rate": 5e-06, "loss": 0.1163, "num_input_tokens_seen": 516624540, "step": 3008 }, { "epoch": 0.7912145722364701, "loss": 0.10718082636594772, "loss_ce": 0.0042908103205263615, "loss_iou": 0.51953125, "loss_num": 0.0206298828125, "loss_xval": 0.10302734375, "num_input_tokens_seen": 516624540, "step": 3008 }, { "epoch": 0.7914776089958572, "grad_norm": 4.2650017166958545, "learning_rate": 5e-06, "loss": 0.1052, "num_input_tokens_seen": 516796808, "step": 3009 }, { "epoch": 0.7914776089958572, "loss": 0.17158488929271698, "loss_ce": 0.0024106951896101236, "loss_iou": 0.392578125, "loss_num": 0.033935546875, "loss_xval": 0.1689453125, "num_input_tokens_seen": 516796808, "step": 3009 }, { "epoch": 0.7917406457552443, "grad_norm": 6.803158022173843, "learning_rate": 5e-06, "loss": 0.1233, "num_input_tokens_seen": 516969144, "step": 3010 }, { "epoch": 0.7917406457552443, "loss": 0.12062282115221024, "loss_ce": 0.0013601221144199371, "loss_iou": 0.50390625, "loss_num": 0.0238037109375, "loss_xval": 0.119140625, "num_input_tokens_seen": 516969144, "step": 3010 }, { "epoch": 0.7920036825146314, "grad_norm": 5.268394185167123, "learning_rate": 5e-06, "loss": 0.0984, "num_input_tokens_seen": 517139672, "step": 3011 }, { "epoch": 0.7920036825146314, "loss": 0.10529518872499466, "loss_ce": 0.0033054398372769356, 
"loss_iou": 0.330078125, "loss_num": 0.0203857421875, "loss_xval": 0.10205078125, "num_input_tokens_seen": 517139672, "step": 3011 }, { "epoch": 0.7922667192740186, "grad_norm": 4.897299268033889, "learning_rate": 5e-06, "loss": 0.1488, "num_input_tokens_seen": 517311812, "step": 3012 }, { "epoch": 0.7922667192740186, "loss": 0.15520727634429932, "loss_ce": 0.0010019636247307062, "loss_iou": 0.47265625, "loss_num": 0.03076171875, "loss_xval": 0.154296875, "num_input_tokens_seen": 517311812, "step": 3012 }, { "epoch": 0.7925297560334057, "grad_norm": 6.998508129858562, "learning_rate": 5e-06, "loss": 0.1503, "num_input_tokens_seen": 517483804, "step": 3013 }, { "epoch": 0.7925297560334057, "loss": 0.14621217548847198, "loss_ce": 0.004305441863834858, "loss_iou": 0.60546875, "loss_num": 0.0283203125, "loss_xval": 0.1416015625, "num_input_tokens_seen": 517483804, "step": 3013 }, { "epoch": 0.7927927927927928, "grad_norm": 8.855328154069232, "learning_rate": 5e-06, "loss": 0.0784, "num_input_tokens_seen": 517656016, "step": 3014 }, { "epoch": 0.7927927927927928, "loss": 0.07204173505306244, "loss_ce": 0.003957017324864864, "loss_iou": 0.515625, "loss_num": 0.01361083984375, "loss_xval": 0.06787109375, "num_input_tokens_seen": 517656016, "step": 3014 }, { "epoch": 0.7930558295521799, "grad_norm": 26.2413884381383, "learning_rate": 5e-06, "loss": 0.1451, "num_input_tokens_seen": 517828272, "step": 3015 }, { "epoch": 0.7930558295521799, "loss": 0.11117606610059738, "loss_ce": 0.0008550205966457725, "loss_iou": 0.455078125, "loss_num": 0.0220947265625, "loss_xval": 0.1103515625, "num_input_tokens_seen": 517828272, "step": 3015 }, { "epoch": 0.793318866311567, "grad_norm": 4.148118560901186, "learning_rate": 5e-06, "loss": 0.1143, "num_input_tokens_seen": 518000272, "step": 3016 }, { "epoch": 0.793318866311567, "loss": 0.06484581530094147, "loss_ce": 0.0005757926846854389, "loss_iou": 0.3828125, "loss_num": 0.0128173828125, "loss_xval": 0.064453125, "num_input_tokens_seen": 
518000272, "step": 3016 }, { "epoch": 0.7935819030709542, "grad_norm": 13.9691023081111, "learning_rate": 5e-06, "loss": 0.0913, "num_input_tokens_seen": 518172412, "step": 3017 }, { "epoch": 0.7935819030709542, "loss": 0.10305923223495483, "loss_ce": 0.002427519764751196, "loss_iou": 0.373046875, "loss_num": 0.0201416015625, "loss_xval": 0.1005859375, "num_input_tokens_seen": 518172412, "step": 3017 }, { "epoch": 0.7938449398303413, "grad_norm": 4.54961216940048, "learning_rate": 5e-06, "loss": 0.1051, "num_input_tokens_seen": 518344668, "step": 3018 }, { "epoch": 0.7938449398303413, "loss": 0.14651378989219666, "loss_ce": 0.0014942658599466085, "loss_iou": 0.53125, "loss_num": 0.0289306640625, "loss_xval": 0.14453125, "num_input_tokens_seen": 518344668, "step": 3018 }, { "epoch": 0.7941079765897284, "grad_norm": 4.200570019762456, "learning_rate": 5e-06, "loss": 0.107, "num_input_tokens_seen": 518517120, "step": 3019 }, { "epoch": 0.7941079765897284, "loss": 0.11832761764526367, "loss_ce": 0.00037718465318903327, "loss_iou": 0.46484375, "loss_num": 0.0235595703125, "loss_xval": 0.1181640625, "num_input_tokens_seen": 518517120, "step": 3019 }, { "epoch": 0.7943710133491155, "grad_norm": 7.5294315936950005, "learning_rate": 5e-06, "loss": 0.0876, "num_input_tokens_seen": 518689296, "step": 3020 }, { "epoch": 0.7943710133491155, "loss": 0.06614542007446289, "loss_ce": 0.00021218777692411095, "loss_iou": 0.427734375, "loss_num": 0.01318359375, "loss_xval": 0.06591796875, "num_input_tokens_seen": 518689296, "step": 3020 }, { "epoch": 0.7946340501085026, "grad_norm": 5.635502401126071, "learning_rate": 5e-06, "loss": 0.1396, "num_input_tokens_seen": 518859544, "step": 3021 }, { "epoch": 0.7946340501085026, "loss": 0.12032654881477356, "loss_ce": 0.0027118013240396976, "loss_iou": 0.3359375, "loss_num": 0.0235595703125, "loss_xval": 0.11767578125, "num_input_tokens_seen": 518859544, "step": 3021 }, { "epoch": 0.7948970868678897, "grad_norm": 6.009060825525287, 
"learning_rate": 5e-06, "loss": 0.1069, "num_input_tokens_seen": 519031676, "step": 3022 }, { "epoch": 0.7948970868678897, "loss": 0.1436431109905243, "loss_ce": 0.0033232811838388443, "loss_iou": 0.66015625, "loss_num": 0.028076171875, "loss_xval": 0.140625, "num_input_tokens_seen": 519031676, "step": 3022 }, { "epoch": 0.795160123627277, "grad_norm": 4.029495813443641, "learning_rate": 5e-06, "loss": 0.1666, "num_input_tokens_seen": 519203700, "step": 3023 }, { "epoch": 0.795160123627277, "loss": 0.15425805747509003, "loss_ce": 0.0003884279867634177, "loss_iou": 0.4453125, "loss_num": 0.03076171875, "loss_xval": 0.154296875, "num_input_tokens_seen": 519203700, "step": 3023 }, { "epoch": 0.795423160386664, "grad_norm": 37.082876522501756, "learning_rate": 5e-06, "loss": 0.1101, "num_input_tokens_seen": 519375616, "step": 3024 }, { "epoch": 0.795423160386664, "loss": 0.1391124576330185, "loss_ce": 0.00012015047832392156, "loss_iou": 0.5625, "loss_num": 0.02783203125, "loss_xval": 0.138671875, "num_input_tokens_seen": 519375616, "step": 3024 }, { "epoch": 0.7956861971460512, "grad_norm": 3.823112661234258, "learning_rate": 5e-06, "loss": 0.1111, "num_input_tokens_seen": 519547736, "step": 3025 }, { "epoch": 0.7956861971460512, "loss": 0.09849868714809418, "loss_ce": 0.0015290760202333331, "loss_iou": 0.359375, "loss_num": 0.0194091796875, "loss_xval": 0.09716796875, "num_input_tokens_seen": 519547736, "step": 3025 }, { "epoch": 0.7959492339054383, "grad_norm": 4.930287612034227, "learning_rate": 5e-06, "loss": 0.0922, "num_input_tokens_seen": 519720084, "step": 3026 }, { "epoch": 0.7959492339054383, "loss": 0.1211109384894371, "loss_ce": 0.005083112046122551, "loss_iou": 0.6328125, "loss_num": 0.023193359375, "loss_xval": 0.1162109375, "num_input_tokens_seen": 519720084, "step": 3026 }, { "epoch": 0.7962122706648254, "grad_norm": 13.986605867255715, "learning_rate": 5e-06, "loss": 0.1684, "num_input_tokens_seen": 519892180, "step": 3027 }, { "epoch": 
0.7962122706648254, "loss": 0.16327086091041565, "loss_ce": 0.0026263254694640636, "loss_iou": 0.408203125, "loss_num": 0.0322265625, "loss_xval": 0.16015625, "num_input_tokens_seen": 519892180, "step": 3027 }, { "epoch": 0.7964753074242126, "grad_norm": 12.476632712407037, "learning_rate": 5e-06, "loss": 0.0691, "num_input_tokens_seen": 520064636, "step": 3028 }, { "epoch": 0.7964753074242126, "loss": 0.11255937814712524, "loss_ce": 0.000788743665907532, "loss_iou": 0.4609375, "loss_num": 0.0223388671875, "loss_xval": 0.11181640625, "num_input_tokens_seen": 520064636, "step": 3028 }, { "epoch": 0.7967383441835997, "grad_norm": 3.6081873902324326, "learning_rate": 5e-06, "loss": 0.1265, "num_input_tokens_seen": 520236756, "step": 3029 }, { "epoch": 0.7967383441835997, "loss": 0.12160242348909378, "loss_ce": 0.0018361852271482348, "loss_iou": 0.31640625, "loss_num": 0.02392578125, "loss_xval": 0.11962890625, "num_input_tokens_seen": 520236756, "step": 3029 }, { "epoch": 0.7970013809429868, "grad_norm": 8.030386724109855, "learning_rate": 5e-06, "loss": 0.122, "num_input_tokens_seen": 520407308, "step": 3030 }, { "epoch": 0.7970013809429868, "loss": 0.18061389029026031, "loss_ce": 0.0010331911034882069, "loss_iou": 0.5625, "loss_num": 0.035888671875, "loss_xval": 0.1796875, "num_input_tokens_seen": 520407308, "step": 3030 }, { "epoch": 0.7972644177023739, "grad_norm": 11.21273309596263, "learning_rate": 5e-06, "loss": 0.1103, "num_input_tokens_seen": 520579744, "step": 3031 }, { "epoch": 0.7972644177023739, "loss": 0.082832470536232, "loss_ce": 0.0002824235416483134, "loss_iou": 0.515625, "loss_num": 0.0164794921875, "loss_xval": 0.08251953125, "num_input_tokens_seen": 520579744, "step": 3031 }, { "epoch": 0.797527454461761, "grad_norm": 20.85679503542732, "learning_rate": 5e-06, "loss": 0.1348, "num_input_tokens_seen": 520752148, "step": 3032 }, { "epoch": 0.797527454461761, "loss": 0.12713350355625153, "loss_ce": 0.0008517719688825309, "loss_iou": 0.416015625, 
"loss_num": 0.0252685546875, "loss_xval": 0.1259765625, "num_input_tokens_seen": 520752148, "step": 3032 }, { "epoch": 0.7977904912211482, "grad_norm": 37.09104960564763, "learning_rate": 5e-06, "loss": 0.1238, "num_input_tokens_seen": 520924220, "step": 3033 }, { "epoch": 0.7977904912211482, "loss": 0.10619133710861206, "loss_ce": 0.0030724371317774057, "loss_iou": 0.5234375, "loss_num": 0.0206298828125, "loss_xval": 0.10302734375, "num_input_tokens_seen": 520924220, "step": 3033 }, { "epoch": 0.7980535279805353, "grad_norm": 16.059073148748364, "learning_rate": 5e-06, "loss": 0.1064, "num_input_tokens_seen": 521093348, "step": 3034 }, { "epoch": 0.7980535279805353, "loss": 0.06166623532772064, "loss_ce": 0.0008294428698718548, "loss_iou": 0.455078125, "loss_num": 0.01214599609375, "loss_xval": 0.060791015625, "num_input_tokens_seen": 521093348, "step": 3034 }, { "epoch": 0.7983165647399224, "grad_norm": 8.368290368776519, "learning_rate": 5e-06, "loss": 0.1456, "num_input_tokens_seen": 521265688, "step": 3035 }, { "epoch": 0.7983165647399224, "loss": 0.14087893068790436, "loss_ce": 0.004129666369408369, "loss_iou": 0.51171875, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 521265688, "step": 3035 }, { "epoch": 0.7985796014993095, "grad_norm": 25.178404936799044, "learning_rate": 5e-06, "loss": 0.1592, "num_input_tokens_seen": 521437496, "step": 3036 }, { "epoch": 0.7985796014993095, "loss": 0.29297274351119995, "loss_ce": 0.002842147834599018, "loss_iou": 0.357421875, "loss_num": 0.05810546875, "loss_xval": 0.291015625, "num_input_tokens_seen": 521437496, "step": 3036 }, { "epoch": 0.7988426382586966, "grad_norm": 3.991516418389156, "learning_rate": 5e-06, "loss": 0.0941, "num_input_tokens_seen": 521609940, "step": 3037 }, { "epoch": 0.7988426382586966, "loss": 0.09374190121889114, "loss_ce": 0.0009379457915201783, "loss_iou": 0.392578125, "loss_num": 0.0185546875, "loss_xval": 0.0927734375, "num_input_tokens_seen": 521609940, "step": 
3037 }, { "epoch": 0.7991056750180838, "grad_norm": 4.808453824486521, "learning_rate": 5e-06, "loss": 0.1124, "num_input_tokens_seen": 521781892, "step": 3038 }, { "epoch": 0.7991056750180838, "loss": 0.07893365621566772, "loss_ce": 0.001052800682373345, "loss_iou": 0.5078125, "loss_num": 0.01556396484375, "loss_xval": 0.078125, "num_input_tokens_seen": 521781892, "step": 3038 }, { "epoch": 0.7993687117774709, "grad_norm": 12.778560985993714, "learning_rate": 5e-06, "loss": 0.109, "num_input_tokens_seen": 521954108, "step": 3039 }, { "epoch": 0.7993687117774709, "loss": 0.12000415474176407, "loss_ce": 0.0017790585989132524, "loss_iou": 0.4765625, "loss_num": 0.023681640625, "loss_xval": 0.1181640625, "num_input_tokens_seen": 521954108, "step": 3039 }, { "epoch": 0.799631748536858, "grad_norm": 5.33906426825674, "learning_rate": 5e-06, "loss": 0.1944, "num_input_tokens_seen": 522126236, "step": 3040 }, { "epoch": 0.799631748536858, "loss": 0.2638484239578247, "loss_ce": 0.00042068029870279133, "loss_iou": 0.6015625, "loss_num": 0.052734375, "loss_xval": 0.263671875, "num_input_tokens_seen": 522126236, "step": 3040 }, { "epoch": 0.7998947852962451, "grad_norm": 4.623536872405031, "learning_rate": 5e-06, "loss": 0.0883, "num_input_tokens_seen": 522298380, "step": 3041 }, { "epoch": 0.7998947852962451, "loss": 0.12680000066757202, "loss_ce": 0.0012506938073784113, "loss_iou": 0.337890625, "loss_num": 0.025146484375, "loss_xval": 0.1259765625, "num_input_tokens_seen": 522298380, "step": 3041 }, { "epoch": 0.8001578220556322, "grad_norm": 4.125445081698177, "learning_rate": 5e-06, "loss": 0.0945, "num_input_tokens_seen": 522470360, "step": 3042 }, { "epoch": 0.8001578220556322, "loss": 0.15849211812019348, "loss_ce": 0.0013113392051309347, "loss_iou": 0.50390625, "loss_num": 0.031494140625, "loss_xval": 0.1572265625, "num_input_tokens_seen": 522470360, "step": 3042 }, { "epoch": 0.8004208588150195, "grad_norm": 11.789889533203668, "learning_rate": 5e-06, "loss": 0.0876, 
"num_input_tokens_seen": 522642472, "step": 3043 }, { "epoch": 0.8004208588150195, "loss": 0.09324462711811066, "loss_ce": 0.0006237818161025643, "loss_iou": 0.50390625, "loss_num": 0.0185546875, "loss_xval": 0.0927734375, "num_input_tokens_seen": 522642472, "step": 3043 }, { "epoch": 0.8006838955744066, "grad_norm": 4.414508918444819, "learning_rate": 5e-06, "loss": 0.1147, "num_input_tokens_seen": 522814808, "step": 3044 }, { "epoch": 0.8006838955744066, "loss": 0.07435610890388489, "loss_ce": 0.0009460713481530547, "loss_iou": 0.474609375, "loss_num": 0.01470947265625, "loss_xval": 0.0732421875, "num_input_tokens_seen": 522814808, "step": 3044 }, { "epoch": 0.8009469323337937, "grad_norm": 6.1075510398524395, "learning_rate": 5e-06, "loss": 0.106, "num_input_tokens_seen": 522986892, "step": 3045 }, { "epoch": 0.8009469323337937, "loss": 0.08977600932121277, "loss_ce": 0.0010614084312692285, "loss_iou": 0.46484375, "loss_num": 0.0177001953125, "loss_xval": 0.0888671875, "num_input_tokens_seen": 522986892, "step": 3045 }, { "epoch": 0.8012099690931808, "grad_norm": 6.86235832016632, "learning_rate": 5e-06, "loss": 0.1369, "num_input_tokens_seen": 523159044, "step": 3046 }, { "epoch": 0.8012099690931808, "loss": 0.10283628106117249, "loss_ce": 0.003226900240406394, "loss_iou": 0.578125, "loss_num": 0.0198974609375, "loss_xval": 0.099609375, "num_input_tokens_seen": 523159044, "step": 3046 }, { "epoch": 0.8014730058525679, "grad_norm": 8.620167546317841, "learning_rate": 5e-06, "loss": 0.1508, "num_input_tokens_seen": 523331372, "step": 3047 }, { "epoch": 0.8014730058525679, "loss": 0.17512959241867065, "loss_ce": 0.0035597749520093203, "loss_iou": 0.404296875, "loss_num": 0.034423828125, "loss_xval": 0.171875, "num_input_tokens_seen": 523331372, "step": 3047 }, { "epoch": 0.801736042611955, "grad_norm": 4.823577960221348, "learning_rate": 5e-06, "loss": 0.0952, "num_input_tokens_seen": 523504004, "step": 3048 }, { "epoch": 0.801736042611955, "loss": 
0.07766547054052353, "loss_ce": 0.0020429138094186783, "loss_iou": 0.44921875, "loss_num": 0.01513671875, "loss_xval": 0.07568359375, "num_input_tokens_seen": 523504004, "step": 3048 }, { "epoch": 0.8019990793713422, "grad_norm": 11.00975772697334, "learning_rate": 5e-06, "loss": 0.0908, "num_input_tokens_seen": 523675952, "step": 3049 }, { "epoch": 0.8019990793713422, "loss": 0.11927802860736847, "loss_ce": 0.001342848176136613, "loss_iou": 0.359375, "loss_num": 0.0235595703125, "loss_xval": 0.1181640625, "num_input_tokens_seen": 523675952, "step": 3049 }, { "epoch": 0.8022621161307293, "grad_norm": 10.685552832243063, "learning_rate": 5e-06, "loss": 0.1029, "num_input_tokens_seen": 523848152, "step": 3050 }, { "epoch": 0.8022621161307293, "loss": 0.0827643871307373, "loss_ce": 0.002106426050886512, "loss_iou": 0.5859375, "loss_num": 0.01611328125, "loss_xval": 0.08056640625, "num_input_tokens_seen": 523848152, "step": 3050 }, { "epoch": 0.8025251528901164, "grad_norm": 24.17010500449297, "learning_rate": 5e-06, "loss": 0.1164, "num_input_tokens_seen": 524020708, "step": 3051 }, { "epoch": 0.8025251528901164, "loss": 0.10048617422580719, "loss_ce": 0.000510584854055196, "loss_iou": 0.51953125, "loss_num": 0.02001953125, "loss_xval": 0.10009765625, "num_input_tokens_seen": 524020708, "step": 3051 }, { "epoch": 0.8027881896495035, "grad_norm": 4.992845761930844, "learning_rate": 5e-06, "loss": 0.1304, "num_input_tokens_seen": 524191372, "step": 3052 }, { "epoch": 0.8027881896495035, "loss": 0.15961629152297974, "loss_ce": 0.006387531757354736, "loss_iou": 0.4453125, "loss_num": 0.0306396484375, "loss_xval": 0.1533203125, "num_input_tokens_seen": 524191372, "step": 3052 }, { "epoch": 0.8030512264088906, "grad_norm": 2.943381415031694, "learning_rate": 5e-06, "loss": 0.1292, "num_input_tokens_seen": 524363912, "step": 3053 }, { "epoch": 0.8030512264088906, "loss": 0.15981021523475647, "loss_ce": 0.0005695016006939113, "loss_iou": 0.419921875, "loss_num": 
0.03173828125, "loss_xval": 0.1591796875, "num_input_tokens_seen": 524363912, "step": 3053 }, { "epoch": 0.8033142631682778, "grad_norm": 5.648094984205903, "learning_rate": 5e-06, "loss": 0.1161, "num_input_tokens_seen": 524536316, "step": 3054 }, { "epoch": 0.8033142631682778, "loss": 0.1740710288286209, "loss_ce": 0.0022723155561834574, "loss_iou": 0.52734375, "loss_num": 0.034423828125, "loss_xval": 0.171875, "num_input_tokens_seen": 524536316, "step": 3054 }, { "epoch": 0.8035772999276649, "grad_norm": 7.984973488378181, "learning_rate": 5e-06, "loss": 0.0847, "num_input_tokens_seen": 524708444, "step": 3055 }, { "epoch": 0.8035772999276649, "loss": 0.07127489894628525, "loss_ce": 0.000809812976513058, "loss_iou": 0.5078125, "loss_num": 0.01409912109375, "loss_xval": 0.0703125, "num_input_tokens_seen": 524708444, "step": 3055 }, { "epoch": 0.803840336687052, "grad_norm": 6.031677657650838, "learning_rate": 5e-06, "loss": 0.0908, "num_input_tokens_seen": 524880960, "step": 3056 }, { "epoch": 0.803840336687052, "loss": 0.0638066828250885, "loss_ce": 0.0005437473300844431, "loss_iou": 0.703125, "loss_num": 0.01263427734375, "loss_xval": 0.0634765625, "num_input_tokens_seen": 524880960, "step": 3056 }, { "epoch": 0.8041033734464391, "grad_norm": 5.599082172393745, "learning_rate": 5e-06, "loss": 0.086, "num_input_tokens_seen": 525053308, "step": 3057 }, { "epoch": 0.8041033734464391, "loss": 0.07932358235120773, "loss_ce": 0.0024650623090565205, "loss_iou": 0.546875, "loss_num": 0.015380859375, "loss_xval": 0.07666015625, "num_input_tokens_seen": 525053308, "step": 3057 }, { "epoch": 0.8043664102058262, "grad_norm": 28.17799645696428, "learning_rate": 5e-06, "loss": 0.0923, "num_input_tokens_seen": 525225432, "step": 3058 }, { "epoch": 0.8043664102058262, "loss": 0.08576367795467377, "loss_ce": 0.0013825736241415143, "loss_iou": 0.578125, "loss_num": 0.016845703125, "loss_xval": 0.08447265625, "num_input_tokens_seen": 525225432, "step": 3058 }, { "epoch": 
0.8046294469652134, "grad_norm": 3.4834198316912564, "learning_rate": 5e-06, "loss": 0.0788, "num_input_tokens_seen": 525397804, "step": 3059 }, { "epoch": 0.8046294469652134, "loss": 0.07873048633337021, "loss_ce": 0.0016125671099871397, "loss_iou": 0.345703125, "loss_num": 0.015380859375, "loss_xval": 0.0771484375, "num_input_tokens_seen": 525397804, "step": 3059 }, { "epoch": 0.8048924837246005, "grad_norm": 2.9860188986605367, "learning_rate": 5e-06, "loss": 0.0839, "num_input_tokens_seen": 525570124, "step": 3060 }, { "epoch": 0.8048924837246005, "loss": 0.07787738740444183, "loss_ce": 0.0008205035701394081, "loss_iou": 0.62890625, "loss_num": 0.015380859375, "loss_xval": 0.0771484375, "num_input_tokens_seen": 525570124, "step": 3060 }, { "epoch": 0.8051555204839876, "grad_norm": 14.35918831416577, "learning_rate": 5e-06, "loss": 0.1217, "num_input_tokens_seen": 525742708, "step": 3061 }, { "epoch": 0.8051555204839876, "loss": 0.10341215878725052, "loss_ce": 0.00123931048437953, "loss_iou": 0.458984375, "loss_num": 0.0203857421875, "loss_xval": 0.10205078125, "num_input_tokens_seen": 525742708, "step": 3061 }, { "epoch": 0.8054185572433747, "grad_norm": 16.381501371718237, "learning_rate": 5e-06, "loss": 0.1015, "num_input_tokens_seen": 525914908, "step": 3062 }, { "epoch": 0.8054185572433747, "loss": 0.10784236341714859, "loss_ce": 0.0002678967430256307, "loss_iou": 0.462890625, "loss_num": 0.021484375, "loss_xval": 0.107421875, "num_input_tokens_seen": 525914908, "step": 3062 }, { "epoch": 0.8056815940027618, "grad_norm": 4.73914065847523, "learning_rate": 5e-06, "loss": 0.1033, "num_input_tokens_seen": 526087040, "step": 3063 }, { "epoch": 0.8056815940027618, "loss": 0.06849893927574158, "loss_ce": 0.000368443870684132, "loss_iou": 0.455078125, "loss_num": 0.01361083984375, "loss_xval": 0.068359375, "num_input_tokens_seen": 526087040, "step": 3063 }, { "epoch": 0.8059446307621491, "grad_norm": 8.562314005917766, "learning_rate": 5e-06, "loss": 0.0954, 
"num_input_tokens_seen": 526259428, "step": 3064 }, { "epoch": 0.8059446307621491, "loss": 0.07737226039171219, "loss_ce": 0.0030009234324097633, "loss_iou": 0.59375, "loss_num": 0.014892578125, "loss_xval": 0.07421875, "num_input_tokens_seen": 526259428, "step": 3064 }, { "epoch": 0.8062076675215362, "grad_norm": 12.302010843258703, "learning_rate": 5e-06, "loss": 0.1505, "num_input_tokens_seen": 526431568, "step": 3065 }, { "epoch": 0.8062076675215362, "loss": 0.20694774389266968, "loss_ce": 0.0032123818527907133, "loss_iou": 0.51171875, "loss_num": 0.040771484375, "loss_xval": 0.2041015625, "num_input_tokens_seen": 526431568, "step": 3065 }, { "epoch": 0.8064707042809233, "grad_norm": 8.1243600151207, "learning_rate": 5e-06, "loss": 0.1291, "num_input_tokens_seen": 526603728, "step": 3066 }, { "epoch": 0.8064707042809233, "loss": 0.14467400312423706, "loss_ce": 0.0004784482589457184, "loss_iou": 0.59375, "loss_num": 0.02880859375, "loss_xval": 0.14453125, "num_input_tokens_seen": 526603728, "step": 3066 }, { "epoch": 0.8067337410403104, "grad_norm": 92.56005912402708, "learning_rate": 5e-06, "loss": 0.1108, "num_input_tokens_seen": 526775572, "step": 3067 }, { "epoch": 0.8067337410403104, "loss": 0.08829745650291443, "loss_ce": 0.0011697689769789577, "loss_iou": 0.44140625, "loss_num": 0.0174560546875, "loss_xval": 0.0869140625, "num_input_tokens_seen": 526775572, "step": 3067 }, { "epoch": 0.8069967777996975, "grad_norm": 33.266184929246414, "learning_rate": 5e-06, "loss": 0.0732, "num_input_tokens_seen": 526947576, "step": 3068 }, { "epoch": 0.8069967777996975, "loss": 0.060733191668987274, "loss_ce": 0.0012391710188239813, "loss_iou": 0.59375, "loss_num": 0.01190185546875, "loss_xval": 0.0595703125, "num_input_tokens_seen": 526947576, "step": 3068 }, { "epoch": 0.8072598145590847, "grad_norm": 5.97758373469706, "learning_rate": 5e-06, "loss": 0.0885, "num_input_tokens_seen": 527117908, "step": 3069 }, { "epoch": 0.8072598145590847, "loss": 
0.09366244077682495, "loss_ce": 0.006046472117304802, "loss_iou": 0.51953125, "loss_num": 0.017578125, "loss_xval": 0.08740234375, "num_input_tokens_seen": 527117908, "step": 3069 }, { "epoch": 0.8075228513184718, "grad_norm": 5.290039917786788, "learning_rate": 5e-06, "loss": 0.143, "num_input_tokens_seen": 527287100, "step": 3070 }, { "epoch": 0.8075228513184718, "loss": 0.10383239388465881, "loss_ce": 0.00046935188584029675, "loss_iou": 0.56640625, "loss_num": 0.0206298828125, "loss_xval": 0.103515625, "num_input_tokens_seen": 527287100, "step": 3070 }, { "epoch": 0.8077858880778589, "grad_norm": 4.7891570756526125, "learning_rate": 5e-06, "loss": 0.14, "num_input_tokens_seen": 527458888, "step": 3071 }, { "epoch": 0.8077858880778589, "loss": 0.10491342842578888, "loss_ce": 0.0008484934223815799, "loss_iou": 0.46484375, "loss_num": 0.0208740234375, "loss_xval": 0.10400390625, "num_input_tokens_seen": 527458888, "step": 3071 }, { "epoch": 0.808048924837246, "grad_norm": 11.230743830755795, "learning_rate": 5e-06, "loss": 0.1349, "num_input_tokens_seen": 527630920, "step": 3072 }, { "epoch": 0.808048924837246, "loss": 0.06209864094853401, "loss_ce": 0.0007583063561469316, "loss_iou": 0.64453125, "loss_num": 0.01226806640625, "loss_xval": 0.061279296875, "num_input_tokens_seen": 527630920, "step": 3072 }, { "epoch": 0.8083119615966331, "grad_norm": 5.040744603317089, "learning_rate": 5e-06, "loss": 0.1222, "num_input_tokens_seen": 527803168, "step": 3073 }, { "epoch": 0.8083119615966331, "loss": 0.1508997231721878, "loss_ce": 0.0011194492690265179, "loss_iou": 0.53515625, "loss_num": 0.030029296875, "loss_xval": 0.1494140625, "num_input_tokens_seen": 527803168, "step": 3073 }, { "epoch": 0.8085749983560202, "grad_norm": 8.63208434905108, "learning_rate": 5e-06, "loss": 0.0993, "num_input_tokens_seen": 527975596, "step": 3074 }, { "epoch": 0.8085749983560202, "loss": 0.11231046169996262, "loss_ce": 0.00017361767822876573, "loss_iou": 0.408203125, "loss_num": 
0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 527975596, "step": 3074 }, { "epoch": 0.8088380351154074, "grad_norm": 4.381253880078918, "learning_rate": 5e-06, "loss": 0.1243, "num_input_tokens_seen": 528147980, "step": 3075 }, { "epoch": 0.8088380351154074, "loss": 0.11533799767494202, "loss_ce": 0.0029875326436012983, "loss_iou": 0.275390625, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 528147980, "step": 3075 }, { "epoch": 0.8091010718747945, "grad_norm": 8.161484057964593, "learning_rate": 5e-06, "loss": 0.1619, "num_input_tokens_seen": 528320296, "step": 3076 }, { "epoch": 0.8091010718747945, "loss": 0.16661548614501953, "loss_ce": 0.0008439991506747901, "loss_iou": 0.337890625, "loss_num": 0.033203125, "loss_xval": 0.166015625, "num_input_tokens_seen": 528320296, "step": 3076 }, { "epoch": 0.8093641086341816, "grad_norm": 4.683292048006889, "learning_rate": 5e-06, "loss": 0.1219, "num_input_tokens_seen": 528492424, "step": 3077 }, { "epoch": 0.8093641086341816, "loss": 0.11115469038486481, "loss_ce": 0.0029546155128628016, "loss_iou": 0.51171875, "loss_num": 0.0216064453125, "loss_xval": 0.1083984375, "num_input_tokens_seen": 528492424, "step": 3077 }, { "epoch": 0.8096271453935687, "grad_norm": 5.257960533659777, "learning_rate": 5e-06, "loss": 0.1374, "num_input_tokens_seen": 528664732, "step": 3078 }, { "epoch": 0.8096271453935687, "loss": 0.21525058150291443, "loss_ce": 0.001490199938416481, "loss_iou": 0.435546875, "loss_num": 0.042724609375, "loss_xval": 0.2138671875, "num_input_tokens_seen": 528664732, "step": 3078 }, { "epoch": 0.8098901821529558, "grad_norm": 12.566112829085096, "learning_rate": 5e-06, "loss": 0.1408, "num_input_tokens_seen": 528837012, "step": 3079 }, { "epoch": 0.8098901821529558, "loss": 0.08169254660606384, "loss_ce": 0.00021060870494693518, "loss_iou": 0.45703125, "loss_num": 0.0162353515625, "loss_xval": 0.08154296875, "num_input_tokens_seen": 528837012, "step": 3079 }, { 
"epoch": 0.810153218912343, "grad_norm": 5.324934758940462, "learning_rate": 5e-06, "loss": 0.1012, "num_input_tokens_seen": 529009052, "step": 3080 }, { "epoch": 0.810153218912343, "loss": 0.08878730237483978, "loss_ce": 0.0001795147400116548, "loss_iou": 0.5078125, "loss_num": 0.0177001953125, "loss_xval": 0.08837890625, "num_input_tokens_seen": 529009052, "step": 3080 }, { "epoch": 0.8104162556717301, "grad_norm": 9.272723862546323, "learning_rate": 5e-06, "loss": 0.1193, "num_input_tokens_seen": 529181216, "step": 3081 }, { "epoch": 0.8104162556717301, "loss": 0.0852864533662796, "loss_ce": 0.0025990745052695274, "loss_iou": 0.51171875, "loss_num": 0.0166015625, "loss_xval": 0.08251953125, "num_input_tokens_seen": 529181216, "step": 3081 }, { "epoch": 0.8106792924311172, "grad_norm": 28.988099538140567, "learning_rate": 5e-06, "loss": 0.1292, "num_input_tokens_seen": 529351788, "step": 3082 }, { "epoch": 0.8106792924311172, "loss": 0.05319926142692566, "loss_ce": 0.0007700645364820957, "loss_iou": 0.6796875, "loss_num": 0.010498046875, "loss_xval": 0.052490234375, "num_input_tokens_seen": 529351788, "step": 3082 }, { "epoch": 0.8109423291905044, "grad_norm": 5.871269884706642, "learning_rate": 5e-06, "loss": 0.1452, "num_input_tokens_seen": 529524200, "step": 3083 }, { "epoch": 0.8109423291905044, "loss": 0.180719256401062, "loss_ce": 0.004815942607820034, "loss_iou": 0.5078125, "loss_num": 0.03515625, "loss_xval": 0.17578125, "num_input_tokens_seen": 529524200, "step": 3083 }, { "epoch": 0.8112053659498915, "grad_norm": 9.034864384864637, "learning_rate": 5e-06, "loss": 0.1046, "num_input_tokens_seen": 529696672, "step": 3084 }, { "epoch": 0.8112053659498915, "loss": 0.039623767137527466, "loss_ce": 0.0003476430138107389, "loss_iou": 0.546875, "loss_num": 0.00787353515625, "loss_xval": 0.039306640625, "num_input_tokens_seen": 529696672, "step": 3084 }, { "epoch": 0.8114684027092787, "grad_norm": 7.571992947700351, "learning_rate": 5e-06, "loss": 0.1168, 
"num_input_tokens_seen": 529868932, "step": 3085 }, { "epoch": 0.8114684027092787, "loss": 0.15128442645072937, "loss_ce": 0.002373900031670928, "loss_iou": 0.375, "loss_num": 0.02978515625, "loss_xval": 0.1484375, "num_input_tokens_seen": 529868932, "step": 3085 }, { "epoch": 0.8117314394686658, "grad_norm": 29.21584689056005, "learning_rate": 5e-06, "loss": 0.0879, "num_input_tokens_seen": 530041080, "step": 3086 }, { "epoch": 0.8117314394686658, "loss": 0.11466438323259354, "loss_ce": 0.001291584805585444, "loss_iou": 0.494140625, "loss_num": 0.022705078125, "loss_xval": 0.11328125, "num_input_tokens_seen": 530041080, "step": 3086 }, { "epoch": 0.8119944762280529, "grad_norm": 8.627125392444821, "learning_rate": 5e-06, "loss": 0.1586, "num_input_tokens_seen": 530210668, "step": 3087 }, { "epoch": 0.8119944762280529, "loss": 0.1240130364894867, "loss_ce": 0.0024767834693193436, "loss_iou": 0.39453125, "loss_num": 0.0242919921875, "loss_xval": 0.12158203125, "num_input_tokens_seen": 530210668, "step": 3087 }, { "epoch": 0.81225751298744, "grad_norm": 11.153523319241675, "learning_rate": 5e-06, "loss": 0.0849, "num_input_tokens_seen": 530382776, "step": 3088 }, { "epoch": 0.81225751298744, "loss": 0.07181555032730103, "loss_ce": 0.00026709536905400455, "loss_iou": null, "loss_num": 0.01434326171875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 530382776, "step": 3088 }, { "epoch": 0.8125205497468271, "grad_norm": 10.380517960562548, "learning_rate": 5e-06, "loss": 0.1332, "num_input_tokens_seen": 530555004, "step": 3089 }, { "epoch": 0.8125205497468271, "loss": 0.16954563558101654, "loss_ce": 0.0021872336510568857, "loss_iou": 0.5078125, "loss_num": 0.033447265625, "loss_xval": 0.1669921875, "num_input_tokens_seen": 530555004, "step": 3089 }, { "epoch": 0.8127835865062143, "grad_norm": 5.4446542029328775, "learning_rate": 5e-06, "loss": 0.0898, "num_input_tokens_seen": 530726976, "step": 3090 }, { "epoch": 0.8127835865062143, "loss": 0.08060289919376373, 
"loss_ce": 0.00040270722820423543, "loss_iou": 0.40234375, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 530726976, "step": 3090 }, { "epoch": 0.8130466232656014, "grad_norm": 5.347357758568155, "learning_rate": 5e-06, "loss": 0.1135, "num_input_tokens_seen": 530898852, "step": 3091 }, { "epoch": 0.8130466232656014, "loss": 0.08159644901752472, "loss_ce": 0.0003891719679813832, "loss_iou": 0.4140625, "loss_num": 0.0162353515625, "loss_xval": 0.0810546875, "num_input_tokens_seen": 530898852, "step": 3091 }, { "epoch": 0.8133096600249885, "grad_norm": 4.89308048127463, "learning_rate": 5e-06, "loss": 0.1005, "num_input_tokens_seen": 531071100, "step": 3092 }, { "epoch": 0.8133096600249885, "loss": 0.05772348493337631, "loss_ce": 7.577867654617876e-05, "loss_iou": 0.54296875, "loss_num": 0.01153564453125, "loss_xval": 0.0576171875, "num_input_tokens_seen": 531071100, "step": 3092 }, { "epoch": 0.8135726967843756, "grad_norm": 5.314071866923476, "learning_rate": 5e-06, "loss": 0.1052, "num_input_tokens_seen": 531243088, "step": 3093 }, { "epoch": 0.8135726967843756, "loss": 0.16711823642253876, "loss_ce": 0.002140207216143608, "loss_iou": 0.470703125, "loss_num": 0.032958984375, "loss_xval": 0.1650390625, "num_input_tokens_seen": 531243088, "step": 3093 }, { "epoch": 0.8138357335437627, "grad_norm": 4.018224451020325, "learning_rate": 5e-06, "loss": 0.1035, "num_input_tokens_seen": 531415364, "step": 3094 }, { "epoch": 0.8138357335437627, "loss": 0.07948748767375946, "loss_ce": 0.0013472279533743858, "loss_iou": 0.431640625, "loss_num": 0.015625, "loss_xval": 0.078125, "num_input_tokens_seen": 531415364, "step": 3094 }, { "epoch": 0.8140987703031498, "grad_norm": 5.643939653563094, "learning_rate": 5e-06, "loss": 0.0894, "num_input_tokens_seen": 531587860, "step": 3095 }, { "epoch": 0.8140987703031498, "loss": 0.070250503718853, "loss_ce": 0.00019740140123758465, "loss_iou": 0.48828125, "loss_num": 0.0140380859375, "loss_xval": 
0.06982421875, "num_input_tokens_seen": 531587860, "step": 3095 }, { "epoch": 0.814361807062537, "grad_norm": 6.298167200858131, "learning_rate": 5e-06, "loss": 0.1384, "num_input_tokens_seen": 531760160, "step": 3096 }, { "epoch": 0.814361807062537, "loss": 0.13414643704891205, "loss_ce": 0.0014254867564886808, "loss_iou": 0.48046875, "loss_num": 0.0264892578125, "loss_xval": 0.1328125, "num_input_tokens_seen": 531760160, "step": 3096 }, { "epoch": 0.8146248438219241, "grad_norm": 4.559829968544296, "learning_rate": 5e-06, "loss": 0.0996, "num_input_tokens_seen": 531932728, "step": 3097 }, { "epoch": 0.8146248438219241, "loss": 0.11818031221628189, "loss_ce": 0.0006265999400056899, "loss_iou": 0.44140625, "loss_num": 0.0235595703125, "loss_xval": 0.11767578125, "num_input_tokens_seen": 531932728, "step": 3097 }, { "epoch": 0.8148878805813112, "grad_norm": 6.798137203947019, "learning_rate": 5e-06, "loss": 0.1542, "num_input_tokens_seen": 532104748, "step": 3098 }, { "epoch": 0.8148878805813112, "loss": 0.13361144065856934, "loss_ce": 0.0008294496219605207, "loss_iou": 0.44921875, "loss_num": 0.026611328125, "loss_xval": 0.1328125, "num_input_tokens_seen": 532104748, "step": 3098 }, { "epoch": 0.8151509173406983, "grad_norm": 4.492953640528966, "learning_rate": 5e-06, "loss": 0.1604, "num_input_tokens_seen": 532276872, "step": 3099 }, { "epoch": 0.8151509173406983, "loss": 0.23120509088039398, "loss_ce": 0.004703632555902004, "loss_iou": 0.4296875, "loss_num": 0.045166015625, "loss_xval": 0.2265625, "num_input_tokens_seen": 532276872, "step": 3099 }, { "epoch": 0.8154139541000854, "grad_norm": 23.973104234846247, "learning_rate": 5e-06, "loss": 0.1384, "num_input_tokens_seen": 532449140, "step": 3100 }, { "epoch": 0.8154139541000854, "loss": 0.2094377875328064, "loss_ce": 0.005488819442689419, "loss_iou": 0.384765625, "loss_num": 0.040771484375, "loss_xval": 0.2041015625, "num_input_tokens_seen": 532449140, "step": 3100 }, { "epoch": 0.8156769908594727, 
"grad_norm": 5.077265892021237, "learning_rate": 5e-06, "loss": 0.0818, "num_input_tokens_seen": 532621452, "step": 3101 }, { "epoch": 0.8156769908594727, "loss": 0.07252339273691177, "loss_ce": 0.001692092278972268, "loss_iou": 0.53125, "loss_num": 0.01416015625, "loss_xval": 0.07080078125, "num_input_tokens_seen": 532621452, "step": 3101 }, { "epoch": 0.8159400276188598, "grad_norm": 4.611749386992743, "learning_rate": 5e-06, "loss": 0.1173, "num_input_tokens_seen": 532793420, "step": 3102 }, { "epoch": 0.8159400276188598, "loss": 0.10748874396085739, "loss_ce": 0.0007687745383009315, "loss_iou": 0.59375, "loss_num": 0.0213623046875, "loss_xval": 0.10693359375, "num_input_tokens_seen": 532793420, "step": 3102 }, { "epoch": 0.8162030643782469, "grad_norm": 8.46788739165104, "learning_rate": 5e-06, "loss": 0.111, "num_input_tokens_seen": 532965672, "step": 3103 }, { "epoch": 0.8162030643782469, "loss": 0.0986635610461235, "loss_ce": 0.006317365914583206, "loss_iou": 0.359375, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 532965672, "step": 3103 }, { "epoch": 0.816466101137634, "grad_norm": 5.21012803335228, "learning_rate": 5e-06, "loss": 0.1037, "num_input_tokens_seen": 533137620, "step": 3104 }, { "epoch": 0.816466101137634, "loss": 0.1294005811214447, "loss_ce": 0.0007232190691865981, "loss_iou": null, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 533137620, "step": 3104 }, { "epoch": 0.8167291378970211, "grad_norm": 4.29918218259194, "learning_rate": 5e-06, "loss": 0.0941, "num_input_tokens_seen": 533308040, "step": 3105 }, { "epoch": 0.8167291378970211, "loss": 0.07954747974872589, "loss_ce": 0.0007816128781996667, "loss_iou": 0.466796875, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 533308040, "step": 3105 }, { "epoch": 0.8169921746564083, "grad_norm": 7.9379046963597775, "learning_rate": 5e-06, "loss": 0.0867, "num_input_tokens_seen": 533480088, "step": 
3106 }, { "epoch": 0.8169921746564083, "loss": 0.055092211812734604, "loss_ce": 0.002663013059645891, "loss_iou": 0.578125, "loss_num": 0.010498046875, "loss_xval": 0.052490234375, "num_input_tokens_seen": 533480088, "step": 3106 }, { "epoch": 0.8172552114157954, "grad_norm": 5.798174841670742, "learning_rate": 5e-06, "loss": 0.1232, "num_input_tokens_seen": 533652304, "step": 3107 }, { "epoch": 0.8172552114157954, "loss": 0.10001754760742188, "loss_ce": 0.0004997201031073928, "loss_iou": 0.4765625, "loss_num": 0.0198974609375, "loss_xval": 0.099609375, "num_input_tokens_seen": 533652304, "step": 3107 }, { "epoch": 0.8175182481751825, "grad_norm": 4.397743796531851, "learning_rate": 5e-06, "loss": 0.1014, "num_input_tokens_seen": 533824784, "step": 3108 }, { "epoch": 0.8175182481751825, "loss": 0.04015748202800751, "loss_ce": 0.001079726149328053, "loss_iou": 0.376953125, "loss_num": 0.0078125, "loss_xval": 0.0390625, "num_input_tokens_seen": 533824784, "step": 3108 }, { "epoch": 0.8177812849345696, "grad_norm": 3.0634489465977937, "learning_rate": 5e-06, "loss": 0.0922, "num_input_tokens_seen": 533997192, "step": 3109 }, { "epoch": 0.8177812849345696, "loss": 0.11612477153539658, "loss_ce": 0.0004936738405376673, "loss_iou": 0.5234375, "loss_num": 0.0230712890625, "loss_xval": 0.11572265625, "num_input_tokens_seen": 533997192, "step": 3109 }, { "epoch": 0.8180443216939567, "grad_norm": 11.903043667241318, "learning_rate": 5e-06, "loss": 0.0839, "num_input_tokens_seen": 534169324, "step": 3110 }, { "epoch": 0.8180443216939567, "loss": 0.12225233018398285, "loss_ce": 0.002577648963779211, "loss_iou": 0.4609375, "loss_num": 0.02392578125, "loss_xval": 0.11962890625, "num_input_tokens_seen": 534169324, "step": 3110 }, { "epoch": 0.8183073584533439, "grad_norm": 11.232858169527498, "learning_rate": 5e-06, "loss": 0.0855, "num_input_tokens_seen": 534341512, "step": 3111 }, { "epoch": 0.8183073584533439, "loss": 0.093436598777771, "loss_ce": 0.00041902740485966206, 
"loss_iou": 0.4140625, "loss_num": 0.0186767578125, "loss_xval": 0.0927734375, "num_input_tokens_seen": 534341512, "step": 3111 }, { "epoch": 0.818570395212731, "grad_norm": 4.531836934905563, "learning_rate": 5e-06, "loss": 0.0859, "num_input_tokens_seen": 534511748, "step": 3112 }, { "epoch": 0.818570395212731, "loss": 0.07575514912605286, "loss_ce": 0.00020888875587843359, "loss_iou": 0.515625, "loss_num": 0.01513671875, "loss_xval": 0.07568359375, "num_input_tokens_seen": 534511748, "step": 3112 }, { "epoch": 0.8188334319721181, "grad_norm": 7.470057833509574, "learning_rate": 5e-06, "loss": 0.1115, "num_input_tokens_seen": 534683872, "step": 3113 }, { "epoch": 0.8188334319721181, "loss": 0.10066039860248566, "loss_ce": 0.00047118880320340395, "loss_iou": 0.466796875, "loss_num": 0.02001953125, "loss_xval": 0.10009765625, "num_input_tokens_seen": 534683872, "step": 3113 }, { "epoch": 0.8190964687315052, "grad_norm": 6.233336264317783, "learning_rate": 5e-06, "loss": 0.139, "num_input_tokens_seen": 534855832, "step": 3114 }, { "epoch": 0.8190964687315052, "loss": 0.09569014608860016, "loss_ce": 0.0034660203382372856, "loss_iou": 0.40625, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 534855832, "step": 3114 }, { "epoch": 0.8193595054908923, "grad_norm": 5.930104072036699, "learning_rate": 5e-06, "loss": 0.1295, "num_input_tokens_seen": 535027896, "step": 3115 }, { "epoch": 0.8193595054908923, "loss": 0.13678929209709167, "loss_ce": 0.001047111232765019, "loss_iou": 0.42578125, "loss_num": 0.0272216796875, "loss_xval": 0.1357421875, "num_input_tokens_seen": 535027896, "step": 3115 }, { "epoch": 0.8196225422502795, "grad_norm": 12.505587123966333, "learning_rate": 5e-06, "loss": 0.1014, "num_input_tokens_seen": 535200112, "step": 3116 }, { "epoch": 0.8196225422502795, "loss": 0.0822412520647049, "loss_ce": 0.0008356067701242864, "loss_iou": 0.5078125, "loss_num": 0.0162353515625, "loss_xval": 0.08154296875, 
"num_input_tokens_seen": 535200112, "step": 3116 }, { "epoch": 0.8198855790096666, "grad_norm": 4.231730639769997, "learning_rate": 5e-06, "loss": 0.1087, "num_input_tokens_seen": 535372284, "step": 3117 }, { "epoch": 0.8198855790096666, "loss": 0.12019523978233337, "loss_ce": 0.00018486013868823647, "loss_iou": 0.5234375, "loss_num": 0.0240478515625, "loss_xval": 0.1201171875, "num_input_tokens_seen": 535372284, "step": 3117 }, { "epoch": 0.8201486157690537, "grad_norm": 7.438830292525926, "learning_rate": 5e-06, "loss": 0.1207, "num_input_tokens_seen": 535544868, "step": 3118 }, { "epoch": 0.8201486157690537, "loss": 0.16251003742218018, "loss_ce": 0.0003091081453021616, "loss_iou": 0.45703125, "loss_num": 0.032470703125, "loss_xval": 0.162109375, "num_input_tokens_seen": 535544868, "step": 3118 }, { "epoch": 0.8204116525284408, "grad_norm": 4.777332996643992, "learning_rate": 5e-06, "loss": 0.1083, "num_input_tokens_seen": 535717156, "step": 3119 }, { "epoch": 0.8204116525284408, "loss": 0.16800367832183838, "loss_ce": 0.00139296252746135, "loss_iou": 0.43359375, "loss_num": 0.033203125, "loss_xval": 0.1669921875, "num_input_tokens_seen": 535717156, "step": 3119 }, { "epoch": 0.8206746892878279, "grad_norm": 9.684696947025552, "learning_rate": 5e-06, "loss": 0.0809, "num_input_tokens_seen": 535887380, "step": 3120 }, { "epoch": 0.8206746892878279, "loss": 0.0679091364145279, "loss_ce": 0.0011366696562618017, "loss_iou": 0.5390625, "loss_num": 0.01336669921875, "loss_xval": 0.06689453125, "num_input_tokens_seen": 535887380, "step": 3120 }, { "epoch": 0.820937726047215, "grad_norm": 5.315834673220869, "learning_rate": 5e-06, "loss": 0.0862, "num_input_tokens_seen": 536059444, "step": 3121 }, { "epoch": 0.820937726047215, "loss": 0.04389767348766327, "loss_ce": 0.00013546722766477615, "loss_iou": 0.4296875, "loss_num": 0.0087890625, "loss_xval": 0.043701171875, "num_input_tokens_seen": 536059444, "step": 3121 }, { "epoch": 0.8212007628066023, "grad_norm": 
4.263271795831078, "learning_rate": 5e-06, "loss": 0.1419, "num_input_tokens_seen": 536231404, "step": 3122 }, { "epoch": 0.8212007628066023, "loss": 0.15159711241722107, "loss_ce": 0.004105648957192898, "loss_iou": 0.498046875, "loss_num": 0.029541015625, "loss_xval": 0.1474609375, "num_input_tokens_seen": 536231404, "step": 3122 }, { "epoch": 0.8214637995659894, "grad_norm": 7.367303112338102, "learning_rate": 5e-06, "loss": 0.1257, "num_input_tokens_seen": 536403668, "step": 3123 }, { "epoch": 0.8214637995659894, "loss": 0.1084074005484581, "loss_ce": 0.0006498372531495988, "loss_iou": 0.58203125, "loss_num": 0.0216064453125, "loss_xval": 0.10791015625, "num_input_tokens_seen": 536403668, "step": 3123 }, { "epoch": 0.8217268363253765, "grad_norm": 6.857717580081254, "learning_rate": 5e-06, "loss": 0.1162, "num_input_tokens_seen": 536575892, "step": 3124 }, { "epoch": 0.8217268363253765, "loss": 0.0787474736571312, "loss_ce": 0.00030203917413018644, "loss_iou": 0.58203125, "loss_num": 0.015625, "loss_xval": 0.07861328125, "num_input_tokens_seen": 536575892, "step": 3124 }, { "epoch": 0.8219898730847636, "grad_norm": 4.826419943692509, "learning_rate": 5e-06, "loss": 0.0802, "num_input_tokens_seen": 536745964, "step": 3125 }, { "epoch": 0.8219898730847636, "loss": 0.06057834252715111, "loss_ce": 7.724385795881972e-05, "loss_iou": 0.388671875, "loss_num": 0.0120849609375, "loss_xval": 0.060546875, "num_input_tokens_seen": 536745964, "step": 3125 }, { "epoch": 0.8222529098441507, "grad_norm": 8.883739420539388, "learning_rate": 5e-06, "loss": 0.135, "num_input_tokens_seen": 536918180, "step": 3126 }, { "epoch": 0.8222529098441507, "loss": 0.11514750123023987, "loss_ce": 0.0007676149252802134, "loss_iou": 0.416015625, "loss_num": 0.02294921875, "loss_xval": 0.1142578125, "num_input_tokens_seen": 536918180, "step": 3126 }, { "epoch": 0.8225159466035379, "grad_norm": 4.391986789320263, "learning_rate": 5e-06, "loss": 0.1256, "num_input_tokens_seen": 537090196, "step": 
3127 }, { "epoch": 0.8225159466035379, "loss": 0.13754448294639587, "loss_ce": 0.0006731519242748618, "loss_iou": 0.51171875, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 537090196, "step": 3127 }, { "epoch": 0.822778983362925, "grad_norm": 7.64555343727262, "learning_rate": 5e-06, "loss": 0.0728, "num_input_tokens_seen": 537259572, "step": 3128 }, { "epoch": 0.822778983362925, "loss": 0.05659133195877075, "loss_ce": 5.7516066590324044e-05, "loss_iou": 0.56640625, "loss_num": 0.01129150390625, "loss_xval": 0.056640625, "num_input_tokens_seen": 537259572, "step": 3128 }, { "epoch": 0.8230420201223121, "grad_norm": 4.3032702337998225, "learning_rate": 5e-06, "loss": 0.0678, "num_input_tokens_seen": 537431812, "step": 3129 }, { "epoch": 0.8230420201223121, "loss": 0.07997481524944305, "loss_ce": 0.0033604400232434273, "loss_iou": 0.5390625, "loss_num": 0.01531982421875, "loss_xval": 0.07666015625, "num_input_tokens_seen": 537431812, "step": 3129 }, { "epoch": 0.8233050568816992, "grad_norm": 11.32084254562734, "learning_rate": 5e-06, "loss": 0.1133, "num_input_tokens_seen": 537602468, "step": 3130 }, { "epoch": 0.8233050568816992, "loss": 0.12599240243434906, "loss_ce": 0.0030370799358934164, "loss_iou": 0.54296875, "loss_num": 0.0245361328125, "loss_xval": 0.123046875, "num_input_tokens_seen": 537602468, "step": 3130 }, { "epoch": 0.8235680936410863, "grad_norm": 7.355242955773187, "learning_rate": 5e-06, "loss": 0.149, "num_input_tokens_seen": 537774448, "step": 3131 }, { "epoch": 0.8235680936410863, "loss": 0.18665780127048492, "loss_ce": 0.0003784986911341548, "loss_iou": 0.4453125, "loss_num": 0.037109375, "loss_xval": 0.1865234375, "num_input_tokens_seen": 537774448, "step": 3131 }, { "epoch": 0.8238311304004735, "grad_norm": 11.757519786843565, "learning_rate": 5e-06, "loss": 0.0924, "num_input_tokens_seen": 537946740, "step": 3132 }, { "epoch": 0.8238311304004735, "loss": 0.06623389571905136, "loss_ce": 0.0015976695576682687, 
"loss_iou": 0.359375, "loss_num": 0.012939453125, "loss_xval": 0.064453125, "num_input_tokens_seen": 537946740, "step": 3132 }, { "epoch": 0.8240941671598606, "grad_norm": 17.264629874858777, "learning_rate": 5e-06, "loss": 0.1361, "num_input_tokens_seen": 538118940, "step": 3133 }, { "epoch": 0.8240941671598606, "loss": 0.09394903481006622, "loss_ce": 0.0025488929823040962, "loss_iou": 0.51953125, "loss_num": 0.018310546875, "loss_xval": 0.09130859375, "num_input_tokens_seen": 538118940, "step": 3133 }, { "epoch": 0.8243572039192477, "grad_norm": 12.736031512367697, "learning_rate": 5e-06, "loss": 0.1083, "num_input_tokens_seen": 538291488, "step": 3134 }, { "epoch": 0.8243572039192477, "loss": 0.12837865948677063, "loss_ce": 0.0019290748750790954, "loss_iou": 0.5078125, "loss_num": 0.025390625, "loss_xval": 0.1259765625, "num_input_tokens_seen": 538291488, "step": 3134 }, { "epoch": 0.8246202406786348, "grad_norm": 4.032167194049116, "learning_rate": 5e-06, "loss": 0.0797, "num_input_tokens_seen": 538461628, "step": 3135 }, { "epoch": 0.8246202406786348, "loss": 0.09231233596801758, "loss_ce": 0.0006070120725780725, "loss_iou": 0.4921875, "loss_num": 0.018310546875, "loss_xval": 0.091796875, "num_input_tokens_seen": 538461628, "step": 3135 }, { "epoch": 0.8248832774380219, "grad_norm": 3.914309781520812, "learning_rate": 5e-06, "loss": 0.0854, "num_input_tokens_seen": 538633772, "step": 3136 }, { "epoch": 0.8248832774380219, "loss": 0.07093091309070587, "loss_ce": 0.00011487161100376397, "loss_iou": 0.58984375, "loss_num": 0.01416015625, "loss_xval": 0.07080078125, "num_input_tokens_seen": 538633772, "step": 3136 }, { "epoch": 0.8251463141974091, "grad_norm": 3.7923535168173763, "learning_rate": 5e-06, "loss": 0.114, "num_input_tokens_seen": 538805732, "step": 3137 }, { "epoch": 0.8251463141974091, "loss": 0.1175466924905777, "loss_ce": 0.0006643689121119678, "loss_iou": 0.462890625, "loss_num": 0.0233154296875, "loss_xval": 0.11669921875, 
"num_input_tokens_seen": 538805732, "step": 3137 }, { "epoch": 0.8254093509567962, "grad_norm": 10.05128274905235, "learning_rate": 5e-06, "loss": 0.1095, "num_input_tokens_seen": 538977916, "step": 3138 }, { "epoch": 0.8254093509567962, "loss": 0.11163683980703354, "loss_ce": 0.003146846778690815, "loss_iou": 0.52734375, "loss_num": 0.021728515625, "loss_xval": 0.1083984375, "num_input_tokens_seen": 538977916, "step": 3138 }, { "epoch": 0.8256723877161833, "grad_norm": 12.00226096912509, "learning_rate": 5e-06, "loss": 0.1521, "num_input_tokens_seen": 539150104, "step": 3139 }, { "epoch": 0.8256723877161833, "loss": 0.14204728603363037, "loss_ce": 0.0005983082228340209, "loss_iou": 0.439453125, "loss_num": 0.0283203125, "loss_xval": 0.1416015625, "num_input_tokens_seen": 539150104, "step": 3139 }, { "epoch": 0.8259354244755704, "grad_norm": 10.327954218233959, "learning_rate": 5e-06, "loss": 0.1203, "num_input_tokens_seen": 539322280, "step": 3140 }, { "epoch": 0.8259354244755704, "loss": 0.1829340159893036, "loss_ce": 0.004543509799987078, "loss_iou": 0.453125, "loss_num": 0.03564453125, "loss_xval": 0.1787109375, "num_input_tokens_seen": 539322280, "step": 3140 }, { "epoch": 0.8261984612349575, "grad_norm": 3.552655492096545, "learning_rate": 5e-06, "loss": 0.1035, "num_input_tokens_seen": 539494448, "step": 3141 }, { "epoch": 0.8261984612349575, "loss": 0.12512250244617462, "loss_ce": 0.003601514268666506, "loss_iou": 0.50390625, "loss_num": 0.0242919921875, "loss_xval": 0.12158203125, "num_input_tokens_seen": 539494448, "step": 3141 }, { "epoch": 0.8264614979943448, "grad_norm": 4.0634516483762795, "learning_rate": 5e-06, "loss": 0.0828, "num_input_tokens_seen": 539667072, "step": 3142 }, { "epoch": 0.8264614979943448, "loss": 0.1312233954668045, "loss_ce": 0.0005166015471331775, "loss_iou": 0.47265625, "loss_num": 0.026123046875, "loss_xval": 0.130859375, "num_input_tokens_seen": 539667072, "step": 3142 }, { "epoch": 0.8267245347537319, "grad_norm": 
12.104159402624534, "learning_rate": 5e-06, "loss": 0.1399, "num_input_tokens_seen": 539839460, "step": 3143 }, { "epoch": 0.8267245347537319, "loss": 0.2201562076807022, "loss_ce": 0.002535369014367461, "loss_iou": 0.34765625, "loss_num": 0.04345703125, "loss_xval": 0.2177734375, "num_input_tokens_seen": 539839460, "step": 3143 }, { "epoch": 0.826987571513119, "grad_norm": 5.4839208249491245, "learning_rate": 5e-06, "loss": 0.1106, "num_input_tokens_seen": 540011628, "step": 3144 }, { "epoch": 0.826987571513119, "loss": 0.1272164285182953, "loss_ce": 0.0027352366596460342, "loss_iou": 0.474609375, "loss_num": 0.02490234375, "loss_xval": 0.12451171875, "num_input_tokens_seen": 540011628, "step": 3144 }, { "epoch": 0.8272506082725061, "grad_norm": 4.535384525117057, "learning_rate": 5e-06, "loss": 0.1183, "num_input_tokens_seen": 540183608, "step": 3145 }, { "epoch": 0.8272506082725061, "loss": 0.0952780544757843, "loss_ce": 0.002458844566717744, "loss_iou": 0.3671875, "loss_num": 0.0185546875, "loss_xval": 0.0927734375, "num_input_tokens_seen": 540183608, "step": 3145 }, { "epoch": 0.8275136450318932, "grad_norm": 4.264720708193596, "learning_rate": 5e-06, "loss": 0.1042, "num_input_tokens_seen": 540355724, "step": 3146 }, { "epoch": 0.8275136450318932, "loss": 0.05311005562543869, "loss_ce": 0.0009097411530092359, "loss_iou": 0.51953125, "loss_num": 0.01043701171875, "loss_xval": 0.05224609375, "num_input_tokens_seen": 540355724, "step": 3146 }, { "epoch": 0.8277766817912803, "grad_norm": 6.253031178644263, "learning_rate": 5e-06, "loss": 0.1647, "num_input_tokens_seen": 540526424, "step": 3147 }, { "epoch": 0.8277766817912803, "loss": 0.1789083182811737, "loss_ce": 0.0011586775071918964, "loss_iou": 0.2734375, "loss_num": 0.03564453125, "loss_xval": 0.177734375, "num_input_tokens_seen": 540526424, "step": 3147 }, { "epoch": 0.8280397185506675, "grad_norm": 4.716126774756702, "learning_rate": 5e-06, "loss": 0.0978, "num_input_tokens_seen": 540698736, "step": 3148 
}, { "epoch": 0.8280397185506675, "loss": 0.053756728768348694, "loss_ce": 0.0007171769393607974, "loss_iou": 0.4453125, "loss_num": 0.0106201171875, "loss_xval": 0.052978515625, "num_input_tokens_seen": 540698736, "step": 3148 }, { "epoch": 0.8283027553100546, "grad_norm": 15.946573913857675, "learning_rate": 5e-06, "loss": 0.1372, "num_input_tokens_seen": 540870876, "step": 3149 }, { "epoch": 0.8283027553100546, "loss": 0.11821180582046509, "loss_ce": 0.001833026995882392, "loss_iou": 0.44140625, "loss_num": 0.023193359375, "loss_xval": 0.1162109375, "num_input_tokens_seen": 540870876, "step": 3149 }, { "epoch": 0.8285657920694417, "grad_norm": 3.1443507951345406, "learning_rate": 5e-06, "loss": 0.1031, "num_input_tokens_seen": 541041144, "step": 3150 }, { "epoch": 0.8285657920694417, "loss": 0.13489758968353271, "loss_ce": 0.004984267987310886, "loss_iou": 0.494140625, "loss_num": 0.0260009765625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 541041144, "step": 3150 }, { "epoch": 0.8288288288288288, "grad_norm": 4.1136952366915605, "learning_rate": 5e-06, "loss": 0.0911, "num_input_tokens_seen": 541213632, "step": 3151 }, { "epoch": 0.8288288288288288, "loss": 0.10275200009346008, "loss_ce": 0.0014183830935508013, "loss_iou": 0.4765625, "loss_num": 0.020263671875, "loss_xval": 0.1015625, "num_input_tokens_seen": 541213632, "step": 3151 }, { "epoch": 0.8290918655882159, "grad_norm": 5.711362515317036, "learning_rate": 5e-06, "loss": 0.1271, "num_input_tokens_seen": 541384104, "step": 3152 }, { "epoch": 0.8290918655882159, "loss": 0.08564618229866028, "loss_ce": 0.0014176733093336225, "loss_iou": 0.66796875, "loss_num": 0.016845703125, "loss_xval": 0.083984375, "num_input_tokens_seen": 541384104, "step": 3152 }, { "epoch": 0.8293549023476031, "grad_norm": 7.096186366509263, "learning_rate": 5e-06, "loss": 0.1273, "num_input_tokens_seen": 541556240, "step": 3153 }, { "epoch": 0.8293549023476031, "loss": 0.17091956734657288, "loss_ce": 0.0015012390213087201, 
"loss_iou": 0.421875, "loss_num": 0.033935546875, "loss_xval": 0.1689453125, "num_input_tokens_seen": 541556240, "step": 3153 }, { "epoch": 0.8296179391069902, "grad_norm": 4.29702608695741, "learning_rate": 5e-06, "loss": 0.1452, "num_input_tokens_seen": 541728356, "step": 3154 }, { "epoch": 0.8296179391069902, "loss": 0.10837417840957642, "loss_ce": 0.0006623809458687901, "loss_iou": 0.51171875, "loss_num": 0.0216064453125, "loss_xval": 0.10791015625, "num_input_tokens_seen": 541728356, "step": 3154 }, { "epoch": 0.8298809758663773, "grad_norm": 3.729285456687102, "learning_rate": 5e-06, "loss": 0.0659, "num_input_tokens_seen": 541898716, "step": 3155 }, { "epoch": 0.8298809758663773, "loss": 0.062001317739486694, "loss_ce": 0.0011492683552205563, "loss_iou": 0.50390625, "loss_num": 0.01214599609375, "loss_xval": 0.060791015625, "num_input_tokens_seen": 541898716, "step": 3155 }, { "epoch": 0.8301440126257644, "grad_norm": 6.543104796714217, "learning_rate": 5e-06, "loss": 0.0911, "num_input_tokens_seen": 542070820, "step": 3156 }, { "epoch": 0.8301440126257644, "loss": 0.09034896641969681, "loss_ce": 0.0022752326913177967, "loss_iou": 0.390625, "loss_num": 0.017578125, "loss_xval": 0.087890625, "num_input_tokens_seen": 542070820, "step": 3156 }, { "epoch": 0.8304070493851515, "grad_norm": 3.814660695434685, "learning_rate": 5e-06, "loss": 0.0838, "num_input_tokens_seen": 542242936, "step": 3157 }, { "epoch": 0.8304070493851515, "loss": 0.05618397891521454, "loss_ce": 0.0004894005251117051, "loss_iou": 0.412109375, "loss_num": 0.0111083984375, "loss_xval": 0.0556640625, "num_input_tokens_seen": 542242936, "step": 3157 }, { "epoch": 0.8306700861445387, "grad_norm": 13.94223132380543, "learning_rate": 5e-06, "loss": 0.1134, "num_input_tokens_seen": 542415376, "step": 3158 }, { "epoch": 0.8306700861445387, "loss": 0.09673337638378143, "loss_ce": 0.0017016411293298006, "loss_iou": 0.5703125, "loss_num": 0.01904296875, "loss_xval": 0.09521484375, 
"num_input_tokens_seen": 542415376, "step": 3158 }, { "epoch": 0.8309331229039258, "grad_norm": 46.47454058969395, "learning_rate": 5e-06, "loss": 0.1032, "num_input_tokens_seen": 542587700, "step": 3159 }, { "epoch": 0.8309331229039258, "loss": 0.13622400164604187, "loss_ce": 0.00045128766214475036, "loss_iou": 0.5703125, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 542587700, "step": 3159 }, { "epoch": 0.831196159663313, "grad_norm": 7.76157996882612, "learning_rate": 5e-06, "loss": 0.1055, "num_input_tokens_seen": 542758148, "step": 3160 }, { "epoch": 0.831196159663313, "loss": 0.11631490290164948, "loss_ce": 0.004040728323161602, "loss_iou": 0.42578125, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 542758148, "step": 3160 }, { "epoch": 0.8314591964227, "grad_norm": 20.483608511309008, "learning_rate": 5e-06, "loss": 0.085, "num_input_tokens_seen": 542930836, "step": 3161 }, { "epoch": 0.8314591964227, "loss": 0.06239602342247963, "loss_ce": 0.00014016299974173307, "loss_iou": 0.55078125, "loss_num": 0.012451171875, "loss_xval": 0.062255859375, "num_input_tokens_seen": 542930836, "step": 3161 }, { "epoch": 0.8317222331820872, "grad_norm": 4.545805831684384, "learning_rate": 5e-06, "loss": 0.1162, "num_input_tokens_seen": 543102800, "step": 3162 }, { "epoch": 0.8317222331820872, "loss": 0.12887202203273773, "loss_ce": 0.0006066488567739725, "loss_iou": 0.6015625, "loss_num": 0.025634765625, "loss_xval": 0.1279296875, "num_input_tokens_seen": 543102800, "step": 3162 }, { "epoch": 0.8319852699414744, "grad_norm": 6.107575063227538, "learning_rate": 5e-06, "loss": 0.1, "num_input_tokens_seen": 543274984, "step": 3163 }, { "epoch": 0.8319852699414744, "loss": 0.08694491535425186, "loss_ce": 0.00036654339055530727, "loss_iou": 0.498046875, "loss_num": 0.017333984375, "loss_xval": 0.08642578125, "num_input_tokens_seen": 543274984, "step": 3163 }, { "epoch": 0.8322483067008615, "grad_norm": 
5.535589838242773, "learning_rate": 5e-06, "loss": 0.1173, "num_input_tokens_seen": 543447000, "step": 3164 }, { "epoch": 0.8322483067008615, "loss": 0.12083543837070465, "loss_ce": 0.006333490367978811, "loss_iou": 0.6796875, "loss_num": 0.02294921875, "loss_xval": 0.1142578125, "num_input_tokens_seen": 543447000, "step": 3164 }, { "epoch": 0.8325113434602486, "grad_norm": 5.113582833614297, "learning_rate": 5e-06, "loss": 0.1266, "num_input_tokens_seen": 543619120, "step": 3165 }, { "epoch": 0.8325113434602486, "loss": 0.1699899584054947, "loss_ce": 0.002250079531222582, "loss_iou": 0.51953125, "loss_num": 0.033447265625, "loss_xval": 0.16796875, "num_input_tokens_seen": 543619120, "step": 3165 }, { "epoch": 0.8327743802196357, "grad_norm": 5.479089729021323, "learning_rate": 5e-06, "loss": 0.1221, "num_input_tokens_seen": 543791308, "step": 3166 }, { "epoch": 0.8327743802196357, "loss": 0.08266595751047134, "loss_ce": 0.00042108428897336125, "loss_iou": 0.5625, "loss_num": 0.0164794921875, "loss_xval": 0.08203125, "num_input_tokens_seen": 543791308, "step": 3166 }, { "epoch": 0.8330374169790228, "grad_norm": 4.786501058802981, "learning_rate": 5e-06, "loss": 0.0917, "num_input_tokens_seen": 543958632, "step": 3167 }, { "epoch": 0.8330374169790228, "loss": 0.07515060901641846, "loss_ce": 0.0012675554025918245, "loss_iou": 0.5703125, "loss_num": 0.0147705078125, "loss_xval": 0.07373046875, "num_input_tokens_seen": 543958632, "step": 3167 }, { "epoch": 0.83330045373841, "grad_norm": 8.594141920355682, "learning_rate": 5e-06, "loss": 0.12, "num_input_tokens_seen": 544130848, "step": 3168 }, { "epoch": 0.83330045373841, "loss": 0.09625629335641861, "loss_ce": 0.0025978446938097477, "loss_iou": 0.51171875, "loss_num": 0.018798828125, "loss_xval": 0.09375, "num_input_tokens_seen": 544130848, "step": 3168 }, { "epoch": 0.8335634904977971, "grad_norm": 3.7461805856215458, "learning_rate": 5e-06, "loss": 0.1468, "num_input_tokens_seen": 544302820, "step": 3169 }, { 
"epoch": 0.8335634904977971, "loss": 0.09222942590713501, "loss_ce": 0.0022025699727237225, "loss_iou": 0.375, "loss_num": 0.0179443359375, "loss_xval": 0.08984375, "num_input_tokens_seen": 544302820, "step": 3169 }, { "epoch": 0.8338265272571842, "grad_norm": 9.92373943246775, "learning_rate": 5e-06, "loss": 0.104, "num_input_tokens_seen": 544474904, "step": 3170 }, { "epoch": 0.8338265272571842, "loss": 0.14077287912368774, "loss_ce": 0.0030775703489780426, "loss_iou": 0.482421875, "loss_num": 0.0274658203125, "loss_xval": 0.1376953125, "num_input_tokens_seen": 544474904, "step": 3170 }, { "epoch": 0.8340895640165713, "grad_norm": 14.235058865758765, "learning_rate": 5e-06, "loss": 0.0805, "num_input_tokens_seen": 544647036, "step": 3171 }, { "epoch": 0.8340895640165713, "loss": 0.060213349759578705, "loss_ce": 0.0022452091798186302, "loss_iou": 0.4609375, "loss_num": 0.0115966796875, "loss_xval": 0.057861328125, "num_input_tokens_seen": 544647036, "step": 3171 }, { "epoch": 0.8343526007759584, "grad_norm": 4.48891217014488, "learning_rate": 5e-06, "loss": 0.1299, "num_input_tokens_seen": 544817552, "step": 3172 }, { "epoch": 0.8343526007759584, "loss": 0.1579454094171524, "loss_ce": 0.0024888694752007723, "loss_iou": 0.482421875, "loss_num": 0.0311279296875, "loss_xval": 0.1552734375, "num_input_tokens_seen": 544817552, "step": 3172 }, { "epoch": 0.8346156375353455, "grad_norm": 3.708111692136732, "learning_rate": 5e-06, "loss": 0.1094, "num_input_tokens_seen": 544989748, "step": 3173 }, { "epoch": 0.8346156375353455, "loss": 0.14915120601654053, "loss_ce": 0.0004848288372159004, "loss_iou": 0.34375, "loss_num": 0.0296630859375, "loss_xval": 0.1484375, "num_input_tokens_seen": 544989748, "step": 3173 }, { "epoch": 0.8348786742947327, "grad_norm": 6.783609066408303, "learning_rate": 5e-06, "loss": 0.1198, "num_input_tokens_seen": 545161788, "step": 3174 }, { "epoch": 0.8348786742947327, "loss": 0.13735045492649078, "loss_ce": 0.003683460643514991, "loss_iou": 
0.54296875, "loss_num": 0.0267333984375, "loss_xval": 0.1337890625, "num_input_tokens_seen": 545161788, "step": 3174 }, { "epoch": 0.8351417110541198, "grad_norm": 4.8955030087080775, "learning_rate": 5e-06, "loss": 0.0932, "num_input_tokens_seen": 545333884, "step": 3175 }, { "epoch": 0.8351417110541198, "loss": 0.08379638940095901, "loss_ce": 0.0006665037362836301, "loss_iou": 0.51171875, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 545333884, "step": 3175 }, { "epoch": 0.8354047478135069, "grad_norm": 4.39565885197116, "learning_rate": 5e-06, "loss": 0.1439, "num_input_tokens_seen": 545506244, "step": 3176 }, { "epoch": 0.8354047478135069, "loss": 0.12928782403469086, "loss_ce": 0.000915632932446897, "loss_iou": 0.46875, "loss_num": 0.0257568359375, "loss_xval": 0.1279296875, "num_input_tokens_seen": 545506244, "step": 3176 }, { "epoch": 0.835667784572894, "grad_norm": 6.2304214245063605, "learning_rate": 5e-06, "loss": 0.1119, "num_input_tokens_seen": 545678348, "step": 3177 }, { "epoch": 0.835667784572894, "loss": 0.07726402580738068, "loss_ce": 0.000832755584269762, "loss_iou": 0.44921875, "loss_num": 0.01531982421875, "loss_xval": 0.07666015625, "num_input_tokens_seen": 545678348, "step": 3177 }, { "epoch": 0.8359308213322811, "grad_norm": 4.950850483708038, "learning_rate": 5e-06, "loss": 0.1162, "num_input_tokens_seen": 545848136, "step": 3178 }, { "epoch": 0.8359308213322811, "loss": 0.12010537087917328, "loss_ce": 0.0009342365083284676, "loss_iou": 0.62890625, "loss_num": 0.02392578125, "loss_xval": 0.119140625, "num_input_tokens_seen": 545848136, "step": 3178 }, { "epoch": 0.8361938580916684, "grad_norm": 4.252976007842196, "learning_rate": 5e-06, "loss": 0.0991, "num_input_tokens_seen": 546020692, "step": 3179 }, { "epoch": 0.8361938580916684, "loss": 0.12632903456687927, "loss_ce": 0.00018462821026332676, "loss_iou": 0.486328125, "loss_num": 0.0252685546875, "loss_xval": 0.1259765625, "num_input_tokens_seen": 
546020692, "step": 3179 }, { "epoch": 0.8364568948510555, "grad_norm": 16.684582759145172, "learning_rate": 5e-06, "loss": 0.073, "num_input_tokens_seen": 546192760, "step": 3180 }, { "epoch": 0.8364568948510555, "loss": 0.09913001954555511, "loss_ce": 0.0009549736278131604, "loss_iou": 0.49609375, "loss_num": 0.0196533203125, "loss_xval": 0.09814453125, "num_input_tokens_seen": 546192760, "step": 3180 }, { "epoch": 0.8367199316104426, "grad_norm": 5.017020175241404, "learning_rate": 5e-06, "loss": 0.1322, "num_input_tokens_seen": 546363064, "step": 3181 }, { "epoch": 0.8367199316104426, "loss": 0.1710553914308548, "loss_ce": 0.00040109228575602174, "loss_iou": null, "loss_num": 0.0341796875, "loss_xval": 0.1708984375, "num_input_tokens_seen": 546363064, "step": 3181 }, { "epoch": 0.8369829683698297, "grad_norm": 17.167559763411923, "learning_rate": 5e-06, "loss": 0.1342, "num_input_tokens_seen": 546535208, "step": 3182 }, { "epoch": 0.8369829683698297, "loss": 0.15842683613300323, "loss_ce": 0.0015664853854104877, "loss_iou": 0.443359375, "loss_num": 0.031494140625, "loss_xval": 0.1572265625, "num_input_tokens_seen": 546535208, "step": 3182 }, { "epoch": 0.8372460051292168, "grad_norm": 3.9098429704759723, "learning_rate": 5e-06, "loss": 0.1016, "num_input_tokens_seen": 546707288, "step": 3183 }, { "epoch": 0.8372460051292168, "loss": 0.08958999812602997, "loss_ce": 0.00014297313464339823, "loss_iou": 0.5234375, "loss_num": 0.0179443359375, "loss_xval": 0.08935546875, "num_input_tokens_seen": 546707288, "step": 3183 }, { "epoch": 0.837509041888604, "grad_norm": 5.336892331569848, "learning_rate": 5e-06, "loss": 0.0983, "num_input_tokens_seen": 546879532, "step": 3184 }, { "epoch": 0.837509041888604, "loss": 0.07368629425764084, "loss_ce": 0.004197763279080391, "loss_iou": 0.51953125, "loss_num": 0.013916015625, "loss_xval": 0.0693359375, "num_input_tokens_seen": 546879532, "step": 3184 }, { "epoch": 0.8377720786479911, "grad_norm": 3.9486138565288265, 
"learning_rate": 5e-06, "loss": 0.1003, "num_input_tokens_seen": 547051612, "step": 3185 }, { "epoch": 0.8377720786479911, "loss": 0.15984772145748138, "loss_ce": 0.0009426883771084249, "loss_iou": 0.45703125, "loss_num": 0.03173828125, "loss_xval": 0.1591796875, "num_input_tokens_seen": 547051612, "step": 3185 }, { "epoch": 0.8380351154073782, "grad_norm": 6.643098269759205, "learning_rate": 5e-06, "loss": 0.1105, "num_input_tokens_seen": 547223768, "step": 3186 }, { "epoch": 0.8380351154073782, "loss": 0.1492346227169037, "loss_ce": 0.0017126407474279404, "loss_iou": 0.40625, "loss_num": 0.0294189453125, "loss_xval": 0.1474609375, "num_input_tokens_seen": 547223768, "step": 3186 }, { "epoch": 0.8382981521667653, "grad_norm": 30.69973998174967, "learning_rate": 5e-06, "loss": 0.0991, "num_input_tokens_seen": 547395788, "step": 3187 }, { "epoch": 0.8382981521667653, "loss": 0.13615994155406952, "loss_ce": 0.00041775350109674037, "loss_iou": 0.490234375, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 547395788, "step": 3187 }, { "epoch": 0.8385611889261524, "grad_norm": 35.72025172038926, "learning_rate": 5e-06, "loss": 0.1092, "num_input_tokens_seen": 547568000, "step": 3188 }, { "epoch": 0.8385611889261524, "loss": 0.04795503616333008, "loss_ce": 0.0002560606808401644, "loss_iou": 0.5703125, "loss_num": 0.009521484375, "loss_xval": 0.047607421875, "num_input_tokens_seen": 547568000, "step": 3188 }, { "epoch": 0.8388242256855396, "grad_norm": 7.18874848401809, "learning_rate": 5e-06, "loss": 0.1135, "num_input_tokens_seen": 547740064, "step": 3189 }, { "epoch": 0.8388242256855396, "loss": 0.12481731176376343, "loss_ce": 0.0005802565719932318, "loss_iou": 0.40625, "loss_num": 0.02490234375, "loss_xval": 0.1240234375, "num_input_tokens_seen": 547740064, "step": 3189 }, { "epoch": 0.8390872624449267, "grad_norm": 6.155592453443207, "learning_rate": 5e-06, "loss": 0.0875, "num_input_tokens_seen": 547912300, "step": 3190 }, { "epoch": 
0.8390872624449267, "loss": 0.08178332448005676, "loss_ce": 0.0008201911114156246, "loss_iou": 0.470703125, "loss_num": 0.0162353515625, "loss_xval": 0.0810546875, "num_input_tokens_seen": 547912300, "step": 3190 }, { "epoch": 0.8393502992043138, "grad_norm": 12.72945010384761, "learning_rate": 5e-06, "loss": 0.139, "num_input_tokens_seen": 548084548, "step": 3191 }, { "epoch": 0.8393502992043138, "loss": 0.19623327255249023, "loss_ce": 0.002812865423038602, "loss_iou": 0.4296875, "loss_num": 0.03857421875, "loss_xval": 0.193359375, "num_input_tokens_seen": 548084548, "step": 3191 }, { "epoch": 0.8396133359637009, "grad_norm": 5.353324449071839, "learning_rate": 5e-06, "loss": 0.1299, "num_input_tokens_seen": 548255176, "step": 3192 }, { "epoch": 0.8396133359637009, "loss": 0.12437019497156143, "loss_ce": 0.0003467575879767537, "loss_iou": 0.4921875, "loss_num": 0.0247802734375, "loss_xval": 0.1240234375, "num_input_tokens_seen": 548255176, "step": 3192 }, { "epoch": 0.839876372723088, "grad_norm": 4.347588948765136, "learning_rate": 5e-06, "loss": 0.1162, "num_input_tokens_seen": 548427488, "step": 3193 }, { "epoch": 0.839876372723088, "loss": 0.08202692121267319, "loss_ce": 0.0022234548814594746, "loss_iou": 0.53515625, "loss_num": 0.0159912109375, "loss_xval": 0.07958984375, "num_input_tokens_seen": 548427488, "step": 3193 }, { "epoch": 0.8401394094824752, "grad_norm": 5.210446302172759, "learning_rate": 5e-06, "loss": 0.1093, "num_input_tokens_seen": 548599828, "step": 3194 }, { "epoch": 0.8401394094824752, "loss": 0.12387488037347794, "loss_ce": 0.00018713258032221347, "loss_iou": 0.59765625, "loss_num": 0.024658203125, "loss_xval": 0.12353515625, "num_input_tokens_seen": 548599828, "step": 3194 }, { "epoch": 0.8404024462418623, "grad_norm": 3.9642883700177514, "learning_rate": 5e-06, "loss": 0.1171, "num_input_tokens_seen": 548771932, "step": 3195 }, { "epoch": 0.8404024462418623, "loss": 0.12389804422855377, "loss_ce": 0.00396396778523922, "loss_iou": 
0.490234375, "loss_num": 0.0240478515625, "loss_xval": 0.1201171875, "num_input_tokens_seen": 548771932, "step": 3195 }, { "epoch": 0.8406654830012494, "grad_norm": 4.444986430362012, "learning_rate": 5e-06, "loss": 0.08, "num_input_tokens_seen": 548944332, "step": 3196 }, { "epoch": 0.8406654830012494, "loss": 0.07641720026731491, "loss_ce": 0.0017254289705306292, "loss_iou": 0.578125, "loss_num": 0.01495361328125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 548944332, "step": 3196 }, { "epoch": 0.8409285197606365, "grad_norm": 17.152304646652222, "learning_rate": 5e-06, "loss": 0.1166, "num_input_tokens_seen": 549116460, "step": 3197 }, { "epoch": 0.8409285197606365, "loss": 0.07439431548118591, "loss_ce": 0.0016404138877987862, "loss_iou": 0.435546875, "loss_num": 0.0145263671875, "loss_xval": 0.07275390625, "num_input_tokens_seen": 549116460, "step": 3197 }, { "epoch": 0.8411915565200236, "grad_norm": 6.835966777853977, "learning_rate": 5e-06, "loss": 0.1296, "num_input_tokens_seen": 549286920, "step": 3198 }, { "epoch": 0.8411915565200236, "loss": 0.23670437932014465, "loss_ce": 0.0033669895492494106, "loss_iou": 0.4609375, "loss_num": 0.046630859375, "loss_xval": 0.2333984375, "num_input_tokens_seen": 549286920, "step": 3198 }, { "epoch": 0.8414545932794107, "grad_norm": 3.334370051059133, "learning_rate": 5e-06, "loss": 0.1002, "num_input_tokens_seen": 549458932, "step": 3199 }, { "epoch": 0.8414545932794107, "loss": 0.10936430096626282, "loss_ce": 0.001118454267270863, "loss_iou": 0.47265625, "loss_num": 0.0216064453125, "loss_xval": 0.1083984375, "num_input_tokens_seen": 549458932, "step": 3199 }, { "epoch": 0.841717630038798, "grad_norm": 3.3887650622610863, "learning_rate": 5e-06, "loss": 0.119, "num_input_tokens_seen": 549630896, "step": 3200 }, { "epoch": 0.841717630038798, "loss": 0.1543048769235611, "loss_ce": 0.00280036055482924, "loss_iou": 0.451171875, "loss_num": 0.0302734375, "loss_xval": 0.1513671875, "num_input_tokens_seen": 
549630896, "step": 3200 }, { "epoch": 0.8419806667981851, "grad_norm": 8.90667441822763, "learning_rate": 5e-06, "loss": 0.1431, "num_input_tokens_seen": 549802980, "step": 3201 }, { "epoch": 0.8419806667981851, "loss": 0.10668284446001053, "loss_ce": 0.0037165414541959763, "loss_iou": 0.322265625, "loss_num": 0.0206298828125, "loss_xval": 0.10302734375, "num_input_tokens_seen": 549802980, "step": 3201 }, { "epoch": 0.8422437035575722, "grad_norm": 4.581013999909635, "learning_rate": 5e-06, "loss": 0.0768, "num_input_tokens_seen": 549973288, "step": 3202 }, { "epoch": 0.8422437035575722, "loss": 0.05071475729346275, "loss_ce": 0.0011084338184446096, "loss_iou": 0.578125, "loss_num": 0.00994873046875, "loss_xval": 0.049560546875, "num_input_tokens_seen": 549973288, "step": 3202 }, { "epoch": 0.8425067403169593, "grad_norm": 7.123665679875533, "learning_rate": 5e-06, "loss": 0.1324, "num_input_tokens_seen": 550143700, "step": 3203 }, { "epoch": 0.8425067403169593, "loss": 0.1053222045302391, "loss_ce": 0.0030272852163761854, "loss_iou": 0.404296875, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 550143700, "step": 3203 }, { "epoch": 0.8427697770763464, "grad_norm": 14.86581378718403, "learning_rate": 5e-06, "loss": 0.1414, "num_input_tokens_seen": 550315752, "step": 3204 }, { "epoch": 0.8427697770763464, "loss": 0.08674832433462143, "loss_ce": 0.000688750937115401, "loss_iou": null, "loss_num": 0.0172119140625, "loss_xval": 0.0859375, "num_input_tokens_seen": 550315752, "step": 3204 }, { "epoch": 0.8430328138357336, "grad_norm": 15.15255979041917, "learning_rate": 5e-06, "loss": 0.1341, "num_input_tokens_seen": 550487764, "step": 3205 }, { "epoch": 0.8430328138357336, "loss": 0.11826883256435394, "loss_ce": 0.001401771791279316, "loss_iou": 0.47265625, "loss_num": 0.0233154296875, "loss_xval": 0.11669921875, "num_input_tokens_seen": 550487764, "step": 3205 }, { "epoch": 0.8432958505951207, "grad_norm": 5.341317771181548, "learning_rate": 
5e-06, "loss": 0.1085, "num_input_tokens_seen": 550659696, "step": 3206 }, { "epoch": 0.8432958505951207, "loss": 0.11918849498033524, "loss_ce": 0.0002920094411820173, "loss_iou": 0.5859375, "loss_num": 0.0238037109375, "loss_xval": 0.119140625, "num_input_tokens_seen": 550659696, "step": 3206 }, { "epoch": 0.8435588873545078, "grad_norm": 5.395123389791678, "learning_rate": 5e-06, "loss": 0.1327, "num_input_tokens_seen": 550831904, "step": 3207 }, { "epoch": 0.8435588873545078, "loss": 0.07498294115066528, "loss_ce": 0.001038852147758007, "loss_iou": 0.33984375, "loss_num": 0.0147705078125, "loss_xval": 0.07373046875, "num_input_tokens_seen": 550831904, "step": 3207 }, { "epoch": 0.8438219241138949, "grad_norm": 3.643785530851578, "learning_rate": 5e-06, "loss": 0.1128, "num_input_tokens_seen": 551004156, "step": 3208 }, { "epoch": 0.8438219241138949, "loss": 0.06928001344203949, "loss_ce": 0.004369123373180628, "loss_iou": 0.494140625, "loss_num": 0.01300048828125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 551004156, "step": 3208 }, { "epoch": 0.844084960873282, "grad_norm": 5.391792901404867, "learning_rate": 5e-06, "loss": 0.0774, "num_input_tokens_seen": 551176320, "step": 3209 }, { "epoch": 0.844084960873282, "loss": 0.0887857973575592, "loss_ce": 0.0004984364495612681, "loss_iou": 0.376953125, "loss_num": 0.0177001953125, "loss_xval": 0.08837890625, "num_input_tokens_seen": 551176320, "step": 3209 }, { "epoch": 0.8443479976326692, "grad_norm": 7.538916165469519, "learning_rate": 5e-06, "loss": 0.1156, "num_input_tokens_seen": 551348168, "step": 3210 }, { "epoch": 0.8443479976326692, "loss": 0.16849397122859955, "loss_ce": 0.0030734348110854626, "loss_iou": 0.419921875, "loss_num": 0.033203125, "loss_xval": 0.1650390625, "num_input_tokens_seen": 551348168, "step": 3210 }, { "epoch": 0.8446110343920563, "grad_norm": 7.685615482077233, "learning_rate": 5e-06, "loss": 0.1016, "num_input_tokens_seen": 551520728, "step": 3211 }, { "epoch": 
0.8446110343920563, "loss": 0.0783834308385849, "loss_ce": 0.001479133265092969, "loss_iou": 0.5390625, "loss_num": 0.015380859375, "loss_xval": 0.0771484375, "num_input_tokens_seen": 551520728, "step": 3211 }, { "epoch": 0.8448740711514434, "grad_norm": 8.632575689436024, "learning_rate": 5e-06, "loss": 0.1109, "num_input_tokens_seen": 551692804, "step": 3212 }, { "epoch": 0.8448740711514434, "loss": 0.15737096965312958, "loss_ce": 0.0006174240261316299, "loss_iou": 0.5234375, "loss_num": 0.03125, "loss_xval": 0.1572265625, "num_input_tokens_seen": 551692804, "step": 3212 }, { "epoch": 0.8451371079108305, "grad_norm": 12.457163123139722, "learning_rate": 5e-06, "loss": 0.1306, "num_input_tokens_seen": 551864880, "step": 3213 }, { "epoch": 0.8451371079108305, "loss": 0.07481381297111511, "loss_ce": 0.003173800650984049, "loss_iou": 0.44921875, "loss_num": 0.01434326171875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 551864880, "step": 3213 }, { "epoch": 0.8454001446702176, "grad_norm": 3.8683898896692166, "learning_rate": 5e-06, "loss": 0.09, "num_input_tokens_seen": 552036812, "step": 3214 }, { "epoch": 0.8454001446702176, "loss": 0.05259804055094719, "loss_ce": 0.0005503093125298619, "loss_iou": 0.51171875, "loss_num": 0.0103759765625, "loss_xval": 0.052001953125, "num_input_tokens_seen": 552036812, "step": 3214 }, { "epoch": 0.8456631814296048, "grad_norm": 4.539951161935164, "learning_rate": 5e-06, "loss": 0.1134, "num_input_tokens_seen": 552208856, "step": 3215 }, { "epoch": 0.8456631814296048, "loss": 0.07421931624412537, "loss_ce": 0.0011754983570426702, "loss_iou": 0.478515625, "loss_num": 0.01458740234375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 552208856, "step": 3215 }, { "epoch": 0.8459262181889919, "grad_norm": 10.573780213794782, "learning_rate": 5e-06, "loss": 0.0948, "num_input_tokens_seen": 552380904, "step": 3216 }, { "epoch": 0.8459262181889919, "loss": 0.06666961312294006, "loss_ce": 0.001804507803171873, "loss_iou": 
0.484375, "loss_num": 0.01300048828125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 552380904, "step": 3216 }, { "epoch": 0.846189254948379, "grad_norm": 4.275878567324078, "learning_rate": 5e-06, "loss": 0.1204, "num_input_tokens_seen": 552551132, "step": 3217 }, { "epoch": 0.846189254948379, "loss": 0.09229104220867157, "loss_ce": 0.0006162393838167191, "loss_iou": 0.4921875, "loss_num": 0.018310546875, "loss_xval": 0.091796875, "num_input_tokens_seen": 552551132, "step": 3217 }, { "epoch": 0.8464522917077661, "grad_norm": 4.658023344353002, "learning_rate": 5e-06, "loss": 0.151, "num_input_tokens_seen": 552722824, "step": 3218 }, { "epoch": 0.8464522917077661, "loss": 0.07020144164562225, "loss_ce": 0.0020709503442049026, "loss_iou": 0.49609375, "loss_num": 0.01361083984375, "loss_xval": 0.068359375, "num_input_tokens_seen": 552722824, "step": 3218 }, { "epoch": 0.8467153284671532, "grad_norm": 6.1002348516078095, "learning_rate": 5e-06, "loss": 0.1233, "num_input_tokens_seen": 552894948, "step": 3219 }, { "epoch": 0.8467153284671532, "loss": 0.10183661431074142, "loss_ce": 0.0007318751304410398, "loss_iou": 0.5703125, "loss_num": 0.020263671875, "loss_xval": 0.10107421875, "num_input_tokens_seen": 552894948, "step": 3219 }, { "epoch": 0.8469783652265405, "grad_norm": 3.6957881460841646, "learning_rate": 5e-06, "loss": 0.1319, "num_input_tokens_seen": 553065492, "step": 3220 }, { "epoch": 0.8469783652265405, "loss": 0.20681847631931305, "loss_ce": 0.005616086535155773, "loss_iou": 0.4609375, "loss_num": 0.040283203125, "loss_xval": 0.201171875, "num_input_tokens_seen": 553065492, "step": 3220 }, { "epoch": 0.8472414019859276, "grad_norm": 31.1352481206466, "learning_rate": 5e-06, "loss": 0.0969, "num_input_tokens_seen": 553237384, "step": 3221 }, { "epoch": 0.8472414019859276, "loss": 0.0710492879152298, "loss_ce": 0.0004316139966249466, "loss_iou": 0.42578125, "loss_num": 0.01409912109375, "loss_xval": 0.07080078125, "num_input_tokens_seen": 553237384, 
"step": 3221 }, { "epoch": 0.8475044387453147, "grad_norm": 15.822969391935775, "learning_rate": 5e-06, "loss": 0.1572, "num_input_tokens_seen": 553409788, "step": 3222 }, { "epoch": 0.8475044387453147, "loss": 0.24513369798660278, "loss_ce": 0.001847569365054369, "loss_iou": null, "loss_num": 0.048583984375, "loss_xval": 0.2431640625, "num_input_tokens_seen": 553409788, "step": 3222 }, { "epoch": 0.8477674755047018, "grad_norm": 4.903880792540357, "learning_rate": 5e-06, "loss": 0.082, "num_input_tokens_seen": 553581532, "step": 3223 }, { "epoch": 0.8477674755047018, "loss": 0.0573626384139061, "loss_ce": 0.0015459894202649593, "loss_iou": null, "loss_num": 0.01116943359375, "loss_xval": 0.055908203125, "num_input_tokens_seen": 553581532, "step": 3223 }, { "epoch": 0.8480305122640889, "grad_norm": 11.193676890263411, "learning_rate": 5e-06, "loss": 0.095, "num_input_tokens_seen": 553753504, "step": 3224 }, { "epoch": 0.8480305122640889, "loss": 0.12998200953006744, "loss_ce": 0.0016861144686117768, "loss_iou": 0.515625, "loss_num": 0.025634765625, "loss_xval": 0.1279296875, "num_input_tokens_seen": 553753504, "step": 3224 }, { "epoch": 0.848293549023476, "grad_norm": 6.892798676870787, "learning_rate": 5e-06, "loss": 0.1284, "num_input_tokens_seen": 553925232, "step": 3225 }, { "epoch": 0.848293549023476, "loss": 0.06351655721664429, "loss_ce": 0.0010470744455233216, "loss_iou": 0.33203125, "loss_num": 0.01251220703125, "loss_xval": 0.0625, "num_input_tokens_seen": 553925232, "step": 3225 }, { "epoch": 0.8485565857828632, "grad_norm": 3.94134933091549, "learning_rate": 5e-06, "loss": 0.0882, "num_input_tokens_seen": 554097380, "step": 3226 }, { "epoch": 0.8485565857828632, "loss": 0.06772696226835251, "loss_ce": 0.00040518559399060905, "loss_iou": 0.5, "loss_num": 0.01348876953125, "loss_xval": 0.0673828125, "num_input_tokens_seen": 554097380, "step": 3226 }, { "epoch": 0.8488196225422503, "grad_norm": 4.553319445567292, "learning_rate": 5e-06, "loss": 0.1467, 
"num_input_tokens_seen": 554269324, "step": 3227 }, { "epoch": 0.8488196225422503, "loss": 0.06106797605752945, "loss_ce": 0.00015489489305764437, "loss_iou": 0.50390625, "loss_num": 0.01220703125, "loss_xval": 0.06103515625, "num_input_tokens_seen": 554269324, "step": 3227 }, { "epoch": 0.8490826593016374, "grad_norm": 8.122842269719726, "learning_rate": 5e-06, "loss": 0.0758, "num_input_tokens_seen": 554441420, "step": 3228 }, { "epoch": 0.8490826593016374, "loss": 0.07994222640991211, "loss_ce": 0.0005202332977205515, "loss_iou": 0.5078125, "loss_num": 0.015869140625, "loss_xval": 0.07958984375, "num_input_tokens_seen": 554441420, "step": 3228 }, { "epoch": 0.8493456960610245, "grad_norm": 4.946244650717768, "learning_rate": 5e-06, "loss": 0.0891, "num_input_tokens_seen": 554613348, "step": 3229 }, { "epoch": 0.8493456960610245, "loss": 0.07239595800638199, "loss_ce": 0.0015646612737327814, "loss_iou": 0.4921875, "loss_num": 0.01416015625, "loss_xval": 0.07080078125, "num_input_tokens_seen": 554613348, "step": 3229 }, { "epoch": 0.8496087328204116, "grad_norm": 10.327431498521296, "learning_rate": 5e-06, "loss": 0.1153, "num_input_tokens_seen": 554785544, "step": 3230 }, { "epoch": 0.8496087328204116, "loss": 0.1082451343536377, "loss_ce": 0.001616714522242546, "loss_iou": 0.56640625, "loss_num": 0.0213623046875, "loss_xval": 0.1064453125, "num_input_tokens_seen": 554785544, "step": 3230 }, { "epoch": 0.8498717695797988, "grad_norm": 6.2381116819087605, "learning_rate": 5e-06, "loss": 0.0837, "num_input_tokens_seen": 554957700, "step": 3231 }, { "epoch": 0.8498717695797988, "loss": 0.05288837477564812, "loss_ce": 0.0004286564071662724, "loss_iou": 0.41015625, "loss_num": 0.010498046875, "loss_xval": 0.052490234375, "num_input_tokens_seen": 554957700, "step": 3231 }, { "epoch": 0.8501348063391859, "grad_norm": 5.665331437954715, "learning_rate": 5e-06, "loss": 0.1052, "num_input_tokens_seen": 555128400, "step": 3232 }, { "epoch": 0.8501348063391859, "loss": 
0.17892731726169586, "loss_ce": 0.008456122130155563, "loss_iou": 0.47265625, "loss_num": 0.0341796875, "loss_xval": 0.1708984375, "num_input_tokens_seen": 555128400, "step": 3232 }, { "epoch": 0.850397843098573, "grad_norm": 4.02543856443553, "learning_rate": 5e-06, "loss": 0.095, "num_input_tokens_seen": 555300712, "step": 3233 }, { "epoch": 0.850397843098573, "loss": 0.1149306371808052, "loss_ce": 0.0023512912448495626, "loss_iou": 0.51171875, "loss_num": 0.0225830078125, "loss_xval": 0.11279296875, "num_input_tokens_seen": 555300712, "step": 3233 }, { "epoch": 0.8506608798579601, "grad_norm": 4.049090978454337, "learning_rate": 5e-06, "loss": 0.0991, "num_input_tokens_seen": 555472756, "step": 3234 }, { "epoch": 0.8506608798579601, "loss": 0.14269746840000153, "loss_ce": 0.0037509393878281116, "loss_iou": 0.56640625, "loss_num": 0.0277099609375, "loss_xval": 0.138671875, "num_input_tokens_seen": 555472756, "step": 3234 }, { "epoch": 0.8509239166173472, "grad_norm": 4.24693988860905, "learning_rate": 5e-06, "loss": 0.1089, "num_input_tokens_seen": 555644856, "step": 3235 }, { "epoch": 0.8509239166173472, "loss": 0.06741193681955338, "loss_ce": 0.0012955997372046113, "loss_iou": 0.478515625, "loss_num": 0.01324462890625, "loss_xval": 0.06591796875, "num_input_tokens_seen": 555644856, "step": 3235 }, { "epoch": 0.8511869533767344, "grad_norm": 4.223553863328503, "learning_rate": 5e-06, "loss": 0.1173, "num_input_tokens_seen": 555817488, "step": 3236 }, { "epoch": 0.8511869533767344, "loss": 0.09499558061361313, "loss_ce": 0.0007572993636131287, "loss_iou": 0.5078125, "loss_num": 0.018798828125, "loss_xval": 0.09423828125, "num_input_tokens_seen": 555817488, "step": 3236 }, { "epoch": 0.8514499901361215, "grad_norm": 8.070861336660833, "learning_rate": 5e-06, "loss": 0.1101, "num_input_tokens_seen": 555989844, "step": 3237 }, { "epoch": 0.8514499901361215, "loss": 0.09148094058036804, "loss_ce": 0.0017287411028519273, "loss_iou": 0.515625, "loss_num": 
0.0179443359375, "loss_xval": 0.08984375, "num_input_tokens_seen": 555989844, "step": 3237 }, { "epoch": 0.8517130268955087, "grad_norm": 4.7628320746905235, "learning_rate": 5e-06, "loss": 0.13, "num_input_tokens_seen": 556162144, "step": 3238 }, { "epoch": 0.8517130268955087, "loss": 0.08755885809659958, "loss_ce": 0.001667136326432228, "loss_iou": 0.421875, "loss_num": 0.0172119140625, "loss_xval": 0.0859375, "num_input_tokens_seen": 556162144, "step": 3238 }, { "epoch": 0.8519760636548958, "grad_norm": 8.201240707199219, "learning_rate": 5e-06, "loss": 0.068, "num_input_tokens_seen": 556334020, "step": 3239 }, { "epoch": 0.8519760636548958, "loss": 0.06216670200228691, "loss_ce": 0.001772413495928049, "loss_iou": 0.404296875, "loss_num": 0.0120849609375, "loss_xval": 0.060302734375, "num_input_tokens_seen": 556334020, "step": 3239 }, { "epoch": 0.8522391004142829, "grad_norm": 5.1836633623539266, "learning_rate": 5e-06, "loss": 0.1268, "num_input_tokens_seen": 556502704, "step": 3240 }, { "epoch": 0.8522391004142829, "loss": 0.09547331184148788, "loss_ce": 0.0012350315228104591, "loss_iou": 0.333984375, "loss_num": 0.018798828125, "loss_xval": 0.09423828125, "num_input_tokens_seen": 556502704, "step": 3240 }, { "epoch": 0.8525021371736701, "grad_norm": 4.1790455766130234, "learning_rate": 5e-06, "loss": 0.1352, "num_input_tokens_seen": 556674848, "step": 3241 }, { "epoch": 0.8525021371736701, "loss": 0.15765714645385742, "loss_ce": 0.00018642976647242904, "loss_iou": 0.353515625, "loss_num": 0.031494140625, "loss_xval": 0.1572265625, "num_input_tokens_seen": 556674848, "step": 3241 }, { "epoch": 0.8527651739330572, "grad_norm": 32.42085523655999, "learning_rate": 5e-06, "loss": 0.0973, "num_input_tokens_seen": 556847120, "step": 3242 }, { "epoch": 0.8527651739330572, "loss": 0.10435596853494644, "loss_ce": 0.0030986424535512924, "loss_iou": 0.48046875, "loss_num": 0.020263671875, "loss_xval": 0.10107421875, "num_input_tokens_seen": 556847120, "step": 3242 }, { 
"epoch": 0.8530282106924443, "grad_norm": 3.085990680432512, "learning_rate": 5e-06, "loss": 0.1006, "num_input_tokens_seen": 557015380, "step": 3243 }, { "epoch": 0.8530282106924443, "loss": 0.12935911118984222, "loss_ce": 0.0042217751033604145, "loss_iou": 0.3515625, "loss_num": 0.0250244140625, "loss_xval": 0.125, "num_input_tokens_seen": 557015380, "step": 3243 }, { "epoch": 0.8532912474518314, "grad_norm": 5.554407683422717, "learning_rate": 5e-06, "loss": 0.0837, "num_input_tokens_seen": 557186020, "step": 3244 }, { "epoch": 0.8532912474518314, "loss": 0.12462737411260605, "loss_ce": 0.002557065337896347, "loss_iou": 0.5234375, "loss_num": 0.0244140625, "loss_xval": 0.1220703125, "num_input_tokens_seen": 557186020, "step": 3244 }, { "epoch": 0.8535542842112185, "grad_norm": 77.39805630943022, "learning_rate": 5e-06, "loss": 0.0876, "num_input_tokens_seen": 557358056, "step": 3245 }, { "epoch": 0.8535542842112185, "loss": 0.06247711926698685, "loss_ce": 0.0029983618296682835, "loss_iou": 0.5390625, "loss_num": 0.01190185546875, "loss_xval": 0.0595703125, "num_input_tokens_seen": 557358056, "step": 3245 }, { "epoch": 0.8538173209706057, "grad_norm": 6.568132581838013, "learning_rate": 5e-06, "loss": 0.1201, "num_input_tokens_seen": 557530164, "step": 3246 }, { "epoch": 0.8538173209706057, "loss": 0.10226649791002274, "loss_ce": 0.0013143508695065975, "loss_iou": 0.48828125, "loss_num": 0.0201416015625, "loss_xval": 0.10107421875, "num_input_tokens_seen": 557530164, "step": 3246 }, { "epoch": 0.8540803577299928, "grad_norm": 8.945021217700889, "learning_rate": 5e-06, "loss": 0.1245, "num_input_tokens_seen": 557702352, "step": 3247 }, { "epoch": 0.8540803577299928, "loss": 0.14076803624629974, "loss_ce": 0.002218232722952962, "loss_iou": 0.40625, "loss_num": 0.0277099609375, "loss_xval": 0.138671875, "num_input_tokens_seen": 557702352, "step": 3247 }, { "epoch": 0.8543433944893799, "grad_norm": 5.970819800467794, "learning_rate": 5e-06, "loss": 0.113, 
"num_input_tokens_seen": 557874756, "step": 3248 }, { "epoch": 0.8543433944893799, "loss": 0.12330596148967743, "loss_ce": 0.002425831276923418, "loss_iou": 0.470703125, "loss_num": 0.024169921875, "loss_xval": 0.12109375, "num_input_tokens_seen": 557874756, "step": 3248 }, { "epoch": 0.854606431248767, "grad_norm": 12.952896902366254, "learning_rate": 5e-06, "loss": 0.1366, "num_input_tokens_seen": 558045276, "step": 3249 }, { "epoch": 0.854606431248767, "loss": 0.15818831324577332, "loss_ce": 0.0018772899638861418, "loss_iou": 0.443359375, "loss_num": 0.03125, "loss_xval": 0.15625, "num_input_tokens_seen": 558045276, "step": 3249 }, { "epoch": 0.8548694680081541, "grad_norm": 10.077522334681213, "learning_rate": 5e-06, "loss": 0.0936, "num_input_tokens_seen": 558217164, "step": 3250 }, { "epoch": 0.8548694680081541, "eval_websight_new_CIoU": 0.8994152843952179, "eval_websight_new_GIoU": 0.9002452492713928, "eval_websight_new_IoU": 0.9029170572757721, "eval_websight_new_MAE_all": 0.014043471310287714, "eval_websight_new_MAE_h": 0.007522843778133392, "eval_websight_new_MAE_w": 0.020659465342760086, "eval_websight_new_MAE_x": 0.021266265772283077, "eval_websight_new_MAE_y": 0.006725311512127519, "eval_websight_new_NUM_probability": 0.9999923408031464, "eval_websight_new_inside_bbox": 1.0, "eval_websight_new_loss": 0.07195720076560974, "eval_websight_new_loss_ce": 4.601216005539754e-06, "eval_websight_new_loss_iou": 0.31280517578125, "eval_websight_new_loss_num": 0.012767791748046875, "eval_websight_new_loss_xval": 0.06391143798828125, "eval_websight_new_runtime": 56.9054, "eval_websight_new_samples_per_second": 0.879, "eval_websight_new_steps_per_second": 0.035, "num_input_tokens_seen": 558217164, "step": 3250 }, { "epoch": 0.8548694680081541, "eval_seeclick_CIoU": 0.6642794907093048, "eval_seeclick_GIoU": 0.6613934338092804, "eval_seeclick_IoU": 0.6828196048736572, "eval_seeclick_MAE_all": 0.04043097607791424, "eval_seeclick_MAE_h": 0.023812726140022278, 
"eval_seeclick_MAE_w": 0.055972687900066376, "eval_seeclick_MAE_x": 0.05744660459458828, "eval_seeclick_MAE_y": 0.02449188195168972, "eval_seeclick_NUM_probability": 0.999969094991684, "eval_seeclick_inside_bbox": 0.9375, "eval_seeclick_loss": 0.1842024326324463, "eval_seeclick_loss_ce": 0.008998175617307425, "eval_seeclick_loss_iou": 0.4769287109375, "eval_seeclick_loss_num": 0.0336456298828125, "eval_seeclick_loss_xval": 0.1681365966796875, "eval_seeclick_runtime": 76.1272, "eval_seeclick_samples_per_second": 0.565, "eval_seeclick_steps_per_second": 0.026, "num_input_tokens_seen": 558217164, "step": 3250 }, { "epoch": 0.8548694680081541, "eval_icons_CIoU": 0.8727431297302246, "eval_icons_GIoU": 0.8711867332458496, "eval_icons_IoU": 0.8781991004943848, "eval_icons_MAE_all": 0.017583131790161133, "eval_icons_MAE_h": 0.019472193904221058, "eval_icons_MAE_w": 0.015529958996921778, "eval_icons_MAE_x": 0.015652839560061693, "eval_icons_MAE_y": 0.019677532836794853, "eval_icons_NUM_probability": 0.9999885261058807, "eval_icons_inside_bbox": 0.984375, "eval_icons_loss": 0.06516695767641068, "eval_icons_loss_ce": 9.223055712936912e-06, "eval_icons_loss_iou": 0.504638671875, "eval_icons_loss_num": 0.012132644653320312, "eval_icons_loss_xval": 0.0607147216796875, "eval_icons_runtime": 94.9537, "eval_icons_samples_per_second": 0.527, "eval_icons_steps_per_second": 0.021, "num_input_tokens_seen": 558217164, "step": 3250 }, { "epoch": 0.8548694680081541, "eval_screenspot_CIoU": 0.6023598512013754, "eval_screenspot_GIoU": 0.5998436013857523, "eval_screenspot_IoU": 0.6310188174247742, "eval_screenspot_MAE_all": 0.06860506162047386, "eval_screenspot_MAE_h": 0.04429138886431853, "eval_screenspot_MAE_w": 0.11814649154742558, "eval_screenspot_MAE_x": 0.06877896686395009, "eval_screenspot_MAE_y": 0.043203407898545265, "eval_screenspot_NUM_probability": 0.9998593727747599, "eval_screenspot_inside_bbox": 0.8841666579246521, "eval_screenspot_loss": 0.8844305276870728, 
"eval_screenspot_loss_ce": 0.5899222294489542, "eval_screenspot_loss_iou": 0.4657796223958333, "eval_screenspot_loss_num": 0.057614644368489586, "eval_screenspot_loss_xval": 0.2879842122395833, "eval_screenspot_runtime": 153.6916, "eval_screenspot_samples_per_second": 0.579, "eval_screenspot_steps_per_second": 0.02, "num_input_tokens_seen": 558217164, "step": 3250 }, { "epoch": 0.8548694680081541, "loss": 0.8671462535858154, "loss_ce": 0.5839431285858154, "loss_iou": 0.404296875, "loss_num": 0.056640625, "loss_xval": 0.283203125, "num_input_tokens_seen": 558217164, "step": 3250 }, { "epoch": 0.8551325047675412, "grad_norm": 38.29206628356214, "learning_rate": 5e-06, "loss": 0.1148, "num_input_tokens_seen": 558389192, "step": 3251 }, { "epoch": 0.8551325047675412, "loss": 0.21791958808898926, "loss_ce": 0.0010311761870980263, "loss_iou": 0.5859375, "loss_num": 0.043212890625, "loss_xval": 0.216796875, "num_input_tokens_seen": 558389192, "step": 3251 }, { "epoch": 0.8553955415269284, "grad_norm": 7.6502620366688525, "learning_rate": 5e-06, "loss": 0.1206, "num_input_tokens_seen": 558561204, "step": 3252 }, { "epoch": 0.8553955415269284, "loss": 0.16455963253974915, "loss_ce": 0.0034573215525597334, "loss_iou": 0.376953125, "loss_num": 0.0322265625, "loss_xval": 0.1611328125, "num_input_tokens_seen": 558561204, "step": 3252 }, { "epoch": 0.8556585782863155, "grad_norm": 7.815933727502156, "learning_rate": 5e-06, "loss": 0.142, "num_input_tokens_seen": 558733140, "step": 3253 }, { "epoch": 0.8556585782863155, "loss": 0.118372842669487, "loss_ce": 0.0003461065352894366, "loss_iou": null, "loss_num": 0.023681640625, "loss_xval": 0.1181640625, "num_input_tokens_seen": 558733140, "step": 3253 }, { "epoch": 0.8559216150457026, "grad_norm": 26.03211500217736, "learning_rate": 5e-06, "loss": 0.0874, "num_input_tokens_seen": 558905528, "step": 3254 }, { "epoch": 0.8559216150457026, "loss": 0.0649479404091835, "loss_ce": 0.0013187924632802606, "loss_iou": 0.484375, "loss_num": 
0.0126953125, "loss_xval": 0.0634765625, "num_input_tokens_seen": 558905528, "step": 3254 }, { "epoch": 0.8561846518050897, "grad_norm": 3.695118212351963, "learning_rate": 5e-06, "loss": 0.0848, "num_input_tokens_seen": 559077788, "step": 3255 }, { "epoch": 0.8561846518050897, "loss": 0.05111432075500488, "loss_ce": 0.00036358798388391733, "loss_iou": 0.419921875, "loss_num": 0.0101318359375, "loss_xval": 0.05078125, "num_input_tokens_seen": 559077788, "step": 3255 }, { "epoch": 0.8564476885644768, "grad_norm": 4.828867302993965, "learning_rate": 5e-06, "loss": 0.1156, "num_input_tokens_seen": 559249740, "step": 3256 }, { "epoch": 0.8564476885644768, "loss": 0.15594083070755005, "loss_ce": 0.00011808436829596758, "loss_iou": 0.279296875, "loss_num": 0.03125, "loss_xval": 0.15625, "num_input_tokens_seen": 559249740, "step": 3256 }, { "epoch": 0.856710725323864, "grad_norm": 4.4588084335736, "learning_rate": 5e-06, "loss": 0.0934, "num_input_tokens_seen": 559422008, "step": 3257 }, { "epoch": 0.856710725323864, "loss": 0.1171468049287796, "loss_ce": 0.005177808925509453, "loss_iou": 0.458984375, "loss_num": 0.0223388671875, "loss_xval": 0.11181640625, "num_input_tokens_seen": 559422008, "step": 3257 }, { "epoch": 0.8569737620832512, "grad_norm": 22.52084230262158, "learning_rate": 5e-06, "loss": 0.1295, "num_input_tokens_seen": 559594180, "step": 3258 }, { "epoch": 0.8569737620832512, "loss": 0.12303026020526886, "loss_ce": 0.00025803959579207003, "loss_iou": 0.49609375, "loss_num": 0.0245361328125, "loss_xval": 0.12255859375, "num_input_tokens_seen": 559594180, "step": 3258 }, { "epoch": 0.8572367988426383, "grad_norm": 5.00194835125284, "learning_rate": 5e-06, "loss": 0.1458, "num_input_tokens_seen": 559766068, "step": 3259 }, { "epoch": 0.8572367988426383, "loss": 0.14650404453277588, "loss_ce": 0.0012403683504089713, "loss_iou": 0.29296875, "loss_num": 0.0291748046875, "loss_xval": 0.1455078125, "num_input_tokens_seen": 559766068, "step": 3259 }, { "epoch": 
0.8574998356020254, "grad_norm": 5.872594878613971, "learning_rate": 5e-06, "loss": 0.107, "num_input_tokens_seen": 559937984, "step": 3260 }, { "epoch": 0.8574998356020254, "loss": 0.14809638261795044, "loss_ce": 0.0009711501188576221, "loss_iou": 0.40234375, "loss_num": 0.0294189453125, "loss_xval": 0.1474609375, "num_input_tokens_seen": 559937984, "step": 3260 }, { "epoch": 0.8577628723614125, "grad_norm": 7.89915891449399, "learning_rate": 5e-06, "loss": 0.1202, "num_input_tokens_seen": 560108224, "step": 3261 }, { "epoch": 0.8577628723614125, "loss": 0.12339873611927032, "loss_ce": 0.0029458554927259684, "loss_iou": 0.54296875, "loss_num": 0.024169921875, "loss_xval": 0.12060546875, "num_input_tokens_seen": 560108224, "step": 3261 }, { "epoch": 0.8580259091207997, "grad_norm": 5.069283331721027, "learning_rate": 5e-06, "loss": 0.151, "num_input_tokens_seen": 560280532, "step": 3262 }, { "epoch": 0.8580259091207997, "loss": 0.12928983569145203, "loss_ce": 0.004533977247774601, "loss_iou": 0.69921875, "loss_num": 0.02490234375, "loss_xval": 0.125, "num_input_tokens_seen": 560280532, "step": 3262 }, { "epoch": 0.8582889458801868, "grad_norm": 14.252906607697872, "learning_rate": 5e-06, "loss": 0.0923, "num_input_tokens_seen": 560452680, "step": 3263 }, { "epoch": 0.8582889458801868, "loss": 0.10280074179172516, "loss_ce": 0.0005515902303159237, "loss_iou": 0.341796875, "loss_num": 0.0205078125, "loss_xval": 0.10205078125, "num_input_tokens_seen": 560452680, "step": 3263 }, { "epoch": 0.8585519826395739, "grad_norm": 4.981476516908149, "learning_rate": 5e-06, "loss": 0.1031, "num_input_tokens_seen": 560625236, "step": 3264 }, { "epoch": 0.8585519826395739, "loss": 0.11402536928653717, "loss_ce": 0.0004999822122044861, "loss_iou": 0.453125, "loss_num": 0.022705078125, "loss_xval": 0.11328125, "num_input_tokens_seen": 560625236, "step": 3264 }, { "epoch": 0.858815019398961, "grad_norm": 8.268610027664979, "learning_rate": 5e-06, "loss": 0.1138, 
"num_input_tokens_seen": 560797248, "step": 3265 }, { "epoch": 0.858815019398961, "loss": 0.13858602941036224, "loss_ce": 0.0017146880272775888, "loss_iou": 0.57421875, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 560797248, "step": 3265 }, { "epoch": 0.8590780561583481, "grad_norm": 8.133733497165355, "learning_rate": 5e-06, "loss": 0.1637, "num_input_tokens_seen": 560969220, "step": 3266 }, { "epoch": 0.8590780561583481, "loss": 0.09112342447042465, "loss_ce": 0.00012000648712273687, "loss_iou": 0.59375, "loss_num": 0.0181884765625, "loss_xval": 0.0908203125, "num_input_tokens_seen": 560969220, "step": 3266 }, { "epoch": 0.8593410929177353, "grad_norm": 3.6754936276624406, "learning_rate": 5e-06, "loss": 0.1416, "num_input_tokens_seen": 561141552, "step": 3267 }, { "epoch": 0.8593410929177353, "loss": 0.12336836755275726, "loss_ce": 0.0018015915993601084, "loss_iou": 0.443359375, "loss_num": 0.0242919921875, "loss_xval": 0.12158203125, "num_input_tokens_seen": 561141552, "step": 3267 }, { "epoch": 0.8596041296771224, "grad_norm": 12.378662074106767, "learning_rate": 5e-06, "loss": 0.1558, "num_input_tokens_seen": 561313780, "step": 3268 }, { "epoch": 0.8596041296771224, "loss": 0.14065909385681152, "loss_ce": 0.0051305294036865234, "loss_iou": 0.6015625, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 561313780, "step": 3268 }, { "epoch": 0.8598671664365095, "grad_norm": 3.0561071778648485, "learning_rate": 5e-06, "loss": 0.103, "num_input_tokens_seen": 561486192, "step": 3269 }, { "epoch": 0.8598671664365095, "loss": 0.0891718789935112, "loss_ce": 0.003921023570001125, "loss_iou": 0.453125, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 561486192, "step": 3269 }, { "epoch": 0.8601302031958966, "grad_norm": 4.439332180024113, "learning_rate": 5e-06, "loss": 0.1082, "num_input_tokens_seen": 561658324, "step": 3270 }, { "epoch": 0.8601302031958966, "loss": 
0.0599624440073967, "loss_ce": 0.0006515316781587899, "loss_iou": 0.44140625, "loss_num": 0.0118408203125, "loss_xval": 0.059326171875, "num_input_tokens_seen": 561658324, "step": 3270 }, { "epoch": 0.8603932399552837, "grad_norm": 5.0489240884621305, "learning_rate": 5e-06, "loss": 0.0889, "num_input_tokens_seen": 561829100, "step": 3271 }, { "epoch": 0.8603932399552837, "loss": 0.06052926927804947, "loss_ce": 0.0006232602754607797, "loss_iou": 0.408203125, "loss_num": 0.011962890625, "loss_xval": 0.059814453125, "num_input_tokens_seen": 561829100, "step": 3271 }, { "epoch": 0.8606562767146708, "grad_norm": 14.0110320202428, "learning_rate": 5e-06, "loss": 0.0965, "num_input_tokens_seen": 562001212, "step": 3272 }, { "epoch": 0.8606562767146708, "loss": 0.05530541390180588, "loss_ce": 0.0009230896248482168, "loss_iou": 0.59765625, "loss_num": 0.0108642578125, "loss_xval": 0.054443359375, "num_input_tokens_seen": 562001212, "step": 3272 }, { "epoch": 0.860919313474058, "grad_norm": 5.01456561485217, "learning_rate": 5e-06, "loss": 0.1815, "num_input_tokens_seen": 562173524, "step": 3273 }, { "epoch": 0.860919313474058, "loss": 0.2010606825351715, "loss_ce": 0.002330208197236061, "loss_iou": 0.50390625, "loss_num": 0.039794921875, "loss_xval": 0.19921875, "num_input_tokens_seen": 562173524, "step": 3273 }, { "epoch": 0.8611823502334451, "grad_norm": 9.881111822277372, "learning_rate": 5e-06, "loss": 0.1502, "num_input_tokens_seen": 562345824, "step": 3274 }, { "epoch": 0.8611823502334451, "loss": 0.106649249792099, "loss_ce": 0.0010584269184619188, "loss_iou": 0.59765625, "loss_num": 0.0211181640625, "loss_xval": 0.10546875, "num_input_tokens_seen": 562345824, "step": 3274 }, { "epoch": 0.8614453869928322, "grad_norm": 4.536681210731165, "learning_rate": 5e-06, "loss": 0.1007, "num_input_tokens_seen": 562517872, "step": 3275 }, { "epoch": 0.8614453869928322, "loss": 0.07371491193771362, "loss_ce": 0.0001065141914295964, "loss_iou": 0.48046875, "loss_num": 
0.01470947265625, "loss_xval": 0.07373046875, "num_input_tokens_seen": 562517872, "step": 3275 }, { "epoch": 0.8617084237522193, "grad_norm": 4.65141834474055, "learning_rate": 5e-06, "loss": 0.1095, "num_input_tokens_seen": 562689796, "step": 3276 }, { "epoch": 0.8617084237522193, "loss": 0.09085643291473389, "loss_ce": 0.001287340302951634, "loss_iou": 0.55859375, "loss_num": 0.0179443359375, "loss_xval": 0.08935546875, "num_input_tokens_seen": 562689796, "step": 3276 }, { "epoch": 0.8619714605116064, "grad_norm": 8.721252840556735, "learning_rate": 5e-06, "loss": 0.1602, "num_input_tokens_seen": 562860476, "step": 3277 }, { "epoch": 0.8619714605116064, "loss": 0.1055789366364479, "loss_ce": 0.00273469858802855, "loss_iou": 0.5234375, "loss_num": 0.0206298828125, "loss_xval": 0.10302734375, "num_input_tokens_seen": 562860476, "step": 3277 }, { "epoch": 0.8622344972709937, "grad_norm": 4.218638673445259, "learning_rate": 5e-06, "loss": 0.0896, "num_input_tokens_seen": 563032560, "step": 3278 }, { "epoch": 0.8622344972709937, "loss": 0.10979656875133514, "loss_ce": 0.00536542059853673, "loss_iou": 0.609375, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 563032560, "step": 3278 }, { "epoch": 0.8624975340303808, "grad_norm": 5.172843844259764, "learning_rate": 5e-06, "loss": 0.0986, "num_input_tokens_seen": 563204720, "step": 3279 }, { "epoch": 0.8624975340303808, "loss": 0.08518050611019135, "loss_ce": 0.0019438066519796848, "loss_iou": 0.458984375, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 563204720, "step": 3279 }, { "epoch": 0.8627605707897679, "grad_norm": 4.560338661852982, "learning_rate": 5e-06, "loss": 0.1002, "num_input_tokens_seen": 563377056, "step": 3280 }, { "epoch": 0.8627605707897679, "loss": 0.08894481509923935, "loss_ce": 0.0001691804500296712, "loss_iou": 0.578125, "loss_num": 0.017822265625, "loss_xval": 0.0888671875, "num_input_tokens_seen": 563377056, "step": 3280 }, { 
"epoch": 0.863023607549155, "grad_norm": 10.082366990468211, "learning_rate": 5e-06, "loss": 0.1264, "num_input_tokens_seen": 563549604, "step": 3281 }, { "epoch": 0.863023607549155, "loss": 0.1488599181175232, "loss_ce": 0.0033978780265897512, "loss_iou": 0.494140625, "loss_num": 0.029052734375, "loss_xval": 0.1455078125, "num_input_tokens_seen": 563549604, "step": 3281 }, { "epoch": 0.8632866443085421, "grad_norm": 4.832733016773012, "learning_rate": 5e-06, "loss": 0.081, "num_input_tokens_seen": 563721852, "step": 3282 }, { "epoch": 0.8632866443085421, "loss": 0.14535953104496002, "loss_ce": 0.0013928530970588326, "loss_iou": 0.46484375, "loss_num": 0.02880859375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 563721852, "step": 3282 }, { "epoch": 0.8635496810679293, "grad_norm": 6.063993109800719, "learning_rate": 5e-06, "loss": 0.0887, "num_input_tokens_seen": 563893984, "step": 3283 }, { "epoch": 0.8635496810679293, "loss": 0.06962516903877258, "loss_ce": 0.0006554402643814683, "loss_iou": 0.4453125, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 563893984, "step": 3283 }, { "epoch": 0.8638127178273164, "grad_norm": 4.985736636691599, "learning_rate": 5e-06, "loss": 0.1177, "num_input_tokens_seen": 564062972, "step": 3284 }, { "epoch": 0.8638127178273164, "loss": 0.11227120459079742, "loss_ce": 0.002926721004769206, "loss_iou": 0.625, "loss_num": 0.0218505859375, "loss_xval": 0.109375, "num_input_tokens_seen": 564062972, "step": 3284 }, { "epoch": 0.8640757545867035, "grad_norm": 10.90397210954732, "learning_rate": 5e-06, "loss": 0.1087, "num_input_tokens_seen": 564235088, "step": 3285 }, { "epoch": 0.8640757545867035, "loss": 0.1431923508644104, "loss_ce": 0.0032692591194063425, "loss_iou": 0.51953125, "loss_num": 0.0279541015625, "loss_xval": 0.1396484375, "num_input_tokens_seen": 564235088, "step": 3285 }, { "epoch": 0.8643387913460906, "grad_norm": 10.801058055973197, "learning_rate": 5e-06, "loss": 0.1202, 
"num_input_tokens_seen": 564407212, "step": 3286 }, { "epoch": 0.8643387913460906, "loss": 0.17996424436569214, "loss_ce": 0.005098515655845404, "loss_iou": 0.48828125, "loss_num": 0.034912109375, "loss_xval": 0.1748046875, "num_input_tokens_seen": 564407212, "step": 3286 }, { "epoch": 0.8646018281054777, "grad_norm": 3.7908688756477646, "learning_rate": 5e-06, "loss": 0.1763, "num_input_tokens_seen": 564579544, "step": 3287 }, { "epoch": 0.8646018281054777, "loss": 0.1904245764017105, "loss_ce": 0.0021311198361217976, "loss_iou": 0.455078125, "loss_num": 0.03759765625, "loss_xval": 0.1884765625, "num_input_tokens_seen": 564579544, "step": 3287 }, { "epoch": 0.8648648648648649, "grad_norm": 20.50727894197877, "learning_rate": 5e-06, "loss": 0.1134, "num_input_tokens_seen": 564751736, "step": 3288 }, { "epoch": 0.8648648648648649, "loss": 0.13853441178798676, "loss_ce": 0.006606926675885916, "loss_iou": 0.34765625, "loss_num": 0.0263671875, "loss_xval": 0.1318359375, "num_input_tokens_seen": 564751736, "step": 3288 }, { "epoch": 0.865127901624252, "grad_norm": 7.515905813339519, "learning_rate": 5e-06, "loss": 0.1008, "num_input_tokens_seen": 564923836, "step": 3289 }, { "epoch": 0.865127901624252, "loss": 0.09346568584442139, "loss_ce": 0.0014246755745261908, "loss_iou": 0.453125, "loss_num": 0.0184326171875, "loss_xval": 0.091796875, "num_input_tokens_seen": 564923836, "step": 3289 }, { "epoch": 0.8653909383836391, "grad_norm": 4.659865141429364, "learning_rate": 5e-06, "loss": 0.0984, "num_input_tokens_seen": 565093036, "step": 3290 }, { "epoch": 0.8653909383836391, "loss": 0.05857858434319496, "loss_ce": 0.0009613969596102834, "loss_iou": 0.46875, "loss_num": 0.01153564453125, "loss_xval": 0.0576171875, "num_input_tokens_seen": 565093036, "step": 3290 }, { "epoch": 0.8656539751430262, "grad_norm": 7.156818926737148, "learning_rate": 5e-06, "loss": 0.0929, "num_input_tokens_seen": 565264968, "step": 3291 }, { "epoch": 0.8656539751430262, "loss": 
0.05671301484107971, "loss_ce": 0.0005454107304103673, "loss_iou": 0.4453125, "loss_num": 0.01123046875, "loss_xval": 0.05615234375, "num_input_tokens_seen": 565264968, "step": 3291 }, { "epoch": 0.8659170119024133, "grad_norm": 4.159578348893538, "learning_rate": 5e-06, "loss": 0.0929, "num_input_tokens_seen": 565437008, "step": 3292 }, { "epoch": 0.8659170119024133, "loss": 0.10393117368221283, "loss_ce": 0.0047185225412249565, "loss_iou": 0.380859375, "loss_num": 0.0198974609375, "loss_xval": 0.09912109375, "num_input_tokens_seen": 565437008, "step": 3292 }, { "epoch": 0.8661800486618005, "grad_norm": 5.125947407861983, "learning_rate": 5e-06, "loss": 0.0679, "num_input_tokens_seen": 565609352, "step": 3293 }, { "epoch": 0.8661800486618005, "loss": 0.07895916700363159, "loss_ce": 0.004221613518893719, "loss_iou": 0.51171875, "loss_num": 0.01495361328125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 565609352, "step": 3293 }, { "epoch": 0.8664430854211876, "grad_norm": 4.631562530302505, "learning_rate": 5e-06, "loss": 0.0987, "num_input_tokens_seen": 565781624, "step": 3294 }, { "epoch": 0.8664430854211876, "loss": 0.10047349333763123, "loss_ce": 0.0010014427825808525, "loss_iou": 0.609375, "loss_num": 0.0198974609375, "loss_xval": 0.099609375, "num_input_tokens_seen": 565781624, "step": 3294 }, { "epoch": 0.8667061221805747, "grad_norm": 12.748430441696817, "learning_rate": 5e-06, "loss": 0.105, "num_input_tokens_seen": 565954124, "step": 3295 }, { "epoch": 0.8667061221805747, "loss": 0.10027378797531128, "loss_ce": 0.0005728579708375037, "loss_iou": 0.478515625, "loss_num": 0.02001953125, "loss_xval": 0.099609375, "num_input_tokens_seen": 565954124, "step": 3295 }, { "epoch": 0.8669691589399618, "grad_norm": 8.307592437145553, "learning_rate": 5e-06, "loss": 0.0922, "num_input_tokens_seen": 566124916, "step": 3296 }, { "epoch": 0.8669691589399618, "loss": 0.10838001221418381, "loss_ce": 0.006359752267599106, "loss_iou": 0.61328125, "loss_num": 
0.0203857421875, "loss_xval": 0.10205078125, "num_input_tokens_seen": 566124916, "step": 3296 }, { "epoch": 0.867232195699349, "grad_norm": 4.778870142686005, "learning_rate": 5e-06, "loss": 0.1132, "num_input_tokens_seen": 566296912, "step": 3297 }, { "epoch": 0.867232195699349, "loss": 0.04819861054420471, "loss_ce": 0.00030127062927931547, "loss_iou": 0.45703125, "loss_num": 0.00958251953125, "loss_xval": 0.0478515625, "num_input_tokens_seen": 566296912, "step": 3297 }, { "epoch": 0.867495232458736, "grad_norm": 7.859655636877698, "learning_rate": 5e-06, "loss": 0.1104, "num_input_tokens_seen": 566467596, "step": 3298 }, { "epoch": 0.867495232458736, "loss": 0.1346224993467331, "loss_ce": 0.0007113683386705816, "loss_iou": 0.57421875, "loss_num": 0.0267333984375, "loss_xval": 0.1337890625, "num_input_tokens_seen": 566467596, "step": 3298 }, { "epoch": 0.8677582692181233, "grad_norm": 15.137416498429843, "learning_rate": 5e-06, "loss": 0.1106, "num_input_tokens_seen": 566639732, "step": 3299 }, { "epoch": 0.8677582692181233, "loss": 0.10433374345302582, "loss_ce": 0.00313745578750968, "loss_iou": 0.49609375, "loss_num": 0.020263671875, "loss_xval": 0.10107421875, "num_input_tokens_seen": 566639732, "step": 3299 }, { "epoch": 0.8680213059775104, "grad_norm": 2.9506634806748657, "learning_rate": 5e-06, "loss": 0.118, "num_input_tokens_seen": 566811792, "step": 3300 }, { "epoch": 0.8680213059775104, "loss": 0.07893712818622589, "loss_ce": 0.0004764424229506403, "loss_iou": 0.53125, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 566811792, "step": 3300 }, { "epoch": 0.8682843427368975, "grad_norm": 21.308475468927227, "learning_rate": 5e-06, "loss": 0.1217, "num_input_tokens_seen": 566984152, "step": 3301 }, { "epoch": 0.8682843427368975, "loss": 0.07327578961849213, "loss_ce": 0.004519685637205839, "loss_iou": 0.41015625, "loss_num": 0.01373291015625, "loss_xval": 0.06884765625, "num_input_tokens_seen": 566984152, "step": 3301 }, { 
"epoch": 0.8685473794962846, "grad_norm": 4.854189489350901, "learning_rate": 5e-06, "loss": 0.0778, "num_input_tokens_seen": 567156660, "step": 3302 }, { "epoch": 0.8685473794962846, "loss": 0.05097039043903351, "loss_ce": 0.0012877746485173702, "loss_iou": 0.44140625, "loss_num": 0.00994873046875, "loss_xval": 0.0498046875, "num_input_tokens_seen": 567156660, "step": 3302 }, { "epoch": 0.8688104162556717, "grad_norm": 16.314064517801537, "learning_rate": 5e-06, "loss": 0.0994, "num_input_tokens_seen": 567328912, "step": 3303 }, { "epoch": 0.8688104162556717, "loss": 0.11892714351415634, "loss_ce": 0.0012513676192611456, "loss_iou": 0.5546875, "loss_num": 0.0235595703125, "loss_xval": 0.11767578125, "num_input_tokens_seen": 567328912, "step": 3303 }, { "epoch": 0.8690734530150589, "grad_norm": 16.431940991025456, "learning_rate": 5e-06, "loss": 0.155, "num_input_tokens_seen": 567501148, "step": 3304 }, { "epoch": 0.8690734530150589, "loss": 0.12847568094730377, "loss_ce": 8.82247113622725e-05, "loss_iou": 0.46875, "loss_num": 0.025634765625, "loss_xval": 0.1279296875, "num_input_tokens_seen": 567501148, "step": 3304 }, { "epoch": 0.869336489774446, "grad_norm": 3.641251656159218, "learning_rate": 5e-06, "loss": 0.1218, "num_input_tokens_seen": 567673160, "step": 3305 }, { "epoch": 0.869336489774446, "loss": 0.2121918946504593, "loss_ce": 0.0007966216653585434, "loss_iou": 0.3984375, "loss_num": 0.042236328125, "loss_xval": 0.2109375, "num_input_tokens_seen": 567673160, "step": 3305 }, { "epoch": 0.8695995265338331, "grad_norm": 27.660934774490972, "learning_rate": 5e-06, "loss": 0.1174, "num_input_tokens_seen": 567845644, "step": 3306 }, { "epoch": 0.8695995265338331, "loss": 0.07302013039588928, "loss_ce": 0.00043406913755461574, "loss_iou": 0.5078125, "loss_num": 0.0145263671875, "loss_xval": 0.07275390625, "num_input_tokens_seen": 567845644, "step": 3306 }, { "epoch": 0.8698625632932202, "grad_norm": 5.415514477165426, "learning_rate": 5e-06, "loss": 0.1059, 
"num_input_tokens_seen": 568017832, "step": 3307 }, { "epoch": 0.8698625632932202, "loss": 0.14274545013904572, "loss_ce": 0.0012964779743924737, "loss_iou": 0.412109375, "loss_num": 0.0283203125, "loss_xval": 0.1416015625, "num_input_tokens_seen": 568017832, "step": 3307 }, { "epoch": 0.8701256000526073, "grad_norm": 6.457549289729974, "learning_rate": 5e-06, "loss": 0.1437, "num_input_tokens_seen": 568189908, "step": 3308 }, { "epoch": 0.8701256000526073, "loss": 0.17567481100559235, "loss_ce": 0.0026706610806286335, "loss_iou": 0.58203125, "loss_num": 0.03466796875, "loss_xval": 0.1728515625, "num_input_tokens_seen": 568189908, "step": 3308 }, { "epoch": 0.8703886368119945, "grad_norm": 4.776984151346558, "learning_rate": 5e-06, "loss": 0.0712, "num_input_tokens_seen": 568358640, "step": 3309 }, { "epoch": 0.8703886368119945, "loss": 0.050816282629966736, "loss_ce": 0.0006148651009425521, "loss_iou": 0.50390625, "loss_num": 0.010009765625, "loss_xval": 0.05029296875, "num_input_tokens_seen": 568358640, "step": 3309 }, { "epoch": 0.8706516735713816, "grad_norm": 8.715556903813102, "learning_rate": 5e-06, "loss": 0.0681, "num_input_tokens_seen": 568529372, "step": 3310 }, { "epoch": 0.8706516735713816, "loss": 0.055395372211933136, "loss_ce": 0.002645737724378705, "loss_iou": 0.546875, "loss_num": 0.01055908203125, "loss_xval": 0.052734375, "num_input_tokens_seen": 568529372, "step": 3310 }, { "epoch": 0.8709147103307687, "grad_norm": 4.571078001011392, "learning_rate": 5e-06, "loss": 0.0938, "num_input_tokens_seen": 568701592, "step": 3311 }, { "epoch": 0.8709147103307687, "loss": 0.07437048852443695, "loss_ce": 0.0011893401388078928, "loss_iou": 0.408203125, "loss_num": 0.0146484375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 568701592, "step": 3311 }, { "epoch": 0.8711777470901558, "grad_norm": 5.629346035154349, "learning_rate": 5e-06, "loss": 0.0849, "num_input_tokens_seen": 568871992, "step": 3312 }, { "epoch": 0.8711777470901558, "loss": 
0.08190266788005829, "loss_ce": 0.001977135892957449, "loss_iou": 0.58203125, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 568871992, "step": 3312 }, { "epoch": 0.8714407838495429, "grad_norm": 5.163413374700219, "learning_rate": 5e-06, "loss": 0.0889, "num_input_tokens_seen": 569044108, "step": 3313 }, { "epoch": 0.8714407838495429, "loss": 0.08449655771255493, "loss_ce": 0.0003901080635841936, "loss_iou": 0.515625, "loss_num": 0.016845703125, "loss_xval": 0.083984375, "num_input_tokens_seen": 569044108, "step": 3313 }, { "epoch": 0.8717038206089301, "grad_norm": 5.300317232023734, "learning_rate": 5e-06, "loss": 0.1198, "num_input_tokens_seen": 569216304, "step": 3314 }, { "epoch": 0.8717038206089301, "loss": 0.18054433166980743, "loss_ce": 9.388441685587168e-05, "loss_iou": 0.484375, "loss_num": 0.0361328125, "loss_xval": 0.1806640625, "num_input_tokens_seen": 569216304, "step": 3314 }, { "epoch": 0.8719668573683172, "grad_norm": 5.650101854564751, "learning_rate": 5e-06, "loss": 0.101, "num_input_tokens_seen": 569388684, "step": 3315 }, { "epoch": 0.8719668573683172, "loss": 0.08428364247083664, "loss_ce": 0.0036256806924939156, "loss_iou": 0.37890625, "loss_num": 0.01611328125, "loss_xval": 0.08056640625, "num_input_tokens_seen": 569388684, "step": 3315 }, { "epoch": 0.8722298941277044, "grad_norm": 6.227695364300726, "learning_rate": 5e-06, "loss": 0.1296, "num_input_tokens_seen": 569558992, "step": 3316 }, { "epoch": 0.8722298941277044, "loss": 0.18722307682037354, "loss_ce": 0.005307785701006651, "loss_iou": 0.55859375, "loss_num": 0.036376953125, "loss_xval": 0.181640625, "num_input_tokens_seen": 569558992, "step": 3316 }, { "epoch": 0.8724929308870915, "grad_norm": 5.420469146952656, "learning_rate": 5e-06, "loss": 0.1311, "num_input_tokens_seen": 569731372, "step": 3317 }, { "epoch": 0.8724929308870915, "loss": 0.09945103526115417, "loss_ce": 0.002435657661408186, "loss_iou": 0.4921875, "loss_num": 0.0194091796875, 
"loss_xval": 0.09716796875, "num_input_tokens_seen": 569731372, "step": 3317 }, { "epoch": 0.8727559676464786, "grad_norm": 4.496256221139891, "learning_rate": 5e-06, "loss": 0.0934, "num_input_tokens_seen": 569902092, "step": 3318 }, { "epoch": 0.8727559676464786, "loss": 0.09977222979068756, "loss_ce": 8.65593392518349e-05, "loss_iou": 0.5234375, "loss_num": 0.0198974609375, "loss_xval": 0.099609375, "num_input_tokens_seen": 569902092, "step": 3318 }, { "epoch": 0.8730190044058658, "grad_norm": 3.818210026049398, "learning_rate": 5e-06, "loss": 0.0969, "num_input_tokens_seen": 570074040, "step": 3319 }, { "epoch": 0.8730190044058658, "loss": 0.11558879911899567, "loss_ce": 0.0015446072211489081, "loss_iou": 0.447265625, "loss_num": 0.0228271484375, "loss_xval": 0.1142578125, "num_input_tokens_seen": 570074040, "step": 3319 }, { "epoch": 0.8732820411652529, "grad_norm": 48.02513571155541, "learning_rate": 5e-06, "loss": 0.1371, "num_input_tokens_seen": 570246368, "step": 3320 }, { "epoch": 0.8732820411652529, "loss": 0.1751258671283722, "loss_ce": 0.00022961836657486856, "loss_iou": 0.56640625, "loss_num": 0.034912109375, "loss_xval": 0.1748046875, "num_input_tokens_seen": 570246368, "step": 3320 }, { "epoch": 0.87354507792464, "grad_norm": 9.348108956967192, "learning_rate": 5e-06, "loss": 0.1217, "num_input_tokens_seen": 570418348, "step": 3321 }, { "epoch": 0.87354507792464, "loss": 0.1822582334280014, "loss_ce": 0.001411056611686945, "loss_iou": 0.53125, "loss_num": 0.0361328125, "loss_xval": 0.1806640625, "num_input_tokens_seen": 570418348, "step": 3321 }, { "epoch": 0.8738081146840271, "grad_norm": 10.70912004570317, "learning_rate": 5e-06, "loss": 0.1067, "num_input_tokens_seen": 570590896, "step": 3322 }, { "epoch": 0.8738081146840271, "loss": 0.1525057554244995, "loss_ce": 0.001626854995265603, "loss_iou": 0.439453125, "loss_num": 0.0301513671875, "loss_xval": 0.150390625, "num_input_tokens_seen": 570590896, "step": 3322 }, { "epoch": 0.8740711514434142, 
"grad_norm": 5.432623634439659, "learning_rate": 5e-06, "loss": 0.0846, "num_input_tokens_seen": 570763096, "step": 3323 }, { "epoch": 0.8740711514434142, "loss": 0.10707151144742966, "loss_ce": 0.00158750603441149, "loss_iou": 0.392578125, "loss_num": 0.0211181640625, "loss_xval": 0.10546875, "num_input_tokens_seen": 570763096, "step": 3323 }, { "epoch": 0.8743341882028013, "grad_norm": 5.502873741651398, "learning_rate": 5e-06, "loss": 0.1645, "num_input_tokens_seen": 570935100, "step": 3324 }, { "epoch": 0.8743341882028013, "loss": 0.0433497279882431, "loss_ce": 0.0022425525821745396, "loss_iou": 0.625, "loss_num": 0.00823974609375, "loss_xval": 0.041015625, "num_input_tokens_seen": 570935100, "step": 3324 }, { "epoch": 0.8745972249621885, "grad_norm": 9.035108225672646, "learning_rate": 5e-06, "loss": 0.1216, "num_input_tokens_seen": 571107528, "step": 3325 }, { "epoch": 0.8745972249621885, "loss": 0.11922352015972137, "loss_ce": 0.008475230075418949, "loss_iou": 0.490234375, "loss_num": 0.0220947265625, "loss_xval": 0.11083984375, "num_input_tokens_seen": 571107528, "step": 3325 }, { "epoch": 0.8748602617215756, "grad_norm": 4.9129107081845635, "learning_rate": 5e-06, "loss": 0.1196, "num_input_tokens_seen": 571279464, "step": 3326 }, { "epoch": 0.8748602617215756, "loss": 0.09204280376434326, "loss_ce": 0.0020769857801496983, "loss_iou": 0.640625, "loss_num": 0.0179443359375, "loss_xval": 0.08984375, "num_input_tokens_seen": 571279464, "step": 3326 }, { "epoch": 0.8751232984809627, "grad_norm": 4.808896887234646, "learning_rate": 5e-06, "loss": 0.1168, "num_input_tokens_seen": 571451676, "step": 3327 }, { "epoch": 0.8751232984809627, "loss": 0.11273814737796783, "loss_ce": 0.00046397349797189236, "loss_iou": 0.439453125, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 571451676, "step": 3327 }, { "epoch": 0.8753863352403498, "grad_norm": 4.264250577253754, "learning_rate": 5e-06, "loss": 0.0882, "num_input_tokens_seen": 
571620552, "step": 3328 }, { "epoch": 0.8753863352403498, "loss": 0.06821378320455551, "loss_ce": 0.0043710097670555115, "loss_iou": 0.38671875, "loss_num": 0.01275634765625, "loss_xval": 0.06396484375, "num_input_tokens_seen": 571620552, "step": 3328 }, { "epoch": 0.8756493719997369, "grad_norm": 19.986204474022973, "learning_rate": 5e-06, "loss": 0.1309, "num_input_tokens_seen": 571792724, "step": 3329 }, { "epoch": 0.8756493719997369, "loss": 0.10326668620109558, "loss_ce": 0.002741775708273053, "loss_iou": 0.5859375, "loss_num": 0.0201416015625, "loss_xval": 0.1005859375, "num_input_tokens_seen": 571792724, "step": 3329 }, { "epoch": 0.8759124087591241, "grad_norm": 4.794277486673457, "learning_rate": 5e-06, "loss": 0.1107, "num_input_tokens_seen": 571964880, "step": 3330 }, { "epoch": 0.8759124087591241, "loss": 0.11237187683582306, "loss_ce": 0.00040288365562446415, "loss_iou": 0.5703125, "loss_num": 0.0223388671875, "loss_xval": 0.11181640625, "num_input_tokens_seen": 571964880, "step": 3330 }, { "epoch": 0.8761754455185112, "grad_norm": 4.388563206165435, "learning_rate": 5e-06, "loss": 0.1008, "num_input_tokens_seen": 572137040, "step": 3331 }, { "epoch": 0.8761754455185112, "loss": 0.1180691048502922, "loss_ce": 0.003506115637719631, "loss_iou": 0.5625, "loss_num": 0.02294921875, "loss_xval": 0.11474609375, "num_input_tokens_seen": 572137040, "step": 3331 }, { "epoch": 0.8764384822778983, "grad_norm": 4.318891449603682, "learning_rate": 5e-06, "loss": 0.1065, "num_input_tokens_seen": 572309112, "step": 3332 }, { "epoch": 0.8764384822778983, "loss": 0.11371566355228424, "loss_ce": 0.0035472088493406773, "loss_iou": 0.51171875, "loss_num": 0.0220947265625, "loss_xval": 0.1103515625, "num_input_tokens_seen": 572309112, "step": 3332 }, { "epoch": 0.8767015190372854, "grad_norm": 10.0048154448047, "learning_rate": 5e-06, "loss": 0.1344, "num_input_tokens_seen": 572481700, "step": 3333 }, { "epoch": 0.8767015190372854, "loss": 0.21858729422092438, "loss_ce": 
0.0018819711403921247, "loss_iou": 0.42578125, "loss_num": 0.04345703125, "loss_xval": 0.216796875, "num_input_tokens_seen": 572481700, "step": 3333 }, { "epoch": 0.8769645557966725, "grad_norm": 20.86653328138894, "learning_rate": 5e-06, "loss": 0.1063, "num_input_tokens_seen": 572653588, "step": 3334 }, { "epoch": 0.8769645557966725, "loss": 0.1189296618103981, "loss_ce": 0.002413547597825527, "loss_iou": 0.392578125, "loss_num": 0.0233154296875, "loss_xval": 0.11669921875, "num_input_tokens_seen": 572653588, "step": 3334 }, { "epoch": 0.8772275925560598, "grad_norm": 4.936814293101831, "learning_rate": 5e-06, "loss": 0.1146, "num_input_tokens_seen": 572825664, "step": 3335 }, { "epoch": 0.8772275925560598, "loss": 0.11313323676586151, "loss_ce": 0.002995288698002696, "loss_iou": 0.4765625, "loss_num": 0.02197265625, "loss_xval": 0.1103515625, "num_input_tokens_seen": 572825664, "step": 3335 }, { "epoch": 0.8774906293154469, "grad_norm": 45.48466563185734, "learning_rate": 5e-06, "loss": 0.117, "num_input_tokens_seen": 572997764, "step": 3336 }, { "epoch": 0.8774906293154469, "loss": 0.12880939245224, "loss_ce": 0.0020393752492964268, "loss_iou": 0.61328125, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 572997764, "step": 3336 }, { "epoch": 0.877753666074834, "grad_norm": 5.329772367724437, "learning_rate": 5e-06, "loss": 0.145, "num_input_tokens_seen": 573167692, "step": 3337 }, { "epoch": 0.877753666074834, "loss": 0.06320846080780029, "loss_ce": 0.0007694964297115803, "loss_iou": 0.4609375, "loss_num": 0.01251220703125, "loss_xval": 0.0625, "num_input_tokens_seen": 573167692, "step": 3337 }, { "epoch": 0.8780167028342211, "grad_norm": 9.592551272557182, "learning_rate": 5e-06, "loss": 0.0773, "num_input_tokens_seen": 573340028, "step": 3338 }, { "epoch": 0.8780167028342211, "loss": 0.08143293112516403, "loss_ce": 0.0008054894860833883, "loss_iou": 0.57421875, "loss_num": 0.01611328125, "loss_xval": 0.08056640625, 
"num_input_tokens_seen": 573340028, "step": 3338 }, { "epoch": 0.8782797395936082, "grad_norm": 3.2912653441237016, "learning_rate": 5e-06, "loss": 0.0855, "num_input_tokens_seen": 573510240, "step": 3339 }, { "epoch": 0.8782797395936082, "loss": 0.11927518248558044, "loss_ce": 0.0026980361435562372, "loss_iou": 0.4296875, "loss_num": 0.0233154296875, "loss_xval": 0.11669921875, "num_input_tokens_seen": 573510240, "step": 3339 }, { "epoch": 0.8785427763529954, "grad_norm": 6.074875569536377, "learning_rate": 5e-06, "loss": 0.1221, "num_input_tokens_seen": 573682136, "step": 3340 }, { "epoch": 0.8785427763529954, "loss": 0.14973485469818115, "loss_ce": 0.0035251472145318985, "loss_iou": 0.53125, "loss_num": 0.029296875, "loss_xval": 0.146484375, "num_input_tokens_seen": 573682136, "step": 3340 }, { "epoch": 0.8788058131123825, "grad_norm": 4.101578026279459, "learning_rate": 5e-06, "loss": 0.1127, "num_input_tokens_seen": 573854492, "step": 3341 }, { "epoch": 0.8788058131123825, "loss": 0.09308157861232758, "loss_ce": 0.0016814331756904721, "loss_iou": 0.57421875, "loss_num": 0.018310546875, "loss_xval": 0.09130859375, "num_input_tokens_seen": 573854492, "step": 3341 }, { "epoch": 0.8790688498717696, "grad_norm": 3.5139785635989704, "learning_rate": 5e-06, "loss": 0.1205, "num_input_tokens_seen": 574026312, "step": 3342 }, { "epoch": 0.8790688498717696, "loss": 0.12709392607212067, "loss_ce": 0.002124447375535965, "loss_iou": 0.498046875, "loss_num": 0.0250244140625, "loss_xval": 0.125, "num_input_tokens_seen": 574026312, "step": 3342 }, { "epoch": 0.8793318866311567, "grad_norm": 8.712064685299605, "learning_rate": 5e-06, "loss": 0.1039, "num_input_tokens_seen": 574198272, "step": 3343 }, { "epoch": 0.8793318866311567, "loss": 0.07467342913150787, "loss_ce": 0.0009124410571530461, "loss_iou": 0.443359375, "loss_num": 0.0147705078125, "loss_xval": 0.07373046875, "num_input_tokens_seen": 574198272, "step": 3343 }, { "epoch": 0.8795949233905438, "grad_norm": 
3.320527799830495, "learning_rate": 5e-06, "loss": 0.0674, "num_input_tokens_seen": 574370244, "step": 3344 }, { "epoch": 0.8795949233905438, "loss": 0.04423138499259949, "loss_ce": 0.00010297012340743095, "loss_iou": 0.486328125, "loss_num": 0.00885009765625, "loss_xval": 0.044189453125, "num_input_tokens_seen": 574370244, "step": 3344 }, { "epoch": 0.879857960149931, "grad_norm": 16.55083596075495, "learning_rate": 5e-06, "loss": 0.1285, "num_input_tokens_seen": 574542464, "step": 3345 }, { "epoch": 0.879857960149931, "loss": 0.05567498877644539, "loss_ce": 0.0007433480932377279, "loss_iou": 0.53125, "loss_num": 0.010986328125, "loss_xval": 0.054931640625, "num_input_tokens_seen": 574542464, "step": 3345 }, { "epoch": 0.8801209969093181, "grad_norm": 4.233870416516864, "learning_rate": 5e-06, "loss": 0.1519, "num_input_tokens_seen": 574714876, "step": 3346 }, { "epoch": 0.8801209969093181, "loss": 0.19878937304019928, "loss_ce": 0.0015847685281187296, "loss_iou": 0.443359375, "loss_num": 0.039306640625, "loss_xval": 0.197265625, "num_input_tokens_seen": 574714876, "step": 3346 }, { "epoch": 0.8803840336687052, "grad_norm": 28.780242588246683, "learning_rate": 5e-06, "loss": 0.1284, "num_input_tokens_seen": 574886952, "step": 3347 }, { "epoch": 0.8803840336687052, "loss": 0.11355656385421753, "loss_ce": 0.0028998262714594603, "loss_iou": 0.40625, "loss_num": 0.0220947265625, "loss_xval": 0.11083984375, "num_input_tokens_seen": 574886952, "step": 3347 }, { "epoch": 0.8806470704280923, "grad_norm": 3.1096156375201027, "learning_rate": 5e-06, "loss": 0.0687, "num_input_tokens_seen": 575059096, "step": 3348 }, { "epoch": 0.8806470704280923, "loss": 0.049926742911338806, "loss_ce": 0.0002136088878614828, "loss_iou": 0.4921875, "loss_num": 0.00994873046875, "loss_xval": 0.0498046875, "num_input_tokens_seen": 575059096, "step": 3348 }, { "epoch": 0.8809101071874794, "grad_norm": 11.92064516300679, "learning_rate": 5e-06, "loss": 0.1151, "num_input_tokens_seen": 
575231232, "step": 3349 }, { "epoch": 0.8809101071874794, "loss": 0.10825909674167633, "loss_ce": 0.00019635571516118944, "loss_iou": 0.482421875, "loss_num": 0.0216064453125, "loss_xval": 0.10791015625, "num_input_tokens_seen": 575231232, "step": 3349 }, { "epoch": 0.8811731439468665, "grad_norm": 3.9827768470010754, "learning_rate": 5e-06, "loss": 0.1083, "num_input_tokens_seen": 575403592, "step": 3350 }, { "epoch": 0.8811731439468665, "loss": 0.11462774872779846, "loss_ce": 0.005161193665117025, "loss_iou": 0.70703125, "loss_num": 0.0218505859375, "loss_xval": 0.109375, "num_input_tokens_seen": 575403592, "step": 3350 }, { "epoch": 0.8814361807062537, "grad_norm": 6.779641757995467, "learning_rate": 5e-06, "loss": 0.1017, "num_input_tokens_seen": 575574024, "step": 3351 }, { "epoch": 0.8814361807062537, "loss": 0.08754941821098328, "loss_ce": 0.0006353590288199484, "loss_iou": 0.3828125, "loss_num": 0.0174560546875, "loss_xval": 0.0869140625, "num_input_tokens_seen": 575574024, "step": 3351 }, { "epoch": 0.8816992174656408, "grad_norm": 6.941492265981877, "learning_rate": 5e-06, "loss": 0.1041, "num_input_tokens_seen": 575746304, "step": 3352 }, { "epoch": 0.8816992174656408, "loss": 0.08934507519006729, "loss_ce": 0.0032244701869785786, "loss_iou": 0.40625, "loss_num": 0.0172119140625, "loss_xval": 0.0859375, "num_input_tokens_seen": 575746304, "step": 3352 }, { "epoch": 0.8819622542250279, "grad_norm": 5.409664650483643, "learning_rate": 5e-06, "loss": 0.1231, "num_input_tokens_seen": 575918724, "step": 3353 }, { "epoch": 0.8819622542250279, "loss": 0.08703694492578506, "loss_ce": 0.0007637504604645073, "loss_iou": 0.482421875, "loss_num": 0.0172119140625, "loss_xval": 0.08642578125, "num_input_tokens_seen": 575918724, "step": 3353 }, { "epoch": 0.882225290984415, "grad_norm": 7.600652328701811, "learning_rate": 5e-06, "loss": 0.1321, "num_input_tokens_seen": 576090812, "step": 3354 }, { "epoch": 0.882225290984415, "loss": 0.25224149227142334, "loss_ce": 
0.00353849190287292, "loss_iou": 0.44140625, "loss_num": 0.0498046875, "loss_xval": 0.2490234375, "num_input_tokens_seen": 576090812, "step": 3354 }, { "epoch": 0.8824883277438021, "grad_norm": 8.07764479644441, "learning_rate": 5e-06, "loss": 0.124, "num_input_tokens_seen": 576262592, "step": 3355 }, { "epoch": 0.8824883277438021, "loss": 0.08115795254707336, "loss_ce": 0.0004999967059120536, "loss_iou": 0.5703125, "loss_num": 0.01611328125, "loss_xval": 0.08056640625, "num_input_tokens_seen": 576262592, "step": 3355 }, { "epoch": 0.8827513645031894, "grad_norm": 9.615405351973429, "learning_rate": 5e-06, "loss": 0.087, "num_input_tokens_seen": 576434568, "step": 3356 }, { "epoch": 0.8827513645031894, "loss": 0.08256545662879944, "loss_ce": 0.0005952401552349329, "loss_iou": 0.5703125, "loss_num": 0.016357421875, "loss_xval": 0.08203125, "num_input_tokens_seen": 576434568, "step": 3356 }, { "epoch": 0.8830144012625765, "grad_norm": 4.944244594714697, "learning_rate": 5e-06, "loss": 0.1242, "num_input_tokens_seen": 576606792, "step": 3357 }, { "epoch": 0.8830144012625765, "loss": 0.05560879409313202, "loss_ce": 0.00032620219280943274, "loss_iou": 0.50390625, "loss_num": 0.01104736328125, "loss_xval": 0.05517578125, "num_input_tokens_seen": 576606792, "step": 3357 }, { "epoch": 0.8832774380219636, "grad_norm": 3.905791245202154, "learning_rate": 5e-06, "loss": 0.0977, "num_input_tokens_seen": 576778952, "step": 3358 }, { "epoch": 0.8832774380219636, "loss": 0.05678252875804901, "loss_ce": 0.0012634244048967957, "loss_iou": 0.404296875, "loss_num": 0.0111083984375, "loss_xval": 0.055419921875, "num_input_tokens_seen": 576778952, "step": 3358 }, { "epoch": 0.8835404747813507, "grad_norm": 3.3297332937077506, "learning_rate": 5e-06, "loss": 0.0826, "num_input_tokens_seen": 576951468, "step": 3359 }, { "epoch": 0.8835404747813507, "loss": 0.15261146426200867, "loss_ce": 0.0013968587154522538, "loss_iou": 0.447265625, "loss_num": 0.0302734375, "loss_xval": 0.1513671875, 
"num_input_tokens_seen": 576951468, "step": 3359 }, { "epoch": 0.8838035115407378, "grad_norm": 7.82434473224169, "learning_rate": 5e-06, "loss": 0.0952, "num_input_tokens_seen": 577121952, "step": 3360 }, { "epoch": 0.8838035115407378, "loss": 0.10315507650375366, "loss_ce": 0.0030421605333685875, "loss_iou": 0.427734375, "loss_num": 0.02001953125, "loss_xval": 0.10009765625, "num_input_tokens_seen": 577121952, "step": 3360 }, { "epoch": 0.884066548300125, "grad_norm": 10.425535033707964, "learning_rate": 5e-06, "loss": 0.102, "num_input_tokens_seen": 577293848, "step": 3361 }, { "epoch": 0.884066548300125, "loss": 0.07322396337985992, "loss_ce": 0.0005005778511986136, "loss_iou": 0.546875, "loss_num": 0.0145263671875, "loss_xval": 0.07275390625, "num_input_tokens_seen": 577293848, "step": 3361 }, { "epoch": 0.8843295850595121, "grad_norm": 14.476290505686679, "learning_rate": 5e-06, "loss": 0.1432, "num_input_tokens_seen": 577466084, "step": 3362 }, { "epoch": 0.8843295850595121, "loss": 0.1955878734588623, "loss_ce": 0.0021064176689833403, "loss_iou": 0.5234375, "loss_num": 0.038818359375, "loss_xval": 0.193359375, "num_input_tokens_seen": 577466084, "step": 3362 }, { "epoch": 0.8845926218188992, "grad_norm": 7.100653555231675, "learning_rate": 5e-06, "loss": 0.1145, "num_input_tokens_seen": 577638416, "step": 3363 }, { "epoch": 0.8845926218188992, "loss": 0.051397159695625305, "loss_ce": 0.00037176761543378234, "loss_iou": 0.462890625, "loss_num": 0.01019287109375, "loss_xval": 0.051025390625, "num_input_tokens_seen": 577638416, "step": 3363 }, { "epoch": 0.8848556585782863, "grad_norm": 17.047045722794394, "learning_rate": 5e-06, "loss": 0.1388, "num_input_tokens_seen": 577810576, "step": 3364 }, { "epoch": 0.8848556585782863, "loss": 0.11024264991283417, "loss_ce": 0.0004404087667353451, "loss_iou": 0.482421875, "loss_num": 0.02197265625, "loss_xval": 0.10986328125, "num_input_tokens_seen": 577810576, "step": 3364 }, { "epoch": 0.8851186953376734, 
"grad_norm": 6.6116170819185625, "learning_rate": 5e-06, "loss": 0.0941, "num_input_tokens_seen": 577982704, "step": 3365 }, { "epoch": 0.8851186953376734, "loss": 0.10162153840065002, "loss_ce": 0.003171829041093588, "loss_iou": 0.474609375, "loss_num": 0.019775390625, "loss_xval": 0.0986328125, "num_input_tokens_seen": 577982704, "step": 3365 }, { "epoch": 0.8853817320970606, "grad_norm": 8.013299917102648, "learning_rate": 5e-06, "loss": 0.1268, "num_input_tokens_seen": 578154696, "step": 3366 }, { "epoch": 0.8853817320970606, "loss": 0.09505804628133774, "loss_ce": 0.002620304934680462, "loss_iou": 0.458984375, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 578154696, "step": 3366 }, { "epoch": 0.8856447688564477, "grad_norm": 34.03888154985408, "learning_rate": 5e-06, "loss": 0.0988, "num_input_tokens_seen": 578326940, "step": 3367 }, { "epoch": 0.8856447688564477, "loss": 0.10307721793651581, "loss_ce": 0.0007899247575551271, "loss_iou": 0.427734375, "loss_num": 0.0205078125, "loss_xval": 0.10205078125, "num_input_tokens_seen": 578326940, "step": 3367 }, { "epoch": 0.8859078056158348, "grad_norm": 5.847556882862909, "learning_rate": 5e-06, "loss": 0.1113, "num_input_tokens_seen": 578499332, "step": 3368 }, { "epoch": 0.8859078056158348, "loss": 0.14421170949935913, "loss_ce": 0.0017556664533913136, "loss_iou": 0.4609375, "loss_num": 0.0284423828125, "loss_xval": 0.142578125, "num_input_tokens_seen": 578499332, "step": 3368 }, { "epoch": 0.8861708423752219, "grad_norm": 6.636470819413435, "learning_rate": 5e-06, "loss": 0.1011, "num_input_tokens_seen": 578671272, "step": 3369 }, { "epoch": 0.8861708423752219, "loss": 0.128991961479187, "loss_ce": 0.0024660732597112656, "loss_iou": 0.421875, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 578671272, "step": 3369 }, { "epoch": 0.886433879134609, "grad_norm": 17.54919755928276, "learning_rate": 5e-06, "loss": 0.1119, "num_input_tokens_seen": 
578842196, "step": 3370 }, { "epoch": 0.886433879134609, "loss": 0.10547197610139847, "loss_ce": 0.0005067643942311406, "loss_iou": 0.5078125, "loss_num": 0.02099609375, "loss_xval": 0.10498046875, "num_input_tokens_seen": 578842196, "step": 3370 }, { "epoch": 0.8866969158939962, "grad_norm": 7.184938374175427, "learning_rate": 5e-06, "loss": 0.0907, "num_input_tokens_seen": 579014236, "step": 3371 }, { "epoch": 0.8866969158939962, "loss": 0.08108609914779663, "loss_ce": 0.0009774556383490562, "loss_iou": 0.439453125, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 579014236, "step": 3371 }, { "epoch": 0.8869599526533833, "grad_norm": 6.863755671185031, "learning_rate": 5e-06, "loss": 0.0948, "num_input_tokens_seen": 579186596, "step": 3372 }, { "epoch": 0.8869599526533833, "loss": 0.1200333908200264, "loss_ce": 0.00034345051972195506, "loss_iou": 0.47265625, "loss_num": 0.02392578125, "loss_xval": 0.11962890625, "num_input_tokens_seen": 579186596, "step": 3372 }, { "epoch": 0.8872229894127704, "grad_norm": 7.255619753434164, "learning_rate": 5e-06, "loss": 0.1118, "num_input_tokens_seen": 579358828, "step": 3373 }, { "epoch": 0.8872229894127704, "loss": 0.11177671700716019, "loss_ce": 0.0016998156206682324, "loss_iou": 0.46484375, "loss_num": 0.02197265625, "loss_xval": 0.10986328125, "num_input_tokens_seen": 579358828, "step": 3373 }, { "epoch": 0.8874860261721575, "grad_norm": 4.645601347618675, "learning_rate": 5e-06, "loss": 0.1092, "num_input_tokens_seen": 579531144, "step": 3374 }, { "epoch": 0.8874860261721575, "loss": 0.13369636237621307, "loss_ce": 0.0010364485206082463, "loss_iou": 0.59765625, "loss_num": 0.026611328125, "loss_xval": 0.1328125, "num_input_tokens_seen": 579531144, "step": 3374 }, { "epoch": 0.8877490629315447, "grad_norm": 19.381762342400947, "learning_rate": 5e-06, "loss": 0.1611, "num_input_tokens_seen": 579703288, "step": 3375 }, { "epoch": 0.8877490629315447, "loss": 0.15896877646446228, "loss_ce": 
0.0007961708470247686, "loss_iou": 0.2578125, "loss_num": 0.03173828125, "loss_xval": 0.158203125, "num_input_tokens_seen": 579703288, "step": 3375 }, { "epoch": 0.8880120996909318, "grad_norm": 4.08558956843784, "learning_rate": 5e-06, "loss": 0.1289, "num_input_tokens_seen": 579875620, "step": 3376 }, { "epoch": 0.8880120996909318, "loss": 0.06248597800731659, "loss_ce": 0.0004437366151250899, "loss_iou": 0.58203125, "loss_num": 0.01239013671875, "loss_xval": 0.06201171875, "num_input_tokens_seen": 579875620, "step": 3376 }, { "epoch": 0.888275136450319, "grad_norm": 4.308623737664192, "learning_rate": 5e-06, "loss": 0.0812, "num_input_tokens_seen": 580047776, "step": 3377 }, { "epoch": 0.888275136450319, "loss": 0.07517598569393158, "loss_ce": 0.0003468855866231024, "loss_iou": 0.48046875, "loss_num": 0.01495361328125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 580047776, "step": 3377 }, { "epoch": 0.8885381732097061, "grad_norm": 17.189878667773364, "learning_rate": 5e-06, "loss": 0.1126, "num_input_tokens_seen": 580219696, "step": 3378 }, { "epoch": 0.8885381732097061, "loss": 0.1588488519191742, "loss_ce": 0.00040158609044738114, "loss_iou": 0.46875, "loss_num": 0.03173828125, "loss_xval": 0.158203125, "num_input_tokens_seen": 580219696, "step": 3378 }, { "epoch": 0.8888012099690932, "grad_norm": 17.94232408746683, "learning_rate": 5e-06, "loss": 0.1435, "num_input_tokens_seen": 580389856, "step": 3379 }, { "epoch": 0.8888012099690932, "loss": 0.16451287269592285, "loss_ce": 0.002464542631059885, "loss_iou": 0.255859375, "loss_num": 0.032470703125, "loss_xval": 0.162109375, "num_input_tokens_seen": 580389856, "step": 3379 }, { "epoch": 0.8890642467284803, "grad_norm": 24.21781414429754, "learning_rate": 5e-06, "loss": 0.0962, "num_input_tokens_seen": 580562140, "step": 3380 }, { "epoch": 0.8890642467284803, "loss": 0.15492644906044006, "loss_ce": 0.0021554557606577873, "loss_iou": 0.515625, "loss_num": 0.030517578125, "loss_xval": 0.15234375, 
"num_input_tokens_seen": 580562140, "step": 3380 }, { "epoch": 0.8893272834878674, "grad_norm": 10.546114100067816, "learning_rate": 5e-06, "loss": 0.1532, "num_input_tokens_seen": 580734572, "step": 3381 }, { "epoch": 0.8893272834878674, "loss": 0.10413020849227905, "loss_ce": 0.0011639007134363055, "loss_iou": 0.4453125, "loss_num": 0.0206298828125, "loss_xval": 0.10302734375, "num_input_tokens_seen": 580734572, "step": 3381 }, { "epoch": 0.8895903202472546, "grad_norm": 6.233208405285995, "learning_rate": 5e-06, "loss": 0.1056, "num_input_tokens_seen": 580906660, "step": 3382 }, { "epoch": 0.8895903202472546, "loss": 0.14456871151924133, "loss_ce": 0.0018837791867554188, "loss_iou": 0.53515625, "loss_num": 0.028564453125, "loss_xval": 0.142578125, "num_input_tokens_seen": 580906660, "step": 3382 }, { "epoch": 0.8898533570066417, "grad_norm": 3.9495859544374516, "learning_rate": 5e-06, "loss": 0.0978, "num_input_tokens_seen": 581078540, "step": 3383 }, { "epoch": 0.8898533570066417, "loss": 0.13160666823387146, "loss_ce": 0.0018764439737424254, "loss_iou": 0.25390625, "loss_num": 0.0260009765625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 581078540, "step": 3383 }, { "epoch": 0.8901163937660288, "grad_norm": 4.909648559933354, "learning_rate": 5e-06, "loss": 0.0663, "num_input_tokens_seen": 581247384, "step": 3384 }, { "epoch": 0.8901163937660288, "loss": 0.06679339706897736, "loss_ce": 0.00032610760536044836, "loss_iou": 0.58984375, "loss_num": 0.0133056640625, "loss_xval": 0.06640625, "num_input_tokens_seen": 581247384, "step": 3384 }, { "epoch": 0.8903794305254159, "grad_norm": 4.667075414121075, "learning_rate": 5e-06, "loss": 0.1195, "num_input_tokens_seen": 581419360, "step": 3385 }, { "epoch": 0.8903794305254159, "loss": 0.1276930421590805, "loss_ce": 0.0018690668512135744, "loss_iou": 0.5, "loss_num": 0.025146484375, "loss_xval": 0.1259765625, "num_input_tokens_seen": 581419360, "step": 3385 }, { "epoch": 0.890642467284803, "grad_norm": 
4.483616610873386, "learning_rate": 5e-06, "loss": 0.0794, "num_input_tokens_seen": 581589952, "step": 3386 }, { "epoch": 0.890642467284803, "loss": 0.047724828124046326, "loss_ce": 0.00011740828631445765, "loss_iou": 0.50390625, "loss_num": 0.009521484375, "loss_xval": 0.047607421875, "num_input_tokens_seen": 581589952, "step": 3386 }, { "epoch": 0.8909055040441902, "grad_norm": 21.859242645746537, "learning_rate": 5e-06, "loss": 0.1284, "num_input_tokens_seen": 581762156, "step": 3387 }, { "epoch": 0.8909055040441902, "loss": 0.18419209122657776, "loss_ce": 0.0005067941965535283, "loss_iou": 0.33984375, "loss_num": 0.036865234375, "loss_xval": 0.18359375, "num_input_tokens_seen": 581762156, "step": 3387 }, { "epoch": 0.8911685408035773, "grad_norm": 8.665372364747144, "learning_rate": 5e-06, "loss": 0.0776, "num_input_tokens_seen": 581934152, "step": 3388 }, { "epoch": 0.8911685408035773, "loss": 0.08677740395069122, "loss_ce": 0.00022955110762268305, "loss_iou": 0.50390625, "loss_num": 0.017333984375, "loss_xval": 0.08642578125, "num_input_tokens_seen": 581934152, "step": 3388 }, { "epoch": 0.8914315775629644, "grad_norm": 4.1210884311067835, "learning_rate": 5e-06, "loss": 0.1026, "num_input_tokens_seen": 582106144, "step": 3389 }, { "epoch": 0.8914315775629644, "loss": 0.13846494257450104, "loss_ce": 0.0010137634817510843, "loss_iou": 0.53125, "loss_num": 0.0274658203125, "loss_xval": 0.1376953125, "num_input_tokens_seen": 582106144, "step": 3389 }, { "epoch": 0.8916946143223515, "grad_norm": 11.500479931217354, "learning_rate": 5e-06, "loss": 0.1045, "num_input_tokens_seen": 582278400, "step": 3390 }, { "epoch": 0.8916946143223515, "loss": 0.12132581323385239, "loss_ce": 0.002826055744662881, "loss_iou": 0.5859375, "loss_num": 0.023681640625, "loss_xval": 0.11865234375, "num_input_tokens_seen": 582278400, "step": 3390 }, { "epoch": 0.8919576510817386, "grad_norm": 4.055853466494655, "learning_rate": 5e-06, "loss": 0.094, "num_input_tokens_seen": 582450564, 
"step": 3391 }, { "epoch": 0.8919576510817386, "loss": 0.0762665793299675, "loss_ce": 0.0018952443497255445, "loss_iou": 0.4453125, "loss_num": 0.014892578125, "loss_xval": 0.07421875, "num_input_tokens_seen": 582450564, "step": 3391 }, { "epoch": 0.8922206878411258, "grad_norm": 8.665290308457982, "learning_rate": 5e-06, "loss": 0.1485, "num_input_tokens_seen": 582622336, "step": 3392 }, { "epoch": 0.8922206878411258, "loss": 0.16068054735660553, "loss_ce": 0.0011956822127103806, "loss_iou": 0.546875, "loss_num": 0.03173828125, "loss_xval": 0.1591796875, "num_input_tokens_seen": 582622336, "step": 3392 }, { "epoch": 0.892483724600513, "grad_norm": 14.955097188866974, "learning_rate": 5e-06, "loss": 0.1142, "num_input_tokens_seen": 582794384, "step": 3393 }, { "epoch": 0.892483724600513, "loss": 0.08523661643266678, "loss_ce": 0.0010691368952393532, "loss_iou": 0.494140625, "loss_num": 0.016845703125, "loss_xval": 0.083984375, "num_input_tokens_seen": 582794384, "step": 3393 }, { "epoch": 0.8927467613599, "grad_norm": 14.050392295123078, "learning_rate": 5e-06, "loss": 0.0769, "num_input_tokens_seen": 582966476, "step": 3394 }, { "epoch": 0.8927467613599, "loss": 0.11920854449272156, "loss_ce": 0.0003883557510562241, "loss_iou": 0.5234375, "loss_num": 0.0238037109375, "loss_xval": 0.11865234375, "num_input_tokens_seen": 582966476, "step": 3394 }, { "epoch": 0.8930097981192872, "grad_norm": 3.5189407633589225, "learning_rate": 5e-06, "loss": 0.1337, "num_input_tokens_seen": 583138656, "step": 3395 }, { "epoch": 0.8930097981192872, "loss": 0.23967677354812622, "loss_ce": 0.0011361411307007074, "loss_iou": 0.27734375, "loss_num": 0.0478515625, "loss_xval": 0.23828125, "num_input_tokens_seen": 583138656, "step": 3395 }, { "epoch": 0.8932728348786743, "grad_norm": 4.239345260315017, "learning_rate": 5e-06, "loss": 0.0886, "num_input_tokens_seen": 583310580, "step": 3396 }, { "epoch": 0.8932728348786743, "loss": 0.045496270060539246, "loss_ce": 0.00010137087519979104, 
"loss_iou": 0.58203125, "loss_num": 0.00909423828125, "loss_xval": 0.04541015625, "num_input_tokens_seen": 583310580, "step": 3396 }, { "epoch": 0.8935358716380615, "grad_norm": 4.318293293535835, "learning_rate": 5e-06, "loss": 0.1154, "num_input_tokens_seen": 583482984, "step": 3397 }, { "epoch": 0.8935358716380615, "loss": 0.11194127053022385, "loss_ce": 0.000826766830869019, "loss_iou": 0.44140625, "loss_num": 0.022216796875, "loss_xval": 0.111328125, "num_input_tokens_seen": 583482984, "step": 3397 }, { "epoch": 0.8937989083974486, "grad_norm": 4.660547370469876, "learning_rate": 5e-06, "loss": 0.15, "num_input_tokens_seen": 583655292, "step": 3398 }, { "epoch": 0.8937989083974486, "loss": 0.07680627703666687, "loss_ce": 0.00017663151083979756, "loss_iou": 0.451171875, "loss_num": 0.01531982421875, "loss_xval": 0.07666015625, "num_input_tokens_seen": 583655292, "step": 3398 }, { "epoch": 0.8940619451568357, "grad_norm": 10.828455614089098, "learning_rate": 5e-06, "loss": 0.111, "num_input_tokens_seen": 583827408, "step": 3399 }, { "epoch": 0.8940619451568357, "loss": 0.1173757016658783, "loss_ce": 3.5622346331365407e-05, "loss_iou": 0.42578125, "loss_num": 0.0234375, "loss_xval": 0.1171875, "num_input_tokens_seen": 583827408, "step": 3399 }, { "epoch": 0.8943249819162228, "grad_norm": 5.042018374187092, "learning_rate": 5e-06, "loss": 0.0892, "num_input_tokens_seen": 583999664, "step": 3400 }, { "epoch": 0.8943249819162228, "loss": 0.061997972428798676, "loss_ce": 0.000367723434465006, "loss_iou": 0.48046875, "loss_num": 0.0123291015625, "loss_xval": 0.0615234375, "num_input_tokens_seen": 583999664, "step": 3400 }, { "epoch": 0.8945880186756099, "grad_norm": 26.304936486213094, "learning_rate": 5e-06, "loss": 0.1601, "num_input_tokens_seen": 584171616, "step": 3401 }, { "epoch": 0.8945880186756099, "loss": 0.2535288333892822, "loss_ce": 0.004078162834048271, "loss_iou": 0.34765625, "loss_num": 0.0498046875, "loss_xval": 0.2490234375, "num_input_tokens_seen": 
584171616, "step": 3401 }, { "epoch": 0.894851055434997, "grad_norm": 4.926581765464391, "learning_rate": 5e-06, "loss": 0.1067, "num_input_tokens_seen": 584343844, "step": 3402 }, { "epoch": 0.894851055434997, "loss": 0.11322697252035141, "loss_ce": 0.0008307351381517947, "loss_iou": null, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 584343844, "step": 3402 }, { "epoch": 0.8951140921943842, "grad_norm": 7.363528943726032, "learning_rate": 5e-06, "loss": 0.0878, "num_input_tokens_seen": 584516156, "step": 3403 }, { "epoch": 0.8951140921943842, "loss": 0.08316925168037415, "loss_ce": 0.002114569302648306, "loss_iou": 0.37109375, "loss_num": 0.0162353515625, "loss_xval": 0.0810546875, "num_input_tokens_seen": 584516156, "step": 3403 }, { "epoch": 0.8953771289537713, "grad_norm": 4.344737704222575, "learning_rate": 5e-06, "loss": 0.0809, "num_input_tokens_seen": 584688680, "step": 3404 }, { "epoch": 0.8953771289537713, "loss": 0.061312295496463776, "loss_ce": 0.0012231803266331553, "loss_iou": 0.58203125, "loss_num": 0.01202392578125, "loss_xval": 0.06005859375, "num_input_tokens_seen": 584688680, "step": 3404 }, { "epoch": 0.8956401657131584, "grad_norm": 9.026057514105759, "learning_rate": 5e-06, "loss": 0.1616, "num_input_tokens_seen": 584860880, "step": 3405 }, { "epoch": 0.8956401657131584, "loss": 0.09903927892446518, "loss_ce": 0.00197811983525753, "loss_iou": 0.52734375, "loss_num": 0.0194091796875, "loss_xval": 0.09716796875, "num_input_tokens_seen": 584860880, "step": 3405 }, { "epoch": 0.8959032024725455, "grad_norm": 9.19169137185048, "learning_rate": 5e-06, "loss": 0.106, "num_input_tokens_seen": 585032800, "step": 3406 }, { "epoch": 0.8959032024725455, "loss": 0.06624776124954224, "loss_ce": 0.00032979153911583126, "loss_iou": 0.443359375, "loss_num": 0.01318359375, "loss_xval": 0.06591796875, "num_input_tokens_seen": 585032800, "step": 3406 }, { "epoch": 0.8961662392319326, "grad_norm": 9.011492701681583, "learning_rate": 
5e-06, "loss": 0.1281, "num_input_tokens_seen": 585203264, "step": 3407 }, { "epoch": 0.8961662392319326, "loss": 0.21042554080486298, "loss_ce": 0.002722906181588769, "loss_iou": 0.40234375, "loss_num": 0.04150390625, "loss_xval": 0.2080078125, "num_input_tokens_seen": 585203264, "step": 3407 }, { "epoch": 0.8964292759913198, "grad_norm": 17.979008103100615, "learning_rate": 5e-06, "loss": 0.093, "num_input_tokens_seen": 585375592, "step": 3408 }, { "epoch": 0.8964292759913198, "loss": 0.041413549333810806, "loss_ce": 0.0023205317556858063, "loss_iou": 0.62890625, "loss_num": 0.0078125, "loss_xval": 0.0390625, "num_input_tokens_seen": 585375592, "step": 3408 }, { "epoch": 0.8966923127507069, "grad_norm": 10.115194008381021, "learning_rate": 5e-06, "loss": 0.1074, "num_input_tokens_seen": 585546216, "step": 3409 }, { "epoch": 0.8966923127507069, "loss": 0.15392959117889404, "loss_ce": 0.0011891128960996866, "loss_iou": 0.447265625, "loss_num": 0.030517578125, "loss_xval": 0.15234375, "num_input_tokens_seen": 585546216, "step": 3409 }, { "epoch": 0.896955349510094, "grad_norm": 7.995711146610571, "learning_rate": 5e-06, "loss": 0.1232, "num_input_tokens_seen": 585718304, "step": 3410 }, { "epoch": 0.896955349510094, "loss": 0.05361251160502434, "loss_ce": 0.0008171012159436941, "loss_iou": 0.4921875, "loss_num": 0.01055908203125, "loss_xval": 0.052734375, "num_input_tokens_seen": 585718304, "step": 3410 }, { "epoch": 0.8972183862694811, "grad_norm": 4.960227305854337, "learning_rate": 5e-06, "loss": 0.1025, "num_input_tokens_seen": 585890728, "step": 3411 }, { "epoch": 0.8972183862694811, "loss": 0.08083316683769226, "loss_ce": 0.0013043570797890425, "loss_iou": 0.455078125, "loss_num": 0.015869140625, "loss_xval": 0.07958984375, "num_input_tokens_seen": 585890728, "step": 3411 }, { "epoch": 0.8974814230288682, "grad_norm": 5.883674464139159, "learning_rate": 5e-06, "loss": 0.1462, "num_input_tokens_seen": 586062896, "step": 3412 }, { "epoch": 0.8974814230288682, 
"loss": 0.06225815415382385, "loss_ce": 0.003023534081876278, "loss_iou": 0.515625, "loss_num": 0.0118408203125, "loss_xval": 0.059326171875, "num_input_tokens_seen": 586062896, "step": 3412 }, { "epoch": 0.8977444597882555, "grad_norm": 5.4958687199216625, "learning_rate": 5e-06, "loss": 0.1108, "num_input_tokens_seen": 586235628, "step": 3413 }, { "epoch": 0.8977444597882555, "loss": 0.08900677412748337, "loss_ce": 0.0012687351554632187, "loss_iou": 0.50390625, "loss_num": 0.017578125, "loss_xval": 0.087890625, "num_input_tokens_seen": 586235628, "step": 3413 }, { "epoch": 0.8980074965476426, "grad_norm": 6.158695953111894, "learning_rate": 5e-06, "loss": 0.1416, "num_input_tokens_seen": 586407736, "step": 3414 }, { "epoch": 0.8980074965476426, "loss": 0.13948456943035126, "loss_ce": 0.0007364002522081137, "loss_iou": 0.5625, "loss_num": 0.0277099609375, "loss_xval": 0.138671875, "num_input_tokens_seen": 586407736, "step": 3414 }, { "epoch": 0.8982705333070297, "grad_norm": 6.799680477506266, "learning_rate": 5e-06, "loss": 0.1057, "num_input_tokens_seen": 586579816, "step": 3415 }, { "epoch": 0.8982705333070297, "loss": 0.1316445767879486, "loss_ce": 0.0035775681026279926, "loss_iou": 0.447265625, "loss_num": 0.025634765625, "loss_xval": 0.1279296875, "num_input_tokens_seen": 586579816, "step": 3415 }, { "epoch": 0.8985335700664168, "grad_norm": 4.757727392184302, "learning_rate": 5e-06, "loss": 0.1189, "num_input_tokens_seen": 586750092, "step": 3416 }, { "epoch": 0.8985335700664168, "loss": 0.1803445667028427, "loss_ce": 0.0017557005630806088, "loss_iou": 0.30078125, "loss_num": 0.03564453125, "loss_xval": 0.1787109375, "num_input_tokens_seen": 586750092, "step": 3416 }, { "epoch": 0.8987966068258039, "grad_norm": 4.7130808920215825, "learning_rate": 5e-06, "loss": 0.0988, "num_input_tokens_seen": 586920724, "step": 3417 }, { "epoch": 0.8987966068258039, "loss": 0.04298722371459007, "loss_ce": 0.0010102951200678945, "loss_iou": 0.39453125, "loss_num": 
0.0084228515625, "loss_xval": 0.0419921875, "num_input_tokens_seen": 586920724, "step": 3417 }, { "epoch": 0.8990596435851911, "grad_norm": 9.716073099163628, "learning_rate": 5e-06, "loss": 0.0909, "num_input_tokens_seen": 587093308, "step": 3418 }, { "epoch": 0.8990596435851911, "loss": 0.11212408542633057, "loss_ce": 0.0025659759994596243, "loss_iou": 0.78125, "loss_num": 0.0218505859375, "loss_xval": 0.109375, "num_input_tokens_seen": 587093308, "step": 3418 }, { "epoch": 0.8993226803445782, "grad_norm": 9.72495754411886, "learning_rate": 5e-06, "loss": 0.0849, "num_input_tokens_seen": 587265328, "step": 3419 }, { "epoch": 0.8993226803445782, "loss": 0.09303727746009827, "loss_ce": 0.0017286788206547499, "loss_iou": 0.53125, "loss_num": 0.018310546875, "loss_xval": 0.09130859375, "num_input_tokens_seen": 587265328, "step": 3419 }, { "epoch": 0.8995857171039653, "grad_norm": 27.58546147063481, "learning_rate": 5e-06, "loss": 0.1334, "num_input_tokens_seen": 587435704, "step": 3420 }, { "epoch": 0.8995857171039653, "loss": 0.20503398776054382, "loss_ce": 0.0012986233923584223, "loss_iou": 0.28515625, "loss_num": 0.040771484375, "loss_xval": 0.2041015625, "num_input_tokens_seen": 587435704, "step": 3420 }, { "epoch": 0.8998487538633524, "grad_norm": 3.780121082337908, "learning_rate": 5e-06, "loss": 0.1115, "num_input_tokens_seen": 587605756, "step": 3421 }, { "epoch": 0.8998487538633524, "loss": 0.09629541635513306, "loss_ce": 0.0002413361653452739, "loss_iou": 0.435546875, "loss_num": 0.0191650390625, "loss_xval": 0.09619140625, "num_input_tokens_seen": 587605756, "step": 3421 }, { "epoch": 0.9001117906227395, "grad_norm": 4.905276205162047, "learning_rate": 5e-06, "loss": 0.1407, "num_input_tokens_seen": 587776128, "step": 3422 }, { "epoch": 0.9001117906227395, "loss": 0.22563423216342926, "loss_ce": 0.003191609401255846, "loss_iou": 0.462890625, "loss_num": 0.04443359375, "loss_xval": 0.22265625, "num_input_tokens_seen": 587776128, "step": 3422 }, { "epoch": 
0.9003748273821266, "grad_norm": 4.24824425523058, "learning_rate": 5e-06, "loss": 0.0902, "num_input_tokens_seen": 587948348, "step": 3423 }, { "epoch": 0.9003748273821266, "loss": 0.04901735112071037, "loss_ce": 0.0007690612110309303, "loss_iou": 0.5, "loss_num": 0.0096435546875, "loss_xval": 0.04833984375, "num_input_tokens_seen": 587948348, "step": 3423 }, { "epoch": 0.9006378641415138, "grad_norm": 14.901804223286693, "learning_rate": 5e-06, "loss": 0.0734, "num_input_tokens_seen": 588115160, "step": 3424 }, { "epoch": 0.9006378641415138, "loss": 0.0699070394039154, "loss_ce": 0.001684993039816618, "loss_iou": 0.515625, "loss_num": 0.013671875, "loss_xval": 0.068359375, "num_input_tokens_seen": 588115160, "step": 3424 }, { "epoch": 0.9009009009009009, "grad_norm": 4.750314737648371, "learning_rate": 5e-06, "loss": 0.0881, "num_input_tokens_seen": 588287148, "step": 3425 }, { "epoch": 0.9009009009009009, "loss": 0.07479090988636017, "loss_ce": 0.006248427089303732, "loss_iou": 0.498046875, "loss_num": 0.013671875, "loss_xval": 0.068359375, "num_input_tokens_seen": 588287148, "step": 3425 }, { "epoch": 0.901163937660288, "grad_norm": 3.9857635354375183, "learning_rate": 5e-06, "loss": 0.1155, "num_input_tokens_seen": 588457468, "step": 3426 }, { "epoch": 0.901163937660288, "loss": 0.04558904469013214, "loss_ce": 0.0004535481857601553, "loss_iou": 0.5546875, "loss_num": 0.009033203125, "loss_xval": 0.045166015625, "num_input_tokens_seen": 588457468, "step": 3426 }, { "epoch": 0.9014269744196751, "grad_norm": 7.929685479499518, "learning_rate": 5e-06, "loss": 0.1174, "num_input_tokens_seen": 588627832, "step": 3427 }, { "epoch": 0.9014269744196751, "loss": 0.11351916939020157, "loss_ce": 0.00022265504230745137, "loss_iou": 0.41796875, "loss_num": 0.022705078125, "loss_xval": 0.11328125, "num_input_tokens_seen": 588627832, "step": 3427 }, { "epoch": 0.9016900111790622, "grad_norm": 5.274686939603064, "learning_rate": 5e-06, "loss": 0.1085, "num_input_tokens_seen": 
588797844, "step": 3428 }, { "epoch": 0.9016900111790622, "loss": 0.07151903212070465, "loss_ce": 0.004655018448829651, "loss_iou": 0.2314453125, "loss_num": 0.01336669921875, "loss_xval": 0.06689453125, "num_input_tokens_seen": 588797844, "step": 3428 }, { "epoch": 0.9019530479384494, "grad_norm": 5.241669176622665, "learning_rate": 5e-06, "loss": 0.1302, "num_input_tokens_seen": 588966836, "step": 3429 }, { "epoch": 0.9019530479384494, "loss": 0.12862293422222137, "loss_ce": 0.0008153152884915471, "loss_iou": 0.5546875, "loss_num": 0.0255126953125, "loss_xval": 0.1279296875, "num_input_tokens_seen": 588966836, "step": 3429 }, { "epoch": 0.9022160846978365, "grad_norm": 7.584585654330743, "learning_rate": 5e-06, "loss": 0.1064, "num_input_tokens_seen": 589136916, "step": 3430 }, { "epoch": 0.9022160846978365, "loss": 0.1643849015235901, "loss_ce": 0.0007801597821526229, "loss_iou": 0.4609375, "loss_num": 0.03271484375, "loss_xval": 0.1640625, "num_input_tokens_seen": 589136916, "step": 3430 }, { "epoch": 0.9024791214572236, "grad_norm": 3.532701818396997, "learning_rate": 5e-06, "loss": 0.1408, "num_input_tokens_seen": 589309164, "step": 3431 }, { "epoch": 0.9024791214572236, "loss": 0.077778160572052, "loss_ce": 0.0013621454127132893, "loss_iou": 0.48046875, "loss_num": 0.0152587890625, "loss_xval": 0.076171875, "num_input_tokens_seen": 589309164, "step": 3431 }, { "epoch": 0.9027421582166107, "grad_norm": 7.674994076422982, "learning_rate": 5e-06, "loss": 0.1244, "num_input_tokens_seen": 589481132, "step": 3432 }, { "epoch": 0.9027421582166107, "loss": 0.09501226991415024, "loss_ce": 0.0008197662536986172, "loss_iou": 0.404296875, "loss_num": 0.018798828125, "loss_xval": 0.09423828125, "num_input_tokens_seen": 589481132, "step": 3432 }, { "epoch": 0.9030051949759978, "grad_norm": 6.592301292301914, "learning_rate": 5e-06, "loss": 0.0956, "num_input_tokens_seen": 589649864, "step": 3433 }, { "epoch": 0.9030051949759978, "loss": 0.06211673840880394, "loss_ce": 
0.0013867560774087906, "loss_iou": 0.48828125, "loss_num": 0.01214599609375, "loss_xval": 0.060791015625, "num_input_tokens_seen": 589649864, "step": 3433 }, { "epoch": 0.9032682317353851, "grad_norm": 5.45319787734761, "learning_rate": 5e-06, "loss": 0.0854, "num_input_tokens_seen": 589822176, "step": 3434 }, { "epoch": 0.9032682317353851, "loss": 0.06987213343381882, "loss_ce": 0.0025808701757341623, "loss_iou": 0.58984375, "loss_num": 0.013427734375, "loss_xval": 0.0673828125, "num_input_tokens_seen": 589822176, "step": 3434 }, { "epoch": 0.9035312684947722, "grad_norm": 35.99527436418574, "learning_rate": 5e-06, "loss": 0.1639, "num_input_tokens_seen": 589992448, "step": 3435 }, { "epoch": 0.9035312684947722, "loss": 0.2300114631652832, "loss_ce": 0.0013890261761844158, "loss_iou": 0.66796875, "loss_num": 0.045654296875, "loss_xval": 0.228515625, "num_input_tokens_seen": 589992448, "step": 3435 }, { "epoch": 0.9037943052541593, "grad_norm": 8.340774703127748, "learning_rate": 5e-06, "loss": 0.1065, "num_input_tokens_seen": 590162576, "step": 3436 }, { "epoch": 0.9037943052541593, "loss": 0.09980174899101257, "loss_ce": 0.001687736832536757, "loss_iou": 0.478515625, "loss_num": 0.0196533203125, "loss_xval": 0.09814453125, "num_input_tokens_seen": 590162576, "step": 3436 }, { "epoch": 0.9040573420135464, "grad_norm": 3.893496744409823, "learning_rate": 5e-06, "loss": 0.1361, "num_input_tokens_seen": 590334768, "step": 3437 }, { "epoch": 0.9040573420135464, "loss": 0.14963126182556152, "loss_ce": 0.002658609300851822, "loss_iou": 0.443359375, "loss_num": 0.0294189453125, "loss_xval": 0.146484375, "num_input_tokens_seen": 590334768, "step": 3437 }, { "epoch": 0.9043203787729335, "grad_norm": 4.512105245241041, "learning_rate": 5e-06, "loss": 0.156, "num_input_tokens_seen": 590506964, "step": 3438 }, { "epoch": 0.9043203787729335, "loss": 0.2095954418182373, "loss_ce": 0.0010688342154026031, "loss_iou": 0.423828125, "loss_num": 0.041748046875, "loss_xval": 
0.208984375, "num_input_tokens_seen": 590506964, "step": 3438 }, { "epoch": 0.9045834155323207, "grad_norm": 10.79594707291913, "learning_rate": 5e-06, "loss": 0.1262, "num_input_tokens_seen": 590679132, "step": 3439 }, { "epoch": 0.9045834155323207, "loss": 0.056604690849781036, "loss_ce": 0.0010932188015431166, "loss_iou": 0.470703125, "loss_num": 0.0111083984375, "loss_xval": 0.055419921875, "num_input_tokens_seen": 590679132, "step": 3439 }, { "epoch": 0.9048464522917078, "grad_norm": 7.232598771359226, "learning_rate": 5e-06, "loss": 0.0693, "num_input_tokens_seen": 590851488, "step": 3440 }, { "epoch": 0.9048464522917078, "loss": 0.07088696211576462, "loss_ce": 0.0023749994579702616, "loss_iou": 0.396484375, "loss_num": 0.01373291015625, "loss_xval": 0.068359375, "num_input_tokens_seen": 590851488, "step": 3440 }, { "epoch": 0.9051094890510949, "grad_norm": 5.761072356819141, "learning_rate": 5e-06, "loss": 0.0893, "num_input_tokens_seen": 591023672, "step": 3441 }, { "epoch": 0.9051094890510949, "loss": 0.05779781565070152, "loss_ce": 0.002149013802409172, "loss_iou": 0.48828125, "loss_num": 0.0111083984375, "loss_xval": 0.0556640625, "num_input_tokens_seen": 591023672, "step": 3441 }, { "epoch": 0.905372525810482, "grad_norm": 5.464589960258587, "learning_rate": 5e-06, "loss": 0.0756, "num_input_tokens_seen": 591196064, "step": 3442 }, { "epoch": 0.905372525810482, "loss": 0.11280819773674011, "loss_ce": 0.004058802034705877, "loss_iou": 0.392578125, "loss_num": 0.021728515625, "loss_xval": 0.10888671875, "num_input_tokens_seen": 591196064, "step": 3442 }, { "epoch": 0.9056355625698691, "grad_norm": 10.0338321043045, "learning_rate": 5e-06, "loss": 0.1623, "num_input_tokens_seen": 591368108, "step": 3443 }, { "epoch": 0.9056355625698691, "loss": 0.11732570827007294, "loss_ce": 0.0005044231074862182, "loss_iou": 0.3671875, "loss_num": 0.0234375, "loss_xval": 0.11669921875, "num_input_tokens_seen": 591368108, "step": 3443 }, { "epoch": 0.9058985993292563, 
"grad_norm": 4.9322678909564805, "learning_rate": 5e-06, "loss": 0.1497, "num_input_tokens_seen": 591539968, "step": 3444 }, { "epoch": 0.9058985993292563, "loss": 0.10426676273345947, "loss_ce": 0.0025516818277537823, "loss_iou": 0.478515625, "loss_num": 0.0203857421875, "loss_xval": 0.1015625, "num_input_tokens_seen": 591539968, "step": 3444 }, { "epoch": 0.9061616360886434, "grad_norm": 10.20363728877728, "learning_rate": 5e-06, "loss": 0.1077, "num_input_tokens_seen": 591709020, "step": 3445 }, { "epoch": 0.9061616360886434, "loss": 0.049313947558403015, "loss_ce": 0.0002569416828919202, "loss_iou": 0.515625, "loss_num": 0.00982666015625, "loss_xval": 0.049072265625, "num_input_tokens_seen": 591709020, "step": 3445 }, { "epoch": 0.9064246728480305, "grad_norm": 8.985092426433443, "learning_rate": 5e-06, "loss": 0.1115, "num_input_tokens_seen": 591881092, "step": 3446 }, { "epoch": 0.9064246728480305, "loss": 0.0865587666630745, "loss_ce": 0.0004686739994212985, "loss_iou": 0.462890625, "loss_num": 0.0172119140625, "loss_xval": 0.0859375, "num_input_tokens_seen": 591881092, "step": 3446 }, { "epoch": 0.9066877096074176, "grad_norm": 27.73232811749845, "learning_rate": 5e-06, "loss": 0.1312, "num_input_tokens_seen": 592053524, "step": 3447 }, { "epoch": 0.9066877096074176, "loss": 0.10780518501996994, "loss_ce": 0.0006884862086735666, "loss_iou": 0.5, "loss_num": 0.021484375, "loss_xval": 0.10693359375, "num_input_tokens_seen": 592053524, "step": 3447 }, { "epoch": 0.9069507463668047, "grad_norm": 11.824522545760333, "learning_rate": 5e-06, "loss": 0.111, "num_input_tokens_seen": 592225468, "step": 3448 }, { "epoch": 0.9069507463668047, "loss": 0.0678405836224556, "loss_ce": 0.002670294838026166, "loss_iou": 0.546875, "loss_num": 0.01300048828125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 592225468, "step": 3448 }, { "epoch": 0.9072137831261918, "grad_norm": 5.549108113367569, "learning_rate": 5e-06, "loss": 0.1315, "num_input_tokens_seen": 592397784, 
"step": 3449 }, { "epoch": 0.9072137831261918, "loss": 0.06828893721103668, "loss_ce": 0.0015164725482463837, "loss_iou": 0.482421875, "loss_num": 0.01336669921875, "loss_xval": 0.06689453125, "num_input_tokens_seen": 592397784, "step": 3449 }, { "epoch": 0.907476819885579, "grad_norm": 34.02061333154032, "learning_rate": 5e-06, "loss": 0.1082, "num_input_tokens_seen": 592569716, "step": 3450 }, { "epoch": 0.907476819885579, "loss": 0.18558475375175476, "loss_ce": 0.002723418176174164, "loss_iou": 0.4296875, "loss_num": 0.03662109375, "loss_xval": 0.1826171875, "num_input_tokens_seen": 592569716, "step": 3450 }, { "epoch": 0.9077398566449661, "grad_norm": 8.419964301658604, "learning_rate": 5e-06, "loss": 0.1118, "num_input_tokens_seen": 592742332, "step": 3451 }, { "epoch": 0.9077398566449661, "loss": 0.10704197734594345, "loss_ce": 0.0033432499039918184, "loss_iou": 0.447265625, "loss_num": 0.020751953125, "loss_xval": 0.103515625, "num_input_tokens_seen": 592742332, "step": 3451 }, { "epoch": 0.9080028934043533, "grad_norm": 6.543315599280218, "learning_rate": 5e-06, "loss": 0.1092, "num_input_tokens_seen": 592914548, "step": 3452 }, { "epoch": 0.9080028934043533, "loss": 0.1184120774269104, "loss_ce": 0.00665670819580555, "loss_iou": 0.482421875, "loss_num": 0.0223388671875, "loss_xval": 0.11181640625, "num_input_tokens_seen": 592914548, "step": 3452 }, { "epoch": 0.9082659301637404, "grad_norm": 17.663325985728193, "learning_rate": 5e-06, "loss": 0.1147, "num_input_tokens_seen": 593086732, "step": 3453 }, { "epoch": 0.9082659301637404, "loss": 0.10038851201534271, "loss_ce": 0.0001993027253774926, "loss_iou": 0.48046875, "loss_num": 0.02001953125, "loss_xval": 0.10009765625, "num_input_tokens_seen": 593086732, "step": 3453 }, { "epoch": 0.9085289669231275, "grad_norm": 4.446508836333715, "learning_rate": 5e-06, "loss": 0.1466, "num_input_tokens_seen": 593257004, "step": 3454 }, { "epoch": 0.9085289669231275, "loss": 0.10031658411026001, "loss_ce": 
0.0005546216852962971, "loss_iou": 0.51171875, "loss_num": 0.02001953125, "loss_xval": 0.099609375, "num_input_tokens_seen": 593257004, "step": 3454 }, { "epoch": 0.9087920036825147, "grad_norm": 33.88108016172298, "learning_rate": 5e-06, "loss": 0.1243, "num_input_tokens_seen": 593429128, "step": 3455 }, { "epoch": 0.9087920036825147, "loss": 0.08902574330568314, "loss_ce": 0.0005247698863968253, "loss_iou": 0.326171875, "loss_num": 0.0177001953125, "loss_xval": 0.08837890625, "num_input_tokens_seen": 593429128, "step": 3455 }, { "epoch": 0.9090550404419018, "grad_norm": 5.647244857793579, "learning_rate": 5e-06, "loss": 0.1055, "num_input_tokens_seen": 593601516, "step": 3456 }, { "epoch": 0.9090550404419018, "loss": 0.05231146514415741, "loss_ce": 0.001621769741177559, "loss_iou": 0.42578125, "loss_num": 0.0101318359375, "loss_xval": 0.05078125, "num_input_tokens_seen": 593601516, "step": 3456 }, { "epoch": 0.9093180772012889, "grad_norm": 9.778948998175075, "learning_rate": 5e-06, "loss": 0.1366, "num_input_tokens_seen": 593773792, "step": 3457 }, { "epoch": 0.9093180772012889, "loss": 0.09074349701404572, "loss_ce": 0.0006250919541344047, "loss_iou": 0.3359375, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 593773792, "step": 3457 }, { "epoch": 0.909581113960676, "grad_norm": 8.900238633139468, "learning_rate": 5e-06, "loss": 0.0842, "num_input_tokens_seen": 593945832, "step": 3458 }, { "epoch": 0.909581113960676, "loss": 0.10212016105651855, "loss_ce": 0.0015037069097161293, "loss_iou": 0.515625, "loss_num": 0.0201416015625, "loss_xval": 0.1005859375, "num_input_tokens_seen": 593945832, "step": 3458 }, { "epoch": 0.9098441507200631, "grad_norm": 5.072537904030542, "learning_rate": 5e-06, "loss": 0.1257, "num_input_tokens_seen": 594118188, "step": 3459 }, { "epoch": 0.9098441507200631, "loss": 0.051810335367918015, "loss_ce": 0.0029516899958252907, "loss_iou": 0.41796875, "loss_num": 0.009765625, "loss_xval": 0.048828125, 
"num_input_tokens_seen": 594118188, "step": 3459 }, { "epoch": 0.9101071874794503, "grad_norm": 4.7625991934389305, "learning_rate": 5e-06, "loss": 0.0955, "num_input_tokens_seen": 594290712, "step": 3460 }, { "epoch": 0.9101071874794503, "loss": 0.07667580991983414, "loss_ce": 0.00038186419988051057, "loss_iou": 0.49609375, "loss_num": 0.0152587890625, "loss_xval": 0.076171875, "num_input_tokens_seen": 594290712, "step": 3460 }, { "epoch": 0.9103702242388374, "grad_norm": 10.907454103811546, "learning_rate": 5e-06, "loss": 0.1191, "num_input_tokens_seen": 594461168, "step": 3461 }, { "epoch": 0.9103702242388374, "loss": 0.07658274471759796, "loss_ce": 0.00019724905723705888, "loss_iou": 0.455078125, "loss_num": 0.0152587890625, "loss_xval": 0.076171875, "num_input_tokens_seen": 594461168, "step": 3461 }, { "epoch": 0.9106332609982245, "grad_norm": 6.097275668896845, "learning_rate": 5e-06, "loss": 0.1622, "num_input_tokens_seen": 594633236, "step": 3462 }, { "epoch": 0.9106332609982245, "loss": 0.16042682528495789, "loss_ce": 0.0004689389606937766, "loss_iou": 0.43359375, "loss_num": 0.031982421875, "loss_xval": 0.16015625, "num_input_tokens_seen": 594633236, "step": 3462 }, { "epoch": 0.9108962977576116, "grad_norm": 4.28871295829322, "learning_rate": 5e-06, "loss": 0.1378, "num_input_tokens_seen": 594805240, "step": 3463 }, { "epoch": 0.9108962977576116, "loss": 0.10126248002052307, "loss_ce": 0.0024465657770633698, "loss_iou": 0.3828125, "loss_num": 0.019775390625, "loss_xval": 0.0986328125, "num_input_tokens_seen": 594805240, "step": 3463 }, { "epoch": 0.9111593345169987, "grad_norm": 29.617769917448427, "learning_rate": 5e-06, "loss": 0.1396, "num_input_tokens_seen": 594977116, "step": 3464 }, { "epoch": 0.9111593345169987, "loss": 0.08838079869747162, "loss_ce": 0.0014514753129333258, "loss_iou": 0.421875, "loss_num": 0.017333984375, "loss_xval": 0.0869140625, "num_input_tokens_seen": 594977116, "step": 3464 }, { "epoch": 0.9114223712763859, "grad_norm": 
12.912626025390432, "learning_rate": 5e-06, "loss": 0.1197, "num_input_tokens_seen": 595148996, "step": 3465 }, { "epoch": 0.9114223712763859, "loss": 0.05163312330842018, "loss_ce": 0.0015690373256802559, "loss_iou": 0.55859375, "loss_num": 0.010009765625, "loss_xval": 0.050048828125, "num_input_tokens_seen": 595148996, "step": 3465 }, { "epoch": 0.911685408035773, "grad_norm": 3.7162474056368, "learning_rate": 5e-06, "loss": 0.1198, "num_input_tokens_seen": 595321500, "step": 3466 }, { "epoch": 0.911685408035773, "loss": 0.12145687639713287, "loss_ce": 0.0029113469645380974, "loss_iou": 0.3828125, "loss_num": 0.023681640625, "loss_xval": 0.11865234375, "num_input_tokens_seen": 595321500, "step": 3466 }, { "epoch": 0.9119484447951601, "grad_norm": 26.611038275341258, "learning_rate": 5e-06, "loss": 0.0815, "num_input_tokens_seen": 595493996, "step": 3467 }, { "epoch": 0.9119484447951601, "loss": 0.08623991906642914, "loss_ce": 8.879496454028413e-05, "loss_iou": 0.369140625, "loss_num": 0.0172119140625, "loss_xval": 0.0859375, "num_input_tokens_seen": 595493996, "step": 3467 }, { "epoch": 0.9122114815545472, "grad_norm": 3.8019745447441835, "learning_rate": 5e-06, "loss": 0.116, "num_input_tokens_seen": 595666232, "step": 3468 }, { "epoch": 0.9122114815545472, "loss": 0.10731638222932816, "loss_ce": 0.0009015857940539718, "loss_iou": 0.546875, "loss_num": 0.021240234375, "loss_xval": 0.1064453125, "num_input_tokens_seen": 595666232, "step": 3468 }, { "epoch": 0.9124745183139343, "grad_norm": 10.09490833472444, "learning_rate": 5e-06, "loss": 0.126, "num_input_tokens_seen": 595838532, "step": 3469 }, { "epoch": 0.9124745183139343, "loss": 0.16073833405971527, "loss_ce": 0.0017417498165741563, "loss_iou": 0.58984375, "loss_num": 0.03173828125, "loss_xval": 0.1591796875, "num_input_tokens_seen": 595838532, "step": 3469 }, { "epoch": 0.9127375550733215, "grad_norm": 2.9635391739801356, "learning_rate": 5e-06, "loss": 0.104, "num_input_tokens_seen": 596010728, "step": 
3470 }, { "epoch": 0.9127375550733215, "loss": 0.09987487643957138, "loss_ce": 0.0005096413660794497, "loss_iou": 0.5625, "loss_num": 0.0198974609375, "loss_xval": 0.099609375, "num_input_tokens_seen": 596010728, "step": 3470 }, { "epoch": 0.9130005918327087, "grad_norm": 5.819990486053208, "learning_rate": 5e-06, "loss": 0.077, "num_input_tokens_seen": 596183036, "step": 3471 }, { "epoch": 0.9130005918327087, "loss": 0.08611226826906204, "loss_ce": 0.0005867574363946915, "loss_iou": 0.4609375, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 596183036, "step": 3471 }, { "epoch": 0.9132636285920958, "grad_norm": 13.706964401152458, "learning_rate": 5e-06, "loss": 0.1041, "num_input_tokens_seen": 596353748, "step": 3472 }, { "epoch": 0.9132636285920958, "loss": 0.11669529974460602, "loss_ce": 0.007839101366698742, "loss_iou": 0.490234375, "loss_num": 0.021728515625, "loss_xval": 0.10888671875, "num_input_tokens_seen": 596353748, "step": 3472 }, { "epoch": 0.9135266653514829, "grad_norm": 12.86793443272329, "learning_rate": 5e-06, "loss": 0.0992, "num_input_tokens_seen": 596525804, "step": 3473 }, { "epoch": 0.9135266653514829, "loss": 0.14639021456241608, "loss_ce": 0.002057329285889864, "loss_iou": 0.5390625, "loss_num": 0.0289306640625, "loss_xval": 0.14453125, "num_input_tokens_seen": 596525804, "step": 3473 }, { "epoch": 0.91378970211087, "grad_norm": 3.549687957056835, "learning_rate": 5e-06, "loss": 0.1106, "num_input_tokens_seen": 596698176, "step": 3474 }, { "epoch": 0.91378970211087, "loss": 0.07896921038627625, "loss_ce": 0.0026447533164173365, "loss_iou": 0.37890625, "loss_num": 0.0152587890625, "loss_xval": 0.076171875, "num_input_tokens_seen": 596698176, "step": 3474 }, { "epoch": 0.9140527388702571, "grad_norm": 7.626577083859758, "learning_rate": 5e-06, "loss": 0.0851, "num_input_tokens_seen": 596870272, "step": 3475 }, { "epoch": 0.9140527388702571, "loss": 0.110136017203331, "loss_ce": 0.000928854919038713, "loss_iou": 
0.453125, "loss_num": 0.0218505859375, "loss_xval": 0.109375, "num_input_tokens_seen": 596870272, "step": 3475 }, { "epoch": 0.9143157756296443, "grad_norm": 4.749367923818828, "learning_rate": 5e-06, "loss": 0.1027, "num_input_tokens_seen": 597042624, "step": 3476 }, { "epoch": 0.9143157756296443, "loss": 0.1048622876405716, "loss_ce": 0.0006752688204869628, "loss_iou": 0.48046875, "loss_num": 0.0208740234375, "loss_xval": 0.10400390625, "num_input_tokens_seen": 597042624, "step": 3476 }, { "epoch": 0.9145788123890314, "grad_norm": 7.997778290032009, "learning_rate": 5e-06, "loss": 0.0919, "num_input_tokens_seen": 597214744, "step": 3477 }, { "epoch": 0.9145788123890314, "loss": 0.0939270555973053, "loss_ce": 0.00036016173544339836, "loss_iou": 0.40234375, "loss_num": 0.0186767578125, "loss_xval": 0.09375, "num_input_tokens_seen": 597214744, "step": 3477 }, { "epoch": 0.9148418491484185, "grad_norm": 9.635212836595876, "learning_rate": 5e-06, "loss": 0.0936, "num_input_tokens_seen": 597387080, "step": 3478 }, { "epoch": 0.9148418491484185, "loss": 0.12316415458917618, "loss_ce": 0.0010938385967165232, "loss_iou": 0.68359375, "loss_num": 0.0244140625, "loss_xval": 0.1220703125, "num_input_tokens_seen": 597387080, "step": 3478 }, { "epoch": 0.9151048859078056, "grad_norm": 3.963240318321649, "learning_rate": 5e-06, "loss": 0.111, "num_input_tokens_seen": 597559392, "step": 3479 }, { "epoch": 0.9151048859078056, "loss": 0.05641660839319229, "loss_ce": 0.0005084058502689004, "loss_iou": 0.498046875, "loss_num": 0.01116943359375, "loss_xval": 0.055908203125, "num_input_tokens_seen": 597559392, "step": 3479 }, { "epoch": 0.9153679226671927, "grad_norm": 3.4338009584504947, "learning_rate": 5e-06, "loss": 0.156, "num_input_tokens_seen": 597731528, "step": 3480 }, { "epoch": 0.9153679226671927, "loss": 0.09766215085983276, "loss_ce": 0.001623332966119051, "loss_iou": 0.443359375, "loss_num": 0.0191650390625, "loss_xval": 0.09619140625, "num_input_tokens_seen": 597731528, 
"step": 3480 }, { "epoch": 0.9156309594265799, "grad_norm": 7.619842626421138, "learning_rate": 5e-06, "loss": 0.0847, "num_input_tokens_seen": 597903520, "step": 3481 }, { "epoch": 0.9156309594265799, "loss": 0.04850924387574196, "loss_ce": 0.0030533126555383205, "loss_iou": 0.3984375, "loss_num": 0.00909423828125, "loss_xval": 0.04541015625, "num_input_tokens_seen": 597903520, "step": 3481 }, { "epoch": 0.915893996185967, "grad_norm": 3.2410150384393765, "learning_rate": 5e-06, "loss": 0.0627, "num_input_tokens_seen": 598075560, "step": 3482 }, { "epoch": 0.915893996185967, "loss": 0.06452830880880356, "loss_ce": 0.00015147785597946495, "loss_iou": null, "loss_num": 0.01287841796875, "loss_xval": 0.064453125, "num_input_tokens_seen": 598075560, "step": 3482 }, { "epoch": 0.9161570329453541, "grad_norm": 3.618001442656621, "learning_rate": 5e-06, "loss": 0.1013, "num_input_tokens_seen": 598247796, "step": 3483 }, { "epoch": 0.9161570329453541, "loss": 0.07420238852500916, "loss_ce": 0.00047191951307468116, "loss_iou": 0.51171875, "loss_num": 0.0147705078125, "loss_xval": 0.07373046875, "num_input_tokens_seen": 598247796, "step": 3483 }, { "epoch": 0.9164200697047412, "grad_norm": 10.045861260608898, "learning_rate": 5e-06, "loss": 0.0892, "num_input_tokens_seen": 598419940, "step": 3484 }, { "epoch": 0.9164200697047412, "loss": 0.1423761397600174, "loss_ce": 0.0001794886775314808, "loss_iou": null, "loss_num": 0.0284423828125, "loss_xval": 0.142578125, "num_input_tokens_seen": 598419940, "step": 3484 }, { "epoch": 0.9166831064641283, "grad_norm": 13.006322984837828, "learning_rate": 5e-06, "loss": 0.1031, "num_input_tokens_seen": 598592140, "step": 3485 }, { "epoch": 0.9166831064641283, "loss": 0.08574345707893372, "loss_ce": 0.0002637250581756234, "loss_iou": 0.6640625, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 598592140, "step": 3485 }, { "epoch": 0.9169461432235155, "grad_norm": 7.24999783542604, "learning_rate": 5e-06, 
"loss": 0.0925, "num_input_tokens_seen": 598764500, "step": 3486 }, { "epoch": 0.9169461432235155, "loss": 0.0942230224609375, "loss_ce": 0.002975467825308442, "loss_iou": 0.41796875, "loss_num": 0.018310546875, "loss_xval": 0.09130859375, "num_input_tokens_seen": 598764500, "step": 3486 }, { "epoch": 0.9172091799829026, "grad_norm": 4.120990685906085, "learning_rate": 5e-06, "loss": 0.1011, "num_input_tokens_seen": 598936608, "step": 3487 }, { "epoch": 0.9172091799829026, "loss": 0.08670195192098618, "loss_ce": 0.0005203153123147786, "loss_iou": 0.6171875, "loss_num": 0.0172119140625, "loss_xval": 0.0859375, "num_input_tokens_seen": 598936608, "step": 3487 }, { "epoch": 0.9174722167422897, "grad_norm": 7.691079480924726, "learning_rate": 5e-06, "loss": 0.0946, "num_input_tokens_seen": 599109116, "step": 3488 }, { "epoch": 0.9174722167422897, "loss": 0.07726689428091049, "loss_ce": 0.0006982971681281924, "loss_iou": 0.48828125, "loss_num": 0.01531982421875, "loss_xval": 0.07666015625, "num_input_tokens_seen": 599109116, "step": 3488 }, { "epoch": 0.9177352535016768, "grad_norm": 5.641902324014326, "learning_rate": 5e-06, "loss": 0.0943, "num_input_tokens_seen": 599281296, "step": 3489 }, { "epoch": 0.9177352535016768, "loss": 0.07277127355337143, "loss_ce": 0.0036947373300790787, "loss_iou": 0.375, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 599281296, "step": 3489 }, { "epoch": 0.9179982902610639, "grad_norm": 5.439497936856968, "learning_rate": 5e-06, "loss": 0.1045, "num_input_tokens_seen": 599450632, "step": 3490 }, { "epoch": 0.9179982902610639, "loss": 0.08008137345314026, "loss_ce": 0.0042757089249789715, "loss_iou": 0.44140625, "loss_num": 0.01519775390625, "loss_xval": 0.07568359375, "num_input_tokens_seen": 599450632, "step": 3490 }, { "epoch": 0.9182613270204512, "grad_norm": 3.272614734962344, "learning_rate": 5e-06, "loss": 0.1474, "num_input_tokens_seen": 599622648, "step": 3491 }, { "epoch": 0.9182613270204512, 
"loss": 0.21320411562919617, "loss_ce": 0.002327651483938098, "loss_iou": 0.296875, "loss_num": 0.042236328125, "loss_xval": 0.2109375, "num_input_tokens_seen": 599622648, "step": 3491 }, { "epoch": 0.9185243637798383, "grad_norm": 4.968216314604122, "learning_rate": 5e-06, "loss": 0.0668, "num_input_tokens_seen": 599793016, "step": 3492 }, { "epoch": 0.9185243637798383, "loss": 0.05503164976835251, "loss_ce": 0.001076570013538003, "loss_iou": 0.435546875, "loss_num": 0.01080322265625, "loss_xval": 0.053955078125, "num_input_tokens_seen": 599793016, "step": 3492 }, { "epoch": 0.9187874005392254, "grad_norm": 13.600408653665847, "learning_rate": 5e-06, "loss": 0.1265, "num_input_tokens_seen": 599965196, "step": 3493 }, { "epoch": 0.9187874005392254, "loss": 0.2306230664253235, "loss_ce": 0.002626231173053384, "loss_iou": 0.453125, "loss_num": 0.045654296875, "loss_xval": 0.2275390625, "num_input_tokens_seen": 599965196, "step": 3493 }, { "epoch": 0.9190504372986125, "grad_norm": 13.080198547497895, "learning_rate": 5e-06, "loss": 0.1038, "num_input_tokens_seen": 600137328, "step": 3494 }, { "epoch": 0.9190504372986125, "loss": 0.0880010575056076, "loss_ce": 0.0009954443667083979, "loss_iou": 0.486328125, "loss_num": 0.0174560546875, "loss_xval": 0.0869140625, "num_input_tokens_seen": 600137328, "step": 3494 }, { "epoch": 0.9193134740579996, "grad_norm": 6.526249469256959, "learning_rate": 5e-06, "loss": 0.0958, "num_input_tokens_seen": 600309400, "step": 3495 }, { "epoch": 0.9193134740579996, "loss": 0.11348491907119751, "loss_ce": 0.000966615742072463, "loss_iou": 0.515625, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 600309400, "step": 3495 }, { "epoch": 0.9195765108173868, "grad_norm": 8.360743966936655, "learning_rate": 5e-06, "loss": 0.1147, "num_input_tokens_seen": 600481616, "step": 3496 }, { "epoch": 0.9195765108173868, "loss": 0.09168469160795212, "loss_ce": 0.0012611029669642448, "loss_iou": 0.5390625, "loss_num": 
0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 600481616, "step": 3496 }, { "epoch": 0.9198395475767739, "grad_norm": 6.554644415896188, "learning_rate": 5e-06, "loss": 0.1573, "num_input_tokens_seen": 600653744, "step": 3497 }, { "epoch": 0.9198395475767739, "loss": 0.11767074465751648, "loss_ce": 0.0016429107636213303, "loss_iou": 0.59375, "loss_num": 0.023193359375, "loss_xval": 0.1162109375, "num_input_tokens_seen": 600653744, "step": 3497 }, { "epoch": 0.920102584336161, "grad_norm": 16.679597431920058, "learning_rate": 5e-06, "loss": 0.0954, "num_input_tokens_seen": 600824128, "step": 3498 }, { "epoch": 0.920102584336161, "loss": 0.14813083410263062, "loss_ce": 0.0007919695926830173, "loss_iou": null, "loss_num": 0.029541015625, "loss_xval": 0.1474609375, "num_input_tokens_seen": 600824128, "step": 3498 }, { "epoch": 0.9203656210955481, "grad_norm": 4.858084855581349, "learning_rate": 5e-06, "loss": 0.1519, "num_input_tokens_seen": 600996452, "step": 3499 }, { "epoch": 0.9203656210955481, "loss": 0.10890492051839828, "loss_ce": 0.0007811367395333946, "loss_iou": 0.5625, "loss_num": 0.0216064453125, "loss_xval": 0.10791015625, "num_input_tokens_seen": 600996452, "step": 3499 }, { "epoch": 0.9206286578549352, "grad_norm": 5.0714335035388896, "learning_rate": 5e-06, "loss": 0.1338, "num_input_tokens_seen": 601168700, "step": 3500 }, { "epoch": 0.9206286578549352, "eval_websight_new_CIoU": 0.8971402049064636, "eval_websight_new_GIoU": 0.8995657861232758, "eval_websight_new_IoU": 0.9006073176860809, "eval_websight_new_MAE_all": 0.013850971590727568, "eval_websight_new_MAE_h": 0.00682047987356782, "eval_websight_new_MAE_w": 0.02169650699943304, "eval_websight_new_MAE_x": 0.022679010406136513, "eval_websight_new_MAE_y": 0.004207887570373714, "eval_websight_new_NUM_probability": 0.9999927878379822, "eval_websight_new_inside_bbox": 1.0, "eval_websight_new_loss": 0.07072407752275467, "eval_websight_new_loss_ce": 9.538403446640586e-06, 
"eval_websight_new_loss_iou": 0.32720947265625, "eval_websight_new_loss_num": 0.012699127197265625, "eval_websight_new_loss_xval": 0.06354522705078125, "eval_websight_new_runtime": 56.2114, "eval_websight_new_samples_per_second": 0.889, "eval_websight_new_steps_per_second": 0.036, "num_input_tokens_seen": 601168700, "step": 3500 }, { "epoch": 0.9206286578549352, "eval_seeclick_CIoU": 0.6764970123767853, "eval_seeclick_GIoU": 0.6781544089317322, "eval_seeclick_IoU": 0.6978051662445068, "eval_seeclick_MAE_all": 0.040375180542469025, "eval_seeclick_MAE_h": 0.021458005532622337, "eval_seeclick_MAE_w": 0.05631308630108833, "eval_seeclick_MAE_x": 0.06287308409810066, "eval_seeclick_MAE_y": 0.02085655089467764, "eval_seeclick_NUM_probability": 0.9999865889549255, "eval_seeclick_inside_bbox": 0.953125, "eval_seeclick_loss": 0.17490381002426147, "eval_seeclick_loss_ce": 0.008978934027254581, "eval_seeclick_loss_iou": 0.46295166015625, "eval_seeclick_loss_num": 0.031360626220703125, "eval_seeclick_loss_xval": 0.1567230224609375, "eval_seeclick_runtime": 74.0396, "eval_seeclick_samples_per_second": 0.581, "eval_seeclick_steps_per_second": 0.027, "num_input_tokens_seen": 601168700, "step": 3500 }, { "epoch": 0.9206286578549352, "eval_icons_CIoU": 0.8881092965602875, "eval_icons_GIoU": 0.8860294818878174, "eval_icons_IoU": 0.8926480114459991, "eval_icons_MAE_all": 0.0159080708399415, "eval_icons_MAE_h": 0.020047838799655437, "eval_icons_MAE_w": 0.015083736274391413, "eval_icons_MAE_x": 0.013403147924691439, "eval_icons_MAE_y": 0.015097561292350292, "eval_icons_NUM_probability": 0.9999927878379822, "eval_icons_inside_bbox": 0.984375, "eval_icons_loss": 0.060060471296310425, "eval_icons_loss_ce": 9.6267791604987e-06, "eval_icons_loss_iou": 0.605712890625, "eval_icons_loss_num": 0.01105499267578125, "eval_icons_loss_xval": 0.05532073974609375, "eval_icons_runtime": 80.8235, "eval_icons_samples_per_second": 0.619, "eval_icons_steps_per_second": 0.025, "num_input_tokens_seen": 
601168700, "step": 3500 }, { "epoch": 0.9206286578549352, "eval_screenspot_CIoU": 0.5590948661168417, "eval_screenspot_GIoU": 0.5591723521550497, "eval_screenspot_IoU": 0.5979611476262411, "eval_screenspot_MAE_all": 0.08105809738238652, "eval_screenspot_MAE_h": 0.05730322003364563, "eval_screenspot_MAE_w": 0.1365982194741567, "eval_screenspot_MAE_x": 0.0790914719303449, "eval_screenspot_MAE_y": 0.05123948057492574, "eval_screenspot_NUM_probability": 0.9999733567237854, "eval_screenspot_inside_bbox": 0.8841666579246521, "eval_screenspot_loss": 0.9397080540657043, "eval_screenspot_loss_ce": 0.5959697167078654, "eval_screenspot_loss_iou": 0.539306640625, "eval_screenspot_loss_num": 0.06738789876302083, "eval_screenspot_loss_xval": 0.3368326822916667, "eval_screenspot_runtime": 151.1273, "eval_screenspot_samples_per_second": 0.589, "eval_screenspot_steps_per_second": 0.02, "num_input_tokens_seen": 601168700, "step": 3500 }, { "epoch": 0.9206286578549352, "loss": 0.9230542182922363, "loss_ce": 0.5880932807922363, "loss_iou": 0.435546875, "loss_num": 0.06689453125, "loss_xval": 0.3359375, "num_input_tokens_seen": 601168700, "step": 3500 }, { "epoch": 0.9208916946143223, "grad_norm": 3.0758731887113147, "learning_rate": 5e-06, "loss": 0.0803, "num_input_tokens_seen": 601340840, "step": 3501 }, { "epoch": 0.9208916946143223, "loss": 0.07921823859214783, "loss_ce": 0.0005973259685561061, "loss_iou": 0.474609375, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 601340840, "step": 3501 }, { "epoch": 0.9211547313737095, "grad_norm": 12.765305175609484, "learning_rate": 5e-06, "loss": 0.1272, "num_input_tokens_seen": 601513112, "step": 3502 }, { "epoch": 0.9211547313737095, "loss": 0.13090460002422333, "loss_ce": 0.002868090523406863, "loss_iou": 0.35546875, "loss_num": 0.025634765625, "loss_xval": 0.1279296875, "num_input_tokens_seen": 601513112, "step": 3502 }, { "epoch": 0.9214177681330966, "grad_norm": 3.071857580938524, "learning_rate": 
5e-06, "loss": 0.0751, "num_input_tokens_seen": 601685348, "step": 3503 }, { "epoch": 0.9214177681330966, "loss": 0.11101450026035309, "loss_ce": 0.002280366839841008, "loss_iou": 0.48046875, "loss_num": 0.021728515625, "loss_xval": 0.10888671875, "num_input_tokens_seen": 601685348, "step": 3503 }, { "epoch": 0.9216808048924837, "grad_norm": 3.8727649532640296, "learning_rate": 5e-06, "loss": 0.0905, "num_input_tokens_seen": 601857460, "step": 3504 }, { "epoch": 0.9216808048924837, "loss": 0.0590723380446434, "loss_ce": 0.0022028274834156036, "loss_iou": 0.421875, "loss_num": 0.0113525390625, "loss_xval": 0.056884765625, "num_input_tokens_seen": 601857460, "step": 3504 }, { "epoch": 0.9219438416518708, "grad_norm": 12.18479200262375, "learning_rate": 5e-06, "loss": 0.0989, "num_input_tokens_seen": 602029992, "step": 3505 }, { "epoch": 0.9219438416518708, "loss": 0.12037432193756104, "loss_ce": 0.0029426885303109884, "loss_iou": 0.5625, "loss_num": 0.0234375, "loss_xval": 0.1171875, "num_input_tokens_seen": 602029992, "step": 3505 }, { "epoch": 0.9222068784112579, "grad_norm": 4.289786395943063, "learning_rate": 5e-06, "loss": 0.1091, "num_input_tokens_seen": 602202184, "step": 3506 }, { "epoch": 0.9222068784112579, "loss": 0.16438013315200806, "loss_ce": 0.00022608340077567846, "loss_iou": 0.453125, "loss_num": 0.032958984375, "loss_xval": 0.1640625, "num_input_tokens_seen": 602202184, "step": 3506 }, { "epoch": 0.9224699151706451, "grad_norm": 27.091794274961455, "learning_rate": 5e-06, "loss": 0.097, "num_input_tokens_seen": 602372792, "step": 3507 }, { "epoch": 0.9224699151706451, "loss": 0.081387460231781, "loss_ce": 0.0013398483861237764, "loss_iou": 0.640625, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 602372792, "step": 3507 }, { "epoch": 0.9227329519300322, "grad_norm": 8.942524792614973, "learning_rate": 5e-06, "loss": 0.0977, "num_input_tokens_seen": 602542048, "step": 3508 }, { "epoch": 0.9227329519300322, "loss": 
0.11642280220985413, "loss_ce": 0.005247259978204966, "loss_iou": 0.52734375, "loss_num": 0.022216796875, "loss_xval": 0.111328125, "num_input_tokens_seen": 602542048, "step": 3508 }, { "epoch": 0.9229959886894193, "grad_norm": 5.673853173524807, "learning_rate": 5e-06, "loss": 0.143, "num_input_tokens_seen": 602714148, "step": 3509 }, { "epoch": 0.9229959886894193, "loss": 0.22935867309570312, "loss_ce": 0.0008430513553321362, "loss_iou": 0.396484375, "loss_num": 0.045654296875, "loss_xval": 0.228515625, "num_input_tokens_seen": 602714148, "step": 3509 }, { "epoch": 0.9232590254488064, "grad_norm": 4.438849137936148, "learning_rate": 5e-06, "loss": 0.1442, "num_input_tokens_seen": 602886264, "step": 3510 }, { "epoch": 0.9232590254488064, "loss": 0.1615859568119049, "loss_ce": 0.0012771158944815397, "loss_iou": 0.376953125, "loss_num": 0.031982421875, "loss_xval": 0.16015625, "num_input_tokens_seen": 602886264, "step": 3510 }, { "epoch": 0.9235220622081936, "grad_norm": 4.872038444582204, "learning_rate": 5e-06, "loss": 0.1038, "num_input_tokens_seen": 603058732, "step": 3511 }, { "epoch": 0.9235220622081936, "loss": 0.13943278789520264, "loss_ce": 0.0005015181959606707, "loss_iou": 0.423828125, "loss_num": 0.02783203125, "loss_xval": 0.138671875, "num_input_tokens_seen": 603058732, "step": 3511 }, { "epoch": 0.9237850989675808, "grad_norm": 4.92552213324199, "learning_rate": 5e-06, "loss": 0.1144, "num_input_tokens_seen": 603230928, "step": 3512 }, { "epoch": 0.9237850989675808, "loss": 0.14707276225090027, "loss_ce": 0.0023278831504285336, "loss_iou": 0.40234375, "loss_num": 0.0289306640625, "loss_xval": 0.14453125, "num_input_tokens_seen": 603230928, "step": 3512 }, { "epoch": 0.9240481357269679, "grad_norm": 8.817456239528582, "learning_rate": 5e-06, "loss": 0.0967, "num_input_tokens_seen": 603403300, "step": 3513 }, { "epoch": 0.9240481357269679, "loss": 0.12186002731323242, "loss_ce": 0.0018343898700550199, "loss_iou": 0.5625, "loss_num": 0.0240478515625, 
"loss_xval": 0.1201171875, "num_input_tokens_seen": 603403300, "step": 3513 }, { "epoch": 0.924311172486355, "grad_norm": 10.676170008596674, "learning_rate": 5e-06, "loss": 0.1438, "num_input_tokens_seen": 603575704, "step": 3514 }, { "epoch": 0.924311172486355, "loss": 0.15692317485809326, "loss_ce": 0.004670977126806974, "loss_iou": null, "loss_num": 0.0303955078125, "loss_xval": 0.15234375, "num_input_tokens_seen": 603575704, "step": 3514 }, { "epoch": 0.9245742092457421, "grad_norm": 11.658802940011123, "learning_rate": 5e-06, "loss": 0.0984, "num_input_tokens_seen": 603745852, "step": 3515 }, { "epoch": 0.9245742092457421, "loss": 0.08981953561306, "loss_ce": 0.00015888996131252497, "loss_iou": 0.53515625, "loss_num": 0.0179443359375, "loss_xval": 0.08984375, "num_input_tokens_seen": 603745852, "step": 3515 }, { "epoch": 0.9248372460051292, "grad_norm": 11.699348951917594, "learning_rate": 5e-06, "loss": 0.1644, "num_input_tokens_seen": 603918360, "step": 3516 }, { "epoch": 0.9248372460051292, "loss": 0.1186264157295227, "loss_ce": 0.0009811592753976583, "loss_iou": 0.416015625, "loss_num": 0.0235595703125, "loss_xval": 0.11767578125, "num_input_tokens_seen": 603918360, "step": 3516 }, { "epoch": 0.9251002827645164, "grad_norm": 16.59746302136246, "learning_rate": 5e-06, "loss": 0.1447, "num_input_tokens_seen": 604090468, "step": 3517 }, { "epoch": 0.9251002827645164, "loss": 0.15999768674373627, "loss_ce": 0.0011536948150023818, "loss_iou": 0.5234375, "loss_num": 0.03173828125, "loss_xval": 0.1591796875, "num_input_tokens_seen": 604090468, "step": 3517 }, { "epoch": 0.9253633195239035, "grad_norm": 7.69976984803875, "learning_rate": 5e-06, "loss": 0.1235, "num_input_tokens_seen": 604262492, "step": 3518 }, { "epoch": 0.9253633195239035, "loss": 0.14189431071281433, "loss_ce": 0.006487809121608734, "loss_iou": 0.470703125, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 604262492, "step": 3518 }, { "epoch": 0.9256263562832906, 
"grad_norm": 12.75000933447967, "learning_rate": 5e-06, "loss": 0.1099, "num_input_tokens_seen": 604432856, "step": 3519 }, { "epoch": 0.9256263562832906, "loss": 0.0861673578619957, "loss_ce": 0.004319215193390846, "loss_iou": 0.484375, "loss_num": 0.016357421875, "loss_xval": 0.08203125, "num_input_tokens_seen": 604432856, "step": 3519 }, { "epoch": 0.9258893930426777, "grad_norm": 5.042427155046337, "learning_rate": 5e-06, "loss": 0.1163, "num_input_tokens_seen": 604605056, "step": 3520 }, { "epoch": 0.9258893930426777, "loss": 0.1412590742111206, "loss_ce": 0.002571945311501622, "loss_iou": 0.455078125, "loss_num": 0.0277099609375, "loss_xval": 0.138671875, "num_input_tokens_seen": 604605056, "step": 3520 }, { "epoch": 0.9261524298020648, "grad_norm": 5.26588732086772, "learning_rate": 5e-06, "loss": 0.0991, "num_input_tokens_seen": 604775372, "step": 3521 }, { "epoch": 0.9261524298020648, "loss": 0.05718105286359787, "loss_ce": 0.0014254315756261349, "loss_iou": 0.54296875, "loss_num": 0.01116943359375, "loss_xval": 0.0556640625, "num_input_tokens_seen": 604775372, "step": 3521 }, { "epoch": 0.926415466561452, "grad_norm": 6.371747445648948, "learning_rate": 5e-06, "loss": 0.106, "num_input_tokens_seen": 604947376, "step": 3522 }, { "epoch": 0.926415466561452, "loss": 0.08819465339183807, "loss_ce": 0.0007312724483199418, "loss_iou": 0.322265625, "loss_num": 0.0174560546875, "loss_xval": 0.08740234375, "num_input_tokens_seen": 604947376, "step": 3522 }, { "epoch": 0.9266785033208391, "grad_norm": 10.505886717736445, "learning_rate": 5e-06, "loss": 0.1835, "num_input_tokens_seen": 605119568, "step": 3523 }, { "epoch": 0.9266785033208391, "loss": 0.17979584634304047, "loss_ce": 0.004075629636645317, "loss_iou": 0.609375, "loss_num": 0.03515625, "loss_xval": 0.17578125, "num_input_tokens_seen": 605119568, "step": 3523 }, { "epoch": 0.9269415400802262, "grad_norm": 6.455190531468335, "learning_rate": 5e-06, "loss": 0.1006, "num_input_tokens_seen": 605291524, 
"step": 3524 }, { "epoch": 0.9269415400802262, "loss": 0.08140784502029419, "loss_ce": 0.0016959276981651783, "loss_iou": 0.51171875, "loss_num": 0.0159912109375, "loss_xval": 0.07958984375, "num_input_tokens_seen": 605291524, "step": 3524 }, { "epoch": 0.9272045768396133, "grad_norm": 14.745208066661506, "learning_rate": 5e-06, "loss": 0.1184, "num_input_tokens_seen": 605463808, "step": 3525 }, { "epoch": 0.9272045768396133, "loss": 0.11558607965707779, "loss_ce": 0.0017555101076141, "loss_iou": 0.51171875, "loss_num": 0.0228271484375, "loss_xval": 0.11376953125, "num_input_tokens_seen": 605463808, "step": 3525 }, { "epoch": 0.9274676135990004, "grad_norm": 3.8942399340921825, "learning_rate": 5e-06, "loss": 0.0847, "num_input_tokens_seen": 605635872, "step": 3526 }, { "epoch": 0.9274676135990004, "loss": 0.06215044856071472, "loss_ce": 0.00019976735347881913, "loss_iou": 0.546875, "loss_num": 0.01239013671875, "loss_xval": 0.06201171875, "num_input_tokens_seen": 605635872, "step": 3526 }, { "epoch": 0.9277306503583875, "grad_norm": 4.175922010381311, "learning_rate": 5e-06, "loss": 0.1207, "num_input_tokens_seen": 605807804, "step": 3527 }, { "epoch": 0.9277306503583875, "loss": 0.16303026676177979, "loss_ce": 0.0018974501872435212, "loss_iou": 0.482421875, "loss_num": 0.0322265625, "loss_xval": 0.1611328125, "num_input_tokens_seen": 605807804, "step": 3527 }, { "epoch": 0.9279936871177747, "grad_norm": 3.844069149031415, "learning_rate": 5e-06, "loss": 0.1016, "num_input_tokens_seen": 605979952, "step": 3528 }, { "epoch": 0.9279936871177747, "loss": 0.0688394084572792, "loss_ce": 0.0023721237666904926, "loss_iou": 0.4609375, "loss_num": 0.0133056640625, "loss_xval": 0.06640625, "num_input_tokens_seen": 605979952, "step": 3528 }, { "epoch": 0.9282567238771618, "grad_norm": 4.202436418234633, "learning_rate": 5e-06, "loss": 0.081, "num_input_tokens_seen": 606150360, "step": 3529 }, { "epoch": 0.9282567238771618, "loss": 0.04098789393901825, "loss_ce": 
0.00038425601087510586, "loss_iou": 0.5, "loss_num": 0.00811767578125, "loss_xval": 0.04052734375, "num_input_tokens_seen": 606150360, "step": 3529 }, { "epoch": 0.928519760636549, "grad_norm": 3.743613714946928, "learning_rate": 5e-06, "loss": 0.1016, "num_input_tokens_seen": 606320544, "step": 3530 }, { "epoch": 0.928519760636549, "loss": 0.12318438291549683, "loss_ce": 0.003891176311299205, "loss_iou": 0.357421875, "loss_num": 0.02392578125, "loss_xval": 0.119140625, "num_input_tokens_seen": 606320544, "step": 3530 }, { "epoch": 0.928782797395936, "grad_norm": 7.541725516396188, "learning_rate": 5e-06, "loss": 0.0936, "num_input_tokens_seen": 606492452, "step": 3531 }, { "epoch": 0.928782797395936, "loss": 0.05570812523365021, "loss_ce": 0.0006238996866159141, "loss_iou": 0.66796875, "loss_num": 0.010986328125, "loss_xval": 0.05517578125, "num_input_tokens_seen": 606492452, "step": 3531 }, { "epoch": 0.9290458341553232, "grad_norm": 7.283637997577956, "learning_rate": 5e-06, "loss": 0.1403, "num_input_tokens_seen": 606664904, "step": 3532 }, { "epoch": 0.9290458341553232, "loss": 0.2060985267162323, "loss_ce": 0.0019969542045146227, "loss_iou": 0.40625, "loss_num": 0.040771484375, "loss_xval": 0.2041015625, "num_input_tokens_seen": 606664904, "step": 3532 }, { "epoch": 0.9293088709147104, "grad_norm": 3.96510750197539, "learning_rate": 5e-06, "loss": 0.1007, "num_input_tokens_seen": 606836960, "step": 3533 }, { "epoch": 0.9293088709147104, "loss": 0.07297110557556152, "loss_ce": 0.002353430027142167, "loss_iou": 0.384765625, "loss_num": 0.01409912109375, "loss_xval": 0.07080078125, "num_input_tokens_seen": 606836960, "step": 3533 }, { "epoch": 0.9295719076740975, "grad_norm": 2.973198469624849, "learning_rate": 5e-06, "loss": 0.1219, "num_input_tokens_seen": 607009360, "step": 3534 }, { "epoch": 0.9295719076740975, "loss": 0.13494953513145447, "loss_ce": 0.0017403117381036282, "loss_iou": 0.4375, "loss_num": 0.026611328125, "loss_xval": 0.1328125, 
"num_input_tokens_seen": 607009360, "step": 3534 }, { "epoch": 0.9298349444334846, "grad_norm": 3.119729004971168, "learning_rate": 5e-06, "loss": 0.0664, "num_input_tokens_seen": 607181304, "step": 3535 }, { "epoch": 0.9298349444334846, "loss": 0.06605780124664307, "loss_ce": 0.002443910576403141, "loss_iou": 0.421875, "loss_num": 0.01275634765625, "loss_xval": 0.0634765625, "num_input_tokens_seen": 607181304, "step": 3535 }, { "epoch": 0.9300979811928717, "grad_norm": 4.352773383034863, "learning_rate": 5e-06, "loss": 0.0763, "num_input_tokens_seen": 607353736, "step": 3536 }, { "epoch": 0.9300979811928717, "loss": 0.06802303344011307, "loss_ce": 0.001021688454784453, "loss_iou": 0.4296875, "loss_num": 0.013427734375, "loss_xval": 0.06689453125, "num_input_tokens_seen": 607353736, "step": 3536 }, { "epoch": 0.9303610179522588, "grad_norm": 3.9578360400969794, "learning_rate": 5e-06, "loss": 0.0684, "num_input_tokens_seen": 607525920, "step": 3537 }, { "epoch": 0.9303610179522588, "loss": 0.06541258096694946, "loss_ce": 0.00288206129334867, "loss_iou": 0.5703125, "loss_num": 0.01251220703125, "loss_xval": 0.0625, "num_input_tokens_seen": 607525920, "step": 3537 }, { "epoch": 0.930624054711646, "grad_norm": 4.492300233458511, "learning_rate": 5e-06, "loss": 0.0889, "num_input_tokens_seen": 607697980, "step": 3538 }, { "epoch": 0.930624054711646, "loss": 0.09697936475276947, "loss_ce": 0.0013983015669509768, "loss_iou": 0.53515625, "loss_num": 0.0191650390625, "loss_xval": 0.095703125, "num_input_tokens_seen": 607697980, "step": 3538 }, { "epoch": 0.9308870914710331, "grad_norm": 4.302295585236548, "learning_rate": 5e-06, "loss": 0.1081, "num_input_tokens_seen": 607870144, "step": 3539 }, { "epoch": 0.9308870914710331, "loss": 0.10722782462835312, "loss_ce": 0.000843545887619257, "loss_iou": 0.49609375, "loss_num": 0.021240234375, "loss_xval": 0.1064453125, "num_input_tokens_seen": 607870144, "step": 3539 }, { "epoch": 0.9311501282304202, "grad_norm": 
7.0499450082503365, "learning_rate": 5e-06, "loss": 0.0964, "num_input_tokens_seen": 608042240, "step": 3540 }, { "epoch": 0.9311501282304202, "loss": 0.1015363559126854, "loss_ce": 0.006443582940846682, "loss_iou": 0.44140625, "loss_num": 0.01904296875, "loss_xval": 0.09521484375, "num_input_tokens_seen": 608042240, "step": 3540 }, { "epoch": 0.9314131649898073, "grad_norm": 6.586834009281099, "learning_rate": 5e-06, "loss": 0.1204, "num_input_tokens_seen": 608214284, "step": 3541 }, { "epoch": 0.9314131649898073, "loss": 0.12908074259757996, "loss_ce": 0.004782632924616337, "loss_iou": 0.59765625, "loss_num": 0.02490234375, "loss_xval": 0.12451171875, "num_input_tokens_seen": 608214284, "step": 3541 }, { "epoch": 0.9316762017491944, "grad_norm": 4.53807614520661, "learning_rate": 5e-06, "loss": 0.1306, "num_input_tokens_seen": 608386616, "step": 3542 }, { "epoch": 0.9316762017491944, "loss": 0.1289425790309906, "loss_ce": 0.0027523916214704514, "loss_iou": 0.458984375, "loss_num": 0.0252685546875, "loss_xval": 0.1259765625, "num_input_tokens_seen": 608386616, "step": 3542 }, { "epoch": 0.9319392385085816, "grad_norm": 7.59401410995318, "learning_rate": 5e-06, "loss": 0.1268, "num_input_tokens_seen": 608558468, "step": 3543 }, { "epoch": 0.9319392385085816, "loss": 0.16303950548171997, "loss_ce": 0.00520259328186512, "loss_iou": 0.4375, "loss_num": 0.031494140625, "loss_xval": 0.158203125, "num_input_tokens_seen": 608558468, "step": 3543 }, { "epoch": 0.9322022752679687, "grad_norm": 8.796770729596103, "learning_rate": 5e-06, "loss": 0.1311, "num_input_tokens_seen": 608730916, "step": 3544 }, { "epoch": 0.9322022752679687, "loss": 0.11866636574268341, "loss_ce": 0.0005633389810100198, "loss_iou": 0.341796875, "loss_num": 0.0235595703125, "loss_xval": 0.1181640625, "num_input_tokens_seen": 608730916, "step": 3544 }, { "epoch": 0.9324653120273558, "grad_norm": 5.984167827436451, "learning_rate": 5e-06, "loss": 0.1218, "num_input_tokens_seen": 608902956, "step": 3545 
}, { "epoch": 0.9324653120273558, "loss": 0.09633171558380127, "loss_ce": 0.000903245760127902, "loss_iou": 0.55078125, "loss_num": 0.01904296875, "loss_xval": 0.09521484375, "num_input_tokens_seen": 608902956, "step": 3545 }, { "epoch": 0.9327283487867429, "grad_norm": 13.864562508217144, "learning_rate": 5e-06, "loss": 0.1121, "num_input_tokens_seen": 609072616, "step": 3546 }, { "epoch": 0.9327283487867429, "loss": 0.11617599427700043, "loss_ce": 0.0005448899464681745, "loss_iou": 0.54296875, "loss_num": 0.0230712890625, "loss_xval": 0.11572265625, "num_input_tokens_seen": 609072616, "step": 3546 }, { "epoch": 0.93299138554613, "grad_norm": 7.250119178294138, "learning_rate": 5e-06, "loss": 0.0744, "num_input_tokens_seen": 609244840, "step": 3547 }, { "epoch": 0.93299138554613, "loss": 0.08952006697654724, "loss_ce": 0.001415827078744769, "loss_iou": 0.498046875, "loss_num": 0.017578125, "loss_xval": 0.087890625, "num_input_tokens_seen": 609244840, "step": 3547 }, { "epoch": 0.9332544223055173, "grad_norm": 8.965223152196344, "learning_rate": 5e-06, "loss": 0.0799, "num_input_tokens_seen": 609417116, "step": 3548 }, { "epoch": 0.9332544223055173, "loss": 0.058124981820583344, "loss_ce": 0.0005077911773696542, "loss_iou": 0.4921875, "loss_num": 0.01153564453125, "loss_xval": 0.0576171875, "num_input_tokens_seen": 609417116, "step": 3548 }, { "epoch": 0.9335174590649044, "grad_norm": 4.578416828961302, "learning_rate": 5e-06, "loss": 0.1283, "num_input_tokens_seen": 609589092, "step": 3549 }, { "epoch": 0.9335174590649044, "loss": 0.06900876015424728, "loss_ce": 0.0006951588438823819, "loss_iou": 0.671875, "loss_num": 0.013671875, "loss_xval": 0.068359375, "num_input_tokens_seen": 609589092, "step": 3549 }, { "epoch": 0.9337804958242915, "grad_norm": 8.028931966527075, "learning_rate": 5e-06, "loss": 0.1254, "num_input_tokens_seen": 609761432, "step": 3550 }, { "epoch": 0.9337804958242915, "loss": 0.10969488322734833, "loss_ce": 0.0008691949769854546, "loss_iou": 
0.32421875, "loss_num": 0.021728515625, "loss_xval": 0.10888671875, "num_input_tokens_seen": 609761432, "step": 3550 }, { "epoch": 0.9340435325836786, "grad_norm": 3.0608586378113225, "learning_rate": 5e-06, "loss": 0.097, "num_input_tokens_seen": 609933500, "step": 3551 }, { "epoch": 0.9340435325836786, "loss": 0.1336784064769745, "loss_ce": 0.003475167090073228, "loss_iou": 0.5546875, "loss_num": 0.0260009765625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 609933500, "step": 3551 }, { "epoch": 0.9343065693430657, "grad_norm": 6.227248956666904, "learning_rate": 5e-06, "loss": 0.0903, "num_input_tokens_seen": 610105792, "step": 3552 }, { "epoch": 0.9343065693430657, "loss": 0.08306320756673813, "loss_ce": 0.007104953285306692, "loss_iou": 0.453125, "loss_num": 0.01519775390625, "loss_xval": 0.076171875, "num_input_tokens_seen": 610105792, "step": 3552 }, { "epoch": 0.9345696061024528, "grad_norm": 5.469597791438911, "learning_rate": 5e-06, "loss": 0.0938, "num_input_tokens_seen": 610277880, "step": 3553 }, { "epoch": 0.9345696061024528, "loss": 0.08409252762794495, "loss_ce": 0.002595331287011504, "loss_iou": 0.42578125, "loss_num": 0.016357421875, "loss_xval": 0.08154296875, "num_input_tokens_seen": 610277880, "step": 3553 }, { "epoch": 0.93483264286184, "grad_norm": 4.1047405914838455, "learning_rate": 5e-06, "loss": 0.0981, "num_input_tokens_seen": 610449924, "step": 3554 }, { "epoch": 0.93483264286184, "loss": 0.10268253833055496, "loss_ce": 0.0010895198211073875, "loss_iou": 0.5, "loss_num": 0.0203857421875, "loss_xval": 0.1015625, "num_input_tokens_seen": 610449924, "step": 3554 }, { "epoch": 0.9350956796212271, "grad_norm": 4.649820698561932, "learning_rate": 5e-06, "loss": 0.095, "num_input_tokens_seen": 610622052, "step": 3555 }, { "epoch": 0.9350956796212271, "loss": 0.08575969934463501, "loss_ce": 0.0018668812699615955, "loss_iou": 0.337890625, "loss_num": 0.016845703125, "loss_xval": 0.083984375, "num_input_tokens_seen": 610622052, "step": 3555 
}, { "epoch": 0.9353587163806142, "grad_norm": 9.738163582969452, "learning_rate": 5e-06, "loss": 0.0872, "num_input_tokens_seen": 610794168, "step": 3556 }, { "epoch": 0.9353587163806142, "loss": 0.17649231851100922, "loss_ce": 0.004281637258827686, "loss_iou": 0.423828125, "loss_num": 0.034423828125, "loss_xval": 0.171875, "num_input_tokens_seen": 610794168, "step": 3556 }, { "epoch": 0.9356217531400013, "grad_norm": 60.60865892554417, "learning_rate": 5e-06, "loss": 0.1377, "num_input_tokens_seen": 610964304, "step": 3557 }, { "epoch": 0.9356217531400013, "loss": 0.21576589345932007, "loss_ce": 0.01731007918715477, "loss_iou": 0.390625, "loss_num": 0.039794921875, "loss_xval": 0.1982421875, "num_input_tokens_seen": 610964304, "step": 3557 }, { "epoch": 0.9358847898993884, "grad_norm": 26.41744180605267, "learning_rate": 5e-06, "loss": 0.278, "num_input_tokens_seen": 611136572, "step": 3558 }, { "epoch": 0.9358847898993884, "loss": 0.2503662705421448, "loss_ce": 0.15902717411518097, "loss_iou": 0.515625, "loss_num": 0.018310546875, "loss_xval": 0.09130859375, "num_input_tokens_seen": 611136572, "step": 3558 }, { "epoch": 0.9361478266587756, "grad_norm": 31.460812663910158, "learning_rate": 5e-06, "loss": 0.1477, "num_input_tokens_seen": 611308712, "step": 3559 }, { "epoch": 0.9361478266587756, "loss": 0.10697901993989944, "loss_ce": 0.014724383130669594, "loss_iou": 0.59765625, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 611308712, "step": 3559 }, { "epoch": 0.9364108634181627, "grad_norm": 125.0152995081179, "learning_rate": 5e-06, "loss": 0.2523, "num_input_tokens_seen": 611481064, "step": 3560 }, { "epoch": 0.9364108634181627, "loss": 0.19752028584480286, "loss_ce": 0.055674582719802856, "loss_iou": 0.6484375, "loss_num": 0.0284423828125, "loss_xval": 0.1416015625, "num_input_tokens_seen": 611481064, "step": 3560 }, { "epoch": 0.9366739001775498, "grad_norm": 42.388335248969774, "learning_rate": 5e-06, "loss": 0.6327, 
"num_input_tokens_seen": 611653432, "step": 3561 }, { "epoch": 0.9366739001775498, "loss": 0.6363104581832886, "loss_ce": 0.5759619474411011, "loss_iou": 0.546875, "loss_num": 0.0120849609375, "loss_xval": 0.060302734375, "num_input_tokens_seen": 611653432, "step": 3561 }, { "epoch": 0.9369369369369369, "grad_norm": 34.483421318180156, "learning_rate": 5e-06, "loss": 0.1558, "num_input_tokens_seen": 611825552, "step": 3562 }, { "epoch": 0.9369369369369369, "loss": 0.10395447909832001, "loss_ce": 0.05541627109050751, "loss_iou": 0.435546875, "loss_num": 0.00970458984375, "loss_xval": 0.048583984375, "num_input_tokens_seen": 611825552, "step": 3562 }, { "epoch": 0.937199973696324, "grad_norm": 3.5466208788177513, "learning_rate": 5e-06, "loss": 0.0777, "num_input_tokens_seen": 611997960, "step": 3563 }, { "epoch": 0.937199973696324, "loss": 0.1155683621764183, "loss_ce": 0.0075666578486561775, "loss_iou": 0.47265625, "loss_num": 0.0216064453125, "loss_xval": 0.10791015625, "num_input_tokens_seen": 611997960, "step": 3563 }, { "epoch": 0.9374630104557112, "grad_norm": 6.556188606634138, "learning_rate": 5e-06, "loss": 0.0901, "num_input_tokens_seen": 612170180, "step": 3564 }, { "epoch": 0.9374630104557112, "loss": 0.0452946312725544, "loss_ce": 0.0018070839578285813, "loss_iou": 0.455078125, "loss_num": 0.0086669921875, "loss_xval": 0.04345703125, "num_input_tokens_seen": 612170180, "step": 3564 }, { "epoch": 0.9377260472150983, "grad_norm": 12.949003378413929, "learning_rate": 5e-06, "loss": 0.1504, "num_input_tokens_seen": 612342360, "step": 3565 }, { "epoch": 0.9377260472150983, "loss": 0.16198524832725525, "loss_ce": 0.001249166438356042, "loss_iou": 0.625, "loss_num": 0.0322265625, "loss_xval": 0.1611328125, "num_input_tokens_seen": 612342360, "step": 3565 }, { "epoch": 0.9379890839744854, "grad_norm": 5.1711873858566335, "learning_rate": 5e-06, "loss": 0.074, "num_input_tokens_seen": 612514340, "step": 3566 }, { "epoch": 0.9379890839744854, "loss": 
0.050079330801963806, "loss_ce": 0.0008087016176432371, "loss_iou": 0.455078125, "loss_num": 0.00982666015625, "loss_xval": 0.04931640625, "num_input_tokens_seen": 612514340, "step": 3566 }, { "epoch": 0.9382521207338725, "grad_norm": 15.271109982643123, "learning_rate": 5e-06, "loss": 0.1294, "num_input_tokens_seen": 612686644, "step": 3567 }, { "epoch": 0.9382521207338725, "loss": 0.051343828439712524, "loss_ce": 0.0003336968075018376, "loss_iou": 0.4609375, "loss_num": 0.01019287109375, "loss_xval": 0.051025390625, "num_input_tokens_seen": 612686644, "step": 3567 }, { "epoch": 0.9385151574932596, "grad_norm": 6.894404249326668, "learning_rate": 5e-06, "loss": 0.1218, "num_input_tokens_seen": 612854000, "step": 3568 }, { "epoch": 0.9385151574932596, "loss": 0.1327725201845169, "loss_ce": 0.0005093337967991829, "loss_iou": 0.46484375, "loss_num": 0.0264892578125, "loss_xval": 0.1318359375, "num_input_tokens_seen": 612854000, "step": 3568 }, { "epoch": 0.9387781942526469, "grad_norm": 16.019697398064586, "learning_rate": 5e-06, "loss": 0.104, "num_input_tokens_seen": 613026136, "step": 3569 }, { "epoch": 0.9387781942526469, "loss": 0.14443224668502808, "loss_ce": 0.0018541140016168356, "loss_iou": 0.4453125, "loss_num": 0.028564453125, "loss_xval": 0.142578125, "num_input_tokens_seen": 613026136, "step": 3569 }, { "epoch": 0.939041231012034, "grad_norm": 2.778217951943394, "learning_rate": 5e-06, "loss": 0.0847, "num_input_tokens_seen": 613198388, "step": 3570 }, { "epoch": 0.939041231012034, "loss": 0.05536004900932312, "loss_ce": 0.0027019698172807693, "loss_iou": 0.53515625, "loss_num": 0.010498046875, "loss_xval": 0.052734375, "num_input_tokens_seen": 613198388, "step": 3570 }, { "epoch": 0.9393042677714211, "grad_norm": 4.873345772996428, "learning_rate": 5e-06, "loss": 0.0827, "num_input_tokens_seen": 613370288, "step": 3571 }, { "epoch": 0.9393042677714211, "loss": 0.09475830942392349, "loss_ce": 0.0007489121053367853, "loss_iou": 0.392578125, "loss_num": 
0.018798828125, "loss_xval": 0.09423828125, "num_input_tokens_seen": 613370288, "step": 3571 }, { "epoch": 0.9395673045308082, "grad_norm": 6.737346874423115, "learning_rate": 5e-06, "loss": 0.1097, "num_input_tokens_seen": 613542216, "step": 3572 }, { "epoch": 0.9395673045308082, "loss": 0.2296835035085678, "loss_ce": 0.005287751089781523, "loss_iou": 0.330078125, "loss_num": 0.044921875, "loss_xval": 0.224609375, "num_input_tokens_seen": 613542216, "step": 3572 }, { "epoch": 0.9398303412901953, "grad_norm": 5.428921826665437, "learning_rate": 5e-06, "loss": 0.0901, "num_input_tokens_seen": 613714320, "step": 3573 }, { "epoch": 0.9398303412901953, "loss": 0.04020649567246437, "loss_ce": 0.002578320913016796, "loss_iou": 0.50390625, "loss_num": 0.007537841796875, "loss_xval": 0.03759765625, "num_input_tokens_seen": 613714320, "step": 3573 }, { "epoch": 0.9400933780495825, "grad_norm": 5.730418477266325, "learning_rate": 5e-06, "loss": 0.0976, "num_input_tokens_seen": 613884004, "step": 3574 }, { "epoch": 0.9400933780495825, "loss": 0.12099497765302658, "loss_ce": 0.000435287831351161, "loss_iou": 0.1884765625, "loss_num": 0.024169921875, "loss_xval": 0.12060546875, "num_input_tokens_seen": 613884004, "step": 3574 }, { "epoch": 0.9403564148089696, "grad_norm": 2.8371958686948315, "learning_rate": 5e-06, "loss": 0.102, "num_input_tokens_seen": 614056360, "step": 3575 }, { "epoch": 0.9403564148089696, "loss": 0.06765338778495789, "loss_ce": 0.00017902448598761111, "loss_iou": 0.392578125, "loss_num": 0.01348876953125, "loss_xval": 0.0673828125, "num_input_tokens_seen": 614056360, "step": 3575 }, { "epoch": 0.9406194515683567, "grad_norm": 3.3483226181812173, "learning_rate": 5e-06, "loss": 0.1015, "num_input_tokens_seen": 614228248, "step": 3576 }, { "epoch": 0.9406194515683567, "loss": 0.07497625052928925, "loss_ce": 0.0009863873710855842, "loss_iou": 0.404296875, "loss_num": 0.0147705078125, "loss_xval": 0.07421875, "num_input_tokens_seen": 614228248, "step": 3576 
}, { "epoch": 0.9408824883277438, "grad_norm": 3.0470015747124837, "learning_rate": 5e-06, "loss": 0.0987, "num_input_tokens_seen": 614400460, "step": 3577 }, { "epoch": 0.9408824883277438, "loss": 0.15504948794841766, "loss_ce": 0.005391271784901619, "loss_iou": 0.23828125, "loss_num": 0.0299072265625, "loss_xval": 0.1494140625, "num_input_tokens_seen": 614400460, "step": 3577 }, { "epoch": 0.9411455250871309, "grad_norm": 14.590254214221755, "learning_rate": 5e-06, "loss": 0.0916, "num_input_tokens_seen": 614572668, "step": 3578 }, { "epoch": 0.9411455250871309, "loss": 0.12589725852012634, "loss_ce": 0.00352177070453763, "loss_iou": 0.453125, "loss_num": 0.0244140625, "loss_xval": 0.12255859375, "num_input_tokens_seen": 614572668, "step": 3578 }, { "epoch": 0.941408561846518, "grad_norm": 4.098895136914831, "learning_rate": 5e-06, "loss": 0.1042, "num_input_tokens_seen": 614744728, "step": 3579 }, { "epoch": 0.941408561846518, "loss": 0.07378913462162018, "loss_ce": 0.0005316926399245858, "loss_iou": 0.5, "loss_num": 0.0146484375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 614744728, "step": 3579 }, { "epoch": 0.9416715986059052, "grad_norm": 6.551103187216315, "learning_rate": 5e-06, "loss": 0.1095, "num_input_tokens_seen": 614917064, "step": 3580 }, { "epoch": 0.9416715986059052, "loss": 0.11033067107200623, "loss_ce": 0.0037785512395203114, "loss_iou": 0.625, "loss_num": 0.0213623046875, "loss_xval": 0.1064453125, "num_input_tokens_seen": 614917064, "step": 3580 }, { "epoch": 0.9419346353652923, "grad_norm": 6.125579245234497, "learning_rate": 5e-06, "loss": 0.1093, "num_input_tokens_seen": 615089092, "step": 3581 }, { "epoch": 0.9419346353652923, "loss": 0.09463340044021606, "loss_ce": 0.002287208568304777, "loss_iou": 0.421875, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 615089092, "step": 3581 }, { "epoch": 0.9421976721246794, "grad_norm": 4.52462668676838, "learning_rate": 5e-06, "loss": 0.0929, 
"num_input_tokens_seen": 615259732, "step": 3582 }, { "epoch": 0.9421976721246794, "loss": 0.09844870865345001, "loss_ce": 0.0007771998061798513, "loss_iou": 0.546875, "loss_num": 0.01953125, "loss_xval": 0.09765625, "num_input_tokens_seen": 615259732, "step": 3582 }, { "epoch": 0.9424607088840665, "grad_norm": 4.261037832557463, "learning_rate": 5e-06, "loss": 0.1002, "num_input_tokens_seen": 615431984, "step": 3583 }, { "epoch": 0.9424607088840665, "loss": 0.10644324868917465, "loss_ce": 0.0039041785057634115, "loss_iou": 0.53515625, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 615431984, "step": 3583 }, { "epoch": 0.9427237456434536, "grad_norm": 11.026204038590599, "learning_rate": 5e-06, "loss": 0.0974, "num_input_tokens_seen": 615604052, "step": 3584 }, { "epoch": 0.9427237456434536, "loss": 0.08766089379787445, "loss_ce": 0.0031577199697494507, "loss_iou": 0.455078125, "loss_num": 0.0169677734375, "loss_xval": 0.08447265625, "num_input_tokens_seen": 615604052, "step": 3584 }, { "epoch": 0.9429867824028408, "grad_norm": 4.892521797802774, "learning_rate": 5e-06, "loss": 0.0978, "num_input_tokens_seen": 615776672, "step": 3585 }, { "epoch": 0.9429867824028408, "loss": 0.04693538695573807, "loss_ce": 0.0009148788521997631, "loss_iou": 0.380859375, "loss_num": 0.00921630859375, "loss_xval": 0.0458984375, "num_input_tokens_seen": 615776672, "step": 3585 }, { "epoch": 0.9432498191622279, "grad_norm": 8.102761106727687, "learning_rate": 5e-06, "loss": 0.1015, "num_input_tokens_seen": 615948804, "step": 3586 }, { "epoch": 0.9432498191622279, "loss": 0.0826391950249672, "loss_ce": 0.00152347341645509, "loss_iou": 0.40234375, "loss_num": 0.0162353515625, "loss_xval": 0.0810546875, "num_input_tokens_seen": 615948804, "step": 3586 }, { "epoch": 0.943512855921615, "grad_norm": 4.699200150667693, "learning_rate": 5e-06, "loss": 0.0853, "num_input_tokens_seen": 616120904, "step": 3587 }, { "epoch": 0.943512855921615, "loss": 
0.07182273268699646, "loss_ce": 0.004500953480601311, "loss_iou": 0.431640625, "loss_num": 0.013427734375, "loss_xval": 0.0673828125, "num_input_tokens_seen": 616120904, "step": 3587 }, { "epoch": 0.9437758926810021, "grad_norm": 5.979856684754754, "learning_rate": 5e-06, "loss": 0.1208, "num_input_tokens_seen": 616293232, "step": 3588 }, { "epoch": 0.9437758926810021, "loss": 0.14250054955482483, "loss_ce": 0.004179632291197777, "loss_iou": 0.515625, "loss_num": 0.0277099609375, "loss_xval": 0.138671875, "num_input_tokens_seen": 616293232, "step": 3588 }, { "epoch": 0.9440389294403893, "grad_norm": 4.477659380807632, "learning_rate": 5e-06, "loss": 0.1234, "num_input_tokens_seen": 616465544, "step": 3589 }, { "epoch": 0.9440389294403893, "loss": 0.13571983575820923, "loss_ce": 0.004356917925179005, "loss_iou": 0.3671875, "loss_num": 0.0262451171875, "loss_xval": 0.1318359375, "num_input_tokens_seen": 616465544, "step": 3589 }, { "epoch": 0.9443019661997765, "grad_norm": 5.354193803812341, "learning_rate": 5e-06, "loss": 0.0752, "num_input_tokens_seen": 616638112, "step": 3590 }, { "epoch": 0.9443019661997765, "loss": 0.08732321113348007, "loss_ce": 0.0015383013524115086, "loss_iou": 0.458984375, "loss_num": 0.0172119140625, "loss_xval": 0.0859375, "num_input_tokens_seen": 616638112, "step": 3590 }, { "epoch": 0.9445650029591636, "grad_norm": 3.712673541690186, "learning_rate": 5e-06, "loss": 0.0755, "num_input_tokens_seen": 616808952, "step": 3591 }, { "epoch": 0.9445650029591636, "loss": 0.0873071700334549, "loss_ce": 0.0063440315425395966, "loss_iou": 0.45703125, "loss_num": 0.0162353515625, "loss_xval": 0.0810546875, "num_input_tokens_seen": 616808952, "step": 3591 }, { "epoch": 0.9448280397185507, "grad_norm": 5.018692274423124, "learning_rate": 5e-06, "loss": 0.1035, "num_input_tokens_seen": 616981056, "step": 3592 }, { "epoch": 0.9448280397185507, "loss": 0.16758012771606445, "loss_ce": 0.0011983029544353485, "loss_iou": 0.416015625, "loss_num": 0.033203125, 
"loss_xval": 0.166015625, "num_input_tokens_seen": 616981056, "step": 3592 }, { "epoch": 0.9450910764779378, "grad_norm": 8.157994634855726, "learning_rate": 5e-06, "loss": 0.1171, "num_input_tokens_seen": 617152956, "step": 3593 }, { "epoch": 0.9450910764779378, "loss": 0.09211128950119019, "loss_ce": 0.0016877016751095653, "loss_iou": 0.466796875, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 617152956, "step": 3593 }, { "epoch": 0.9453541132373249, "grad_norm": 6.815211067626931, "learning_rate": 5e-06, "loss": 0.104, "num_input_tokens_seen": 617323192, "step": 3594 }, { "epoch": 0.9453541132373249, "loss": 0.15205200016498566, "loss_ce": 0.0006542917108163238, "loss_iou": 0.373046875, "loss_num": 0.0302734375, "loss_xval": 0.1513671875, "num_input_tokens_seen": 617323192, "step": 3594 }, { "epoch": 0.9456171499967121, "grad_norm": 5.698573656915325, "learning_rate": 5e-06, "loss": 0.1838, "num_input_tokens_seen": 617495528, "step": 3595 }, { "epoch": 0.9456171499967121, "loss": 0.15452706813812256, "loss_ce": 0.004380582831799984, "loss_iou": 0.466796875, "loss_num": 0.030029296875, "loss_xval": 0.150390625, "num_input_tokens_seen": 617495528, "step": 3595 }, { "epoch": 0.9458801867560992, "grad_norm": 5.492995557036467, "learning_rate": 5e-06, "loss": 0.0983, "num_input_tokens_seen": 617667772, "step": 3596 }, { "epoch": 0.9458801867560992, "loss": 0.11533143371343613, "loss_ce": 0.0028283819556236267, "loss_iou": 0.453125, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 617667772, "step": 3596 }, { "epoch": 0.9461432235154863, "grad_norm": 3.750755987920789, "learning_rate": 5e-06, "loss": 0.1041, "num_input_tokens_seen": 617836324, "step": 3597 }, { "epoch": 0.9461432235154863, "loss": 0.07357309758663177, "loss_ce": 0.0006360823172144592, "loss_iou": 0.5546875, "loss_num": 0.01458740234375, "loss_xval": 0.07275390625, "num_input_tokens_seen": 617836324, "step": 3597 }, { "epoch": 
0.9464062602748734, "grad_norm": 3.634136051792054, "learning_rate": 5e-06, "loss": 0.102, "num_input_tokens_seen": 618006748, "step": 3598 }, { "epoch": 0.9464062602748734, "loss": 0.05711160972714424, "loss_ce": 0.0004099512880202383, "loss_iou": 0.474609375, "loss_num": 0.0113525390625, "loss_xval": 0.056640625, "num_input_tokens_seen": 618006748, "step": 3598 }, { "epoch": 0.9466692970342605, "grad_norm": 17.367283166631303, "learning_rate": 5e-06, "loss": 0.0994, "num_input_tokens_seen": 618179204, "step": 3599 }, { "epoch": 0.9466692970342605, "loss": 0.11188434064388275, "loss_ce": 0.0004646638117264956, "loss_iou": 0.53125, "loss_num": 0.0223388671875, "loss_xval": 0.111328125, "num_input_tokens_seen": 618179204, "step": 3599 }, { "epoch": 0.9469323337936476, "grad_norm": 4.326428384534629, "learning_rate": 5e-06, "loss": 0.1077, "num_input_tokens_seen": 618351176, "step": 3600 }, { "epoch": 0.9469323337936476, "loss": 0.1435290426015854, "loss_ce": 0.002629393944516778, "loss_iou": 0.34375, "loss_num": 0.0281982421875, "loss_xval": 0.140625, "num_input_tokens_seen": 618351176, "step": 3600 }, { "epoch": 0.9471953705530348, "grad_norm": 19.80317191157718, "learning_rate": 5e-06, "loss": 0.0993, "num_input_tokens_seen": 618523308, "step": 3601 }, { "epoch": 0.9471953705530348, "loss": 0.055401187390089035, "loss_ce": 0.0005763589288108051, "loss_iou": 0.5, "loss_num": 0.010986328125, "loss_xval": 0.054931640625, "num_input_tokens_seen": 618523308, "step": 3601 }, { "epoch": 0.9474584073124219, "grad_norm": 7.650182889486073, "learning_rate": 5e-06, "loss": 0.1153, "num_input_tokens_seen": 618695336, "step": 3602 }, { "epoch": 0.9474584073124219, "loss": 0.15820011496543884, "loss_ce": 0.0013244987931102514, "loss_iou": 0.51171875, "loss_num": 0.03125, "loss_xval": 0.1572265625, "num_input_tokens_seen": 618695336, "step": 3602 }, { "epoch": 0.947721444071809, "grad_norm": 15.018190748040105, "learning_rate": 5e-06, "loss": 0.1028, "num_input_tokens_seen": 
618868120, "step": 3603 }, { "epoch": 0.947721444071809, "loss": 0.08152879774570465, "loss_ce": 0.0010081640211865306, "loss_iou": 0.388671875, "loss_num": 0.01611328125, "loss_xval": 0.08056640625, "num_input_tokens_seen": 618868120, "step": 3603 }, { "epoch": 0.9479844808311961, "grad_norm": 3.724726783342823, "learning_rate": 5e-06, "loss": 0.0996, "num_input_tokens_seen": 619040216, "step": 3604 }, { "epoch": 0.9479844808311961, "loss": 0.07213738560676575, "loss_ce": 0.0031829241197556257, "loss_iou": 0.3671875, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 619040216, "step": 3604 }, { "epoch": 0.9482475175905832, "grad_norm": 28.840812127557175, "learning_rate": 5e-06, "loss": 0.1015, "num_input_tokens_seen": 619212192, "step": 3605 }, { "epoch": 0.9482475175905832, "loss": 0.1626545786857605, "loss_ce": 0.0024830668698996305, "loss_iou": 0.482421875, "loss_num": 0.031982421875, "loss_xval": 0.16015625, "num_input_tokens_seen": 619212192, "step": 3605 }, { "epoch": 0.9485105543499704, "grad_norm": 5.069934241766405, "learning_rate": 5e-06, "loss": 0.1145, "num_input_tokens_seen": 619382656, "step": 3606 }, { "epoch": 0.9485105543499704, "loss": 0.1947801113128662, "loss_ce": 0.00343489833176136, "loss_iou": 0.53515625, "loss_num": 0.038330078125, "loss_xval": 0.19140625, "num_input_tokens_seen": 619382656, "step": 3606 }, { "epoch": 0.9487735911093576, "grad_norm": 13.189826643263768, "learning_rate": 5e-06, "loss": 0.0894, "num_input_tokens_seen": 619554772, "step": 3607 }, { "epoch": 0.9487735911093576, "loss": 0.10128442198038101, "loss_ce": 0.001995484111830592, "loss_iou": 0.388671875, "loss_num": 0.0198974609375, "loss_xval": 0.09912109375, "num_input_tokens_seen": 619554772, "step": 3607 }, { "epoch": 0.9490366278687447, "grad_norm": 3.7735949518372953, "learning_rate": 5e-06, "loss": 0.0685, "num_input_tokens_seen": 619727116, "step": 3608 }, { "epoch": 0.9490366278687447, "loss": 0.11414018273353577, "loss_ce": 
0.005039841867983341, "loss_iou": 0.4140625, "loss_num": 0.0218505859375, "loss_xval": 0.10888671875, "num_input_tokens_seen": 619727116, "step": 3608 }, { "epoch": 0.9492996646281318, "grad_norm": 4.250123008721577, "learning_rate": 5e-06, "loss": 0.1075, "num_input_tokens_seen": 619899428, "step": 3609 }, { "epoch": 0.9492996646281318, "loss": 0.1226678267121315, "loss_ce": 0.007280868943780661, "loss_iou": 0.45703125, "loss_num": 0.0230712890625, "loss_xval": 0.115234375, "num_input_tokens_seen": 619899428, "step": 3609 }, { "epoch": 0.9495627013875189, "grad_norm": 6.863676153540119, "learning_rate": 5e-06, "loss": 0.1046, "num_input_tokens_seen": 620071540, "step": 3610 }, { "epoch": 0.9495627013875189, "loss": 0.21938937902450562, "loss_ce": 0.0038132029585540295, "loss_iou": 0.3984375, "loss_num": 0.04296875, "loss_xval": 0.2158203125, "num_input_tokens_seen": 620071540, "step": 3610 }, { "epoch": 0.9498257381469061, "grad_norm": 3.813325984971825, "learning_rate": 5e-06, "loss": 0.1107, "num_input_tokens_seen": 620243836, "step": 3611 }, { "epoch": 0.9498257381469061, "loss": 0.07250767946243286, "loss_ce": 0.0018289745785295963, "loss_iou": 0.33984375, "loss_num": 0.01416015625, "loss_xval": 0.07080078125, "num_input_tokens_seen": 620243836, "step": 3611 }, { "epoch": 0.9500887749062932, "grad_norm": 8.694293187792477, "learning_rate": 5e-06, "loss": 0.0953, "num_input_tokens_seen": 620416164, "step": 3612 }, { "epoch": 0.9500887749062932, "loss": 0.08137984573841095, "loss_ce": 0.0004167144070379436, "loss_iou": 0.421875, "loss_num": 0.0162353515625, "loss_xval": 0.0810546875, "num_input_tokens_seen": 620416164, "step": 3612 }, { "epoch": 0.9503518116656803, "grad_norm": 4.011759905693181, "learning_rate": 5e-06, "loss": 0.0881, "num_input_tokens_seen": 620588248, "step": 3613 }, { "epoch": 0.9503518116656803, "loss": 0.10577777028083801, "loss_ce": 0.003009829204529524, "loss_iou": 0.57421875, "loss_num": 0.0206298828125, "loss_xval": 0.1025390625, 
"num_input_tokens_seen": 620588248, "step": 3613 }, { "epoch": 0.9506148484250674, "grad_norm": 4.8583016740360545, "learning_rate": 5e-06, "loss": 0.0856, "num_input_tokens_seen": 620760212, "step": 3614 }, { "epoch": 0.9506148484250674, "loss": 0.07320687174797058, "loss_ce": 0.00593086751177907, "loss_iou": 0.4609375, "loss_num": 0.013427734375, "loss_xval": 0.0673828125, "num_input_tokens_seen": 620760212, "step": 3614 }, { "epoch": 0.9508778851844545, "grad_norm": 7.183426742063696, "learning_rate": 5e-06, "loss": 0.092, "num_input_tokens_seen": 620932584, "step": 3615 }, { "epoch": 0.9508778851844545, "loss": 0.12085875123739243, "loss_ce": 0.004022202454507351, "loss_iou": 0.3828125, "loss_num": 0.0233154296875, "loss_xval": 0.11669921875, "num_input_tokens_seen": 620932584, "step": 3615 }, { "epoch": 0.9511409219438417, "grad_norm": 5.9763835890459855, "learning_rate": 5e-06, "loss": 0.0623, "num_input_tokens_seen": 621104624, "step": 3616 }, { "epoch": 0.9511409219438417, "loss": 0.057411566376686096, "loss_ce": 0.00405157683417201, "loss_iou": 0.40625, "loss_num": 0.01068115234375, "loss_xval": 0.053466796875, "num_input_tokens_seen": 621104624, "step": 3616 }, { "epoch": 0.9514039587032288, "grad_norm": 4.148575034087351, "learning_rate": 5e-06, "loss": 0.1074, "num_input_tokens_seen": 621276932, "step": 3617 }, { "epoch": 0.9514039587032288, "loss": 0.17621394991874695, "loss_ce": 0.0008599417633377016, "loss_iou": 0.515625, "loss_num": 0.03515625, "loss_xval": 0.17578125, "num_input_tokens_seen": 621276932, "step": 3617 }, { "epoch": 0.9516669954626159, "grad_norm": 9.831716470581155, "learning_rate": 5e-06, "loss": 0.1326, "num_input_tokens_seen": 621449128, "step": 3618 }, { "epoch": 0.9516669954626159, "loss": 0.17716863751411438, "loss_ce": 0.0019672252237796783, "loss_iou": 0.5, "loss_num": 0.034912109375, "loss_xval": 0.1748046875, "num_input_tokens_seen": 621449128, "step": 3618 }, { "epoch": 0.951930032222003, "grad_norm": 6.726193865130754, 
"learning_rate": 5e-06, "loss": 0.1015, "num_input_tokens_seen": 621621260, "step": 3619 }, { "epoch": 0.951930032222003, "loss": 0.08456599712371826, "loss_ce": 0.0014971550554037094, "loss_iou": 0.4453125, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 621621260, "step": 3619 }, { "epoch": 0.9521930689813901, "grad_norm": 5.019143488565095, "learning_rate": 5e-06, "loss": 0.1194, "num_input_tokens_seen": 621792008, "step": 3620 }, { "epoch": 0.9521930689813901, "loss": 0.05885142832994461, "loss_ce": 0.0001813878770917654, "loss_iou": 0.59375, "loss_num": 0.01171875, "loss_xval": 0.05859375, "num_input_tokens_seen": 621792008, "step": 3620 }, { "epoch": 0.9524561057407773, "grad_norm": 55.831161754212744, "learning_rate": 5e-06, "loss": 0.1297, "num_input_tokens_seen": 621964056, "step": 3621 }, { "epoch": 0.9524561057407773, "loss": 0.08644313365221024, "loss_ce": 0.0015737485373392701, "loss_iou": 0.419921875, "loss_num": 0.0169677734375, "loss_xval": 0.0849609375, "num_input_tokens_seen": 621964056, "step": 3621 }, { "epoch": 0.9527191425001644, "grad_norm": 6.339787905898623, "learning_rate": 5e-06, "loss": 0.0934, "num_input_tokens_seen": 622136360, "step": 3622 }, { "epoch": 0.9527191425001644, "loss": 0.07180093228816986, "loss_ce": 0.0007102307863533497, "loss_iou": 0.423828125, "loss_num": 0.01422119140625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 622136360, "step": 3622 }, { "epoch": 0.9529821792595515, "grad_norm": 5.256007258631962, "learning_rate": 5e-06, "loss": 0.1175, "num_input_tokens_seen": 622308160, "step": 3623 }, { "epoch": 0.9529821792595515, "loss": 0.16570287942886353, "loss_ce": 0.0014267577789723873, "loss_iou": 0.396484375, "loss_num": 0.032958984375, "loss_xval": 0.1640625, "num_input_tokens_seen": 622308160, "step": 3623 }, { "epoch": 0.9532452160189386, "grad_norm": 16.742735373121388, "learning_rate": 5e-06, "loss": 0.0872, "num_input_tokens_seen": 622480548, "step": 3624 }, { "epoch": 
0.9532452160189386, "loss": 0.052828967571258545, "loss_ce": 0.0007049451814964414, "loss_iou": 0.5078125, "loss_num": 0.01043701171875, "loss_xval": 0.05224609375, "num_input_tokens_seen": 622480548, "step": 3624 }, { "epoch": 0.9535082527783257, "grad_norm": 17.572925497634134, "learning_rate": 5e-06, "loss": 0.1125, "num_input_tokens_seen": 622652520, "step": 3625 }, { "epoch": 0.9535082527783257, "loss": 0.0615709125995636, "loss_ce": 0.00045946481986902654, "loss_iou": 0.48046875, "loss_num": 0.01220703125, "loss_xval": 0.06103515625, "num_input_tokens_seen": 622652520, "step": 3625 }, { "epoch": 0.9537712895377128, "grad_norm": 6.0047852962098975, "learning_rate": 5e-06, "loss": 0.1244, "num_input_tokens_seen": 622823148, "step": 3626 }, { "epoch": 0.9537712895377128, "loss": 0.13855966925621033, "loss_ce": 0.0012610815465450287, "loss_iou": 0.43359375, "loss_num": 0.0274658203125, "loss_xval": 0.1376953125, "num_input_tokens_seen": 622823148, "step": 3626 }, { "epoch": 0.9540343262971, "grad_norm": 7.792667970397866, "learning_rate": 5e-06, "loss": 0.1385, "num_input_tokens_seen": 622995452, "step": 3627 }, { "epoch": 0.9540343262971, "loss": 0.13608847558498383, "loss_ce": 0.001505955122411251, "loss_iou": 0.48828125, "loss_num": 0.0269775390625, "loss_xval": 0.134765625, "num_input_tokens_seen": 622995452, "step": 3627 }, { "epoch": 0.9542973630564872, "grad_norm": 4.565665131118125, "learning_rate": 5e-06, "loss": 0.0977, "num_input_tokens_seen": 623167560, "step": 3628 }, { "epoch": 0.9542973630564872, "loss": 0.11297139525413513, "loss_ce": 0.0007735221879556775, "loss_iou": 0.490234375, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 623167560, "step": 3628 }, { "epoch": 0.9545603998158743, "grad_norm": 5.57299165717365, "learning_rate": 5e-06, "loss": 0.1077, "num_input_tokens_seen": 623339872, "step": 3629 }, { "epoch": 0.9545603998158743, "loss": 0.09495042264461517, "loss_ce": 0.002497426699846983, "loss_iou": 
0.365234375, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 623339872, "step": 3629 }, { "epoch": 0.9548234365752614, "grad_norm": 21.302937351842843, "learning_rate": 5e-06, "loss": 0.11, "num_input_tokens_seen": 623512180, "step": 3630 }, { "epoch": 0.9548234365752614, "loss": 0.11213900148868561, "loss_ce": 0.0011923413258045912, "loss_iou": 0.50390625, "loss_num": 0.022216796875, "loss_xval": 0.11083984375, "num_input_tokens_seen": 623512180, "step": 3630 }, { "epoch": 0.9550864733346485, "grad_norm": 12.098614631817124, "learning_rate": 5e-06, "loss": 0.1408, "num_input_tokens_seen": 623680972, "step": 3631 }, { "epoch": 0.9550864733346485, "loss": 0.21909213066101074, "loss_ce": 0.0015017889672890306, "loss_iou": 0.453125, "loss_num": 0.04345703125, "loss_xval": 0.2177734375, "num_input_tokens_seen": 623680972, "step": 3631 }, { "epoch": 0.9553495100940357, "grad_norm": 9.034625023288243, "learning_rate": 5e-06, "loss": 0.1064, "num_input_tokens_seen": 623851276, "step": 3632 }, { "epoch": 0.9553495100940357, "loss": 0.0687929093837738, "loss_ce": 0.0007081945077516139, "loss_iou": 0.5859375, "loss_num": 0.01361083984375, "loss_xval": 0.06787109375, "num_input_tokens_seen": 623851276, "step": 3632 }, { "epoch": 0.9556125468534228, "grad_norm": 4.18724157975137, "learning_rate": 5e-06, "loss": 0.1245, "num_input_tokens_seen": 624023500, "step": 3633 }, { "epoch": 0.9556125468534228, "loss": 0.12112436443567276, "loss_ce": 0.0027008973993360996, "loss_iou": 0.4140625, "loss_num": 0.023681640625, "loss_xval": 0.11865234375, "num_input_tokens_seen": 624023500, "step": 3633 }, { "epoch": 0.9558755836128099, "grad_norm": 10.76826846477555, "learning_rate": 5e-06, "loss": 0.1289, "num_input_tokens_seen": 624195576, "step": 3634 }, { "epoch": 0.9558755836128099, "loss": 0.11673011630773544, "loss_ce": 0.004501717630773783, "loss_iou": 0.47265625, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 
624195576, "step": 3634 }, { "epoch": 0.956138620372197, "grad_norm": 3.420122712086701, "learning_rate": 5e-06, "loss": 0.0911, "num_input_tokens_seen": 624366376, "step": 3635 }, { "epoch": 0.956138620372197, "loss": 0.13994066417217255, "loss_ce": 0.0026115677319467068, "loss_iou": 0.3125, "loss_num": 0.0274658203125, "loss_xval": 0.1376953125, "num_input_tokens_seen": 624366376, "step": 3635 }, { "epoch": 0.9564016571315841, "grad_norm": 3.188466600397663, "learning_rate": 5e-06, "loss": 0.1096, "num_input_tokens_seen": 624538928, "step": 3636 }, { "epoch": 0.9564016571315841, "loss": 0.19849437475204468, "loss_ce": 0.0006184080266393721, "loss_iou": 0.443359375, "loss_num": 0.03955078125, "loss_xval": 0.1982421875, "num_input_tokens_seen": 624538928, "step": 3636 }, { "epoch": 0.9566646938909713, "grad_norm": 4.145251385205127, "learning_rate": 5e-06, "loss": 0.0809, "num_input_tokens_seen": 624711036, "step": 3637 }, { "epoch": 0.9566646938909713, "loss": 0.07516495883464813, "loss_ce": 0.0005189694347791374, "loss_iou": 0.51953125, "loss_num": 0.01495361328125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 624711036, "step": 3637 }, { "epoch": 0.9569277306503584, "grad_norm": 7.0181560986956395, "learning_rate": 5e-06, "loss": 0.1178, "num_input_tokens_seen": 624883520, "step": 3638 }, { "epoch": 0.9569277306503584, "loss": 0.06267523765563965, "loss_ce": 0.0029981140978634357, "loss_iou": 0.60546875, "loss_num": 0.01190185546875, "loss_xval": 0.0595703125, "num_input_tokens_seen": 624883520, "step": 3638 }, { "epoch": 0.9571907674097455, "grad_norm": 3.9660695052723813, "learning_rate": 5e-06, "loss": 0.1141, "num_input_tokens_seen": 625055640, "step": 3639 }, { "epoch": 0.9571907674097455, "loss": 0.1359872817993164, "loss_ce": 0.00042819694499485195, "loss_iou": 0.5546875, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 625055640, "step": 3639 }, { "epoch": 0.9574538041691326, "grad_norm": 17.732753202882193, 
"learning_rate": 5e-06, "loss": 0.125, "num_input_tokens_seen": 625227732, "step": 3640 }, { "epoch": 0.9574538041691326, "loss": 0.09507328271865845, "loss_ce": 0.0008502600830979645, "loss_iou": 0.458984375, "loss_num": 0.018798828125, "loss_xval": 0.09423828125, "num_input_tokens_seen": 625227732, "step": 3640 }, { "epoch": 0.9577168409285197, "grad_norm": 3.4840402032914257, "learning_rate": 5e-06, "loss": 0.0918, "num_input_tokens_seen": 625398028, "step": 3641 }, { "epoch": 0.9577168409285197, "loss": 0.13700971007347107, "loss_ce": 0.0006876978441141546, "loss_iou": 0.408203125, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 625398028, "step": 3641 }, { "epoch": 0.9579798776879069, "grad_norm": 7.413398524469702, "learning_rate": 5e-06, "loss": 0.1246, "num_input_tokens_seen": 625570244, "step": 3642 }, { "epoch": 0.9579798776879069, "loss": 0.12876945734024048, "loss_ce": 0.00047354548587463796, "loss_iou": 0.5234375, "loss_num": 0.025634765625, "loss_xval": 0.1279296875, "num_input_tokens_seen": 625570244, "step": 3642 }, { "epoch": 0.958242914447294, "grad_norm": 5.336702710138045, "learning_rate": 5e-06, "loss": 0.0898, "num_input_tokens_seen": 625742464, "step": 3643 }, { "epoch": 0.958242914447294, "loss": 0.05379210785031319, "loss_ce": 0.00020323891658335924, "loss_iou": 0.44140625, "loss_num": 0.01068115234375, "loss_xval": 0.0537109375, "num_input_tokens_seen": 625742464, "step": 3643 }, { "epoch": 0.9585059512066811, "grad_norm": 23.972493383254932, "learning_rate": 5e-06, "loss": 0.1108, "num_input_tokens_seen": 625914372, "step": 3644 }, { "epoch": 0.9585059512066811, "loss": 0.08553168922662735, "loss_ce": 0.0017609409987926483, "loss_iou": 0.45703125, "loss_num": 0.0167236328125, "loss_xval": 0.083984375, "num_input_tokens_seen": 625914372, "step": 3644 }, { "epoch": 0.9587689879660682, "grad_norm": 10.340048159606546, "learning_rate": 5e-06, "loss": 0.1191, "num_input_tokens_seen": 626086592, "step": 3645 }, { 
"epoch": 0.9587689879660682, "loss": 0.234289288520813, "loss_ce": 0.00026523511041887105, "loss_iou": 0.404296875, "loss_num": 0.046875, "loss_xval": 0.234375, "num_input_tokens_seen": 626086592, "step": 3645 }, { "epoch": 0.9590320247254553, "grad_norm": 9.300486446962857, "learning_rate": 5e-06, "loss": 0.094, "num_input_tokens_seen": 626257416, "step": 3646 }, { "epoch": 0.9590320247254553, "loss": 0.045688219368457794, "loss_ce": 0.0002475440560374409, "loss_iou": 0.3125, "loss_num": 0.00909423828125, "loss_xval": 0.04541015625, "num_input_tokens_seen": 626257416, "step": 3646 }, { "epoch": 0.9592950614848426, "grad_norm": 11.589258821015147, "learning_rate": 5e-06, "loss": 0.1346, "num_input_tokens_seen": 626427708, "step": 3647 }, { "epoch": 0.9592950614848426, "loss": 0.10979291796684265, "loss_ce": 0.0016691365744918585, "loss_iou": 0.33984375, "loss_num": 0.0216064453125, "loss_xval": 0.10791015625, "num_input_tokens_seen": 626427708, "step": 3647 }, { "epoch": 0.9595580982442297, "grad_norm": 3.2081047701574943, "learning_rate": 5e-06, "loss": 0.0916, "num_input_tokens_seen": 626599988, "step": 3648 }, { "epoch": 0.9595580982442297, "loss": 0.09939119219779968, "loss_ce": 0.0016281325370073318, "loss_iou": 0.5546875, "loss_num": 0.01953125, "loss_xval": 0.09765625, "num_input_tokens_seen": 626599988, "step": 3648 }, { "epoch": 0.9598211350036168, "grad_norm": 4.783540700409292, "learning_rate": 5e-06, "loss": 0.1455, "num_input_tokens_seen": 626772316, "step": 3649 }, { "epoch": 0.9598211350036168, "loss": 0.09440785646438599, "loss_ce": 0.0029161556158214808, "loss_iou": 0.4296875, "loss_num": 0.018310546875, "loss_xval": 0.09130859375, "num_input_tokens_seen": 626772316, "step": 3649 }, { "epoch": 0.9600841717630039, "grad_norm": 5.125730804994831, "learning_rate": 5e-06, "loss": 0.0868, "num_input_tokens_seen": 626942620, "step": 3650 }, { "epoch": 0.9600841717630039, "loss": 0.06443943828344345, "loss_ce": 0.0007797717116773129, "loss_iou": 
0.482421875, "loss_num": 0.01275634765625, "loss_xval": 0.0634765625, "num_input_tokens_seen": 626942620, "step": 3650 }, { "epoch": 0.960347208522391, "grad_norm": 4.565795888601258, "learning_rate": 5e-06, "loss": 0.1097, "num_input_tokens_seen": 627114816, "step": 3651 }, { "epoch": 0.960347208522391, "loss": 0.12303532660007477, "loss_ce": 0.0003851845976896584, "loss_iou": 0.453125, "loss_num": 0.0245361328125, "loss_xval": 0.12255859375, "num_input_tokens_seen": 627114816, "step": 3651 }, { "epoch": 0.9606102452817781, "grad_norm": 8.198006738555572, "learning_rate": 5e-06, "loss": 0.0959, "num_input_tokens_seen": 627286832, "step": 3652 }, { "epoch": 0.9606102452817781, "loss": 0.12663108110427856, "loss_ce": 0.0022109271958470345, "loss_iou": 0.6875, "loss_num": 0.02490234375, "loss_xval": 0.12451171875, "num_input_tokens_seen": 627286832, "step": 3652 }, { "epoch": 0.9608732820411653, "grad_norm": 6.977304479694191, "learning_rate": 5e-06, "loss": 0.141, "num_input_tokens_seen": 627458840, "step": 3653 }, { "epoch": 0.9608732820411653, "loss": 0.08323599398136139, "loss_ce": 0.003646154422312975, "loss_iou": 0.4921875, "loss_num": 0.0159912109375, "loss_xval": 0.07958984375, "num_input_tokens_seen": 627458840, "step": 3653 }, { "epoch": 0.9611363188005524, "grad_norm": 6.143584577276683, "learning_rate": 5e-06, "loss": 0.127, "num_input_tokens_seen": 627631048, "step": 3654 }, { "epoch": 0.9611363188005524, "loss": 0.08655130863189697, "loss_ce": 0.0005985412281006575, "loss_iou": 0.44921875, "loss_num": 0.0172119140625, "loss_xval": 0.0859375, "num_input_tokens_seen": 627631048, "step": 3654 }, { "epoch": 0.9613993555599395, "grad_norm": 4.962748401603593, "learning_rate": 5e-06, "loss": 0.0843, "num_input_tokens_seen": 627801572, "step": 3655 }, { "epoch": 0.9613993555599395, "loss": 0.06723140180110931, "loss_ce": 0.0012218892807140946, "loss_iou": 0.232421875, "loss_num": 0.01318359375, "loss_xval": 0.06591796875, "num_input_tokens_seen": 627801572, 
"step": 3655 }, { "epoch": 0.9616623923193266, "grad_norm": 3.888233294845763, "learning_rate": 5e-06, "loss": 0.1216, "num_input_tokens_seen": 627973444, "step": 3656 }, { "epoch": 0.9616623923193266, "loss": 0.19316411018371582, "loss_ce": 0.0060761114582419395, "loss_iou": 0.4765625, "loss_num": 0.037353515625, "loss_xval": 0.1875, "num_input_tokens_seen": 627973444, "step": 3656 }, { "epoch": 0.9619254290787137, "grad_norm": 12.231564786844833, "learning_rate": 5e-06, "loss": 0.1033, "num_input_tokens_seen": 628145692, "step": 3657 }, { "epoch": 0.9619254290787137, "loss": 0.08886748552322388, "loss_ce": 0.004608447663486004, "loss_iou": 0.37109375, "loss_num": 0.016845703125, "loss_xval": 0.08447265625, "num_input_tokens_seen": 628145692, "step": 3657 }, { "epoch": 0.9621884658381009, "grad_norm": 6.681516105502576, "learning_rate": 5e-06, "loss": 0.1222, "num_input_tokens_seen": 628317968, "step": 3658 }, { "epoch": 0.9621884658381009, "loss": 0.11374935507774353, "loss_ce": 0.00020870784646831453, "loss_iou": 0.61328125, "loss_num": 0.022705078125, "loss_xval": 0.11376953125, "num_input_tokens_seen": 628317968, "step": 3658 }, { "epoch": 0.962451502597488, "grad_norm": 9.221687537639268, "learning_rate": 5e-06, "loss": 0.0512, "num_input_tokens_seen": 628489248, "step": 3659 }, { "epoch": 0.962451502597488, "loss": 0.07078136503696442, "loss_ce": 0.000972404726780951, "loss_iou": 0.41015625, "loss_num": 0.01397705078125, "loss_xval": 0.06982421875, "num_input_tokens_seen": 628489248, "step": 3659 }, { "epoch": 0.9627145393568751, "grad_norm": 3.888238330257217, "learning_rate": 5e-06, "loss": 0.1194, "num_input_tokens_seen": 628661488, "step": 3660 }, { "epoch": 0.9627145393568751, "loss": 0.148333340883255, "loss_ce": 0.001513272407464683, "loss_iou": 0.361328125, "loss_num": 0.0294189453125, "loss_xval": 0.146484375, "num_input_tokens_seen": 628661488, "step": 3660 }, { "epoch": 0.9629775761162622, "grad_norm": 3.548818147424885, "learning_rate": 5e-06, 
"loss": 0.1097, "num_input_tokens_seen": 628833384, "step": 3661 }, { "epoch": 0.9629775761162622, "loss": 0.07117056846618652, "loss_ce": 0.0004918586346320808, "loss_iou": 0.455078125, "loss_num": 0.01416015625, "loss_xval": 0.07080078125, "num_input_tokens_seen": 628833384, "step": 3661 }, { "epoch": 0.9632406128756493, "grad_norm": 3.779136602687443, "learning_rate": 5e-06, "loss": 0.1092, "num_input_tokens_seen": 629005652, "step": 3662 }, { "epoch": 0.9632406128756493, "loss": 0.06165676563978195, "loss_ce": 0.0007131616584956646, "loss_iou": 0.62109375, "loss_num": 0.01220703125, "loss_xval": 0.06103515625, "num_input_tokens_seen": 629005652, "step": 3662 }, { "epoch": 0.9635036496350365, "grad_norm": 5.7914523260663495, "learning_rate": 5e-06, "loss": 0.1238, "num_input_tokens_seen": 629177884, "step": 3663 }, { "epoch": 0.9635036496350365, "loss": 0.09137916564941406, "loss_ce": 0.00181007559876889, "loss_iou": 0.46484375, "loss_num": 0.0179443359375, "loss_xval": 0.08935546875, "num_input_tokens_seen": 629177884, "step": 3663 }, { "epoch": 0.9637666863944236, "grad_norm": 3.071560976269854, "learning_rate": 5e-06, "loss": 0.0996, "num_input_tokens_seen": 629350348, "step": 3664 }, { "epoch": 0.9637666863944236, "loss": 0.08777488768100739, "loss_ce": 0.0004946062108501792, "loss_iou": 0.455078125, "loss_num": 0.0174560546875, "loss_xval": 0.08740234375, "num_input_tokens_seen": 629350348, "step": 3664 }, { "epoch": 0.9640297231538107, "grad_norm": 4.779549366101704, "learning_rate": 5e-06, "loss": 0.1024, "num_input_tokens_seen": 629522792, "step": 3665 }, { "epoch": 0.9640297231538107, "loss": 0.1326950043439865, "loss_ce": 0.0025680402759462595, "loss_iou": 0.5234375, "loss_num": 0.0260009765625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 629522792, "step": 3665 }, { "epoch": 0.9642927599131978, "grad_norm": 4.346102092926262, "learning_rate": 5e-06, "loss": 0.1112, "num_input_tokens_seen": 629694856, "step": 3666 }, { "epoch": 
0.9642927599131978, "loss": 0.11643362045288086, "loss_ce": 0.0008330341661348939, "loss_iou": 0.6328125, "loss_num": 0.0230712890625, "loss_xval": 0.11572265625, "num_input_tokens_seen": 629694856, "step": 3666 }, { "epoch": 0.964555796672585, "grad_norm": 5.7406338456333845, "learning_rate": 5e-06, "loss": 0.1044, "num_input_tokens_seen": 629866908, "step": 3667 }, { "epoch": 0.964555796672585, "loss": 0.13996919989585876, "loss_ce": 0.0034030412789434195, "loss_iou": 0.37890625, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 629866908, "step": 3667 }, { "epoch": 0.9648188334319722, "grad_norm": 7.17686270310257, "learning_rate": 5e-06, "loss": 0.1069, "num_input_tokens_seen": 630039516, "step": 3668 }, { "epoch": 0.9648188334319722, "loss": 0.12104253470897675, "loss_ce": 0.00020818831399083138, "loss_iou": 0.49609375, "loss_num": 0.024169921875, "loss_xval": 0.12060546875, "num_input_tokens_seen": 630039516, "step": 3668 }, { "epoch": 0.9650818701913593, "grad_norm": 4.2613778122636035, "learning_rate": 5e-06, "loss": 0.0821, "num_input_tokens_seen": 630211624, "step": 3669 }, { "epoch": 0.9650818701913593, "loss": 0.062059760093688965, "loss_ce": 0.0022910854313522577, "loss_iou": 0.34375, "loss_num": 0.011962890625, "loss_xval": 0.059814453125, "num_input_tokens_seen": 630211624, "step": 3669 }, { "epoch": 0.9653449069507464, "grad_norm": 3.5238324652450235, "learning_rate": 5e-06, "loss": 0.1352, "num_input_tokens_seen": 630382332, "step": 3670 }, { "epoch": 0.9653449069507464, "loss": 0.15831097960472107, "loss_ce": 0.00016888529353309423, "loss_iou": 0.453125, "loss_num": 0.03173828125, "loss_xval": 0.158203125, "num_input_tokens_seen": 630382332, "step": 3670 }, { "epoch": 0.9656079437101335, "grad_norm": 3.564039297129955, "learning_rate": 5e-06, "loss": 0.0998, "num_input_tokens_seen": 630552592, "step": 3671 }, { "epoch": 0.9656079437101335, "loss": 0.08011811971664429, "loss_ce": 0.0016269085463136435, "loss_iou": 
0.470703125, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 630552592, "step": 3671 }, { "epoch": 0.9658709804695206, "grad_norm": 9.76288401818362, "learning_rate": 5e-06, "loss": 0.1375, "num_input_tokens_seen": 630722244, "step": 3672 }, { "epoch": 0.9658709804695206, "loss": 0.08490733802318573, "loss_ce": 0.0020673726685345173, "loss_iou": 0.53125, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 630722244, "step": 3672 }, { "epoch": 0.9661340172289078, "grad_norm": 7.354300413013905, "learning_rate": 5e-06, "loss": 0.1318, "num_input_tokens_seen": 630894380, "step": 3673 }, { "epoch": 0.9661340172289078, "loss": 0.13285204768180847, "loss_ce": 0.0009245476103387773, "loss_iou": 0.54296875, "loss_num": 0.0263671875, "loss_xval": 0.1318359375, "num_input_tokens_seen": 630894380, "step": 3673 }, { "epoch": 0.9663970539882949, "grad_norm": 5.188368697186112, "learning_rate": 5e-06, "loss": 0.105, "num_input_tokens_seen": 631066652, "step": 3674 }, { "epoch": 0.9663970539882949, "loss": 0.09987783432006836, "loss_ce": 0.003976346459239721, "loss_iou": 0.5546875, "loss_num": 0.0191650390625, "loss_xval": 0.095703125, "num_input_tokens_seen": 631066652, "step": 3674 }, { "epoch": 0.966660090747682, "grad_norm": 12.325083640976278, "learning_rate": 5e-06, "loss": 0.1216, "num_input_tokens_seen": 631237000, "step": 3675 }, { "epoch": 0.966660090747682, "loss": 0.17316505312919617, "loss_ce": 0.002648080699145794, "loss_iou": 0.404296875, "loss_num": 0.0341796875, "loss_xval": 0.1708984375, "num_input_tokens_seen": 631237000, "step": 3675 }, { "epoch": 0.9669231275070691, "grad_norm": 6.105729054233263, "learning_rate": 5e-06, "loss": 0.1237, "num_input_tokens_seen": 631409164, "step": 3676 }, { "epoch": 0.9669231275070691, "loss": 0.09672191739082336, "loss_ce": 0.0017207016935572028, "loss_iou": 0.466796875, "loss_num": 0.01904296875, "loss_xval": 0.09521484375, "num_input_tokens_seen": 631409164, 
"step": 3676 }, { "epoch": 0.9671861642664562, "grad_norm": 4.682056592281961, "learning_rate": 5e-06, "loss": 0.1134, "num_input_tokens_seen": 631581116, "step": 3677 }, { "epoch": 0.9671861642664562, "loss": 0.12396599352359772, "loss_ce": 0.0040776850655674934, "loss_iou": 0.33984375, "loss_num": 0.02392578125, "loss_xval": 0.1201171875, "num_input_tokens_seen": 631581116, "step": 3677 }, { "epoch": 0.9674492010258433, "grad_norm": 8.33340254914201, "learning_rate": 5e-06, "loss": 0.1258, "num_input_tokens_seen": 631750180, "step": 3678 }, { "epoch": 0.9674492010258433, "loss": 0.09776239842176437, "loss_ce": 0.0013878863537684083, "loss_iou": 0.59375, "loss_num": 0.019287109375, "loss_xval": 0.09619140625, "num_input_tokens_seen": 631750180, "step": 3678 }, { "epoch": 0.9677122377852305, "grad_norm": 3.892518620403468, "learning_rate": 5e-06, "loss": 0.1159, "num_input_tokens_seen": 631922528, "step": 3679 }, { "epoch": 0.9677122377852305, "loss": 0.10747776180505753, "loss_ce": 0.0042520565912127495, "loss_iou": 0.498046875, "loss_num": 0.0206298828125, "loss_xval": 0.10302734375, "num_input_tokens_seen": 631922528, "step": 3679 }, { "epoch": 0.9679752745446176, "grad_norm": 7.586861538526353, "learning_rate": 5e-06, "loss": 0.1193, "num_input_tokens_seen": 632094768, "step": 3680 }, { "epoch": 0.9679752745446176, "loss": 0.08680423349142075, "loss_ce": 0.001721223583444953, "loss_iou": 0.55078125, "loss_num": 0.0169677734375, "loss_xval": 0.0849609375, "num_input_tokens_seen": 632094768, "step": 3680 }, { "epoch": 0.9682383113040047, "grad_norm": 4.2210475168735355, "learning_rate": 5e-06, "loss": 0.0757, "num_input_tokens_seen": 632266740, "step": 3681 }, { "epoch": 0.9682383113040047, "loss": 0.05067894607782364, "loss_ce": 0.0006911527598276734, "loss_iou": 0.5546875, "loss_num": 0.010009765625, "loss_xval": 0.050048828125, "num_input_tokens_seen": 632266740, "step": 3681 }, { "epoch": 0.9685013480633918, "grad_norm": 3.8121173575745755, "learning_rate": 
5e-06, "loss": 0.1237, "num_input_tokens_seen": 632439188, "step": 3682 }, { "epoch": 0.9685013480633918, "loss": 0.11126217991113663, "loss_ce": 0.00033078622072935104, "loss_iou": 0.60546875, "loss_num": 0.022216796875, "loss_xval": 0.11083984375, "num_input_tokens_seen": 632439188, "step": 3682 }, { "epoch": 0.9687643848227789, "grad_norm": 5.838321531180536, "learning_rate": 5e-06, "loss": 0.1026, "num_input_tokens_seen": 632611332, "step": 3683 }, { "epoch": 0.9687643848227789, "loss": 0.08182443678379059, "loss_ce": 0.0006171565037220716, "loss_iou": 0.57421875, "loss_num": 0.0162353515625, "loss_xval": 0.0810546875, "num_input_tokens_seen": 632611332, "step": 3683 }, { "epoch": 0.9690274215821661, "grad_norm": 4.164644957819613, "learning_rate": 5e-06, "loss": 0.086, "num_input_tokens_seen": 632783660, "step": 3684 }, { "epoch": 0.9690274215821661, "loss": 0.12429676204919815, "loss_ce": 0.0015245481627061963, "loss_iou": 0.609375, "loss_num": 0.0245361328125, "loss_xval": 0.12255859375, "num_input_tokens_seen": 632783660, "step": 3684 }, { "epoch": 0.9692904583415533, "grad_norm": 26.321407570319618, "learning_rate": 5e-06, "loss": 0.1137, "num_input_tokens_seen": 632955748, "step": 3685 }, { "epoch": 0.9692904583415533, "loss": 0.05816134810447693, "loss_ce": 0.0002847612486220896, "loss_iou": 0.427734375, "loss_num": 0.0115966796875, "loss_xval": 0.057861328125, "num_input_tokens_seen": 632955748, "step": 3685 }, { "epoch": 0.9695534951009404, "grad_norm": 3.994252764794672, "learning_rate": 5e-06, "loss": 0.0997, "num_input_tokens_seen": 633126124, "step": 3686 }, { "epoch": 0.9695534951009404, "loss": 0.10153771191835403, "loss_ce": 0.00023461380624212325, "loss_iou": 0.51171875, "loss_num": 0.020263671875, "loss_xval": 0.10107421875, "num_input_tokens_seen": 633126124, "step": 3686 }, { "epoch": 0.9698165318603275, "grad_norm": 7.846355332992627, "learning_rate": 5e-06, "loss": 0.1473, "num_input_tokens_seen": 633298344, "step": 3687 }, { "epoch": 
0.9698165318603275, "loss": 0.15658894181251526, "loss_ce": 0.001040847273543477, "loss_iou": 0.478515625, "loss_num": 0.0311279296875, "loss_xval": 0.1552734375, "num_input_tokens_seen": 633298344, "step": 3687 }, { "epoch": 0.9700795686197146, "grad_norm": 6.085055991191323, "learning_rate": 5e-06, "loss": 0.0813, "num_input_tokens_seen": 633470408, "step": 3688 }, { "epoch": 0.9700795686197146, "loss": 0.14665310084819794, "loss_ce": 0.0056618861854076385, "loss_iou": 0.49609375, "loss_num": 0.0281982421875, "loss_xval": 0.140625, "num_input_tokens_seen": 633470408, "step": 3688 }, { "epoch": 0.9703426053791018, "grad_norm": 4.551457844454551, "learning_rate": 5e-06, "loss": 0.0984, "num_input_tokens_seen": 633642716, "step": 3689 }, { "epoch": 0.9703426053791018, "loss": 0.0793912261724472, "loss_ce": 0.0006558679160661995, "loss_iou": 0.5234375, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 633642716, "step": 3689 }, { "epoch": 0.9706056421384889, "grad_norm": 5.945079605333328, "learning_rate": 5e-06, "loss": 0.1123, "num_input_tokens_seen": 633812440, "step": 3690 }, { "epoch": 0.9706056421384889, "loss": 0.08010639995336533, "loss_ce": 0.0009438038687221706, "loss_iou": 0.498046875, "loss_num": 0.015869140625, "loss_xval": 0.0791015625, "num_input_tokens_seen": 633812440, "step": 3690 }, { "epoch": 0.970868678897876, "grad_norm": 4.065153864479657, "learning_rate": 5e-06, "loss": 0.1051, "num_input_tokens_seen": 633985000, "step": 3691 }, { "epoch": 0.970868678897876, "loss": 0.0722232460975647, "loss_ce": 0.0003238367207814008, "loss_iou": 0.4609375, "loss_num": 0.01434326171875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 633985000, "step": 3691 }, { "epoch": 0.9711317156572631, "grad_norm": 4.197621752026495, "learning_rate": 5e-06, "loss": 0.0911, "num_input_tokens_seen": 634157200, "step": 3692 }, { "epoch": 0.9711317156572631, "loss": 0.09805089235305786, "loss_ce": 0.003385363146662712, "loss_iou": 
0.51953125, "loss_num": 0.0189208984375, "loss_xval": 0.0947265625, "num_input_tokens_seen": 634157200, "step": 3692 }, { "epoch": 0.9713947524166502, "grad_norm": 4.118683496322007, "learning_rate": 5e-06, "loss": 0.1073, "num_input_tokens_seen": 634329136, "step": 3693 }, { "epoch": 0.9713947524166502, "loss": 0.06334918737411499, "loss_ce": 0.0014290215913206339, "loss_iou": 0.52734375, "loss_num": 0.01239013671875, "loss_xval": 0.06201171875, "num_input_tokens_seen": 634329136, "step": 3693 }, { "epoch": 0.9716577891760374, "grad_norm": 5.569436401286201, "learning_rate": 5e-06, "loss": 0.0858, "num_input_tokens_seen": 634501660, "step": 3694 }, { "epoch": 0.9716577891760374, "loss": 0.056709855794906616, "loss_ce": 0.0002981142024509609, "loss_iou": 0.447265625, "loss_num": 0.01129150390625, "loss_xval": 0.056396484375, "num_input_tokens_seen": 634501660, "step": 3694 }, { "epoch": 0.9719208259354245, "grad_norm": 15.633714461557659, "learning_rate": 5e-06, "loss": 0.1147, "num_input_tokens_seen": 634673676, "step": 3695 }, { "epoch": 0.9719208259354245, "loss": 0.16554518043994904, "loss_ce": 0.0010554337641224265, "loss_iou": 0.416015625, "loss_num": 0.032958984375, "loss_xval": 0.1640625, "num_input_tokens_seen": 634673676, "step": 3695 }, { "epoch": 0.9721838626948116, "grad_norm": 26.512182592600844, "learning_rate": 5e-06, "loss": 0.1032, "num_input_tokens_seen": 634845964, "step": 3696 }, { "epoch": 0.9721838626948116, "loss": 0.08471856266260147, "loss_ce": 0.0012224669335409999, "loss_iou": 0.43359375, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 634845964, "step": 3696 }, { "epoch": 0.9724468994541987, "grad_norm": 6.8173523395664715, "learning_rate": 5e-06, "loss": 0.0977, "num_input_tokens_seen": 635018328, "step": 3697 }, { "epoch": 0.9724468994541987, "loss": 0.08134950697422028, "loss_ce": 0.0013324212050065398, "loss_iou": 0.5859375, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, 
"num_input_tokens_seen": 635018328, "step": 3697 }, { "epoch": 0.9727099362135858, "grad_norm": 4.383461318684907, "learning_rate": 5e-06, "loss": 0.106, "num_input_tokens_seen": 635190608, "step": 3698 }, { "epoch": 0.9727099362135858, "loss": 0.08322876691818237, "loss_ce": 0.0011364765232428908, "loss_iou": 0.6171875, "loss_num": 0.016357421875, "loss_xval": 0.08203125, "num_input_tokens_seen": 635190608, "step": 3698 }, { "epoch": 0.972972972972973, "grad_norm": 8.267610666760818, "learning_rate": 5e-06, "loss": 0.1073, "num_input_tokens_seen": 635362588, "step": 3699 }, { "epoch": 0.972972972972973, "loss": 0.11783481389284134, "loss_ce": 0.0006015403778292239, "loss_iou": 0.486328125, "loss_num": 0.0234375, "loss_xval": 0.1171875, "num_input_tokens_seen": 635362588, "step": 3699 }, { "epoch": 0.9732360097323601, "grad_norm": 4.332996360892621, "learning_rate": 5e-06, "loss": 0.1041, "num_input_tokens_seen": 635534740, "step": 3700 }, { "epoch": 0.9732360097323601, "loss": 0.1074044331908226, "loss_ce": 0.00025721488054841757, "loss_iou": 0.51953125, "loss_num": 0.0213623046875, "loss_xval": 0.10693359375, "num_input_tokens_seen": 635534740, "step": 3700 }, { "epoch": 0.9734990464917472, "grad_norm": 5.513199468514905, "learning_rate": 5e-06, "loss": 0.1466, "num_input_tokens_seen": 635707288, "step": 3701 }, { "epoch": 0.9734990464917472, "loss": 0.09230555593967438, "loss_ce": 0.0021413678769022226, "loss_iou": 0.56640625, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 635707288, "step": 3701 }, { "epoch": 0.9737620832511343, "grad_norm": 3.768479254290149, "learning_rate": 5e-06, "loss": 0.0828, "num_input_tokens_seen": 635879648, "step": 3702 }, { "epoch": 0.9737620832511343, "loss": 0.03509838879108429, "loss_ce": 0.000766111770644784, "loss_iou": 0.484375, "loss_num": 0.006866455078125, "loss_xval": 0.034423828125, "num_input_tokens_seen": 635879648, "step": 3702 }, { "epoch": 0.9740251200105214, "grad_norm": 
3.566831404475063, "learning_rate": 5e-06, "loss": 0.0964, "num_input_tokens_seen": 636051432, "step": 3703 }, { "epoch": 0.9740251200105214, "loss": 0.09813763201236725, "loss_ce": 0.001030704821459949, "loss_iou": 0.45703125, "loss_num": 0.0194091796875, "loss_xval": 0.09716796875, "num_input_tokens_seen": 636051432, "step": 3703 }, { "epoch": 0.9742881567699085, "grad_norm": 3.4711728772501447, "learning_rate": 5e-06, "loss": 0.0877, "num_input_tokens_seen": 636223588, "step": 3704 }, { "epoch": 0.9742881567699085, "loss": 0.09130216389894485, "loss_ce": 0.0020840244833379984, "loss_iou": 0.25390625, "loss_num": 0.017822265625, "loss_xval": 0.08935546875, "num_input_tokens_seen": 636223588, "step": 3704 }, { "epoch": 0.9745511935292958, "grad_norm": 10.080851555182159, "learning_rate": 5e-06, "loss": 0.125, "num_input_tokens_seen": 636395540, "step": 3705 }, { "epoch": 0.9745511935292958, "loss": 0.07607042789459229, "loss_ce": 0.0012871015351265669, "loss_iou": 0.2353515625, "loss_num": 0.01495361328125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 636395540, "step": 3705 }, { "epoch": 0.9748142302886829, "grad_norm": 3.5401703294732814, "learning_rate": 5e-06, "loss": 0.0976, "num_input_tokens_seen": 636567872, "step": 3706 }, { "epoch": 0.9748142302886829, "loss": 0.1415339708328247, "loss_ce": 0.0009700124501250684, "loss_iou": 0.44140625, "loss_num": 0.028076171875, "loss_xval": 0.140625, "num_input_tokens_seen": 636567872, "step": 3706 }, { "epoch": 0.97507726704807, "grad_norm": 4.65430606928134, "learning_rate": 5e-06, "loss": 0.1013, "num_input_tokens_seen": 636739796, "step": 3707 }, { "epoch": 0.97507726704807, "loss": 0.06319437175989151, "loss_ce": 0.0008469584863632917, "loss_iou": 0.341796875, "loss_num": 0.012451171875, "loss_xval": 0.062255859375, "num_input_tokens_seen": 636739796, "step": 3707 }, { "epoch": 0.9753403038074571, "grad_norm": 12.08107208422745, "learning_rate": 5e-06, "loss": 0.0954, "num_input_tokens_seen": 636911900, 
"step": 3708 }, { "epoch": 0.9753403038074571, "loss": 0.09893044084310532, "loss_ce": 0.004631124436855316, "loss_iou": 0.48828125, "loss_num": 0.018798828125, "loss_xval": 0.09423828125, "num_input_tokens_seen": 636911900, "step": 3708 }, { "epoch": 0.9756033405668442, "grad_norm": 4.1664851424909894, "learning_rate": 5e-06, "loss": 0.1173, "num_input_tokens_seen": 637083804, "step": 3709 }, { "epoch": 0.9756033405668442, "loss": 0.14125725626945496, "loss_ce": 0.0035619523841887712, "loss_iou": 0.408203125, "loss_num": 0.027587890625, "loss_xval": 0.1376953125, "num_input_tokens_seen": 637083804, "step": 3709 }, { "epoch": 0.9758663773262314, "grad_norm": 4.483302931910663, "learning_rate": 5e-06, "loss": 0.0909, "num_input_tokens_seen": 637253480, "step": 3710 }, { "epoch": 0.9758663773262314, "loss": 0.09771590679883957, "loss_ce": 0.0010667359456419945, "loss_iou": 0.498046875, "loss_num": 0.0194091796875, "loss_xval": 0.0966796875, "num_input_tokens_seen": 637253480, "step": 3710 }, { "epoch": 0.9761294140856185, "grad_norm": 4.570273140954518, "learning_rate": 5e-06, "loss": 0.0962, "num_input_tokens_seen": 637425832, "step": 3711 }, { "epoch": 0.9761294140856185, "loss": 0.15927918255329132, "loss_ce": 0.0016558904899284244, "loss_iou": 0.3671875, "loss_num": 0.031494140625, "loss_xval": 0.1572265625, "num_input_tokens_seen": 637425832, "step": 3711 }, { "epoch": 0.9763924508450056, "grad_norm": 4.261644785229519, "learning_rate": 5e-06, "loss": 0.1004, "num_input_tokens_seen": 637597956, "step": 3712 }, { "epoch": 0.9763924508450056, "loss": 0.09720858931541443, "loss_ce": 0.005060770083218813, "loss_iou": 0.400390625, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 637597956, "step": 3712 }, { "epoch": 0.9766554876043927, "grad_norm": 4.316121186379811, "learning_rate": 5e-06, "loss": 0.1502, "num_input_tokens_seen": 637770516, "step": 3713 }, { "epoch": 0.9766554876043927, "loss": 0.23752275109291077, "loss_ce": 
0.003071451559662819, "loss_iou": 0.33984375, "loss_num": 0.046875, "loss_xval": 0.234375, "num_input_tokens_seen": 637770516, "step": 3713 }, { "epoch": 0.9769185243637798, "grad_norm": 3.9858618973194195, "learning_rate": 5e-06, "loss": 0.0865, "num_input_tokens_seen": 637940932, "step": 3714 }, { "epoch": 0.9769185243637798, "loss": 0.09894842654466629, "loss_ce": 0.0026502024848014116, "loss_iou": 0.3984375, "loss_num": 0.019287109375, "loss_xval": 0.09619140625, "num_input_tokens_seen": 637940932, "step": 3714 }, { "epoch": 0.977181561123167, "grad_norm": 6.459702059191179, "learning_rate": 5e-06, "loss": 0.1155, "num_input_tokens_seen": 638112972, "step": 3715 }, { "epoch": 0.977181561123167, "loss": 0.05996452271938324, "loss_ce": 0.0013707715552300215, "loss_iou": 0.54296875, "loss_num": 0.01171875, "loss_xval": 0.05859375, "num_input_tokens_seen": 638112972, "step": 3715 }, { "epoch": 0.9774445978825541, "grad_norm": 4.702404331881163, "learning_rate": 5e-06, "loss": 0.105, "num_input_tokens_seen": 638283296, "step": 3716 }, { "epoch": 0.9774445978825541, "loss": 0.09870034456253052, "loss_ce": 0.0003421921283006668, "loss_iou": 0.3671875, "loss_num": 0.0196533203125, "loss_xval": 0.09814453125, "num_input_tokens_seen": 638283296, "step": 3716 }, { "epoch": 0.9777076346419412, "grad_norm": 6.311133724359332, "learning_rate": 5e-06, "loss": 0.1405, "num_input_tokens_seen": 638455388, "step": 3717 }, { "epoch": 0.9777076346419412, "loss": 0.10849727690219879, "loss_ce": 0.001868859282694757, "loss_iou": 0.39453125, "loss_num": 0.0213623046875, "loss_xval": 0.1064453125, "num_input_tokens_seen": 638455388, "step": 3717 }, { "epoch": 0.9779706714013283, "grad_norm": 4.841088605766017, "learning_rate": 5e-06, "loss": 0.0692, "num_input_tokens_seen": 638627620, "step": 3718 }, { "epoch": 0.9779706714013283, "loss": 0.0839821919798851, "loss_ce": 0.0003640282666310668, "loss_iou": 0.5625, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, 
"num_input_tokens_seen": 638627620, "step": 3718 }, { "epoch": 0.9782337081607154, "grad_norm": 4.2847622894404696, "learning_rate": 5e-06, "loss": 0.1385, "num_input_tokens_seen": 638799776, "step": 3719 }, { "epoch": 0.9782337081607154, "loss": 0.18468543887138367, "loss_ce": 0.0003745291323866695, "loss_iou": 0.4453125, "loss_num": 0.036865234375, "loss_xval": 0.1845703125, "num_input_tokens_seen": 638799776, "step": 3719 }, { "epoch": 0.9784967449201026, "grad_norm": 4.11039448761818, "learning_rate": 5e-06, "loss": 0.0832, "num_input_tokens_seen": 638971952, "step": 3720 }, { "epoch": 0.9784967449201026, "loss": 0.1171189695596695, "loss_ce": 0.002739093266427517, "loss_iou": 0.41796875, "loss_num": 0.02294921875, "loss_xval": 0.1142578125, "num_input_tokens_seen": 638971952, "step": 3720 }, { "epoch": 0.9787597816794897, "grad_norm": 6.78823750558351, "learning_rate": 5e-06, "loss": 0.1295, "num_input_tokens_seen": 639144380, "step": 3721 }, { "epoch": 0.9787597816794897, "loss": 0.10253561288118362, "loss_ce": 0.0012172528076916933, "loss_iou": 0.5078125, "loss_num": 0.020263671875, "loss_xval": 0.1015625, "num_input_tokens_seen": 639144380, "step": 3721 }, { "epoch": 0.9790228184388768, "grad_norm": 3.963795445916244, "learning_rate": 5e-06, "loss": 0.0748, "num_input_tokens_seen": 639316848, "step": 3722 }, { "epoch": 0.9790228184388768, "loss": 0.13296961784362793, "loss_ce": 0.0031783583108335733, "loss_iou": 0.453125, "loss_num": 0.0260009765625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 639316848, "step": 3722 }, { "epoch": 0.9792858551982639, "grad_norm": 11.532632559627872, "learning_rate": 5e-06, "loss": 0.1246, "num_input_tokens_seen": 639489024, "step": 3723 }, { "epoch": 0.9792858551982639, "loss": 0.12053656578063965, "loss_ce": 0.005637889727950096, "loss_iou": 0.3828125, "loss_num": 0.02294921875, "loss_xval": 0.11474609375, "num_input_tokens_seen": 639489024, "step": 3723 }, { "epoch": 0.979548891957651, "grad_norm": 
9.989053455854867, "learning_rate": 5e-06, "loss": 0.0825, "num_input_tokens_seen": 639661440, "step": 3724 }, { "epoch": 0.979548891957651, "loss": 0.13625101745128632, "loss_ce": 0.00029520769021473825, "loss_iou": 0.5078125, "loss_num": 0.0272216796875, "loss_xval": 0.1357421875, "num_input_tokens_seen": 639661440, "step": 3724 }, { "epoch": 0.9798119287170383, "grad_norm": 4.9459595738196285, "learning_rate": 5e-06, "loss": 0.1161, "num_input_tokens_seen": 639833676, "step": 3725 }, { "epoch": 0.9798119287170383, "loss": 0.1375160813331604, "loss_ce": 0.0008125934982672334, "loss_iou": 0.392578125, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 639833676, "step": 3725 }, { "epoch": 0.9800749654764254, "grad_norm": 4.566071751318605, "learning_rate": 5e-06, "loss": 0.1086, "num_input_tokens_seen": 640006092, "step": 3726 }, { "epoch": 0.9800749654764254, "loss": 0.15862302482128143, "loss_ce": 0.006462385877966881, "loss_iou": 0.44140625, "loss_num": 0.0303955078125, "loss_xval": 0.15234375, "num_input_tokens_seen": 640006092, "step": 3726 }, { "epoch": 0.9803380022358125, "grad_norm": 5.733708403269408, "learning_rate": 5e-06, "loss": 0.1203, "num_input_tokens_seen": 640178452, "step": 3727 }, { "epoch": 0.9803380022358125, "loss": 0.045382432639598846, "loss_ce": 0.00027745150146074593, "loss_iou": 0.515625, "loss_num": 0.009033203125, "loss_xval": 0.045166015625, "num_input_tokens_seen": 640178452, "step": 3727 }, { "epoch": 0.9806010389951996, "grad_norm": 4.6254627269264095, "learning_rate": 5e-06, "loss": 0.0833, "num_input_tokens_seen": 640350624, "step": 3728 }, { "epoch": 0.9806010389951996, "loss": 0.054897043853998184, "loss_ce": 0.0018880083225667477, "loss_iou": 0.63671875, "loss_num": 0.0106201171875, "loss_xval": 0.052978515625, "num_input_tokens_seen": 640350624, "step": 3728 }, { "epoch": 0.9808640757545867, "grad_norm": 5.915011241669676, "learning_rate": 5e-06, "loss": 0.1228, "num_input_tokens_seen": 640522796, 
"step": 3729 }, { "epoch": 0.9808640757545867, "loss": 0.12351959943771362, "loss_ce": 0.002242741174995899, "loss_iou": 0.56640625, "loss_num": 0.0242919921875, "loss_xval": 0.12109375, "num_input_tokens_seen": 640522796, "step": 3729 }, { "epoch": 0.9811271125139738, "grad_norm": 13.40274422198988, "learning_rate": 5e-06, "loss": 0.1041, "num_input_tokens_seen": 640694968, "step": 3730 }, { "epoch": 0.9811271125139738, "loss": 0.08854502439498901, "loss_ce": 0.00027292766026221216, "loss_iou": 0.455078125, "loss_num": 0.017578125, "loss_xval": 0.08837890625, "num_input_tokens_seen": 640694968, "step": 3730 }, { "epoch": 0.981390149273361, "grad_norm": 6.656264975637515, "learning_rate": 5e-06, "loss": 0.0847, "num_input_tokens_seen": 640867112, "step": 3731 }, { "epoch": 0.981390149273361, "loss": 0.12663085758686066, "loss_ce": 0.0017834422178566456, "loss_iou": 0.46484375, "loss_num": 0.02490234375, "loss_xval": 0.125, "num_input_tokens_seen": 640867112, "step": 3731 }, { "epoch": 0.9816531860327481, "grad_norm": 11.247340076351778, "learning_rate": 5e-06, "loss": 0.0991, "num_input_tokens_seen": 641039384, "step": 3732 }, { "epoch": 0.9816531860327481, "loss": 0.1410384476184845, "loss_ce": 0.0051131523214280605, "loss_iou": 0.5859375, "loss_num": 0.0272216796875, "loss_xval": 0.1357421875, "num_input_tokens_seen": 641039384, "step": 3732 }, { "epoch": 0.9819162227921352, "grad_norm": 5.304544125644644, "learning_rate": 5e-06, "loss": 0.1061, "num_input_tokens_seen": 641211452, "step": 3733 }, { "epoch": 0.9819162227921352, "loss": 0.12673211097717285, "loss_ce": 0.004204033873975277, "loss_iou": 0.361328125, "loss_num": 0.0245361328125, "loss_xval": 0.12255859375, "num_input_tokens_seen": 641211452, "step": 3733 }, { "epoch": 0.9821792595515223, "grad_norm": 4.504449466935876, "learning_rate": 5e-06, "loss": 0.0833, "num_input_tokens_seen": 641383184, "step": 3734 }, { "epoch": 0.9821792595515223, "loss": 0.09244327247142792, "loss_ce": 0.0025995145551860332, 
"loss_iou": 0.5546875, "loss_num": 0.0179443359375, "loss_xval": 0.08984375, "num_input_tokens_seen": 641383184, "step": 3734 }, { "epoch": 0.9824422963109094, "grad_norm": 4.706100049802098, "learning_rate": 5e-06, "loss": 0.0877, "num_input_tokens_seen": 641555380, "step": 3735 }, { "epoch": 0.9824422963109094, "loss": 0.08688107877969742, "loss_ce": 0.0007604720303788781, "loss_iou": 0.46484375, "loss_num": 0.0172119140625, "loss_xval": 0.0859375, "num_input_tokens_seen": 641555380, "step": 3735 }, { "epoch": 0.9827053330702966, "grad_norm": 8.136156546275306, "learning_rate": 5e-06, "loss": 0.1102, "num_input_tokens_seen": 641727456, "step": 3736 }, { "epoch": 0.9827053330702966, "loss": 0.11328569054603577, "loss_ce": 0.0008589247590862215, "loss_iou": 0.404296875, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 641727456, "step": 3736 }, { "epoch": 0.9829683698296837, "grad_norm": 4.005733802836262, "learning_rate": 5e-06, "loss": 0.074, "num_input_tokens_seen": 641899684, "step": 3737 }, { "epoch": 0.9829683698296837, "loss": 0.09893114119768143, "loss_ce": 0.00038988247979432344, "loss_iou": 0.470703125, "loss_num": 0.019775390625, "loss_xval": 0.0986328125, "num_input_tokens_seen": 641899684, "step": 3737 }, { "epoch": 0.9832314065890708, "grad_norm": 3.6593398060214795, "learning_rate": 5e-06, "loss": 0.0869, "num_input_tokens_seen": 642072000, "step": 3738 }, { "epoch": 0.9832314065890708, "loss": 0.0708111971616745, "loss_ce": 0.003092692233622074, "loss_iou": 0.609375, "loss_num": 0.0135498046875, "loss_xval": 0.06787109375, "num_input_tokens_seen": 642072000, "step": 3738 }, { "epoch": 0.9834944433484579, "grad_norm": 112.80387932882391, "learning_rate": 5e-06, "loss": 0.075, "num_input_tokens_seen": 642243916, "step": 3739 }, { "epoch": 0.9834944433484579, "loss": 0.07938267290592194, "loss_ce": 0.001837508985772729, "loss_iou": 0.5078125, "loss_num": 0.0155029296875, "loss_xval": 0.07763671875, "num_input_tokens_seen": 
642243916, "step": 3739 }, { "epoch": 0.983757480107845, "grad_norm": 46.399369914383044, "learning_rate": 5e-06, "loss": 0.0893, "num_input_tokens_seen": 642415992, "step": 3740 }, { "epoch": 0.983757480107845, "loss": 0.12807899713516235, "loss_ce": 0.0024686530232429504, "loss_iou": 0.458984375, "loss_num": 0.025146484375, "loss_xval": 0.1259765625, "num_input_tokens_seen": 642415992, "step": 3740 }, { "epoch": 0.9840205168672322, "grad_norm": 4.972320382385805, "learning_rate": 5e-06, "loss": 0.0813, "num_input_tokens_seen": 642586472, "step": 3741 }, { "epoch": 0.9840205168672322, "loss": 0.10946183651685715, "loss_ce": 0.0011854701442644, "loss_iou": 0.412109375, "loss_num": 0.021728515625, "loss_xval": 0.1083984375, "num_input_tokens_seen": 642586472, "step": 3741 }, { "epoch": 0.9842835536266193, "grad_norm": 3.8095943920850632, "learning_rate": 5e-06, "loss": 0.1015, "num_input_tokens_seen": 642758552, "step": 3742 }, { "epoch": 0.9842835536266193, "loss": 0.1769047975540161, "loss_ce": 0.002832533325999975, "loss_iou": 0.35546875, "loss_num": 0.034912109375, "loss_xval": 0.173828125, "num_input_tokens_seen": 642758552, "step": 3742 }, { "epoch": 0.9845465903860064, "grad_norm": 5.317599890556273, "learning_rate": 5e-06, "loss": 0.0837, "num_input_tokens_seen": 642930684, "step": 3743 }, { "epoch": 0.9845465903860064, "loss": 0.05296643078327179, "loss_ce": 0.0012238813797011971, "loss_iou": 0.5078125, "loss_num": 0.01031494140625, "loss_xval": 0.0517578125, "num_input_tokens_seen": 642930684, "step": 3743 }, { "epoch": 0.9848096271453936, "grad_norm": 4.846803333953094, "learning_rate": 5e-06, "loss": 0.0912, "num_input_tokens_seen": 643102812, "step": 3744 }, { "epoch": 0.9848096271453936, "loss": 0.10566210746765137, "loss_ce": 0.0007274242816492915, "loss_iou": 0.5234375, "loss_num": 0.02099609375, "loss_xval": 0.10498046875, "num_input_tokens_seen": 643102812, "step": 3744 }, { "epoch": 0.9850726639047807, "grad_norm": 3.406688370945616, 
"learning_rate": 5e-06, "loss": 0.1164, "num_input_tokens_seen": 643275096, "step": 3745 }, { "epoch": 0.9850726639047807, "loss": 0.08728724718093872, "loss_ce": 0.0003884438192471862, "loss_iou": 0.41796875, "loss_num": 0.0174560546875, "loss_xval": 0.0869140625, "num_input_tokens_seen": 643275096, "step": 3745 }, { "epoch": 0.9853357006641679, "grad_norm": 5.344442258228798, "learning_rate": 5e-06, "loss": 0.1098, "num_input_tokens_seen": 643444760, "step": 3746 }, { "epoch": 0.9853357006641679, "loss": 0.07199759781360626, "loss_ce": 0.002844766713678837, "loss_iou": 0.455078125, "loss_num": 0.0137939453125, "loss_xval": 0.0693359375, "num_input_tokens_seen": 643444760, "step": 3746 }, { "epoch": 0.985598737423555, "grad_norm": 9.063261890916973, "learning_rate": 5e-06, "loss": 0.1032, "num_input_tokens_seen": 643617080, "step": 3747 }, { "epoch": 0.985598737423555, "loss": 0.06594318896532059, "loss_ce": 0.0005440223030745983, "loss_iou": 0.39453125, "loss_num": 0.0130615234375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 643617080, "step": 3747 }, { "epoch": 0.9858617741829421, "grad_norm": 3.8879508865341124, "learning_rate": 5e-06, "loss": 0.0591, "num_input_tokens_seen": 643789448, "step": 3748 }, { "epoch": 0.9858617741829421, "loss": 0.056218698620796204, "loss_ce": 0.001195505610667169, "loss_iou": 0.65625, "loss_num": 0.010986328125, "loss_xval": 0.054931640625, "num_input_tokens_seen": 643789448, "step": 3748 }, { "epoch": 0.9861248109423292, "grad_norm": 193.16068563077133, "learning_rate": 5e-06, "loss": 0.1088, "num_input_tokens_seen": 643958208, "step": 3749 }, { "epoch": 0.9861248109423292, "loss": 0.057384688407182693, "loss_ce": 0.003338058013468981, "loss_iou": 0.3984375, "loss_num": 0.01080322265625, "loss_xval": 0.053955078125, "num_input_tokens_seen": 643958208, "step": 3749 }, { "epoch": 0.9863878477017163, "grad_norm": 11.212759382877062, "learning_rate": 5e-06, "loss": 0.0866, "num_input_tokens_seen": 644130472, "step": 3750 }, { 
"epoch": 0.9863878477017163, "eval_websight_new_CIoU": 0.8978322446346283, "eval_websight_new_GIoU": 0.8991535305976868, "eval_websight_new_IoU": 0.9012524485588074, "eval_websight_new_MAE_all": 0.013891254551708698, "eval_websight_new_MAE_h": 0.008930663112550974, "eval_websight_new_MAE_w": 0.020785433240234852, "eval_websight_new_MAE_x": 0.0205678790807724, "eval_websight_new_MAE_y": 0.005281045567244291, "eval_websight_new_NUM_probability": 0.9999656677246094, "eval_websight_new_inside_bbox": 0.984375, "eval_websight_new_loss": 0.06887268275022507, "eval_websight_new_loss_ce": 5.8660152717493474e-06, "eval_websight_new_loss_iou": 0.30145263671875, "eval_websight_new_loss_num": 0.012205123901367188, "eval_websight_new_loss_xval": 0.06097412109375, "eval_websight_new_runtime": 65.4719, "eval_websight_new_samples_per_second": 0.764, "eval_websight_new_steps_per_second": 0.031, "num_input_tokens_seen": 644130472, "step": 3750 }, { "epoch": 0.9863878477017163, "eval_seeclick_CIoU": 0.657441109418869, "eval_seeclick_GIoU": 0.6616063714027405, "eval_seeclick_IoU": 0.6793454885482788, "eval_seeclick_MAE_all": 0.04169847071170807, "eval_seeclick_MAE_h": 0.024878486059606075, "eval_seeclick_MAE_w": 0.05510186776518822, "eval_seeclick_MAE_x": 0.06434983387589455, "eval_seeclick_MAE_y": 0.02246370818465948, "eval_seeclick_NUM_probability": 0.9999178946018219, "eval_seeclick_inside_bbox": 0.953125, "eval_seeclick_loss": 0.169124573469162, "eval_seeclick_loss_ce": 0.009124522097408772, "eval_seeclick_loss_iou": 0.455078125, "eval_seeclick_loss_num": 0.030914306640625, "eval_seeclick_loss_xval": 0.15460205078125, "eval_seeclick_runtime": 75.5663, "eval_seeclick_samples_per_second": 0.569, "eval_seeclick_steps_per_second": 0.026, "num_input_tokens_seen": 644130472, "step": 3750 }, { "epoch": 0.9863878477017163, "eval_icons_CIoU": 0.8627608418464661, "eval_icons_GIoU": 0.863149881362915, "eval_icons_IoU": 0.8711875081062317, "eval_icons_MAE_all": 0.022217202931642532, 
"eval_icons_MAE_h": 0.01895691081881523, "eval_icons_MAE_w": 0.0274124164134264, "eval_icons_MAE_x": 0.0250897784717381, "eval_icons_MAE_y": 0.017409704625606537, "eval_icons_NUM_probability": 0.9999702572822571, "eval_icons_inside_bbox": 0.9565972089767456, "eval_icons_loss": 0.07059822231531143, "eval_icons_loss_ce": 1.0734858733485453e-05, "eval_icons_loss_iou": 0.47100830078125, "eval_icons_loss_num": 0.012834548950195312, "eval_icons_loss_xval": 0.064178466796875, "eval_icons_runtime": 81.7327, "eval_icons_samples_per_second": 0.612, "eval_icons_steps_per_second": 0.024, "num_input_tokens_seen": 644130472, "step": 3750 }, { "epoch": 0.9863878477017163, "eval_screenspot_CIoU": 0.6053812901178995, "eval_screenspot_GIoU": 0.5979885856310526, "eval_screenspot_IoU": 0.637059231599172, "eval_screenspot_MAE_all": 0.07145863150556882, "eval_screenspot_MAE_h": 0.04250209157665571, "eval_screenspot_MAE_w": 0.12718970080216727, "eval_screenspot_MAE_x": 0.07470711196462314, "eval_screenspot_MAE_y": 0.04143562292059263, "eval_screenspot_NUM_probability": 0.9999008377393087, "eval_screenspot_inside_bbox": 0.8841666579246521, "eval_screenspot_loss": 0.9072001576423645, "eval_screenspot_loss_ce": 0.6087295611699423, "eval_screenspot_loss_iou": 0.4524739583333333, "eval_screenspot_loss_num": 0.058499654134114586, "eval_screenspot_loss_xval": 0.29248046875, "eval_screenspot_runtime": 146.6335, "eval_screenspot_samples_per_second": 0.607, "eval_screenspot_steps_per_second": 0.02, "num_input_tokens_seen": 644130472, "step": 3750 } ], "logging_steps": 1.0, "max_steps": 11403, "num_input_tokens_seen": 644130472, "num_train_epochs": 3, "save_steps": 250, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4647228784377856.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }