diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,68893 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9863878477017163, + "eval_steps": 250, + "global_step": 3750, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00026303675938712433, + "grad_norm": 174.60673820638053, + "learning_rate": 5e-06, + "loss": 6.7232, + "num_input_tokens_seen": 172192, + "step": 1 + }, + { + "epoch": 0.00026303675938712433, + "loss": 6.8635101318359375, + "loss_ce": 5.2287445068359375, + "loss_iou": 0.81640625, + "loss_num": 0.328125, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 172192, + "step": 1 + }, + { + "epoch": 0.0005260735187742487, + "grad_norm": 113.61145210636417, + "learning_rate": 5e-06, + "loss": 5.2198, + "num_input_tokens_seen": 344520, + "step": 2 + }, + { + "epoch": 0.0005260735187742487, + "loss": 5.116145133972168, + "loss_ce": 3.611750364303589, + "loss_iou": 0.0, + "loss_num": 0.30078125, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 344520, + "step": 2 + }, + { + "epoch": 0.000789110278161373, + "grad_norm": 181.6487958512906, + "learning_rate": 5e-06, + "loss": 3.6895, + "num_input_tokens_seen": 516868, + "step": 3 + }, + { + "epoch": 0.000789110278161373, + "loss": 3.7434756755828857, + "loss_ce": 2.4915225505828857, + "loss_iou": 0.0, + "loss_num": 0.25, + "loss_xval": 1.25, + "num_input_tokens_seen": 516868, + "step": 3 + }, + { + "epoch": 0.0010521470375484973, + "grad_norm": 136.74708656913646, + "learning_rate": 5e-06, + "loss": 3.5593, + "num_input_tokens_seen": 687240, + "step": 4 + }, + { + "epoch": 0.0010521470375484973, + "loss": 3.5931553840637207, + "loss_ce": 1.3441319465637207, + "loss_iou": 0.80859375, + "loss_num": 0.44921875, + "loss_xval": 2.25, + "num_input_tokens_seen": 687240, + "step": 4 + }, + { + "epoch": 0.0013151837969356218, + "grad_norm": 79.92211667172693, + "learning_rate": 5e-06, + "loss": 2.8187, + "num_input_tokens_seen": 859388, + "step": 5 + }, + { + "epoch": 0.0013151837969356218, + "loss": 2.8585305213928223, + "loss_ce": 1.6251322031021118, + "loss_iou": 0.2216796875, + "loss_num": 0.24609375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 859388, + "step": 5 + }, + { + "epoch": 0.001578220556322746, + "grad_norm": 51.97557929429645, + "learning_rate": 5e-06, + "loss": 2.1961, + "num_input_tokens_seen": 1029916, + "step": 6 + }, + { + "epoch": 0.001578220556322746, + "loss": 2.261239528656006, + "loss_ce": 1.1806731224060059, + "loss_iou": 0.0, + "loss_num": 0.2158203125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 1029916, + "step": 6 + }, + { + "epoch": 0.0018412573157098704, + "grad_norm": 60.93429618456285, + "learning_rate": 5e-06, + "loss": 2.066, + "num_input_tokens_seen": 1202056, + "step": 7 + }, + { + "epoch": 0.0018412573157098704, + "loss": 1.9779821634292603, + "loss_ce": 1.0946813821792603, + "loss_iou": 0.0966796875, + "loss_num": 0.1767578125, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 1202056, + "step": 7 + }, + { + "epoch": 0.0021042940750969946, + "grad_norm": 42.670837197481326, + "learning_rate": 5e-06, + "loss": 2.0543, + "num_input_tokens_seen": 1373904, + "step": 8 + }, + { + "epoch": 0.0021042940750969946, + "loss": 2.022350311279297, + "loss_ce": 0.9168814420700073, + "loss_iou": 0.0, + "loss_num": 0.220703125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 1373904, + "step": 8 + }, + { + "epoch": 0.0023673308344841193, + "grad_norm": 48.53251052050697, + "learning_rate": 5e-06, + "loss": 2.4818, + "num_input_tokens_seen": 1546124, + "step": 9 + }, + { + "epoch": 0.0023673308344841193, + "loss": 2.4849071502685547, + "loss_ce": 0.8374460935592651, + "loss_iou": 0.01202392578125, + "loss_num": 0.330078125, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 1546124, + "step": 9 + }, + { + "epoch": 0.0026303675938712436, + "grad_norm": 30.87810784426679, + "learning_rate": 5e-06, + "loss": 1.8016, + "num_input_tokens_seen": 1715836, + "step": 10 + }, + { + "epoch": 0.0026303675938712436, + "loss": 1.7729861736297607, + "loss_ce": 0.8613650798797607, + "loss_iou": 0.09326171875, + "loss_num": 0.1826171875, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 1715836, + "step": 10 + }, + { + "epoch": 0.002893404353258368, + "grad_norm": 25.204935627992594, + "learning_rate": 5e-06, + "loss": 1.6242, + "num_input_tokens_seen": 1887800, + "step": 11 + }, + { + "epoch": 0.002893404353258368, + "loss": 1.8032605648040771, + "loss_ce": 0.7612683176994324, + "loss_iou": 0.1513671875, + "loss_num": 0.2080078125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 1887800, + "step": 11 + }, + { + "epoch": 0.003156441112645492, + "grad_norm": 27.719293877780373, + "learning_rate": 5e-06, + "loss": 1.5953, + "num_input_tokens_seen": 2058072, + "step": 12 + }, + { + "epoch": 0.003156441112645492, + "loss": 1.530227541923523, + "loss_ce": 0.660110354423523, + "loss_iou": 0.017822265625, + "loss_num": 0.173828125, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 2058072, + "step": 12 + }, + { + "epoch": 0.0034194778720326164, + "grad_norm": 77.78462332066468, + "learning_rate": 5e-06, + "loss": 1.9432, + "num_input_tokens_seen": 2230244, + "step": 13 + }, + { + "epoch": 0.0034194778720326164, + "loss": 2.0266342163085938, + "loss_ce": 0.7092512845993042, + "loss_iou": 0.193359375, + "loss_num": 0.263671875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 2230244, + "step": 13 + }, + { + "epoch": 0.0036825146314197407, + "grad_norm": 51.375242508900634, + "learning_rate": 5e-06, + "loss": 1.9389, + "num_input_tokens_seen": 2402352, + "step": 14 + }, + { + "epoch": 0.0036825146314197407, + "loss": 1.8962193727493286, + "loss_ce": 0.9186803102493286, + "loss_iou": 0.037841796875, + "loss_num": 0.1953125, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 2402352, + "step": 14 + }, + { + "epoch": 0.0039455513908068654, + "grad_norm": 29.266495242442826, + "learning_rate": 5e-06, + "loss": 1.6229, + "num_input_tokens_seen": 2574284, + "step": 15 + }, + { + "epoch": 0.0039455513908068654, + "loss": 1.7341835498809814, + "loss_ce": 0.7971718311309814, + "loss_iou": 0.052734375, + "loss_num": 0.1875, + "loss_xval": 0.9375, + "num_input_tokens_seen": 2574284, + "step": 15 + }, + { + "epoch": 0.004208588150193989, + "grad_norm": 22.43237476170095, + "learning_rate": 5e-06, + "loss": 1.4957, + "num_input_tokens_seen": 2746500, + "step": 16 + }, + { + "epoch": 0.004208588150193989, + "loss": 1.5246176719665527, + "loss_ce": 0.6452231407165527, + "loss_iou": 0.2041015625, + "loss_num": 0.17578125, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 2746500, + "step": 16 + }, + { + "epoch": 0.004471624909581114, + "grad_norm": 20.976636929732265, + "learning_rate": 5e-06, + "loss": 1.3572, + "num_input_tokens_seen": 2918736, + "step": 17 + }, + { + "epoch": 0.004471624909581114, + "loss": 1.3845367431640625, + "loss_ce": 0.6897125244140625, + "loss_iou": 0.0140380859375, + "loss_num": 0.138671875, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 2918736, + "step": 17 + }, + { + "epoch": 0.004734661668968239, + "grad_norm": 23.059953533674786, + "learning_rate": 5e-06, + "loss": 1.3283, + "num_input_tokens_seen": 3091200, + "step": 18 + }, + { + "epoch": 0.004734661668968239, + "loss": 1.3037350177764893, + "loss_ce": 0.5959713459014893, + "loss_iou": 0.052490234375, + "loss_num": 0.1416015625, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 3091200, + "step": 18 + }, + { + "epoch": 0.0049976984283553625, + "grad_norm": 28.375886109478266, + "learning_rate": 5e-06, + "loss": 1.4794, + "num_input_tokens_seen": 3263136, + "step": 19 + }, + { + "epoch": 0.0049976984283553625, + "loss": 1.406355381011963, + "loss_ce": 0.6358475685119629, + "loss_iou": 0.03173828125, + "loss_num": 0.154296875, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 3263136, + "step": 19 + }, + { + "epoch": 0.005260735187742487, + "grad_norm": 25.383372657888618, + "learning_rate": 5e-06, + "loss": 1.4853, + "num_input_tokens_seen": 3435304, + "step": 20 + }, + { + "epoch": 0.005260735187742487, + "loss": 1.4931797981262207, + "loss_ce": 0.6591953635215759, + "loss_iou": 0.1748046875, + "loss_num": 0.1669921875, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 3435304, + "step": 20 + }, + { + "epoch": 0.005523771947129611, + "grad_norm": 20.893788220271592, + "learning_rate": 5e-06, + "loss": 1.2467, + "num_input_tokens_seen": 3604172, + "step": 21 + }, + { + "epoch": 0.005523771947129611, + "loss": 1.2556164264678955, + "loss_ce": 0.5534679889678955, + "loss_iou": NaN, + "loss_num": 0.140625, + "loss_xval": 0.703125, + "num_input_tokens_seen": 3604172, + "step": 21 + }, + { + "epoch": 0.005786808706516736, + "grad_norm": 16.968829307639602, + "learning_rate": 5e-06, + "loss": 1.331, + "num_input_tokens_seen": 3776444, + "step": 22 + }, + { + "epoch": 0.005786808706516736, + "loss": 1.4187769889831543, + "loss_ce": 0.5535425543785095, + "loss_iou": 0.076171875, + "loss_num": 0.1728515625, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 3776444, + "step": 22 + }, + { + "epoch": 0.00604984546590386, + "grad_norm": 17.03009551099335, + "learning_rate": 5e-06, + "loss": 1.2354, + "num_input_tokens_seen": 3948760, + "step": 23 + }, + { + "epoch": 0.00604984546590386, + "loss": 1.2475543022155762, + "loss_ce": 0.5319781303405762, + "loss_iou": 0.07275390625, + "loss_num": 0.142578125, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 3948760, + "step": 23 + }, + { + "epoch": 0.006312882225290984, + "grad_norm": 17.413119153715428, + "learning_rate": 5e-06, + "loss": 1.2253, + "num_input_tokens_seen": 4120896, + "step": 24 + }, + { + "epoch": 0.006312882225290984, + "loss": 1.2492257356643677, + "loss_ce": 0.6032296419143677, + "loss_iou": 0.0400390625, + "loss_num": 0.12890625, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 4120896, + "step": 24 + }, + { + "epoch": 0.006575918984678109, + "grad_norm": 15.646317268104498, + "learning_rate": 5e-06, + "loss": 1.1599, + "num_input_tokens_seen": 4293068, + "step": 25 + }, + { + "epoch": 0.006575918984678109, + "loss": 1.221449613571167, + "loss_ce": 0.590346097946167, + "loss_iou": 0.26171875, + "loss_num": 0.1259765625, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 4293068, + "step": 25 + }, + { + "epoch": 0.006838955744065233, + "grad_norm": 16.80260761287207, + "learning_rate": 5e-06, + "loss": 1.1163, + "num_input_tokens_seen": 4465316, + "step": 26 + }, + { + "epoch": 0.006838955744065233, + "loss": 1.0530353784561157, + "loss_ce": 0.5227619409561157, + "loss_iou": 0.08447265625, + "loss_num": 0.10595703125, + "loss_xval": 0.53125, + "num_input_tokens_seen": 4465316, + "step": 26 + }, + { + "epoch": 0.007101992503452358, + "grad_norm": 15.596544427195678, + "learning_rate": 5e-06, + "loss": 1.0849, + "num_input_tokens_seen": 4636968, + "step": 27 + }, + { + "epoch": 0.007101992503452358, + "loss": 1.1465504169464111, + "loss_ce": 0.5161793231964111, + "loss_iou": 0.173828125, + "loss_num": 0.1259765625, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 4636968, + "step": 27 + }, + { + "epoch": 0.0073650292628394814, + "grad_norm": 17.97152237441673, + "learning_rate": 5e-06, + "loss": 1.1255, + "num_input_tokens_seen": 4808916, + "step": 28 + }, + { + "epoch": 0.0073650292628394814, + "loss": 1.0834856033325195, + "loss_ce": 0.5703020095825195, + "loss_iou": 0.1171875, + "loss_num": 0.1025390625, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 4808916, + "step": 28 + }, + { + "epoch": 0.007628066022226606, + "grad_norm": 26.650637151013097, + "learning_rate": 5e-06, + "loss": 1.0819, + "num_input_tokens_seen": 4979072, + "step": 29 + }, + { + "epoch": 0.007628066022226606, + "loss": 1.083469271659851, + "loss_ce": 0.49557873606681824, + "loss_iou": 0.2470703125, + "loss_num": 0.11767578125, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 4979072, + "step": 29 + }, + { + "epoch": 0.007891102781613731, + "grad_norm": 47.98868961106415, + "learning_rate": 5e-06, + "loss": 1.5465, + "num_input_tokens_seen": 5150996, + "step": 30 + }, + { + "epoch": 0.007891102781613731, + "loss": 1.4472854137420654, + "loss_ce": 0.4790237545967102, + "loss_iou": 0.2001953125, + "loss_num": 0.193359375, + "loss_xval": 0.96875, + "num_input_tokens_seen": 5150996, + "step": 30 + }, + { + "epoch": 0.008154139541000855, + "grad_norm": 36.03247925799523, + "learning_rate": 5e-06, + "loss": 1.479, + "num_input_tokens_seen": 5321588, + "step": 31 + }, + { + "epoch": 0.008154139541000855, + "loss": 1.3790192604064941, + "loss_ce": 0.5357575416564941, + "loss_iou": 0.0703125, + "loss_num": 0.1689453125, + "loss_xval": 0.84375, + "num_input_tokens_seen": 5321588, + "step": 31 + }, + { + "epoch": 0.008417176300387979, + "grad_norm": 18.14805166699206, + "learning_rate": 5e-06, + "loss": 1.1322, + "num_input_tokens_seen": 5493708, + "step": 32 + }, + { + "epoch": 0.008417176300387979, + "loss": 1.069289207458496, + "loss_ce": 0.5192403793334961, + "loss_iou": 0.0654296875, + "loss_num": 0.10986328125, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 5493708, + "step": 32 + }, + { + "epoch": 0.008680213059775104, + "grad_norm": 17.23835135853453, + "learning_rate": 5e-06, + "loss": 1.0735, + "num_input_tokens_seen": 5665524, + "step": 33 + }, + { + "epoch": 0.008680213059775104, + "loss": 1.063108205795288, + "loss_ce": 0.4869362711906433, + "loss_iou": 0.0556640625, + "loss_num": 0.115234375, + "loss_xval": 0.578125, + "num_input_tokens_seen": 5665524, + "step": 33 + }, + { + "epoch": 0.008943249819162228, + "grad_norm": 15.02530944865542, + "learning_rate": 5e-06, + "loss": 1.0261, + "num_input_tokens_seen": 5837680, + "step": 34 + }, + { + "epoch": 0.008943249819162228, + "loss": 1.00763738155365, + "loss_ce": 0.4573444128036499, + "loss_iou": 0.16796875, + "loss_num": 0.1103515625, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 5837680, + "step": 34 + }, + { + "epoch": 0.009206286578549352, + "grad_norm": 16.61205610634839, + "learning_rate": 5e-06, + "loss": 1.0035, + "num_input_tokens_seen": 6009848, + "step": 35 + }, + { + "epoch": 0.009206286578549352, + "loss": 0.9831359386444092, + "loss_ce": 0.46995237469673157, + "loss_iou": 0.123046875, + "loss_num": 0.1025390625, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 6009848, + "step": 35 + }, + { + "epoch": 0.009469323337936477, + "grad_norm": 15.512257057890888, + "learning_rate": 5e-06, + "loss": 1.0223, + "num_input_tokens_seen": 6182164, + "step": 36 + }, + { + "epoch": 0.009469323337936477, + "loss": 1.0216686725616455, + "loss_ce": 0.5036022663116455, + "loss_iou": 0.2021484375, + "loss_num": 0.103515625, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 6182164, + "step": 36 + }, + { + "epoch": 0.009732360097323601, + "grad_norm": 19.305295502162483, + "learning_rate": 5e-06, + "loss": 1.0144, + "num_input_tokens_seen": 6352672, + "step": 37 + }, + { + "epoch": 0.009732360097323601, + "loss": 1.0505774021148682, + "loss_ce": 0.4739171862602234, + "loss_iou": 0.12451171875, + "loss_num": 0.115234375, + "loss_xval": 0.578125, + "num_input_tokens_seen": 6352672, + "step": 37 + }, + { + "epoch": 0.009995396856710725, + "grad_norm": 20.69114777569909, + "learning_rate": 5e-06, + "loss": 1.0293, + "num_input_tokens_seen": 6525264, + "step": 38 + }, + { + "epoch": 0.009995396856710725, + "loss": 1.069566249847412, + "loss_ce": 0.4638533592224121, + "loss_iou": 0.02099609375, + "loss_num": 0.12109375, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 6525264, + "step": 38 + }, + { + "epoch": 0.010258433616097849, + "grad_norm": 32.793827182045035, + "learning_rate": 5e-06, + "loss": 1.0754, + "num_input_tokens_seen": 6697524, + "step": 39 + }, + { + "epoch": 0.010258433616097849, + "loss": 1.0720198154449463, + "loss_ce": 0.5224591493606567, + "loss_iou": 0.099609375, + "loss_num": 0.10986328125, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 6697524, + "step": 39 + }, + { + "epoch": 0.010521470375484974, + "grad_norm": 22.436128254245773, + "learning_rate": 5e-06, + "loss": 1.125, + "num_input_tokens_seen": 6869752, + "step": 40 + }, + { + "epoch": 0.010521470375484974, + "loss": 1.1199672222137451, + "loss_ce": 0.5101040601730347, + "loss_iou": NaN, + "loss_num": 0.1220703125, + "loss_xval": 0.609375, + "num_input_tokens_seen": 6869752, + "step": 40 + }, + { + "epoch": 0.010784507134872098, + "grad_norm": 17.66019658365854, + "learning_rate": 5e-06, + "loss": 0.9975, + "num_input_tokens_seen": 7041884, + "step": 41 + }, + { + "epoch": 0.010784507134872098, + "loss": 0.9673187732696533, + "loss_ce": 0.37991636991500854, + "loss_iou": 0.166015625, + "loss_num": 0.11767578125, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 7041884, + "step": 41 + }, + { + "epoch": 0.011047543894259222, + "grad_norm": 17.977306189337394, + "learning_rate": 5e-06, + "loss": 0.9918, + "num_input_tokens_seen": 7214400, + "step": 42 + }, + { + "epoch": 0.011047543894259222, + "loss": 0.9905650615692139, + "loss_ce": 0.42928582429885864, + "loss_iou": 0.236328125, + "loss_num": 0.1123046875, + "loss_xval": 0.5625, + "num_input_tokens_seen": 7214400, + "step": 42 + }, + { + "epoch": 0.011310580653646348, + "grad_norm": 16.758234031981292, + "learning_rate": 5e-06, + "loss": 1.0186, + "num_input_tokens_seen": 7386432, + "step": 43 + }, + { + "epoch": 0.011310580653646348, + "loss": 1.0381113290786743, + "loss_ce": 0.47536715865135193, + "loss_iou": 0.1748046875, + "loss_num": 0.1123046875, + "loss_xval": 0.5625, + "num_input_tokens_seen": 7386432, + "step": 43 + }, + { + "epoch": 0.011573617413033472, + "grad_norm": 19.20192611310373, + "learning_rate": 5e-06, + "loss": 0.9474, + "num_input_tokens_seen": 7558604, + "step": 44 + }, + { + "epoch": 0.011573617413033472, + "loss": 0.8546841144561768, + "loss_ce": 0.45356106758117676, + "loss_iou": 0.0634765625, + "loss_num": 0.080078125, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 7558604, + "step": 44 + }, + { + "epoch": 0.011836654172420595, + "grad_norm": 24.07755280606732, + "learning_rate": 5e-06, + "loss": 1.0256, + "num_input_tokens_seen": 7730896, + "step": 45 + }, + { + "epoch": 0.011836654172420595, + "loss": 0.960330605506897, + "loss_ce": 0.48523297905921936, + "loss_iou": 0.259765625, + "loss_num": 0.09521484375, + "loss_xval": 0.474609375, + "num_input_tokens_seen": 7730896, + "step": 45 + }, + { + "epoch": 0.01209969093180772, + "grad_norm": 21.918622706923347, + "learning_rate": 5e-06, + "loss": 0.9858, + "num_input_tokens_seen": 7903036, + "step": 46 + }, + { + "epoch": 0.01209969093180772, + "loss": 1.0328285694122314, + "loss_ce": 0.41710585355758667, + "loss_iou": 0.119140625, + "loss_num": 0.123046875, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 7903036, + "step": 46 + }, + { + "epoch": 0.012362727691194845, + "grad_norm": 17.78591800932899, + "learning_rate": 5e-06, + "loss": 0.9217, + "num_input_tokens_seen": 8075568, + "step": 47 + }, + { + "epoch": 0.012362727691194845, + "loss": 0.9050750136375427, + "loss_ce": 0.4551238417625427, + "loss_iou": 0.21875, + "loss_num": 0.08984375, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 8075568, + "step": 47 + }, + { + "epoch": 0.012625764450581969, + "grad_norm": 18.144694112865135, + "learning_rate": 5e-06, + "loss": 0.8763, + "num_input_tokens_seen": 8247888, + "step": 48 + }, + { + "epoch": 0.012625764450581969, + "loss": 0.8872632384300232, + "loss_ce": 0.3972730040550232, + "loss_iou": 0.333984375, + "loss_num": 0.09765625, + "loss_xval": 0.490234375, + "num_input_tokens_seen": 8247888, + "step": 48 + }, + { + "epoch": 0.012888801209969093, + "grad_norm": 18.397929867872243, + "learning_rate": 5e-06, + "loss": 0.8479, + "num_input_tokens_seen": 8419884, + "step": 49 + }, + { + "epoch": 0.012888801209969093, + "loss": 0.8515866994857788, + "loss_ce": 0.3969968557357788, + "loss_iou": 0.109375, + "loss_num": 0.0908203125, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 8419884, + "step": 49 + }, + { + "epoch": 0.013151837969356218, + "grad_norm": 16.926260406149144, + "learning_rate": 5e-06, + "loss": 0.8125, + "num_input_tokens_seen": 8592264, + "step": 50 + }, + { + "epoch": 0.013151837969356218, + "loss": 0.8142160177230835, + "loss_ce": 0.3877023756504059, + "loss_iou": 0.1123046875, + "loss_num": 0.08544921875, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 8592264, + "step": 50 + }, + { + "epoch": 0.013414874728743342, + "grad_norm": 24.314274074580883, + "learning_rate": 5e-06, + "loss": 0.8943, + "num_input_tokens_seen": 8764528, + "step": 51 + }, + { + "epoch": 0.013414874728743342, + "loss": 0.8517386317253113, + "loss_ce": 0.43084025382995605, + "loss_iou": 0.1923828125, + "loss_num": 0.083984375, + "loss_xval": 0.421875, + "num_input_tokens_seen": 8764528, + "step": 51 + }, + { + "epoch": 0.013677911488130466, + "grad_norm": 26.15955176323275, + "learning_rate": 5e-06, + "loss": 0.9129, + "num_input_tokens_seen": 8936892, + "step": 52 + }, + { + "epoch": 0.013677911488130466, + "loss": 0.9079768657684326, + "loss_ce": 0.3640315532684326, + "loss_iou": 0.0654296875, + "loss_num": 0.10888671875, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 8936892, + "step": 52 + }, + { + "epoch": 0.013940948247517591, + "grad_norm": 29.358592778676385, + "learning_rate": 5e-06, + "loss": 0.9409, + "num_input_tokens_seen": 9108972, + "step": 53 + }, + { + "epoch": 0.013940948247517591, + "loss": 0.8754456043243408, + "loss_ce": 0.4201233685016632, + "loss_iou": 0.1279296875, + "loss_num": 0.0908203125, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 9108972, + "step": 53 + }, + { + "epoch": 0.014203985006904715, + "grad_norm": 20.238375239505068, + "learning_rate": 5e-06, + "loss": 0.8928, + "num_input_tokens_seen": 9281248, + "step": 54 + }, + { + "epoch": 0.014203985006904715, + "loss": 0.8624146580696106, + "loss_ce": 0.4217408299446106, + "loss_iou": 0.154296875, + "loss_num": 0.087890625, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 9281248, + "step": 54 + }, + { + "epoch": 0.014467021766291839, + "grad_norm": 21.380412065088443, + "learning_rate": 5e-06, + "loss": 0.8996, + "num_input_tokens_seen": 9453248, + "step": 55 + }, + { + "epoch": 0.014467021766291839, + "loss": 0.8669720888137817, + "loss_ce": 0.3911420404911041, + "loss_iou": 0.095703125, + "loss_num": 0.09521484375, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 9453248, + "step": 55 + }, + { + "epoch": 0.014730058525678963, + "grad_norm": 21.398016209346718, + "learning_rate": 5e-06, + "loss": 0.8695, + "num_input_tokens_seen": 9625308, + "step": 56 + }, + { + "epoch": 0.014730058525678963, + "loss": 0.7916536331176758, + "loss_ce": 0.3697786033153534, + "loss_iou": 0.265625, + "loss_num": 0.08447265625, + "loss_xval": 0.421875, + "num_input_tokens_seen": 9625308, + "step": 56 + }, + { + "epoch": 0.014993095285066088, + "grad_norm": 22.596856129715338, + "learning_rate": 5e-06, + "loss": 0.8385, + "num_input_tokens_seen": 9797472, + "step": 57 + }, + { + "epoch": 0.014993095285066088, + "loss": 0.8159988522529602, + "loss_ce": 0.3955886960029602, + "loss_iou": 0.173828125, + "loss_num": 0.083984375, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 9797472, + "step": 57 + }, + { + "epoch": 0.015256132044453212, + "grad_norm": 19.06235083584469, + "learning_rate": 5e-06, + "loss": 0.791, + "num_input_tokens_seen": 9969708, + "step": 58 + }, + { + "epoch": 0.015256132044453212, + "loss": 0.8669772148132324, + "loss_ce": 0.3650240898132324, + "loss_iou": 0.1318359375, + "loss_num": 0.1005859375, + "loss_xval": 0.5, + "num_input_tokens_seen": 9969708, + "step": 58 + }, + { + "epoch": 0.015519168803840336, + "grad_norm": 19.2496987382769, + "learning_rate": 5e-06, + "loss": 0.8251, + "num_input_tokens_seen": 10141772, + "step": 59 + }, + { + "epoch": 0.015519168803840336, + "loss": 0.8268835544586182, + "loss_ce": 0.39744019508361816, + "loss_iou": 0.3046875, + "loss_num": 0.0859375, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 10141772, + "step": 59 + }, + { + "epoch": 0.015782205563227462, + "grad_norm": 18.91781603668572, + "learning_rate": 5e-06, + "loss": 0.8407, + "num_input_tokens_seen": 10312188, + "step": 60 + }, + { + "epoch": 0.015782205563227462, + "loss": 0.7802078723907471, + "loss_ce": 0.35906529426574707, + "loss_iou": 0.318359375, + "loss_num": 0.083984375, + "loss_xval": 0.421875, + "num_input_tokens_seen": 10312188, + "step": 60 + }, + { + "epoch": 0.016045242322614586, + "grad_norm": 17.697618089112353, + "learning_rate": 5e-06, + "loss": 0.7755, + "num_input_tokens_seen": 10484428, + "step": 61 + }, + { + "epoch": 0.016045242322614586, + "loss": 0.8741220235824585, + "loss_ce": 0.3609383702278137, + "loss_iou": 0.1376953125, + "loss_num": 0.1025390625, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 10484428, + "step": 61 + }, + { + "epoch": 0.01630827908200171, + "grad_norm": 19.734011329204773, + "learning_rate": 5e-06, + "loss": 0.769, + "num_input_tokens_seen": 10656840, + "step": 62 + }, + { + "epoch": 0.01630827908200171, + "loss": 0.7350368499755859, + "loss_ce": 0.36150169372558594, + "loss_iou": 0.099609375, + "loss_num": 0.07470703125, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 10656840, + "step": 62 + }, + { + "epoch": 0.016571315841388833, + "grad_norm": 24.730627091015997, + "learning_rate": 5e-06, + "loss": 0.7931, + "num_input_tokens_seen": 10828884, + "step": 63 + }, + { + "epoch": 0.016571315841388833, + "loss": 0.7593971490859985, + "loss_ce": 0.36462172865867615, + "loss_iou": NaN, + "loss_num": 0.0791015625, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 10828884, + "step": 63 + }, + { + "epoch": 0.016834352600775957, + "grad_norm": 18.667625860875532, + "learning_rate": 5e-06, + "loss": 0.8089, + "num_input_tokens_seen": 11001164, + "step": 64 + }, + { + "epoch": 0.016834352600775957, + "loss": 0.8844671845436096, + "loss_ce": 0.3607855439186096, + "loss_iou": 0.376953125, + "loss_num": 0.10498046875, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 11001164, + "step": 64 + }, + { + "epoch": 0.017097389360163084, + "grad_norm": 23.265809096889907, + "learning_rate": 5e-06, + "loss": 0.7793, + "num_input_tokens_seen": 11173188, + "step": 65 + }, + { + "epoch": 0.017097389360163084, + "loss": 0.7827451229095459, + "loss_ce": 0.3672178089618683, + "loss_iou": 0.255859375, + "loss_num": 0.0830078125, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 11173188, + "step": 65 + }, + { + "epoch": 0.017360426119550208, + "grad_norm": 24.005431476496838, + "learning_rate": 5e-06, + "loss": 0.8061, + "num_input_tokens_seen": 11345216, + "step": 66 + }, + { + "epoch": 0.017360426119550208, + "loss": 0.7301403284072876, + "loss_ce": 0.3616100549697876, + "loss_iou": 0.28515625, + "loss_num": 0.07373046875, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 11345216, + "step": 66 + }, + { + "epoch": 0.017623462878937332, + "grad_norm": 30.491458847817103, + "learning_rate": 5e-06, + "loss": 0.8474, + "num_input_tokens_seen": 11517388, + "step": 67 + }, + { + "epoch": 0.017623462878937332, + "loss": 0.8404921889305115, + "loss_ce": 0.3583144545555115, + "loss_iou": 0.07861328125, + "loss_num": 0.0966796875, + "loss_xval": 0.482421875, + "num_input_tokens_seen": 11517388, + "step": 67 + }, + { + "epoch": 0.017886499638324456, + "grad_norm": 17.38452589704735, + "learning_rate": 5e-06, + "loss": 0.8004, + "num_input_tokens_seen": 11689284, + "step": 68 + }, + { + "epoch": 0.017886499638324456, + "loss": 0.8011830449104309, + "loss_ce": 0.3470814824104309, + "loss_iou": 0.095703125, + "loss_num": 0.0908203125, + "loss_xval": 0.453125, + "num_input_tokens_seen": 11689284, + "step": 68 + }, + { + "epoch": 0.01814953639771158, + "grad_norm": 22.0592753440465, + "learning_rate": 5e-06, + "loss": 0.7808, + "num_input_tokens_seen": 11861484, + "step": 69 + }, + { + "epoch": 0.01814953639771158, + "loss": 0.8063881397247314, + "loss_ce": 0.38475728034973145, + "loss_iou": 0.1826171875, + "loss_num": 0.08447265625, + "loss_xval": 0.421875, + "num_input_tokens_seen": 11861484, + "step": 69 + }, + { + "epoch": 0.018412573157098704, + "grad_norm": 18.31950796902093, + "learning_rate": 5e-06, + "loss": 0.766, + "num_input_tokens_seen": 12033752, + "step": 70 + }, + { + "epoch": 0.018412573157098704, + "loss": 0.7670217156410217, + "loss_ce": 0.31560570001602173, + "loss_iou": 0.275390625, + "loss_num": 0.09033203125, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 12033752, + "step": 70 + }, + { + "epoch": 0.018675609916485827, + "grad_norm": 18.14951438740657, + "learning_rate": 5e-06, + "loss": 0.6971, + "num_input_tokens_seen": 12205604, + "step": 71 + }, + { + "epoch": 0.018675609916485827, + "loss": 0.6512900590896606, + "loss_ce": 0.31205666065216064, + "loss_iou": 0.169921875, + "loss_num": 0.06787109375, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 12205604, + "step": 71 + }, + { + "epoch": 0.018938646675872955, + "grad_norm": 18.910816353150064, + "learning_rate": 5e-06, + "loss": 0.7748, + "num_input_tokens_seen": 12377968, + "step": 72 + }, + { + "epoch": 0.018938646675872955, + "loss": 0.8064651489257812, + "loss_ce": 0.3355178236961365, + "loss_iou": 0.171875, + "loss_num": 0.09423828125, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 12377968, + "step": 72 + }, + { + "epoch": 0.01920168343526008, + "grad_norm": 18.381185745083314, + "learning_rate": 5e-06, + "loss": 0.7552, + "num_input_tokens_seen": 12549896, + "step": 73 + }, + { + "epoch": 0.01920168343526008, + "loss": 0.817658007144928, + "loss_ce": 0.28372251987457275, + "loss_iou": 0.236328125, + "loss_num": 0.10693359375, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 12549896, + "step": 73 + }, + { + "epoch": 0.019464720194647202, + "grad_norm": 16.482511811798446, + "learning_rate": 5e-06, + "loss": 0.6763, + "num_input_tokens_seen": 12721856, + "step": 74 + }, + { + "epoch": 0.019464720194647202, + "loss": 0.5998687744140625, + "loss_ce": 0.2707671821117401, + "loss_iou": 0.333984375, + "loss_num": 0.06591796875, + "loss_xval": 0.328125, + "num_input_tokens_seen": 12721856, + "step": 74 + }, + { + "epoch": 0.019727756954034326, + "grad_norm": 15.444850813034535, + "learning_rate": 5e-06, + "loss": 0.6806, + "num_input_tokens_seen": 12894040, + "step": 75 + }, + { + "epoch": 0.019727756954034326, + "loss": 0.7105993032455444, + "loss_ce": 0.27493035793304443, + "loss_iou": 0.26953125, + "loss_num": 0.08740234375, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 12894040, + "step": 75 + }, + { + "epoch": 0.01999079371342145, + "grad_norm": 20.590812596799903, + "learning_rate": 5e-06, + "loss": 0.7622, + "num_input_tokens_seen": 13064296, + "step": 76 + }, + { + "epoch": 0.01999079371342145, + "loss": 0.6805918216705322, + "loss_ce": 0.3353770077228546, + "loss_iou": 0.26171875, + "loss_num": 0.06884765625, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 13064296, + "step": 76 + }, + { + "epoch": 0.020253830472808574, + "grad_norm": 20.47406440355872, + "learning_rate": 5e-06, + "loss": 0.74, + "num_input_tokens_seen": 13233888, + "step": 77 + }, + { + "epoch": 0.020253830472808574, + "loss": 0.7564910650253296, + "loss_ce": 0.3099578022956848, + "loss_iou": 0.2109375, + "loss_num": 0.08935546875, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 13233888, + "step": 77 + }, + { + "epoch": 0.020516867232195698, + "grad_norm": 19.122832804118445, + "learning_rate": 5e-06, + "loss": 0.741, + "num_input_tokens_seen": 13405900, + "step": 78 + }, + { + "epoch": 0.020516867232195698, + "loss": 0.6968704462051392, + "loss_ce": 0.26059114933013916, + "loss_iou": 0.224609375, + "loss_num": 0.08740234375, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 13405900, + "step": 78 + }, + { + "epoch": 0.020779903991582825, + "grad_norm": 16.68220779277857, + "learning_rate": 5e-06, + "loss": 0.6822, + "num_input_tokens_seen": 13578336, + "step": 79 + }, + { + "epoch": 0.020779903991582825, + "loss": 0.6721813082695007, + "loss_ce": 0.26910513639450073, + "loss_iou": 0.125, + "loss_num": 0.08056640625, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 13578336, + "step": 79 + }, + { + "epoch": 0.02104294075096995, + "grad_norm": 15.592038335799979, + "learning_rate": 5e-06, + "loss": 0.6458, + "num_input_tokens_seen": 13750256, + "step": 80 + }, + { + "epoch": 0.02104294075096995, + "loss": 0.604525625705719, + "loss_ce": 0.253695547580719, + "loss_iou": 0.169921875, + "loss_num": 0.0703125, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 13750256, + "step": 80 + }, + { + "epoch": 0.021305977510357073, + "grad_norm": 19.123452214665015, + "learning_rate": 5e-06, + "loss": 0.7034, + "num_input_tokens_seen": 13922220, + "step": 81 + }, + { + "epoch": 0.021305977510357073, + "loss": 0.7461546659469604, + "loss_ce": 0.28350815176963806, + "loss_iou": 0.27734375, + "loss_num": 0.0927734375, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 13922220, + "step": 81 + }, + { + "epoch": 0.021569014269744197, + "grad_norm": 14.596994780035258, + "learning_rate": 5e-06, + "loss": 0.6357, + "num_input_tokens_seen": 14094312, + "step": 82 + }, + { + "epoch": 0.021569014269744197, + "loss": 0.602331280708313, + "loss_ce": 0.2593136727809906, + "loss_iou": 0.328125, + "loss_num": 0.068359375, + "loss_xval": 0.34375, + "num_input_tokens_seen": 14094312, + "step": 82 + }, + { + "epoch": 0.02183205102913132, + "grad_norm": 16.4283520636599, + "learning_rate": 5e-06, + "loss": 0.5782, + "num_input_tokens_seen": 14264984, + "step": 83 + }, + { + "epoch": 0.02183205102913132, + "loss": 0.5699411630630493, + "loss_ce": 0.2635447084903717, + "loss_iou": 0.306640625, + "loss_num": 0.061279296875, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 14264984, + "step": 83 + }, + { + "epoch": 0.022095087788518444, + "grad_norm": 22.311971257546926, + "learning_rate": 5e-06, + "loss": 0.6398, + "num_input_tokens_seen": 14437168, + "step": 84 + }, + { + "epoch": 0.022095087788518444, + "loss": 0.6384698748588562, + "loss_ce": 0.240032359957695, + "loss_iou": 0.21875, + "loss_num": 0.07958984375, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 14437168, + "step": 84 + }, + { + "epoch": 0.022358124547905568, + "grad_norm": 26.609904157941, + "learning_rate": 5e-06, + "loss": 0.8114, + "num_input_tokens_seen": 14609504, + "step": 85 + }, + { + "epoch": 0.022358124547905568, + "loss": 0.7600446939468384, + "loss_ce": 0.23172441124916077, + "loss_iou": 0.076171875, + "loss_num": 0.10546875, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 14609504, + "step": 85 + }, + { + "epoch": 0.022621161307292696, + "grad_norm": 28.311475657648764, + "learning_rate": 5e-06, + "loss": 0.8067, + "num_input_tokens_seen": 14781812, + "step": 86 + }, + { + "epoch": 0.022621161307292696, + "loss": 0.839857816696167, + "loss_ce": 0.2500140368938446, + "loss_iou": 0.38671875, + "loss_num": 0.1181640625, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 14781812, + "step": 86 + }, + { + "epoch": 0.02288419806667982, + "grad_norm": 24.92836303632298, + "learning_rate": 5e-06, + "loss": 0.731, + "num_input_tokens_seen": 14954408, + "step": 87 + }, + { + "epoch": 0.02288419806667982, + "loss": 0.7506657838821411, + "loss_ce": 0.2797185182571411, + "loss_iou": 0.12890625, + "loss_num": 0.09423828125, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 14954408, + "step": 87 + }, + { + "epoch": 0.023147234826066943, + "grad_norm": 26.5172487427058, + "learning_rate": 5e-06, + "loss": 0.8202, + "num_input_tokens_seen": 15123552, + "step": 88 + }, + { + "epoch": 0.023147234826066943, + "loss": 0.808368980884552, + "loss_ce": 0.243183434009552, + "loss_iou": 0.234375, + "loss_num": 0.11328125, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 15123552, + "step": 88 + }, + { + "epoch": 0.023410271585454067, + "grad_norm": 17.71060921250277, + "learning_rate": 5e-06, + "loss": 0.6546, + "num_input_tokens_seen": 15295844, + "step": 89 + }, + { + "epoch": 0.023410271585454067, + "loss": 0.6125390529632568, + "loss_ce": 0.24852542579174042, + "loss_iou": 0.2021484375, + "loss_num": 0.07275390625, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 15295844, + "step": 89 + }, + { + "epoch": 0.02367330834484119, + "grad_norm": 15.735800845783544, + "learning_rate": 5e-06, + "loss": 0.5838, + "num_input_tokens_seen": 15467856, + "step": 90 + }, + { + "epoch": 0.02367330834484119, + "loss": 0.5133854150772095, + "loss_ce": 0.22456704080104828, + "loss_iou": 0.2412109375, + "loss_num": 0.057861328125, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 15467856, + "step": 90 + }, + { + "epoch": 0.023936345104228315, + "grad_norm": 16.198479692260427, + "learning_rate": 5e-06, + "loss": 0.6229, + "num_input_tokens_seen": 15640280, + "step": 91 + }, + { + "epoch": 0.023936345104228315, + "loss": 0.5497957468032837, + "loss_ce": 0.23717370629310608, + "loss_iou": 0.30859375, + "loss_num": 0.0625, + "loss_xval": 0.3125, + "num_input_tokens_seen": 15640280, + "step": 91 + }, + { + "epoch": 0.02419938186361544, + "grad_norm": 15.820711258384152, + "learning_rate": 5e-06, + "loss": 0.5852, + "num_input_tokens_seen": 15812532, + "step": 92 + }, + { + "epoch": 0.02419938186361544, + "loss": 0.5448044538497925, + "loss_ce": 0.22046364843845367, + "loss_iou": 0.1826171875, + "loss_num": 0.06494140625, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 15812532, + "step": 92 + }, + { + "epoch": 0.024462418623002566, + "grad_norm": 19.666745517026683, + "learning_rate": 5e-06, + "loss": 0.5956, + "num_input_tokens_seen": 15984492, + "step": 93 + }, + { + "epoch": 0.024462418623002566, + "loss": 0.6414846181869507, + "loss_ce": 0.22668972611427307, + "loss_iou": 0.232421875, + "loss_num": 0.0830078125, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 15984492, + "step": 93 + }, + { + "epoch": 0.02472545538238969, + "grad_norm": 18.823515656198328, + "learning_rate": 5e-06, + "loss": 0.5443, + "num_input_tokens_seen": 16156556, + "step": 94 + }, + { + "epoch": 0.02472545538238969, + "loss": 0.507426381111145, + "loss_ce": 0.2339888960123062, + "loss_iou": 0.396484375, + "loss_num": 0.0546875, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 16156556, + "step": 94 + }, + { + "epoch": 0.024988492141776814, + "grad_norm": 14.625660314629584, + "learning_rate": 5e-06, + "loss": 0.5402, + "num_input_tokens_seen": 16326928, + "step": 95 + }, + { + "epoch": 0.024988492141776814, + "loss": 0.5663172006607056, + "loss_ce": 0.20572152733802795, + "loss_iou": 0.2421875, + "loss_num": 0.072265625, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 16326928, + "step": 95 + }, + { + "epoch": 0.025251528901163937, + "grad_norm": 15.783911320035779, + "learning_rate": 5e-06, + "loss": 0.5712, + "num_input_tokens_seen": 16499268, + "step": 96 + }, + { + "epoch": 0.025251528901163937, + "loss": 0.5024785399436951, + "loss_ce": 0.19632619619369507, + "loss_iou": 0.251953125, + "loss_num": 0.061279296875, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 16499268, + "step": 96 + }, + { + "epoch": 0.02551456566055106, + "grad_norm": 16.72236617723868, + "learning_rate": 5e-06, + "loss": 0.6175, + "num_input_tokens_seen": 16668388, + "step": 97 + }, + { + "epoch": 0.02551456566055106, + "loss": 0.6046057939529419, + "loss_ce": 0.19554820656776428, + "loss_iou": 0.287109375, + "loss_num": 0.08203125, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 16668388, + "step": 97 + }, + { + "epoch": 0.025777602419938185, + "grad_norm": 19.132328111043208, + "learning_rate": 5e-06, + "loss": 0.6948, + "num_input_tokens_seen": 16840748, + "step": 98 + }, + { + "epoch": 0.025777602419938185, + "loss": 0.635749340057373, + "loss_ce": 0.22290757298469543, + "loss_iou": 0.1416015625, + "loss_num": 0.08251953125, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 16840748, + "step": 98 + }, + { + "epoch": 0.026040639179325312, + "grad_norm": 17.546404729768323, + "learning_rate": 5e-06, + "loss": 0.6342, + "num_input_tokens_seen": 17012828, + "step": 99 + }, + { + "epoch": 0.026040639179325312, + "loss": 0.6548875570297241, + "loss_ce": 0.23105943202972412, + "loss_iou": 0.283203125, + "loss_num": 0.0849609375, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 17012828, + "step": 99 + }, + { + "epoch": 0.026303675938712436, + "grad_norm": 18.526246284946534, + "learning_rate": 5e-06, + "loss": 0.6155, + "num_input_tokens_seen": 17185216, + "step": 100 + }, + { + "epoch": 0.026303675938712436, + "loss": 0.6633030772209167, + "loss_ce": 0.20383042097091675, + "loss_iou": 0.263671875, + "loss_num": 0.091796875, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 17185216, + "step": 100 + }, + { + "epoch": 0.02656671269809956, + "grad_norm": 19.37711860101834, + "learning_rate": 5e-06, + "loss": 0.6252, + "num_input_tokens_seen": 17355688, + "step": 101 + }, + { + "epoch": 0.02656671269809956, + "loss": 0.6485173106193542, + "loss_ce": 0.20686691999435425, + "loss_iou": 0.146484375, + "loss_num": 0.08837890625, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 17355688, + "step": 101 + }, + { + "epoch": 0.026829749457486684, + "grad_norm": 24.272978763244147, + "learning_rate": 5e-06, + "loss": 0.6144, + "num_input_tokens_seen": 17527784, + "step": 102 + }, + { + "epoch": 0.026829749457486684, + "loss": 0.6552723050117493, + "loss_ce": 0.20190313458442688, + "loss_iou": 0.1513671875, + "loss_num": 0.0908203125, + "loss_xval": 0.453125, + "num_input_tokens_seen": 17527784, + "step": 102 + }, + { + "epoch": 0.027092786216873808, + "grad_norm": 28.829409367834085, + "learning_rate": 5e-06, + "loss": 0.6606, + "num_input_tokens_seen": 17700208, + "step": 103 + }, + { + "epoch": 0.027092786216873808, + "loss": 0.7094471454620361, + "loss_ce": 0.19260142743587494, + "loss_iou": 0.3046875, + "loss_num": 0.10302734375, + "loss_xval": 0.515625, + "num_input_tokens_seen": 17700208, + "step": 103 + }, + { + "epoch": 0.02735582297626093, + "grad_norm": 23.160720035944347, + "learning_rate": 5e-06, + "loss": 0.7452, + "num_input_tokens_seen": 17872308, + "step": 104 + }, + { + "epoch": 0.02735582297626093, + "loss": 0.7341784238815308, + "loss_ce": 0.19169792532920837, + "loss_iou": 0.1513671875, + "loss_num": 0.1083984375, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 17872308, + "step": 104 + }, + { + "epoch": 0.027618859735648055, + "grad_norm": 17.418429269221175, + "learning_rate": 5e-06, + "loss": 0.5803, + "num_input_tokens_seen": 18044728, + "step": 105 + }, + { + "epoch": 0.027618859735648055, + "loss": 0.5607779026031494, + "loss_ce": 0.20787259936332703, + "loss_iou": 0.2353515625, + "loss_num": 0.07080078125, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 18044728, + "step": 105 + }, + { + "epoch": 0.027881896495035183, + "grad_norm": 17.11728575546532, + "learning_rate": 5e-06, + "loss": 0.5934, + "num_input_tokens_seen": 18215172, + "step": 106 + }, + { + "epoch": 0.027881896495035183, + "loss": 0.609626293182373, + "loss_ce": 0.17749738693237305, + "loss_iou": 0.09228515625, + "loss_num": 0.08642578125, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 18215172, + "step": 106 + }, + { + "epoch": 0.028144933254422307, + "grad_norm": 17.86366659461009, + "learning_rate": 5e-06, + "loss": 0.5335, + "num_input_tokens_seen": 18387084, + "step": 107 + }, + { + "epoch": 0.028144933254422307, + "loss": 0.48998841643333435, + "loss_ce": 0.19213685393333435, + "loss_iou": 0.3046875, + "loss_num": 0.0595703125, + "loss_xval": 0.296875, + "num_input_tokens_seen": 18387084, + "step": 107 + }, + { + "epoch": 0.02840797001380943, + "grad_norm": 20.662496273440333, + "learning_rate": 5e-06, + "loss": 0.5971, + "num_input_tokens_seen": 18559272, + "step": 108 + }, + { + "epoch": 0.02840797001380943, + "loss": 0.6421129703521729, + "loss_ce": 0.18044306337833405, + "loss_iou": 0.14453125, + "loss_num": 0.09228515625, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 18559272, + "step": 108 + }, + { + "epoch": 0.028671006773196554, + "grad_norm": 21.435817028210696, + "learning_rate": 5e-06, + "loss": 0.5635, + "num_input_tokens_seen": 18731380, + "step": 109 + }, + { + "epoch": 0.028671006773196554, + "loss": 0.5063576698303223, + "loss_ce": 0.18104028701782227, + "loss_iou": 0.1884765625, + "loss_num": 0.06494140625, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 18731380, + "step": 109 + }, + { + "epoch": 0.028934043532583678, + "grad_norm": 18.8758769532993, + "learning_rate": 5e-06, + "loss": 0.6115, + "num_input_tokens_seen": 18901956, + "step": 110 + }, + { + "epoch": 0.028934043532583678, + "loss": 0.6692255139350891, + "loss_ce": 0.17020206153392792, + "loss_iou": 0.20703125, + "loss_num": 0.099609375, + "loss_xval": 0.5, + "num_input_tokens_seen": 18901956, + "step": 110 + }, + { + "epoch": 0.029197080291970802, + "grad_norm": 17.651564073843637, + "learning_rate": 5e-06, + "loss": 0.5336, + "num_input_tokens_seen": 19072248, + "step": 111 + }, + { + "epoch": 0.029197080291970802, + "loss": 0.4951699376106262, + "loss_ce": 0.17558985948562622, + "loss_iou": 0.6640625, + "loss_num": 0.06396484375, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 19072248, + "step": 111 + }, + { + "epoch": 0.029460117051357926, + "grad_norm": 16.158969266118735, + "learning_rate": 5e-06, + "loss": 0.4952, + "num_input_tokens_seen": 19244772, + "step": 112 + }, + { + "epoch": 0.029460117051357926, + "loss": 0.4770987629890442, + "loss_ce": 0.1765616536140442, + "loss_iou": 0.1904296875, + "loss_num": 0.06005859375, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 19244772, + "step": 112 + }, + { + "epoch": 0.029723153810745053, + "grad_norm": 14.450830992407502, + "learning_rate": 5e-06, + "loss": 0.5026, + "num_input_tokens_seen": 19416868, + "step": 113 + }, + { + "epoch": 0.029723153810745053, + "loss": 0.4577527642250061, + "loss_ce": 0.1758924424648285, + "loss_iou": 0.400390625, + "loss_num": 0.056396484375, + "loss_xval": 0.28125, + "num_input_tokens_seen": 19416868, + "step": 113 + }, + { + "epoch": 0.029986190570132177, + "grad_norm": 14.560235178346835, + "learning_rate": 5e-06, + "loss": 0.4865, + "num_input_tokens_seen": 19589016, + "step": 114 + }, + { + "epoch": 0.029986190570132177, + "loss": 0.5253910422325134, + "loss_ce": 0.15832561254501343, + "loss_iou": 0.173828125, + "loss_num": 0.0732421875, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 19589016, + "step": 114 + }, + { + "epoch": 0.0302492273295193, + "grad_norm": 18.605711889816632, + "learning_rate": 5e-06, + "loss": 0.49, + "num_input_tokens_seen": 19760904, + "step": 115 + }, + { + "epoch": 0.0302492273295193, + "loss": 0.4801591634750366, + "loss_ce": 0.17290815711021423, + "loss_iou": 0.416015625, + "loss_num": 0.061279296875, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 19760904, + "step": 115 + }, + { + "epoch": 0.030512264088906425, + "grad_norm": 20.839979972241576, + "learning_rate": 5e-06, + "loss": 0.5137, + "num_input_tokens_seen": 19932724, + "step": 116 + }, + { + "epoch": 0.030512264088906425, + "loss": 0.4843531847000122, + "loss_ce": 0.14585217833518982, + "loss_iou": NaN, + "loss_num": 0.06787109375, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 19932724, + "step": 116 + }, + { + "epoch": 0.03077530084829355, + "grad_norm": 20.407619855634906, + "learning_rate": 5e-06, + "loss": 0.5188, + "num_input_tokens_seen": 20105196, + "step": 117 + }, + { + "epoch": 0.03077530084829355, + "loss": 0.521455705165863, + "loss_ce": 0.17148011922836304, + "loss_iou": 0.275390625, + "loss_num": 0.06982421875, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 20105196, + "step": 117 + }, + { + "epoch": 0.031038337607680672, + "grad_norm": 17.136634668569023, + "learning_rate": 5e-06, + "loss": 0.529, + "num_input_tokens_seen": 20277468, + "step": 118 + }, + { + "epoch": 0.031038337607680672, + "loss": 0.5619306564331055, + "loss_ce": 0.15323926508426666, + "loss_iou": 0.224609375, + "loss_num": 0.08203125, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 20277468, + "step": 118 + }, + { + "epoch": 0.0313013743670678, + "grad_norm": 15.540166390852267, + "learning_rate": 5e-06, + "loss": 0.5196, + "num_input_tokens_seen": 20448108, + "step": 119 + }, + { + "epoch": 0.0313013743670678, + "loss": 0.5409432649612427, + "loss_ce": 0.15117278695106506, + "loss_iou": 0.40234375, + "loss_num": 0.078125, + "loss_xval": 0.390625, + "num_input_tokens_seen": 20448108, + "step": 119 + }, + { + "epoch": 0.031564411126454923, + "grad_norm": 12.306363086115368, + "learning_rate": 5e-06, + "loss": 0.4996, + "num_input_tokens_seen": 20620368, + "step": 120 + }, + { + "epoch": 0.031564411126454923, + "loss": 0.49895310401916504, + "loss_ce": 0.13860151171684265, + "loss_iou": 0.32421875, + "loss_num": 0.072265625, + "loss_xval": 0.359375, + "num_input_tokens_seen": 20620368, + "step": 120 + }, + { + "epoch": 0.03182744788584205, + "grad_norm": 13.090987000723873, + "learning_rate": 5e-06, + "loss": 0.4537, + "num_input_tokens_seen": 20792584, + "step": 121 + }, + { + "epoch": 0.03182744788584205, + "loss": 0.47374969720840454, + "loss_ce": 0.14452606439590454, + "loss_iou": 0.3203125, + "loss_num": 0.06591796875, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 20792584, + "step": 121 + }, + { + "epoch": 0.03209048464522917, + "grad_norm": 12.574059354034341, + "learning_rate": 5e-06, + "loss": 0.4245, + "num_input_tokens_seen": 20964948, + "step": 122 + }, + { + "epoch": 0.03209048464522917, + "loss": 0.4678102135658264, + "loss_ce": 0.12357192486524582, + "loss_iou": 0.10400390625, + "loss_num": 0.06884765625, + "loss_xval": 0.34375, + "num_input_tokens_seen": 20964948, + "step": 122 + }, + { + "epoch": 0.032353521404616295, + "grad_norm": 34.33651980128175, + "learning_rate": 5e-06, + "loss": 0.5624, + "num_input_tokens_seen": 21134744, + "step": 123 + }, + { + "epoch": 0.032353521404616295, + "loss": 0.4556346535682678, + "loss_ce": 0.13812974095344543, + "loss_iou": 0.6640625, + "loss_num": 0.0634765625, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 21134744, + "step": 123 + }, + { + "epoch": 0.03261655816400342, + "grad_norm": 22.118966434798295, + "learning_rate": 5e-06, + "loss": 0.6017, + "num_input_tokens_seen": 21305320, + "step": 124 + }, + { + "epoch": 0.03261655816400342, + "loss": 0.5379438996315002, + "loss_ce": 0.14316853880882263, + "loss_iou": 0.345703125, + "loss_num": 0.0791015625, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 21305320, + "step": 124 + }, + { + "epoch": 0.03287959492339054, + "grad_norm": 22.565129824621987, + "learning_rate": 5e-06, + "loss": 0.6452, + "num_input_tokens_seen": 21477544, + "step": 125 + }, + { + "epoch": 0.03287959492339054, + "loss": 0.5944963693618774, + "loss_ce": 0.13990655541419983, + "loss_iou": 0.216796875, + "loss_num": 0.0908203125, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 21477544, + "step": 125 + }, + { + "epoch": 0.033142631682777667, + "grad_norm": 13.89281994742959, + "learning_rate": 5e-06, + "loss": 0.5755, + "num_input_tokens_seen": 21649600, + "step": 126 + }, + { + "epoch": 0.033142631682777667, + "loss": 0.5376471877098083, + "loss_ce": 0.12211985141038895, + "loss_iou": 0.10888671875, + "loss_num": 0.0830078125, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 21649600, + "step": 126 + }, + { + "epoch": 0.03340566844216479, + "grad_norm": 15.070169412653112, + "learning_rate": 5e-06, + "loss": 0.4704, + "num_input_tokens_seen": 21821564, + "step": 127 + }, + { + "epoch": 0.03340566844216479, + "loss": 0.4526183605194092, + "loss_ce": 0.12595820426940918, + "loss_iou": 0.21484375, + "loss_num": 0.0654296875, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 21821564, + "step": 127 + }, + { + "epoch": 0.033668705201551914, + "grad_norm": 14.62625838728594, + "learning_rate": 5e-06, + "loss": 0.4885, + "num_input_tokens_seen": 21993864, + "step": 128 + }, + { + "epoch": 0.033668705201551914, + "loss": 0.5165751576423645, + "loss_ce": 0.12289837747812271, + "loss_iou": 0.28515625, + "loss_num": 0.07861328125, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 21993864, + "step": 128 + }, + { + "epoch": 0.03393174196093904, + "grad_norm": 20.05433315099477, + "learning_rate": 5e-06, + "loss": 0.4754, + "num_input_tokens_seen": 22162596, + "step": 129 + }, + { + "epoch": 0.03393174196093904, + "loss": 0.4400935173034668, + "loss_ce": 0.11538645625114441, + "loss_iou": 0.59375, + "loss_num": 0.06494140625, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 22162596, + "step": 129 + }, + { + "epoch": 0.03419477872032617, + "grad_norm": 17.28797678972647, + "learning_rate": 5e-06, + "loss": 0.5272, + "num_input_tokens_seen": 22334624, + "step": 130 + }, + { + "epoch": 0.03419477872032617, + "loss": 0.5726144313812256, + "loss_ce": 0.1124093234539032, + "loss_iou": 0.17578125, + "loss_num": 0.091796875, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 22334624, + "step": 130 + }, + { + "epoch": 0.03445781547971329, + "grad_norm": 19.611364973231527, + "learning_rate": 5e-06, + "loss": 0.5629, + "num_input_tokens_seen": 22506552, + "step": 131 + }, + { + "epoch": 0.03445781547971329, + "loss": 0.5982115268707275, + "loss_ce": 0.10993030667304993, + "loss_iou": 0.08642578125, + "loss_num": 0.09765625, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 22506552, + "step": 131 + }, + { + "epoch": 0.034720852239100417, + "grad_norm": 16.543739789854843, + "learning_rate": 5e-06, + "loss": 0.5381, + "num_input_tokens_seen": 22678808, + "step": 132 + }, + { + "epoch": 0.034720852239100417, + "loss": 0.45885854959487915, + "loss_ce": 0.11523060500621796, + "loss_iou": 0.337890625, + "loss_num": 0.06884765625, + "loss_xval": 0.34375, + "num_input_tokens_seen": 22678808, + "step": 132 + }, + { + "epoch": 0.03498388899848754, + "grad_norm": 17.674510611725847, + "learning_rate": 5e-06, + "loss": 0.4525, + "num_input_tokens_seen": 22850736, + "step": 133 + }, + { + "epoch": 0.03498388899848754, + "loss": 0.45272764563560486, + "loss_ce": 0.12191709131002426, + "loss_iou": 0.2216796875, + "loss_num": 0.06640625, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 22850736, + "step": 133 + }, + { + "epoch": 0.035246925757874664, + "grad_norm": 15.972341632067714, + "learning_rate": 5e-06, + "loss": 0.4719, + "num_input_tokens_seen": 23020040, + "step": 134 + }, + { + "epoch": 0.035246925757874664, + "loss": 0.5267431139945984, + "loss_ce": 0.11499997228384018, + "loss_iou": 0.232421875, + "loss_num": 0.08251953125, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 23020040, + "step": 134 + }, + { + "epoch": 0.03550996251726179, + "grad_norm": 18.16452757572112, + "learning_rate": 5e-06, + "loss": 0.4878, + "num_input_tokens_seen": 23192048, + "step": 135 + }, + { + "epoch": 0.03550996251726179, + "loss": 0.42464134097099304, + "loss_ce": 0.10701439529657364, + "loss_iou": 0.3671875, + "loss_num": 0.0634765625, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 23192048, + "step": 135 + }, + { + "epoch": 0.03577299927664891, + "grad_norm": 16.237572483673375, + "learning_rate": 5e-06, + "loss": 0.4644, + "num_input_tokens_seen": 23360448, + "step": 136 + }, + { + "epoch": 0.03577299927664891, + "loss": 0.4710727334022522, + "loss_ce": 0.0985141396522522, + "loss_iou": 0.263671875, + "loss_num": 0.07470703125, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 23360448, + "step": 136 + }, + { + "epoch": 0.036036036036036036, + "grad_norm": 18.804237772436053, + "learning_rate": 5e-06, + "loss": 0.4291, + "num_input_tokens_seen": 23530300, + "step": 137 + }, + { + "epoch": 0.036036036036036036, + "loss": 0.47930601239204407, + "loss_ce": 0.10406187921762466, + "loss_iou": 0.29296875, + "loss_num": 0.0751953125, + "loss_xval": 0.375, + "num_input_tokens_seen": 23530300, + "step": 137 + }, + { + "epoch": 0.03629907279542316, + "grad_norm": 17.355021474463314, + "learning_rate": 5e-06, + "loss": 0.5374, + "num_input_tokens_seen": 23702304, + "step": 138 + }, + { + "epoch": 0.03629907279542316, + "loss": 0.6615355014801025, + "loss_ce": 0.10416243970394135, + "loss_iou": 0.267578125, + "loss_num": 0.111328125, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 23702304, + "step": 138 + }, + { + "epoch": 0.03656210955481028, + "grad_norm": 14.642948575679453, + "learning_rate": 5e-06, + "loss": 0.4417, + "num_input_tokens_seen": 23872132, + "step": 139 + }, + { + "epoch": 0.03656210955481028, + "loss": 0.44469529390335083, + "loss_ce": 0.10735400021076202, + "loss_iou": 0.376953125, + "loss_num": 0.0673828125, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 23872132, + "step": 139 + }, + { + "epoch": 0.03682514631419741, + "grad_norm": 14.422554847110444, + "learning_rate": 5e-06, + "loss": 0.4228, + "num_input_tokens_seen": 24044200, + "step": 140 + }, + { + "epoch": 0.03682514631419741, + "loss": 0.4664979577064514, + "loss_ce": 0.10651260614395142, + "loss_iou": 0.275390625, + "loss_num": 0.07177734375, + "loss_xval": 0.359375, + "num_input_tokens_seen": 24044200, + "step": 140 + }, + { + "epoch": 0.03708818307358453, + "grad_norm": 27.027123187239088, + "learning_rate": 5e-06, + "loss": 0.4106, + "num_input_tokens_seen": 24216464, + "step": 141 + }, + { + "epoch": 0.03708818307358453, + "loss": 0.4140710234642029, + "loss_ce": 0.09876340627670288, + "loss_iou": 0.2275390625, + "loss_num": 0.06298828125, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 24216464, + "step": 141 + }, + { + "epoch": 0.037351219832971655, + "grad_norm": 20.30988259642017, + "learning_rate": 5e-06, + "loss": 0.5663, + "num_input_tokens_seen": 24386876, + "step": 142 + }, + { + "epoch": 0.037351219832971655, + "loss": 0.500027596950531, + "loss_ce": 0.091580331325531, + "loss_iou": 0.25390625, + "loss_num": 0.08154296875, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 24386876, + "step": 142 + }, + { + "epoch": 0.03761425659235878, + "grad_norm": 17.151019381080523, + "learning_rate": 5e-06, + "loss": 0.4943, + "num_input_tokens_seen": 24559004, + "step": 143 + }, + { + "epoch": 0.03761425659235878, + "loss": 0.5077698826789856, + "loss_ce": 0.08748182654380798, + "loss_iou": 0.419921875, + "loss_num": 0.083984375, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 24559004, + "step": 143 + }, + { + "epoch": 0.03787729335174591, + "grad_norm": 13.449145519268138, + "learning_rate": 5e-06, + "loss": 0.4628, + "num_input_tokens_seen": 24729228, + "step": 144 + }, + { + "epoch": 0.03787729335174591, + "loss": 0.477780282497406, + "loss_ce": 0.0906953439116478, + "loss_iou": 0.2353515625, + "loss_num": 0.07763671875, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 24729228, + "step": 144 + }, + { + "epoch": 0.03814033011113303, + "grad_norm": 15.658548235445116, + "learning_rate": 5e-06, + "loss": 0.4482, + "num_input_tokens_seen": 24901324, + "step": 145 + }, + { + "epoch": 0.03814033011113303, + "loss": 0.460429847240448, + "loss_ce": 0.086406409740448, + "loss_iou": 0.267578125, + "loss_num": 0.07470703125, + "loss_xval": 0.375, + "num_input_tokens_seen": 24901324, + "step": 145 + }, + { + "epoch": 0.03840336687052016, + "grad_norm": 14.557303161182968, + "learning_rate": 5e-06, + "loss": 0.5115, + "num_input_tokens_seen": 25073508, + "step": 146 + }, + { + "epoch": 0.03840336687052016, + "loss": 0.6442508697509766, + "loss_ce": 0.07833293080329895, + "loss_iou": 0.11376953125, + "loss_num": 0.11328125, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 25073508, + "step": 146 + }, + { + "epoch": 0.03866640362990728, + "grad_norm": 17.777843374612736, + "learning_rate": 5e-06, + "loss": 0.4213, + "num_input_tokens_seen": 25245752, + "step": 147 + }, + { + "epoch": 0.03866640362990728, + "loss": 0.40006011724472046, + "loss_ce": 0.08243316411972046, + "loss_iou": 0.419921875, + "loss_num": 0.0634765625, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 25245752, + "step": 147 + }, + { + "epoch": 0.038929440389294405, + "grad_norm": 13.44125897811313, + "learning_rate": 5e-06, + "loss": 0.4791, + "num_input_tokens_seen": 25418156, + "step": 148 + }, + { + "epoch": 0.038929440389294405, + "loss": 0.5981014966964722, + "loss_ce": 0.08491791784763336, + "loss_iou": 0.18359375, + "loss_num": 0.1025390625, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 25418156, + "step": 148 + }, + { + "epoch": 0.03919247714868153, + "grad_norm": 11.76514004625091, + "learning_rate": 5e-06, + "loss": 0.4001, + "num_input_tokens_seen": 25589904, + "step": 149 + }, + { + "epoch": 0.03919247714868153, + "loss": 0.35078275203704834, + "loss_ce": 0.08393705636262894, + "loss_iou": 0.267578125, + "loss_num": 0.053466796875, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 25589904, + "step": 149 + }, + { + "epoch": 0.03945551390806865, + "grad_norm": 12.03724933893627, + "learning_rate": 5e-06, + "loss": 0.4147, + "num_input_tokens_seen": 25762000, + "step": 150 + }, + { + "epoch": 0.03945551390806865, + "loss": 0.4596315622329712, + "loss_ce": 0.0690065547823906, + "loss_iou": 0.376953125, + "loss_num": 0.078125, + "loss_xval": 0.390625, + "num_input_tokens_seen": 25762000, + "step": 150 + }, + { + "epoch": 0.039718550667455776, + "grad_norm": 13.05163620933256, + "learning_rate": 5e-06, + "loss": 0.398, + "num_input_tokens_seen": 25934528, + "step": 151 + }, + { + "epoch": 0.039718550667455776, + "loss": 0.38974958658218384, + "loss_ce": 0.07322127372026443, + "loss_iou": 0.1591796875, + "loss_num": 0.0634765625, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 25934528, + "step": 151 + }, + { + "epoch": 0.0399815874268429, + "grad_norm": 23.58601457584165, + "learning_rate": 5e-06, + "loss": 0.3897, + "num_input_tokens_seen": 26104148, + "step": 152 + }, + { + "epoch": 0.0399815874268429, + "loss": 0.3437004089355469, + "loss_ce": 0.07172775268554688, + "loss_iou": 0.326171875, + "loss_num": 0.054443359375, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 26104148, + "step": 152 + }, + { + "epoch": 0.040244624186230024, + "grad_norm": 18.61300216202182, + "learning_rate": 5e-06, + "loss": 0.4682, + "num_input_tokens_seen": 26276256, + "step": 153 + }, + { + "epoch": 0.040244624186230024, + "loss": 0.43069130182266235, + "loss_ce": 0.06960733234882355, + "loss_iou": 0.419921875, + "loss_num": 0.072265625, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 26276256, + "step": 153 + }, + { + "epoch": 0.04050766094561715, + "grad_norm": 14.116759080889954, + "learning_rate": 5e-06, + "loss": 0.4744, + "num_input_tokens_seen": 26448288, + "step": 154 + }, + { + "epoch": 0.04050766094561715, + "loss": 0.5761303901672363, + "loss_ce": 0.0805249810218811, + "loss_iou": 0.333984375, + "loss_num": 0.09912109375, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 26448288, + "step": 154 + }, + { + "epoch": 0.04077069770500427, + "grad_norm": 10.146662886533779, + "learning_rate": 5e-06, + "loss": 0.3974, + "num_input_tokens_seen": 26620604, + "step": 155 + }, + { + "epoch": 0.04077069770500427, + "loss": 0.3559998869895935, + "loss_ce": 0.07047741115093231, + "loss_iou": 0.310546875, + "loss_num": 0.05712890625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 26620604, + "step": 155 + }, + { + "epoch": 0.041033734464391396, + "grad_norm": 11.74557828439257, + "learning_rate": 5e-06, + "loss": 0.3616, + "num_input_tokens_seen": 26789104, + "step": 156 + }, + { + "epoch": 0.041033734464391396, + "loss": 0.3762925863265991, + "loss_ce": 0.06867540627717972, + "loss_iou": 0.2138671875, + "loss_num": 0.0615234375, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 26789104, + "step": 156 + }, + { + "epoch": 0.041296771223778526, + "grad_norm": 11.967339752616043, + "learning_rate": 5e-06, + "loss": 0.3655, + "num_input_tokens_seen": 26961408, + "step": 157 + }, + { + "epoch": 0.041296771223778526, + "loss": 0.36129921674728394, + "loss_ce": 0.060029659420251846, + "loss_iou": 0.369140625, + "loss_num": 0.060302734375, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 26961408, + "step": 157 + }, + { + "epoch": 0.04155980798316565, + "grad_norm": 16.462145017062344, + "learning_rate": 5e-06, + "loss": 0.3599, + "num_input_tokens_seen": 27133600, + "step": 158 + }, + { + "epoch": 0.04155980798316565, + "loss": 0.4060816764831543, + "loss_ce": 0.06208755075931549, + "loss_iou": 0.291015625, + "loss_num": 0.06884765625, + "loss_xval": 0.34375, + "num_input_tokens_seen": 27133600, + "step": 158 + }, + { + "epoch": 0.041822844742552774, + "grad_norm": 18.2471111237373, + "learning_rate": 5e-06, + "loss": 0.4173, + "num_input_tokens_seen": 27305676, + "step": 159 + }, + { + "epoch": 0.041822844742552774, + "loss": 0.3555372357368469, + "loss_ce": 0.07025889307260513, + "loss_iou": 0.271484375, + "loss_num": 0.05712890625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 27305676, + "step": 159 + }, + { + "epoch": 0.0420858815019399, + "grad_norm": 22.735392677918405, + "learning_rate": 5e-06, + "loss": 0.5962, + "num_input_tokens_seen": 27477824, + "step": 160 + }, + { + "epoch": 0.0420858815019399, + "loss": 0.6341134309768677, + "loss_ce": 0.06184776872396469, + "loss_iou": 0.333984375, + "loss_num": 0.1142578125, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 27477824, + "step": 160 + }, + { + "epoch": 0.04234891826132702, + "grad_norm": 13.104760953447814, + "learning_rate": 5e-06, + "loss": 0.4091, + "num_input_tokens_seen": 27650024, + "step": 161 + }, + { + "epoch": 0.04234891826132702, + "loss": 0.4603307843208313, + "loss_ce": 0.059085663408041, + "loss_iou": 0.275390625, + "loss_num": 0.080078125, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 27650024, + "step": 161 + }, + { + "epoch": 0.042611955020714146, + "grad_norm": 15.303759630511669, + "learning_rate": 5e-06, + "loss": 0.4433, + "num_input_tokens_seen": 27822408, + "step": 162 + }, + { + "epoch": 0.042611955020714146, + "loss": 0.43349525332450867, + "loss_ce": 0.06215735524892807, + "loss_iou": 0.23828125, + "loss_num": 0.07421875, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 27822408, + "step": 162 + }, + { + "epoch": 0.04287499178010127, + "grad_norm": 14.84522191071012, + "learning_rate": 5e-06, + "loss": 0.3769, + "num_input_tokens_seen": 27992440, + "step": 163 + }, + { + "epoch": 0.04287499178010127, + "loss": 0.34604543447494507, + "loss_ce": 0.05673878639936447, + "loss_iou": 0.24609375, + "loss_num": 0.057861328125, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 27992440, + "step": 163 + }, + { + "epoch": 0.04313802853948839, + "grad_norm": 17.265704948492278, + "learning_rate": 5e-06, + "loss": 0.4298, + "num_input_tokens_seen": 28164464, + "step": 164 + }, + { + "epoch": 0.04313802853948839, + "loss": 0.48720821738243103, + "loss_ce": 0.05849727243185043, + "loss_iou": 0.2890625, + "loss_num": 0.0859375, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 28164464, + "step": 164 + }, + { + "epoch": 0.04340106529887552, + "grad_norm": 13.53959312060916, + "learning_rate": 5e-06, + "loss": 0.3991, + "num_input_tokens_seen": 28334436, + "step": 165 + }, + { + "epoch": 0.04340106529887552, + "loss": 0.3757469058036804, + "loss_ce": 0.05201642960309982, + "loss_iou": 0.1220703125, + "loss_num": 0.064453125, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 28334436, + "step": 165 + }, + { + "epoch": 0.04366410205826264, + "grad_norm": 12.759789778648042, + "learning_rate": 5e-06, + "loss": 0.3563, + "num_input_tokens_seen": 28506772, + "step": 166 + }, + { + "epoch": 0.04366410205826264, + "loss": 0.3605605363845825, + "loss_ce": 0.05233298987150192, + "loss_iou": 0.341796875, + "loss_num": 0.0615234375, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 28506772, + "step": 166 + }, + { + "epoch": 0.043927138817649765, + "grad_norm": 14.640887630603752, + "learning_rate": 5e-06, + "loss": 0.3881, + "num_input_tokens_seen": 28676980, + "step": 167 + }, + { + "epoch": 0.043927138817649765, + "loss": 0.3610857427120209, + "loss_ce": 0.056642383337020874, + "loss_iou": 0.240234375, + "loss_num": 0.06103515625, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 28676980, + "step": 167 + }, + { + "epoch": 0.04419017557703689, + "grad_norm": 12.095247845748437, + "learning_rate": 5e-06, + "loss": 0.3742, + "num_input_tokens_seen": 28849236, + "step": 168 + }, + { + "epoch": 0.04419017557703689, + "loss": 0.40532949566841125, + "loss_ce": 0.05388907343149185, + "loss_iou": 0.3203125, + "loss_num": 0.0703125, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 28849236, + "step": 168 + }, + { + "epoch": 0.04445321233642401, + "grad_norm": 10.33587361024043, + "learning_rate": 5e-06, + "loss": 0.3837, + "num_input_tokens_seen": 29021180, + "step": 169 + }, + { + "epoch": 0.04445321233642401, + "loss": 0.3248461186885834, + "loss_ce": 0.046464771032333374, + "loss_iou": 0.1787109375, + "loss_num": 0.0556640625, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 29021180, + "step": 169 + }, + { + "epoch": 0.044716249095811136, + "grad_norm": 10.708625450816182, + "learning_rate": 5e-06, + "loss": 0.3951, + "num_input_tokens_seen": 29191532, + "step": 170 + }, + { + "epoch": 0.044716249095811136, + "loss": 0.4214698076248169, + "loss_ce": 0.052573323249816895, + "loss_iou": 0.33203125, + "loss_num": 0.07373046875, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 29191532, + "step": 170 + }, + { + "epoch": 0.04497928585519827, + "grad_norm": 14.817164885231227, + "learning_rate": 5e-06, + "loss": 0.3936, + "num_input_tokens_seen": 29363536, + "step": 171 + }, + { + "epoch": 0.04497928585519827, + "loss": 0.40210169553756714, + "loss_ce": 0.04602260887622833, + "loss_iou": 0.3046875, + "loss_num": 0.0712890625, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 29363536, + "step": 171 + }, + { + "epoch": 0.04524232261458539, + "grad_norm": 11.284174351804506, + "learning_rate": 5e-06, + "loss": 0.3753, + "num_input_tokens_seen": 29533848, + "step": 172 + }, + { + "epoch": 0.04524232261458539, + "loss": 0.3555999994277954, + "loss_ce": 0.04358828812837601, + "loss_iou": 0.2890625, + "loss_num": 0.0625, + "loss_xval": 0.3125, + "num_input_tokens_seen": 29533848, + "step": 172 + }, + { + "epoch": 0.045505359373972515, + "grad_norm": 9.66534144364822, + "learning_rate": 5e-06, + "loss": 0.307, + "num_input_tokens_seen": 29706424, + "step": 173 + }, + { + "epoch": 0.045505359373972515, + "loss": 0.3183665871620178, + "loss_ce": 0.04297598451375961, + "loss_iou": 0.35546875, + "loss_num": 0.05517578125, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 29706424, + "step": 173 + }, + { + "epoch": 0.04576839613335964, + "grad_norm": 15.933225853148851, + "learning_rate": 5e-06, + "loss": 0.3645, + "num_input_tokens_seen": 29878348, + "step": 174 + }, + { + "epoch": 0.04576839613335964, + "loss": 0.24233956634998322, + "loss_ce": 0.051421597599983215, + "loss_iou": 0.609375, + "loss_num": 0.0380859375, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 29878348, + "step": 174 + }, + { + "epoch": 0.04603143289274676, + "grad_norm": 15.32149954029387, + "learning_rate": 5e-06, + "loss": 0.403, + "num_input_tokens_seen": 30050700, + "step": 175 + }, + { + "epoch": 0.04603143289274676, + "loss": 0.41682887077331543, + "loss_ce": 0.047810301184654236, + "loss_iou": 0.2734375, + "loss_num": 0.07373046875, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 30050700, + "step": 175 + }, + { + "epoch": 0.046294469652133886, + "grad_norm": 11.301125755600708, + "learning_rate": 5e-06, + "loss": 0.3852, + "num_input_tokens_seen": 30222872, + "step": 176 + }, + { + "epoch": 0.046294469652133886, + "loss": 0.35361334681510925, + "loss_ce": 0.04599615931510925, + "loss_iou": 0.341796875, + "loss_num": 0.0615234375, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 30222872, + "step": 176 + }, + { + "epoch": 0.04655750641152101, + "grad_norm": 15.04642729797989, + "learning_rate": 5e-06, + "loss": 0.3569, + "num_input_tokens_seen": 30394820, + "step": 177 + }, + { + "epoch": 0.04655750641152101, + "loss": 0.31377077102661133, + "loss_ce": 0.04106569290161133, + "loss_iou": 0.2109375, + "loss_num": 0.0546875, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 30394820, + "step": 177 + }, + { + "epoch": 0.046820543170908134, + "grad_norm": 14.180764275493368, + "learning_rate": 5e-06, + "loss": 0.3856, + "num_input_tokens_seen": 30567088, + "step": 178 + }, + { + "epoch": 0.046820543170908134, + "loss": 0.3383968770503998, + "loss_ce": 0.04188808798789978, + "loss_iou": 0.36328125, + "loss_num": 0.059326171875, + "loss_xval": 0.296875, + "num_input_tokens_seen": 30567088, + "step": 178 + }, + { + "epoch": 0.04708357993029526, + "grad_norm": 11.43960593758222, + "learning_rate": 5e-06, + "loss": 0.3723, + "num_input_tokens_seen": 30739392, + "step": 179 + }, + { + "epoch": 0.04708357993029526, + "loss": 0.4008994996547699, + "loss_ce": 0.0434776172041893, + "loss_iou": 0.2451171875, + "loss_num": 0.0712890625, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 30739392, + "step": 179 + }, + { + "epoch": 0.04734661668968238, + "grad_norm": 10.118856123139876, + "learning_rate": 5e-06, + "loss": 0.3842, + "num_input_tokens_seen": 30911592, + "step": 180 + }, + { + "epoch": 0.04734661668968238, + "loss": 0.33974575996398926, + "loss_ce": 0.03798792511224747, + "loss_iou": 0.49609375, + "loss_num": 0.060546875, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 30911592, + "step": 180 + }, + { + "epoch": 0.047609653449069506, + "grad_norm": 8.662737873806904, + "learning_rate": 5e-06, + "loss": 0.2718, + "num_input_tokens_seen": 31083320, + "step": 181 + }, + { + "epoch": 0.047609653449069506, + "loss": 0.24805772304534912, + "loss_ce": 0.034434687346220016, + "loss_iou": 0.330078125, + "loss_num": 0.042724609375, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 31083320, + "step": 181 + }, + { + "epoch": 0.04787269020845663, + "grad_norm": 9.949547683003779, + "learning_rate": 5e-06, + "loss": 0.3404, + "num_input_tokens_seen": 31255136, + "step": 182 + }, + { + "epoch": 0.04787269020845663, + "loss": 0.3085269331932068, + "loss_ce": 0.03093905746936798, + "loss_iou": 0.61328125, + "loss_num": 0.055419921875, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 31255136, + "step": 182 + }, + { + "epoch": 0.04813572696784375, + "grad_norm": 11.304350377916183, + "learning_rate": 5e-06, + "loss": 0.3478, + "num_input_tokens_seen": 31427380, + "step": 183 + }, + { + "epoch": 0.04813572696784375, + "loss": 0.2798244059085846, + "loss_ce": 0.037881046533584595, + "loss_iou": 0.380859375, + "loss_num": 0.04833984375, + "loss_xval": 0.2421875, + "num_input_tokens_seen": 31427380, + "step": 183 + }, + { + "epoch": 0.04839876372723088, + "grad_norm": 20.65800125552813, + "learning_rate": 5e-06, + "loss": 0.4261, + "num_input_tokens_seen": 31599344, + "step": 184 + }, + { + "epoch": 0.04839876372723088, + "loss": 0.37513959407806396, + "loss_ce": 0.03566203638911247, + "loss_iou": 0.59375, + "loss_num": 0.06787109375, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 31599344, + "step": 184 + }, + { + "epoch": 0.04866180048661801, + "grad_norm": 21.767539584052432, + "learning_rate": 5e-06, + "loss": 0.4585, + "num_input_tokens_seen": 31771536, + "step": 185 + }, + { + "epoch": 0.04866180048661801, + "loss": 0.437938392162323, + "loss_ce": 0.031200092285871506, + "loss_iou": 0.275390625, + "loss_num": 0.08154296875, + "loss_xval": 0.40625, + "num_input_tokens_seen": 31771536, + "step": 185 + }, + { + "epoch": 0.04892483724600513, + "grad_norm": 19.478046485138204, + "learning_rate": 5e-06, + "loss": 0.5036, + "num_input_tokens_seen": 31943688, + "step": 186 + }, + { + "epoch": 0.04892483724600513, + "loss": 0.4687976837158203, + "loss_ce": 0.03593636304140091, + "loss_iou": 0.201171875, + "loss_num": 0.08642578125, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 31943688, + "step": 186 + }, + { + "epoch": 0.049187874005392256, + "grad_norm": 11.185610516547799, + "learning_rate": 5e-06, + "loss": 0.3628, + "num_input_tokens_seen": 32116040, + "step": 187 + }, + { + "epoch": 0.049187874005392256, + "loss": 0.39327770471572876, + "loss_ce": 0.03439096361398697, + "loss_iou": 0.36328125, + "loss_num": 0.07177734375, + "loss_xval": 0.359375, + "num_input_tokens_seen": 32116040, + "step": 187 + }, + { + "epoch": 0.04945091076477938, + "grad_norm": 11.155969289477222, + "learning_rate": 5e-06, + "loss": 0.2974, + "num_input_tokens_seen": 32288500, + "step": 188 + }, + { + "epoch": 0.04945091076477938, + "loss": 0.31713372468948364, + "loss_ce": 0.028681576251983643, + "loss_iou": 0.31640625, + "loss_num": 0.0576171875, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 32288500, + "step": 188 + }, + { + "epoch": 0.0497139475241665, + "grad_norm": 11.834615664464208, + "learning_rate": 5e-06, + "loss": 0.3369, + "num_input_tokens_seen": 32460772, + "step": 189 + }, + { + "epoch": 0.0497139475241665, + "loss": 0.3497753441333771, + "loss_ce": 0.02921871840953827, + "loss_iou": 0.484375, + "loss_num": 0.06396484375, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 32460772, + "step": 189 + }, + { + "epoch": 0.04997698428355363, + "grad_norm": 10.434886006122214, + "learning_rate": 5e-06, + "loss": 0.3387, + "num_input_tokens_seen": 32632848, + "step": 190 + }, + { + "epoch": 0.04997698428355363, + "loss": 0.36610347032546997, + "loss_ce": 0.029921812936663628, + "loss_iou": 0.38671875, + "loss_num": 0.0673828125, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 32632848, + "step": 190 + }, + { + "epoch": 0.05024002104294075, + "grad_norm": 9.800996766479896, + "learning_rate": 5e-06, + "loss": 0.3599, + "num_input_tokens_seen": 32805200, + "step": 191 + }, + { + "epoch": 0.05024002104294075, + "loss": 0.3484704792499542, + "loss_ce": 0.032674580812454224, + "loss_iou": 0.466796875, + "loss_num": 0.06298828125, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 32805200, + "step": 191 + }, + { + "epoch": 0.050503057802327875, + "grad_norm": 9.1652653754744, + "learning_rate": 5e-06, + "loss": 0.3086, + "num_input_tokens_seen": 32977420, + "step": 192 + }, + { + "epoch": 0.050503057802327875, + "loss": 0.3598284125328064, + "loss_ce": 0.027797123417258263, + "loss_iou": 0.283203125, + "loss_num": 0.06640625, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 32977420, + "step": 192 + }, + { + "epoch": 0.050766094561715, + "grad_norm": 11.082061385505076, + "learning_rate": 5e-06, + "loss": 0.3206, + "num_input_tokens_seen": 33149792, + "step": 193 + }, + { + "epoch": 0.050766094561715, + "loss": 0.3082242012023926, + "loss_ce": 0.035763248801231384, + "loss_iou": 0.41796875, + "loss_num": 0.054443359375, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 33149792, + "step": 193 + }, + { + "epoch": 0.05102913132110212, + "grad_norm": 10.154212185692936, + "learning_rate": 5e-06, + "loss": 0.3308, + "num_input_tokens_seen": 33321580, + "step": 194 + }, + { + "epoch": 0.05102913132110212, + "loss": 0.3002238869667053, + "loss_ce": 0.02568773366510868, + "loss_iou": 0.36328125, + "loss_num": 0.054931640625, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 33321580, + "step": 194 + }, + { + "epoch": 0.051292168080489246, + "grad_norm": 12.486303050942563, + "learning_rate": 5e-06, + "loss": 0.3875, + "num_input_tokens_seen": 33493632, + "step": 195 + }, + { + "epoch": 0.051292168080489246, + "loss": 0.35659241676330566, + "loss_ce": 0.030542613938450813, + "loss_iou": 0.455078125, + "loss_num": 0.0654296875, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 33493632, + "step": 195 + }, + { + "epoch": 0.05155520483987637, + "grad_norm": 13.41419397842875, + "learning_rate": 5e-06, + "loss": 0.3335, + "num_input_tokens_seen": 33665544, + "step": 196 + }, + { + "epoch": 0.05155520483987637, + "loss": 0.36018121242523193, + "loss_ce": 0.02436576411128044, + "loss_iou": 0.37890625, + "loss_num": 0.0673828125, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 33665544, + "step": 196 + }, + { + "epoch": 0.051818241599263494, + "grad_norm": 19.34380060773709, + "learning_rate": 5e-06, + "loss": 0.3472, + "num_input_tokens_seen": 33837492, + "step": 197 + }, + { + "epoch": 0.051818241599263494, + "loss": 0.37059885263442993, + "loss_ce": 0.024651601910591125, + "loss_iou": 0.341796875, + "loss_num": 0.0693359375, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 33837492, + "step": 197 + }, + { + "epoch": 0.052081278358650625, + "grad_norm": 12.626529129648052, + "learning_rate": 5e-06, + "loss": 0.3657, + "num_input_tokens_seen": 34009512, + "step": 198 + }, + { + "epoch": 0.052081278358650625, + "loss": 0.29348307847976685, + "loss_ce": 0.028102193027734756, + "loss_iou": 0.376953125, + "loss_num": 0.052978515625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 34009512, + "step": 198 + }, + { + "epoch": 0.05234431511803775, + "grad_norm": 10.467016024509215, + "learning_rate": 5e-06, + "loss": 0.3464, + "num_input_tokens_seen": 34181600, + "step": 199 + }, + { + "epoch": 0.05234431511803775, + "loss": 0.30455371737480164, + "loss_ce": 0.022815439850091934, + "loss_iou": 0.400390625, + "loss_num": 0.056396484375, + "loss_xval": 0.28125, + "num_input_tokens_seen": 34181600, + "step": 199 + }, + { + "epoch": 0.05260735187742487, + "grad_norm": 13.453464722997529, + "learning_rate": 5e-06, + "loss": 0.3248, + "num_input_tokens_seen": 34352348, + "step": 200 + }, + { + "epoch": 0.05260735187742487, + "loss": 0.45307034254074097, + "loss_ce": 0.020819369703531265, + "loss_iou": 0.3515625, + "loss_num": 0.08642578125, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 34352348, + "step": 200 + }, + { + "epoch": 0.052870388636811996, + "grad_norm": 20.491451966511576, + "learning_rate": 5e-06, + "loss": 0.4317, + "num_input_tokens_seen": 34524600, + "step": 201 + }, + { + "epoch": 0.052870388636811996, + "loss": 0.41993263363838196, + "loss_ce": 0.024668946862220764, + "loss_iou": 0.51171875, + "loss_num": 0.0791015625, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 34524600, + "step": 201 + }, + { + "epoch": 0.05313342539619912, + "grad_norm": 17.643454599965338, + "learning_rate": 5e-06, + "loss": 0.4565, + "num_input_tokens_seen": 34696600, + "step": 202 + }, + { + "epoch": 0.05313342539619912, + "loss": 0.5106714963912964, + "loss_ce": 0.022634411230683327, + "loss_iou": 0.0888671875, + "loss_num": 0.09765625, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 34696600, + "step": 202 + }, + { + "epoch": 0.053396462155586244, + "grad_norm": 13.599251013665107, + "learning_rate": 5e-06, + "loss": 0.3165, + "num_input_tokens_seen": 34868596, + "step": 203 + }, + { + "epoch": 0.053396462155586244, + "loss": 0.3383103609085083, + "loss_ce": 0.026054508984088898, + "loss_iou": 0.26953125, + "loss_num": 0.0625, + "loss_xval": 0.3125, + "num_input_tokens_seen": 34868596, + "step": 203 + }, + { + "epoch": 0.05365949891497337, + "grad_norm": 15.289442638831803, + "learning_rate": 5e-06, + "loss": 0.3217, + "num_input_tokens_seen": 35038040, + "step": 204 + }, + { + "epoch": 0.05365949891497337, + "loss": 0.306024968624115, + "loss_ce": 0.022089410573244095, + "loss_iou": 0.39453125, + "loss_num": 0.056884765625, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 35038040, + "step": 204 + }, + { + "epoch": 0.05392253567436049, + "grad_norm": 10.407634168683858, + "learning_rate": 5e-06, + "loss": 0.3039, + "num_input_tokens_seen": 35208292, + "step": 205 + }, + { + "epoch": 0.05392253567436049, + "loss": 0.35352200269699097, + "loss_ce": 0.026007331907749176, + "loss_iou": 0.423828125, + "loss_num": 0.0654296875, + "loss_xval": 0.328125, + "num_input_tokens_seen": 35208292, + "step": 205 + }, + { + "epoch": 0.054185572433747616, + "grad_norm": 11.313536084612993, + "learning_rate": 5e-06, + "loss": 0.3141, + "num_input_tokens_seen": 35377388, + "step": 206 + }, + { + "epoch": 0.054185572433747616, + "loss": 0.27950865030288696, + "loss_ce": 0.02438168227672577, + "loss_iou": 0.431640625, + "loss_num": 0.051025390625, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 35377388, + "step": 206 + }, + { + "epoch": 0.05444860919313474, + "grad_norm": 11.696274899578064, + "learning_rate": 5e-06, + "loss": 0.3578, + "num_input_tokens_seen": 35547812, + "step": 207 + }, + { + "epoch": 0.05444860919313474, + "loss": 0.3501763641834259, + "loss_ce": 0.0216851644217968, + "loss_iou": 0.53125, + "loss_num": 0.06591796875, + "loss_xval": 0.328125, + "num_input_tokens_seen": 35547812, + "step": 207 + }, + { + "epoch": 0.05471164595252186, + "grad_norm": 10.19535036839742, + "learning_rate": 5e-06, + "loss": 0.2938, + "num_input_tokens_seen": 35720256, + "step": 208 + }, + { + "epoch": 0.05471164595252186, + "loss": 0.28618597984313965, + "loss_ce": 0.023246534168720245, + "loss_iou": 0.421875, + "loss_num": 0.052490234375, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 35720256, + "step": 208 + }, + { + "epoch": 0.05497468271190899, + "grad_norm": 10.108238117808526, + "learning_rate": 5e-06, + "loss": 0.2815, + "num_input_tokens_seen": 35892100, + "step": 209 + }, + { + "epoch": 0.05497468271190899, + "loss": 0.28424978256225586, + "loss_ce": 0.025460712611675262, + "loss_iou": 0.3125, + "loss_num": 0.0517578125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 35892100, + "step": 209 + }, + { + "epoch": 0.05523771947129611, + "grad_norm": 9.765870248571328, + "learning_rate": 5e-06, + "loss": 0.3151, + "num_input_tokens_seen": 36064296, + "step": 210 + }, + { + "epoch": 0.05523771947129611, + "loss": 0.30667591094970703, + "loss_ce": 0.02457140013575554, + "loss_iou": 0.373046875, + "loss_num": 0.056396484375, + "loss_xval": 0.28125, + "num_input_tokens_seen": 36064296, + "step": 210 + }, + { + "epoch": 0.055500756230683235, + "grad_norm": 8.667238391078952, + "learning_rate": 5e-06, + "loss": 0.3062, + "num_input_tokens_seen": 36236356, + "step": 211 + }, + { + "epoch": 0.055500756230683235, + "loss": 0.32360944151878357, + "loss_ce": 0.021241270005702972, + "loss_iou": 0.318359375, + "loss_num": 0.060546875, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 36236356, + "step": 211 + }, + { + "epoch": 0.055763792990070365, + "grad_norm": 7.623662373992362, + "learning_rate": 5e-06, + "loss": 0.2441, + "num_input_tokens_seen": 36406508, + "step": 212 + }, + { + "epoch": 0.055763792990070365, + "loss": 0.2571391761302948, + "loss_ce": 0.022825222462415695, + "loss_iou": 0.50390625, + "loss_num": 0.046875, + "loss_xval": 0.234375, + "num_input_tokens_seen": 36406508, + "step": 212 + }, + { + "epoch": 0.05602682974945749, + "grad_norm": 9.522758823829298, + "learning_rate": 5e-06, + "loss": 0.263, + "num_input_tokens_seen": 36578384, + "step": 213 + }, + { + "epoch": 0.05602682974945749, + "loss": 0.20427684485912323, + "loss_ce": 0.022819336503744125, + "loss_iou": 0.32421875, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 36578384, + "step": 213 + }, + { + "epoch": 0.05628986650884461, + "grad_norm": 14.400118985818274, + "learning_rate": 5e-06, + "loss": 0.293, + "num_input_tokens_seen": 36750552, + "step": 214 + }, + { + "epoch": 0.05628986650884461, + "loss": 0.29619529843330383, + "loss_ce": 0.021537089720368385, + "loss_iou": 0.44140625, + "loss_num": 0.054931640625, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 36750552, + "step": 214 + }, + { + "epoch": 0.05655290326823174, + "grad_norm": 16.206900517822692, + "learning_rate": 5e-06, + "loss": 0.3487, + "num_input_tokens_seen": 36922680, + "step": 215 + }, + { + "epoch": 0.05655290326823174, + "loss": 0.37616318464279175, + "loss_ce": 0.018497148528695107, + "loss_iou": 0.224609375, + "loss_num": 0.07177734375, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 36922680, + "step": 215 + }, + { + "epoch": 0.05681594002761886, + "grad_norm": 16.214529098872383, + "learning_rate": 5e-06, + "loss": 0.3418, + "num_input_tokens_seen": 37095056, + "step": 216 + }, + { + "epoch": 0.05681594002761886, + "loss": 0.37693583965301514, + "loss_ce": 0.018171211704611778, + "loss_iou": 0.208984375, + "loss_num": 0.07177734375, + "loss_xval": 0.359375, + "num_input_tokens_seen": 37095056, + "step": 216 + }, + { + "epoch": 0.057078976787005985, + "grad_norm": 11.604435704258858, + "learning_rate": 5e-06, + "loss": 0.3517, + "num_input_tokens_seen": 37267308, + "step": 217 + }, + { + "epoch": 0.057078976787005985, + "loss": 0.37936773896217346, + "loss_ce": 0.022922419011592865, + "loss_iou": 0.55078125, + "loss_num": 0.0712890625, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 37267308, + "step": 217 + }, + { + "epoch": 0.05734201354639311, + "grad_norm": 25.63677528330297, + "learning_rate": 5e-06, + "loss": 0.2984, + "num_input_tokens_seen": 37434780, + "step": 218 + }, + { + "epoch": 0.05734201354639311, + "loss": 0.31407007575035095, + "loss_ce": 0.02378687635064125, + "loss_iou": 0.455078125, + "loss_num": 0.05810546875, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 37434780, + "step": 218 + }, + { + "epoch": 0.05760505030578023, + "grad_norm": 11.86583190083979, + "learning_rate": 5e-06, + "loss": 0.3346, + "num_input_tokens_seen": 37605076, + "step": 219 + }, + { + "epoch": 0.05760505030578023, + "loss": 0.31814104318618774, + "loss_ce": 0.02236468717455864, + "loss_iou": 0.279296875, + "loss_num": 0.05908203125, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 37605076, + "step": 219 + }, + { + "epoch": 0.057868087065167356, + "grad_norm": 14.940327825772103, + "learning_rate": 5e-06, + "loss": 0.3733, + "num_input_tokens_seen": 37777704, + "step": 220 + }, + { + "epoch": 0.057868087065167356, + "loss": 0.37386685609817505, + "loss_ce": 0.020107077434659004, + "loss_iou": 0.443359375, + "loss_num": 0.07080078125, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 37777704, + "step": 220 + }, + { + "epoch": 0.05813112382455448, + "grad_norm": 13.873681072861537, + "learning_rate": 5e-06, + "loss": 0.3326, + "num_input_tokens_seen": 37948064, + "step": 221 + }, + { + "epoch": 0.05813112382455448, + "loss": 0.3946155905723572, + "loss_ce": 0.019127311185002327, + "loss_iou": 0.18359375, + "loss_num": 0.0751953125, + "loss_xval": 0.375, + "num_input_tokens_seen": 37948064, + "step": 221 + }, + { + "epoch": 0.058394160583941604, + "grad_norm": 13.253297702701516, + "learning_rate": 5e-06, + "loss": 0.3428, + "num_input_tokens_seen": 38118984, + "step": 222 + }, + { + "epoch": 0.058394160583941604, + "loss": 0.3087061643600464, + "loss_ce": 0.018300898373126984, + "loss_iou": 0.18359375, + "loss_num": 0.05810546875, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 38118984, + "step": 222 + }, + { + "epoch": 0.05865719734332873, + "grad_norm": 11.082820014401106, + "learning_rate": 5e-06, + "loss": 0.2927, + "num_input_tokens_seen": 38291092, + "step": 223 + }, + { + "epoch": 0.05865719734332873, + "loss": 0.2926178574562073, + "loss_ce": 0.017837589606642723, + "loss_iou": 0.337890625, + "loss_num": 0.054931640625, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 38291092, + "step": 223 + }, + { + "epoch": 0.05892023410271585, + "grad_norm": 13.606311852622206, + "learning_rate": 5e-06, + "loss": 0.2877, + "num_input_tokens_seen": 38461712, + "step": 224 + }, + { + "epoch": 0.05892023410271585, + "loss": 0.2914125323295593, + "loss_ce": 0.016632266342639923, + "loss_iou": 0.29296875, + "loss_num": 0.054931640625, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 38461712, + "step": 224 + }, + { + "epoch": 0.059183270862102975, + "grad_norm": 12.858292254216362, + "learning_rate": 5e-06, + "loss": 0.3576, + "num_input_tokens_seen": 38633936, + "step": 225 + }, + { + "epoch": 0.059183270862102975, + "loss": 0.36546608805656433, + "loss_ce": 0.015124273486435413, + "loss_iou": 0.2890625, + "loss_num": 0.06982421875, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 38633936, + "step": 225 + }, + { + "epoch": 0.059446307621490106, + "grad_norm": 15.356311927838101, + "learning_rate": 5e-06, + "loss": 0.2591, + "num_input_tokens_seen": 38802732, + "step": 226 + }, + { + "epoch": 0.059446307621490106, + "loss": 0.24970154464244843, + "loss_ce": 0.01520447339862585, + "loss_iou": 0.412109375, + "loss_num": 0.046875, + "loss_xval": 0.234375, + "num_input_tokens_seen": 38802732, + "step": 226 + }, + { + "epoch": 0.05970934438087723, + "grad_norm": 33.523690387446095, + "learning_rate": 5e-06, + "loss": 0.3181, + "num_input_tokens_seen": 38975160, + "step": 227 + }, + { + "epoch": 0.05970934438087723, + "loss": 0.3422040641307831, + "loss_ce": 0.015910113230347633, + "loss_iou": 0.33984375, + "loss_num": 0.0654296875, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 38975160, + "step": 227 + }, + { + "epoch": 0.059972381140264354, + "grad_norm": 8.426822348321474, + "learning_rate": 5e-06, + "loss": 0.2888, + "num_input_tokens_seen": 39147352, + "step": 228 + }, + { + "epoch": 0.059972381140264354, + "loss": 0.29814714193344116, + "loss_ce": 0.014455747790634632, + "loss_iou": 0.3359375, + "loss_num": 0.056640625, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 39147352, + "step": 228 + }, + { + "epoch": 0.06023541789965148, + "grad_norm": 9.384703696891318, + "learning_rate": 5e-06, + "loss": 0.2861, + "num_input_tokens_seen": 39319476, + "step": 229 + }, + { + "epoch": 0.06023541789965148, + "loss": 0.2668268382549286, + "loss_ce": 0.020000681281089783, + "loss_iou": 0.474609375, + "loss_num": 0.04931640625, + "loss_xval": 0.2470703125, + "num_input_tokens_seen": 39319476, + "step": 229 + }, + { + "epoch": 0.0604984546590386, + "grad_norm": 18.427622180632213, + "learning_rate": 5e-06, + "loss": 0.3917, + "num_input_tokens_seen": 39491664, + "step": 230 + }, + { + "epoch": 0.0604984546590386, + "loss": 0.390484094619751, + "loss_ce": 0.015484098345041275, + "loss_iou": 0.875, + "loss_num": 0.0751953125, + "loss_xval": 0.375, + "num_input_tokens_seen": 39491664, + "step": 230 + }, + { + "epoch": 0.060761491418425725, + "grad_norm": 10.977429362928614, + "learning_rate": 5e-06, + "loss": 0.3949, + "num_input_tokens_seen": 39663560, + "step": 231 + }, + { + "epoch": 0.060761491418425725, + "loss": 0.334339439868927, + "loss_ce": 0.015735914930701256, + "loss_iou": 0.283203125, + "loss_num": 0.0634765625, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 39663560, + "step": 231 + }, + { + "epoch": 0.06102452817781285, + "grad_norm": 7.478546882380691, + "learning_rate": 5e-06, + "loss": 0.2993, + "num_input_tokens_seen": 39835708, + "step": 232 + }, + { + "epoch": 0.06102452817781285, + "loss": 0.2514447867870331, + "loss_ce": 0.014872531406581402, + "loss_iou": 0.45703125, + "loss_num": 0.04736328125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 39835708, + "step": 232 + }, + { + "epoch": 0.06128756493719997, + "grad_norm": 7.3196146291196635, + "learning_rate": 5e-06, + "loss": 0.2693, + "num_input_tokens_seen": 40008008, + "step": 233 + }, + { + "epoch": 0.06128756493719997, + "loss": 0.3245670795440674, + "loss_ce": 0.01768231764435768, + "loss_iou": 0.380859375, + "loss_num": 0.0615234375, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 40008008, + "step": 233 + }, + { + "epoch": 0.0615506016965871, + "grad_norm": 10.123913601017227, + "learning_rate": 5e-06, + "loss": 0.2834, + "num_input_tokens_seen": 40179816, + "step": 234 + }, + { + "epoch": 0.0615506016965871, + "loss": 0.2691105008125305, + "loss_ce": 0.01740150898694992, + "loss_iou": 0.462890625, + "loss_num": 0.05029296875, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 40179816, + "step": 234 + }, + { + "epoch": 0.06181363845597422, + "grad_norm": 16.840337823170596, + "learning_rate": 5e-06, + "loss": 0.3163, + "num_input_tokens_seen": 40352180, + "step": 235 + }, + { + "epoch": 0.06181363845597422, + "loss": 0.3705546259880066, + "loss_ce": 0.015330012887716293, + "loss_iou": 0.232421875, + "loss_num": 0.0712890625, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 40352180, + "step": 235 + }, + { + "epoch": 0.062076675215361345, + "grad_norm": 15.151708802383407, + "learning_rate": 5e-06, + "loss": 0.3181, + "num_input_tokens_seen": 40524204, + "step": 236 + }, + { + "epoch": 0.062076675215361345, + "loss": 0.342043399810791, + "loss_ce": 0.015383241698145866, + "loss_iou": 0.1884765625, + "loss_num": 0.0654296875, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 40524204, + "step": 236 + }, + { + "epoch": 0.06233971197474847, + "grad_norm": 23.279462740978527, + "learning_rate": 5e-06, + "loss": 0.3112, + "num_input_tokens_seen": 40694412, + "step": 237 + }, + { + "epoch": 0.06233971197474847, + "loss": 0.3619577884674072, + "loss_ce": 0.014667754992842674, + "loss_iou": NaN, + "loss_num": 0.0693359375, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 40694412, + "step": 237 + }, + { + "epoch": 0.0626027487341356, + "grad_norm": 7.854110724383197, + "learning_rate": 5e-06, + "loss": 0.3206, + "num_input_tokens_seen": 40866332, + "step": 238 + }, + { + "epoch": 0.0626027487341356, + "loss": 0.31817951798439026, + "loss_ce": 0.013125804252922535, + "loss_iou": 0.390625, + "loss_num": 0.06103515625, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 40866332, + "step": 238 + }, + { + "epoch": 0.06286578549352272, + "grad_norm": 10.293296601258389, + "learning_rate": 5e-06, + "loss": 0.2924, + "num_input_tokens_seen": 41036884, + "step": 239 + }, + { + "epoch": 0.06286578549352272, + "loss": 0.20027026534080505, + "loss_ce": 0.012953377328813076, + "loss_iou": 0.470703125, + "loss_num": 0.03759765625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 41036884, + "step": 239 + }, + { + "epoch": 0.06312882225290985, + "grad_norm": 27.954530904789568, + "learning_rate": 5e-06, + "loss": 0.3242, + "num_input_tokens_seen": 41209080, + "step": 240 + }, + { + "epoch": 0.06312882225290985, + "loss": 0.3325929641723633, + "loss_ce": 0.015088059939444065, + "loss_iou": 0.388671875, + "loss_num": 0.0634765625, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 41209080, + "step": 240 + }, + { + "epoch": 0.06339185901229696, + "grad_norm": 10.084557723509068, + "learning_rate": 5e-06, + "loss": 0.4017, + "num_input_tokens_seen": 41378712, + "step": 241 + }, + { + "epoch": 0.06339185901229696, + "loss": 0.42962974309921265, + "loss_ce": 0.01459068525582552, + "loss_iou": 0.1796875, + "loss_num": 0.0830078125, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 41378712, + "step": 241 + }, + { + "epoch": 0.0636548957716841, + "grad_norm": 7.845640861327211, + "learning_rate": 5e-06, + "loss": 0.3082, + "num_input_tokens_seen": 41550924, + "step": 242 + }, + { + "epoch": 0.0636548957716841, + "loss": 0.3617081940174103, + "loss_ce": 0.013441601768136024, + "loss_iou": 0.306640625, + "loss_num": 0.06982421875, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 41550924, + "step": 242 + }, + { + "epoch": 0.06391793253107121, + "grad_norm": 7.755635758991768, + "learning_rate": 5e-06, + "loss": 0.2863, + "num_input_tokens_seen": 41723448, + "step": 243 + }, + { + "epoch": 0.06391793253107121, + "loss": 0.31799250841140747, + "loss_ce": 0.01220636535435915, + "loss_iou": 0.4609375, + "loss_num": 0.06103515625, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 41723448, + "step": 243 + }, + { + "epoch": 0.06418096929045834, + "grad_norm": 10.028128094591828, + "learning_rate": 5e-06, + "loss": 0.3455, + "num_input_tokens_seen": 41895440, + "step": 244 + }, + { + "epoch": 0.06418096929045834, + "loss": 0.2664147913455963, + "loss_ce": 0.015926510095596313, + "loss_iou": 0.38671875, + "loss_num": 0.050048828125, + "loss_xval": 0.25, + "num_input_tokens_seen": 41895440, + "step": 244 + }, + { + "epoch": 0.06444400604984546, + "grad_norm": 7.455815676512238, + "learning_rate": 5e-06, + "loss": 0.3158, + "num_input_tokens_seen": 42067812, + "step": 245 + }, + { + "epoch": 0.06444400604984546, + "loss": 0.315701961517334, + "loss_ce": 0.012784458696842194, + "loss_iou": 0.3515625, + "loss_num": 0.060546875, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 42067812, + "step": 245 + }, + { + "epoch": 0.06470704280923259, + "grad_norm": 7.183484053450612, + "learning_rate": 5e-06, + "loss": 0.2873, + "num_input_tokens_seen": 42238248, + "step": 246 + }, + { + "epoch": 0.06470704280923259, + "loss": 0.408550500869751, + "loss_ce": 0.015972375869750977, + "loss_iou": 0.46484375, + "loss_num": 0.07861328125, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 42238248, + "step": 246 + }, + { + "epoch": 0.06497007956861972, + "grad_norm": 11.483582566544728, + "learning_rate": 5e-06, + "loss": 0.2422, + "num_input_tokens_seen": 42410540, + "step": 247 + }, + { + "epoch": 0.06497007956861972, + "loss": 0.24055451154708862, + "loss_ce": 0.012405097484588623, + "loss_iou": 0.455078125, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 42410540, + "step": 247 + }, + { + "epoch": 0.06523311632800684, + "grad_norm": 9.093470574393532, + "learning_rate": 5e-06, + "loss": 0.2447, + "num_input_tokens_seen": 42582896, + "step": 248 + }, + { + "epoch": 0.06523311632800684, + "loss": 0.2380836009979248, + "loss_ce": 0.009445905685424805, + "loss_iou": 0.51171875, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 42582896, + "step": 248 + }, + { + "epoch": 0.06549615308739397, + "grad_norm": 11.56824851448475, + "learning_rate": 5e-06, + "loss": 0.3024, + "num_input_tokens_seen": 42755248, + "step": 249 + }, + { + "epoch": 0.06549615308739397, + "loss": 0.3298885226249695, + "loss_ce": 0.012871915474534035, + "loss_iou": 0.359375, + "loss_num": 0.0634765625, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 42755248, + "step": 249 + }, + { + "epoch": 0.06575918984678109, + "grad_norm": 12.53262678976841, + "learning_rate": 5e-06, + "loss": 0.3279, + "num_input_tokens_seen": 42927164, + "step": 250 + }, + { + "epoch": 0.06575918984678109, + "eval_websight_new_CIoU": 0.6498270332813263, + "eval_websight_new_GIoU": 0.6454348266124725, + "eval_websight_new_IoU": 0.6616384983062744, + "eval_websight_new_MAE_all": 0.05562719702720642, + "eval_websight_new_MAE_h": 0.049970587715506554, + "eval_websight_new_MAE_w": 0.07883360609412193, + "eval_websight_new_MAE_x": 0.06017959490418434, + "eval_websight_new_MAE_y": 0.03352500405162573, + "eval_websight_new_NUM_probability": 0.9025295078754425, + "eval_websight_new_inside_bbox": 0.9253472089767456, + "eval_websight_new_loss": 0.27164769172668457, + "eval_websight_new_loss_ce": 0.01081773592159152, + "eval_websight_new_loss_iou": 0.693359375, + "eval_websight_new_loss_num": 0.049407958984375, + "eval_websight_new_loss_xval": 0.24688720703125, + "eval_websight_new_runtime": 55.9304, + "eval_websight_new_samples_per_second": 0.894, + "eval_websight_new_steps_per_second": 0.036, + "num_input_tokens_seen": 42927164, + "step": 250 + }, + { + "epoch": 0.06575918984678109, + "eval_seeclick_CIoU": 0.3509342074394226, + "eval_seeclick_GIoU": 0.3347575068473816, + "eval_seeclick_IoU": 0.3994097113609314, + "eval_seeclick_MAE_all": 0.0986204668879509, + "eval_seeclick_MAE_h": 0.0804138220846653, + "eval_seeclick_MAE_w": 0.13582541793584824, + "eval_seeclick_MAE_x": 0.12244484201073647, + "eval_seeclick_MAE_y": 0.05579778365790844, + "eval_seeclick_NUM_probability": 0.898758739233017, + "eval_seeclick_inside_bbox": 0.47727273404598236, + "eval_seeclick_loss": 0.4158971905708313, + "eval_seeclick_loss_ce": 0.027117961086332798, + "eval_seeclick_loss_iou": 0.66552734375, + "eval_seeclick_loss_num": 0.0771331787109375, + "eval_seeclick_loss_xval": 0.3856201171875, + "eval_seeclick_runtime": 77.1195, + "eval_seeclick_samples_per_second": 0.558, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 42927164, + "step": 250 + }, + { + "epoch": 0.06575918984678109, + "eval_icons_CIoU": 0.533334881067276, + "eval_icons_GIoU": 0.525450587272644, + "eval_icons_IoU": 0.5554244518280029, + "eval_icons_MAE_all": 0.06896939501166344, + "eval_icons_MAE_h": 0.07008247822523117, + "eval_icons_MAE_w": 0.07993372157216072, + "eval_icons_MAE_x": 0.06088143028318882, + "eval_icons_MAE_y": 0.06497994437813759, + "eval_icons_NUM_probability": 0.9118345677852631, + "eval_icons_inside_bbox": 0.8038194477558136, + "eval_icons_loss": 0.238239586353302, + "eval_icons_loss_ce": 0.01238260930404067, + "eval_icons_loss_iou": 0.45098876953125, + "eval_icons_loss_num": 0.04291534423828125, + "eval_icons_loss_xval": 0.2144775390625, + "eval_icons_runtime": 89.0702, + "eval_icons_samples_per_second": 0.561, + "eval_icons_steps_per_second": 0.022, + "num_input_tokens_seen": 42927164, + "step": 250 + }, + { + "epoch": 0.06575918984678109, + "eval_screenspot_CIoU": 0.40743913253148395, + "eval_screenspot_GIoU": 0.38262539108594257, + "eval_screenspot_IoU": 0.4510061542193095, + "eval_screenspot_MAE_all": 0.10948735972245534, + "eval_screenspot_MAE_h": 0.07818744828303655, + "eval_screenspot_MAE_w": 0.15290210396051407, + "eval_screenspot_MAE_x": 0.12599809964497885, + "eval_screenspot_MAE_y": 0.08086179196834564, + "eval_screenspot_NUM_probability": 0.9086714188257853, + "eval_screenspot_inside_bbox": 0.7354166706403097, + "eval_screenspot_loss": 0.9193825125694275, + "eval_screenspot_loss_ce": 0.48994149764378864, + "eval_screenspot_loss_iou": 0.5732421875, + "eval_screenspot_loss_num": 0.08390299479166667, + "eval_screenspot_loss_xval": 0.4192708333333333, + "eval_screenspot_runtime": 138.8534, + "eval_screenspot_samples_per_second": 0.641, + "eval_screenspot_steps_per_second": 0.022, + "num_input_tokens_seen": 42927164, + "step": 250 + }, + { + "epoch": 0.06575918984678109, + "loss": 0.8642065525054932, + "loss_ce": 0.45453858375549316, + "loss_iou": 0.53125, + "loss_num": 0.08203125, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 42927164, + "step": 250 + }, + { + "epoch": 0.06602222660616822, + "grad_norm": 16.162110660395133, + "learning_rate": 5e-06, + "loss": 0.3389, + "num_input_tokens_seen": 43099272, + "step": 251 + }, + { + "epoch": 0.06602222660616822, + "loss": 0.3324888348579407, + "loss_ce": 0.009490801021456718, + "loss_iou": 0.484375, + "loss_num": 0.064453125, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 43099272, + "step": 251 + }, + { + "epoch": 0.06628526336555533, + "grad_norm": 15.788401372124632, + "learning_rate": 5e-06, + "loss": 0.3827, + "num_input_tokens_seen": 43271440, + "step": 252 + }, + { + "epoch": 0.06628526336555533, + "loss": 0.3184235990047455, + "loss_ce": 0.009219500236213207, + "loss_iou": 0.453125, + "loss_num": 0.061767578125, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 43271440, + "step": 252 + }, + { + "epoch": 0.06654830012494246, + "grad_norm": 19.21368677855186, + "learning_rate": 5e-06, + "loss": 0.4031, + "num_input_tokens_seen": 43443704, + "step": 253 + }, + { + "epoch": 0.06654830012494246, + "loss": 0.3337656557559967, + "loss_ce": 0.009546896442770958, + "loss_iou": 0.1796875, + "loss_num": 0.06494140625, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 43443704, + "step": 253 + }, + { + "epoch": 0.06681133688432958, + "grad_norm": 9.350851106906616, + "learning_rate": 5e-06, + "loss": 0.2501, + "num_input_tokens_seen": 43615820, + "step": 254 + }, + { + "epoch": 0.06681133688432958, + "loss": 0.22743698954582214, + "loss_ce": 0.009236322715878487, + "loss_iou": 0.408203125, + "loss_num": 0.043701171875, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 43615820, + "step": 254 + }, + { + "epoch": 0.06707437364371671, + "grad_norm": 12.730744024793406, + "learning_rate": 5e-06, + "loss": 0.3033, + "num_input_tokens_seen": 43787664, + "step": 255 + }, + { + "epoch": 0.06707437364371671, + "loss": 0.3385705351829529, + "loss_ce": 0.009468959644436836, + "loss_iou": 0.193359375, + "loss_num": 0.06591796875, + "loss_xval": 0.328125, + "num_input_tokens_seen": 43787664, + "step": 255 + }, + { + "epoch": 0.06733741040310383, + "grad_norm": 20.602499715875325, + "learning_rate": 5e-06, + "loss": 0.335, + "num_input_tokens_seen": 43956324, + "step": 256 + }, + { + "epoch": 0.06733741040310383, + "loss": 0.33848732709884644, + "loss_ce": 0.012559601105749607, + "loss_iou": 0.32421875, + "loss_num": 0.0654296875, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 43956324, + "step": 256 + }, + { + "epoch": 0.06760044716249096, + "grad_norm": 11.274485245872459, + "learning_rate": 5e-06, + "loss": 0.241, + "num_input_tokens_seen": 44128740, + "step": 257 + }, + { + "epoch": 0.06760044716249096, + "loss": 0.24955862760543823, + "loss_ce": 0.011765659786760807, + "loss_iou": 0.330078125, + "loss_num": 0.047607421875, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 44128740, + "step": 257 + }, + { + "epoch": 0.06786348392187808, + "grad_norm": 12.119376554642344, + "learning_rate": 5e-06, + "loss": 0.359, + "num_input_tokens_seen": 44299372, + "step": 258 + }, + { + "epoch": 0.06786348392187808, + "loss": 0.26774948835372925, + "loss_ce": 0.01604049652814865, + "loss_iou": 0.56640625, + "loss_num": 0.05029296875, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 44299372, + "step": 258 + }, + { + "epoch": 0.0681265206812652, + "grad_norm": 15.925957923618926, + "learning_rate": 5e-06, + "loss": 0.3045, + "num_input_tokens_seen": 44469736, + "step": 259 + }, + { + "epoch": 0.0681265206812652, + "loss": 0.35633599758148193, + "loss_ce": 0.01502738706767559, + "loss_iou": 0.515625, + "loss_num": 0.068359375, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 44469736, + "step": 259 + }, + { + "epoch": 0.06838955744065234, + "grad_norm": 13.189087784364153, + "learning_rate": 5e-06, + "loss": 0.328, + "num_input_tokens_seen": 44642020, + "step": 260 + }, + { + "epoch": 0.06838955744065234, + "loss": 0.4303101599216461, + "loss_ce": 0.01173105463385582, + "loss_iou": 0.44921875, + "loss_num": 0.08349609375, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 44642020, + "step": 260 + }, + { + "epoch": 0.06865259420003945, + "grad_norm": 11.878594650017698, + "learning_rate": 5e-06, + "loss": 0.2752, + "num_input_tokens_seen": 44814452, + "step": 261 + }, + { + "epoch": 0.06865259420003945, + "loss": 0.28906819224357605, + "loss_ce": 0.0090388935059309, + "loss_iou": 0.251953125, + "loss_num": 0.05615234375, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 44814452, + "step": 261 + }, + { + "epoch": 0.06891563095942659, + "grad_norm": 8.62440603357029, + "learning_rate": 5e-06, + "loss": 0.2926, + "num_input_tokens_seen": 44986656, + "step": 262 + }, + { + "epoch": 0.06891563095942659, + "loss": 0.24431782960891724, + "loss_ce": 0.008111760020256042, + "loss_iou": 0.486328125, + "loss_num": 0.04736328125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 44986656, + "step": 262 + }, + { + "epoch": 0.0691786677188137, + "grad_norm": 7.7035777517167645, + "learning_rate": 5e-06, + "loss": 0.2298, + "num_input_tokens_seen": 45158540, + "step": 263 + }, + { + "epoch": 0.0691786677188137, + "loss": 0.23680636286735535, + "loss_ce": 0.013417699374258518, + "loss_iou": 0.46484375, + "loss_num": 0.044677734375, + "loss_xval": 0.2236328125, + "num_input_tokens_seen": 45158540, + "step": 263 + }, + { + "epoch": 0.06944170447820083, + "grad_norm": 11.11690642897216, + "learning_rate": 5e-06, + "loss": 0.2503, + "num_input_tokens_seen": 45330356, + "step": 264 + }, + { + "epoch": 0.06944170447820083, + "loss": 0.25701650977134705, + "loss_ce": 0.01019032672047615, + "loss_iou": 0.24609375, + "loss_num": 0.04931640625, + "loss_xval": 0.2470703125, + "num_input_tokens_seen": 45330356, + "step": 264 + }, + { + "epoch": 0.06970474123758795, + "grad_norm": 16.27774203292456, + "learning_rate": 5e-06, + "loss": 0.327, + "num_input_tokens_seen": 45502460, + "step": 265 + }, + { + "epoch": 0.06970474123758795, + "loss": 0.3339051902294159, + "loss_ce": 0.011639568954706192, + "loss_iou": 0.4921875, + "loss_num": 0.064453125, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 45502460, + "step": 265 + }, + { + "epoch": 0.06996777799697508, + "grad_norm": 11.740826510166434, + "learning_rate": 5e-06, + "loss": 0.3816, + "num_input_tokens_seen": 45674688, + "step": 266 + }, + { + "epoch": 0.06996777799697508, + "loss": 0.30319204926490784, + "loss_ce": 0.016082679852843285, + "loss_iou": 0.255859375, + "loss_num": 0.057373046875, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 45674688, + "step": 266 + }, + { + "epoch": 0.0702308147563622, + "grad_norm": 7.300788157576167, + "learning_rate": 5e-06, + "loss": 0.29, + "num_input_tokens_seen": 45847124, + "step": 267 + }, + { + "epoch": 0.0702308147563622, + "loss": 0.3247534930706024, + "loss_ce": 0.009140691719949245, + "loss_iou": 0.33203125, + "loss_num": 0.06298828125, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 45847124, + "step": 267 + }, + { + "epoch": 0.07049385151574933, + "grad_norm": 7.309781555609638, + "learning_rate": 5e-06, + "loss": 0.2756, + "num_input_tokens_seen": 46019340, + "step": 268 + }, + { + "epoch": 0.07049385151574933, + "loss": 0.2533281445503235, + "loss_ce": 0.008699209429323673, + "loss_iou": 0.38671875, + "loss_num": 0.048828125, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 46019340, + "step": 268 + }, + { + "epoch": 0.07075688827513645, + "grad_norm": 13.089258421868523, + "learning_rate": 5e-06, + "loss": 0.2701, + "num_input_tokens_seen": 46189784, + "step": 269 + }, + { + "epoch": 0.07075688827513645, + "loss": 0.2624807357788086, + "loss_ce": 0.01223657839000225, + "loss_iou": 0.435546875, + "loss_num": 0.050048828125, + "loss_xval": 0.25, + "num_input_tokens_seen": 46189784, + "step": 269 + }, + { + "epoch": 0.07101992503452358, + "grad_norm": 6.658182143864948, + "learning_rate": 5e-06, + "loss": 0.2118, + "num_input_tokens_seen": 46361728, + "step": 270 + }, + { + "epoch": 0.07101992503452358, + "loss": 0.2072766274213791, + "loss_ce": 0.007569607347249985, + "loss_iou": 0.4453125, + "loss_num": 0.0400390625, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 46361728, + "step": 270 + }, + { + "epoch": 0.07128296179391069, + "grad_norm": 9.079661259155806, + "learning_rate": 5e-06, + "loss": 0.2305, + "num_input_tokens_seen": 46534100, + "step": 271 + }, + { + "epoch": 0.07128296179391069, + "loss": 0.21106520295143127, + "loss_ce": 0.008977807126939297, + "loss_iou": 0.609375, + "loss_num": 0.040283203125, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 46534100, + "step": 271 + }, + { + "epoch": 0.07154599855329782, + "grad_norm": 9.513732560495592, + "learning_rate": 5e-06, + "loss": 0.2646, + "num_input_tokens_seen": 46704648, + "step": 272 + }, + { + "epoch": 0.07154599855329782, + "loss": 0.2694496214389801, + "loss_ce": 0.007120497524738312, + "loss_iou": 0.361328125, + "loss_num": 0.052490234375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 46704648, + "step": 272 + }, + { + "epoch": 0.07180903531268495, + "grad_norm": 15.714221513863984, + "learning_rate": 5e-06, + "loss": 0.3078, + "num_input_tokens_seen": 46876784, + "step": 273 + }, + { + "epoch": 0.07180903531268495, + "loss": 0.39289578795433044, + "loss_ce": 0.006909476593136787, + "loss_iou": 0.5, + "loss_num": 0.0771484375, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 46876784, + "step": 273 + }, + { + "epoch": 0.07207207207207207, + "grad_norm": 8.093127002316251, + "learning_rate": 5e-06, + "loss": 0.3025, + "num_input_tokens_seen": 47048876, + "step": 274 + }, + { + "epoch": 0.07207207207207207, + "loss": 0.25725314021110535, + "loss_ce": 0.011525607667863369, + "loss_iou": 0.30859375, + "loss_num": 0.049072265625, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 47048876, + "step": 274 + }, + { + "epoch": 0.0723351088314592, + "grad_norm": 8.569390693529225, + "learning_rate": 5e-06, + "loss": 0.297, + "num_input_tokens_seen": 47221068, + "step": 275 + }, + { + "epoch": 0.0723351088314592, + "loss": 0.2686833143234253, + "loss_ce": 0.006720416247844696, + "loss_iou": 0.404296875, + "loss_num": 0.052490234375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 47221068, + "step": 275 + }, + { + "epoch": 0.07259814559084632, + "grad_norm": 10.067643264055917, + "learning_rate": 5e-06, + "loss": 0.2437, + "num_input_tokens_seen": 47391436, + "step": 276 + }, + { + "epoch": 0.07259814559084632, + "loss": 0.2530132830142975, + "loss_ce": 0.007896100170910358, + "loss_iou": 0.330078125, + "loss_num": 0.049072265625, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 47391436, + "step": 276 + }, + { + "epoch": 0.07286118235023345, + "grad_norm": 13.479763232881819, + "learning_rate": 5e-06, + "loss": 0.3643, + "num_input_tokens_seen": 47563512, + "step": 277 + }, + { + "epoch": 0.07286118235023345, + "loss": 0.2713325619697571, + "loss_ce": 0.013642151840031147, + "loss_iou": 0.337890625, + "loss_num": 0.051513671875, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 47563512, + "step": 277 + }, + { + "epoch": 0.07312421910962057, + "grad_norm": 16.755961470396134, + "learning_rate": 5e-06, + "loss": 0.3279, + "num_input_tokens_seen": 47735372, + "step": 278 + }, + { + "epoch": 0.07312421910962057, + "loss": 0.42755797505378723, + "loss_ce": 0.008124373853206635, + "loss_iou": 0.365234375, + "loss_num": 0.083984375, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 47735372, + "step": 278 + }, + { + "epoch": 0.0733872558690077, + "grad_norm": 13.89589851066108, + "learning_rate": 5e-06, + "loss": 0.3883, + "num_input_tokens_seen": 47907704, + "step": 279 + }, + { + "epoch": 0.0733872558690077, + "loss": 0.41463613510131836, + "loss_ce": 0.013879301026463509, + "loss_iou": 0.1337890625, + "loss_num": 0.080078125, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 47907704, + "step": 279 + }, + { + "epoch": 0.07365029262839481, + "grad_norm": 13.52575149586993, + "learning_rate": 5e-06, + "loss": 0.2798, + "num_input_tokens_seen": 48079692, + "step": 280 + }, + { + "epoch": 0.07365029262839481, + "loss": 0.2906692624092102, + "loss_ce": 0.007099920883774757, + "loss_iou": 0.45703125, + "loss_num": 0.056640625, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 48079692, + "step": 280 + }, + { + "epoch": 0.07391332938778195, + "grad_norm": 11.637653668969445, + "learning_rate": 5e-06, + "loss": 0.2883, + "num_input_tokens_seen": 48251572, + "step": 281 + }, + { + "epoch": 0.07391332938778195, + "loss": 0.2844490110874176, + "loss_ce": 0.010157023556530476, + "loss_iou": 0.490234375, + "loss_num": 0.054931640625, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 48251572, + "step": 281 + }, + { + "epoch": 0.07417636614716906, + "grad_norm": 9.21180215619434, + "learning_rate": 5e-06, + "loss": 0.2978, + "num_input_tokens_seen": 48423560, + "step": 282 + }, + { + "epoch": 0.07417636614716906, + "loss": 0.32979628443717957, + "loss_ce": 0.012047262862324715, + "loss_iou": 0.42578125, + "loss_num": 0.0634765625, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 48423560, + "step": 282 + }, + { + "epoch": 0.07443940290655619, + "grad_norm": 7.167038204924764, + "learning_rate": 5e-06, + "loss": 0.256, + "num_input_tokens_seen": 48595680, + "step": 283 + }, + { + "epoch": 0.07443940290655619, + "loss": 0.25096797943115234, + "loss_ce": 0.007925992831587791, + "loss_iou": 0.330078125, + "loss_num": 0.048583984375, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 48595680, + "step": 283 + }, + { + "epoch": 0.07470243966594331, + "grad_norm": 11.715910802137108, + "learning_rate": 5e-06, + "loss": 0.2476, + "num_input_tokens_seen": 48766076, + "step": 284 + }, + { + "epoch": 0.07470243966594331, + "loss": 0.19452951848506927, + "loss_ce": 0.005930891260504723, + "loss_iou": 0.51171875, + "loss_num": 0.03759765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 48766076, + "step": 284 + }, + { + "epoch": 0.07496547642533044, + "grad_norm": 11.971840696280033, + "learning_rate": 5e-06, + "loss": 0.3413, + "num_input_tokens_seen": 48937948, + "step": 285 + }, + { + "epoch": 0.07496547642533044, + "loss": 0.349312424659729, + "loss_ce": 0.00910246279090643, + "loss_iou": 0.5078125, + "loss_num": 0.06787109375, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 48937948, + "step": 285 + }, + { + "epoch": 0.07522851318471756, + "grad_norm": 9.505447479710712, + "learning_rate": 5e-06, + "loss": 0.2998, + "num_input_tokens_seen": 49110048, + "step": 286 + }, + { + "epoch": 0.07522851318471756, + "loss": 0.3620803654193878, + "loss_ce": 0.0063674794510006905, + "loss_iou": 0.376953125, + "loss_num": 0.0712890625, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 49110048, + "step": 286 + }, + { + "epoch": 0.07549154994410469, + "grad_norm": 9.524937593510838, + "learning_rate": 5e-06, + "loss": 0.2473, + "num_input_tokens_seen": 49281924, + "step": 287 + }, + { + "epoch": 0.07549154994410469, + "loss": 0.21198034286499023, + "loss_ce": 0.0065360115841031075, + "loss_iou": 0.4140625, + "loss_num": 0.041015625, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 49281924, + "step": 287 + }, + { + "epoch": 0.07575458670349182, + "grad_norm": 9.713489351185899, + "learning_rate": 5e-06, + "loss": 0.2911, + "num_input_tokens_seen": 49454288, + "step": 288 + }, + { + "epoch": 0.07575458670349182, + "loss": 0.35456106066703796, + "loss_ce": 0.01032277476042509, + "loss_iou": 0.4375, + "loss_num": 0.06884765625, + "loss_xval": 0.34375, + "num_input_tokens_seen": 49454288, + "step": 288 + }, + { + "epoch": 0.07601762346287894, + "grad_norm": 8.377848047343468, + "learning_rate": 5e-06, + "loss": 0.2711, + "num_input_tokens_seen": 49626816, + "step": 289 + }, + { + "epoch": 0.07601762346287894, + "loss": 0.2953585684299469, + "loss_ce": 0.005685708485543728, + "loss_iou": 0.2890625, + "loss_num": 0.057861328125, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 49626816, + "step": 289 + }, + { + "epoch": 0.07628066022226607, + "grad_norm": 10.429005548611427, + "learning_rate": 5e-06, + "loss": 0.2446, + "num_input_tokens_seen": 49798788, + "step": 290 + }, + { + "epoch": 0.07628066022226607, + "loss": 0.19721034169197083, + "loss_ce": 0.007268924731761217, + "loss_iou": 0.39453125, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 49798788, + "step": 290 + }, + { + "epoch": 0.07654369698165318, + "grad_norm": 13.641317737771786, + "learning_rate": 5e-06, + "loss": 0.3162, + "num_input_tokens_seen": 49970924, + "step": 291 + }, + { + "epoch": 0.07654369698165318, + "loss": 0.3052144944667816, + "loss_ce": 0.00528772734105587, + "loss_iou": 0.67578125, + "loss_num": 0.06005859375, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 49970924, + "step": 291 + }, + { + "epoch": 0.07680673374104031, + "grad_norm": 13.717892281601975, + "learning_rate": 5e-06, + "loss": 0.2925, + "num_input_tokens_seen": 50143128, + "step": 292 + }, + { + "epoch": 0.07680673374104031, + "loss": 0.3288165032863617, + "loss_ce": 0.006795027758926153, + "loss_iou": 0.34375, + "loss_num": 0.064453125, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 50143128, + "step": 292 + }, + { + "epoch": 0.07706977050042743, + "grad_norm": 9.087941964788197, + "learning_rate": 5e-06, + "loss": 0.2844, + "num_input_tokens_seen": 50315356, + "step": 293 + }, + { + "epoch": 0.07706977050042743, + "loss": 0.30062055587768555, + "loss_ce": 0.006186962127685547, + "loss_iou": 0.4140625, + "loss_num": 0.058837890625, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 50315356, + "step": 293 + }, + { + "epoch": 0.07733280725981456, + "grad_norm": 8.950504335091761, + "learning_rate": 5e-06, + "loss": 0.2474, + "num_input_tokens_seen": 50487452, + "step": 294 + }, + { + "epoch": 0.07733280725981456, + "loss": 0.20027770102024078, + "loss_ce": 0.007284537889063358, + "loss_iou": 0.58203125, + "loss_num": 0.03857421875, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 50487452, + "step": 294 + }, + { + "epoch": 0.07759584401920168, + "grad_norm": 12.276590112874837, + "learning_rate": 5e-06, + "loss": 0.2962, + "num_input_tokens_seen": 50659340, + "step": 295 + }, + { + "epoch": 0.07759584401920168, + "loss": 0.3079564869403839, + "loss_ce": 0.008151799440383911, + "loss_iou": 0.5625, + "loss_num": 0.059814453125, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 50659340, + "step": 295 + }, + { + "epoch": 0.07785888077858881, + "grad_norm": 10.920612112253043, + "learning_rate": 5e-06, + "loss": 0.2779, + "num_input_tokens_seen": 50831668, + "step": 296 + }, + { + "epoch": 0.07785888077858881, + "loss": 0.3526954650878906, + "loss_ce": 0.01395035907626152, + "loss_iou": 0.470703125, + "loss_num": 0.06787109375, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 50831668, + "step": 296 + }, + { + "epoch": 0.07812191753797593, + "grad_norm": 14.39030370540137, + "learning_rate": 5e-06, + "loss": 0.317, + "num_input_tokens_seen": 51004036, + "step": 297 + }, + { + "epoch": 0.07812191753797593, + "loss": 0.3473885655403137, + "loss_ce": 0.01047449093312025, + "loss_iou": 0.3046875, + "loss_num": 0.0673828125, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 51004036, + "step": 297 + }, + { + "epoch": 0.07838495429736306, + "grad_norm": 10.790411077010845, + "learning_rate": 5e-06, + "loss": 0.2641, + "num_input_tokens_seen": 51173952, + "step": 298 + }, + { + "epoch": 0.07838495429736306, + "loss": 0.24334201216697693, + "loss_ce": 0.0049387002363801, + "loss_iou": 0.453125, + "loss_num": 0.047607421875, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 51173952, + "step": 298 + }, + { + "epoch": 0.07864799105675017, + "grad_norm": 12.163313302369062, + "learning_rate": 5e-06, + "loss": 0.277, + "num_input_tokens_seen": 51346208, + "step": 299 + }, + { + "epoch": 0.07864799105675017, + "loss": 0.33479589223861694, + "loss_ce": 0.005816406104713678, + "loss_iou": 0.408203125, + "loss_num": 0.06591796875, + "loss_xval": 0.328125, + "num_input_tokens_seen": 51346208, + "step": 299 + }, + { + "epoch": 0.0789110278161373, + "grad_norm": 13.775522127007955, + "learning_rate": 5e-06, + "loss": 0.3084, + "num_input_tokens_seen": 51516552, + "step": 300 + }, + { + "epoch": 0.0789110278161373, + "loss": 0.2704133987426758, + "loss_ce": 0.004422190133482218, + "loss_iou": 0.2294921875, + "loss_num": 0.05322265625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 51516552, + "step": 300 + }, + { + "epoch": 0.07917406457552444, + "grad_norm": 11.14460650886844, + "learning_rate": 5e-06, + "loss": 0.2851, + "num_input_tokens_seen": 51688716, + "step": 301 + }, + { + "epoch": 0.07917406457552444, + "loss": 0.2599673867225647, + "loss_ce": 0.008197364397346973, + "loss_iou": 0.4921875, + "loss_num": 0.05029296875, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 51688716, + "step": 301 + }, + { + "epoch": 0.07943710133491155, + "grad_norm": 14.089462259095386, + "learning_rate": 5e-06, + "loss": 0.3106, + "num_input_tokens_seen": 51861284, + "step": 302 + }, + { + "epoch": 0.07943710133491155, + "loss": 0.25006186962127686, + "loss_ce": 0.004822590388357639, + "loss_iou": 0.2734375, + "loss_num": 0.049072265625, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 51861284, + "step": 302 + }, + { + "epoch": 0.07970013809429868, + "grad_norm": 11.560263696218914, + "learning_rate": 5e-06, + "loss": 0.2256, + "num_input_tokens_seen": 52033296, + "step": 303 + }, + { + "epoch": 0.07970013809429868, + "loss": 0.2260829657316208, + "loss_ce": 0.005379845853894949, + "loss_iou": 0.58984375, + "loss_num": 0.044189453125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 52033296, + "step": 303 + }, + { + "epoch": 0.0799631748536858, + "grad_norm": 11.109765340752924, + "learning_rate": 5e-06, + "loss": 0.2728, + "num_input_tokens_seen": 52205488, + "step": 304 + }, + { + "epoch": 0.0799631748536858, + "loss": 0.26471367478370667, + "loss_ce": 0.0049480353482067585, + "loss_iou": 0.296875, + "loss_num": 0.052001953125, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 52205488, + "step": 304 + }, + { + "epoch": 0.08022621161307293, + "grad_norm": 9.561565409079957, + "learning_rate": 5e-06, + "loss": 0.2229, + "num_input_tokens_seen": 52377756, + "step": 305 + }, + { + "epoch": 0.08022621161307293, + "loss": 0.18537692725658417, + "loss_ce": 0.0050790803506970406, + "loss_iou": 0.431640625, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 52377756, + "step": 305 + }, + { + "epoch": 0.08048924837246005, + "grad_norm": 11.190229795777121, + "learning_rate": 5e-06, + "loss": 0.2198, + "num_input_tokens_seen": 52549784, + "step": 306 + }, + { + "epoch": 0.08048924837246005, + "loss": 0.1936880648136139, + "loss_ce": 0.009972257539629936, + "loss_iou": 0.490234375, + "loss_num": 0.03662109375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 52549784, + "step": 306 + }, + { + "epoch": 0.08075228513184718, + "grad_norm": 9.790391883995309, + "learning_rate": 5e-06, + "loss": 0.2383, + "num_input_tokens_seen": 52719856, + "step": 307 + }, + { + "epoch": 0.08075228513184718, + "loss": 0.16280654072761536, + "loss_ce": 0.007838279940187931, + "loss_iou": 0.5859375, + "loss_num": 0.031005859375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 52719856, + "step": 307 + }, + { + "epoch": 0.0810153218912343, + "grad_norm": 9.839075915450485, + "learning_rate": 5e-06, + "loss": 0.2743, + "num_input_tokens_seen": 52892084, + "step": 308 + }, + { + "epoch": 0.0810153218912343, + "loss": 0.15204857289791107, + "loss_ce": 0.004587644245475531, + "loss_iou": 0.416015625, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 52892084, + "step": 308 + }, + { + "epoch": 0.08127835865062143, + "grad_norm": 12.7104246981192, + "learning_rate": 5e-06, + "loss": 0.2624, + "num_input_tokens_seen": 53064144, + "step": 309 + }, + { + "epoch": 0.08127835865062143, + "loss": 0.25361326336860657, + "loss_ce": 0.009228497743606567, + "loss_iou": 0.7109375, + "loss_num": 0.048828125, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 53064144, + "step": 309 + }, + { + "epoch": 0.08154139541000854, + "grad_norm": 9.929561281282899, + "learning_rate": 5e-06, + "loss": 0.3246, + "num_input_tokens_seen": 53236468, + "step": 310 + }, + { + "epoch": 0.08154139541000854, + "loss": 0.39354461431503296, + "loss_ce": 0.005116886459290981, + "loss_iou": 0.44140625, + "loss_num": 0.07763671875, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 53236468, + "step": 310 + }, + { + "epoch": 0.08180443216939567, + "grad_norm": 7.973294747772443, + "learning_rate": 5e-06, + "loss": 0.3206, + "num_input_tokens_seen": 53408452, + "step": 311 + }, + { + "epoch": 0.08180443216939567, + "loss": 0.2905076742172241, + "loss_ce": 0.005351424217224121, + "loss_iou": NaN, + "loss_num": 0.05712890625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 53408452, + "step": 311 + }, + { + "epoch": 0.08206746892878279, + "grad_norm": 7.623528366567178, + "learning_rate": 5e-06, + "loss": 0.2153, + "num_input_tokens_seen": 53580848, + "step": 312 + }, + { + "epoch": 0.08206746892878279, + "loss": 0.19026613235473633, + "loss_ce": 0.005817888304591179, + "loss_iou": 0.5546875, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 53580848, + "step": 312 + }, + { + "epoch": 0.08233050568816992, + "grad_norm": 8.591099989866738, + "learning_rate": 5e-06, + "loss": 0.2192, + "num_input_tokens_seen": 53751420, + "step": 313 + }, + { + "epoch": 0.08233050568816992, + "loss": 0.16798478364944458, + "loss_ce": 0.005326096434146166, + "loss_iou": 0.515625, + "loss_num": 0.032470703125, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 53751420, + "step": 313 + }, + { + "epoch": 0.08259354244755705, + "grad_norm": 8.168930137137453, + "learning_rate": 5e-06, + "loss": 0.1859, + "num_input_tokens_seen": 53923692, + "step": 314 + }, + { + "epoch": 0.08259354244755705, + "loss": 0.1808127462863922, + "loss_ce": 0.006557377986609936, + "loss_iou": 0.474609375, + "loss_num": 0.034912109375, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 53923692, + "step": 314 + }, + { + "epoch": 0.08285657920694417, + "grad_norm": 12.69374602286009, + "learning_rate": 5e-06, + "loss": 0.255, + "num_input_tokens_seen": 54095596, + "step": 315 + }, + { + "epoch": 0.08285657920694417, + "loss": 0.2128537893295288, + "loss_ce": 0.00447976216673851, + "loss_iou": 0.470703125, + "loss_num": 0.041748046875, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 54095596, + "step": 315 + }, + { + "epoch": 0.0831196159663313, + "grad_norm": 15.86710980709006, + "learning_rate": 5e-06, + "loss": 0.3231, + "num_input_tokens_seen": 54267660, + "step": 316 + }, + { + "epoch": 0.0831196159663313, + "loss": 0.29302194714546204, + "loss_ce": 0.004081526771187782, + "loss_iou": 0.2890625, + "loss_num": 0.0576171875, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 54267660, + "step": 316 + }, + { + "epoch": 0.08338265272571842, + "grad_norm": 15.523568167979095, + "learning_rate": 5e-06, + "loss": 0.2565, + "num_input_tokens_seen": 54439776, + "step": 317 + }, + { + "epoch": 0.08338265272571842, + "loss": 0.45379549264907837, + "loss_ce": 0.00494293263182044, + "loss_iou": 0.421875, + "loss_num": 0.08984375, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 54439776, + "step": 317 + }, + { + "epoch": 0.08364568948510555, + "grad_norm": 8.655289602311944, + "learning_rate": 5e-06, + "loss": 0.2346, + "num_input_tokens_seen": 54612224, + "step": 318 + }, + { + "epoch": 0.08364568948510555, + "loss": 0.22014674544334412, + "loss_ce": 0.008476818911731243, + "loss_iou": 0.53515625, + "loss_num": 0.04248046875, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 54612224, + "step": 318 + }, + { + "epoch": 0.08390872624449267, + "grad_norm": 10.678383550359573, + "learning_rate": 5e-06, + "loss": 0.2569, + "num_input_tokens_seen": 54782956, + "step": 319 + }, + { + "epoch": 0.08390872624449267, + "loss": 0.2195930778980255, + "loss_ce": 0.004505200777202845, + "loss_iou": 0.36328125, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 54782956, + "step": 319 + }, + { + "epoch": 0.0841717630038798, + "grad_norm": 9.930533969178247, + "learning_rate": 5e-06, + "loss": 0.2305, + "num_input_tokens_seen": 54955104, + "step": 320 + }, + { + "epoch": 0.0841717630038798, + "loss": 0.24099573493003845, + "loss_ce": 0.008909543044865131, + "loss_iou": 0.57421875, + "loss_num": 0.04638671875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 54955104, + "step": 320 + }, + { + "epoch": 0.08443479976326691, + "grad_norm": 12.929508923145086, + "learning_rate": 5e-06, + "loss": 0.2998, + "num_input_tokens_seen": 55127092, + "step": 321 + }, + { + "epoch": 0.08443479976326691, + "loss": 0.3190174698829651, + "loss_ce": 0.008958851918578148, + "loss_iou": 0.53125, + "loss_num": 0.06201171875, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 55127092, + "step": 321 + }, + { + "epoch": 0.08469783652265404, + "grad_norm": 8.336572461824584, + "learning_rate": 5e-06, + "loss": 0.2781, + "num_input_tokens_seen": 55299364, + "step": 322 + }, + { + "epoch": 0.08469783652265404, + "loss": 0.21446217596530914, + "loss_ce": 0.005966084077954292, + "loss_iou": 0.1796875, + "loss_num": 0.041748046875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 55299364, + "step": 322 + }, + { + "epoch": 0.08496087328204116, + "grad_norm": 9.09772392048227, + "learning_rate": 5e-06, + "loss": 0.2929, + "num_input_tokens_seen": 55468252, + "step": 323 + }, + { + "epoch": 0.08496087328204116, + "loss": 0.28219783306121826, + "loss_ce": 0.007356537505984306, + "loss_iou": 0.48046875, + "loss_num": 0.054931640625, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 55468252, + "step": 323 + }, + { + "epoch": 0.08522391004142829, + "grad_norm": 9.926795422915726, + "learning_rate": 5e-06, + "loss": 0.2412, + "num_input_tokens_seen": 55640432, + "step": 324 + }, + { + "epoch": 0.08522391004142829, + "loss": 0.2594638466835022, + "loss_ce": 0.006412106566131115, + "loss_iou": 0.59375, + "loss_num": 0.050537109375, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 55640432, + "step": 324 + }, + { + "epoch": 0.08548694680081541, + "grad_norm": 13.161460539721551, + "learning_rate": 5e-06, + "loss": 0.3056, + "num_input_tokens_seen": 55812284, + "step": 325 + }, + { + "epoch": 0.08548694680081541, + "loss": 0.30750948190689087, + "loss_ce": 0.003920634277164936, + "loss_iou": 0.416015625, + "loss_num": 0.060791015625, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 55812284, + "step": 325 + }, + { + "epoch": 0.08574998356020254, + "grad_norm": 12.20729496362805, + "learning_rate": 5e-06, + "loss": 0.2492, + "num_input_tokens_seen": 55982812, + "step": 326 + }, + { + "epoch": 0.08574998356020254, + "loss": 0.25009769201278687, + "loss_ce": 0.003637729212641716, + "loss_iou": 0.435546875, + "loss_num": 0.04931640625, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 55982812, + "step": 326 + }, + { + "epoch": 0.08601302031958966, + "grad_norm": 8.486549179327342, + "learning_rate": 5e-06, + "loss": 0.2612, + "num_input_tokens_seen": 56154880, + "step": 327 + }, + { + "epoch": 0.08601302031958966, + "loss": 0.24035008251667023, + "loss_ce": 0.006341288331896067, + "loss_iou": 0.4453125, + "loss_num": 0.046875, + "loss_xval": 0.234375, + "num_input_tokens_seen": 56154880, + "step": 327 + }, + { + "epoch": 0.08627605707897679, + "grad_norm": 12.368715754730403, + "learning_rate": 5e-06, + "loss": 0.2435, + "num_input_tokens_seen": 56327104, + "step": 328 + }, + { + "epoch": 0.08627605707897679, + "loss": 0.2434852123260498, + "loss_ce": 0.003983248956501484, + "loss_iou": 0.65234375, + "loss_num": 0.0478515625, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 56327104, + "step": 328 + }, + { + "epoch": 0.08653909383836392, + "grad_norm": 16.286201391843196, + "learning_rate": 5e-06, + "loss": 0.3578, + "num_input_tokens_seen": 56498760, + "step": 329 + }, + { + "epoch": 0.08653909383836392, + "loss": 0.3906838297843933, + "loss_ce": 0.00481957383453846, + "loss_iou": 0.388671875, + "loss_num": 0.0771484375, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 56498760, + "step": 329 + }, + { + "epoch": 0.08680213059775103, + "grad_norm": 9.709126690100668, + "learning_rate": 5e-06, + "loss": 0.2709, + "num_input_tokens_seen": 56669204, + "step": 330 + }, + { + "epoch": 0.08680213059775103, + "loss": 0.2887915074825287, + "loss_ce": 0.006198735907673836, + "loss_iou": 0.359375, + "loss_num": 0.056396484375, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 56669204, + "step": 330 + }, + { + "epoch": 0.08706516735713817, + "grad_norm": 9.72107226431463, + "learning_rate": 5e-06, + "loss": 0.2546, + "num_input_tokens_seen": 56841324, + "step": 331 + }, + { + "epoch": 0.08706516735713817, + "loss": 0.17188376188278198, + "loss_ce": 0.005013656336814165, + "loss_iou": 0.423828125, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 56841324, + "step": 331 + }, + { + "epoch": 0.08732820411652528, + "grad_norm": 14.410704147758572, + "learning_rate": 5e-06, + "loss": 0.2888, + "num_input_tokens_seen": 57013308, + "step": 332 + }, + { + "epoch": 0.08732820411652528, + "loss": 0.2731274664402008, + "loss_ce": 0.007868663407862186, + "loss_iou": 0.5859375, + "loss_num": 0.05322265625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 57013308, + "step": 332 + }, + { + "epoch": 0.08759124087591241, + "grad_norm": 9.729446702170758, + "learning_rate": 5e-06, + "loss": 0.2747, + "num_input_tokens_seen": 57185320, + "step": 333 + }, + { + "epoch": 0.08759124087591241, + "loss": 0.18412570655345917, + "loss_ce": 0.004499247297644615, + "loss_iou": 0.57421875, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 57185320, + "step": 333 + }, + { + "epoch": 0.08785427763529953, + "grad_norm": 13.290037619570391, + "learning_rate": 5e-06, + "loss": 0.2065, + "num_input_tokens_seen": 57355852, + "step": 334 + }, + { + "epoch": 0.08785427763529953, + "loss": 0.17899873852729797, + "loss_ce": 0.007001678459346294, + "loss_iou": 0.32421875, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 57355852, + "step": 334 + }, + { + "epoch": 0.08811731439468666, + "grad_norm": 13.54518630333599, + "learning_rate": 5e-06, + "loss": 0.3901, + "num_input_tokens_seen": 57526160, + "step": 335 + }, + { + "epoch": 0.08811731439468666, + "loss": 0.42548078298568726, + "loss_ce": 0.004582356195896864, + "loss_iou": 0.72265625, + "loss_num": 0.083984375, + "loss_xval": 0.421875, + "num_input_tokens_seen": 57526160, + "step": 335 + }, + { + "epoch": 0.08838035115407378, + "grad_norm": 12.0949131727458, + "learning_rate": 5e-06, + "loss": 0.3092, + "num_input_tokens_seen": 57698240, + "step": 336 + }, + { + "epoch": 0.08838035115407378, + "loss": 0.2829555869102478, + "loss_ce": 0.005245603621006012, + "loss_iou": 0.4765625, + "loss_num": 0.055419921875, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 57698240, + "step": 336 + }, + { + "epoch": 0.08864338791346091, + "grad_norm": 11.320262534807219, + "learning_rate": 5e-06, + "loss": 0.2301, + "num_input_tokens_seen": 57870476, + "step": 337 + }, + { + "epoch": 0.08864338791346091, + "loss": 0.18072998523712158, + "loss_ce": 0.004704595077782869, + "loss_iou": 0.578125, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 57870476, + "step": 337 + }, + { + "epoch": 0.08890642467284803, + "grad_norm": 7.708963725187913, + "learning_rate": 5e-06, + "loss": 0.2643, + "num_input_tokens_seen": 58042960, + "step": 338 + }, + { + "epoch": 0.08890642467284803, + "loss": 0.2719469666481018, + "loss_ce": 0.004002647008746862, + "loss_iou": 0.2890625, + "loss_num": 0.0537109375, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 58042960, + "step": 338 + }, + { + "epoch": 0.08916946143223516, + "grad_norm": 8.11559752489591, + "learning_rate": 5e-06, + "loss": 0.285, + "num_input_tokens_seen": 58215276, + "step": 339 + }, + { + "epoch": 0.08916946143223516, + "loss": 0.2689915895462036, + "loss_ce": 0.004709342960268259, + "loss_iou": 0.578125, + "loss_num": 0.052734375, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 58215276, + "step": 339 + }, + { + "epoch": 0.08943249819162227, + "grad_norm": 16.72537995408633, + "learning_rate": 5e-06, + "loss": 0.2206, + "num_input_tokens_seen": 58387140, + "step": 340 + }, + { + "epoch": 0.08943249819162227, + "loss": 0.19433817267417908, + "loss_ce": 0.007021272089332342, + "loss_iou": 0.63671875, + "loss_num": 0.03759765625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 58387140, + "step": 340 + }, + { + "epoch": 0.0896955349510094, + "grad_norm": 10.068757947032363, + "learning_rate": 5e-06, + "loss": 0.2708, + "num_input_tokens_seen": 58559132, + "step": 341 + }, + { + "epoch": 0.0896955349510094, + "loss": 0.19732418656349182, + "loss_ce": 0.004819315858185291, + "loss_iou": 0.4375, + "loss_num": 0.03857421875, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 58559132, + "step": 341 + }, + { + "epoch": 0.08995857171039653, + "grad_norm": 15.667162296382404, + "learning_rate": 5e-06, + "loss": 0.2229, + "num_input_tokens_seen": 58729752, + "step": 342 + }, + { + "epoch": 0.08995857171039653, + "loss": 0.251327782869339, + "loss_ce": 0.006515774410218, + "loss_iou": 0.5859375, + "loss_num": 0.049072265625, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 58729752, + "step": 342 + }, + { + "epoch": 0.09022160846978365, + "grad_norm": 10.706496762114405, + "learning_rate": 5e-06, + "loss": 0.2862, + "num_input_tokens_seen": 58901996, + "step": 343 + }, + { + "epoch": 0.09022160846978365, + "loss": 0.2672620415687561, + "loss_ce": 0.003956370986998081, + "loss_iou": 0.240234375, + "loss_num": 0.052734375, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 58901996, + "step": 343 + }, + { + "epoch": 0.09048464522917078, + "grad_norm": 8.994760858015587, + "learning_rate": 5e-06, + "loss": 0.2729, + "num_input_tokens_seen": 59071632, + "step": 344 + }, + { + "epoch": 0.09048464522917078, + "loss": 0.2578084170818329, + "loss_ce": 0.0040242355316877365, + "loss_iou": 0.365234375, + "loss_num": 0.05078125, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 59071632, + "step": 344 + }, + { + "epoch": 0.0907476819885579, + "grad_norm": 10.48877247334524, + "learning_rate": 5e-06, + "loss": 0.1963, + "num_input_tokens_seen": 59240464, + "step": 345 + }, + { + "epoch": 0.0907476819885579, + "loss": 0.20404496788978577, + "loss_ce": 0.0031172330491244793, + "loss_iou": 0.283203125, + "loss_num": 0.040283203125, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 59240464, + "step": 345 + }, + { + "epoch": 0.09101071874794503, + "grad_norm": 11.358390932213611, + "learning_rate": 5e-06, + "loss": 0.2309, + "num_input_tokens_seen": 59411172, + "step": 346 + }, + { + "epoch": 0.09101071874794503, + "loss": 0.23203009366989136, + "loss_ce": 0.013524244539439678, + "loss_iou": 0.5859375, + "loss_num": 0.043701171875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 59411172, + "step": 346 + }, + { + "epoch": 0.09127375550733215, + "grad_norm": 11.905025621505947, + "learning_rate": 5e-06, + "loss": 0.2869, + "num_input_tokens_seen": 59581764, + "step": 347 + }, + { + "epoch": 0.09127375550733215, + "loss": 0.34626448154449463, + "loss_ce": 0.005199995823204517, + "loss_iou": 0.416015625, + "loss_num": 0.068359375, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 59581764, + "step": 347 + }, + { + "epoch": 0.09153679226671928, + "grad_norm": 12.735636545678894, + "learning_rate": 5e-06, + "loss": 0.3255, + "num_input_tokens_seen": 59754336, + "step": 348 + }, + { + "epoch": 0.09153679226671928, + "loss": 0.395874559879303, + "loss_ce": 0.005859900265932083, + "loss_iou": 0.3125, + "loss_num": 0.078125, + "loss_xval": 0.390625, + "num_input_tokens_seen": 59754336, + "step": 348 + }, + { + "epoch": 0.0917998290261064, + "grad_norm": 12.687289585677311, + "learning_rate": 5e-06, + "loss": 0.3009, + "num_input_tokens_seen": 59926836, + "step": 349 + }, + { + "epoch": 0.0917998290261064, + "loss": 0.25403833389282227, + "loss_ce": 0.0031838202849030495, + "loss_iou": 0.349609375, + "loss_num": 0.05029296875, + "loss_xval": 0.25, + "num_input_tokens_seen": 59926836, + "step": 349 + }, + { + "epoch": 0.09206286578549353, + "grad_norm": 7.2501298492412145, + "learning_rate": 5e-06, + "loss": 0.193, + "num_input_tokens_seen": 60099096, + "step": 350 + }, + { + "epoch": 0.09206286578549353, + "loss": 0.25642409920692444, + "loss_ce": 0.005325470119714737, + "loss_iou": 0.3671875, + "loss_num": 0.05029296875, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 60099096, + "step": 350 + }, + { + "epoch": 0.09232590254488064, + "grad_norm": 7.233766618859882, + "learning_rate": 5e-06, + "loss": 0.2191, + "num_input_tokens_seen": 60271456, + "step": 351 + }, + { + "epoch": 0.09232590254488064, + "loss": 0.23899057507514954, + "loss_ce": 0.003883154597133398, + "loss_iou": 0.51953125, + "loss_num": 0.047119140625, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 60271456, + "step": 351 + }, + { + "epoch": 0.09258893930426777, + "grad_norm": 10.116609125862757, + "learning_rate": 5e-06, + "loss": 0.2124, + "num_input_tokens_seen": 60443440, + "step": 352 + }, + { + "epoch": 0.09258893930426777, + "loss": 0.2204253077507019, + "loss_ce": 0.005032242741435766, + "loss_iou": 0.53125, + "loss_num": 0.04296875, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 60443440, + "step": 352 + }, + { + "epoch": 0.09285197606365489, + "grad_norm": 11.612967488254396, + "learning_rate": 5e-06, + "loss": 0.2054, + "num_input_tokens_seen": 60615884, + "step": 353 + }, + { + "epoch": 0.09285197606365489, + "loss": 0.20439094305038452, + "loss_ce": 0.005050132982432842, + "loss_iou": 0.416015625, + "loss_num": 0.039794921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 60615884, + "step": 353 + }, + { + "epoch": 0.09311501282304202, + "grad_norm": 14.924807522940846, + "learning_rate": 5e-06, + "loss": 0.2902, + "num_input_tokens_seen": 60787976, + "step": 354 + }, + { + "epoch": 0.09311501282304202, + "loss": 0.35237956047058105, + "loss_ce": 0.006432283669710159, + "loss_iou": 0.63671875, + "loss_num": 0.0693359375, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 60787976, + "step": 354 + }, + { + "epoch": 0.09337804958242915, + "grad_norm": 14.749952758612022, + "learning_rate": 5e-06, + "loss": 0.2866, + "num_input_tokens_seen": 60960464, + "step": 355 + }, + { + "epoch": 0.09337804958242915, + "loss": 0.27518266439437866, + "loss_ce": 0.008092833682894707, + "loss_iou": 0.51953125, + "loss_num": 0.053466796875, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 60960464, + "step": 355 + }, + { + "epoch": 0.09364108634181627, + "grad_norm": 8.028682980536022, + "learning_rate": 5e-06, + "loss": 0.2222, + "num_input_tokens_seen": 61130588, + "step": 356 + }, + { + "epoch": 0.09364108634181627, + "loss": 0.2519834041595459, + "loss_ce": 0.004424803890287876, + "loss_iou": 0.37109375, + "loss_num": 0.049560546875, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 61130588, + "step": 356 + }, + { + "epoch": 0.0939041231012034, + "grad_norm": 9.199423751114487, + "learning_rate": 5e-06, + "loss": 0.2881, + "num_input_tokens_seen": 61302652, + "step": 357 + }, + { + "epoch": 0.0939041231012034, + "loss": 0.39324474334716797, + "loss_ce": 0.012019152753055096, + "loss_iou": 0.5234375, + "loss_num": 0.076171875, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 61302652, + "step": 357 + }, + { + "epoch": 0.09416715986059052, + "grad_norm": 8.678464430801892, + "learning_rate": 5e-06, + "loss": 0.2227, + "num_input_tokens_seen": 61473012, + "step": 358 + }, + { + "epoch": 0.09416715986059052, + "loss": 0.20840191841125488, + "loss_ce": 0.00448344461619854, + "loss_iou": 0.4765625, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 61473012, + "step": 358 + }, + { + "epoch": 0.09443019661997765, + "grad_norm": 13.785653955051378, + "learning_rate": 5e-06, + "loss": 0.2401, + "num_input_tokens_seen": 61645272, + "step": 359 + }, + { + "epoch": 0.09443019661997765, + "loss": 0.19086723029613495, + "loss_ce": 0.0067241680808365345, + "loss_iou": 0.462890625, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 61645272, + "step": 359 + }, + { + "epoch": 0.09469323337936476, + "grad_norm": 7.119442902133065, + "learning_rate": 5e-06, + "loss": 0.2102, + "num_input_tokens_seen": 61817436, + "step": 360 + }, + { + "epoch": 0.09469323337936476, + "loss": 0.21873927116394043, + "loss_ce": 0.0036513670347630978, + "loss_iou": 0.44140625, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 61817436, + "step": 360 + }, + { + "epoch": 0.0949562701387519, + "grad_norm": 10.914204757351408, + "learning_rate": 5e-06, + "loss": 0.2247, + "num_input_tokens_seen": 61987764, + "step": 361 + }, + { + "epoch": 0.0949562701387519, + "loss": 0.2072867900133133, + "loss_ce": 0.0077018230222165585, + "loss_iou": 0.6171875, + "loss_num": 0.039794921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 61987764, + "step": 361 + }, + { + "epoch": 0.09521930689813901, + "grad_norm": 10.090324495449488, + "learning_rate": 5e-06, + "loss": 0.2923, + "num_input_tokens_seen": 62158356, + "step": 362 + }, + { + "epoch": 0.09521930689813901, + "loss": 0.2174176275730133, + "loss_ce": 0.007944983430206776, + "loss_iou": 0.345703125, + "loss_num": 0.041748046875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 62158356, + "step": 362 + }, + { + "epoch": 0.09548234365752614, + "grad_norm": 8.545687476622918, + "learning_rate": 5e-06, + "loss": 0.2599, + "num_input_tokens_seen": 62330412, + "step": 363 + }, + { + "epoch": 0.09548234365752614, + "loss": 0.2114790380001068, + "loss_ce": 0.004081577528268099, + "loss_iou": 0.546875, + "loss_num": 0.04150390625, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 62330412, + "step": 363 + }, + { + "epoch": 0.09574538041691326, + "grad_norm": 9.203465723081402, + "learning_rate": 5e-06, + "loss": 0.1892, + "num_input_tokens_seen": 62502416, + "step": 364 + }, + { + "epoch": 0.09574538041691326, + "loss": 0.20543332397937775, + "loss_ce": 0.004505585879087448, + "loss_iou": 0.62890625, + "loss_num": 0.0400390625, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 62502416, + "step": 364 + }, + { + "epoch": 0.09600841717630039, + "grad_norm": 11.930210195365788, + "learning_rate": 5e-06, + "loss": 0.3152, + "num_input_tokens_seen": 62674708, + "step": 365 + }, + { + "epoch": 0.09600841717630039, + "loss": 0.3104754686355591, + "loss_ce": 0.006398319266736507, + "loss_iou": 0.5859375, + "loss_num": 0.060791015625, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 62674708, + "step": 365 + }, + { + "epoch": 0.0962714539356875, + "grad_norm": 9.520122083335602, + "learning_rate": 5e-06, + "loss": 0.2852, + "num_input_tokens_seen": 62846712, + "step": 366 + }, + { + "epoch": 0.0962714539356875, + "loss": 0.30926454067230225, + "loss_ce": 0.008239164017140865, + "loss_iou": 0.3984375, + "loss_num": 0.06005859375, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 62846712, + "step": 366 + }, + { + "epoch": 0.09653449069507464, + "grad_norm": 7.113583060734605, + "learning_rate": 5e-06, + "loss": 0.2553, + "num_input_tokens_seen": 63018700, + "step": 367 + }, + { + "epoch": 0.09653449069507464, + "loss": 0.27072834968566895, + "loss_ce": 0.006201992742717266, + "loss_iou": 0.470703125, + "loss_num": 0.052978515625, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 63018700, + "step": 367 + }, + { + "epoch": 0.09679752745446175, + "grad_norm": 7.613528329197277, + "learning_rate": 5e-06, + "loss": 0.2091, + "num_input_tokens_seen": 63188284, + "step": 368 + }, + { + "epoch": 0.09679752745446175, + "loss": 0.19734525680541992, + "loss_ce": 0.0055117676965892315, + "loss_iou": 0.53515625, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 63188284, + "step": 368 + }, + { + "epoch": 0.09706056421384888, + "grad_norm": 7.59501735101136, + "learning_rate": 5e-06, + "loss": 0.2185, + "num_input_tokens_seen": 63360584, + "step": 369 + }, + { + "epoch": 0.09706056421384888, + "loss": 0.23239563405513763, + "loss_ce": 0.003757936879992485, + "loss_iou": 0.58984375, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 63360584, + "step": 369 + }, + { + "epoch": 0.09732360097323602, + "grad_norm": 9.071713140748203, + "learning_rate": 5e-06, + "loss": 0.2417, + "num_input_tokens_seen": 63530348, + "step": 370 + }, + { + "epoch": 0.09732360097323602, + "loss": 0.3295820653438568, + "loss_ce": 0.004997124895453453, + "loss_iou": 0.36328125, + "loss_num": 0.06494140625, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 63530348, + "step": 370 + }, + { + "epoch": 0.09758663773262313, + "grad_norm": 18.945858878396198, + "learning_rate": 5e-06, + "loss": 0.2916, + "num_input_tokens_seen": 63702772, + "step": 371 + }, + { + "epoch": 0.09758663773262313, + "loss": 0.38020581007003784, + "loss_ce": 0.007525136228650808, + "loss_iou": 0.54296875, + "loss_num": 0.07421875, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 63702772, + "step": 371 + }, + { + "epoch": 0.09784967449201026, + "grad_norm": 10.004542997696944, + "learning_rate": 5e-06, + "loss": 0.2707, + "num_input_tokens_seen": 63874988, + "step": 372 + }, + { + "epoch": 0.09784967449201026, + "loss": 0.21838681399822235, + "loss_ce": 0.005313089117407799, + "loss_iou": 0.39453125, + "loss_num": 0.04248046875, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 63874988, + "step": 372 + }, + { + "epoch": 0.09811271125139738, + "grad_norm": 12.569402693685879, + "learning_rate": 5e-06, + "loss": 0.3095, + "num_input_tokens_seen": 64047192, + "step": 373 + }, + { + "epoch": 0.09811271125139738, + "loss": 0.23138675093650818, + "loss_ce": 0.0027490435168147087, + "loss_iou": 0.45703125, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 64047192, + "step": 373 + }, + { + "epoch": 0.09837574801078451, + "grad_norm": 7.761350865641741, + "learning_rate": 5e-06, + "loss": 0.2808, + "num_input_tokens_seen": 64217628, + "step": 374 + }, + { + "epoch": 0.09837574801078451, + "loss": 0.22036589682102203, + "loss_ce": 0.005583181045949459, + "loss_iou": 0.40234375, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 64217628, + "step": 374 + }, + { + "epoch": 0.09863878477017163, + "grad_norm": 15.752581561948926, + "learning_rate": 5e-06, + "loss": 0.2404, + "num_input_tokens_seen": 64389824, + "step": 375 + }, + { + "epoch": 0.09863878477017163, + "loss": 0.31320562958717346, + "loss_ce": 0.0048560285940766335, + "loss_iou": 0.40234375, + "loss_num": 0.0615234375, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 64389824, + "step": 375 + }, + { + "epoch": 0.09890182152955876, + "grad_norm": 8.987788273052438, + "learning_rate": 5e-06, + "loss": 0.2167, + "num_input_tokens_seen": 64557964, + "step": 376 + }, + { + "epoch": 0.09890182152955876, + "loss": 0.15088969469070435, + "loss_ce": 0.0028794521931558847, + "loss_iou": 0.48828125, + "loss_num": 0.029541015625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 64557964, + "step": 376 + }, + { + "epoch": 0.09916485828894588, + "grad_norm": 14.201905900297023, + "learning_rate": 5e-06, + "loss": 0.282, + "num_input_tokens_seen": 64730276, + "step": 377 + }, + { + "epoch": 0.09916485828894588, + "loss": 0.2574768662452698, + "loss_ce": 0.004425112158060074, + "loss_iou": 0.46484375, + "loss_num": 0.050537109375, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 64730276, + "step": 377 + }, + { + "epoch": 0.099427895048333, + "grad_norm": 10.355784479360198, + "learning_rate": 5e-06, + "loss": 0.2585, + "num_input_tokens_seen": 64902196, + "step": 378 + }, + { + "epoch": 0.099427895048333, + "loss": 0.38254526257514954, + "loss_ce": 0.00461557786911726, + "loss_iou": 0.345703125, + "loss_num": 0.07568359375, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 64902196, + "step": 378 + }, + { + "epoch": 0.09969093180772012, + "grad_norm": 8.387859091991706, + "learning_rate": 5e-06, + "loss": 0.266, + "num_input_tokens_seen": 65074196, + "step": 379 + }, + { + "epoch": 0.09969093180772012, + "loss": 0.3536115884780884, + "loss_ce": 0.007664328906685114, + "loss_iou": 0.484375, + "loss_num": 0.0693359375, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 65074196, + "step": 379 + }, + { + "epoch": 0.09995396856710725, + "grad_norm": 6.106776880682037, + "learning_rate": 5e-06, + "loss": 0.179, + "num_input_tokens_seen": 65245768, + "step": 380 + }, + { + "epoch": 0.09995396856710725, + "loss": 0.16343596577644348, + "loss_ce": 0.003645919729024172, + "loss_iou": 0.38671875, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 65245768, + "step": 380 + }, + { + "epoch": 0.10021700532649437, + "grad_norm": 13.780341497897325, + "learning_rate": 5e-06, + "loss": 0.203, + "num_input_tokens_seen": 65418016, + "step": 381 + }, + { + "epoch": 0.10021700532649437, + "loss": 0.21609731018543243, + "loss_ce": 0.004000143148005009, + "loss_iou": 0.48046875, + "loss_num": 0.04248046875, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 65418016, + "step": 381 + }, + { + "epoch": 0.1004800420858815, + "grad_norm": 9.32261949794866, + "learning_rate": 5e-06, + "loss": 0.2265, + "num_input_tokens_seen": 65590312, + "step": 382 + }, + { + "epoch": 0.1004800420858815, + "loss": 0.22935181856155396, + "loss_ce": 0.00425416324287653, + "loss_iou": 0.61328125, + "loss_num": 0.045166015625, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 65590312, + "step": 382 + }, + { + "epoch": 0.10074307884526863, + "grad_norm": 8.048917738163752, + "learning_rate": 5e-06, + "loss": 0.2741, + "num_input_tokens_seen": 65762432, + "step": 383 + }, + { + "epoch": 0.10074307884526863, + "loss": 0.3397749662399292, + "loss_ce": 0.0026167738251388073, + "loss_iou": 0.2734375, + "loss_num": 0.0673828125, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 65762432, + "step": 383 + }, + { + "epoch": 0.10100611560465575, + "grad_norm": 10.637781772264608, + "learning_rate": 5e-06, + "loss": 0.2218, + "num_input_tokens_seen": 65934468, + "step": 384 + }, + { + "epoch": 0.10100611560465575, + "loss": 0.2347402572631836, + "loss_ce": 0.01049709226936102, + "loss_iou": 0.41015625, + "loss_num": 0.044921875, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 65934468, + "step": 384 + }, + { + "epoch": 0.10126915236404288, + "grad_norm": 8.002274766525902, + "learning_rate": 5e-06, + "loss": 0.2433, + "num_input_tokens_seen": 66106856, + "step": 385 + }, + { + "epoch": 0.10126915236404288, + "loss": 0.2602759301662445, + "loss_ce": 0.005881410092115402, + "loss_iou": 0.51171875, + "loss_num": 0.05078125, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 66106856, + "step": 385 + }, + { + "epoch": 0.10153218912343, + "grad_norm": 5.854621538534226, + "learning_rate": 5e-06, + "loss": 0.2458, + "num_input_tokens_seen": 66278808, + "step": 386 + }, + { + "epoch": 0.10153218912343, + "loss": 0.24760979413986206, + "loss_ce": 0.008718185126781464, + "loss_iou": 0.435546875, + "loss_num": 0.0478515625, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 66278808, + "step": 386 + }, + { + "epoch": 0.10179522588281713, + "grad_norm": 8.25201471581488, + "learning_rate": 5e-06, + "loss": 0.2036, + "num_input_tokens_seen": 66449192, + "step": 387 + }, + { + "epoch": 0.10179522588281713, + "loss": 0.18327301740646362, + "loss_ce": 0.007003485690802336, + "loss_iou": 0.6171875, + "loss_num": 0.035400390625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 66449192, + "step": 387 + }, + { + "epoch": 0.10205826264220424, + "grad_norm": 7.445331676424441, + "learning_rate": 5e-06, + "loss": 0.2392, + "num_input_tokens_seen": 66619484, + "step": 388 + }, + { + "epoch": 0.10205826264220424, + "loss": 0.19971191883087158, + "loss_ce": 0.0065356409177184105, + "loss_iou": 0.55859375, + "loss_num": 0.03857421875, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 66619484, + "step": 388 + }, + { + "epoch": 0.10232129940159138, + "grad_norm": 6.590904938513716, + "learning_rate": 5e-06, + "loss": 0.1929, + "num_input_tokens_seen": 66791680, + "step": 389 + }, + { + "epoch": 0.10232129940159138, + "loss": 0.186821848154068, + "loss_ce": 0.0036553400568664074, + "loss_iou": 0.4453125, + "loss_num": 0.03662109375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 66791680, + "step": 389 + }, + { + "epoch": 0.10258433616097849, + "grad_norm": 7.115969301930752, + "learning_rate": 5e-06, + "loss": 0.2258, + "num_input_tokens_seen": 66963964, + "step": 390 + }, + { + "epoch": 0.10258433616097849, + "loss": 0.21056599915027618, + "loss_ce": 0.005732023622840643, + "loss_iou": 0.5234375, + "loss_num": 0.041015625, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 66963964, + "step": 390 + }, + { + "epoch": 0.10284737292036562, + "grad_norm": 10.39999100726405, + "learning_rate": 5e-06, + "loss": 0.2688, + "num_input_tokens_seen": 67136264, + "step": 391 + }, + { + "epoch": 0.10284737292036562, + "loss": 0.32519546151161194, + "loss_ce": 0.0023194823879748583, + "loss_iou": 0.330078125, + "loss_num": 0.064453125, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 67136264, + "step": 391 + }, + { + "epoch": 0.10311040967975274, + "grad_norm": 10.853923665788189, + "learning_rate": 5e-06, + "loss": 0.2335, + "num_input_tokens_seen": 67308332, + "step": 392 + }, + { + "epoch": 0.10311040967975274, + "loss": 0.22598545253276825, + "loss_ce": 0.004671982489526272, + "loss_iou": 0.578125, + "loss_num": 0.044189453125, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 67308332, + "step": 392 + }, + { + "epoch": 0.10337344643913987, + "grad_norm": 12.644758786705145, + "learning_rate": 5e-06, + "loss": 0.2568, + "num_input_tokens_seen": 67480732, + "step": 393 + }, + { + "epoch": 0.10337344643913987, + "loss": 0.22954288125038147, + "loss_ce": 0.0029803775250911713, + "loss_iou": 0.322265625, + "loss_num": 0.04541015625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 67480732, + "step": 393 + }, + { + "epoch": 0.10363648319852699, + "grad_norm": 9.802676625201595, + "learning_rate": 5e-06, + "loss": 0.2089, + "num_input_tokens_seen": 67652736, + "step": 394 + }, + { + "epoch": 0.10363648319852699, + "loss": 0.2672483026981354, + "loss_ce": 0.006017843261361122, + "loss_iou": 0.240234375, + "loss_num": 0.05224609375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 67652736, + "step": 394 + }, + { + "epoch": 0.10389951995791412, + "grad_norm": 7.134550479825786, + "learning_rate": 5e-06, + "loss": 0.2498, + "num_input_tokens_seen": 67825356, + "step": 395 + }, + { + "epoch": 0.10389951995791412, + "loss": 0.2973036766052246, + "loss_ce": 0.004334905184805393, + "loss_iou": 0.53515625, + "loss_num": 0.05859375, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 67825356, + "step": 395 + }, + { + "epoch": 0.10416255671730125, + "grad_norm": 9.967886960744174, + "learning_rate": 5e-06, + "loss": 0.1892, + "num_input_tokens_seen": 67997632, + "step": 396 + }, + { + "epoch": 0.10416255671730125, + "loss": 0.16415753960609436, + "loss_ce": 0.0022923052310943604, + "loss_iou": 0.62109375, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 67997632, + "step": 396 + }, + { + "epoch": 0.10442559347668837, + "grad_norm": 10.176506863521315, + "learning_rate": 5e-06, + "loss": 0.2534, + "num_input_tokens_seen": 68168484, + "step": 397 + }, + { + "epoch": 0.10442559347668837, + "loss": 0.2146688550710678, + "loss_ce": 0.002388589084148407, + "loss_iou": 0.455078125, + "loss_num": 0.04248046875, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 68168484, + "step": 397 + }, + { + "epoch": 0.1046886302360755, + "grad_norm": 12.732415116106893, + "learning_rate": 5e-06, + "loss": 0.2673, + "num_input_tokens_seen": 68337496, + "step": 398 + }, + { + "epoch": 0.1046886302360755, + "loss": 0.28035295009613037, + "loss_ce": 0.003985744901001453, + "loss_iou": 0.5390625, + "loss_num": 0.05517578125, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 68337496, + "step": 398 + }, + { + "epoch": 0.10495166699546261, + "grad_norm": 8.052010611714167, + "learning_rate": 5e-06, + "loss": 0.278, + "num_input_tokens_seen": 68509860, + "step": 399 + }, + { + "epoch": 0.10495166699546261, + "loss": 0.36081743240356445, + "loss_ce": 0.003395556937903166, + "loss_iou": 0.486328125, + "loss_num": 0.0712890625, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 68509860, + "step": 399 + }, + { + "epoch": 0.10521470375484974, + "grad_norm": 7.830511214158693, + "learning_rate": 5e-06, + "loss": 0.2325, + "num_input_tokens_seen": 68681940, + "step": 400 + }, + { + "epoch": 0.10521470375484974, + "loss": 0.2878537178039551, + "loss_ce": 0.005016806535422802, + "loss_iou": 0.40625, + "loss_num": 0.056640625, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 68681940, + "step": 400 + }, + { + "epoch": 0.10547774051423686, + "grad_norm": 8.196278314466337, + "learning_rate": 5e-06, + "loss": 0.2176, + "num_input_tokens_seen": 68854332, + "step": 401 + }, + { + "epoch": 0.10547774051423686, + "loss": 0.27157318592071533, + "loss_ce": 0.004483355674892664, + "loss_iou": 0.455078125, + "loss_num": 0.053466796875, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 68854332, + "step": 401 + }, + { + "epoch": 0.10574077727362399, + "grad_norm": 8.502085361700777, + "learning_rate": 5e-06, + "loss": 0.2004, + "num_input_tokens_seen": 69024884, + "step": 402 + }, + { + "epoch": 0.10574077727362399, + "loss": 0.20401804149150848, + "loss_ce": 0.002235804684460163, + "loss_iou": 0.34765625, + "loss_num": 0.04052734375, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 69024884, + "step": 402 + }, + { + "epoch": 0.10600381403301111, + "grad_norm": 12.566157085058673, + "learning_rate": 5e-06, + "loss": 0.2292, + "num_input_tokens_seen": 69197236, + "step": 403 + }, + { + "epoch": 0.10600381403301111, + "loss": 0.24189046025276184, + "loss_ce": 0.0023274626582860947, + "loss_iou": 0.4609375, + "loss_num": 0.0478515625, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 69197236, + "step": 403 + }, + { + "epoch": 0.10626685079239824, + "grad_norm": 8.163593280105717, + "learning_rate": 5e-06, + "loss": 0.1782, + "num_input_tokens_seen": 69369436, + "step": 404 + }, + { + "epoch": 0.10626685079239824, + "loss": 0.17368575930595398, + "loss_ce": 0.005472864024341106, + "loss_iou": 0.35546875, + "loss_num": 0.03369140625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 69369436, + "step": 404 + }, + { + "epoch": 0.10652988755178536, + "grad_norm": 7.3996201552939596, + "learning_rate": 5e-06, + "loss": 0.1615, + "num_input_tokens_seen": 69540036, + "step": 405 + }, + { + "epoch": 0.10652988755178536, + "loss": 0.1399962604045868, + "loss_ce": 0.004925462882965803, + "loss_iou": 0.5234375, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 69540036, + "step": 405 + }, + { + "epoch": 0.10679292431117249, + "grad_norm": 10.197562859133864, + "learning_rate": 5e-06, + "loss": 0.2373, + "num_input_tokens_seen": 69712000, + "step": 406 + }, + { + "epoch": 0.10679292431117249, + "loss": 0.19172173738479614, + "loss_ce": 0.0054424479603767395, + "loss_iou": 0.71875, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 69712000, + "step": 406 + }, + { + "epoch": 0.1070559610705596, + "grad_norm": 13.16161294170375, + "learning_rate": 5e-06, + "loss": 0.2782, + "num_input_tokens_seen": 69883872, + "step": 407 + }, + { + "epoch": 0.1070559610705596, + "loss": 0.28615695238113403, + "loss_ce": 0.002465539611876011, + "loss_iou": 0.357421875, + "loss_num": 0.056640625, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 69883872, + "step": 407 + }, + { + "epoch": 0.10731899782994674, + "grad_norm": 8.88729554885214, + "learning_rate": 5e-06, + "loss": 0.2054, + "num_input_tokens_seen": 70056060, + "step": 408 + }, + { + "epoch": 0.10731899782994674, + "loss": 0.19157525897026062, + "loss_ce": 0.0030986934434622526, + "loss_iou": 0.5078125, + "loss_num": 0.03759765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 70056060, + "step": 408 + }, + { + "epoch": 0.10758203458933385, + "grad_norm": 10.615365655634278, + "learning_rate": 5e-06, + "loss": 0.2465, + "num_input_tokens_seen": 70228104, + "step": 409 + }, + { + "epoch": 0.10758203458933385, + "loss": 0.315712034702301, + "loss_ce": 0.003150993725284934, + "loss_iou": 0.40234375, + "loss_num": 0.0625, + "loss_xval": 0.3125, + "num_input_tokens_seen": 70228104, + "step": 409 + }, + { + "epoch": 0.10784507134872098, + "grad_norm": 8.946094916164988, + "learning_rate": 5e-06, + "loss": 0.2711, + "num_input_tokens_seen": 70398676, + "step": 410 + }, + { + "epoch": 0.10784507134872098, + "loss": 0.37502604722976685, + "loss_ce": 0.010768221691250801, + "loss_iou": 0.412109375, + "loss_num": 0.07275390625, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 70398676, + "step": 410 + }, + { + "epoch": 0.10810810810810811, + "grad_norm": 10.043204254830277, + "learning_rate": 5e-06, + "loss": 0.2838, + "num_input_tokens_seen": 70570976, + "step": 411 + }, + { + "epoch": 0.10810810810810811, + "loss": 0.23639510571956635, + "loss_ce": 0.004095299169421196, + "loss_iou": 0.443359375, + "loss_num": 0.04638671875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 70570976, + "step": 411 + }, + { + "epoch": 0.10837114486749523, + "grad_norm": 6.784469948331844, + "learning_rate": 5e-06, + "loss": 0.2968, + "num_input_tokens_seen": 70741228, + "step": 412 + }, + { + "epoch": 0.10837114486749523, + "loss": 0.3139989376068115, + "loss_ce": 0.001865162281319499, + "loss_iou": 0.3671875, + "loss_num": 0.0625, + "loss_xval": 0.3125, + "num_input_tokens_seen": 70741228, + "step": 412 + }, + { + "epoch": 0.10863418162688236, + "grad_norm": 6.343923096243914, + "learning_rate": 5e-06, + "loss": 0.2107, + "num_input_tokens_seen": 70913516, + "step": 413 + }, + { + "epoch": 0.10863418162688236, + "loss": 0.19345833361148834, + "loss_ce": 0.0018079333240166306, + "loss_iou": 0.5, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 70913516, + "step": 413 + }, + { + "epoch": 0.10889721838626948, + "grad_norm": 7.92695485009758, + "learning_rate": 5e-06, + "loss": 0.1941, + "num_input_tokens_seen": 71085736, + "step": 414 + }, + { + "epoch": 0.10889721838626948, + "loss": 0.18501858413219452, + "loss_ce": 0.001791047165170312, + "loss_iou": 0.65234375, + "loss_num": 0.03662109375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 71085736, + "step": 414 + }, + { + "epoch": 0.10916025514565661, + "grad_norm": 8.754602822026959, + "learning_rate": 5e-06, + "loss": 0.2194, + "num_input_tokens_seen": 71257772, + "step": 415 + }, + { + "epoch": 0.10916025514565661, + "loss": 0.2221953421831131, + "loss_ce": 0.0017973824869841337, + "loss_iou": 0.65234375, + "loss_num": 0.0439453125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 71257772, + "step": 415 + }, + { + "epoch": 0.10942329190504373, + "grad_norm": 7.459161826900908, + "learning_rate": 5e-06, + "loss": 0.2477, + "num_input_tokens_seen": 71430004, + "step": 416 + }, + { + "epoch": 0.10942329190504373, + "loss": 0.2557048797607422, + "loss_ce": 0.0026531266048550606, + "loss_iou": 0.578125, + "loss_num": 0.050537109375, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 71430004, + "step": 416 + }, + { + "epoch": 0.10968632866443086, + "grad_norm": 6.337226894093766, + "learning_rate": 5e-06, + "loss": 0.236, + "num_input_tokens_seen": 71602160, + "step": 417 + }, + { + "epoch": 0.10968632866443086, + "loss": 0.1684013307094574, + "loss_ce": 0.0018363934941589832, + "loss_iou": 0.59375, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 71602160, + "step": 417 + }, + { + "epoch": 0.10994936542381797, + "grad_norm": 10.092397777907028, + "learning_rate": 5e-06, + "loss": 0.1998, + "num_input_tokens_seen": 71774264, + "step": 418 + }, + { + "epoch": 0.10994936542381797, + "loss": 0.17219412326812744, + "loss_ce": 0.0024553609546273947, + "loss_iou": 0.53125, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 71774264, + "step": 418 + }, + { + "epoch": 0.1102124021832051, + "grad_norm": 10.933777042764003, + "learning_rate": 5e-06, + "loss": 0.2549, + "num_input_tokens_seen": 71943760, + "step": 419 + }, + { + "epoch": 0.1102124021832051, + "loss": 0.2607925236225128, + "loss_ce": 0.0052993567660450935, + "loss_iou": 0.43359375, + "loss_num": 0.051025390625, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 71943760, + "step": 419 + }, + { + "epoch": 0.11047543894259222, + "grad_norm": 18.099442946915016, + "learning_rate": 5e-06, + "loss": 0.2408, + "num_input_tokens_seen": 72114360, + "step": 420 + }, + { + "epoch": 0.11047543894259222, + "loss": 0.2702986001968384, + "loss_ce": 0.0022321869619190693, + "loss_iou": 0.3515625, + "loss_num": 0.053466796875, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 72114360, + "step": 420 + }, + { + "epoch": 0.11073847570197935, + "grad_norm": 6.645880081378423, + "learning_rate": 5e-06, + "loss": 0.2374, + "num_input_tokens_seen": 72285984, + "step": 421 + }, + { + "epoch": 0.11073847570197935, + "loss": 0.17263615131378174, + "loss_ce": 0.0023480583913624287, + "loss_iou": 0.38671875, + "loss_num": 0.0341796875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 72285984, + "step": 421 + }, + { + "epoch": 0.11100151246136647, + "grad_norm": 10.576055281968134, + "learning_rate": 5e-06, + "loss": 0.1819, + "num_input_tokens_seen": 72458472, + "step": 422 + }, + { + "epoch": 0.11100151246136647, + "loss": 0.2114211916923523, + "loss_ce": 0.005244437139481306, + "loss_iou": 0.5546875, + "loss_num": 0.041259765625, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 72458472, + "step": 422 + }, + { + "epoch": 0.1112645492207536, + "grad_norm": 12.150497670240854, + "learning_rate": 5e-06, + "loss": 0.2879, + "num_input_tokens_seen": 72630848, + "step": 423 + }, + { + "epoch": 0.1112645492207536, + "loss": 0.195449560880661, + "loss_ce": 0.0027005516458302736, + "loss_iou": 0.27734375, + "loss_num": 0.03857421875, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 72630848, + "step": 423 + }, + { + "epoch": 0.11152758598014073, + "grad_norm": 9.206681239637351, + "learning_rate": 5e-06, + "loss": 0.1882, + "num_input_tokens_seen": 72802948, + "step": 424 + }, + { + "epoch": 0.11152758598014073, + "loss": 0.20556196570396423, + "loss_ce": 0.003230433911085129, + "loss_iou": 0.4453125, + "loss_num": 0.04052734375, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 72802948, + "step": 424 + }, + { + "epoch": 0.11179062273952785, + "grad_norm": 7.914405274492032, + "learning_rate": 5e-06, + "loss": 0.2408, + "num_input_tokens_seen": 72973304, + "step": 425 + }, + { + "epoch": 0.11179062273952785, + "loss": 0.24065472185611725, + "loss_ce": 0.002800729824230075, + "loss_iou": 0.46875, + "loss_num": 0.047607421875, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 72973304, + "step": 425 + }, + { + "epoch": 0.11205365949891498, + "grad_norm": 8.079757468011927, + "learning_rate": 5e-06, + "loss": 0.2298, + "num_input_tokens_seen": 73145520, + "step": 426 + }, + { + "epoch": 0.11205365949891498, + "loss": 0.17487749457359314, + "loss_ce": 0.007641167379915714, + "loss_iou": 0.515625, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 73145520, + "step": 426 + }, + { + "epoch": 0.1123166962583021, + "grad_norm": 26.685292496143536, + "learning_rate": 5e-06, + "loss": 0.2674, + "num_input_tokens_seen": 73317728, + "step": 427 + }, + { + "epoch": 0.1123166962583021, + "loss": 0.24447987973690033, + "loss_ce": 0.003635148983448744, + "loss_iou": 0.7109375, + "loss_num": 0.04833984375, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 73317728, + "step": 427 + }, + { + "epoch": 0.11257973301768923, + "grad_norm": 10.587399579059058, + "learning_rate": 5e-06, + "loss": 0.2517, + "num_input_tokens_seen": 73490108, + "step": 428 + }, + { + "epoch": 0.11257973301768923, + "loss": 0.24800382554531097, + "loss_ce": 0.002154202200472355, + "loss_iou": 0.37890625, + "loss_num": 0.04931640625, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 73490108, + "step": 428 + }, + { + "epoch": 0.11284276977707634, + "grad_norm": 6.6654701515468515, + "learning_rate": 5e-06, + "loss": 0.2127, + "num_input_tokens_seen": 73662104, + "step": 429 + }, + { + "epoch": 0.11284276977707634, + "loss": 0.35326629877090454, + "loss_ce": 0.004389348905533552, + "loss_iou": 0.474609375, + "loss_num": 0.06982421875, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 73662104, + "step": 429 + }, + { + "epoch": 0.11310580653646347, + "grad_norm": 9.359106186429873, + "learning_rate": 5e-06, + "loss": 0.2209, + "num_input_tokens_seen": 73833940, + "step": 430 + }, + { + "epoch": 0.11310580653646347, + "loss": 0.19281955063343048, + "loss_ce": 0.0022678023669868708, + "loss_iou": 0.52734375, + "loss_num": 0.0380859375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 73833940, + "step": 430 + }, + { + "epoch": 0.11336884329585059, + "grad_norm": 10.912300657166487, + "learning_rate": 5e-06, + "loss": 0.2706, + "num_input_tokens_seen": 74006248, + "step": 431 + }, + { + "epoch": 0.11336884329585059, + "loss": 0.270729124546051, + "loss_ce": 0.0032730703242123127, + "loss_iou": 0.56640625, + "loss_num": 0.053466796875, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 74006248, + "step": 431 + }, + { + "epoch": 0.11363188005523772, + "grad_norm": 7.80923377535479, + "learning_rate": 5e-06, + "loss": 0.2294, + "num_input_tokens_seen": 74178292, + "step": 432 + }, + { + "epoch": 0.11363188005523772, + "loss": 0.21906697750091553, + "loss_ce": 0.002758371876552701, + "loss_iou": 0.45703125, + "loss_num": 0.043212890625, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 74178292, + "step": 432 + }, + { + "epoch": 0.11389491681462484, + "grad_norm": 7.423483614252486, + "learning_rate": 5e-06, + "loss": 0.2228, + "num_input_tokens_seen": 74350588, + "step": 433 + }, + { + "epoch": 0.11389491681462484, + "loss": 0.17986077070236206, + "loss_ce": 0.0017601896543055773, + "loss_iou": 0.48828125, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 74350588, + "step": 433 + }, + { + "epoch": 0.11415795357401197, + "grad_norm": 8.108512259957333, + "learning_rate": 5e-06, + "loss": 0.226, + "num_input_tokens_seen": 74520920, + "step": 434 + }, + { + "epoch": 0.11415795357401197, + "loss": 0.21928107738494873, + "loss_ce": 0.00455939956009388, + "loss_iou": 0.640625, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 74520920, + "step": 434 + }, + { + "epoch": 0.11442099033339909, + "grad_norm": 19.006805430751477, + "learning_rate": 5e-06, + "loss": 0.2661, + "num_input_tokens_seen": 74693212, + "step": 435 + }, + { + "epoch": 0.11442099033339909, + "loss": 0.3151628077030182, + "loss_ce": 0.0064469738863408566, + "loss_iou": 0.625, + "loss_num": 0.061767578125, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 74693212, + "step": 435 + }, + { + "epoch": 0.11468402709278622, + "grad_norm": 8.863661674496596, + "learning_rate": 5e-06, + "loss": 0.2843, + "num_input_tokens_seen": 74865396, + "step": 436 + }, + { + "epoch": 0.11468402709278622, + "loss": 0.280520498752594, + "loss_ce": 0.006594708655029535, + "loss_iou": 0.44921875, + "loss_num": 0.0546875, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 74865396, + "step": 436 + }, + { + "epoch": 0.11494706385217335, + "grad_norm": 7.497393138459489, + "learning_rate": 5e-06, + "loss": 0.1834, + "num_input_tokens_seen": 75037856, + "step": 437 + }, + { + "epoch": 0.11494706385217335, + "loss": 0.18996562063694, + "loss_ce": 0.0047849551774561405, + "loss_iou": 0.470703125, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 75037856, + "step": 437 + }, + { + "epoch": 0.11521010061156046, + "grad_norm": 12.688606481249035, + "learning_rate": 5e-06, + "loss": 0.2195, + "num_input_tokens_seen": 75210192, + "step": 438 + }, + { + "epoch": 0.11521010061156046, + "loss": 0.20892465114593506, + "loss_ce": 0.006348971277475357, + "loss_iou": 0.470703125, + "loss_num": 0.04052734375, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 75210192, + "step": 438 + }, + { + "epoch": 0.1154731373709476, + "grad_norm": 8.208628036837384, + "learning_rate": 5e-06, + "loss": 0.2418, + "num_input_tokens_seen": 75382312, + "step": 439 + }, + { + "epoch": 0.1154731373709476, + "loss": 0.20543652772903442, + "loss_ce": 0.001701178727671504, + "loss_iou": 0.50390625, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 75382312, + "step": 439 + }, + { + "epoch": 0.11573617413033471, + "grad_norm": 9.785336279235814, + "learning_rate": 5e-06, + "loss": 0.2151, + "num_input_tokens_seen": 75554688, + "step": 440 + }, + { + "epoch": 0.11573617413033471, + "loss": 0.26715782284736633, + "loss_ce": 0.0069039189256727695, + "loss_iou": 0.65234375, + "loss_num": 0.052001953125, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 75554688, + "step": 440 + }, + { + "epoch": 0.11599921088972184, + "grad_norm": 23.031133350657914, + "learning_rate": 5e-06, + "loss": 0.2357, + "num_input_tokens_seen": 75726964, + "step": 441 + }, + { + "epoch": 0.11599921088972184, + "loss": 0.3105895519256592, + "loss_ce": 0.004559269640594721, + "loss_iou": 0.4609375, + "loss_num": 0.061279296875, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 75726964, + "step": 441 + }, + { + "epoch": 0.11626224764910896, + "grad_norm": 8.337171777387358, + "learning_rate": 5e-06, + "loss": 0.239, + "num_input_tokens_seen": 75899032, + "step": 442 + }, + { + "epoch": 0.11626224764910896, + "loss": 0.20333924889564514, + "loss_ce": 0.002655645599588752, + "loss_iou": 0.55078125, + "loss_num": 0.0400390625, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 75899032, + "step": 442 + }, + { + "epoch": 0.11652528440849609, + "grad_norm": 4.896294530096544, + "learning_rate": 5e-06, + "loss": 0.1725, + "num_input_tokens_seen": 76071304, + "step": 443 + }, + { + "epoch": 0.11652528440849609, + "loss": 0.20664632320404053, + "loss_ce": 0.0032161371782422066, + "loss_iou": 0.439453125, + "loss_num": 0.040771484375, + "loss_xval": 0.203125, + "num_input_tokens_seen": 76071304, + "step": 443 + }, + { + "epoch": 0.11678832116788321, + "grad_norm": 7.74925868300208, + "learning_rate": 5e-06, + "loss": 0.2369, + "num_input_tokens_seen": 76241732, + "step": 444 + }, + { + "epoch": 0.11678832116788321, + "loss": 0.2300896942615509, + "loss_ce": 0.0016351052327081561, + "loss_iou": 0.51953125, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 76241732, + "step": 444 + }, + { + "epoch": 0.11705135792727034, + "grad_norm": 8.593064888198587, + "learning_rate": 5e-06, + "loss": 0.2023, + "num_input_tokens_seen": 76414004, + "step": 445 + }, + { + "epoch": 0.11705135792727034, + "loss": 0.14537394046783447, + "loss_ce": 0.0013919961638748646, + "loss_iou": 0.404296875, + "loss_num": 0.02880859375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 76414004, + "step": 445 + }, + { + "epoch": 0.11731439468665746, + "grad_norm": 10.869613464978638, + "learning_rate": 5e-06, + "loss": 0.2657, + "num_input_tokens_seen": 76586248, + "step": 446 + }, + { + "epoch": 0.11731439468665746, + "loss": 0.2349330186843872, + "loss_ce": 0.004464263096451759, + "loss_iou": 0.4140625, + "loss_num": 0.046142578125, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 76586248, + "step": 446 + }, + { + "epoch": 0.11757743144604459, + "grad_norm": 12.757935439615991, + "learning_rate": 5e-06, + "loss": 0.2717, + "num_input_tokens_seen": 76758328, + "step": 447 + }, + { + "epoch": 0.11757743144604459, + "loss": 0.331451952457428, + "loss_ce": 0.0016179666854441166, + "loss_iou": 0.359375, + "loss_num": 0.06591796875, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 76758328, + "step": 447 + }, + { + "epoch": 0.1178404682054317, + "grad_norm": 11.375965574655721, + "learning_rate": 5e-06, + "loss": 0.2032, + "num_input_tokens_seen": 76930392, + "step": 448 + }, + { + "epoch": 0.1178404682054317, + "loss": 0.21405665576457977, + "loss_ce": 0.004584001377224922, + "loss_iou": 0.42578125, + "loss_num": 0.0419921875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 76930392, + "step": 448 + }, + { + "epoch": 0.11810350496481883, + "grad_norm": 7.336838831766789, + "learning_rate": 5e-06, + "loss": 0.206, + "num_input_tokens_seen": 77102556, + "step": 449 + }, + { + "epoch": 0.11810350496481883, + "loss": 0.2166377753019333, + "loss_ce": 0.005700268317013979, + "loss_iou": 0.51171875, + "loss_num": 0.042236328125, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 77102556, + "step": 449 + }, + { + "epoch": 0.11836654172420595, + "grad_norm": 11.990630578782412, + "learning_rate": 5e-06, + "loss": 0.2454, + "num_input_tokens_seen": 77274600, + "step": 450 + }, + { + "epoch": 0.11836654172420595, + "loss": 0.24853403866291046, + "loss_ce": 0.00378305627964437, + "loss_iou": 0.58984375, + "loss_num": 0.049072265625, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 77274600, + "step": 450 + }, + { + "epoch": 0.11862957848359308, + "grad_norm": 9.139541774912184, + "learning_rate": 5e-06, + "loss": 0.2682, + "num_input_tokens_seen": 77446596, + "step": 451 + }, + { + "epoch": 0.11862957848359308, + "loss": 0.2904722988605499, + "loss_ce": 0.002020149724557996, + "loss_iou": 0.259765625, + "loss_num": 0.0576171875, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 77446596, + "step": 451 + }, + { + "epoch": 0.11889261524298021, + "grad_norm": 7.052664643006658, + "learning_rate": 5e-06, + "loss": 0.199, + "num_input_tokens_seen": 77618608, + "step": 452 + }, + { + "epoch": 0.11889261524298021, + "loss": 0.23526260256767273, + "loss_ce": 0.004305572714656591, + "loss_iou": 0.5078125, + "loss_num": 0.046142578125, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 77618608, + "step": 452 + }, + { + "epoch": 0.11915565200236733, + "grad_norm": 8.748276102929202, + "learning_rate": 5e-06, + "loss": 0.2101, + "num_input_tokens_seen": 77790812, + "step": 453 + }, + { + "epoch": 0.11915565200236733, + "loss": 0.2472519874572754, + "loss_ce": 0.0025620569940656424, + "loss_iou": 0.369140625, + "loss_num": 0.048828125, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 77790812, + "step": 453 + }, + { + "epoch": 0.11941868876175446, + "grad_norm": 5.877127785016851, + "learning_rate": 5e-06, + "loss": 0.179, + "num_input_tokens_seen": 77962644, + "step": 454 + }, + { + "epoch": 0.11941868876175446, + "loss": 0.2044929563999176, + "loss_ce": 0.005640420597046614, + "loss_iou": 0.5625, + "loss_num": 0.039794921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 77962644, + "step": 454 + }, + { + "epoch": 0.11968172552114158, + "grad_norm": 12.849038288174842, + "learning_rate": 5e-06, + "loss": 0.1941, + "num_input_tokens_seen": 78134616, + "step": 455 + }, + { + "epoch": 0.11968172552114158, + "loss": 0.20492224395275116, + "loss_ce": 0.002163449302315712, + "loss_iou": 0.640625, + "loss_num": 0.04052734375, + "loss_xval": 0.203125, + "num_input_tokens_seen": 78134616, + "step": 455 + }, + { + "epoch": 0.11994476228052871, + "grad_norm": 9.804877236674342, + "learning_rate": 5e-06, + "loss": 0.2629, + "num_input_tokens_seen": 78303656, + "step": 456 + }, + { + "epoch": 0.11994476228052871, + "loss": 0.23393824696540833, + "loss_ce": 0.0058498685248196125, + "loss_iou": NaN, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 78303656, + "step": 456 + }, + { + "epoch": 0.12020779903991582, + "grad_norm": 23.369495819583523, + "learning_rate": 5e-06, + "loss": 0.2449, + "num_input_tokens_seen": 78475936, + "step": 457 + }, + { + "epoch": 0.12020779903991582, + "loss": 0.2364155352115631, + "loss_ce": 0.009792003780603409, + "loss_iou": 0.4921875, + "loss_num": 0.04541015625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 78475936, + "step": 457 + }, + { + "epoch": 0.12047083579930296, + "grad_norm": 9.860704537193882, + "learning_rate": 5e-06, + "loss": 0.1929, + "num_input_tokens_seen": 78648124, + "step": 458 + }, + { + "epoch": 0.12047083579930296, + "loss": 0.1498676836490631, + "loss_ce": 0.002711937762796879, + "loss_iou": 0.66015625, + "loss_num": 0.0294189453125, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 78648124, + "step": 458 + }, + { + "epoch": 0.12073387255869007, + "grad_norm": 8.562392622535716, + "learning_rate": 5e-06, + "loss": 0.234, + "num_input_tokens_seen": 78820104, + "step": 459 + }, + { + "epoch": 0.12073387255869007, + "loss": 0.23113086819648743, + "loss_ce": 0.0024931649677455425, + "loss_iou": 0.484375, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 78820104, + "step": 459 + }, + { + "epoch": 0.1209969093180772, + "grad_norm": 6.866997211199077, + "learning_rate": 5e-06, + "loss": 0.2359, + "num_input_tokens_seen": 78992332, + "step": 460 + }, + { + "epoch": 0.1209969093180772, + "loss": 0.15971623361110687, + "loss_ce": 0.0017572464421391487, + "loss_iou": 0.369140625, + "loss_num": 0.031494140625, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 78992332, + "step": 460 + }, + { + "epoch": 0.12125994607746432, + "grad_norm": 6.404476112237903, + "learning_rate": 5e-06, + "loss": 0.2015, + "num_input_tokens_seen": 79164376, + "step": 461 + }, + { + "epoch": 0.12125994607746432, + "loss": 0.2323172241449356, + "loss_ce": 0.005937827751040459, + "loss_iou": 0.59765625, + "loss_num": 0.04541015625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 79164376, + "step": 461 + }, + { + "epoch": 0.12152298283685145, + "grad_norm": 9.290291620775372, + "learning_rate": 5e-06, + "loss": 0.1959, + "num_input_tokens_seen": 79336652, + "step": 462 + }, + { + "epoch": 0.12152298283685145, + "loss": 0.18938115239143372, + "loss_ce": 0.002125292085111141, + "loss_iou": 0.5390625, + "loss_num": 0.037353515625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 79336652, + "step": 462 + }, + { + "epoch": 0.12178601959623857, + "grad_norm": 11.240674872659573, + "learning_rate": 5e-06, + "loss": 0.1971, + "num_input_tokens_seen": 79508724, + "step": 463 + }, + { + "epoch": 0.12178601959623857, + "loss": 0.2277367115020752, + "loss_ce": 0.001418360392563045, + "loss_iou": 0.306640625, + "loss_num": 0.04541015625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 79508724, + "step": 463 + }, + { + "epoch": 0.1220490563556257, + "grad_norm": 13.32960488624403, + "learning_rate": 5e-06, + "loss": 0.1817, + "num_input_tokens_seen": 79678784, + "step": 464 + }, + { + "epoch": 0.1220490563556257, + "loss": 0.21636496484279633, + "loss_ce": 0.00182638771366328, + "loss_iou": 0.376953125, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 79678784, + "step": 464 + }, + { + "epoch": 0.12231209311501283, + "grad_norm": 12.994343511615464, + "learning_rate": 5e-06, + "loss": 0.2429, + "num_input_tokens_seen": 79850852, + "step": 465 + }, + { + "epoch": 0.12231209311501283, + "loss": 0.24236971139907837, + "loss_ce": 0.0015249918214976788, + "loss_iou": 0.419921875, + "loss_num": 0.048095703125, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 79850852, + "step": 465 + }, + { + "epoch": 0.12257512987439995, + "grad_norm": 10.746140563102669, + "learning_rate": 5e-06, + "loss": 0.2493, + "num_input_tokens_seen": 80021192, + "step": 466 + }, + { + "epoch": 0.12257512987439995, + "loss": 0.1711360514163971, + "loss_ce": 0.001702459529042244, + "loss_iou": 0.59765625, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 80021192, + "step": 466 + }, + { + "epoch": 0.12283816663378708, + "grad_norm": 20.444744144323252, + "learning_rate": 5e-06, + "loss": 0.2662, + "num_input_tokens_seen": 80190112, + "step": 467 + }, + { + "epoch": 0.12283816663378708, + "loss": 0.21875979006290436, + "loss_ce": 0.0026953346095979214, + "loss_iou": 0.4921875, + "loss_num": 0.043212890625, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 80190112, + "step": 467 + }, + { + "epoch": 0.1231012033931742, + "grad_norm": 12.488827753013481, + "learning_rate": 5e-06, + "loss": 0.2328, + "num_input_tokens_seen": 80362892, + "step": 468 + }, + { + "epoch": 0.1231012033931742, + "loss": 0.17285287380218506, + "loss_ce": 0.002381683327257633, + "loss_iou": 0.5234375, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 80362892, + "step": 468 + }, + { + "epoch": 0.12336424015256132, + "grad_norm": 9.176610530631967, + "learning_rate": 5e-06, + "loss": 0.2275, + "num_input_tokens_seen": 80534972, + "step": 469 + }, + { + "epoch": 0.12336424015256132, + "loss": 0.27365219593048096, + "loss_ce": 0.0032664609607309103, + "loss_iou": 0.421875, + "loss_num": 0.053955078125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 80534972, + "step": 469 + }, + { + "epoch": 0.12362727691194844, + "grad_norm": 33.167673801928274, + "learning_rate": 5e-06, + "loss": 0.2499, + "num_input_tokens_seen": 80705332, + "step": 470 + }, + { + "epoch": 0.12362727691194844, + "loss": 0.23707842826843262, + "loss_ce": 0.0022151488810777664, + "loss_iou": 0.337890625, + "loss_num": 0.046875, + "loss_xval": 0.234375, + "num_input_tokens_seen": 80705332, + "step": 470 + }, + { + "epoch": 0.12389031367133557, + "grad_norm": 7.668114037963123, + "learning_rate": 5e-06, + "loss": 0.2394, + "num_input_tokens_seen": 80874564, + "step": 471 + }, + { + "epoch": 0.12389031367133557, + "loss": 0.23063993453979492, + "loss_ce": 0.006457816809415817, + "loss_iou": 0.6171875, + "loss_num": 0.044677734375, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 80874564, + "step": 471 + }, + { + "epoch": 0.12415335043072269, + "grad_norm": 10.510373606639076, + "learning_rate": 5e-06, + "loss": 0.2114, + "num_input_tokens_seen": 81046768, + "step": 472 + }, + { + "epoch": 0.12415335043072269, + "loss": 0.21059830486774445, + "loss_ce": 0.002834630198776722, + "loss_iou": 0.52734375, + "loss_num": 0.04150390625, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 81046768, + "step": 472 + }, + { + "epoch": 0.12441638719010982, + "grad_norm": 11.032749822897834, + "learning_rate": 5e-06, + "loss": 0.2413, + "num_input_tokens_seen": 81218788, + "step": 473 + }, + { + "epoch": 0.12441638719010982, + "loss": 0.2567683458328247, + "loss_ce": 0.004815223626792431, + "loss_iou": 0.66015625, + "loss_num": 0.050537109375, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 81218788, + "step": 473 + }, + { + "epoch": 0.12467942394949694, + "grad_norm": 8.640049648990784, + "learning_rate": 5e-06, + "loss": 0.2498, + "num_input_tokens_seen": 81390764, + "step": 474 + }, + { + "epoch": 0.12467942394949694, + "loss": 0.3335922062397003, + "loss_ce": 0.0035140730906277895, + "loss_iou": 0.40625, + "loss_num": 0.06591796875, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 81390764, + "step": 474 + }, + { + "epoch": 0.12494246070888407, + "grad_norm": 6.133084134287981, + "learning_rate": 5e-06, + "loss": 0.1862, + "num_input_tokens_seen": 81561192, + "step": 475 + }, + { + "epoch": 0.12494246070888407, + "loss": 0.19583408534526825, + "loss_ce": 0.002474710112437606, + "loss_iou": 0.28125, + "loss_num": 0.038818359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 81561192, + "step": 475 + }, + { + "epoch": 0.1252054974682712, + "grad_norm": 15.123599266995042, + "learning_rate": 5e-06, + "loss": 0.212, + "num_input_tokens_seen": 81731608, + "step": 476 + }, + { + "epoch": 0.1252054974682712, + "loss": 0.2318619191646576, + "loss_ce": 0.0012100562453269958, + "loss_iou": 0.41796875, + "loss_num": 0.046142578125, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 81731608, + "step": 476 + }, + { + "epoch": 0.1254685342276583, + "grad_norm": 10.43813000825477, + "learning_rate": 5e-06, + "loss": 0.1887, + "num_input_tokens_seen": 81902388, + "step": 477 + }, + { + "epoch": 0.1254685342276583, + "loss": 0.1947634220123291, + "loss_ce": 0.0017092193011194468, + "loss_iou": 0.35546875, + "loss_num": 0.03857421875, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 81902388, + "step": 477 + }, + { + "epoch": 0.12573157098704543, + "grad_norm": 15.053311018918661, + "learning_rate": 5e-06, + "loss": 0.2559, + "num_input_tokens_seen": 82074796, + "step": 478 + }, + { + "epoch": 0.12573157098704543, + "loss": 0.23443953692913055, + "loss_ce": 0.002872154116630554, + "loss_iou": 0.59375, + "loss_num": 0.04638671875, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 82074796, + "step": 478 + }, + { + "epoch": 0.12599460774643256, + "grad_norm": 8.071545338749708, + "learning_rate": 5e-06, + "loss": 0.2732, + "num_input_tokens_seen": 82246976, + "step": 479 + }, + { + "epoch": 0.12599460774643256, + "loss": 0.22861449420452118, + "loss_ce": 0.001929928082972765, + "loss_iou": NaN, + "loss_num": 0.04541015625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 82246976, + "step": 479 + }, + { + "epoch": 0.1262576445058197, + "grad_norm": 13.388933170286325, + "learning_rate": 5e-06, + "loss": 0.217, + "num_input_tokens_seen": 82419244, + "step": 480 + }, + { + "epoch": 0.1262576445058197, + "loss": 0.24052694439888, + "loss_ce": 0.002611914649605751, + "loss_iou": 0.423828125, + "loss_num": 0.047607421875, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 82419244, + "step": 480 + }, + { + "epoch": 0.12652068126520682, + "grad_norm": 10.19375568056882, + "learning_rate": 5e-06, + "loss": 0.2132, + "num_input_tokens_seen": 82591568, + "step": 481 + }, + { + "epoch": 0.12652068126520682, + "loss": 0.27330607175827026, + "loss_ce": 0.007314843591302633, + "loss_iou": 0.4453125, + "loss_num": 0.05322265625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 82591568, + "step": 481 + }, + { + "epoch": 0.12678371802459393, + "grad_norm": 8.166078619911394, + "learning_rate": 5e-06, + "loss": 0.21, + "num_input_tokens_seen": 82760532, + "step": 482 + }, + { + "epoch": 0.12678371802459393, + "loss": 0.19459792971611023, + "loss_ce": 0.003313753753900528, + "loss_iou": 0.40234375, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 82760532, + "step": 482 + }, + { + "epoch": 0.12704675478398106, + "grad_norm": 8.980724720396978, + "learning_rate": 5e-06, + "loss": 0.2429, + "num_input_tokens_seen": 82932700, + "step": 483 + }, + { + "epoch": 0.12704675478398106, + "loss": 0.2369249314069748, + "loss_ce": 0.0062120286747813225, + "loss_iou": 0.455078125, + "loss_num": 0.046142578125, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 82932700, + "step": 483 + }, + { + "epoch": 0.1273097915433682, + "grad_norm": 7.529582302992287, + "learning_rate": 5e-06, + "loss": 0.2337, + "num_input_tokens_seen": 83104784, + "step": 484 + }, + { + "epoch": 0.1273097915433682, + "loss": 0.27052199840545654, + "loss_ce": 0.0020893928594887257, + "loss_iou": 0.59765625, + "loss_num": 0.0537109375, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 83104784, + "step": 484 + }, + { + "epoch": 0.12757282830275532, + "grad_norm": 9.051361983377177, + "learning_rate": 5e-06, + "loss": 0.2223, + "num_input_tokens_seen": 83276660, + "step": 485 + }, + { + "epoch": 0.12757282830275532, + "loss": 0.1893678605556488, + "loss_ce": 0.004553401842713356, + "loss_iou": 0.48046875, + "loss_num": 0.037109375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 83276660, + "step": 485 + }, + { + "epoch": 0.12783586506214242, + "grad_norm": 7.363403269312881, + "learning_rate": 5e-06, + "loss": 0.2164, + "num_input_tokens_seen": 83448804, + "step": 486 + }, + { + "epoch": 0.12783586506214242, + "loss": 0.18258926272392273, + "loss_ce": 0.004244527779519558, + "loss_iou": 0.6015625, + "loss_num": 0.03564453125, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 83448804, + "step": 486 + }, + { + "epoch": 0.12809890182152955, + "grad_norm": 6.462059422866227, + "learning_rate": 5e-06, + "loss": 0.1922, + "num_input_tokens_seen": 83621024, + "step": 487 + }, + { + "epoch": 0.12809890182152955, + "loss": 0.14729665219783783, + "loss_ce": 0.003986096940934658, + "loss_iou": 0.578125, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 83621024, + "step": 487 + }, + { + "epoch": 0.12836193858091668, + "grad_norm": 9.164596498872053, + "learning_rate": 5e-06, + "loss": 0.2078, + "num_input_tokens_seen": 83793612, + "step": 488 + }, + { + "epoch": 0.12836193858091668, + "loss": 0.20358332991600037, + "loss_ce": 0.0016790404915809631, + "loss_iou": 0.52734375, + "loss_num": 0.040283203125, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 83793612, + "step": 488 + }, + { + "epoch": 0.12862497534030382, + "grad_norm": 13.35296525183839, + "learning_rate": 5e-06, + "loss": 0.3196, + "num_input_tokens_seen": 83965664, + "step": 489 + }, + { + "epoch": 0.12862497534030382, + "loss": 0.36003273725509644, + "loss_ce": 0.007981948554515839, + "loss_iou": 0.58984375, + "loss_num": 0.0703125, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 83965664, + "step": 489 + }, + { + "epoch": 0.12888801209969092, + "grad_norm": 8.219901608770293, + "learning_rate": 5e-06, + "loss": 0.2425, + "num_input_tokens_seen": 84137656, + "step": 490 + }, + { + "epoch": 0.12888801209969092, + "loss": 0.21684233844280243, + "loss_ce": 0.0033413656055927277, + "loss_iou": 0.3359375, + "loss_num": 0.042724609375, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 84137656, + "step": 490 + }, + { + "epoch": 0.12915104885907805, + "grad_norm": 5.772697796397355, + "learning_rate": 5e-06, + "loss": 0.1873, + "num_input_tokens_seen": 84308240, + "step": 491 + }, + { + "epoch": 0.12915104885907805, + "loss": 0.1743691861629486, + "loss_ce": 0.0028604045510292053, + "loss_iou": 0.484375, + "loss_num": 0.0341796875, + "loss_xval": 0.171875, + "num_input_tokens_seen": 84308240, + "step": 491 + }, + { + "epoch": 0.12941408561846518, + "grad_norm": 7.154674159301149, + "learning_rate": 5e-06, + "loss": 0.1965, + "num_input_tokens_seen": 84478680, + "step": 492 + }, + { + "epoch": 0.12941408561846518, + "loss": 0.15929880738258362, + "loss_ce": 0.003598117269575596, + "loss_iou": 0.66015625, + "loss_num": 0.0311279296875, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 84478680, + "step": 492 + }, + { + "epoch": 0.1296771223778523, + "grad_norm": 10.931778081568359, + "learning_rate": 5e-06, + "loss": 0.2397, + "num_input_tokens_seen": 84648900, + "step": 493 + }, + { + "epoch": 0.1296771223778523, + "loss": 0.24825721979141235, + "loss_ce": 0.002285533118993044, + "loss_iou": 0.6640625, + "loss_num": 0.04931640625, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 84648900, + "step": 493 + }, + { + "epoch": 0.12994015913723944, + "grad_norm": 10.142675625135299, + "learning_rate": 5e-06, + "loss": 0.2203, + "num_input_tokens_seen": 84820972, + "step": 494 + }, + { + "epoch": 0.12994015913723944, + "loss": 0.23026269674301147, + "loss_ce": 0.0015639647608622909, + "loss_iou": 0.59765625, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 84820972, + "step": 494 + }, + { + "epoch": 0.13020319589662654, + "grad_norm": 10.196346960471569, + "learning_rate": 5e-06, + "loss": 0.197, + "num_input_tokens_seen": 84993508, + "step": 495 + }, + { + "epoch": 0.13020319589662654, + "loss": 0.21560978889465332, + "loss_ce": 0.001620523864403367, + "loss_iou": 0.65234375, + "loss_num": 0.042724609375, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 84993508, + "step": 495 + }, + { + "epoch": 0.13046623265601368, + "grad_norm": 8.523677693002021, + "learning_rate": 5e-06, + "loss": 0.2357, + "num_input_tokens_seen": 85165596, + "step": 496 + }, + { + "epoch": 0.13046623265601368, + "loss": 0.17495451867580414, + "loss_ce": 0.003750909585505724, + "loss_iou": 0.416015625, + "loss_num": 0.034423828125, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 85165596, + "step": 496 + }, + { + "epoch": 0.1307292694154008, + "grad_norm": 5.883749010160293, + "learning_rate": 5e-06, + "loss": 0.217, + "num_input_tokens_seen": 85337976, + "step": 497 + }, + { + "epoch": 0.1307292694154008, + "loss": 0.22726929187774658, + "loss_ce": 0.009862057864665985, + "loss_iou": 0.494140625, + "loss_num": 0.04345703125, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 85337976, + "step": 497 + }, + { + "epoch": 0.13099230617478794, + "grad_norm": 7.408260148240015, + "learning_rate": 5e-06, + "loss": 0.2231, + "num_input_tokens_seen": 85509860, + "step": 498 + }, + { + "epoch": 0.13099230617478794, + "loss": 0.1337101012468338, + "loss_ce": 0.004010388161987066, + "loss_iou": 0.515625, + "loss_num": 0.02587890625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 85509860, + "step": 498 + }, + { + "epoch": 0.13125534293417504, + "grad_norm": 15.7224480316305, + "learning_rate": 5e-06, + "loss": 0.2184, + "num_input_tokens_seen": 85680488, + "step": 499 + }, + { + "epoch": 0.13125534293417504, + "loss": 0.3409336507320404, + "loss_ce": 0.002554745879024267, + "loss_iou": 0.423828125, + "loss_num": 0.06787109375, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 85680488, + "step": 499 + }, + { + "epoch": 0.13151837969356217, + "grad_norm": 9.533912931076111, + "learning_rate": 5e-06, + "loss": 0.2073, + "num_input_tokens_seen": 85852688, + "step": 500 + }, + { + "epoch": 0.13151837969356217, + "eval_websight_new_CIoU": 0.7392345666885376, + "eval_websight_new_GIoU": 0.7380270659923553, + "eval_websight_new_IoU": 0.7466294467449188, + "eval_websight_new_MAE_all": 0.04153955727815628, + "eval_websight_new_MAE_h": 0.03558222949504852, + "eval_websight_new_MAE_w": 0.06108394265174866, + "eval_websight_new_MAE_x": 0.04947785474359989, + "eval_websight_new_MAE_y": 0.020014189183712006, + "eval_websight_new_NUM_probability": 0.9849532246589661, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 0.20474952459335327, + "eval_websight_new_loss_ce": 0.0016258999821729958, + "eval_websight_new_loss_iou": 0.635986328125, + "eval_websight_new_loss_num": 0.037567138671875, + "eval_websight_new_loss_xval": 0.187774658203125, + "eval_websight_new_runtime": 54.6509, + "eval_websight_new_samples_per_second": 0.915, + "eval_websight_new_steps_per_second": 0.037, + "num_input_tokens_seen": 85852688, + "step": 500 + }, + { + "epoch": 0.13151837969356217, + "eval_seeclick_CIoU": 0.41250014305114746, + "eval_seeclick_GIoU": 0.40925678610801697, + "eval_seeclick_IoU": 0.4613874703645706, + "eval_seeclick_MAE_all": 0.08603048324584961, + "eval_seeclick_MAE_h": 0.05444946512579918, + "eval_seeclick_MAE_w": 0.12106707319617271, + "eval_seeclick_MAE_x": 0.12660933285951614, + "eval_seeclick_MAE_y": 0.04199606738984585, + "eval_seeclick_NUM_probability": 0.9906161725521088, + "eval_seeclick_inside_bbox": 0.7698863744735718, + "eval_seeclick_loss": 0.3508862257003784, + "eval_seeclick_loss_ce": 0.013088095001876354, + "eval_seeclick_loss_iou": 0.609375, + "eval_seeclick_loss_num": 0.0647735595703125, + "eval_seeclick_loss_xval": 0.323974609375, + "eval_seeclick_runtime": 71.4374, + "eval_seeclick_samples_per_second": 0.602, + "eval_seeclick_steps_per_second": 0.028, + "num_input_tokens_seen": 85852688, + "step": 500 + }, + { + "epoch": 0.13151837969356217, + "eval_icons_CIoU": 0.7077827751636505, + "eval_icons_GIoU": 0.7009360492229462, + "eval_icons_IoU": 0.7175993025302887, + "eval_icons_MAE_all": 0.041869472712278366, + "eval_icons_MAE_h": 0.04292410984635353, + "eval_icons_MAE_w": 0.04752085544168949, + "eval_icons_MAE_x": 0.038647109642624855, + "eval_icons_MAE_y": 0.03838581405580044, + "eval_icons_NUM_probability": 0.9924971163272858, + "eval_icons_inside_bbox": 1.0, + "eval_icons_loss": 0.13132929801940918, + "eval_icons_loss_ce": 0.003774530749069527, + "eval_icons_loss_iou": 0.590576171875, + "eval_icons_loss_num": 0.024749755859375, + "eval_icons_loss_xval": 0.12384033203125, + "eval_icons_runtime": 78.9038, + "eval_icons_samples_per_second": 0.634, + "eval_icons_steps_per_second": 0.025, + "num_input_tokens_seen": 85852688, + "step": 500 + }, + { + "epoch": 0.13151837969356217, + "eval_screenspot_CIoU": 0.4913978377978007, + "eval_screenspot_GIoU": 0.47330527504285175, + "eval_screenspot_IoU": 0.5310356616973877, + "eval_screenspot_MAE_all": 0.09648379683494568, + "eval_screenspot_MAE_h": 0.061782063295443855, + "eval_screenspot_MAE_w": 0.14932986597220102, + "eval_screenspot_MAE_x": 0.11150848865509033, + "eval_screenspot_MAE_y": 0.0633147731423378, + "eval_screenspot_NUM_probability": 0.9926036596298218, + "eval_screenspot_inside_bbox": 0.8454166650772095, + "eval_screenspot_loss": 0.8485715389251709, + "eval_screenspot_loss_ce": 0.47645074129104614, + "eval_screenspot_loss_iou": 0.5421142578125, + "eval_screenspot_loss_num": 0.07304890950520833, + "eval_screenspot_loss_xval": 0.3654378255208333, + "eval_screenspot_runtime": 144.4943, + "eval_screenspot_samples_per_second": 0.616, + "eval_screenspot_steps_per_second": 0.021, + "num_input_tokens_seen": 85852688, + "step": 500 + }, + { + "epoch": 0.13151837969356217, + "loss": 0.8175798058509827, + "loss_ce": 0.44428879022598267, + "loss_iou": 0.5, + "loss_num": 0.07470703125, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 85852688, + "step": 500 + }, + { + "epoch": 0.1317814164529493, + "grad_norm": 9.07691940041295, + "learning_rate": 5e-06, + "loss": 0.1533, + "num_input_tokens_seen": 86021700, + "step": 501 + }, + { + "epoch": 0.1317814164529493, + "loss": 0.15382635593414307, + "loss_ce": 0.003191583789885044, + "loss_iou": 0.6640625, + "loss_num": 0.0301513671875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 86021700, + "step": 501 + }, + { + "epoch": 0.13204445321233643, + "grad_norm": 8.240988876623113, + "learning_rate": 5e-06, + "loss": 0.2297, + "num_input_tokens_seen": 86193792, + "step": 502 + }, + { + "epoch": 0.13204445321233643, + "loss": 0.23657214641571045, + "loss_ce": 0.003112667240202427, + "loss_iou": 0.625, + "loss_num": 0.046630859375, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 86193792, + "step": 502 + }, + { + "epoch": 0.13230748997172354, + "grad_norm": 8.766196459329715, + "learning_rate": 5e-06, + "loss": 0.2049, + "num_input_tokens_seen": 86365784, + "step": 503 + }, + { + "epoch": 0.13230748997172354, + "loss": 0.23612971603870392, + "loss_ce": 0.004379219841212034, + "loss_iou": 0.54296875, + "loss_num": 0.04638671875, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 86365784, + "step": 503 + }, + { + "epoch": 0.13257052673111067, + "grad_norm": 5.336005731654041, + "learning_rate": 5e-06, + "loss": 0.1306, + "num_input_tokens_seen": 86538044, + "step": 504 + }, + { + "epoch": 0.13257052673111067, + "loss": 0.15480023622512817, + "loss_ce": 0.0017545849550515413, + "loss_iou": 0.625, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 86538044, + "step": 504 + }, + { + "epoch": 0.1328335634904978, + "grad_norm": 15.391291602176388, + "learning_rate": 5e-06, + "loss": 0.2019, + "num_input_tokens_seen": 86710012, + "step": 505 + }, + { + "epoch": 0.1328335634904978, + "loss": 0.16446326673030853, + "loss_ce": 0.0012552611296996474, + "loss_iou": 0.6796875, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 86710012, + "step": 505 + }, + { + "epoch": 0.13309660024988493, + "grad_norm": 7.414971095415403, + "learning_rate": 5e-06, + "loss": 0.2046, + "num_input_tokens_seen": 86882600, + "step": 506 + }, + { + "epoch": 0.13309660024988493, + "loss": 0.18114808201789856, + "loss_ce": 0.0028033575508743525, + "loss_iou": 0.625, + "loss_num": 0.03564453125, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 86882600, + "step": 506 + }, + { + "epoch": 0.13335963700927206, + "grad_norm": 7.149578381431456, + "learning_rate": 5e-06, + "loss": 0.1878, + "num_input_tokens_seen": 87055160, + "step": 507 + }, + { + "epoch": 0.13335963700927206, + "loss": 0.185621976852417, + "loss_ce": 0.0038592712953686714, + "loss_iou": 0.39453125, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 87055160, + "step": 507 + }, + { + "epoch": 0.13362267376865916, + "grad_norm": 7.2770390495694235, + "learning_rate": 5e-06, + "loss": 0.2051, + "num_input_tokens_seen": 87227176, + "step": 508 + }, + { + "epoch": 0.13362267376865916, + "loss": 0.12990637123584747, + "loss_ce": 0.0011221927125006914, + "loss_iou": 0.453125, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 87227176, + "step": 508 + }, + { + "epoch": 0.1338857105280463, + "grad_norm": 13.071840767784375, + "learning_rate": 5e-06, + "loss": 0.2921, + "num_input_tokens_seen": 87397716, + "step": 509 + }, + { + "epoch": 0.1338857105280463, + "loss": 0.31984156370162964, + "loss_ce": 0.0027639116160571575, + "loss_iou": NaN, + "loss_num": 0.0634765625, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 87397716, + "step": 509 + }, + { + "epoch": 0.13414874728743342, + "grad_norm": 9.630666522841075, + "learning_rate": 5e-06, + "loss": 0.1771, + "num_input_tokens_seen": 87570180, + "step": 510 + }, + { + "epoch": 0.13414874728743342, + "loss": 0.14270807802677155, + "loss_ce": 0.002327217720448971, + "loss_iou": 0.6328125, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 87570180, + "step": 510 + }, + { + "epoch": 0.13441178404682055, + "grad_norm": 7.92909505971618, + "learning_rate": 5e-06, + "loss": 0.2147, + "num_input_tokens_seen": 87742132, + "step": 511 + }, + { + "epoch": 0.13441178404682055, + "loss": 0.24002233147621155, + "loss_ce": 0.0020462563261389732, + "loss_iou": 0.5859375, + "loss_num": 0.047607421875, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 87742132, + "step": 511 + }, + { + "epoch": 0.13467482080620766, + "grad_norm": 11.73787088896753, + "learning_rate": 5e-06, + "loss": 0.2146, + "num_input_tokens_seen": 87914144, + "step": 512 + }, + { + "epoch": 0.13467482080620766, + "loss": 0.2217179834842682, + "loss_ce": 0.00376143422909081, + "loss_iou": 0.5859375, + "loss_num": 0.043701171875, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 87914144, + "step": 512 + }, + { + "epoch": 0.1349378575655948, + "grad_norm": 14.481870714144165, + "learning_rate": 5e-06, + "loss": 0.2105, + "num_input_tokens_seen": 88086392, + "step": 513 + }, + { + "epoch": 0.1349378575655948, + "loss": 0.16927534341812134, + "loss_ce": 0.0027714259922504425, + "loss_iou": 0.36328125, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 88086392, + "step": 513 + }, + { + "epoch": 0.13520089432498192, + "grad_norm": 11.732408058803708, + "learning_rate": 5e-06, + "loss": 0.2117, + "num_input_tokens_seen": 88258824, + "step": 514 + }, + { + "epoch": 0.13520089432498192, + "loss": 0.21276208758354187, + "loss_ce": 0.0047542620450258255, + "loss_iou": 0.6171875, + "loss_num": 0.04150390625, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 88258824, + "step": 514 + }, + { + "epoch": 0.13546393108436905, + "grad_norm": 8.699627697080732, + "learning_rate": 5e-06, + "loss": 0.1604, + "num_input_tokens_seen": 88431280, + "step": 515 + }, + { + "epoch": 0.13546393108436905, + "loss": 0.13956406712532043, + "loss_ce": 0.005469819065183401, + "loss_iou": 0.62890625, + "loss_num": 0.02685546875, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 88431280, + "step": 515 + }, + { + "epoch": 0.13572696784375615, + "grad_norm": 9.441247877196542, + "learning_rate": 5e-06, + "loss": 0.2418, + "num_input_tokens_seen": 88603308, + "step": 516 + }, + { + "epoch": 0.13572696784375615, + "loss": 0.16743244230747223, + "loss_ce": 0.00141681800596416, + "loss_iou": 0.49609375, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 88603308, + "step": 516 + }, + { + "epoch": 0.13599000460314328, + "grad_norm": 11.156675067329255, + "learning_rate": 5e-06, + "loss": 0.1488, + "num_input_tokens_seen": 88775492, + "step": 517 + }, + { + "epoch": 0.13599000460314328, + "loss": 0.167589008808136, + "loss_ce": 0.0012681989464908838, + "loss_iou": 0.50390625, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 88775492, + "step": 517 + }, + { + "epoch": 0.1362530413625304, + "grad_norm": 9.64852272360873, + "learning_rate": 5e-06, + "loss": 0.2377, + "num_input_tokens_seen": 88946204, + "step": 518 + }, + { + "epoch": 0.1362530413625304, + "loss": 0.2998642921447754, + "loss_ce": 0.004698258824646473, + "loss_iou": 0.5859375, + "loss_num": 0.05908203125, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 88946204, + "step": 518 + }, + { + "epoch": 0.13651607812191754, + "grad_norm": 8.486100540746042, + "learning_rate": 5e-06, + "loss": 0.207, + "num_input_tokens_seen": 89116056, + "step": 519 + }, + { + "epoch": 0.13651607812191754, + "loss": 0.21377842128276825, + "loss_ce": 0.004488877020776272, + "loss_iou": 0.64453125, + "loss_num": 0.041748046875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 89116056, + "step": 519 + }, + { + "epoch": 0.13677911488130468, + "grad_norm": 7.241270611453955, + "learning_rate": 5e-06, + "loss": 0.2361, + "num_input_tokens_seen": 89287012, + "step": 520 + }, + { + "epoch": 0.13677911488130468, + "loss": 0.23230193555355072, + "loss_ce": 0.0012228279374539852, + "loss_iou": 0.447265625, + "loss_num": 0.04638671875, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 89287012, + "step": 520 + }, + { + "epoch": 0.13704215164069178, + "grad_norm": 9.086385631838745, + "learning_rate": 5e-06, + "loss": 0.1756, + "num_input_tokens_seen": 89457780, + "step": 521 + }, + { + "epoch": 0.13704215164069178, + "loss": 0.16174045205116272, + "loss_ce": 0.0012180046178400517, + "loss_iou": 0.478515625, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 89457780, + "step": 521 + }, + { + "epoch": 0.1373051884000789, + "grad_norm": 9.260504659218878, + "learning_rate": 5e-06, + "loss": 0.1871, + "num_input_tokens_seen": 89628244, + "step": 522 + }, + { + "epoch": 0.1373051884000789, + "loss": 0.18322458863258362, + "loss_ce": 0.005673316773027182, + "loss_iou": 0.5390625, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 89628244, + "step": 522 + }, + { + "epoch": 0.13756822515946604, + "grad_norm": 10.862554096761864, + "learning_rate": 5e-06, + "loss": 0.1938, + "num_input_tokens_seen": 89798512, + "step": 523 + }, + { + "epoch": 0.13756822515946604, + "loss": 0.18914146721363068, + "loss_ce": 0.002434919821098447, + "loss_iou": 0.62109375, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 89798512, + "step": 523 + }, + { + "epoch": 0.13783126191885317, + "grad_norm": 8.527732112130064, + "learning_rate": 5e-06, + "loss": 0.2597, + "num_input_tokens_seen": 89968992, + "step": 524 + }, + { + "epoch": 0.13783126191885317, + "loss": 0.2936267554759979, + "loss_ce": 0.0038318424485623837, + "loss_iou": 0.40234375, + "loss_num": 0.057861328125, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 89968992, + "step": 524 + }, + { + "epoch": 0.13809429867824027, + "grad_norm": 8.96210431629978, + "learning_rate": 5e-06, + "loss": 0.1589, + "num_input_tokens_seen": 90140828, + "step": 525 + }, + { + "epoch": 0.13809429867824027, + "loss": 0.22792883217334747, + "loss_ce": 0.001427364069968462, + "loss_iou": 0.396484375, + "loss_num": 0.04541015625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 90140828, + "step": 525 + }, + { + "epoch": 0.1383573354376274, + "grad_norm": 10.303553365642298, + "learning_rate": 5e-06, + "loss": 0.1672, + "num_input_tokens_seen": 90311476, + "step": 526 + }, + { + "epoch": 0.1383573354376274, + "loss": 0.09766636043787003, + "loss_ce": 0.00202426896430552, + "loss_iou": 0.59765625, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 90311476, + "step": 526 + }, + { + "epoch": 0.13862037219701454, + "grad_norm": 7.578553389392675, + "learning_rate": 5e-06, + "loss": 0.1767, + "num_input_tokens_seen": 90483668, + "step": 527 + }, + { + "epoch": 0.13862037219701454, + "loss": 0.2255394458770752, + "loss_ce": 0.0017845738912001252, + "loss_iou": 0.5234375, + "loss_num": 0.044677734375, + "loss_xval": 0.2236328125, + "num_input_tokens_seen": 90483668, + "step": 527 + }, + { + "epoch": 0.13888340895640167, + "grad_norm": 11.866590519507064, + "learning_rate": 5e-06, + "loss": 0.2463, + "num_input_tokens_seen": 90655996, + "step": 528 + }, + { + "epoch": 0.13888340895640167, + "loss": 0.21356430649757385, + "loss_ce": 0.004030614625662565, + "loss_iou": 0.6953125, + "loss_num": 0.0419921875, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 90655996, + "step": 528 + }, + { + "epoch": 0.13914644571578877, + "grad_norm": 9.66204006000912, + "learning_rate": 5e-06, + "loss": 0.2311, + "num_input_tokens_seen": 90828348, + "step": 529 + }, + { + "epoch": 0.13914644571578877, + "loss": 0.21380871534347534, + "loss_ce": 0.0014063662383705378, + "loss_iou": 0.5546875, + "loss_num": 0.04248046875, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 90828348, + "step": 529 + }, + { + "epoch": 0.1394094824751759, + "grad_norm": 7.3801351915919975, + "learning_rate": 5e-06, + "loss": 0.2048, + "num_input_tokens_seen": 91000476, + "step": 530 + }, + { + "epoch": 0.1394094824751759, + "loss": 0.19109413027763367, + "loss_ce": 0.0008780673379078507, + "loss_iou": 0.29296875, + "loss_num": 0.0380859375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 91000476, + "step": 530 + }, + { + "epoch": 0.13967251923456303, + "grad_norm": 6.737214273696564, + "learning_rate": 5e-06, + "loss": 0.2037, + "num_input_tokens_seen": 91173056, + "step": 531 + }, + { + "epoch": 0.13967251923456303, + "loss": 0.26445770263671875, + "loss_ce": 0.0033187787048518658, + "loss_iou": 0.486328125, + "loss_num": 0.05224609375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 91173056, + "step": 531 + }, + { + "epoch": 0.13993555599395016, + "grad_norm": 5.9400720051741835, + "learning_rate": 5e-06, + "loss": 0.1598, + "num_input_tokens_seen": 91345516, + "step": 532 + }, + { + "epoch": 0.13993555599395016, + "loss": 0.12527181208133698, + "loss_ce": 0.001858725561760366, + "loss_iou": 0.59765625, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 91345516, + "step": 532 + }, + { + "epoch": 0.1401985927533373, + "grad_norm": 8.591575042379741, + "learning_rate": 5e-06, + "loss": 0.2351, + "num_input_tokens_seen": 91516156, + "step": 533 + }, + { + "epoch": 0.1401985927533373, + "loss": 0.22694742679595947, + "loss_ce": 0.005511872004717588, + "loss_iou": 0.443359375, + "loss_num": 0.04443359375, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 91516156, + "step": 533 + }, + { + "epoch": 0.1404616295127244, + "grad_norm": 26.674816384255838, + "learning_rate": 5e-06, + "loss": 0.2705, + "num_input_tokens_seen": 91685124, + "step": 534 + }, + { + "epoch": 0.1404616295127244, + "loss": 0.2157442569732666, + "loss_ce": 0.005356077570468187, + "loss_iou": 0.52734375, + "loss_num": 0.0419921875, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 91685124, + "step": 534 + }, + { + "epoch": 0.14072466627211153, + "grad_norm": 7.992225607802382, + "learning_rate": 5e-06, + "loss": 0.2194, + "num_input_tokens_seen": 91857436, + "step": 535 + }, + { + "epoch": 0.14072466627211153, + "loss": 0.18514756858348846, + "loss_ce": 0.0036290136631578207, + "loss_iou": 0.625, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 91857436, + "step": 535 + }, + { + "epoch": 0.14098770303149866, + "grad_norm": 7.005269973220872, + "learning_rate": 5e-06, + "loss": 0.2265, + "num_input_tokens_seen": 92029236, + "step": 536 + }, + { + "epoch": 0.14098770303149866, + "loss": 0.23437106609344482, + "loss_ce": 0.0035971456672996283, + "loss_iou": 0.6640625, + "loss_num": 0.046142578125, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 92029236, + "step": 536 + }, + { + "epoch": 0.1412507397908858, + "grad_norm": 13.679910251313718, + "learning_rate": 5e-06, + "loss": 0.2237, + "num_input_tokens_seen": 92201248, + "step": 537 + }, + { + "epoch": 0.1412507397908858, + "loss": 0.26536300778388977, + "loss_ce": 0.006451865192502737, + "loss_iou": 0.625, + "loss_num": 0.0517578125, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 92201248, + "step": 537 + }, + { + "epoch": 0.1415137765502729, + "grad_norm": 15.448137214976848, + "learning_rate": 5e-06, + "loss": 0.2463, + "num_input_tokens_seen": 92373276, + "step": 538 + }, + { + "epoch": 0.1415137765502729, + "loss": 0.2220609188079834, + "loss_ce": 0.0029447050765156746, + "loss_iou": 0.69140625, + "loss_num": 0.043701171875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 92373276, + "step": 538 + }, + { + "epoch": 0.14177681330966002, + "grad_norm": 11.236165761153213, + "learning_rate": 5e-06, + "loss": 0.2009, + "num_input_tokens_seen": 92545880, + "step": 539 + }, + { + "epoch": 0.14177681330966002, + "loss": 0.21155700087547302, + "loss_ce": 0.006112661678344011, + "loss_iou": 0.3515625, + "loss_num": 0.041015625, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 92545880, + "step": 539 + }, + { + "epoch": 0.14203985006904715, + "grad_norm": 7.303097321727043, + "learning_rate": 5e-06, + "loss": 0.2029, + "num_input_tokens_seen": 92717956, + "step": 540 + }, + { + "epoch": 0.14203985006904715, + "loss": 0.2111251950263977, + "loss_ce": 0.0041549778543412685, + "loss_iou": 0.37109375, + "loss_num": 0.04150390625, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 92717956, + "step": 540 + }, + { + "epoch": 0.14230288682843428, + "grad_norm": 28.317923328050057, + "learning_rate": 5e-06, + "loss": 0.1946, + "num_input_tokens_seen": 92890260, + "step": 541 + }, + { + "epoch": 0.14230288682843428, + "loss": 0.18131288886070251, + "loss_ce": 0.004371959716081619, + "loss_iou": 0.498046875, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 92890260, + "step": 541 + }, + { + "epoch": 0.14256592358782139, + "grad_norm": 12.97806529461824, + "learning_rate": 5e-06, + "loss": 0.2534, + "num_input_tokens_seen": 93062192, + "step": 542 + }, + { + "epoch": 0.14256592358782139, + "loss": 0.30919933319091797, + "loss_ce": 0.003779401071369648, + "loss_iou": 0.640625, + "loss_num": 0.06103515625, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 93062192, + "step": 542 + }, + { + "epoch": 0.14282896034720852, + "grad_norm": 7.8709246328059725, + "learning_rate": 5e-06, + "loss": 0.1756, + "num_input_tokens_seen": 93234480, + "step": 543 + }, + { + "epoch": 0.14282896034720852, + "loss": 0.2343926727771759, + "loss_ce": 0.002581145381554961, + "loss_iou": 0.40234375, + "loss_num": 0.04638671875, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 93234480, + "step": 543 + }, + { + "epoch": 0.14309199710659565, + "grad_norm": 8.436976597382053, + "learning_rate": 5e-06, + "loss": 0.1913, + "num_input_tokens_seen": 93406784, + "step": 544 + }, + { + "epoch": 0.14309199710659565, + "loss": 0.20959031581878662, + "loss_ce": 0.0013383585028350353, + "loss_iou": 0.4453125, + "loss_num": 0.041748046875, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 93406784, + "step": 544 + }, + { + "epoch": 0.14335503386598278, + "grad_norm": 12.053836172217155, + "learning_rate": 5e-06, + "loss": 0.2433, + "num_input_tokens_seen": 93577272, + "step": 545 + }, + { + "epoch": 0.14335503386598278, + "loss": 0.18772834539413452, + "loss_ce": 0.002242510672658682, + "loss_iou": 0.52734375, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 93577272, + "step": 545 + }, + { + "epoch": 0.1436180706253699, + "grad_norm": 9.099796427619822, + "learning_rate": 5e-06, + "loss": 0.1713, + "num_input_tokens_seen": 93749292, + "step": 546 + }, + { + "epoch": 0.1436180706253699, + "loss": 0.2121119648218155, + "loss_ce": 0.00813247635960579, + "loss_iou": 0.671875, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 93749292, + "step": 546 + }, + { + "epoch": 0.143881107384757, + "grad_norm": 13.08678717007557, + "learning_rate": 5e-06, + "loss": 0.2241, + "num_input_tokens_seen": 93921812, + "step": 547 + }, + { + "epoch": 0.143881107384757, + "loss": 0.22192896902561188, + "loss_ce": 0.002934828167781234, + "loss_iou": 0.38671875, + "loss_num": 0.0439453125, + "loss_xval": 0.21875, + "num_input_tokens_seen": 93921812, + "step": 547 + }, + { + "epoch": 0.14414414414414414, + "grad_norm": 8.230228011363112, + "learning_rate": 5e-06, + "loss": 0.2257, + "num_input_tokens_seen": 94093976, + "step": 548 + }, + { + "epoch": 0.14414414414414414, + "loss": 0.21945567429065704, + "loss_ce": 0.0016822349280118942, + "loss_iou": 0.45703125, + "loss_num": 0.04345703125, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 94093976, + "step": 548 + }, + { + "epoch": 0.14440718090353127, + "grad_norm": 6.524669746029216, + "learning_rate": 5e-06, + "loss": 0.124, + "num_input_tokens_seen": 94262972, + "step": 549 + }, + { + "epoch": 0.14440718090353127, + "loss": 0.13413353264331818, + "loss_ce": 0.0032741604372859, + "loss_iou": 0.53515625, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 94262972, + "step": 549 + }, + { + "epoch": 0.1446702176629184, + "grad_norm": 7.614359717596038, + "learning_rate": 5e-06, + "loss": 0.1885, + "num_input_tokens_seen": 94435240, + "step": 550 + }, + { + "epoch": 0.1446702176629184, + "loss": 0.22648407518863678, + "loss_ce": 0.0034616070333868265, + "loss_iou": 0.427734375, + "loss_num": 0.044677734375, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 94435240, + "step": 550 + }, + { + "epoch": 0.1449332544223055, + "grad_norm": 7.224416897933664, + "learning_rate": 5e-06, + "loss": 0.1771, + "num_input_tokens_seen": 94607488, + "step": 551 + }, + { + "epoch": 0.1449332544223055, + "loss": 0.18614572286605835, + "loss_ce": 0.0017585159512236714, + "loss_iou": 0.6640625, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 94607488, + "step": 551 + }, + { + "epoch": 0.14519629118169264, + "grad_norm": 7.882321147436634, + "learning_rate": 5e-06, + "loss": 0.1957, + "num_input_tokens_seen": 94776340, + "step": 552 + }, + { + "epoch": 0.14519629118169264, + "loss": 0.22142915427684784, + "loss_ce": 0.003350543323904276, + "loss_iou": 0.53125, + "loss_num": 0.043701171875, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 94776340, + "step": 552 + }, + { + "epoch": 0.14545932794107977, + "grad_norm": 8.491750307418846, + "learning_rate": 5e-06, + "loss": 0.1636, + "num_input_tokens_seen": 94948568, + "step": 553 + }, + { + "epoch": 0.14545932794107977, + "loss": 0.17424368858337402, + "loss_ce": 0.0024297323543578386, + "loss_iou": 0.578125, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 94948568, + "step": 553 + }, + { + "epoch": 0.1457223647004669, + "grad_norm": 9.839418631011416, + "learning_rate": 5e-06, + "loss": 0.2278, + "num_input_tokens_seen": 95120868, + "step": 554 + }, + { + "epoch": 0.1457223647004669, + "loss": 0.23207631707191467, + "loss_ce": 0.001241360092535615, + "loss_iou": 0.5703125, + "loss_num": 0.046142578125, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 95120868, + "step": 554 + }, + { + "epoch": 0.145985401459854, + "grad_norm": 10.298790259293808, + "learning_rate": 5e-06, + "loss": 0.2316, + "num_input_tokens_seen": 95293380, + "step": 555 + }, + { + "epoch": 0.145985401459854, + "loss": 0.2527633607387543, + "loss_ce": 0.0020309346728026867, + "loss_iou": 0.40234375, + "loss_num": 0.050048828125, + "loss_xval": 0.25, + "num_input_tokens_seen": 95293380, + "step": 555 + }, + { + "epoch": 0.14624843821924113, + "grad_norm": 11.750914646944318, + "learning_rate": 5e-06, + "loss": 0.1353, + "num_input_tokens_seen": 95465572, + "step": 556 + }, + { + "epoch": 0.14624843821924113, + "loss": 0.14519110321998596, + "loss_ce": 0.0012702068779617548, + "loss_iou": 0.478515625, + "loss_num": 0.02880859375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 95465572, + "step": 556 + }, + { + "epoch": 0.14651147497862826, + "grad_norm": 11.674444875910362, + "learning_rate": 5e-06, + "loss": 0.248, + "num_input_tokens_seen": 95636384, + "step": 557 + }, + { + "epoch": 0.14651147497862826, + "loss": 0.2826082706451416, + "loss_ce": 0.0019686208106577396, + "loss_iou": 0.5078125, + "loss_num": 0.05615234375, + "loss_xval": 0.28125, + "num_input_tokens_seen": 95636384, + "step": 557 + }, + { + "epoch": 0.1467745117380154, + "grad_norm": 9.602535161901319, + "learning_rate": 5e-06, + "loss": 0.2109, + "num_input_tokens_seen": 95808820, + "step": 558 + }, + { + "epoch": 0.1467745117380154, + "loss": 0.1991540640592575, + "loss_ce": 0.0036584637127816677, + "loss_iou": 0.51953125, + "loss_num": 0.0390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 95808820, + "step": 558 + }, + { + "epoch": 0.1470375484974025, + "grad_norm": 8.512482701290667, + "learning_rate": 5e-06, + "loss": 0.2626, + "num_input_tokens_seen": 95980876, + "step": 559 + }, + { + "epoch": 0.1470375484974025, + "loss": 0.3269794285297394, + "loss_ce": 0.0010516871698200703, + "loss_iou": 0.3203125, + "loss_num": 0.0654296875, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 95980876, + "step": 559 + }, + { + "epoch": 0.14730058525678963, + "grad_norm": 8.300386324136479, + "learning_rate": 5e-06, + "loss": 0.178, + "num_input_tokens_seen": 96153036, + "step": 560 + }, + { + "epoch": 0.14730058525678963, + "loss": 0.2191367745399475, + "loss_ce": 0.006795480381697416, + "loss_iou": 0.5625, + "loss_num": 0.04248046875, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 96153036, + "step": 560 + }, + { + "epoch": 0.14756362201617676, + "grad_norm": 5.775876970805374, + "learning_rate": 5e-06, + "loss": 0.1482, + "num_input_tokens_seen": 96322532, + "step": 561 + }, + { + "epoch": 0.14756362201617676, + "loss": 0.1977005898952484, + "loss_ce": 0.006996248383074999, + "loss_iou": 0.625, + "loss_num": 0.0380859375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 96322532, + "step": 561 + }, + { + "epoch": 0.1478266587755639, + "grad_norm": 6.533356383107408, + "learning_rate": 5e-06, + "loss": 0.2025, + "num_input_tokens_seen": 96494568, + "step": 562 + }, + { + "epoch": 0.1478266587755639, + "loss": 0.15407304465770721, + "loss_ce": 0.0012410087510943413, + "loss_iou": 0.62890625, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 96494568, + "step": 562 + }, + { + "epoch": 0.14808969553495102, + "grad_norm": 9.772718457216582, + "learning_rate": 5e-06, + "loss": 0.2334, + "num_input_tokens_seen": 96666892, + "step": 563 + }, + { + "epoch": 0.14808969553495102, + "loss": 0.18993595242500305, + "loss_ce": 0.002985279308632016, + "loss_iou": 0.75, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 96666892, + "step": 563 + }, + { + "epoch": 0.14835273229433812, + "grad_norm": 10.172183960721854, + "learning_rate": 5e-06, + "loss": 0.2474, + "num_input_tokens_seen": 96837620, + "step": 564 + }, + { + "epoch": 0.14835273229433812, + "loss": 0.20410403609275818, + "loss_ce": 0.004641146864742041, + "loss_iou": 0.5, + "loss_num": 0.039794921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 96837620, + "step": 564 + }, + { + "epoch": 0.14861576905372526, + "grad_norm": 9.445540103601473, + "learning_rate": 5e-06, + "loss": 0.2275, + "num_input_tokens_seen": 97009692, + "step": 565 + }, + { + "epoch": 0.14861576905372526, + "loss": 0.13070067763328552, + "loss_ce": 0.0018554661655798554, + "loss_iou": 0.6015625, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 97009692, + "step": 565 + }, + { + "epoch": 0.14887880581311239, + "grad_norm": 7.521927615990519, + "learning_rate": 5e-06, + "loss": 0.2008, + "num_input_tokens_seen": 97182076, + "step": 566 + }, + { + "epoch": 0.14887880581311239, + "loss": 0.26979702711105347, + "loss_ce": 0.009176918305456638, + "loss_iou": 0.60546875, + "loss_num": 0.052001953125, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 97182076, + "step": 566 + }, + { + "epoch": 0.14914184257249952, + "grad_norm": 6.945071393253576, + "learning_rate": 5e-06, + "loss": 0.2175, + "num_input_tokens_seen": 97352348, + "step": 567 + }, + { + "epoch": 0.14914184257249952, + "loss": 0.2486119419336319, + "loss_ce": 0.004898556973785162, + "loss_iou": 0.5078125, + "loss_num": 0.048828125, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 97352348, + "step": 567 + }, + { + "epoch": 0.14940487933188662, + "grad_norm": 19.761163824718725, + "learning_rate": 5e-06, + "loss": 0.2266, + "num_input_tokens_seen": 97524808, + "step": 568 + }, + { + "epoch": 0.14940487933188662, + "loss": 0.2611408531665802, + "loss_ce": 0.0016193758929148316, + "loss_iou": 0.43359375, + "loss_num": 0.052001953125, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 97524808, + "step": 568 + }, + { + "epoch": 0.14966791609127375, + "grad_norm": 11.05044000141412, + "learning_rate": 5e-06, + "loss": 0.2071, + "num_input_tokens_seen": 97696860, + "step": 569 + }, + { + "epoch": 0.14966791609127375, + "loss": 0.17859557271003723, + "loss_ce": 0.002020859392359853, + "loss_iou": 0.6796875, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 97696860, + "step": 569 + }, + { + "epoch": 0.14993095285066088, + "grad_norm": 10.09349160636077, + "learning_rate": 5e-06, + "loss": 0.2091, + "num_input_tokens_seen": 97869048, + "step": 570 + }, + { + "epoch": 0.14993095285066088, + "loss": 0.1531415581703186, + "loss_ce": 0.001164011424407363, + "loss_iou": 0.66796875, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 97869048, + "step": 570 + }, + { + "epoch": 0.150193989610048, + "grad_norm": 6.128271963212333, + "learning_rate": 5e-06, + "loss": 0.1797, + "num_input_tokens_seen": 98041152, + "step": 571 + }, + { + "epoch": 0.150193989610048, + "loss": 0.18703126907348633, + "loss_ce": 0.003956317901611328, + "loss_iou": 0.376953125, + "loss_num": 0.03662109375, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 98041152, + "step": 571 + }, + { + "epoch": 0.15045702636943512, + "grad_norm": 9.404399204091265, + "learning_rate": 5e-06, + "loss": 0.1789, + "num_input_tokens_seen": 98213508, + "step": 572 + }, + { + "epoch": 0.15045702636943512, + "loss": 0.1673622578382492, + "loss_ce": 0.011722613126039505, + "loss_iou": 0.60546875, + "loss_num": 0.0311279296875, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 98213508, + "step": 572 + }, + { + "epoch": 0.15072006312882225, + "grad_norm": 8.281687271797518, + "learning_rate": 5e-06, + "loss": 0.2554, + "num_input_tokens_seen": 98385768, + "step": 573 + }, + { + "epoch": 0.15072006312882225, + "loss": 0.29049456119537354, + "loss_ce": 0.007413491606712341, + "loss_iou": 0.5703125, + "loss_num": 0.056640625, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 98385768, + "step": 573 + }, + { + "epoch": 0.15098309988820938, + "grad_norm": 9.329556868568911, + "learning_rate": 5e-06, + "loss": 0.1929, + "num_input_tokens_seen": 98555404, + "step": 574 + }, + { + "epoch": 0.15098309988820938, + "loss": 0.17510266602039337, + "loss_ce": 0.006096326746046543, + "loss_iou": 0.6015625, + "loss_num": 0.03369140625, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 98555404, + "step": 574 + }, + { + "epoch": 0.1512461366475965, + "grad_norm": 6.666929195510284, + "learning_rate": 5e-06, + "loss": 0.1966, + "num_input_tokens_seen": 98727452, + "step": 575 + }, + { + "epoch": 0.1512461366475965, + "loss": 0.2249300628900528, + "loss_ce": 0.008560429327189922, + "loss_iou": 0.400390625, + "loss_num": 0.043212890625, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 98727452, + "step": 575 + }, + { + "epoch": 0.15150917340698364, + "grad_norm": 12.576347626904536, + "learning_rate": 5e-06, + "loss": 0.2151, + "num_input_tokens_seen": 98899788, + "step": 576 + }, + { + "epoch": 0.15150917340698364, + "loss": 0.21397414803504944, + "loss_ce": 0.004837184213101864, + "loss_iou": 0.60546875, + "loss_num": 0.041748046875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 98899788, + "step": 576 + }, + { + "epoch": 0.15177221016637074, + "grad_norm": 9.891929331498387, + "learning_rate": 5e-06, + "loss": 0.2031, + "num_input_tokens_seen": 99072416, + "step": 577 + }, + { + "epoch": 0.15177221016637074, + "loss": 0.24624097347259521, + "loss_ce": 0.011255611665546894, + "loss_iou": 0.546875, + "loss_num": 0.046875, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 99072416, + "step": 577 + }, + { + "epoch": 0.15203524692575787, + "grad_norm": 12.530200633920343, + "learning_rate": 5e-06, + "loss": 0.2216, + "num_input_tokens_seen": 99244280, + "step": 578 + }, + { + "epoch": 0.15203524692575787, + "loss": 0.19744133949279785, + "loss_ce": 0.001030205050483346, + "loss_iou": 0.671875, + "loss_num": 0.039306640625, + "loss_xval": 0.1962890625, + "num_input_tokens_seen": 99244280, + "step": 578 + }, + { + "epoch": 0.152298283685145, + "grad_norm": 10.630444163918117, + "learning_rate": 5e-06, + "loss": 0.2121, + "num_input_tokens_seen": 99416184, + "step": 579 + }, + { + "epoch": 0.152298283685145, + "loss": 0.2667901813983917, + "loss_ce": 0.0020807269029319286, + "loss_iou": 0.40234375, + "loss_num": 0.052978515625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 99416184, + "step": 579 + }, + { + "epoch": 0.15256132044453213, + "grad_norm": 10.945427939330838, + "learning_rate": 5e-06, + "loss": 0.1943, + "num_input_tokens_seen": 99588500, + "step": 580 + }, + { + "epoch": 0.15256132044453213, + "loss": 0.19293344020843506, + "loss_ce": 0.005006188526749611, + "loss_iou": 0.4921875, + "loss_num": 0.03759765625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 99588500, + "step": 580 + }, + { + "epoch": 0.15282435720391924, + "grad_norm": 10.349037140854193, + "learning_rate": 5e-06, + "loss": 0.1856, + "num_input_tokens_seen": 99760948, + "step": 581 + }, + { + "epoch": 0.15282435720391924, + "loss": 0.1601850688457489, + "loss_ce": 0.0018598883179947734, + "loss_iou": 0.55859375, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 99760948, + "step": 581 + }, + { + "epoch": 0.15308739396330637, + "grad_norm": 13.119978211180065, + "learning_rate": 5e-06, + "loss": 0.1813, + "num_input_tokens_seen": 99932992, + "step": 582 + }, + { + "epoch": 0.15308739396330637, + "loss": 0.24380066990852356, + "loss_ce": 0.002528695622459054, + "loss_iou": 0.7109375, + "loss_num": 0.04833984375, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 99932992, + "step": 582 + }, + { + "epoch": 0.1533504307226935, + "grad_norm": 8.165624923829593, + "learning_rate": 5e-06, + "loss": 0.1637, + "num_input_tokens_seen": 100104992, + "step": 583 + }, + { + "epoch": 0.1533504307226935, + "loss": 0.18439146876335144, + "loss_ce": 0.0024456623941659927, + "loss_iou": 0.6640625, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 100104992, + "step": 583 + }, + { + "epoch": 0.15361346748208063, + "grad_norm": 9.086411500582741, + "learning_rate": 5e-06, + "loss": 0.1853, + "num_input_tokens_seen": 100277144, + "step": 584 + }, + { + "epoch": 0.15361346748208063, + "loss": 0.17580674588680267, + "loss_ce": 0.0032603610306978226, + "loss_iou": 0.4296875, + "loss_num": 0.034423828125, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 100277144, + "step": 584 + }, + { + "epoch": 0.15387650424146773, + "grad_norm": 11.772959826434088, + "learning_rate": 5e-06, + "loss": 0.2266, + "num_input_tokens_seen": 100449660, + "step": 585 + }, + { + "epoch": 0.15387650424146773, + "loss": 0.2301916778087616, + "loss_ce": 0.0029577831737697124, + "loss_iou": 0.482421875, + "loss_num": 0.04541015625, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 100449660, + "step": 585 + }, + { + "epoch": 0.15413954100085486, + "grad_norm": 8.411861716804005, + "learning_rate": 5e-06, + "loss": 0.2131, + "num_input_tokens_seen": 100622028, + "step": 586 + }, + { + "epoch": 0.15413954100085486, + "loss": 0.183104008436203, + "loss_ce": 0.0029892674647271633, + "loss_iou": 0.6015625, + "loss_num": 0.0361328125, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 100622028, + "step": 586 + }, + { + "epoch": 0.154402577760242, + "grad_norm": 8.219849696412924, + "learning_rate": 5e-06, + "loss": 0.2095, + "num_input_tokens_seen": 100792792, + "step": 587 + }, + { + "epoch": 0.154402577760242, + "loss": 0.23411154747009277, + "loss_ce": 0.0037648691795766354, + "loss_iou": 0.458984375, + "loss_num": 0.046142578125, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 100792792, + "step": 587 + }, + { + "epoch": 0.15466561451962912, + "grad_norm": 7.806947443925601, + "learning_rate": 5e-06, + "loss": 0.1858, + "num_input_tokens_seen": 100964900, + "step": 588 + }, + { + "epoch": 0.15466561451962912, + "loss": 0.15801170468330383, + "loss_ce": 0.002433100016787648, + "loss_iou": 0.59765625, + "loss_num": 0.0311279296875, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 100964900, + "step": 588 + }, + { + "epoch": 0.15492865127901626, + "grad_norm": 9.15652521254843, + "learning_rate": 5e-06, + "loss": 0.1918, + "num_input_tokens_seen": 101137068, + "step": 589 + }, + { + "epoch": 0.15492865127901626, + "loss": 0.14395104348659515, + "loss_ce": 0.0018611999694257975, + "loss_iou": 0.62109375, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 101137068, + "step": 589 + }, + { + "epoch": 0.15519168803840336, + "grad_norm": 18.2857885682754, + "learning_rate": 5e-06, + "loss": 0.1912, + "num_input_tokens_seen": 101309424, + "step": 590 + }, + { + "epoch": 0.15519168803840336, + "loss": 0.19831930100917816, + "loss_ce": 0.0009926356142386794, + "loss_iou": 0.546875, + "loss_num": 0.03955078125, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 101309424, + "step": 590 + }, + { + "epoch": 0.1554547247977905, + "grad_norm": 7.944412820463009, + "learning_rate": 5e-06, + "loss": 0.205, + "num_input_tokens_seen": 101478352, + "step": 591 + }, + { + "epoch": 0.1554547247977905, + "loss": 0.21606285870075226, + "loss_ce": 0.00683434447273612, + "loss_iou": 0.431640625, + "loss_num": 0.041748046875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 101478352, + "step": 591 + }, + { + "epoch": 0.15571776155717762, + "grad_norm": 6.618575384086146, + "learning_rate": 5e-06, + "loss": 0.1821, + "num_input_tokens_seen": 101650324, + "step": 592 + }, + { + "epoch": 0.15571776155717762, + "loss": 0.20387296378612518, + "loss_ce": 0.0040438538417220116, + "loss_iou": 0.4765625, + "loss_num": 0.0400390625, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 101650324, + "step": 592 + }, + { + "epoch": 0.15598079831656475, + "grad_norm": 13.644949403576716, + "learning_rate": 5e-06, + "loss": 0.1791, + "num_input_tokens_seen": 101822564, + "step": 593 + }, + { + "epoch": 0.15598079831656475, + "loss": 0.12713779509067535, + "loss_ce": 0.004823335446417332, + "loss_iou": 0.609375, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 101822564, + "step": 593 + }, + { + "epoch": 0.15624383507595185, + "grad_norm": 11.597792275294081, + "learning_rate": 5e-06, + "loss": 0.2129, + "num_input_tokens_seen": 101994900, + "step": 594 + }, + { + "epoch": 0.15624383507595185, + "loss": 0.17677326500415802, + "loss_ce": 0.0048372335731983185, + "loss_iou": 0.68359375, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 101994900, + "step": 594 + }, + { + "epoch": 0.15650687183533898, + "grad_norm": 7.4047027755949175, + "learning_rate": 5e-06, + "loss": 0.1867, + "num_input_tokens_seen": 102165488, + "step": 595 + }, + { + "epoch": 0.15650687183533898, + "loss": 0.1512867510318756, + "loss_ce": 0.0011402517557144165, + "loss_iou": 0.6015625, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 102165488, + "step": 595 + }, + { + "epoch": 0.15676990859472612, + "grad_norm": 7.866562092019479, + "learning_rate": 5e-06, + "loss": 0.1636, + "num_input_tokens_seen": 102337576, + "step": 596 + }, + { + "epoch": 0.15676990859472612, + "loss": 0.11417586356401443, + "loss_ce": 0.0016270325286313891, + "loss_iou": 0.55078125, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 102337576, + "step": 596 + }, + { + "epoch": 0.15703294535411325, + "grad_norm": 14.250796858508032, + "learning_rate": 5e-06, + "loss": 0.2038, + "num_input_tokens_seen": 102509976, + "step": 597 + }, + { + "epoch": 0.15703294535411325, + "loss": 0.20375049114227295, + "loss_ce": 0.003677244298160076, + "loss_iou": 0.53125, + "loss_num": 0.0400390625, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 102509976, + "step": 597 + }, + { + "epoch": 0.15729598211350035, + "grad_norm": 5.485478227776666, + "learning_rate": 5e-06, + "loss": 0.1839, + "num_input_tokens_seen": 102682292, + "step": 598 + }, + { + "epoch": 0.15729598211350035, + "loss": 0.1260145604610443, + "loss_ce": 0.002143711317330599, + "loss_iou": 0.65625, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 102682292, + "step": 598 + }, + { + "epoch": 0.15755901887288748, + "grad_norm": 16.90449659888107, + "learning_rate": 5e-06, + "loss": 0.1869, + "num_input_tokens_seen": 102854396, + "step": 599 + }, + { + "epoch": 0.15755901887288748, + "loss": 0.18277683854103088, + "loss_ce": 0.002356911078095436, + "loss_iou": 0.46484375, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 102854396, + "step": 599 + }, + { + "epoch": 0.1578220556322746, + "grad_norm": 8.022384772643738, + "learning_rate": 5e-06, + "loss": 0.2261, + "num_input_tokens_seen": 103024668, + "step": 600 + }, + { + "epoch": 0.1578220556322746, + "loss": 0.2725946605205536, + "loss_ce": 0.0058710225857794285, + "loss_iou": 0.63671875, + "loss_num": 0.05322265625, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 103024668, + "step": 600 + }, + { + "epoch": 0.15808509239166174, + "grad_norm": 6.502303135299271, + "learning_rate": 5e-06, + "loss": 0.2163, + "num_input_tokens_seen": 103195160, + "step": 601 + }, + { + "epoch": 0.15808509239166174, + "loss": 0.16184811294078827, + "loss_ce": 0.003950160928070545, + "loss_iou": 0.6015625, + "loss_num": 0.031494140625, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 103195160, + "step": 601 + }, + { + "epoch": 0.15834812915104887, + "grad_norm": 11.6613104395809, + "learning_rate": 5e-06, + "loss": 0.1783, + "num_input_tokens_seen": 103365588, + "step": 602 + }, + { + "epoch": 0.15834812915104887, + "loss": 0.27217578887939453, + "loss_ce": 0.006367700640112162, + "loss_iou": 0.62109375, + "loss_num": 0.05322265625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 103365588, + "step": 602 + }, + { + "epoch": 0.15861116591043598, + "grad_norm": 6.846414395262611, + "learning_rate": 5e-06, + "loss": 0.2057, + "num_input_tokens_seen": 103537444, + "step": 603 + }, + { + "epoch": 0.15861116591043598, + "loss": 0.24833053350448608, + "loss_ce": 0.004678180906921625, + "loss_iou": 0.625, + "loss_num": 0.048828125, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 103537444, + "step": 603 + }, + { + "epoch": 0.1588742026698231, + "grad_norm": 5.663069385724008, + "learning_rate": 5e-06, + "loss": 0.1744, + "num_input_tokens_seen": 103709932, + "step": 604 + }, + { + "epoch": 0.1588742026698231, + "loss": 0.19428852200508118, + "loss_ce": 0.000929144793190062, + "loss_iou": 0.51953125, + "loss_num": 0.038818359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 103709932, + "step": 604 + }, + { + "epoch": 0.15913723942921024, + "grad_norm": 6.358230492219544, + "learning_rate": 5e-06, + "loss": 0.1712, + "num_input_tokens_seen": 103882084, + "step": 605 + }, + { + "epoch": 0.15913723942921024, + "loss": 0.19055569171905518, + "loss_ce": 0.005802282597869635, + "loss_iou": 0.51953125, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 103882084, + "step": 605 + }, + { + "epoch": 0.15940027618859737, + "grad_norm": 8.229813023952058, + "learning_rate": 5e-06, + "loss": 0.194, + "num_input_tokens_seen": 104054236, + "step": 606 + }, + { + "epoch": 0.15940027618859737, + "loss": 0.14631760120391846, + "loss_ce": 0.0008098002290353179, + "loss_iou": 0.45703125, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 104054236, + "step": 606 + }, + { + "epoch": 0.15966331294798447, + "grad_norm": 8.649522876210598, + "learning_rate": 5e-06, + "loss": 0.1982, + "num_input_tokens_seen": 104224580, + "step": 607 + }, + { + "epoch": 0.15966331294798447, + "loss": 0.2813430428504944, + "loss_ce": 0.0039382753893733025, + "loss_iou": 0.51171875, + "loss_num": 0.0556640625, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 104224580, + "step": 607 + }, + { + "epoch": 0.1599263497073716, + "grad_norm": 9.061064934100147, + "learning_rate": 5e-06, + "loss": 0.2389, + "num_input_tokens_seen": 104396656, + "step": 608 + }, + { + "epoch": 0.1599263497073716, + "loss": 0.25585615634918213, + "loss_ce": 0.005581488832831383, + "loss_iou": 0.3515625, + "loss_num": 0.050048828125, + "loss_xval": 0.25, + "num_input_tokens_seen": 104396656, + "step": 608 + }, + { + "epoch": 0.16018938646675873, + "grad_norm": 10.782439869738488, + "learning_rate": 5e-06, + "loss": 0.212, + "num_input_tokens_seen": 104568804, + "step": 609 + }, + { + "epoch": 0.16018938646675873, + "loss": 0.22171396017074585, + "loss_ce": 0.0010718655539676547, + "loss_iou": 0.6640625, + "loss_num": 0.044189453125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 104568804, + "step": 609 + }, + { + "epoch": 0.16045242322614586, + "grad_norm": 7.912260836349583, + "learning_rate": 5e-06, + "loss": 0.2335, + "num_input_tokens_seen": 104740976, + "step": 610 + }, + { + "epoch": 0.16045242322614586, + "loss": 0.20210719108581543, + "loss_ce": 0.002094991272315383, + "loss_iou": 0.60546875, + "loss_num": 0.0400390625, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 104740976, + "step": 610 + }, + { + "epoch": 0.16071545998553297, + "grad_norm": 7.072100604468161, + "learning_rate": 5e-06, + "loss": 0.1877, + "num_input_tokens_seen": 104913120, + "step": 611 + }, + { + "epoch": 0.16071545998553297, + "loss": 0.24669376015663147, + "loss_ce": 0.005971122998744249, + "loss_iou": 0.451171875, + "loss_num": 0.048095703125, + "loss_xval": 0.240234375, + "num_input_tokens_seen": 104913120, + "step": 611 + }, + { + "epoch": 0.1609784967449201, + "grad_norm": 6.775986925165555, + "learning_rate": 5e-06, + "loss": 0.2033, + "num_input_tokens_seen": 105085268, + "step": 612 + }, + { + "epoch": 0.1609784967449201, + "loss": 0.17514100670814514, + "loss_ce": 0.005036028102040291, + "loss_iou": 0.63671875, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 105085268, + "step": 612 + }, + { + "epoch": 0.16124153350430723, + "grad_norm": 13.436082189650097, + "learning_rate": 5e-06, + "loss": 0.1878, + "num_input_tokens_seen": 105255536, + "step": 613 + }, + { + "epoch": 0.16124153350430723, + "loss": 0.1662948876619339, + "loss_ce": 0.004856906831264496, + "loss_iou": 0.56640625, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 105255536, + "step": 613 + }, + { + "epoch": 0.16150457026369436, + "grad_norm": 15.212823710443937, + "learning_rate": 5e-06, + "loss": 0.1571, + "num_input_tokens_seen": 105426276, + "step": 614 + }, + { + "epoch": 0.16150457026369436, + "loss": 0.21024103462696075, + "loss_ce": 0.000707346829585731, + "loss_iou": 0.349609375, + "loss_num": 0.041748046875, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 105426276, + "step": 614 + }, + { + "epoch": 0.1617676070230815, + "grad_norm": 5.811817664832679, + "learning_rate": 5e-06, + "loss": 0.1759, + "num_input_tokens_seen": 105598528, + "step": 615 + }, + { + "epoch": 0.1617676070230815, + "loss": 0.17331555485725403, + "loss_ce": 0.0019593401812016964, + "loss_iou": 0.55078125, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 105598528, + "step": 615 + }, + { + "epoch": 0.1620306437824686, + "grad_norm": 7.929569482892307, + "learning_rate": 5e-06, + "loss": 0.1901, + "num_input_tokens_seen": 105768684, + "step": 616 + }, + { + "epoch": 0.1620306437824686, + "loss": 0.22191157937049866, + "loss_ce": 0.0013305249158293009, + "loss_iou": 0.470703125, + "loss_num": 0.044189453125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 105768684, + "step": 616 + }, + { + "epoch": 0.16229368054185572, + "grad_norm": 9.722668759038529, + "learning_rate": 5e-06, + "loss": 0.238, + "num_input_tokens_seen": 105938984, + "step": 617 + }, + { + "epoch": 0.16229368054185572, + "loss": 0.2918306887149811, + "loss_ce": 0.004660272039473057, + "loss_iou": 0.76953125, + "loss_num": 0.057373046875, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 105938984, + "step": 617 + }, + { + "epoch": 0.16255671730124285, + "grad_norm": 8.495376084254536, + "learning_rate": 5e-06, + "loss": 0.2003, + "num_input_tokens_seen": 106111216, + "step": 618 + }, + { + "epoch": 0.16255671730124285, + "loss": 0.13662417232990265, + "loss_ce": 0.0008209550869651139, + "loss_iou": 0.46875, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 106111216, + "step": 618 + }, + { + "epoch": 0.16281975406062998, + "grad_norm": 10.294809858036002, + "learning_rate": 5e-06, + "loss": 0.2276, + "num_input_tokens_seen": 106283444, + "step": 619 + }, + { + "epoch": 0.16281975406062998, + "loss": 0.23945499956607819, + "loss_ce": 0.0016620358219370246, + "loss_iou": 0.55859375, + "loss_num": 0.047607421875, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 106283444, + "step": 619 + }, + { + "epoch": 0.1630827908200171, + "grad_norm": 8.591329712303546, + "learning_rate": 5e-06, + "loss": 0.1832, + "num_input_tokens_seen": 106455556, + "step": 620 + }, + { + "epoch": 0.1630827908200171, + "loss": 0.24216794967651367, + "loss_ce": 0.004680164158344269, + "loss_iou": 0.703125, + "loss_num": 0.04736328125, + "loss_xval": 0.2373046875, + "num_input_tokens_seen": 106455556, + "step": 620 + }, + { + "epoch": 0.16334582757940422, + "grad_norm": 11.580746408707443, + "learning_rate": 5e-06, + "loss": 0.222, + "num_input_tokens_seen": 106625564, + "step": 621 + }, + { + "epoch": 0.16334582757940422, + "loss": 0.27062344551086426, + "loss_ce": 0.0013363163452595472, + "loss_iou": 0.625, + "loss_num": 0.0537109375, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 106625564, + "step": 621 + }, + { + "epoch": 0.16360886433879135, + "grad_norm": 9.075656365353165, + "learning_rate": 5e-06, + "loss": 0.2241, + "num_input_tokens_seen": 106797500, + "step": 622 + }, + { + "epoch": 0.16360886433879135, + "loss": 0.25023964047431946, + "loss_ce": 0.002986219245940447, + "loss_iou": 0.66015625, + "loss_num": 0.049560546875, + "loss_xval": 0.2470703125, + "num_input_tokens_seen": 106797500, + "step": 622 + }, + { + "epoch": 0.16387190109817848, + "grad_norm": 7.173557460385501, + "learning_rate": 5e-06, + "loss": 0.1767, + "num_input_tokens_seen": 106969624, + "step": 623 + }, + { + "epoch": 0.16387190109817848, + "loss": 0.15543386340141296, + "loss_ce": 0.004310814663767815, + "loss_iou": 0.51171875, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 106969624, + "step": 623 + }, + { + "epoch": 0.16413493785756558, + "grad_norm": 7.146771956956073, + "learning_rate": 5e-06, + "loss": 0.1912, + "num_input_tokens_seen": 107139936, + "step": 624 + }, + { + "epoch": 0.16413493785756558, + "loss": 0.2210107445716858, + "loss_ce": 0.0020776439923793077, + "loss_iou": 0.376953125, + "loss_num": 0.043701171875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 107139936, + "step": 624 + }, + { + "epoch": 0.1643979746169527, + "grad_norm": 7.784232141951156, + "learning_rate": 5e-06, + "loss": 0.1852, + "num_input_tokens_seen": 107312016, + "step": 625 + }, + { + "epoch": 0.1643979746169527, + "loss": 0.16655078530311584, + "loss_ce": 0.00200000312179327, + "loss_iou": 0.5078125, + "loss_num": 0.032958984375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 107312016, + "step": 625 + }, + { + "epoch": 0.16466101137633984, + "grad_norm": 9.666585010057217, + "learning_rate": 5e-06, + "loss": 0.2351, + "num_input_tokens_seen": 107484484, + "step": 626 + }, + { + "epoch": 0.16466101137633984, + "loss": 0.17668788135051727, + "loss_ce": 0.0009676595800556242, + "loss_iou": 0.70703125, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 107484484, + "step": 626 + }, + { + "epoch": 0.16492404813572697, + "grad_norm": 15.599284961255806, + "learning_rate": 5e-06, + "loss": 0.1943, + "num_input_tokens_seen": 107656148, + "step": 627 + }, + { + "epoch": 0.16492404813572697, + "loss": 0.2105821818113327, + "loss_ce": 0.0011095235822722316, + "loss_iou": 0.5, + "loss_num": 0.0419921875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 107656148, + "step": 627 + }, + { + "epoch": 0.1651870848951141, + "grad_norm": 8.248204010218137, + "learning_rate": 5e-06, + "loss": 0.1732, + "num_input_tokens_seen": 107828348, + "step": 628 + }, + { + "epoch": 0.1651870848951141, + "loss": 0.1219111904501915, + "loss_ce": 0.009545465931296349, + "loss_iou": 0.609375, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 107828348, + "step": 628 + }, + { + "epoch": 0.1654501216545012, + "grad_norm": 10.643869540803188, + "learning_rate": 5e-06, + "loss": 0.191, + "num_input_tokens_seen": 108000340, + "step": 629 + }, + { + "epoch": 0.1654501216545012, + "loss": 0.21501825749874115, + "loss_ce": 0.0015172738349065185, + "loss_iou": 0.6171875, + "loss_num": 0.042724609375, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 108000340, + "step": 629 + }, + { + "epoch": 0.16571315841388834, + "grad_norm": 6.957892002207251, + "learning_rate": 5e-06, + "loss": 0.1723, + "num_input_tokens_seen": 108172648, + "step": 630 + }, + { + "epoch": 0.16571315841388834, + "loss": 0.17599767446517944, + "loss_ce": 0.0010709069902077317, + "loss_iou": 0.4921875, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 108172648, + "step": 630 + }, + { + "epoch": 0.16597619517327547, + "grad_norm": 8.642857572938437, + "learning_rate": 5e-06, + "loss": 0.2342, + "num_input_tokens_seen": 108343044, + "step": 631 + }, + { + "epoch": 0.16597619517327547, + "loss": 0.18351054191589355, + "loss_ce": 0.00296853668987751, + "loss_iou": 0.69921875, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 108343044, + "step": 631 + }, + { + "epoch": 0.1662392319326626, + "grad_norm": 7.980653444631839, + "learning_rate": 5e-06, + "loss": 0.1625, + "num_input_tokens_seen": 108515048, + "step": 632 + }, + { + "epoch": 0.1662392319326626, + "loss": 0.1818259358406067, + "loss_ce": 0.008302995935082436, + "loss_iou": 0.5625, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 108515048, + "step": 632 + }, + { + "epoch": 0.1665022686920497, + "grad_norm": 7.668823525125559, + "learning_rate": 5e-06, + "loss": 0.1603, + "num_input_tokens_seen": 108687320, + "step": 633 + }, + { + "epoch": 0.1665022686920497, + "loss": 0.2121184766292572, + "loss_ce": 0.0024016951210796833, + "loss_iou": 0.56640625, + "loss_num": 0.0419921875, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 108687320, + "step": 633 + }, + { + "epoch": 0.16676530545143683, + "grad_norm": 8.952178505337718, + "learning_rate": 5e-06, + "loss": 0.2244, + "num_input_tokens_seen": 108859852, + "step": 634 + }, + { + "epoch": 0.16676530545143683, + "loss": 0.24805203080177307, + "loss_ce": 0.004399674944579601, + "loss_iou": 0.59765625, + "loss_num": 0.048828125, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 108859852, + "step": 634 + }, + { + "epoch": 0.16702834221082397, + "grad_norm": 8.269015730220213, + "learning_rate": 5e-06, + "loss": 0.2335, + "num_input_tokens_seen": 109031824, + "step": 635 + }, + { + "epoch": 0.16702834221082397, + "loss": 0.3403623104095459, + "loss_ce": 0.01095556654036045, + "loss_iou": 0.314453125, + "loss_num": 0.06591796875, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 109031824, + "step": 635 + }, + { + "epoch": 0.1672913789702111, + "grad_norm": 11.588758914669935, + "learning_rate": 5e-06, + "loss": 0.1782, + "num_input_tokens_seen": 109204100, + "step": 636 + }, + { + "epoch": 0.1672913789702111, + "loss": 0.25584501028060913, + "loss_ce": 0.007065705489367247, + "loss_iou": 0.578125, + "loss_num": 0.0498046875, + "loss_xval": 0.2490234375, + "num_input_tokens_seen": 109204100, + "step": 636 + }, + { + "epoch": 0.1675544157295982, + "grad_norm": 9.273803296838299, + "learning_rate": 5e-06, + "loss": 0.203, + "num_input_tokens_seen": 109376120, + "step": 637 + }, + { + "epoch": 0.1675544157295982, + "loss": 0.24991419911384583, + "loss_ce": 0.0015011176001280546, + "loss_iou": 0.64453125, + "loss_num": 0.0498046875, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 109376120, + "step": 637 + }, + { + "epoch": 0.16781745248898533, + "grad_norm": 5.629607113211884, + "learning_rate": 5e-06, + "loss": 0.1663, + "num_input_tokens_seen": 109548116, + "step": 638 + }, + { + "epoch": 0.16781745248898533, + "loss": 0.22593827545642853, + "loss_ce": 0.005753945559263229, + "loss_iou": 0.470703125, + "loss_num": 0.0439453125, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 109548116, + "step": 638 + }, + { + "epoch": 0.16808048924837246, + "grad_norm": 10.827873954115349, + "learning_rate": 5e-06, + "loss": 0.154, + "num_input_tokens_seen": 109720360, + "step": 639 + }, + { + "epoch": 0.16808048924837246, + "loss": 0.10586154460906982, + "loss_ce": 0.0011862462852150202, + "loss_iou": 0.474609375, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 109720360, + "step": 639 + }, + { + "epoch": 0.1683435260077596, + "grad_norm": 6.979104215801615, + "learning_rate": 5e-06, + "loss": 0.1811, + "num_input_tokens_seen": 109892700, + "step": 640 + }, + { + "epoch": 0.1683435260077596, + "loss": 0.21827656030654907, + "loss_ce": 0.0010524489916861057, + "loss_iou": 0.62109375, + "loss_num": 0.04345703125, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 109892700, + "step": 640 + }, + { + "epoch": 0.1686065627671467, + "grad_norm": 12.485157254065346, + "learning_rate": 5e-06, + "loss": 0.2113, + "num_input_tokens_seen": 110064992, + "step": 641 + }, + { + "epoch": 0.1686065627671467, + "loss": 0.26375001668930054, + "loss_ce": 0.0014819505158811808, + "loss_iou": 0.56640625, + "loss_num": 0.052490234375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 110064992, + "step": 641 + }, + { + "epoch": 0.16886959952653383, + "grad_norm": 9.158584740167319, + "learning_rate": 5e-06, + "loss": 0.1418, + "num_input_tokens_seen": 110237352, + "step": 642 + }, + { + "epoch": 0.16886959952653383, + "loss": 0.12861773371696472, + "loss_ce": 0.006120163947343826, + "loss_iou": 0.53125, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 110237352, + "step": 642 + }, + { + "epoch": 0.16913263628592096, + "grad_norm": 12.50685540012725, + "learning_rate": 5e-06, + "loss": 0.226, + "num_input_tokens_seen": 110409596, + "step": 643 + }, + { + "epoch": 0.16913263628592096, + "loss": 0.29422521591186523, + "loss_ce": 0.006322397850453854, + "loss_iou": 0.6171875, + "loss_num": 0.0576171875, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 110409596, + "step": 643 + }, + { + "epoch": 0.1693956730453081, + "grad_norm": 8.91219200815607, + "learning_rate": 5e-06, + "loss": 0.1801, + "num_input_tokens_seen": 110581872, + "step": 644 + }, + { + "epoch": 0.1693956730453081, + "loss": 0.19362246990203857, + "loss_ce": 0.0011175863910466433, + "loss_iou": 0.6328125, + "loss_num": 0.03857421875, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 110581872, + "step": 644 + }, + { + "epoch": 0.16965870980469522, + "grad_norm": 6.648045150838095, + "learning_rate": 5e-06, + "loss": 0.1817, + "num_input_tokens_seen": 110754112, + "step": 645 + }, + { + "epoch": 0.16965870980469522, + "loss": 0.22110968828201294, + "loss_ce": 0.0007727851625531912, + "loss_iou": 0.44921875, + "loss_num": 0.0439453125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 110754112, + "step": 645 + }, + { + "epoch": 0.16992174656408232, + "grad_norm": 5.685997850783031, + "learning_rate": 5e-06, + "loss": 0.1755, + "num_input_tokens_seen": 110926320, + "step": 646 + }, + { + "epoch": 0.16992174656408232, + "loss": 0.16139136254787445, + "loss_ce": 0.0022421882022172213, + "loss_iou": 0.54296875, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 110926320, + "step": 646 + }, + { + "epoch": 0.17018478332346945, + "grad_norm": 5.619576982998229, + "learning_rate": 5e-06, + "loss": 0.1479, + "num_input_tokens_seen": 111098480, + "step": 647 + }, + { + "epoch": 0.17018478332346945, + "loss": 0.07204495370388031, + "loss_ce": 0.0005727877141907811, + "loss_iou": 0.484375, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 111098480, + "step": 647 + }, + { + "epoch": 0.17044782008285658, + "grad_norm": 7.873961542866977, + "learning_rate": 5e-06, + "loss": 0.2025, + "num_input_tokens_seen": 111270660, + "step": 648 + }, + { + "epoch": 0.17044782008285658, + "loss": 0.2468957006931305, + "loss_ce": 0.001473332871682942, + "loss_iou": 0.4453125, + "loss_num": 0.049072265625, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 111270660, + "step": 648 + }, + { + "epoch": 0.1707108568422437, + "grad_norm": 10.015757077433259, + "learning_rate": 5e-06, + "loss": 0.1934, + "num_input_tokens_seen": 111442504, + "step": 649 + }, + { + "epoch": 0.1707108568422437, + "loss": 0.14880256354808807, + "loss_ce": 0.0109546585008502, + "loss_iou": 0.52734375, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 111442504, + "step": 649 + }, + { + "epoch": 0.17097389360163082, + "grad_norm": 7.899725217893395, + "learning_rate": 5e-06, + "loss": 0.2213, + "num_input_tokens_seen": 111614612, + "step": 650 + }, + { + "epoch": 0.17097389360163082, + "loss": 0.31617793440818787, + "loss_ce": 0.005386924371123314, + "loss_iou": 0.39453125, + "loss_num": 0.062255859375, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 111614612, + "step": 650 + }, + { + "epoch": 0.17123693036101795, + "grad_norm": 11.203783050737329, + "learning_rate": 5e-06, + "loss": 0.2011, + "num_input_tokens_seen": 111786832, + "step": 651 + }, + { + "epoch": 0.17123693036101795, + "loss": 0.16059955954551697, + "loss_ce": 0.003617130685597658, + "loss_iou": 0.57421875, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 111786832, + "step": 651 + }, + { + "epoch": 0.17149996712040508, + "grad_norm": 10.553074580421196, + "learning_rate": 5e-06, + "loss": 0.1373, + "num_input_tokens_seen": 111959112, + "step": 652 + }, + { + "epoch": 0.17149996712040508, + "loss": 0.10537019371986389, + "loss_ce": 0.0024038811679929495, + "loss_iou": 0.40234375, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 111959112, + "step": 652 + }, + { + "epoch": 0.1717630038797922, + "grad_norm": 7.878994667708907, + "learning_rate": 5e-06, + "loss": 0.1709, + "num_input_tokens_seen": 112129472, + "step": 653 + }, + { + "epoch": 0.1717630038797922, + "loss": 0.14291326701641083, + "loss_ce": 0.0017999822739511728, + "loss_iou": 0.50390625, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 112129472, + "step": 653 + }, + { + "epoch": 0.1720260406391793, + "grad_norm": 8.502469919546648, + "learning_rate": 5e-06, + "loss": 0.2067, + "num_input_tokens_seen": 112301764, + "step": 654 + }, + { + "epoch": 0.1720260406391793, + "loss": 0.14102406799793243, + "loss_ce": 0.0035118628293275833, + "loss_iou": 0.640625, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 112301764, + "step": 654 + }, + { + "epoch": 0.17228907739856644, + "grad_norm": 7.248102860819298, + "learning_rate": 5e-06, + "loss": 0.1947, + "num_input_tokens_seen": 112473728, + "step": 655 + }, + { + "epoch": 0.17228907739856644, + "loss": 0.3125525116920471, + "loss_ce": 0.002799113281071186, + "loss_iou": 0.455078125, + "loss_num": 0.06201171875, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 112473728, + "step": 655 + }, + { + "epoch": 0.17255211415795357, + "grad_norm": 7.514690707459716, + "learning_rate": 5e-06, + "loss": 0.1795, + "num_input_tokens_seen": 112646020, + "step": 656 + }, + { + "epoch": 0.17255211415795357, + "loss": 0.19242502748966217, + "loss_ce": 0.0032160417176783085, + "loss_iou": 0.451171875, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 112646020, + "step": 656 + }, + { + "epoch": 0.1728151509173407, + "grad_norm": 12.157018929724885, + "learning_rate": 5e-06, + "loss": 0.2024, + "num_input_tokens_seen": 112818344, + "step": 657 + }, + { + "epoch": 0.1728151509173407, + "loss": 0.17007869482040405, + "loss_ce": 0.0014995899982750416, + "loss_iou": 0.63671875, + "loss_num": 0.03369140625, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 112818344, + "step": 657 + }, + { + "epoch": 0.17307818767672783, + "grad_norm": 12.713612456980975, + "learning_rate": 5e-06, + "loss": 0.1854, + "num_input_tokens_seen": 112984384, + "step": 658 + }, + { + "epoch": 0.17307818767672783, + "loss": 0.2311791479587555, + "loss_ce": 0.003090762998908758, + "loss_iou": 0.69140625, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 112984384, + "step": 658 + }, + { + "epoch": 0.17334122443611494, + "grad_norm": 7.9869346345765235, + "learning_rate": 5e-06, + "loss": 0.1714, + "num_input_tokens_seen": 113156440, + "step": 659 + }, + { + "epoch": 0.17334122443611494, + "loss": 0.14270631968975067, + "loss_ce": 0.007757591083645821, + "loss_iou": 0.59765625, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 113156440, + "step": 659 + }, + { + "epoch": 0.17360426119550207, + "grad_norm": 19.43755709327552, + "learning_rate": 5e-06, + "loss": 0.2275, + "num_input_tokens_seen": 113328684, + "step": 660 + }, + { + "epoch": 0.17360426119550207, + "loss": 0.19741018116474152, + "loss_ce": 0.023887230083346367, + "loss_iou": 0.47265625, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 113328684, + "step": 660 + }, + { + "epoch": 0.1738672979548892, + "grad_norm": 12.233621879837852, + "learning_rate": 5e-06, + "loss": 0.2043, + "num_input_tokens_seen": 113499120, + "step": 661 + }, + { + "epoch": 0.1738672979548892, + "loss": 0.17290905117988586, + "loss_ce": 0.03179576247930527, + "loss_iou": 0.640625, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 113499120, + "step": 661 + }, + { + "epoch": 0.17413033471427633, + "grad_norm": 8.034436568495584, + "learning_rate": 5e-06, + "loss": 0.2288, + "num_input_tokens_seen": 113670976, + "step": 662 + }, + { + "epoch": 0.17413033471427633, + "loss": 0.18118935823440552, + "loss_ce": 0.0038822239730507135, + "loss_iou": 0.5625, + "loss_num": 0.035400390625, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 113670976, + "step": 662 + }, + { + "epoch": 0.17439337147366343, + "grad_norm": 6.838416843124344, + "learning_rate": 5e-06, + "loss": 0.211, + "num_input_tokens_seen": 113841352, + "step": 663 + }, + { + "epoch": 0.17439337147366343, + "loss": 0.16460734605789185, + "loss_ce": 0.0039017903618514538, + "loss_iou": 0.44140625, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 113841352, + "step": 663 + }, + { + "epoch": 0.17465640823305056, + "grad_norm": 8.007803887726045, + "learning_rate": 5e-06, + "loss": 0.2191, + "num_input_tokens_seen": 114013720, + "step": 664 + }, + { + "epoch": 0.17465640823305056, + "loss": 0.19987425208091736, + "loss_ce": 0.004256566055119038, + "loss_iou": 0.3671875, + "loss_num": 0.0390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 114013720, + "step": 664 + }, + { + "epoch": 0.1749194449924377, + "grad_norm": 15.416941151295424, + "learning_rate": 5e-06, + "loss": 0.1671, + "num_input_tokens_seen": 114185964, + "step": 665 + }, + { + "epoch": 0.1749194449924377, + "loss": 0.12474516034126282, + "loss_ce": 0.0013320783618837595, + "loss_iou": 0.53125, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 114185964, + "step": 665 + }, + { + "epoch": 0.17518248175182483, + "grad_norm": 7.3780372763976105, + "learning_rate": 5e-06, + "loss": 0.1978, + "num_input_tokens_seen": 114354848, + "step": 666 + }, + { + "epoch": 0.17518248175182483, + "loss": 0.22682063281536102, + "loss_ce": 0.002394365146756172, + "loss_iou": 0.48828125, + "loss_num": 0.044921875, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 114354848, + "step": 666 + }, + { + "epoch": 0.17544551851121193, + "grad_norm": 6.805100830892847, + "learning_rate": 5e-06, + "loss": 0.1893, + "num_input_tokens_seen": 114526920, + "step": 667 + }, + { + "epoch": 0.17544551851121193, + "loss": 0.16719527542591095, + "loss_ce": 0.003987260162830353, + "loss_iou": 0.4921875, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 114526920, + "step": 667 + }, + { + "epoch": 0.17570855527059906, + "grad_norm": 13.214372039286259, + "learning_rate": 5e-06, + "loss": 0.1388, + "num_input_tokens_seen": 114698988, + "step": 668 + }, + { + "epoch": 0.17570855527059906, + "loss": 0.11197628825902939, + "loss_ce": 0.0018078316934406757, + "loss_iou": 0.50390625, + "loss_num": 0.02197265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 114698988, + "step": 668 + }, + { + "epoch": 0.1759715920299862, + "grad_norm": 11.38353136981813, + "learning_rate": 5e-06, + "loss": 0.1566, + "num_input_tokens_seen": 114871164, + "step": 669 + }, + { + "epoch": 0.1759715920299862, + "loss": 0.13217589259147644, + "loss_ce": 0.0022930747363716364, + "loss_iou": 0.60546875, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 114871164, + "step": 669 + }, + { + "epoch": 0.17623462878937332, + "grad_norm": 6.3416006325736705, + "learning_rate": 5e-06, + "loss": 0.1994, + "num_input_tokens_seen": 115043272, + "step": 670 + }, + { + "epoch": 0.17623462878937332, + "loss": 0.2425597459077835, + "loss_ce": 0.00726923206821084, + "loss_iou": 0.462890625, + "loss_num": 0.047119140625, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 115043272, + "step": 670 + }, + { + "epoch": 0.17649766554876045, + "grad_norm": 7.208802786478081, + "learning_rate": 5e-06, + "loss": 0.1621, + "num_input_tokens_seen": 115215332, + "step": 671 + }, + { + "epoch": 0.17649766554876045, + "loss": 0.2342541515827179, + "loss_ce": 0.006531976629048586, + "loss_iou": 0.474609375, + "loss_num": 0.045654296875, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 115215332, + "step": 671 + }, + { + "epoch": 0.17676070230814755, + "grad_norm": 5.677644761077757, + "learning_rate": 5e-06, + "loss": 0.1422, + "num_input_tokens_seen": 115387420, + "step": 672 + }, + { + "epoch": 0.17676070230814755, + "loss": 0.1173291727900505, + "loss_ce": 0.0031934345606714487, + "loss_iou": 0.5546875, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 115387420, + "step": 672 + }, + { + "epoch": 0.17702373906753469, + "grad_norm": 13.804747880548247, + "learning_rate": 5e-06, + "loss": 0.1348, + "num_input_tokens_seen": 115559764, + "step": 673 + }, + { + "epoch": 0.17702373906753469, + "loss": 0.11581701785326004, + "loss_ce": 0.0009488522773608565, + "loss_iou": 0.546875, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 115559764, + "step": 673 + }, + { + "epoch": 0.17728677582692182, + "grad_norm": 7.7796454748869435, + "learning_rate": 5e-06, + "loss": 0.2202, + "num_input_tokens_seen": 115731572, + "step": 674 + }, + { + "epoch": 0.17728677582692182, + "loss": 0.22158196568489075, + "loss_ce": 0.00228265137411654, + "loss_iou": 0.53125, + "loss_num": 0.0439453125, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 115731572, + "step": 674 + }, + { + "epoch": 0.17754981258630895, + "grad_norm": 7.463691885246712, + "learning_rate": 5e-06, + "loss": 0.1882, + "num_input_tokens_seen": 115903680, + "step": 675 + }, + { + "epoch": 0.17754981258630895, + "loss": 0.22185131907463074, + "loss_ce": 0.002002692548558116, + "loss_iou": 0.54296875, + "loss_num": 0.0439453125, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 115903680, + "step": 675 + }, + { + "epoch": 0.17781284934569605, + "grad_norm": 8.426002860661324, + "learning_rate": 5e-06, + "loss": 0.1793, + "num_input_tokens_seen": 116075520, + "step": 676 + }, + { + "epoch": 0.17781284934569605, + "loss": 0.1606462597846985, + "loss_ce": 0.0009782931301742792, + "loss_iou": 0.5625, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 116075520, + "step": 676 + }, + { + "epoch": 0.17807588610508318, + "grad_norm": 8.301724485985723, + "learning_rate": 5e-06, + "loss": 0.2, + "num_input_tokens_seen": 116243620, + "step": 677 + }, + { + "epoch": 0.17807588610508318, + "loss": 0.23937323689460754, + "loss_ce": 0.002251650206744671, + "loss_iou": 0.73828125, + "loss_num": 0.047607421875, + "loss_xval": 0.2373046875, + "num_input_tokens_seen": 116243620, + "step": 677 + }, + { + "epoch": 0.1783389228644703, + "grad_norm": 8.16746221218087, + "learning_rate": 5e-06, + "loss": 0.1767, + "num_input_tokens_seen": 116414372, + "step": 678 + }, + { + "epoch": 0.1783389228644703, + "loss": 0.2846035361289978, + "loss_ce": 0.003506140550598502, + "loss_iou": 0.498046875, + "loss_num": 0.05615234375, + "loss_xval": 0.28125, + "num_input_tokens_seen": 116414372, + "step": 678 + }, + { + "epoch": 0.17860195962385744, + "grad_norm": 11.233813688882323, + "learning_rate": 5e-06, + "loss": 0.2052, + "num_input_tokens_seen": 116586316, + "step": 679 + }, + { + "epoch": 0.17860195962385744, + "loss": 0.19775235652923584, + "loss_ce": 0.0026840060018002987, + "loss_iou": 0.482421875, + "loss_num": 0.0390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 116586316, + "step": 679 + }, + { + "epoch": 0.17886499638324455, + "grad_norm": 7.134240923845786, + "learning_rate": 5e-06, + "loss": 0.1528, + "num_input_tokens_seen": 116758540, + "step": 680 + }, + { + "epoch": 0.17886499638324455, + "loss": 0.17349669337272644, + "loss_ce": 0.0008892616024240851, + "loss_iou": 0.6953125, + "loss_num": 0.03466796875, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 116758540, + "step": 680 + }, + { + "epoch": 0.17912803314263168, + "grad_norm": 8.457982820816902, + "learning_rate": 5e-06, + "loss": 0.2133, + "num_input_tokens_seen": 116930944, + "step": 681 + }, + { + "epoch": 0.17912803314263168, + "loss": 0.2098885327577591, + "loss_ce": 0.0027962373569607735, + "loss_iou": 0.6796875, + "loss_num": 0.04150390625, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 116930944, + "step": 681 + }, + { + "epoch": 0.1793910699020188, + "grad_norm": 8.07197838986281, + "learning_rate": 5e-06, + "loss": 0.2051, + "num_input_tokens_seen": 117102756, + "step": 682 + }, + { + "epoch": 0.1793910699020188, + "loss": 0.18535807728767395, + "loss_ce": 0.003778480924665928, + "loss_iou": 0.578125, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 117102756, + "step": 682 + }, + { + "epoch": 0.17965410666140594, + "grad_norm": 7.799168173540665, + "learning_rate": 5e-06, + "loss": 0.2208, + "num_input_tokens_seen": 117274704, + "step": 683 + }, + { + "epoch": 0.17965410666140594, + "loss": 0.22785347700119019, + "loss_ce": 0.0029999692924320698, + "loss_iou": 0.3125, + "loss_num": 0.044921875, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 117274704, + "step": 683 + }, + { + "epoch": 0.17991714342079307, + "grad_norm": 6.357635509161791, + "learning_rate": 5e-06, + "loss": 0.1508, + "num_input_tokens_seen": 117446896, + "step": 684 + }, + { + "epoch": 0.17991714342079307, + "loss": 0.2038782835006714, + "loss_ce": 0.010396835394203663, + "loss_iou": 0.50390625, + "loss_num": 0.03857421875, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 117446896, + "step": 684 + }, + { + "epoch": 0.18018018018018017, + "grad_norm": 7.961998959961417, + "learning_rate": 5e-06, + "loss": 0.1759, + "num_input_tokens_seen": 117618944, + "step": 685 + }, + { + "epoch": 0.18018018018018017, + "loss": 0.18519067764282227, + "loss_ce": 0.0017190101789310575, + "loss_iou": 0.54296875, + "loss_num": 0.03662109375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 117618944, + "step": 685 + }, + { + "epoch": 0.1804432169395673, + "grad_norm": 9.08254127907995, + "learning_rate": 5e-06, + "loss": 0.1885, + "num_input_tokens_seen": 117791316, + "step": 686 + }, + { + "epoch": 0.1804432169395673, + "loss": 0.20686465501785278, + "loss_ce": 0.0025189572479575872, + "loss_iou": 0.6796875, + "loss_num": 0.041015625, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 117791316, + "step": 686 + }, + { + "epoch": 0.18070625369895443, + "grad_norm": 10.367656958651155, + "learning_rate": 5e-06, + "loss": 0.1855, + "num_input_tokens_seen": 117963308, + "step": 687 + }, + { + "epoch": 0.18070625369895443, + "loss": 0.13359057903289795, + "loss_ce": 0.0041044931858778, + "loss_iou": 0.6953125, + "loss_num": 0.02587890625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 117963308, + "step": 687 + }, + { + "epoch": 0.18096929045834156, + "grad_norm": 7.549954240364692, + "learning_rate": 5e-06, + "loss": 0.1299, + "num_input_tokens_seen": 118135556, + "step": 688 + }, + { + "epoch": 0.18096929045834156, + "loss": 0.14778929948806763, + "loss_ce": 0.0011828583665192127, + "loss_iou": 0.494140625, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 118135556, + "step": 688 + }, + { + "epoch": 0.18123232721772867, + "grad_norm": 9.741300529319401, + "learning_rate": 5e-06, + "loss": 0.166, + "num_input_tokens_seen": 118305884, + "step": 689 + }, + { + "epoch": 0.18123232721772867, + "loss": 0.1753019541501999, + "loss_ce": 0.003182805608958006, + "loss_iou": 0.470703125, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 118305884, + "step": 689 + }, + { + "epoch": 0.1814953639771158, + "grad_norm": 11.31734919561229, + "learning_rate": 5e-06, + "loss": 0.1811, + "num_input_tokens_seen": 118478000, + "step": 690 + }, + { + "epoch": 0.1814953639771158, + "loss": 0.189756840467453, + "loss_ce": 0.0007919906638562679, + "loss_iou": 0.77734375, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 118478000, + "step": 690 + }, + { + "epoch": 0.18175840073650293, + "grad_norm": 9.003341647966222, + "learning_rate": 5e-06, + "loss": 0.1755, + "num_input_tokens_seen": 118650436, + "step": 691 + }, + { + "epoch": 0.18175840073650293, + "loss": 0.11313121020793915, + "loss_ce": 0.0006434204406104982, + "loss_iou": 0.53515625, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 118650436, + "step": 691 + }, + { + "epoch": 0.18202143749589006, + "grad_norm": 15.98200660549263, + "learning_rate": 5e-06, + "loss": 0.1717, + "num_input_tokens_seen": 118822416, + "step": 692 + }, + { + "epoch": 0.18202143749589006, + "loss": 0.14253322780132294, + "loss_ce": 0.001908229780383408, + "loss_iou": 0.453125, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 118822416, + "step": 692 + }, + { + "epoch": 0.18228447425527716, + "grad_norm": 5.776705438700134, + "learning_rate": 5e-06, + "loss": 0.1588, + "num_input_tokens_seen": 118994492, + "step": 693 + }, + { + "epoch": 0.18228447425527716, + "loss": 0.15580043196678162, + "loss_ce": 0.0014730504481121898, + "loss_iou": 0.40625, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 118994492, + "step": 693 + }, + { + "epoch": 0.1825475110146643, + "grad_norm": 13.97214493017941, + "learning_rate": 5e-06, + "loss": 0.1825, + "num_input_tokens_seen": 119166828, + "step": 694 + }, + { + "epoch": 0.1825475110146643, + "loss": 0.14117339253425598, + "loss_ce": 0.005309135653078556, + "loss_iou": 0.6171875, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 119166828, + "step": 694 + }, + { + "epoch": 0.18281054777405142, + "grad_norm": 9.340996582360178, + "learning_rate": 5e-06, + "loss": 0.2039, + "num_input_tokens_seen": 119339040, + "step": 695 + }, + { + "epoch": 0.18281054777405142, + "loss": 0.21793845295906067, + "loss_ce": 0.005658184178173542, + "loss_iou": 0.4921875, + "loss_num": 0.04248046875, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 119339040, + "step": 695 + }, + { + "epoch": 0.18307358453343855, + "grad_norm": 6.679514738557093, + "learning_rate": 5e-06, + "loss": 0.1756, + "num_input_tokens_seen": 119511024, + "step": 696 + }, + { + "epoch": 0.18307358453343855, + "loss": 0.18582630157470703, + "loss_ce": 0.0012559981551021338, + "loss_iou": 0.5546875, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 119511024, + "step": 696 + }, + { + "epoch": 0.18333662129282569, + "grad_norm": 5.326914153992354, + "learning_rate": 5e-06, + "loss": 0.143, + "num_input_tokens_seen": 119683564, + "step": 697 + }, + { + "epoch": 0.18333662129282569, + "loss": 0.15467330813407898, + "loss_ce": 0.0042826831340789795, + "loss_iou": 0.5703125, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 119683564, + "step": 697 + }, + { + "epoch": 0.1835996580522128, + "grad_norm": 8.515559517433303, + "learning_rate": 5e-06, + "loss": 0.1103, + "num_input_tokens_seen": 119856000, + "step": 698 + }, + { + "epoch": 0.1835996580522128, + "loss": 0.12481513619422913, + "loss_ce": 0.0006696260534226894, + "loss_iou": 0.6796875, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 119856000, + "step": 698 + }, + { + "epoch": 0.18386269481159992, + "grad_norm": 9.420262409376758, + "learning_rate": 5e-06, + "loss": 0.2191, + "num_input_tokens_seen": 120028676, + "step": 699 + }, + { + "epoch": 0.18386269481159992, + "loss": 0.22470733523368835, + "loss_ce": 0.001868000952526927, + "loss_iou": 0.6875, + "loss_num": 0.044677734375, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 120028676, + "step": 699 + }, + { + "epoch": 0.18412573157098705, + "grad_norm": 7.530169678535974, + "learning_rate": 5e-06, + "loss": 0.1624, + "num_input_tokens_seen": 120201208, + "step": 700 + }, + { + "epoch": 0.18412573157098705, + "loss": 0.10793297737836838, + "loss_ce": 0.0036849307361990213, + "loss_iou": 0.58203125, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 120201208, + "step": 700 + }, + { + "epoch": 0.18438876833037418, + "grad_norm": 8.205415154202601, + "learning_rate": 5e-06, + "loss": 0.1795, + "num_input_tokens_seen": 120373444, + "step": 701 + }, + { + "epoch": 0.18438876833037418, + "loss": 0.1353437304496765, + "loss_ce": 0.001066376455128193, + "loss_iou": 0.5390625, + "loss_num": 0.02685546875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 120373444, + "step": 701 + }, + { + "epoch": 0.18465180508976128, + "grad_norm": 8.319297966322562, + "learning_rate": 5e-06, + "loss": 0.1785, + "num_input_tokens_seen": 120545552, + "step": 702 + }, + { + "epoch": 0.18465180508976128, + "loss": 0.18953613936901093, + "loss_ce": 0.0019751053769141436, + "loss_iou": 0.69140625, + "loss_num": 0.03759765625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 120545552, + "step": 702 + }, + { + "epoch": 0.18491484184914841, + "grad_norm": 8.464289102867136, + "learning_rate": 5e-06, + "loss": 0.219, + "num_input_tokens_seen": 120717584, + "step": 703 + }, + { + "epoch": 0.18491484184914841, + "loss": 0.24215811491012573, + "loss_ce": 0.004975516349077225, + "loss_iou": 0.33203125, + "loss_num": 0.047607421875, + "loss_xval": 0.2373046875, + "num_input_tokens_seen": 120717584, + "step": 703 + }, + { + "epoch": 0.18517787860853555, + "grad_norm": 12.962080293446185, + "learning_rate": 5e-06, + "loss": 0.2162, + "num_input_tokens_seen": 120889756, + "step": 704 + }, + { + "epoch": 0.18517787860853555, + "loss": 0.17355692386627197, + "loss_ce": 0.0025364109314978123, + "loss_iou": 0.7109375, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 120889756, + "step": 704 + }, + { + "epoch": 0.18544091536792268, + "grad_norm": 13.968822655265907, + "learning_rate": 5e-06, + "loss": 0.2237, + "num_input_tokens_seen": 121062200, + "step": 705 + }, + { + "epoch": 0.18544091536792268, + "loss": 0.2510673403739929, + "loss_ce": 0.005400843918323517, + "loss_iou": 0.3984375, + "loss_num": 0.049072265625, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 121062200, + "step": 705 + }, + { + "epoch": 0.18570395212730978, + "grad_norm": 15.418397137980255, + "learning_rate": 5e-06, + "loss": 0.2126, + "num_input_tokens_seen": 121234300, + "step": 706 + }, + { + "epoch": 0.18570395212730978, + "loss": 0.239446759223938, + "loss_ce": 0.0006772410124540329, + "loss_iou": 0.55078125, + "loss_num": 0.0478515625, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 121234300, + "step": 706 + }, + { + "epoch": 0.1859669888866969, + "grad_norm": 4.8632608475065355, + "learning_rate": 5e-06, + "loss": 0.1269, + "num_input_tokens_seen": 121406408, + "step": 707 + }, + { + "epoch": 0.1859669888866969, + "loss": 0.10976609587669373, + "loss_ce": 0.003137679770588875, + "loss_iou": 0.50390625, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 121406408, + "step": 707 + }, + { + "epoch": 0.18623002564608404, + "grad_norm": 9.856831060146035, + "learning_rate": 5e-06, + "loss": 0.1699, + "num_input_tokens_seen": 121578364, + "step": 708 + }, + { + "epoch": 0.18623002564608404, + "loss": 0.06338398158550262, + "loss_ce": 0.0025319333653897047, + "loss_iou": 0.4765625, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 121578364, + "step": 708 + }, + { + "epoch": 0.18649306240547117, + "grad_norm": 12.596787814338684, + "learning_rate": 5e-06, + "loss": 0.1839, + "num_input_tokens_seen": 121750612, + "step": 709 + }, + { + "epoch": 0.18649306240547117, + "loss": 0.2992492616176605, + "loss_ce": 0.0012756290379911661, + "loss_iou": 0.5234375, + "loss_num": 0.0595703125, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 121750612, + "step": 709 + }, + { + "epoch": 0.1867560991648583, + "grad_norm": 7.147545819350773, + "learning_rate": 5e-06, + "loss": 0.1776, + "num_input_tokens_seen": 121922804, + "step": 710 + }, + { + "epoch": 0.1867560991648583, + "loss": 0.18950411677360535, + "loss_ce": 0.0032858517952263355, + "loss_iou": 0.609375, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 121922804, + "step": 710 + }, + { + "epoch": 0.1870191359242454, + "grad_norm": 22.63136110983146, + "learning_rate": 5e-06, + "loss": 0.1331, + "num_input_tokens_seen": 122094924, + "step": 711 + }, + { + "epoch": 0.1870191359242454, + "loss": 0.12849926948547363, + "loss_ce": 0.0014851100277155638, + "loss_iou": 0.625, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 122094924, + "step": 711 + }, + { + "epoch": 0.18728217268363254, + "grad_norm": 6.766417510968721, + "learning_rate": 5e-06, + "loss": 0.1724, + "num_input_tokens_seen": 122265668, + "step": 712 + }, + { + "epoch": 0.18728217268363254, + "loss": 0.15207967162132263, + "loss_ce": 0.006968589033931494, + "loss_iou": 0.58984375, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 122265668, + "step": 712 + }, + { + "epoch": 0.18754520944301967, + "grad_norm": 7.923158875388113, + "learning_rate": 5e-06, + "loss": 0.1904, + "num_input_tokens_seen": 122437796, + "step": 713 + }, + { + "epoch": 0.18754520944301967, + "loss": 0.20568042993545532, + "loss_ce": 0.002189208287745714, + "loss_iou": 0.5703125, + "loss_num": 0.040771484375, + "loss_xval": 0.203125, + "num_input_tokens_seen": 122437796, + "step": 713 + }, + { + "epoch": 0.1878082462024068, + "grad_norm": 8.667331104283747, + "learning_rate": 5e-06, + "loss": 0.1887, + "num_input_tokens_seen": 122610336, + "step": 714 + }, + { + "epoch": 0.1878082462024068, + "loss": 0.1953202784061432, + "loss_ce": 0.0012895169202238321, + "loss_iou": 0.287109375, + "loss_num": 0.038818359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 122610336, + "step": 714 + }, + { + "epoch": 0.1880712829617939, + "grad_norm": 12.96410805672465, + "learning_rate": 5e-06, + "loss": 0.2143, + "num_input_tokens_seen": 122779392, + "step": 715 + }, + { + "epoch": 0.1880712829617939, + "loss": 0.21114467084407806, + "loss_ce": 0.001916159177199006, + "loss_iou": 0.41015625, + "loss_num": 0.041748046875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 122779392, + "step": 715 + }, + { + "epoch": 0.18833431972118103, + "grad_norm": 6.980680552365313, + "learning_rate": 5e-06, + "loss": 0.1831, + "num_input_tokens_seen": 122951724, + "step": 716 + }, + { + "epoch": 0.18833431972118103, + "loss": 0.24306175112724304, + "loss_ce": 0.006245340220630169, + "loss_iou": 0.3359375, + "loss_num": 0.04736328125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 122951724, + "step": 716 + }, + { + "epoch": 0.18859735648056816, + "grad_norm": 12.855577653554155, + "learning_rate": 5e-06, + "loss": 0.1591, + "num_input_tokens_seen": 123122088, + "step": 717 + }, + { + "epoch": 0.18859735648056816, + "loss": 0.1986149549484253, + "loss_ce": 0.0012882874580100179, + "loss_iou": 0.51953125, + "loss_num": 0.03955078125, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 123122088, + "step": 717 + }, + { + "epoch": 0.1888603932399553, + "grad_norm": 6.930918072961039, + "learning_rate": 5e-06, + "loss": 0.1899, + "num_input_tokens_seen": 123294160, + "step": 718 + }, + { + "epoch": 0.1888603932399553, + "loss": 0.19642534852027893, + "loss_ce": 0.0014790646964684129, + "loss_iou": 0.40625, + "loss_num": 0.0390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 123294160, + "step": 718 + }, + { + "epoch": 0.1891234299993424, + "grad_norm": 6.929009893187008, + "learning_rate": 5e-06, + "loss": 0.1532, + "num_input_tokens_seen": 123466060, + "step": 719 + }, + { + "epoch": 0.1891234299993424, + "loss": 0.1527690589427948, + "loss_ce": 0.005094507243484259, + "loss_iou": 0.40234375, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 123466060, + "step": 719 + }, + { + "epoch": 0.18938646675872953, + "grad_norm": 10.25908293873023, + "learning_rate": 5e-06, + "loss": 0.1644, + "num_input_tokens_seen": 123636388, + "step": 720 + }, + { + "epoch": 0.18938646675872953, + "loss": 0.18421480059623718, + "loss_ce": 0.0032455746550112963, + "loss_iou": 0.60546875, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 123636388, + "step": 720 + }, + { + "epoch": 0.18964950351811666, + "grad_norm": 7.318122077183922, + "learning_rate": 5e-06, + "loss": 0.1458, + "num_input_tokens_seen": 123808316, + "step": 721 + }, + { + "epoch": 0.18964950351811666, + "loss": 0.1352817267179489, + "loss_ce": 0.0030185491777956486, + "loss_iou": 0.34765625, + "loss_num": 0.0264892578125, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 123808316, + "step": 721 + }, + { + "epoch": 0.1899125402775038, + "grad_norm": 7.382171237495957, + "learning_rate": 5e-06, + "loss": 0.2283, + "num_input_tokens_seen": 123980672, + "step": 722 + }, + { + "epoch": 0.1899125402775038, + "loss": 0.15074753761291504, + "loss_ce": 0.0036528080236166716, + "loss_iou": 0.6640625, + "loss_num": 0.0294189453125, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 123980672, + "step": 722 + }, + { + "epoch": 0.19017557703689092, + "grad_norm": 5.982120923855094, + "learning_rate": 5e-06, + "loss": 0.1499, + "num_input_tokens_seen": 124150936, + "step": 723 + }, + { + "epoch": 0.19017557703689092, + "loss": 0.10008575022220612, + "loss_ce": 0.0014224194455891848, + "loss_iou": 0.48828125, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 124150936, + "step": 723 + }, + { + "epoch": 0.19043861379627802, + "grad_norm": 8.136060280729641, + "learning_rate": 5e-06, + "loss": 0.1378, + "num_input_tokens_seen": 124323280, + "step": 724 + }, + { + "epoch": 0.19043861379627802, + "loss": 0.12422403693199158, + "loss_ce": 0.0017264705384150147, + "loss_iou": 0.69140625, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 124323280, + "step": 724 + }, + { + "epoch": 0.19070165055566515, + "grad_norm": 8.724019912821047, + "learning_rate": 5e-06, + "loss": 0.2613, + "num_input_tokens_seen": 124495636, + "step": 725 + }, + { + "epoch": 0.19070165055566515, + "loss": 0.13688622415065765, + "loss_ce": 0.0040737236849963665, + "loss_iou": 0.57421875, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 124495636, + "step": 725 + }, + { + "epoch": 0.19096468731505228, + "grad_norm": 7.423558318920191, + "learning_rate": 5e-06, + "loss": 0.2051, + "num_input_tokens_seen": 124667644, + "step": 726 + }, + { + "epoch": 0.19096468731505228, + "loss": 0.2597463130950928, + "loss_ce": 0.0010182850528508425, + "loss_iou": 0.416015625, + "loss_num": 0.0517578125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 124667644, + "step": 726 + }, + { + "epoch": 0.19122772407443941, + "grad_norm": 7.827506457517143, + "learning_rate": 5e-06, + "loss": 0.1586, + "num_input_tokens_seen": 124839252, + "step": 727 + }, + { + "epoch": 0.19122772407443941, + "loss": 0.1310674101114273, + "loss_ce": 0.003076688153669238, + "loss_iou": 0.40234375, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 124839252, + "step": 727 + }, + { + "epoch": 0.19149076083382652, + "grad_norm": 9.183421040665964, + "learning_rate": 5e-06, + "loss": 0.1576, + "num_input_tokens_seen": 125011496, + "step": 728 + }, + { + "epoch": 0.19149076083382652, + "loss": 0.11314553767442703, + "loss_ce": 0.0003525639185681939, + "loss_iou": 0.625, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 125011496, + "step": 728 + }, + { + "epoch": 0.19175379759321365, + "grad_norm": 10.185706416909431, + "learning_rate": 5e-06, + "loss": 0.2047, + "num_input_tokens_seen": 125179336, + "step": 729 + }, + { + "epoch": 0.19175379759321365, + "loss": 0.1552383303642273, + "loss_ce": 0.005152884405106306, + "loss_iou": 0.6015625, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 125179336, + "step": 729 + }, + { + "epoch": 0.19201683435260078, + "grad_norm": 8.517948232569886, + "learning_rate": 5e-06, + "loss": 0.1789, + "num_input_tokens_seen": 125351404, + "step": 730 + }, + { + "epoch": 0.19201683435260078, + "loss": 0.23154987394809723, + "loss_ce": 0.0012031885562464595, + "loss_iou": 0.50390625, + "loss_num": 0.046142578125, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 125351404, + "step": 730 + }, + { + "epoch": 0.1922798711119879, + "grad_norm": 8.61986864690339, + "learning_rate": 5e-06, + "loss": 0.1265, + "num_input_tokens_seen": 125523692, + "step": 731 + }, + { + "epoch": 0.1922798711119879, + "loss": 0.14681357145309448, + "loss_ce": 0.0021907794289290905, + "loss_iou": 0.357421875, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 125523692, + "step": 731 + }, + { + "epoch": 0.192542907871375, + "grad_norm": 5.859065568818718, + "learning_rate": 5e-06, + "loss": 0.178, + "num_input_tokens_seen": 125693908, + "step": 732 + }, + { + "epoch": 0.192542907871375, + "loss": 0.15787754952907562, + "loss_ce": 0.0012613451108336449, + "loss_iou": 0.46875, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 125693908, + "step": 732 + }, + { + "epoch": 0.19280594463076214, + "grad_norm": 10.469269184120652, + "learning_rate": 5e-06, + "loss": 0.1438, + "num_input_tokens_seen": 125865868, + "step": 733 + }, + { + "epoch": 0.19280594463076214, + "loss": 0.13217297196388245, + "loss_ce": 0.0014051578473299742, + "loss_iou": 0.578125, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 125865868, + "step": 733 + }, + { + "epoch": 0.19306898139014927, + "grad_norm": 9.209135916468261, + "learning_rate": 5e-06, + "loss": 0.1745, + "num_input_tokens_seen": 126037852, + "step": 734 + }, + { + "epoch": 0.19306898139014927, + "loss": 0.17599225044250488, + "loss_ce": 0.0004551436868496239, + "loss_iou": 0.65625, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 126037852, + "step": 734 + }, + { + "epoch": 0.1933320181495364, + "grad_norm": 8.89690790064976, + "learning_rate": 5e-06, + "loss": 0.2197, + "num_input_tokens_seen": 126209984, + "step": 735 + }, + { + "epoch": 0.1933320181495364, + "loss": 0.1378391683101654, + "loss_ce": 0.0020359433256089687, + "loss_iou": 0.6875, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 126209984, + "step": 735 + }, + { + "epoch": 0.1935950549089235, + "grad_norm": 9.250244451235272, + "learning_rate": 5e-06, + "loss": 0.1999, + "num_input_tokens_seen": 126382140, + "step": 736 + }, + { + "epoch": 0.1935950549089235, + "loss": 0.24104052782058716, + "loss_ce": 0.0028813518583774567, + "loss_iou": 0.671875, + "loss_num": 0.047607421875, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 126382140, + "step": 736 + }, + { + "epoch": 0.19385809166831064, + "grad_norm": 6.538014726838841, + "learning_rate": 5e-06, + "loss": 0.1574, + "num_input_tokens_seen": 126554124, + "step": 737 + }, + { + "epoch": 0.19385809166831064, + "loss": 0.19325746595859528, + "loss_ce": 0.0034991574939340353, + "loss_iou": 0.48046875, + "loss_num": 0.0380859375, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 126554124, + "step": 737 + }, + { + "epoch": 0.19412112842769777, + "grad_norm": 6.359733816292311, + "learning_rate": 5e-06, + "loss": 0.1727, + "num_input_tokens_seen": 126726352, + "step": 738 + }, + { + "epoch": 0.19412112842769777, + "loss": 0.1890466809272766, + "loss_ce": 0.003866007784381509, + "loss_iou": 0.7109375, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 126726352, + "step": 738 + }, + { + "epoch": 0.1943841651870849, + "grad_norm": 6.737716233597974, + "learning_rate": 5e-06, + "loss": 0.168, + "num_input_tokens_seen": 126898504, + "step": 739 + }, + { + "epoch": 0.1943841651870849, + "loss": 0.22711391746997833, + "loss_ce": 0.0020162612199783325, + "loss_iou": 0.6640625, + "loss_num": 0.044921875, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 126898504, + "step": 739 + }, + { + "epoch": 0.19464720194647203, + "grad_norm": 10.63587672601612, + "learning_rate": 5e-06, + "loss": 0.2406, + "num_input_tokens_seen": 127070440, + "step": 740 + }, + { + "epoch": 0.19464720194647203, + "loss": 0.2545185983181, + "loss_ce": 0.0035420400090515614, + "loss_iou": 0.53125, + "loss_num": 0.05029296875, + "loss_xval": 0.25, + "num_input_tokens_seen": 127070440, + "step": 740 + }, + { + "epoch": 0.19491023870585913, + "grad_norm": 10.573425928841477, + "learning_rate": 5e-06, + "loss": 0.1461, + "num_input_tokens_seen": 127242884, + "step": 741 + }, + { + "epoch": 0.19491023870585913, + "loss": 0.07598280906677246, + "loss_ce": 0.0007874930743128061, + "loss_iou": 0.5546875, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 127242884, + "step": 741 + }, + { + "epoch": 0.19517327546524627, + "grad_norm": 5.624166188859615, + "learning_rate": 5e-06, + "loss": 0.1171, + "num_input_tokens_seen": 127413548, + "step": 742 + }, + { + "epoch": 0.19517327546524627, + "loss": 0.12612518668174744, + "loss_ce": 0.0008200095035135746, + "loss_iou": 0.447265625, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 127413548, + "step": 742 + }, + { + "epoch": 0.1954363122246334, + "grad_norm": 5.416712373816477, + "learning_rate": 5e-06, + "loss": 0.1653, + "num_input_tokens_seen": 127583504, + "step": 743 + }, + { + "epoch": 0.1954363122246334, + "loss": 0.14368192851543427, + "loss_ce": 0.003209515009075403, + "loss_iou": 0.474609375, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 127583504, + "step": 743 + }, + { + "epoch": 0.19569934898402053, + "grad_norm": 7.55903022235747, + "learning_rate": 5e-06, + "loss": 0.1741, + "num_input_tokens_seen": 127753924, + "step": 744 + }, + { + "epoch": 0.19569934898402053, + "loss": 0.26765167713165283, + "loss_ce": 0.009717106819152832, + "loss_iou": 0.451171875, + "loss_num": 0.051513671875, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 127753924, + "step": 744 + }, + { + "epoch": 0.19596238574340763, + "grad_norm": 6.325854098350077, + "learning_rate": 5e-06, + "loss": 0.1732, + "num_input_tokens_seen": 127925840, + "step": 745 + }, + { + "epoch": 0.19596238574340763, + "loss": 0.13880833983421326, + "loss_ce": 0.0011435477063059807, + "loss_iou": 0.50390625, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 127925840, + "step": 745 + }, + { + "epoch": 0.19622542250279476, + "grad_norm": 7.469182967125795, + "learning_rate": 5e-06, + "loss": 0.1661, + "num_input_tokens_seen": 128098212, + "step": 746 + }, + { + "epoch": 0.19622542250279476, + "loss": 0.22049343585968018, + "loss_ce": 0.002109651220962405, + "loss_iou": 0.61328125, + "loss_num": 0.043701171875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 128098212, + "step": 746 + }, + { + "epoch": 0.1964884592621819, + "grad_norm": 8.506872378366532, + "learning_rate": 5e-06, + "loss": 0.1859, + "num_input_tokens_seen": 128270372, + "step": 747 + }, + { + "epoch": 0.1964884592621819, + "loss": 0.1590556651353836, + "loss_ce": 0.0010356476996093988, + "loss_iou": 0.515625, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 128270372, + "step": 747 + }, + { + "epoch": 0.19675149602156902, + "grad_norm": 7.2518370649313075, + "learning_rate": 5e-06, + "loss": 0.1966, + "num_input_tokens_seen": 128439832, + "step": 748 + }, + { + "epoch": 0.19675149602156902, + "loss": 0.16326385736465454, + "loss_ce": 0.0021310443989932537, + "loss_iou": 0.55859375, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 128439832, + "step": 748 + }, + { + "epoch": 0.19701453278095613, + "grad_norm": 9.929926428237398, + "learning_rate": 5e-06, + "loss": 0.1635, + "num_input_tokens_seen": 128612032, + "step": 749 + }, + { + "epoch": 0.19701453278095613, + "loss": 0.09666653722524643, + "loss_ce": 0.002489290665835142, + "loss_iou": 0.71484375, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 128612032, + "step": 749 + }, + { + "epoch": 0.19727756954034326, + "grad_norm": 8.129979685164507, + "learning_rate": 5e-06, + "loss": 0.144, + "num_input_tokens_seen": 128784236, + "step": 750 + }, + { + "epoch": 0.19727756954034326, + "eval_websight_new_CIoU": 0.7427884042263031, + "eval_websight_new_GIoU": 0.7464376986026764, + "eval_websight_new_IoU": 0.7504112422466278, + "eval_websight_new_MAE_all": 0.03649342246353626, + "eval_websight_new_MAE_h": 0.03233582433313131, + "eval_websight_new_MAE_w": 0.056174855679273605, + "eval_websight_new_MAE_x": 0.046134982258081436, + "eval_websight_new_MAE_y": 0.01132803549990058, + "eval_websight_new_NUM_probability": 0.9972327351570129, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 0.19190411269664764, + "eval_websight_new_loss_ce": 0.00034623414103407413, + "eval_websight_new_loss_iou": 0.777099609375, + "eval_websight_new_loss_num": 0.03619384765625, + "eval_websight_new_loss_xval": 0.18096923828125, + "eval_websight_new_runtime": 55.1355, + "eval_websight_new_samples_per_second": 0.907, + "eval_websight_new_steps_per_second": 0.036, + "num_input_tokens_seen": 128784236, + "step": 750 + }, + { + "epoch": 0.19727756954034326, + "eval_seeclick_CIoU": 0.4580978453159332, + "eval_seeclick_GIoU": 0.44802993535995483, + "eval_seeclick_IoU": 0.4842703342437744, + "eval_seeclick_MAE_all": 0.05876399576663971, + "eval_seeclick_MAE_h": 0.0604591965675354, + "eval_seeclick_MAE_w": 0.06378005631268024, + "eval_seeclick_MAE_x": 0.07332894578576088, + "eval_seeclick_MAE_y": 0.03748778998851776, + "eval_seeclick_NUM_probability": 0.9986195266246796, + "eval_seeclick_inside_bbox": 0.9375, + "eval_seeclick_loss": 0.3042093813419342, + "eval_seeclick_loss_ce": 0.012256910093128681, + "eval_seeclick_loss_iou": 0.6614990234375, + "eval_seeclick_loss_num": 0.055084228515625, + "eval_seeclick_loss_xval": 0.27545166015625, + "eval_seeclick_runtime": 69.7623, + "eval_seeclick_samples_per_second": 0.616, + "eval_seeclick_steps_per_second": 0.029, + "num_input_tokens_seen": 128784236, + "step": 750 + }, + { + "epoch": 0.19727756954034326, + "eval_icons_CIoU": 0.7633987963199615, + "eval_icons_GIoU": 0.7561427056789398, + "eval_icons_IoU": 0.7705403864383698, + "eval_icons_MAE_all": 0.029183855280280113, + "eval_icons_MAE_h": 0.035788778215646744, + "eval_icons_MAE_w": 0.03255164809525013, + "eval_icons_MAE_x": 0.02722846996039152, + "eval_icons_MAE_y": 0.02116652298718691, + "eval_icons_NUM_probability": 0.9982486963272095, + "eval_icons_inside_bbox": 0.9565972089767456, + "eval_icons_loss": 0.13746696710586548, + "eval_icons_loss_ce": 0.0027548681828193367, + "eval_icons_loss_iou": 0.7633056640625, + "eval_icons_loss_num": 0.02550506591796875, + "eval_icons_loss_xval": 0.127532958984375, + "eval_icons_runtime": 87.2396, + "eval_icons_samples_per_second": 0.573, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 128784236, + "step": 750 + }, + { + "epoch": 0.19727756954034326, + "eval_screenspot_CIoU": 0.4680892725785573, + "eval_screenspot_GIoU": 0.45600322882334393, + "eval_screenspot_IoU": 0.507049967845281, + "eval_screenspot_MAE_all": 0.09560441970825195, + "eval_screenspot_MAE_h": 0.08166227489709854, + "eval_screenspot_MAE_w": 0.14845576385656992, + "eval_screenspot_MAE_x": 0.09815465907255809, + "eval_screenspot_MAE_y": 0.054144968589146934, + "eval_screenspot_NUM_probability": 0.9986165563265482, + "eval_screenspot_inside_bbox": 0.8558333317438761, + "eval_screenspot_loss": 0.805972158908844, + "eval_screenspot_loss_ce": 0.38150885701179504, + "eval_screenspot_loss_iou": 0.580078125, + "eval_screenspot_loss_num": 0.0841064453125, + "eval_screenspot_loss_xval": 0.4202473958333333, + "eval_screenspot_runtime": 148.1708, + "eval_screenspot_samples_per_second": 0.601, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 128784236, + "step": 750 + }, + { + "epoch": 0.19727756954034326, + "loss": 0.7833826541900635, + "loss_ce": 0.36297255754470825, + "loss_iou": 0.41015625, + "loss_num": 0.083984375, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 128784236, + "step": 750 + }, + { + "epoch": 0.1975406062997304, + "grad_norm": 6.107960207443286, + "learning_rate": 5e-06, + "loss": 0.2391, + "num_input_tokens_seen": 128956536, + "step": 751 + }, + { + "epoch": 0.1975406062997304, + "loss": 0.27341228723526, + "loss_ce": 0.0016227375017479062, + "loss_iou": 0.734375, + "loss_num": 0.054443359375, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 128956536, + "step": 751 + }, + { + "epoch": 0.19780364305911752, + "grad_norm": 12.720415598286513, + "learning_rate": 5e-06, + "loss": 0.1518, + "num_input_tokens_seen": 129128836, + "step": 752 + }, + { + "epoch": 0.19780364305911752, + "loss": 0.12072408944368362, + "loss_ce": 0.002621056977659464, + "loss_iou": 0.515625, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 129128836, + "step": 752 + }, + { + "epoch": 0.19806667981850465, + "grad_norm": 14.921842726715303, + "learning_rate": 5e-06, + "loss": 0.1648, + "num_input_tokens_seen": 129300844, + "step": 753 + }, + { + "epoch": 0.19806667981850465, + "loss": 0.1643597036600113, + "loss_ce": 0.0029217104893177748, + "loss_iou": 0.546875, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 129300844, + "step": 753 + }, + { + "epoch": 0.19832971657789175, + "grad_norm": 6.005245677684448, + "learning_rate": 5e-06, + "loss": 0.1204, + "num_input_tokens_seen": 129471448, + "step": 754 + }, + { + "epoch": 0.19832971657789175, + "loss": 0.09330937266349792, + "loss_ce": 0.0012683530803769827, + "loss_iou": 0.5625, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 129471448, + "step": 754 + }, + { + "epoch": 0.19859275333727888, + "grad_norm": 10.390450693463213, + "learning_rate": 5e-06, + "loss": 0.1653, + "num_input_tokens_seen": 129643296, + "step": 755 + }, + { + "epoch": 0.19859275333727888, + "loss": 0.19149892032146454, + "loss_ce": 0.000733550579752773, + "loss_iou": 0.58984375, + "loss_num": 0.0380859375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 129643296, + "step": 755 + }, + { + "epoch": 0.198855790096666, + "grad_norm": 12.828044454318112, + "learning_rate": 5e-06, + "loss": 0.1805, + "num_input_tokens_seen": 129815492, + "step": 756 + }, + { + "epoch": 0.198855790096666, + "loss": 0.17852596938610077, + "loss_ce": 0.003294031834229827, + "loss_iou": 0.486328125, + "loss_num": 0.03515625, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 129815492, + "step": 756 + }, + { + "epoch": 0.19911882685605314, + "grad_norm": 8.380872387581206, + "learning_rate": 5e-06, + "loss": 0.2269, + "num_input_tokens_seen": 129987692, + "step": 757 + }, + { + "epoch": 0.19911882685605314, + "loss": 0.20438869297504425, + "loss_ce": 0.00040920061292126775, + "loss_iou": 0.64453125, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 129987692, + "step": 757 + }, + { + "epoch": 0.19938186361544025, + "grad_norm": 11.044900252894326, + "learning_rate": 5e-06, + "loss": 0.1723, + "num_input_tokens_seen": 130159828, + "step": 758 + }, + { + "epoch": 0.19938186361544025, + "loss": 0.1886797845363617, + "loss_ce": 0.0009661591611802578, + "loss_iou": 0.56640625, + "loss_num": 0.03759765625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 130159828, + "step": 758 + }, + { + "epoch": 0.19964490037482738, + "grad_norm": 8.638981565005851, + "learning_rate": 5e-06, + "loss": 0.1719, + "num_input_tokens_seen": 130331756, + "step": 759 + }, + { + "epoch": 0.19964490037482738, + "loss": 0.1088617593050003, + "loss_ce": 0.0055597638711333275, + "loss_iou": 0.5703125, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 130331756, + "step": 759 + }, + { + "epoch": 0.1999079371342145, + "grad_norm": 16.96292619187744, + "learning_rate": 5e-06, + "loss": 0.1629, + "num_input_tokens_seen": 130503960, + "step": 760 + }, + { + "epoch": 0.1999079371342145, + "loss": 0.1997271627187729, + "loss_ce": 0.001668088138103485, + "loss_iou": 0.4609375, + "loss_num": 0.03955078125, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 130503960, + "step": 760 + }, + { + "epoch": 0.20017097389360164, + "grad_norm": 18.254879073962975, + "learning_rate": 5e-06, + "loss": 0.209, + "num_input_tokens_seen": 130676124, + "step": 761 + }, + { + "epoch": 0.20017097389360164, + "loss": 0.22872218489646912, + "loss_ce": 0.002464861376211047, + "loss_iou": 0.55078125, + "loss_num": 0.045166015625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 130676124, + "step": 761 + }, + { + "epoch": 0.20043401065298874, + "grad_norm": 16.873564483656185, + "learning_rate": 5e-06, + "loss": 0.1454, + "num_input_tokens_seen": 130848448, + "step": 762 + }, + { + "epoch": 0.20043401065298874, + "loss": 0.11374461650848389, + "loss_ce": 0.001867176266387105, + "loss_iou": 0.5546875, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 130848448, + "step": 762 + }, + { + "epoch": 0.20069704741237587, + "grad_norm": 8.061510131196302, + "learning_rate": 5e-06, + "loss": 0.1913, + "num_input_tokens_seen": 131020600, + "step": 763 + }, + { + "epoch": 0.20069704741237587, + "loss": 0.24045339226722717, + "loss_ce": 0.003575955517590046, + "loss_iou": 0.59375, + "loss_num": 0.04736328125, + "loss_xval": 0.2373046875, + "num_input_tokens_seen": 131020600, + "step": 763 + }, + { + "epoch": 0.200960084171763, + "grad_norm": 7.1675953763190865, + "learning_rate": 5e-06, + "loss": 0.1621, + "num_input_tokens_seen": 131191064, + "step": 764 + }, + { + "epoch": 0.200960084171763, + "loss": 0.1495012640953064, + "loss_ce": 0.004115516785532236, + "loss_iou": 0.462890625, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 131191064, + "step": 764 + }, + { + "epoch": 0.20122312093115013, + "grad_norm": 5.20536952677903, + "learning_rate": 5e-06, + "loss": 0.1756, + "num_input_tokens_seen": 131363148, + "step": 765 + }, + { + "epoch": 0.20122312093115013, + "loss": 0.23652556538581848, + "loss_ce": 0.009566339664161205, + "loss_iou": 0.546875, + "loss_num": 0.04541015625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 131363148, + "step": 765 + }, + { + "epoch": 0.20148615769053727, + "grad_norm": 8.038569612667256, + "learning_rate": 5e-06, + "loss": 0.1635, + "num_input_tokens_seen": 131535488, + "step": 766 + }, + { + "epoch": 0.20148615769053727, + "loss": 0.24296867847442627, + "loss_ce": 0.0015136117581278086, + "loss_iou": 0.490234375, + "loss_num": 0.04833984375, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 131535488, + "step": 766 + }, + { + "epoch": 0.20174919444992437, + "grad_norm": 6.190176430910417, + "learning_rate": 5e-06, + "loss": 0.2091, + "num_input_tokens_seen": 131707808, + "step": 767 + }, + { + "epoch": 0.20174919444992437, + "loss": 0.24412932991981506, + "loss_ce": 0.00218596076592803, + "loss_iou": 0.5859375, + "loss_num": 0.04833984375, + "loss_xval": 0.2421875, + "num_input_tokens_seen": 131707808, + "step": 767 + }, + { + "epoch": 0.2020122312093115, + "grad_norm": 6.201791565366413, + "learning_rate": 5e-06, + "loss": 0.1489, + "num_input_tokens_seen": 131879928, + "step": 768 + }, + { + "epoch": 0.2020122312093115, + "loss": 0.09396322071552277, + "loss_ce": 0.002288414863869548, + "loss_iou": 0.47265625, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 131879928, + "step": 768 + }, + { + "epoch": 0.20227526796869863, + "grad_norm": 7.34120818873771, + "learning_rate": 5e-06, + "loss": 0.179, + "num_input_tokens_seen": 132052340, + "step": 769 + }, + { + "epoch": 0.20227526796869863, + "loss": 0.22239628434181213, + "loss_ce": 0.005477334372699261, + "loss_iou": 0.51171875, + "loss_num": 0.043212890625, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 132052340, + "step": 769 + }, + { + "epoch": 0.20253830472808576, + "grad_norm": 6.124473321987556, + "learning_rate": 5e-06, + "loss": 0.1912, + "num_input_tokens_seen": 132224712, + "step": 770 + }, + { + "epoch": 0.20253830472808576, + "loss": 0.17558854818344116, + "loss_ce": 0.0018824923317879438, + "loss_iou": 0.54296875, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 132224712, + "step": 770 + }, + { + "epoch": 0.20280134148747286, + "grad_norm": 9.284836415516965, + "learning_rate": 5e-06, + "loss": 0.1512, + "num_input_tokens_seen": 132396772, + "step": 771 + }, + { + "epoch": 0.20280134148747286, + "loss": 0.15597118437290192, + "loss_ce": 0.0003925706841982901, + "loss_iou": 0.734375, + "loss_num": 0.0311279296875, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 132396772, + "step": 771 + }, + { + "epoch": 0.20306437824686, + "grad_norm": 7.764344629279706, + "learning_rate": 5e-06, + "loss": 0.1835, + "num_input_tokens_seen": 132568908, + "step": 772 + }, + { + "epoch": 0.20306437824686, + "loss": 0.18731489777565002, + "loss_ce": 0.0028056304436177015, + "loss_iou": 0.53125, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 132568908, + "step": 772 + }, + { + "epoch": 0.20332741500624713, + "grad_norm": 10.387981116967136, + "learning_rate": 5e-06, + "loss": 0.2105, + "num_input_tokens_seen": 132741140, + "step": 773 + }, + { + "epoch": 0.20332741500624713, + "loss": 0.2155558466911316, + "loss_ce": 0.00147504813503474, + "loss_iou": 0.5625, + "loss_num": 0.042724609375, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 132741140, + "step": 773 + }, + { + "epoch": 0.20359045176563426, + "grad_norm": 11.81609487087241, + "learning_rate": 5e-06, + "loss": 0.1972, + "num_input_tokens_seen": 132913412, + "step": 774 + }, + { + "epoch": 0.20359045176563426, + "loss": 0.2562327980995178, + "loss_ce": 0.0022044687066227198, + "loss_iou": 0.625, + "loss_num": 0.05078125, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 132913412, + "step": 774 + }, + { + "epoch": 0.20385348852502136, + "grad_norm": 11.070638283175898, + "learning_rate": 5e-06, + "loss": 0.1561, + "num_input_tokens_seen": 133085388, + "step": 775 + }, + { + "epoch": 0.20385348852502136, + "loss": 0.1850009262561798, + "loss_ce": 0.0011630415683612227, + "loss_iou": 0.671875, + "loss_num": 0.036865234375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 133085388, + "step": 775 + }, + { + "epoch": 0.2041165252844085, + "grad_norm": 8.169381502927404, + "learning_rate": 5e-06, + "loss": 0.1653, + "num_input_tokens_seen": 133257492, + "step": 776 + }, + { + "epoch": 0.2041165252844085, + "loss": 0.17405013740062714, + "loss_ce": 0.0007713312515988946, + "loss_iou": 0.609375, + "loss_num": 0.03466796875, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 133257492, + "step": 776 + }, + { + "epoch": 0.20437956204379562, + "grad_norm": 7.483205340151747, + "learning_rate": 5e-06, + "loss": 0.1812, + "num_input_tokens_seen": 133429516, + "step": 777 + }, + { + "epoch": 0.20437956204379562, + "loss": 0.21947714686393738, + "loss_ce": 0.0011543984292075038, + "loss_iou": 0.6171875, + "loss_num": 0.043701171875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 133429516, + "step": 777 + }, + { + "epoch": 0.20464259880318275, + "grad_norm": 15.835699409497824, + "learning_rate": 5e-06, + "loss": 0.1767, + "num_input_tokens_seen": 133599692, + "step": 778 + }, + { + "epoch": 0.20464259880318275, + "loss": 0.1631580889225006, + "loss_ce": 0.0015064696781337261, + "loss_iou": 0.50390625, + "loss_num": 0.0322265625, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 133599692, + "step": 778 + }, + { + "epoch": 0.20490563556256988, + "grad_norm": 6.9792797252935, + "learning_rate": 5e-06, + "loss": 0.1661, + "num_input_tokens_seen": 133771756, + "step": 779 + }, + { + "epoch": 0.20490563556256988, + "loss": 0.16169646382331848, + "loss_ce": 0.0007467527757398784, + "loss_iou": 0.435546875, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 133771756, + "step": 779 + }, + { + "epoch": 0.20516867232195699, + "grad_norm": 12.331720736489249, + "learning_rate": 5e-06, + "loss": 0.1981, + "num_input_tokens_seen": 133944348, + "step": 780 + }, + { + "epoch": 0.20516867232195699, + "loss": 0.13550271093845367, + "loss_ce": 0.0025071091949939728, + "loss_iou": 0.65234375, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 133944348, + "step": 780 + }, + { + "epoch": 0.20543170908134412, + "grad_norm": 5.7846193156700725, + "learning_rate": 5e-06, + "loss": 0.1172, + "num_input_tokens_seen": 134116796, + "step": 781 + }, + { + "epoch": 0.20543170908134412, + "loss": 0.09005297720432281, + "loss_ce": 0.0008500947151333094, + "loss_iou": 0.5390625, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 134116796, + "step": 781 + }, + { + "epoch": 0.20569474584073125, + "grad_norm": 20.38915176858911, + "learning_rate": 5e-06, + "loss": 0.2145, + "num_input_tokens_seen": 134285564, + "step": 782 + }, + { + "epoch": 0.20569474584073125, + "loss": 0.2173474133014679, + "loss_ce": 0.0005505430162884295, + "loss_iou": 0.5390625, + "loss_num": 0.043212890625, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 134285564, + "step": 782 + }, + { + "epoch": 0.20595778260011838, + "grad_norm": 13.943592884184664, + "learning_rate": 5e-06, + "loss": 0.2368, + "num_input_tokens_seen": 134457600, + "step": 783 + }, + { + "epoch": 0.20595778260011838, + "loss": 0.23010152578353882, + "loss_ce": 0.00341693963855505, + "loss_iou": 0.54296875, + "loss_num": 0.04541015625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 134457600, + "step": 783 + }, + { + "epoch": 0.20622081935950548, + "grad_norm": 7.47232647542354, + "learning_rate": 5e-06, + "loss": 0.1884, + "num_input_tokens_seen": 134627836, + "step": 784 + }, + { + "epoch": 0.20622081935950548, + "loss": 0.31371766328811646, + "loss_ce": 0.0035369964316487312, + "loss_iou": 0.494140625, + "loss_num": 0.06201171875, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 134627836, + "step": 784 + }, + { + "epoch": 0.2064838561188926, + "grad_norm": 7.561434198006136, + "learning_rate": 5e-06, + "loss": 0.1991, + "num_input_tokens_seen": 134800024, + "step": 785 + }, + { + "epoch": 0.2064838561188926, + "loss": 0.25232362747192383, + "loss_ce": 0.003910548985004425, + "loss_iou": 0.63671875, + "loss_num": 0.049560546875, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 134800024, + "step": 785 + }, + { + "epoch": 0.20674689287827974, + "grad_norm": 7.384265604656037, + "learning_rate": 5e-06, + "loss": 0.167, + "num_input_tokens_seen": 134972276, + "step": 786 + }, + { + "epoch": 0.20674689287827974, + "loss": 0.22025543451309204, + "loss_ce": 0.0012307591969147325, + "loss_iou": 0.48046875, + "loss_num": 0.0439453125, + "loss_xval": 0.21875, + "num_input_tokens_seen": 134972276, + "step": 786 + }, + { + "epoch": 0.20700992963766687, + "grad_norm": 6.5497695680105705, + "learning_rate": 5e-06, + "loss": 0.1985, + "num_input_tokens_seen": 135144300, + "step": 787 + }, + { + "epoch": 0.20700992963766687, + "loss": 0.24491435289382935, + "loss_ce": 0.0007126981508918107, + "loss_iou": 0.5546875, + "loss_num": 0.048828125, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 135144300, + "step": 787 + }, + { + "epoch": 0.20727296639705398, + "grad_norm": 7.321479905780818, + "learning_rate": 5e-06, + "loss": 0.1764, + "num_input_tokens_seen": 135316344, + "step": 788 + }, + { + "epoch": 0.20727296639705398, + "loss": 0.19332991540431976, + "loss_ce": 0.0017405691323801875, + "loss_iou": 0.52734375, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 135316344, + "step": 788 + }, + { + "epoch": 0.2075360031564411, + "grad_norm": 5.255608280002089, + "learning_rate": 5e-06, + "loss": 0.1867, + "num_input_tokens_seen": 135488352, + "step": 789 + }, + { + "epoch": 0.2075360031564411, + "loss": 0.18961402773857117, + "loss_ce": 0.0032126582227647305, + "loss_iou": 0.6015625, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 135488352, + "step": 789 + }, + { + "epoch": 0.20779903991582824, + "grad_norm": 6.817599663694252, + "learning_rate": 5e-06, + "loss": 0.1583, + "num_input_tokens_seen": 135660716, + "step": 790 + }, + { + "epoch": 0.20779903991582824, + "loss": 0.11433705687522888, + "loss_ce": 0.0007506305119022727, + "loss_iou": 0.5234375, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 135660716, + "step": 790 + }, + { + "epoch": 0.20806207667521537, + "grad_norm": 7.049616575343475, + "learning_rate": 5e-06, + "loss": 0.2041, + "num_input_tokens_seen": 135833108, + "step": 791 + }, + { + "epoch": 0.20806207667521537, + "loss": 0.16387248039245605, + "loss_ce": 0.005669359117746353, + "loss_iou": 0.64453125, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 135833108, + "step": 791 + }, + { + "epoch": 0.2083251134346025, + "grad_norm": 9.83283000996709, + "learning_rate": 5e-06, + "loss": 0.1348, + "num_input_tokens_seen": 136001408, + "step": 792 + }, + { + "epoch": 0.2083251134346025, + "loss": 0.13693515956401825, + "loss_ce": 0.0015286724083125591, + "loss_iou": 0.546875, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 136001408, + "step": 792 + }, + { + "epoch": 0.2085881501939896, + "grad_norm": 18.56546458626088, + "learning_rate": 5e-06, + "loss": 0.2126, + "num_input_tokens_seen": 136173768, + "step": 793 + }, + { + "epoch": 0.2085881501939896, + "loss": 0.2671317458152771, + "loss_ce": 0.004436435177922249, + "loss_iou": 0.384765625, + "loss_num": 0.052490234375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 136173768, + "step": 793 + }, + { + "epoch": 0.20885118695337673, + "grad_norm": 7.460522492414201, + "learning_rate": 5e-06, + "loss": 0.1559, + "num_input_tokens_seen": 136345864, + "step": 794 + }, + { + "epoch": 0.20885118695337673, + "loss": 0.19496268033981323, + "loss_ce": 0.003495402168482542, + "loss_iou": 0.58984375, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 136345864, + "step": 794 + }, + { + "epoch": 0.20911422371276386, + "grad_norm": 6.238640410439663, + "learning_rate": 5e-06, + "loss": 0.177, + "num_input_tokens_seen": 136518076, + "step": 795 + }, + { + "epoch": 0.20911422371276386, + "loss": 0.1869560331106186, + "loss_ce": 0.0006157027091830969, + "loss_iou": 0.66796875, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 136518076, + "step": 795 + }, + { + "epoch": 0.209377260472151, + "grad_norm": 4.750721959517019, + "learning_rate": 5e-06, + "loss": 0.1284, + "num_input_tokens_seen": 136690192, + "step": 796 + }, + { + "epoch": 0.209377260472151, + "loss": 0.10885806381702423, + "loss_ce": 0.0011920429533347487, + "loss_iou": 0.52734375, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 136690192, + "step": 796 + }, + { + "epoch": 0.2096402972315381, + "grad_norm": 11.655608393949823, + "learning_rate": 5e-06, + "loss": 0.1545, + "num_input_tokens_seen": 136862596, + "step": 797 + }, + { + "epoch": 0.2096402972315381, + "loss": 0.13095784187316895, + "loss_ce": 0.0011970889754593372, + "loss_iou": 0.54296875, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 136862596, + "step": 797 + }, + { + "epoch": 0.20990333399092523, + "grad_norm": 7.6147795291746645, + "learning_rate": 5e-06, + "loss": 0.1671, + "num_input_tokens_seen": 137033096, + "step": 798 + }, + { + "epoch": 0.20990333399092523, + "loss": 0.14115872979164124, + "loss_ce": 0.002456323243677616, + "loss_iou": 0.494140625, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 137033096, + "step": 798 + }, + { + "epoch": 0.21016637075031236, + "grad_norm": 6.436556075292691, + "learning_rate": 5e-06, + "loss": 0.164, + "num_input_tokens_seen": 137203708, + "step": 799 + }, + { + "epoch": 0.21016637075031236, + "loss": 0.15657231211662292, + "loss_ce": 0.0031909646932035685, + "loss_iou": 0.48046875, + "loss_num": 0.03076171875, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 137203708, + "step": 799 + }, + { + "epoch": 0.2104294075096995, + "grad_norm": 5.272376224873815, + "learning_rate": 5e-06, + "loss": 0.1485, + "num_input_tokens_seen": 137374352, + "step": 800 + }, + { + "epoch": 0.2104294075096995, + "loss": 0.19644752144813538, + "loss_ce": 0.003332281718030572, + "loss_iou": 0.5234375, + "loss_num": 0.03857421875, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 137374352, + "step": 800 + }, + { + "epoch": 0.2106924442690866, + "grad_norm": 9.274938800244652, + "learning_rate": 5e-06, + "loss": 0.1815, + "num_input_tokens_seen": 137546372, + "step": 801 + }, + { + "epoch": 0.2106924442690866, + "loss": 0.23770156502723694, + "loss_ce": 0.0010682701831683517, + "loss_iou": 0.5078125, + "loss_num": 0.04736328125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 137546372, + "step": 801 + }, + { + "epoch": 0.21095548102847372, + "grad_norm": 12.494816629710325, + "learning_rate": 5e-06, + "loss": 0.1484, + "num_input_tokens_seen": 137718496, + "step": 802 + }, + { + "epoch": 0.21095548102847372, + "loss": 0.13832518458366394, + "loss_ce": 0.00380369508638978, + "loss_iou": 0.59765625, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 137718496, + "step": 802 + }, + { + "epoch": 0.21121851778786085, + "grad_norm": 11.143811760293481, + "learning_rate": 5e-06, + "loss": 0.1896, + "num_input_tokens_seen": 137890492, + "step": 803 + }, + { + "epoch": 0.21121851778786085, + "loss": 0.17278538644313812, + "loss_ce": 0.0006662444211542606, + "loss_iou": 0.6015625, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 137890492, + "step": 803 + }, + { + "epoch": 0.21148155454724799, + "grad_norm": 6.694127616757882, + "learning_rate": 5e-06, + "loss": 0.1473, + "num_input_tokens_seen": 138062576, + "step": 804 + }, + { + "epoch": 0.21148155454724799, + "loss": 0.08578141778707504, + "loss_ce": 0.0013697945978492498, + "loss_iou": 0.6328125, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 138062576, + "step": 804 + }, + { + "epoch": 0.21174459130663512, + "grad_norm": 6.879766720907239, + "learning_rate": 5e-06, + "loss": 0.2407, + "num_input_tokens_seen": 138234992, + "step": 805 + }, + { + "epoch": 0.21174459130663512, + "loss": 0.26608556509017944, + "loss_ce": 0.0011929699685424566, + "loss_iou": 0.671875, + "loss_num": 0.052978515625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 138234992, + "step": 805 + }, + { + "epoch": 0.21200762806602222, + "grad_norm": 10.68020187128554, + "learning_rate": 5e-06, + "loss": 0.1488, + "num_input_tokens_seen": 138407296, + "step": 806 + }, + { + "epoch": 0.21200762806602222, + "loss": 0.16970132291316986, + "loss_ce": 0.003929831553250551, + "loss_iou": 0.5078125, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 138407296, + "step": 806 + }, + { + "epoch": 0.21227066482540935, + "grad_norm": 6.8835699764215, + "learning_rate": 5e-06, + "loss": 0.2142, + "num_input_tokens_seen": 138579820, + "step": 807 + }, + { + "epoch": 0.21227066482540935, + "loss": 0.20114630460739136, + "loss_ce": 0.0011341023491695523, + "loss_iou": 0.703125, + "loss_num": 0.0400390625, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 138579820, + "step": 807 + }, + { + "epoch": 0.21253370158479648, + "grad_norm": 5.0783612426941795, + "learning_rate": 5e-06, + "loss": 0.1658, + "num_input_tokens_seen": 138752112, + "step": 808 + }, + { + "epoch": 0.21253370158479648, + "loss": 0.22068345546722412, + "loss_ce": 0.004802103620022535, + "loss_iou": 0.42578125, + "loss_num": 0.043212890625, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 138752112, + "step": 808 + }, + { + "epoch": 0.2127967383441836, + "grad_norm": 6.76267182747039, + "learning_rate": 5e-06, + "loss": 0.1732, + "num_input_tokens_seen": 138924516, + "step": 809 + }, + { + "epoch": 0.2127967383441836, + "loss": 0.16195307672023773, + "loss_ce": 0.0010644117137417197, + "loss_iou": 0.5234375, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 138924516, + "step": 809 + }, + { + "epoch": 0.21305977510357071, + "grad_norm": 12.983359860046427, + "learning_rate": 5e-06, + "loss": 0.1645, + "num_input_tokens_seen": 139096976, + "step": 810 + }, + { + "epoch": 0.21305977510357071, + "loss": 0.14437143504619598, + "loss_ce": 0.0023426164407283068, + "loss_iou": 0.50390625, + "loss_num": 0.0284423828125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 139096976, + "step": 810 + }, + { + "epoch": 0.21332281186295785, + "grad_norm": 5.900519173332491, + "learning_rate": 5e-06, + "loss": 0.1796, + "num_input_tokens_seen": 139269224, + "step": 811 + }, + { + "epoch": 0.21332281186295785, + "loss": 0.11451567709445953, + "loss_ce": 0.0014175281394273043, + "loss_iou": 0.58203125, + "loss_num": 0.0225830078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 139269224, + "step": 811 + }, + { + "epoch": 0.21358584862234498, + "grad_norm": 11.704739365440266, + "learning_rate": 5e-06, + "loss": 0.1362, + "num_input_tokens_seen": 139441048, + "step": 812 + }, + { + "epoch": 0.21358584862234498, + "loss": 0.06064599007368088, + "loss_ce": 0.0007399858441203833, + "loss_iou": 0.3984375, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 139441048, + "step": 812 + }, + { + "epoch": 0.2138488853817321, + "grad_norm": 5.765338635306649, + "learning_rate": 5e-06, + "loss": 0.1449, + "num_input_tokens_seen": 139613180, + "step": 813 + }, + { + "epoch": 0.2138488853817321, + "loss": 0.1868004947900772, + "loss_ce": 0.001986048649996519, + "loss_iou": 0.6171875, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 139613180, + "step": 813 + }, + { + "epoch": 0.2141119221411192, + "grad_norm": 8.301852392840289, + "learning_rate": 5e-06, + "loss": 0.1731, + "num_input_tokens_seen": 139785340, + "step": 814 + }, + { + "epoch": 0.2141119221411192, + "loss": 0.21647384762763977, + "loss_ce": 0.002240448724478483, + "loss_iou": 0.5078125, + "loss_num": 0.04296875, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 139785340, + "step": 814 + }, + { + "epoch": 0.21437495890050634, + "grad_norm": 10.75059655667213, + "learning_rate": 5e-06, + "loss": 0.2066, + "num_input_tokens_seen": 139957612, + "step": 815 + }, + { + "epoch": 0.21437495890050634, + "loss": 0.2657305598258972, + "loss_ce": 0.007979076355695724, + "loss_iou": 0.41796875, + "loss_num": 0.051513671875, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 139957612, + "step": 815 + }, + { + "epoch": 0.21463799565989347, + "grad_norm": 8.1554830347417, + "learning_rate": 5e-06, + "loss": 0.1594, + "num_input_tokens_seen": 140129832, + "step": 816 + }, + { + "epoch": 0.21463799565989347, + "loss": 0.21090401709079742, + "loss_ce": 0.0022858483716845512, + "loss_iou": 0.6015625, + "loss_num": 0.041748046875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 140129832, + "step": 816 + }, + { + "epoch": 0.2149010324192806, + "grad_norm": 6.39307582100075, + "learning_rate": 5e-06, + "loss": 0.1609, + "num_input_tokens_seen": 140302204, + "step": 817 + }, + { + "epoch": 0.2149010324192806, + "loss": 0.12444409728050232, + "loss_ce": 0.0020075817592442036, + "loss_iou": 0.5859375, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 140302204, + "step": 817 + }, + { + "epoch": 0.2151640691786677, + "grad_norm": 11.031920247903537, + "learning_rate": 5e-06, + "loss": 0.1559, + "num_input_tokens_seen": 140470936, + "step": 818 + }, + { + "epoch": 0.2151640691786677, + "loss": 0.24400946497917175, + "loss_ce": 0.00112004519905895, + "loss_iou": 0.5078125, + "loss_num": 0.048583984375, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 140470936, + "step": 818 + }, + { + "epoch": 0.21542710593805484, + "grad_norm": 5.897881271346391, + "learning_rate": 5e-06, + "loss": 0.194, + "num_input_tokens_seen": 140643120, + "step": 819 + }, + { + "epoch": 0.21542710593805484, + "loss": 0.12999695539474487, + "loss_ce": 0.001182260224595666, + "loss_iou": 0.5390625, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 140643120, + "step": 819 + }, + { + "epoch": 0.21569014269744197, + "grad_norm": 13.442111711356437, + "learning_rate": 5e-06, + "loss": 0.1744, + "num_input_tokens_seen": 140815608, + "step": 820 + }, + { + "epoch": 0.21569014269744197, + "loss": 0.1816408634185791, + "loss_ce": 0.0014040416572242975, + "loss_iou": 0.345703125, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 140815608, + "step": 820 + }, + { + "epoch": 0.2159531794568291, + "grad_norm": 13.297750331398602, + "learning_rate": 5e-06, + "loss": 0.1926, + "num_input_tokens_seen": 140987824, + "step": 821 + }, + { + "epoch": 0.2159531794568291, + "loss": 0.29018890857696533, + "loss_ce": 0.002347112400457263, + "loss_iou": 0.478515625, + "loss_num": 0.0576171875, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 140987824, + "step": 821 + }, + { + "epoch": 0.21621621621621623, + "grad_norm": 18.74775436478718, + "learning_rate": 5e-06, + "loss": 0.2948, + "num_input_tokens_seen": 141160200, + "step": 822 + }, + { + "epoch": 0.21621621621621623, + "loss": 0.2370918244123459, + "loss_ce": 0.1260078400373459, + "loss_iou": 0.59765625, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 141160200, + "step": 822 + }, + { + "epoch": 0.21647925297560333, + "grad_norm": 19.14528392541103, + "learning_rate": 5e-06, + "loss": 0.1977, + "num_input_tokens_seen": 141329928, + "step": 823 + }, + { + "epoch": 0.21647925297560333, + "loss": 0.1949683427810669, + "loss_ce": 0.0917884111404419, + "loss_iou": 0.474609375, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 141329928, + "step": 823 + }, + { + "epoch": 0.21674228973499046, + "grad_norm": 6.8551193847587735, + "learning_rate": 5e-06, + "loss": 0.1354, + "num_input_tokens_seen": 141502236, + "step": 824 + }, + { + "epoch": 0.21674228973499046, + "loss": 0.09739409387111664, + "loss_ce": 0.007184132467955351, + "loss_iou": 0.6640625, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 141502236, + "step": 824 + }, + { + "epoch": 0.2170053264943776, + "grad_norm": 7.934353513499139, + "learning_rate": 5e-06, + "loss": 0.2174, + "num_input_tokens_seen": 141674272, + "step": 825 + }, + { + "epoch": 0.2170053264943776, + "loss": 0.3033770024776459, + "loss_ce": 0.026491012424230576, + "loss_iou": 0.45703125, + "loss_num": 0.055419921875, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 141674272, + "step": 825 + }, + { + "epoch": 0.21726836325376472, + "grad_norm": 6.296485584445855, + "learning_rate": 5e-06, + "loss": 0.2048, + "num_input_tokens_seen": 141846556, + "step": 826 + }, + { + "epoch": 0.21726836325376472, + "loss": 0.16873130202293396, + "loss_ce": 0.0013118635397404432, + "loss_iou": 0.59765625, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 141846556, + "step": 826 + }, + { + "epoch": 0.21753140001315183, + "grad_norm": 10.428529158159186, + "learning_rate": 5e-06, + "loss": 0.14, + "num_input_tokens_seen": 142018940, + "step": 827 + }, + { + "epoch": 0.21753140001315183, + "loss": 0.15984642505645752, + "loss_ce": 0.0018874472007155418, + "loss_iou": 0.421875, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 142018940, + "step": 827 + }, + { + "epoch": 0.21779443677253896, + "grad_norm": 7.702760290749295, + "learning_rate": 5e-06, + "loss": 0.1934, + "num_input_tokens_seen": 142189520, + "step": 828 + }, + { + "epoch": 0.21779443677253896, + "loss": 0.17661917209625244, + "loss_ce": 0.007002475671470165, + "loss_iou": 0.5078125, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 142189520, + "step": 828 + }, + { + "epoch": 0.2180574735319261, + "grad_norm": 5.154197949486892, + "learning_rate": 5e-06, + "loss": 0.1595, + "num_input_tokens_seen": 142359752, + "step": 829 + }, + { + "epoch": 0.2180574735319261, + "loss": 0.11216248571872711, + "loss_ce": 0.0025128289125859737, + "loss_iou": 0.52734375, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 142359752, + "step": 829 + }, + { + "epoch": 0.21832051029131322, + "grad_norm": 9.212654853971248, + "learning_rate": 5e-06, + "loss": 0.1782, + "num_input_tokens_seen": 142530028, + "step": 830 + }, + { + "epoch": 0.21832051029131322, + "loss": 0.17883381247520447, + "loss_ce": 0.006104309111833572, + "loss_iou": 0.515625, + "loss_num": 0.034423828125, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 142530028, + "step": 830 + }, + { + "epoch": 0.21858354705070032, + "grad_norm": 22.355687744069783, + "learning_rate": 5e-06, + "loss": 0.2118, + "num_input_tokens_seen": 142698896, + "step": 831 + }, + { + "epoch": 0.21858354705070032, + "loss": 0.2873075604438782, + "loss_ce": 0.00788860023021698, + "loss_iou": 0.392578125, + "loss_num": 0.055908203125, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 142698896, + "step": 831 + }, + { + "epoch": 0.21884658381008745, + "grad_norm": 7.646461001716363, + "learning_rate": 5e-06, + "loss": 0.1589, + "num_input_tokens_seen": 142869236, + "step": 832 + }, + { + "epoch": 0.21884658381008745, + "loss": 0.19723272323608398, + "loss_ce": 0.00063848658464849, + "loss_iou": 0.53515625, + "loss_num": 0.039306640625, + "loss_xval": 0.1962890625, + "num_input_tokens_seen": 142869236, + "step": 832 + }, + { + "epoch": 0.21910962056947458, + "grad_norm": 5.709950173979812, + "learning_rate": 5e-06, + "loss": 0.1462, + "num_input_tokens_seen": 143041432, + "step": 833 + }, + { + "epoch": 0.21910962056947458, + "loss": 0.13923318684101105, + "loss_ce": 0.006512241438031197, + "loss_iou": 0.470703125, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 143041432, + "step": 833 + }, + { + "epoch": 0.21937265732886171, + "grad_norm": 7.759034787872298, + "learning_rate": 5e-06, + "loss": 0.1411, + "num_input_tokens_seen": 143213276, + "step": 834 + }, + { + "epoch": 0.21937265732886171, + "loss": 0.13741275668144226, + "loss_ce": 0.00820133276283741, + "loss_iou": 0.4609375, + "loss_num": 0.02587890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 143213276, + "step": 834 + }, + { + "epoch": 0.21963569408824885, + "grad_norm": 14.10397350971688, + "learning_rate": 5e-06, + "loss": 0.1819, + "num_input_tokens_seen": 143385916, + "step": 835 + }, + { + "epoch": 0.21963569408824885, + "loss": 0.17109227180480957, + "loss_ce": 0.009959458373486996, + "loss_iou": 0.62109375, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 143385916, + "step": 835 + }, + { + "epoch": 0.21989873084763595, + "grad_norm": 10.610026241323633, + "learning_rate": 5e-06, + "loss": 0.1959, + "num_input_tokens_seen": 143558108, + "step": 836 + }, + { + "epoch": 0.21989873084763595, + "loss": 0.1955014169216156, + "loss_ce": 0.006353452801704407, + "loss_iou": 0.58984375, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 143558108, + "step": 836 + }, + { + "epoch": 0.22016176760702308, + "grad_norm": 6.884069616265665, + "learning_rate": 5e-06, + "loss": 0.1724, + "num_input_tokens_seen": 143730560, + "step": 837 + }, + { + "epoch": 0.22016176760702308, + "loss": 0.1851910948753357, + "loss_ce": 0.001475287601351738, + "loss_iou": 0.50390625, + "loss_num": 0.03662109375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 143730560, + "step": 837 + }, + { + "epoch": 0.2204248043664102, + "grad_norm": 8.581569398296145, + "learning_rate": 5e-06, + "loss": 0.1592, + "num_input_tokens_seen": 143901168, + "step": 838 + }, + { + "epoch": 0.2204248043664102, + "loss": 0.10049735009670258, + "loss_ce": 0.00043021421879529953, + "loss_iou": 0.55859375, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 143901168, + "step": 838 + }, + { + "epoch": 0.22068784112579734, + "grad_norm": 8.473859363920706, + "learning_rate": 5e-06, + "loss": 0.1656, + "num_input_tokens_seen": 144073300, + "step": 839 + }, + { + "epoch": 0.22068784112579734, + "loss": 0.18692679703235626, + "loss_ce": 0.001440959284082055, + "loss_iou": 0.37109375, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 144073300, + "step": 839 + }, + { + "epoch": 0.22095087788518444, + "grad_norm": 4.5516671382314495, + "learning_rate": 5e-06, + "loss": 0.1346, + "num_input_tokens_seen": 144245656, + "step": 840 + }, + { + "epoch": 0.22095087788518444, + "loss": 0.1278304159641266, + "loss_ce": 0.003227140521630645, + "loss_iou": 0.64453125, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 144245656, + "step": 840 + }, + { + "epoch": 0.22121391464457157, + "grad_norm": 8.854170908003372, + "learning_rate": 5e-06, + "loss": 0.143, + "num_input_tokens_seen": 144414684, + "step": 841 + }, + { + "epoch": 0.22121391464457157, + "loss": 0.11698315292596817, + "loss_ce": 0.003732412587851286, + "loss_iou": 0.58984375, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 144414684, + "step": 841 + }, + { + "epoch": 0.2214769514039587, + "grad_norm": 6.139198378486474, + "learning_rate": 5e-06, + "loss": 0.1303, + "num_input_tokens_seen": 144586900, + "step": 842 + }, + { + "epoch": 0.2214769514039587, + "loss": 0.16039735078811646, + "loss_ce": 0.001461806707084179, + "loss_iou": 0.66796875, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 144586900, + "step": 842 + }, + { + "epoch": 0.22173998816334584, + "grad_norm": 10.644206851478819, + "learning_rate": 5e-06, + "loss": 0.1737, + "num_input_tokens_seen": 144759108, + "step": 843 + }, + { + "epoch": 0.22173998816334584, + "loss": 0.14353252947330475, + "loss_ce": 0.0032432209700345993, + "loss_iou": 0.4453125, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 144759108, + "step": 843 + }, + { + "epoch": 0.22200302492273294, + "grad_norm": 6.739417345524847, + "learning_rate": 5e-06, + "loss": 0.1301, + "num_input_tokens_seen": 144931444, + "step": 844 + }, + { + "epoch": 0.22200302492273294, + "loss": 0.09944656491279602, + "loss_ce": 0.0004170280881226063, + "loss_iou": 0.6328125, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 144931444, + "step": 844 + }, + { + "epoch": 0.22226606168212007, + "grad_norm": 8.110959144365463, + "learning_rate": 5e-06, + "loss": 0.177, + "num_input_tokens_seen": 145102036, + "step": 845 + }, + { + "epoch": 0.22226606168212007, + "loss": 0.2286926507949829, + "loss_ce": 0.001336704008281231, + "loss_iou": 0.56640625, + "loss_num": 0.04541015625, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 145102036, + "step": 845 + }, + { + "epoch": 0.2225290984415072, + "grad_norm": 10.034770268919976, + "learning_rate": 5e-06, + "loss": 0.2222, + "num_input_tokens_seen": 145274324, + "step": 846 + }, + { + "epoch": 0.2225290984415072, + "loss": 0.23924441635608673, + "loss_ce": 0.004075955133885145, + "loss_iou": 0.53515625, + "loss_num": 0.047119140625, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 145274324, + "step": 846 + }, + { + "epoch": 0.22279213520089433, + "grad_norm": 6.615833632469255, + "learning_rate": 5e-06, + "loss": 0.1736, + "num_input_tokens_seen": 145446460, + "step": 847 + }, + { + "epoch": 0.22279213520089433, + "loss": 0.1300104260444641, + "loss_ce": 0.001287288498133421, + "loss_iou": 0.56640625, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 145446460, + "step": 847 + }, + { + "epoch": 0.22305517196028146, + "grad_norm": 6.286344612415352, + "learning_rate": 5e-06, + "loss": 0.2284, + "num_input_tokens_seen": 145618532, + "step": 848 + }, + { + "epoch": 0.22305517196028146, + "loss": 0.18182075023651123, + "loss_ce": 0.0016144568799063563, + "loss_iou": NaN, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 145618532, + "step": 848 + }, + { + "epoch": 0.22331820871966857, + "grad_norm": 15.090592675837648, + "learning_rate": 5e-06, + "loss": 0.1974, + "num_input_tokens_seen": 145788852, + "step": 849 + }, + { + "epoch": 0.22331820871966857, + "loss": 0.20251962542533875, + "loss_ce": 0.0014087767340242863, + "loss_iou": 0.5859375, + "loss_num": 0.040283203125, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 145788852, + "step": 849 + }, + { + "epoch": 0.2235812454790557, + "grad_norm": 6.5100345965661806, + "learning_rate": 5e-06, + "loss": 0.1365, + "num_input_tokens_seen": 145961180, + "step": 850 + }, + { + "epoch": 0.2235812454790557, + "loss": 0.15530481934547424, + "loss_ce": 0.002533819992095232, + "loss_iou": 0.5390625, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 145961180, + "step": 850 + }, + { + "epoch": 0.22384428223844283, + "grad_norm": 6.940039828670214, + "learning_rate": 5e-06, + "loss": 0.1623, + "num_input_tokens_seen": 146133828, + "step": 851 + }, + { + "epoch": 0.22384428223844283, + "loss": 0.1814190298318863, + "loss_ce": 0.001731535536237061, + "loss_iou": 0.474609375, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 146133828, + "step": 851 + }, + { + "epoch": 0.22410731899782996, + "grad_norm": 8.305849772963127, + "learning_rate": 5e-06, + "loss": 0.177, + "num_input_tokens_seen": 146306088, + "step": 852 + }, + { + "epoch": 0.22410731899782996, + "loss": 0.18216609954833984, + "loss_ce": 0.002936376491561532, + "loss_iou": 0.490234375, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 146306088, + "step": 852 + }, + { + "epoch": 0.22437035575721706, + "grad_norm": 7.934420093916033, + "learning_rate": 5e-06, + "loss": 0.2091, + "num_input_tokens_seen": 146478248, + "step": 853 + }, + { + "epoch": 0.22437035575721706, + "loss": 0.1829485148191452, + "loss_ce": 0.0025896350853145123, + "loss_iou": 0.671875, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 146478248, + "step": 853 + }, + { + "epoch": 0.2246333925166042, + "grad_norm": 8.916802424159366, + "learning_rate": 5e-06, + "loss": 0.1487, + "num_input_tokens_seen": 146650532, + "step": 854 + }, + { + "epoch": 0.2246333925166042, + "loss": 0.13404083251953125, + "loss_ce": 0.0006179830525070429, + "loss_iou": 0.63671875, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 146650532, + "step": 854 + }, + { + "epoch": 0.22489642927599132, + "grad_norm": 16.03094644250741, + "learning_rate": 5e-06, + "loss": 0.1819, + "num_input_tokens_seen": 146822816, + "step": 855 + }, + { + "epoch": 0.22489642927599132, + "loss": 0.19942086935043335, + "loss_ce": 0.007404262199997902, + "loss_iou": 0.41015625, + "loss_num": 0.038330078125, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 146822816, + "step": 855 + }, + { + "epoch": 0.22515946603537845, + "grad_norm": 10.96099831988347, + "learning_rate": 5e-06, + "loss": 0.1644, + "num_input_tokens_seen": 146994984, + "step": 856 + }, + { + "epoch": 0.22515946603537845, + "loss": 0.1878020018339157, + "loss_ce": 0.0012785641010850668, + "loss_iou": 0.546875, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 146994984, + "step": 856 + }, + { + "epoch": 0.22542250279476556, + "grad_norm": 5.009376224527154, + "learning_rate": 5e-06, + "loss": 0.1438, + "num_input_tokens_seen": 147167200, + "step": 857 + }, + { + "epoch": 0.22542250279476556, + "loss": 0.16484007239341736, + "loss_ce": 0.005507797468453646, + "loss_iou": 0.64453125, + "loss_num": 0.031982421875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 147167200, + "step": 857 + }, + { + "epoch": 0.2256855395541527, + "grad_norm": 6.031288325368203, + "learning_rate": 5e-06, + "loss": 0.1251, + "num_input_tokens_seen": 147337356, + "step": 858 + }, + { + "epoch": 0.2256855395541527, + "loss": 0.14030741155147552, + "loss_ce": 0.0010862206108868122, + "loss_iou": 0.71484375, + "loss_num": 0.02783203125, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 147337356, + "step": 858 + }, + { + "epoch": 0.22594857631353982, + "grad_norm": 8.953664182124317, + "learning_rate": 5e-06, + "loss": 0.1473, + "num_input_tokens_seen": 147509464, + "step": 859 + }, + { + "epoch": 0.22594857631353982, + "loss": 0.19507214426994324, + "loss_ce": 0.0016822540201246738, + "loss_iou": 0.359375, + "loss_num": 0.03857421875, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 147509464, + "step": 859 + }, + { + "epoch": 0.22621161307292695, + "grad_norm": 5.535666138038005, + "learning_rate": 5e-06, + "loss": 0.1436, + "num_input_tokens_seen": 147681864, + "step": 860 + }, + { + "epoch": 0.22621161307292695, + "loss": 0.194808691740036, + "loss_ce": 0.002822606358677149, + "loss_iou": 0.5, + "loss_num": 0.03857421875, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 147681864, + "step": 860 + }, + { + "epoch": 0.22647464983231408, + "grad_norm": 7.1540218345582, + "learning_rate": 5e-06, + "loss": 0.1663, + "num_input_tokens_seen": 147854184, + "step": 861 + }, + { + "epoch": 0.22647464983231408, + "loss": 0.18444868922233582, + "loss_ce": 0.0037846285849809647, + "loss_iou": 0.609375, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 147854184, + "step": 861 + }, + { + "epoch": 0.22673768659170118, + "grad_norm": 7.954229564657017, + "learning_rate": 5e-06, + "loss": 0.134, + "num_input_tokens_seen": 148026492, + "step": 862 + }, + { + "epoch": 0.22673768659170118, + "loss": 0.1205034852027893, + "loss_ce": 0.0003863019519485533, + "loss_iou": 0.7421875, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 148026492, + "step": 862 + }, + { + "epoch": 0.2270007233510883, + "grad_norm": 10.790250508426157, + "learning_rate": 5e-06, + "loss": 0.1621, + "num_input_tokens_seen": 148198492, + "step": 863 + }, + { + "epoch": 0.2270007233510883, + "loss": 0.08790126442909241, + "loss_ce": 0.000956688541918993, + "loss_iou": 0.52734375, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 148198492, + "step": 863 + }, + { + "epoch": 0.22726376011047544, + "grad_norm": 7.413582271316403, + "learning_rate": 5e-06, + "loss": 0.1583, + "num_input_tokens_seen": 148370744, + "step": 864 + }, + { + "epoch": 0.22726376011047544, + "loss": 0.11584703624248505, + "loss_ce": 0.0007652430795133114, + "loss_iou": 0.625, + "loss_num": 0.02294921875, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 148370744, + "step": 864 + }, + { + "epoch": 0.22752679686986257, + "grad_norm": 6.505762518470224, + "learning_rate": 5e-06, + "loss": 0.1692, + "num_input_tokens_seen": 148541368, + "step": 865 + }, + { + "epoch": 0.22752679686986257, + "loss": 0.12583567202091217, + "loss_ce": 0.0008661894826218486, + "loss_iou": 0.62109375, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 148541368, + "step": 865 + }, + { + "epoch": 0.22778983362924968, + "grad_norm": 11.31233359994311, + "learning_rate": 5e-06, + "loss": 0.1448, + "num_input_tokens_seen": 148713652, + "step": 866 + }, + { + "epoch": 0.22778983362924968, + "loss": 0.09625812619924545, + "loss_ce": 0.0005244807107374072, + "loss_iou": 0.73828125, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 148713652, + "step": 866 + }, + { + "epoch": 0.2280528703886368, + "grad_norm": 8.070061922275455, + "learning_rate": 5e-06, + "loss": 0.1469, + "num_input_tokens_seen": 148885724, + "step": 867 + }, + { + "epoch": 0.2280528703886368, + "loss": 0.12818799912929535, + "loss_ce": 0.003035409841686487, + "loss_iou": 0.443359375, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 148885724, + "step": 867 + }, + { + "epoch": 0.22831590714802394, + "grad_norm": 8.83029409823491, + "learning_rate": 5e-06, + "loss": 0.2104, + "num_input_tokens_seen": 149057764, + "step": 868 + }, + { + "epoch": 0.22831590714802394, + "loss": 0.2020527571439743, + "loss_ce": 0.00414624810218811, + "loss_iou": 0.51953125, + "loss_num": 0.03955078125, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 149057764, + "step": 868 + }, + { + "epoch": 0.22857894390741107, + "grad_norm": 5.578690860970403, + "learning_rate": 5e-06, + "loss": 0.1492, + "num_input_tokens_seen": 149230132, + "step": 869 + }, + { + "epoch": 0.22857894390741107, + "loss": 0.16226467490196228, + "loss_ce": 0.0007046046666800976, + "loss_iou": 0.66015625, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 149230132, + "step": 869 + }, + { + "epoch": 0.22884198066679817, + "grad_norm": 8.388877746304392, + "learning_rate": 5e-06, + "loss": 0.1553, + "num_input_tokens_seen": 149402312, + "step": 870 + }, + { + "epoch": 0.22884198066679817, + "loss": 0.13199341297149658, + "loss_ce": 0.001866456470452249, + "loss_iou": 0.51953125, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 149402312, + "step": 870 + }, + { + "epoch": 0.2291050174261853, + "grad_norm": 13.402550744349123, + "learning_rate": 5e-06, + "loss": 0.1888, + "num_input_tokens_seen": 149572632, + "step": 871 + }, + { + "epoch": 0.2291050174261853, + "loss": 0.20936883985996246, + "loss_ce": 0.004016047343611717, + "loss_iou": 0.470703125, + "loss_num": 0.041015625, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 149572632, + "step": 871 + }, + { + "epoch": 0.22936805418557243, + "grad_norm": 5.899366870528114, + "learning_rate": 5e-06, + "loss": 0.1651, + "num_input_tokens_seen": 149744908, + "step": 872 + }, + { + "epoch": 0.22936805418557243, + "loss": 0.1176171749830246, + "loss_ce": 0.001223139464855194, + "loss_iou": 0.51953125, + "loss_num": 0.0233154296875, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 149744908, + "step": 872 + }, + { + "epoch": 0.22963109094495956, + "grad_norm": 8.241998846381511, + "learning_rate": 5e-06, + "loss": 0.1485, + "num_input_tokens_seen": 149917100, + "step": 873 + }, + { + "epoch": 0.22963109094495956, + "loss": 0.1720806509256363, + "loss_ce": 0.004966393578797579, + "loss_iou": 0.546875, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 149917100, + "step": 873 + }, + { + "epoch": 0.2298941277043467, + "grad_norm": 9.153818862978659, + "learning_rate": 5e-06, + "loss": 0.1555, + "num_input_tokens_seen": 150087628, + "step": 874 + }, + { + "epoch": 0.2298941277043467, + "loss": 0.14027956128120422, + "loss_ce": 0.002584239235147834, + "loss_iou": 0.5625, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 150087628, + "step": 874 + }, + { + "epoch": 0.2301571644637338, + "grad_norm": 25.107851811290338, + "learning_rate": 5e-06, + "loss": 0.1805, + "num_input_tokens_seen": 150258428, + "step": 875 + }, + { + "epoch": 0.2301571644637338, + "loss": 0.17914238572120667, + "loss_ce": 0.0014690514653921127, + "loss_iou": 0.5625, + "loss_num": 0.035400390625, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 150258428, + "step": 875 + }, + { + "epoch": 0.23042020122312093, + "grad_norm": 6.938403780407342, + "learning_rate": 5e-06, + "loss": 0.1835, + "num_input_tokens_seen": 150430692, + "step": 876 + }, + { + "epoch": 0.23042020122312093, + "loss": 0.13967271149158478, + "loss_ce": 0.002038437407463789, + "loss_iou": 0.392578125, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 150430692, + "step": 876 + }, + { + "epoch": 0.23068323798250806, + "grad_norm": 8.594672662971634, + "learning_rate": 5e-06, + "loss": 0.1592, + "num_input_tokens_seen": 150603108, + "step": 877 + }, + { + "epoch": 0.23068323798250806, + "loss": 0.3066813349723816, + "loss_ce": 0.0005289965192787349, + "loss_iou": 0.44921875, + "loss_num": 0.061279296875, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 150603108, + "step": 877 + }, + { + "epoch": 0.2309462747418952, + "grad_norm": 5.613988390531258, + "learning_rate": 5e-06, + "loss": 0.1482, + "num_input_tokens_seen": 150775124, + "step": 878 + }, + { + "epoch": 0.2309462747418952, + "loss": 0.16111034154891968, + "loss_ce": 0.008644518442451954, + "loss_iou": 0.640625, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 150775124, + "step": 878 + }, + { + "epoch": 0.2312093115012823, + "grad_norm": 34.35737633666629, + "learning_rate": 5e-06, + "loss": 0.1148, + "num_input_tokens_seen": 150947036, + "step": 879 + }, + { + "epoch": 0.2312093115012823, + "loss": 0.15406344830989838, + "loss_ce": 0.0015213302103802562, + "loss_iou": 0.439453125, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 150947036, + "step": 879 + }, + { + "epoch": 0.23147234826066942, + "grad_norm": 6.1651043542261466, + "learning_rate": 5e-06, + "loss": 0.1981, + "num_input_tokens_seen": 151119388, + "step": 880 + }, + { + "epoch": 0.23147234826066942, + "loss": 0.29976093769073486, + "loss_ce": 0.00825704075396061, + "loss_iou": 0.48828125, + "loss_num": 0.05810546875, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 151119388, + "step": 880 + }, + { + "epoch": 0.23173538502005656, + "grad_norm": 8.982832830215536, + "learning_rate": 5e-06, + "loss": 0.1522, + "num_input_tokens_seen": 151290092, + "step": 881 + }, + { + "epoch": 0.23173538502005656, + "loss": 0.18080484867095947, + "loss_ce": 0.0034977139439433813, + "loss_iou": 0.5078125, + "loss_num": 0.035400390625, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 151290092, + "step": 881 + }, + { + "epoch": 0.2319984217794437, + "grad_norm": 13.943571194131867, + "learning_rate": 5e-06, + "loss": 0.214, + "num_input_tokens_seen": 151462268, + "step": 882 + }, + { + "epoch": 0.2319984217794437, + "loss": 0.16014625132083893, + "loss_ce": 0.004170912317931652, + "loss_iou": 0.625, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 151462268, + "step": 882 + }, + { + "epoch": 0.2322614585388308, + "grad_norm": 7.043443131491897, + "learning_rate": 5e-06, + "loss": 0.1524, + "num_input_tokens_seen": 151634180, + "step": 883 + }, + { + "epoch": 0.2322614585388308, + "loss": 0.12871429324150085, + "loss_ce": 0.0008456383948214352, + "loss_iou": 0.44921875, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 151634180, + "step": 883 + }, + { + "epoch": 0.23252449529821792, + "grad_norm": 6.396513380644998, + "learning_rate": 5e-06, + "loss": 0.1833, + "num_input_tokens_seen": 151806384, + "step": 884 + }, + { + "epoch": 0.23252449529821792, + "loss": 0.18081963062286377, + "loss_ce": 0.002169727347791195, + "loss_iou": 0.421875, + "loss_num": 0.03564453125, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 151806384, + "step": 884 + }, + { + "epoch": 0.23278753205760505, + "grad_norm": 13.149227804701054, + "learning_rate": 5e-06, + "loss": 0.152, + "num_input_tokens_seen": 151978708, + "step": 885 + }, + { + "epoch": 0.23278753205760505, + "loss": 0.1582297682762146, + "loss_ce": 0.007228789385408163, + "loss_iou": 0.546875, + "loss_num": 0.0301513671875, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 151978708, + "step": 885 + }, + { + "epoch": 0.23305056881699218, + "grad_norm": 6.784444096578918, + "learning_rate": 5e-06, + "loss": 0.1646, + "num_input_tokens_seen": 152150848, + "step": 886 + }, + { + "epoch": 0.23305056881699218, + "loss": 0.19164127111434937, + "loss_ce": 0.0013946772087365389, + "loss_iou": NaN, + "loss_num": 0.0380859375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 152150848, + "step": 886 + }, + { + "epoch": 0.2333136055763793, + "grad_norm": 6.295673229022739, + "learning_rate": 5e-06, + "loss": 0.1772, + "num_input_tokens_seen": 152323052, + "step": 887 + }, + { + "epoch": 0.2333136055763793, + "loss": 0.22069396078586578, + "loss_ce": 0.0015167115489020944, + "loss_iou": 0.478515625, + "loss_num": 0.0439453125, + "loss_xval": 0.21875, + "num_input_tokens_seen": 152323052, + "step": 887 + }, + { + "epoch": 0.23357664233576642, + "grad_norm": 11.781517950000717, + "learning_rate": 5e-06, + "loss": 0.1798, + "num_input_tokens_seen": 152493800, + "step": 888 + }, + { + "epoch": 0.23357664233576642, + "loss": 0.1682368516921997, + "loss_ce": 0.0009394832304678857, + "loss_iou": 0.54296875, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 152493800, + "step": 888 + }, + { + "epoch": 0.23383967909515355, + "grad_norm": 11.368208703500017, + "learning_rate": 5e-06, + "loss": 0.1715, + "num_input_tokens_seen": 152665776, + "step": 889 + }, + { + "epoch": 0.23383967909515355, + "loss": 0.2839386761188507, + "loss_ce": 0.003970403224229813, + "loss_iou": 0.51171875, + "loss_num": 0.05615234375, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 152665776, + "step": 889 + }, + { + "epoch": 0.23410271585454068, + "grad_norm": 8.093455275529028, + "learning_rate": 5e-06, + "loss": 0.1589, + "num_input_tokens_seen": 152834764, + "step": 890 + }, + { + "epoch": 0.23410271585454068, + "loss": 0.1241624653339386, + "loss_ce": 0.004472525790333748, + "loss_iou": 0.59765625, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 152834764, + "step": 890 + }, + { + "epoch": 0.2343657526139278, + "grad_norm": 12.52857780858346, + "learning_rate": 5e-06, + "loss": 0.1966, + "num_input_tokens_seen": 153007304, + "step": 891 + }, + { + "epoch": 0.2343657526139278, + "loss": 0.21788766980171204, + "loss_ce": 0.002006314927712083, + "loss_iou": 0.59765625, + "loss_num": 0.043212890625, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 153007304, + "step": 891 + }, + { + "epoch": 0.2346287893733149, + "grad_norm": 7.50091412461157, + "learning_rate": 5e-06, + "loss": 0.1804, + "num_input_tokens_seen": 153179168, + "step": 892 + }, + { + "epoch": 0.2346287893733149, + "loss": 0.17867043614387512, + "loss_ce": 0.0008750315755605698, + "loss_iou": NaN, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 153179168, + "step": 892 + }, + { + "epoch": 0.23489182613270204, + "grad_norm": 6.297998473336444, + "learning_rate": 5e-06, + "loss": 0.1679, + "num_input_tokens_seen": 153351220, + "step": 893 + }, + { + "epoch": 0.23489182613270204, + "loss": 0.13165241479873657, + "loss_ce": 0.001311830012127757, + "loss_iou": 0.380859375, + "loss_num": 0.026123046875, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 153351220, + "step": 893 + }, + { + "epoch": 0.23515486289208917, + "grad_norm": 7.909655629561448, + "learning_rate": 5e-06, + "loss": 0.1896, + "num_input_tokens_seen": 153523232, + "step": 894 + }, + { + "epoch": 0.23515486289208917, + "loss": 0.13778507709503174, + "loss_ce": 0.002287032548338175, + "loss_iou": 0.51953125, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 153523232, + "step": 894 + }, + { + "epoch": 0.2354178996514763, + "grad_norm": 5.883815728394397, + "learning_rate": 5e-06, + "loss": 0.1413, + "num_input_tokens_seen": 153692208, + "step": 895 + }, + { + "epoch": 0.2354178996514763, + "loss": 0.1446894109249115, + "loss_ce": 0.001439890475012362, + "loss_iou": 0.462890625, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 153692208, + "step": 895 + }, + { + "epoch": 0.2356809364108634, + "grad_norm": 15.735436674991021, + "learning_rate": 5e-06, + "loss": 0.1563, + "num_input_tokens_seen": 153862388, + "step": 896 + }, + { + "epoch": 0.2356809364108634, + "loss": 0.16044028103351593, + "loss_ce": 0.00043661610106937587, + "loss_iou": 0.6328125, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 153862388, + "step": 896 + }, + { + "epoch": 0.23594397317025054, + "grad_norm": 5.835513921908954, + "learning_rate": 5e-06, + "loss": 0.129, + "num_input_tokens_seen": 154034480, + "step": 897 + }, + { + "epoch": 0.23594397317025054, + "loss": 0.14927011728286743, + "loss_ce": 0.0002833124599419534, + "loss_iou": 0.515625, + "loss_num": 0.02978515625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 154034480, + "step": 897 + }, + { + "epoch": 0.23620700992963767, + "grad_norm": 4.347875981694168, + "learning_rate": 5e-06, + "loss": 0.1672, + "num_input_tokens_seen": 154203000, + "step": 898 + }, + { + "epoch": 0.23620700992963767, + "loss": 0.1722668707370758, + "loss_ce": 0.0003613463486544788, + "loss_iou": 0.5390625, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 154203000, + "step": 898 + }, + { + "epoch": 0.2364700466890248, + "grad_norm": 6.555211118822418, + "learning_rate": 5e-06, + "loss": 0.123, + "num_input_tokens_seen": 154375292, + "step": 899 + }, + { + "epoch": 0.2364700466890248, + "loss": 0.12109389901161194, + "loss_ce": 0.001983799273148179, + "loss_iou": 0.53125, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 154375292, + "step": 899 + }, + { + "epoch": 0.2367330834484119, + "grad_norm": 4.963219405700268, + "learning_rate": 5e-06, + "loss": 0.1507, + "num_input_tokens_seen": 154547472, + "step": 900 + }, + { + "epoch": 0.2367330834484119, + "loss": 0.1948363184928894, + "loss_ce": 0.001843146630562842, + "loss_iou": 0.5625, + "loss_num": 0.03857421875, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 154547472, + "step": 900 + }, + { + "epoch": 0.23699612020779903, + "grad_norm": 9.476812377662082, + "learning_rate": 5e-06, + "loss": 0.1428, + "num_input_tokens_seen": 154719756, + "step": 901 + }, + { + "epoch": 0.23699612020779903, + "loss": 0.1722353994846344, + "loss_ce": 0.0028628362342715263, + "loss_iou": 0.61328125, + "loss_num": 0.033935546875, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 154719756, + "step": 901 + }, + { + "epoch": 0.23725915696718616, + "grad_norm": 8.867901615298983, + "learning_rate": 5e-06, + "loss": 0.1444, + "num_input_tokens_seen": 154891816, + "step": 902 + }, + { + "epoch": 0.23725915696718616, + "loss": 0.13942725956439972, + "loss_ce": 0.0014267791993916035, + "loss_iou": 0.71484375, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 154891816, + "step": 902 + }, + { + "epoch": 0.2375221937265733, + "grad_norm": 7.06026317137913, + "learning_rate": 5e-06, + "loss": 0.1247, + "num_input_tokens_seen": 155064164, + "step": 903 + }, + { + "epoch": 0.2375221937265733, + "loss": 0.13094615936279297, + "loss_ce": 0.000575068814214319, + "loss_iou": 0.47265625, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 155064164, + "step": 903 + }, + { + "epoch": 0.23778523048596042, + "grad_norm": 6.065414379643311, + "learning_rate": 5e-06, + "loss": 0.1882, + "num_input_tokens_seen": 155236136, + "step": 904 + }, + { + "epoch": 0.23778523048596042, + "loss": 0.17110927402973175, + "loss_ce": 0.0006380859995260835, + "loss_iou": 0.7421875, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 155236136, + "step": 904 + }, + { + "epoch": 0.23804826724534753, + "grad_norm": 6.4154554676892275, + "learning_rate": 5e-06, + "loss": 0.1664, + "num_input_tokens_seen": 155408400, + "step": 905 + }, + { + "epoch": 0.23804826724534753, + "loss": 0.18800213932991028, + "loss_ce": 0.005415464285761118, + "loss_iou": 0.515625, + "loss_num": 0.03662109375, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 155408400, + "step": 905 + }, + { + "epoch": 0.23831130400473466, + "grad_norm": 6.061588736644807, + "learning_rate": 5e-06, + "loss": 0.1656, + "num_input_tokens_seen": 155578772, + "step": 906 + }, + { + "epoch": 0.23831130400473466, + "loss": 0.11921393871307373, + "loss_ce": 0.0009278038050979376, + "loss_iou": 0.609375, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 155578772, + "step": 906 + }, + { + "epoch": 0.2385743407641218, + "grad_norm": 9.748504628242028, + "learning_rate": 5e-06, + "loss": 0.128, + "num_input_tokens_seen": 155751088, + "step": 907 + }, + { + "epoch": 0.2385743407641218, + "loss": 0.13661867380142212, + "loss_ce": 0.0004492364823818207, + "loss_iou": 0.62890625, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 155751088, + "step": 907 + }, + { + "epoch": 0.23883737752350892, + "grad_norm": 11.132080080363805, + "learning_rate": 5e-06, + "loss": 0.2072, + "num_input_tokens_seen": 155923228, + "step": 908 + }, + { + "epoch": 0.23883737752350892, + "loss": 0.2608073353767395, + "loss_ce": 0.006229718215763569, + "loss_iou": 0.447265625, + "loss_num": 0.05078125, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 155923228, + "step": 908 + }, + { + "epoch": 0.23910041428289602, + "grad_norm": 6.802731098529186, + "learning_rate": 5e-06, + "loss": 0.1071, + "num_input_tokens_seen": 156095144, + "step": 909 + }, + { + "epoch": 0.23910041428289602, + "loss": 0.12534289062023163, + "loss_ce": 0.0022044701036065817, + "loss_iou": 0.67578125, + "loss_num": 0.024658203125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 156095144, + "step": 909 + }, + { + "epoch": 0.23936345104228315, + "grad_norm": 5.86214467113572, + "learning_rate": 5e-06, + "loss": 0.155, + "num_input_tokens_seen": 156267348, + "step": 910 + }, + { + "epoch": 0.23936345104228315, + "loss": 0.1707063615322113, + "loss_ce": 0.0013948287814855576, + "loss_iou": 0.5859375, + "loss_num": 0.033935546875, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 156267348, + "step": 910 + }, + { + "epoch": 0.23962648780167028, + "grad_norm": 5.562832964795879, + "learning_rate": 5e-06, + "loss": 0.156, + "num_input_tokens_seen": 156439364, + "step": 911 + }, + { + "epoch": 0.23962648780167028, + "loss": 0.12454073876142502, + "loss_ce": 0.0003036795533262193, + "loss_iou": 0.578125, + "loss_num": 0.02490234375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 156439364, + "step": 911 + }, + { + "epoch": 0.23988952456105742, + "grad_norm": 13.223359809657884, + "learning_rate": 5e-06, + "loss": 0.15, + "num_input_tokens_seen": 156609552, + "step": 912 + }, + { + "epoch": 0.23988952456105742, + "loss": 0.15059047937393188, + "loss_ce": 0.00239713117480278, + "loss_iou": 0.59375, + "loss_num": 0.0296630859375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 156609552, + "step": 912 + }, + { + "epoch": 0.24015256132044452, + "grad_norm": 6.777584209416996, + "learning_rate": 5e-06, + "loss": 0.1436, + "num_input_tokens_seen": 156781920, + "step": 913 + }, + { + "epoch": 0.24015256132044452, + "loss": 0.13880465924739838, + "loss_ce": 0.0020859187934547663, + "loss_iou": 0.6015625, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 156781920, + "step": 913 + }, + { + "epoch": 0.24041559807983165, + "grad_norm": 11.055320488873154, + "learning_rate": 5e-06, + "loss": 0.2024, + "num_input_tokens_seen": 156954340, + "step": 914 + }, + { + "epoch": 0.24041559807983165, + "loss": 0.2452932596206665, + "loss_ce": 0.002312319353222847, + "loss_iou": 0.4765625, + "loss_num": 0.048583984375, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 156954340, + "step": 914 + }, + { + "epoch": 0.24067863483921878, + "grad_norm": 4.458423775022664, + "learning_rate": 5e-06, + "loss": 0.1784, + "num_input_tokens_seen": 157126652, + "step": 915 + }, + { + "epoch": 0.24067863483921878, + "loss": 0.13856951892375946, + "loss_ce": 0.0020949181634932756, + "loss_iou": 0.416015625, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 157126652, + "step": 915 + }, + { + "epoch": 0.2409416715986059, + "grad_norm": 5.681838115677692, + "learning_rate": 5e-06, + "loss": 0.1239, + "num_input_tokens_seen": 157298968, + "step": 916 + }, + { + "epoch": 0.2409416715986059, + "loss": 0.1539350152015686, + "loss_ce": 0.00638251006603241, + "loss_iou": 0.412109375, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 157298968, + "step": 916 + }, + { + "epoch": 0.24120470835799304, + "grad_norm": 5.180460717860643, + "learning_rate": 5e-06, + "loss": 0.116, + "num_input_tokens_seen": 157471004, + "step": 917 + }, + { + "epoch": 0.24120470835799304, + "loss": 0.10441954433917999, + "loss_ce": 0.0008428902365267277, + "loss_iou": 0.43359375, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 157471004, + "step": 917 + }, + { + "epoch": 0.24146774511738014, + "grad_norm": 10.247033875410487, + "learning_rate": 5e-06, + "loss": 0.161, + "num_input_tokens_seen": 157643232, + "step": 918 + }, + { + "epoch": 0.24146774511738014, + "loss": 0.18021947145462036, + "loss_ce": 0.002607175149023533, + "loss_iou": 0.59375, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 157643232, + "step": 918 + }, + { + "epoch": 0.24173078187676728, + "grad_norm": 6.8962184523908725, + "learning_rate": 5e-06, + "loss": 0.1535, + "num_input_tokens_seen": 157815292, + "step": 919 + }, + { + "epoch": 0.24173078187676728, + "loss": 0.1455521434545517, + "loss_ce": 0.0008988262270577252, + "loss_iou": 0.66015625, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 157815292, + "step": 919 + }, + { + "epoch": 0.2419938186361544, + "grad_norm": 6.7883399023716775, + "learning_rate": 5e-06, + "loss": 0.1916, + "num_input_tokens_seen": 157987716, + "step": 920 + }, + { + "epoch": 0.2419938186361544, + "loss": 0.19279745221138, + "loss_ce": 0.0026729374658316374, + "loss_iou": 0.65234375, + "loss_num": 0.0380859375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 157987716, + "step": 920 + }, + { + "epoch": 0.24225685539554154, + "grad_norm": 7.426866121442803, + "learning_rate": 5e-06, + "loss": 0.1546, + "num_input_tokens_seen": 158160224, + "step": 921 + }, + { + "epoch": 0.24225685539554154, + "loss": 0.1912505030632019, + "loss_ce": 0.001980474451556802, + "loss_iou": 0.470703125, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 158160224, + "step": 921 + }, + { + "epoch": 0.24251989215492864, + "grad_norm": 7.602353481412061, + "learning_rate": 5e-06, + "loss": 0.136, + "num_input_tokens_seen": 158332464, + "step": 922 + }, + { + "epoch": 0.24251989215492864, + "loss": 0.1421346664428711, + "loss_ce": 0.003951081074774265, + "loss_iou": 0.494140625, + "loss_num": 0.027587890625, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 158332464, + "step": 922 + }, + { + "epoch": 0.24278292891431577, + "grad_norm": 9.798997838296735, + "learning_rate": 5e-06, + "loss": 0.1636, + "num_input_tokens_seen": 158504560, + "step": 923 + }, + { + "epoch": 0.24278292891431577, + "loss": 0.15015605092048645, + "loss_ce": 0.0013523304369300604, + "loss_iou": 0.44140625, + "loss_num": 0.02978515625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 158504560, + "step": 923 + }, + { + "epoch": 0.2430459656737029, + "grad_norm": 9.276091083653826, + "learning_rate": 5e-06, + "loss": 0.1582, + "num_input_tokens_seen": 158676676, + "step": 924 + }, + { + "epoch": 0.2430459656737029, + "loss": 0.13057658076286316, + "loss_ce": 0.0024943118914961815, + "loss_iou": 0.59765625, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 158676676, + "step": 924 + }, + { + "epoch": 0.24330900243309003, + "grad_norm": 7.79676627099927, + "learning_rate": 5e-06, + "loss": 0.1547, + "num_input_tokens_seen": 158849084, + "step": 925 + }, + { + "epoch": 0.24330900243309003, + "loss": 0.1701420098543167, + "loss_ce": 0.004248456098139286, + "loss_iou": 0.447265625, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 158849084, + "step": 925 + }, + { + "epoch": 0.24357203919247714, + "grad_norm": 7.627935616299721, + "learning_rate": 5e-06, + "loss": 0.1337, + "num_input_tokens_seen": 159019648, + "step": 926 + }, + { + "epoch": 0.24357203919247714, + "loss": 0.09866867959499359, + "loss_ce": 0.002080547623336315, + "loss_iou": 0.59765625, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 159019648, + "step": 926 + }, + { + "epoch": 0.24383507595186427, + "grad_norm": 7.674777640749283, + "learning_rate": 5e-06, + "loss": 0.1837, + "num_input_tokens_seen": 159191744, + "step": 927 + }, + { + "epoch": 0.24383507595186427, + "loss": 0.09513729810714722, + "loss_ce": 0.0016314350068569183, + "loss_iou": 0.451171875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 159191744, + "step": 927 + }, + { + "epoch": 0.2440981127112514, + "grad_norm": 8.029174499906352, + "learning_rate": 5e-06, + "loss": 0.1991, + "num_input_tokens_seen": 159364172, + "step": 928 + }, + { + "epoch": 0.2440981127112514, + "loss": 0.2412642240524292, + "loss_ce": 0.0012739873491227627, + "loss_iou": 0.48046875, + "loss_num": 0.048095703125, + "loss_xval": 0.240234375, + "num_input_tokens_seen": 159364172, + "step": 928 + }, + { + "epoch": 0.24436114947063853, + "grad_norm": 3.8948353119563537, + "learning_rate": 5e-06, + "loss": 0.1119, + "num_input_tokens_seen": 159534608, + "step": 929 + }, + { + "epoch": 0.24436114947063853, + "loss": 0.11882825195789337, + "loss_ce": 0.0011219491716474295, + "loss_iou": 0.58203125, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 159534608, + "step": 929 + }, + { + "epoch": 0.24462418623002566, + "grad_norm": 12.725190215625348, + "learning_rate": 5e-06, + "loss": 0.1728, + "num_input_tokens_seen": 159705228, + "step": 930 + }, + { + "epoch": 0.24462418623002566, + "loss": 0.15004633367061615, + "loss_ce": 0.000662794045638293, + "loss_iou": 0.451171875, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 159705228, + "step": 930 + }, + { + "epoch": 0.24488722298941276, + "grad_norm": 8.38127835969171, + "learning_rate": 5e-06, + "loss": 0.1789, + "num_input_tokens_seen": 159875324, + "step": 931 + }, + { + "epoch": 0.24488722298941276, + "loss": 0.1881195604801178, + "loss_ce": 0.0022675050422549248, + "loss_iou": 0.40234375, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 159875324, + "step": 931 + }, + { + "epoch": 0.2451502597487999, + "grad_norm": 8.09047633001587, + "learning_rate": 5e-06, + "loss": 0.1537, + "num_input_tokens_seen": 160045932, + "step": 932 + }, + { + "epoch": 0.2451502597487999, + "loss": 0.23839473724365234, + "loss_ce": 0.0010290088830515742, + "loss_iou": 0.494140625, + "loss_num": 0.047607421875, + "loss_xval": 0.2373046875, + "num_input_tokens_seen": 160045932, + "step": 932 + }, + { + "epoch": 0.24541329650818702, + "grad_norm": 14.720243529942747, + "learning_rate": 5e-06, + "loss": 0.1408, + "num_input_tokens_seen": 160218252, + "step": 933 + }, + { + "epoch": 0.24541329650818702, + "loss": 0.12284128367900848, + "loss_ce": 0.0022663308773189783, + "loss_iou": 0.4296875, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 160218252, + "step": 933 + }, + { + "epoch": 0.24567633326757415, + "grad_norm": 6.031479541696206, + "learning_rate": 5e-06, + "loss": 0.1669, + "num_input_tokens_seen": 160390560, + "step": 934 + }, + { + "epoch": 0.24567633326757415, + "loss": 0.1301022320985794, + "loss_ce": 0.0005245967186056077, + "loss_iou": 0.70703125, + "loss_num": 0.02587890625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 160390560, + "step": 934 + }, + { + "epoch": 0.24593937002696126, + "grad_norm": 6.383554643597018, + "learning_rate": 5e-06, + "loss": 0.1569, + "num_input_tokens_seen": 160562528, + "step": 935 + }, + { + "epoch": 0.24593937002696126, + "loss": 0.17644909024238586, + "loss_ce": 0.0030787207651883364, + "loss_iou": 0.375, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 160562528, + "step": 935 + }, + { + "epoch": 0.2462024067863484, + "grad_norm": 20.862114065213355, + "learning_rate": 5e-06, + "loss": 0.1352, + "num_input_tokens_seen": 160734384, + "step": 936 + }, + { + "epoch": 0.2462024067863484, + "loss": 0.11419504880905151, + "loss_ce": 0.0004255172680132091, + "loss_iou": 0.466796875, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 160734384, + "step": 936 + }, + { + "epoch": 0.24646544354573552, + "grad_norm": 8.0386150468435, + "learning_rate": 5e-06, + "loss": 0.1545, + "num_input_tokens_seen": 160906448, + "step": 937 + }, + { + "epoch": 0.24646544354573552, + "loss": 0.14023496210575104, + "loss_ce": 0.0016241249395534396, + "loss_iou": 0.388671875, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 160906448, + "step": 937 + }, + { + "epoch": 0.24672848030512265, + "grad_norm": 7.276863008633557, + "learning_rate": 5e-06, + "loss": 0.1694, + "num_input_tokens_seen": 161078828, + "step": 938 + }, + { + "epoch": 0.24672848030512265, + "loss": 0.21741217374801636, + "loss_ce": 0.004765682853758335, + "loss_iou": 0.7265625, + "loss_num": 0.04248046875, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 161078828, + "step": 938 + }, + { + "epoch": 0.24699151706450975, + "grad_norm": 10.285417073408015, + "learning_rate": 5e-06, + "loss": 0.1595, + "num_input_tokens_seen": 161249228, + "step": 939 + }, + { + "epoch": 0.24699151706450975, + "loss": 0.2533302903175354, + "loss_ce": 0.004825636278837919, + "loss_iou": 0.3671875, + "loss_num": 0.049560546875, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 161249228, + "step": 939 + }, + { + "epoch": 0.24725455382389688, + "grad_norm": 7.503087196122763, + "learning_rate": 5e-06, + "loss": 0.1885, + "num_input_tokens_seen": 161421624, + "step": 940 + }, + { + "epoch": 0.24725455382389688, + "loss": 0.1497778743505478, + "loss_ce": 0.003537639044225216, + "loss_iou": 0.71484375, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 161421624, + "step": 940 + }, + { + "epoch": 0.24751759058328401, + "grad_norm": 6.078271645066026, + "learning_rate": 5e-06, + "loss": 0.1356, + "num_input_tokens_seen": 161593500, + "step": 941 + }, + { + "epoch": 0.24751759058328401, + "loss": 0.15894815325737, + "loss_ce": 0.0006839816924184561, + "loss_iou": 0.6015625, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 161593500, + "step": 941 + }, + { + "epoch": 0.24778062734267114, + "grad_norm": 16.470075984430842, + "learning_rate": 5e-06, + "loss": 0.1725, + "num_input_tokens_seen": 161764108, + "step": 942 + }, + { + "epoch": 0.24778062734267114, + "loss": 0.2659192383289337, + "loss_ce": 0.003468066919595003, + "loss_iou": 0.5546875, + "loss_num": 0.052490234375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 161764108, + "step": 942 + }, + { + "epoch": 0.24804366410205828, + "grad_norm": 18.319111638013048, + "learning_rate": 5e-06, + "loss": 0.1876, + "num_input_tokens_seen": 161936252, + "step": 943 + }, + { + "epoch": 0.24804366410205828, + "loss": 0.23192915320396423, + "loss_ce": 0.004817330744117498, + "loss_iou": 0.443359375, + "loss_num": 0.04541015625, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 161936252, + "step": 943 + }, + { + "epoch": 0.24830670086144538, + "grad_norm": 11.296239885199183, + "learning_rate": 5e-06, + "loss": 0.1547, + "num_input_tokens_seen": 162108564, + "step": 944 + }, + { + "epoch": 0.24830670086144538, + "loss": 0.13432571291923523, + "loss_ce": 0.002459259470924735, + "loss_iou": 0.7578125, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 162108564, + "step": 944 + }, + { + "epoch": 0.2485697376208325, + "grad_norm": 5.511055668564898, + "learning_rate": 5e-06, + "loss": 0.1393, + "num_input_tokens_seen": 162280792, + "step": 945 + }, + { + "epoch": 0.2485697376208325, + "loss": 0.13674385845661163, + "loss_ce": 0.0015509906224906445, + "loss_iou": 0.61328125, + "loss_num": 0.027099609375, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 162280792, + "step": 945 + }, + { + "epoch": 0.24883277438021964, + "grad_norm": 10.355301687463253, + "learning_rate": 5e-06, + "loss": 0.1477, + "num_input_tokens_seen": 162449528, + "step": 946 + }, + { + "epoch": 0.24883277438021964, + "loss": 0.2201877236366272, + "loss_ce": 0.0006442689918912947, + "loss_iou": 0.33984375, + "loss_num": 0.0439453125, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 162449528, + "step": 946 + }, + { + "epoch": 0.24909581113960677, + "grad_norm": 11.25377814108321, + "learning_rate": 5e-06, + "loss": 0.1493, + "num_input_tokens_seen": 162617256, + "step": 947 + }, + { + "epoch": 0.24909581113960677, + "loss": 0.188047856092453, + "loss_ce": 0.0031418518628925085, + "loss_iou": 0.58203125, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 162617256, + "step": 947 + }, + { + "epoch": 0.24935884789899387, + "grad_norm": 12.235451694711202, + "learning_rate": 5e-06, + "loss": 0.1615, + "num_input_tokens_seen": 162789400, + "step": 948 + }, + { + "epoch": 0.24935884789899387, + "loss": 0.15606345236301422, + "loss_ce": 0.005215056240558624, + "loss_iou": 0.66796875, + "loss_num": 0.0301513671875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 162789400, + "step": 948 + }, + { + "epoch": 0.249621884658381, + "grad_norm": 7.1804139994510034, + "learning_rate": 5e-06, + "loss": 0.1357, + "num_input_tokens_seen": 162961432, + "step": 949 + }, + { + "epoch": 0.249621884658381, + "loss": 0.10785458981990814, + "loss_ce": 0.002507905475795269, + "loss_iou": 0.56640625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 162961432, + "step": 949 + }, + { + "epoch": 0.24988492141776814, + "grad_norm": 14.388122342778688, + "learning_rate": 5e-06, + "loss": 0.1883, + "num_input_tokens_seen": 163133524, + "step": 950 + }, + { + "epoch": 0.24988492141776814, + "loss": 0.16639769077301025, + "loss_ce": 0.001511220121756196, + "loss_iou": 0.60546875, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 163133524, + "step": 950 + }, + { + "epoch": 0.25014795817715524, + "grad_norm": 10.331941667190199, + "learning_rate": 5e-06, + "loss": 0.1895, + "num_input_tokens_seen": 163305816, + "step": 951 + }, + { + "epoch": 0.25014795817715524, + "loss": 0.1127045676112175, + "loss_ce": 0.0008271271362900734, + "loss_iou": 0.56640625, + "loss_num": 0.0224609375, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 163305816, + "step": 951 + }, + { + "epoch": 0.2504109949365424, + "grad_norm": 6.829599375925184, + "learning_rate": 5e-06, + "loss": 0.154, + "num_input_tokens_seen": 163478368, + "step": 952 + }, + { + "epoch": 0.2504109949365424, + "loss": 0.1521347612142563, + "loss_ce": 0.0029038134962320328, + "loss_iou": 0.54296875, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 163478368, + "step": 952 + }, + { + "epoch": 0.2506740316959295, + "grad_norm": 5.382923356179618, + "learning_rate": 5e-06, + "loss": 0.1478, + "num_input_tokens_seen": 163650500, + "step": 953 + }, + { + "epoch": 0.2506740316959295, + "loss": 0.14390447735786438, + "loss_ce": 0.00224187970161438, + "loss_iou": 0.62890625, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 163650500, + "step": 953 + }, + { + "epoch": 0.2509370684553166, + "grad_norm": 4.693926744893699, + "learning_rate": 5e-06, + "loss": 0.1612, + "num_input_tokens_seen": 163822252, + "step": 954 + }, + { + "epoch": 0.2509370684553166, + "loss": 0.1426747441291809, + "loss_ce": 0.001286811544559896, + "loss_iou": 0.41015625, + "loss_num": 0.0281982421875, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 163822252, + "step": 954 + }, + { + "epoch": 0.25120010521470376, + "grad_norm": 6.416178956242753, + "learning_rate": 5e-06, + "loss": 0.1743, + "num_input_tokens_seen": 163992628, + "step": 955 + }, + { + "epoch": 0.25120010521470376, + "loss": 0.2167130708694458, + "loss_ce": 0.0013200179673731327, + "loss_iou": 0.41015625, + "loss_num": 0.043212890625, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 163992628, + "step": 955 + }, + { + "epoch": 0.25146314197409086, + "grad_norm": 8.38564274924814, + "learning_rate": 5e-06, + "loss": 0.1307, + "num_input_tokens_seen": 164164972, + "step": 956 + }, + { + "epoch": 0.25146314197409086, + "loss": 0.14087224006652832, + "loss_ce": 0.0018951823003590107, + "loss_iou": 0.53515625, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 164164972, + "step": 956 + }, + { + "epoch": 0.251726178733478, + "grad_norm": 10.557767516476979, + "learning_rate": 5e-06, + "loss": 0.1952, + "num_input_tokens_seen": 164334056, + "step": 957 + }, + { + "epoch": 0.251726178733478, + "loss": 0.10351097583770752, + "loss_ce": 0.003108144039288163, + "loss_iou": 0.5703125, + "loss_num": 0.02001953125, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 164334056, + "step": 957 + }, + { + "epoch": 0.2519892154928651, + "grad_norm": 6.187664908064733, + "learning_rate": 5e-06, + "loss": 0.1717, + "num_input_tokens_seen": 164506496, + "step": 958 + }, + { + "epoch": 0.2519892154928651, + "loss": 0.15677396953105927, + "loss_ce": 0.005193163640797138, + "loss_iou": 0.55859375, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 164506496, + "step": 958 + }, + { + "epoch": 0.25225225225225223, + "grad_norm": 9.49095916832686, + "learning_rate": 5e-06, + "loss": 0.1296, + "num_input_tokens_seen": 164678644, + "step": 959 + }, + { + "epoch": 0.25225225225225223, + "loss": 0.18635977804660797, + "loss_ce": 0.002003092784434557, + "loss_iou": 0.5, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 164678644, + "step": 959 + }, + { + "epoch": 0.2525152890116394, + "grad_norm": 6.024170228931713, + "learning_rate": 5e-06, + "loss": 0.1579, + "num_input_tokens_seen": 164851184, + "step": 960 + }, + { + "epoch": 0.2525152890116394, + "loss": 0.13998761773109436, + "loss_ce": 0.003940259106457233, + "loss_iou": 0.58203125, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 164851184, + "step": 960 + }, + { + "epoch": 0.2527783257710265, + "grad_norm": 6.120337923811076, + "learning_rate": 5e-06, + "loss": 0.0964, + "num_input_tokens_seen": 165023544, + "step": 961 + }, + { + "epoch": 0.2527783257710265, + "loss": 0.09865675866603851, + "loss_ce": 0.003258807584643364, + "loss_iou": 0.66796875, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 165023544, + "step": 961 + }, + { + "epoch": 0.25304136253041365, + "grad_norm": 9.066155915123039, + "learning_rate": 5e-06, + "loss": 0.1506, + "num_input_tokens_seen": 165193832, + "step": 962 + }, + { + "epoch": 0.25304136253041365, + "loss": 0.1392088085412979, + "loss_ce": 0.0009946945356205106, + "loss_iou": 0.6328125, + "loss_num": 0.027587890625, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 165193832, + "step": 962 + }, + { + "epoch": 0.25330439928980075, + "grad_norm": 7.472497977314338, + "learning_rate": 5e-06, + "loss": 0.1704, + "num_input_tokens_seen": 165365892, + "step": 963 + }, + { + "epoch": 0.25330439928980075, + "loss": 0.23057040572166443, + "loss_ce": 0.002054777694866061, + "loss_iou": 0.47265625, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 165365892, + "step": 963 + }, + { + "epoch": 0.25356743604918786, + "grad_norm": 8.732932808526876, + "learning_rate": 5e-06, + "loss": 0.1689, + "num_input_tokens_seen": 165538272, + "step": 964 + }, + { + "epoch": 0.25356743604918786, + "loss": 0.16193270683288574, + "loss_ce": 0.0018985318019986153, + "loss_iou": 0.640625, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 165538272, + "step": 964 + }, + { + "epoch": 0.253830472808575, + "grad_norm": 7.508690685861332, + "learning_rate": 5e-06, + "loss": 0.1348, + "num_input_tokens_seen": 165710728, + "step": 965 + }, + { + "epoch": 0.253830472808575, + "loss": 0.16999930143356323, + "loss_ce": 0.002335726749151945, + "loss_iou": 0.50390625, + "loss_num": 0.033447265625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 165710728, + "step": 965 + }, + { + "epoch": 0.2540935095679621, + "grad_norm": 8.520838856927949, + "learning_rate": 5e-06, + "loss": 0.2043, + "num_input_tokens_seen": 165882912, + "step": 966 + }, + { + "epoch": 0.2540935095679621, + "loss": 0.1505521535873413, + "loss_ce": 0.0006498107686638832, + "loss_iou": 0.56640625, + "loss_num": 0.0299072265625, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 165882912, + "step": 966 + }, + { + "epoch": 0.2543565463273492, + "grad_norm": 7.07336887298293, + "learning_rate": 5e-06, + "loss": 0.1127, + "num_input_tokens_seen": 166053292, + "step": 967 + }, + { + "epoch": 0.2543565463273492, + "loss": 0.13186028599739075, + "loss_ce": 0.002404727740213275, + "loss_iou": 0.51953125, + "loss_num": 0.02587890625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 166053292, + "step": 967 + }, + { + "epoch": 0.2546195830867364, + "grad_norm": 5.861693184502666, + "learning_rate": 5e-06, + "loss": 0.1283, + "num_input_tokens_seen": 166222324, + "step": 968 + }, + { + "epoch": 0.2546195830867364, + "loss": 0.12424527108669281, + "loss_ce": 0.0015340839745476842, + "loss_iou": 0.66796875, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 166222324, + "step": 968 + }, + { + "epoch": 0.2548826198461235, + "grad_norm": 4.8001560190338335, + "learning_rate": 5e-06, + "loss": 0.1372, + "num_input_tokens_seen": 166394732, + "step": 969 + }, + { + "epoch": 0.2548826198461235, + "loss": 0.13953326642513275, + "loss_ce": 0.0008613896206952631, + "loss_iou": 0.5390625, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 166394732, + "step": 969 + }, + { + "epoch": 0.25514565660551064, + "grad_norm": 4.5364064648643065, + "learning_rate": 5e-06, + "loss": 0.1456, + "num_input_tokens_seen": 166566920, + "step": 970 + }, + { + "epoch": 0.25514565660551064, + "loss": 0.15965688228607178, + "loss_ce": 0.0016368532087653875, + "loss_iou": 0.498046875, + "loss_num": 0.031494140625, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 166566920, + "step": 970 + }, + { + "epoch": 0.25540869336489774, + "grad_norm": 6.371608621088164, + "learning_rate": 5e-06, + "loss": 0.1801, + "num_input_tokens_seen": 166739024, + "step": 971 + }, + { + "epoch": 0.25540869336489774, + "loss": 0.2314717024564743, + "loss_ce": 0.003017107956111431, + "loss_iou": 0.5625, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 166739024, + "step": 971 + }, + { + "epoch": 0.25567173012428485, + "grad_norm": 10.601164768004658, + "learning_rate": 5e-06, + "loss": 0.1504, + "num_input_tokens_seen": 166911376, + "step": 972 + }, + { + "epoch": 0.25567173012428485, + "loss": 0.16122400760650635, + "loss_ce": 0.001586547470651567, + "loss_iou": 0.75390625, + "loss_num": 0.031982421875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 166911376, + "step": 972 + }, + { + "epoch": 0.255934766883672, + "grad_norm": 7.052937396329928, + "learning_rate": 5e-06, + "loss": 0.1246, + "num_input_tokens_seen": 167083504, + "step": 973 + }, + { + "epoch": 0.255934766883672, + "loss": 0.09488484263420105, + "loss_ce": 0.0012263880344107747, + "loss_iou": 0.5078125, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 167083504, + "step": 973 + }, + { + "epoch": 0.2561978036430591, + "grad_norm": 11.500260657406463, + "learning_rate": 5e-06, + "loss": 0.1528, + "num_input_tokens_seen": 167255708, + "step": 974 + }, + { + "epoch": 0.2561978036430591, + "loss": 0.08826225996017456, + "loss_ce": 0.0011956070084124804, + "loss_iou": 0.6171875, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 167255708, + "step": 974 + }, + { + "epoch": 0.25646084040244627, + "grad_norm": 15.92469225824967, + "learning_rate": 5e-06, + "loss": 0.1805, + "num_input_tokens_seen": 167428028, + "step": 975 + }, + { + "epoch": 0.25646084040244627, + "loss": 0.2100502997636795, + "loss_ce": 0.0013405811041593552, + "loss_iou": 0.65234375, + "loss_num": 0.041748046875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 167428028, + "step": 975 + }, + { + "epoch": 0.25672387716183337, + "grad_norm": 8.58380851703317, + "learning_rate": 5e-06, + "loss": 0.157, + "num_input_tokens_seen": 167600008, + "step": 976 + }, + { + "epoch": 0.25672387716183337, + "loss": 0.16451242566108704, + "loss_ce": 0.0012128613889217377, + "loss_iou": 0.73046875, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 167600008, + "step": 976 + }, + { + "epoch": 0.2569869139212205, + "grad_norm": 18.1690929466902, + "learning_rate": 5e-06, + "loss": 0.1315, + "num_input_tokens_seen": 167772344, + "step": 977 + }, + { + "epoch": 0.2569869139212205, + "loss": 0.1349577009677887, + "loss_ce": 0.0059293946251273155, + "loss_iou": 0.64453125, + "loss_num": 0.02587890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 167772344, + "step": 977 + }, + { + "epoch": 0.25724995068060763, + "grad_norm": 19.89834413612689, + "learning_rate": 5e-06, + "loss": 0.1866, + "num_input_tokens_seen": 167944388, + "step": 978 + }, + { + "epoch": 0.25724995068060763, + "loss": 0.2309381365776062, + "loss_ce": 0.0008356063044629991, + "loss_iou": 0.6015625, + "loss_num": 0.0458984375, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 167944388, + "step": 978 + }, + { + "epoch": 0.25751298743999473, + "grad_norm": 9.124352830191828, + "learning_rate": 5e-06, + "loss": 0.1645, + "num_input_tokens_seen": 168116168, + "step": 979 + }, + { + "epoch": 0.25751298743999473, + "loss": 0.11332334578037262, + "loss_ce": 0.0035516121424734592, + "loss_iou": 0.43359375, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 168116168, + "step": 979 + }, + { + "epoch": 0.25777602419938184, + "grad_norm": 13.324870310905945, + "learning_rate": 5e-06, + "loss": 0.1532, + "num_input_tokens_seen": 168288220, + "step": 980 + }, + { + "epoch": 0.25777602419938184, + "loss": 0.14565590023994446, + "loss_ce": 0.0014908593147993088, + "loss_iou": 0.50390625, + "loss_num": 0.02880859375, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 168288220, + "step": 980 + }, + { + "epoch": 0.258039060958769, + "grad_norm": 12.888295473836926, + "learning_rate": 5e-06, + "loss": 0.147, + "num_input_tokens_seen": 168460592, + "step": 981 + }, + { + "epoch": 0.258039060958769, + "loss": 0.13789984583854675, + "loss_ce": 0.0010895461309701204, + "loss_iou": 0.47265625, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 168460592, + "step": 981 + }, + { + "epoch": 0.2583020977181561, + "grad_norm": 7.142842896707461, + "learning_rate": 5e-06, + "loss": 0.1736, + "num_input_tokens_seen": 168632604, + "step": 982 + }, + { + "epoch": 0.2583020977181561, + "loss": 0.2077009379863739, + "loss_ce": 0.0013105443213135004, + "loss_iou": 0.59375, + "loss_num": 0.041259765625, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 168632604, + "step": 982 + }, + { + "epoch": 0.25856513447754326, + "grad_norm": 5.171300059065281, + "learning_rate": 5e-06, + "loss": 0.194, + "num_input_tokens_seen": 168804468, + "step": 983 + }, + { + "epoch": 0.25856513447754326, + "loss": 0.15598775446414948, + "loss_ce": 0.005291945766657591, + "loss_iou": 0.51953125, + "loss_num": 0.0301513671875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 168804468, + "step": 983 + }, + { + "epoch": 0.25882817123693036, + "grad_norm": 11.204489011375072, + "learning_rate": 5e-06, + "loss": 0.1807, + "num_input_tokens_seen": 168976856, + "step": 984 + }, + { + "epoch": 0.25882817123693036, + "loss": 0.19184689223766327, + "loss_ce": 0.0026989425532519817, + "loss_iou": 0.6328125, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 168976856, + "step": 984 + }, + { + "epoch": 0.25909120799631746, + "grad_norm": 6.6174997360939205, + "learning_rate": 5e-06, + "loss": 0.1608, + "num_input_tokens_seen": 169149104, + "step": 985 + }, + { + "epoch": 0.25909120799631746, + "loss": 0.23251253366470337, + "loss_ce": 0.0004568799340631813, + "loss_iou": 0.578125, + "loss_num": 0.04638671875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 169149104, + "step": 985 + }, + { + "epoch": 0.2593542447557046, + "grad_norm": 7.169144370545132, + "learning_rate": 5e-06, + "loss": 0.1498, + "num_input_tokens_seen": 169321296, + "step": 986 + }, + { + "epoch": 0.2593542447557046, + "loss": 0.12904971837997437, + "loss_ce": 0.0024627982638776302, + "loss_iou": 0.66796875, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 169321296, + "step": 986 + }, + { + "epoch": 0.2596172815150917, + "grad_norm": 17.414290331962896, + "learning_rate": 5e-06, + "loss": 0.1282, + "num_input_tokens_seen": 169493420, + "step": 987 + }, + { + "epoch": 0.2596172815150917, + "loss": 0.13824069499969482, + "loss_ce": 0.0014914304483681917, + "loss_iou": 0.54296875, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 169493420, + "step": 987 + }, + { + "epoch": 0.2598803182744789, + "grad_norm": 5.807379438415879, + "learning_rate": 5e-06, + "loss": 0.1257, + "num_input_tokens_seen": 169664140, + "step": 988 + }, + { + "epoch": 0.2598803182744789, + "loss": 0.15334829688072205, + "loss_ce": 0.001828520093113184, + "loss_iou": 0.58984375, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 169664140, + "step": 988 + }, + { + "epoch": 0.260143355033866, + "grad_norm": 6.246751757081568, + "learning_rate": 5e-06, + "loss": 0.1808, + "num_input_tokens_seen": 169836220, + "step": 989 + }, + { + "epoch": 0.260143355033866, + "loss": 0.25014275312423706, + "loss_ce": 0.003926943056285381, + "loss_iou": 0.5703125, + "loss_num": 0.049072265625, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 169836220, + "step": 989 + }, + { + "epoch": 0.2604063917932531, + "grad_norm": 11.305993296500802, + "learning_rate": 5e-06, + "loss": 0.1627, + "num_input_tokens_seen": 170008204, + "step": 990 + }, + { + "epoch": 0.2604063917932531, + "loss": 0.19628843665122986, + "loss_ce": 0.00878843106329441, + "loss_iou": 0.60546875, + "loss_num": 0.037353515625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 170008204, + "step": 990 + }, + { + "epoch": 0.26066942855264025, + "grad_norm": 6.544136822322436, + "learning_rate": 5e-06, + "loss": 0.1516, + "num_input_tokens_seen": 170178608, + "step": 991 + }, + { + "epoch": 0.26066942855264025, + "loss": 0.23060224950313568, + "loss_ce": 0.004283890128135681, + "loss_iou": 0.41796875, + "loss_num": 0.045166015625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 170178608, + "step": 991 + }, + { + "epoch": 0.26093246531202735, + "grad_norm": 6.798376445965723, + "learning_rate": 5e-06, + "loss": 0.1663, + "num_input_tokens_seen": 170350580, + "step": 992 + }, + { + "epoch": 0.26093246531202735, + "loss": 0.15799343585968018, + "loss_ce": 0.0012551653198897839, + "loss_iou": 0.5859375, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 170350580, + "step": 992 + }, + { + "epoch": 0.26119550207141445, + "grad_norm": 6.259630629604519, + "learning_rate": 5e-06, + "loss": 0.1793, + "num_input_tokens_seen": 170522692, + "step": 993 + }, + { + "epoch": 0.26119550207141445, + "loss": 0.2015601247549057, + "loss_ce": 0.000510324549395591, + "loss_iou": 0.66796875, + "loss_num": 0.040283203125, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 170522692, + "step": 993 + }, + { + "epoch": 0.2614585388308016, + "grad_norm": 6.902940616756998, + "learning_rate": 5e-06, + "loss": 0.2081, + "num_input_tokens_seen": 170695152, + "step": 994 + }, + { + "epoch": 0.2614585388308016, + "loss": 0.15382197499275208, + "loss_ce": 0.0003795886295847595, + "loss_iou": 0.60546875, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 170695152, + "step": 994 + }, + { + "epoch": 0.2617215755901887, + "grad_norm": 4.496994630933735, + "learning_rate": 5e-06, + "loss": 0.1308, + "num_input_tokens_seen": 170867532, + "step": 995 + }, + { + "epoch": 0.2617215755901887, + "loss": 0.15487955510616302, + "loss_ce": 0.005709626711905003, + "loss_iou": 0.41015625, + "loss_num": 0.02978515625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 170867532, + "step": 995 + }, + { + "epoch": 0.2619846123495759, + "grad_norm": 5.819986109817203, + "learning_rate": 5e-06, + "loss": 0.1475, + "num_input_tokens_seen": 171039632, + "step": 996 + }, + { + "epoch": 0.2619846123495759, + "loss": 0.1341342031955719, + "loss_ce": 0.0008944571018218994, + "loss_iou": 0.458984375, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 171039632, + "step": 996 + }, + { + "epoch": 0.262247649108963, + "grad_norm": 4.6990222732792075, + "learning_rate": 5e-06, + "loss": 0.1625, + "num_input_tokens_seen": 171211920, + "step": 997 + }, + { + "epoch": 0.262247649108963, + "loss": 0.2527538239955902, + "loss_ce": 0.0029674398247152567, + "loss_iou": 0.48828125, + "loss_num": 0.050048828125, + "loss_xval": 0.25, + "num_input_tokens_seen": 171211920, + "step": 997 + }, + { + "epoch": 0.2625106858683501, + "grad_norm": 6.547042177560564, + "learning_rate": 5e-06, + "loss": 0.1684, + "num_input_tokens_seen": 171384024, + "step": 998 + }, + { + "epoch": 0.2625106858683501, + "loss": 0.18961063027381897, + "loss_ce": 0.0035754733253270388, + "loss_iou": 0.62890625, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 171384024, + "step": 998 + }, + { + "epoch": 0.26277372262773724, + "grad_norm": 17.093722901281232, + "learning_rate": 5e-06, + "loss": 0.1271, + "num_input_tokens_seen": 171556336, + "step": 999 + }, + { + "epoch": 0.26277372262773724, + "loss": 0.18390598893165588, + "loss_ce": 0.0009531003306619823, + "loss_iou": 0.24609375, + "loss_num": 0.03662109375, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 171556336, + "step": 999 + }, + { + "epoch": 0.26303675938712434, + "grad_norm": 5.862402162816912, + "learning_rate": 5e-06, + "loss": 0.1274, + "num_input_tokens_seen": 171726848, + "step": 1000 + }, + { + "epoch": 0.26303675938712434, + "eval_websight_new_CIoU": 0.7987666130065918, + "eval_websight_new_GIoU": 0.7927780747413635, + "eval_websight_new_IoU": 0.8096525371074677, + "eval_websight_new_MAE_all": 0.03391252178698778, + "eval_websight_new_MAE_h": 0.024881365709006786, + "eval_websight_new_MAE_w": 0.04275708086788654, + "eval_websight_new_MAE_x": 0.046329958364367485, + "eval_websight_new_MAE_y": 0.021681691519916058, + "eval_websight_new_NUM_probability": 0.9994822144508362, + "eval_websight_new_inside_bbox": 0.984375, + "eval_websight_new_loss": 0.15745767951011658, + "eval_websight_new_loss_ce": 9.287914144806564e-05, + "eval_websight_new_loss_iou": 0.457275390625, + "eval_websight_new_loss_num": 0.027721405029296875, + "eval_websight_new_loss_xval": 0.1386260986328125, + "eval_websight_new_runtime": 55.0835, + "eval_websight_new_samples_per_second": 0.908, + "eval_websight_new_steps_per_second": 0.036, + "num_input_tokens_seen": 171726848, + "step": 1000 + }, + { + "epoch": 0.26303675938712434, + "eval_seeclick_CIoU": 0.5508884787559509, + "eval_seeclick_GIoU": 0.5434170663356781, + "eval_seeclick_IoU": 0.5745402276515961, + "eval_seeclick_MAE_all": 0.057029979303479195, + "eval_seeclick_MAE_h": 0.03887217864394188, + "eval_seeclick_MAE_w": 0.08262282982468605, + "eval_seeclick_MAE_x": 0.07410039007663727, + "eval_seeclick_MAE_y": 0.032524523325264454, + "eval_seeclick_NUM_probability": 0.9997861981391907, + "eval_seeclick_inside_bbox": 0.9076704680919647, + "eval_seeclick_loss": 0.24120275676250458, + "eval_seeclick_loss_ce": 0.00980278616771102, + "eval_seeclick_loss_iou": 0.60888671875, + "eval_seeclick_loss_num": 0.0429229736328125, + "eval_seeclick_loss_xval": 0.214630126953125, + "eval_seeclick_runtime": 69.8263, + "eval_seeclick_samples_per_second": 0.616, + "eval_seeclick_steps_per_second": 0.029, + "num_input_tokens_seen": 171726848, + "step": 1000 + }, + { + "epoch": 0.26303675938712434, + "eval_icons_CIoU": 0.8137890696525574, + "eval_icons_GIoU": 0.8057061433792114, + "eval_icons_IoU": 0.8213592171669006, + "eval_icons_MAE_all": 0.024967025965452194, + "eval_icons_MAE_h": 0.028143037110567093, + "eval_icons_MAE_w": 0.026135658845305443, + "eval_icons_MAE_x": 0.022455199621617794, + "eval_icons_MAE_y": 0.023134205490350723, + "eval_icons_NUM_probability": 0.9995008409023285, + "eval_icons_inside_bbox": 0.9565972089767456, + "eval_icons_loss": 0.09549810737371445, + "eval_icons_loss_ce": 0.0016497992401127703, + "eval_icons_loss_iou": 0.6103515625, + "eval_icons_loss_num": 0.01790618896484375, + "eval_icons_loss_xval": 0.0895233154296875, + "eval_icons_runtime": 88.8842, + "eval_icons_samples_per_second": 0.563, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 171726848, + "step": 1000 + }, + { + "epoch": 0.26303675938712434, + "eval_screenspot_CIoU": 0.5512810548146566, + "eval_screenspot_GIoU": 0.5352131823698679, + "eval_screenspot_IoU": 0.5900407036145529, + "eval_screenspot_MAE_all": 0.08661519487698872, + "eval_screenspot_MAE_h": 0.056614277263482414, + "eval_screenspot_MAE_w": 0.13663912812868753, + "eval_screenspot_MAE_x": 0.10082270950078964, + "eval_screenspot_MAE_y": 0.052384667098522186, + "eval_screenspot_NUM_probability": 0.9995323220888773, + "eval_screenspot_inside_bbox": 0.8454166650772095, + "eval_screenspot_loss": 0.7656806111335754, + "eval_screenspot_loss_ce": 0.42391865452130634, + "eval_screenspot_loss_iou": 0.4834391276041667, + "eval_screenspot_loss_num": 0.0673370361328125, + "eval_screenspot_loss_xval": 0.3365885416666667, + "eval_screenspot_runtime": 148.461, + "eval_screenspot_samples_per_second": 0.599, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 171726848, + "step": 1000 + }, + { + "epoch": 0.26303675938712434, + "loss": 0.7618361711502075, + "loss_ce": 0.4089309275150299, + "loss_iou": 0.43359375, + "loss_num": 0.07080078125, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 171726848, + "step": 1000 + }, + { + "epoch": 0.2632997961465115, + "grad_norm": 12.369274544442106, + "learning_rate": 5e-06, + "loss": 0.1395, + "num_input_tokens_seen": 171897180, + "step": 1001 + }, + { + "epoch": 0.2632997961465115, + "loss": 0.23002395033836365, + "loss_ce": 0.0021186815574765205, + "loss_iou": 0.462890625, + "loss_num": 0.045654296875, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 171897180, + "step": 1001 + }, + { + "epoch": 0.2635628329058986, + "grad_norm": 6.891119328212443, + "learning_rate": 5e-06, + "loss": 0.127, + "num_input_tokens_seen": 172069384, + "step": 1002 + }, + { + "epoch": 0.2635628329058986, + "loss": 0.1037362664937973, + "loss_ce": 0.0008004722185432911, + "loss_iou": 0.45703125, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 172069384, + "step": 1002 + }, + { + "epoch": 0.2638258696652857, + "grad_norm": 5.298705978749544, + "learning_rate": 5e-06, + "loss": 0.1493, + "num_input_tokens_seen": 172239736, + "step": 1003 + }, + { + "epoch": 0.2638258696652857, + "loss": 0.16244152188301086, + "loss_ce": 0.0035059780348092318, + "loss_iou": 0.408203125, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 172239736, + "step": 1003 + }, + { + "epoch": 0.26408890642467286, + "grad_norm": 28.95390611927557, + "learning_rate": 5e-06, + "loss": 0.1674, + "num_input_tokens_seen": 172409356, + "step": 1004 + }, + { + "epoch": 0.26408890642467286, + "loss": 0.16002172231674194, + "loss_ce": 0.001452386612072587, + "loss_iou": 0.70703125, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 172409356, + "step": 1004 + }, + { + "epoch": 0.26435194318405997, + "grad_norm": 6.613551299237438, + "learning_rate": 5e-06, + "loss": 0.1297, + "num_input_tokens_seen": 172579624, + "step": 1005 + }, + { + "epoch": 0.26435194318405997, + "loss": 0.11175885051488876, + "loss_ce": 0.0038792139384895563, + "loss_iou": 0.61328125, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 172579624, + "step": 1005 + }, + { + "epoch": 0.26461497994344707, + "grad_norm": 5.235806011716001, + "learning_rate": 5e-06, + "loss": 0.181, + "num_input_tokens_seen": 172752052, + "step": 1006 + }, + { + "epoch": 0.26461497994344707, + "loss": 0.19632884860038757, + "loss_ce": 0.001321525895036757, + "loss_iou": 0.75390625, + "loss_num": 0.0390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 172752052, + "step": 1006 + }, + { + "epoch": 0.26487801670283423, + "grad_norm": 11.226831704177643, + "learning_rate": 5e-06, + "loss": 0.164, + "num_input_tokens_seen": 172924420, + "step": 1007 + }, + { + "epoch": 0.26487801670283423, + "loss": 0.16282187402248383, + "loss_ce": 0.0012007836485281587, + "loss_iou": 0.52734375, + "loss_num": 0.0322265625, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 172924420, + "step": 1007 + }, + { + "epoch": 0.26514105346222133, + "grad_norm": 5.82233219860515, + "learning_rate": 5e-06, + "loss": 0.1824, + "num_input_tokens_seen": 173096824, + "step": 1008 + }, + { + "epoch": 0.26514105346222133, + "loss": 0.17045088112354279, + "loss_ce": 0.005320262163877487, + "loss_iou": 0.53515625, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 173096824, + "step": 1008 + }, + { + "epoch": 0.2654040902216085, + "grad_norm": 5.52283167756611, + "learning_rate": 5e-06, + "loss": 0.149, + "num_input_tokens_seen": 173268948, + "step": 1009 + }, + { + "epoch": 0.2654040902216085, + "loss": 0.17849504947662354, + "loss_ce": 0.004483824595808983, + "loss_iou": 0.61328125, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 173268948, + "step": 1009 + }, + { + "epoch": 0.2656671269809956, + "grad_norm": 5.624587832123806, + "learning_rate": 5e-06, + "loss": 0.1733, + "num_input_tokens_seen": 173441324, + "step": 1010 + }, + { + "epoch": 0.2656671269809956, + "loss": 0.15812638401985168, + "loss_ce": 0.0008387943962588906, + "loss_iou": 0.62890625, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 173441324, + "step": 1010 + }, + { + "epoch": 0.2659301637403827, + "grad_norm": 8.141965504781345, + "learning_rate": 5e-06, + "loss": 0.1542, + "num_input_tokens_seen": 173611452, + "step": 1011 + }, + { + "epoch": 0.2659301637403827, + "loss": 0.2730504870414734, + "loss_ce": 0.004678931087255478, + "loss_iou": 0.53125, + "loss_num": 0.0537109375, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 173611452, + "step": 1011 + }, + { + "epoch": 0.26619320049976986, + "grad_norm": 9.595372252411392, + "learning_rate": 5e-06, + "loss": 0.1596, + "num_input_tokens_seen": 173783736, + "step": 1012 + }, + { + "epoch": 0.26619320049976986, + "loss": 0.23428162932395935, + "loss_ce": 0.0011273245327174664, + "loss_iou": 0.54296875, + "loss_num": 0.046630859375, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 173783736, + "step": 1012 + }, + { + "epoch": 0.26645623725915696, + "grad_norm": 8.106802909743756, + "learning_rate": 5e-06, + "loss": 0.1535, + "num_input_tokens_seen": 173955656, + "step": 1013 + }, + { + "epoch": 0.26645623725915696, + "loss": 0.15512457489967346, + "loss_ce": 0.003147047944366932, + "loss_iou": 0.59375, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 173955656, + "step": 1013 + }, + { + "epoch": 0.2667192740185441, + "grad_norm": 5.365195100132044, + "learning_rate": 5e-06, + "loss": 0.1487, + "num_input_tokens_seen": 174126100, + "step": 1014 + }, + { + "epoch": 0.2667192740185441, + "loss": 0.17733250558376312, + "loss_ce": 0.004175758454948664, + "loss_iou": 0.6640625, + "loss_num": 0.03466796875, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 174126100, + "step": 1014 + }, + { + "epoch": 0.2669823107779312, + "grad_norm": 6.156957339464345, + "learning_rate": 5e-06, + "loss": 0.1486, + "num_input_tokens_seen": 174298328, + "step": 1015 + }, + { + "epoch": 0.2669823107779312, + "loss": 0.1237088292837143, + "loss_ce": 0.001119712833315134, + "loss_iou": 0.41796875, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 174298328, + "step": 1015 + }, + { + "epoch": 0.2672453475373183, + "grad_norm": 5.364063512164187, + "learning_rate": 5e-06, + "loss": 0.1536, + "num_input_tokens_seen": 174470324, + "step": 1016 + }, + { + "epoch": 0.2672453475373183, + "loss": 0.2085983008146286, + "loss_ce": 0.003947417717427015, + "loss_iou": 0.5546875, + "loss_num": 0.041015625, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 174470324, + "step": 1016 + }, + { + "epoch": 0.2675083842967055, + "grad_norm": 26.166059849551687, + "learning_rate": 5e-06, + "loss": 0.1652, + "num_input_tokens_seen": 174642656, + "step": 1017 + }, + { + "epoch": 0.2675083842967055, + "loss": 0.0957513153553009, + "loss_ce": 0.0004143980913795531, + "loss_iou": 0.5546875, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 174642656, + "step": 1017 + }, + { + "epoch": 0.2677714210560926, + "grad_norm": 13.570714399931546, + "learning_rate": 5e-06, + "loss": 0.1222, + "num_input_tokens_seen": 174813336, + "step": 1018 + }, + { + "epoch": 0.2677714210560926, + "loss": 0.08781825006008148, + "loss_ce": 0.0015755778877064586, + "loss_iou": 0.5703125, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 174813336, + "step": 1018 + }, + { + "epoch": 0.2680344578154797, + "grad_norm": 5.193615864048545, + "learning_rate": 5e-06, + "loss": 0.1337, + "num_input_tokens_seen": 174983476, + "step": 1019 + }, + { + "epoch": 0.2680344578154797, + "loss": 0.09173595905303955, + "loss_ce": 0.0038148202002048492, + "loss_iou": 0.4609375, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 174983476, + "step": 1019 + }, + { + "epoch": 0.26829749457486685, + "grad_norm": 6.860708381616737, + "learning_rate": 5e-06, + "loss": 0.1671, + "num_input_tokens_seen": 175155688, + "step": 1020 + }, + { + "epoch": 0.26829749457486685, + "loss": 0.1804433912038803, + "loss_ce": 0.002403826452791691, + "loss_iou": 0.48828125, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 175155688, + "step": 1020 + }, + { + "epoch": 0.26856053133425395, + "grad_norm": 4.544920825241194, + "learning_rate": 5e-06, + "loss": 0.1266, + "num_input_tokens_seen": 175327636, + "step": 1021 + }, + { + "epoch": 0.26856053133425395, + "loss": 0.14717841148376465, + "loss_ce": 0.0010602545225992799, + "loss_iou": 0.62890625, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 175327636, + "step": 1021 + }, + { + "epoch": 0.2688235680936411, + "grad_norm": 8.925039478850847, + "learning_rate": 5e-06, + "loss": 0.1755, + "num_input_tokens_seen": 175499748, + "step": 1022 + }, + { + "epoch": 0.2688235680936411, + "loss": 0.1853310763835907, + "loss_ce": 0.0006386763998307288, + "loss_iou": 0.51953125, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 175499748, + "step": 1022 + }, + { + "epoch": 0.2690866048530282, + "grad_norm": 5.74787431130144, + "learning_rate": 5e-06, + "loss": 0.1518, + "num_input_tokens_seen": 175671808, + "step": 1023 + }, + { + "epoch": 0.2690866048530282, + "loss": 0.14083652198314667, + "loss_ce": 0.0009744655108079314, + "loss_iou": 0.4140625, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 175671808, + "step": 1023 + }, + { + "epoch": 0.2693496416124153, + "grad_norm": 6.6733989699604725, + "learning_rate": 5e-06, + "loss": 0.1427, + "num_input_tokens_seen": 175844188, + "step": 1024 + }, + { + "epoch": 0.2693496416124153, + "loss": 0.1698358803987503, + "loss_ce": 0.0007990067824721336, + "loss_iou": 0.6640625, + "loss_num": 0.03369140625, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 175844188, + "step": 1024 + }, + { + "epoch": 0.26961267837180247, + "grad_norm": 8.873866851189364, + "learning_rate": 5e-06, + "loss": 0.1894, + "num_input_tokens_seen": 176016264, + "step": 1025 + }, + { + "epoch": 0.26961267837180247, + "loss": 0.28822407126426697, + "loss_ce": 0.006241639144718647, + "loss_iou": 0.6171875, + "loss_num": 0.056396484375, + "loss_xval": 0.28125, + "num_input_tokens_seen": 176016264, + "step": 1025 + }, + { + "epoch": 0.2698757151311896, + "grad_norm": 5.745558264753103, + "learning_rate": 5e-06, + "loss": 0.1313, + "num_input_tokens_seen": 176188840, + "step": 1026 + }, + { + "epoch": 0.2698757151311896, + "loss": 0.20782078802585602, + "loss_ce": 0.003353000618517399, + "loss_iou": 0.53125, + "loss_num": 0.041015625, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 176188840, + "step": 1026 + }, + { + "epoch": 0.27013875189057673, + "grad_norm": 6.250713326486415, + "learning_rate": 5e-06, + "loss": 0.1462, + "num_input_tokens_seen": 176360864, + "step": 1027 + }, + { + "epoch": 0.27013875189057673, + "loss": 0.17586824297904968, + "loss_ce": 0.0016433752607554197, + "loss_iou": 0.59375, + "loss_num": 0.034912109375, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 176360864, + "step": 1027 + }, + { + "epoch": 0.27040178864996384, + "grad_norm": 9.18813465584604, + "learning_rate": 5e-06, + "loss": 0.1391, + "num_input_tokens_seen": 176533304, + "step": 1028 + }, + { + "epoch": 0.27040178864996384, + "loss": 0.1184663325548172, + "loss_ce": 0.0005769361741840839, + "loss_iou": 0.640625, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 176533304, + "step": 1028 + }, + { + "epoch": 0.27066482540935094, + "grad_norm": 6.1602828275187, + "learning_rate": 5e-06, + "loss": 0.1569, + "num_input_tokens_seen": 176705536, + "step": 1029 + }, + { + "epoch": 0.27066482540935094, + "loss": 0.20641186833381653, + "loss_ce": 0.0027985800988972187, + "loss_iou": 0.54296875, + "loss_num": 0.040771484375, + "loss_xval": 0.203125, + "num_input_tokens_seen": 176705536, + "step": 1029 + }, + { + "epoch": 0.2709278621687381, + "grad_norm": 6.699314102286077, + "learning_rate": 5e-06, + "loss": 0.1489, + "num_input_tokens_seen": 176877744, + "step": 1030 + }, + { + "epoch": 0.2709278621687381, + "loss": 0.12170865386724472, + "loss_ce": 0.003544584382325411, + "loss_iou": 0.5546875, + "loss_num": 0.0235595703125, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 176877744, + "step": 1030 + }, + { + "epoch": 0.2711908989281252, + "grad_norm": 7.305231502769662, + "learning_rate": 5e-06, + "loss": 0.1635, + "num_input_tokens_seen": 177050116, + "step": 1031 + }, + { + "epoch": 0.2711908989281252, + "loss": 0.1749892234802246, + "loss_ce": 0.001527311746031046, + "loss_iou": 0.482421875, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 177050116, + "step": 1031 + }, + { + "epoch": 0.2714539356875123, + "grad_norm": 23.98337624564377, + "learning_rate": 5e-06, + "loss": 0.1546, + "num_input_tokens_seen": 177222180, + "step": 1032 + }, + { + "epoch": 0.2714539356875123, + "loss": 0.11504107713699341, + "loss_ce": 0.0008137800614349544, + "loss_iou": 0.64453125, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 177222180, + "step": 1032 + }, + { + "epoch": 0.27171697244689946, + "grad_norm": 8.870505808545992, + "learning_rate": 5e-06, + "loss": 0.1098, + "num_input_tokens_seen": 177394132, + "step": 1033 + }, + { + "epoch": 0.27171697244689946, + "loss": 0.1380763053894043, + "loss_ce": 0.001357543864287436, + "loss_iou": 0.55078125, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 177394132, + "step": 1033 + }, + { + "epoch": 0.27198000920628657, + "grad_norm": 6.7102870745927, + "learning_rate": 5e-06, + "loss": 0.152, + "num_input_tokens_seen": 177564532, + "step": 1034 + }, + { + "epoch": 0.27198000920628657, + "loss": 0.1144593358039856, + "loss_ce": 0.0005372193409129977, + "loss_iou": 0.625, + "loss_num": 0.0228271484375, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 177564532, + "step": 1034 + }, + { + "epoch": 0.2722430459656737, + "grad_norm": 7.475407895210686, + "learning_rate": 5e-06, + "loss": 0.1639, + "num_input_tokens_seen": 177735192, + "step": 1035 + }, + { + "epoch": 0.2722430459656737, + "loss": 0.1714543104171753, + "loss_ce": 0.001501921215094626, + "loss_iou": 0.435546875, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 177735192, + "step": 1035 + }, + { + "epoch": 0.2725060827250608, + "grad_norm": 7.019122274754133, + "learning_rate": 5e-06, + "loss": 0.183, + "num_input_tokens_seen": 177905512, + "step": 1036 + }, + { + "epoch": 0.2725060827250608, + "loss": 0.23724797368049622, + "loss_ce": 0.0011640018783509731, + "loss_iou": 0.3671875, + "loss_num": 0.047119140625, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 177905512, + "step": 1036 + }, + { + "epoch": 0.27276911948444793, + "grad_norm": 6.609200156734422, + "learning_rate": 5e-06, + "loss": 0.1299, + "num_input_tokens_seen": 178077776, + "step": 1037 + }, + { + "epoch": 0.27276911948444793, + "loss": 0.1989010125398636, + "loss_ce": 0.002367813140153885, + "loss_iou": 0.421875, + "loss_num": 0.039306640625, + "loss_xval": 0.1962890625, + "num_input_tokens_seen": 178077776, + "step": 1037 + }, + { + "epoch": 0.2730321562438351, + "grad_norm": 7.044877026833013, + "learning_rate": 5e-06, + "loss": 0.157, + "num_input_tokens_seen": 178250200, + "step": 1038 + }, + { + "epoch": 0.2730321562438351, + "loss": 0.1487899273633957, + "loss_ce": 0.0012069200165569782, + "loss_iou": 0.7109375, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 178250200, + "step": 1038 + }, + { + "epoch": 0.2732951930032222, + "grad_norm": 7.367246122761307, + "learning_rate": 5e-06, + "loss": 0.1496, + "num_input_tokens_seen": 178420744, + "step": 1039 + }, + { + "epoch": 0.2732951930032222, + "loss": 0.1740129590034485, + "loss_ce": 0.002565213944762945, + "loss_iou": 0.53125, + "loss_num": 0.0341796875, + "loss_xval": 0.171875, + "num_input_tokens_seen": 178420744, + "step": 1039 + }, + { + "epoch": 0.27355822976260935, + "grad_norm": 6.283509699437948, + "learning_rate": 5e-06, + "loss": 0.1274, + "num_input_tokens_seen": 178592888, + "step": 1040 + }, + { + "epoch": 0.27355822976260935, + "loss": 0.09046860039234161, + "loss_ce": 0.0011436456115916371, + "loss_iou": 0.6171875, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 178592888, + "step": 1040 + }, + { + "epoch": 0.27382126652199645, + "grad_norm": 8.68361428045609, + "learning_rate": 5e-06, + "loss": 0.1171, + "num_input_tokens_seen": 178763324, + "step": 1041 + }, + { + "epoch": 0.27382126652199645, + "loss": 0.10273198038339615, + "loss_ce": 0.0014746561646461487, + "loss_iou": 0.42578125, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 178763324, + "step": 1041 + }, + { + "epoch": 0.27408430328138356, + "grad_norm": 5.923468447654299, + "learning_rate": 5e-06, + "loss": 0.1654, + "num_input_tokens_seen": 178935692, + "step": 1042 + }, + { + "epoch": 0.27408430328138356, + "loss": 0.11228330433368683, + "loss_ce": 0.0015655276365578175, + "loss_iou": 0.7265625, + "loss_num": 0.0220947265625, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 178935692, + "step": 1042 + }, + { + "epoch": 0.2743473400407707, + "grad_norm": 12.335079487643208, + "learning_rate": 5e-06, + "loss": 0.1296, + "num_input_tokens_seen": 179105416, + "step": 1043 + }, + { + "epoch": 0.2743473400407707, + "loss": 0.13459762930870056, + "loss_ce": 0.0016020219773054123, + "loss_iou": 0.59375, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 179105416, + "step": 1043 + }, + { + "epoch": 0.2746103768001578, + "grad_norm": 5.798275831622124, + "learning_rate": 5e-06, + "loss": 0.1236, + "num_input_tokens_seen": 179277628, + "step": 1044 + }, + { + "epoch": 0.2746103768001578, + "loss": 0.07603298872709274, + "loss_ce": 0.0007766383932903409, + "loss_iou": 0.59765625, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 179277628, + "step": 1044 + }, + { + "epoch": 0.2748734135595449, + "grad_norm": 23.44861216824249, + "learning_rate": 5e-06, + "loss": 0.1352, + "num_input_tokens_seen": 179447304, + "step": 1045 + }, + { + "epoch": 0.2748734135595449, + "loss": 0.143830344080925, + "loss_ce": 0.0031137943733483553, + "loss_iou": 0.5390625, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 179447304, + "step": 1045 + }, + { + "epoch": 0.2751364503189321, + "grad_norm": 5.56459204907325, + "learning_rate": 5e-06, + "loss": 0.1263, + "num_input_tokens_seen": 179619344, + "step": 1046 + }, + { + "epoch": 0.2751364503189321, + "loss": 0.1308884471654892, + "loss_ce": 0.0006089094094932079, + "loss_iou": 0.66015625, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 179619344, + "step": 1046 + }, + { + "epoch": 0.2753994870783192, + "grad_norm": 7.96724794117892, + "learning_rate": 5e-06, + "loss": 0.1513, + "num_input_tokens_seen": 179791548, + "step": 1047 + }, + { + "epoch": 0.2753994870783192, + "loss": 0.1258929818868637, + "loss_ce": 0.006264072842895985, + "loss_iou": 0.515625, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 179791548, + "step": 1047 + }, + { + "epoch": 0.27566252383770634, + "grad_norm": 5.070360731345708, + "learning_rate": 5e-06, + "loss": 0.1111, + "num_input_tokens_seen": 179963676, + "step": 1048 + }, + { + "epoch": 0.27566252383770634, + "loss": 0.11563927680253983, + "loss_ce": 0.00031335209496319294, + "loss_iou": 0.466796875, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 179963676, + "step": 1048 + }, + { + "epoch": 0.27592556059709344, + "grad_norm": 7.345106018369934, + "learning_rate": 5e-06, + "loss": 0.1455, + "num_input_tokens_seen": 180135984, + "step": 1049 + }, + { + "epoch": 0.27592556059709344, + "loss": 0.1866682916879654, + "loss_ce": 0.0012129689566791058, + "loss_iou": 0.609375, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 180135984, + "step": 1049 + }, + { + "epoch": 0.27618859735648055, + "grad_norm": 9.442805115405356, + "learning_rate": 5e-06, + "loss": 0.2447, + "num_input_tokens_seen": 180308156, + "step": 1050 + }, + { + "epoch": 0.27618859735648055, + "loss": 0.23040437698364258, + "loss_ce": 0.001797212054952979, + "loss_iou": 0.578125, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 180308156, + "step": 1050 + }, + { + "epoch": 0.2764516341158677, + "grad_norm": 10.313405249427115, + "learning_rate": 5e-06, + "loss": 0.1419, + "num_input_tokens_seen": 180480268, + "step": 1051 + }, + { + "epoch": 0.2764516341158677, + "loss": 0.1318507045507431, + "loss_ce": 0.003188594477251172, + "loss_iou": 0.443359375, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 180480268, + "step": 1051 + }, + { + "epoch": 0.2767146708752548, + "grad_norm": 12.938419489278349, + "learning_rate": 5e-06, + "loss": 0.1389, + "num_input_tokens_seen": 180652480, + "step": 1052 + }, + { + "epoch": 0.2767146708752548, + "loss": 0.1456303596496582, + "loss_ce": 0.0045475889928638935, + "loss_iou": 0.6484375, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 180652480, + "step": 1052 + }, + { + "epoch": 0.27697770763464197, + "grad_norm": 6.129454843218688, + "learning_rate": 5e-06, + "loss": 0.155, + "num_input_tokens_seen": 180824868, + "step": 1053 + }, + { + "epoch": 0.27697770763464197, + "loss": 0.10654839873313904, + "loss_ce": 0.0010186205618083477, + "loss_iou": 0.53125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 180824868, + "step": 1053 + }, + { + "epoch": 0.27724074439402907, + "grad_norm": 6.137928561662, + "learning_rate": 5e-06, + "loss": 0.1994, + "num_input_tokens_seen": 180996888, + "step": 1054 + }, + { + "epoch": 0.27724074439402907, + "loss": 0.21584706008434296, + "loss_ce": 0.0034447195939719677, + "loss_iou": 0.6171875, + "loss_num": 0.04248046875, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 180996888, + "step": 1054 + }, + { + "epoch": 0.2775037811534162, + "grad_norm": 4.330377156785293, + "learning_rate": 5e-06, + "loss": 0.098, + "num_input_tokens_seen": 181168924, + "step": 1055 + }, + { + "epoch": 0.2775037811534162, + "loss": 0.08097569644451141, + "loss_ce": 0.0013553331373259425, + "loss_iou": 0.64453125, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 181168924, + "step": 1055 + }, + { + "epoch": 0.27776681791280333, + "grad_norm": 6.97802036205834, + "learning_rate": 5e-06, + "loss": 0.1652, + "num_input_tokens_seen": 181341092, + "step": 1056 + }, + { + "epoch": 0.27776681791280333, + "loss": 0.1544983983039856, + "loss_ce": 0.0006592837744392455, + "loss_iou": 0.5078125, + "loss_num": 0.03076171875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 181341092, + "step": 1056 + }, + { + "epoch": 0.27802985467219044, + "grad_norm": 5.494026690913253, + "learning_rate": 5e-06, + "loss": 0.1527, + "num_input_tokens_seen": 181513136, + "step": 1057 + }, + { + "epoch": 0.27802985467219044, + "loss": 0.09461110830307007, + "loss_ce": 0.004279076587408781, + "loss_iou": 0.46875, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 181513136, + "step": 1057 + }, + { + "epoch": 0.27829289143157754, + "grad_norm": 8.725452399730845, + "learning_rate": 5e-06, + "loss": 0.1293, + "num_input_tokens_seen": 181684960, + "step": 1058 + }, + { + "epoch": 0.27829289143157754, + "loss": 0.14191409945487976, + "loss_ce": 0.004157752729952335, + "loss_iou": 0.6015625, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 181684960, + "step": 1058 + }, + { + "epoch": 0.2785559281909647, + "grad_norm": 5.556456214125238, + "learning_rate": 5e-06, + "loss": 0.1374, + "num_input_tokens_seen": 181857208, + "step": 1059 + }, + { + "epoch": 0.2785559281909647, + "loss": 0.14501094818115234, + "loss_ce": 0.0029821395874023438, + "loss_iou": 0.5, + "loss_num": 0.0284423828125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 181857208, + "step": 1059 + }, + { + "epoch": 0.2788189649503518, + "grad_norm": 5.694123648635864, + "learning_rate": 5e-06, + "loss": 0.1345, + "num_input_tokens_seen": 182029272, + "step": 1060 + }, + { + "epoch": 0.2788189649503518, + "loss": 0.1433970034122467, + "loss_ce": 0.0002085179730784148, + "loss_iou": 0.49609375, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 182029272, + "step": 1060 + }, + { + "epoch": 0.27908200170973896, + "grad_norm": 11.17547001843261, + "learning_rate": 5e-06, + "loss": 0.1709, + "num_input_tokens_seen": 182201700, + "step": 1061 + }, + { + "epoch": 0.27908200170973896, + "loss": 0.2361234724521637, + "loss_ce": 0.0002225826756330207, + "loss_iou": 0.5390625, + "loss_num": 0.047119140625, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 182201700, + "step": 1061 + }, + { + "epoch": 0.27934503846912606, + "grad_norm": 7.952701295024384, + "learning_rate": 5e-06, + "loss": 0.1245, + "num_input_tokens_seen": 182373760, + "step": 1062 + }, + { + "epoch": 0.27934503846912606, + "loss": 0.12168803811073303, + "loss_ce": 0.00483623007312417, + "loss_iou": 0.66015625, + "loss_num": 0.0234375, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 182373760, + "step": 1062 + }, + { + "epoch": 0.27960807522851316, + "grad_norm": 8.381017586610616, + "learning_rate": 5e-06, + "loss": 0.1264, + "num_input_tokens_seen": 182544140, + "step": 1063 + }, + { + "epoch": 0.27960807522851316, + "loss": 0.18032154440879822, + "loss_ce": 0.003350101877003908, + "loss_iou": 0.5078125, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 182544140, + "step": 1063 + }, + { + "epoch": 0.2798711119879003, + "grad_norm": 7.079859253709746, + "learning_rate": 5e-06, + "loss": 0.1588, + "num_input_tokens_seen": 182716400, + "step": 1064 + }, + { + "epoch": 0.2798711119879003, + "loss": 0.2143479734659195, + "loss_ce": 0.005302563309669495, + "loss_iou": 0.56640625, + "loss_num": 0.041748046875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 182716400, + "step": 1064 + }, + { + "epoch": 0.2801341487472874, + "grad_norm": 5.981555911348935, + "learning_rate": 5e-06, + "loss": 0.1373, + "num_input_tokens_seen": 182888924, + "step": 1065 + }, + { + "epoch": 0.2801341487472874, + "loss": 0.07972423732280731, + "loss_ce": 0.0004090492147952318, + "loss_iou": 0.5390625, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 182888924, + "step": 1065 + }, + { + "epoch": 0.2803971855066746, + "grad_norm": 6.957965061807421, + "learning_rate": 5e-06, + "loss": 0.1913, + "num_input_tokens_seen": 183061040, + "step": 1066 + }, + { + "epoch": 0.2803971855066746, + "loss": 0.18109014630317688, + "loss_ce": 0.0029285247437655926, + "loss_iou": 0.6171875, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 183061040, + "step": 1066 + }, + { + "epoch": 0.2806602222660617, + "grad_norm": 7.354674178304409, + "learning_rate": 5e-06, + "loss": 0.1445, + "num_input_tokens_seen": 183233464, + "step": 1067 + }, + { + "epoch": 0.2806602222660617, + "loss": 0.19740189611911774, + "loss_ce": 0.0007771397940814495, + "loss_iou": 0.49609375, + "loss_num": 0.039306640625, + "loss_xval": 0.1962890625, + "num_input_tokens_seen": 183233464, + "step": 1067 + }, + { + "epoch": 0.2809232590254488, + "grad_norm": 6.666818027916791, + "learning_rate": 5e-06, + "loss": 0.1868, + "num_input_tokens_seen": 183405764, + "step": 1068 + }, + { + "epoch": 0.2809232590254488, + "loss": 0.15091687440872192, + "loss_ce": 0.0006788407335989177, + "loss_iou": 0.5546875, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 183405764, + "step": 1068 + }, + { + "epoch": 0.28118629578483595, + "grad_norm": 5.646804925247109, + "learning_rate": 5e-06, + "loss": 0.136, + "num_input_tokens_seen": 183578300, + "step": 1069 + }, + { + "epoch": 0.28118629578483595, + "loss": 0.15913698077201843, + "loss_ce": 0.0021545523777604103, + "loss_iou": 0.5859375, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 183578300, + "step": 1069 + }, + { + "epoch": 0.28144933254422305, + "grad_norm": 20.5753376454494, + "learning_rate": 5e-06, + "loss": 0.1509, + "num_input_tokens_seen": 183750176, + "step": 1070 + }, + { + "epoch": 0.28144933254422305, + "loss": 0.11271088570356369, + "loss_ce": 0.0023593269288539886, + "loss_iou": 0.609375, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 183750176, + "step": 1070 + }, + { + "epoch": 0.28171236930361016, + "grad_norm": 4.644203324007545, + "learning_rate": 5e-06, + "loss": 0.1297, + "num_input_tokens_seen": 183922404, + "step": 1071 + }, + { + "epoch": 0.28171236930361016, + "loss": 0.12499827146530151, + "loss_ce": 0.0020124230068176985, + "loss_iou": 0.546875, + "loss_num": 0.0245361328125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 183922404, + "step": 1071 + }, + { + "epoch": 0.2819754060629973, + "grad_norm": 5.233661334274966, + "learning_rate": 5e-06, + "loss": 0.1277, + "num_input_tokens_seen": 184094188, + "step": 1072 + }, + { + "epoch": 0.2819754060629973, + "loss": 0.1377188265323639, + "loss_ce": 0.0002676558797247708, + "loss_iou": 0.443359375, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 184094188, + "step": 1072 + }, + { + "epoch": 0.2822384428223844, + "grad_norm": 7.522178114598793, + "learning_rate": 5e-06, + "loss": 0.1911, + "num_input_tokens_seen": 184262844, + "step": 1073 + }, + { + "epoch": 0.2822384428223844, + "loss": 0.2804732322692871, + "loss_ce": 0.0030379469972103834, + "loss_iou": 0.5390625, + "loss_num": 0.055419921875, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 184262844, + "step": 1073 + }, + { + "epoch": 0.2825014795817716, + "grad_norm": 10.418392232208523, + "learning_rate": 5e-06, + "loss": 0.1913, + "num_input_tokens_seen": 184431224, + "step": 1074 + }, + { + "epoch": 0.2825014795817716, + "loss": 0.17388112843036652, + "loss_ce": 0.002219748916104436, + "loss_iou": 0.5859375, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 184431224, + "step": 1074 + }, + { + "epoch": 0.2827645163411587, + "grad_norm": 7.018369739955214, + "learning_rate": 5e-06, + "loss": 0.1685, + "num_input_tokens_seen": 184603440, + "step": 1075 + }, + { + "epoch": 0.2827645163411587, + "loss": 0.24287168681621552, + "loss_ce": 0.0036749078426510096, + "loss_iou": 0.466796875, + "loss_num": 0.0478515625, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 184603440, + "step": 1075 + }, + { + "epoch": 0.2830275531005458, + "grad_norm": 8.441416409211774, + "learning_rate": 5e-06, + "loss": 0.167, + "num_input_tokens_seen": 184775664, + "step": 1076 + }, + { + "epoch": 0.2830275531005458, + "loss": 0.253373384475708, + "loss_ce": 0.0034344326704740524, + "loss_iou": 0.61328125, + "loss_num": 0.050048828125, + "loss_xval": 0.25, + "num_input_tokens_seen": 184775664, + "step": 1076 + }, + { + "epoch": 0.28329058985993294, + "grad_norm": 5.273714948615874, + "learning_rate": 5e-06, + "loss": 0.1366, + "num_input_tokens_seen": 184947684, + "step": 1077 + }, + { + "epoch": 0.28329058985993294, + "loss": 0.12758958339691162, + "loss_ce": 0.0030168381053954363, + "loss_iou": 0.59375, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 184947684, + "step": 1077 + }, + { + "epoch": 0.28355362661932004, + "grad_norm": 5.622485240246158, + "learning_rate": 5e-06, + "loss": 0.1707, + "num_input_tokens_seen": 185119764, + "step": 1078 + }, + { + "epoch": 0.28355362661932004, + "loss": 0.15722918510437012, + "loss_ce": 0.0001552198955323547, + "loss_iou": 0.546875, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 185119764, + "step": 1078 + }, + { + "epoch": 0.2838166633787072, + "grad_norm": 6.064053191448847, + "learning_rate": 5e-06, + "loss": 0.1428, + "num_input_tokens_seen": 185291940, + "step": 1079 + }, + { + "epoch": 0.2838166633787072, + "loss": 0.12457242608070374, + "loss_ce": 0.0010983101092278957, + "loss_iou": 0.53125, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 185291940, + "step": 1079 + }, + { + "epoch": 0.2840797001380943, + "grad_norm": 8.508529448099232, + "learning_rate": 5e-06, + "loss": 0.1667, + "num_input_tokens_seen": 185460372, + "step": 1080 + }, + { + "epoch": 0.2840797001380943, + "loss": 0.2045111060142517, + "loss_ce": 0.0020574983209371567, + "loss_iou": 0.6484375, + "loss_num": 0.04052734375, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 185460372, + "step": 1080 + }, + { + "epoch": 0.2843427368974814, + "grad_norm": 5.269932611835083, + "learning_rate": 5e-06, + "loss": 0.1797, + "num_input_tokens_seen": 185632912, + "step": 1081 + }, + { + "epoch": 0.2843427368974814, + "loss": 0.13937309384346008, + "loss_ce": 0.009490270167589188, + "loss_iou": 0.462890625, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 185632912, + "step": 1081 + }, + { + "epoch": 0.28460577365686857, + "grad_norm": 8.826575998844994, + "learning_rate": 5e-06, + "loss": 0.1184, + "num_input_tokens_seen": 185804888, + "step": 1082 + }, + { + "epoch": 0.28460577365686857, + "loss": 0.099105603992939, + "loss_ce": 0.0018155663274228573, + "loss_iou": 0.515625, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 185804888, + "step": 1082 + }, + { + "epoch": 0.28486881041625567, + "grad_norm": 5.9573043544472535, + "learning_rate": 5e-06, + "loss": 0.1276, + "num_input_tokens_seen": 185977324, + "step": 1083 + }, + { + "epoch": 0.28486881041625567, + "loss": 0.17646890878677368, + "loss_ce": 0.0035868186969310045, + "loss_iou": 0.5546875, + "loss_num": 0.034423828125, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 185977324, + "step": 1083 + }, + { + "epoch": 0.28513184717564277, + "grad_norm": 5.170909045758908, + "learning_rate": 5e-06, + "loss": 0.1966, + "num_input_tokens_seen": 186149548, + "step": 1084 + }, + { + "epoch": 0.28513184717564277, + "loss": 0.19262221455574036, + "loss_ce": 0.000941307342145592, + "loss_iou": 0.6640625, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 186149548, + "step": 1084 + }, + { + "epoch": 0.28539488393502993, + "grad_norm": 16.76166177176129, + "learning_rate": 5e-06, + "loss": 0.1336, + "num_input_tokens_seen": 186321516, + "step": 1085 + }, + { + "epoch": 0.28539488393502993, + "loss": 0.13177794218063354, + "loss_ce": 0.0014678854495286942, + "loss_iou": 0.50390625, + "loss_num": 0.026123046875, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 186321516, + "step": 1085 + }, + { + "epoch": 0.28565792069441703, + "grad_norm": 13.385145963732795, + "learning_rate": 5e-06, + "loss": 0.1565, + "num_input_tokens_seen": 186493748, + "step": 1086 + }, + { + "epoch": 0.28565792069441703, + "loss": 0.24282805621623993, + "loss_ce": 0.0057369922287762165, + "loss_iou": 0.48828125, + "loss_num": 0.04736328125, + "loss_xval": 0.2373046875, + "num_input_tokens_seen": 186493748, + "step": 1086 + }, + { + "epoch": 0.2859209574538042, + "grad_norm": 7.0564105719656025, + "learning_rate": 5e-06, + "loss": 0.136, + "num_input_tokens_seen": 186666020, + "step": 1087 + }, + { + "epoch": 0.2859209574538042, + "loss": 0.10800403356552124, + "loss_ce": 0.000704226375091821, + "loss_iou": 0.388671875, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 186666020, + "step": 1087 + }, + { + "epoch": 0.2861839942131913, + "grad_norm": 5.57534276048312, + "learning_rate": 5e-06, + "loss": 0.1585, + "num_input_tokens_seen": 186834708, + "step": 1088 + }, + { + "epoch": 0.2861839942131913, + "loss": 0.1970679759979248, + "loss_ce": 0.0005652993568219244, + "loss_iou": 0.474609375, + "loss_num": 0.039306640625, + "loss_xval": 0.1962890625, + "num_input_tokens_seen": 186834708, + "step": 1088 + }, + { + "epoch": 0.2864470309725784, + "grad_norm": 12.382219859522896, + "learning_rate": 5e-06, + "loss": 0.1475, + "num_input_tokens_seen": 187006628, + "step": 1089 + }, + { + "epoch": 0.2864470309725784, + "loss": 0.13300946354866028, + "loss_ce": 0.0005631742533296347, + "loss_iou": 0.55859375, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 187006628, + "step": 1089 + }, + { + "epoch": 0.28671006773196556, + "grad_norm": 6.678243304603748, + "learning_rate": 5e-06, + "loss": 0.1563, + "num_input_tokens_seen": 187178940, + "step": 1090 + }, + { + "epoch": 0.28671006773196556, + "loss": 0.16839508712291718, + "loss_ce": 0.004485180135816336, + "loss_iou": 0.53125, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 187178940, + "step": 1090 + }, + { + "epoch": 0.28697310449135266, + "grad_norm": 17.285977120564166, + "learning_rate": 5e-06, + "loss": 0.1817, + "num_input_tokens_seen": 187351348, + "step": 1091 + }, + { + "epoch": 0.28697310449135266, + "loss": 0.24864555895328522, + "loss_ce": 0.0034063111525028944, + "loss_iou": 0.4921875, + "loss_num": 0.049072265625, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 187351348, + "step": 1091 + }, + { + "epoch": 0.2872361412507398, + "grad_norm": 5.362480990027185, + "learning_rate": 5e-06, + "loss": 0.1333, + "num_input_tokens_seen": 187523476, + "step": 1092 + }, + { + "epoch": 0.2872361412507398, + "loss": 0.19447633624076843, + "loss_ce": 0.005084256641566753, + "loss_iou": 0.70703125, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 187523476, + "step": 1092 + }, + { + "epoch": 0.2874991780101269, + "grad_norm": 9.00142705860072, + "learning_rate": 5e-06, + "loss": 0.1254, + "num_input_tokens_seen": 187695596, + "step": 1093 + }, + { + "epoch": 0.2874991780101269, + "loss": 0.13948455452919006, + "loss_ce": 0.0006295705679804087, + "loss_iou": 0.44140625, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 187695596, + "step": 1093 + }, + { + "epoch": 0.287762214769514, + "grad_norm": 11.592832787644355, + "learning_rate": 5e-06, + "loss": 0.1673, + "num_input_tokens_seen": 187867844, + "step": 1094 + }, + { + "epoch": 0.287762214769514, + "loss": 0.1788289099931717, + "loss_ce": 0.0009724590927362442, + "loss_iou": 0.5859375, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 187867844, + "step": 1094 + }, + { + "epoch": 0.2880252515289012, + "grad_norm": 5.4984009631677, + "learning_rate": 5e-06, + "loss": 0.1593, + "num_input_tokens_seen": 188039960, + "step": 1095 + }, + { + "epoch": 0.2880252515289012, + "loss": 0.2135852873325348, + "loss_ce": 0.0043262611143291, + "loss_iou": 0.458984375, + "loss_num": 0.041748046875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 188039960, + "step": 1095 + }, + { + "epoch": 0.2882882882882883, + "grad_norm": 6.039228998020695, + "learning_rate": 5e-06, + "loss": 0.1504, + "num_input_tokens_seen": 188212228, + "step": 1096 + }, + { + "epoch": 0.2882882882882883, + "loss": 0.10800454020500183, + "loss_ce": 0.0013761227019131184, + "loss_iou": 0.63671875, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 188212228, + "step": 1096 + }, + { + "epoch": 0.2885513250476754, + "grad_norm": 6.337937600162702, + "learning_rate": 5e-06, + "loss": 0.1611, + "num_input_tokens_seen": 188384548, + "step": 1097 + }, + { + "epoch": 0.2885513250476754, + "loss": 0.2018011212348938, + "loss_ce": 0.0031927230302244425, + "loss_iou": 0.40234375, + "loss_num": 0.039794921875, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 188384548, + "step": 1097 + }, + { + "epoch": 0.28881436180706255, + "grad_norm": 6.282757808284281, + "learning_rate": 5e-06, + "loss": 0.1238, + "num_input_tokens_seen": 188556612, + "step": 1098 + }, + { + "epoch": 0.28881436180706255, + "loss": 0.07978774607181549, + "loss_ce": 0.0007472233846783638, + "loss_iou": 0.6015625, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 188556612, + "step": 1098 + }, + { + "epoch": 0.28907739856644965, + "grad_norm": 5.5362247942440135, + "learning_rate": 5e-06, + "loss": 0.1424, + "num_input_tokens_seen": 188727124, + "step": 1099 + }, + { + "epoch": 0.28907739856644965, + "loss": 0.18471282720565796, + "loss_ce": 0.0071005141362547874, + "loss_iou": 0.455078125, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 188727124, + "step": 1099 + }, + { + "epoch": 0.2893404353258368, + "grad_norm": 5.455389129921704, + "learning_rate": 5e-06, + "loss": 0.0874, + "num_input_tokens_seen": 188899384, + "step": 1100 + }, + { + "epoch": 0.2893404353258368, + "loss": 0.08721458911895752, + "loss_ce": 0.0006362219573929906, + "loss_iou": 0.63671875, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 188899384, + "step": 1100 + }, + { + "epoch": 0.2896034720852239, + "grad_norm": 6.057305949672798, + "learning_rate": 5e-06, + "loss": 0.1669, + "num_input_tokens_seen": 189068024, + "step": 1101 + }, + { + "epoch": 0.2896034720852239, + "loss": 0.1826099157333374, + "loss_ce": 0.00026738218730315566, + "loss_iou": 0.61328125, + "loss_num": 0.036376953125, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 189068024, + "step": 1101 + }, + { + "epoch": 0.289866508844611, + "grad_norm": 10.886874649593773, + "learning_rate": 5e-06, + "loss": 0.1545, + "num_input_tokens_seen": 189240248, + "step": 1102 + }, + { + "epoch": 0.289866508844611, + "loss": 0.1780916154384613, + "loss_ce": 0.0009065555641427636, + "loss_iou": 0.474609375, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 189240248, + "step": 1102 + }, + { + "epoch": 0.2901295456039982, + "grad_norm": 8.357621038563343, + "learning_rate": 5e-06, + "loss": 0.1303, + "num_input_tokens_seen": 189412208, + "step": 1103 + }, + { + "epoch": 0.2901295456039982, + "loss": 0.13078002631664276, + "loss_ce": 0.001751709496602416, + "loss_iou": 0.45703125, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 189412208, + "step": 1103 + }, + { + "epoch": 0.2903925823633853, + "grad_norm": 13.284219513588615, + "learning_rate": 5e-06, + "loss": 0.1895, + "num_input_tokens_seen": 189584480, + "step": 1104 + }, + { + "epoch": 0.2903925823633853, + "loss": 0.23028159141540527, + "loss_ce": 0.003474962431937456, + "loss_iou": 0.62890625, + "loss_num": 0.04541015625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 189584480, + "step": 1104 + }, + { + "epoch": 0.29065561912277244, + "grad_norm": 6.716057098151779, + "learning_rate": 5e-06, + "loss": 0.1311, + "num_input_tokens_seen": 189755004, + "step": 1105 + }, + { + "epoch": 0.29065561912277244, + "loss": 0.12404203414916992, + "loss_ce": 0.002643106272444129, + "loss_iou": 0.61328125, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 189755004, + "step": 1105 + }, + { + "epoch": 0.29091865588215954, + "grad_norm": 13.264130378049543, + "learning_rate": 5e-06, + "loss": 0.1369, + "num_input_tokens_seen": 189927084, + "step": 1106 + }, + { + "epoch": 0.29091865588215954, + "loss": 0.10699759423732758, + "loss_ce": 0.0014678104780614376, + "loss_iou": 0.392578125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 189927084, + "step": 1106 + }, + { + "epoch": 0.29118169264154664, + "grad_norm": 19.87883744683546, + "learning_rate": 5e-06, + "loss": 0.1321, + "num_input_tokens_seen": 190097456, + "step": 1107 + }, + { + "epoch": 0.29118169264154664, + "loss": 0.19540926814079285, + "loss_ce": 0.0016531546134501696, + "loss_iou": 0.447265625, + "loss_num": 0.038818359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 190097456, + "step": 1107 + }, + { + "epoch": 0.2914447294009338, + "grad_norm": 7.462386686254148, + "learning_rate": 5e-06, + "loss": 0.1294, + "num_input_tokens_seen": 190269508, + "step": 1108 + }, + { + "epoch": 0.2914447294009338, + "loss": 0.08214541524648666, + "loss_ce": 0.004813872277736664, + "loss_iou": 0.59375, + "loss_num": 0.0155029296875, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 190269508, + "step": 1108 + }, + { + "epoch": 0.2917077661603209, + "grad_norm": 10.674953183668224, + "learning_rate": 5e-06, + "loss": 0.1763, + "num_input_tokens_seen": 190441836, + "step": 1109 + }, + { + "epoch": 0.2917077661603209, + "loss": 0.1546817272901535, + "loss_ce": 0.006824057083576918, + "loss_iou": 0.32421875, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 190441836, + "step": 1109 + }, + { + "epoch": 0.291970802919708, + "grad_norm": 6.315839345670245, + "learning_rate": 5e-06, + "loss": 0.1579, + "num_input_tokens_seen": 190614052, + "step": 1110 + }, + { + "epoch": 0.291970802919708, + "loss": 0.15810704231262207, + "loss_ce": 0.0053970692679286, + "loss_iou": 0.62109375, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 190614052, + "step": 1110 + }, + { + "epoch": 0.29223383967909516, + "grad_norm": 6.217305097644484, + "learning_rate": 5e-06, + "loss": 0.1272, + "num_input_tokens_seen": 190786084, + "step": 1111 + }, + { + "epoch": 0.29223383967909516, + "loss": 0.07959192991256714, + "loss_ce": 0.0006734670605510473, + "loss_iou": 0.53125, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 190786084, + "step": 1111 + }, + { + "epoch": 0.29249687643848227, + "grad_norm": 5.126517505767914, + "learning_rate": 5e-06, + "loss": 0.1448, + "num_input_tokens_seen": 190956588, + "step": 1112 + }, + { + "epoch": 0.29249687643848227, + "loss": 0.13071726262569427, + "loss_ce": 0.003184308996424079, + "loss_iou": 0.6328125, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 190956588, + "step": 1112 + }, + { + "epoch": 0.2927599131978694, + "grad_norm": 8.996821720570772, + "learning_rate": 5e-06, + "loss": 0.1638, + "num_input_tokens_seen": 191126948, + "step": 1113 + }, + { + "epoch": 0.2927599131978694, + "loss": 0.07688204199075699, + "loss_ce": 0.0015036254189908504, + "loss_iou": 0.490234375, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 191126948, + "step": 1113 + }, + { + "epoch": 0.29302294995725653, + "grad_norm": 5.558729986145218, + "learning_rate": 5e-06, + "loss": 0.1861, + "num_input_tokens_seen": 191299288, + "step": 1114 + }, + { + "epoch": 0.29302294995725653, + "loss": 0.15988363325595856, + "loss_ce": 0.0014058587839826941, + "loss_iou": 0.5390625, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 191299288, + "step": 1114 + }, + { + "epoch": 0.29328598671664363, + "grad_norm": 6.8874530652883825, + "learning_rate": 5e-06, + "loss": 0.1307, + "num_input_tokens_seen": 191471672, + "step": 1115 + }, + { + "epoch": 0.29328598671664363, + "loss": 0.09309347718954086, + "loss_ce": 0.0007167698349803686, + "loss_iou": 0.4453125, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 191471672, + "step": 1115 + }, + { + "epoch": 0.2935490234760308, + "grad_norm": 13.843828225173697, + "learning_rate": 5e-06, + "loss": 0.1446, + "num_input_tokens_seen": 191643552, + "step": 1116 + }, + { + "epoch": 0.2935490234760308, + "loss": 0.08135861903429031, + "loss_ce": 0.00021237613691482693, + "loss_iou": 0.49609375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 191643552, + "step": 1116 + }, + { + "epoch": 0.2938120602354179, + "grad_norm": 5.436785087484111, + "learning_rate": 5e-06, + "loss": 0.1364, + "num_input_tokens_seen": 191815984, + "step": 1117 + }, + { + "epoch": 0.2938120602354179, + "loss": 0.10953962057828903, + "loss_ce": 0.0004392758710309863, + "loss_iou": 0.5546875, + "loss_num": 0.0218505859375, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 191815984, + "step": 1117 + }, + { + "epoch": 0.294075096994805, + "grad_norm": 11.598374668500005, + "learning_rate": 5e-06, + "loss": 0.1268, + "num_input_tokens_seen": 191988036, + "step": 1118 + }, + { + "epoch": 0.294075096994805, + "loss": 0.0961490124464035, + "loss_ce": 0.0009952039690688252, + "loss_iou": 0.65625, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 191988036, + "step": 1118 + }, + { + "epoch": 0.29433813375419215, + "grad_norm": 7.498841309335166, + "learning_rate": 5e-06, + "loss": 0.1726, + "num_input_tokens_seen": 192160116, + "step": 1119 + }, + { + "epoch": 0.29433813375419215, + "loss": 0.25876477360725403, + "loss_ce": 0.004339736420661211, + "loss_iou": 0.56640625, + "loss_num": 0.05078125, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 192160116, + "step": 1119 + }, + { + "epoch": 0.29460117051357926, + "grad_norm": 5.645236594614784, + "learning_rate": 5e-06, + "loss": 0.1284, + "num_input_tokens_seen": 192332060, + "step": 1120 + }, + { + "epoch": 0.29460117051357926, + "loss": 0.07815182209014893, + "loss_ce": 0.0011254575802013278, + "loss_iou": 0.6171875, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 192332060, + "step": 1120 + }, + { + "epoch": 0.2948642072729664, + "grad_norm": 5.473412167736241, + "learning_rate": 5e-06, + "loss": 0.1499, + "num_input_tokens_seen": 192504064, + "step": 1121 + }, + { + "epoch": 0.2948642072729664, + "loss": 0.19324743747711182, + "loss_ce": 0.0004373906413093209, + "loss_iou": 0.51171875, + "loss_num": 0.03857421875, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 192504064, + "step": 1121 + }, + { + "epoch": 0.2951272440323535, + "grad_norm": 15.77937603528804, + "learning_rate": 5e-06, + "loss": 0.1454, + "num_input_tokens_seen": 192676312, + "step": 1122 + }, + { + "epoch": 0.2951272440323535, + "loss": 0.11738383769989014, + "loss_ce": 0.0015696310438215733, + "loss_iou": 0.56640625, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 192676312, + "step": 1122 + }, + { + "epoch": 0.2953902807917406, + "grad_norm": 6.595809745274096, + "learning_rate": 5e-06, + "loss": 0.1605, + "num_input_tokens_seen": 192848608, + "step": 1123 + }, + { + "epoch": 0.2953902807917406, + "loss": 0.1380428671836853, + "loss_ce": 0.0005306481616571546, + "loss_iou": 0.453125, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 192848608, + "step": 1123 + }, + { + "epoch": 0.2956533175511278, + "grad_norm": 5.143986978274753, + "learning_rate": 5e-06, + "loss": 0.2009, + "num_input_tokens_seen": 193021044, + "step": 1124 + }, + { + "epoch": 0.2956533175511278, + "loss": 0.2367524653673172, + "loss_ce": 0.00335403298959136, + "loss_iou": 0.796875, + "loss_num": 0.046630859375, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 193021044, + "step": 1124 + }, + { + "epoch": 0.2959163543105149, + "grad_norm": 6.676850446443053, + "learning_rate": 5e-06, + "loss": 0.1559, + "num_input_tokens_seen": 193193124, + "step": 1125 + }, + { + "epoch": 0.2959163543105149, + "loss": 0.1376284509897232, + "loss_ce": 0.002099899807944894, + "loss_iou": 0.5234375, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 193193124, + "step": 1125 + }, + { + "epoch": 0.29617939106990204, + "grad_norm": 6.051359778802083, + "learning_rate": 5e-06, + "loss": 0.168, + "num_input_tokens_seen": 193365612, + "step": 1126 + }, + { + "epoch": 0.29617939106990204, + "loss": 0.11157628893852234, + "loss_ce": 0.001163685112260282, + "loss_iou": 0.640625, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 193365612, + "step": 1126 + }, + { + "epoch": 0.29644242782928915, + "grad_norm": 9.91415103149531, + "learning_rate": 5e-06, + "loss": 0.1052, + "num_input_tokens_seen": 193537580, + "step": 1127 + }, + { + "epoch": 0.29644242782928915, + "loss": 0.06942566484212875, + "loss_ce": 0.004575815983116627, + "loss_iou": 0.625, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 193537580, + "step": 1127 + }, + { + "epoch": 0.29670546458867625, + "grad_norm": 8.171761917591171, + "learning_rate": 5e-06, + "loss": 0.179, + "num_input_tokens_seen": 193709920, + "step": 1128 + }, + { + "epoch": 0.29670546458867625, + "loss": 0.15288731455802917, + "loss_ce": 0.0006046106573194265, + "loss_iou": 0.65234375, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 193709920, + "step": 1128 + }, + { + "epoch": 0.2969685013480634, + "grad_norm": 4.591948701112417, + "learning_rate": 5e-06, + "loss": 0.1129, + "num_input_tokens_seen": 193878608, + "step": 1129 + }, + { + "epoch": 0.2969685013480634, + "loss": 0.15515002608299255, + "loss_ce": 0.005400286056101322, + "loss_iou": 0.44140625, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 193878608, + "step": 1129 + }, + { + "epoch": 0.2972315381074505, + "grad_norm": 9.7802730005981, + "learning_rate": 5e-06, + "loss": 0.1442, + "num_input_tokens_seen": 194051024, + "step": 1130 + }, + { + "epoch": 0.2972315381074505, + "loss": 0.13672733306884766, + "loss_ce": 0.0005579069838859141, + "loss_iou": 0.6328125, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 194051024, + "step": 1130 + }, + { + "epoch": 0.2974945748668376, + "grad_norm": 17.109119761118713, + "learning_rate": 5e-06, + "loss": 0.1283, + "num_input_tokens_seen": 194223696, + "step": 1131 + }, + { + "epoch": 0.2974945748668376, + "loss": 0.0957454964518547, + "loss_ce": 0.0012325569987297058, + "loss_iou": 0.435546875, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 194223696, + "step": 1131 + }, + { + "epoch": 0.29775761162622477, + "grad_norm": 4.844704657006506, + "learning_rate": 5e-06, + "loss": 0.1153, + "num_input_tokens_seen": 194393332, + "step": 1132 + }, + { + "epoch": 0.29775761162622477, + "loss": 0.08335787057876587, + "loss_ce": 0.000533167680259794, + "loss_iou": 0.70703125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 194393332, + "step": 1132 + }, + { + "epoch": 0.2980206483856119, + "grad_norm": 12.589562992323502, + "learning_rate": 5e-06, + "loss": 0.117, + "num_input_tokens_seen": 194563660, + "step": 1133 + }, + { + "epoch": 0.2980206483856119, + "loss": 0.10209088772535324, + "loss_ce": 0.003366521093994379, + "loss_iou": 0.4453125, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 194563660, + "step": 1133 + }, + { + "epoch": 0.29828368514499903, + "grad_norm": 5.818392431333322, + "learning_rate": 5e-06, + "loss": 0.1645, + "num_input_tokens_seen": 194735892, + "step": 1134 + }, + { + "epoch": 0.29828368514499903, + "loss": 0.09164222329854965, + "loss_ce": 0.004392467439174652, + "loss_iou": 0.6328125, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 194735892, + "step": 1134 + }, + { + "epoch": 0.29854672190438614, + "grad_norm": 6.569465101392607, + "learning_rate": 5e-06, + "loss": 0.1288, + "num_input_tokens_seen": 194907844, + "step": 1135 + }, + { + "epoch": 0.29854672190438614, + "loss": 0.12394984811544418, + "loss_ce": 0.0028560941573232412, + "loss_iou": 0.390625, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 194907844, + "step": 1135 + }, + { + "epoch": 0.29880975866377324, + "grad_norm": 5.132755422401755, + "learning_rate": 5e-06, + "loss": 0.1262, + "num_input_tokens_seen": 195079704, + "step": 1136 + }, + { + "epoch": 0.29880975866377324, + "loss": 0.11163240671157837, + "loss_ce": 0.001341882161796093, + "loss_iou": 0.4921875, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 195079704, + "step": 1136 + }, + { + "epoch": 0.2990727954231604, + "grad_norm": 12.81251876887425, + "learning_rate": 5e-06, + "loss": 0.1413, + "num_input_tokens_seen": 195251904, + "step": 1137 + }, + { + "epoch": 0.2990727954231604, + "loss": 0.18034929037094116, + "loss_ce": 0.0009364524157717824, + "loss_iou": 0.482421875, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 195251904, + "step": 1137 + }, + { + "epoch": 0.2993358321825475, + "grad_norm": 6.432718409068677, + "learning_rate": 5e-06, + "loss": 0.1404, + "num_input_tokens_seen": 195424180, + "step": 1138 + }, + { + "epoch": 0.2993358321825475, + "loss": 0.08383812010288239, + "loss_ce": 0.0012575555592775345, + "loss_iou": 0.59375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 195424180, + "step": 1138 + }, + { + "epoch": 0.29959886894193466, + "grad_norm": 11.352446611671667, + "learning_rate": 5e-06, + "loss": 0.1314, + "num_input_tokens_seen": 195596468, + "step": 1139 + }, + { + "epoch": 0.29959886894193466, + "loss": 0.1503646820783615, + "loss_ce": 0.0016220146790146828, + "loss_iou": 0.52734375, + "loss_num": 0.02978515625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 195596468, + "step": 1139 + }, + { + "epoch": 0.29986190570132176, + "grad_norm": 7.444433348783749, + "learning_rate": 5e-06, + "loss": 0.1299, + "num_input_tokens_seen": 195768360, + "step": 1140 + }, + { + "epoch": 0.29986190570132176, + "loss": 0.12922053039073944, + "loss_ce": 0.0006804917939007282, + "loss_iou": 0.62109375, + "loss_num": 0.025634765625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 195768360, + "step": 1140 + }, + { + "epoch": 0.30012494246070887, + "grad_norm": 6.813355666039827, + "learning_rate": 5e-06, + "loss": 0.1133, + "num_input_tokens_seen": 195940504, + "step": 1141 + }, + { + "epoch": 0.30012494246070887, + "loss": 0.14036604762077332, + "loss_ce": 0.002609711140394211, + "loss_iou": 0.515625, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 195940504, + "step": 1141 + }, + { + "epoch": 0.300387979220096, + "grad_norm": 9.605056179900458, + "learning_rate": 5e-06, + "loss": 0.1463, + "num_input_tokens_seen": 196112784, + "step": 1142 + }, + { + "epoch": 0.300387979220096, + "loss": 0.14964430034160614, + "loss_ce": 0.0015119716990739107, + "loss_iou": 0.44140625, + "loss_num": 0.0296630859375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 196112784, + "step": 1142 + }, + { + "epoch": 0.3006510159794831, + "grad_norm": 7.338861721143607, + "learning_rate": 5e-06, + "loss": 0.1198, + "num_input_tokens_seen": 196285072, + "step": 1143 + }, + { + "epoch": 0.3006510159794831, + "loss": 0.13087643682956696, + "loss_ce": 0.0021532890386879444, + "loss_iou": 0.46875, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 196285072, + "step": 1143 + }, + { + "epoch": 0.30091405273887023, + "grad_norm": 11.553065995211972, + "learning_rate": 5e-06, + "loss": 0.1705, + "num_input_tokens_seen": 196457252, + "step": 1144 + }, + { + "epoch": 0.30091405273887023, + "loss": 0.20345042645931244, + "loss_ce": 0.0005085308803245425, + "loss_iou": 0.5390625, + "loss_num": 0.04052734375, + "loss_xval": 0.203125, + "num_input_tokens_seen": 196457252, + "step": 1144 + }, + { + "epoch": 0.3011770894982574, + "grad_norm": 7.552187403058781, + "learning_rate": 5e-06, + "loss": 0.1652, + "num_input_tokens_seen": 196629168, + "step": 1145 + }, + { + "epoch": 0.3011770894982574, + "loss": 0.17339852452278137, + "loss_ce": 0.004331144969910383, + "loss_iou": 0.412109375, + "loss_num": 0.03369140625, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 196629168, + "step": 1145 + }, + { + "epoch": 0.3014401262576445, + "grad_norm": 5.762343246857412, + "learning_rate": 5e-06, + "loss": 0.1213, + "num_input_tokens_seen": 196801432, + "step": 1146 + }, + { + "epoch": 0.3014401262576445, + "loss": 0.07828192412853241, + "loss_ce": 0.001377625041641295, + "loss_iou": 0.416015625, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 196801432, + "step": 1146 + }, + { + "epoch": 0.30170316301703165, + "grad_norm": 5.174420810424456, + "learning_rate": 5e-06, + "loss": 0.1737, + "num_input_tokens_seen": 196973908, + "step": 1147 + }, + { + "epoch": 0.30170316301703165, + "loss": 0.22339066863059998, + "loss_ce": 0.004701712634414434, + "loss_iou": 0.53125, + "loss_num": 0.043701171875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 196973908, + "step": 1147 + }, + { + "epoch": 0.30196619977641875, + "grad_norm": 10.875286083758624, + "learning_rate": 5e-06, + "loss": 0.1878, + "num_input_tokens_seen": 197146088, + "step": 1148 + }, + { + "epoch": 0.30196619977641875, + "loss": 0.1432519108057022, + "loss_ce": 0.002657424658536911, + "loss_iou": 0.66015625, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 197146088, + "step": 1148 + }, + { + "epoch": 0.30222923653580586, + "grad_norm": 5.705651681197872, + "learning_rate": 5e-06, + "loss": 0.1464, + "num_input_tokens_seen": 197318460, + "step": 1149 + }, + { + "epoch": 0.30222923653580586, + "loss": 0.12248589098453522, + "loss_ce": 0.002002496039494872, + "loss_iou": 0.5234375, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 197318460, + "step": 1149 + }, + { + "epoch": 0.302492273295193, + "grad_norm": 6.2074966030457706, + "learning_rate": 5e-06, + "loss": 0.1398, + "num_input_tokens_seen": 197490428, + "step": 1150 + }, + { + "epoch": 0.302492273295193, + "loss": 0.1659487783908844, + "loss_ce": 0.0009402353898622096, + "loss_iou": 0.5, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 197490428, + "step": 1150 + }, + { + "epoch": 0.3027553100545801, + "grad_norm": 11.278645202454504, + "learning_rate": 5e-06, + "loss": 0.1513, + "num_input_tokens_seen": 197662972, + "step": 1151 + }, + { + "epoch": 0.3027553100545801, + "loss": 0.12700702250003815, + "loss_ce": 0.0012135641882196069, + "loss_iou": 0.58984375, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 197662972, + "step": 1151 + }, + { + "epoch": 0.3030183468139673, + "grad_norm": 5.7121477930459665, + "learning_rate": 5e-06, + "loss": 0.1739, + "num_input_tokens_seen": 197835168, + "step": 1152 + }, + { + "epoch": 0.3030183468139673, + "loss": 0.10988777130842209, + "loss_ce": 0.004174881149083376, + "loss_iou": 0.546875, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 197835168, + "step": 1152 + }, + { + "epoch": 0.3032813835733544, + "grad_norm": 12.425343043979453, + "learning_rate": 5e-06, + "loss": 0.1541, + "num_input_tokens_seen": 198007440, + "step": 1153 + }, + { + "epoch": 0.3032813835733544, + "loss": 0.18807393312454224, + "loss_ce": 0.0031679358799010515, + "loss_iou": 0.478515625, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 198007440, + "step": 1153 + }, + { + "epoch": 0.3035444203327415, + "grad_norm": 5.925789792578691, + "learning_rate": 5e-06, + "loss": 0.1132, + "num_input_tokens_seen": 198179896, + "step": 1154 + }, + { + "epoch": 0.3035444203327415, + "loss": 0.07772859930992126, + "loss_ce": 0.0008548187324777246, + "loss_iou": 0.66015625, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 198179896, + "step": 1154 + }, + { + "epoch": 0.30380745709212864, + "grad_norm": 5.901916078113717, + "learning_rate": 5e-06, + "loss": 0.1559, + "num_input_tokens_seen": 198352320, + "step": 1155 + }, + { + "epoch": 0.30380745709212864, + "loss": 0.17940585315227509, + "loss_ce": 0.0011221629101783037, + "loss_iou": 0.6484375, + "loss_num": 0.03564453125, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 198352320, + "step": 1155 + }, + { + "epoch": 0.30407049385151574, + "grad_norm": 10.483911413706299, + "learning_rate": 5e-06, + "loss": 0.1441, + "num_input_tokens_seen": 198524512, + "step": 1156 + }, + { + "epoch": 0.30407049385151574, + "loss": 0.12064538896083832, + "loss_ce": 0.0006197594339028001, + "loss_iou": 0.6328125, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 198524512, + "step": 1156 + }, + { + "epoch": 0.30433353061090285, + "grad_norm": 4.522315194840346, + "learning_rate": 5e-06, + "loss": 0.1278, + "num_input_tokens_seen": 198696612, + "step": 1157 + }, + { + "epoch": 0.30433353061090285, + "loss": 0.09706555306911469, + "loss_ce": 0.0013929473934695125, + "loss_iou": 0.515625, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 198696612, + "step": 1157 + }, + { + "epoch": 0.30459656737029, + "grad_norm": 28.08652601411638, + "learning_rate": 5e-06, + "loss": 0.1261, + "num_input_tokens_seen": 198868856, + "step": 1158 + }, + { + "epoch": 0.30459656737029, + "loss": 0.052179381251335144, + "loss_ce": 0.00020794683950953186, + "loss_iou": 0.53515625, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 198868856, + "step": 1158 + }, + { + "epoch": 0.3048596041296771, + "grad_norm": 7.240097871891992, + "learning_rate": 5e-06, + "loss": 0.1803, + "num_input_tokens_seen": 199041064, + "step": 1159 + }, + { + "epoch": 0.3048596041296771, + "loss": 0.18336477875709534, + "loss_ce": 0.0020293283741921186, + "loss_iou": 0.5546875, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 199041064, + "step": 1159 + }, + { + "epoch": 0.30512264088906427, + "grad_norm": 5.628334621930633, + "learning_rate": 5e-06, + "loss": 0.1391, + "num_input_tokens_seen": 199213476, + "step": 1160 + }, + { + "epoch": 0.30512264088906427, + "loss": 0.12429136037826538, + "loss_ce": 0.0002068809699267149, + "loss_iou": 0.79296875, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 199213476, + "step": 1160 + }, + { + "epoch": 0.30538567764845137, + "grad_norm": 5.043014865834959, + "learning_rate": 5e-06, + "loss": 0.1778, + "num_input_tokens_seen": 199385580, + "step": 1161 + }, + { + "epoch": 0.30538567764845137, + "loss": 0.09477389603853226, + "loss_ce": 0.001115447492338717, + "loss_iou": 0.44140625, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 199385580, + "step": 1161 + }, + { + "epoch": 0.3056487144078385, + "grad_norm": 5.156573422162299, + "learning_rate": 5e-06, + "loss": 0.1214, + "num_input_tokens_seen": 199556156, + "step": 1162 + }, + { + "epoch": 0.3056487144078385, + "loss": 0.14625222980976105, + "loss_ce": 0.00046976495650596917, + "loss_iou": 0.64453125, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 199556156, + "step": 1162 + }, + { + "epoch": 0.30591175116722563, + "grad_norm": 4.702903740874693, + "learning_rate": 5e-06, + "loss": 0.1286, + "num_input_tokens_seen": 199728484, + "step": 1163 + }, + { + "epoch": 0.30591175116722563, + "loss": 0.156254380941391, + "loss_ce": 0.001774407341144979, + "loss_iou": 0.5078125, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 199728484, + "step": 1163 + }, + { + "epoch": 0.30617478792661273, + "grad_norm": 5.380502082020364, + "learning_rate": 5e-06, + "loss": 0.1479, + "num_input_tokens_seen": 199900748, + "step": 1164 + }, + { + "epoch": 0.30617478792661273, + "loss": 0.16096284985542297, + "loss_ce": 0.0019052416319027543, + "loss_iou": 0.7421875, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 199900748, + "step": 1164 + }, + { + "epoch": 0.3064378246859999, + "grad_norm": 5.7025282283031595, + "learning_rate": 5e-06, + "loss": 0.149, + "num_input_tokens_seen": 200072488, + "step": 1165 + }, + { + "epoch": 0.3064378246859999, + "loss": 0.17774316668510437, + "loss_ce": 0.00037499924656003714, + "loss_iou": 0.4921875, + "loss_num": 0.035400390625, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 200072488, + "step": 1165 + }, + { + "epoch": 0.306700861445387, + "grad_norm": 5.532941140209234, + "learning_rate": 5e-06, + "loss": 0.1311, + "num_input_tokens_seen": 200244696, + "step": 1166 + }, + { + "epoch": 0.306700861445387, + "loss": 0.12291580438613892, + "loss_ce": 0.0016694690566509962, + "loss_iou": 0.703125, + "loss_num": 0.0242919921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 200244696, + "step": 1166 + }, + { + "epoch": 0.3069638982047741, + "grad_norm": 5.5856010120270705, + "learning_rate": 5e-06, + "loss": 0.1263, + "num_input_tokens_seen": 200417156, + "step": 1167 + }, + { + "epoch": 0.3069638982047741, + "loss": 0.12629887461662292, + "loss_ce": 0.0023059630766510963, + "loss_iou": 0.5859375, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 200417156, + "step": 1167 + }, + { + "epoch": 0.30722693496416126, + "grad_norm": 4.725090769923517, + "learning_rate": 5e-06, + "loss": 0.1267, + "num_input_tokens_seen": 200587780, + "step": 1168 + }, + { + "epoch": 0.30722693496416126, + "loss": 0.1289183497428894, + "loss_ce": 0.0026366179808974266, + "loss_iou": 0.6953125, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 200587780, + "step": 1168 + }, + { + "epoch": 0.30748997172354836, + "grad_norm": 4.307353086128186, + "learning_rate": 5e-06, + "loss": 0.1389, + "num_input_tokens_seen": 200759832, + "step": 1169 + }, + { + "epoch": 0.30748997172354836, + "loss": 0.16026800870895386, + "loss_ce": 0.0027362804394215345, + "loss_iou": 0.431640625, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 200759832, + "step": 1169 + }, + { + "epoch": 0.30775300848293546, + "grad_norm": 11.530918199945434, + "learning_rate": 5e-06, + "loss": 0.1914, + "num_input_tokens_seen": 200929932, + "step": 1170 + }, + { + "epoch": 0.30775300848293546, + "loss": 0.19007167220115662, + "loss_ce": 0.0034261636901646852, + "loss_iou": 0.51953125, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 200929932, + "step": 1170 + }, + { + "epoch": 0.3080160452423226, + "grad_norm": 7.858471531155482, + "learning_rate": 5e-06, + "loss": 0.1989, + "num_input_tokens_seen": 201100520, + "step": 1171 + }, + { + "epoch": 0.3080160452423226, + "loss": 0.204483300447464, + "loss_ce": 0.0005648470250889659, + "loss_iou": 0.55859375, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 201100520, + "step": 1171 + }, + { + "epoch": 0.3082790820017097, + "grad_norm": 5.1657164225988, + "learning_rate": 5e-06, + "loss": 0.1676, + "num_input_tokens_seen": 201272808, + "step": 1172 + }, + { + "epoch": 0.3082790820017097, + "loss": 0.12782040238380432, + "loss_ce": 0.006909756921231747, + "loss_iou": 0.67578125, + "loss_num": 0.0242919921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 201272808, + "step": 1172 + }, + { + "epoch": 0.3085421187610969, + "grad_norm": 5.7855382812859295, + "learning_rate": 5e-06, + "loss": 0.1718, + "num_input_tokens_seen": 201445044, + "step": 1173 + }, + { + "epoch": 0.3085421187610969, + "loss": 0.22742098569869995, + "loss_ce": 0.0040017981082201, + "loss_iou": 0.42578125, + "loss_num": 0.044677734375, + "loss_xval": 0.2236328125, + "num_input_tokens_seen": 201445044, + "step": 1173 + }, + { + "epoch": 0.308805155520484, + "grad_norm": 19.043518960426944, + "learning_rate": 5e-06, + "loss": 0.1271, + "num_input_tokens_seen": 201617324, + "step": 1174 + }, + { + "epoch": 0.308805155520484, + "loss": 0.09914430975914001, + "loss_ce": 0.001243914244696498, + "loss_iou": 0.6171875, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 201617324, + "step": 1174 + }, + { + "epoch": 0.3090681922798711, + "grad_norm": 7.337870221119852, + "learning_rate": 5e-06, + "loss": 0.1898, + "num_input_tokens_seen": 201789648, + "step": 1175 + }, + { + "epoch": 0.3090681922798711, + "loss": 0.2523178160190582, + "loss_ce": 0.0009750226745381951, + "loss_iou": 0.52734375, + "loss_num": 0.05029296875, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 201789648, + "step": 1175 + }, + { + "epoch": 0.30933122903925825, + "grad_norm": 18.238078934317997, + "learning_rate": 5e-06, + "loss": 0.1987, + "num_input_tokens_seen": 201961724, + "step": 1176 + }, + { + "epoch": 0.30933122903925825, + "loss": 0.17623552680015564, + "loss_ce": 0.003994307480752468, + "loss_iou": 0.396484375, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 201961724, + "step": 1176 + }, + { + "epoch": 0.30959426579864535, + "grad_norm": 4.51493162048169, + "learning_rate": 5e-06, + "loss": 0.1611, + "num_input_tokens_seen": 202133700, + "step": 1177 + }, + { + "epoch": 0.30959426579864535, + "loss": 0.1072007492184639, + "loss_ce": 0.005058412905782461, + "loss_iou": 0.52734375, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 202133700, + "step": 1177 + }, + { + "epoch": 0.3098573025580325, + "grad_norm": 5.072583382632679, + "learning_rate": 5e-06, + "loss": 0.1459, + "num_input_tokens_seen": 202304256, + "step": 1178 + }, + { + "epoch": 0.3098573025580325, + "loss": 0.14473965764045715, + "loss_ce": 0.0024361968971788883, + "loss_iou": 0.51953125, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 202304256, + "step": 1178 + }, + { + "epoch": 0.3101203393174196, + "grad_norm": 7.387732741099245, + "learning_rate": 5e-06, + "loss": 0.1598, + "num_input_tokens_seen": 202476488, + "step": 1179 + }, + { + "epoch": 0.3101203393174196, + "loss": 0.25686854124069214, + "loss_ce": 0.005464731715619564, + "loss_iou": 0.408203125, + "loss_num": 0.05029296875, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 202476488, + "step": 1179 + }, + { + "epoch": 0.3103833760768067, + "grad_norm": 7.564357354372493, + "learning_rate": 5e-06, + "loss": 0.1721, + "num_input_tokens_seen": 202648844, + "step": 1180 + }, + { + "epoch": 0.3103833760768067, + "loss": 0.2612742781639099, + "loss_ce": 0.00047106179408729076, + "loss_iou": 0.546875, + "loss_num": 0.05224609375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 202648844, + "step": 1180 + }, + { + "epoch": 0.3106464128361939, + "grad_norm": 12.304004570686834, + "learning_rate": 5e-06, + "loss": 0.1653, + "num_input_tokens_seen": 202821176, + "step": 1181 + }, + { + "epoch": 0.3106464128361939, + "loss": 0.2418098747730255, + "loss_ce": 0.0016365369083359838, + "loss_iou": 0.453125, + "loss_num": 0.048095703125, + "loss_xval": 0.240234375, + "num_input_tokens_seen": 202821176, + "step": 1181 + }, + { + "epoch": 0.310909449595581, + "grad_norm": 4.510810550512272, + "learning_rate": 5e-06, + "loss": 0.127, + "num_input_tokens_seen": 202993428, + "step": 1182 + }, + { + "epoch": 0.310909449595581, + "loss": 0.058775611221790314, + "loss_ce": 0.0007006583036854863, + "loss_iou": 0.390625, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 202993428, + "step": 1182 + }, + { + "epoch": 0.3111724863549681, + "grad_norm": 44.4539532395822, + "learning_rate": 5e-06, + "loss": 0.1567, + "num_input_tokens_seen": 203164156, + "step": 1183 + }, + { + "epoch": 0.3111724863549681, + "loss": 0.16502994298934937, + "loss_ce": 0.0006927890353836119, + "loss_iou": 0.55078125, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 203164156, + "step": 1183 + }, + { + "epoch": 0.31143552311435524, + "grad_norm": 7.885064131627441, + "learning_rate": 5e-06, + "loss": 0.1458, + "num_input_tokens_seen": 203336328, + "step": 1184 + }, + { + "epoch": 0.31143552311435524, + "loss": 0.07413887977600098, + "loss_ce": 0.00013374855916481465, + "loss_iou": 0.62109375, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 203336328, + "step": 1184 + }, + { + "epoch": 0.31169855987374234, + "grad_norm": 5.056026310722222, + "learning_rate": 5e-06, + "loss": 0.1111, + "num_input_tokens_seen": 203508700, + "step": 1185 + }, + { + "epoch": 0.31169855987374234, + "loss": 0.07386209070682526, + "loss_ce": 0.0002842045505531132, + "loss_iou": 0.4609375, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 203508700, + "step": 1185 + }, + { + "epoch": 0.3119615966331295, + "grad_norm": 9.611820717208333, + "learning_rate": 5e-06, + "loss": 0.1689, + "num_input_tokens_seen": 203680940, + "step": 1186 + }, + { + "epoch": 0.3119615966331295, + "loss": 0.13154730200767517, + "loss_ce": 0.0037701984401792288, + "loss_iou": 0.59765625, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 203680940, + "step": 1186 + }, + { + "epoch": 0.3122246333925166, + "grad_norm": 10.99160414493074, + "learning_rate": 5e-06, + "loss": 0.1515, + "num_input_tokens_seen": 203852944, + "step": 1187 + }, + { + "epoch": 0.3122246333925166, + "loss": 0.17437157034873962, + "loss_ce": 0.0008486199658364058, + "loss_iou": 0.53515625, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 203852944, + "step": 1187 + }, + { + "epoch": 0.3124876701519037, + "grad_norm": 7.566892022382808, + "learning_rate": 5e-06, + "loss": 0.1357, + "num_input_tokens_seen": 204025224, + "step": 1188 + }, + { + "epoch": 0.3124876701519037, + "loss": 0.16989970207214355, + "loss_ce": 0.004982716403901577, + "loss_iou": 0.4765625, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 204025224, + "step": 1188 + }, + { + "epoch": 0.31275070691129087, + "grad_norm": 4.336576845925468, + "learning_rate": 5e-06, + "loss": 0.1629, + "num_input_tokens_seen": 204197436, + "step": 1189 + }, + { + "epoch": 0.31275070691129087, + "loss": 0.30051514506340027, + "loss_ce": 0.0009546162909828126, + "loss_iou": 0.390625, + "loss_num": 0.06005859375, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 204197436, + "step": 1189 + }, + { + "epoch": 0.31301374367067797, + "grad_norm": 8.328730704134305, + "learning_rate": 5e-06, + "loss": 0.1442, + "num_input_tokens_seen": 204369676, + "step": 1190 + }, + { + "epoch": 0.31301374367067797, + "loss": 0.18648235499858856, + "loss_ce": 0.0020035963971167803, + "loss_iou": 0.44921875, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 204369676, + "step": 1190 + }, + { + "epoch": 0.3132767804300651, + "grad_norm": 6.248602390066733, + "learning_rate": 5e-06, + "loss": 0.2073, + "num_input_tokens_seen": 204541920, + "step": 1191 + }, + { + "epoch": 0.3132767804300651, + "loss": 0.12927605211734772, + "loss_ce": 0.0012548138620331883, + "loss_iou": 0.51953125, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 204541920, + "step": 1191 + }, + { + "epoch": 0.31353981718945223, + "grad_norm": 13.468629090291936, + "learning_rate": 5e-06, + "loss": 0.1227, + "num_input_tokens_seen": 204714352, + "step": 1192 + }, + { + "epoch": 0.31353981718945223, + "loss": 0.13916926085948944, + "loss_ce": 0.002206363482400775, + "loss_iou": 0.64453125, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 204714352, + "step": 1192 + }, + { + "epoch": 0.31380285394883933, + "grad_norm": 14.97613941577105, + "learning_rate": 5e-06, + "loss": 0.1271, + "num_input_tokens_seen": 204886516, + "step": 1193 + }, + { + "epoch": 0.31380285394883933, + "loss": 0.07034310698509216, + "loss_ce": 0.002716155955567956, + "loss_iou": 0.53515625, + "loss_num": 0.0135498046875, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 204886516, + "step": 1193 + }, + { + "epoch": 0.3140658907082265, + "grad_norm": 7.0534203975755085, + "learning_rate": 5e-06, + "loss": 0.1572, + "num_input_tokens_seen": 205058780, + "step": 1194 + }, + { + "epoch": 0.3140658907082265, + "loss": 0.1390216201543808, + "loss_ce": 0.00144837680272758, + "loss_iou": 0.63671875, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 205058780, + "step": 1194 + }, + { + "epoch": 0.3143289274676136, + "grad_norm": 8.447198281833328, + "learning_rate": 5e-06, + "loss": 0.125, + "num_input_tokens_seen": 205231072, + "step": 1195 + }, + { + "epoch": 0.3143289274676136, + "loss": 0.13909873366355896, + "loss_ce": 0.00030477988184429705, + "loss_iou": 0.5390625, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 205231072, + "step": 1195 + }, + { + "epoch": 0.3145919642270007, + "grad_norm": 11.546456728325778, + "learning_rate": 5e-06, + "loss": 0.1617, + "num_input_tokens_seen": 205403380, + "step": 1196 + }, + { + "epoch": 0.3145919642270007, + "loss": 0.21954891085624695, + "loss_ce": 0.0075127785094082355, + "loss_iou": NaN, + "loss_num": 0.04248046875, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 205403380, + "step": 1196 + }, + { + "epoch": 0.31485500098638786, + "grad_norm": 22.283163028622614, + "learning_rate": 5e-06, + "loss": 0.1286, + "num_input_tokens_seen": 205575536, + "step": 1197 + }, + { + "epoch": 0.31485500098638786, + "loss": 0.10035932064056396, + "loss_ce": 0.0016349535435438156, + "loss_iou": 0.5703125, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 205575536, + "step": 1197 + }, + { + "epoch": 0.31511803774577496, + "grad_norm": 13.092900629924303, + "learning_rate": 5e-06, + "loss": 0.1357, + "num_input_tokens_seen": 205747664, + "step": 1198 + }, + { + "epoch": 0.31511803774577496, + "loss": 0.1355750560760498, + "loss_ce": 0.0009009751374833286, + "loss_iou": 0.46484375, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 205747664, + "step": 1198 + }, + { + "epoch": 0.3153810745051621, + "grad_norm": 4.560396330595116, + "learning_rate": 5e-06, + "loss": 0.1153, + "num_input_tokens_seen": 205919612, + "step": 1199 + }, + { + "epoch": 0.3153810745051621, + "loss": 0.085871621966362, + "loss_ce": 0.0012463756138458848, + "loss_iou": 0.478515625, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 205919612, + "step": 1199 + }, + { + "epoch": 0.3156441112645492, + "grad_norm": 7.441372297845623, + "learning_rate": 5e-06, + "loss": 0.1451, + "num_input_tokens_seen": 206092012, + "step": 1200 + }, + { + "epoch": 0.3156441112645492, + "loss": 0.13713139295578003, + "loss_ce": 0.004868209362030029, + "loss_iou": 0.6015625, + "loss_num": 0.0264892578125, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 206092012, + "step": 1200 + }, + { + "epoch": 0.3159071480239363, + "grad_norm": 5.3666102379931795, + "learning_rate": 5e-06, + "loss": 0.1093, + "num_input_tokens_seen": 206262016, + "step": 1201 + }, + { + "epoch": 0.3159071480239363, + "loss": 0.14059683680534363, + "loss_ce": 0.001070460770279169, + "loss_iou": 0.53125, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 206262016, + "step": 1201 + }, + { + "epoch": 0.3161701847833235, + "grad_norm": 8.935322327098595, + "learning_rate": 5e-06, + "loss": 0.1362, + "num_input_tokens_seen": 206434192, + "step": 1202 + }, + { + "epoch": 0.3161701847833235, + "loss": 0.13417284190654755, + "loss_ce": 0.0015129297971725464, + "loss_iou": 0.671875, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 206434192, + "step": 1202 + }, + { + "epoch": 0.3164332215427106, + "grad_norm": 10.825802927092472, + "learning_rate": 5e-06, + "loss": 0.1619, + "num_input_tokens_seen": 206604572, + "step": 1203 + }, + { + "epoch": 0.3164332215427106, + "loss": 0.1651817262172699, + "loss_ce": 0.002462008036673069, + "loss_iou": 0.38671875, + "loss_num": 0.032470703125, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 206604572, + "step": 1203 + }, + { + "epoch": 0.31669625830209774, + "grad_norm": 14.238886956988445, + "learning_rate": 5e-06, + "loss": 0.1368, + "num_input_tokens_seen": 206776604, + "step": 1204 + }, + { + "epoch": 0.31669625830209774, + "loss": 0.1644083857536316, + "loss_ce": 0.0012003772426396608, + "loss_iou": 0.75390625, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 206776604, + "step": 1204 + }, + { + "epoch": 0.31695929506148485, + "grad_norm": 7.214768469241478, + "learning_rate": 5e-06, + "loss": 0.162, + "num_input_tokens_seen": 206948516, + "step": 1205 + }, + { + "epoch": 0.31695929506148485, + "loss": 0.15767714381217957, + "loss_ce": 0.0004200635012239218, + "loss_iou": 0.6015625, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 206948516, + "step": 1205 + }, + { + "epoch": 0.31722233182087195, + "grad_norm": 8.494853848101869, + "learning_rate": 5e-06, + "loss": 0.1633, + "num_input_tokens_seen": 207120552, + "step": 1206 + }, + { + "epoch": 0.31722233182087195, + "loss": 0.20658773183822632, + "loss_ce": 0.007216397672891617, + "loss_iou": 0.3671875, + "loss_num": 0.039794921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 207120552, + "step": 1206 + }, + { + "epoch": 0.3174853685802591, + "grad_norm": 8.328419666401985, + "learning_rate": 5e-06, + "loss": 0.1285, + "num_input_tokens_seen": 207292524, + "step": 1207 + }, + { + "epoch": 0.3174853685802591, + "loss": 0.1329333782196045, + "loss_ce": 0.0009753549238666892, + "loss_iou": 0.55078125, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 207292524, + "step": 1207 + }, + { + "epoch": 0.3177484053396462, + "grad_norm": 10.047313768362223, + "learning_rate": 5e-06, + "loss": 0.1291, + "num_input_tokens_seen": 207464680, + "step": 1208 + }, + { + "epoch": 0.3177484053396462, + "loss": 0.12349092215299606, + "loss_ce": 0.0011764641385525465, + "loss_iou": 0.515625, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 207464680, + "step": 1208 + }, + { + "epoch": 0.3180114420990333, + "grad_norm": 12.242484724411801, + "learning_rate": 5e-06, + "loss": 0.1693, + "num_input_tokens_seen": 207634788, + "step": 1209 + }, + { + "epoch": 0.3180114420990333, + "loss": 0.21163830161094666, + "loss_ce": 0.0032947922591120005, + "loss_iou": 0.54296875, + "loss_num": 0.041748046875, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 207634788, + "step": 1209 + }, + { + "epoch": 0.3182744788584205, + "grad_norm": 5.397067809441159, + "learning_rate": 5e-06, + "loss": 0.1287, + "num_input_tokens_seen": 207806604, + "step": 1210 + }, + { + "epoch": 0.3182744788584205, + "loss": 0.14931055903434753, + "loss_ce": 0.00026271765818819404, + "loss_iou": 0.5859375, + "loss_num": 0.02978515625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 207806604, + "step": 1210 + }, + { + "epoch": 0.3185375156178076, + "grad_norm": 5.849344287815652, + "learning_rate": 5e-06, + "loss": 0.1585, + "num_input_tokens_seen": 207979236, + "step": 1211 + }, + { + "epoch": 0.3185375156178076, + "loss": 0.16523879766464233, + "loss_ce": 0.000810077937785536, + "loss_iou": 0.65234375, + "loss_num": 0.032958984375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 207979236, + "step": 1211 + }, + { + "epoch": 0.31880055237719473, + "grad_norm": 6.099253346488696, + "learning_rate": 5e-06, + "loss": 0.1421, + "num_input_tokens_seen": 208151368, + "step": 1212 + }, + { + "epoch": 0.31880055237719473, + "loss": 0.12017640471458435, + "loss_ce": 0.00015077157877385616, + "loss_iou": 0.6171875, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 208151368, + "step": 1212 + }, + { + "epoch": 0.31906358913658184, + "grad_norm": 6.749805208741857, + "learning_rate": 5e-06, + "loss": 0.1114, + "num_input_tokens_seen": 208323600, + "step": 1213 + }, + { + "epoch": 0.31906358913658184, + "loss": 0.1262407749891281, + "loss_ce": 0.0037432105746120214, + "loss_iou": 0.4765625, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 208323600, + "step": 1213 + }, + { + "epoch": 0.31932662589596894, + "grad_norm": 7.672226851345336, + "learning_rate": 5e-06, + "loss": 0.0956, + "num_input_tokens_seen": 208495836, + "step": 1214 + }, + { + "epoch": 0.31932662589596894, + "loss": 0.09719178080558777, + "loss_ce": 0.0004205434233881533, + "loss_iou": 0.5625, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 208495836, + "step": 1214 + }, + { + "epoch": 0.3195896626553561, + "grad_norm": 7.992027772652848, + "learning_rate": 5e-06, + "loss": 0.1721, + "num_input_tokens_seen": 208667840, + "step": 1215 + }, + { + "epoch": 0.3195896626553561, + "loss": 0.1424046754837036, + "loss_ce": 0.00037586723919957876, + "loss_iou": 0.46484375, + "loss_num": 0.0284423828125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 208667840, + "step": 1215 + }, + { + "epoch": 0.3198526994147432, + "grad_norm": 6.160546703869267, + "learning_rate": 5e-06, + "loss": 0.119, + "num_input_tokens_seen": 208840040, + "step": 1216 + }, + { + "epoch": 0.3198526994147432, + "loss": 0.11277418583631516, + "loss_ce": 0.0004084610554855317, + "loss_iou": 0.48046875, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 208840040, + "step": 1216 + }, + { + "epoch": 0.32011573617413036, + "grad_norm": 14.924491605710916, + "learning_rate": 5e-06, + "loss": 0.1657, + "num_input_tokens_seen": 209010176, + "step": 1217 + }, + { + "epoch": 0.32011573617413036, + "loss": 0.1102285385131836, + "loss_ce": 0.0007619824027642608, + "loss_iou": 0.51171875, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 209010176, + "step": 1217 + }, + { + "epoch": 0.32037877293351746, + "grad_norm": 13.936435144745488, + "learning_rate": 5e-06, + "loss": 0.1294, + "num_input_tokens_seen": 209182304, + "step": 1218 + }, + { + "epoch": 0.32037877293351746, + "loss": 0.12400620430707932, + "loss_ce": 0.0005168293137103319, + "loss_iou": 0.53515625, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 209182304, + "step": 1218 + }, + { + "epoch": 0.32064180969290457, + "grad_norm": 5.31537857924368, + "learning_rate": 5e-06, + "loss": 0.1329, + "num_input_tokens_seen": 209354400, + "step": 1219 + }, + { + "epoch": 0.32064180969290457, + "loss": 0.15276062488555908, + "loss_ce": 0.0029498354997485876, + "loss_iou": 0.50390625, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 209354400, + "step": 1219 + }, + { + "epoch": 0.3209048464522917, + "grad_norm": 8.044699509860337, + "learning_rate": 5e-06, + "loss": 0.1303, + "num_input_tokens_seen": 209526592, + "step": 1220 + }, + { + "epoch": 0.3209048464522917, + "loss": 0.13095101714134216, + "loss_ce": 0.00137336365878582, + "loss_iou": 0.61328125, + "loss_num": 0.02587890625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 209526592, + "step": 1220 + }, + { + "epoch": 0.32116788321167883, + "grad_norm": 6.178496640552883, + "learning_rate": 5e-06, + "loss": 0.1392, + "num_input_tokens_seen": 209699004, + "step": 1221 + }, + { + "epoch": 0.32116788321167883, + "loss": 0.11922457814216614, + "loss_ce": 0.00017550383927300572, + "loss_iou": 0.625, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 209699004, + "step": 1221 + }, + { + "epoch": 0.32143091997106593, + "grad_norm": 5.682094913930693, + "learning_rate": 5e-06, + "loss": 0.1394, + "num_input_tokens_seen": 209871116, + "step": 1222 + }, + { + "epoch": 0.32143091997106593, + "loss": 0.14516758918762207, + "loss_ce": 0.004481561481952667, + "loss_iou": 0.328125, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 209871116, + "step": 1222 + }, + { + "epoch": 0.3216939567304531, + "grad_norm": 7.037588469004313, + "learning_rate": 5e-06, + "loss": 0.1381, + "num_input_tokens_seen": 210041224, + "step": 1223 + }, + { + "epoch": 0.3216939567304531, + "loss": 0.1431758999824524, + "loss_ce": 0.002764530945569277, + "loss_iou": 0.54296875, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 210041224, + "step": 1223 + }, + { + "epoch": 0.3219569934898402, + "grad_norm": 23.622534135841367, + "learning_rate": 5e-06, + "loss": 0.1595, + "num_input_tokens_seen": 210211380, + "step": 1224 + }, + { + "epoch": 0.3219569934898402, + "loss": 0.1730581820011139, + "loss_ce": 0.0024649298284202814, + "loss_iou": 0.69921875, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 210211380, + "step": 1224 + }, + { + "epoch": 0.32222003024922735, + "grad_norm": 12.04380926493331, + "learning_rate": 5e-06, + "loss": 0.1454, + "num_input_tokens_seen": 210383088, + "step": 1225 + }, + { + "epoch": 0.32222003024922735, + "loss": 0.23281848430633545, + "loss_ce": 0.00115956028457731, + "loss_iou": 0.4296875, + "loss_num": 0.04638671875, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 210383088, + "step": 1225 + }, + { + "epoch": 0.32248306700861445, + "grad_norm": 4.756101930631895, + "learning_rate": 5e-06, + "loss": 0.0833, + "num_input_tokens_seen": 210555072, + "step": 1226 + }, + { + "epoch": 0.32248306700861445, + "loss": 0.06910556554794312, + "loss_ce": 0.003004492959007621, + "loss_iou": 0.5859375, + "loss_num": 0.01324462890625, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 210555072, + "step": 1226 + }, + { + "epoch": 0.32274610376800156, + "grad_norm": 3.8368982745320284, + "learning_rate": 5e-06, + "loss": 0.1197, + "num_input_tokens_seen": 210727236, + "step": 1227 + }, + { + "epoch": 0.32274610376800156, + "loss": 0.10896629840135574, + "loss_ce": 0.0005983852897770703, + "loss_iou": 0.54296875, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 210727236, + "step": 1227 + }, + { + "epoch": 0.3230091405273887, + "grad_norm": 5.290425948494437, + "learning_rate": 5e-06, + "loss": 0.1453, + "num_input_tokens_seen": 210899444, + "step": 1228 + }, + { + "epoch": 0.3230091405273887, + "loss": 0.12627889215946198, + "loss_ce": 8.869695011526346e-05, + "loss_iou": 0.671875, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 210899444, + "step": 1228 + }, + { + "epoch": 0.3232721772867758, + "grad_norm": 5.380666634187743, + "learning_rate": 5e-06, + "loss": 0.1204, + "num_input_tokens_seen": 211071748, + "step": 1229 + }, + { + "epoch": 0.3232721772867758, + "loss": 0.16187983751296997, + "loss_ce": 0.003463074564933777, + "loss_iou": 0.5234375, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 211071748, + "step": 1229 + }, + { + "epoch": 0.323535214046163, + "grad_norm": 6.929606907390503, + "learning_rate": 5e-06, + "loss": 0.1638, + "num_input_tokens_seen": 211243884, + "step": 1230 + }, + { + "epoch": 0.323535214046163, + "loss": 0.16732358932495117, + "loss_ce": 0.003932466730475426, + "loss_iou": 0.76171875, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 211243884, + "step": 1230 + }, + { + "epoch": 0.3237982508055501, + "grad_norm": 4.103469407229988, + "learning_rate": 5e-06, + "loss": 0.1433, + "num_input_tokens_seen": 211416144, + "step": 1231 + }, + { + "epoch": 0.3237982508055501, + "loss": 0.16871199011802673, + "loss_ce": 0.0031236184295266867, + "loss_iou": 0.54296875, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 211416144, + "step": 1231 + }, + { + "epoch": 0.3240612875649372, + "grad_norm": 4.57933905462206, + "learning_rate": 5e-06, + "loss": 0.1294, + "num_input_tokens_seen": 211588296, + "step": 1232 + }, + { + "epoch": 0.3240612875649372, + "loss": 0.16614994406700134, + "loss_ce": 0.0009277852368541062, + "loss_iou": 0.62109375, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 211588296, + "step": 1232 + }, + { + "epoch": 0.32432432432432434, + "grad_norm": 4.940862588040882, + "learning_rate": 5e-06, + "loss": 0.1402, + "num_input_tokens_seen": 211758972, + "step": 1233 + }, + { + "epoch": 0.32432432432432434, + "loss": 0.14131012558937073, + "loss_ce": 0.0015701348893344402, + "loss_iou": 0.482421875, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 211758972, + "step": 1233 + }, + { + "epoch": 0.32458736108371145, + "grad_norm": 11.781672019450719, + "learning_rate": 5e-06, + "loss": 0.1631, + "num_input_tokens_seen": 211928944, + "step": 1234 + }, + { + "epoch": 0.32458736108371145, + "loss": 0.14883792400360107, + "loss_ce": 0.0010107720736414194, + "loss_iou": 0.6875, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 211928944, + "step": 1234 + }, + { + "epoch": 0.32485039784309855, + "grad_norm": 19.10070261855633, + "learning_rate": 5e-06, + "loss": 0.1379, + "num_input_tokens_seen": 212100832, + "step": 1235 + }, + { + "epoch": 0.32485039784309855, + "loss": 0.07846543192863464, + "loss_ce": 0.0002793997118715197, + "loss_iou": 0.6328125, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 212100832, + "step": 1235 + }, + { + "epoch": 0.3251134346024857, + "grad_norm": 15.821804140128867, + "learning_rate": 5e-06, + "loss": 0.1606, + "num_input_tokens_seen": 212271444, + "step": 1236 + }, + { + "epoch": 0.3251134346024857, + "loss": 0.1032799631357193, + "loss_ce": 0.004799742251634598, + "loss_iou": 0.5546875, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 212271444, + "step": 1236 + }, + { + "epoch": 0.3253764713618728, + "grad_norm": 5.984115475857608, + "learning_rate": 5e-06, + "loss": 0.0958, + "num_input_tokens_seen": 212443976, + "step": 1237 + }, + { + "epoch": 0.3253764713618728, + "loss": 0.06768647581338882, + "loss_ce": 0.0013717777328565717, + "loss_iou": 0.6796875, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 212443976, + "step": 1237 + }, + { + "epoch": 0.32563950812125997, + "grad_norm": 7.104938203697124, + "learning_rate": 5e-06, + "loss": 0.1686, + "num_input_tokens_seen": 212612720, + "step": 1238 + }, + { + "epoch": 0.32563950812125997, + "loss": 0.10599420964717865, + "loss_ce": 0.002387029118835926, + "loss_iou": 0.68359375, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 212612720, + "step": 1238 + }, + { + "epoch": 0.32590254488064707, + "grad_norm": 5.6715016587403655, + "learning_rate": 5e-06, + "loss": 0.1341, + "num_input_tokens_seen": 212784924, + "step": 1239 + }, + { + "epoch": 0.32590254488064707, + "loss": 0.07988797873258591, + "loss_ce": 0.00020658349967561662, + "loss_iou": 0.5078125, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 212784924, + "step": 1239 + }, + { + "epoch": 0.3261655816400342, + "grad_norm": 10.066783726153659, + "learning_rate": 5e-06, + "loss": 0.1354, + "num_input_tokens_seen": 212957204, + "step": 1240 + }, + { + "epoch": 0.3261655816400342, + "loss": 0.24682722985744476, + "loss_ce": 0.0004893409786745906, + "loss_iou": 0.46484375, + "loss_num": 0.04931640625, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 212957204, + "step": 1240 + }, + { + "epoch": 0.32642861839942133, + "grad_norm": 6.605871983356305, + "learning_rate": 5e-06, + "loss": 0.1476, + "num_input_tokens_seen": 213129636, + "step": 1241 + }, + { + "epoch": 0.32642861839942133, + "loss": 0.14497268199920654, + "loss_ce": 0.008375998586416245, + "loss_iou": 0.55859375, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 213129636, + "step": 1241 + }, + { + "epoch": 0.32669165515880844, + "grad_norm": 6.65301903793999, + "learning_rate": 5e-06, + "loss": 0.1883, + "num_input_tokens_seen": 213302188, + "step": 1242 + }, + { + "epoch": 0.32669165515880844, + "loss": 0.25356027483940125, + "loss_ce": 0.002705805469304323, + "loss_iou": 0.71875, + "loss_num": 0.05029296875, + "loss_xval": 0.25, + "num_input_tokens_seen": 213302188, + "step": 1242 + }, + { + "epoch": 0.3269546919181956, + "grad_norm": 6.148620904940904, + "learning_rate": 5e-06, + "loss": 0.1644, + "num_input_tokens_seen": 213474112, + "step": 1243 + }, + { + "epoch": 0.3269546919181956, + "loss": 0.10272787511348724, + "loss_ce": 0.00427817041054368, + "loss_iou": 0.57421875, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 213474112, + "step": 1243 + }, + { + "epoch": 0.3272177286775827, + "grad_norm": 8.559691577151979, + "learning_rate": 5e-06, + "loss": 0.214, + "num_input_tokens_seen": 213646008, + "step": 1244 + }, + { + "epoch": 0.3272177286775827, + "loss": 0.2430175393819809, + "loss_ce": 0.0016845206264406443, + "loss_iou": 0.5625, + "loss_num": 0.04833984375, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 213646008, + "step": 1244 + }, + { + "epoch": 0.3274807654369698, + "grad_norm": 4.550203288561425, + "learning_rate": 5e-06, + "loss": 0.127, + "num_input_tokens_seen": 213816528, + "step": 1245 + }, + { + "epoch": 0.3274807654369698, + "loss": 0.10727906972169876, + "loss_ce": 0.001383074326440692, + "loss_iou": 0.59765625, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 213816528, + "step": 1245 + }, + { + "epoch": 0.32774380219635696, + "grad_norm": 8.680127486966335, + "learning_rate": 5e-06, + "loss": 0.1341, + "num_input_tokens_seen": 213988932, + "step": 1246 + }, + { + "epoch": 0.32774380219635696, + "loss": 0.16796328127384186, + "loss_ce": 0.0009405763121321797, + "loss_iou": 0.470703125, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 213988932, + "step": 1246 + }, + { + "epoch": 0.32800683895574406, + "grad_norm": 9.402943236610666, + "learning_rate": 5e-06, + "loss": 0.1439, + "num_input_tokens_seen": 214161184, + "step": 1247 + }, + { + "epoch": 0.32800683895574406, + "loss": 0.18202053010463715, + "loss_ce": 0.008070331066846848, + "loss_iou": 0.5859375, + "loss_num": 0.034912109375, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 214161184, + "step": 1247 + }, + { + "epoch": 0.32826987571513117, + "grad_norm": 8.949806900643816, + "learning_rate": 5e-06, + "loss": 0.1733, + "num_input_tokens_seen": 214333220, + "step": 1248 + }, + { + "epoch": 0.32826987571513117, + "loss": 0.18508628010749817, + "loss_ce": 0.001980806002393365, + "loss_iou": 0.67578125, + "loss_num": 0.03662109375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 214333220, + "step": 1248 + }, + { + "epoch": 0.3285329124745183, + "grad_norm": 9.623428359724258, + "learning_rate": 5e-06, + "loss": 0.1899, + "num_input_tokens_seen": 214501988, + "step": 1249 + }, + { + "epoch": 0.3285329124745183, + "loss": 0.18457330763339996, + "loss_ce": 0.002108700107783079, + "loss_iou": 0.46875, + "loss_num": 0.036376953125, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 214501988, + "step": 1249 + }, + { + "epoch": 0.3287959492339054, + "grad_norm": 5.549112096762312, + "learning_rate": 5e-06, + "loss": 0.1748, + "num_input_tokens_seen": 214672212, + "step": 1250 + }, + { + "epoch": 0.3287959492339054, + "eval_websight_new_CIoU": 0.8321745097637177, + "eval_websight_new_GIoU": 0.8282029330730438, + "eval_websight_new_IoU": 0.8425185084342957, + "eval_websight_new_MAE_all": 0.030896120704710484, + "eval_websight_new_MAE_h": 0.020350518636405468, + "eval_websight_new_MAE_w": 0.041066043078899384, + "eval_websight_new_MAE_x": 0.04287236928939819, + "eval_websight_new_MAE_y": 0.019295550882816315, + "eval_websight_new_NUM_probability": 0.9998577535152435, + "eval_websight_new_inside_bbox": 0.984375, + "eval_websight_new_loss": 0.13457995653152466, + "eval_websight_new_loss_ce": 2.8165732146590017e-05, + "eval_websight_new_loss_iou": 0.39599609375, + "eval_websight_new_loss_num": 0.022901535034179688, + "eval_websight_new_loss_xval": 0.1145477294921875, + "eval_websight_new_runtime": 54.6461, + "eval_websight_new_samples_per_second": 0.915, + "eval_websight_new_steps_per_second": 0.037, + "num_input_tokens_seen": 214672212, + "step": 1250 + }, + { + "epoch": 0.3287959492339054, + "eval_seeclick_CIoU": 0.5724749565124512, + "eval_seeclick_GIoU": 0.5666466653347015, + "eval_seeclick_IoU": 0.5994701087474823, + "eval_seeclick_MAE_all": 0.054681919515132904, + "eval_seeclick_MAE_h": 0.03272205591201782, + "eval_seeclick_MAE_w": 0.07547581382095814, + "eval_seeclick_MAE_x": 0.07945681735873222, + "eval_seeclick_MAE_y": 0.031073003076016903, + "eval_seeclick_NUM_probability": 0.9999328255653381, + "eval_seeclick_inside_bbox": 0.8465909063816071, + "eval_seeclick_loss": 0.2362295389175415, + "eval_seeclick_loss_ce": 0.008988222572952509, + "eval_seeclick_loss_iou": 0.5384521484375, + "eval_seeclick_loss_num": 0.04405975341796875, + "eval_seeclick_loss_xval": 0.22039794921875, + "eval_seeclick_runtime": 77.1136, + "eval_seeclick_samples_per_second": 0.558, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 214672212, + "step": 1250 + }, + { + "epoch": 0.3287959492339054, + "eval_icons_CIoU": 0.8087750673294067, + "eval_icons_GIoU": 0.7993076145648956, + "eval_icons_IoU": 0.8191726505756378, + "eval_icons_MAE_all": 0.02884785458445549, + "eval_icons_MAE_h": 0.033479243516922, + "eval_icons_MAE_w": 0.024947408586740494, + "eval_icons_MAE_x": 0.02466664183884859, + "eval_icons_MAE_y": 0.0322981309145689, + "eval_icons_NUM_probability": 0.9998048841953278, + "eval_icons_inside_bbox": 0.9565972089767456, + "eval_icons_loss": 0.08432088792324066, + "eval_icons_loss_ce": 7.355650814133696e-05, + "eval_icons_loss_iou": 0.567626953125, + "eval_icons_loss_num": 0.015069961547851562, + "eval_icons_loss_xval": 0.0753631591796875, + "eval_icons_runtime": 78.2995, + "eval_icons_samples_per_second": 0.639, + "eval_icons_steps_per_second": 0.026, + "num_input_tokens_seen": 214672212, + "step": 1250 + }, + { + "epoch": 0.3287959492339054, + "eval_screenspot_CIoU": 0.5492339730262756, + "eval_screenspot_GIoU": 0.5352775057156881, + "eval_screenspot_IoU": 0.5879749854405721, + "eval_screenspot_MAE_all": 0.0870671272277832, + "eval_screenspot_MAE_h": 0.051297743494311966, + "eval_screenspot_MAE_w": 0.14652628699938455, + "eval_screenspot_MAE_x": 0.10099601248900096, + "eval_screenspot_MAE_y": 0.04944847462077936, + "eval_screenspot_NUM_probability": 0.999727189540863, + "eval_screenspot_inside_bbox": 0.8529166579246521, + "eval_screenspot_loss": 0.8003170490264893, + "eval_screenspot_loss_ce": 0.4581688741842906, + "eval_screenspot_loss_iou": 0.4616292317708333, + "eval_screenspot_loss_num": 0.06711324055989583, + "eval_screenspot_loss_xval": 0.3355305989583333, + "eval_screenspot_runtime": 146.7221, + "eval_screenspot_samples_per_second": 0.607, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 214672212, + "step": 1250 + }, + { + "epoch": 0.3287959492339054, + "loss": 0.7908815741539001, + "loss_ce": 0.44920679926872253, + "loss_iou": 0.390625, + "loss_num": 0.068359375, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 214672212, + "step": 1250 + }, + { + "epoch": 0.3290589859932926, + "grad_norm": 5.65145087220912, + "learning_rate": 5e-06, + "loss": 0.1612, + "num_input_tokens_seen": 214840640, + "step": 1251 + }, + { + "epoch": 0.3290589859932926, + "loss": 0.17782355844974518, + "loss_ce": 0.0008826321572996676, + "loss_iou": 0.458984375, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 214840640, + "step": 1251 + }, + { + "epoch": 0.3293220227526797, + "grad_norm": 7.22202274640801, + "learning_rate": 5e-06, + "loss": 0.1279, + "num_input_tokens_seen": 215011212, + "step": 1252 + }, + { + "epoch": 0.3293220227526797, + "loss": 0.13640594482421875, + "loss_ce": 0.0011520386906340718, + "loss_iou": 0.5859375, + "loss_num": 0.027099609375, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 215011212, + "step": 1252 + }, + { + "epoch": 0.3295850595120668, + "grad_norm": 5.1577086011143685, + "learning_rate": 5e-06, + "loss": 0.1307, + "num_input_tokens_seen": 215183424, + "step": 1253 + }, + { + "epoch": 0.3295850595120668, + "loss": 0.12353098392486572, + "loss_ce": 0.0006061755702830851, + "loss_iou": 0.5703125, + "loss_num": 0.024658203125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 215183424, + "step": 1253 + }, + { + "epoch": 0.32984809627145395, + "grad_norm": 7.368992653841811, + "learning_rate": 5e-06, + "loss": 0.1308, + "num_input_tokens_seen": 215355224, + "step": 1254 + }, + { + "epoch": 0.32984809627145395, + "loss": 0.15012162923812866, + "loss_ce": 0.0011348105035722256, + "loss_iou": 0.51171875, + "loss_num": 0.02978515625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 215355224, + "step": 1254 + }, + { + "epoch": 0.33011113303084105, + "grad_norm": 5.485255366846693, + "learning_rate": 5e-06, + "loss": 0.1751, + "num_input_tokens_seen": 215525468, + "step": 1255 + }, + { + "epoch": 0.33011113303084105, + "loss": 0.14393854141235352, + "loss_ce": 0.0008110922062769532, + "loss_iou": 0.4765625, + "loss_num": 0.028564453125, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 215525468, + "step": 1255 + }, + { + "epoch": 0.3303741697902282, + "grad_norm": 5.150585875058386, + "learning_rate": 5e-06, + "loss": 0.1205, + "num_input_tokens_seen": 215697664, + "step": 1256 + }, + { + "epoch": 0.3303741697902282, + "loss": 0.1360258162021637, + "loss_ce": 0.0006498318398371339, + "loss_iou": 0.6875, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 215697664, + "step": 1256 + }, + { + "epoch": 0.3306372065496153, + "grad_norm": 4.902248025801288, + "learning_rate": 5e-06, + "loss": 0.1244, + "num_input_tokens_seen": 215869772, + "step": 1257 + }, + { + "epoch": 0.3306372065496153, + "loss": 0.08430365473031998, + "loss_ce": 0.0006854891544207931, + "loss_iou": 0.671875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 215869772, + "step": 1257 + }, + { + "epoch": 0.3309002433090024, + "grad_norm": 8.067058733106478, + "learning_rate": 5e-06, + "loss": 0.1836, + "num_input_tokens_seen": 216041972, + "step": 1258 + }, + { + "epoch": 0.3309002433090024, + "loss": 0.12436328083276749, + "loss_ce": 0.0012248535640537739, + "loss_iou": 0.66015625, + "loss_num": 0.024658203125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 216041972, + "step": 1258 + }, + { + "epoch": 0.3311632800683896, + "grad_norm": 10.281119628319207, + "learning_rate": 5e-06, + "loss": 0.1282, + "num_input_tokens_seen": 216213968, + "step": 1259 + }, + { + "epoch": 0.3311632800683896, + "loss": 0.1294836401939392, + "loss_ce": 0.002225350122898817, + "loss_iou": 0.490234375, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 216213968, + "step": 1259 + }, + { + "epoch": 0.3314263168277767, + "grad_norm": 9.188895459960923, + "learning_rate": 5e-06, + "loss": 0.1358, + "num_input_tokens_seen": 216384484, + "step": 1260 + }, + { + "epoch": 0.3314263168277767, + "loss": 0.17113396525382996, + "loss_ce": 0.0021886550821363926, + "loss_iou": 0.5234375, + "loss_num": 0.033935546875, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 216384484, + "step": 1260 + }, + { + "epoch": 0.3316893535871638, + "grad_norm": 9.858408287132631, + "learning_rate": 5e-06, + "loss": 0.1691, + "num_input_tokens_seen": 216556712, + "step": 1261 + }, + { + "epoch": 0.3316893535871638, + "loss": 0.09802193194627762, + "loss_ce": 0.004027791786938906, + "loss_iou": 0.53125, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 216556712, + "step": 1261 + }, + { + "epoch": 0.33195239034655094, + "grad_norm": 13.43147706837513, + "learning_rate": 5e-06, + "loss": 0.136, + "num_input_tokens_seen": 216727532, + "step": 1262 + }, + { + "epoch": 0.33195239034655094, + "loss": 0.12088888883590698, + "loss_ce": 0.001778791076503694, + "loss_iou": NaN, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 216727532, + "step": 1262 + }, + { + "epoch": 0.33221542710593804, + "grad_norm": 22.307350229032924, + "learning_rate": 5e-06, + "loss": 0.1044, + "num_input_tokens_seen": 216899424, + "step": 1263 + }, + { + "epoch": 0.33221542710593804, + "loss": 0.13408984243869781, + "loss_ce": 0.00048388654249720275, + "loss_iou": 0.5546875, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 216899424, + "step": 1263 + }, + { + "epoch": 0.3324784638653252, + "grad_norm": 12.430687637818028, + "learning_rate": 5e-06, + "loss": 0.1269, + "num_input_tokens_seen": 217071512, + "step": 1264 + }, + { + "epoch": 0.3324784638653252, + "loss": 0.12219381332397461, + "loss_ce": 0.0014357574982568622, + "loss_iou": 0.72265625, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 217071512, + "step": 1264 + }, + { + "epoch": 0.3327415006247123, + "grad_norm": 5.892819252370443, + "learning_rate": 5e-06, + "loss": 0.1204, + "num_input_tokens_seen": 217241728, + "step": 1265 + }, + { + "epoch": 0.3327415006247123, + "loss": 0.07451170682907104, + "loss_ce": 0.0026122929994016886, + "loss_iou": 0.5234375, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 217241728, + "step": 1265 + }, + { + "epoch": 0.3330045373840994, + "grad_norm": 7.045469734572638, + "learning_rate": 5e-06, + "loss": 0.1485, + "num_input_tokens_seen": 217413824, + "step": 1266 + }, + { + "epoch": 0.3330045373840994, + "loss": 0.19595244526863098, + "loss_ce": 0.0024709957651793957, + "loss_iou": 0.5234375, + "loss_num": 0.038818359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 217413824, + "step": 1266 + }, + { + "epoch": 0.33326757414348657, + "grad_norm": 10.385925957202762, + "learning_rate": 5e-06, + "loss": 0.1572, + "num_input_tokens_seen": 217585864, + "step": 1267 + }, + { + "epoch": 0.33326757414348657, + "loss": 0.14605620503425598, + "loss_ce": 0.0005483938148245215, + "loss_iou": 0.57421875, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 217585864, + "step": 1267 + }, + { + "epoch": 0.33353061090287367, + "grad_norm": 6.514979263584694, + "learning_rate": 5e-06, + "loss": 0.143, + "num_input_tokens_seen": 217757720, + "step": 1268 + }, + { + "epoch": 0.33353061090287367, + "loss": 0.1336522251367569, + "loss_ce": 0.000595581834204495, + "loss_iou": 0.66015625, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 217757720, + "step": 1268 + }, + { + "epoch": 0.33379364766226083, + "grad_norm": 7.782042489694296, + "learning_rate": 5e-06, + "loss": 0.1537, + "num_input_tokens_seen": 217929844, + "step": 1269 + }, + { + "epoch": 0.33379364766226083, + "loss": 0.14830312132835388, + "loss_ce": 0.0005370010621845722, + "loss_iou": 0.41796875, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 217929844, + "step": 1269 + }, + { + "epoch": 0.33405668442164793, + "grad_norm": 37.57598415614969, + "learning_rate": 5e-06, + "loss": 0.1559, + "num_input_tokens_seen": 218102180, + "step": 1270 + }, + { + "epoch": 0.33405668442164793, + "loss": 0.125936821103096, + "loss_ce": 0.000570611678995192, + "loss_iou": 0.77734375, + "loss_num": 0.025146484375, + "loss_xval": 0.125, + "num_input_tokens_seen": 218102180, + "step": 1270 + }, + { + "epoch": 0.33431972118103503, + "grad_norm": 11.611735941773203, + "learning_rate": 5e-06, + "loss": 0.1685, + "num_input_tokens_seen": 218272528, + "step": 1271 + }, + { + "epoch": 0.33431972118103503, + "loss": 0.15356029570102692, + "loss_ce": 0.0005146406474523246, + "loss_iou": 0.51171875, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 218272528, + "step": 1271 + }, + { + "epoch": 0.3345827579404222, + "grad_norm": 8.349189669659024, + "learning_rate": 5e-06, + "loss": 0.1356, + "num_input_tokens_seen": 218445008, + "step": 1272 + }, + { + "epoch": 0.3345827579404222, + "loss": 0.14823183417320251, + "loss_ce": 0.0009540055179968476, + "loss_iou": 0.5390625, + "loss_num": 0.0294189453125, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 218445008, + "step": 1272 + }, + { + "epoch": 0.3348457946998093, + "grad_norm": 8.794519146282962, + "learning_rate": 5e-06, + "loss": 0.1529, + "num_input_tokens_seen": 218617480, + "step": 1273 + }, + { + "epoch": 0.3348457946998093, + "loss": 0.10918127745389938, + "loss_ce": 0.0011185340117663145, + "loss_iou": 0.625, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 218617480, + "step": 1273 + }, + { + "epoch": 0.3351088314591964, + "grad_norm": 5.0624867047638045, + "learning_rate": 5e-06, + "loss": 0.1463, + "num_input_tokens_seen": 218789900, + "step": 1274 + }, + { + "epoch": 0.3351088314591964, + "loss": 0.13429242372512817, + "loss_ce": 0.00411969143897295, + "loss_iou": 0.369140625, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 218789900, + "step": 1274 + }, + { + "epoch": 0.33537186821858356, + "grad_norm": 5.751729189048832, + "learning_rate": 5e-06, + "loss": 0.1612, + "num_input_tokens_seen": 218962156, + "step": 1275 + }, + { + "epoch": 0.33537186821858356, + "loss": 0.11978072673082352, + "loss_ce": 0.00350876129232347, + "loss_iou": 0.453125, + "loss_num": 0.0233154296875, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 218962156, + "step": 1275 + }, + { + "epoch": 0.33563490497797066, + "grad_norm": 6.311955311958974, + "learning_rate": 5e-06, + "loss": 0.1055, + "num_input_tokens_seen": 219132288, + "step": 1276 + }, + { + "epoch": 0.33563490497797066, + "loss": 0.1355704814195633, + "loss_ce": 0.0025443662889301777, + "loss_iou": 0.46484375, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 219132288, + "step": 1276 + }, + { + "epoch": 0.3358979417373578, + "grad_norm": 7.099991467306234, + "learning_rate": 5e-06, + "loss": 0.1531, + "num_input_tokens_seen": 219304616, + "step": 1277 + }, + { + "epoch": 0.3358979417373578, + "loss": 0.13075271248817444, + "loss_ce": 0.0017243996262550354, + "loss_iou": 0.55078125, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 219304616, + "step": 1277 + }, + { + "epoch": 0.3361609784967449, + "grad_norm": 7.34332951683246, + "learning_rate": 5e-06, + "loss": 0.1414, + "num_input_tokens_seen": 219476876, + "step": 1278 + }, + { + "epoch": 0.3361609784967449, + "loss": 0.08354561030864716, + "loss_ce": 0.0010565929114818573, + "loss_iou": 0.62890625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 219476876, + "step": 1278 + }, + { + "epoch": 0.336424015256132, + "grad_norm": 20.864457223612828, + "learning_rate": 5e-06, + "loss": 0.1795, + "num_input_tokens_seen": 219649440, + "step": 1279 + }, + { + "epoch": 0.336424015256132, + "loss": 0.14009949564933777, + "loss_ce": 0.00029846589313820004, + "loss_iou": 0.51953125, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 219649440, + "step": 1279 + }, + { + "epoch": 0.3366870520155192, + "grad_norm": 6.27874029788255, + "learning_rate": 5e-06, + "loss": 0.155, + "num_input_tokens_seen": 219821712, + "step": 1280 + }, + { + "epoch": 0.3366870520155192, + "loss": 0.14031162858009338, + "loss_ce": 0.0007242212886922061, + "loss_iou": 0.640625, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 219821712, + "step": 1280 + }, + { + "epoch": 0.3369500887749063, + "grad_norm": 4.5668271174612025, + "learning_rate": 5e-06, + "loss": 0.1628, + "num_input_tokens_seen": 219993864, + "step": 1281 + }, + { + "epoch": 0.3369500887749063, + "loss": 0.10837851464748383, + "loss_ce": 0.0014449162408709526, + "loss_iou": 0.40234375, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 219993864, + "step": 1281 + }, + { + "epoch": 0.3372131255342934, + "grad_norm": 4.389742759586719, + "learning_rate": 5e-06, + "loss": 0.125, + "num_input_tokens_seen": 220166248, + "step": 1282 + }, + { + "epoch": 0.3372131255342934, + "loss": 0.23685070872306824, + "loss_ce": 0.0005836054333485663, + "loss_iou": 0.54296875, + "loss_num": 0.04736328125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 220166248, + "step": 1282 + }, + { + "epoch": 0.33747616229368055, + "grad_norm": 5.570317525274513, + "learning_rate": 5e-06, + "loss": 0.1458, + "num_input_tokens_seen": 220338428, + "step": 1283 + }, + { + "epoch": 0.33747616229368055, + "loss": 0.13890241086483002, + "loss_ce": 0.0007493281736969948, + "loss_iou": 0.44921875, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 220338428, + "step": 1283 + }, + { + "epoch": 0.33773919905306765, + "grad_norm": 10.985772181850571, + "learning_rate": 5e-06, + "loss": 0.1223, + "num_input_tokens_seen": 220510776, + "step": 1284 + }, + { + "epoch": 0.33773919905306765, + "loss": 0.12931521236896515, + "loss_ce": 0.0006531005492433906, + "loss_iou": 0.6015625, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 220510776, + "step": 1284 + }, + { + "epoch": 0.3380022358124548, + "grad_norm": 5.926973995328122, + "learning_rate": 5e-06, + "loss": 0.139, + "num_input_tokens_seen": 220682848, + "step": 1285 + }, + { + "epoch": 0.3380022358124548, + "loss": 0.21676884591579437, + "loss_ce": 0.007204629480838776, + "loss_iou": 0.390625, + "loss_num": 0.0419921875, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 220682848, + "step": 1285 + }, + { + "epoch": 0.3382652725718419, + "grad_norm": 7.996976963449769, + "learning_rate": 5e-06, + "loss": 0.1313, + "num_input_tokens_seen": 220855140, + "step": 1286 + }, + { + "epoch": 0.3382652725718419, + "loss": 0.08844804763793945, + "loss_ce": 0.0003895749687217176, + "loss_iou": 0.59375, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 220855140, + "step": 1286 + }, + { + "epoch": 0.338528309331229, + "grad_norm": 8.499510082866491, + "learning_rate": 5e-06, + "loss": 0.1178, + "num_input_tokens_seen": 221027260, + "step": 1287 + }, + { + "epoch": 0.338528309331229, + "loss": 0.12149707973003387, + "loss_ce": 0.0004948831629008055, + "loss_iou": 0.5703125, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 221027260, + "step": 1287 + }, + { + "epoch": 0.3387913460906162, + "grad_norm": 5.973131676418204, + "learning_rate": 5e-06, + "loss": 0.1328, + "num_input_tokens_seen": 221199416, + "step": 1288 + }, + { + "epoch": 0.3387913460906162, + "loss": 0.09466279298067093, + "loss_ce": 0.00011933030327782035, + "loss_iou": 0.671875, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 221199416, + "step": 1288 + }, + { + "epoch": 0.3390543828500033, + "grad_norm": 4.808893821005273, + "learning_rate": 5e-06, + "loss": 0.1551, + "num_input_tokens_seen": 221371720, + "step": 1289 + }, + { + "epoch": 0.3390543828500033, + "loss": 0.09180793166160583, + "loss_ce": 0.0001941679511219263, + "loss_iou": 0.54296875, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 221371720, + "step": 1289 + }, + { + "epoch": 0.33931741960939044, + "grad_norm": 7.016843255216574, + "learning_rate": 5e-06, + "loss": 0.1878, + "num_input_tokens_seen": 221542276, + "step": 1290 + }, + { + "epoch": 0.33931741960939044, + "loss": 0.19763490557670593, + "loss_ce": 0.0007354922126978636, + "loss_iou": 0.44921875, + "loss_num": 0.039306640625, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 221542276, + "step": 1290 + }, + { + "epoch": 0.33958045636877754, + "grad_norm": 6.06110322351886, + "learning_rate": 5e-06, + "loss": 0.1227, + "num_input_tokens_seen": 221714344, + "step": 1291 + }, + { + "epoch": 0.33958045636877754, + "loss": 0.15020573139190674, + "loss_ce": 0.0027447929605841637, + "loss_iou": 0.4375, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 221714344, + "step": 1291 + }, + { + "epoch": 0.33984349312816464, + "grad_norm": 8.158154012506673, + "learning_rate": 5e-06, + "loss": 0.1082, + "num_input_tokens_seen": 221886284, + "step": 1292 + }, + { + "epoch": 0.33984349312816464, + "loss": 0.06007716804742813, + "loss_ce": 0.0002016789367189631, + "loss_iou": 0.609375, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 221886284, + "step": 1292 + }, + { + "epoch": 0.3401065298875518, + "grad_norm": 4.887380704800873, + "learning_rate": 5e-06, + "loss": 0.1348, + "num_input_tokens_seen": 222058520, + "step": 1293 + }, + { + "epoch": 0.3401065298875518, + "loss": 0.09021516144275665, + "loss_ce": 0.0008291734848171473, + "loss_iou": 0.45703125, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 222058520, + "step": 1293 + }, + { + "epoch": 0.3403695666469389, + "grad_norm": 13.907525065592614, + "learning_rate": 5e-06, + "loss": 0.1479, + "num_input_tokens_seen": 222231156, + "step": 1294 + }, + { + "epoch": 0.3403695666469389, + "loss": 0.22907251119613647, + "loss_ce": 0.0007704915478825569, + "loss_iou": 0.6640625, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 222231156, + "step": 1294 + }, + { + "epoch": 0.340632603406326, + "grad_norm": 7.536500056913184, + "learning_rate": 5e-06, + "loss": 0.1822, + "num_input_tokens_seen": 222403476, + "step": 1295 + }, + { + "epoch": 0.340632603406326, + "loss": 0.1321713924407959, + "loss_ce": 0.002853148616850376, + "loss_iou": 0.5703125, + "loss_num": 0.02587890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 222403476, + "step": 1295 + }, + { + "epoch": 0.34089564016571317, + "grad_norm": 8.538688840874034, + "learning_rate": 5e-06, + "loss": 0.1301, + "num_input_tokens_seen": 222575540, + "step": 1296 + }, + { + "epoch": 0.34089564016571317, + "loss": 0.09548459947109222, + "loss_ce": 0.0024365070275962353, + "loss_iou": 0.5625, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 222575540, + "step": 1296 + }, + { + "epoch": 0.34115867692510027, + "grad_norm": 4.787366743041949, + "learning_rate": 5e-06, + "loss": 0.1417, + "num_input_tokens_seen": 222747704, + "step": 1297 + }, + { + "epoch": 0.34115867692510027, + "loss": 0.1637537181377411, + "loss_ce": 0.0010034843580797315, + "loss_iou": 0.45703125, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 222747704, + "step": 1297 + }, + { + "epoch": 0.3414217136844874, + "grad_norm": 6.917043775641942, + "learning_rate": 5e-06, + "loss": 0.0965, + "num_input_tokens_seen": 222919896, + "step": 1298 + }, + { + "epoch": 0.3414217136844874, + "loss": 0.05339755862951279, + "loss_ce": 0.001883886638097465, + "loss_iou": 0.419921875, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 222919896, + "step": 1298 + }, + { + "epoch": 0.34168475044387453, + "grad_norm": 8.403015648704343, + "learning_rate": 5e-06, + "loss": 0.1296, + "num_input_tokens_seen": 223092124, + "step": 1299 + }, + { + "epoch": 0.34168475044387453, + "loss": 0.15119820833206177, + "loss_ce": 0.0011737870518118143, + "loss_iou": 0.62890625, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 223092124, + "step": 1299 + }, + { + "epoch": 0.34194778720326163, + "grad_norm": 7.291522615294142, + "learning_rate": 5e-06, + "loss": 0.1482, + "num_input_tokens_seen": 223264436, + "step": 1300 + }, + { + "epoch": 0.34194778720326163, + "loss": 0.14489704370498657, + "loss_ce": 0.0020442616660147905, + "loss_iou": 0.41796875, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 223264436, + "step": 1300 + }, + { + "epoch": 0.3422108239626488, + "grad_norm": 8.380731335959293, + "learning_rate": 5e-06, + "loss": 0.1469, + "num_input_tokens_seen": 223436588, + "step": 1301 + }, + { + "epoch": 0.3422108239626488, + "loss": 0.15774638950824738, + "loss_ce": 0.0007334585534408689, + "loss_iou": 0.51171875, + "loss_num": 0.03125, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 223436588, + "step": 1301 + }, + { + "epoch": 0.3424738607220359, + "grad_norm": 4.869592726119341, + "learning_rate": 5e-06, + "loss": 0.1296, + "num_input_tokens_seen": 223607388, + "step": 1302 + }, + { + "epoch": 0.3424738607220359, + "loss": 0.15213216841220856, + "loss_ce": 0.0007344604237005115, + "loss_iou": 0.5546875, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 223607388, + "step": 1302 + }, + { + "epoch": 0.34273689748142305, + "grad_norm": 5.639489902327201, + "learning_rate": 5e-06, + "loss": 0.1483, + "num_input_tokens_seen": 223779372, + "step": 1303 + }, + { + "epoch": 0.34273689748142305, + "loss": 0.17259347438812256, + "loss_ce": 0.0032819565385580063, + "loss_iou": 0.490234375, + "loss_num": 0.033935546875, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 223779372, + "step": 1303 + }, + { + "epoch": 0.34299993424081016, + "grad_norm": 9.093938521490475, + "learning_rate": 5e-06, + "loss": 0.2003, + "num_input_tokens_seen": 223951388, + "step": 1304 + }, + { + "epoch": 0.34299993424081016, + "loss": 0.22136257588863373, + "loss_ce": 0.0015749745070934296, + "loss_iou": 0.62109375, + "loss_num": 0.0439453125, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 223951388, + "step": 1304 + }, + { + "epoch": 0.34326297100019726, + "grad_norm": 6.510612178547868, + "learning_rate": 5e-06, + "loss": 0.1589, + "num_input_tokens_seen": 224123456, + "step": 1305 + }, + { + "epoch": 0.34326297100019726, + "loss": 0.14626947045326233, + "loss_ce": 0.0030199564062058926, + "loss_iou": 0.455078125, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 224123456, + "step": 1305 + }, + { + "epoch": 0.3435260077595844, + "grad_norm": 6.4702586677675855, + "learning_rate": 5e-06, + "loss": 0.1824, + "num_input_tokens_seen": 224295676, + "step": 1306 + }, + { + "epoch": 0.3435260077595844, + "loss": 0.20608918368816376, + "loss_ce": 0.0004007022944279015, + "loss_iou": 0.671875, + "loss_num": 0.041259765625, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 224295676, + "step": 1306 + }, + { + "epoch": 0.3437890445189715, + "grad_norm": 24.89374059454028, + "learning_rate": 5e-06, + "loss": 0.1518, + "num_input_tokens_seen": 224467880, + "step": 1307 + }, + { + "epoch": 0.3437890445189715, + "loss": 0.12941348552703857, + "loss_ce": 0.000507236341945827, + "loss_iou": 0.58203125, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 224467880, + "step": 1307 + }, + { + "epoch": 0.3440520812783586, + "grad_norm": 8.792041343116132, + "learning_rate": 5e-06, + "loss": 0.1322, + "num_input_tokens_seen": 224640076, + "step": 1308 + }, + { + "epoch": 0.3440520812783586, + "loss": 0.18377459049224854, + "loss_ce": 0.0007301591685973108, + "loss_iou": 0.53125, + "loss_num": 0.03662109375, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 224640076, + "step": 1308 + }, + { + "epoch": 0.3443151180377458, + "grad_norm": 7.075463244307207, + "learning_rate": 5e-06, + "loss": 0.1801, + "num_input_tokens_seen": 224812516, + "step": 1309 + }, + { + "epoch": 0.3443151180377458, + "loss": 0.10930690169334412, + "loss_ce": 0.0017934793140739202, + "loss_iou": 0.5078125, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 224812516, + "step": 1309 + }, + { + "epoch": 0.3445781547971329, + "grad_norm": 5.622949079750027, + "learning_rate": 5e-06, + "loss": 0.1576, + "num_input_tokens_seen": 224985136, + "step": 1310 + }, + { + "epoch": 0.3445781547971329, + "loss": 0.13441142439842224, + "loss_ce": 0.0033689369447529316, + "loss_iou": 0.625, + "loss_num": 0.0262451171875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 224985136, + "step": 1310 + }, + { + "epoch": 0.34484119155652004, + "grad_norm": 6.126844030056675, + "learning_rate": 5e-06, + "loss": 0.1343, + "num_input_tokens_seen": 225157120, + "step": 1311 + }, + { + "epoch": 0.34484119155652004, + "loss": 0.1415342092514038, + "loss_ce": 0.000787146098446101, + "loss_iou": 0.59375, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 225157120, + "step": 1311 + }, + { + "epoch": 0.34510422831590715, + "grad_norm": 5.277811585596711, + "learning_rate": 5e-06, + "loss": 0.1527, + "num_input_tokens_seen": 225329012, + "step": 1312 + }, + { + "epoch": 0.34510422831590715, + "loss": 0.08680924028158188, + "loss_ce": 0.006761634722352028, + "loss_iou": 0.58203125, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 225329012, + "step": 1312 + }, + { + "epoch": 0.34536726507529425, + "grad_norm": 4.953551061650772, + "learning_rate": 5e-06, + "loss": 0.0972, + "num_input_tokens_seen": 225501536, + "step": 1313 + }, + { + "epoch": 0.34536726507529425, + "loss": 0.10339295864105225, + "loss_ce": 0.0035394439473748207, + "loss_iou": 0.451171875, + "loss_num": 0.02001953125, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 225501536, + "step": 1313 + }, + { + "epoch": 0.3456303018346814, + "grad_norm": 14.376989767483847, + "learning_rate": 5e-06, + "loss": 0.1896, + "num_input_tokens_seen": 225673976, + "step": 1314 + }, + { + "epoch": 0.3456303018346814, + "loss": 0.13013647496700287, + "loss_ce": 0.0019016144797205925, + "loss_iou": 0.76953125, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 225673976, + "step": 1314 + }, + { + "epoch": 0.3458933385940685, + "grad_norm": 4.351015648559307, + "learning_rate": 5e-06, + "loss": 0.1106, + "num_input_tokens_seen": 225844284, + "step": 1315 + }, + { + "epoch": 0.3458933385940685, + "loss": 0.07338554412126541, + "loss_ce": 0.0006926720961928368, + "loss_iou": 0.478515625, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 225844284, + "step": 1315 + }, + { + "epoch": 0.34615637535345567, + "grad_norm": 8.704702003668412, + "learning_rate": 5e-06, + "loss": 0.1023, + "num_input_tokens_seen": 226014724, + "step": 1316 + }, + { + "epoch": 0.34615637535345567, + "loss": 0.13227002322673798, + "loss_ce": 0.00031201643287204206, + "loss_iou": 0.58984375, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 226014724, + "step": 1316 + }, + { + "epoch": 0.3464194121128428, + "grad_norm": 9.402885662503637, + "learning_rate": 5e-06, + "loss": 0.1522, + "num_input_tokens_seen": 226186864, + "step": 1317 + }, + { + "epoch": 0.3464194121128428, + "loss": 0.13149945437908173, + "loss_ce": 0.0032035536132752895, + "loss_iou": 0.466796875, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 226186864, + "step": 1317 + }, + { + "epoch": 0.3466824488722299, + "grad_norm": 5.4441598572448475, + "learning_rate": 5e-06, + "loss": 0.1247, + "num_input_tokens_seen": 226359284, + "step": 1318 + }, + { + "epoch": 0.3466824488722299, + "loss": 0.09241662174463272, + "loss_ce": 0.00010094831668538973, + "loss_iou": 0.68359375, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 226359284, + "step": 1318 + }, + { + "epoch": 0.34694548563161703, + "grad_norm": 5.838383036724073, + "learning_rate": 5e-06, + "loss": 0.1544, + "num_input_tokens_seen": 226531468, + "step": 1319 + }, + { + "epoch": 0.34694548563161703, + "loss": 0.10588014125823975, + "loss_ce": 0.00016725034220144153, + "loss_iou": 0.56640625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 226531468, + "step": 1319 + }, + { + "epoch": 0.34720852239100414, + "grad_norm": 6.350689517126913, + "learning_rate": 5e-06, + "loss": 0.1432, + "num_input_tokens_seen": 226703648, + "step": 1320 + }, + { + "epoch": 0.34720852239100414, + "loss": 0.11672288179397583, + "loss_ce": 0.0005119539564475417, + "loss_iou": 0.4765625, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 226703648, + "step": 1320 + }, + { + "epoch": 0.34747155915039124, + "grad_norm": 13.847941425419814, + "learning_rate": 5e-06, + "loss": 0.1625, + "num_input_tokens_seen": 226873276, + "step": 1321 + }, + { + "epoch": 0.34747155915039124, + "loss": 0.1297488510608673, + "loss_ce": 0.0002322565414942801, + "loss_iou": 0.435546875, + "loss_num": 0.02587890625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 226873276, + "step": 1321 + }, + { + "epoch": 0.3477345959097784, + "grad_norm": 23.595553939708278, + "learning_rate": 5e-06, + "loss": 0.1521, + "num_input_tokens_seen": 227045376, + "step": 1322 + }, + { + "epoch": 0.3477345959097784, + "loss": 0.1232631504535675, + "loss_ce": 0.00033834436908364296, + "loss_iou": 0.63671875, + "loss_num": 0.0245361328125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 227045376, + "step": 1322 + }, + { + "epoch": 0.3479976326691655, + "grad_norm": 44.36951009678579, + "learning_rate": 5e-06, + "loss": 0.1561, + "num_input_tokens_seen": 227215620, + "step": 1323 + }, + { + "epoch": 0.3479976326691655, + "loss": 0.07254654914140701, + "loss_ce": 0.000280924781691283, + "loss_iou": 0.51953125, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 227215620, + "step": 1323 + }, + { + "epoch": 0.34826066942855266, + "grad_norm": 5.242558618670207, + "learning_rate": 5e-06, + "loss": 0.129, + "num_input_tokens_seen": 227387716, + "step": 1324 + }, + { + "epoch": 0.34826066942855266, + "loss": 0.11591322720050812, + "loss_ce": 0.0015028227353468537, + "loss_iou": 0.5703125, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 227387716, + "step": 1324 + }, + { + "epoch": 0.34852370618793976, + "grad_norm": 6.076654000097945, + "learning_rate": 5e-06, + "loss": 0.1183, + "num_input_tokens_seen": 227560176, + "step": 1325 + }, + { + "epoch": 0.34852370618793976, + "loss": 0.09558144956827164, + "loss_ce": 0.0009464399190619588, + "loss_iou": 0.56640625, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 227560176, + "step": 1325 + }, + { + "epoch": 0.34878674294732687, + "grad_norm": 8.445480824601002, + "learning_rate": 5e-06, + "loss": 0.15, + "num_input_tokens_seen": 227732380, + "step": 1326 + }, + { + "epoch": 0.34878674294732687, + "loss": 0.18825021386146545, + "loss_ce": 0.0008112426148727536, + "loss_iou": 0.376953125, + "loss_num": 0.037353515625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 227732380, + "step": 1326 + }, + { + "epoch": 0.349049779706714, + "grad_norm": 7.063227159799308, + "learning_rate": 5e-06, + "loss": 0.1569, + "num_input_tokens_seen": 227904472, + "step": 1327 + }, + { + "epoch": 0.349049779706714, + "loss": 0.23714260756969452, + "loss_ce": 0.0035610701888799667, + "loss_iou": 0.466796875, + "loss_num": 0.046630859375, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 227904472, + "step": 1327 + }, + { + "epoch": 0.34931281646610113, + "grad_norm": 5.912334076761878, + "learning_rate": 5e-06, + "loss": 0.1626, + "num_input_tokens_seen": 228076948, + "step": 1328 + }, + { + "epoch": 0.34931281646610113, + "loss": 0.14180362224578857, + "loss_ce": 0.0037115837913006544, + "loss_iou": 0.65234375, + "loss_num": 0.0277099609375, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 228076948, + "step": 1328 + }, + { + "epoch": 0.3495758532254883, + "grad_norm": 6.00754661823903, + "learning_rate": 5e-06, + "loss": 0.1532, + "num_input_tokens_seen": 228248972, + "step": 1329 + }, + { + "epoch": 0.3495758532254883, + "loss": 0.14186972379684448, + "loss_ce": 0.0010616088984534144, + "loss_iou": 0.490234375, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 228248972, + "step": 1329 + }, + { + "epoch": 0.3498388899848754, + "grad_norm": 5.090693648053783, + "learning_rate": 5e-06, + "loss": 0.1309, + "num_input_tokens_seen": 228421168, + "step": 1330 + }, + { + "epoch": 0.3498388899848754, + "loss": 0.08804985880851746, + "loss_ce": 0.0005559585988521576, + "loss_iou": 0.466796875, + "loss_num": 0.017578125, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 228421168, + "step": 1330 + }, + { + "epoch": 0.3501019267442625, + "grad_norm": 5.2360127706674335, + "learning_rate": 5e-06, + "loss": 0.1219, + "num_input_tokens_seen": 228593364, + "step": 1331 + }, + { + "epoch": 0.3501019267442625, + "loss": 0.09556721895933151, + "loss_ce": 0.0012373843928799033, + "loss_iou": 0.515625, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 228593364, + "step": 1331 + }, + { + "epoch": 0.35036496350364965, + "grad_norm": 7.817751449255831, + "learning_rate": 5e-06, + "loss": 0.1646, + "num_input_tokens_seen": 228765480, + "step": 1332 + }, + { + "epoch": 0.35036496350364965, + "loss": 0.19872622191905975, + "loss_ce": 0.0040851132944226265, + "loss_iou": 0.427734375, + "loss_num": 0.038818359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 228765480, + "step": 1332 + }, + { + "epoch": 0.35062800026303675, + "grad_norm": 6.154736113026185, + "learning_rate": 5e-06, + "loss": 0.1352, + "num_input_tokens_seen": 228938028, + "step": 1333 + }, + { + "epoch": 0.35062800026303675, + "loss": 0.08339493721723557, + "loss_ce": 0.0010737692937254906, + "loss_iou": 0.5703125, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 228938028, + "step": 1333 + }, + { + "epoch": 0.35089103702242386, + "grad_norm": 9.10695022243035, + "learning_rate": 5e-06, + "loss": 0.1561, + "num_input_tokens_seen": 229110180, + "step": 1334 + }, + { + "epoch": 0.35089103702242386, + "loss": 0.17051678895950317, + "loss_ce": 0.0020597607363015413, + "loss_iou": 0.65234375, + "loss_num": 0.03369140625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 229110180, + "step": 1334 + }, + { + "epoch": 0.351154073781811, + "grad_norm": 6.861938259215169, + "learning_rate": 5e-06, + "loss": 0.192, + "num_input_tokens_seen": 229282592, + "step": 1335 + }, + { + "epoch": 0.351154073781811, + "loss": 0.20712369680404663, + "loss_ce": 0.003968170844018459, + "loss_iou": 0.52734375, + "loss_num": 0.04052734375, + "loss_xval": 0.203125, + "num_input_tokens_seen": 229282592, + "step": 1335 + }, + { + "epoch": 0.3514171105411981, + "grad_norm": 6.702358306191994, + "learning_rate": 5e-06, + "loss": 0.1308, + "num_input_tokens_seen": 229454940, + "step": 1336 + }, + { + "epoch": 0.3514171105411981, + "loss": 0.1241491287946701, + "loss_ce": 0.002719694282859564, + "loss_iou": 0.6015625, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 229454940, + "step": 1336 + }, + { + "epoch": 0.3516801473005853, + "grad_norm": 25.54665143162104, + "learning_rate": 5e-06, + "loss": 0.1782, + "num_input_tokens_seen": 229626740, + "step": 1337 + }, + { + "epoch": 0.3516801473005853, + "loss": 0.13124999403953552, + "loss_ce": 0.0006042490131221712, + "loss_iou": 0.50390625, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 229626740, + "step": 1337 + }, + { + "epoch": 0.3519431840599724, + "grad_norm": 10.986092691383933, + "learning_rate": 5e-06, + "loss": 0.1479, + "num_input_tokens_seen": 229795396, + "step": 1338 + }, + { + "epoch": 0.3519431840599724, + "loss": 0.07223416119813919, + "loss_ce": 0.00028897292213514447, + "loss_iou": 0.60546875, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 229795396, + "step": 1338 + }, + { + "epoch": 0.3522062208193595, + "grad_norm": 9.146823823601064, + "learning_rate": 5e-06, + "loss": 0.1496, + "num_input_tokens_seen": 229967592, + "step": 1339 + }, + { + "epoch": 0.3522062208193595, + "loss": 0.14658141136169434, + "loss_ce": 0.00021910574287176132, + "loss_iou": 0.58203125, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 229967592, + "step": 1339 + }, + { + "epoch": 0.35246925757874664, + "grad_norm": 12.31823820919867, + "learning_rate": 5e-06, + "loss": 0.1059, + "num_input_tokens_seen": 230139960, + "step": 1340 + }, + { + "epoch": 0.35246925757874664, + "loss": 0.07799485325813293, + "loss_ce": 0.0002360617509111762, + "loss_iou": 0.6328125, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 230139960, + "step": 1340 + }, + { + "epoch": 0.35273229433813375, + "grad_norm": 6.277536451715721, + "learning_rate": 5e-06, + "loss": 0.1745, + "num_input_tokens_seen": 230312536, + "step": 1341 + }, + { + "epoch": 0.35273229433813375, + "loss": 0.15891912579536438, + "loss_ce": 0.0026691171806305647, + "loss_iou": 0.5625, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 230312536, + "step": 1341 + }, + { + "epoch": 0.3529953310975209, + "grad_norm": 10.816707100229003, + "learning_rate": 5e-06, + "loss": 0.1205, + "num_input_tokens_seen": 230482960, + "step": 1342 + }, + { + "epoch": 0.3529953310975209, + "loss": 0.10626394301652908, + "loss_ce": 0.0003069115919061005, + "loss_iou": 0.64453125, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 230482960, + "step": 1342 + }, + { + "epoch": 0.353258367856908, + "grad_norm": 7.461204110486792, + "learning_rate": 5e-06, + "loss": 0.1438, + "num_input_tokens_seen": 230655564, + "step": 1343 + }, + { + "epoch": 0.353258367856908, + "loss": 0.14832650125026703, + "loss_ce": 0.005412681493908167, + "loss_iou": 0.431640625, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 230655564, + "step": 1343 + }, + { + "epoch": 0.3535214046162951, + "grad_norm": 3.829230892926663, + "learning_rate": 5e-06, + "loss": 0.0983, + "num_input_tokens_seen": 230827776, + "step": 1344 + }, + { + "epoch": 0.3535214046162951, + "loss": 0.11698116362094879, + "loss_ce": 0.0007397143635898829, + "loss_iou": 0.59765625, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 230827776, + "step": 1344 + }, + { + "epoch": 0.35378444137568227, + "grad_norm": 7.33327936841338, + "learning_rate": 5e-06, + "loss": 0.1438, + "num_input_tokens_seen": 230999976, + "step": 1345 + }, + { + "epoch": 0.35378444137568227, + "loss": 0.18063044548034668, + "loss_ce": 0.005978355184197426, + "loss_iou": 0.41796875, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 230999976, + "step": 1345 + }, + { + "epoch": 0.35404747813506937, + "grad_norm": 4.588965012769037, + "learning_rate": 5e-06, + "loss": 0.1907, + "num_input_tokens_seen": 231171944, + "step": 1346 + }, + { + "epoch": 0.35404747813506937, + "loss": 0.19280412793159485, + "loss_ce": 0.0011537342797964811, + "loss_iou": 0.60546875, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 231171944, + "step": 1346 + }, + { + "epoch": 0.3543105148944565, + "grad_norm": 15.486355760387976, + "learning_rate": 5e-06, + "loss": 0.1488, + "num_input_tokens_seen": 231342336, + "step": 1347 + }, + { + "epoch": 0.3543105148944565, + "loss": 0.11234519630670547, + "loss_ce": 0.0008644815534353256, + "loss_iou": 0.49609375, + "loss_num": 0.0223388671875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 231342336, + "step": 1347 + }, + { + "epoch": 0.35457355165384363, + "grad_norm": 17.3485969876628, + "learning_rate": 5e-06, + "loss": 0.1319, + "num_input_tokens_seen": 231514328, + "step": 1348 + }, + { + "epoch": 0.35457355165384363, + "loss": 0.12869834899902344, + "loss_ce": 0.0009517711587250233, + "loss_iou": 0.68359375, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 231514328, + "step": 1348 + }, + { + "epoch": 0.35483658841323074, + "grad_norm": 19.41812876236682, + "learning_rate": 5e-06, + "loss": 0.127, + "num_input_tokens_seen": 231686780, + "step": 1349 + }, + { + "epoch": 0.35483658841323074, + "loss": 0.13225057721138, + "loss_ce": 0.0008724014624021947, + "loss_iou": 0.5625, + "loss_num": 0.0262451171875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 231686780, + "step": 1349 + }, + { + "epoch": 0.3550996251726179, + "grad_norm": 10.841969185593292, + "learning_rate": 5e-06, + "loss": 0.1386, + "num_input_tokens_seen": 231858892, + "step": 1350 + }, + { + "epoch": 0.3550996251726179, + "loss": 0.1675529032945633, + "loss_ce": 0.00019450299441814423, + "loss_iou": 0.6875, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 231858892, + "step": 1350 + }, + { + "epoch": 0.355362661932005, + "grad_norm": 9.874209565962254, + "learning_rate": 5e-06, + "loss": 0.1595, + "num_input_tokens_seen": 232029240, + "step": 1351 + }, + { + "epoch": 0.355362661932005, + "loss": 0.13038085401058197, + "loss_ce": 0.00037596753099933267, + "loss_iou": 0.63671875, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 232029240, + "step": 1351 + }, + { + "epoch": 0.3556256986913921, + "grad_norm": 6.572064767991026, + "learning_rate": 5e-06, + "loss": 0.1609, + "num_input_tokens_seen": 232199320, + "step": 1352 + }, + { + "epoch": 0.3556256986913921, + "loss": 0.11705964803695679, + "loss_ce": 0.00029938769876025617, + "loss_iou": 0.439453125, + "loss_num": 0.0234375, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 232199320, + "step": 1352 + }, + { + "epoch": 0.35588873545077926, + "grad_norm": 19.449170559601413, + "learning_rate": 5e-06, + "loss": 0.129, + "num_input_tokens_seen": 232371336, + "step": 1353 + }, + { + "epoch": 0.35588873545077926, + "loss": 0.10365074872970581, + "loss_ce": 0.00291221821680665, + "loss_iou": 0.470703125, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 232371336, + "step": 1353 + }, + { + "epoch": 0.35615177221016636, + "grad_norm": 6.060818585296291, + "learning_rate": 5e-06, + "loss": 0.1758, + "num_input_tokens_seen": 232543572, + "step": 1354 + }, + { + "epoch": 0.35615177221016636, + "loss": 0.22082458436489105, + "loss_ce": 0.005858768709003925, + "loss_iou": 0.56640625, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 232543572, + "step": 1354 + }, + { + "epoch": 0.3564148089695535, + "grad_norm": 5.2405872305191386, + "learning_rate": 5e-06, + "loss": 0.1508, + "num_input_tokens_seen": 232715652, + "step": 1355 + }, + { + "epoch": 0.3564148089695535, + "loss": 0.1176377683877945, + "loss_ce": 0.001854072092100978, + "loss_iou": 0.55859375, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 232715652, + "step": 1355 + }, + { + "epoch": 0.3566778457289406, + "grad_norm": 9.250810729782357, + "learning_rate": 5e-06, + "loss": 0.1583, + "num_input_tokens_seen": 232888076, + "step": 1356 + }, + { + "epoch": 0.3566778457289406, + "loss": 0.2215210646390915, + "loss_ce": 0.0008179338765330613, + "loss_iou": 0.59375, + "loss_num": 0.044189453125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 232888076, + "step": 1356 + }, + { + "epoch": 0.3569408824883277, + "grad_norm": 5.562200706936175, + "learning_rate": 5e-06, + "loss": 0.1815, + "num_input_tokens_seen": 233060156, + "step": 1357 + }, + { + "epoch": 0.3569408824883277, + "loss": 0.15043510496616364, + "loss_ce": 0.0006548258243128657, + "loss_iou": 0.48828125, + "loss_num": 0.030029296875, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 233060156, + "step": 1357 + }, + { + "epoch": 0.3572039192477149, + "grad_norm": 5.308030458771483, + "learning_rate": 5e-06, + "loss": 0.0933, + "num_input_tokens_seen": 233232200, + "step": 1358 + }, + { + "epoch": 0.3572039192477149, + "loss": 0.09737985581159592, + "loss_ce": 0.0021955338306725025, + "loss_iou": 0.482421875, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 233232200, + "step": 1358 + }, + { + "epoch": 0.357466956007102, + "grad_norm": 22.688298388253326, + "learning_rate": 5e-06, + "loss": 0.1826, + "num_input_tokens_seen": 233402524, + "step": 1359 + }, + { + "epoch": 0.357466956007102, + "loss": 0.13730812072753906, + "loss_ce": 0.000497827713843435, + "loss_iou": 0.578125, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 233402524, + "step": 1359 + }, + { + "epoch": 0.3577299927664891, + "grad_norm": 7.149196234866028, + "learning_rate": 5e-06, + "loss": 0.1397, + "num_input_tokens_seen": 233574352, + "step": 1360 + }, + { + "epoch": 0.3577299927664891, + "loss": 0.13957476615905762, + "loss_ce": 0.002062564715743065, + "loss_iou": 0.6328125, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 233574352, + "step": 1360 + }, + { + "epoch": 0.35799302952587625, + "grad_norm": 4.747458347619557, + "learning_rate": 5e-06, + "loss": 0.12, + "num_input_tokens_seen": 233746476, + "step": 1361 + }, + { + "epoch": 0.35799302952587625, + "loss": 0.09390648454427719, + "loss_ce": 0.001377185108140111, + "loss_iou": 0.44921875, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 233746476, + "step": 1361 + }, + { + "epoch": 0.35825606628526335, + "grad_norm": 4.140560580446189, + "learning_rate": 5e-06, + "loss": 0.1012, + "num_input_tokens_seen": 233918696, + "step": 1362 + }, + { + "epoch": 0.35825606628526335, + "loss": 0.10030673444271088, + "loss_ce": 0.0024216112215071917, + "loss_iou": 0.5625, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 233918696, + "step": 1362 + }, + { + "epoch": 0.3585191030446505, + "grad_norm": 5.484628612516693, + "learning_rate": 5e-06, + "loss": 0.1618, + "num_input_tokens_seen": 234090660, + "step": 1363 + }, + { + "epoch": 0.3585191030446505, + "loss": 0.1829943060874939, + "loss_ce": 0.001201094826683402, + "loss_iou": 0.443359375, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 234090660, + "step": 1363 + }, + { + "epoch": 0.3587821398040376, + "grad_norm": 7.875545161330649, + "learning_rate": 5e-06, + "loss": 0.0939, + "num_input_tokens_seen": 234262444, + "step": 1364 + }, + { + "epoch": 0.3587821398040376, + "loss": 0.10195118188858032, + "loss_ce": 0.00035817097523249686, + "loss_iou": 0.482421875, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 234262444, + "step": 1364 + }, + { + "epoch": 0.3590451765634247, + "grad_norm": 5.112293535327179, + "learning_rate": 5e-06, + "loss": 0.1759, + "num_input_tokens_seen": 234434852, + "step": 1365 + }, + { + "epoch": 0.3590451765634247, + "loss": 0.12388080358505249, + "loss_ce": 0.0015663461526855826, + "loss_iou": 0.5625, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 234434852, + "step": 1365 + }, + { + "epoch": 0.3593082133228119, + "grad_norm": 7.038268029638918, + "learning_rate": 5e-06, + "loss": 0.1712, + "num_input_tokens_seen": 234607044, + "step": 1366 + }, + { + "epoch": 0.3593082133228119, + "loss": 0.10762982070446014, + "loss_ce": 0.0011539864353835583, + "loss_iou": 0.6328125, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 234607044, + "step": 1366 + }, + { + "epoch": 0.359571250082199, + "grad_norm": 23.600297531220633, + "learning_rate": 5e-06, + "loss": 0.1188, + "num_input_tokens_seen": 234779280, + "step": 1367 + }, + { + "epoch": 0.359571250082199, + "loss": 0.06237838417291641, + "loss_ce": 0.0010838248999789357, + "loss_iou": 0.470703125, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 234779280, + "step": 1367 + }, + { + "epoch": 0.35983428684158614, + "grad_norm": 3.7489621345618547, + "learning_rate": 5e-06, + "loss": 0.1625, + "num_input_tokens_seen": 234951544, + "step": 1368 + }, + { + "epoch": 0.35983428684158614, + "loss": 0.09055154025554657, + "loss_ce": 0.0044309417717158794, + "loss_iou": 0.609375, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 234951544, + "step": 1368 + }, + { + "epoch": 0.36009732360097324, + "grad_norm": 11.577809346393625, + "learning_rate": 5e-06, + "loss": 0.1449, + "num_input_tokens_seen": 235124044, + "step": 1369 + }, + { + "epoch": 0.36009732360097324, + "loss": 0.10378709435462952, + "loss_ce": 0.0021635598968714476, + "loss_iou": 0.57421875, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 235124044, + "step": 1369 + }, + { + "epoch": 0.36036036036036034, + "grad_norm": 12.169579645431945, + "learning_rate": 5e-06, + "loss": 0.1989, + "num_input_tokens_seen": 235295776, + "step": 1370 + }, + { + "epoch": 0.36036036036036034, + "loss": 0.26028677821159363, + "loss_ce": 0.00123831897508353, + "loss_iou": 0.4140625, + "loss_num": 0.0517578125, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 235295776, + "step": 1370 + }, + { + "epoch": 0.3606233971197475, + "grad_norm": 6.068163006201439, + "learning_rate": 5e-06, + "loss": 0.1286, + "num_input_tokens_seen": 235467876, + "step": 1371 + }, + { + "epoch": 0.3606233971197475, + "loss": 0.12217633426189423, + "loss_ce": 0.0007163715199567378, + "loss_iou": 0.55078125, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 235467876, + "step": 1371 + }, + { + "epoch": 0.3608864338791346, + "grad_norm": 5.0641588944194575, + "learning_rate": 5e-06, + "loss": 0.15, + "num_input_tokens_seen": 235640260, + "step": 1372 + }, + { + "epoch": 0.3608864338791346, + "loss": 0.13001395761966705, + "loss_ce": 0.003243940882384777, + "loss_iou": 0.5546875, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 235640260, + "step": 1372 + }, + { + "epoch": 0.3611494706385217, + "grad_norm": 5.364003970603183, + "learning_rate": 5e-06, + "loss": 0.1301, + "num_input_tokens_seen": 235812276, + "step": 1373 + }, + { + "epoch": 0.3611494706385217, + "loss": 0.1422898769378662, + "loss_ce": 0.002214185893535614, + "loss_iou": 0.57421875, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 235812276, + "step": 1373 + }, + { + "epoch": 0.36141250739790887, + "grad_norm": 20.072012728606875, + "learning_rate": 5e-06, + "loss": 0.1562, + "num_input_tokens_seen": 235984384, + "step": 1374 + }, + { + "epoch": 0.36141250739790887, + "loss": 0.1751733273267746, + "loss_ce": 0.0007043338264338672, + "loss_iou": NaN, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 235984384, + "step": 1374 + }, + { + "epoch": 0.36167554415729597, + "grad_norm": 9.286393703464922, + "learning_rate": 5e-06, + "loss": 0.1541, + "num_input_tokens_seen": 236156884, + "step": 1375 + }, + { + "epoch": 0.36167554415729597, + "loss": 0.21126675605773926, + "loss_ce": 0.0010006362572312355, + "loss_iou": 0.431640625, + "loss_num": 0.0419921875, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 236156884, + "step": 1375 + }, + { + "epoch": 0.36193858091668313, + "grad_norm": 11.903565521275382, + "learning_rate": 5e-06, + "loss": 0.1448, + "num_input_tokens_seen": 236328968, + "step": 1376 + }, + { + "epoch": 0.36193858091668313, + "loss": 0.12723658978939056, + "loss_ce": 0.0002834574261214584, + "loss_iou": 0.57421875, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 236328968, + "step": 1376 + }, + { + "epoch": 0.36220161767607023, + "grad_norm": 16.599820050329747, + "learning_rate": 5e-06, + "loss": 0.115, + "num_input_tokens_seen": 236501060, + "step": 1377 + }, + { + "epoch": 0.36220161767607023, + "loss": 0.11763446778059006, + "loss_ce": 0.0014235277194529772, + "loss_iou": 0.57421875, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 236501060, + "step": 1377 + }, + { + "epoch": 0.36246465443545733, + "grad_norm": 12.84169775199896, + "learning_rate": 5e-06, + "loss": 0.1592, + "num_input_tokens_seen": 236673348, + "step": 1378 + }, + { + "epoch": 0.36246465443545733, + "loss": 0.1577835977077484, + "loss_ce": 0.00424966961145401, + "loss_iou": 0.703125, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 236673348, + "step": 1378 + }, + { + "epoch": 0.3627276911948445, + "grad_norm": 4.530082641105361, + "learning_rate": 5e-06, + "loss": 0.1546, + "num_input_tokens_seen": 236845952, + "step": 1379 + }, + { + "epoch": 0.3627276911948445, + "loss": 0.16972720623016357, + "loss_ce": 0.0046881334856152534, + "loss_iou": 0.546875, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 236845952, + "step": 1379 + }, + { + "epoch": 0.3629907279542316, + "grad_norm": 4.453547029103764, + "learning_rate": 5e-06, + "loss": 0.1384, + "num_input_tokens_seen": 237016412, + "step": 1380 + }, + { + "epoch": 0.3629907279542316, + "loss": 0.1337898075580597, + "loss_ce": 0.0024726560804992914, + "loss_iou": 0.58203125, + "loss_num": 0.0262451171875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 237016412, + "step": 1380 + }, + { + "epoch": 0.36325376471361875, + "grad_norm": 14.926786019286814, + "learning_rate": 5e-06, + "loss": 0.1262, + "num_input_tokens_seen": 237188648, + "step": 1381 + }, + { + "epoch": 0.36325376471361875, + "loss": 0.09095098078250885, + "loss_ce": 0.0004968784051015973, + "loss_iou": 0.59765625, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 237188648, + "step": 1381 + }, + { + "epoch": 0.36351680147300586, + "grad_norm": 5.283936723228349, + "learning_rate": 5e-06, + "loss": 0.1558, + "num_input_tokens_seen": 237360968, + "step": 1382 + }, + { + "epoch": 0.36351680147300586, + "loss": 0.10726694762706757, + "loss_ce": 0.002011828124523163, + "loss_iou": 0.6640625, + "loss_num": 0.02099609375, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 237360968, + "step": 1382 + }, + { + "epoch": 0.36377983823239296, + "grad_norm": 6.9353605673209175, + "learning_rate": 5e-06, + "loss": 0.1229, + "num_input_tokens_seen": 237533240, + "step": 1383 + }, + { + "epoch": 0.36377983823239296, + "loss": 0.12320241332054138, + "loss_ce": 0.0009337374940514565, + "loss_iou": 0.5390625, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 237533240, + "step": 1383 + }, + { + "epoch": 0.3640428749917801, + "grad_norm": 7.872299034496559, + "learning_rate": 5e-06, + "loss": 0.1189, + "num_input_tokens_seen": 237705412, + "step": 1384 + }, + { + "epoch": 0.3640428749917801, + "loss": 0.10494999587535858, + "loss_ce": 0.0007019541808404028, + "loss_iou": 0.5703125, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 237705412, + "step": 1384 + }, + { + "epoch": 0.3643059117511672, + "grad_norm": 9.299219065335537, + "learning_rate": 5e-06, + "loss": 0.1322, + "num_input_tokens_seen": 237875984, + "step": 1385 + }, + { + "epoch": 0.3643059117511672, + "loss": 0.09333762526512146, + "loss_ce": 0.0001979749504243955, + "loss_iou": 0.6171875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 237875984, + "step": 1385 + }, + { + "epoch": 0.3645689485105543, + "grad_norm": 4.989443397895941, + "learning_rate": 5e-06, + "loss": 0.1464, + "num_input_tokens_seen": 238048268, + "step": 1386 + }, + { + "epoch": 0.3645689485105543, + "loss": 0.14278042316436768, + "loss_ce": 0.0023690357338637114, + "loss_iou": 0.55078125, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 238048268, + "step": 1386 + }, + { + "epoch": 0.3648319852699415, + "grad_norm": 12.398705819599233, + "learning_rate": 5e-06, + "loss": 0.1595, + "num_input_tokens_seen": 238218704, + "step": 1387 + }, + { + "epoch": 0.3648319852699415, + "loss": 0.18032394349575043, + "loss_ce": 0.000819545122794807, + "loss_iou": 0.5703125, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 238218704, + "step": 1387 + }, + { + "epoch": 0.3650950220293286, + "grad_norm": 6.66114485908042, + "learning_rate": 5e-06, + "loss": 0.1414, + "num_input_tokens_seen": 238390452, + "step": 1388 + }, + { + "epoch": 0.3650950220293286, + "loss": 0.23675096035003662, + "loss_ce": 0.0027726897969841957, + "loss_iou": 0.39453125, + "loss_num": 0.046875, + "loss_xval": 0.234375, + "num_input_tokens_seen": 238390452, + "step": 1388 + }, + { + "epoch": 0.36535805878871574, + "grad_norm": 9.52863817159708, + "learning_rate": 5e-06, + "loss": 0.1254, + "num_input_tokens_seen": 238562708, + "step": 1389 + }, + { + "epoch": 0.36535805878871574, + "loss": 0.10956455767154694, + "loss_ce": 0.0007693836814723909, + "loss_iou": 0.6171875, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 238562708, + "step": 1389 + }, + { + "epoch": 0.36562109554810285, + "grad_norm": 14.33207869070322, + "learning_rate": 5e-06, + "loss": 0.1554, + "num_input_tokens_seen": 238734768, + "step": 1390 + }, + { + "epoch": 0.36562109554810285, + "loss": 0.11770792305469513, + "loss_ce": 0.00018472480587661266, + "loss_iou": 0.71484375, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 238734768, + "step": 1390 + }, + { + "epoch": 0.36588413230748995, + "grad_norm": 6.704869940759653, + "learning_rate": 5e-06, + "loss": 0.1267, + "num_input_tokens_seen": 238906944, + "step": 1391 + }, + { + "epoch": 0.36588413230748995, + "loss": 0.12555649876594543, + "loss_ce": 0.0015330680180341005, + "loss_iou": 0.6953125, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 238906944, + "step": 1391 + }, + { + "epoch": 0.3661471690668771, + "grad_norm": 7.947535402036419, + "learning_rate": 5e-06, + "loss": 0.1349, + "num_input_tokens_seen": 239078728, + "step": 1392 + }, + { + "epoch": 0.3661471690668771, + "loss": 0.12712188065052032, + "loss_ce": 0.0004739244468510151, + "loss_iou": 0.40234375, + "loss_num": 0.0252685546875, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 239078728, + "step": 1392 + }, + { + "epoch": 0.3664102058262642, + "grad_norm": 21.67277442987669, + "learning_rate": 5e-06, + "loss": 0.1626, + "num_input_tokens_seen": 239251004, + "step": 1393 + }, + { + "epoch": 0.3664102058262642, + "loss": 0.16826726496219635, + "loss_ce": 0.0013361112214624882, + "loss_iou": 0.58984375, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 239251004, + "step": 1393 + }, + { + "epoch": 0.36667324258565137, + "grad_norm": 10.958283524277078, + "learning_rate": 5e-06, + "loss": 0.0975, + "num_input_tokens_seen": 239418772, + "step": 1394 + }, + { + "epoch": 0.36667324258565137, + "loss": 0.05772838741540909, + "loss_ce": 0.0009962135227397084, + "loss_iou": 0.6171875, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 239418772, + "step": 1394 + }, + { + "epoch": 0.3669362793450385, + "grad_norm": 6.917710662760069, + "learning_rate": 5e-06, + "loss": 0.167, + "num_input_tokens_seen": 239591128, + "step": 1395 + }, + { + "epoch": 0.3669362793450385, + "loss": 0.09961553663015366, + "loss_ce": 0.001104797120206058, + "loss_iou": 0.54296875, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 239591128, + "step": 1395 + }, + { + "epoch": 0.3671993161044256, + "grad_norm": 15.902726176045867, + "learning_rate": 5e-06, + "loss": 0.1675, + "num_input_tokens_seen": 239763344, + "step": 1396 + }, + { + "epoch": 0.3671993161044256, + "loss": 0.32926326990127563, + "loss_ce": 0.011209084652364254, + "loss_iou": 0.59765625, + "loss_num": 0.0634765625, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 239763344, + "step": 1396 + }, + { + "epoch": 0.36746235286381274, + "grad_norm": 13.284413053795395, + "learning_rate": 5e-06, + "loss": 0.1494, + "num_input_tokens_seen": 239933592, + "step": 1397 + }, + { + "epoch": 0.36746235286381274, + "loss": 0.11202233284711838, + "loss_ce": 0.004936150275170803, + "loss_iou": 0.5625, + "loss_num": 0.021484375, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 239933592, + "step": 1397 + }, + { + "epoch": 0.36772538962319984, + "grad_norm": 4.642604596899854, + "learning_rate": 5e-06, + "loss": 0.1228, + "num_input_tokens_seen": 240106096, + "step": 1398 + }, + { + "epoch": 0.36772538962319984, + "loss": 0.12524864077568054, + "loss_ce": 0.0025679690297693014, + "loss_iou": 0.6640625, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 240106096, + "step": 1398 + }, + { + "epoch": 0.36798842638258694, + "grad_norm": 6.5389674494027, + "learning_rate": 5e-06, + "loss": 0.1251, + "num_input_tokens_seen": 240278216, + "step": 1399 + }, + { + "epoch": 0.36798842638258694, + "loss": 0.17912393808364868, + "loss_ce": 0.002244053641334176, + "loss_iou": 0.53125, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 240278216, + "step": 1399 + }, + { + "epoch": 0.3682514631419741, + "grad_norm": 6.524251878336125, + "learning_rate": 5e-06, + "loss": 0.1441, + "num_input_tokens_seen": 240450288, + "step": 1400 + }, + { + "epoch": 0.3682514631419741, + "loss": 0.1508733034133911, + "loss_ce": 0.00200856477022171, + "loss_iou": 0.57421875, + "loss_num": 0.02978515625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 240450288, + "step": 1400 + }, + { + "epoch": 0.3685144999013612, + "grad_norm": 7.825263953291389, + "learning_rate": 5e-06, + "loss": 0.1443, + "num_input_tokens_seen": 240619860, + "step": 1401 + }, + { + "epoch": 0.3685144999013612, + "loss": 0.21069373190402985, + "loss_ce": 0.002014526631683111, + "loss_iou": 0.5234375, + "loss_num": 0.041748046875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 240619860, + "step": 1401 + }, + { + "epoch": 0.36877753666074836, + "grad_norm": 21.746555075327876, + "learning_rate": 5e-06, + "loss": 0.1126, + "num_input_tokens_seen": 240792004, + "step": 1402 + }, + { + "epoch": 0.36877753666074836, + "loss": 0.10193300247192383, + "loss_ce": 0.003086569719016552, + "loss_iou": 0.546875, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 240792004, + "step": 1402 + }, + { + "epoch": 0.36904057342013546, + "grad_norm": 13.416067582793307, + "learning_rate": 5e-06, + "loss": 0.1275, + "num_input_tokens_seen": 240963952, + "step": 1403 + }, + { + "epoch": 0.36904057342013546, + "loss": 0.14882555603981018, + "loss_ce": 0.0034703421406447887, + "loss_iou": 0.482421875, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 240963952, + "step": 1403 + }, + { + "epoch": 0.36930361017952257, + "grad_norm": 7.0351108134980604, + "learning_rate": 5e-06, + "loss": 0.1138, + "num_input_tokens_seen": 241135992, + "step": 1404 + }, + { + "epoch": 0.36930361017952257, + "loss": 0.16983582079410553, + "loss_ce": 0.004186400678008795, + "loss_iou": 0.6328125, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 241135992, + "step": 1404 + }, + { + "epoch": 0.3695666469389097, + "grad_norm": 10.37111011550267, + "learning_rate": 5e-06, + "loss": 0.1295, + "num_input_tokens_seen": 241306532, + "step": 1405 + }, + { + "epoch": 0.3695666469389097, + "loss": 0.13351476192474365, + "loss_ce": 0.0033267755061388016, + "loss_iou": 0.5078125, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 241306532, + "step": 1405 + }, + { + "epoch": 0.36982968369829683, + "grad_norm": 4.1471191240598, + "learning_rate": 5e-06, + "loss": 0.1442, + "num_input_tokens_seen": 241478648, + "step": 1406 + }, + { + "epoch": 0.36982968369829683, + "loss": 0.08705037832260132, + "loss_ce": 0.0007466700626537204, + "loss_iou": 0.52734375, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 241478648, + "step": 1406 + }, + { + "epoch": 0.370092720457684, + "grad_norm": 4.611801393612808, + "learning_rate": 5e-06, + "loss": 0.162, + "num_input_tokens_seen": 241650796, + "step": 1407 + }, + { + "epoch": 0.370092720457684, + "loss": 0.22668591141700745, + "loss_ce": 0.0017713564448058605, + "loss_iou": 0.49609375, + "loss_num": 0.044921875, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 241650796, + "step": 1407 + }, + { + "epoch": 0.3703557572170711, + "grad_norm": 8.388794391473104, + "learning_rate": 5e-06, + "loss": 0.1379, + "num_input_tokens_seen": 241822848, + "step": 1408 + }, + { + "epoch": 0.3703557572170711, + "loss": 0.11565081030130386, + "loss_ce": 0.006123221945017576, + "loss_iou": 0.48828125, + "loss_num": 0.02197265625, + "loss_xval": 0.109375, + "num_input_tokens_seen": 241822848, + "step": 1408 + }, + { + "epoch": 0.3706187939764582, + "grad_norm": 4.295439998788119, + "learning_rate": 5e-06, + "loss": 0.1396, + "num_input_tokens_seen": 241994956, + "step": 1409 + }, + { + "epoch": 0.3706187939764582, + "loss": 0.19000545144081116, + "loss_ce": 0.0015594041906297207, + "loss_iou": 0.59375, + "loss_num": 0.03759765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 241994956, + "step": 1409 + }, + { + "epoch": 0.37088183073584535, + "grad_norm": 5.005427734613651, + "learning_rate": 5e-06, + "loss": 0.1313, + "num_input_tokens_seen": 242167240, + "step": 1410 + }, + { + "epoch": 0.37088183073584535, + "loss": 0.10752324759960175, + "loss_ce": 0.00040654875920154154, + "loss_iou": 0.609375, + "loss_num": 0.021484375, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 242167240, + "step": 1410 + }, + { + "epoch": 0.37114486749523246, + "grad_norm": 5.984568060178908, + "learning_rate": 5e-06, + "loss": 0.1437, + "num_input_tokens_seen": 242339532, + "step": 1411 + }, + { + "epoch": 0.37114486749523246, + "loss": 0.14311346411705017, + "loss_ce": 0.0001386186049785465, + "loss_iou": 0.47265625, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 242339532, + "step": 1411 + }, + { + "epoch": 0.37140790425461956, + "grad_norm": 43.56689122924667, + "learning_rate": 5e-06, + "loss": 0.1338, + "num_input_tokens_seen": 242511708, + "step": 1412 + }, + { + "epoch": 0.37140790425461956, + "loss": 0.127020463347435, + "loss_ce": 0.0014406184200197458, + "loss_iou": 0.52734375, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 242511708, + "step": 1412 + }, + { + "epoch": 0.3716709410140067, + "grad_norm": 8.230300180002782, + "learning_rate": 5e-06, + "loss": 0.1371, + "num_input_tokens_seen": 242684000, + "step": 1413 + }, + { + "epoch": 0.3716709410140067, + "loss": 0.1775522530078888, + "loss_ce": 0.000489264028146863, + "loss_iou": 0.62109375, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 242684000, + "step": 1413 + }, + { + "epoch": 0.3719339777733938, + "grad_norm": 7.623956423887346, + "learning_rate": 5e-06, + "loss": 0.1156, + "num_input_tokens_seen": 242856400, + "step": 1414 + }, + { + "epoch": 0.3719339777733938, + "loss": 0.172633096575737, + "loss_ce": 0.0017346586100757122, + "loss_iou": 0.60546875, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 242856400, + "step": 1414 + }, + { + "epoch": 0.372197014532781, + "grad_norm": 5.086262256381093, + "learning_rate": 5e-06, + "loss": 0.1312, + "num_input_tokens_seen": 243028760, + "step": 1415 + }, + { + "epoch": 0.372197014532781, + "loss": 0.11333785206079483, + "loss_ce": 0.0045731994323432446, + "loss_iou": 0.5546875, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 243028760, + "step": 1415 + }, + { + "epoch": 0.3724600512921681, + "grad_norm": 7.319953499179629, + "learning_rate": 5e-06, + "loss": 0.1736, + "num_input_tokens_seen": 243200888, + "step": 1416 + }, + { + "epoch": 0.3724600512921681, + "loss": 0.12842552363872528, + "loss_ce": 0.0017775753512978554, + "loss_iou": 0.69921875, + "loss_num": 0.0252685546875, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 243200888, + "step": 1416 + }, + { + "epoch": 0.3727230880515552, + "grad_norm": 7.14806250434601, + "learning_rate": 5e-06, + "loss": 0.1676, + "num_input_tokens_seen": 243373224, + "step": 1417 + }, + { + "epoch": 0.3727230880515552, + "loss": 0.17957568168640137, + "loss_ce": 0.004038581624627113, + "loss_iou": 0.4765625, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 243373224, + "step": 1417 + }, + { + "epoch": 0.37298612481094234, + "grad_norm": 10.549476863868529, + "learning_rate": 5e-06, + "loss": 0.1337, + "num_input_tokens_seen": 243545548, + "step": 1418 + }, + { + "epoch": 0.37298612481094234, + "loss": 0.14556309580802917, + "loss_ce": 0.0017947773449122906, + "loss_iou": 0.61328125, + "loss_num": 0.02880859375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 243545548, + "step": 1418 + }, + { + "epoch": 0.37324916157032945, + "grad_norm": 10.0643806609024, + "learning_rate": 5e-06, + "loss": 0.1452, + "num_input_tokens_seen": 243715796, + "step": 1419 + }, + { + "epoch": 0.37324916157032945, + "loss": 0.13023152947425842, + "loss_ce": 0.0014473494375124574, + "loss_iou": 0.5703125, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 243715796, + "step": 1419 + }, + { + "epoch": 0.3735121983297166, + "grad_norm": 6.667777106999036, + "learning_rate": 5e-06, + "loss": 0.1642, + "num_input_tokens_seen": 243886244, + "step": 1420 + }, + { + "epoch": 0.3735121983297166, + "loss": 0.21719014644622803, + "loss_ce": 0.0024379536043852568, + "loss_iou": 0.40625, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 243886244, + "step": 1420 + }, + { + "epoch": 0.3737752350891037, + "grad_norm": 6.203781395805212, + "learning_rate": 5e-06, + "loss": 0.1106, + "num_input_tokens_seen": 244058256, + "step": 1421 + }, + { + "epoch": 0.3737752350891037, + "loss": 0.14644312858581543, + "loss_ce": 0.003773454576730728, + "loss_iou": 0.50390625, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 244058256, + "step": 1421 + }, + { + "epoch": 0.3740382718484908, + "grad_norm": 11.121295779991378, + "learning_rate": 5e-06, + "loss": 0.162, + "num_input_tokens_seen": 244230348, + "step": 1422 + }, + { + "epoch": 0.3740382718484908, + "loss": 0.13860073685646057, + "loss_ce": 0.004567527212202549, + "loss_iou": 0.41796875, + "loss_num": 0.02685546875, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 244230348, + "step": 1422 + }, + { + "epoch": 0.37430130860787797, + "grad_norm": 8.262433746875137, + "learning_rate": 5e-06, + "loss": 0.1552, + "num_input_tokens_seen": 244402756, + "step": 1423 + }, + { + "epoch": 0.37430130860787797, + "loss": 0.17389166355133057, + "loss_ce": 0.0005212996620684862, + "loss_iou": 0.36328125, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 244402756, + "step": 1423 + }, + { + "epoch": 0.3745643453672651, + "grad_norm": 4.574844176213277, + "learning_rate": 5e-06, + "loss": 0.1272, + "num_input_tokens_seen": 244575028, + "step": 1424 + }, + { + "epoch": 0.3745643453672651, + "loss": 0.2532828748226166, + "loss_ce": 0.002657280070707202, + "loss_iou": 0.51171875, + "loss_num": 0.050048828125, + "loss_xval": 0.25, + "num_input_tokens_seen": 244575028, + "step": 1424 + }, + { + "epoch": 0.3748273821266522, + "grad_norm": 4.771481627209834, + "learning_rate": 5e-06, + "loss": 0.1401, + "num_input_tokens_seen": 244745344, + "step": 1425 + }, + { + "epoch": 0.3748273821266522, + "loss": 0.1295078694820404, + "loss_ce": 0.004813054576516151, + "loss_iou": 0.5390625, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 244745344, + "step": 1425 + }, + { + "epoch": 0.37509041888603933, + "grad_norm": 9.163217114560078, + "learning_rate": 5e-06, + "loss": 0.1447, + "num_input_tokens_seen": 244917724, + "step": 1426 + }, + { + "epoch": 0.37509041888603933, + "loss": 0.08564335107803345, + "loss_ce": 0.004375034011900425, + "loss_iou": 0.52734375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 244917724, + "step": 1426 + }, + { + "epoch": 0.37535345564542644, + "grad_norm": 4.314222759697504, + "learning_rate": 5e-06, + "loss": 0.1353, + "num_input_tokens_seen": 245089960, + "step": 1427 + }, + { + "epoch": 0.37535345564542644, + "loss": 0.1637798547744751, + "loss_ce": 0.004539141897112131, + "loss_iou": 0.55859375, + "loss_num": 0.031982421875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 245089960, + "step": 1427 + }, + { + "epoch": 0.3756164924048136, + "grad_norm": 4.131046733957264, + "learning_rate": 5e-06, + "loss": 0.0983, + "num_input_tokens_seen": 245261980, + "step": 1428 + }, + { + "epoch": 0.3756164924048136, + "loss": 0.07748128473758698, + "loss_ce": 0.0020113117061555386, + "loss_iou": 0.4296875, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 245261980, + "step": 1428 + }, + { + "epoch": 0.3758795291642007, + "grad_norm": 5.689319334005767, + "learning_rate": 5e-06, + "loss": 0.1536, + "num_input_tokens_seen": 245434416, + "step": 1429 + }, + { + "epoch": 0.3758795291642007, + "loss": 0.11119981110095978, + "loss_ce": 0.0007566966232843697, + "loss_iou": 0.6015625, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 245434416, + "step": 1429 + }, + { + "epoch": 0.3761425659235878, + "grad_norm": 20.10795595759354, + "learning_rate": 5e-06, + "loss": 0.1404, + "num_input_tokens_seen": 245606636, + "step": 1430 + }, + { + "epoch": 0.3761425659235878, + "loss": 0.12385141104459763, + "loss_ce": 0.005015961825847626, + "loss_iou": 0.474609375, + "loss_num": 0.0238037109375, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 245606636, + "step": 1430 + }, + { + "epoch": 0.37640560268297496, + "grad_norm": 5.445619300828039, + "learning_rate": 5e-06, + "loss": 0.1253, + "num_input_tokens_seen": 245778556, + "step": 1431 + }, + { + "epoch": 0.37640560268297496, + "loss": 0.09182294458150864, + "loss_ce": 0.0022538499906659126, + "loss_iou": 0.609375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 245778556, + "step": 1431 + }, + { + "epoch": 0.37666863944236206, + "grad_norm": 6.5911007721042045, + "learning_rate": 5e-06, + "loss": 0.1339, + "num_input_tokens_seen": 245950596, + "step": 1432 + }, + { + "epoch": 0.37666863944236206, + "loss": 0.11425422877073288, + "loss_ce": 0.002651446033269167, + "loss_iou": 0.6328125, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 245950596, + "step": 1432 + }, + { + "epoch": 0.3769316762017492, + "grad_norm": 5.727028044767281, + "learning_rate": 5e-06, + "loss": 0.11, + "num_input_tokens_seen": 246120388, + "step": 1433 + }, + { + "epoch": 0.3769316762017492, + "loss": 0.10625661909580231, + "loss_ce": 0.0015508129727095366, + "loss_iou": 0.58203125, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 246120388, + "step": 1433 + }, + { + "epoch": 0.3771947129611363, + "grad_norm": 11.215881906654301, + "learning_rate": 5e-06, + "loss": 0.138, + "num_input_tokens_seen": 246292408, + "step": 1434 + }, + { + "epoch": 0.3771947129611363, + "loss": 0.1351088285446167, + "loss_ce": 0.0014418261125683784, + "loss_iou": 0.412109375, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 246292408, + "step": 1434 + }, + { + "epoch": 0.37745774972052343, + "grad_norm": 3.9647228124096485, + "learning_rate": 5e-06, + "loss": 0.1515, + "num_input_tokens_seen": 246464396, + "step": 1435 + }, + { + "epoch": 0.37745774972052343, + "loss": 0.12623311579227448, + "loss_ce": 0.00034810492070391774, + "loss_iou": 0.31640625, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 246464396, + "step": 1435 + }, + { + "epoch": 0.3777207864799106, + "grad_norm": 3.981981545629712, + "learning_rate": 5e-06, + "loss": 0.1145, + "num_input_tokens_seen": 246636468, + "step": 1436 + }, + { + "epoch": 0.3777207864799106, + "loss": 0.08713729679584503, + "loss_ce": 0.0009556564036756754, + "loss_iou": 0.416015625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 246636468, + "step": 1436 + }, + { + "epoch": 0.3779838232392977, + "grad_norm": 8.169176906981303, + "learning_rate": 5e-06, + "loss": 0.1047, + "num_input_tokens_seen": 246808440, + "step": 1437 + }, + { + "epoch": 0.3779838232392977, + "loss": 0.10194505006074905, + "loss_ce": 0.005204326473176479, + "loss_iou": 0.4453125, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 246808440, + "step": 1437 + }, + { + "epoch": 0.3782468599986848, + "grad_norm": 11.188980116706098, + "learning_rate": 5e-06, + "loss": 0.1187, + "num_input_tokens_seen": 246980788, + "step": 1438 + }, + { + "epoch": 0.3782468599986848, + "loss": 0.19040054082870483, + "loss_ce": 0.0052198669873178005, + "loss_iou": 0.462890625, + "loss_num": 0.036865234375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 246980788, + "step": 1438 + }, + { + "epoch": 0.37850989675807195, + "grad_norm": 7.739931791211071, + "learning_rate": 5e-06, + "loss": 0.1058, + "num_input_tokens_seen": 247152940, + "step": 1439 + }, + { + "epoch": 0.37850989675807195, + "loss": 0.057150378823280334, + "loss_ce": 0.00032664957689121366, + "loss_iou": 0.4609375, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 247152940, + "step": 1439 + }, + { + "epoch": 0.37877293351745905, + "grad_norm": 4.659888569440988, + "learning_rate": 5e-06, + "loss": 0.1621, + "num_input_tokens_seen": 247321728, + "step": 1440 + }, + { + "epoch": 0.37877293351745905, + "loss": 0.23644393682479858, + "loss_ce": 0.0026182467117905617, + "loss_iou": 0.63671875, + "loss_num": 0.046630859375, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 247321728, + "step": 1440 + }, + { + "epoch": 0.3790359702768462, + "grad_norm": 4.432660256180253, + "learning_rate": 5e-06, + "loss": 0.1362, + "num_input_tokens_seen": 247493788, + "step": 1441 + }, + { + "epoch": 0.3790359702768462, + "loss": 0.13106387853622437, + "loss_ce": 0.0005707137170247734, + "loss_iou": 0.52734375, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 247493788, + "step": 1441 + }, + { + "epoch": 0.3792990070362333, + "grad_norm": 14.394067386487718, + "learning_rate": 5e-06, + "loss": 0.119, + "num_input_tokens_seen": 247666148, + "step": 1442 + }, + { + "epoch": 0.3792990070362333, + "loss": 0.06981781125068665, + "loss_ce": 0.0019467112142592669, + "loss_iou": 0.5234375, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 247666148, + "step": 1442 + }, + { + "epoch": 0.3795620437956204, + "grad_norm": 7.3199746686785785, + "learning_rate": 5e-06, + "loss": 0.1159, + "num_input_tokens_seen": 247838148, + "step": 1443 + }, + { + "epoch": 0.3795620437956204, + "loss": 0.12706537544727325, + "loss_ce": 0.001332960557192564, + "loss_iou": 0.58203125, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 247838148, + "step": 1443 + }, + { + "epoch": 0.3798250805550076, + "grad_norm": 9.946074142043194, + "learning_rate": 5e-06, + "loss": 0.1498, + "num_input_tokens_seen": 248008104, + "step": 1444 + }, + { + "epoch": 0.3798250805550076, + "loss": 0.08655127882957458, + "loss_ce": 0.002902593230828643, + "loss_iou": 0.62890625, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 248008104, + "step": 1444 + }, + { + "epoch": 0.3800881173143947, + "grad_norm": 6.769456974696547, + "learning_rate": 5e-06, + "loss": 0.1537, + "num_input_tokens_seen": 248178340, + "step": 1445 + }, + { + "epoch": 0.3800881173143947, + "loss": 0.12096725404262543, + "loss_ce": 0.0002702331403270364, + "loss_iou": 0.59765625, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 248178340, + "step": 1445 + }, + { + "epoch": 0.38035115407378184, + "grad_norm": 8.140876700736197, + "learning_rate": 5e-06, + "loss": 0.1292, + "num_input_tokens_seen": 248350556, + "step": 1446 + }, + { + "epoch": 0.38035115407378184, + "loss": 0.09277988225221634, + "loss_ce": 0.004370463080704212, + "loss_iou": 0.53125, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 248350556, + "step": 1446 + }, + { + "epoch": 0.38061419083316894, + "grad_norm": 5.023139574097896, + "learning_rate": 5e-06, + "loss": 0.1556, + "num_input_tokens_seen": 248522684, + "step": 1447 + }, + { + "epoch": 0.38061419083316894, + "loss": 0.19670161604881287, + "loss_ce": 0.002487761899828911, + "loss_iou": 0.53515625, + "loss_num": 0.038818359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 248522684, + "step": 1447 + }, + { + "epoch": 0.38087722759255604, + "grad_norm": 10.119019719981985, + "learning_rate": 5e-06, + "loss": 0.1368, + "num_input_tokens_seen": 248692816, + "step": 1448 + }, + { + "epoch": 0.38087722759255604, + "loss": 0.14071118831634521, + "loss_ce": 0.001856213086284697, + "loss_iou": 0.486328125, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 248692816, + "step": 1448 + }, + { + "epoch": 0.3811402643519432, + "grad_norm": 4.80565522402478, + "learning_rate": 5e-06, + "loss": 0.1265, + "num_input_tokens_seen": 248865220, + "step": 1449 + }, + { + "epoch": 0.3811402643519432, + "loss": 0.138558492064476, + "loss_ce": 0.005471333395689726, + "loss_iou": 0.546875, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 248865220, + "step": 1449 + }, + { + "epoch": 0.3814033011113303, + "grad_norm": 5.799079918279409, + "learning_rate": 5e-06, + "loss": 0.1445, + "num_input_tokens_seen": 249037228, + "step": 1450 + }, + { + "epoch": 0.3814033011113303, + "loss": 0.1300484836101532, + "loss_ce": 0.0007149941520765424, + "loss_iou": 0.59765625, + "loss_num": 0.02587890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 249037228, + "step": 1450 + }, + { + "epoch": 0.3816663378707174, + "grad_norm": 5.998961200928992, + "learning_rate": 5e-06, + "loss": 0.1427, + "num_input_tokens_seen": 249209488, + "step": 1451 + }, + { + "epoch": 0.3816663378707174, + "loss": 0.15324485301971436, + "loss_ce": 0.0011757557513192296, + "loss_iou": 0.62890625, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 249209488, + "step": 1451 + }, + { + "epoch": 0.38192937463010457, + "grad_norm": 6.571427699631083, + "learning_rate": 5e-06, + "loss": 0.1472, + "num_input_tokens_seen": 249381876, + "step": 1452 + }, + { + "epoch": 0.38192937463010457, + "loss": 0.09512484073638916, + "loss_ce": 0.001863121404312551, + "loss_iou": 0.640625, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 249381876, + "step": 1452 + }, + { + "epoch": 0.38219241138949167, + "grad_norm": 5.686083518821853, + "learning_rate": 5e-06, + "loss": 0.1111, + "num_input_tokens_seen": 249554160, + "step": 1453 + }, + { + "epoch": 0.38219241138949167, + "loss": 0.12024913728237152, + "loss_ce": 0.002786980476230383, + "loss_iou": 0.49609375, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 249554160, + "step": 1453 + }, + { + "epoch": 0.38245544814887883, + "grad_norm": 5.286753977297326, + "learning_rate": 5e-06, + "loss": 0.1348, + "num_input_tokens_seen": 249726068, + "step": 1454 + }, + { + "epoch": 0.38245544814887883, + "loss": 0.15219104290008545, + "loss_ce": 0.001312148873694241, + "loss_iou": NaN, + "loss_num": 0.0302734375, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 249726068, + "step": 1454 + }, + { + "epoch": 0.38271848490826593, + "grad_norm": 6.1144167801042295, + "learning_rate": 5e-06, + "loss": 0.1296, + "num_input_tokens_seen": 249898264, + "step": 1455 + }, + { + "epoch": 0.38271848490826593, + "loss": 0.20618750154972076, + "loss_ce": 0.001353507163003087, + "loss_iou": 0.5703125, + "loss_num": 0.041015625, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 249898264, + "step": 1455 + }, + { + "epoch": 0.38298152166765304, + "grad_norm": 5.254827116017921, + "learning_rate": 5e-06, + "loss": 0.1403, + "num_input_tokens_seen": 250070312, + "step": 1456 + }, + { + "epoch": 0.38298152166765304, + "loss": 0.09227914363145828, + "loss_ce": 0.00014657669817097485, + "loss_iou": 0.6484375, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 250070312, + "step": 1456 + }, + { + "epoch": 0.3832445584270402, + "grad_norm": 6.7921237774310095, + "learning_rate": 5e-06, + "loss": 0.1599, + "num_input_tokens_seen": 250242620, + "step": 1457 + }, + { + "epoch": 0.3832445584270402, + "loss": 0.16699600219726562, + "loss_ce": 0.0013771126978099346, + "loss_iou": 0.435546875, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 250242620, + "step": 1457 + }, + { + "epoch": 0.3835075951864273, + "grad_norm": 5.478639252002706, + "learning_rate": 5e-06, + "loss": 0.1148, + "num_input_tokens_seen": 250414736, + "step": 1458 + }, + { + "epoch": 0.3835075951864273, + "loss": 0.05368629842996597, + "loss_ce": 0.00015846370661165565, + "loss_iou": 0.734375, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 250414736, + "step": 1458 + }, + { + "epoch": 0.3837706319458144, + "grad_norm": 5.108471243837392, + "learning_rate": 5e-06, + "loss": 0.1335, + "num_input_tokens_seen": 250586900, + "step": 1459 + }, + { + "epoch": 0.3837706319458144, + "loss": 0.12694557011127472, + "loss_ce": 0.0016709101619198918, + "loss_iou": 0.44921875, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 250586900, + "step": 1459 + }, + { + "epoch": 0.38403366870520156, + "grad_norm": 5.673015930654883, + "learning_rate": 5e-06, + "loss": 0.1315, + "num_input_tokens_seen": 250757288, + "step": 1460 + }, + { + "epoch": 0.38403366870520156, + "loss": 0.13443070650100708, + "loss_ce": 0.0023811361752450466, + "loss_iou": 0.671875, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 250757288, + "step": 1460 + }, + { + "epoch": 0.38429670546458866, + "grad_norm": 8.46180134502284, + "learning_rate": 5e-06, + "loss": 0.1569, + "num_input_tokens_seen": 250929524, + "step": 1461 + }, + { + "epoch": 0.38429670546458866, + "loss": 0.10377843677997589, + "loss_ce": 0.0022159344516694546, + "loss_iou": 0.55078125, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 250929524, + "step": 1461 + }, + { + "epoch": 0.3845597422239758, + "grad_norm": 19.056127756550847, + "learning_rate": 5e-06, + "loss": 0.1583, + "num_input_tokens_seen": 251100012, + "step": 1462 + }, + { + "epoch": 0.3845597422239758, + "loss": 0.13582435250282288, + "loss_ce": 0.0002042302949121222, + "loss_iou": 0.68359375, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 251100012, + "step": 1462 + }, + { + "epoch": 0.3848227789833629, + "grad_norm": 13.96527055803463, + "learning_rate": 5e-06, + "loss": 0.1363, + "num_input_tokens_seen": 251272524, + "step": 1463 + }, + { + "epoch": 0.3848227789833629, + "loss": 0.09230969846248627, + "loss_ce": 0.0005128234624862671, + "loss_iou": 0.474609375, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 251272524, + "step": 1463 + }, + { + "epoch": 0.38508581574275, + "grad_norm": 8.704363897996222, + "learning_rate": 5e-06, + "loss": 0.1065, + "num_input_tokens_seen": 251444684, + "step": 1464 + }, + { + "epoch": 0.38508581574275, + "loss": 0.06702134013175964, + "loss_ce": 0.0022020034957677126, + "loss_iou": 0.59375, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 251444684, + "step": 1464 + }, + { + "epoch": 0.3853488525021372, + "grad_norm": 5.865897344217803, + "learning_rate": 5e-06, + "loss": 0.1043, + "num_input_tokens_seen": 251616872, + "step": 1465 + }, + { + "epoch": 0.3853488525021372, + "loss": 0.0932367742061615, + "loss_ce": 0.0029200036078691483, + "loss_iou": 0.6015625, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 251616872, + "step": 1465 + }, + { + "epoch": 0.3856118892615243, + "grad_norm": 7.51772092325757, + "learning_rate": 5e-06, + "loss": 0.1453, + "num_input_tokens_seen": 251787464, + "step": 1466 + }, + { + "epoch": 0.3856118892615243, + "loss": 0.08029404282569885, + "loss_ce": 0.0014366218820214272, + "loss_iou": 0.6015625, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 251787464, + "step": 1466 + }, + { + "epoch": 0.38587492602091145, + "grad_norm": 5.797632938807012, + "learning_rate": 5e-06, + "loss": 0.1405, + "num_input_tokens_seen": 251959420, + "step": 1467 + }, + { + "epoch": 0.38587492602091145, + "loss": 0.1458974927663803, + "loss_ce": 0.0016409052768722177, + "loss_iou": 0.373046875, + "loss_num": 0.02880859375, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 251959420, + "step": 1467 + }, + { + "epoch": 0.38613796278029855, + "grad_norm": 11.05258372992866, + "learning_rate": 5e-06, + "loss": 0.1248, + "num_input_tokens_seen": 252130168, + "step": 1468 + }, + { + "epoch": 0.38613796278029855, + "loss": 0.13372868299484253, + "loss_ce": 0.002472587861120701, + "loss_iou": 0.6015625, + "loss_num": 0.0262451171875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 252130168, + "step": 1468 + }, + { + "epoch": 0.38640099953968565, + "grad_norm": 14.608414515106098, + "learning_rate": 5e-06, + "loss": 0.2148, + "num_input_tokens_seen": 252302400, + "step": 1469 + }, + { + "epoch": 0.38640099953968565, + "loss": 0.17314574122428894, + "loss_ce": 0.0009960737079381943, + "loss_iou": 0.45703125, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 252302400, + "step": 1469 + }, + { + "epoch": 0.3866640362990728, + "grad_norm": 5.302717622816423, + "learning_rate": 5e-06, + "loss": 0.114, + "num_input_tokens_seen": 252474472, + "step": 1470 + }, + { + "epoch": 0.3866640362990728, + "loss": 0.11253952234983444, + "loss_ce": 0.0038511687889695168, + "loss_iou": 0.6015625, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 252474472, + "step": 1470 + }, + { + "epoch": 0.3869270730584599, + "grad_norm": 6.326570096654801, + "learning_rate": 5e-06, + "loss": 0.1295, + "num_input_tokens_seen": 252646620, + "step": 1471 + }, + { + "epoch": 0.3869270730584599, + "loss": 0.12959828972816467, + "loss_ce": 0.0016685951268300414, + "loss_iou": 0.4296875, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 252646620, + "step": 1471 + }, + { + "epoch": 0.387190109817847, + "grad_norm": 4.47840778836987, + "learning_rate": 5e-06, + "loss": 0.1374, + "num_input_tokens_seen": 252818676, + "step": 1472 + }, + { + "epoch": 0.387190109817847, + "loss": 0.09814205765724182, + "loss_ce": 0.0010656400118023157, + "loss_iou": 0.6796875, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 252818676, + "step": 1472 + }, + { + "epoch": 0.3874531465772342, + "grad_norm": 8.330831236084894, + "learning_rate": 5e-06, + "loss": 0.1907, + "num_input_tokens_seen": 252989028, + "step": 1473 + }, + { + "epoch": 0.3874531465772342, + "loss": 0.13005918264389038, + "loss_ce": 0.0018853526562452316, + "loss_iou": 0.408203125, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 252989028, + "step": 1473 + }, + { + "epoch": 0.3877161833366213, + "grad_norm": 5.836741339054395, + "learning_rate": 5e-06, + "loss": 0.1451, + "num_input_tokens_seen": 253160776, + "step": 1474 + }, + { + "epoch": 0.3877161833366213, + "loss": 0.16334399580955505, + "loss_ce": 0.0019365199841558933, + "loss_iou": 0.61328125, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 253160776, + "step": 1474 + }, + { + "epoch": 0.38797922009600844, + "grad_norm": 6.563945205492379, + "learning_rate": 5e-06, + "loss": 0.0887, + "num_input_tokens_seen": 253333080, + "step": 1475 + }, + { + "epoch": 0.38797922009600844, + "loss": 0.11816126108169556, + "loss_ce": 0.0005465176654979587, + "loss_iou": 0.462890625, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 253333080, + "step": 1475 + }, + { + "epoch": 0.38824225685539554, + "grad_norm": 5.424235438541153, + "learning_rate": 5e-06, + "loss": 0.1446, + "num_input_tokens_seen": 253504996, + "step": 1476 + }, + { + "epoch": 0.38824225685539554, + "loss": 0.2769371271133423, + "loss_ce": 0.0038658371195197105, + "loss_iou": 0.65625, + "loss_num": 0.0546875, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 253504996, + "step": 1476 + }, + { + "epoch": 0.38850529361478264, + "grad_norm": 8.554483113755165, + "learning_rate": 5e-06, + "loss": 0.1629, + "num_input_tokens_seen": 253677224, + "step": 1477 + }, + { + "epoch": 0.38850529361478264, + "loss": 0.2999889850616455, + "loss_ce": 0.0013744828756898642, + "loss_iou": 0.373046875, + "loss_num": 0.0595703125, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 253677224, + "step": 1477 + }, + { + "epoch": 0.3887683303741698, + "grad_norm": 9.64593506404694, + "learning_rate": 5e-06, + "loss": 0.1791, + "num_input_tokens_seen": 253849140, + "step": 1478 + }, + { + "epoch": 0.3887683303741698, + "loss": 0.10786094516515732, + "loss_ce": 0.0013545926194638014, + "loss_iou": 0.6015625, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 253849140, + "step": 1478 + }, + { + "epoch": 0.3890313671335569, + "grad_norm": 4.897950380873555, + "learning_rate": 5e-06, + "loss": 0.1308, + "num_input_tokens_seen": 254021328, + "step": 1479 + }, + { + "epoch": 0.3890313671335569, + "loss": 0.09722624719142914, + "loss_ce": 0.0006075926939956844, + "loss_iou": 0.5078125, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 254021328, + "step": 1479 + }, + { + "epoch": 0.38929440389294406, + "grad_norm": 15.722256371647902, + "learning_rate": 5e-06, + "loss": 0.1371, + "num_input_tokens_seen": 254191780, + "step": 1480 + }, + { + "epoch": 0.38929440389294406, + "loss": 0.24498049914836884, + "loss_ce": 0.003861122764647007, + "loss_iou": 0.466796875, + "loss_num": 0.04833984375, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 254191780, + "step": 1480 + }, + { + "epoch": 0.38955744065233117, + "grad_norm": 8.008352967265257, + "learning_rate": 5e-06, + "loss": 0.1663, + "num_input_tokens_seen": 254363968, + "step": 1481 + }, + { + "epoch": 0.38955744065233117, + "loss": 0.11913852393627167, + "loss_ce": 0.004117771051824093, + "loss_iou": 0.57421875, + "loss_num": 0.02294921875, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 254363968, + "step": 1481 + }, + { + "epoch": 0.38982047741171827, + "grad_norm": 49.00606130732605, + "learning_rate": 5e-06, + "loss": 0.1557, + "num_input_tokens_seen": 254534476, + "step": 1482 + }, + { + "epoch": 0.38982047741171827, + "loss": 0.10790035128593445, + "loss_ce": 0.001271925400942564, + "loss_iou": 0.67578125, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 254534476, + "step": 1482 + }, + { + "epoch": 0.3900835141711054, + "grad_norm": 7.1385674525154705, + "learning_rate": 5e-06, + "loss": 0.1465, + "num_input_tokens_seen": 254706676, + "step": 1483 + }, + { + "epoch": 0.3900835141711054, + "loss": 0.21054460108280182, + "loss_ce": 0.0007057388429529965, + "loss_iou": 0.458984375, + "loss_num": 0.0419921875, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 254706676, + "step": 1483 + }, + { + "epoch": 0.39034655093049253, + "grad_norm": 11.652107001526453, + "learning_rate": 5e-06, + "loss": 0.1735, + "num_input_tokens_seen": 254877300, + "step": 1484 + }, + { + "epoch": 0.39034655093049253, + "loss": 0.1516016721725464, + "loss_ce": 0.0008448172593489289, + "loss_iou": 0.62890625, + "loss_num": 0.0301513671875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 254877300, + "step": 1484 + }, + { + "epoch": 0.39060958768987963, + "grad_norm": 6.761059905321281, + "learning_rate": 5e-06, + "loss": 0.1391, + "num_input_tokens_seen": 255049560, + "step": 1485 + }, + { + "epoch": 0.39060958768987963, + "loss": 0.17154067754745483, + "loss_ce": 0.00045912445057183504, + "loss_iou": 0.431640625, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 255049560, + "step": 1485 + }, + { + "epoch": 0.3908726244492668, + "grad_norm": 4.6049295798328655, + "learning_rate": 5e-06, + "loss": 0.1372, + "num_input_tokens_seen": 255221840, + "step": 1486 + }, + { + "epoch": 0.3908726244492668, + "loss": 0.12419994175434113, + "loss_ce": 0.0011530672200024128, + "loss_iou": 0.52734375, + "loss_num": 0.024658203125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 255221840, + "step": 1486 + }, + { + "epoch": 0.3911356612086539, + "grad_norm": 9.631901215468224, + "learning_rate": 5e-06, + "loss": 0.1838, + "num_input_tokens_seen": 255393804, + "step": 1487 + }, + { + "epoch": 0.3911356612086539, + "loss": 0.14977988600730896, + "loss_ce": 0.006835547741502523, + "loss_iou": 0.40234375, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 255393804, + "step": 1487 + }, + { + "epoch": 0.39139869796804105, + "grad_norm": 6.350539398563022, + "learning_rate": 5e-06, + "loss": 0.149, + "num_input_tokens_seen": 255565956, + "step": 1488 + }, + { + "epoch": 0.39139869796804105, + "loss": 0.08242587745189667, + "loss_ce": 0.0012491194065660238, + "loss_iou": 0.62109375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 255565956, + "step": 1488 + }, + { + "epoch": 0.39166173472742816, + "grad_norm": 8.23209899989984, + "learning_rate": 5e-06, + "loss": 0.1259, + "num_input_tokens_seen": 255738272, + "step": 1489 + }, + { + "epoch": 0.39166173472742816, + "loss": 0.11557637155056, + "loss_ce": 0.00025044637732207775, + "loss_iou": 0.52734375, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 255738272, + "step": 1489 + }, + { + "epoch": 0.39192477148681526, + "grad_norm": 6.0709262055147555, + "learning_rate": 5e-06, + "loss": 0.1715, + "num_input_tokens_seen": 255910628, + "step": 1490 + }, + { + "epoch": 0.39192477148681526, + "loss": 0.06516115367412567, + "loss_ce": 0.0002197464054916054, + "loss_iou": 0.578125, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 255910628, + "step": 1490 + }, + { + "epoch": 0.3921878082462024, + "grad_norm": 4.913795659722062, + "learning_rate": 5e-06, + "loss": 0.1073, + "num_input_tokens_seen": 256079800, + "step": 1491 + }, + { + "epoch": 0.3921878082462024, + "loss": 0.12981277704238892, + "loss_ce": 0.0006013567326590419, + "loss_iou": 0.6015625, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 256079800, + "step": 1491 + }, + { + "epoch": 0.3924508450055895, + "grad_norm": 10.053329401258154, + "learning_rate": 5e-06, + "loss": 0.0995, + "num_input_tokens_seen": 256251908, + "step": 1492 + }, + { + "epoch": 0.3924508450055895, + "loss": 0.11420565843582153, + "loss_ce": 0.0003750904288608581, + "loss_iou": 0.5859375, + "loss_num": 0.0228271484375, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 256251908, + "step": 1492 + }, + { + "epoch": 0.3927138817649767, + "grad_norm": 9.9668682397801, + "learning_rate": 5e-06, + "loss": 0.1763, + "num_input_tokens_seen": 256423824, + "step": 1493 + }, + { + "epoch": 0.3927138817649767, + "loss": 0.1758217215538025, + "loss_ce": 0.0006203037919476628, + "loss_iou": 0.40234375, + "loss_num": 0.03515625, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 256423824, + "step": 1493 + }, + { + "epoch": 0.3929769185243638, + "grad_norm": 11.611915023620941, + "learning_rate": 5e-06, + "loss": 0.1558, + "num_input_tokens_seen": 256596028, + "step": 1494 + }, + { + "epoch": 0.3929769185243638, + "loss": 0.1324601024389267, + "loss_ce": 0.0007157221552915871, + "loss_iou": 0.4765625, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 256596028, + "step": 1494 + }, + { + "epoch": 0.3932399552837509, + "grad_norm": 8.374710474816515, + "learning_rate": 5e-06, + "loss": 0.1707, + "num_input_tokens_seen": 256764620, + "step": 1495 + }, + { + "epoch": 0.3932399552837509, + "loss": 0.14535440504550934, + "loss_ce": 0.00076211744453758, + "loss_iou": 0.35546875, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 256764620, + "step": 1495 + }, + { + "epoch": 0.39350299204313804, + "grad_norm": 3.7888421024340295, + "learning_rate": 5e-06, + "loss": 0.114, + "num_input_tokens_seen": 256936860, + "step": 1496 + }, + { + "epoch": 0.39350299204313804, + "loss": 0.11324183642864227, + "loss_ce": 0.0021883677691221237, + "loss_iou": 0.75, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 256936860, + "step": 1496 + }, + { + "epoch": 0.39376602880252515, + "grad_norm": 6.4208864756609865, + "learning_rate": 5e-06, + "loss": 0.141, + "num_input_tokens_seen": 257108880, + "step": 1497 + }, + { + "epoch": 0.39376602880252515, + "loss": 0.14706888794898987, + "loss_ce": 0.0031480020843446255, + "loss_iou": 0.50390625, + "loss_num": 0.02880859375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 257108880, + "step": 1497 + }, + { + "epoch": 0.39402906556191225, + "grad_norm": 7.914747299122871, + "learning_rate": 5e-06, + "loss": 0.1717, + "num_input_tokens_seen": 257280748, + "step": 1498 + }, + { + "epoch": 0.39402906556191225, + "loss": 0.12499146163463593, + "loss_ce": 0.0012732032919302583, + "loss_iou": 0.58203125, + "loss_num": 0.0247802734375, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 257280748, + "step": 1498 + }, + { + "epoch": 0.3942921023212994, + "grad_norm": 6.479265884809814, + "learning_rate": 5e-06, + "loss": 0.1054, + "num_input_tokens_seen": 257452832, + "step": 1499 + }, + { + "epoch": 0.3942921023212994, + "loss": 0.15066663920879364, + "loss_ce": 0.000764301570598036, + "loss_iou": 0.515625, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 257452832, + "step": 1499 + }, + { + "epoch": 0.3945551390806865, + "grad_norm": 30.54873849483752, + "learning_rate": 5e-06, + "loss": 0.1459, + "num_input_tokens_seen": 257625396, + "step": 1500 + }, + { + "epoch": 0.3945551390806865, + "eval_websight_new_CIoU": 0.8511096835136414, + "eval_websight_new_GIoU": 0.8473265767097473, + "eval_websight_new_IoU": 0.8611634373664856, + "eval_websight_new_MAE_all": 0.029499702155590057, + "eval_websight_new_MAE_h": 0.019038498401641846, + "eval_websight_new_MAE_w": 0.04285556077957153, + "eval_websight_new_MAE_x": 0.04122600890696049, + "eval_websight_new_MAE_y": 0.014878739370033145, + "eval_websight_new_NUM_probability": 0.9999458193778992, + "eval_websight_new_inside_bbox": 0.984375, + "eval_websight_new_loss": 0.13057953119277954, + "eval_websight_new_loss_ce": 1.3184878298488911e-05, + "eval_websight_new_loss_iou": 0.335205078125, + "eval_websight_new_loss_num": 0.022043228149414062, + "eval_websight_new_loss_xval": 0.1102447509765625, + "eval_websight_new_runtime": 55.84, + "eval_websight_new_samples_per_second": 0.895, + "eval_websight_new_steps_per_second": 0.036, + "num_input_tokens_seen": 257625396, + "step": 1500 + }, + { + "epoch": 0.3945551390806865, + "eval_seeclick_CIoU": 0.6286773383617401, + "eval_seeclick_GIoU": 0.627009391784668, + "eval_seeclick_IoU": 0.646778017282486, + "eval_seeclick_MAE_all": 0.04383368603885174, + "eval_seeclick_MAE_h": 0.028551836498081684, + "eval_seeclick_MAE_w": 0.05717572197318077, + "eval_seeclick_MAE_x": 0.06311946921050549, + "eval_seeclick_MAE_y": 0.02648772206157446, + "eval_seeclick_NUM_probability": 0.9999659061431885, + "eval_seeclick_inside_bbox": 0.953125, + "eval_seeclick_loss": 0.205887109041214, + "eval_seeclick_loss_ce": 0.008159147575497627, + "eval_seeclick_loss_iou": 0.525634765625, + "eval_seeclick_loss_num": 0.0388031005859375, + "eval_seeclick_loss_xval": 0.1939697265625, + "eval_seeclick_runtime": 77.0722, + "eval_seeclick_samples_per_second": 0.558, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 257625396, + "step": 1500 + }, + { + "epoch": 0.3945551390806865, + "eval_icons_CIoU": 0.8122206926345825, + "eval_icons_GIoU": 0.8060621917247772, + "eval_icons_IoU": 0.8242988586425781, + "eval_icons_MAE_all": 0.029898496810346842, + "eval_icons_MAE_h": 0.02304963255301118, + "eval_icons_MAE_w": 0.03834102302789688, + "eval_icons_MAE_x": 0.035982510074973106, + "eval_icons_MAE_y": 0.022220822051167488, + "eval_icons_NUM_probability": 0.9998922646045685, + "eval_icons_inside_bbox": 0.9288194477558136, + "eval_icons_loss": 0.08346885442733765, + "eval_icons_loss_ce": 3.6747020203620195e-05, + "eval_icons_loss_iou": 0.5107421875, + "eval_icons_loss_num": 0.014827728271484375, + "eval_icons_loss_xval": 0.0741424560546875, + "eval_icons_runtime": 80.4282, + "eval_icons_samples_per_second": 0.622, + "eval_icons_steps_per_second": 0.025, + "num_input_tokens_seen": 257625396, + "step": 1500 + }, + { + "epoch": 0.3945551390806865, + "eval_screenspot_CIoU": 0.5216498871644338, + "eval_screenspot_GIoU": 0.5137112041314443, + "eval_screenspot_IoU": 0.5683091680208842, + "eval_screenspot_MAE_all": 0.09167192876338959, + "eval_screenspot_MAE_h": 0.04900683710972468, + "eval_screenspot_MAE_w": 0.15865817666053772, + "eval_screenspot_MAE_x": 0.1116797278324763, + "eval_screenspot_MAE_y": 0.04734297779699167, + "eval_screenspot_NUM_probability": 0.9997473557790121, + "eval_screenspot_inside_bbox": 0.8291666706403097, + "eval_screenspot_loss": 0.8493114709854126, + "eval_screenspot_loss_ce": 0.5014328161875407, + "eval_screenspot_loss_iou": 0.4333089192708333, + "eval_screenspot_loss_num": 0.06844584147135417, + "eval_screenspot_loss_xval": 0.3421223958333333, + "eval_screenspot_runtime": 144.4244, + "eval_screenspot_samples_per_second": 0.616, + "eval_screenspot_steps_per_second": 0.021, + "num_input_tokens_seen": 257625396, + "step": 1500 + }, + { + "epoch": 0.3945551390806865, + "loss": 0.8482523560523987, + "loss_ce": 0.4914408326148987, + "loss_iou": 0.369140625, + "loss_num": 0.0712890625, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 257625396, + "step": 1500 + }, + { + "epoch": 0.39481817584007367, + "grad_norm": 4.597601910727454, + "learning_rate": 5e-06, + "loss": 0.1265, + "num_input_tokens_seen": 257797176, + "step": 1501 + }, + { + "epoch": 0.39481817584007367, + "loss": 0.11587628722190857, + "loss_ce": 0.0010386451613157988, + "loss_iou": 0.412109375, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 257797176, + "step": 1501 + }, + { + "epoch": 0.3950812125994608, + "grad_norm": 20.41167402936065, + "learning_rate": 5e-06, + "loss": 0.1429, + "num_input_tokens_seen": 257969472, + "step": 1502 + }, + { + "epoch": 0.3950812125994608, + "loss": 0.13194513320922852, + "loss_ce": 0.00023126379528548568, + "loss_iou": 0.5078125, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 257969472, + "step": 1502 + }, + { + "epoch": 0.3953442493588479, + "grad_norm": 5.303324872084742, + "learning_rate": 5e-06, + "loss": 0.1594, + "num_input_tokens_seen": 258141772, + "step": 1503 + }, + { + "epoch": 0.3953442493588479, + "loss": 0.26048120856285095, + "loss_ce": 0.0004714562091976404, + "loss_iou": 0.478515625, + "loss_num": 0.052001953125, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 258141772, + "step": 1503 + }, + { + "epoch": 0.39560728611823504, + "grad_norm": 8.26410044022957, + "learning_rate": 5e-06, + "loss": 0.1116, + "num_input_tokens_seen": 258312152, + "step": 1504 + }, + { + "epoch": 0.39560728611823504, + "loss": 0.11661653220653534, + "loss_ce": 0.0012600838672369719, + "loss_iou": 0.58984375, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 258312152, + "step": 1504 + }, + { + "epoch": 0.39587032287762214, + "grad_norm": 5.069526368720703, + "learning_rate": 5e-06, + "loss": 0.1434, + "num_input_tokens_seen": 258484140, + "step": 1505 + }, + { + "epoch": 0.39587032287762214, + "loss": 0.15614590048789978, + "loss_ce": 0.002184713026508689, + "loss_iou": 0.578125, + "loss_num": 0.03076171875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 258484140, + "step": 1505 + }, + { + "epoch": 0.3961333596370093, + "grad_norm": 8.21868694477306, + "learning_rate": 5e-06, + "loss": 0.1234, + "num_input_tokens_seen": 258656432, + "step": 1506 + }, + { + "epoch": 0.3961333596370093, + "loss": 0.15648871660232544, + "loss_ce": 0.006220155395567417, + "loss_iou": 0.65234375, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 258656432, + "step": 1506 + }, + { + "epoch": 0.3963963963963964, + "grad_norm": 4.840531799006556, + "learning_rate": 5e-06, + "loss": 0.1173, + "num_input_tokens_seen": 258828268, + "step": 1507 + }, + { + "epoch": 0.3963963963963964, + "loss": 0.16232186555862427, + "loss_ce": 0.0027149375528097153, + "loss_iou": 0.51953125, + "loss_num": 0.031982421875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 258828268, + "step": 1507 + }, + { + "epoch": 0.3966594331557835, + "grad_norm": 13.037528909207806, + "learning_rate": 5e-06, + "loss": 0.1478, + "num_input_tokens_seen": 259000404, + "step": 1508 + }, + { + "epoch": 0.3966594331557835, + "loss": 0.14472803473472595, + "loss_ce": 0.0009902361780405045, + "loss_iou": 0.625, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 259000404, + "step": 1508 + }, + { + "epoch": 0.39692246991517066, + "grad_norm": 5.085713777784683, + "learning_rate": 5e-06, + "loss": 0.1145, + "num_input_tokens_seen": 259172564, + "step": 1509 + }, + { + "epoch": 0.39692246991517066, + "loss": 0.07742594182491302, + "loss_ce": 0.001498206052929163, + "loss_iou": 0.50390625, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 259172564, + "step": 1509 + }, + { + "epoch": 0.39718550667455776, + "grad_norm": 11.735468019604104, + "learning_rate": 5e-06, + "loss": 0.1383, + "num_input_tokens_seen": 259344984, + "step": 1510 + }, + { + "epoch": 0.39718550667455776, + "loss": 0.19085130095481873, + "loss_ce": 0.006021593697369099, + "loss_iou": 0.17578125, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 259344984, + "step": 1510 + }, + { + "epoch": 0.39744854343394487, + "grad_norm": 5.2454273554423505, + "learning_rate": 5e-06, + "loss": 0.1176, + "num_input_tokens_seen": 259517348, + "step": 1511 + }, + { + "epoch": 0.39744854343394487, + "loss": 0.18470171093940735, + "loss_ce": 0.0007722551235929132, + "loss_iou": 0.38671875, + "loss_num": 0.036865234375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 259517348, + "step": 1511 + }, + { + "epoch": 0.397711580193332, + "grad_norm": 5.93272222700996, + "learning_rate": 5e-06, + "loss": 0.1389, + "num_input_tokens_seen": 259689536, + "step": 1512 + }, + { + "epoch": 0.397711580193332, + "loss": 0.08698225021362305, + "loss_ce": 9.870098438113928e-05, + "loss_iou": 0.46875, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 259689536, + "step": 1512 + }, + { + "epoch": 0.39797461695271913, + "grad_norm": 6.869955142591404, + "learning_rate": 5e-06, + "loss": 0.1645, + "num_input_tokens_seen": 259861960, + "step": 1513 + }, + { + "epoch": 0.39797461695271913, + "loss": 0.17443957924842834, + "loss_ce": 0.0013743957970291376, + "loss_iou": 0.48828125, + "loss_num": 0.03466796875, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 259861960, + "step": 1513 + }, + { + "epoch": 0.3982376537121063, + "grad_norm": 5.07838683649017, + "learning_rate": 5e-06, + "loss": 0.1222, + "num_input_tokens_seen": 260034008, + "step": 1514 + }, + { + "epoch": 0.3982376537121063, + "loss": 0.11854679882526398, + "loss_ce": 0.0005353257874958217, + "loss_iou": 0.498046875, + "loss_num": 0.0235595703125, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 260034008, + "step": 1514 + }, + { + "epoch": 0.3985006904714934, + "grad_norm": 4.287082894022155, + "learning_rate": 5e-06, + "loss": 0.1279, + "num_input_tokens_seen": 260206128, + "step": 1515 + }, + { + "epoch": 0.3985006904714934, + "loss": 0.13691899180412292, + "loss_ce": 0.004594762809574604, + "loss_iou": 0.57421875, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 260206128, + "step": 1515 + }, + { + "epoch": 0.3987637272308805, + "grad_norm": 11.905443274772528, + "learning_rate": 5e-06, + "loss": 0.1483, + "num_input_tokens_seen": 260377024, + "step": 1516 + }, + { + "epoch": 0.3987637272308805, + "loss": 0.2244867980480194, + "loss_ce": 0.004058347083628178, + "loss_iou": 0.451171875, + "loss_num": 0.044189453125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 260377024, + "step": 1516 + }, + { + "epoch": 0.39902676399026765, + "grad_norm": 5.162416897168446, + "learning_rate": 5e-06, + "loss": 0.1643, + "num_input_tokens_seen": 260549044, + "step": 1517 + }, + { + "epoch": 0.39902676399026765, + "loss": 0.1554432064294815, + "loss_ce": 0.006700540892779827, + "loss_iou": 0.49609375, + "loss_num": 0.02978515625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 260549044, + "step": 1517 + }, + { + "epoch": 0.39928980074965476, + "grad_norm": 10.132521694727306, + "learning_rate": 5e-06, + "loss": 0.1059, + "num_input_tokens_seen": 260721092, + "step": 1518 + }, + { + "epoch": 0.39928980074965476, + "loss": 0.10422030091285706, + "loss_ce": 0.0018338197842240334, + "loss_iou": 0.56640625, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 260721092, + "step": 1518 + }, + { + "epoch": 0.3995528375090419, + "grad_norm": 5.444540349408582, + "learning_rate": 5e-06, + "loss": 0.1115, + "num_input_tokens_seen": 260893156, + "step": 1519 + }, + { + "epoch": 0.3995528375090419, + "loss": 0.151461660861969, + "loss_ce": 0.0018339705420657992, + "loss_iou": 0.45703125, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 260893156, + "step": 1519 + }, + { + "epoch": 0.399815874268429, + "grad_norm": 11.977186984155779, + "learning_rate": 5e-06, + "loss": 0.0934, + "num_input_tokens_seen": 261065364, + "step": 1520 + }, + { + "epoch": 0.399815874268429, + "loss": 0.10812121629714966, + "loss_ce": 0.0015843516448512673, + "loss_iou": 0.53125, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 261065364, + "step": 1520 + }, + { + "epoch": 0.4000789110278161, + "grad_norm": 18.63106485848457, + "learning_rate": 5e-06, + "loss": 0.1179, + "num_input_tokens_seen": 261237376, + "step": 1521 + }, + { + "epoch": 0.4000789110278161, + "loss": 0.16819404065608978, + "loss_ce": 0.0020105685107409954, + "loss_iou": 0.4296875, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 261237376, + "step": 1521 + }, + { + "epoch": 0.4003419477872033, + "grad_norm": 10.102453678806217, + "learning_rate": 5e-06, + "loss": 0.1354, + "num_input_tokens_seen": 261409528, + "step": 1522 + }, + { + "epoch": 0.4003419477872033, + "loss": 0.16048389673233032, + "loss_ce": 0.0018229965353384614, + "loss_iou": 0.53515625, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 261409528, + "step": 1522 + }, + { + "epoch": 0.4006049845465904, + "grad_norm": 4.815296030108717, + "learning_rate": 5e-06, + "loss": 0.2043, + "num_input_tokens_seen": 261581516, + "step": 1523 + }, + { + "epoch": 0.4006049845465904, + "loss": 0.16827590763568878, + "loss_ce": 0.0006428433116525412, + "loss_iou": 0.5390625, + "loss_num": 0.033447265625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 261581516, + "step": 1523 + }, + { + "epoch": 0.4008680213059775, + "grad_norm": 7.523079284642639, + "learning_rate": 5e-06, + "loss": 0.1283, + "num_input_tokens_seen": 261753732, + "step": 1524 + }, + { + "epoch": 0.4008680213059775, + "loss": 0.07389776408672333, + "loss_ce": 0.0012354037025943398, + "loss_iou": 0.58984375, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 261753732, + "step": 1524 + }, + { + "epoch": 0.40113105806536464, + "grad_norm": 5.8778602893728955, + "learning_rate": 5e-06, + "loss": 0.167, + "num_input_tokens_seen": 261925788, + "step": 1525 + }, + { + "epoch": 0.40113105806536464, + "loss": 0.1510400027036667, + "loss_ce": 0.00025264715077355504, + "loss_iou": 0.359375, + "loss_num": 0.0301513671875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 261925788, + "step": 1525 + }, + { + "epoch": 0.40139409482475175, + "grad_norm": 5.850697556727792, + "learning_rate": 5e-06, + "loss": 0.1457, + "num_input_tokens_seen": 262097916, + "step": 1526 + }, + { + "epoch": 0.40139409482475175, + "loss": 0.1881968379020691, + "loss_ce": 0.002436349866911769, + "loss_iou": 0.53125, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 262097916, + "step": 1526 + }, + { + "epoch": 0.4016571315841389, + "grad_norm": 6.537837429507002, + "learning_rate": 5e-06, + "loss": 0.122, + "num_input_tokens_seen": 262269964, + "step": 1527 + }, + { + "epoch": 0.4016571315841389, + "loss": 0.09432707726955414, + "loss_ce": 0.004178154282271862, + "loss_iou": 0.486328125, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 262269964, + "step": 1527 + }, + { + "epoch": 0.401920168343526, + "grad_norm": 16.05135902232025, + "learning_rate": 5e-06, + "loss": 0.148, + "num_input_tokens_seen": 262442240, + "step": 1528 + }, + { + "epoch": 0.401920168343526, + "loss": 0.22578378021717072, + "loss_ce": 0.0013269821647554636, + "loss_iou": 0.6484375, + "loss_num": 0.044921875, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 262442240, + "step": 1528 + }, + { + "epoch": 0.4021832051029131, + "grad_norm": 23.071196756112307, + "learning_rate": 5e-06, + "loss": 0.1737, + "num_input_tokens_seen": 262612560, + "step": 1529 + }, + { + "epoch": 0.4021832051029131, + "loss": 0.1546820104122162, + "loss_ce": 0.0018499757861718535, + "loss_iou": 0.451171875, + "loss_num": 0.0306396484375, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 262612560, + "step": 1529 + }, + { + "epoch": 0.40244624186230027, + "grad_norm": 15.456725226696141, + "learning_rate": 5e-06, + "loss": 0.1397, + "num_input_tokens_seen": 262784424, + "step": 1530 + }, + { + "epoch": 0.40244624186230027, + "loss": 0.09581418335437775, + "loss_ce": 0.00661130016669631, + "loss_iou": 0.55859375, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 262784424, + "step": 1530 + }, + { + "epoch": 0.40270927862168737, + "grad_norm": 4.753085917564063, + "learning_rate": 5e-06, + "loss": 0.1339, + "num_input_tokens_seen": 262956820, + "step": 1531 + }, + { + "epoch": 0.40270927862168737, + "loss": 0.17357602715492249, + "loss_ce": 0.000693951384164393, + "loss_iou": 0.484375, + "loss_num": 0.034423828125, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 262956820, + "step": 1531 + }, + { + "epoch": 0.40297231538107453, + "grad_norm": 8.23993450342922, + "learning_rate": 5e-06, + "loss": 0.1338, + "num_input_tokens_seen": 263129000, + "step": 1532 + }, + { + "epoch": 0.40297231538107453, + "loss": 0.104616180062294, + "loss_ce": 0.002107633277773857, + "loss_iou": 0.65234375, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 263129000, + "step": 1532 + }, + { + "epoch": 0.40323535214046163, + "grad_norm": 5.28557757850573, + "learning_rate": 5e-06, + "loss": 0.1362, + "num_input_tokens_seen": 263301064, + "step": 1533 + }, + { + "epoch": 0.40323535214046163, + "loss": 0.1304527074098587, + "loss_ce": 0.004811838734894991, + "loss_iou": 0.48046875, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 263301064, + "step": 1533 + }, + { + "epoch": 0.40349838889984874, + "grad_norm": 6.190146085552892, + "learning_rate": 5e-06, + "loss": 0.1623, + "num_input_tokens_seen": 263473624, + "step": 1534 + }, + { + "epoch": 0.40349838889984874, + "loss": 0.18483060598373413, + "loss_ce": 0.00016873711138032377, + "loss_iou": 0.609375, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 263473624, + "step": 1534 + }, + { + "epoch": 0.4037614256592359, + "grad_norm": 7.04287854016945, + "learning_rate": 5e-06, + "loss": 0.1756, + "num_input_tokens_seen": 263645984, + "step": 1535 + }, + { + "epoch": 0.4037614256592359, + "loss": 0.2618888020515442, + "loss_ce": 0.003191267838701606, + "loss_iou": 0.51953125, + "loss_num": 0.0517578125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 263645984, + "step": 1535 + }, + { + "epoch": 0.404024462418623, + "grad_norm": 12.524958974577965, + "learning_rate": 5e-06, + "loss": 0.1503, + "num_input_tokens_seen": 263815796, + "step": 1536 + }, + { + "epoch": 0.404024462418623, + "loss": 0.1076509952545166, + "loss_ce": 0.0004732571542263031, + "loss_iou": 0.48046875, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 263815796, + "step": 1536 + }, + { + "epoch": 0.4042874991780101, + "grad_norm": 11.13747389496862, + "learning_rate": 5e-06, + "loss": 0.1302, + "num_input_tokens_seen": 263985512, + "step": 1537 + }, + { + "epoch": 0.4042874991780101, + "loss": 0.12391631305217743, + "loss_ce": 0.004134822636842728, + "loss_iou": 0.484375, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 263985512, + "step": 1537 + }, + { + "epoch": 0.40455053593739726, + "grad_norm": 13.524733657823086, + "learning_rate": 5e-06, + "loss": 0.1388, + "num_input_tokens_seen": 264157652, + "step": 1538 + }, + { + "epoch": 0.40455053593739726, + "loss": 0.1372789740562439, + "loss_ce": 0.00034661110839806497, + "loss_iou": 0.498046875, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 264157652, + "step": 1538 + }, + { + "epoch": 0.40481357269678436, + "grad_norm": 10.591750756623403, + "learning_rate": 5e-06, + "loss": 0.1444, + "num_input_tokens_seen": 264328132, + "step": 1539 + }, + { + "epoch": 0.40481357269678436, + "loss": 0.11795895546674728, + "loss_ce": 0.0021752638276666403, + "loss_iou": 0.484375, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 264328132, + "step": 1539 + }, + { + "epoch": 0.4050766094561715, + "grad_norm": 13.38043942604694, + "learning_rate": 5e-06, + "loss": 0.159, + "num_input_tokens_seen": 264500388, + "step": 1540 + }, + { + "epoch": 0.4050766094561715, + "loss": 0.19694536924362183, + "loss_ce": 0.00218218588270247, + "loss_iou": 0.66015625, + "loss_num": 0.0390625, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 264500388, + "step": 1540 + }, + { + "epoch": 0.4053396462155586, + "grad_norm": 4.192366808850045, + "learning_rate": 5e-06, + "loss": 0.0912, + "num_input_tokens_seen": 264672276, + "step": 1541 + }, + { + "epoch": 0.4053396462155586, + "loss": 0.07517112791538239, + "loss_ce": 0.00021995243150740862, + "loss_iou": 0.55078125, + "loss_num": 0.01495361328125, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 264672276, + "step": 1541 + }, + { + "epoch": 0.4056026829749457, + "grad_norm": 5.297574673677269, + "learning_rate": 5e-06, + "loss": 0.1023, + "num_input_tokens_seen": 264844464, + "step": 1542 + }, + { + "epoch": 0.4056026829749457, + "loss": 0.12331673502922058, + "loss_ce": 0.00359627278521657, + "loss_iou": 0.6171875, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 264844464, + "step": 1542 + }, + { + "epoch": 0.4058657197343329, + "grad_norm": 5.716341192152297, + "learning_rate": 5e-06, + "loss": 0.1333, + "num_input_tokens_seen": 265016664, + "step": 1543 + }, + { + "epoch": 0.4058657197343329, + "loss": 0.16498282551765442, + "loss_ce": 0.001957924338057637, + "loss_iou": 0.68359375, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 265016664, + "step": 1543 + }, + { + "epoch": 0.40612875649372, + "grad_norm": 6.6104135503037895, + "learning_rate": 5e-06, + "loss": 0.1208, + "num_input_tokens_seen": 265188924, + "step": 1544 + }, + { + "epoch": 0.40612875649372, + "loss": 0.15974299609661102, + "loss_ce": 0.0041338615119457245, + "loss_iou": 0.5546875, + "loss_num": 0.0311279296875, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 265188924, + "step": 1544 + }, + { + "epoch": 0.40639179325310715, + "grad_norm": 5.4155340578643365, + "learning_rate": 5e-06, + "loss": 0.129, + "num_input_tokens_seen": 265359312, + "step": 1545 + }, + { + "epoch": 0.40639179325310715, + "loss": 0.21989840269088745, + "loss_ce": 0.0024301379453390837, + "loss_iou": 0.5546875, + "loss_num": 0.04345703125, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 265359312, + "step": 1545 + }, + { + "epoch": 0.40665483001249425, + "grad_norm": 4.753477134622953, + "learning_rate": 5e-06, + "loss": 0.1179, + "num_input_tokens_seen": 265531700, + "step": 1546 + }, + { + "epoch": 0.40665483001249425, + "loss": 0.05390855669975281, + "loss_ce": 0.0012352181365713477, + "loss_iou": 0.734375, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 265531700, + "step": 1546 + }, + { + "epoch": 0.40691786677188135, + "grad_norm": 6.220827805530868, + "learning_rate": 5e-06, + "loss": 0.1214, + "num_input_tokens_seen": 265700312, + "step": 1547 + }, + { + "epoch": 0.40691786677188135, + "loss": 0.12480812519788742, + "loss_ce": 0.0009983108611777425, + "loss_iou": 0.4375, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 265700312, + "step": 1547 + }, + { + "epoch": 0.4071809035312685, + "grad_norm": 6.131445725549716, + "learning_rate": 5e-06, + "loss": 0.1203, + "num_input_tokens_seen": 265872224, + "step": 1548 + }, + { + "epoch": 0.4071809035312685, + "loss": 0.12791498005390167, + "loss_ce": 0.0017858227947726846, + "loss_iou": 0.416015625, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 265872224, + "step": 1548 + }, + { + "epoch": 0.4074439402906556, + "grad_norm": 6.062694054094362, + "learning_rate": 5e-06, + "loss": 0.1324, + "num_input_tokens_seen": 266044368, + "step": 1549 + }, + { + "epoch": 0.4074439402906556, + "loss": 0.09559094905853271, + "loss_ce": 0.00013196848158258945, + "loss_iou": 0.625, + "loss_num": 0.01904296875, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 266044368, + "step": 1549 + }, + { + "epoch": 0.4077069770500427, + "grad_norm": 6.379902327089613, + "learning_rate": 5e-06, + "loss": 0.1551, + "num_input_tokens_seen": 266216648, + "step": 1550 + }, + { + "epoch": 0.4077069770500427, + "loss": 0.26357126235961914, + "loss_ce": 0.007589830085635185, + "loss_iou": 0.62890625, + "loss_num": 0.05126953125, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 266216648, + "step": 1550 + }, + { + "epoch": 0.4079700138094299, + "grad_norm": 5.16918021793661, + "learning_rate": 5e-06, + "loss": 0.1034, + "num_input_tokens_seen": 266387044, + "step": 1551 + }, + { + "epoch": 0.4079700138094299, + "loss": 0.0737496167421341, + "loss_ce": 0.00215538265183568, + "loss_iou": 0.53515625, + "loss_num": 0.0142822265625, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 266387044, + "step": 1551 + }, + { + "epoch": 0.408233050568817, + "grad_norm": 36.40974082123275, + "learning_rate": 5e-06, + "loss": 0.1306, + "num_input_tokens_seen": 266557400, + "step": 1552 + }, + { + "epoch": 0.408233050568817, + "loss": 0.13131964206695557, + "loss_ce": 0.0013757951091974974, + "loss_iou": 0.55078125, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 266557400, + "step": 1552 + }, + { + "epoch": 0.40849608732820414, + "grad_norm": 4.785820854925791, + "learning_rate": 5e-06, + "loss": 0.1252, + "num_input_tokens_seen": 266729564, + "step": 1553 + }, + { + "epoch": 0.40849608732820414, + "loss": 0.12293117493391037, + "loss_ce": 0.0009524148190394044, + "loss_iou": 0.447265625, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 266729564, + "step": 1553 + }, + { + "epoch": 0.40875912408759124, + "grad_norm": 19.710178190555972, + "learning_rate": 5e-06, + "loss": 0.1523, + "num_input_tokens_seen": 266901912, + "step": 1554 + }, + { + "epoch": 0.40875912408759124, + "loss": 0.10157528519630432, + "loss_ce": 0.000501063244882971, + "loss_iou": 0.51171875, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 266901912, + "step": 1554 + }, + { + "epoch": 0.40902216084697834, + "grad_norm": 8.53788862222279, + "learning_rate": 5e-06, + "loss": 0.1432, + "num_input_tokens_seen": 267074204, + "step": 1555 + }, + { + "epoch": 0.40902216084697834, + "loss": 0.18818530440330505, + "loss_ce": 0.0011735922889783978, + "loss_iou": 0.59765625, + "loss_num": 0.037353515625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 267074204, + "step": 1555 + }, + { + "epoch": 0.4092851976063655, + "grad_norm": 9.427604183463405, + "learning_rate": 5e-06, + "loss": 0.1563, + "num_input_tokens_seen": 267246448, + "step": 1556 + }, + { + "epoch": 0.4092851976063655, + "loss": 0.11521363258361816, + "loss_ce": 0.0002539134002290666, + "loss_iou": 0.52734375, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 267246448, + "step": 1556 + }, + { + "epoch": 0.4095482343657526, + "grad_norm": 6.05814112958034, + "learning_rate": 5e-06, + "loss": 0.1307, + "num_input_tokens_seen": 267418524, + "step": 1557 + }, + { + "epoch": 0.4095482343657526, + "loss": 0.17979669570922852, + "loss_ce": 0.002214906271547079, + "loss_iou": 0.451171875, + "loss_num": 0.035400390625, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 267418524, + "step": 1557 + }, + { + "epoch": 0.40981127112513976, + "grad_norm": 5.372569051634822, + "learning_rate": 5e-06, + "loss": 0.1161, + "num_input_tokens_seen": 267590932, + "step": 1558 + }, + { + "epoch": 0.40981127112513976, + "loss": 0.08670764416456223, + "loss_ce": 0.0017467074794694781, + "loss_iou": 0.59375, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 267590932, + "step": 1558 + }, + { + "epoch": 0.41007430788452687, + "grad_norm": 7.2829791835725635, + "learning_rate": 5e-06, + "loss": 0.1428, + "num_input_tokens_seen": 267763292, + "step": 1559 + }, + { + "epoch": 0.41007430788452687, + "loss": 0.1519029438495636, + "loss_ce": 0.002458356786519289, + "loss_iou": 0.416015625, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 267763292, + "step": 1559 + }, + { + "epoch": 0.41033734464391397, + "grad_norm": 5.098616866005624, + "learning_rate": 5e-06, + "loss": 0.1, + "num_input_tokens_seen": 267935712, + "step": 1560 + }, + { + "epoch": 0.41033734464391397, + "loss": 0.11065052449703217, + "loss_ce": 0.0015807072632014751, + "loss_iou": 0.69140625, + "loss_num": 0.0218505859375, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 267935712, + "step": 1560 + }, + { + "epoch": 0.41060038140330113, + "grad_norm": 10.199812853245577, + "learning_rate": 5e-06, + "loss": 0.147, + "num_input_tokens_seen": 268108048, + "step": 1561 + }, + { + "epoch": 0.41060038140330113, + "loss": 0.09520716965198517, + "loss_ce": 0.003898570779711008, + "loss_iou": 0.46484375, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 268108048, + "step": 1561 + }, + { + "epoch": 0.41086341816268823, + "grad_norm": 40.24459139135473, + "learning_rate": 5e-06, + "loss": 0.131, + "num_input_tokens_seen": 268280092, + "step": 1562 + }, + { + "epoch": 0.41086341816268823, + "loss": 0.07629628479480743, + "loss_ce": 0.0009788942988961935, + "loss_iou": 0.50390625, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 268280092, + "step": 1562 + }, + { + "epoch": 0.41112645492207534, + "grad_norm": 10.392982307740652, + "learning_rate": 5e-06, + "loss": 0.1151, + "num_input_tokens_seen": 268450660, + "step": 1563 + }, + { + "epoch": 0.41112645492207534, + "loss": 0.122515007853508, + "loss_ce": 0.0010550380684435368, + "loss_iou": 0.4453125, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 268450660, + "step": 1563 + }, + { + "epoch": 0.4113894916814625, + "grad_norm": 9.96483413417414, + "learning_rate": 5e-06, + "loss": 0.1875, + "num_input_tokens_seen": 268622864, + "step": 1564 + }, + { + "epoch": 0.4113894916814625, + "loss": 0.22188733518123627, + "loss_ce": 0.002923724940046668, + "loss_iou": 0.5078125, + "loss_num": 0.043701171875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 268622864, + "step": 1564 + }, + { + "epoch": 0.4116525284408496, + "grad_norm": 4.677477301563895, + "learning_rate": 5e-06, + "loss": 0.1157, + "num_input_tokens_seen": 268793508, + "step": 1565 + }, + { + "epoch": 0.4116525284408496, + "loss": 0.08342467993497849, + "loss_ce": 0.00023376270837616175, + "loss_iou": 0.53125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 268793508, + "step": 1565 + }, + { + "epoch": 0.41191556520023676, + "grad_norm": 5.461599242953511, + "learning_rate": 5e-06, + "loss": 0.1571, + "num_input_tokens_seen": 268965604, + "step": 1566 + }, + { + "epoch": 0.41191556520023676, + "loss": 0.21633076667785645, + "loss_ce": 0.003531699301674962, + "loss_iou": 0.71875, + "loss_num": 0.04248046875, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 268965604, + "step": 1566 + }, + { + "epoch": 0.41217860195962386, + "grad_norm": 4.406500798076577, + "learning_rate": 5e-06, + "loss": 0.1875, + "num_input_tokens_seen": 269137692, + "step": 1567 + }, + { + "epoch": 0.41217860195962386, + "loss": 0.1873759627342224, + "loss_ce": 0.0010966623667627573, + "loss_iou": 0.5703125, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 269137692, + "step": 1567 + }, + { + "epoch": 0.41244163871901096, + "grad_norm": 5.2924295964415595, + "learning_rate": 5e-06, + "loss": 0.1409, + "num_input_tokens_seen": 269309988, + "step": 1568 + }, + { + "epoch": 0.41244163871901096, + "loss": 0.20892329514026642, + "loss_ce": 0.0004729711217805743, + "loss_iou": 0.57421875, + "loss_num": 0.041748046875, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 269309988, + "step": 1568 + }, + { + "epoch": 0.4127046754783981, + "grad_norm": 5.459543507128776, + "learning_rate": 5e-06, + "loss": 0.1387, + "num_input_tokens_seen": 269482356, + "step": 1569 + }, + { + "epoch": 0.4127046754783981, + "loss": 0.18314355611801147, + "loss_ce": 0.005287110339850187, + "loss_iou": 0.4921875, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 269482356, + "step": 1569 + }, + { + "epoch": 0.4129677122377852, + "grad_norm": 7.266131528232208, + "learning_rate": 5e-06, + "loss": 0.1709, + "num_input_tokens_seen": 269654572, + "step": 1570 + }, + { + "epoch": 0.4129677122377852, + "loss": 0.1481064409017563, + "loss_ce": 0.002049315720796585, + "loss_iou": 0.63671875, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 269654572, + "step": 1570 + }, + { + "epoch": 0.4132307489971724, + "grad_norm": 9.868014321011369, + "learning_rate": 5e-06, + "loss": 0.169, + "num_input_tokens_seen": 269826912, + "step": 1571 + }, + { + "epoch": 0.4132307489971724, + "loss": 0.21563705801963806, + "loss_ce": 0.0007627883460372686, + "loss_iou": 0.390625, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 269826912, + "step": 1571 + }, + { + "epoch": 0.4134937857565595, + "grad_norm": 6.475065616065912, + "learning_rate": 5e-06, + "loss": 0.1331, + "num_input_tokens_seen": 269998912, + "step": 1572 + }, + { + "epoch": 0.4134937857565595, + "loss": 0.20219947397708893, + "loss_ce": 0.0007529358845204115, + "loss_iou": 0.609375, + "loss_num": 0.040283203125, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 269998912, + "step": 1572 + }, + { + "epoch": 0.4137568225159466, + "grad_norm": 5.376660767415141, + "learning_rate": 5e-06, + "loss": 0.1247, + "num_input_tokens_seen": 270171036, + "step": 1573 + }, + { + "epoch": 0.4137568225159466, + "loss": 0.2056845724582672, + "loss_ce": 0.0031394038815051317, + "loss_iou": 0.4296875, + "loss_num": 0.04052734375, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 270171036, + "step": 1573 + }, + { + "epoch": 0.41401985927533375, + "grad_norm": 7.42827277488161, + "learning_rate": 5e-06, + "loss": 0.1727, + "num_input_tokens_seen": 270342784, + "step": 1574 + }, + { + "epoch": 0.41401985927533375, + "loss": 0.15383180975914001, + "loss_ce": 0.00045046067680232227, + "loss_iou": 0.4609375, + "loss_num": 0.03076171875, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 270342784, + "step": 1574 + }, + { + "epoch": 0.41428289603472085, + "grad_norm": 5.130198059184452, + "learning_rate": 5e-06, + "loss": 0.1234, + "num_input_tokens_seen": 270514876, + "step": 1575 + }, + { + "epoch": 0.41428289603472085, + "loss": 0.17609894275665283, + "loss_ce": 0.001187438378110528, + "loss_iou": 0.44140625, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 270514876, + "step": 1575 + }, + { + "epoch": 0.41454593279410795, + "grad_norm": 7.421258492917825, + "learning_rate": 5e-06, + "loss": 0.0906, + "num_input_tokens_seen": 270687292, + "step": 1576 + }, + { + "epoch": 0.41454593279410795, + "loss": 0.07864829152822495, + "loss_ce": 0.0015761489048600197, + "loss_iou": 0.58984375, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 270687292, + "step": 1576 + }, + { + "epoch": 0.4148089695534951, + "grad_norm": 6.818009590404441, + "learning_rate": 5e-06, + "loss": 0.1344, + "num_input_tokens_seen": 270859520, + "step": 1577 + }, + { + "epoch": 0.4148089695534951, + "loss": 0.18715041875839233, + "loss_ce": 0.0005964583833701909, + "loss_iou": 0.5234375, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 270859520, + "step": 1577 + }, + { + "epoch": 0.4150720063128822, + "grad_norm": 16.40610165209138, + "learning_rate": 5e-06, + "loss": 0.1347, + "num_input_tokens_seen": 271029200, + "step": 1578 + }, + { + "epoch": 0.4150720063128822, + "loss": 0.15075725317001343, + "loss_ce": 0.0008091325289569795, + "loss_iou": 0.61328125, + "loss_num": 0.0299072265625, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 271029200, + "step": 1578 + }, + { + "epoch": 0.41533504307226937, + "grad_norm": 5.724561524768373, + "learning_rate": 5e-06, + "loss": 0.1535, + "num_input_tokens_seen": 271201432, + "step": 1579 + }, + { + "epoch": 0.41533504307226937, + "loss": 0.18409447371959686, + "loss_ce": 0.0010500368662178516, + "loss_iou": 0.443359375, + "loss_num": 0.03662109375, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 271201432, + "step": 1579 + }, + { + "epoch": 0.4155980798316565, + "grad_norm": 5.423586189008012, + "learning_rate": 5e-06, + "loss": 0.1218, + "num_input_tokens_seen": 271373604, + "step": 1580 + }, + { + "epoch": 0.4155980798316565, + "loss": 0.10582901537418365, + "loss_ce": 0.0017030383460223675, + "loss_iou": 0.58203125, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 271373604, + "step": 1580 + }, + { + "epoch": 0.4158611165910436, + "grad_norm": 4.639007188262618, + "learning_rate": 5e-06, + "loss": 0.1184, + "num_input_tokens_seen": 271544156, + "step": 1581 + }, + { + "epoch": 0.4158611165910436, + "loss": 0.08775737881660461, + "loss_ce": 0.00032452167943120003, + "loss_iou": 0.49609375, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 271544156, + "step": 1581 + }, + { + "epoch": 0.41612415335043074, + "grad_norm": 4.194479063501224, + "learning_rate": 5e-06, + "loss": 0.1263, + "num_input_tokens_seen": 271716244, + "step": 1582 + }, + { + "epoch": 0.41612415335043074, + "loss": 0.07552362233400345, + "loss_ce": 5.3650168410968035e-05, + "loss_iou": 0.5859375, + "loss_num": 0.01507568359375, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 271716244, + "step": 1582 + }, + { + "epoch": 0.41638719010981784, + "grad_norm": 6.1414517921780325, + "learning_rate": 5e-06, + "loss": 0.1248, + "num_input_tokens_seen": 271888460, + "step": 1583 + }, + { + "epoch": 0.41638719010981784, + "loss": 0.09023694694042206, + "loss_ce": 0.004726693499833345, + "loss_iou": 0.62890625, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 271888460, + "step": 1583 + }, + { + "epoch": 0.416650226869205, + "grad_norm": 4.588576414677694, + "learning_rate": 5e-06, + "loss": 0.0835, + "num_input_tokens_seen": 272060548, + "step": 1584 + }, + { + "epoch": 0.416650226869205, + "loss": 0.06312213093042374, + "loss_ce": 0.004131653346121311, + "loss_iou": 0.56640625, + "loss_num": 0.01177978515625, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 272060548, + "step": 1584 + }, + { + "epoch": 0.4169132636285921, + "grad_norm": 6.028144781636759, + "learning_rate": 5e-06, + "loss": 0.1561, + "num_input_tokens_seen": 272232820, + "step": 1585 + }, + { + "epoch": 0.4169132636285921, + "loss": 0.0890943706035614, + "loss_ce": 0.0011732284910976887, + "loss_iou": 0.62109375, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 272232820, + "step": 1585 + }, + { + "epoch": 0.4171763003879792, + "grad_norm": 4.472287089666441, + "learning_rate": 5e-06, + "loss": 0.1729, + "num_input_tokens_seen": 272405268, + "step": 1586 + }, + { + "epoch": 0.4171763003879792, + "loss": 0.2705861032009125, + "loss_ce": 0.00334367249161005, + "loss_iou": 0.5625, + "loss_num": 0.053466796875, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 272405268, + "step": 1586 + }, + { + "epoch": 0.41743933714736636, + "grad_norm": 3.852608585701711, + "learning_rate": 5e-06, + "loss": 0.1019, + "num_input_tokens_seen": 272577520, + "step": 1587 + }, + { + "epoch": 0.41743933714736636, + "loss": 0.1504625827074051, + "loss_ce": 0.00028558107442222536, + "loss_iou": 0.388671875, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 272577520, + "step": 1587 + }, + { + "epoch": 0.41770237390675347, + "grad_norm": 5.376381690191942, + "learning_rate": 5e-06, + "loss": 0.1403, + "num_input_tokens_seen": 272747276, + "step": 1588 + }, + { + "epoch": 0.41770237390675347, + "loss": 0.15104855597019196, + "loss_ce": 0.005449185613542795, + "loss_iou": 0.6640625, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 272747276, + "step": 1588 + }, + { + "epoch": 0.41796541066614057, + "grad_norm": 6.161958958679319, + "learning_rate": 5e-06, + "loss": 0.1274, + "num_input_tokens_seen": 272919388, + "step": 1589 + }, + { + "epoch": 0.41796541066614057, + "loss": 0.19656141102313995, + "loss_ce": 0.0006995859439484775, + "loss_iou": 0.3984375, + "loss_num": 0.0390625, + "loss_xval": 0.1962890625, + "num_input_tokens_seen": 272919388, + "step": 1589 + }, + { + "epoch": 0.4182284474255277, + "grad_norm": 9.65888325757511, + "learning_rate": 5e-06, + "loss": 0.1406, + "num_input_tokens_seen": 273091656, + "step": 1590 + }, + { + "epoch": 0.4182284474255277, + "loss": 0.21912047266960144, + "loss_ce": 0.0045361267402768135, + "loss_iou": 0.62109375, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 273091656, + "step": 1590 + }, + { + "epoch": 0.41849148418491483, + "grad_norm": 8.813988618374006, + "learning_rate": 5e-06, + "loss": 0.1735, + "num_input_tokens_seen": 273263768, + "step": 1591 + }, + { + "epoch": 0.41849148418491483, + "loss": 0.23192375898361206, + "loss_ce": 0.00888605136424303, + "loss_iou": 0.48046875, + "loss_num": 0.044677734375, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 273263768, + "step": 1591 + }, + { + "epoch": 0.418754520944302, + "grad_norm": 7.699871319865347, + "learning_rate": 5e-06, + "loss": 0.1248, + "num_input_tokens_seen": 273435676, + "step": 1592 + }, + { + "epoch": 0.418754520944302, + "loss": 0.10040029883384705, + "loss_ce": 0.0014928288292139769, + "loss_iou": 0.5859375, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 273435676, + "step": 1592 + }, + { + "epoch": 0.4190175577036891, + "grad_norm": 5.319980033934329, + "learning_rate": 5e-06, + "loss": 0.1299, + "num_input_tokens_seen": 273607832, + "step": 1593 + }, + { + "epoch": 0.4190175577036891, + "loss": 0.149379163980484, + "loss_ce": 0.004207056015729904, + "loss_iou": 0.59375, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 273607832, + "step": 1593 + }, + { + "epoch": 0.4192805944630762, + "grad_norm": 9.352989040804356, + "learning_rate": 5e-06, + "loss": 0.1153, + "num_input_tokens_seen": 273778508, + "step": 1594 + }, + { + "epoch": 0.4192805944630762, + "loss": 0.12792231142520905, + "loss_ce": 0.0009081543539650738, + "loss_iou": 0.66015625, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 273778508, + "step": 1594 + }, + { + "epoch": 0.41954363122246335, + "grad_norm": 10.015857880890572, + "learning_rate": 5e-06, + "loss": 0.1659, + "num_input_tokens_seen": 273950492, + "step": 1595 + }, + { + "epoch": 0.41954363122246335, + "loss": 0.1797780990600586, + "loss_ce": 0.004515648819506168, + "loss_iou": 0.392578125, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 273950492, + "step": 1595 + }, + { + "epoch": 0.41980666798185046, + "grad_norm": 5.437610936264711, + "learning_rate": 5e-06, + "loss": 0.151, + "num_input_tokens_seen": 274122788, + "step": 1596 + }, + { + "epoch": 0.41980666798185046, + "loss": 0.18884079158306122, + "loss_ce": 0.0007914789603091776, + "loss_iou": 0.59765625, + "loss_num": 0.03759765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 274122788, + "step": 1596 + }, + { + "epoch": 0.4200697047412376, + "grad_norm": 7.822775629663786, + "learning_rate": 5e-06, + "loss": 0.1409, + "num_input_tokens_seen": 274294960, + "step": 1597 + }, + { + "epoch": 0.4200697047412376, + "loss": 0.10433492064476013, + "loss_ce": 0.000544635346159339, + "loss_iou": 0.208984375, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 274294960, + "step": 1597 + }, + { + "epoch": 0.4203327415006247, + "grad_norm": 5.742663276174034, + "learning_rate": 5e-06, + "loss": 0.1343, + "num_input_tokens_seen": 274466828, + "step": 1598 + }, + { + "epoch": 0.4203327415006247, + "loss": 0.21367287635803223, + "loss_ce": 0.0007822535699233413, + "loss_iou": 0.546875, + "loss_num": 0.04248046875, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 274466828, + "step": 1598 + }, + { + "epoch": 0.4205957782600118, + "grad_norm": 7.632689703873666, + "learning_rate": 5e-06, + "loss": 0.1806, + "num_input_tokens_seen": 274639136, + "step": 1599 + }, + { + "epoch": 0.4205957782600118, + "loss": 0.15357787907123566, + "loss_ce": 0.0009594644652679563, + "loss_iou": 0.5234375, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 274639136, + "step": 1599 + }, + { + "epoch": 0.420858815019399, + "grad_norm": 6.551977275944205, + "learning_rate": 5e-06, + "loss": 0.11, + "num_input_tokens_seen": 274811300, + "step": 1600 + }, + { + "epoch": 0.420858815019399, + "loss": 0.10377545654773712, + "loss_ce": 0.005798771977424622, + "loss_iou": 0.5859375, + "loss_num": 0.01953125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 274811300, + "step": 1600 + }, + { + "epoch": 0.4211218517787861, + "grad_norm": 3.634688713329233, + "learning_rate": 5e-06, + "loss": 0.1177, + "num_input_tokens_seen": 274983232, + "step": 1601 + }, + { + "epoch": 0.4211218517787861, + "loss": 0.0826454609632492, + "loss_ce": 0.0008583518210798502, + "loss_iou": 0.4453125, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 274983232, + "step": 1601 + }, + { + "epoch": 0.4213848885381732, + "grad_norm": 6.513487522029516, + "learning_rate": 5e-06, + "loss": 0.1857, + "num_input_tokens_seen": 275155252, + "step": 1602 + }, + { + "epoch": 0.4213848885381732, + "loss": 0.22003334760665894, + "loss_ce": 0.0016495751915499568, + "loss_iou": 0.54296875, + "loss_num": 0.043701171875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 275155252, + "step": 1602 + }, + { + "epoch": 0.42164792529756034, + "grad_norm": 7.844073689048222, + "learning_rate": 5e-06, + "loss": 0.164, + "num_input_tokens_seen": 275327452, + "step": 1603 + }, + { + "epoch": 0.42164792529756034, + "loss": 0.21324840188026428, + "loss_ce": 0.001090196194127202, + "loss_iou": 0.5703125, + "loss_num": 0.04248046875, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 275327452, + "step": 1603 + }, + { + "epoch": 0.42191096205694745, + "grad_norm": 5.702970135636054, + "learning_rate": 5e-06, + "loss": 0.0932, + "num_input_tokens_seen": 275499556, + "step": 1604 + }, + { + "epoch": 0.42191096205694745, + "loss": 0.12330840528011322, + "loss_ce": 0.00029205146711319685, + "loss_iou": 0.67578125, + "loss_num": 0.0245361328125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 275499556, + "step": 1604 + }, + { + "epoch": 0.4221739988163346, + "grad_norm": 7.295315362378517, + "learning_rate": 5e-06, + "loss": 0.1263, + "num_input_tokens_seen": 275671424, + "step": 1605 + }, + { + "epoch": 0.4221739988163346, + "loss": 0.09813028573989868, + "loss_ce": 0.0004130033776164055, + "loss_iou": 0.48046875, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 275671424, + "step": 1605 + }, + { + "epoch": 0.4224370355757217, + "grad_norm": 9.326948217180478, + "learning_rate": 5e-06, + "loss": 0.1136, + "num_input_tokens_seen": 275843716, + "step": 1606 + }, + { + "epoch": 0.4224370355757217, + "loss": 0.08426269888877869, + "loss_ce": 0.00183471804484725, + "loss_iou": 0.671875, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 275843716, + "step": 1606 + }, + { + "epoch": 0.4227000723351088, + "grad_norm": 5.932370446925655, + "learning_rate": 5e-06, + "loss": 0.1547, + "num_input_tokens_seen": 276016052, + "step": 1607 + }, + { + "epoch": 0.4227000723351088, + "loss": 0.1150435283780098, + "loss_ce": 0.0007857157033868134, + "loss_iou": 0.474609375, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 276016052, + "step": 1607 + }, + { + "epoch": 0.42296310909449597, + "grad_norm": 14.783104322463815, + "learning_rate": 5e-06, + "loss": 0.1635, + "num_input_tokens_seen": 276188232, + "step": 1608 + }, + { + "epoch": 0.42296310909449597, + "loss": 0.2393971085548401, + "loss_ce": 0.005510389804840088, + "loss_iou": 0.419921875, + "loss_num": 0.046875, + "loss_xval": 0.234375, + "num_input_tokens_seen": 276188232, + "step": 1608 + }, + { + "epoch": 0.4232261458538831, + "grad_norm": 37.72538153737276, + "learning_rate": 5e-06, + "loss": 0.1175, + "num_input_tokens_seen": 276360224, + "step": 1609 + }, + { + "epoch": 0.4232261458538831, + "loss": 0.10361947864294052, + "loss_ce": 0.001171966316178441, + "loss_iou": 0.54296875, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 276360224, + "step": 1609 + }, + { + "epoch": 0.42348918261327023, + "grad_norm": 5.929115604459541, + "learning_rate": 5e-06, + "loss": 0.185, + "num_input_tokens_seen": 276532388, + "step": 1610 + }, + { + "epoch": 0.42348918261327023, + "loss": 0.2490251511335373, + "loss_ce": 0.001466565066948533, + "loss_iou": 0.515625, + "loss_num": 0.049560546875, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 276532388, + "step": 1610 + }, + { + "epoch": 0.42375221937265733, + "grad_norm": 4.065855918819384, + "learning_rate": 5e-06, + "loss": 0.1123, + "num_input_tokens_seen": 276704452, + "step": 1611 + }, + { + "epoch": 0.42375221937265733, + "loss": 0.08056612312793732, + "loss_ce": 0.0004574810154736042, + "loss_iou": 0.59375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 276704452, + "step": 1611 + }, + { + "epoch": 0.42401525613204444, + "grad_norm": 5.144248302598616, + "learning_rate": 5e-06, + "loss": 0.111, + "num_input_tokens_seen": 276876660, + "step": 1612 + }, + { + "epoch": 0.42401525613204444, + "loss": 0.1256069540977478, + "loss_ce": 0.003719748929142952, + "loss_iou": 0.470703125, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 276876660, + "step": 1612 + }, + { + "epoch": 0.4242782928914316, + "grad_norm": 6.484383508592383, + "learning_rate": 5e-06, + "loss": 0.1448, + "num_input_tokens_seen": 277048524, + "step": 1613 + }, + { + "epoch": 0.4242782928914316, + "loss": 0.08503228425979614, + "loss_ce": 0.0004375518183223903, + "loss_iou": 0.51953125, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 277048524, + "step": 1613 + }, + { + "epoch": 0.4245413296508187, + "grad_norm": 4.995173925299756, + "learning_rate": 5e-06, + "loss": 0.1327, + "num_input_tokens_seen": 277220588, + "step": 1614 + }, + { + "epoch": 0.4245413296508187, + "loss": 0.17099690437316895, + "loss_ce": 0.0004646642482839525, + "loss_iou": 0.609375, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 277220588, + "step": 1614 + }, + { + "epoch": 0.4248043664102058, + "grad_norm": 8.497364636941706, + "learning_rate": 5e-06, + "loss": 0.1482, + "num_input_tokens_seen": 277392824, + "step": 1615 + }, + { + "epoch": 0.4248043664102058, + "loss": 0.13706764578819275, + "loss_ce": 0.0016916776075959206, + "loss_iou": 0.4609375, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 277392824, + "step": 1615 + }, + { + "epoch": 0.42506740316959296, + "grad_norm": 6.760370062216128, + "learning_rate": 5e-06, + "loss": 0.1224, + "num_input_tokens_seen": 277565104, + "step": 1616 + }, + { + "epoch": 0.42506740316959296, + "loss": 0.10927695780992508, + "loss_ce": 0.00048179191071540117, + "loss_iou": 0.65234375, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 277565104, + "step": 1616 + }, + { + "epoch": 0.42533043992898006, + "grad_norm": 5.035401913498838, + "learning_rate": 5e-06, + "loss": 0.122, + "num_input_tokens_seen": 277737224, + "step": 1617 + }, + { + "epoch": 0.42533043992898006, + "loss": 0.13474591076374054, + "loss_ce": 0.002299622166901827, + "loss_iou": 0.41796875, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 277737224, + "step": 1617 + }, + { + "epoch": 0.4255934766883672, + "grad_norm": 6.777922664584483, + "learning_rate": 5e-06, + "loss": 0.1104, + "num_input_tokens_seen": 277907488, + "step": 1618 + }, + { + "epoch": 0.4255934766883672, + "loss": 0.11205422878265381, + "loss_ce": 0.0009702442912384868, + "loss_iou": 0.59765625, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 277907488, + "step": 1618 + }, + { + "epoch": 0.4258565134477543, + "grad_norm": 7.0419776595399695, + "learning_rate": 5e-06, + "loss": 0.1466, + "num_input_tokens_seen": 278079908, + "step": 1619 + }, + { + "epoch": 0.4258565134477543, + "loss": 0.1803514063358307, + "loss_ce": 0.0011216606944799423, + "loss_iou": 0.51171875, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 278079908, + "step": 1619 + }, + { + "epoch": 0.42611955020714143, + "grad_norm": 4.869531542128126, + "learning_rate": 5e-06, + "loss": 0.1934, + "num_input_tokens_seen": 278252060, + "step": 1620 + }, + { + "epoch": 0.42611955020714143, + "loss": 0.17065666615962982, + "loss_ce": 0.0023522234987467527, + "loss_iou": 0.478515625, + "loss_num": 0.03369140625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 278252060, + "step": 1620 + }, + { + "epoch": 0.4263825869665286, + "grad_norm": 15.025432063122246, + "learning_rate": 5e-06, + "loss": 0.1486, + "num_input_tokens_seen": 278424152, + "step": 1621 + }, + { + "epoch": 0.4263825869665286, + "loss": 0.16370511054992676, + "loss_ce": 0.0011379658244550228, + "loss_iou": 0.53125, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 278424152, + "step": 1621 + }, + { + "epoch": 0.4266456237259157, + "grad_norm": 7.359642440972258, + "learning_rate": 5e-06, + "loss": 0.1398, + "num_input_tokens_seen": 278596208, + "step": 1622 + }, + { + "epoch": 0.4266456237259157, + "loss": 0.18550482392311096, + "loss_ce": 0.0022162585519254208, + "loss_iou": 0.69921875, + "loss_num": 0.03662109375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 278596208, + "step": 1622 + }, + { + "epoch": 0.42690866048530285, + "grad_norm": 3.859276017343743, + "learning_rate": 5e-06, + "loss": 0.0967, + "num_input_tokens_seen": 278768348, + "step": 1623 + }, + { + "epoch": 0.42690866048530285, + "loss": 0.08928422629833221, + "loss_ce": 0.003163617569953203, + "loss_iou": 0.447265625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 278768348, + "step": 1623 + }, + { + "epoch": 0.42717169724468995, + "grad_norm": 3.491408872838399, + "learning_rate": 5e-06, + "loss": 0.114, + "num_input_tokens_seen": 278940432, + "step": 1624 + }, + { + "epoch": 0.42717169724468995, + "loss": 0.10469355434179306, + "loss_ce": 0.004763749893754721, + "loss_iou": 0.609375, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 278940432, + "step": 1624 + }, + { + "epoch": 0.42743473400407705, + "grad_norm": 8.925089197668054, + "learning_rate": 5e-06, + "loss": 0.1371, + "num_input_tokens_seen": 279112480, + "step": 1625 + }, + { + "epoch": 0.42743473400407705, + "loss": 0.15482491254806519, + "loss_ce": 0.0005127866170369089, + "loss_iou": 0.5859375, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 279112480, + "step": 1625 + }, + { + "epoch": 0.4276977707634642, + "grad_norm": 5.017252116965269, + "learning_rate": 5e-06, + "loss": 0.1621, + "num_input_tokens_seen": 279284544, + "step": 1626 + }, + { + "epoch": 0.4276977707634642, + "loss": 0.11934304237365723, + "loss_ce": 0.0036509071942418814, + "loss_iou": 0.80859375, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 279284544, + "step": 1626 + }, + { + "epoch": 0.4279608075228513, + "grad_norm": 11.28367561887442, + "learning_rate": 5e-06, + "loss": 0.1028, + "num_input_tokens_seen": 279456684, + "step": 1627 + }, + { + "epoch": 0.4279608075228513, + "loss": 0.08618146926164627, + "loss_ce": 0.0002134529349859804, + "loss_iou": 0.435546875, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 279456684, + "step": 1627 + }, + { + "epoch": 0.4282238442822384, + "grad_norm": 14.334013868271164, + "learning_rate": 5e-06, + "loss": 0.1636, + "num_input_tokens_seen": 279629144, + "step": 1628 + }, + { + "epoch": 0.4282238442822384, + "loss": 0.21622659265995026, + "loss_ce": 0.0035800987388938665, + "loss_iou": 0.5546875, + "loss_num": 0.04248046875, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 279629144, + "step": 1628 + }, + { + "epoch": 0.4284868810416256, + "grad_norm": 5.177758249324022, + "learning_rate": 5e-06, + "loss": 0.1183, + "num_input_tokens_seen": 279800920, + "step": 1629 + }, + { + "epoch": 0.4284868810416256, + "loss": 0.11388795077800751, + "loss_ce": 0.0024377545341849327, + "loss_iou": 0.46484375, + "loss_num": 0.0223388671875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 279800920, + "step": 1629 + }, + { + "epoch": 0.4287499178010127, + "grad_norm": 5.002716354322989, + "learning_rate": 5e-06, + "loss": 0.16, + "num_input_tokens_seen": 279973164, + "step": 1630 + }, + { + "epoch": 0.4287499178010127, + "loss": 0.11221545934677124, + "loss_ce": 0.0021690744906663895, + "loss_iou": 0.5546875, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 279973164, + "step": 1630 + }, + { + "epoch": 0.42901295456039984, + "grad_norm": 11.888987422058314, + "learning_rate": 5e-06, + "loss": 0.1236, + "num_input_tokens_seen": 280144932, + "step": 1631 + }, + { + "epoch": 0.42901295456039984, + "loss": 0.10366816818714142, + "loss_ce": 0.0032348139211535454, + "loss_iou": 0.53125, + "loss_num": 0.02001953125, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 280144932, + "step": 1631 + }, + { + "epoch": 0.42927599131978694, + "grad_norm": 5.9683348593169105, + "learning_rate": 5e-06, + "loss": 0.1279, + "num_input_tokens_seen": 280316892, + "step": 1632 + }, + { + "epoch": 0.42927599131978694, + "loss": 0.1221093013882637, + "loss_ce": 0.0030907560139894485, + "loss_iou": 0.734375, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 280316892, + "step": 1632 + }, + { + "epoch": 0.42953902807917405, + "grad_norm": 4.306403695098148, + "learning_rate": 5e-06, + "loss": 0.1676, + "num_input_tokens_seen": 280488680, + "step": 1633 + }, + { + "epoch": 0.42953902807917405, + "loss": 0.16619864106178284, + "loss_ce": 0.0007323222234845161, + "loss_iou": 0.421875, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 280488680, + "step": 1633 + }, + { + "epoch": 0.4298020648385612, + "grad_norm": 6.542037684380333, + "learning_rate": 5e-06, + "loss": 0.1228, + "num_input_tokens_seen": 280658812, + "step": 1634 + }, + { + "epoch": 0.4298020648385612, + "loss": 0.1335218995809555, + "loss_ce": 0.0008009535376913846, + "loss_iou": 0.419921875, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 280658812, + "step": 1634 + }, + { + "epoch": 0.4300651015979483, + "grad_norm": 5.780919966084494, + "learning_rate": 5e-06, + "loss": 0.1225, + "num_input_tokens_seen": 280829020, + "step": 1635 + }, + { + "epoch": 0.4300651015979483, + "loss": 0.13609978556632996, + "loss_ce": 0.0019139924552291632, + "loss_iou": 0.375, + "loss_num": 0.02685546875, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 280829020, + "step": 1635 + }, + { + "epoch": 0.4303281383573354, + "grad_norm": 4.625756427735958, + "learning_rate": 5e-06, + "loss": 0.1239, + "num_input_tokens_seen": 280999460, + "step": 1636 + }, + { + "epoch": 0.4303281383573354, + "loss": 0.05524425953626633, + "loss_ce": 0.0004957281635142863, + "loss_iou": 0.6640625, + "loss_num": 0.010986328125, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 280999460, + "step": 1636 + }, + { + "epoch": 0.43059117511672257, + "grad_norm": 4.597155715023034, + "learning_rate": 5e-06, + "loss": 0.1455, + "num_input_tokens_seen": 281169544, + "step": 1637 + }, + { + "epoch": 0.43059117511672257, + "loss": 0.09542928636074066, + "loss_ce": 0.002106534782797098, + "loss_iou": 0.60546875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 281169544, + "step": 1637 + }, + { + "epoch": 0.43085421187610967, + "grad_norm": 6.599089127666852, + "learning_rate": 5e-06, + "loss": 0.1655, + "num_input_tokens_seen": 281341764, + "step": 1638 + }, + { + "epoch": 0.43085421187610967, + "loss": 0.15369150042533875, + "loss_ce": 0.0017444868572056293, + "loss_iou": 0.5390625, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 281341764, + "step": 1638 + }, + { + "epoch": 0.43111724863549683, + "grad_norm": 10.985722993191203, + "learning_rate": 5e-06, + "loss": 0.1179, + "num_input_tokens_seen": 281514480, + "step": 1639 + }, + { + "epoch": 0.43111724863549683, + "loss": 0.12693974375724792, + "loss_ce": 0.004442187491804361, + "loss_iou": 0.427734375, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 281514480, + "step": 1639 + }, + { + "epoch": 0.43138028539488393, + "grad_norm": 5.112052305423048, + "learning_rate": 5e-06, + "loss": 0.1307, + "num_input_tokens_seen": 281686636, + "step": 1640 + }, + { + "epoch": 0.43138028539488393, + "loss": 0.13622896373271942, + "loss_ce": 0.001615930232219398, + "loss_iou": 0.6875, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 281686636, + "step": 1640 + }, + { + "epoch": 0.43164332215427104, + "grad_norm": 4.445172786952326, + "learning_rate": 5e-06, + "loss": 0.1298, + "num_input_tokens_seen": 281859068, + "step": 1641 + }, + { + "epoch": 0.43164332215427104, + "loss": 0.16077426075935364, + "loss_ce": 0.0008926668670028448, + "loss_iou": 0.478515625, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 281859068, + "step": 1641 + }, + { + "epoch": 0.4319063589136582, + "grad_norm": 18.643962143254182, + "learning_rate": 5e-06, + "loss": 0.1174, + "num_input_tokens_seen": 282031296, + "step": 1642 + }, + { + "epoch": 0.4319063589136582, + "loss": 0.11041103303432465, + "loss_ce": 0.00216518621891737, + "loss_iou": 0.5703125, + "loss_num": 0.0216064453125, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 282031296, + "step": 1642 + }, + { + "epoch": 0.4321693956730453, + "grad_norm": 14.94723205702143, + "learning_rate": 5e-06, + "loss": 0.0973, + "num_input_tokens_seen": 282203456, + "step": 1643 + }, + { + "epoch": 0.4321693956730453, + "loss": 0.10057017207145691, + "loss_ce": 0.002517198445275426, + "loss_iou": 0.470703125, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 282203456, + "step": 1643 + }, + { + "epoch": 0.43243243243243246, + "grad_norm": 3.591864332881924, + "learning_rate": 5e-06, + "loss": 0.1397, + "num_input_tokens_seen": 282374020, + "step": 1644 + }, + { + "epoch": 0.43243243243243246, + "loss": 0.08621848374605179, + "loss_ce": 0.00044883223017677665, + "loss_iou": 0.625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 282374020, + "step": 1644 + }, + { + "epoch": 0.43269546919181956, + "grad_norm": 6.406579991141497, + "learning_rate": 5e-06, + "loss": 0.1025, + "num_input_tokens_seen": 282544200, + "step": 1645 + }, + { + "epoch": 0.43269546919181956, + "loss": 0.10012087225914001, + "loss_ce": 0.001396512147039175, + "loss_iou": 0.48046875, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 282544200, + "step": 1645 + }, + { + "epoch": 0.43295850595120666, + "grad_norm": 6.633263160453237, + "learning_rate": 5e-06, + "loss": 0.1165, + "num_input_tokens_seen": 282716276, + "step": 1646 + }, + { + "epoch": 0.43295850595120666, + "loss": 0.09250755608081818, + "loss_ce": 0.0007411979604512453, + "loss_iou": 0.57421875, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 282716276, + "step": 1646 + }, + { + "epoch": 0.4332215427105938, + "grad_norm": 11.88641544130966, + "learning_rate": 5e-06, + "loss": 0.119, + "num_input_tokens_seen": 282888756, + "step": 1647 + }, + { + "epoch": 0.4332215427105938, + "loss": 0.11388491094112396, + "loss_ce": 0.0009088441729545593, + "loss_iou": 0.55078125, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 282888756, + "step": 1647 + }, + { + "epoch": 0.4334845794699809, + "grad_norm": 5.478375817282589, + "learning_rate": 5e-06, + "loss": 0.1232, + "num_input_tokens_seen": 283060788, + "step": 1648 + }, + { + "epoch": 0.4334845794699809, + "loss": 0.08237907290458679, + "loss_ce": 0.003521653823554516, + "loss_iou": 0.5859375, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 283060788, + "step": 1648 + }, + { + "epoch": 0.433747616229368, + "grad_norm": 7.7141358569237015, + "learning_rate": 5e-06, + "loss": 0.1629, + "num_input_tokens_seen": 283232744, + "step": 1649 + }, + { + "epoch": 0.433747616229368, + "loss": 0.16650542616844177, + "loss_ce": 0.001252750400453806, + "loss_iou": 0.515625, + "loss_num": 0.033203125, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 283232744, + "step": 1649 + }, + { + "epoch": 0.4340106529887552, + "grad_norm": 4.902197150094181, + "learning_rate": 5e-06, + "loss": 0.114, + "num_input_tokens_seen": 283404860, + "step": 1650 + }, + { + "epoch": 0.4340106529887552, + "loss": 0.11392060667276382, + "loss_ce": 0.0009140149923041463, + "loss_iou": 0.67578125, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 283404860, + "step": 1650 + }, + { + "epoch": 0.4342736897481423, + "grad_norm": 6.611385564549315, + "learning_rate": 5e-06, + "loss": 0.1182, + "num_input_tokens_seen": 283577340, + "step": 1651 + }, + { + "epoch": 0.4342736897481423, + "loss": 0.14234262704849243, + "loss_ce": 0.0011072808410972357, + "loss_iou": 0.46875, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 283577340, + "step": 1651 + }, + { + "epoch": 0.43453672650752945, + "grad_norm": 5.270564238834203, + "learning_rate": 5e-06, + "loss": 0.1268, + "num_input_tokens_seen": 283747872, + "step": 1652 + }, + { + "epoch": 0.43453672650752945, + "loss": 0.08932007849216461, + "loss_ce": 0.0014904842246323824, + "loss_iou": 0.515625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 283747872, + "step": 1652 + }, + { + "epoch": 0.43479976326691655, + "grad_norm": 5.150524639297202, + "learning_rate": 5e-06, + "loss": 0.136, + "num_input_tokens_seen": 283920212, + "step": 1653 + }, + { + "epoch": 0.43479976326691655, + "loss": 0.08878134936094284, + "loss_ce": 0.00037192486342974007, + "loss_iou": 0.51171875, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 283920212, + "step": 1653 + }, + { + "epoch": 0.43506280002630365, + "grad_norm": 5.389390494681365, + "learning_rate": 5e-06, + "loss": 0.1047, + "num_input_tokens_seen": 284090700, + "step": 1654 + }, + { + "epoch": 0.43506280002630365, + "loss": 0.09808811545372009, + "loss_ce": 0.0017746419180184603, + "loss_iou": 0.59375, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 284090700, + "step": 1654 + }, + { + "epoch": 0.4353258367856908, + "grad_norm": 5.2829708598355625, + "learning_rate": 5e-06, + "loss": 0.1384, + "num_input_tokens_seen": 284262932, + "step": 1655 + }, + { + "epoch": 0.4353258367856908, + "loss": 0.24406926333904266, + "loss_ce": 0.0009052163222804666, + "loss_iou": 0.404296875, + "loss_num": 0.048583984375, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 284262932, + "step": 1655 + }, + { + "epoch": 0.4355888735450779, + "grad_norm": 4.415055027745429, + "learning_rate": 5e-06, + "loss": 0.1467, + "num_input_tokens_seen": 284433136, + "step": 1656 + }, + { + "epoch": 0.4355888735450779, + "loss": 0.14210422337055206, + "loss_ce": 0.0005942133138887584, + "loss_iou": 0.482421875, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 284433136, + "step": 1656 + }, + { + "epoch": 0.4358519103044651, + "grad_norm": 11.10469204899232, + "learning_rate": 5e-06, + "loss": 0.1059, + "num_input_tokens_seen": 284604980, + "step": 1657 + }, + { + "epoch": 0.4358519103044651, + "loss": 0.06616829335689545, + "loss_ce": 0.0029969080351293087, + "loss_iou": 0.5078125, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 284604980, + "step": 1657 + }, + { + "epoch": 0.4361149470638522, + "grad_norm": 37.454091592129345, + "learning_rate": 5e-06, + "loss": 0.1439, + "num_input_tokens_seen": 284777024, + "step": 1658 + }, + { + "epoch": 0.4361149470638522, + "loss": 0.09428656101226807, + "loss_ce": 0.00023138479446060956, + "loss_iou": 0.5234375, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 284777024, + "step": 1658 + }, + { + "epoch": 0.4363779838232393, + "grad_norm": 6.943748076830087, + "learning_rate": 5e-06, + "loss": 0.1641, + "num_input_tokens_seen": 284948692, + "step": 1659 + }, + { + "epoch": 0.4363779838232393, + "loss": 0.12960584461688995, + "loss_ce": 0.0008827036363072693, + "loss_iou": 0.546875, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 284948692, + "step": 1659 + }, + { + "epoch": 0.43664102058262644, + "grad_norm": 6.322360699878544, + "learning_rate": 5e-06, + "loss": 0.1203, + "num_input_tokens_seen": 285120852, + "step": 1660 + }, + { + "epoch": 0.43664102058262644, + "loss": 0.08696160465478897, + "loss_ce": 0.0016649758908897638, + "loss_iou": 0.609375, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 285120852, + "step": 1660 + }, + { + "epoch": 0.43690405734201354, + "grad_norm": 4.9857397500362, + "learning_rate": 5e-06, + "loss": 0.1257, + "num_input_tokens_seen": 285293104, + "step": 1661 + }, + { + "epoch": 0.43690405734201354, + "loss": 0.1620713770389557, + "loss_ce": 0.0027085873298346996, + "loss_iou": 0.359375, + "loss_num": 0.031982421875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 285293104, + "step": 1661 + }, + { + "epoch": 0.43716709410140064, + "grad_norm": 6.167930639257179, + "learning_rate": 5e-06, + "loss": 0.1339, + "num_input_tokens_seen": 285465116, + "step": 1662 + }, + { + "epoch": 0.43716709410140064, + "loss": 0.13285255432128906, + "loss_ce": 0.002481454983353615, + "loss_iou": 0.60546875, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 285465116, + "step": 1662 + }, + { + "epoch": 0.4374301308607878, + "grad_norm": 4.796617637287243, + "learning_rate": 5e-06, + "loss": 0.1067, + "num_input_tokens_seen": 285637712, + "step": 1663 + }, + { + "epoch": 0.4374301308607878, + "loss": 0.0941682979464531, + "loss_ce": 0.0005708856624551117, + "loss_iou": 0.58203125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 285637712, + "step": 1663 + }, + { + "epoch": 0.4376931676201749, + "grad_norm": 6.365145441420231, + "learning_rate": 5e-06, + "loss": 0.1524, + "num_input_tokens_seen": 285808140, + "step": 1664 + }, + { + "epoch": 0.4376931676201749, + "loss": 0.08691577613353729, + "loss_ce": 0.0016801799647510052, + "loss_iou": 0.703125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 285808140, + "step": 1664 + }, + { + "epoch": 0.43795620437956206, + "grad_norm": 9.14556589502857, + "learning_rate": 5e-06, + "loss": 0.1419, + "num_input_tokens_seen": 285980020, + "step": 1665 + }, + { + "epoch": 0.43795620437956206, + "loss": 0.11803478002548218, + "loss_ce": 0.0006641793297603726, + "loss_iou": 0.41796875, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 285980020, + "step": 1665 + }, + { + "epoch": 0.43821924113894917, + "grad_norm": 8.36736614358982, + "learning_rate": 5e-06, + "loss": 0.1476, + "num_input_tokens_seen": 286152224, + "step": 1666 + }, + { + "epoch": 0.43821924113894917, + "loss": 0.17869716882705688, + "loss_ce": 0.002091944683343172, + "loss_iou": 0.578125, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 286152224, + "step": 1666 + }, + { + "epoch": 0.43848227789833627, + "grad_norm": 5.977656991369799, + "learning_rate": 5e-06, + "loss": 0.1586, + "num_input_tokens_seen": 286324392, + "step": 1667 + }, + { + "epoch": 0.43848227789833627, + "loss": 0.07165973633527756, + "loss_ce": 0.0008894691127352417, + "loss_iou": 0.6015625, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 286324392, + "step": 1667 + }, + { + "epoch": 0.43874531465772343, + "grad_norm": 4.564829435087821, + "learning_rate": 5e-06, + "loss": 0.1163, + "num_input_tokens_seen": 286496724, + "step": 1668 + }, + { + "epoch": 0.43874531465772343, + "loss": 0.10358568280935287, + "loss_ce": 0.0014128325274214149, + "loss_iou": 0.53125, + "loss_num": 0.0205078125, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 286496724, + "step": 1668 + }, + { + "epoch": 0.43900835141711053, + "grad_norm": 5.942642735170272, + "learning_rate": 5e-06, + "loss": 0.1653, + "num_input_tokens_seen": 286669072, + "step": 1669 + }, + { + "epoch": 0.43900835141711053, + "loss": 0.15151304006576538, + "loss_ce": 0.00020689686061814427, + "loss_iou": 0.46484375, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 286669072, + "step": 1669 + }, + { + "epoch": 0.4392713881764977, + "grad_norm": 10.62471442505103, + "learning_rate": 5e-06, + "loss": 0.136, + "num_input_tokens_seen": 286839656, + "step": 1670 + }, + { + "epoch": 0.4392713881764977, + "loss": 0.07143907248973846, + "loss_ce": 0.0015233027515932918, + "loss_iou": 0.56640625, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 286839656, + "step": 1670 + }, + { + "epoch": 0.4395344249358848, + "grad_norm": 10.099626860743896, + "learning_rate": 5e-06, + "loss": 0.1033, + "num_input_tokens_seen": 287009348, + "step": 1671 + }, + { + "epoch": 0.4395344249358848, + "loss": 0.07566662132740021, + "loss_ce": 0.0009290680172853172, + "loss_iou": 0.5625, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 287009348, + "step": 1671 + }, + { + "epoch": 0.4397974616952719, + "grad_norm": 5.653901499064607, + "learning_rate": 5e-06, + "loss": 0.1272, + "num_input_tokens_seen": 287181772, + "step": 1672 + }, + { + "epoch": 0.4397974616952719, + "loss": 0.13149940967559814, + "loss_ce": 0.003600239520892501, + "loss_iou": 0.62890625, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 287181772, + "step": 1672 + }, + { + "epoch": 0.44006049845465905, + "grad_norm": 5.524031672248575, + "learning_rate": 5e-06, + "loss": 0.1428, + "num_input_tokens_seen": 287353976, + "step": 1673 + }, + { + "epoch": 0.44006049845465905, + "loss": 0.09840566664934158, + "loss_ce": 0.0025804713368415833, + "loss_iou": 0.51953125, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 287353976, + "step": 1673 + }, + { + "epoch": 0.44032353521404616, + "grad_norm": 9.273568707994169, + "learning_rate": 5e-06, + "loss": 0.1199, + "num_input_tokens_seen": 287526172, + "step": 1674 + }, + { + "epoch": 0.44032353521404616, + "loss": 0.06107574701309204, + "loss_ce": 0.0005288777174428105, + "loss_iou": 0.59765625, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 287526172, + "step": 1674 + }, + { + "epoch": 0.44058657197343326, + "grad_norm": 4.970980583351768, + "learning_rate": 5e-06, + "loss": 0.1518, + "num_input_tokens_seen": 287698712, + "step": 1675 + }, + { + "epoch": 0.44058657197343326, + "loss": 0.15458138287067413, + "loss_ce": 0.004831631202250719, + "loss_iou": 0.44921875, + "loss_num": 0.030029296875, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 287698712, + "step": 1675 + }, + { + "epoch": 0.4408496087328204, + "grad_norm": 5.818694779717612, + "learning_rate": 5e-06, + "loss": 0.1327, + "num_input_tokens_seen": 287871108, + "step": 1676 + }, + { + "epoch": 0.4408496087328204, + "loss": 0.09261530637741089, + "loss_ce": 0.0006353222415782511, + "loss_iou": 0.65625, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 287871108, + "step": 1676 + }, + { + "epoch": 0.4411126454922075, + "grad_norm": 13.742421763597438, + "learning_rate": 5e-06, + "loss": 0.0981, + "num_input_tokens_seen": 288043424, + "step": 1677 + }, + { + "epoch": 0.4411126454922075, + "loss": 0.0773499608039856, + "loss_ce": 0.0007813603151589632, + "loss_iou": 0.498046875, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 288043424, + "step": 1677 + }, + { + "epoch": 0.4413756822515947, + "grad_norm": 4.64225389501199, + "learning_rate": 5e-06, + "loss": 0.1295, + "num_input_tokens_seen": 288215812, + "step": 1678 + }, + { + "epoch": 0.4413756822515947, + "loss": 0.1821221113204956, + "loss_ce": 0.0025872092228382826, + "loss_iou": 0.61328125, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 288215812, + "step": 1678 + }, + { + "epoch": 0.4416387190109818, + "grad_norm": 3.729084500729301, + "learning_rate": 5e-06, + "loss": 0.1359, + "num_input_tokens_seen": 288388096, + "step": 1679 + }, + { + "epoch": 0.4416387190109818, + "loss": 0.07683113217353821, + "loss_ce": 0.000735556473955512, + "loss_iou": 0.390625, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 288388096, + "step": 1679 + }, + { + "epoch": 0.4419017557703689, + "grad_norm": 3.7239911787320956, + "learning_rate": 5e-06, + "loss": 0.0862, + "num_input_tokens_seen": 288560596, + "step": 1680 + }, + { + "epoch": 0.4419017557703689, + "loss": 0.09967576712369919, + "loss_ce": 0.002248398493975401, + "loss_iou": 0.5234375, + "loss_num": 0.0194091796875, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 288560596, + "step": 1680 + }, + { + "epoch": 0.44216479252975605, + "grad_norm": 7.1544540740612845, + "learning_rate": 5e-06, + "loss": 0.1522, + "num_input_tokens_seen": 288730844, + "step": 1681 + }, + { + "epoch": 0.44216479252975605, + "loss": 0.14395672082901, + "loss_ce": 0.0017753278370946646, + "loss_iou": 0.384765625, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 288730844, + "step": 1681 + }, + { + "epoch": 0.44242782928914315, + "grad_norm": 6.963458789097864, + "learning_rate": 5e-06, + "loss": 0.123, + "num_input_tokens_seen": 288901440, + "step": 1682 + }, + { + "epoch": 0.44242782928914315, + "loss": 0.12861037254333496, + "loss_ce": 0.0015962182078510523, + "loss_iou": 0.58984375, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 288901440, + "step": 1682 + }, + { + "epoch": 0.4426908660485303, + "grad_norm": 6.124914951048281, + "learning_rate": 5e-06, + "loss": 0.2107, + "num_input_tokens_seen": 289073420, + "step": 1683 + }, + { + "epoch": 0.4426908660485303, + "loss": 0.19420379400253296, + "loss_ce": 0.0013021675404161215, + "loss_iou": 0.58984375, + "loss_num": 0.03857421875, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 289073420, + "step": 1683 + }, + { + "epoch": 0.4429539028079174, + "grad_norm": 9.860837139100692, + "learning_rate": 5e-06, + "loss": 0.172, + "num_input_tokens_seen": 289245604, + "step": 1684 + }, + { + "epoch": 0.4429539028079174, + "loss": 0.18933850526809692, + "loss_ce": 0.002204704098403454, + "loss_iou": 0.73828125, + "loss_num": 0.037353515625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 289245604, + "step": 1684 + }, + { + "epoch": 0.4432169395673045, + "grad_norm": 7.0676385774671875, + "learning_rate": 5e-06, + "loss": 0.1257, + "num_input_tokens_seen": 289417760, + "step": 1685 + }, + { + "epoch": 0.4432169395673045, + "loss": 0.16348493099212646, + "loss_ce": 0.0013145222328603268, + "loss_iou": 0.41015625, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 289417760, + "step": 1685 + }, + { + "epoch": 0.44347997632669167, + "grad_norm": 5.183221235705912, + "learning_rate": 5e-06, + "loss": 0.1189, + "num_input_tokens_seen": 289590036, + "step": 1686 + }, + { + "epoch": 0.44347997632669167, + "loss": 0.07584193348884583, + "loss_ce": 0.003545790910720825, + "loss_iou": 0.51171875, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 289590036, + "step": 1686 + }, + { + "epoch": 0.4437430130860788, + "grad_norm": 4.680777365130206, + "learning_rate": 5e-06, + "loss": 0.13, + "num_input_tokens_seen": 289762148, + "step": 1687 + }, + { + "epoch": 0.4437430130860788, + "loss": 0.15222427248954773, + "loss_ce": 0.000765529228374362, + "loss_iou": 0.53515625, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 289762148, + "step": 1687 + }, + { + "epoch": 0.4440060498454659, + "grad_norm": 8.359052775831522, + "learning_rate": 5e-06, + "loss": 0.1551, + "num_input_tokens_seen": 289932260, + "step": 1688 + }, + { + "epoch": 0.4440060498454659, + "loss": 0.15807722508907318, + "loss_ce": 0.003322579897940159, + "loss_iou": 0.62109375, + "loss_num": 0.031005859375, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 289932260, + "step": 1688 + }, + { + "epoch": 0.44426908660485304, + "grad_norm": 12.300759876906785, + "learning_rate": 5e-06, + "loss": 0.1129, + "num_input_tokens_seen": 290101064, + "step": 1689 + }, + { + "epoch": 0.44426908660485304, + "loss": 0.1050846129655838, + "loss_ce": 0.00016517633048351854, + "loss_iou": 0.48046875, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 290101064, + "step": 1689 + }, + { + "epoch": 0.44453212336424014, + "grad_norm": 7.817323041044055, + "learning_rate": 5e-06, + "loss": 0.153, + "num_input_tokens_seen": 290273392, + "step": 1690 + }, + { + "epoch": 0.44453212336424014, + "loss": 0.10968184471130371, + "loss_ce": 0.004731892608106136, + "loss_iou": 0.53125, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 290273392, + "step": 1690 + }, + { + "epoch": 0.4447951601236273, + "grad_norm": 10.723057383347166, + "learning_rate": 5e-06, + "loss": 0.1448, + "num_input_tokens_seen": 290445728, + "step": 1691 + }, + { + "epoch": 0.4447951601236273, + "loss": 0.11715184152126312, + "loss_ce": 0.0027719633653759956, + "loss_iou": 0.5, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 290445728, + "step": 1691 + }, + { + "epoch": 0.4450581968830144, + "grad_norm": 5.5921534938980795, + "learning_rate": 5e-06, + "loss": 0.1311, + "num_input_tokens_seen": 290618040, + "step": 1692 + }, + { + "epoch": 0.4450581968830144, + "loss": 0.11884011328220367, + "loss_ce": 0.0018509816145524383, + "loss_iou": 0.62890625, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 290618040, + "step": 1692 + }, + { + "epoch": 0.4453212336424015, + "grad_norm": 14.56261412387689, + "learning_rate": 5e-06, + "loss": 0.1343, + "num_input_tokens_seen": 290790260, + "step": 1693 + }, + { + "epoch": 0.4453212336424015, + "loss": 0.12701714038848877, + "loss_ce": 0.0006743660196661949, + "loss_iou": 0.33203125, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 290790260, + "step": 1693 + }, + { + "epoch": 0.44558427040178866, + "grad_norm": 18.011685132647553, + "learning_rate": 5e-06, + "loss": 0.1487, + "num_input_tokens_seen": 290960676, + "step": 1694 + }, + { + "epoch": 0.44558427040178866, + "loss": 0.17821697890758514, + "loss_ce": 0.00457195146009326, + "loss_iou": 0.62109375, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 290960676, + "step": 1694 + }, + { + "epoch": 0.44584730716117577, + "grad_norm": 5.536946199556455, + "learning_rate": 5e-06, + "loss": 0.1152, + "num_input_tokens_seen": 291132980, + "step": 1695 + }, + { + "epoch": 0.44584730716117577, + "loss": 0.10020774602890015, + "loss_ce": 0.0019716662354767323, + "loss_iou": 0.5390625, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 291132980, + "step": 1695 + }, + { + "epoch": 0.4461103439205629, + "grad_norm": 14.998078131446578, + "learning_rate": 5e-06, + "loss": 0.141, + "num_input_tokens_seen": 291305020, + "step": 1696 + }, + { + "epoch": 0.4461103439205629, + "loss": 0.26434189081192017, + "loss_ce": 0.003111420664936304, + "loss_iou": 0.37890625, + "loss_num": 0.05224609375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 291305020, + "step": 1696 + }, + { + "epoch": 0.44637338067995, + "grad_norm": 5.503328090464836, + "learning_rate": 5e-06, + "loss": 0.1152, + "num_input_tokens_seen": 291477360, + "step": 1697 + }, + { + "epoch": 0.44637338067995, + "loss": 0.08737830072641373, + "loss_ce": 0.0004947560373693705, + "loss_iou": 0.5390625, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 291477360, + "step": 1697 + }, + { + "epoch": 0.44663641743933713, + "grad_norm": 4.950750612606285, + "learning_rate": 5e-06, + "loss": 0.1213, + "num_input_tokens_seen": 291647704, + "step": 1698 + }, + { + "epoch": 0.44663641743933713, + "loss": 0.09089531749486923, + "loss_ce": 0.00031914000282995403, + "loss_iou": 0.5234375, + "loss_num": 0.01806640625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 291647704, + "step": 1698 + }, + { + "epoch": 0.4468994541987243, + "grad_norm": 7.442760799509784, + "learning_rate": 5e-06, + "loss": 0.1546, + "num_input_tokens_seen": 291820320, + "step": 1699 + }, + { + "epoch": 0.4468994541987243, + "loss": 0.1341613531112671, + "loss_ce": 0.004461641423404217, + "loss_iou": 0.423828125, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 291820320, + "step": 1699 + }, + { + "epoch": 0.4471624909581114, + "grad_norm": 12.789945030223832, + "learning_rate": 5e-06, + "loss": 0.1432, + "num_input_tokens_seen": 291992692, + "step": 1700 + }, + { + "epoch": 0.4471624909581114, + "loss": 0.12477381527423859, + "loss_ce": 0.002901863306760788, + "loss_iou": 0.53125, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 291992692, + "step": 1700 + }, + { + "epoch": 0.4474255277174985, + "grad_norm": 4.791603869867936, + "learning_rate": 5e-06, + "loss": 0.1168, + "num_input_tokens_seen": 292164820, + "step": 1701 + }, + { + "epoch": 0.4474255277174985, + "loss": 0.1102890819311142, + "loss_ce": 0.002928241156041622, + "loss_iou": 0.421875, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 292164820, + "step": 1701 + }, + { + "epoch": 0.44768856447688565, + "grad_norm": 4.5597072248390145, + "learning_rate": 5e-06, + "loss": 0.1258, + "num_input_tokens_seen": 292336840, + "step": 1702 + }, + { + "epoch": 0.44768856447688565, + "loss": 0.13155938684940338, + "loss_ce": 0.0015850251074880362, + "loss_iou": 0.703125, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 292336840, + "step": 1702 + }, + { + "epoch": 0.44795160123627276, + "grad_norm": 4.229506785629678, + "learning_rate": 5e-06, + "loss": 0.1794, + "num_input_tokens_seen": 292507240, + "step": 1703 + }, + { + "epoch": 0.44795160123627276, + "loss": 0.12941154837608337, + "loss_ce": 0.0014818647177889943, + "loss_iou": 0.54296875, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 292507240, + "step": 1703 + }, + { + "epoch": 0.4482146379956599, + "grad_norm": 5.641270696096721, + "learning_rate": 5e-06, + "loss": 0.1276, + "num_input_tokens_seen": 292679176, + "step": 1704 + }, + { + "epoch": 0.4482146379956599, + "loss": 0.12304902821779251, + "loss_ce": 0.002321491949260235, + "loss_iou": 0.451171875, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 292679176, + "step": 1704 + }, + { + "epoch": 0.448477674755047, + "grad_norm": 4.862520419399453, + "learning_rate": 5e-06, + "loss": 0.1122, + "num_input_tokens_seen": 292851500, + "step": 1705 + }, + { + "epoch": 0.448477674755047, + "loss": 0.089045949280262, + "loss_ce": 0.00478691840544343, + "loss_iou": 0.546875, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 292851500, + "step": 1705 + }, + { + "epoch": 0.4487407115144341, + "grad_norm": 5.2865399556662975, + "learning_rate": 5e-06, + "loss": 0.1533, + "num_input_tokens_seen": 293020992, + "step": 1706 + }, + { + "epoch": 0.4487407115144341, + "loss": 0.20740769803524017, + "loss_ce": 0.0010478447657078505, + "loss_iou": 0.66015625, + "loss_num": 0.041259765625, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 293020992, + "step": 1706 + }, + { + "epoch": 0.4490037482738213, + "grad_norm": 4.239149151423396, + "learning_rate": 5e-06, + "loss": 0.1157, + "num_input_tokens_seen": 293192840, + "step": 1707 + }, + { + "epoch": 0.4490037482738213, + "loss": 0.1247292309999466, + "loss_ce": 0.002048558322712779, + "loss_iou": 0.578125, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 293192840, + "step": 1707 + }, + { + "epoch": 0.4492667850332084, + "grad_norm": 14.697036954882604, + "learning_rate": 5e-06, + "loss": 0.1221, + "num_input_tokens_seen": 293359152, + "step": 1708 + }, + { + "epoch": 0.4492667850332084, + "loss": 0.17449304461479187, + "loss_ce": 0.000573370314668864, + "loss_iou": 0.53515625, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 293359152, + "step": 1708 + }, + { + "epoch": 0.44952982179259554, + "grad_norm": 6.428150274293997, + "learning_rate": 5e-06, + "loss": 0.1398, + "num_input_tokens_seen": 293531168, + "step": 1709 + }, + { + "epoch": 0.44952982179259554, + "loss": 0.0824984684586525, + "loss_ce": 0.00025359162827953696, + "loss_iou": 0.43359375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 293531168, + "step": 1709 + }, + { + "epoch": 0.44979285855198264, + "grad_norm": 12.082892344047602, + "learning_rate": 5e-06, + "loss": 0.1236, + "num_input_tokens_seen": 293701596, + "step": 1710 + }, + { + "epoch": 0.44979285855198264, + "loss": 0.075187087059021, + "loss_ce": 0.00011384247045498341, + "loss_iou": 0.77734375, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 293701596, + "step": 1710 + }, + { + "epoch": 0.45005589531136975, + "grad_norm": 6.447369973784934, + "learning_rate": 5e-06, + "loss": 0.1528, + "num_input_tokens_seen": 293874048, + "step": 1711 + }, + { + "epoch": 0.45005589531136975, + "loss": 0.12107305228710175, + "loss_ce": 0.0001013779838103801, + "loss_iou": 0.44140625, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 293874048, + "step": 1711 + }, + { + "epoch": 0.4503189320707569, + "grad_norm": 4.920690909069883, + "learning_rate": 5e-06, + "loss": 0.1174, + "num_input_tokens_seen": 294046324, + "step": 1712 + }, + { + "epoch": 0.4503189320707569, + "loss": 0.09293599426746368, + "loss_ce": 0.0002541054564062506, + "loss_iou": 0.62890625, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 294046324, + "step": 1712 + }, + { + "epoch": 0.450581968830144, + "grad_norm": 5.111263917035011, + "learning_rate": 5e-06, + "loss": 0.1321, + "num_input_tokens_seen": 294218352, + "step": 1713 + }, + { + "epoch": 0.450581968830144, + "loss": 0.08807346224784851, + "loss_ce": 0.005675997585058212, + "loss_iou": 0.546875, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 294218352, + "step": 1713 + }, + { + "epoch": 0.4508450055895311, + "grad_norm": 22.02715708529402, + "learning_rate": 5e-06, + "loss": 0.1312, + "num_input_tokens_seen": 294390700, + "step": 1714 + }, + { + "epoch": 0.4508450055895311, + "loss": 0.16420426964759827, + "loss_ce": 0.00023331816191785038, + "loss_iou": 0.66796875, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 294390700, + "step": 1714 + }, + { + "epoch": 0.45110804234891827, + "grad_norm": 36.948349834543606, + "learning_rate": 5e-06, + "loss": 0.1583, + "num_input_tokens_seen": 294562520, + "step": 1715 + }, + { + "epoch": 0.45110804234891827, + "loss": 0.20613673329353333, + "loss_ce": 0.0074672941118478775, + "loss_iou": 0.6015625, + "loss_num": 0.039794921875, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 294562520, + "step": 1715 + }, + { + "epoch": 0.4513710791083054, + "grad_norm": 11.731491784871416, + "learning_rate": 5e-06, + "loss": 0.1352, + "num_input_tokens_seen": 294732980, + "step": 1716 + }, + { + "epoch": 0.4513710791083054, + "loss": 0.2512318789958954, + "loss_ce": 0.0024831017944961786, + "loss_iou": 0.5859375, + "loss_num": 0.0498046875, + "loss_xval": 0.2490234375, + "num_input_tokens_seen": 294732980, + "step": 1716 + }, + { + "epoch": 0.45163411586769253, + "grad_norm": 14.33875314671704, + "learning_rate": 5e-06, + "loss": 0.146, + "num_input_tokens_seen": 294904972, + "step": 1717 + }, + { + "epoch": 0.45163411586769253, + "loss": 0.1545875370502472, + "loss_ce": 0.0034644976258277893, + "loss_iou": 0.609375, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 294904972, + "step": 1717 + }, + { + "epoch": 0.45189715262707963, + "grad_norm": 4.667158870565546, + "learning_rate": 5e-06, + "loss": 0.1225, + "num_input_tokens_seen": 295077136, + "step": 1718 + }, + { + "epoch": 0.45189715262707963, + "loss": 0.10374893248081207, + "loss_ce": 0.0009657314512878656, + "loss_iou": 0.55859375, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 295077136, + "step": 1718 + }, + { + "epoch": 0.45216018938646674, + "grad_norm": 4.671478799286235, + "learning_rate": 5e-06, + "loss": 0.1069, + "num_input_tokens_seen": 295249644, + "step": 1719 + }, + { + "epoch": 0.45216018938646674, + "loss": 0.12534737586975098, + "loss_ce": 0.004924997687339783, + "loss_iou": 0.57421875, + "loss_num": 0.0240478515625, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 295249644, + "step": 1719 + }, + { + "epoch": 0.4524232261458539, + "grad_norm": 9.77050555348203, + "learning_rate": 5e-06, + "loss": 0.1039, + "num_input_tokens_seen": 295421628, + "step": 1720 + }, + { + "epoch": 0.4524232261458539, + "loss": 0.10829440504312515, + "loss_ce": 0.0021237479522824287, + "loss_iou": 0.65625, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 295421628, + "step": 1720 + }, + { + "epoch": 0.452686262905241, + "grad_norm": 6.10598748249797, + "learning_rate": 5e-06, + "loss": 0.108, + "num_input_tokens_seen": 295593664, + "step": 1721 + }, + { + "epoch": 0.452686262905241, + "loss": 0.05939865857362747, + "loss_ce": 0.0012016374384984374, + "loss_iou": 0.5625, + "loss_num": 0.01165771484375, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 295593664, + "step": 1721 + }, + { + "epoch": 0.45294929966462816, + "grad_norm": 10.371953857475111, + "learning_rate": 5e-06, + "loss": 0.1225, + "num_input_tokens_seen": 295765768, + "step": 1722 + }, + { + "epoch": 0.45294929966462816, + "loss": 0.10428635776042938, + "loss_ce": 0.0005265921936370432, + "loss_iou": 0.76171875, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 295765768, + "step": 1722 + }, + { + "epoch": 0.45321233642401526, + "grad_norm": 6.589623724090896, + "learning_rate": 5e-06, + "loss": 0.1679, + "num_input_tokens_seen": 295937984, + "step": 1723 + }, + { + "epoch": 0.45321233642401526, + "loss": 0.12776082754135132, + "loss_ce": 0.0015095948474481702, + "loss_iou": 0.392578125, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 295937984, + "step": 1723 + }, + { + "epoch": 0.45347537318340236, + "grad_norm": 5.38306851263364, + "learning_rate": 5e-06, + "loss": 0.1325, + "num_input_tokens_seen": 296109952, + "step": 1724 + }, + { + "epoch": 0.45347537318340236, + "loss": 0.10374290496110916, + "loss_ce": 0.0004714199749287218, + "loss_iou": 0.60546875, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 296109952, + "step": 1724 + }, + { + "epoch": 0.4537384099427895, + "grad_norm": 16.069341006935545, + "learning_rate": 5e-06, + "loss": 0.1861, + "num_input_tokens_seen": 296282320, + "step": 1725 + }, + { + "epoch": 0.4537384099427895, + "loss": 0.3360915184020996, + "loss_ce": 0.003205763641744852, + "loss_iou": 0.49609375, + "loss_num": 0.06640625, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 296282320, + "step": 1725 + }, + { + "epoch": 0.4540014467021766, + "grad_norm": 7.355868991768278, + "learning_rate": 5e-06, + "loss": 0.1149, + "num_input_tokens_seen": 296454412, + "step": 1726 + }, + { + "epoch": 0.4540014467021766, + "loss": 0.11106541752815247, + "loss_ce": 0.0014462803956121206, + "loss_iou": 0.44140625, + "loss_num": 0.02197265625, + "loss_xval": 0.109375, + "num_input_tokens_seen": 296454412, + "step": 1726 + }, + { + "epoch": 0.45426448346156373, + "grad_norm": 4.637757034723655, + "learning_rate": 5e-06, + "loss": 0.1112, + "num_input_tokens_seen": 296626392, + "step": 1727 + }, + { + "epoch": 0.45426448346156373, + "loss": 0.07483752816915512, + "loss_ce": 0.00046619208296760917, + "loss_iou": 0.359375, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 296626392, + "step": 1727 + }, + { + "epoch": 0.4545275202209509, + "grad_norm": 4.094172069183009, + "learning_rate": 5e-06, + "loss": 0.141, + "num_input_tokens_seen": 296795792, + "step": 1728 + }, + { + "epoch": 0.4545275202209509, + "loss": 0.1533362716436386, + "loss_ce": 0.00154183991253376, + "loss_iou": 0.53125, + "loss_num": 0.0303955078125, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 296795792, + "step": 1728 + }, + { + "epoch": 0.454790556980338, + "grad_norm": 4.550319533659712, + "learning_rate": 5e-06, + "loss": 0.142, + "num_input_tokens_seen": 296966316, + "step": 1729 + }, + { + "epoch": 0.454790556980338, + "loss": 0.25365394353866577, + "loss_ce": 0.002402704209089279, + "loss_iou": 0.32421875, + "loss_num": 0.05029296875, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 296966316, + "step": 1729 + }, + { + "epoch": 0.45505359373972515, + "grad_norm": 5.052226011800126, + "learning_rate": 5e-06, + "loss": 0.1398, + "num_input_tokens_seen": 297136040, + "step": 1730 + }, + { + "epoch": 0.45505359373972515, + "loss": 0.1388048231601715, + "loss_ce": 0.0004991517635062337, + "loss_iou": 0.640625, + "loss_num": 0.027587890625, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 297136040, + "step": 1730 + }, + { + "epoch": 0.45531663049911225, + "grad_norm": 9.365703053895057, + "learning_rate": 5e-06, + "loss": 0.0907, + "num_input_tokens_seen": 297306228, + "step": 1731 + }, + { + "epoch": 0.45531663049911225, + "loss": 0.09631586819887161, + "loss_ce": 0.0004296356055419892, + "loss_iou": 0.4453125, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 297306228, + "step": 1731 + }, + { + "epoch": 0.45557966725849935, + "grad_norm": 5.623520624492047, + "learning_rate": 5e-06, + "loss": 0.1586, + "num_input_tokens_seen": 297478380, + "step": 1732 + }, + { + "epoch": 0.45557966725849935, + "loss": 0.21242645382881165, + "loss_ce": 0.0005734282894991338, + "loss_iou": 0.58203125, + "loss_num": 0.04248046875, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 297478380, + "step": 1732 + }, + { + "epoch": 0.4558427040178865, + "grad_norm": 22.29793682222897, + "learning_rate": 5e-06, + "loss": 0.1122, + "num_input_tokens_seen": 297650336, + "step": 1733 + }, + { + "epoch": 0.4558427040178865, + "loss": 0.12880732119083405, + "loss_ce": 0.0012133296113461256, + "loss_iou": 0.56640625, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 297650336, + "step": 1733 + }, + { + "epoch": 0.4561057407772736, + "grad_norm": 19.5277076823797, + "learning_rate": 5e-06, + "loss": 0.1377, + "num_input_tokens_seen": 297822376, + "step": 1734 + }, + { + "epoch": 0.4561057407772736, + "loss": 0.10357876121997833, + "loss_ce": 0.00030727204284630716, + "loss_iou": 0.447265625, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 297822376, + "step": 1734 + }, + { + "epoch": 0.4563687775366608, + "grad_norm": 4.597687331185091, + "learning_rate": 5e-06, + "loss": 0.0992, + "num_input_tokens_seen": 297994524, + "step": 1735 + }, + { + "epoch": 0.4563687775366608, + "loss": 0.11487319320440292, + "loss_ce": 0.0003712356265168637, + "loss_iou": 0.51171875, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 297994524, + "step": 1735 + }, + { + "epoch": 0.4566318142960479, + "grad_norm": 17.34014327670223, + "learning_rate": 5e-06, + "loss": 0.1904, + "num_input_tokens_seen": 298164216, + "step": 1736 + }, + { + "epoch": 0.4566318142960479, + "loss": 0.21512141823768616, + "loss_ce": 0.0003387040051165968, + "loss_iou": 0.54296875, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 298164216, + "step": 1736 + }, + { + "epoch": 0.456894851055435, + "grad_norm": 5.201392853210846, + "learning_rate": 5e-06, + "loss": 0.116, + "num_input_tokens_seen": 298336416, + "step": 1737 + }, + { + "epoch": 0.456894851055435, + "loss": 0.14383375644683838, + "loss_ce": 0.0030866768211126328, + "loss_iou": 0.388671875, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 298336416, + "step": 1737 + }, + { + "epoch": 0.45715788781482214, + "grad_norm": 4.466741788292063, + "learning_rate": 5e-06, + "loss": 0.1267, + "num_input_tokens_seen": 298508336, + "step": 1738 + }, + { + "epoch": 0.45715788781482214, + "loss": 0.08519221842288971, + "loss_ce": 0.001879227813333273, + "loss_iou": 0.484375, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 298508336, + "step": 1738 + }, + { + "epoch": 0.45742092457420924, + "grad_norm": 5.105148151648125, + "learning_rate": 5e-06, + "loss": 0.1118, + "num_input_tokens_seen": 298680480, + "step": 1739 + }, + { + "epoch": 0.45742092457420924, + "loss": 0.08455468714237213, + "loss_ce": 0.00038720344309695065, + "loss_iou": 0.40234375, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 298680480, + "step": 1739 + }, + { + "epoch": 0.45768396133359635, + "grad_norm": 4.88765152218918, + "learning_rate": 5e-06, + "loss": 0.0892, + "num_input_tokens_seen": 298852576, + "step": 1740 + }, + { + "epoch": 0.45768396133359635, + "loss": 0.06836723536252975, + "loss_ce": 0.0006487306673079729, + "loss_iou": 0.57421875, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 298852576, + "step": 1740 + }, + { + "epoch": 0.4579469980929835, + "grad_norm": 11.438189750287325, + "learning_rate": 5e-06, + "loss": 0.1432, + "num_input_tokens_seen": 299025188, + "step": 1741 + }, + { + "epoch": 0.4579469980929835, + "loss": 0.2103656679391861, + "loss_ce": 0.0009235285106115043, + "loss_iou": 0.458984375, + "loss_num": 0.0419921875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 299025188, + "step": 1741 + }, + { + "epoch": 0.4582100348523706, + "grad_norm": 17.844442720051195, + "learning_rate": 5e-06, + "loss": 0.11, + "num_input_tokens_seen": 299195652, + "step": 1742 + }, + { + "epoch": 0.4582100348523706, + "loss": 0.0902654230594635, + "loss_ce": 0.0008184025646187365, + "loss_iou": 0.6171875, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 299195652, + "step": 1742 + }, + { + "epoch": 0.45847307161175777, + "grad_norm": 5.593927162724121, + "learning_rate": 5e-06, + "loss": 0.1445, + "num_input_tokens_seen": 299367692, + "step": 1743 + }, + { + "epoch": 0.45847307161175777, + "loss": 0.0877409279346466, + "loss_ce": 0.004031214863061905, + "loss_iou": 0.47265625, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 299367692, + "step": 1743 + }, + { + "epoch": 0.45873610837114487, + "grad_norm": 6.084084258485456, + "learning_rate": 5e-06, + "loss": 0.1838, + "num_input_tokens_seen": 299539820, + "step": 1744 + }, + { + "epoch": 0.45873610837114487, + "loss": 0.17818066477775574, + "loss_ce": 0.0005988804623484612, + "loss_iou": 0.67578125, + "loss_num": 0.035400390625, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 299539820, + "step": 1744 + }, + { + "epoch": 0.45899914513053197, + "grad_norm": 4.3161100884144785, + "learning_rate": 5e-06, + "loss": 0.0857, + "num_input_tokens_seen": 299710180, + "step": 1745 + }, + { + "epoch": 0.45899914513053197, + "loss": 0.11408813297748566, + "loss_ce": 0.005079346243292093, + "loss_iou": 0.52734375, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 299710180, + "step": 1745 + }, + { + "epoch": 0.45926218188991913, + "grad_norm": 9.432280511482096, + "learning_rate": 5e-06, + "loss": 0.1824, + "num_input_tokens_seen": 299882192, + "step": 1746 + }, + { + "epoch": 0.45926218188991913, + "loss": 0.28233322501182556, + "loss_ce": 0.002944799605756998, + "loss_iou": 0.40234375, + "loss_num": 0.055908203125, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 299882192, + "step": 1746 + }, + { + "epoch": 0.45952521864930623, + "grad_norm": 4.947993402328502, + "learning_rate": 5e-06, + "loss": 0.1228, + "num_input_tokens_seen": 300054328, + "step": 1747 + }, + { + "epoch": 0.45952521864930623, + "loss": 0.12365365773439407, + "loss_ce": 0.00454354751855135, + "loss_iou": 0.515625, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 300054328, + "step": 1747 + }, + { + "epoch": 0.4597882554086934, + "grad_norm": 29.241280731418787, + "learning_rate": 5e-06, + "loss": 0.1044, + "num_input_tokens_seen": 300226268, + "step": 1748 + }, + { + "epoch": 0.4597882554086934, + "loss": 0.09467601031064987, + "loss_ce": 0.002909653354436159, + "loss_iou": 0.546875, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 300226268, + "step": 1748 + }, + { + "epoch": 0.4600512921680805, + "grad_norm": 40.89605718973821, + "learning_rate": 5e-06, + "loss": 0.1216, + "num_input_tokens_seen": 300398324, + "step": 1749 + }, + { + "epoch": 0.4600512921680805, + "loss": 0.18484918773174286, + "loss_ce": 0.002537175314500928, + "loss_iou": 0.48828125, + "loss_num": 0.036376953125, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 300398324, + "step": 1749 + }, + { + "epoch": 0.4603143289274676, + "grad_norm": 6.154438338656609, + "learning_rate": 5e-06, + "loss": 0.1836, + "num_input_tokens_seen": 300570420, + "step": 1750 + }, + { + "epoch": 0.4603143289274676, + "eval_websight_new_CIoU": 0.8543897271156311, + "eval_websight_new_GIoU": 0.8555817008018494, + "eval_websight_new_IoU": 0.8602511882781982, + "eval_websight_new_MAE_all": 0.022463313303887844, + "eval_websight_new_MAE_h": 0.007148948730900884, + "eval_websight_new_MAE_w": 0.03532572276890278, + "eval_websight_new_MAE_x": 0.03696838486939669, + "eval_websight_new_MAE_y": 0.010410191491246223, + "eval_websight_new_NUM_probability": 0.9999746978282928, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 0.11025163531303406, + "eval_websight_new_loss_ce": 7.8859661698516e-06, + "eval_websight_new_loss_iou": 0.3748779296875, + "eval_websight_new_loss_num": 0.019195556640625, + "eval_websight_new_loss_xval": 0.09600830078125, + "eval_websight_new_runtime": 55.1926, + "eval_websight_new_samples_per_second": 0.906, + "eval_websight_new_steps_per_second": 0.036, + "num_input_tokens_seen": 300570420, + "step": 1750 + }, + { + "epoch": 0.4603143289274676, + "eval_seeclick_CIoU": 0.6177069246768951, + "eval_seeclick_GIoU": 0.6163533926010132, + "eval_seeclick_IoU": 0.6425465941429138, + "eval_seeclick_MAE_all": 0.04845697060227394, + "eval_seeclick_MAE_h": 0.026225415989756584, + "eval_seeclick_MAE_w": 0.06570588797330856, + "eval_seeclick_MAE_x": 0.07359151728451252, + "eval_seeclick_MAE_y": 0.0283050537109375, + "eval_seeclick_NUM_probability": 0.9999766051769257, + "eval_seeclick_inside_bbox": 0.890625, + "eval_seeclick_loss": 0.23021526634693146, + "eval_seeclick_loss_ce": 0.009315645787864923, + "eval_seeclick_loss_iou": 0.5054931640625, + "eval_seeclick_loss_num": 0.04460906982421875, + "eval_seeclick_loss_xval": 0.22314453125, + "eval_seeclick_runtime": 72.9378, + "eval_seeclick_samples_per_second": 0.59, + "eval_seeclick_steps_per_second": 0.027, + "num_input_tokens_seen": 300570420, + "step": 1750 + }, + { + "epoch": 0.4603143289274676, + "eval_icons_CIoU": 0.8409464359283447, + "eval_icons_GIoU": 0.8350943326950073, + "eval_icons_IoU": 0.847510576248169, + "eval_icons_MAE_all": 0.022730856202542782, + "eval_icons_MAE_h": 0.02164691872894764, + "eval_icons_MAE_w": 0.023992713540792465, + "eval_icons_MAE_x": 0.025108729489147663, + "eval_icons_MAE_y": 0.02017505932599306, + "eval_icons_NUM_probability": 0.9999510943889618, + "eval_icons_inside_bbox": 0.984375, + "eval_icons_loss": 0.079879030585289, + "eval_icons_loss_ce": 2.25404792217887e-05, + "eval_icons_loss_iou": 0.5426025390625, + "eval_icons_loss_num": 0.014867782592773438, + "eval_icons_loss_xval": 0.0743560791015625, + "eval_icons_runtime": 87.1455, + "eval_icons_samples_per_second": 0.574, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 300570420, + "step": 1750 + }, + { + "epoch": 0.4603143289274676, + "eval_screenspot_CIoU": 0.5540184179941813, + "eval_screenspot_GIoU": 0.543454110622406, + "eval_screenspot_IoU": 0.5961946249008179, + "eval_screenspot_MAE_all": 0.08684368679920833, + "eval_screenspot_MAE_h": 0.04899499130745729, + "eval_screenspot_MAE_w": 0.1521986499428749, + "eval_screenspot_MAE_x": 0.09981551021337509, + "eval_screenspot_MAE_y": 0.046365607529878616, + "eval_screenspot_NUM_probability": 0.9998787045478821, + "eval_screenspot_inside_bbox": 0.8737499912579855, + "eval_screenspot_loss": 0.8499577641487122, + "eval_screenspot_loss_ce": 0.5143006145954132, + "eval_screenspot_loss_iou": 0.4506022135416667, + "eval_screenspot_loss_num": 0.06610107421875, + "eval_screenspot_loss_xval": 0.3305257161458333, + "eval_screenspot_runtime": 149.7903, + "eval_screenspot_samples_per_second": 0.594, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 300570420, + "step": 1750 + }, + { + "epoch": 0.4603143289274676, + "loss": 0.855069637298584, + "loss_ce": 0.507047176361084, + "loss_iou": 0.392578125, + "loss_num": 0.0693359375, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 300570420, + "step": 1750 + }, + { + "epoch": 0.46057736568685476, + "grad_norm": 6.10997370548099, + "learning_rate": 5e-06, + "loss": 0.1083, + "num_input_tokens_seen": 300742608, + "step": 1751 + }, + { + "epoch": 0.46057736568685476, + "loss": 0.11830765753984451, + "loss_ce": 0.0002961806021630764, + "loss_iou": 0.60546875, + "loss_num": 0.0235595703125, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 300742608, + "step": 1751 + }, + { + "epoch": 0.46084040244624186, + "grad_norm": 4.63619140673612, + "learning_rate": 5e-06, + "loss": 0.1648, + "num_input_tokens_seen": 300914728, + "step": 1752 + }, + { + "epoch": 0.46084040244624186, + "loss": 0.14693626761436462, + "loss_ce": 0.0036867514718323946, + "loss_iou": 0.58984375, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 300914728, + "step": 1752 + }, + { + "epoch": 0.46110343920562896, + "grad_norm": 11.051347194862576, + "learning_rate": 5e-06, + "loss": 0.1718, + "num_input_tokens_seen": 301087180, + "step": 1753 + }, + { + "epoch": 0.46110343920562896, + "loss": 0.2443966418504715, + "loss_ce": 0.0005001651006750762, + "loss_iou": 0.57421875, + "loss_num": 0.048828125, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 301087180, + "step": 1753 + }, + { + "epoch": 0.4613664759650161, + "grad_norm": 9.081946267145833, + "learning_rate": 5e-06, + "loss": 0.0997, + "num_input_tokens_seen": 301259116, + "step": 1754 + }, + { + "epoch": 0.4613664759650161, + "loss": 0.09555494785308838, + "loss_ce": 0.002583135850727558, + "loss_iou": 0.59765625, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 301259116, + "step": 1754 + }, + { + "epoch": 0.4616295127244032, + "grad_norm": 7.9541431928925395, + "learning_rate": 5e-06, + "loss": 0.1046, + "num_input_tokens_seen": 301427764, + "step": 1755 + }, + { + "epoch": 0.4616295127244032, + "loss": 0.12469062209129333, + "loss_ce": 0.0007892490248195827, + "loss_iou": 0.5078125, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 301427764, + "step": 1755 + }, + { + "epoch": 0.4618925494837904, + "grad_norm": 4.563316138635991, + "learning_rate": 5e-06, + "loss": 0.1222, + "num_input_tokens_seen": 301599772, + "step": 1756 + }, + { + "epoch": 0.4618925494837904, + "loss": 0.0649976134300232, + "loss_ce": 0.0009717366192489862, + "loss_iou": 0.5234375, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 301599772, + "step": 1756 + }, + { + "epoch": 0.4621555862431775, + "grad_norm": 13.734783204569586, + "learning_rate": 5e-06, + "loss": 0.1934, + "num_input_tokens_seen": 301771888, + "step": 1757 + }, + { + "epoch": 0.4621555862431775, + "loss": 0.2562709152698517, + "loss_ce": 0.0031581264920532703, + "loss_iou": 0.26171875, + "loss_num": 0.050537109375, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 301771888, + "step": 1757 + }, + { + "epoch": 0.4624186230025646, + "grad_norm": 8.489244421344079, + "learning_rate": 5e-06, + "loss": 0.1197, + "num_input_tokens_seen": 301944272, + "step": 1758 + }, + { + "epoch": 0.4624186230025646, + "loss": 0.08469030261039734, + "loss_ce": 0.00088903569849208, + "loss_iou": 0.5390625, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 301944272, + "step": 1758 + }, + { + "epoch": 0.46268165976195175, + "grad_norm": 5.029084121420607, + "learning_rate": 5e-06, + "loss": 0.1041, + "num_input_tokens_seen": 302116376, + "step": 1759 + }, + { + "epoch": 0.46268165976195175, + "loss": 0.12341859936714172, + "loss_ce": 0.0011651779059320688, + "loss_iou": 0.44921875, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 302116376, + "step": 1759 + }, + { + "epoch": 0.46294469652133885, + "grad_norm": 20.972527246553025, + "learning_rate": 5e-06, + "loss": 0.1375, + "num_input_tokens_seen": 302288436, + "step": 1760 + }, + { + "epoch": 0.46294469652133885, + "loss": 0.0873933807015419, + "loss_ce": 0.0027071028016507626, + "loss_iou": 0.6171875, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 302288436, + "step": 1760 + }, + { + "epoch": 0.463207733280726, + "grad_norm": 4.449125782082312, + "learning_rate": 5e-06, + "loss": 0.1406, + "num_input_tokens_seen": 302460504, + "step": 1761 + }, + { + "epoch": 0.463207733280726, + "loss": 0.10758376121520996, + "loss_ce": 0.0050141820684075356, + "loss_iou": 0.423828125, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 302460504, + "step": 1761 + }, + { + "epoch": 0.4634707700401131, + "grad_norm": 6.144634607422675, + "learning_rate": 5e-06, + "loss": 0.1573, + "num_input_tokens_seen": 302632396, + "step": 1762 + }, + { + "epoch": 0.4634707700401131, + "loss": 0.19673708081245422, + "loss_ce": 0.0027063230518251657, + "loss_iou": 0.484375, + "loss_num": 0.038818359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 302632396, + "step": 1762 + }, + { + "epoch": 0.4637338067995002, + "grad_norm": 4.45571483495035, + "learning_rate": 5e-06, + "loss": 0.1306, + "num_input_tokens_seen": 302804644, + "step": 1763 + }, + { + "epoch": 0.4637338067995002, + "loss": 0.0787121132016182, + "loss_ce": 0.005683551542460918, + "loss_iou": 0.35546875, + "loss_num": 0.01458740234375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 302804644, + "step": 1763 + }, + { + "epoch": 0.4639968435588874, + "grad_norm": 20.80296883365536, + "learning_rate": 5e-06, + "loss": 0.1304, + "num_input_tokens_seen": 302976856, + "step": 1764 + }, + { + "epoch": 0.4639968435588874, + "loss": 0.14017972350120544, + "loss_ce": 0.0037661464884877205, + "loss_iou": 0.51171875, + "loss_num": 0.0272216796875, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 302976856, + "step": 1764 + }, + { + "epoch": 0.4642598803182745, + "grad_norm": 5.519977681881848, + "learning_rate": 5e-06, + "loss": 0.1241, + "num_input_tokens_seen": 303146760, + "step": 1765 + }, + { + "epoch": 0.4642598803182745, + "loss": 0.12335249036550522, + "loss_ce": 0.002991169923916459, + "loss_iou": 0.5546875, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 303146760, + "step": 1765 + }, + { + "epoch": 0.4645229170776616, + "grad_norm": 6.31203658122147, + "learning_rate": 5e-06, + "loss": 0.1147, + "num_input_tokens_seen": 303318968, + "step": 1766 + }, + { + "epoch": 0.4645229170776616, + "loss": 0.08828569203615189, + "loss_ce": 0.002073533833026886, + "loss_iou": 0.703125, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 303318968, + "step": 1766 + }, + { + "epoch": 0.46478595383704874, + "grad_norm": 5.046640432518341, + "learning_rate": 5e-06, + "loss": 0.1099, + "num_input_tokens_seen": 303491036, + "step": 1767 + }, + { + "epoch": 0.46478595383704874, + "loss": 0.08855307102203369, + "loss_ce": 0.002066256944090128, + "loss_iou": 0.640625, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 303491036, + "step": 1767 + }, + { + "epoch": 0.46504899059643584, + "grad_norm": 8.451371161082179, + "learning_rate": 5e-06, + "loss": 0.1373, + "num_input_tokens_seen": 303663404, + "step": 1768 + }, + { + "epoch": 0.46504899059643584, + "loss": 0.16699054837226868, + "loss_ce": 0.002805991331115365, + "loss_iou": 0.640625, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 303663404, + "step": 1768 + }, + { + "epoch": 0.465312027355823, + "grad_norm": 9.603701773353785, + "learning_rate": 5e-06, + "loss": 0.1262, + "num_input_tokens_seen": 303832160, + "step": 1769 + }, + { + "epoch": 0.465312027355823, + "loss": 0.11529731005430222, + "loss_ce": 0.0019550304859876633, + "loss_iou": 0.4921875, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 303832160, + "step": 1769 + }, + { + "epoch": 0.4655750641152101, + "grad_norm": 4.893083096864921, + "learning_rate": 5e-06, + "loss": 0.1186, + "num_input_tokens_seen": 304004460, + "step": 1770 + }, + { + "epoch": 0.4655750641152101, + "loss": 0.15418250858783722, + "loss_ce": 0.0016556488117203116, + "loss_iou": 0.6484375, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 304004460, + "step": 1770 + }, + { + "epoch": 0.4658381008745972, + "grad_norm": 5.299927971595323, + "learning_rate": 5e-06, + "loss": 0.1103, + "num_input_tokens_seen": 304176564, + "step": 1771 + }, + { + "epoch": 0.4658381008745972, + "loss": 0.11469468474388123, + "loss_ce": 0.004251571837812662, + "loss_iou": 0.53515625, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 304176564, + "step": 1771 + }, + { + "epoch": 0.46610113763398436, + "grad_norm": 4.786837798902794, + "learning_rate": 5e-06, + "loss": 0.1318, + "num_input_tokens_seen": 304349064, + "step": 1772 + }, + { + "epoch": 0.46610113763398436, + "loss": 0.13777077198028564, + "loss_ce": 0.002303245011717081, + "loss_iou": 0.63671875, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 304349064, + "step": 1772 + }, + { + "epoch": 0.46636417439337147, + "grad_norm": 7.199109826840915, + "learning_rate": 5e-06, + "loss": 0.1835, + "num_input_tokens_seen": 304521336, + "step": 1773 + }, + { + "epoch": 0.46636417439337147, + "loss": 0.13359083235263824, + "loss_ce": 0.0007783286855556071, + "loss_iou": 0.6328125, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 304521336, + "step": 1773 + }, + { + "epoch": 0.4666272111527586, + "grad_norm": 5.7773703638814355, + "learning_rate": 5e-06, + "loss": 0.1151, + "num_input_tokens_seen": 304693604, + "step": 1774 + }, + { + "epoch": 0.4666272111527586, + "loss": 0.09553907811641693, + "loss_ce": 0.0015449414495378733, + "loss_iou": 0.7109375, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 304693604, + "step": 1774 + }, + { + "epoch": 0.46689024791214573, + "grad_norm": 4.7750261071300475, + "learning_rate": 5e-06, + "loss": 0.1158, + "num_input_tokens_seen": 304865896, + "step": 1775 + }, + { + "epoch": 0.46689024791214573, + "loss": 0.17456401884555817, + "loss_ce": 0.0003696825006045401, + "loss_iou": 0.466796875, + "loss_num": 0.034912109375, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 304865896, + "step": 1775 + }, + { + "epoch": 0.46715328467153283, + "grad_norm": 4.560916465958671, + "learning_rate": 5e-06, + "loss": 0.128, + "num_input_tokens_seen": 305038112, + "step": 1776 + }, + { + "epoch": 0.46715328467153283, + "loss": 0.16979777812957764, + "loss_ce": 0.002805587835609913, + "loss_iou": 0.6875, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 305038112, + "step": 1776 + }, + { + "epoch": 0.46741632143092, + "grad_norm": 9.580731840657464, + "learning_rate": 5e-06, + "loss": 0.1265, + "num_input_tokens_seen": 305210520, + "step": 1777 + }, + { + "epoch": 0.46741632143092, + "loss": 0.1295488327741623, + "loss_ce": 0.0010087917326018214, + "loss_iou": 0.51953125, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 305210520, + "step": 1777 + }, + { + "epoch": 0.4676793581903071, + "grad_norm": 12.328201105743592, + "learning_rate": 5e-06, + "loss": 0.1654, + "num_input_tokens_seen": 305382852, + "step": 1778 + }, + { + "epoch": 0.4676793581903071, + "loss": 0.10579667240381241, + "loss_ce": 0.0018843174912035465, + "loss_iou": 0.5625, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 305382852, + "step": 1778 + }, + { + "epoch": 0.4679423949496942, + "grad_norm": 7.809054499493732, + "learning_rate": 5e-06, + "loss": 0.091, + "num_input_tokens_seen": 305554812, + "step": 1779 + }, + { + "epoch": 0.4679423949496942, + "loss": 0.12438150495290756, + "loss_ce": 0.003974398132413626, + "loss_iou": 0.59375, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 305554812, + "step": 1779 + }, + { + "epoch": 0.46820543170908135, + "grad_norm": 8.973200505315825, + "learning_rate": 5e-06, + "loss": 0.1374, + "num_input_tokens_seen": 305727124, + "step": 1780 + }, + { + "epoch": 0.46820543170908135, + "loss": 0.08462625741958618, + "loss_ce": 0.0006571417325176299, + "loss_iou": 0.62890625, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 305727124, + "step": 1780 + }, + { + "epoch": 0.46846846846846846, + "grad_norm": 5.227611768036767, + "learning_rate": 5e-06, + "loss": 0.1239, + "num_input_tokens_seen": 305899256, + "step": 1781 + }, + { + "epoch": 0.46846846846846846, + "loss": 0.1279701292514801, + "loss_ce": 0.0015358042437583208, + "loss_iou": 0.376953125, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 305899256, + "step": 1781 + }, + { + "epoch": 0.4687315052278556, + "grad_norm": 3.599452107221254, + "learning_rate": 5e-06, + "loss": 0.109, + "num_input_tokens_seen": 306069952, + "step": 1782 + }, + { + "epoch": 0.4687315052278556, + "loss": 0.1316523402929306, + "loss_ce": 0.002959707286208868, + "loss_iou": 0.6328125, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 306069952, + "step": 1782 + }, + { + "epoch": 0.4689945419872427, + "grad_norm": 6.0196869037820235, + "learning_rate": 5e-06, + "loss": 0.1144, + "num_input_tokens_seen": 306242268, + "step": 1783 + }, + { + "epoch": 0.4689945419872427, + "loss": 0.0880986899137497, + "loss_ce": 0.0006353051285259426, + "loss_iou": 0.369140625, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 306242268, + "step": 1783 + }, + { + "epoch": 0.4692575787466298, + "grad_norm": 5.3230563681220735, + "learning_rate": 5e-06, + "loss": 0.1359, + "num_input_tokens_seen": 306414220, + "step": 1784 + }, + { + "epoch": 0.4692575787466298, + "loss": 0.08738180994987488, + "loss_ce": 0.0023750958498567343, + "loss_iou": 0.63671875, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 306414220, + "step": 1784 + }, + { + "epoch": 0.469520615506017, + "grad_norm": 4.772051394078166, + "learning_rate": 5e-06, + "loss": 0.1417, + "num_input_tokens_seen": 306586436, + "step": 1785 + }, + { + "epoch": 0.469520615506017, + "loss": 0.17895328998565674, + "loss_ce": 0.000791671103797853, + "loss_iou": 0.5390625, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 306586436, + "step": 1785 + }, + { + "epoch": 0.4697836522654041, + "grad_norm": 5.377031057320062, + "learning_rate": 5e-06, + "loss": 0.1954, + "num_input_tokens_seen": 306758712, + "step": 1786 + }, + { + "epoch": 0.4697836522654041, + "loss": 0.201407790184021, + "loss_ce": 0.004172691144049168, + "loss_iou": 0.72265625, + "loss_num": 0.03955078125, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 306758712, + "step": 1786 + }, + { + "epoch": 0.47004668902479124, + "grad_norm": 4.6699721270287275, + "learning_rate": 5e-06, + "loss": 0.1115, + "num_input_tokens_seen": 306931112, + "step": 1787 + }, + { + "epoch": 0.47004668902479124, + "loss": 0.10016533732414246, + "loss_ce": 0.00031182204838842154, + "loss_iou": 0.53515625, + "loss_num": 0.02001953125, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 306931112, + "step": 1787 + }, + { + "epoch": 0.47030972578417835, + "grad_norm": 8.618398116569972, + "learning_rate": 5e-06, + "loss": 0.1445, + "num_input_tokens_seen": 307103388, + "step": 1788 + }, + { + "epoch": 0.47030972578417835, + "loss": 0.27383407950401306, + "loss_ce": 0.0001524553372291848, + "loss_iou": 0.515625, + "loss_num": 0.0546875, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 307103388, + "step": 1788 + }, + { + "epoch": 0.47057276254356545, + "grad_norm": 11.779661781344625, + "learning_rate": 5e-06, + "loss": 0.134, + "num_input_tokens_seen": 307275620, + "step": 1789 + }, + { + "epoch": 0.47057276254356545, + "loss": 0.08500531315803528, + "loss_ce": 0.00019696829258464277, + "loss_iou": 0.4453125, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 307275620, + "step": 1789 + }, + { + "epoch": 0.4708357993029526, + "grad_norm": 23.708154257705488, + "learning_rate": 5e-06, + "loss": 0.1535, + "num_input_tokens_seen": 307448180, + "step": 1790 + }, + { + "epoch": 0.4708357993029526, + "loss": 0.17157645523548126, + "loss_ce": 0.002493813633918762, + "loss_iou": 0.453125, + "loss_num": 0.033935546875, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 307448180, + "step": 1790 + }, + { + "epoch": 0.4710988360623397, + "grad_norm": 4.7495128426496, + "learning_rate": 5e-06, + "loss": 0.1434, + "num_input_tokens_seen": 307620540, + "step": 1791 + }, + { + "epoch": 0.4710988360623397, + "loss": 0.23302927613258362, + "loss_ce": 0.0009736126521602273, + "loss_iou": 0.443359375, + "loss_num": 0.04638671875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 307620540, + "step": 1791 + }, + { + "epoch": 0.4713618728217268, + "grad_norm": 7.18577313542124, + "learning_rate": 5e-06, + "loss": 0.1114, + "num_input_tokens_seen": 307792704, + "step": 1792 + }, + { + "epoch": 0.4713618728217268, + "loss": 0.13438743352890015, + "loss_ce": 0.0005068117170594633, + "loss_iou": 0.4765625, + "loss_num": 0.02685546875, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 307792704, + "step": 1792 + }, + { + "epoch": 0.47162490958111397, + "grad_norm": 7.179208918692874, + "learning_rate": 5e-06, + "loss": 0.1598, + "num_input_tokens_seen": 307965184, + "step": 1793 + }, + { + "epoch": 0.47162490958111397, + "loss": 0.15465694665908813, + "loss_ce": 0.00011593455565162003, + "loss_iou": 0.42578125, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 307965184, + "step": 1793 + }, + { + "epoch": 0.4718879463405011, + "grad_norm": 6.391344075166243, + "learning_rate": 5e-06, + "loss": 0.1742, + "num_input_tokens_seen": 308135620, + "step": 1794 + }, + { + "epoch": 0.4718879463405011, + "loss": 0.23051750659942627, + "loss_ce": 0.00010977771307807416, + "loss_iou": 0.58984375, + "loss_num": 0.046142578125, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 308135620, + "step": 1794 + }, + { + "epoch": 0.47215098309988823, + "grad_norm": 8.145404749300642, + "learning_rate": 5e-06, + "loss": 0.1465, + "num_input_tokens_seen": 308308152, + "step": 1795 + }, + { + "epoch": 0.47215098309988823, + "loss": 0.1917172074317932, + "loss_ce": 0.008871147409081459, + "loss_iou": 0.640625, + "loss_num": 0.03662109375, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 308308152, + "step": 1795 + }, + { + "epoch": 0.47241401985927534, + "grad_norm": 5.398862877955347, + "learning_rate": 5e-06, + "loss": 0.1228, + "num_input_tokens_seen": 308477996, + "step": 1796 + }, + { + "epoch": 0.47241401985927534, + "loss": 0.08200335502624512, + "loss_ce": 0.0003993565624114126, + "loss_iou": 0.48046875, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 308477996, + "step": 1796 + }, + { + "epoch": 0.47267705661866244, + "grad_norm": 9.91724255704017, + "learning_rate": 5e-06, + "loss": 0.1449, + "num_input_tokens_seen": 308648424, + "step": 1797 + }, + { + "epoch": 0.47267705661866244, + "loss": 0.12875403463840485, + "loss_ce": 0.0009769393363967538, + "loss_iou": 0.515625, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 308648424, + "step": 1797 + }, + { + "epoch": 0.4729400933780496, + "grad_norm": 6.138930079819964, + "learning_rate": 5e-06, + "loss": 0.1842, + "num_input_tokens_seen": 308818888, + "step": 1798 + }, + { + "epoch": 0.4729400933780496, + "loss": 0.16927096247673035, + "loss_ce": 0.0021567128133028746, + "loss_iou": 0.73046875, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 308818888, + "step": 1798 + }, + { + "epoch": 0.4732031301374367, + "grad_norm": 19.105244313122192, + "learning_rate": 5e-06, + "loss": 0.1766, + "num_input_tokens_seen": 308991284, + "step": 1799 + }, + { + "epoch": 0.4732031301374367, + "loss": 0.21268007159233093, + "loss_ce": 9.461388253839687e-05, + "loss_iou": 0.431640625, + "loss_num": 0.04248046875, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 308991284, + "step": 1799 + }, + { + "epoch": 0.4734661668968238, + "grad_norm": 6.0514901455791215, + "learning_rate": 5e-06, + "loss": 0.1265, + "num_input_tokens_seen": 309163244, + "step": 1800 + }, + { + "epoch": 0.4734661668968238, + "loss": 0.11982670426368713, + "loss_ce": 0.00047245476162061095, + "loss_iou": 0.455078125, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 309163244, + "step": 1800 + }, + { + "epoch": 0.47372920365621096, + "grad_norm": 4.529009692158789, + "learning_rate": 5e-06, + "loss": 0.1439, + "num_input_tokens_seen": 309335260, + "step": 1801 + }, + { + "epoch": 0.47372920365621096, + "loss": 0.16322672367095947, + "loss_ce": 0.000812180747743696, + "loss_iou": 0.4921875, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 309335260, + "step": 1801 + }, + { + "epoch": 0.47399224041559807, + "grad_norm": 4.778436741374609, + "learning_rate": 5e-06, + "loss": 0.142, + "num_input_tokens_seen": 309507516, + "step": 1802 + }, + { + "epoch": 0.47399224041559807, + "loss": 0.09654629230499268, + "loss_ce": 0.0012398946564644575, + "loss_iou": 0.412109375, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 309507516, + "step": 1802 + }, + { + "epoch": 0.4742552771749852, + "grad_norm": 4.954945704123918, + "learning_rate": 5e-06, + "loss": 0.1113, + "num_input_tokens_seen": 309677988, + "step": 1803 + }, + { + "epoch": 0.4742552771749852, + "loss": 0.0787278264760971, + "loss_ce": 0.00014505814760923386, + "loss_iou": 0.59375, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 309677988, + "step": 1803 + }, + { + "epoch": 0.4745183139343723, + "grad_norm": 12.56847461072702, + "learning_rate": 5e-06, + "loss": 0.1235, + "num_input_tokens_seen": 309850180, + "step": 1804 + }, + { + "epoch": 0.4745183139343723, + "loss": 0.13278400897979736, + "loss_ce": 0.00015460627037100494, + "loss_iou": 0.52734375, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 309850180, + "step": 1804 + }, + { + "epoch": 0.47478135069375943, + "grad_norm": 15.493537781093892, + "learning_rate": 5e-06, + "loss": 0.0899, + "num_input_tokens_seen": 310019768, + "step": 1805 + }, + { + "epoch": 0.47478135069375943, + "loss": 0.05194393917918205, + "loss_ce": 0.0007049225969240069, + "loss_iou": 0.53515625, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 310019768, + "step": 1805 + }, + { + "epoch": 0.4750443874531466, + "grad_norm": 6.1982861025667795, + "learning_rate": 5e-06, + "loss": 0.1392, + "num_input_tokens_seen": 310191904, + "step": 1806 + }, + { + "epoch": 0.4750443874531466, + "loss": 0.09039468318223953, + "loss_ce": 0.00045937972026877105, + "loss_iou": 0.7265625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 310191904, + "step": 1806 + }, + { + "epoch": 0.4753074242125337, + "grad_norm": 8.01906133906792, + "learning_rate": 5e-06, + "loss": 0.1692, + "num_input_tokens_seen": 310364020, + "step": 1807 + }, + { + "epoch": 0.4753074242125337, + "loss": 0.1431303471326828, + "loss_ce": 0.002688447944819927, + "loss_iou": 0.5703125, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 310364020, + "step": 1807 + }, + { + "epoch": 0.47557046097192085, + "grad_norm": 8.410195178652696, + "learning_rate": 5e-06, + "loss": 0.1112, + "num_input_tokens_seen": 310536388, + "step": 1808 + }, + { + "epoch": 0.47557046097192085, + "loss": 0.1313067376613617, + "loss_ce": 0.002064801286906004, + "loss_iou": 0.73046875, + "loss_num": 0.02587890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 310536388, + "step": 1808 + }, + { + "epoch": 0.47583349773130795, + "grad_norm": 5.758329237623371, + "learning_rate": 5e-06, + "loss": 0.122, + "num_input_tokens_seen": 310706940, + "step": 1809 + }, + { + "epoch": 0.47583349773130795, + "loss": 0.0813891738653183, + "loss_ce": 0.0004565550771076232, + "loss_iou": 0.484375, + "loss_num": 0.01611328125, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 310706940, + "step": 1809 + }, + { + "epoch": 0.47609653449069506, + "grad_norm": 38.441949828692486, + "learning_rate": 5e-06, + "loss": 0.0962, + "num_input_tokens_seen": 310879292, + "step": 1810 + }, + { + "epoch": 0.47609653449069506, + "loss": 0.06832106411457062, + "loss_ce": 0.007591082248836756, + "loss_iou": 0.75, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 310879292, + "step": 1810 + }, + { + "epoch": 0.4763595712500822, + "grad_norm": 7.22086526843683, + "learning_rate": 5e-06, + "loss": 0.124, + "num_input_tokens_seen": 311051572, + "step": 1811 + }, + { + "epoch": 0.4763595712500822, + "loss": 0.0902928039431572, + "loss_ce": 0.00047957096830941737, + "loss_iou": 0.53125, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 311051572, + "step": 1811 + }, + { + "epoch": 0.4766226080094693, + "grad_norm": 11.063199574808042, + "learning_rate": 5e-06, + "loss": 0.1343, + "num_input_tokens_seen": 311222148, + "step": 1812 + }, + { + "epoch": 0.4766226080094693, + "loss": 0.2204355001449585, + "loss_ce": 0.001777050900273025, + "loss_iou": 0.447265625, + "loss_num": 0.043701171875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 311222148, + "step": 1812 + }, + { + "epoch": 0.4768856447688564, + "grad_norm": 10.035743209995298, + "learning_rate": 5e-06, + "loss": 0.0919, + "num_input_tokens_seen": 311394364, + "step": 1813 + }, + { + "epoch": 0.4768856447688564, + "loss": 0.07709920406341553, + "loss_ce": 0.003307701088488102, + "loss_iou": 0.421875, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 311394364, + "step": 1813 + }, + { + "epoch": 0.4771486815282436, + "grad_norm": 8.932384890899103, + "learning_rate": 5e-06, + "loss": 0.1796, + "num_input_tokens_seen": 311566412, + "step": 1814 + }, + { + "epoch": 0.4771486815282436, + "loss": 0.18021875619888306, + "loss_ce": 0.002743777120485902, + "loss_iou": 0.4375, + "loss_num": 0.035400390625, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 311566412, + "step": 1814 + }, + { + "epoch": 0.4774117182876307, + "grad_norm": 7.670958022785577, + "learning_rate": 5e-06, + "loss": 0.097, + "num_input_tokens_seen": 311738816, + "step": 1815 + }, + { + "epoch": 0.4774117182876307, + "loss": 0.06826284527778625, + "loss_ce": 0.002039696555584669, + "loss_iou": 0.546875, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 311738816, + "step": 1815 + }, + { + "epoch": 0.47767475504701784, + "grad_norm": 9.178964340109715, + "learning_rate": 5e-06, + "loss": 0.1004, + "num_input_tokens_seen": 311910812, + "step": 1816 + }, + { + "epoch": 0.47767475504701784, + "loss": 0.15946456789970398, + "loss_ce": 0.0008036750950850546, + "loss_iou": 0.4921875, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 311910812, + "step": 1816 + }, + { + "epoch": 0.47793779180640494, + "grad_norm": 8.864036975405911, + "learning_rate": 5e-06, + "loss": 0.1104, + "num_input_tokens_seen": 312082820, + "step": 1817 + }, + { + "epoch": 0.47793779180640494, + "loss": 0.0714251697063446, + "loss_ce": 0.002028202638030052, + "loss_iou": 0.55078125, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 312082820, + "step": 1817 + }, + { + "epoch": 0.47820082856579205, + "grad_norm": 4.5785017279302185, + "learning_rate": 5e-06, + "loss": 0.1308, + "num_input_tokens_seen": 312255312, + "step": 1818 + }, + { + "epoch": 0.47820082856579205, + "loss": 0.1785389930009842, + "loss_ce": 0.0035817159805446863, + "loss_iou": 0.5234375, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 312255312, + "step": 1818 + }, + { + "epoch": 0.4784638653251792, + "grad_norm": 9.103884045463392, + "learning_rate": 5e-06, + "loss": 0.1388, + "num_input_tokens_seen": 312427552, + "step": 1819 + }, + { + "epoch": 0.4784638653251792, + "loss": 0.09079764038324356, + "loss_ce": 0.0007707877666689456, + "loss_iou": 0.6484375, + "loss_num": 0.01806640625, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 312427552, + "step": 1819 + }, + { + "epoch": 0.4787269020845663, + "grad_norm": 3.611814351166419, + "learning_rate": 5e-06, + "loss": 0.1127, + "num_input_tokens_seen": 312599880, + "step": 1820 + }, + { + "epoch": 0.4787269020845663, + "loss": 0.16918551921844482, + "loss_ce": 0.0004843563656322658, + "loss_iou": 0.59375, + "loss_num": 0.03369140625, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 312599880, + "step": 1820 + }, + { + "epoch": 0.47898993884395347, + "grad_norm": 6.886471414344964, + "learning_rate": 5e-06, + "loss": 0.144, + "num_input_tokens_seen": 312771960, + "step": 1821 + }, + { + "epoch": 0.47898993884395347, + "loss": 0.1718224436044693, + "loss_ce": 0.0013817725703120232, + "loss_iou": 0.61328125, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 312771960, + "step": 1821 + }, + { + "epoch": 0.47925297560334057, + "grad_norm": 12.670187653553288, + "learning_rate": 5e-06, + "loss": 0.1477, + "num_input_tokens_seen": 312943884, + "step": 1822 + }, + { + "epoch": 0.47925297560334057, + "loss": 0.17194947600364685, + "loss_ce": 0.0042553916573524475, + "loss_iou": 0.486328125, + "loss_num": 0.033447265625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 312943884, + "step": 1822 + }, + { + "epoch": 0.4795160123627277, + "grad_norm": 5.735564443277563, + "learning_rate": 5e-06, + "loss": 0.1186, + "num_input_tokens_seen": 313115988, + "step": 1823 + }, + { + "epoch": 0.4795160123627277, + "loss": 0.11030334234237671, + "loss_ce": 0.00010437482706038281, + "loss_iou": 0.392578125, + "loss_num": 0.02197265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 313115988, + "step": 1823 + }, + { + "epoch": 0.47977904912211483, + "grad_norm": 5.327091023011001, + "learning_rate": 5e-06, + "loss": 0.1553, + "num_input_tokens_seen": 313288456, + "step": 1824 + }, + { + "epoch": 0.47977904912211483, + "loss": 0.2672034204006195, + "loss_ce": 0.0009680833900347352, + "loss_iou": 0.42578125, + "loss_num": 0.05322265625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 313288456, + "step": 1824 + }, + { + "epoch": 0.48004208588150193, + "grad_norm": 8.698833105847095, + "learning_rate": 5e-06, + "loss": 0.0994, + "num_input_tokens_seen": 313460532, + "step": 1825 + }, + { + "epoch": 0.48004208588150193, + "loss": 0.11867256462574005, + "loss_ce": 0.0026752520352602005, + "loss_iou": 0.419921875, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 313460532, + "step": 1825 + }, + { + "epoch": 0.48030512264088904, + "grad_norm": 4.784883256464839, + "learning_rate": 5e-06, + "loss": 0.1265, + "num_input_tokens_seen": 313630464, + "step": 1826 + }, + { + "epoch": 0.48030512264088904, + "loss": 0.18028897047042847, + "loss_ce": 0.0021883829031139612, + "loss_iou": 0.57421875, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 313630464, + "step": 1826 + }, + { + "epoch": 0.4805681594002762, + "grad_norm": 7.070087319121284, + "learning_rate": 5e-06, + "loss": 0.1388, + "num_input_tokens_seen": 313802864, + "step": 1827 + }, + { + "epoch": 0.4805681594002762, + "loss": 0.19596196711063385, + "loss_ce": 0.0022058698814362288, + "loss_iou": 0.5546875, + "loss_num": 0.038818359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 313802864, + "step": 1827 + }, + { + "epoch": 0.4808311961596633, + "grad_norm": 6.746240646813248, + "learning_rate": 5e-06, + "loss": 0.1253, + "num_input_tokens_seen": 313975036, + "step": 1828 + }, + { + "epoch": 0.4808311961596633, + "loss": 0.12373416870832443, + "loss_ce": 0.0009009129134938121, + "loss_iou": 0.59765625, + "loss_num": 0.0245361328125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 313975036, + "step": 1828 + }, + { + "epoch": 0.48109423291905046, + "grad_norm": 12.282763523774303, + "learning_rate": 5e-06, + "loss": 0.1221, + "num_input_tokens_seen": 314147252, + "step": 1829 + }, + { + "epoch": 0.48109423291905046, + "loss": 0.09647711366415024, + "loss_ce": 0.0027576321735978127, + "loss_iou": 0.380859375, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 314147252, + "step": 1829 + }, + { + "epoch": 0.48135726967843756, + "grad_norm": 6.5229587096602915, + "learning_rate": 5e-06, + "loss": 0.119, + "num_input_tokens_seen": 314317636, + "step": 1830 + }, + { + "epoch": 0.48135726967843756, + "loss": 0.11963652074337006, + "loss_ce": 0.004478443879634142, + "loss_iou": 0.55859375, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 314317636, + "step": 1830 + }, + { + "epoch": 0.48162030643782466, + "grad_norm": 10.474914605426608, + "learning_rate": 5e-06, + "loss": 0.1571, + "num_input_tokens_seen": 314489804, + "step": 1831 + }, + { + "epoch": 0.48162030643782466, + "loss": 0.08893167972564697, + "loss_ce": 0.0019260660046711564, + "loss_iou": 0.458984375, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 314489804, + "step": 1831 + }, + { + "epoch": 0.4818833431972118, + "grad_norm": 4.796918355311461, + "learning_rate": 5e-06, + "loss": 0.1701, + "num_input_tokens_seen": 314661804, + "step": 1832 + }, + { + "epoch": 0.4818833431972118, + "loss": 0.18161356449127197, + "loss_ce": 0.0023533080238848925, + "loss_iou": 0.51953125, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 314661804, + "step": 1832 + }, + { + "epoch": 0.4821463799565989, + "grad_norm": 6.244958331343755, + "learning_rate": 5e-06, + "loss": 0.1234, + "num_input_tokens_seen": 314834036, + "step": 1833 + }, + { + "epoch": 0.4821463799565989, + "loss": 0.13272178173065186, + "loss_ce": 0.003785028588026762, + "loss_iou": 0.478515625, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 314834036, + "step": 1833 + }, + { + "epoch": 0.4824094167159861, + "grad_norm": 7.896413991695191, + "learning_rate": 5e-06, + "loss": 0.1255, + "num_input_tokens_seen": 315004484, + "step": 1834 + }, + { + "epoch": 0.4824094167159861, + "loss": 0.12204363942146301, + "loss_ce": 0.0003090191457886249, + "loss_iou": 0.58984375, + "loss_num": 0.0244140625, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 315004484, + "step": 1834 + }, + { + "epoch": 0.4826724534753732, + "grad_norm": 7.031269313570851, + "learning_rate": 5e-06, + "loss": 0.1443, + "num_input_tokens_seen": 315176780, + "step": 1835 + }, + { + "epoch": 0.4826724534753732, + "loss": 0.12622271478176117, + "loss_ce": 0.002534976229071617, + "loss_iou": 0.62109375, + "loss_num": 0.0247802734375, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 315176780, + "step": 1835 + }, + { + "epoch": 0.4829354902347603, + "grad_norm": 6.072447688187852, + "learning_rate": 5e-06, + "loss": 0.125, + "num_input_tokens_seen": 315348656, + "step": 1836 + }, + { + "epoch": 0.4829354902347603, + "loss": 0.08488506823778152, + "loss_ce": 0.0010227651800960302, + "loss_iou": 0.462890625, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 315348656, + "step": 1836 + }, + { + "epoch": 0.48319852699414745, + "grad_norm": 14.427685690814952, + "learning_rate": 5e-06, + "loss": 0.143, + "num_input_tokens_seen": 315520828, + "step": 1837 + }, + { + "epoch": 0.48319852699414745, + "loss": 0.0883752852678299, + "loss_ce": 0.00042362496606074274, + "loss_iou": 0.5625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 315520828, + "step": 1837 + }, + { + "epoch": 0.48346156375353455, + "grad_norm": 17.485755335788554, + "learning_rate": 5e-06, + "loss": 0.1375, + "num_input_tokens_seen": 315692964, + "step": 1838 + }, + { + "epoch": 0.48346156375353455, + "loss": 0.10190844535827637, + "loss_ce": 0.0014140586135908961, + "loss_iou": 0.359375, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 315692964, + "step": 1838 + }, + { + "epoch": 0.48372460051292165, + "grad_norm": 9.721032690775148, + "learning_rate": 5e-06, + "loss": 0.1692, + "num_input_tokens_seen": 315863376, + "step": 1839 + }, + { + "epoch": 0.48372460051292165, + "loss": 0.18402375280857086, + "loss_ce": 0.0028408921789377928, + "loss_iou": 0.40234375, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 315863376, + "step": 1839 + }, + { + "epoch": 0.4839876372723088, + "grad_norm": 7.726124987543399, + "learning_rate": 5e-06, + "loss": 0.1728, + "num_input_tokens_seen": 316035520, + "step": 1840 + }, + { + "epoch": 0.4839876372723088, + "loss": 0.13577872514724731, + "loss_ce": 0.00031119072809815407, + "loss_iou": 0.498046875, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 316035520, + "step": 1840 + }, + { + "epoch": 0.4842506740316959, + "grad_norm": 7.757576022533387, + "learning_rate": 5e-06, + "loss": 0.14, + "num_input_tokens_seen": 316207616, + "step": 1841 + }, + { + "epoch": 0.4842506740316959, + "loss": 0.2208271026611328, + "loss_ce": 0.0022144389804452658, + "loss_iou": 0.44140625, + "loss_num": 0.043701171875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 316207616, + "step": 1841 + }, + { + "epoch": 0.4845137107910831, + "grad_norm": 11.993527938590784, + "learning_rate": 5e-06, + "loss": 0.1257, + "num_input_tokens_seen": 316379548, + "step": 1842 + }, + { + "epoch": 0.4845137107910831, + "loss": 0.17184340953826904, + "loss_ce": 0.002074118936434388, + "loss_iou": 0.341796875, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 316379548, + "step": 1842 + }, + { + "epoch": 0.4847767475504702, + "grad_norm": 5.880659481834313, + "learning_rate": 5e-06, + "loss": 0.1596, + "num_input_tokens_seen": 316551724, + "step": 1843 + }, + { + "epoch": 0.4847767475504702, + "loss": 0.30887043476104736, + "loss_ce": 0.0007344337645918131, + "loss_iou": 0.48046875, + "loss_num": 0.0615234375, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 316551724, + "step": 1843 + }, + { + "epoch": 0.4850397843098573, + "grad_norm": 9.164014647902235, + "learning_rate": 5e-06, + "loss": 0.1614, + "num_input_tokens_seen": 316724140, + "step": 1844 + }, + { + "epoch": 0.4850397843098573, + "loss": 0.1765921413898468, + "loss_ce": 0.0012076160637661815, + "loss_iou": 0.62109375, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 316724140, + "step": 1844 + }, + { + "epoch": 0.48530282106924444, + "grad_norm": 25.18318601638769, + "learning_rate": 5e-06, + "loss": 0.1387, + "num_input_tokens_seen": 316896084, + "step": 1845 + }, + { + "epoch": 0.48530282106924444, + "loss": 0.14695365726947784, + "loss_ce": 0.0009270399459637702, + "loss_iou": 0.47265625, + "loss_num": 0.0291748046875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 316896084, + "step": 1845 + }, + { + "epoch": 0.48556585782863154, + "grad_norm": 10.73033101211756, + "learning_rate": 5e-06, + "loss": 0.183, + "num_input_tokens_seen": 317068004, + "step": 1846 + }, + { + "epoch": 0.48556585782863154, + "loss": 0.3143799304962158, + "loss_ce": 0.004962218925356865, + "loss_iou": 0.58203125, + "loss_num": 0.061767578125, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 317068004, + "step": 1846 + }, + { + "epoch": 0.4858288945880187, + "grad_norm": 14.757236176384808, + "learning_rate": 5e-06, + "loss": 0.0971, + "num_input_tokens_seen": 317240256, + "step": 1847 + }, + { + "epoch": 0.4858288945880187, + "loss": 0.074017733335495, + "loss_ce": 0.0028812657110393047, + "loss_iou": 0.439453125, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 317240256, + "step": 1847 + }, + { + "epoch": 0.4860919313474058, + "grad_norm": 5.470394923350598, + "learning_rate": 5e-06, + "loss": 0.1205, + "num_input_tokens_seen": 317412376, + "step": 1848 + }, + { + "epoch": 0.4860919313474058, + "loss": 0.13736534118652344, + "loss_ce": 0.0006160617922432721, + "loss_iou": 0.67578125, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 317412376, + "step": 1848 + }, + { + "epoch": 0.4863549681067929, + "grad_norm": 11.769346294638343, + "learning_rate": 5e-06, + "loss": 0.104, + "num_input_tokens_seen": 317584644, + "step": 1849 + }, + { + "epoch": 0.4863549681067929, + "loss": 0.07671768963336945, + "loss_ce": 0.002804110525175929, + "loss_iou": 0.5078125, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 317584644, + "step": 1849 + }, + { + "epoch": 0.48661800486618007, + "grad_norm": 7.934603221234343, + "learning_rate": 5e-06, + "loss": 0.157, + "num_input_tokens_seen": 317756784, + "step": 1850 + }, + { + "epoch": 0.48661800486618007, + "loss": 0.1591799259185791, + "loss_ce": 0.0034792337100952864, + "loss_iou": 0.6328125, + "loss_num": 0.0311279296875, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 317756784, + "step": 1850 + }, + { + "epoch": 0.48688104162556717, + "grad_norm": 5.949457674338729, + "learning_rate": 5e-06, + "loss": 0.1523, + "num_input_tokens_seen": 317929000, + "step": 1851 + }, + { + "epoch": 0.48688104162556717, + "loss": 0.15429449081420898, + "loss_ce": 0.0003638358903117478, + "loss_iou": 0.443359375, + "loss_num": 0.03076171875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 317929000, + "step": 1851 + }, + { + "epoch": 0.48714407838495427, + "grad_norm": 4.538326131208253, + "learning_rate": 5e-06, + "loss": 0.1159, + "num_input_tokens_seen": 318101400, + "step": 1852 + }, + { + "epoch": 0.48714407838495427, + "loss": 0.14405781030654907, + "loss_ce": 0.004745060577988625, + "loss_iou": 0.51171875, + "loss_num": 0.02783203125, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 318101400, + "step": 1852 + }, + { + "epoch": 0.48740711514434143, + "grad_norm": 11.796493242549497, + "learning_rate": 5e-06, + "loss": 0.1155, + "num_input_tokens_seen": 318273392, + "step": 1853 + }, + { + "epoch": 0.48740711514434143, + "loss": 0.05749022588133812, + "loss_ce": 0.0008801189833320677, + "loss_iou": 0.453125, + "loss_num": 0.01129150390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 318273392, + "step": 1853 + }, + { + "epoch": 0.48767015190372853, + "grad_norm": 4.839316076441186, + "learning_rate": 5e-06, + "loss": 0.1219, + "num_input_tokens_seen": 318444936, + "step": 1854 + }, + { + "epoch": 0.48767015190372853, + "loss": 0.15674933791160583, + "loss_ce": 0.0008960673003457487, + "loss_iou": 0.5625, + "loss_num": 0.0311279296875, + "loss_xval": 0.15625, + "num_input_tokens_seen": 318444936, + "step": 1854 + }, + { + "epoch": 0.4879331886631157, + "grad_norm": 4.489339979618492, + "learning_rate": 5e-06, + "loss": 0.1002, + "num_input_tokens_seen": 318617236, + "step": 1855 + }, + { + "epoch": 0.4879331886631157, + "loss": 0.17843472957611084, + "loss_ce": 0.001768482499755919, + "loss_iou": 0.57421875, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 318617236, + "step": 1855 + }, + { + "epoch": 0.4881962254225028, + "grad_norm": 5.676162725297575, + "learning_rate": 5e-06, + "loss": 0.178, + "num_input_tokens_seen": 318787468, + "step": 1856 + }, + { + "epoch": 0.4881962254225028, + "loss": 0.25485190749168396, + "loss_ce": 0.0009456594125367701, + "loss_iou": 0.466796875, + "loss_num": 0.05078125, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 318787468, + "step": 1856 + }, + { + "epoch": 0.4884592621818899, + "grad_norm": 4.081201767827966, + "learning_rate": 5e-06, + "loss": 0.1132, + "num_input_tokens_seen": 318957884, + "step": 1857 + }, + { + "epoch": 0.4884592621818899, + "loss": 0.0997210294008255, + "loss_ce": 0.00020320963812991977, + "loss_iou": 0.34765625, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 318957884, + "step": 1857 + }, + { + "epoch": 0.48872229894127706, + "grad_norm": 23.818400579938295, + "learning_rate": 5e-06, + "loss": 0.1233, + "num_input_tokens_seen": 319129856, + "step": 1858 + }, + { + "epoch": 0.48872229894127706, + "loss": 0.07231907546520233, + "loss_ce": 0.00011447950237197801, + "loss_iou": 0.6171875, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 319129856, + "step": 1858 + }, + { + "epoch": 0.48898533570066416, + "grad_norm": 6.068112059865649, + "learning_rate": 5e-06, + "loss": 0.1742, + "num_input_tokens_seen": 319302108, + "step": 1859 + }, + { + "epoch": 0.48898533570066416, + "loss": 0.1806957721710205, + "loss_ce": 0.00039790940354578197, + "loss_iou": 0.51171875, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 319302108, + "step": 1859 + }, + { + "epoch": 0.4892483724600513, + "grad_norm": 4.764918083593623, + "learning_rate": 5e-06, + "loss": 0.1205, + "num_input_tokens_seen": 319473076, + "step": 1860 + }, + { + "epoch": 0.4892483724600513, + "loss": 0.056661054491996765, + "loss_ce": 0.0034689174499362707, + "loss_iou": 0.51953125, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 319473076, + "step": 1860 + }, + { + "epoch": 0.4895114092194384, + "grad_norm": 4.591820064091911, + "learning_rate": 5e-06, + "loss": 0.1233, + "num_input_tokens_seen": 319645292, + "step": 1861 + }, + { + "epoch": 0.4895114092194384, + "loss": 0.10820820182561874, + "loss_ce": 8.442218677373603e-05, + "loss_iou": 0.50390625, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 319645292, + "step": 1861 + }, + { + "epoch": 0.4897744459788255, + "grad_norm": 11.395874847747328, + "learning_rate": 5e-06, + "loss": 0.1222, + "num_input_tokens_seen": 319817560, + "step": 1862 + }, + { + "epoch": 0.4897744459788255, + "loss": 0.1541745364665985, + "loss_ce": 0.0011288827518001199, + "loss_iou": 0.53125, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 319817560, + "step": 1862 + }, + { + "epoch": 0.4900374827382127, + "grad_norm": 8.74594499498662, + "learning_rate": 5e-06, + "loss": 0.12, + "num_input_tokens_seen": 319990008, + "step": 1863 + }, + { + "epoch": 0.4900374827382127, + "loss": 0.1368078887462616, + "loss_ce": 0.0013403687626123428, + "loss_iou": 0.5625, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 319990008, + "step": 1863 + }, + { + "epoch": 0.4903005194975998, + "grad_norm": 4.454006905645775, + "learning_rate": 5e-06, + "loss": 0.0984, + "num_input_tokens_seen": 320162308, + "step": 1864 + }, + { + "epoch": 0.4903005194975998, + "loss": 0.09877588599920273, + "loss_ce": 0.0007534276228398085, + "loss_iou": 0.5546875, + "loss_num": 0.01953125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 320162308, + "step": 1864 + }, + { + "epoch": 0.4905635562569869, + "grad_norm": 9.654102944086466, + "learning_rate": 5e-06, + "loss": 0.1351, + "num_input_tokens_seen": 320334456, + "step": 1865 + }, + { + "epoch": 0.4905635562569869, + "loss": 0.09071889519691467, + "loss_ce": 0.0011192907113581896, + "loss_iou": 0.6171875, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 320334456, + "step": 1865 + }, + { + "epoch": 0.49082659301637405, + "grad_norm": 7.49930478522843, + "learning_rate": 5e-06, + "loss": 0.1421, + "num_input_tokens_seen": 320506784, + "step": 1866 + }, + { + "epoch": 0.49082659301637405, + "loss": 0.1436455398797989, + "loss_ce": 0.0007622435805387795, + "loss_iou": 0.5390625, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 320506784, + "step": 1866 + }, + { + "epoch": 0.49108962977576115, + "grad_norm": 4.213229463422797, + "learning_rate": 5e-06, + "loss": 0.1405, + "num_input_tokens_seen": 320678800, + "step": 1867 + }, + { + "epoch": 0.49108962977576115, + "loss": 0.19276383519172668, + "loss_ce": 0.0017237972933799028, + "loss_iou": 0.5703125, + "loss_num": 0.0380859375, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 320678800, + "step": 1867 + }, + { + "epoch": 0.4913526665351483, + "grad_norm": 13.139928650685244, + "learning_rate": 5e-06, + "loss": 0.1345, + "num_input_tokens_seen": 320851152, + "step": 1868 + }, + { + "epoch": 0.4913526665351483, + "loss": 0.13707411289215088, + "loss_ce": 0.0001722496235743165, + "loss_iou": 0.6640625, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 320851152, + "step": 1868 + }, + { + "epoch": 0.4916157032945354, + "grad_norm": 10.621311625988106, + "learning_rate": 5e-06, + "loss": 0.1239, + "num_input_tokens_seen": 321023324, + "step": 1869 + }, + { + "epoch": 0.4916157032945354, + "loss": 0.17078933119773865, + "loss_ce": 0.0011115875095129013, + "loss_iou": 0.466796875, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 321023324, + "step": 1869 + }, + { + "epoch": 0.4918787400539225, + "grad_norm": 9.794878479481557, + "learning_rate": 5e-06, + "loss": 0.1626, + "num_input_tokens_seen": 321195396, + "step": 1870 + }, + { + "epoch": 0.4918787400539225, + "loss": 0.1828850954771042, + "loss_ce": 0.0016412028344348073, + "loss_iou": 0.65234375, + "loss_num": 0.0361328125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 321195396, + "step": 1870 + }, + { + "epoch": 0.4921417768133097, + "grad_norm": 3.6593406817873193, + "learning_rate": 5e-06, + "loss": 0.1097, + "num_input_tokens_seen": 321367508, + "step": 1871 + }, + { + "epoch": 0.4921417768133097, + "loss": 0.07714089751243591, + "loss_ce": 0.0005722964997403324, + "loss_iou": 0.478515625, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 321367508, + "step": 1871 + }, + { + "epoch": 0.4924048135726968, + "grad_norm": 3.9995708653462874, + "learning_rate": 5e-06, + "loss": 0.1303, + "num_input_tokens_seen": 321538180, + "step": 1872 + }, + { + "epoch": 0.4924048135726968, + "loss": 0.10363311320543289, + "loss_ce": 0.004054257180541754, + "loss_iou": 0.5078125, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 321538180, + "step": 1872 + }, + { + "epoch": 0.49266785033208393, + "grad_norm": 8.331617815221565, + "learning_rate": 5e-06, + "loss": 0.1423, + "num_input_tokens_seen": 321710632, + "step": 1873 + }, + { + "epoch": 0.49266785033208393, + "loss": 0.13812920451164246, + "loss_ce": 0.0021428640466183424, + "loss_iou": 0.58203125, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 321710632, + "step": 1873 + }, + { + "epoch": 0.49293088709147104, + "grad_norm": 5.452515467611325, + "learning_rate": 5e-06, + "loss": 0.1377, + "num_input_tokens_seen": 321882748, + "step": 1874 + }, + { + "epoch": 0.49293088709147104, + "loss": 0.14979197084903717, + "loss_ce": 0.002147932071238756, + "loss_iou": NaN, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 321882748, + "step": 1874 + }, + { + "epoch": 0.49319392385085814, + "grad_norm": 14.062855136219687, + "learning_rate": 5e-06, + "loss": 0.1653, + "num_input_tokens_seen": 322054892, + "step": 1875 + }, + { + "epoch": 0.49319392385085814, + "loss": 0.24241477251052856, + "loss_ce": 0.0016921274363994598, + "loss_iou": 0.447265625, + "loss_num": 0.048095703125, + "loss_xval": 0.240234375, + "num_input_tokens_seen": 322054892, + "step": 1875 + }, + { + "epoch": 0.4934569606102453, + "grad_norm": 3.3198848239673966, + "learning_rate": 5e-06, + "loss": 0.1054, + "num_input_tokens_seen": 322226992, + "step": 1876 + }, + { + "epoch": 0.4934569606102453, + "loss": 0.06338231265544891, + "loss_ce": 0.0020724977366626263, + "loss_iou": 0.62890625, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 322226992, + "step": 1876 + }, + { + "epoch": 0.4937199973696324, + "grad_norm": 5.974832705883753, + "learning_rate": 5e-06, + "loss": 0.1286, + "num_input_tokens_seen": 322399220, + "step": 1877 + }, + { + "epoch": 0.4937199973696324, + "loss": 0.20875243842601776, + "loss_ce": 0.004009997006505728, + "loss_iou": 0.53125, + "loss_num": 0.041015625, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 322399220, + "step": 1877 + }, + { + "epoch": 0.4939830341290195, + "grad_norm": 11.103490307412729, + "learning_rate": 5e-06, + "loss": 0.1036, + "num_input_tokens_seen": 322569372, + "step": 1878 + }, + { + "epoch": 0.4939830341290195, + "loss": 0.10719159990549088, + "loss_ce": 0.006331001408398151, + "loss_iou": 0.64453125, + "loss_num": 0.0201416015625, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 322569372, + "step": 1878 + }, + { + "epoch": 0.49424607088840666, + "grad_norm": 4.615361697814784, + "learning_rate": 5e-06, + "loss": 0.1426, + "num_input_tokens_seen": 322741596, + "step": 1879 + }, + { + "epoch": 0.49424607088840666, + "loss": 0.13756033778190613, + "loss_ce": 0.002703155390918255, + "loss_iou": 0.46484375, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 322741596, + "step": 1879 + }, + { + "epoch": 0.49450910764779377, + "grad_norm": 5.99195178330634, + "learning_rate": 5e-06, + "loss": 0.1599, + "num_input_tokens_seen": 322913828, + "step": 1880 + }, + { + "epoch": 0.49450910764779377, + "loss": 0.20525991916656494, + "loss_ce": 0.0015550723765045404, + "loss_iou": 0.47265625, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 322913828, + "step": 1880 + }, + { + "epoch": 0.4947721444071809, + "grad_norm": 8.544161694628455, + "learning_rate": 5e-06, + "loss": 0.1051, + "num_input_tokens_seen": 323084272, + "step": 1881 + }, + { + "epoch": 0.4947721444071809, + "loss": 0.13442979753017426, + "loss_ce": 0.0013731509679928422, + "loss_iou": 0.462890625, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 323084272, + "step": 1881 + }, + { + "epoch": 0.49503518116656803, + "grad_norm": 12.157441672743493, + "learning_rate": 5e-06, + "loss": 0.1382, + "num_input_tokens_seen": 323254564, + "step": 1882 + }, + { + "epoch": 0.49503518116656803, + "loss": 0.06910552829504013, + "loss_ce": 0.00015105513739399612, + "loss_iou": 0.52734375, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 323254564, + "step": 1882 + }, + { + "epoch": 0.49529821792595513, + "grad_norm": 7.758308533551838, + "learning_rate": 5e-06, + "loss": 0.0884, + "num_input_tokens_seen": 323426936, + "step": 1883 + }, + { + "epoch": 0.49529821792595513, + "loss": 0.07692838460206985, + "loss_ce": 0.000985392602160573, + "loss_iou": 0.51953125, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 323426936, + "step": 1883 + }, + { + "epoch": 0.4955612546853423, + "grad_norm": 10.56929332370978, + "learning_rate": 5e-06, + "loss": 0.1292, + "num_input_tokens_seen": 323599244, + "step": 1884 + }, + { + "epoch": 0.4955612546853423, + "loss": 0.17598523199558258, + "loss_ce": 0.0006922531756572425, + "loss_iou": 0.52734375, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 323599244, + "step": 1884 + }, + { + "epoch": 0.4958242914447294, + "grad_norm": 4.870883244723278, + "learning_rate": 5e-06, + "loss": 0.1084, + "num_input_tokens_seen": 323771396, + "step": 1885 + }, + { + "epoch": 0.4958242914447294, + "loss": 0.08608455955982208, + "loss_ce": 0.0016424173954874277, + "loss_iou": 0.443359375, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 323771396, + "step": 1885 + }, + { + "epoch": 0.49608732820411655, + "grad_norm": 9.112063415949194, + "learning_rate": 5e-06, + "loss": 0.1201, + "num_input_tokens_seen": 323943464, + "step": 1886 + }, + { + "epoch": 0.49608732820411655, + "loss": 0.05756930261850357, + "loss_ce": 0.00025729500339366496, + "loss_iou": 0.494140625, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 323943464, + "step": 1886 + }, + { + "epoch": 0.49635036496350365, + "grad_norm": 11.77050689912044, + "learning_rate": 5e-06, + "loss": 0.135, + "num_input_tokens_seen": 324115664, + "step": 1887 + }, + { + "epoch": 0.49635036496350365, + "loss": 0.13590523600578308, + "loss_ce": 0.0011243472108617425, + "loss_iou": 0.39453125, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 324115664, + "step": 1887 + }, + { + "epoch": 0.49661340172289076, + "grad_norm": 19.144215192825804, + "learning_rate": 5e-06, + "loss": 0.154, + "num_input_tokens_seen": 324288076, + "step": 1888 + }, + { + "epoch": 0.49661340172289076, + "loss": 0.1540539562702179, + "loss_ce": 0.00036742445081472397, + "loss_iou": 0.49609375, + "loss_num": 0.03076171875, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 324288076, + "step": 1888 + }, + { + "epoch": 0.4968764384822779, + "grad_norm": 4.245370979797126, + "learning_rate": 5e-06, + "loss": 0.1335, + "num_input_tokens_seen": 324460724, + "step": 1889 + }, + { + "epoch": 0.4968764384822779, + "loss": 0.08321575820446014, + "loss_ce": 0.0006657101330347359, + "loss_iou": 0.59375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 324460724, + "step": 1889 + }, + { + "epoch": 0.497139475241665, + "grad_norm": 4.913753500635943, + "learning_rate": 5e-06, + "loss": 0.1567, + "num_input_tokens_seen": 324633088, + "step": 1890 + }, + { + "epoch": 0.497139475241665, + "loss": 0.13971787691116333, + "loss_ce": 0.001625841949135065, + "loss_iou": 0.51953125, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 324633088, + "step": 1890 + }, + { + "epoch": 0.4974025120010521, + "grad_norm": 4.7392923084206275, + "learning_rate": 5e-06, + "loss": 0.114, + "num_input_tokens_seen": 324805420, + "step": 1891 + }, + { + "epoch": 0.4974025120010521, + "loss": 0.10387594997882843, + "loss_ce": 0.001977758714929223, + "loss_iou": 0.52734375, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 324805420, + "step": 1891 + }, + { + "epoch": 0.4976655487604393, + "grad_norm": 3.934998355466595, + "learning_rate": 5e-06, + "loss": 0.1193, + "num_input_tokens_seen": 324977628, + "step": 1892 + }, + { + "epoch": 0.4976655487604393, + "loss": 0.08378120511770248, + "loss_ce": 0.0015668454580008984, + "loss_iou": 0.4921875, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 324977628, + "step": 1892 + }, + { + "epoch": 0.4979285855198264, + "grad_norm": 16.6043348209483, + "learning_rate": 5e-06, + "loss": 0.1289, + "num_input_tokens_seen": 325148040, + "step": 1893 + }, + { + "epoch": 0.4979285855198264, + "loss": 0.11267328262329102, + "loss_ce": 0.0008110918570309877, + "loss_iou": 0.51953125, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 325148040, + "step": 1893 + }, + { + "epoch": 0.49819162227921354, + "grad_norm": 4.246925011279612, + "learning_rate": 5e-06, + "loss": 0.1561, + "num_input_tokens_seen": 325317336, + "step": 1894 + }, + { + "epoch": 0.49819162227921354, + "loss": 0.20916813611984253, + "loss_ce": 0.000855154765304178, + "loss_iou": 0.49609375, + "loss_num": 0.041748046875, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 325317336, + "step": 1894 + }, + { + "epoch": 0.49845465903860064, + "grad_norm": 22.177021376541713, + "learning_rate": 5e-06, + "loss": 0.1381, + "num_input_tokens_seen": 325489652, + "step": 1895 + }, + { + "epoch": 0.49845465903860064, + "loss": 0.15358038246631622, + "loss_ce": 0.0006873153615742922, + "loss_iou": 0.333984375, + "loss_num": 0.030517578125, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 325489652, + "step": 1895 + }, + { + "epoch": 0.49871769579798775, + "grad_norm": 4.369246978330159, + "learning_rate": 5e-06, + "loss": 0.1545, + "num_input_tokens_seen": 325661796, + "step": 1896 + }, + { + "epoch": 0.49871769579798775, + "loss": 0.11801205575466156, + "loss_ce": 0.0016180112725123763, + "loss_iou": 0.6640625, + "loss_num": 0.0233154296875, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 325661796, + "step": 1896 + }, + { + "epoch": 0.4989807325573749, + "grad_norm": 4.8736707793033975, + "learning_rate": 5e-06, + "loss": 0.1079, + "num_input_tokens_seen": 325834028, + "step": 1897 + }, + { + "epoch": 0.4989807325573749, + "loss": 0.09736193716526031, + "loss_ce": 0.0003770706243813038, + "loss_iou": 0.482421875, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 325834028, + "step": 1897 + }, + { + "epoch": 0.499243769316762, + "grad_norm": 13.34167032829397, + "learning_rate": 5e-06, + "loss": 0.1232, + "num_input_tokens_seen": 326006352, + "step": 1898 + }, + { + "epoch": 0.499243769316762, + "loss": 0.1529536247253418, + "loss_ce": 0.0012812747154384851, + "loss_iou": 0.640625, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 326006352, + "step": 1898 + }, + { + "epoch": 0.49950680607614917, + "grad_norm": 8.275559071074456, + "learning_rate": 5e-06, + "loss": 0.1183, + "num_input_tokens_seen": 326178328, + "step": 1899 + }, + { + "epoch": 0.49950680607614917, + "loss": 0.040668413043022156, + "loss_ce": 0.0004920191713608801, + "loss_iou": 0.515625, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 326178328, + "step": 1899 + }, + { + "epoch": 0.49976984283553627, + "grad_norm": 6.7636763663201105, + "learning_rate": 5e-06, + "loss": 0.146, + "num_input_tokens_seen": 326347792, + "step": 1900 + }, + { + "epoch": 0.49976984283553627, + "loss": 0.12708882987499237, + "loss_ce": 0.0006239861249923706, + "loss_iou": 0.50390625, + "loss_num": 0.0252685546875, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 326347792, + "step": 1900 + }, + { + "epoch": 0.5000328795949234, + "grad_norm": 11.91106437107254, + "learning_rate": 5e-06, + "loss": 0.1809, + "num_input_tokens_seen": 326520084, + "step": 1901 + }, + { + "epoch": 0.5000328795949234, + "loss": 0.24568378925323486, + "loss_ce": 0.0030690436251461506, + "loss_iou": 0.41796875, + "loss_num": 0.048583984375, + "loss_xval": 0.2421875, + "num_input_tokens_seen": 326520084, + "step": 1901 + }, + { + "epoch": 0.5002959163543105, + "grad_norm": 15.380203467807723, + "learning_rate": 5e-06, + "loss": 0.1589, + "num_input_tokens_seen": 326691988, + "step": 1902 + }, + { + "epoch": 0.5002959163543105, + "loss": 0.1489952951669693, + "loss_ce": 0.00034418603172525764, + "loss_iou": 0.53125, + "loss_num": 0.02978515625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 326691988, + "step": 1902 + }, + { + "epoch": 0.5005589531136977, + "grad_norm": 6.172818867405067, + "learning_rate": 5e-06, + "loss": 0.1834, + "num_input_tokens_seen": 326864500, + "step": 1903 + }, + { + "epoch": 0.5005589531136977, + "loss": 0.08618461340665817, + "loss_ce": 0.0021086866036057472, + "loss_iou": 0.54296875, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 326864500, + "step": 1903 + }, + { + "epoch": 0.5008219898730848, + "grad_norm": 6.280435924544313, + "learning_rate": 5e-06, + "loss": 0.11, + "num_input_tokens_seen": 327036512, + "step": 1904 + }, + { + "epoch": 0.5008219898730848, + "loss": 0.16495370864868164, + "loss_ce": 0.0016236326191574335, + "loss_iou": 0.58984375, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 327036512, + "step": 1904 + }, + { + "epoch": 0.5010850266324719, + "grad_norm": 5.592537029701005, + "learning_rate": 5e-06, + "loss": 0.1294, + "num_input_tokens_seen": 327208668, + "step": 1905 + }, + { + "epoch": 0.5010850266324719, + "loss": 0.09431658685207367, + "loss_ce": 0.0009938328294083476, + "loss_iou": 0.40234375, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 327208668, + "step": 1905 + }, + { + "epoch": 0.501348063391859, + "grad_norm": 6.449445852698605, + "learning_rate": 5e-06, + "loss": 0.1186, + "num_input_tokens_seen": 327378984, + "step": 1906 + }, + { + "epoch": 0.501348063391859, + "loss": 0.1146991178393364, + "loss_ce": 0.0006091539980843663, + "loss_iou": 0.498046875, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 327378984, + "step": 1906 + }, + { + "epoch": 0.5016111001512461, + "grad_norm": 4.708352961414117, + "learning_rate": 5e-06, + "loss": 0.1441, + "num_input_tokens_seen": 327551376, + "step": 1907 + }, + { + "epoch": 0.5016111001512461, + "loss": 0.15458180010318756, + "loss_ce": 0.0006816480308771133, + "loss_iou": 0.4140625, + "loss_num": 0.03076171875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 327551376, + "step": 1907 + }, + { + "epoch": 0.5018741369106332, + "grad_norm": 11.511613556933392, + "learning_rate": 5e-06, + "loss": 0.1553, + "num_input_tokens_seen": 327724012, + "step": 1908 + }, + { + "epoch": 0.5018741369106332, + "loss": 0.20126180350780487, + "loss_ce": 0.0008528655744157732, + "loss_iou": 0.5703125, + "loss_num": 0.0400390625, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 327724012, + "step": 1908 + }, + { + "epoch": 0.5021371736700204, + "grad_norm": 5.154439626897641, + "learning_rate": 5e-06, + "loss": 0.1566, + "num_input_tokens_seen": 327896420, + "step": 1909 + }, + { + "epoch": 0.5021371736700204, + "loss": 0.13825711607933044, + "loss_ce": 0.0017519897082820535, + "loss_iou": 0.484375, + "loss_num": 0.0272216796875, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 327896420, + "step": 1909 + }, + { + "epoch": 0.5024002104294075, + "grad_norm": 9.76818168107923, + "learning_rate": 5e-06, + "loss": 0.1211, + "num_input_tokens_seen": 328066120, + "step": 1910 + }, + { + "epoch": 0.5024002104294075, + "loss": 0.20892378687858582, + "loss_ce": 0.00096173956990242, + "loss_iou": 0.609375, + "loss_num": 0.04150390625, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 328066120, + "step": 1910 + }, + { + "epoch": 0.5026632471887946, + "grad_norm": 6.89128626408132, + "learning_rate": 5e-06, + "loss": 0.1391, + "num_input_tokens_seen": 328238388, + "step": 1911 + }, + { + "epoch": 0.5026632471887946, + "loss": 0.19616608321666718, + "loss_ce": 0.001982737798243761, + "loss_iou": 0.4453125, + "loss_num": 0.038818359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 328238388, + "step": 1911 + }, + { + "epoch": 0.5029262839481817, + "grad_norm": 4.852078459217102, + "learning_rate": 5e-06, + "loss": 0.1189, + "num_input_tokens_seen": 328410228, + "step": 1912 + }, + { + "epoch": 0.5029262839481817, + "loss": 0.13402841985225677, + "loss_ce": 0.0019178204238414764, + "loss_iou": 0.40625, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 328410228, + "step": 1912 + }, + { + "epoch": 0.5031893207075688, + "grad_norm": 8.495391184677569, + "learning_rate": 5e-06, + "loss": 0.1507, + "num_input_tokens_seen": 328582660, + "step": 1913 + }, + { + "epoch": 0.5031893207075688, + "loss": 0.14262062311172485, + "loss_ce": 0.004253931809216738, + "loss_iou": 0.5546875, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 328582660, + "step": 1913 + }, + { + "epoch": 0.503452357466956, + "grad_norm": 7.2140273397639385, + "learning_rate": 5e-06, + "loss": 0.1042, + "num_input_tokens_seen": 328754772, + "step": 1914 + }, + { + "epoch": 0.503452357466956, + "loss": 0.09518692642450333, + "loss_ce": 0.003878333605825901, + "loss_iou": 0.5859375, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 328754772, + "step": 1914 + }, + { + "epoch": 0.5037153942263431, + "grad_norm": 4.268432766820409, + "learning_rate": 5e-06, + "loss": 0.1579, + "num_input_tokens_seen": 328926900, + "step": 1915 + }, + { + "epoch": 0.5037153942263431, + "loss": 0.11751651018857956, + "loss_ce": 0.0052423360757529736, + "loss_iou": 0.5234375, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 328926900, + "step": 1915 + }, + { + "epoch": 0.5039784309857303, + "grad_norm": 3.9588210706780522, + "learning_rate": 5e-06, + "loss": 0.1548, + "num_input_tokens_seen": 329096944, + "step": 1916 + }, + { + "epoch": 0.5039784309857303, + "loss": 0.1656898707151413, + "loss_ce": 0.005258964374661446, + "loss_iou": 0.4453125, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 329096944, + "step": 1916 + }, + { + "epoch": 0.5042414677451174, + "grad_norm": 5.63809501756183, + "learning_rate": 5e-06, + "loss": 0.0966, + "num_input_tokens_seen": 329268892, + "step": 1917 + }, + { + "epoch": 0.5042414677451174, + "loss": 0.07544635236263275, + "loss_ce": 0.000571468030102551, + "loss_iou": 0.373046875, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 329268892, + "step": 1917 + }, + { + "epoch": 0.5045045045045045, + "grad_norm": 7.590332140913489, + "learning_rate": 5e-06, + "loss": 0.1104, + "num_input_tokens_seen": 329441344, + "step": 1918 + }, + { + "epoch": 0.5045045045045045, + "loss": 0.15229275822639465, + "loss_ce": 0.006144077517092228, + "loss_iou": 0.52734375, + "loss_num": 0.0291748046875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 329441344, + "step": 1918 + }, + { + "epoch": 0.5047675412638917, + "grad_norm": 9.59054902064166, + "learning_rate": 5e-06, + "loss": 0.1483, + "num_input_tokens_seen": 329613564, + "step": 1919 + }, + { + "epoch": 0.5047675412638917, + "loss": 0.08887225389480591, + "loss_ce": 0.001500429236330092, + "loss_iou": 0.515625, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 329613564, + "step": 1919 + }, + { + "epoch": 0.5050305780232788, + "grad_norm": 4.210072070601249, + "learning_rate": 5e-06, + "loss": 0.1766, + "num_input_tokens_seen": 329785680, + "step": 1920 + }, + { + "epoch": 0.5050305780232788, + "loss": 0.2718814015388489, + "loss_ce": 0.002228060271590948, + "loss_iou": 0.3359375, + "loss_num": 0.053955078125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 329785680, + "step": 1920 + }, + { + "epoch": 0.5052936147826659, + "grad_norm": 8.380948003304292, + "learning_rate": 5e-06, + "loss": 0.1587, + "num_input_tokens_seen": 329957636, + "step": 1921 + }, + { + "epoch": 0.5052936147826659, + "loss": 0.1278029978275299, + "loss_ce": 0.00286403251811862, + "loss_iou": 0.56640625, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 329957636, + "step": 1921 + }, + { + "epoch": 0.505556651542053, + "grad_norm": 7.7625200359697715, + "learning_rate": 5e-06, + "loss": 0.1491, + "num_input_tokens_seen": 330129632, + "step": 1922 + }, + { + "epoch": 0.505556651542053, + "loss": 0.11747082322835922, + "loss_ce": 0.004799924790859222, + "loss_iou": 0.5859375, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 330129632, + "step": 1922 + }, + { + "epoch": 0.5058196883014401, + "grad_norm": 6.9606844299684205, + "learning_rate": 5e-06, + "loss": 0.1055, + "num_input_tokens_seen": 330301512, + "step": 1923 + }, + { + "epoch": 0.5058196883014401, + "loss": 0.09333023428916931, + "loss_ce": 0.0011671524262055755, + "loss_iou": 0.470703125, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 330301512, + "step": 1923 + }, + { + "epoch": 0.5060827250608273, + "grad_norm": 13.729941886848659, + "learning_rate": 5e-06, + "loss": 0.1111, + "num_input_tokens_seen": 330473972, + "step": 1924 + }, + { + "epoch": 0.5060827250608273, + "loss": 0.09531684219837189, + "loss_ce": 0.000803898845333606, + "loss_iou": 0.390625, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 330473972, + "step": 1924 + }, + { + "epoch": 0.5063457618202144, + "grad_norm": 6.989119579903773, + "learning_rate": 5e-06, + "loss": 0.1192, + "num_input_tokens_seen": 330644424, + "step": 1925 + }, + { + "epoch": 0.5063457618202144, + "loss": 0.17603036761283875, + "loss_ce": 0.0006458393763750792, + "loss_iou": 0.451171875, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 330644424, + "step": 1925 + }, + { + "epoch": 0.5066087985796015, + "grad_norm": 33.383727313004705, + "learning_rate": 5e-06, + "loss": 0.1159, + "num_input_tokens_seen": 330816512, + "step": 1926 + }, + { + "epoch": 0.5066087985796015, + "loss": 0.07775846868753433, + "loss_ce": 0.0003048558428417891, + "loss_iou": 0.330078125, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 330816512, + "step": 1926 + }, + { + "epoch": 0.5068718353389886, + "grad_norm": 7.427483033873037, + "learning_rate": 5e-06, + "loss": 0.151, + "num_input_tokens_seen": 330988952, + "step": 1927 + }, + { + "epoch": 0.5068718353389886, + "loss": 0.16052240133285522, + "loss_ce": 0.000579762679990381, + "loss_iou": 0.35546875, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 330988952, + "step": 1927 + }, + { + "epoch": 0.5071348720983757, + "grad_norm": 8.482091734395144, + "learning_rate": 5e-06, + "loss": 0.1604, + "num_input_tokens_seen": 331161228, + "step": 1928 + }, + { + "epoch": 0.5071348720983757, + "loss": 0.14751845598220825, + "loss_ce": 0.000576329359319061, + "loss_iou": 0.72265625, + "loss_num": 0.0294189453125, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 331161228, + "step": 1928 + }, + { + "epoch": 0.5073979088577629, + "grad_norm": 6.087066534105735, + "learning_rate": 5e-06, + "loss": 0.1212, + "num_input_tokens_seen": 331329832, + "step": 1929 + }, + { + "epoch": 0.5073979088577629, + "loss": 0.09639698266983032, + "loss_ce": 0.002372324001044035, + "loss_iou": 0.4765625, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 331329832, + "step": 1929 + }, + { + "epoch": 0.50766094561715, + "grad_norm": 6.499679773970415, + "learning_rate": 5e-06, + "loss": 0.0971, + "num_input_tokens_seen": 331502148, + "step": 1930 + }, + { + "epoch": 0.50766094561715, + "loss": 0.11911525577306747, + "loss_ce": 0.0002492967469152063, + "loss_iou": 0.61328125, + "loss_num": 0.0238037109375, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 331502148, + "step": 1930 + }, + { + "epoch": 0.5079239823765371, + "grad_norm": 16.477307771999516, + "learning_rate": 5e-06, + "loss": 0.1057, + "num_input_tokens_seen": 331674540, + "step": 1931 + }, + { + "epoch": 0.5079239823765371, + "loss": 0.14295902848243713, + "loss_ce": 0.0038904245011508465, + "loss_iou": 0.341796875, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 331674540, + "step": 1931 + }, + { + "epoch": 0.5081870191359242, + "grad_norm": 4.977000748222245, + "learning_rate": 5e-06, + "loss": 0.1301, + "num_input_tokens_seen": 331846988, + "step": 1932 + }, + { + "epoch": 0.5081870191359242, + "loss": 0.1609978973865509, + "loss_ce": 0.00042966773617081344, + "loss_iou": 0.66015625, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 331846988, + "step": 1932 + }, + { + "epoch": 0.5084500558953113, + "grad_norm": 4.68697406244425, + "learning_rate": 5e-06, + "loss": 0.128, + "num_input_tokens_seen": 332019216, + "step": 1933 + }, + { + "epoch": 0.5084500558953113, + "loss": 0.12378443777561188, + "loss_ce": 0.0009511768585070968, + "loss_iou": 0.5, + "loss_num": 0.0245361328125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 332019216, + "step": 1933 + }, + { + "epoch": 0.5087130926546984, + "grad_norm": 5.061790101979302, + "learning_rate": 5e-06, + "loss": 0.1413, + "num_input_tokens_seen": 332191420, + "step": 1934 + }, + { + "epoch": 0.5087130926546984, + "loss": 0.09300635755062103, + "loss_ce": 0.0008737844182178378, + "loss_iou": 0.51171875, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 332191420, + "step": 1934 + }, + { + "epoch": 0.5089761294140857, + "grad_norm": 18.6525779104533, + "learning_rate": 5e-06, + "loss": 0.1468, + "num_input_tokens_seen": 332363456, + "step": 1935 + }, + { + "epoch": 0.5089761294140857, + "loss": 0.13239170610904694, + "loss_ce": 0.0003726637805812061, + "loss_iou": 0.42578125, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 332363456, + "step": 1935 + }, + { + "epoch": 0.5092391661734728, + "grad_norm": 16.50490516998537, + "learning_rate": 5e-06, + "loss": 0.0946, + "num_input_tokens_seen": 332535912, + "step": 1936 + }, + { + "epoch": 0.5092391661734728, + "loss": 0.08137423545122147, + "loss_ce": 0.0015249941498041153, + "loss_iou": 0.423828125, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 332535912, + "step": 1936 + }, + { + "epoch": 0.5095022029328599, + "grad_norm": 7.868678191164257, + "learning_rate": 5e-06, + "loss": 0.1762, + "num_input_tokens_seen": 332708064, + "step": 1937 + }, + { + "epoch": 0.5095022029328599, + "loss": 0.2602325677871704, + "loss_ce": 0.005715976003557444, + "loss_iou": 0.470703125, + "loss_num": 0.051025390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 332708064, + "step": 1937 + }, + { + "epoch": 0.509765239692247, + "grad_norm": 4.650960242488344, + "learning_rate": 5e-06, + "loss": 0.1441, + "num_input_tokens_seen": 332880260, + "step": 1938 + }, + { + "epoch": 0.509765239692247, + "loss": 0.08695125579833984, + "loss_ce": 0.0018987648654729128, + "loss_iou": 0.7578125, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 332880260, + "step": 1938 + }, + { + "epoch": 0.5100282764516341, + "grad_norm": 5.979900826127572, + "learning_rate": 5e-06, + "loss": 0.115, + "num_input_tokens_seen": 333052492, + "step": 1939 + }, + { + "epoch": 0.5100282764516341, + "loss": 0.11689235270023346, + "loss_ce": 0.0012612489517778158, + "loss_iou": 0.455078125, + "loss_num": 0.0230712890625, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 333052492, + "step": 1939 + }, + { + "epoch": 0.5102913132110213, + "grad_norm": 15.330918281866088, + "learning_rate": 5e-06, + "loss": 0.1149, + "num_input_tokens_seen": 333222132, + "step": 1940 + }, + { + "epoch": 0.5102913132110213, + "loss": 0.16124433279037476, + "loss_ce": 0.0012101602042093873, + "loss_iou": 0.58203125, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 333222132, + "step": 1940 + }, + { + "epoch": 0.5105543499704084, + "grad_norm": 5.298876176442514, + "learning_rate": 5e-06, + "loss": 0.1284, + "num_input_tokens_seen": 333394244, + "step": 1941 + }, + { + "epoch": 0.5105543499704084, + "loss": 0.09125322848558426, + "loss_ce": 0.0003718816442415118, + "loss_iou": 0.6328125, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 333394244, + "step": 1941 + }, + { + "epoch": 0.5108173867297955, + "grad_norm": 25.903681591063247, + "learning_rate": 5e-06, + "loss": 0.0986, + "num_input_tokens_seen": 333564616, + "step": 1942 + }, + { + "epoch": 0.5108173867297955, + "loss": 0.0830872505903244, + "loss_ce": 0.003009122796356678, + "loss_iou": 0.56640625, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 333564616, + "step": 1942 + }, + { + "epoch": 0.5110804234891826, + "grad_norm": 7.412064312390374, + "learning_rate": 5e-06, + "loss": 0.1326, + "num_input_tokens_seen": 333736828, + "step": 1943 + }, + { + "epoch": 0.5110804234891826, + "loss": 0.13648918271064758, + "loss_ce": 0.0012352685444056988, + "loss_iou": 0.49609375, + "loss_num": 0.027099609375, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 333736828, + "step": 1943 + }, + { + "epoch": 0.5113434602485697, + "grad_norm": 5.823332421421152, + "learning_rate": 5e-06, + "loss": 0.1076, + "num_input_tokens_seen": 333909000, + "step": 1944 + }, + { + "epoch": 0.5113434602485697, + "loss": 0.08375194668769836, + "loss_ce": 0.00024059813586063683, + "loss_iou": 0.55078125, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 333909000, + "step": 1944 + }, + { + "epoch": 0.5116064970079569, + "grad_norm": 5.7512491483697685, + "learning_rate": 5e-06, + "loss": 0.1466, + "num_input_tokens_seen": 334081116, + "step": 1945 + }, + { + "epoch": 0.5116064970079569, + "loss": 0.11631600558757782, + "loss_ce": 0.0020276757422834635, + "loss_iou": 0.390625, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 334081116, + "step": 1945 + }, + { + "epoch": 0.511869533767344, + "grad_norm": 6.061348306489617, + "learning_rate": 5e-06, + "loss": 0.1248, + "num_input_tokens_seen": 334252872, + "step": 1946 + }, + { + "epoch": 0.511869533767344, + "loss": 0.1273353099822998, + "loss_ce": 0.0007484056986868382, + "loss_iou": 0.671875, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 334252872, + "step": 1946 + }, + { + "epoch": 0.5121325705267311, + "grad_norm": 4.656625920812538, + "learning_rate": 5e-06, + "loss": 0.1469, + "num_input_tokens_seen": 334421696, + "step": 1947 + }, + { + "epoch": 0.5121325705267311, + "loss": 0.07545529305934906, + "loss_ce": 0.002487759804353118, + "loss_iou": 0.5390625, + "loss_num": 0.01458740234375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 334421696, + "step": 1947 + }, + { + "epoch": 0.5123956072861182, + "grad_norm": 11.2997609374861, + "learning_rate": 5e-06, + "loss": 0.1074, + "num_input_tokens_seen": 334593968, + "step": 1948 + }, + { + "epoch": 0.5123956072861182, + "loss": 0.15095758438110352, + "loss_ce": 0.00047542020911350846, + "loss_iou": 0.65625, + "loss_num": 0.0301513671875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 334593968, + "step": 1948 + }, + { + "epoch": 0.5126586440455053, + "grad_norm": 4.740639931376309, + "learning_rate": 5e-06, + "loss": 0.1006, + "num_input_tokens_seen": 334766256, + "step": 1949 + }, + { + "epoch": 0.5126586440455053, + "loss": 0.0767994076013565, + "loss_ce": 0.00020028470316901803, + "loss_iou": 0.49609375, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 334766256, + "step": 1949 + }, + { + "epoch": 0.5129216808048925, + "grad_norm": 15.829229511064487, + "learning_rate": 5e-06, + "loss": 0.0946, + "num_input_tokens_seen": 334938720, + "step": 1950 + }, + { + "epoch": 0.5129216808048925, + "loss": 0.10283501446247101, + "loss_ce": 0.003225642256438732, + "loss_iou": 0.57421875, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 334938720, + "step": 1950 + }, + { + "epoch": 0.5131847175642796, + "grad_norm": 4.547176342272478, + "learning_rate": 5e-06, + "loss": 0.1084, + "num_input_tokens_seen": 335108948, + "step": 1951 + }, + { + "epoch": 0.5131847175642796, + "loss": 0.07713186740875244, + "loss_ce": 0.00047171738697215915, + "loss_iou": 0.53515625, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 335108948, + "step": 1951 + }, + { + "epoch": 0.5134477543236667, + "grad_norm": 13.66162052286571, + "learning_rate": 5e-06, + "loss": 0.1628, + "num_input_tokens_seen": 335281148, + "step": 1952 + }, + { + "epoch": 0.5134477543236667, + "loss": 0.2089971899986267, + "loss_ce": 0.0048346007242798805, + "loss_iou": 0.390625, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 335281148, + "step": 1952 + }, + { + "epoch": 0.5137107910830538, + "grad_norm": 4.53549366428838, + "learning_rate": 5e-06, + "loss": 0.085, + "num_input_tokens_seen": 335453172, + "step": 1953 + }, + { + "epoch": 0.5137107910830538, + "loss": 0.0756625235080719, + "loss_ce": 0.0019930913113057613, + "loss_iou": 0.5078125, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 335453172, + "step": 1953 + }, + { + "epoch": 0.513973827842441, + "grad_norm": 5.655145923562407, + "learning_rate": 5e-06, + "loss": 0.1403, + "num_input_tokens_seen": 335623636, + "step": 1954 + }, + { + "epoch": 0.513973827842441, + "loss": 0.14977125823497772, + "loss_ce": 0.0032868883572518826, + "loss_iou": NaN, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 335623636, + "step": 1954 + }, + { + "epoch": 0.5142368646018282, + "grad_norm": 9.198223458900488, + "learning_rate": 5e-06, + "loss": 0.158, + "num_input_tokens_seen": 335793500, + "step": 1955 + }, + { + "epoch": 0.5142368646018282, + "loss": 0.13681824505329132, + "loss_ce": 0.0008624300826340914, + "loss_iou": 0.61328125, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 335793500, + "step": 1955 + }, + { + "epoch": 0.5144999013612153, + "grad_norm": 14.576489713233544, + "learning_rate": 5e-06, + "loss": 0.1376, + "num_input_tokens_seen": 335965788, + "step": 1956 + }, + { + "epoch": 0.5144999013612153, + "loss": 0.14106330275535583, + "loss_ce": 0.0012928071664646268, + "loss_iou": 0.56640625, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 335965788, + "step": 1956 + }, + { + "epoch": 0.5147629381206024, + "grad_norm": 12.321724088847931, + "learning_rate": 5e-06, + "loss": 0.1715, + "num_input_tokens_seen": 336138060, + "step": 1957 + }, + { + "epoch": 0.5147629381206024, + "loss": 0.1251417100429535, + "loss_ce": 0.0011182638118043542, + "loss_iou": 0.53515625, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 336138060, + "step": 1957 + }, + { + "epoch": 0.5150259748799895, + "grad_norm": 8.213839245926183, + "learning_rate": 5e-06, + "loss": 0.1156, + "num_input_tokens_seen": 336308692, + "step": 1958 + }, + { + "epoch": 0.5150259748799895, + "loss": 0.06178643926978111, + "loss_ce": 0.00011041228572139516, + "loss_iou": 0.62890625, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 336308692, + "step": 1958 + }, + { + "epoch": 0.5152890116393766, + "grad_norm": 4.514500271556744, + "learning_rate": 5e-06, + "loss": 0.1589, + "num_input_tokens_seen": 336481248, + "step": 1959 + }, + { + "epoch": 0.5152890116393766, + "loss": 0.15207350254058838, + "loss_ce": 0.0027662513311952353, + "loss_iou": 0.4453125, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 336481248, + "step": 1959 + }, + { + "epoch": 0.5155520483987637, + "grad_norm": 3.270020202020867, + "learning_rate": 5e-06, + "loss": 0.0831, + "num_input_tokens_seen": 336653556, + "step": 1960 + }, + { + "epoch": 0.5155520483987637, + "loss": 0.07474862039089203, + "loss_ce": 0.00031624053372070193, + "loss_iou": 0.57421875, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 336653556, + "step": 1960 + }, + { + "epoch": 0.5158150851581509, + "grad_norm": 5.036795139632983, + "learning_rate": 5e-06, + "loss": 0.1524, + "num_input_tokens_seen": 336825632, + "step": 1961 + }, + { + "epoch": 0.5158150851581509, + "loss": 0.0949145182967186, + "loss_ce": 0.004765587393194437, + "loss_iou": 0.52734375, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 336825632, + "step": 1961 + }, + { + "epoch": 0.516078121917538, + "grad_norm": 5.18788380960706, + "learning_rate": 5e-06, + "loss": 0.0908, + "num_input_tokens_seen": 336997908, + "step": 1962 + }, + { + "epoch": 0.516078121917538, + "loss": 0.1347174346446991, + "loss_ce": 0.0003027569910045713, + "loss_iou": 0.38671875, + "loss_num": 0.02685546875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 336997908, + "step": 1962 + }, + { + "epoch": 0.5163411586769251, + "grad_norm": 19.722687183501755, + "learning_rate": 5e-06, + "loss": 0.1295, + "num_input_tokens_seen": 337170032, + "step": 1963 + }, + { + "epoch": 0.5163411586769251, + "loss": 0.15522822737693787, + "loss_ce": 0.004196731373667717, + "loss_iou": 0.490234375, + "loss_num": 0.0301513671875, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 337170032, + "step": 1963 + }, + { + "epoch": 0.5166041954363122, + "grad_norm": 5.156355195655944, + "learning_rate": 5e-06, + "loss": 0.1089, + "num_input_tokens_seen": 337342000, + "step": 1964 + }, + { + "epoch": 0.5166041954363122, + "loss": 0.0796535462141037, + "loss_ce": 0.0009487088536843657, + "loss_iou": 0.66796875, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 337342000, + "step": 1964 + }, + { + "epoch": 0.5168672321956993, + "grad_norm": 4.599075113473689, + "learning_rate": 5e-06, + "loss": 0.1312, + "num_input_tokens_seen": 337514164, + "step": 1965 + }, + { + "epoch": 0.5168672321956993, + "loss": 0.21942217648029327, + "loss_ce": 0.0008857909124344587, + "loss_iou": 0.5546875, + "loss_num": 0.043701171875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 337514164, + "step": 1965 + }, + { + "epoch": 0.5171302689550865, + "grad_norm": 8.512554957251284, + "learning_rate": 5e-06, + "loss": 0.1336, + "num_input_tokens_seen": 337686236, + "step": 1966 + }, + { + "epoch": 0.5171302689550865, + "loss": 0.08479119837284088, + "loss_ce": 0.001386661664582789, + "loss_iou": 0.5234375, + "loss_num": 0.0166015625, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 337686236, + "step": 1966 + }, + { + "epoch": 0.5173933057144736, + "grad_norm": 5.381969767631673, + "learning_rate": 5e-06, + "loss": 0.1373, + "num_input_tokens_seen": 337858240, + "step": 1967 + }, + { + "epoch": 0.5173933057144736, + "loss": 0.15276584029197693, + "loss_ce": 0.004938698373734951, + "loss_iou": 0.52734375, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 337858240, + "step": 1967 + }, + { + "epoch": 0.5176563424738607, + "grad_norm": 12.700271834998917, + "learning_rate": 5e-06, + "loss": 0.1532, + "num_input_tokens_seen": 338030560, + "step": 1968 + }, + { + "epoch": 0.5176563424738607, + "loss": 0.15190255641937256, + "loss_ce": 0.0028852252289652824, + "loss_iou": 0.494140625, + "loss_num": 0.02978515625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 338030560, + "step": 1968 + }, + { + "epoch": 0.5179193792332478, + "grad_norm": 21.983637705034123, + "learning_rate": 5e-06, + "loss": 0.1116, + "num_input_tokens_seen": 338202560, + "step": 1969 + }, + { + "epoch": 0.5179193792332478, + "loss": 0.0981290340423584, + "loss_ce": 0.0023496169596910477, + "loss_iou": 0.369140625, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 338202560, + "step": 1969 + }, + { + "epoch": 0.5181824159926349, + "grad_norm": 10.469128023533157, + "learning_rate": 5e-06, + "loss": 0.1446, + "num_input_tokens_seen": 338374836, + "step": 1970 + }, + { + "epoch": 0.5181824159926349, + "loss": 0.16515450179576874, + "loss_ce": 0.0016413143603131175, + "loss_iou": 0.51953125, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 338374836, + "step": 1970 + }, + { + "epoch": 0.5184454527520221, + "grad_norm": 5.186485351415196, + "learning_rate": 5e-06, + "loss": 0.162, + "num_input_tokens_seen": 338546996, + "step": 1971 + }, + { + "epoch": 0.5184454527520221, + "loss": 0.24401625990867615, + "loss_ce": 0.004056546837091446, + "loss_iou": 0.42578125, + "loss_num": 0.0478515625, + "loss_xval": 0.240234375, + "num_input_tokens_seen": 338546996, + "step": 1971 + }, + { + "epoch": 0.5187084895114092, + "grad_norm": 6.802546863240928, + "learning_rate": 5e-06, + "loss": 0.1401, + "num_input_tokens_seen": 338716756, + "step": 1972 + }, + { + "epoch": 0.5187084895114092, + "loss": 0.0805911123752594, + "loss_ce": 0.0030764644034206867, + "loss_iou": 0.43359375, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 338716756, + "step": 1972 + }, + { + "epoch": 0.5189715262707963, + "grad_norm": 11.859766109061349, + "learning_rate": 5e-06, + "loss": 0.1037, + "num_input_tokens_seen": 338886988, + "step": 1973 + }, + { + "epoch": 0.5189715262707963, + "loss": 0.10678447037935257, + "loss_ce": 0.0013767611235380173, + "loss_iou": 0.50390625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 338886988, + "step": 1973 + }, + { + "epoch": 0.5192345630301834, + "grad_norm": 7.006309240426411, + "learning_rate": 5e-06, + "loss": 0.1237, + "num_input_tokens_seen": 339059260, + "step": 1974 + }, + { + "epoch": 0.5192345630301834, + "loss": 0.1808294951915741, + "loss_ce": 0.00034853501711040735, + "loss_iou": 0.6796875, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 339059260, + "step": 1974 + }, + { + "epoch": 0.5194975997895706, + "grad_norm": 21.985452701611596, + "learning_rate": 5e-06, + "loss": 0.1751, + "num_input_tokens_seen": 339231652, + "step": 1975 + }, + { + "epoch": 0.5194975997895706, + "loss": 0.16675767302513123, + "loss_ce": 0.0018406773451715708, + "loss_iou": 0.37109375, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 339231652, + "step": 1975 + }, + { + "epoch": 0.5197606365489578, + "grad_norm": 6.071372766680396, + "learning_rate": 5e-06, + "loss": 0.1222, + "num_input_tokens_seen": 339403696, + "step": 1976 + }, + { + "epoch": 0.5197606365489578, + "loss": 0.08192337304353714, + "loss_ce": 0.0025166317354887724, + "loss_iou": 0.48046875, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 339403696, + "step": 1976 + }, + { + "epoch": 0.5200236733083449, + "grad_norm": 7.303457699958576, + "learning_rate": 5e-06, + "loss": 0.0961, + "num_input_tokens_seen": 339576232, + "step": 1977 + }, + { + "epoch": 0.5200236733083449, + "loss": 0.06348910182714462, + "loss_ce": 0.0037356873508542776, + "loss_iou": 0.455078125, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 339576232, + "step": 1977 + }, + { + "epoch": 0.520286710067732, + "grad_norm": 4.058844588183022, + "learning_rate": 5e-06, + "loss": 0.0786, + "num_input_tokens_seen": 339748468, + "step": 1978 + }, + { + "epoch": 0.520286710067732, + "loss": 0.07935698330402374, + "loss_ce": 0.00019439200696069747, + "loss_iou": 0.5234375, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 339748468, + "step": 1978 + }, + { + "epoch": 0.5205497468271191, + "grad_norm": 9.520705989391612, + "learning_rate": 5e-06, + "loss": 0.1187, + "num_input_tokens_seen": 339920428, + "step": 1979 + }, + { + "epoch": 0.5205497468271191, + "loss": 0.10209144651889801, + "loss_ce": 0.002482067793607712, + "loss_iou": 0.546875, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 339920428, + "step": 1979 + }, + { + "epoch": 0.5208127835865062, + "grad_norm": 22.26578813252007, + "learning_rate": 5e-06, + "loss": 0.1224, + "num_input_tokens_seen": 340092516, + "step": 1980 + }, + { + "epoch": 0.5208127835865062, + "loss": 0.19446319341659546, + "loss_ce": 0.006719036493450403, + "loss_iou": 0.5703125, + "loss_num": 0.03759765625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 340092516, + "step": 1980 + }, + { + "epoch": 0.5210758203458934, + "grad_norm": 9.547756324115959, + "learning_rate": 5e-06, + "loss": 0.1443, + "num_input_tokens_seen": 340265052, + "step": 1981 + }, + { + "epoch": 0.5210758203458934, + "loss": 0.14365576207637787, + "loss_ce": 0.0007572055910713971, + "loss_iou": 0.60546875, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 340265052, + "step": 1981 + }, + { + "epoch": 0.5213388571052805, + "grad_norm": 7.840208454239884, + "learning_rate": 5e-06, + "loss": 0.1566, + "num_input_tokens_seen": 340436896, + "step": 1982 + }, + { + "epoch": 0.5213388571052805, + "loss": 0.11067777872085571, + "loss_ce": 0.0016995080513879657, + "loss_iou": 0.55859375, + "loss_num": 0.0218505859375, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 340436896, + "step": 1982 + }, + { + "epoch": 0.5216018938646676, + "grad_norm": 24.43574592347015, + "learning_rate": 5e-06, + "loss": 0.1126, + "num_input_tokens_seen": 340609344, + "step": 1983 + }, + { + "epoch": 0.5216018938646676, + "loss": 0.10227973759174347, + "loss_ce": 0.0038605397567152977, + "loss_iou": 0.36328125, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 340609344, + "step": 1983 + }, + { + "epoch": 0.5218649306240547, + "grad_norm": 6.254699637052052, + "learning_rate": 5e-06, + "loss": 0.1266, + "num_input_tokens_seen": 340779700, + "step": 1984 + }, + { + "epoch": 0.5218649306240547, + "loss": 0.1373625099658966, + "loss_ce": 0.0002012598270084709, + "loss_iou": 0.5234375, + "loss_num": 0.0274658203125, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 340779700, + "step": 1984 + }, + { + "epoch": 0.5221279673834418, + "grad_norm": 4.970529369812158, + "learning_rate": 5e-06, + "loss": 0.1176, + "num_input_tokens_seen": 340952012, + "step": 1985 + }, + { + "epoch": 0.5221279673834418, + "loss": 0.07744970917701721, + "loss_ce": 0.00027074594981968403, + "loss_iou": 0.625, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 340952012, + "step": 1985 + }, + { + "epoch": 0.5223910041428289, + "grad_norm": 9.386513844488379, + "learning_rate": 5e-06, + "loss": 0.1525, + "num_input_tokens_seen": 341124172, + "step": 1986 + }, + { + "epoch": 0.5223910041428289, + "loss": 0.16602635383605957, + "loss_ce": 0.0037033448461443186, + "loss_iou": 0.6640625, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 341124172, + "step": 1986 + }, + { + "epoch": 0.5226540409022161, + "grad_norm": 75.36425054276123, + "learning_rate": 5e-06, + "loss": 0.1244, + "num_input_tokens_seen": 341296144, + "step": 1987 + }, + { + "epoch": 0.5226540409022161, + "loss": 0.08925444632768631, + "loss_ce": 0.0014248627703636885, + "loss_iou": 0.51953125, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 341296144, + "step": 1987 + }, + { + "epoch": 0.5229170776616032, + "grad_norm": 6.0244434485136455, + "learning_rate": 5e-06, + "loss": 0.1624, + "num_input_tokens_seen": 341468128, + "step": 1988 + }, + { + "epoch": 0.5229170776616032, + "loss": 0.18709853291511536, + "loss_ce": 0.0007887266110628843, + "loss_iou": 0.65234375, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 341468128, + "step": 1988 + }, + { + "epoch": 0.5231801144209903, + "grad_norm": 6.46446584669901, + "learning_rate": 5e-06, + "loss": 0.1207, + "num_input_tokens_seen": 341640532, + "step": 1989 + }, + { + "epoch": 0.5231801144209903, + "loss": 0.13015246391296387, + "loss_ce": 0.00030016410164535046, + "loss_iou": 0.484375, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 341640532, + "step": 1989 + }, + { + "epoch": 0.5234431511803774, + "grad_norm": 12.345485884753447, + "learning_rate": 5e-06, + "loss": 0.1294, + "num_input_tokens_seen": 341812700, + "step": 1990 + }, + { + "epoch": 0.5234431511803774, + "loss": 0.10668568313121796, + "loss_ce": 0.00027087965281680226, + "loss_iou": 0.55078125, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 341812700, + "step": 1990 + }, + { + "epoch": 0.5237061879397645, + "grad_norm": 3.3770749440530894, + "learning_rate": 5e-06, + "loss": 0.1089, + "num_input_tokens_seen": 341984700, + "step": 1991 + }, + { + "epoch": 0.5237061879397645, + "loss": 0.1165972501039505, + "loss_ce": 0.0006304577691480517, + "loss_iou": 0.431640625, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 341984700, + "step": 1991 + }, + { + "epoch": 0.5239692246991517, + "grad_norm": 9.677414053666478, + "learning_rate": 5e-06, + "loss": 0.1048, + "num_input_tokens_seen": 342156884, + "step": 1992 + }, + { + "epoch": 0.5239692246991517, + "loss": 0.1423652172088623, + "loss_ce": 0.0010383009212091565, + "loss_iou": 0.58203125, + "loss_num": 0.0281982421875, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 342156884, + "step": 1992 + }, + { + "epoch": 0.5242322614585389, + "grad_norm": 3.6750881151911643, + "learning_rate": 5e-06, + "loss": 0.1894, + "num_input_tokens_seen": 342329140, + "step": 1993 + }, + { + "epoch": 0.5242322614585389, + "loss": 0.18173760175704956, + "loss_ce": 0.009221725165843964, + "loss_iou": 0.46484375, + "loss_num": 0.034423828125, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 342329140, + "step": 1993 + }, + { + "epoch": 0.524495298217926, + "grad_norm": 19.882541327036233, + "learning_rate": 5e-06, + "loss": 0.1454, + "num_input_tokens_seen": 342501472, + "step": 1994 + }, + { + "epoch": 0.524495298217926, + "loss": 0.0589841827750206, + "loss_ce": 0.001641655690036714, + "loss_iou": 0.357421875, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 342501472, + "step": 1994 + }, + { + "epoch": 0.5247583349773131, + "grad_norm": 16.32555909861509, + "learning_rate": 5e-06, + "loss": 0.1339, + "num_input_tokens_seen": 342673528, + "step": 1995 + }, + { + "epoch": 0.5247583349773131, + "loss": 0.18063244223594666, + "loss_ce": 0.0004871786804869771, + "loss_iou": 0.5546875, + "loss_num": 0.0361328125, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 342673528, + "step": 1995 + }, + { + "epoch": 0.5250213717367002, + "grad_norm": 9.845315436294685, + "learning_rate": 5e-06, + "loss": 0.155, + "num_input_tokens_seen": 342845892, + "step": 1996 + }, + { + "epoch": 0.5250213717367002, + "loss": 0.17416182160377502, + "loss_ce": 0.0006388599867932498, + "loss_iou": 0.46875, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 342845892, + "step": 1996 + }, + { + "epoch": 0.5252844084960874, + "grad_norm": 4.531599732042389, + "learning_rate": 5e-06, + "loss": 0.1005, + "num_input_tokens_seen": 343018040, + "step": 1997 + }, + { + "epoch": 0.5252844084960874, + "loss": 0.15178070962429047, + "loss_ce": 0.0012069816002622247, + "loss_iou": 0.490234375, + "loss_num": 0.0301513671875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 343018040, + "step": 1997 + }, + { + "epoch": 0.5255474452554745, + "grad_norm": 15.562861804573469, + "learning_rate": 5e-06, + "loss": 0.1677, + "num_input_tokens_seen": 343189936, + "step": 1998 + }, + { + "epoch": 0.5255474452554745, + "loss": 0.1337474286556244, + "loss_ce": 0.0038340911269187927, + "loss_iou": 0.37890625, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 343189936, + "step": 1998 + }, + { + "epoch": 0.5258104820148616, + "grad_norm": 6.338186207451289, + "learning_rate": 5e-06, + "loss": 0.1225, + "num_input_tokens_seen": 343361876, + "step": 1999 + }, + { + "epoch": 0.5258104820148616, + "loss": 0.08655121922492981, + "loss_ce": 0.0022006274666637182, + "loss_iou": 0.5, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 343361876, + "step": 1999 + }, + { + "epoch": 0.5260735187742487, + "grad_norm": 36.314036588641784, + "learning_rate": 5e-06, + "loss": 0.1506, + "num_input_tokens_seen": 343532304, + "step": 2000 + }, + { + "epoch": 0.5260735187742487, + "eval_websight_new_CIoU": 0.8649432361125946, + "eval_websight_new_GIoU": 0.8667054772377014, + "eval_websight_new_IoU": 0.8704231679439545, + "eval_websight_new_MAE_all": 0.021235700696706772, + "eval_websight_new_MAE_h": 0.007067237980663776, + "eval_websight_new_MAE_w": 0.03388772998005152, + "eval_websight_new_MAE_x": 0.03554858546704054, + "eval_websight_new_MAE_y": 0.008439254947006702, + "eval_websight_new_NUM_probability": 0.999984085559845, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 0.10596824437379837, + "eval_websight_new_loss_ce": 6.8271494910732144e-06, + "eval_websight_new_loss_iou": 0.36328125, + "eval_websight_new_loss_num": 0.018640518188476562, + "eval_websight_new_loss_xval": 0.09316253662109375, + "eval_websight_new_runtime": 54.898, + "eval_websight_new_samples_per_second": 0.911, + "eval_websight_new_steps_per_second": 0.036, + "num_input_tokens_seen": 343532304, + "step": 2000 + }, + { + "epoch": 0.5260735187742487, + "eval_seeclick_CIoU": 0.6263805329799652, + "eval_seeclick_GIoU": 0.627049595117569, + "eval_seeclick_IoU": 0.6531890332698822, + "eval_seeclick_MAE_all": 0.050390077754855156, + "eval_seeclick_MAE_h": 0.02558732032775879, + "eval_seeclick_MAE_w": 0.07135490141808987, + "eval_seeclick_MAE_x": 0.07951905764639378, + "eval_seeclick_MAE_y": 0.025099032558500767, + "eval_seeclick_NUM_probability": 0.9999794960021973, + "eval_seeclick_inside_bbox": 0.921875, + "eval_seeclick_loss": 0.22126971185207367, + "eval_seeclick_loss_ce": 0.008990719448775053, + "eval_seeclick_loss_iou": 0.47021484375, + "eval_seeclick_loss_num": 0.04229736328125, + "eval_seeclick_loss_xval": 0.211334228515625, + "eval_seeclick_runtime": 78.1311, + "eval_seeclick_samples_per_second": 0.55, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 343532304, + "step": 2000 + }, + { + "epoch": 0.5260735187742487, + "eval_icons_CIoU": 0.8664080798625946, + "eval_icons_GIoU": 0.8631992936134338, + "eval_icons_IoU": 0.8702020049095154, + "eval_icons_MAE_all": 0.0180177534930408, + "eval_icons_MAE_h": 0.016653602942824364, + "eval_icons_MAE_w": 0.018555423244833946, + "eval_icons_MAE_x": 0.01882947515696287, + "eval_icons_MAE_y": 0.018032516352832317, + "eval_icons_NUM_probability": 0.9999754428863525, + "eval_icons_inside_bbox": 1.0, + "eval_icons_loss": 0.059605177491903305, + "eval_icons_loss_ce": 1.412479377904674e-05, + "eval_icons_loss_iou": 0.5333251953125, + "eval_icons_loss_num": 0.011335372924804688, + "eval_icons_loss_xval": 0.05667877197265625, + "eval_icons_runtime": 80.1161, + "eval_icons_samples_per_second": 0.624, + "eval_icons_steps_per_second": 0.025, + "num_input_tokens_seen": 343532304, + "step": 2000 + }, + { + "epoch": 0.5260735187742487, + "eval_screenspot_CIoU": 0.5386523008346558, + "eval_screenspot_GIoU": 0.5300994714101156, + "eval_screenspot_IoU": 0.5833619435628256, + "eval_screenspot_MAE_all": 0.08941345165173213, + "eval_screenspot_MAE_h": 0.04660519336660703, + "eval_screenspot_MAE_w": 0.1580625375111898, + "eval_screenspot_MAE_x": 0.10776859025160472, + "eval_screenspot_MAE_y": 0.04521748423576355, + "eval_screenspot_NUM_probability": 0.9995922644933065, + "eval_screenspot_inside_bbox": 0.850000003973643, + "eval_screenspot_loss": 0.8645088076591492, + "eval_screenspot_loss_ce": 0.5230478445688883, + "eval_screenspot_loss_iou": 0.4464925130208333, + "eval_screenspot_loss_num": 0.06682078043619792, + "eval_screenspot_loss_xval": 0.3342692057291667, + "eval_screenspot_runtime": 148.9183, + "eval_screenspot_samples_per_second": 0.598, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 343532304, + "step": 2000 + }, + { + "epoch": 0.5260735187742487, + "loss": 0.8609392046928406, + "loss_ce": 0.5118181109428406, + "loss_iou": 0.390625, + "loss_num": 0.06982421875, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 343532304, + "step": 2000 + }, + { + "epoch": 0.5263365555336358, + "grad_norm": 8.429330373903323, + "learning_rate": 5e-06, + "loss": 0.1284, + "num_input_tokens_seen": 343704524, + "step": 2001 + }, + { + "epoch": 0.5263365555336358, + "loss": 0.21621274948120117, + "loss_ce": 0.0006976150907576084, + "loss_iou": 0.2578125, + "loss_num": 0.04296875, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 343704524, + "step": 2001 + }, + { + "epoch": 0.526599592293023, + "grad_norm": 16.618545138422856, + "learning_rate": 5e-06, + "loss": 0.1471, + "num_input_tokens_seen": 343876532, + "step": 2002 + }, + { + "epoch": 0.526599592293023, + "loss": 0.14034898579120636, + "loss_ce": 0.0025926402304321527, + "loss_iou": 0.515625, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 343876532, + "step": 2002 + }, + { + "epoch": 0.5268626290524101, + "grad_norm": 7.347451448031887, + "learning_rate": 5e-06, + "loss": 0.0847, + "num_input_tokens_seen": 344048440, + "step": 2003 + }, + { + "epoch": 0.5268626290524101, + "loss": 0.11765069514513016, + "loss_ce": 0.0015160476323217154, + "loss_iou": 0.376953125, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 344048440, + "step": 2003 + }, + { + "epoch": 0.5271256658117972, + "grad_norm": 11.90854860345847, + "learning_rate": 5e-06, + "loss": 0.1657, + "num_input_tokens_seen": 344220524, + "step": 2004 + }, + { + "epoch": 0.5271256658117972, + "loss": 0.09579437971115112, + "loss_ce": 0.0003353926877025515, + "loss_iou": 0.490234375, + "loss_num": 0.01904296875, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 344220524, + "step": 2004 + }, + { + "epoch": 0.5273887025711843, + "grad_norm": 15.406217883818895, + "learning_rate": 5e-06, + "loss": 0.1745, + "num_input_tokens_seen": 344391120, + "step": 2005 + }, + { + "epoch": 0.5273887025711843, + "loss": 0.08335284888744354, + "loss_ce": 0.005258369259536266, + "loss_iou": 0.419921875, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 344391120, + "step": 2005 + }, + { + "epoch": 0.5276517393305714, + "grad_norm": 6.625644981750321, + "learning_rate": 5e-06, + "loss": 0.1115, + "num_input_tokens_seen": 344563368, + "step": 2006 + }, + { + "epoch": 0.5276517393305714, + "loss": 0.1281585693359375, + "loss_ce": 0.0008697626180946827, + "loss_iou": 0.326171875, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 344563368, + "step": 2006 + }, + { + "epoch": 0.5279147760899586, + "grad_norm": 5.116207808263436, + "learning_rate": 5e-06, + "loss": 0.1025, + "num_input_tokens_seen": 344735272, + "step": 2007 + }, + { + "epoch": 0.5279147760899586, + "loss": 0.10428653657436371, + "loss_ce": 0.00014529118197970092, + "loss_iou": 0.373046875, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 344735272, + "step": 2007 + }, + { + "epoch": 0.5281778128493457, + "grad_norm": 5.758545412441202, + "learning_rate": 5e-06, + "loss": 0.1129, + "num_input_tokens_seen": 344907448, + "step": 2008 + }, + { + "epoch": 0.5281778128493457, + "loss": 0.07528108358383179, + "loss_ce": 0.0017337151803076267, + "loss_iou": 0.59375, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 344907448, + "step": 2008 + }, + { + "epoch": 0.5284408496087328, + "grad_norm": 5.079743263348732, + "learning_rate": 5e-06, + "loss": 0.0995, + "num_input_tokens_seen": 345080020, + "step": 2009 + }, + { + "epoch": 0.5284408496087328, + "loss": 0.06423554569482803, + "loss_ce": 0.0006674337200820446, + "loss_iou": 0.5859375, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 345080020, + "step": 2009 + }, + { + "epoch": 0.5287038863681199, + "grad_norm": 6.791724647885975, + "learning_rate": 5e-06, + "loss": 0.1289, + "num_input_tokens_seen": 345251684, + "step": 2010 + }, + { + "epoch": 0.5287038863681199, + "loss": 0.13502314686775208, + "loss_ce": 0.0006847689510323107, + "loss_iou": 0.484375, + "loss_num": 0.02685546875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 345251684, + "step": 2010 + }, + { + "epoch": 0.528966923127507, + "grad_norm": 7.5731259754899245, + "learning_rate": 5e-06, + "loss": 0.135, + "num_input_tokens_seen": 345421972, + "step": 2011 + }, + { + "epoch": 0.528966923127507, + "loss": 0.10425636172294617, + "loss_ce": 0.0018088552169501781, + "loss_iou": 0.357421875, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 345421972, + "step": 2011 + }, + { + "epoch": 0.5292299598868941, + "grad_norm": 4.024834656974236, + "learning_rate": 5e-06, + "loss": 0.0691, + "num_input_tokens_seen": 345594140, + "step": 2012 + }, + { + "epoch": 0.5292299598868941, + "loss": 0.05266657471656799, + "loss_ce": 0.00011530861956998706, + "loss_iou": 0.53125, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 345594140, + "step": 2012 + }, + { + "epoch": 0.5294929966462814, + "grad_norm": 4.913874816205936, + "learning_rate": 5e-06, + "loss": 0.1334, + "num_input_tokens_seen": 345766220, + "step": 2013 + }, + { + "epoch": 0.5294929966462814, + "loss": 0.08730873465538025, + "loss_ce": 0.0039957487024366856, + "loss_iou": 0.55859375, + "loss_num": 0.0166015625, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 345766220, + "step": 2013 + }, + { + "epoch": 0.5297560334056685, + "grad_norm": 5.534989831922425, + "learning_rate": 5e-06, + "loss": 0.1479, + "num_input_tokens_seen": 345938392, + "step": 2014 + }, + { + "epoch": 0.5297560334056685, + "loss": 0.11161148548126221, + "loss_ce": 0.0032130456529557705, + "loss_iou": 0.63671875, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 345938392, + "step": 2014 + }, + { + "epoch": 0.5300190701650556, + "grad_norm": 7.209865250975092, + "learning_rate": 5e-06, + "loss": 0.15, + "num_input_tokens_seen": 346110656, + "step": 2015 + }, + { + "epoch": 0.5300190701650556, + "loss": 0.12729424238204956, + "loss_ce": 0.0035759946331381798, + "loss_iou": 0.625, + "loss_num": 0.0247802734375, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 346110656, + "step": 2015 + }, + { + "epoch": 0.5302821069244427, + "grad_norm": 5.146240130208517, + "learning_rate": 5e-06, + "loss": 0.1439, + "num_input_tokens_seen": 346283032, + "step": 2016 + }, + { + "epoch": 0.5302821069244427, + "loss": 0.12445038557052612, + "loss_ce": 0.0015866123139858246, + "loss_iou": 0.55078125, + "loss_num": 0.0245361328125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 346283032, + "step": 2016 + }, + { + "epoch": 0.5305451436838298, + "grad_norm": 4.097776262787885, + "learning_rate": 5e-06, + "loss": 0.1471, + "num_input_tokens_seen": 346455396, + "step": 2017 + }, + { + "epoch": 0.5305451436838298, + "loss": 0.08194537460803986, + "loss_ce": 0.0027369949966669083, + "loss_iou": 0.46875, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 346455396, + "step": 2017 + }, + { + "epoch": 0.530808180443217, + "grad_norm": 8.807636765638556, + "learning_rate": 5e-06, + "loss": 0.1434, + "num_input_tokens_seen": 346627308, + "step": 2018 + }, + { + "epoch": 0.530808180443217, + "loss": 0.16153287887573242, + "loss_ce": 0.0020022375974804163, + "loss_iou": 0.37109375, + "loss_num": 0.031982421875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 346627308, + "step": 2018 + }, + { + "epoch": 0.5310712172026041, + "grad_norm": 10.071725544370869, + "learning_rate": 5e-06, + "loss": 0.1096, + "num_input_tokens_seen": 346799504, + "step": 2019 + }, + { + "epoch": 0.5310712172026041, + "loss": 0.10446594655513763, + "loss_ce": 0.0008282391354441643, + "loss_iou": 0.4765625, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 346799504, + "step": 2019 + }, + { + "epoch": 0.5313342539619912, + "grad_norm": 5.791284152439371, + "learning_rate": 5e-06, + "loss": 0.1486, + "num_input_tokens_seen": 346971328, + "step": 2020 + }, + { + "epoch": 0.5313342539619912, + "loss": 0.21368807554244995, + "loss_ce": 0.002475916873663664, + "loss_iou": 0.578125, + "loss_num": 0.042236328125, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 346971328, + "step": 2020 + }, + { + "epoch": 0.5315972907213783, + "grad_norm": 9.778332313218632, + "learning_rate": 5e-06, + "loss": 0.1438, + "num_input_tokens_seen": 347139872, + "step": 2021 + }, + { + "epoch": 0.5315972907213783, + "loss": 0.2882145047187805, + "loss_ce": 0.0021427052561193705, + "loss_iou": 0.5078125, + "loss_num": 0.05712890625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 347139872, + "step": 2021 + }, + { + "epoch": 0.5318603274807654, + "grad_norm": 7.217119694281503, + "learning_rate": 5e-06, + "loss": 0.1443, + "num_input_tokens_seen": 347312176, + "step": 2022 + }, + { + "epoch": 0.5318603274807654, + "loss": 0.14617526531219482, + "loss_ce": 0.0012472879607230425, + "loss_iou": 0.5546875, + "loss_num": 0.029052734375, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 347312176, + "step": 2022 + }, + { + "epoch": 0.5321233642401526, + "grad_norm": 5.444211224879485, + "learning_rate": 5e-06, + "loss": 0.1114, + "num_input_tokens_seen": 347483864, + "step": 2023 + }, + { + "epoch": 0.5321233642401526, + "loss": 0.15533965826034546, + "loss_ce": 0.0009817371610552073, + "loss_iou": 0.47265625, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 347483864, + "step": 2023 + }, + { + "epoch": 0.5323864009995397, + "grad_norm": 5.88671307787278, + "learning_rate": 5e-06, + "loss": 0.1151, + "num_input_tokens_seen": 347654444, + "step": 2024 + }, + { + "epoch": 0.5323864009995397, + "loss": 0.10597267001867294, + "loss_ce": 0.0017551433993503451, + "loss_iou": 0.6484375, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 347654444, + "step": 2024 + }, + { + "epoch": 0.5326494377589268, + "grad_norm": 4.540449525857699, + "learning_rate": 5e-06, + "loss": 0.1326, + "num_input_tokens_seen": 347826732, + "step": 2025 + }, + { + "epoch": 0.5326494377589268, + "loss": 0.10671254992485046, + "loss_ce": 0.002159323776140809, + "loss_iou": 0.51171875, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 347826732, + "step": 2025 + }, + { + "epoch": 0.5329124745183139, + "grad_norm": 12.403111361726802, + "learning_rate": 5e-06, + "loss": 0.1566, + "num_input_tokens_seen": 347995312, + "step": 2026 + }, + { + "epoch": 0.5329124745183139, + "loss": 0.1710810512304306, + "loss_ce": 0.00019787647761404514, + "loss_iou": 0.396484375, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 347995312, + "step": 2026 + }, + { + "epoch": 0.533175511277701, + "grad_norm": 4.826031974791996, + "learning_rate": 5e-06, + "loss": 0.1595, + "num_input_tokens_seen": 348167268, + "step": 2027 + }, + { + "epoch": 0.533175511277701, + "loss": 0.14301443099975586, + "loss_ce": 0.004235736560076475, + "loss_iou": 0.48046875, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 348167268, + "step": 2027 + }, + { + "epoch": 0.5334385480370882, + "grad_norm": 5.858834044093049, + "learning_rate": 5e-06, + "loss": 0.1251, + "num_input_tokens_seen": 348339324, + "step": 2028 + }, + { + "epoch": 0.5334385480370882, + "loss": 0.13567912578582764, + "loss_ce": 0.0013865029904991388, + "loss_iou": 0.5078125, + "loss_num": 0.02685546875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 348339324, + "step": 2028 + }, + { + "epoch": 0.5337015847964753, + "grad_norm": 18.687645390034255, + "learning_rate": 5e-06, + "loss": 0.1488, + "num_input_tokens_seen": 348511688, + "step": 2029 + }, + { + "epoch": 0.5337015847964753, + "loss": 0.09527582675218582, + "loss_ce": 0.0008391792071051896, + "loss_iou": 0.484375, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 348511688, + "step": 2029 + }, + { + "epoch": 0.5339646215558624, + "grad_norm": 13.127056473400154, + "learning_rate": 5e-06, + "loss": 0.1272, + "num_input_tokens_seen": 348682072, + "step": 2030 + }, + { + "epoch": 0.5339646215558624, + "loss": 0.10760138183832169, + "loss_ce": 0.003078682580962777, + "loss_iou": 0.5078125, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 348682072, + "step": 2030 + }, + { + "epoch": 0.5342276583152495, + "grad_norm": 4.612341549056293, + "learning_rate": 5e-06, + "loss": 0.1194, + "num_input_tokens_seen": 348854384, + "step": 2031 + }, + { + "epoch": 0.5342276583152495, + "loss": 0.12830308079719543, + "loss_ce": 0.004981548525393009, + "loss_iou": 0.462890625, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 348854384, + "step": 2031 + }, + { + "epoch": 0.5344906950746366, + "grad_norm": 8.941916447828152, + "learning_rate": 5e-06, + "loss": 0.0967, + "num_input_tokens_seen": 349026540, + "step": 2032 + }, + { + "epoch": 0.5344906950746366, + "loss": 0.11764685809612274, + "loss_ce": 0.0009781570406630635, + "loss_iou": 0.515625, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 349026540, + "step": 2032 + }, + { + "epoch": 0.5347537318340239, + "grad_norm": 3.1856540562472104, + "learning_rate": 5e-06, + "loss": 0.1092, + "num_input_tokens_seen": 349195704, + "step": 2033 + }, + { + "epoch": 0.5347537318340239, + "loss": 0.15431983768939972, + "loss_ce": 0.001899797236546874, + "loss_iou": 0.455078125, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 349195704, + "step": 2033 + }, + { + "epoch": 0.535016768593411, + "grad_norm": 6.665681138831115, + "learning_rate": 5e-06, + "loss": 0.1192, + "num_input_tokens_seen": 349367820, + "step": 2034 + }, + { + "epoch": 0.535016768593411, + "loss": 0.17121072113513947, + "loss_ce": 0.001441433560103178, + "loss_iou": 0.455078125, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 349367820, + "step": 2034 + }, + { + "epoch": 0.5352798053527981, + "grad_norm": 5.921520862992091, + "learning_rate": 5e-06, + "loss": 0.1264, + "num_input_tokens_seen": 349540064, + "step": 2035 + }, + { + "epoch": 0.5352798053527981, + "loss": 0.102360799908638, + "loss_ce": 0.0002947567554656416, + "loss_iou": 0.39453125, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 349540064, + "step": 2035 + }, + { + "epoch": 0.5355428421121852, + "grad_norm": 5.3538979358096785, + "learning_rate": 5e-06, + "loss": 0.0976, + "num_input_tokens_seen": 349710408, + "step": 2036 + }, + { + "epoch": 0.5355428421121852, + "loss": 0.06293447315692902, + "loss_ce": 0.0006481010350398719, + "loss_iou": 0.392578125, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 349710408, + "step": 2036 + }, + { + "epoch": 0.5358058788715723, + "grad_norm": 11.23909243130062, + "learning_rate": 5e-06, + "loss": 0.1226, + "num_input_tokens_seen": 349880916, + "step": 2037 + }, + { + "epoch": 0.5358058788715723, + "loss": 0.1736185997724533, + "loss_ce": 0.003116899635642767, + "loss_iou": 0.474609375, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 349880916, + "step": 2037 + }, + { + "epoch": 0.5360689156309594, + "grad_norm": 11.327258827718145, + "learning_rate": 5e-06, + "loss": 0.1434, + "num_input_tokens_seen": 350053024, + "step": 2038 + }, + { + "epoch": 0.5360689156309594, + "loss": 0.14642956852912903, + "loss_ce": 0.0015015878016129136, + "loss_iou": 0.53515625, + "loss_num": 0.029052734375, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 350053024, + "step": 2038 + }, + { + "epoch": 0.5363319523903466, + "grad_norm": 10.572751348187758, + "learning_rate": 5e-06, + "loss": 0.1315, + "num_input_tokens_seen": 350225056, + "step": 2039 + }, + { + "epoch": 0.5363319523903466, + "loss": 0.08779959380626678, + "loss_ce": 0.0008397561614401639, + "loss_iou": 0.57421875, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 350225056, + "step": 2039 + }, + { + "epoch": 0.5365949891497337, + "grad_norm": 4.830573829484519, + "learning_rate": 5e-06, + "loss": 0.1198, + "num_input_tokens_seen": 350395628, + "step": 2040 + }, + { + "epoch": 0.5365949891497337, + "loss": 0.15380313992500305, + "loss_ce": 0.0012152513954788446, + "loss_iou": 0.53515625, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 350395628, + "step": 2040 + }, + { + "epoch": 0.5368580259091208, + "grad_norm": 6.867013774487234, + "learning_rate": 5e-06, + "loss": 0.1852, + "num_input_tokens_seen": 350567756, + "step": 2041 + }, + { + "epoch": 0.5368580259091208, + "loss": 0.1892320066690445, + "loss_ce": 0.00420394167304039, + "loss_iou": 0.5859375, + "loss_num": 0.037109375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 350567756, + "step": 2041 + }, + { + "epoch": 0.5371210626685079, + "grad_norm": 9.243159752879412, + "learning_rate": 5e-06, + "loss": 0.1028, + "num_input_tokens_seen": 350739840, + "step": 2042 + }, + { + "epoch": 0.5371210626685079, + "loss": 0.1004796102643013, + "loss_ce": 0.00013781688176095486, + "loss_iou": 0.54296875, + "loss_num": 0.02001953125, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 350739840, + "step": 2042 + }, + { + "epoch": 0.537384099427895, + "grad_norm": 11.965655673255789, + "learning_rate": 5e-06, + "loss": 0.1592, + "num_input_tokens_seen": 350912084, + "step": 2043 + }, + { + "epoch": 0.537384099427895, + "loss": 0.20747891068458557, + "loss_ce": 0.0016378372674807906, + "loss_iou": 0.34765625, + "loss_num": 0.041015625, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 350912084, + "step": 2043 + }, + { + "epoch": 0.5376471361872822, + "grad_norm": 3.1288535063063327, + "learning_rate": 5e-06, + "loss": 0.0913, + "num_input_tokens_seen": 351081480, + "step": 2044 + }, + { + "epoch": 0.5376471361872822, + "loss": 0.1123102456331253, + "loss_ce": 0.001989200245589018, + "loss_iou": 0.48828125, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 351081480, + "step": 2044 + }, + { + "epoch": 0.5379101729466693, + "grad_norm": 4.103876123324951, + "learning_rate": 5e-06, + "loss": 0.1013, + "num_input_tokens_seen": 351253596, + "step": 2045 + }, + { + "epoch": 0.5379101729466693, + "loss": 0.11436055600643158, + "loss_ce": 0.0011403337121009827, + "loss_iou": 0.4296875, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 351253596, + "step": 2045 + }, + { + "epoch": 0.5381732097060564, + "grad_norm": 5.468839567129726, + "learning_rate": 5e-06, + "loss": 0.0967, + "num_input_tokens_seen": 351425656, + "step": 2046 + }, + { + "epoch": 0.5381732097060564, + "loss": 0.0928923487663269, + "loss_ce": 0.0006071930401958525, + "loss_iou": 0.5625, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 351425656, + "step": 2046 + }, + { + "epoch": 0.5384362464654435, + "grad_norm": 14.247562489878419, + "learning_rate": 5e-06, + "loss": 0.1135, + "num_input_tokens_seen": 351597816, + "step": 2047 + }, + { + "epoch": 0.5384362464654435, + "loss": 0.2033635675907135, + "loss_ce": 0.0019475510343909264, + "loss_iou": 0.392578125, + "loss_num": 0.040283203125, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 351597816, + "step": 2047 + }, + { + "epoch": 0.5386992832248306, + "grad_norm": 4.3738903937509885, + "learning_rate": 5e-06, + "loss": 0.1305, + "num_input_tokens_seen": 351769980, + "step": 2048 + }, + { + "epoch": 0.5386992832248306, + "loss": 0.059591565281152725, + "loss_ce": 8.22915681055747e-05, + "loss_iou": 0.578125, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 351769980, + "step": 2048 + }, + { + "epoch": 0.5389623199842178, + "grad_norm": 4.415343795853702, + "learning_rate": 5e-06, + "loss": 0.1496, + "num_input_tokens_seen": 351939320, + "step": 2049 + }, + { + "epoch": 0.5389623199842178, + "loss": 0.24034851789474487, + "loss_ce": 0.0008465623832307756, + "loss_iou": 0.703125, + "loss_num": 0.0478515625, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 351939320, + "step": 2049 + }, + { + "epoch": 0.5392253567436049, + "grad_norm": 16.797461240634142, + "learning_rate": 5e-06, + "loss": 0.1059, + "num_input_tokens_seen": 352111376, + "step": 2050 + }, + { + "epoch": 0.5392253567436049, + "loss": 0.14554640650749207, + "loss_ce": 0.0012287711724638939, + "loss_iou": 0.45703125, + "loss_num": 0.02880859375, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 352111376, + "step": 2050 + }, + { + "epoch": 0.539488393502992, + "grad_norm": 7.010827810107144, + "learning_rate": 5e-06, + "loss": 0.1444, + "num_input_tokens_seen": 352283872, + "step": 2051 + }, + { + "epoch": 0.539488393502992, + "loss": 0.1082100197672844, + "loss_ce": 0.0016121190274134278, + "loss_iou": 0.546875, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 352283872, + "step": 2051 + }, + { + "epoch": 0.5397514302623792, + "grad_norm": 5.086953515153334, + "learning_rate": 5e-06, + "loss": 0.0939, + "num_input_tokens_seen": 352456124, + "step": 2052 + }, + { + "epoch": 0.5397514302623792, + "loss": 0.05017915368080139, + "loss_ce": 0.0004965342814102769, + "loss_iou": 0.53125, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 352456124, + "step": 2052 + }, + { + "epoch": 0.5400144670217663, + "grad_norm": 9.025520856951152, + "learning_rate": 5e-06, + "loss": 0.1356, + "num_input_tokens_seen": 352626592, + "step": 2053 + }, + { + "epoch": 0.5400144670217663, + "loss": 0.16316679120063782, + "loss_ce": 0.0005996549734845757, + "loss_iou": 0.3828125, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 352626592, + "step": 2053 + }, + { + "epoch": 0.5402775037811535, + "grad_norm": 6.628974819075429, + "learning_rate": 5e-06, + "loss": 0.1125, + "num_input_tokens_seen": 352798532, + "step": 2054 + }, + { + "epoch": 0.5402775037811535, + "loss": 0.10173699259757996, + "loss_ce": 0.002814263803884387, + "loss_iou": 0.61328125, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 352798532, + "step": 2054 + }, + { + "epoch": 0.5405405405405406, + "grad_norm": 4.016489890807633, + "learning_rate": 5e-06, + "loss": 0.1037, + "num_input_tokens_seen": 352970876, + "step": 2055 + }, + { + "epoch": 0.5405405405405406, + "loss": 0.12183534353971481, + "loss_ce": 0.00010072031000163406, + "loss_iou": 0.578125, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 352970876, + "step": 2055 + }, + { + "epoch": 0.5408035772999277, + "grad_norm": 10.022531221280579, + "learning_rate": 5e-06, + "loss": 0.09, + "num_input_tokens_seen": 353143148, + "step": 2056 + }, + { + "epoch": 0.5408035772999277, + "loss": 0.11547866463661194, + "loss_ce": 0.0016328342026099563, + "loss_iou": 0.45703125, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 353143148, + "step": 2056 + }, + { + "epoch": 0.5410666140593148, + "grad_norm": 4.0813858316372205, + "learning_rate": 5e-06, + "loss": 0.1298, + "num_input_tokens_seen": 353315092, + "step": 2057 + }, + { + "epoch": 0.5410666140593148, + "loss": 0.2096938192844391, + "loss_ce": 0.005348118022084236, + "loss_iou": 0.57421875, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 353315092, + "step": 2057 + }, + { + "epoch": 0.5413296508187019, + "grad_norm": 6.060936634204627, + "learning_rate": 5e-06, + "loss": 0.1086, + "num_input_tokens_seen": 353487316, + "step": 2058 + }, + { + "epoch": 0.5413296508187019, + "loss": 0.0933343917131424, + "loss_ce": 0.0019037279998883605, + "loss_iou": 0.6171875, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 353487316, + "step": 2058 + }, + { + "epoch": 0.541592687578089, + "grad_norm": 5.183981355840974, + "learning_rate": 5e-06, + "loss": 0.1516, + "num_input_tokens_seen": 353659672, + "step": 2059 + }, + { + "epoch": 0.541592687578089, + "loss": 0.1340794712305069, + "loss_ce": 0.001663701143115759, + "loss_iou": 0.6328125, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 353659672, + "step": 2059 + }, + { + "epoch": 0.5418557243374762, + "grad_norm": 4.266501842679085, + "learning_rate": 5e-06, + "loss": 0.1041, + "num_input_tokens_seen": 353829984, + "step": 2060 + }, + { + "epoch": 0.5418557243374762, + "loss": 0.16655325889587402, + "loss_ce": 0.0011174663668498397, + "loss_iou": 0.4609375, + "loss_num": 0.033203125, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 353829984, + "step": 2060 + }, + { + "epoch": 0.5421187610968633, + "grad_norm": 5.044214417820774, + "learning_rate": 5e-06, + "loss": 0.1193, + "num_input_tokens_seen": 354002080, + "step": 2061 + }, + { + "epoch": 0.5421187610968633, + "loss": 0.11388491839170456, + "loss_ce": 0.0009240994695574045, + "loss_iou": 0.412109375, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 354002080, + "step": 2061 + }, + { + "epoch": 0.5423817978562504, + "grad_norm": 5.933256816179162, + "learning_rate": 5e-06, + "loss": 0.0991, + "num_input_tokens_seen": 354174264, + "step": 2062 + }, + { + "epoch": 0.5423817978562504, + "loss": 0.07782945036888123, + "loss_ce": 0.00034532046993263066, + "loss_iou": 0.515625, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 354174264, + "step": 2062 + }, + { + "epoch": 0.5426448346156375, + "grad_norm": 6.3010547634359915, + "learning_rate": 5e-06, + "loss": 0.1807, + "num_input_tokens_seen": 354346228, + "step": 2063 + }, + { + "epoch": 0.5426448346156375, + "loss": 0.2516539692878723, + "loss_ce": 0.0021422426216304302, + "loss_iou": 0.57421875, + "loss_num": 0.0498046875, + "loss_xval": 0.25, + "num_input_tokens_seen": 354346228, + "step": 2063 + }, + { + "epoch": 0.5429078713750246, + "grad_norm": 6.370139521144386, + "learning_rate": 5e-06, + "loss": 0.1299, + "num_input_tokens_seen": 354516236, + "step": 2064 + }, + { + "epoch": 0.5429078713750246, + "loss": 0.1860380321741104, + "loss_ce": 0.0007352972170338035, + "loss_iou": 0.310546875, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 354516236, + "step": 2064 + }, + { + "epoch": 0.5431709081344118, + "grad_norm": 5.277591681669997, + "learning_rate": 5e-06, + "loss": 0.1277, + "num_input_tokens_seen": 354688188, + "step": 2065 + }, + { + "epoch": 0.5431709081344118, + "loss": 0.14346206188201904, + "loss_ce": 0.0007008376996964216, + "loss_iou": 0.462890625, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 354688188, + "step": 2065 + }, + { + "epoch": 0.5434339448937989, + "grad_norm": 4.479693164703334, + "learning_rate": 5e-06, + "loss": 0.1236, + "num_input_tokens_seen": 354860592, + "step": 2066 + }, + { + "epoch": 0.5434339448937989, + "loss": 0.11085185408592224, + "loss_ce": 7.304361497517675e-05, + "loss_iou": 0.44921875, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 354860592, + "step": 2066 + }, + { + "epoch": 0.543696981653186, + "grad_norm": 4.919682261722731, + "learning_rate": 5e-06, + "loss": 0.155, + "num_input_tokens_seen": 355033176, + "step": 2067 + }, + { + "epoch": 0.543696981653186, + "loss": 0.16546472907066345, + "loss_ce": 0.001188602764159441, + "loss_iou": 0.5078125, + "loss_num": 0.032958984375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 355033176, + "step": 2067 + }, + { + "epoch": 0.5439600184125731, + "grad_norm": 13.936900281461735, + "learning_rate": 5e-06, + "loss": 0.1039, + "num_input_tokens_seen": 355205536, + "step": 2068 + }, + { + "epoch": 0.5439600184125731, + "loss": 0.08491555601358414, + "loss_ce": 1.5654470189474523e-05, + "loss_iou": 0.396484375, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 355205536, + "step": 2068 + }, + { + "epoch": 0.5442230551719602, + "grad_norm": 7.496864557637642, + "learning_rate": 5e-06, + "loss": 0.1187, + "num_input_tokens_seen": 355375248, + "step": 2069 + }, + { + "epoch": 0.5442230551719602, + "loss": 0.11474957317113876, + "loss_ce": 0.005954409018158913, + "loss_iou": 0.671875, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 355375248, + "step": 2069 + }, + { + "epoch": 0.5444860919313474, + "grad_norm": 7.5192560384682325, + "learning_rate": 5e-06, + "loss": 0.1416, + "num_input_tokens_seen": 355545380, + "step": 2070 + }, + { + "epoch": 0.5444860919313474, + "loss": 0.156136155128479, + "loss_ce": 0.0023580677807331085, + "loss_iou": 0.41015625, + "loss_num": 0.03076171875, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 355545380, + "step": 2070 + }, + { + "epoch": 0.5447491286907346, + "grad_norm": 5.871220250123853, + "learning_rate": 5e-06, + "loss": 0.1422, + "num_input_tokens_seen": 355717808, + "step": 2071 + }, + { + "epoch": 0.5447491286907346, + "loss": 0.1303853988647461, + "loss_ce": 0.0030660659540444613, + "loss_iou": 0.443359375, + "loss_num": 0.0255126953125, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 355717808, + "step": 2071 + }, + { + "epoch": 0.5450121654501217, + "grad_norm": 5.18139773459563, + "learning_rate": 5e-06, + "loss": 0.1021, + "num_input_tokens_seen": 355889920, + "step": 2072 + }, + { + "epoch": 0.5450121654501217, + "loss": 0.08777904510498047, + "loss_ce": 0.0009260187507607043, + "loss_iou": 0.51953125, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 355889920, + "step": 2072 + }, + { + "epoch": 0.5452752022095088, + "grad_norm": 5.534523394507571, + "learning_rate": 5e-06, + "loss": 0.1325, + "num_input_tokens_seen": 356062284, + "step": 2073 + }, + { + "epoch": 0.5452752022095088, + "loss": 0.07954730838537216, + "loss_ce": 0.0016664512222632766, + "loss_iou": 0.515625, + "loss_num": 0.01556396484375, + "loss_xval": 0.078125, + "num_input_tokens_seen": 356062284, + "step": 2073 + }, + { + "epoch": 0.5455382389688959, + "grad_norm": 5.798318301378571, + "learning_rate": 5e-06, + "loss": 0.13, + "num_input_tokens_seen": 356234572, + "step": 2074 + }, + { + "epoch": 0.5455382389688959, + "loss": 0.11368724703788757, + "loss_ce": 0.0001923761737998575, + "loss_iou": 0.43359375, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 356234572, + "step": 2074 + }, + { + "epoch": 0.5458012757282831, + "grad_norm": 7.153408449848725, + "learning_rate": 5e-06, + "loss": 0.1113, + "num_input_tokens_seen": 356406460, + "step": 2075 + }, + { + "epoch": 0.5458012757282831, + "loss": 0.12134350836277008, + "loss_ce": 0.0004023421206511557, + "loss_iou": 0.55078125, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 356406460, + "step": 2075 + }, + { + "epoch": 0.5460643124876702, + "grad_norm": 5.616257612175736, + "learning_rate": 5e-06, + "loss": 0.1518, + "num_input_tokens_seen": 356578828, + "step": 2076 + }, + { + "epoch": 0.5460643124876702, + "loss": 0.15952152013778687, + "loss_ce": 0.007452425081282854, + "loss_iou": 0.44140625, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 356578828, + "step": 2076 + }, + { + "epoch": 0.5463273492470573, + "grad_norm": 4.268860828505962, + "learning_rate": 5e-06, + "loss": 0.1236, + "num_input_tokens_seen": 356750940, + "step": 2077 + }, + { + "epoch": 0.5463273492470573, + "loss": 0.12928339838981628, + "loss_ce": 0.0006517980364151299, + "loss_iou": 0.53125, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 356750940, + "step": 2077 + }, + { + "epoch": 0.5465903860064444, + "grad_norm": 5.706661955848177, + "learning_rate": 5e-06, + "loss": 0.1054, + "num_input_tokens_seen": 356923100, + "step": 2078 + }, + { + "epoch": 0.5465903860064444, + "loss": 0.07689663022756577, + "loss_ce": 0.002494774293154478, + "loss_iou": 0.51171875, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 356923100, + "step": 2078 + }, + { + "epoch": 0.5468534227658315, + "grad_norm": 6.622312056596725, + "learning_rate": 5e-06, + "loss": 0.1513, + "num_input_tokens_seen": 357095216, + "step": 2079 + }, + { + "epoch": 0.5468534227658315, + "loss": 0.2388056218624115, + "loss_ce": 0.0006769794854335487, + "loss_iou": 0.353515625, + "loss_num": 0.047607421875, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 357095216, + "step": 2079 + }, + { + "epoch": 0.5471164595252187, + "grad_norm": 5.101694254730695, + "learning_rate": 5e-06, + "loss": 0.131, + "num_input_tokens_seen": 357265716, + "step": 2080 + }, + { + "epoch": 0.5471164595252187, + "loss": 0.14891289174556732, + "loss_ce": 0.0022454019635915756, + "loss_iou": 0.482421875, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 357265716, + "step": 2080 + }, + { + "epoch": 0.5473794962846058, + "grad_norm": 7.015564745743221, + "learning_rate": 5e-06, + "loss": 0.1375, + "num_input_tokens_seen": 357437784, + "step": 2081 + }, + { + "epoch": 0.5473794962846058, + "loss": 0.09505030512809753, + "loss_ce": 0.003283948404714465, + "loss_iou": 0.462890625, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 357437784, + "step": 2081 + }, + { + "epoch": 0.5476425330439929, + "grad_norm": 6.361890201898857, + "learning_rate": 5e-06, + "loss": 0.1466, + "num_input_tokens_seen": 357609848, + "step": 2082 + }, + { + "epoch": 0.5476425330439929, + "loss": 0.11097072064876556, + "loss_ce": 0.0005886423168703914, + "loss_iou": 0.5859375, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 357609848, + "step": 2082 + }, + { + "epoch": 0.54790556980338, + "grad_norm": 11.534449289054661, + "learning_rate": 5e-06, + "loss": 0.1522, + "num_input_tokens_seen": 357782120, + "step": 2083 + }, + { + "epoch": 0.54790556980338, + "loss": 0.20264874398708344, + "loss_ce": 0.0027280959766358137, + "loss_iou": 0.322265625, + "loss_num": 0.0400390625, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 357782120, + "step": 2083 + }, + { + "epoch": 0.5481686065627671, + "grad_norm": 5.295463538809882, + "learning_rate": 5e-06, + "loss": 0.1303, + "num_input_tokens_seen": 357954088, + "step": 2084 + }, + { + "epoch": 0.5481686065627671, + "loss": 0.13939827680587769, + "loss_ce": 0.0004822692717425525, + "loss_iou": 0.52734375, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 357954088, + "step": 2084 + }, + { + "epoch": 0.5484316433221542, + "grad_norm": 5.108081687062298, + "learning_rate": 5e-06, + "loss": 0.1277, + "num_input_tokens_seen": 358123060, + "step": 2085 + }, + { + "epoch": 0.5484316433221542, + "loss": 0.11673710495233536, + "loss_ce": 0.002631876850500703, + "loss_iou": 0.53125, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 358123060, + "step": 2085 + }, + { + "epoch": 0.5486946800815414, + "grad_norm": 8.727300119230549, + "learning_rate": 5e-06, + "loss": 0.1803, + "num_input_tokens_seen": 358295276, + "step": 2086 + }, + { + "epoch": 0.5486946800815414, + "loss": 0.21835477650165558, + "loss_ce": 0.0030837799422442913, + "loss_iou": 0.51171875, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 358295276, + "step": 2086 + }, + { + "epoch": 0.5489577168409285, + "grad_norm": 11.066658379743997, + "learning_rate": 5e-06, + "loss": 0.1236, + "num_input_tokens_seen": 358467252, + "step": 2087 + }, + { + "epoch": 0.5489577168409285, + "loss": 0.10189958661794662, + "loss_ce": 0.00464006420224905, + "loss_iou": 0.5390625, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 358467252, + "step": 2087 + }, + { + "epoch": 0.5492207536003156, + "grad_norm": 6.149852742286746, + "learning_rate": 5e-06, + "loss": 0.1068, + "num_input_tokens_seen": 358639392, + "step": 2088 + }, + { + "epoch": 0.5492207536003156, + "loss": 0.11351503431797028, + "loss_ce": 0.0006915500853210688, + "loss_iou": 0.4375, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 358639392, + "step": 2088 + }, + { + "epoch": 0.5494837903597027, + "grad_norm": 4.11715937749224, + "learning_rate": 5e-06, + "loss": 0.0832, + "num_input_tokens_seen": 358811532, + "step": 2089 + }, + { + "epoch": 0.5494837903597027, + "loss": 0.06601230055093765, + "loss_ce": 0.0033291929867118597, + "loss_iou": 0.6640625, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 358811532, + "step": 2089 + }, + { + "epoch": 0.5497468271190898, + "grad_norm": 5.926803480110229, + "learning_rate": 5e-06, + "loss": 0.1478, + "num_input_tokens_seen": 358982120, + "step": 2090 + }, + { + "epoch": 0.5497468271190898, + "loss": 0.165444478392601, + "loss_ce": 0.0021449108608067036, + "loss_iou": 0.40234375, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 358982120, + "step": 2090 + }, + { + "epoch": 0.5500098638784771, + "grad_norm": 3.8279286211407215, + "learning_rate": 5e-06, + "loss": 0.1365, + "num_input_tokens_seen": 359154336, + "step": 2091 + }, + { + "epoch": 0.5500098638784771, + "loss": 0.1499071568250656, + "loss_ce": 0.0012255202746018767, + "loss_iou": 0.46484375, + "loss_num": 0.0296630859375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 359154336, + "step": 2091 + }, + { + "epoch": 0.5502729006378642, + "grad_norm": 11.488893840776536, + "learning_rate": 5e-06, + "loss": 0.1522, + "num_input_tokens_seen": 359326324, + "step": 2092 + }, + { + "epoch": 0.5502729006378642, + "loss": 0.169376939535141, + "loss_ce": 0.0019880137406289577, + "loss_iou": 0.50390625, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 359326324, + "step": 2092 + }, + { + "epoch": 0.5505359373972513, + "grad_norm": 7.053437919021253, + "learning_rate": 5e-06, + "loss": 0.1738, + "num_input_tokens_seen": 359498532, + "step": 2093 + }, + { + "epoch": 0.5505359373972513, + "loss": 0.17925216257572174, + "loss_ce": 0.003562463214620948, + "loss_iou": 0.57421875, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 359498532, + "step": 2093 + }, + { + "epoch": 0.5507989741566384, + "grad_norm": 4.495803041591062, + "learning_rate": 5e-06, + "loss": 0.1178, + "num_input_tokens_seen": 359668724, + "step": 2094 + }, + { + "epoch": 0.5507989741566384, + "loss": 0.16115409135818481, + "loss_ce": 0.001760771730914712, + "loss_iou": 0.5546875, + "loss_num": 0.031982421875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 359668724, + "step": 2094 + }, + { + "epoch": 0.5510620109160255, + "grad_norm": 4.2291818189369135, + "learning_rate": 5e-06, + "loss": 0.0761, + "num_input_tokens_seen": 359841036, + "step": 2095 + }, + { + "epoch": 0.5510620109160255, + "loss": 0.04658431187272072, + "loss_ce": 0.000533288111910224, + "loss_iou": NaN, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 359841036, + "step": 2095 + }, + { + "epoch": 0.5513250476754127, + "grad_norm": 14.952283770001413, + "learning_rate": 5e-06, + "loss": 0.1281, + "num_input_tokens_seen": 360013076, + "step": 2096 + }, + { + "epoch": 0.5513250476754127, + "loss": 0.056577593088150024, + "loss_ce": 0.00405684020370245, + "loss_iou": 0.55078125, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 360013076, + "step": 2096 + }, + { + "epoch": 0.5515880844347998, + "grad_norm": 6.919816354273125, + "learning_rate": 5e-06, + "loss": 0.1052, + "num_input_tokens_seen": 360181792, + "step": 2097 + }, + { + "epoch": 0.5515880844347998, + "loss": 0.08817453682422638, + "loss_ce": 0.0007874465081840754, + "loss_iou": 0.546875, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 360181792, + "step": 2097 + }, + { + "epoch": 0.5518511211941869, + "grad_norm": 4.617511415402225, + "learning_rate": 5e-06, + "loss": 0.1043, + "num_input_tokens_seen": 360353784, + "step": 2098 + }, + { + "epoch": 0.5518511211941869, + "loss": 0.10340078175067902, + "loss_ce": 0.0006938728038221598, + "loss_iou": 0.5234375, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 360353784, + "step": 2098 + }, + { + "epoch": 0.552114157953574, + "grad_norm": 21.83477585251536, + "learning_rate": 5e-06, + "loss": 0.1008, + "num_input_tokens_seen": 360526236, + "step": 2099 + }, + { + "epoch": 0.552114157953574, + "loss": 0.09873877465724945, + "loss_ce": 0.005629643332213163, + "loss_iou": 0.6171875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 360526236, + "step": 2099 + }, + { + "epoch": 0.5523771947129611, + "grad_norm": 22.010760760138638, + "learning_rate": 5e-06, + "loss": 0.1252, + "num_input_tokens_seen": 360698124, + "step": 2100 + }, + { + "epoch": 0.5523771947129611, + "loss": 0.1067737340927124, + "loss_ce": 0.0001147918519563973, + "loss_iou": 0.44921875, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 360698124, + "step": 2100 + }, + { + "epoch": 0.5526402314723483, + "grad_norm": 8.309454312868692, + "learning_rate": 5e-06, + "loss": 0.1585, + "num_input_tokens_seen": 360867340, + "step": 2101 + }, + { + "epoch": 0.5526402314723483, + "loss": 0.1460711508989334, + "loss_ce": 0.002302848733961582, + "loss_iou": 0.6171875, + "loss_num": 0.02880859375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 360867340, + "step": 2101 + }, + { + "epoch": 0.5529032682317354, + "grad_norm": 15.176993944828276, + "learning_rate": 5e-06, + "loss": 0.1412, + "num_input_tokens_seen": 361039524, + "step": 2102 + }, + { + "epoch": 0.5529032682317354, + "loss": 0.18546344339847565, + "loss_ce": 0.003029361367225647, + "loss_iou": 0.546875, + "loss_num": 0.03662109375, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 361039524, + "step": 2102 + }, + { + "epoch": 0.5531663049911225, + "grad_norm": 7.240375850988684, + "learning_rate": 5e-06, + "loss": 0.0944, + "num_input_tokens_seen": 361212152, + "step": 2103 + }, + { + "epoch": 0.5531663049911225, + "loss": 0.1453348845243454, + "loss_ce": 0.0008341491920873523, + "loss_iou": 0.65625, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 361212152, + "step": 2103 + }, + { + "epoch": 0.5534293417505096, + "grad_norm": 4.504467098918756, + "learning_rate": 5e-06, + "loss": 0.1047, + "num_input_tokens_seen": 361384636, + "step": 2104 + }, + { + "epoch": 0.5534293417505096, + "loss": 0.14879915118217468, + "loss_ce": 0.0031692716293036938, + "loss_iou": 0.54296875, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 361384636, + "step": 2104 + }, + { + "epoch": 0.5536923785098967, + "grad_norm": 14.969295535193908, + "learning_rate": 5e-06, + "loss": 0.1179, + "num_input_tokens_seen": 361556736, + "step": 2105 + }, + { + "epoch": 0.5536923785098967, + "loss": 0.14452342689037323, + "loss_ce": 0.0027692681178450584, + "loss_iou": 0.46875, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 361556736, + "step": 2105 + }, + { + "epoch": 0.5539554152692839, + "grad_norm": 4.430127573397192, + "learning_rate": 5e-06, + "loss": 0.1235, + "num_input_tokens_seen": 361726936, + "step": 2106 + }, + { + "epoch": 0.5539554152692839, + "loss": 0.22009092569351196, + "loss_ce": 0.0009747114963829517, + "loss_iou": 0.431640625, + "loss_num": 0.043701171875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 361726936, + "step": 2106 + }, + { + "epoch": 0.554218452028671, + "grad_norm": 4.147747665163274, + "learning_rate": 5e-06, + "loss": 0.1161, + "num_input_tokens_seen": 361899184, + "step": 2107 + }, + { + "epoch": 0.554218452028671, + "loss": 0.13676050305366516, + "loss_ce": 0.0002858861698769033, + "loss_iou": 0.703125, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 361899184, + "step": 2107 + }, + { + "epoch": 0.5544814887880581, + "grad_norm": 3.7276098432513174, + "learning_rate": 5e-06, + "loss": 0.1043, + "num_input_tokens_seen": 362071768, + "step": 2108 + }, + { + "epoch": 0.5544814887880581, + "loss": 0.15069580078125, + "loss_ce": 0.0011291508562862873, + "loss_iou": 0.38671875, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 362071768, + "step": 2108 + }, + { + "epoch": 0.5547445255474452, + "grad_norm": 5.320538695832165, + "learning_rate": 5e-06, + "loss": 0.1057, + "num_input_tokens_seen": 362240216, + "step": 2109 + }, + { + "epoch": 0.5547445255474452, + "loss": 0.09280645847320557, + "loss_ce": 0.001528381835669279, + "loss_iou": 0.52734375, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 362240216, + "step": 2109 + }, + { + "epoch": 0.5550075623068323, + "grad_norm": 11.977017579320059, + "learning_rate": 5e-06, + "loss": 0.1449, + "num_input_tokens_seen": 362412692, + "step": 2110 + }, + { + "epoch": 0.5550075623068323, + "loss": 0.09514741599559784, + "loss_ce": 0.005608838051557541, + "loss_iou": 0.498046875, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 362412692, + "step": 2110 + }, + { + "epoch": 0.5552705990662195, + "grad_norm": 5.133455963412773, + "learning_rate": 5e-06, + "loss": 0.1081, + "num_input_tokens_seen": 362583480, + "step": 2111 + }, + { + "epoch": 0.5552705990662195, + "loss": 0.20054732263088226, + "loss_ce": 0.0018321146490052342, + "loss_iou": NaN, + "loss_num": 0.039794921875, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 362583480, + "step": 2111 + }, + { + "epoch": 0.5555336358256067, + "grad_norm": 13.046079025621871, + "learning_rate": 5e-06, + "loss": 0.1219, + "num_input_tokens_seen": 362755416, + "step": 2112 + }, + { + "epoch": 0.5555336358256067, + "loss": 0.09682411700487137, + "loss_ce": 0.005820699501782656, + "loss_iou": 0.5234375, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 362755416, + "step": 2112 + }, + { + "epoch": 0.5557966725849938, + "grad_norm": 5.170449556654708, + "learning_rate": 5e-06, + "loss": 0.1413, + "num_input_tokens_seen": 362927780, + "step": 2113 + }, + { + "epoch": 0.5557966725849938, + "loss": 0.1499776542186737, + "loss_ce": 0.0003499832237139344, + "loss_iou": 0.5234375, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 362927780, + "step": 2113 + }, + { + "epoch": 0.5560597093443809, + "grad_norm": 4.492587389090409, + "learning_rate": 5e-06, + "loss": 0.1634, + "num_input_tokens_seen": 363100176, + "step": 2114 + }, + { + "epoch": 0.5560597093443809, + "loss": 0.12822586297988892, + "loss_ce": 0.001638943562284112, + "loss_iou": 0.435546875, + "loss_num": 0.0252685546875, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 363100176, + "step": 2114 + }, + { + "epoch": 0.556322746103768, + "grad_norm": 6.53280654225724, + "learning_rate": 5e-06, + "loss": 0.0877, + "num_input_tokens_seen": 363272032, + "step": 2115 + }, + { + "epoch": 0.556322746103768, + "loss": 0.09251531958580017, + "loss_ce": 0.0013898293254896998, + "loss_iou": 0.59375, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 363272032, + "step": 2115 + }, + { + "epoch": 0.5565857828631551, + "grad_norm": 6.772073322495271, + "learning_rate": 5e-06, + "loss": 0.1208, + "num_input_tokens_seen": 363444356, + "step": 2116 + }, + { + "epoch": 0.5565857828631551, + "loss": 0.22190499305725098, + "loss_ce": 0.00468086265027523, + "loss_iou": 0.486328125, + "loss_num": 0.04345703125, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 363444356, + "step": 2116 + }, + { + "epoch": 0.5568488196225423, + "grad_norm": 4.259888500977366, + "learning_rate": 5e-06, + "loss": 0.122, + "num_input_tokens_seen": 363616480, + "step": 2117 + }, + { + "epoch": 0.5568488196225423, + "loss": 0.11629009246826172, + "loss_ce": 0.0017881433013826609, + "loss_iou": 0.59765625, + "loss_num": 0.02294921875, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 363616480, + "step": 2117 + }, + { + "epoch": 0.5571118563819294, + "grad_norm": 5.727482587569898, + "learning_rate": 5e-06, + "loss": 0.12, + "num_input_tokens_seen": 363788792, + "step": 2118 + }, + { + "epoch": 0.5571118563819294, + "loss": 0.14177094399929047, + "loss_ce": 0.0018478452693670988, + "loss_iou": 0.357421875, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 363788792, + "step": 2118 + }, + { + "epoch": 0.5573748931413165, + "grad_norm": 10.912411227157135, + "learning_rate": 5e-06, + "loss": 0.1523, + "num_input_tokens_seen": 363960764, + "step": 2119 + }, + { + "epoch": 0.5573748931413165, + "loss": 0.1305292397737503, + "loss_ce": 0.0011652277316898108, + "loss_iou": 0.46484375, + "loss_num": 0.02587890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 363960764, + "step": 2119 + }, + { + "epoch": 0.5576379299007036, + "grad_norm": 3.94400025085083, + "learning_rate": 5e-06, + "loss": 0.1587, + "num_input_tokens_seen": 364133156, + "step": 2120 + }, + { + "epoch": 0.5576379299007036, + "loss": 0.19977153837680817, + "loss_ce": 0.008945131674408913, + "loss_iou": 0.40625, + "loss_num": 0.0380859375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 364133156, + "step": 2120 + }, + { + "epoch": 0.5579009666600907, + "grad_norm": 3.6301701938092217, + "learning_rate": 5e-06, + "loss": 0.1377, + "num_input_tokens_seen": 364305580, + "step": 2121 + }, + { + "epoch": 0.5579009666600907, + "loss": 0.1176564022898674, + "loss_ce": 0.0034901422914117575, + "loss_iou": 0.55859375, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 364305580, + "step": 2121 + }, + { + "epoch": 0.5581640034194779, + "grad_norm": 19.838531274898244, + "learning_rate": 5e-06, + "loss": 0.091, + "num_input_tokens_seen": 364478040, + "step": 2122 + }, + { + "epoch": 0.5581640034194779, + "loss": 0.06633633375167847, + "loss_ce": 0.0024020099081099033, + "loss_iou": 0.53125, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 364478040, + "step": 2122 + }, + { + "epoch": 0.558427040178865, + "grad_norm": 9.488127206942975, + "learning_rate": 5e-06, + "loss": 0.0873, + "num_input_tokens_seen": 364647728, + "step": 2123 + }, + { + "epoch": 0.558427040178865, + "loss": 0.07145293056964874, + "loss_ce": 0.00010283520532539114, + "loss_iou": 0.5, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 364647728, + "step": 2123 + }, + { + "epoch": 0.5586900769382521, + "grad_norm": 4.8177202903523115, + "learning_rate": 5e-06, + "loss": 0.1471, + "num_input_tokens_seen": 364816920, + "step": 2124 + }, + { + "epoch": 0.5586900769382521, + "loss": 0.13919737935066223, + "loss_ce": 0.002829591976478696, + "loss_iou": 0.494140625, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 364816920, + "step": 2124 + }, + { + "epoch": 0.5589531136976392, + "grad_norm": 3.304918735754454, + "learning_rate": 5e-06, + "loss": 0.0913, + "num_input_tokens_seen": 364989156, + "step": 2125 + }, + { + "epoch": 0.5589531136976392, + "loss": 0.062035560607910156, + "loss_ce": 0.002953530289232731, + "loss_iou": 0.412109375, + "loss_num": 0.01177978515625, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 364989156, + "step": 2125 + }, + { + "epoch": 0.5592161504570263, + "grad_norm": 2.856854904917954, + "learning_rate": 5e-06, + "loss": 0.0901, + "num_input_tokens_seen": 365161052, + "step": 2126 + }, + { + "epoch": 0.5592161504570263, + "loss": 0.11198246479034424, + "loss_ce": 0.007825973443686962, + "loss_iou": 0.33203125, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 365161052, + "step": 2126 + }, + { + "epoch": 0.5594791872164135, + "grad_norm": 3.111770745456122, + "learning_rate": 5e-06, + "loss": 0.196, + "num_input_tokens_seen": 365333284, + "step": 2127 + }, + { + "epoch": 0.5594791872164135, + "loss": 0.0993409976363182, + "loss_ce": 0.00046404742170125246, + "loss_iou": 0.61328125, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 365333284, + "step": 2127 + }, + { + "epoch": 0.5597422239758006, + "grad_norm": 4.17183040056909, + "learning_rate": 5e-06, + "loss": 0.1189, + "num_input_tokens_seen": 365505612, + "step": 2128 + }, + { + "epoch": 0.5597422239758006, + "loss": 0.08107022941112518, + "loss_ce": 0.000320716411806643, + "loss_iou": 0.62890625, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 365505612, + "step": 2128 + }, + { + "epoch": 0.5600052607351877, + "grad_norm": 5.3777331039695175, + "learning_rate": 5e-06, + "loss": 0.1018, + "num_input_tokens_seen": 365677880, + "step": 2129 + }, + { + "epoch": 0.5600052607351877, + "loss": 0.10484224557876587, + "loss_ce": 0.0005636783316731453, + "loss_iou": 0.5625, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 365677880, + "step": 2129 + }, + { + "epoch": 0.5602682974945749, + "grad_norm": 13.479247744568044, + "learning_rate": 5e-06, + "loss": 0.1329, + "num_input_tokens_seen": 365849928, + "step": 2130 + }, + { + "epoch": 0.5602682974945749, + "loss": 0.1259067952632904, + "loss_ce": 0.0024021633435040712, + "loss_iou": 0.484375, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 365849928, + "step": 2130 + }, + { + "epoch": 0.560531334253962, + "grad_norm": 20.004883443678533, + "learning_rate": 5e-06, + "loss": 0.0994, + "num_input_tokens_seen": 366021916, + "step": 2131 + }, + { + "epoch": 0.560531334253962, + "loss": 0.11340343207120895, + "loss_ce": 0.0057374173775315285, + "loss_iou": 0.515625, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 366021916, + "step": 2131 + }, + { + "epoch": 0.5607943710133492, + "grad_norm": 11.042521647809682, + "learning_rate": 5e-06, + "loss": 0.1328, + "num_input_tokens_seen": 366194008, + "step": 2132 + }, + { + "epoch": 0.5607943710133492, + "loss": 0.09386501461267471, + "loss_ce": 0.00017604799359105527, + "loss_iou": 0.640625, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 366194008, + "step": 2132 + }, + { + "epoch": 0.5610574077727363, + "grad_norm": 5.842914268629563, + "learning_rate": 5e-06, + "loss": 0.1394, + "num_input_tokens_seen": 366366212, + "step": 2133 + }, + { + "epoch": 0.5610574077727363, + "loss": 0.11772558093070984, + "loss_ce": 0.0009958385489881039, + "loss_iou": 0.5390625, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 366366212, + "step": 2133 + }, + { + "epoch": 0.5613204445321234, + "grad_norm": 5.960122855306223, + "learning_rate": 5e-06, + "loss": 0.1171, + "num_input_tokens_seen": 366538220, + "step": 2134 + }, + { + "epoch": 0.5613204445321234, + "loss": 0.08179056644439697, + "loss_ce": 0.003818158758804202, + "loss_iou": 0.703125, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 366538220, + "step": 2134 + }, + { + "epoch": 0.5615834812915105, + "grad_norm": 5.389091778733193, + "learning_rate": 5e-06, + "loss": 0.1578, + "num_input_tokens_seen": 366710096, + "step": 2135 + }, + { + "epoch": 0.5615834812915105, + "loss": 0.18556632101535797, + "loss_ce": 0.0002635964483488351, + "loss_iou": 0.408203125, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 366710096, + "step": 2135 + }, + { + "epoch": 0.5618465180508976, + "grad_norm": 7.320192738875152, + "learning_rate": 5e-06, + "loss": 0.1107, + "num_input_tokens_seen": 366880456, + "step": 2136 + }, + { + "epoch": 0.5618465180508976, + "loss": 0.13861671090126038, + "loss_ce": 0.003179695922881365, + "loss_iou": 0.498046875, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 366880456, + "step": 2136 + }, + { + "epoch": 0.5621095548102847, + "grad_norm": 14.114626386817225, + "learning_rate": 5e-06, + "loss": 0.1369, + "num_input_tokens_seen": 367052752, + "step": 2137 + }, + { + "epoch": 0.5621095548102847, + "loss": 0.12559227645397186, + "loss_ce": 0.001477292738854885, + "loss_iou": 0.5234375, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 367052752, + "step": 2137 + }, + { + "epoch": 0.5623725915696719, + "grad_norm": 3.498748268755153, + "learning_rate": 5e-06, + "loss": 0.0967, + "num_input_tokens_seen": 367225060, + "step": 2138 + }, + { + "epoch": 0.5623725915696719, + "loss": 0.08549857884645462, + "loss_ce": 0.000232468664762564, + "loss_iou": 0.69921875, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 367225060, + "step": 2138 + }, + { + "epoch": 0.562635628329059, + "grad_norm": 20.701513936136166, + "learning_rate": 5e-06, + "loss": 0.0738, + "num_input_tokens_seen": 367397332, + "step": 2139 + }, + { + "epoch": 0.562635628329059, + "loss": 0.08525611460208893, + "loss_ce": 0.004125134088099003, + "loss_iou": 0.4921875, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 367397332, + "step": 2139 + }, + { + "epoch": 0.5628986650884461, + "grad_norm": 11.983965441231234, + "learning_rate": 5e-06, + "loss": 0.0957, + "num_input_tokens_seen": 367569560, + "step": 2140 + }, + { + "epoch": 0.5628986650884461, + "loss": 0.06727585196495056, + "loss_ce": 0.00010666107118595392, + "loss_iou": 0.47265625, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 367569560, + "step": 2140 + }, + { + "epoch": 0.5631617018478332, + "grad_norm": 6.398433590682968, + "learning_rate": 5e-06, + "loss": 0.1279, + "num_input_tokens_seen": 367741696, + "step": 2141 + }, + { + "epoch": 0.5631617018478332, + "loss": 0.16246706247329712, + "loss_ce": 0.0022497763857245445, + "loss_iou": 0.298828125, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 367741696, + "step": 2141 + }, + { + "epoch": 0.5634247386072203, + "grad_norm": 4.385698804822032, + "learning_rate": 5e-06, + "loss": 0.1125, + "num_input_tokens_seen": 367913912, + "step": 2142 + }, + { + "epoch": 0.5634247386072203, + "loss": 0.10307023674249649, + "loss_ce": 0.0009889386128634214, + "loss_iou": 0.671875, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 367913912, + "step": 2142 + }, + { + "epoch": 0.5636877753666075, + "grad_norm": 7.856910300327451, + "learning_rate": 5e-06, + "loss": 0.1906, + "num_input_tokens_seen": 368086176, + "step": 2143 + }, + { + "epoch": 0.5636877753666075, + "loss": 0.19684657454490662, + "loss_ce": 0.005714981816709042, + "loss_iou": 0.388671875, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 368086176, + "step": 2143 + }, + { + "epoch": 0.5639508121259946, + "grad_norm": 6.368187003490122, + "learning_rate": 5e-06, + "loss": 0.1156, + "num_input_tokens_seen": 368258440, + "step": 2144 + }, + { + "epoch": 0.5639508121259946, + "loss": 0.1019875556230545, + "loss_ce": 0.0008217811118811369, + "loss_iou": 0.52734375, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 368258440, + "step": 2144 + }, + { + "epoch": 0.5642138488853817, + "grad_norm": 7.190126438403998, + "learning_rate": 5e-06, + "loss": 0.1332, + "num_input_tokens_seen": 368430620, + "step": 2145 + }, + { + "epoch": 0.5642138488853817, + "loss": 0.1344844102859497, + "loss_ce": 0.00020707116345874965, + "loss_iou": 0.546875, + "loss_num": 0.02685546875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 368430620, + "step": 2145 + }, + { + "epoch": 0.5644768856447688, + "grad_norm": 6.755563239238797, + "learning_rate": 5e-06, + "loss": 0.1142, + "num_input_tokens_seen": 368603024, + "step": 2146 + }, + { + "epoch": 0.5644768856447688, + "loss": 0.09109672158956528, + "loss_ce": 0.0006883963942527771, + "loss_iou": 0.51953125, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 368603024, + "step": 2146 + }, + { + "epoch": 0.5647399224041559, + "grad_norm": 4.616140839020028, + "learning_rate": 5e-06, + "loss": 0.1489, + "num_input_tokens_seen": 368775624, + "step": 2147 + }, + { + "epoch": 0.5647399224041559, + "loss": 0.19518432021141052, + "loss_ce": 0.002435298403725028, + "loss_iou": 0.392578125, + "loss_num": 0.03857421875, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 368775624, + "step": 2147 + }, + { + "epoch": 0.5650029591635432, + "grad_norm": 11.734928019848066, + "learning_rate": 5e-06, + "loss": 0.1437, + "num_input_tokens_seen": 368945448, + "step": 2148 + }, + { + "epoch": 0.5650029591635432, + "loss": 0.17952315509319305, + "loss_ce": 0.009143512696027756, + "loss_iou": 0.5703125, + "loss_num": 0.0341796875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 368945448, + "step": 2148 + }, + { + "epoch": 0.5652659959229303, + "grad_norm": 6.710624849462096, + "learning_rate": 5e-06, + "loss": 0.1518, + "num_input_tokens_seen": 369115952, + "step": 2149 + }, + { + "epoch": 0.5652659959229303, + "loss": 0.13233953714370728, + "loss_ce": 0.0019989716820418835, + "loss_iou": 0.310546875, + "loss_num": 0.026123046875, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 369115952, + "step": 2149 + }, + { + "epoch": 0.5655290326823174, + "grad_norm": 6.669120586380705, + "learning_rate": 5e-06, + "loss": 0.1352, + "num_input_tokens_seen": 369288028, + "step": 2150 + }, + { + "epoch": 0.5655290326823174, + "loss": 0.08679264783859253, + "loss_ce": 0.00695866858586669, + "loss_iou": 0.55078125, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 369288028, + "step": 2150 + }, + { + "epoch": 0.5657920694417045, + "grad_norm": 4.502790600863043, + "learning_rate": 5e-06, + "loss": 0.1171, + "num_input_tokens_seen": 369460112, + "step": 2151 + }, + { + "epoch": 0.5657920694417045, + "loss": 0.09304537624120712, + "loss_ce": 0.002102992497384548, + "loss_iou": 0.392578125, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 369460112, + "step": 2151 + }, + { + "epoch": 0.5660551062010916, + "grad_norm": 5.258977022311985, + "learning_rate": 5e-06, + "loss": 0.0818, + "num_input_tokens_seen": 369632404, + "step": 2152 + }, + { + "epoch": 0.5660551062010916, + "loss": 0.08912669122219086, + "loss_ce": 0.0003358002286404371, + "loss_iou": 0.41796875, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 369632404, + "step": 2152 + }, + { + "epoch": 0.5663181429604788, + "grad_norm": 5.721568404254781, + "learning_rate": 5e-06, + "loss": 0.1126, + "num_input_tokens_seen": 369804776, + "step": 2153 + }, + { + "epoch": 0.5663181429604788, + "loss": 0.10320156812667847, + "loss_ce": 0.0007235408993437886, + "loss_iou": 0.490234375, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 369804776, + "step": 2153 + }, + { + "epoch": 0.5665811797198659, + "grad_norm": 4.65695630930855, + "learning_rate": 5e-06, + "loss": 0.1273, + "num_input_tokens_seen": 369976952, + "step": 2154 + }, + { + "epoch": 0.5665811797198659, + "loss": 0.08642168343067169, + "loss_ce": 0.0003621142532210797, + "loss_iou": 0.515625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 369976952, + "step": 2154 + }, + { + "epoch": 0.566844216479253, + "grad_norm": 2.957349659939758, + "learning_rate": 5e-06, + "loss": 0.1046, + "num_input_tokens_seen": 370149132, + "step": 2155 + }, + { + "epoch": 0.566844216479253, + "loss": 0.11206680536270142, + "loss_ce": 0.0026002456434071064, + "loss_iou": 0.61328125, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 370149132, + "step": 2155 + }, + { + "epoch": 0.5671072532386401, + "grad_norm": 13.220367815915187, + "learning_rate": 5e-06, + "loss": 0.1262, + "num_input_tokens_seen": 370321528, + "step": 2156 + }, + { + "epoch": 0.5671072532386401, + "loss": 0.13273131847381592, + "loss_ce": 0.0014446950517594814, + "loss_iou": 0.427734375, + "loss_num": 0.0262451171875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 370321528, + "step": 2156 + }, + { + "epoch": 0.5673702899980272, + "grad_norm": 6.5167958115597955, + "learning_rate": 5e-06, + "loss": 0.0839, + "num_input_tokens_seen": 370493692, + "step": 2157 + }, + { + "epoch": 0.5673702899980272, + "loss": 0.0516238659620285, + "loss_ce": 0.00018648749392013997, + "loss_iou": 0.578125, + "loss_num": 0.01025390625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 370493692, + "step": 2157 + }, + { + "epoch": 0.5676333267574144, + "grad_norm": 9.055547000690966, + "learning_rate": 5e-06, + "loss": 0.1379, + "num_input_tokens_seen": 370666128, + "step": 2158 + }, + { + "epoch": 0.5676333267574144, + "loss": 0.20648962259292603, + "loss_ce": 0.0006485594203695655, + "loss_iou": 0.51953125, + "loss_num": 0.041259765625, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 370666128, + "step": 2158 + }, + { + "epoch": 0.5678963635168015, + "grad_norm": 20.191615671139235, + "learning_rate": 5e-06, + "loss": 0.1168, + "num_input_tokens_seen": 370838096, + "step": 2159 + }, + { + "epoch": 0.5678963635168015, + "loss": 0.1389261931180954, + "loss_ce": 0.0004679340636357665, + "loss_iou": 0.359375, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 370838096, + "step": 2159 + }, + { + "epoch": 0.5681594002761886, + "grad_norm": 16.724015569984758, + "learning_rate": 5e-06, + "loss": 0.0926, + "num_input_tokens_seen": 371010276, + "step": 2160 + }, + { + "epoch": 0.5681594002761886, + "loss": 0.09113931655883789, + "loss_ce": 0.004194741137325764, + "loss_iou": 0.53125, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 371010276, + "step": 2160 + }, + { + "epoch": 0.5684224370355757, + "grad_norm": 9.65595197930286, + "learning_rate": 5e-06, + "loss": 0.1109, + "num_input_tokens_seen": 371180756, + "step": 2161 + }, + { + "epoch": 0.5684224370355757, + "loss": 0.10710924118757248, + "loss_ce": 0.0031358497217297554, + "loss_iou": 0.53515625, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 371180756, + "step": 2161 + }, + { + "epoch": 0.5686854737949628, + "grad_norm": 4.559805216337552, + "learning_rate": 5e-06, + "loss": 0.136, + "num_input_tokens_seen": 371351216, + "step": 2162 + }, + { + "epoch": 0.5686854737949628, + "loss": 0.18528233468532562, + "loss_ce": 0.0012918633874505758, + "loss_iou": 0.421875, + "loss_num": 0.036865234375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 371351216, + "step": 2162 + }, + { + "epoch": 0.5689485105543499, + "grad_norm": 3.9132748752015707, + "learning_rate": 5e-06, + "loss": 0.105, + "num_input_tokens_seen": 371521504, + "step": 2163 + }, + { + "epoch": 0.5689485105543499, + "loss": 0.09377571940422058, + "loss_ce": 0.00017830087745096534, + "loss_iou": 0.54296875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 371521504, + "step": 2163 + }, + { + "epoch": 0.5692115473137371, + "grad_norm": 6.221489940852924, + "learning_rate": 5e-06, + "loss": 0.112, + "num_input_tokens_seen": 371693620, + "step": 2164 + }, + { + "epoch": 0.5692115473137371, + "loss": 0.08065352588891983, + "loss_ce": 0.000270225660642609, + "loss_iou": 0.62109375, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 371693620, + "step": 2164 + }, + { + "epoch": 0.5694745840731242, + "grad_norm": 5.565730297773139, + "learning_rate": 5e-06, + "loss": 0.1544, + "num_input_tokens_seen": 371864020, + "step": 2165 + }, + { + "epoch": 0.5694745840731242, + "loss": 0.14530430734157562, + "loss_ce": 0.0009256468038074672, + "loss_iou": 0.466796875, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 371864020, + "step": 2165 + }, + { + "epoch": 0.5697376208325113, + "grad_norm": 7.807853954323307, + "learning_rate": 5e-06, + "loss": 0.1032, + "num_input_tokens_seen": 372036364, + "step": 2166 + }, + { + "epoch": 0.5697376208325113, + "loss": 0.1258363127708435, + "loss_ce": 0.00034803448943421245, + "loss_iou": 0.5390625, + "loss_num": 0.025146484375, + "loss_xval": 0.125, + "num_input_tokens_seen": 372036364, + "step": 2166 + }, + { + "epoch": 0.5700006575918984, + "grad_norm": 6.119338737471246, + "learning_rate": 5e-06, + "loss": 0.1171, + "num_input_tokens_seen": 372208520, + "step": 2167 + }, + { + "epoch": 0.5700006575918984, + "loss": 0.183029443025589, + "loss_ce": 0.0006258888752199709, + "loss_iou": 0.4609375, + "loss_num": 0.03662109375, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 372208520, + "step": 2167 + }, + { + "epoch": 0.5702636943512855, + "grad_norm": 4.796742469743409, + "learning_rate": 5e-06, + "loss": 0.1689, + "num_input_tokens_seen": 372380908, + "step": 2168 + }, + { + "epoch": 0.5702636943512855, + "loss": 0.24278424680233002, + "loss_ce": 0.006517150904983282, + "loss_iou": 0.6015625, + "loss_num": 0.04736328125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 372380908, + "step": 2168 + }, + { + "epoch": 0.5705267311106728, + "grad_norm": 7.9441210817884444, + "learning_rate": 5e-06, + "loss": 0.1272, + "num_input_tokens_seen": 372553152, + "step": 2169 + }, + { + "epoch": 0.5705267311106728, + "loss": 0.1403554081916809, + "loss_ce": 0.0024769881274551153, + "loss_iou": 0.48046875, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 372553152, + "step": 2169 + }, + { + "epoch": 0.5707897678700599, + "grad_norm": 8.945699343509036, + "learning_rate": 5e-06, + "loss": 0.1424, + "num_input_tokens_seen": 372722892, + "step": 2170 + }, + { + "epoch": 0.5707897678700599, + "loss": 0.17014455795288086, + "loss_ce": 0.0013518218183889985, + "loss_iou": 0.578125, + "loss_num": 0.03369140625, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 372722892, + "step": 2170 + }, + { + "epoch": 0.571052804629447, + "grad_norm": 7.4926547587717405, + "learning_rate": 5e-06, + "loss": 0.1685, + "num_input_tokens_seen": 372895068, + "step": 2171 + }, + { + "epoch": 0.571052804629447, + "loss": 0.2696492373943329, + "loss_ce": 0.0004536675405688584, + "loss_iou": 0.40234375, + "loss_num": 0.053955078125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 372895068, + "step": 2171 + }, + { + "epoch": 0.5713158413888341, + "grad_norm": 17.833109235832993, + "learning_rate": 5e-06, + "loss": 0.1442, + "num_input_tokens_seen": 373067312, + "step": 2172 + }, + { + "epoch": 0.5713158413888341, + "loss": 0.09599291533231735, + "loss_ce": 0.0006254853797145188, + "loss_iou": 0.515625, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 373067312, + "step": 2172 + }, + { + "epoch": 0.5715788781482212, + "grad_norm": 14.290103080283112, + "learning_rate": 5e-06, + "loss": 0.1329, + "num_input_tokens_seen": 373239588, + "step": 2173 + }, + { + "epoch": 0.5715788781482212, + "loss": 0.15090827643871307, + "loss_ce": 0.002836985979229212, + "loss_iou": 0.451171875, + "loss_num": 0.029541015625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 373239588, + "step": 2173 + }, + { + "epoch": 0.5718419149076084, + "grad_norm": 4.534353132879591, + "learning_rate": 5e-06, + "loss": 0.0843, + "num_input_tokens_seen": 373411840, + "step": 2174 + }, + { + "epoch": 0.5718419149076084, + "loss": 0.07687939703464508, + "loss_ce": 0.0007075219764374197, + "loss_iou": 0.578125, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 373411840, + "step": 2174 + }, + { + "epoch": 0.5721049516669955, + "grad_norm": 12.524713753292392, + "learning_rate": 5e-06, + "loss": 0.1176, + "num_input_tokens_seen": 373582408, + "step": 2175 + }, + { + "epoch": 0.5721049516669955, + "loss": 0.17097817361354828, + "loss_ce": 0.0006900950102135539, + "loss_iou": 0.5859375, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 373582408, + "step": 2175 + }, + { + "epoch": 0.5723679884263826, + "grad_norm": 18.760082445449868, + "learning_rate": 5e-06, + "loss": 0.1252, + "num_input_tokens_seen": 373754416, + "step": 2176 + }, + { + "epoch": 0.5723679884263826, + "loss": 0.11672262102365494, + "loss_ce": 0.0011525547597557306, + "loss_iou": 0.40625, + "loss_num": 0.0230712890625, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 373754416, + "step": 2176 + }, + { + "epoch": 0.5726310251857697, + "grad_norm": 8.516129726413212, + "learning_rate": 5e-06, + "loss": 0.1384, + "num_input_tokens_seen": 373922312, + "step": 2177 + }, + { + "epoch": 0.5726310251857697, + "loss": 0.19126161932945251, + "loss_ce": 0.0003436502593103796, + "loss_iou": 0.462890625, + "loss_num": 0.0380859375, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 373922312, + "step": 2177 + }, + { + "epoch": 0.5728940619451568, + "grad_norm": 6.898009634423141, + "learning_rate": 5e-06, + "loss": 0.1795, + "num_input_tokens_seen": 374092720, + "step": 2178 + }, + { + "epoch": 0.5728940619451568, + "loss": 0.15485724806785583, + "loss_ce": 0.0002551883808337152, + "loss_iou": 0.6328125, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 374092720, + "step": 2178 + }, + { + "epoch": 0.573157098704544, + "grad_norm": 6.043872383339532, + "learning_rate": 5e-06, + "loss": 0.1307, + "num_input_tokens_seen": 374264864, + "step": 2179 + }, + { + "epoch": 0.573157098704544, + "loss": 0.12720485031604767, + "loss_ce": 0.0004958686186000705, + "loss_iou": 0.466796875, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 374264864, + "step": 2179 + }, + { + "epoch": 0.5734201354639311, + "grad_norm": 5.166426865666374, + "learning_rate": 5e-06, + "loss": 0.1473, + "num_input_tokens_seen": 374436928, + "step": 2180 + }, + { + "epoch": 0.5734201354639311, + "loss": 0.10286220163106918, + "loss_ce": 0.0005062465788796544, + "loss_iou": 0.6328125, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 374436928, + "step": 2180 + }, + { + "epoch": 0.5736831722233182, + "grad_norm": 3.8181355710391034, + "learning_rate": 5e-06, + "loss": 0.1051, + "num_input_tokens_seen": 374608984, + "step": 2181 + }, + { + "epoch": 0.5736831722233182, + "loss": 0.1574546992778778, + "loss_ce": 0.00025864943745546043, + "loss_iou": 0.59375, + "loss_num": 0.03125, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 374608984, + "step": 2181 + }, + { + "epoch": 0.5739462089827053, + "grad_norm": 9.672382405473702, + "learning_rate": 5e-06, + "loss": 0.1434, + "num_input_tokens_seen": 374779732, + "step": 2182 + }, + { + "epoch": 0.5739462089827053, + "loss": 0.17890840768814087, + "loss_ce": 0.007033395115286112, + "loss_iou": 0.64453125, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 374779732, + "step": 2182 + }, + { + "epoch": 0.5742092457420924, + "grad_norm": 5.19568946256922, + "learning_rate": 5e-06, + "loss": 0.1326, + "num_input_tokens_seen": 374951880, + "step": 2183 + }, + { + "epoch": 0.5742092457420924, + "loss": 0.11335025727748871, + "loss_ce": 0.0024798910599201918, + "loss_iou": 0.5859375, + "loss_num": 0.0220947265625, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 374951880, + "step": 2183 + }, + { + "epoch": 0.5744722825014796, + "grad_norm": 4.272972418213817, + "learning_rate": 5e-06, + "loss": 0.1319, + "num_input_tokens_seen": 375120908, + "step": 2184 + }, + { + "epoch": 0.5744722825014796, + "loss": 0.16440820693969727, + "loss_ce": 0.000330454291542992, + "loss_iou": 0.310546875, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 375120908, + "step": 2184 + }, + { + "epoch": 0.5747353192608667, + "grad_norm": 4.877678921853992, + "learning_rate": 5e-06, + "loss": 0.1378, + "num_input_tokens_seen": 375292940, + "step": 2185 + }, + { + "epoch": 0.5747353192608667, + "loss": 0.19540463387966156, + "loss_ce": 0.0004278157721273601, + "loss_iou": 0.443359375, + "loss_num": 0.0390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 375292940, + "step": 2185 + }, + { + "epoch": 0.5749983560202538, + "grad_norm": 9.795881894231353, + "learning_rate": 5e-06, + "loss": 0.1509, + "num_input_tokens_seen": 375463220, + "step": 2186 + }, + { + "epoch": 0.5749983560202538, + "loss": 0.18603767454624176, + "loss_ce": 0.0036951417569071054, + "loss_iou": 0.48046875, + "loss_num": 0.03662109375, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 375463220, + "step": 2186 + }, + { + "epoch": 0.575261392779641, + "grad_norm": 14.3298059229615, + "learning_rate": 5e-06, + "loss": 0.1165, + "num_input_tokens_seen": 375633332, + "step": 2187 + }, + { + "epoch": 0.575261392779641, + "loss": 0.11824968457221985, + "loss_ce": 0.0010011474369093776, + "loss_iou": 0.546875, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 375633332, + "step": 2187 + }, + { + "epoch": 0.575524429539028, + "grad_norm": 5.9170529044976305, + "learning_rate": 5e-06, + "loss": 0.1216, + "num_input_tokens_seen": 375803896, + "step": 2188 + }, + { + "epoch": 0.575524429539028, + "loss": 0.09120282530784607, + "loss_ce": 0.0025797830894589424, + "loss_iou": 0.5625, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 375803896, + "step": 2188 + }, + { + "epoch": 0.5757874662984152, + "grad_norm": 4.8523353127793944, + "learning_rate": 5e-06, + "loss": 0.1571, + "num_input_tokens_seen": 375976212, + "step": 2189 + }, + { + "epoch": 0.5757874662984152, + "loss": 0.11096520721912384, + "loss_ce": 0.0007662302814424038, + "loss_iou": 0.54296875, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 375976212, + "step": 2189 + }, + { + "epoch": 0.5760505030578024, + "grad_norm": 3.7437825333865065, + "learning_rate": 5e-06, + "loss": 0.1176, + "num_input_tokens_seen": 376148372, + "step": 2190 + }, + { + "epoch": 0.5760505030578024, + "loss": 0.1311783641576767, + "loss_ce": 0.0013108099810779095, + "loss_iou": 0.5546875, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 376148372, + "step": 2190 + }, + { + "epoch": 0.5763135398171895, + "grad_norm": 3.8863610288761765, + "learning_rate": 5e-06, + "loss": 0.106, + "num_input_tokens_seen": 376320448, + "step": 2191 + }, + { + "epoch": 0.5763135398171895, + "loss": 0.08886295557022095, + "loss_ce": 0.00014836144691798836, + "loss_iou": 0.53515625, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 376320448, + "step": 2191 + }, + { + "epoch": 0.5765765765765766, + "grad_norm": 26.72560789877691, + "learning_rate": 5e-06, + "loss": 0.1242, + "num_input_tokens_seen": 376492652, + "step": 2192 + }, + { + "epoch": 0.5765765765765766, + "loss": 0.09363338351249695, + "loss_ce": 0.002172202803194523, + "loss_iou": 0.458984375, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 376492652, + "step": 2192 + }, + { + "epoch": 0.5768396133359637, + "grad_norm": 3.4367691751013334, + "learning_rate": 5e-06, + "loss": 0.1017, + "num_input_tokens_seen": 376665088, + "step": 2193 + }, + { + "epoch": 0.5768396133359637, + "loss": 0.05514270067214966, + "loss_ce": 0.0010502950754016638, + "loss_iou": 0.4765625, + "loss_num": 0.01080322265625, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 376665088, + "step": 2193 + }, + { + "epoch": 0.5771026500953508, + "grad_norm": 9.788333451803501, + "learning_rate": 5e-06, + "loss": 0.1029, + "num_input_tokens_seen": 376837384, + "step": 2194 + }, + { + "epoch": 0.5771026500953508, + "loss": 0.09362407773733139, + "loss_ce": 0.0004081379738636315, + "loss_iou": 0.55078125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 376837384, + "step": 2194 + }, + { + "epoch": 0.577365686854738, + "grad_norm": 5.1293333388841935, + "learning_rate": 5e-06, + "loss": 0.1096, + "num_input_tokens_seen": 377006200, + "step": 2195 + }, + { + "epoch": 0.577365686854738, + "loss": 0.14207029342651367, + "loss_ce": 0.0032763422932475805, + "loss_iou": 0.54296875, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 377006200, + "step": 2195 + }, + { + "epoch": 0.5776287236141251, + "grad_norm": 9.582310699127605, + "learning_rate": 5e-06, + "loss": 0.1034, + "num_input_tokens_seen": 377178552, + "step": 2196 + }, + { + "epoch": 0.5776287236141251, + "loss": 0.14796333014965057, + "loss_ce": 0.0009296314674429595, + "loss_iou": 0.470703125, + "loss_num": 0.0294189453125, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 377178552, + "step": 2196 + }, + { + "epoch": 0.5778917603735122, + "grad_norm": 5.51238614508857, + "learning_rate": 5e-06, + "loss": 0.1782, + "num_input_tokens_seen": 377350508, + "step": 2197 + }, + { + "epoch": 0.5778917603735122, + "loss": 0.12102293223142624, + "loss_ce": 0.011342758312821388, + "loss_iou": 0.66015625, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 377350508, + "step": 2197 + }, + { + "epoch": 0.5781547971328993, + "grad_norm": 12.700643820920646, + "learning_rate": 5e-06, + "loss": 0.141, + "num_input_tokens_seen": 377522740, + "step": 2198 + }, + { + "epoch": 0.5781547971328993, + "loss": 0.13567548990249634, + "loss_ce": 0.004617748782038689, + "loss_iou": 0.53125, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 377522740, + "step": 2198 + }, + { + "epoch": 0.5784178338922864, + "grad_norm": 10.703599236021498, + "learning_rate": 5e-06, + "loss": 0.1364, + "num_input_tokens_seen": 377694584, + "step": 2199 + }, + { + "epoch": 0.5784178338922864, + "loss": 0.17266914248466492, + "loss_ce": 0.0024420833215117455, + "loss_iou": 0.640625, + "loss_num": 0.0341796875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 377694584, + "step": 2199 + }, + { + "epoch": 0.5786808706516736, + "grad_norm": 3.7187673646063684, + "learning_rate": 5e-06, + "loss": 0.0953, + "num_input_tokens_seen": 377866484, + "step": 2200 + }, + { + "epoch": 0.5786808706516736, + "loss": 0.04466433823108673, + "loss_ce": 0.0001697081606835127, + "loss_iou": 0.486328125, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 377866484, + "step": 2200 + }, + { + "epoch": 0.5789439074110607, + "grad_norm": 6.267393061703399, + "learning_rate": 5e-06, + "loss": 0.1317, + "num_input_tokens_seen": 378038564, + "step": 2201 + }, + { + "epoch": 0.5789439074110607, + "loss": 0.18358194828033447, + "loss_ce": 0.0036503085866570473, + "loss_iou": 0.416015625, + "loss_num": 0.0361328125, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 378038564, + "step": 2201 + }, + { + "epoch": 0.5792069441704478, + "grad_norm": 8.846753851466788, + "learning_rate": 5e-06, + "loss": 0.1165, + "num_input_tokens_seen": 378211032, + "step": 2202 + }, + { + "epoch": 0.5792069441704478, + "loss": 0.14636895060539246, + "loss_ce": 0.0015783084090799093, + "loss_iou": 0.3203125, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 378211032, + "step": 2202 + }, + { + "epoch": 0.5794699809298349, + "grad_norm": 9.056222556282368, + "learning_rate": 5e-06, + "loss": 0.1862, + "num_input_tokens_seen": 378383136, + "step": 2203 + }, + { + "epoch": 0.5794699809298349, + "loss": 0.10566692054271698, + "loss_ce": 0.003677169792354107, + "loss_iou": 0.46484375, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 378383136, + "step": 2203 + }, + { + "epoch": 0.579733017689222, + "grad_norm": 4.9933922483079725, + "learning_rate": 5e-06, + "loss": 0.1063, + "num_input_tokens_seen": 378553532, + "step": 2204 + }, + { + "epoch": 0.579733017689222, + "loss": 0.10826604068279266, + "loss_ce": 0.001820725854486227, + "loss_iou": 0.74609375, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 378553532, + "step": 2204 + }, + { + "epoch": 0.5799960544486092, + "grad_norm": 15.853504685750568, + "learning_rate": 5e-06, + "loss": 0.0868, + "num_input_tokens_seen": 378725804, + "step": 2205 + }, + { + "epoch": 0.5799960544486092, + "loss": 0.05737042799592018, + "loss_ce": 0.0001499689242336899, + "loss_iou": 0.451171875, + "loss_num": 0.011474609375, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 378725804, + "step": 2205 + }, + { + "epoch": 0.5802590912079963, + "grad_norm": 10.934142417787772, + "learning_rate": 5e-06, + "loss": 0.112, + "num_input_tokens_seen": 378897924, + "step": 2206 + }, + { + "epoch": 0.5802590912079963, + "loss": 0.08355730026960373, + "loss_ce": 0.004913499113172293, + "loss_iou": 0.52734375, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 378897924, + "step": 2206 + }, + { + "epoch": 0.5805221279673835, + "grad_norm": 12.227334484970761, + "learning_rate": 5e-06, + "loss": 0.1194, + "num_input_tokens_seen": 379070348, + "step": 2207 + }, + { + "epoch": 0.5805221279673835, + "loss": 0.07382334768772125, + "loss_ce": 0.00047435000305995345, + "loss_iou": 0.5078125, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 379070348, + "step": 2207 + }, + { + "epoch": 0.5807851647267706, + "grad_norm": 5.142567427993278, + "learning_rate": 5e-06, + "loss": 0.1598, + "num_input_tokens_seen": 379242648, + "step": 2208 + }, + { + "epoch": 0.5807851647267706, + "loss": 0.06755711138248444, + "loss_ce": 0.0022494932636618614, + "loss_iou": 0.62890625, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 379242648, + "step": 2208 + }, + { + "epoch": 0.5810482014861577, + "grad_norm": 4.5313412775127055, + "learning_rate": 5e-06, + "loss": 0.1149, + "num_input_tokens_seen": 379414684, + "step": 2209 + }, + { + "epoch": 0.5810482014861577, + "loss": 0.050227776169776917, + "loss_ce": 0.0009724035626277328, + "loss_iou": 0.478515625, + "loss_num": 0.00982666015625, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 379414684, + "step": 2209 + }, + { + "epoch": 0.5813112382455449, + "grad_norm": 51.12006755064005, + "learning_rate": 5e-06, + "loss": 0.1344, + "num_input_tokens_seen": 379582192, + "step": 2210 + }, + { + "epoch": 0.5813112382455449, + "loss": 0.11350201815366745, + "loss_ce": 0.004676333162933588, + "loss_iou": 0.546875, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 379582192, + "step": 2210 + }, + { + "epoch": 0.581574275004932, + "grad_norm": 18.708708783651804, + "learning_rate": 5e-06, + "loss": 0.1234, + "num_input_tokens_seen": 379754536, + "step": 2211 + }, + { + "epoch": 0.581574275004932, + "loss": 0.09766032546758652, + "loss_ce": 0.0012552967527881265, + "loss_iou": 0.44140625, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 379754536, + "step": 2211 + }, + { + "epoch": 0.5818373117643191, + "grad_norm": 5.58601503702725, + "learning_rate": 5e-06, + "loss": 0.1617, + "num_input_tokens_seen": 379926528, + "step": 2212 + }, + { + "epoch": 0.5818373117643191, + "loss": 0.18606778979301453, + "loss_ce": 0.0023214598186314106, + "loss_iou": 0.50390625, + "loss_num": 0.036865234375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 379926528, + "step": 2212 + }, + { + "epoch": 0.5821003485237062, + "grad_norm": 3.8208876250172965, + "learning_rate": 5e-06, + "loss": 0.1334, + "num_input_tokens_seen": 380096704, + "step": 2213 + }, + { + "epoch": 0.5821003485237062, + "loss": 0.18421456217765808, + "loss_ce": 0.001994105987250805, + "loss_iou": 0.455078125, + "loss_num": 0.036376953125, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 380096704, + "step": 2213 + }, + { + "epoch": 0.5823633852830933, + "grad_norm": 4.7316491932517595, + "learning_rate": 5e-06, + "loss": 0.1152, + "num_input_tokens_seen": 380269136, + "step": 2214 + }, + { + "epoch": 0.5823633852830933, + "loss": 0.23052500188350677, + "loss_ce": 0.0009107402293011546, + "loss_iou": 0.65625, + "loss_num": 0.0458984375, + "loss_xval": 0.2294921875, + "num_input_tokens_seen": 380269136, + "step": 2214 + }, + { + "epoch": 0.5826264220424804, + "grad_norm": 5.131239937970846, + "learning_rate": 5e-06, + "loss": 0.1306, + "num_input_tokens_seen": 380441472, + "step": 2215 + }, + { + "epoch": 0.5826264220424804, + "loss": 0.09817831218242645, + "loss_ce": 0.0009187856921926141, + "loss_iou": 0.4921875, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 380441472, + "step": 2215 + }, + { + "epoch": 0.5828894588018676, + "grad_norm": 5.520590188036205, + "learning_rate": 5e-06, + "loss": 0.1, + "num_input_tokens_seen": 380613832, + "step": 2216 + }, + { + "epoch": 0.5828894588018676, + "loss": 0.09423954784870148, + "loss_ce": 0.0019696487579494715, + "loss_iou": 0.46875, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 380613832, + "step": 2216 + }, + { + "epoch": 0.5831524955612547, + "grad_norm": 18.191266422169875, + "learning_rate": 5e-06, + "loss": 0.1093, + "num_input_tokens_seen": 380785988, + "step": 2217 + }, + { + "epoch": 0.5831524955612547, + "loss": 0.135514497756958, + "loss_ce": 0.0037701106630265713, + "loss_iou": 0.54296875, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 380785988, + "step": 2217 + }, + { + "epoch": 0.5834155323206418, + "grad_norm": 13.995891706301725, + "learning_rate": 5e-06, + "loss": 0.1004, + "num_input_tokens_seen": 380958116, + "step": 2218 + }, + { + "epoch": 0.5834155323206418, + "loss": 0.07710295170545578, + "loss_ce": 0.00120573490858078, + "loss_iou": 0.60546875, + "loss_num": 0.01519775390625, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 380958116, + "step": 2218 + }, + { + "epoch": 0.5836785690800289, + "grad_norm": 3.998007795342675, + "learning_rate": 5e-06, + "loss": 0.1081, + "num_input_tokens_seen": 381130168, + "step": 2219 + }, + { + "epoch": 0.5836785690800289, + "loss": 0.17625044286251068, + "loss_ce": 0.0008964374428614974, + "loss_iou": 0.494140625, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 381130168, + "step": 2219 + }, + { + "epoch": 0.583941605839416, + "grad_norm": 8.315710539211903, + "learning_rate": 5e-06, + "loss": 0.1583, + "num_input_tokens_seen": 381302476, + "step": 2220 + }, + { + "epoch": 0.583941605839416, + "loss": 0.20635367929935455, + "loss_ce": 0.002496248111128807, + "loss_iou": 0.5703125, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 381302476, + "step": 2220 + }, + { + "epoch": 0.5842046425988032, + "grad_norm": 9.769478710683881, + "learning_rate": 5e-06, + "loss": 0.1236, + "num_input_tokens_seen": 381474700, + "step": 2221 + }, + { + "epoch": 0.5842046425988032, + "loss": 0.1044168546795845, + "loss_ce": 0.0017252071993425488, + "loss_iou": 0.447265625, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 381474700, + "step": 2221 + }, + { + "epoch": 0.5844676793581903, + "grad_norm": 5.959703884673753, + "learning_rate": 5e-06, + "loss": 0.1345, + "num_input_tokens_seen": 381646792, + "step": 2222 + }, + { + "epoch": 0.5844676793581903, + "loss": 0.20204247534275055, + "loss_ce": 0.0024575116112828255, + "loss_iou": 0.5625, + "loss_num": 0.0400390625, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 381646792, + "step": 2222 + }, + { + "epoch": 0.5847307161175774, + "grad_norm": 4.639177291482811, + "learning_rate": 5e-06, + "loss": 0.1265, + "num_input_tokens_seen": 381818872, + "step": 2223 + }, + { + "epoch": 0.5847307161175774, + "loss": 0.1277788281440735, + "loss_ce": 0.007478540297597647, + "loss_iou": 0.484375, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 381818872, + "step": 2223 + }, + { + "epoch": 0.5849937528769645, + "grad_norm": 4.556750582512925, + "learning_rate": 5e-06, + "loss": 0.1011, + "num_input_tokens_seen": 381991284, + "step": 2224 + }, + { + "epoch": 0.5849937528769645, + "loss": 0.06165578216314316, + "loss_ce": 0.003062034724280238, + "loss_iou": 0.51953125, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 381991284, + "step": 2224 + }, + { + "epoch": 0.5852567896363516, + "grad_norm": 5.717259207619562, + "learning_rate": 5e-06, + "loss": 0.1228, + "num_input_tokens_seen": 382163040, + "step": 2225 + }, + { + "epoch": 0.5852567896363516, + "loss": 0.08200475573539734, + "loss_ce": 0.0018045613542199135, + "loss_iou": 0.39453125, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 382163040, + "step": 2225 + }, + { + "epoch": 0.5855198263957389, + "grad_norm": 4.765363459193876, + "learning_rate": 5e-06, + "loss": 0.1253, + "num_input_tokens_seen": 382335172, + "step": 2226 + }, + { + "epoch": 0.5855198263957389, + "loss": 0.08970290422439575, + "loss_ce": 0.001568139297887683, + "loss_iou": 0.7109375, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 382335172, + "step": 2226 + }, + { + "epoch": 0.585782863155126, + "grad_norm": 5.040130370486427, + "learning_rate": 5e-06, + "loss": 0.0958, + "num_input_tokens_seen": 382507360, + "step": 2227 + }, + { + "epoch": 0.585782863155126, + "loss": 0.06643694639205933, + "loss_ce": 0.0008241523755714297, + "loss_iou": 0.5390625, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 382507360, + "step": 2227 + }, + { + "epoch": 0.5860458999145131, + "grad_norm": 27.12917591646992, + "learning_rate": 5e-06, + "loss": 0.1669, + "num_input_tokens_seen": 382677836, + "step": 2228 + }, + { + "epoch": 0.5860458999145131, + "loss": 0.11577419936656952, + "loss_ce": 0.0032864054664969444, + "loss_iou": 0.53515625, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 382677836, + "step": 2228 + }, + { + "epoch": 0.5863089366739002, + "grad_norm": 5.353068404352155, + "learning_rate": 5e-06, + "loss": 0.1262, + "num_input_tokens_seen": 382849968, + "step": 2229 + }, + { + "epoch": 0.5863089366739002, + "loss": 0.11902253329753876, + "loss_ce": 0.0011636477429419756, + "loss_iou": 0.466796875, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 382849968, + "step": 2229 + }, + { + "epoch": 0.5865719734332873, + "grad_norm": 44.6262248448857, + "learning_rate": 5e-06, + "loss": 0.1062, + "num_input_tokens_seen": 383022244, + "step": 2230 + }, + { + "epoch": 0.5865719734332873, + "loss": 0.08623628318309784, + "loss_ce": 0.0015957842115312815, + "loss_iou": 0.62109375, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 383022244, + "step": 2230 + }, + { + "epoch": 0.5868350101926745, + "grad_norm": 11.243592990037332, + "learning_rate": 5e-06, + "loss": 0.162, + "num_input_tokens_seen": 383190620, + "step": 2231 + }, + { + "epoch": 0.5868350101926745, + "loss": 0.11198394745588303, + "loss_ce": 0.0013882413040846586, + "loss_iou": 0.490234375, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 383190620, + "step": 2231 + }, + { + "epoch": 0.5870980469520616, + "grad_norm": 4.97022834965809, + "learning_rate": 5e-06, + "loss": 0.1225, + "num_input_tokens_seen": 383362980, + "step": 2232 + }, + { + "epoch": 0.5870980469520616, + "loss": 0.17192208766937256, + "loss_ce": 0.0035566147416830063, + "loss_iou": 0.447265625, + "loss_num": 0.03369140625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 383362980, + "step": 2232 + }, + { + "epoch": 0.5873610837114487, + "grad_norm": 4.21141022600957, + "learning_rate": 5e-06, + "loss": 0.1084, + "num_input_tokens_seen": 383535200, + "step": 2233 + }, + { + "epoch": 0.5873610837114487, + "loss": 0.1112288236618042, + "loss_ce": 0.00690448796376586, + "loss_iou": 0.56640625, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 383535200, + "step": 2233 + }, + { + "epoch": 0.5876241204708358, + "grad_norm": 4.036382922773338, + "learning_rate": 5e-06, + "loss": 0.0752, + "num_input_tokens_seen": 383705348, + "step": 2234 + }, + { + "epoch": 0.5876241204708358, + "loss": 0.05898036062717438, + "loss_ce": 0.001027482096105814, + "loss_iou": 0.59765625, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 383705348, + "step": 2234 + }, + { + "epoch": 0.5878871572302229, + "grad_norm": 7.244981585553582, + "learning_rate": 5e-06, + "loss": 0.1284, + "num_input_tokens_seen": 383877692, + "step": 2235 + }, + { + "epoch": 0.5878871572302229, + "loss": 0.10776747018098831, + "loss_ce": 0.0007423229981213808, + "loss_iou": 0.58984375, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 383877692, + "step": 2235 + }, + { + "epoch": 0.58815019398961, + "grad_norm": 3.396542457402831, + "learning_rate": 5e-06, + "loss": 0.1261, + "num_input_tokens_seen": 384049540, + "step": 2236 + }, + { + "epoch": 0.58815019398961, + "loss": 0.1896773874759674, + "loss_ce": 0.0011398009955883026, + "loss_iou": 0.72265625, + "loss_num": 0.03759765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 384049540, + "step": 2236 + }, + { + "epoch": 0.5884132307489972, + "grad_norm": 8.64901320262372, + "learning_rate": 5e-06, + "loss": 0.1399, + "num_input_tokens_seen": 384221584, + "step": 2237 + }, + { + "epoch": 0.5884132307489972, + "loss": 0.12090113013982773, + "loss_ce": 0.0009365270379930735, + "loss_iou": 0.5078125, + "loss_num": 0.02392578125, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 384221584, + "step": 2237 + }, + { + "epoch": 0.5886762675083843, + "grad_norm": 4.107798805009285, + "learning_rate": 5e-06, + "loss": 0.0964, + "num_input_tokens_seen": 384393708, + "step": 2238 + }, + { + "epoch": 0.5886762675083843, + "loss": 0.10467529296875, + "loss_ce": 0.0007019043550826609, + "loss_iou": 0.51171875, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 384393708, + "step": 2238 + }, + { + "epoch": 0.5889393042677714, + "grad_norm": 5.687252203871965, + "learning_rate": 5e-06, + "loss": 0.1541, + "num_input_tokens_seen": 384566100, + "step": 2239 + }, + { + "epoch": 0.5889393042677714, + "loss": 0.2502034306526184, + "loss_ce": 0.004079162143170834, + "loss_iou": 0.5078125, + "loss_num": 0.04931640625, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 384566100, + "step": 2239 + }, + { + "epoch": 0.5892023410271585, + "grad_norm": 5.391300577620961, + "learning_rate": 5e-06, + "loss": 0.0922, + "num_input_tokens_seen": 384738000, + "step": 2240 + }, + { + "epoch": 0.5892023410271585, + "loss": 0.09789521992206573, + "loss_ce": 0.0031381379812955856, + "loss_iou": 0.63671875, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 384738000, + "step": 2240 + }, + { + "epoch": 0.5894653777865456, + "grad_norm": 2.700202309873461, + "learning_rate": 5e-06, + "loss": 0.1126, + "num_input_tokens_seen": 384910408, + "step": 2241 + }, + { + "epoch": 0.5894653777865456, + "loss": 0.18347935378551483, + "loss_ce": 0.00031284932629205287, + "loss_iou": 0.484375, + "loss_num": 0.03662109375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 384910408, + "step": 2241 + }, + { + "epoch": 0.5897284145459328, + "grad_norm": 8.687970034600534, + "learning_rate": 5e-06, + "loss": 0.1178, + "num_input_tokens_seen": 385082740, + "step": 2242 + }, + { + "epoch": 0.5897284145459328, + "loss": 0.11214028298854828, + "loss_ce": 0.0005375072360038757, + "loss_iou": 0.5859375, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 385082740, + "step": 2242 + }, + { + "epoch": 0.5899914513053199, + "grad_norm": 7.347975569465696, + "learning_rate": 5e-06, + "loss": 0.1198, + "num_input_tokens_seen": 385254812, + "step": 2243 + }, + { + "epoch": 0.5899914513053199, + "loss": 0.10931709408760071, + "loss_ce": 0.000491409155074507, + "loss_iou": 0.546875, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 385254812, + "step": 2243 + }, + { + "epoch": 0.590254488064707, + "grad_norm": 5.972916509227761, + "learning_rate": 5e-06, + "loss": 0.097, + "num_input_tokens_seen": 385427116, + "step": 2244 + }, + { + "epoch": 0.590254488064707, + "loss": 0.06860466301441193, + "loss_ce": 0.0034191168379038572, + "loss_iou": 0.61328125, + "loss_num": 0.01300048828125, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 385427116, + "step": 2244 + }, + { + "epoch": 0.5905175248240941, + "grad_norm": 5.792932058485695, + "learning_rate": 5e-06, + "loss": 0.1615, + "num_input_tokens_seen": 385599308, + "step": 2245 + }, + { + "epoch": 0.5905175248240941, + "loss": 0.09140485525131226, + "loss_ce": 0.0006760837859474123, + "loss_iou": 0.578125, + "loss_num": 0.01806640625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 385599308, + "step": 2245 + }, + { + "epoch": 0.5907805615834812, + "grad_norm": 6.999898072921272, + "learning_rate": 5e-06, + "loss": 0.1475, + "num_input_tokens_seen": 385771596, + "step": 2246 + }, + { + "epoch": 0.5907805615834812, + "loss": 0.18632760643959045, + "loss_ce": 0.0021235125605016947, + "loss_iou": 0.416015625, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 385771596, + "step": 2246 + }, + { + "epoch": 0.5910435983428685, + "grad_norm": 18.38237494107746, + "learning_rate": 5e-06, + "loss": 0.1274, + "num_input_tokens_seen": 385940788, + "step": 2247 + }, + { + "epoch": 0.5910435983428685, + "loss": 0.16689589619636536, + "loss_ce": 0.004466078244149685, + "loss_iou": 0.40625, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 385940788, + "step": 2247 + }, + { + "epoch": 0.5913066351022556, + "grad_norm": 7.020065723323422, + "learning_rate": 5e-06, + "loss": 0.1166, + "num_input_tokens_seen": 386112940, + "step": 2248 + }, + { + "epoch": 0.5913066351022556, + "loss": 0.06020001322031021, + "loss_ce": 0.0016672981437295675, + "loss_iou": 0.55859375, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 386112940, + "step": 2248 + }, + { + "epoch": 0.5915696718616427, + "grad_norm": 4.523572034689062, + "learning_rate": 5e-06, + "loss": 0.1053, + "num_input_tokens_seen": 386285088, + "step": 2249 + }, + { + "epoch": 0.5915696718616427, + "loss": 0.10929292440414429, + "loss_ce": 0.002771313302218914, + "loss_iou": 0.5234375, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 386285088, + "step": 2249 + }, + { + "epoch": 0.5918327086210298, + "grad_norm": 4.838564447785546, + "learning_rate": 5e-06, + "loss": 0.1268, + "num_input_tokens_seen": 386457364, + "step": 2250 + }, + { + "epoch": 0.5918327086210298, + "eval_websight_new_CIoU": 0.8620143532752991, + "eval_websight_new_GIoU": 0.8631013035774231, + "eval_websight_new_IoU": 0.8667041063308716, + "eval_websight_new_MAE_all": 0.020488929003477097, + "eval_websight_new_MAE_h": 0.008965343236923218, + "eval_websight_new_MAE_w": 0.0330337006598711, + "eval_websight_new_MAE_x": 0.03212242014706135, + "eval_websight_new_MAE_y": 0.007834249641746283, + "eval_websight_new_NUM_probability": 0.9999847710132599, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 0.10749460011720657, + "eval_websight_new_loss_ce": 1.4298896530817728e-05, + "eval_websight_new_loss_iou": 0.3743896484375, + "eval_websight_new_loss_num": 0.019153594970703125, + "eval_websight_new_loss_xval": 0.095703125, + "eval_websight_new_runtime": 58.289, + "eval_websight_new_samples_per_second": 0.858, + "eval_websight_new_steps_per_second": 0.034, + "num_input_tokens_seen": 386457364, + "step": 2250 + }, + { + "epoch": 0.5918327086210298, + "eval_seeclick_CIoU": 0.6236494481563568, + "eval_seeclick_GIoU": 0.6230664253234863, + "eval_seeclick_IoU": 0.6453896760940552, + "eval_seeclick_MAE_all": 0.046443790197372437, + "eval_seeclick_MAE_h": 0.026547173038125038, + "eval_seeclick_MAE_w": 0.06541823036968708, + "eval_seeclick_MAE_x": 0.06924234330654144, + "eval_seeclick_MAE_y": 0.024567410349845886, + "eval_seeclick_NUM_probability": 0.9999750256538391, + "eval_seeclick_inside_bbox": 0.953125, + "eval_seeclick_loss": 0.21426968276500702, + "eval_seeclick_loss_ce": 0.009121979121118784, + "eval_seeclick_loss_iou": 0.506591796875, + "eval_seeclick_loss_num": 0.039905548095703125, + "eval_seeclick_loss_xval": 0.1995849609375, + "eval_seeclick_runtime": 71.9379, + "eval_seeclick_samples_per_second": 0.598, + "eval_seeclick_steps_per_second": 0.028, + "num_input_tokens_seen": 386457364, + "step": 2250 + }, + { + "epoch": 0.5918327086210298, + "eval_icons_CIoU": 0.834777295589447, + "eval_icons_GIoU": 0.8241135478019714, + "eval_icons_IoU": 0.8441117405891418, + "eval_icons_MAE_all": 0.02446969971060753, + "eval_icons_MAE_h": 0.024498000741004944, + "eval_icons_MAE_w": 0.02540498599410057, + "eval_icons_MAE_x": 0.02417761366814375, + "eval_icons_MAE_y": 0.02379819191992283, + "eval_icons_NUM_probability": 0.9999534487724304, + "eval_icons_inside_bbox": 0.9565972089767456, + "eval_icons_loss": 0.0800996944308281, + "eval_icons_loss_ce": 2.8939639378222637e-05, + "eval_icons_loss_iou": 0.520263671875, + "eval_icons_loss_num": 0.014501571655273438, + "eval_icons_loss_xval": 0.072509765625, + "eval_icons_runtime": 87.9614, + "eval_icons_samples_per_second": 0.568, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 386457364, + "step": 2250 + }, + { + "epoch": 0.5918327086210298, + "eval_screenspot_CIoU": 0.5634338855743408, + "eval_screenspot_GIoU": 0.5513045191764832, + "eval_screenspot_IoU": 0.6039714018503824, + "eval_screenspot_MAE_all": 0.08249951650698979, + "eval_screenspot_MAE_h": 0.04705421378215154, + "eval_screenspot_MAE_w": 0.14340341091156006, + "eval_screenspot_MAE_x": 0.09280380109945933, + "eval_screenspot_MAE_y": 0.04673664582272371, + "eval_screenspot_NUM_probability": 0.99980628490448, + "eval_screenspot_inside_bbox": 0.8454166650772095, + "eval_screenspot_loss": 0.8701639175415039, + "eval_screenspot_loss_ce": 0.543925940990448, + "eval_screenspot_loss_iou": 0.45556640625, + "eval_screenspot_loss_num": 0.06413777669270833, + "eval_screenspot_loss_xval": 0.3208414713541667, + "eval_screenspot_runtime": 147.2039, + "eval_screenspot_samples_per_second": 0.605, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 386457364, + "step": 2250 + }, + { + "epoch": 0.5918327086210298, + "loss": 0.8634153604507446, + "loss_ce": 0.5327268838882446, + "loss_iou": 0.388671875, + "loss_num": 0.06591796875, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 386457364, + "step": 2250 + }, + { + "epoch": 0.5920957453804169, + "grad_norm": 17.382874089820607, + "learning_rate": 5e-06, + "loss": 0.1321, + "num_input_tokens_seen": 386629432, + "step": 2251 + }, + { + "epoch": 0.5920957453804169, + "loss": 0.06886275112628937, + "loss_ce": 0.0024259830825030804, + "loss_iou": 0.55078125, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 386629432, + "step": 2251 + }, + { + "epoch": 0.5923587821398041, + "grad_norm": 4.874671437046262, + "learning_rate": 5e-06, + "loss": 0.1568, + "num_input_tokens_seen": 386801572, + "step": 2252 + }, + { + "epoch": 0.5923587821398041, + "loss": 0.14957007765769958, + "loss_ce": 0.0020786237437278032, + "loss_iou": 0.51171875, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 386801572, + "step": 2252 + }, + { + "epoch": 0.5926218188991912, + "grad_norm": 5.10088801695758, + "learning_rate": 5e-06, + "loss": 0.1076, + "num_input_tokens_seen": 386973592, + "step": 2253 + }, + { + "epoch": 0.5926218188991912, + "loss": 0.14472725987434387, + "loss_ce": 0.00031808449421077967, + "loss_iou": 0.357421875, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 386973592, + "step": 2253 + }, + { + "epoch": 0.5928848556585783, + "grad_norm": 7.206762089035192, + "learning_rate": 5e-06, + "loss": 0.1079, + "num_input_tokens_seen": 387143876, + "step": 2254 + }, + { + "epoch": 0.5928848556585783, + "loss": 0.1427307277917862, + "loss_ce": 0.0004577827639877796, + "loss_iou": 0.490234375, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 387143876, + "step": 2254 + }, + { + "epoch": 0.5931478924179654, + "grad_norm": 28.833326639866428, + "learning_rate": 5e-06, + "loss": 0.1461, + "num_input_tokens_seen": 387316360, + "step": 2255 + }, + { + "epoch": 0.5931478924179654, + "loss": 0.10336272418498993, + "loss_ce": 0.0003353758074808866, + "loss_iou": 0.578125, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 387316360, + "step": 2255 + }, + { + "epoch": 0.5934109291773525, + "grad_norm": 11.869740305960864, + "learning_rate": 5e-06, + "loss": 0.1297, + "num_input_tokens_seen": 387488944, + "step": 2256 + }, + { + "epoch": 0.5934109291773525, + "loss": 0.0730450302362442, + "loss_ce": 0.0006573314312845469, + "loss_iou": 0.5859375, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 387488944, + "step": 2256 + }, + { + "epoch": 0.5936739659367397, + "grad_norm": 8.676646075770014, + "learning_rate": 5e-06, + "loss": 0.1525, + "num_input_tokens_seen": 387661216, + "step": 2257 + }, + { + "epoch": 0.5936739659367397, + "loss": 0.09287041425704956, + "loss_ce": 0.00037164040259085596, + "loss_iou": 0.48046875, + "loss_num": 0.0185546875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 387661216, + "step": 2257 + }, + { + "epoch": 0.5939370026961268, + "grad_norm": 6.091102593612703, + "learning_rate": 5e-06, + "loss": 0.1202, + "num_input_tokens_seen": 387833316, + "step": 2258 + }, + { + "epoch": 0.5939370026961268, + "loss": 0.047980114817619324, + "loss_ce": 0.002722549019381404, + "loss_iou": 0.52734375, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 387833316, + "step": 2258 + }, + { + "epoch": 0.5942000394555139, + "grad_norm": 13.12094289217443, + "learning_rate": 5e-06, + "loss": 0.1212, + "num_input_tokens_seen": 388005488, + "step": 2259 + }, + { + "epoch": 0.5942000394555139, + "loss": 0.1262204349040985, + "loss_ce": 0.006103249732404947, + "loss_iou": 0.4453125, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 388005488, + "step": 2259 + }, + { + "epoch": 0.594463076214901, + "grad_norm": 6.977408677449423, + "learning_rate": 5e-06, + "loss": 0.1116, + "num_input_tokens_seen": 388178100, + "step": 2260 + }, + { + "epoch": 0.594463076214901, + "loss": 0.09303668141365051, + "loss_ce": 0.0011177423875778913, + "loss_iou": 0.57421875, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 388178100, + "step": 2260 + }, + { + "epoch": 0.5947261129742881, + "grad_norm": 5.548896564721941, + "learning_rate": 5e-06, + "loss": 0.1348, + "num_input_tokens_seen": 388349964, + "step": 2261 + }, + { + "epoch": 0.5947261129742881, + "loss": 0.07538396120071411, + "loss_ce": 0.0003412406367715448, + "loss_iou": 0.49609375, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 388349964, + "step": 2261 + }, + { + "epoch": 0.5949891497336752, + "grad_norm": 3.9845795265650694, + "learning_rate": 5e-06, + "loss": 0.1365, + "num_input_tokens_seen": 388520668, + "step": 2262 + }, + { + "epoch": 0.5949891497336752, + "loss": 0.18895787000656128, + "loss_ce": 0.00042028201278299093, + "loss_iou": 0.4765625, + "loss_num": 0.037841796875, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 388520668, + "step": 2262 + }, + { + "epoch": 0.5952521864930624, + "grad_norm": 5.603595618443302, + "learning_rate": 5e-06, + "loss": 0.1414, + "num_input_tokens_seen": 388692900, + "step": 2263 + }, + { + "epoch": 0.5952521864930624, + "loss": 0.08959123492240906, + "loss_ce": 0.0002662862534634769, + "loss_iou": 0.427734375, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 388692900, + "step": 2263 + }, + { + "epoch": 0.5955152232524495, + "grad_norm": 5.071553765162839, + "learning_rate": 5e-06, + "loss": 0.142, + "num_input_tokens_seen": 388865328, + "step": 2264 + }, + { + "epoch": 0.5955152232524495, + "loss": 0.07327542454004288, + "loss_ce": 0.0008877270738594234, + "loss_iou": 0.5390625, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 388865328, + "step": 2264 + }, + { + "epoch": 0.5957782600118366, + "grad_norm": 7.1017098248073385, + "learning_rate": 5e-06, + "loss": 0.1325, + "num_input_tokens_seen": 389037536, + "step": 2265 + }, + { + "epoch": 0.5957782600118366, + "loss": 0.15171518921852112, + "loss_ce": 0.0005311004933901131, + "loss_iou": 0.44921875, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 389037536, + "step": 2265 + }, + { + "epoch": 0.5960412967712237, + "grad_norm": 6.446588247534734, + "learning_rate": 5e-06, + "loss": 0.1161, + "num_input_tokens_seen": 389209660, + "step": 2266 + }, + { + "epoch": 0.5960412967712237, + "loss": 0.11115900427103043, + "loss_ce": 0.00028864690102636814, + "loss_iou": 0.50390625, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 389209660, + "step": 2266 + }, + { + "epoch": 0.5963043335306109, + "grad_norm": 9.702541385516984, + "learning_rate": 5e-06, + "loss": 0.1152, + "num_input_tokens_seen": 389381624, + "step": 2267 + }, + { + "epoch": 0.5963043335306109, + "loss": 0.13201884925365448, + "loss_ce": 0.00581339979544282, + "loss_iou": 0.48046875, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 389381624, + "step": 2267 + }, + { + "epoch": 0.5965673702899981, + "grad_norm": 4.325039584368215, + "learning_rate": 5e-06, + "loss": 0.1332, + "num_input_tokens_seen": 389553348, + "step": 2268 + }, + { + "epoch": 0.5965673702899981, + "loss": 0.09481080621480942, + "loss_ce": 0.0006488211220130324, + "loss_iou": 0.6640625, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 389553348, + "step": 2268 + }, + { + "epoch": 0.5968304070493852, + "grad_norm": 4.06473977313393, + "learning_rate": 5e-06, + "loss": 0.1111, + "num_input_tokens_seen": 389723828, + "step": 2269 + }, + { + "epoch": 0.5968304070493852, + "loss": 0.130482017993927, + "loss_ce": 0.00023298643645830452, + "loss_iou": 0.57421875, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 389723828, + "step": 2269 + }, + { + "epoch": 0.5970934438087723, + "grad_norm": 11.83991543858794, + "learning_rate": 5e-06, + "loss": 0.1278, + "num_input_tokens_seen": 389896036, + "step": 2270 + }, + { + "epoch": 0.5970934438087723, + "loss": 0.1708485186100006, + "loss_ce": 0.0017506256699562073, + "loss_iou": 0.5625, + "loss_num": 0.033935546875, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 389896036, + "step": 2270 + }, + { + "epoch": 0.5973564805681594, + "grad_norm": 4.637610311727533, + "learning_rate": 5e-06, + "loss": 0.11, + "num_input_tokens_seen": 390068288, + "step": 2271 + }, + { + "epoch": 0.5973564805681594, + "loss": 0.12343515455722809, + "loss_ce": 0.0004340623854659498, + "loss_iou": NaN, + "loss_num": 0.0245361328125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 390068288, + "step": 2271 + }, + { + "epoch": 0.5976195173275465, + "grad_norm": 4.239687367925931, + "learning_rate": 5e-06, + "loss": 0.1336, + "num_input_tokens_seen": 390240724, + "step": 2272 + }, + { + "epoch": 0.5976195173275465, + "loss": 0.09358179569244385, + "loss_ce": 0.0032497686333954334, + "loss_iou": 0.65234375, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 390240724, + "step": 2272 + }, + { + "epoch": 0.5978825540869337, + "grad_norm": 3.650780325093881, + "learning_rate": 5e-06, + "loss": 0.135, + "num_input_tokens_seen": 390413092, + "step": 2273 + }, + { + "epoch": 0.5978825540869337, + "loss": 0.0876360684633255, + "loss_ce": 0.00032527127768844366, + "loss_iou": 0.515625, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 390413092, + "step": 2273 + }, + { + "epoch": 0.5981455908463208, + "grad_norm": 4.543660924331624, + "learning_rate": 5e-06, + "loss": 0.0989, + "num_input_tokens_seen": 390585320, + "step": 2274 + }, + { + "epoch": 0.5981455908463208, + "loss": 0.1153046190738678, + "loss_ce": 0.0005890398169867694, + "loss_iou": 0.3671875, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 390585320, + "step": 2274 + }, + { + "epoch": 0.5984086276057079, + "grad_norm": 5.356363884549334, + "learning_rate": 5e-06, + "loss": 0.1013, + "num_input_tokens_seen": 390757376, + "step": 2275 + }, + { + "epoch": 0.5984086276057079, + "loss": 0.1397184282541275, + "loss_ce": 0.0057157427072525024, + "loss_iou": 0.5, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 390757376, + "step": 2275 + }, + { + "epoch": 0.598671664365095, + "grad_norm": 3.3713877207341088, + "learning_rate": 5e-06, + "loss": 0.0875, + "num_input_tokens_seen": 390929460, + "step": 2276 + }, + { + "epoch": 0.598671664365095, + "loss": 0.0523032546043396, + "loss_ce": 0.000316562014631927, + "loss_iou": 0.458984375, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 390929460, + "step": 2276 + }, + { + "epoch": 0.5989347011244821, + "grad_norm": 10.008368190926095, + "learning_rate": 5e-06, + "loss": 0.1313, + "num_input_tokens_seen": 391100108, + "step": 2277 + }, + { + "epoch": 0.5989347011244821, + "loss": 0.08954879641532898, + "loss_ce": 0.0027873138897120953, + "loss_iou": 0.55078125, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 391100108, + "step": 2277 + }, + { + "epoch": 0.5991977378838693, + "grad_norm": 13.178397333981023, + "learning_rate": 5e-06, + "loss": 0.1218, + "num_input_tokens_seen": 391272352, + "step": 2278 + }, + { + "epoch": 0.5991977378838693, + "loss": 0.1727224737405777, + "loss_ce": 0.00011504795111250132, + "loss_iou": 0.53125, + "loss_num": 0.03466796875, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 391272352, + "step": 2278 + }, + { + "epoch": 0.5994607746432564, + "grad_norm": 5.1375780540443134, + "learning_rate": 5e-06, + "loss": 0.1023, + "num_input_tokens_seen": 391444648, + "step": 2279 + }, + { + "epoch": 0.5994607746432564, + "loss": 0.05273713544011116, + "loss_ce": 0.0022915778681635857, + "loss_iou": 0.478515625, + "loss_num": 0.01007080078125, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 391444648, + "step": 2279 + }, + { + "epoch": 0.5997238114026435, + "grad_norm": 25.459943093434337, + "learning_rate": 5e-06, + "loss": 0.1646, + "num_input_tokens_seen": 391616960, + "step": 2280 + }, + { + "epoch": 0.5997238114026435, + "loss": 0.1544933021068573, + "loss_ce": 0.0004405686049722135, + "loss_iou": 0.6796875, + "loss_num": 0.03076171875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 391616960, + "step": 2280 + }, + { + "epoch": 0.5999868481620306, + "grad_norm": 4.8890917043699185, + "learning_rate": 5e-06, + "loss": 0.1464, + "num_input_tokens_seen": 391788924, + "step": 2281 + }, + { + "epoch": 0.5999868481620306, + "loss": 0.06912894546985626, + "loss_ce": 0.006689978763461113, + "loss_iou": 0.55859375, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 391788924, + "step": 2281 + }, + { + "epoch": 0.6002498849214177, + "grad_norm": 6.219770384955685, + "learning_rate": 5e-06, + "loss": 0.1357, + "num_input_tokens_seen": 391961072, + "step": 2282 + }, + { + "epoch": 0.6002498849214177, + "loss": 0.07950527220964432, + "loss_ce": 0.00041896995389834046, + "loss_iou": 0.4140625, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 391961072, + "step": 2282 + }, + { + "epoch": 0.600512921680805, + "grad_norm": 3.971504685954048, + "learning_rate": 5e-06, + "loss": 0.156, + "num_input_tokens_seen": 392130392, + "step": 2283 + }, + { + "epoch": 0.600512921680805, + "loss": 0.11902518570423126, + "loss_ce": 0.0007390595856122673, + "loss_iou": 0.51953125, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 392130392, + "step": 2283 + }, + { + "epoch": 0.600775958440192, + "grad_norm": 3.36927604682008, + "learning_rate": 5e-06, + "loss": 0.0892, + "num_input_tokens_seen": 392300644, + "step": 2284 + }, + { + "epoch": 0.600775958440192, + "loss": 0.09242402017116547, + "loss_ce": 0.0041824462823569775, + "loss_iou": 0.375, + "loss_num": 0.017578125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 392300644, + "step": 2284 + }, + { + "epoch": 0.6010389951995792, + "grad_norm": 5.15292132366606, + "learning_rate": 5e-06, + "loss": 0.1317, + "num_input_tokens_seen": 392472968, + "step": 2285 + }, + { + "epoch": 0.6010389951995792, + "loss": 0.14593744277954102, + "loss_ce": 0.0034508705139160156, + "loss_iou": 0.484375, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 392472968, + "step": 2285 + }, + { + "epoch": 0.6013020319589663, + "grad_norm": 4.237274665038102, + "learning_rate": 5e-06, + "loss": 0.1003, + "num_input_tokens_seen": 392644756, + "step": 2286 + }, + { + "epoch": 0.6013020319589663, + "loss": 0.07083064317703247, + "loss_ce": 0.00170833186712116, + "loss_iou": 0.59765625, + "loss_num": 0.0137939453125, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 392644756, + "step": 2286 + }, + { + "epoch": 0.6015650687183534, + "grad_norm": 43.77503497663098, + "learning_rate": 5e-06, + "loss": 0.1046, + "num_input_tokens_seen": 392816816, + "step": 2287 + }, + { + "epoch": 0.6015650687183534, + "loss": 0.09309446811676025, + "loss_ce": 0.00016844802303239703, + "loss_iou": 0.5390625, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 392816816, + "step": 2287 + }, + { + "epoch": 0.6018281054777405, + "grad_norm": 8.046683810113267, + "learning_rate": 5e-06, + "loss": 0.1609, + "num_input_tokens_seen": 392987164, + "step": 2288 + }, + { + "epoch": 0.6018281054777405, + "loss": 0.12998059391975403, + "loss_ce": 0.0008912362391129136, + "loss_iou": 0.5078125, + "loss_num": 0.02587890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 392987164, + "step": 2288 + }, + { + "epoch": 0.6020911422371277, + "grad_norm": 6.745453284904858, + "learning_rate": 5e-06, + "loss": 0.1509, + "num_input_tokens_seen": 393159584, + "step": 2289 + }, + { + "epoch": 0.6020911422371277, + "loss": 0.18720246851444244, + "loss_ce": 0.0019302507862448692, + "loss_iou": 0.474609375, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 393159584, + "step": 2289 + }, + { + "epoch": 0.6023541789965148, + "grad_norm": 9.388482886571397, + "learning_rate": 5e-06, + "loss": 0.14, + "num_input_tokens_seen": 393331696, + "step": 2290 + }, + { + "epoch": 0.6023541789965148, + "loss": 0.153926283121109, + "loss_ce": 0.005275162868201733, + "loss_iou": 0.56640625, + "loss_num": 0.02978515625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 393331696, + "step": 2290 + }, + { + "epoch": 0.6026172157559019, + "grad_norm": 5.436488955385426, + "learning_rate": 5e-06, + "loss": 0.1239, + "num_input_tokens_seen": 393504044, + "step": 2291 + }, + { + "epoch": 0.6026172157559019, + "loss": 0.11098843812942505, + "loss_ce": 0.00027066541952081025, + "loss_iou": 0.5234375, + "loss_num": 0.0220947265625, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 393504044, + "step": 2291 + }, + { + "epoch": 0.602880252515289, + "grad_norm": 8.104274681671972, + "learning_rate": 5e-06, + "loss": 0.0981, + "num_input_tokens_seen": 393676272, + "step": 2292 + }, + { + "epoch": 0.602880252515289, + "loss": 0.16863158345222473, + "loss_ce": 0.003592532593756914, + "loss_iou": 0.453125, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 393676272, + "step": 2292 + }, + { + "epoch": 0.6031432892746761, + "grad_norm": 12.32428248921536, + "learning_rate": 5e-06, + "loss": 0.1035, + "num_input_tokens_seen": 393848392, + "step": 2293 + }, + { + "epoch": 0.6031432892746761, + "loss": 0.08580140769481659, + "loss_ce": 0.0013287551701068878, + "loss_iou": 0.53125, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 393848392, + "step": 2293 + }, + { + "epoch": 0.6034063260340633, + "grad_norm": 10.386325441280649, + "learning_rate": 5e-06, + "loss": 0.0854, + "num_input_tokens_seen": 394020680, + "step": 2294 + }, + { + "epoch": 0.6034063260340633, + "loss": 0.07794995605945587, + "loss_ce": 0.003273440757766366, + "loss_iou": 0.5546875, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 394020680, + "step": 2294 + }, + { + "epoch": 0.6036693627934504, + "grad_norm": 5.0087549483878355, + "learning_rate": 5e-06, + "loss": 0.1252, + "num_input_tokens_seen": 394192740, + "step": 2295 + }, + { + "epoch": 0.6036693627934504, + "loss": 0.12549570202827454, + "loss_ce": 0.0021436563692986965, + "loss_iou": 0.51953125, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 394192740, + "step": 2295 + }, + { + "epoch": 0.6039323995528375, + "grad_norm": 12.109968647066866, + "learning_rate": 5e-06, + "loss": 0.1424, + "num_input_tokens_seen": 394362948, + "step": 2296 + }, + { + "epoch": 0.6039323995528375, + "loss": 0.1552933156490326, + "loss_ce": 0.0013626604340970516, + "loss_iou": 0.625, + "loss_num": 0.03076171875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 394362948, + "step": 2296 + }, + { + "epoch": 0.6041954363122246, + "grad_norm": 3.064699260610535, + "learning_rate": 5e-06, + "loss": 0.1162, + "num_input_tokens_seen": 394534856, + "step": 2297 + }, + { + "epoch": 0.6041954363122246, + "loss": 0.13219855725765228, + "loss_ce": 0.0020563420839607716, + "loss_iou": 0.45703125, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 394534856, + "step": 2297 + }, + { + "epoch": 0.6044584730716117, + "grad_norm": 9.354536255922673, + "learning_rate": 5e-06, + "loss": 0.1776, + "num_input_tokens_seen": 394707060, + "step": 2298 + }, + { + "epoch": 0.6044584730716117, + "loss": 0.16126899421215057, + "loss_ce": 0.003599932650104165, + "loss_iou": 0.26171875, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 394707060, + "step": 2298 + }, + { + "epoch": 0.6047215098309989, + "grad_norm": 4.517992664125833, + "learning_rate": 5e-06, + "loss": 0.0805, + "num_input_tokens_seen": 394879016, + "step": 2299 + }, + { + "epoch": 0.6047215098309989, + "loss": 0.0760730504989624, + "loss_ce": 0.002098444849252701, + "loss_iou": 0.48046875, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 394879016, + "step": 2299 + }, + { + "epoch": 0.604984546590386, + "grad_norm": 4.501760214176101, + "learning_rate": 5e-06, + "loss": 0.1208, + "num_input_tokens_seen": 395051188, + "step": 2300 + }, + { + "epoch": 0.604984546590386, + "loss": 0.15630951523780823, + "loss_ce": 0.0008682362968102098, + "loss_iou": 0.578125, + "loss_num": 0.0311279296875, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 395051188, + "step": 2300 + }, + { + "epoch": 0.6052475833497731, + "grad_norm": 4.195253270921506, + "learning_rate": 5e-06, + "loss": 0.1148, + "num_input_tokens_seen": 395223544, + "step": 2301 + }, + { + "epoch": 0.6052475833497731, + "loss": 0.08342162519693375, + "loss_ce": 0.0002612252428662032, + "loss_iou": 0.54296875, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 395223544, + "step": 2301 + }, + { + "epoch": 0.6055106201091602, + "grad_norm": 5.118374892921478, + "learning_rate": 5e-06, + "loss": 0.1103, + "num_input_tokens_seen": 395395376, + "step": 2302 + }, + { + "epoch": 0.6055106201091602, + "loss": 0.11348429322242737, + "loss_ce": 0.0011490845354273915, + "loss_iou": 0.390625, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 395395376, + "step": 2302 + }, + { + "epoch": 0.6057736568685473, + "grad_norm": 4.210270169640029, + "learning_rate": 5e-06, + "loss": 0.134, + "num_input_tokens_seen": 395567140, + "step": 2303 + }, + { + "epoch": 0.6057736568685473, + "loss": 0.08656048029661179, + "loss_ce": 0.0008671237155795097, + "loss_iou": 0.50390625, + "loss_num": 0.01708984375, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 395567140, + "step": 2303 + }, + { + "epoch": 0.6060366936279346, + "grad_norm": 4.272171126326938, + "learning_rate": 5e-06, + "loss": 0.1251, + "num_input_tokens_seen": 395739120, + "step": 2304 + }, + { + "epoch": 0.6060366936279346, + "loss": 0.14756399393081665, + "loss_ce": 0.002758089918643236, + "loss_iou": 0.6171875, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 395739120, + "step": 2304 + }, + { + "epoch": 0.6062997303873217, + "grad_norm": 8.535390245901846, + "learning_rate": 5e-06, + "loss": 0.1204, + "num_input_tokens_seen": 395911508, + "step": 2305 + }, + { + "epoch": 0.6062997303873217, + "loss": 0.1481376737356186, + "loss_ce": 0.002080546924844384, + "loss_iou": 0.314453125, + "loss_num": 0.0291748046875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 395911508, + "step": 2305 + }, + { + "epoch": 0.6065627671467088, + "grad_norm": 6.5128397067856785, + "learning_rate": 5e-06, + "loss": 0.1119, + "num_input_tokens_seen": 396083668, + "step": 2306 + }, + { + "epoch": 0.6065627671467088, + "loss": 0.05626985430717468, + "loss_ce": 0.0008499314426444471, + "loss_iou": 0.62109375, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 396083668, + "step": 2306 + }, + { + "epoch": 0.6068258039060959, + "grad_norm": 4.785548469928152, + "learning_rate": 5e-06, + "loss": 0.0998, + "num_input_tokens_seen": 396255888, + "step": 2307 + }, + { + "epoch": 0.6068258039060959, + "loss": 0.08620062470436096, + "loss_ce": 0.002124701626598835, + "loss_iou": 0.37109375, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 396255888, + "step": 2307 + }, + { + "epoch": 0.607088840665483, + "grad_norm": 11.035467862846048, + "learning_rate": 5e-06, + "loss": 0.1858, + "num_input_tokens_seen": 396427920, + "step": 2308 + }, + { + "epoch": 0.607088840665483, + "loss": 0.19853177666664124, + "loss_ce": 0.0004726996412500739, + "loss_iou": 0.46875, + "loss_num": 0.03955078125, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 396427920, + "step": 2308 + }, + { + "epoch": 0.6073518774248702, + "grad_norm": 5.6025345838645055, + "learning_rate": 5e-06, + "loss": 0.0874, + "num_input_tokens_seen": 396599820, + "step": 2309 + }, + { + "epoch": 0.6073518774248702, + "loss": 0.09216836839914322, + "loss_ce": 0.0037742014974355698, + "loss_iou": 0.4375, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 396599820, + "step": 2309 + }, + { + "epoch": 0.6076149141842573, + "grad_norm": 21.234701292276423, + "learning_rate": 5e-06, + "loss": 0.0959, + "num_input_tokens_seen": 396771948, + "step": 2310 + }, + { + "epoch": 0.6076149141842573, + "loss": 0.08635897189378738, + "loss_ce": 0.000482511764857918, + "loss_iou": 0.6171875, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 396771948, + "step": 2310 + }, + { + "epoch": 0.6078779509436444, + "grad_norm": 4.7419638393344545, + "learning_rate": 5e-06, + "loss": 0.1136, + "num_input_tokens_seen": 396943768, + "step": 2311 + }, + { + "epoch": 0.6078779509436444, + "loss": 0.12838855385780334, + "loss_ce": 0.002534065628424287, + "loss_iou": 0.55078125, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 396943768, + "step": 2311 + }, + { + "epoch": 0.6081409877030315, + "grad_norm": 5.73379136559562, + "learning_rate": 5e-06, + "loss": 0.1965, + "num_input_tokens_seen": 397116008, + "step": 2312 + }, + { + "epoch": 0.6081409877030315, + "loss": 0.17652729153633118, + "loss_ce": 0.0020277751609683037, + "loss_iou": NaN, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 397116008, + "step": 2312 + }, + { + "epoch": 0.6084040244624186, + "grad_norm": 4.062322434152511, + "learning_rate": 5e-06, + "loss": 0.0845, + "num_input_tokens_seen": 397288120, + "step": 2313 + }, + { + "epoch": 0.6084040244624186, + "loss": 0.09052719175815582, + "loss_ce": 0.0007139619556255639, + "loss_iou": 0.470703125, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 397288120, + "step": 2313 + }, + { + "epoch": 0.6086670612218057, + "grad_norm": 16.02293214120212, + "learning_rate": 5e-06, + "loss": 0.1668, + "num_input_tokens_seen": 397460304, + "step": 2314 + }, + { + "epoch": 0.6086670612218057, + "loss": 0.13694977760314941, + "loss_ce": 0.0005667208461090922, + "loss_iou": 0.69921875, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 397460304, + "step": 2314 + }, + { + "epoch": 0.6089300979811929, + "grad_norm": 7.987103586052065, + "learning_rate": 5e-06, + "loss": 0.0996, + "num_input_tokens_seen": 397632608, + "step": 2315 + }, + { + "epoch": 0.6089300979811929, + "loss": 0.06060004234313965, + "loss_ce": 0.002006293274462223, + "loss_iou": 0.5625, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 397632608, + "step": 2315 + }, + { + "epoch": 0.60919313474058, + "grad_norm": 7.316466982553544, + "learning_rate": 5e-06, + "loss": 0.1343, + "num_input_tokens_seen": 397804820, + "step": 2316 + }, + { + "epoch": 0.60919313474058, + "loss": 0.11626386642456055, + "loss_ce": 0.004111772403120995, + "loss_iou": 0.400390625, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 397804820, + "step": 2316 + }, + { + "epoch": 0.6094561714999671, + "grad_norm": 5.539376425734617, + "learning_rate": 5e-06, + "loss": 0.1638, + "num_input_tokens_seen": 397976940, + "step": 2317 + }, + { + "epoch": 0.6094561714999671, + "loss": 0.16956084966659546, + "loss_ce": 0.004979567602276802, + "loss_iou": 0.6171875, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 397976940, + "step": 2317 + }, + { + "epoch": 0.6097192082593542, + "grad_norm": 24.75685247660265, + "learning_rate": 5e-06, + "loss": 0.1182, + "num_input_tokens_seen": 398147040, + "step": 2318 + }, + { + "epoch": 0.6097192082593542, + "loss": 0.18709902465343475, + "loss_ce": 0.0012164636282250285, + "loss_iou": NaN, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 398147040, + "step": 2318 + }, + { + "epoch": 0.6099822450187413, + "grad_norm": 4.315961063244564, + "learning_rate": 5e-06, + "loss": 0.1385, + "num_input_tokens_seen": 398318996, + "step": 2319 + }, + { + "epoch": 0.6099822450187413, + "loss": 0.18130794167518616, + "loss_ce": 0.0009795635705813766, + "loss_iou": 0.5234375, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 398318996, + "step": 2319 + }, + { + "epoch": 0.6102452817781285, + "grad_norm": 4.640931709162671, + "learning_rate": 5e-06, + "loss": 0.0931, + "num_input_tokens_seen": 398489328, + "step": 2320 + }, + { + "epoch": 0.6102452817781285, + "loss": 0.09983004629611969, + "loss_ce": 0.0005563638987950981, + "loss_iou": 0.546875, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 398489328, + "step": 2320 + }, + { + "epoch": 0.6105083185375156, + "grad_norm": 23.53542790698555, + "learning_rate": 5e-06, + "loss": 0.1122, + "num_input_tokens_seen": 398661592, + "step": 2321 + }, + { + "epoch": 0.6105083185375156, + "loss": 0.14297714829444885, + "loss_ce": 0.0037559503689408302, + "loss_iou": 0.59765625, + "loss_num": 0.02783203125, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 398661592, + "step": 2321 + }, + { + "epoch": 0.6107713552969027, + "grad_norm": 4.191481208630257, + "learning_rate": 5e-06, + "loss": 0.1143, + "num_input_tokens_seen": 398833940, + "step": 2322 + }, + { + "epoch": 0.6107713552969027, + "loss": 0.1795070767402649, + "loss_ce": 0.003298588562756777, + "loss_iou": 0.51171875, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 398833940, + "step": 2322 + }, + { + "epoch": 0.6110343920562898, + "grad_norm": 4.031146460659383, + "learning_rate": 5e-06, + "loss": 0.1102, + "num_input_tokens_seen": 399006244, + "step": 2323 + }, + { + "epoch": 0.6110343920562898, + "loss": 0.0546613447368145, + "loss_ce": 0.0011029954766854644, + "loss_iou": 0.45703125, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 399006244, + "step": 2323 + }, + { + "epoch": 0.611297428815677, + "grad_norm": 4.121680425084659, + "learning_rate": 5e-06, + "loss": 0.1074, + "num_input_tokens_seen": 399178456, + "step": 2324 + }, + { + "epoch": 0.611297428815677, + "loss": 0.16250503063201904, + "loss_ce": 0.002928605070337653, + "loss_iou": 0.61328125, + "loss_num": 0.031982421875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 399178456, + "step": 2324 + }, + { + "epoch": 0.6115604655750642, + "grad_norm": 4.598379596612616, + "learning_rate": 5e-06, + "loss": 0.0851, + "num_input_tokens_seen": 399350964, + "step": 2325 + }, + { + "epoch": 0.6115604655750642, + "loss": 0.06914816796779633, + "loss_ce": 0.002833473263308406, + "loss_iou": 0.57421875, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 399350964, + "step": 2325 + }, + { + "epoch": 0.6118235023344513, + "grad_norm": 7.270975374787245, + "learning_rate": 5e-06, + "loss": 0.0929, + "num_input_tokens_seen": 399523228, + "step": 2326 + }, + { + "epoch": 0.6118235023344513, + "loss": 0.08900976926088333, + "loss_ce": 0.0011801763903349638, + "loss_iou": 0.7421875, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 399523228, + "step": 2326 + }, + { + "epoch": 0.6120865390938384, + "grad_norm": 4.064818082785728, + "learning_rate": 5e-06, + "loss": 0.1503, + "num_input_tokens_seen": 399691856, + "step": 2327 + }, + { + "epoch": 0.6120865390938384, + "loss": 0.10691290348768234, + "loss_ce": 0.00043707285658456385, + "loss_iou": 0.6328125, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 399691856, + "step": 2327 + }, + { + "epoch": 0.6123495758532255, + "grad_norm": 7.343987420034391, + "learning_rate": 5e-06, + "loss": 0.1249, + "num_input_tokens_seen": 399864108, + "step": 2328 + }, + { + "epoch": 0.6123495758532255, + "loss": 0.09687530994415283, + "loss_ce": 0.001996160950511694, + "loss_iou": 0.58984375, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 399864108, + "step": 2328 + }, + { + "epoch": 0.6126126126126126, + "grad_norm": 5.275060955551622, + "learning_rate": 5e-06, + "loss": 0.0841, + "num_input_tokens_seen": 400036500, + "step": 2329 + }, + { + "epoch": 0.6126126126126126, + "loss": 0.08277605473995209, + "loss_ce": 0.004162768833339214, + "loss_iou": 0.5078125, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 400036500, + "step": 2329 + }, + { + "epoch": 0.6128756493719998, + "grad_norm": 4.648207674990067, + "learning_rate": 5e-06, + "loss": 0.1259, + "num_input_tokens_seen": 400206892, + "step": 2330 + }, + { + "epoch": 0.6128756493719998, + "loss": 0.19954022765159607, + "loss_ce": 0.0007487052353098989, + "loss_iou": 0.447265625, + "loss_num": 0.039794921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 400206892, + "step": 2330 + }, + { + "epoch": 0.6131386861313869, + "grad_norm": 6.976991385409509, + "learning_rate": 5e-06, + "loss": 0.1152, + "num_input_tokens_seen": 400377220, + "step": 2331 + }, + { + "epoch": 0.6131386861313869, + "loss": 0.10119281709194183, + "loss_ce": 0.0006984363426454365, + "loss_iou": 0.5234375, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 400377220, + "step": 2331 + }, + { + "epoch": 0.613401722890774, + "grad_norm": 12.605542023426503, + "learning_rate": 5e-06, + "loss": 0.1478, + "num_input_tokens_seen": 400549468, + "step": 2332 + }, + { + "epoch": 0.613401722890774, + "loss": 0.20965467393398285, + "loss_ce": 0.009276244789361954, + "loss_iou": 0.62109375, + "loss_num": 0.0400390625, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 400549468, + "step": 2332 + }, + { + "epoch": 0.6136647596501611, + "grad_norm": 4.707098406894473, + "learning_rate": 5e-06, + "loss": 0.1262, + "num_input_tokens_seen": 400721520, + "step": 2333 + }, + { + "epoch": 0.6136647596501611, + "loss": 0.11055370420217514, + "loss_ce": 0.0006599072366952896, + "loss_iou": 0.4140625, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 400721520, + "step": 2333 + }, + { + "epoch": 0.6139277964095482, + "grad_norm": 14.198987666898462, + "learning_rate": 5e-06, + "loss": 0.1166, + "num_input_tokens_seen": 400893600, + "step": 2334 + }, + { + "epoch": 0.6139277964095482, + "loss": 0.11723913997411728, + "loss_ce": 0.0025235607754439116, + "loss_iou": NaN, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 400893600, + "step": 2334 + }, + { + "epoch": 0.6141908331689354, + "grad_norm": 4.053613863943027, + "learning_rate": 5e-06, + "loss": 0.1082, + "num_input_tokens_seen": 401065668, + "step": 2335 + }, + { + "epoch": 0.6141908331689354, + "loss": 0.07270236313343048, + "loss_ce": 0.0009250181610696018, + "loss_iou": 0.51953125, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 401065668, + "step": 2335 + }, + { + "epoch": 0.6144538699283225, + "grad_norm": 5.7996514772347005, + "learning_rate": 5e-06, + "loss": 0.0778, + "num_input_tokens_seen": 401237708, + "step": 2336 + }, + { + "epoch": 0.6144538699283225, + "loss": 0.07430876046419144, + "loss_ce": 0.0035384970251470804, + "loss_iou": 0.58984375, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 401237708, + "step": 2336 + }, + { + "epoch": 0.6147169066877096, + "grad_norm": 4.799102165669682, + "learning_rate": 5e-06, + "loss": 0.1089, + "num_input_tokens_seen": 401410012, + "step": 2337 + }, + { + "epoch": 0.6147169066877096, + "loss": 0.0582679845392704, + "loss_ce": 0.0014747708337381482, + "loss_iou": 0.61328125, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 401410012, + "step": 2337 + }, + { + "epoch": 0.6149799434470967, + "grad_norm": 4.382135516303559, + "learning_rate": 5e-06, + "loss": 0.1124, + "num_input_tokens_seen": 401582352, + "step": 2338 + }, + { + "epoch": 0.6149799434470967, + "loss": 0.09339425712823868, + "loss_ce": 0.00275705405510962, + "loss_iou": 0.63671875, + "loss_num": 0.01806640625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 401582352, + "step": 2338 + }, + { + "epoch": 0.6152429802064838, + "grad_norm": 4.340567932001983, + "learning_rate": 5e-06, + "loss": 0.1484, + "num_input_tokens_seen": 401754308, + "step": 2339 + }, + { + "epoch": 0.6152429802064838, + "loss": 0.14638805389404297, + "loss_ce": 0.0007886901148594916, + "loss_iou": 0.53125, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 401754308, + "step": 2339 + }, + { + "epoch": 0.6155060169658709, + "grad_norm": 6.569770077552981, + "learning_rate": 5e-06, + "loss": 0.1542, + "num_input_tokens_seen": 401926216, + "step": 2340 + }, + { + "epoch": 0.6155060169658709, + "loss": 0.12163828313350677, + "loss_ce": 0.004069316200911999, + "loss_iou": 0.50390625, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 401926216, + "step": 2340 + }, + { + "epoch": 0.6157690537252581, + "grad_norm": 4.067934681272699, + "learning_rate": 5e-06, + "loss": 0.096, + "num_input_tokens_seen": 402098264, + "step": 2341 + }, + { + "epoch": 0.6157690537252581, + "loss": 0.0871957540512085, + "loss_ce": 0.0004342720494605601, + "loss_iou": 0.52734375, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 402098264, + "step": 2341 + }, + { + "epoch": 0.6160320904846452, + "grad_norm": 4.130435258439036, + "learning_rate": 5e-06, + "loss": 0.1384, + "num_input_tokens_seen": 402270372, + "step": 2342 + }, + { + "epoch": 0.6160320904846452, + "loss": 0.046382177621126175, + "loss_ce": 7.175222708610818e-05, + "loss_iou": 0.400390625, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 402270372, + "step": 2342 + }, + { + "epoch": 0.6162951272440323, + "grad_norm": 5.637480244426784, + "learning_rate": 5e-06, + "loss": 0.1136, + "num_input_tokens_seen": 402440632, + "step": 2343 + }, + { + "epoch": 0.6162951272440323, + "loss": 0.07675184309482574, + "loss_ce": 0.0019837813451886177, + "loss_iou": 0.361328125, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 402440632, + "step": 2343 + }, + { + "epoch": 0.6165581640034195, + "grad_norm": 8.043736182852212, + "learning_rate": 5e-06, + "loss": 0.0906, + "num_input_tokens_seen": 402612920, + "step": 2344 + }, + { + "epoch": 0.6165581640034195, + "loss": 0.09050323814153671, + "loss_ce": 0.00018646713579073548, + "loss_iou": 0.6328125, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 402612920, + "step": 2344 + }, + { + "epoch": 0.6168212007628066, + "grad_norm": 6.221194498282131, + "learning_rate": 5e-06, + "loss": 0.1581, + "num_input_tokens_seen": 402785192, + "step": 2345 + }, + { + "epoch": 0.6168212007628066, + "loss": 0.14238294959068298, + "loss_ce": 0.0007356047863140702, + "loss_iou": 0.34765625, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 402785192, + "step": 2345 + }, + { + "epoch": 0.6170842375221938, + "grad_norm": 4.044951640988235, + "learning_rate": 5e-06, + "loss": 0.0799, + "num_input_tokens_seen": 402955576, + "step": 2346 + }, + { + "epoch": 0.6170842375221938, + "loss": 0.0908626914024353, + "loss_ce": 0.0020260235760360956, + "loss_iou": 0.5, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 402955576, + "step": 2346 + }, + { + "epoch": 0.6173472742815809, + "grad_norm": 6.79401496913443, + "learning_rate": 5e-06, + "loss": 0.1167, + "num_input_tokens_seen": 403125596, + "step": 2347 + }, + { + "epoch": 0.6173472742815809, + "loss": 0.19389519095420837, + "loss_ce": 0.0025499900802969933, + "loss_iou": 0.52734375, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 403125596, + "step": 2347 + }, + { + "epoch": 0.617610311040968, + "grad_norm": 4.6407931564353255, + "learning_rate": 5e-06, + "loss": 0.1101, + "num_input_tokens_seen": 403295848, + "step": 2348 + }, + { + "epoch": 0.617610311040968, + "loss": 0.12875889241695404, + "loss_ce": 0.0007681695278733969, + "loss_iou": 0.671875, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 403295848, + "step": 2348 + }, + { + "epoch": 0.6178733478003551, + "grad_norm": 15.448434585562659, + "learning_rate": 5e-06, + "loss": 0.1008, + "num_input_tokens_seen": 403468128, + "step": 2349 + }, + { + "epoch": 0.6178733478003551, + "loss": 0.12984147667884827, + "loss_ce": 0.00026383629301562905, + "loss_iou": 0.578125, + "loss_num": 0.02587890625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 403468128, + "step": 2349 + }, + { + "epoch": 0.6181363845597422, + "grad_norm": 6.470906735340733, + "learning_rate": 5e-06, + "loss": 0.095, + "num_input_tokens_seen": 403640328, + "step": 2350 + }, + { + "epoch": 0.6181363845597422, + "loss": 0.08621760457754135, + "loss_ce": 0.002935132011771202, + "loss_iou": 0.47265625, + "loss_num": 0.0166015625, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 403640328, + "step": 2350 + }, + { + "epoch": 0.6183994213191294, + "grad_norm": 3.904089267434168, + "learning_rate": 5e-06, + "loss": 0.1178, + "num_input_tokens_seen": 403812552, + "step": 2351 + }, + { + "epoch": 0.6183994213191294, + "loss": 0.09386193752288818, + "loss_ce": 0.00026452430756762624, + "loss_iou": 0.388671875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 403812552, + "step": 2351 + }, + { + "epoch": 0.6186624580785165, + "grad_norm": 6.850647458248975, + "learning_rate": 5e-06, + "loss": 0.1608, + "num_input_tokens_seen": 403984536, + "step": 2352 + }, + { + "epoch": 0.6186624580785165, + "loss": 0.10526977479457855, + "loss_ce": 0.001601562718860805, + "loss_iou": 0.50390625, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 403984536, + "step": 2352 + }, + { + "epoch": 0.6189254948379036, + "grad_norm": 6.380076499827068, + "learning_rate": 5e-06, + "loss": 0.1397, + "num_input_tokens_seen": 404156840, + "step": 2353 + }, + { + "epoch": 0.6189254948379036, + "loss": 0.10814794898033142, + "loss_ce": 0.00020728506206069142, + "loss_iou": 0.5, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 404156840, + "step": 2353 + }, + { + "epoch": 0.6191885315972907, + "grad_norm": 16.46365513417012, + "learning_rate": 5e-06, + "loss": 0.1069, + "num_input_tokens_seen": 404328912, + "step": 2354 + }, + { + "epoch": 0.6191885315972907, + "loss": 0.17106536030769348, + "loss_ce": 0.00019742565928027034, + "loss_iou": 0.48828125, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 404328912, + "step": 2354 + }, + { + "epoch": 0.6194515683566778, + "grad_norm": 12.873967018770987, + "learning_rate": 5e-06, + "loss": 0.123, + "num_input_tokens_seen": 404501048, + "step": 2355 + }, + { + "epoch": 0.6194515683566778, + "loss": 0.1692376732826233, + "loss_ce": 0.0014825284015387297, + "loss_iou": 0.443359375, + "loss_num": 0.033447265625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 404501048, + "step": 2355 + }, + { + "epoch": 0.619714605116065, + "grad_norm": 3.2257038395599706, + "learning_rate": 5e-06, + "loss": 0.1655, + "num_input_tokens_seen": 404673104, + "step": 2356 + }, + { + "epoch": 0.619714605116065, + "loss": 0.23891915380954742, + "loss_ce": 0.007901079021394253, + "loss_iou": 0.50390625, + "loss_num": 0.046142578125, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 404673104, + "step": 2356 + }, + { + "epoch": 0.6199776418754521, + "grad_norm": 7.80566516619346, + "learning_rate": 5e-06, + "loss": 0.1371, + "num_input_tokens_seen": 404845264, + "step": 2357 + }, + { + "epoch": 0.6199776418754521, + "loss": 0.17429864406585693, + "loss_ce": 0.005078674294054508, + "loss_iou": 0.4375, + "loss_num": 0.033935546875, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 404845264, + "step": 2357 + }, + { + "epoch": 0.6202406786348392, + "grad_norm": 4.957580646805035, + "learning_rate": 5e-06, + "loss": 0.0895, + "num_input_tokens_seen": 405017460, + "step": 2358 + }, + { + "epoch": 0.6202406786348392, + "loss": 0.05168257653713226, + "loss_ce": 0.003480062121525407, + "loss_iou": 0.55078125, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 405017460, + "step": 2358 + }, + { + "epoch": 0.6205037153942263, + "grad_norm": 4.916881514910221, + "learning_rate": 5e-06, + "loss": 0.1126, + "num_input_tokens_seen": 405189524, + "step": 2359 + }, + { + "epoch": 0.6205037153942263, + "loss": 0.06677095592021942, + "loss_ce": 0.001280229538679123, + "loss_iou": 0.51171875, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 405189524, + "step": 2359 + }, + { + "epoch": 0.6207667521536134, + "grad_norm": 3.691480970561855, + "learning_rate": 5e-06, + "loss": 0.1426, + "num_input_tokens_seen": 405362028, + "step": 2360 + }, + { + "epoch": 0.6207667521536134, + "loss": 0.09511809051036835, + "loss_ce": 5.582500307355076e-05, + "loss_iou": 0.49609375, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 405362028, + "step": 2360 + }, + { + "epoch": 0.6210297889130006, + "grad_norm": 23.640084314089293, + "learning_rate": 5e-06, + "loss": 0.1421, + "num_input_tokens_seen": 405534164, + "step": 2361 + }, + { + "epoch": 0.6210297889130006, + "loss": 0.1902218908071518, + "loss_ce": 0.00021944480249658227, + "loss_iou": 0.4140625, + "loss_num": 0.0380859375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 405534164, + "step": 2361 + }, + { + "epoch": 0.6212928256723877, + "grad_norm": 3.82381362472469, + "learning_rate": 5e-06, + "loss": 0.1262, + "num_input_tokens_seen": 405706288, + "step": 2362 + }, + { + "epoch": 0.6212928256723877, + "loss": 0.07140006124973297, + "loss_ce": 0.0007060917560011148, + "loss_iou": 0.494140625, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 405706288, + "step": 2362 + }, + { + "epoch": 0.6215558624317749, + "grad_norm": 3.7402655746033293, + "learning_rate": 5e-06, + "loss": 0.1074, + "num_input_tokens_seen": 405878688, + "step": 2363 + }, + { + "epoch": 0.6215558624317749, + "loss": 0.09456443786621094, + "loss_ce": 0.0030269669368863106, + "loss_iou": 0.390625, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 405878688, + "step": 2363 + }, + { + "epoch": 0.621818899191162, + "grad_norm": 5.548477550947741, + "learning_rate": 5e-06, + "loss": 0.1307, + "num_input_tokens_seen": 406050532, + "step": 2364 + }, + { + "epoch": 0.621818899191162, + "loss": 0.12644195556640625, + "loss_ce": 0.005317692644894123, + "loss_iou": 0.51171875, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 406050532, + "step": 2364 + }, + { + "epoch": 0.6220819359505491, + "grad_norm": 8.39488287912259, + "learning_rate": 5e-06, + "loss": 0.1011, + "num_input_tokens_seen": 406220760, + "step": 2365 + }, + { + "epoch": 0.6220819359505491, + "loss": 0.13592961430549622, + "loss_ce": 0.0007520002545788884, + "loss_iou": 0.3203125, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 406220760, + "step": 2365 + }, + { + "epoch": 0.6223449727099362, + "grad_norm": 4.84850760753409, + "learning_rate": 5e-06, + "loss": 0.0996, + "num_input_tokens_seen": 406393068, + "step": 2366 + }, + { + "epoch": 0.6223449727099362, + "loss": 0.09754068404436111, + "loss_ce": 0.0005253083654679358, + "loss_iou": 0.4296875, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 406393068, + "step": 2366 + }, + { + "epoch": 0.6226080094693234, + "grad_norm": 4.613478147053686, + "learning_rate": 5e-06, + "loss": 0.1095, + "num_input_tokens_seen": 406565304, + "step": 2367 + }, + { + "epoch": 0.6226080094693234, + "loss": 0.1131492406129837, + "loss_ce": 0.002370435046032071, + "loss_iou": 0.625, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 406565304, + "step": 2367 + }, + { + "epoch": 0.6228710462287105, + "grad_norm": 4.098258220864737, + "learning_rate": 5e-06, + "loss": 0.1377, + "num_input_tokens_seen": 406737308, + "step": 2368 + }, + { + "epoch": 0.6228710462287105, + "loss": 0.09665839374065399, + "loss_ce": 0.0019318348495289683, + "loss_iou": 0.498046875, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 406737308, + "step": 2368 + }, + { + "epoch": 0.6231340829880976, + "grad_norm": 10.800256206470614, + "learning_rate": 5e-06, + "loss": 0.1047, + "num_input_tokens_seen": 406909296, + "step": 2369 + }, + { + "epoch": 0.6231340829880976, + "loss": 0.08466358482837677, + "loss_ce": 0.0036088963970541954, + "loss_iou": 0.546875, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 406909296, + "step": 2369 + }, + { + "epoch": 0.6233971197474847, + "grad_norm": 4.097548305836531, + "learning_rate": 5e-06, + "loss": 0.133, + "num_input_tokens_seen": 407078532, + "step": 2370 + }, + { + "epoch": 0.6233971197474847, + "loss": 0.14105001091957092, + "loss_ce": 0.0007301777368411422, + "loss_iou": 0.53515625, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 407078532, + "step": 2370 + }, + { + "epoch": 0.6236601565068718, + "grad_norm": 4.885935118310897, + "learning_rate": 5e-06, + "loss": 0.142, + "num_input_tokens_seen": 407250520, + "step": 2371 + }, + { + "epoch": 0.6236601565068718, + "loss": 0.185832679271698, + "loss_ce": 0.001598058152012527, + "loss_iou": 0.365234375, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 407250520, + "step": 2371 + }, + { + "epoch": 0.623923193266259, + "grad_norm": 4.243940025684864, + "learning_rate": 5e-06, + "loss": 0.1161, + "num_input_tokens_seen": 407420132, + "step": 2372 + }, + { + "epoch": 0.623923193266259, + "loss": 0.17022864520549774, + "loss_ce": 0.0003678113571368158, + "loss_iou": 0.60546875, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 407420132, + "step": 2372 + }, + { + "epoch": 0.6241862300256461, + "grad_norm": 6.84932788040415, + "learning_rate": 5e-06, + "loss": 0.0833, + "num_input_tokens_seen": 407592472, + "step": 2373 + }, + { + "epoch": 0.6241862300256461, + "loss": 0.07611523568630219, + "loss_ce": 0.0008588911150582135, + "loss_iou": 0.50390625, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 407592472, + "step": 2373 + }, + { + "epoch": 0.6244492667850332, + "grad_norm": 5.11826467429585, + "learning_rate": 5e-06, + "loss": 0.1594, + "num_input_tokens_seen": 407764812, + "step": 2374 + }, + { + "epoch": 0.6244492667850332, + "loss": 0.22299307584762573, + "loss_ce": 0.001649091369472444, + "loss_iou": 0.5078125, + "loss_num": 0.04443359375, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 407764812, + "step": 2374 + }, + { + "epoch": 0.6247123035444203, + "grad_norm": 8.470257803757693, + "learning_rate": 5e-06, + "loss": 0.1313, + "num_input_tokens_seen": 407937232, + "step": 2375 + }, + { + "epoch": 0.6247123035444203, + "loss": 0.12985503673553467, + "loss_ce": 0.0015591441188007593, + "loss_iou": 0.5625, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 407937232, + "step": 2375 + }, + { + "epoch": 0.6249753403038074, + "grad_norm": 6.1769339682539135, + "learning_rate": 5e-06, + "loss": 0.1287, + "num_input_tokens_seen": 408109660, + "step": 2376 + }, + { + "epoch": 0.6249753403038074, + "loss": 0.1395704746246338, + "loss_ce": 0.0009596287272870541, + "loss_iou": 0.4140625, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 408109660, + "step": 2376 + }, + { + "epoch": 0.6252383770631946, + "grad_norm": 16.39235046187932, + "learning_rate": 5e-06, + "loss": 0.1324, + "num_input_tokens_seen": 408281820, + "step": 2377 + }, + { + "epoch": 0.6252383770631946, + "loss": 0.12146437168121338, + "loss_ce": 0.0002790701691992581, + "loss_iou": 0.51953125, + "loss_num": 0.0242919921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 408281820, + "step": 2377 + }, + { + "epoch": 0.6255014138225817, + "grad_norm": 7.613001701595713, + "learning_rate": 5e-06, + "loss": 0.1088, + "num_input_tokens_seen": 408453984, + "step": 2378 + }, + { + "epoch": 0.6255014138225817, + "loss": 0.120358906686306, + "loss_ce": 0.002347429981455207, + "loss_iou": 0.51171875, + "loss_num": 0.0235595703125, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 408453984, + "step": 2378 + }, + { + "epoch": 0.6257644505819688, + "grad_norm": 4.614954338110094, + "learning_rate": 5e-06, + "loss": 0.1215, + "num_input_tokens_seen": 408625956, + "step": 2379 + }, + { + "epoch": 0.6257644505819688, + "loss": 0.12903685867786407, + "loss_ce": 0.003334960900247097, + "loss_iou": 0.640625, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 408625956, + "step": 2379 + }, + { + "epoch": 0.6260274873413559, + "grad_norm": 4.387875886048538, + "learning_rate": 5e-06, + "loss": 0.1194, + "num_input_tokens_seen": 408797808, + "step": 2380 + }, + { + "epoch": 0.6260274873413559, + "loss": 0.1543307602405548, + "loss_ce": 0.0021701250225305557, + "loss_iou": 0.50390625, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 408797808, + "step": 2380 + }, + { + "epoch": 0.626290524100743, + "grad_norm": 5.128859786695588, + "learning_rate": 5e-06, + "loss": 0.1393, + "num_input_tokens_seen": 408969768, + "step": 2381 + }, + { + "epoch": 0.626290524100743, + "loss": 0.1084834560751915, + "loss_ce": 0.0040522972121834755, + "loss_iou": 0.54296875, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 408969768, + "step": 2381 + }, + { + "epoch": 0.6265535608601303, + "grad_norm": 9.647359334804406, + "learning_rate": 5e-06, + "loss": 0.1561, + "num_input_tokens_seen": 409141828, + "step": 2382 + }, + { + "epoch": 0.6265535608601303, + "loss": 0.253137469291687, + "loss_ce": 0.00036035641096532345, + "loss_iou": 0.494140625, + "loss_num": 0.050537109375, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 409141828, + "step": 2382 + }, + { + "epoch": 0.6268165976195174, + "grad_norm": 7.279560183961185, + "learning_rate": 5e-06, + "loss": 0.0772, + "num_input_tokens_seen": 409313972, + "step": 2383 + }, + { + "epoch": 0.6268165976195174, + "loss": 0.07071413099765778, + "loss_ce": 0.00015749200247228146, + "loss_iou": 0.62109375, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 409313972, + "step": 2383 + }, + { + "epoch": 0.6270796343789045, + "grad_norm": 34.71304497396249, + "learning_rate": 5e-06, + "loss": 0.1461, + "num_input_tokens_seen": 409482936, + "step": 2384 + }, + { + "epoch": 0.6270796343789045, + "loss": 0.07908271253108978, + "loss_ce": 0.0020258277654647827, + "loss_iou": 0.37890625, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 409482936, + "step": 2384 + }, + { + "epoch": 0.6273426711382916, + "grad_norm": 3.2691371809714322, + "learning_rate": 5e-06, + "loss": 0.0877, + "num_input_tokens_seen": 409655000, + "step": 2385 + }, + { + "epoch": 0.6273426711382916, + "loss": 0.06744687259197235, + "loss_ce": 0.00012509411317296326, + "loss_iou": 0.5546875, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 409655000, + "step": 2385 + }, + { + "epoch": 0.6276057078976787, + "grad_norm": 3.653366346571365, + "learning_rate": 5e-06, + "loss": 0.1535, + "num_input_tokens_seen": 409826932, + "step": 2386 + }, + { + "epoch": 0.6276057078976787, + "loss": 0.11388548463582993, + "loss_ce": 0.0006347582675516605, + "loss_iou": 0.5390625, + "loss_num": 0.0225830078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 409826932, + "step": 2386 + }, + { + "epoch": 0.6278687446570659, + "grad_norm": 4.394076994308062, + "learning_rate": 5e-06, + "loss": 0.126, + "num_input_tokens_seen": 409997112, + "step": 2387 + }, + { + "epoch": 0.6278687446570659, + "loss": 0.07691079378128052, + "loss_ce": 0.0022647997830063105, + "loss_iou": 0.625, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 409997112, + "step": 2387 + }, + { + "epoch": 0.628131781416453, + "grad_norm": 5.885043334484188, + "learning_rate": 5e-06, + "loss": 0.1348, + "num_input_tokens_seen": 410169412, + "step": 2388 + }, + { + "epoch": 0.628131781416453, + "loss": 0.09735430032014847, + "loss_ce": 0.0005830569425597787, + "loss_iou": 0.6171875, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 410169412, + "step": 2388 + }, + { + "epoch": 0.6283948181758401, + "grad_norm": 9.328795985316807, + "learning_rate": 5e-06, + "loss": 0.0794, + "num_input_tokens_seen": 410341700, + "step": 2389 + }, + { + "epoch": 0.6283948181758401, + "loss": 0.08082857728004456, + "loss_ce": 0.0010098508791998029, + "loss_iou": 0.5390625, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 410341700, + "step": 2389 + }, + { + "epoch": 0.6286578549352272, + "grad_norm": 4.002799196322825, + "learning_rate": 5e-06, + "loss": 0.0811, + "num_input_tokens_seen": 410513660, + "step": 2390 + }, + { + "epoch": 0.6286578549352272, + "loss": 0.054968155920505524, + "loss_ce": 0.000341689505148679, + "loss_iou": 0.53515625, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 410513660, + "step": 2390 + }, + { + "epoch": 0.6289208916946143, + "grad_norm": 6.9030334525022505, + "learning_rate": 5e-06, + "loss": 0.1013, + "num_input_tokens_seen": 410685956, + "step": 2391 + }, + { + "epoch": 0.6289208916946143, + "loss": 0.15183863043785095, + "loss_ce": 0.0031264659482985735, + "loss_iou": 0.53125, + "loss_num": 0.02978515625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 410685956, + "step": 2391 + }, + { + "epoch": 0.6291839284540014, + "grad_norm": 5.941678610362804, + "learning_rate": 5e-06, + "loss": 0.0866, + "num_input_tokens_seen": 410856348, + "step": 2392 + }, + { + "epoch": 0.6291839284540014, + "loss": 0.1089685708284378, + "loss_ce": 0.0001734074903652072, + "loss_iou": 0.671875, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 410856348, + "step": 2392 + }, + { + "epoch": 0.6294469652133886, + "grad_norm": 7.460369582099088, + "learning_rate": 5e-06, + "loss": 0.1348, + "num_input_tokens_seen": 411028268, + "step": 2393 + }, + { + "epoch": 0.6294469652133886, + "loss": 0.17290328443050385, + "loss_ce": 0.0006010266370140016, + "loss_iou": 0.3671875, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 411028268, + "step": 2393 + }, + { + "epoch": 0.6297100019727757, + "grad_norm": 4.673085403235698, + "learning_rate": 5e-06, + "loss": 0.1337, + "num_input_tokens_seen": 411200296, + "step": 2394 + }, + { + "epoch": 0.6297100019727757, + "loss": 0.11547736823558807, + "loss_ce": 0.0025928488466888666, + "loss_iou": 0.45703125, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 411200296, + "step": 2394 + }, + { + "epoch": 0.6299730387321628, + "grad_norm": 4.914810997482069, + "learning_rate": 5e-06, + "loss": 0.0925, + "num_input_tokens_seen": 411372592, + "step": 2395 + }, + { + "epoch": 0.6299730387321628, + "loss": 0.1332027018070221, + "loss_ce": 0.0009089965024031699, + "loss_iou": 0.4140625, + "loss_num": 0.0264892578125, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 411372592, + "step": 2395 + }, + { + "epoch": 0.6302360754915499, + "grad_norm": 10.387773947226163, + "learning_rate": 5e-06, + "loss": 0.1214, + "num_input_tokens_seen": 411544920, + "step": 2396 + }, + { + "epoch": 0.6302360754915499, + "loss": 0.12052314728498459, + "loss_ce": 0.0002533740480430424, + "loss_iou": 0.5625, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 411544920, + "step": 2396 + }, + { + "epoch": 0.630499112250937, + "grad_norm": 8.18207434885096, + "learning_rate": 5e-06, + "loss": 0.0923, + "num_input_tokens_seen": 411717416, + "step": 2397 + }, + { + "epoch": 0.630499112250937, + "loss": 0.09294469654560089, + "loss_ce": 0.0004459216434042901, + "loss_iou": 0.66015625, + "loss_num": 0.0185546875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 411717416, + "step": 2397 + }, + { + "epoch": 0.6307621490103242, + "grad_norm": 9.784216604991045, + "learning_rate": 5e-06, + "loss": 0.1422, + "num_input_tokens_seen": 411889656, + "step": 2398 + }, + { + "epoch": 0.6307621490103242, + "loss": 0.06576241552829742, + "loss_ce": 0.00011910500325029716, + "loss_iou": 0.46484375, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 411889656, + "step": 2398 + }, + { + "epoch": 0.6310251857697113, + "grad_norm": 9.394179875352377, + "learning_rate": 5e-06, + "loss": 0.1396, + "num_input_tokens_seen": 412061964, + "step": 2399 + }, + { + "epoch": 0.6310251857697113, + "loss": 0.2204100638628006, + "loss_ce": 0.003613188164308667, + "loss_iou": 0.37109375, + "loss_num": 0.043212890625, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 412061964, + "step": 2399 + }, + { + "epoch": 0.6312882225290984, + "grad_norm": 5.421055277942169, + "learning_rate": 5e-06, + "loss": 0.105, + "num_input_tokens_seen": 412233844, + "step": 2400 + }, + { + "epoch": 0.6312882225290984, + "loss": 0.058117613196372986, + "loss_ce": 0.00016473224968649447, + "loss_iou": 0.390625, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 412233844, + "step": 2400 + }, + { + "epoch": 0.6315512592884855, + "grad_norm": 32.78384103492781, + "learning_rate": 5e-06, + "loss": 0.0854, + "num_input_tokens_seen": 412404404, + "step": 2401 + }, + { + "epoch": 0.6315512592884855, + "loss": 0.11002543568611145, + "loss_ce": 0.0008640556479804218, + "loss_iou": NaN, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 412404404, + "step": 2401 + }, + { + "epoch": 0.6318142960478726, + "grad_norm": 5.144111967585496, + "learning_rate": 5e-06, + "loss": 0.1133, + "num_input_tokens_seen": 412576424, + "step": 2402 + }, + { + "epoch": 0.6318142960478726, + "loss": 0.10595827549695969, + "loss_ce": 0.0008252161205746233, + "loss_iou": 0.5390625, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 412576424, + "step": 2402 + }, + { + "epoch": 0.6320773328072599, + "grad_norm": 9.535369648896554, + "learning_rate": 5e-06, + "loss": 0.1344, + "num_input_tokens_seen": 412748540, + "step": 2403 + }, + { + "epoch": 0.6320773328072599, + "loss": 0.08199536800384521, + "loss_ce": 0.0007880894117988646, + "loss_iou": 0.52734375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 412748540, + "step": 2403 + }, + { + "epoch": 0.632340369566647, + "grad_norm": 3.8619766281957557, + "learning_rate": 5e-06, + "loss": 0.0813, + "num_input_tokens_seen": 412919044, + "step": 2404 + }, + { + "epoch": 0.632340369566647, + "loss": 0.07876091450452805, + "loss_ce": 0.0004375489370431751, + "loss_iou": 0.3671875, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 412919044, + "step": 2404 + }, + { + "epoch": 0.6326034063260341, + "grad_norm": 4.690903318512814, + "learning_rate": 5e-06, + "loss": 0.1315, + "num_input_tokens_seen": 413089276, + "step": 2405 + }, + { + "epoch": 0.6326034063260341, + "loss": 0.12442415952682495, + "loss_ce": 0.0037271445617079735, + "loss_iou": 0.54296875, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 413089276, + "step": 2405 + }, + { + "epoch": 0.6328664430854212, + "grad_norm": 5.509443001792319, + "learning_rate": 5e-06, + "loss": 0.1865, + "num_input_tokens_seen": 413261512, + "step": 2406 + }, + { + "epoch": 0.6328664430854212, + "loss": 0.2314736247062683, + "loss_ce": 0.0039345622062683105, + "loss_iou": 0.55078125, + "loss_num": 0.04541015625, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 413261512, + "step": 2406 + }, + { + "epoch": 0.6331294798448083, + "grad_norm": 4.789366484519657, + "learning_rate": 5e-06, + "loss": 0.1568, + "num_input_tokens_seen": 413433928, + "step": 2407 + }, + { + "epoch": 0.6331294798448083, + "loss": 0.10793605446815491, + "loss_ce": 0.002009541727602482, + "loss_iou": 0.4609375, + "loss_num": 0.0211181640625, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 413433928, + "step": 2407 + }, + { + "epoch": 0.6333925166041955, + "grad_norm": 8.59758393749405, + "learning_rate": 5e-06, + "loss": 0.0976, + "num_input_tokens_seen": 413606128, + "step": 2408 + }, + { + "epoch": 0.6333925166041955, + "loss": 0.0926143079996109, + "loss_ce": 0.001000535092316568, + "loss_iou": 0.40234375, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 413606128, + "step": 2408 + }, + { + "epoch": 0.6336555533635826, + "grad_norm": 9.998355206402028, + "learning_rate": 5e-06, + "loss": 0.1245, + "num_input_tokens_seen": 413778176, + "step": 2409 + }, + { + "epoch": 0.6336555533635826, + "loss": 0.12268656492233276, + "loss_ce": 0.0002958154655061662, + "loss_iou": 0.39453125, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 413778176, + "step": 2409 + }, + { + "epoch": 0.6339185901229697, + "grad_norm": 4.79036194916934, + "learning_rate": 5e-06, + "loss": 0.1397, + "num_input_tokens_seen": 413950292, + "step": 2410 + }, + { + "epoch": 0.6339185901229697, + "loss": 0.22198227047920227, + "loss_ce": 0.0022557121701538563, + "loss_iou": 0.482421875, + "loss_num": 0.0439453125, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 413950292, + "step": 2410 + }, + { + "epoch": 0.6341816268823568, + "grad_norm": 21.148690076997823, + "learning_rate": 5e-06, + "loss": 0.1036, + "num_input_tokens_seen": 414119124, + "step": 2411 + }, + { + "epoch": 0.6341816268823568, + "loss": 0.08586390316486359, + "loss_ce": 0.000368903303751722, + "loss_iou": 0.494140625, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 414119124, + "step": 2411 + }, + { + "epoch": 0.6344446636417439, + "grad_norm": 7.240778669769743, + "learning_rate": 5e-06, + "loss": 0.1765, + "num_input_tokens_seen": 414291560, + "step": 2412 + }, + { + "epoch": 0.6344446636417439, + "loss": 0.24769088625907898, + "loss_ce": 0.0022685134317725897, + "loss_iou": 0.369140625, + "loss_num": 0.049072265625, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 414291560, + "step": 2412 + }, + { + "epoch": 0.634707700401131, + "grad_norm": 15.982142868420826, + "learning_rate": 5e-06, + "loss": 0.1408, + "num_input_tokens_seen": 414463784, + "step": 2413 + }, + { + "epoch": 0.634707700401131, + "loss": 0.22224080562591553, + "loss_ce": 0.0012630214914679527, + "loss_iou": 0.474609375, + "loss_num": 0.044189453125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 414463784, + "step": 2413 + }, + { + "epoch": 0.6349707371605182, + "grad_norm": 5.563886751421209, + "learning_rate": 5e-06, + "loss": 0.0981, + "num_input_tokens_seen": 414635968, + "step": 2414 + }, + { + "epoch": 0.6349707371605182, + "loss": 0.12378741800785065, + "loss_ce": 0.0024190132971853018, + "loss_iou": 0.515625, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 414635968, + "step": 2414 + }, + { + "epoch": 0.6352337739199053, + "grad_norm": 6.971489879379984, + "learning_rate": 5e-06, + "loss": 0.0811, + "num_input_tokens_seen": 414808184, + "step": 2415 + }, + { + "epoch": 0.6352337739199053, + "loss": 0.09163232147693634, + "loss_ce": 0.00017114286310970783, + "loss_iou": 0.59765625, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 414808184, + "step": 2415 + }, + { + "epoch": 0.6354968106792924, + "grad_norm": 10.763166082287821, + "learning_rate": 5e-06, + "loss": 0.1798, + "num_input_tokens_seen": 414978292, + "step": 2416 + }, + { + "epoch": 0.6354968106792924, + "loss": 0.1381605565547943, + "loss_ce": 0.00016007423982955515, + "loss_iou": 0.50390625, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 414978292, + "step": 2416 + }, + { + "epoch": 0.6357598474386795, + "grad_norm": 9.836473738616426, + "learning_rate": 5e-06, + "loss": 0.1041, + "num_input_tokens_seen": 415150344, + "step": 2417 + }, + { + "epoch": 0.6357598474386795, + "loss": 0.1071944609284401, + "loss_ce": 0.0013594944030046463, + "loss_iou": 0.47265625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 415150344, + "step": 2417 + }, + { + "epoch": 0.6360228841980666, + "grad_norm": 3.9009374145565308, + "learning_rate": 5e-06, + "loss": 0.1033, + "num_input_tokens_seen": 415322328, + "step": 2418 + }, + { + "epoch": 0.6360228841980666, + "loss": 0.08769555389881134, + "loss_ce": 0.00026269443333148956, + "loss_iou": 0.54296875, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 415322328, + "step": 2418 + }, + { + "epoch": 0.6362859209574538, + "grad_norm": 12.748558785828688, + "learning_rate": 5e-06, + "loss": 0.1426, + "num_input_tokens_seen": 415494296, + "step": 2419 + }, + { + "epoch": 0.6362859209574538, + "loss": 0.09499529004096985, + "loss_ce": 0.004266531206667423, + "loss_iou": 0.458984375, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 415494296, + "step": 2419 + }, + { + "epoch": 0.636548957716841, + "grad_norm": 19.009247758406477, + "learning_rate": 5e-06, + "loss": 0.1063, + "num_input_tokens_seen": 415666184, + "step": 2420 + }, + { + "epoch": 0.636548957716841, + "loss": 0.07810772955417633, + "loss_ce": 0.0017832687590271235, + "loss_iou": 0.54296875, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 415666184, + "step": 2420 + }, + { + "epoch": 0.636811994476228, + "grad_norm": 4.624128366794573, + "learning_rate": 5e-06, + "loss": 0.1594, + "num_input_tokens_seen": 415838228, + "step": 2421 + }, + { + "epoch": 0.636811994476228, + "loss": 0.11569841206073761, + "loss_ce": 0.0010743860621005297, + "loss_iou": 0.546875, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 415838228, + "step": 2421 + }, + { + "epoch": 0.6370750312356152, + "grad_norm": 4.489659891906354, + "learning_rate": 5e-06, + "loss": 0.0868, + "num_input_tokens_seen": 416010568, + "step": 2422 + }, + { + "epoch": 0.6370750312356152, + "loss": 0.09839779138565063, + "loss_ce": 0.0043120919726789, + "loss_iou": 0.578125, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 416010568, + "step": 2422 + }, + { + "epoch": 0.6373380679950023, + "grad_norm": 5.451345385819082, + "learning_rate": 5e-06, + "loss": 0.1056, + "num_input_tokens_seen": 416182976, + "step": 2423 + }, + { + "epoch": 0.6373380679950023, + "loss": 0.11042429506778717, + "loss_ce": 0.005230204667896032, + "loss_iou": 0.65234375, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 416182976, + "step": 2423 + }, + { + "epoch": 0.6376011047543895, + "grad_norm": 4.648386667387077, + "learning_rate": 5e-06, + "loss": 0.08, + "num_input_tokens_seen": 416354968, + "step": 2424 + }, + { + "epoch": 0.6376011047543895, + "loss": 0.08212631195783615, + "loss_ce": 0.002170257270336151, + "loss_iou": 0.55859375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 416354968, + "step": 2424 + }, + { + "epoch": 0.6378641415137766, + "grad_norm": 7.161469269147777, + "learning_rate": 5e-06, + "loss": 0.1544, + "num_input_tokens_seen": 416527268, + "step": 2425 + }, + { + "epoch": 0.6378641415137766, + "loss": 0.1374281942844391, + "loss_ce": 0.0008009903831407428, + "loss_iou": 0.73828125, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 416527268, + "step": 2425 + }, + { + "epoch": 0.6381271782731637, + "grad_norm": 7.707879726473491, + "learning_rate": 5e-06, + "loss": 0.1335, + "num_input_tokens_seen": 416699532, + "step": 2426 + }, + { + "epoch": 0.6381271782731637, + "loss": 0.06318493187427521, + "loss_ce": 0.00016612766194157302, + "loss_iou": 0.482421875, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 416699532, + "step": 2426 + }, + { + "epoch": 0.6383902150325508, + "grad_norm": 4.53622350388229, + "learning_rate": 5e-06, + "loss": 0.085, + "num_input_tokens_seen": 416871872, + "step": 2427 + }, + { + "epoch": 0.6383902150325508, + "loss": 0.14566369354724884, + "loss_ce": 0.0013460592599585652, + "loss_iou": 0.640625, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 416871872, + "step": 2427 + }, + { + "epoch": 0.6386532517919379, + "grad_norm": 5.815980535010829, + "learning_rate": 5e-06, + "loss": 0.1329, + "num_input_tokens_seen": 417044020, + "step": 2428 + }, + { + "epoch": 0.6386532517919379, + "loss": 0.12992171943187714, + "loss_ce": 0.0009391760104335845, + "loss_iou": 0.5078125, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 417044020, + "step": 2428 + }, + { + "epoch": 0.6389162885513251, + "grad_norm": 4.632913290565498, + "learning_rate": 5e-06, + "loss": 0.1236, + "num_input_tokens_seen": 417215912, + "step": 2429 + }, + { + "epoch": 0.6389162885513251, + "loss": 0.10175777971744537, + "loss_ce": 0.0023772907443344593, + "loss_iou": 0.53125, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 417215912, + "step": 2429 + }, + { + "epoch": 0.6391793253107122, + "grad_norm": 3.31675640465626, + "learning_rate": 5e-06, + "loss": 0.1199, + "num_input_tokens_seen": 417388272, + "step": 2430 + }, + { + "epoch": 0.6391793253107122, + "loss": 0.13262677192687988, + "loss_ce": 0.0031712136697024107, + "loss_iou": 0.46875, + "loss_num": 0.02587890625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 417388272, + "step": 2430 + }, + { + "epoch": 0.6394423620700993, + "grad_norm": 9.57220985801255, + "learning_rate": 5e-06, + "loss": 0.0935, + "num_input_tokens_seen": 417560304, + "step": 2431 + }, + { + "epoch": 0.6394423620700993, + "loss": 0.08074182271957397, + "loss_ce": 0.00218957313336432, + "loss_iou": 0.5078125, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 417560304, + "step": 2431 + }, + { + "epoch": 0.6397053988294864, + "grad_norm": 5.444532606716201, + "learning_rate": 5e-06, + "loss": 0.1614, + "num_input_tokens_seen": 417732592, + "step": 2432 + }, + { + "epoch": 0.6397053988294864, + "loss": 0.23035961389541626, + "loss_ce": 0.0006538145244121552, + "loss_iou": 0.43359375, + "loss_num": 0.0458984375, + "loss_xval": 0.2294921875, + "num_input_tokens_seen": 417732592, + "step": 2432 + }, + { + "epoch": 0.6399684355888735, + "grad_norm": 10.22830520629982, + "learning_rate": 5e-06, + "loss": 0.1027, + "num_input_tokens_seen": 417904600, + "step": 2433 + }, + { + "epoch": 0.6399684355888735, + "loss": 0.08408404886722565, + "loss_ce": 0.0004964005202054977, + "loss_iou": 0.57421875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 417904600, + "step": 2433 + }, + { + "epoch": 0.6402314723482607, + "grad_norm": 4.834822071481386, + "learning_rate": 5e-06, + "loss": 0.1102, + "num_input_tokens_seen": 418076896, + "step": 2434 + }, + { + "epoch": 0.6402314723482607, + "loss": 0.07859447598457336, + "loss_ce": 0.0008967254543676972, + "loss_iou": 0.46484375, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 418076896, + "step": 2434 + }, + { + "epoch": 0.6404945091076478, + "grad_norm": 11.222393651192586, + "learning_rate": 5e-06, + "loss": 0.1709, + "num_input_tokens_seen": 418249128, + "step": 2435 + }, + { + "epoch": 0.6404945091076478, + "loss": 0.17654769122600555, + "loss_ce": 0.0008885157876648009, + "loss_iou": 0.59375, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 418249128, + "step": 2435 + }, + { + "epoch": 0.6407575458670349, + "grad_norm": 4.617257720330074, + "learning_rate": 5e-06, + "loss": 0.1234, + "num_input_tokens_seen": 418421340, + "step": 2436 + }, + { + "epoch": 0.6407575458670349, + "loss": 0.15517690777778625, + "loss_ce": 0.0020702139008790255, + "loss_iou": 0.5546875, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 418421340, + "step": 2436 + }, + { + "epoch": 0.641020582626422, + "grad_norm": 4.8286585158927, + "learning_rate": 5e-06, + "loss": 0.1282, + "num_input_tokens_seen": 418591736, + "step": 2437 + }, + { + "epoch": 0.641020582626422, + "loss": 0.17714877426624298, + "loss_ce": 0.003656333312392235, + "loss_iou": 0.48046875, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 418591736, + "step": 2437 + }, + { + "epoch": 0.6412836193858091, + "grad_norm": 13.06198419822853, + "learning_rate": 5e-06, + "loss": 0.1017, + "num_input_tokens_seen": 418763952, + "step": 2438 + }, + { + "epoch": 0.6412836193858091, + "loss": 0.1256633698940277, + "loss_ce": 0.0007243968429975212, + "loss_iou": 0.5703125, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 418763952, + "step": 2438 + }, + { + "epoch": 0.6415466561451962, + "grad_norm": 18.947469777145443, + "learning_rate": 5e-06, + "loss": 0.1404, + "num_input_tokens_seen": 418936088, + "step": 2439 + }, + { + "epoch": 0.6415466561451962, + "loss": 0.09127810597419739, + "loss_ce": 0.003967312164604664, + "loss_iou": 0.65234375, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 418936088, + "step": 2439 + }, + { + "epoch": 0.6418096929045835, + "grad_norm": 10.72734062355858, + "learning_rate": 5e-06, + "loss": 0.129, + "num_input_tokens_seen": 419108496, + "step": 2440 + }, + { + "epoch": 0.6418096929045835, + "loss": 0.16916052997112274, + "loss_ce": 0.0024124737828969955, + "loss_iou": 0.52734375, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 419108496, + "step": 2440 + }, + { + "epoch": 0.6420727296639706, + "grad_norm": 9.781068157842057, + "learning_rate": 5e-06, + "loss": 0.1154, + "num_input_tokens_seen": 419280412, + "step": 2441 + }, + { + "epoch": 0.6420727296639706, + "loss": 0.06414149701595306, + "loss_ce": 0.0008175191469490528, + "loss_iou": 0.5390625, + "loss_num": 0.01263427734375, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 419280412, + "step": 2441 + }, + { + "epoch": 0.6423357664233577, + "grad_norm": 4.632619855595305, + "learning_rate": 5e-06, + "loss": 0.1069, + "num_input_tokens_seen": 419452340, + "step": 2442 + }, + { + "epoch": 0.6423357664233577, + "loss": 0.11532483249902725, + "loss_ce": 0.0007618411909788847, + "loss_iou": 0.373046875, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 419452340, + "step": 2442 + }, + { + "epoch": 0.6425988031827448, + "grad_norm": 6.28676819593018, + "learning_rate": 5e-06, + "loss": 0.119, + "num_input_tokens_seen": 419624260, + "step": 2443 + }, + { + "epoch": 0.6425988031827448, + "loss": 0.08432676643133163, + "loss_ce": 0.0007086057448759675, + "loss_iou": 0.51171875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 419624260, + "step": 2443 + }, + { + "epoch": 0.6428618399421319, + "grad_norm": 4.106105061012571, + "learning_rate": 5e-06, + "loss": 0.1012, + "num_input_tokens_seen": 419796396, + "step": 2444 + }, + { + "epoch": 0.6428618399421319, + "loss": 0.08845219761133194, + "loss_ce": 0.00016484873776789755, + "loss_iou": 0.55078125, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 419796396, + "step": 2444 + }, + { + "epoch": 0.6431248767015191, + "grad_norm": 5.5585844707251555, + "learning_rate": 5e-06, + "loss": 0.0914, + "num_input_tokens_seen": 419965620, + "step": 2445 + }, + { + "epoch": 0.6431248767015191, + "loss": 0.08765023946762085, + "loss_ce": 0.0027198141906410456, + "loss_iou": 0.369140625, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 419965620, + "step": 2445 + }, + { + "epoch": 0.6433879134609062, + "grad_norm": 6.941283237766657, + "learning_rate": 5e-06, + "loss": 0.1388, + "num_input_tokens_seen": 420137700, + "step": 2446 + }, + { + "epoch": 0.6433879134609062, + "loss": 0.15698650479316711, + "loss_ce": 0.0023844558745622635, + "loss_iou": 0.60546875, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 420137700, + "step": 2446 + }, + { + "epoch": 0.6436509502202933, + "grad_norm": 4.724784845199971, + "learning_rate": 5e-06, + "loss": 0.1266, + "num_input_tokens_seen": 420309964, + "step": 2447 + }, + { + "epoch": 0.6436509502202933, + "loss": 0.05271516367793083, + "loss_ce": 0.0011099397670477629, + "loss_iou": 0.515625, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 420309964, + "step": 2447 + }, + { + "epoch": 0.6439139869796804, + "grad_norm": 6.277321029907694, + "learning_rate": 5e-06, + "loss": 0.1226, + "num_input_tokens_seen": 420482028, + "step": 2448 + }, + { + "epoch": 0.6439139869796804, + "loss": 0.19532331824302673, + "loss_ce": 0.0010789325460791588, + "loss_iou": 0.51171875, + "loss_num": 0.038818359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 420482028, + "step": 2448 + }, + { + "epoch": 0.6441770237390675, + "grad_norm": 13.80293628596545, + "learning_rate": 5e-06, + "loss": 0.1117, + "num_input_tokens_seen": 420654152, + "step": 2449 + }, + { + "epoch": 0.6441770237390675, + "loss": 0.07553352415561676, + "loss_ce": 0.00017035921337082982, + "loss_iou": 0.443359375, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 420654152, + "step": 2449 + }, + { + "epoch": 0.6444400604984547, + "grad_norm": 10.20337023035234, + "learning_rate": 5e-06, + "loss": 0.1029, + "num_input_tokens_seen": 420823676, + "step": 2450 + }, + { + "epoch": 0.6444400604984547, + "loss": 0.1215822771191597, + "loss_ce": 0.0013124945107847452, + "loss_iou": 0.5078125, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 420823676, + "step": 2450 + }, + { + "epoch": 0.6447030972578418, + "grad_norm": 3.8758961183069034, + "learning_rate": 5e-06, + "loss": 0.0947, + "num_input_tokens_seen": 420991548, + "step": 2451 + }, + { + "epoch": 0.6447030972578418, + "loss": 0.04900962486863136, + "loss_ce": 0.0008071088814176619, + "loss_iou": 0.61328125, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 420991548, + "step": 2451 + }, + { + "epoch": 0.6449661340172289, + "grad_norm": 17.020894064566225, + "learning_rate": 5e-06, + "loss": 0.1099, + "num_input_tokens_seen": 421163940, + "step": 2452 + }, + { + "epoch": 0.6449661340172289, + "loss": 0.11804142594337463, + "loss_ce": 6.047027636668645e-05, + "loss_iou": 0.455078125, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 421163940, + "step": 2452 + }, + { + "epoch": 0.645229170776616, + "grad_norm": 9.681794920323762, + "learning_rate": 5e-06, + "loss": 0.0851, + "num_input_tokens_seen": 421333576, + "step": 2453 + }, + { + "epoch": 0.645229170776616, + "loss": 0.1210954412817955, + "loss_ce": 0.0014970521442592144, + "loss_iou": 0.52734375, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 421333576, + "step": 2453 + }, + { + "epoch": 0.6454922075360031, + "grad_norm": 4.642104923921384, + "learning_rate": 5e-06, + "loss": 0.0956, + "num_input_tokens_seen": 421505944, + "step": 2454 + }, + { + "epoch": 0.6454922075360031, + "loss": 0.1038353219628334, + "loss_ce": 0.0009910848457366228, + "loss_iou": 0.53515625, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 421505944, + "step": 2454 + }, + { + "epoch": 0.6457552442953903, + "grad_norm": 9.523149673908406, + "learning_rate": 5e-06, + "loss": 0.1258, + "num_input_tokens_seen": 421677944, + "step": 2455 + }, + { + "epoch": 0.6457552442953903, + "loss": 0.15976807475090027, + "loss_ce": 0.0006189080304466188, + "loss_iou": 0.419921875, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 421677944, + "step": 2455 + }, + { + "epoch": 0.6460182810547774, + "grad_norm": 5.022843580524971, + "learning_rate": 5e-06, + "loss": 0.1161, + "num_input_tokens_seen": 421850200, + "step": 2456 + }, + { + "epoch": 0.6460182810547774, + "loss": 0.11521396040916443, + "loss_ce": 0.0040994551964104176, + "loss_iou": 0.5390625, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 421850200, + "step": 2456 + }, + { + "epoch": 0.6462813178141645, + "grad_norm": 18.53576034320469, + "learning_rate": 5e-06, + "loss": 0.1467, + "num_input_tokens_seen": 422020768, + "step": 2457 + }, + { + "epoch": 0.6462813178141645, + "loss": 0.145416259765625, + "loss_ce": 0.002319341991096735, + "loss_iou": 0.60546875, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 422020768, + "step": 2457 + }, + { + "epoch": 0.6465443545735516, + "grad_norm": 3.5647933372156486, + "learning_rate": 5e-06, + "loss": 0.0981, + "num_input_tokens_seen": 422188996, + "step": 2458 + }, + { + "epoch": 0.6465443545735516, + "loss": 0.09572234004735947, + "loss_ce": 0.002552174963057041, + "loss_iou": 0.435546875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 422188996, + "step": 2458 + }, + { + "epoch": 0.6468073913329387, + "grad_norm": 7.264219107970771, + "learning_rate": 5e-06, + "loss": 0.1532, + "num_input_tokens_seen": 422361088, + "step": 2459 + }, + { + "epoch": 0.6468073913329387, + "loss": 0.25412267446517944, + "loss_ce": 0.002886736299842596, + "loss_iou": 0.56640625, + "loss_num": 0.050048828125, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 422361088, + "step": 2459 + }, + { + "epoch": 0.647070428092326, + "grad_norm": 4.6344065191429635, + "learning_rate": 5e-06, + "loss": 0.1305, + "num_input_tokens_seen": 422533008, + "step": 2460 + }, + { + "epoch": 0.647070428092326, + "loss": 0.17847508192062378, + "loss_ce": 0.00795811414718628, + "loss_iou": 0.482421875, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 422533008, + "step": 2460 + }, + { + "epoch": 0.6473334648517131, + "grad_norm": 8.368380580785693, + "learning_rate": 5e-06, + "loss": 0.1282, + "num_input_tokens_seen": 422705020, + "step": 2461 + }, + { + "epoch": 0.6473334648517131, + "loss": 0.11911526322364807, + "loss_ce": 0.0007375775021500885, + "loss_iou": 0.65234375, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 422705020, + "step": 2461 + }, + { + "epoch": 0.6475965016111002, + "grad_norm": 14.530786026480682, + "learning_rate": 5e-06, + "loss": 0.1327, + "num_input_tokens_seen": 422876816, + "step": 2462 + }, + { + "epoch": 0.6475965016111002, + "loss": 0.1273796260356903, + "loss_ce": 0.0006401161663234234, + "loss_iou": 0.5390625, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 422876816, + "step": 2462 + }, + { + "epoch": 0.6478595383704873, + "grad_norm": 5.386199695848856, + "learning_rate": 5e-06, + "loss": 0.1396, + "num_input_tokens_seen": 423048624, + "step": 2463 + }, + { + "epoch": 0.6478595383704873, + "loss": 0.07592228055000305, + "loss_ce": 0.00014713153359480202, + "loss_iou": 0.44921875, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 423048624, + "step": 2463 + }, + { + "epoch": 0.6481225751298744, + "grad_norm": 4.520647486495735, + "learning_rate": 5e-06, + "loss": 0.0844, + "num_input_tokens_seen": 423220672, + "step": 2464 + }, + { + "epoch": 0.6481225751298744, + "loss": 0.0708494782447815, + "loss_ce": 0.0003843960876110941, + "loss_iou": 0.6015625, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 423220672, + "step": 2464 + }, + { + "epoch": 0.6483856118892615, + "grad_norm": 6.550669243795055, + "learning_rate": 5e-06, + "loss": 0.0995, + "num_input_tokens_seen": 423392552, + "step": 2465 + }, + { + "epoch": 0.6483856118892615, + "loss": 0.07499829679727554, + "loss_ce": 0.0005048871971666813, + "loss_iou": 0.6953125, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 423392552, + "step": 2465 + }, + { + "epoch": 0.6486486486486487, + "grad_norm": 19.46883155452454, + "learning_rate": 5e-06, + "loss": 0.1021, + "num_input_tokens_seen": 423564752, + "step": 2466 + }, + { + "epoch": 0.6486486486486487, + "loss": 0.20438973605632782, + "loss_ce": 0.000684903294313699, + "loss_iou": 0.67578125, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 423564752, + "step": 2466 + }, + { + "epoch": 0.6489116854080358, + "grad_norm": 5.889523777651774, + "learning_rate": 5e-06, + "loss": 0.1212, + "num_input_tokens_seen": 423737504, + "step": 2467 + }, + { + "epoch": 0.6489116854080358, + "loss": 0.1293552964925766, + "loss_ce": 0.002432688605040312, + "loss_iou": 0.609375, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 423737504, + "step": 2467 + }, + { + "epoch": 0.6491747221674229, + "grad_norm": 7.016832670115666, + "learning_rate": 5e-06, + "loss": 0.1505, + "num_input_tokens_seen": 423909608, + "step": 2468 + }, + { + "epoch": 0.6491747221674229, + "loss": 0.05709705874323845, + "loss_ce": 0.0013414426939561963, + "loss_iou": 0.49609375, + "loss_num": 0.01116943359375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 423909608, + "step": 2468 + }, + { + "epoch": 0.64943775892681, + "grad_norm": 3.9924309473660817, + "learning_rate": 5e-06, + "loss": 0.1385, + "num_input_tokens_seen": 424081612, + "step": 2469 + }, + { + "epoch": 0.64943775892681, + "loss": 0.21573078632354736, + "loss_ce": 0.0024129238445311785, + "loss_iou": 0.50390625, + "loss_num": 0.042724609375, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 424081612, + "step": 2469 + }, + { + "epoch": 0.6497007956861971, + "grad_norm": 8.786316595444035, + "learning_rate": 5e-06, + "loss": 0.0929, + "num_input_tokens_seen": 424253804, + "step": 2470 + }, + { + "epoch": 0.6497007956861971, + "loss": 0.05929354950785637, + "loss_ce": 0.00044039852218702435, + "loss_iou": NaN, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 424253804, + "step": 2470 + }, + { + "epoch": 0.6499638324455843, + "grad_norm": 15.002230358263791, + "learning_rate": 5e-06, + "loss": 0.1137, + "num_input_tokens_seen": 424425936, + "step": 2471 + }, + { + "epoch": 0.6499638324455843, + "loss": 0.13272526860237122, + "loss_ce": 0.004475146532058716, + "loss_iou": 0.39453125, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 424425936, + "step": 2471 + }, + { + "epoch": 0.6502268692049714, + "grad_norm": 5.3031138296122675, + "learning_rate": 5e-06, + "loss": 0.1006, + "num_input_tokens_seen": 424596308, + "step": 2472 + }, + { + "epoch": 0.6502268692049714, + "loss": 0.06613775342702866, + "loss_ce": 0.005651914514601231, + "loss_iou": 0.462890625, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 424596308, + "step": 2472 + }, + { + "epoch": 0.6504899059643585, + "grad_norm": 17.448583587295317, + "learning_rate": 5e-06, + "loss": 0.1235, + "num_input_tokens_seen": 424768552, + "step": 2473 + }, + { + "epoch": 0.6504899059643585, + "loss": 0.08512883633375168, + "loss_ce": 0.0031891404651105404, + "loss_iou": 0.5625, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 424768552, + "step": 2473 + }, + { + "epoch": 0.6507529427237456, + "grad_norm": 6.87156581112055, + "learning_rate": 5e-06, + "loss": 0.0809, + "num_input_tokens_seen": 424940764, + "step": 2474 + }, + { + "epoch": 0.6507529427237456, + "loss": 0.07900265604257584, + "loss_ce": 0.0035021707881242037, + "loss_iou": 0.5546875, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 424940764, + "step": 2474 + }, + { + "epoch": 0.6510159794831327, + "grad_norm": 7.4967444521140765, + "learning_rate": 5e-06, + "loss": 0.1012, + "num_input_tokens_seen": 425112860, + "step": 2475 + }, + { + "epoch": 0.6510159794831327, + "loss": 0.1566176563501358, + "loss_ce": 0.0004592128098011017, + "loss_iou": 0.55078125, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 425112860, + "step": 2475 + }, + { + "epoch": 0.6512790162425199, + "grad_norm": 7.198491382802353, + "learning_rate": 5e-06, + "loss": 0.1238, + "num_input_tokens_seen": 425284996, + "step": 2476 + }, + { + "epoch": 0.6512790162425199, + "loss": 0.12442326545715332, + "loss_ce": 0.0022308877669274807, + "loss_iou": 0.5234375, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 425284996, + "step": 2476 + }, + { + "epoch": 0.651542053001907, + "grad_norm": 12.660738408941835, + "learning_rate": 5e-06, + "loss": 0.1301, + "num_input_tokens_seen": 425457444, + "step": 2477 + }, + { + "epoch": 0.651542053001907, + "loss": 0.139640212059021, + "loss_ce": 0.0008462676778435707, + "loss_iou": 0.65625, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 425457444, + "step": 2477 + }, + { + "epoch": 0.6518050897612941, + "grad_norm": 4.788908959245028, + "learning_rate": 5e-06, + "loss": 0.117, + "num_input_tokens_seen": 425629356, + "step": 2478 + }, + { + "epoch": 0.6518050897612941, + "loss": 0.11505892872810364, + "loss_ce": 0.0043716710060834885, + "loss_iou": 0.44921875, + "loss_num": 0.0220947265625, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 425629356, + "step": 2478 + }, + { + "epoch": 0.6520681265206812, + "grad_norm": 14.713914944582257, + "learning_rate": 5e-06, + "loss": 0.1519, + "num_input_tokens_seen": 425801412, + "step": 2479 + }, + { + "epoch": 0.6520681265206812, + "loss": 0.09329426288604736, + "loss_ce": 0.0003987499512732029, + "loss_iou": 0.5234375, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 425801412, + "step": 2479 + }, + { + "epoch": 0.6523311632800683, + "grad_norm": 11.370966467516885, + "learning_rate": 5e-06, + "loss": 0.1579, + "num_input_tokens_seen": 425973396, + "step": 2480 + }, + { + "epoch": 0.6523311632800683, + "loss": 0.12143571674823761, + "loss_ce": 0.0008607673225924373, + "loss_iou": 0.49609375, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 425973396, + "step": 2480 + }, + { + "epoch": 0.6525942000394556, + "grad_norm": 16.96417675320112, + "learning_rate": 5e-06, + "loss": 0.1119, + "num_input_tokens_seen": 426142788, + "step": 2481 + }, + { + "epoch": 0.6525942000394556, + "loss": 0.058773696422576904, + "loss_ce": 0.00017994196969084442, + "loss_iou": 0.51171875, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 426142788, + "step": 2481 + }, + { + "epoch": 0.6528572367988427, + "grad_norm": 3.267968495608878, + "learning_rate": 5e-06, + "loss": 0.1454, + "num_input_tokens_seen": 426314852, + "step": 2482 + }, + { + "epoch": 0.6528572367988427, + "loss": 0.14266598224639893, + "loss_ce": 0.0022698603570461273, + "loss_iou": 0.37109375, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 426314852, + "step": 2482 + }, + { + "epoch": 0.6531202735582298, + "grad_norm": 12.820952382520073, + "learning_rate": 5e-06, + "loss": 0.1257, + "num_input_tokens_seen": 426487324, + "step": 2483 + }, + { + "epoch": 0.6531202735582298, + "loss": 0.09693928062915802, + "loss_ce": 0.0016328811179846525, + "loss_iou": 0.4296875, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 426487324, + "step": 2483 + }, + { + "epoch": 0.6533833103176169, + "grad_norm": 10.800205106839552, + "learning_rate": 5e-06, + "loss": 0.1315, + "num_input_tokens_seen": 426659468, + "step": 2484 + }, + { + "epoch": 0.6533833103176169, + "loss": 0.07587607949972153, + "loss_ce": 0.00039084581658244133, + "loss_iou": 0.4921875, + "loss_num": 0.01507568359375, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 426659468, + "step": 2484 + }, + { + "epoch": 0.653646347077004, + "grad_norm": 7.9171669810832395, + "learning_rate": 5e-06, + "loss": 0.0853, + "num_input_tokens_seen": 426831496, + "step": 2485 + }, + { + "epoch": 0.653646347077004, + "loss": 0.07471035420894623, + "loss_ce": 0.0035586238373070955, + "loss_iou": 0.6015625, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 426831496, + "step": 2485 + }, + { + "epoch": 0.6539093838363912, + "grad_norm": 8.230428013130412, + "learning_rate": 5e-06, + "loss": 0.125, + "num_input_tokens_seen": 427003612, + "step": 2486 + }, + { + "epoch": 0.6539093838363912, + "loss": 0.08275149762630463, + "loss_ce": 0.0038025237154215574, + "loss_iou": 0.5234375, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 427003612, + "step": 2486 + }, + { + "epoch": 0.6541724205957783, + "grad_norm": 3.632885838865945, + "learning_rate": 5e-06, + "loss": 0.1363, + "num_input_tokens_seen": 427175916, + "step": 2487 + }, + { + "epoch": 0.6541724205957783, + "loss": 0.1029161587357521, + "loss_ce": 0.000987447565421462, + "loss_iou": 0.52734375, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 427175916, + "step": 2487 + }, + { + "epoch": 0.6544354573551654, + "grad_norm": 3.5649981841807588, + "learning_rate": 5e-06, + "loss": 0.0923, + "num_input_tokens_seen": 427348060, + "step": 2488 + }, + { + "epoch": 0.6544354573551654, + "loss": 0.07192617654800415, + "loss_ce": 0.003597316797822714, + "loss_iou": 0.51171875, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 427348060, + "step": 2488 + }, + { + "epoch": 0.6546984941145525, + "grad_norm": 5.397602518661847, + "learning_rate": 5e-06, + "loss": 0.134, + "num_input_tokens_seen": 427520416, + "step": 2489 + }, + { + "epoch": 0.6546984941145525, + "loss": 0.08971969783306122, + "loss_ce": 0.0012797524686902761, + "loss_iou": 0.470703125, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 427520416, + "step": 2489 + }, + { + "epoch": 0.6549615308739396, + "grad_norm": 2.996287220023088, + "learning_rate": 5e-06, + "loss": 0.0974, + "num_input_tokens_seen": 427692328, + "step": 2490 + }, + { + "epoch": 0.6549615308739396, + "loss": 0.11320722848176956, + "loss_ce": 0.0006584011716768146, + "loss_iou": 0.3828125, + "loss_num": 0.0225830078125, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 427692328, + "step": 2490 + }, + { + "epoch": 0.6552245676333267, + "grad_norm": 9.620885086024206, + "learning_rate": 5e-06, + "loss": 0.1271, + "num_input_tokens_seen": 427864376, + "step": 2491 + }, + { + "epoch": 0.6552245676333267, + "loss": 0.10166727006435394, + "loss_ce": 0.0020884163677692413, + "loss_iou": 0.4609375, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 427864376, + "step": 2491 + }, + { + "epoch": 0.6554876043927139, + "grad_norm": 38.296683588407596, + "learning_rate": 5e-06, + "loss": 0.1392, + "num_input_tokens_seen": 428036792, + "step": 2492 + }, + { + "epoch": 0.6554876043927139, + "loss": 0.06815087795257568, + "loss_ce": 6.61654194118455e-05, + "loss_iou": 0.44140625, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 428036792, + "step": 2492 + }, + { + "epoch": 0.655750641152101, + "grad_norm": 29.525923288124442, + "learning_rate": 5e-06, + "loss": 0.1634, + "num_input_tokens_seen": 428207124, + "step": 2493 + }, + { + "epoch": 0.655750641152101, + "loss": 0.23705750703811646, + "loss_ce": 0.0035522649995982647, + "loss_iou": 0.59375, + "loss_num": 0.046630859375, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 428207124, + "step": 2493 + }, + { + "epoch": 0.6560136779114881, + "grad_norm": 5.543812647744825, + "learning_rate": 5e-06, + "loss": 0.1341, + "num_input_tokens_seen": 428379688, + "step": 2494 + }, + { + "epoch": 0.6560136779114881, + "loss": 0.09441094100475311, + "loss_ce": 0.0024462228175252676, + "loss_iou": 0.515625, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 428379688, + "step": 2494 + }, + { + "epoch": 0.6562767146708752, + "grad_norm": 32.70215077495664, + "learning_rate": 5e-06, + "loss": 0.0854, + "num_input_tokens_seen": 428551908, + "step": 2495 + }, + { + "epoch": 0.6562767146708752, + "loss": 0.05353452265262604, + "loss_ce": 0.00015927490312606096, + "loss_iou": 0.67578125, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 428551908, + "step": 2495 + }, + { + "epoch": 0.6565397514302623, + "grad_norm": 9.54910159759898, + "learning_rate": 5e-06, + "loss": 0.1296, + "num_input_tokens_seen": 428724396, + "step": 2496 + }, + { + "epoch": 0.6565397514302623, + "loss": 0.1285240650177002, + "loss_ce": 0.00035023505915887654, + "loss_iou": 0.609375, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 428724396, + "step": 2496 + }, + { + "epoch": 0.6568027881896495, + "grad_norm": 22.277656875348782, + "learning_rate": 5e-06, + "loss": 0.1228, + "num_input_tokens_seen": 428891900, + "step": 2497 + }, + { + "epoch": 0.6568027881896495, + "loss": 0.08849343657493591, + "loss_ce": 0.0024643833748996258, + "loss_iou": 0.52734375, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 428891900, + "step": 2497 + }, + { + "epoch": 0.6570658249490366, + "grad_norm": 6.04979449937967, + "learning_rate": 5e-06, + "loss": 0.1133, + "num_input_tokens_seen": 429064016, + "step": 2498 + }, + { + "epoch": 0.6570658249490366, + "loss": 0.04533851146697998, + "loss_ce": 0.000935432268306613, + "loss_iou": 0.482421875, + "loss_num": 0.00885009765625, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 429064016, + "step": 2498 + }, + { + "epoch": 0.6573288617084238, + "grad_norm": 13.279231347366395, + "learning_rate": 5e-06, + "loss": 0.1111, + "num_input_tokens_seen": 429236096, + "step": 2499 + }, + { + "epoch": 0.6573288617084238, + "loss": 0.09342057257890701, + "loss_ce": 0.0018373207421973348, + "loss_iou": 0.61328125, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 429236096, + "step": 2499 + }, + { + "epoch": 0.6575918984678109, + "grad_norm": 9.388446901609708, + "learning_rate": 5e-06, + "loss": 0.1338, + "num_input_tokens_seen": 429408212, + "step": 2500 + }, + { + "epoch": 0.6575918984678109, + "eval_websight_new_CIoU": 0.8709481358528137, + "eval_websight_new_GIoU": 0.8742890954017639, + "eval_websight_new_IoU": 0.8760707080364227, + "eval_websight_new_MAE_all": 0.01990661583840847, + "eval_websight_new_MAE_h": 0.01017875224351883, + "eval_websight_new_MAE_w": 0.03395752049982548, + "eval_websight_new_MAE_x": 0.03019585181027651, + "eval_websight_new_MAE_y": 0.005294335773214698, + "eval_websight_new_NUM_probability": 0.9999906122684479, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 0.09985960274934769, + "eval_websight_new_loss_ce": 2.431606571917655e-05, + "eval_websight_new_loss_iou": 0.4124755859375, + "eval_websight_new_loss_num": 0.017595291137695312, + "eval_websight_new_loss_xval": 0.08795166015625, + "eval_websight_new_runtime": 55.684, + "eval_websight_new_samples_per_second": 0.898, + "eval_websight_new_steps_per_second": 0.036, + "num_input_tokens_seen": 429408212, + "step": 2500 + }, + { + "epoch": 0.6575918984678109, + "eval_seeclick_CIoU": 0.6180358529090881, + "eval_seeclick_GIoU": 0.6200732290744781, + "eval_seeclick_IoU": 0.6430684626102448, + "eval_seeclick_MAE_all": 0.04816816933453083, + "eval_seeclick_MAE_h": 0.03338594362139702, + "eval_seeclick_MAE_w": 0.06272775307297707, + "eval_seeclick_MAE_x": 0.06945410370826721, + "eval_seeclick_MAE_y": 0.027104882523417473, + "eval_seeclick_NUM_probability": 0.9999706149101257, + "eval_seeclick_inside_bbox": 0.8764204680919647, + "eval_seeclick_loss": 0.22014649212360382, + "eval_seeclick_loss_ce": 0.008939406834542751, + "eval_seeclick_loss_iou": 0.5093994140625, + "eval_seeclick_loss_num": 0.040313720703125, + "eval_seeclick_loss_xval": 0.201568603515625, + "eval_seeclick_runtime": 68.7244, + "eval_seeclick_samples_per_second": 0.626, + "eval_seeclick_steps_per_second": 0.029, + "num_input_tokens_seen": 429408212, + "step": 2500 + }, + { + "epoch": 0.6575918984678109, + "eval_icons_CIoU": 0.8709467053413391, + "eval_icons_GIoU": 0.8682527244091034, + "eval_icons_IoU": 0.874538779258728, + "eval_icons_MAE_all": 0.01756941620260477, + "eval_icons_MAE_h": 0.02226724848151207, + "eval_icons_MAE_w": 0.020997921004891396, + "eval_icons_MAE_x": 0.0130357148591429, + "eval_icons_MAE_y": 0.013976779766380787, + "eval_icons_NUM_probability": 0.9999793469905853, + "eval_icons_inside_bbox": 1.0, + "eval_icons_loss": 0.06909541040658951, + "eval_icons_loss_ce": 1.1558900041563902e-05, + "eval_icons_loss_iou": 0.66064453125, + "eval_icons_loss_num": 0.01297760009765625, + "eval_icons_loss_xval": 0.0648956298828125, + "eval_icons_runtime": 83.8008, + "eval_icons_samples_per_second": 0.597, + "eval_icons_steps_per_second": 0.024, + "num_input_tokens_seen": 429408212, + "step": 2500 + }, + { + "epoch": 0.6575918984678109, + "eval_screenspot_CIoU": 0.5514570474624634, + "eval_screenspot_GIoU": 0.5452684958775839, + "eval_screenspot_IoU": 0.5914793411890665, + "eval_screenspot_MAE_all": 0.08535195142030716, + "eval_screenspot_MAE_h": 0.06157554934422175, + "eval_screenspot_MAE_w": 0.14490507543087006, + "eval_screenspot_MAE_x": 0.08271919315059979, + "eval_screenspot_MAE_y": 0.05220799893140793, + "eval_screenspot_NUM_probability": 0.9994663198788961, + "eval_screenspot_inside_bbox": 0.850000003973643, + "eval_screenspot_loss": 0.9172521233558655, + "eval_screenspot_loss_ce": 0.5530519783496857, + "eval_screenspot_loss_iou": 0.529052734375, + "eval_screenspot_loss_num": 0.07132466634114583, + "eval_screenspot_loss_xval": 0.3565266927083333, + "eval_screenspot_runtime": 139.3325, + "eval_screenspot_samples_per_second": 0.639, + "eval_screenspot_steps_per_second": 0.022, + "num_input_tokens_seen": 429408212, + "step": 2500 + }, + { + "epoch": 0.657854935227198, + "grad_norm": 4.075831219377808, + "learning_rate": 5e-06, + "loss": 0.1111, + "num_input_tokens_seen": 429580660, + "step": 2501 + }, + { + "epoch": 0.657854935227198, + "loss": 0.13817401230335236, + "loss_ce": 0.0033473544754087925, + "loss_iou": 0.59375, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 429580660, + "step": 2501 + }, + { + "epoch": 0.6581179719865852, + "grad_norm": 7.116588034171532, + "learning_rate": 5e-06, + "loss": 0.1223, + "num_input_tokens_seen": 429752720, + "step": 2502 + }, + { + "epoch": 0.6581179719865852, + "loss": 0.1050395742058754, + "loss_ce": 0.0004253170336596668, + "loss_iou": 0.35546875, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 429752720, + "step": 2502 + }, + { + "epoch": 0.6583810087459723, + "grad_norm": 4.475079180174651, + "learning_rate": 5e-06, + "loss": 0.0863, + "num_input_tokens_seen": 429924908, + "step": 2503 + }, + { + "epoch": 0.6583810087459723, + "loss": 0.06600432842969894, + "loss_ce": 0.0007577461656183004, + "loss_iou": 0.60546875, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 429924908, + "step": 2503 + }, + { + "epoch": 0.6586440455053594, + "grad_norm": 4.723970217436198, + "learning_rate": 5e-06, + "loss": 0.1093, + "num_input_tokens_seen": 430097000, + "step": 2504 + }, + { + "epoch": 0.6586440455053594, + "loss": 0.07487765699625015, + "loss_ce": 0.0005978714907541871, + "loss_iou": 0.54296875, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 430097000, + "step": 2504 + }, + { + "epoch": 0.6589070822647465, + "grad_norm": 14.464350947162439, + "learning_rate": 5e-06, + "loss": 0.1466, + "num_input_tokens_seen": 430266684, + "step": 2505 + }, + { + "epoch": 0.6589070822647465, + "loss": 0.24609506130218506, + "loss_ce": 0.00015389968757517636, + "loss_iou": 0.51171875, + "loss_num": 0.04931640625, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 430266684, + "step": 2505 + }, + { + "epoch": 0.6591701190241336, + "grad_norm": 4.076117698308253, + "learning_rate": 5e-06, + "loss": 0.1265, + "num_input_tokens_seen": 430436940, + "step": 2506 + }, + { + "epoch": 0.6591701190241336, + "loss": 0.109318308532238, + "loss_ce": 0.002522045513615012, + "loss_iou": 0.328125, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 430436940, + "step": 2506 + }, + { + "epoch": 0.6594331557835208, + "grad_norm": 5.795601298430443, + "learning_rate": 5e-06, + "loss": 0.0979, + "num_input_tokens_seen": 430608572, + "step": 2507 + }, + { + "epoch": 0.6594331557835208, + "loss": 0.07380083203315735, + "loss_ce": 0.00034502719063311815, + "loss_iou": 0.6171875, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 430608572, + "step": 2507 + }, + { + "epoch": 0.6596961925429079, + "grad_norm": 3.8858425270409165, + "learning_rate": 5e-06, + "loss": 0.105, + "num_input_tokens_seen": 430778744, + "step": 2508 + }, + { + "epoch": 0.6596961925429079, + "loss": 0.1448889970779419, + "loss_ce": 0.00018989352975040674, + "loss_iou": 0.55078125, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 430778744, + "step": 2508 + }, + { + "epoch": 0.659959229302295, + "grad_norm": 9.24454413883364, + "learning_rate": 5e-06, + "loss": 0.0771, + "num_input_tokens_seen": 430950904, + "step": 2509 + }, + { + "epoch": 0.659959229302295, + "loss": 0.09062638133764267, + "loss_ce": 0.0013014276046305895, + "loss_iou": 0.53515625, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 430950904, + "step": 2509 + }, + { + "epoch": 0.6602222660616821, + "grad_norm": 5.68970165561044, + "learning_rate": 5e-06, + "loss": 0.1123, + "num_input_tokens_seen": 431121056, + "step": 2510 + }, + { + "epoch": 0.6602222660616821, + "loss": 0.09496060013771057, + "loss_ce": 0.0006155127775855362, + "loss_iou": NaN, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 431121056, + "step": 2510 + }, + { + "epoch": 0.6604853028210692, + "grad_norm": 3.2016449128749023, + "learning_rate": 5e-06, + "loss": 0.084, + "num_input_tokens_seen": 431293012, + "step": 2511 + }, + { + "epoch": 0.6604853028210692, + "loss": 0.03823622688651085, + "loss_ce": 1.2961418178747408e-05, + "loss_iou": 0.5703125, + "loss_num": 0.007659912109375, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 431293012, + "step": 2511 + }, + { + "epoch": 0.6607483395804564, + "grad_norm": 7.269277911217335, + "learning_rate": 5e-06, + "loss": 0.145, + "num_input_tokens_seen": 431461708, + "step": 2512 + }, + { + "epoch": 0.6607483395804564, + "loss": 0.27740049362182617, + "loss_ce": 0.004237642977386713, + "loss_iou": 0.255859375, + "loss_num": 0.0546875, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 431461708, + "step": 2512 + }, + { + "epoch": 0.6610113763398435, + "grad_norm": 3.005900285482203, + "learning_rate": 5e-06, + "loss": 0.0914, + "num_input_tokens_seen": 431633828, + "step": 2513 + }, + { + "epoch": 0.6610113763398435, + "loss": 0.0913277417421341, + "loss_ce": 0.0005684680072590709, + "loss_iou": 0.36328125, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 431633828, + "step": 2513 + }, + { + "epoch": 0.6612744130992306, + "grad_norm": 3.8195415395135934, + "learning_rate": 5e-06, + "loss": 0.098, + "num_input_tokens_seen": 431805968, + "step": 2514 + }, + { + "epoch": 0.6612744130992306, + "loss": 0.04755711182951927, + "loss_ce": 0.0009872873779386282, + "loss_iou": 0.39453125, + "loss_num": 0.00927734375, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 431805968, + "step": 2514 + }, + { + "epoch": 0.6615374498586177, + "grad_norm": 4.443605112638486, + "learning_rate": 5e-06, + "loss": 0.1147, + "num_input_tokens_seen": 431977936, + "step": 2515 + }, + { + "epoch": 0.6615374498586177, + "loss": 0.0916135311126709, + "loss_ce": 0.0008542468422092497, + "loss_iou": 0.458984375, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 431977936, + "step": 2515 + }, + { + "epoch": 0.6618004866180048, + "grad_norm": 5.518155601836215, + "learning_rate": 5e-06, + "loss": 0.1724, + "num_input_tokens_seen": 432150116, + "step": 2516 + }, + { + "epoch": 0.6618004866180048, + "loss": 0.14211586117744446, + "loss_ce": 0.0029557030647993088, + "loss_iou": 0.53125, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 432150116, + "step": 2516 + }, + { + "epoch": 0.6620635233773919, + "grad_norm": 5.250876023224286, + "learning_rate": 5e-06, + "loss": 0.1054, + "num_input_tokens_seen": 432322696, + "step": 2517 + }, + { + "epoch": 0.6620635233773919, + "loss": 0.07852162420749664, + "loss_ce": 0.00102223118301481, + "loss_iou": 0.58984375, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 432322696, + "step": 2517 + }, + { + "epoch": 0.6623265601367792, + "grad_norm": 4.979602548630015, + "learning_rate": 5e-06, + "loss": 0.1138, + "num_input_tokens_seen": 432490748, + "step": 2518 + }, + { + "epoch": 0.6623265601367792, + "loss": 0.07791407406330109, + "loss_ce": 0.0011623608879745007, + "loss_iou": 0.5078125, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 432490748, + "step": 2518 + }, + { + "epoch": 0.6625895968961663, + "grad_norm": 7.14143158314321, + "learning_rate": 5e-06, + "loss": 0.1338, + "num_input_tokens_seen": 432662924, + "step": 2519 + }, + { + "epoch": 0.6625895968961663, + "loss": 0.168125718832016, + "loss_ce": 0.000858865212649107, + "loss_iou": 0.515625, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 432662924, + "step": 2519 + }, + { + "epoch": 0.6628526336555534, + "grad_norm": 3.8877457754066387, + "learning_rate": 5e-06, + "loss": 0.1219, + "num_input_tokens_seen": 432835068, + "step": 2520 + }, + { + "epoch": 0.6628526336555534, + "loss": 0.12645836174488068, + "loss_ce": 0.0006496440037153661, + "loss_iou": 0.546875, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 432835068, + "step": 2520 + }, + { + "epoch": 0.6631156704149405, + "grad_norm": 4.906018490784193, + "learning_rate": 5e-06, + "loss": 0.1167, + "num_input_tokens_seen": 433007048, + "step": 2521 + }, + { + "epoch": 0.6631156704149405, + "loss": 0.08021432906389236, + "loss_ce": 0.000868627626914531, + "loss_iou": 0.55859375, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 433007048, + "step": 2521 + }, + { + "epoch": 0.6633787071743276, + "grad_norm": 4.283295443334077, + "learning_rate": 5e-06, + "loss": 0.158, + "num_input_tokens_seen": 433177068, + "step": 2522 + }, + { + "epoch": 0.6633787071743276, + "loss": 0.11892493069171906, + "loss_ce": 0.00060828379355371, + "loss_iou": 0.4921875, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 433177068, + "step": 2522 + }, + { + "epoch": 0.6636417439337148, + "grad_norm": 4.831641088226549, + "learning_rate": 5e-06, + "loss": 0.0736, + "num_input_tokens_seen": 433349504, + "step": 2523 + }, + { + "epoch": 0.6636417439337148, + "loss": 0.06323867291212082, + "loss_ce": 0.00031142536317929626, + "loss_iou": 0.4921875, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 433349504, + "step": 2523 + }, + { + "epoch": 0.6639047806931019, + "grad_norm": 24.62858299579612, + "learning_rate": 5e-06, + "loss": 0.1129, + "num_input_tokens_seen": 433521616, + "step": 2524 + }, + { + "epoch": 0.6639047806931019, + "loss": 0.12700864672660828, + "loss_ce": 0.003351423656567931, + "loss_iou": 0.45703125, + "loss_num": 0.0247802734375, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 433521616, + "step": 2524 + }, + { + "epoch": 0.664167817452489, + "grad_norm": 6.377416561201896, + "learning_rate": 5e-06, + "loss": 0.1141, + "num_input_tokens_seen": 433692012, + "step": 2525 + }, + { + "epoch": 0.664167817452489, + "loss": 0.06224376708269119, + "loss_ce": 0.003238032106310129, + "loss_iou": 0.54296875, + "loss_num": 0.01177978515625, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 433692012, + "step": 2525 + }, + { + "epoch": 0.6644308542118761, + "grad_norm": 7.501801025322401, + "learning_rate": 5e-06, + "loss": 0.1321, + "num_input_tokens_seen": 433864040, + "step": 2526 + }, + { + "epoch": 0.6644308542118761, + "loss": 0.09891114383935928, + "loss_ce": 0.002109395107254386, + "loss_iou": 0.40234375, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 433864040, + "step": 2526 + }, + { + "epoch": 0.6646938909712632, + "grad_norm": 4.527105351225178, + "learning_rate": 5e-06, + "loss": 0.1112, + "num_input_tokens_seen": 434034556, + "step": 2527 + }, + { + "epoch": 0.6646938909712632, + "loss": 0.14892783761024475, + "loss_ce": 0.0010244110599160194, + "loss_iou": 0.4765625, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 434034556, + "step": 2527 + }, + { + "epoch": 0.6649569277306504, + "grad_norm": 31.749018149089892, + "learning_rate": 5e-06, + "loss": 0.1157, + "num_input_tokens_seen": 434206672, + "step": 2528 + }, + { + "epoch": 0.6649569277306504, + "loss": 0.12175662815570831, + "loss_ce": 0.0015021114377304912, + "loss_iou": 0.53515625, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 434206672, + "step": 2528 + }, + { + "epoch": 0.6652199644900375, + "grad_norm": 4.8576669690091885, + "learning_rate": 5e-06, + "loss": 0.0658, + "num_input_tokens_seen": 434378728, + "step": 2529 + }, + { + "epoch": 0.6652199644900375, + "loss": 0.07344581931829453, + "loss_ce": 0.006276628468185663, + "loss_iou": 0.5078125, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 434378728, + "step": 2529 + }, + { + "epoch": 0.6654830012494246, + "grad_norm": 4.9340551800619, + "learning_rate": 5e-06, + "loss": 0.1017, + "num_input_tokens_seen": 434550708, + "step": 2530 + }, + { + "epoch": 0.6654830012494246, + "loss": 0.0640825480222702, + "loss_ce": 0.0013689253246411681, + "loss_iou": 0.55859375, + "loss_num": 0.0125732421875, + "loss_xval": 0.0625, + "num_input_tokens_seen": 434550708, + "step": 2530 + }, + { + "epoch": 0.6657460380088117, + "grad_norm": 9.63881283843686, + "learning_rate": 5e-06, + "loss": 0.111, + "num_input_tokens_seen": 434723100, + "step": 2531 + }, + { + "epoch": 0.6657460380088117, + "loss": 0.050916872918605804, + "loss_ce": 0.0004713151138275862, + "loss_iou": 0.64453125, + "loss_num": 0.01007080078125, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 434723100, + "step": 2531 + }, + { + "epoch": 0.6660090747681988, + "grad_norm": 5.34856475630482, + "learning_rate": 5e-06, + "loss": 0.1127, + "num_input_tokens_seen": 434893672, + "step": 2532 + }, + { + "epoch": 0.6660090747681988, + "loss": 0.11488111317157745, + "loss_ce": 0.00108106411062181, + "loss_iou": 0.5390625, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 434893672, + "step": 2532 + }, + { + "epoch": 0.666272111527586, + "grad_norm": 6.482198264697821, + "learning_rate": 5e-06, + "loss": 0.1046, + "num_input_tokens_seen": 435065996, + "step": 2533 + }, + { + "epoch": 0.666272111527586, + "loss": 0.1057998389005661, + "loss_ce": 0.00013272129581309855, + "loss_iou": 0.6015625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 435065996, + "step": 2533 + }, + { + "epoch": 0.6665351482869731, + "grad_norm": 15.608471694941006, + "learning_rate": 5e-06, + "loss": 0.1151, + "num_input_tokens_seen": 435236164, + "step": 2534 + }, + { + "epoch": 0.6665351482869731, + "loss": 0.08966468274593353, + "loss_ce": 0.004398562014102936, + "loss_iou": 0.4375, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 435236164, + "step": 2534 + }, + { + "epoch": 0.6667981850463602, + "grad_norm": 3.821066216794497, + "learning_rate": 5e-06, + "loss": 0.1027, + "num_input_tokens_seen": 435408552, + "step": 2535 + }, + { + "epoch": 0.6667981850463602, + "loss": 0.13420158624649048, + "loss_ce": 0.0008855484193190932, + "loss_iou": 0.423828125, + "loss_num": 0.026611328125, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 435408552, + "step": 2535 + }, + { + "epoch": 0.6670612218057473, + "grad_norm": 7.414329883383495, + "learning_rate": 5e-06, + "loss": 0.1206, + "num_input_tokens_seen": 435580704, + "step": 2536 + }, + { + "epoch": 0.6670612218057473, + "loss": 0.09095387160778046, + "loss_ce": 0.0024223732762038708, + "loss_iou": 0.53125, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 435580704, + "step": 2536 + }, + { + "epoch": 0.6673242585651344, + "grad_norm": 33.27432314606939, + "learning_rate": 5e-06, + "loss": 0.1523, + "num_input_tokens_seen": 435752624, + "step": 2537 + }, + { + "epoch": 0.6673242585651344, + "loss": 0.15344524383544922, + "loss_ce": 0.0014066637959331274, + "loss_iou": 0.57421875, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 435752624, + "step": 2537 + }, + { + "epoch": 0.6675872953245217, + "grad_norm": 7.327533892405677, + "learning_rate": 5e-06, + "loss": 0.1128, + "num_input_tokens_seen": 435924672, + "step": 2538 + }, + { + "epoch": 0.6675872953245217, + "loss": 0.11101265996694565, + "loss_ce": 0.000935757125262171, + "loss_iou": 0.447265625, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 435924672, + "step": 2538 + }, + { + "epoch": 0.6678503320839088, + "grad_norm": 7.348552482036463, + "learning_rate": 5e-06, + "loss": 0.1424, + "num_input_tokens_seen": 436096756, + "step": 2539 + }, + { + "epoch": 0.6678503320839088, + "loss": 0.1398707926273346, + "loss_ce": 0.0033656705636531115, + "loss_iou": NaN, + "loss_num": 0.0272216796875, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 436096756, + "step": 2539 + }, + { + "epoch": 0.6681133688432959, + "grad_norm": 20.204014590374456, + "learning_rate": 5e-06, + "loss": 0.1482, + "num_input_tokens_seen": 436267320, + "step": 2540 + }, + { + "epoch": 0.6681133688432959, + "loss": 0.11994585394859314, + "loss_ce": 0.0005916071822866797, + "loss_iou": 0.4375, + "loss_num": 0.02392578125, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 436267320, + "step": 2540 + }, + { + "epoch": 0.668376405602683, + "grad_norm": 8.407025005461401, + "learning_rate": 5e-06, + "loss": 0.1364, + "num_input_tokens_seen": 436439328, + "step": 2541 + }, + { + "epoch": 0.668376405602683, + "loss": 0.11520107090473175, + "loss_ce": 0.003811910282820463, + "loss_iou": 0.61328125, + "loss_num": 0.0223388671875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 436439328, + "step": 2541 + }, + { + "epoch": 0.6686394423620701, + "grad_norm": 5.145720476296494, + "learning_rate": 5e-06, + "loss": 0.1047, + "num_input_tokens_seen": 436609828, + "step": 2542 + }, + { + "epoch": 0.6686394423620701, + "loss": 0.06836480647325516, + "loss_ce": 0.00037164578679949045, + "loss_iou": 0.53125, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 436609828, + "step": 2542 + }, + { + "epoch": 0.6689024791214572, + "grad_norm": 8.206215780846446, + "learning_rate": 5e-06, + "loss": 0.0996, + "num_input_tokens_seen": 436781764, + "step": 2543 + }, + { + "epoch": 0.6689024791214572, + "loss": 0.107704758644104, + "loss_ce": 0.002266527386382222, + "loss_iou": 0.5234375, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 436781764, + "step": 2543 + }, + { + "epoch": 0.6691655158808444, + "grad_norm": 7.4768704044685865, + "learning_rate": 5e-06, + "loss": 0.1623, + "num_input_tokens_seen": 436953548, + "step": 2544 + }, + { + "epoch": 0.6691655158808444, + "loss": 0.17240890860557556, + "loss_ce": 0.0010527035919949412, + "loss_iou": 0.53515625, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 436953548, + "step": 2544 + }, + { + "epoch": 0.6694285526402315, + "grad_norm": 9.203598751541529, + "learning_rate": 5e-06, + "loss": 0.1177, + "num_input_tokens_seen": 437125648, + "step": 2545 + }, + { + "epoch": 0.6694285526402315, + "loss": 0.0696527361869812, + "loss_ce": 0.0004388623929116875, + "loss_iou": 0.546875, + "loss_num": 0.0137939453125, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 437125648, + "step": 2545 + }, + { + "epoch": 0.6696915893996186, + "grad_norm": 4.3933164564180816, + "learning_rate": 5e-06, + "loss": 0.1253, + "num_input_tokens_seen": 437297756, + "step": 2546 + }, + { + "epoch": 0.6696915893996186, + "loss": 0.12276500463485718, + "loss_ce": 0.004127927124500275, + "loss_iou": 0.498046875, + "loss_num": 0.0238037109375, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 437297756, + "step": 2546 + }, + { + "epoch": 0.6699546261590057, + "grad_norm": 4.6906533688912795, + "learning_rate": 5e-06, + "loss": 0.1226, + "num_input_tokens_seen": 437470168, + "step": 2547 + }, + { + "epoch": 0.6699546261590057, + "loss": 0.16925577819347382, + "loss_ce": 0.00012736135977320373, + "loss_iou": 0.5078125, + "loss_num": 0.033935546875, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 437470168, + "step": 2547 + }, + { + "epoch": 0.6702176629183928, + "grad_norm": 5.700480300794898, + "learning_rate": 5e-06, + "loss": 0.1239, + "num_input_tokens_seen": 437641860, + "step": 2548 + }, + { + "epoch": 0.6702176629183928, + "loss": 0.08314710855484009, + "loss_ce": 0.0011463778791949153, + "loss_iou": 0.3671875, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 437641860, + "step": 2548 + }, + { + "epoch": 0.67048069967778, + "grad_norm": 4.582897287065671, + "learning_rate": 5e-06, + "loss": 0.1427, + "num_input_tokens_seen": 437814364, + "step": 2549 + }, + { + "epoch": 0.67048069967778, + "loss": 0.1702890694141388, + "loss_ce": 0.0015268486458808184, + "loss_iou": 0.51171875, + "loss_num": 0.03369140625, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 437814364, + "step": 2549 + }, + { + "epoch": 0.6707437364371671, + "grad_norm": 5.613370832938644, + "learning_rate": 5e-06, + "loss": 0.1421, + "num_input_tokens_seen": 437986344, + "step": 2550 + }, + { + "epoch": 0.6707437364371671, + "loss": 0.13855043053627014, + "loss_ce": 0.0012823636643588543, + "loss_iou": 0.53125, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 437986344, + "step": 2550 + }, + { + "epoch": 0.6710067731965542, + "grad_norm": 17.044687935022132, + "learning_rate": 5e-06, + "loss": 0.1258, + "num_input_tokens_seen": 438158712, + "step": 2551 + }, + { + "epoch": 0.6710067731965542, + "loss": 0.08713387697935104, + "loss_ce": 0.00023507134756073356, + "loss_iou": 0.5703125, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 438158712, + "step": 2551 + }, + { + "epoch": 0.6712698099559413, + "grad_norm": 6.906118924960931, + "learning_rate": 5e-06, + "loss": 0.1055, + "num_input_tokens_seen": 438330820, + "step": 2552 + }, + { + "epoch": 0.6712698099559413, + "loss": 0.13604578375816345, + "loss_ce": 0.0038131249602884054, + "loss_iou": 0.45703125, + "loss_num": 0.0264892578125, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 438330820, + "step": 2552 + }, + { + "epoch": 0.6715328467153284, + "grad_norm": 5.001208896831707, + "learning_rate": 5e-06, + "loss": 0.1252, + "num_input_tokens_seen": 438503128, + "step": 2553 + }, + { + "epoch": 0.6715328467153284, + "loss": 0.11509568989276886, + "loss_ce": 0.00016648891323711723, + "loss_iou": 0.48046875, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 438503128, + "step": 2553 + }, + { + "epoch": 0.6717958834747156, + "grad_norm": 5.88413772044494, + "learning_rate": 5e-06, + "loss": 0.0839, + "num_input_tokens_seen": 438675408, + "step": 2554 + }, + { + "epoch": 0.6717958834747156, + "loss": 0.06504229456186295, + "loss_ce": 0.000772272062022239, + "loss_iou": 0.58203125, + "loss_num": 0.0128173828125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 438675408, + "step": 2554 + }, + { + "epoch": 0.6720589202341027, + "grad_norm": 5.286645052918952, + "learning_rate": 5e-06, + "loss": 0.1103, + "num_input_tokens_seen": 438847472, + "step": 2555 + }, + { + "epoch": 0.6720589202341027, + "loss": 0.11925958842039108, + "loss_ce": 0.00024103187024593353, + "loss_iou": 0.65625, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 438847472, + "step": 2555 + }, + { + "epoch": 0.6723219569934898, + "grad_norm": 7.503881873733868, + "learning_rate": 5e-06, + "loss": 0.1275, + "num_input_tokens_seen": 439019864, + "step": 2556 + }, + { + "epoch": 0.6723219569934898, + "loss": 0.1399090439081192, + "loss_ce": 0.0017254444537684321, + "loss_iou": 0.51171875, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 439019864, + "step": 2556 + }, + { + "epoch": 0.672584993752877, + "grad_norm": 4.642912313294466, + "learning_rate": 5e-06, + "loss": 0.0921, + "num_input_tokens_seen": 439191776, + "step": 2557 + }, + { + "epoch": 0.672584993752877, + "loss": 0.12639446556568146, + "loss_ce": 0.0015470522921532393, + "loss_iou": 0.62890625, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 439191776, + "step": 2557 + }, + { + "epoch": 0.672848030512264, + "grad_norm": 5.916383494046343, + "learning_rate": 5e-06, + "loss": 0.1084, + "num_input_tokens_seen": 439360860, + "step": 2558 + }, + { + "epoch": 0.672848030512264, + "loss": 0.09076009690761566, + "loss_ce": 0.0006722048274241388, + "loss_iou": 0.6015625, + "loss_num": 0.01806640625, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 439360860, + "step": 2558 + }, + { + "epoch": 0.6731110672716513, + "grad_norm": 20.566659262884578, + "learning_rate": 5e-06, + "loss": 0.1212, + "num_input_tokens_seen": 439532496, + "step": 2559 + }, + { + "epoch": 0.6731110672716513, + "loss": 0.10060098767280579, + "loss_ce": 0.003646642668172717, + "loss_iou": 0.546875, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 439532496, + "step": 2559 + }, + { + "epoch": 0.6733741040310384, + "grad_norm": 4.011859025446864, + "learning_rate": 5e-06, + "loss": 0.1167, + "num_input_tokens_seen": 439704520, + "step": 2560 + }, + { + "epoch": 0.6733741040310384, + "loss": 0.0790782943367958, + "loss_ce": 0.0015483875758945942, + "loss_iou": 0.404296875, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 439704520, + "step": 2560 + }, + { + "epoch": 0.6736371407904255, + "grad_norm": 7.247619498755374, + "learning_rate": 5e-06, + "loss": 0.1282, + "num_input_tokens_seen": 439877048, + "step": 2561 + }, + { + "epoch": 0.6736371407904255, + "loss": 0.09221772849559784, + "loss_ce": 0.0026181198190897703, + "loss_iou": 0.50390625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 439877048, + "step": 2561 + }, + { + "epoch": 0.6739001775498126, + "grad_norm": 6.385962315237775, + "learning_rate": 5e-06, + "loss": 0.0615, + "num_input_tokens_seen": 440049300, + "step": 2562 + }, + { + "epoch": 0.6739001775498126, + "loss": 0.041686464101076126, + "loss_ce": 0.0010523094097152352, + "loss_iou": 0.55078125, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 440049300, + "step": 2562 + }, + { + "epoch": 0.6741632143091997, + "grad_norm": 26.7431708970354, + "learning_rate": 5e-06, + "loss": 0.1921, + "num_input_tokens_seen": 440221368, + "step": 2563 + }, + { + "epoch": 0.6741632143091997, + "loss": 0.18678849935531616, + "loss_ce": 0.002889578230679035, + "loss_iou": 0.47265625, + "loss_num": 0.036865234375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 440221368, + "step": 2563 + }, + { + "epoch": 0.6744262510685868, + "grad_norm": 3.4897355882240686, + "learning_rate": 5e-06, + "loss": 0.131, + "num_input_tokens_seen": 440393588, + "step": 2564 + }, + { + "epoch": 0.6744262510685868, + "loss": 0.13501334190368652, + "loss_ce": 0.0007817824953235686, + "loss_iou": 0.5078125, + "loss_num": 0.02685546875, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 440393588, + "step": 2564 + }, + { + "epoch": 0.674689287827974, + "grad_norm": 5.796770664321287, + "learning_rate": 5e-06, + "loss": 0.1058, + "num_input_tokens_seen": 440565712, + "step": 2565 + }, + { + "epoch": 0.674689287827974, + "loss": 0.06512662768363953, + "loss_ce": 0.0005819504731334746, + "loss_iou": 0.50390625, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 440565712, + "step": 2565 + }, + { + "epoch": 0.6749523245873611, + "grad_norm": 6.5511648198704755, + "learning_rate": 5e-06, + "loss": 0.0888, + "num_input_tokens_seen": 440737988, + "step": 2566 + }, + { + "epoch": 0.6749523245873611, + "loss": 0.11750826984643936, + "loss_ce": 0.0036166671197861433, + "loss_iou": 0.52734375, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 440737988, + "step": 2566 + }, + { + "epoch": 0.6752153613467482, + "grad_norm": 21.358635365800044, + "learning_rate": 5e-06, + "loss": 0.1114, + "num_input_tokens_seen": 440910068, + "step": 2567 + }, + { + "epoch": 0.6752153613467482, + "loss": 0.10725726187229156, + "loss_ce": 0.0005067750462330878, + "loss_iou": 0.6875, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 440910068, + "step": 2567 + }, + { + "epoch": 0.6754783981061353, + "grad_norm": 4.519751532821699, + "learning_rate": 5e-06, + "loss": 0.0884, + "num_input_tokens_seen": 441080800, + "step": 2568 + }, + { + "epoch": 0.6754783981061353, + "loss": 0.0696103498339653, + "loss_ce": 0.00015234279271680862, + "loss_iou": 0.50390625, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 441080800, + "step": 2568 + }, + { + "epoch": 0.6757414348655224, + "grad_norm": 4.051067943325395, + "learning_rate": 5e-06, + "loss": 0.1222, + "num_input_tokens_seen": 441251032, + "step": 2569 + }, + { + "epoch": 0.6757414348655224, + "loss": 0.09424732625484467, + "loss_ce": 0.00022266953601501882, + "loss_iou": 0.54296875, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 441251032, + "step": 2569 + }, + { + "epoch": 0.6760044716249096, + "grad_norm": 6.209787964250504, + "learning_rate": 5e-06, + "loss": 0.1284, + "num_input_tokens_seen": 441422784, + "step": 2570 + }, + { + "epoch": 0.6760044716249096, + "loss": 0.0798967033624649, + "loss_ce": 0.00027634453726932406, + "loss_iou": 0.5078125, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 441422784, + "step": 2570 + }, + { + "epoch": 0.6762675083842967, + "grad_norm": 7.3672437629155505, + "learning_rate": 5e-06, + "loss": 0.1272, + "num_input_tokens_seen": 441593108, + "step": 2571 + }, + { + "epoch": 0.6762675083842967, + "loss": 0.12310583889484406, + "loss_ce": 0.0010050098644569516, + "loss_iou": NaN, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 441593108, + "step": 2571 + }, + { + "epoch": 0.6765305451436838, + "grad_norm": 5.196433267413594, + "learning_rate": 5e-06, + "loss": 0.1158, + "num_input_tokens_seen": 441765456, + "step": 2572 + }, + { + "epoch": 0.6765305451436838, + "loss": 0.1754513680934906, + "loss_ce": 0.0007077160989865661, + "loss_iou": NaN, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 441765456, + "step": 2572 + }, + { + "epoch": 0.6767935819030709, + "grad_norm": 4.136115815472187, + "learning_rate": 5e-06, + "loss": 0.1297, + "num_input_tokens_seen": 441937580, + "step": 2573 + }, + { + "epoch": 0.6767935819030709, + "loss": 0.13903826475143433, + "loss_ce": 0.0002443119592498988, + "loss_iou": 0.46484375, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 441937580, + "step": 2573 + }, + { + "epoch": 0.677056618662458, + "grad_norm": 18.488728586585154, + "learning_rate": 5e-06, + "loss": 0.0878, + "num_input_tokens_seen": 442108052, + "step": 2574 + }, + { + "epoch": 0.677056618662458, + "loss": 0.12112629413604736, + "loss_ce": 0.0002309026604052633, + "loss_iou": 0.478515625, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 442108052, + "step": 2574 + }, + { + "epoch": 0.6773196554218452, + "grad_norm": 7.779321814196593, + "learning_rate": 5e-06, + "loss": 0.1347, + "num_input_tokens_seen": 442279576, + "step": 2575 + }, + { + "epoch": 0.6773196554218452, + "loss": 0.10174375027418137, + "loss_ce": 0.0021038581617176533, + "loss_iou": 0.6015625, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 442279576, + "step": 2575 + }, + { + "epoch": 0.6775826921812323, + "grad_norm": 4.027957209739208, + "learning_rate": 5e-06, + "loss": 0.0789, + "num_input_tokens_seen": 442447800, + "step": 2576 + }, + { + "epoch": 0.6775826921812323, + "loss": 0.08731138706207275, + "loss_ce": 0.0017401032382622361, + "loss_iou": 0.4921875, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 442447800, + "step": 2576 + }, + { + "epoch": 0.6778457289406195, + "grad_norm": 3.5976030894091644, + "learning_rate": 5e-06, + "loss": 0.1165, + "num_input_tokens_seen": 442620136, + "step": 2577 + }, + { + "epoch": 0.6778457289406195, + "loss": 0.11548551917076111, + "loss_ce": 0.0009377849055454135, + "loss_iou": 0.5390625, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 442620136, + "step": 2577 + }, + { + "epoch": 0.6781087657000066, + "grad_norm": 14.27185242111941, + "learning_rate": 5e-06, + "loss": 0.1706, + "num_input_tokens_seen": 442792328, + "step": 2578 + }, + { + "epoch": 0.6781087657000066, + "loss": 0.20107831060886383, + "loss_ce": 0.01119793951511383, + "loss_iou": 0.447265625, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 442792328, + "step": 2578 + }, + { + "epoch": 0.6783718024593937, + "grad_norm": 2.6862819825925905, + "learning_rate": 5e-06, + "loss": 0.0991, + "num_input_tokens_seen": 442964732, + "step": 2579 + }, + { + "epoch": 0.6783718024593937, + "loss": 0.12557528913021088, + "loss_ce": 0.001063565374352038, + "loss_iou": 0.328125, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 442964732, + "step": 2579 + }, + { + "epoch": 0.6786348392187809, + "grad_norm": 19.12719362396153, + "learning_rate": 5e-06, + "loss": 0.1384, + "num_input_tokens_seen": 443136988, + "step": 2580 + }, + { + "epoch": 0.6786348392187809, + "loss": 0.1754310131072998, + "loss_ce": 0.001114598591811955, + "loss_iou": 0.578125, + "loss_num": 0.034912109375, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 443136988, + "step": 2580 + }, + { + "epoch": 0.678897875978168, + "grad_norm": 4.296169271228249, + "learning_rate": 5e-06, + "loss": 0.1016, + "num_input_tokens_seen": 443309004, + "step": 2581 + }, + { + "epoch": 0.678897875978168, + "loss": 0.056013718247413635, + "loss_ce": 0.0018907939083874226, + "loss_iou": 0.470703125, + "loss_num": 0.01080322265625, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 443309004, + "step": 2581 + }, + { + "epoch": 0.6791609127375551, + "grad_norm": 14.58212290872784, + "learning_rate": 5e-06, + "loss": 0.128, + "num_input_tokens_seen": 443481132, + "step": 2582 + }, + { + "epoch": 0.6791609127375551, + "loss": 0.10740009695291519, + "loss_ce": 0.00046649359865114093, + "loss_iou": 0.310546875, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 443481132, + "step": 2582 + }, + { + "epoch": 0.6794239494969422, + "grad_norm": 5.824835527761782, + "learning_rate": 5e-06, + "loss": 0.1152, + "num_input_tokens_seen": 443651636, + "step": 2583 + }, + { + "epoch": 0.6794239494969422, + "loss": 0.13724330067634583, + "loss_ce": 0.0017452588072046638, + "loss_iou": 0.56640625, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 443651636, + "step": 2583 + }, + { + "epoch": 0.6796869862563293, + "grad_norm": 6.27799845618246, + "learning_rate": 5e-06, + "loss": 0.1294, + "num_input_tokens_seen": 443823548, + "step": 2584 + }, + { + "epoch": 0.6796869862563293, + "loss": 0.1675836145877838, + "loss_ce": 0.002330929273739457, + "loss_iou": 0.390625, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 443823548, + "step": 2584 + }, + { + "epoch": 0.6799500230157165, + "grad_norm": 5.283309975586119, + "learning_rate": 5e-06, + "loss": 0.1072, + "num_input_tokens_seen": 443994196, + "step": 2585 + }, + { + "epoch": 0.6799500230157165, + "loss": 0.07297757267951965, + "loss_ce": 7.10797612555325e-05, + "loss_iou": 0.68359375, + "loss_num": 0.01458740234375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 443994196, + "step": 2585 + }, + { + "epoch": 0.6802130597751036, + "grad_norm": 4.83126163860268, + "learning_rate": 5e-06, + "loss": 0.1149, + "num_input_tokens_seen": 444166260, + "step": 2586 + }, + { + "epoch": 0.6802130597751036, + "loss": 0.15566733479499817, + "loss_ce": 0.0010347592178732157, + "loss_iou": 0.57421875, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 444166260, + "step": 2586 + }, + { + "epoch": 0.6804760965344907, + "grad_norm": 5.935537777211587, + "learning_rate": 5e-06, + "loss": 0.1299, + "num_input_tokens_seen": 444338748, + "step": 2587 + }, + { + "epoch": 0.6804760965344907, + "loss": 0.07495879381895065, + "loss_ce": 0.0001907299447339028, + "loss_iou": 0.6015625, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 444338748, + "step": 2587 + }, + { + "epoch": 0.6807391332938778, + "grad_norm": 4.981341494504371, + "learning_rate": 5e-06, + "loss": 0.1501, + "num_input_tokens_seen": 444510996, + "step": 2588 + }, + { + "epoch": 0.6807391332938778, + "loss": 0.2452811598777771, + "loss_ce": 0.002849509473890066, + "loss_iou": 0.3984375, + "loss_num": 0.04833984375, + "loss_xval": 0.2421875, + "num_input_tokens_seen": 444510996, + "step": 2588 + }, + { + "epoch": 0.6810021700532649, + "grad_norm": 34.8294057916369, + "learning_rate": 5e-06, + "loss": 0.1477, + "num_input_tokens_seen": 444682880, + "step": 2589 + }, + { + "epoch": 0.6810021700532649, + "loss": 0.20022635161876678, + "loss_ce": 0.002899688435718417, + "loss_iou": 0.64453125, + "loss_num": 0.03955078125, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 444682880, + "step": 2589 + }, + { + "epoch": 0.681265206812652, + "grad_norm": 6.783440844262528, + "learning_rate": 5e-06, + "loss": 0.1355, + "num_input_tokens_seen": 444855080, + "step": 2590 + }, + { + "epoch": 0.681265206812652, + "loss": 0.17689135670661926, + "loss_ce": 0.0014458001824095845, + "loss_iou": 0.431640625, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 444855080, + "step": 2590 + }, + { + "epoch": 0.6815282435720392, + "grad_norm": 6.398055261239124, + "learning_rate": 5e-06, + "loss": 0.1061, + "num_input_tokens_seen": 445027648, + "step": 2591 + }, + { + "epoch": 0.6815282435720392, + "loss": 0.06686560064554214, + "loss_ce": 0.003968872129917145, + "loss_iou": 0.6484375, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 445027648, + "step": 2591 + }, + { + "epoch": 0.6817912803314263, + "grad_norm": 9.754653714308724, + "learning_rate": 5e-06, + "loss": 0.158, + "num_input_tokens_seen": 445198060, + "step": 2592 + }, + { + "epoch": 0.6817912803314263, + "loss": 0.0956597551703453, + "loss_ce": 0.001345182885415852, + "loss_iou": 0.42578125, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 445198060, + "step": 2592 + }, + { + "epoch": 0.6820543170908134, + "grad_norm": 11.093960051238781, + "learning_rate": 5e-06, + "loss": 0.1506, + "num_input_tokens_seen": 445370376, + "step": 2593 + }, + { + "epoch": 0.6820543170908134, + "loss": 0.14656749367713928, + "loss_ce": 0.001456425990909338, + "loss_iou": 0.482421875, + "loss_num": 0.0289306640625, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 445370376, + "step": 2593 + }, + { + "epoch": 0.6823173538502005, + "grad_norm": 7.226137026161222, + "learning_rate": 5e-06, + "loss": 0.1077, + "num_input_tokens_seen": 445542828, + "step": 2594 + }, + { + "epoch": 0.6823173538502005, + "loss": 0.06742848455905914, + "loss_ce": 0.005309954285621643, + "loss_iou": 0.48828125, + "loss_num": 0.012451171875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 445542828, + "step": 2594 + }, + { + "epoch": 0.6825803906095876, + "grad_norm": 4.617442760822597, + "learning_rate": 5e-06, + "loss": 0.137, + "num_input_tokens_seen": 445715128, + "step": 2595 + }, + { + "epoch": 0.6825803906095876, + "loss": 0.11120542138814926, + "loss_ce": 0.0006707610446028411, + "loss_iou": 0.56640625, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 445715128, + "step": 2595 + }, + { + "epoch": 0.6828434273689749, + "grad_norm": 8.070925307261565, + "learning_rate": 5e-06, + "loss": 0.1584, + "num_input_tokens_seen": 445887032, + "step": 2596 + }, + { + "epoch": 0.6828434273689749, + "loss": 0.12233078479766846, + "loss_ce": 0.0008708295645192266, + "loss_iou": 0.486328125, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 445887032, + "step": 2596 + }, + { + "epoch": 0.683106464128362, + "grad_norm": 4.079666869126499, + "learning_rate": 5e-06, + "loss": 0.1147, + "num_input_tokens_seen": 446059408, + "step": 2597 + }, + { + "epoch": 0.683106464128362, + "loss": 0.1118651032447815, + "loss_ce": 0.00023180160496849567, + "loss_iou": 0.49609375, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 446059408, + "step": 2597 + }, + { + "epoch": 0.6833695008877491, + "grad_norm": 6.132037428307681, + "learning_rate": 5e-06, + "loss": 0.1023, + "num_input_tokens_seen": 446229696, + "step": 2598 + }, + { + "epoch": 0.6833695008877491, + "loss": 0.13406622409820557, + "loss_ce": 0.0008264797506853938, + "loss_iou": 0.44140625, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 446229696, + "step": 2598 + }, + { + "epoch": 0.6836325376471362, + "grad_norm": 3.96696464215725, + "learning_rate": 5e-06, + "loss": 0.0997, + "num_input_tokens_seen": 446401824, + "step": 2599 + }, + { + "epoch": 0.6836325376471362, + "loss": 0.07421835511922836, + "loss_ce": 0.0017085927538573742, + "loss_iou": 0.4453125, + "loss_num": 0.0145263671875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 446401824, + "step": 2599 + }, + { + "epoch": 0.6838955744065233, + "grad_norm": 5.0831174679987035, + "learning_rate": 5e-06, + "loss": 0.1133, + "num_input_tokens_seen": 446573796, + "step": 2600 + }, + { + "epoch": 0.6838955744065233, + "loss": 0.10569039732217789, + "loss_ce": 0.0010151021415367723, + "loss_iou": 0.484375, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 446573796, + "step": 2600 + }, + { + "epoch": 0.6841586111659105, + "grad_norm": 15.882119320832834, + "learning_rate": 5e-06, + "loss": 0.1295, + "num_input_tokens_seen": 446746000, + "step": 2601 + }, + { + "epoch": 0.6841586111659105, + "loss": 0.1552843451499939, + "loss_ce": 0.000499195302836597, + "loss_iou": 0.3125, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 446746000, + "step": 2601 + }, + { + "epoch": 0.6844216479252976, + "grad_norm": 3.8491365485511118, + "learning_rate": 5e-06, + "loss": 0.0928, + "num_input_tokens_seen": 446918100, + "step": 2602 + }, + { + "epoch": 0.6844216479252976, + "loss": 0.050599753856658936, + "loss_ce": 0.0002762702642939985, + "loss_iou": 0.54296875, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 446918100, + "step": 2602 + }, + { + "epoch": 0.6846846846846847, + "grad_norm": 4.058506484572602, + "learning_rate": 5e-06, + "loss": 0.1059, + "num_input_tokens_seen": 447088492, + "step": 2603 + }, + { + "epoch": 0.6846846846846847, + "loss": 0.043845463544130325, + "loss_ce": 0.0001748104114085436, + "loss_iou": 0.57421875, + "loss_num": 0.00872802734375, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 447088492, + "step": 2603 + }, + { + "epoch": 0.6849477214440718, + "grad_norm": 4.841823848779041, + "learning_rate": 5e-06, + "loss": 0.079, + "num_input_tokens_seen": 447260652, + "step": 2604 + }, + { + "epoch": 0.6849477214440718, + "loss": 0.08405909687280655, + "loss_ce": 0.004438734147697687, + "loss_iou": 0.419921875, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 447260652, + "step": 2604 + }, + { + "epoch": 0.6852107582034589, + "grad_norm": 9.044057313780108, + "learning_rate": 5e-06, + "loss": 0.104, + "num_input_tokens_seen": 447432936, + "step": 2605 + }, + { + "epoch": 0.6852107582034589, + "loss": 0.07848000526428223, + "loss_ce": 0.0012094933772459626, + "loss_iou": 0.63671875, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 447432936, + "step": 2605 + }, + { + "epoch": 0.6854737949628461, + "grad_norm": 7.595343434642406, + "learning_rate": 5e-06, + "loss": 0.1109, + "num_input_tokens_seen": 447605384, + "step": 2606 + }, + { + "epoch": 0.6854737949628461, + "loss": 0.12424921244382858, + "loss_ce": 0.0026061516255140305, + "loss_iou": 0.50390625, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 447605384, + "step": 2606 + }, + { + "epoch": 0.6857368317222332, + "grad_norm": 5.0763063086969415, + "learning_rate": 5e-06, + "loss": 0.1387, + "num_input_tokens_seen": 447777576, + "step": 2607 + }, + { + "epoch": 0.6857368317222332, + "loss": 0.11916627734899521, + "loss_ce": 0.002253434620797634, + "loss_iou": 0.54296875, + "loss_num": 0.0234375, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 447777576, + "step": 2607 + }, + { + "epoch": 0.6859998684816203, + "grad_norm": 11.527878652419272, + "learning_rate": 5e-06, + "loss": 0.0904, + "num_input_tokens_seen": 447949852, + "step": 2608 + }, + { + "epoch": 0.6859998684816203, + "loss": 0.0797332376241684, + "loss_ce": 0.0030425682198256254, + "loss_iou": 0.5859375, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 447949852, + "step": 2608 + }, + { + "epoch": 0.6862629052410074, + "grad_norm": 6.796020734872217, + "learning_rate": 5e-06, + "loss": 0.1006, + "num_input_tokens_seen": 448122008, + "step": 2609 + }, + { + "epoch": 0.6862629052410074, + "loss": 0.09912531077861786, + "loss_ce": 0.0006145666702650487, + "loss_iou": 0.58203125, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 448122008, + "step": 2609 + }, + { + "epoch": 0.6865259420003945, + "grad_norm": 53.2300135535103, + "learning_rate": 5e-06, + "loss": 0.1731, + "num_input_tokens_seen": 448294084, + "step": 2610 + }, + { + "epoch": 0.6865259420003945, + "loss": 0.14799347519874573, + "loss_ce": 0.0005325321108102798, + "loss_iou": 0.337890625, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 448294084, + "step": 2610 + }, + { + "epoch": 0.6867889787597817, + "grad_norm": 8.384011107333327, + "learning_rate": 5e-06, + "loss": 0.1662, + "num_input_tokens_seen": 448466064, + "step": 2611 + }, + { + "epoch": 0.6867889787597817, + "loss": 0.16361187398433685, + "loss_ce": 0.0061564212664961815, + "loss_iou": 0.40625, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 448466064, + "step": 2611 + }, + { + "epoch": 0.6870520155191688, + "grad_norm": 58.16654404486195, + "learning_rate": 5e-06, + "loss": 0.1132, + "num_input_tokens_seen": 448638396, + "step": 2612 + }, + { + "epoch": 0.6870520155191688, + "loss": 0.11319980025291443, + "loss_ce": 0.008921236731112003, + "loss_iou": 0.4609375, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 448638396, + "step": 2612 + }, + { + "epoch": 0.6873150522785559, + "grad_norm": 4.791126855568047, + "learning_rate": 5e-06, + "loss": 0.0774, + "num_input_tokens_seen": 448808820, + "step": 2613 + }, + { + "epoch": 0.6873150522785559, + "loss": 0.08992376923561096, + "loss_ce": 0.0010565832490101457, + "loss_iou": 0.6640625, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 448808820, + "step": 2613 + }, + { + "epoch": 0.687578089037943, + "grad_norm": 4.623158042374579, + "learning_rate": 5e-06, + "loss": 0.1092, + "num_input_tokens_seen": 448980968, + "step": 2614 + }, + { + "epoch": 0.687578089037943, + "loss": 0.10407891124486923, + "loss_ce": 0.0005632878746837378, + "loss_iou": 0.39453125, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 448980968, + "step": 2614 + }, + { + "epoch": 0.6878411257973301, + "grad_norm": 16.128815011570154, + "learning_rate": 5e-06, + "loss": 0.0861, + "num_input_tokens_seen": 449153104, + "step": 2615 + }, + { + "epoch": 0.6878411257973301, + "loss": 0.05162462592124939, + "loss_ce": 0.0013011416886001825, + "loss_iou": 0.498046875, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 449153104, + "step": 2615 + }, + { + "epoch": 0.6881041625567172, + "grad_norm": 12.10527552944659, + "learning_rate": 5e-06, + "loss": 0.0853, + "num_input_tokens_seen": 449325208, + "step": 2616 + }, + { + "epoch": 0.6881041625567172, + "loss": 0.056678079068660736, + "loss_ce": 0.0005867721047252417, + "loss_iou": 0.515625, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 449325208, + "step": 2616 + }, + { + "epoch": 0.6883671993161045, + "grad_norm": 4.0544613329310675, + "learning_rate": 5e-06, + "loss": 0.0917, + "num_input_tokens_seen": 449497440, + "step": 2617 + }, + { + "epoch": 0.6883671993161045, + "loss": 0.12780970335006714, + "loss_ce": 0.0006887409836053848, + "loss_iou": 0.5234375, + "loss_num": 0.0255126953125, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 449497440, + "step": 2617 + }, + { + "epoch": 0.6886302360754916, + "grad_norm": 6.140781979511808, + "learning_rate": 5e-06, + "loss": 0.1287, + "num_input_tokens_seen": 449669860, + "step": 2618 + }, + { + "epoch": 0.6886302360754916, + "loss": 0.12178224325180054, + "loss_ce": 0.0038012792356312275, + "loss_iou": 0.43359375, + "loss_num": 0.0235595703125, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 449669860, + "step": 2618 + }, + { + "epoch": 0.6888932728348787, + "grad_norm": 4.582153125692399, + "learning_rate": 5e-06, + "loss": 0.1553, + "num_input_tokens_seen": 449842236, + "step": 2619 + }, + { + "epoch": 0.6888932728348787, + "loss": 0.11555735766887665, + "loss_ce": 0.003313705325126648, + "loss_iou": 0.5703125, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 449842236, + "step": 2619 + }, + { + "epoch": 0.6891563095942658, + "grad_norm": 10.075980623616992, + "learning_rate": 5e-06, + "loss": 0.1058, + "num_input_tokens_seen": 450014444, + "step": 2620 + }, + { + "epoch": 0.6891563095942658, + "loss": 0.20255392789840698, + "loss_ce": 0.004891556687653065, + "loss_iou": 0.4765625, + "loss_num": 0.03955078125, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 450014444, + "step": 2620 + }, + { + "epoch": 0.6894193463536529, + "grad_norm": 16.09024773291766, + "learning_rate": 5e-06, + "loss": 0.1354, + "num_input_tokens_seen": 450186632, + "step": 2621 + }, + { + "epoch": 0.6894193463536529, + "loss": 0.15711162984371185, + "loss_ce": 0.001548278727568686, + "loss_iou": 0.5703125, + "loss_num": 0.0311279296875, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 450186632, + "step": 2621 + }, + { + "epoch": 0.6896823831130401, + "grad_norm": 4.077753866376941, + "learning_rate": 5e-06, + "loss": 0.1379, + "num_input_tokens_seen": 450358652, + "step": 2622 + }, + { + "epoch": 0.6896823831130401, + "loss": 0.17466840147972107, + "loss_ce": 0.0030528109055012465, + "loss_iou": 0.32421875, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 450358652, + "step": 2622 + }, + { + "epoch": 0.6899454198724272, + "grad_norm": 5.782402643209976, + "learning_rate": 5e-06, + "loss": 0.08, + "num_input_tokens_seen": 450530992, + "step": 2623 + }, + { + "epoch": 0.6899454198724272, + "loss": 0.09183860570192337, + "loss_ce": 0.000987776555120945, + "loss_iou": 0.51171875, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 450530992, + "step": 2623 + }, + { + "epoch": 0.6902084566318143, + "grad_norm": 4.962596372966127, + "learning_rate": 5e-06, + "loss": 0.1336, + "num_input_tokens_seen": 450702972, + "step": 2624 + }, + { + "epoch": 0.6902084566318143, + "loss": 0.16587726771831512, + "loss_ce": 0.004210404586046934, + "loss_iou": 0.34375, + "loss_num": 0.0322265625, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 450702972, + "step": 2624 + }, + { + "epoch": 0.6904714933912014, + "grad_norm": 4.41836936316546, + "learning_rate": 5e-06, + "loss": 0.0755, + "num_input_tokens_seen": 450871644, + "step": 2625 + }, + { + "epoch": 0.6904714933912014, + "loss": 0.08390143513679504, + "loss_ce": 0.00022223126143217087, + "loss_iou": 0.35546875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 450871644, + "step": 2625 + }, + { + "epoch": 0.6907345301505885, + "grad_norm": 6.24899387116621, + "learning_rate": 5e-06, + "loss": 0.0997, + "num_input_tokens_seen": 451043764, + "step": 2626 + }, + { + "epoch": 0.6907345301505885, + "loss": 0.07276784628629684, + "loss_ce": 0.0041185528971254826, + "loss_iou": 0.515625, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 451043764, + "step": 2626 + }, + { + "epoch": 0.6909975669099757, + "grad_norm": 3.855416348391677, + "learning_rate": 5e-06, + "loss": 0.0774, + "num_input_tokens_seen": 451215860, + "step": 2627 + }, + { + "epoch": 0.6909975669099757, + "loss": 0.0781233012676239, + "loss_ce": 0.00033398933010175824, + "loss_iou": 0.4921875, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 451215860, + "step": 2627 + }, + { + "epoch": 0.6912606036693628, + "grad_norm": 9.945431727329952, + "learning_rate": 5e-06, + "loss": 0.0856, + "num_input_tokens_seen": 451387900, + "step": 2628 + }, + { + "epoch": 0.6912606036693628, + "loss": 0.11348669975996017, + "loss_ce": 0.0029215135145932436, + "loss_iou": 0.4140625, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 451387900, + "step": 2628 + }, + { + "epoch": 0.6915236404287499, + "grad_norm": 4.281056455844802, + "learning_rate": 5e-06, + "loss": 0.0937, + "num_input_tokens_seen": 451558132, + "step": 2629 + }, + { + "epoch": 0.6915236404287499, + "loss": 0.11048734933137894, + "loss_ce": 0.00626982469111681, + "loss_iou": 0.578125, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 451558132, + "step": 2629 + }, + { + "epoch": 0.691786677188137, + "grad_norm": 4.21783711884085, + "learning_rate": 5e-06, + "loss": 0.1156, + "num_input_tokens_seen": 451730300, + "step": 2630 + }, + { + "epoch": 0.691786677188137, + "loss": 0.06679116189479828, + "loss_ce": 0.0006290507735684514, + "loss_iou": 0.44921875, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 451730300, + "step": 2630 + }, + { + "epoch": 0.6920497139475241, + "grad_norm": 3.6368581797967257, + "learning_rate": 5e-06, + "loss": 0.0955, + "num_input_tokens_seen": 451902464, + "step": 2631 + }, + { + "epoch": 0.6920497139475241, + "loss": 0.07377897202968597, + "loss_ce": 0.002199993235990405, + "loss_iou": 0.373046875, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 451902464, + "step": 2631 + }, + { + "epoch": 0.6923127507069113, + "grad_norm": 4.812739476349578, + "learning_rate": 5e-06, + "loss": 0.136, + "num_input_tokens_seen": 452074432, + "step": 2632 + }, + { + "epoch": 0.6923127507069113, + "loss": 0.08376286178827286, + "loss_ce": 0.0004193550848867744, + "loss_iou": 0.482421875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 452074432, + "step": 2632 + }, + { + "epoch": 0.6925757874662984, + "grad_norm": 17.745450709557872, + "learning_rate": 5e-06, + "loss": 0.1677, + "num_input_tokens_seen": 452246508, + "step": 2633 + }, + { + "epoch": 0.6925757874662984, + "loss": 0.12932631373405457, + "loss_ce": 0.00045057572424411774, + "loss_iou": 0.48046875, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 452246508, + "step": 2633 + }, + { + "epoch": 0.6928388242256855, + "grad_norm": 3.8241822612681404, + "learning_rate": 5e-06, + "loss": 0.09, + "num_input_tokens_seen": 452418684, + "step": 2634 + }, + { + "epoch": 0.6928388242256855, + "loss": 0.07180548459291458, + "loss_ce": 0.000638492638245225, + "loss_iou": 0.578125, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 452418684, + "step": 2634 + }, + { + "epoch": 0.6931018609850726, + "grad_norm": 5.017758815401084, + "learning_rate": 5e-06, + "loss": 0.1214, + "num_input_tokens_seen": 452591112, + "step": 2635 + }, + { + "epoch": 0.6931018609850726, + "loss": 0.14692719280719757, + "loss_ce": 0.001221017329953611, + "loss_iou": 0.52734375, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 452591112, + "step": 2635 + }, + { + "epoch": 0.6933648977444598, + "grad_norm": 4.688279396436893, + "learning_rate": 5e-06, + "loss": 0.1001, + "num_input_tokens_seen": 452763100, + "step": 2636 + }, + { + "epoch": 0.6933648977444598, + "loss": 0.11540480703115463, + "loss_ce": 0.0031611607410013676, + "loss_iou": 0.5390625, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 452763100, + "step": 2636 + }, + { + "epoch": 0.693627934503847, + "grad_norm": 5.800484869059427, + "learning_rate": 5e-06, + "loss": 0.1513, + "num_input_tokens_seen": 452932760, + "step": 2637 + }, + { + "epoch": 0.693627934503847, + "loss": 0.20021981000900269, + "loss_ce": 0.002526947297155857, + "loss_iou": 0.62890625, + "loss_num": 0.03955078125, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 452932760, + "step": 2637 + }, + { + "epoch": 0.6938909712632341, + "grad_norm": 6.943406352493944, + "learning_rate": 5e-06, + "loss": 0.1213, + "num_input_tokens_seen": 453104824, + "step": 2638 + }, + { + "epoch": 0.6938909712632341, + "loss": 0.11978557705879211, + "loss_ce": 0.003910328261554241, + "loss_iou": 0.51953125, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 453104824, + "step": 2638 + }, + { + "epoch": 0.6941540080226212, + "grad_norm": 8.392991417214587, + "learning_rate": 5e-06, + "loss": 0.1027, + "num_input_tokens_seen": 453276756, + "step": 2639 + }, + { + "epoch": 0.6941540080226212, + "loss": 0.14925961196422577, + "loss_ce": 0.00796323362737894, + "loss_iou": 0.6484375, + "loss_num": 0.0281982421875, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 453276756, + "step": 2639 + }, + { + "epoch": 0.6944170447820083, + "grad_norm": 7.152620797713759, + "learning_rate": 5e-06, + "loss": 0.0825, + "num_input_tokens_seen": 453447368, + "step": 2640 + }, + { + "epoch": 0.6944170447820083, + "loss": 0.12115476280450821, + "loss_ce": 0.002624482847750187, + "loss_iou": 0.51171875, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 453447368, + "step": 2640 + }, + { + "epoch": 0.6946800815413954, + "grad_norm": 9.172566148062016, + "learning_rate": 5e-06, + "loss": 0.1376, + "num_input_tokens_seen": 453619160, + "step": 2641 + }, + { + "epoch": 0.6946800815413954, + "loss": 0.09918803721666336, + "loss_ce": 0.0022947255056351423, + "loss_iou": 0.53125, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 453619160, + "step": 2641 + }, + { + "epoch": 0.6949431183007825, + "grad_norm": 4.761554471447507, + "learning_rate": 5e-06, + "loss": 0.1401, + "num_input_tokens_seen": 453791024, + "step": 2642 + }, + { + "epoch": 0.6949431183007825, + "loss": 0.12317492812871933, + "loss_ce": 0.004400510806590319, + "loss_iou": 0.5, + "loss_num": 0.0238037109375, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 453791024, + "step": 2642 + }, + { + "epoch": 0.6952061550601697, + "grad_norm": 6.764608063652922, + "learning_rate": 5e-06, + "loss": 0.0985, + "num_input_tokens_seen": 453963268, + "step": 2643 + }, + { + "epoch": 0.6952061550601697, + "loss": 0.17664819955825806, + "loss_ce": 0.0005617668502964079, + "loss_iou": 0.5625, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 453963268, + "step": 2643 + }, + { + "epoch": 0.6954691918195568, + "grad_norm": 8.472259366872775, + "learning_rate": 5e-06, + "loss": 0.1241, + "num_input_tokens_seen": 454133580, + "step": 2644 + }, + { + "epoch": 0.6954691918195568, + "loss": 0.05588904023170471, + "loss_ce": 0.0003470498777460307, + "loss_iou": 0.52734375, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 454133580, + "step": 2644 + }, + { + "epoch": 0.6957322285789439, + "grad_norm": 9.45776397859236, + "learning_rate": 5e-06, + "loss": 0.1535, + "num_input_tokens_seen": 454305316, + "step": 2645 + }, + { + "epoch": 0.6957322285789439, + "loss": 0.11683906614780426, + "loss_ce": 0.002215046202763915, + "loss_iou": NaN, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 454305316, + "step": 2645 + }, + { + "epoch": 0.695995265338331, + "grad_norm": 12.21950005723, + "learning_rate": 5e-06, + "loss": 0.1414, + "num_input_tokens_seen": 454477460, + "step": 2646 + }, + { + "epoch": 0.695995265338331, + "loss": 0.19950228929519653, + "loss_ce": 0.00556308263912797, + "loss_iou": 0.455078125, + "loss_num": 0.038818359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 454477460, + "step": 2646 + }, + { + "epoch": 0.6962583020977181, + "grad_norm": 4.539781489505394, + "learning_rate": 5e-06, + "loss": 0.1468, + "num_input_tokens_seen": 454649576, + "step": 2647 + }, + { + "epoch": 0.6962583020977181, + "loss": 0.053970806300640106, + "loss_ce": 0.0009465104667469859, + "loss_iou": 0.3828125, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 454649576, + "step": 2647 + }, + { + "epoch": 0.6965213388571053, + "grad_norm": 14.32877869742435, + "learning_rate": 5e-06, + "loss": 0.1035, + "num_input_tokens_seen": 454821436, + "step": 2648 + }, + { + "epoch": 0.6965213388571053, + "loss": 0.10093516111373901, + "loss_ce": 0.00645274156704545, + "loss_iou": 0.380859375, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 454821436, + "step": 2648 + }, + { + "epoch": 0.6967843756164924, + "grad_norm": 7.804241962530211, + "learning_rate": 5e-06, + "loss": 0.1306, + "num_input_tokens_seen": 454993508, + "step": 2649 + }, + { + "epoch": 0.6967843756164924, + "loss": 0.1862741857767105, + "loss_ce": 0.0016580985393375158, + "loss_iou": 0.53515625, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 454993508, + "step": 2649 + }, + { + "epoch": 0.6970474123758795, + "grad_norm": 16.275796095386262, + "learning_rate": 5e-06, + "loss": 0.1161, + "num_input_tokens_seen": 455165748, + "step": 2650 + }, + { + "epoch": 0.6970474123758795, + "loss": 0.07620816677808762, + "loss_ce": 0.0016232050256803632, + "loss_iou": 0.5625, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 455165748, + "step": 2650 + }, + { + "epoch": 0.6973104491352666, + "grad_norm": 4.357121015686003, + "learning_rate": 5e-06, + "loss": 0.1124, + "num_input_tokens_seen": 455338084, + "step": 2651 + }, + { + "epoch": 0.6973104491352666, + "loss": 0.13134872913360596, + "loss_ce": 0.0034037926234304905, + "loss_iou": 0.59765625, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 455338084, + "step": 2651 + }, + { + "epoch": 0.6975734858946537, + "grad_norm": 5.3899957417482725, + "learning_rate": 5e-06, + "loss": 0.1252, + "num_input_tokens_seen": 455509908, + "step": 2652 + }, + { + "epoch": 0.6975734858946537, + "loss": 0.14220395684242249, + "loss_ce": 0.0009075828129425645, + "loss_iou": 0.458984375, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 455509908, + "step": 2652 + }, + { + "epoch": 0.697836522654041, + "grad_norm": 6.823610210106841, + "learning_rate": 5e-06, + "loss": 0.1046, + "num_input_tokens_seen": 455681908, + "step": 2653 + }, + { + "epoch": 0.697836522654041, + "loss": 0.05261433497071266, + "loss_ce": 0.0018330833408981562, + "loss_iou": 0.46484375, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 455681908, + "step": 2653 + }, + { + "epoch": 0.698099559413428, + "grad_norm": 6.597254450850545, + "learning_rate": 5e-06, + "loss": 0.1242, + "num_input_tokens_seen": 455853952, + "step": 2654 + }, + { + "epoch": 0.698099559413428, + "loss": 0.147089421749115, + "loss_ce": 0.00023882777895778418, + "loss_iou": 0.43359375, + "loss_num": 0.0294189453125, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 455853952, + "step": 2654 + }, + { + "epoch": 0.6983625961728152, + "grad_norm": 6.378423815524031, + "learning_rate": 5e-06, + "loss": 0.1075, + "num_input_tokens_seen": 456026208, + "step": 2655 + }, + { + "epoch": 0.6983625961728152, + "loss": 0.18122422695159912, + "loss_ce": 0.0033067562617361546, + "loss_iou": 0.57421875, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 456026208, + "step": 2655 + }, + { + "epoch": 0.6986256329322023, + "grad_norm": 25.554315055519154, + "learning_rate": 5e-06, + "loss": 0.1181, + "num_input_tokens_seen": 456198288, + "step": 2656 + }, + { + "epoch": 0.6986256329322023, + "loss": 0.10486012697219849, + "loss_ce": 0.0011003611143678427, + "loss_iou": 0.52734375, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 456198288, + "step": 2656 + }, + { + "epoch": 0.6988886696915894, + "grad_norm": 4.576359717222489, + "learning_rate": 5e-06, + "loss": 0.0955, + "num_input_tokens_seen": 456365496, + "step": 2657 + }, + { + "epoch": 0.6988886696915894, + "loss": 0.13455136120319366, + "loss_ce": 0.0007317845011129975, + "loss_iou": 0.53125, + "loss_num": 0.02685546875, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 456365496, + "step": 2657 + }, + { + "epoch": 0.6991517064509766, + "grad_norm": 5.013450140740463, + "learning_rate": 5e-06, + "loss": 0.1199, + "num_input_tokens_seen": 456537676, + "step": 2658 + }, + { + "epoch": 0.6991517064509766, + "loss": 0.08290005475282669, + "loss_ce": 0.003767975838854909, + "loss_iou": 0.5, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 456537676, + "step": 2658 + }, + { + "epoch": 0.6994147432103637, + "grad_norm": 4.084720539079834, + "learning_rate": 5e-06, + "loss": 0.1133, + "num_input_tokens_seen": 456710364, + "step": 2659 + }, + { + "epoch": 0.6994147432103637, + "loss": 0.06024003401398659, + "loss_ce": 0.0006849807105027139, + "loss_iou": 0.5703125, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 456710364, + "step": 2659 + }, + { + "epoch": 0.6996777799697508, + "grad_norm": 3.7990429726373636, + "learning_rate": 5e-06, + "loss": 0.1206, + "num_input_tokens_seen": 456880760, + "step": 2660 + }, + { + "epoch": 0.6996777799697508, + "loss": 0.09679127484560013, + "loss_ce": 0.0005083205760456622, + "loss_iou": 0.482421875, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 456880760, + "step": 2660 + }, + { + "epoch": 0.6999408167291379, + "grad_norm": 5.055246469854178, + "learning_rate": 5e-06, + "loss": 0.1654, + "num_input_tokens_seen": 457051432, + "step": 2661 + }, + { + "epoch": 0.6999408167291379, + "loss": 0.21587547659873962, + "loss_ce": 0.0004213774227537215, + "loss_iou": 0.3671875, + "loss_num": 0.04296875, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 457051432, + "step": 2661 + }, + { + "epoch": 0.700203853488525, + "grad_norm": 3.8688477802914774, + "learning_rate": 5e-06, + "loss": 0.1309, + "num_input_tokens_seen": 457223592, + "step": 2662 + }, + { + "epoch": 0.700203853488525, + "loss": 0.14515820145606995, + "loss_ce": 0.0040449099615216255, + "loss_iou": 0.4296875, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 457223592, + "step": 2662 + }, + { + "epoch": 0.7004668902479122, + "grad_norm": 10.967803606252325, + "learning_rate": 5e-06, + "loss": 0.0963, + "num_input_tokens_seen": 457396048, + "step": 2663 + }, + { + "epoch": 0.7004668902479122, + "loss": 0.13383157551288605, + "loss_ce": 0.0020261560566723347, + "loss_iou": 0.55078125, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 457396048, + "step": 2663 + }, + { + "epoch": 0.7007299270072993, + "grad_norm": 3.339673678629444, + "learning_rate": 5e-06, + "loss": 0.1163, + "num_input_tokens_seen": 457566272, + "step": 2664 + }, + { + "epoch": 0.7007299270072993, + "loss": 0.20303812623023987, + "loss_ce": 0.005711473990231752, + "loss_iou": 0.515625, + "loss_num": 0.03955078125, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 457566272, + "step": 2664 + }, + { + "epoch": 0.7009929637666864, + "grad_norm": 10.341407204229276, + "learning_rate": 5e-06, + "loss": 0.114, + "num_input_tokens_seen": 457738384, + "step": 2665 + }, + { + "epoch": 0.7009929637666864, + "loss": 0.14979971945285797, + "loss_ce": 0.0018810234032571316, + "loss_iou": 0.46875, + "loss_num": 0.0296630859375, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 457738384, + "step": 2665 + }, + { + "epoch": 0.7012560005260735, + "grad_norm": 3.460962528007649, + "learning_rate": 5e-06, + "loss": 0.1216, + "num_input_tokens_seen": 457910472, + "step": 2666 + }, + { + "epoch": 0.7012560005260735, + "loss": 0.12169472873210907, + "loss_ce": 0.004110502544790506, + "loss_iou": 0.5859375, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 457910472, + "step": 2666 + }, + { + "epoch": 0.7015190372854606, + "grad_norm": 5.718591075026739, + "learning_rate": 5e-06, + "loss": 0.1401, + "num_input_tokens_seen": 458082324, + "step": 2667 + }, + { + "epoch": 0.7015190372854606, + "loss": 0.1776396483182907, + "loss_ce": 0.0005613988032564521, + "loss_iou": 0.359375, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 458082324, + "step": 2667 + }, + { + "epoch": 0.7017820740448477, + "grad_norm": 5.655030135877534, + "learning_rate": 5e-06, + "loss": 0.1106, + "num_input_tokens_seen": 458254304, + "step": 2668 + }, + { + "epoch": 0.7017820740448477, + "loss": 0.12232168763875961, + "loss_ce": 0.0004344757762737572, + "loss_iou": 0.375, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 458254304, + "step": 2668 + }, + { + "epoch": 0.7020451108042349, + "grad_norm": 5.899988129903735, + "learning_rate": 5e-06, + "loss": 0.0895, + "num_input_tokens_seen": 458426532, + "step": 2669 + }, + { + "epoch": 0.7020451108042349, + "loss": 0.09821672737598419, + "loss_ce": 0.0016896221786737442, + "loss_iou": 0.54296875, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 458426532, + "step": 2669 + }, + { + "epoch": 0.702308147563622, + "grad_norm": 5.5784599020012715, + "learning_rate": 5e-06, + "loss": 0.1056, + "num_input_tokens_seen": 458596896, + "step": 2670 + }, + { + "epoch": 0.702308147563622, + "loss": 0.16502083837985992, + "loss_ce": 0.0003174682497046888, + "loss_iou": 0.3984375, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 458596896, + "step": 2670 + }, + { + "epoch": 0.7025711843230091, + "grad_norm": 14.038116606042868, + "learning_rate": 5e-06, + "loss": 0.1154, + "num_input_tokens_seen": 458769240, + "step": 2671 + }, + { + "epoch": 0.7025711843230091, + "loss": 0.15062229335308075, + "loss_ce": 0.0042905076406896114, + "loss_iou": 0.53125, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 458769240, + "step": 2671 + }, + { + "epoch": 0.7028342210823962, + "grad_norm": 4.120047179172289, + "learning_rate": 5e-06, + "loss": 0.0767, + "num_input_tokens_seen": 458941376, + "step": 2672 + }, + { + "epoch": 0.7028342210823962, + "loss": 0.08248385787010193, + "loss_ce": 0.0018869286868721247, + "loss_iou": 0.46875, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 458941376, + "step": 2672 + }, + { + "epoch": 0.7030972578417833, + "grad_norm": 4.682779609797298, + "learning_rate": 5e-06, + "loss": 0.1311, + "num_input_tokens_seen": 459113732, + "step": 2673 + }, + { + "epoch": 0.7030972578417833, + "loss": 0.08461406081914902, + "loss_ce": 0.0013010749826207757, + "loss_iou": 0.515625, + "loss_num": 0.0166015625, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 459113732, + "step": 2673 + }, + { + "epoch": 0.7033602946011706, + "grad_norm": 6.053537160791169, + "learning_rate": 5e-06, + "loss": 0.1242, + "num_input_tokens_seen": 459283668, + "step": 2674 + }, + { + "epoch": 0.7033602946011706, + "loss": 0.21815051138401031, + "loss_ce": 0.0011094921501353383, + "loss_iou": 0.419921875, + "loss_num": 0.04345703125, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 459283668, + "step": 2674 + }, + { + "epoch": 0.7036233313605577, + "grad_norm": 3.9631091480761715, + "learning_rate": 5e-06, + "loss": 0.11, + "num_input_tokens_seen": 459455956, + "step": 2675 + }, + { + "epoch": 0.7036233313605577, + "loss": 0.13024334609508514, + "loss_ce": 0.00025372387608513236, + "loss_iou": 0.5625, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 459455956, + "step": 2675 + }, + { + "epoch": 0.7038863681199448, + "grad_norm": 5.129114238432721, + "learning_rate": 5e-06, + "loss": 0.137, + "num_input_tokens_seen": 459627968, + "step": 2676 + }, + { + "epoch": 0.7038863681199448, + "loss": 0.08658237755298615, + "loss_ce": 0.002750593703240156, + "loss_iou": 0.46484375, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 459627968, + "step": 2676 + }, + { + "epoch": 0.7041494048793319, + "grad_norm": 20.187762197991425, + "learning_rate": 5e-06, + "loss": 0.124, + "num_input_tokens_seen": 459800260, + "step": 2677 + }, + { + "epoch": 0.7041494048793319, + "loss": 0.08818955719470978, + "loss_ce": 0.0012297153007239103, + "loss_iou": 0.53125, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 459800260, + "step": 2677 + }, + { + "epoch": 0.704412441638719, + "grad_norm": 7.902489579853604, + "learning_rate": 5e-06, + "loss": 0.1564, + "num_input_tokens_seen": 459971088, + "step": 2678 + }, + { + "epoch": 0.704412441638719, + "loss": 0.12185804545879364, + "loss_ce": 0.00024549951194785535, + "loss_iou": 0.3984375, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 459971088, + "step": 2678 + }, + { + "epoch": 0.7046754783981062, + "grad_norm": 27.40955645070263, + "learning_rate": 5e-06, + "loss": 0.0666, + "num_input_tokens_seen": 460143240, + "step": 2679 + }, + { + "epoch": 0.7046754783981062, + "loss": 0.06965695321559906, + "loss_ce": 0.0006261939415708184, + "loss_iou": 0.63671875, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 460143240, + "step": 2679 + }, + { + "epoch": 0.7049385151574933, + "grad_norm": 8.476577616174554, + "learning_rate": 5e-06, + "loss": 0.1372, + "num_input_tokens_seen": 460315544, + "step": 2680 + }, + { + "epoch": 0.7049385151574933, + "loss": 0.14567114412784576, + "loss_ce": 0.0017502475529909134, + "loss_iou": 0.515625, + "loss_num": 0.02880859375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 460315544, + "step": 2680 + }, + { + "epoch": 0.7052015519168804, + "grad_norm": 11.423647115492507, + "learning_rate": 5e-06, + "loss": 0.1329, + "num_input_tokens_seen": 460487612, + "step": 2681 + }, + { + "epoch": 0.7052015519168804, + "loss": 0.09613794833421707, + "loss_ce": 0.002235355554148555, + "loss_iou": 0.44921875, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 460487612, + "step": 2681 + }, + { + "epoch": 0.7054645886762675, + "grad_norm": 13.90990204859974, + "learning_rate": 5e-06, + "loss": 0.1238, + "num_input_tokens_seen": 460655964, + "step": 2682 + }, + { + "epoch": 0.7054645886762675, + "loss": 0.06838610768318176, + "loss_ce": 0.00024035980459302664, + "loss_iou": 0.49609375, + "loss_num": 0.01361083984375, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 460655964, + "step": 2682 + }, + { + "epoch": 0.7057276254356546, + "grad_norm": 14.957706126877364, + "learning_rate": 5e-06, + "loss": 0.1015, + "num_input_tokens_seen": 460827816, + "step": 2683 + }, + { + "epoch": 0.7057276254356546, + "loss": 0.1538136899471283, + "loss_ce": 0.0006154490984044969, + "loss_iou": 0.4609375, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 460827816, + "step": 2683 + }, + { + "epoch": 0.7059906621950418, + "grad_norm": 6.337066775015052, + "learning_rate": 5e-06, + "loss": 0.1216, + "num_input_tokens_seen": 461000128, + "step": 2684 + }, + { + "epoch": 0.7059906621950418, + "loss": 0.18371257185935974, + "loss_ce": 0.0005155415856279433, + "loss_iou": 0.57421875, + "loss_num": 0.03662109375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 461000128, + "step": 2684 + }, + { + "epoch": 0.7062536989544289, + "grad_norm": 4.77556589512149, + "learning_rate": 5e-06, + "loss": 0.0936, + "num_input_tokens_seen": 461172480, + "step": 2685 + }, + { + "epoch": 0.7062536989544289, + "loss": 0.06664656102657318, + "loss_ce": 0.001003247918561101, + "loss_iou": 0.55859375, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 461172480, + "step": 2685 + }, + { + "epoch": 0.706516735713816, + "grad_norm": 4.934194738374163, + "learning_rate": 5e-06, + "loss": 0.14, + "num_input_tokens_seen": 461344704, + "step": 2686 + }, + { + "epoch": 0.706516735713816, + "loss": 0.07681739330291748, + "loss_ce": 0.002476579276844859, + "loss_iou": 0.56640625, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 461344704, + "step": 2686 + }, + { + "epoch": 0.7067797724732031, + "grad_norm": 5.14148867298625, + "learning_rate": 5e-06, + "loss": 0.0942, + "num_input_tokens_seen": 461517244, + "step": 2687 + }, + { + "epoch": 0.7067797724732031, + "loss": 0.08841484785079956, + "loss_ce": 9.696922643342987e-05, + "loss_iou": 0.62890625, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 461517244, + "step": 2687 + }, + { + "epoch": 0.7070428092325902, + "grad_norm": 4.290637783097341, + "learning_rate": 5e-06, + "loss": 0.1026, + "num_input_tokens_seen": 461689392, + "step": 2688 + }, + { + "epoch": 0.7070428092325902, + "loss": 0.12317334860563278, + "loss_ce": 0.0057722218334674835, + "loss_iou": 0.484375, + "loss_num": 0.0235595703125, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 461689392, + "step": 2688 + }, + { + "epoch": 0.7073058459919774, + "grad_norm": 18.724820792441665, + "learning_rate": 5e-06, + "loss": 0.1002, + "num_input_tokens_seen": 461861916, + "step": 2689 + }, + { + "epoch": 0.7073058459919774, + "loss": 0.05893798917531967, + "loss_ce": 0.00014587045006919652, + "loss_iou": 0.59375, + "loss_num": 0.01171875, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 461861916, + "step": 2689 + }, + { + "epoch": 0.7075688827513645, + "grad_norm": 6.318558285374775, + "learning_rate": 5e-06, + "loss": 0.0952, + "num_input_tokens_seen": 462033832, + "step": 2690 + }, + { + "epoch": 0.7075688827513645, + "loss": 0.129222571849823, + "loss_ce": 0.00034684882848523557, + "loss_iou": 0.396484375, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 462033832, + "step": 2690 + }, + { + "epoch": 0.7078319195107516, + "grad_norm": 5.104970581967641, + "learning_rate": 5e-06, + "loss": 0.0907, + "num_input_tokens_seen": 462206008, + "step": 2691 + }, + { + "epoch": 0.7078319195107516, + "loss": 0.06862036883831024, + "loss_ce": 0.00010840100003406405, + "loss_iou": 0.5390625, + "loss_num": 0.01373291015625, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 462206008, + "step": 2691 + }, + { + "epoch": 0.7080949562701387, + "grad_norm": 7.0445063257027885, + "learning_rate": 5e-06, + "loss": 0.1429, + "num_input_tokens_seen": 462377836, + "step": 2692 + }, + { + "epoch": 0.7080949562701387, + "loss": 0.16992726922035217, + "loss_ce": 0.014318134635686874, + "loss_iou": 0.478515625, + "loss_num": 0.0311279296875, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 462377836, + "step": 2692 + }, + { + "epoch": 0.7083579930295258, + "grad_norm": 3.568328460902499, + "learning_rate": 5e-06, + "loss": 0.0725, + "num_input_tokens_seen": 462550184, + "step": 2693 + }, + { + "epoch": 0.7083579930295258, + "loss": 0.058605365455150604, + "loss_ce": 0.0025903512723743916, + "loss_iou": 0.48046875, + "loss_num": 0.01123046875, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 462550184, + "step": 2693 + }, + { + "epoch": 0.708621029788913, + "grad_norm": 16.451151717262515, + "learning_rate": 5e-06, + "loss": 0.136, + "num_input_tokens_seen": 462722228, + "step": 2694 + }, + { + "epoch": 0.708621029788913, + "loss": 0.15793108940124512, + "loss_ce": 0.0002467722224537283, + "loss_iou": 0.380859375, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 462722228, + "step": 2694 + }, + { + "epoch": 0.7088840665483002, + "grad_norm": 18.144741325116385, + "learning_rate": 5e-06, + "loss": 0.1438, + "num_input_tokens_seen": 462892644, + "step": 2695 + }, + { + "epoch": 0.7088840665483002, + "loss": 0.18394407629966736, + "loss_ce": 0.005751936696469784, + "loss_iou": 0.353515625, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 462892644, + "step": 2695 + }, + { + "epoch": 0.7091471033076873, + "grad_norm": 4.177749037836763, + "learning_rate": 5e-06, + "loss": 0.0915, + "num_input_tokens_seen": 463059460, + "step": 2696 + }, + { + "epoch": 0.7091471033076873, + "loss": 0.08263557404279709, + "loss_ce": 0.0037171156145632267, + "loss_iou": 0.330078125, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 463059460, + "step": 2696 + }, + { + "epoch": 0.7094101400670744, + "grad_norm": 24.286466900686108, + "learning_rate": 5e-06, + "loss": 0.1303, + "num_input_tokens_seen": 463231680, + "step": 2697 + }, + { + "epoch": 0.7094101400670744, + "loss": 0.1826072484254837, + "loss_ce": 0.005239082965999842, + "loss_iou": 0.40625, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 463231680, + "step": 2697 + }, + { + "epoch": 0.7096731768264615, + "grad_norm": 3.90367546541105, + "learning_rate": 5e-06, + "loss": 0.1316, + "num_input_tokens_seen": 463403632, + "step": 2698 + }, + { + "epoch": 0.7096731768264615, + "loss": 0.19678181409835815, + "loss_ce": 0.0011641355231404305, + "loss_iou": NaN, + "loss_num": 0.0390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 463403632, + "step": 2698 + }, + { + "epoch": 0.7099362135858486, + "grad_norm": 4.6638342162135, + "learning_rate": 5e-06, + "loss": 0.125, + "num_input_tokens_seen": 463575764, + "step": 2699 + }, + { + "epoch": 0.7099362135858486, + "loss": 0.05382794886827469, + "loss_ce": 0.0004832194827031344, + "loss_iou": 0.6328125, + "loss_num": 0.01068115234375, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 463575764, + "step": 2699 + }, + { + "epoch": 0.7101992503452358, + "grad_norm": 10.964970610276819, + "learning_rate": 5e-06, + "loss": 0.1311, + "num_input_tokens_seen": 463747960, + "step": 2700 + }, + { + "epoch": 0.7101992503452358, + "loss": 0.10617360472679138, + "loss_ce": 0.002444351091980934, + "loss_iou": 0.494140625, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 463747960, + "step": 2700 + }, + { + "epoch": 0.7104622871046229, + "grad_norm": 4.313980855592432, + "learning_rate": 5e-06, + "loss": 0.1045, + "num_input_tokens_seen": 463920292, + "step": 2701 + }, + { + "epoch": 0.7104622871046229, + "loss": 0.06053323671221733, + "loss_ce": 0.0002305020607309416, + "loss_iou": 0.66015625, + "loss_num": 0.0120849609375, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 463920292, + "step": 2701 + }, + { + "epoch": 0.71072532386401, + "grad_norm": 13.238355947380045, + "learning_rate": 5e-06, + "loss": 0.1614, + "num_input_tokens_seen": 464092396, + "step": 2702 + }, + { + "epoch": 0.71072532386401, + "loss": 0.09292985498905182, + "loss_ce": 0.002384199295192957, + "loss_iou": 0.39453125, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 464092396, + "step": 2702 + }, + { + "epoch": 0.7109883606233971, + "grad_norm": 3.301662541659157, + "learning_rate": 5e-06, + "loss": 0.085, + "num_input_tokens_seen": 464264724, + "step": 2703 + }, + { + "epoch": 0.7109883606233971, + "loss": 0.05558721721172333, + "loss_ce": 7.5738578743767e-05, + "loss_iou": 0.55078125, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 464264724, + "step": 2703 + }, + { + "epoch": 0.7112513973827842, + "grad_norm": 3.9814210468792854, + "learning_rate": 5e-06, + "loss": 0.084, + "num_input_tokens_seen": 464436668, + "step": 2704 + }, + { + "epoch": 0.7112513973827842, + "loss": 0.0715593621134758, + "loss_ce": 0.004069737158715725, + "loss_iou": 0.494140625, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 464436668, + "step": 2704 + }, + { + "epoch": 0.7115144341421714, + "grad_norm": 24.903703628485385, + "learning_rate": 5e-06, + "loss": 0.1558, + "num_input_tokens_seen": 464607064, + "step": 2705 + }, + { + "epoch": 0.7115144341421714, + "loss": 0.13525709509849548, + "loss_ce": 0.0011323521612212062, + "loss_iou": 0.5703125, + "loss_num": 0.02685546875, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 464607064, + "step": 2705 + }, + { + "epoch": 0.7117774709015585, + "grad_norm": 8.733454229975411, + "learning_rate": 5e-06, + "loss": 0.099, + "num_input_tokens_seen": 464779372, + "step": 2706 + }, + { + "epoch": 0.7117774709015585, + "loss": 0.1117292046546936, + "loss_ce": 0.002689904533326626, + "loss_iou": 0.44140625, + "loss_num": 0.0218505859375, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 464779372, + "step": 2706 + }, + { + "epoch": 0.7120405076609456, + "grad_norm": 4.906686826576695, + "learning_rate": 5e-06, + "loss": 0.1105, + "num_input_tokens_seen": 464951196, + "step": 2707 + }, + { + "epoch": 0.7120405076609456, + "loss": 0.09186941385269165, + "loss_ce": 0.00013357413990888745, + "loss_iou": 0.6171875, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 464951196, + "step": 2707 + }, + { + "epoch": 0.7123035444203327, + "grad_norm": 3.5122712023777214, + "learning_rate": 5e-06, + "loss": 0.1182, + "num_input_tokens_seen": 465119688, + "step": 2708 + }, + { + "epoch": 0.7123035444203327, + "loss": 0.17270034551620483, + "loss_ce": 0.004762125201523304, + "loss_iou": 0.3671875, + "loss_num": 0.03369140625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 465119688, + "step": 2708 + }, + { + "epoch": 0.7125665811797198, + "grad_norm": 4.667301843217583, + "learning_rate": 5e-06, + "loss": 0.1511, + "num_input_tokens_seen": 465291884, + "step": 2709 + }, + { + "epoch": 0.7125665811797198, + "loss": 0.24567091464996338, + "loss_ce": 0.0022016754373908043, + "loss_iou": 0.5703125, + "loss_num": 0.048583984375, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 465291884, + "step": 2709 + }, + { + "epoch": 0.712829617939107, + "grad_norm": 4.677081009771049, + "learning_rate": 5e-06, + "loss": 0.0924, + "num_input_tokens_seen": 465463732, + "step": 2710 + }, + { + "epoch": 0.712829617939107, + "loss": 0.10853127390146255, + "loss_ce": 0.0006211129948496819, + "loss_iou": 0.546875, + "loss_num": 0.021484375, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 465463732, + "step": 2710 + }, + { + "epoch": 0.7130926546984941, + "grad_norm": 9.121056040645339, + "learning_rate": 5e-06, + "loss": 0.0814, + "num_input_tokens_seen": 465635964, + "step": 2711 + }, + { + "epoch": 0.7130926546984941, + "loss": 0.05089259892702103, + "loss_ce": 0.0005080772680230439, + "loss_iou": 0.4453125, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 465635964, + "step": 2711 + }, + { + "epoch": 0.7133556914578812, + "grad_norm": 5.554084070604222, + "learning_rate": 5e-06, + "loss": 0.1028, + "num_input_tokens_seen": 465808444, + "step": 2712 + }, + { + "epoch": 0.7133556914578812, + "loss": 0.10150805115699768, + "loss_ce": 0.0013188383309170604, + "loss_iou": 0.51171875, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 465808444, + "step": 2712 + }, + { + "epoch": 0.7136187282172683, + "grad_norm": 7.721557509382751, + "learning_rate": 5e-06, + "loss": 0.1296, + "num_input_tokens_seen": 465980716, + "step": 2713 + }, + { + "epoch": 0.7136187282172683, + "loss": 0.1155381053686142, + "loss_ce": 0.0032639428973197937, + "loss_iou": 0.52734375, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 465980716, + "step": 2713 + }, + { + "epoch": 0.7138817649766555, + "grad_norm": 4.213278179953207, + "learning_rate": 5e-06, + "loss": 0.1475, + "num_input_tokens_seen": 466153208, + "step": 2714 + }, + { + "epoch": 0.7138817649766555, + "loss": 0.1079680472612381, + "loss_ce": 0.002407738706097007, + "loss_iou": 0.390625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 466153208, + "step": 2714 + }, + { + "epoch": 0.7141448017360427, + "grad_norm": 4.8611508745546725, + "learning_rate": 5e-06, + "loss": 0.1015, + "num_input_tokens_seen": 466325716, + "step": 2715 + }, + { + "epoch": 0.7141448017360427, + "loss": 0.07399383187294006, + "loss_ce": 0.0005075072403997183, + "loss_iou": 0.4765625, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 466325716, + "step": 2715 + }, + { + "epoch": 0.7144078384954298, + "grad_norm": 34.00071276149313, + "learning_rate": 5e-06, + "loss": 0.1836, + "num_input_tokens_seen": 466497816, + "step": 2716 + }, + { + "epoch": 0.7144078384954298, + "loss": 0.11154329776763916, + "loss_ce": 0.00032198382541537285, + "loss_iou": 0.435546875, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 466497816, + "step": 2716 + }, + { + "epoch": 0.7146708752548169, + "grad_norm": 4.289011543265615, + "learning_rate": 5e-06, + "loss": 0.1322, + "num_input_tokens_seen": 466666960, + "step": 2717 + }, + { + "epoch": 0.7146708752548169, + "loss": 0.10135161876678467, + "loss_ce": 0.000498652458190918, + "loss_iou": 0.4921875, + "loss_num": 0.0201416015625, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 466666960, + "step": 2717 + }, + { + "epoch": 0.714933912014204, + "grad_norm": 5.080575214282277, + "learning_rate": 5e-06, + "loss": 0.0789, + "num_input_tokens_seen": 466839356, + "step": 2718 + }, + { + "epoch": 0.714933912014204, + "loss": 0.04582955688238144, + "loss_ce": 9.896683332044631e-05, + "loss_iou": 0.51953125, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 466839356, + "step": 2718 + }, + { + "epoch": 0.7151969487735911, + "grad_norm": 5.237294473459814, + "learning_rate": 5e-06, + "loss": 0.1367, + "num_input_tokens_seen": 467011304, + "step": 2719 + }, + { + "epoch": 0.7151969487735911, + "loss": 0.0945780873298645, + "loss_ce": 0.0005534276133403182, + "loss_iou": 0.72265625, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 467011304, + "step": 2719 + }, + { + "epoch": 0.7154599855329782, + "grad_norm": 3.135766341307675, + "learning_rate": 5e-06, + "loss": 0.0941, + "num_input_tokens_seen": 467180536, + "step": 2720 + }, + { + "epoch": 0.7154599855329782, + "loss": 0.042348556220531464, + "loss_ce": 0.0003716244827955961, + "loss_iou": 0.3984375, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 467180536, + "step": 2720 + }, + { + "epoch": 0.7157230222923654, + "grad_norm": 3.5558668353050815, + "learning_rate": 5e-06, + "loss": 0.1062, + "num_input_tokens_seen": 467352776, + "step": 2721 + }, + { + "epoch": 0.7157230222923654, + "loss": 0.175160214304924, + "loss_ce": 0.0027969309594482183, + "loss_iou": 0.40234375, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 467352776, + "step": 2721 + }, + { + "epoch": 0.7159860590517525, + "grad_norm": 27.92814548048443, + "learning_rate": 5e-06, + "loss": 0.1623, + "num_input_tokens_seen": 467524800, + "step": 2722 + }, + { + "epoch": 0.7159860590517525, + "loss": 0.11873021721839905, + "loss_ce": 0.004441884811967611, + "loss_iou": 0.5078125, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 467524800, + "step": 2722 + }, + { + "epoch": 0.7162490958111396, + "grad_norm": 4.250603227803627, + "learning_rate": 5e-06, + "loss": 0.0898, + "num_input_tokens_seen": 467693664, + "step": 2723 + }, + { + "epoch": 0.7162490958111396, + "loss": 0.07381434738636017, + "loss_ce": 0.0007857821765355766, + "loss_iou": 0.66796875, + "loss_num": 0.01458740234375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 467693664, + "step": 2723 + }, + { + "epoch": 0.7165121325705267, + "grad_norm": 29.613017586366666, + "learning_rate": 5e-06, + "loss": 0.1783, + "num_input_tokens_seen": 467865648, + "step": 2724 + }, + { + "epoch": 0.7165121325705267, + "loss": 0.06583578884601593, + "loss_ce": 0.0012300718808546662, + "loss_iou": 0.5078125, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 467865648, + "step": 2724 + }, + { + "epoch": 0.7167751693299138, + "grad_norm": 9.322275949588843, + "learning_rate": 5e-06, + "loss": 0.1177, + "num_input_tokens_seen": 468035300, + "step": 2725 + }, + { + "epoch": 0.7167751693299138, + "loss": 0.11070144176483154, + "loss_ce": 0.00022780938888899982, + "loss_iou": 0.609375, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 468035300, + "step": 2725 + }, + { + "epoch": 0.717038206089301, + "grad_norm": 7.146952822401602, + "learning_rate": 5e-06, + "loss": 0.1409, + "num_input_tokens_seen": 468207524, + "step": 2726 + }, + { + "epoch": 0.717038206089301, + "loss": 0.11441102623939514, + "loss_ce": 0.0002447651932016015, + "loss_iou": 0.6015625, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 468207524, + "step": 2726 + }, + { + "epoch": 0.7173012428486881, + "grad_norm": 11.196754628011416, + "learning_rate": 5e-06, + "loss": 0.1409, + "num_input_tokens_seen": 468379608, + "step": 2727 + }, + { + "epoch": 0.7173012428486881, + "loss": 0.16638273000717163, + "loss_ce": 0.005860275588929653, + "loss_iou": 0.4609375, + "loss_num": 0.0322265625, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 468379608, + "step": 2727 + }, + { + "epoch": 0.7175642796080752, + "grad_norm": 10.776354846598704, + "learning_rate": 5e-06, + "loss": 0.1076, + "num_input_tokens_seen": 468551976, + "step": 2728 + }, + { + "epoch": 0.7175642796080752, + "loss": 0.11593279242515564, + "loss_ce": 0.002559985499829054, + "loss_iou": 0.5625, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 468551976, + "step": 2728 + }, + { + "epoch": 0.7178273163674623, + "grad_norm": 5.353937326922854, + "learning_rate": 5e-06, + "loss": 0.1075, + "num_input_tokens_seen": 468724232, + "step": 2729 + }, + { + "epoch": 0.7178273163674623, + "loss": 0.09276724606752396, + "loss_ce": 0.00023794792650733143, + "loss_iou": 0.51171875, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 468724232, + "step": 2729 + }, + { + "epoch": 0.7180903531268494, + "grad_norm": 4.435607019386441, + "learning_rate": 5e-06, + "loss": 0.1268, + "num_input_tokens_seen": 468896348, + "step": 2730 + }, + { + "epoch": 0.7180903531268494, + "loss": 0.14431005716323853, + "loss_ce": 0.001487781759351492, + "loss_iou": 0.41796875, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 468896348, + "step": 2730 + }, + { + "epoch": 0.7183533898862366, + "grad_norm": 5.860440789547134, + "learning_rate": 5e-06, + "loss": 0.0948, + "num_input_tokens_seen": 469068240, + "step": 2731 + }, + { + "epoch": 0.7183533898862366, + "loss": 0.11086121201515198, + "loss_ce": 0.003713999642059207, + "loss_iou": 0.5, + "loss_num": 0.021484375, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 469068240, + "step": 2731 + }, + { + "epoch": 0.7186164266456238, + "grad_norm": 11.64009264254108, + "learning_rate": 5e-06, + "loss": 0.1218, + "num_input_tokens_seen": 469238748, + "step": 2732 + }, + { + "epoch": 0.7186164266456238, + "loss": 0.14171399176120758, + "loss_ce": 0.0019129666034132242, + "loss_iou": NaN, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 469238748, + "step": 2732 + }, + { + "epoch": 0.7188794634050109, + "grad_norm": 19.26931594616413, + "learning_rate": 5e-06, + "loss": 0.1369, + "num_input_tokens_seen": 469409356, + "step": 2733 + }, + { + "epoch": 0.7188794634050109, + "loss": 0.16873466968536377, + "loss_ce": 0.0015593739226460457, + "loss_iou": 0.51953125, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 469409356, + "step": 2733 + }, + { + "epoch": 0.719142500164398, + "grad_norm": 39.88591044746162, + "learning_rate": 5e-06, + "loss": 0.1402, + "num_input_tokens_seen": 469581536, + "step": 2734 + }, + { + "epoch": 0.719142500164398, + "loss": 0.13763278722763062, + "loss_ce": 0.0018295738846063614, + "loss_iou": 0.55078125, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 469581536, + "step": 2734 + }, + { + "epoch": 0.7194055369237851, + "grad_norm": 5.070448909039763, + "learning_rate": 5e-06, + "loss": 0.0973, + "num_input_tokens_seen": 469753888, + "step": 2735 + }, + { + "epoch": 0.7194055369237851, + "loss": 0.11928269267082214, + "loss_ce": 0.0068864524364471436, + "loss_iou": 0.578125, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 469753888, + "step": 2735 + }, + { + "epoch": 0.7196685736831723, + "grad_norm": 4.73412496708124, + "learning_rate": 5e-06, + "loss": 0.0816, + "num_input_tokens_seen": 469926224, + "step": 2736 + }, + { + "epoch": 0.7196685736831723, + "loss": 0.10194897651672363, + "loss_ce": 0.000539067608769983, + "loss_iou": 0.43359375, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 469926224, + "step": 2736 + }, + { + "epoch": 0.7199316104425594, + "grad_norm": 14.69635519193442, + "learning_rate": 5e-06, + "loss": 0.1327, + "num_input_tokens_seen": 470096688, + "step": 2737 + }, + { + "epoch": 0.7199316104425594, + "loss": 0.14469808340072632, + "loss_ce": 0.0047444626688957214, + "loss_iou": 0.671875, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 470096688, + "step": 2737 + }, + { + "epoch": 0.7201946472019465, + "grad_norm": 5.497552383274807, + "learning_rate": 5e-06, + "loss": 0.1507, + "num_input_tokens_seen": 470268692, + "step": 2738 + }, + { + "epoch": 0.7201946472019465, + "loss": 0.07712777704000473, + "loss_ce": 0.0003760677354875952, + "loss_iou": 0.51953125, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 470268692, + "step": 2738 + }, + { + "epoch": 0.7204576839613336, + "grad_norm": 4.073372808076834, + "learning_rate": 5e-06, + "loss": 0.1027, + "num_input_tokens_seen": 470440752, + "step": 2739 + }, + { + "epoch": 0.7204576839613336, + "loss": 0.1480931043624878, + "loss_ce": 0.001303559634834528, + "loss_iou": 0.5546875, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 470440752, + "step": 2739 + }, + { + "epoch": 0.7207207207207207, + "grad_norm": 5.675528284561051, + "learning_rate": 5e-06, + "loss": 0.0967, + "num_input_tokens_seen": 470612568, + "step": 2740 + }, + { + "epoch": 0.7207207207207207, + "loss": 0.07299304753541946, + "loss_ce": 0.004481084644794464, + "loss_iou": 0.625, + "loss_num": 0.01373291015625, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 470612568, + "step": 2740 + }, + { + "epoch": 0.7209837574801078, + "grad_norm": 20.879698464276387, + "learning_rate": 5e-06, + "loss": 0.0953, + "num_input_tokens_seen": 470784724, + "step": 2741 + }, + { + "epoch": 0.7209837574801078, + "loss": 0.1316141039133072, + "loss_ce": 0.001822838094085455, + "loss_iou": 0.302734375, + "loss_num": 0.02587890625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 470784724, + "step": 2741 + }, + { + "epoch": 0.721246794239495, + "grad_norm": 10.194443634028673, + "learning_rate": 5e-06, + "loss": 0.1311, + "num_input_tokens_seen": 470957128, + "step": 2742 + }, + { + "epoch": 0.721246794239495, + "loss": 0.2153215855360031, + "loss_ce": 0.0009050846565514803, + "loss_iou": 0.3984375, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 470957128, + "step": 2742 + }, + { + "epoch": 0.7215098309988821, + "grad_norm": 4.046503425790058, + "learning_rate": 5e-06, + "loss": 0.1402, + "num_input_tokens_seen": 471129076, + "step": 2743 + }, + { + "epoch": 0.7215098309988821, + "loss": 0.17502932250499725, + "loss_ce": 0.005137969274073839, + "loss_iou": NaN, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 471129076, + "step": 2743 + }, + { + "epoch": 0.7217728677582692, + "grad_norm": 16.718028178353897, + "learning_rate": 5e-06, + "loss": 0.1646, + "num_input_tokens_seen": 471301596, + "step": 2744 + }, + { + "epoch": 0.7217728677582692, + "loss": 0.1273680329322815, + "loss_ce": 0.0013304388849064708, + "loss_iou": 0.5078125, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 471301596, + "step": 2744 + }, + { + "epoch": 0.7220359045176563, + "grad_norm": 19.351969856870078, + "learning_rate": 5e-06, + "loss": 0.1174, + "num_input_tokens_seen": 471473704, + "step": 2745 + }, + { + "epoch": 0.7220359045176563, + "loss": 0.08187679946422577, + "loss_ce": 4.3911892134929076e-05, + "loss_iou": 0.439453125, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 471473704, + "step": 2745 + }, + { + "epoch": 0.7222989412770434, + "grad_norm": 3.5611349833279236, + "learning_rate": 5e-06, + "loss": 0.0947, + "num_input_tokens_seen": 471644084, + "step": 2746 + }, + { + "epoch": 0.7222989412770434, + "loss": 0.04295755550265312, + "loss_ce": 0.00018716827617026865, + "loss_iou": 0.48828125, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 471644084, + "step": 2746 + }, + { + "epoch": 0.7225619780364306, + "grad_norm": 4.500519410155881, + "learning_rate": 5e-06, + "loss": 0.1031, + "num_input_tokens_seen": 471814452, + "step": 2747 + }, + { + "epoch": 0.7225619780364306, + "loss": 0.15782231092453003, + "loss_ce": 0.0029913773760199547, + "loss_iou": 0.48046875, + "loss_num": 0.0308837890625, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 471814452, + "step": 2747 + }, + { + "epoch": 0.7228250147958177, + "grad_norm": 5.3088355134433645, + "learning_rate": 5e-06, + "loss": 0.1564, + "num_input_tokens_seen": 471980364, + "step": 2748 + }, + { + "epoch": 0.7228250147958177, + "loss": 0.16449454426765442, + "loss_ce": 0.0011339561315253377, + "loss_iou": 0.42578125, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 471980364, + "step": 2748 + }, + { + "epoch": 0.7230880515552048, + "grad_norm": 8.162974487273814, + "learning_rate": 5e-06, + "loss": 0.1117, + "num_input_tokens_seen": 472152464, + "step": 2749 + }, + { + "epoch": 0.7230880515552048, + "loss": 0.08619838953018188, + "loss_ce": 0.0012984833447262645, + "loss_iou": 0.5234375, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 472152464, + "step": 2749 + }, + { + "epoch": 0.7233510883145919, + "grad_norm": 6.354241375539321, + "learning_rate": 5e-06, + "loss": 0.0919, + "num_input_tokens_seen": 472324756, + "step": 2750 + }, + { + "epoch": 0.7233510883145919, + "eval_websight_new_CIoU": 0.8880318701267242, + "eval_websight_new_GIoU": 0.8903599679470062, + "eval_websight_new_IoU": 0.8917953372001648, + "eval_websight_new_MAE_all": 0.014968848787248135, + "eval_websight_new_MAE_h": 0.008249826729297638, + "eval_websight_new_MAE_w": 0.021866537630558014, + "eval_websight_new_MAE_x": 0.02344994805753231, + "eval_websight_new_MAE_y": 0.006309080636128783, + "eval_websight_new_NUM_probability": 0.9999924898147583, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 0.0760061964392662, + "eval_websight_new_loss_ce": 5.840479570906609e-05, + "eval_websight_new_loss_iou": 0.35406494140625, + "eval_websight_new_loss_num": 0.013774871826171875, + "eval_websight_new_loss_xval": 0.0689239501953125, + "eval_websight_new_runtime": 59.6817, + "eval_websight_new_samples_per_second": 0.838, + "eval_websight_new_steps_per_second": 0.034, + "num_input_tokens_seen": 472324756, + "step": 2750 + }, + { + "epoch": 0.7233510883145919, + "eval_seeclick_CIoU": 0.6159887313842773, + "eval_seeclick_GIoU": 0.6205049157142639, + "eval_seeclick_IoU": 0.6426993608474731, + "eval_seeclick_MAE_all": 0.048651453107595444, + "eval_seeclick_MAE_h": 0.030230149626731873, + "eval_seeclick_MAE_w": 0.06347078271210194, + "eval_seeclick_MAE_x": 0.0767427384853363, + "eval_seeclick_MAE_y": 0.024162148125469685, + "eval_seeclick_NUM_probability": 0.9999720454216003, + "eval_seeclick_inside_bbox": 0.8920454680919647, + "eval_seeclick_loss": 0.22562426328659058, + "eval_seeclick_loss_ce": 0.008924027904868126, + "eval_seeclick_loss_iou": 0.5213623046875, + "eval_seeclick_loss_num": 0.04332733154296875, + "eval_seeclick_loss_xval": 0.2166900634765625, + "eval_seeclick_runtime": 76.0092, + "eval_seeclick_samples_per_second": 0.566, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 472324756, + "step": 2750 + }, + { + "epoch": 0.7233510883145919, + "eval_icons_CIoU": 0.8715368807315826, + "eval_icons_GIoU": 0.8687921464443207, + "eval_icons_IoU": 0.8755140602588654, + "eval_icons_MAE_all": 0.015967791434377432, + "eval_icons_MAE_h": 0.016439005732536316, + "eval_icons_MAE_w": 0.015302729327231646, + "eval_icons_MAE_x": 0.015842752531170845, + "eval_icons_MAE_y": 0.016286680474877357, + "eval_icons_NUM_probability": 0.9999882578849792, + "eval_icons_inside_bbox": 1.0, + "eval_icons_loss": 0.057806555181741714, + "eval_icons_loss_ce": 1.1600203379202867e-05, + "eval_icons_loss_iou": 0.617919921875, + "eval_icons_loss_num": 0.010974884033203125, + "eval_icons_loss_xval": 0.0548553466796875, + "eval_icons_runtime": 81.9252, + "eval_icons_samples_per_second": 0.61, + "eval_icons_steps_per_second": 0.024, + "num_input_tokens_seen": 472324756, + "step": 2750 + }, + { + "epoch": 0.7233510883145919, + "eval_screenspot_CIoU": 0.5503915150960287, + "eval_screenspot_GIoU": 0.5448275804519653, + "eval_screenspot_IoU": 0.5854077339172363, + "eval_screenspot_MAE_all": 0.0787569632132848, + "eval_screenspot_MAE_h": 0.05612564583619436, + "eval_screenspot_MAE_w": 0.1299388830860456, + "eval_screenspot_MAE_x": 0.07724836965401967, + "eval_screenspot_MAE_y": 0.05171496793627739, + "eval_screenspot_NUM_probability": 0.9999065001805624, + "eval_screenspot_inside_bbox": 0.8291666706403097, + "eval_screenspot_loss": 0.9146350622177124, + "eval_screenspot_loss_ce": 0.567759374777476, + "eval_screenspot_loss_iou": 0.5614013671875, + "eval_screenspot_loss_num": 0.06831868489583333, + "eval_screenspot_loss_xval": 0.3416341145833333, + "eval_screenspot_runtime": 140.0636, + "eval_screenspot_samples_per_second": 0.635, + "eval_screenspot_steps_per_second": 0.021, + "num_input_tokens_seen": 472324756, + "step": 2750 + }, + { + "epoch": 0.7233510883145919, + "loss": 0.9040678143501282, + "loss_ce": 0.5593412518501282, + "loss_iou": 0.455078125, + "loss_num": 0.06884765625, + "loss_xval": 0.34375, + "num_input_tokens_seen": 472324756, + "step": 2750 + }, + { + "epoch": 0.723614125073979, + "grad_norm": 4.425480410673166, + "learning_rate": 5e-06, + "loss": 0.1233, + "num_input_tokens_seen": 472496880, + "step": 2751 + }, + { + "epoch": 0.723614125073979, + "loss": 0.10025835037231445, + "loss_ce": 0.0005421665264293551, + "loss_iou": 0.51171875, + "loss_num": 0.02001953125, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 472496880, + "step": 2751 + }, + { + "epoch": 0.7238771618333663, + "grad_norm": 4.0784104980939135, + "learning_rate": 5e-06, + "loss": 0.0885, + "num_input_tokens_seen": 472668928, + "step": 2752 + }, + { + "epoch": 0.7238771618333663, + "loss": 0.0833350419998169, + "loss_ce": 0.00038827050593681633, + "loss_iou": 0.34765625, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 472668928, + "step": 2752 + }, + { + "epoch": 0.7241401985927534, + "grad_norm": 6.523452376364869, + "learning_rate": 5e-06, + "loss": 0.1273, + "num_input_tokens_seen": 472841040, + "step": 2753 + }, + { + "epoch": 0.7241401985927534, + "loss": 0.13216346502304077, + "loss_ce": 0.00023597091785632074, + "loss_iou": 0.54296875, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 472841040, + "step": 2753 + }, + { + "epoch": 0.7244032353521405, + "grad_norm": 13.511689037708722, + "learning_rate": 5e-06, + "loss": 0.0835, + "num_input_tokens_seen": 473013468, + "step": 2754 + }, + { + "epoch": 0.7244032353521405, + "loss": 0.0918908640742302, + "loss_ce": 0.001101069850847125, + "loss_iou": 0.431640625, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 473013468, + "step": 2754 + }, + { + "epoch": 0.7246662721115276, + "grad_norm": 4.609556566608224, + "learning_rate": 5e-06, + "loss": 0.1375, + "num_input_tokens_seen": 473180556, + "step": 2755 + }, + { + "epoch": 0.7246662721115276, + "loss": 0.2066127210855484, + "loss_ce": 0.0006495795678347349, + "loss_iou": 0.400390625, + "loss_num": 0.041259765625, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 473180556, + "step": 2755 + }, + { + "epoch": 0.7249293088709147, + "grad_norm": 17.674016284792703, + "learning_rate": 5e-06, + "loss": 0.123, + "num_input_tokens_seen": 473352636, + "step": 2756 + }, + { + "epoch": 0.7249293088709147, + "loss": 0.13866102695465088, + "loss_ce": 0.0018507244531065226, + "loss_iou": 0.443359375, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 473352636, + "step": 2756 + }, + { + "epoch": 0.7251923456303019, + "grad_norm": 12.378222072828299, + "learning_rate": 5e-06, + "loss": 0.0739, + "num_input_tokens_seen": 473524848, + "step": 2757 + }, + { + "epoch": 0.7251923456303019, + "loss": 0.059792935848236084, + "loss_ce": 0.0014433301985263824, + "loss_iou": 0.419921875, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 473524848, + "step": 2757 + }, + { + "epoch": 0.725455382389689, + "grad_norm": 4.317210813813628, + "learning_rate": 5e-06, + "loss": 0.1362, + "num_input_tokens_seen": 473697016, + "step": 2758 + }, + { + "epoch": 0.725455382389689, + "loss": 0.1696222722530365, + "loss_ce": 0.0004328044014982879, + "loss_iou": 0.3984375, + "loss_num": 0.03369140625, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 473697016, + "step": 2758 + }, + { + "epoch": 0.7257184191490761, + "grad_norm": 14.320905421992641, + "learning_rate": 5e-06, + "loss": 0.1392, + "num_input_tokens_seen": 473868992, + "step": 2759 + }, + { + "epoch": 0.7257184191490761, + "loss": 0.07304464280605316, + "loss_ce": 0.0016335132531821728, + "loss_iou": 0.6484375, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 473868992, + "step": 2759 + }, + { + "epoch": 0.7259814559084632, + "grad_norm": 3.9251643218505756, + "learning_rate": 5e-06, + "loss": 0.1297, + "num_input_tokens_seen": 474041228, + "step": 2760 + }, + { + "epoch": 0.7259814559084632, + "loss": 0.09449034929275513, + "loss_ce": 0.0015948471846058965, + "loss_iou": 0.48828125, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 474041228, + "step": 2760 + }, + { + "epoch": 0.7262444926678503, + "grad_norm": 4.80079690317671, + "learning_rate": 5e-06, + "loss": 0.1184, + "num_input_tokens_seen": 474213616, + "step": 2761 + }, + { + "epoch": 0.7262444926678503, + "loss": 0.14180995523929596, + "loss_ce": 0.001276510301977396, + "loss_iou": 0.53125, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 474213616, + "step": 2761 + }, + { + "epoch": 0.7265075294272375, + "grad_norm": 5.815165285313656, + "learning_rate": 5e-06, + "loss": 0.1224, + "num_input_tokens_seen": 474385980, + "step": 2762 + }, + { + "epoch": 0.7265075294272375, + "loss": 0.14968647062778473, + "loss_ce": 0.0027748444117605686, + "loss_iou": 0.474609375, + "loss_num": 0.0294189453125, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 474385980, + "step": 2762 + }, + { + "epoch": 0.7267705661866246, + "grad_norm": 18.689053225654995, + "learning_rate": 5e-06, + "loss": 0.1345, + "num_input_tokens_seen": 474558212, + "step": 2763 + }, + { + "epoch": 0.7267705661866246, + "loss": 0.07762917876243591, + "loss_ce": 0.0015488516073673964, + "loss_iou": 0.546875, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 474558212, + "step": 2763 + }, + { + "epoch": 0.7270336029460117, + "grad_norm": 26.06170268014936, + "learning_rate": 5e-06, + "loss": 0.1927, + "num_input_tokens_seen": 474730824, + "step": 2764 + }, + { + "epoch": 0.7270336029460117, + "loss": 0.2366107702255249, + "loss_ce": 0.0011676568537950516, + "loss_iou": 0.337890625, + "loss_num": 0.047119140625, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 474730824, + "step": 2764 + }, + { + "epoch": 0.7272966397053988, + "grad_norm": 4.246687258335021, + "learning_rate": 5e-06, + "loss": 0.0722, + "num_input_tokens_seen": 474901372, + "step": 2765 + }, + { + "epoch": 0.7272966397053988, + "loss": 0.05860138311982155, + "loss_ce": 0.0005874672788195312, + "loss_iou": 0.5546875, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 474901372, + "step": 2765 + }, + { + "epoch": 0.7275596764647859, + "grad_norm": 10.116948882200285, + "learning_rate": 5e-06, + "loss": 0.1366, + "num_input_tokens_seen": 475070388, + "step": 2766 + }, + { + "epoch": 0.7275596764647859, + "loss": 0.14292961359024048, + "loss_ce": 0.0015111502725630999, + "loss_iou": 0.484375, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 475070388, + "step": 2766 + }, + { + "epoch": 0.727822713224173, + "grad_norm": 5.556641114443334, + "learning_rate": 5e-06, + "loss": 0.1004, + "num_input_tokens_seen": 475242528, + "step": 2767 + }, + { + "epoch": 0.727822713224173, + "loss": 0.05033715069293976, + "loss_ce": 0.00010521705553401262, + "loss_iou": 0.51953125, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 475242528, + "step": 2767 + }, + { + "epoch": 0.7280857499835602, + "grad_norm": 7.020407355867949, + "learning_rate": 5e-06, + "loss": 0.1336, + "num_input_tokens_seen": 475414560, + "step": 2768 + }, + { + "epoch": 0.7280857499835602, + "loss": 0.14076019823551178, + "loss_ce": 0.003125916002318263, + "loss_iou": 0.41796875, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 475414560, + "step": 2768 + }, + { + "epoch": 0.7283487867429473, + "grad_norm": 3.869087312985774, + "learning_rate": 5e-06, + "loss": 0.1426, + "num_input_tokens_seen": 475586900, + "step": 2769 + }, + { + "epoch": 0.7283487867429473, + "loss": 0.1432042121887207, + "loss_ce": 0.001144890207797289, + "loss_iou": NaN, + "loss_num": 0.0284423828125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 475586900, + "step": 2769 + }, + { + "epoch": 0.7286118235023344, + "grad_norm": 5.203369255610556, + "learning_rate": 5e-06, + "loss": 0.1145, + "num_input_tokens_seen": 475759012, + "step": 2770 + }, + { + "epoch": 0.7286118235023344, + "loss": 0.09897112846374512, + "loss_ce": 0.000979190575890243, + "loss_iou": 0.478515625, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 475759012, + "step": 2770 + }, + { + "epoch": 0.7288748602617215, + "grad_norm": 5.470559222873129, + "learning_rate": 5e-06, + "loss": 0.1425, + "num_input_tokens_seen": 475931112, + "step": 2771 + }, + { + "epoch": 0.7288748602617215, + "loss": 0.057690735906362534, + "loss_ce": 0.00324737885966897, + "loss_iou": 0.447265625, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 475931112, + "step": 2771 + }, + { + "epoch": 0.7291378970211086, + "grad_norm": 18.974366841188825, + "learning_rate": 5e-06, + "loss": 0.1373, + "num_input_tokens_seen": 476100764, + "step": 2772 + }, + { + "epoch": 0.7291378970211086, + "loss": 0.15548212826251984, + "loss_ce": 0.0019024083158001304, + "loss_iou": 0.5078125, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 476100764, + "step": 2772 + }, + { + "epoch": 0.7294009337804959, + "grad_norm": 5.671161770383539, + "learning_rate": 5e-06, + "loss": 0.0964, + "num_input_tokens_seen": 476272960, + "step": 2773 + }, + { + "epoch": 0.7294009337804959, + "loss": 0.0945616215467453, + "loss_ce": 0.00023179112758953124, + "loss_iou": 0.50390625, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 476272960, + "step": 2773 + }, + { + "epoch": 0.729663970539883, + "grad_norm": 34.00148217519992, + "learning_rate": 5e-06, + "loss": 0.1343, + "num_input_tokens_seen": 476445160, + "step": 2774 + }, + { + "epoch": 0.729663970539883, + "loss": 0.2345729023218155, + "loss_ce": 0.0021205078810453415, + "loss_iou": 0.4921875, + "loss_num": 0.04638671875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 476445160, + "step": 2774 + }, + { + "epoch": 0.7299270072992701, + "grad_norm": 4.401043478623018, + "learning_rate": 5e-06, + "loss": 0.0804, + "num_input_tokens_seen": 476617452, + "step": 2775 + }, + { + "epoch": 0.7299270072992701, + "loss": 0.07482883334159851, + "loss_ce": 0.00036594344419427216, + "loss_iou": 0.671875, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 476617452, + "step": 2775 + }, + { + "epoch": 0.7301900440586572, + "grad_norm": 4.444423155502646, + "learning_rate": 5e-06, + "loss": 0.1158, + "num_input_tokens_seen": 476789580, + "step": 2776 + }, + { + "epoch": 0.7301900440586572, + "loss": 0.08125682920217514, + "loss_ce": 0.0011787032708525658, + "loss_iou": 0.427734375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 476789580, + "step": 2776 + }, + { + "epoch": 0.7304530808180443, + "grad_norm": 9.388146192273584, + "learning_rate": 5e-06, + "loss": 0.142, + "num_input_tokens_seen": 476961420, + "step": 2777 + }, + { + "epoch": 0.7304530808180443, + "loss": 0.09009505808353424, + "loss_ce": 0.0032420377247035503, + "loss_iou": 0.41796875, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 476961420, + "step": 2777 + }, + { + "epoch": 0.7307161175774315, + "grad_norm": 13.509674001993902, + "learning_rate": 5e-06, + "loss": 0.1239, + "num_input_tokens_seen": 477133352, + "step": 2778 + }, + { + "epoch": 0.7307161175774315, + "loss": 0.15684369206428528, + "loss_ce": 0.0034623502288013697, + "loss_iou": 0.50390625, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 477133352, + "step": 2778 + }, + { + "epoch": 0.7309791543368186, + "grad_norm": 10.371891827363712, + "learning_rate": 5e-06, + "loss": 0.1536, + "num_input_tokens_seen": 477305700, + "step": 2779 + }, + { + "epoch": 0.7309791543368186, + "loss": 0.1920810043811798, + "loss_ce": 0.003238243516534567, + "loss_iou": 0.490234375, + "loss_num": 0.037841796875, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 477305700, + "step": 2779 + }, + { + "epoch": 0.7312421910962057, + "grad_norm": 4.473462123830256, + "learning_rate": 5e-06, + "loss": 0.106, + "num_input_tokens_seen": 477476192, + "step": 2780 + }, + { + "epoch": 0.7312421910962057, + "loss": 0.12351857125759125, + "loss_ce": 0.003828628221526742, + "loss_iou": 0.3671875, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 477476192, + "step": 2780 + }, + { + "epoch": 0.7315052278555928, + "grad_norm": 8.785650264431224, + "learning_rate": 5e-06, + "loss": 0.1026, + "num_input_tokens_seen": 477648396, + "step": 2781 + }, + { + "epoch": 0.7315052278555928, + "loss": 0.10561161488294601, + "loss_ce": 0.00282841082662344, + "loss_iou": 0.5390625, + "loss_num": 0.0206298828125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 477648396, + "step": 2781 + }, + { + "epoch": 0.7317682646149799, + "grad_norm": 4.9317310113146835, + "learning_rate": 5e-06, + "loss": 0.1214, + "num_input_tokens_seen": 477820536, + "step": 2782 + }, + { + "epoch": 0.7317682646149799, + "loss": 0.15500710904598236, + "loss_ce": 0.0015952409012243152, + "loss_iou": 0.50390625, + "loss_num": 0.03076171875, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 477820536, + "step": 2782 + }, + { + "epoch": 0.7320313013743671, + "grad_norm": 14.38075821387521, + "learning_rate": 5e-06, + "loss": 0.0948, + "num_input_tokens_seen": 477992760, + "step": 2783 + }, + { + "epoch": 0.7320313013743671, + "loss": 0.09361109137535095, + "loss_ce": 0.0013869699323549867, + "loss_iou": 0.55859375, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 477992760, + "step": 2783 + }, + { + "epoch": 0.7322943381337542, + "grad_norm": 3.3741559107447294, + "learning_rate": 5e-06, + "loss": 0.1013, + "num_input_tokens_seen": 478164884, + "step": 2784 + }, + { + "epoch": 0.7322943381337542, + "loss": 0.06834319978952408, + "loss_ce": 0.00392058864235878, + "loss_iou": 0.5625, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 478164884, + "step": 2784 + }, + { + "epoch": 0.7325573748931413, + "grad_norm": 4.560868774415877, + "learning_rate": 5e-06, + "loss": 0.1103, + "num_input_tokens_seen": 478337276, + "step": 2785 + }, + { + "epoch": 0.7325573748931413, + "loss": 0.08397236466407776, + "loss_ce": 0.0004152356996200979, + "loss_iou": 0.6015625, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 478337276, + "step": 2785 + }, + { + "epoch": 0.7328204116525284, + "grad_norm": 17.451789372881052, + "learning_rate": 5e-06, + "loss": 0.1281, + "num_input_tokens_seen": 478509332, + "step": 2786 + }, + { + "epoch": 0.7328204116525284, + "loss": 0.1723887324333191, + "loss_ce": 0.00039166733040474355, + "loss_iou": 0.44140625, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 478509332, + "step": 2786 + }, + { + "epoch": 0.7330834484119155, + "grad_norm": 6.047350371102969, + "learning_rate": 5e-06, + "loss": 0.1491, + "num_input_tokens_seen": 478681352, + "step": 2787 + }, + { + "epoch": 0.7330834484119155, + "loss": 0.04348166286945343, + "loss_ce": 0.0019167213467881083, + "loss_iou": 0.5703125, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 478681352, + "step": 2787 + }, + { + "epoch": 0.7333464851713027, + "grad_norm": 4.3763950262189235, + "learning_rate": 5e-06, + "loss": 0.1166, + "num_input_tokens_seen": 478853448, + "step": 2788 + }, + { + "epoch": 0.7333464851713027, + "loss": 0.11647917330265045, + "loss_ce": 0.0007565193227492273, + "loss_iou": 0.490234375, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 478853448, + "step": 2788 + }, + { + "epoch": 0.7336095219306898, + "grad_norm": 3.397549309274833, + "learning_rate": 5e-06, + "loss": 0.0831, + "num_input_tokens_seen": 479025420, + "step": 2789 + }, + { + "epoch": 0.7336095219306898, + "loss": 0.04994508996605873, + "loss_ce": 0.0003540250181686133, + "loss_iou": 0.37890625, + "loss_num": 0.0098876953125, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 479025420, + "step": 2789 + }, + { + "epoch": 0.733872558690077, + "grad_norm": 6.639247678980395, + "learning_rate": 5e-06, + "loss": 0.1277, + "num_input_tokens_seen": 479197512, + "step": 2790 + }, + { + "epoch": 0.733872558690077, + "loss": 0.0962657481431961, + "loss_ce": 0.0033092054072767496, + "loss_iou": 0.5234375, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 479197512, + "step": 2790 + }, + { + "epoch": 0.734135595449464, + "grad_norm": 70.18841715820902, + "learning_rate": 5e-06, + "loss": 0.121, + "num_input_tokens_seen": 479369608, + "step": 2791 + }, + { + "epoch": 0.734135595449464, + "loss": 0.057409316301345825, + "loss_ce": 0.000661880592815578, + "loss_iou": 0.61328125, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 479369608, + "step": 2791 + }, + { + "epoch": 0.7343986322088512, + "grad_norm": 8.444086759452258, + "learning_rate": 5e-06, + "loss": 0.1408, + "num_input_tokens_seen": 479541840, + "step": 2792 + }, + { + "epoch": 0.7343986322088512, + "loss": 0.18742145597934723, + "loss_ce": 0.0015999219613149762, + "loss_iou": 0.37890625, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 479541840, + "step": 2792 + }, + { + "epoch": 0.7346616689682383, + "grad_norm": 5.294583182815855, + "learning_rate": 5e-06, + "loss": 0.1487, + "num_input_tokens_seen": 479714196, + "step": 2793 + }, + { + "epoch": 0.7346616689682383, + "loss": 0.1654970794916153, + "loss_ce": 0.002441658638417721, + "loss_iou": 0.5234375, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 479714196, + "step": 2793 + }, + { + "epoch": 0.7349247057276255, + "grad_norm": 4.160027277201995, + "learning_rate": 5e-06, + "loss": 0.0766, + "num_input_tokens_seen": 479886580, + "step": 2794 + }, + { + "epoch": 0.7349247057276255, + "loss": 0.11283927410840988, + "loss_ce": 0.0021825337316840887, + "loss_iou": 0.5, + "loss_num": 0.0220947265625, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 479886580, + "step": 2794 + }, + { + "epoch": 0.7351877424870126, + "grad_norm": 8.261194775106992, + "learning_rate": 5e-06, + "loss": 0.1283, + "num_input_tokens_seen": 480058736, + "step": 2795 + }, + { + "epoch": 0.7351877424870126, + "loss": 0.12811481952667236, + "loss_ce": 0.0015584270004183054, + "loss_iou": 0.486328125, + "loss_num": 0.0252685546875, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 480058736, + "step": 2795 + }, + { + "epoch": 0.7354507792463997, + "grad_norm": 3.186827546083555, + "learning_rate": 5e-06, + "loss": 0.1094, + "num_input_tokens_seen": 480230860, + "step": 2796 + }, + { + "epoch": 0.7354507792463997, + "loss": 0.12094112485647202, + "loss_ce": 0.00286861858330667, + "loss_iou": 0.296875, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 480230860, + "step": 2796 + }, + { + "epoch": 0.7357138160057868, + "grad_norm": 3.6199423783668534, + "learning_rate": 5e-06, + "loss": 0.0792, + "num_input_tokens_seen": 480403260, + "step": 2797 + }, + { + "epoch": 0.7357138160057868, + "loss": 0.0642661452293396, + "loss_ce": 0.0005149244680069387, + "loss_iou": 0.61328125, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 480403260, + "step": 2797 + }, + { + "epoch": 0.7359768527651739, + "grad_norm": 4.853254066183438, + "learning_rate": 5e-06, + "loss": 0.0999, + "num_input_tokens_seen": 480575740, + "step": 2798 + }, + { + "epoch": 0.7359768527651739, + "loss": 0.1287141740322113, + "loss_ce": 0.000998095260001719, + "loss_iou": 0.5234375, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 480575740, + "step": 2798 + }, + { + "epoch": 0.7362398895245611, + "grad_norm": 6.187299771807659, + "learning_rate": 5e-06, + "loss": 0.1079, + "num_input_tokens_seen": 480745892, + "step": 2799 + }, + { + "epoch": 0.7362398895245611, + "loss": 0.11742156744003296, + "loss_ce": 0.0004476910980883986, + "loss_iou": 0.40625, + "loss_num": 0.0233154296875, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 480745892, + "step": 2799 + }, + { + "epoch": 0.7365029262839482, + "grad_norm": 3.2510976930909194, + "learning_rate": 5e-06, + "loss": 0.124, + "num_input_tokens_seen": 480916480, + "step": 2800 + }, + { + "epoch": 0.7365029262839482, + "loss": 0.14714287221431732, + "loss_ce": 7.865649240557104e-05, + "loss_iou": 0.48828125, + "loss_num": 0.0294189453125, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 480916480, + "step": 2800 + }, + { + "epoch": 0.7367659630433353, + "grad_norm": 4.500033616411093, + "learning_rate": 5e-06, + "loss": 0.1404, + "num_input_tokens_seen": 481088428, + "step": 2801 + }, + { + "epoch": 0.7367659630433353, + "loss": 0.07280252873897552, + "loss_ce": 0.0012845833553001285, + "loss_iou": 0.427734375, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 481088428, + "step": 2801 + }, + { + "epoch": 0.7370289998027224, + "grad_norm": 6.733873919958713, + "learning_rate": 5e-06, + "loss": 0.1157, + "num_input_tokens_seen": 481260648, + "step": 2802 + }, + { + "epoch": 0.7370289998027224, + "loss": 0.13488659262657166, + "loss_ce": 0.002318233484402299, + "loss_iou": 0.51171875, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 481260648, + "step": 2802 + }, + { + "epoch": 0.7372920365621095, + "grad_norm": 4.443731950557251, + "learning_rate": 5e-06, + "loss": 0.1034, + "num_input_tokens_seen": 481432928, + "step": 2803 + }, + { + "epoch": 0.7372920365621095, + "loss": 0.05556986480951309, + "loss_ce": 0.0003940859460271895, + "loss_iou": 0.578125, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 481432928, + "step": 2803 + }, + { + "epoch": 0.7375550733214967, + "grad_norm": 3.9159514379676663, + "learning_rate": 5e-06, + "loss": 0.1142, + "num_input_tokens_seen": 481603740, + "step": 2804 + }, + { + "epoch": 0.7375550733214967, + "loss": 0.07777837663888931, + "loss_ce": 5.010394670534879e-05, + "loss_iou": 0.58203125, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 481603740, + "step": 2804 + }, + { + "epoch": 0.7378181100808838, + "grad_norm": 3.806670191888079, + "learning_rate": 5e-06, + "loss": 0.1196, + "num_input_tokens_seen": 481774292, + "step": 2805 + }, + { + "epoch": 0.7378181100808838, + "loss": 0.046146463602781296, + "loss_ce": 0.00412375945597887, + "loss_iou": 0.40625, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 481774292, + "step": 2805 + }, + { + "epoch": 0.7380811468402709, + "grad_norm": 18.2473830072325, + "learning_rate": 5e-06, + "loss": 0.1408, + "num_input_tokens_seen": 481946452, + "step": 2806 + }, + { + "epoch": 0.7380811468402709, + "loss": 0.15219584107398987, + "loss_ce": 0.0003708918229676783, + "loss_iou": 0.55078125, + "loss_num": 0.0303955078125, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 481946452, + "step": 2806 + }, + { + "epoch": 0.738344183599658, + "grad_norm": 5.953555323593533, + "learning_rate": 5e-06, + "loss": 0.0944, + "num_input_tokens_seen": 482118496, + "step": 2807 + }, + { + "epoch": 0.738344183599658, + "loss": 0.14251753687858582, + "loss_ce": 0.00039717700565233827, + "loss_iou": 0.60546875, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 482118496, + "step": 2807 + }, + { + "epoch": 0.7386072203590451, + "grad_norm": 7.279415163791317, + "learning_rate": 5e-06, + "loss": 0.1145, + "num_input_tokens_seen": 482287100, + "step": 2808 + }, + { + "epoch": 0.7386072203590451, + "loss": 0.11856916546821594, + "loss_ce": 0.0012290815357118845, + "loss_iou": 0.361328125, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 482287100, + "step": 2808 + }, + { + "epoch": 0.7388702571184323, + "grad_norm": 60.57211297247697, + "learning_rate": 5e-06, + "loss": 0.1379, + "num_input_tokens_seen": 482459096, + "step": 2809 + }, + { + "epoch": 0.7388702571184323, + "loss": 0.14476290345191956, + "loss_ce": 0.003466519294306636, + "loss_iou": 0.64453125, + "loss_num": 0.0281982421875, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 482459096, + "step": 2809 + }, + { + "epoch": 0.7391332938778195, + "grad_norm": 4.216089199918953, + "learning_rate": 5e-06, + "loss": 0.0932, + "num_input_tokens_seen": 482631768, + "step": 2810 + }, + { + "epoch": 0.7391332938778195, + "loss": 0.1267136037349701, + "loss_ce": 0.0018051671795547009, + "loss_iou": 0.466796875, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 482631768, + "step": 2810 + }, + { + "epoch": 0.7393963306372066, + "grad_norm": 5.529488884431239, + "learning_rate": 5e-06, + "loss": 0.1088, + "num_input_tokens_seen": 482803968, + "step": 2811 + }, + { + "epoch": 0.7393963306372066, + "loss": 0.12075556814670563, + "loss_ce": 0.0023778879549354315, + "loss_iou": 0.67578125, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 482803968, + "step": 2811 + }, + { + "epoch": 0.7396593673965937, + "grad_norm": 14.970093925458164, + "learning_rate": 5e-06, + "loss": 0.089, + "num_input_tokens_seen": 482976316, + "step": 2812 + }, + { + "epoch": 0.7396593673965937, + "loss": 0.11577010154724121, + "loss_ce": 0.00230574794113636, + "loss_iou": 0.53125, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 482976316, + "step": 2812 + }, + { + "epoch": 0.7399224041559808, + "grad_norm": 16.71817355365124, + "learning_rate": 5e-06, + "loss": 0.1449, + "num_input_tokens_seen": 483148324, + "step": 2813 + }, + { + "epoch": 0.7399224041559808, + "loss": 0.12452364712953568, + "loss_ce": 0.0020718637388199568, + "loss_iou": 0.2734375, + "loss_num": 0.0244140625, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 483148324, + "step": 2813 + }, + { + "epoch": 0.740185440915368, + "grad_norm": 6.067200171837272, + "learning_rate": 5e-06, + "loss": 0.0904, + "num_input_tokens_seen": 483320396, + "step": 2814 + }, + { + "epoch": 0.740185440915368, + "loss": 0.08812370151281357, + "loss_ce": 0.003498460166156292, + "loss_iou": 0.494140625, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 483320396, + "step": 2814 + }, + { + "epoch": 0.7404484776747551, + "grad_norm": 4.557027585995325, + "learning_rate": 5e-06, + "loss": 0.0874, + "num_input_tokens_seen": 483492288, + "step": 2815 + }, + { + "epoch": 0.7404484776747551, + "loss": 0.11246176064014435, + "loss_ce": 0.0007674178341403604, + "loss_iou": 0.5078125, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 483492288, + "step": 2815 + }, + { + "epoch": 0.7407115144341422, + "grad_norm": 4.433500401049952, + "learning_rate": 5e-06, + "loss": 0.1057, + "num_input_tokens_seen": 483664284, + "step": 2816 + }, + { + "epoch": 0.7407115144341422, + "loss": 0.11581188440322876, + "loss_ce": 0.0005164705216884613, + "loss_iou": 0.4765625, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 483664284, + "step": 2816 + }, + { + "epoch": 0.7409745511935293, + "grad_norm": 4.273944166752719, + "learning_rate": 5e-06, + "loss": 0.1428, + "num_input_tokens_seen": 483836152, + "step": 2817 + }, + { + "epoch": 0.7409745511935293, + "loss": 0.1244652196764946, + "loss_ce": 0.0025780070573091507, + "loss_iou": 0.578125, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 483836152, + "step": 2817 + }, + { + "epoch": 0.7412375879529164, + "grad_norm": 7.341058016704505, + "learning_rate": 5e-06, + "loss": 0.1099, + "num_input_tokens_seen": 484005324, + "step": 2818 + }, + { + "epoch": 0.7412375879529164, + "loss": 0.09527404606342316, + "loss_ce": 0.0004711919464170933, + "loss_iou": 0.51171875, + "loss_num": 0.01904296875, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 484005324, + "step": 2818 + }, + { + "epoch": 0.7415006247123035, + "grad_norm": 5.580265155977227, + "learning_rate": 5e-06, + "loss": 0.1248, + "num_input_tokens_seen": 484175008, + "step": 2819 + }, + { + "epoch": 0.7415006247123035, + "loss": 0.061688005924224854, + "loss_ce": 0.000408706720918417, + "loss_iou": 0.8203125, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 484175008, + "step": 2819 + }, + { + "epoch": 0.7417636614716907, + "grad_norm": 10.569140888378845, + "learning_rate": 5e-06, + "loss": 0.1243, + "num_input_tokens_seen": 484346776, + "step": 2820 + }, + { + "epoch": 0.7417636614716907, + "loss": 0.1439221203327179, + "loss_ce": 0.00351074174977839, + "loss_iou": 0.435546875, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 484346776, + "step": 2820 + }, + { + "epoch": 0.7420266982310778, + "grad_norm": 4.104540427593442, + "learning_rate": 5e-06, + "loss": 0.0873, + "num_input_tokens_seen": 484518992, + "step": 2821 + }, + { + "epoch": 0.7420266982310778, + "loss": 0.09965825080871582, + "loss_ce": 0.000567668757867068, + "loss_iou": 0.60546875, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 484518992, + "step": 2821 + }, + { + "epoch": 0.7422897349904649, + "grad_norm": 5.580054688233454, + "learning_rate": 5e-06, + "loss": 0.1559, + "num_input_tokens_seen": 484691244, + "step": 2822 + }, + { + "epoch": 0.7422897349904649, + "loss": 0.17825458943843842, + "loss_ce": 0.002351263538002968, + "loss_iou": 0.53515625, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 484691244, + "step": 2822 + }, + { + "epoch": 0.742552771749852, + "grad_norm": 3.6849080723348067, + "learning_rate": 5e-06, + "loss": 0.1494, + "num_input_tokens_seen": 484861364, + "step": 2823 + }, + { + "epoch": 0.742552771749852, + "loss": 0.12127910554409027, + "loss_ce": 0.0025046863593161106, + "loss_iou": 0.54296875, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 484861364, + "step": 2823 + }, + { + "epoch": 0.7428158085092391, + "grad_norm": 13.128530967150855, + "learning_rate": 5e-06, + "loss": 0.1134, + "num_input_tokens_seen": 485032096, + "step": 2824 + }, + { + "epoch": 0.7428158085092391, + "loss": 0.044097013771533966, + "loss_ce": 0.0010977452620863914, + "loss_iou": 0.470703125, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 485032096, + "step": 2824 + }, + { + "epoch": 0.7430788452686263, + "grad_norm": 29.97671813936596, + "learning_rate": 5e-06, + "loss": 0.1387, + "num_input_tokens_seen": 485203848, + "step": 2825 + }, + { + "epoch": 0.7430788452686263, + "loss": 0.1745963990688324, + "loss_ce": 0.0014701783657073975, + "loss_iou": 0.52734375, + "loss_num": 0.03466796875, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 485203848, + "step": 2825 + }, + { + "epoch": 0.7433418820280134, + "grad_norm": 3.457370742890275, + "learning_rate": 5e-06, + "loss": 0.0854, + "num_input_tokens_seen": 485376152, + "step": 2826 + }, + { + "epoch": 0.7433418820280134, + "loss": 0.10546036064624786, + "loss_ce": 0.0021430959459394217, + "loss_iou": 0.44921875, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 485376152, + "step": 2826 + }, + { + "epoch": 0.7436049187874005, + "grad_norm": 14.472339934978308, + "learning_rate": 5e-06, + "loss": 0.1093, + "num_input_tokens_seen": 485546204, + "step": 2827 + }, + { + "epoch": 0.7436049187874005, + "loss": 0.1524585783481598, + "loss_ce": 0.0006031189695931971, + "loss_iou": 0.453125, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 485546204, + "step": 2827 + }, + { + "epoch": 0.7438679555467876, + "grad_norm": 15.092363857837377, + "learning_rate": 5e-06, + "loss": 0.1008, + "num_input_tokens_seen": 485718156, + "step": 2828 + }, + { + "epoch": 0.7438679555467876, + "loss": 0.11204935610294342, + "loss_ce": 0.00023295017308555543, + "loss_iou": 0.484375, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 485718156, + "step": 2828 + }, + { + "epoch": 0.7441309923061747, + "grad_norm": 3.949937010975672, + "learning_rate": 5e-06, + "loss": 0.0703, + "num_input_tokens_seen": 485889984, + "step": 2829 + }, + { + "epoch": 0.7441309923061747, + "loss": 0.08257782459259033, + "loss_ce": 0.002652282826602459, + "loss_iou": 0.546875, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 485889984, + "step": 2829 + }, + { + "epoch": 0.744394029065562, + "grad_norm": 5.74645673965003, + "learning_rate": 5e-06, + "loss": 0.07, + "num_input_tokens_seen": 486062200, + "step": 2830 + }, + { + "epoch": 0.744394029065562, + "loss": 0.10757631063461304, + "loss_ce": 0.0008258260786533356, + "loss_iou": 0.33203125, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 486062200, + "step": 2830 + }, + { + "epoch": 0.7446570658249491, + "grad_norm": 5.133478707353028, + "learning_rate": 5e-06, + "loss": 0.0899, + "num_input_tokens_seen": 486234156, + "step": 2831 + }, + { + "epoch": 0.7446570658249491, + "loss": 0.05313008278608322, + "loss_ce": 0.0007771779783070087, + "loss_iou": 0.431640625, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 486234156, + "step": 2831 + }, + { + "epoch": 0.7449201025843362, + "grad_norm": 3.5956313939936164, + "learning_rate": 5e-06, + "loss": 0.0765, + "num_input_tokens_seen": 486406012, + "step": 2832 + }, + { + "epoch": 0.7449201025843362, + "loss": 0.041897065937519073, + "loss_ce": 0.0002405716950306669, + "loss_iou": 0.49609375, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 486406012, + "step": 2832 + }, + { + "epoch": 0.7451831393437233, + "grad_norm": 14.659992900472892, + "learning_rate": 5e-06, + "loss": 0.1671, + "num_input_tokens_seen": 486577696, + "step": 2833 + }, + { + "epoch": 0.7451831393437233, + "loss": 0.10231424868106842, + "loss_ce": 0.00011088042083429173, + "loss_iou": 0.61328125, + "loss_num": 0.0205078125, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 486577696, + "step": 2833 + }, + { + "epoch": 0.7454461761031104, + "grad_norm": 4.0898533446969365, + "learning_rate": 5e-06, + "loss": 0.132, + "num_input_tokens_seen": 486749788, + "step": 2834 + }, + { + "epoch": 0.7454461761031104, + "loss": 0.16116830706596375, + "loss_ce": 0.002553200349211693, + "loss_iou": 0.416015625, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 486749788, + "step": 2834 + }, + { + "epoch": 0.7457092128624976, + "grad_norm": 7.24387104067779, + "learning_rate": 5e-06, + "loss": 0.1065, + "num_input_tokens_seen": 486921812, + "step": 2835 + }, + { + "epoch": 0.7457092128624976, + "loss": 0.15365542471408844, + "loss_ce": 0.0005182233871892095, + "loss_iou": 0.6796875, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 486921812, + "step": 2835 + }, + { + "epoch": 0.7459722496218847, + "grad_norm": 4.544468769186057, + "learning_rate": 5e-06, + "loss": 0.1593, + "num_input_tokens_seen": 487092260, + "step": 2836 + }, + { + "epoch": 0.7459722496218847, + "loss": 0.13337098062038422, + "loss_ce": 0.0010772723471745849, + "loss_iou": 0.44921875, + "loss_num": 0.0264892578125, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 487092260, + "step": 2836 + }, + { + "epoch": 0.7462352863812718, + "grad_norm": 25.066833727752858, + "learning_rate": 5e-06, + "loss": 0.1213, + "num_input_tokens_seen": 487264416, + "step": 2837 + }, + { + "epoch": 0.7462352863812718, + "loss": 0.053436558693647385, + "loss_ce": 0.0022585804108530283, + "loss_iou": 0.458984375, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 487264416, + "step": 2837 + }, + { + "epoch": 0.7464983231406589, + "grad_norm": 5.520845699973876, + "learning_rate": 5e-06, + "loss": 0.112, + "num_input_tokens_seen": 487436620, + "step": 2838 + }, + { + "epoch": 0.7464983231406589, + "loss": 0.16054442524909973, + "loss_ce": 0.002860102104023099, + "loss_iou": 0.59375, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 487436620, + "step": 2838 + }, + { + "epoch": 0.746761359900046, + "grad_norm": 3.6663304276460456, + "learning_rate": 5e-06, + "loss": 0.1253, + "num_input_tokens_seen": 487608856, + "step": 2839 + }, + { + "epoch": 0.746761359900046, + "loss": 0.07470214366912842, + "loss_ce": 0.0005749509437009692, + "loss_iou": 0.51953125, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 487608856, + "step": 2839 + }, + { + "epoch": 0.7470243966594332, + "grad_norm": 3.816698805705744, + "learning_rate": 5e-06, + "loss": 0.0945, + "num_input_tokens_seen": 487779452, + "step": 2840 + }, + { + "epoch": 0.7470243966594332, + "loss": 0.08027718961238861, + "loss_ce": 0.00013802893226966262, + "loss_iou": 0.46484375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 487779452, + "step": 2840 + }, + { + "epoch": 0.7472874334188203, + "grad_norm": 5.187350725945626, + "learning_rate": 5e-06, + "loss": 0.1064, + "num_input_tokens_seen": 487951444, + "step": 2841 + }, + { + "epoch": 0.7472874334188203, + "loss": 0.13180628418922424, + "loss_ce": 0.0002907742455136031, + "loss_iou": 0.50390625, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 487951444, + "step": 2841 + }, + { + "epoch": 0.7475504701782074, + "grad_norm": 17.82443490036777, + "learning_rate": 5e-06, + "loss": 0.117, + "num_input_tokens_seen": 488123816, + "step": 2842 + }, + { + "epoch": 0.7475504701782074, + "loss": 0.10174276679754257, + "loss_ce": 0.00042440436664037406, + "loss_iou": 0.44921875, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 488123816, + "step": 2842 + }, + { + "epoch": 0.7478135069375945, + "grad_norm": 5.349643556816576, + "learning_rate": 5e-06, + "loss": 0.0817, + "num_input_tokens_seen": 488294544, + "step": 2843 + }, + { + "epoch": 0.7478135069375945, + "loss": 0.06341783702373505, + "loss_ce": 6.33449453744106e-05, + "loss_iou": 0.55859375, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 488294544, + "step": 2843 + }, + { + "epoch": 0.7480765436969816, + "grad_norm": 5.881783273596092, + "learning_rate": 5e-06, + "loss": 0.1005, + "num_input_tokens_seen": 488466512, + "step": 2844 + }, + { + "epoch": 0.7480765436969816, + "loss": 0.07953569293022156, + "loss_ce": 0.0013496556784957647, + "loss_iou": 0.515625, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 488466512, + "step": 2844 + }, + { + "epoch": 0.7483395804563687, + "grad_norm": 4.967245481323238, + "learning_rate": 5e-06, + "loss": 0.1174, + "num_input_tokens_seen": 488638580, + "step": 2845 + }, + { + "epoch": 0.7483395804563687, + "loss": 0.13291889429092407, + "loss_ce": 0.0028682297561317682, + "loss_iou": 0.55859375, + "loss_num": 0.026123046875, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 488638580, + "step": 2845 + }, + { + "epoch": 0.7486026172157559, + "grad_norm": 26.635770395926077, + "learning_rate": 5e-06, + "loss": 0.1546, + "num_input_tokens_seen": 488810652, + "step": 2846 + }, + { + "epoch": 0.7486026172157559, + "loss": 0.13681207597255707, + "loss_ce": 0.006761421915143728, + "loss_iou": 0.3359375, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 488810652, + "step": 2846 + }, + { + "epoch": 0.748865653975143, + "grad_norm": 3.755973604918146, + "learning_rate": 5e-06, + "loss": 0.0855, + "num_input_tokens_seen": 488982904, + "step": 2847 + }, + { + "epoch": 0.748865653975143, + "loss": 0.08354687690734863, + "loss_ce": 0.0004475130117498338, + "loss_iou": 0.447265625, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 488982904, + "step": 2847 + }, + { + "epoch": 0.7491286907345301, + "grad_norm": 3.8356014362129516, + "learning_rate": 5e-06, + "loss": 0.0982, + "num_input_tokens_seen": 489153060, + "step": 2848 + }, + { + "epoch": 0.7491286907345301, + "loss": 0.1231696754693985, + "loss_ce": 0.0056769950315356255, + "loss_iou": 0.478515625, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 489153060, + "step": 2848 + }, + { + "epoch": 0.7493917274939172, + "grad_norm": 6.542346686389534, + "learning_rate": 5e-06, + "loss": 0.1367, + "num_input_tokens_seen": 489324920, + "step": 2849 + }, + { + "epoch": 0.7493917274939172, + "loss": 0.10556487739086151, + "loss_ce": 0.00360565772280097, + "loss_iou": 0.56640625, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 489324920, + "step": 2849 + }, + { + "epoch": 0.7496547642533044, + "grad_norm": 3.9950005088023786, + "learning_rate": 5e-06, + "loss": 0.1183, + "num_input_tokens_seen": 489497376, + "step": 2850 + }, + { + "epoch": 0.7496547642533044, + "loss": 0.11913494765758514, + "loss_ce": 0.001535467803478241, + "loss_iou": 0.474609375, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 489497376, + "step": 2850 + }, + { + "epoch": 0.7499178010126916, + "grad_norm": 3.9324452711240707, + "learning_rate": 5e-06, + "loss": 0.1126, + "num_input_tokens_seen": 489669656, + "step": 2851 + }, + { + "epoch": 0.7499178010126916, + "loss": 0.14491230249404907, + "loss_ce": 0.0019679656252264977, + "loss_iou": 0.61328125, + "loss_num": 0.0286865234375, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 489669656, + "step": 2851 + }, + { + "epoch": 0.7501808377720787, + "grad_norm": 4.883057174767028, + "learning_rate": 5e-06, + "loss": 0.0772, + "num_input_tokens_seen": 489841572, + "step": 2852 + }, + { + "epoch": 0.7501808377720787, + "loss": 0.06473391503095627, + "loss_ce": 0.000845368776936084, + "loss_iou": 0.4140625, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 489841572, + "step": 2852 + }, + { + "epoch": 0.7504438745314658, + "grad_norm": 5.5775463930736695, + "learning_rate": 5e-06, + "loss": 0.1335, + "num_input_tokens_seen": 490010444, + "step": 2853 + }, + { + "epoch": 0.7504438745314658, + "loss": 0.2125670313835144, + "loss_ce": 0.0003172852157149464, + "loss_iou": 0.55078125, + "loss_num": 0.04248046875, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 490010444, + "step": 2853 + }, + { + "epoch": 0.7507069112908529, + "grad_norm": 12.138601127345398, + "learning_rate": 5e-06, + "loss": 0.0886, + "num_input_tokens_seen": 490182752, + "step": 2854 + }, + { + "epoch": 0.7507069112908529, + "loss": 0.05896022543311119, + "loss_ce": 0.0001223331200890243, + "loss_iou": 0.5390625, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 490182752, + "step": 2854 + }, + { + "epoch": 0.75096994805024, + "grad_norm": 7.597618581630176, + "learning_rate": 5e-06, + "loss": 0.1274, + "num_input_tokens_seen": 490354572, + "step": 2855 + }, + { + "epoch": 0.75096994805024, + "loss": 0.1745096743106842, + "loss_ce": 0.0004984359256923199, + "loss_iou": 0.498046875, + "loss_num": 0.034912109375, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 490354572, + "step": 2855 + }, + { + "epoch": 0.7512329848096272, + "grad_norm": 4.639912180988158, + "learning_rate": 5e-06, + "loss": 0.108, + "num_input_tokens_seen": 490524680, + "step": 2856 + }, + { + "epoch": 0.7512329848096272, + "loss": 0.16067692637443542, + "loss_ce": 0.006074874196201563, + "loss_iou": 0.427734375, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 490524680, + "step": 2856 + }, + { + "epoch": 0.7514960215690143, + "grad_norm": 14.921784803655473, + "learning_rate": 5e-06, + "loss": 0.1355, + "num_input_tokens_seen": 490696768, + "step": 2857 + }, + { + "epoch": 0.7514960215690143, + "loss": 0.1124132052063942, + "loss_ce": 0.0032213088124990463, + "loss_iou": 0.515625, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 490696768, + "step": 2857 + }, + { + "epoch": 0.7517590583284014, + "grad_norm": 5.608340999176847, + "learning_rate": 5e-06, + "loss": 0.1026, + "num_input_tokens_seen": 490869040, + "step": 2858 + }, + { + "epoch": 0.7517590583284014, + "loss": 0.05557282269001007, + "loss_ce": 0.003662426257506013, + "loss_iou": 0.478515625, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 490869040, + "step": 2858 + }, + { + "epoch": 0.7520220950877885, + "grad_norm": 4.649441181141725, + "learning_rate": 5e-06, + "loss": 0.102, + "num_input_tokens_seen": 491041492, + "step": 2859 + }, + { + "epoch": 0.7520220950877885, + "loss": 0.08483953773975372, + "loss_ce": 0.00024481338914483786, + "loss_iou": 0.625, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 491041492, + "step": 2859 + }, + { + "epoch": 0.7522851318471756, + "grad_norm": 5.629942269274102, + "learning_rate": 5e-06, + "loss": 0.1139, + "num_input_tokens_seen": 491213328, + "step": 2860 + }, + { + "epoch": 0.7522851318471756, + "loss": 0.06876988708972931, + "loss_ce": 0.0007156922947615385, + "loss_iou": 0.4921875, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 491213328, + "step": 2860 + }, + { + "epoch": 0.7525481686065628, + "grad_norm": 5.7661162016412355, + "learning_rate": 5e-06, + "loss": 0.0949, + "num_input_tokens_seen": 491384112, + "step": 2861 + }, + { + "epoch": 0.7525481686065628, + "loss": 0.12756821513175964, + "loss_ce": 0.0015000998973846436, + "loss_iou": 0.69921875, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 491384112, + "step": 2861 + }, + { + "epoch": 0.7528112053659499, + "grad_norm": 4.585306018678188, + "learning_rate": 5e-06, + "loss": 0.1186, + "num_input_tokens_seen": 491556404, + "step": 2862 + }, + { + "epoch": 0.7528112053659499, + "loss": 0.1506051868200302, + "loss_ce": 0.00195405725389719, + "loss_iou": 0.43359375, + "loss_num": 0.0296630859375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 491556404, + "step": 2862 + }, + { + "epoch": 0.753074242125337, + "grad_norm": 5.018961759147868, + "learning_rate": 5e-06, + "loss": 0.1566, + "num_input_tokens_seen": 491728560, + "step": 2863 + }, + { + "epoch": 0.753074242125337, + "loss": 0.08526686578989029, + "loss_ce": 0.0047004627995193005, + "loss_iou": 0.6015625, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 491728560, + "step": 2863 + }, + { + "epoch": 0.7533372788847241, + "grad_norm": 5.649735345560609, + "learning_rate": 5e-06, + "loss": 0.1325, + "num_input_tokens_seen": 491900292, + "step": 2864 + }, + { + "epoch": 0.7533372788847241, + "loss": 0.07598397135734558, + "loss_ce": 0.0024213448632508516, + "loss_iou": 0.3984375, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 491900292, + "step": 2864 + }, + { + "epoch": 0.7536003156441112, + "grad_norm": 5.741199989182661, + "learning_rate": 5e-06, + "loss": 0.1471, + "num_input_tokens_seen": 492072492, + "step": 2865 + }, + { + "epoch": 0.7536003156441112, + "loss": 0.17299330234527588, + "loss_ce": 0.00398694584146142, + "loss_iou": 0.296875, + "loss_num": 0.03369140625, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 492072492, + "step": 2865 + }, + { + "epoch": 0.7538633524034984, + "grad_norm": 6.545995942105509, + "learning_rate": 5e-06, + "loss": 0.0985, + "num_input_tokens_seen": 492244560, + "step": 2866 + }, + { + "epoch": 0.7538633524034984, + "loss": 0.11732736229896545, + "loss_ce": 0.00035348787787370384, + "loss_iou": 0.48828125, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 492244560, + "step": 2866 + }, + { + "epoch": 0.7541263891628855, + "grad_norm": 7.690174312653901, + "learning_rate": 5e-06, + "loss": 0.1192, + "num_input_tokens_seen": 492416460, + "step": 2867 + }, + { + "epoch": 0.7541263891628855, + "loss": 0.11988115310668945, + "loss_ce": 0.0037617662455886602, + "loss_iou": 0.5546875, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 492416460, + "step": 2867 + }, + { + "epoch": 0.7543894259222726, + "grad_norm": 4.911561927440868, + "learning_rate": 5e-06, + "loss": 0.1762, + "num_input_tokens_seen": 492582560, + "step": 2868 + }, + { + "epoch": 0.7543894259222726, + "loss": 0.17355158925056458, + "loss_ce": 0.0006237310590222478, + "loss_iou": 0.41796875, + "loss_num": 0.03466796875, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 492582560, + "step": 2868 + }, + { + "epoch": 0.7546524626816598, + "grad_norm": 9.965344498896014, + "learning_rate": 5e-06, + "loss": 0.1137, + "num_input_tokens_seen": 492754468, + "step": 2869 + }, + { + "epoch": 0.7546524626816598, + "loss": 0.12515220046043396, + "loss_ce": 0.0038448250852525234, + "loss_iou": 0.515625, + "loss_num": 0.0242919921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 492754468, + "step": 2869 + }, + { + "epoch": 0.7549154994410469, + "grad_norm": 7.897878518361561, + "learning_rate": 5e-06, + "loss": 0.1325, + "num_input_tokens_seen": 492926916, + "step": 2870 + }, + { + "epoch": 0.7549154994410469, + "loss": 0.07539217174053192, + "loss_ce": 0.002699300181120634, + "loss_iou": 0.5390625, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 492926916, + "step": 2870 + }, + { + "epoch": 0.755178536200434, + "grad_norm": 8.119348564456503, + "learning_rate": 5e-06, + "loss": 0.1413, + "num_input_tokens_seen": 493099340, + "step": 2871 + }, + { + "epoch": 0.755178536200434, + "loss": 0.10948731005191803, + "loss_ce": 0.002187497215345502, + "loss_iou": 0.5546875, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 493099340, + "step": 2871 + }, + { + "epoch": 0.7554415729598212, + "grad_norm": 4.655104945227204, + "learning_rate": 5e-06, + "loss": 0.1331, + "num_input_tokens_seen": 493269864, + "step": 2872 + }, + { + "epoch": 0.7554415729598212, + "loss": 0.1148996502161026, + "loss_ce": 9.252215386368334e-05, + "loss_iou": 0.5390625, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 493269864, + "step": 2872 + }, + { + "epoch": 0.7557046097192083, + "grad_norm": 7.2487773846350985, + "learning_rate": 5e-06, + "loss": 0.1297, + "num_input_tokens_seen": 493441844, + "step": 2873 + }, + { + "epoch": 0.7557046097192083, + "loss": 0.22252312302589417, + "loss_ce": 0.00032462860690429807, + "loss_iou": 0.47265625, + "loss_num": 0.04443359375, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 493441844, + "step": 2873 + }, + { + "epoch": 0.7559676464785954, + "grad_norm": 8.669322747332805, + "learning_rate": 5e-06, + "loss": 0.0904, + "num_input_tokens_seen": 493612416, + "step": 2874 + }, + { + "epoch": 0.7559676464785954, + "loss": 0.1249040961265564, + "loss_ce": 0.0034746630117297173, + "loss_iou": 0.5859375, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 493612416, + "step": 2874 + }, + { + "epoch": 0.7562306832379825, + "grad_norm": 16.40313430717725, + "learning_rate": 5e-06, + "loss": 0.1119, + "num_input_tokens_seen": 493782864, + "step": 2875 + }, + { + "epoch": 0.7562306832379825, + "loss": 0.09373937547206879, + "loss_ce": 0.0021256012842059135, + "loss_iou": 0.68359375, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 493782864, + "step": 2875 + }, + { + "epoch": 0.7564937199973696, + "grad_norm": 6.017272387510744, + "learning_rate": 5e-06, + "loss": 0.1223, + "num_input_tokens_seen": 493955332, + "step": 2876 + }, + { + "epoch": 0.7564937199973696, + "loss": 0.1287456452846527, + "loss_ce": 0.0006633760058321059, + "loss_iou": 0.41015625, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 493955332, + "step": 2876 + }, + { + "epoch": 0.7567567567567568, + "grad_norm": 5.38013008566038, + "learning_rate": 5e-06, + "loss": 0.1189, + "num_input_tokens_seen": 494127604, + "step": 2877 + }, + { + "epoch": 0.7567567567567568, + "loss": 0.10688350349664688, + "loss_ce": 0.0010485434904694557, + "loss_iou": 0.5703125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 494127604, + "step": 2877 + }, + { + "epoch": 0.7570197935161439, + "grad_norm": 16.650882691357587, + "learning_rate": 5e-06, + "loss": 0.1237, + "num_input_tokens_seen": 494298004, + "step": 2878 + }, + { + "epoch": 0.7570197935161439, + "loss": 0.1486469805240631, + "loss_ce": 0.0029255489353090525, + "loss_iou": 0.484375, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 494298004, + "step": 2878 + }, + { + "epoch": 0.757282830275531, + "grad_norm": 6.750510186398287, + "learning_rate": 5e-06, + "loss": 0.0923, + "num_input_tokens_seen": 494470108, + "step": 2879 + }, + { + "epoch": 0.757282830275531, + "loss": 0.1393243670463562, + "loss_ce": 0.0016443128697574139, + "loss_iou": 0.287109375, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 494470108, + "step": 2879 + }, + { + "epoch": 0.7575458670349181, + "grad_norm": 18.523627538074937, + "learning_rate": 5e-06, + "loss": 0.1173, + "num_input_tokens_seen": 494640736, + "step": 2880 + }, + { + "epoch": 0.7575458670349181, + "loss": 0.11819358170032501, + "loss_ce": 0.0007009088294580579, + "loss_iou": 0.6171875, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 494640736, + "step": 2880 + }, + { + "epoch": 0.7578089037943052, + "grad_norm": 6.843824113206992, + "learning_rate": 5e-06, + "loss": 0.105, + "num_input_tokens_seen": 494812784, + "step": 2881 + }, + { + "epoch": 0.7578089037943052, + "loss": 0.18498176336288452, + "loss_ce": 0.001769477385096252, + "loss_iou": 0.390625, + "loss_num": 0.03662109375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 494812784, + "step": 2881 + }, + { + "epoch": 0.7580719405536924, + "grad_norm": 4.288128333077997, + "learning_rate": 5e-06, + "loss": 0.0991, + "num_input_tokens_seen": 494984900, + "step": 2882 + }, + { + "epoch": 0.7580719405536924, + "loss": 0.11615432053804398, + "loss_ce": 0.0020796118769794703, + "loss_iou": 0.51171875, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 494984900, + "step": 2882 + }, + { + "epoch": 0.7583349773130795, + "grad_norm": 7.805094359622811, + "learning_rate": 5e-06, + "loss": 0.1527, + "num_input_tokens_seen": 495157204, + "step": 2883 + }, + { + "epoch": 0.7583349773130795, + "loss": 0.15978480875492096, + "loss_ce": 0.002222547074779868, + "loss_iou": 0.578125, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 495157204, + "step": 2883 + }, + { + "epoch": 0.7585980140724666, + "grad_norm": 9.068753339594036, + "learning_rate": 5e-06, + "loss": 0.1235, + "num_input_tokens_seen": 495325872, + "step": 2884 + }, + { + "epoch": 0.7585980140724666, + "loss": 0.06683582067489624, + "loss_ce": 0.001055177883245051, + "loss_iou": 0.5625, + "loss_num": 0.01312255859375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 495325872, + "step": 2884 + }, + { + "epoch": 0.7588610508318537, + "grad_norm": 5.303316590010937, + "learning_rate": 5e-06, + "loss": 0.1331, + "num_input_tokens_seen": 495497944, + "step": 2885 + }, + { + "epoch": 0.7588610508318537, + "loss": 0.09937077760696411, + "loss_ce": 0.00031071933335624635, + "loss_iou": 0.5625, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 495497944, + "step": 2885 + }, + { + "epoch": 0.7591240875912408, + "grad_norm": 5.954668834746019, + "learning_rate": 5e-06, + "loss": 0.0923, + "num_input_tokens_seen": 495670336, + "step": 2886 + }, + { + "epoch": 0.7591240875912408, + "loss": 0.04735005646944046, + "loss_ce": 0.0002461753028910607, + "loss_iou": 0.4375, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 495670336, + "step": 2886 + }, + { + "epoch": 0.759387124350628, + "grad_norm": 9.86702776219205, + "learning_rate": 5e-06, + "loss": 0.1488, + "num_input_tokens_seen": 495842556, + "step": 2887 + }, + { + "epoch": 0.759387124350628, + "loss": 0.09923535585403442, + "loss_ce": 0.0010908262338489294, + "loss_iou": NaN, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 495842556, + "step": 2887 + }, + { + "epoch": 0.7596501611100152, + "grad_norm": 11.076727887110565, + "learning_rate": 5e-06, + "loss": 0.1153, + "num_input_tokens_seen": 496012000, + "step": 2888 + }, + { + "epoch": 0.7596501611100152, + "loss": 0.07854500412940979, + "loss_ce": 0.0005115569802001119, + "loss_iou": 0.6171875, + "loss_num": 0.01556396484375, + "loss_xval": 0.078125, + "num_input_tokens_seen": 496012000, + "step": 2888 + }, + { + "epoch": 0.7599131978694023, + "grad_norm": 4.344396012390115, + "learning_rate": 5e-06, + "loss": 0.0945, + "num_input_tokens_seen": 496181744, + "step": 2889 + }, + { + "epoch": 0.7599131978694023, + "loss": 0.07574538886547089, + "loss_ce": 0.0021980288438498974, + "loss_iou": 0.5625, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 496181744, + "step": 2889 + }, + { + "epoch": 0.7601762346287894, + "grad_norm": 4.8762260883901964, + "learning_rate": 5e-06, + "loss": 0.1322, + "num_input_tokens_seen": 496353892, + "step": 2890 + }, + { + "epoch": 0.7601762346287894, + "loss": 0.1910967230796814, + "loss_ce": 0.0009111673571169376, + "loss_iou": 0.349609375, + "loss_num": 0.0380859375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 496353892, + "step": 2890 + }, + { + "epoch": 0.7604392713881765, + "grad_norm": 31.258153024005672, + "learning_rate": 5e-06, + "loss": 0.1188, + "num_input_tokens_seen": 496525916, + "step": 2891 + }, + { + "epoch": 0.7604392713881765, + "loss": 0.07003885507583618, + "loss_ce": 0.0008249912643805146, + "loss_iou": 0.546875, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 496525916, + "step": 2891 + }, + { + "epoch": 0.7607023081475637, + "grad_norm": 3.99666824293353, + "learning_rate": 5e-06, + "loss": 0.1166, + "num_input_tokens_seen": 496698052, + "step": 2892 + }, + { + "epoch": 0.7607023081475637, + "loss": 0.09317123889923096, + "loss_ce": 0.0031443799380213022, + "loss_iou": 0.59765625, + "loss_num": 0.01806640625, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 496698052, + "step": 2892 + }, + { + "epoch": 0.7609653449069508, + "grad_norm": 3.917894736012383, + "learning_rate": 5e-06, + "loss": 0.1374, + "num_input_tokens_seen": 496868580, + "step": 2893 + }, + { + "epoch": 0.7609653449069508, + "loss": 0.1594741940498352, + "loss_ce": 0.0014236548449844122, + "loss_iou": 0.462890625, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 496868580, + "step": 2893 + }, + { + "epoch": 0.7612283816663379, + "grad_norm": 6.805778622837066, + "learning_rate": 5e-06, + "loss": 0.0953, + "num_input_tokens_seen": 497040720, + "step": 2894 + }, + { + "epoch": 0.7612283816663379, + "loss": 0.04234718531370163, + "loss_ce": 0.0003244808176532388, + "loss_iou": 0.44140625, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 497040720, + "step": 2894 + }, + { + "epoch": 0.761491418425725, + "grad_norm": 8.055134529886692, + "learning_rate": 5e-06, + "loss": 0.1017, + "num_input_tokens_seen": 497213204, + "step": 2895 + }, + { + "epoch": 0.761491418425725, + "loss": 0.06772617995738983, + "loss_ce": 0.0006180237978696823, + "loss_iou": 0.5546875, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 497213204, + "step": 2895 + }, + { + "epoch": 0.7617544551851121, + "grad_norm": 20.74511641540883, + "learning_rate": 5e-06, + "loss": 0.1427, + "num_input_tokens_seen": 497382348, + "step": 2896 + }, + { + "epoch": 0.7617544551851121, + "loss": 0.1839737594127655, + "loss_ce": 0.006239374168217182, + "loss_iou": 0.5, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 497382348, + "step": 2896 + }, + { + "epoch": 0.7620174919444992, + "grad_norm": 3.827051566364027, + "learning_rate": 5e-06, + "loss": 0.1266, + "num_input_tokens_seen": 497554288, + "step": 2897 + }, + { + "epoch": 0.7620174919444992, + "loss": 0.1408492773771286, + "loss_ce": 0.0003158297040499747, + "loss_iou": 0.59375, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 497554288, + "step": 2897 + }, + { + "epoch": 0.7622805287038864, + "grad_norm": 4.251155438400414, + "learning_rate": 5e-06, + "loss": 0.0949, + "num_input_tokens_seen": 497726624, + "step": 2898 + }, + { + "epoch": 0.7622805287038864, + "loss": 0.08204406499862671, + "loss_ce": 0.0024694851599633694, + "loss_iou": 0.6015625, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 497726624, + "step": 2898 + }, + { + "epoch": 0.7625435654632735, + "grad_norm": 25.921624546006864, + "learning_rate": 5e-06, + "loss": 0.1214, + "num_input_tokens_seen": 497899052, + "step": 2899 + }, + { + "epoch": 0.7625435654632735, + "loss": 0.125847727060318, + "loss_ce": 5.42689704161603e-05, + "loss_iou": 0.546875, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 497899052, + "step": 2899 + }, + { + "epoch": 0.7628066022226606, + "grad_norm": 3.225720909448473, + "learning_rate": 5e-06, + "loss": 0.0838, + "num_input_tokens_seen": 498071164, + "step": 2900 + }, + { + "epoch": 0.7628066022226606, + "loss": 0.13742178678512573, + "loss_ce": 0.002015294972807169, + "loss_iou": 0.5625, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 498071164, + "step": 2900 + }, + { + "epoch": 0.7630696389820477, + "grad_norm": 10.436595772958983, + "learning_rate": 5e-06, + "loss": 0.1184, + "num_input_tokens_seen": 498243380, + "step": 2901 + }, + { + "epoch": 0.7630696389820477, + "loss": 0.08406674116849899, + "loss_ce": 0.0019744576420634985, + "loss_iou": 0.482421875, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 498243380, + "step": 2901 + }, + { + "epoch": 0.7633326757414348, + "grad_norm": 5.876814424855767, + "learning_rate": 5e-06, + "loss": 0.0899, + "num_input_tokens_seen": 498415740, + "step": 2902 + }, + { + "epoch": 0.7633326757414348, + "loss": 0.12014832347631454, + "loss_ce": 0.0023199557326734066, + "loss_iou": 0.52734375, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 498415740, + "step": 2902 + }, + { + "epoch": 0.763595712500822, + "grad_norm": 6.217199501402136, + "learning_rate": 5e-06, + "loss": 0.1104, + "num_input_tokens_seen": 498587740, + "step": 2903 + }, + { + "epoch": 0.763595712500822, + "loss": 0.0937931090593338, + "loss_ce": 0.0013553638709709048, + "loss_iou": 0.6171875, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 498587740, + "step": 2903 + }, + { + "epoch": 0.7638587492602091, + "grad_norm": 4.965193192238494, + "learning_rate": 5e-06, + "loss": 0.1069, + "num_input_tokens_seen": 498759856, + "step": 2904 + }, + { + "epoch": 0.7638587492602091, + "loss": 0.0735275000333786, + "loss_ce": 0.00013272266369313002, + "loss_iou": 0.578125, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 498759856, + "step": 2904 + }, + { + "epoch": 0.7641217860195962, + "grad_norm": 36.02324764024362, + "learning_rate": 5e-06, + "loss": 0.1135, + "num_input_tokens_seen": 498929524, + "step": 2905 + }, + { + "epoch": 0.7641217860195962, + "loss": 0.13745470345020294, + "loss_ce": 0.002353382296860218, + "loss_iou": 0.40234375, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 498929524, + "step": 2905 + }, + { + "epoch": 0.7643848227789833, + "grad_norm": 4.064476858285287, + "learning_rate": 5e-06, + "loss": 0.1233, + "num_input_tokens_seen": 499099092, + "step": 2906 + }, + { + "epoch": 0.7643848227789833, + "loss": 0.12520155310630798, + "loss_ce": 0.0020325970835983753, + "loss_iou": 0.431640625, + "loss_num": 0.024658203125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 499099092, + "step": 2906 + }, + { + "epoch": 0.7646478595383704, + "grad_norm": 9.564182625696946, + "learning_rate": 5e-06, + "loss": 0.1037, + "num_input_tokens_seen": 499271132, + "step": 2907 + }, + { + "epoch": 0.7646478595383704, + "loss": 0.10383596271276474, + "loss_ce": 0.0036772743333131075, + "loss_iou": 0.6015625, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 499271132, + "step": 2907 + }, + { + "epoch": 0.7649108962977577, + "grad_norm": 7.38548887746826, + "learning_rate": 5e-06, + "loss": 0.1407, + "num_input_tokens_seen": 499443276, + "step": 2908 + }, + { + "epoch": 0.7649108962977577, + "loss": 0.07420553267002106, + "loss_ce": 0.006151327397674322, + "loss_iou": 0.32421875, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 499443276, + "step": 2908 + }, + { + "epoch": 0.7651739330571448, + "grad_norm": 3.357102180261463, + "learning_rate": 5e-06, + "loss": 0.1387, + "num_input_tokens_seen": 499615436, + "step": 2909 + }, + { + "epoch": 0.7651739330571448, + "loss": 0.13687880337238312, + "loss_ce": 0.0005110005149617791, + "loss_iou": 0.52734375, + "loss_num": 0.0272216796875, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 499615436, + "step": 2909 + }, + { + "epoch": 0.7654369698165319, + "grad_norm": 5.532042225189138, + "learning_rate": 5e-06, + "loss": 0.1378, + "num_input_tokens_seen": 499787568, + "step": 2910 + }, + { + "epoch": 0.7654369698165319, + "loss": 0.10780411958694458, + "loss_ce": 0.0017250193050131202, + "loss_iou": 0.423828125, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 499787568, + "step": 2910 + }, + { + "epoch": 0.765700006575919, + "grad_norm": 9.098827733621626, + "learning_rate": 5e-06, + "loss": 0.1118, + "num_input_tokens_seen": 499959744, + "step": 2911 + }, + { + "epoch": 0.765700006575919, + "loss": 0.11246542632579803, + "loss_ce": 0.0012135956203565001, + "loss_iou": 0.431640625, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 499959744, + "step": 2911 + }, + { + "epoch": 0.7659630433353061, + "grad_norm": 4.671256681679922, + "learning_rate": 5e-06, + "loss": 0.0884, + "num_input_tokens_seen": 500132012, + "step": 2912 + }, + { + "epoch": 0.7659630433353061, + "loss": 0.07265074551105499, + "loss_ce": 0.003986193798482418, + "loss_iou": 0.478515625, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 500132012, + "step": 2912 + }, + { + "epoch": 0.7662260800946933, + "grad_norm": 9.318330323174848, + "learning_rate": 5e-06, + "loss": 0.1125, + "num_input_tokens_seen": 500304120, + "step": 2913 + }, + { + "epoch": 0.7662260800946933, + "loss": 0.10342703014612198, + "loss_ce": 0.0003996905288659036, + "loss_iou": 0.55859375, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 500304120, + "step": 2913 + }, + { + "epoch": 0.7664891168540804, + "grad_norm": 4.11205791514663, + "learning_rate": 5e-06, + "loss": 0.108, + "num_input_tokens_seen": 500476220, + "step": 2914 + }, + { + "epoch": 0.7664891168540804, + "loss": 0.16668415069580078, + "loss_ce": 0.00170612963847816, + "loss_iou": 0.33203125, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 500476220, + "step": 2914 + }, + { + "epoch": 0.7667521536134675, + "grad_norm": 7.921166949882202, + "learning_rate": 5e-06, + "loss": 0.1248, + "num_input_tokens_seen": 500648456, + "step": 2915 + }, + { + "epoch": 0.7667521536134675, + "loss": 0.14688915014266968, + "loss_ce": 0.0016559937503188848, + "loss_iou": 0.546875, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 500648456, + "step": 2915 + }, + { + "epoch": 0.7670151903728546, + "grad_norm": 7.89559416185462, + "learning_rate": 5e-06, + "loss": 0.1163, + "num_input_tokens_seen": 500817440, + "step": 2916 + }, + { + "epoch": 0.7670151903728546, + "loss": 0.13112960755825043, + "loss_ce": 0.0047868345864117146, + "loss_iou": 0.5, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 500817440, + "step": 2916 + }, + { + "epoch": 0.7672782271322417, + "grad_norm": 7.07373988410314, + "learning_rate": 5e-06, + "loss": 0.1543, + "num_input_tokens_seen": 500989576, + "step": 2917 + }, + { + "epoch": 0.7672782271322417, + "loss": 0.17218878865242004, + "loss_ce": 0.0034876265563070774, + "loss_iou": 0.55859375, + "loss_num": 0.03369140625, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 500989576, + "step": 2917 + }, + { + "epoch": 0.7675412638916288, + "grad_norm": 4.658693255259618, + "learning_rate": 5e-06, + "loss": 0.0942, + "num_input_tokens_seen": 501161844, + "step": 2918 + }, + { + "epoch": 0.7675412638916288, + "loss": 0.0911635234951973, + "loss_ce": 0.0017775364685803652, + "loss_iou": 0.474609375, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 501161844, + "step": 2918 + }, + { + "epoch": 0.767804300651016, + "grad_norm": 5.6482830730287485, + "learning_rate": 5e-06, + "loss": 0.1456, + "num_input_tokens_seen": 501334012, + "step": 2919 + }, + { + "epoch": 0.767804300651016, + "loss": 0.12131966650485992, + "loss_ce": 0.00028695265064015985, + "loss_iou": 0.52734375, + "loss_num": 0.0242919921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 501334012, + "step": 2919 + }, + { + "epoch": 0.7680673374104031, + "grad_norm": 6.055907471900818, + "learning_rate": 5e-06, + "loss": 0.1116, + "num_input_tokens_seen": 501504632, + "step": 2920 + }, + { + "epoch": 0.7680673374104031, + "loss": 0.06848346441984177, + "loss_ce": 0.0003071928513236344, + "loss_iou": 0.44921875, + "loss_num": 0.01361083984375, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 501504632, + "step": 2920 + }, + { + "epoch": 0.7683303741697902, + "grad_norm": 4.128716866112718, + "learning_rate": 5e-06, + "loss": 0.125, + "num_input_tokens_seen": 501676892, + "step": 2921 + }, + { + "epoch": 0.7683303741697902, + "loss": 0.12142758071422577, + "loss_ce": 0.000974692520685494, + "loss_iou": 0.5, + "loss_num": 0.0240478515625, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 501676892, + "step": 2921 + }, + { + "epoch": 0.7685934109291773, + "grad_norm": 6.285708685708194, + "learning_rate": 5e-06, + "loss": 0.1141, + "num_input_tokens_seen": 501846408, + "step": 2922 + }, + { + "epoch": 0.7685934109291773, + "loss": 0.13839051127433777, + "loss_ce": 0.0014276191359385848, + "loss_iou": 0.5703125, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 501846408, + "step": 2922 + }, + { + "epoch": 0.7688564476885644, + "grad_norm": 5.199523762158551, + "learning_rate": 5e-06, + "loss": 0.0935, + "num_input_tokens_seen": 502018884, + "step": 2923 + }, + { + "epoch": 0.7688564476885644, + "loss": 0.0823674127459526, + "loss_ce": 0.0034794718958437443, + "loss_iou": 0.69921875, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 502018884, + "step": 2923 + }, + { + "epoch": 0.7691194844479516, + "grad_norm": 7.865534449035968, + "learning_rate": 5e-06, + "loss": 0.1235, + "num_input_tokens_seen": 502189840, + "step": 2924 + }, + { + "epoch": 0.7691194844479516, + "loss": 0.09236955642700195, + "loss_ce": 0.0008473452762700617, + "loss_iou": 0.5234375, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 502189840, + "step": 2924 + }, + { + "epoch": 0.7693825212073387, + "grad_norm": 4.464952808674884, + "learning_rate": 5e-06, + "loss": 0.1007, + "num_input_tokens_seen": 502361908, + "step": 2925 + }, + { + "epoch": 0.7693825212073387, + "loss": 0.09519391506910324, + "loss_ce": 0.002862985013052821, + "loss_iou": 0.40234375, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 502361908, + "step": 2925 + }, + { + "epoch": 0.7696455579667258, + "grad_norm": 3.137863532599948, + "learning_rate": 5e-06, + "loss": 0.1102, + "num_input_tokens_seen": 502533852, + "step": 2926 + }, + { + "epoch": 0.7696455579667258, + "loss": 0.062405504286289215, + "loss_ce": 0.000225941272219643, + "loss_iou": 0.52734375, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 502533852, + "step": 2926 + }, + { + "epoch": 0.769908594726113, + "grad_norm": 5.424523366119041, + "learning_rate": 5e-06, + "loss": 0.1141, + "num_input_tokens_seen": 502706068, + "step": 2927 + }, + { + "epoch": 0.769908594726113, + "loss": 0.15939806401729584, + "loss_ce": 0.0009050165535882115, + "loss_iou": 0.6484375, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 502706068, + "step": 2927 + }, + { + "epoch": 0.7701716314855, + "grad_norm": 21.020644002444403, + "learning_rate": 5e-06, + "loss": 0.1145, + "num_input_tokens_seen": 502878204, + "step": 2928 + }, + { + "epoch": 0.7701716314855, + "loss": 0.1498870849609375, + "loss_ce": 0.00108338613063097, + "loss_iou": 0.3984375, + "loss_num": 0.02978515625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 502878204, + "step": 2928 + }, + { + "epoch": 0.7704346682448873, + "grad_norm": 6.513721562381314, + "learning_rate": 5e-06, + "loss": 0.0967, + "num_input_tokens_seen": 503050516, + "step": 2929 + }, + { + "epoch": 0.7704346682448873, + "loss": 0.14237374067306519, + "loss_ce": 0.0013520093634724617, + "loss_iou": 0.53125, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 503050516, + "step": 2929 + }, + { + "epoch": 0.7706977050042744, + "grad_norm": 21.68669265107994, + "learning_rate": 5e-06, + "loss": 0.1036, + "num_input_tokens_seen": 503222664, + "step": 2930 + }, + { + "epoch": 0.7706977050042744, + "loss": 0.09872519969940186, + "loss_ce": 0.0024269861169159412, + "loss_iou": 0.53125, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 503222664, + "step": 2930 + }, + { + "epoch": 0.7709607417636615, + "grad_norm": 23.71932442538902, + "learning_rate": 5e-06, + "loss": 0.1131, + "num_input_tokens_seen": 503394820, + "step": 2931 + }, + { + "epoch": 0.7709607417636615, + "loss": 0.14681634306907654, + "loss_ce": 0.0005455805221572518, + "loss_iou": 0.359375, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 503394820, + "step": 2931 + }, + { + "epoch": 0.7712237785230486, + "grad_norm": 14.0356485916023, + "learning_rate": 5e-06, + "loss": 0.1609, + "num_input_tokens_seen": 503567064, + "step": 2932 + }, + { + "epoch": 0.7712237785230486, + "loss": 0.1981443464756012, + "loss_ce": 0.002374083735048771, + "loss_iou": 0.4453125, + "loss_num": 0.0390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 503567064, + "step": 2932 + }, + { + "epoch": 0.7714868152824357, + "grad_norm": 4.952386040850401, + "learning_rate": 5e-06, + "loss": 0.1147, + "num_input_tokens_seen": 503739480, + "step": 2933 + }, + { + "epoch": 0.7714868152824357, + "loss": 0.2314581573009491, + "loss_ce": 0.002728914376348257, + "loss_iou": 0.40234375, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 503739480, + "step": 2933 + }, + { + "epoch": 0.7717498520418229, + "grad_norm": 5.454737692407838, + "learning_rate": 5e-06, + "loss": 0.0938, + "num_input_tokens_seen": 503911536, + "step": 2934 + }, + { + "epoch": 0.7717498520418229, + "loss": 0.1048421710729599, + "loss_ce": 0.004591919481754303, + "loss_iou": 0.474609375, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 503911536, + "step": 2934 + }, + { + "epoch": 0.77201288880121, + "grad_norm": 7.9773349981616795, + "learning_rate": 5e-06, + "loss": 0.108, + "num_input_tokens_seen": 504082084, + "step": 2935 + }, + { + "epoch": 0.77201288880121, + "loss": 0.1008715033531189, + "loss_ce": 0.0018419669941067696, + "loss_iou": 0.5234375, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 504082084, + "step": 2935 + }, + { + "epoch": 0.7722759255605971, + "grad_norm": 5.256239273173906, + "learning_rate": 5e-06, + "loss": 0.1012, + "num_input_tokens_seen": 504254328, + "step": 2936 + }, + { + "epoch": 0.7722759255605971, + "loss": 0.10146059095859528, + "loss_ce": 0.0031024364288896322, + "loss_iou": 0.50390625, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 504254328, + "step": 2936 + }, + { + "epoch": 0.7725389623199842, + "grad_norm": 7.796343823545596, + "learning_rate": 5e-06, + "loss": 0.1617, + "num_input_tokens_seen": 504426408, + "step": 2937 + }, + { + "epoch": 0.7725389623199842, + "loss": 0.09895452111959457, + "loss_ce": 0.00047429182450287044, + "loss_iou": 0.4375, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 504426408, + "step": 2937 + }, + { + "epoch": 0.7728019990793713, + "grad_norm": 4.506271114804364, + "learning_rate": 5e-06, + "loss": 0.0958, + "num_input_tokens_seen": 504599160, + "step": 2938 + }, + { + "epoch": 0.7728019990793713, + "loss": 0.10439816117286682, + "loss_ce": 0.0011114203371107578, + "loss_iou": 0.53515625, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 504599160, + "step": 2938 + }, + { + "epoch": 0.7730650358387585, + "grad_norm": 7.051793538200423, + "learning_rate": 5e-06, + "loss": 0.0948, + "num_input_tokens_seen": 504771620, + "step": 2939 + }, + { + "epoch": 0.7730650358387585, + "loss": 0.04662296548485756, + "loss_ce": 0.001548499334603548, + "loss_iou": 0.51953125, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 504771620, + "step": 2939 + }, + { + "epoch": 0.7733280725981456, + "grad_norm": 4.48321731819358, + "learning_rate": 5e-06, + "loss": 0.1049, + "num_input_tokens_seen": 504942212, + "step": 2940 + }, + { + "epoch": 0.7733280725981456, + "loss": 0.1206519603729248, + "loss_ce": 0.0022132450249046087, + "loss_iou": 0.49609375, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 504942212, + "step": 2940 + }, + { + "epoch": 0.7735911093575327, + "grad_norm": 3.542637672706516, + "learning_rate": 5e-06, + "loss": 0.0699, + "num_input_tokens_seen": 505114396, + "step": 2941 + }, + { + "epoch": 0.7735911093575327, + "loss": 0.07642598450183868, + "loss_ce": 0.002970176050439477, + "loss_iou": 0.59375, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 505114396, + "step": 2941 + }, + { + "epoch": 0.7738541461169198, + "grad_norm": 13.31557941839502, + "learning_rate": 5e-06, + "loss": 0.152, + "num_input_tokens_seen": 505286572, + "step": 2942 + }, + { + "epoch": 0.7738541461169198, + "loss": 0.11694711446762085, + "loss_ce": 0.000492042163386941, + "loss_iou": 0.484375, + "loss_num": 0.0233154296875, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 505286572, + "step": 2942 + }, + { + "epoch": 0.7741171828763069, + "grad_norm": 4.217985208788173, + "learning_rate": 5e-06, + "loss": 0.1221, + "num_input_tokens_seen": 505458580, + "step": 2943 + }, + { + "epoch": 0.7741171828763069, + "loss": 0.12103226035833359, + "loss_ce": 0.00256301905028522, + "loss_iou": 0.6640625, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 505458580, + "step": 2943 + }, + { + "epoch": 0.774380219635694, + "grad_norm": 5.305953482185517, + "learning_rate": 5e-06, + "loss": 0.1417, + "num_input_tokens_seen": 505630628, + "step": 2944 + }, + { + "epoch": 0.774380219635694, + "loss": 0.13166974484920502, + "loss_ce": 0.0009019209537655115, + "loss_iou": 0.302734375, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 505630628, + "step": 2944 + }, + { + "epoch": 0.7746432563950812, + "grad_norm": 3.7265892335604103, + "learning_rate": 5e-06, + "loss": 0.1021, + "num_input_tokens_seen": 505802760, + "step": 2945 + }, + { + "epoch": 0.7746432563950812, + "loss": 0.174719899892807, + "loss_ce": 0.005622013006359339, + "loss_iou": 0.515625, + "loss_num": 0.03369140625, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 505802760, + "step": 2945 + }, + { + "epoch": 0.7749062931544684, + "grad_norm": 3.906958326198367, + "learning_rate": 5e-06, + "loss": 0.0915, + "num_input_tokens_seen": 505975008, + "step": 2946 + }, + { + "epoch": 0.7749062931544684, + "loss": 0.15144148468971252, + "loss_ce": 0.003522793762385845, + "loss_iou": 0.609375, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 505975008, + "step": 2946 + }, + { + "epoch": 0.7751693299138555, + "grad_norm": 30.273164796131727, + "learning_rate": 5e-06, + "loss": 0.1318, + "num_input_tokens_seen": 506146864, + "step": 2947 + }, + { + "epoch": 0.7751693299138555, + "loss": 0.12878431379795074, + "loss_ce": 0.00238050683401525, + "loss_iou": 0.52734375, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 506146864, + "step": 2947 + }, + { + "epoch": 0.7754323666732426, + "grad_norm": 9.326627239945015, + "learning_rate": 5e-06, + "loss": 0.1024, + "num_input_tokens_seen": 506317520, + "step": 2948 + }, + { + "epoch": 0.7754323666732426, + "loss": 0.13574595749378204, + "loss_ce": 0.001163432258181274, + "loss_iou": 0.384765625, + "loss_num": 0.02685546875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 506317520, + "step": 2948 + }, + { + "epoch": 0.7756954034326297, + "grad_norm": 4.291282220054208, + "learning_rate": 5e-06, + "loss": 0.0979, + "num_input_tokens_seen": 506489644, + "step": 2949 + }, + { + "epoch": 0.7756954034326297, + "loss": 0.07531201094388962, + "loss_ce": 0.0006965312641113997, + "loss_iou": 0.5625, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 506489644, + "step": 2949 + }, + { + "epoch": 0.7759584401920169, + "grad_norm": 5.132046879043553, + "learning_rate": 5e-06, + "loss": 0.0948, + "num_input_tokens_seen": 506661552, + "step": 2950 + }, + { + "epoch": 0.7759584401920169, + "loss": 0.12264753133058548, + "loss_ce": 0.003171210875734687, + "loss_iou": 0.6015625, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 506661552, + "step": 2950 + }, + { + "epoch": 0.776221476951404, + "grad_norm": 18.605800930237596, + "learning_rate": 5e-06, + "loss": 0.1066, + "num_input_tokens_seen": 506832080, + "step": 2951 + }, + { + "epoch": 0.776221476951404, + "loss": 0.08513291925191879, + "loss_ce": 0.0006144904182292521, + "loss_iou": 0.5390625, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 506832080, + "step": 2951 + }, + { + "epoch": 0.7764845137107911, + "grad_norm": 13.809771584503252, + "learning_rate": 5e-06, + "loss": 0.148, + "num_input_tokens_seen": 507004396, + "step": 2952 + }, + { + "epoch": 0.7764845137107911, + "loss": 0.16220027208328247, + "loss_ce": 0.007384595461189747, + "loss_iou": 0.51953125, + "loss_num": 0.031005859375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 507004396, + "step": 2952 + }, + { + "epoch": 0.7767475504701782, + "grad_norm": 6.052098852686667, + "learning_rate": 5e-06, + "loss": 0.1281, + "num_input_tokens_seen": 507176636, + "step": 2953 + }, + { + "epoch": 0.7767475504701782, + "loss": 0.10348343849182129, + "loss_ce": 7.461909262929112e-05, + "loss_iou": 0.640625, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 507176636, + "step": 2953 + }, + { + "epoch": 0.7770105872295653, + "grad_norm": 6.115673418463162, + "learning_rate": 5e-06, + "loss": 0.1208, + "num_input_tokens_seen": 507348812, + "step": 2954 + }, + { + "epoch": 0.7770105872295653, + "loss": 0.17799662053585052, + "loss_ce": 0.0009031177032738924, + "loss_iou": 0.55078125, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 507348812, + "step": 2954 + }, + { + "epoch": 0.7772736239889525, + "grad_norm": 4.920564715708625, + "learning_rate": 5e-06, + "loss": 0.1242, + "num_input_tokens_seen": 507521260, + "step": 2955 + }, + { + "epoch": 0.7772736239889525, + "loss": 0.12127618491649628, + "loss_ce": 0.002562812063843012, + "loss_iou": 0.625, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 507521260, + "step": 2955 + }, + { + "epoch": 0.7775366607483396, + "grad_norm": 4.1269903282175875, + "learning_rate": 5e-06, + "loss": 0.11, + "num_input_tokens_seen": 507693580, + "step": 2956 + }, + { + "epoch": 0.7775366607483396, + "loss": 0.1181151419878006, + "loss_ce": 0.0023619618732482195, + "loss_iou": 0.5234375, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 507693580, + "step": 2956 + }, + { + "epoch": 0.7777996975077267, + "grad_norm": 6.530908453055091, + "learning_rate": 5e-06, + "loss": 0.1223, + "num_input_tokens_seen": 507866000, + "step": 2957 + }, + { + "epoch": 0.7777996975077267, + "loss": 0.09897395223379135, + "loss_ce": 0.001989088486880064, + "loss_iou": 0.5234375, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 507866000, + "step": 2957 + }, + { + "epoch": 0.7780627342671138, + "grad_norm": 13.799197287571243, + "learning_rate": 5e-06, + "loss": 0.0943, + "num_input_tokens_seen": 508036520, + "step": 2958 + }, + { + "epoch": 0.7780627342671138, + "loss": 0.12385988235473633, + "loss_ce": 0.0020031901076436043, + "loss_iou": 0.5078125, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 508036520, + "step": 2958 + }, + { + "epoch": 0.7783257710265009, + "grad_norm": 6.771630034008941, + "learning_rate": 5e-06, + "loss": 0.1293, + "num_input_tokens_seen": 508208512, + "step": 2959 + }, + { + "epoch": 0.7783257710265009, + "loss": 0.09063053131103516, + "loss_ce": 0.001976970350369811, + "loss_iou": 0.70703125, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 508208512, + "step": 2959 + }, + { + "epoch": 0.7785888077858881, + "grad_norm": 17.947272506590085, + "learning_rate": 5e-06, + "loss": 0.1182, + "num_input_tokens_seen": 508380916, + "step": 2960 + }, + { + "epoch": 0.7785888077858881, + "loss": 0.18887397646903992, + "loss_ce": 0.000855189049616456, + "loss_iou": 0.349609375, + "loss_num": 0.03759765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 508380916, + "step": 2960 + }, + { + "epoch": 0.7788518445452752, + "grad_norm": 8.543500304163382, + "learning_rate": 5e-06, + "loss": 0.1273, + "num_input_tokens_seen": 508551252, + "step": 2961 + }, + { + "epoch": 0.7788518445452752, + "loss": 0.10420133173465729, + "loss_ce": 0.000197422195924446, + "loss_iou": 0.412109375, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 508551252, + "step": 2961 + }, + { + "epoch": 0.7791148813046623, + "grad_norm": 11.527993211943581, + "learning_rate": 5e-06, + "loss": 0.1274, + "num_input_tokens_seen": 508723096, + "step": 2962 + }, + { + "epoch": 0.7791148813046623, + "loss": 0.13255223631858826, + "loss_ce": 0.0016013117274269462, + "loss_iou": 0.431640625, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 508723096, + "step": 2962 + }, + { + "epoch": 0.7793779180640494, + "grad_norm": 3.765008399602552, + "learning_rate": 5e-06, + "loss": 0.1139, + "num_input_tokens_seen": 508895336, + "step": 2963 + }, + { + "epoch": 0.7793779180640494, + "loss": 0.15525312721729279, + "loss_ce": 0.00026197341503575444, + "loss_iou": 0.435546875, + "loss_num": 0.031005859375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 508895336, + "step": 2963 + }, + { + "epoch": 0.7796409548234365, + "grad_norm": 4.0681392149062905, + "learning_rate": 5e-06, + "loss": 0.1164, + "num_input_tokens_seen": 509065748, + "step": 2964 + }, + { + "epoch": 0.7796409548234365, + "loss": 0.103541798889637, + "loss_ce": 0.0006670450093224645, + "loss_iou": 0.4765625, + "loss_num": 0.0205078125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 509065748, + "step": 2964 + }, + { + "epoch": 0.7799039915828238, + "grad_norm": 5.764259729112096, + "learning_rate": 5e-06, + "loss": 0.1354, + "num_input_tokens_seen": 509237988, + "step": 2965 + }, + { + "epoch": 0.7799039915828238, + "loss": 0.09293576329946518, + "loss_ce": 0.0004980218946002424, + "loss_iou": 0.578125, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 509237988, + "step": 2965 + }, + { + "epoch": 0.7801670283422109, + "grad_norm": 6.653297635090324, + "learning_rate": 5e-06, + "loss": 0.1324, + "num_input_tokens_seen": 509407480, + "step": 2966 + }, + { + "epoch": 0.7801670283422109, + "loss": 0.10752134025096893, + "loss_ce": 0.0008471491746604443, + "loss_iou": 0.546875, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 509407480, + "step": 2966 + }, + { + "epoch": 0.780430065101598, + "grad_norm": 5.8594293697060476, + "learning_rate": 5e-06, + "loss": 0.1152, + "num_input_tokens_seen": 509579708, + "step": 2967 + }, + { + "epoch": 0.780430065101598, + "loss": 0.19974008202552795, + "loss_ce": 0.005846647545695305, + "loss_iou": 0.3984375, + "loss_num": 0.038818359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 509579708, + "step": 2967 + }, + { + "epoch": 0.7806931018609851, + "grad_norm": 4.074243325830813, + "learning_rate": 5e-06, + "loss": 0.1193, + "num_input_tokens_seen": 509750260, + "step": 2968 + }, + { + "epoch": 0.7806931018609851, + "loss": 0.09352241456508636, + "loss_ce": 0.00045905529987066984, + "loss_iou": 0.5625, + "loss_num": 0.0185546875, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 509750260, + "step": 2968 + }, + { + "epoch": 0.7809561386203722, + "grad_norm": 18.473086839056062, + "learning_rate": 5e-06, + "loss": 0.1069, + "num_input_tokens_seen": 509922416, + "step": 2969 + }, + { + "epoch": 0.7809561386203722, + "loss": 0.10857398062944412, + "loss_ce": 0.002586429938673973, + "loss_iou": 0.50390625, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 509922416, + "step": 2969 + }, + { + "epoch": 0.7812191753797593, + "grad_norm": 4.297341068075648, + "learning_rate": 5e-06, + "loss": 0.0869, + "num_input_tokens_seen": 510094352, + "step": 2970 + }, + { + "epoch": 0.7812191753797593, + "loss": 0.10554330050945282, + "loss_ce": 0.002088708570227027, + "loss_iou": 0.53125, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 510094352, + "step": 2970 + }, + { + "epoch": 0.7814822121391465, + "grad_norm": 4.523551186515909, + "learning_rate": 5e-06, + "loss": 0.1321, + "num_input_tokens_seen": 510266568, + "step": 2971 + }, + { + "epoch": 0.7814822121391465, + "loss": 0.1601797491312027, + "loss_ce": 0.000939027639105916, + "loss_iou": 0.51171875, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 510266568, + "step": 2971 + }, + { + "epoch": 0.7817452488985336, + "grad_norm": 11.257967210341226, + "learning_rate": 5e-06, + "loss": 0.125, + "num_input_tokens_seen": 510438880, + "step": 2972 + }, + { + "epoch": 0.7817452488985336, + "loss": 0.08428631722927094, + "loss_ce": 0.0006986761000007391, + "loss_iou": 0.44921875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 510438880, + "step": 2972 + }, + { + "epoch": 0.7820082856579207, + "grad_norm": 4.879983737803821, + "learning_rate": 5e-06, + "loss": 0.1164, + "num_input_tokens_seen": 510610888, + "step": 2973 + }, + { + "epoch": 0.7820082856579207, + "loss": 0.061606645584106445, + "loss_ce": 0.0010292520746588707, + "loss_iou": 0.515625, + "loss_num": 0.01214599609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 510610888, + "step": 2973 + }, + { + "epoch": 0.7822713224173078, + "grad_norm": 6.784338572567158, + "learning_rate": 5e-06, + "loss": 0.1436, + "num_input_tokens_seen": 510780736, + "step": 2974 + }, + { + "epoch": 0.7822713224173078, + "loss": 0.09628412127494812, + "loss_ce": 0.0025646386202424765, + "loss_iou": 0.51171875, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 510780736, + "step": 2974 + }, + { + "epoch": 0.7825343591766949, + "grad_norm": 2.4966016721195827, + "learning_rate": 5e-06, + "loss": 0.1027, + "num_input_tokens_seen": 510952740, + "step": 2975 + }, + { + "epoch": 0.7825343591766949, + "loss": 0.11784331500530243, + "loss_ce": 0.002517384709790349, + "loss_iou": 0.5390625, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 510952740, + "step": 2975 + }, + { + "epoch": 0.7827973959360821, + "grad_norm": 9.752032559631301, + "learning_rate": 5e-06, + "loss": 0.1082, + "num_input_tokens_seen": 511124984, + "step": 2976 + }, + { + "epoch": 0.7827973959360821, + "loss": 0.10057765245437622, + "loss_ce": 0.001853286987170577, + "loss_iou": 0.431640625, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 511124984, + "step": 2976 + }, + { + "epoch": 0.7830604326954692, + "grad_norm": 4.047284415442861, + "learning_rate": 5e-06, + "loss": 0.1237, + "num_input_tokens_seen": 511297176, + "step": 2977 + }, + { + "epoch": 0.7830604326954692, + "loss": 0.1439676731824875, + "loss_ce": 0.004929587244987488, + "loss_iou": 0.47265625, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 511297176, + "step": 2977 + }, + { + "epoch": 0.7833234694548563, + "grad_norm": 4.111909259977495, + "learning_rate": 5e-06, + "loss": 0.121, + "num_input_tokens_seen": 511469216, + "step": 2978 + }, + { + "epoch": 0.7833234694548563, + "loss": 0.1689942479133606, + "loss_ce": 0.008380233310163021, + "loss_iou": 0.337890625, + "loss_num": 0.0322265625, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 511469216, + "step": 2978 + }, + { + "epoch": 0.7835865062142434, + "grad_norm": 3.736811899980652, + "learning_rate": 5e-06, + "loss": 0.1142, + "num_input_tokens_seen": 511641400, + "step": 2979 + }, + { + "epoch": 0.7835865062142434, + "loss": 0.09928886592388153, + "loss_ce": 0.0024871169589459896, + "loss_iou": 0.5546875, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 511641400, + "step": 2979 + }, + { + "epoch": 0.7838495429736305, + "grad_norm": 3.8885212513343204, + "learning_rate": 5e-06, + "loss": 0.0719, + "num_input_tokens_seen": 511813644, + "step": 2980 + }, + { + "epoch": 0.7838495429736305, + "loss": 0.08123628050088882, + "loss_ce": 0.0019821308087557554, + "loss_iou": 0.57421875, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 511813644, + "step": 2980 + }, + { + "epoch": 0.7841125797330177, + "grad_norm": 3.6947801269712643, + "learning_rate": 5e-06, + "loss": 0.1196, + "num_input_tokens_seen": 511985744, + "step": 2981 + }, + { + "epoch": 0.7841125797330177, + "loss": 0.07377283275127411, + "loss_ce": 0.0037197312340140343, + "loss_iou": 0.71875, + "loss_num": 0.0140380859375, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 511985744, + "step": 2981 + }, + { + "epoch": 0.7843756164924048, + "grad_norm": 4.052383876786155, + "learning_rate": 5e-06, + "loss": 0.1062, + "num_input_tokens_seen": 512157708, + "step": 2982 + }, + { + "epoch": 0.7843756164924048, + "loss": 0.18616530299186707, + "loss_ce": 0.00116774532943964, + "loss_iou": 0.59375, + "loss_num": 0.037109375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 512157708, + "step": 2982 + }, + { + "epoch": 0.7846386532517919, + "grad_norm": 3.4825813224277185, + "learning_rate": 5e-06, + "loss": 0.1122, + "num_input_tokens_seen": 512330084, + "step": 2983 + }, + { + "epoch": 0.7846386532517919, + "loss": 0.1472734808921814, + "loss_ce": 0.0032915552146732807, + "loss_iou": 0.341796875, + "loss_num": 0.02880859375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 512330084, + "step": 2983 + }, + { + "epoch": 0.784901690011179, + "grad_norm": 3.5815743222985947, + "learning_rate": 5e-06, + "loss": 0.0912, + "num_input_tokens_seen": 512502076, + "step": 2984 + }, + { + "epoch": 0.784901690011179, + "loss": 0.10481996834278107, + "loss_ce": 0.0021588318049907684, + "loss_iou": 0.51171875, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 512502076, + "step": 2984 + }, + { + "epoch": 0.7851647267705661, + "grad_norm": 7.905852445879054, + "learning_rate": 5e-06, + "loss": 0.1202, + "num_input_tokens_seen": 512674288, + "step": 2985 + }, + { + "epoch": 0.7851647267705661, + "loss": 0.12316185235977173, + "loss_ce": 0.0030751884914934635, + "loss_iou": 0.5390625, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 512674288, + "step": 2985 + }, + { + "epoch": 0.7854277635299534, + "grad_norm": 5.926087069922328, + "learning_rate": 5e-06, + "loss": 0.0863, + "num_input_tokens_seen": 512846624, + "step": 2986 + }, + { + "epoch": 0.7854277635299534, + "loss": 0.06602786481380463, + "loss_ce": 0.0011474882485345006, + "loss_iou": 0.58203125, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 512846624, + "step": 2986 + }, + { + "epoch": 0.7856908002893405, + "grad_norm": 5.039950797702919, + "learning_rate": 5e-06, + "loss": 0.1472, + "num_input_tokens_seen": 513018668, + "step": 2987 + }, + { + "epoch": 0.7856908002893405, + "loss": 0.10930870473384857, + "loss_ce": 0.004847035743296146, + "loss_iou": 0.5078125, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 513018668, + "step": 2987 + }, + { + "epoch": 0.7859538370487276, + "grad_norm": 4.823970003989147, + "learning_rate": 5e-06, + "loss": 0.1425, + "num_input_tokens_seen": 513190872, + "step": 2988 + }, + { + "epoch": 0.7859538370487276, + "loss": 0.15449807047843933, + "loss_ce": 0.0005368961137719452, + "loss_iou": 0.486328125, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 513190872, + "step": 2988 + }, + { + "epoch": 0.7862168738081147, + "grad_norm": 11.156095172455718, + "learning_rate": 5e-06, + "loss": 0.1369, + "num_input_tokens_seen": 513363216, + "step": 2989 + }, + { + "epoch": 0.7862168738081147, + "loss": 0.07052050530910492, + "loss_ce": 0.002100098878145218, + "loss_iou": 0.431640625, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 513363216, + "step": 2989 + }, + { + "epoch": 0.7864799105675018, + "grad_norm": 6.545014028470699, + "learning_rate": 5e-06, + "loss": 0.1922, + "num_input_tokens_seen": 513532780, + "step": 2990 + }, + { + "epoch": 0.7864799105675018, + "loss": 0.20626530051231384, + "loss_ce": 0.004162629134953022, + "loss_iou": 0.515625, + "loss_num": 0.04052734375, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 513532780, + "step": 2990 + }, + { + "epoch": 0.786742947326889, + "grad_norm": 5.675250290858523, + "learning_rate": 5e-06, + "loss": 0.1315, + "num_input_tokens_seen": 513705312, + "step": 2991 + }, + { + "epoch": 0.786742947326889, + "loss": 0.10429085791110992, + "loss_ce": 0.002728358842432499, + "loss_iou": 0.53515625, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 513705312, + "step": 2991 + }, + { + "epoch": 0.7870059840862761, + "grad_norm": 3.771208751532015, + "learning_rate": 5e-06, + "loss": 0.0979, + "num_input_tokens_seen": 513877348, + "step": 2992 + }, + { + "epoch": 0.7870059840862761, + "loss": 0.09906225651502609, + "loss_ce": 0.0011008285218849778, + "loss_iou": 0.51171875, + "loss_num": 0.01953125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 513877348, + "step": 2992 + }, + { + "epoch": 0.7872690208456632, + "grad_norm": 8.702146962798412, + "learning_rate": 5e-06, + "loss": 0.108, + "num_input_tokens_seen": 514046516, + "step": 2993 + }, + { + "epoch": 0.7872690208456632, + "loss": 0.11204151809215546, + "loss_ce": 0.004131356719881296, + "loss_iou": 0.50390625, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 514046516, + "step": 2993 + }, + { + "epoch": 0.7875320576050503, + "grad_norm": 6.0868763702009465, + "learning_rate": 5e-06, + "loss": 0.1044, + "num_input_tokens_seen": 514218536, + "step": 2994 + }, + { + "epoch": 0.7875320576050503, + "loss": 0.15223419666290283, + "loss_ce": 0.0040408410131931305, + "loss_iou": 0.5390625, + "loss_num": 0.0296630859375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 514218536, + "step": 2994 + }, + { + "epoch": 0.7877950943644374, + "grad_norm": 4.672234975964948, + "learning_rate": 5e-06, + "loss": 0.0826, + "num_input_tokens_seen": 514391068, + "step": 2995 + }, + { + "epoch": 0.7877950943644374, + "loss": 0.05085252225399017, + "loss_ce": 0.0038707097992300987, + "loss_iou": 0.51171875, + "loss_num": 0.0093994140625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 514391068, + "step": 2995 + }, + { + "epoch": 0.7880581311238245, + "grad_norm": 6.661101757033409, + "learning_rate": 5e-06, + "loss": 0.1373, + "num_input_tokens_seen": 514561472, + "step": 2996 + }, + { + "epoch": 0.7880581311238245, + "loss": 0.20267510414123535, + "loss_ce": 0.000404604768846184, + "loss_iou": 0.333984375, + "loss_num": 0.04052734375, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 514561472, + "step": 2996 + }, + { + "epoch": 0.7883211678832117, + "grad_norm": 9.694053172636844, + "learning_rate": 5e-06, + "loss": 0.148, + "num_input_tokens_seen": 514733680, + "step": 2997 + }, + { + "epoch": 0.7883211678832117, + "loss": 0.13084860146045685, + "loss_ce": 0.002278049010783434, + "loss_iou": 0.56640625, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 514733680, + "step": 2997 + }, + { + "epoch": 0.7885842046425988, + "grad_norm": 69.63701857559735, + "learning_rate": 5e-06, + "loss": 0.1399, + "num_input_tokens_seen": 514905800, + "step": 2998 + }, + { + "epoch": 0.7885842046425988, + "loss": 0.13946260511875153, + "loss_ce": 0.004315503872931004, + "loss_iou": 0.48046875, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 514905800, + "step": 2998 + }, + { + "epoch": 0.7888472414019859, + "grad_norm": 16.568333279972496, + "learning_rate": 5e-06, + "loss": 0.0888, + "num_input_tokens_seen": 515077960, + "step": 2999 + }, + { + "epoch": 0.7888472414019859, + "loss": 0.10318569839000702, + "loss_ce": 0.004888580180704594, + "loss_iou": 0.46875, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 515077960, + "step": 2999 + }, + { + "epoch": 0.789110278161373, + "grad_norm": 5.577264845510526, + "learning_rate": 5e-06, + "loss": 0.0948, + "num_input_tokens_seen": 515248120, + "step": 3000 + }, + { + "epoch": 0.789110278161373, + "eval_websight_new_CIoU": 0.8789950311183929, + "eval_websight_new_GIoU": 0.881831705570221, + "eval_websight_new_IoU": 0.8831256628036499, + "eval_websight_new_MAE_all": 0.016948864795267582, + "eval_websight_new_MAE_h": 0.00989671004936099, + "eval_websight_new_MAE_w": 0.027918956242501736, + "eval_websight_new_MAE_x": 0.024959519505500793, + "eval_websight_new_MAE_y": 0.005020270356908441, + "eval_websight_new_NUM_probability": 0.9999921023845673, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 0.08870729804039001, + "eval_websight_new_loss_ce": 6.795420176786138e-06, + "eval_websight_new_loss_iou": 0.3958740234375, + "eval_websight_new_loss_num": 0.016002655029296875, + "eval_websight_new_loss_xval": 0.0800323486328125, + "eval_websight_new_runtime": 55.9259, + "eval_websight_new_samples_per_second": 0.894, + "eval_websight_new_steps_per_second": 0.036, + "num_input_tokens_seen": 515248120, + "step": 3000 + }, + { + "epoch": 0.789110278161373, + "eval_seeclick_CIoU": 0.6246416866779327, + "eval_seeclick_GIoU": 0.6264897286891937, + "eval_seeclick_IoU": 0.650597870349884, + "eval_seeclick_MAE_all": 0.04822420887649059, + "eval_seeclick_MAE_h": 0.030418247915804386, + "eval_seeclick_MAE_w": 0.0654730387032032, + "eval_seeclick_MAE_x": 0.07548732310533524, + "eval_seeclick_MAE_y": 0.0215182239189744, + "eval_seeclick_NUM_probability": 0.9999599456787109, + "eval_seeclick_inside_bbox": 0.9375, + "eval_seeclick_loss": 0.2215959131717682, + "eval_seeclick_loss_ce": 0.008739723358303308, + "eval_seeclick_loss_iou": 0.4912109375, + "eval_seeclick_loss_num": 0.043231964111328125, + "eval_seeclick_loss_xval": 0.216217041015625, + "eval_seeclick_runtime": 91.1896, + "eval_seeclick_samples_per_second": 0.472, + "eval_seeclick_steps_per_second": 0.022, + "num_input_tokens_seen": 515248120, + "step": 3000 + }, + { + "epoch": 0.789110278161373, + "eval_icons_CIoU": 0.8647165596485138, + "eval_icons_GIoU": 0.8629220426082611, + "eval_icons_IoU": 0.8704103231430054, + "eval_icons_MAE_all": 0.018641653936356306, + "eval_icons_MAE_h": 0.02465621568262577, + "eval_icons_MAE_w": 0.017738113179802895, + "eval_icons_MAE_x": 0.013989617582410574, + "eval_icons_MAE_y": 0.018182669766247272, + "eval_icons_NUM_probability": 0.999986469745636, + "eval_icons_inside_bbox": 0.984375, + "eval_icons_loss": 0.06894619762897491, + "eval_icons_loss_ce": 9.26887514651753e-06, + "eval_icons_loss_iou": 0.6270751953125, + "eval_icons_loss_num": 0.012828826904296875, + "eval_icons_loss_xval": 0.0641326904296875, + "eval_icons_runtime": 101.3174, + "eval_icons_samples_per_second": 0.493, + "eval_icons_steps_per_second": 0.02, + "num_input_tokens_seen": 515248120, + "step": 3000 + }, + { + "epoch": 0.789110278161373, + "eval_screenspot_CIoU": 0.5550089081128439, + "eval_screenspot_GIoU": 0.5516955653826395, + "eval_screenspot_IoU": 0.5956698457400004, + "eval_screenspot_MAE_all": 0.08370348066091537, + "eval_screenspot_MAE_h": 0.056737360854943596, + "eval_screenspot_MAE_w": 0.1454519679148992, + "eval_screenspot_MAE_x": 0.08442502965529759, + "eval_screenspot_MAE_y": 0.04819955242176851, + "eval_screenspot_NUM_probability": 0.9998689492543539, + "eval_screenspot_inside_bbox": 0.8395833373069763, + "eval_screenspot_loss": 0.9374791979789734, + "eval_screenspot_loss_ce": 0.5804212689399719, + "eval_screenspot_loss_iou": 0.5504557291666666, + "eval_screenspot_loss_num": 0.069854736328125, + "eval_screenspot_loss_xval": 0.3492228190104167, + "eval_screenspot_runtime": 149.2861, + "eval_screenspot_samples_per_second": 0.596, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 515248120, + "step": 3000 + }, + { + "epoch": 0.789110278161373, + "loss": 0.9090390205383301, + "loss_ce": 0.5727353096008301, + "loss_iou": 0.4609375, + "loss_num": 0.0673828125, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 515248120, + "step": 3000 + }, + { + "epoch": 0.7893733149207601, + "grad_norm": 4.193864370890535, + "learning_rate": 5e-06, + "loss": 0.1374, + "num_input_tokens_seen": 515420392, + "step": 3001 + }, + { + "epoch": 0.7893733149207601, + "loss": 0.12226281315088272, + "loss_ce": 0.0011690594255924225, + "loss_iou": 0.515625, + "loss_num": 0.0242919921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 515420392, + "step": 3001 + }, + { + "epoch": 0.7896363516801473, + "grad_norm": 4.537019888829979, + "learning_rate": 5e-06, + "loss": 0.1312, + "num_input_tokens_seen": 515590772, + "step": 3002 + }, + { + "epoch": 0.7896363516801473, + "loss": 0.14288941025733948, + "loss_ce": 0.0013793996768072248, + "loss_iou": 0.35546875, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 515590772, + "step": 3002 + }, + { + "epoch": 0.7898993884395344, + "grad_norm": 7.837104532573747, + "learning_rate": 5e-06, + "loss": 0.1585, + "num_input_tokens_seen": 515763064, + "step": 3003 + }, + { + "epoch": 0.7898993884395344, + "loss": 0.20384354889392853, + "loss_ce": 0.0005659655435010791, + "loss_iou": 0.443359375, + "loss_num": 0.04052734375, + "loss_xval": 0.203125, + "num_input_tokens_seen": 515763064, + "step": 3003 + }, + { + "epoch": 0.7901624251989215, + "grad_norm": 9.871988037860998, + "learning_rate": 5e-06, + "loss": 0.115, + "num_input_tokens_seen": 515935288, + "step": 3004 + }, + { + "epoch": 0.7901624251989215, + "loss": 0.1931522786617279, + "loss_ce": 0.004248465411365032, + "loss_iou": 0.33203125, + "loss_num": 0.037841796875, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 515935288, + "step": 3004 + }, + { + "epoch": 0.7904254619583087, + "grad_norm": 6.705662570329979, + "learning_rate": 5e-06, + "loss": 0.1395, + "num_input_tokens_seen": 516107836, + "step": 3005 + }, + { + "epoch": 0.7904254619583087, + "loss": 0.24680155515670776, + "loss_ce": 0.0018979848828166723, + "loss_iou": 0.53125, + "loss_num": 0.049072265625, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 516107836, + "step": 3005 + }, + { + "epoch": 0.7906884987176958, + "grad_norm": 3.468178023705032, + "learning_rate": 5e-06, + "loss": 0.1047, + "num_input_tokens_seen": 516280260, + "step": 3006 + }, + { + "epoch": 0.7906884987176958, + "loss": 0.12856581807136536, + "loss_ce": 0.0021620113402605057, + "loss_iou": 0.478515625, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 516280260, + "step": 3006 + }, + { + "epoch": 0.790951535477083, + "grad_norm": 2.516769311233636, + "learning_rate": 5e-06, + "loss": 0.1021, + "num_input_tokens_seen": 516452472, + "step": 3007 + }, + { + "epoch": 0.790951535477083, + "loss": 0.03532446175813675, + "loss_ce": 7.665850716875866e-05, + "loss_iou": 0.404296875, + "loss_num": 0.007049560546875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 516452472, + "step": 3007 + }, + { + "epoch": 0.7912145722364701, + "grad_norm": 2.8390538522305993, + "learning_rate": 5e-06, + "loss": 0.1163, + "num_input_tokens_seen": 516624540, + "step": 3008 + }, + { + "epoch": 0.7912145722364701, + "loss": 0.10718082636594772, + "loss_ce": 0.0042908103205263615, + "loss_iou": 0.51953125, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 516624540, + "step": 3008 + }, + { + "epoch": 0.7914776089958572, + "grad_norm": 4.2650017166958545, + "learning_rate": 5e-06, + "loss": 0.1052, + "num_input_tokens_seen": 516796808, + "step": 3009 + }, + { + "epoch": 0.7914776089958572, + "loss": 0.17158488929271698, + "loss_ce": 0.0024106951896101236, + "loss_iou": 0.392578125, + "loss_num": 0.033935546875, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 516796808, + "step": 3009 + }, + { + "epoch": 0.7917406457552443, + "grad_norm": 6.803158022173843, + "learning_rate": 5e-06, + "loss": 0.1233, + "num_input_tokens_seen": 516969144, + "step": 3010 + }, + { + "epoch": 0.7917406457552443, + "loss": 0.12062282115221024, + "loss_ce": 0.0013601221144199371, + "loss_iou": 0.50390625, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 516969144, + "step": 3010 + }, + { + "epoch": 0.7920036825146314, + "grad_norm": 5.268394185167123, + "learning_rate": 5e-06, + "loss": 0.0984, + "num_input_tokens_seen": 517139672, + "step": 3011 + }, + { + "epoch": 0.7920036825146314, + "loss": 0.10529518872499466, + "loss_ce": 0.0033054398372769356, + "loss_iou": 0.330078125, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 517139672, + "step": 3011 + }, + { + "epoch": 0.7922667192740186, + "grad_norm": 4.897299268033889, + "learning_rate": 5e-06, + "loss": 0.1488, + "num_input_tokens_seen": 517311812, + "step": 3012 + }, + { + "epoch": 0.7922667192740186, + "loss": 0.15520727634429932, + "loss_ce": 0.0010019636247307062, + "loss_iou": 0.47265625, + "loss_num": 0.03076171875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 517311812, + "step": 3012 + }, + { + "epoch": 0.7925297560334057, + "grad_norm": 6.998508129858562, + "learning_rate": 5e-06, + "loss": 0.1503, + "num_input_tokens_seen": 517483804, + "step": 3013 + }, + { + "epoch": 0.7925297560334057, + "loss": 0.14621217548847198, + "loss_ce": 0.004305441863834858, + "loss_iou": 0.60546875, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 517483804, + "step": 3013 + }, + { + "epoch": 0.7927927927927928, + "grad_norm": 8.855328154069232, + "learning_rate": 5e-06, + "loss": 0.0784, + "num_input_tokens_seen": 517656016, + "step": 3014 + }, + { + "epoch": 0.7927927927927928, + "loss": 0.07204173505306244, + "loss_ce": 0.003957017324864864, + "loss_iou": 0.515625, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 517656016, + "step": 3014 + }, + { + "epoch": 0.7930558295521799, + "grad_norm": 26.2413884381383, + "learning_rate": 5e-06, + "loss": 0.1451, + "num_input_tokens_seen": 517828272, + "step": 3015 + }, + { + "epoch": 0.7930558295521799, + "loss": 0.11117606610059738, + "loss_ce": 0.0008550205966457725, + "loss_iou": 0.455078125, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 517828272, + "step": 3015 + }, + { + "epoch": 0.793318866311567, + "grad_norm": 4.148118560901186, + "learning_rate": 5e-06, + "loss": 0.1143, + "num_input_tokens_seen": 518000272, + "step": 3016 + }, + { + "epoch": 0.793318866311567, + "loss": 0.06484581530094147, + "loss_ce": 0.0005757926846854389, + "loss_iou": 0.3828125, + "loss_num": 0.0128173828125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 518000272, + "step": 3016 + }, + { + "epoch": 0.7935819030709542, + "grad_norm": 13.9691023081111, + "learning_rate": 5e-06, + "loss": 0.0913, + "num_input_tokens_seen": 518172412, + "step": 3017 + }, + { + "epoch": 0.7935819030709542, + "loss": 0.10305923223495483, + "loss_ce": 0.002427519764751196, + "loss_iou": 0.373046875, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 518172412, + "step": 3017 + }, + { + "epoch": 0.7938449398303413, + "grad_norm": 4.54961216940048, + "learning_rate": 5e-06, + "loss": 0.1051, + "num_input_tokens_seen": 518344668, + "step": 3018 + }, + { + "epoch": 0.7938449398303413, + "loss": 0.14651378989219666, + "loss_ce": 0.0014942658599466085, + "loss_iou": 0.53125, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 518344668, + "step": 3018 + }, + { + "epoch": 0.7941079765897284, + "grad_norm": 4.200570019762456, + "learning_rate": 5e-06, + "loss": 0.107, + "num_input_tokens_seen": 518517120, + "step": 3019 + }, + { + "epoch": 0.7941079765897284, + "loss": 0.11832761764526367, + "loss_ce": 0.00037718465318903327, + "loss_iou": 0.46484375, + "loss_num": 0.0235595703125, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 518517120, + "step": 3019 + }, + { + "epoch": 0.7943710133491155, + "grad_norm": 7.5294315936950005, + "learning_rate": 5e-06, + "loss": 0.0876, + "num_input_tokens_seen": 518689296, + "step": 3020 + }, + { + "epoch": 0.7943710133491155, + "loss": 0.06614542007446289, + "loss_ce": 0.00021218777692411095, + "loss_iou": 0.427734375, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 518689296, + "step": 3020 + }, + { + "epoch": 0.7946340501085026, + "grad_norm": 5.635502401126071, + "learning_rate": 5e-06, + "loss": 0.1396, + "num_input_tokens_seen": 518859544, + "step": 3021 + }, + { + "epoch": 0.7946340501085026, + "loss": 0.12032654881477356, + "loss_ce": 0.0027118013240396976, + "loss_iou": 0.3359375, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 518859544, + "step": 3021 + }, + { + "epoch": 0.7948970868678897, + "grad_norm": 6.009060825525287, + "learning_rate": 5e-06, + "loss": 0.1069, + "num_input_tokens_seen": 519031676, + "step": 3022 + }, + { + "epoch": 0.7948970868678897, + "loss": 0.1436431109905243, + "loss_ce": 0.0033232811838388443, + "loss_iou": 0.66015625, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 519031676, + "step": 3022 + }, + { + "epoch": 0.795160123627277, + "grad_norm": 4.029495813443641, + "learning_rate": 5e-06, + "loss": 0.1666, + "num_input_tokens_seen": 519203700, + "step": 3023 + }, + { + "epoch": 0.795160123627277, + "loss": 0.15425805747509003, + "loss_ce": 0.0003884279867634177, + "loss_iou": 0.4453125, + "loss_num": 0.03076171875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 519203700, + "step": 3023 + }, + { + "epoch": 0.795423160386664, + "grad_norm": 37.082876522501756, + "learning_rate": 5e-06, + "loss": 0.1101, + "num_input_tokens_seen": 519375616, + "step": 3024 + }, + { + "epoch": 0.795423160386664, + "loss": 0.1391124576330185, + "loss_ce": 0.00012015047832392156, + "loss_iou": 0.5625, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 519375616, + "step": 3024 + }, + { + "epoch": 0.7956861971460512, + "grad_norm": 3.823112661234258, + "learning_rate": 5e-06, + "loss": 0.1111, + "num_input_tokens_seen": 519547736, + "step": 3025 + }, + { + "epoch": 0.7956861971460512, + "loss": 0.09849868714809418, + "loss_ce": 0.0015290760202333331, + "loss_iou": 0.359375, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 519547736, + "step": 3025 + }, + { + "epoch": 0.7959492339054383, + "grad_norm": 4.930287612034227, + "learning_rate": 5e-06, + "loss": 0.0922, + "num_input_tokens_seen": 519720084, + "step": 3026 + }, + { + "epoch": 0.7959492339054383, + "loss": 0.1211109384894371, + "loss_ce": 0.005083112046122551, + "loss_iou": 0.6328125, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 519720084, + "step": 3026 + }, + { + "epoch": 0.7962122706648254, + "grad_norm": 13.986605867255715, + "learning_rate": 5e-06, + "loss": 0.1684, + "num_input_tokens_seen": 519892180, + "step": 3027 + }, + { + "epoch": 0.7962122706648254, + "loss": 0.16327086091041565, + "loss_ce": 0.0026263254694640636, + "loss_iou": 0.408203125, + "loss_num": 0.0322265625, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 519892180, + "step": 3027 + }, + { + "epoch": 0.7964753074242126, + "grad_norm": 12.476632712407037, + "learning_rate": 5e-06, + "loss": 0.0691, + "num_input_tokens_seen": 520064636, + "step": 3028 + }, + { + "epoch": 0.7964753074242126, + "loss": 0.11255937814712524, + "loss_ce": 0.000788743665907532, + "loss_iou": 0.4609375, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 520064636, + "step": 3028 + }, + { + "epoch": 0.7967383441835997, + "grad_norm": 3.6081873902324326, + "learning_rate": 5e-06, + "loss": 0.1265, + "num_input_tokens_seen": 520236756, + "step": 3029 + }, + { + "epoch": 0.7967383441835997, + "loss": 0.12160242348909378, + "loss_ce": 0.0018361852271482348, + "loss_iou": 0.31640625, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 520236756, + "step": 3029 + }, + { + "epoch": 0.7970013809429868, + "grad_norm": 8.030386724109855, + "learning_rate": 5e-06, + "loss": 0.122, + "num_input_tokens_seen": 520407308, + "step": 3030 + }, + { + "epoch": 0.7970013809429868, + "loss": 0.18061389029026031, + "loss_ce": 0.0010331911034882069, + "loss_iou": 0.5625, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 520407308, + "step": 3030 + }, + { + "epoch": 0.7972644177023739, + "grad_norm": 11.21273309596263, + "learning_rate": 5e-06, + "loss": 0.1103, + "num_input_tokens_seen": 520579744, + "step": 3031 + }, + { + "epoch": 0.7972644177023739, + "loss": 0.082832470536232, + "loss_ce": 0.0002824235416483134, + "loss_iou": 0.515625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 520579744, + "step": 3031 + }, + { + "epoch": 0.797527454461761, + "grad_norm": 20.85679503542732, + "learning_rate": 5e-06, + "loss": 0.1348, + "num_input_tokens_seen": 520752148, + "step": 3032 + }, + { + "epoch": 0.797527454461761, + "loss": 0.12713350355625153, + "loss_ce": 0.0008517719688825309, + "loss_iou": 0.416015625, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 520752148, + "step": 3032 + }, + { + "epoch": 0.7977904912211482, + "grad_norm": 37.09104960564763, + "learning_rate": 5e-06, + "loss": 0.1238, + "num_input_tokens_seen": 520924220, + "step": 3033 + }, + { + "epoch": 0.7977904912211482, + "loss": 0.10619133710861206, + "loss_ce": 0.0030724371317774057, + "loss_iou": 0.5234375, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 520924220, + "step": 3033 + }, + { + "epoch": 0.7980535279805353, + "grad_norm": 16.059073148748364, + "learning_rate": 5e-06, + "loss": 0.1064, + "num_input_tokens_seen": 521093348, + "step": 3034 + }, + { + "epoch": 0.7980535279805353, + "loss": 0.06166623532772064, + "loss_ce": 0.0008294428698718548, + "loss_iou": 0.455078125, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 521093348, + "step": 3034 + }, + { + "epoch": 0.7983165647399224, + "grad_norm": 8.368290368776519, + "learning_rate": 5e-06, + "loss": 0.1456, + "num_input_tokens_seen": 521265688, + "step": 3035 + }, + { + "epoch": 0.7983165647399224, + "loss": 0.14087893068790436, + "loss_ce": 0.004129666369408369, + "loss_iou": 0.51171875, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 521265688, + "step": 3035 + }, + { + "epoch": 0.7985796014993095, + "grad_norm": 25.178404936799044, + "learning_rate": 5e-06, + "loss": 0.1592, + "num_input_tokens_seen": 521437496, + "step": 3036 + }, + { + "epoch": 0.7985796014993095, + "loss": 0.29297274351119995, + "loss_ce": 0.002842147834599018, + "loss_iou": 0.357421875, + "loss_num": 0.05810546875, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 521437496, + "step": 3036 + }, + { + "epoch": 0.7988426382586966, + "grad_norm": 3.991516418389156, + "learning_rate": 5e-06, + "loss": 0.0941, + "num_input_tokens_seen": 521609940, + "step": 3037 + }, + { + "epoch": 0.7988426382586966, + "loss": 0.09374190121889114, + "loss_ce": 0.0009379457915201783, + "loss_iou": 0.392578125, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 521609940, + "step": 3037 + }, + { + "epoch": 0.7991056750180838, + "grad_norm": 4.808453824486521, + "learning_rate": 5e-06, + "loss": 0.1124, + "num_input_tokens_seen": 521781892, + "step": 3038 + }, + { + "epoch": 0.7991056750180838, + "loss": 0.07893365621566772, + "loss_ce": 0.001052800682373345, + "loss_iou": 0.5078125, + "loss_num": 0.01556396484375, + "loss_xval": 0.078125, + "num_input_tokens_seen": 521781892, + "step": 3038 + }, + { + "epoch": 0.7993687117774709, + "grad_norm": 12.778560985993714, + "learning_rate": 5e-06, + "loss": 0.109, + "num_input_tokens_seen": 521954108, + "step": 3039 + }, + { + "epoch": 0.7993687117774709, + "loss": 0.12000415474176407, + "loss_ce": 0.0017790585989132524, + "loss_iou": 0.4765625, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 521954108, + "step": 3039 + }, + { + "epoch": 0.799631748536858, + "grad_norm": 5.33906426825674, + "learning_rate": 5e-06, + "loss": 0.1944, + "num_input_tokens_seen": 522126236, + "step": 3040 + }, + { + "epoch": 0.799631748536858, + "loss": 0.2638484239578247, + "loss_ce": 0.00042068029870279133, + "loss_iou": 0.6015625, + "loss_num": 0.052734375, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 522126236, + "step": 3040 + }, + { + "epoch": 0.7998947852962451, + "grad_norm": 4.623536872405031, + "learning_rate": 5e-06, + "loss": 0.0883, + "num_input_tokens_seen": 522298380, + "step": 3041 + }, + { + "epoch": 0.7998947852962451, + "loss": 0.12680000066757202, + "loss_ce": 0.0012506938073784113, + "loss_iou": 0.337890625, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 522298380, + "step": 3041 + }, + { + "epoch": 0.8001578220556322, + "grad_norm": 4.125445081698177, + "learning_rate": 5e-06, + "loss": 0.0945, + "num_input_tokens_seen": 522470360, + "step": 3042 + }, + { + "epoch": 0.8001578220556322, + "loss": 0.15849211812019348, + "loss_ce": 0.0013113392051309347, + "loss_iou": 0.50390625, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 522470360, + "step": 3042 + }, + { + "epoch": 0.8004208588150195, + "grad_norm": 11.789889533203668, + "learning_rate": 5e-06, + "loss": 0.0876, + "num_input_tokens_seen": 522642472, + "step": 3043 + }, + { + "epoch": 0.8004208588150195, + "loss": 0.09324462711811066, + "loss_ce": 0.0006237818161025643, + "loss_iou": 0.50390625, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 522642472, + "step": 3043 + }, + { + "epoch": 0.8006838955744066, + "grad_norm": 4.414508918444819, + "learning_rate": 5e-06, + "loss": 0.1147, + "num_input_tokens_seen": 522814808, + "step": 3044 + }, + { + "epoch": 0.8006838955744066, + "loss": 0.07435610890388489, + "loss_ce": 0.0009460713481530547, + "loss_iou": 0.474609375, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 522814808, + "step": 3044 + }, + { + "epoch": 0.8009469323337937, + "grad_norm": 6.1075510398524395, + "learning_rate": 5e-06, + "loss": 0.106, + "num_input_tokens_seen": 522986892, + "step": 3045 + }, + { + "epoch": 0.8009469323337937, + "loss": 0.08977600932121277, + "loss_ce": 0.0010614084312692285, + "loss_iou": 0.46484375, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 522986892, + "step": 3045 + }, + { + "epoch": 0.8012099690931808, + "grad_norm": 6.86235832016632, + "learning_rate": 5e-06, + "loss": 0.1369, + "num_input_tokens_seen": 523159044, + "step": 3046 + }, + { + "epoch": 0.8012099690931808, + "loss": 0.10283628106117249, + "loss_ce": 0.003226900240406394, + "loss_iou": 0.578125, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 523159044, + "step": 3046 + }, + { + "epoch": 0.8014730058525679, + "grad_norm": 8.620167546317841, + "learning_rate": 5e-06, + "loss": 0.1508, + "num_input_tokens_seen": 523331372, + "step": 3047 + }, + { + "epoch": 0.8014730058525679, + "loss": 0.17512959241867065, + "loss_ce": 0.0035597749520093203, + "loss_iou": 0.404296875, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 523331372, + "step": 3047 + }, + { + "epoch": 0.801736042611955, + "grad_norm": 4.823577960221348, + "learning_rate": 5e-06, + "loss": 0.0952, + "num_input_tokens_seen": 523504004, + "step": 3048 + }, + { + "epoch": 0.801736042611955, + "loss": 0.07766547054052353, + "loss_ce": 0.0020429138094186783, + "loss_iou": 0.44921875, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 523504004, + "step": 3048 + }, + { + "epoch": 0.8019990793713422, + "grad_norm": 11.00975772697334, + "learning_rate": 5e-06, + "loss": 0.0908, + "num_input_tokens_seen": 523675952, + "step": 3049 + }, + { + "epoch": 0.8019990793713422, + "loss": 0.11927802860736847, + "loss_ce": 0.001342848176136613, + "loss_iou": 0.359375, + "loss_num": 0.0235595703125, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 523675952, + "step": 3049 + }, + { + "epoch": 0.8022621161307293, + "grad_norm": 10.685552832243063, + "learning_rate": 5e-06, + "loss": 0.1029, + "num_input_tokens_seen": 523848152, + "step": 3050 + }, + { + "epoch": 0.8022621161307293, + "loss": 0.0827643871307373, + "loss_ce": 0.002106426050886512, + "loss_iou": 0.5859375, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 523848152, + "step": 3050 + }, + { + "epoch": 0.8025251528901164, + "grad_norm": 24.17010500449297, + "learning_rate": 5e-06, + "loss": 0.1164, + "num_input_tokens_seen": 524020708, + "step": 3051 + }, + { + "epoch": 0.8025251528901164, + "loss": 0.10048617422580719, + "loss_ce": 0.000510584854055196, + "loss_iou": 0.51953125, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 524020708, + "step": 3051 + }, + { + "epoch": 0.8027881896495035, + "grad_norm": 4.992845761930844, + "learning_rate": 5e-06, + "loss": 0.1304, + "num_input_tokens_seen": 524191372, + "step": 3052 + }, + { + "epoch": 0.8027881896495035, + "loss": 0.15961629152297974, + "loss_ce": 0.006387531757354736, + "loss_iou": 0.4453125, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 524191372, + "step": 3052 + }, + { + "epoch": 0.8030512264088906, + "grad_norm": 2.943381415031694, + "learning_rate": 5e-06, + "loss": 0.1292, + "num_input_tokens_seen": 524363912, + "step": 3053 + }, + { + "epoch": 0.8030512264088906, + "loss": 0.15981021523475647, + "loss_ce": 0.0005695016006939113, + "loss_iou": 0.419921875, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 524363912, + "step": 3053 + }, + { + "epoch": 0.8033142631682778, + "grad_norm": 5.648094984205903, + "learning_rate": 5e-06, + "loss": 0.1161, + "num_input_tokens_seen": 524536316, + "step": 3054 + }, + { + "epoch": 0.8033142631682778, + "loss": 0.1740710288286209, + "loss_ce": 0.0022723155561834574, + "loss_iou": 0.52734375, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 524536316, + "step": 3054 + }, + { + "epoch": 0.8035772999276649, + "grad_norm": 7.984973488378181, + "learning_rate": 5e-06, + "loss": 0.0847, + "num_input_tokens_seen": 524708444, + "step": 3055 + }, + { + "epoch": 0.8035772999276649, + "loss": 0.07127489894628525, + "loss_ce": 0.000809812976513058, + "loss_iou": 0.5078125, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 524708444, + "step": 3055 + }, + { + "epoch": 0.803840336687052, + "grad_norm": 6.031677657650838, + "learning_rate": 5e-06, + "loss": 0.0908, + "num_input_tokens_seen": 524880960, + "step": 3056 + }, + { + "epoch": 0.803840336687052, + "loss": 0.0638066828250885, + "loss_ce": 0.0005437473300844431, + "loss_iou": 0.703125, + "loss_num": 0.01263427734375, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 524880960, + "step": 3056 + }, + { + "epoch": 0.8041033734464391, + "grad_norm": 5.599082172393745, + "learning_rate": 5e-06, + "loss": 0.086, + "num_input_tokens_seen": 525053308, + "step": 3057 + }, + { + "epoch": 0.8041033734464391, + "loss": 0.07932358235120773, + "loss_ce": 0.0024650623090565205, + "loss_iou": 0.546875, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 525053308, + "step": 3057 + }, + { + "epoch": 0.8043664102058262, + "grad_norm": 28.17799645696428, + "learning_rate": 5e-06, + "loss": 0.0923, + "num_input_tokens_seen": 525225432, + "step": 3058 + }, + { + "epoch": 0.8043664102058262, + "loss": 0.08576367795467377, + "loss_ce": 0.0013825736241415143, + "loss_iou": 0.578125, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 525225432, + "step": 3058 + }, + { + "epoch": 0.8046294469652134, + "grad_norm": 3.4834198316912564, + "learning_rate": 5e-06, + "loss": 0.0788, + "num_input_tokens_seen": 525397804, + "step": 3059 + }, + { + "epoch": 0.8046294469652134, + "loss": 0.07873048633337021, + "loss_ce": 0.0016125671099871397, + "loss_iou": 0.345703125, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 525397804, + "step": 3059 + }, + { + "epoch": 0.8048924837246005, + "grad_norm": 2.9860188986605367, + "learning_rate": 5e-06, + "loss": 0.0839, + "num_input_tokens_seen": 525570124, + "step": 3060 + }, + { + "epoch": 0.8048924837246005, + "loss": 0.07787738740444183, + "loss_ce": 0.0008205035701394081, + "loss_iou": 0.62890625, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 525570124, + "step": 3060 + }, + { + "epoch": 0.8051555204839876, + "grad_norm": 14.35918831416577, + "learning_rate": 5e-06, + "loss": 0.1217, + "num_input_tokens_seen": 525742708, + "step": 3061 + }, + { + "epoch": 0.8051555204839876, + "loss": 0.10341215878725052, + "loss_ce": 0.00123931048437953, + "loss_iou": 0.458984375, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 525742708, + "step": 3061 + }, + { + "epoch": 0.8054185572433747, + "grad_norm": 16.381501371718237, + "learning_rate": 5e-06, + "loss": 0.1015, + "num_input_tokens_seen": 525914908, + "step": 3062 + }, + { + "epoch": 0.8054185572433747, + "loss": 0.10784236341714859, + "loss_ce": 0.0002678967430256307, + "loss_iou": 0.462890625, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 525914908, + "step": 3062 + }, + { + "epoch": 0.8056815940027618, + "grad_norm": 4.73914065847523, + "learning_rate": 5e-06, + "loss": 0.1033, + "num_input_tokens_seen": 526087040, + "step": 3063 + }, + { + "epoch": 0.8056815940027618, + "loss": 0.06849893927574158, + "loss_ce": 0.000368443870684132, + "loss_iou": 0.455078125, + "loss_num": 0.01361083984375, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 526087040, + "step": 3063 + }, + { + "epoch": 0.8059446307621491, + "grad_norm": 8.562314005917766, + "learning_rate": 5e-06, + "loss": 0.0954, + "num_input_tokens_seen": 526259428, + "step": 3064 + }, + { + "epoch": 0.8059446307621491, + "loss": 0.07737226039171219, + "loss_ce": 0.0030009234324097633, + "loss_iou": 0.59375, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 526259428, + "step": 3064 + }, + { + "epoch": 0.8062076675215362, + "grad_norm": 12.302010843258703, + "learning_rate": 5e-06, + "loss": 0.1505, + "num_input_tokens_seen": 526431568, + "step": 3065 + }, + { + "epoch": 0.8062076675215362, + "loss": 0.20694774389266968, + "loss_ce": 0.0032123818527907133, + "loss_iou": 0.51171875, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 526431568, + "step": 3065 + }, + { + "epoch": 0.8064707042809233, + "grad_norm": 8.1243600151207, + "learning_rate": 5e-06, + "loss": 0.1291, + "num_input_tokens_seen": 526603728, + "step": 3066 + }, + { + "epoch": 0.8064707042809233, + "loss": 0.14467400312423706, + "loss_ce": 0.0004784482589457184, + "loss_iou": 0.59375, + "loss_num": 0.02880859375, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 526603728, + "step": 3066 + }, + { + "epoch": 0.8067337410403104, + "grad_norm": 92.56005912402708, + "learning_rate": 5e-06, + "loss": 0.1108, + "num_input_tokens_seen": 526775572, + "step": 3067 + }, + { + "epoch": 0.8067337410403104, + "loss": 0.08829745650291443, + "loss_ce": 0.0011697689769789577, + "loss_iou": 0.44140625, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 526775572, + "step": 3067 + }, + { + "epoch": 0.8069967777996975, + "grad_norm": 33.266184929246414, + "learning_rate": 5e-06, + "loss": 0.0732, + "num_input_tokens_seen": 526947576, + "step": 3068 + }, + { + "epoch": 0.8069967777996975, + "loss": 0.060733191668987274, + "loss_ce": 0.0012391710188239813, + "loss_iou": 0.59375, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 526947576, + "step": 3068 + }, + { + "epoch": 0.8072598145590847, + "grad_norm": 5.97758373469706, + "learning_rate": 5e-06, + "loss": 0.0885, + "num_input_tokens_seen": 527117908, + "step": 3069 + }, + { + "epoch": 0.8072598145590847, + "loss": 0.09366244077682495, + "loss_ce": 0.006046472117304802, + "loss_iou": 0.51953125, + "loss_num": 0.017578125, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 527117908, + "step": 3069 + }, + { + "epoch": 0.8075228513184718, + "grad_norm": 5.290039917786788, + "learning_rate": 5e-06, + "loss": 0.143, + "num_input_tokens_seen": 527287100, + "step": 3070 + }, + { + "epoch": 0.8075228513184718, + "loss": 0.10383239388465881, + "loss_ce": 0.00046935188584029675, + "loss_iou": 0.56640625, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 527287100, + "step": 3070 + }, + { + "epoch": 0.8077858880778589, + "grad_norm": 4.7891570756526125, + "learning_rate": 5e-06, + "loss": 0.14, + "num_input_tokens_seen": 527458888, + "step": 3071 + }, + { + "epoch": 0.8077858880778589, + "loss": 0.10491342842578888, + "loss_ce": 0.0008484934223815799, + "loss_iou": 0.46484375, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 527458888, + "step": 3071 + }, + { + "epoch": 0.808048924837246, + "grad_norm": 11.230743830755795, + "learning_rate": 5e-06, + "loss": 0.1349, + "num_input_tokens_seen": 527630920, + "step": 3072 + }, + { + "epoch": 0.808048924837246, + "loss": 0.06209864094853401, + "loss_ce": 0.0007583063561469316, + "loss_iou": 0.64453125, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 527630920, + "step": 3072 + }, + { + "epoch": 0.8083119615966331, + "grad_norm": 5.040744603317089, + "learning_rate": 5e-06, + "loss": 0.1222, + "num_input_tokens_seen": 527803168, + "step": 3073 + }, + { + "epoch": 0.8083119615966331, + "loss": 0.1508997231721878, + "loss_ce": 0.0011194492690265179, + "loss_iou": 0.53515625, + "loss_num": 0.030029296875, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 527803168, + "step": 3073 + }, + { + "epoch": 0.8085749983560202, + "grad_norm": 8.63208434905108, + "learning_rate": 5e-06, + "loss": 0.0993, + "num_input_tokens_seen": 527975596, + "step": 3074 + }, + { + "epoch": 0.8085749983560202, + "loss": 0.11231046169996262, + "loss_ce": 0.00017361767822876573, + "loss_iou": 0.408203125, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 527975596, + "step": 3074 + }, + { + "epoch": 0.8088380351154074, + "grad_norm": 4.381253880078918, + "learning_rate": 5e-06, + "loss": 0.1243, + "num_input_tokens_seen": 528147980, + "step": 3075 + }, + { + "epoch": 0.8088380351154074, + "loss": 0.11533799767494202, + "loss_ce": 0.0029875326436012983, + "loss_iou": 0.275390625, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 528147980, + "step": 3075 + }, + { + "epoch": 0.8091010718747945, + "grad_norm": 8.161484057964593, + "learning_rate": 5e-06, + "loss": 0.1619, + "num_input_tokens_seen": 528320296, + "step": 3076 + }, + { + "epoch": 0.8091010718747945, + "loss": 0.16661548614501953, + "loss_ce": 0.0008439991506747901, + "loss_iou": 0.337890625, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 528320296, + "step": 3076 + }, + { + "epoch": 0.8093641086341816, + "grad_norm": 4.683292048006889, + "learning_rate": 5e-06, + "loss": 0.1219, + "num_input_tokens_seen": 528492424, + "step": 3077 + }, + { + "epoch": 0.8093641086341816, + "loss": 0.11115469038486481, + "loss_ce": 0.0029546155128628016, + "loss_iou": 0.51171875, + "loss_num": 0.0216064453125, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 528492424, + "step": 3077 + }, + { + "epoch": 0.8096271453935687, + "grad_norm": 5.257960533659777, + "learning_rate": 5e-06, + "loss": 0.1374, + "num_input_tokens_seen": 528664732, + "step": 3078 + }, + { + "epoch": 0.8096271453935687, + "loss": 0.21525058150291443, + "loss_ce": 0.001490199938416481, + "loss_iou": 0.435546875, + "loss_num": 0.042724609375, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 528664732, + "step": 3078 + }, + { + "epoch": 0.8098901821529558, + "grad_norm": 12.566112829085096, + "learning_rate": 5e-06, + "loss": 0.1408, + "num_input_tokens_seen": 528837012, + "step": 3079 + }, + { + "epoch": 0.8098901821529558, + "loss": 0.08169254660606384, + "loss_ce": 0.00021060870494693518, + "loss_iou": 0.45703125, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 528837012, + "step": 3079 + }, + { + "epoch": 0.810153218912343, + "grad_norm": 5.324934758940462, + "learning_rate": 5e-06, + "loss": 0.1012, + "num_input_tokens_seen": 529009052, + "step": 3080 + }, + { + "epoch": 0.810153218912343, + "loss": 0.08878730237483978, + "loss_ce": 0.0001795147400116548, + "loss_iou": 0.5078125, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 529009052, + "step": 3080 + }, + { + "epoch": 0.8104162556717301, + "grad_norm": 9.272723862546323, + "learning_rate": 5e-06, + "loss": 0.1193, + "num_input_tokens_seen": 529181216, + "step": 3081 + }, + { + "epoch": 0.8104162556717301, + "loss": 0.0852864533662796, + "loss_ce": 0.0025990745052695274, + "loss_iou": 0.51171875, + "loss_num": 0.0166015625, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 529181216, + "step": 3081 + }, + { + "epoch": 0.8106792924311172, + "grad_norm": 28.988099538140567, + "learning_rate": 5e-06, + "loss": 0.1292, + "num_input_tokens_seen": 529351788, + "step": 3082 + }, + { + "epoch": 0.8106792924311172, + "loss": 0.05319926142692566, + "loss_ce": 0.0007700645364820957, + "loss_iou": 0.6796875, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 529351788, + "step": 3082 + }, + { + "epoch": 0.8109423291905044, + "grad_norm": 5.871269884706642, + "learning_rate": 5e-06, + "loss": 0.1452, + "num_input_tokens_seen": 529524200, + "step": 3083 + }, + { + "epoch": 0.8109423291905044, + "loss": 0.180719256401062, + "loss_ce": 0.004815942607820034, + "loss_iou": 0.5078125, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 529524200, + "step": 3083 + }, + { + "epoch": 0.8112053659498915, + "grad_norm": 9.034864384864637, + "learning_rate": 5e-06, + "loss": 0.1046, + "num_input_tokens_seen": 529696672, + "step": 3084 + }, + { + "epoch": 0.8112053659498915, + "loss": 0.039623767137527466, + "loss_ce": 0.0003476430138107389, + "loss_iou": 0.546875, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 529696672, + "step": 3084 + }, + { + "epoch": 0.8114684027092787, + "grad_norm": 7.571992947700351, + "learning_rate": 5e-06, + "loss": 0.1168, + "num_input_tokens_seen": 529868932, + "step": 3085 + }, + { + "epoch": 0.8114684027092787, + "loss": 0.15128442645072937, + "loss_ce": 0.002373900031670928, + "loss_iou": 0.375, + "loss_num": 0.02978515625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 529868932, + "step": 3085 + }, + { + "epoch": 0.8117314394686658, + "grad_norm": 29.21584689056005, + "learning_rate": 5e-06, + "loss": 0.0879, + "num_input_tokens_seen": 530041080, + "step": 3086 + }, + { + "epoch": 0.8117314394686658, + "loss": 0.11466438323259354, + "loss_ce": 0.001291584805585444, + "loss_iou": 0.494140625, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 530041080, + "step": 3086 + }, + { + "epoch": 0.8119944762280529, + "grad_norm": 8.627125392444821, + "learning_rate": 5e-06, + "loss": 0.1586, + "num_input_tokens_seen": 530210668, + "step": 3087 + }, + { + "epoch": 0.8119944762280529, + "loss": 0.1240130364894867, + "loss_ce": 0.0024767834693193436, + "loss_iou": 0.39453125, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 530210668, + "step": 3087 + }, + { + "epoch": 0.81225751298744, + "grad_norm": 11.153523319241675, + "learning_rate": 5e-06, + "loss": 0.0849, + "num_input_tokens_seen": 530382776, + "step": 3088 + }, + { + "epoch": 0.81225751298744, + "loss": 0.07181555032730103, + "loss_ce": 0.00026709536905400455, + "loss_iou": NaN, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 530382776, + "step": 3088 + }, + { + "epoch": 0.8125205497468271, + "grad_norm": 10.380517960562548, + "learning_rate": 5e-06, + "loss": 0.1332, + "num_input_tokens_seen": 530555004, + "step": 3089 + }, + { + "epoch": 0.8125205497468271, + "loss": 0.16954563558101654, + "loss_ce": 0.0021872336510568857, + "loss_iou": 0.5078125, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 530555004, + "step": 3089 + }, + { + "epoch": 0.8127835865062143, + "grad_norm": 5.4446542029328775, + "learning_rate": 5e-06, + "loss": 0.0898, + "num_input_tokens_seen": 530726976, + "step": 3090 + }, + { + "epoch": 0.8127835865062143, + "loss": 0.08060289919376373, + "loss_ce": 0.00040270722820423543, + "loss_iou": 0.40234375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 530726976, + "step": 3090 + }, + { + "epoch": 0.8130466232656014, + "grad_norm": 5.347357758568155, + "learning_rate": 5e-06, + "loss": 0.1135, + "num_input_tokens_seen": 530898852, + "step": 3091 + }, + { + "epoch": 0.8130466232656014, + "loss": 0.08159644901752472, + "loss_ce": 0.0003891719679813832, + "loss_iou": 0.4140625, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 530898852, + "step": 3091 + }, + { + "epoch": 0.8133096600249885, + "grad_norm": 4.89308048127463, + "learning_rate": 5e-06, + "loss": 0.1005, + "num_input_tokens_seen": 531071100, + "step": 3092 + }, + { + "epoch": 0.8133096600249885, + "loss": 0.05772348493337631, + "loss_ce": 7.577867654617876e-05, + "loss_iou": 0.54296875, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 531071100, + "step": 3092 + }, + { + "epoch": 0.8135726967843756, + "grad_norm": 5.314071866923476, + "learning_rate": 5e-06, + "loss": 0.1052, + "num_input_tokens_seen": 531243088, + "step": 3093 + }, + { + "epoch": 0.8135726967843756, + "loss": 0.16711823642253876, + "loss_ce": 0.002140207216143608, + "loss_iou": 0.470703125, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 531243088, + "step": 3093 + }, + { + "epoch": 0.8138357335437627, + "grad_norm": 4.018224451020325, + "learning_rate": 5e-06, + "loss": 0.1035, + "num_input_tokens_seen": 531415364, + "step": 3094 + }, + { + "epoch": 0.8138357335437627, + "loss": 0.07948748767375946, + "loss_ce": 0.0013472279533743858, + "loss_iou": 0.431640625, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 531415364, + "step": 3094 + }, + { + "epoch": 0.8140987703031498, + "grad_norm": 5.643939653563094, + "learning_rate": 5e-06, + "loss": 0.0894, + "num_input_tokens_seen": 531587860, + "step": 3095 + }, + { + "epoch": 0.8140987703031498, + "loss": 0.070250503718853, + "loss_ce": 0.00019740140123758465, + "loss_iou": 0.48828125, + "loss_num": 0.0140380859375, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 531587860, + "step": 3095 + }, + { + "epoch": 0.814361807062537, + "grad_norm": 6.298167200858131, + "learning_rate": 5e-06, + "loss": 0.1384, + "num_input_tokens_seen": 531760160, + "step": 3096 + }, + { + "epoch": 0.814361807062537, + "loss": 0.13414643704891205, + "loss_ce": 0.0014254867564886808, + "loss_iou": 0.48046875, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 531760160, + "step": 3096 + }, + { + "epoch": 0.8146248438219241, + "grad_norm": 4.559829968544296, + "learning_rate": 5e-06, + "loss": 0.0996, + "num_input_tokens_seen": 531932728, + "step": 3097 + }, + { + "epoch": 0.8146248438219241, + "loss": 0.11818031221628189, + "loss_ce": 0.0006265999400056899, + "loss_iou": 0.44140625, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 531932728, + "step": 3097 + }, + { + "epoch": 0.8148878805813112, + "grad_norm": 6.798137203947019, + "learning_rate": 5e-06, + "loss": 0.1542, + "num_input_tokens_seen": 532104748, + "step": 3098 + }, + { + "epoch": 0.8148878805813112, + "loss": 0.13361144065856934, + "loss_ce": 0.0008294496219605207, + "loss_iou": 0.44921875, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 532104748, + "step": 3098 + }, + { + "epoch": 0.8151509173406983, + "grad_norm": 4.492953640528966, + "learning_rate": 5e-06, + "loss": 0.1604, + "num_input_tokens_seen": 532276872, + "step": 3099 + }, + { + "epoch": 0.8151509173406983, + "loss": 0.23120509088039398, + "loss_ce": 0.004703632555902004, + "loss_iou": 0.4296875, + "loss_num": 0.045166015625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 532276872, + "step": 3099 + }, + { + "epoch": 0.8154139541000854, + "grad_norm": 23.973104234846247, + "learning_rate": 5e-06, + "loss": 0.1384, + "num_input_tokens_seen": 532449140, + "step": 3100 + }, + { + "epoch": 0.8154139541000854, + "loss": 0.2094377875328064, + "loss_ce": 0.005488819442689419, + "loss_iou": 0.384765625, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 532449140, + "step": 3100 + }, + { + "epoch": 0.8156769908594727, + "grad_norm": 5.077265892021237, + "learning_rate": 5e-06, + "loss": 0.0818, + "num_input_tokens_seen": 532621452, + "step": 3101 + }, + { + "epoch": 0.8156769908594727, + "loss": 0.07252339273691177, + "loss_ce": 0.001692092278972268, + "loss_iou": 0.53125, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 532621452, + "step": 3101 + }, + { + "epoch": 0.8159400276188598, + "grad_norm": 4.611749386992743, + "learning_rate": 5e-06, + "loss": 0.1173, + "num_input_tokens_seen": 532793420, + "step": 3102 + }, + { + "epoch": 0.8159400276188598, + "loss": 0.10748874396085739, + "loss_ce": 0.0007687745383009315, + "loss_iou": 0.59375, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 532793420, + "step": 3102 + }, + { + "epoch": 0.8162030643782469, + "grad_norm": 8.46788739165104, + "learning_rate": 5e-06, + "loss": 0.111, + "num_input_tokens_seen": 532965672, + "step": 3103 + }, + { + "epoch": 0.8162030643782469, + "loss": 0.0986635610461235, + "loss_ce": 0.006317365914583206, + "loss_iou": 0.359375, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 532965672, + "step": 3103 + }, + { + "epoch": 0.816466101137634, + "grad_norm": 5.21012803335228, + "learning_rate": 5e-06, + "loss": 0.1037, + "num_input_tokens_seen": 533137620, + "step": 3104 + }, + { + "epoch": 0.816466101137634, + "loss": 0.1294005811214447, + "loss_ce": 0.0007232190691865981, + "loss_iou": NaN, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 533137620, + "step": 3104 + }, + { + "epoch": 0.8167291378970211, + "grad_norm": 4.29918218259194, + "learning_rate": 5e-06, + "loss": 0.0941, + "num_input_tokens_seen": 533308040, + "step": 3105 + }, + { + "epoch": 0.8167291378970211, + "loss": 0.07954747974872589, + "loss_ce": 0.0007816128781996667, + "loss_iou": 0.466796875, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 533308040, + "step": 3105 + }, + { + "epoch": 0.8169921746564083, + "grad_norm": 7.9379046963597775, + "learning_rate": 5e-06, + "loss": 0.0867, + "num_input_tokens_seen": 533480088, + "step": 3106 + }, + { + "epoch": 0.8169921746564083, + "loss": 0.055092211812734604, + "loss_ce": 0.002663013059645891, + "loss_iou": 0.578125, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 533480088, + "step": 3106 + }, + { + "epoch": 0.8172552114157954, + "grad_norm": 5.798174841670742, + "learning_rate": 5e-06, + "loss": 0.1232, + "num_input_tokens_seen": 533652304, + "step": 3107 + }, + { + "epoch": 0.8172552114157954, + "loss": 0.10001754760742188, + "loss_ce": 0.0004997201031073928, + "loss_iou": 0.4765625, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 533652304, + "step": 3107 + }, + { + "epoch": 0.8175182481751825, + "grad_norm": 4.397743796531851, + "learning_rate": 5e-06, + "loss": 0.1014, + "num_input_tokens_seen": 533824784, + "step": 3108 + }, + { + "epoch": 0.8175182481751825, + "loss": 0.04015748202800751, + "loss_ce": 0.001079726149328053, + "loss_iou": 0.376953125, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 533824784, + "step": 3108 + }, + { + "epoch": 0.8177812849345696, + "grad_norm": 3.0634489465977937, + "learning_rate": 5e-06, + "loss": 0.0922, + "num_input_tokens_seen": 533997192, + "step": 3109 + }, + { + "epoch": 0.8177812849345696, + "loss": 0.11612477153539658, + "loss_ce": 0.0004936738405376673, + "loss_iou": 0.5234375, + "loss_num": 0.0230712890625, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 533997192, + "step": 3109 + }, + { + "epoch": 0.8180443216939567, + "grad_norm": 11.903043667241318, + "learning_rate": 5e-06, + "loss": 0.0839, + "num_input_tokens_seen": 534169324, + "step": 3110 + }, + { + "epoch": 0.8180443216939567, + "loss": 0.12225233018398285, + "loss_ce": 0.002577648963779211, + "loss_iou": 0.4609375, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 534169324, + "step": 3110 + }, + { + "epoch": 0.8183073584533439, + "grad_norm": 11.232858169527498, + "learning_rate": 5e-06, + "loss": 0.0855, + "num_input_tokens_seen": 534341512, + "step": 3111 + }, + { + "epoch": 0.8183073584533439, + "loss": 0.093436598777771, + "loss_ce": 0.00041902740485966206, + "loss_iou": 0.4140625, + "loss_num": 0.0186767578125, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 534341512, + "step": 3111 + }, + { + "epoch": 0.818570395212731, + "grad_norm": 4.531836934905563, + "learning_rate": 5e-06, + "loss": 0.0859, + "num_input_tokens_seen": 534511748, + "step": 3112 + }, + { + "epoch": 0.818570395212731, + "loss": 0.07575514912605286, + "loss_ce": 0.00020888875587843359, + "loss_iou": 0.515625, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 534511748, + "step": 3112 + }, + { + "epoch": 0.8188334319721181, + "grad_norm": 7.470057833509574, + "learning_rate": 5e-06, + "loss": 0.1115, + "num_input_tokens_seen": 534683872, + "step": 3113 + }, + { + "epoch": 0.8188334319721181, + "loss": 0.10066039860248566, + "loss_ce": 0.00047118880320340395, + "loss_iou": 0.466796875, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 534683872, + "step": 3113 + }, + { + "epoch": 0.8190964687315052, + "grad_norm": 6.233336264317783, + "learning_rate": 5e-06, + "loss": 0.139, + "num_input_tokens_seen": 534855832, + "step": 3114 + }, + { + "epoch": 0.8190964687315052, + "loss": 0.09569014608860016, + "loss_ce": 0.0034660203382372856, + "loss_iou": 0.40625, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 534855832, + "step": 3114 + }, + { + "epoch": 0.8193595054908923, + "grad_norm": 5.930104072036699, + "learning_rate": 5e-06, + "loss": 0.1295, + "num_input_tokens_seen": 535027896, + "step": 3115 + }, + { + "epoch": 0.8193595054908923, + "loss": 0.13678929209709167, + "loss_ce": 0.001047111232765019, + "loss_iou": 0.42578125, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 535027896, + "step": 3115 + }, + { + "epoch": 0.8196225422502795, + "grad_norm": 12.505587123966333, + "learning_rate": 5e-06, + "loss": 0.1014, + "num_input_tokens_seen": 535200112, + "step": 3116 + }, + { + "epoch": 0.8196225422502795, + "loss": 0.0822412520647049, + "loss_ce": 0.0008356067701242864, + "loss_iou": 0.5078125, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 535200112, + "step": 3116 + }, + { + "epoch": 0.8198855790096666, + "grad_norm": 4.231730639769997, + "learning_rate": 5e-06, + "loss": 0.1087, + "num_input_tokens_seen": 535372284, + "step": 3117 + }, + { + "epoch": 0.8198855790096666, + "loss": 0.12019523978233337, + "loss_ce": 0.00018486013868823647, + "loss_iou": 0.5234375, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 535372284, + "step": 3117 + }, + { + "epoch": 0.8201486157690537, + "grad_norm": 7.438830292525926, + "learning_rate": 5e-06, + "loss": 0.1207, + "num_input_tokens_seen": 535544868, + "step": 3118 + }, + { + "epoch": 0.8201486157690537, + "loss": 0.16251003742218018, + "loss_ce": 0.0003091081453021616, + "loss_iou": 0.45703125, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 535544868, + "step": 3118 + }, + { + "epoch": 0.8204116525284408, + "grad_norm": 4.777332996643992, + "learning_rate": 5e-06, + "loss": 0.1083, + "num_input_tokens_seen": 535717156, + "step": 3119 + }, + { + "epoch": 0.8204116525284408, + "loss": 0.16800367832183838, + "loss_ce": 0.00139296252746135, + "loss_iou": 0.43359375, + "loss_num": 0.033203125, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 535717156, + "step": 3119 + }, + { + "epoch": 0.8206746892878279, + "grad_norm": 9.684696947025552, + "learning_rate": 5e-06, + "loss": 0.0809, + "num_input_tokens_seen": 535887380, + "step": 3120 + }, + { + "epoch": 0.8206746892878279, + "loss": 0.0679091364145279, + "loss_ce": 0.0011366696562618017, + "loss_iou": 0.5390625, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 535887380, + "step": 3120 + }, + { + "epoch": 0.820937726047215, + "grad_norm": 5.315834673220869, + "learning_rate": 5e-06, + "loss": 0.0862, + "num_input_tokens_seen": 536059444, + "step": 3121 + }, + { + "epoch": 0.820937726047215, + "loss": 0.04389767348766327, + "loss_ce": 0.00013546722766477615, + "loss_iou": 0.4296875, + "loss_num": 0.0087890625, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 536059444, + "step": 3121 + }, + { + "epoch": 0.8212007628066023, + "grad_norm": 4.263271795831078, + "learning_rate": 5e-06, + "loss": 0.1419, + "num_input_tokens_seen": 536231404, + "step": 3122 + }, + { + "epoch": 0.8212007628066023, + "loss": 0.15159711241722107, + "loss_ce": 0.004105648957192898, + "loss_iou": 0.498046875, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 536231404, + "step": 3122 + }, + { + "epoch": 0.8214637995659894, + "grad_norm": 7.367303112338102, + "learning_rate": 5e-06, + "loss": 0.1257, + "num_input_tokens_seen": 536403668, + "step": 3123 + }, + { + "epoch": 0.8214637995659894, + "loss": 0.1084074005484581, + "loss_ce": 0.0006498372531495988, + "loss_iou": 0.58203125, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 536403668, + "step": 3123 + }, + { + "epoch": 0.8217268363253765, + "grad_norm": 6.857717580081254, + "learning_rate": 5e-06, + "loss": 0.1162, + "num_input_tokens_seen": 536575892, + "step": 3124 + }, + { + "epoch": 0.8217268363253765, + "loss": 0.0787474736571312, + "loss_ce": 0.00030203917413018644, + "loss_iou": 0.58203125, + "loss_num": 0.015625, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 536575892, + "step": 3124 + }, + { + "epoch": 0.8219898730847636, + "grad_norm": 4.826419943692509, + "learning_rate": 5e-06, + "loss": 0.0802, + "num_input_tokens_seen": 536745964, + "step": 3125 + }, + { + "epoch": 0.8219898730847636, + "loss": 0.06057834252715111, + "loss_ce": 7.724385795881972e-05, + "loss_iou": 0.388671875, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 536745964, + "step": 3125 + }, + { + "epoch": 0.8222529098441507, + "grad_norm": 8.883739420539388, + "learning_rate": 5e-06, + "loss": 0.135, + "num_input_tokens_seen": 536918180, + "step": 3126 + }, + { + "epoch": 0.8222529098441507, + "loss": 0.11514750123023987, + "loss_ce": 0.0007676149252802134, + "loss_iou": 0.416015625, + "loss_num": 0.02294921875, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 536918180, + "step": 3126 + }, + { + "epoch": 0.8225159466035379, + "grad_norm": 4.391986789320263, + "learning_rate": 5e-06, + "loss": 0.1256, + "num_input_tokens_seen": 537090196, + "step": 3127 + }, + { + "epoch": 0.8225159466035379, + "loss": 0.13754448294639587, + "loss_ce": 0.0006731519242748618, + "loss_iou": 0.51171875, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 537090196, + "step": 3127 + }, + { + "epoch": 0.822778983362925, + "grad_norm": 7.64555343727262, + "learning_rate": 5e-06, + "loss": 0.0728, + "num_input_tokens_seen": 537259572, + "step": 3128 + }, + { + "epoch": 0.822778983362925, + "loss": 0.05659133195877075, + "loss_ce": 5.7516066590324044e-05, + "loss_iou": 0.56640625, + "loss_num": 0.01129150390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 537259572, + "step": 3128 + }, + { + "epoch": 0.8230420201223121, + "grad_norm": 4.3032702337998225, + "learning_rate": 5e-06, + "loss": 0.0678, + "num_input_tokens_seen": 537431812, + "step": 3129 + }, + { + "epoch": 0.8230420201223121, + "loss": 0.07997481524944305, + "loss_ce": 0.0033604400232434273, + "loss_iou": 0.5390625, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 537431812, + "step": 3129 + }, + { + "epoch": 0.8233050568816992, + "grad_norm": 11.32084254562734, + "learning_rate": 5e-06, + "loss": 0.1133, + "num_input_tokens_seen": 537602468, + "step": 3130 + }, + { + "epoch": 0.8233050568816992, + "loss": 0.12599240243434906, + "loss_ce": 0.0030370799358934164, + "loss_iou": 0.54296875, + "loss_num": 0.0245361328125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 537602468, + "step": 3130 + }, + { + "epoch": 0.8235680936410863, + "grad_norm": 7.355242955773187, + "learning_rate": 5e-06, + "loss": 0.149, + "num_input_tokens_seen": 537774448, + "step": 3131 + }, + { + "epoch": 0.8235680936410863, + "loss": 0.18665780127048492, + "loss_ce": 0.0003784986911341548, + "loss_iou": 0.4453125, + "loss_num": 0.037109375, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 537774448, + "step": 3131 + }, + { + "epoch": 0.8238311304004735, + "grad_norm": 11.757519786843565, + "learning_rate": 5e-06, + "loss": 0.0924, + "num_input_tokens_seen": 537946740, + "step": 3132 + }, + { + "epoch": 0.8238311304004735, + "loss": 0.06623389571905136, + "loss_ce": 0.0015976695576682687, + "loss_iou": 0.359375, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 537946740, + "step": 3132 + }, + { + "epoch": 0.8240941671598606, + "grad_norm": 17.264629874858777, + "learning_rate": 5e-06, + "loss": 0.1361, + "num_input_tokens_seen": 538118940, + "step": 3133 + }, + { + "epoch": 0.8240941671598606, + "loss": 0.09394903481006622, + "loss_ce": 0.0025488929823040962, + "loss_iou": 0.51953125, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 538118940, + "step": 3133 + }, + { + "epoch": 0.8243572039192477, + "grad_norm": 12.736031512367697, + "learning_rate": 5e-06, + "loss": 0.1083, + "num_input_tokens_seen": 538291488, + "step": 3134 + }, + { + "epoch": 0.8243572039192477, + "loss": 0.12837865948677063, + "loss_ce": 0.0019290748750790954, + "loss_iou": 0.5078125, + "loss_num": 0.025390625, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 538291488, + "step": 3134 + }, + { + "epoch": 0.8246202406786348, + "grad_norm": 4.032167194049116, + "learning_rate": 5e-06, + "loss": 0.0797, + "num_input_tokens_seen": 538461628, + "step": 3135 + }, + { + "epoch": 0.8246202406786348, + "loss": 0.09231233596801758, + "loss_ce": 0.0006070120725780725, + "loss_iou": 0.4921875, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 538461628, + "step": 3135 + }, + { + "epoch": 0.8248832774380219, + "grad_norm": 3.914309781520812, + "learning_rate": 5e-06, + "loss": 0.0854, + "num_input_tokens_seen": 538633772, + "step": 3136 + }, + { + "epoch": 0.8248832774380219, + "loss": 0.07093091309070587, + "loss_ce": 0.00011487161100376397, + "loss_iou": 0.58984375, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 538633772, + "step": 3136 + }, + { + "epoch": 0.8251463141974091, + "grad_norm": 3.7923535168173763, + "learning_rate": 5e-06, + "loss": 0.114, + "num_input_tokens_seen": 538805732, + "step": 3137 + }, + { + "epoch": 0.8251463141974091, + "loss": 0.1175466924905777, + "loss_ce": 0.0006643689121119678, + "loss_iou": 0.462890625, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 538805732, + "step": 3137 + }, + { + "epoch": 0.8254093509567962, + "grad_norm": 10.05128274905235, + "learning_rate": 5e-06, + "loss": 0.1095, + "num_input_tokens_seen": 538977916, + "step": 3138 + }, + { + "epoch": 0.8254093509567962, + "loss": 0.11163683980703354, + "loss_ce": 0.003146846778690815, + "loss_iou": 0.52734375, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 538977916, + "step": 3138 + }, + { + "epoch": 0.8256723877161833, + "grad_norm": 12.00226096912509, + "learning_rate": 5e-06, + "loss": 0.1521, + "num_input_tokens_seen": 539150104, + "step": 3139 + }, + { + "epoch": 0.8256723877161833, + "loss": 0.14204728603363037, + "loss_ce": 0.0005983082228340209, + "loss_iou": 0.439453125, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 539150104, + "step": 3139 + }, + { + "epoch": 0.8259354244755704, + "grad_norm": 10.327954218233959, + "learning_rate": 5e-06, + "loss": 0.1203, + "num_input_tokens_seen": 539322280, + "step": 3140 + }, + { + "epoch": 0.8259354244755704, + "loss": 0.1829340159893036, + "loss_ce": 0.004543509799987078, + "loss_iou": 0.453125, + "loss_num": 0.03564453125, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 539322280, + "step": 3140 + }, + { + "epoch": 0.8261984612349575, + "grad_norm": 3.552655492096545, + "learning_rate": 5e-06, + "loss": 0.1035, + "num_input_tokens_seen": 539494448, + "step": 3141 + }, + { + "epoch": 0.8261984612349575, + "loss": 0.12512250244617462, + "loss_ce": 0.003601514268666506, + "loss_iou": 0.50390625, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 539494448, + "step": 3141 + }, + { + "epoch": 0.8264614979943448, + "grad_norm": 4.0634516483762795, + "learning_rate": 5e-06, + "loss": 0.0828, + "num_input_tokens_seen": 539667072, + "step": 3142 + }, + { + "epoch": 0.8264614979943448, + "loss": 0.1312233954668045, + "loss_ce": 0.0005166015471331775, + "loss_iou": 0.47265625, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 539667072, + "step": 3142 + }, + { + "epoch": 0.8267245347537319, + "grad_norm": 12.104159402624534, + "learning_rate": 5e-06, + "loss": 0.1399, + "num_input_tokens_seen": 539839460, + "step": 3143 + }, + { + "epoch": 0.8267245347537319, + "loss": 0.2201562076807022, + "loss_ce": 0.002535369014367461, + "loss_iou": 0.34765625, + "loss_num": 0.04345703125, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 539839460, + "step": 3143 + }, + { + "epoch": 0.826987571513119, + "grad_norm": 5.4839208249491245, + "learning_rate": 5e-06, + "loss": 0.1106, + "num_input_tokens_seen": 540011628, + "step": 3144 + }, + { + "epoch": 0.826987571513119, + "loss": 0.1272164285182953, + "loss_ce": 0.0027352366596460342, + "loss_iou": 0.474609375, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 540011628, + "step": 3144 + }, + { + "epoch": 0.8272506082725061, + "grad_norm": 4.535384525117057, + "learning_rate": 5e-06, + "loss": 0.1183, + "num_input_tokens_seen": 540183608, + "step": 3145 + }, + { + "epoch": 0.8272506082725061, + "loss": 0.0952780544757843, + "loss_ce": 0.002458844566717744, + "loss_iou": 0.3671875, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 540183608, + "step": 3145 + }, + { + "epoch": 0.8275136450318932, + "grad_norm": 4.264720708193596, + "learning_rate": 5e-06, + "loss": 0.1042, + "num_input_tokens_seen": 540355724, + "step": 3146 + }, + { + "epoch": 0.8275136450318932, + "loss": 0.05311005562543869, + "loss_ce": 0.0009097411530092359, + "loss_iou": 0.51953125, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 540355724, + "step": 3146 + }, + { + "epoch": 0.8277766817912803, + "grad_norm": 6.253031178644263, + "learning_rate": 5e-06, + "loss": 0.1647, + "num_input_tokens_seen": 540526424, + "step": 3147 + }, + { + "epoch": 0.8277766817912803, + "loss": 0.1789083182811737, + "loss_ce": 0.0011586775071918964, + "loss_iou": 0.2734375, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 540526424, + "step": 3147 + }, + { + "epoch": 0.8280397185506675, + "grad_norm": 4.716126774756702, + "learning_rate": 5e-06, + "loss": 0.0978, + "num_input_tokens_seen": 540698736, + "step": 3148 + }, + { + "epoch": 0.8280397185506675, + "loss": 0.053756728768348694, + "loss_ce": 0.0007171769393607974, + "loss_iou": 0.4453125, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 540698736, + "step": 3148 + }, + { + "epoch": 0.8283027553100546, + "grad_norm": 15.946573913857675, + "learning_rate": 5e-06, + "loss": 0.1372, + "num_input_tokens_seen": 540870876, + "step": 3149 + }, + { + "epoch": 0.8283027553100546, + "loss": 0.11821180582046509, + "loss_ce": 0.001833026995882392, + "loss_iou": 0.44140625, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 540870876, + "step": 3149 + }, + { + "epoch": 0.8285657920694417, + "grad_norm": 3.1443507951345406, + "learning_rate": 5e-06, + "loss": 0.1031, + "num_input_tokens_seen": 541041144, + "step": 3150 + }, + { + "epoch": 0.8285657920694417, + "loss": 0.13489758968353271, + "loss_ce": 0.004984267987310886, + "loss_iou": 0.494140625, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 541041144, + "step": 3150 + }, + { + "epoch": 0.8288288288288288, + "grad_norm": 4.1136952366915605, + "learning_rate": 5e-06, + "loss": 0.0911, + "num_input_tokens_seen": 541213632, + "step": 3151 + }, + { + "epoch": 0.8288288288288288, + "loss": 0.10275200009346008, + "loss_ce": 0.0014183830935508013, + "loss_iou": 0.4765625, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 541213632, + "step": 3151 + }, + { + "epoch": 0.8290918655882159, + "grad_norm": 5.711362515317036, + "learning_rate": 5e-06, + "loss": 0.1271, + "num_input_tokens_seen": 541384104, + "step": 3152 + }, + { + "epoch": 0.8290918655882159, + "loss": 0.08564618229866028, + "loss_ce": 0.0014176733093336225, + "loss_iou": 0.66796875, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 541384104, + "step": 3152 + }, + { + "epoch": 0.8293549023476031, + "grad_norm": 7.096186366509263, + "learning_rate": 5e-06, + "loss": 0.1273, + "num_input_tokens_seen": 541556240, + "step": 3153 + }, + { + "epoch": 0.8293549023476031, + "loss": 0.17091956734657288, + "loss_ce": 0.0015012390213087201, + "loss_iou": 0.421875, + "loss_num": 0.033935546875, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 541556240, + "step": 3153 + }, + { + "epoch": 0.8296179391069902, + "grad_norm": 4.29702608695741, + "learning_rate": 5e-06, + "loss": 0.1452, + "num_input_tokens_seen": 541728356, + "step": 3154 + }, + { + "epoch": 0.8296179391069902, + "loss": 0.10837417840957642, + "loss_ce": 0.0006623809458687901, + "loss_iou": 0.51171875, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 541728356, + "step": 3154 + }, + { + "epoch": 0.8298809758663773, + "grad_norm": 3.729285456687102, + "learning_rate": 5e-06, + "loss": 0.0659, + "num_input_tokens_seen": 541898716, + "step": 3155 + }, + { + "epoch": 0.8298809758663773, + "loss": 0.062001317739486694, + "loss_ce": 0.0011492683552205563, + "loss_iou": 0.50390625, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 541898716, + "step": 3155 + }, + { + "epoch": 0.8301440126257644, + "grad_norm": 6.543104796714217, + "learning_rate": 5e-06, + "loss": 0.0911, + "num_input_tokens_seen": 542070820, + "step": 3156 + }, + { + "epoch": 0.8301440126257644, + "loss": 0.09034896641969681, + "loss_ce": 0.0022752326913177967, + "loss_iou": 0.390625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 542070820, + "step": 3156 + }, + { + "epoch": 0.8304070493851515, + "grad_norm": 3.814660695434685, + "learning_rate": 5e-06, + "loss": 0.0838, + "num_input_tokens_seen": 542242936, + "step": 3157 + }, + { + "epoch": 0.8304070493851515, + "loss": 0.05618397891521454, + "loss_ce": 0.0004894005251117051, + "loss_iou": 0.412109375, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 542242936, + "step": 3157 + }, + { + "epoch": 0.8306700861445387, + "grad_norm": 13.94223132380543, + "learning_rate": 5e-06, + "loss": 0.1134, + "num_input_tokens_seen": 542415376, + "step": 3158 + }, + { + "epoch": 0.8306700861445387, + "loss": 0.09673337638378143, + "loss_ce": 0.0017016411293298006, + "loss_iou": 0.5703125, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 542415376, + "step": 3158 + }, + { + "epoch": 0.8309331229039258, + "grad_norm": 46.47454058969395, + "learning_rate": 5e-06, + "loss": 0.1032, + "num_input_tokens_seen": 542587700, + "step": 3159 + }, + { + "epoch": 0.8309331229039258, + "loss": 0.13622400164604187, + "loss_ce": 0.00045128766214475036, + "loss_iou": 0.5703125, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 542587700, + "step": 3159 + }, + { + "epoch": 0.831196159663313, + "grad_norm": 7.76157996882612, + "learning_rate": 5e-06, + "loss": 0.1055, + "num_input_tokens_seen": 542758148, + "step": 3160 + }, + { + "epoch": 0.831196159663313, + "loss": 0.11631490290164948, + "loss_ce": 0.004040728323161602, + "loss_iou": 0.42578125, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 542758148, + "step": 3160 + }, + { + "epoch": 0.8314591964227, + "grad_norm": 20.483608511309008, + "learning_rate": 5e-06, + "loss": 0.085, + "num_input_tokens_seen": 542930836, + "step": 3161 + }, + { + "epoch": 0.8314591964227, + "loss": 0.06239602342247963, + "loss_ce": 0.00014016299974173307, + "loss_iou": 0.55078125, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 542930836, + "step": 3161 + }, + { + "epoch": 0.8317222331820872, + "grad_norm": 4.545805831684384, + "learning_rate": 5e-06, + "loss": 0.1162, + "num_input_tokens_seen": 543102800, + "step": 3162 + }, + { + "epoch": 0.8317222331820872, + "loss": 0.12887202203273773, + "loss_ce": 0.0006066488567739725, + "loss_iou": 0.6015625, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 543102800, + "step": 3162 + }, + { + "epoch": 0.8319852699414744, + "grad_norm": 6.107575063227538, + "learning_rate": 5e-06, + "loss": 0.1, + "num_input_tokens_seen": 543274984, + "step": 3163 + }, + { + "epoch": 0.8319852699414744, + "loss": 0.08694491535425186, + "loss_ce": 0.00036654339055530727, + "loss_iou": 0.498046875, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 543274984, + "step": 3163 + }, + { + "epoch": 0.8322483067008615, + "grad_norm": 5.535589838242773, + "learning_rate": 5e-06, + "loss": 0.1173, + "num_input_tokens_seen": 543447000, + "step": 3164 + }, + { + "epoch": 0.8322483067008615, + "loss": 0.12083543837070465, + "loss_ce": 0.006333490367978811, + "loss_iou": 0.6796875, + "loss_num": 0.02294921875, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 543447000, + "step": 3164 + }, + { + "epoch": 0.8325113434602486, + "grad_norm": 5.113582833614297, + "learning_rate": 5e-06, + "loss": 0.1266, + "num_input_tokens_seen": 543619120, + "step": 3165 + }, + { + "epoch": 0.8325113434602486, + "loss": 0.1699899584054947, + "loss_ce": 0.002250079531222582, + "loss_iou": 0.51953125, + "loss_num": 0.033447265625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 543619120, + "step": 3165 + }, + { + "epoch": 0.8327743802196357, + "grad_norm": 5.479089729021323, + "learning_rate": 5e-06, + "loss": 0.1221, + "num_input_tokens_seen": 543791308, + "step": 3166 + }, + { + "epoch": 0.8327743802196357, + "loss": 0.08266595751047134, + "loss_ce": 0.00042108428897336125, + "loss_iou": 0.5625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 543791308, + "step": 3166 + }, + { + "epoch": 0.8330374169790228, + "grad_norm": 4.786501058802981, + "learning_rate": 5e-06, + "loss": 0.0917, + "num_input_tokens_seen": 543958632, + "step": 3167 + }, + { + "epoch": 0.8330374169790228, + "loss": 0.07515060901641846, + "loss_ce": 0.0012675554025918245, + "loss_iou": 0.5703125, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 543958632, + "step": 3167 + }, + { + "epoch": 0.83330045373841, + "grad_norm": 8.594141920355682, + "learning_rate": 5e-06, + "loss": 0.12, + "num_input_tokens_seen": 544130848, + "step": 3168 + }, + { + "epoch": 0.83330045373841, + "loss": 0.09625629335641861, + "loss_ce": 0.0025978446938097477, + "loss_iou": 0.51171875, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 544130848, + "step": 3168 + }, + { + "epoch": 0.8335634904977971, + "grad_norm": 3.7461805856215458, + "learning_rate": 5e-06, + "loss": 0.1468, + "num_input_tokens_seen": 544302820, + "step": 3169 + }, + { + "epoch": 0.8335634904977971, + "loss": 0.09222942590713501, + "loss_ce": 0.0022025699727237225, + "loss_iou": 0.375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 544302820, + "step": 3169 + }, + { + "epoch": 0.8338265272571842, + "grad_norm": 9.92373943246775, + "learning_rate": 5e-06, + "loss": 0.104, + "num_input_tokens_seen": 544474904, + "step": 3170 + }, + { + "epoch": 0.8338265272571842, + "loss": 0.14077287912368774, + "loss_ce": 0.0030775703489780426, + "loss_iou": 0.482421875, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 544474904, + "step": 3170 + }, + { + "epoch": 0.8340895640165713, + "grad_norm": 14.235058865758765, + "learning_rate": 5e-06, + "loss": 0.0805, + "num_input_tokens_seen": 544647036, + "step": 3171 + }, + { + "epoch": 0.8340895640165713, + "loss": 0.060213349759578705, + "loss_ce": 0.0022452091798186302, + "loss_iou": 0.4609375, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 544647036, + "step": 3171 + }, + { + "epoch": 0.8343526007759584, + "grad_norm": 4.48891217014488, + "learning_rate": 5e-06, + "loss": 0.1299, + "num_input_tokens_seen": 544817552, + "step": 3172 + }, + { + "epoch": 0.8343526007759584, + "loss": 0.1579454094171524, + "loss_ce": 0.0024888694752007723, + "loss_iou": 0.482421875, + "loss_num": 0.0311279296875, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 544817552, + "step": 3172 + }, + { + "epoch": 0.8346156375353455, + "grad_norm": 3.708111692136732, + "learning_rate": 5e-06, + "loss": 0.1094, + "num_input_tokens_seen": 544989748, + "step": 3173 + }, + { + "epoch": 0.8346156375353455, + "loss": 0.14915120601654053, + "loss_ce": 0.0004848288372159004, + "loss_iou": 0.34375, + "loss_num": 0.0296630859375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 544989748, + "step": 3173 + }, + { + "epoch": 0.8348786742947327, + "grad_norm": 6.783609066408303, + "learning_rate": 5e-06, + "loss": 0.1198, + "num_input_tokens_seen": 545161788, + "step": 3174 + }, + { + "epoch": 0.8348786742947327, + "loss": 0.13735045492649078, + "loss_ce": 0.003683460643514991, + "loss_iou": 0.54296875, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 545161788, + "step": 3174 + }, + { + "epoch": 0.8351417110541198, + "grad_norm": 4.8955030087080775, + "learning_rate": 5e-06, + "loss": 0.0932, + "num_input_tokens_seen": 545333884, + "step": 3175 + }, + { + "epoch": 0.8351417110541198, + "loss": 0.08379638940095901, + "loss_ce": 0.0006665037362836301, + "loss_iou": 0.51171875, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 545333884, + "step": 3175 + }, + { + "epoch": 0.8354047478135069, + "grad_norm": 4.39565885197116, + "learning_rate": 5e-06, + "loss": 0.1439, + "num_input_tokens_seen": 545506244, + "step": 3176 + }, + { + "epoch": 0.8354047478135069, + "loss": 0.12928782403469086, + "loss_ce": 0.000915632932446897, + "loss_iou": 0.46875, + "loss_num": 0.0257568359375, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 545506244, + "step": 3176 + }, + { + "epoch": 0.835667784572894, + "grad_norm": 6.2304214245063605, + "learning_rate": 5e-06, + "loss": 0.1119, + "num_input_tokens_seen": 545678348, + "step": 3177 + }, + { + "epoch": 0.835667784572894, + "loss": 0.07726402580738068, + "loss_ce": 0.000832755584269762, + "loss_iou": 0.44921875, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 545678348, + "step": 3177 + }, + { + "epoch": 0.8359308213322811, + "grad_norm": 4.950850483708038, + "learning_rate": 5e-06, + "loss": 0.1162, + "num_input_tokens_seen": 545848136, + "step": 3178 + }, + { + "epoch": 0.8359308213322811, + "loss": 0.12010537087917328, + "loss_ce": 0.0009342365083284676, + "loss_iou": 0.62890625, + "loss_num": 0.02392578125, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 545848136, + "step": 3178 + }, + { + "epoch": 0.8361938580916684, + "grad_norm": 4.252976007842196, + "learning_rate": 5e-06, + "loss": 0.0991, + "num_input_tokens_seen": 546020692, + "step": 3179 + }, + { + "epoch": 0.8361938580916684, + "loss": 0.12632903456687927, + "loss_ce": 0.00018462821026332676, + "loss_iou": 0.486328125, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 546020692, + "step": 3179 + }, + { + "epoch": 0.8364568948510555, + "grad_norm": 16.684582759145172, + "learning_rate": 5e-06, + "loss": 0.073, + "num_input_tokens_seen": 546192760, + "step": 3180 + }, + { + "epoch": 0.8364568948510555, + "loss": 0.09913001954555511, + "loss_ce": 0.0009549736278131604, + "loss_iou": 0.49609375, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 546192760, + "step": 3180 + }, + { + "epoch": 0.8367199316104426, + "grad_norm": 5.017020175241404, + "learning_rate": 5e-06, + "loss": 0.1322, + "num_input_tokens_seen": 546363064, + "step": 3181 + }, + { + "epoch": 0.8367199316104426, + "loss": 0.1710553914308548, + "loss_ce": 0.00040109228575602174, + "loss_iou": NaN, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 546363064, + "step": 3181 + }, + { + "epoch": 0.8369829683698297, + "grad_norm": 17.167559763411923, + "learning_rate": 5e-06, + "loss": 0.1342, + "num_input_tokens_seen": 546535208, + "step": 3182 + }, + { + "epoch": 0.8369829683698297, + "loss": 0.15842683613300323, + "loss_ce": 0.0015664853854104877, + "loss_iou": 0.443359375, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 546535208, + "step": 3182 + }, + { + "epoch": 0.8372460051292168, + "grad_norm": 3.9098429704759723, + "learning_rate": 5e-06, + "loss": 0.1016, + "num_input_tokens_seen": 546707288, + "step": 3183 + }, + { + "epoch": 0.8372460051292168, + "loss": 0.08958999812602997, + "loss_ce": 0.00014297313464339823, + "loss_iou": 0.5234375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 546707288, + "step": 3183 + }, + { + "epoch": 0.837509041888604, + "grad_norm": 5.336892331569848, + "learning_rate": 5e-06, + "loss": 0.0983, + "num_input_tokens_seen": 546879532, + "step": 3184 + }, + { + "epoch": 0.837509041888604, + "loss": 0.07368629425764084, + "loss_ce": 0.004197763279080391, + "loss_iou": 0.51953125, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 546879532, + "step": 3184 + }, + { + "epoch": 0.8377720786479911, + "grad_norm": 3.9486138565288265, + "learning_rate": 5e-06, + "loss": 0.1003, + "num_input_tokens_seen": 547051612, + "step": 3185 + }, + { + "epoch": 0.8377720786479911, + "loss": 0.15984772145748138, + "loss_ce": 0.0009426883771084249, + "loss_iou": 0.45703125, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 547051612, + "step": 3185 + }, + { + "epoch": 0.8380351154073782, + "grad_norm": 6.643098269759205, + "learning_rate": 5e-06, + "loss": 0.1105, + "num_input_tokens_seen": 547223768, + "step": 3186 + }, + { + "epoch": 0.8380351154073782, + "loss": 0.1492346227169037, + "loss_ce": 0.0017126407474279404, + "loss_iou": 0.40625, + "loss_num": 0.0294189453125, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 547223768, + "step": 3186 + }, + { + "epoch": 0.8382981521667653, + "grad_norm": 30.69973998174967, + "learning_rate": 5e-06, + "loss": 0.0991, + "num_input_tokens_seen": 547395788, + "step": 3187 + }, + { + "epoch": 0.8382981521667653, + "loss": 0.13615994155406952, + "loss_ce": 0.00041775350109674037, + "loss_iou": 0.490234375, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 547395788, + "step": 3187 + }, + { + "epoch": 0.8385611889261524, + "grad_norm": 35.72025172038926, + "learning_rate": 5e-06, + "loss": 0.1092, + "num_input_tokens_seen": 547568000, + "step": 3188 + }, + { + "epoch": 0.8385611889261524, + "loss": 0.04795503616333008, + "loss_ce": 0.0002560606808401644, + "loss_iou": 0.5703125, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 547568000, + "step": 3188 + }, + { + "epoch": 0.8388242256855396, + "grad_norm": 7.18874848401809, + "learning_rate": 5e-06, + "loss": 0.1135, + "num_input_tokens_seen": 547740064, + "step": 3189 + }, + { + "epoch": 0.8388242256855396, + "loss": 0.12481731176376343, + "loss_ce": 0.0005802565719932318, + "loss_iou": 0.40625, + "loss_num": 0.02490234375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 547740064, + "step": 3189 + }, + { + "epoch": 0.8390872624449267, + "grad_norm": 6.155592453443207, + "learning_rate": 5e-06, + "loss": 0.0875, + "num_input_tokens_seen": 547912300, + "step": 3190 + }, + { + "epoch": 0.8390872624449267, + "loss": 0.08178332448005676, + "loss_ce": 0.0008201911114156246, + "loss_iou": 0.470703125, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 547912300, + "step": 3190 + }, + { + "epoch": 0.8393502992043138, + "grad_norm": 12.72945010384761, + "learning_rate": 5e-06, + "loss": 0.139, + "num_input_tokens_seen": 548084548, + "step": 3191 + }, + { + "epoch": 0.8393502992043138, + "loss": 0.19623327255249023, + "loss_ce": 0.002812865423038602, + "loss_iou": 0.4296875, + "loss_num": 0.03857421875, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 548084548, + "step": 3191 + }, + { + "epoch": 0.8396133359637009, + "grad_norm": 5.353324449071839, + "learning_rate": 5e-06, + "loss": 0.1299, + "num_input_tokens_seen": 548255176, + "step": 3192 + }, + { + "epoch": 0.8396133359637009, + "loss": 0.12437019497156143, + "loss_ce": 0.0003467575879767537, + "loss_iou": 0.4921875, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 548255176, + "step": 3192 + }, + { + "epoch": 0.839876372723088, + "grad_norm": 4.347588948765136, + "learning_rate": 5e-06, + "loss": 0.1162, + "num_input_tokens_seen": 548427488, + "step": 3193 + }, + { + "epoch": 0.839876372723088, + "loss": 0.08202692121267319, + "loss_ce": 0.0022234548814594746, + "loss_iou": 0.53515625, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 548427488, + "step": 3193 + }, + { + "epoch": 0.8401394094824752, + "grad_norm": 5.210446302172759, + "learning_rate": 5e-06, + "loss": 0.1093, + "num_input_tokens_seen": 548599828, + "step": 3194 + }, + { + "epoch": 0.8401394094824752, + "loss": 0.12387488037347794, + "loss_ce": 0.00018713258032221347, + "loss_iou": 0.59765625, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 548599828, + "step": 3194 + }, + { + "epoch": 0.8404024462418623, + "grad_norm": 3.9642883700177514, + "learning_rate": 5e-06, + "loss": 0.1171, + "num_input_tokens_seen": 548771932, + "step": 3195 + }, + { + "epoch": 0.8404024462418623, + "loss": 0.12389804422855377, + "loss_ce": 0.00396396778523922, + "loss_iou": 0.490234375, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 548771932, + "step": 3195 + }, + { + "epoch": 0.8406654830012494, + "grad_norm": 4.444986430362012, + "learning_rate": 5e-06, + "loss": 0.08, + "num_input_tokens_seen": 548944332, + "step": 3196 + }, + { + "epoch": 0.8406654830012494, + "loss": 0.07641720026731491, + "loss_ce": 0.0017254289705306292, + "loss_iou": 0.578125, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 548944332, + "step": 3196 + }, + { + "epoch": 0.8409285197606365, + "grad_norm": 17.152304646652222, + "learning_rate": 5e-06, + "loss": 0.1166, + "num_input_tokens_seen": 549116460, + "step": 3197 + }, + { + "epoch": 0.8409285197606365, + "loss": 0.07439431548118591, + "loss_ce": 0.0016404138877987862, + "loss_iou": 0.435546875, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 549116460, + "step": 3197 + }, + { + "epoch": 0.8411915565200236, + "grad_norm": 6.835966777853977, + "learning_rate": 5e-06, + "loss": 0.1296, + "num_input_tokens_seen": 549286920, + "step": 3198 + }, + { + "epoch": 0.8411915565200236, + "loss": 0.23670437932014465, + "loss_ce": 0.0033669895492494106, + "loss_iou": 0.4609375, + "loss_num": 0.046630859375, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 549286920, + "step": 3198 + }, + { + "epoch": 0.8414545932794107, + "grad_norm": 3.334370051059133, + "learning_rate": 5e-06, + "loss": 0.1002, + "num_input_tokens_seen": 549458932, + "step": 3199 + }, + { + "epoch": 0.8414545932794107, + "loss": 0.10936430096626282, + "loss_ce": 0.001118454267270863, + "loss_iou": 0.47265625, + "loss_num": 0.0216064453125, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 549458932, + "step": 3199 + }, + { + "epoch": 0.841717630038798, + "grad_norm": 3.3887650622610863, + "learning_rate": 5e-06, + "loss": 0.119, + "num_input_tokens_seen": 549630896, + "step": 3200 + }, + { + "epoch": 0.841717630038798, + "loss": 0.1543048769235611, + "loss_ce": 0.00280036055482924, + "loss_iou": 0.451171875, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 549630896, + "step": 3200 + }, + { + "epoch": 0.8419806667981851, + "grad_norm": 8.90667441822763, + "learning_rate": 5e-06, + "loss": 0.1431, + "num_input_tokens_seen": 549802980, + "step": 3201 + }, + { + "epoch": 0.8419806667981851, + "loss": 0.10668284446001053, + "loss_ce": 0.0037165414541959763, + "loss_iou": 0.322265625, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 549802980, + "step": 3201 + }, + { + "epoch": 0.8422437035575722, + "grad_norm": 4.581013999909635, + "learning_rate": 5e-06, + "loss": 0.0768, + "num_input_tokens_seen": 549973288, + "step": 3202 + }, + { + "epoch": 0.8422437035575722, + "loss": 0.05071475729346275, + "loss_ce": 0.0011084338184446096, + "loss_iou": 0.578125, + "loss_num": 0.00994873046875, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 549973288, + "step": 3202 + }, + { + "epoch": 0.8425067403169593, + "grad_norm": 7.123665679875533, + "learning_rate": 5e-06, + "loss": 0.1324, + "num_input_tokens_seen": 550143700, + "step": 3203 + }, + { + "epoch": 0.8425067403169593, + "loss": 0.1053222045302391, + "loss_ce": 0.0030272852163761854, + "loss_iou": 0.404296875, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 550143700, + "step": 3203 + }, + { + "epoch": 0.8427697770763464, + "grad_norm": 14.86581378718403, + "learning_rate": 5e-06, + "loss": 0.1414, + "num_input_tokens_seen": 550315752, + "step": 3204 + }, + { + "epoch": 0.8427697770763464, + "loss": 0.08674832433462143, + "loss_ce": 0.000688750937115401, + "loss_iou": NaN, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 550315752, + "step": 3204 + }, + { + "epoch": 0.8430328138357336, + "grad_norm": 15.15255979041917, + "learning_rate": 5e-06, + "loss": 0.1341, + "num_input_tokens_seen": 550487764, + "step": 3205 + }, + { + "epoch": 0.8430328138357336, + "loss": 0.11826883256435394, + "loss_ce": 0.001401771791279316, + "loss_iou": 0.47265625, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 550487764, + "step": 3205 + }, + { + "epoch": 0.8432958505951207, + "grad_norm": 5.341317771181548, + "learning_rate": 5e-06, + "loss": 0.1085, + "num_input_tokens_seen": 550659696, + "step": 3206 + }, + { + "epoch": 0.8432958505951207, + "loss": 0.11918849498033524, + "loss_ce": 0.0002920094411820173, + "loss_iou": 0.5859375, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 550659696, + "step": 3206 + }, + { + "epoch": 0.8435588873545078, + "grad_norm": 5.395123389791678, + "learning_rate": 5e-06, + "loss": 0.1327, + "num_input_tokens_seen": 550831904, + "step": 3207 + }, + { + "epoch": 0.8435588873545078, + "loss": 0.07498294115066528, + "loss_ce": 0.001038852147758007, + "loss_iou": 0.33984375, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 550831904, + "step": 3207 + }, + { + "epoch": 0.8438219241138949, + "grad_norm": 3.643785530851578, + "learning_rate": 5e-06, + "loss": 0.1128, + "num_input_tokens_seen": 551004156, + "step": 3208 + }, + { + "epoch": 0.8438219241138949, + "loss": 0.06928001344203949, + "loss_ce": 0.004369123373180628, + "loss_iou": 0.494140625, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 551004156, + "step": 3208 + }, + { + "epoch": 0.844084960873282, + "grad_norm": 5.391792901404867, + "learning_rate": 5e-06, + "loss": 0.0774, + "num_input_tokens_seen": 551176320, + "step": 3209 + }, + { + "epoch": 0.844084960873282, + "loss": 0.0887857973575592, + "loss_ce": 0.0004984364495612681, + "loss_iou": 0.376953125, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 551176320, + "step": 3209 + }, + { + "epoch": 0.8443479976326692, + "grad_norm": 7.538916165469519, + "learning_rate": 5e-06, + "loss": 0.1156, + "num_input_tokens_seen": 551348168, + "step": 3210 + }, + { + "epoch": 0.8443479976326692, + "loss": 0.16849397122859955, + "loss_ce": 0.0030734348110854626, + "loss_iou": 0.419921875, + "loss_num": 0.033203125, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 551348168, + "step": 3210 + }, + { + "epoch": 0.8446110343920563, + "grad_norm": 7.685615482077233, + "learning_rate": 5e-06, + "loss": 0.1016, + "num_input_tokens_seen": 551520728, + "step": 3211 + }, + { + "epoch": 0.8446110343920563, + "loss": 0.0783834308385849, + "loss_ce": 0.001479133265092969, + "loss_iou": 0.5390625, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 551520728, + "step": 3211 + }, + { + "epoch": 0.8448740711514434, + "grad_norm": 8.632575689436024, + "learning_rate": 5e-06, + "loss": 0.1109, + "num_input_tokens_seen": 551692804, + "step": 3212 + }, + { + "epoch": 0.8448740711514434, + "loss": 0.15737096965312958, + "loss_ce": 0.0006174240261316299, + "loss_iou": 0.5234375, + "loss_num": 0.03125, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 551692804, + "step": 3212 + }, + { + "epoch": 0.8451371079108305, + "grad_norm": 12.457163123139722, + "learning_rate": 5e-06, + "loss": 0.1306, + "num_input_tokens_seen": 551864880, + "step": 3213 + }, + { + "epoch": 0.8451371079108305, + "loss": 0.07481381297111511, + "loss_ce": 0.003173800650984049, + "loss_iou": 0.44921875, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 551864880, + "step": 3213 + }, + { + "epoch": 0.8454001446702176, + "grad_norm": 3.8683898896692166, + "learning_rate": 5e-06, + "loss": 0.09, + "num_input_tokens_seen": 552036812, + "step": 3214 + }, + { + "epoch": 0.8454001446702176, + "loss": 0.05259804055094719, + "loss_ce": 0.0005503093125298619, + "loss_iou": 0.51171875, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 552036812, + "step": 3214 + }, + { + "epoch": 0.8456631814296048, + "grad_norm": 4.539951161935164, + "learning_rate": 5e-06, + "loss": 0.1134, + "num_input_tokens_seen": 552208856, + "step": 3215 + }, + { + "epoch": 0.8456631814296048, + "loss": 0.07421931624412537, + "loss_ce": 0.0011754983570426702, + "loss_iou": 0.478515625, + "loss_num": 0.01458740234375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 552208856, + "step": 3215 + }, + { + "epoch": 0.8459262181889919, + "grad_norm": 10.573780213794782, + "learning_rate": 5e-06, + "loss": 0.0948, + "num_input_tokens_seen": 552380904, + "step": 3216 + }, + { + "epoch": 0.8459262181889919, + "loss": 0.06666961312294006, + "loss_ce": 0.001804507803171873, + "loss_iou": 0.484375, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 552380904, + "step": 3216 + }, + { + "epoch": 0.846189254948379, + "grad_norm": 4.275878567324078, + "learning_rate": 5e-06, + "loss": 0.1204, + "num_input_tokens_seen": 552551132, + "step": 3217 + }, + { + "epoch": 0.846189254948379, + "loss": 0.09229104220867157, + "loss_ce": 0.0006162393838167191, + "loss_iou": 0.4921875, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 552551132, + "step": 3217 + }, + { + "epoch": 0.8464522917077661, + "grad_norm": 4.658023344353002, + "learning_rate": 5e-06, + "loss": 0.151, + "num_input_tokens_seen": 552722824, + "step": 3218 + }, + { + "epoch": 0.8464522917077661, + "loss": 0.07020144164562225, + "loss_ce": 0.0020709503442049026, + "loss_iou": 0.49609375, + "loss_num": 0.01361083984375, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 552722824, + "step": 3218 + }, + { + "epoch": 0.8467153284671532, + "grad_norm": 6.1002348516078095, + "learning_rate": 5e-06, + "loss": 0.1233, + "num_input_tokens_seen": 552894948, + "step": 3219 + }, + { + "epoch": 0.8467153284671532, + "loss": 0.10183661431074142, + "loss_ce": 0.0007318751304410398, + "loss_iou": 0.5703125, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 552894948, + "step": 3219 + }, + { + "epoch": 0.8469783652265405, + "grad_norm": 3.6957881460841646, + "learning_rate": 5e-06, + "loss": 0.1319, + "num_input_tokens_seen": 553065492, + "step": 3220 + }, + { + "epoch": 0.8469783652265405, + "loss": 0.20681847631931305, + "loss_ce": 0.005616086535155773, + "loss_iou": 0.4609375, + "loss_num": 0.040283203125, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 553065492, + "step": 3220 + }, + { + "epoch": 0.8472414019859276, + "grad_norm": 31.1352481206466, + "learning_rate": 5e-06, + "loss": 0.0969, + "num_input_tokens_seen": 553237384, + "step": 3221 + }, + { + "epoch": 0.8472414019859276, + "loss": 0.0710492879152298, + "loss_ce": 0.0004316139966249466, + "loss_iou": 0.42578125, + "loss_num": 0.01409912109375, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 553237384, + "step": 3221 + }, + { + "epoch": 0.8475044387453147, + "grad_norm": 15.822969391935775, + "learning_rate": 5e-06, + "loss": 0.1572, + "num_input_tokens_seen": 553409788, + "step": 3222 + }, + { + "epoch": 0.8475044387453147, + "loss": 0.24513369798660278, + "loss_ce": 0.001847569365054369, + "loss_iou": NaN, + "loss_num": 0.048583984375, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 553409788, + "step": 3222 + }, + { + "epoch": 0.8477674755047018, + "grad_norm": 4.903880792540357, + "learning_rate": 5e-06, + "loss": 0.082, + "num_input_tokens_seen": 553581532, + "step": 3223 + }, + { + "epoch": 0.8477674755047018, + "loss": 0.0573626384139061, + "loss_ce": 0.0015459894202649593, + "loss_iou": NaN, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 553581532, + "step": 3223 + }, + { + "epoch": 0.8480305122640889, + "grad_norm": 11.193676890263411, + "learning_rate": 5e-06, + "loss": 0.095, + "num_input_tokens_seen": 553753504, + "step": 3224 + }, + { + "epoch": 0.8480305122640889, + "loss": 0.12998200953006744, + "loss_ce": 0.0016861144686117768, + "loss_iou": 0.515625, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 553753504, + "step": 3224 + }, + { + "epoch": 0.848293549023476, + "grad_norm": 6.892798676870787, + "learning_rate": 5e-06, + "loss": 0.1284, + "num_input_tokens_seen": 553925232, + "step": 3225 + }, + { + "epoch": 0.848293549023476, + "loss": 0.06351655721664429, + "loss_ce": 0.0010470744455233216, + "loss_iou": 0.33203125, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 553925232, + "step": 3225 + }, + { + "epoch": 0.8485565857828632, + "grad_norm": 3.94134933091549, + "learning_rate": 5e-06, + "loss": 0.0882, + "num_input_tokens_seen": 554097380, + "step": 3226 + }, + { + "epoch": 0.8485565857828632, + "loss": 0.06772696226835251, + "loss_ce": 0.00040518559399060905, + "loss_iou": 0.5, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 554097380, + "step": 3226 + }, + { + "epoch": 0.8488196225422503, + "grad_norm": 4.553319445567292, + "learning_rate": 5e-06, + "loss": 0.1467, + "num_input_tokens_seen": 554269324, + "step": 3227 + }, + { + "epoch": 0.8488196225422503, + "loss": 0.06106797605752945, + "loss_ce": 0.00015489489305764437, + "loss_iou": 0.50390625, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 554269324, + "step": 3227 + }, + { + "epoch": 0.8490826593016374, + "grad_norm": 8.122842269719726, + "learning_rate": 5e-06, + "loss": 0.0758, + "num_input_tokens_seen": 554441420, + "step": 3228 + }, + { + "epoch": 0.8490826593016374, + "loss": 0.07994222640991211, + "loss_ce": 0.0005202332977205515, + "loss_iou": 0.5078125, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 554441420, + "step": 3228 + }, + { + "epoch": 0.8493456960610245, + "grad_norm": 4.946244650717768, + "learning_rate": 5e-06, + "loss": 0.0891, + "num_input_tokens_seen": 554613348, + "step": 3229 + }, + { + "epoch": 0.8493456960610245, + "loss": 0.07239595800638199, + "loss_ce": 0.0015646612737327814, + "loss_iou": 0.4921875, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 554613348, + "step": 3229 + }, + { + "epoch": 0.8496087328204116, + "grad_norm": 10.327431498521296, + "learning_rate": 5e-06, + "loss": 0.1153, + "num_input_tokens_seen": 554785544, + "step": 3230 + }, + { + "epoch": 0.8496087328204116, + "loss": 0.1082451343536377, + "loss_ce": 0.001616714522242546, + "loss_iou": 0.56640625, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 554785544, + "step": 3230 + }, + { + "epoch": 0.8498717695797988, + "grad_norm": 6.2381116819087605, + "learning_rate": 5e-06, + "loss": 0.0837, + "num_input_tokens_seen": 554957700, + "step": 3231 + }, + { + "epoch": 0.8498717695797988, + "loss": 0.05288837477564812, + "loss_ce": 0.0004286564071662724, + "loss_iou": 0.41015625, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 554957700, + "step": 3231 + }, + { + "epoch": 0.8501348063391859, + "grad_norm": 5.665331437954715, + "learning_rate": 5e-06, + "loss": 0.1052, + "num_input_tokens_seen": 555128400, + "step": 3232 + }, + { + "epoch": 0.8501348063391859, + "loss": 0.17892731726169586, + "loss_ce": 0.008456122130155563, + "loss_iou": 0.47265625, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 555128400, + "step": 3232 + }, + { + "epoch": 0.850397843098573, + "grad_norm": 4.02543856443553, + "learning_rate": 5e-06, + "loss": 0.095, + "num_input_tokens_seen": 555300712, + "step": 3233 + }, + { + "epoch": 0.850397843098573, + "loss": 0.1149306371808052, + "loss_ce": 0.0023512912448495626, + "loss_iou": 0.51171875, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 555300712, + "step": 3233 + }, + { + "epoch": 0.8506608798579601, + "grad_norm": 4.049090978454337, + "learning_rate": 5e-06, + "loss": 0.0991, + "num_input_tokens_seen": 555472756, + "step": 3234 + }, + { + "epoch": 0.8506608798579601, + "loss": 0.14269746840000153, + "loss_ce": 0.0037509393878281116, + "loss_iou": 0.56640625, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 555472756, + "step": 3234 + }, + { + "epoch": 0.8509239166173472, + "grad_norm": 4.24693988860905, + "learning_rate": 5e-06, + "loss": 0.1089, + "num_input_tokens_seen": 555644856, + "step": 3235 + }, + { + "epoch": 0.8509239166173472, + "loss": 0.06741193681955338, + "loss_ce": 0.0012955997372046113, + "loss_iou": 0.478515625, + "loss_num": 0.01324462890625, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 555644856, + "step": 3235 + }, + { + "epoch": 0.8511869533767344, + "grad_norm": 4.223553863328503, + "learning_rate": 5e-06, + "loss": 0.1173, + "num_input_tokens_seen": 555817488, + "step": 3236 + }, + { + "epoch": 0.8511869533767344, + "loss": 0.09499558061361313, + "loss_ce": 0.0007572993636131287, + "loss_iou": 0.5078125, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 555817488, + "step": 3236 + }, + { + "epoch": 0.8514499901361215, + "grad_norm": 8.070861336660833, + "learning_rate": 5e-06, + "loss": 0.1101, + "num_input_tokens_seen": 555989844, + "step": 3237 + }, + { + "epoch": 0.8514499901361215, + "loss": 0.09148094058036804, + "loss_ce": 0.0017287411028519273, + "loss_iou": 0.515625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 555989844, + "step": 3237 + }, + { + "epoch": 0.8517130268955087, + "grad_norm": 4.7628320746905235, + "learning_rate": 5e-06, + "loss": 0.13, + "num_input_tokens_seen": 556162144, + "step": 3238 + }, + { + "epoch": 0.8517130268955087, + "loss": 0.08755885809659958, + "loss_ce": 0.001667136326432228, + "loss_iou": 0.421875, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 556162144, + "step": 3238 + }, + { + "epoch": 0.8519760636548958, + "grad_norm": 8.201240707199219, + "learning_rate": 5e-06, + "loss": 0.068, + "num_input_tokens_seen": 556334020, + "step": 3239 + }, + { + "epoch": 0.8519760636548958, + "loss": 0.06216670200228691, + "loss_ce": 0.001772413495928049, + "loss_iou": 0.404296875, + "loss_num": 0.0120849609375, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 556334020, + "step": 3239 + }, + { + "epoch": 0.8522391004142829, + "grad_norm": 5.1836633623539266, + "learning_rate": 5e-06, + "loss": 0.1268, + "num_input_tokens_seen": 556502704, + "step": 3240 + }, + { + "epoch": 0.8522391004142829, + "loss": 0.09547331184148788, + "loss_ce": 0.0012350315228104591, + "loss_iou": 0.333984375, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 556502704, + "step": 3240 + }, + { + "epoch": 0.8525021371736701, + "grad_norm": 4.1790455766130234, + "learning_rate": 5e-06, + "loss": 0.1352, + "num_input_tokens_seen": 556674848, + "step": 3241 + }, + { + "epoch": 0.8525021371736701, + "loss": 0.15765714645385742, + "loss_ce": 0.00018642976647242904, + "loss_iou": 0.353515625, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 556674848, + "step": 3241 + }, + { + "epoch": 0.8527651739330572, + "grad_norm": 32.42085523655999, + "learning_rate": 5e-06, + "loss": 0.0973, + "num_input_tokens_seen": 556847120, + "step": 3242 + }, + { + "epoch": 0.8527651739330572, + "loss": 0.10435596853494644, + "loss_ce": 0.0030986424535512924, + "loss_iou": 0.48046875, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 556847120, + "step": 3242 + }, + { + "epoch": 0.8530282106924443, + "grad_norm": 3.085990680432512, + "learning_rate": 5e-06, + "loss": 0.1006, + "num_input_tokens_seen": 557015380, + "step": 3243 + }, + { + "epoch": 0.8530282106924443, + "loss": 0.12935911118984222, + "loss_ce": 0.0042217751033604145, + "loss_iou": 0.3515625, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 557015380, + "step": 3243 + }, + { + "epoch": 0.8532912474518314, + "grad_norm": 5.554407683422717, + "learning_rate": 5e-06, + "loss": 0.0837, + "num_input_tokens_seen": 557186020, + "step": 3244 + }, + { + "epoch": 0.8532912474518314, + "loss": 0.12462737411260605, + "loss_ce": 0.002557065337896347, + "loss_iou": 0.5234375, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 557186020, + "step": 3244 + }, + { + "epoch": 0.8535542842112185, + "grad_norm": 77.39805630943022, + "learning_rate": 5e-06, + "loss": 0.0876, + "num_input_tokens_seen": 557358056, + "step": 3245 + }, + { + "epoch": 0.8535542842112185, + "loss": 0.06247711926698685, + "loss_ce": 0.0029983618296682835, + "loss_iou": 0.5390625, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 557358056, + "step": 3245 + }, + { + "epoch": 0.8538173209706057, + "grad_norm": 6.568132581838013, + "learning_rate": 5e-06, + "loss": 0.1201, + "num_input_tokens_seen": 557530164, + "step": 3246 + }, + { + "epoch": 0.8538173209706057, + "loss": 0.10226649791002274, + "loss_ce": 0.0013143508695065975, + "loss_iou": 0.48828125, + "loss_num": 0.0201416015625, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 557530164, + "step": 3246 + }, + { + "epoch": 0.8540803577299928, + "grad_norm": 8.945021217700889, + "learning_rate": 5e-06, + "loss": 0.1245, + "num_input_tokens_seen": 557702352, + "step": 3247 + }, + { + "epoch": 0.8540803577299928, + "loss": 0.14076803624629974, + "loss_ce": 0.002218232722952962, + "loss_iou": 0.40625, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 557702352, + "step": 3247 + }, + { + "epoch": 0.8543433944893799, + "grad_norm": 5.970819800467794, + "learning_rate": 5e-06, + "loss": 0.113, + "num_input_tokens_seen": 557874756, + "step": 3248 + }, + { + "epoch": 0.8543433944893799, + "loss": 0.12330596148967743, + "loss_ce": 0.002425831276923418, + "loss_iou": 0.470703125, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 557874756, + "step": 3248 + }, + { + "epoch": 0.854606431248767, + "grad_norm": 12.952896902366254, + "learning_rate": 5e-06, + "loss": 0.1366, + "num_input_tokens_seen": 558045276, + "step": 3249 + }, + { + "epoch": 0.854606431248767, + "loss": 0.15818831324577332, + "loss_ce": 0.0018772899638861418, + "loss_iou": 0.443359375, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 558045276, + "step": 3249 + }, + { + "epoch": 0.8548694680081541, + "grad_norm": 10.077522334681213, + "learning_rate": 5e-06, + "loss": 0.0936, + "num_input_tokens_seen": 558217164, + "step": 3250 + }, + { + "epoch": 0.8548694680081541, + "eval_websight_new_CIoU": 0.8994152843952179, + "eval_websight_new_GIoU": 0.9002452492713928, + "eval_websight_new_IoU": 0.9029170572757721, + "eval_websight_new_MAE_all": 0.014043471310287714, + "eval_websight_new_MAE_h": 0.007522843778133392, + "eval_websight_new_MAE_w": 0.020659465342760086, + "eval_websight_new_MAE_x": 0.021266265772283077, + "eval_websight_new_MAE_y": 0.006725311512127519, + "eval_websight_new_NUM_probability": 0.9999923408031464, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 0.07195720076560974, + "eval_websight_new_loss_ce": 4.601216005539754e-06, + "eval_websight_new_loss_iou": 0.31280517578125, + "eval_websight_new_loss_num": 0.012767791748046875, + "eval_websight_new_loss_xval": 0.06391143798828125, + "eval_websight_new_runtime": 56.9054, + "eval_websight_new_samples_per_second": 0.879, + "eval_websight_new_steps_per_second": 0.035, + "num_input_tokens_seen": 558217164, + "step": 3250 + }, + { + "epoch": 0.8548694680081541, + "eval_seeclick_CIoU": 0.6642794907093048, + "eval_seeclick_GIoU": 0.6613934338092804, + "eval_seeclick_IoU": 0.6828196048736572, + "eval_seeclick_MAE_all": 0.04043097607791424, + "eval_seeclick_MAE_h": 0.023812726140022278, + "eval_seeclick_MAE_w": 0.055972687900066376, + "eval_seeclick_MAE_x": 0.05744660459458828, + "eval_seeclick_MAE_y": 0.02449188195168972, + "eval_seeclick_NUM_probability": 0.999969094991684, + "eval_seeclick_inside_bbox": 0.9375, + "eval_seeclick_loss": 0.1842024326324463, + "eval_seeclick_loss_ce": 0.008998175617307425, + "eval_seeclick_loss_iou": 0.4769287109375, + "eval_seeclick_loss_num": 0.0336456298828125, + "eval_seeclick_loss_xval": 0.1681365966796875, + "eval_seeclick_runtime": 76.1272, + "eval_seeclick_samples_per_second": 0.565, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 558217164, + "step": 3250 + }, + { + "epoch": 0.8548694680081541, + "eval_icons_CIoU": 0.8727431297302246, + "eval_icons_GIoU": 0.8711867332458496, + "eval_icons_IoU": 0.8781991004943848, + "eval_icons_MAE_all": 0.017583131790161133, + "eval_icons_MAE_h": 0.019472193904221058, + "eval_icons_MAE_w": 0.015529958996921778, + "eval_icons_MAE_x": 0.015652839560061693, + "eval_icons_MAE_y": 0.019677532836794853, + "eval_icons_NUM_probability": 0.9999885261058807, + "eval_icons_inside_bbox": 0.984375, + "eval_icons_loss": 0.06516695767641068, + "eval_icons_loss_ce": 9.223055712936912e-06, + "eval_icons_loss_iou": 0.504638671875, + "eval_icons_loss_num": 0.012132644653320312, + "eval_icons_loss_xval": 0.0607147216796875, + "eval_icons_runtime": 94.9537, + "eval_icons_samples_per_second": 0.527, + "eval_icons_steps_per_second": 0.021, + "num_input_tokens_seen": 558217164, + "step": 3250 + }, + { + "epoch": 0.8548694680081541, + "eval_screenspot_CIoU": 0.6023598512013754, + "eval_screenspot_GIoU": 0.5998436013857523, + "eval_screenspot_IoU": 0.6310188174247742, + "eval_screenspot_MAE_all": 0.06860506162047386, + "eval_screenspot_MAE_h": 0.04429138886431853, + "eval_screenspot_MAE_w": 0.11814649154742558, + "eval_screenspot_MAE_x": 0.06877896686395009, + "eval_screenspot_MAE_y": 0.043203407898545265, + "eval_screenspot_NUM_probability": 0.9998593727747599, + "eval_screenspot_inside_bbox": 0.8841666579246521, + "eval_screenspot_loss": 0.8844305276870728, + "eval_screenspot_loss_ce": 0.5899222294489542, + "eval_screenspot_loss_iou": 0.4657796223958333, + "eval_screenspot_loss_num": 0.057614644368489586, + "eval_screenspot_loss_xval": 0.2879842122395833, + "eval_screenspot_runtime": 153.6916, + "eval_screenspot_samples_per_second": 0.579, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 558217164, + "step": 3250 + }, + { + "epoch": 0.8548694680081541, + "loss": 0.8671462535858154, + "loss_ce": 0.5839431285858154, + "loss_iou": 0.404296875, + "loss_num": 0.056640625, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 558217164, + "step": 3250 + }, + { + "epoch": 0.8551325047675412, + "grad_norm": 38.29206628356214, + "learning_rate": 5e-06, + "loss": 0.1148, + "num_input_tokens_seen": 558389192, + "step": 3251 + }, + { + "epoch": 0.8551325047675412, + "loss": 0.21791958808898926, + "loss_ce": 0.0010311761870980263, + "loss_iou": 0.5859375, + "loss_num": 0.043212890625, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 558389192, + "step": 3251 + }, + { + "epoch": 0.8553955415269284, + "grad_norm": 7.6502620366688525, + "learning_rate": 5e-06, + "loss": 0.1206, + "num_input_tokens_seen": 558561204, + "step": 3252 + }, + { + "epoch": 0.8553955415269284, + "loss": 0.16455963253974915, + "loss_ce": 0.0034573215525597334, + "loss_iou": 0.376953125, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 558561204, + "step": 3252 + }, + { + "epoch": 0.8556585782863155, + "grad_norm": 7.815933727502156, + "learning_rate": 5e-06, + "loss": 0.142, + "num_input_tokens_seen": 558733140, + "step": 3253 + }, + { + "epoch": 0.8556585782863155, + "loss": 0.118372842669487, + "loss_ce": 0.0003461065352894366, + "loss_iou": NaN, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 558733140, + "step": 3253 + }, + { + "epoch": 0.8559216150457026, + "grad_norm": 26.03211500217736, + "learning_rate": 5e-06, + "loss": 0.0874, + "num_input_tokens_seen": 558905528, + "step": 3254 + }, + { + "epoch": 0.8559216150457026, + "loss": 0.0649479404091835, + "loss_ce": 0.0013187924632802606, + "loss_iou": 0.484375, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 558905528, + "step": 3254 + }, + { + "epoch": 0.8561846518050897, + "grad_norm": 3.695118212351963, + "learning_rate": 5e-06, + "loss": 0.0848, + "num_input_tokens_seen": 559077788, + "step": 3255 + }, + { + "epoch": 0.8561846518050897, + "loss": 0.05111432075500488, + "loss_ce": 0.00036358798388391733, + "loss_iou": 0.419921875, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 559077788, + "step": 3255 + }, + { + "epoch": 0.8564476885644768, + "grad_norm": 4.828867302993965, + "learning_rate": 5e-06, + "loss": 0.1156, + "num_input_tokens_seen": 559249740, + "step": 3256 + }, + { + "epoch": 0.8564476885644768, + "loss": 0.15594083070755005, + "loss_ce": 0.00011808436829596758, + "loss_iou": 0.279296875, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 559249740, + "step": 3256 + }, + { + "epoch": 0.856710725323864, + "grad_norm": 4.4588084335736, + "learning_rate": 5e-06, + "loss": 0.0934, + "num_input_tokens_seen": 559422008, + "step": 3257 + }, + { + "epoch": 0.856710725323864, + "loss": 0.1171468049287796, + "loss_ce": 0.005177808925509453, + "loss_iou": 0.458984375, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 559422008, + "step": 3257 + }, + { + "epoch": 0.8569737620832512, + "grad_norm": 22.52084230262158, + "learning_rate": 5e-06, + "loss": 0.1295, + "num_input_tokens_seen": 559594180, + "step": 3258 + }, + { + "epoch": 0.8569737620832512, + "loss": 0.12303026020526886, + "loss_ce": 0.00025803959579207003, + "loss_iou": 0.49609375, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 559594180, + "step": 3258 + }, + { + "epoch": 0.8572367988426383, + "grad_norm": 5.00194835125284, + "learning_rate": 5e-06, + "loss": 0.1458, + "num_input_tokens_seen": 559766068, + "step": 3259 + }, + { + "epoch": 0.8572367988426383, + "loss": 0.14650404453277588, + "loss_ce": 0.0012403683504089713, + "loss_iou": 0.29296875, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 559766068, + "step": 3259 + }, + { + "epoch": 0.8574998356020254, + "grad_norm": 5.872594878613971, + "learning_rate": 5e-06, + "loss": 0.107, + "num_input_tokens_seen": 559937984, + "step": 3260 + }, + { + "epoch": 0.8574998356020254, + "loss": 0.14809638261795044, + "loss_ce": 0.0009711501188576221, + "loss_iou": 0.40234375, + "loss_num": 0.0294189453125, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 559937984, + "step": 3260 + }, + { + "epoch": 0.8577628723614125, + "grad_norm": 7.89915891449399, + "learning_rate": 5e-06, + "loss": 0.1202, + "num_input_tokens_seen": 560108224, + "step": 3261 + }, + { + "epoch": 0.8577628723614125, + "loss": 0.12339873611927032, + "loss_ce": 0.0029458554927259684, + "loss_iou": 0.54296875, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 560108224, + "step": 3261 + }, + { + "epoch": 0.8580259091207997, + "grad_norm": 5.069283331721027, + "learning_rate": 5e-06, + "loss": 0.151, + "num_input_tokens_seen": 560280532, + "step": 3262 + }, + { + "epoch": 0.8580259091207997, + "loss": 0.12928983569145203, + "loss_ce": 0.004533977247774601, + "loss_iou": 0.69921875, + "loss_num": 0.02490234375, + "loss_xval": 0.125, + "num_input_tokens_seen": 560280532, + "step": 3262 + }, + { + "epoch": 0.8582889458801868, + "grad_norm": 14.252906607697872, + "learning_rate": 5e-06, + "loss": 0.0923, + "num_input_tokens_seen": 560452680, + "step": 3263 + }, + { + "epoch": 0.8582889458801868, + "loss": 0.10280074179172516, + "loss_ce": 0.0005515902303159237, + "loss_iou": 0.341796875, + "loss_num": 0.0205078125, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 560452680, + "step": 3263 + }, + { + "epoch": 0.8585519826395739, + "grad_norm": 4.981476516908149, + "learning_rate": 5e-06, + "loss": 0.1031, + "num_input_tokens_seen": 560625236, + "step": 3264 + }, + { + "epoch": 0.8585519826395739, + "loss": 0.11402536928653717, + "loss_ce": 0.0004999822122044861, + "loss_iou": 0.453125, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 560625236, + "step": 3264 + }, + { + "epoch": 0.858815019398961, + "grad_norm": 8.268610027664979, + "learning_rate": 5e-06, + "loss": 0.1138, + "num_input_tokens_seen": 560797248, + "step": 3265 + }, + { + "epoch": 0.858815019398961, + "loss": 0.13858602941036224, + "loss_ce": 0.0017146880272775888, + "loss_iou": 0.57421875, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 560797248, + "step": 3265 + }, + { + "epoch": 0.8590780561583481, + "grad_norm": 8.133733497165355, + "learning_rate": 5e-06, + "loss": 0.1637, + "num_input_tokens_seen": 560969220, + "step": 3266 + }, + { + "epoch": 0.8590780561583481, + "loss": 0.09112342447042465, + "loss_ce": 0.00012000648712273687, + "loss_iou": 0.59375, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 560969220, + "step": 3266 + }, + { + "epoch": 0.8593410929177353, + "grad_norm": 3.6754936276624406, + "learning_rate": 5e-06, + "loss": 0.1416, + "num_input_tokens_seen": 561141552, + "step": 3267 + }, + { + "epoch": 0.8593410929177353, + "loss": 0.12336836755275726, + "loss_ce": 0.0018015915993601084, + "loss_iou": 0.443359375, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 561141552, + "step": 3267 + }, + { + "epoch": 0.8596041296771224, + "grad_norm": 12.378662074106767, + "learning_rate": 5e-06, + "loss": 0.1558, + "num_input_tokens_seen": 561313780, + "step": 3268 + }, + { + "epoch": 0.8596041296771224, + "loss": 0.14065909385681152, + "loss_ce": 0.0051305294036865234, + "loss_iou": 0.6015625, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 561313780, + "step": 3268 + }, + { + "epoch": 0.8598671664365095, + "grad_norm": 3.0561071778648485, + "learning_rate": 5e-06, + "loss": 0.103, + "num_input_tokens_seen": 561486192, + "step": 3269 + }, + { + "epoch": 0.8598671664365095, + "loss": 0.0891718789935112, + "loss_ce": 0.003921023570001125, + "loss_iou": 0.453125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 561486192, + "step": 3269 + }, + { + "epoch": 0.8601302031958966, + "grad_norm": 4.439332180024113, + "learning_rate": 5e-06, + "loss": 0.1082, + "num_input_tokens_seen": 561658324, + "step": 3270 + }, + { + "epoch": 0.8601302031958966, + "loss": 0.0599624440073967, + "loss_ce": 0.0006515316781587899, + "loss_iou": 0.44140625, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 561658324, + "step": 3270 + }, + { + "epoch": 0.8603932399552837, + "grad_norm": 5.0489240884621305, + "learning_rate": 5e-06, + "loss": 0.0889, + "num_input_tokens_seen": 561829100, + "step": 3271 + }, + { + "epoch": 0.8603932399552837, + "loss": 0.06052926927804947, + "loss_ce": 0.0006232602754607797, + "loss_iou": 0.408203125, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 561829100, + "step": 3271 + }, + { + "epoch": 0.8606562767146708, + "grad_norm": 14.0110320202428, + "learning_rate": 5e-06, + "loss": 0.0965, + "num_input_tokens_seen": 562001212, + "step": 3272 + }, + { + "epoch": 0.8606562767146708, + "loss": 0.05530541390180588, + "loss_ce": 0.0009230896248482168, + "loss_iou": 0.59765625, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 562001212, + "step": 3272 + }, + { + "epoch": 0.860919313474058, + "grad_norm": 5.01456561485217, + "learning_rate": 5e-06, + "loss": 0.1815, + "num_input_tokens_seen": 562173524, + "step": 3273 + }, + { + "epoch": 0.860919313474058, + "loss": 0.2010606825351715, + "loss_ce": 0.002330208197236061, + "loss_iou": 0.50390625, + "loss_num": 0.039794921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 562173524, + "step": 3273 + }, + { + "epoch": 0.8611823502334451, + "grad_norm": 9.881111822277372, + "learning_rate": 5e-06, + "loss": 0.1502, + "num_input_tokens_seen": 562345824, + "step": 3274 + }, + { + "epoch": 0.8611823502334451, + "loss": 0.106649249792099, + "loss_ce": 0.0010584269184619188, + "loss_iou": 0.59765625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 562345824, + "step": 3274 + }, + { + "epoch": 0.8614453869928322, + "grad_norm": 4.536681210731165, + "learning_rate": 5e-06, + "loss": 0.1007, + "num_input_tokens_seen": 562517872, + "step": 3275 + }, + { + "epoch": 0.8614453869928322, + "loss": 0.07371491193771362, + "loss_ce": 0.0001065141914295964, + "loss_iou": 0.48046875, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 562517872, + "step": 3275 + }, + { + "epoch": 0.8617084237522193, + "grad_norm": 4.65141834474055, + "learning_rate": 5e-06, + "loss": 0.1095, + "num_input_tokens_seen": 562689796, + "step": 3276 + }, + { + "epoch": 0.8617084237522193, + "loss": 0.09085643291473389, + "loss_ce": 0.001287340302951634, + "loss_iou": 0.55859375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 562689796, + "step": 3276 + }, + { + "epoch": 0.8619714605116064, + "grad_norm": 8.721252840556735, + "learning_rate": 5e-06, + "loss": 0.1602, + "num_input_tokens_seen": 562860476, + "step": 3277 + }, + { + "epoch": 0.8619714605116064, + "loss": 0.1055789366364479, + "loss_ce": 0.00273469858802855, + "loss_iou": 0.5234375, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 562860476, + "step": 3277 + }, + { + "epoch": 0.8622344972709937, + "grad_norm": 4.218638673445259, + "learning_rate": 5e-06, + "loss": 0.0896, + "num_input_tokens_seen": 563032560, + "step": 3278 + }, + { + "epoch": 0.8622344972709937, + "loss": 0.10979656875133514, + "loss_ce": 0.00536542059853673, + "loss_iou": 0.609375, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 563032560, + "step": 3278 + }, + { + "epoch": 0.8624975340303808, + "grad_norm": 5.172843844259764, + "learning_rate": 5e-06, + "loss": 0.0986, + "num_input_tokens_seen": 563204720, + "step": 3279 + }, + { + "epoch": 0.8624975340303808, + "loss": 0.08518050611019135, + "loss_ce": 0.0019438066519796848, + "loss_iou": 0.458984375, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 563204720, + "step": 3279 + }, + { + "epoch": 0.8627605707897679, + "grad_norm": 4.560338661852982, + "learning_rate": 5e-06, + "loss": 0.1002, + "num_input_tokens_seen": 563377056, + "step": 3280 + }, + { + "epoch": 0.8627605707897679, + "loss": 0.08894481509923935, + "loss_ce": 0.0001691804500296712, + "loss_iou": 0.578125, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 563377056, + "step": 3280 + }, + { + "epoch": 0.863023607549155, + "grad_norm": 10.082366990468211, + "learning_rate": 5e-06, + "loss": 0.1264, + "num_input_tokens_seen": 563549604, + "step": 3281 + }, + { + "epoch": 0.863023607549155, + "loss": 0.1488599181175232, + "loss_ce": 0.0033978780265897512, + "loss_iou": 0.494140625, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 563549604, + "step": 3281 + }, + { + "epoch": 0.8632866443085421, + "grad_norm": 4.832733016773012, + "learning_rate": 5e-06, + "loss": 0.081, + "num_input_tokens_seen": 563721852, + "step": 3282 + }, + { + "epoch": 0.8632866443085421, + "loss": 0.14535953104496002, + "loss_ce": 0.0013928530970588326, + "loss_iou": 0.46484375, + "loss_num": 0.02880859375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 563721852, + "step": 3282 + }, + { + "epoch": 0.8635496810679293, + "grad_norm": 6.063993109800719, + "learning_rate": 5e-06, + "loss": 0.0887, + "num_input_tokens_seen": 563893984, + "step": 3283 + }, + { + "epoch": 0.8635496810679293, + "loss": 0.06962516903877258, + "loss_ce": 0.0006554402643814683, + "loss_iou": 0.4453125, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 563893984, + "step": 3283 + }, + { + "epoch": 0.8638127178273164, + "grad_norm": 4.985736636691599, + "learning_rate": 5e-06, + "loss": 0.1177, + "num_input_tokens_seen": 564062972, + "step": 3284 + }, + { + "epoch": 0.8638127178273164, + "loss": 0.11227120459079742, + "loss_ce": 0.002926721004769206, + "loss_iou": 0.625, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 564062972, + "step": 3284 + }, + { + "epoch": 0.8640757545867035, + "grad_norm": 10.90397210954732, + "learning_rate": 5e-06, + "loss": 0.1087, + "num_input_tokens_seen": 564235088, + "step": 3285 + }, + { + "epoch": 0.8640757545867035, + "loss": 0.1431923508644104, + "loss_ce": 0.0032692591194063425, + "loss_iou": 0.51953125, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 564235088, + "step": 3285 + }, + { + "epoch": 0.8643387913460906, + "grad_norm": 10.801058055973197, + "learning_rate": 5e-06, + "loss": 0.1202, + "num_input_tokens_seen": 564407212, + "step": 3286 + }, + { + "epoch": 0.8643387913460906, + "loss": 0.17996424436569214, + "loss_ce": 0.005098515655845404, + "loss_iou": 0.48828125, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 564407212, + "step": 3286 + }, + { + "epoch": 0.8646018281054777, + "grad_norm": 3.7908688756477646, + "learning_rate": 5e-06, + "loss": 0.1763, + "num_input_tokens_seen": 564579544, + "step": 3287 + }, + { + "epoch": 0.8646018281054777, + "loss": 0.1904245764017105, + "loss_ce": 0.0021311198361217976, + "loss_iou": 0.455078125, + "loss_num": 0.03759765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 564579544, + "step": 3287 + }, + { + "epoch": 0.8648648648648649, + "grad_norm": 20.50727894197877, + "learning_rate": 5e-06, + "loss": 0.1134, + "num_input_tokens_seen": 564751736, + "step": 3288 + }, + { + "epoch": 0.8648648648648649, + "loss": 0.13853441178798676, + "loss_ce": 0.006606926675885916, + "loss_iou": 0.34765625, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 564751736, + "step": 3288 + }, + { + "epoch": 0.865127901624252, + "grad_norm": 7.515905813339519, + "learning_rate": 5e-06, + "loss": 0.1008, + "num_input_tokens_seen": 564923836, + "step": 3289 + }, + { + "epoch": 0.865127901624252, + "loss": 0.09346568584442139, + "loss_ce": 0.0014246755745261908, + "loss_iou": 0.453125, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 564923836, + "step": 3289 + }, + { + "epoch": 0.8653909383836391, + "grad_norm": 4.659865141429364, + "learning_rate": 5e-06, + "loss": 0.0984, + "num_input_tokens_seen": 565093036, + "step": 3290 + }, + { + "epoch": 0.8653909383836391, + "loss": 0.05857858434319496, + "loss_ce": 0.0009613969596102834, + "loss_iou": 0.46875, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 565093036, + "step": 3290 + }, + { + "epoch": 0.8656539751430262, + "grad_norm": 7.156818926737148, + "learning_rate": 5e-06, + "loss": 0.0929, + "num_input_tokens_seen": 565264968, + "step": 3291 + }, + { + "epoch": 0.8656539751430262, + "loss": 0.05671301484107971, + "loss_ce": 0.0005454107304103673, + "loss_iou": 0.4453125, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 565264968, + "step": 3291 + }, + { + "epoch": 0.8659170119024133, + "grad_norm": 4.159578348893538, + "learning_rate": 5e-06, + "loss": 0.0929, + "num_input_tokens_seen": 565437008, + "step": 3292 + }, + { + "epoch": 0.8659170119024133, + "loss": 0.10393117368221283, + "loss_ce": 0.0047185225412249565, + "loss_iou": 0.380859375, + "loss_num": 0.0198974609375, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 565437008, + "step": 3292 + }, + { + "epoch": 0.8661800486618005, + "grad_norm": 5.125947407861983, + "learning_rate": 5e-06, + "loss": 0.0679, + "num_input_tokens_seen": 565609352, + "step": 3293 + }, + { + "epoch": 0.8661800486618005, + "loss": 0.07895916700363159, + "loss_ce": 0.004221613518893719, + "loss_iou": 0.51171875, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 565609352, + "step": 3293 + }, + { + "epoch": 0.8664430854211876, + "grad_norm": 4.631562530302505, + "learning_rate": 5e-06, + "loss": 0.0987, + "num_input_tokens_seen": 565781624, + "step": 3294 + }, + { + "epoch": 0.8664430854211876, + "loss": 0.10047349333763123, + "loss_ce": 0.0010014427825808525, + "loss_iou": 0.609375, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 565781624, + "step": 3294 + }, + { + "epoch": 0.8667061221805747, + "grad_norm": 12.748430441696817, + "learning_rate": 5e-06, + "loss": 0.105, + "num_input_tokens_seen": 565954124, + "step": 3295 + }, + { + "epoch": 0.8667061221805747, + "loss": 0.10027378797531128, + "loss_ce": 0.0005728579708375037, + "loss_iou": 0.478515625, + "loss_num": 0.02001953125, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 565954124, + "step": 3295 + }, + { + "epoch": 0.8669691589399618, + "grad_norm": 8.307592437145553, + "learning_rate": 5e-06, + "loss": 0.0922, + "num_input_tokens_seen": 566124916, + "step": 3296 + }, + { + "epoch": 0.8669691589399618, + "loss": 0.10838001221418381, + "loss_ce": 0.006359752267599106, + "loss_iou": 0.61328125, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 566124916, + "step": 3296 + }, + { + "epoch": 0.867232195699349, + "grad_norm": 4.778870142686005, + "learning_rate": 5e-06, + "loss": 0.1132, + "num_input_tokens_seen": 566296912, + "step": 3297 + }, + { + "epoch": 0.867232195699349, + "loss": 0.04819861054420471, + "loss_ce": 0.00030127062927931547, + "loss_iou": 0.45703125, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 566296912, + "step": 3297 + }, + { + "epoch": 0.867495232458736, + "grad_norm": 7.859655636877698, + "learning_rate": 5e-06, + "loss": 0.1104, + "num_input_tokens_seen": 566467596, + "step": 3298 + }, + { + "epoch": 0.867495232458736, + "loss": 0.1346224993467331, + "loss_ce": 0.0007113683386705816, + "loss_iou": 0.57421875, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 566467596, + "step": 3298 + }, + { + "epoch": 0.8677582692181233, + "grad_norm": 15.137416498429843, + "learning_rate": 5e-06, + "loss": 0.1106, + "num_input_tokens_seen": 566639732, + "step": 3299 + }, + { + "epoch": 0.8677582692181233, + "loss": 0.10433374345302582, + "loss_ce": 0.00313745578750968, + "loss_iou": 0.49609375, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 566639732, + "step": 3299 + }, + { + "epoch": 0.8680213059775104, + "grad_norm": 2.9506634806748657, + "learning_rate": 5e-06, + "loss": 0.118, + "num_input_tokens_seen": 566811792, + "step": 3300 + }, + { + "epoch": 0.8680213059775104, + "loss": 0.07893712818622589, + "loss_ce": 0.0004764424229506403, + "loss_iou": 0.53125, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 566811792, + "step": 3300 + }, + { + "epoch": 0.8682843427368975, + "grad_norm": 21.308475468927227, + "learning_rate": 5e-06, + "loss": 0.1217, + "num_input_tokens_seen": 566984152, + "step": 3301 + }, + { + "epoch": 0.8682843427368975, + "loss": 0.07327578961849213, + "loss_ce": 0.004519685637205839, + "loss_iou": 0.41015625, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 566984152, + "step": 3301 + }, + { + "epoch": 0.8685473794962846, + "grad_norm": 4.854189489350901, + "learning_rate": 5e-06, + "loss": 0.0778, + "num_input_tokens_seen": 567156660, + "step": 3302 + }, + { + "epoch": 0.8685473794962846, + "loss": 0.05097039043903351, + "loss_ce": 0.0012877746485173702, + "loss_iou": 0.44140625, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 567156660, + "step": 3302 + }, + { + "epoch": 0.8688104162556717, + "grad_norm": 16.314064517801537, + "learning_rate": 5e-06, + "loss": 0.0994, + "num_input_tokens_seen": 567328912, + "step": 3303 + }, + { + "epoch": 0.8688104162556717, + "loss": 0.11892714351415634, + "loss_ce": 0.0012513676192611456, + "loss_iou": 0.5546875, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 567328912, + "step": 3303 + }, + { + "epoch": 0.8690734530150589, + "grad_norm": 16.431940991025456, + "learning_rate": 5e-06, + "loss": 0.155, + "num_input_tokens_seen": 567501148, + "step": 3304 + }, + { + "epoch": 0.8690734530150589, + "loss": 0.12847568094730377, + "loss_ce": 8.82247113622725e-05, + "loss_iou": 0.46875, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 567501148, + "step": 3304 + }, + { + "epoch": 0.869336489774446, + "grad_norm": 3.641251656159218, + "learning_rate": 5e-06, + "loss": 0.1218, + "num_input_tokens_seen": 567673160, + "step": 3305 + }, + { + "epoch": 0.869336489774446, + "loss": 0.2121918946504593, + "loss_ce": 0.0007966216653585434, + "loss_iou": 0.3984375, + "loss_num": 0.042236328125, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 567673160, + "step": 3305 + }, + { + "epoch": 0.8695995265338331, + "grad_norm": 27.660934774490972, + "learning_rate": 5e-06, + "loss": 0.1174, + "num_input_tokens_seen": 567845644, + "step": 3306 + }, + { + "epoch": 0.8695995265338331, + "loss": 0.07302013039588928, + "loss_ce": 0.00043406913755461574, + "loss_iou": 0.5078125, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 567845644, + "step": 3306 + }, + { + "epoch": 0.8698625632932202, + "grad_norm": 5.415514477165426, + "learning_rate": 5e-06, + "loss": 0.1059, + "num_input_tokens_seen": 568017832, + "step": 3307 + }, + { + "epoch": 0.8698625632932202, + "loss": 0.14274545013904572, + "loss_ce": 0.0012964779743924737, + "loss_iou": 0.412109375, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 568017832, + "step": 3307 + }, + { + "epoch": 0.8701256000526073, + "grad_norm": 6.457549289729974, + "learning_rate": 5e-06, + "loss": 0.1437, + "num_input_tokens_seen": 568189908, + "step": 3308 + }, + { + "epoch": 0.8701256000526073, + "loss": 0.17567481100559235, + "loss_ce": 0.0026706610806286335, + "loss_iou": 0.58203125, + "loss_num": 0.03466796875, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 568189908, + "step": 3308 + }, + { + "epoch": 0.8703886368119945, + "grad_norm": 4.776984151346558, + "learning_rate": 5e-06, + "loss": 0.0712, + "num_input_tokens_seen": 568358640, + "step": 3309 + }, + { + "epoch": 0.8703886368119945, + "loss": 0.050816282629966736, + "loss_ce": 0.0006148651009425521, + "loss_iou": 0.50390625, + "loss_num": 0.010009765625, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 568358640, + "step": 3309 + }, + { + "epoch": 0.8706516735713816, + "grad_norm": 8.715556903813102, + "learning_rate": 5e-06, + "loss": 0.0681, + "num_input_tokens_seen": 568529372, + "step": 3310 + }, + { + "epoch": 0.8706516735713816, + "loss": 0.055395372211933136, + "loss_ce": 0.002645737724378705, + "loss_iou": 0.546875, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 568529372, + "step": 3310 + }, + { + "epoch": 0.8709147103307687, + "grad_norm": 4.571078001011392, + "learning_rate": 5e-06, + "loss": 0.0938, + "num_input_tokens_seen": 568701592, + "step": 3311 + }, + { + "epoch": 0.8709147103307687, + "loss": 0.07437048852443695, + "loss_ce": 0.0011893401388078928, + "loss_iou": 0.408203125, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 568701592, + "step": 3311 + }, + { + "epoch": 0.8711777470901558, + "grad_norm": 5.629346035154349, + "learning_rate": 5e-06, + "loss": 0.0849, + "num_input_tokens_seen": 568871992, + "step": 3312 + }, + { + "epoch": 0.8711777470901558, + "loss": 0.08190266788005829, + "loss_ce": 0.001977135892957449, + "loss_iou": 0.58203125, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 568871992, + "step": 3312 + }, + { + "epoch": 0.8714407838495429, + "grad_norm": 5.163413374700219, + "learning_rate": 5e-06, + "loss": 0.0889, + "num_input_tokens_seen": 569044108, + "step": 3313 + }, + { + "epoch": 0.8714407838495429, + "loss": 0.08449655771255493, + "loss_ce": 0.0003901080635841936, + "loss_iou": 0.515625, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 569044108, + "step": 3313 + }, + { + "epoch": 0.8717038206089301, + "grad_norm": 5.300317232023734, + "learning_rate": 5e-06, + "loss": 0.1198, + "num_input_tokens_seen": 569216304, + "step": 3314 + }, + { + "epoch": 0.8717038206089301, + "loss": 0.18054433166980743, + "loss_ce": 9.388441685587168e-05, + "loss_iou": 0.484375, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 569216304, + "step": 3314 + }, + { + "epoch": 0.8719668573683172, + "grad_norm": 5.650101854564751, + "learning_rate": 5e-06, + "loss": 0.101, + "num_input_tokens_seen": 569388684, + "step": 3315 + }, + { + "epoch": 0.8719668573683172, + "loss": 0.08428364247083664, + "loss_ce": 0.0036256806924939156, + "loss_iou": 0.37890625, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 569388684, + "step": 3315 + }, + { + "epoch": 0.8722298941277044, + "grad_norm": 6.227695364300726, + "learning_rate": 5e-06, + "loss": 0.1296, + "num_input_tokens_seen": 569558992, + "step": 3316 + }, + { + "epoch": 0.8722298941277044, + "loss": 0.18722307682037354, + "loss_ce": 0.005307785701006651, + "loss_iou": 0.55859375, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 569558992, + "step": 3316 + }, + { + "epoch": 0.8724929308870915, + "grad_norm": 5.420469146952656, + "learning_rate": 5e-06, + "loss": 0.1311, + "num_input_tokens_seen": 569731372, + "step": 3317 + }, + { + "epoch": 0.8724929308870915, + "loss": 0.09945103526115417, + "loss_ce": 0.002435657661408186, + "loss_iou": 0.4921875, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 569731372, + "step": 3317 + }, + { + "epoch": 0.8727559676464786, + "grad_norm": 4.496256221139891, + "learning_rate": 5e-06, + "loss": 0.0934, + "num_input_tokens_seen": 569902092, + "step": 3318 + }, + { + "epoch": 0.8727559676464786, + "loss": 0.09977222979068756, + "loss_ce": 8.65593392518349e-05, + "loss_iou": 0.5234375, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 569902092, + "step": 3318 + }, + { + "epoch": 0.8730190044058658, + "grad_norm": 3.818210026049398, + "learning_rate": 5e-06, + "loss": 0.0969, + "num_input_tokens_seen": 570074040, + "step": 3319 + }, + { + "epoch": 0.8730190044058658, + "loss": 0.11558879911899567, + "loss_ce": 0.0015446072211489081, + "loss_iou": 0.447265625, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 570074040, + "step": 3319 + }, + { + "epoch": 0.8732820411652529, + "grad_norm": 48.02513571155541, + "learning_rate": 5e-06, + "loss": 0.1371, + "num_input_tokens_seen": 570246368, + "step": 3320 + }, + { + "epoch": 0.8732820411652529, + "loss": 0.1751258671283722, + "loss_ce": 0.00022961836657486856, + "loss_iou": 0.56640625, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 570246368, + "step": 3320 + }, + { + "epoch": 0.87354507792464, + "grad_norm": 9.348108956967192, + "learning_rate": 5e-06, + "loss": 0.1217, + "num_input_tokens_seen": 570418348, + "step": 3321 + }, + { + "epoch": 0.87354507792464, + "loss": 0.1822582334280014, + "loss_ce": 0.001411056611686945, + "loss_iou": 0.53125, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 570418348, + "step": 3321 + }, + { + "epoch": 0.8738081146840271, + "grad_norm": 10.70912004570317, + "learning_rate": 5e-06, + "loss": 0.1067, + "num_input_tokens_seen": 570590896, + "step": 3322 + }, + { + "epoch": 0.8738081146840271, + "loss": 0.1525057554244995, + "loss_ce": 0.001626854995265603, + "loss_iou": 0.439453125, + "loss_num": 0.0301513671875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 570590896, + "step": 3322 + }, + { + "epoch": 0.8740711514434142, + "grad_norm": 5.432623634439659, + "learning_rate": 5e-06, + "loss": 0.0846, + "num_input_tokens_seen": 570763096, + "step": 3323 + }, + { + "epoch": 0.8740711514434142, + "loss": 0.10707151144742966, + "loss_ce": 0.00158750603441149, + "loss_iou": 0.392578125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 570763096, + "step": 3323 + }, + { + "epoch": 0.8743341882028013, + "grad_norm": 5.502873741651398, + "learning_rate": 5e-06, + "loss": 0.1645, + "num_input_tokens_seen": 570935100, + "step": 3324 + }, + { + "epoch": 0.8743341882028013, + "loss": 0.0433497279882431, + "loss_ce": 0.0022425525821745396, + "loss_iou": 0.625, + "loss_num": 0.00823974609375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 570935100, + "step": 3324 + }, + { + "epoch": 0.8745972249621885, + "grad_norm": 9.035108225672646, + "learning_rate": 5e-06, + "loss": 0.1216, + "num_input_tokens_seen": 571107528, + "step": 3325 + }, + { + "epoch": 0.8745972249621885, + "loss": 0.11922352015972137, + "loss_ce": 0.008475230075418949, + "loss_iou": 0.490234375, + "loss_num": 0.0220947265625, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 571107528, + "step": 3325 + }, + { + "epoch": 0.8748602617215756, + "grad_norm": 4.9129107081845635, + "learning_rate": 5e-06, + "loss": 0.1196, + "num_input_tokens_seen": 571279464, + "step": 3326 + }, + { + "epoch": 0.8748602617215756, + "loss": 0.09204280376434326, + "loss_ce": 0.0020769857801496983, + "loss_iou": 0.640625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 571279464, + "step": 3326 + }, + { + "epoch": 0.8751232984809627, + "grad_norm": 4.808896887234646, + "learning_rate": 5e-06, + "loss": 0.1168, + "num_input_tokens_seen": 571451676, + "step": 3327 + }, + { + "epoch": 0.8751232984809627, + "loss": 0.11273814737796783, + "loss_ce": 0.00046397349797189236, + "loss_iou": 0.439453125, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 571451676, + "step": 3327 + }, + { + "epoch": 0.8753863352403498, + "grad_norm": 4.264250577253754, + "learning_rate": 5e-06, + "loss": 0.0882, + "num_input_tokens_seen": 571620552, + "step": 3328 + }, + { + "epoch": 0.8753863352403498, + "loss": 0.06821378320455551, + "loss_ce": 0.0043710097670555115, + "loss_iou": 0.38671875, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 571620552, + "step": 3328 + }, + { + "epoch": 0.8756493719997369, + "grad_norm": 19.986204474022973, + "learning_rate": 5e-06, + "loss": 0.1309, + "num_input_tokens_seen": 571792724, + "step": 3329 + }, + { + "epoch": 0.8756493719997369, + "loss": 0.10326668620109558, + "loss_ce": 0.002741775708273053, + "loss_iou": 0.5859375, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 571792724, + "step": 3329 + }, + { + "epoch": 0.8759124087591241, + "grad_norm": 4.794277486673457, + "learning_rate": 5e-06, + "loss": 0.1107, + "num_input_tokens_seen": 571964880, + "step": 3330 + }, + { + "epoch": 0.8759124087591241, + "loss": 0.11237187683582306, + "loss_ce": 0.00040288365562446415, + "loss_iou": 0.5703125, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 571964880, + "step": 3330 + }, + { + "epoch": 0.8761754455185112, + "grad_norm": 4.388563206165435, + "learning_rate": 5e-06, + "loss": 0.1008, + "num_input_tokens_seen": 572137040, + "step": 3331 + }, + { + "epoch": 0.8761754455185112, + "loss": 0.1180691048502922, + "loss_ce": 0.003506115637719631, + "loss_iou": 0.5625, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 572137040, + "step": 3331 + }, + { + "epoch": 0.8764384822778983, + "grad_norm": 4.318891449603682, + "learning_rate": 5e-06, + "loss": 0.1065, + "num_input_tokens_seen": 572309112, + "step": 3332 + }, + { + "epoch": 0.8764384822778983, + "loss": 0.11371566355228424, + "loss_ce": 0.0035472088493406773, + "loss_iou": 0.51171875, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 572309112, + "step": 3332 + }, + { + "epoch": 0.8767015190372854, + "grad_norm": 10.0048154448047, + "learning_rate": 5e-06, + "loss": 0.1344, + "num_input_tokens_seen": 572481700, + "step": 3333 + }, + { + "epoch": 0.8767015190372854, + "loss": 0.21858729422092438, + "loss_ce": 0.0018819711403921247, + "loss_iou": 0.42578125, + "loss_num": 0.04345703125, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 572481700, + "step": 3333 + }, + { + "epoch": 0.8769645557966725, + "grad_norm": 20.86653328138894, + "learning_rate": 5e-06, + "loss": 0.1063, + "num_input_tokens_seen": 572653588, + "step": 3334 + }, + { + "epoch": 0.8769645557966725, + "loss": 0.1189296618103981, + "loss_ce": 0.002413547597825527, + "loss_iou": 0.392578125, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 572653588, + "step": 3334 + }, + { + "epoch": 0.8772275925560598, + "grad_norm": 4.936814293101831, + "learning_rate": 5e-06, + "loss": 0.1146, + "num_input_tokens_seen": 572825664, + "step": 3335 + }, + { + "epoch": 0.8772275925560598, + "loss": 0.11313323676586151, + "loss_ce": 0.002995288698002696, + "loss_iou": 0.4765625, + "loss_num": 0.02197265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 572825664, + "step": 3335 + }, + { + "epoch": 0.8774906293154469, + "grad_norm": 45.48466563185734, + "learning_rate": 5e-06, + "loss": 0.117, + "num_input_tokens_seen": 572997764, + "step": 3336 + }, + { + "epoch": 0.8774906293154469, + "loss": 0.12880939245224, + "loss_ce": 0.0020393752492964268, + "loss_iou": 0.61328125, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 572997764, + "step": 3336 + }, + { + "epoch": 0.877753666074834, + "grad_norm": 5.329772367724437, + "learning_rate": 5e-06, + "loss": 0.145, + "num_input_tokens_seen": 573167692, + "step": 3337 + }, + { + "epoch": 0.877753666074834, + "loss": 0.06320846080780029, + "loss_ce": 0.0007694964297115803, + "loss_iou": 0.4609375, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 573167692, + "step": 3337 + }, + { + "epoch": 0.8780167028342211, + "grad_norm": 9.592551272557182, + "learning_rate": 5e-06, + "loss": 0.0773, + "num_input_tokens_seen": 573340028, + "step": 3338 + }, + { + "epoch": 0.8780167028342211, + "loss": 0.08143293112516403, + "loss_ce": 0.0008054894860833883, + "loss_iou": 0.57421875, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 573340028, + "step": 3338 + }, + { + "epoch": 0.8782797395936082, + "grad_norm": 3.2912653441237016, + "learning_rate": 5e-06, + "loss": 0.0855, + "num_input_tokens_seen": 573510240, + "step": 3339 + }, + { + "epoch": 0.8782797395936082, + "loss": 0.11927518248558044, + "loss_ce": 0.0026980361435562372, + "loss_iou": 0.4296875, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 573510240, + "step": 3339 + }, + { + "epoch": 0.8785427763529954, + "grad_norm": 6.074875569536377, + "learning_rate": 5e-06, + "loss": 0.1221, + "num_input_tokens_seen": 573682136, + "step": 3340 + }, + { + "epoch": 0.8785427763529954, + "loss": 0.14973485469818115, + "loss_ce": 0.0035251472145318985, + "loss_iou": 0.53125, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 573682136, + "step": 3340 + }, + { + "epoch": 0.8788058131123825, + "grad_norm": 4.101578026279459, + "learning_rate": 5e-06, + "loss": 0.1127, + "num_input_tokens_seen": 573854492, + "step": 3341 + }, + { + "epoch": 0.8788058131123825, + "loss": 0.09308157861232758, + "loss_ce": 0.0016814331756904721, + "loss_iou": 0.57421875, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 573854492, + "step": 3341 + }, + { + "epoch": 0.8790688498717696, + "grad_norm": 3.5139785635989704, + "learning_rate": 5e-06, + "loss": 0.1205, + "num_input_tokens_seen": 574026312, + "step": 3342 + }, + { + "epoch": 0.8790688498717696, + "loss": 0.12709392607212067, + "loss_ce": 0.002124447375535965, + "loss_iou": 0.498046875, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 574026312, + "step": 3342 + }, + { + "epoch": 0.8793318866311567, + "grad_norm": 8.712064685299605, + "learning_rate": 5e-06, + "loss": 0.1039, + "num_input_tokens_seen": 574198272, + "step": 3343 + }, + { + "epoch": 0.8793318866311567, + "loss": 0.07467342913150787, + "loss_ce": 0.0009124410571530461, + "loss_iou": 0.443359375, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 574198272, + "step": 3343 + }, + { + "epoch": 0.8795949233905438, + "grad_norm": 3.320527799830495, + "learning_rate": 5e-06, + "loss": 0.0674, + "num_input_tokens_seen": 574370244, + "step": 3344 + }, + { + "epoch": 0.8795949233905438, + "loss": 0.04423138499259949, + "loss_ce": 0.00010297012340743095, + "loss_iou": 0.486328125, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 574370244, + "step": 3344 + }, + { + "epoch": 0.879857960149931, + "grad_norm": 16.55083596075495, + "learning_rate": 5e-06, + "loss": 0.1285, + "num_input_tokens_seen": 574542464, + "step": 3345 + }, + { + "epoch": 0.879857960149931, + "loss": 0.05567498877644539, + "loss_ce": 0.0007433480932377279, + "loss_iou": 0.53125, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 574542464, + "step": 3345 + }, + { + "epoch": 0.8801209969093181, + "grad_norm": 4.233870416516864, + "learning_rate": 5e-06, + "loss": 0.1519, + "num_input_tokens_seen": 574714876, + "step": 3346 + }, + { + "epoch": 0.8801209969093181, + "loss": 0.19878937304019928, + "loss_ce": 0.0015847685281187296, + "loss_iou": 0.443359375, + "loss_num": 0.039306640625, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 574714876, + "step": 3346 + }, + { + "epoch": 0.8803840336687052, + "grad_norm": 28.780242588246683, + "learning_rate": 5e-06, + "loss": 0.1284, + "num_input_tokens_seen": 574886952, + "step": 3347 + }, + { + "epoch": 0.8803840336687052, + "loss": 0.11355656385421753, + "loss_ce": 0.0028998262714594603, + "loss_iou": 0.40625, + "loss_num": 0.0220947265625, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 574886952, + "step": 3347 + }, + { + "epoch": 0.8806470704280923, + "grad_norm": 3.1096156375201027, + "learning_rate": 5e-06, + "loss": 0.0687, + "num_input_tokens_seen": 575059096, + "step": 3348 + }, + { + "epoch": 0.8806470704280923, + "loss": 0.049926742911338806, + "loss_ce": 0.0002136088878614828, + "loss_iou": 0.4921875, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 575059096, + "step": 3348 + }, + { + "epoch": 0.8809101071874794, + "grad_norm": 11.92064516300679, + "learning_rate": 5e-06, + "loss": 0.1151, + "num_input_tokens_seen": 575231232, + "step": 3349 + }, + { + "epoch": 0.8809101071874794, + "loss": 0.10825909674167633, + "loss_ce": 0.00019635571516118944, + "loss_iou": 0.482421875, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 575231232, + "step": 3349 + }, + { + "epoch": 0.8811731439468665, + "grad_norm": 3.9827768470010754, + "learning_rate": 5e-06, + "loss": 0.1083, + "num_input_tokens_seen": 575403592, + "step": 3350 + }, + { + "epoch": 0.8811731439468665, + "loss": 0.11462774872779846, + "loss_ce": 0.005161193665117025, + "loss_iou": 0.70703125, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 575403592, + "step": 3350 + }, + { + "epoch": 0.8814361807062537, + "grad_norm": 6.779641757995467, + "learning_rate": 5e-06, + "loss": 0.1017, + "num_input_tokens_seen": 575574024, + "step": 3351 + }, + { + "epoch": 0.8814361807062537, + "loss": 0.08754941821098328, + "loss_ce": 0.0006353590288199484, + "loss_iou": 0.3828125, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 575574024, + "step": 3351 + }, + { + "epoch": 0.8816992174656408, + "grad_norm": 6.941492265981877, + "learning_rate": 5e-06, + "loss": 0.1041, + "num_input_tokens_seen": 575746304, + "step": 3352 + }, + { + "epoch": 0.8816992174656408, + "loss": 0.08934507519006729, + "loss_ce": 0.0032244701869785786, + "loss_iou": 0.40625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 575746304, + "step": 3352 + }, + { + "epoch": 0.8819622542250279, + "grad_norm": 5.409664650483643, + "learning_rate": 5e-06, + "loss": 0.1231, + "num_input_tokens_seen": 575918724, + "step": 3353 + }, + { + "epoch": 0.8819622542250279, + "loss": 0.08703694492578506, + "loss_ce": 0.0007637504604645073, + "loss_iou": 0.482421875, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 575918724, + "step": 3353 + }, + { + "epoch": 0.882225290984415, + "grad_norm": 7.600652328701811, + "learning_rate": 5e-06, + "loss": 0.1321, + "num_input_tokens_seen": 576090812, + "step": 3354 + }, + { + "epoch": 0.882225290984415, + "loss": 0.25224149227142334, + "loss_ce": 0.00353849190287292, + "loss_iou": 0.44140625, + "loss_num": 0.0498046875, + "loss_xval": 0.2490234375, + "num_input_tokens_seen": 576090812, + "step": 3354 + }, + { + "epoch": 0.8824883277438021, + "grad_norm": 8.07764479644441, + "learning_rate": 5e-06, + "loss": 0.124, + "num_input_tokens_seen": 576262592, + "step": 3355 + }, + { + "epoch": 0.8824883277438021, + "loss": 0.08115795254707336, + "loss_ce": 0.0004999967059120536, + "loss_iou": 0.5703125, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 576262592, + "step": 3355 + }, + { + "epoch": 0.8827513645031894, + "grad_norm": 9.615405351973429, + "learning_rate": 5e-06, + "loss": 0.087, + "num_input_tokens_seen": 576434568, + "step": 3356 + }, + { + "epoch": 0.8827513645031894, + "loss": 0.08256545662879944, + "loss_ce": 0.0005952401552349329, + "loss_iou": 0.5703125, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 576434568, + "step": 3356 + }, + { + "epoch": 0.8830144012625765, + "grad_norm": 4.944244594714697, + "learning_rate": 5e-06, + "loss": 0.1242, + "num_input_tokens_seen": 576606792, + "step": 3357 + }, + { + "epoch": 0.8830144012625765, + "loss": 0.05560879409313202, + "loss_ce": 0.00032620219280943274, + "loss_iou": 0.50390625, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 576606792, + "step": 3357 + }, + { + "epoch": 0.8832774380219636, + "grad_norm": 3.905791245202154, + "learning_rate": 5e-06, + "loss": 0.0977, + "num_input_tokens_seen": 576778952, + "step": 3358 + }, + { + "epoch": 0.8832774380219636, + "loss": 0.05678252875804901, + "loss_ce": 0.0012634244048967957, + "loss_iou": 0.404296875, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 576778952, + "step": 3358 + }, + { + "epoch": 0.8835404747813507, + "grad_norm": 3.3297332937077506, + "learning_rate": 5e-06, + "loss": 0.0826, + "num_input_tokens_seen": 576951468, + "step": 3359 + }, + { + "epoch": 0.8835404747813507, + "loss": 0.15261146426200867, + "loss_ce": 0.0013968587154522538, + "loss_iou": 0.447265625, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 576951468, + "step": 3359 + }, + { + "epoch": 0.8838035115407378, + "grad_norm": 7.82434473224169, + "learning_rate": 5e-06, + "loss": 0.0952, + "num_input_tokens_seen": 577121952, + "step": 3360 + }, + { + "epoch": 0.8838035115407378, + "loss": 0.10315507650375366, + "loss_ce": 0.0030421605333685875, + "loss_iou": 0.427734375, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 577121952, + "step": 3360 + }, + { + "epoch": 0.884066548300125, + "grad_norm": 10.425535033707964, + "learning_rate": 5e-06, + "loss": 0.102, + "num_input_tokens_seen": 577293848, + "step": 3361 + }, + { + "epoch": 0.884066548300125, + "loss": 0.07322396337985992, + "loss_ce": 0.0005005778511986136, + "loss_iou": 0.546875, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 577293848, + "step": 3361 + }, + { + "epoch": 0.8843295850595121, + "grad_norm": 14.476290505686679, + "learning_rate": 5e-06, + "loss": 0.1432, + "num_input_tokens_seen": 577466084, + "step": 3362 + }, + { + "epoch": 0.8843295850595121, + "loss": 0.1955878734588623, + "loss_ce": 0.0021064176689833403, + "loss_iou": 0.5234375, + "loss_num": 0.038818359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 577466084, + "step": 3362 + }, + { + "epoch": 0.8845926218188992, + "grad_norm": 7.100653555231675, + "learning_rate": 5e-06, + "loss": 0.1145, + "num_input_tokens_seen": 577638416, + "step": 3363 + }, + { + "epoch": 0.8845926218188992, + "loss": 0.051397159695625305, + "loss_ce": 0.00037176761543378234, + "loss_iou": 0.462890625, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 577638416, + "step": 3363 + }, + { + "epoch": 0.8848556585782863, + "grad_norm": 17.047045722794394, + "learning_rate": 5e-06, + "loss": 0.1388, + "num_input_tokens_seen": 577810576, + "step": 3364 + }, + { + "epoch": 0.8848556585782863, + "loss": 0.11024264991283417, + "loss_ce": 0.0004404087667353451, + "loss_iou": 0.482421875, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 577810576, + "step": 3364 + }, + { + "epoch": 0.8851186953376734, + "grad_norm": 6.6116170819185625, + "learning_rate": 5e-06, + "loss": 0.0941, + "num_input_tokens_seen": 577982704, + "step": 3365 + }, + { + "epoch": 0.8851186953376734, + "loss": 0.10162153840065002, + "loss_ce": 0.003171829041093588, + "loss_iou": 0.474609375, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 577982704, + "step": 3365 + }, + { + "epoch": 0.8853817320970606, + "grad_norm": 8.013299917102648, + "learning_rate": 5e-06, + "loss": 0.1268, + "num_input_tokens_seen": 578154696, + "step": 3366 + }, + { + "epoch": 0.8853817320970606, + "loss": 0.09505804628133774, + "loss_ce": 0.002620304934680462, + "loss_iou": 0.458984375, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 578154696, + "step": 3366 + }, + { + "epoch": 0.8856447688564477, + "grad_norm": 34.03888154985408, + "learning_rate": 5e-06, + "loss": 0.0988, + "num_input_tokens_seen": 578326940, + "step": 3367 + }, + { + "epoch": 0.8856447688564477, + "loss": 0.10307721793651581, + "loss_ce": 0.0007899247575551271, + "loss_iou": 0.427734375, + "loss_num": 0.0205078125, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 578326940, + "step": 3367 + }, + { + "epoch": 0.8859078056158348, + "grad_norm": 5.847556882862909, + "learning_rate": 5e-06, + "loss": 0.1113, + "num_input_tokens_seen": 578499332, + "step": 3368 + }, + { + "epoch": 0.8859078056158348, + "loss": 0.14421170949935913, + "loss_ce": 0.0017556664533913136, + "loss_iou": 0.4609375, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 578499332, + "step": 3368 + }, + { + "epoch": 0.8861708423752219, + "grad_norm": 6.636470819413435, + "learning_rate": 5e-06, + "loss": 0.1011, + "num_input_tokens_seen": 578671272, + "step": 3369 + }, + { + "epoch": 0.8861708423752219, + "loss": 0.128991961479187, + "loss_ce": 0.0024660732597112656, + "loss_iou": 0.421875, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 578671272, + "step": 3369 + }, + { + "epoch": 0.886433879134609, + "grad_norm": 17.54919755928276, + "learning_rate": 5e-06, + "loss": 0.1119, + "num_input_tokens_seen": 578842196, + "step": 3370 + }, + { + "epoch": 0.886433879134609, + "loss": 0.10547197610139847, + "loss_ce": 0.0005067643942311406, + "loss_iou": 0.5078125, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 578842196, + "step": 3370 + }, + { + "epoch": 0.8866969158939962, + "grad_norm": 7.184938374175427, + "learning_rate": 5e-06, + "loss": 0.0907, + "num_input_tokens_seen": 579014236, + "step": 3371 + }, + { + "epoch": 0.8866969158939962, + "loss": 0.08108609914779663, + "loss_ce": 0.0009774556383490562, + "loss_iou": 0.439453125, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 579014236, + "step": 3371 + }, + { + "epoch": 0.8869599526533833, + "grad_norm": 6.863755671185031, + "learning_rate": 5e-06, + "loss": 0.0948, + "num_input_tokens_seen": 579186596, + "step": 3372 + }, + { + "epoch": 0.8869599526533833, + "loss": 0.1200333908200264, + "loss_ce": 0.00034345051972195506, + "loss_iou": 0.47265625, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 579186596, + "step": 3372 + }, + { + "epoch": 0.8872229894127704, + "grad_norm": 7.255619753434164, + "learning_rate": 5e-06, + "loss": 0.1118, + "num_input_tokens_seen": 579358828, + "step": 3373 + }, + { + "epoch": 0.8872229894127704, + "loss": 0.11177671700716019, + "loss_ce": 0.0016998156206682324, + "loss_iou": 0.46484375, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 579358828, + "step": 3373 + }, + { + "epoch": 0.8874860261721575, + "grad_norm": 4.645601347618675, + "learning_rate": 5e-06, + "loss": 0.1092, + "num_input_tokens_seen": 579531144, + "step": 3374 + }, + { + "epoch": 0.8874860261721575, + "loss": 0.13369636237621307, + "loss_ce": 0.0010364485206082463, + "loss_iou": 0.59765625, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 579531144, + "step": 3374 + }, + { + "epoch": 0.8877490629315447, + "grad_norm": 19.381762342400947, + "learning_rate": 5e-06, + "loss": 0.1611, + "num_input_tokens_seen": 579703288, + "step": 3375 + }, + { + "epoch": 0.8877490629315447, + "loss": 0.15896877646446228, + "loss_ce": 0.0007961708470247686, + "loss_iou": 0.2578125, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 579703288, + "step": 3375 + }, + { + "epoch": 0.8880120996909318, + "grad_norm": 4.08558956843784, + "learning_rate": 5e-06, + "loss": 0.1289, + "num_input_tokens_seen": 579875620, + "step": 3376 + }, + { + "epoch": 0.8880120996909318, + "loss": 0.06248597800731659, + "loss_ce": 0.0004437366151250899, + "loss_iou": 0.58203125, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 579875620, + "step": 3376 + }, + { + "epoch": 0.888275136450319, + "grad_norm": 4.308623737664192, + "learning_rate": 5e-06, + "loss": 0.0812, + "num_input_tokens_seen": 580047776, + "step": 3377 + }, + { + "epoch": 0.888275136450319, + "loss": 0.07517598569393158, + "loss_ce": 0.0003468855866231024, + "loss_iou": 0.48046875, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 580047776, + "step": 3377 + }, + { + "epoch": 0.8885381732097061, + "grad_norm": 17.189878667773364, + "learning_rate": 5e-06, + "loss": 0.1126, + "num_input_tokens_seen": 580219696, + "step": 3378 + }, + { + "epoch": 0.8885381732097061, + "loss": 0.1588488519191742, + "loss_ce": 0.00040158609044738114, + "loss_iou": 0.46875, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 580219696, + "step": 3378 + }, + { + "epoch": 0.8888012099690932, + "grad_norm": 17.94232408746683, + "learning_rate": 5e-06, + "loss": 0.1435, + "num_input_tokens_seen": 580389856, + "step": 3379 + }, + { + "epoch": 0.8888012099690932, + "loss": 0.16451287269592285, + "loss_ce": 0.002464542631059885, + "loss_iou": 0.255859375, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 580389856, + "step": 3379 + }, + { + "epoch": 0.8890642467284803, + "grad_norm": 24.21781414429754, + "learning_rate": 5e-06, + "loss": 0.0962, + "num_input_tokens_seen": 580562140, + "step": 3380 + }, + { + "epoch": 0.8890642467284803, + "loss": 0.15492644906044006, + "loss_ce": 0.0021554557606577873, + "loss_iou": 0.515625, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 580562140, + "step": 3380 + }, + { + "epoch": 0.8893272834878674, + "grad_norm": 10.546114100067816, + "learning_rate": 5e-06, + "loss": 0.1532, + "num_input_tokens_seen": 580734572, + "step": 3381 + }, + { + "epoch": 0.8893272834878674, + "loss": 0.10413020849227905, + "loss_ce": 0.0011639007134363055, + "loss_iou": 0.4453125, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 580734572, + "step": 3381 + }, + { + "epoch": 0.8895903202472546, + "grad_norm": 6.233208405285995, + "learning_rate": 5e-06, + "loss": 0.1056, + "num_input_tokens_seen": 580906660, + "step": 3382 + }, + { + "epoch": 0.8895903202472546, + "loss": 0.14456871151924133, + "loss_ce": 0.0018837791867554188, + "loss_iou": 0.53515625, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 580906660, + "step": 3382 + }, + { + "epoch": 0.8898533570066417, + "grad_norm": 3.9495859544374516, + "learning_rate": 5e-06, + "loss": 0.0978, + "num_input_tokens_seen": 581078540, + "step": 3383 + }, + { + "epoch": 0.8898533570066417, + "loss": 0.13160666823387146, + "loss_ce": 0.0018764439737424254, + "loss_iou": 0.25390625, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 581078540, + "step": 3383 + }, + { + "epoch": 0.8901163937660288, + "grad_norm": 4.909648559933354, + "learning_rate": 5e-06, + "loss": 0.0663, + "num_input_tokens_seen": 581247384, + "step": 3384 + }, + { + "epoch": 0.8901163937660288, + "loss": 0.06679339706897736, + "loss_ce": 0.00032610760536044836, + "loss_iou": 0.58984375, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 581247384, + "step": 3384 + }, + { + "epoch": 0.8903794305254159, + "grad_norm": 4.667075414121075, + "learning_rate": 5e-06, + "loss": 0.1195, + "num_input_tokens_seen": 581419360, + "step": 3385 + }, + { + "epoch": 0.8903794305254159, + "loss": 0.1276930421590805, + "loss_ce": 0.0018690668512135744, + "loss_iou": 0.5, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 581419360, + "step": 3385 + }, + { + "epoch": 0.890642467284803, + "grad_norm": 4.483616610873386, + "learning_rate": 5e-06, + "loss": 0.0794, + "num_input_tokens_seen": 581589952, + "step": 3386 + }, + { + "epoch": 0.890642467284803, + "loss": 0.047724828124046326, + "loss_ce": 0.00011740828631445765, + "loss_iou": 0.50390625, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 581589952, + "step": 3386 + }, + { + "epoch": 0.8909055040441902, + "grad_norm": 21.859242645746537, + "learning_rate": 5e-06, + "loss": 0.1284, + "num_input_tokens_seen": 581762156, + "step": 3387 + }, + { + "epoch": 0.8909055040441902, + "loss": 0.18419209122657776, + "loss_ce": 0.0005067941965535283, + "loss_iou": 0.33984375, + "loss_num": 0.036865234375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 581762156, + "step": 3387 + }, + { + "epoch": 0.8911685408035773, + "grad_norm": 8.665372364747144, + "learning_rate": 5e-06, + "loss": 0.0776, + "num_input_tokens_seen": 581934152, + "step": 3388 + }, + { + "epoch": 0.8911685408035773, + "loss": 0.08677740395069122, + "loss_ce": 0.00022955110762268305, + "loss_iou": 0.50390625, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 581934152, + "step": 3388 + }, + { + "epoch": 0.8914315775629644, + "grad_norm": 4.1210884311067835, + "learning_rate": 5e-06, + "loss": 0.1026, + "num_input_tokens_seen": 582106144, + "step": 3389 + }, + { + "epoch": 0.8914315775629644, + "loss": 0.13846494257450104, + "loss_ce": 0.0010137634817510843, + "loss_iou": 0.53125, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 582106144, + "step": 3389 + }, + { + "epoch": 0.8916946143223515, + "grad_norm": 11.500479931217354, + "learning_rate": 5e-06, + "loss": 0.1045, + "num_input_tokens_seen": 582278400, + "step": 3390 + }, + { + "epoch": 0.8916946143223515, + "loss": 0.12132581323385239, + "loss_ce": 0.002826055744662881, + "loss_iou": 0.5859375, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 582278400, + "step": 3390 + }, + { + "epoch": 0.8919576510817386, + "grad_norm": 4.055853466494655, + "learning_rate": 5e-06, + "loss": 0.094, + "num_input_tokens_seen": 582450564, + "step": 3391 + }, + { + "epoch": 0.8919576510817386, + "loss": 0.0762665793299675, + "loss_ce": 0.0018952443497255445, + "loss_iou": 0.4453125, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 582450564, + "step": 3391 + }, + { + "epoch": 0.8922206878411258, + "grad_norm": 8.665290308457982, + "learning_rate": 5e-06, + "loss": 0.1485, + "num_input_tokens_seen": 582622336, + "step": 3392 + }, + { + "epoch": 0.8922206878411258, + "loss": 0.16068054735660553, + "loss_ce": 0.0011956822127103806, + "loss_iou": 0.546875, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 582622336, + "step": 3392 + }, + { + "epoch": 0.892483724600513, + "grad_norm": 14.955097188866974, + "learning_rate": 5e-06, + "loss": 0.1142, + "num_input_tokens_seen": 582794384, + "step": 3393 + }, + { + "epoch": 0.892483724600513, + "loss": 0.08523661643266678, + "loss_ce": 0.0010691368952393532, + "loss_iou": 0.494140625, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 582794384, + "step": 3393 + }, + { + "epoch": 0.8927467613599, + "grad_norm": 14.050392295123078, + "learning_rate": 5e-06, + "loss": 0.0769, + "num_input_tokens_seen": 582966476, + "step": 3394 + }, + { + "epoch": 0.8927467613599, + "loss": 0.11920854449272156, + "loss_ce": 0.0003883557510562241, + "loss_iou": 0.5234375, + "loss_num": 0.0238037109375, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 582966476, + "step": 3394 + }, + { + "epoch": 0.8930097981192872, + "grad_norm": 3.5189407633589225, + "learning_rate": 5e-06, + "loss": 0.1337, + "num_input_tokens_seen": 583138656, + "step": 3395 + }, + { + "epoch": 0.8930097981192872, + "loss": 0.23967677354812622, + "loss_ce": 0.0011361411307007074, + "loss_iou": 0.27734375, + "loss_num": 0.0478515625, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 583138656, + "step": 3395 + }, + { + "epoch": 0.8932728348786743, + "grad_norm": 4.239345260315017, + "learning_rate": 5e-06, + "loss": 0.0886, + "num_input_tokens_seen": 583310580, + "step": 3396 + }, + { + "epoch": 0.8932728348786743, + "loss": 0.045496270060539246, + "loss_ce": 0.00010137087519979104, + "loss_iou": 0.58203125, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 583310580, + "step": 3396 + }, + { + "epoch": 0.8935358716380615, + "grad_norm": 4.318293293535835, + "learning_rate": 5e-06, + "loss": 0.1154, + "num_input_tokens_seen": 583482984, + "step": 3397 + }, + { + "epoch": 0.8935358716380615, + "loss": 0.11194127053022385, + "loss_ce": 0.000826766830869019, + "loss_iou": 0.44140625, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 583482984, + "step": 3397 + }, + { + "epoch": 0.8937989083974486, + "grad_norm": 4.660547370469876, + "learning_rate": 5e-06, + "loss": 0.15, + "num_input_tokens_seen": 583655292, + "step": 3398 + }, + { + "epoch": 0.8937989083974486, + "loss": 0.07680627703666687, + "loss_ce": 0.00017663151083979756, + "loss_iou": 0.451171875, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 583655292, + "step": 3398 + }, + { + "epoch": 0.8940619451568357, + "grad_norm": 10.828455614089098, + "learning_rate": 5e-06, + "loss": 0.111, + "num_input_tokens_seen": 583827408, + "step": 3399 + }, + { + "epoch": 0.8940619451568357, + "loss": 0.1173757016658783, + "loss_ce": 3.5622346331365407e-05, + "loss_iou": 0.42578125, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 583827408, + "step": 3399 + }, + { + "epoch": 0.8943249819162228, + "grad_norm": 5.042018374187092, + "learning_rate": 5e-06, + "loss": 0.0892, + "num_input_tokens_seen": 583999664, + "step": 3400 + }, + { + "epoch": 0.8943249819162228, + "loss": 0.061997972428798676, + "loss_ce": 0.000367723434465006, + "loss_iou": 0.48046875, + "loss_num": 0.0123291015625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 583999664, + "step": 3400 + }, + { + "epoch": 0.8945880186756099, + "grad_norm": 26.304936486213094, + "learning_rate": 5e-06, + "loss": 0.1601, + "num_input_tokens_seen": 584171616, + "step": 3401 + }, + { + "epoch": 0.8945880186756099, + "loss": 0.2535288333892822, + "loss_ce": 0.004078162834048271, + "loss_iou": 0.34765625, + "loss_num": 0.0498046875, + "loss_xval": 0.2490234375, + "num_input_tokens_seen": 584171616, + "step": 3401 + }, + { + "epoch": 0.894851055434997, + "grad_norm": 4.926581765464391, + "learning_rate": 5e-06, + "loss": 0.1067, + "num_input_tokens_seen": 584343844, + "step": 3402 + }, + { + "epoch": 0.894851055434997, + "loss": 0.11322697252035141, + "loss_ce": 0.0008307351381517947, + "loss_iou": NaN, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 584343844, + "step": 3402 + }, + { + "epoch": 0.8951140921943842, + "grad_norm": 7.363528943726032, + "learning_rate": 5e-06, + "loss": 0.0878, + "num_input_tokens_seen": 584516156, + "step": 3403 + }, + { + "epoch": 0.8951140921943842, + "loss": 0.08316925168037415, + "loss_ce": 0.002114569302648306, + "loss_iou": 0.37109375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 584516156, + "step": 3403 + }, + { + "epoch": 0.8953771289537713, + "grad_norm": 4.344737704222575, + "learning_rate": 5e-06, + "loss": 0.0809, + "num_input_tokens_seen": 584688680, + "step": 3404 + }, + { + "epoch": 0.8953771289537713, + "loss": 0.061312295496463776, + "loss_ce": 0.0012231803266331553, + "loss_iou": 0.58203125, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 584688680, + "step": 3404 + }, + { + "epoch": 0.8956401657131584, + "grad_norm": 9.026057514105759, + "learning_rate": 5e-06, + "loss": 0.1616, + "num_input_tokens_seen": 584860880, + "step": 3405 + }, + { + "epoch": 0.8956401657131584, + "loss": 0.09903927892446518, + "loss_ce": 0.00197811983525753, + "loss_iou": 0.52734375, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 584860880, + "step": 3405 + }, + { + "epoch": 0.8959032024725455, + "grad_norm": 9.19169137185048, + "learning_rate": 5e-06, + "loss": 0.106, + "num_input_tokens_seen": 585032800, + "step": 3406 + }, + { + "epoch": 0.8959032024725455, + "loss": 0.06624776124954224, + "loss_ce": 0.00032979153911583126, + "loss_iou": 0.443359375, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 585032800, + "step": 3406 + }, + { + "epoch": 0.8961662392319326, + "grad_norm": 9.011492701681583, + "learning_rate": 5e-06, + "loss": 0.1281, + "num_input_tokens_seen": 585203264, + "step": 3407 + }, + { + "epoch": 0.8961662392319326, + "loss": 0.21042554080486298, + "loss_ce": 0.002722906181588769, + "loss_iou": 0.40234375, + "loss_num": 0.04150390625, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 585203264, + "step": 3407 + }, + { + "epoch": 0.8964292759913198, + "grad_norm": 17.979008103100615, + "learning_rate": 5e-06, + "loss": 0.093, + "num_input_tokens_seen": 585375592, + "step": 3408 + }, + { + "epoch": 0.8964292759913198, + "loss": 0.041413549333810806, + "loss_ce": 0.0023205317556858063, + "loss_iou": 0.62890625, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 585375592, + "step": 3408 + }, + { + "epoch": 0.8966923127507069, + "grad_norm": 10.115194008381021, + "learning_rate": 5e-06, + "loss": 0.1074, + "num_input_tokens_seen": 585546216, + "step": 3409 + }, + { + "epoch": 0.8966923127507069, + "loss": 0.15392959117889404, + "loss_ce": 0.0011891128960996866, + "loss_iou": 0.447265625, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 585546216, + "step": 3409 + }, + { + "epoch": 0.896955349510094, + "grad_norm": 7.995711146610571, + "learning_rate": 5e-06, + "loss": 0.1232, + "num_input_tokens_seen": 585718304, + "step": 3410 + }, + { + "epoch": 0.896955349510094, + "loss": 0.05361251160502434, + "loss_ce": 0.0008171012159436941, + "loss_iou": 0.4921875, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 585718304, + "step": 3410 + }, + { + "epoch": 0.8972183862694811, + "grad_norm": 4.960227305854337, + "learning_rate": 5e-06, + "loss": 0.1025, + "num_input_tokens_seen": 585890728, + "step": 3411 + }, + { + "epoch": 0.8972183862694811, + "loss": 0.08083316683769226, + "loss_ce": 0.0013043570797890425, + "loss_iou": 0.455078125, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 585890728, + "step": 3411 + }, + { + "epoch": 0.8974814230288682, + "grad_norm": 5.883674464139159, + "learning_rate": 5e-06, + "loss": 0.1462, + "num_input_tokens_seen": 586062896, + "step": 3412 + }, + { + "epoch": 0.8974814230288682, + "loss": 0.06225815415382385, + "loss_ce": 0.003023534081876278, + "loss_iou": 0.515625, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 586062896, + "step": 3412 + }, + { + "epoch": 0.8977444597882555, + "grad_norm": 5.4958687199216625, + "learning_rate": 5e-06, + "loss": 0.1108, + "num_input_tokens_seen": 586235628, + "step": 3413 + }, + { + "epoch": 0.8977444597882555, + "loss": 0.08900677412748337, + "loss_ce": 0.0012687351554632187, + "loss_iou": 0.50390625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 586235628, + "step": 3413 + }, + { + "epoch": 0.8980074965476426, + "grad_norm": 6.158695953111894, + "learning_rate": 5e-06, + "loss": 0.1416, + "num_input_tokens_seen": 586407736, + "step": 3414 + }, + { + "epoch": 0.8980074965476426, + "loss": 0.13948456943035126, + "loss_ce": 0.0007364002522081137, + "loss_iou": 0.5625, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 586407736, + "step": 3414 + }, + { + "epoch": 0.8982705333070297, + "grad_norm": 6.799680477506266, + "learning_rate": 5e-06, + "loss": 0.1057, + "num_input_tokens_seen": 586579816, + "step": 3415 + }, + { + "epoch": 0.8982705333070297, + "loss": 0.1316445767879486, + "loss_ce": 0.0035775681026279926, + "loss_iou": 0.447265625, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 586579816, + "step": 3415 + }, + { + "epoch": 0.8985335700664168, + "grad_norm": 4.757727392184302, + "learning_rate": 5e-06, + "loss": 0.1189, + "num_input_tokens_seen": 586750092, + "step": 3416 + }, + { + "epoch": 0.8985335700664168, + "loss": 0.1803445667028427, + "loss_ce": 0.0017557005630806088, + "loss_iou": 0.30078125, + "loss_num": 0.03564453125, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 586750092, + "step": 3416 + }, + { + "epoch": 0.8987966068258039, + "grad_norm": 4.7130808920215825, + "learning_rate": 5e-06, + "loss": 0.0988, + "num_input_tokens_seen": 586920724, + "step": 3417 + }, + { + "epoch": 0.8987966068258039, + "loss": 0.04298722371459007, + "loss_ce": 0.0010102951200678945, + "loss_iou": 0.39453125, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 586920724, + "step": 3417 + }, + { + "epoch": 0.8990596435851911, + "grad_norm": 9.716073099163628, + "learning_rate": 5e-06, + "loss": 0.0909, + "num_input_tokens_seen": 587093308, + "step": 3418 + }, + { + "epoch": 0.8990596435851911, + "loss": 0.11212408542633057, + "loss_ce": 0.0025659759994596243, + "loss_iou": 0.78125, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 587093308, + "step": 3418 + }, + { + "epoch": 0.8993226803445782, + "grad_norm": 9.72495754411886, + "learning_rate": 5e-06, + "loss": 0.0849, + "num_input_tokens_seen": 587265328, + "step": 3419 + }, + { + "epoch": 0.8993226803445782, + "loss": 0.09303727746009827, + "loss_ce": 0.0017286788206547499, + "loss_iou": 0.53125, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 587265328, + "step": 3419 + }, + { + "epoch": 0.8995857171039653, + "grad_norm": 27.58546147063481, + "learning_rate": 5e-06, + "loss": 0.1334, + "num_input_tokens_seen": 587435704, + "step": 3420 + }, + { + "epoch": 0.8995857171039653, + "loss": 0.20503398776054382, + "loss_ce": 0.0012986233923584223, + "loss_iou": 0.28515625, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 587435704, + "step": 3420 + }, + { + "epoch": 0.8998487538633524, + "grad_norm": 3.780121082337908, + "learning_rate": 5e-06, + "loss": 0.1115, + "num_input_tokens_seen": 587605756, + "step": 3421 + }, + { + "epoch": 0.8998487538633524, + "loss": 0.09629541635513306, + "loss_ce": 0.0002413361653452739, + "loss_iou": 0.435546875, + "loss_num": 0.0191650390625, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 587605756, + "step": 3421 + }, + { + "epoch": 0.9001117906227395, + "grad_norm": 4.905276205162047, + "learning_rate": 5e-06, + "loss": 0.1407, + "num_input_tokens_seen": 587776128, + "step": 3422 + }, + { + "epoch": 0.9001117906227395, + "loss": 0.22563423216342926, + "loss_ce": 0.003191609401255846, + "loss_iou": 0.462890625, + "loss_num": 0.04443359375, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 587776128, + "step": 3422 + }, + { + "epoch": 0.9003748273821266, + "grad_norm": 4.24824425523058, + "learning_rate": 5e-06, + "loss": 0.0902, + "num_input_tokens_seen": 587948348, + "step": 3423 + }, + { + "epoch": 0.9003748273821266, + "loss": 0.04901735112071037, + "loss_ce": 0.0007690612110309303, + "loss_iou": 0.5, + "loss_num": 0.0096435546875, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 587948348, + "step": 3423 + }, + { + "epoch": 0.9006378641415138, + "grad_norm": 14.901804223286693, + "learning_rate": 5e-06, + "loss": 0.0734, + "num_input_tokens_seen": 588115160, + "step": 3424 + }, + { + "epoch": 0.9006378641415138, + "loss": 0.0699070394039154, + "loss_ce": 0.001684993039816618, + "loss_iou": 0.515625, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 588115160, + "step": 3424 + }, + { + "epoch": 0.9009009009009009, + "grad_norm": 4.750314737648371, + "learning_rate": 5e-06, + "loss": 0.0881, + "num_input_tokens_seen": 588287148, + "step": 3425 + }, + { + "epoch": 0.9009009009009009, + "loss": 0.07479090988636017, + "loss_ce": 0.006248427089303732, + "loss_iou": 0.498046875, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 588287148, + "step": 3425 + }, + { + "epoch": 0.901163937660288, + "grad_norm": 3.9857635354375183, + "learning_rate": 5e-06, + "loss": 0.1155, + "num_input_tokens_seen": 588457468, + "step": 3426 + }, + { + "epoch": 0.901163937660288, + "loss": 0.04558904469013214, + "loss_ce": 0.0004535481857601553, + "loss_iou": 0.5546875, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 588457468, + "step": 3426 + }, + { + "epoch": 0.9014269744196751, + "grad_norm": 7.929685479499518, + "learning_rate": 5e-06, + "loss": 0.1174, + "num_input_tokens_seen": 588627832, + "step": 3427 + }, + { + "epoch": 0.9014269744196751, + "loss": 0.11351916939020157, + "loss_ce": 0.00022265504230745137, + "loss_iou": 0.41796875, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 588627832, + "step": 3427 + }, + { + "epoch": 0.9016900111790622, + "grad_norm": 5.274686939603064, + "learning_rate": 5e-06, + "loss": 0.1085, + "num_input_tokens_seen": 588797844, + "step": 3428 + }, + { + "epoch": 0.9016900111790622, + "loss": 0.07151903212070465, + "loss_ce": 0.004655018448829651, + "loss_iou": 0.2314453125, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 588797844, + "step": 3428 + }, + { + "epoch": 0.9019530479384494, + "grad_norm": 5.241669176622665, + "learning_rate": 5e-06, + "loss": 0.1302, + "num_input_tokens_seen": 588966836, + "step": 3429 + }, + { + "epoch": 0.9019530479384494, + "loss": 0.12862293422222137, + "loss_ce": 0.0008153152884915471, + "loss_iou": 0.5546875, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 588966836, + "step": 3429 + }, + { + "epoch": 0.9022160846978365, + "grad_norm": 7.584585654330743, + "learning_rate": 5e-06, + "loss": 0.1064, + "num_input_tokens_seen": 589136916, + "step": 3430 + }, + { + "epoch": 0.9022160846978365, + "loss": 0.1643849015235901, + "loss_ce": 0.0007801597821526229, + "loss_iou": 0.4609375, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 589136916, + "step": 3430 + }, + { + "epoch": 0.9024791214572236, + "grad_norm": 3.532701818396997, + "learning_rate": 5e-06, + "loss": 0.1408, + "num_input_tokens_seen": 589309164, + "step": 3431 + }, + { + "epoch": 0.9024791214572236, + "loss": 0.077778160572052, + "loss_ce": 0.0013621454127132893, + "loss_iou": 0.48046875, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 589309164, + "step": 3431 + }, + { + "epoch": 0.9027421582166107, + "grad_norm": 7.674994076422982, + "learning_rate": 5e-06, + "loss": 0.1244, + "num_input_tokens_seen": 589481132, + "step": 3432 + }, + { + "epoch": 0.9027421582166107, + "loss": 0.09501226991415024, + "loss_ce": 0.0008197662536986172, + "loss_iou": 0.404296875, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 589481132, + "step": 3432 + }, + { + "epoch": 0.9030051949759978, + "grad_norm": 6.592301292301914, + "learning_rate": 5e-06, + "loss": 0.0956, + "num_input_tokens_seen": 589649864, + "step": 3433 + }, + { + "epoch": 0.9030051949759978, + "loss": 0.06211673840880394, + "loss_ce": 0.0013867560774087906, + "loss_iou": 0.48828125, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 589649864, + "step": 3433 + }, + { + "epoch": 0.9032682317353851, + "grad_norm": 5.45319787734761, + "learning_rate": 5e-06, + "loss": 0.0854, + "num_input_tokens_seen": 589822176, + "step": 3434 + }, + { + "epoch": 0.9032682317353851, + "loss": 0.06987213343381882, + "loss_ce": 0.0025808701757341623, + "loss_iou": 0.58984375, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 589822176, + "step": 3434 + }, + { + "epoch": 0.9035312684947722, + "grad_norm": 35.99527436418574, + "learning_rate": 5e-06, + "loss": 0.1639, + "num_input_tokens_seen": 589992448, + "step": 3435 + }, + { + "epoch": 0.9035312684947722, + "loss": 0.2300114631652832, + "loss_ce": 0.0013890261761844158, + "loss_iou": 0.66796875, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 589992448, + "step": 3435 + }, + { + "epoch": 0.9037943052541593, + "grad_norm": 8.340774703127748, + "learning_rate": 5e-06, + "loss": 0.1065, + "num_input_tokens_seen": 590162576, + "step": 3436 + }, + { + "epoch": 0.9037943052541593, + "loss": 0.09980174899101257, + "loss_ce": 0.001687736832536757, + "loss_iou": 0.478515625, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 590162576, + "step": 3436 + }, + { + "epoch": 0.9040573420135464, + "grad_norm": 3.893496744409823, + "learning_rate": 5e-06, + "loss": 0.1361, + "num_input_tokens_seen": 590334768, + "step": 3437 + }, + { + "epoch": 0.9040573420135464, + "loss": 0.14963126182556152, + "loss_ce": 0.002658609300851822, + "loss_iou": 0.443359375, + "loss_num": 0.0294189453125, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 590334768, + "step": 3437 + }, + { + "epoch": 0.9043203787729335, + "grad_norm": 4.512105245241041, + "learning_rate": 5e-06, + "loss": 0.156, + "num_input_tokens_seen": 590506964, + "step": 3438 + }, + { + "epoch": 0.9043203787729335, + "loss": 0.2095954418182373, + "loss_ce": 0.0010688342154026031, + "loss_iou": 0.423828125, + "loss_num": 0.041748046875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 590506964, + "step": 3438 + }, + { + "epoch": 0.9045834155323207, + "grad_norm": 10.79594707291913, + "learning_rate": 5e-06, + "loss": 0.1262, + "num_input_tokens_seen": 590679132, + "step": 3439 + }, + { + "epoch": 0.9045834155323207, + "loss": 0.056604690849781036, + "loss_ce": 0.0010932188015431166, + "loss_iou": 0.470703125, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 590679132, + "step": 3439 + }, + { + "epoch": 0.9048464522917078, + "grad_norm": 7.232598771359226, + "learning_rate": 5e-06, + "loss": 0.0693, + "num_input_tokens_seen": 590851488, + "step": 3440 + }, + { + "epoch": 0.9048464522917078, + "loss": 0.07088696211576462, + "loss_ce": 0.0023749994579702616, + "loss_iou": 0.396484375, + "loss_num": 0.01373291015625, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 590851488, + "step": 3440 + }, + { + "epoch": 0.9051094890510949, + "grad_norm": 5.761072356819141, + "learning_rate": 5e-06, + "loss": 0.0893, + "num_input_tokens_seen": 591023672, + "step": 3441 + }, + { + "epoch": 0.9051094890510949, + "loss": 0.05779781565070152, + "loss_ce": 0.002149013802409172, + "loss_iou": 0.48828125, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 591023672, + "step": 3441 + }, + { + "epoch": 0.905372525810482, + "grad_norm": 5.464589960258587, + "learning_rate": 5e-06, + "loss": 0.0756, + "num_input_tokens_seen": 591196064, + "step": 3442 + }, + { + "epoch": 0.905372525810482, + "loss": 0.11280819773674011, + "loss_ce": 0.004058802034705877, + "loss_iou": 0.392578125, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 591196064, + "step": 3442 + }, + { + "epoch": 0.9056355625698691, + "grad_norm": 10.0338321043045, + "learning_rate": 5e-06, + "loss": 0.1623, + "num_input_tokens_seen": 591368108, + "step": 3443 + }, + { + "epoch": 0.9056355625698691, + "loss": 0.11732570827007294, + "loss_ce": 0.0005044231074862182, + "loss_iou": 0.3671875, + "loss_num": 0.0234375, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 591368108, + "step": 3443 + }, + { + "epoch": 0.9058985993292563, + "grad_norm": 4.9322678909564805, + "learning_rate": 5e-06, + "loss": 0.1497, + "num_input_tokens_seen": 591539968, + "step": 3444 + }, + { + "epoch": 0.9058985993292563, + "loss": 0.10426676273345947, + "loss_ce": 0.0025516818277537823, + "loss_iou": 0.478515625, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 591539968, + "step": 3444 + }, + { + "epoch": 0.9061616360886434, + "grad_norm": 10.20363728877728, + "learning_rate": 5e-06, + "loss": 0.1077, + "num_input_tokens_seen": 591709020, + "step": 3445 + }, + { + "epoch": 0.9061616360886434, + "loss": 0.049313947558403015, + "loss_ce": 0.0002569416828919202, + "loss_iou": 0.515625, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 591709020, + "step": 3445 + }, + { + "epoch": 0.9064246728480305, + "grad_norm": 8.985092426433443, + "learning_rate": 5e-06, + "loss": 0.1115, + "num_input_tokens_seen": 591881092, + "step": 3446 + }, + { + "epoch": 0.9064246728480305, + "loss": 0.0865587666630745, + "loss_ce": 0.0004686739994212985, + "loss_iou": 0.462890625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 591881092, + "step": 3446 + }, + { + "epoch": 0.9066877096074176, + "grad_norm": 27.73232811749845, + "learning_rate": 5e-06, + "loss": 0.1312, + "num_input_tokens_seen": 592053524, + "step": 3447 + }, + { + "epoch": 0.9066877096074176, + "loss": 0.10780518501996994, + "loss_ce": 0.0006884862086735666, + "loss_iou": 0.5, + "loss_num": 0.021484375, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 592053524, + "step": 3447 + }, + { + "epoch": 0.9069507463668047, + "grad_norm": 11.824522545760333, + "learning_rate": 5e-06, + "loss": 0.111, + "num_input_tokens_seen": 592225468, + "step": 3448 + }, + { + "epoch": 0.9069507463668047, + "loss": 0.0678405836224556, + "loss_ce": 0.002670294838026166, + "loss_iou": 0.546875, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 592225468, + "step": 3448 + }, + { + "epoch": 0.9072137831261918, + "grad_norm": 5.549108113367569, + "learning_rate": 5e-06, + "loss": 0.1315, + "num_input_tokens_seen": 592397784, + "step": 3449 + }, + { + "epoch": 0.9072137831261918, + "loss": 0.06828893721103668, + "loss_ce": 0.0015164725482463837, + "loss_iou": 0.482421875, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 592397784, + "step": 3449 + }, + { + "epoch": 0.907476819885579, + "grad_norm": 34.02061333154032, + "learning_rate": 5e-06, + "loss": 0.1082, + "num_input_tokens_seen": 592569716, + "step": 3450 + }, + { + "epoch": 0.907476819885579, + "loss": 0.18558475375175476, + "loss_ce": 0.002723418176174164, + "loss_iou": 0.4296875, + "loss_num": 0.03662109375, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 592569716, + "step": 3450 + }, + { + "epoch": 0.9077398566449661, + "grad_norm": 8.419964301658604, + "learning_rate": 5e-06, + "loss": 0.1118, + "num_input_tokens_seen": 592742332, + "step": 3451 + }, + { + "epoch": 0.9077398566449661, + "loss": 0.10704197734594345, + "loss_ce": 0.0033432499039918184, + "loss_iou": 0.447265625, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 592742332, + "step": 3451 + }, + { + "epoch": 0.9080028934043533, + "grad_norm": 6.543315599280218, + "learning_rate": 5e-06, + "loss": 0.1092, + "num_input_tokens_seen": 592914548, + "step": 3452 + }, + { + "epoch": 0.9080028934043533, + "loss": 0.1184120774269104, + "loss_ce": 0.00665670819580555, + "loss_iou": 0.482421875, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 592914548, + "step": 3452 + }, + { + "epoch": 0.9082659301637404, + "grad_norm": 17.663325985728193, + "learning_rate": 5e-06, + "loss": 0.1147, + "num_input_tokens_seen": 593086732, + "step": 3453 + }, + { + "epoch": 0.9082659301637404, + "loss": 0.10038851201534271, + "loss_ce": 0.0001993027253774926, + "loss_iou": 0.48046875, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 593086732, + "step": 3453 + }, + { + "epoch": 0.9085289669231275, + "grad_norm": 4.446508836333715, + "learning_rate": 5e-06, + "loss": 0.1466, + "num_input_tokens_seen": 593257004, + "step": 3454 + }, + { + "epoch": 0.9085289669231275, + "loss": 0.10031658411026001, + "loss_ce": 0.0005546216852962971, + "loss_iou": 0.51171875, + "loss_num": 0.02001953125, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 593257004, + "step": 3454 + }, + { + "epoch": 0.9087920036825147, + "grad_norm": 33.88108016172298, + "learning_rate": 5e-06, + "loss": 0.1243, + "num_input_tokens_seen": 593429128, + "step": 3455 + }, + { + "epoch": 0.9087920036825147, + "loss": 0.08902574330568314, + "loss_ce": 0.0005247698863968253, + "loss_iou": 0.326171875, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 593429128, + "step": 3455 + }, + { + "epoch": 0.9090550404419018, + "grad_norm": 5.647244857793579, + "learning_rate": 5e-06, + "loss": 0.1055, + "num_input_tokens_seen": 593601516, + "step": 3456 + }, + { + "epoch": 0.9090550404419018, + "loss": 0.05231146514415741, + "loss_ce": 0.001621769741177559, + "loss_iou": 0.42578125, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 593601516, + "step": 3456 + }, + { + "epoch": 0.9093180772012889, + "grad_norm": 9.778948998175075, + "learning_rate": 5e-06, + "loss": 0.1366, + "num_input_tokens_seen": 593773792, + "step": 3457 + }, + { + "epoch": 0.9093180772012889, + "loss": 0.09074349701404572, + "loss_ce": 0.0006250919541344047, + "loss_iou": 0.3359375, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 593773792, + "step": 3457 + }, + { + "epoch": 0.909581113960676, + "grad_norm": 8.900238633139468, + "learning_rate": 5e-06, + "loss": 0.0842, + "num_input_tokens_seen": 593945832, + "step": 3458 + }, + { + "epoch": 0.909581113960676, + "loss": 0.10212016105651855, + "loss_ce": 0.0015037069097161293, + "loss_iou": 0.515625, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 593945832, + "step": 3458 + }, + { + "epoch": 0.9098441507200631, + "grad_norm": 5.072537904030542, + "learning_rate": 5e-06, + "loss": 0.1257, + "num_input_tokens_seen": 594118188, + "step": 3459 + }, + { + "epoch": 0.9098441507200631, + "loss": 0.051810335367918015, + "loss_ce": 0.0029516899958252907, + "loss_iou": 0.41796875, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 594118188, + "step": 3459 + }, + { + "epoch": 0.9101071874794503, + "grad_norm": 4.7625991934389305, + "learning_rate": 5e-06, + "loss": 0.0955, + "num_input_tokens_seen": 594290712, + "step": 3460 + }, + { + "epoch": 0.9101071874794503, + "loss": 0.07667580991983414, + "loss_ce": 0.00038186419988051057, + "loss_iou": 0.49609375, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 594290712, + "step": 3460 + }, + { + "epoch": 0.9103702242388374, + "grad_norm": 10.907454103811546, + "learning_rate": 5e-06, + "loss": 0.1191, + "num_input_tokens_seen": 594461168, + "step": 3461 + }, + { + "epoch": 0.9103702242388374, + "loss": 0.07658274471759796, + "loss_ce": 0.00019724905723705888, + "loss_iou": 0.455078125, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 594461168, + "step": 3461 + }, + { + "epoch": 0.9106332609982245, + "grad_norm": 6.097275668896845, + "learning_rate": 5e-06, + "loss": 0.1622, + "num_input_tokens_seen": 594633236, + "step": 3462 + }, + { + "epoch": 0.9106332609982245, + "loss": 0.16042682528495789, + "loss_ce": 0.0004689389606937766, + "loss_iou": 0.43359375, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 594633236, + "step": 3462 + }, + { + "epoch": 0.9108962977576116, + "grad_norm": 4.28871295829322, + "learning_rate": 5e-06, + "loss": 0.1378, + "num_input_tokens_seen": 594805240, + "step": 3463 + }, + { + "epoch": 0.9108962977576116, + "loss": 0.10126248002052307, + "loss_ce": 0.0024465657770633698, + "loss_iou": 0.3828125, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 594805240, + "step": 3463 + }, + { + "epoch": 0.9111593345169987, + "grad_norm": 29.617769917448427, + "learning_rate": 5e-06, + "loss": 0.1396, + "num_input_tokens_seen": 594977116, + "step": 3464 + }, + { + "epoch": 0.9111593345169987, + "loss": 0.08838079869747162, + "loss_ce": 0.0014514753129333258, + "loss_iou": 0.421875, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 594977116, + "step": 3464 + }, + { + "epoch": 0.9114223712763859, + "grad_norm": 12.912626025390432, + "learning_rate": 5e-06, + "loss": 0.1197, + "num_input_tokens_seen": 595148996, + "step": 3465 + }, + { + "epoch": 0.9114223712763859, + "loss": 0.05163312330842018, + "loss_ce": 0.0015690373256802559, + "loss_iou": 0.55859375, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 595148996, + "step": 3465 + }, + { + "epoch": 0.911685408035773, + "grad_norm": 3.7162474056368, + "learning_rate": 5e-06, + "loss": 0.1198, + "num_input_tokens_seen": 595321500, + "step": 3466 + }, + { + "epoch": 0.911685408035773, + "loss": 0.12145687639713287, + "loss_ce": 0.0029113469645380974, + "loss_iou": 0.3828125, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 595321500, + "step": 3466 + }, + { + "epoch": 0.9119484447951601, + "grad_norm": 26.611038275341258, + "learning_rate": 5e-06, + "loss": 0.0815, + "num_input_tokens_seen": 595493996, + "step": 3467 + }, + { + "epoch": 0.9119484447951601, + "loss": 0.08623991906642914, + "loss_ce": 8.879496454028413e-05, + "loss_iou": 0.369140625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 595493996, + "step": 3467 + }, + { + "epoch": 0.9122114815545472, + "grad_norm": 3.8019745447441835, + "learning_rate": 5e-06, + "loss": 0.116, + "num_input_tokens_seen": 595666232, + "step": 3468 + }, + { + "epoch": 0.9122114815545472, + "loss": 0.10731638222932816, + "loss_ce": 0.0009015857940539718, + "loss_iou": 0.546875, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 595666232, + "step": 3468 + }, + { + "epoch": 0.9124745183139343, + "grad_norm": 10.09490833472444, + "learning_rate": 5e-06, + "loss": 0.126, + "num_input_tokens_seen": 595838532, + "step": 3469 + }, + { + "epoch": 0.9124745183139343, + "loss": 0.16073833405971527, + "loss_ce": 0.0017417498165741563, + "loss_iou": 0.58984375, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 595838532, + "step": 3469 + }, + { + "epoch": 0.9127375550733215, + "grad_norm": 2.9635391739801356, + "learning_rate": 5e-06, + "loss": 0.104, + "num_input_tokens_seen": 596010728, + "step": 3470 + }, + { + "epoch": 0.9127375550733215, + "loss": 0.09987487643957138, + "loss_ce": 0.0005096413660794497, + "loss_iou": 0.5625, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 596010728, + "step": 3470 + }, + { + "epoch": 0.9130005918327087, + "grad_norm": 5.819990486053208, + "learning_rate": 5e-06, + "loss": 0.077, + "num_input_tokens_seen": 596183036, + "step": 3471 + }, + { + "epoch": 0.9130005918327087, + "loss": 0.08611226826906204, + "loss_ce": 0.0005867574363946915, + "loss_iou": 0.4609375, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 596183036, + "step": 3471 + }, + { + "epoch": 0.9132636285920958, + "grad_norm": 13.706964401152458, + "learning_rate": 5e-06, + "loss": 0.1041, + "num_input_tokens_seen": 596353748, + "step": 3472 + }, + { + "epoch": 0.9132636285920958, + "loss": 0.11669529974460602, + "loss_ce": 0.007839101366698742, + "loss_iou": 0.490234375, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 596353748, + "step": 3472 + }, + { + "epoch": 0.9135266653514829, + "grad_norm": 12.86793443272329, + "learning_rate": 5e-06, + "loss": 0.0992, + "num_input_tokens_seen": 596525804, + "step": 3473 + }, + { + "epoch": 0.9135266653514829, + "loss": 0.14639021456241608, + "loss_ce": 0.002057329285889864, + "loss_iou": 0.5390625, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 596525804, + "step": 3473 + }, + { + "epoch": 0.91378970211087, + "grad_norm": 3.549687957056835, + "learning_rate": 5e-06, + "loss": 0.1106, + "num_input_tokens_seen": 596698176, + "step": 3474 + }, + { + "epoch": 0.91378970211087, + "loss": 0.07896921038627625, + "loss_ce": 0.0026447533164173365, + "loss_iou": 0.37890625, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 596698176, + "step": 3474 + }, + { + "epoch": 0.9140527388702571, + "grad_norm": 7.626577083859758, + "learning_rate": 5e-06, + "loss": 0.0851, + "num_input_tokens_seen": 596870272, + "step": 3475 + }, + { + "epoch": 0.9140527388702571, + "loss": 0.110136017203331, + "loss_ce": 0.000928854919038713, + "loss_iou": 0.453125, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 596870272, + "step": 3475 + }, + { + "epoch": 0.9143157756296443, + "grad_norm": 4.749367923818828, + "learning_rate": 5e-06, + "loss": 0.1027, + "num_input_tokens_seen": 597042624, + "step": 3476 + }, + { + "epoch": 0.9143157756296443, + "loss": 0.1048622876405716, + "loss_ce": 0.0006752688204869628, + "loss_iou": 0.48046875, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 597042624, + "step": 3476 + }, + { + "epoch": 0.9145788123890314, + "grad_norm": 7.997778290032009, + "learning_rate": 5e-06, + "loss": 0.0919, + "num_input_tokens_seen": 597214744, + "step": 3477 + }, + { + "epoch": 0.9145788123890314, + "loss": 0.0939270555973053, + "loss_ce": 0.00036016173544339836, + "loss_iou": 0.40234375, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 597214744, + "step": 3477 + }, + { + "epoch": 0.9148418491484185, + "grad_norm": 9.635212836595876, + "learning_rate": 5e-06, + "loss": 0.0936, + "num_input_tokens_seen": 597387080, + "step": 3478 + }, + { + "epoch": 0.9148418491484185, + "loss": 0.12316415458917618, + "loss_ce": 0.0010938385967165232, + "loss_iou": 0.68359375, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 597387080, + "step": 3478 + }, + { + "epoch": 0.9151048859078056, + "grad_norm": 3.963240318321649, + "learning_rate": 5e-06, + "loss": 0.111, + "num_input_tokens_seen": 597559392, + "step": 3479 + }, + { + "epoch": 0.9151048859078056, + "loss": 0.05641660839319229, + "loss_ce": 0.0005084058502689004, + "loss_iou": 0.498046875, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 597559392, + "step": 3479 + }, + { + "epoch": 0.9153679226671927, + "grad_norm": 3.4338009584504947, + "learning_rate": 5e-06, + "loss": 0.156, + "num_input_tokens_seen": 597731528, + "step": 3480 + }, + { + "epoch": 0.9153679226671927, + "loss": 0.09766215085983276, + "loss_ce": 0.001623332966119051, + "loss_iou": 0.443359375, + "loss_num": 0.0191650390625, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 597731528, + "step": 3480 + }, + { + "epoch": 0.9156309594265799, + "grad_norm": 7.619842626421138, + "learning_rate": 5e-06, + "loss": 0.0847, + "num_input_tokens_seen": 597903520, + "step": 3481 + }, + { + "epoch": 0.9156309594265799, + "loss": 0.04850924387574196, + "loss_ce": 0.0030533126555383205, + "loss_iou": 0.3984375, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 597903520, + "step": 3481 + }, + { + "epoch": 0.915893996185967, + "grad_norm": 3.2410150384393765, + "learning_rate": 5e-06, + "loss": 0.0627, + "num_input_tokens_seen": 598075560, + "step": 3482 + }, + { + "epoch": 0.915893996185967, + "loss": 0.06452830880880356, + "loss_ce": 0.00015147785597946495, + "loss_iou": NaN, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 598075560, + "step": 3482 + }, + { + "epoch": 0.9161570329453541, + "grad_norm": 3.618001442656621, + "learning_rate": 5e-06, + "loss": 0.1013, + "num_input_tokens_seen": 598247796, + "step": 3483 + }, + { + "epoch": 0.9161570329453541, + "loss": 0.07420238852500916, + "loss_ce": 0.00047191951307468116, + "loss_iou": 0.51171875, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 598247796, + "step": 3483 + }, + { + "epoch": 0.9164200697047412, + "grad_norm": 10.045861260608898, + "learning_rate": 5e-06, + "loss": 0.0892, + "num_input_tokens_seen": 598419940, + "step": 3484 + }, + { + "epoch": 0.9164200697047412, + "loss": 0.1423761397600174, + "loss_ce": 0.0001794886775314808, + "loss_iou": NaN, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 598419940, + "step": 3484 + }, + { + "epoch": 0.9166831064641283, + "grad_norm": 13.006322984837828, + "learning_rate": 5e-06, + "loss": 0.1031, + "num_input_tokens_seen": 598592140, + "step": 3485 + }, + { + "epoch": 0.9166831064641283, + "loss": 0.08574345707893372, + "loss_ce": 0.0002637250581756234, + "loss_iou": 0.6640625, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 598592140, + "step": 3485 + }, + { + "epoch": 0.9169461432235155, + "grad_norm": 7.24999783542604, + "learning_rate": 5e-06, + "loss": 0.0925, + "num_input_tokens_seen": 598764500, + "step": 3486 + }, + { + "epoch": 0.9169461432235155, + "loss": 0.0942230224609375, + "loss_ce": 0.002975467825308442, + "loss_iou": 0.41796875, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 598764500, + "step": 3486 + }, + { + "epoch": 0.9172091799829026, + "grad_norm": 4.120990685906085, + "learning_rate": 5e-06, + "loss": 0.1011, + "num_input_tokens_seen": 598936608, + "step": 3487 + }, + { + "epoch": 0.9172091799829026, + "loss": 0.08670195192098618, + "loss_ce": 0.0005203153123147786, + "loss_iou": 0.6171875, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 598936608, + "step": 3487 + }, + { + "epoch": 0.9174722167422897, + "grad_norm": 7.691079480924726, + "learning_rate": 5e-06, + "loss": 0.0946, + "num_input_tokens_seen": 599109116, + "step": 3488 + }, + { + "epoch": 0.9174722167422897, + "loss": 0.07726689428091049, + "loss_ce": 0.0006982971681281924, + "loss_iou": 0.48828125, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 599109116, + "step": 3488 + }, + { + "epoch": 0.9177352535016768, + "grad_norm": 5.641902324014326, + "learning_rate": 5e-06, + "loss": 0.0943, + "num_input_tokens_seen": 599281296, + "step": 3489 + }, + { + "epoch": 0.9177352535016768, + "loss": 0.07277127355337143, + "loss_ce": 0.0036947373300790787, + "loss_iou": 0.375, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 599281296, + "step": 3489 + }, + { + "epoch": 0.9179982902610639, + "grad_norm": 5.439497936856968, + "learning_rate": 5e-06, + "loss": 0.1045, + "num_input_tokens_seen": 599450632, + "step": 3490 + }, + { + "epoch": 0.9179982902610639, + "loss": 0.08008137345314026, + "loss_ce": 0.0042757089249789715, + "loss_iou": 0.44140625, + "loss_num": 0.01519775390625, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 599450632, + "step": 3490 + }, + { + "epoch": 0.9182613270204512, + "grad_norm": 3.272614734962344, + "learning_rate": 5e-06, + "loss": 0.1474, + "num_input_tokens_seen": 599622648, + "step": 3491 + }, + { + "epoch": 0.9182613270204512, + "loss": 0.21320411562919617, + "loss_ce": 0.002327651483938098, + "loss_iou": 0.296875, + "loss_num": 0.042236328125, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 599622648, + "step": 3491 + }, + { + "epoch": 0.9185243637798383, + "grad_norm": 4.968216314604122, + "learning_rate": 5e-06, + "loss": 0.0668, + "num_input_tokens_seen": 599793016, + "step": 3492 + }, + { + "epoch": 0.9185243637798383, + "loss": 0.05503164976835251, + "loss_ce": 0.001076570013538003, + "loss_iou": 0.435546875, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 599793016, + "step": 3492 + }, + { + "epoch": 0.9187874005392254, + "grad_norm": 13.600408653665847, + "learning_rate": 5e-06, + "loss": 0.1265, + "num_input_tokens_seen": 599965196, + "step": 3493 + }, + { + "epoch": 0.9187874005392254, + "loss": 0.2306230664253235, + "loss_ce": 0.002626231173053384, + "loss_iou": 0.453125, + "loss_num": 0.045654296875, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 599965196, + "step": 3493 + }, + { + "epoch": 0.9190504372986125, + "grad_norm": 13.080198547497895, + "learning_rate": 5e-06, + "loss": 0.1038, + "num_input_tokens_seen": 600137328, + "step": 3494 + }, + { + "epoch": 0.9190504372986125, + "loss": 0.0880010575056076, + "loss_ce": 0.0009954443667083979, + "loss_iou": 0.486328125, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 600137328, + "step": 3494 + }, + { + "epoch": 0.9193134740579996, + "grad_norm": 6.526249469256959, + "learning_rate": 5e-06, + "loss": 0.0958, + "num_input_tokens_seen": 600309400, + "step": 3495 + }, + { + "epoch": 0.9193134740579996, + "loss": 0.11348491907119751, + "loss_ce": 0.000966615742072463, + "loss_iou": 0.515625, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 600309400, + "step": 3495 + }, + { + "epoch": 0.9195765108173868, + "grad_norm": 8.360743966936655, + "learning_rate": 5e-06, + "loss": 0.1147, + "num_input_tokens_seen": 600481616, + "step": 3496 + }, + { + "epoch": 0.9195765108173868, + "loss": 0.09168469160795212, + "loss_ce": 0.0012611029669642448, + "loss_iou": 0.5390625, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 600481616, + "step": 3496 + }, + { + "epoch": 0.9198395475767739, + "grad_norm": 6.554644415896188, + "learning_rate": 5e-06, + "loss": 0.1573, + "num_input_tokens_seen": 600653744, + "step": 3497 + }, + { + "epoch": 0.9198395475767739, + "loss": 0.11767074465751648, + "loss_ce": 0.0016429107636213303, + "loss_iou": 0.59375, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 600653744, + "step": 3497 + }, + { + "epoch": 0.920102584336161, + "grad_norm": 16.679597431920058, + "learning_rate": 5e-06, + "loss": 0.0954, + "num_input_tokens_seen": 600824128, + "step": 3498 + }, + { + "epoch": 0.920102584336161, + "loss": 0.14813083410263062, + "loss_ce": 0.0007919695926830173, + "loss_iou": NaN, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 600824128, + "step": 3498 + }, + { + "epoch": 0.9203656210955481, + "grad_norm": 4.858084855581349, + "learning_rate": 5e-06, + "loss": 0.1519, + "num_input_tokens_seen": 600996452, + "step": 3499 + }, + { + "epoch": 0.9203656210955481, + "loss": 0.10890492051839828, + "loss_ce": 0.0007811367395333946, + "loss_iou": 0.5625, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 600996452, + "step": 3499 + }, + { + "epoch": 0.9206286578549352, + "grad_norm": 5.0714335035388896, + "learning_rate": 5e-06, + "loss": 0.1338, + "num_input_tokens_seen": 601168700, + "step": 3500 + }, + { + "epoch": 0.9206286578549352, + "eval_websight_new_CIoU": 0.8971402049064636, + "eval_websight_new_GIoU": 0.8995657861232758, + "eval_websight_new_IoU": 0.9006073176860809, + "eval_websight_new_MAE_all": 0.013850971590727568, + "eval_websight_new_MAE_h": 0.00682047987356782, + "eval_websight_new_MAE_w": 0.02169650699943304, + "eval_websight_new_MAE_x": 0.022679010406136513, + "eval_websight_new_MAE_y": 0.004207887570373714, + "eval_websight_new_NUM_probability": 0.9999927878379822, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 0.07072407752275467, + "eval_websight_new_loss_ce": 9.538403446640586e-06, + "eval_websight_new_loss_iou": 0.32720947265625, + "eval_websight_new_loss_num": 0.012699127197265625, + "eval_websight_new_loss_xval": 0.06354522705078125, + "eval_websight_new_runtime": 56.2114, + "eval_websight_new_samples_per_second": 0.889, + "eval_websight_new_steps_per_second": 0.036, + "num_input_tokens_seen": 601168700, + "step": 3500 + }, + { + "epoch": 0.9206286578549352, + "eval_seeclick_CIoU": 0.6764970123767853, + "eval_seeclick_GIoU": 0.6781544089317322, + "eval_seeclick_IoU": 0.6978051662445068, + "eval_seeclick_MAE_all": 0.040375180542469025, + "eval_seeclick_MAE_h": 0.021458005532622337, + "eval_seeclick_MAE_w": 0.05631308630108833, + "eval_seeclick_MAE_x": 0.06287308409810066, + "eval_seeclick_MAE_y": 0.02085655089467764, + "eval_seeclick_NUM_probability": 0.9999865889549255, + "eval_seeclick_inside_bbox": 0.953125, + "eval_seeclick_loss": 0.17490381002426147, + "eval_seeclick_loss_ce": 0.008978934027254581, + "eval_seeclick_loss_iou": 0.46295166015625, + "eval_seeclick_loss_num": 0.031360626220703125, + "eval_seeclick_loss_xval": 0.1567230224609375, + "eval_seeclick_runtime": 74.0396, + "eval_seeclick_samples_per_second": 0.581, + "eval_seeclick_steps_per_second": 0.027, + "num_input_tokens_seen": 601168700, + "step": 3500 + }, + { + "epoch": 0.9206286578549352, + "eval_icons_CIoU": 0.8881092965602875, + "eval_icons_GIoU": 0.8860294818878174, + "eval_icons_IoU": 0.8926480114459991, + "eval_icons_MAE_all": 0.0159080708399415, + "eval_icons_MAE_h": 0.020047838799655437, + "eval_icons_MAE_w": 0.015083736274391413, + "eval_icons_MAE_x": 0.013403147924691439, + "eval_icons_MAE_y": 0.015097561292350292, + "eval_icons_NUM_probability": 0.9999927878379822, + "eval_icons_inside_bbox": 0.984375, + "eval_icons_loss": 0.060060471296310425, + "eval_icons_loss_ce": 9.6267791604987e-06, + "eval_icons_loss_iou": 0.605712890625, + "eval_icons_loss_num": 0.01105499267578125, + "eval_icons_loss_xval": 0.05532073974609375, + "eval_icons_runtime": 80.8235, + "eval_icons_samples_per_second": 0.619, + "eval_icons_steps_per_second": 0.025, + "num_input_tokens_seen": 601168700, + "step": 3500 + }, + { + "epoch": 0.9206286578549352, + "eval_screenspot_CIoU": 0.5590948661168417, + "eval_screenspot_GIoU": 0.5591723521550497, + "eval_screenspot_IoU": 0.5979611476262411, + "eval_screenspot_MAE_all": 0.08105809738238652, + "eval_screenspot_MAE_h": 0.05730322003364563, + "eval_screenspot_MAE_w": 0.1365982194741567, + "eval_screenspot_MAE_x": 0.0790914719303449, + "eval_screenspot_MAE_y": 0.05123948057492574, + "eval_screenspot_NUM_probability": 0.9999733567237854, + "eval_screenspot_inside_bbox": 0.8841666579246521, + "eval_screenspot_loss": 0.9397080540657043, + "eval_screenspot_loss_ce": 0.5959697167078654, + "eval_screenspot_loss_iou": 0.539306640625, + "eval_screenspot_loss_num": 0.06738789876302083, + "eval_screenspot_loss_xval": 0.3368326822916667, + "eval_screenspot_runtime": 151.1273, + "eval_screenspot_samples_per_second": 0.589, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 601168700, + "step": 3500 + }, + { + "epoch": 0.9206286578549352, + "loss": 0.9230542182922363, + "loss_ce": 0.5880932807922363, + "loss_iou": 0.435546875, + "loss_num": 0.06689453125, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 601168700, + "step": 3500 + }, + { + "epoch": 0.9208916946143223, + "grad_norm": 3.0758731887113147, + "learning_rate": 5e-06, + "loss": 0.0803, + "num_input_tokens_seen": 601340840, + "step": 3501 + }, + { + "epoch": 0.9208916946143223, + "loss": 0.07921823859214783, + "loss_ce": 0.0005973259685561061, + "loss_iou": 0.474609375, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 601340840, + "step": 3501 + }, + { + "epoch": 0.9211547313737095, + "grad_norm": 12.765305175609484, + "learning_rate": 5e-06, + "loss": 0.1272, + "num_input_tokens_seen": 601513112, + "step": 3502 + }, + { + "epoch": 0.9211547313737095, + "loss": 0.13090460002422333, + "loss_ce": 0.002868090523406863, + "loss_iou": 0.35546875, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 601513112, + "step": 3502 + }, + { + "epoch": 0.9214177681330966, + "grad_norm": 3.071857580938524, + "learning_rate": 5e-06, + "loss": 0.0751, + "num_input_tokens_seen": 601685348, + "step": 3503 + }, + { + "epoch": 0.9214177681330966, + "loss": 0.11101450026035309, + "loss_ce": 0.002280366839841008, + "loss_iou": 0.48046875, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 601685348, + "step": 3503 + }, + { + "epoch": 0.9216808048924837, + "grad_norm": 3.8727649532640296, + "learning_rate": 5e-06, + "loss": 0.0905, + "num_input_tokens_seen": 601857460, + "step": 3504 + }, + { + "epoch": 0.9216808048924837, + "loss": 0.0590723380446434, + "loss_ce": 0.0022028274834156036, + "loss_iou": 0.421875, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 601857460, + "step": 3504 + }, + { + "epoch": 0.9219438416518708, + "grad_norm": 12.18479200262375, + "learning_rate": 5e-06, + "loss": 0.0989, + "num_input_tokens_seen": 602029992, + "step": 3505 + }, + { + "epoch": 0.9219438416518708, + "loss": 0.12037432193756104, + "loss_ce": 0.0029426885303109884, + "loss_iou": 0.5625, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 602029992, + "step": 3505 + }, + { + "epoch": 0.9222068784112579, + "grad_norm": 4.289786395943063, + "learning_rate": 5e-06, + "loss": 0.1091, + "num_input_tokens_seen": 602202184, + "step": 3506 + }, + { + "epoch": 0.9222068784112579, + "loss": 0.16438013315200806, + "loss_ce": 0.00022608340077567846, + "loss_iou": 0.453125, + "loss_num": 0.032958984375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 602202184, + "step": 3506 + }, + { + "epoch": 0.9224699151706451, + "grad_norm": 27.091794274961455, + "learning_rate": 5e-06, + "loss": 0.097, + "num_input_tokens_seen": 602372792, + "step": 3507 + }, + { + "epoch": 0.9224699151706451, + "loss": 0.081387460231781, + "loss_ce": 0.0013398483861237764, + "loss_iou": 0.640625, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 602372792, + "step": 3507 + }, + { + "epoch": 0.9227329519300322, + "grad_norm": 8.942524792614973, + "learning_rate": 5e-06, + "loss": 0.0977, + "num_input_tokens_seen": 602542048, + "step": 3508 + }, + { + "epoch": 0.9227329519300322, + "loss": 0.11642280220985413, + "loss_ce": 0.005247259978204966, + "loss_iou": 0.52734375, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 602542048, + "step": 3508 + }, + { + "epoch": 0.9229959886894193, + "grad_norm": 5.673853173524807, + "learning_rate": 5e-06, + "loss": 0.143, + "num_input_tokens_seen": 602714148, + "step": 3509 + }, + { + "epoch": 0.9229959886894193, + "loss": 0.22935867309570312, + "loss_ce": 0.0008430513553321362, + "loss_iou": 0.396484375, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 602714148, + "step": 3509 + }, + { + "epoch": 0.9232590254488064, + "grad_norm": 4.438849137936148, + "learning_rate": 5e-06, + "loss": 0.1442, + "num_input_tokens_seen": 602886264, + "step": 3510 + }, + { + "epoch": 0.9232590254488064, + "loss": 0.1615859568119049, + "loss_ce": 0.0012771158944815397, + "loss_iou": 0.376953125, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 602886264, + "step": 3510 + }, + { + "epoch": 0.9235220622081936, + "grad_norm": 4.872038444582204, + "learning_rate": 5e-06, + "loss": 0.1038, + "num_input_tokens_seen": 603058732, + "step": 3511 + }, + { + "epoch": 0.9235220622081936, + "loss": 0.13943278789520264, + "loss_ce": 0.0005015181959606707, + "loss_iou": 0.423828125, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 603058732, + "step": 3511 + }, + { + "epoch": 0.9237850989675808, + "grad_norm": 4.92552213324199, + "learning_rate": 5e-06, + "loss": 0.1144, + "num_input_tokens_seen": 603230928, + "step": 3512 + }, + { + "epoch": 0.9237850989675808, + "loss": 0.14707276225090027, + "loss_ce": 0.0023278831504285336, + "loss_iou": 0.40234375, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 603230928, + "step": 3512 + }, + { + "epoch": 0.9240481357269679, + "grad_norm": 8.817456239528582, + "learning_rate": 5e-06, + "loss": 0.0967, + "num_input_tokens_seen": 603403300, + "step": 3513 + }, + { + "epoch": 0.9240481357269679, + "loss": 0.12186002731323242, + "loss_ce": 0.0018343898700550199, + "loss_iou": 0.5625, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 603403300, + "step": 3513 + }, + { + "epoch": 0.924311172486355, + "grad_norm": 10.676170008596674, + "learning_rate": 5e-06, + "loss": 0.1438, + "num_input_tokens_seen": 603575704, + "step": 3514 + }, + { + "epoch": 0.924311172486355, + "loss": 0.15692317485809326, + "loss_ce": 0.004670977126806974, + "loss_iou": NaN, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 603575704, + "step": 3514 + }, + { + "epoch": 0.9245742092457421, + "grad_norm": 11.658802940011123, + "learning_rate": 5e-06, + "loss": 0.0984, + "num_input_tokens_seen": 603745852, + "step": 3515 + }, + { + "epoch": 0.9245742092457421, + "loss": 0.08981953561306, + "loss_ce": 0.00015888996131252497, + "loss_iou": 0.53515625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 603745852, + "step": 3515 + }, + { + "epoch": 0.9248372460051292, + "grad_norm": 11.699348951917594, + "learning_rate": 5e-06, + "loss": 0.1644, + "num_input_tokens_seen": 603918360, + "step": 3516 + }, + { + "epoch": 0.9248372460051292, + "loss": 0.1186264157295227, + "loss_ce": 0.0009811592753976583, + "loss_iou": 0.416015625, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 603918360, + "step": 3516 + }, + { + "epoch": 0.9251002827645164, + "grad_norm": 16.59746302136246, + "learning_rate": 5e-06, + "loss": 0.1447, + "num_input_tokens_seen": 604090468, + "step": 3517 + }, + { + "epoch": 0.9251002827645164, + "loss": 0.15999768674373627, + "loss_ce": 0.0011536948150023818, + "loss_iou": 0.5234375, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 604090468, + "step": 3517 + }, + { + "epoch": 0.9253633195239035, + "grad_norm": 7.69976984803875, + "learning_rate": 5e-06, + "loss": 0.1235, + "num_input_tokens_seen": 604262492, + "step": 3518 + }, + { + "epoch": 0.9253633195239035, + "loss": 0.14189431071281433, + "loss_ce": 0.006487809121608734, + "loss_iou": 0.470703125, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 604262492, + "step": 3518 + }, + { + "epoch": 0.9256263562832906, + "grad_norm": 12.75000933447967, + "learning_rate": 5e-06, + "loss": 0.1099, + "num_input_tokens_seen": 604432856, + "step": 3519 + }, + { + "epoch": 0.9256263562832906, + "loss": 0.0861673578619957, + "loss_ce": 0.004319215193390846, + "loss_iou": 0.484375, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 604432856, + "step": 3519 + }, + { + "epoch": 0.9258893930426777, + "grad_norm": 5.042427155046337, + "learning_rate": 5e-06, + "loss": 0.1163, + "num_input_tokens_seen": 604605056, + "step": 3520 + }, + { + "epoch": 0.9258893930426777, + "loss": 0.1412590742111206, + "loss_ce": 0.002571945311501622, + "loss_iou": 0.455078125, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 604605056, + "step": 3520 + }, + { + "epoch": 0.9261524298020648, + "grad_norm": 5.26588732086772, + "learning_rate": 5e-06, + "loss": 0.0991, + "num_input_tokens_seen": 604775372, + "step": 3521 + }, + { + "epoch": 0.9261524298020648, + "loss": 0.05718105286359787, + "loss_ce": 0.0014254315756261349, + "loss_iou": 0.54296875, + "loss_num": 0.01116943359375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 604775372, + "step": 3521 + }, + { + "epoch": 0.926415466561452, + "grad_norm": 6.371747445648948, + "learning_rate": 5e-06, + "loss": 0.106, + "num_input_tokens_seen": 604947376, + "step": 3522 + }, + { + "epoch": 0.926415466561452, + "loss": 0.08819465339183807, + "loss_ce": 0.0007312724483199418, + "loss_iou": 0.322265625, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 604947376, + "step": 3522 + }, + { + "epoch": 0.9266785033208391, + "grad_norm": 10.505886717736445, + "learning_rate": 5e-06, + "loss": 0.1835, + "num_input_tokens_seen": 605119568, + "step": 3523 + }, + { + "epoch": 0.9266785033208391, + "loss": 0.17979584634304047, + "loss_ce": 0.004075629636645317, + "loss_iou": 0.609375, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 605119568, + "step": 3523 + }, + { + "epoch": 0.9269415400802262, + "grad_norm": 6.455190531468335, + "learning_rate": 5e-06, + "loss": 0.1006, + "num_input_tokens_seen": 605291524, + "step": 3524 + }, + { + "epoch": 0.9269415400802262, + "loss": 0.08140784502029419, + "loss_ce": 0.0016959276981651783, + "loss_iou": 0.51171875, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 605291524, + "step": 3524 + }, + { + "epoch": 0.9272045768396133, + "grad_norm": 14.745208066661506, + "learning_rate": 5e-06, + "loss": 0.1184, + "num_input_tokens_seen": 605463808, + "step": 3525 + }, + { + "epoch": 0.9272045768396133, + "loss": 0.11558607965707779, + "loss_ce": 0.0017555101076141, + "loss_iou": 0.51171875, + "loss_num": 0.0228271484375, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 605463808, + "step": 3525 + }, + { + "epoch": 0.9274676135990004, + "grad_norm": 3.8942399340921825, + "learning_rate": 5e-06, + "loss": 0.0847, + "num_input_tokens_seen": 605635872, + "step": 3526 + }, + { + "epoch": 0.9274676135990004, + "loss": 0.06215044856071472, + "loss_ce": 0.00019976735347881913, + "loss_iou": 0.546875, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 605635872, + "step": 3526 + }, + { + "epoch": 0.9277306503583875, + "grad_norm": 4.175922010381311, + "learning_rate": 5e-06, + "loss": 0.1207, + "num_input_tokens_seen": 605807804, + "step": 3527 + }, + { + "epoch": 0.9277306503583875, + "loss": 0.16303026676177979, + "loss_ce": 0.0018974501872435212, + "loss_iou": 0.482421875, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 605807804, + "step": 3527 + }, + { + "epoch": 0.9279936871177747, + "grad_norm": 3.844069149031415, + "learning_rate": 5e-06, + "loss": 0.1016, + "num_input_tokens_seen": 605979952, + "step": 3528 + }, + { + "epoch": 0.9279936871177747, + "loss": 0.0688394084572792, + "loss_ce": 0.0023721237666904926, + "loss_iou": 0.4609375, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 605979952, + "step": 3528 + }, + { + "epoch": 0.9282567238771618, + "grad_norm": 4.202436418234633, + "learning_rate": 5e-06, + "loss": 0.081, + "num_input_tokens_seen": 606150360, + "step": 3529 + }, + { + "epoch": 0.9282567238771618, + "loss": 0.04098789393901825, + "loss_ce": 0.00038425601087510586, + "loss_iou": 0.5, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 606150360, + "step": 3529 + }, + { + "epoch": 0.928519760636549, + "grad_norm": 3.743613714946928, + "learning_rate": 5e-06, + "loss": 0.1016, + "num_input_tokens_seen": 606320544, + "step": 3530 + }, + { + "epoch": 0.928519760636549, + "loss": 0.12318438291549683, + "loss_ce": 0.003891176311299205, + "loss_iou": 0.357421875, + "loss_num": 0.02392578125, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 606320544, + "step": 3530 + }, + { + "epoch": 0.928782797395936, + "grad_norm": 7.541725516396188, + "learning_rate": 5e-06, + "loss": 0.0936, + "num_input_tokens_seen": 606492452, + "step": 3531 + }, + { + "epoch": 0.928782797395936, + "loss": 0.05570812523365021, + "loss_ce": 0.0006238996866159141, + "loss_iou": 0.66796875, + "loss_num": 0.010986328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 606492452, + "step": 3531 + }, + { + "epoch": 0.9290458341553232, + "grad_norm": 7.283637997577956, + "learning_rate": 5e-06, + "loss": 0.1403, + "num_input_tokens_seen": 606664904, + "step": 3532 + }, + { + "epoch": 0.9290458341553232, + "loss": 0.2060985267162323, + "loss_ce": 0.0019969542045146227, + "loss_iou": 0.40625, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 606664904, + "step": 3532 + }, + { + "epoch": 0.9293088709147104, + "grad_norm": 3.96510750197539, + "learning_rate": 5e-06, + "loss": 0.1007, + "num_input_tokens_seen": 606836960, + "step": 3533 + }, + { + "epoch": 0.9293088709147104, + "loss": 0.07297110557556152, + "loss_ce": 0.002353430027142167, + "loss_iou": 0.384765625, + "loss_num": 0.01409912109375, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 606836960, + "step": 3533 + }, + { + "epoch": 0.9295719076740975, + "grad_norm": 2.973198469624849, + "learning_rate": 5e-06, + "loss": 0.1219, + "num_input_tokens_seen": 607009360, + "step": 3534 + }, + { + "epoch": 0.9295719076740975, + "loss": 0.13494953513145447, + "loss_ce": 0.0017403117381036282, + "loss_iou": 0.4375, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 607009360, + "step": 3534 + }, + { + "epoch": 0.9298349444334846, + "grad_norm": 3.119729004971168, + "learning_rate": 5e-06, + "loss": 0.0664, + "num_input_tokens_seen": 607181304, + "step": 3535 + }, + { + "epoch": 0.9298349444334846, + "loss": 0.06605780124664307, + "loss_ce": 0.002443910576403141, + "loss_iou": 0.421875, + "loss_num": 0.01275634765625, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 607181304, + "step": 3535 + }, + { + "epoch": 0.9300979811928717, + "grad_norm": 4.352773383034863, + "learning_rate": 5e-06, + "loss": 0.0763, + "num_input_tokens_seen": 607353736, + "step": 3536 + }, + { + "epoch": 0.9300979811928717, + "loss": 0.06802303344011307, + "loss_ce": 0.001021688454784453, + "loss_iou": 0.4296875, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 607353736, + "step": 3536 + }, + { + "epoch": 0.9303610179522588, + "grad_norm": 3.9578360400969794, + "learning_rate": 5e-06, + "loss": 0.0684, + "num_input_tokens_seen": 607525920, + "step": 3537 + }, + { + "epoch": 0.9303610179522588, + "loss": 0.06541258096694946, + "loss_ce": 0.00288206129334867, + "loss_iou": 0.5703125, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 607525920, + "step": 3537 + }, + { + "epoch": 0.930624054711646, + "grad_norm": 4.492300233458511, + "learning_rate": 5e-06, + "loss": 0.0889, + "num_input_tokens_seen": 607697980, + "step": 3538 + }, + { + "epoch": 0.930624054711646, + "loss": 0.09697936475276947, + "loss_ce": 0.0013983015669509768, + "loss_iou": 0.53515625, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 607697980, + "step": 3538 + }, + { + "epoch": 0.9308870914710331, + "grad_norm": 4.302295585236548, + "learning_rate": 5e-06, + "loss": 0.1081, + "num_input_tokens_seen": 607870144, + "step": 3539 + }, + { + "epoch": 0.9308870914710331, + "loss": 0.10722782462835312, + "loss_ce": 0.000843545887619257, + "loss_iou": 0.49609375, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 607870144, + "step": 3539 + }, + { + "epoch": 0.9311501282304202, + "grad_norm": 7.0499450082503365, + "learning_rate": 5e-06, + "loss": 0.0964, + "num_input_tokens_seen": 608042240, + "step": 3540 + }, + { + "epoch": 0.9311501282304202, + "loss": 0.1015363559126854, + "loss_ce": 0.006443582940846682, + "loss_iou": 0.44140625, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 608042240, + "step": 3540 + }, + { + "epoch": 0.9314131649898073, + "grad_norm": 6.586834009281099, + "learning_rate": 5e-06, + "loss": 0.1204, + "num_input_tokens_seen": 608214284, + "step": 3541 + }, + { + "epoch": 0.9314131649898073, + "loss": 0.12908074259757996, + "loss_ce": 0.004782632924616337, + "loss_iou": 0.59765625, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 608214284, + "step": 3541 + }, + { + "epoch": 0.9316762017491944, + "grad_norm": 4.53807614520661, + "learning_rate": 5e-06, + "loss": 0.1306, + "num_input_tokens_seen": 608386616, + "step": 3542 + }, + { + "epoch": 0.9316762017491944, + "loss": 0.1289425790309906, + "loss_ce": 0.0027523916214704514, + "loss_iou": 0.458984375, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 608386616, + "step": 3542 + }, + { + "epoch": 0.9319392385085816, + "grad_norm": 7.59401410995318, + "learning_rate": 5e-06, + "loss": 0.1268, + "num_input_tokens_seen": 608558468, + "step": 3543 + }, + { + "epoch": 0.9319392385085816, + "loss": 0.16303950548171997, + "loss_ce": 0.00520259328186512, + "loss_iou": 0.4375, + "loss_num": 0.031494140625, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 608558468, + "step": 3543 + }, + { + "epoch": 0.9322022752679687, + "grad_norm": 8.796770729596103, + "learning_rate": 5e-06, + "loss": 0.1311, + "num_input_tokens_seen": 608730916, + "step": 3544 + }, + { + "epoch": 0.9322022752679687, + "loss": 0.11866636574268341, + "loss_ce": 0.0005633389810100198, + "loss_iou": 0.341796875, + "loss_num": 0.0235595703125, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 608730916, + "step": 3544 + }, + { + "epoch": 0.9324653120273558, + "grad_norm": 5.984167827436451, + "learning_rate": 5e-06, + "loss": 0.1218, + "num_input_tokens_seen": 608902956, + "step": 3545 + }, + { + "epoch": 0.9324653120273558, + "loss": 0.09633171558380127, + "loss_ce": 0.000903245760127902, + "loss_iou": 0.55078125, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 608902956, + "step": 3545 + }, + { + "epoch": 0.9327283487867429, + "grad_norm": 13.864562508217144, + "learning_rate": 5e-06, + "loss": 0.1121, + "num_input_tokens_seen": 609072616, + "step": 3546 + }, + { + "epoch": 0.9327283487867429, + "loss": 0.11617599427700043, + "loss_ce": 0.0005448899464681745, + "loss_iou": 0.54296875, + "loss_num": 0.0230712890625, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 609072616, + "step": 3546 + }, + { + "epoch": 0.93299138554613, + "grad_norm": 7.250119178294138, + "learning_rate": 5e-06, + "loss": 0.0744, + "num_input_tokens_seen": 609244840, + "step": 3547 + }, + { + "epoch": 0.93299138554613, + "loss": 0.08952006697654724, + "loss_ce": 0.001415827078744769, + "loss_iou": 0.498046875, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 609244840, + "step": 3547 + }, + { + "epoch": 0.9332544223055173, + "grad_norm": 8.965223152196344, + "learning_rate": 5e-06, + "loss": 0.0799, + "num_input_tokens_seen": 609417116, + "step": 3548 + }, + { + "epoch": 0.9332544223055173, + "loss": 0.058124981820583344, + "loss_ce": 0.0005077911773696542, + "loss_iou": 0.4921875, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 609417116, + "step": 3548 + }, + { + "epoch": 0.9335174590649044, + "grad_norm": 4.578416828961302, + "learning_rate": 5e-06, + "loss": 0.1283, + "num_input_tokens_seen": 609589092, + "step": 3549 + }, + { + "epoch": 0.9335174590649044, + "loss": 0.06900876015424728, + "loss_ce": 0.0006951588438823819, + "loss_iou": 0.671875, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 609589092, + "step": 3549 + }, + { + "epoch": 0.9337804958242915, + "grad_norm": 8.028931966527075, + "learning_rate": 5e-06, + "loss": 0.1254, + "num_input_tokens_seen": 609761432, + "step": 3550 + }, + { + "epoch": 0.9337804958242915, + "loss": 0.10969488322734833, + "loss_ce": 0.0008691949769854546, + "loss_iou": 0.32421875, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 609761432, + "step": 3550 + }, + { + "epoch": 0.9340435325836786, + "grad_norm": 3.0608586378113225, + "learning_rate": 5e-06, + "loss": 0.097, + "num_input_tokens_seen": 609933500, + "step": 3551 + }, + { + "epoch": 0.9340435325836786, + "loss": 0.1336784064769745, + "loss_ce": 0.003475167090073228, + "loss_iou": 0.5546875, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 609933500, + "step": 3551 + }, + { + "epoch": 0.9343065693430657, + "grad_norm": 6.227248956666904, + "learning_rate": 5e-06, + "loss": 0.0903, + "num_input_tokens_seen": 610105792, + "step": 3552 + }, + { + "epoch": 0.9343065693430657, + "loss": 0.08306320756673813, + "loss_ce": 0.007104953285306692, + "loss_iou": 0.453125, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 610105792, + "step": 3552 + }, + { + "epoch": 0.9345696061024528, + "grad_norm": 5.469597791438911, + "learning_rate": 5e-06, + "loss": 0.0938, + "num_input_tokens_seen": 610277880, + "step": 3553 + }, + { + "epoch": 0.9345696061024528, + "loss": 0.08409252762794495, + "loss_ce": 0.002595331287011504, + "loss_iou": 0.42578125, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 610277880, + "step": 3553 + }, + { + "epoch": 0.93483264286184, + "grad_norm": 4.1047405914838455, + "learning_rate": 5e-06, + "loss": 0.0981, + "num_input_tokens_seen": 610449924, + "step": 3554 + }, + { + "epoch": 0.93483264286184, + "loss": 0.10268253833055496, + "loss_ce": 0.0010895198211073875, + "loss_iou": 0.5, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 610449924, + "step": 3554 + }, + { + "epoch": 0.9350956796212271, + "grad_norm": 4.649820698561932, + "learning_rate": 5e-06, + "loss": 0.095, + "num_input_tokens_seen": 610622052, + "step": 3555 + }, + { + "epoch": 0.9350956796212271, + "loss": 0.08575969934463501, + "loss_ce": 0.0018668812699615955, + "loss_iou": 0.337890625, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 610622052, + "step": 3555 + }, + { + "epoch": 0.9353587163806142, + "grad_norm": 9.738163582969452, + "learning_rate": 5e-06, + "loss": 0.0872, + "num_input_tokens_seen": 610794168, + "step": 3556 + }, + { + "epoch": 0.9353587163806142, + "loss": 0.17649231851100922, + "loss_ce": 0.004281637258827686, + "loss_iou": 0.423828125, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 610794168, + "step": 3556 + }, + { + "epoch": 0.9356217531400013, + "grad_norm": 60.60865892554417, + "learning_rate": 5e-06, + "loss": 0.1377, + "num_input_tokens_seen": 610964304, + "step": 3557 + }, + { + "epoch": 0.9356217531400013, + "loss": 0.21576589345932007, + "loss_ce": 0.01731007918715477, + "loss_iou": 0.390625, + "loss_num": 0.039794921875, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 610964304, + "step": 3557 + }, + { + "epoch": 0.9358847898993884, + "grad_norm": 26.41744180605267, + "learning_rate": 5e-06, + "loss": 0.278, + "num_input_tokens_seen": 611136572, + "step": 3558 + }, + { + "epoch": 0.9358847898993884, + "loss": 0.2503662705421448, + "loss_ce": 0.15902717411518097, + "loss_iou": 0.515625, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 611136572, + "step": 3558 + }, + { + "epoch": 0.9361478266587756, + "grad_norm": 31.460812663910158, + "learning_rate": 5e-06, + "loss": 0.1477, + "num_input_tokens_seen": 611308712, + "step": 3559 + }, + { + "epoch": 0.9361478266587756, + "loss": 0.10697901993989944, + "loss_ce": 0.014724383130669594, + "loss_iou": 0.59765625, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 611308712, + "step": 3559 + }, + { + "epoch": 0.9364108634181627, + "grad_norm": 125.0152995081179, + "learning_rate": 5e-06, + "loss": 0.2523, + "num_input_tokens_seen": 611481064, + "step": 3560 + }, + { + "epoch": 0.9364108634181627, + "loss": 0.19752028584480286, + "loss_ce": 0.055674582719802856, + "loss_iou": 0.6484375, + "loss_num": 0.0284423828125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 611481064, + "step": 3560 + }, + { + "epoch": 0.9366739001775498, + "grad_norm": 42.388335248969774, + "learning_rate": 5e-06, + "loss": 0.6327, + "num_input_tokens_seen": 611653432, + "step": 3561 + }, + { + "epoch": 0.9366739001775498, + "loss": 0.6363104581832886, + "loss_ce": 0.5759619474411011, + "loss_iou": 0.546875, + "loss_num": 0.0120849609375, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 611653432, + "step": 3561 + }, + { + "epoch": 0.9369369369369369, + "grad_norm": 34.483421318180156, + "learning_rate": 5e-06, + "loss": 0.1558, + "num_input_tokens_seen": 611825552, + "step": 3562 + }, + { + "epoch": 0.9369369369369369, + "loss": 0.10395447909832001, + "loss_ce": 0.05541627109050751, + "loss_iou": 0.435546875, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 611825552, + "step": 3562 + }, + { + "epoch": 0.937199973696324, + "grad_norm": 3.5466208788177513, + "learning_rate": 5e-06, + "loss": 0.0777, + "num_input_tokens_seen": 611997960, + "step": 3563 + }, + { + "epoch": 0.937199973696324, + "loss": 0.1155683621764183, + "loss_ce": 0.0075666578486561775, + "loss_iou": 0.47265625, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 611997960, + "step": 3563 + }, + { + "epoch": 0.9374630104557112, + "grad_norm": 6.556188606634138, + "learning_rate": 5e-06, + "loss": 0.0901, + "num_input_tokens_seen": 612170180, + "step": 3564 + }, + { + "epoch": 0.9374630104557112, + "loss": 0.0452946312725544, + "loss_ce": 0.0018070839578285813, + "loss_iou": 0.455078125, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 612170180, + "step": 3564 + }, + { + "epoch": 0.9377260472150983, + "grad_norm": 12.949003378413929, + "learning_rate": 5e-06, + "loss": 0.1504, + "num_input_tokens_seen": 612342360, + "step": 3565 + }, + { + "epoch": 0.9377260472150983, + "loss": 0.16198524832725525, + "loss_ce": 0.001249166438356042, + "loss_iou": 0.625, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 612342360, + "step": 3565 + }, + { + "epoch": 0.9379890839744854, + "grad_norm": 5.1711873858566335, + "learning_rate": 5e-06, + "loss": 0.074, + "num_input_tokens_seen": 612514340, + "step": 3566 + }, + { + "epoch": 0.9379890839744854, + "loss": 0.050079330801963806, + "loss_ce": 0.0008087016176432371, + "loss_iou": 0.455078125, + "loss_num": 0.00982666015625, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 612514340, + "step": 3566 + }, + { + "epoch": 0.9382521207338725, + "grad_norm": 15.271109982643123, + "learning_rate": 5e-06, + "loss": 0.1294, + "num_input_tokens_seen": 612686644, + "step": 3567 + }, + { + "epoch": 0.9382521207338725, + "loss": 0.051343828439712524, + "loss_ce": 0.0003336968075018376, + "loss_iou": 0.4609375, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 612686644, + "step": 3567 + }, + { + "epoch": 0.9385151574932596, + "grad_norm": 6.894404249326668, + "learning_rate": 5e-06, + "loss": 0.1218, + "num_input_tokens_seen": 612854000, + "step": 3568 + }, + { + "epoch": 0.9385151574932596, + "loss": 0.1327725201845169, + "loss_ce": 0.0005093337967991829, + "loss_iou": 0.46484375, + "loss_num": 0.0264892578125, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 612854000, + "step": 3568 + }, + { + "epoch": 0.9387781942526469, + "grad_norm": 16.019697398064586, + "learning_rate": 5e-06, + "loss": 0.104, + "num_input_tokens_seen": 613026136, + "step": 3569 + }, + { + "epoch": 0.9387781942526469, + "loss": 0.14443224668502808, + "loss_ce": 0.0018541140016168356, + "loss_iou": 0.4453125, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 613026136, + "step": 3569 + }, + { + "epoch": 0.939041231012034, + "grad_norm": 2.778217951943394, + "learning_rate": 5e-06, + "loss": 0.0847, + "num_input_tokens_seen": 613198388, + "step": 3570 + }, + { + "epoch": 0.939041231012034, + "loss": 0.05536004900932312, + "loss_ce": 0.0027019698172807693, + "loss_iou": 0.53515625, + "loss_num": 0.010498046875, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 613198388, + "step": 3570 + }, + { + "epoch": 0.9393042677714211, + "grad_norm": 4.873345772996428, + "learning_rate": 5e-06, + "loss": 0.0827, + "num_input_tokens_seen": 613370288, + "step": 3571 + }, + { + "epoch": 0.9393042677714211, + "loss": 0.09475830942392349, + "loss_ce": 0.0007489121053367853, + "loss_iou": 0.392578125, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 613370288, + "step": 3571 + }, + { + "epoch": 0.9395673045308082, + "grad_norm": 6.737346874423115, + "learning_rate": 5e-06, + "loss": 0.1097, + "num_input_tokens_seen": 613542216, + "step": 3572 + }, + { + "epoch": 0.9395673045308082, + "loss": 0.2296835035085678, + "loss_ce": 0.005287751089781523, + "loss_iou": 0.330078125, + "loss_num": 0.044921875, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 613542216, + "step": 3572 + }, + { + "epoch": 0.9398303412901953, + "grad_norm": 5.428921826665437, + "learning_rate": 5e-06, + "loss": 0.0901, + "num_input_tokens_seen": 613714320, + "step": 3573 + }, + { + "epoch": 0.9398303412901953, + "loss": 0.04020649567246437, + "loss_ce": 0.002578320913016796, + "loss_iou": 0.50390625, + "loss_num": 0.007537841796875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 613714320, + "step": 3573 + }, + { + "epoch": 0.9400933780495825, + "grad_norm": 5.730418477266325, + "learning_rate": 5e-06, + "loss": 0.0976, + "num_input_tokens_seen": 613884004, + "step": 3574 + }, + { + "epoch": 0.9400933780495825, + "loss": 0.12099497765302658, + "loss_ce": 0.000435287831351161, + "loss_iou": 0.1884765625, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 613884004, + "step": 3574 + }, + { + "epoch": 0.9403564148089696, + "grad_norm": 2.8371958686948315, + "learning_rate": 5e-06, + "loss": 0.102, + "num_input_tokens_seen": 614056360, + "step": 3575 + }, + { + "epoch": 0.9403564148089696, + "loss": 0.06765338778495789, + "loss_ce": 0.00017902448598761111, + "loss_iou": 0.392578125, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 614056360, + "step": 3575 + }, + { + "epoch": 0.9406194515683567, + "grad_norm": 3.3483226181812173, + "learning_rate": 5e-06, + "loss": 0.1015, + "num_input_tokens_seen": 614228248, + "step": 3576 + }, + { + "epoch": 0.9406194515683567, + "loss": 0.07497625052928925, + "loss_ce": 0.0009863873710855842, + "loss_iou": 0.404296875, + "loss_num": 0.0147705078125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 614228248, + "step": 3576 + }, + { + "epoch": 0.9408824883277438, + "grad_norm": 3.0470015747124837, + "learning_rate": 5e-06, + "loss": 0.0987, + "num_input_tokens_seen": 614400460, + "step": 3577 + }, + { + "epoch": 0.9408824883277438, + "loss": 0.15504948794841766, + "loss_ce": 0.005391271784901619, + "loss_iou": 0.23828125, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 614400460, + "step": 3577 + }, + { + "epoch": 0.9411455250871309, + "grad_norm": 14.590254214221755, + "learning_rate": 5e-06, + "loss": 0.0916, + "num_input_tokens_seen": 614572668, + "step": 3578 + }, + { + "epoch": 0.9411455250871309, + "loss": 0.12589725852012634, + "loss_ce": 0.00352177070453763, + "loss_iou": 0.453125, + "loss_num": 0.0244140625, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 614572668, + "step": 3578 + }, + { + "epoch": 0.941408561846518, + "grad_norm": 4.098895136914831, + "learning_rate": 5e-06, + "loss": 0.1042, + "num_input_tokens_seen": 614744728, + "step": 3579 + }, + { + "epoch": 0.941408561846518, + "loss": 0.07378913462162018, + "loss_ce": 0.0005316926399245858, + "loss_iou": 0.5, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 614744728, + "step": 3579 + }, + { + "epoch": 0.9416715986059052, + "grad_norm": 6.551103187216315, + "learning_rate": 5e-06, + "loss": 0.1095, + "num_input_tokens_seen": 614917064, + "step": 3580 + }, + { + "epoch": 0.9416715986059052, + "loss": 0.11033067107200623, + "loss_ce": 0.0037785512395203114, + "loss_iou": 0.625, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 614917064, + "step": 3580 + }, + { + "epoch": 0.9419346353652923, + "grad_norm": 6.125579245234497, + "learning_rate": 5e-06, + "loss": 0.1093, + "num_input_tokens_seen": 615089092, + "step": 3581 + }, + { + "epoch": 0.9419346353652923, + "loss": 0.09463340044021606, + "loss_ce": 0.002287208568304777, + "loss_iou": 0.421875, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 615089092, + "step": 3581 + }, + { + "epoch": 0.9421976721246794, + "grad_norm": 4.52462668676838, + "learning_rate": 5e-06, + "loss": 0.0929, + "num_input_tokens_seen": 615259732, + "step": 3582 + }, + { + "epoch": 0.9421976721246794, + "loss": 0.09844870865345001, + "loss_ce": 0.0007771998061798513, + "loss_iou": 0.546875, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 615259732, + "step": 3582 + }, + { + "epoch": 0.9424607088840665, + "grad_norm": 4.261037832557463, + "learning_rate": 5e-06, + "loss": 0.1002, + "num_input_tokens_seen": 615431984, + "step": 3583 + }, + { + "epoch": 0.9424607088840665, + "loss": 0.10644324868917465, + "loss_ce": 0.0039041785057634115, + "loss_iou": 0.53515625, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 615431984, + "step": 3583 + }, + { + "epoch": 0.9427237456434536, + "grad_norm": 11.026204038590599, + "learning_rate": 5e-06, + "loss": 0.0974, + "num_input_tokens_seen": 615604052, + "step": 3584 + }, + { + "epoch": 0.9427237456434536, + "loss": 0.08766089379787445, + "loss_ce": 0.0031577199697494507, + "loss_iou": 0.455078125, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 615604052, + "step": 3584 + }, + { + "epoch": 0.9429867824028408, + "grad_norm": 4.892521797802774, + "learning_rate": 5e-06, + "loss": 0.0978, + "num_input_tokens_seen": 615776672, + "step": 3585 + }, + { + "epoch": 0.9429867824028408, + "loss": 0.04693538695573807, + "loss_ce": 0.0009148788521997631, + "loss_iou": 0.380859375, + "loss_num": 0.00921630859375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 615776672, + "step": 3585 + }, + { + "epoch": 0.9432498191622279, + "grad_norm": 8.102761106727687, + "learning_rate": 5e-06, + "loss": 0.1015, + "num_input_tokens_seen": 615948804, + "step": 3586 + }, + { + "epoch": 0.9432498191622279, + "loss": 0.0826391950249672, + "loss_ce": 0.00152347341645509, + "loss_iou": 0.40234375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 615948804, + "step": 3586 + }, + { + "epoch": 0.943512855921615, + "grad_norm": 4.699200150667693, + "learning_rate": 5e-06, + "loss": 0.0853, + "num_input_tokens_seen": 616120904, + "step": 3587 + }, + { + "epoch": 0.943512855921615, + "loss": 0.07182273268699646, + "loss_ce": 0.004500953480601311, + "loss_iou": 0.431640625, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 616120904, + "step": 3587 + }, + { + "epoch": 0.9437758926810021, + "grad_norm": 5.979856684754754, + "learning_rate": 5e-06, + "loss": 0.1208, + "num_input_tokens_seen": 616293232, + "step": 3588 + }, + { + "epoch": 0.9437758926810021, + "loss": 0.14250054955482483, + "loss_ce": 0.004179632291197777, + "loss_iou": 0.515625, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 616293232, + "step": 3588 + }, + { + "epoch": 0.9440389294403893, + "grad_norm": 4.477659380807632, + "learning_rate": 5e-06, + "loss": 0.1234, + "num_input_tokens_seen": 616465544, + "step": 3589 + }, + { + "epoch": 0.9440389294403893, + "loss": 0.13571983575820923, + "loss_ce": 0.004356917925179005, + "loss_iou": 0.3671875, + "loss_num": 0.0262451171875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 616465544, + "step": 3589 + }, + { + "epoch": 0.9443019661997765, + "grad_norm": 5.354193803812341, + "learning_rate": 5e-06, + "loss": 0.0752, + "num_input_tokens_seen": 616638112, + "step": 3590 + }, + { + "epoch": 0.9443019661997765, + "loss": 0.08732321113348007, + "loss_ce": 0.0015383013524115086, + "loss_iou": 0.458984375, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 616638112, + "step": 3590 + }, + { + "epoch": 0.9445650029591636, + "grad_norm": 3.712673541690186, + "learning_rate": 5e-06, + "loss": 0.0755, + "num_input_tokens_seen": 616808952, + "step": 3591 + }, + { + "epoch": 0.9445650029591636, + "loss": 0.0873071700334549, + "loss_ce": 0.0063440315425395966, + "loss_iou": 0.45703125, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 616808952, + "step": 3591 + }, + { + "epoch": 0.9448280397185507, + "grad_norm": 5.018692274423124, + "learning_rate": 5e-06, + "loss": 0.1035, + "num_input_tokens_seen": 616981056, + "step": 3592 + }, + { + "epoch": 0.9448280397185507, + "loss": 0.16758012771606445, + "loss_ce": 0.0011983029544353485, + "loss_iou": 0.416015625, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 616981056, + "step": 3592 + }, + { + "epoch": 0.9450910764779378, + "grad_norm": 8.157994634855726, + "learning_rate": 5e-06, + "loss": 0.1171, + "num_input_tokens_seen": 617152956, + "step": 3593 + }, + { + "epoch": 0.9450910764779378, + "loss": 0.09211128950119019, + "loss_ce": 0.0016877016751095653, + "loss_iou": 0.466796875, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 617152956, + "step": 3593 + }, + { + "epoch": 0.9453541132373249, + "grad_norm": 6.815211067626931, + "learning_rate": 5e-06, + "loss": 0.104, + "num_input_tokens_seen": 617323192, + "step": 3594 + }, + { + "epoch": 0.9453541132373249, + "loss": 0.15205200016498566, + "loss_ce": 0.0006542917108163238, + "loss_iou": 0.373046875, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 617323192, + "step": 3594 + }, + { + "epoch": 0.9456171499967121, + "grad_norm": 5.698573656915325, + "learning_rate": 5e-06, + "loss": 0.1838, + "num_input_tokens_seen": 617495528, + "step": 3595 + }, + { + "epoch": 0.9456171499967121, + "loss": 0.15452706813812256, + "loss_ce": 0.004380582831799984, + "loss_iou": 0.466796875, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 617495528, + "step": 3595 + }, + { + "epoch": 0.9458801867560992, + "grad_norm": 5.492995557036467, + "learning_rate": 5e-06, + "loss": 0.0983, + "num_input_tokens_seen": 617667772, + "step": 3596 + }, + { + "epoch": 0.9458801867560992, + "loss": 0.11533143371343613, + "loss_ce": 0.0028283819556236267, + "loss_iou": 0.453125, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 617667772, + "step": 3596 + }, + { + "epoch": 0.9461432235154863, + "grad_norm": 3.750755987920789, + "learning_rate": 5e-06, + "loss": 0.1041, + "num_input_tokens_seen": 617836324, + "step": 3597 + }, + { + "epoch": 0.9461432235154863, + "loss": 0.07357309758663177, + "loss_ce": 0.0006360823172144592, + "loss_iou": 0.5546875, + "loss_num": 0.01458740234375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 617836324, + "step": 3597 + }, + { + "epoch": 0.9464062602748734, + "grad_norm": 3.634136051792054, + "learning_rate": 5e-06, + "loss": 0.102, + "num_input_tokens_seen": 618006748, + "step": 3598 + }, + { + "epoch": 0.9464062602748734, + "loss": 0.05711160972714424, + "loss_ce": 0.0004099512880202383, + "loss_iou": 0.474609375, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 618006748, + "step": 3598 + }, + { + "epoch": 0.9466692970342605, + "grad_norm": 17.367283166631303, + "learning_rate": 5e-06, + "loss": 0.0994, + "num_input_tokens_seen": 618179204, + "step": 3599 + }, + { + "epoch": 0.9466692970342605, + "loss": 0.11188434064388275, + "loss_ce": 0.0004646638117264956, + "loss_iou": 0.53125, + "loss_num": 0.0223388671875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 618179204, + "step": 3599 + }, + { + "epoch": 0.9469323337936476, + "grad_norm": 4.326428384534629, + "learning_rate": 5e-06, + "loss": 0.1077, + "num_input_tokens_seen": 618351176, + "step": 3600 + }, + { + "epoch": 0.9469323337936476, + "loss": 0.1435290426015854, + "loss_ce": 0.002629393944516778, + "loss_iou": 0.34375, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 618351176, + "step": 3600 + }, + { + "epoch": 0.9471953705530348, + "grad_norm": 19.80317191157718, + "learning_rate": 5e-06, + "loss": 0.0993, + "num_input_tokens_seen": 618523308, + "step": 3601 + }, + { + "epoch": 0.9471953705530348, + "loss": 0.055401187390089035, + "loss_ce": 0.0005763589288108051, + "loss_iou": 0.5, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 618523308, + "step": 3601 + }, + { + "epoch": 0.9474584073124219, + "grad_norm": 7.650182889486073, + "learning_rate": 5e-06, + "loss": 0.1153, + "num_input_tokens_seen": 618695336, + "step": 3602 + }, + { + "epoch": 0.9474584073124219, + "loss": 0.15820011496543884, + "loss_ce": 0.0013244987931102514, + "loss_iou": 0.51171875, + "loss_num": 0.03125, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 618695336, + "step": 3602 + }, + { + "epoch": 0.947721444071809, + "grad_norm": 15.018190748040105, + "learning_rate": 5e-06, + "loss": 0.1028, + "num_input_tokens_seen": 618868120, + "step": 3603 + }, + { + "epoch": 0.947721444071809, + "loss": 0.08152879774570465, + "loss_ce": 0.0010081640211865306, + "loss_iou": 0.388671875, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 618868120, + "step": 3603 + }, + { + "epoch": 0.9479844808311961, + "grad_norm": 3.724726783342823, + "learning_rate": 5e-06, + "loss": 0.0996, + "num_input_tokens_seen": 619040216, + "step": 3604 + }, + { + "epoch": 0.9479844808311961, + "loss": 0.07213738560676575, + "loss_ce": 0.0031829241197556257, + "loss_iou": 0.3671875, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 619040216, + "step": 3604 + }, + { + "epoch": 0.9482475175905832, + "grad_norm": 28.840812127557175, + "learning_rate": 5e-06, + "loss": 0.1015, + "num_input_tokens_seen": 619212192, + "step": 3605 + }, + { + "epoch": 0.9482475175905832, + "loss": 0.1626545786857605, + "loss_ce": 0.0024830668698996305, + "loss_iou": 0.482421875, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 619212192, + "step": 3605 + }, + { + "epoch": 0.9485105543499704, + "grad_norm": 5.069934241766405, + "learning_rate": 5e-06, + "loss": 0.1145, + "num_input_tokens_seen": 619382656, + "step": 3606 + }, + { + "epoch": 0.9485105543499704, + "loss": 0.1947801113128662, + "loss_ce": 0.00343489833176136, + "loss_iou": 0.53515625, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 619382656, + "step": 3606 + }, + { + "epoch": 0.9487735911093576, + "grad_norm": 13.189826643263768, + "learning_rate": 5e-06, + "loss": 0.0894, + "num_input_tokens_seen": 619554772, + "step": 3607 + }, + { + "epoch": 0.9487735911093576, + "loss": 0.10128442198038101, + "loss_ce": 0.001995484111830592, + "loss_iou": 0.388671875, + "loss_num": 0.0198974609375, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 619554772, + "step": 3607 + }, + { + "epoch": 0.9490366278687447, + "grad_norm": 3.7735949518372953, + "learning_rate": 5e-06, + "loss": 0.0685, + "num_input_tokens_seen": 619727116, + "step": 3608 + }, + { + "epoch": 0.9490366278687447, + "loss": 0.11414018273353577, + "loss_ce": 0.005039841867983341, + "loss_iou": 0.4140625, + "loss_num": 0.0218505859375, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 619727116, + "step": 3608 + }, + { + "epoch": 0.9492996646281318, + "grad_norm": 4.250123008721577, + "learning_rate": 5e-06, + "loss": 0.1075, + "num_input_tokens_seen": 619899428, + "step": 3609 + }, + { + "epoch": 0.9492996646281318, + "loss": 0.1226678267121315, + "loss_ce": 0.007280868943780661, + "loss_iou": 0.45703125, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 619899428, + "step": 3609 + }, + { + "epoch": 0.9495627013875189, + "grad_norm": 6.863676153540119, + "learning_rate": 5e-06, + "loss": 0.1046, + "num_input_tokens_seen": 620071540, + "step": 3610 + }, + { + "epoch": 0.9495627013875189, + "loss": 0.21938937902450562, + "loss_ce": 0.0038132029585540295, + "loss_iou": 0.3984375, + "loss_num": 0.04296875, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 620071540, + "step": 3610 + }, + { + "epoch": 0.9498257381469061, + "grad_norm": 3.813325984971825, + "learning_rate": 5e-06, + "loss": 0.1107, + "num_input_tokens_seen": 620243836, + "step": 3611 + }, + { + "epoch": 0.9498257381469061, + "loss": 0.07250767946243286, + "loss_ce": 0.0018289745785295963, + "loss_iou": 0.33984375, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 620243836, + "step": 3611 + }, + { + "epoch": 0.9500887749062932, + "grad_norm": 8.694293187792477, + "learning_rate": 5e-06, + "loss": 0.0953, + "num_input_tokens_seen": 620416164, + "step": 3612 + }, + { + "epoch": 0.9500887749062932, + "loss": 0.08137984573841095, + "loss_ce": 0.0004167144070379436, + "loss_iou": 0.421875, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 620416164, + "step": 3612 + }, + { + "epoch": 0.9503518116656803, + "grad_norm": 4.011759905693181, + "learning_rate": 5e-06, + "loss": 0.0881, + "num_input_tokens_seen": 620588248, + "step": 3613 + }, + { + "epoch": 0.9503518116656803, + "loss": 0.10577777028083801, + "loss_ce": 0.003009829204529524, + "loss_iou": 0.57421875, + "loss_num": 0.0206298828125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 620588248, + "step": 3613 + }, + { + "epoch": 0.9506148484250674, + "grad_norm": 4.8583016740360545, + "learning_rate": 5e-06, + "loss": 0.0856, + "num_input_tokens_seen": 620760212, + "step": 3614 + }, + { + "epoch": 0.9506148484250674, + "loss": 0.07320687174797058, + "loss_ce": 0.00593086751177907, + "loss_iou": 0.4609375, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 620760212, + "step": 3614 + }, + { + "epoch": 0.9508778851844545, + "grad_norm": 7.183426742063696, + "learning_rate": 5e-06, + "loss": 0.092, + "num_input_tokens_seen": 620932584, + "step": 3615 + }, + { + "epoch": 0.9508778851844545, + "loss": 0.12085875123739243, + "loss_ce": 0.004022202454507351, + "loss_iou": 0.3828125, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 620932584, + "step": 3615 + }, + { + "epoch": 0.9511409219438417, + "grad_norm": 5.9763835890459855, + "learning_rate": 5e-06, + "loss": 0.0623, + "num_input_tokens_seen": 621104624, + "step": 3616 + }, + { + "epoch": 0.9511409219438417, + "loss": 0.057411566376686096, + "loss_ce": 0.00405157683417201, + "loss_iou": 0.40625, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 621104624, + "step": 3616 + }, + { + "epoch": 0.9514039587032288, + "grad_norm": 4.148575034087351, + "learning_rate": 5e-06, + "loss": 0.1074, + "num_input_tokens_seen": 621276932, + "step": 3617 + }, + { + "epoch": 0.9514039587032288, + "loss": 0.17621394991874695, + "loss_ce": 0.0008599417633377016, + "loss_iou": 0.515625, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 621276932, + "step": 3617 + }, + { + "epoch": 0.9516669954626159, + "grad_norm": 9.831716470581155, + "learning_rate": 5e-06, + "loss": 0.1326, + "num_input_tokens_seen": 621449128, + "step": 3618 + }, + { + "epoch": 0.9516669954626159, + "loss": 0.17716863751411438, + "loss_ce": 0.0019672252237796783, + "loss_iou": 0.5, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 621449128, + "step": 3618 + }, + { + "epoch": 0.951930032222003, + "grad_norm": 6.726193865130754, + "learning_rate": 5e-06, + "loss": 0.1015, + "num_input_tokens_seen": 621621260, + "step": 3619 + }, + { + "epoch": 0.951930032222003, + "loss": 0.08456599712371826, + "loss_ce": 0.0014971550554037094, + "loss_iou": 0.4453125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 621621260, + "step": 3619 + }, + { + "epoch": 0.9521930689813901, + "grad_norm": 5.019143488565095, + "learning_rate": 5e-06, + "loss": 0.1194, + "num_input_tokens_seen": 621792008, + "step": 3620 + }, + { + "epoch": 0.9521930689813901, + "loss": 0.05885142832994461, + "loss_ce": 0.0001813878770917654, + "loss_iou": 0.59375, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 621792008, + "step": 3620 + }, + { + "epoch": 0.9524561057407773, + "grad_norm": 55.831161754212744, + "learning_rate": 5e-06, + "loss": 0.1297, + "num_input_tokens_seen": 621964056, + "step": 3621 + }, + { + "epoch": 0.9524561057407773, + "loss": 0.08644313365221024, + "loss_ce": 0.0015737485373392701, + "loss_iou": 0.419921875, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 621964056, + "step": 3621 + }, + { + "epoch": 0.9527191425001644, + "grad_norm": 6.339787905898623, + "learning_rate": 5e-06, + "loss": 0.0934, + "num_input_tokens_seen": 622136360, + "step": 3622 + }, + { + "epoch": 0.9527191425001644, + "loss": 0.07180093228816986, + "loss_ce": 0.0007102307863533497, + "loss_iou": 0.423828125, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 622136360, + "step": 3622 + }, + { + "epoch": 0.9529821792595515, + "grad_norm": 5.256007258631962, + "learning_rate": 5e-06, + "loss": 0.1175, + "num_input_tokens_seen": 622308160, + "step": 3623 + }, + { + "epoch": 0.9529821792595515, + "loss": 0.16570287942886353, + "loss_ce": 0.0014267577789723873, + "loss_iou": 0.396484375, + "loss_num": 0.032958984375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 622308160, + "step": 3623 + }, + { + "epoch": 0.9532452160189386, + "grad_norm": 16.742735373121388, + "learning_rate": 5e-06, + "loss": 0.0872, + "num_input_tokens_seen": 622480548, + "step": 3624 + }, + { + "epoch": 0.9532452160189386, + "loss": 0.052828967571258545, + "loss_ce": 0.0007049451814964414, + "loss_iou": 0.5078125, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 622480548, + "step": 3624 + }, + { + "epoch": 0.9535082527783257, + "grad_norm": 17.572925497634134, + "learning_rate": 5e-06, + "loss": 0.1125, + "num_input_tokens_seen": 622652520, + "step": 3625 + }, + { + "epoch": 0.9535082527783257, + "loss": 0.0615709125995636, + "loss_ce": 0.00045946481986902654, + "loss_iou": 0.48046875, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 622652520, + "step": 3625 + }, + { + "epoch": 0.9537712895377128, + "grad_norm": 6.0047852962098975, + "learning_rate": 5e-06, + "loss": 0.1244, + "num_input_tokens_seen": 622823148, + "step": 3626 + }, + { + "epoch": 0.9537712895377128, + "loss": 0.13855966925621033, + "loss_ce": 0.0012610815465450287, + "loss_iou": 0.43359375, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 622823148, + "step": 3626 + }, + { + "epoch": 0.9540343262971, + "grad_norm": 7.792667970397866, + "learning_rate": 5e-06, + "loss": 0.1385, + "num_input_tokens_seen": 622995452, + "step": 3627 + }, + { + "epoch": 0.9540343262971, + "loss": 0.13608847558498383, + "loss_ce": 0.001505955122411251, + "loss_iou": 0.48828125, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 622995452, + "step": 3627 + }, + { + "epoch": 0.9542973630564872, + "grad_norm": 4.565665131118125, + "learning_rate": 5e-06, + "loss": 0.0977, + "num_input_tokens_seen": 623167560, + "step": 3628 + }, + { + "epoch": 0.9542973630564872, + "loss": 0.11297139525413513, + "loss_ce": 0.0007735221879556775, + "loss_iou": 0.490234375, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 623167560, + "step": 3628 + }, + { + "epoch": 0.9545603998158743, + "grad_norm": 5.57299165717365, + "learning_rate": 5e-06, + "loss": 0.1077, + "num_input_tokens_seen": 623339872, + "step": 3629 + }, + { + "epoch": 0.9545603998158743, + "loss": 0.09495042264461517, + "loss_ce": 0.002497426699846983, + "loss_iou": 0.365234375, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 623339872, + "step": 3629 + }, + { + "epoch": 0.9548234365752614, + "grad_norm": 21.302937351842843, + "learning_rate": 5e-06, + "loss": 0.11, + "num_input_tokens_seen": 623512180, + "step": 3630 + }, + { + "epoch": 0.9548234365752614, + "loss": 0.11213900148868561, + "loss_ce": 0.0011923413258045912, + "loss_iou": 0.50390625, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 623512180, + "step": 3630 + }, + { + "epoch": 0.9550864733346485, + "grad_norm": 12.098614631817124, + "learning_rate": 5e-06, + "loss": 0.1408, + "num_input_tokens_seen": 623680972, + "step": 3631 + }, + { + "epoch": 0.9550864733346485, + "loss": 0.21909213066101074, + "loss_ce": 0.0015017889672890306, + "loss_iou": 0.453125, + "loss_num": 0.04345703125, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 623680972, + "step": 3631 + }, + { + "epoch": 0.9553495100940357, + "grad_norm": 9.034625023288243, + "learning_rate": 5e-06, + "loss": 0.1064, + "num_input_tokens_seen": 623851276, + "step": 3632 + }, + { + "epoch": 0.9553495100940357, + "loss": 0.0687929093837738, + "loss_ce": 0.0007081945077516139, + "loss_iou": 0.5859375, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 623851276, + "step": 3632 + }, + { + "epoch": 0.9556125468534228, + "grad_norm": 4.18724157975137, + "learning_rate": 5e-06, + "loss": 0.1245, + "num_input_tokens_seen": 624023500, + "step": 3633 + }, + { + "epoch": 0.9556125468534228, + "loss": 0.12112436443567276, + "loss_ce": 0.0027008973993360996, + "loss_iou": 0.4140625, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 624023500, + "step": 3633 + }, + { + "epoch": 0.9558755836128099, + "grad_norm": 10.76826846477555, + "learning_rate": 5e-06, + "loss": 0.1289, + "num_input_tokens_seen": 624195576, + "step": 3634 + }, + { + "epoch": 0.9558755836128099, + "loss": 0.11673011630773544, + "loss_ce": 0.004501717630773783, + "loss_iou": 0.47265625, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 624195576, + "step": 3634 + }, + { + "epoch": 0.956138620372197, + "grad_norm": 3.420122712086701, + "learning_rate": 5e-06, + "loss": 0.0911, + "num_input_tokens_seen": 624366376, + "step": 3635 + }, + { + "epoch": 0.956138620372197, + "loss": 0.13994066417217255, + "loss_ce": 0.0026115677319467068, + "loss_iou": 0.3125, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 624366376, + "step": 3635 + }, + { + "epoch": 0.9564016571315841, + "grad_norm": 3.188466600397663, + "learning_rate": 5e-06, + "loss": 0.1096, + "num_input_tokens_seen": 624538928, + "step": 3636 + }, + { + "epoch": 0.9564016571315841, + "loss": 0.19849437475204468, + "loss_ce": 0.0006184080266393721, + "loss_iou": 0.443359375, + "loss_num": 0.03955078125, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 624538928, + "step": 3636 + }, + { + "epoch": 0.9566646938909713, + "grad_norm": 4.145251385205127, + "learning_rate": 5e-06, + "loss": 0.0809, + "num_input_tokens_seen": 624711036, + "step": 3637 + }, + { + "epoch": 0.9566646938909713, + "loss": 0.07516495883464813, + "loss_ce": 0.0005189694347791374, + "loss_iou": 0.51953125, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 624711036, + "step": 3637 + }, + { + "epoch": 0.9569277306503584, + "grad_norm": 7.0181560986956395, + "learning_rate": 5e-06, + "loss": 0.1178, + "num_input_tokens_seen": 624883520, + "step": 3638 + }, + { + "epoch": 0.9569277306503584, + "loss": 0.06267523765563965, + "loss_ce": 0.0029981140978634357, + "loss_iou": 0.60546875, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 624883520, + "step": 3638 + }, + { + "epoch": 0.9571907674097455, + "grad_norm": 3.9660695052723813, + "learning_rate": 5e-06, + "loss": 0.1141, + "num_input_tokens_seen": 625055640, + "step": 3639 + }, + { + "epoch": 0.9571907674097455, + "loss": 0.1359872817993164, + "loss_ce": 0.00042819694499485195, + "loss_iou": 0.5546875, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 625055640, + "step": 3639 + }, + { + "epoch": 0.9574538041691326, + "grad_norm": 17.732753202882193, + "learning_rate": 5e-06, + "loss": 0.125, + "num_input_tokens_seen": 625227732, + "step": 3640 + }, + { + "epoch": 0.9574538041691326, + "loss": 0.09507328271865845, + "loss_ce": 0.0008502600830979645, + "loss_iou": 0.458984375, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 625227732, + "step": 3640 + }, + { + "epoch": 0.9577168409285197, + "grad_norm": 3.4840402032914257, + "learning_rate": 5e-06, + "loss": 0.0918, + "num_input_tokens_seen": 625398028, + "step": 3641 + }, + { + "epoch": 0.9577168409285197, + "loss": 0.13700971007347107, + "loss_ce": 0.0006876978441141546, + "loss_iou": 0.408203125, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 625398028, + "step": 3641 + }, + { + "epoch": 0.9579798776879069, + "grad_norm": 7.413398524469702, + "learning_rate": 5e-06, + "loss": 0.1246, + "num_input_tokens_seen": 625570244, + "step": 3642 + }, + { + "epoch": 0.9579798776879069, + "loss": 0.12876945734024048, + "loss_ce": 0.00047354548587463796, + "loss_iou": 0.5234375, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 625570244, + "step": 3642 + }, + { + "epoch": 0.958242914447294, + "grad_norm": 5.336702710138045, + "learning_rate": 5e-06, + "loss": 0.0898, + "num_input_tokens_seen": 625742464, + "step": 3643 + }, + { + "epoch": 0.958242914447294, + "loss": 0.05379210785031319, + "loss_ce": 0.00020323891658335924, + "loss_iou": 0.44140625, + "loss_num": 0.01068115234375, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 625742464, + "step": 3643 + }, + { + "epoch": 0.9585059512066811, + "grad_norm": 23.972493383254932, + "learning_rate": 5e-06, + "loss": 0.1108, + "num_input_tokens_seen": 625914372, + "step": 3644 + }, + { + "epoch": 0.9585059512066811, + "loss": 0.08553168922662735, + "loss_ce": 0.0017609409987926483, + "loss_iou": 0.45703125, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 625914372, + "step": 3644 + }, + { + "epoch": 0.9587689879660682, + "grad_norm": 10.340048159606546, + "learning_rate": 5e-06, + "loss": 0.1191, + "num_input_tokens_seen": 626086592, + "step": 3645 + }, + { + "epoch": 0.9587689879660682, + "loss": 0.234289288520813, + "loss_ce": 0.00026523511041887105, + "loss_iou": 0.404296875, + "loss_num": 0.046875, + "loss_xval": 0.234375, + "num_input_tokens_seen": 626086592, + "step": 3645 + }, + { + "epoch": 0.9590320247254553, + "grad_norm": 9.300486446962857, + "learning_rate": 5e-06, + "loss": 0.094, + "num_input_tokens_seen": 626257416, + "step": 3646 + }, + { + "epoch": 0.9590320247254553, + "loss": 0.045688219368457794, + "loss_ce": 0.0002475440560374409, + "loss_iou": 0.3125, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 626257416, + "step": 3646 + }, + { + "epoch": 0.9592950614848426, + "grad_norm": 11.589258821015147, + "learning_rate": 5e-06, + "loss": 0.1346, + "num_input_tokens_seen": 626427708, + "step": 3647 + }, + { + "epoch": 0.9592950614848426, + "loss": 0.10979291796684265, + "loss_ce": 0.0016691365744918585, + "loss_iou": 0.33984375, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 626427708, + "step": 3647 + }, + { + "epoch": 0.9595580982442297, + "grad_norm": 3.2081047701574943, + "learning_rate": 5e-06, + "loss": 0.0916, + "num_input_tokens_seen": 626599988, + "step": 3648 + }, + { + "epoch": 0.9595580982442297, + "loss": 0.09939119219779968, + "loss_ce": 0.0016281325370073318, + "loss_iou": 0.5546875, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 626599988, + "step": 3648 + }, + { + "epoch": 0.9598211350036168, + "grad_norm": 4.783540700409292, + "learning_rate": 5e-06, + "loss": 0.1455, + "num_input_tokens_seen": 626772316, + "step": 3649 + }, + { + "epoch": 0.9598211350036168, + "loss": 0.09440785646438599, + "loss_ce": 0.0029161556158214808, + "loss_iou": 0.4296875, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 626772316, + "step": 3649 + }, + { + "epoch": 0.9600841717630039, + "grad_norm": 5.125730804994831, + "learning_rate": 5e-06, + "loss": 0.0868, + "num_input_tokens_seen": 626942620, + "step": 3650 + }, + { + "epoch": 0.9600841717630039, + "loss": 0.06443943828344345, + "loss_ce": 0.0007797717116773129, + "loss_iou": 0.482421875, + "loss_num": 0.01275634765625, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 626942620, + "step": 3650 + }, + { + "epoch": 0.960347208522391, + "grad_norm": 4.565795888601258, + "learning_rate": 5e-06, + "loss": 0.1097, + "num_input_tokens_seen": 627114816, + "step": 3651 + }, + { + "epoch": 0.960347208522391, + "loss": 0.12303532660007477, + "loss_ce": 0.0003851845976896584, + "loss_iou": 0.453125, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 627114816, + "step": 3651 + }, + { + "epoch": 0.9606102452817781, + "grad_norm": 8.198006738555572, + "learning_rate": 5e-06, + "loss": 0.0959, + "num_input_tokens_seen": 627286832, + "step": 3652 + }, + { + "epoch": 0.9606102452817781, + "loss": 0.12663108110427856, + "loss_ce": 0.0022109271958470345, + "loss_iou": 0.6875, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 627286832, + "step": 3652 + }, + { + "epoch": 0.9608732820411653, + "grad_norm": 6.977304479694191, + "learning_rate": 5e-06, + "loss": 0.141, + "num_input_tokens_seen": 627458840, + "step": 3653 + }, + { + "epoch": 0.9608732820411653, + "loss": 0.08323599398136139, + "loss_ce": 0.003646154422312975, + "loss_iou": 0.4921875, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 627458840, + "step": 3653 + }, + { + "epoch": 0.9611363188005524, + "grad_norm": 6.143584577276683, + "learning_rate": 5e-06, + "loss": 0.127, + "num_input_tokens_seen": 627631048, + "step": 3654 + }, + { + "epoch": 0.9611363188005524, + "loss": 0.08655130863189697, + "loss_ce": 0.0005985412281006575, + "loss_iou": 0.44921875, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 627631048, + "step": 3654 + }, + { + "epoch": 0.9613993555599395, + "grad_norm": 4.962748401603593, + "learning_rate": 5e-06, + "loss": 0.0843, + "num_input_tokens_seen": 627801572, + "step": 3655 + }, + { + "epoch": 0.9613993555599395, + "loss": 0.06723140180110931, + "loss_ce": 0.0012218892807140946, + "loss_iou": 0.232421875, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 627801572, + "step": 3655 + }, + { + "epoch": 0.9616623923193266, + "grad_norm": 3.888233294845763, + "learning_rate": 5e-06, + "loss": 0.1216, + "num_input_tokens_seen": 627973444, + "step": 3656 + }, + { + "epoch": 0.9616623923193266, + "loss": 0.19316411018371582, + "loss_ce": 0.0060761114582419395, + "loss_iou": 0.4765625, + "loss_num": 0.037353515625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 627973444, + "step": 3656 + }, + { + "epoch": 0.9619254290787137, + "grad_norm": 12.231564786844833, + "learning_rate": 5e-06, + "loss": 0.1033, + "num_input_tokens_seen": 628145692, + "step": 3657 + }, + { + "epoch": 0.9619254290787137, + "loss": 0.08886748552322388, + "loss_ce": 0.004608447663486004, + "loss_iou": 0.37109375, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 628145692, + "step": 3657 + }, + { + "epoch": 0.9621884658381009, + "grad_norm": 6.681516105502576, + "learning_rate": 5e-06, + "loss": 0.1222, + "num_input_tokens_seen": 628317968, + "step": 3658 + }, + { + "epoch": 0.9621884658381009, + "loss": 0.11374935507774353, + "loss_ce": 0.00020870784646831453, + "loss_iou": 0.61328125, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 628317968, + "step": 3658 + }, + { + "epoch": 0.962451502597488, + "grad_norm": 9.221687537639268, + "learning_rate": 5e-06, + "loss": 0.0512, + "num_input_tokens_seen": 628489248, + "step": 3659 + }, + { + "epoch": 0.962451502597488, + "loss": 0.07078136503696442, + "loss_ce": 0.000972404726780951, + "loss_iou": 0.41015625, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 628489248, + "step": 3659 + }, + { + "epoch": 0.9627145393568751, + "grad_norm": 3.888238330257217, + "learning_rate": 5e-06, + "loss": 0.1194, + "num_input_tokens_seen": 628661488, + "step": 3660 + }, + { + "epoch": 0.9627145393568751, + "loss": 0.148333340883255, + "loss_ce": 0.001513272407464683, + "loss_iou": 0.361328125, + "loss_num": 0.0294189453125, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 628661488, + "step": 3660 + }, + { + "epoch": 0.9629775761162622, + "grad_norm": 3.548818147424885, + "learning_rate": 5e-06, + "loss": 0.1097, + "num_input_tokens_seen": 628833384, + "step": 3661 + }, + { + "epoch": 0.9629775761162622, + "loss": 0.07117056846618652, + "loss_ce": 0.0004918586346320808, + "loss_iou": 0.455078125, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 628833384, + "step": 3661 + }, + { + "epoch": 0.9632406128756493, + "grad_norm": 3.779136602687443, + "learning_rate": 5e-06, + "loss": 0.1092, + "num_input_tokens_seen": 629005652, + "step": 3662 + }, + { + "epoch": 0.9632406128756493, + "loss": 0.06165676563978195, + "loss_ce": 0.0007131616584956646, + "loss_iou": 0.62109375, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 629005652, + "step": 3662 + }, + { + "epoch": 0.9635036496350365, + "grad_norm": 5.7914523260663495, + "learning_rate": 5e-06, + "loss": 0.1238, + "num_input_tokens_seen": 629177884, + "step": 3663 + }, + { + "epoch": 0.9635036496350365, + "loss": 0.09137916564941406, + "loss_ce": 0.00181007559876889, + "loss_iou": 0.46484375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 629177884, + "step": 3663 + }, + { + "epoch": 0.9637666863944236, + "grad_norm": 3.071560976269854, + "learning_rate": 5e-06, + "loss": 0.0996, + "num_input_tokens_seen": 629350348, + "step": 3664 + }, + { + "epoch": 0.9637666863944236, + "loss": 0.08777488768100739, + "loss_ce": 0.0004946062108501792, + "loss_iou": 0.455078125, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 629350348, + "step": 3664 + }, + { + "epoch": 0.9640297231538107, + "grad_norm": 4.779549366101704, + "learning_rate": 5e-06, + "loss": 0.1024, + "num_input_tokens_seen": 629522792, + "step": 3665 + }, + { + "epoch": 0.9640297231538107, + "loss": 0.1326950043439865, + "loss_ce": 0.0025680402759462595, + "loss_iou": 0.5234375, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 629522792, + "step": 3665 + }, + { + "epoch": 0.9642927599131978, + "grad_norm": 4.346102092926262, + "learning_rate": 5e-06, + "loss": 0.1112, + "num_input_tokens_seen": 629694856, + "step": 3666 + }, + { + "epoch": 0.9642927599131978, + "loss": 0.11643362045288086, + "loss_ce": 0.0008330341661348939, + "loss_iou": 0.6328125, + "loss_num": 0.0230712890625, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 629694856, + "step": 3666 + }, + { + "epoch": 0.964555796672585, + "grad_norm": 5.7406338456333845, + "learning_rate": 5e-06, + "loss": 0.1044, + "num_input_tokens_seen": 629866908, + "step": 3667 + }, + { + "epoch": 0.964555796672585, + "loss": 0.13996919989585876, + "loss_ce": 0.0034030412789434195, + "loss_iou": 0.37890625, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 629866908, + "step": 3667 + }, + { + "epoch": 0.9648188334319722, + "grad_norm": 7.17686270310257, + "learning_rate": 5e-06, + "loss": 0.1069, + "num_input_tokens_seen": 630039516, + "step": 3668 + }, + { + "epoch": 0.9648188334319722, + "loss": 0.12104253470897675, + "loss_ce": 0.00020818831399083138, + "loss_iou": 0.49609375, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 630039516, + "step": 3668 + }, + { + "epoch": 0.9650818701913593, + "grad_norm": 4.2613778122636035, + "learning_rate": 5e-06, + "loss": 0.0821, + "num_input_tokens_seen": 630211624, + "step": 3669 + }, + { + "epoch": 0.9650818701913593, + "loss": 0.062059760093688965, + "loss_ce": 0.0022910854313522577, + "loss_iou": 0.34375, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 630211624, + "step": 3669 + }, + { + "epoch": 0.9653449069507464, + "grad_norm": 3.5238324652450235, + "learning_rate": 5e-06, + "loss": 0.1352, + "num_input_tokens_seen": 630382332, + "step": 3670 + }, + { + "epoch": 0.9653449069507464, + "loss": 0.15831097960472107, + "loss_ce": 0.00016888529353309423, + "loss_iou": 0.453125, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 630382332, + "step": 3670 + }, + { + "epoch": 0.9656079437101335, + "grad_norm": 3.564039297129955, + "learning_rate": 5e-06, + "loss": 0.0998, + "num_input_tokens_seen": 630552592, + "step": 3671 + }, + { + "epoch": 0.9656079437101335, + "loss": 0.08011811971664429, + "loss_ce": 0.0016269085463136435, + "loss_iou": 0.470703125, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 630552592, + "step": 3671 + }, + { + "epoch": 0.9658709804695206, + "grad_norm": 9.76288401818362, + "learning_rate": 5e-06, + "loss": 0.1375, + "num_input_tokens_seen": 630722244, + "step": 3672 + }, + { + "epoch": 0.9658709804695206, + "loss": 0.08490733802318573, + "loss_ce": 0.0020673726685345173, + "loss_iou": 0.53125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 630722244, + "step": 3672 + }, + { + "epoch": 0.9661340172289078, + "grad_norm": 7.354300413013905, + "learning_rate": 5e-06, + "loss": 0.1318, + "num_input_tokens_seen": 630894380, + "step": 3673 + }, + { + "epoch": 0.9661340172289078, + "loss": 0.13285204768180847, + "loss_ce": 0.0009245476103387773, + "loss_iou": 0.54296875, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 630894380, + "step": 3673 + }, + { + "epoch": 0.9663970539882949, + "grad_norm": 5.188368697186112, + "learning_rate": 5e-06, + "loss": 0.105, + "num_input_tokens_seen": 631066652, + "step": 3674 + }, + { + "epoch": 0.9663970539882949, + "loss": 0.09987783432006836, + "loss_ce": 0.003976346459239721, + "loss_iou": 0.5546875, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 631066652, + "step": 3674 + }, + { + "epoch": 0.966660090747682, + "grad_norm": 12.325083640976278, + "learning_rate": 5e-06, + "loss": 0.1216, + "num_input_tokens_seen": 631237000, + "step": 3675 + }, + { + "epoch": 0.966660090747682, + "loss": 0.17316505312919617, + "loss_ce": 0.002648080699145794, + "loss_iou": 0.404296875, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 631237000, + "step": 3675 + }, + { + "epoch": 0.9669231275070691, + "grad_norm": 6.105729054233263, + "learning_rate": 5e-06, + "loss": 0.1237, + "num_input_tokens_seen": 631409164, + "step": 3676 + }, + { + "epoch": 0.9669231275070691, + "loss": 0.09672191739082336, + "loss_ce": 0.0017207016935572028, + "loss_iou": 0.466796875, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 631409164, + "step": 3676 + }, + { + "epoch": 0.9671861642664562, + "grad_norm": 4.682056592281961, + "learning_rate": 5e-06, + "loss": 0.1134, + "num_input_tokens_seen": 631581116, + "step": 3677 + }, + { + "epoch": 0.9671861642664562, + "loss": 0.12396599352359772, + "loss_ce": 0.0040776850655674934, + "loss_iou": 0.33984375, + "loss_num": 0.02392578125, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 631581116, + "step": 3677 + }, + { + "epoch": 0.9674492010258433, + "grad_norm": 8.33340254914201, + "learning_rate": 5e-06, + "loss": 0.1258, + "num_input_tokens_seen": 631750180, + "step": 3678 + }, + { + "epoch": 0.9674492010258433, + "loss": 0.09776239842176437, + "loss_ce": 0.0013878863537684083, + "loss_iou": 0.59375, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 631750180, + "step": 3678 + }, + { + "epoch": 0.9677122377852305, + "grad_norm": 3.892518620403468, + "learning_rate": 5e-06, + "loss": 0.1159, + "num_input_tokens_seen": 631922528, + "step": 3679 + }, + { + "epoch": 0.9677122377852305, + "loss": 0.10747776180505753, + "loss_ce": 0.0042520565912127495, + "loss_iou": 0.498046875, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 631922528, + "step": 3679 + }, + { + "epoch": 0.9679752745446176, + "grad_norm": 7.586861538526353, + "learning_rate": 5e-06, + "loss": 0.1193, + "num_input_tokens_seen": 632094768, + "step": 3680 + }, + { + "epoch": 0.9679752745446176, + "loss": 0.08680423349142075, + "loss_ce": 0.001721223583444953, + "loss_iou": 0.55078125, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 632094768, + "step": 3680 + }, + { + "epoch": 0.9682383113040047, + "grad_norm": 4.2210475168735355, + "learning_rate": 5e-06, + "loss": 0.0757, + "num_input_tokens_seen": 632266740, + "step": 3681 + }, + { + "epoch": 0.9682383113040047, + "loss": 0.05067894607782364, + "loss_ce": 0.0006911527598276734, + "loss_iou": 0.5546875, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 632266740, + "step": 3681 + }, + { + "epoch": 0.9685013480633918, + "grad_norm": 3.8121173575745755, + "learning_rate": 5e-06, + "loss": 0.1237, + "num_input_tokens_seen": 632439188, + "step": 3682 + }, + { + "epoch": 0.9685013480633918, + "loss": 0.11126217991113663, + "loss_ce": 0.00033078622072935104, + "loss_iou": 0.60546875, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 632439188, + "step": 3682 + }, + { + "epoch": 0.9687643848227789, + "grad_norm": 5.838321531180536, + "learning_rate": 5e-06, + "loss": 0.1026, + "num_input_tokens_seen": 632611332, + "step": 3683 + }, + { + "epoch": 0.9687643848227789, + "loss": 0.08182443678379059, + "loss_ce": 0.0006171565037220716, + "loss_iou": 0.57421875, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 632611332, + "step": 3683 + }, + { + "epoch": 0.9690274215821661, + "grad_norm": 4.164644957819613, + "learning_rate": 5e-06, + "loss": 0.086, + "num_input_tokens_seen": 632783660, + "step": 3684 + }, + { + "epoch": 0.9690274215821661, + "loss": 0.12429676204919815, + "loss_ce": 0.0015245481627061963, + "loss_iou": 0.609375, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 632783660, + "step": 3684 + }, + { + "epoch": 0.9692904583415533, + "grad_norm": 26.321407570319618, + "learning_rate": 5e-06, + "loss": 0.1137, + "num_input_tokens_seen": 632955748, + "step": 3685 + }, + { + "epoch": 0.9692904583415533, + "loss": 0.05816134810447693, + "loss_ce": 0.0002847612486220896, + "loss_iou": 0.427734375, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 632955748, + "step": 3685 + }, + { + "epoch": 0.9695534951009404, + "grad_norm": 3.994252764794672, + "learning_rate": 5e-06, + "loss": 0.0997, + "num_input_tokens_seen": 633126124, + "step": 3686 + }, + { + "epoch": 0.9695534951009404, + "loss": 0.10153771191835403, + "loss_ce": 0.00023461380624212325, + "loss_iou": 0.51171875, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 633126124, + "step": 3686 + }, + { + "epoch": 0.9698165318603275, + "grad_norm": 7.846355332992627, + "learning_rate": 5e-06, + "loss": 0.1473, + "num_input_tokens_seen": 633298344, + "step": 3687 + }, + { + "epoch": 0.9698165318603275, + "loss": 0.15658894181251526, + "loss_ce": 0.001040847273543477, + "loss_iou": 0.478515625, + "loss_num": 0.0311279296875, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 633298344, + "step": 3687 + }, + { + "epoch": 0.9700795686197146, + "grad_norm": 6.085055991191323, + "learning_rate": 5e-06, + "loss": 0.0813, + "num_input_tokens_seen": 633470408, + "step": 3688 + }, + { + "epoch": 0.9700795686197146, + "loss": 0.14665310084819794, + "loss_ce": 0.0056618861854076385, + "loss_iou": 0.49609375, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 633470408, + "step": 3688 + }, + { + "epoch": 0.9703426053791018, + "grad_norm": 4.551457844454551, + "learning_rate": 5e-06, + "loss": 0.0984, + "num_input_tokens_seen": 633642716, + "step": 3689 + }, + { + "epoch": 0.9703426053791018, + "loss": 0.0793912261724472, + "loss_ce": 0.0006558679160661995, + "loss_iou": 0.5234375, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 633642716, + "step": 3689 + }, + { + "epoch": 0.9706056421384889, + "grad_norm": 5.945079605333328, + "learning_rate": 5e-06, + "loss": 0.1123, + "num_input_tokens_seen": 633812440, + "step": 3690 + }, + { + "epoch": 0.9706056421384889, + "loss": 0.08010639995336533, + "loss_ce": 0.0009438038687221706, + "loss_iou": 0.498046875, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 633812440, + "step": 3690 + }, + { + "epoch": 0.970868678897876, + "grad_norm": 4.065153864479657, + "learning_rate": 5e-06, + "loss": 0.1051, + "num_input_tokens_seen": 633985000, + "step": 3691 + }, + { + "epoch": 0.970868678897876, + "loss": 0.0722232460975647, + "loss_ce": 0.0003238367207814008, + "loss_iou": 0.4609375, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 633985000, + "step": 3691 + }, + { + "epoch": 0.9711317156572631, + "grad_norm": 4.197621752026495, + "learning_rate": 5e-06, + "loss": 0.0911, + "num_input_tokens_seen": 634157200, + "step": 3692 + }, + { + "epoch": 0.9711317156572631, + "loss": 0.09805089235305786, + "loss_ce": 0.003385363146662712, + "loss_iou": 0.51953125, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 634157200, + "step": 3692 + }, + { + "epoch": 0.9713947524166502, + "grad_norm": 4.118683496322007, + "learning_rate": 5e-06, + "loss": 0.1073, + "num_input_tokens_seen": 634329136, + "step": 3693 + }, + { + "epoch": 0.9713947524166502, + "loss": 0.06334918737411499, + "loss_ce": 0.0014290215913206339, + "loss_iou": 0.52734375, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 634329136, + "step": 3693 + }, + { + "epoch": 0.9716577891760374, + "grad_norm": 5.569436401286201, + "learning_rate": 5e-06, + "loss": 0.0858, + "num_input_tokens_seen": 634501660, + "step": 3694 + }, + { + "epoch": 0.9716577891760374, + "loss": 0.056709855794906616, + "loss_ce": 0.0002981142024509609, + "loss_iou": 0.447265625, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 634501660, + "step": 3694 + }, + { + "epoch": 0.9719208259354245, + "grad_norm": 15.633714461557659, + "learning_rate": 5e-06, + "loss": 0.1147, + "num_input_tokens_seen": 634673676, + "step": 3695 + }, + { + "epoch": 0.9719208259354245, + "loss": 0.16554518043994904, + "loss_ce": 0.0010554337641224265, + "loss_iou": 0.416015625, + "loss_num": 0.032958984375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 634673676, + "step": 3695 + }, + { + "epoch": 0.9721838626948116, + "grad_norm": 26.512182592600844, + "learning_rate": 5e-06, + "loss": 0.1032, + "num_input_tokens_seen": 634845964, + "step": 3696 + }, + { + "epoch": 0.9721838626948116, + "loss": 0.08471856266260147, + "loss_ce": 0.0012224669335409999, + "loss_iou": 0.43359375, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 634845964, + "step": 3696 + }, + { + "epoch": 0.9724468994541987, + "grad_norm": 6.8173523395664715, + "learning_rate": 5e-06, + "loss": 0.0977, + "num_input_tokens_seen": 635018328, + "step": 3697 + }, + { + "epoch": 0.9724468994541987, + "loss": 0.08134950697422028, + "loss_ce": 0.0013324212050065398, + "loss_iou": 0.5859375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 635018328, + "step": 3697 + }, + { + "epoch": 0.9727099362135858, + "grad_norm": 4.383461318684907, + "learning_rate": 5e-06, + "loss": 0.106, + "num_input_tokens_seen": 635190608, + "step": 3698 + }, + { + "epoch": 0.9727099362135858, + "loss": 0.08322876691818237, + "loss_ce": 0.0011364765232428908, + "loss_iou": 0.6171875, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 635190608, + "step": 3698 + }, + { + "epoch": 0.972972972972973, + "grad_norm": 8.267610666760818, + "learning_rate": 5e-06, + "loss": 0.1073, + "num_input_tokens_seen": 635362588, + "step": 3699 + }, + { + "epoch": 0.972972972972973, + "loss": 0.11783481389284134, + "loss_ce": 0.0006015403778292239, + "loss_iou": 0.486328125, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 635362588, + "step": 3699 + }, + { + "epoch": 0.9732360097323601, + "grad_norm": 4.332996360892621, + "learning_rate": 5e-06, + "loss": 0.1041, + "num_input_tokens_seen": 635534740, + "step": 3700 + }, + { + "epoch": 0.9732360097323601, + "loss": 0.1074044331908226, + "loss_ce": 0.00025721488054841757, + "loss_iou": 0.51953125, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 635534740, + "step": 3700 + }, + { + "epoch": 0.9734990464917472, + "grad_norm": 5.513199468514905, + "learning_rate": 5e-06, + "loss": 0.1466, + "num_input_tokens_seen": 635707288, + "step": 3701 + }, + { + "epoch": 0.9734990464917472, + "loss": 0.09230555593967438, + "loss_ce": 0.0021413678769022226, + "loss_iou": 0.56640625, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 635707288, + "step": 3701 + }, + { + "epoch": 0.9737620832511343, + "grad_norm": 3.768479254290149, + "learning_rate": 5e-06, + "loss": 0.0828, + "num_input_tokens_seen": 635879648, + "step": 3702 + }, + { + "epoch": 0.9737620832511343, + "loss": 0.03509838879108429, + "loss_ce": 0.000766111770644784, + "loss_iou": 0.484375, + "loss_num": 0.006866455078125, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 635879648, + "step": 3702 + }, + { + "epoch": 0.9740251200105214, + "grad_norm": 3.566831404475063, + "learning_rate": 5e-06, + "loss": 0.0964, + "num_input_tokens_seen": 636051432, + "step": 3703 + }, + { + "epoch": 0.9740251200105214, + "loss": 0.09813763201236725, + "loss_ce": 0.001030704821459949, + "loss_iou": 0.45703125, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 636051432, + "step": 3703 + }, + { + "epoch": 0.9742881567699085, + "grad_norm": 3.4711728772501447, + "learning_rate": 5e-06, + "loss": 0.0877, + "num_input_tokens_seen": 636223588, + "step": 3704 + }, + { + "epoch": 0.9742881567699085, + "loss": 0.09130216389894485, + "loss_ce": 0.0020840244833379984, + "loss_iou": 0.25390625, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 636223588, + "step": 3704 + }, + { + "epoch": 0.9745511935292958, + "grad_norm": 10.080851555182159, + "learning_rate": 5e-06, + "loss": 0.125, + "num_input_tokens_seen": 636395540, + "step": 3705 + }, + { + "epoch": 0.9745511935292958, + "loss": 0.07607042789459229, + "loss_ce": 0.0012871015351265669, + "loss_iou": 0.2353515625, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 636395540, + "step": 3705 + }, + { + "epoch": 0.9748142302886829, + "grad_norm": 3.5401703294732814, + "learning_rate": 5e-06, + "loss": 0.0976, + "num_input_tokens_seen": 636567872, + "step": 3706 + }, + { + "epoch": 0.9748142302886829, + "loss": 0.1415339708328247, + "loss_ce": 0.0009700124501250684, + "loss_iou": 0.44140625, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 636567872, + "step": 3706 + }, + { + "epoch": 0.97507726704807, + "grad_norm": 4.65430606928134, + "learning_rate": 5e-06, + "loss": 0.1013, + "num_input_tokens_seen": 636739796, + "step": 3707 + }, + { + "epoch": 0.97507726704807, + "loss": 0.06319437175989151, + "loss_ce": 0.0008469584863632917, + "loss_iou": 0.341796875, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 636739796, + "step": 3707 + }, + { + "epoch": 0.9753403038074571, + "grad_norm": 12.08107208422745, + "learning_rate": 5e-06, + "loss": 0.0954, + "num_input_tokens_seen": 636911900, + "step": 3708 + }, + { + "epoch": 0.9753403038074571, + "loss": 0.09893044084310532, + "loss_ce": 0.004631124436855316, + "loss_iou": 0.48828125, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 636911900, + "step": 3708 + }, + { + "epoch": 0.9756033405668442, + "grad_norm": 4.1664851424909894, + "learning_rate": 5e-06, + "loss": 0.1173, + "num_input_tokens_seen": 637083804, + "step": 3709 + }, + { + "epoch": 0.9756033405668442, + "loss": 0.14125725626945496, + "loss_ce": 0.0035619523841887712, + "loss_iou": 0.408203125, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 637083804, + "step": 3709 + }, + { + "epoch": 0.9758663773262314, + "grad_norm": 4.483302931910663, + "learning_rate": 5e-06, + "loss": 0.0909, + "num_input_tokens_seen": 637253480, + "step": 3710 + }, + { + "epoch": 0.9758663773262314, + "loss": 0.09771590679883957, + "loss_ce": 0.0010667359456419945, + "loss_iou": 0.498046875, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 637253480, + "step": 3710 + }, + { + "epoch": 0.9761294140856185, + "grad_norm": 4.570273140954518, + "learning_rate": 5e-06, + "loss": 0.0962, + "num_input_tokens_seen": 637425832, + "step": 3711 + }, + { + "epoch": 0.9761294140856185, + "loss": 0.15927918255329132, + "loss_ce": 0.0016558904899284244, + "loss_iou": 0.3671875, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 637425832, + "step": 3711 + }, + { + "epoch": 0.9763924508450056, + "grad_norm": 4.261644785229519, + "learning_rate": 5e-06, + "loss": 0.1004, + "num_input_tokens_seen": 637597956, + "step": 3712 + }, + { + "epoch": 0.9763924508450056, + "loss": 0.09720858931541443, + "loss_ce": 0.005060770083218813, + "loss_iou": 0.400390625, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 637597956, + "step": 3712 + }, + { + "epoch": 0.9766554876043927, + "grad_norm": 4.316121186379811, + "learning_rate": 5e-06, + "loss": 0.1502, + "num_input_tokens_seen": 637770516, + "step": 3713 + }, + { + "epoch": 0.9766554876043927, + "loss": 0.23752275109291077, + "loss_ce": 0.003071451559662819, + "loss_iou": 0.33984375, + "loss_num": 0.046875, + "loss_xval": 0.234375, + "num_input_tokens_seen": 637770516, + "step": 3713 + }, + { + "epoch": 0.9769185243637798, + "grad_norm": 3.9858618973194195, + "learning_rate": 5e-06, + "loss": 0.0865, + "num_input_tokens_seen": 637940932, + "step": 3714 + }, + { + "epoch": 0.9769185243637798, + "loss": 0.09894842654466629, + "loss_ce": 0.0026502024848014116, + "loss_iou": 0.3984375, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 637940932, + "step": 3714 + }, + { + "epoch": 0.977181561123167, + "grad_norm": 6.459702059191179, + "learning_rate": 5e-06, + "loss": 0.1155, + "num_input_tokens_seen": 638112972, + "step": 3715 + }, + { + "epoch": 0.977181561123167, + "loss": 0.05996452271938324, + "loss_ce": 0.0013707715552300215, + "loss_iou": 0.54296875, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 638112972, + "step": 3715 + }, + { + "epoch": 0.9774445978825541, + "grad_norm": 4.702404331881163, + "learning_rate": 5e-06, + "loss": 0.105, + "num_input_tokens_seen": 638283296, + "step": 3716 + }, + { + "epoch": 0.9774445978825541, + "loss": 0.09870034456253052, + "loss_ce": 0.0003421921283006668, + "loss_iou": 0.3671875, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 638283296, + "step": 3716 + }, + { + "epoch": 0.9777076346419412, + "grad_norm": 6.311133724359332, + "learning_rate": 5e-06, + "loss": 0.1405, + "num_input_tokens_seen": 638455388, + "step": 3717 + }, + { + "epoch": 0.9777076346419412, + "loss": 0.10849727690219879, + "loss_ce": 0.001868859282694757, + "loss_iou": 0.39453125, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 638455388, + "step": 3717 + }, + { + "epoch": 0.9779706714013283, + "grad_norm": 4.841088605766017, + "learning_rate": 5e-06, + "loss": 0.0692, + "num_input_tokens_seen": 638627620, + "step": 3718 + }, + { + "epoch": 0.9779706714013283, + "loss": 0.0839821919798851, + "loss_ce": 0.0003640282666310668, + "loss_iou": 0.5625, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 638627620, + "step": 3718 + }, + { + "epoch": 0.9782337081607154, + "grad_norm": 4.2847622894404696, + "learning_rate": 5e-06, + "loss": 0.1385, + "num_input_tokens_seen": 638799776, + "step": 3719 + }, + { + "epoch": 0.9782337081607154, + "loss": 0.18468543887138367, + "loss_ce": 0.0003745291323866695, + "loss_iou": 0.4453125, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 638799776, + "step": 3719 + }, + { + "epoch": 0.9784967449201026, + "grad_norm": 4.11039448761818, + "learning_rate": 5e-06, + "loss": 0.0832, + "num_input_tokens_seen": 638971952, + "step": 3720 + }, + { + "epoch": 0.9784967449201026, + "loss": 0.1171189695596695, + "loss_ce": 0.002739093266427517, + "loss_iou": 0.41796875, + "loss_num": 0.02294921875, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 638971952, + "step": 3720 + }, + { + "epoch": 0.9787597816794897, + "grad_norm": 6.78823750558351, + "learning_rate": 5e-06, + "loss": 0.1295, + "num_input_tokens_seen": 639144380, + "step": 3721 + }, + { + "epoch": 0.9787597816794897, + "loss": 0.10253561288118362, + "loss_ce": 0.0012172528076916933, + "loss_iou": 0.5078125, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 639144380, + "step": 3721 + }, + { + "epoch": 0.9790228184388768, + "grad_norm": 3.963795445916244, + "learning_rate": 5e-06, + "loss": 0.0748, + "num_input_tokens_seen": 639316848, + "step": 3722 + }, + { + "epoch": 0.9790228184388768, + "loss": 0.13296961784362793, + "loss_ce": 0.0031783583108335733, + "loss_iou": 0.453125, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 639316848, + "step": 3722 + }, + { + "epoch": 0.9792858551982639, + "grad_norm": 11.532632559627872, + "learning_rate": 5e-06, + "loss": 0.1246, + "num_input_tokens_seen": 639489024, + "step": 3723 + }, + { + "epoch": 0.9792858551982639, + "loss": 0.12053656578063965, + "loss_ce": 0.005637889727950096, + "loss_iou": 0.3828125, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 639489024, + "step": 3723 + }, + { + "epoch": 0.979548891957651, + "grad_norm": 9.989053455854867, + "learning_rate": 5e-06, + "loss": 0.0825, + "num_input_tokens_seen": 639661440, + "step": 3724 + }, + { + "epoch": 0.979548891957651, + "loss": 0.13625101745128632, + "loss_ce": 0.00029520769021473825, + "loss_iou": 0.5078125, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 639661440, + "step": 3724 + }, + { + "epoch": 0.9798119287170383, + "grad_norm": 4.9459595738196285, + "learning_rate": 5e-06, + "loss": 0.1161, + "num_input_tokens_seen": 639833676, + "step": 3725 + }, + { + "epoch": 0.9798119287170383, + "loss": 0.1375160813331604, + "loss_ce": 0.0008125934982672334, + "loss_iou": 0.392578125, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 639833676, + "step": 3725 + }, + { + "epoch": 0.9800749654764254, + "grad_norm": 4.566071751318605, + "learning_rate": 5e-06, + "loss": 0.1086, + "num_input_tokens_seen": 640006092, + "step": 3726 + }, + { + "epoch": 0.9800749654764254, + "loss": 0.15862302482128143, + "loss_ce": 0.006462385877966881, + "loss_iou": 0.44140625, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 640006092, + "step": 3726 + }, + { + "epoch": 0.9803380022358125, + "grad_norm": 5.733708403269408, + "learning_rate": 5e-06, + "loss": 0.1203, + "num_input_tokens_seen": 640178452, + "step": 3727 + }, + { + "epoch": 0.9803380022358125, + "loss": 0.045382432639598846, + "loss_ce": 0.00027745150146074593, + "loss_iou": 0.515625, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 640178452, + "step": 3727 + }, + { + "epoch": 0.9806010389951996, + "grad_norm": 4.6254627269264095, + "learning_rate": 5e-06, + "loss": 0.0833, + "num_input_tokens_seen": 640350624, + "step": 3728 + }, + { + "epoch": 0.9806010389951996, + "loss": 0.054897043853998184, + "loss_ce": 0.0018880083225667477, + "loss_iou": 0.63671875, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 640350624, + "step": 3728 + }, + { + "epoch": 0.9808640757545867, + "grad_norm": 5.915011241669676, + "learning_rate": 5e-06, + "loss": 0.1228, + "num_input_tokens_seen": 640522796, + "step": 3729 + }, + { + "epoch": 0.9808640757545867, + "loss": 0.12351959943771362, + "loss_ce": 0.002242741174995899, + "loss_iou": 0.56640625, + "loss_num": 0.0242919921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 640522796, + "step": 3729 + }, + { + "epoch": 0.9811271125139738, + "grad_norm": 13.40274422198988, + "learning_rate": 5e-06, + "loss": 0.1041, + "num_input_tokens_seen": 640694968, + "step": 3730 + }, + { + "epoch": 0.9811271125139738, + "loss": 0.08854502439498901, + "loss_ce": 0.00027292766026221216, + "loss_iou": 0.455078125, + "loss_num": 0.017578125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 640694968, + "step": 3730 + }, + { + "epoch": 0.981390149273361, + "grad_norm": 6.656264975637515, + "learning_rate": 5e-06, + "loss": 0.0847, + "num_input_tokens_seen": 640867112, + "step": 3731 + }, + { + "epoch": 0.981390149273361, + "loss": 0.12663085758686066, + "loss_ce": 0.0017834422178566456, + "loss_iou": 0.46484375, + "loss_num": 0.02490234375, + "loss_xval": 0.125, + "num_input_tokens_seen": 640867112, + "step": 3731 + }, + { + "epoch": 0.9816531860327481, + "grad_norm": 11.247340076351778, + "learning_rate": 5e-06, + "loss": 0.0991, + "num_input_tokens_seen": 641039384, + "step": 3732 + }, + { + "epoch": 0.9816531860327481, + "loss": 0.1410384476184845, + "loss_ce": 0.0051131523214280605, + "loss_iou": 0.5859375, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 641039384, + "step": 3732 + }, + { + "epoch": 0.9819162227921352, + "grad_norm": 5.304544125644644, + "learning_rate": 5e-06, + "loss": 0.1061, + "num_input_tokens_seen": 641211452, + "step": 3733 + }, + { + "epoch": 0.9819162227921352, + "loss": 0.12673211097717285, + "loss_ce": 0.004204033873975277, + "loss_iou": 0.361328125, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 641211452, + "step": 3733 + }, + { + "epoch": 0.9821792595515223, + "grad_norm": 4.504449466935876, + "learning_rate": 5e-06, + "loss": 0.0833, + "num_input_tokens_seen": 641383184, + "step": 3734 + }, + { + "epoch": 0.9821792595515223, + "loss": 0.09244327247142792, + "loss_ce": 0.0025995145551860332, + "loss_iou": 0.5546875, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 641383184, + "step": 3734 + }, + { + "epoch": 0.9824422963109094, + "grad_norm": 4.706100049802098, + "learning_rate": 5e-06, + "loss": 0.0877, + "num_input_tokens_seen": 641555380, + "step": 3735 + }, + { + "epoch": 0.9824422963109094, + "loss": 0.08688107877969742, + "loss_ce": 0.0007604720303788781, + "loss_iou": 0.46484375, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 641555380, + "step": 3735 + }, + { + "epoch": 0.9827053330702966, + "grad_norm": 8.136156546275306, + "learning_rate": 5e-06, + "loss": 0.1102, + "num_input_tokens_seen": 641727456, + "step": 3736 + }, + { + "epoch": 0.9827053330702966, + "loss": 0.11328569054603577, + "loss_ce": 0.0008589247590862215, + "loss_iou": 0.404296875, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 641727456, + "step": 3736 + }, + { + "epoch": 0.9829683698296837, + "grad_norm": 4.005733802836262, + "learning_rate": 5e-06, + "loss": 0.074, + "num_input_tokens_seen": 641899684, + "step": 3737 + }, + { + "epoch": 0.9829683698296837, + "loss": 0.09893114119768143, + "loss_ce": 0.00038988247979432344, + "loss_iou": 0.470703125, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 641899684, + "step": 3737 + }, + { + "epoch": 0.9832314065890708, + "grad_norm": 3.6593398060214795, + "learning_rate": 5e-06, + "loss": 0.0869, + "num_input_tokens_seen": 642072000, + "step": 3738 + }, + { + "epoch": 0.9832314065890708, + "loss": 0.0708111971616745, + "loss_ce": 0.003092692233622074, + "loss_iou": 0.609375, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 642072000, + "step": 3738 + }, + { + "epoch": 0.9834944433484579, + "grad_norm": 112.80387932882391, + "learning_rate": 5e-06, + "loss": 0.075, + "num_input_tokens_seen": 642243916, + "step": 3739 + }, + { + "epoch": 0.9834944433484579, + "loss": 0.07938267290592194, + "loss_ce": 0.001837508985772729, + "loss_iou": 0.5078125, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 642243916, + "step": 3739 + }, + { + "epoch": 0.983757480107845, + "grad_norm": 46.399369914383044, + "learning_rate": 5e-06, + "loss": 0.0893, + "num_input_tokens_seen": 642415992, + "step": 3740 + }, + { + "epoch": 0.983757480107845, + "loss": 0.12807899713516235, + "loss_ce": 0.0024686530232429504, + "loss_iou": 0.458984375, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 642415992, + "step": 3740 + }, + { + "epoch": 0.9840205168672322, + "grad_norm": 4.972320382385805, + "learning_rate": 5e-06, + "loss": 0.0813, + "num_input_tokens_seen": 642586472, + "step": 3741 + }, + { + "epoch": 0.9840205168672322, + "loss": 0.10946183651685715, + "loss_ce": 0.0011854701442644, + "loss_iou": 0.412109375, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 642586472, + "step": 3741 + }, + { + "epoch": 0.9842835536266193, + "grad_norm": 3.8095943920850632, + "learning_rate": 5e-06, + "loss": 0.1015, + "num_input_tokens_seen": 642758552, + "step": 3742 + }, + { + "epoch": 0.9842835536266193, + "loss": 0.1769047975540161, + "loss_ce": 0.002832533325999975, + "loss_iou": 0.35546875, + "loss_num": 0.034912109375, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 642758552, + "step": 3742 + }, + { + "epoch": 0.9845465903860064, + "grad_norm": 5.317599890556273, + "learning_rate": 5e-06, + "loss": 0.0837, + "num_input_tokens_seen": 642930684, + "step": 3743 + }, + { + "epoch": 0.9845465903860064, + "loss": 0.05296643078327179, + "loss_ce": 0.0012238813797011971, + "loss_iou": 0.5078125, + "loss_num": 0.01031494140625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 642930684, + "step": 3743 + }, + { + "epoch": 0.9848096271453936, + "grad_norm": 4.846803333953094, + "learning_rate": 5e-06, + "loss": 0.0912, + "num_input_tokens_seen": 643102812, + "step": 3744 + }, + { + "epoch": 0.9848096271453936, + "loss": 0.10566210746765137, + "loss_ce": 0.0007274242816492915, + "loss_iou": 0.5234375, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 643102812, + "step": 3744 + }, + { + "epoch": 0.9850726639047807, + "grad_norm": 3.406688370945616, + "learning_rate": 5e-06, + "loss": 0.1164, + "num_input_tokens_seen": 643275096, + "step": 3745 + }, + { + "epoch": 0.9850726639047807, + "loss": 0.08728724718093872, + "loss_ce": 0.0003884438192471862, + "loss_iou": 0.41796875, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 643275096, + "step": 3745 + }, + { + "epoch": 0.9853357006641679, + "grad_norm": 5.344442258228798, + "learning_rate": 5e-06, + "loss": 0.1098, + "num_input_tokens_seen": 643444760, + "step": 3746 + }, + { + "epoch": 0.9853357006641679, + "loss": 0.07199759781360626, + "loss_ce": 0.002844766713678837, + "loss_iou": 0.455078125, + "loss_num": 0.0137939453125, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 643444760, + "step": 3746 + }, + { + "epoch": 0.985598737423555, + "grad_norm": 9.063261890916973, + "learning_rate": 5e-06, + "loss": 0.1032, + "num_input_tokens_seen": 643617080, + "step": 3747 + }, + { + "epoch": 0.985598737423555, + "loss": 0.06594318896532059, + "loss_ce": 0.0005440223030745983, + "loss_iou": 0.39453125, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 643617080, + "step": 3747 + }, + { + "epoch": 0.9858617741829421, + "grad_norm": 3.8879508865341124, + "learning_rate": 5e-06, + "loss": 0.0591, + "num_input_tokens_seen": 643789448, + "step": 3748 + }, + { + "epoch": 0.9858617741829421, + "loss": 0.056218698620796204, + "loss_ce": 0.001195505610667169, + "loss_iou": 0.65625, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 643789448, + "step": 3748 + }, + { + "epoch": 0.9861248109423292, + "grad_norm": 193.16068563077133, + "learning_rate": 5e-06, + "loss": 0.1088, + "num_input_tokens_seen": 643958208, + "step": 3749 + }, + { + "epoch": 0.9861248109423292, + "loss": 0.057384688407182693, + "loss_ce": 0.003338058013468981, + "loss_iou": 0.3984375, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 643958208, + "step": 3749 + }, + { + "epoch": 0.9863878477017163, + "grad_norm": 11.212759382877062, + "learning_rate": 5e-06, + "loss": 0.0866, + "num_input_tokens_seen": 644130472, + "step": 3750 + }, + { + "epoch": 0.9863878477017163, + "eval_websight_new_CIoU": 0.8978322446346283, + "eval_websight_new_GIoU": 0.8991535305976868, + "eval_websight_new_IoU": 0.9012524485588074, + "eval_websight_new_MAE_all": 0.013891254551708698, + "eval_websight_new_MAE_h": 0.008930663112550974, + "eval_websight_new_MAE_w": 0.020785433240234852, + "eval_websight_new_MAE_x": 0.0205678790807724, + "eval_websight_new_MAE_y": 0.005281045567244291, + "eval_websight_new_NUM_probability": 0.9999656677246094, + "eval_websight_new_inside_bbox": 0.984375, + "eval_websight_new_loss": 0.06887268275022507, + "eval_websight_new_loss_ce": 5.8660152717493474e-06, + "eval_websight_new_loss_iou": 0.30145263671875, + "eval_websight_new_loss_num": 0.012205123901367188, + "eval_websight_new_loss_xval": 0.06097412109375, + "eval_websight_new_runtime": 65.4719, + "eval_websight_new_samples_per_second": 0.764, + "eval_websight_new_steps_per_second": 0.031, + "num_input_tokens_seen": 644130472, + "step": 3750 + }, + { + "epoch": 0.9863878477017163, + "eval_seeclick_CIoU": 0.657441109418869, + "eval_seeclick_GIoU": 0.6616063714027405, + "eval_seeclick_IoU": 0.6793454885482788, + "eval_seeclick_MAE_all": 0.04169847071170807, + "eval_seeclick_MAE_h": 0.024878486059606075, + "eval_seeclick_MAE_w": 0.05510186776518822, + "eval_seeclick_MAE_x": 0.06434983387589455, + "eval_seeclick_MAE_y": 0.02246370818465948, + "eval_seeclick_NUM_probability": 0.9999178946018219, + "eval_seeclick_inside_bbox": 0.953125, + "eval_seeclick_loss": 0.169124573469162, + "eval_seeclick_loss_ce": 0.009124522097408772, + "eval_seeclick_loss_iou": 0.455078125, + "eval_seeclick_loss_num": 0.030914306640625, + "eval_seeclick_loss_xval": 0.15460205078125, + "eval_seeclick_runtime": 75.5663, + "eval_seeclick_samples_per_second": 0.569, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 644130472, + "step": 3750 + }, + { + "epoch": 0.9863878477017163, + "eval_icons_CIoU": 0.8627608418464661, + "eval_icons_GIoU": 0.863149881362915, + "eval_icons_IoU": 0.8711875081062317, + "eval_icons_MAE_all": 0.022217202931642532, + "eval_icons_MAE_h": 0.01895691081881523, + "eval_icons_MAE_w": 0.0274124164134264, + "eval_icons_MAE_x": 0.0250897784717381, + "eval_icons_MAE_y": 0.017409704625606537, + "eval_icons_NUM_probability": 0.9999702572822571, + "eval_icons_inside_bbox": 0.9565972089767456, + "eval_icons_loss": 0.07059822231531143, + "eval_icons_loss_ce": 1.0734858733485453e-05, + "eval_icons_loss_iou": 0.47100830078125, + "eval_icons_loss_num": 0.012834548950195312, + "eval_icons_loss_xval": 0.064178466796875, + "eval_icons_runtime": 81.7327, + "eval_icons_samples_per_second": 0.612, + "eval_icons_steps_per_second": 0.024, + "num_input_tokens_seen": 644130472, + "step": 3750 + }, + { + "epoch": 0.9863878477017163, + "eval_screenspot_CIoU": 0.6053812901178995, + "eval_screenspot_GIoU": 0.5979885856310526, + "eval_screenspot_IoU": 0.637059231599172, + "eval_screenspot_MAE_all": 0.07145863150556882, + "eval_screenspot_MAE_h": 0.04250209157665571, + "eval_screenspot_MAE_w": 0.12718970080216727, + "eval_screenspot_MAE_x": 0.07470711196462314, + "eval_screenspot_MAE_y": 0.04143562292059263, + "eval_screenspot_NUM_probability": 0.9999008377393087, + "eval_screenspot_inside_bbox": 0.8841666579246521, + "eval_screenspot_loss": 0.9072001576423645, + "eval_screenspot_loss_ce": 0.6087295611699423, + "eval_screenspot_loss_iou": 0.4524739583333333, + "eval_screenspot_loss_num": 0.058499654134114586, + "eval_screenspot_loss_xval": 0.29248046875, + "eval_screenspot_runtime": 146.6335, + "eval_screenspot_samples_per_second": 0.607, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 644130472, + "step": 3750 + } + ], + "logging_steps": 1.0, + "max_steps": 11403, + "num_input_tokens_seen": 644130472, + "num_train_epochs": 3, + "save_steps": 250, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4647228784377856.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}